/* * * Copyright (c) 1998-2022 * John Maddock * * Use, modification and distribution are subject to the * Boost Software License, Version 1.0. (See accompanying file * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) * */ /* * LOCATION: see http://www.boost.org for most recent version. * FILE regex_split_example_2.cpp * VERSION see * DESCRIPTION: regex_split example: spit out linked URL's. */ #if (defined(__cpp_lib_modules) || (defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 193833135))) && !defined(TEST_HEADERS) import std; #elif defined(MSVC_EXPERIMENTAL_STD_MODULE) && !defined(TEST_HEADERS) import std.core; #else #include #include #include #include #endif #ifdef TEST_HEADERS #include #else import boost.regex; #endif boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"", boost::regex::normal | boost::regex::icase); void load_file(std::string& s, std::istream& is) { s.erase(); if(is.bad()) return; // // attempt to grow string buffer to match file size, // this doesn't always work... s.reserve(static_cast(is.rdbuf()->in_avail())); char c; while(is.get(c)) { // use logarithmic growth stategy, in case // in_avail (above) returned zero: if(s.capacity() == s.size()) s.reserve(s.capacity() * 3); s.append(1, c); } } int main(int argc, char** argv) { std::string s; std::list l; int i; for(i = 1; i < argc; ++i) { std::cout << "Findings URL's in " << argv[i] << ":" << std::endl; s.erase(); std::ifstream is(argv[i]); load_file(s, is); is.close(); boost::regex_split(std::back_inserter(l), s, e); while(l.size()) { s = *(l.begin()); l.pop_front(); std::cout << s << std::endl; } } // // alternative method: // split one match at a time and output direct to // cout via ostream_iterator.... // for(i = 1; i < argc; ++i) { std::cout << "Findings URL's in " << argv[i] << ":" << std::endl; s.erase(); std::ifstream is(argv[i]); load_file(s, is); is.close(); while(boost::regex_split(std::ostream_iterator(std::cout), s, e, boost::match_default, 1)) std::cout << std::endl; } return 0; }