forked from boostorg/regex
Initial commit of quickbook conversion of docs.
[SVN r37942]
This commit is contained in:
114
doc/regex_split.qbk
Normal file
114
doc/regex_split.qbk
Normal file
@ -0,0 +1,114 @@
|
||||
|
||||
[section:regex_split regex_split (deprecated)]
|
||||
|
||||
The algorithm [regex_split] has been deprecated in favor of the iterator
|
||||
[regex_token_iterator] which has a more flexible and powerful interface,
|
||||
as well as following the more usual standard library "pull" rather than
|
||||
"push" semantics.
|
||||
|
||||
Code which uses [regex_split] will continue to compile, the following
|
||||
documentation is taken from a previous Boost.Regex version:
|
||||
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
Algorithm [regex_split] performs a similar operation to the perl split operation,
|
||||
and comes in three overloaded forms:
|
||||
|
||||
template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2>
|
||||
std::size_t regex_split(OutputIterator out,
|
||||
std::basic_string<charT, Traits1, Alloc1>& s,
|
||||
const basic_regex<charT, Traits2>& e,
|
||||
boost::match_flag_type flags,
|
||||
std::size_t max_split);
|
||||
|
||||
template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2>
|
||||
std::size_t regex_split(OutputIterator out,
|
||||
std::basic_string<charT, Traits1, Alloc1>& s,
|
||||
const basic_regex<charT, Traits2>& e,
|
||||
boost::match_flag_type flags = match_default);
|
||||
|
||||
template <class OutputIterator, class charT, class Traits1, class Alloc1>
|
||||
std::size_t regex_split(OutputIterator out,
|
||||
std::basic_string<charT, Traits1, Alloc1>& s);
|
||||
|
||||
[*Effects]: Each version of the algorithm takes an output-iterator for
|
||||
output, and a string for input. If the expression contains no marked
|
||||
sub-expressions, then the algorithm writes one string onto the output-iterator
|
||||
for each section of input that does not match the expression.
|
||||
If the expression does contain marked sub-expressions, then each
|
||||
time a match is found, one string for each marked sub-expression will be
|
||||
written to the output-iterator. No more than max_split strings will be written
|
||||
to the output-iterator. Before returning, all the input processed will be
|
||||
deleted from the string /s/ (if /max_split/ is not reached then all of /s/
|
||||
will be deleted). Returns the number of strings written to the output-iterator.
|
||||
If the parameter /max_split/ is not specified then it defaults to `UINT_MAX`.
|
||||
If no expression is specified, then it defaults to "\\s+", and splitting occurs
|
||||
on whitespace.
|
||||
|
||||
[*Throws]: `std::runtime_error` if the complexity of matching the expression
|
||||
against an N character string begins to exceed O(N[super 2]), or if the
|
||||
program runs out of stack space while matching the expression (if Boost.Regex is
|
||||
configured in recursive mode), or if the matcher exhausts it's permitted
|
||||
memory allocation (if Boost.Regex is configured in non-recursive mode).
|
||||
|
||||
[*Example]: the following function will split the input string into a
|
||||
series of tokens, and remove each token from the string /s/:
|
||||
|
||||
unsigned tokenise(std::list<std::string>& l, std::string& s)
|
||||
{
|
||||
return boost::regex_split(std::back_inserter(l), s);
|
||||
}
|
||||
|
||||
Example: the following short program will extract all of the URL's
|
||||
from a html file, and print them out to cout:
|
||||
|
||||
#include <list>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
|
||||
boost::regbase::normal | boost::regbase::icase);
|
||||
|
||||
void load_file(std::string& s, std::istream& is)
|
||||
{
|
||||
s.erase();
|
||||
//
|
||||
// attempt to grow string buffer to match file size,
|
||||
// this doesn't always work...
|
||||
s.reserve(is.rdbuf()->in_avail());
|
||||
char c;
|
||||
while(is.get(c))
|
||||
{
|
||||
// use logarithmic growth stategy, in case
|
||||
// in_avail (above) returned zero:
|
||||
if(s.capacity() == s.size())
|
||||
s.reserve(s.capacity() * 3);
|
||||
s.append(1, c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
std::string s;
|
||||
std::list<std::string> l;
|
||||
|
||||
for(int i = 1; i < argc; ++i)
|
||||
{
|
||||
std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
|
||||
s.erase();
|
||||
std::ifstream is(argv[i]);
|
||||
load_file(s, is);
|
||||
boost::regex_split(std::back_inserter(l), s, e);
|
||||
while(l.size())
|
||||
{
|
||||
s = *(l.begin());
|
||||
l.pop_front();
|
||||
std::cout << s << std::endl;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
[endsect]
|
Reference in New Issue
Block a user