2003-05-17 11:45:48 +00:00
|
|
|
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
|
|
|
|
<html>
|
|
|
|
|
<head>
|
|
|
|
|
<title>Boost.Regex: regex_token_iterator</title>
|
|
|
|
|
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
|
|
|
|
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
|
|
|
|
<body>
|
|
|
|
|
<P>
|
|
|
|
|
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
|
|
|
|
<TR>
|
|
|
|
|
<td vAlign="top" width="300">
|
|
|
|
|
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../c++boost.gif" width="277" border="0"></A></h3>
|
|
|
|
|
</td>
|
|
|
|
|
<TD width="353">
|
|
|
|
|
<H1 align="center">Boost.Regex</H1>
|
|
|
|
|
<H2 align="center">regex_token_iterator</H2>
|
|
|
|
|
</TD>
|
|
|
|
|
<td width="50">
|
|
|
|
|
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
|
|
|
|
</td>
|
|
|
|
|
</TR>
|
|
|
|
|
</TABLE>
|
|
|
|
|
</P>
|
|
|
|
|
<HR>
|
|
|
|
|
<H3>Contents</H3>
|
|
|
|
|
<dl class="index">
|
|
|
|
|
<dt><A href="#synopsis">Synopsis</A> <dt><a href="#description">Description</a> <dt><A href="#examples">
|
|
|
|
|
Examples</A></dt></dl>
|
|
|
|
|
<H3><A name="synopsis"></A>Synopsis</H3>
|
|
|
|
|
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
|
|
|
|
|
that is to say it represents a new view of an existing iterator sequence, by
|
|
|
|
|
enumerating all the occurrences of a regular expression within that sequence,
|
2003-11-03 11:25:30 +00:00
|
|
|
|
and presenting one or more character sequences for each match found. Each
|
|
|
|
|
position enumerated by the iterator is a <A href="sub_match.html">sub_match</A>
|
|
|
|
|
object that represents what matched a particular sub-expression within the
|
|
|
|
|
regular expression. When class <CODE>regex_token_iterator</CODE> is used to
|
|
|
|
|
enumerate a single sub-expression with index -1, then the iterator performs
|
|
|
|
|
field splitting: that is to say it enumerates one character sequence for each
|
2003-05-17 11:45:48 +00:00
|
|
|
|
section of the character container sequence that does not match the regular
|
|
|
|
|
expression specified.</P>
|
|
|
|
|
<PRE>
|
|
|
|
|
template <class BidirectionalIterator,
|
|
|
|
|
class charT = iterator_traits<BidirectionalIterator>::value_type,
|
|
|
|
|
class traits = regex_traits<charT>,
|
|
|
|
|
class Allocator = allocator<charT> >
|
|
|
|
|
class regex_token_iterator
|
|
|
|
|
{
|
|
|
|
|
public:
|
2003-10-21 11:18:40 +00:00
|
|
|
|
typedef <A href="basic_regex.html">basic_regex</A><charT, traits, Allocator> regex_type;
|
2003-11-03 11:25:30 +00:00
|
|
|
|
typedef <A href="sub_match.html">sub_match</A>
|
|
|
|
|
<BidirectionalIterator> value_type;
|
2003-05-17 11:45:48 +00:00
|
|
|
|
typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
|
|
|
|
|
typedef const value_type* pointer;
|
|
|
|
|
typedef const value_type& reference;
|
|
|
|
|
typedef std::forward_iterator_tag iterator_category;
|
|
|
|
|
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<A href="#c1">regex_token_iterator</A>();
|
|
|
|
|
<A href="#c2">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
|
|
|
|
int submatch = 0, <A href="match_flag_type.html">match_flag_type</A> m = match_default);
|
|
|
|
|
<A href="#c3">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
2003-05-17 11:45:48 +00:00
|
|
|
|
const std::vector<int>& submatches, match_flag_type m = match_default);
|
|
|
|
|
template <std::size_t N>
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<A href="#c4">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
2003-05-17 11:45:48 +00:00
|
|
|
|
const int (&submatches)[N], match_flag_type m = match_default);
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<A href="#c5">regex_token_iterator</A>(const regex_token_iterator&);
|
|
|
|
|
regex_token_iterator& <A href="#o1">operator</A>=(const regex_token_iterator&);
|
|
|
|
|
bool <A href="#o2">operator</A>==(const regex_token_iterator&)const;
|
|
|
|
|
bool <A href="#o3">operator</A>!=(const regex_token_iterator&)const;
|
|
|
|
|
const value_type& <A href="#o4">operator</A>*()const;
|
|
|
|
|
const value_type* <A href="#o5">operator</A>->()const;
|
|
|
|
|
regex_token_iterator& <A href="#o6">operator</A>++();
|
|
|
|
|
regex_token_iterator <A href="#o7">operator</A>++(int);
|
2003-05-17 11:45:48 +00:00
|
|
|
|
};
|
|
|
|
|
</PRE>
|
|
|
|
|
<H3><A name="description"></A>Description</H3>
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<PRE><A name=c1></A>regex_token_iterator();</PRE>
|
2003-05-17 11:45:48 +00:00
|
|
|
|
<P><B> Effects:</B> constructs an end of sequence iterator.</P>
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<PRE><A name=c2></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
2003-05-17 11:45:48 +00:00
|
|
|
|
int submatch = 0, match_flag_type m = match_default);</PRE>
|
|
|
|
|
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
|
|
|
|
|
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate one
|
|
|
|
|
string for each regular expression match of the expression <EM>re</EM> found
|
|
|
|
|
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. The
|
|
|
|
|
string enumerated is the sub-expression <EM>submatch </EM>for each match
|
|
|
|
|
found; if <EM>submatch </EM>is -1, then enumerates all the text sequences that
|
|
|
|
|
did not match the expression <EM>re </EM>(that is to say it performs field splitting).</P>
|
|
|
|
|
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
|
|
|
|
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
|
|
|
|
or if the program runs out of stack space while matching the expression (if
|
|
|
|
|
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
|
|
|
|
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
|
|
|
|
configured</A> in non-recursive mode).</P>
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<PRE><A name=c3></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
2003-05-17 11:45:48 +00:00
|
|
|
|
const std::vector<int>& submatches, match_flag_type m = match_default);</PRE>
|
|
|
|
|
<P><B> Preconditions:</B> <CODE>submatches.size() && !re.empty()</CODE>.</P>
|
|
|
|
|
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
|
|
|
|
|
strings for each regular expression match of the expression <EM>re</EM> found
|
|
|
|
|
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. For
|
|
|
|
|
each match found one string will be enumerated for each sub-expression
|
|
|
|
|
index contained within <EM>submatches </EM>vector; if <EM>submatches[0] </EM>
|
|
|
|
|
is -1, then the first string enumerated for each match will be all of the text
|
|
|
|
|
from end of the last match to the start of the current match, in addition there
|
|
|
|
|
will be one extra string enumerated when no more matches can be found: from the
|
|
|
|
|
end of the last match found, to the end of the underlying sequence.</P>
|
|
|
|
|
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
|
|
|
|
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
|
|
|
|
or if the program runs out of stack space while matching the expression (if
|
|
|
|
|
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
|
|
|
|
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
|
|
|
|
configured</A> in non-recursive mode).</P>
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<PRE><A name=c4></A>template <std::size_t N>
|
2003-05-17 11:45:48 +00:00
|
|
|
|
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
|
|
|
|
const int (&submatches)[N], match_flag_type m = match_default);</PRE>
|
|
|
|
|
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
|
|
|
|
|
<P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will
|
|
|
|
|
enumerate <EM>N</EM> strings for each regular expression match of the
|
|
|
|
|
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match
|
|
|
|
|
flags <EM>m</EM>. For each match found one string will be
|
|
|
|
|
enumerated for each sub-expression index contained within the <EM>submatches
|
|
|
|
|
</EM>array; if <EM>submatches[0] </EM>is -1, then the first string enumerated
|
|
|
|
|
for each match will be all of the text from end of the last match to the start
|
|
|
|
|
of the current match, in addition there will be one extra string enumerated
|
|
|
|
|
when no more matches can be found: from the end of the last match found, to the
|
|
|
|
|
end of the underlying sequence.</P>
|
|
|
|
|
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
|
|
|
|
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
|
|
|
|
or if the program runs out of stack space while matching the expression (if
|
|
|
|
|
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
|
|
|
|
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
|
|
|
|
configured</A> in non-recursive mode).</P>
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<PRE><A name=c5></A>regex_token_iterator(const regex_token_iterator& that);</PRE>
|
2003-05-17 11:45:48 +00:00
|
|
|
|
<P><B> Effects: </B>constructs a copy of <CODE>that</CODE>.</P>
|
|
|
|
|
<P><B> Postconditions:</B> <CODE>*this == that</CODE>.</P>
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<PRE><A name=o1></A>regex_token_iterator& operator=(const regex_token_iterator& that);</PRE>
|
2003-05-17 11:45:48 +00:00
|
|
|
|
<P><B> Effects: </B>sets <CODE>*this</CODE> to be equal to <CODE>that</CODE>.</P>
|
|
|
|
|
<P><B> Postconditions:</B> <CODE>*this == that</CODE>.</P>
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<PRE><A name=o2></A>bool operator==(const regex_token_iterator&)const;</PRE>
|
2003-05-17 11:45:48 +00:00
|
|
|
|
<P>
|
|
|
|
|
<B>Effects: </B>returns true if *this is the same position as that.</P>
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<PRE><A name=o3></A>bool operator!=(const regex_token_iterator&)const;</PRE>
|
2003-05-17 11:45:48 +00:00
|
|
|
|
<P>
|
|
|
|
|
<B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<PRE><A name=o4></A>const value_type& operator*()const;</PRE>
|
2003-05-17 11:45:48 +00:00
|
|
|
|
<P>
|
2003-11-03 11:25:30 +00:00
|
|
|
|
<B>Effects: </B>returns the current character sequence being enumerated.</P>
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<PRE><A name=o5></A>const value_type* operator->()const;</PRE>
|
2003-05-17 11:45:48 +00:00
|
|
|
|
<P>
|
|
|
|
|
<B>Effects: </B>returns <CODE>&(*this)</CODE>.</P>
|
2003-10-21 11:18:40 +00:00
|
|
|
|
<PRE><A name=o6></A>regex_token_iterator& operator++();</PRE>
|
2003-05-17 11:45:48 +00:00
|
|
|
|
<P>
|
2003-11-03 11:25:30 +00:00
|
|
|
|
<B>Effects: </B>Moves on to the next character sequence to be enumerated.</P>
|
2003-05-17 11:45:48 +00:00
|
|
|
|
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
|
|
|
|
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
|
|
|
|
or if the program runs out of stack space while matching the expression (if
|
|
|
|
|
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
|
|
|
|
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
|
|
|
|
configured</A> in non-recursive mode).</P>
|
|
|
|
|
<P>
|
|
|
|
|
<B>
|
2003-10-21 11:18:40 +00:00
|
|
|
|
Returns:</B><CODE> *this</CODE>.</P><PRE><A name=o7></A>regex_token_iterator operator++(int);</PRE>
|
2003-05-17 11:45:48 +00:00
|
|
|
|
<P><B> Effects:</B> constructs a copy <CODE>result</CODE> of <CODE>*this</CODE>,
|
|
|
|
|
then calls <CODE>++(*this)</CODE>.</P>
|
|
|
|
|
<P><B> Returns:</B> <CODE>result</CODE>.</P><A name="examples"></A>
|
|
|
|
|
<H3>Examples</H3>
|
2003-11-28 15:17:45 +00:00
|
|
|
|
<P>The following <A href="../example/snippets/regex_token_iterator_eg_1.cpp">example</A>
|
2003-05-17 11:45:48 +00:00
|
|
|
|
takes a string and splits it into a series of tokens:</P>
|
|
|
|
|
<pre>
|
|
|
|
|
<FONT color=#008040>#include <iostream></FONT>
|
|
|
|
|
<FONT color=#008040>#include <boost/regex.hpp></FONT>
|
|
|
|
|
|
|
|
|
|
<B>using</B> <B>namespace</B> std;
|
|
|
|
|
|
|
|
|
|
<B>int</B> main(<B>int</B> argc)
|
|
|
|
|
{
|
|
|
|
|
string s;
|
|
|
|
|
<B>do</B>{
|
|
|
|
|
<B>if</B>(argc == <FONT color=#0000a0>1</FONT>)
|
|
|
|
|
{
|
|
|
|
|
cout << <FONT color=#0000ff>"Enter text to split (or \"quit\" to exit): "</FONT>;
|
|
|
|
|
getline(cin, s);
|
|
|
|
|
<B>if</B>(s == <FONT color=#0000ff>"quit"</FONT>) <B>break</B>;
|
|
|
|
|
}
|
|
|
|
|
<B>else</B>
|
|
|
|
|
s = <FONT color=#0000ff>"This is a string of tokens"</FONT>;
|
|
|
|
|
|
|
|
|
|
boost::regex re(<FONT color=#0000ff>"\\s+"</FONT>);
|
|
|
|
|
boost::regex_token_iterator<std::string::const_iterator> i(s.begin(), s.end(), re, -<FONT color=#0000a0>1</FONT>);
|
|
|
|
|
boost::regex_token_iterator<std::string::const_iterator> j;
|
|
|
|
|
|
|
|
|
|
<B>unsigned</B> count = <FONT color=#0000a0>0</FONT>;
|
|
|
|
|
<B>while</B>(i != j)
|
|
|
|
|
{
|
|
|
|
|
cout << *i++ << endl;
|
|
|
|
|
count++;
|
|
|
|
|
}
|
|
|
|
|
cout << <FONT color=#0000ff>"There were "</FONT> << count << <FONT color=#0000ff>" tokens found."</FONT> << endl;
|
|
|
|
|
|
|
|
|
|
}<B>while</B>(argc == <FONT color=#0000a0>1</FONT>);
|
|
|
|
|
<B>return</B> <FONT color=#0000a0>0</FONT>;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
</pre>
|
|
|
|
|
<P>The following <A href="../example/snippets/regex_token_iterator_example_2.cpp">example</A>
|
|
|
|
|
takes an HTML file and outputs a list of all the linked files:</P>
|
|
|
|
|
<pre>
|
|
|
|
|
<FONT color=#008040>#include <fstream></FONT>
|
|
|
|
|
<FONT color=#008040>#include <iostream></FONT>
|
|
|
|
|
<FONT color=#008040>#include <iterator></FONT>
|
|
|
|
|
<FONT color=#008040>#include <boost/regex.hpp></FONT>
|
|
|
|
|
|
|
|
|
|
boost::regex e(<FONT color=#0000ff>"<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\""</FONT>,
|
|
|
|
|
boost::regex::normal | boost::regbase::icase);
|
|
|
|
|
|
|
|
|
|
<B>void</B> load_file(std::string& s, std::istream& is)
|
|
|
|
|
{
|
|
|
|
|
s.erase();
|
|
|
|
|
<I><FONT color=#000080>//</FONT></I>
|
|
|
|
|
<I><FONT color=#000080>// attempt to grow string buffer to match file size,</FONT></I>
|
|
|
|
|
<I><FONT color=#000080>// this doesn't always work...</FONT></I>
|
|
|
|
|
s.reserve(is.rdbuf()->in_avail());
|
|
|
|
|
<B>char</B> c;
|
|
|
|
|
<B>while</B>(is.get(c))
|
|
|
|
|
{
|
|
|
|
|
<I><FONT color=#000080>// use logarithmic growth strategy, in case</FONT></I>
|
|
|
|
|
<I><FONT color=#000080>// in_avail (above) returned zero:</FONT></I>
|
|
|
|
|
<B>if</B>(s.capacity() == s.size())
|
|
|
|
|
s.reserve(s.capacity() * <FONT color=#0000a0>3</FONT>);
|
|
|
|
|
s.append(<FONT color=#0000a0>1</FONT>, c);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
<B>int</B> main(<B>int</B> argc, <B>char</B>** argv)
|
|
|
|
|
{
|
|
|
|
|
std::string s;
|
|
|
|
|
<B>int</B> i;
|
|
|
|
|
<B>for</B>(i = <FONT color=#0000a0>1</FONT>; i < argc; ++i)
|
|
|
|
|
{
|
|
|
|
|
std::cout << <FONT color=#0000ff>"Findings URL's in "</FONT> << argv[i] << <FONT color=#0000ff>":"</FONT> << std::endl;
|
|
|
|
|
s.erase();
|
|
|
|
|
std::ifstream is(argv[i]);
|
|
|
|
|
load_file(s, is);
|
|
|
|
|
boost::regex_token_iterator<std::string::const_iterator>
|
|
|
|
|
i(s.begin(), s.end(), e, <FONT color=#0000a0>1</FONT>);
|
|
|
|
|
boost::regex_token_iterator<std::string::const_iterator> j;
|
|
|
|
|
<B>while</B>(i != j)
|
|
|
|
|
{
|
|
|
|
|
std::cout << *i++ << std::endl;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
<I><FONT color=#000080>//</FONT></I>
|
|
|
|
|
<I><FONT color=#000080>// alternative method:</FONT></I>
|
|
|
|
|
<I><FONT color=#000080>// test the array-literal constructor, and split out the whole</FONT></I>
|
|
|
|
|
<I><FONT color=#000080>// match as well as $1....</FONT></I>
|
|
|
|
|
<I><FONT color=#000080>//</FONT></I>
|
|
|
|
|
<B>for</B>(i = <FONT color=#0000a0>1</FONT>; i < argc; ++i)
|
|
|
|
|
{
|
|
|
|
|
std::cout << <FONT color=#0000ff>"Findings URL's in "</FONT> << argv[i] << <FONT color=#0000ff>":"</FONT> << std::endl;
|
|
|
|
|
s.erase();
|
|
|
|
|
std::ifstream is(argv[i]);
|
|
|
|
|
load_file(s, is);
|
|
|
|
|
<B>const</B> <B>int</B> subs[] = {<FONT color=#0000a0>1</FONT>, <FONT color=#0000a0>0</FONT>,};
|
|
|
|
|
boost::regex_token_iterator<std::string::const_iterator>
|
|
|
|
|
i(s.begin(), s.end(), e, subs);
|
|
|
|
|
boost::regex_token_iterator<std::string::const_iterator> j;
|
|
|
|
|
<B>while</B>(i != j)
|
|
|
|
|
{
|
|
|
|
|
std::cout << *i++ << std::endl;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
<B>return</B> <FONT color=#0000a0>0</FONT>;
|
|
|
|
|
}
|
|
|
|
|
</pre>
|
|
|
|
|
<HR>
|
|
|
|
|
<p>Revised
|
|
|
|
|
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
2003-10-24 10:51:38 +00:00
|
|
|
|
24 Oct 2003
|
|
|
|
|
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
|
|
|
|
<p><i>&copy; Copyright John Maddock 1998-
|
2003-11-03 11:25:30 +00:00
|
|
|
|
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
2003-10-24 10:51:38 +00:00
|
|
|
|
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
|
|
|
|
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
|
|
|
|
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
2003-05-17 11:45:48 +00:00
|
|
|
|
</body>
|
|
|
|
|
</html>
|