Files
boost_regex/doc/Attic/regex_token_iterator.html

291 lines
16 KiB
HTML
Raw Normal View History

2003-05-17 11:45:48 +00:00
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>Boost.Regex: regex_token_iterator</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
<body>
<P>
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
<TR>
<td vAlign="top" width="300">
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../c++boost.gif" width="277" border="0"></A></h3>
</td>
<TD width="353">
<H1 align="center">Boost.Regex</H1>
<H2 align="center">regex_token_iterator</H2>
</TD>
<td width="50">
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
</td>
</TR>
</TABLE>
</P>
<HR>
<H3>Contents</H3>
<dl class="index">
<dt><A href="#synopsis">Synopsis</A> <dt><a href="#description">Description</a> <dt><A href="#examples">
Examples</A></dt></dl>
<H3><A name="synopsis"></A>Synopsis</H3>
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
that is to say it represents a new view of an existing iterator sequence, by
enumerating all the occurrences of a regular expression within that sequence,
and presenting one or more character sequence for each match found. Each
position enumerated by the iterator is a <A href="sub_match.html">sub_match</A>
object that represents what matched a particular sub-expression within the
regular expression. When class <CODE>regex_token_iterator</CODE> is used to
enumerate a single sub-expression with index -1, then the iterator performs
field splitting: that is to say it enumerates one character sequence for each
2003-05-17 11:45:48 +00:00
section of the character container sequence that does not match the regular
expression specified.</P>
<PRE>
template &lt;class BidirectionalIterator,
class charT = iterator_traits&lt;BidirectionalIterator&gt;::value_type,
class traits = regex_traits&lt;charT&gt;,
class Allocator = allocator&lt;charT&gt; &gt;
class regex_token_iterator
{
public:
typedef <A href="basic_regex.html">basic_regex</A>&lt;charT, traits, Allocator&gt; regex_type;
typedef <A href="sub_match.html">sub_match</A>
&lt;BidirectionalIterator&gt; value_type;
2003-05-17 11:45:48 +00:00
typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type;
typedef const value_type* pointer;
typedef const value_type&amp; reference;
typedef std::forward_iterator_tag iterator_category;
<A href="#c1">regex_token_iterator</A>();
<A href="#c2">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
int submatch = 0, <A href="match_flag_type.html">match_flag_type</A> m = match_default);
<A href="#c3">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
2003-05-17 11:45:48 +00:00
const std::vector&lt;int&gt;&amp; submatches, match_flag_type m = match_default);
template &lt;std::size_t N&gt;
<A href="#c4">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
2003-05-17 11:45:48 +00:00
const int (&amp;submatches)[N], match_flag_type m = match_default);
<A href="#c5">regex_token_iterator</A>(const regex_token_iterator&amp;);
regex_token_iterator&amp; <A href="#o1">operator</A>=(const regex_token_iterator&amp;);
bool <A href="#o2">operator</A>==(const regex_token_iterator&amp;)const;
bool <A href="#o3">operator</A>!=(const regex_token_iterator&amp;)const;
const value_type&amp; <A href="#o4">operator</A>*()const;
const value_type* <A href="#o5">operator</A>-&gt;()const;
regex_token_iterator&amp; <A href="#o6">operator</A>++();
regex_token_iterator <A href="#o7">operator</A>++(int);
2003-05-17 11:45:48 +00:00
};
typedef regex_token_iterator&lt;const char*&gt; cregex_token_iterator;
typedef regex_token_iterator&lt;std::string::const_iterator&gt; sregex_token_iterator;
#ifndef BOOST_NO_WREGEX
typedef regex_token_iterator&lt;const wchar_t*&gt; wcregex_token_iterator;
typedef regex_token_iterator<&lt;std::wstring::const_iterator&gt; wsregex_token_iterator;
#endif
2003-05-17 11:45:48 +00:00
</PRE>
<H3><A name="description"></A>Description</H3>
<PRE><A name=c1></A>regex_token_iterator();</PRE>
2003-05-17 11:45:48 +00:00
<P><B> Effects:</B> constructs an end of sequence iterator.</P>
<PRE><A name=c2></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
2003-05-17 11:45:48 +00:00
int submatch = 0, match_flag_type m = match_default);</PRE>
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate one
string for each regular expression match of the expression <EM>re</EM> found
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>.&nbsp; The
string enumerated is the&nbsp;sub-expression <EM>submatch </EM>for each match
found; if <EM>submatch </EM>is -1, then enumerates all the text sequences that
did not match the expression <EM>re </EM>(that is to performs field splitting).</P>
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
or if the program runs out of stack space while matching the expression (if
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
or if the matcher exhausts it's permitted memory allocation (if Boost.regex is <A href="configuration.html">
configured</A> in non-recursive mode).</P>
<PRE><A name=c3></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
2003-05-17 11:45:48 +00:00
const std::vector&lt;int&gt;&amp; submatches, match_flag_type m = match_default);</PRE>
<P><B> Preconditions:</B> <CODE>submatches.size() &amp;&amp; !re.empty()</CODE>.</P>
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
strings for each regular expression match of the expression <EM>re</EM> found
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>.&nbsp; For
each match found one string will be enumerated&nbsp;for each sub-expression
index&nbsp;contained within <EM>submatches </EM>vector; if <EM>submatches[0] </EM>
is -1, then the first string enumerated for each match will be all of the text
from end of the last match to the start of the current match, in addition there
will be one extra string enumerated when no more matches can be found: from the
end of the last match found, to the end of the underlying sequence.</P>
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
or if the program runs out of stack space while matching the expression (if
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
or if the matcher exhausts it's permitted memory allocation (if Boost.regex is <A href="configuration.html">
configured</A> in non-recursive mode).</P>
<PRE><A name=c4></A>template &lt;std::size_t N&gt;
2003-05-17 11:45:48 +00:00
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
const int (&amp;submatches)[R], match_flag_type m = match_default);</PRE>
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
<P><STRONG>Effects:</STRONG></B> constructs a regex_token_iterator that will
enumerate&nbsp;<EM>R</EM> strings for each regular expression match of the
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match
flags <EM>m</EM>.&nbsp; For each match found one string will be
enumerated&nbsp;for each sub-expression index&nbsp;contained within the <EM>submatches
</EM>array; if <EM>submatches[0] </EM>is -1, then the first string enumerated
for each match will be all of the text from end of the last match to the start
of the current match, in addition there will be one extra string enumerated
when no more matches can be found: from the end of the last match found, to the
end of the underlying sequence.</P>
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
or if the program runs out of stack space while matching the expression (if
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
or if the matcher exhausts it's permitted memory allocation (if Boost.regex is <A href="configuration.html">
configured</A> in non-recursive mode).</P>
<PRE><A name=c5></A>regex_token_iterator(const regex_token_iterator&amp; that);</PRE>
2003-05-17 11:45:48 +00:00
<P><B> Effects: </B>constructs a copy of <CODE>that</CODE>.</P>
<P><B> Postconditions:</B> <CODE>*this == that</CODE>.</P>
<PRE><A name=o1></A>regex_token_iterator&amp; operator=(const regex_token_iterator&amp; that);</PRE>
2003-05-17 11:45:48 +00:00
<P><B> Effects: </B>sets <CODE>*this</CODE> to be equal to&nbsp;<CODE>that</CODE>.</P>
<P><B> Postconditions:</B> <CODE>*this == that</CODE>.</P>
<PRE><A name=o2></A>bool operator==(const regex_token_iterator&amp;)const;</PRE>
2003-05-17 11:45:48 +00:00
<P>
<B>Effects: </B>returns true if *this is the same position as that.</P>
<PRE><A name=o3></A>bool operator!=(const regex_token_iterator&amp;)const;</PRE>
2003-05-17 11:45:48 +00:00
<P>
<B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
<PRE><A name=o4></A>const value_type&amp; operator*()const;</PRE>
2003-05-17 11:45:48 +00:00
<P>
<B>Effects: </B>returns the current character sequence being enumerated.</P>
<PRE><A name=o5></A>const value_type* operator-&gt;()const;</PRE>
2003-05-17 11:45:48 +00:00
<P>
<B>Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P>
<PRE><A name=o6></A>regex_token_iterator&amp; operator++();</PRE>
2003-05-17 11:45:48 +00:00
<P>
<B>Effects: </B>Moves on to the next character sequence to be enumerated.</P>
2003-05-17 11:45:48 +00:00
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
or if the program runs out of stack space while matching the expression (if
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
or if the matcher exhausts it's permitted memory allocation (if Boost.regex is <A href="configuration.html">
configured</A> in non-recursive mode).</P>
<B>
<P>
Returns:</B><CODE> *this</CODE>.</P><PRE><A name=o7></A>regex_token_iterator&amp; operator++(int);</PRE>
2003-05-17 11:45:48 +00:00
<P><B> Effects:</B> constructs a copy <CODE>result</CODE> of <CODE>*this</CODE>,
then calls <CODE>++(*this)</CODE>.</P>
<P><B> Returns:</B> <CODE>result</CODE>.<A name="examples"></A>
<H3>Examples</H3>
2003-11-28 15:17:45 +00:00
<P>The following <A href="../example/snippets/regex_token_iterator_eg_1.cpp">example</A>
2003-05-17 11:45:48 +00:00
takes a string and splits it into a series of tokens:</P>
<pre>
<FONT color=#008040>#include &lt;iostream&gt;</FONT>
<FONT color=#008040>#include &lt;boost/regex.hpp&gt;</FONT>
<B>using</B> <B>namespace</B> std;
<B>int</B> main(<B>int</B> argc)
{
string s;
<B>do</B>{
<B>if</B>(argc == <FONT color=#0000a0>1</FONT>)
{
cout &lt;&lt; <FONT color=#0000ff>"Enter text to split (or \"quit\" to exit): "</FONT>;
getline(cin, s);
<B>if</B>(s == <FONT color=#0000ff>"quit"</FONT>) <B>break</B>;
}
<B>else</B>
s = <FONT color=#0000ff>"This is a string of tokens"</FONT>;
boost::regex re(<FONT color=#0000ff>"\\s+"</FONT>);
boost::sregex_token_iterator i(s.begin(), s.end(), re, -<FONT color=#0000a0>1</FONT>);
boost::sregex_token_iterator j;
2003-05-17 11:45:48 +00:00
<B>unsigned</B> count = <FONT color=#0000a0>0</FONT>;
<B>while</B>(i != j)
{
cout &lt;&lt; *i++ &lt;&lt; endl;
count++;
}
cout &lt;&lt; <FONT color=#0000ff>"There were "</FONT> &lt;&lt; count &lt;&lt; <FONT color=#0000ff>" tokens found."</FONT> &lt;&lt; endl;
}<B>while</B>(argc == <FONT color=#0000a0>1</FONT>);
<B>return</B> <FONT color=#0000a0>0</FONT>;
}
</pre>
2003-12-27 02:37:02 +00:00
<P>The following <A href="../example/snippets/regex_token_iterator_eg_2.cpp">example</A>
2003-05-17 11:45:48 +00:00
takes a html file and outputs a list of all the linked files:</P>
<pre>
<FONT color=#008040>#include &lt;fstream&gt;</FONT>
<FONT color=#008040>#include &lt;iostream&gt;</FONT>
<FONT color=#008040>#include &lt;iterator&gt;</FONT>
<FONT color=#008040>#include &lt;boost/regex.hpp&gt;</FONT>
boost::regex e(<FONT color=#0000ff>"&lt;\\s*A\\s+[^&gt;]*href\\s*=\\s*\"([^\"]*)\""</FONT>,
boost::regex::normal | boost::regbase::icase);
<B>void</B> load_file(std::string&amp; s, std::istream&amp; is)
{
s.erase();
<I><FONT color=#000080>//</FONT></I>
<I><FONT color=#000080>// attempt to grow string buffer to match file size,</FONT></I>
<I><FONT color=#000080>// this doesn't always work...</FONT></I>
s.reserve(is.rdbuf()-&gt;in_avail());
<B>char</B> c;
<B>while</B>(is.get(c))
{
<I><FONT color=#000080>// use logarithmic growth stategy, in case</FONT></I>
<I><FONT color=#000080>// in_avail (above) returned zero:</FONT></I>
<B>if</B>(s.capacity() == s.size())
s.reserve(s.capacity() * <FONT color=#0000a0>3</FONT>);
s.append(<FONT color=#0000a0>1</FONT>, c);
}
}
<B>int</B> main(<B>int</B> argc, <B>char</B>** argv)
{
std::string s;
<B>int</B> i;
<B>for</B>(i = <FONT color=#0000a0>1</FONT>; i &lt; argc; ++i)
{
std::cout &lt;&lt; <FONT color=#0000ff>"Findings URL's in "</FONT> &lt;&lt; argv[i] &lt;&lt; <FONT color=#0000ff>":"</FONT> &lt;&lt; std::endl;
s.erase();
std::ifstream is(argv[i]);
load_file(s, is);
boost::sregex_token_iterator i(s.begin(), s.end(), e, <FONT color=#0000a0>1</FONT>);
boost::sregex_token_iterator j;
2003-05-17 11:45:48 +00:00
<B>while</B>(i != j)
{
std::cout &lt;&lt; *i++ &lt;&lt; std::endl;
}
}
<I><FONT color=#000080>//</FONT></I>
<I><FONT color=#000080>// alternative method:</FONT></I>
<I><FONT color=#000080>// test the array-literal constructor, and split out the whole</FONT></I>
<I><FONT color=#000080>// match as well as $1....</FONT></I>
<I><FONT color=#000080>//</FONT></I>
<B>for</B>(i = <FONT color=#0000a0>1</FONT>; i &lt; argc; ++i)
{
std::cout &lt;&lt; <FONT color=#0000ff>"Findings URL's in "</FONT> &lt;&lt; argv[i] &lt;&lt; <FONT color=#0000ff>":"</FONT> &lt;&lt; std::endl;
s.erase();
std::ifstream is(argv[i]);
load_file(s, is);
<B>const</B> <B>int</B> subs[] = {<FONT color=#0000a0>1</FONT>, <FONT color=#0000a0>0</FONT>,};
boost::sregex_token_iterator i(s.begin(), s.end(), e, subs);
boost::sregex_token_iterator j;
2003-05-17 11:45:48 +00:00
<B>while</B>(i != j)
{
std::cout &lt;&lt; *i++ &lt;&lt; std::endl;
}
}
<B>return</B> <FONT color=#0000a0>0</FONT>;
}
</pre>
<HR>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
2003-10-24 10:51:38 +00:00
24 Oct 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i><EFBFBD> Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
2003-10-24 10:51:38 +00:00
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
2003-05-17 11:45:48 +00:00
</body>
</html>