mirror of
https://github.com/boostorg/regex.git
synced 2025-07-04 16:16:32 +02:00
382 lines
23 KiB
HTML
382 lines
23 KiB
HTML
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||
<html>
|
||
<head>
|
||
<title>Boost.Regex: regex_token_iterator</title>
|
||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||
<body>
|
||
<P>
|
||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||
<TR>
|
||
<td vAlign="top" width="300">
|
||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||
</td>
|
||
<TD width="353">
|
||
<H1 align="center">Boost.Regex</H1>
|
||
<H2 align="center">regex_token_iterator</H2>
|
||
</TD>
|
||
<td width="50">
|
||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||
</td>
|
||
</TR>
|
||
</TABLE>
|
||
</P>
|
||
<HR>
|
||
<H3>Contents</H3>
|
||
<dl class="index">
|
||
<dt><A href="#synopsis">Synopsis</A> <dt><a href="#description">Description</a> <dt><A href="#examples">
|
||
Examples</A></dt></dl>
|
||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
|
||
that is to say it represents a new view of an existing iterator sequence, by
|
||
enumerating all the occurrences of a regular expression within that sequence,
|
||
and presenting one or more character sequences for each match found. Each
|
||
position enumerated by the iterator is a <A href="sub_match.html">sub_match</A>
|
||
object that represents what matched a particular sub-expression within the
|
||
regular expression. When class <CODE>regex_token_iterator</CODE> is used to
|
||
enumerate a single sub-expression with index -1, then the iterator performs
|
||
field splitting: that is to say it enumerates one character sequence for each
|
||
section of the character container sequence that does not match the regular
|
||
expression specified.</P>
|
||
<PRE>
|
||
template <class BidirectionalIterator,
|
||
class charT = iterator_traits<BidirectionalIterator>::value_type,
|
||
class traits = regex_traits<charT> >
|
||
class regex_token_iterator
|
||
{
|
||
public:
|
||
typedef <A href="basic_regex.html">basic_regex</A><charT, traits> regex_type;
|
||
typedef <A href="sub_match.html">sub_match</A><BidirectionalIterator> value_type;
|
||
typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
|
||
typedef const value_type* pointer;
|
||
typedef const value_type& reference;
|
||
typedef std::forward_iterator_tag iterator_category;
|
||
|
||
<A href="#c1">regex_token_iterator</A>();
|
||
<A href="#c2">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||
int submatch = 0, <A href="match_flag_type.html">match_flag_type</A> m = match_default);
|
||
<A href="#c3">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||
const std::vector<int>& submatches, match_flag_type m = match_default);
|
||
template <std::size_t N>
|
||
<A href="#c4">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||
const int (&submatches)[N], match_flag_type m = match_default);
|
||
<A href="#c5">regex_token_iterator</A>(const regex_token_iterator&);
|
||
regex_token_iterator& <A href="#o1">operator</A>=(const regex_token_iterator&);
|
||
bool <A href="#o2">operator</A>==(const regex_token_iterator&)const;
|
||
bool <A href="#o3">operator</A>!=(const regex_token_iterator&)const;
|
||
const value_type& <A href="#o4">operator</A>*()const;
|
||
const value_type* <A href="#o5">operator</A>->()const;
|
||
regex_token_iterator& <A href="#o6">operator</A>++();
|
||
regex_token_iterator <A href="#o7">operator</A>++(int);
|
||
};
|
||
|
||
typedef regex_token_iterator<const char*> cregex_token_iterator;
|
||
typedef regex_token_iterator<std::string::const_iterator> sregex_token_iterator;
|
||
#ifndef BOOST_NO_WREGEX
|
||
typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator;
|
||
typedef regex_token_iterator<std::wstring::const_iterator> wsregex_token_iterator;
|
||
#endif
|
||
|
||
template <class charT, class traits>
|
||
regex_token_iterator<const charT*, charT, traits>
|
||
<A href="#make_regex_token_iterator">make_regex_token_iterator</A>(const charT* p,
|
||
const basic_regex<charT, traits>& e,
|
||
int submatch = 0,
|
||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||
|
||
template <class charT, class traits, class ST, class SA>
|
||
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
|
||
<A href="#make_regex_token_iterator">make_regex_token_iterator</A>(const std::basic_string<charT, ST, SA>& p,
|
||
const basic_regex<charT, traits>& e,
|
||
int submatch = 0,
|
||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||
|
||
template <class charT, class traits, std::size_t N>
|
||
regex_token_iterator<const charT*, charT, traits>
|
||
<A href="#make_regex_token_iterator">make_regex_token_iterator</A>(const charT* p,
|
||
const basic_regex<charT, traits>& e,
|
||
const int (&submatch)[N],
|
||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||
|
||
template <class charT, class traits, class ST, class SA, std::size_t N>
|
||
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
|
||
<A href="#make_regex_token_iterator">make_regex_token_iterator</A>(const std::basic_string<charT, ST, SA>& p,
|
||
const basic_regex<charT, traits>& e,
|
||
const int (&submatch)[N],
|
||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||
|
||
template <class charT, class traits>
|
||
regex_token_iterator<const charT*, charT, traits>
|
||
<A href="#make_regex_token_iterator">make_regex_token_iterator</A>(const charT* p,
|
||
const basic_regex<charT, traits>& e,
|
||
const std::vector<int>& submatch,
|
||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||
|
||
template <class charT, class traits, class ST, class SA>
|
||
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
|
||
<A href="#make_regex_token_iterator">make_regex_token_iterator</A>(const std::basic_string<charT, ST, SA>& p,
|
||
const basic_regex<charT, traits>& e,
|
||
const std::vector<int>& submatch,
|
||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||
</PRE>
|
||
<H3><A name="description"></A>Description</H3>
|
||
<PRE><A name=c1></A>regex_token_iterator();</PRE>
|
||
<P><B> Effects:</B> constructs an end of sequence iterator.</P>
|
||
<PRE><A name=c2></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||
int submatch = 0, match_flag_type m = match_default);</PRE>
|
||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>. Object re shall exist
|
||
for the lifetime of the iterator constructed from it.</P>
|
||
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate one
|
||
string for each regular expression match of the expression <EM>re</EM> found
|
||
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. The
|
||
string enumerated is the sub-expression <EM>submatch </EM>for each match
|
||
found; if <EM>submatch </EM>is -1, then enumerates all the text sequences that
|
||
did not match the expression <EM>re </EM>(that is to say it performs field splitting).</P>
|
||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||
or if the program runs out of stack space while matching the expression (if
|
||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||
configured</A> in non-recursive mode).</P>
|
||
<PRE><A name=c3></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||
const std::vector<int>& submatches, match_flag_type m = match_default);</PRE>
|
||
<P><B> Preconditions:</B> <CODE>submatches.size() && !re.empty()</CODE>.
|
||
Object re shall exist for the lifetime of the iterator constructed from it.</P>
|
||
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
|
||
strings for each regular expression match of the expression <EM>re</EM> found
|
||
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. For
|
||
each match found one string will be enumerated for each sub-expression
|
||
index contained within <EM>submatches </EM>vector; if <EM>submatches[0] </EM>
|
||
is -1, then the first string enumerated for each match will be all of the text
|
||
from end of the last match to the start of the current match, in addition there
|
||
will be one extra string enumerated when no more matches can be found: from the
|
||
end of the last match found, to the end of the underlying sequence.</P>
|
||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||
or if the program runs out of stack space while matching the expression (if
|
||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||
configured</A> in non-recursive mode).</P>
|
||
<PRE><A name=c4></A>template <std::size_t N>
|
||
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||
const int (&submatches)[N], match_flag_type m = match_default);</PRE>
|
||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>. Object re shall exist
|
||
for the lifetime of the iterator constructed from it.</P>
|
||
<P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will
|
||
enumerate <EM>N</EM> strings for each regular expression match of the
|
||
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match
|
||
flags <EM>m</EM>. For each match found one string will be
|
||
enumerated for each sub-expression index contained within the <EM>submatches
|
||
</EM>array; if <EM>submatches[0] </EM>is -1, then the first string enumerated
|
||
for each match will be all of the text from end of the last match to the start
|
||
of the current match, in addition there will be one extra string enumerated
|
||
when no more matches can be found: from the end of the last match found, to the
|
||
end of the underlying sequence.</P>
|
||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||
or if the program runs out of stack space while matching the expression (if
|
||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||
configured</A> in non-recursive mode).</P>
|
||
<PRE><A name=c5></A>regex_token_iterator(const regex_token_iterator& that);</PRE>
|
||
<P><B> Effects: </B>constructs a copy of <CODE>that</CODE>.</P>
|
||
<P><B> Postconditions:</B> <CODE>*this == that</CODE>.</P>
|
||
<PRE><A name=o1></A>regex_token_iterator& operator=(const regex_token_iterator& that);</PRE>
|
||
<P><B> Effects: </B>sets <CODE>*this</CODE> to be equal to <CODE>that</CODE>.</P>
|
||
<P><B> Postconditions:</B> <CODE>*this == that</CODE>.</P>
|
||
<PRE><A name=o2></A>bool operator==(const regex_token_iterator& that)const;</PRE>
|
||
<P>
|
||
<B>Effects: </B>returns true if *this is the same position as that.</P>
|
||
<PRE><A name=o3></A>bool operator!=(const regex_token_iterator& that)const;</PRE>
|
||
<P>
|
||
<B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
|
||
<PRE><A name=o4></A>const value_type& operator*()const;</PRE>
|
||
<P>
|
||
<B>Effects: </B>returns the current character sequence being enumerated.</P>
|
||
<PRE><A name=o5></A>const value_type* operator->()const;</PRE>
|
||
<P>
|
||
<B>Effects: </B>returns <CODE>&(**this)</CODE>.</P>
|
||
<PRE><A name=o6></A>regex_token_iterator& operator++();</PRE>
|
||
<P>
|
||
<B>Effects: </B>Moves on to the next character sequence to be enumerated.</P>
|
||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||
or if the program runs out of stack space while matching the expression (if
|
||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||
configured</A> in non-recursive mode).</P>
|
||
<P>
|
||
<B>
|
||
Returns:</B><CODE> *this</CODE>.</P><PRE><A name=o7></A>regex_token_iterator operator++(int);</PRE>
|
||
<P><B> Effects:</B> constructs a copy <CODE>result</CODE> of <CODE>*this</CODE>,
|
||
then calls <CODE>++(*this)</CODE>.</P>
|
||
<P><B> Returns:</B> <CODE>result</CODE>.<A name="examples"></A></P>
|
||
<PRE><A name=make_regex_token_iterator></A>template <class charT, class traits>
|
||
regex_token_iterator<const charT*, charT, traits>
|
||
make_regex_token_iterator(const charT* p,
|
||
const basic_regex<charT, traits>& e,
|
||
int submatch = 0,
|
||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||
|
||
template <class charT, class traits, class ST, class SA>
|
||
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
|
||
make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p,
|
||
const basic_regex<charT, traits>& e,
|
||
int submatch = 0,
|
||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||
|
||
template <class charT, class traits, std::size_t N>
|
||
regex_token_iterator<const charT*, charT, traits>
|
||
make_regex_token_iterator(const charT* p,
|
||
const basic_regex<charT, traits>& e,
|
||
const int (&submatch)[N],
|
||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||
|
||
template <class charT, class traits, class ST, class SA, std::size_t N>
|
||
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
|
||
make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p,
|
||
const basic_regex<charT, traits>& e,
|
||
const int (&submatch)[N],
|
||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||
|
||
template <class charT, class traits>
|
||
regex_token_iterator<const charT*, charT, traits>
|
||
make_regex_token_iterator(const charT* p,
|
||
const basic_regex<charT, traits>& e,
|
||
const std::vector<int>& submatch,
|
||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||
|
||
template <class charT, class traits, class ST, class SA>
|
||
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
|
||
make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p,
|
||
const basic_regex<charT, traits>& e,
|
||
const std::vector<int>& submatch,
|
||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||
</PRE>
|
||
<P><B>Effects:</B> returns a <A href="#synopsis">regex_token_iterator</A> that enumerates
|
||
one <A href="sub_match.html">sub_match</A> for each value in <EM>submatch</EM> for
|
||
each occurrence of regular expression <EM>e</EM> in string <EM>p</EM>, matched
|
||
using <A href="match_flag_type.html">match_flags</A> <EM>m</EM>.</P>
|
||
<P></P>
|
||
<H3>Examples</H3>
|
||
<P>The following <A href="../example/snippets/regex_token_iterator_eg_1.cpp">example</A>
|
||
takes a string and splits it into a series of tokens:</P>
|
||
<pre>
|
||
<FONT color=#008040>#include <iostream></FONT>
|
||
<FONT color=#008040>#include <boost/regex.hpp></FONT>
|
||
|
||
<B>using</B> <B>namespace</B> std;
|
||
|
||
<B>int</B> main(<B>int</B> argc)
|
||
{
|
||
string s;
|
||
<B>do</B>{
|
||
<B>if</B>(argc == <FONT color=#0000a0>1</FONT>)
|
||
{
|
||
cout << <FONT color=#0000ff>"Enter text to split (or \"quit\" to exit): "</FONT>;
|
||
getline(cin, s);
|
||
<B>if</B>(s == <FONT color=#0000ff>"quit"</FONT>) <B>break</B>;
|
||
}
|
||
<B>else</B>
|
||
s = <FONT color=#0000ff>"This is a string of tokens"</FONT>;
|
||
|
||
boost::regex re(<FONT color=#0000ff>"\\s+"</FONT>);
|
||
boost::sregex_token_iterator i(s.begin(), s.end(), re, -<FONT color=#0000a0>1</FONT>);
|
||
boost::sregex_token_iterator j;
|
||
|
||
<B>unsigned</B> count = <FONT color=#0000a0>0</FONT>;
|
||
<B>while</B>(i != j)
|
||
{
|
||
cout << *i++ << endl;
|
||
count++;
|
||
}
|
||
cout << <FONT color=#0000ff>"There were "</FONT> << count << <FONT color=#0000ff>" tokens found."</FONT> << endl;
|
||
|
||
}<B>while</B>(argc == <FONT color=#0000a0>1</FONT>);
|
||
<B>return</B> <FONT color=#0000a0>0</FONT>;
|
||
}
|
||
|
||
</pre>
|
||
<P>The following <A href="../example/snippets/regex_token_iterator_eg_2.cpp">example</A>
|
||
takes an HTML file and outputs a list of all the linked files:</P>
|
||
<pre>
|
||
<FONT color=#008040>#include <fstream></FONT>
|
||
<FONT color=#008040>#include <iostream></FONT>
|
||
<FONT color=#008040>#include <iterator></FONT>
|
||
<FONT color=#008040>#include <boost/regex.hpp></FONT>
|
||
|
||
boost::regex e(<FONT color=#0000ff>"<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\""</FONT>,
|
||
boost::regex::normal | boost::regbase::icase);
|
||
|
||
<B>void</B> load_file(std::string& s, std::istream& is)
|
||
{
|
||
s.erase();
|
||
<I><FONT color=#000080>//</FONT></I>
|
||
<I><FONT color=#000080>// attempt to grow string buffer to match file size,</FONT></I>
|
||
<I><FONT color=#000080>// this doesn't always work...</FONT></I>
|
||
s.reserve(is.rdbuf()->in_avail());
|
||
<B>char</B> c;
|
||
<B>while</B>(is.get(c))
|
||
{
|
||
<I><FONT color=#000080>// use logarithmic growth strategy, in case</FONT></I>
|
||
<I><FONT color=#000080>// in_avail (above) returned zero:</FONT></I>
|
||
<B>if</B>(s.capacity() == s.size())
|
||
s.reserve(s.capacity() * <FONT color=#0000a0>3</FONT>);
|
||
s.append(<FONT color=#0000a0>1</FONT>, c);
|
||
}
|
||
}
|
||
|
||
<B>int</B> main(<B>int</B> argc, <B>char</B>** argv)
|
||
{
|
||
std::string s;
|
||
<B>int</B> i;
|
||
<B>for</B>(i = <FONT color=#0000a0>1</FONT>; i < argc; ++i)
|
||
{
|
||
std::cout << <FONT color=#0000ff>"Findings URL's in "</FONT> << argv[i] << <FONT color=#0000ff>":"</FONT> << std::endl;
|
||
s.erase();
|
||
std::ifstream is(argv[i]);
|
||
load_file(s, is);
|
||
boost::sregex_token_iterator i(s.begin(), s.end(), e, <FONT color=#0000a0>1</FONT>);
|
||
boost::sregex_token_iterator j;
|
||
<B>while</B>(i != j)
|
||
{
|
||
std::cout << *i++ << std::endl;
|
||
}
|
||
}
|
||
<I><FONT color=#000080>//</FONT></I>
|
||
<I><FONT color=#000080>// alternative method:</FONT></I>
|
||
<I><FONT color=#000080>// test the array-literal constructor, and split out the whole</FONT></I>
|
||
<I><FONT color=#000080>// match as well as $1....</FONT></I>
|
||
<I><FONT color=#000080>//</FONT></I>
|
||
<B>for</B>(i = <FONT color=#0000a0>1</FONT>; i < argc; ++i)
|
||
{
|
||
std::cout << <FONT color=#0000ff>"Findings URL's in "</FONT> << argv[i] << <FONT color=#0000ff>":"</FONT> << std::endl;
|
||
s.erase();
|
||
std::ifstream is(argv[i]);
|
||
load_file(s, is);
|
||
<B>const</B> <B>int</B> subs[] = {<FONT color=#0000a0>1</FONT>, <FONT color=#0000a0>0</FONT>,};
|
||
boost::sregex_token_iterator i(s.begin(), s.end(), e, subs);
|
||
boost::sregex_token_iterator j;
|
||
<B>while</B>(i != j)
|
||
{
|
||
std::cout << *i++ << std::endl;
|
||
}
|
||
}
|
||
|
||
<B>return</B> <FONT color=#0000a0>0</FONT>;
|
||
}
|
||
</pre>
|
||
<HR>
|
||
<p>Revised
|
||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||
26 June 2004
|
||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||
<p><i>&copy; Copyright John Maddock 1998-
|
||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||
</body>
|
||
</html>
|
||
|