Added regex iterator examples and docs, recovering from cvs repository crash...

[SVN r18361]
This commit is contained in:
John Maddock
2003-05-08 11:05:13 +00:00
parent 7321d18f08
commit 1c1ed068f4
19 changed files with 2173 additions and 21 deletions

View File

@ -9,6 +9,8 @@ lib boost_regex : ../src/$(SOURCES).cpp
<sysinclude>$(BOOST_ROOT)
<define>BOOST_REGEX_NO_LIB=1
<define>BOOST_REGEX_STATIC_LINK=1
<msvc><release><runtime-link-dynamic><threading>multi
<msvc><debug><runtime-link-dynamic><threading>multi
:
debug release
;
@ -23,12 +25,77 @@ dll boost_regex : ../src/$(SOURCES).cpp
;
# Builds the <tag> property that names a staged Boost.Regex binary.
#
# Arguments:
#   toolset    - name of the toolset being used (e.g. borland, msvc).
#   variant    - build variant name; any variant containing "debug" gets
#                the debug suffix.
#   properties - the build properties of the target being staged.
#
# Returns the incoming properties with one extra element of the form
#   <tag><VARIANT>_TOOLSET_<thread><runtime><link><debug>
# where each letter is chosen from the properties as shown below.
#
# Side effect: may ECHO a warning, at most once per toolset.  The "already
# warned" marker is stored in a variable named regex.<toolset>-warning-issued,
# which is not declared local here and therefore survives between calls.
rule boost-regex-stage-tag ( toolset variant : properties * )
{
    # Threading model: "m" when <threading>multi is requested, otherwise "s".
    local lib-thread-opt = s ;
    if <threading>multi in $(properties)
    {
        lib-thread-opt = m ;
    }

    # C runtime linkage: "d" for <runtime-link>dynamic, otherwise "s".
    local lib-rt-opt = s ;
    if <runtime-link>dynamic in $(properties)
    {
        lib-rt-opt = d ;
    }

    # Library kind: "i" when the target is a DLL, otherwise "s".
    local lib-link-opt = s ;
    if <target-type>DLL in $(properties)
    {
        lib-link-opt = i ;
    }

    # Debug suffix: empty for non-debug variants, "d" for debug variants.
    local lib-debug-opt = "" ;
    if [ MATCH .*(debug).* : $(variant) ]
    {
        lib-debug-opt = d ;
    }

    # By default the toolset name is used verbatim; some toolsets are mapped
    # to a different name (and may produce a one-off warning) below.
    local lib-toolset = $(toolset) ;
    local warning-var = regex.$(toolset)-warning-issued ;
    local warning ;

    switch $(toolset)
    {
        case borland :
            # Fall back to a default version when BORLAND_VERSION is unset or
            # is not one of the supported values.  The unset case is tested
            # explicitly because Jam's "in" operator is true when its
            # left-hand list is empty.
            if ! $(BORLAND_VERSION) || ! ( $(BORLAND_VERSION) in 4 5 6 )
            {
                BORLAND_VERSION = 5 ; # chose default version
                warning = "BORLAND_VERSION not set to 4, 5, or 6
staged Boost.Regex library will be named appropriately for version" $(BORLAND_VERSION) ;
            }
            lib-toolset = bcb$(BORLAND_VERSION) ;

        case msvc :
            warning = "msvc toolset builds Boost.Regex library for vc6; use vc7 or vc7.1 toolsets for other versions" ;
            lib-toolset = vc6 ;

        case msvc-stlport :
            warning = "msvc-stlport toolset only builds Boost.Regex library for use with vc6" ;
            lib-toolset = vc6-stlport ;
            # A debug runtime build overrides the debug suffix with "dd".
            if <runtime-build>debug in $(properties)
            {
                lib-debug-opt = dd ;
            }
    }

    # Emit the warning only the first time it is raised for this toolset.
    if $(warning) && ! $($(warning-var))
    {
        ECHO Warning: $(warning) ;
        $(warning-var) = issued ;
    }

    return $(properties) <tag><$(variant)>_$(lib-toolset)_$(lib-thread-opt)$(lib-rt-opt)$(lib-link-opt)$(lib-debug-opt) ;
}
# Stage target "bin-stage": takes both the static library and the DLL
# build of boost_regex as sources, passes the boost-regex-stage-tag rule
# (plus a "_debug" tag for the debug variant) in the requirements section,
# and lists debug and release as the default build variants.
stage bin-stage : <lib>boost_regex <dll>boost_regex
:
<tag><debug>"_debug"
boost-regex-stage-tag
:
debug release
;

View File

@ -0,0 +1,163 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>Boost.Regex: Configuration and setup</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" type="text/css" href="../../../boost.css">
</head>
<body>
<P>
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
<TR>
<td valign="top" width="300">
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
</td>
<TD width="353">
<H1 align="center">Boost.Regex</H1>
<H2 align="center">Configuration and setup</H2>
</TD>
<td width="50">
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
</td>
</TR>
</TABLE>
</P>
<HR>
<h2>Contents</h2>
<dl class="index">
<dt><a href="#compiler">Compiler setup</a></dt>
<dt><a href="#locale">Locale and traits class selection</a></dt>
<dt><a href="#linkage">Linkage Options</a></dt>
<dt><a href="#algorithm">Algorithm Selection</a></dt>
<dt><a href="#tuning">Algorithm Tuning</a></dt>
</dl>
<H3><A name="compiler"></A>Compiler setup.</H3>
<P>You shouldn't need to do anything special to configure boost.regex for use with
your compiler - the <A href="../../config/index.html">boost.config</A> subsystem
should already take care of it. If you do have problems (or you are using a
particularly obscure compiler or platform) then <A href="../../config/index.html">boost.config</A>&nbsp;has
a&nbsp;<A href="../../config/config.htm#config_script">configure</A> script.</P>
<H3><A name="locale"></A>Locale and traits class selection.</H3>
<P>The following macros (see <A href="../../../boost/regex/user.hpp">user.hpp</A>)
control how boost.regex interacts with the user's locale:</P>
<P>
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="1">
<TR>
<TD width="265">BOOST_REGEX_USE_C_LOCALE</TD>
<TD>Forces boost.regex to use the global C locale in its traits class support:
this is the default behaviour on non-windows platforms, but MS Windows
platforms normally use the Win32 API for locale support.</TD>
</TR>
<TR>
<TD width="265">BOOST_REGEX_USE_CPP_LOCALE</TD>
<TD>Forces boost.regex to use std::locale in its default traits class; regular
expressions can then be imbued with an instance-specific locale.</TD>
</TR>
<TR>
<TD width="265">BOOST_REGEX_NO_W32</TD>
<TD>Tells boost.regex not to use any Win32 APIs even when available (implies
BOOST_REGEX_USE_C_LOCALE unless BOOST_REGEX_USE_CPP_LOCALE is set).</TD>
</TR>
</TABLE>
</P>
<H3><A name="linkage"></A>Linkage Options</H3>
<P>
<TABLE id="Table3" cellSpacing="1" cellPadding="1" width="100%" border="1">
<TR>
<TD>BOOST_REGEX_STATIC_LINK</TD>
<TD>For Microsoft and Borland C++ builds, this tells boost.regex that it is going
to be linked to a static library even when using a dynamic C runtime.</TD>
</TR>
<TR>
<TD>BOOST_REGEX_NO_LIB</TD>
<TD>For Microsoft and Borland C++ builds, this tells boost.regex that it should
not automatically select the library to link to.</TD>
</TR>
</TABLE>
</P>
<H3><A name="algorithm"></A>Algorithm Selection</H3>
<P>
<TABLE id="Table4" cellSpacing="1" cellPadding="1" width="100%" border="1">
<TR>
<TD width="253">BOOST_REGEX_V3</TD>
<TD>Tells boost.regex to use the boost-1.30.0 matching algorithm, define only if
you need maximum compatibility with previous behaviour.</TD>
</TR>
<TR>
<TD width="253">BOOST_REGEX_RECURSIVE</TD>
<TD>Tells boost.regex to use a stack-recursive matching algorithm.&nbsp; This is
generally the fastest option (although there is very little in it), but can
cause stack overflow in extreme cases, on Win32 this can be handled safely, but
this is not the case on other platforms.</TD>
</TR>
<TR>
<TD width="253">BOOST_REGEX_NON_RECURSIVE</TD>
<TD>Tells boost.regex to use a non-stack recursive matching algorithm, this can be
slightly slower than the alternative, but is always safe no matter how
pathological the regular expression.&nbsp; This is the default on non-Win32
platforms.</TD>
</TR>
</TABLE>
</P>
<H3><A name="tuning"></A>Algorithm Tuning</H3>
<P>The following option applies only if BOOST_REGEX_RECURSIVE is set.</P>
<P>
<TABLE id="Table6" cellSpacing="1" cellPadding="1" width="100%" border="1">
<TR>
<TD>BOOST_REGEX_HAS_MS_STACK_GUARD</TD>
<TD>Tells boost.regex that Microsoft style __try - __except blocks are supported,
and can be used to safely trap stack overflow.</TD>
</TR>
</TABLE>
</P>
<P>The following options apply only if BOOST_REGEX_NON_RECURSIVE is set.</P>
<P>
<TABLE id="Table5" cellSpacing="1" cellPadding="1" width="100%" border="1">
<TR>
<TD>BOOST_REGEX_BLOCKSIZE</TD>
<TD>In non-recursive mode, boost.regex uses largish blocks of memory to act as a
stack for the state machine, the larger the block size then the fewer
allocations that will take place.&nbsp; This defaults to 4096 bytes, which is
large enough to match the vast majority of regular expressions&nbsp;without
further allocations, however, you can choose smaller or larger values depending
upon your platform's characteristics.</TD>
</TR>
<TR>
<TD>BOOST_REGEX_MAX_BLOCKS</TD>
<TD>Tells boost.regex how many blocks of size BOOST_REGEX_BLOCKSIZE it is
permitted to use.&nbsp; If this value is exceeded then boost.regex will stop
trying to find a match and throw a std::runtime_error.&nbsp; Defaults to 1024,
don't forget to tweak this value if you alter BOOST_REGEX_BLOCKSIZE by much.</TD>
</TR>
<TR>
<TD>BOOST_REGEX_MAX_CACHE_BLOCKS</TD>
<TD>Tells boost.regex how many memory blocks to store in its internal cache -
memory blocks are taken from this cache rather than by calling ::operator
new.&nbsp; Generally speaking this can be an order of magnitude faster than
calling ::operator new each time a memory block is required, but has the
downside that boost.regex can end up caching a large chunk of memory (by
default up to 16 blocks each of BOOST_REGEX_BLOCKSIZE size).&nbsp; If memory is
tight then try defining this to 0 (disables all caching), or if that is too
slow, then a value of 1 or 2, may be sufficient.&nbsp; On the other hand, on
large multi-processor, multi-threaded systems, you may find that a higher value
is in order.</TD>
</TR>
</TABLE>
</P>
<P>
<HR>
<P></P>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
11 April 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" -->
</p>
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a>&nbsp;1998-<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></I></P>
<P align="left"><I>Permission to use, copy, modify, distribute and sell this software
and its documentation for any purpose is hereby granted without fee, provided
that the above copyright notice appear in all copies and that both that
copyright notice and this permission notice appear in supporting documentation.
Dr John Maddock makes no representations about the suitability of this software
for any purpose. It is provided "as is" without express or implied warranty.</I></P>
</body>
</html>

View File

@ -0,0 +1,370 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>Boost.Regex: regex_iterator</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
<body>
<P>
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
<TR>
<td vAlign="top" width="300">
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../c++boost.gif" width="277" border="0"></A></h3>
</td>
<TD width="353">
<H1 align="center">Boost.Regex</H1>
<H2 align="center">regex_iterator</H2>
</TD>
<td width="50">
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
</td>
</TR>
</TABLE>
</P>
<HR>
<H3>Contents</H3>
<dl class="index">
<dt><A href="#synopsis">Synopsis</A> <dt><a href="#description">Description</a> <dt><A href="#examples">
Examples</A></dt></dl>
<H3><A name="synopsis"></A>Synopsis</H3>
<P>The iterator type regex_iterator will enumerate all of the regular expression
matches found in some sequence: dereferencing a regex_iterator yields a
reference to a&nbsp;<A href="match_results.html">match_results</A> object.</P>
<PRE>
template &lt;class BidirectionalIterator,
class charT = iterator_traits&lt;BidirectionalIterator&gt;::value_type,
class traits = regex_traits&lt;charT&gt;,
class Allocator = allocator&lt;charT&gt; &gt;
class regex_iterator
{
public:
typedef basic_regex&lt;charT, traits, Allocator&gt; regex_type;
typedef match_results&lt;BidirectionalIterator&gt; value_type;
typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type;
typedef const value_type* pointer;
typedef const value_type&amp; reference;
typedef std::forward_iterator_tag iterator_category;
regex_iterator();
regex_iterator(BidirectionalIterator a, BidirectionalIterator b,
const regex_type&amp; re,
match_flag_type m = match_default);
regex_iterator(const regex_iterator&amp;);
regex_iterator&amp; operator=(const regex_iterator&amp;);
bool operator==(const regex_iterator&amp;);
bool operator!=(const regex_iterator&amp;);
const value_type&amp; operator*();
const value_type* operator-&gt;();
regex_iterator&amp; operator++();
regex_iterator operator++(int);
};
</PRE>
<H3><A name="description"></A>Description</H3>
<P>A regex_iterator is constructed from a pair of iterators, and enumerates all
occurrences of a regular expression within that iterator range.</P>
<PRE>regex_iterator();</PRE>
<B>
<P>
Effects:</B> constructs an end of sequence regex_iterator.</P><PRE>regex_iterator(BidirectionalIterator a, BidirectionalIterator b,
const regex_type&amp; re,
match_flag_type m = match_default);</PRE>
<B>
<P>
Effects: </B>constructs a regex_iterator that will enumerate all occurrences
of the expression <EM>re</EM>, within the sequence <EM>[a,b)</EM>, and found
using match flags <EM>m</EM>.&nbsp; The object <EM>re </EM>must exist for the
lifetime of the regex_iterator.</P><PRE>regex_iterator(const regex_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>constructs a copy of <CODE>that</CODE>.</P><B>
<P>
Postconditions:</B> <CODE>*this == that</CODE>.</P><PRE>regex_iterator&amp; operator=(const regex_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>sets&nbsp;<CODE>*this</CODE> to be equal to <CODE>that</CODE>.</P><B>
<P>
Postconditions:</B> <CODE>*this == that</CODE>.</P><PRE>bool operator==(const regex_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>returns true if *this is equal to that.</P><PRE>bool operator!=(const regex_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
<H5>RE.8.1.1 regex_iterator dereference</H5>
<PRE>const value_type&amp; operator*();</PRE>
<B>
<P>
Effects: </B>dereferencing a regex_iterator object <EM>it</EM> yields a
const reference to a <A href="match_results.html">match_results</A> object,
whose members are set as follows:</P>
<P>
<TABLE id="Table2" cellSpacing="1" cellPadding="7" width="624" border="1">
<TBODY>
<TR>
<TD vAlign="top" width="50%"><B>
<P>
Element</B>
</P>
</TD>
<TD vAlign="top" width="50%"><B>
<P>
Value</B> </P></TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).size()</P>
</TD>
<TD vAlign="top" width="50%">
<P>re.mark_count()</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).empty()</P>
</TD>
<TD vAlign="top" width="50%">
<P>false</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).prefix().first</P>
</TD>
<TD vAlign="top" width="50%">
<P>The end of the last match found, or the start of the underlying sequence if
this is the first match enumerated</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).prefix().second</P>
</TD>
<TD vAlign="top" width="50%">
<P>(*it)[0].first</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).prefix().matched</P>
</TD>
<TD vAlign="top" width="50%">
<P>(*it).prefix().first != (*it).prefix().second</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).suffix().first</P>
</TD>
<TD vAlign="top" width="50%">
<P>(*it)[0].second</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).suffix().second</P>
</TD>
<TD vAlign="top" width="50%">
<P>The end of the underlying sequence.</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).suffix().matched</P>
</TD>
<TD vAlign="top" width="50%">
<P>(*it).suffix().first != (*it).suffix().second</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it)[0].first</P>
</TD>
<TD vAlign="top" width="50%">
<P>The start of the sequence of characters that matched the regular expression</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it)[0].second</P>
</TD>
<TD vAlign="top" width="50%">
<P>The end of the sequence of characters that matched the regular expression</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it)[0].matched</P>
</TD>
<TD vAlign="top" width="50%"><CODE>
<P>
true</CODE> if a full match was found, and <CODE>false</CODE> if it was a
partial match (found as a result of the <CODE>match_partial</CODE> flag being
set).</P></TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it)[n].first</P>
</TD>
<TD vAlign="top" width="50%">
<P>For all integers n &lt; (*it).size(), the start of the sequence that matched
sub-expression <I>n</I>. Alternatively, if sub-expression n did not participate
in the match, then <I>last</I>.</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it)[n].second</P>
</TD>
<TD vAlign="top" width="50%">
<P>For all integers n &lt; (*it).size(), the end of the sequence that matched
sub-expression <I>n</I>. Alternatively, if sub-expression n did not participate
in the match, then <I>last</I>.</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it)[n].matched</P>
</TD>
<TD vAlign="top" width="50%">
<P>For all integers n &lt; (*it).size(), true if sub-expression <I>n</I> participated
in the match, false otherwise.</P>
<P></P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">(*it).position(n)</TD>
<TD vAlign="top" width="50%">For all integers n &lt; (*it).size(), then the
distance from the start of the underlying sequence to the start of
sub-expression match <EM>n</EM>.</TD>
</TR>
</TBODY></TABLE></P><PRE>const value_type* operator-&gt;();</PRE>
<B>
<P>
Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P><PRE>regex_iterator&amp; operator++();</PRE>
<P><STRONG>Effects:</STRONG> moves the iterator to the next match in the
underlying sequence, or the end of sequence iterator if none is found.
&nbsp;When the last match found matched a zero length string, then the
regex_iterator will find the next match as follows: if there exists a non-zero
length match that starts at the same location as the last one, then returns it,
otherwise starts looking for the next (possibly zero length) match from one
position to the right of the last match.</P>
<B>
<P>
Returns:</B> <CODE>*this</CODE>.</P><PRE>regex_iterator operator++(int);</PRE>
<B>
<P>
Effects:</B> constructs a copy <CODE>result</CODE> of <CODE>*this</CODE>,
then calls <CODE>++(*this)</CODE>.</P><B>
<P>
Returns:</B> <CODE>result</CODE>.</P>
<H3><A name="examples"></A>Examples</H3>
<P>The following <A href="../example/snippets/regex_iterator_example.cpp">example</A>
takes a C++ source file and builds up an index of class names, and the location
of that class in the file.</P>
<pre>
<FONT color=#008040>#include &lt;string&gt;</FONT>
<FONT color=#008040>#include &lt;map&gt;</FONT>
<FONT color=#008040>#include &lt;fstream&gt;</FONT>
<FONT color=#008040>#include &lt;iostream&gt;</FONT>
<FONT color=#008040>#include &lt;boost/regex.hpp&gt;</FONT>
<B>using</B> <B>namespace</B> std;
<I><FONT color=#000080>// purpose:</FONT></I>
<I><FONT color=#000080>// takes the contents of a file in the form of a string</FONT></I>
<I><FONT color=#000080>// and searches for all the C++ class definitions, storing</FONT></I>
<I><FONT color=#000080>// their locations in a map of strings/int's</FONT></I>
<B>typedef</B> std::map&lt;std::string, std::string::difference_type, std::less&lt;std::string&gt; &gt; map_type;
<B>const</B> <B>char</B>* re =
<I><FONT color=#000080>// possibly leading whitespace: </FONT></I>
<FONT color=#0000ff>"^[[:space:]]*"</FONT>
<I><FONT color=#000080>// possible template declaration:</FONT></I>
<FONT color=#0000ff>"(template[[:space:]]*&lt;[^;:{]+&gt;[[:space:]]*)?"</FONT>
<I><FONT color=#000080>// class or struct:</FONT></I>
<FONT color=#0000ff>"(class|struct)[[:space:]]*"</FONT>
<I><FONT color=#000080>// leading declspec macros etc:</FONT></I>
<FONT color=#0000ff>"("</FONT>
<FONT color=#0000ff>"\\&lt;\\w+\\&gt;"</FONT>
<FONT color=#0000ff>"("</FONT>
<FONT color=#0000ff>"[[:blank:]]*\\([^)]*\\)"</FONT>
<FONT color=#0000ff>")?"</FONT>
<FONT color=#0000ff>"[[:space:]]*"</FONT>
<FONT color=#0000ff>")*"</FONT>
<I><FONT color=#000080>// the class name</FONT></I>
<FONT color=#0000ff>"(\\&lt;\\w*\\&gt;)[[:space:]]*"</FONT>
<I><FONT color=#000080>// template specialisation parameters</FONT></I>
<FONT color=#0000ff>"(&lt;[^;:{]+&gt;)?[[:space:]]*"</FONT>
<I><FONT color=#000080>// terminate in { or :</FONT></I>
<FONT color=#0000ff>"(\\{|:[^;\\{()]*\\{)"</FONT>;
boost::regex expression(re);
map_type class_index;
<B>bool</B> regex_callback(<B>const</B> boost::match_results&lt;std::string::const_iterator&gt;&amp; what)
{
<I><FONT color=#000080>// what[0] contains the whole string</FONT></I>
<I><FONT color=#000080>// what[5] contains the class name.</FONT></I>
<I><FONT color=#000080>// what[6] contains the template specialisation if any.</FONT></I>
<I><FONT color=#000080>// add class name and position to map:</FONT></I>
class_index[what[<FONT color=#0000a0>5</FONT>].str() + what[<FONT color=#0000a0>6</FONT>].str()] = what.position(<FONT color=#0000a0>5</FONT>);
<B>return</B> <B>true</B>;
}
<B>void</B> load_file(std::string&amp; s, std::istream&amp; is)
{
s.erase();
s.reserve(is.rdbuf()-&gt;in_avail());
<B>char</B> c;
<B>while</B>(is.get(c))
{
<B>if</B>(s.capacity() == s.size())
s.reserve(s.capacity() * <FONT color=#0000a0>3</FONT>);
s.append(<FONT color=#0000a0>1</FONT>, c);
}
}
<B>int</B> main(<B>int</B> argc, <B>const</B> <B>char</B>** argv)
{
std::string text;
<B>for</B>(<B>int</B> i = <FONT color=#0000a0>1</FONT>; i &lt; argc; ++i)
{
cout &lt;&lt; <FONT color=#0000ff>"Processing file "</FONT> &lt;&lt; argv[i] &lt;&lt; endl;
std::ifstream fs(argv[i]);
load_file(text, fs);
<I><FONT color=#000080>// construct our iterators:</FONT></I>
boost::regex_iterator&lt;std::string::const_iterator&gt; m1(text.begin(), text.end(), expression);
boost::regex_iterator&lt;std::string::const_iterator&gt; m2;
std::for_each(m1, m2, &amp;regex_callback);
<I><FONT color=#000080>// copy results:</FONT></I>
cout &lt;&lt; class_index.size() &lt;&lt; <FONT color=#0000ff>" matches found"</FONT> &lt;&lt; endl;
map_type::iterator c, d;
c = class_index.begin();
d = class_index.end();
<B>while</B>(c != d)
{
cout &lt;&lt; <FONT color=#0000ff>"class \""</FONT> &lt;&lt; (*c).first &lt;&lt; <FONT color=#0000ff>"\" found at index: "</FONT> &lt;&lt; (*c).second &lt;&lt; endl;
++c;
}
class_index.erase(class_index.begin(), class_index.end());
}
<B>return</B> <FONT color=#0000a0>0</FONT>;
}
</pre>
<HR>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
11 April 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" -->
</p>
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a>&nbsp;1998-<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></I></P>
<P align="left"><I>Permission to use, copy, modify, distribute and sell this software
and its documentation for any purpose is hereby granted without fee, provided
that the above copyright notice appear in all copies and that both that
copyright notice and this permission notice appear in supporting documentation.
Dr John Maddock makes no representations about the suitability of this software
for any purpose. It is provided "as is" without express or implied warranty.</I></P>
</body>
</html>

View File

@ -85,8 +85,8 @@ basic_string&lt;charT&gt; regex_replace(const basic_string&lt;charT&gt;&amp; s,
calls <CODE>regex_replace(back_inserter(result), s.begin(), s.end(), e, fmt,
flags)</CODE>, and then returns <CODE>result</CODE>.
<H3><A name="examples"></A>Examples</H3>
<P>The following <A href="../example/snippets/regex_replace_example.cpp">example</A> takes
C/C++ source code as input, and outputs syntax highlighted HTML code.</P>
<P>The following <A href="../example/snippets/regex_replace_example.cpp">example</A>
takes C/C++ source code as input, and outputs syntax highlighted HTML code.</P>
<P></P>
<PRE><FONT color=#008080>#include &lt;fstream&gt;
#include &lt;sstream&gt;
@ -139,12 +139,14 @@ boost::regex e1, e2;
</I></FONT> <FONT color=#000080><I>// temporary string stream
</I></FONT> std::ostringstream t(std::ios::out | std::ios::binary);
std::ostream_iterator&lt;<B>char</B>, <B>char</B>&gt; oi(t);
boost::regex_replace(oi, in.begin(), in.end(), e2, pre_format);
boost::regex_replace(oi, in.begin(), in.end(),
e2, pre_format, boost::match_default | boost::format_all);
<FONT color=#000080><I>// then output to final output stream
</I></FONT> <FONT color=#000080><I>// adding syntax highlighting:
</I></FONT> std::string s(t.str());
std::ostream_iterator&lt;<B>char</B>, <B>char</B>&gt; out(os);
boost::regex_replace(out, s.begin(), s.end(), e1, format_string);
boost::regex_replace(out, s.begin(), s.end(),
e1, format_string, boost::match_default | boost::format_all);
os &lt;&lt; footer_text;
}
}

View File

@ -0,0 +1,279 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>Boost.Regex: regex_token_iterator</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
<body>
<P>
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
<TR>
<td vAlign="top" width="300">
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../c++boost.gif" width="277" border="0"></A></h3>
</td>
<TD width="353">
<H1 align="center">Boost.Regex</H1>
<H2 align="center">regex_token_iterator</H2>
</TD>
<td width="50">
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
</td>
</TR>
</TABLE>
</P>
<HR>
<H3>Contents</H3>
<dl class="index">
<dt><A href="#synopsis">Synopsis</A> <dt><a href="#description">Description</a> <dt><A href="#examples">
Examples</A></dt></dl>
<H3><A name="synopsis"></A>Synopsis</H3>
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
that is to say it represents a new view of an existing iterator sequence, by
enumerating all the occurrences of a regular expression within that sequence,
and presenting one or more new strings for each match found. Each position
enumerated by the iterator is a string that represents what matched a
particular sub-expression within the regular expression. When class <CODE>regex_token_iterator</CODE>
is used to enumerate a single sub-expression with index -1, then the iterator
performs field splitting: that is to say it enumerates one string for each
section of the character container sequence that does not match the regular
expression specified.</P>
<PRE>
template &lt;class BidirectionalIterator,
class charT = iterator_traits&lt;BidirectionalIterator&gt;::value_type,
class traits = regex_traits&lt;charT&gt;,
class Allocator = allocator&lt;charT&gt; &gt;
class regex_token_iterator
{
public:
typedef basic_regex&lt;charT, traits, Allocator&gt; regex_type;
typedef basic_string&lt;charT&gt; value_type;
typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type;
typedef const value_type* pointer;
typedef const value_type&amp; reference;
typedef std::forward_iterator_tag iterator_category;
regex_token_iterator();
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
int submatch = 0, match_flag_type m = match_default);
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
const std::vector&lt;int&gt;&amp; submatches, match_flag_type m = match_default);
template &lt;std::size_t N&gt;
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
const int (&amp;submatches)[N], match_flag_type m = match_default);
regex_token_iterator(const regex_token_iterator&amp;);
regex_token_iterator&amp; operator=(const regex_token_iterator&amp;);
bool operator==(const regex_token_iterator&amp;);
bool operator!=(const regex_token_iterator&amp;);
const value_type&amp; operator*();
const value_type* operator-&gt;();
regex_token_iterator&amp; operator++();
regex_token_iterator operator++(int);
};
</PRE>
<H3><A name="description"></A>Description</H3>
<PRE>regex_token_iterator();</PRE>
<B>
<P>
Effects:</B> constructs an end of sequence iterator.</P><PRE>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
int submatch = 0, match_flag_type m = match_default);</PRE>
<B>
<P>
Preconditions: </B><CODE>!re.empty()</CODE>.</P><B>
<P>
Effects:</B> constructs a regex_token_iterator that will enumerate one
string for each regular expression match of the expression <EM>re</EM> found
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>.&nbsp; The
string enumerated is the&nbsp;sub-expression <EM>submatch </EM>for each match
found; if <EM>submatch </EM>is -1, then enumerates all the text sequences that
did not match the expression <EM>re </EM>(that is to say it performs field
splitting).</P><PRE>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
const std::vector&lt;int&gt;&amp; submatches, match_flag_type m = match_default);</PRE>
<B>
<P>
Preconditions:</B> <CODE>submatches.size() &amp;&amp; !re.empty()</CODE>.</P><B>
<P>
Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
strings for each regular expression match of the expression <EM>re</EM> found
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>.&nbsp; For
each match found one string will be enumerated&nbsp;for each sub-expression
index&nbsp;contained within <EM>submatches </EM>vector; if <EM>submatches[0] </EM>
is -1, then the first string enumerated for each match will be all of the text
from end of the last match to the start of the current match, in addition there
will be one extra string enumerated when no more matches can be found: from the
end of the last match found, to the end of the underlying sequence.</P><PRE>template &lt;std::size_t N&gt;
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
const int (&amp;submatches)[N], match_flag_type m = match_default);</PRE>
<B>
<P>
Preconditions: </B><CODE>!re.empty()</CODE>.</P>
<P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will
enumerate&nbsp;<EM>N</EM> strings for each regular expression match of the
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match
flags <EM>m</EM>.&nbsp; For each match found one string will be
enumerated&nbsp;for each sub-expression index&nbsp;contained within the <EM>submatches
</EM>array; if <EM>submatches[0] </EM>is -1, then the first string enumerated
for each match will be all of the text from end of the last match to the start
of the current match, in addition there will be one extra string enumerated
when no more matches can be found: from the end of the last match found, to the
end of the underlying sequence.</P>
<PRE>regex_token_iterator(const regex_token_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>constructs a copy of <CODE>that</CODE>.</P><B>
<P>
Postconditions:</B> <CODE>*this == that</CODE>.</P><PRE>regex_token_iterator&amp; operator=(const regex_token_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>sets <CODE>*this</CODE> to be equal to&nbsp;<CODE>that</CODE>.</P><B>
<P>
Postconditions:</B> <CODE>*this == that</CODE>.</P><PRE>bool operator==(const regex_token_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>returns true if *this is at the same position as that.</P><PRE>bool operator!=(const regex_token_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>returns <CODE>!(*this == that)</CODE>.</P><PRE>const value_type&amp; operator*();</PRE>
<B>
<P>
Effects: </B>returns the current string being enumerated.</P><PRE>const value_type* operator-&gt;();</PRE>
<B>
<P>
Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P><PRE>regex_token_iterator&amp; operator++();</PRE>
<B>
<P>
Effects: </B>Moves on to the next string to be enumerated.</P><B>
<P>
Returns:</B><CODE> *this</CODE>.</P><PRE>regex_token_iterator operator++(int);</PRE>
<B>
<P>
Effects:</B> constructs a copy <CODE>result</CODE> of <CODE>*this</CODE>,
then calls <CODE>++(*this)</CODE>.</P><B>
<P>
Returns:</B> <CODE>result</CODE>.</P><A name="examples"></A>
<H3>Examples</H3>
<P>The following <A href="../example/snippets/regex_token_iterator_example_1.cpp">example</A>
takes a string and splits it into a series of tokens:</P>
<pre>
<FONT color=#008040>#include &lt;iostream&gt;</FONT>
<FONT color=#008040>#include &lt;boost/regex.hpp&gt;</FONT>
<B>using</B> <B>namespace</B> std;
<B>int</B> main(<B>int</B> argc)
{
string s;
<B>do</B>{
<B>if</B>(argc == <FONT color=#0000a0>1</FONT>)
{
cout &lt;&lt; <FONT color=#0000ff>"Enter text to split (or \"quit\" to exit): "</FONT>;
getline(cin, s);
<B>if</B>(s == <FONT color=#0000ff>"quit"</FONT>) <B>break</B>;
}
<B>else</B>
s = <FONT color=#0000ff>"This is a string of tokens"</FONT>;
boost::regex re(<FONT color=#0000ff>"\\s+"</FONT>);
boost::regex_token_iterator&lt;std::string::const_iterator&gt; i(s.begin(), s.end(), re, -<FONT color=#0000a0>1</FONT>);
boost::regex_token_iterator&lt;std::string::const_iterator&gt; j;
<B>unsigned</B> count = <FONT color=#0000a0>0</FONT>;
<B>while</B>(i != j)
{
cout &lt;&lt; *i++ &lt;&lt; endl;
count++;
}
cout &lt;&lt; <FONT color=#0000ff>"There were "</FONT> &lt;&lt; count &lt;&lt; <FONT color=#0000ff>" tokens found."</FONT> &lt;&lt; endl;
}<B>while</B>(argc == <FONT color=#0000a0>1</FONT>);
<B>return</B> <FONT color=#0000a0>0</FONT>;
}
</pre>
<P>The following <A href="../example/snippets/regex_token_iterator_example_2.cpp">example</A>
takes an HTML file and outputs a list of all the linked files:</P>
<pre>
<FONT color=#008040>#include &lt;fstream&gt;</FONT>
<FONT color=#008040>#include &lt;iostream&gt;</FONT>
<FONT color=#008040>#include &lt;iterator&gt;</FONT>
<FONT color=#008040>#include &lt;boost/regex.hpp&gt;</FONT>
boost::regex e(<FONT color=#0000ff>"&lt;\\s*A\\s+[^&gt;]*href\\s*=\\s*\"([^\"]*)\""</FONT>,
boost::regex::normal | boost::regbase::icase);
<B>void</B> load_file(std::string&amp; s, std::istream&amp; is)
{
s.erase();
<I><FONT color=#000080>//</FONT></I>
<I><FONT color=#000080>// attempt to grow string buffer to match file size,</FONT></I>
<I><FONT color=#000080>// this doesn't always work...</FONT></I>
s.reserve(is.rdbuf()-&gt;in_avail());
<B>char</B> c;
<B>while</B>(is.get(c))
{
<I><FONT color=#000080>// use logarithmic growth strategy, in case</FONT></I>
<I><FONT color=#000080>// in_avail (above) returned zero:</FONT></I>
<B>if</B>(s.capacity() == s.size())
s.reserve(s.capacity() * <FONT color=#0000a0>3</FONT>);
s.append(<FONT color=#0000a0>1</FONT>, c);
}
}
<B>int</B> main(<B>int</B> argc, <B>char</B>** argv)
{
std::string s;
<B>int</B> i;
<B>for</B>(i = <FONT color=#0000a0>1</FONT>; i &lt; argc; ++i)
{
std::cout &lt;&lt; <FONT color=#0000ff>"Findings URL's in "</FONT> &lt;&lt; argv[i] &lt;&lt; <FONT color=#0000ff>":"</FONT> &lt;&lt; std::endl;
s.erase();
std::ifstream is(argv[i]);
load_file(s, is);
boost::regex_token_iterator&lt;std::string::const_iterator&gt;
i(s.begin(), s.end(), e, <FONT color=#0000a0>1</FONT>);
boost::regex_token_iterator&lt;std::string::const_iterator&gt; j;
<B>while</B>(i != j)
{
std::cout &lt;&lt; *i++ &lt;&lt; std::endl;
}
}
<I><FONT color=#000080>//</FONT></I>
<I><FONT color=#000080>// alternative method:</FONT></I>
<I><FONT color=#000080>// test the array-literal constructor, and split out the whole</FONT></I>
<I><FONT color=#000080>// match as well as $1....</FONT></I>
<I><FONT color=#000080>//</FONT></I>
<B>for</B>(i = <FONT color=#0000a0>1</FONT>; i &lt; argc; ++i)
{
std::cout &lt;&lt; <FONT color=#0000ff>"Findings URL's in "</FONT> &lt;&lt; argv[i] &lt;&lt; <FONT color=#0000ff>":"</FONT> &lt;&lt; std::endl;
s.erase();
std::ifstream is(argv[i]);
load_file(s, is);
<B>const</B> <B>int</B> subs[] = {<FONT color=#0000a0>1</FONT>, <FONT color=#0000a0>0</FONT>,};
boost::regex_token_iterator&lt;std::string::const_iterator&gt;
i(s.begin(), s.end(), e, subs);
boost::regex_token_iterator&lt;std::string::const_iterator&gt; j;
<B>while</B>(i != j)
{
std::cout &lt;&lt; *i++ &lt;&lt; std::endl;
}
}
<B>return</B> <FONT color=#0000a0>0</FONT>;
}
</pre>
<P>
<HR>
<P></P>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
11 April 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" -->
</p>
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a>&nbsp;1998-<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></I></P>
<P align="left"><I>Permission to use, copy, modify, distribute and sell this software
and its documentation for any purpose is hereby granted without fee, provided
that the above copyright notice appear in all copies and that both that
copyright notice and this permission notice appear in supporting documentation.
Dr John Maddock makes no representations about the suitability of this software
for any purpose. It is provided "as is" without express or implied warranty.</I></P>
</body>
</html>

80
doc/Attic/standards.html Normal file
View File

@ -0,0 +1,80 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>Boost.Regex: Standards Conformance</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" type="text/css" href="../../../boost.css">
</head>
<body>
<P>
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
<TR>
<td valign="top" width="300">
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
</td>
<TD width="353">
<H1 align="center">Boost.Regex</H1>
<H2 align="center">Standards Conformance</H2>
</TD>
<td width="50">
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
</td>
</TR>
</TABLE>
</P>
<HR>
<p></p>
<H3>C++</H3>
<P>Boost.regex is intended to conform to the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">
regular expression standardisation proposal</A>, which will appear in a
future C++ standard technical report (and hopefully in a future version of the
standard).&nbsp; Currently there are some differences in how the regular
expression traits classes are defined, these will be fixed in a future release.</P>
<H3>ECMAScript / JavaScript</H3>
<P>All of the ECMAScript regular expression syntax features are supported, except
that:</P>
<P>Negated class escapes (\S, \D and \W) are not permitted inside character class
definitions ( [...] ).</P>
<P>The escape sequence \u matches any upper case character (the same as
[[:upper:]])&nbsp;rather than a unicode escape sequence; use \x{DDDD} for
unicode escape sequences.</P>
<H3>Perl</H3>
<P>Almost all perl features are supported, except for:</P>
<P>\N{name}&nbsp; Use [[:name:]] instead.</P>
<P>\pP and \PP</P>
<P>(?imsx-imsx)</P>
<P>(?&lt;=pattern)</P>
<P>(?&lt;!pattern)</P>
<P>(?{code})</P>
<P>(??{code})</P>
<P>(?(condition)yes-pattern) and (?(condition)yes-pattern|no-pattern)</P>
<P>These embarrassments / limitations will be removed in due course, mainly
dependent upon user demand.</P>
<H3>POSIX</H3>
<P>All the POSIX basic and extended regular expression features are supported,
except that:</P>
<P>No character collating names are recognised except those specified in the POSIX
standard for the C locale, unless they are explicitly registered with the
traits class.</P>
<P>Character equivalence classes ( [[=a=]] etc) are probably buggy except on
Win32.&nbsp; Implementing this feature requires knowledge of the format of the
string sort keys produced by the system; if you need this, and the default
implementation doesn't work on your platform, then you will need to supply a
custom traits class.</P>
<P>
<HR>
<P></P>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
11 April 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" -->
</p>
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a>&nbsp;1998-<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></I></P>
<P align="left"><I>Permission to use, copy, modify, distribute and sell this software
and its documentation for any purpose is hereby granted without fee, provided
that the above copyright notice appear in all copies and that both that
copyright notice and this permission notice appear in supporting documentation.
Dr John Maddock makes no representations about the suitability of this software
for any purpose. It is provided "as is" without express or implied warranty.</I></P>
</body>
</html>

163
doc/configuration.html Normal file
View File

@ -0,0 +1,163 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>Boost.Regex: Configuration and setup</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" type="text/css" href="../../../boost.css">
</head>
<body>
<P>
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
<TR>
<td valign="top" width="300">
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
</td>
<TD width="353">
<H1 align="center">Boost.Regex</H1>
<H2 align="center">Configuration and setup</H2>
</TD>
<td width="50">
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
</td>
</TR>
</TABLE>
</P>
<HR>
<h2>Contents</h2>
<dl class="index">
<dt><a href="#compiler">Compiler setup</a></dt>
<dt><a href="#locale">Locale and traits class selection</a></dt>
<dt><a href="#linkage">Linkage Options</a></dt>
<dt><a href="#algorithm">Algorithm Selection</a></dt>
<dt><a href="#tuning">Algorithm Tuning</a></dt>
</dl>
<H3><A name="compiler"></A>Compiler setup.</H3>
<P>You shouldn't need to do anything special to configure boost.regex for use with
your compiler - the <A href="../../config/index.html">boost.config</A> subsystem
should already take care of it. If you do have problems (or you are using a
particularly obscure compiler or platform) then <A href="../../config/index.html">boost.config</A>&nbsp;has
a&nbsp;<A href="../../config/config.htm#config_script">configure</A> script.</P>
<H3><A name="locale"></A>Locale and traits class selection.</H3>
<P>The following macros (see <A href="../../../boost/regex/user.hpp">user.hpp</A>)
control how boost.regex interacts with the user's locale:</P>
<P>
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="1">
<TR>
<TD width="265">BOOST_REGEX_USE_C_LOCALE</TD>
<TD>Forces boost.regex to use the global C locale in its traits class support:
this is the default behaviour on non-windows platforms, but MS Windows
platforms normally use the Win32 API for locale support.</TD>
</TR>
<TR>
<TD width="265">BOOST_REGEX_USE_CPP_LOCALE</TD>
<TD>Forces boost.regex to use std::locale in its default traits class, regular
expressions can then be imbued with an instance&nbsp;specific locale.</TD>
</TR>
<TR>
<TD width="265">BOOST_REGEX_NO_W32</TD>
<TD>Tells boost.regex not to use any Win32 APIs even when available (implies
BOOST_REGEX_USE_C_LOCALE unless BOOST_REGEX_USE_CPP_LOCALE is set).</TD>
</TR>
</TABLE>
</P>
<H3><A name="linkage"></A>Linkage Options</H3>
<P>
<TABLE id="Table3" cellSpacing="1" cellPadding="1" width="100%" border="1">
<TR>
<TD>BOOST_REGEX_STATIC_LINK</TD>
<TD>For Microsoft and Borland C++ builds, this tells boost.regex that it is going
to be linked to a static library even when using a dynamic C runtime.</TD>
</TR>
<TR>
<TD>BOOST_REGEX_NO_LIB</TD>
<TD>For Microsoft and Borland C++ builds, this tells boost.regex that it should
not automatically select the library to link to.</TD>
</TR>
</TABLE>
</P>
<H3><A name="algorithm"></A>Algorithm Selection</H3>
<P>
<TABLE id="Table4" cellSpacing="1" cellPadding="1" width="100%" border="1">
<TR>
<TD width="253">BOOST_REGEX_V3</TD>
<TD>Tells boost.regex to use the boost-1.30.0 matching algorithm, define only if
you need maximum compatibility with previous behaviour.</TD>
</TR>
<TR>
<TD width="253">BOOST_REGEX_RECURSIVE</TD>
<TD>Tells boost.regex to use a stack-recursive matching algorithm.&nbsp; This is
generally the fastest option (although there is very little in it), but can
cause stack overflow in extreme cases, on Win32 this can be handled safely, but
this is not the case on other platforms.</TD>
</TR>
<TR>
<TD width="253">BOOST_REGEX_NON_RECURSIVE</TD>
<TD>Tells boost.regex to use a non-stack recursive matching algorithm, this can be
slightly slower than the alternative, but is always safe no matter how
pathological the regular expression.&nbsp; This is the default on non-Win32
platforms.</TD>
</TR>
</TABLE>
</P>
<H3><A name="tuning"></A>Algorithm Tuning</H3>
<P>The following option applies only if BOOST_REGEX_RECURSIVE is set.</P>
<P>
<TABLE id="Table6" cellSpacing="1" cellPadding="1" width="100%" border="1">
<TR>
<TD>BOOST_REGEX_HAS_MS_STACK_GUARD</TD>
<TD>Tells boost.regex that Microsoft style __try - __except blocks are supported,
and can be used to safely trap stack overflow.</TD>
</TR>
</TABLE>
</P>
<P>The following options apply only if BOOST_REGEX_NON_RECURSIVE is set.</P>
<P>
<TABLE id="Table5" cellSpacing="1" cellPadding="1" width="100%" border="1">
<TR>
<TD>BOOST_REGEX_BLOCKSIZE</TD>
<TD>In non-recursive mode, boost.regex uses largish blocks of memory to act as a
stack for the state machine, the larger the block size then the fewer
allocations that will take place.&nbsp; This defaults to 4096 bytes, which is
large enough to match the vast majority of regular expressions&nbsp;without
further allocations, however, you can choose smaller or larger values depending
upon your platform's characteristics.</TD>
</TR>
<TR>
<TD>BOOST_REGEX_MAX_BLOCKS</TD>
<TD>Tells boost.regex how many blocks of size BOOST_REGEX_BLOCKSIZE it is
permitted to use.&nbsp; If this value is exceeded then boost.regex will stop
trying to find a match and throw a std::runtime_error.&nbsp; Defaults to 1024,
don't forget to tweak this value if you alter BOOST_REGEX_BLOCKSIZE by much.</TD>
</TR>
<TR>
<TD>BOOST_REGEX_MAX_CACHE_BLOCKS</TD>
<TD>Tells boost.regex how many memory blocks to store in its internal cache -
memory blocks are taken from this cache rather than by calling ::operator
new.&nbsp; Generally speeking this can be an order of magnitude faster than
calling ::operator new each time a memory block is required, but has the
downside that boost.regex can end up caching a large chunk of memory (by
default up to 16 blocks each of BOOST_REGEX_BLOCKSIZE size).&nbsp; If memory is
tight then try defining this to 0 (disables all caching), or if that is too
slow, then a value of 1 or 2, may be sufficient.&nbsp; On the other hand, on
large multi-processor, multi-threaded systems, you may find that a higher value
is in order.</TD>
</TR>
</TABLE>
</P>
<P>
<HR>
<P></P>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
11 April 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" -->
</p>
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a>&nbsp;1998-<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></I></P>
<P align="left"><I>Permission to use, copy, modify, distribute and sell this software
and its documentation for any purpose is hereby granted without fee, provided
that the above copyright notice appear in all copies and that both that
copyright notice and this permission notice appear in supporting documentation.
Dr John Maddock makes no representations about the suitability of this software
for any purpose. It is provided "as is" without express or implied warranty.</I></P>
</body>
</html>

View File

@ -27,7 +27,9 @@
<hr>
<h2>Contents</h2>
<dl class="index">
<dt><a href="introduction.html">Overview</a></dt> <dt><a href="install.html">Installation</a></dt>
<dt><a href="introduction.html">Overview</a></dt>
<dt><a href="configuration.html">Configuration and setup</a></dt>
<dt><a href="install.html">Installation</a></dt>
<dd>
<dl class="index">
<dt><a href="install.html#bcb">Borland C++ Builder</a></dt> <dt><a href="install.html#vc">
@ -61,6 +63,7 @@
<dd>
<dl class="index">
<dt><a href="regex_iterator.html">regex_iterator</a></dt>
<dt><a href="regex_token_iterator.html">regex_token_iterator</a></dt>
</dl>
</dd>
<dt>Misc.</dt>
@ -68,7 +71,7 @@
<dl class="index">
<dt><a href="posix_api.html">POSIX API Compatibility Functions</a></dt>
<dt><a href="partial_matches.html">Partial matches</a></dt>
<dt><a href="synatx.html">Regular Expression Syntax</a></dt>
<dt><a href="syntax.html">Regular Expression Syntax</a></dt>
<dt><a href="format_syntax.html">Format String Syntax</a></dt>
</dl>
</dd>
@ -95,6 +98,7 @@
<dt><a href="examples.html">Examples</a></dt>
<dt><a href="headers.html">Headers</a></dt>
<dt><a href="redistributables.html">Redistributables and Library Names</a></dt>
<dt><a href="standards.html">Standards Conformance</a></dt>
<dt><a href="history.html">History</a></dt>
<dt><a href="contacts.html">Contacts and Acknowledgements</a></dt>
</dl>
@ -117,3 +121,4 @@
for any purpose. It is provided "as is" without express or implied warranty.</i></p>
</body>
</html>

370
doc/regex_iterator.html Normal file
View File

@ -0,0 +1,370 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>Boost.Regex: regex_iterator</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
<body>
<P>
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
<TR>
<td vAlign="top" width="300">
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../c++boost.gif" width="277" border="0"></A></h3>
</td>
<TD width="353">
<H1 align="center">Boost.Regex</H1>
<H2 align="center">regex_iterator</H2>
</TD>
<td width="50">
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
</td>
</TR>
</TABLE>
</P>
<HR>
<H3>Contents</H3>
<dl class="index">
<dt><A href="#synopsis">Synopsis</A> <dt><a href="#description">Description</a> <dt><A href="#examples">
Examples</A></dt></dl>
<H3><A name="synopsis"></A>Synopsis</H3>
<P>The iterator type regex_iterator will enumerate all of the regular expression
matches found in some sequence: dereferencing a regex_iterator yields a
reference to a&nbsp;<A href="match_results.html">match_results</A> object.</P>
<PRE>
template &lt;class BidirectionalIterator,
class charT = iterator_traits&lt;BidirectionalIterator&gt;::value_type,
class traits = regex_traits&lt;charT&gt;,
class Allocator = allocator&lt;charT&gt; &gt;
class regex_iterator
{
public:
typedef basic_regex&lt;charT, traits, Allocator&gt; regex_type;
typedef match_results&lt;BidirectionalIterator&gt; value_type;
typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type;
typedef const value_type* pointer;
typedef const value_type&amp; reference;
typedef std::forward_iterator_tag iterator_category;
regex_iterator();
regex_iterator(BidirectionalIterator a, BidirectionalIterator b,
const regex_type&amp; re,
match_flag_type m = match_default);
regex_iterator(const regex_iterator&amp;);
regex_iterator&amp; operator=(const regex_iterator&amp;);
bool operator==(const regex_iterator&amp;);
bool operator!=(const regex_iterator&amp;);
const value_type&amp; operator*();
const value_type* operator-&gt;();
regex_iterator&amp; operator++();
regex_iterator operator++(int);
};
</PRE>
<H3><A name="description"></A>Description</H3>
<P>A regex_iterator is constructed from a pair of iterators, and enumerates all
occurrences of a regular expression within that iterator range.</P>
<PRE>regex_iterator();</PRE>
<B>
<P>
Effects:</B> constructs an end of sequence regex_iterator.</P><PRE>regex_iterator(BidirectionalIterator a, BidirectionalIterator b,
const regex_type&amp; re,
match_flag_type m = match_default);</PRE>
<B>
<P>
Effects: </B>constructs a regex_iterator that will enumerate all occurrences
of the expression <EM>re</EM>, within the sequence <EM>[a,b)</EM>, and found
using match flags <EM>m</EM>.&nbsp; The object <EM>re </EM>must exist for the
lifetime of the regex_iterator.</P><PRE>regex_iterator(const regex_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>constructs a copy of <CODE>that</CODE>.</P><B>
<P>
Postconditions:</B> <CODE>*this == that</CODE>.</P><PRE>regex_iterator&amp; operator=(const regex_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>sets&nbsp;<CODE>*this</CODE> equal to <CODE>that</CODE>.</P><B>
<P>
Postconditions:</B> <CODE>*this == that</CODE>.</P><PRE>bool operator==(const regex_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>returns true if *this is equal to that.</P><PRE>bool operator!=(const regex_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
<H5>RE.8.1.1 regex_iterator dereference</H5>
<PRE>const value_type&amp; operator*();</PRE>
<B>
<P>
Effects: </B>dereferencing a regex_iterator object <EM>it</EM> yields a
const reference to a <A href="match_results.html">match_results</A> object,
whose members are set as follows:</P>
<P>
<TABLE id="Table2" cellSpacing="1" cellPadding="7" width="624" border="1">
<TBODY>
<TR>
<TD vAlign="top" width="50%"><B>
<P>
Element</B>
</P>
</TD>
<TD vAlign="top" width="50%"><B>
<P>
Value</B> </P></TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).size()</P>
</TD>
<TD vAlign="top" width="50%">
<P>re.mark_count()</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).empty()</P>
</TD>
<TD vAlign="top" width="50%">
<P>false</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).prefix().first</P>
</TD>
<TD vAlign="top" width="50%">
<P>The end of the last match found, or the start of the underlying sequence if
this is the first match enumerated</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).prefix().second</P>
</TD>
<TD vAlign="top" width="50%">
<P>(*it)[0].first</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).prefix().matched</P>
</TD>
<TD vAlign="top" width="50%">
<P>(*it).prefix().first != (*it).prefix().second</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).suffix().first</P>
</TD>
<TD vAlign="top" width="50%">
<P>(*it)[0].second</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).suffix().second</P>
</TD>
<TD vAlign="top" width="50%">
<P>The end of the underlying sequence.</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it).suffix().matched</P>
</TD>
<TD vAlign="top" width="50%">
<P>(*it).suffix().first != (*it).suffix().second</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it)[0].first</P>
</TD>
<TD vAlign="top" width="50%">
<P>The start of the sequence of characters that matched the regular expression</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it)[0].second</P>
</TD>
<TD vAlign="top" width="50%">
<P>The end of the sequence of characters that matched the regular expression</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it)[0].matched</P>
</TD>
<TD vAlign="top" width="50%"><CODE>
<P>
true</CODE> if a full match was found, and <CODE>false</CODE> if it was a
partial match (found as a result of the <CODE>match_partial</CODE> flag being
set).</P></TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it)[n].first</P>
</TD>
<TD vAlign="top" width="50%">
<P>For all integers n &lt; (*it).size(), the start of the sequence that matched
sub-expression <I>n</I>. Alternatively, if sub-expression n did not participate
in the match, then <I>last</I>.</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it)[n].second</P>
</TD>
<TD vAlign="top" width="50%">
<P>For all integers n &lt; (*it).size(), the end of the sequence that matched
sub-expression <I>n</I>. Alternatively, if sub-expression n did not participate
in the match, then <I>last</I>.</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">
<P>(*it)[n].matched</P>
</TD>
<TD vAlign="top" width="50%">
<P>For all integers n &lt; (*it).size(), true if sub-expression <I>n</I> participated
in the match, false otherwise.</P>
<P></P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="50%">(*it).position(n)</TD>
<TD vAlign="top" width="50%">For all integers n &lt; (*it).size(), then the
distance from the start of the underlying sequence to the start of
sub-expression match <EM>n</EM>.</TD>
</TR>
</TBODY></TABLE></P><PRE>const value_type* operator-&gt;();</PRE>
<B>
<P>
Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P><PRE>regex_iterator&amp; operator++();</PRE>
<P><STRONG>Effects:</STRONG> moves the iterator to the next match in the
underlying sequence, or the end of sequence iterator if none is found.
&nbsp;When the last match found matched a zero length string, then the
regex_iterator will find the next match as follows: if there exists a non-zero
length match that starts at the same location as the last one, then returns it,
otherwise starts looking for the next (possibly zero length) match from one
position to the right of the last match.</P>
<B>
<P>
Returns:</B> <CODE>*this</CODE>.</P><PRE>regex_iterator operator++(int);</PRE>
<B>
<P>
Effects:</B> constructs a copy <CODE>result</CODE> of <CODE>*this</CODE>,
then calls <CODE>++(*this)</CODE>.</P><B>
<P>
Returns:</B> <CODE>result</CODE>.</P>
<H3><A name="examples"></A>Examples</H3>
<P>The following <A href="../example/snippets/regex_iterator_example.cpp">example</A>
takes a C++ source file and builds up an index of class names, and the location
of that class in the file.</P>
<pre>
<FONT color=#008040>#include &lt;string&gt;</FONT>
<FONT color=#008040>#include &lt;map&gt;</FONT>
<FONT color=#008040>#include &lt;fstream&gt;</FONT>
<FONT color=#008040>#include &lt;iostream&gt;</FONT>
<FONT color=#008040>#include &lt;boost/regex.hpp&gt;</FONT>
<B>using</B> <B>namespace</B> std;
<I><FONT color=#000080>// purpose:</FONT></I>
<I><FONT color=#000080>// takes the contents of a file in the form of a string</FONT></I>
<I><FONT color=#000080>// and searches for all the C++ class definitions, storing</FONT></I>
<I><FONT color=#000080>// their locations in a map of strings/int's</FONT></I>
<B>typedef</B> std::map&lt;std::string, std::string::difference_type, std::less&lt;std::string&gt; &gt; map_type;
<B>const</B> <B>char</B>* re =
<I><FONT color=#000080>// possibly leading whitespace: </FONT></I>
<FONT color=#0000ff>"^[[:space:]]*"</FONT>
<I><FONT color=#000080>// possible template declaration:</FONT></I>
<FONT color=#0000ff>"(template[[:space:]]*&lt;[^;:{]+&gt;[[:space:]]*)?"</FONT>
<I><FONT color=#000080>// class or struct:</FONT></I>
<FONT color=#0000ff>"(class|struct)[[:space:]]*"</FONT>
<I><FONT color=#000080>// leading declspec macros etc:</FONT></I>
<FONT color=#0000ff>"("</FONT>
<FONT color=#0000ff>"\\&lt;\\w+\\&gt;"</FONT>
<FONT color=#0000ff>"("</FONT>
<FONT color=#0000ff>"[[:blank:]]*\\([^)]*\\)"</FONT>
<FONT color=#0000ff>")?"</FONT>
<FONT color=#0000ff>"[[:space:]]*"</FONT>
<FONT color=#0000ff>")*"</FONT>
<I><FONT color=#000080>// the class name</FONT></I>
<FONT color=#0000ff>"(\\&lt;\\w*\\&gt;)[[:space:]]*"</FONT>
<I><FONT color=#000080>// template specialisation parameters</FONT></I>
<FONT color=#0000ff>"(&lt;[^;:{]+&gt;)?[[:space:]]*"</FONT>
<I><FONT color=#000080>// terminate in { or :</FONT></I>
<FONT color=#0000ff>"(\\{|:[^;\\{()]*\\{)"</FONT>;
boost::regex expression(re);
map_type class_index;
<B>bool</B> regex_callback(<B>const</B> boost::match_results&lt;std::string::const_iterator&gt;&amp; what)
{
<I><FONT color=#000080>// what[0] contains the whole string</FONT></I>
<I><FONT color=#000080>// what[5] contains the class name.</FONT></I>
<I><FONT color=#000080>// what[6] contains the template specialisation if any.</FONT></I>
<I><FONT color=#000080>// add class name and position to map:</FONT></I>
class_index[what[<FONT color=#0000a0>5</FONT>].str() + what[<FONT color=#0000a0>6</FONT>].str()] = what.position(<FONT color=#0000a0>5</FONT>);
<B>return</B> <B>true</B>;
}
<B>void</B> load_file(std::string&amp; s, std::istream&amp; is)
{
s.erase();
s.reserve(is.rdbuf()-&gt;in_avail());
<B>char</B> c;
<B>while</B>(is.get(c))
{
<B>if</B>(s.capacity() == s.size())
s.reserve(s.capacity() * <FONT color=#0000a0>3</FONT>);
s.append(<FONT color=#0000a0>1</FONT>, c);
}
}
<B>int</B> main(<B>int</B> argc, <B>const</B> <B>char</B>** argv)
{
std::string text;
<B>for</B>(<B>int</B> i = <FONT color=#0000a0>1</FONT>; i &lt; argc; ++i)
{
cout &lt;&lt; <FONT color=#0000ff>"Processing file "</FONT> &lt;&lt; argv[i] &lt;&lt; endl;
std::ifstream fs(argv[i]);
load_file(text, fs);
<I><FONT color=#000080>// construct our iterators:</FONT></I>
boost::regex_iterator&lt;std::string::const_iterator&gt; m1(text.begin(), text.end(), expression);
boost::regex_iterator&lt;std::string::const_iterator&gt; m2;
std::for_each(m1, m2, &amp;regex_callback);
<I><FONT color=#000080>// copy results:</FONT></I>
cout &lt;&lt; class_index.size() &lt;&lt; <FONT color=#0000ff>" matches found"</FONT> &lt;&lt; endl;
map_type::iterator c, d;
c = class_index.begin();
d = class_index.end();
<B>while</B>(c != d)
{
cout &lt;&lt; <FONT color=#0000ff>"class \""</FONT> &lt;&lt; (*c).first &lt;&lt; <FONT color=#0000ff>"\" found at index: "</FONT> &lt;&lt; (*c).second &lt;&lt; endl;
++c;
}
class_index.erase(class_index.begin(), class_index.end());
}
<B>return</B> <FONT color=#0000a0>0</FONT>;
}
</pre>
<HR>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
11 April 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" -->
</p>
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a>&nbsp;1998-<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></I></P>
<P align="left"><I>Permission to use, copy, modify, distribute and sell this software
and its documentation for any purpose is hereby granted without fee, provided
that the above copyright notice appear in all copies and that both that
copyright notice and this permission notice appear in supporting documentation.
Dr John Maddock makes no representations about the suitability of this software
for any purpose. It is provided "as is" without express or implied warranty.</I></P>
</body>
</html>

View File

@ -85,8 +85,8 @@ basic_string&lt;charT&gt; regex_replace(const basic_string&lt;charT&gt;&amp; s,
calls <CODE>regex_replace(back_inserter(result), s.begin(), s.end(), e, fmt,
flags)</CODE>, and then returns <CODE>result</CODE>.
<H3><A name="examples"></A>Examples</H3>
<P>The following <A href="../example/snippets/regex_replace_example.cpp">example</A> takes
C/C++ source code as input, and outputs syntax highlighted HTML code.</P>
<P>The following <A href="../example/snippets/regex_replace_example.cpp">example</A>
takes C/C++ source code as input, and outputs syntax highlighted HTML code.</P>
<P></P>
<PRE><FONT color=#008080>#include &lt;fstream&gt;
#include &lt;sstream&gt;
@ -139,12 +139,14 @@ boost::regex e1, e2;
</I></FONT> <FONT color=#000080><I>// temporary string stream
</I></FONT> std::ostringstream t(std::ios::out | std::ios::binary);
std::ostream_iterator&lt;<B>char</B>, <B>char</B>&gt; oi(t);
boost::regex_replace(oi, in.begin(), in.end(), e2, pre_format);
boost::regex_replace(oi, in.begin(), in.end(),
e2, pre_format, boost::match_default | boost::format_all);
<FONT color=#000080><I>// then output to final output stream
</I></FONT> <FONT color=#000080><I>// adding syntax highlighting:
</I></FONT> std::string s(t.str());
std::ostream_iterator&lt;<B>char</B>, <B>char</B>&gt; out(os);
boost::regex_replace(out, s.begin(), s.end(), e1, format_string);
boost::regex_replace(out, s.begin(), s.end(),
e1, format_string, boost::match_default | boost::format_all);
os &lt;&lt; footer_text;
}
}

View File

@ -0,0 +1,279 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>Boost.Regex: regex_token_iterator</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
<body>
<P>
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
<TR>
<td vAlign="top" width="300">
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../c++boost.gif" width="277" border="0"></A></h3>
</td>
<TD width="353">
<H1 align="center">Boost.Regex</H1>
<H2 align="center">regex_token_iterator</H2>
</TD>
<td width="50">
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
</td>
</TR>
</TABLE>
</P>
<HR>
<H3>Contents</H3>
<dl class="index">
<dt><A href="#synopsis">Synopsis</A> <dt><a href="#description">Description</a> <dt><A href="#examples">
Examples</A></dt></dl>
<H3><A name="synopsis"></A>Synopsis</H3>
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
that is to say it represents a new view of an existing iterator sequence, by
enumerating all the occurrences of a regular expression within that sequence,
and presenting one or more new strings for each match found. Each position
enumerated by the iterator is a string that represents what matched a
particular sub-expression within the regular expression. When class <CODE>regex_token_iterator</CODE>
is used to enumerate a single sub-expression with index -1, then the iterator
performs field splitting: that is to say it enumerates one string for each
section of the character container sequence that does not match the regular
expression specified.</P>
<PRE>
template &lt;class BidirectionalIterator,
class charT = iterator_traits&lt;BidirectionalIterator&gt;::value_type,
class traits = regex_traits&lt;charT&gt;,
class Allocator = allocator&lt;charT&gt; &gt;
class regex_token_iterator
{
public:
typedef basic_regex&lt;charT, traits, Allocator&gt; regex_type;
typedef basic_string&lt;charT&gt; value_type;
typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type;
typedef const value_type* pointer;
typedef const value_type&amp; reference;
typedef std::forward_iterator_tag iterator_category;
regex_token_iterator();
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
int submatch = 0, match_flag_type m = match_default);
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
const std::vector&lt;int&gt;&amp; submatches, match_flag_type m = match_default);
template &lt;std::size_t N&gt;
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
const int (&amp;submatches)[N], match_flag_type m = match_default);
regex_token_iterator(const regex_token_iterator&amp;);
regex_token_iterator&amp; operator=(const regex_token_iterator&amp;);
bool operator==(const regex_token_iterator&amp;);
bool operator!=(const regex_token_iterator&amp;);
const value_type&amp; operator*();
const value_type* operator-&gt;();
regex_token_iterator&amp; operator++();
regex_token_iterator operator++(int);
};
</PRE>
<H3><A name="description"></A>Description</H3>
<PRE>regex_token_iterator();</PRE>
<B>
<P>
Effects:</B> constructs an end of sequence iterator.</P><PRE>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
int submatch = 0, match_flag_type m = match_default);</PRE>
<B>
<P>
Preconditions: </B><CODE>!re.empty()</CODE>.</P><B>
<P>
Effects:</B> constructs a regex_token_iterator that will enumerate one
string for each regular expression match of the expression <EM>re</EM> found
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>.&nbsp; The
string enumerated is the&nbsp;sub-expression <EM>submatch </EM>for each match
found; if <EM>submatch </EM>is -1, then enumerates all the text sequences that
did not match the expression <EM>re </EM>(that is to say it performs field
splitting).</P><PRE>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
const std::vector&lt;int&gt;&amp; submatches, match_flag_type m = match_default);</PRE>
<B>
<P>
Preconditions:</B> <CODE>submatches.size() &amp;&amp; !re.empty()</CODE>.</P><B>
<P>
Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
strings for each regular expression match of the expression <EM>re</EM> found
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>.&nbsp; For
each match found one string will be enumerated&nbsp;for each sub-expression
index&nbsp;contained within <EM>submatches </EM>vector; if <EM>submatches[0] </EM>
is -1, then the first string enumerated for each match will be all of the text
from end of the last match to the start of the current match, in addition there
will be one extra string enumerated when no more matches can be found: from the
end of the last match found, to the end of the underlying sequence.</P><PRE>template &lt;std::size_t N&gt;
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
const int (&amp;submatches)[N], match_flag_type m = match_default);</PRE>
<B>
<P>
Preconditions: </B><CODE>!re.empty()</CODE>.</P>
<P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will
enumerate&nbsp;<EM>N</EM> strings for each regular expression match of the
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match
flags <EM>m</EM>.&nbsp; For each match found one string will be
enumerated&nbsp;for each sub-expression index&nbsp;contained within the <EM>submatches
</EM>array; if <EM>submatches[0] </EM>is -1, then the first string enumerated
for each match will be all of the text from end of the last match to the start
of the current match, in addition there will be one extra string enumerated
when no more matches can be found: from the end of the last match found, to the
end of the underlying sequence.</P>
<PRE>regex_token_iterator(const regex_token_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>constructs a copy of <CODE>that</CODE>.</P><B>
<P>
Postconditions:</B> <CODE>*this == that</CODE>.</P><PRE>regex_token_iterator&amp; operator=(const regex_token_iterator&amp; that);</PRE>
<B>
<P>
Effects: </B>sets <CODE>*this</CODE> to be equal to&nbsp;<CODE>that</CODE>.</P><B>
<P>
Postconditions:</B> <CODE>*this == that</CODE>.</P><PRE>bool operator==(const regex_token_iterator&amp;);</PRE>
<B>
<P>
Effects: </B>returns true if *this is the same position as that.</P><PRE>bool operator!=(const regex_token_iterator&amp;);</PRE>
<B>
<P>
Effects: </B>returns <CODE>!(*this == that)</CODE>.</P><PRE>const value_type&amp; operator*();</PRE>
<B>
<P>
Effects: </B>returns the current string being enumerated.</P><PRE>const value_type* operator-&gt;();</PRE>
<B>
<P>
Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P><PRE>regex_token_iterator&amp; operator++();</PRE>
<B>
<P>
Effects: </B>Moves on to the next string to be enumerated.</P><B>
<P>
Returns:</B><CODE> *this</CODE>.</P><PRE>regex_token_iterator operator++(int);</PRE>
<B>
<P>
Effects:</B> constructs a copy <CODE>result</CODE> of <CODE>*this</CODE>,
then calls <CODE>++(*this)</CODE>.</P><B>
<P>
Returns:</B> <CODE>result</CODE>.<A name="examples"></A>
<H3>Examples</H3>
<P>The following <A href="../example/snippets/regex_token_iterator_example_1.cpp">example</A>
takes a string and splits it into a series of tokens:</P>
<pre>
<FONT color=#008040>#include &lt;iostream&gt;</FONT>
<FONT color=#008040>#include &lt;boost/regex.hpp&gt;</FONT>
<B>using</B> <B>namespace</B> std;
<B>int</B> main(<B>int</B> argc)
{
string s;
<B>do</B>{
<B>if</B>(argc == <FONT color=#0000a0>1</FONT>)
{
cout &lt;&lt; <FONT color=#0000ff>"Enter text to split (or \"quit\" to exit): "</FONT>;
getline(cin, s);
<B>if</B>(s == <FONT color=#0000ff>"quit"</FONT>) <B>break</B>;
}
<B>else</B>
s = <FONT color=#0000ff>"This is a string of tokens"</FONT>;
boost::regex re(<FONT color=#0000ff>"\\s+"</FONT>);
boost::regex_token_iterator&lt;std::string::const_iterator&gt; i(s.begin(), s.end(), re, -<FONT color=#0000a0>1</FONT>);
boost::regex_token_iterator&lt;std::string::const_iterator&gt; j;
<B>unsigned</B> count = <FONT color=#0000a0>0</FONT>;
<B>while</B>(i != j)
{
cout &lt;&lt; *i++ &lt;&lt; endl;
count++;
}
cout &lt;&lt; <FONT color=#0000ff>"There were "</FONT> &lt;&lt; count &lt;&lt; <FONT color=#0000ff>" tokens found."</FONT> &lt;&lt; endl;
}<B>while</B>(argc == <FONT color=#0000a0>1</FONT>);
<B>return</B> <FONT color=#0000a0>0</FONT>;
}
</pre>
<P>The following <A href="../example/snippets/regex_token_iterator_example_2.cpp">example</A>
takes a html file and outputs a list of all the linked files:</P>
<pre>
<FONT color=#008040>#include &lt;fstream&gt;</FONT>
<FONT color=#008040>#include &lt;iostream&gt;</FONT>
<FONT color=#008040>#include &lt;iterator&gt;</FONT>
<FONT color=#008040>#include &lt;boost/regex.hpp&gt;</FONT>
boost::regex e(<FONT color=#0000ff>"&lt;\\s*A\\s+[^&gt;]*href\\s*=\\s*\"([^\"]*)\""</FONT>,
boost::regex::normal | boost::regbase::icase);
<B>void</B> load_file(std::string&amp; s, std::istream&amp; is)
{
s.erase();
<I><FONT color=#000080>//</FONT></I>
<I><FONT color=#000080>// attempt to grow string buffer to match file size,</FONT></I>
<I><FONT color=#000080>// this doesn't always work...</FONT></I>
s.reserve(is.rdbuf()-&gt;in_avail());
<B>char</B> c;
<B>while</B>(is.get(c))
{
<I><FONT color=#000080>// use logarithmic growth strategy, in case</FONT></I>
<I><FONT color=#000080>// in_avail (above) returned zero:</FONT></I>
<B>if</B>(s.capacity() == s.size())
s.reserve(s.capacity() * <FONT color=#0000a0>3</FONT>);
s.append(<FONT color=#0000a0>1</FONT>, c);
}
}
<B>int</B> main(<B>int</B> argc, <B>char</B>** argv)
{
std::string s;
<B>int</B> i;
<B>for</B>(i = <FONT color=#0000a0>1</FONT>; i &lt; argc; ++i)
{
std::cout &lt;&lt; <FONT color=#0000ff>"Findings URL's in "</FONT> &lt;&lt; argv[i] &lt;&lt; <FONT color=#0000ff>":"</FONT> &lt;&lt; std::endl;
s.erase();
std::ifstream is(argv[i]);
load_file(s, is);
boost::regex_token_iterator&lt;std::string::const_iterator&gt;
i(s.begin(), s.end(), e, <FONT color=#0000a0>1</FONT>);
boost::regex_token_iterator&lt;std::string::const_iterator&gt; j;
<B>while</B>(i != j)
{
std::cout &lt;&lt; *i++ &lt;&lt; std::endl;
}
}
<I><FONT color=#000080>//</FONT></I>
<I><FONT color=#000080>// alternative method:</FONT></I>
<I><FONT color=#000080>// test the array-literal constructor, and split out the whole</FONT></I>
<I><FONT color=#000080>// match as well as $1....</FONT></I>
<I><FONT color=#000080>//</FONT></I>
<B>for</B>(i = <FONT color=#0000a0>1</FONT>; i &lt; argc; ++i)
{
std::cout &lt;&lt; <FONT color=#0000ff>"Findings URL's in "</FONT> &lt;&lt; argv[i] &lt;&lt; <FONT color=#0000ff>":"</FONT> &lt;&lt; std::endl;
s.erase();
std::ifstream is(argv[i]);
load_file(s, is);
<B>const</B> <B>int</B> subs[] = {<FONT color=#0000a0>1</FONT>, <FONT color=#0000a0>0</FONT>,};
boost::regex_token_iterator&lt;std::string::const_iterator&gt;
i(s.begin(), s.end(), e, subs);
boost::regex_token_iterator&lt;std::string::const_iterator&gt; j;
<B>while</B>(i != j)
{
std::cout &lt;&lt; *i++ &lt;&lt; std::endl;
}
}
<B>return</B> <FONT color=#0000a0>0</FONT>;
}
</pre>
<P>
<HR>
<P></P>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
11 April 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" -->
</p>
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a>&nbsp;1998-<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></I></P>
<P align="left"><I>Permission to use, copy, modify, distribute and sell this software
and its documentation for any purpose is hereby granted without fee, provided
that the above copyright notice appear in all copies and that both that
copyright notice and this permission notice appear in supporting documentation.
Dr John Maddock makes no representations about the suitability of this software
for any purpose. It is provided "as is" without express or implied warranty.</I></P>
</body>
</html>

80
doc/standards.html Normal file
View File

@ -0,0 +1,80 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>Boost.Regex: Standards Conformance</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" type="text/css" href="../../../boost.css">
</head>
<body>
<P>
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
<TR>
<td valign="top" width="300">
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
</td>
<TD width="353">
<H1 align="center">Boost.Regex</H1>
<H2 align="center">Standards Conformance</H2>
</TD>
<td width="50">
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
</td>
</TR>
</TABLE>
</P>
<HR>
<p></p>
<H3>C++</H3>
<P>Boost.regex is intended to conform to the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">
regular expression standardisation proposal</A>, which will appear in a
future C++ standard technical report (and hopefully in a future version of the
standard).&nbsp; Currently there are some differences in how the regular
expression traits classes are defined, these will be fixed in a future release.</P>
<H3>ECMAScript / JavaScript</H3>
<P>All of the ECMAScript regular expression syntax features are supported, except
that:</P>
<P>Negated class escapes (\S, \D and \W) are not permitted inside character class
definitions ( [...] ).</P>
<P>The escape sequence \u matches any upper case character (the same as
[[:upper:]])&nbsp;rather than a unicode escape sequence; use \x{DDDD} for
unicode escape sequences.</P>
<H3>Perl</H3>
<P>Almost all perl features are supported, except for:</P>
<P>\N{name}&nbsp; Use [[:name:]] instead.</P>
<P>\pP and \PP</P>
<P>(?imsx-imsx)</P>
<P>(?&lt;=pattern)</P>
<P>(?&lt;!pattern)</P>
<P>(?{code})</P>
<P>(??{code})</P>
<P>(?(condition)yes-pattern) and (?(condition)yes-pattern|no-pattern)</P>
<P>These embarrassments / limitations will be removed in due course, mainly
dependent upon user demand.</P>
<H3>POSIX</H3>
<P>All the POSIX basic and extended regular expression features are supported,
except that:</P>
<P>No character collating names are recognised except those specified in the POSIX
standard for the C locale, unless they are explicitly registered with the
traits class.</P>
<P>Character equivalence classes ( [[=a=]] etc) are probably buggy except on
Win32.&nbsp; Implementing this feature requires knowledge of the format of the
string sort keys produced by the system; if you need this, and the default
implementation doesn't work on your platform, then you will need to supply a
custom traits class.</P>
<P>
<HR>
<P></P>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
11 April 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" -->
</p>
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a>&nbsp;1998-<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></I></P>
<P align="left"><I>Permission to use, copy, modify, distribute and sell this software
and its documentation for any purpose is hereby granted without fee, provided
that the above copyright notice appear in all copies and that both that
copyright notice and this permission notice appear in supporting documentation.
Dr John Maddock makes no representations about the suitability of this software
for any purpose. It is provided "as is" without express or implied warranty.</I></P>
</body>
</html>

View File

@ -0,0 +1,115 @@
/*
*
* Copyright (c) 2003
* Dr John Maddock
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Dr John Maddock makes no representations
* about the suitability of this software for any purpose.
* It is provided "as is" without express or implied warranty.
*
*/
/*
* LOCATION: see http://www.boost.org for most recent version.
* FILE regex_iterator_example_2.cpp
* VERSION see <boost/version.hpp>
* DESCRIPTION: regex_iterator example 2: searches a cpp file for class definitions,
* using global data.
*/
#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <boost/regex.hpp>
using namespace std;
// purpose:
// takes the contents of a file in the form of a string
// and searches for all the C++ class definitions, storing
// their locations in a map keyed by class name.

// maps a class name (with any template specialisation text appended)
// to the position recorded for it by regex_callback:
typedef std::map<std::string, std::string::difference_type, std::less<std::string> > map_type;
// the regular expression source, assembled from adjacent string literals;
// the marked sub-expression numbers ($N) are noted where each one opens:
const char* re =
// possibly leading whitespace:
"^[[:space:]]*"
// $1: possible template declaration:
"(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
// $2: class or struct:
"(class|struct)[[:space:]]*"
// $3: leading declspec macros etc (repeated zero or more times),
// each optionally followed by $4, a parenthesised argument list:
"("
"\\<\\w+\\>"
"("
"[[:blank:]]*\\([^)]*\\)"
")?"
"[[:space:]]*"
")*"
// $5: the class name
"(\\<\\w*\\>)[[:space:]]*"
// $6: template specialisation parameters
"(<[^;:{]+>)?[[:space:]]*"
// $7: terminate in { or :
"(\\{|:[^;\\{()]*\\{)";
// compiled form of the expression above, shared by every file processed:
boost::regex expression(re);
// results for the file currently being processed; filled in by
// regex_callback and emptied by main after each file:
map_type class_index;
// Callback handed to std::for_each: records one matched class definition
// in the global class_index.
//   what[0] is the whole match,
//   what[5] is the class name,
//   what[6] is the template specialisation, if any.
// Always returns true.
bool regex_callback(const boost::match_results<std::string::const_iterator>& what)
{
   // the map key is the class name followed by any specialisation text:
   const std::string key = what[5].str() + what[6].str();
   // store where the class name (sub-expression 5) was found:
   class_index[key] = what.position(5);
   return true;
}
// Replaces the contents of s with every character that can be read from is.
//
//   s  - receives the data; any previous contents are discarded.
//   is - stream to read from; read until a get() fails (end of input or error).
//
// If the stream is unreadable (for example a file that failed to open)
// s is simply left empty.
void load_file(std::string& s, std::istream& is)
{
   s.erase();
   // in_avail() is only a hint of how much data is waiting: it may be zero,
   // or -1 when nothing can be read.  Passing a negative value on to
   // reserve() would convert it to a huge size and throw std::length_error,
   // so only positive hints are used (and a stream with no buffer is skipped).
   if(is.rdbuf())
   {
      const std::streamsize hint = is.rdbuf()->in_avail();
      if(hint > 0)
         s.reserve(static_cast<std::string::size_type>(hint));
   }
   char c;
   while(is.get(c))
   {
      // grow geometrically whenever the buffer is full, in case the
      // hint above was missing or too small:
      if(s.capacity() == s.size())
         s.reserve(s.capacity() * 3);
      s.append(1, c);
   }
}
int main(int argc, const char** argv)
{
std::string text;
for(int i = 1; i < argc; ++i)
{
cout << "Processing file " << argv[i] << endl;
std::ifstream fs(argv[i]);
load_file(text, fs);
// construct our iterators:
boost::regex_iterator<std::string::const_iterator> m1(text.begin(), text.end(), expression);
boost::regex_iterator<std::string::const_iterator> m2;
std::for_each(m1, m2, &regex_callback);
// copy results:
cout << class_index.size() << " matches found" << endl;
map_type::iterator c, d;
c = class_index.begin();
d = class_index.end();
while(c != d)
{
cout << "class \"" << (*c).first << "\" found at index: " << (*c).second << endl;
++c;
}
class_index.erase(class_index.begin(), class_index.end());
}
return 0;
}

View File

@ -73,12 +73,12 @@ int main(int argc, const char** argv)
// temporary string stream
std::ostringstream t(std::ios::out | std::ios::binary);
std::ostream_iterator<char> oi(t);
boost::regex_merge(oi, in.begin(), in.end(), e2, pre_format);
boost::regex_merge(oi, in.begin(), in.end(), e2, pre_format, boost::match_default | boost::format_all);
// then output to final output stream
// adding syntax highlighting:
std::string s(t.str());
std::ostream_iterator<char> out(os);
boost::regex_merge(out, s.begin(), s.end(), e1, format_string);
boost::regex_merge(out, s.begin(), s.end(), e1, format_string, boost::match_default | boost::format_all);
os << footer_text;
}
}

View File

@ -73,12 +73,12 @@ int main(int argc, const char** argv)
// temporary string stream
std::ostringstream t(std::ios::out | std::ios::binary);
std::ostream_iterator<char> oi(t);
boost::regex_replace(oi, in.begin(), in.end(), e2, pre_format);
boost::regex_replace(oi, in.begin(), in.end(), e2, pre_format, boost::match_default | boost::format_all);
// then output to final output stream
// adding syntax highlighting:
std::string s(t.str());
std::ostream_iterator<char> out(os);
boost::regex_replace(out, s.begin(), s.end(), e1, format_string);
boost::regex_replace(out, s.begin(), s.end(), e1, format_string, boost::match_default | boost::format_all);
os << footer_text;
}
}
@ -135,3 +135,4 @@ const char* footer_text = "</PRE>\n</BODY>\n\n";

View File

@ -0,0 +1,75 @@
/*
*
* Copyright (c) 2003
* Dr John Maddock
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Dr John Maddock makes no representations
* about the suitability of this software for any purpose.
* It is provided "as is" without express or implied warranty.
*
*/
/*
* LOCATION: see http://www.boost.org for most recent version.
* FILE regex_token_iterator_example_1.cpp
* VERSION see <boost/version.hpp>
* DESCRIPTION: regex_token_iterator example: split a string into tokens.
*/
#include <boost/regex.hpp>
#include <iostream>
using namespace std;
#if defined(BOOST_MSVC) || (defined(__BORLANDC__) && (__BORLANDC__ == 0x550))
//
// problem with std::getline under MSVC6sp3
// Replacement for std::getline: reads characters from is into s up to,
// but not including, the next '\n' (which is consumed).
//
//   is - stream to read from.
//   s  - receives the line; any previous contents are discarded.
//
// Returns is.  Reading stops as soon as a get() fails, so end of input
// terminates the loop instead of appending to s forever.
std::istream& getline(std::istream& is, std::string& s)
{
   s.erase();
   char c;
   while(is.get(c) && (c != '\n'))
      s.append(1, c);
   // A final line with no trailing newline is still a successful read:
   // as std::getline does, leave only eofbit set in that case so the
   // caller can use the text before seeing failure on the next call.
   if(!s.empty() && is.eof() && !is.bad())
      is.clear(std::ios::eofbit);
   return is;
}
#endif
int main(int argc)
{
string s;
do{
if(argc == 1)
{
cout << "Enter text to split (or \"quit\" to exit): ";
getline(cin, s);
if(s == "quit") break;
}
else
s = "This is a string of tokens";
boost::regex re("\\s+");
boost::regex_token_iterator<std::string::const_iterator> i(s.begin(), s.end(), re, -1);
boost::regex_token_iterator<std::string::const_iterator> j;
unsigned count = 0;
while(i != j)
{
cout << *i++ << endl;
count++;
}
cout << "There were " << count << " tokens found." << endl;
}while(argc == 1);
return 0;
}

View File

@ -0,0 +1,92 @@
/*
*
* Copyright (c) 2003
* Dr John Maddock
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Dr John Maddock makes no representations
* about the suitability of this software for any purpose.
* It is provided "as is" without express or implied warranty.
*
*/
/*
* LOCATION: see http://www.boost.org for most recent version.
* FILE regex_token_iterator_example_2.cpp
* VERSION see <boost/version.hpp>
* DESCRIPTION: regex_token_iterator example: spit out linked URL's.
*/
#include <fstream>
#include <iostream>
#include <iterator>
#include <boost/regex.hpp>
// matches the start of an HTML anchor tag up to and including the quoted
// value of its href attribute; sub-expression 1 is the text between the
// quotes.  Compiled with the icase flag.
boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
boost::regex::normal | boost::regbase::icase);
// Replaces the contents of s with every character that can be read from is.
//
//   s  - receives the data; any previous contents are discarded.
//   is - stream to read from; read until a get() fails (end of input or error).
//
// If the stream is unreadable (for example a file that failed to open)
// s is simply left empty.
void load_file(std::string& s, std::istream& is)
{
   s.erase();
   //
   // attempt to grow string buffer to match file size,
   // this doesn't always work: in_avail() is only a hint and may return
   // zero, or -1 when nothing can be read.  A negative value must not
   // reach reserve(), where it would become a huge size and throw
   // std::length_error.
   if(is.rdbuf())
   {
      const std::streamsize hint = is.rdbuf()->in_avail();
      if(hint > 0)
         s.reserve(static_cast<std::string::size_type>(hint));
   }
   char c;
   while(is.get(c))
   {
      // use logarithmic growth strategy, in case
      // the hint above was missing or too small:
      if(s.capacity() == s.size())
         s.reserve(s.capacity() * 3);
      s.append(1, c);
   }
}
int main(int argc, char** argv)
{
std::string s;
int i;
for(i = 1; i < argc; ++i)
{
std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
s.erase();
std::ifstream is(argv[i]);
load_file(s, is);
boost::regex_token_iterator<std::string::const_iterator>
i(s.begin(), s.end(), e, 1);
boost::regex_token_iterator<std::string::const_iterator> j;
while(i != j)
{
std::cout << *i++ << std::endl;
}
}
//
// alternative method:
// test the array-literal constructor, and split out the whole
// match as well as $1....
//
for(i = 1; i < argc; ++i)
{
std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
s.erase();
std::ifstream is(argv[i]);
load_file(s, is);
const int subs[] = {1, 0,};
boost::regex_token_iterator<std::string::const_iterator>
i(s.begin(), s.end(), e, subs);
boost::regex_token_iterator<std::string::const_iterator> j;
while(i != j)
{
std::cout << *i++ << std::endl;
}
}
return 0;
}

View File

@ -103,6 +103,15 @@ istream& getline(istream& is, std::string& s)
return is;
}
#endif
#if defined(__GNUC__) && (__GNUC__ == 3)
// Wrapper round std::getline that also removes a single trailing '\r'
// from the line read (presumably to cope with CR/LF terminated input).
// Returns is.
istream& getline(istream& is, std::string& s)
{
   std::getline(is, s);
   const std::string::size_type len = s.size();
   if((len != 0) && (s[len - 1] == '\r'))
   {
      // drop the final character only:
      s.erase(len - 1);
   }
   return is;
}
#endif
int main(int argc, char**argv)

View File

@ -57,7 +57,7 @@ double time_match(const std::string& re, const std::string& text, bool icase)
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
erroffset = pcre_exec(ppcre, pe, text.c_str(), text.size(), 0, 0, what, sizeof(what));
erroffset = pcre_exec(ppcre, pe, text.c_str(), text.size(), 0, 0, what, sizeof(what)/sizeof(int));
}
result = tim.elapsed();
iter *= 2;
@ -70,7 +70,7 @@ double time_match(const std::string& re, const std::string& text, bool icase)
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
erroffset = pcre_exec(ppcre, pe, text.c_str(), text.size(), 0, 0, what, sizeof(what));
erroffset = pcre_exec(ppcre, pe, text.c_str(), text.size(), 0, 0, what, sizeof(what)/sizeof(int));
}
run = tim.elapsed();
result = std::min(run, result);
@ -119,12 +119,12 @@ double time_find_all(const std::string& re, const std::string& text, bool icase)
{
matches = 0;
startoff = 0;
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what));
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)/sizeof(int));
while(exec_result >= 0)
{
++matches;
startoff = what[1];
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what));
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)/sizeof(int));
}
}
result = tim.elapsed();
@ -147,12 +147,12 @@ double time_find_all(const std::string& re, const std::string& text, bool icase)
{
matches = 0;
startoff = 0;
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what));
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)/sizeof(int));
while(exec_result >= 0)
{
++matches;
startoff = what[1];
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what));
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)/sizeof(int));
}
}
run = tim.elapsed();