mirror of
https://github.com/boostorg/regex.git
synced 2025-07-16 05:42:15 +02:00
196 lines
8.9 KiB
HTML
196 lines
8.9 KiB
HTML
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||
<html>
|
||
<head>
|
||
<title>Boost.Regex: Partial Matches</title>
|
||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||
</head>
|
||
<body>
|
||
<P>
|
||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||
<TR>
|
||
<td valign="top" width="300">
|
||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||
</td>
|
||
<TD width="353">
|
||
<H1 align="center">Boost.Regex</H1>
|
||
<H2 align="center">Partial Matches</H2>
|
||
</TD>
|
||
<td width="50">
|
||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||
</td>
|
||
</TR>
|
||
</TABLE>
|
||
</P>
|
||
<HR>
|
||
<p></p>
|
||
<P>The <A href="match_flag_type.html">match-flag</A> <CODE>match_partial</CODE> can
|
||
be passed to the following algorithms: <A href="regex_match.html">regex_match</A>,
|
||
<A href="regex_search.html">regex_search</A>, and <A href="regex_grep.html">regex_grep</A>,
|
||
and used with the iterator <A href="regex_iterator.html">regex_iterator</A>.
|
||
When used it indicates that partial as well as full matches should be found. A
|
||
partial match is one that matched one or more characters at the end of the text
|
||
input, but did not match all of the regular expression (although it may have
|
||
done so had more input been available). Partial matches are typically used when
|
||
either validating data input (checking each character as it is entered on the
|
||
keyboard), or when searching texts that are either too long to load into memory
|
||
(or even into a memory mapped file), or are of indeterminate length (for
|
||
example the source may be a socket or similar). Partial and full matches can be
|
||
differentiated as shown in the following table (the variable M represents an
|
||
instance of <A href="match_results.html">match_results&lt;&gt;</A> as filled in
|
||
by regex_match, regex_search or regex_grep):<BR>
|
||
</P>
|
||
<P>
|
||
<TABLE id="Table2" cellSpacing="0" cellPadding="7" width="100%" border="0">
|
||
<TR>
|
||
<TD vAlign="top" width="20%"> </TD>
|
||
<TD vAlign="top" width="20%">Result</TD>
|
||
<TD vAlign="top" width="20%">M[0].matched</TD>
|
||
<TD vAlign="top" width="20%">M[0].first</TD>
|
||
<TD vAlign="top" width="20%">M[0].second</TD>
|
||
</TR>
|
||
<TR>
|
||
<TD vAlign="top" width="20%">No match</TD>
|
||
<TD vAlign="top" width="20%">False</TD>
|
||
<TD vAlign="top" width="20%">Undefined</TD>
|
||
<TD vAlign="top" width="20%">Undefined</TD>
|
||
<TD vAlign="top" width="20%">Undefined</TD>
|
||
</TR>
|
||
<TR>
|
||
<TD vAlign="top" width="20%">Partial match</TD>
|
||
<TD vAlign="top" width="20%">True</TD>
|
||
<TD vAlign="top" width="20%">False</TD>
|
||
<TD vAlign="top" width="20%">Start of partial match.</TD>
|
||
<TD vAlign="top" width="20%">End of partial match (end of text).</TD>
|
||
</TR>
|
||
<TR>
|
||
<TD vAlign="top" width="20%">Full match</TD>
|
||
<TD vAlign="top" width="20%">True</TD>
|
||
<TD vAlign="top" width="20%">True</TD>
|
||
<TD vAlign="top" width="20%">Start of full match.</TD>
|
||
<TD vAlign="top" width="20%">End of full match.</TD>
|
||
</TR>
|
||
</TABLE>
|
||
</P>
|
||
<P>Be aware that using partial matches can sometimes result in somewhat imperfect
|
||
behavior:</P>
|
||
<UL>
|
||
<LI>
|
||
There are some expressions, such as ".*abc" that will always produce a partial
|
||
match. This problem can be reduced by careful construction of the regular
|
||
expressions used, or by setting flags like match_not_dot_newline so that
|
||
expressions like .* can't match past line boundaries.</LI>
|
||
<LI>
|
||
Boost.Regex currently prefers leftmost matches to full matches, so for example
|
||
matching "abc|b" against "ab" produces a partial match against the "ab"
|
||
rather than a full match against "b". It's more efficient to work this
|
||
way, but may not be the behavior you want in all situations.</LI></UL>
|
||
<P>The following <A href="../example/snippets/partial_regex_match.cpp">example</A>
|
||
tests to see whether the text could be a valid credit card number: as the user
|
||
presses a key, the character entered would be added to the string being built
|
||
up, and passed to <CODE>is_possible_card_number</CODE>. If this returns true
|
||
then the text could be a valid card number, so the user interface's OK button
|
||
would be enabled. If it returns false, then this is not yet a valid card
|
||
number, but could be with more input, so the user interface would disable the
|
||
OK button. Finally, if the procedure throws an exception the input could never
|
||
become a valid number, and the inputted character must be discarded, and a
|
||
suitable error indication displayed to the user.</P>
|
||
<PRE>#include &lt;string&gt;
|
||
#include &lt;iostream&gt;
|
||
#include &lt;boost/regex.hpp&gt;
|
||
|
||
boost::regex e("(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})");
|
||
|
||
bool is_possible_card_number(const std::string& input)
|
||
{
|
||
//
|
||
// return false for partial match, true for full match, or throw for
|
||
// impossible match based on what we have so far...
|
||
boost::match_results&lt;std::string::const_iterator&gt; what;
|
||
if(0 == boost::regex_match(input, what, e, boost::match_default | boost::match_partial))
|
||
{
|
||
// the input so far could not possibly be valid so reject it:
|
||
throw std::runtime_error("Invalid data entered - this could not possibly be a valid card number");
|
||
}
|
||
// OK so far so good, but have we finished?
|
||
if(what[0].matched)
|
||
{
|
||
// excellent, we have a result:
|
||
return true;
|
||
}
|
||
// what we have so far is only a partial match...
|
||
return false;
|
||
}</PRE>
|
||
<P>In the following <A href="../example/snippets/partial_regex_grep.cpp">example</A>,
|
||
text input is taken from a stream containing an unknown amount of text; this
|
||
example simply counts the number of html tags encountered in the stream. The
|
||
text is loaded into a buffer and searched a part at a time; if a partial match
|
||
was encountered, then the partial match gets searched a second time as the
|
||
start of the next batch of text:</P>
|
||
<PRE>#include &lt;iostream&gt;
|
||
#include &lt;fstream&gt;
|
||
#include &lt;sstream&gt;
|
||
#include &lt;string&gt;
|
||
#include &lt;boost/regex.hpp&gt;
|
||
|
||
// match some kind of html tag:
|
||
boost::regex e("<[^>]*>");
|
||
// count how many:
|
||
unsigned int tags = 0;
|
||
// saved position of partial match:
|
||
char* next_pos = 0;
|
||
|
||
bool grep_callback(const boost::match_results&lt;char*&gt;& m)
|
||
{
|
||
if(m[0].matched == false)
|
||
{
|
||
// save position and return:
|
||
next_pos = m[0].first;
|
||
}
|
||
else
|
||
++tags;
|
||
return true;
|
||
}
|
||
|
||
void search(std::istream& is)
|
||
{
|
||
char buf[4096];
|
||
next_pos = buf + sizeof(buf);
|
||
bool have_more = true;
|
||
while(have_more)
|
||
{
|
||
// how much do we copy forward from last try:
|
||
unsigned leftover = (buf + sizeof(buf)) - next_pos;
|
||
// and how much is left to fill:
|
||
unsigned size = next_pos - buf;
|
||
// copy forward whatever we have left:
|
||
memcpy(buf, next_pos, leftover);
|
||
// fill the rest from the stream:
|
||
unsigned read = is.readsome(buf + leftover, size);
|
||
// check to see if we've run out of text:
|
||
have_more = read == size;
|
||
// reset next_pos:
|
||
next_pos = buf + sizeof(buf);
|
||
// and then grep:
|
||
boost::regex_grep(grep_callback,
|
||
buf,
|
||
buf + read + leftover,
|
||
e,
|
||
boost::match_default | boost::match_partial);
|
||
}
|
||
}</PRE>
|
||
<P>
|
||
<HR>
|
||
<P></P>
|
||
<p>Revised
|
||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||
24 Oct 2003
|
||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||
<p><i>&copy; Copyright John Maddock 1998-
|
||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||
</body>
|
||
</html>
|