forked from boostorg/regex
150 lines
5.8 KiB
Plaintext
150 lines
5.8 KiB
Plaintext
[/
|
|
Copyright 2006-2007 John Maddock.
|
|
Distributed under the Boost Software License, Version 1.0.
|
|
(See accompanying file LICENSE_1_0.txt or copy at
|
|
http://www.boost.org/LICENSE_1_0.txt).
|
|
]
|
|
|
|
[section:partial_matches Partial Matches]
|
|
|
|
The [match_flag_type] `match_partial` can be passed to the following algorithms:
[regex_match], [regex_search], and [regex_grep], and used with the
iterator [regex_iterator].  When used, it indicates that partial as
well as full matches should be found.  A partial match is one that
matched one or more characters at the end of the text input, but
did not match all of the regular expression (although it may have done
so had more input been available).  Partial matches are typically used
when either validating data input (checking each character as it is
entered on the keyboard), or when searching texts that are either too long
to load into memory (or even into a memory-mapped file), or are of
indeterminate length (for example the source may be a socket or similar).
Partial and full matches can be differentiated as shown in the following
table (the variable M represents an instance of [match_results] as filled in
by [regex_match], [regex_search] or [regex_grep]):
|
|
|
|
[table
[[ ][Result][M\[0\].matched][M\[0\].first][M\[0\].second]]
[[No match][False][Undefined][Undefined][Undefined]]
[[Partial match][True][False][Start of partial match.][End of partial match (end of text).]]
[[Full match][True][True][Start of full match.][End of full match.]]
]
|
|
|
|
Be aware that using partial matches can sometimes result in somewhat
imperfect behavior:

* There are some expressions, such as ".\*abc", that will always produce a partial match.  This problem can be reduced by careful construction of the regular expressions used, or by setting flags like `match_not_dot_newline` so that expressions like .\* can't match past line boundaries.
* Boost.Regex currently prefers leftmost matches to full matches, so for example matching "abc|b" against "ab" produces a partial match against the "ab" rather than a full match against "b".  It's more efficient to work this way, but may not be the behavior you want in all situations.
|
|
|
|
The following example tests to see whether the text could be a valid
credit card number.  As the user presses a key, the character entered
is added to the string being built up, and that string is passed to
`is_possible_card_number`.  If this returns true then the whole pattern has
been matched and the text could be a valid card number, so the
user interface's OK button would be enabled.  If it returns false, then
this is not yet a valid card number, but could be with more input, so
the user interface would disable the OK button.  Finally, if the procedure
throws an exception, the input could never become a valid number, so the
entered character must be discarded and a suitable error indication
displayed to the user.
|
|
|
|
#include <string>
|
|
#include <iostream>
|
|
#include <boost/regex.hpp>
|
|
|
|
// Card-number pattern: a group of 3 or 4 digits followed by three groups of 4 digits, with an optional single '-' or ' ' between groups:
boost::regex e("(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})");
|
|
|
|
bool is_possible_card_number(const std::string& input)
|
|
{
|
|
//
|
|
// return false for partial match, true for full match, or throw for
|
|
// impossible match based on what we have so far...
|
|
boost::match_results<std::string::const_iterator> what;
|
|
if(0 == boost::regex_match(input, what, e, boost::match_default | boost::match_partial))
|
|
{
|
|
// the input so far could not possibly be valid so reject it:
|
|
throw std::runtime_error(
|
|
"Invalid data entered - this could not possibly be a valid card number");
|
|
}
|
|
// OK so far so good, but have we finished?
|
|
if(what[0].matched)
|
|
{
|
|
// excellent, we have a result:
|
|
return true;
|
|
}
|
|
// what we have so far is only a partial match...
|
|
return false;
|
|
}
|
|
|
|
In the following example, text input is taken from a stream containing an
unknown amount of text; this example simply counts the number of HTML tags
encountered in the stream.  The text is loaded into a buffer and searched a
part at a time.  If a partial match is encountered, then the partial match
is searched a second time as the start of the next batch of text:
|
|
|
|
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <boost/regex.hpp>
|
|
|
|
// Matches an HTML-style tag: '<', then any run of characters other than '>', then '>':
|
|
boost::regex e("<[^>]*>");
|
|
// Running total of full tag matches; incremented by search():
|
|
unsigned int tags = 0;
|
|
|
|
void search(std::istream& is)
|
|
{
|
|
// buffer we'll be searching in:
|
|
char buf[4096];
|
|
// saved position of end of partial match:
|
|
const char* next_pos = buf + sizeof(buf);
|
|
// flag to indicate whether there is more input to come:
|
|
bool have_more = true;
|
|
|
|
while(have_more)
|
|
{
|
|
// how much do we copy forward from last try:
|
|
unsigned leftover = (buf + sizeof(buf)) - next_pos;
|
|
// and how much is left to fill:
|
|
unsigned size = next_pos - buf;
|
|
// copy forward whatever we have left:
|
|
std::memmove(buf, next_pos, leftover);
|
|
// fill the rest from the stream:
|
|
is.read(buf + leftover, size);
|
|
unsigned read = is.gcount();
|
|
// check to see if we've run out of text:
|
|
have_more = read == size;
|
|
// reset next_pos:
|
|
next_pos = buf + sizeof(buf);
|
|
// and then iterate:
|
|
boost::cregex_iterator a(
|
|
buf,
|
|
buf + read + leftover,
|
|
e,
|
|
boost::match_default | boost::match_partial);
|
|
boost::cregex_iterator b;
|
|
|
|
while(a != b)
|
|
{
|
|
if((*a)[0].matched == false)
|
|
{
|
|
// Partial match, save position and break:
|
|
next_pos = (*a)[0].first;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
// full match:
|
|
++tags;
|
|
}
|
|
|
|
// move to next match:
|
|
++a;
|
|
}
|
|
}
|
|
}
|
|
|
|
[endsect]
|
|
|
|
|
|
|