mirror of
https://github.com/boostorg/regex.git
synced 2025-07-29 20:17:24 +02:00
Initial commit of quickbook conversion of docs.
[SVN r37942]
This commit is contained in:
141
doc/partial_matches.qbk
Normal file
141
doc/partial_matches.qbk
Normal file
@ -0,0 +1,141 @@
|
||||
[section:partial_matches Partial Matches]
|
||||
|
||||
The [match_flag_type] `match_partial` can be passed to the following algorithms:
|
||||
[regex_match], [regex_search], and [regex_grep], and used with the
|
||||
iterator [regex_iterator]. When used it indicates that partial as
|
||||
well as full matches should be found. A partial match is one that
|
||||
matched one or more characters at the end of the text input, but
|
||||
did not match all of the regular expression (although it may have done
|
||||
so had more input been available). Partial matches are typically used
|
||||
when either validating data input (checking each character as it is
|
||||
entered on the keyboard), or when searching texts that are either too long
|
||||
to load into memory (or even into a memory mapped file), or are of
|
||||
indeterminate length (for example the source may be a socket or similar).
|
||||
Partial and full matches can be differentiated as shown in the following
|
||||
table (the variable M represents an instance of [match_results] as filled in
|
||||
by [regex_match], [regex_search] or [regex_grep]):
|
||||
|
||||
[table
|
||||
[[ ][Result][M\[0\].matched][M\[0\].first][M\[0\].second]]
|
||||
[[No match][False][Undefined][Undefined][Undefined]]
|
||||
[[Partial match][True][False][Start of partial match.][End of partial match (end of text).]]
|
||||
[[Full match][True][True][Start of full match.][End of full match.]]
|
||||
]
|
||||
|
||||
Be aware that using partial matches can sometimes result in somewhat
|
||||
imperfect behavior:
|
||||
|
||||
* There are some expressions, such as ".\*abc" that will always produce a partial match. This problem can be reduced by careful construction of the regular expressions used, or by setting flags like match_not_dot_newline so that expressions like .\* can't match past line boundaries.
|
||||
* Boost.Regex currently prefers leftmost matches to full matches, so for example matching "abc|b" against "ab" produces a partial match against the "ab" rather than a full match against "b". It's more efficient to work this way, but may not be the behavior you want in all situations.
|
||||
|
||||
The following example tests to see whether the text could be a valid
|
||||
credit card number, as the user presses a key, the character entered
|
||||
would be added to the string being built up, and passed to `is_possible_card_number`.
|
||||
If this returns true then the text could be a valid card number, so the
|
||||
user interface's OK button would be enabled. If it returns false, then
|
||||
this is not yet a valid card number, but could be with more input, so
|
||||
the user interface would disable the OK button. Finally, if the procedure
|
||||
throws an exception the input could never become a valid number, and the
|
||||
inputted character must be discarded, and a suitable error indication
|
||||
displayed to the user.
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
boost::regex e("(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})");
|
||||
|
||||
bool is_possible_card_number(const std::string& input)
|
||||
{
|
||||
//
|
||||
// return false for partial match, true for full match, or throw for
|
||||
// impossible match based on what we have so far...
|
||||
boost::match_results<std::string::const_iterator> what;
|
||||
if(0 == boost::regex_match(input, what, e, boost::match_default | boost::match_partial))
|
||||
{
|
||||
// the input so far could not possibly be valid so reject it:
|
||||
throw std::runtime_error(
|
||||
"Invalid data entered - this could not possibly be a valid card number");
|
||||
}
|
||||
// OK so far so good, but have we finished?
|
||||
if(what[0].matched)
|
||||
{
|
||||
// excellent, we have a result:
|
||||
return true;
|
||||
}
|
||||
// what we have so far is only a partial match...
|
||||
return false;
|
||||
}
|
||||
|
||||
In the following example, text input is taken from a stream containing an
|
||||
unknown amount of text; this example simply counts the number of html tags
|
||||
encountered in the stream. The text is loaded into a buffer and searched a
|
||||
part at a time, if a partial match was encountered, then the partial match
|
||||
gets searched a second time as the start of the next batch of text:
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
// match some kind of html tag:
|
||||
boost::regex e("<[^>]*>");
|
||||
// count how many:
|
||||
unsigned int tags = 0;
|
||||
|
||||
void search(std::istream& is)
|
||||
{
|
||||
// buffer we'll be searching in:
|
||||
char buf[4096];
|
||||
// saved position of end of partial match:
|
||||
const char* next_pos = buf + sizeof(buf);
|
||||
// flag to indicate whether there is more input to come:
|
||||
bool have_more = true;
|
||||
|
||||
while(have_more)
|
||||
{
|
||||
// how much do we copy forward from last try:
|
||||
unsigned leftover = (buf + sizeof(buf)) - next_pos;
|
||||
// and how much is left to fill:
|
||||
unsigned size = next_pos - buf;
|
||||
// copy forward whatever we have left:
|
||||
std::memmove(buf, next_pos, leftover);
|
||||
// fill the rest from the stream:
|
||||
is.read(buf + leftover, size);
|
||||
unsigned read = is.gcount();
|
||||
// check to see if we've run out of text:
|
||||
have_more = read == size;
|
||||
// reset next_pos:
|
||||
next_pos = buf + sizeof(buf);
|
||||
// and then iterate:
|
||||
boost::cregex_iterator a(
|
||||
buf,
|
||||
buf + read + leftover,
|
||||
e,
|
||||
boost::match_default | boost::match_partial);
|
||||
boost::cregex_iterator b;
|
||||
|
||||
while(a != b)
|
||||
{
|
||||
if((*a)[0].matched == false)
|
||||
{
|
||||
// Partial match, save position and break:
|
||||
next_pos = (*a)[0].first;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
// full match:
|
||||
++tags;
|
||||
}
|
||||
|
||||
// move to next match:
|
||||
++a;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[endsect]
|
||||
|
||||
|
Reference in New Issue
Block a user