mirror of
https://github.com/boostorg/regex.git
synced 2025-07-06 09:06:29 +02:00
Updated regex code with tentative support for partial matches
[SVN r8160]
This commit is contained in:
@ -77,6 +77,7 @@ flag_info flag_data[] = {
|
|||||||
{ BOOST_RE_STR("match_any"), 9, match_any, 3 },
|
{ BOOST_RE_STR("match_any"), 9, match_any, 3 },
|
||||||
{ BOOST_RE_STR("match_not_null"), 14, match_not_null, 3 },
|
{ BOOST_RE_STR("match_not_null"), 14, match_not_null, 3 },
|
||||||
{ BOOST_RE_STR("match_continuous"), 16, match_continuous, 3 },
|
{ BOOST_RE_STR("match_continuous"), 16, match_continuous, 3 },
|
||||||
|
{ BOOST_RE_STR("match_partial"), 13, match_partial, 3 },
|
||||||
|
|
||||||
{ BOOST_RE_STR("format_sed"), 10, format_sed, 3 },
|
{ BOOST_RE_STR("format_sed"), 10, format_sed, 3 },
|
||||||
{ BOOST_RE_STR("format_perl"), 11, format_perl, 3 },
|
{ BOOST_RE_STR("format_perl"), 11, format_perl, 3 },
|
||||||
|
@ -30,3 +30,5 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -236,7 +236,7 @@ void cpp_tests(const reg_expression<C, T, A>& e, bool recurse = true)
|
|||||||
begin_error();
|
begin_error();
|
||||||
cout << "Expression did not compile using regex++ API" << endl;
|
cout << "Expression did not compile using regex++ API" << endl;
|
||||||
}
|
}
|
||||||
else if(recurse)
|
else if((recurse) && ((flags[3] & match_partial) == 0))
|
||||||
cpp_eh_tests(e);
|
cpp_eh_tests(e);
|
||||||
}
|
}
|
||||||
else if(flags[4] & REG_GREP)
|
else if(flags[4] & REG_GREP)
|
||||||
@ -299,7 +299,7 @@ void cpp_tests(const reg_expression<C, T, A>& e, bool recurse = true)
|
|||||||
(m[-1].first - x) << "," << (m[-1].second - x) << ") expected (0" <<
|
(m[-1].first - x) << "," << (m[-1].second - x) << ") expected (0" <<
|
||||||
"," << matches[0] << ")" << endl;
|
"," << matches[0] << ")" << endl;
|
||||||
}
|
}
|
||||||
if((m[-2].first != m[0].second) || (m[-2].second != y))
|
if(((m[-2].first != m[0].second) || (m[-2].second != y)) && ((flags[3] & match_partial) == 0))
|
||||||
{
|
{
|
||||||
begin_error();
|
begin_error();
|
||||||
cout << "regex++ API result mismatch in $' (match -2), found (" <<
|
cout << "regex++ API result mismatch in $' (match -2), found (" <<
|
||||||
@ -327,6 +327,26 @@ void cpp_tests(const reg_expression<C, T, A>& e, bool recurse = true)
|
|||||||
begin_error();
|
begin_error();
|
||||||
cout << "regex++ API result mismatch in regex_search(const std::string&, match_results&, const reg_expression&, int)" << endl;
|
cout << "regex++ API result mismatch in regex_search(const std::string&, match_results&, const reg_expression&, int)" << endl;
|
||||||
}
|
}
|
||||||
|
//
|
||||||
|
// partial match should give same result as full match
|
||||||
|
// provided a full match is expected:
|
||||||
|
//
|
||||||
|
if(matches[0] > 0)
|
||||||
|
{
|
||||||
|
if(regex_search(x, y, m, e, flags[3] | boost::match_partial))
|
||||||
|
{
|
||||||
|
if(compare_result(sm, m) == false)
|
||||||
|
{
|
||||||
|
begin_error();
|
||||||
|
cout << "regex++ API result mismatch in regex_search when enabling match_partial" << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
begin_error();
|
||||||
|
cout << "regex++ API result: match not found when match_partial specified" << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
if(s.find(char_t(0)) == std::basic_string<char_t>::npos)
|
if(s.find(char_t(0)) == std::basic_string<char_t>::npos)
|
||||||
{
|
{
|
||||||
match_results<const char_t*> ssm;
|
match_results<const char_t*> ssm;
|
||||||
@ -615,7 +635,7 @@ void run_tests()
|
|||||||
#if !defined(TEST_UNICODE)
|
#if !defined(TEST_UNICODE)
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
if((flags[2] == regbase::normal) && (has_nulls(search_text.begin(), search_text.end()) == false))
|
if(((flags[3] & match_partial) == 0) && (flags[2] == regbase::normal) && (has_nulls(search_text.begin(), search_text.end()) == false))
|
||||||
{
|
{
|
||||||
RegEx e;
|
RegEx e;
|
||||||
e.SetExpression(expression.c_str(), flags[0] & REG_ICASE);
|
e.SetExpression(expression.c_str(), flags[0] & REG_ICASE);
|
||||||
|
@ -850,6 +850,12 @@ abc|\w+? abcd 0 3
|
|||||||
(?:a+(b+)) xaaabbba 1 7 4 7
|
(?:a+(b+)) xaaabbba 1 7 4 7
|
||||||
a+(?#b+)b+ xaaabbba 1 7
|
a+(?#b+)b+ xaaabbba 1 7
|
||||||
|
|
||||||
|
;
|
||||||
|
; try some partial matches:
|
||||||
|
- match_partial match_default normal REG_EXTENDED REG_NO_POSIX_TEST
|
||||||
|
(xyz)(.*)abc xyzaaab -1 -1 0 3 3 7
|
||||||
|
(xyz)(.*)abc xyz -1 -1 0 3 3 3
|
||||||
|
(xyz)(.*)abc xy -1 -1 -1 -1 -1 -1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -171,7 +171,9 @@ enum match_flags
|
|||||||
match_not_null = match_any << 1, // string can't be null
|
match_not_null = match_any << 1, // string can't be null
|
||||||
match_continuous = match_not_null << 1, // each grep match must continue from
|
match_continuous = match_not_null << 1, // each grep match must continue from
|
||||||
// uninterrupted from the previous one
|
// uninterrupted from the previous one
|
||||||
match_stop = match_continuous << 1, // stop after first match (grep)
|
match_partial = match_continuous << 1, // find partial matches
|
||||||
|
|
||||||
|
match_stop = match_partial << 1, // stop after first match (grep)
|
||||||
match_max = match_stop
|
match_max = match_stop
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -263,7 +263,7 @@ bool query_match_aux(iterator first,
|
|||||||
iterator last,
|
iterator last,
|
||||||
match_results<iterator, Allocator>& m,
|
match_results<iterator, Allocator>& m,
|
||||||
const reg_expression<charT, traits, Allocator2>& e,
|
const reg_expression<charT, traits, Allocator2>& e,
|
||||||
unsigned flags,
|
unsigned flags,
|
||||||
_priv_match_data<iterator, Allocator>& pd,
|
_priv_match_data<iterator, Allocator>& pd,
|
||||||
iterator* restart)
|
iterator* restart)
|
||||||
{
|
{
|
||||||
@ -287,6 +287,7 @@ bool query_match_aux(iterator first,
|
|||||||
|
|
||||||
const re_syntax_base* ptr = access::first(e);
|
const re_syntax_base* ptr = access::first(e);
|
||||||
bool match_found = false;
|
bool match_found = false;
|
||||||
|
bool have_partial_match = false;
|
||||||
bool need_push_match = (e.mark_count() > 1);
|
bool need_push_match = (e.mark_count() > 1);
|
||||||
int cur_acc = -1; // no active accumulator
|
int cur_acc = -1; // no active accumulator
|
||||||
pd.set_accumulator_size(access::repeat_count(e));
|
pd.set_accumulator_size(access::repeat_count(e));
|
||||||
@ -748,7 +749,7 @@ bool query_match_aux(iterator first,
|
|||||||
//
|
//
|
||||||
// if we get to here then we've run out of characters to match against,
|
// if we get to here then we've run out of characters to match against,
|
||||||
// we could however still have non-character regex items left
|
// we could however still have non-character regex items left
|
||||||
if(ptr->can_be_null == 0)
|
if((ptr->can_be_null == 0) && ((flags & match_partial) == 0))
|
||||||
goto failure;
|
goto failure;
|
||||||
while(true)
|
while(true)
|
||||||
{
|
{
|
||||||
@ -838,7 +839,7 @@ bool query_match_aux(iterator first,
|
|||||||
|
|
||||||
// see if we can skip the repeat:
|
// see if we can skip the repeat:
|
||||||
if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min)
|
if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min)
|
||||||
&& (ptr->can_be_null & mask_skip))
|
&& ((ptr->can_be_null & mask_skip) || (flags & match_partial)))
|
||||||
{
|
{
|
||||||
// don't push failure info, there's no point:
|
// don't push failure info, there's no point:
|
||||||
ptr = ((re_repeat*)ptr)->alt.p;
|
ptr = ((re_repeat*)ptr)->alt.p;
|
||||||
@ -847,7 +848,7 @@ bool query_match_aux(iterator first,
|
|||||||
|
|
||||||
// otherwise see if we can take the repeat:
|
// otherwise see if we can take the repeat:
|
||||||
if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max)
|
if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max)
|
||||||
&& ((ptr->can_be_null & (mask_take | mask_skip)) == (mask_take | mask_skip)))
|
&& (((ptr->can_be_null & (mask_take | mask_skip)) == (mask_take | mask_skip))) || (flags & match_partial))
|
||||||
{
|
{
|
||||||
// move to next item in list:
|
// move to next item in list:
|
||||||
++accumulators[cur_acc];
|
++accumulators[cur_acc];
|
||||||
@ -870,6 +871,18 @@ bool query_match_aux(iterator first,
|
|||||||
|
|
||||||
failure:
|
failure:
|
||||||
|
|
||||||
|
//
|
||||||
|
// check for possible partial match:
|
||||||
|
//
|
||||||
|
if((flags & match_partial)
|
||||||
|
&& !match_found // no full match already
|
||||||
|
&& (base != first) // some characters have been consumed
|
||||||
|
&& (first == last)) // end of input has been reached
|
||||||
|
{
|
||||||
|
have_partial_match = true;
|
||||||
|
m.maybe_assign(temp_match);
|
||||||
|
}
|
||||||
|
|
||||||
if(prev_record.empty() == false)
|
if(prev_record.empty() == false)
|
||||||
{
|
{
|
||||||
ptr = prev_record.peek();
|
ptr = prev_record.peek();
|
||||||
@ -931,7 +944,7 @@ bool query_match_aux(iterator first,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(match_found)
|
if(match_found || have_partial_match)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// if we get to here then everything has failed
|
// if we get to here then everything has failed
|
||||||
|
@ -898,7 +898,8 @@ public:
|
|||||||
|
|
||||||
size_type BOOST_RE_CALL size()const
|
size_type BOOST_RE_CALL size()const
|
||||||
{
|
{
|
||||||
return (*this)[0].matched ? ref->cmatches : 0;
|
//return (*this)[0].matched ? ref->cmatches : 0;
|
||||||
|
return ref->cmatches;
|
||||||
}
|
}
|
||||||
|
|
||||||
const sub_match<iterator>& BOOST_RE_CALL operator[](int n) const
|
const sub_match<iterator>& BOOST_RE_CALL operator[](int n) const
|
||||||
|
@ -559,7 +559,7 @@ bool BOOST_RE_CALL c_regex_traits<wchar_t>::lookup_collatename(std::basic_string
|
|||||||
scoped_array<char> buf(new char[len]);
|
scoped_array<char> buf(new char[len]);
|
||||||
strnarrow(buf.get(), len, s.c_str());
|
strnarrow(buf.get(), len, s.c_str());
|
||||||
std::string t_out;
|
std::string t_out;
|
||||||
bool result = base_type::do_lookup_collate(t_out, buf.get());
|
bool result = do_lookup_collate(t_out, buf.get());
|
||||||
if(t_out.size() == 0) result = false;
|
if(t_out.size() == 0) result = false;
|
||||||
if(result)
|
if(result)
|
||||||
{
|
{
|
||||||
@ -990,7 +990,7 @@ jm_uintfast32_t BOOST_RE_CALL c_regex_traits<wchar_t>::lookup_classname(const wc
|
|||||||
unsigned int len = strnarrow((char*)NULL, 0, s.c_str());
|
unsigned int len = strnarrow((char*)NULL, 0, s.c_str());
|
||||||
scoped_array<char> buf(new char[len]);
|
scoped_array<char> buf(new char[len]);
|
||||||
strnarrow(buf.get(), len, s.c_str());
|
strnarrow(buf.get(), len, s.c_str());
|
||||||
len = base_type::do_lookup_class(buf.get());
|
len = do_lookup_class(buf.get());
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -761,7 +761,7 @@ jm_uintfast32_t BOOST_RE_CALL w32_regex_traits<wchar_t>::lookup_classname(const
|
|||||||
unsigned int len = strnarrow((char*)NULL, 0, s.c_str());
|
unsigned int len = strnarrow((char*)NULL, 0, s.c_str());
|
||||||
scoped_array<char> buf(new char[len]);
|
scoped_array<char> buf(new char[len]);
|
||||||
strnarrow(buf.get(), len, s.c_str());
|
strnarrow(buf.get(), len, s.c_str());
|
||||||
len = base_type::do_lookup_class(buf.get());
|
len = do_lookup_class(buf.get());
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user