Enabled negated character classes inside character sets.

[SVN r31053]
This commit is contained in:
John Maddock
2005-09-20 12:01:25 +00:00
parent ae36194500
commit b5bc6e2be9
7 changed files with 78 additions and 17 deletions

View File

@ -65,7 +65,9 @@
that regex iteration allows lookbehind to look back before the current search
range (into the last match).
<LI>
Fixed strange bug with non-greedy repeats inside forward lookahead assertions.</LI></UL>
Fixed strange bug with non-greedy repeats inside forward lookahead assertions.
<LI>
Enabled negated character classes inside character sets.</LI></UL>
<P>Boost 1.33.0.</P>
<UL>
<LI>

View File

@ -65,7 +65,9 @@
that regex iteration allows lookbehind to look back before the current search
range (into the last match).
<LI>
Fixed strange bug with non-greedy repeats inside forward lookahead assertions.</LI></UL>
Fixed strange bug with non-greedy repeats inside forward lookahead assertions.
<LI>
Enabled negated character classes inside character sets.</LI></UL>
<P>Boost 1.33.0.</P>
<UL>
<LI>

View File

@ -61,6 +61,7 @@ public:
m_negate = false;
m_has_digraphs = false;
m_classes = 0;
m_negated_classes = 0;
m_empty = true;
}
@ -92,6 +93,11 @@ public:
m_classes |= m;
m_empty = false;
}
// Registers a negated character class with this set: the bits of m are
// OR'ed into m_negated_classes and the set is marked as non-empty.
// NOTE(review): several negated classes are merged into one mask, and the
// matcher tests isctype(c, mask) == false against that combined mask.
// Assuming isctype() is true when c belongs to ANY class in the mask, a set
// with two negated classes matches "in neither class" rather than
// "outside at least one of them" - confirm this is the intended semantics.
void add_negated_class(mask_type m)
{
m_negated_classes |= m;
m_empty = false;
}
void add_equivalent(const digraph_type& s)
{
m_equivalents.insert(m_equivalents.end(), s);
@ -148,6 +154,10 @@ public:
{
return m_classes;
}
// Returns the combined mask of negated character classes accumulated
// via add_negated_class() (zero when none have been added since the
// mask was last reset).
mask_type negated_classes()const
{
return m_negated_classes;
}
bool empty()const
{
return m_empty;
@ -158,6 +168,7 @@ private:
bool m_negate; // true if the set is to be negated
bool m_has_digraphs; // true if we have digraphs present
mask_type m_classes; // character classes to match
mask_type m_negated_classes; // negated character classes to match
bool m_empty; // whether we've added anything yet
std::vector<digraph_type> m_equivalents; // a list of equivalence classes
};
@ -350,11 +361,14 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
result->cranges = static_cast<unsigned int>(::boost::re_detail::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
result->cequivalents = static_cast<unsigned int>(::boost::re_detail::distance(char_set.equivalents_begin(), char_set.equivalents_end()));
result->cclasses = char_set.classes();
result->cnclasses = char_set.negated_classes();
if(flags() & regbase::icase)
{
// adjust classes as needed:
if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
result->cclasses |= m_alpha_mask;
if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask))
result->cnclasses |= m_alpha_mask;
}
result->isnot = char_set.is_negated();
@ -596,6 +610,24 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
}
}
//
// and now the negated classes:
//
m = char_set.negated_classes();
if(flags() & regbase::icase)
{
// adjust m as needed:
if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
m |= m_alpha_mask;
}
if(m != 0)
{
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
{
if(0 == this->m_traits.isctype(static_cast<charT>(i), m))
result->_map[i] = true;
}
}
//
// now process the equivalence classes:
//
first = char_set.equivalents_begin();

View File

@ -1013,12 +1013,13 @@ bool basic_regex_parser<charT, traits>::parse_set()
else if(this->m_traits.escape_syntax_type(*m_position)
== regex_constants::escape_type_not_class)
{
// negated character classes aren't supported:
// negated character class:
char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
if(m != 0)
{
fail(regex_constants::error_escape, m_position - m_base);
return false;
char_set.add_negated_class(m);
++m_position;
break;
}
}
// not a character class, just a regular escape:
@ -1094,6 +1095,15 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr
fail(regex_constants::error_brack, m_position - m_base);
return false;
}
//
// check for negated class:
//
bool negated = false;
if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
{
++name_first;
negated = true;
}
typedef typename traits::char_class_type mask_type;
mask_type m = this->m_traits.lookup_classname(name_first, name_last);
if(m == 0)
@ -1125,7 +1135,10 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr
fail(regex_constants::error_ctype, name_first - m_base);
return false;
}
if(negated == false)
char_set.add_class(m);
else
char_set.add_negated_class(m);
++m_position;
break;
}

View File

@ -226,6 +226,8 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
}
if(traits_inst.isctype(col, set_->cclasses) == true)
return set_->isnot ? next : ++next;
if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false))
return set_->isnot ? next : ++next;
return set_->isnot ? ++next : next;
}

View File

@ -197,6 +197,7 @@ struct re_set_long : public re_syntax_base
{
// Element counts for the set. append_set() fills cranges with the number
// of range endpoints divided by two and cequivalents with the number of
// equivalence classes; csingles is presumably the count of single
// characters/digraphs (its assignment is not visible here - confirm).
unsigned int csingles, cranges, cequivalents;
// Mask of character classes: a character for which isctype() is true
// against this mask is a member of the set.
mask_type cclasses;
// Mask of negated character classes: when non-zero, a character for which
// isctype() is false against this mask is a member of the set.
mask_type cnclasses;
// True if the whole set is negated; re_is_set_member() inverts its
// result when this is set.
bool isnot;
// NOTE(review): meaning not visible in this view - presumably true when
// every element of the set is a single character; confirm.
bool singleton;
};

View File

@ -241,20 +241,29 @@ void test_sets2()
TEST_REGEX_SEARCH("\\l+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\l]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_INVALID_REGEX("[\\l-a]", perl);
TEST_INVALID_REGEX("[\\L]", perl);
TEST_REGEX_SEARCH("[\\L]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[[:^lower:]]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\L+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\u+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\u]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
TEST_INVALID_REGEX("[\\U]", perl);
TEST_REGEX_SEARCH("[\\U]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[[:^upper:]]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\U+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\d+", perl, "AB012AB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\d]+", perl, "AB012AB", match_default, make_array(2, 5, -2, -2));
TEST_INVALID_REGEX("[\\D]", perl);
TEST_REGEX_SEARCH("[\\D]+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[[:^digit:]]+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\D+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\s+", perl, "AB AB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\s]+", perl, "AB AB", match_default, make_array(2, 5, -2, -2));
TEST_INVALID_REGEX("[\\S]", perl);
TEST_REGEX_SEARCH("[\\S]+", perl, " abc ", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[[:^space:]]+", perl, " abc ", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\S+", perl, " abc ", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\s+", perl, "AB AB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\w]+", perl, "AB_ AB", match_default, make_array(0, 3, -2, 6, 8, -2, -2));
TEST_REGEX_SEARCH("[\\W]+", perl, "AB_ AB", match_default, make_array(3, 6, -2, -2));
TEST_REGEX_SEARCH("[[:^word:]]+", perl, "AB_ AB", match_default, make_array(3, 6, -2, -2));
TEST_REGEX_SEARCH("\\W+", perl, "AB_ AB", match_default, make_array(3, 6, -2, -2));
test_sets2c();
}