Enabled negated character classes inside character sets.

[SVN r31053]
This commit is contained in:
John Maddock
2005-09-20 12:01:25 +00:00
parent ae36194500
commit b5bc6e2be9
7 changed files with 78 additions and 17 deletions

View File

@ -65,7 +65,9 @@
that regex iteration allows lookbehind to look back before the current search that regex iteration allows lookbehind to look back before the current search
range (into the last match). range (into the last match).
<LI> <LI>
Fixed strange bug with non-greedy repeats inside forward lookahead assertions.</LI></UL> Fixed strange bug with non-greedy repeats inside forward lookahead assertions.
<LI>
Enabled negated character classes inside character sets.</LI></UL>
<P>Boost 1.33.0.</P> <P>Boost 1.33.0.</P>
<UL> <UL>
<LI> <LI>

View File

@ -65,7 +65,9 @@
that regex iteration allows lookbehind to look back before the current search that regex iteration allows lookbehind to look back before the current search
range (into the last match). range (into the last match).
<LI> <LI>
Fixed strange bug with non-greedy repeats inside forward lookahead assertions.</LI></UL> Fixed strange bug with non-greedy repeats inside forward lookahead assertions.
<LI>
Enabled negated character classes inside character sets.</LI></UL>
<P>Boost 1.33.0.</P> <P>Boost 1.33.0.</P>
<UL> <UL>
<LI> <LI>

View File

@ -61,6 +61,7 @@ public:
m_negate = false; m_negate = false;
m_has_digraphs = false; m_has_digraphs = false;
m_classes = 0; m_classes = 0;
m_negated_classes = 0;
m_empty = true; m_empty = true;
} }
@ -92,6 +93,11 @@ public:
m_classes |= m; m_classes |= m;
m_empty = false; m_empty = false;
} }
// Record a negated character class in this set.
// The bits of `m` are merged into the accumulated negated-class mask,
// and the set is flagged as no longer empty.
void add_negated_class(mask_type m)
{
   // something has now been added to the set:
   m_empty = false;
   // accumulate rather than overwrite, so several negated classes can coexist:
   m_negated_classes |= m;
}
void add_equivalent(const digraph_type& s) void add_equivalent(const digraph_type& s)
{ {
m_equivalents.insert(m_equivalents.end(), s); m_equivalents.insert(m_equivalents.end(), s);
@ -148,18 +154,23 @@ public:
{ {
return m_classes; return m_classes;
} }
// Returns the accumulated mask of negated character classes
// (the value built up by add_negated_class, 0 if none were added).
mask_type negated_classes()const
{
return m_negated_classes;
}
bool empty()const bool empty()const
{ {
return m_empty; return m_empty;
} }
private: private:
std::vector<digraph_type> m_singles; // a list of single characters to match std::vector<digraph_type> m_singles; // a list of single characters to match
std::vector<digraph_type> m_ranges; // a list of end points of our ranges std::vector<digraph_type> m_ranges; // a list of end points of our ranges
bool m_negate; // true if the set is to be negated bool m_negate; // true if the set is to be negated
bool m_has_digraphs; // true if we have digraphs present bool m_has_digraphs; // true if we have digraphs present
mask_type m_classes; // character classes to match mask_type m_classes; // character classes to match
bool m_empty; // whether we've added anything yet mask_type m_negated_classes; // negated character classes to match
std::vector<digraph_type> m_equivalents; // a list of equivalence classes bool m_empty; // whether we've added anything yet
std::vector<digraph_type> m_equivalents; // a list of equivalence classes
}; };
template <class charT, class traits> template <class charT, class traits>
@ -350,11 +361,14 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
result->cranges = static_cast<unsigned int>(::boost::re_detail::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2; result->cranges = static_cast<unsigned int>(::boost::re_detail::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
result->cequivalents = static_cast<unsigned int>(::boost::re_detail::distance(char_set.equivalents_begin(), char_set.equivalents_end())); result->cequivalents = static_cast<unsigned int>(::boost::re_detail::distance(char_set.equivalents_begin(), char_set.equivalents_end()));
result->cclasses = char_set.classes(); result->cclasses = char_set.classes();
result->cnclasses = char_set.negated_classes();
if(flags() & regbase::icase) if(flags() & regbase::icase)
{ {
// adjust classes as needed: // adjust classes as needed:
if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask)) if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
result->cclasses |= m_alpha_mask; result->cclasses |= m_alpha_mask;
if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask))
result->cnclasses |= m_alpha_mask;
} }
result->isnot = char_set.is_negated(); result->isnot = char_set.is_negated();
@ -596,6 +610,24 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
} }
} }
// //
// and now the negated classes:
//
m = char_set.negated_classes();
if(flags() & regbase::icase)
{
// adjust m as needed:
if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
m |= m_alpha_mask;
}
if(m != 0)
{
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
{
if(0 == this->m_traits.isctype(static_cast<charT>(i), m))
result->_map[i] = true;
}
}
//
// now process the equivalence classes: // now process the equivalence classes:
// //
first = char_set.equivalents_begin(); first = char_set.equivalents_begin();

View File

@ -1013,12 +1013,13 @@ bool basic_regex_parser<charT, traits>::parse_set()
else if(this->m_traits.escape_syntax_type(*m_position) else if(this->m_traits.escape_syntax_type(*m_position)
== regex_constants::escape_type_not_class) == regex_constants::escape_type_not_class)
{ {
// negated character classes aren't supported: // negated character class:
char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1); char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
if(m != 0) if(m != 0)
{ {
fail(regex_constants::error_escape, m_position - m_base); char_set.add_negated_class(m);
return false; ++m_position;
break;
} }
} }
// not a character class, just a regular escape: // not a character class, just a regular escape:
@ -1094,6 +1095,15 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr
fail(regex_constants::error_brack, m_position - m_base); fail(regex_constants::error_brack, m_position - m_base);
return false; return false;
} }
//
// check for negated class:
//
bool negated = false;
if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
{
++name_first;
negated = true;
}
typedef typename traits::char_class_type mask_type; typedef typename traits::char_class_type mask_type;
mask_type m = this->m_traits.lookup_classname(name_first, name_last); mask_type m = this->m_traits.lookup_classname(name_first, name_last);
if(m == 0) if(m == 0)
@ -1125,7 +1135,10 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr
fail(regex_constants::error_ctype, name_first - m_base); fail(regex_constants::error_ctype, name_first - m_base);
return false; return false;
} }
char_set.add_class(m); if(negated == false)
char_set.add_class(m);
else
char_set.add_negated_class(m);
++m_position; ++m_position;
break; break;
} }

View File

@ -226,6 +226,8 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
} }
if(traits_inst.isctype(col, set_->cclasses) == true) if(traits_inst.isctype(col, set_->cclasses) == true)
return set_->isnot ? next : ++next; return set_->isnot ? next : ++next;
if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false))
return set_->isnot ? next : ++next;
return set_->isnot ? ++next : next; return set_->isnot ? ++next : next;
} }

View File

@ -197,6 +197,7 @@ struct re_set_long : public re_syntax_base
{ {
unsigned int csingles, cranges, cequivalents; unsigned int csingles, cranges, cequivalents;
mask_type cclasses; mask_type cclasses;
mask_type cnclasses;
bool isnot; bool isnot;
bool singleton; bool singleton;
}; };

View File

@ -241,20 +241,29 @@ void test_sets2()
TEST_REGEX_SEARCH("\\l+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2)); TEST_REGEX_SEARCH("\\l+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\l]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2)); TEST_REGEX_SEARCH("[\\l]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_INVALID_REGEX("[\\l-a]", perl); TEST_INVALID_REGEX("[\\l-a]", perl);
TEST_INVALID_REGEX("[\\L]", perl); TEST_REGEX_SEARCH("[\\L]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[[:^lower:]]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\L+", perl, "abABCab", match_default, make_array(2, 5, -2, -2)); TEST_REGEX_SEARCH("\\L+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\u+", perl, "abABCab", match_default, make_array(2, 5, -2, -2)); TEST_REGEX_SEARCH("\\u+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\u]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2)); TEST_REGEX_SEARCH("[\\u]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
TEST_INVALID_REGEX("[\\U]", perl); TEST_REGEX_SEARCH("[\\U]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[[:^upper:]]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\U+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2)); TEST_REGEX_SEARCH("\\U+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\d+", perl, "AB012AB", match_default, make_array(2, 5, -2, -2)); TEST_REGEX_SEARCH("\\d+", perl, "AB012AB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\d]+", perl, "AB012AB", match_default, make_array(2, 5, -2, -2)); TEST_REGEX_SEARCH("[\\d]+", perl, "AB012AB", match_default, make_array(2, 5, -2, -2));
TEST_INVALID_REGEX("[\\D]", perl); TEST_REGEX_SEARCH("[\\D]+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[[:^digit:]]+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\D+", perl, "01abc01", match_default, make_array(2, 5, -2, -2)); TEST_REGEX_SEARCH("\\D+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\s+", perl, "AB AB", match_default, make_array(2, 5, -2, -2)); TEST_REGEX_SEARCH("\\s+", perl, "AB AB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\s]+", perl, "AB AB", match_default, make_array(2, 5, -2, -2)); TEST_REGEX_SEARCH("[\\s]+", perl, "AB AB", match_default, make_array(2, 5, -2, -2));
TEST_INVALID_REGEX("[\\S]", perl); TEST_REGEX_SEARCH("[\\S]+", perl, " abc ", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[[:^space:]]+", perl, " abc ", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\S+", perl, " abc ", match_default, make_array(2, 5, -2, -2)); TEST_REGEX_SEARCH("\\S+", perl, " abc ", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\s+", perl, "AB AB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\w]+", perl, "AB_ AB", match_default, make_array(0, 3, -2, 6, 8, -2, -2));
TEST_REGEX_SEARCH("[\\W]+", perl, "AB_ AB", match_default, make_array(3, 6, -2, -2));
TEST_REGEX_SEARCH("[[:^word:]]+", perl, "AB_ AB", match_default, make_array(3, 6, -2, -2));
TEST_REGEX_SEARCH("\\W+", perl, "AB_ AB", match_default, make_array(3, 6, -2, -2));
test_sets2c(); test_sets2c();
} }