mirror of
https://github.com/boostorg/regex.git
synced 2025-07-16 22:02:08 +02:00
Enabled negated character classes inside character sets.
[SVN r31053]
This commit is contained in:
@ -65,7 +65,9 @@
|
|||||||
that regex iteration allows lookbehind to look back before the current search
|
that regex iteration allows lookbehind to look back before the current search
|
||||||
range (into the last match).
|
range (into the last match).
|
||||||
<LI>
|
<LI>
|
||||||
Fixed strange bug with non-greedy repeats inside forward lookahead assertions.</LI></UL>
|
Fixed strange bug with non-greedy repeats inside forward lookahead assertions.
|
||||||
|
<LI>
|
||||||
|
Enabled negated character classes inside character sets.</LI></UL>
|
||||||
<P>Boost 1.33.0.</P>
|
<P>Boost 1.33.0.</P>
|
||||||
<UL>
|
<UL>
|
||||||
<LI>
|
<LI>
|
||||||
|
@ -65,7 +65,9 @@
|
|||||||
that regex iteration allows lookbehind to look back before the current search
|
that regex iteration allows lookbehind to look back before the current search
|
||||||
range (into the last match).
|
range (into the last match).
|
||||||
<LI>
|
<LI>
|
||||||
Fixed strange bug with non-greedy repeats inside forward lookahead assertions.</LI></UL>
|
Fixed strange bug with non-greedy repeats inside forward lookahead assertions.
|
||||||
|
<LI>
|
||||||
|
Enabled negated character classes inside character sets.</LI></UL>
|
||||||
<P>Boost 1.33.0.</P>
|
<P>Boost 1.33.0.</P>
|
||||||
<UL>
|
<UL>
|
||||||
<LI>
|
<LI>
|
||||||
|
@ -61,6 +61,7 @@ public:
|
|||||||
m_negate = false;
|
m_negate = false;
|
||||||
m_has_digraphs = false;
|
m_has_digraphs = false;
|
||||||
m_classes = 0;
|
m_classes = 0;
|
||||||
|
m_negated_classes = 0;
|
||||||
m_empty = true;
|
m_empty = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -92,6 +93,11 @@ public:
|
|||||||
m_classes |= m;
|
m_classes |= m;
|
||||||
m_empty = false;
|
m_empty = false;
|
||||||
}
|
}
|
||||||
|
void add_negated_class(mask_type m)
|
||||||
|
{
|
||||||
|
m_negated_classes |= m;
|
||||||
|
m_empty = false;
|
||||||
|
}
|
||||||
void add_equivalent(const digraph_type& s)
|
void add_equivalent(const digraph_type& s)
|
||||||
{
|
{
|
||||||
m_equivalents.insert(m_equivalents.end(), s);
|
m_equivalents.insert(m_equivalents.end(), s);
|
||||||
@ -148,18 +154,23 @@ public:
|
|||||||
{
|
{
|
||||||
return m_classes;
|
return m_classes;
|
||||||
}
|
}
|
||||||
|
mask_type negated_classes()const
|
||||||
|
{
|
||||||
|
return m_negated_classes;
|
||||||
|
}
|
||||||
bool empty()const
|
bool empty()const
|
||||||
{
|
{
|
||||||
return m_empty;
|
return m_empty;
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
std::vector<digraph_type> m_singles; // a list of single characters to match
|
std::vector<digraph_type> m_singles; // a list of single characters to match
|
||||||
std::vector<digraph_type> m_ranges; // a list of end points of our ranges
|
std::vector<digraph_type> m_ranges; // a list of end points of our ranges
|
||||||
bool m_negate; // true if the set is to be negated
|
bool m_negate; // true if the set is to be negated
|
||||||
bool m_has_digraphs; // true if we have digraphs present
|
bool m_has_digraphs; // true if we have digraphs present
|
||||||
mask_type m_classes; // character classes to match
|
mask_type m_classes; // character classes to match
|
||||||
bool m_empty; // whether we've added anything yet
|
mask_type m_negated_classes; // negated character classes to match
|
||||||
std::vector<digraph_type> m_equivalents; // a list of equivalence classes
|
bool m_empty; // whether we've added anything yet
|
||||||
|
std::vector<digraph_type> m_equivalents; // a list of equivalence classes
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class charT, class traits>
|
template <class charT, class traits>
|
||||||
@ -350,11 +361,14 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
|||||||
result->cranges = static_cast<unsigned int>(::boost::re_detail::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
|
result->cranges = static_cast<unsigned int>(::boost::re_detail::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
|
||||||
result->cequivalents = static_cast<unsigned int>(::boost::re_detail::distance(char_set.equivalents_begin(), char_set.equivalents_end()));
|
result->cequivalents = static_cast<unsigned int>(::boost::re_detail::distance(char_set.equivalents_begin(), char_set.equivalents_end()));
|
||||||
result->cclasses = char_set.classes();
|
result->cclasses = char_set.classes();
|
||||||
|
result->cnclasses = char_set.negated_classes();
|
||||||
if(flags() & regbase::icase)
|
if(flags() & regbase::icase)
|
||||||
{
|
{
|
||||||
// adjust classes as needed:
|
// adjust classes as needed:
|
||||||
if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
|
if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
|
||||||
result->cclasses |= m_alpha_mask;
|
result->cclasses |= m_alpha_mask;
|
||||||
|
if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask))
|
||||||
|
result->cnclasses |= m_alpha_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
result->isnot = char_set.is_negated();
|
result->isnot = char_set.is_negated();
|
||||||
@ -596,6 +610,24 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
//
|
//
|
||||||
|
// and now the negated classes:
|
||||||
|
//
|
||||||
|
m = char_set.negated_classes();
|
||||||
|
if(flags() & regbase::icase)
|
||||||
|
{
|
||||||
|
// adjust m as needed:
|
||||||
|
if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
|
||||||
|
m |= m_alpha_mask;
|
||||||
|
}
|
||||||
|
if(m != 0)
|
||||||
|
{
|
||||||
|
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
||||||
|
{
|
||||||
|
if(0 == this->m_traits.isctype(static_cast<charT>(i), m))
|
||||||
|
result->_map[i] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//
|
||||||
// now process the equivalence classes:
|
// now process the equivalence classes:
|
||||||
//
|
//
|
||||||
first = char_set.equivalents_begin();
|
first = char_set.equivalents_begin();
|
||||||
|
@ -1013,12 +1013,13 @@ bool basic_regex_parser<charT, traits>::parse_set()
|
|||||||
else if(this->m_traits.escape_syntax_type(*m_position)
|
else if(this->m_traits.escape_syntax_type(*m_position)
|
||||||
== regex_constants::escape_type_not_class)
|
== regex_constants::escape_type_not_class)
|
||||||
{
|
{
|
||||||
// negated character classes aren't supported:
|
// negated character class:
|
||||||
char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
|
char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
|
||||||
if(m != 0)
|
if(m != 0)
|
||||||
{
|
{
|
||||||
fail(regex_constants::error_escape, m_position - m_base);
|
char_set.add_negated_class(m);
|
||||||
return false;
|
++m_position;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// not a character class, just a regular escape:
|
// not a character class, just a regular escape:
|
||||||
@ -1094,6 +1095,15 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr
|
|||||||
fail(regex_constants::error_brack, m_position - m_base);
|
fail(regex_constants::error_brack, m_position - m_base);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
//
|
||||||
|
// check for negated class:
|
||||||
|
//
|
||||||
|
bool negated = false;
|
||||||
|
if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
|
||||||
|
{
|
||||||
|
++name_first;
|
||||||
|
negated = true;
|
||||||
|
}
|
||||||
typedef typename traits::char_class_type mask_type;
|
typedef typename traits::char_class_type mask_type;
|
||||||
mask_type m = this->m_traits.lookup_classname(name_first, name_last);
|
mask_type m = this->m_traits.lookup_classname(name_first, name_last);
|
||||||
if(m == 0)
|
if(m == 0)
|
||||||
@ -1125,7 +1135,10 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr
|
|||||||
fail(regex_constants::error_ctype, name_first - m_base);
|
fail(regex_constants::error_ctype, name_first - m_base);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
char_set.add_class(m);
|
if(negated == false)
|
||||||
|
char_set.add_class(m);
|
||||||
|
else
|
||||||
|
char_set.add_negated_class(m);
|
||||||
++m_position;
|
++m_position;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -226,6 +226,8 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
|
|||||||
}
|
}
|
||||||
if(traits_inst.isctype(col, set_->cclasses) == true)
|
if(traits_inst.isctype(col, set_->cclasses) == true)
|
||||||
return set_->isnot ? next : ++next;
|
return set_->isnot ? next : ++next;
|
||||||
|
if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false))
|
||||||
|
return set_->isnot ? next : ++next;
|
||||||
return set_->isnot ? ++next : next;
|
return set_->isnot ? ++next : next;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -197,6 +197,7 @@ struct re_set_long : public re_syntax_base
|
|||||||
{
|
{
|
||||||
unsigned int csingles, cranges, cequivalents;
|
unsigned int csingles, cranges, cequivalents;
|
||||||
mask_type cclasses;
|
mask_type cclasses;
|
||||||
|
mask_type cnclasses;
|
||||||
bool isnot;
|
bool isnot;
|
||||||
bool singleton;
|
bool singleton;
|
||||||
};
|
};
|
||||||
|
@ -241,20 +241,29 @@ void test_sets2()
|
|||||||
TEST_REGEX_SEARCH("\\l+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
|
TEST_REGEX_SEARCH("\\l+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_REGEX_SEARCH("[\\l]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
|
TEST_REGEX_SEARCH("[\\l]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_INVALID_REGEX("[\\l-a]", perl);
|
TEST_INVALID_REGEX("[\\l-a]", perl);
|
||||||
TEST_INVALID_REGEX("[\\L]", perl);
|
TEST_REGEX_SEARCH("[\\L]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("[[:^lower:]]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_REGEX_SEARCH("\\L+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
|
TEST_REGEX_SEARCH("\\L+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_REGEX_SEARCH("\\u+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
|
TEST_REGEX_SEARCH("\\u+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_REGEX_SEARCH("[\\u]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
|
TEST_REGEX_SEARCH("[\\u]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_INVALID_REGEX("[\\U]", perl);
|
TEST_REGEX_SEARCH("[\\U]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("[[:^upper:]]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_REGEX_SEARCH("\\U+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
|
TEST_REGEX_SEARCH("\\U+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_REGEX_SEARCH("\\d+", perl, "AB012AB", match_default, make_array(2, 5, -2, -2));
|
TEST_REGEX_SEARCH("\\d+", perl, "AB012AB", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_REGEX_SEARCH("[\\d]+", perl, "AB012AB", match_default, make_array(2, 5, -2, -2));
|
TEST_REGEX_SEARCH("[\\d]+", perl, "AB012AB", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_INVALID_REGEX("[\\D]", perl);
|
TEST_REGEX_SEARCH("[\\D]+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("[[:^digit:]]+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_REGEX_SEARCH("\\D+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
|
TEST_REGEX_SEARCH("\\D+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_REGEX_SEARCH("\\s+", perl, "AB   AB", match_default, make_array(2, 5, -2, -2));
|
TEST_REGEX_SEARCH("\\s+", perl, "AB   AB", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_REGEX_SEARCH("[\\s]+", perl, "AB   AB", match_default, make_array(2, 5, -2, -2));
|
TEST_REGEX_SEARCH("[\\s]+", perl, "AB   AB", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_INVALID_REGEX("[\\S]", perl);
|
TEST_REGEX_SEARCH("[\\S]+", perl, "  abc  ", match_default, make_array(2, 5, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("[[:^space:]]+", perl, "  abc  ", match_default, make_array(2, 5, -2, -2));
|
||||||
TEST_REGEX_SEARCH("\\S+", perl, "  abc  ", match_default, make_array(2, 5, -2, -2));
|
TEST_REGEX_SEARCH("\\S+", perl, "  abc  ", match_default, make_array(2, 5, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("\\s+", perl, "AB   AB", match_default, make_array(2, 5, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("[\\w]+", perl, "AB_   AB", match_default, make_array(0, 3, -2, 6, 8, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("[\\W]+", perl, "AB_   AB", match_default, make_array(3, 6, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("[[:^word:]]+", perl, "AB_   AB", match_default, make_array(3, 6, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("\\W+", perl, "AB_   AB", match_default, make_array(3, 6, -2, -2));
|
||||||
test_sets2c();
|
test_sets2c();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user