@@ -147,7 +149,7 @@
Revised
-
+
28 June 2004
© Copyright John Maddock 1998-
diff --git a/include/boost/regex/v4/basic_regex_creator.hpp b/include/boost/regex/v4/basic_regex_creator.hpp
index 20602ebf..373c79f1 100644
--- a/include/boost/regex/v4/basic_regex_creator.hpp
+++ b/include/boost/regex/v4/basic_regex_creator.hpp
@@ -61,6 +61,7 @@ public:
m_negate = false;
m_has_digraphs = false;
m_classes = 0;
+ m_negated_classes = 0;
m_empty = true;
}
@@ -92,6 +93,11 @@ public:
m_classes |= m;
m_empty = false;
}
+ void add_negated_class(mask_type m) // add class mask m to this set's negated character classes
+ {
+ m_negated_classes |= m; // OR into the accumulated mask, so earlier negated classes are kept
+ m_empty = false; // the set now has content
+ }
void add_equivalent(const digraph_type& s)
{
m_equivalents.insert(m_equivalents.end(), s);
@@ -148,18 +154,23 @@ public:
{
return m_classes;
}
+ mask_type negated_classes()const // accessor for the negated character class mask
+ {
+ return m_negated_classes; // combined mask accumulated by add_negated_class()
+ }
bool empty()const
{
return m_empty;
}
private:
- std::vector m_singles; // a list of single characters to match
- std::vector m_ranges; // a list of end points of our ranges
- bool m_negate; // true if the set is to be negated
- bool m_has_digraphs; // true if we have digraphs present
- mask_type m_classes; // character classes to match
- bool m_empty; // whether we've added anything yet
- std::vector m_equivalents; // a list of equivalence classes
+ std::vector m_singles; // a list of single characters to match
+ std::vector m_ranges; // a list of end points of our ranges
+ bool m_negate; // true if the set is to be negated
+ bool m_has_digraphs; // true if we have digraphs present
+ mask_type m_classes; // character classes to match
+ mask_type m_negated_classes; // negated character classes to match
+ bool m_empty; // whether we've added anything yet
+ std::vector m_equivalents; // a list of equivalence classes
};
template
@@ -350,11 +361,14 @@ re_syntax_base* basic_regex_creator::append_set(
result->cranges = static_cast(::boost::re_detail::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
result->cequivalents = static_cast(::boost::re_detail::distance(char_set.equivalents_begin(), char_set.equivalents_end()));
result->cclasses = char_set.classes();
+ result->cnclasses = char_set.negated_classes();
if(flags() & regbase::icase)
{
// adjust classes as needed:
if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
result->cclasses |= m_alpha_mask;
+ if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask))
+ result->cnclasses |= m_alpha_mask;
}
result->isnot = char_set.is_negated();
@@ -596,6 +610,24 @@ re_syntax_base* basic_regex_creator::append_set(
}
}
//
+ // and now the negated classes:
+ //
+ m = char_set.negated_classes();
+ if(flags() & regbase::icase)
+ {
+ // adjust m as needed:
+ if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
+ m |= m_alpha_mask;
+ }
+ if(m != 0)
+ {
+ for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
+ {
+ if(0 == this->m_traits.isctype(static_cast(i), m))
+ result->_map[i] = true;
+ }
+ }
+ //
// now process the equivalence classes:
//
first = char_set.equivalents_begin();
diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp
index f17a440e..61be3ccf 100644
--- a/include/boost/regex/v4/basic_regex_parser.hpp
+++ b/include/boost/regex/v4/basic_regex_parser.hpp
@@ -1013,12 +1013,13 @@ bool basic_regex_parser::parse_set()
else if(this->m_traits.escape_syntax_type(*m_position)
== regex_constants::escape_type_not_class)
{
- // negated character classes aren't supported:
+ // negated character class:
char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
if(m != 0)
{
- fail(regex_constants::error_escape, m_position - m_base);
- return false;
+ char_set.add_negated_class(m);
+ ++m_position;
+ break;
}
}
// not a character class, just a regular escape:
@@ -1094,6 +1095,15 @@ bool basic_regex_parser::parse_inner_set(basic_char_setm_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
+ {
+ ++name_first;
+ negated = true;
+ }
typedef typename traits::char_class_type mask_type;
mask_type m = this->m_traits.lookup_classname(name_first, name_last);
if(m == 0)
@@ -1125,7 +1135,10 @@ bool basic_regex_parser::parse_inner_set(basic_char_setcclasses) == true)
return set_->isnot ? next : ++next;
+ if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false))
+ return set_->isnot ? next : ++next;
return set_->isnot ? ++next : next;
}
diff --git a/include/boost/regex/v4/states.hpp b/include/boost/regex/v4/states.hpp
index 99bfa23e..551ed669 100644
--- a/include/boost/regex/v4/states.hpp
+++ b/include/boost/regex/v4/states.hpp
@@ -197,6 +197,7 @@ struct re_set_long : public re_syntax_base
{
unsigned int csingles, cranges, cequivalents;
mask_type cclasses;
+ mask_type cnclasses; // mask of negated character classes (companion to cclasses)
bool isnot;
bool singleton;
};
diff --git a/test/regress/test_sets.cpp b/test/regress/test_sets.cpp
index 5d692092..7f23c105 100644
--- a/test/regress/test_sets.cpp
+++ b/test/regress/test_sets.cpp
@@ -241,20 +241,29 @@ void test_sets2()
TEST_REGEX_SEARCH("\\l+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\l]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_INVALID_REGEX("[\\l-a]", perl);
- TEST_INVALID_REGEX("[\\L]", perl);
+ TEST_REGEX_SEARCH("[\\L]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
+ TEST_REGEX_SEARCH("[[:^lower:]]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\L+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\u+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\u]+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
- TEST_INVALID_REGEX("[\\U]", perl);
+ TEST_REGEX_SEARCH("[\\U]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
+ TEST_REGEX_SEARCH("[[:^upper:]]+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\U+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\d+", perl, "AB012AB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\d]+", perl, "AB012AB", match_default, make_array(2, 5, -2, -2));
- TEST_INVALID_REGEX("[\\D]", perl);
+ TEST_REGEX_SEARCH("[\\D]+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
+ TEST_REGEX_SEARCH("[[:^digit:]]+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\D+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\s+", perl, "AB AB", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("[\\s]+", perl, "AB AB", match_default, make_array(2, 5, -2, -2));
- TEST_INVALID_REGEX("[\\S]", perl);
+ TEST_REGEX_SEARCH("[\\S]+", perl, " abc ", match_default, make_array(2, 5, -2, -2));
+ TEST_REGEX_SEARCH("[[:^space:]]+", perl, " abc ", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("\\S+", perl, " abc ", match_default, make_array(2, 5, -2, -2));
+ TEST_REGEX_SEARCH("\\s+", perl, "AB AB", match_default, make_array(2, 5, -2, -2));
+ TEST_REGEX_SEARCH("[\\w]+", perl, "AB_ AB", match_default, make_array(0, 3, -2, 6, 8, -2, -2));
+ TEST_REGEX_SEARCH("[\\W]+", perl, "AB_ AB", match_default, make_array(3, 6, -2, -2));
+ TEST_REGEX_SEARCH("[[:^word:]]+", perl, "AB_ AB", match_default, make_array(3, 6, -2, -2));
+ TEST_REGEX_SEARCH("\\W+", perl, "AB_ AB", match_default, make_array(3, 6, -2, -2));
test_sets2c();
}