diff --git a/doc/html/boost_regex/syntax/perl_syntax.html b/doc/html/boost_regex/syntax/perl_syntax.html index eed6b47b..358de1a8 100644 --- a/doc/html/boost_regex/syntax/perl_syntax.html +++ b/doc/html/boost_regex/syntax/perl_syntax.html @@ -405,7 +405,8 @@ characters

- For example =[abc]=, will match any of the characters 'a', 'b', or 'c'. + For example [abc], will match any of the characters 'a', + 'b', or 'c'.

@@ -413,10 +414,11 @@ ranges

- For example =[a-c]= will match any single character in the range 'a' to 'c'. - By default, for Perl regular expressions, a character x is within the range - y to z, if the code point of the character lies within the codepoints of - the endpoints of the range. Alternatively, if you set the collate + For example [a-c] will match any single character in the + range 'a' to 'c'. By default, for Perl regular expressions, a character x + is within the range y to z, if the code point of the character lies within + the codepoints of the endpoints of the range. Alternatively, if you set the + collate flag when constructing the regular expression, then ranges are locale sensitive.

@@ -426,7 +428,7 @@

If the bracket-expression begins with the ^ character, then it matches the - complement of the characters it contains, for example =a-c= + complement of the characters it contains, for example [^a-c] matches any character that is not in the range a-c.

diff --git a/doc/html/index.html b/doc/html/index.html index 247c0a84..cabf3cc6 100644 --- a/doc/html/index.html +++ b/doc/html/index.html @@ -198,7 +198,7 @@

- Last revised: November 29, 2012 at 10:43:51 GMT
+ Last revised: January 31, 2013 at 17:33:20 GMT


diff --git a/doc/syntax_perl.qbk b/doc/syntax_perl.qbk index fc412ebb..5c794d75 100644 --- a/doc/syntax_perl.qbk +++ b/doc/syntax_perl.qbk @@ -218,11 +218,11 @@ A bracket expression may contain any combination of the following: [h5 Single characters] -For example =[abc]=, will match any of the characters 'a', 'b', or 'c'. +For example [^\[abc\]], will match any of the characters 'a', 'b', or 'c'. [h5 Character ranges] -For example =[a-c]= will match any single character in the range 'a' to 'c'. +For example [^\[a-c\]] will match any single character in the range 'a' to 'c'. By default, for Perl regular expressions, a character x is within the range y to z, if the code point of the character lies within the codepoints of the endpoints of the range. Alternatively, if you set the @@ -232,7 +232,7 @@ when constructing the regular expression, then ranges are locale sensitive. [h5 Negation] If the bracket-expression begins with the ^ character, then it matches the -complement of the characters it contains, for example =[^a-c]= matches +complement of the characters it contains, for example [^\[^a-c\]] matches any character that is not in the range =a-c=. [h5 Character classes] diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index 72dc4eeb..5dfb81e7 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -1107,7 +1107,7 @@ bool basic_regex_parser::parse_repeat_range(bool isbasic) } // get the value if any: v = this->m_traits.toi(m_position, m_end, 10); - max = (v >= 0) ? v : (std::numeric_limits::max)(); + max = (v >= 0) ? 
(std::size_t)v : (std::numeric_limits::max)(); } else { diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index b8c4e963..9a80e503 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -65,6 +65,8 @@ void perl_matcher::construct_init(const basic_r m_match_flags |= match_perl; else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) m_match_flags |= match_perl; + else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal)) + m_match_flags |= match_perl; else m_match_flags |= match_posix; } @@ -326,6 +328,10 @@ bool perl_matcher::match_prefix() m_has_found_match = true; m_presult->set_second(last, 0, false); position = last; + if((m_match_flags & match_posix) == match_posix) + { + m_result.maybe_assign(*m_presult); + } } #ifdef BOOST_REGEX_MATCH_EXTRA if(m_has_found_match && (match_extra & m_match_flags)) diff --git a/include/boost/regex/v4/regex_traits_defaults.hpp b/include/boost/regex/v4/regex_traits_defaults.hpp index ca13f195..a087d784 100644 --- a/include/boost/regex/v4/regex_traits_defaults.hpp +++ b/include/boost/regex/v4/regex_traits_defaults.hpp @@ -3,12 +3,12 @@ * Copyright (c) 2004 * John Maddock * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) * */ - + /* * LOCATION: see http://www.boost.org for most recent version. 
* FILE regex_traits_defaults.hpp @@ -85,7 +85,7 @@ inline bool is_combining(unsigned char) return false; } #if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives -#ifdef _MSC_VER +#ifdef _MSC_VER template<> inline bool is_combining(wchar_t c) { @@ -115,11 +115,11 @@ template inline bool is_separator(charT c) { return BOOST_REGEX_MAKE_BOOL( - (c == static_cast('\n')) - || (c == static_cast('\r')) - || (c == static_cast('\f')) - || (static_cast(c) == 0x2028u) - || (static_cast(c) == 0x2029u) + (c == static_cast('\n')) + || (c == static_cast('\r')) + || (c == static_cast('\f')) + || (static_cast(c) == 0x2028u) + || (static_cast(c) == 0x2029u) || (static_cast(c) == 0x85u)); } template <> @@ -177,7 +177,7 @@ int get_default_class_id(const charT* p1, const charT* p2) 'x', 'd', 'i', 'g', 'i', 't', }; - static const character_pointer_range ranges[21] = + static const character_pointer_range ranges[21] = { {data+0, data+5,}, // alnum {data+5, data+10,}, // alpha @@ -203,7 +203,7 @@ int get_default_class_id(const charT* p1, const charT* p2) }; static const character_pointer_range* ranges_begin = ranges; static const character_pointer_range* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0])); - + character_pointer_range t = { p1, p2, }; const character_pointer_range* p = std::lower_bound(ranges_begin, ranges_end, t); if((p != ranges_end) && (t == *p)) @@ -324,15 +324,15 @@ inline const charT* get_escape_R_string() # pragma warning(push) # pragma warning(disable:4309 4245) #endif - static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?', - '|', '[', '\x0A', '\x0B', '\x0C', '\x85', '\\', 'x', '{', '2', '0', '2', '8', '}', + static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', static_cast('\x85'), '\\', 'x', '{', '2', '0', '2', '8', '}', '\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' }; - static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', - '|', '[', '\x0A', 
'\x0B', '\x0C', '\x85', ']', ')', '\0' }; + static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', static_cast('\x85'), ']', ')', '\0' }; charT c = static_cast(0x2029u); bool b = (static_cast(c) == 0x2029u); - + return (b ? e1 : e2); #ifdef BOOST_MSVC # pragma warning(pop) @@ -346,7 +346,7 @@ inline const char* get_escape_R_string() # pragma warning(push) # pragma warning(disable:4309) #endif - static const char e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', + static const char e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' }; return e2; #ifdef BOOST_MSVC diff --git a/test/regress/basic_tests.cpp b/test/regress/basic_tests.cpp index af191823..784c2b99 100644 --- a/test/regress/basic_tests.cpp +++ b/test/regress/basic_tests.cpp @@ -134,6 +134,17 @@ void test_partial_match() TEST_REGEX_SEARCH("a*?", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2)); TEST_REGEX_SEARCH("\\w*?", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2)); TEST_REGEX_SEARCH("(\\w)*?", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2)); + + TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "xyzaaab", match_default|match_partial, make_array(0, 7, -2, -2)); + TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "xyz", match_default|match_partial, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "xy", match_default|match_partial, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "x", match_default|match_partial, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "", match_default|match_partial, make_array(-2, -2)); + TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "aaaa", match_default|match_partial, make_array(-2, -2)); + TEST_REGEX_SEARCH(".abc", extended, "aaab", match_default|match_partial, make_array(1, 4, -2, -2)); + TEST_REGEX_SEARCH("a[_]", extended, "xxa", match_default|match_partial, 
make_array(2, 3, -2, -2)); + TEST_REGEX_SEARCH(".{4,}", extended, "xxa", match_default|match_partial, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH(".{4,}", extended, "xxa", match_default|match_partial|match_not_dot_null, make_array(0, 3, -2, -2)); } void test_nosubs()