diff --git a/doc/history.qbk b/doc/history.qbk index 9dbbcf4d..05a9710e 100644 --- a/doc/history.qbk +++ b/doc/history.qbk @@ -10,6 +10,12 @@ [h4 Boost 1.38] +* [*Breaking change]: empty expressions and empty alternatives are now +allowed when using the Perl regular expression syntax. This change has +been added for Perl compatibility; when the new [syntax_option_type] +['no_empty_expressions] is set, the old behaviour is preserved and +empty expressions are prohibited. This is issue +[@https://svn.boost.org/trac/boost/ticket/1081 #1081]. * Added support for Perl style ${n} expressions in format strings (issue [@https://svn.boost.org/trac/boost/ticket/2556 #2556]). * Added support for accessing the location of sub-expressions within the diff --git a/doc/html/boost_regex/background_information/examples.html b/doc/html/boost_regex/background_information/examples.html index 6b7f8b55..9f9dcc6c 100644 --- a/doc/html/boost_regex/background_information/examples.html +++ b/doc/html/boost_regex/background_information/examples.html @@ -28,7 +28,7 @@ Example Programs
- + Test Programs
@@ -107,7 +107,7 @@ Files: captures_test.cpp.

- + Example programs
@@ -133,7 +133,7 @@ Files: regex_timer.cpp.

- + Code snippets
diff --git a/doc/html/boost_regex/background_information/history.html b/doc/html/boost_regex/background_information/history.html index cffee7a8..b2069ec7 100644 --- a/doc/html/boost_regex/background_information/history.html +++ b/doc/html/boost_regex/background_information/history.html @@ -26,11 +26,18 @@ History
- + Boost 1.38
- + Boost 1.34
@@ -68,7 +76,7 @@
- + Boost 1.33.1
@@ -138,7 +146,7 @@
- + Boost 1.33.0
@@ -193,7 +201,7 @@
- + Boost 1.32.1
@@ -201,7 +209,7 @@ Fixed bug in partial matches of bounded repeats of '.'.
- + Boost 1.31.0
diff --git a/doc/html/boost_regex/background_information/locale.html b/doc/html/boost_regex/background_information/locale.html index 267d99d7..0e8b0694 100644 --- a/doc/html/boost_regex/background_information/locale.html +++ b/doc/html/boost_regex/background_information/locale.html @@ -58,7 +58,7 @@ There are three separate localization mechanisms supported by Boost.Regex:

- + Win32 localization model.
@@ -90,7 +90,7 @@ are treated as "unknown" graphic characters.

- + C localization model.
@@ -114,7 +114,7 @@ libraries including version 1 of this library.

- + C++ localization model.
@@ -151,7 +151,7 @@ in your code. The best way to ensure this is to add the #define to <boost/regex/user.hpp>.

- + Providing a message catalogue
diff --git a/doc/html/boost_regex/background_information/standards.html b/doc/html/boost_regex/background_information/standards.html index 60650ef6..d2732b2f 100644 --- a/doc/html/boost_regex/background_information/standards.html +++ b/doc/html/boost_regex/background_information/standards.html @@ -28,7 +28,7 @@ Conformance
- + C++

@@ -36,7 +36,7 @@ Report on C++ Library Extensions.

- + ECMAScript / JavaScript
@@ -49,7 +49,7 @@ rather than a Unicode escape sequence; use \x{DDDD} for Unicode escape sequences.

- + Perl

@@ -62,7 +62,7 @@ (??{code}) Not implementable in a compiled strongly typed language.

- + POSIX

@@ -82,7 +82,7 @@ a custom traits class.

- + Unicode

diff --git a/doc/html/boost_regex/ref/concepts/traits_concept.html b/doc/html/boost_regex/ref/concepts/traits_concept.html index af155a02..ef09ab1c 100644 --- a/doc/html/boost_regex/ref/concepts/traits_concept.html +++ b/doc/html/boost_regex/ref/concepts/traits_concept.html @@ -34,7 +34,7 @@ Boost-specific enhanced interface.

- + Minimal requirements.
@@ -381,7 +381,7 @@
- + Additional Optional Requirements
diff --git a/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html b/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html index 5ca0844e..62293232 100644 --- a/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html +++ b/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html @@ -34,7 +34,7 @@ previous version of Boost.Regex and will not be further updated:

- + Algorithm regex_format
diff --git a/doc/html/boost_regex/ref/error_type.html b/doc/html/boost_regex/ref/error_type.html index dd26f8ce..d91f0e9e 100644 --- a/doc/html/boost_regex/ref/error_type.html +++ b/doc/html/boost_regex/ref/error_type.html @@ -27,7 +27,7 @@ error_type
- + Synopsis

@@ -57,7 +57,7 @@ } // namespace boost

- + Description

diff --git a/doc/html/boost_regex/ref/match_flag_type.html b/doc/html/boost_regex/ref/match_flag_type.html index 0285afd8..50784ec1 100644 --- a/doc/html/boost_regex/ref/match_flag_type.html +++ b/doc/html/boost_regex/ref/match_flag_type.html @@ -69,7 +69,7 @@ } // namespace boost

- + Description

diff --git a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html index db172d3d..e636e849 100644 --- a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html +++ b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html @@ -43,7 +43,7 @@ on to the "real" algorithm.

- + u32regex_match

@@ -89,7 +89,7 @@ }

- + u32regex_search

@@ -128,7 +128,7 @@ }

- + u32regex_replace

diff --git a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html index aa461fc0..0fbd6483 100644 --- a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html +++ b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html @@ -28,7 +28,7 @@ Unicode Aware Regex Iterators

- + u32regex_iterator

@@ -126,7 +126,7 @@ Provided of course that the input is encoded as UTF-8.

- + u32regex_token_iterator

diff --git a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html index 5e9272f1..1f73a084 100644 --- a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html +++ b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html @@ -34,7 +34,7 @@ here they are anyway:

- + regex_match

@@ -82,7 +82,7 @@ }

- + regex_match (second overload)
@@ -110,7 +110,7 @@ }
- + regex_search

@@ -149,7 +149,7 @@ }

- + regex_search (second overload)
@@ -164,7 +164,7 @@ + s.GetLength(), e, f);

- + regex_replace

diff --git a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html index 2d38fce9..1cae51e5 100644 --- a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html +++ b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html @@ -32,7 +32,7 @@ an MFC/ATL string to a regex_iterator or regex_token_iterator:

- + regex_iterator creation helper
@@ -68,7 +68,7 @@ }
- + regex_token_iterator creation helpers
diff --git a/doc/html/boost_regex/ref/posix.html b/doc/html/boost_regex/ref/posix.html index 7fd25512..b60ac8c4 100644 --- a/doc/html/boost_regex/ref/posix.html +++ b/doc/html/boost_regex/ref/posix.html @@ -165,7 +165,7 @@

- + regcomp

@@ -379,7 +379,7 @@

- + regerror

@@ -467,7 +467,7 @@

- + regexec

@@ -537,7 +537,7 @@

- + regfree

diff --git a/doc/html/boost_regex/ref/regex_traits.html b/doc/html/boost_regex/ref/regex_traits.html index 50c47d4d..0d515761 100644 --- a/doc/html/boost_regex/ref/regex_traits.html +++ b/doc/html/boost_regex/ref/regex_traits.html @@ -46,7 +46,7 @@ } // namespace boost

- + Description

diff --git a/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_perl.html b/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_perl.html index b6168a64..53eea33c 100644 --- a/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_perl.html +++ b/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_perl.html @@ -372,6 +372,23 @@ + +

+ no_empty_expressions +

+ + +

+ No +

+ + +

+ When set then empty expressions/alternatives are prohibited. +

+ + +

save_subexpression_location diff --git a/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_synopsis.html b/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_synopsis.html index 89ce344b..8ca8f042 100644 --- a/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_synopsis.html +++ b/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_synopsis.html @@ -69,6 +69,7 @@ static const syntax_option_type no_mod_s; static const syntax_option_type mod_s; static const syntax_option_type mod_x; +static const syntax_option_type no_empty_expressions; // POSIX extended specific options: static const syntax_option_type no_escape_in_lists; diff --git a/doc/html/index.html b/doc/html/index.html index a76835e6..9b623b67 100644 --- a/doc/html/index.html +++ b/doc/html/index.html @@ -196,7 +196,7 @@

- +

Last revised: December 22, 2008 at 19:39:18 GMT

Last revised: December 23, 2008 at 17:35:37 GMT


diff --git a/doc/syntax_option_type.qbk b/doc/syntax_option_type.qbk index f501fcf4..41036dbb 100644 --- a/doc/syntax_option_type.qbk +++ b/doc/syntax_option_type.qbk @@ -50,6 +50,7 @@ duplicated within the scope of class template [basic_regex]. static const syntax_option_type no_mod_s; static const syntax_option_type mod_s; static const syntax_option_type mod_x; + static const syntax_option_type no_empty_expressions; // POSIX extended specific options: static const syntax_option_type no_escape_in_lists; @@ -151,6 +152,7 @@ The following options may also be set when using perl-style regular expressions: whether `match_not_dot_newline` is set in the match flags.]] [[mod_x][No][Turns on the perl x-modifier: causes unescaped whitespace in the expression to be ignored.]] +[[no_empty_expressions][No][When set then empty expressions/alternatives are prohibited.]] [[save_subexpression_location][No][When set then the locations of individual sub-expressions within the ['original regular expression string] can be accessed via the [link boost_regex.basic_regex.subexpression `subexpression()`] member function of `basic_regex`.]] diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index 38542bf9..f94eb89c 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -109,7 +109,11 @@ void basic_regex_parser::parse(const charT* p1, const charT* p2, m_position = m_base = p1; m_end = p2; // empty strings are errors: - if(p1 == p2) + if((p1 == p2) && + ( /* an empty string is rejected unless Perl syntax is selected and no_empty_expressions is clear */ + ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group) + || (l_flags & regbase::no_empty_expressions) + )) { fail(regex_constants::error_empty, 0); return; @@ -926,7 +930,15 @@ bool basic_regex_parser::parse_alt() // error check: if there have been no previous states, // or if the last state was a '(' then error: // - if((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark)) + if( + 
((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark)) + && + !( + ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) + && + ((this->flags() & regbase::no_empty_expressions) == 0) + ) + ) { fail(regex_constants::error_empty, this->m_position - this->m_base); return false; @@ -2075,7 +2087,14 @@ bool basic_regex_parser::unwind_alts(std::ptrdiff_t last_paren_st // alternative then that's an error: // if((this->m_alt_insert_point == static_cast(this->m_pdata->m_data.size())) - && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)) + && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start) + && + !( + ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) + && + ((this->flags() & regbase::no_empty_expressions) == 0) + ) + ) { fail(regex_constants::error_empty, this->m_position - this->m_base); return false; diff --git a/include/boost/regex/v4/regbase.hpp b/include/boost/regex/v4/regbase.hpp index a6d8e7d1..2b737d5a 100644 --- a/include/boost/regex/v4/regbase.hpp +++ b/include/boost/regex/v4/regbase.hpp @@ -85,6 +85,7 @@ public: collate = 1 << 21, // use locale specific collation nosubs = 1 << 22, // don't mark sub-expressions save_subexpression_location = 1 << 23, // save subexpression locations + no_empty_expressions = 1 << 24, // no empty expressions allowed optimize = 0, // not really supported @@ -143,6 +144,7 @@ namespace regex_constants{ mod_s = ::boost::regbase::mod_s, no_mod_s = ::boost::regbase::no_mod_s, save_subexpression_location = ::boost::regbase::save_subexpression_location, + no_empty_expressions = ::boost::regbase::no_empty_expressions, basic = ::boost::regbase::basic, extended = ::boost::regbase::extended, diff --git a/test/regress/basic_tests.cpp b/test/regress/basic_tests.cpp index 2c248fc0..af191823 100644 --- a/test/regress/basic_tests.cpp +++ b/test/regress/basic_tests.cpp @@ -42,7 +42,8 @@ void basic_tests() TEST_REGEX_SEARCH("()", perl, "zzz", 
match_default, make_array(0, 0, 0, 0, -2, 1, 1, 1, 1, -2, 2, 2, 2, 2, -2, 3, 3, 3, 3, -2, -2)); TEST_REGEX_SEARCH("()", perl, "", match_default, make_array(0, 0, 0, 0, -2, -2)); TEST_INVALID_REGEX("(", perl); - TEST_INVALID_REGEX("", perl); + TEST_INVALID_REGEX("", perl|no_empty_expressions); + TEST_REGEX_SEARCH("", perl, "abc", match_default, make_array(0, 0, -2, 1, 1, -2, 2, 2, -2, 3, 3, -2, -2)); TEST_INVALID_REGEX(")", perl); TEST_INVALID_REGEX("(aa", perl); TEST_INVALID_REGEX("aa)", perl); diff --git a/test/regress/main.cpp b/test/regress/main.cpp index f3324c24..ffbe5efe 100644 --- a/test/regress/main.cpp +++ b/test/regress/main.cpp @@ -33,38 +33,43 @@ int* get_array_data(); int error_count = 0; +#define RUN_TESTS(name) \ + std::cout << "Running test case \"" #name "\".\n";\ + name(); + + void run_tests() { - basic_tests(); - test_simple_repeats(); - test_alt(); - test_sets(); - test_sets2(); - test_anchors(); - test_backrefs(); - test_character_escapes(); - test_assertion_escapes(); - test_tricky_cases(); - test_grep(); - test_replace(); - test_non_greedy_repeats(); - test_non_marking_paren(); - test_partial_match(); - test_forward_lookahead_asserts(); - test_fast_repeats(); - test_fast_repeats2(); - test_independent_subs(); - test_nosubs(); - test_conditionals(); - test_options(); - test_options2(); + RUN_TESTS(basic_tests); + RUN_TESTS(test_simple_repeats); + RUN_TESTS(test_alt); + RUN_TESTS(test_sets); + RUN_TESTS(test_sets2); + RUN_TESTS(test_anchors); + RUN_TESTS(test_backrefs); + RUN_TESTS(test_character_escapes); + RUN_TESTS(test_assertion_escapes); + RUN_TESTS(test_tricky_cases); + RUN_TESTS(test_grep); + RUN_TESTS(test_replace); + RUN_TESTS(test_non_greedy_repeats); + RUN_TESTS(test_non_marking_paren); + RUN_TESTS(test_partial_match); + RUN_TESTS(test_forward_lookahead_asserts); + RUN_TESTS(test_fast_repeats); + RUN_TESTS(test_fast_repeats2); + RUN_TESTS(test_independent_subs); + RUN_TESTS(test_nosubs); + RUN_TESTS(test_conditionals); + 
RUN_TESTS(test_options); + RUN_TESTS(test_options2); #ifndef TEST_THREADS - test_en_locale(); + RUN_TESTS(test_en_locale); #endif - test_emacs(); - test_operators(); - test_overloads(); - test_unicode(); + RUN_TESTS(test_emacs); + RUN_TESTS(test_operators); + RUN_TESTS(test_overloads); + RUN_TESTS(test_unicode); } int cpp_main(int /*argc*/, char * /*argv*/[]) diff --git a/test/regress/test_alt.cpp b/test/regress/test_alt.cpp index e55129a2..7eb187fc 100644 --- a/test/regress/test_alt.cpp +++ b/test/regress/test_alt.cpp @@ -29,11 +29,16 @@ void test_alt() TEST_REGEX_SEARCH("a(b|c)", perl, "ad", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("(a|b|c)", perl, "c", match_default, make_array(0, 1, 0, 1, -2, -2)); TEST_REGEX_SEARCH("(a|(b)|.)", perl, "b", match_default, make_array(0, 1, 0, 1, 0, 1, -2, -2)); - TEST_INVALID_REGEX("|c", perl); - TEST_INVALID_REGEX("c|", perl); - TEST_INVALID_REGEX("(|)", perl); - TEST_INVALID_REGEX("(a|)", perl); - TEST_INVALID_REGEX("(|a)", perl); + TEST_INVALID_REGEX("|c", perl|no_empty_expressions); + TEST_REGEX_SEARCH("|c", perl, " c", match_default, make_array(0, 0, -2, 1, 1, -2, 1, 2, -2, 2, 2, -2, -2)); + TEST_INVALID_REGEX("c|", perl|no_empty_expressions); + TEST_REGEX_SEARCH("c|", perl, " c", match_default, make_array(0, 0, -2, 1, 2, -2, 2, 2, -2, -2)); + TEST_INVALID_REGEX("(|)", perl|no_empty_expressions); + TEST_REGEX_SEARCH("(|)", perl, " c", match_default, make_array(0, 0, 0, 0, -2, 1, 1, 1, 1, -2, 2, 2, 2, 2, -2, -2)); + TEST_INVALID_REGEX("(a|)", perl|no_empty_expressions); + TEST_REGEX_SEARCH("(a|)", perl, " a", match_default, make_array(0, 0, 0, 0, -2, 1, 2, 1, 2, -2, 2, 2, 2, 2, -2, -2)); + TEST_INVALID_REGEX("(|a)", perl|no_empty_expressions); + TEST_REGEX_SEARCH("(|a)", perl, " a", match_default, make_array(0, 0, 0, 0, -2, 1, 1, 1, 1, -2, 1, 2, 1, 2, -2, 2, 2, 2, 2, -2, -2)); TEST_REGEX_SEARCH("a\\|", perl, "a|", match_default, make_array(0, 2, -2, -2)); TEST_REGEX_SEARCH("a|", basic, "a|", match_default, 
make_array(0, 2, -2, -2)); diff --git a/test/regress/test_deprecated.cpp b/test/regress/test_deprecated.cpp index 171a3b56..af50ec76 100644 --- a/test/regress/test_deprecated.cpp +++ b/test/regress/test_deprecated.cpp @@ -38,7 +38,7 @@ int get_posix_compile_options(boost::regex_constants::syntax_option_type opts) { case regbase::perl: result = (opts & regbase::no_perl_ex) ? REG_EXTENDED : REG_PERL; - if(opts & (regbase::no_bk_refs|regbase::no_mod_m|regbase::mod_x|regbase::mod_s|regbase::no_mod_s|regbase::no_escape_in_lists)) + if(opts & (regbase::no_bk_refs|regbase::no_mod_m|regbase::mod_x|regbase::mod_s|regbase::no_mod_s|regbase::no_escape_in_lists|regbase::no_empty_expressions)) return -1; break; case regbase::basic: