diff --git a/doc/history.qbk b/doc/history.qbk
index 9dbbcf4d..05a9710e 100644
--- a/doc/history.qbk
+++ b/doc/history.qbk
@@ -10,6 +10,12 @@
[h4 Boost 1.38]
+* [*Breaking change]: empty expressions and empty alternatives are now
+allowed when using the Perl regular expression syntax. This change has
+been made for Perl compatibility; when the new [syntax_option_type]
+['no_empty_expressions] is set, the old behaviour is preserved and
+empty expressions are prohibited. This is issue
+[@https://svn.boost.org/trac/boost/ticket/1081 #1081].
* Added support for Perl style ${n} expressions in format strings
(issue [@https://svn.boost.org/trac/boost/ticket/2556 #2556]).
* Added support for accessing the location of sub-expressions within the
diff --git a/doc/html/boost_regex/background_information/examples.html b/doc/html/boost_regex/background_information/examples.html
index 6b7f8b55..9f9dcc6c 100644
--- a/doc/html/boost_regex/background_information/examples.html
+++ b/doc/html/boost_regex/background_information/examples.html
@@ -28,7 +28,7 @@
Example Programs
@@ -62,7 +62,7 @@
(??{code}) Not implementable in a compiled strongly typed language.
diff --git a/doc/html/boost_regex/ref/concepts/traits_concept.html b/doc/html/boost_regex/ref/concepts/traits_concept.html
index af155a02..ef09ab1c 100644
--- a/doc/html/boost_regex/ref/concepts/traits_concept.html
+++ b/doc/html/boost_regex/ref/concepts/traits_concept.html
@@ -34,7 +34,7 @@
Boost-specific enhanced interface.
diff --git a/doc/html/boost_regex/ref/match_flag_type.html b/doc/html/boost_regex/ref/match_flag_type.html
index 0285afd8..50784ec1 100644
--- a/doc/html/boost_regex/ref/match_flag_type.html
+++ b/doc/html/boost_regex/ref/match_flag_type.html
@@ -69,7 +69,7 @@
}
diff --git a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html
index db172d3d..e636e849 100644
--- a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html
+++ b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html
@@ -43,7 +43,7 @@
on to the "real" algorithm.
diff --git a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html
index aa461fc0..0fbd6483 100644
--- a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html
+++ b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html
@@ -28,7 +28,7 @@
Unicode Aware Regex Iterators
@@ -126,7 +126,7 @@
Provided of course that the input is encoded as UTF-8.
diff --git a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html
index 5e9272f1..1f73a084 100644
--- a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html
+++ b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html
@@ -34,7 +34,7 @@
here they are anyway:
diff --git a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html
index 2d38fce9..1cae51e5 100644
--- a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html
+++ b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html
@@ -32,7 +32,7 @@
an MFC/ATL string to a regex_iterator
or regex_token_iterator
:
diff --git a/doc/html/boost_regex/ref/regex_traits.html b/doc/html/boost_regex/ref/regex_traits.html
index 50c47d4d..0d515761 100644
--- a/doc/html/boost_regex/ref/regex_traits.html
+++ b/doc/html/boost_regex/ref/regex_traits.html
@@ -46,7 +46,7 @@
}
diff --git a/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_perl.html b/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_perl.html
index b6168a64..53eea33c 100644
--- a/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_perl.html
+++ b/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_perl.html
@@ -372,6 +372,23 @@
save_subexpression_location
diff --git a/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_synopsis.html b/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_synopsis.html
index 89ce344b..8ca8f042 100644
--- a/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_synopsis.html
+++ b/doc/html/boost_regex/ref/syntax_option_type/syntax_option_type_synopsis.html
@@ -69,6 +69,7 @@
static const syntax_option_type no_mod_s;
static const syntax_option_type mod_s;
static const syntax_option_type mod_x;
+static const syntax_option_type no_empty_expressions;
static const syntax_option_type no_escape_in_lists;
diff --git a/doc/html/index.html b/doc/html/index.html
index a76835e6..9b623b67 100644
--- a/doc/html/index.html
+++ b/doc/html/index.html
@@ -196,7 +196,7 @@
-Last revised: December 22, 2008 at 19:39:18 GMT |
+Last revised: December 23, 2008 at 17:35:37 GMT |
|
diff --git a/doc/syntax_option_type.qbk b/doc/syntax_option_type.qbk
index f501fcf4..41036dbb 100644
--- a/doc/syntax_option_type.qbk
+++ b/doc/syntax_option_type.qbk
@@ -50,6 +50,7 @@ duplicated within the scope of class template [basic_regex].
static const syntax_option_type no_mod_s;
static const syntax_option_type mod_s;
static const syntax_option_type mod_x;
+ static const syntax_option_type no_empty_expressions;
// POSIX extended specific options:
static const syntax_option_type no_escape_in_lists;
@@ -151,6 +152,7 @@ The following options may also be set when using perl-style regular expressions:
whether `match_not_dot_newline` is set in the match flags.]]
[[mod_x][No][Turns on the perl x-modifier: causes unescaped whitespace
in the expression to be ignored.]]
+[[no_empty_expressions][No][When set, empty expressions and empty alternatives are prohibited.]]
[[save_subexpression_location][No][When set then the locations of individual
sub-expressions within the ['original regular expression string] can be accessed
via the [link boost_regex.basic_regex.subexpression `subexpression()`] member function of `basic_regex`.]]
diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp
index 38542bf9..f94eb89c 100644
--- a/include/boost/regex/v4/basic_regex_parser.hpp
+++ b/include/boost/regex/v4/basic_regex_parser.hpp
@@ -109,7 +109,11 @@ void basic_regex_parser::parse(const charT* p1, const charT* p2,
m_position = m_base = p1;
m_end = p2;
// empty strings are errors:
- if(p1 == p2)
+   if((p1 == p2) &&   // only a zero-length pattern can be rejected as empty here
+      (
+         ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)   // non-Perl syntaxes always reject an empty pattern
+         || (l_flags & regbase::no_empty_expressions)   // Perl syntax rejects it only when this flag is set
+      ))   // both flag tests are grouped under (p1 == p2), so the flag alone cannot fail a non-empty pattern
{
fail(regex_constants::error_empty, 0);
return;
@@ -926,7 +930,15 @@ bool basic_regex_parser::parse_alt()
// error check: if there have been no previous states,
// or if the last state was a '(' then error:
//
- if((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
+ if(
+ ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
+ &&
+ !(
+ ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
+ &&
+ ((this->flags() & regbase::no_empty_expressions) == 0)
+ )
+ )
{
fail(regex_constants::error_empty, this->m_position - this->m_base);
return false;
@@ -2075,7 +2087,14 @@ bool basic_regex_parser::unwind_alts(std::ptrdiff_t last_paren_st
// alternative then that's an error:
//
if((this->m_alt_insert_point == static_cast(this->m_pdata->m_data.size()))
- && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
+ && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
+ &&
+ !(
+ ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
+ &&
+ ((this->flags() & regbase::no_empty_expressions) == 0)
+ )
+ )
{
fail(regex_constants::error_empty, this->m_position - this->m_base);
return false;
diff --git a/include/boost/regex/v4/regbase.hpp b/include/boost/regex/v4/regbase.hpp
index a6d8e7d1..2b737d5a 100644
--- a/include/boost/regex/v4/regbase.hpp
+++ b/include/boost/regex/v4/regbase.hpp
@@ -85,6 +85,7 @@ public:
collate = 1 << 21, // use locale specific collation
nosubs = 1 << 22, // don't mark sub-expressions
save_subexpression_location = 1 << 23, // save subexpression locations
+ no_empty_expressions = 1 << 24, // no empty expressions allowed
optimize = 0, // not really supported
@@ -143,6 +144,7 @@ namespace regex_constants{
mod_s = ::boost::regbase::mod_s,
no_mod_s = ::boost::regbase::no_mod_s,
save_subexpression_location = ::boost::regbase::save_subexpression_location,
+ no_empty_expressions = ::boost::regbase::no_empty_expressions,
basic = ::boost::regbase::basic,
extended = ::boost::regbase::extended,
diff --git a/test/regress/basic_tests.cpp b/test/regress/basic_tests.cpp
index 2c248fc0..af191823 100644
--- a/test/regress/basic_tests.cpp
+++ b/test/regress/basic_tests.cpp
@@ -42,7 +42,8 @@ void basic_tests()
TEST_REGEX_SEARCH("()", perl, "zzz", match_default, make_array(0, 0, 0, 0, -2, 1, 1, 1, 1, -2, 2, 2, 2, 2, -2, 3, 3, 3, 3, -2, -2));
TEST_REGEX_SEARCH("()", perl, "", match_default, make_array(0, 0, 0, 0, -2, -2));
TEST_INVALID_REGEX("(", perl);
- TEST_INVALID_REGEX("", perl);
+ TEST_INVALID_REGEX("", perl|no_empty_expressions);
+ TEST_REGEX_SEARCH("", perl, "abc", match_default, make_array(0, 0, -2, 1, 1, -2, 2, 2, -2, 3, 3, -2, -2));
TEST_INVALID_REGEX(")", perl);
TEST_INVALID_REGEX("(aa", perl);
TEST_INVALID_REGEX("aa)", perl);
diff --git a/test/regress/main.cpp b/test/regress/main.cpp
index f3324c24..ffbe5efe 100644
--- a/test/regress/main.cpp
+++ b/test/regress/main.cpp
@@ -33,38 +33,43 @@ int* get_array_data();
int error_count = 0;
+#define RUN_TESTS(name) /* print the test group's name to stdout, then call it; expands to one statement */ \
+   do { std::cout << "Running test case \"" #name "\".\n"; \
+        name(); } while(0)
+
+
void run_tests()
{
- basic_tests();
- test_simple_repeats();
- test_alt();
- test_sets();
- test_sets2();
- test_anchors();
- test_backrefs();
- test_character_escapes();
- test_assertion_escapes();
- test_tricky_cases();
- test_grep();
- test_replace();
- test_non_greedy_repeats();
- test_non_marking_paren();
- test_partial_match();
- test_forward_lookahead_asserts();
- test_fast_repeats();
- test_fast_repeats2();
- test_independent_subs();
- test_nosubs();
- test_conditionals();
- test_options();
- test_options2();
+ RUN_TESTS(basic_tests);
+ RUN_TESTS(test_simple_repeats);
+ RUN_TESTS(test_alt);
+ RUN_TESTS(test_sets);
+ RUN_TESTS(test_sets2);
+ RUN_TESTS(test_anchors);
+ RUN_TESTS(test_backrefs);
+ RUN_TESTS(test_character_escapes);
+ RUN_TESTS(test_assertion_escapes);
+ RUN_TESTS(test_tricky_cases);
+ RUN_TESTS(test_grep);
+ RUN_TESTS(test_replace);
+ RUN_TESTS(test_non_greedy_repeats);
+ RUN_TESTS(test_non_marking_paren);
+ RUN_TESTS(test_partial_match);
+ RUN_TESTS(test_forward_lookahead_asserts);
+ RUN_TESTS(test_fast_repeats);
+ RUN_TESTS(test_fast_repeats2);
+ RUN_TESTS(test_independent_subs);
+ RUN_TESTS(test_nosubs);
+ RUN_TESTS(test_conditionals);
+ RUN_TESTS(test_options);
+ RUN_TESTS(test_options2);
#ifndef TEST_THREADS
- test_en_locale();
+ RUN_TESTS(test_en_locale);
#endif
- test_emacs();
- test_operators();
- test_overloads();
- test_unicode();
+ RUN_TESTS(test_emacs);
+ RUN_TESTS(test_operators);
+ RUN_TESTS(test_overloads);
+ RUN_TESTS(test_unicode);
}
int cpp_main(int /*argc*/, char * /*argv*/[])
diff --git a/test/regress/test_alt.cpp b/test/regress/test_alt.cpp
index e55129a2..7eb187fc 100644
--- a/test/regress/test_alt.cpp
+++ b/test/regress/test_alt.cpp
@@ -29,11 +29,16 @@ void test_alt()
TEST_REGEX_SEARCH("a(b|c)", perl, "ad", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(a|b|c)", perl, "c", match_default, make_array(0, 1, 0, 1, -2, -2));
TEST_REGEX_SEARCH("(a|(b)|.)", perl, "b", match_default, make_array(0, 1, 0, 1, 0, 1, -2, -2));
- TEST_INVALID_REGEX("|c", perl);
- TEST_INVALID_REGEX("c|", perl);
- TEST_INVALID_REGEX("(|)", perl);
- TEST_INVALID_REGEX("(a|)", perl);
- TEST_INVALID_REGEX("(|a)", perl);
+ TEST_INVALID_REGEX("|c", perl|no_empty_expressions);
+ TEST_REGEX_SEARCH("|c", perl, " c", match_default, make_array(0, 0, -2, 1, 1, -2, 1, 2, -2, 2, 2, -2, -2));
+ TEST_INVALID_REGEX("c|", perl|no_empty_expressions);
+ TEST_REGEX_SEARCH("c|", perl, " c", match_default, make_array(0, 0, -2, 1, 2, -2, 2, 2, -2, -2));
+ TEST_INVALID_REGEX("(|)", perl|no_empty_expressions);
+ TEST_REGEX_SEARCH("(|)", perl, " c", match_default, make_array(0, 0, 0, 0, -2, 1, 1, 1, 1, -2, 2, 2, 2, 2, -2, -2));
+ TEST_INVALID_REGEX("(a|)", perl|no_empty_expressions);
+ TEST_REGEX_SEARCH("(a|)", perl, " a", match_default, make_array(0, 0, 0, 0, -2, 1, 2, 1, 2, -2, 2, 2, 2, 2, -2, -2));
+ TEST_INVALID_REGEX("(|a)", perl|no_empty_expressions);
+ TEST_REGEX_SEARCH("(|a)", perl, " a", match_default, make_array(0, 0, 0, 0, -2, 1, 1, 1, 1, -2, 1, 2, 1, 2, -2, 2, 2, 2, 2, -2, -2));
TEST_REGEX_SEARCH("a\\|", perl, "a|", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("a|", basic, "a|", match_default, make_array(0, 2, -2, -2));
diff --git a/test/regress/test_deprecated.cpp b/test/regress/test_deprecated.cpp
index 171a3b56..af50ec76 100644
--- a/test/regress/test_deprecated.cpp
+++ b/test/regress/test_deprecated.cpp
@@ -38,7 +38,7 @@ int get_posix_compile_options(boost::regex_constants::syntax_option_type opts)
{
case regbase::perl:
result = (opts & regbase::no_perl_ex) ? REG_EXTENDED : REG_PERL;
- if(opts & (regbase::no_bk_refs|regbase::no_mod_m|regbase::mod_x|regbase::mod_s|regbase::no_mod_s|regbase::no_escape_in_lists))
+ if(opts & (regbase::no_bk_refs|regbase::no_mod_m|regbase::mod_x|regbase::mod_s|regbase::no_mod_s|regbase::no_escape_in_lists|regbase::no_empty_expressions))
return -1;
break;
case regbase::basic:
|