From 40b7a4902f5ddcb9853b3caf1a206b59c51dc9da Mon Sep 17 00:00:00 2001 From: John Maddock Date: Wed, 14 Sep 2005 12:20:41 +0000 Subject: [PATCH] Fixed remaining zero-width assertion issues. Added separate file instantiation of ICU support templates. Ensured code is STLport debug mode clean. [SVN r30980] --- build/Jamfile | 10 ++ doc/Attic/history.html | 10 +- doc/history.html | 10 +- include/boost/regex/icu.hpp | 89 +++++++++++++----- include/boost/regex/regex_traits.hpp | 6 -- include/boost/regex/v4/basic_regex.hpp | 15 ++- .../boost/regex/v4/basic_regex_creator.hpp | 35 ++++++- include/boost/regex/v4/basic_regex_parser.hpp | 2 + include/boost/regex/v4/cpp_regex_traits.hpp | 2 +- include/boost/regex/v4/instances.hpp | 91 +++++++++++++++---- include/boost/regex/v4/perl_matcher.hpp | 5 +- .../boost/regex/v4/perl_matcher_common.hpp | 30 +++--- include/boost/regex/v4/regex_grep.hpp | 2 +- include/boost/regex/v4/regex_iterator.hpp | 10 +- include/boost/regex/v4/regex_match.hpp | 2 +- include/boost/regex/v4/regex_search.hpp | 14 ++- .../boost/regex/v4/regex_token_iterator.hpp | 10 +- include/boost/regex/v4/u32regex_iterator.hpp | 12 +-- .../regex/v4/u32regex_token_iterator.hpp | 12 ++- src/icu.cpp | 18 +++- test/regress/test_asserts.cpp | 5 +- test/regress/test_emacs.cpp | 9 ++ test/regress/test_grep.cpp | 10 ++ test/regress/test_icu.cpp | 39 ++++++-- test/regress/test_perl_ex.cpp | 15 +++ test/regress/test_simple_repeats.cpp | 7 ++ 26 files changed, 358 insertions(+), 112 deletions(-) diff --git a/build/Jamfile b/build/Jamfile index 28037ec0..074b830f 100644 --- a/build/Jamfile +++ b/build/Jamfile @@ -47,6 +47,10 @@ rule check-icu-config ( ) { gICU_LIBS += $(ICU_PATH)/lib/cygicuuc.dll ; } + else if [ GLOB $(ICU_PATH)/lib : cygicuuc32.dll ] + { + gICU_LIBS += $(ICU_PATH)/lib/cygicuuc32.dll ; + } else if [ GLOB /usr/local/lib : cygicuuc.dll ] { gICU_LIBS += /usr/local/lib/cygicuuc.dll ; @@ -85,6 +89,10 @@ rule check-icu-config ( ) { gICU_LIBS += 
$(ICU_PATH)/lib/cygicuin.dll ; } + else if [ GLOB $(ICU_PATH)/lib : cygicuin32.dll ] + { + gICU_LIBS += $(ICU_PATH)/lib/cygicuin32.dll ; + } else if [ GLOB /usr/local/lib : cygicuin.dll ] { gICU_LIBS += /usr/local/lib/cygicuin.dll ; @@ -255,3 +263,5 @@ install regex lib + + diff --git a/doc/Attic/history.html b/doc/Attic/history.html index a050fe0a..b3dfa193 100644 --- a/doc/Attic/history.html +++ b/doc/Attic/history.html @@ -57,7 +57,15 @@ Initialise all the data members of the unicode_iterators: this keeps gcc from issuing needless warnings.
  • - Ported the ICU integration code to VC6 and VC7.
  • + Ported the ICU integration code to VC6 and VC7. +
  • + Ensured code is STLport debug mode clean. +
  • + Fixed lookbehind assertions so that fixed length repeats are permitted, and so + that regex iteration allows lookbehind to look back before the current search + range (into the last match). +
  • + Fixed strange bug with non-greedy repeats inside forward lookahead assertions.
  • Boost 1.33.0.

    • diff --git a/doc/history.html b/doc/history.html index a050fe0a..b3dfa193 100644 --- a/doc/history.html +++ b/doc/history.html @@ -57,7 +57,15 @@ Initialise all the data members of the unicode_iterators: this keeps gcc from issuing needless warnings.
    • - Ported the ICU integration code to VC6 and VC7.
    • + Ported the ICU integration code to VC6 and VC7. +
  • + Ensured code is STLport debug mode clean. +
  • + Fixed lookbehind assertions so that fixed length repeats are permitted, and so + that regex iteration allows lookbehind to look back before the current search + range (into the last match). +
  • + Fixed strange bug with non-greedy repeats inside forward lookahead assertions.
  • Boost 1.33.0.

    • diff --git a/include/boost/regex/icu.hpp b/include/boost/regex/icu.hpp index 167b8fd0..059b7b16 100644 --- a/include/boost/regex/icu.hpp +++ b/include/boost/regex/icu.hpp @@ -203,6 +203,28 @@ private: boost::shared_ptr< ::boost::re_detail::icu_regex_traits_implementation> m_pimpl; }; +} // namespace boost + +// +// template instances: +// +#define BOOST_REGEX_CHAR_T UChar32 +#undef BOOST_REGEX_TRAITS_T +#define BOOST_REGEX_TRAITS_T , icu_regex_traits +#define BOOST_REGEX_ICU_INSTANCES +#ifdef BOOST_REGEX_ICU_INSTANTIATE +# define BOOST_REGEX_INSTANTIATE +#endif +#include +#undef BOOST_REGEX_CHAR_T +#undef BOOST_REGEX_TRAITS_T +#undef BOOST_REGEX_ICU_INSTANCES +#ifdef BOOST_REGEX_INSTANTIATE +# undef BOOST_REGEX_INSTANTIATE +#endif + +namespace boost{ + // types: typedef basic_regex< ::UChar32, icu_regex_traits> u32regex; typedef match_results u32match; @@ -258,7 +280,9 @@ inline u32regex do_make_u32regex(InputIterator i, v.push_back(*a); ++a; } - return u32regex(&*v.begin(), v.size(), opt); + if(v.size()) + return u32regex(&*v.begin(), v.size(), opt); + return u32regex(static_cast(0), static_cast(0), opt); } template @@ -276,7 +300,9 @@ inline u32regex do_make_u32regex(InputIterator i, v.push_back(*a); ++a; } - return u32regex(&*v.begin(), v.size(), opt); + if(v.size()) + return u32regex(&*v.begin(), v.size(), opt); + return u32regex(static_cast(0), static_cast(0), opt); } template @@ -292,7 +318,9 @@ inline u32regex do_make_u32regex(InputIterator i, v.push_back((UCHAR32)(*i)); ++a; } - return u32regex(&*v.begin(), v.size(), opt); + if(v.size()) + return u32regex(&*v.begin(), v.size(), opt); + return u32regex(static_cast(0), static_cast(0), opt); } #endif } @@ -547,22 +575,24 @@ inline bool do_regex_search(BidiIterator first, BidiIterator last, match_results& m, const u32regex& e, match_flag_type flags, + BidiIterator base, boost::mpl::int_<4> const*) { - return ::boost::regex_search(first, last, m, e, flags); + return ::boost::regex_search(first, last, m, e, 
flags, base); } template bool do_regex_search(BidiIterator first, BidiIterator last, match_results& m, const u32regex& e, match_flag_type flags, + BidiIterator base, boost::mpl::int_<2> const*) { typedef u16_to_u32_iterator conv_type; typedef match_results match_type; typedef typename match_type::allocator_type alloc_type; match_type what; - bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags); + bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base)); // copy results across to m: if(result) copy_results(m, what); return result; @@ -572,13 +602,14 @@ bool do_regex_search(BidiIterator first, BidiIterator last, match_results& m, const u32regex& e, match_flag_type flags, + BidiIterator base, boost::mpl::int_<1> const*) { typedef u8_to_u32_iterator conv_type; typedef match_results match_type; typedef typename match_type::allocator_type alloc_type; match_type what; - bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags); + bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base)); // copy results across to m: if(result) copy_results(m, what); return result; @@ -591,14 +622,23 @@ inline bool u32regex_search(BidiIterator first, BidiIterator last, const u32regex& e, match_flag_type flags = match_default) { - return re_detail::do_regex_search(first, last, m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast const*>(0)); +} +template +inline bool u32regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base) +{ + return re_detail::do_regex_search(first, last, m, e, flags, base, static_cast const*>(0)); } inline bool u32regex_search(const UChar* p, match_results& m, const u32regex& e, match_flag_type flags = match_default) { - return re_detail::do_regex_search(p, 
p+u_strlen(p), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast const*>(0)); } #if !defined(U_WCHAR_IS_UTF16) && !defined(BOOST_NO_WREGEX) inline bool u32regex_search(const wchar_t* p, @@ -606,7 +646,7 @@ inline bool u32regex_search(const wchar_t* p, const u32regex& e, match_flag_type flags = match_default) { - return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast const*>(0)); } #endif inline bool u32regex_search(const char* p, @@ -614,21 +654,21 @@ inline bool u32regex_search(const char* p, const u32regex& e, match_flag_type flags = match_default) { - return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast const*>(0)); } inline bool u32regex_search(const unsigned char* p, match_results& m, const u32regex& e, match_flag_type flags = match_default) { - return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast const*>(0)); } inline bool u32regex_search(const std::string& s, match_results& m, const u32regex& e, match_flag_type flags = match_default) { - return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); } #ifndef BOOST_NO_STD_WSTRING inline bool u32regex_search(const std::wstring& s, @@ -636,7 +676,7 @@ inline bool u32regex_search(const std::wstring& s, const u32regex& e, match_flag_type flags = match_default) { - return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(s.begin(), s.end(), m, e, 
flags, s.begin(), static_cast const*>(0)); } #endif inline bool u32regex_search(const UnicodeString& s, @@ -644,7 +684,7 @@ inline bool u32regex_search(const UnicodeString& s, const u32regex& e, match_flag_type flags = match_default) { - return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast const*>(0)); } template inline bool u32regex_search(BidiIterator first, BidiIterator last, @@ -652,14 +692,14 @@ inline bool u32regex_search(BidiIterator first, BidiIterator last, match_flag_type flags = match_default) { match_results m; - return re_detail::do_regex_search(first, last, m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast const*>(0)); } inline bool u32regex_search(const UChar* p, const u32regex& e, match_flag_type flags = match_default) { match_results m; - return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast const*>(0)); } #if !defined(U_WCHAR_IS_UTF16) && !defined(BOOST_NO_WREGEX) inline bool u32regex_search(const wchar_t* p, @@ -667,7 +707,7 @@ inline bool u32regex_search(const wchar_t* p, match_flag_type flags = match_default) { match_results m; - return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast const*>(0)); } #endif inline bool u32regex_search(const char* p, @@ -675,21 +715,21 @@ inline bool u32regex_search(const char* p, match_flag_type flags = match_default) { match_results m; - return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast const*>(0)); } 
inline bool u32regex_search(const unsigned char* p, const u32regex& e, match_flag_type flags = match_default) { match_results m; - return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast const*>(0)); } inline bool u32regex_search(const std::string& s, const u32regex& e, match_flag_type flags = match_default) { match_results m; - return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); } #ifndef BOOST_NO_STD_WSTRING inline bool u32regex_search(const std::wstring& s, @@ -697,7 +737,7 @@ inline bool u32regex_search(const std::wstring& s, match_flag_type flags = match_default) { match_results m; - return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); } #endif inline bool u32regex_search(const UnicodeString& s, @@ -705,7 +745,7 @@ inline bool u32regex_search(const UnicodeString& s, match_flag_type flags = match_default) { match_results m; - return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast const*>(0)); + return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast const*>(0)); } // @@ -797,7 +837,10 @@ OutputIterator do_regex_replace(OutputIterator out, { if(!(flags & regex_constants::format_no_copy)) out = re_detail::copy(i->prefix().first, i->prefix().second, out); - out = ::boost::re_detail::regex_format_imp(out, *i, &*f.begin(), &*f.end(), flags, e.get_traits()); + if(f.size()) + out = ::boost::re_detail::regex_format_imp(out, *i, &*f.begin(), &*f.begin() + f.size(), flags, e.get_traits()); + else + out = 
::boost::re_detail::regex_format_imp(out, *i, static_cast(0), static_cast(0), flags, e.get_traits()); last_m = (*i)[0].second; if(flags & regex_constants::format_first_only) break; diff --git a/include/boost/regex/regex_traits.hpp b/include/boost/regex/regex_traits.hpp index 9d81f356..730ba6e0 100644 --- a/include/boost/regex/regex_traits.hpp +++ b/include/boost/regex/regex_traits.hpp @@ -23,15 +23,9 @@ # include #endif -#ifdef BOOST_REGEX_V3 -# ifndef BOOST_REGEX_TRAITS_HPP_INCLUDED -# include -# endif -#else # ifndef BOOST_REGEX_TRAITS_HPP_INCLUDED # include # endif -#endif #endif // include diff --git a/include/boost/regex/v4/basic_regex.hpp b/include/boost/regex/v4/basic_regex.hpp index edf7c688..4d1aef27 100644 --- a/include/boost/regex/v4/basic_regex.hpp +++ b/include/boost/regex/v4/basic_regex.hpp @@ -272,7 +272,10 @@ public: { typedef typename traits::string_type seq_type; seq_type a(arg_first, arg_last); - assign(&*a.begin(), &*a.begin() + a.size(), f); + if(a.size()) + assign(&*a.begin(), &*a.begin() + a.size(), f); + else + assign(static_cast(0), static_cast(0), f); } template @@ -296,9 +299,13 @@ public: { typedef typename traits::string_type seq_type; seq_type a(arg_first, arg_last); - const charT* p1 = &*a.begin(); - const charT* p2 = &*a.begin() + a.size(); - return assign(p1, p2, f); + if(a.size()) + { + const charT* p1 = &*a.begin(); + const charT* p2 = &*a.begin() + a.size(); + return assign(p1, p2, f); + } + return assign(static_cast(0), static_cast(0), f); } #else unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string& p, flag_type f = regex_constants::normal) diff --git a/include/boost/regex/v4/basic_regex_creator.hpp b/include/boost/regex/v4/basic_regex_creator.hpp index 86ce0594..20602ebf 100644 --- a/include/boost/regex/v4/basic_regex_creator.hpp +++ b/include/boost/regex/v4/basic_regex_creator.hpp @@ -762,6 +762,7 @@ void basic_regex_creator::create_startmaps(re_syntax_base* state) state = p.second; v.pop_back(); + // Build 
maps: create_startmap(state->next.p, static_cast(state)->_map, &static_cast(state)->can_be_null, mask_take); m_bad_repeats = 0; create_startmap(static_cast(state)->alt.p, static_cast(state)->_map, &static_cast(state)->can_be_null, mask_skip); @@ -806,14 +807,40 @@ int basic_regex_creator::calculate_backstep(re_syntax_base* state case syntax_element_set: result += 1; break; - case syntax_element_backref: - case syntax_element_rep: - case syntax_element_combining: case syntax_element_dot_rep: case syntax_element_char_rep: case syntax_element_short_set_rep: + case syntax_element_backref: + case syntax_element_rep: + case syntax_element_combining: case syntax_element_long_set_rep: case syntax_element_backstep: + { + re_repeat* rep = static_cast(state); + // adjust the type of the state to allow for faster matching: + state->type = this->get_repeat_type(state); + if((state->type == syntax_element_dot_rep) + || (state->type == syntax_element_char_rep) + || (state->type == syntax_element_short_set_rep)) + { + if(rep->max != rep->min) + return -1; + result += static_cast(rep->min); + state = rep->alt.p; + continue; + } + else if((state->type == syntax_element_long_set_rep)) + { + BOOST_ASSERT(rep->next.p->type == syntax_element_long_set); + if(static_cast*>(rep->next.p)->singleton == 0) + return -1; + if(rep->max != rep->min) + return -1; + result += static_cast(rep->min); + state = rep->alt.p; + continue; + } + } return -1; case syntax_element_long_set: if(static_cast*>(state)->singleton == 0) @@ -1027,7 +1054,7 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, return; case syntax_element_endmark: // need to handle independent subs as a special case: - if(static_cast(state)->index == -3) + if(static_cast(state)->index < 0) { // can be null, any character can match: set_all_masks(l_map, mask); diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index f544fdaa..f17a440e 100644 --- 
a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -1729,6 +1729,8 @@ bool basic_regex_parser::parse_perl_extension() // lets assume that we have a (?imsx) group and try and parse it: // regex_constants::syntax_option_type opts = parse_options(); + if(m_position == m_end) + return false; // make a note of whether we have a case change: m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase)); pb->index = markid = 0; diff --git a/include/boost/regex/v4/cpp_regex_traits.hpp b/include/boost/regex/v4/cpp_regex_traits.hpp index 0e6e4aca..f7ba0bcc 100644 --- a/include/boost/regex/v4/cpp_regex_traits.hpp +++ b/include/boost/regex/v4/cpp_regex_traits.hpp @@ -816,7 +816,7 @@ bool cpp_regex_traits_implementation::isctype(const charT c, char_class_t template -boost::shared_ptr > create_cpp_regex_traits(const std::locale& l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT)) +inline boost::shared_ptr > create_cpp_regex_traits(const std::locale& l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT)) { cpp_regex_traits_base key(l); return ::boost::object_cache, cpp_regex_traits_implementation >::get(key, 5); diff --git a/include/boost/regex/v4/instances.hpp b/include/boost/regex/v4/instances.hpp index 9fcfa41a..a0a4ee41 100644 --- a/include/boost/regex/v4/instances.hpp +++ b/include/boost/regex/v4/instances.hpp @@ -33,6 +33,10 @@ namespace boost{ # error "BOOST_REGEX_CHAR_T not defined" #endif +#ifndef BOOST_REGEX_TRAITS_T +# define BOOST_REGEX_TRAITS_T , boost::regex_traits +#endif + // // what follows is compiler specific: // @@ -47,7 +51,11 @@ namespace boost{ # pragma option push -Jgx # endif -template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T >; +template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >; +template class BOOST_REGEX_DECL match_results< const BOOST_REGEX_CHAR_T* >; +#ifndef BOOST_NO_STD_ALLOCATOR +template class BOOST_REGEX_DECL 
::boost::re_detail::perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >; +#endif # ifndef BOOST_REGEX_INSTANTIATE # pragma option pop @@ -72,16 +80,18 @@ template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T >; # pragma warning(disable : 4251 4231 4660) # endif -template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T >; +template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >; + #if !BOOST_WORKAROUND(BOOST_MSVC, < 1300) template class BOOST_REGEX_DECL match_results< const BOOST_REGEX_CHAR_T* >; #endif #ifndef BOOST_NO_STD_ALLOCATOR -template class BOOST_REGEX_DECL ::boost::re_detail::perl_matcher::allocator_type, boost::regex_traits >; +template class BOOST_REGEX_DECL ::boost::re_detail::perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >; #endif #if !(defined(BOOST_DINKUMWARE_STDLIB) && (BOOST_DINKUMWARE_STDLIB <= 1))\ && !(defined(BOOST_INTEL_CXX_VERSION) && (BOOST_INTEL_CXX_VERSION <= 800))\ - && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) + && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION))\ + && !defined(BOOST_REGEX_ICU_INSTANCES) #if !BOOST_WORKAROUND(BOOST_MSVC, < 1300) template class BOOST_REGEX_DECL match_results< std::basic_string::const_iterator >; #endif @@ -105,40 +115,87 @@ template class BOOST_REGEX_DECL ::boost::re_detail::perl_matcher< std::basic_str # define template __extension__ extern template # endif -template BOOST_REGEX_DECL basic_regex& - basic_regex::do_assign( +#if !defined(BOOST_NO_STD_LOCALE) && !defined(BOOST_REGEX_ICU_INSTANCES) +namespace re_detail{ +template BOOST_REGEX_DECL +std::locale cpp_regex_traits_base::imbue(const std::locale& l); +/* +template BOOST_REGEX_DECL +void cpp_regex_traits_char_layer::init(); +template BOOST_REGEX_DECL +typename cpp_regex_traits_char_layer::string_type + cpp_regex_traits_char_layer::get_default_message(regex_constants::syntax_type i); + */ +template BOOST_REGEX_DECL +cpp_regex_traits_implementation::string_type + 
cpp_regex_traits_implementation::transform_primary(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const; +template BOOST_REGEX_DECL +cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::transform(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const; +template BOOST_REGEX_DECL +cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::lookup_collatename(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const; +template BOOST_REGEX_DECL +void cpp_regex_traits_implementation::init(); +template BOOST_REGEX_DECL +cpp_regex_traits_implementation::char_class_type + cpp_regex_traits_implementation::lookup_classname_imp(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const; +#ifdef BOOST_REGEX_BUGGY_CTYPE_FACET +template BOOST_REGEX_DECL +bool cpp_regex_traits_implementation::isctype(const BOOST_REGEX_CHAR_T c, char_class_type mask) const; +#endif +} // namespace +template BOOST_REGEX_DECL +int cpp_regex_traits::toi(const BOOST_REGEX_CHAR_T*& first, const BOOST_REGEX_CHAR_T* last, int radix)const; +template BOOST_REGEX_DECL +std::string cpp_regex_traits::catalog_name(const std::string& name); +template BOOST_REGEX_DECL +std::string& cpp_regex_traits::get_catalog_name_inst(); +template BOOST_REGEX_DECL +std::string cpp_regex_traits::get_catalog_name(); +#ifdef BOOST_HAS_THREADS +template BOOST_REGEX_DECL +static_mutex& cpp_regex_traits::get_mutex_inst(); +#endif +#endif + +template BOOST_REGEX_DECL basic_regex& + basic_regex::do_assign( const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2, flag_type f); -template BOOST_REGEX_DECL basic_regex::locale_type BOOST_REGEX_CALL - basic_regex::imbue(locale_type l); +template BOOST_REGEX_DECL basic_regex::locale_type BOOST_REGEX_CALL + basic_regex::imbue(locale_type l); template BOOST_REGEX_DECL void BOOST_REGEX_CALL match_results::maybe_assign( const match_results& m); - -template BOOST_REGEX_DECL void 
::boost::re_detail::perl_matcher::allocator_type, boost::regex_traits >::construct_init( +namespace re_detail{ +template BOOST_REGEX_DECL void perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >::construct_init( BOOST_REGEX_CHAR_T const * first, BOOST_REGEX_CHAR_T const * end, match_results& what, - const basic_regex& e, + const basic_regex& e, match_flag_type f); -template BOOST_REGEX_DECL bool ::boost::re_detail::perl_matcher::allocator_type, boost::regex_traits >::match(); -template BOOST_REGEX_DECL bool ::boost::re_detail::perl_matcher::allocator_type, boost::regex_traits >::find(); +template BOOST_REGEX_DECL bool perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >::match(); +template BOOST_REGEX_DECL bool perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >::find(); +} // namespace -#if defined(__GLIBCPP__) || defined(__GLIBCXX__) +#if (defined(__GLIBCPP__) || defined(__GLIBCXX__)) && !defined(BOOST_REGEX_ICU_INSTANCES) // std:basic_string<>::const_iterator instances as well: template BOOST_REGEX_DECL void BOOST_REGEX_CALL match_results::const_iterator>::maybe_assign( const match_results::const_iterator>& m); -template BOOST_REGEX_DECL void ::boost::re_detail::perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, boost::regex_traits >::construct_init( +namespace re_detail{ +template BOOST_REGEX_DECL void perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, boost::regex_traits >::construct_init( std::basic_string::const_iterator first, std::basic_string::const_iterator end, match_results::const_iterator>& what, const basic_regex& e, match_flag_type f); -template BOOST_REGEX_DECL bool ::boost::re_detail::perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, boost::regex_traits >::match(); -template BOOST_REGEX_DECL bool ::boost::re_detail::perl_matcher::const_iterator, match_results< std::basic_string::const_iterator 
>::allocator_type, boost::regex_traits >::find(); +template BOOST_REGEX_DECL bool perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, boost::regex_traits >::match(); +template BOOST_REGEX_DECL bool perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, boost::regex_traits >::find(); +} // namespace #endif # ifdef template diff --git a/include/boost/regex/v4/perl_matcher.hpp b/include/boost/regex/v4/perl_matcher.hpp index 6a4fa1dc..f8009273 100644 --- a/include/boost/regex/v4/perl_matcher.hpp +++ b/include/boost/regex/v4/perl_matcher.hpp @@ -324,7 +324,8 @@ public: perl_matcher(BidiIterator first, BidiIterator end, match_results& what, const basic_regex& e, - match_flag_type f); + match_flag_type f, + BidiIterator base); bool match(); bool find(); @@ -409,6 +410,8 @@ private: BidiIterator restart; // where the current search started from, acts as base for $` during grep: BidiIterator search_base; + // how far we can go back when matching lookbehind: + BidiIterator backstop; // the expression being examined: const basic_regex& re; // the expression's traits class: diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index 5f4e6fa0..dd63e98e 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -35,9 +35,10 @@ template perl_matcher::perl_matcher(BidiIterator first, BidiIterator end, match_results& what, const basic_regex& e, - match_flag_type f) + match_flag_type f, + BidiIterator b) : m_result(what), base(first), last(end), - position(first), re(e), traits_inst(e.get_traits()), + position(first), backstop(b), re(e), traits_inst(e.get_traits()), m_independent(false), next_count(&rep_obj), rep_obj(&next_count) { construct_init(first, last, what, e, f); @@ -161,7 +162,7 @@ bool perl_matcher::match_imp() search_base = base; state_count = 0; m_match_flags |= 
regex_constants::match_all; - m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), base, last); + m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last); m_presult->set_base(base); if(m_match_flags & match_posix) m_result = *m_presult; @@ -220,8 +221,7 @@ bool perl_matcher::find_imp() if((m_match_flags & regex_constants::match_init) == 0) { // reset our state machine: - position = base; - search_base = base; + search_base = position = base; pstate = re.get_first_state(); m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), base, last); m_presult->set_base(base); @@ -242,8 +242,8 @@ bool perl_matcher::find_imp() } // reset $` start: m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last); - if(base != search_base) - m_match_flags |= match_prev_avail; + //if((base != search_base) && (base == backstop)) + // m_match_flags |= match_prev_avail; } if(m_match_flags & match_posix) { @@ -346,7 +346,7 @@ bool perl_matcher::match_literal() template bool perl_matcher::match_start_line() { - if(position == base) + if(position == backstop) { if((m_match_flags & match_prev_avail) == 0) { @@ -390,7 +390,7 @@ bool perl_matcher::match_end_line() // we're not yet at the end so *first is always valid: if(is_separator(*position)) { - if((position != base) || (m_match_flags & match_prev_avail)) + if((position != backstop) || (m_match_flags & match_prev_avail)) { // check that we're not in the middle of \r\n sequence BidiIterator t(position); @@ -472,7 +472,7 @@ bool perl_matcher::match_word_boundary() { b = (m_match_flags & match_not_eow) ? 
true : false; } - if((position == base) && ((m_match_flags & match_prev_avail) == 0)) + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) { if(m_match_flags & match_not_bow) b ^= true; @@ -502,7 +502,7 @@ bool perl_matcher::match_within_word() if(traits_inst.isctype(*position, m_word_mask)) { bool b; - if((position == base) && ((m_match_flags & match_prev_avail) == 0)) + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) return false; else { @@ -526,7 +526,7 @@ bool perl_matcher::match_word_start() return false; // can't be starting a word if we're already at the end of input if(!traits_inst.isctype(*position, m_word_mask)) return false; // next character isn't a word character - if((position == base) && ((m_match_flags & match_prev_avail) == 0)) + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) { if(m_match_flags & match_not_bow) return false; // no previous input @@ -547,7 +547,7 @@ bool perl_matcher::match_word_start() template bool perl_matcher::match_word_end() { - if((position == base) && ((m_match_flags & match_prev_avail) == 0)) + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) return false; // start of buffer can't be end of word BidiIterator t(position); --t; @@ -572,7 +572,7 @@ bool perl_matcher::match_word_end() template bool perl_matcher::match_buffer_start() { - if((position != base) || (m_match_flags & match_not_bob)) + if((position != backstop) || (m_match_flags & match_not_bob)) return false; // OK match: pstate = pstate->next.p; @@ -685,7 +685,7 @@ bool perl_matcher::match_restart_continue() template bool perl_matcher::match_backstep() { - std::ptrdiff_t maxlen = ::boost::re_detail::distance(search_base, position); + std::ptrdiff_t maxlen = ::boost::re_detail::distance(backstop, position); if(maxlen < static_cast(pstate)->index) return false; std::advance(position, -static_cast(pstate)->index); diff --git a/include/boost/regex/v4/regex_grep.hpp 
b/include/boost/regex/v4/regex_grep.hpp index faba4947..b8bb8479 100644 --- a/include/boost/regex/v4/regex_grep.hpp +++ b/include/boost/regex/v4/regex_grep.hpp @@ -43,7 +43,7 @@ inline unsigned int regex_grep(Predicate foo, typedef typename match_results::allocator_type match_allocator_type; match_results m; - re_detail::perl_matcher matcher(first, last, m, e, flags); + re_detail::perl_matcher matcher(first, last, m, e, flags, first); unsigned int count = 0; while(matcher.find()) { diff --git a/include/boost/regex/v4/regex_iterator.hpp b/include/boost/regex/v4/regex_iterator.hpp index 3a7328b2..eb51a46f 100644 --- a/include/boost/regex/v4/regex_iterator.hpp +++ b/include/boost/regex/v4/regex_iterator.hpp @@ -57,15 +57,15 @@ public: { return what; } bool next() { - if(what.prefix().first != what[0].second) - flags |= match_prev_avail; + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail; BidirectionalIterator next_start = what[0].second; match_flag_type f(flags); if(!what.length()) f |= regex_constants::match_not_initial_null; - if(base != next_start) - f |= regex_constants::match_not_bob; - bool result = regex_search(next_start, end, what, re, f); + //if(base != next_start) + // f |= regex_constants::match_not_bob; + bool result = regex_search(next_start, end, what, re, f, base); if(result) what.set_base(base); return result; diff --git a/include/boost/regex/v4/regex_match.hpp b/include/boost/regex/v4/regex_match.hpp index b12de6fe..7c1f88d6 100644 --- a/include/boost/regex/v4/regex_match.hpp +++ b/include/boost/regex/v4/regex_match.hpp @@ -39,7 +39,7 @@ bool regex_match(BidiIterator first, BidiIterator last, const basic_regex& e, match_flag_type flags = match_default) { - re_detail::perl_matcher matcher(first, last, m, e, flags); + re_detail::perl_matcher matcher(first, last, m, e, flags, first); return matcher.match(); } template diff --git a/include/boost/regex/v4/regex_search.hpp b/include/boost/regex/v4/regex_search.hpp index 
784e2104..ee6028c2 100644 --- a/include/boost/regex/v4/regex_search.hpp +++ b/include/boost/regex/v4/regex_search.hpp @@ -31,11 +31,21 @@ bool regex_search(BidiIterator first, BidiIterator last, match_results& m, const basic_regex& e, match_flag_type flags = match_default) +{ + return regex_search(first, last, m, e, flags, first); +} + +template +bool regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const basic_regex& e, + match_flag_type flags, + BidiIterator base) { if(e.flags() & regex_constants::failbit) return false; - re_detail::perl_matcher matcher(first, last, m, e, flags); + re_detail::perl_matcher matcher(first, last, m, e, flags, base); return matcher.find(); } @@ -124,7 +134,7 @@ bool regex_search(BidiIterator first, BidiIterator last, match_results m; typedef typename match_results::allocator_type match_alloc_type; - re_detail::perl_matcher matcher(first, last, m, e, flags | regex_constants::match_any); + re_detail::perl_matcher matcher(first, last, m, e, flags | regex_constants::match_any, first); return matcher.find(); } diff --git a/include/boost/regex/v4/regex_token_iterator.hpp b/include/boost/regex/v4/regex_token_iterator.hpp index 7b259cd6..adbb534d 100644 --- a/include/boost/regex/v4/regex_token_iterator.hpp +++ b/include/boost/regex/v4/regex_token_iterator.hpp @@ -53,6 +53,7 @@ class regex_token_iterator_implementation typedef sub_match value_type; match_results what; // current match + BidirectionalIterator base; // start of search area BidirectionalIterator end; // end of search area const regex_type re; // the expression match_flag_type flags; // match flags @@ -97,7 +98,8 @@ public: bool init(BidirectionalIterator first) { N = 0; - if(regex_search(first, end, what, re, flags) == true) + base = first; + if(regex_search(first, end, what, re, flags, base) == true) { N = 0; result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]); @@ -134,10 +136,10 @@ public: result =((subs[N] == -1) ? 
what.prefix() : what[subs[N]]); return true; } - if(what.prefix().first != what[0].second) - flags |= match_prev_avail | regex_constants::match_not_bob; + //if(what.prefix().first != what[0].second) + // flags |= /*match_prev_avail |*/ regex_constants::match_not_bob; BidirectionalIterator last_end(what[0].second); - if(regex_search(last_end, end, what, re, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags))) + if(regex_search(last_end, end, what, re, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags), base)) { N =0; result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); diff --git a/include/boost/regex/v4/u32regex_iterator.hpp b/include/boost/regex/v4/u32regex_iterator.hpp index 3491ed95..9e49c6f3 100644 --- a/include/boost/regex/v4/u32regex_iterator.hpp +++ b/include/boost/regex/v4/u32regex_iterator.hpp @@ -42,7 +42,7 @@ public: bool init(BidirectionalIterator first) { base = first; - return u32regex_search(first, end, what, re, flags); + return u32regex_search(first, end, what, re, flags, base); } bool compare(const u32regex_iterator_implementation& that) { @@ -53,15 +53,15 @@ public: { return what; } bool next() { - if(what.prefix().first != what[0].second) - flags |= match_prev_avail; + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail; BidirectionalIterator next_start = what[0].second; match_flag_type f(flags); if(!what.length()) f |= regex_constants::match_not_initial_null; - if(base != next_start) - f |= regex_constants::match_not_bob; - bool result = u32regex_search(next_start, end, what, re, f); + //if(base != next_start) + // f |= regex_constants::match_not_bob; + bool result = u32regex_search(next_start, end, what, re, f, base); if(result) what.set_base(base); return result; diff --git a/include/boost/regex/v4/u32regex_token_iterator.hpp b/include/boost/regex/v4/u32regex_token_iterator.hpp index f6e949f8..323d0024 100644 --- 
a/include/boost/regex/v4/u32regex_token_iterator.hpp +++ b/include/boost/regex/v4/u32regex_token_iterator.hpp @@ -50,7 +50,8 @@ class u32regex_token_iterator_implementation match_results what; // current match BidirectionalIterator end; // end of search area - const regex_type re; // the expression + BidirectionalIterator base; // start of search area + const regex_type re; // the expression match_flag_type flags; // match flags value_type result; // the current string result int N; // the current sub-expression being enumerated @@ -93,8 +94,9 @@ public: bool init(BidirectionalIterator first) { + base = first; N = 0; - if(u32regex_search(first, end, what, re, flags) == true) + if(u32regex_search(first, end, what, re, flags, base) == true) { N = 0; result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]); @@ -131,10 +133,10 @@ public: result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); return true; } - if(what.prefix().first != what[0].second) - flags |= match_prev_avail | regex_constants::match_not_bob; + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail | regex_constants::match_not_bob; BidirectionalIterator last_end(what[0].second); - if(u32regex_search(last_end, end, what, re, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags))) + if(u32regex_search(last_end, end, what, re, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags), base)) { N =0; result =((subs[N] == -1) ? 
what.prefix() : what[subs[N]]); diff --git a/src/icu.cpp b/src/icu.cpp index 8ad9afc7..e06c3176 100644 --- a/src/icu.cpp +++ b/src/icu.cpp @@ -19,6 +19,7 @@ #include #ifdef BOOST_HAS_ICU +#define BOOST_REGEX_ICU_INSTANTIATE #include namespace boost{ @@ -38,11 +39,18 @@ icu_regex_traits_implementation::string_type icu_regex_traits_implementation::do t.push_back(*i++); #endif ::uint8_t result[100]; - ::int32_t len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), result, sizeof(result)); + ::int32_t len; + if(t.size()) + len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), result, sizeof(result)); + else + len = pcoll->getSortKey(static_cast(0), static_cast< ::int32_t>(0), result, sizeof(result)); if(std::size_t(len) > sizeof(result)) { scoped_array< ::uint8_t> presult(new ::uint8_t[len+1]); - len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), presult.get(), len+1); + if(t.size()) + len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), presult.get(), len+1); + else + len = pcoll->getSortKey(static_cast(0), static_cast< ::int32_t>(0), presult.get(), len+1); if((0 == presult[len-1]) && (len > 1)) --len; #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS @@ -398,10 +406,12 @@ icu_regex_traits::char_class_type icu_regex_traits::lookup_classname(const char_ ++i; } } - id = ::boost::re_detail::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); + if(s.size()) + id = ::boost::re_detail::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); if(id >= 0) return masks[id+1]; - result = lookup_icu_mask(&*s.begin(), &*s.begin() + s.size()); + if(s.size()) + result = lookup_icu_mask(&*s.begin(), &*s.begin() + s.size()); if(result != 0) return result; } diff --git a/test/regress/test_asserts.cpp b/test/regress/test_asserts.cpp index 909f5618..6ec2e450 100644 --- a/test/regress/test_asserts.cpp +++ b/test/regress/test_asserts.cpp @@ -41,13 +41,16 @@ void test_forward_lookahead_asserts() 
TEST_REGEX_SEARCH("^(?=.*\\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$", perl, "abC3", match_default, make_array(0, 4, -2, -2)); TEST_REGEX_SEARCH("^(?=.*\\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$", perl, "ABCD3", match_default, make_array(-2, -2)); + // bug report test cases: + TEST_REGEX_SEARCH("(?=.{1,10}$).*.", perl, "AAAAA", match_default, make_array(0, 5, -2, -2)); + // lookbehind assertions, added 2004-04-30 TEST_REGEX_SEARCH("/\\*.*(?<=\\*)/", perl, "/**/", match_default, make_array(0, 4, -2, -2)); TEST_REGEX_SEARCH("/\\*.*(?<=\\*)/", perl, "/*****/ ", match_default, make_array(0, 7, -2, -2)); TEST_REGEX_SEARCH("(?<=['\"]).*?(?=['\"])", perl, " 'ac' ", match_default, make_array(2, 4, -2, -2)); TEST_REGEX_SEARCH("(?<=['\"]).*?(?=['\"])", perl, " \"ac\" ", match_default, make_array(2, 4, -2, -2)); TEST_REGEX_SEARCH("(?<=['\"]).*?(?(0), expression.size(), syntax_options); #endif if(r.status()) { @@ -379,7 +382,11 @@ void test_icu(const wchar_t&, const test_invalid_regex_tag&) #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) if(0 == r.assign(expression.begin(), expression.end(), syntax_options | boost::regex_constants::no_except).status()) #else - if(0 == r.assign(&*expression.begin(), expression.size(), syntax_options | boost::regex_constants::no_except).status()) + if(expression.size()) + r.assign(&*expression.begin(), expression.size(), syntax_options | boost::regex_constants::no_except); + else + r.assign(static_cast(0), static_cast(0), syntax_options | boost::regex_constants::no_except); + if(0 == r.status()) #endif { BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t); @@ -397,7 +404,10 @@ void test_icu(const wchar_t&, const test_invalid_regex_tag&) #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) r.assign(expression.begin(), expression.end(), syntax_options); #else - r.assign(&*expression.begin(), expression.size(), syntax_options); + if(expression.size()) + r.assign(&*expression.begin(), expression.size(), 
syntax_options); + else + r.assign(static_cast(0), static_cast(0), syntax_options); #endif #ifdef BOOST_NO_EXCEPTIONS if(r.status()) @@ -492,7 +502,10 @@ void test_icu(const wchar_t&, const test_regex_replace_tag&) #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) r.assign(expression.begin(), expression.end(), syntax_options); #else - r.assign(&*expression.begin(), expression.size(), syntax_options); + if(expression.size()) + r.assign(&*expression.begin(), expression.size(), syntax_options); + else + r.assign(static_cast(0), static_cast(0), syntax_options); #endif if(r.status()) { @@ -552,10 +565,13 @@ void test_icu(const wchar_t&, const test_regex_replace_tag&) // Now with UnicodeString: // UnicodeString expression16u, text16u, format16u, result16u, found16u; - expression16u.setTo(&*expression16.begin(), expression16.size()); - text16u.setTo(&*text16.begin(), text16.size()); + if(expression16.size()) + expression16u.setTo(&*expression16.begin(), expression16.size()); + if(text16.size()) + text16u.setTo(&*text16.begin(), text16.size()); format16u.setTo(&*format16.begin(), format16.size()-1); - result16u.setTo(&*result16.begin(), result16.size()); + if(result16.size()) + result16u.setTo(&*result16.begin(), result16.size()); r = boost::make_u32regex(expression16.begin(), expression16.end(), syntax_options); found16u = boost::u32regex_replace(text16u, r, format16u, opts); if(result16u != found16u) @@ -589,10 +605,13 @@ void test_icu(const wchar_t&, const test_regex_replace_tag&) // Now with std::string and UTF-8: // std::string expression8s, text8s, format8s, result8s, found8s; - expression8s.assign(&*expression8.begin(), expression8.size()); - text8s.assign(&*text8.begin(), text8.size()); + if(expression8.size()) + expression8s.assign(&*expression8.begin(), expression8.size()); + if(text8.size()) + text8s.assign(&*text8.begin(), text8.size()); format8s.assign(&*format8.begin(), format8.size()-1); - result8s.assign(&*result8.begin(), result8.size()); + 
if(result8.size()) + result8s.assign(&*result8.begin(), result8.size()); r = boost::make_u32regex(expression8.begin(), expression8.end(), syntax_options); found8s = boost::u32regex_replace(text8s, r, format8s, opts); if(result8s != found8s) diff --git a/test/regress/test_perl_ex.cpp b/test/regress/test_perl_ex.cpp index 1ebd64dc..82a0517f 100644 --- a/test/regress/test_perl_ex.cpp +++ b/test/regress/test_perl_ex.cpp @@ -15,6 +15,8 @@ #pragma warning(disable:4127) #endif +void test_options3(); + void test_independent_subs() { using namespace boost::regex_constants; @@ -258,6 +260,13 @@ void test_options2() TEST_REGEX_SEARCH("(?<=a(?i)b)(\\w\\w)c", perl, "ABxxc", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("(?<=a(?i)b)(\\w\\w)c", perl, "abxxC", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("(?<=^.{4})(?:bar|cat)", perl, "fooocat", match_default, make_array(4, 7, -2, -2)); + TEST_REGEX_SEARCH("(?<=^.{4})(?:bar|cat)", perl, "foocat", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("(?<=^a{4})(?:bar|cat)", perl, "aaaacat", match_default, make_array(4, 7, -2, -2)); + TEST_REGEX_SEARCH("(?<=^a{4})(?:bar|cat)", perl, "aaacat", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("(?<=^[[:alpha:]]{4})(?:bar|cat)", perl, "aaaacat", match_default, make_array(4, 7, -2, -2)); + TEST_REGEX_SEARCH("(?<=^[[:alpha:]]{4})(?:bar|cat)", perl, "aaacat", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ", perl, "abxyZZ", match_default, make_array(4, 6, -2, -2)); TEST_REGEX_SEARCH("(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ", perl, "abXyZZ", match_default, make_array(4, 6, -2, -2)); TEST_REGEX_SEARCH("(?:ab(?i)x(?-i)y|(?i)z|b)ZZ", perl, "ZZZ", match_default, make_array(0, 3, -2, -2)); @@ -315,6 +324,12 @@ void test_options2() TEST_REGEX_SEARCH("(?s).", perl, "\n", match_default|match_not_dot_newline, make_array(0, 1, -2, -2)); TEST_REGEX_SEARCH("(?-s).", perl, "\n", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("(?-s).", perl, 
"\n", match_default|match_not_dot_newline, make_array(-2, -2)); + test_options3(); +} + +void test_options3() +{ + using namespace boost::regex_constants; TEST_REGEX_SEARCH(".+", perl, " \n ", match_default, make_array(0, 5, -2, -2)); TEST_REGEX_SEARCH(".+", perl, " \n ", match_default|match_not_dot_newline, make_array(0, 2, -2, 3, 5, -2, -2)); diff --git a/test/regress/test_simple_repeats.cpp b/test/regress/test_simple_repeats.cpp index e095ac20..684d2d99 100644 --- a/test/regress/test_simple_repeats.cpp +++ b/test/regress/test_simple_repeats.cpp @@ -15,6 +15,8 @@ #pragma warning(disable:4127) #endif +void test_simple_repeats2(); + void test_simple_repeats() { using namespace boost::regex_constants; @@ -111,6 +113,11 @@ void test_simple_repeats() TEST_REGEX_SEARCH("^a{0,1}?$", perl, "aaaaa", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("^(?:a){0,1}?$", perl, "aaaaa", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("^a(?:bc)?", perl, "abcbc", match_any|match_all, make_array(-2, -2)); +} + +void test_simple_repeats2() +{ + using namespace boost::regex_constants; TEST_INVALID_REGEX("a{}", perl); TEST_INVALID_REGEX("a{", perl);