From b0027d4b86e1fb745d0d60ae29a7287d3a595eaa Mon Sep 17 00:00:00 2001 From: John Maddock Date: Tue, 27 Jul 2004 12:45:32 +0000 Subject: [PATCH] Added tests for locales. [SVN r24113] --- .../boost/regex/v4/basic_regex_creator.hpp | 18 +- include/boost/regex/v4/basic_regex_parser.hpp | 19 +- include/boost/regex/v4/cpp_regex_traits.hpp | 95 +- src/c_regex_traits.cpp | 7 +- src/cpp_regex_traits.cpp | 4 + src/wc_regex_traits.cpp | 3 +- test/Jamfile | 28 +- test/regress/basic_tests.cpp | 1862 ----------------- test/regress/info.hpp | 31 +- test/regress/main.cpp | 2 + test/regress/test.hpp | 25 +- test/regress/test_alt.cpp | 42 + test/regress/test_anchors.cpp | 63 + test/regress/test_asserts.cpp | 65 + test/regress/test_backrefs.cpp | 39 + test/regress/test_escapes.cpp | 141 ++ test/regress/test_grep.cpp | 50 + test/regress/test_locale.cpp | 101 + test/regress/test_locale.hpp | 62 + test/regress/test_non_greedy_repeats.cpp | 38 + test/regress/test_not_regex.hpp | 17 + test/regress/test_perl_ex.cpp | 618 ++++++ test/regress/test_replace.cpp | 100 + test/regress/test_sets.cpp | 230 ++ test/regress/test_simple_repeats.cpp | 341 +++ test/regress/test_tricky_cases.cpp | 292 +++ 26 files changed, 2373 insertions(+), 1920 deletions(-) create mode 100644 test/regress/test_alt.cpp create mode 100644 test/regress/test_anchors.cpp create mode 100644 test/regress/test_asserts.cpp create mode 100644 test/regress/test_backrefs.cpp create mode 100644 test/regress/test_escapes.cpp create mode 100644 test/regress/test_grep.cpp create mode 100644 test/regress/test_locale.cpp create mode 100644 test/regress/test_locale.hpp create mode 100644 test/regress/test_non_greedy_repeats.cpp create mode 100644 test/regress/test_perl_ex.cpp create mode 100644 test/regress/test_replace.cpp create mode 100644 test/regress/test_sets.cpp create mode 100644 test/regress/test_simple_repeats.cpp create mode 100644 test/regress/test_tricky_cases.cpp diff --git a/include/boost/regex/v4/basic_regex_creator.hpp b/include/boost/regex/v4/basic_regex_creator.hpp index d57f47e8..9683cdfd 100644 --- a/include/boost/regex/v4/basic_regex_creator.hpp +++ b/include/boost/regex/v4/basic_regex_creator.hpp @@ -429,9 +429,11 @@ re_syntax_base* basic_regex_creator::append_set( return 0; } charT* p = static_cast(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) ); - std::memcpy(p, &*s1.begin(), sizeof(charT) * (s1.size() + 1)); + std::copy(s1.begin(), s1.end(), p); + p[s1.size()] = charT(0); p += s1.size() + 1; - std::memcpy(p, &*s2.begin(), sizeof(charT) * (s2.size() + 1)); + std::copy(s2.begin(), s2.end(), p); + p[s2.size()] = charT(0); } // // now process the equivalence classes: @@ -451,7 +453,8 @@ re_syntax_base* basic_regex_creator::append_set( if(s.empty()) return 0; // invalid or unsupported equivalence class charT* p = static_cast(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) ); - std::memcpy(p, &*s.begin(), sizeof(charT) * (s.size() + 1)); + std::copy(s.begin(), s.end(), p); + p[s.size()] = charT(0); ++first; } // @@ -691,9 +694,12 @@ void basic_regex_creator::create_startmaps(re_syntax_base* state) // Oops error: if(0 == this->m_pdata->m_status) // update the error code if not already set this->m_pdata->m_status = boost::regex_constants::error_brack; - std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_brack); - boost::regex_error e(message, boost::regex_constants::error_brack, 0); - e.raise(); + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_brack); + boost::regex_error e(message, boost::regex_constants::error_brack, 0); + e.raise(); + } } // fall through: default: diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index 38f9210d..824828d0 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -81,17 +81,17 @@ basic_regex_parser::basic_regex_parser(regex_data* template void basic_regex_parser::parse(const charT* p1, const charT* p2, unsigned flags) { + // pass flags on to base class: + this->init(flags); + // set up pointers: + m_position = m_base = p1; + m_end = p2; // empty strings are errors: if(p1 == p2) { fail(regex_constants::error_empty, 0); return; } - // pass flags on to base class: - this->init(flags); - // set up pointers: - m_position = m_base = p1; - m_end = p2; // select which parser to use: switch(flags & regbase::main_option_type) { @@ -134,8 +134,13 @@ void basic_regex_parser::fail(regex_constants::error_type error_c // get the error message: std::string message = this->m_pdata->m_ptraits->error_string(error_code); // and raise the exception, this will do nothing if exceptions are disabled: - boost::regex_error e(message, error_code, position); - e.raise(); +#ifndef BOOST_NO_EXCEPTIONS + if(0 == (this->flags() & regex_constants::no_except)) + { + boost::regex_error e(message, error_code, position); + e.raise(); + } +#endif } template diff --git a/include/boost/regex/v4/cpp_regex_traits.hpp b/include/boost/regex/v4/cpp_regex_traits.hpp index c8b1a975..22107795 100644 --- a/include/boost/regex/v4/cpp_regex_traits.hpp +++ b/include/boost/regex/v4/cpp_regex_traits.hpp @@ -158,7 +158,9 @@ struct cpp_regex_traits_base std::locale m_locale; std::ctype const* m_pctype; +#ifndef BOOST_NO_STD_MESSAGES std::messages const* m_pmessages; +#endif std::collate const* m_pcollate; }; @@ -168,7 +170,9 @@ std::locale cpp_regex_traits_base::imbue(const std::locale& l) std::locale result(m_locale); m_locale = l; m_pctype = &BOOST_USE_FACET(std::ctype, l); +#ifndef BOOST_NO_STD_MESSAGES m_pmessages = &BOOST_USE_FACET(std::messages, l); +#endif m_pcollate = &BOOST_USE_FACET(std::collate, l); return result; } @@ -215,6 +219,7 @@ cpp_regex_traits_char_layer::cpp_regex_traits_char_layer(const std::local { // we need to start by initialising our syntax map so we know which // character is used for which purpose: +#ifndef BOOST_NO_STD_MESSAGES #ifndef __IBMCPP__ typename std::messages::catalog cat = static_cast::catalog>(-1); #else @@ -257,6 +262,7 @@ cpp_regex_traits_char_layer::cpp_regex_traits_char_layer(const std::local } else { +#endif for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) { const char* ptr = get_default_syntax(i); @@ -266,7 +272,9 @@ cpp_regex_traits_char_layer::cpp_regex_traits_char_layer(const std::local ++ptr; } } +#ifndef BOOST_NO_STD_MESSAGES } +#endif } template @@ -425,39 +433,48 @@ typename cpp_regex_traits_implementation::string_type { string_type result; // - // What we do here depends upon the format of the sort key returned by - // sort key returned by this->transform: + // swallowing all exceptions here is a bad idea + // however at least one std lib will always throw + // std::bad_alloc for certain arguments... // - switch(m_collate_type) - { - case sort_C: - case sort_unknown: - // the best we can do is translate to lower case, then get a regular sort key: + try{ + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch(m_collate_type) { - result.assign(p1, p2); - this->m_pctype->tolower(&*result.begin(), &*result.begin() + result.size()); - result = this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size()); - break; - } - case sort_fixed: - { - // get a regular sort key, and then truncate it: - result.assign(this->m_pcollate->transform(p1, p2)); - result.erase(this->m_collate_delim); - break; - } - case sort_delim: - // get a regular sort key, and then truncate everything after the delim: - result.assign(this->m_pcollate->transform(p1, p2)); - std::size_t i; - for(i = 0; i < result.size(); ++i) + case sort_C: + case sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: { - if(result[i] == m_collate_delim) - break; + result.assign(p1, p2); + this->m_pctype->tolower(&*result.begin(), &*result.begin() + result.size()); + result = this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size()); + break; } - result.erase(i); - break; - } + case sort_fixed: + { + // get a regular sort key, and then truncate it: + result.assign(this->m_pcollate->transform(p1, p2)); + result.erase(this->m_collate_delim); + break; + } + case sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result.assign(this->m_pcollate->transform(p1, p2)); + std::size_t i; + for(i = 0; i < result.size(); ++i) + { + if(result[i] == m_collate_delim) + break; + } + result.erase(i); + break; + } + }catch(...){} + while(result.size() && (charT(0) == *result.rbegin())) + result.erase(result.size() - 1); return result; } @@ -509,6 +526,7 @@ template cpp_regex_traits_implementation::cpp_regex_traits_implementation(const std::locale& l) : cpp_regex_traits_char_layer(l), m_is(&m_sbuf) { +#ifndef BOOST_NO_STD_MESSAGES #ifndef __IBMCPP__ typename std::messages::catalog cat = static_cast::catalog>(-1); #else @@ -582,6 +600,7 @@ cpp_regex_traits_implementation::cpp_regex_traits_implementation(const st this->m_custom_class_names[s] = masks[j]; } } +#endif // // get the collation format used by m_pcollate: // @@ -688,7 +707,23 @@ public: } string_type transform(const charT* p1, const charT* p2) const { - return m_pimpl->m_pcollate->transform(p1, p2); + // + // swallowing all exceptions here is a bad idea + // however at least one std lib will always throw + // std::bad_alloc for certain arguments... + // + string_type result; + try{ + result = m_pimpl->m_pcollate->transform(p1, p2); + // + // some implementations append unnecessary trailing \0's: + while(result.size() && (charT(0) == *result.rbegin())) + result.erase(result.size() - 1); + } + catch(...) + { + } + return result; } string_type transform_primary(const charT* p1, const charT* p2) const { diff --git a/src/c_regex_traits.cpp b/src/c_regex_traits.cpp index 4ccb985f..a50d521d 100644 --- a/src/c_regex_traits.cpp +++ b/src/c_regex_traits.cpp @@ -47,13 +47,14 @@ namespace boost{ c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform(const char* p1, const char* p2) { - std::size_t s = 10; - std::string result(s, ' '); + std::string result(10, ' '); + std::size_t s = result.size(); std::size_t r; std::string src(p1, p2); - while(s < (r = std::strxfrm(&*result.begin(), src.c_str(), result.size()))) + while(s < (r = std::strxfrm(&*result.begin(), src.c_str(), s))) { result.append(r - s + 3, ' '); + s = result.size(); } result.erase(r); return result; diff --git a/src/cpp_regex_traits.cpp b/src/cpp_regex_traits.cpp index d7566e3a..98840c6a 100644 --- a/src/cpp_regex_traits.cpp +++ b/src/cpp_regex_traits.cpp @@ -33,6 +33,7 @@ void cpp_regex_traits_char_layer::init() // we need to start by initialising our syntax map so we know which // character is used for which purpose: std::memset(m_char_map, 0, sizeof(m_char_map)); +#ifndef BOOST_NO_STD_MESSAGES #ifndef __IBMCPP__ std::messages::catalog cat = static_cast::catalog>(-1); #else @@ -75,6 +76,7 @@ void cpp_regex_traits_char_layer::init() } else { +#endif for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) { const char* ptr = get_default_syntax(i); @@ -84,7 +86,9 @@ void cpp_regex_traits_char_layer::init() ++ptr; } } +#ifndef BOOST_NO_STD_MESSAGES } +#endif // // finish off by calculating our escape types: // diff --git a/src/wc_regex_traits.cpp b/src/wc_regex_traits.cpp index 49bc0520..079dcf9b 100644 --- a/src/wc_regex_traits.cpp +++ b/src/wc_regex_traits.cpp @@ -43,9 +43,10 @@ c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::t std::size_t s = 10; std::wstring src(p1, p2); std::wstring result(s, L' '); - while(s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), result.size()))) + while(s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s))) { result.append(r - s + 3, L' '); + s = result.size(); } result.erase(r); return result; diff --git a/test/Jamfile b/test/Jamfile index 5432cc1c..7a16e13f 100644 --- a/test/Jamfile +++ b/test/Jamfile @@ -6,6 +6,25 @@ subproject libs/regex/test ; import testing ; subinclude libs/regex/test/captures ; +R_SOURCE = +basic_tests.cpp +main.cpp +test_alt.cpp +test_anchors.cpp +test_asserts.cpp +test_backrefs.cpp +test_deprecated.cpp +test_escapes.cpp +test_grep.cpp +test_locale.cpp +test_non_greedy_repeats.cpp +test_perl_ex.cpp +test_replace.cpp +test_sets.cpp +test_simple_repeats.cpp +test_tricky_cases.cpp +; + # # this template defines the options common to # all regex tests: @@ -22,9 +41,7 @@ template test # template regression :