From 03694d72ae6f448eed8c7e89b7f545941f3ce189 Mon Sep 17 00:00:00 2001 From: John Maddock Date: Wed, 12 May 2004 12:13:49 +0000 Subject: [PATCH] Added tests for deprecated and POSIX functions. [SVN r22805] --- include/boost/regex/pattern_except.hpp | 4 +- include/boost/regex/v4/basic_regex_parser.hpp | 150 ++++----- include/boost/regex/v4/cregex.hpp | 33 ++ include/boost/regex/v4/error_type.hpp | 72 ++--- .../boost/regex/v4/perl_matcher_common.hpp | 2 +- .../regex/v4/perl_matcher_non_recursive.hpp | 4 +- .../boost/regex/v4/perl_matcher_recursive.hpp | 2 +- src/posix_api.cpp | 10 +- src/regex_traits_defaults.cpp | 4 +- src/wide_posix_api.cpp | 10 +- test/regress/basic_tests.cpp | 3 + test/regress/test.hpp | 5 +- test/regress/test_deprecated.cpp | 285 ++++++++++++++++++ test/regress/test_deprecated.hpp | 18 ++ 14 files changed, 466 insertions(+), 136 deletions(-) create mode 100644 test/regress/test_deprecated.cpp create mode 100644 test/regress/test_deprecated.hpp diff --git a/include/boost/regex/pattern_except.hpp b/include/boost/regex/pattern_except.hpp index 94850522..c689c194 100644 --- a/include/boost/regex/pattern_except.hpp +++ b/include/boost/regex/pattern_except.hpp @@ -49,9 +49,9 @@ public: explicit bad_expression(const std::string& s, regex_constants::error_type err, std::ptrdiff_t pos) : bad_pattern(s), m_error_code(err), m_position(pos) {} ~bad_expression() throw(); - regex_constants::error_type errorno() + regex_constants::error_type errorno()const { return m_error_code; } - std::ptrdiff_t position() + std::ptrdiff_t position()const { return m_position; } private: regex_constants::error_type m_error_code; diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index e407669b..522ac428 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -83,7 +83,7 @@ void basic_regex_parser::parse(const charT* p1, const charT* p2, { // empty strings are 
errors: if(p1 == p2) - fail(REG_EMPTY, 0); + fail(regex_constants::error_empty, 0); // pass flags on to base class: this->init(flags); // set up pointers: @@ -198,24 +198,24 @@ bool basic_regex_parser::parse_extended() break; case regex_constants::syntax_star: if(m_position == this->m_base) - fail(REG_BADRPT, 0); + fail(regex_constants::error_badrepeat, 0); ++m_position; return parse_repeat(); case regex_constants::syntax_question: if(m_position == this->m_base) - fail(REG_BADRPT, 0); + fail(regex_constants::error_badrepeat, 0); ++m_position; return parse_repeat(0,1); case regex_constants::syntax_plus: if(m_position == this->m_base) - fail(REG_BADRPT, 0); + fail(regex_constants::error_badrepeat, 0); ++m_position; return parse_repeat(1); case regex_constants::syntax_open_brace: ++m_position; return parse_repeat_range(false); case regex_constants::syntax_close_brace: - fail(REG_EBRACE, this->m_position - this->m_end); + fail(regex_constants::error_brace, this->m_position - this->m_end); // we don't ever get here, because we will have thrown: BOOST_ASSERT(0); result = false; @@ -269,7 +269,7 @@ bool basic_regex_parser::parse_open_paren() // skip the '(' and error check: // if(++m_position == m_end) - fail(REG_EPAREN, m_position - m_base); + fail(regex_constants::error_paren, m_position - m_base); // // begin by checking for a perl-style (?...) 
extension: // @@ -320,7 +320,7 @@ bool basic_regex_parser::parse_open_paren() // we either have a ')' or we have run out of characters prematurely: // if(m_position == m_end) - this->fail(REG_EPAREN, std::distance(m_base, m_end)); + this->fail(regex_constants::error_paren, std::distance(m_base, m_end)); BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); ++m_position; // @@ -377,7 +377,7 @@ bool basic_regex_parser::parse_basic_escape() case regex_constants::syntax_close_brace: if(this->flags() & regbase::no_intervals) return parse_literal(); - fail(REG_EBRACE, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base); result = false; break; case regex_constants::syntax_or: @@ -416,7 +416,7 @@ bool basic_regex_parser::parse_extended_escape() char_set.negate(); char_set.add_class(m); if(0 == this->append_set(char_set)) - fail(REG_ERANGE, m_position - m_base); + fail(regex_constants::error_range, m_position - m_base); ++m_position; return true; } @@ -511,7 +511,7 @@ bool basic_regex_parser::parse_repeat(std::size_t low, std::size_ } if(0 == this->m_last_state) { - fail(REG_BADRPT, std::distance(m_base, m_position)); + fail(regex_constants::error_badrepeat, std::distance(m_base, m_position)); } if(this->m_last_state->type == syntax_element_endmark) { @@ -548,7 +548,7 @@ bool basic_regex_parser::parse_repeat(std::size_t low, std::size_ case syntax_element_restart_continue: case syntax_element_jump: // can't legally repeat any of the above: - fail(REG_BADRPT, m_position - m_base); + fail(regex_constants::error_badrepeat, m_position - m_base); default: // do nothing... 
break; @@ -588,16 +588,16 @@ bool basic_regex_parser::parse_repeat_range(bool isbasic) ++m_position; // fail if at end: if(this->m_position == this->m_end) - fail(REG_EBRACE, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base); // get min: v = this->m_traits.toi(m_position, m_end, 10); // skip whitespace: while((m_position != m_end) && this->m_traits.is_class(*m_position, this->m_mask_space)) ++m_position; if(v < 0) - fail(REG_BADBR, this->m_position - this->m_base); + fail(regex_constants::error_badbrace, this->m_position - this->m_base); else if(this->m_position == this->m_end) - fail(REG_EBRACE, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base); min = v; // see if we have a comma: if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma) @@ -608,7 +608,7 @@ bool basic_regex_parser::parse_repeat_range(bool isbasic) while((m_position != m_end) && this->m_traits.is_class(*m_position, this->m_mask_space)) ++m_position; if(this->m_position == this->m_end) - fail(REG_EBRACE, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base); // get the value if any: v = this->m_traits.toi(m_position, m_end, 10); max = (v >= 0) ? 
v : (std::numeric_limits::max)(); @@ -623,29 +623,29 @@ bool basic_regex_parser::parse_repeat_range(bool isbasic) ++m_position; // OK now check trailing }: if(this->m_position == this->m_end) - fail(REG_EBRACE, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base); if(isbasic) { if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape) { ++m_position; if(this->m_position == this->m_end) - fail(REG_EBRACE, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base); } else { - fail(REG_BADBR, this->m_position - this->m_base); + fail(regex_constants::error_badbrace, this->m_position - this->m_base); } } if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace) ++m_position; else - fail(REG_BADBR, this->m_position - this->m_base); + fail(regex_constants::error_badbrace, this->m_position - this->m_base); // // finally go and add the repeat, unless error: // if(min > max) - fail(REG_ERANGE, this->m_position - this->m_base); + fail(regex_constants::error_range, this->m_position - this->m_base); return parse_repeat(min, max); } @@ -657,7 +657,7 @@ bool basic_regex_parser::parse_alt() // or if the last state was a '(' then error: // if((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark)) - fail(REG_EMPTY, this->m_position - this->m_base); + fail(regex_constants::error_empty, this->m_position - this->m_base); ++m_position; // // we need to append a trailing jump: @@ -694,7 +694,7 @@ bool basic_regex_parser::parse_alt() // if we didn't actually add any trailing states then that's an error: // if(this->m_alt_insert_point == static_cast(this->m_pdata->m_data.size())) - fail(REG_EMPTY, this->m_position - this->m_base); + fail(regex_constants::error_empty, this->m_position - this->m_base); // // fix up the jump we added to point to the end of the states // that we've just added: @@ -711,7 +711,7 @@ bool 
basic_regex_parser::parse_set() { ++m_position; if(m_position == m_end) - fail(REG_EBRACK, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base); basic_char_set char_set; const charT* base = m_position; // where the '[' was @@ -741,7 +741,7 @@ bool basic_regex_parser::parse_set() { ++m_position; if(0 == this->append_set(char_set)) - fail(REG_ERANGE, m_position - m_base); + fail(regex_constants::error_range, m_position - m_base); } return true; case regex_constants::syntax_open_set: @@ -772,7 +772,7 @@ bool basic_regex_parser::parse_set() char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1); if(m) { - fail(REG_EESCAPE, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base); } } // not a character class, just a regular escape: @@ -797,7 +797,7 @@ bool basic_regex_parser::parse_inner_set(basic_char_setm_traits.syntax_type(*m_position)) { case regex_constants::syntax_dot: @@ -819,20 +819,20 @@ bool basic_regex_parser::parse_inner_set(basic_char_setm_traits.syntax_type(*m_position) != regex_constants::syntax_colon)) ++m_position; const charT* name_last = m_position; if(m_end == m_position) - fail(REG_EBRACK, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base); if((m_end == ++m_position) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) - fail(REG_EBRACK, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base); typedef typename traits::char_class_type mask_type; mask_type m = this->m_traits.lookup_classname(name_first, name_last); if(0 == m) @@ -861,7 +861,7 @@ bool basic_regex_parser::parse_inner_set(basic_char_set::parse_inner_set(basic_char_setm_traits.syntax_type(*m_position) != regex_constants::syntax_equal)) ++m_position; const charT* name_last = m_position; if(m_end == m_position) - fail(REG_EBRACK, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base); if((m_end == ++m_position) 
|| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) - fail(REG_EBRACK, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base); string_type m = this->m_traits.lookup_collatename(name_first, name_last); if((0 == m.size()) || (m.size() > 2)) - fail(REG_ECOLLATE, name_first - m_base); + fail(regex_constants::error_collate, name_first - m_base); digraph d; d.first = m[0]; if(m.size() > 1) @@ -911,18 +911,18 @@ void basic_regex_parser::parse_set_literal(basic_char_set start_range = get_next_set_literal(char_set); if(m_end == m_position) - fail(REG_EBRACK, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base); if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash) { // we have a range: if(m_end == ++m_position) - fail(REG_EBRACK, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base); if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set) { digraph end_range = get_next_set_literal(char_set); char_set.add_range(start_range, end_range); if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash) - fail(REG_ERANGE, m_position - m_base); + fail(regex_constants::error_range, m_position - m_base); return; } --m_position; @@ -942,7 +942,7 @@ digraph basic_regex_parser::get_next_set_literal(basic_cha { // see if we are at the end of the set: if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) - fail(REG_ERANGE, m_position - m_base); + fail(regex_constants::error_range, m_position - m_base); --m_position; } result.first = *m_position++; @@ -960,7 +960,7 @@ digraph basic_regex_parser::get_next_set_literal(basic_cha case regex_constants::syntax_open_set: { if(m_end == ++m_position) - fail(REG_ECOLLATE, m_position - m_base); + fail(regex_constants::error_collate, m_position - m_base); if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot) { 
--m_position; @@ -969,24 +969,24 @@ digraph basic_regex_parser::get_next_set_literal(basic_cha return result; } if(m_end == ++m_position) - fail(REG_ECOLLATE, m_position - m_base); + fail(regex_constants::error_collate, m_position - m_base); const charT* name_first = m_position; // skip at least one character, then find the matching ':]' if(m_end == ++m_position) - fail(REG_ECOLLATE, name_first - m_base); + fail(regex_constants::error_collate, name_first - m_base); while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)) ++m_position; const charT* name_last = m_position; if(m_end == m_position) - fail(REG_ECOLLATE, name_first - m_base); + fail(regex_constants::error_collate, name_first - m_base); if((m_end == ++m_position) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) - fail(REG_ECOLLATE, name_first - m_base); + fail(regex_constants::error_collate, name_first - m_base); ++m_position; string_type s = this->m_traits.lookup_collatename(name_first, name_last); if(s.empty() || (s.size() > 2)) - fail(REG_ECOLLATE, name_first - m_base); + fail(regex_constants::error_collate, name_first - m_base); result.first = s[0]; if(s.size() > 1) result.second = s[1]; @@ -1005,7 +1005,7 @@ charT basic_regex_parser::unescape_character() { charT result(0); if(m_position == m_end) - fail(REG_EESCAPE, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base); switch(this->m_traits.syntax_type(*m_position)) { case regex_constants::escape_type_control_a: @@ -1036,13 +1036,13 @@ charT basic_regex_parser::unescape_character() ++m_position; if(m_position == m_end) { - fail(REG_EESCAPE, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base); return result; } if((*m_position < charT('@')) || (*m_position > charT(125)) ) { - fail(REG_EESCAPE, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base); return result; } result = 
static_cast(*m_position - charT('@')); @@ -1051,7 +1051,7 @@ charT basic_regex_parser::unescape_character() ++m_position; if(m_position == m_end) { - fail(REG_EESCAPE, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base); break; } // maybe have \x{ddd} @@ -1060,7 +1060,7 @@ charT basic_regex_parser::unescape_character() ++m_position; if(m_position == m_end) { - fail(REG_EESCAPE, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base); break; } int i = this->m_traits.toi(m_position, m_end, 16); @@ -1069,7 +1069,7 @@ charT basic_regex_parser::unescape_character() || (i > (std::numeric_limits::max)()) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) { - fail(REG_BADBR, m_position - m_base); + fail(regex_constants::error_badbrace, m_position - m_base); } ++m_position; result = charT(i); @@ -1081,7 +1081,7 @@ charT basic_regex_parser::unescape_character() if((i < 0) || (i >> (sizeof(charT) * CHAR_BIT))) { - fail(REG_EESCAPE, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base); } result = charT(i); } @@ -1093,7 +1093,7 @@ charT basic_regex_parser::unescape_character() std::ptrdiff_t len = (std::min)(std::distance(m_position, m_end), static_cast(4)); int val = this->m_traits.toi(m_position, m_position + len, 8); if(val < 0) - fail(REG_EESCAPE, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base); return static_cast(val); } default: @@ -1109,7 +1109,7 @@ bool basic_regex_parser::parse_backref() { if(m_position == m_end) { - fail(REG_EESCAPE, m_position - m_end); + fail(regex_constants::error_escape, m_position - m_end); } int i = this->m_traits.toi(m_position, m_position + 1, 10); if((i > 0) && (this->m_backrefs & (1u << (i-1)))) @@ -1125,7 +1125,7 @@ bool basic_regex_parser::parse_backref() this->append_literal(c); } else - fail(REG_ESUBREG, m_position - m_end); + fail(regex_constants::error_subreg, m_position - m_end); return 
true; } @@ -1155,7 +1155,7 @@ bool basic_regex_parser::parse_QE() } if(++m_position == m_end) // skip the escape { - fail(REG_EESCAPE, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base); return false; } // check to see if it's a \E: @@ -1185,7 +1185,7 @@ template bool basic_regex_parser::parse_perl_extension() { if(++m_position == m_end) - fail(REG_BADRPT, m_position - m_base); + fail(regex_constants::error_badrepeat, m_position - m_base); // // treat comments as a special case, as these // are the only ones that don't start with a leading @@ -1244,14 +1244,14 @@ bool basic_regex_parser::parse_perl_extension() { // a lookbehind assertion: if(++m_position == m_end) - fail(REG_BADRPT, m_position - m_base); + fail(regex_constants::error_badrepeat, m_position - m_base); regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position); if(t == regex_constants::syntax_not) pb->index = markid = -2; else if(t == regex_constants::syntax_equal) pb->index = markid = -1; else - fail(REG_BADRPT, m_position - m_base); + fail(regex_constants::error_badrepeat, m_position - m_base); ++m_position; jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); this->append_state(syntax_element_backstep, sizeof(re_brace)); @@ -1274,38 +1274,38 @@ bool basic_regex_parser::parse_perl_extension() // a conditional expression: pb->index = markid = -4; if(++m_position == m_end) - fail(REG_BADRPT, m_position - m_base); + fail(regex_constants::error_badrepeat, m_position - m_base); int v = this->m_traits.toi(m_position, m_end, 10); if(v > 0) { re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); br->index = v; if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) - fail(REG_BADRPT, m_position - m_base); + fail(regex_constants::error_badrepeat, m_position - m_base); if(++m_position == m_end) - fail(REG_BADRPT, m_position - m_base); + 
fail(regex_constants::error_badrepeat, m_position - m_base); } else { // verify that we have a lookahead or lookbehind assert: if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question) - fail(REG_BADRPT, m_position - m_base); + fail(regex_constants::error_badrepeat, m_position - m_base); if(++m_position == m_end) - fail(REG_BADRPT, m_position - m_base); + fail(regex_constants::error_badrepeat, m_position - m_base); if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word) { if(++m_position == m_end) - fail(REG_BADRPT, m_position - m_base); + fail(regex_constants::error_badrepeat, m_position - m_base); if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) - fail(REG_BADRPT, m_position - m_base); + fail(regex_constants::error_badrepeat, m_position - m_base); m_position -= 3; } else { if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) - fail(REG_BADRPT, m_position - m_base); + fail(regex_constants::error_badrepeat, m_position - m_base); m_position -= 2; } } @@ -1333,7 +1333,7 @@ bool basic_regex_parser::parse_perl_extension() ++m_position; } else - fail(REG_BADRPT, m_position - m_base); + fail(regex_constants::error_badrepeat, m_position - m_base); // finally append a case change state if we need it: if(m_has_case_change) @@ -1353,7 +1353,7 @@ bool basic_regex_parser::parse_perl_extension() // we either have a ')' or we have run out of characters prematurely: // if(m_position == m_end) - this->fail(REG_EPAREN, std::distance(m_base, m_end)); + this->fail(regex_constants::error_paren, std::distance(m_base, m_end)); BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); ++m_position; // @@ -1381,7 +1381,7 @@ bool basic_regex_parser::parse_perl_extension() if(this->m_last_state == 
jmp) { // Oops... we didn't have anything inside the assertion: - fail(REG_EMPTY, m_position - m_base); + fail(regex_constants::error_empty, m_position - m_base); } } // @@ -1397,7 +1397,7 @@ bool basic_regex_parser::parse_perl_extension() alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt); } else if(this->getaddress(static_cast(b)->alt.i, b)->type == syntax_element_alt) - fail(REG_BADPAT, m_position - m_base); + fail(regex_constants::error_bad_pattern, m_position - m_base); } // // append closing parenthesis state: @@ -1444,14 +1444,14 @@ regex_constants::syntax_option_type basic_regex_parser::parse_opt continue; } if(++m_position == m_end) - fail(REG_EPAREN, m_position - m_base); + fail(regex_constants::error_paren, m_position - m_base); } while(!breakout); if(*m_position == '-') { if(++m_position == m_end) - fail(REG_EPAREN, m_position - m_base); + fail(regex_constants::error_paren, m_position - m_base); do { switch(*m_position) @@ -1474,7 +1474,7 @@ regex_constants::syntax_option_type basic_regex_parser::parse_opt continue; } if(++m_position == m_end) - fail(REG_EPAREN, m_position - m_base); + fail(regex_constants::error_paren, m_position - m_base); } while(!breakout); } diff --git a/include/boost/regex/v4/cregex.hpp b/include/boost/regex/v4/cregex.hpp index c82856e7..07bfeb06 100644 --- a/include/boost/regex/v4/cregex.hpp +++ b/include/boost/regex/v4/cregex.hpp @@ -111,6 +111,39 @@ typedef enum{ REG_STARTEND = 00004 } reg_exec_flags; +// +// POSIX error codes: +// +typedef unsigned reg_error_t; +typedef reg_error_t reg_errcode_t; // backwards compatibility + +static const reg_error_t REG_NOERROR = 0; /* Success. */ +static const reg_error_t REG_NOMATCH = 1; /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ +static const reg_error_t REG_BADPAT = 2; /* Invalid pattern. */ +static const reg_error_t REG_ECOLLATE = 3; /* Undefined collating element. 
*/ +static const reg_error_t REG_ECTYPE = 4; /* Invalid character class name. */ +static const reg_error_t REG_EESCAPE = 5; /* Trailing backslash. */ +static const reg_error_t REG_ESUBREG = 6; /* Invalid back reference. */ +static const reg_error_t REG_EBRACK = 7; /* Unmatched left bracket. */ +static const reg_error_t REG_EPAREN = 8; /* Parenthesis imbalance. */ +static const reg_error_t REG_EBRACE = 9; /* Unmatched \{. */ +static const reg_error_t REG_BADBR = 10; /* Invalid contents of \{\}. */ +static const reg_error_t REG_ERANGE = 11; /* Invalid range end. */ +static const reg_error_t REG_ESPACE = 12; /* Ran out of memory. */ +static const reg_error_t REG_BADRPT = 13; /* No preceding re for repetition op. */ +static const reg_error_t REG_EEND = 14; /* unexpected end of expression */ +static const reg_error_t REG_ESIZE = 15; /* expression too big */ +static const reg_error_t REG_ERPAREN = 8; /* = REG_EPAREN : unmatched right parenthesis */ +static const reg_error_t REG_EMPTY = 17; /* empty expression */ +static const reg_error_t REG_E_MEMORY = 15; /* = REG_ESIZE : out of memory */ +static const reg_error_t REG_ECOMPLEXITY = 18; /* complexity too high */ +static const reg_error_t REG_ESTACK = 19; /* out of stack space */ +static const reg_error_t REG_E_UNKNOWN = 20; /* unknown error */ +static const reg_error_t REG_ENOSYS = 20; /* = REG_E_UNKNOWN : Reserved. 
*/ + BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompA(regex_tA*, const char*, int); BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorA(int, const regex_tA*, char*, regsize_t); BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecA(const regex_tA*, const char*, regsize_t, regmatch_t*, int); diff --git a/include/boost/regex/v4/error_type.hpp b/include/boost/regex/v4/error_type.hpp index b89ff01c..b8d6b35f 100644 --- a/include/boost/regex/v4/error_type.hpp +++ b/include/boost/regex/v4/error_type.hpp @@ -27,60 +27,32 @@ namespace boost{ #endif -// -// start with the POSIX API versions of these: -// -typedef unsigned reg_error_t; -typedef reg_error_t reg_errcode_t; // backwards compatibility - -static const reg_error_t REG_NOERROR = 0; /* Success. */ -static const reg_error_t REG_NOMATCH = 1; /* Didn't find a match (for regexec). */ - - /* POSIX regcomp return error codes. (In the order listed in the - standard.) */ -static const reg_error_t REG_BADPAT = 2; /* Invalid pattern. */ -static const reg_error_t REG_ECOLLATE = 3; /* Undefined collating element. */ -static const reg_error_t REG_ECTYPE = 4; /* Invalid character class name. */ -static const reg_error_t REG_EESCAPE = 5; /* Trailing backslash. */ -static const reg_error_t REG_ESUBREG = 6; /* Invalid back reference. */ -static const reg_error_t REG_EBRACK = 7; /* Unmatched left bracket. */ -static const reg_error_t REG_EPAREN = 8; /* Parenthesis imbalance. */ -static const reg_error_t REG_EBRACE = 9; /* Unmatched \{. */ -static const reg_error_t REG_BADBR = 10; /* Invalid contents of \{\}. */ -static const reg_error_t REG_ERANGE = 11; /* Invalid range end. */ -static const reg_error_t REG_ESPACE = 12; /* Ran out of memory. */ -static const reg_error_t REG_BADRPT = 13; /* No preceding re for repetition op. 
*/ -static const reg_error_t REG_EEND = 14; /* unexpected end of expression */ -static const reg_error_t REG_ESIZE = 15; /* expression too big */ -static const reg_error_t REG_ERPAREN = 8; /* = REG_EPAREN : unmatched right parenthesis */ -static const reg_error_t REG_EMPTY = 17; /* empty expression */ -static const reg_error_t REG_E_MEMORY = 15; /* = REG_ESIZE : out of memory */ -static const reg_error_t REG_ECOMPLEXITY = 18; /* complexity too high */ -static const reg_error_t REG_ESTACK = 19; /* out of stack space */ -static const reg_error_t REG_E_UNKNOWN = 20; /* unknown error */ -static const reg_error_t REG_ENOSYS = 20; /* = REG_E_UNKNOWN : Reserved. */ - #ifdef __cplusplus namespace regex_constants{ -typedef ::boost::reg_error_t error_type; +typedef unsigned error_type; -static const error_type error_collate = REG_ECOLLATE; -static const error_type error_ctype = REG_ECTYPE; -static const error_type error_escape = REG_EESCAPE; -static const error_type error_subreg = REG_ESUBREG; -static const error_type error_brack = REG_EBRACK; -static const error_type error_paren = REG_EPAREN; -static const error_type error_brace = REG_EBRACE; -static const error_type error_badbrace = REG_BADBR; -static const error_type error_range = REG_ERANGE; -static const error_type error_space = REG_ESPACE; -static const error_type error_badrepeat = REG_BADRPT; -static const error_type error_size = REG_ESIZE; -static const error_type error_empty = REG_EMPTY; -static const error_type error_complexity = REG_ECOMPLEXITY; -static const error_type error_stack = REG_ESTACK; -static const error_type error_unknown = REG_E_UNKNOWN; +static const error_type error_ok = 0; // not used +static const error_type error_no_match = 1; // not used +static const error_type error_bad_pattern = 2; +static const error_type error_collate = 3; +static const error_type error_ctype = 4; +static const error_type error_escape = 5; +static const error_type error_subreg = 6; +static const error_type error_brack = 7; 
+static const error_type error_paren = 8; +static const error_type error_brace = 9; +static const error_type error_badbrace = 10; +static const error_type error_range = 11; +static const error_type error_space = 12; +static const error_type error_badrepeat = 13; +static const error_type error_end = 14; // not used +static const error_type error_size = 15; +static const error_type error_right_paren = 16; // not used +static const error_type error_empty = 17; +static const error_type error_complexity = 18; +static const error_type error_stack = 19; +static const error_type error_unknown = 20; } } diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index 25007648..80129a00 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -108,7 +108,7 @@ bool perl_matcher::protected_call( reset_stack_guard_page(); } // we only get here after a stack overflow: - raise_error(traits_inst, REG_E_MEMORY); + raise_error(traits_inst, regex_constants::error_size); // and we never really get here at all: return false; } diff --git a/include/boost/regex/v4/perl_matcher_non_recursive.hpp b/include/boost/regex/v4/perl_matcher_non_recursive.hpp index 1c881b9f..61e5fe91 100644 --- a/include/boost/regex/v4/perl_matcher_non_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -155,7 +155,7 @@ bool perl_matcher::match_all_states() if(!(this->*proc)()) { if(state_count > max_state_count) - raise_error(traits_inst, REG_ESPACE); + raise_error(traits_inst, regex_constants::error_space); if((m_match_flags & match_partial) && (position == last) && (position != search_base)) m_has_partial_match = true; if(false == unwind(false)) @@ -183,7 +183,7 @@ void perl_matcher::extend_stack() m_backup_state = block; } else - raise_error(traits_inst, REG_E_MEMORY); + raise_error(traits_inst, regex_constants::error_size); } template diff --git 
a/include/boost/regex/v4/perl_matcher_recursive.hpp b/include/boost/regex/v4/perl_matcher_recursive.hpp index c81546e4..3b389f9d 100644 --- a/include/boost/regex/v4/perl_matcher_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_recursive.hpp @@ -82,7 +82,7 @@ bool perl_matcher::match_all_states() }; if(state_count > max_state_count) - raise_error(traits_inst, REG_ESPACE); + raise_error(traits_inst, regex_constants::error_space); while(pstate) { matcher_proc_type proc = s_match_vtable[pstate->type]; diff --git a/src/posix_api.cpp b/src/posix_api.cpp index cd8bd330..1e986bb3 100644 --- a/src/posix_api.cpp +++ b/src/posix_api.cpp @@ -66,7 +66,10 @@ BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompA(regex_tA* expression, const char } if(f & REG_NOSUB) + { expression->eflags |= match_any; + flags |= regex::nosubs; + } if(f & REG_NOSPEC) flags |= regex::literal; @@ -92,7 +95,12 @@ BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompA(regex_tA* expression, const char expression->re_nsub = static_cast(expression->guts)->mark_count() - 1; result = static_cast(expression->guts)->error_code(); #ifndef BOOST_NO_EXCEPTIONS - } catch(...) + } + catch(const boost::bad_expression& be) + { + result = be.errorno(); + } + catch(...) 
{ result = REG_E_UNKNOWN; } diff --git a/src/regex_traits_defaults.cpp b/src/regex_traits_defaults.cpp index d0d54e2c..d5210c78 100644 --- a/src/regex_traits_defaults.cpp +++ b/src/regex_traits_defaults.cpp @@ -92,12 +92,12 @@ BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_con "Invalid regular expression", /* REG_BADPAT */ "Invalid collation character", /* REG_ECOLLATE */ "Invalid character class name", /* REG_ECTYPE */ - "Trailing backslash", /* REG_EESCAPE */ + "Invalid or trailing backslash", /* REG_EESCAPE */ "Invalid back reference", /* REG_ESUBREG */ "Unmatched [ or [^", /* REG_EBRACK */ "Unmatched ( or \\(", /* REG_EPAREN */ "Unmatched { or \\{", /* REG_EBRACE */ - "Invalid content of \\{\\}", /* REG_BADBR */ + "Invalid content of repeat range", /* REG_BADBR */ "Invalid range end", /* REG_ERANGE */ "Memory exhausted", /* REG_ESPACE */ "Invalid preceding regular expression", /* REG_BADRPT */ diff --git a/src/wide_posix_api.cpp b/src/wide_posix_api.cpp index 700ba217..be3d52d7 100644 --- a/src/wide_posix_api.cpp +++ b/src/wide_posix_api.cpp @@ -74,7 +74,10 @@ BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompW(regex_tW* expression, const wcha } if(f & REG_NOSUB) + { expression->eflags |= match_any; + flags |= wregex::nosubs; + } if(f & REG_NOSPEC) flags |= wregex::literal; @@ -100,7 +103,12 @@ BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompW(regex_tW* expression, const wcha expression->re_nsub = static_cast(expression->guts)->mark_count() - 1; result = static_cast(expression->guts)->error_code(); #ifndef BOOST_NO_EXCEPTIONS - } catch(...) + } + catch(const boost::bad_expression& be) + { + result = be.errorno(); + } + catch(...) 
{ result = REG_E_UNKNOWN; } diff --git a/test/regress/basic_tests.cpp b/test/regress/basic_tests.cpp index cddf43d2..92536b29 100644 --- a/test/regress/basic_tests.cpp +++ b/test/regress/basic_tests.cpp @@ -785,6 +785,9 @@ void test_tricky_cases2() TEST_REGEX_SEARCH("(.+)\\1", perl, "abcdxxxyyyxxxyyy", match_default, make_array(4, 16, 4, 10, -2, -2)); // this should not throw: TEST_REGEX_SEARCH("[_]+$", perl, "___________________________________________x", match_default, make_array(-2, -2)); + // bug in V4 code detected 2004/05/12: + TEST_REGEX_SEARCH("\\l+", perl|icase, "abcXYZ", match_default, make_array(0, 6, -2, -2)); + TEST_REGEX_SEARCH("\\u+", perl|icase, "abcXYZ", match_default, make_array(0, 6, -2, -2)); // // the strings in the next test case are too long for most compilers to cope with, diff --git a/test/regress/test.hpp b/test/regress/test.hpp index dfe367ef..a45096c5 100644 --- a/test/regress/test.hpp +++ b/test/regress/test.hpp @@ -5,6 +5,7 @@ #include "test_not_regex.hpp" #include "test_regex_search.hpp" #include "test_regex_replace.hpp" +#include "test_deprecated.hpp" // @@ -12,10 +13,12 @@ // real test: // template -void test(const charT&, const tagT& tag) +void test(const charT& c, const tagT& tag) { boost::basic_regex e; test(e, tag); + // test old deprecated code: + test_deprecated(c, tag); } // diff --git a/test/regress/test_deprecated.cpp b/test/regress/test_deprecated.cpp new file mode 100644 index 00000000..c18cea5b --- /dev/null +++ b/test/regress/test_deprecated.cpp @@ -0,0 +1,285 @@ + +#include "test.hpp" + +int get_posix_compile_options(boost::regex_constants::syntax_option_type opts) +{ + using namespace boost; + int result = 0; + switch(opts & regbase::main_option_type) + { + case regbase::perl: + result = (opts & regbase::no_perl_ex) ? 
REG_EXTENDED : REG_PERL; + if(opts & (regbase::no_bk_refs|regbase::no_mod_m|regbase::mod_x|regbase::mod_s|regbase::no_mod_s|regbase::no_escape_in_lists)) + return -1; + break; + case regbase::basic: + result = REG_BASIC; + if(opts & (regbase::no_char_classes|regbase::no_intervals|regbase::bk_plus_qm|regbase::bk_vbar)) + return -1; + if((opts & regbase::no_escape_in_lists) == 0) + return -1; + break; + default: + return -1; + } + + if(opts & regbase::icase) + result |= REG_ICASE; + if(opts & regbase::nosubs) + result |= REG_NOSUB; + if(opts & regbase::newline_alt) + result |= REG_NEWLINE; + if((opts & regbase::collate) == 0) + result |= REG_NOCOLLATE; + + return result; +} + +int get_posix_match_flags(boost::regex_constants::match_flag_type f) +{ + int result = 0; + if(f & boost::regex_constants::match_not_bol) + result |= boost::REG_NOTBOL; + if(f & boost::regex_constants::match_not_eol) + result |= boost::REG_NOTEOL; + if(f & ~(boost::regex_constants::match_not_bol|boost::regex_constants::match_not_eol)) + return -1; + return result; +} + +void test_deprecated(const char&, const test_regex_search_tag&) +{ + const std::string& expression = test_info::expression(); + if(expression.find('\0') != std::string::npos) + return; + const std::string& search_text = test_info::search_text(); + if(search_text.find('\0') != std::string::npos) + return; + int posix_options = get_posix_compile_options(test_info::syntax_options()); + if(posix_options < 0) + return; + int posix_match_options = get_posix_match_flags(test_info::match_options()); + if(posix_match_options < 0) + return; + const int* results = test_info::answer_table(); + + // OK try and compile the expression: + boost::regex_tA re; + if(boost::regcompA(&re, expression.c_str(), posix_options) != 0) + { + BOOST_REGEX_TEST_ERROR("Expression : \"" << expression.c_str() << "\" did not compile with the POSIX C API.", char); + return; + } + // try and find the first occurrence: + boost::regmatch_t matches[50]; + 
if(boost::regexecA(&re, search_text.c_str(), 50, matches, posix_match_options) == 0) + { + int i = 0; + while(results[2*i] != -2) + { + if(results[2*i] != matches[i].rm_so) + { + BOOST_REGEX_TEST_ERROR("Mismatch in start of subexpression " << i << " found with the POSIX C API.", char); + } + if(results[2*i+1] != matches[i].rm_eo) + { + BOOST_REGEX_TEST_ERROR("Mismatch in end of subexpression " << i << " found with the POSIX C API.", char); + } + ++i; + } + } + else + { + if(results[0] >= 0) + { + BOOST_REGEX_TEST_ERROR("Expression : \"" << expression.c_str() << "\" was not found with the POSIX C API.", char); + } + } + // clean up whatever: + boost::regfreeA(&re); + + // + // now try the RegEx class: + // + if(test_info::syntax_options() & ~boost::regex::icase) + return; + bool have_catch = false; + try{ + boost::RegEx e(expression, test_info::syntax_options() & boost::regex::icase); + if(e.Search(search_text, test_info::match_options())) + { + int i = 0; + while(results[i*2] != -2) + { + if(e.Matched(i)) + { + if(results[2*i] != e.Position(i)) + { + BOOST_REGEX_TEST_ERROR("Mismatch in start of subexpression " << i << " found with the RegEx class (found " << e.Position(i) << " expected " << results[2*i] << ").", char); + } + if(results[2*i+1] != e.Position(i) + e.Length(i)) + { + BOOST_REGEX_TEST_ERROR("Mismatch in end of subexpression " << i << " found with the RegEx class (found " << e.Position(i) + e.Length(i) << " expected " << results[2*i+1] << ").", char); + } + } + else + { + if(results[2*i] >= 0) + { + BOOST_REGEX_TEST_ERROR("Mismatch in start of subexpression " << i << " found with the RegEx class (found " << e.Position(i) << " expected " << results[2*i] << ").", char); + } + if(results[2*i+1] >= 0) + { + BOOST_REGEX_TEST_ERROR("Mismatch in end of subexpression " << i << " found with the RegEx class (found " << e.Position(i) + e.Length(i) << " expected " << results[2*i+1] << ").", char); + } + } + ++i; + } + } + else + { + if(results[0] >= 0) + { + 
BOOST_REGEX_TEST_ERROR("Expression : \"" << expression.c_str() << "\" was not found with class RegEx.", char); + } + } + } + catch(const boost::bad_expression& r) + { + BOOST_REGEX_TEST_ERROR("Expression did not compile with RegEx class: " << r.what(), char); + } + catch(const std::runtime_error& r) + { + BOOST_REGEX_TEST_ERROR("Unexpected std::runtime_error : " << r.what(), char); + } + catch(const std::exception& r) + { + BOOST_REGEX_TEST_ERROR("Unexpected std::exception: " << r.what(), char); + } + catch(...) + { + BOOST_REGEX_TEST_ERROR("Unexpected exception of unknown type", char); + } + +} + +void test_deprecated(const wchar_t&, const test_regex_search_tag&) +{ + const std::wstring& expression = test_info::expression(); + if(expression.find(L'\0') != std::wstring::npos) + return; + const std::wstring& search_text = test_info::search_text(); + if(search_text.find(L'\0') != std::wstring::npos) + return; + int posix_options = get_posix_compile_options(test_info::syntax_options()); + if(posix_options < 0) + return; + int posix_match_options = get_posix_match_flags(test_info::match_options()); + if(posix_match_options < 0) + return; + const int* results = test_info::answer_table(); + + // OK try and compile the expression: + boost::regex_tW re; + if(boost::regcompW(&re, expression.c_str(), posix_options) != 0) + { + BOOST_REGEX_TEST_ERROR("Expression : \"" << expression.c_str() << "\" did not compile with the POSIX C API.", wchar_t); + return; + } + // try and find the first occurrence: + boost::regmatch_t matches[50]; + if(boost::regexecW(&re, search_text.c_str(), 50, matches, posix_match_options) == 0) + { + int i = 0; + while(results[2*i] != -2) + { + if(results[2*i] != matches[i].rm_so) + { + BOOST_REGEX_TEST_ERROR("Mismatch in start of subexpression " << i << " found with the POSIX C API.", wchar_t); + } + if(results[2*i+1] != matches[i].rm_eo) + { + BOOST_REGEX_TEST_ERROR("Mismatch in end of subexpression " << i << " found with the POSIX C API.", wchar_t); + 
} + ++i; + } + } + else + { + if(results[0] >= 0) + { + BOOST_REGEX_TEST_ERROR("Expression : \"" << expression.c_str() << "\" was not found with the POSIX C API.", wchar_t); + } + } + // clean up whatever: + boost::regfreeW(&re); +} + +void test_deprecated(const char&, const test_invalid_regex_tag&) +{ + const std::string& expression = test_info::expression(); + if(expression.find('\0') != std::string::npos) + return; + int posix_options = get_posix_compile_options(test_info::syntax_options()); + if(posix_options < 0) + return; + + // OK try and compile the expression: + boost::regex_tA re; + if(regcompA(&re, expression.c_str(), posix_options) == 0) + { + boost::regfreeA(&re); + BOOST_REGEX_TEST_ERROR("Expression : \"" << expression.c_str() << "\" unexpectedly compiled with the POSIX C API.", char); + } + // + // now try the RegEx class: + // + if(test_info::syntax_options() & ~boost::regex::icase) + return; + bool have_catch = false; + try{ + boost::RegEx e(expression, test_info::syntax_options() & boost::regex::icase); + } + catch(const boost::bad_expression&) + { + have_catch = true; + } + catch(const std::runtime_error& r) + { + have_catch = true; + BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but a std::runtime_error instead: " << r.what(), char); + } + catch(const std::exception& r) + { + have_catch = true; + BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but a std::exception instead: " << r.what(), char); + } + catch(...) 
+ { + have_catch = true; + BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but got an exception of unknown type instead", char); + } + if(!have_catch) + { + // oops expected exception was not thrown: + BOOST_REGEX_TEST_ERROR("Expected an exception, but didn't find one.", char); + } +} + +void test_deprecated(const wchar_t&, const test_invalid_regex_tag&) +{ + const std::wstring& expression = test_info::expression(); + if(expression.find(L'\0') != std::string::npos) + return; + int posix_options = get_posix_compile_options(test_info::syntax_options()); + if(posix_options < 0) + return; + + // OK try and compile the expression: + boost::regex_tW re; + if(regcompW(&re, expression.c_str(), posix_options) == 0) + { + boost::regfreeW(&re); + BOOST_REGEX_TEST_ERROR("Expression : \"" << expression.c_str() << "\" unexpectedly compiled with the POSIX C API.", wchar_t); + } +} diff --git a/test/regress/test_deprecated.hpp b/test/regress/test_deprecated.hpp new file mode 100644 index 00000000..4e5cc7bc --- /dev/null +++ b/test/regress/test_deprecated.hpp @@ -0,0 +1,18 @@ + +#ifndef BOOST_REGEX_TEST_DEPRECATED +#define BOOST_REGEX_TEST_DEPRECATED + +template +void test_deprecated(const charT&, const Tag&) +{ + // do nothing +} + +void test_deprecated(const char&, const test_regex_search_tag&); +void test_deprecated(const wchar_t&, const test_regex_search_tag&); +void test_deprecated(const char&, const test_invalid_regex_tag&); +void test_deprecated(const wchar_t&, const test_invalid_regex_tag&); + + +#endif +