1) Disabled recursive implementation for VC8: stack overflows can't be reliably detected unless the whole program is compiled with asynchronous exceptions.

2) Changed std::copy calls on VC8 to avoid "dangerous code" warnings.
3) Moved backreference and octal escape code into line with POSIX-extended requirements.
4) Changed match_results leftmost-longest rules to stop unnecessary std::distance computations (an optimisation for non-random access iterators).
5) Changed C lib calls to use "safe" versions of string API's where available.
6) Added many new POSIX-extended leftmost-longest tests, to verify the above.


[SVN r27880]
This commit is contained in:
John Maddock
2005-03-30 11:38:51 +00:00
parent ca144bb2b3
commit de28eb9b18
17 changed files with 361 additions and 106 deletions

View File

@ -15,6 +15,9 @@
#pragma warning(disable:4127)
#endif
void test_tricky_cases2();
void test_tricky_cases3();
void test_tricky_cases()
{
using namespace boost::regex_constants;
@ -98,6 +101,92 @@ void test_tricky_cases()
TEST_REGEX_SEARCH("a(b|c){2,4}d", perl, "abcbcd", match_default, make_array(0, 6, 4, 5, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,}d", perl, "abcbd", match_default, make_array(0, 5, 3, 4, -2, -2));
test_tricky_cases2();
test_tricky_cases3();
}
void test_tricky_cases2()
{
using namespace boost::regex_constants;
TEST_REGEX_SEARCH("a(((b)))c", extended, "abc", match_default, make_array(0, 3, 1, 2, 1, 2, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|(c))d", extended, "abd", match_default, make_array(0, 3, 1, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|(c))d", extended, "acd", match_default, make_array(0, 3, 1, 2, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b*|c)d", extended, "abbd", match_default, make_array(0, 4, 1, 3, -2, -2));
// just gotta have one DFA-buster, of course
TEST_REGEX_SEARCH("a[ab]{20}", extended, "aaaaabaaaabaaaabaaaab", match_default, make_array(0, 21, -2, -2));
// and an inline expansion in case somebody gets tricky
TEST_REGEX_SEARCH("a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]", extended, "aaaaabaaaabaaaabaaaab", match_default, make_array(0, 21, -2, -2));
// and in case somebody just slips in an NFA...
TEST_REGEX_SEARCH("a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)", extended, "aaaaabaaaabaaaabaaaabweeknights", match_default, make_array(0, 31, 21, 24, 24, 31, -2, -2));
// one really big one
TEST_REGEX_SEARCH("1234567890123456789012345678901234567890123456789012345678901234567890", extended, "a1234567890123456789012345678901234567890123456789012345678901234567890b", match_default, make_array(1, 71, -2, -2));
// fish for problems as brackets go past 8
TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn]", extended, "xacegikmoq", match_default, make_array(1, 8, -2, -2));
TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn][op]", extended, "xacegikmoq", match_default, make_array(1, 9, -2, -2));
TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn][op][qr]", extended, "xacegikmoqy", match_default, make_array(1, 10, -2, -2));
TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn][op][q]", extended, "xacegikmoqy", match_default, make_array(1, 10, -2, -2));
// and as parenthesis go past 9:
TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)", extended, "zabcdefghi", match_default, make_array(1, 9, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, -2, -2));
TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)(i)", extended, "zabcdefghij", match_default, make_array(1, 10, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, -2, -2));
TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)", extended, "zabcdefghijk", match_default, make_array(1, 11, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, -2, -2));
TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)", extended, "zabcdefghijkl", match_default, make_array(1, 12, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, -2, -2));
TEST_REGEX_SEARCH("(a)d|(b)c", extended, "abc", match_default, make_array(1, 3, -1, -1, 1, 2, -2, -2));
TEST_REGEX_SEARCH("_+((www)|(ftp)|(mailto)):_*", extended, "_wwwnocolon _mailto:", match_default, make_array(12, 20, 13, 19, -1, -1, -1, -1, 13, 19, -2, -2));
// subtleties of matching
TEST_REGEX_SEARCH("a\\(b\\)\\?c\\1d", basic|bk_plus_qm, "acd", match_default, make_array(0, 3, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b?c)+d", extended, "accd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("(wee|week)(knights|night)", extended, "weeknights", match_default, make_array(0, 10, 0, 3, 3, 10, -2, -2));
TEST_REGEX_SEARCH(".*", extended, "abc", match_default, make_array(0, 3, -2, 3, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|(c))d", extended, "abd", match_default, make_array(0, 3, 1, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|(c))d", extended, "acd", match_default, make_array(0, 3, 1, 2, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b*|c|e)d", extended, "abbd", match_default, make_array(0, 4, 1, 3, -2, -2));
TEST_REGEX_SEARCH("a(b*|c|e)d", extended, "acd", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b*|c|e)d", extended, "ad", match_default, make_array(0, 2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("a(b?)c", extended, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b?)c", extended, "ac", match_default, make_array(0, 2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("a(b+)c", extended, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b+)c", extended, "abbbc", match_default, make_array(0, 5, 1, 4, -2, -2));
TEST_REGEX_SEARCH("a(b*)c", extended, "ac", match_default, make_array(0, 2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("(a|ab)(bc([de]+)f|cde)", extended, "abcdef", match_default, make_array(0, 6, 0, 1, 1, 6, 3, 5, -2, -2));
TEST_REGEX_SEARCH("a([bc]?)c", extended, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a([bc]?)c", extended, "ac", match_default, make_array(0, 2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("a([bc]+)c", extended, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a([bc]+)c", extended, "abcc", match_default, make_array(0, 4, 1, 3, -2, -2));
TEST_REGEX_SEARCH("a([bc]+)bc", extended, "abcbc", match_default, make_array(0, 5, 1, 3, -2, -2));
TEST_REGEX_SEARCH("a(bb+|b)b", extended, "abb", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(bbb+|bb+|b)b", extended, "abb", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(bbb+|bb+|b)b", extended, "abbb", match_default, make_array(0, 4, 1, 3, -2, -2));
TEST_REGEX_SEARCH("a(bbb+|bb+|b)bb", extended, "abbb", match_default, make_array(0, 4, 1, 2, -2, -2));
TEST_REGEX_SEARCH("(.*).*", extended, "abcdef", match_default, make_array(0, 6, 0, 6, -2, 6, 6, 6, 6, -2, -2));
TEST_REGEX_SEARCH("(a*)*", extended, "bc", match_default, make_array(0, 0, 0, 0, -2, 1, 1, 1, 1, -2, 2, 2, 2, 2, -2, -2));
TEST_REGEX_SEARCH("xyx*xz", extended, "xyxxxxyxxxz", match_default, make_array(5, 11, -2, -2));
// do we get the right subexpression when it is used more than once?
TEST_REGEX_SEARCH("a(b|c)*d", extended, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|c)*d", extended, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c)+d", extended, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|c)+d", extended, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c?)+d", extended, "ad", match_default, make_array(0, 2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,0}d", extended, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,1}d", extended, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,1}d", extended, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,2}d", extended, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,2}d", extended, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,}d", extended, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,}d", extended, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){1,1}d", extended, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|c){1,2}d", extended, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|c){1,2}d", extended, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){1,}d", extended, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|c){1,}d", extended, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,2}d", extended, "acbd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,2}d", extended, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,4}d", extended, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,4}d", extended, "abcbd", match_default, make_array(0, 5, 3, 4, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,4}d", extended, "abcbcd", match_default, make_array(0, 6, 4, 5, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,}d", extended, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,}d", extended, "abcbd", match_default, make_array(0, 5, 3, 4, -2, -2));
// perl only:
TEST_REGEX_SEARCH("a(b|c?)+d", perl, "abcd", match_default, make_array(0, 4, 3, 3, -2, -2));
TEST_REGEX_SEARCH("a(b+|((c)*))+d", perl, "abd", match_default, make_array(0, 3, 2, 2, 2, 2, -1, -1, -2, -2));
@ -123,12 +212,32 @@ void test_tricky_cases()
// preprossor directives:
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", perl, "#define some_symbol", match_default, make_array(0, 19, -1, -1, -2, -2));
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", perl, "#define some_symbol(x) #x", match_default, make_array(0, 25, -1, -1, -2, -2));
// try to match C++ syntax elements:
// line comment:
TEST_REGEX_SEARCH("//[^\\n]*", extended&~no_escape_in_lists, "++i //here is a line comment\n", match_default, make_array(4, 28, -2, -2));
// block comment:
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", extended&~no_escape_in_lists, "/* here is a block comment */", match_default, make_array(0, 29, 26, 27, -2, -2));
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", extended&~no_escape_in_lists, "/**/", match_default, make_array(0, 4, -1, -1, -2, -2));
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", extended&~no_escape_in_lists, "/***/", match_default, make_array(0, 5, -1, -1, -2, -2));
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", extended&~no_escape_in_lists, "/****/", match_default, make_array(0, 6, -1, -1, -2, -2));
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", extended&~no_escape_in_lists, "/*****/", match_default, make_array(0, 7, -1, -1, -2, -2));
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", extended&~no_escape_in_lists, "/*****/*/", match_default, make_array(0, 7, -1, -1, -2, -2));
// preprossor directives:
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", extended&~no_escape_in_lists, "#define some_symbol", match_default, make_array(0, 19, -1, -1, -2, -2));
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", extended&~no_escape_in_lists, "#define some_symbol(x) #x", match_default, make_array(0, 25, -1, -1, -2, -2));
// perl only:
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", perl, "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);", match_default, make_array(0, 53, 30, 42, -2, -2));
// literals:
// POSIX leftmost longest checks:
TEST_REGEX_SEARCH("(aaa)|(\\w+)", extended&~no_escape_in_lists, "a", match_default, make_array(0, 1, -1, -1, 0, 1, -2, -2));
TEST_REGEX_SEARCH("(aaa)|(\\w+)", extended&~no_escape_in_lists, "aa", match_default, make_array(0, 2, -1, -1, 0, 2, -2, -2));
TEST_REGEX_SEARCH("(aaa)|(\\w+)", extended&~no_escape_in_lists, "aaa", match_default, make_array(0, 3, 0, 3, -1, -1, -2, -2));
TEST_REGEX_SEARCH("(aaa)|(\\w+)", extended&~no_escape_in_lists, "aaaa", match_default, make_array(0, 4, -1, -1, 0, 4, -2, -2));
TEST_REGEX_SEARCH("($)|(\\>)", extended&~no_escape_in_lists, "aaaa", match_default, make_array(4, 4, 4, 4, -1, -1, -2, -2));
TEST_REGEX_SEARCH("($)|(\\>)", extended&~no_escape_in_lists, "aaaa", match_default|match_not_eol, make_array(4, 4, -1, -1, 4, 4, -2, -2));
TEST_REGEX_SEARCH("(aaa)(ab)*", extended, "aaaabab", match_default, make_array(0, 7, 0, 3, 5, 7, -2, -2));
}
void test_tricky_cases2()
void test_tricky_cases3()
{
using namespace boost::regex_constants;
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFF", match_default, make_array(0, 4, 0, 4, 0, 4, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2));