diff --git a/build/Jamfile.v2 b/build/Jamfile.v2 index ca7f801f..a914ad5b 100644 --- a/build/Jamfile.v2 +++ b/build/Jamfile.v2 @@ -79,6 +79,10 @@ rule check-icu-config ( ) { gICU_IN_LIB = icuin ; } + else if [ GLOB $(dir)/lib : icui18n.* ] + { + gICU_IN_LIB = icui18n ; + } else if [ GLOB $(dir)/lib64 : libicui18n.* ] { gICU_IN_LIB = icui18n ; diff --git a/doc/history.qbk b/doc/history.qbk index 05a9710e..360f8922 100644 --- a/doc/history.qbk +++ b/doc/history.qbk @@ -8,6 +8,11 @@ [section:history History] +[h4 Boost 1.40] + +* Added support for many Perl 5.10 syntax elements including named +sub-expressions, branch resets and recursive regular expressions. + [h4 Boost 1.38] * [*Breaking change]: empty expressions, and empty alternatives are now diff --git a/doc/html/boost_regex/background_information/examples.html b/doc/html/boost_regex/background_information/examples.html index dd6786c5..39bd0df1 100644 --- a/doc/html/boost_regex/background_information/examples.html +++ b/doc/html/boost_regex/background_information/examples.html @@ -28,7 +28,7 @@ Example Programs
- + Test Programs
@@ -107,7 +107,7 @@ Files: captures_test.cpp.

- + Example programs
@@ -133,7 +133,7 @@ Files: regex_timer.cpp.

- + Code snippets
diff --git a/doc/html/boost_regex/background_information/history.html b/doc/html/boost_regex/background_information/history.html index f04aecc4..084381fc 100644 --- a/doc/html/boost_regex/background_information/history.html +++ b/doc/html/boost_regex/background_information/history.html @@ -25,8 +25,17 @@

History

+
+ + Boost + 1.40 +
+
- + Boost 1.38
@@ -53,7 +62,7 @@
- + Boost 1.34
@@ -76,7 +85,7 @@
- + Boost 1.33.1
@@ -146,7 +155,7 @@
- + Boost 1.33.0
@@ -201,7 +210,7 @@
- + Boost 1.32.1
@@ -209,7 +218,7 @@ Fixed bug in partial matches of bounded repeats of '.'.
- + Boost 1.31.0
diff --git a/doc/html/boost_regex/background_information/locale.html b/doc/html/boost_regex/background_information/locale.html index d6af3988..eb901bae 100644 --- a/doc/html/boost_regex/background_information/locale.html +++ b/doc/html/boost_regex/background_information/locale.html @@ -58,7 +58,7 @@ There are three separate localization mechanisms supported by Boost.Regex:

- + Win32 localization model.
@@ -90,7 +90,7 @@ are treated as "unknown" graphic characters.

- + C localization model.
@@ -114,7 +114,7 @@ libraries including version 1 of this library.

- + C++ localization model.
@@ -151,7 +151,7 @@ in your code. The best way to ensure this is to add the #define to <boost/regex/user.hpp>.

- + Providing a message catalogue
diff --git a/doc/html/boost_regex/background_information/standards.html b/doc/html/boost_regex/background_information/standards.html index 115e5ce3..31c09f0d 100644 --- a/doc/html/boost_regex/background_information/standards.html +++ b/doc/html/boost_regex/background_information/standards.html @@ -28,7 +28,7 @@ Conformance
- + C++

@@ -36,7 +36,7 @@ Report on C++ Library Extensions.

- + ECMAScript / JavaScript
@@ -49,7 +49,7 @@ rather than a Unicode escape sequence; use \x{DDDD} for Unicode escape sequences.

- + Perl

@@ -62,7 +62,7 @@ (??{code}) Not implementable in a compiled strongly typed language.

- + POSIX

@@ -82,7 +82,7 @@ a custom traits class.

- + Unicode

diff --git a/doc/html/boost_regex/captures.html b/doc/html/boost_regex/captures.html index 1b9c755b..c6345106 100644 --- a/doc/html/boost_regex/captures.html +++ b/doc/html/boost_regex/captures.html @@ -35,7 +35,7 @@ accessed.

- + Marked sub-expressions

@@ -218,7 +218,7 @@ output stream.

- + Unmatched Sub-Expressions

@@ -231,7 +231,7 @@ you can determine which sub-expressions matched by accessing the sub_match::matched data member.

- + Repeated Captures

diff --git a/doc/html/boost_regex/format/boost_format_syntax.html b/doc/html/boost_regex/format/boost_format_syntax.html index a927f24b..90053a97 100644 --- a/doc/html/boost_regex/format/boost_format_syntax.html +++ b/doc/html/boost_regex/format/boost_format_syntax.html @@ -32,7 +32,7 @@ '$', '\', '(', ')', '?', and ':'.

- + Grouping

@@ -40,7 +40,7 @@ you want a to output literal parenthesis.

- + Conditionals

@@ -79,7 +79,7 @@ ?{NAME}true-expression:false-expression

- + Placeholder Sequences
@@ -319,7 +319,7 @@ as a literal.

- + Escape Sequences
diff --git a/doc/html/boost_regex/install.html b/doc/html/boost_regex/install.html index ac178cc9..3f61840d 100644 --- a/doc/html/boost_regex/install.html +++ b/doc/html/boost_regex/install.html @@ -49,7 +49,7 @@ file before you can use it, instructions for specific platforms are as follows:

- + Building with bjam

@@ -58,7 +58,7 @@ started guide for more information.

- + Building With Unicode and ICU Support
@@ -96,11 +96,11 @@ ICU you are using is binary compatible with the toolset you use to build Boost.

- + Building via makefiles
- + Borland C++ Builder:
- + GCC(2.95 and later)

@@ -302,7 +302,7 @@ see the config library documentation.

- + Sun Workshop 6.1

@@ -347,7 +347,7 @@ will build v9 variants of the regex library named libboost_regex_v9.a etc.

- + Makefiles for Other compilers
diff --git a/doc/html/boost_regex/ref/bad_expression.html b/doc/html/boost_regex/ref/bad_expression.html index 6ab5beae..f4b5f922 100644 --- a/doc/html/boost_regex/ref/bad_expression.html +++ b/doc/html/boost_regex/ref/bad_expression.html @@ -27,7 +27,7 @@ bad_expression
- + Synopsis
#include <boost/pattern_except.hpp>
@@ -54,7 +54,7 @@
 } // namespace boost
 
- + Description
regex_error(const std::string& s, regex_constants::error_type err, std::ptrdiff_t pos);
diff --git a/doc/html/boost_regex/ref/basic_regex.html b/doc/html/boost_regex/ref/basic_regex.html
index eb7cb84c..30d3449c 100644
--- a/doc/html/boost_regex/ref/basic_regex.html
+++ b/doc/html/boost_regex/ref/basic_regex.html
@@ -27,7 +27,7 @@
  basic_regex
 
 
- + Synopsis
#include <boost/regex.hpp>
@@ -244,7 +244,7 @@
 } // namespace boost
 
- + Description

@@ -327,7 +327,7 @@ basic_regex.

-

Table 1. basic_regex default construction postconditions

+

Table 1. basic_regex default construction postconditions

@@ -407,7 +407,7 @@ flags specified in f.

-

Table 2. Postconditions for basic_regex construction

+

Table 2. Postconditions for basic_regex construction

@@ -512,7 +512,7 @@ specified in f.

-

Table 3. Postconditions for basic_regex construction

+

Table 3. Postconditions for basic_regex construction

@@ -616,7 +616,7 @@ according the option flags specified in f.

-

Table 4. Postconditions for basic_regex construction

+

Table 4. Postconditions for basic_regex construction

@@ -727,7 +727,7 @@ flags specified in f.

-

Table 5. Postconditions for basic_regex construction

+

Table 5. Postconditions for basic_regex construction

@@ -829,7 +829,7 @@ flags specified in f.

-

Table 6. Postconditions for basic_regex construction

+

Table 6. Postconditions for basic_regex construction

@@ -1043,7 +1043,7 @@ in f.

-

Table 7. Postconditions for basic_regex::assign

+

Table 7. Postconditions for basic_regex::assign

diff --git a/doc/html/boost_regex/ref/concepts/traits_concept.html b/doc/html/boost_regex/ref/concepts/traits_concept.html index 1dcd410c..3d051b83 100644 --- a/doc/html/boost_regex/ref/concepts/traits_concept.html +++ b/doc/html/boost_regex/ref/concepts/traits_concept.html @@ -34,7 +34,7 @@ Boost-specific enhanced interface.

- + Minimal requirements.
@@ -381,7 +381,7 @@
- + Additional Optional Requirements
diff --git a/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html b/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html index fab2dea1..8e482ee8 100644 --- a/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html +++ b/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html @@ -34,7 +34,7 @@ previous version of Boost.Regex and will not be further updated:

- + Algorithm regex_format
diff --git a/doc/html/boost_regex/ref/error_type.html b/doc/html/boost_regex/ref/error_type.html index 26ce70d1..331567be 100644 --- a/doc/html/boost_regex/ref/error_type.html +++ b/doc/html/boost_regex/ref/error_type.html @@ -27,7 +27,7 @@ error_type
- + Synopsis

@@ -57,7 +57,7 @@ } // namespace boost

- + Description

diff --git a/doc/html/boost_regex/ref/match_flag_type.html b/doc/html/boost_regex/ref/match_flag_type.html index 941bdcb9..102543d0 100644 --- a/doc/html/boost_regex/ref/match_flag_type.html +++ b/doc/html/boost_regex/ref/match_flag_type.html @@ -69,7 +69,7 @@ } // namespace boost

- + Description

diff --git a/doc/html/boost_regex/ref/match_results.html b/doc/html/boost_regex/ref/match_results.html index 2d1c346c..6e8f7c0b 100644 --- a/doc/html/boost_regex/ref/match_results.html +++ b/doc/html/boost_regex/ref/match_results.html @@ -27,7 +27,7 @@ match_results

- + Synopsis
#include <boost/regex.hpp>
@@ -166,7 +166,7 @@
          match_results<BidirectionalIterator, Allocator>& m2);
 
- + Description

diff --git a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html index 8a5e4325..4ea31468 100644 --- a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html +++ b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html @@ -43,7 +43,7 @@ on to the "real" algorithm.

- + u32regex_match

@@ -89,7 +89,7 @@ }

- + u32regex_search

@@ -128,7 +128,7 @@ }

- + u32regex_replace

diff --git a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html index f1fd4fed..892034a2 100644 --- a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html +++ b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html @@ -28,7 +28,7 @@ Unicode Aware Regex Iterators

- + u32regex_iterator

@@ -126,7 +126,7 @@ Provided of course that the input is encoded as UTF-8.

- + u32regex_token_iterator

diff --git a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html index 4a6b3297..6148023b 100644 --- a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html +++ b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html @@ -34,7 +34,7 @@ here they are anyway:

- + regex_match

@@ -82,7 +82,7 @@ }

- + regex_match (second overload)
@@ -110,7 +110,7 @@ }
- + regex_search

@@ -149,7 +149,7 @@ }

- + regex_search (second overload)
@@ -164,7 +164,7 @@ + s.GetLength(), e, f);

- + regex_replace

diff --git a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html index a2df7d3d..54858bc8 100644 --- a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html +++ b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html @@ -32,7 +32,7 @@ an MFC/ATL string to a regex_iterator or regex_token_iterator:

- + regex_iterator creation helper
@@ -68,7 +68,7 @@ }
- + regex_token_iterator creation helpers
diff --git a/doc/html/boost_regex/ref/posix.html b/doc/html/boost_regex/ref/posix.html index d8b73499..6424b99c 100644 --- a/doc/html/boost_regex/ref/posix.html +++ b/doc/html/boost_regex/ref/posix.html @@ -165,7 +165,7 @@

- + regcomp

@@ -379,7 +379,7 @@

- + regerror

@@ -467,7 +467,7 @@

- + regexec

@@ -537,7 +537,7 @@

- + regfree

diff --git a/doc/html/boost_regex/ref/regex_iterator.html b/doc/html/boost_regex/ref/regex_iterator.html index 9930174b..2a101c5e 100644 --- a/doc/html/boost_regex/ref/regex_iterator.html +++ b/doc/html/boost_regex/ref/regex_iterator.html @@ -78,7 +78,7 @@ regex_constants::match_flag_type m = regex_constants::match_default);

- + Description

@@ -436,7 +436,7 @@ m.

- + Examples

diff --git a/doc/html/boost_regex/ref/regex_match.html b/doc/html/boost_regex/ref/regex_match.html index 5f432601..9b1a7108 100644 --- a/doc/html/boost_regex/ref/regex_match.html +++ b/doc/html/boost_regex/ref/regex_match.html @@ -80,7 +80,7 @@ match_flag_type flags = match_default);

- + Description
template <class BidirectionalIterator, class Allocator, class charT, class traits>
@@ -360,7 +360,7 @@
         Effects: Returns the result of regex_match(s.begin(), s.end(), e, flags).
       

- + Examples

diff --git a/doc/html/boost_regex/ref/regex_replace.html b/doc/html/boost_regex/ref/regex_replace.html index 650c1178..70a65308 100644 --- a/doc/html/boost_regex/ref/regex_replace.html +++ b/doc/html/boost_regex/ref/regex_replace.html @@ -53,7 +53,7 @@ match_flag_type flags = match_default);

- + Description
template <class OutputIterator, class BidirectionalIterator, class traits, class charT>
@@ -163,7 +163,7 @@
         and then returns result.
       

- + Examples

diff --git a/doc/html/boost_regex/ref/regex_search.html b/doc/html/boost_regex/ref/regex_search.html index 282023dd..44faddee 100644 --- a/doc/html/boost_regex/ref/regex_search.html +++ b/doc/html/boost_regex/ref/regex_search.html @@ -73,7 +73,7 @@ match_flag_type flags = match_default);

- + Description
template <class BidirectionalIterator, class Allocator, class charT, class traits>
@@ -355,7 +355,7 @@
         Effects: Returns the result of regex_search(s.begin(), s.end(), e, flags).
       

- + Examples

diff --git a/doc/html/boost_regex/ref/regex_token_iterator.html b/doc/html/boost_regex/ref/regex_token_iterator.html index 864905fd..8728ee16 100644 --- a/doc/html/boost_regex/ref/regex_token_iterator.html +++ b/doc/html/boost_regex/ref/regex_token_iterator.html @@ -136,7 +136,7 @@ regex_constants::match_flag_type m = regex_constants::match_default);

- + Description

@@ -383,7 +383,7 @@ m.

- + Examples

diff --git a/doc/html/boost_regex/ref/regex_traits.html b/doc/html/boost_regex/ref/regex_traits.html index eeb38e8e..cccf4917 100644 --- a/doc/html/boost_regex/ref/regex_traits.html +++ b/doc/html/boost_regex/ref/regex_traits.html @@ -46,7 +46,7 @@ } // namespace boost

- + Description

diff --git a/doc/html/boost_regex/ref/sub_match.html b/doc/html/boost_regex/ref/sub_match.html index becc2a20..999f0ce4 100644 --- a/doc/html/boost_regex/ref/sub_match.html +++ b/doc/html/boost_regex/ref/sub_match.html @@ -329,11 +329,11 @@ } // namespace boost

- + Description
- + Members

@@ -473,7 +473,7 @@

- + sub_match non-member operators
@@ -1008,7 +1008,7 @@ + m2.str().

- + Stream inserter

diff --git a/doc/html/boost_regex/syntax/basic_extended.html b/doc/html/boost_regex/syntax/basic_extended.html index 787c411f..6821fd0d 100644 --- a/doc/html/boost_regex/syntax/basic_extended.html +++ b/doc/html/boost_regex/syntax/basic_extended.html @@ -28,7 +28,7 @@ Expression Syntax

- + Synopsis

@@ -46,7 +46,7 @@

- + POSIX Extended Syntax

@@ -56,7 +56,7 @@

.[{()\*+?|^$
- + Wildcard:

@@ -74,7 +74,7 @@

- + Anchors:

@@ -86,7 +86,7 @@ of an expression, or the last character of a sub-expression.

- + Marked sub-expressions:
@@ -98,7 +98,7 @@ to by a back-reference.

- + Repeats:

@@ -184,7 +184,7 @@ cab operator to be applied to.

- + Back references:

@@ -214,7 +214,7 @@ cab

- + Alternation

@@ -227,7 +227,7 @@ cab will match either of "abd" or "abef".

- + Character sets:
@@ -240,7 +240,7 @@ cab A bracket expression may contain any combination of the following:

- + Single characters:
@@ -249,7 +249,7 @@ cab or 'c'.

- + Character ranges:
@@ -265,7 +265,7 @@ cab the code points of the characters only.

- + Negation:

@@ -274,7 +274,7 @@ cab range a-c.

- + Character classes:
@@ -284,7 +284,7 @@ cab character class names.

- + Collating Elements:
@@ -312,7 +312,7 @@ cab matches a NUL character.

- + Equivalence classes:
@@ -329,7 +329,7 @@ cab or even all locales on one platform.

- + Combinations:

@@ -337,7 +337,7 @@ cab [[:digit:]a-c[.NUL.]].

- + Escapes

@@ -363,7 +363,7 @@ cab extensions are also supported by Boost.Regex:

- + Escapes matching a specific character
@@ -552,7 +552,7 @@ cab
- + "Single character" character classes:
@@ -706,7 +706,7 @@ cab
- + Character Properties
@@ -813,7 +813,7 @@ cab matches any "digit" character, as does \p{digit}.

- + Word Boundaries

@@ -888,7 +888,7 @@ cab

- + Buffer boundaries
@@ -979,7 +979,7 @@ cab
- + Continuation Escape
@@ -991,7 +991,7 @@ cab match to start where the last one ended.

- + Quoting escape
@@ -1005,7 +1005,7 @@ cab \*+aaa
- + Unicode escapes
@@ -1056,7 +1056,7 @@ cab
- + Any other escape
@@ -1065,7 +1065,7 @@ cab \@ matches a literal '@'.

- + Operator precedence
@@ -1101,7 +1101,7 @@ cab
- + What Gets Matched
@@ -1111,11 +1111,11 @@ cab rule.

- + Variations

- + Egrep

@@ -1136,7 +1136,7 @@ cab used with the -E option.

- + awk

@@ -1150,7 +1150,7 @@ cab these by default anyway.

- + Options

@@ -1163,7 +1163,7 @@ cab modify how the case and locale sensitivity are to be applied.

- + References

diff --git a/doc/html/boost_regex/syntax/basic_syntax.html b/doc/html/boost_regex/syntax/basic_syntax.html index eeb9ef51..0c6e6604 100644 --- a/doc/html/boost_regex/syntax/basic_syntax.html +++ b/doc/html/boost_regex/syntax/basic_syntax.html @@ -28,7 +28,7 @@ Expression Syntax

- + Synopsis

@@ -45,7 +45,7 @@

- + POSIX Basic Syntax

@@ -55,7 +55,7 @@

.[\*^$
- + Wildcard:

@@ -73,7 +73,7 @@

- + Anchors:

@@ -85,7 +85,7 @@ of an expression, or the last character of a sub-expression.

- + Marked sub-expressions:
@@ -97,7 +97,7 @@ by a back-reference.

- + Repeats:

@@ -155,7 +155,7 @@ aaaa to.

- + Back references:

@@ -173,7 +173,7 @@ aaaa

aaabba
- + Character sets:
@@ -186,7 +186,7 @@ aaaa A bracket expression may contain any combination of the following:

- + Single characters:
@@ -195,7 +195,7 @@ aaaa or 'c'.

- + Character ranges:
@@ -211,7 +211,7 @@ aaaa of the characters only.

- + Negation:

@@ -220,7 +220,7 @@ aaaa range a-c.

- + Character classes:
@@ -230,7 +230,7 @@ aaaa character class names.

- + Collating Elements:
@@ -259,7 +259,7 @@ aaaa element names.

- + Equivalence classes:
@@ -276,7 +276,7 @@ aaaa or even all locales on one platform.

- + Combinations:

@@ -284,7 +284,7 @@ aaaa [[:digit:]a-c[.NUL.]].

- + Escapes

@@ -299,7 +299,7 @@ aaaa will match either a literal '\' or a '^'.

- + What Gets Matched

@@ -309,13 +309,13 @@ aaaa rule.

- + Variations

- + Grep

@@ -333,7 +333,7 @@ aaaa As its name suggests, this behavior is consistent with the Unix utility grep.

- + emacs

@@ -613,7 +613,7 @@ aaaa leftmost-longest rule.

- + Options

@@ -627,7 +627,7 @@ aaaa options modify how the case and locale sensitivity are to be applied.

- + References

diff --git a/doc/html/boost_regex/syntax/perl_syntax.html b/doc/html/boost_regex/syntax/perl_syntax.html index 46aa35e1..cc7a368a 100644 --- a/doc/html/boost_regex/syntax/perl_syntax.html +++ b/doc/html/boost_regex/syntax/perl_syntax.html @@ -28,7 +28,7 @@ Syntax

- + Synopsis

@@ -43,7 +43,7 @@ boost::regex e2(my_expression, boost::regex::perl|boost::regex::icase);

- + Perl Regular Expression Syntax

@@ -53,7 +53,7 @@

.[{()\*+?|^$
- + Wildcard

@@ -73,7 +73,7 @@

- + Anchors

@@ -83,7 +83,7 @@ A '$' character shall match the end of a line.

- + Marked sub-expressions
@@ -94,7 +94,7 @@ can also repeated, or referred to by a back-reference.

- + Non-marking grouping
@@ -107,7 +107,7 @@ without splitting out any separate sub-expressions.

- + Repeats

@@ -188,7 +188,7 @@ to be applied to.

- + Non greedy repeats
@@ -218,7 +218,7 @@ while consuming as little input as possible.

- + Possessive repeats
@@ -250,7 +250,7 @@ while giving nothing back.

- + Back references

@@ -360,7 +360,7 @@ named "two".

- + Alternation

@@ -387,7 +387,7 @@ (?:abc)?? has exactly the same effect.

- + Character sets

@@ -399,7 +399,7 @@ A bracket expression may contain any combination of the following:

- + Single characters

@@ -407,7 +407,7 @@ 'b', or 'c'.

- + Character ranges
@@ -421,7 +421,7 @@ sensitive.

- + Negation

@@ -430,7 +430,7 @@ matches any character that is not in the range a-c.

- + Character classes
@@ -441,7 +441,7 @@ class names.

- + Collating Elements
@@ -463,7 +463,7 @@ matches a \0 character.

- + Equivalence classes
@@ -480,7 +480,7 @@ or even all locales on one platform.

- + Escaped Characters
@@ -492,7 +492,7 @@ is not a "word" character.

- + Combinations

@@ -500,7 +500,7 @@ [[:digit:]a-c[.NUL.]].

- + Escapes

@@ -692,7 +692,7 @@

- + "Single character" character classes:
@@ -894,7 +894,7 @@
- + Character Properties
@@ -1002,7 +1002,7 @@ as does \p{digit}.

- + Word Boundaries

@@ -1021,7 +1021,7 @@ \B Matches only when not at a word boundary.

- + Buffer boundaries

@@ -1046,7 +1046,7 @@ to the regular expression \n*\z

- + Continuation Escape
@@ -1058,7 +1058,7 @@ one ended.

- + Quoting escape

@@ -1071,7 +1071,7 @@ \*+aaa

- + Unicode escapes

@@ -1081,7 +1081,7 @@ followed by a sequence of zero or more combining characters.

- + Matching Line Endings
@@ -1090,7 +1090,7 @@ sequence, specifically it is identical to the expression (?>\x0D\x0A?|[\x0A-\x0C\x85\x{2028}\x{2029}]).

- + Keeping back some text
@@ -1105,7 +1105,7 @@ This can be used to simulate variable width lookbehind assertions.

- + Any other escape
@@ -1114,7 +1114,7 @@ \@ matches a literal '@'.

- + Perl Extended Patterns
@@ -1123,7 +1123,7 @@ (?.

- + Named Subexpressions
@@ -1145,14 +1145,14 @@ format string for search and replace operations, or in the match_results member functions.

- + Comments

(?# ... ) is treated as a comment, its contents are ignored.

- + Modifiers

@@ -1166,7 +1166,7 @@ pattern only.

- + Non-marking groups
@@ -1175,7 +1175,7 @@ an additional sub-expression.

- + Branch reset

@@ -1197,7 +1197,7 @@ # 1 2 2 3 2 3 4

- + Lookahead

@@ -1220,7 +1220,7 @@ could be used to validate the password.

- + Lookbehind

@@ -1234,7 +1234,7 @@ (pattern must be of fixed length).

- + Independent sub-expressions
@@ -1247,7 +1247,7 @@ no match is found at all.

- + Recursive Expressions
@@ -1271,7 +1271,7 @@ to the next sub-expression to be declared.

- + Conditional Expressions
@@ -1311,9 +1311,15 @@ yes-pattern if we are executing inside a recursion to sub-expression N, otherwise executes no-pattern. +
  • +(?(DEFINE)never-executed-pattern) Defines a block of + code that is never executed and matches no characters: this is usually + used to define one or more named sub-expressions which are referred to from + elsewhere in the pattern. +
  • - + Operator precedence
    @@ -1348,7 +1354,7 @@

    - + What gets matched

    @@ -1523,7 +1529,7 @@

    - + Variations

    @@ -1532,7 +1538,7 @@ and JScript are all synonyms for perl.

    - + Options

    @@ -1544,7 +1550,7 @@ are to be applied.

    - + Pattern Modifiers

    @@ -1556,7 +1562,7 @@ and no_mod_s.

    - + References

    diff --git a/doc/html/boost_regex/unicode.html b/doc/html/boost_regex/unicode.html index 5abbf2a0..94b8709f 100644 --- a/doc/html/boost_regex/unicode.html +++ b/doc/html/boost_regex/unicode.html @@ -30,7 +30,7 @@ There are two ways to use Boost.Regex with Unicode strings:

    - + Rely on wchar_t

    @@ -56,7 +56,7 @@

    - + Use a Unicode Aware Regular Expression Type.
    diff --git a/doc/html/index.html b/doc/html/index.html index 51da76e4..cb420c49 100644 --- a/doc/html/index.html +++ b/doc/html/index.html @@ -28,7 +28,7 @@
    -

    +

    Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

    @@ -196,7 +196,7 @@

    - +

    Last revised: July 16, 2009 at 15:54:11 GMT

    Last revised: July 29, 2009 at 15:59:46 GMT


    diff --git a/doc/syntax_perl.qbk b/doc/syntax_perl.qbk index d97eb989..716df83c 100644 --- a/doc/syntax_perl.qbk +++ b/doc/syntax_perl.qbk @@ -545,6 +545,8 @@ executes /no-pattern/. executes /no-pattern/. * [^(?(R['N])yes-pattern|no-pattern)] Executes /yes-pattern/ if we are executing inside a recursion to sub-expression /N/, otherwise executes /no-pattern/. +* [^(?(DEFINE)never-exectuted-pattern)] Defines a block of code that is never executed and matches no characters: +this is usually used to define one or more named sub-expressions which are refered to from elsewhere in the pattern. [h4 Operator precedence] diff --git a/include/boost/regex/v4/basic_regex.hpp b/include/boost/regex/v4/basic_regex.hpp index 617d578e..09b0467b 100644 --- a/include/boost/regex/v4/basic_regex.hpp +++ b/include/boost/regex/v4/basic_regex.hpp @@ -20,6 +20,7 @@ #define BOOST_REGEX_V4_BASIC_REGEX_HPP #include +#include #ifdef BOOST_MSVC #pragma warning(push) @@ -68,34 +69,53 @@ template class named_subexpressions_base { public: - virtual int get_id(const charT* i, const charT* j) = 0; + virtual int get_id(const charT* i, const charT* j)const = 0; + virtual int get_id(std::size_t hash)const = 0; #ifdef __GNUC__ // warning supression: virtual ~named_subexpressions_base(){} #endif }; +template +inline std::size_t hash_value_from_capture_name(Iterator i, Iterator j) +{ + std::size_t r = boost::hash_range(i, j); + r %= ((std::numeric_limits::max)() - 10001); + r += 10000; + return r; +} + template class named_subexpressions : public named_subexpressions_base { struct name { name(const charT* i, const charT* j, int idx) - : n(i, j), index(idx) {} - std::vector n; + : /*n(i, j), */ index(idx) + { + hash = hash_value_from_capture_name(i, j); + } + name(std::size_t h, int idx) + : index(idx), hash(h) + { + } + //std::vector n; int index; + std::size_t hash; bool operator < (const name& other)const { - return std::lexicographical_compare(n.begin(), n.end(), other.n.begin(), other.n.end()); + 
return hash < other.hash; //std::lexicographical_compare(n.begin(), n.end(), other.n.begin(), other.n.end()); } bool operator == (const name& other)const { - return n == other.n; + return hash == other.hash; //n == other.n; } void swap(name& other) { - n.swap(other.n); + //n.swap(other.n); std::swap(index, other.index); + std::swap(hash, other.hash); } }; public: @@ -105,7 +125,7 @@ public: m_sub_names.push_back(name(i, j, index)); bubble_down_one(m_sub_names.begin(), m_sub_names.end()); } - int get_id(const charT* i, const charT* j) + int get_id(const charT* i, const charT* j)const { name t(i, j, 0); typename std::vector::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); @@ -115,6 +135,16 @@ public: } return -1; } + int get_id(std::size_t h)const + { + name t(h, 0); + typename std::vector::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); + if((pos != m_sub_names.end()) && (*pos == t)) + { + return pos->index; + } + return -1; + } private: std::vector m_sub_names; }; @@ -126,7 +156,7 @@ class named_subexpressions_converter : public named_subexpressions_base public: named_subexpressions_converter(boost::shared_ptr > s) : m_converter(s) {} - virtual int get_id(const charT* i, const charT* j) + int get_id(const charT* i, const charT* j)const { if(i == j) return -1; @@ -138,6 +168,10 @@ public: } return m_converter->get_id(&v[0], &v[0] + v.size()); } + int get_id(std::size_t h)const + { + return m_converter->get_id(h); + } }; template diff --git a/include/boost/regex/v4/basic_regex_creator.hpp b/include/boost/regex/v4/basic_regex_creator.hpp index d6a5c09b..6f005054 100644 --- a/include/boost/regex/v4/basic_regex_creator.hpp +++ b/include/boost/regex/v4/basic_regex_creator.hpp @@ -762,29 +762,57 @@ void basic_regex_creator::fixup_recursions(re_syntax_base* state) { switch(state->type) { + case syntax_element_assert_backref: + { + // just check that the index is valid: + int id = static_cast(state)->index; 
+ if(id < 0) + { + id = -id-1; + if(id >= 10000) + { + id = m_pdata->get_id(id); + if(id <= 0) + { + // check of sub-expression that doesn't exist: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern); + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + } + } + } + break; case syntax_element_recurse: { bool ok = false; re_syntax_base* p = base; - /* - if(static_cast(state)->alt.i == 0) + int id = static_cast(state)->alt.i; + if(id > 10000) + id = m_pdata->get_id(id); + while(p) { - ok = true; - static_cast(state)->alt.p = p; - } - else - {*/ - while(p) + if((p->type == syntax_element_startmark) && (static_cast(p)->index == id)) { - if((p->type == syntax_element_startmark) && (static_cast(p)->index == static_cast(state)->alt.i)) - { - static_cast(state)->alt.p = p; - ok = true; - break; - } - p = p->next.p; + static_cast(state)->alt.p = p; + ok = true; + break; } - //} + p = p->next.p; + } if(!ok) { // recursion to sub-expression that doesn't exist: diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index a9e77b3b..7d2f5543 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -1961,8 +1961,27 @@ insert_recursion: int v = this->m_traits.toi(m_position, m_end, 10); if(*m_position == charT('R')) { - ++m_position; - v = -this->m_traits.toi(m_position, m_end, 10); + if(++m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + 
if(*m_position == charT('&')) + { + const charT* base = ++m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + v = -static_cast(hash_value_from_capture_name(base, m_position)); + } + else + { + v = -this->m_traits.toi(m_position, m_end, 10); + } re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); br->index = v < 0 ? (v - 1) : 0; if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) @@ -1976,6 +1995,58 @@ insert_recursion: return false; } } + else if((*m_position == charT('\'')) || (*m_position == charT('<'))) + { + const charT* base = ++m_position; + while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\''))) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + v = static_cast(hash_value_from_capture_name(base, m_position)); + re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = v; + if((*m_position != charT('>')) && (*m_position != charT('\'')) || (++m_position == m_end)) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + } + else if(*m_position == charT('D')) + { + const char* def = "DEFINE"; + while(*def && (m_position != m_end) && (*m_position == charT(*def))) + ++m_position, ++def; + if((m_position == m_end) || *def) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + 
re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = 9999; // special magic value! + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + } else if(v > 0) { re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); @@ -2081,6 +2152,43 @@ named_capture_jump: } goto insert_recursion; } + if(*m_position == charT('&')) + { + ++m_position; + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + v = static_cast(hash_value_from_capture_name(base, m_position)); + goto insert_recursion; + } + if(*m_position == charT('P')) + { + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + if(*m_position == charT('>')) + { + ++m_position; + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + v = static_cast(hash_value_from_capture_name(base, m_position)); + goto insert_recursion; + } + } // // lets assume that we have a (?imsx) group and try and parse it: // @@ -2183,9 +2291,20 @@ option_group_jump: } else if(this->getaddress(static_cast(b)->alt.i, b)->type == syntax_element_alt) { + // Can't have seen more than one alternative: fail(regex_constants::error_bad_pattern, m_position - m_base); return false; } + else + { + // We must *not* have seen an 
alternative inside a (DEFINE) block: + b = this->getaddress(b->next.i, b); + if((b->type == syntax_element_assert_backref) && (static_cast(b)->index == 9999)) + { + fail(regex_constants::error_bad_pattern, m_position - m_base); + return false; + } + } // check for invalid repetition of next state: b = this->getaddress(expected_alt_point); b = this->getaddress(static_cast(b)->next.i, b); diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index a3eed0c2..fd439f84 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -714,8 +714,16 @@ inline bool perl_matcher::match_assert_backref( // return true if marked sub-expression N has been matched: int index = static_cast(pstate)->index; bool result; - if(index > 0) + if(index == 9999) { + // Magic value for a (DEFINE) block: + return false; + } + else if(index > 0) + { + // Check if index is a hash value: + if(index >= 10000) + index = re.get_data().get_id(index); // Have we matched subexpression "index"? result = (*m_presult)[index].matched; pstate = pstate->next.p; @@ -724,7 +732,10 @@ inline bool perl_matcher::match_assert_backref( { // Have we recursed into subexpression "index"? // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1. 
- result = recursion_stack_position && ((recursion_stack[recursion_stack_position-1].id == -index-1) || (index == 0)); + int id = -index-1; + if(id >= 10000) + id = re.get_data().get_id(id); + result = recursion_stack_position && ((recursion_stack[recursion_stack_position-1].id == id) || (index == 0)); pstate = pstate->next.p; } return result; diff --git a/test/regress/test_perl_ex.cpp b/test/regress/test_perl_ex.cpp index 764d0c43..3c58fa42 100644 --- a/test/regress/test_perl_ex.cpp +++ b/test/regress/test_perl_ex.cpp @@ -670,7 +670,7 @@ void test_mark_resets() TEST_REGEX_SEARCH("(?|(abc)|(xyz))(?1)", perl, "xyzxyz", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("^X(?5)(a)(?|(b)|(q))(c)(d)(Y)", perl, "XYabcdY", match_default, make_array(0, 7, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, -2, -2)); TEST_INVALID_REGEX("^X(?5)(a)(?|(b)|(q))(c)(d)Y", perl); - //TEST_REGEX_SEARCH("^X(?&N)(a)(?|(b)|(q))(c)(d)(?Y)", perl, "XYabcdY", match_default, make_array(0, 7, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, -2, -2)); + TEST_REGEX_SEARCH("^X(?&N)(a)(?|(b)|(q))(c)(d)(?Y)", perl, "XYabcdY", match_default, make_array(0, 7, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, -2, -2)); TEST_REGEX_SEARCH("^X(?7)(a)(?|(b)|(q)(r)(s))(c)(d)(Y)", perl, "XYabcdY", match_default, make_array(0, 7, 2, 3, 3, 4, -1, -1, -1, -1, 4, 5, 5, 6, 6, 7, -2, -2)); TEST_REGEX_SEARCH("^X(?7)(a)(?|(b|(r)(s))|(q))(c)(d)(Y)", perl, "XYabcdY", match_default, make_array(0, 7, 2, 3, 3, 4, -1, -1, -1, -1, 4, 5, 5, 6, 6, 7, -2, -2)); TEST_REGEX_SEARCH("^X(?7)(a)(?|(b|(?|(r)|(t))(s))|(q))(c)(d)(Y)", perl, "XYabcdY", match_default, make_array(0, 7, 2, 3, 3, 4, -1, -1, -1, -1, 4, 5, 5, 6, 6, 7, -2, -2)); @@ -833,5 +833,64 @@ void test_recursion() TEST_REGEX_SEARCH("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", perl|mod_x, "", match_default, make_array(4, 6, -2, -2)); TEST_REGEX_SEARCH("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", perl|mod_x, "(?.)\\W*(?&one)\\W*\\k|)|(?(?.)\\W*(?&three)\\W*\\k'four'|\\W*.\\W*))\\W*$", perl|mod_x|icase, "Satan, oscillate my 
metallic sonatas!", match_default, make_array(0, 37, -1, -1, -1, -1, 0, 36, 0, 1, -2, -2)); + TEST_REGEX_SEARCH("(?'abc'a|b)(?d|e)(?&abc){2}", perl|mod_x, "bdaa", match_default, make_array(0, 4, 0, 1, 1, 2, -2, -2)); + TEST_REGEX_SEARCH("(?'abc'a|b)(?d|e)(?&abc){2}", perl|mod_x, "bdab", match_default, make_array(0, 4, 0, 1, 1, 2, -2, -2)); + TEST_REGEX_SEARCH("(?'abc'a|b)(?d|e)(?&abc){2}", perl|mod_x, "bddd", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("(?&abc)X(?P)", perl|mod_x, "abcPXP123", match_default, make_array(3, 6, 5, 6, -2, -2)); + TEST_REGEX_SEARCH("(?:a(?&abc)b)*(?x)", perl|mod_x, "123axbaxbaxbx456", match_default, make_array(3, 13, 12, 13 , -2, -2)); + TEST_REGEX_SEARCH("(?:a(?&abc)b){1,5}(?x)", perl|mod_x, "123axbaxbaxbx456", match_default, make_array(3, 13, 12, 13 , -2, -2)); + TEST_REGEX_SEARCH("(?:a(?&abc)b){2,5}(?x)", perl|mod_x, "123axbaxbaxbx456", match_default, make_array(3, 13, 12, 13 , -2, -2)); + TEST_REGEX_SEARCH("(?:a(?&abc)b){2,}(?x)", perl|mod_x, "123axbaxbaxbx456", match_default, make_array(3, 13, 12, 13 , -2, -2)); + TEST_INVALID_REGEX("(?)(?&)", perl|mod_x); + TEST_INVALID_REGEX("(?)(?&a)", perl|mod_x); + TEST_INVALID_REGEX("(?)(?&aaaaaaaaaaaaaaaaaaaaaaa)", perl|mod_x); + TEST_INVALID_REGEX("(?&N)[]a(?)](?abc)", perl|mod_x); + TEST_INVALID_REGEX("(?&N)[]a(?)](abc)", perl|mod_x); + TEST_INVALID_REGEX("(?&N)[]a(?)](abc)", perl|mod_x); + TEST_REGEX_SEARCH("^X(?&N)(a)(?|(b)|(q))(c)(d)(?Y)", perl|mod_x, "XYabcdY", match_default, make_array(0, 7, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, -2, -2)); + // And again with (?P> : + //TEST_REGEX_SEARCH("^\\W*(?:(?(?.)\\W*(?&one)\\W*\\k|)|(?(?.)\\W*(?&three)\\W*\\k'four'|\\W*.\\W*))\\W*$", perl|mod_x|icase, "Satan, oscillate my metallic sonatas!", match_default, make_array(0, 37, -1, -1, -1, -1, 0, 36, 0, 1, -2, -2)); + TEST_REGEX_SEARCH("(?'abc'a|b)(?d|e)(?P>abc){2}", perl|mod_x, "bdaa", match_default, make_array(0, 4, 0, 1, 1, 2, -2, -2)); + TEST_REGEX_SEARCH("(?'abc'a|b)(?d|e)(?P>abc){2}", 
perl|mod_x, "bdab", match_default, make_array(0, 4, 0, 1, 1, 2, -2, -2)); + TEST_REGEX_SEARCH("(?'abc'a|b)(?d|e)(?P>abc){2}", perl|mod_x, "bddd", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("(?P>abc)X(?P)", perl|mod_x, "abcPXP123", match_default, make_array(3, 6, 5, 6, -2, -2)); + TEST_REGEX_SEARCH("(?:a(?P>abc)b)*(?x)", perl|mod_x, "123axbaxbaxbx456", match_default, make_array(3, 13, 12, 13 , -2, -2)); + TEST_REGEX_SEARCH("(?:a(?P>abc)b){1,5}(?x)", perl|mod_x, "123axbaxbaxbx456", match_default, make_array(3, 13, 12, 13 , -2, -2)); + TEST_REGEX_SEARCH("(?:a(?P>abc)b){2,5}(?x)", perl|mod_x, "123axbaxbaxbx456", match_default, make_array(3, 13, 12, 13 , -2, -2)); + TEST_REGEX_SEARCH("(?:a(?P>abc)b){2,}(?x)", perl|mod_x, "123axbaxbaxbx456", match_default, make_array(3, 13, 12, 13 , -2, -2)); + TEST_INVALID_REGEX("(?)(?P>)", perl|mod_x); + TEST_INVALID_REGEX("(?)(?P>a)", perl|mod_x); + TEST_INVALID_REGEX("(?)(?P>aaaaaaaaaaaaaaaaaaaaaaa)", perl|mod_x); + TEST_INVALID_REGEX("(?P>N)[]a(?)](?abc)", perl|mod_x); + TEST_INVALID_REGEX("(?P>N)[]a(?)](abc)", perl|mod_x); + TEST_INVALID_REGEX("(?P>N)[]a(?)](abc)", perl|mod_x); + TEST_REGEX_SEARCH("^X(?P>N)(a)(?|(b)|(q))(c)(d)(?Y)", perl|mod_x, "XYabcdY", match_default, make_array(0, 7, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, -2, -2)); + // Now check (?(R&NAME) : + TEST_REGEX_SEARCH("(? (?'B' abc (?(R) (?(R&A)1) (?(R&B)2) X | (?1) (?2) (?R) ))) ", perl|mod_x, "abcabc1Xabc2XabcXabcabc", match_default, make_array(0, 17, 0, 17, 0, 17, -2, -2)); + TEST_INVALID_REGEX("(? (?'B' abc (?(R) (?(R&1)1) (?(R&B)2) X | (?1) (?2) (?R) ))) ", perl|mod_x); + TEST_REGEX_SEARCH("(?<1> (?'B' abc (?(R) (?(R&1)1) (?(R&B)2) X | (?1) (?2) (?R) ))) ", perl|mod_x, "abcabc1Xabc2XabcXabcabc", match_default, make_array(0, 17, 0, 17, 0, 17, -2, -2)); + + // Now check for named conditionals: + TEST_REGEX_SEARCH("^(?a)? (?()b|c) (?('ab')d|e)", perl|mod_x, "abd", match_default, make_array(0, 3, 0, 1, -2, -2)); + TEST_REGEX_SEARCH("^(?a)? 
(?()b|c) (?('ab')d|e)", perl|mod_x, "ce", match_default, make_array(0, 2, -1, -1, -2, -2)); + + // Recursions in combination with (DEFINE): + TEST_REGEX_SEARCH("^(?(DEFINE) (? a) (? b) ) (?&A) (?&B) ", perl|mod_x, "abcd", match_default, make_array(0, 2, -1, -1, -1, -1, -2, -2)); + TEST_REGEX_SEARCH("(?(?&NAME_PAT))\\s+(?(?&ADDRESS_PAT)) (?(DEFINE) (?[a-z]+) (?\\d+))", perl|mod_x, "metcalfe 33", match_default, make_array(0, 11, 0, 8, 9, 11, -1, -1, -1, -1, -2, -2)); + TEST_INVALID_REGEX("^(?(DEFINE) abc | xyz ) ", perl|mod_x); + //TEST_INVALID_REGEX("(?(DEFINE) abc){3} xyz", perl|mod_x); + TEST_REGEX_SEARCH("(?(DEFINE)(?2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))\\b(?&byte)(\\.(?&byte)){3}", perl|mod_x, "1.2.3.4", match_default, make_array(0, 7, -1, -1, 5, 7, -2, -2)); + TEST_REGEX_SEARCH("(?(DEFINE)(?2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))\\b(?&byte)(\\.(?&byte)){3}", perl|mod_x, "131.111.10.206", match_default, make_array(0, 14, -1, -1, 10, 14, -2, -2)); + TEST_REGEX_SEARCH("(?(DEFINE)(?2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))\\b(?&byte)(\\.(?&byte)){3}", perl|mod_x, "10.0.0.0", match_default, make_array(0, 8, -1, -1, 6, 8, -2, -2)); + TEST_REGEX_SEARCH("(?(DEFINE)(?2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))\\b(?&byte)(\\.(?&byte)){3}", perl|mod_x, "10.6", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("(?(DEFINE)(?2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))\\b(?&byte)(\\.(?&byte)){3}", perl|mod_x, "455.3.4.5", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "1.2.3.4", match_default, make_array(0, 7, 5, 7, -1, -1, -2, -2)); + TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "131.111.10.206", match_default, make_array(0, 14, 10, 14, -1, -1, -2, -2)); + TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "10.0.0.0", match_default, make_array(0, 8, 6, 8, -1, -1, 
-2, -2)); + TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "10.6", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "455.3.4.5", match_default, make_array(-2, -2)); }