From 55d979060c47a833672f7735ffba3c5f55cc3973 Mon Sep 17 00:00:00 2001 From: John Maddock Date: Thu, 7 May 2009 09:46:51 +0000 Subject: [PATCH] Add support for named sub-expressions. [SVN r52823] --- doc/format_perl_syntax.qbk | 13 +- .../background_information/examples.html | 6 +- .../background_information/history.html | 12 +- .../background_information/locale.html | 8 +- .../background_information/standards.html | 10 +- doc/html/boost_regex/captures.html | 6 +- .../format/boost_format_syntax.html | 8 +- doc/html/boost_regex/format/perl_format.html | 133 +++++++++++++++ doc/html/boost_regex/install.html | 16 +- doc/html/boost_regex/ref/bad_expression.html | 4 +- doc/html/boost_regex/ref/basic_regex.html | 18 +-- .../ref/concepts/traits_concept.html | 4 +- .../deprecated_interfaces/regex_format.html | 2 +- doc/html/boost_regex/ref/error_type.html | 4 +- doc/html/boost_regex/ref/match_flag_type.html | 2 +- doc/html/boost_regex/ref/match_results.html | 113 ++++++++++++- .../ref/non_std_strings/icu/unicode_algo.html | 6 +- .../ref/non_std_strings/icu/unicode_iter.html | 4 +- .../non_std_strings/mfc_strings/mfc_algo.html | 10 +- .../non_std_strings/mfc_strings/mfc_iter.html | 4 +- doc/html/boost_regex/ref/posix.html | 8 +- doc/html/boost_regex/ref/regex_iterator.html | 4 +- doc/html/boost_regex/ref/regex_match.html | 4 +- doc/html/boost_regex/ref/regex_replace.html | 4 +- doc/html/boost_regex/ref/regex_search.html | 4 +- .../boost_regex/ref/regex_token_iterator.html | 4 +- doc/html/boost_regex/ref/regex_traits.html | 2 +- doc/html/boost_regex/ref/sub_match.html | 8 +- .../boost_regex/syntax/basic_extended.html | 66 ++++---- doc/html/boost_regex/syntax/basic_syntax.html | 44 ++--- doc/html/boost_regex/syntax/perl_syntax.html | 129 +++++++++------ doc/html/boost_regex/unicode.html | 4 +- doc/html/index.html | 4 +- doc/match_result.qbk | 95 ++++++++++- doc/syntax_perl.qbk | 19 +++ include/boost/regex/concepts.hpp | 36 +++++ include/boost/regex/v4/basic_regex.hpp | 119 
+++++++++++++- include/boost/regex/v4/basic_regex_parser.hpp | 68 +++++++- include/boost/regex/v4/match_results.hpp | 149 ++++++++++++++++- .../boost/regex/v4/perl_matcher_common.hpp | 2 + include/boost/regex/v4/regex_format.hpp | 151 +++++++++++++++++- .../boost/regex/v4/regex_traits_defaults.hpp | 4 +- src/regex_traits_defaults.cpp | 2 +- test/Jamfile.v2 | 4 + .../named_subexpressions_test.cpp | 109 +++++++++++++ test/regress/test_backrefs.cpp | 13 ++ test/regress/test_deprecated.cpp | 4 +- test/regress/test_escapes.cpp | 2 +- test/regress/test_replace.cpp | 48 ++++++ 49 files changed, 1287 insertions(+), 206 deletions(-) create mode 100644 test/named_subexpressions/named_subexpressions_test.cpp diff --git a/doc/format_perl_syntax.qbk b/doc/format_perl_syntax.qbk index cfd57500..63cdbab2 100644 --- a/doc/format_perl_syntax.qbk +++ b/doc/format_perl_syntax.qbk @@ -17,13 +17,24 @@ should be sent to output as follows: [table [[Placeholder][Meaning]] [[$&][Outputs what matched the whole expression.]] -[[$`][Outputs the text between the end of the last match found (or the +[[$MATCH][As $&]] +[[${^MATCH}][As $&]] +[[$\`][Outputs the text between the end of the last match found (or the start of the text if no previous match was found), and the start of the current match.]] +[[$PREMATCH][As $\`]] +[[${^PREMATCH}][As $\`]] [[$'][Outputs all the text following the end of the current match.]] +[[$POSTMATCH][As $']] +[[${^POSTMATCH}][As $']] +[[$+][Outputs what matched the last marked sub-expression in the regular expression.]] +[[$LAST_PAREN_MATCH][As $+]] +[[$LAST_SUBMATCH_RESULT][Outputs what matched the last sub-expression to be actually matched.]] +[[$^N][As $LAST_SUBMATCH_RESULT]] [[$$][Outputs a literal '$']] [[$n][Outputs what matched the n'th sub-expression.]] [[${n}][Outputs what matched the n'th sub-expression.]] +[[$+{NAME}][Outputs whatever matched the sub-expression named "NAME".]] ] Any $-placeholder sequence not listed above, results in '$' being treated diff 
--git a/doc/html/boost_regex/background_information/examples.html b/doc/html/boost_regex/background_information/examples.html index b5d3b485..d551f231 100644 --- a/doc/html/boost_regex/background_information/examples.html +++ b/doc/html/boost_regex/background_information/examples.html @@ -28,7 +28,7 @@ Example Programs
- + Test Programs
@@ -107,7 +107,7 @@ Files: captures_test.cpp.

- + Example programs
@@ -133,7 +133,7 @@ Files: regex_timer.cpp.

- + Code snippets
diff --git a/doc/html/boost_regex/background_information/history.html b/doc/html/boost_regex/background_information/history.html index f42f7bff..52f8efe2 100644 --- a/doc/html/boost_regex/background_information/history.html +++ b/doc/html/boost_regex/background_information/history.html @@ -26,7 +26,7 @@ History
- + Boost 1.38
@@ -53,7 +53,7 @@
- + Boost 1.34
@@ -76,7 +76,7 @@
- + Boost 1.33.1
@@ -146,7 +146,7 @@
- + Boost 1.33.0
@@ -201,7 +201,7 @@
- + Boost 1.32.1
@@ -209,7 +209,7 @@ Fixed bug in partial matches of bounded repeats of '.'.
- + Boost 1.31.0
diff --git a/doc/html/boost_regex/background_information/locale.html b/doc/html/boost_regex/background_information/locale.html index 62284e82..8d9e50d7 100644 --- a/doc/html/boost_regex/background_information/locale.html +++ b/doc/html/boost_regex/background_information/locale.html @@ -58,7 +58,7 @@ There are three separate localization mechanisms supported by Boost.Regex:

- + Win32 localization model.
@@ -90,7 +90,7 @@ are treated as "unknown" graphic characters.

- + C localization model.
@@ -114,7 +114,7 @@ libraries including version 1 of this library.

- + C++ localization model.
@@ -151,7 +151,7 @@ in your code. The best way to ensure this is to add the #define to <boost/regex/user.hpp>.

- + Providing a message catalogue
diff --git a/doc/html/boost_regex/background_information/standards.html b/doc/html/boost_regex/background_information/standards.html index 437e53e0..a711f234 100644 --- a/doc/html/boost_regex/background_information/standards.html +++ b/doc/html/boost_regex/background_information/standards.html @@ -28,7 +28,7 @@ Conformance
- + C++

@@ -36,7 +36,7 @@ Report on C++ Library Extensions.

- + ECMAScript / JavaScript
@@ -49,7 +49,7 @@ rather than a Unicode escape sequence; use \x{DDDD} for Unicode escape sequences.

- + Perl

@@ -62,7 +62,7 @@ (??{code}) Not implementable in a compiled strongly typed language.

- + POSIX

@@ -82,7 +82,7 @@ a custom traits class.

- + Unicode

diff --git a/doc/html/boost_regex/captures.html b/doc/html/boost_regex/captures.html index c4dcc347..de155209 100644 --- a/doc/html/boost_regex/captures.html +++ b/doc/html/boost_regex/captures.html @@ -35,7 +35,7 @@ accessed.

- + Marked sub-expressions

@@ -218,7 +218,7 @@ output stream.

- + Unmatched Sub-Expressions

@@ -231,7 +231,7 @@ you can determine which sub-expressions matched by accessing the sub_match::matched data member.

- + Repeated Captures

diff --git a/doc/html/boost_regex/format/boost_format_syntax.html b/doc/html/boost_regex/format/boost_format_syntax.html index 809f2205..73b7d8ae 100644 --- a/doc/html/boost_regex/format/boost_format_syntax.html +++ b/doc/html/boost_regex/format/boost_format_syntax.html @@ -32,7 +32,7 @@ '$', '\', '(', ')', '?', and ':'.

- + Grouping

@@ -40,7 +40,7 @@ you want a to output literal parenthesis.

- + Conditionals

@@ -66,7 +66,7 @@ with "bar" otherwise.

- + Placeholder Sequences
@@ -161,7 +161,7 @@ as a literal.

- + Escape Sequences
diff --git a/doc/html/boost_regex/format/perl_format.html b/doc/html/boost_regex/format/perl_format.html index fde101e3..616dd3fb 100644 --- a/doc/html/boost_regex/format/perl_format.html +++ b/doc/html/boost_regex/format/perl_format.html @@ -65,6 +65,30 @@ + +

+ $MATCH +

+ + +

+ As $& +

+ + + + +

+ ${^MATCH} +

+ + +

+ As $& +

+ + +

$` @@ -79,6 +103,30 @@ + +

+ $PREMATCH +

+ + +

+ As $` +

+ + + + +

+ ${^PREMATCH} +

+ + +

+ As $` +

+ + +

$' @@ -91,6 +139,79 @@ + +

+ $POSTMATCH +

+ + +

+ As $' +

+ + + + +

+ ${^POSTMATCH} +

+ + +

+ As $' +

+ + + + +

+ $+ +

+ + +

+ Outputs what matched the last marked sub-expression in the regular + expression. +

+ + + + +

+ $LAST_PAREN_MATCH +

+ + +

+ As $+ +

+ + + + +

+ $LAST_SUBMATCH_RESULT +

+ + +

+ Outputs what matched the last sub-expression to be actually matched. +

+ + + + +

+ $^N +

+ + +

+ As $LAST_SUBMATCH_RESULT +

+ + +

$$ @@ -126,6 +247,18 @@

+ + +

+ $+{NAME} +

+ + +

+ Outputs whatever matched the sub-expression named "NAME". +

+ +

diff --git a/doc/html/boost_regex/install.html b/doc/html/boost_regex/install.html index 05d2a54d..b8aeb72e 100644 --- a/doc/html/boost_regex/install.html +++ b/doc/html/boost_regex/install.html @@ -49,7 +49,7 @@ file before you can use it, instructions for specific platforms are as follows:

- + Building with bjam

@@ -58,7 +58,7 @@ started guide for more information.

- + Building With Unicode and ICU Support
@@ -96,11 +96,11 @@ ICU you are using is binary compatible with the toolset you use to build Boost.

- + Building via makefiles
- + Borland C++ Builder:
- + GCC(2.95 and later)

@@ -302,7 +302,7 @@ see the config library documentation.

- + Sun Workshop 6.1

@@ -347,7 +347,7 @@ will build v9 variants of the regex library named libboost_regex_v9.a etc.

- + Makefiles for Other compilers
diff --git a/doc/html/boost_regex/ref/bad_expression.html b/doc/html/boost_regex/ref/bad_expression.html index 1d079e4f..e81b591d 100644 --- a/doc/html/boost_regex/ref/bad_expression.html +++ b/doc/html/boost_regex/ref/bad_expression.html @@ -27,7 +27,7 @@ bad_expression
- + Synopsis
#include <boost/pattern_except.hpp>
@@ -54,7 +54,7 @@
 } // namespace boost
 
- + Description
regex_error(const std::string& s, regex_constants::error_type err, std::ptrdiff_t pos);
diff --git a/doc/html/boost_regex/ref/basic_regex.html b/doc/html/boost_regex/ref/basic_regex.html
index a0c13601..6b6340eb 100644
--- a/doc/html/boost_regex/ref/basic_regex.html
+++ b/doc/html/boost_regex/ref/basic_regex.html
@@ -27,7 +27,7 @@
  basic_regex
 
 
- + Synopsis
#include <boost/regex.hpp>
@@ -244,7 +244,7 @@
 } // namespace boost
 
- + Description

@@ -327,7 +327,7 @@ basic_regex.

-

Table 1. basic_regex default construction postconditions

+

Table 1. basic_regex default construction postconditions

@@ -407,7 +407,7 @@ flags specified in f.

-

Table 2. Postconditions for basic_regex construction

+

Table 2. Postconditions for basic_regex construction

@@ -512,7 +512,7 @@ specified in f.

-

Table 3. Postconditions for basic_regex construction

+

Table 3. Postconditions for basic_regex construction

@@ -616,7 +616,7 @@ according the option flags specified in f.

-

Table 4. Postconditions for basic_regex construction

+

Table 4. Postconditions for basic_regex construction

@@ -727,7 +727,7 @@ flags specified in f.

-

Table 5. Postconditions for basic_regex construction

+

Table 5. Postconditions for basic_regex construction

@@ -829,7 +829,7 @@ flags specified in f.

-

Table 6. Postconditions for basic_regex construction

+

Table 6. Postconditions for basic_regex construction

@@ -1043,7 +1043,7 @@ in f.

-

Table 7. Postconditions for basic_regex::assign

+

Table 7. Postconditions for basic_regex::assign

diff --git a/doc/html/boost_regex/ref/concepts/traits_concept.html b/doc/html/boost_regex/ref/concepts/traits_concept.html index fe4b0274..224279f6 100644 --- a/doc/html/boost_regex/ref/concepts/traits_concept.html +++ b/doc/html/boost_regex/ref/concepts/traits_concept.html @@ -34,7 +34,7 @@ Boost-specific enhanced interface.

- + Minimal requirements.
@@ -381,7 +381,7 @@
- + Additional Optional Requirements
diff --git a/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html b/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html index 70fae8b1..d613605c 100644 --- a/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html +++ b/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html @@ -34,7 +34,7 @@ previous version of Boost.Regex and will not be further updated:

- + Algorithm regex_format
diff --git a/doc/html/boost_regex/ref/error_type.html b/doc/html/boost_regex/ref/error_type.html index 7fb8bbe7..4c8d7a3f 100644 --- a/doc/html/boost_regex/ref/error_type.html +++ b/doc/html/boost_regex/ref/error_type.html @@ -27,7 +27,7 @@ error_type
- + Synopsis

@@ -57,7 +57,7 @@ } // namespace boost

- + Description

diff --git a/doc/html/boost_regex/ref/match_flag_type.html b/doc/html/boost_regex/ref/match_flag_type.html index dfbdfdc1..d515cb48 100644 --- a/doc/html/boost_regex/ref/match_flag_type.html +++ b/doc/html/boost_regex/ref/match_flag_type.html @@ -69,7 +69,7 @@ } // namespace boost

- + Description

diff --git a/doc/html/boost_regex/ref/match_results.html b/doc/html/boost_regex/ref/match_results.html index 7961ad1e..7b674765 100644 --- a/doc/html/boost_regex/ref/match_results.html +++ b/doc/html/boost_regex/ref/match_results.html @@ -27,7 +27,7 @@ match_results

- + Synopsis
#include <boost/regex.hpp>
@@ -98,9 +98,33 @@
    bool empty() const;
    // element access:
    difference_type length(int sub = 0) const;
+   difference_type length(const char_type* sub) const;
+   template <class charT>
+   difference_type length(const charT* sub) const;
+   template <class charT, class Traits, class A>
+   difference_type length(const std::basic_string<charT, Traits, A>& sub) const;
    difference_type position(unsigned int sub = 0) const;
+   difference_type position(const char_type* sub) const;
+   template <class charT>
+   difference_type position(const charT* sub) const;
+   template <class charT, class Traits, class A>
+   difference_type position(const std::basic_string<charT, Traits, A>& sub) const;
    string_type str(int sub = 0) const;
+   string_type str(const char_type* sub)const;
+   template <class Traits, class A>
+   string_type str(const std::basic_string<char_type, Traits, A>& sub)const;
+   template <class charT>
+   string_type str(const charT* sub)const;
+   template <class charT, class Traits, class A>
+   string_type str(const std::basic_string<charT, Traits, A>& sub)const;
    const_reference operator[](int n) const;
+   const_reference operator[](const char_type* n) const;
+   template <class Traits, class A>
+   const_reference operator[](const std::basic_string<char_type, Traits, A>& n) const;
+   template <class charT>
+   const_reference operator[](const charT* n) const;
+   template <class charT, class Traits, class A>
+   const_reference operator[](const std::basic_string<charT, Traits, A>& n) const;
 
    const_reference prefix() const;
 
@@ -142,7 +166,7 @@
          match_results<BidirectionalIterator, Allocator>& m2);
 
- + Description

@@ -375,14 +399,39 @@

difference_type length(int sub = 0)const;
+difference_type length(const char_type* sub)const;
+template <class charT>
+difference_type length(const charT* sub)const;
+template <class charT, class Traits, class A>
+difference_type length(const std::basic_string<charT, Traits, A>&)const;
 

Effects: Returns the length of sub-expression sub, that is to say: (*this)[sub].length().

+

+ The overloads that accept a string refer to a named sub-expression sub. + In the event that there is no such named sub-expression then returns + zero. +

+

+ The template overloads of this function allow the string and/or character + type to be different from the character type of the underlying sequence and/or + regular expression: in this case the characters will be widened to the underlying + character type of the original regular expression. A compiler error will + occur if the argument passes a wider character type than the underlying sequence. + These overloads allow a normal narrow character C string literal to be used + as an argument, even when the underlying character type of the expression + being matched may be something more exotic such as a Unicode character type. +

difference_type position(unsigned int sub = 0)const;
+difference_type position(const char_type* sub)const;
+template <class charT>
+difference_type position(const charT* sub)const;
+template <class charT, class Traits, class A>
+difference_type position(const std::basic_string<charT, Traits, A>&)const;
 

Effects: Returns the starting location of @@ -391,17 +440,61 @@ will return the location of the partial match even though (*this)[0].matched is false.

+

+ The overloads that accept a string refer to a named sub-expression sub. + In the event that there is no such named sub-expression then returns + -1. +

+

+ The template overloads of this function allow the string and/or character + type to be different from the character type of the underlying sequence and/or + regular expression: in this case the characters will be widened to the underlying + character type of the original regular expression. A compiler error will + occur if the argument passes a wider character type than the underlying sequence. + These overloads allow a normal narrow character C string literal to be used + as an argument, even when the underlying character type of the expression + being matched may be something more exotic such as a Unicode character type. +

string_type str(int sub = 0)const;
+string_type str(const char_type* sub)const;
+template <class Traits, class A>
+string_type str(const std::basic_string<char_type, Traits, A>& sub)const;
+template <class charT>
+string_type str(const charT* sub)const;
+template <class charT, class Traits, class A>
+string_type str(const std::basic_string<charT, Traits, A>& sub)const;
 

Effects: Returns sub-expression sub as a string: string_type((*this)[sub]).

+

+ The overloads that accept a string return the string that matched the named + sub-expression sub. In the event that there is no such + named sub-expression then returns an empty string. +

+

+ The template overloads of this function allow the string and/or character + type to be different from the character type of the underlying sequence and/or + regular expression: in this case the characters will be widened to the underlying + character type of the original regular expression. A compiler error will + occur if the argument passes a wider character type than the underlying sequence. + These overloads allow a normal narrow character C string literal to be used + as an argument, even when the underlying character type of the expression + being matched may be something more exotic such as a Unicode character type. +

const_reference operator[](int n) const;
+const_reference operator[](const char_type* n) const;
+template <class Traits, class A>
+const_reference operator[](const std::basic_string<char_type, Traits, A>& n) const;
+template <class charT>
+const_reference operator[](const charT* n) const;
+template <class charT, class Traits, class A>
+const_reference operator[](const std::basic_string<charT, Traits, A>& n) const;
 

Effects: Returns a reference to the sub_match @@ -413,6 +506,22 @@ then returns a sub_match object whose matched member is false.

+

+ The overloads that accept a string return a reference to the sub_match object representing the + character sequence that matched the named sub-expression n. + In the event that there is no such named sub-expression then returns a sub_match + object whose matched member is false. +

+

+ The template overloads of this function allow the string and/or character + type to be different from the character type of the underlying sequence and/or + regular expression: in this case the characters will be widened to the underlying + character type of the original regular expression. A compiler error will + occur if the argument passes a wider character type than the underlying sequence. + These overloads allow a normal narrow character C string literal to be used + as an argument, even when the underlying character type of the expression + being matched may be something more exotic such as a Unicode character type. +

const_reference prefix()const;
diff --git a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html
index f7379cd2..732889c4 100644
--- a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html
+++ b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html
@@ -43,7 +43,7 @@
             on to the "real" algorithm.
           

- + u32regex_match

@@ -89,7 +89,7 @@ }

- + u32regex_search

@@ -128,7 +128,7 @@ }

- + u32regex_replace

diff --git a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html index 6f432bd3..6c781423 100644 --- a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html +++ b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html @@ -28,7 +28,7 @@ Unicode Aware Regex Iterators

- + u32regex_iterator

@@ -126,7 +126,7 @@ Provided of course that the input is encoded as UTF-8.

- + u32regex_token_iterator

diff --git a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html index 49195299..11f9d1ea 100644 --- a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html +++ b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html @@ -34,7 +34,7 @@ here they are anyway:

- + regex_match

@@ -82,7 +82,7 @@ }

- + regex_match (second overload)
@@ -110,7 +110,7 @@ }
- + regex_search

@@ -149,7 +149,7 @@ }

- + regex_search (second overload)
@@ -164,7 +164,7 @@ + s.GetLength(), e, f);

- + regex_replace

diff --git a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html index 8286c553..6f3d7ddf 100644 --- a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html +++ b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html @@ -32,7 +32,7 @@ an MFC/ATL string to a regex_iterator or regex_token_iterator:

- + regex_iterator creation helper
@@ -68,7 +68,7 @@ }
- + regex_token_iterator creation helpers
diff --git a/doc/html/boost_regex/ref/posix.html b/doc/html/boost_regex/ref/posix.html index 44ead2e9..b173b48e 100644 --- a/doc/html/boost_regex/ref/posix.html +++ b/doc/html/boost_regex/ref/posix.html @@ -165,7 +165,7 @@

- + regcomp

@@ -379,7 +379,7 @@

- + regerror

@@ -467,7 +467,7 @@

- + regexec

@@ -537,7 +537,7 @@

- + regfree

diff --git a/doc/html/boost_regex/ref/regex_iterator.html b/doc/html/boost_regex/ref/regex_iterator.html index ba71a9af..17494ce8 100644 --- a/doc/html/boost_regex/ref/regex_iterator.html +++ b/doc/html/boost_regex/ref/regex_iterator.html @@ -78,7 +78,7 @@ regex_constants::match_flag_type m = regex_constants::match_default);

- + Description

@@ -436,7 +436,7 @@ m.

- + Examples

diff --git a/doc/html/boost_regex/ref/regex_match.html b/doc/html/boost_regex/ref/regex_match.html index 5ce57f3c..9c4413f1 100644 --- a/doc/html/boost_regex/ref/regex_match.html +++ b/doc/html/boost_regex/ref/regex_match.html @@ -80,7 +80,7 @@ match_flag_type flags = match_default);

- + Description
template <class BidirectionalIterator, class Allocator, class charT, class traits>
@@ -360,7 +360,7 @@
         Effects: Returns the result of regex_match(s.begin(), s.end(), e, flags).
       

- + Examples

diff --git a/doc/html/boost_regex/ref/regex_replace.html b/doc/html/boost_regex/ref/regex_replace.html index f25d7aab..5019aeac 100644 --- a/doc/html/boost_regex/ref/regex_replace.html +++ b/doc/html/boost_regex/ref/regex_replace.html @@ -53,7 +53,7 @@ match_flag_type flags = match_default);

- + Description
template <class OutputIterator, class BidirectionalIterator, class traits, class charT>
@@ -163,7 +163,7 @@
         and then returns result.
       

- + Examples

diff --git a/doc/html/boost_regex/ref/regex_search.html b/doc/html/boost_regex/ref/regex_search.html index cefe95b9..bcf9ce4f 100644 --- a/doc/html/boost_regex/ref/regex_search.html +++ b/doc/html/boost_regex/ref/regex_search.html @@ -73,7 +73,7 @@ match_flag_type flags = match_default);

- + Description
template <class BidirectionalIterator, class Allocator, class charT, class traits>
@@ -355,7 +355,7 @@
         Effects: Returns the result of regex_search(s.begin(), s.end(), e, flags).
       

- + Examples

diff --git a/doc/html/boost_regex/ref/regex_token_iterator.html b/doc/html/boost_regex/ref/regex_token_iterator.html index fbd5e0e5..58683e16 100644 --- a/doc/html/boost_regex/ref/regex_token_iterator.html +++ b/doc/html/boost_regex/ref/regex_token_iterator.html @@ -136,7 +136,7 @@ regex_constants::match_flag_type m = regex_constants::match_default);

- + Description

@@ -383,7 +383,7 @@ m.

- + Examples

diff --git a/doc/html/boost_regex/ref/regex_traits.html b/doc/html/boost_regex/ref/regex_traits.html index 16ac10d0..62cded54 100644 --- a/doc/html/boost_regex/ref/regex_traits.html +++ b/doc/html/boost_regex/ref/regex_traits.html @@ -46,7 +46,7 @@ } // namespace boost

- + Description

diff --git a/doc/html/boost_regex/ref/sub_match.html b/doc/html/boost_regex/ref/sub_match.html index 18629564..db25820f 100644 --- a/doc/html/boost_regex/ref/sub_match.html +++ b/doc/html/boost_regex/ref/sub_match.html @@ -329,11 +329,11 @@ } // namespace boost

- + Description
- + Members

@@ -473,7 +473,7 @@

- + sub_match non-member operators
@@ -1008,7 +1008,7 @@ + m2.str().

- + Stream inserter

diff --git a/doc/html/boost_regex/syntax/basic_extended.html b/doc/html/boost_regex/syntax/basic_extended.html index 6f13adaa..389a5933 100644 --- a/doc/html/boost_regex/syntax/basic_extended.html +++ b/doc/html/boost_regex/syntax/basic_extended.html @@ -28,7 +28,7 @@ Expression Syntax

- + Synopsis

@@ -46,7 +46,7 @@

- + POSIX Extended Syntax

@@ -56,7 +56,7 @@

.[{()\*+?|^$
- + Wildcard:

@@ -74,7 +74,7 @@

- + Anchors:

@@ -86,7 +86,7 @@ of an expression, or the last character of a sub-expression.

- + Marked sub-expressions:
@@ -98,7 +98,7 @@ to by a back-reference.

- + Repeats:

@@ -184,7 +184,7 @@ cab operator to be applied to.

- + Back references:

@@ -214,7 +214,7 @@ cab

- + Alternation

@@ -227,7 +227,7 @@ cab will match either of "abd" or "abef".

- + Character sets:
@@ -240,7 +240,7 @@ cab A bracket expression may contain any combination of the following:

- + Single characters:
@@ -249,7 +249,7 @@ cab or 'c'.

- + Character ranges:
@@ -265,7 +265,7 @@ cab the code points of the characters only.

- + Negation:

@@ -274,7 +274,7 @@ cab range a-c.

- + Character classes:
@@ -284,7 +284,7 @@ cab character class names.

- + Collating Elements:
@@ -312,7 +312,7 @@ cab matches a NUL character.

- + Equivalence classes:
@@ -329,7 +329,7 @@ cab or even all locales on one platform.

- + Combinations:

@@ -337,7 +337,7 @@ cab [[:digit:]a-c[.NUL.]].

- + Escapes

@@ -363,7 +363,7 @@ cab extensions are also supported by Boost.Regex:

- + Escapes matching a specific character
@@ -552,7 +552,7 @@ cab
- + "Single character" character classes:
@@ -706,7 +706,7 @@ cab
- + Character Properties
@@ -813,7 +813,7 @@ cab matches any "digit" character, as does \p{digit}.

- + Word Boundaries

@@ -888,7 +888,7 @@ cab

- + Buffer boundaries
@@ -979,7 +979,7 @@ cab
- + Continuation Escape
@@ -991,7 +991,7 @@ cab match to start where the last one ended.

- + Quoting escape
@@ -1005,7 +1005,7 @@ cab \*+aaa
- + Unicode escapes
@@ -1056,7 +1056,7 @@ cab
- + Any other escape
@@ -1065,7 +1065,7 @@ cab \@ matches a literal '@'.

- + Operator precedence
@@ -1101,7 +1101,7 @@ cab
- + What Gets Matched
@@ -1111,11 +1111,11 @@ cab rule.

- + Variations

- + Egrep

@@ -1136,7 +1136,7 @@ cab used with the -E option.

- + awk

@@ -1150,7 +1150,7 @@ cab these by default anyway.

- + Options

@@ -1163,7 +1163,7 @@ cab modify how the case and locale sensitivity are to be applied.

- + References

diff --git a/doc/html/boost_regex/syntax/basic_syntax.html b/doc/html/boost_regex/syntax/basic_syntax.html index 8de6b127..6a6f32d9 100644 --- a/doc/html/boost_regex/syntax/basic_syntax.html +++ b/doc/html/boost_regex/syntax/basic_syntax.html @@ -28,7 +28,7 @@ Expression Syntax

- + Synopsis

@@ -45,7 +45,7 @@

- + POSIX Basic Syntax

@@ -55,7 +55,7 @@

.[\*^$
- + Wildcard:

@@ -73,7 +73,7 @@

- + Anchors:

@@ -85,7 +85,7 @@ of an expression, or the last character of a sub-expression.

- + Marked sub-expressions:
@@ -97,7 +97,7 @@ by a back-reference.

- + Repeats:

@@ -155,7 +155,7 @@ aaaa to.

- + Back references:

@@ -173,7 +173,7 @@ aaaa

aaabba
- + Character sets:
@@ -186,7 +186,7 @@ aaaa A bracket expression may contain any combination of the following:

- + Single characters:
@@ -195,7 +195,7 @@ aaaa or 'c'.

- + Character ranges:
@@ -211,7 +211,7 @@ aaaa of the characters only.

- + Negation:

@@ -220,7 +220,7 @@ aaaa range a-c.

- + Character classes:
@@ -230,7 +230,7 @@ aaaa character class names.

- + Collating Elements:
@@ -259,7 +259,7 @@ aaaa element names.

- + Equivalence classes:
@@ -276,7 +276,7 @@ aaaa or even all locales on one platform.

- + Combinations:

@@ -284,7 +284,7 @@ aaaa [[:digit:]a-c[.NUL.]].

- + Escapes

@@ -299,7 +299,7 @@ aaaa will match either a literal '\' or a '^'.

- + What Gets Matched

@@ -309,13 +309,13 @@ aaaa rule.

- + Variations

- + Grep

@@ -333,7 +333,7 @@ aaaa As its name suggests, this behavior is consistent with the Unix utility grep.

- + emacs

@@ -613,7 +613,7 @@ aaaa leftmost-longest rule.

- + Options

@@ -627,7 +627,7 @@ aaaa options modify how the case and locale sensitivity are to be applied.

- + References

diff --git a/doc/html/boost_regex/syntax/perl_syntax.html b/doc/html/boost_regex/syntax/perl_syntax.html index c97738cf..add2ff1c 100644 --- a/doc/html/boost_regex/syntax/perl_syntax.html +++ b/doc/html/boost_regex/syntax/perl_syntax.html @@ -28,7 +28,7 @@ Syntax

- + Synopsis

@@ -43,7 +43,7 @@ boost::regex e2(my_expression, boost::regex::perl|boost::regex::icase);

- + Perl Regular Expression Syntax

@@ -53,7 +53,7 @@

.[{()\*+?|^$
- + Wildcard

@@ -73,7 +73,7 @@

- + Anchors

@@ -83,7 +83,7 @@ A '$' character shall match the end of a line.

- + Marked sub-expressions
@@ -94,7 +94,7 @@ can also repeated, or referred to by a back-reference.

- + Non-marking grouping
@@ -107,7 +107,7 @@ without splitting out any separate sub-expressions.

- + Repeats

@@ -188,7 +188,7 @@ to be applied to.

- + Non greedy repeats
@@ -218,7 +218,7 @@ while consuming as little input as possible.

- + Pocessive repeats
@@ -250,7 +250,7 @@ while giving nothing back.

- + Back references

@@ -340,10 +340,27 @@

+ + +

+ \g{one} +

+ + +

+ Match whatever matched the sub-expression named "one" +

+ + +

+ Finally the \k escape can be used to refer to named subexpressions, for example + \k<two> will match whatever matched the subexpression + named "two". +

- + Alternation

@@ -370,7 +387,7 @@ (?:abc)?? has exactly the same effect.

- + Character sets

@@ -382,7 +399,7 @@ A bracket expression may contain any combination of the following:

- + Single characters

@@ -390,7 +407,7 @@ 'b', or 'c'.

- + Character ranges
@@ -404,7 +421,7 @@ sensitive.

- + Negation

@@ -413,7 +430,7 @@ matches any character that is not in the range a-c.

- + Character classes
@@ -424,7 +441,7 @@ class names.

- + Collating Elements
@@ -446,7 +463,7 @@ matches a \0 character.

- + Equivalence classes
@@ -463,7 +480,7 @@ or even all locales on one platform.

- + Escaped Characters
@@ -475,7 +492,7 @@ is not a "word" character.

- + Combinations

@@ -483,7 +500,7 @@ [[:digit:]a-c[.NUL.]].

- + Escapes

@@ -675,7 +692,7 @@

- + "Single character" character classes:
@@ -877,7 +894,7 @@
- + Character Properties
@@ -985,7 +1002,7 @@ as does \p{digit}.

- + Word Boundaries

@@ -1004,7 +1021,7 @@ \B Matches only when not at a word boundary.

- + Buffer boundaries

@@ -1029,7 +1046,7 @@ to the regular expression \n*\z

- + Continuation Escape
@@ -1041,7 +1058,7 @@ one ended.

- + Quoting escape

@@ -1054,7 +1071,7 @@ \*+aaa

- + Unicode escapes

@@ -1064,7 +1081,7 @@ followed by a sequence of zero or more combining characters.

- + Matching Line Endings
@@ -1073,7 +1090,7 @@ sequence, specifically it is identical to the expression (?>\x0D\x0A?|[\x0A-\x0C\x85\x{2028}\x{2029}]).

- + Keeping back some text
@@ -1088,7 +1105,7 @@ This can be used to simulate variable width lookbehind assertions.

- + Any other escape
@@ -1097,7 +1114,7 @@ \@ matches a literal '@'.

- + Perl Extended Patterns
@@ -1105,15 +1122,37 @@ Perl-specific extensions to the regular expression syntax all start with (?.

+
+ + Named + Subexpressions +
+

+ You can create a named subexpression using: +

+
(?<NAME>expression)
+
+

+ Which can then be referred to by the name NAME. Alternatively + you can delimit the name using 'NAME' as in: +

+
(?'NAME'expression)
+
+

+ These named subexpressions can be referred to in a backreference using either + \g{NAME} or \k<NAME> and can + also be referred to by name in a Perl + format string for search and replace operations, or in the match_results member functions. +

- + Comments

(?# ... ) is treated as a comment, it's contents are ignored.

- + Modifiers

@@ -1127,7 +1166,7 @@ pattern only.

- + Non-marking groups
@@ -1136,7 +1175,7 @@ an additional sub-expression.

- + Lookahead

@@ -1159,7 +1198,7 @@ could be used to validate the password.

- + Lookbehind

@@ -1173,7 +1212,7 @@ (pattern must be of fixed length).

- + Independent sub-expressions
@@ -1186,7 +1225,7 @@ no match is found at all.

- + Conditional Expressions
@@ -1205,7 +1244,7 @@ sub-expression has been matched).

- + Operator precedence
@@ -1240,7 +1279,7 @@

- + What gets matched

@@ -1415,7 +1454,7 @@

- + Variations

@@ -1424,7 +1463,7 @@ and JScript are all synonyms for perl.

- + Options

@@ -1436,7 +1475,7 @@ are to be applied.

- + Pattern Modifiers

@@ -1448,7 +1487,7 @@ and no_mod_s.

- + References

diff --git a/doc/html/boost_regex/unicode.html b/doc/html/boost_regex/unicode.html index a21286ca..b3ceab35 100644 --- a/doc/html/boost_regex/unicode.html +++ b/doc/html/boost_regex/unicode.html @@ -30,7 +30,7 @@ There are two ways to use Boost.Regex with Unicode strings:

- + Rely on wchar_t

@@ -56,7 +56,7 @@

- + Use a Unicode Aware Regular Expression Type.
diff --git a/doc/html/index.html b/doc/html/index.html index 6945b093..af141e1e 100644 --- a/doc/html/index.html +++ b/doc/html/index.html @@ -28,7 +28,7 @@
-

+

Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

@@ -196,7 +196,7 @@

- +

Last revised: April 25, 2009 at 17:20:04 GMT

Last revised: May 06, 2009 at 16:25:16 GMT


diff --git a/doc/match_result.qbk b/doc/match_result.qbk index eb3861fd..8e1ae788 100644 --- a/doc/match_result.qbk +++ b/doc/match_result.qbk @@ -71,9 +71,33 @@ Class template `match_results` is most commonly used as one of the typedefs bool ``[link boost_regex.match_results.empty empty]``() const; // element access: difference_type ``[link boost_regex.match_results.length length]``(int sub = 0) const; + difference_type ``[link boost_regex.match_results.length length]``(const char_type* sub) const; + template + difference_type ``[link boost_regex.match_results.length length]``(const charT* sub) const; + template + difference_type ``[link boost_regex.match_results.length length]``(const std::basic_string& sub) const; difference_type ``[link boost_regex.match_results.position position]``(unsigned int sub = 0) const; + difference_type ``[link boost_regex.match_results.position position]``(const char_type* sub) const; + template + difference_type ``[link boost_regex.match_results.position position]``(const charT* sub) const; + template + difference_type ``[link boost_regex.match_results.position position]``(const std::basic_string& sub) const; string_type ``[link boost_regex.match_results.str str]``(int sub = 0) const; + string_type ``[link boost_regex.match_results.str str]``(const char_type* sub)const; + template + string_type ``[link boost_regex.match_results.str str]``(const std::basic_string& sub)const; + template + string_type ``[link boost_regex.match_results.str str]``(const charT* sub)const; + template + string_type ``[link boost_regex.match_results.str str]``(const std::basic_string& sub)const; const_reference ``[link boost_regex.match_results.subscript operator\[\]]``(int n) const; + const_reference ``[link boost_regex.match_results.subscript operator\[\]]``(const char_type* n) const; + template + const_reference ``[link boost_regex.match_results.subscript operator\[\]]``(const std::basic_string& n) const; + template + const_reference ``[link 
boost_regex.match_results.subscript operator\[\]]``(const charT* n) const; + template + const_reference ``[link boost_regex.match_results.subscript operator\[\]]``(const std::basic_string& n) const; const_reference ``[link boost_regex.match_results.prefix prefix]``() const; @@ -190,30 +214,86 @@ stored in *this. [#boost_regex.match_results.length] difference_type length(int sub = 0)const; + difference_type length(const char_type* sub)const; + template + difference_type length(const charT* sub)const; + template + difference_type length(const std::basic_string&)const; [*Effects]: Returns the length of sub-expression /sub/, that is to say: `(*this)[sub].length()`. +The overloads that accept a string refer to a named sub-expression /sub/. +In the event that there is no such named sub-expression then returns zero. + +The template overloads of this function, allow the string and\/or character type +to be different from the character type of the underlying sequence and\/or regular expression: +in this case the characters will be widened to the underlying character type of the original regular expression. +A compiler error will occur if the argument passes a wider character type than the underlying sequence. +These overloads allow a normal narrow character C string literal to be used as an argument, even when +the underlying character type of the expression being matched may be something more exotic such as a +Unicode character type. [#boost_regex.match_results.position] difference_type position(unsigned int sub = 0)const; + difference_type position(const char_type* sub)const; + template + difference_type position(const charT* sub)const; + template + difference_type position(const std::basic_string&)const; [*Effects]: Returns the starting location of sub-expression /sub/, or -1 if /sub/ was not matched. Note that if this represents a partial match , then `position()` will return the location of the partial match even though `(*this)[0].matched` is false. 
+The overloads that accept a string refer to a named sub-expression /sub/. +In the event that there is no such named sub-expression then returns -1. + +The template overloads of this function, allow the string and\/or character type +to be different from the character type of the underlying sequence and\/or regular expression: +in this case the characters will be widened to the underlying character type of the original regular expression. +A compiler error will occur if the argument passes a wider character type than the underlying sequence. +These overloads allow a normal narrow character C string literal to be used as an argument, even when +the underlying character type of the expression being matched may be something more exotic such as a +Unicode character type. + [#boost_regex.match_results.str] string_type str(int sub = 0)const; + string_type str(const char_type* sub)const; + template + string_type str(const std::basic_string& sub)const; + template + string_type str(const charT* sub)const; + template + string_type str(const std::basic_string& sub)const; [*Effects]: Returns sub-expression /sub/ as a string: `string_type((*this)[sub])`. +The overloads that accept a string, return the string that matched the named sub-expression /sub/. +In the event that there is no such named sub-expression then returns an empty string. + +The template overloads of this function, allow the string and\/or character type +to be different from the character type of the underlying sequence and\/or regular expression: +in this case the characters will be widened to the underlying character type of the original regular expression. +A compiler error will occur if the argument passes a wider character type than the underlying sequence. +These overloads allow a normal narrow character C string literal to be used as an argument, even when +the underlying character type of the expression being matched may be something more exotic such as a +Unicode character type. 
+ [#boost_regex.match_results.subscript] - const_reference operator[](int n) const; + const_reference operator[](int n) const; + const_reference operator[](const char_type* n) const; + template + const_reference operator[](const std::basic_string& n) const; + template + const_reference operator[](const charT* n) const; + template + const_reference operator[](const std::basic_string& n) const; [*Effects]: Returns a reference to the [sub_match] object representing the character sequence that matched marked sub-expression /n/. If `n == 0` then returns a @@ -222,6 +302,19 @@ matched the whole regular expression. If /n/ is out of range, or if /n/ is an unmatched sub-expression, then returns a [sub_match] object whose matched member is false. +The overloads that accept a string, return a reference to the [sub_match] +object representing the character sequence that matched the named sub-expression /n/. +In the event that there is no such named sub-expression then returns a [sub_match] object whose matched +member is false. + +The template overloads of this function, allow the string and\/or character type +to be different from the character type of the underlying sequence and\/or regular expression: +in this case the characters will be widened to the underlying character type of the original regular expression. +A compiler error will occur if the argument passes a wider character type than the underlying sequence. +These overloads allow a normal narrow character C string literal to be used as an argument, even when +the underlying character type of the expression being matched may be something more exotic such as a +Unicode character type. 
+ [#boost_regex.match_results.prefix] diff --git a/doc/syntax_perl.qbk b/doc/syntax_perl.qbk index a67bc771..ff14c703 100644 --- a/doc/syntax_perl.qbk +++ b/doc/syntax_perl.qbk @@ -185,8 +185,12 @@ You can also use the \g escape for the same function, for example: parsing of the expression in cases like =\g{1}2= or for indexes higher than 9 as in =\g{1234}=]] [[=\g-1=][Match whatever matched the last opened sub-expression]] [[=\g{-2}=][Match whatever matched the last but one opened sub-expression]] +[[=\g{one}=][Match whatever matched the sub-expression named "one"]] ] +Finally the \k escape can be used to refer to named subexpressions, for example [^\k<two>] will match +whatever matched the subexpression named "two". + [h4 Alternation] The =|= operator will match either of its arguments, so for example: @@ -425,6 +429,21 @@ Any other escape sequence matches the character that is escaped, for example Perl-specific extensions to the regular expression syntax all start with =(?=. +[h5 Named Subexpressions] + +You can create a named subexpression using: + + (?<NAME>expression) + +Which can then be referred to by the name /NAME/. Alternatively you can delimit the name +using 'NAME' as in: + + (?'NAME'expression) + +These named subexpressions can be referred to in a backreference using either [^\g{NAME}] or [^\k<NAME>] +and can also be referred to by name in a [perl_format] format string for search and replace operations, or in the +[match_results] member functions. + [h5 Comments] =(?# ... )= is treated as a comment, it's contents are ignored. 
diff --git a/include/boost/regex/concepts.hpp b/include/boost/regex/concepts.hpp index 0a22aebd..98fd5941 100644 --- a/include/boost/regex/concepts.hpp +++ b/include/boost/regex/concepts.hpp @@ -844,6 +844,42 @@ struct BoostRegexConcept m_string = m_char + m_sub; ignore_unused_variable_warning(m_string); + // Named sub-expressions: + m_sub = m_cresults[&m_char]; + ignore_unused_variable_warning(m_sub); + m_sub = m_cresults[m_string]; + ignore_unused_variable_warning(m_sub); + m_sub = m_cresults[""]; + ignore_unused_variable_warning(m_sub); + m_sub = m_cresults[std::string("")]; + ignore_unused_variable_warning(m_sub); + m_string = m_cresults.str(&m_char); + ignore_unused_variable_warning(m_string); + m_string = m_cresults.str(m_string); + ignore_unused_variable_warning(m_string); + m_string = m_cresults.str(""); + ignore_unused_variable_warning(m_string); + m_string = m_cresults.str(std::string("")); + ignore_unused_variable_warning(m_string); + + typename match_results_type::difference_type diff; + diff = m_cresults.length(&m_char); + ignore_unused_variable_warning(diff); + diff = m_cresults.length(m_string); + ignore_unused_variable_warning(diff); + diff = m_cresults.length(""); + ignore_unused_variable_warning(diff); + diff = m_cresults.length(std::string("")); + ignore_unused_variable_warning(diff); + diff = m_cresults.position(&m_char); + ignore_unused_variable_warning(diff); + diff = m_cresults.position(m_string); + ignore_unused_variable_warning(diff); + diff = m_cresults.position(""); + ignore_unused_variable_warning(diff); + diff = m_cresults.position(std::string("")); + ignore_unused_variable_warning(diff); + #ifndef BOOST_NO_STD_LOCALE m_stream << m_sub; m_stream << m_cresults; diff --git a/include/boost/regex/v4/basic_regex.hpp b/include/boost/regex/v4/basic_regex.hpp index cb9ff3c5..aed79528 100644 --- a/include/boost/regex/v4/basic_regex.hpp +++ b/include/boost/regex/v4/basic_regex.hpp @@ -19,6 +19,8 @@ #ifndef BOOST_REGEX_V4_BASIC_REGEX_HPP #define 
BOOST_REGEX_V4_BASIC_REGEX_HPP +#include + #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable: 4103) @@ -44,12 +46,123 @@ namespace re_detail{ template class basic_regex_parser; +template +void bubble_down_one(I first, I last) +{ + if(first != last) + { + I next = last - 1; + while((next != first) && !(*(next-1) < *next)) + { + (next-1)->swap(*next); + --next; + } + } +} + +// +// Class named_subexpressions +// Contains information about named subexpressions within the regex. +// +template +class named_subexpressions_base +{ +public: + virtual int get_id(const charT* i, const charT* j) = 0; +}; + +template +class named_subexpressions : public named_subexpressions_base +{ + struct name + { + name(const charT* i, const charT* j, int idx) + : n(i, j), index(idx) {} + std::vector n; + int index; + bool operator < (const name& other)const + { + return std::lexicographical_compare(n.begin(), n.end(), other.n.begin(), other.n.end()); + } + bool operator == (const name& other)const + { + return n == other.n; + } + void swap(name& other) + { + n.swap(other.n); + std::swap(index, other.index); + } + }; +public: + named_subexpressions(){} + void set_name(const charT* i, const charT* j, int index) + { + m_sub_names.push_back(name(i, j, index)); + bubble_down_one(m_sub_names.begin(), m_sub_names.end()); + } + int get_id(const charT* i, const charT* j) + { + name t(i, j, 0); + typename std::vector::const_iterator pos = lower_bound(m_sub_names.begin(), m_sub_names.end(), t); + if((pos != m_sub_names.end()) && (*pos == t)) + { + return pos->index; + } + return -1; + } +private: + std::vector m_sub_names; +}; + +template +class named_subexpressions_converter : public named_subexpressions_base +{ + boost::shared_ptr > m_converter; +public: + named_subexpressions_converter(boost::shared_ptr > s) + : m_converter(s) {} + virtual int get_id(const charT* i, const charT* j) + { + if(i == j) + return -1; + std::vector v; + while(i != j) + { + v.push_back(*i); + ++i; + } + 
return m_converter->get_id(&v[0], &v[0] + v.size()); + } +}; + +template +inline boost::shared_ptr > convert_to_named_subs_imp( + boost::shared_ptr > s, + boost::integral_constant const&) +{ + return s; +} +template +inline boost::shared_ptr > convert_to_named_subs_imp( + boost::shared_ptr > s, + boost::integral_constant const&) +{ + return boost::shared_ptr >(new named_subexpressions_converter(s)); +} +template +inline boost::shared_ptr > convert_to_named_subs( + boost::shared_ptr > s) +{ + typedef typename boost::is_same::type tag_type; + return convert_to_named_subs_imp(s, tag_type()); +} // // class regex_data: // represents the data we wish to expose to the matching algorithms. // template -struct regex_data +struct regex_data : public named_subexpressions { typedef regex_constants::syntax_option_type flag_type; typedef std::size_t size_type; @@ -520,6 +633,10 @@ public: BOOST_ASSERT(0 != m_pimpl.get()); return m_pimpl->get_data(); } + boost::shared_ptr > get_named_subs()const + { + return m_pimpl; + } private: shared_ptr > m_pimpl; diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index 6431d16e..09777d20 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -777,6 +777,15 @@ escape_type_class_jump: } const charT* pc = m_position; int i = this->m_traits.toi(pc, m_end, 10); + if(i < 0) + { + // Check for a named capture: + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + ++m_position; + i = this->m_pdata->get_id(base, m_position); + pc = m_position; + } if(negative) i = 1 + m_mark_count - i; if((i > 0) && (this->m_backrefs & (1u << (i-1)))) @@ -1784,6 +1793,7 @@ bool basic_regex_parser::parse_perl_extension() regex_constants::syntax_option_type old_flags = this->flags(); bool old_case_change = m_has_case_change; m_has_case_change = false; + charT 
name_delim; // // select the actual extension used: // @@ -1825,8 +1835,10 @@ bool basic_regex_parser::parse_perl_extension() pb->index = markid = -1; else { - fail(regex_constants::error_badrepeat, m_position - m_base); - return false; + // Probably a named capture which also starts (?< : + name_delim = '>'; + --m_position; + goto named_capture_jump; } ++m_position; jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); @@ -1903,7 +1915,7 @@ bool basic_regex_parser::parse_perl_extension() if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) { - fail(regex_constants::error_badrepeat, m_position - m_base); + fail(regex_constants::error_paren, m_position - m_base); return false; } m_position -= 2; @@ -1914,6 +1926,40 @@ bool basic_regex_parser::parse_perl_extension() case regex_constants::syntax_close_mark: fail(regex_constants::error_badrepeat, m_position - m_base); return false; + case regex_constants::escape_type_end_buffer: + { + name_delim = *m_position; +named_capture_jump: + markid = 0; + if(0 == (this->flags() & regbase::nosubs)) + { + markid = ++m_mark_count; + #ifndef BOOST_NO_STD_DISTANCE + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair(std::distance(m_base, m_position) - 2, 0)); + #else + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair((m_position - m_base) - 2, 0)); + #endif + } + pb->index = markid; + const charT* base = ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + while((m_position != m_end) && (*m_position != name_delim)) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + this->m_pdata->set_name(base, m_position, markid); + ++m_position; + break; + } default: 
// // lets assume that we have a (?imsx) group and try and parse it: @@ -2043,6 +2089,22 @@ bool basic_regex_parser::parse_perl_extension() // and the case change data: // m_has_case_change = old_case_change; + + if(markid > 0) + { +#ifndef BOOST_NO_STD_DISTANCE + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1; +#else + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1; +#endif + // + // allow backrefs to this mark: + // + if((markid > 0) && (markid < (int)(sizeof(unsigned) * CHAR_BIT))) + this->m_backrefs |= 1u << (markid - 1); + } return true; } diff --git a/include/boost/regex/v4/match_results.hpp b/include/boost/regex/v4/match_results.hpp index 5642508b..09dd31f0 100644 --- a/include/boost/regex/v4/match_results.hpp +++ b/include/boost/regex/v4/match_results.hpp @@ -36,6 +36,13 @@ namespace boost{ #pragma warning(disable : 4251 4231 4660) #endif +namespace re_detail{ + +template +class named_subexpressions; + +} + template class match_results { @@ -62,13 +69,14 @@ public: typedef typename re_detail::regex_iterator_traits< BidiIterator>::value_type char_type; typedef std::basic_string string_type; + typedef re_detail::named_subexpressions_base named_sub_type; // construct/copy/destroy: explicit match_results(const Allocator& a = Allocator()) #ifndef BOOST_NO_STD_ALLOCATOR - : m_subs(a), m_base() {} + : m_subs(a), m_base(), m_last_closed_paren(0) {} #else - : m_subs(), m_base() { (void)a; } + : m_subs(), m_base(), m_last_closed_paren(0) { (void)a; } #endif match_results(const match_results& m) : m_subs(m.m_subs), m_base(m.m_base) {} @@ -95,6 +103,24 @@ public: return m_subs[sub].length(); return 0; } + difference_type length(const char_type* sub) const + { + const char_type* end = sub; + while(*end) ++end; + return length(named_subexpression_index(sub, end)); + } + template + 
difference_type length(const charT* sub) const + { + const charT* end = sub; + while(*end) ++end; + return length(named_subexpression_index(sub, end)); + } + template + difference_type length(const std::basic_string& sub) const + { + return length(sub.c_str()); + } difference_type position(size_type sub = 0) const { sub += 2; @@ -108,6 +134,24 @@ public: } return ~static_cast(0); } + difference_type position(const char_type* sub) const + { + const char_type* end = sub; + while(*end) ++end; + return position(named_subexpression_index(sub, end)); + } + template + difference_type position(const charT* sub) const + { + const charT* end = sub; + while(*end) ++end; + return position(named_subexpression_index(sub, end)); + } + template + difference_type position(const std::basic_string& sub) const + { + return position(sub.c_str()); + } string_type str(int sub = 0) const { sub += 2; @@ -122,6 +166,25 @@ public: } return result; } + string_type str(const char_type* sub) const + { + return (*this)[sub].str(); + } + template + string_type str(const std::basic_string& sub) const + { + return (*this)[sub].str(); + } + template + string_type str(const charT* sub) const + { + return (*this)[sub].str(); + } + template + string_type str(const std::basic_string& sub) const + { + return (*this)[sub].str(); + } const_reference operator[](int sub) const { sub += 2; @@ -131,6 +194,75 @@ public: } return m_null; } + // + // Named sub-expressions: + // + const_reference named_subexpression(const char_type* i, const char_type* j) const + { + int index = m_named_subs->get_id(i, j); + return index > 0 ? 
(*this)[index] : m_null; + } + template + const_reference named_subexpression(const charT* i, const charT* j) const + { + BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); + if(i == j) + return m_null; + std::vector s; + while(i != j) + s.insert(s.end(), *i++); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } + int named_subexpression_index(const char_type* i, const char_type* j) const + { + int index = m_named_subs->get_id(i, j); + return index > 0 ? index : -20; + } + template + int named_subexpression_index(const charT* i, const charT* j) const + { + BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); + if(i == j) + return -20; + std::vector s; + while(i != j) + s.insert(s.end(), *i++); + return named_subexpression_index(&*s.begin(), &*s.begin() + s.size()); + } + template + const_reference operator[](const std::basic_string& s) const + { + return named_subexpression(s.c_str(), s.c_str() + s.size()); + } + const_reference operator[](const char_type* p) const + { + const char_type* e = p; + while(*e) ++e; + return named_subexpression(p, e); + } + + template + const_reference operator[](const charT* p) const + { + BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); + if(*p == 0) + return m_null; + std::vector s; + while(*p) + s.insert(s.end(), *p++); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } + template + const_reference operator[](const std::basic_string& ns) const + { + BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); + if(ns.empty()) + return m_null; + std::vector s; + for(unsigned i = 0; i < ns.size(); ++i) + s.insert(s.end(), ns[i]); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } const_reference prefix() const { @@ -186,6 +318,10 @@ public: ::boost::re_detail::regex_format_imp(i, *this, fmt.data(), fmt.data() + fmt.size(), flags, re.get_traits()); return result; } + const_reference get_last_closed_paren()const + { + return m_last_closed_paren == 0 ? 
m_null : (*this)[m_last_closed_paren]; + } allocator_type get_allocator() const { @@ -232,6 +368,8 @@ public: void BOOST_REGEX_CALL set_second(BidiIterator i, size_type pos, bool m = true, bool escape_k = false) { + if(pos) + m_last_closed_paren = pos; pos += 2; BOOST_ASSERT(m_subs.size() > pos); m_subs[pos].second = i; @@ -261,6 +399,7 @@ public: m_subs.insert(m_subs.end(), n+2-len, v); } m_subs[1].first = i; + m_last_closed_paren = 0; } void BOOST_REGEX_CALL set_base(BidiIterator pos) { @@ -301,11 +440,17 @@ public: } void BOOST_REGEX_CALL maybe_assign(const match_results& m); + void BOOST_REGEX_CALL set_named_subs(boost::shared_ptr subs) + { + m_named_subs = subs; + } private: vector_type m_subs; // subexpressions BidiIterator m_base; // where the search started from sub_match m_null; // a null match + boost::shared_ptr m_named_subs; + int m_last_closed_paren; }; template diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index 1be1af6f..09b0a9bb 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -200,6 +200,7 @@ bool perl_matcher::match_imp() m_match_flags |= regex_constants::match_all; m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last); m_presult->set_base(base); + m_presult->set_named_subs(re_detail::convert_to_named_subs::char_type>(this->re.get_named_subs())); if(m_match_flags & match_posix) m_result = *m_presult; verify_options(re.flags(), m_match_flags); @@ -261,6 +262,7 @@ bool perl_matcher::find_imp() pstate = re.get_first_state(); m_presult->set_size((m_match_flags & match_nosubs) ? 
1 : re.mark_count(), base, last); m_presult->set_base(base); + m_presult->set_named_subs(re_detail::convert_to_named_subs::char_type>(this->re.get_named_subs())); m_match_flags |= regex_constants::match_init; } else diff --git a/include/boost/regex/v4/regex_format.hpp b/include/boost/regex/v4/regex_format.hpp index d114c2ed..fcfd9dc3 100644 --- a/include/boost/regex/v4/regex_format.hpp +++ b/include/boost/regex/v4/regex_format.hpp @@ -107,6 +107,7 @@ private: void format_escape(); void format_conditional(); void format_until_scope_end(); + bool handle_perl_verb(bool have_brace); const traits& m_traits; // the traits class for localised formatting operations const Results& m_results; // the match_results being used. @@ -250,6 +251,25 @@ void basic_regex_formatter::format_perl() case '$': put(*m_position++); break; + case '+': + if((++m_position != m_end) && (*m_position == '{')) + { + const char_type* base = ++m_position; + while((m_position != m_end) && (*m_position != '}')) ++m_position; + if(m_position != m_end) + { + // Named sub-expression: + put(this->m_results.named_subexpression(base, m_position)); + ++m_position; + break; + } + else + { + m_position = --base; + } + } + put((this->m_results)[this->m_results.size() > 1 ? 
this->m_results.size() - 1 : 1]); + break; case '{': have_brace = true; ++m_position; @@ -258,14 +278,18 @@ void basic_regex_formatter::format_perl() // see if we have a number: { std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); - len = (std::min)(static_cast(2), len); + //len = (std::min)(static_cast(2), len); int v = m_traits.toi(m_position, m_position + len, 10); if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}')))) { - // leave the $ as is, and carry on: - m_position = --save_position; - put(*m_position); - ++m_position; + // Look for a Perl-5.10 verb: + if(!handle_perl_verb(have_brace)) + { + // leave the $ as is, and carry on: + m_position = --save_position; + put(*m_position); + ++m_position; + } break; } // otherwise output sub v: @@ -276,6 +300,123 @@ void basic_regex_formatter::format_perl() } } +template +bool basic_regex_formatter::handle_perl_verb(bool have_brace) +{ + // + // We may have a capitalised string containing a Perl action: + // + static const char_type MATCH[] = { 'M', 'A', 'T', 'C', 'H' }; + static const char_type PREMATCH[] = { 'P', 'R', 'E', 'M', 'A', 'T', 'C', 'H' }; + static const char_type POSTMATCH[] = { 'P', 'O', 'S', 'T', 'M', 'A', 'T', 'C', 'H' }; + static const char_type LAST_PAREN_MATCH[] = { 'L', 'A', 'S', 'T', '_', 'P', 'A', 'R', 'E', 'N', '_', 'M', 'A', 'T', 'C', 'H' }; + static const char_type LAST_SUBMATCH_RESULT[] = { 'L', 'A', 'S', 'T', '_', 'S', 'U', 'B', 'M', 'A', 'T', 'C', 'H', '_', 'R', 'E', 'S', 'U', 'L', 'T' }; + static const char_type LAST_SUBMATCH_RESULT_ALT[] = { '^', 'N' }; + + if(have_brace && (*m_position == '^')) + ++m_position; + + int max_len = m_end - m_position; + + if((max_len >= 5) && std::equal(m_position, m_position + 5, MATCH)) + { + m_position += 5; + if(have_brace) + { + if(*m_position == '}') + ++m_position; + else + { + m_position -= 5; + return false; + } + } + put(this->m_results[0]); + return true; + } + if((max_len >= 8) && std::equal(m_position, 
m_position + 8, PREMATCH)) + { + m_position += 8; + if(have_brace) + { + if(*m_position == '}') + ++m_position; + else + { + m_position -= 8; + return false; + } + } + put(this->m_results.prefix()); + return true; + } + if((max_len >= 9) && std::equal(m_position, m_position + 9, POSTMATCH)) + { + m_position += 9; + if(have_brace) + { + if(*m_position == '}') + ++m_position; + else + { + m_position -= 9; + return false; + } + } + put(this->m_results.suffix()); + return true; + } + if((max_len >= 16) && std::equal(m_position, m_position + 16, LAST_PAREN_MATCH)) + { + m_position += 16; + if(have_brace) + { + if(*m_position == '}') + ++m_position; + else + { + m_position -= 16; + return false; + } + } + put((this->m_results)[this->m_results.size() > 1 ? this->m_results.size() - 1 : 1]); + return true; + } + if((max_len >= 20) && std::equal(m_position, m_position + 20, LAST_SUBMATCH_RESULT)) + { + m_position += 20; + if(have_brace) + { + if(*m_position == '}') + ++m_position; + else + { + m_position -= 20; + return false; + } + } + put(this->m_results.get_last_closed_paren()); + return true; + } + if((max_len >= 2) && std::equal(m_position, m_position + 2, LAST_SUBMATCH_RESULT_ALT)) + { + m_position += 2; + if(have_brace) + { + if(*m_position == '}') + ++m_position; + else + { + m_position -= 2; + return false; + } + } + put(this->m_results.get_last_closed_paren()); + return true; + } + return false; +} + template void basic_regex_formatter::format_escape() { diff --git a/include/boost/regex/v4/regex_traits_defaults.hpp b/include/boost/regex/v4/regex_traits_defaults.hpp index c213889c..55529c5e 100644 --- a/include/boost/regex/v4/regex_traits_defaults.hpp +++ b/include/boost/regex/v4/regex_traits_defaults.hpp @@ -326,9 +326,9 @@ inline const charT* get_escape_R_string() #endif static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?', '|', '[', '\x0A', '\x0B', '\x0C', '\x85', '\\', 'x', '{', '2', '0', '2', '8', '}', - '\\', 'x', '{', '2', '0', '2', '9', '}', ']', 
')' }; + '\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' }; static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', - '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')' }; + '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' }; charT c = static_cast(0x2029u); bool b = (static_cast(c) == 0x2029u); diff --git a/src/regex_traits_defaults.cpp b/src/regex_traits_defaults.cpp index 96ea0b3d..c9596a3d 100644 --- a/src/regex_traits_defaults.cpp +++ b/src/regex_traits_defaults.cpp @@ -537,7 +537,7 @@ BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_synta regex_constants::syntax_dollar, /*$*/ regex_constants::syntax_char, /*%*/ regex_constants::syntax_char, /*&*/ - regex_constants::syntax_char, /*'*/ + regex_constants::escape_type_end_buffer, /*'*/ regex_constants::syntax_open_mark, /*(*/ regex_constants::syntax_close_mark, /*)*/ regex_constants::syntax_star, /***/ diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 4d1a2967..40847731 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -87,6 +87,10 @@ test-suite regex ../build//boost_regex ] + [ run named_subexpressions/named_subexpressions_test.cpp + ../build//boost_regex + ] + [ run unicode/unicode_iterator_test.cpp ../build//boost_regex ] [ run static_mutex/static_mutex_test.cpp ../../thread/build//boost_thread ../build//boost_regex diff --git a/test/named_subexpressions/named_subexpressions_test.cpp b/test/named_subexpressions/named_subexpressions_test.cpp new file mode 100644 index 00000000..41011415 --- /dev/null +++ b/test/named_subexpressions/named_subexpressions_test.cpp @@ -0,0 +1,109 @@ +/* + * + * Copyright (c) 2009 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. 
(See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + +#include +#include + + +template +void test_named_subexpressions(charT) +{ + // + // Really this is just a test that the overloaded access functions work correctly: + // + static const charT e[] = + { + '(', '?', '\'', 'o', 'n', 'e', '\'', 'a', '+', ')', '(', '?', '<', 't', 'w', 'o', '>', 'b', '+', ')', '\0' + }; + static const charT t[] = + { + 'm', 'm', 'a', 'a', 'a', 'b', 'b', 'n', 'n', '\0' + }; + static const charT one[] = + { + 'o', 'n', 'e', '\0' + }; + static const charT two[] = + { + 't', 'w', 'o', '\0' + }; + static const std::basic_string s_one(one); + static const std::basic_string s_two(two); + static const charT result1[] = { 'a', 'a', 'a', '\0' }; + static const charT result2[] = { 'b', 'b', '\0' }; + static const std::basic_string s_result1(result1); + static const std::basic_string s_result2(result2); + + static const char* c_one = "one"; + static const char* c_two = "two"; + static const std::string cs_one(c_one); + static const std::string cs_two(c_two); + + boost::basic_regex expression(e); + boost::match_results what; + if(regex_search(t, what, expression)) + { + BOOST_CHECK(what.length(1) == 3); + BOOST_CHECK(what.length(one) == 3); + BOOST_CHECK(what.length(s_one) == 3); + BOOST_CHECK(what.length(c_one) == 3); + BOOST_CHECK(what.length(cs_one) == 3); + BOOST_CHECK(what.position(1) == 2); + BOOST_CHECK(what.position(one) == 2); + BOOST_CHECK(what.position(s_one) == 2); + BOOST_CHECK(what.position(c_one) == 2); + BOOST_CHECK(what.position(cs_one) == 2); + BOOST_CHECK(what.str(1) == s_result1); + BOOST_CHECK(what.str(one) == s_result1); + BOOST_CHECK(what.str(s_one) == s_result1); + BOOST_CHECK(what.str(c_one) == s_result1); + BOOST_CHECK(what.str(cs_one) == s_result1); + BOOST_CHECK(what[1] == s_result1); + BOOST_CHECK(what[one] == s_result1); + BOOST_CHECK(what[s_one] == s_result1); + BOOST_CHECK(what[c_one] == s_result1); + 
BOOST_CHECK(what[cs_one] == s_result1); + + BOOST_CHECK(what.length(2) == 2); + BOOST_CHECK(what.length(two) == 2); + BOOST_CHECK(what.length(s_two) == 2); + BOOST_CHECK(what.length(c_two) == 2); + BOOST_CHECK(what.length(cs_two) == 2); + BOOST_CHECK(what.position(2) == 5); + BOOST_CHECK(what.position(two) == 5); + BOOST_CHECK(what.position(s_two) == 5); + BOOST_CHECK(what.position(c_two) == 5); + BOOST_CHECK(what.position(cs_two) == 5); + BOOST_CHECK(what.str(2) == s_result2); + BOOST_CHECK(what.str(two) == s_result2); + BOOST_CHECK(what.str(s_two) == s_result2); + BOOST_CHECK(what.str(c_two) == s_result2); + BOOST_CHECK(what.str(cs_two) == s_result2); + BOOST_CHECK(what[2] == s_result2); + BOOST_CHECK(what[two] == s_result2); + BOOST_CHECK(what[s_two] == s_result2); + BOOST_CHECK(what[c_two] == s_result2); + BOOST_CHECK(what[cs_two] == s_result2); + } + else + { + BOOST_ERROR("Expected match not found"); + } +} + +int test_main( int , char* [] ) +{ + test_named_subexpressions(char(0)); + test_named_subexpressions(wchar_t(0)); + return 0; +} + +#include diff --git a/test/regress/test_backrefs.cpp b/test/regress/test_backrefs.cpp index e5c254ff..58f4dedb 100644 --- a/test/regress/test_backrefs.cpp +++ b/test/regress/test_backrefs.cpp @@ -90,5 +90,18 @@ void test_backrefs() TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbbbd", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("^(.)\\g{-1}", perl, "abc", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("a([bc])\\g{-1}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2)); + + // And again but with named subexpressions: + TEST_REGEX_SEARCH("a(?(?(?(?b*))))c\\g{foo}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, 1, 3, 1, 3, 1, 3, -2, -2)); + TEST_REGEX_SEARCH("a(?(?(?(?b*))))c\\g{foo}d", perl, "abbcbd", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("a(?(?(?(?b*))))c\\g{foo}d", perl, "abbcbbbd", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("^(?.)\\g{foo}", perl, 
"abc", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("a(?[bc])\\g{foo}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2)); + + TEST_REGEX_SEARCH("a(?'foo'(?'bar'(?'bb'(?'aa'b*))))c\\g{foo}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, 1, 3, 1, 3, 1, 3, -2, -2)); + TEST_REGEX_SEARCH("a(?'foo'(?'bar'(?'bb'(?'aa'b*))))c\\g{foo}d", perl, "abbcbd", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("a(?'foo'(?'bar'(?'bb'(?'aa'b*))))c\\g{foo}d", perl, "abbcbbbd", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("^(?'foo'.)\\g{foo}", perl, "abc", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("a(?'foo'[bc])\\g{foo}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2)); } diff --git a/test/regress/test_deprecated.cpp b/test/regress/test_deprecated.cpp index af50ec76..84eefb9f 100644 --- a/test/regress/test_deprecated.cpp +++ b/test/regress/test_deprecated.cpp @@ -107,7 +107,7 @@ void test_deprecated(const char&, const test_regex_search_tag&) int i = 0; while(results[2*i] != -2) { - if(max_subs > i) + if((int)max_subs > i) { if(results[2*i] != matches[i].rm_so) { @@ -231,7 +231,7 @@ void test_deprecated(const wchar_t&, const test_regex_search_tag&) int i = 0; while(results[2*i] != -2) { - if(max_subs > i) + if((int)max_subs > i) { if(results[2*i] != matches[i].rm_so) { diff --git a/test/regress/test_escapes.cpp b/test/regress/test_escapes.cpp index 21d4262f..d2dbbe43 100644 --- a/test/regress/test_escapes.cpp +++ b/test/regress/test_escapes.cpp @@ -156,7 +156,7 @@ void test_assertion_escapes() TEST_REGEX_SEARCH("\\R", perl, "foo\rbar", match_default, make_array(3, 4, -2, -2)); TEST_REGEX_SEARCH("\\R", perl, "foo\r\nbar", match_default, make_array(3, 5, -2, -2)); // see if \u works: - const wchar_t* w = "\u2028"; + const wchar_t* w = L"\u2028"; if(*w == 0x2028u) { TEST_REGEX_SEARCH_W(L"\\R", perl, L"foo\u2028bar", match_default, make_array(3, 4, -2, -2)); diff --git a/test/regress/test_replace.cpp 
b/test/regress/test_replace.cpp index e2acf380..caf2c2e3 100644 --- a/test/regress/test_replace.cpp +++ b/test/regress/test_replace.cpp @@ -126,5 +126,53 @@ void test_replace() TEST_REGEX_REPLACE("(a+)", perl, "...aaa,,,", match_default, "/${10}/", "...//,,,"); TEST_REGEX_REPLACE("((((((((((a+))))))))))", perl, "...aaa,,,", match_default, "/${10}/", ".../aaa/,,,"); TEST_REGEX_REPLACE("(a+)", perl, "...aaa,,,", match_default, "/${1}0/", ".../aaa0/,,,"); + + // New Perl style operators: + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$MATCH", "aaa"); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${MATCH}", "aaa"); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${^MATCH}", "aaa"); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$MATC", "$MATC"); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${MATCH", "${MATCH"); + + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$PREMATCH", "..."); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${PREMATCH}", "..."); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${^PREMATCH}", "..."); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$PREMATC", "$PREMATC"); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${PREMATCH", "${PREMATCH"); + + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$POSTMATCH", ",,,"); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${POSTMATCH}", ",,,"); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${^POSTMATCH}", ",,,"); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$POSTMATC", "$POSTMATC"); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${POSTMATCH", "${POSTMATCH"); + + 
TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_PAREN_MATCH", ""); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_PAREN_MATC", "$LAST_PAREN_MATC"); + TEST_REGEX_REPLACE("(a+)", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_PAREN_MATCH", "aaa"); + TEST_REGEX_REPLACE("(a+)(b+)", perl, "...aaabb,,,", match_default|format_no_copy, "$LAST_PAREN_MATCH", "bb"); + + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$+", ""); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$+foo", "foo"); + TEST_REGEX_REPLACE("(a+)", perl, "...aaa,,,", match_default|format_no_copy, "$+", "aaa"); + TEST_REGEX_REPLACE("(a+)(b+)", perl, "...aaabb,,,", match_default|format_no_copy, "$+foo", "bbfoo"); + TEST_REGEX_REPLACE("(a+)(b+)", perl, "...aaabb,,,", match_default|format_no_copy, "$+{", "bb{"); + TEST_REGEX_REPLACE("(a+)(b+)", perl, "...aaabb,,,", match_default|format_no_copy, "$+{foo", "bb{foo"); + + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_SUBMATCH_RESULT", ""); + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_SUBMATCH_RESUL", "$LAST_SUBMATCH_RESUL"); + TEST_REGEX_REPLACE("(a+)", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_SUBMATCH_RESULT", "aaa"); + TEST_REGEX_REPLACE("(a+)(b+)", perl, "...aaabb,,,", match_default|format_no_copy, "$LAST_SUBMATCH_RESULT", "bb"); + TEST_REGEX_REPLACE("(a+)|(b+)", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_SUBMATCH_RESULT", "aaa"); + + TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$^N", ""); + TEST_REGEX_REPLACE("(a+)", perl, "...aaa,,,", match_default|format_no_copy, "$^N", "aaa"); + TEST_REGEX_REPLACE("(a+)(b+)", perl, "...aaabb,,,", match_default|format_no_copy, "$^N", "bb"); + TEST_REGEX_REPLACE("(a+)|(b+)", perl, "...aaa,,,", match_default|format_no_copy, "$^N", "aaa"); + + 
TEST_REGEX_REPLACE("(?a+)(?b+)", perl, " ...aabb,,", match_default|format_no_copy, "$&", "aabb"); + TEST_REGEX_REPLACE("(?a+)(?b+)", perl, " ...aabb,,", match_default|format_no_copy, "$1", "aa"); + TEST_REGEX_REPLACE("(?a+)(?b+)", perl, " ...aabb,,", match_default|format_no_copy, "$2", "bb"); + TEST_REGEX_REPLACE("(?a+)(?b+)", perl, " ...aabb,,", match_default|format_no_copy, "d$+{one}c", "daac"); + TEST_REGEX_REPLACE("(?a+)(?b+)", perl, " ...aabb,,", match_default|format_no_copy, "c$+{two}d", "cbbd"); }