diff --git a/doc/format_perl_syntax.qbk b/doc/format_perl_syntax.qbk
index cfd57500..63cdbab2 100644
--- a/doc/format_perl_syntax.qbk
+++ b/doc/format_perl_syntax.qbk
@@ -17,13 +17,24 @@ should be sent to output as follows:
[table
[[Placeholder][Meaning]]
[[$&][Outputs what matched the whole expression.]]
-[[$`][Outputs the text between the end of the last match found (or the
+[[$MATCH][As $&]]
+[[${^MATCH}][As $&]]
+[[$\`][Outputs the text between the end of the last match found (or the
start of the text if no previous match was found), and the start
of the current match.]]
+[[$PREMATCH][As $\`]]
+[[${^PREMATCH}][As $\`]]
[[$'][Outputs all the text following the end of the current match.]]
+[[$POSTMATCH][As $']]
+[[${^POSTMATCH}][As $']]
+[[$+][Outputs what matched the last marked sub-expression in the regular expression.]]
+[[$LAST_PAREN_MATCH][As $+]]
+[[$LAST_SUBMATCH_RESULT][Outputs what matched the last sub-expression to be actually matched.]]
+[[$^N][As $LAST_SUBMATCH_RESULT]]
[[$$][Outputs a literal '$']]
[[$n][Outputs what matched the n'th sub-expression.]]
[[${n}][Outputs what matched the n'th sub-expression.]]
+[[$+{NAME}][Outputs whatever matched the sub-expression named "NAME".]]
]
Any $-placeholder sequence not listed above, results in '$' being treated
diff --git a/doc/html/boost_regex/background_information/examples.html b/doc/html/boost_regex/background_information/examples.html
index b5d3b485..d551f231 100644
--- a/doc/html/boost_regex/background_information/examples.html
+++ b/doc/html/boost_regex/background_information/examples.html
@@ -28,7 +28,7 @@
Example Programs
@@ -107,7 +107,7 @@
Files: captures_test.cpp.
@@ -133,7 +133,7 @@
Files: regex_timer.cpp.
diff --git a/doc/html/boost_regex/background_information/history.html b/doc/html/boost_regex/background_information/history.html
index f42f7bff..52f8efe2 100644
--- a/doc/html/boost_regex/background_information/history.html
+++ b/doc/html/boost_regex/background_information/history.html
@@ -26,7 +26,7 @@
History
@@ -53,7 +53,7 @@
@@ -76,7 +76,7 @@
@@ -146,7 +146,7 @@
@@ -201,7 +201,7 @@
@@ -209,7 +209,7 @@
Fixed bug in partial matches of bounded repeats of '.'.
diff --git a/doc/html/boost_regex/background_information/locale.html b/doc/html/boost_regex/background_information/locale.html
index 62284e82..8d9e50d7 100644
--- a/doc/html/boost_regex/background_information/locale.html
+++ b/doc/html/boost_regex/background_information/locale.html
@@ -58,7 +58,7 @@
There are three separate localization mechanisms supported by Boost.Regex:
@@ -90,7 +90,7 @@
are treated as "unknown" graphic characters.
@@ -114,7 +114,7 @@
libraries including version 1 of this library.
@@ -151,7 +151,7 @@
in your code. The best way to ensure this is to add the #define to <boost/regex/user.hpp>
.
diff --git a/doc/html/boost_regex/background_information/standards.html b/doc/html/boost_regex/background_information/standards.html
index 437e53e0..a711f234 100644
--- a/doc/html/boost_regex/background_information/standards.html
+++ b/doc/html/boost_regex/background_information/standards.html
@@ -28,7 +28,7 @@
Conformance
@@ -36,7 +36,7 @@
Report on C++ Library Extensions.
@@ -49,7 +49,7 @@
rather than a Unicode escape sequence; use \x{DDDD} for Unicode escape sequences.
@@ -62,7 +62,7 @@
(??{code}) Not implementable in a compiled strongly typed language.
@@ -82,7 +82,7 @@
a custom traits class.
diff --git a/doc/html/boost_regex/captures.html b/doc/html/boost_regex/captures.html
index c4dcc347..de155209 100644
--- a/doc/html/boost_regex/captures.html
+++ b/doc/html/boost_regex/captures.html
@@ -35,7 +35,7 @@
accessed.
@@ -218,7 +218,7 @@
output stream.
@@ -231,7 +231,7 @@
you can determine which sub-expressions matched by accessing the sub_match::matched
data member.
diff --git a/doc/html/boost_regex/format/boost_format_syntax.html b/doc/html/boost_regex/format/boost_format_syntax.html
index 809f2205..73b7d8ae 100644
--- a/doc/html/boost_regex/format/boost_format_syntax.html
+++ b/doc/html/boost_regex/format/boost_format_syntax.html
@@ -32,7 +32,7 @@
'$', '\', '(', ')', '?', and ':'.
@@ -40,7 +40,7 @@
you want a to output literal parenthesis.
@@ -66,7 +66,7 @@
with "bar" otherwise.
@@ -161,7 +161,7 @@
as a literal.
diff --git a/doc/html/boost_regex/format/perl_format.html b/doc/html/boost_regex/format/perl_format.html
index fde101e3..616dd3fb 100644
--- a/doc/html/boost_regex/format/perl_format.html
+++ b/doc/html/boost_regex/format/perl_format.html
@@ -65,6 +65,30 @@
+
+
+ $MATCH
+
+ |
+
+
+ As $&
+
+ |
+
+
+
+
+ ${^MATCH}
+
+ |
+
+
+ As $&
+
+ |
+
+
$`
@@ -79,6 +103,30 @@
|
+
+
+ $PREMATCH
+
+ |
+
+
+ As $`
+
+ |
+
+
+
+
+ ${^PREMATCH}
+
+ |
+
+
+ As $`
+
+ |
+
+
$'
@@ -91,6 +139,79 @@
|
+
+
+ $POSTMATCH
+
+ |
+
+
+ As $'
+
+ |
+
+
+
+
+ ${^POSTMATCH}
+
+ |
+
+
+ As $'
+
+ |
+
+
+
+
+ $+
+
+ |
+
+
+ Outputs what matched the last marked sub-expression in the regular
+ expression.
+
+ |
+
+
+
+
+ $LAST_PAREN_MATCH
+
+ |
+
+
+ As $+
+
+ |
+
+
+
+
+ $LAST_SUBMATCH_RESULT
+
+ |
+
+
+ Outputs what matched the last sub-expression to be actually matched.
+
+ |
+
+
+
+
+ $^N
+
+ |
+
+
+ As $LAST_SUBMATCH_RESULT
+
+ |
+
+
$$
@@ -126,6 +247,18 @@
|
+
+
+
+ $+{NAME}
+
+ |
+
+
+ Outputs whatever matched the sub-expression named "NAME".
+
+ |
+
diff --git a/doc/html/boost_regex/install.html b/doc/html/boost_regex/install.html
index 05d2a54d..b8aeb72e 100644
--- a/doc/html/boost_regex/install.html
+++ b/doc/html/boost_regex/install.html
@@ -49,7 +49,7 @@
file before you can use it, instructions for specific platforms are as follows:
@@ -58,7 +58,7 @@
started guide for more information.
@@ -96,11 +96,11 @@
ICU you are using is binary compatible with the toolset you use to build Boost.
@@ -166,7 +166,7 @@
a lot in compile times!
@@ -253,7 +253,7 @@
@@ -302,7 +302,7 @@
see the config library documentation.
@@ -347,7 +347,7 @@
will build v9 variants of the regex library named libboost_regex_v9.a etc.
diff --git a/doc/html/boost_regex/ref/bad_expression.html b/doc/html/boost_regex/ref/bad_expression.html
index 1d079e4f..e81b591d 100644
--- a/doc/html/boost_regex/ref/bad_expression.html
+++ b/doc/html/boost_regex/ref/bad_expression.html
@@ -27,7 +27,7 @@
bad_expression
#include <boost/pattern_except.hpp>
@@ -54,7 +54,7 @@
}
regex_error(const std::string& s, regex_constants::error_type err, std::ptrdiff_t pos);
diff --git a/doc/html/boost_regex/ref/basic_regex.html b/doc/html/boost_regex/ref/basic_regex.html
index a0c13601..6b6340eb 100644
--- a/doc/html/boost_regex/ref/basic_regex.html
+++ b/doc/html/boost_regex/ref/basic_regex.html
@@ -27,7 +27,7 @@
basic_regex
#include <boost/regex.hpp>
@@ -244,7 +244,7 @@
}
@@ -327,7 +327,7 @@
basic_regex
.
-
Table 1. basic_regex default construction postconditions
+
Table 1. basic_regex default construction postconditions
@@ -407,7 +407,7 @@
flags specified in f.
-
Table 2. Postconditions for basic_regex construction
+
Table 2. Postconditions for basic_regex construction
@@ -512,7 +512,7 @@
specified in f.
-
Table 3. Postconditions for basic_regex construction
+
Table 3. Postconditions for basic_regex construction
@@ -616,7 +616,7 @@
according the option flags specified in f.
-
Table 4. Postconditions for basic_regex construction
+
Table 4. Postconditions for basic_regex construction
@@ -727,7 +727,7 @@
flags specified in f.
-
Table 5. Postconditions for basic_regex construction
+
Table 5. Postconditions for basic_regex construction
@@ -829,7 +829,7 @@
flags specified in f.
-
Table 6. Postconditions for basic_regex construction
+
Table 6. Postconditions for basic_regex construction
@@ -1043,7 +1043,7 @@
in f.
-
Table 7. Postconditions for basic_regex::assign
+
Table 7. Postconditions for basic_regex::assign
diff --git a/doc/html/boost_regex/ref/concepts/traits_concept.html b/doc/html/boost_regex/ref/concepts/traits_concept.html
index fe4b0274..224279f6 100644
--- a/doc/html/boost_regex/ref/concepts/traits_concept.html
+++ b/doc/html/boost_regex/ref/concepts/traits_concept.html
@@ -34,7 +34,7 @@
Boost-specific enhanced interface.
@@ -381,7 +381,7 @@
diff --git a/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html b/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html
index 70fae8b1..d613605c 100644
--- a/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html
+++ b/doc/html/boost_regex/ref/deprecated_interfaces/regex_format.html
@@ -34,7 +34,7 @@
previous version of Boost.Regex and will not be further updated:
diff --git a/doc/html/boost_regex/ref/error_type.html b/doc/html/boost_regex/ref/error_type.html
index 7fb8bbe7..4c8d7a3f 100644
--- a/doc/html/boost_regex/ref/error_type.html
+++ b/doc/html/boost_regex/ref/error_type.html
@@ -27,7 +27,7 @@
error_type
@@ -57,7 +57,7 @@
}
diff --git a/doc/html/boost_regex/ref/match_flag_type.html b/doc/html/boost_regex/ref/match_flag_type.html
index dfbdfdc1..d515cb48 100644
--- a/doc/html/boost_regex/ref/match_flag_type.html
+++ b/doc/html/boost_regex/ref/match_flag_type.html
@@ -69,7 +69,7 @@
}
diff --git a/doc/html/boost_regex/ref/match_results.html b/doc/html/boost_regex/ref/match_results.html
index 7961ad1e..7b674765 100644
--- a/doc/html/boost_regex/ref/match_results.html
+++ b/doc/html/boost_regex/ref/match_results.html
@@ -27,7 +27,7 @@
match_results
#include <boost/regex.hpp>
@@ -98,9 +98,33 @@
bool empty() const;
difference_type length(int sub = 0) const;
+ difference_type length(const char_type* sub) const;
+ template <class charT>
+ difference_type length(const charT* sub) const;
+ template <class charT, class Traits, class A>
+ difference_type length(const std::basic_string<charT, Traits, A>& sub) const;
difference_type position(unsigned int sub = 0) const;
+ difference_type position(const char_type* sub) const;
+ template <class charT>
+ difference_type position(const charT* sub) const;
+ template <class charT, class Traits, class A>
+ difference_type position(const std::basic_string<charT, Traits, A>& sub) const;
string_type str(int sub = 0) const;
+ string_type str(const char_type* sub)const;
+ template <class Traits, class A>
+ string_type str(const std::basic_string<char_type, Traits, A>& sub)const;
+ template <class charT>
+ string_type str(const charT* sub)const;
+ template <class charT, class Traits, class A>
+ string_type str(const std::basic_string<charT, Traits, A>& sub)const;
const_reference operator[](int n) const;
+ const_reference operator[](const char_type* n) const;
+ template <class Traits, class A>
+ const_reference operator[](const std::basic_string<char_type, Traits, A>& n) const;
+ template <class charT>
+ const_reference operator[](const charT* n) const;
+ template <class charT, class Traits, class A>
+ const_reference operator[](const std::basic_string<charT, Traits, A>& n) const;
const_reference prefix() const;
@@ -142,7 +166,7 @@
match_results<BidirectionalIterator, Allocator>& m2);
@@ -375,14 +399,39 @@
difference_type length(int sub = 0)const;
+difference_type length(const char_type* sub)const;
+template <class charT>
+difference_type length(const charT* sub)const;
+template <class charT, class Traits, class A>
+difference_type length(const std::basic_string<charT, Traits, A>&)const;
Effects: Returns the length of sub-expression
sub, that is to say: (*this)[sub].length()
.
+
+ The overloads that accept a string refer to a named sub-expression n.
+ In the event that there is no such named sub-expression then the function
+ returns zero.
+
+
+ The template overloads of this function allow the string and/or character
+ type to be different from the character type of the underlying sequence and/or
+ regular expression: in this case the characters will be widened to the underlying
+ character type of the original regular expression. A compiler error will
+ occur if the argument passes a wider character type than the underlying sequence.
+ These overloads allow a normal narrow character C string literal to be used
+ as an argument, even when the underlying character type of the expression
+ being matched may be something more exotic such as a Unicode character type.
+
difference_type position(unsigned int sub = 0)const;
+difference_type position(const char_type* sub)const;
+template <class charT>
+difference_type position(const charT* sub)const;
+template <class charT, class Traits, class A>
+difference_type position(const std::basic_string<charT, Traits, A>&)const;
Effects: Returns the starting location of
@@ -391,17 +440,61 @@
will return the location of the partial match even though (*this)[0].matched
is false.
+
+ The overloads that accept a string refer to a named sub-expression n.
+ In the event that there is no such named sub-expression then the function
+ returns -1.
+
+
+ The template overloads of this function allow the string and/or character
+ type to be different from the character type of the underlying sequence and/or
+ regular expression: in this case the characters will be widened to the underlying
+ character type of the original regular expression. A compiler error will
+ occur if the argument passes a wider character type than the underlying sequence.
+ These overloads allow a normal narrow character C string literal to be used
+ as an argument, even when the underlying character type of the expression
+ being matched may be something more exotic such as a Unicode character type.
+
string_type str(int sub = 0)const;
+string_type str(const char_type* sub)const;
+template <class Traits, class A>
+string_type str(const std::basic_string<char_type, Traits, A>& sub)const;
+template <class charT>
+string_type str(const charT* sub)const;
+template <class charT, class Traits, class A>
+string_type str(const std::basic_string<charT, Traits, A>& sub)const;
Effects: Returns sub-expression sub
as a string: string_type((*this)[sub])
.
+
+ The overloads that accept a string return the string that matched the named
+ sub-expression n. In the event that there is no such
+ named sub-expression then returns an empty string.
+
+
+ The template overloads of this function allow the string and/or character
+ type to be different from the character type of the underlying sequence and/or
+ regular expression: in this case the characters will be widened to the underlying
+ character type of the original regular expression. A compiler error will
+ occur if the argument passes a wider character type than the underlying sequence.
+ These overloads allow a normal narrow character C string literal to be used
+ as an argument, even when the underlying character type of the expression
+ being matched may be something more exotic such as a Unicode character type.
+
const_reference operator[](int n) const;
+const_reference operator[](const char_type* n) const;
+template <class Traits, class A>
+const_reference operator[](const std::basic_string<char_type, Traits, A>& n) const;
+template <class charT>
+const_reference operator[](const charT* n) const;
+template <class charT, class Traits, class A>
+const_reference operator[](const std::basic_string<charT, Traits, A>& n) const;
Effects: Returns a reference to the sub_match
@@ -413,6 +506,22 @@
then returns a sub_match
object whose matched member is false.
+
+ The overloads that accept a string return a reference to the sub_match
object representing the
+ character sequence that matched the named sub-expression n.
+ In the event that there is no such named sub-expression then returns a sub_match
+ object whose matched member is false.
+
+
+ The template overloads of this function allow the string and/or character
+ type to be different from the character type of the underlying sequence and/or
+ regular expression: in this case the characters will be widened to the underlying
+ character type of the original regular expression. A compiler error will
+ occur if the argument passes a wider character type than the underlying sequence.
+ These overloads allow a normal narrow character C string literal to be used
+ as an argument, even when the underlying character type of the expression
+ being matched may be something more exotic such as a Unicode character type.
+
const_reference prefix()const;
diff --git a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html
index f7379cd2..732889c4 100644
--- a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html
+++ b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_algo.html
@@ -43,7 +43,7 @@
on to the "real" algorithm.
@@ -89,7 +89,7 @@
}
@@ -128,7 +128,7 @@
}
diff --git a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html
index 6f432bd3..6c781423 100644
--- a/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html
+++ b/doc/html/boost_regex/ref/non_std_strings/icu/unicode_iter.html
@@ -28,7 +28,7 @@
Unicode Aware Regex Iterators
@@ -126,7 +126,7 @@
Provided of course that the input is encoded as UTF-8.
diff --git a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html
index 49195299..11f9d1ea 100644
--- a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html
+++ b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_algo.html
@@ -34,7 +34,7 @@
here they are anyway:
@@ -82,7 +82,7 @@
}
@@ -110,7 +110,7 @@
}
@@ -149,7 +149,7 @@
}
@@ -164,7 +164,7 @@
+ s.GetLength(), e, f);
diff --git a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html
index 8286c553..6f3d7ddf 100644
--- a/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html
+++ b/doc/html/boost_regex/ref/non_std_strings/mfc_strings/mfc_iter.html
@@ -32,7 +32,7 @@
an MFC/ATL string to a regex_iterator
or regex_token_iterator
:
@@ -68,7 +68,7 @@
}
diff --git a/doc/html/boost_regex/ref/posix.html b/doc/html/boost_regex/ref/posix.html
index 44ead2e9..b173b48e 100644
--- a/doc/html/boost_regex/ref/posix.html
+++ b/doc/html/boost_regex/ref/posix.html
@@ -165,7 +165,7 @@
@@ -379,7 +379,7 @@
@@ -467,7 +467,7 @@
@@ -537,7 +537,7 @@
diff --git a/doc/html/boost_regex/ref/regex_iterator.html b/doc/html/boost_regex/ref/regex_iterator.html
index ba71a9af..17494ce8 100644
--- a/doc/html/boost_regex/ref/regex_iterator.html
+++ b/doc/html/boost_regex/ref/regex_iterator.html
@@ -78,7 +78,7 @@
regex_constants::match_flag_type m = regex_constants::match_default);
@@ -436,7 +436,7 @@
m.
diff --git a/doc/html/boost_regex/ref/regex_match.html b/doc/html/boost_regex/ref/regex_match.html
index 5ce57f3c..9c4413f1 100644
--- a/doc/html/boost_regex/ref/regex_match.html
+++ b/doc/html/boost_regex/ref/regex_match.html
@@ -80,7 +80,7 @@
match_flag_type flags = match_default);
template <class BidirectionalIterator, class Allocator, class charT, class traits>
@@ -360,7 +360,7 @@
Effects: Returns the result of regex_match(s.begin(), s.end(), e, flags)
.
diff --git a/doc/html/boost_regex/ref/regex_replace.html b/doc/html/boost_regex/ref/regex_replace.html
index f25d7aab..5019aeac 100644
--- a/doc/html/boost_regex/ref/regex_replace.html
+++ b/doc/html/boost_regex/ref/regex_replace.html
@@ -53,7 +53,7 @@
match_flag_type flags = match_default);
template <class OutputIterator, class BidirectionalIterator, class traits, class charT>
@@ -163,7 +163,7 @@
and then returns result
.
diff --git a/doc/html/boost_regex/ref/regex_search.html b/doc/html/boost_regex/ref/regex_search.html
index cefe95b9..bcf9ce4f 100644
--- a/doc/html/boost_regex/ref/regex_search.html
+++ b/doc/html/boost_regex/ref/regex_search.html
@@ -73,7 +73,7 @@
match_flag_type flags = match_default);
template <class BidirectionalIterator, class Allocator, class charT, class traits>
@@ -355,7 +355,7 @@
Effects: Returns the result of regex_search(s.begin(), s.end(), e, flags)
.
diff --git a/doc/html/boost_regex/ref/regex_token_iterator.html b/doc/html/boost_regex/ref/regex_token_iterator.html
index fbd5e0e5..58683e16 100644
--- a/doc/html/boost_regex/ref/regex_token_iterator.html
+++ b/doc/html/boost_regex/ref/regex_token_iterator.html
@@ -136,7 +136,7 @@
regex_constants::match_flag_type m = regex_constants::match_default);
@@ -383,7 +383,7 @@
m.
diff --git a/doc/html/boost_regex/ref/regex_traits.html b/doc/html/boost_regex/ref/regex_traits.html
index 16ac10d0..62cded54 100644
--- a/doc/html/boost_regex/ref/regex_traits.html
+++ b/doc/html/boost_regex/ref/regex_traits.html
@@ -46,7 +46,7 @@
}
diff --git a/doc/html/boost_regex/ref/sub_match.html b/doc/html/boost_regex/ref/sub_match.html
index 18629564..db25820f 100644
--- a/doc/html/boost_regex/ref/sub_match.html
+++ b/doc/html/boost_regex/ref/sub_match.html
@@ -329,11 +329,11 @@
}
@@ -473,7 +473,7 @@
@@ -1008,7 +1008,7 @@
+ m2.str().
diff --git a/doc/html/boost_regex/syntax/basic_extended.html b/doc/html/boost_regex/syntax/basic_extended.html
index 6f13adaa..389a5933 100644
--- a/doc/html/boost_regex/syntax/basic_extended.html
+++ b/doc/html/boost_regex/syntax/basic_extended.html
@@ -28,7 +28,7 @@
Expression Syntax
@@ -46,7 +46,7 @@
@@ -56,7 +56,7 @@
.[{()\*+?|^$
@@ -74,7 +74,7 @@
@@ -86,7 +86,7 @@
of an expression, or the last character of a sub-expression.
@@ -98,7 +98,7 @@
to by a back-reference.
@@ -184,7 +184,7 @@ cab
operator to be applied to.
@@ -214,7 +214,7 @@ cab
@@ -227,7 +227,7 @@ cab
will match either of "abd" or "abef".
@@ -240,7 +240,7 @@ cab
A bracket expression may contain any combination of the following:
@@ -249,7 +249,7 @@ cab
or 'c'.
@@ -265,7 +265,7 @@ cab
the code points of the characters only.
@@ -274,7 +274,7 @@ cab
range a-c
.
@@ -284,7 +284,7 @@ cab
character class names.
@@ -312,7 +312,7 @@ cab
matches a NUL character.
@@ -329,7 +329,7 @@ cab
or even all locales on one platform.
@@ -337,7 +337,7 @@ cab
[[:digit:]a-c[.NUL.]]
.
@@ -363,7 +363,7 @@ cab
extensions are also supported by Boost.Regex:
@@ -552,7 +552,7 @@ cab
@@ -706,7 +706,7 @@ cab
@@ -813,7 +813,7 @@ cab
matches any "digit" character, as does
\p{digit}
.
@@ -888,7 +888,7 @@ cab
@@ -979,7 +979,7 @@ cab
@@ -991,7 +991,7 @@ cab
match to start where the last one ended.
@@ -1005,7 +1005,7 @@ cab
\*+aaa
@@ -1056,7 +1056,7 @@ cab
@@ -1065,7 +1065,7 @@ cab
\@ matches a literal '@'.
@@ -1101,7 +1101,7 @@ cab
@@ -1111,11 +1111,11 @@ cab
rule.
@@ -1136,7 +1136,7 @@ cab
used with the -E option.
@@ -1150,7 +1150,7 @@ cab
these by default anyway.
@@ -1163,7 +1163,7 @@ cab
modify how the case and locale sensitivity are to be applied.
diff --git a/doc/html/boost_regex/syntax/basic_syntax.html b/doc/html/boost_regex/syntax/basic_syntax.html
index 8de6b127..6a6f32d9 100644
--- a/doc/html/boost_regex/syntax/basic_syntax.html
+++ b/doc/html/boost_regex/syntax/basic_syntax.html
@@ -28,7 +28,7 @@
Expression Syntax
@@ -45,7 +45,7 @@
@@ -55,7 +55,7 @@
.[\*^$
@@ -73,7 +73,7 @@
@@ -85,7 +85,7 @@
of an expression, or the last character of a sub-expression.
@@ -97,7 +97,7 @@
by a back-reference.
@@ -155,7 +155,7 @@ aaaa
to.
@@ -173,7 +173,7 @@ aaaa
aaabba
@@ -186,7 +186,7 @@ aaaa
A bracket expression may contain any combination of the following:
@@ -195,7 +195,7 @@ aaaa
or 'c'.
@@ -211,7 +211,7 @@ aaaa
of the characters only.
@@ -220,7 +220,7 @@ aaaa
range a-c.
@@ -230,7 +230,7 @@ aaaa
character class names.
@@ -259,7 +259,7 @@ aaaa
element names.
@@ -276,7 +276,7 @@ aaaa
or even all locales on one platform.
@@ -284,7 +284,7 @@ aaaa
[[:digit:]a-c[.NUL.]].
@@ -299,7 +299,7 @@ aaaa
will match either a literal '\' or a '^'.
@@ -309,13 +309,13 @@ aaaa
rule.
@@ -333,7 +333,7 @@ aaaa
As its name suggests, this behavior is consistent with the Unix utility grep.
@@ -613,7 +613,7 @@ aaaa
leftmost-longest rule.
@@ -627,7 +627,7 @@ aaaa
options modify how the case and locale sensitivity are to be applied.
diff --git a/doc/html/boost_regex/syntax/perl_syntax.html b/doc/html/boost_regex/syntax/perl_syntax.html
index c97738cf..add2ff1c 100644
--- a/doc/html/boost_regex/syntax/perl_syntax.html
+++ b/doc/html/boost_regex/syntax/perl_syntax.html
@@ -28,7 +28,7 @@
Syntax
@@ -43,7 +43,7 @@
boost::regex e2(my_expression, boost::regex::perl|boost::regex::icase);
@@ -53,7 +53,7 @@
.[{()\*+?|^$
@@ -73,7 +73,7 @@
@@ -83,7 +83,7 @@
A '$' character shall match the end of a line.
@@ -94,7 +94,7 @@
can also repeated, or referred to by a back-reference.
@@ -107,7 +107,7 @@
without splitting out any separate sub-expressions.
@@ -188,7 +188,7 @@
to be applied to.
@@ -218,7 +218,7 @@
while consuming as little input as possible.
@@ -250,7 +250,7 @@
while giving nothing back.
@@ -340,10 +340,27 @@
+
+
+
+ \g{one}
+
+ |
+
+
+ Match whatever matched the sub-expression named "one"
+
+ |
+
+
+ Finally the \k escape can be used to refer to named subexpressions, for example
+ \k<two>
will match whatever matched the subexpression
+ named "two".
+
@@ -370,7 +387,7 @@
(?:abc)??
has exactly the same effect.
@@ -382,7 +399,7 @@
A bracket expression may contain any combination of the following:
@@ -390,7 +407,7 @@
'b', or 'c'.
@@ -404,7 +421,7 @@
sensitive.
@@ -413,7 +430,7 @@
matches any character that is not in the range a-c
.
@@ -424,7 +441,7 @@
class names.
@@ -446,7 +463,7 @@
matches a \0
character.
@@ -463,7 +480,7 @@
or even all locales on one platform.
@@ -475,7 +492,7 @@
is not a "word" character.
@@ -483,7 +500,7 @@
[[:digit:]a-c[.NUL.]]
.
@@ -675,7 +692,7 @@
@@ -877,7 +894,7 @@
@@ -985,7 +1002,7 @@
as does \p{digit}
.
@@ -1004,7 +1021,7 @@
\B
Matches only when not at a word boundary.
@@ -1029,7 +1046,7 @@
to the regular expression \n*\z
@@ -1041,7 +1058,7 @@
one ended.
@@ -1054,7 +1071,7 @@
\*+aaa
@@ -1064,7 +1081,7 @@
followed by a sequence of zero or more combining characters.
@@ -1073,7 +1090,7 @@
sequence, specifically it is identical to the expression (?>\x0D\x0A?|[\x0A-\x0C\x85\x{2028}\x{2029}])
.
@@ -1088,7 +1105,7 @@
This can be used to simulate variable width lookbehind assertions.
@@ -1097,7 +1114,7 @@
\@ matches a literal '@'.
@@ -1105,15 +1122,37 @@
Perl-specific extensions to the regular expression syntax all start with
(?
.
+
+
+ You can create a named subexpression using:
+
+(?<NAME>expression)
+
+
+ Which can then be referred to by the name NAME. Alternatively
+ you can delimit the name using 'NAME' as in:
+
+(?'NAME'expression)
+
+
+ These named subexpressions can be referred to in a backreference using either
+ \g{NAME}
or \k<NAME>
and can
+ also be referred to by name in a Perl
+ format string for search and replace operations, or in the match_results
member functions.
+
(?# ... )
is treated as a comment, it's contents are ignored.
@@ -1127,7 +1166,7 @@
pattern only.
@@ -1136,7 +1175,7 @@
an additional sub-expression.
@@ -1159,7 +1198,7 @@
could be used to validate the password.
@@ -1173,7 +1212,7 @@
(pattern must be of fixed length).
@@ -1186,7 +1225,7 @@
no match is found at all.
@@ -1205,7 +1244,7 @@
sub-expression has been matched).
@@ -1240,7 +1279,7 @@
@@ -1415,7 +1454,7 @@
@@ -1424,7 +1463,7 @@
and JScript
are all synonyms for perl
.
@@ -1436,7 +1475,7 @@
are to be applied.
@@ -1448,7 +1487,7 @@
and no_mod_s
.
diff --git a/doc/html/boost_regex/unicode.html b/doc/html/boost_regex/unicode.html
index a21286ca..b3ceab35 100644
--- a/doc/html/boost_regex/unicode.html
+++ b/doc/html/boost_regex/unicode.html
@@ -30,7 +30,7 @@
There are two ways to use Boost.Regex with Unicode strings:
@@ -56,7 +56,7 @@
diff --git a/doc/html/index.html b/doc/html/index.html
index 6945b093..af141e1e 100644
--- a/doc/html/index.html
+++ b/doc/html/index.html
@@ -28,7 +28,7 @@
Copyright © 1998 -2007 John Maddock
-Last revised: April 25, 2009 at 17:20:04 GMT |
+Last revised: May 06, 2009 at 16:25:16 GMT |
|
diff --git a/doc/match_result.qbk b/doc/match_result.qbk
index eb3861fd..8e1ae788 100644
--- a/doc/match_result.qbk
+++ b/doc/match_result.qbk
@@ -71,9 +71,33 @@ Class template `match_results` is most commonly used as one of the typedefs
bool ``[link boost_regex.match_results.empty empty]``() const;
// element access:
difference_type ``[link boost_regex.match_results.length length]``(int sub = 0) const;
+ difference_type ``[link boost_regex.match_results.length length]``(const char_type* sub) const;
+ template <class charT>
+ difference_type ``[link boost_regex.match_results.length length]``(const charT* sub) const;
+ template <class charT, class Traits, class A>
+ difference_type ``[link boost_regex.match_results.length length]``(const std::basic_string<charT, Traits, A>& sub) const;
difference_type ``[link boost_regex.match_results.position position]``(unsigned int sub = 0) const;
+ difference_type ``[link boost_regex.match_results.position position]``(const char_type* sub) const;
+ template <class charT>
+ difference_type ``[link boost_regex.match_results.position position]``(const charT* sub) const;
+ template <class charT, class Traits, class A>
+ difference_type ``[link boost_regex.match_results.position position]``(const std::basic_string<charT, Traits, A>& sub) const;
string_type ``[link boost_regex.match_results.str str]``(int sub = 0) const;
+ string_type ``[link boost_regex.match_results.str str]``(const char_type* sub)const;
+ template <class Traits, class A>
+ string_type ``[link boost_regex.match_results.str str]``(const std::basic_string<char_type, Traits, A>& sub)const;
+ template <class charT>
+ string_type ``[link boost_regex.match_results.str str]``(const charT* sub)const;
+ template <class charT, class Traits, class A>
+ string_type ``[link boost_regex.match_results.str str]``(const std::basic_string<charT, Traits, A>& sub)const;
const_reference ``[link boost_regex.match_results.subscript operator\[\]]``(int n) const;
+ const_reference ``[link boost_regex.match_results.subscript operator\[\]]``(const char_type* n) const;
+ template <class Traits, class A>
+ const_reference ``[link boost_regex.match_results.subscript operator\[\]]``(const std::basic_string<char_type, Traits, A>& n) const;
+ template <class charT>
+ const_reference ``[link boost_regex.match_results.subscript operator\[\]]``(const charT* n) const;
+ template <class charT, class Traits, class A>
+ const_reference ``[link boost_regex.match_results.subscript operator\[\]]``(const std::basic_string<charT, Traits, A>& n) const;
const_reference ``[link boost_regex.match_results.prefix prefix]``() const;
@@ -190,30 +214,86 @@ stored in *this.
[#boost_regex.match_results.length]
difference_type length(int sub = 0)const;
+ difference_type length(const char_type* sub)const;
+ template <class charT>
+ difference_type length(const charT* sub)const;
+ template <class charT, class Traits, class A>
+ difference_type length(const std::basic_string<charT, Traits, A>&)const;
[*Effects]: Returns the length of sub-expression /sub/, that is to say:
`(*this)[sub].length()`.
+The overloads that accept a string refer to the named sub-expression /sub/.
+In the event that there is no such named sub-expression then returns zero.
+
+The template overloads of this function allow the string and\/or character type
+to be different from the character type of the underlying sequence and\/or regular expression:
+in this case the characters will be widened to the underlying character type of the original regular expression.
+A compiler error will occur if the argument passes a wider character type than the underlying sequence.
+These overloads allow a normal narrow character C string literal to be used as an argument, even when
+the underlying character type of the expression being matched may be something more exotic such as a
+Unicode character type.
[#boost_regex.match_results.position]
difference_type position(unsigned int sub = 0)const;
+ difference_type position(const char_type* sub)const;
+ template <class charT>
+ difference_type position(const charT* sub)const;
+ template <class charT, class Traits, class A>
+ difference_type position(const std::basic_string<charT, Traits, A>&)const;
[*Effects]: Returns the starting location of sub-expression /sub/, or -1 if /sub/ was
not matched. Note that if this represents a partial match , then `position()`
will return the location of the partial match even though `(*this)[0].matched` is false.
+The overloads that accept a string refer to the named sub-expression /sub/.
+In the event that there is no such named sub-expression then returns -1.
+
+The template overloads of this function allow the string and\/or character type
+to be different from the character type of the underlying sequence and\/or regular expression:
+in this case the characters will be widened to the underlying character type of the original regular expression.
+A compiler error will occur if the argument passes a wider character type than the underlying sequence.
+These overloads allow a normal narrow character C string literal to be used as an argument, even when
+the underlying character type of the expression being matched may be something more exotic such as a
+Unicode character type.
+
[#boost_regex.match_results.str]
string_type str(int sub = 0)const;
+ string_type str(const char_type* sub)const;
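+ template <class Traits, class A>
+ string_type str(const std::basic_string<char_type, Traits, A>& sub)const;
+ template <class charT>
+ string_type str(const charT* sub)const;
+ template <class charT, class Traits, class A>
+ string_type str(const std::basic_string<charT, Traits, A>& sub)const;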
[*Effects]: Returns sub-expression /sub/ as a string: `string_type((*this)[sub])`.
+The overloads that accept a string return the string that matched the named sub-expression /sub/.
+In the event that there is no such named sub-expression then returns an empty string.
+
+The template overloads of this function allow the string and\/or character type
+to be different from the character type of the underlying sequence and\/or regular expression:
+in this case the characters will be widened to the underlying character type of the original regular expression.
+A compiler error will occur if the argument passes a wider character type than the underlying sequence.
+These overloads allow a normal narrow character C string literal to be used as an argument, even when
+the underlying character type of the expression being matched may be something more exotic such as a
+Unicode character type.
+
[#boost_regex.match_results.subscript]
- const_reference operator[](int n) const;
+ const_reference operator[](int n) const;
+ const_reference operator[](const char_type* n) const;
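+ template <class Traits, class A>
+ const_reference operator[](const std::basic_string<char_type, Traits, A>& n) const;
+ template <class charT>
+ const_reference operator[](const charT* n) const;
+ template <class charT, class Traits, class A>
+ const_reference operator[](const std::basic_string<charT, Traits, A>& n) const;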
[*Effects]: Returns a reference to the [sub_match] object representing the character
sequence that matched marked sub-expression /n/. If `n == 0` then returns a
@@ -222,6 +302,19 @@ matched the whole regular expression. If /n/ is out of range, or if /n/ is an
unmatched sub-expression, then returns a [sub_match] object whose matched
member is false.
+The overloads that accept a string return a reference to the [sub_match]
+object representing the character sequence that matched the named sub-expression /n/.
+In the event that there is no such named sub-expression then returns a [sub_match] object whose matched
+member is false.
+
+The template overloads of this function allow the string and\/or character type
+to be different from the character type of the underlying sequence and\/or regular expression:
+in this case the characters will be widened to the underlying character type of the original regular expression.
+A compiler error will occur if the argument passes a wider character type than the underlying sequence.
+These overloads allow a normal narrow character C string literal to be used as an argument, even when
+the underlying character type of the expression being matched may be something more exotic such as a
+Unicode character type.
+
[#boost_regex.match_results.prefix]
diff --git a/doc/syntax_perl.qbk b/doc/syntax_perl.qbk
index a67bc771..ff14c703 100644
--- a/doc/syntax_perl.qbk
+++ b/doc/syntax_perl.qbk
@@ -185,8 +185,12 @@ You can also use the \g escape for the same function, for example:
parsing of the expression in cases like =\g{1}2= or for indexes higher than 9 as in =\g{1234}=]]
[[=\g-1=][Match whatever matched the last opened sub-expression]]
[[=\g{-2}=][Match whatever matched the last but one opened sub-expression]]
+[[=\g{one}=][Match whatever matched the sub-expression named "one"]]
]
+Finally the \k escape can be used to refer to named subexpressions, for example [^\k<two>] will match
+whatever matched the subexpression named "two".
+
[h4 Alternation]
The =|= operator will match either of its arguments, so for example:
@@ -425,6 +429,21 @@ Any other escape sequence matches the character that is escaped, for example
Perl-specific extensions to the regular expression syntax all start with =(?=.
+[h5 Named Subexpressions]
+
+You can create a named subexpression using:
+
+ (?<NAME>expression)
+
+Which can then be referred to by the name /NAME/. Alternatively you can delimit the name
+using 'NAME' as in:
+
+ (?'NAME'expression)
+
+These named subexpressions can be referred to in a backreference using either [^\g{NAME}] or [^\k<NAME>]
+and can also be referred to by name in a [perl_format] format string for search and replace operations, or in the
+[match_results] member functions.
+
[h5 Comments]
=(?# ... )= is treated as a comment, it's contents are ignored.
diff --git a/include/boost/regex/concepts.hpp b/include/boost/regex/concepts.hpp
index 0a22aebd..98fd5941 100644
--- a/include/boost/regex/concepts.hpp
+++ b/include/boost/regex/concepts.hpp
@@ -844,6 +844,42 @@ struct BoostRegexConcept
m_string = m_char + m_sub;
ignore_unused_variable_warning(m_string);
+ // Named sub-expressions:
+ m_sub = m_cresults[&m_char];
+ ignore_unused_variable_warning(m_sub);
+ m_sub = m_cresults[m_string];
+ ignore_unused_variable_warning(m_sub);
+ m_sub = m_cresults[""];
+ ignore_unused_variable_warning(m_sub);
+ m_sub = m_cresults[std::string("")];
+ ignore_unused_variable_warning(m_sub);
+ m_string = m_cresults.str(&m_char);
+ ignore_unused_variable_warning(m_string);
+ m_string = m_cresults.str(m_string);
+ ignore_unused_variable_warning(m_string);
+ m_string = m_cresults.str("");
+ ignore_unused_variable_warning(m_string);
+ m_string = m_cresults.str(std::string(""));
+ ignore_unused_variable_warning(m_string);
+
+ typename match_results_type::difference_type diff;
+ diff = m_cresults.length(&m_char);
+ ignore_unused_variable_warning(diff);
+ diff = m_cresults.length(m_string);
+ ignore_unused_variable_warning(diff);
+ diff = m_cresults.length("");
+ ignore_unused_variable_warning(diff);
+ diff = m_cresults.length(std::string(""));
+ ignore_unused_variable_warning(diff);
+ diff = m_cresults.position(&m_char);
+ ignore_unused_variable_warning(diff);
+ diff = m_cresults.position(m_string);
+ ignore_unused_variable_warning(diff);
+ diff = m_cresults.position("");
+ ignore_unused_variable_warning(diff);
+ diff = m_cresults.position(std::string(""));
+ ignore_unused_variable_warning(diff);
+
#ifndef BOOST_NO_STD_LOCALE
m_stream << m_sub;
m_stream << m_cresults;
diff --git a/include/boost/regex/v4/basic_regex.hpp b/include/boost/regex/v4/basic_regex.hpp
index cb9ff3c5..aed79528 100644
--- a/include/boost/regex/v4/basic_regex.hpp
+++ b/include/boost/regex/v4/basic_regex.hpp
@@ -19,6 +19,8 @@
#ifndef BOOST_REGEX_V4_BASIC_REGEX_HPP
#define BOOST_REGEX_V4_BASIC_REGEX_HPP
+#include <boost/type_traits/is_same.hpp>
+
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
@@ -44,12 +46,123 @@ namespace re_detail{
template
class basic_regex_parser;
+template // NOTE(review): the template parameter list (presumably <class I>) appears to have been stripped from this listing -- confirm against the real header
+void bubble_down_one(I first, I last) // One insertion step: walks the element at last-1 back towards first; set_name() calls this after every push_back.
+{
+ if(first != last)
+ {
+ I next = last - 1; // begin with the final element of [first, last)
+ while((next != first) && !(*(next-1) < *next)) // continue while the predecessor is not strictly less than the element
+ {
+ (next-1)->swap(*next); // exchange through the element type's own swap() member
+ --next;
+ }
+ }
+}
+
+//
+// Class named_subexpressions
+// Contains information about named subexpressions within the regex.
+//
+template // NOTE(review): template parameter list (presumably <class charT>) appears stripped from this listing -- confirm
+class named_subexpressions_base // Abstract lookup interface from a sub-expression name to its index.
+{
+public:
+ virtual int get_id(const charT* i, const charT* j) = 0; // look up the name held in [i, j); the implementations visible in this file return -1 for an unknown name
+};
+
+template <class charT>
+class named_subexpressions : public named_subexpressions_base<charT> // Sorted table mapping sub-expression names to their indexes.
+{
+   struct name // one table entry: the characters of a name plus the index it refers to
+   {
+      name(const charT* i, const charT* j, int idx)
+         : n(i, j), index(idx) {}
+      std::vector<charT> n;
+      int index;
+      bool operator < (const name& other)const // ordering considers the characters only, never the index
+      {
+         return std::lexicographical_compare(n.begin(), n.end(), other.n.begin(), other.n.end());
+      }
+      bool operator == (const name& other)const
+      {
+         return n == other.n;
+      }
+      void swap(name& other)
+      {
+         n.swap(other.n);
+         std::swap(index, other.index);
+      }
+   };
+public:
+   named_subexpressions(){}
+   void set_name(const charT* i, const charT* j, int index) // records that the name in [i, j) refers to sub-expression `index`
+   {
+      m_sub_names.push_back(name(i, j, index));
+      bubble_down_one(m_sub_names.begin(), m_sub_names.end()); // keep the table sorted so get_id() can binary search it
+   }
+   int get_id(const charT* i, const charT* j) // returns the index recorded for the name in [i, j), or -1 when there is none
+   {
+      name t(i, j, 0);
+      typename std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); // qualified: an unqualified call depends on ADL and fails when the iterator is a raw pointer
+      if((pos != m_sub_names.end()) && (*pos == t))
+      {
+         return pos->index;
+      }
+      return -1;
+   }
+private:
+   std::vector<name> m_sub_names;
+};
+
+template // NOTE(review): template parameter list appears stripped from this listing -- confirm the parameter names and order
+class named_subexpressions_converter : public named_subexpressions_base // Adapter that forwards name lookups to a wrapped named_subexpressions table.
+{
+ boost::shared_ptr > m_converter; // the wrapped table (template arguments missing from this listing)
+public:
+ named_subexpressions_converter(boost::shared_ptr > s)
+ : m_converter(s) {}
+ virtual int get_id(const charT* i, const charT* j)
+ {
+ if(i == j)
+ return -1; // an empty name never matches; this also keeps &v[0] below off an empty vector
+ std::vector v; // element type not visible here; presumably the wrapped table's character type -- confirm
+ while(i != j)
+ {
+ v.push_back(*i); // copy the name across one character at a time
+ ++i;
+ }
+ return m_converter->get_id(&v[0], &v[0] + v.size());
+ }
+};
+
+template // NOTE(review): template parameter list appears stripped from this listing -- confirm
+inline boost::shared_ptr > convert_to_named_subs_imp( // overload that hands the table back unchanged
+ boost::shared_ptr > s,
+ boost::integral_constant const&) // tag argument; presumably the "character types are the same" case -- confirm
+{
+ return s;
+}
+template // NOTE(review): template parameter list appears stripped from this listing -- confirm
+inline boost::shared_ptr > convert_to_named_subs_imp( // overload that wraps the table in a named_subexpressions_converter
+ boost::shared_ptr > s,
+ boost::integral_constant const&) // tag argument; presumably the "character types differ" case -- confirm
+{
+ return boost::shared_ptr >(new named_subexpressions_converter(s));
+}
+template // NOTE(review): template parameter list appears stripped from this listing -- confirm
+inline boost::shared_ptr > convert_to_named_subs( // picks one of the convert_to_named_subs_imp overloads at compile time
+ boost::shared_ptr > s)
+{
+ typedef typename boost::is_same::type tag_type; // result of a type comparison (operands missing from this listing) used as the dispatch tag
+ return convert_to_named_subs_imp(s, tag_type());
+}
//
// class regex_data:
// represents the data we wish to expose to the matching algorithms.
//
template
-struct regex_data
+struct regex_data : public named_subexpressions
{
typedef regex_constants::syntax_option_type flag_type;
typedef std::size_t size_type;
@@ -520,6 +633,10 @@ public:
BOOST_ASSERT(0 != m_pimpl.get());
return m_pimpl->get_data();
}
+ boost::shared_ptr > get_named_subs()const // exposes the shared implementation object as a named sub-expression table
+ {
+ return m_pimpl; // relies on the implementation type converting to the returned pointer type; regex_data derives from named_subexpressions in this patch
+ }
private:
shared_ptr > m_pimpl;
diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp
index 6431d16e..09777d20 100644
--- a/include/boost/regex/v4/basic_regex_parser.hpp
+++ b/include/boost/regex/v4/basic_regex_parser.hpp
@@ -777,6 +777,15 @@ escape_type_class_jump:
}
const charT* pc = m_position;
int i = this->m_traits.toi(pc, m_end, 10);
+ if(i < 0)
+ {
+ // Check for a named capture:
+ const charT* base = m_position;
+ while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
+ ++m_position;
+ i = this->m_pdata->get_id(base, m_position);
+ pc = m_position;
+ }
if(negative)
i = 1 + m_mark_count - i;
if((i > 0) && (this->m_backrefs & (1u << (i-1))))
@@ -1784,6 +1793,7 @@ bool basic_regex_parser::parse_perl_extension()
regex_constants::syntax_option_type old_flags = this->flags();
bool old_case_change = m_has_case_change;
m_has_case_change = false;
+ charT name_delim;
//
// select the actual extension used:
//
@@ -1825,8 +1835,10 @@ bool basic_regex_parser::parse_perl_extension()
pb->index = markid = -1;
else
{
- fail(regex_constants::error_badrepeat, m_position - m_base);
- return false;
+ // Probably a named capture which also starts (?< :
+ name_delim = '>';
+ --m_position;
+ goto named_capture_jump;
}
++m_position;
jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
@@ -1903,7 +1915,7 @@ bool basic_regex_parser::parse_perl_extension()
if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
{
- fail(regex_constants::error_badrepeat, m_position - m_base);
+ fail(regex_constants::error_paren, m_position - m_base);
return false;
}
m_position -= 2;
@@ -1914,6 +1926,40 @@ bool basic_regex_parser::parse_perl_extension()
case regex_constants::syntax_close_mark:
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
+ case regex_constants::escape_type_end_buffer:
+ {
+ name_delim = *m_position;
+named_capture_jump:
+ markid = 0;
+ if(0 == (this->flags() & regbase::nosubs))
+ {
+ markid = ++m_mark_count;
+ #ifndef BOOST_NO_STD_DISTANCE
+ if(this->flags() & regbase::save_subexpression_location)
+ this->m_pdata->m_subs.push_back(std::pair(std::distance(m_base, m_position) - 2, 0));
+ #else
+ if(this->flags() & regbase::save_subexpression_location)
+ this->m_pdata->m_subs.push_back(std::pair((m_position - m_base) - 2, 0));
+ #endif
+ }
+ pb->index = markid;
+ const charT* base = ++m_position;
+ if(m_position == m_end)
+ {
+ fail(regex_constants::error_paren, m_position - m_base);
+ return false;
+ }
+ while((m_position != m_end) && (*m_position != name_delim))
+ ++m_position;
+ if(m_position == m_end)
+ {
+ fail(regex_constants::error_paren, m_position - m_base);
+ return false;
+ }
+ this->m_pdata->set_name(base, m_position, markid);
+ ++m_position;
+ break;
+ }
default:
//
// lets assume that we have a (?imsx) group and try and parse it:
@@ -2043,6 +2089,22 @@ bool basic_regex_parser::parse_perl_extension()
// and the case change data:
//
m_has_case_change = old_case_change;
+
+ if(markid > 0)
+ {
+#ifndef BOOST_NO_STD_DISTANCE
+ if(this->flags() & regbase::save_subexpression_location)
+ this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
+#else
+ if(this->flags() & regbase::save_subexpression_location)
+ this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
+#endif
+ //
+ // allow backrefs to this mark:
+ //
+ if((markid > 0) && (markid < (int)(sizeof(unsigned) * CHAR_BIT)))
+ this->m_backrefs |= 1u << (markid - 1);
+ }
return true;
}
diff --git a/include/boost/regex/v4/match_results.hpp b/include/boost/regex/v4/match_results.hpp
index 5642508b..09dd31f0 100644
--- a/include/boost/regex/v4/match_results.hpp
+++ b/include/boost/regex/v4/match_results.hpp
@@ -36,6 +36,13 @@ namespace boost{
#pragma warning(disable : 4251 4231 4660)
#endif
+namespace re_detail{
+
+template
+class named_subexpressions;
+
+}
+
template
class match_results
{
@@ -62,13 +69,14 @@ public:
typedef typename re_detail::regex_iterator_traits<
BidiIterator>::value_type char_type;
typedef std::basic_string string_type;
+ typedef re_detail::named_subexpressions_base named_sub_type;
// construct/copy/destroy:
explicit match_results(const Allocator& a = Allocator())
#ifndef BOOST_NO_STD_ALLOCATOR
- : m_subs(a), m_base() {}
+ : m_subs(a), m_base(), m_last_closed_paren(0) {}
#else
- : m_subs(), m_base() { (void)a; }
+ : m_subs(), m_base(), m_last_closed_paren(0) { (void)a; }
#endif
match_results(const match_results& m)
: m_subs(m.m_subs), m_base(m.m_base) {}
@@ -95,6 +103,24 @@ public:
return m_subs[sub].length();
return 0;
}
+ difference_type length(const char_type* sub) const // length of the sub-expression named by the null-terminated string sub
+ {
+ const char_type* end = sub;
+ while(*end) ++end; // find the terminating null
+ return length(named_subexpression_index(sub, end)); // unknown names arrive here as index -20
+ }
+ template // NOTE(review): template parameter list (presumably <class charT>) appears stripped from this listing
+ difference_type length(const charT* sub) const // as above, for a name spelled in a different character type
+ {
+ const charT* end = sub;
+ while(*end) ++end;
+ return length(named_subexpression_index(sub, end));
+ }
+ template // NOTE(review): template parameter list appears stripped from this listing
+ difference_type length(const std::basic_string& sub) const // string overload: forwards to the pointer overloads via c_str()
+ {
+ return length(sub.c_str()); // the name therefore stops at the first embedded null, if any
+ }
difference_type position(size_type sub = 0) const
{
sub += 2;
@@ -108,6 +134,24 @@ public:
}
return ~static_cast(0);
}
+ difference_type position(const char_type* sub) const // position of the sub-expression named by the null-terminated string sub
+ {
+ const char_type* end = sub;
+ while(*end) ++end; // find the terminating null
+ return position(named_subexpression_index(sub, end)); // unknown names arrive here as index -20
+ }
+ template // NOTE(review): template parameter list (presumably <class charT>) appears stripped from this listing
+ difference_type position(const charT* sub) const // as above, for a name spelled in a different character type
+ {
+ const charT* end = sub;
+ while(*end) ++end;
+ return position(named_subexpression_index(sub, end));
+ }
+ template // NOTE(review): template parameter list appears stripped from this listing
+ difference_type position(const std::basic_string& sub) const // string overload: forwards to the pointer overloads via c_str()
+ {
+ return position(sub.c_str()); // the name therefore stops at the first embedded null, if any
+ }
string_type str(int sub = 0) const
{
sub += 2;
@@ -122,6 +166,25 @@ public:
}
return result;
}
+ string_type str(const char_type* sub) const // text matched by the sub-expression named sub
+ {
+ return (*this)[sub].str(); // defers to the matching named operator[] overload
+ }
+ template // NOTE(review): template parameter list appears stripped; presumably the string-of-char_type overload
+ string_type str(const std::basic_string& sub) const
+ {
+ return (*this)[sub].str();
+ }
+ template // NOTE(review): template parameter list (presumably <class charT>) appears stripped from this listing
+ string_type str(const charT* sub) const // name spelled in a different character type
+ {
+ return (*this)[sub].str();
+ }
+ template // NOTE(review): template parameter list appears stripped; presumably the string-of-charT overload
+ string_type str(const std::basic_string& sub) const
+ {
+ return (*this)[sub].str();
+ }
const_reference operator[](int sub) const
{
sub += 2;
@@ -131,6 +194,75 @@ public:
}
return m_null;
}
+ //
+ // Named sub-expressions:
+ //
+ const_reference named_subexpression(const char_type* i, const char_type* j) const // sub_match for the sub-expression named by [i, j); the null match when the name is unknown
+ {
+ int index = m_named_subs.get() ? m_named_subs->get_id(i, j) : -1; // no name table attached yet (results never passed through a match): treat every name as unknown instead of dereferencing null
+ return index > 0 ? (*this)[index] : m_null;
+ }
+ template // NOTE(review): template parameter list (presumably <class charT>) appears stripped from this listing
+ const_reference named_subexpression(const charT* i, const charT* j) const // as the non-template overload, for a name spelled in another character type
+ {
+ BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); // reject names whose character type is larger than char_type
+ if(i == j)
+ return m_null; // empty name: nothing to look up, and the buffer below would be empty
+ std::vector s; // element type not visible here; presumably char_type -- confirm
+ while(i != j)
+ s.insert(s.end(), *i++); // copy the name across one character at a time
+ return named_subexpression(&*s.begin(), &*s.begin() + s.size());
+ }
+ int named_subexpression_index(const char_type* i, const char_type* j) const // index of the sub-expression named by [i, j), or -20 when the name is unknown
+ {
+ int index = m_named_subs.get() ? m_named_subs->get_id(i, j) : -1; // no name table attached yet (results never passed through a match): treat every name as unknown instead of dereferencing null
+ return index > 0 ? index : -20;
+ }
+ template // NOTE(review): template parameter list (presumably <class charT>) appears stripped from this listing
+ int named_subexpression_index(const charT* i, const charT* j) const // as the non-template overload, for a name spelled in another character type
+ {
+ BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); // reject names whose character type is larger than char_type
+ if(i == j)
+ return -20; // empty name: report "not found" using the same sentinel as the non-template overload
+ std::vector s; // element type not visible here; presumably char_type -- confirm
+ while(i != j)
+ s.insert(s.end(), *i++); // copy the name across one character at a time
+ return named_subexpression_index(&*s.begin(), &*s.begin() + s.size());
+ }
+ template // NOTE(review): template parameter list appears stripped; presumably the string-of-char_type overload
+ const_reference operator[](const std::basic_string& s) const // named access using a string object
+ {
+ return named_subexpression(s.c_str(), s.c_str() + s.size()); // uses size(), so the whole string is the name
+ }
+ const_reference operator[](const char_type* p) const // named access using a null-terminated string
+ {
+ const char_type* e = p;
+ while(*e) ++e; // find the terminating null
+ return named_subexpression(p, e);
+ }
+
+ template // NOTE(review): template parameter list (presumably <class charT>) appears stripped from this listing
+ const_reference operator[](const charT* p) const // named access, name spelled in another character type
+ {
+ BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); // reject names whose character type is larger than char_type
+ if(*p == 0)
+ return m_null; // empty name: nothing to look up
+ std::vector s; // element type not visible here; presumably char_type -- confirm
+ while(*p)
+ s.insert(s.end(), *p++); // copy the name across one character at a time
+ return named_subexpression(&*s.begin(), &*s.begin() + s.size());
+ }
+ template // NOTE(review): template parameter list appears stripped; presumably the string-of-charT overload
+ const_reference operator[](const std::basic_string& ns) const
+ {
+ BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); // reject names whose character type is larger than char_type
+ if(ns.empty())
+ return m_null; // empty name: nothing to look up
+ std::vector s; // element type not visible here; presumably char_type -- confirm
+ for(unsigned i = 0; i < ns.size(); ++i)
+ s.insert(s.end(), ns[i]); // copy the name across one character at a time
+ return named_subexpression(&*s.begin(), &*s.begin() + s.size());
+ }
const_reference prefix() const
{
@@ -186,6 +318,10 @@ public:
::boost::re_detail::regex_format_imp(i, *this, fmt.data(), fmt.data() + fmt.size(), flags, re.get_traits());
return result;
}
+ const_reference get_last_closed_paren()const // sub_match for the index most recently recorded by set_second(), or the null match if none was recorded
+ {
+ return m_last_closed_paren == 0 ? m_null : (*this)[m_last_closed_paren]; // 0 is the "nothing recorded" value assigned by the default constructor and by the reset in this patch
+ }
allocator_type get_allocator() const
{
@@ -232,6 +368,8 @@ public:
void BOOST_REGEX_CALL set_second(BidiIterator i, size_type pos, bool m = true, bool escape_k = false)
{
+ if(pos)
+ m_last_closed_paren = pos;
pos += 2;
BOOST_ASSERT(m_subs.size() > pos);
m_subs[pos].second = i;
@@ -261,6 +399,7 @@ public:
m_subs.insert(m_subs.end(), n+2-len, v);
}
m_subs[1].first = i;
+ m_last_closed_paren = 0;
}
void BOOST_REGEX_CALL set_base(BidiIterator pos)
{
@@ -301,11 +440,17 @@ public:
}
void BOOST_REGEX_CALL maybe_assign(const match_results& m);
+ void BOOST_REGEX_CALL set_named_subs(boost::shared_ptr subs) // attaches the name table consulted by the named lookup functions
+ {
+ m_named_subs = subs; // shared ownership: only the pointer is stored
+ }
private:
vector_type m_subs; // subexpressions
BidiIterator m_base; // where the search started from
sub_match m_null; // a null match
+ boost::shared_ptr m_named_subs;
+ int m_last_closed_paren;
};
template
diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp
index 1be1af6f..09b0a9bb 100644
--- a/include/boost/regex/v4/perl_matcher_common.hpp
+++ b/include/boost/regex/v4/perl_matcher_common.hpp
@@ -200,6 +200,7 @@ bool perl_matcher::match_imp()
m_match_flags |= regex_constants::match_all;
m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last);
m_presult->set_base(base);
+ m_presult->set_named_subs(re_detail::convert_to_named_subs::char_type>(this->re.get_named_subs()));
if(m_match_flags & match_posix)
m_result = *m_presult;
verify_options(re.flags(), m_match_flags);
@@ -261,6 +262,7 @@ bool perl_matcher::find_imp()
pstate = re.get_first_state();
m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), base, last);
m_presult->set_base(base);
+ m_presult->set_named_subs(re_detail::convert_to_named_subs::char_type>(this->re.get_named_subs()));
m_match_flags |= regex_constants::match_init;
}
else
diff --git a/include/boost/regex/v4/regex_format.hpp b/include/boost/regex/v4/regex_format.hpp
index d114c2ed..fcfd9dc3 100644
--- a/include/boost/regex/v4/regex_format.hpp
+++ b/include/boost/regex/v4/regex_format.hpp
@@ -107,6 +107,7 @@ private:
void format_escape();
void format_conditional();
void format_until_scope_end();
+ bool handle_perl_verb(bool have_brace);
const traits& m_traits; // the traits class for localised formatting operations
const Results& m_results; // the match_results being used.
@@ -250,6 +251,25 @@ void basic_regex_formatter::format_perl()
case '$':
put(*m_position++);
break;
+ case '+':
+ if((++m_position != m_end) && (*m_position == '{'))
+ {
+ const char_type* base = ++m_position;
+ while((m_position != m_end) && (*m_position != '}')) ++m_position;
+ if(m_position != m_end)
+ {
+ // Named sub-expression:
+ put(this->m_results.named_subexpression(base, m_position));
+ ++m_position;
+ break;
+ }
+ else
+ {
+ m_position = --base;
+ }
+ }
+ put((this->m_results)[this->m_results.size() > 1 ? this->m_results.size() - 1 : 1]);
+ break;
case '{':
have_brace = true;
++m_position;
@@ -258,14 +278,18 @@ void basic_regex_formatter::format_perl()
// see if we have a number:
{
std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end);
- len = (std::min)(static_cast(2), len);
+ //len = (std::min)(static_cast(2), len);
int v = m_traits.toi(m_position, m_position + len, 10);
if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}'))))
{
- // leave the $ as is, and carry on:
- m_position = --save_position;
- put(*m_position);
- ++m_position;
+ // Look for a Perl-5.10 verb:
+ if(!handle_perl_verb(have_brace))
+ {
+ // leave the $ as is, and carry on:
+ m_position = --save_position;
+ put(*m_position);
+ ++m_position;
+ }
break;
}
// otherwise output sub v:
@@ -276,6 +300,123 @@ void basic_regex_formatter::format_perl()
}
}
+template <class OutputIterator, class Results, class traits>
+bool basic_regex_formatter<OutputIterator, Results, traits>::handle_perl_verb(bool have_brace)
+{
+   // Tries to expand a named Perl variable (MATCH, PREMATCH, POSTMATCH, LAST_PAREN_MATCH,
+   // LAST_SUBMATCH_RESULT or ^N) starting at m_position.  Returns true after writing the text;
+   // returns false when no name matches, in which case the caller outputs the '$' literally.
+   static const char_type MATCH[] = { 'M', 'A', 'T', 'C', 'H' };
+   static const char_type PREMATCH[] = { 'P', 'R', 'E', 'M', 'A', 'T', 'C', 'H' };
+   static const char_type POSTMATCH[] = { 'P', 'O', 'S', 'T', 'M', 'A', 'T', 'C', 'H' };
+   static const char_type LAST_PAREN_MATCH[] = { 'L', 'A', 'S', 'T', '_', 'P', 'A', 'R', 'E', 'N', '_', 'M', 'A', 'T', 'C', 'H' };
+   static const char_type LAST_SUBMATCH_RESULT[] = { 'L', 'A', 'S', 'T', '_', 'S', 'U', 'B', 'M', 'A', 'T', 'C', 'H', '_', 'R', 'E', 'S', 'U', 'L', 'T' };
+   static const char_type LAST_SUBMATCH_RESULT_ALT[] = { '^', 'N' };
+
+   if(have_brace && (m_position != m_end) && (*m_position == '^')) // "${^NAME}" spelling; never read at m_end (format ending in "${")
+      ++m_position;
+
+   std::ptrdiff_t max_len = m_end - m_position; // characters left in the format string
+
+   if((max_len >= 5) && std::equal(m_position, m_position + 5, MATCH))
+   {
+      m_position += 5;
+      if(have_brace)
+      {
+         if((m_position != m_end) && (*m_position == '}')) // the name may end exactly at m_end
+            ++m_position;
+         else
+         {
+            m_position -= 5;
+            return false;
+         }
+      }
+      put(this->m_results[0]);
+      return true;
+   }
+   if((max_len >= 8) && std::equal(m_position, m_position + 8, PREMATCH))
+   {
+      m_position += 8;
+      if(have_brace)
+      {
+         if((m_position != m_end) && (*m_position == '}'))
+            ++m_position;
+         else
+         {
+            m_position -= 8;
+            return false;
+         }
+      }
+      put(this->m_results.prefix());
+      return true;
+   }
+   if((max_len >= 9) && std::equal(m_position, m_position + 9, POSTMATCH))
+   {
+      m_position += 9;
+      if(have_brace)
+      {
+         if((m_position != m_end) && (*m_position == '}'))
+            ++m_position;
+         else
+         {
+            m_position -= 9;
+            return false;
+         }
+      }
+      put(this->m_results.suffix());
+      return true;
+   }
+   if((max_len >= 16) && std::equal(m_position, m_position + 16, LAST_PAREN_MATCH))
+   {
+      m_position += 16;
+      if(have_brace)
+      {
+         if((m_position != m_end) && (*m_position == '}'))
+            ++m_position;
+         else
+         {
+            m_position -= 16;
+            return false;
+         }
+      }
+      put((this->m_results)[this->m_results.size() > 1 ? this->m_results.size() - 1 : 1]);
+      return true;
+   }
+   if((max_len >= 20) && std::equal(m_position, m_position + 20, LAST_SUBMATCH_RESULT))
+   {
+      m_position += 20;
+      if(have_brace)
+      {
+         if((m_position != m_end) && (*m_position == '}'))
+            ++m_position;
+         else
+         {
+            m_position -= 20;
+            return false;
+         }
+      }
+      put(this->m_results.get_last_closed_paren());
+      return true;
+   }
+   if((max_len >= 2) && std::equal(m_position, m_position + 2, LAST_SUBMATCH_RESULT_ALT))
+   {
+      m_position += 2;
+      if(have_brace)
+      {
+         if((m_position != m_end) && (*m_position == '}'))
+            ++m_position;
+         else
+         {
+            m_position -= 2;
+            return false;
+         }
+      }
+      put(this->m_results.get_last_closed_paren());
+      return true;
+   }
+   return false;
+}
+
template
void basic_regex_formatter::format_escape()
{
diff --git a/include/boost/regex/v4/regex_traits_defaults.hpp b/include/boost/regex/v4/regex_traits_defaults.hpp
index c213889c..55529c5e 100644
--- a/include/boost/regex/v4/regex_traits_defaults.hpp
+++ b/include/boost/regex/v4/regex_traits_defaults.hpp
@@ -326,9 +326,9 @@ inline const charT* get_escape_R_string()
#endif
static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?',
'|', '[', '\x0A', '\x0B', '\x0C', '\x85', '\\', 'x', '{', '2', '0', '2', '8', '}',
- '\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')' };
+ '\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' };
static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?',
- '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')' };
+ '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' };
charT c = static_cast(0x2029u);
bool b = (static_cast(c) == 0x2029u);
diff --git a/src/regex_traits_defaults.cpp b/src/regex_traits_defaults.cpp
index 96ea0b3d..c9596a3d 100644
--- a/src/regex_traits_defaults.cpp
+++ b/src/regex_traits_defaults.cpp
@@ -537,7 +537,7 @@ BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_synta
regex_constants::syntax_dollar, /*$*/
regex_constants::syntax_char, /*%*/
regex_constants::syntax_char, /*&*/
- regex_constants::syntax_char, /*'*/
+ regex_constants::escape_type_end_buffer, /*'*/
regex_constants::syntax_open_mark, /*(*/
regex_constants::syntax_close_mark, /*)*/
regex_constants::syntax_star, /***/
diff --git a/test/Jamfile.v2 b/test/Jamfile.v2
index 4d1a2967..40847731 100644
--- a/test/Jamfile.v2
+++ b/test/Jamfile.v2
@@ -87,6 +87,10 @@ test-suite regex
../build//boost_regex
]
+ [ run named_subexpressions/named_subexpressions_test.cpp
+ ../build//boost_regex
+ ]
+
[ run unicode/unicode_iterator_test.cpp ../build//boost_regex ]
[ run static_mutex/static_mutex_test.cpp
../../thread/build//boost_thread ../build//boost_regex
diff --git a/test/named_subexpressions/named_subexpressions_test.cpp b/test/named_subexpressions/named_subexpressions_test.cpp
new file mode 100644
index 00000000..41011415
--- /dev/null
+++ b/test/named_subexpressions/named_subexpressions_test.cpp
@@ -0,0 +1,109 @@
+/*
+ *
+ * Copyright (c) 2009
+ * John Maddock
+ *
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0. (See accompanying file
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+#include
+#include
+
+
+template // NOTE(review): template parameter list (presumably <class charT>) appears stripped from this listing
+void test_named_subexpressions(charT)
+{
+ // Searches "mmaaabbnn" with (?'one'a+)(?<two>b+) and checks that length(), position(),
+ // str() and operator[] give the same answer whether a group is addressed by number,
+ // by a charT name, or by a narrow char name (pointer and string forms of each).
+ static const charT e[] =
+ {
+ '(', '?', '\'', 'o', 'n', 'e', '\'', 'a', '+', ')', '(', '?', '<', 't', 'w', 'o', '>', 'b', '+', ')', '\0'
+ };
+ static const charT t[] =
+ {
+ 'm', 'm', 'a', 'a', 'a', 'b', 'b', 'n', 'n', '\0'
+ };
+ static const charT one[] =
+ {
+ 'o', 'n', 'e', '\0'
+ };
+ static const charT two[] =
+ {
+ 't', 'w', 'o', '\0'
+ };
+ static const std::basic_string s_one(one);
+ static const std::basic_string s_two(two);
+ static const charT result1[] = { 'a', 'a', 'a', '\0' };
+ static const charT result2[] = { 'b', 'b', '\0' };
+ static const std::basic_string s_result1(result1);
+ static const std::basic_string s_result2(result2);
+
+ static const char* c_one = "one"; // narrow spellings of the names, used even when charT is wider
+ static const char* c_two = "two";
+ static const std::string cs_one(c_one);
+ static const std::string cs_two(c_two);
+
+ boost::basic_regex expression(e);
+ boost::match_results what;
+ if(regex_search(t, what, expression))
+ {
+ BOOST_CHECK(what.length(1) == 3); // group 1 / "one" is expected to be "aaa" at offset 2
+ BOOST_CHECK(what.length(one) == 3);
+ BOOST_CHECK(what.length(s_one) == 3);
+ BOOST_CHECK(what.length(c_one) == 3);
+ BOOST_CHECK(what.length(cs_one) == 3);
+ BOOST_CHECK(what.position(1) == 2);
+ BOOST_CHECK(what.position(one) == 2);
+ BOOST_CHECK(what.position(s_one) == 2);
+ BOOST_CHECK(what.position(c_one) == 2);
+ BOOST_CHECK(what.position(cs_one) == 2);
+ BOOST_CHECK(what.str(1) == s_result1);
+ BOOST_CHECK(what.str(one) == s_result1);
+ BOOST_CHECK(what.str(s_one) == s_result1);
+ BOOST_CHECK(what.str(c_one) == s_result1);
+ BOOST_CHECK(what.str(cs_one) == s_result1);
+ BOOST_CHECK(what[1] == s_result1);
+ BOOST_CHECK(what[one] == s_result1);
+ BOOST_CHECK(what[s_one] == s_result1);
+ BOOST_CHECK(what[c_one] == s_result1);
+ BOOST_CHECK(what[cs_one] == s_result1);
+
+ BOOST_CHECK(what.length(2) == 2); // group 2 / "two" is expected to be "bb" at offset 5
+ BOOST_CHECK(what.length(two) == 2);
+ BOOST_CHECK(what.length(s_two) == 2);
+ BOOST_CHECK(what.length(c_two) == 2);
+ BOOST_CHECK(what.length(cs_two) == 2);
+ BOOST_CHECK(what.position(2) == 5);
+ BOOST_CHECK(what.position(two) == 5);
+ BOOST_CHECK(what.position(s_two) == 5);
+ BOOST_CHECK(what.position(c_two) == 5);
+ BOOST_CHECK(what.position(cs_two) == 5);
+ BOOST_CHECK(what.str(2) == s_result2);
+ BOOST_CHECK(what.str(two) == s_result2);
+ BOOST_CHECK(what.str(s_two) == s_result2);
+ BOOST_CHECK(what.str(c_two) == s_result2);
+ BOOST_CHECK(what.str(cs_two) == s_result2);
+ BOOST_CHECK(what[2] == s_result2);
+ BOOST_CHECK(what[two] == s_result2);
+ BOOST_CHECK(what[s_two] == s_result2);
+ BOOST_CHECK(what[c_two] == s_result2);
+ BOOST_CHECK(what[cs_two] == s_result2);
+ }
+ else
+ {
+ BOOST_ERROR("Expected match not found");
+ }
+}
+
+int test_main( int , char* [] ) // test entry point: runs the named sub-expression checks for narrow and wide characters
+{
+ test_named_subexpressions(char(0)); // the argument only selects the character type
+ test_named_subexpressions(wchar_t(0));
+ return 0;
+}
+
+#include
diff --git a/test/regress/test_backrefs.cpp b/test/regress/test_backrefs.cpp
index e5c254ff..58f4dedb 100644
--- a/test/regress/test_backrefs.cpp
+++ b/test/regress/test_backrefs.cpp
@@ -90,5 +90,18 @@ void test_backrefs()
TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbbbd", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(.)\\g{-1}", perl, "abc", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("a([bc])\\g{-1}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
+
+ // And again but with named subexpressions:
+ TEST_REGEX_SEARCH("a(?<foo>(?<bar>(?<bb>(?<aa>b*))))c\\g{foo}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, 1, 3, 1, 3, 1, 3, -2, -2));
+ TEST_REGEX_SEARCH("a(?<foo>(?<bar>(?<bb>(?<aa>b*))))c\\g{foo}d", perl, "abbcbd", match_default, make_array(-2, -2));
+ TEST_REGEX_SEARCH("a(?<foo>(?<bar>(?<bb>(?<aa>b*))))c\\g{foo}d", perl, "abbcbbbd", match_default, make_array(-2, -2));
+ TEST_REGEX_SEARCH("^(?<foo>.)\\g{foo}", perl, "abc", match_default, make_array(-2, -2));
+ TEST_REGEX_SEARCH("a(?<foo>[bc])\\g{foo}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
+
+ TEST_REGEX_SEARCH("a(?'foo'(?'bar'(?'bb'(?'aa'b*))))c\\g{foo}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, 1, 3, 1, 3, 1, 3, -2, -2));
+ TEST_REGEX_SEARCH("a(?'foo'(?'bar'(?'bb'(?'aa'b*))))c\\g{foo}d", perl, "abbcbd", match_default, make_array(-2, -2));
+ TEST_REGEX_SEARCH("a(?'foo'(?'bar'(?'bb'(?'aa'b*))))c\\g{foo}d", perl, "abbcbbbd", match_default, make_array(-2, -2));
+ TEST_REGEX_SEARCH("^(?'foo'.)\\g{foo}", perl, "abc", match_default, make_array(-2, -2));
+ TEST_REGEX_SEARCH("a(?'foo'[bc])\\g{foo}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
}
diff --git a/test/regress/test_deprecated.cpp b/test/regress/test_deprecated.cpp
index af50ec76..84eefb9f 100644
--- a/test/regress/test_deprecated.cpp
+++ b/test/regress/test_deprecated.cpp
@@ -107,7 +107,7 @@ void test_deprecated(const char&, const test_regex_search_tag&)
int i = 0;
while(results[2*i] != -2)
{
- if(max_subs > i)
+ if((int)max_subs > i)
{
if(results[2*i] != matches[i].rm_so)
{
@@ -231,7 +231,7 @@ void test_deprecated(const wchar_t&, const test_regex_search_tag&)
int i = 0;
while(results[2*i] != -2)
{
- if(max_subs > i)
+ if((int)max_subs > i)
{
if(results[2*i] != matches[i].rm_so)
{
diff --git a/test/regress/test_escapes.cpp b/test/regress/test_escapes.cpp
index 21d4262f..d2dbbe43 100644
--- a/test/regress/test_escapes.cpp
+++ b/test/regress/test_escapes.cpp
@@ -156,7 +156,7 @@ void test_assertion_escapes()
TEST_REGEX_SEARCH("\\R", perl, "foo\rbar", match_default, make_array(3, 4, -2, -2));
TEST_REGEX_SEARCH("\\R", perl, "foo\r\nbar", match_default, make_array(3, 5, -2, -2));
// see if \u works:
- const wchar_t* w = "\u2028";
+ const wchar_t* w = L"\u2028";
if(*w == 0x2028u)
{
TEST_REGEX_SEARCH_W(L"\\R", perl, L"foo\u2028bar", match_default, make_array(3, 4, -2, -2));
diff --git a/test/regress/test_replace.cpp b/test/regress/test_replace.cpp
index e2acf380..caf2c2e3 100644
--- a/test/regress/test_replace.cpp
+++ b/test/regress/test_replace.cpp
@@ -126,5 +126,53 @@ void test_replace()
TEST_REGEX_REPLACE("(a+)", perl, "...aaa,,,", match_default, "/${10}/", "...//,,,");
TEST_REGEX_REPLACE("((((((((((a+))))))))))", perl, "...aaa,,,", match_default, "/${10}/", ".../aaa/,,,");
TEST_REGEX_REPLACE("(a+)", perl, "...aaa,,,", match_default, "/${1}0/", ".../aaa0/,,,");
+
+ // New Perl style operators:
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$MATCH", "aaa");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${MATCH}", "aaa");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${^MATCH}", "aaa");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$MATC", "$MATC");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${MATCH", "${MATCH");
+
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$PREMATCH", "...");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${PREMATCH}", "...");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${^PREMATCH}", "...");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$PREMATC", "$PREMATC");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${PREMATCH", "${PREMATCH");
+
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$POSTMATCH", ",,,");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${POSTMATCH}", ",,,");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${^POSTMATCH}", ",,,");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$POSTMATC", "$POSTMATC");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "${POSTMATCH", "${POSTMATCH");
+
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_PAREN_MATCH", "");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_PAREN_MATC", "$LAST_PAREN_MATC");
+ TEST_REGEX_REPLACE("(a+)", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_PAREN_MATCH", "aaa");
+ TEST_REGEX_REPLACE("(a+)(b+)", perl, "...aaabb,,,", match_default|format_no_copy, "$LAST_PAREN_MATCH", "bb");
+
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$+", "");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$+foo", "foo");
+ TEST_REGEX_REPLACE("(a+)", perl, "...aaa,,,", match_default|format_no_copy, "$+", "aaa");
+ TEST_REGEX_REPLACE("(a+)(b+)", perl, "...aaabb,,,", match_default|format_no_copy, "$+foo", "bbfoo");
+ TEST_REGEX_REPLACE("(a+)(b+)", perl, "...aaabb,,,", match_default|format_no_copy, "$+{", "bb{");
+ TEST_REGEX_REPLACE("(a+)(b+)", perl, "...aaabb,,,", match_default|format_no_copy, "$+{foo", "bb{foo");
+
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_SUBMATCH_RESULT", "");
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_SUBMATCH_RESUL", "$LAST_SUBMATCH_RESUL");
+ TEST_REGEX_REPLACE("(a+)", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_SUBMATCH_RESULT", "aaa");
+ TEST_REGEX_REPLACE("(a+)(b+)", perl, "...aaabb,,,", match_default|format_no_copy, "$LAST_SUBMATCH_RESULT", "bb");
+ TEST_REGEX_REPLACE("(a+)|(b+)", perl, "...aaa,,,", match_default|format_no_copy, "$LAST_SUBMATCH_RESULT", "aaa");
+
+ TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_no_copy, "$^N", "");
+ TEST_REGEX_REPLACE("(a+)", perl, "...aaa,,,", match_default|format_no_copy, "$^N", "aaa");
+ TEST_REGEX_REPLACE("(a+)(b+)", perl, "...aaabb,,,", match_default|format_no_copy, "$^N", "bb");
+ TEST_REGEX_REPLACE("(a+)|(b+)", perl, "...aaa,,,", match_default|format_no_copy, "$^N", "aaa");
+
+ TEST_REGEX_REPLACE("(?<one>a+)(?<two>b+)", perl, " ...aabb,,", match_default|format_no_copy, "$&", "aabb");
+ TEST_REGEX_REPLACE("(?<one>a+)(?<two>b+)", perl, " ...aabb,,", match_default|format_no_copy, "$1", "aa");
+ TEST_REGEX_REPLACE("(?<one>a+)(?<two>b+)", perl, " ...aabb,,", match_default|format_no_copy, "$2", "bb");
+ TEST_REGEX_REPLACE("(?<one>a+)(?<two>b+)", perl, " ...aabb,,", match_default|format_no_copy, "d$+{one}c", "daac");
+ TEST_REGEX_REPLACE("(?<one>a+)(?<two>b+)", perl, " ...aabb,,", match_default|format_no_copy, "c$+{two}d", "cbbd");
}