diff --git a/build/Jamfile.v2 b/build/Jamfile.v2 index 854ad300..01b14e26 100644 --- a/build/Jamfile.v2 +++ b/build/Jamfile.v2 @@ -192,11 +192,19 @@ if [ check-icu-config ] if $(gHAS_ICU) { BOOST_REGEX_ICU_OPTS = "freebsd:/usr/local/include" ; - ICU_SEARCH_OPTS = "freebsd:/$(ICU_PATH)/lib" ; + ICU_SEARCH_OPTS = "freebsd:$(ICU_PATH)/lib" ; BOOST_REGEX_ICU_OPTS += "BOOST_HAS_ICU=1" ; + BOOST_REGEX_ICU_OPTS += "$(gICU_LIBS)" ; if $(ICU_PATH) { + # If ICU_PATH is specified on the command line, then it's + # relative to the current directory, while paths specified + # in a Jamfile are relative to that Jamfile. So, to + # avoid confusing the user if he's not running from + # libs/regex/build, explicitly root this. + ICU_PATH = [ path.native + [ path.root [ path.make $(ICU_PATH) ] [ path.pwd ] ] ] ; if $(ICU_PATH) != "/usr" && $(ICU_PATH) != "/usr/local" { BOOST_REGEX_ICU_OPTS += "$(ICU_PATH)/include" ; diff --git a/doc/history.qbk b/doc/history.qbk index 360f8922..d21de129 100644 --- a/doc/history.qbk +++ b/doc/history.qbk @@ -8,6 +8,27 @@ [section:history History] +New issues should be submitted at [@http://svn.boost.org svn.boost.org] - don't forget to include your +email address in the ticket! + +Currently open issues can be viewed [@https://svn.boost.org/trac/boost/query?status=assigned&status=new&status=reopened&component=regex&order=priority&col=id&col=summary&col=status&col=type&col=milestone&col=component here]. + +All issues including closed ones can be viewed [@https://svn.boost.org/trac/boost/query?status=assigned&status=closed&status=new&status=reopened&component=regex&order=priority&col=id&col=summary&col=status&col=type&col=milestone&col=component here]. + +[h4 Boost 1.42] + +* Added support for Functors rather than strings as format expressions. +* Improved error reporting when throwing exceptions to include better, more relevant information.
+* Fixed tickets +[@https://svn.boost.org/trac/boost/ticket/2802 #2802], +[@https://svn.boost.org/trac/boost/ticket/3425 #3425], +[@https://svn.boost.org/trac/boost/ticket/3507 #3507], +[@https://svn.boost.org/trac/boost/ticket/3546 #3546], +[@https://svn.boost.org/trac/boost/ticket/3631 #3631], +[@https://svn.boost.org/trac/boost/ticket/3632 #3632], +[@https://svn.boost.org/trac/boost/ticket/3715 #3715], +[@https://svn.boost.org/trac/boost/ticket/3718 #3718] + [h4 Boost 1.40] * Added support for many Perl 5.10 syntax elements including named diff --git a/doc/html/boost_regex/background_information.html b/doc/html/boost_regex/background_information.html index 76048eed..897462fa 100644 --- a/doc/html/boost_regex/background_information.html +++ b/doc/html/boost_regex/background_information.html @@ -1,6 +1,6 @@ - + Background Information @@ -47,7 +47,7 @@ -
-
- + Test Programs
@@ -107,7 +107,7 @@ Files: captures_test.cpp.

- + Example programs
@@ -133,7 +133,7 @@ Files: regex_timer.cpp.

- + Code snippets
@@ -217,7 +217,7 @@ -
-
-
-
- + Boost 1.38
@@ -62,7 +96,7 @@
- + Boost 1.34
@@ -85,7 +119,7 @@
- + Boost 1.33.1
@@ -155,7 +189,7 @@
- + Boost 1.33.0
@@ -210,7 +244,7 @@
- + Boost 1.32.1
@@ -218,7 +252,7 @@ Fixed bug in partial matches of bounded repeats of '.'.
- + Boost 1.31.0
@@ -256,7 +290,7 @@ - - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + +
      
@@ -1323,9 +1323,9 @@ "Invalid content of \{\}"

      
@@ -1343,9 +1343,9 @@ "Invalid range end"

      
@@ -1363,9 +1363,9 @@ "Memory exhausted"

      
@@ -1383,9 +1383,9 @@ "Invalid preceding regular expression"

      
@@ -1403,9 +1403,9 @@ "Premature end of regular expression"

      
@@ -1423,9 +1423,9 @@ "Regular expression too big"

      
@@ -1443,9 +1443,9 @@ "Unmatched ) or \)"

      
@@ -1463,9 +1463,9 @@ "Empty expression"

      
@@ -1483,9 +1483,9 @@ "Unknown error"

      
@@ -1775,7 +1775,7 @@ -
-
- @@ -296,7 +296,7 @@ Not supported.

- +
- + C++

@@ -36,7 +36,7 @@ Report on C++ Library Extensions.

- + ECMAScript / JavaScript
@@ -49,7 +49,7 @@ rather than a Unicode escape sequence; use \x{DDDD} for Unicode escape sequences.

- + Perl

@@ -62,7 +62,7 @@ (??{code}) Not implementable in a compiled strongly typed language.

- + POSIX

@@ -82,7 +82,7 @@ a custom traits class.

- + Unicode

@@ -209,7 +209,7 @@

Supported, note that at this level, case transformations are 1:1, many to many case folding operations are not supported (for example - "ß" to "SS"). + "ß" to "SS").

  
@@ -549,7 +549,7 @@ -
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- + Synopsis
#include <boost/pattern_except.hpp>
@@ -54,7 +54,7 @@
 } // namespace boost
 
- + Description
regex_error(const std::string& s, regex_constants::error_type err, std::ptrdiff_t pos);
@@ -89,7 +89,7 @@
 
 
-
- + Synopsis
#include <boost/regex.hpp>
@@ -244,7 +244,7 @@
 } // namespace boost
 
- + Description

@@ -327,7 +327,7 @@ basic_regex.

-

Table 1. basic_regex default construction postconditions

+

Table 1. basic_regex default construction postconditions

@@ -407,7 +407,7 @@ flags specified in f.

-

Table 2. Postconditions for basic_regex construction

+

Table 2. Postconditions for basic_regex construction

@@ -512,7 +512,7 @@ specified in f.

-

Table 3. Postconditions for basic_regex construction

+

Table 3. Postconditions for basic_regex construction

@@ -616,7 +616,7 @@ according the option flags specified in f.

-

Table 4. Postconditions for basic_regex construction

+

Table 4. Postconditions for basic_regex construction

@@ -727,7 +727,7 @@ flags specified in f.

-

Table 5. Postconditions for basic_regex construction

+

Table 5. Postconditions for basic_regex construction

@@ -829,7 +829,7 @@ flags specified in f.

-

Table 6. Postconditions for basic_regex construction

+

Table 6. Postconditions for basic_regex construction

@@ -1043,7 +1043,7 @@ in f.

-

Table 7. Postconditions for basic_regex::assign

+

Table 7. Postconditions for basic_regex::assign

@@ -1319,7 +1319,7 @@
-
-
-
-
- + Additional Optional Requirements
@@ -579,7 +579,7 @@ -
- @@ -167,7 +157,7 @@
-
-
template <class iterator, class Allocator, class charT>
+
template <class iterator, class Allocator, class Formatter>
 std::basic_string<charT> regex_format
                                  (const match_results<iterator, Allocator>& m, 
-                                 const charT* fmt,
-                                 match_flag_type flags = 0);
-
-template <class iterator, class Allocator, class charT>
-std::basic_string<charT> regex_format
-                                 (const match_results<iterator, Allocator>& m, 
-                                 const std::basic_string<charT>& fmt,
+                                 Formatter fmt,
                                  match_flag_type flags = 0);
 

@@ -133,13 +122,14 @@

- const charT* fmt + Formatter fmt

- A format string that determines how the match is transformed into - the new string. + Either a format string that determines how the match is transformed + into the new string, or a functor that computes the new string + from m - see match_results<>::format.

-
-
-
- + Synopsis

@@ -57,7 +57,7 @@ } // namespace boost

- + Description

@@ -257,7 +257,7 @@ -
-
- + Synopsis
#include <boost/regex.hpp>
@@ -132,11 +132,12 @@
    const_iterator begin() const;
    const_iterator end() const;
    // format:
-   template <class OutputIterator>
+   template <class OutputIterator, class Formatter>
    OutputIterator format(OutputIterator out,
-                        const string_type& fmt,
+                        Formatter fmt,
                         match_flag_type flags = format_default) const;
-   string_type format(const string_type& fmt,
+   template <class Formatter>
+   string_type format(Formatter fmt,
                      match_flag_type flags = format_default) const;
 
    allocator_type get_allocator() const;
@@ -166,7 +167,7 @@
          match_results<BidirectionalIterator, Allocator>& m2);
 
- + Description

@@ -558,18 +559,32 @@

-
template <class OutputIterator>
+
template <class OutputIterator, class Formatter>
 OutputIterator format(OutputIterator out,
-                     const string_type& fmt,
-                     match_flag_type flags = format_default);
+                      Formatter fmt,
+                      match_flag_type flags = format_default);
 

Requires: The type OutputIterator conforms to the Output Iterator requirements (C++ std 24.1.2).

- Effects: Copies the character sequence - [fmt.begin(), fmt.end()) + The type Formatter must be + either a pointer to a null-terminated string of type char_type[], or be a container of char_type's + (for example std::basic_string<char_type>) + or be a unary, binary or ternary functor that computes the replacement string + from a function call: either fmt(*this) + which must return a container of char_type's + to be used as the replacement text, or either fmt(*this, + out) + or fmt(*this, out, flags), both of which write the replacement text + to *out, + and then return the new OutputIterator position. +

+

+ Effects: If fmt + is either a null-terminated string, or a container of char_type's, + then copies the character sequence [fmt.begin(), fmt.end()) to OutputIterator out. For each format specifier or escape sequence in fmt, replace that sequence with either the character(s) it represents, or the @@ -578,6 +593,27 @@ by default this is the format used by ECMA-262, ECMAScript Language Specification, Chapter 15 part 5.4.11 String.prototype.replace.

+

+ If fmt is a function object, + then depending on the number of arguments the function object accepts, it + will either: +

+
    +
  • + Call fmt(*this) and + copy the result to OutputIterator out. +
  • +
  • + Call fmt(*this, out). +
  • +
  • + Call fmt(*this, out, flags). +
  • +
+

+ In all cases the new position of the OutputIterator + is returned. +

See the format syntax guide for more information.

@@ -586,18 +622,57 @@

-
string_type format(const string_type& fmt,
-                  match_flag_type flags = format_default);
+
template <class Formatter>
+string_type format(Formatter fmt,
+                   match_flag_type flags = format_default);
 

- Effects: Returns a copy of the string fmt. - For each format specifier or escape sequence in fmt, - replace that sequence with either the character(s) it represents, or the - sequence of characters within *this to which it refers. The bitmasks specified - in flags determines what format specifiers or escape sequences are recognized, - by default this is the format used by ECMA-262, ECMAScript Language Specification, - Chapter 15 part 5.4.11 String.prototype.replace. + Requires: The type Formatter + must be either a pointer to a null-terminated string of type char_type[], + or be a container of char_type's + (for example std::basic_string<char_type>) + or be a unary, binary or ternary functor that computes the replacement string + from a function call: either fmt(*this) + which must return a container of char_type's + to be used as the replacement text, or fmt(*this, + out) + or fmt(*this, out, flags), both of which write the replacement text + to *out, + and then return the new OutputIterator position.

+

+ Effects: If fmt + is either a null-terminated string, or a container of char_type's, + then copies the string fmt: For each format specifier + or escape sequence in fmt, replace that sequence with + either the character(s) it represents, or the sequence of characters within + *this + to which it refers. The bitmasks specified in flags determines what format + specifiers or escape sequences are recognized, by default this is the format + used by ECMA-262, ECMAScript Language Specification, Chapter 15 part 5.4.11 + String.prototype.replace. +

+

+ If fmt is a function object, + then depending on the number of arguments the function object accepts, it + will either: +

+
    +
  • + Call fmt(*this) and + return the result. +
  • +
  • + Call fmt(*this, unspecified-output-iterator), where unspecified-output-iterator + is an unspecified OutputIterator type used to copy the output to the string + result. +
  • +
  • + Call fmt(*this, unspecified-output-iterator, flags), where unspecified-output-iterator + is an unspecified OutputIterator type used to copy the output to the string + result. +
  • +

See the format syntax guide for more information.

@@ -715,7 +790,7 @@ -
-
-
-
-
- + u32regex_iterator

@@ -115,18 +115,18 @@

Calling

-
enumerate_currencies(" $100.23 or £198.12 ");
+
enumerate_currencies(" $100.23 or £198.12 ");

Yields the output:

$100.23
-£198.12
+£198.12
 

Provided of course that the input is encoded as UTF-8.

- + u32regex_token_iterator

@@ -309,7 +309,7 @@ -
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- + sub_match non-member operators
@@ -1008,7 +1008,7 @@ + m2.str().

- + Stream inserter

@@ -1024,7 +1024,7 @@ -
-
-
-
-
-
-
-
-

- + Synopsis

@@ -46,7 +46,7 @@

- + POSIX Extended Syntax

@@ -56,7 +56,7 @@

.[{()\*+?|^$
- + Wildcard:

@@ -74,7 +74,7 @@

- + Anchors:

@@ -86,7 +86,7 @@ of an expression, or the last character of a sub-expression.

- + Marked sub-expressions:
@@ -98,7 +98,7 @@ to by a back-reference.

- + Repeats:

@@ -184,7 +184,7 @@ cab operator to be applied to.

- + Back references:

@@ -214,7 +214,7 @@ cab

- + Alternation

@@ -227,7 +227,7 @@ cab will match either of "abd" or "abef".

- + Character sets:
@@ -240,7 +240,7 @@ cab A bracket expression may contain any combination of the following:

- + Single characters:
@@ -249,7 +249,7 @@ cab or 'c'.

- + Character ranges:
@@ -265,7 +265,7 @@ cab the code points of the characters only.

- + Negation:

@@ -274,7 +274,7 @@ cab range a-c.

- + Character classes:
@@ -284,7 +284,7 @@ cab character class names.

- + Collating Elements:
@@ -312,7 +312,7 @@ cab matches a NUL character.

- + Equivalence classes:
@@ -323,13 +323,13 @@ cab elements the name col may be a symbolic name. A primary sort key is one that ignores case, accentation, or locale-specific tailorings; so for example [[=a=]] matches - any of the characters: a, À, Á, Â, Ã, Ä, Å, A, à, á, â, ã, ä and å. Unfortunately implementation + any of the characters: a, À, Á, Â, Ã, Ä, Å, A, à, á, â, ã, ä and å. Unfortunately implementation of this is reliant on the platform's collation and localisation support; this feature can not be relied upon to work portably across all platforms, or even all locales on one platform.

- + Combinations:

@@ -337,7 +337,7 @@ cab [[:digit:]a-c[.NUL.]].

- + Escapes

@@ -363,7 +363,7 @@ cab extensions are also supported by Boost.Regex:

- + Escapes matching a specific character
@@ -552,7 +552,7 @@ cab
- + "Single character" character classes:
@@ -706,7 +706,7 @@ cab
- + Character Properties
@@ -813,7 +813,7 @@ cab matches any "digit" character, as does \p{digit}.

- + Word Boundaries

@@ -888,7 +888,7 @@ cab

- + Buffer boundaries
@@ -979,7 +979,7 @@ cab
- + Continuation Escape
@@ -991,7 +991,7 @@ cab match to start where the last one ended.

- + Quoting escape
@@ -1005,7 +1005,7 @@ cab \*+aaa
- + Unicode escapes
@@ -1056,7 +1056,7 @@ cab
- + Any other escape
@@ -1065,7 +1065,7 @@ cab \@ matches a literal '@'.

- + Operator precedence
@@ -1101,7 +1101,7 @@ cab
- + What Gets Matched
@@ -1111,11 +1111,11 @@ cab rule.

- + Variations

- + Egrep

@@ -1136,7 +1136,7 @@ cab used with the -E option.

- + awk

@@ -1150,7 +1150,7 @@ cab these by default anyway.

- + Options

@@ -1163,7 +1163,7 @@ cab modify how the case and locale sensitivity are to be applied.

- + References

@@ -1184,7 +1184,7 @@ cab -

- + Synopsis

@@ -45,7 +45,7 @@

- + POSIX Basic Syntax

@@ -55,7 +55,7 @@

.[\*^$
- + Wildcard:

@@ -73,7 +73,7 @@

- + Anchors:

@@ -85,7 +85,7 @@ of an expression, or the last character of a sub-expression.

- + Marked sub-expressions:
@@ -97,7 +97,7 @@ by a back-reference.

- + Repeats:

@@ -155,7 +155,7 @@ aaaa to.

- + Back references:

@@ -173,7 +173,7 @@ aaaa

aaabba
- + Character sets:
@@ -186,7 +186,7 @@ aaaa A bracket expression may contain any combination of the following:

- + Single characters:
@@ -195,7 +195,7 @@ aaaa or 'c'.

- + Character ranges:
@@ -211,7 +211,7 @@ aaaa of the characters only.

- + Negation:

@@ -220,7 +220,7 @@ aaaa range a-c.

- + Character classes:
@@ -230,7 +230,7 @@ aaaa character class names.

- + Collating Elements:
@@ -259,7 +259,7 @@ aaaa element names.

- + Equivalence classes:
@@ -270,13 +270,13 @@ aaaa elements the name col may be a collating symbolic name. A primary sort key is one that ignores case, accentation, or locale-specific tailorings; so for example [[=a=]] matches - any of the characters: a, À, Á, Â, Ã, Ä, Å, A, à, á, â, ã, ä and å. Unfortunately implementation + any of the characters: a, À, Á, Â, Ã, Ä, Å, A, à, á, â, ã, ä and å. Unfortunately implementation of this is reliant on the platform's collation and localisation support; this feature can not be relied upon to work portably across all platforms, or even all locales on one platform.

- + Combinations:

@@ -284,7 +284,7 @@ aaaa [[:digit:]a-c[.NUL.]].

- + Escapes

@@ -299,7 +299,7 @@ aaaa will match either a literal '\' or a '^'.

- + What Gets Matched

@@ -309,13 +309,13 @@ aaaa rule.

- + Variations

- + Grep

@@ -333,7 +333,7 @@ aaaa As its name suggests, this behavior is consistent with the Unix utility grep.

- + emacs

@@ -613,7 +613,7 @@ aaaa leftmost-longest rule.

- + Options

@@ -627,7 +627,7 @@ aaaa options modify how the case and locale sensitivity are to be applied.

- + References

@@ -646,7 +646,7 @@ aaaa -
-
-
-
-
-
-
-
-

- + Synopsis

@@ -43,7 +43,7 @@ boost::regex e2(my_expression, boost::regex::perl|boost::regex::icase);

- + Perl Regular Expression Syntax

@@ -53,7 +53,7 @@

.[{()\*+?|^$
- + Wildcard

@@ -73,7 +73,7 @@

- + Anchors

@@ -83,7 +83,7 @@ A '$' character shall match the end of a line.

- + Marked sub-expressions
@@ -94,7 +94,7 @@ can also repeated, or referred to by a back-reference.

- + Non-marking grouping
@@ -107,7 +107,7 @@ without splitting out any separate sub-expressions.

- + Repeats

@@ -188,7 +188,7 @@ to be applied to.

- + Non greedy repeats
@@ -218,7 +218,7 @@ while consuming as little input as possible.

- + Pocessive repeats
@@ -250,7 +250,7 @@ while giving nothing back.

- + Back references

@@ -360,7 +360,7 @@ named "two".

- + Alternation

@@ -387,7 +387,7 @@ (?:abc)?? has exactly the same effect.

- + Character sets

@@ -399,7 +399,7 @@ A bracket expression may contain any combination of the following:

- + Single characters

@@ -407,7 +407,7 @@ 'b', or 'c'.

- + Character ranges
@@ -421,7 +421,7 @@ sensitive.

- + Negation

@@ -430,7 +430,7 @@ matches any character that is not in the range a-c.

- + Character classes
@@ -441,7 +441,7 @@ class names.

- + Collating Elements
@@ -463,7 +463,7 @@ matches a \0 character.

- + Equivalence classes
@@ -474,13 +474,13 @@ may be a symbolic name. A primary sort key is one that ignores case, accentation, or locale-specific tailorings; so for example [[=a=]] matches - any of the characters: a, À, Á, Â, Ã, Ä, Å, A, à, á, â, ã, ä and å. Unfortunately implementation + any of the characters: a, À, Á, Â, Ã, Ä, Å, A, à, á, â, ã, ä and å. Unfortunately implementation of this is reliant on the platform's collation and localisation support; this feature can not be relied upon to work portably across all platforms, or even all locales on one platform.

- + Escaped Characters
@@ -492,7 +492,7 @@ is not a "word" character.

- + Combinations

@@ -500,7 +500,7 @@ [[:digit:]a-c[.NUL.]].

- + Escapes

@@ -692,7 +692,7 @@

- + "Single character" character classes:
@@ -894,7 +894,7 @@
- + Character Properties
@@ -1002,7 +1002,7 @@ as does \p{digit}.

- + Word Boundaries

@@ -1021,7 +1021,7 @@ \B Matches only when not at a word boundary.

- + Buffer boundaries

@@ -1046,7 +1046,7 @@ to the regular expression \n*\z

- + Continuation Escape
@@ -1058,7 +1058,7 @@ one ended.

- + Quoting escape

@@ -1071,7 +1071,7 @@ \*+aaa

- + Unicode escapes

@@ -1081,7 +1081,7 @@ followed by a sequence of zero or more combining characters.

- + Matching Line Endings
@@ -1090,7 +1090,7 @@ sequence, specifically it is identical to the expression (?>\x0D\x0A?|[\x0A-\x0C\x85\x{2028}\x{2029}]).

- + Keeping back some text
@@ -1105,7 +1105,7 @@ This can be used to simulate variable width lookbehind assertions.

- + Any other escape
@@ -1114,7 +1114,7 @@ \@ matches a literal '@'.

- + Perl Extended Patterns
@@ -1123,7 +1123,7 @@ (?.

- + Named Subexpressions
@@ -1145,14 +1145,14 @@ format string for search and replace operations, or in the match_results member functions.

- + Comments

(?# ... ) is treated as a comment, it's contents are ignored.

- + Modifiers

@@ -1166,7 +1166,7 @@ pattern only.

- + Non-marking groups
@@ -1175,7 +1175,7 @@ an additional sub-expression.

- + Branch reset

@@ -1197,7 +1197,7 @@ # 1 2 2 3 2 3 4

- + Lookahead

@@ -1220,7 +1220,7 @@ could be used to validate the password.

- + Lookbehind

@@ -1234,7 +1234,7 @@ (pattern must be of fixed length).

- + Independent sub-expressions
@@ -1247,7 +1247,7 @@ no match is found at all.

- + Recursive Expressions
@@ -1271,7 +1271,7 @@ to the next sub-expression to be declared.

- + Conditional Expressions
@@ -1319,7 +1319,7 @@
- + Operator precedence
@@ -1354,7 +1354,7 @@

- + What gets matched

@@ -1529,7 +1529,7 @@

- + Variations

@@ -1538,7 +1538,7 @@ and JScript are all synonyms for perl.

- + Options

@@ -1550,7 +1550,7 @@ are to be applied.

- + Pattern Modifiers

@@ -1562,7 +1562,7 @@ and no_mod_s.

- + References

@@ -1571,7 +1571,7 @@ -
- + Use a Unicode Aware Regular Expression Type.
@@ -71,7 +71,7 @@ -