diff --git a/changes.txt b/changes.txt index 7beaa70e..030ca55a 100644 --- a/changes.txt +++ b/changes.txt @@ -3,6 +3,9 @@ FIXED: Support for STLPort 4.1b6. FIXED: Library calling convention for VC6 debug builds. FIXED: Borland compiler support when using non-default calling convention. CHANGED: Reorganised and simplified config setup. +ADDED: Support for replacing only the first occurance during a regex_merge + using the format_first_only flag. +CHANGED: Removed use of deprecated api set_expression in one of the examples. Version 310: ADDED: Support for static linking with VC6 + dynamic runtime. @@ -259,5 +262,6 @@ BUG: character sets don't function correctly when regbase::char_classes + diff --git a/example/snippets/regex_merge_example.cpp b/example/snippets/regex_merge_example.cpp index 866796d7..95f6d03b 100644 --- a/example/snippets/regex_merge_example.cpp +++ b/example/snippets/regex_merge_example.cpp @@ -57,8 +57,9 @@ void load_file(std::string& s, std::istream& is) int main(int argc, const char** argv) { - e1.set_expression(expression_text); - e2.set_expression(pre_expression); + try{ + e1.assign(expression_text); + e2.assign(pre_expression); for(int i = 1; i < argc; ++i) { std::cout << "Processing file " << argv[i] << std::endl; @@ -80,6 +81,9 @@ int main(int argc, const char** argv) boost::regex_merge(out, s.begin(), s.end(), e1, format_string); os << footer_text; } + } + catch(...) + { return -1; } return 0; } @@ -129,3 +133,4 @@ const char* footer_text = "\n\n\n"; + diff --git a/hl_ref.htm b/hl_ref.htm index af24c64a..0d8f4bda 100644 --- a/hl_ref.htm +++ b/hl_ref.htm @@ -15,8 +15,9 @@ content="C:\PROGRAM FILES\MICROSOFT OFFICE\OFFICE\html.dot"> - - + @@ -196,7 +197,8 @@ are allowed.

+ char* p, unsigned int flags = + match_default); + char* p, unsigned int flags = + match_default); + char* p, unsigned int flags = + match_default); + syntax, match + flags and format flags. @@ -451,18 +460,23 @@ are allowed.

replace operation: searches through the string in for all occurrences of the current expression, for each occurrence replaces the match with the format string fmt. - Uses flags to determine what gets matched. If copy - is true then all unmatched sections of input are copied - unchanged to output. Returns the new string. See flags to determine what gets matched, and how + the format string should be treated. If copy is + true then all unmatched sections of input are copied + unchanged to output, if the flag format_first_only + is set then only the first occurrence of the pattern found + is replaced. Returns the new string. See also format string - syntax. + syntax, match + flags and format flags. + v, std::string& s, unsigned flags = + match_default, unsigned max_count = ~0);

C++ Boost

+

C++ Boost

Regex++, RegEx Class Reference.

@@ -112,8 +113,8 @@ are allowed.

 RegEx(const RegEx& - o);RegEx(const + RegEx& o); Copy constructor, all the properties of parameter o are copied.  
  bool Match(const - char* p, unsigned int flags = match_default); Attempts to match the current expression against the text p using the match flags flags - see
  bool Search(const - char* p, unsigned int flags = match_default); Attempts to find a match for the current expression somewhere in the text p using the match flags flags - see   unsigned int Grep(std::vector<unsigned int>& v, const - char* p, unsigned int flags = match_default); Finds all matches of the current expression in the text p using the match flags flags - see replace operation: searches through the string in for all occurrences of the current expression, for each occurrence replaces the match with the format string fmt. - Uses flags to determine what gets matched. If copy - is true then all unmatched sections of input are copied - unchanged to output. Returns the new string. See flags to determine what gets matched, and how + the format string should be treated. If copy is + true then all unmatched sections of input are copied + unchanged to output, if the flag format_first_only + is set then only the first occurance of the pattern found + is replaced. Returns the new string. See also format string - syntax.  
 
  unsigned Split(std::vector<std::string>& - v, std::string& s, unsigned flags = match_default, - unsigned max_count = ~0); Splits the input string and pushes each one onto the vector. If the expression contains no marked sub-expressions, then one string is outputted for each diff --git a/include/boost/regex/detail/regex_format.hpp b/include/boost/regex/detail/regex_format.hpp index 1dfff46a..5fa3a563 100644 --- a/include/boost/regex/detail/regex_format.hpp +++ b/include/boost/regex/detail/regex_format.hpp @@ -33,7 +33,8 @@ enum format_flags_t{ format_sed = match_max << 1, // sed style replacement. format_perl = format_sed << 1, // perl style replacement. format_no_copy = format_perl << 1, // don't copy non-matching segments. - format_is_if = format_no_copy << 1 // internal use only. + format_first_only = format_no_copy << 1, // Only replace first occurance. + format_is_if = format_first_only << 1 // internal use only. }; namespace re_detail{ @@ -468,7 +469,7 @@ public: oi_assign(out, re_copy_out(*out, iterator(m[-1].first), iterator(m[-1].second))); oi_assign(out, _reg_format_aux(*out, m, f, flags, *pt)); *last = m[-2].first; - return true; + return flags & format_first_only ? false : true; } }; diff --git a/template_class_ref.htm b/template_class_ref.htm index f7097aae..4ed3f83b 100644 --- a/template_class_ref.htm +++ b/template_class_ref.htm @@ -492,288 +492,288 @@ for a container of charT.

}; } // namespace boost -

Class reg_expression has the following public +

Class reg_expression has the following public member functions:
 

- - - - + href="#regbase">regbase for allowable flag values. - - + See class regbase for allowable flag values. - - + See class regbase for allowable flag values. - - + - - + See class regbase for allowable flag values. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -783,16 +783,16 @@ member functions:

-

Class regex_traits

+

Class regex_traits

-

#include <boost/regex/regex_traits.hpp>

+

#include <boost/regex/regex_traits.hpp>

-

This is a preliminary version of the regular +

This is a preliminary version of the regular expression traits class, and is subject to change.

-

The purpose of the traits class is to make it +

The purpose of the traits class is to make it easier to customise the behaviour of reg_expression and the associated matching algorithms. Custom traits classes can handle special character sets or define additional character @@ -820,7 +820,7 @@ template<> class cpp_regex_traits<wchar_t> { /*details*/ }; template <class charT> class regex_traits : public base_type { /*details*/ }; -

Where "base_type" defaults to w32_regex_traits +

Where "base_type" defaults to w32_regex_traits on Win32 systems, and c_regex_traits otherwise. The default behaviour can be changed by defining one of BOOST_RE_LOCALE_C (forces use of c_regex_traits by default), @@ -828,19 +828,19 @@ or BOOST_RE_LOCALE_CPP (forces use of cpp_regex_traits by default). Alternatively a specific traits class can be passed to the reg_expression template.

-

The requirements for custom traits classes are documented separately -here....

+

The requirements for custom traits classes are documented separately +here....


-

Class match_results

+

Class match_results

-

#include <boost/regex.hpp>

+

#include <boost/regex.hpp>

-

Regular expressions are different from many +

Regular expressions are different from many simple pattern-matching algorithms in that as well as finding an overall match they can also produce sub-expression matches: each sub-expression being delimited in the pattern by a pair of @@ -905,12 +905,12 @@ in an object of type sub_match.

typedef match_results<const char*> cmatch; typedef match_results<const wchar_t*> wcmatch; -

Class match_results is used for reporting what +

Class match_results is used for reporting what matched a regular expression, it is passed to the matching -algorithms regex_match and regex_search, and is used by regex_grep to notify the +algorithms regex_match and regex_search, and is used by regex_grep to notify the callback function (or function object) what matched. Note that the default allocator parameter has been chosen to match the default allocator parameter to reg_expression. match_results has @@ -920,51 +920,51 @@ the following public member functions:

 reg_expression(Allocator + reg_expression(Allocator a = Allocator()); Constructs +  Constructs a default instance of reg_expression without any expression.  
 reg_expression(charT* + reg_expression(charT* p, unsigned f = regbase::normal, Allocator a = Allocator()); Constructs +  Constructs an instance of reg_expression from the expression denoted by the null terminated string p, using the flags f to determine regular expression syntax. See class regbase for allowable flag values.  
 reg_expression(charT* + reg_expression(charT* p1, charT* p2, unsigned f = regbase::normal, Allocator a = Allocator()); Constructs +  Constructs an instance of reg_expression from the expression denoted by pair of iterators p1 and p2, using the flags f to determine regular expression syntax. - See class regbase for allowable flag values.  
 reg_expression(charT* + reg_expression(charT* p, size_type len, unsigned f, Allocator a = Allocator()); Constructs +  Constructs an instance of reg_expression from the expression denoted by the string p of length len, using the flags f to determine regular expression syntax. - See class regbase for allowable flag values.  
 template + template <class ST, class SA>
reg_expression(const std::basic_string<charT, - ST, SA>& p, boost::int_fast32_t f = regbase::normal, const - Allocator& a = Allocator());
 Constructs + ST, SA>& p, boost::int_fast32_t f = regbase::normal, + const Allocator& a = Allocator()); Constructs an instance of reg_expression from the expression denoted by the string p, using the flags f to determine regular expression syntax. See class regbase for allowable flag values.

Note - this member may not be available + href="#regbase">regbase for allowable flag values.

Note - this member may not be available depending upon your compiler capabilities.

 
 template + template <class I>
reg_expression(I first, I last, flag_type f = regbase::normal, const Allocator& a = Allocator());
 Constructs +  Constructs an instance of reg_expression from the expression denoted by pair of iterators p1 and p2, using the flags f to determine regular expression syntax. - See class regbase for allowable flag values.  
 reg_expression(const + reg_expression(const reg_expression&);Copy + Copy constructor - copies an existing regular expression.  
 reg_expression& + reg_expression& operator=(const reg_expression&);Copies an + Copies an existing regular expression.  
 reg_expression& + reg_expression& operator=(const charT* ptr);Equivalent to + Equivalent to assign(ptr);  
 template - <class ST, class SA>

reg_expression& +

template + <class ST, class SA>

reg_expression& operator=(const std::basic_string<charT, ST, SA>& p);

Equivalent to + Equivalent to assign(p);  
 reg_expression& + reg_expression& assign(const reg_expression& that);Copies the + Copies the regular expression contained by that, throws bad_expression if that does not contain a valid + href="#bad_expression">bad_expression if that does not contain a valid expression. Returns *this.  
 reg_expression& + reg_expression& assign(const charT* p, flag_type f = regbase::normal);Compiles a + Compiles a regular expression from the expression denoted by the null terminated string p, using the flags f to determine regular expression syntax. See class regbase for allowable flag values. Throws bad_expression if p does not contain a valid expression. + href="#regbase">regbase for allowable flag values. Throws bad_expression if p does not contain a valid expression. Returns *this.  
 reg_expression& + reg_expression& assign(const charT* first, const charT* last, flag_type f = regbase::normal);Compiles a + Compiles a regular expression from the expression denoted by the pair of iterators first-last, using the flags f to determine regular expression syntax. See class regbase for allowable flag values. Throws bad_expression if first-last does not contain a valid + href="#regbase">regbase for allowable flag values. Throws bad_expression if first-last does not contain a valid expression. Returns *this.  
 template + template <class string_traits, class A>
reg_expression& assign(const std::basic_string<charT, string_traits, A>& s, flag_type f = regbase::normal);
Compiles a + Compiles a regular expression from the expression denoted by the string s, using the flags f to determine regular expression syntax. See class regbase for allowable flag values. Throws bad_expression if s does not contain a valid expression. + href="#regbase">regbase for allowable flag values. Throws bad_expression if s does not contain a valid expression. Returns *this.  
 template + template <class iterator>
reg_expression& assign(iterator first, iterator last, flag_type f = regbase::normal);
Compiles a + Compiles a regular expression from the expression denoted by the pair of iterators first-last, using the flags f to determine regular expression syntax. See class regbase for allowable flag values. Throws bad_expression if first-last does not contain a valid + href="#regbase">regbase for allowable flag values. Throws bad_expression if first-last does not contain a valid expression. Returns *this.  
 Allocator + Allocator get_allocator()const;Returns the + Returns the allocator used by the expression.  
 locale_type + locale_type imbue(const locale_type& l);Imbues the + Imbues the expression with the specified locale, and invalidates the current expression.  
 locale_type + locale_type getloc()const;Returns the + Returns the locale used by the expression.  
 flag_type + flag_type getflags()const;Returns the + Returns the flags used to compile the current expression.  
 std::basic_string<charT> + std::basic_string<charT> str()const;Returns the + Returns the current expression as a string.  
 const_iterator + const_iterator begin()const;Returns a + Returns a pointer to the first character of the current expression.  
 const_iterator + const_iterator end()const;Returns a + Returns a pointer to the end of the current expression.  
 size_type + size_type size()const;Returns the + Returns the length of the current expression.  
 size_type + size_type max_size()const;Returns the + Returns the maximum length of a regular expression text.  
 bool + bool empty()const;Returns true + Returns true if the object contains no valid expression.  
 unsigned + unsigned mark_count()const ;Returns the + Returns the number of sub-expressions in the compiled regular expression. Note that this includes the whole match (subexpression zero), so the value returned is always >= 1.
- - - - - - - - - - - - @@ -972,9 +972,9 @@ the following public member functions:
- - @@ -982,9 +982,9 @@ the following public member functions:
- - @@ -992,33 +992,33 @@ the following public member functions:
- - - - + href="#reg_search">regex_search / regex_match to determine whether any match occurred.
 match_results(Allocator + match_results(Allocator a = Allocator());Constructs an + Constructs an instance of match_results, using allocator instance a.  
 match_results(const + match_results(const match_results& m);Copy + Copy constructor.  
 match_results& + match_results& operator=(const match_results& m);Assignment + Assignment operator.  
 const + const sub_match<iterator>& operator[](size_type n) const;Returns what + Returns what matched, item 0 represents the whole string, item 1 the first sub-expression and so on.  
 Allocator& + Allocator& allocator()const;Returns the + Returns the allocator used by the class.  
 difference_type + difference_type length(unsigned int sub = 0);Returns the + Returns the length of the matched subexpression, defaults to the length of the whole match, in effect this is equivalent to operator[](sub).second - operator[](sub).first.
 difference_type + difference_type position(unsigned int sub = 0);Returns the + Returns the position of the matched sub-expression, defaults to the position of the whole match. The returned value is the position of the match relative to the start of the string.
 unsigned + unsigned int line()const;Returns the + Returns the index of the line on which the match occurred, indices start with 1, not zero. Equivalent to the number of newline characters prior to operator[](0).first plus one.
 iterator + iterator line_start()const;Returns an + Returns an iterator denoting the start of the line on which the match occurred.  
 size_type + size_type size()const;Returns how + Returns how many sub-expressions are present in the match, including sub-expression zero (the whole match). This is the case even if no matches were found in the search operation - you must use the returned value from regex_search / regex_match to determine whether any match occured. 
-


+


 

-

The operator[] member function needs further +

The operator[] member function needs further explanation: it returns a const reference to a structure of type sub_match<iterator>, which has the following public members:
@@ -1027,68 +1027,68 @@ sub_match<iterator>, which has the following public members: - - - - - - - - - - - - - - - -
 typedef + typedef typename std::iterator_traits<iterator>::value_type value_type;The type + The type pointed to by the iterators.  
 typedef + typedef typename std::iterator_traits<iterator>::difference_type difference_type;A type that + A type that represents the difference between two iterators.  
 typedef + typedef iterator iterator_type;The iterator + The iterator type.  
 iterator + iterator firstAn iterator + An iterator denoting the position of the start of the match.  
 iterator + iterator secondAn iterator + An iterator denoting the position of the end of the match.  
 bool + bool matchedA Boolean + A Boolean value denoting whether this sub-expression participated in the match.  
 difference_type + difference_type length()const;Returns the + Returns the length of the sub-expression match.  
 operator + operator std::basic_string<value_type> ()const;Converts the + Converts the sub-expression match into an instance of std::basic_string<>. Note that this member may be either absent, or present to a more limited degree depending upon your compiler @@ -1097,7 +1097,7 @@ sub_match<iterator>, which has the following public members:
-

Operator[] takes an integer as an argument that +

Operator[] takes an integer as an argument that denotes the sub-expression for which to return information, the argument can take the following special values:
 

@@ -1105,67 +1105,67 @@ argument can take the following special values:
- - + - - + - - + - - - -
 -2Returns + -2Returns everything from the end of the match, to the end of the input string, equivalent to $' in perl. If this is a null - string, then:

first == second

-

And

-

matched == false.

+ string, then:

first == second

+

And

+

matched == false.

 
 -1Returns + -1Returns everything from the start of the input string (or the end of the last match if this is a grep operation), to the start of this match. Equivalent to $` in perl. If this is - a null string, then:

first == + a null string, then:

first == second

-

And

-

matched == false.

+

And

+

matched == false.

 
 0Returns the + 0Returns the whole of what matched, equivalent to $& in perl. The matched parameter is always true.  
 0 < N < + 0 < N < size()Returns what + Returns what matched sub-expression N, if this sub-expression did not participate in the match then 

matched == false

-

otherwise:

-

matched == true.

+ >matched == false

+

otherwise:

+

matched == true.

 
 N < -2 or + N < -2 or N >= size()Represents an + Represents an out-of range non-existent sub-expression. Returns a - "null" match in which

first + "null" match in which

first == last

-

And

-

matched == false.

+

And

+

matched == false.

 
-

Note that as well as being parameterised for an +

Note that as well as being parameterised for an allocator, match_results<> also takes an iterator type, this allows any pair of iterators to be searched for a given regular expression, provided the iterators have at least bi-directional @@ -1175,11 +1175,11 @@ properties.

Algorithm regex_match

-

#include <boost/regex.hpp>

+

#include <boost/regex.hpp>

-

The algorithm regex_match determines whether a +

The algorithm regex_match determines whether a given regular expression matches a given sequence denoted by a pair of iterators, the algorithm is defined as follows, note that the result is true only if the expression matches the whole of @@ -1193,7 +1193,7 @@ validation:

                 const reg_expression<charT, traits, Allocator2>& e,                   unsigned flags = match_default); -

The library also defines the following +

The library also defines the following convenience versions, which take either a const charT*, or a const std::basic_string<>& in place of a pair of iterators [note - these versions may not be available, or may be @@ -1212,7 +1212,7 @@ capabilities]:

                 const reg_expression<charT, traits, Allocator2>& e,                   unsigned flags = match_default); -

Finally there is a set of convenience versions +

Finally there is a set of convenience versions that simply return true or false and do not indicate what matched:

@@ -1232,30 +1232,30 @@ that simply return true or false and do not indicate what matched:                  const reg_expression<charT, traits, Allocator2>& e,                   unsigned flags = match_default); -

The parameters for the main function version +

The parameters for the main function version are as follows:
 

- - + - - + - - - - - - + more match_flags enumerators.
 iterator firstDenotes the start of the range to be + iterator firstDenotes the start of the range to be matched.  
 iterator lastDenotes the + iterator lastDenotes the end of the range to be matched.  
 match_results<iterator, + match_results<iterator, Allocator>& mAn instance + An instance of match_results in which what matched will be reported. On exit if a match occurred then m[0] denotes the whole of the string that matched, m[0].first must be equal to @@ -1267,29 +1267,29 @@ are as follows:
 const + const reg_expression<charT, traits, Allocator2>& eContains the + Contains the regular expression to be matched.  
 unsigned + unsigned flags = match_defaultDetermines + Determines the semantics used for matching, a combination of one or - more match_flags enumerators.  
-

regex_match returns false if no match occurs or +

regex_match returns false if no match occurs or true if it does. A match only occurs if it starts at first and finishes at last. Example: the following example processes an ftp response:

+href="example/snippets/regex_match_example.cpp">example processes an ftp response:

#include <stdlib.h> 
 #include <boost/regex.hpp> 
@@ -1322,16 +1322,16 @@ regex expression("([0-9]+)(\\-| |$)(.*)")
    return -1; 
 }
-

The -value of the flags parameter passed to the algorithm must be a -combination of one or more of the following values:
+

The value of the flags +parameter passed to the algorithm must be a combination of one or +more of the following values:
 

- - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + - - + @@ -1444,10 +1444,10 @@ combination of one or more of the following values:
- - + @@ -1460,11 +1460,11 @@ combination of one or more of the following values:

Algorithm regex_search

-

 #include <boost/regex.hpp>

+

 #include <boost/regex.hpp>

-

The algorithm regex_search will search a range +

The algorithm regex_search will search a range denoted by a pair of iterators for a given regular expression. The algorithm uses various heuristics to reduce the search time by only checking for a match if a match could conceivably start @@ -1477,7 +1477,7 @@ at that position. The algorithm is defined as follows:

                const reg_expression<charT, traits, Allocator2>& e,                  unsigned flags = match_default); -

The library also defines the following +

The library also defines the following convenience versions, which take either a const charT*, or a const std::basic_string<>& in place of a pair of iterators [note - these versions may not be available, or may be @@ -1496,31 +1496,31 @@ capabilities]:

                const reg_expression<charT, traits, Allocator2>& e,                  unsigned flags = match_default); -

The parameters for the main function version +

The parameters for the main function version are as follows:
 

 match_defaultThe default + match_defaultThe default value, indicates that first represents the start of a line, the start of a buffer, and (possibly) the start of a word. Also implies that last represents @@ -1343,72 +1343,72 @@ combination of one or more of the following values:
 match_not_bolWhen this + match_not_bolWhen this flag is set then first does not represent the start of a new line.  
 match_not_eolWhen this + match_not_eolWhen this flag is set then last does not represent the end of a line.  
 match_not_bobWhen this + match_not_bobWhen this flag is set then first is not the beginning of a buffer.  
 match_not_eobWhen this + match_not_eobWhen this flag is set then last does not represent the end of a buffer.  
 match_not_bowWhen this + match_not_bowWhen this flag is set then first can never match the start of a word.  
 match_not_eowWhen this + match_not_eowWhen this flag is set then last can never match the end of a word.  
 match_not_dot_newlineWhen this + match_not_dot_newlineWhen this flag is set then a dot expression "." can not match the newline character.  
 match_not_dot_nullWhen this + match_not_dot_nullWhen this flag is set then a dot expression "." can not match a null character.  
 match_prev_availWhen + match_prev_availWhen this flag is set, then *--first is a valid expression and the flags match_not_bol and match_not_bow have no effect, since the value of the previous character @@ -1417,8 +1417,8 @@ combination of one or more of the following values:
 match_anyWhen + match_anyWhen this flag is set, then the first string matched is returned, rather than the longest possible match. This flag can significantly reduce the time taken to find a @@ -1427,16 +1427,16 @@ combination of one or more of the following values:
 match_not_nullWhen + match_not_nullWhen this flag is set, then the expression will never match a null string.  
 match_continuousWhen + match_continuousWhen this flag is set, then during a grep operation, each successive match must start from where the previous match finished.
 match_partialWhen + match_partialWhen this flag is set, the regex algorithms will report partial matches + href="#partial_matches">partial matches - that is where one or more characters at the end of the text input matched some prefix of the regular expression.  
- - - - + - - - - - - + more match_flags enumerators.
 iterator + iterator firstThe starting + The starting position of the range to search.  
 iterator lastThe ending + iterator lastThe ending position of the range to search.  
 match_results<iterator, + match_results<iterator, Allocator>& mAn instance + An instance of match_results in which what matched will be reported. On exit if a match occurred then m[0] denotes the whole of the string that matched, m[0].first and m[0].second @@ -1532,30 +1532,30 @@ are as follows:
 const + const reg_expression<charT, traits, Allocator2>& eThe regular + The regular expression to search for.  
 unsigned + unsigned flags = match_defaultThe flags + The flags that determine what gets matched, a combination of one or - more match_flags enumerators. 
-


+


 

-

Example: the following example, takes the contents of a file in the form of a string, +

Example: the following example, takes the contents of a file in the form of a string, and searches for all the C++ class declarations in the file. The code will work regardless of the way that std::string is implemented, for example it could easily be modified to work with @@ -1602,11 +1602,11 @@ void IndexClasses(map_type& m, const std::string& file)

Algorithm regex_grep

-

#include <boost/regex.hpp>

+

#include <boost/regex.hpp>

-

 Regex_grep allows you to search through +

 Regex_grep allows you to search through an iterator range and locate all the (non-overlapping) matches with a given regular expression. The function is declared as:

@@ -1617,7 +1617,7 @@ with a given regular expression. The function is declared as:

                        const reg_expression<charT, traits, Allocator>& e,                          unsigned flags = match_default) -

The library also defines the following +

The library also defines the following convenience versions, which take either a const charT*, or a const std::basic_string<>& in place of a pair of iterators [note - these versions may not be available, or may be @@ -1636,73 +1636,73 @@ capabilities]:

              const reg_expression<charT, traits, Allocator>& e,                unsigned flags = match_default); -

The parameters for the primary version of +

The parameters for the primary version of regex_grep have the following meanings:
 

- - + - - + - - + - - + - - + + href="#match_type">match_flags enumerators.
 fooA predicate + fooA predicate function object or function pointer, see below for more information.  
 firstThe start of + firstThe start of the range to search.  
 lastThe end of + lastThe end of the range to search.  
 eThe regular + eThe regular expression to search for.  
 flagsThe flags + flagsThe flags that determine how matching is carried out, one of the match_flags enumerators.  
-

 The algorithm finds all of the non-overlapping +

 The algorithm finds all of the non-overlapping matches of the expression e, for each match it fills a match_results<iterator, Allocator> structure, which contains +href="#reg_match">match_results<iterator, Allocator> structure, which contains information on what matched, and calls the predicate foo, passing the match_results<iterator, Allocator> as a single argument. If the predicate returns true, then the grep operation continues, otherwise it terminates without searching for further matches. The function returns the number of matches found.

-

The general form of the predicate is:

+

The general form of the predicate is:

struct grep_predicate
 {
    bool operator()(const match_results<iterator_type, expression_type::alloc_type>& m);
 };
-

For example the regular expression "a*b" +

For example the regular expression "a*b" would find one match in the string "aaaaab" and two in the string "aaabb".

-

Remember this algorithm can be used for a lot +

Remember this algorithm can be used for a lot more than implementing a version of grep, the predicate can be and do anything that you want, grep utilities would output the results to the screen, another program could index a file based @@ -1712,7 +1712,7 @@ results of one regex_grep can even be chained into another regex_grep to create recursive parsers.

Example: convert the example +>Example: convert the example from regex_search to use regex_grep instead:

#include <string> 
@@ -1757,7 +1757,7 @@ void IndexClasses(map_type& m, const std::string& file)
 } 

Example: Use regex_grep to +>Example: Use regex_grep to call a global callback function:

#include <string> 
@@ -1798,7 +1798,7 @@ void IndexClasses(const std::string& file)
   

Example: use regex_grep to +>Example: use regex_grep to call a class member function, use the standard library adapters std::mem_fun and std::bind1st to convert the member function into a predicate:

@@ -1858,7 +1858,7 @@ bool class_index::grep_callback(boost::match_results<std::string::const_i  

Finally, C++ Builder users can +>Finally, C++ Builder users can use C++ Builder's closure type as a callback argument:

#include <string> 
@@ -1920,14 +1920,14 @@ index[std::string(what[5].first, what[5].second) + std::string(what[6].first, wh
 
 

 Algorithm regex_format

-

#include <boost/regex.hpp>

+

#include <boost/regex.hpp>

-

The algorithm regex_format takes the results of +

The algorithm regex_format takes the results of a match and creates a new string based upon a format -string, regex_format can be used for +href="format_string.htm#format_string">format +string, regex_format can be used for search and replace operations:

template <class OutputIterator, class iterator, class Allocator, class charT>
@@ -1942,7 +1942,7 @@ OutputIterator regex_format(OutputIterator out,
                             const std::basic_string<charT>& fmt,
                             unsigned flags = 0);
-

The library also defines the following +

The library also defines the following convenience variation of regex_format, which returns the result directly as a string, rather than outputting to an iterator [note - this version may not be available, or may be available in a @@ -1960,112 +1960,122 @@ std::basic_string<charT> regex_format                                   const std::basic_string<charT>& fmt,                                   unsigned flags = 0);

-

Parameters to the main version of the function +

Parameters to the main version of the function are passed as follows:
 

- - - - - - - -
 OutputIterator + OutputIterator outAn output + An output iterator type, the output string is sent to this iterator. Typically this would be a std::ostream_iterator.  
 const + const match_results<iterator, Allocator>& mAn instance + An instance of match_results<> obtained from one of the matching algorithms above, and denoting what matched.  
 const + const charT* fmtA format + A format string that determines how the match is transformed into the new string.  
 unsigned + unsigned flagsOptional + Optional flags which describe how the format string is to be interpreted.  
-

Format flags are defined as follows:
+

Format flags are +defined as follows:
 

- - + - - + - - + - - + + regex_merge operations. + + + + + +
 format_allEnables all + format_allEnables all syntax options (perl-like plus extensions).  
 format_sedAllows only a + format_sedAllows only a sed-like syntax.  
 format_perlAllows only a + format_perlAllows only a perl-like syntax.  
 format_no_copyDisables + format_no_copyDisables copying of unmatched sections to the output string during - regex_merge operations.  
 format_first_onlyWhen this flag is set only the first occurrence will + be replaced (applies to regex_merge only). 
-


+


 

-

The format string syntax (and available options) +

The format string syntax (and available options) is described more fully under format -strings.

+href="format_string.htm#format_string">format +strings.


Algorithm regex_merge

-

#include <boost/regex.hpp>

+

#include <boost/regex.hpp>

-

The algorithm regex_merge is a combination of regex_grep and regex_format. That is, it greps through the string finding all the -matches to the regular expression, for each match it then calls -regex_format to format the string and sends the result to the -output iterator. Sections of text that do not match are copied to -the output unchanged only if the flags parameter does not have -the flag format_no_copy set.

+

The algorithm regex_merge is a combination of regex_grep and regex_format. That is, it greps through the string finding all the +matches to the regular expression, for each match it then calls regex_format to format the string and sends the result to the output +iterator. Sections of text that do not match are copied to the +output unchanged only if the flags parameter does not have the +flag format_no_copy set. If the flag format_first_only is set then +only the first occurrence is replaced rather than all occurrences.

template <class OutputIterator, class iterator, class traits, class Allocator, class charT>
 OutputIterator regex_merge(OutputIterator out, 
@@ -2083,7 +2093,7 @@ OutputIterator regex_merge(OutputIterator out, 
                            std::basic_string<charT>& fmt, 
                            unsigned int flags = match_default);
-

The library also defines the following +

The library also defines the following convenience variation of regex_merge, which returns the result directly as a string, rather than outputting to an iterator [note - this version may not be available, or may be available in a @@ -2101,68 +2111,68 @@ std::basic_string<charT> regex_merge(const std::basic_string<cha                                      const std::basic_string<charT>& fmt,                                       unsigned int flags = match_default); -

Parameters to the main version of the function +

Parameters to the main version of the function are passed as follows:
 

- - - - - - + - - - - - - + href="#match_type">match_flags, and how the format string is interpreted - see + format_flags.
 OutputIterator + OutputIterator outAn output + An output iterator type, the output string is sent to this iterator. Typically this would be a std::ostream_iterator.  
 iterator + iterator firstThe start of + The start of the range of text to grep.  
 iterator lastThe end of + iterator lastThe end of the range of text to grep.  
 const + const reg_expression<charT, traits, Allocator>& eThe + The expression to search for.  
 const + const charT* fmtThe format + The format string to be applied to sections of text that match.  
 unsigned + unsigned int flags = match_defaultFlags which + Flags which determine how the expression is matched - see match_flags, and how the format string is interpreted - see - format_flags.  
-

Example: the following example takes C/C++ source code as input, and outputs syntax +

Example: the following example takes C/C++ source code as input, and outputs syntax highlighted HTML code.

@@ -2201,8 +2211,9 @@ boost::regex e1, e2;
 
 int main(int argc, const char** argv)
 {
-   e1.set_expression(expression_text);
-   e2.set_expression(pre_expression);
+   try{
+   e1.assign(expression_text);
+   e2.assign(pre_expression);
    for(int i = 1; i < argc; ++i)
    {
       std::cout << "Processing file " << argv[i] << std::endl;
@@ -2225,6 +2236,9 @@ color="#0000FF">".htm"));
       boost::regex_merge(out, s.begin(), s.end(), e1, format_string);
       os << footer_text;
    }
+   }
+   catch(...)
+   { return -1; }
    return 0;
 }
 
@@ -2273,11 +2287,11 @@ color="#0000FF">"(?1<)(?2>)";
 
 

Algorithm regex_split

-

#include <boost/regex.hpp>

+

#include <boost/regex.hpp>

-

Algorithm regex_split performs a similar +

Algorithm regex_split performs a similar operation to the perl split operation, and comes in three overloaded forms:

@@ -2298,7 +2312,7 @@ std::size_t regex_split(OutputIterator out,  std::size_t regex_split(OutputIterator out, std::basic_string<charT, Traits1, Alloc1>& s);
-

Each version takes an output-iterator for +

Each version takes an output-iterator for output, and a string for input. If the expression contains no marked sub-expressions, then the algorithm writes one string onto the output-iterator for each section of input that does not match @@ -2315,7 +2329,7 @@ specified, then it defaults to "\s+", and splitting occurs on whitespace.

Example: the following +>Example: the following function will split the input string into a series of tokens, and remove each token from the string s:

@@ -2325,7 +2339,7 @@ remove each token from the string s:

}

Example: the following short +>Example: the following short program will extract all of the URLs from an HTML file, and print them out to cout:

@@ -2538,9 +2552,10 @@ void search(std::istream& is)
-

Copyright Dr -John Maddock 1998-2001 all +

Copyright Dr +John Maddock 1998-2001 all rights reserved.

+ diff --git a/test/regress/parse.cpp b/test/regress/parse.cpp index 6cec7367..d8129935 100644 --- a/test/regress/parse.cpp +++ b/test/regress/parse.cpp @@ -82,6 +82,7 @@ flag_info flag_data[] = { { BOOST_RE_STR("format_sed"), 10, format_sed, 3 }, { BOOST_RE_STR("format_perl"), 11, format_perl, 3 }, { BOOST_RE_STR("format_no_copy"), 14, format_no_copy, 3 }, + { BOOST_RE_STR("format_first_only"), 17, format_first_only, 3 }, { BOOST_RE_STR("REG_NO_POSIX_TEST"), 17, REG_NO_POSIX_TEST, 4 }, { BOOST_RE_STR("REG_UNICODE_ONLY"), 16, REG_UNICODE_ONLY, 4 }, diff --git a/test/regress/tests.txt b/test/regress/tests.txt index 95202a78..ffe0af97 100644 --- a/test/regress/tests.txt +++ b/test/regress/tests.txt @@ -813,6 +813,13 @@ a+(b+) "...aaabb,,,ab*abbb?" $1 "...bb,,,b*bbb?" (a+)|(b+) "...aaabb,,,ab*abbb?" (?1A:B)C "...ACBC,,,ACBC*ACBC?" (a+)|(b+) "...aaabb,,,ab*abbb?" ?1:B "...B,,,B*B?" +- match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_first_only +; move to copying unmatched data, but replace first occurance only: +a+ "...aaa,,," bbb "...bbb,,," +a+(b+) "...aaabb,,," $1 "...bb,,," +a+(b+) "...aaabb,,,ab*abbb?" $1 "...bb,,,ab*abbb?" +(a+)|(b+) "...aaabb,,,ab*abbb?" (?1A)(?2B) "...Abb,,,ab*abbb?" + ; ; changes to newline handling with 2.11: ; @@ -871,3 +878,4 @@ a+(?#b+)b+ xaaabbba 1 7 +