From 3f8b3b5bd12e1d14f8fcbdffbac6ef4739f860b0 Mon Sep 17 00:00:00 2001 From: John Maddock Date: Thu, 2 Aug 2007 17:06:26 +0000 Subject: [PATCH] Removed files that were previously removed from cvs but got resurrected for some reason. [SVN r38403] --- doc/bad_expression.html | 81 --- doc/basic_regex.html | 906 ------------------------------- doc/captures.html | 254 --------- doc/character_class_names.html | 326 ----------- doc/collating_names.html | 368 ------------- doc/concepts.html | 453 ---------------- doc/configuration.html | 155 ------ doc/contacts.html | 87 --- doc/error_type.html | 139 ----- doc/examples.html | 117 ---- doc/faq.html | 114 ---- doc/format_boost_syntax.html | 163 ------ doc/format_perl_syntax.html | 150 ----- doc/format_sed_syntax.html | 109 ---- doc/format_syntax.html | 52 -- doc/gcc-performance.html | 543 ------------------ doc/headers.html | 48 -- doc/history.html | 177 ------ doc/icu_strings.html | 468 ---------------- doc/implementation.html | 43 -- doc/index.html | 16 - doc/install.html | 260 --------- doc/introduction.html | 181 ------ doc/localisation.html | 808 --------------------------- doc/match_flag_type.html | 295 ---------- doc/match_results.html | 459 ---------------- doc/mfc_strings.html | 294 ---------- doc/non_standard_strings.html | 53 -- doc/partial_matches.html | 195 ------- doc/performance.html | 52 -- doc/posix_api.html | 286 ---------- doc/redistributables.html | 55 -- doc/reg_expression.html | 44 -- doc/regbase.html | 82 --- doc/regex.html | 481 ---------------- doc/regex_format.html | 204 ------- doc/regex_grep.html | 377 ------------- doc/regex_iterator.html | 456 ---------------- doc/regex_match.html | 318 ----------- doc/regex_merge.html | 45 -- doc/regex_replace.html | 256 --------- doc/regex_search.html | 315 ----------- doc/regex_split.html | 145 ----- doc/regex_token_iterator.html | 381 ------------- doc/regex_traits.html | 87 --- doc/standards.html | 237 -------- doc/sub_match.html | 571 ------------------- 
doc/syntax.html | 55 -- doc/syntax_basic.html | 238 -------- doc/syntax_extended.html | 520 ------------------ doc/syntax_leftmost_longest.html | 65 --- doc/syntax_option_type.html | 543 ------------------ doc/syntax_perl.html | 626 --------------------- doc/thread_safety.html | 70 --- doc/uarrow.gif | Bin 1666 -> 0 bytes doc/unicode.html | 66 --- doc/vc71-performance.html | 703 ------------------------ 57 files changed, 14592 deletions(-) delete mode 100644 doc/bad_expression.html delete mode 100644 doc/basic_regex.html delete mode 100644 doc/captures.html delete mode 100644 doc/character_class_names.html delete mode 100644 doc/collating_names.html delete mode 100644 doc/concepts.html delete mode 100644 doc/configuration.html delete mode 100644 doc/contacts.html delete mode 100644 doc/error_type.html delete mode 100644 doc/examples.html delete mode 100644 doc/faq.html delete mode 100644 doc/format_boost_syntax.html delete mode 100644 doc/format_perl_syntax.html delete mode 100644 doc/format_sed_syntax.html delete mode 100644 doc/format_syntax.html delete mode 100644 doc/gcc-performance.html delete mode 100644 doc/headers.html delete mode 100644 doc/history.html delete mode 100644 doc/icu_strings.html delete mode 100644 doc/implementation.html delete mode 100644 doc/index.html delete mode 100644 doc/install.html delete mode 100644 doc/introduction.html delete mode 100644 doc/localisation.html delete mode 100644 doc/match_flag_type.html delete mode 100644 doc/match_results.html delete mode 100644 doc/mfc_strings.html delete mode 100644 doc/non_standard_strings.html delete mode 100644 doc/partial_matches.html delete mode 100644 doc/performance.html delete mode 100644 doc/posix_api.html delete mode 100644 doc/redistributables.html delete mode 100644 doc/reg_expression.html delete mode 100644 doc/regbase.html delete mode 100644 doc/regex.html delete mode 100644 doc/regex_format.html delete mode 100644 doc/regex_grep.html delete mode 100644 doc/regex_iterator.html 
delete mode 100644 doc/regex_match.html delete mode 100644 doc/regex_merge.html delete mode 100644 doc/regex_replace.html delete mode 100644 doc/regex_search.html delete mode 100644 doc/regex_split.html delete mode 100644 doc/regex_token_iterator.html delete mode 100644 doc/regex_traits.html delete mode 100644 doc/standards.html delete mode 100644 doc/sub_match.html delete mode 100644 doc/syntax.html delete mode 100644 doc/syntax_basic.html delete mode 100644 doc/syntax_extended.html delete mode 100644 doc/syntax_leftmost_longest.html delete mode 100644 doc/syntax_option_type.html delete mode 100644 doc/syntax_perl.html delete mode 100644 doc/thread_safety.html delete mode 100644 doc/uarrow.gif delete mode 100644 doc/unicode.html delete mode 100644 doc/vc71-performance.html diff --git a/doc/bad_expression.html b/doc/bad_expression.html deleted file mode 100644 index aee75368..00000000 --- a/doc/bad_expression.html +++ /dev/null @@ -1,81 +0,0 @@ - - - - Boost.Regex: regex_error - - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

class regex_error

-
-

Boost.Regex Index

-
-
-
-
-

Synopsis

-

#include <boost/pattern_except.hpp>

-

The class regex_error defines the type of objects thrown as - exceptions to report errors during the conversion from a string representing a - regular expression to a finite state machine.  

-
-namespace boost{
-
-class regex_error : public std::runtime_error
-{
-public:
-   explicit regex_error(const std::string& s, regex_constants::error_type err, std::ptrdiff_t pos);
-   explicit regex_error(boost::regex_constants::error_type err);
-   boost::regex_constants::error_type code()const;
-   std::ptrdiff_t position()const;
-};
-
-typedef regex_error bad_pattern; // for backwards compatibility
-typedef regex_error bad_expression; // for backwards compatibility
-
-} // namespace boost
-
-

Description

-
-regex_error(const std::string& s, regex_constants::error_type err, std::ptrdiff_t pos);
-regex_error(boost::regex_constants::error_type err);
-

Effects: Constructs an object of class regex_error.

-
-boost::regex_constants::error_type code()const;
-

Effects: returns the error code that represents parsing error that occurred.

-
-std::ptrdiff_t position()const; 
-

Effects: returns the location in the expression where parsing stopped.

-

Footnotes: the choice of std::runtime_error as the base class for - regex_error is moot; depending upon how the library is used - exceptions may be either logic errors (programmer supplied expressions) or run - time errors (user supplied expressions).  The library previously used bad_pattern - and bad_expression for errors, these have been replaced by the - single class regex_error to keep the library in synchronization - with the standardization proposal.

-

-
-

Revised - - 24 June 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - diff --git a/doc/basic_regex.html b/doc/basic_regex.html deleted file mode 100644 index 16774739..00000000 --- a/doc/basic_regex.html +++ /dev/null @@ -1,906 +0,0 @@ - - - - Boost.Regex: basic_regex - - - - - - - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

basic_regex

-
-

Boost.Regex Index

-
-
-
-
-

Synopsis

-
-#include <boost/regex.hpp>
-
-

The template class basic_regex encapsulates regular expression parsing - and compilation. The class takes two template parameters:

-

charT: determines the character type, i.e. either char or - wchar_t; see charT concept.

-

traits: determines the behavior of the character type, for - example which character class names are recognized. A default traits class is - provided: regex_traits<charT>.  See - also traits concept.

-

For ease of use there are two typedefs that define the two standard basic_regex - instances, unless you want to use custom traits classes or non-standard - character types, you won't need to use anything other than these:

-
-namespace boost{
-template <class charT, class traits = regex_traits<charT>  >
-class basic_regex;
-typedef basic_regex<char>      regex;
-typedef basic_regex<wchar_t>   wregex;
-}
-
-

The definition of basic_regex follows: it is based very closely on class - basic_string, and fulfils the requirements for a constant-container of charT.

-
-namespace boost{
-
-template <class  charT, class traits = regex_traits<charT> >
-class basic_regex {
-   public:          
-   // types:
-   typedef          charT                                value_type;
-   typedef          implementation-specific              const_iterator;
-   typedef          const_iterator                       iterator;                 
-   typedef          charT&                               reference;           
-   typedef          const charT&                         const_reference;           
-   typedef          std::ptrdiff_t                       difference_type;                 
-   typedef          std::size_t                          size_type;
-   typedef          regex_constants::syntax_option_type  flag_type;
-   typedef typename traits::locale_type                  locale_type;
-
-   // constants:
-   // main option selection:
-   static const regex_constants::syntax_option_type normal          = regex_constants::normal;
-   static const regex_constants::syntax_option_type ECMAScript      = normal;
-   static const regex_constants::syntax_option_type JavaScript      = normal;
-   static const regex_constants::syntax_option_type JScript         = normal;
-   static const regex_constants::syntax_option_type basic           = regex_constants::basic;
-   static const regex_constants::syntax_option_type extended        = regex_constants::extended;
-   static const regex_constants::syntax_option_type awk             = regex_constants::awk;
-   static const regex_constants::syntax_option_type grep            = regex_constants::grep;
-   static const regex_constants::syntax_option_type egrep           = regex_constants::egrep;
-   static const regex_constants::syntax_option_type sed             = regex_constants::sed;
-   static const regex_constants::syntax_option_type perl            = regex_constants::perl;
-   static const regex_constants::syntax_option_type literal         = regex_constants::literal;
-   // modifiers specific to perl expressions:
-   static const regex_constants::syntax_option_type no_mod_m        = regex_constants::no_mod_m;
-   static const regex_constants::syntax_option_type no_mod_s        = regex_constants::no_mod_s;
-   static const regex_constants::syntax_option_type mod_s           = regex_constants::mod_s;
-   static const regex_constants::syntax_option_type mod_x           = regex_constants::mod_x;
-   // modifiers specific to POSIX basic expressions:
-   static const regex_constants::syntax_option_type bk_plus_qm      = regex_constants::bk_plus_qm;
-   static const regex_constants::syntax_option_type bk_vbar         = regex_constants::bk_vbar;
-   static const regex_constants::syntax_option_type no_char_classes = regex_constants::no_char_classes;
-   static const regex_constants::syntax_option_type no_intervals    = regex_constants::no_intervals;
-   // common modifiers:
-   static const regex_constants::syntax_option_type nosubs          = regex_constants::nosubs;
-   static const regex_constants::syntax_option_type optimize        = regex_constants::optimize;
-   static const regex_constants::syntax_option_type collate         = regex_constants::collate;
-   static const regex_constants::syntax_option_type newline_alt     = regex_constants::newline_alt;
-   static const regex_constants::syntax_option_type no_except       = regex_constants::no_except;
-
-   // construct/copy/destroy:
-   explicit basic_regex ();
-   explicit basic_regex(const  charT* p, flag_type f = regex_constants::normal);
-   basic_regex(const charT* p1, const  charT* p2, flag_type f = regex_constants::normal);
-   basic_regex(const charT* p, size_type len, flag_type  f);
-   basic_regex(const basic_regex&);
-   template <class ST, class SA>
-   explicit basic_regex(const basic_string<charT, ST,  SA>& p, flag_type f = regex_constants::normal);
-   template <class InputIterator>
-   basic_regex(InputIterator first,  InputIterator last, flag_type f = regex_constants::normal);
-
-   ~basic_regex();
-   basic_regex& operator=(const basic_regex&);
-   basic_regex& operator= (const charT* ptr); 
-   template <class ST, class SA> 
-   basic_regex& operator= (const basic_string<charT, ST, SA>& p);
-   // iterators: 
-   const_iterator begin() const; 
-   const_iterator end() const;
-   // capacity: 
-   size_type size() const; 
-   size_type max_size() const; 
-   bool empty() const; 
-   unsigned mark_count()const; 
-   //
-   // modifiers: 
-   basic_regex& assign(const basic_regex& that); 
-   basic_regex& assign(const charT* ptr, flag_type f = regex_constants::normal);
-   basic_regex& assign(const charT* ptr, unsigned int len, flag_type f);
-   template <class string_traits, class A>
-   basic_regex& assign(const basic_string<charT, string_traits, A>& s,
-                       flag_type f = regex_constants::normal);
-   template <class InputIterator>
-   basic_regex& assign(InputIterator first, InputIterator last,
-                       flag_type f = regex_constants::normal);
-
-   // const operations:
-   flag_type flags() const;
-   int status()const;
-   basic_string<charT> str() const;
-   int compare(basic_regex&) const;
-   // locale:
-   locale_type imbue(locale_type loc);
-   locale_type getloc() const;
-   // swap
-   void swap(basic_regex&) throw();
-};
-
-template <class charT, class traits>
-bool operator == (const basic_regex<charT, traits>& lhs,
-                  const basic_regex<charT, traits>& rhs);
-template <class charT, class traits>
-bool operator != (const basic_regex<charT, traits>& lhs,
-                  const basic_regex<charT, traits>& rhs);
-template <class charT, class traits>
-bool operator < (const basic_regex<charT, traits>& lhs,
-                 const basic_regex<charT, traits>& rhs);
-template <class charT, class traits>
-bool operator <= (const basic_regex<charT, traits>& lhs,
-                  const basic_regex<charT, traits>& rhs);
-template <class charT, class traits>
-bool operator >= (const basic_regex<charT, traits>& lhs,
-                  const basic_regex<charT, traits>& rhs);
-template <class charT, class traits>
-bool operator > (const basic_regex<charT, traits>& lhs,
-                 const basic_regex<charT, traits>& rhs);
-
-template <class charT, class io_traits, class re_traits>
-basic_ostream<charT, io_traits>&
-   operator << (basic_ostream<charT, io_traits>& os,
-                const basic_regex<charT, re_traits>& e);
-
-template <class charT, class traits>
-void swap(basic_regex<charT, traits>& e1,
-          basic_regex<charT, traits>& e2);
-
-typedef basic_regex<char> regex;
-typedef basic_regex<wchar_t> wregex;
-
-} // namespace boost
-
-

Description

-

Class basic_regex has the following public member functions:

-

basic_regex constants

-
-// main option selection:
-static const regex_constants::syntax_option_type normal           = regex_constants::normal;
-static const regex_constants::syntax_option_type ECMAScript       = normal;
-static const regex_constants::syntax_option_type JavaScript       = normal;
-static const regex_constants::syntax_option_type JScript          = normal;
-static const regex_constants::syntax_option_type basic            = regex_constants::basic;
-static const regex_constants::syntax_option_type extended         = regex_constants::extended;
-static const regex_constants::syntax_option_type awk              = regex_constants::awk;
-static const regex_constants::syntax_option_type grep             = regex_constants::grep;
-static const regex_constants::syntax_option_type egrep            = regex_constants::egrep;
-static const regex_constants::syntax_option_type sed              = regex_constants::sed;
-static const regex_constants::syntax_option_type perl             = regex_constants::perl;
-static const regex_constants::syntax_option_type literal          = regex_constants::literal;
-// modifiers specific to perl expressions:
-static const regex_constants::syntax_option_type no_mod_m         = regex_constants::no_mod_m;
-static const regex_constants::syntax_option_type no_mod_s         = regex_constants::no_mod_s;
-static const regex_constants::syntax_option_type mod_s            = regex_constants::mod_s;
-static const regex_constants::syntax_option_type mod_x            = regex_constants::mod_x;
-// modifiers specific to POSIX basic expressions:
-static const regex_constants::syntax_option_type bk_plus_qm       = regex_constants::bk_plus_qm;
-static const regex_constants::syntax_option_type bk_vbar          = regex_constants::bk_vbar;
-static const regex_constants::syntax_option_type no_char_classes  = regex_constants::no_char_classes;
-static const regex_constants::syntax_option_type no_intervals     = regex_constants::no_intervals;
-// common modifiers:
-static const regex_constants::syntax_option_type nosubs           = regex_constants::nosubs;
-static const regex_constants::syntax_option_type optimize         = regex_constants::optimize;
-static const regex_constants::syntax_option_type collate          = regex_constants::collate;
-static const regex_constants::syntax_option_type newline_alt      = regex_constants::newline_alt;
-
-

The static constant members are provided as synonyms for the constants declared - in namespace boost::regex_constants; for each constant of type - syntax_option_type declared in namespace boost::regex_constants - then a constant with the same name, type and value is declared within the scope - of basic_regex.

-

basic_regex constructors

-
 basic_regex();
-
-

Effects: Constructs an object of class basic_regex. The - postconditions of this function are indicated in the table:

-
-
- - - - - - - - - - - - - - - - - - - -
-

Element

-
-

Value

-
-

empty()

-
-

true

-
-

size()

-
-

0

-
-

str()

-
-

basic_string<charT>()

-
-
-
-

basic_regex(const charT* p, flag_type f = regex_constants::normal); - -
-

Requires: p shall not be a null pointer.

-

Throws: bad_expression if p is not a valid regular - expression, unless the flag no_except is set in f.

-

Effects: Constructs an object of class basic_regex; the - object's internal finite state machine is constructed from the regular - expression contained in the null-terminated string p, and interpreted - according to the option flags specified - in f. The postconditions of this function are indicated in the table:

-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Element

-
-

Value

-
-

empty()

-
-

false

-
-

size()

-
-

char_traits<charT>::length(p)

-
-

str()

-
-

basic_string<charT>(p)

-
-

flags()

-
-

f

-
-

mark_count()

-
-

The number of marked sub-expressions within the expression.

-
-
-
-
- 
-
-
basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal);
-

Requires: p1 and p2 are not null pointers, p1 < p2.

-

Throws: bad_expression if [p1,p2) is not a valid regular - expression, unless the flag no_except is set in f.

-

Effects: Constructs an object of class basic_regex; the - object's internal finite state machine is constructed from the regular - expression contained in the sequence of characters [p1,p2), and interpreted - according to the option flags specified in f. - The postconditions of this function are indicated in the table:

-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Element

-
-

Value

-
-

empty()

-
-

false

-
-

size()

-
-

std::distance(p1,p2)

-
-

str()

-
-

basic_string<charT>(p1,p2)

-
-

flags()

-
-

f

-
-

mark_count()

-
-

The number of marked sub-expressions within the expression.

-
-
-
-
- 
-
-
basic_regex(const charT* p, size_type len, flag_type f);
-
-

Requires: p shall not be a null pointer, len < max_size().

-

Throws: bad_expression if p is not a valid regular - expression, unless the flag no_except is set in f.

-

Effects: Constructs an object of class basic_regex; the - object's internal finite state machine is constructed from the regular - expression contained in the sequence of characters [p, p+len), and interpreted - according to the option flags specified in f. - The postconditions of this function are indicated in the table:

-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Element

-
-

Value

-
-

empty()

-
-

false

-
-

size()

-
-

len

-
-

str()

-
-

basic_string<charT>(p, len)

-
-

flags()

-
-

f

-
-

mark_count()

-
-

The number of marked sub-expressions within the expression.

-
-
-
-

basic_regex(const basic_regex& e); -
-

Effects: Constructs an object of class basic_regex as a - copy of the object e. The postconditions of this function are indicated - in the table:

-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Element

-
-

Value

-
-

empty()

-
-

e.empty()

-
-

size()

-
-

e.size()

-
-

str()

-
-

e.str()

-
-

flags()

-
-

e.flags()

-
-

mark_count()

-
-

e.mark_count()

-
-
-
-

-template <class ST, class SA> -basic_regex(const basic_string<charT, ST, SA>& s, flag_type f = regex_constants::normal); -
-

Throws: bad_expression if s is not a valid regular - expression, unless the flag no_except is set in f.

-

Effects: Constructs an object of class basic_regex; the - object's internal finite state machine is constructed from the regular - expression contained in the string s, and interpreted according to the - option flags specified in f. The postconditions of this function - are indicated in the table:

-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Element

-
-

Value

-
-

empty()

-
-

false

-
-

size()

-
-

s.size()

-
-

str()

-
-

s

-
-

flags()

-
-

f

-
-

mark_count()

-
-

The number of marked sub-expressions within the expression.

-
-
-
-

-template <class ForwardIterator> -basic_regex(ForwardIterator first, ForwardIterator last, flag_type f = regex_constants::normal); -
-

Throws: bad_expression if the sequence [first, last) - is not a valid regular expression, unless the flag no_except is set in f.

-

Effects: Constructs an object of class basic_regex; the - object's internal finite state machine is constructed from the regular - expression contained in the sequence of characters [first, last), and - interpreted according to the option flags - specified in f. The postconditions of this function are indicated in the - table:

-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Element

-
-

Value

-
-

empty()

-
-

false

-
-

size()

-
-

distance(first,last)

-
-

str()

-
-

basic_string<charT>(first,last)

-
-

flags()

-
-

f

-
-

mark_count()

-
-

The number of marked sub-expressions within the expression.

-
-
-
-

-basic_regex& operator=(const basic_regex& e);
-
-

Effects: Returns the result of assign(e.str(), e.flags()).

-
basic_regex& operator=(const charT* ptr);
-
-

Requires: ptr shall not be a null pointer.

-

Effects: Returns the result of assign(ptr).

-

-template <class ST, class SA>
-basic_regex& operator=(const basic_string<charT, ST, SA>& p);
-
-

Effects: Returns the result of assign(p).

-

basic_regex iterators

-

-const_iterator begin() const;
-
-

Effects: Returns a starting iterator to a sequence of characters - representing the regular expression.

-

-const_iterator end() const;
-
-

Effects: Returns termination iterator to a sequence of characters - representing the regular expression.

-

basic_regex capacity

-

-size_type size() const;
-
-

Effects: Returns the length of the sequence of characters representing - the regular expression.

-

-size_type max_size() const;
-
-

Effects: Returns the maximum length of the sequence of characters - representing the regular expression.

-

-bool empty() const;
-
-

Effects: Returns true if the object does not contain a valid - regular expression, otherwise false.

-
unsigned mark_count() const;
-
-

Effects: Returns the number of marked sub-expressions within the regular - expression.

-

basic_regex assign

-

-basic_regex& assign(const basic_regex& that);
-
-

Effects: Returns assign(that.str(), that.flags()).

-

-basic_regex& assign(const charT* ptr, flag_type f = regex_constants::normal);
-
-

Effects: Returns assign(string_type(ptr), f).

-
basic_regex& assign(const charT* ptr, unsigned int len, flag_type f);
-

Effects: Returns assign(string_type(ptr, len), f).

-
template <class string_traits, class A>
-basic_regex& assign(const basic_string<charT, string_traits, A>& s,
-                    flag_type f = regex_constants::normal);
-
-

Throws: bad_expression if s is not a valid regular - expression, unless the flag no_except is set in f.

-

Returns: *this.

-

Effects: Assigns the regular expression contained in the string s, - interpreted according to the option flags specified - in f. The postconditions of this function are indicated in the table:

-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Element

-
-

Value

-
-

empty()

-
-

false

-
-

size()

-
-

s.size()

-
-

str()

-
-

s

-
-

flags()

-
-

f

-
-

mark_count()

-
-

The number of marked sub-expressions within the expression.

-
-
-
-
- 
-
-
template <class InputIterator>
-basic_regex& assign(InputIterator first, InputIterator last,
-                    flag_type f = regex_constants::normal);
-
-

Requires: The type InputIterator corresponds to the Input Iterator - requirements (24.1.1).

-

Effects: Returns assign(string_type(first, last), f).

-

basic_regex constant operations

-
flag_type flags() const;
-
-

Effects: Returns a copy of the regular expression syntax flags that were - passed to the object's constructor, or the last call to assign.

- -
int status() const;
-
-

Effects: Returns zero if the expression contains a valid - regular expression, otherwise an error code.  - This member function is retained for use in environments that cannot use - exception handling.

-
basic_string<charT> str() const;
-
-

Effects: Returns a copy of the character sequence passed to the object's - constructor, or the last call to assign.

-
int compare(basic_regex& e)const;
-
-

Effects: If flags() == e.flags() then returns str().compare(e.str()), - otherwise returns flags() - e.flags().

-

basic_regex locale

-
locale_type imbue(locale_type l);
-
-

Effects: Returns the result of traits_inst.imbue(l) where - traits_inst is a (default initialized) instance of the template - parameter traits stored within the object. Calls to imbue - invalidate any currently contained regular expression.

-

Postcondition: empty() == true.

-

-locale_type getloc() const;
-
-

Effects: Returns the result of traits_inst.getloc() where - traits_inst is a (default initialized) instance of the template - parameter traits stored within the object.

-

basic_regex swap

-

-void swap(basic_regex& e) throw();
-
-

Effects: Swaps the contents of the two regular expressions.

-

Postcondition: *this contains the characters that were in e, - e contains the regular expression that was in *this.

-

Complexity: constant time.

-

basic_regex non-member functions

-
basic_regex non-member comparison operators 
-

Comparisons between basic_regex objects are provided on an experimental basis: - please note that these are likely to be removed from the standard library - proposal, so use with care if you are writing portable code.

-

-template <class charT, class traits>
-bool operator == (const basic_regex<charT, traits>& lhs,
-                  const basic_regex<charT, traits>& rhs);
-
-

Effects: Returns lhs.compare(rhs) == 0.

-

-template <class charT, class traits>
-bool operator != (const basic_regex<charT, traits>& lhs,
-                  const basic_regex<charT, traits>& rhs);
-
-

Effects: Returns lhs.compare(rhs) != 0.

-

-template <class charT, class traits>
-bool operator < (const basic_regex<charT, traits>& lhs,
-                 const basic_regex<charT, traits>& rhs);
-
-

Effects: Returns lhs.compare(rhs) < 0.

-

-template <class charT, class traits>
-bool operator <= (const basic_regex<charT, traits>& lhs,
-                  const basic_regex<charT, traits>& rhs);
-
-

Effects: Returns lhs.compare(rhs) <= 0.

-

-template <class charT, class traits>
-bool operator >= (const basic_regex<charT, traits>& lhs,
-                  const basic_regex<charT, traits>& rhs);
-
-

Effects: Returns lhs.compare(rhs) >= 0.

-

-template <class charT, class traits>
-bool operator > (const basic_regex<charT, traits>& lhs,
-                 const basic_regex<charT, traits>& rhs);
-
-

Effects: Returns lhs.compare(rhs) > 0.

-
basic_regex inserter.
-

The basic_regex stream inserter is provided on an experimental basis, and - outputs the textual representation of the expression to the stream:

-

-template <class charT, class io_traits, class re_traits>
-basic_ostream<charT, io_traits>&
-   operator << (basic_ostream<charT, io_traits>& os,
-                const basic_regex<charT, re_traits>& e);
-
-

Effects: Returns (os << e.str()).

-
basic_regex non-member swap
-

-template <class charT, class traits>
-void swap(basic_regex<charT, traits>& lhs,
-          basic_regex<charT, traits>& rhs);
-
-

Effects: calls lhs.swap(rhs).

-
-

Revised 7 Aug - - 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/captures.html b/doc/captures.html deleted file mode 100644 index fe0232f8..00000000 --- a/doc/captures.html +++ /dev/null @@ -1,254 +0,0 @@ - - - - Boost.Regex: Understanding Captures - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Understanding Captures

-
-

Boost.Regex Index

-
-

-
-

-

Captures are the iterator ranges that are "captured" by marked sub-expressions - as a regular expression gets matched.  Each marked sub-expression can - result in more than one capture, if it is matched more than once.  This - document explains how captures and marked sub-expressions in Boost.Regex are - represented and accessed.

-

Marked sub-expressions

-

Every time a Perl regular expression contains a parenthesis group (), it spits - out an extra field, known as a marked sub-expression, for example the - expression:

-
(\w+)\W+(\w+)
-

- Has two marked sub-expressions (known as $1 and $2 respectively), in addition - the complete match is known as $&, everything before the first match as $`, - and everything after the match as $'.  So if the above expression is - searched for within "@abc def--", then we obtain:

-
-

- - - - - - - - - - - - - - - - - - - - - -
-

$`

-
"@"
$&"abc def"
$1"abc"
$2"def"
$'"--"
-

-
-

In Boost.regex all these are accessible via the match_results - class that gets filled in when calling one of the matching algorithms (regex_search, - regex_match, or regex_iterator).  - So given:

-
boost::match_results<IteratorType> m;
-

The Perl and Boost.Regex equivalents are as follows:

-
-

- - - - - - - - - - - - - - - - - - - - - -
PerlBoost.Regex
$`m.prefix()
$&m[0]
$nm[n]
$'m.suffix()
-

-
-

-

In Boost.Regex each sub-expression match is represented by a - sub_match object, this is basically just a pair of iterators denoting - the start and end position of the sub-expression match, but there are some - additional operators provided so that objects of type sub_match behave a lot - like a std::basic_string: for example they are implicitly - convertible to a basic_string, they can be compared - to a string, added to a string, or - streamed out to an output stream.

-

Unmatched Sub-Expressions

-

When a regular expression match is found there is no need for all of the marked - sub-expressions to have participated in the match, for example the expression:

-

(abc)|(def)

-

can match either $1 or $2, but never both at the same time.  In - Boost.Regex you can determine which sub-expressions matched by accessing the - sub_match::matched data member.

-

Repeated Captures

-

When a marked sub-expression is repeated, then the sub-expression gets - "captured" multiple times, however normally only the final capture is - available, for example if

-
(?:(\w+)\W+)+
-

is matched against

-
one fine day
-

Then $1 will contain the string "day", and all the previous captures will have - been forgotten.

-

However, Boost.Regex has an experimental feature that allows all the capture - information to be retained - this is accessed either via the - match_results::captures member function or the sub_match::captures - member function.  These functions return a container that contains a - sequence of all the captures obtained during the regular expression - matching.  The following example program shows how this information may be - used:

-
#include <boost/regex.hpp>
-#include <iostream>
-
-
-void print_captures(const std::string& regx, const std::string& text)
-{
-   boost::regex e(regx);
-   boost::smatch what;
-   std::cout << "Expression:  \"" << regx << "\"\n";
-   std::cout << "Text:        \"" << text << "\"\n";
-   if(boost::regex_match(text, what, e, boost::match_extra))
-   {
-      unsigned i, j;
-      std::cout << "** Match found **\n   Sub-Expressions:\n";
-      for(i = 0; i < what.size(); ++i)
-         std::cout << "      $" << i << " = \"" << what[i] << "\"\n";
-      std::cout << "   Captures:\n";
-      for(i = 0; i < what.size(); ++i)
-      {
-         std::cout << "      $" << i << " = {";
-         for(j = 0; j < what.captures(i).size(); ++j)
-         {
-            if(j)
-               std::cout << ", ";
-            else
-               std::cout << " ";
-            std::cout << "\"" << what.captures(i)[j] << "\"";
-         }
-         std::cout << " }\n";
-      }
-   }
-   else
-   {
-      std::cout << "** No Match found **\n";
-   }
-}
-
-int main(int , char* [])
-{
-   print_captures("(([[:lower:]]+)|([[:upper:]]+))+", "aBBcccDDDDDeeeeeeee");
-   print_captures("(.*)bar|(.*)bah", "abcbar");
-   print_captures("(.*)bar|(.*)bah", "abcbah");
-   print_captures("^(?:(\\w+)|(?>\\W+))*$", "now is the time for all good men to come to the aid of the party");
-   return 0;
-}
-

Which produces the following output:

-
Expression:  "(([[:lower:]]+)|([[:upper:]]+))+"
-Text:        "aBBcccDDDDDeeeeeeee"
-** Match found **
-   Sub-Expressions:
-      $0 = "aBBcccDDDDDeeeeeeee"
-      $1 = "eeeeeeee"
-      $2 = "eeeeeeee"
-      $3 = "DDDDD"
-   Captures:
-      $0 = { "aBBcccDDDDDeeeeeeee" }
-      $1 = { "a", "BB", "ccc", "DDDDD", "eeeeeeee" }
-      $2 = { "a", "ccc", "eeeeeeee" }
-      $3 = { "BB", "DDDDD" }
-Expression:  "(.*)bar|(.*)bah"
-Text:        "abcbar"
-** Match found **
-   Sub-Expressions:
-      $0 = "abcbar"
-      $1 = "abc"
-      $2 = ""
-   Captures:
-      $0 = { "abcbar" }
-      $1 = { "abc" }
-      $2 = { }
-Expression:  "(.*)bar|(.*)bah"
-Text:        "abcbah"
-** Match found **
-   Sub-Expressions:
-      $0 = "abcbah"
-      $1 = ""
-      $2 = "abc"
-   Captures:
-      $0 = { "abcbah" }
-      $1 = { }
-      $2 = { "abc" }
-Expression:  "^(?:(\w+)|(?>\W+))*$"
-Text:        "now is the time for all good men to come to the aid of the party"
-** Match found **
-   Sub-Expressions:
-      $0 = "now is the time for all good men to come to the aid of the party"
-      $1 = "party"
-   Captures:
-      $0 = { "now is the time for all good men to come to the aid of the party" }
-      $1 = { "now", "is", "the", "time", "for", "all", "good", "men", "to", "come", "to", "the", "aid", "of", "the", "party" }
-
-

Unfortunately enabling this feature has an impact on performance (even if you - don't use it), and a much bigger impact if you do use it, therefore to use this - feature you need to:

- -

-


-

-

-

Revised  - - 12 Dec 2003 -

-

© Copyright John Maddock  - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/character_class_names.html b/doc/character_class_names.html deleted file mode 100644 index 576e45d0..00000000 --- a/doc/character_class_names.html +++ /dev/null @@ -1,326 +0,0 @@ - - - - Boost.Regex: Character Class Names - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Character Class Names.

-
-

Boost.Regex Index

-
-

-
-

-

Contents

-
-
Character Classes that are Always Supported
- Character classes that are supported by Unicode Regular Expressions
-
-

Character Classes that are Always Supported

-

The following character class names are always supported by Boost.Regex:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NamePOSIX-standard nameDescription
alnumYesAny alpha-numeric character.
alphaYesAny alphabetic character.
blankYesAny whitespace character that is not a line separator.
cntrlYesAny control character.
dNoAny decimal digit
digitYesAny decimal digit.
graphYesAny graphical character.
lNoAny lower case character.
lowerYesAny lower case character.
printYesAny printable character.
punctYesAny punctuation character.
sNoAny whitespace character.
spaceYesAny whitespace character.
unicodeNoAny extended character whose code point is above 255 in value.
uNoAny upper case character.
upperYesAny upper case character.
wNoAny word character (alphanumeric characters plus the underscore).
wordNoAny word character (alphanumeric characters plus the underscore).
xdigitYesAny hexadecimal digit character.
-

-

-

Character classes that are supported by Unicode Regular - Expressions

-

The following character classes are only supported by Unicode - Regular Expressions: that is those that use the u32regex type.  The - names used are the same as those from - Chapter 4 of the Unicode standard.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Short NameLong Name
- ASCII
- Any
- Assigned
C*Other
CcControl
CfFormat
CnNot Assigned
CoPrivate Use
CsSurrogate
L*Letter
LlLowercase Letter
LmModifier Letter
LoOther Letter
LtTitlecase
LuUppercase Letter
M*Mark
McSpacing Combining Mark
MeEnclosing Mark
MnNon-Spacing Mark
N*Number
NdDecimal Digit Number
NlLetter Number
NoOther Number
P*Punctuation
PcConnector Punctuation
PdDash Punctuation
PeClose Punctuation
PfFinal Punctuation
PiInitial Punctuation
PoOther Punctuation
PsOpen Punctuation
S*Symbol
ScCurrency Symbol
SkModifier Symbol
SmMath Symbol
SoOther Symbol
Z*Separator
ZlLine Separator
ZpParagraph Separator
ZsSpace Separator
-
-

-

-

Revised   - - 10 Jan 2005  -

-

© Copyright John Maddock 2004-5

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/collating_names.html b/doc/collating_names.html deleted file mode 100644 index c553f1ed..00000000 --- a/doc/collating_names.html +++ /dev/null @@ -1,368 +0,0 @@ - - - - Boost.Regex: Collating Element Names - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Collating Element Names

-
-

Boost.Regex Index

-
-

-
-

-

Contents

-
-
Digraphs
-
POSIX Symbolic Names
-
Unicode Symbolic Names
-
-

Digraphs

-

The following are treated as valid digraphs when used as a collating name:

-

"ae", "Ae", "AE", "ch", "Ch", "CH", "ll", "Ll", "LL", "ss", "Ss", "SS", "nj", - "Nj", "NJ", "dz", "Dz", "DZ", "lj", "Lj", "LJ".

-

POSIX Symbolic Names

-

The following symbolic names are recognised as valid collating element names, - in addition to any single character:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameCharacter
NUL\x00
SOH\x01
STX\x02
ETX\x03
EOT\x04
ENQ\x05
ACK\x06
alert\x07
backspace\x08
tab\t
newline\n
vertical-tab\v
form-feed\f
carriage-return\r
SO\xE
SI\xF
DLE\x10
DC1\x11
DC2\x12
DC3\x13
DC4\x14
NAK\x15
SYN\x16
ETB\x17
CAN\x18
EM\x19
SUB\x1A
ESC\x1B
IS4\x1C
IS3\x1D
IS2\x1E
IS1\x1F
space\x20
exclamation-mark!
quotation-mark"
number-sign#
dollar-sign$
percent-sign%
ampersand&
apostrophe'
left-parenthesis(
right-parenthesis)
asterisk*
plus-sign+
comma,
hyphen-
period.
slash/
zero0
one1
two2
three3
four4
five5
six6
seven7
eight8
nine9
colon:
semicolon;
less-than-sign<
equals-sign=
greater-than-sign>
question-mark?
commercial-at@
left-square-bracket[
backslash\
right-square-bracket]
circumflex^
underscore_
grave-accent`
left-curly-bracket{
vertical-line|
right-curly-bracket}
tilde~
DEL\x7F
-

-

-

Named Unicode Characters

-

When using Unicode aware regular expressions (with - the u32regex type), all the normal symbolic names for Unicode - characters (those given in Unidata.txt) are recognised.

-

-


-

-

-

Revised 12 Jan 2005 -

-

© Copyright John Maddock 2004-2005

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/concepts.html b/doc/concepts.html deleted file mode 100644 index ed598933..00000000 --- a/doc/concepts.html +++ /dev/null @@ -1,453 +0,0 @@ - - - - Boost.Regex: Index - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Concepts

-
-

Boost.Regex Index

-
-

-
-

-

charT requirements

-

Type charT, used as a template argument to class template - basic_regex, must have a trivial default constructor, copy constructor, - assignment operator, and destructor.  In addition the following - requirements must be met for objects; c of type charT, c1 and c2 of type charT - const, and i of type int:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionReturn typeAssertion / Note / Pre- / Post-condition
charT ccharTDefault constructor (must be trivial).
charT c(c1)charTCopy constructor (must be trivial).
c1 = c2charTAssignment operator (must be trivial).
c1 == c2booltrue if c1 has the same value as c2.
c1 != c2booltrue if c1 and c2 are not equal.
c1 < c2booltrue if the value of c1 is less than c2.
c1 > c2booltrue if the value of c1 is greater than c2.
c1 <= c2booltrue if c1 is less than or equal to c2.
c1 >= c2booltrue if c1 is greater than or equal to c2.
intmax_t i = c1int -

charT must be convertible to an integral type.

-

Note: type charT is not required to support this operation, if the traits class - used supports the full Boost-specific interface, rather than the minimal - standardised-interface (see traits class requirements below).

-
charT c(i);charTcharT must be constructible from an integral type.
-

-

traits requirements

-

There are two sets of requirements for the traits template argument to - basic_regex: a minimal interface (which is part of the regex standardization - proposal), and an optional Boost-specific enhanced interface.

-

Minimal requirements.

-

In the following table X denotes a traits class defining types and functions - for the character container type charT; u is an object of type X; v is an - object of type const X; p is a value of type const charT*; I1 and I2 are Input - Iterators; c is a value of type const charT; s is an object of type - X::string_type; cs is an object of type const X::string_type; b is a value of - type bool; I is a value of type int; F1 and F2 are values of type const charT*; - and loc is an object of type X::locale_type.

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Expression

-
-

Return type

-
-

Assertion / Note -
- Pre / Post condition

-
-

X::char_type

-
-

charT

-
-

The character container type used in the implementation of class template basic_regex.

-
-

X::size_type

-
-

 

-
-

An unsigned integer type, capable of holding the length of a null-terminated - string of charT's.

-
-

X::string_type

-
-

std::basic_string<charT> or std::vector<charT>

-
-

 

-
-

X::locale_type

-
-

Implementation defined

-
-

A copy constructible type that represents the locale used by the traits class.

-
-

X::char_class_type

-
-

Implementation defined

-
-

A bitmask type representing a particular character classification. Multiple - values of this type can be bitwise-or'ed together to obtain a new valid value.

-
-

X::length(p)

-
-

X::size_type

-
-

Yields the smallest i such that p[i] == 0. Complexity - is linear in i.

-
-

v.translate(c)

-
-

X::char_type

-
-

Returns a character such that for any character d that is to be considered - equivalent to c then v.translate(c) == v.translate(d).

-
-

v.translate_nocase(c)

-
X::char_typeFor all characters C that are to be considered - equivalent to c when comparisons are to be performed without regard to case, - then v.translate_- nocase(c) == v.translate_- nocase(C).
-

v.transform(F1, F2)

-
-

X::string_type

-
-

Returns a sort key for the character sequence designated by the iterator range - [F1, F2) such that if the character sequence [G1, G2) sorts before the - character sequence [H1, H2) then v.transform(G1, G2) < v.transform(H1, - H2). 

-
-

v.transform_primary(F1, F2)

-
-

X::string_type

-
-

Returns a sort key for the character sequence designated by the iterator range - [F1, F2) such that if the character sequence [G1, G2) sorts before the - character sequence [H1, H2) when character case is not considered then - v.transform_primary(G1, G2) < v.transform_- primary(H1, H2).

-
-

v.lookup_classname(F1, F2)

-
-

X::char_class_type

-
-

Converts the character sequence designated by the iterator range [F1,F2) into a - bitmask type that can subsequently be passed to isctype. Values returned from - lookup_classname can be safely bitwise or'ed together. Returns 0 if the - character sequence is not the name of a character class recognized by X. The - value returned shall be independent of the case of the characters in the - sequence.

-
-

v.lookup_collatename(F1, F2)

-
-

X::string_type

-
-

Returns a sequence of characters that represents the collating element - consisting of the character sequence designated by the iterator range [F1, F2). - Returns an empty string if the character sequence is not a valid collating - element.

-
-

v.isctype(c, v.lookup_classname (F1, F2))

-
-

bool

-
-

Returns true if character c is a member of the character class designated by - the iterator range [F1, F2), false otherwise.

-
-

v.value(c, I)

-
-

int

-
-

Returns the value represented by the digit c in base I if the character c is a - valid digit in base I; otherwise returns -1. [Note: the value of I will only be - 8, 10, or 16. -end note]

-
-

u.imbue(loc)

-
-

X::locale_type

-
-

Imbues u with the locale loc, returns the previous - locale used by u if any. 

-
-

v.getloc()

-
-

X::locale_type

-
-

Returns the current locale used by v if any. 

-
-

v.error_string(i)

-
-

std::string

-
-

Returns a human readable error string for the error condition i, - where i is one of the values enumerated by type regex_constants::error_type.  - If the value i is not recognized then returns the string "Unknown - error" or a localized equivalent.

-
-

-

Additional Optional Requirements

-

The following additional requirements are strictly optional, however in order - for basic_regex to take advantage of these additional interfaces, all of the - following requirements must be met; basic_regex will detect the presence or - absence of member boost_extensions_tag and configure itself - appropriately.

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionResult -

Assertion / Note -
- Pre / Post condition

-
X::boost_extensions_tagAn unspecified type.When present, all of the extensions listed in this table must be present.
-

v.syntax_type(c)

-
regex_constants::syntax_type -

Returns a symbolic value of type regex_constants::syntax_type that - signifies the meaning of character c within the regular expression - grammar.

-
v.escape_syntax_type(c)regex_constants::escape_syntax_type -

Returns a symbolic value of type regex_constants::escape_syntax_type, - that signifies the meaning of character c within the regular - expression grammar, when c has been preceded by an escape - character. Precondition: if b is the character preceding c - in the expression being parsed then: v.syntax_type(b) == syntax_escape

-
-

v.translate(c, b)

-
X::char_type -

Returns a character d such that: for any character d that - is to be considered equivalent to c then v.translate(c,false)==v.translate(d,false). - Likewise for all characters C that are to be considered equivalent - to c when comparisons are to be performed without regard to case, - then v.translate(c,true)==v.translate(C,true).

-
-

v.toi(I1, I2, i)

-
An integer type capable of holding either a charT or an int. -

Behaves as follows: if I1 == I2 or if *I1 is not a digit - character then returns -1. Otherwise performs formatted numeric input on the - sequence [I1,I2) and returns the result as an int. Postcondition: either I1 == - I2 or *I1 is a non-digit character.

-
-

v.error_string(i)

-
std::string -

Returns a human readable error string for the error condition i, - where i is one of the values enumerated by type - regex_constants::error_type.  If the value i - is not recognized then returns the string "Unknown error" or a localized - equivalent.

-
v.tolower(c)X::char_typeConverts c to lower case, used for Perl-style \l and \L formatting operations.
v.toupper(c)X::char_typeConverts c to upper case, used for Perl-style \u and \U formatting operations.
-

-

Iterator Requirements

-

-

The regular expression algorithms (and iterators) all require a - Bidirectional-Iterator.

-

-


-

-

-

Revised - - 24 June 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/configuration.html b/doc/configuration.html deleted file mode 100644 index 9da777e7..00000000 --- a/doc/configuration.html +++ /dev/null @@ -1,155 +0,0 @@ - - - - Boost.Regex: Configuration and setup - - - - - - - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Configuration and setup

-
-

Boost.Regex Index

-
-
-
-
-

Contents

-
-
Compiler setup
Locale and traits class - selection
Linkage Options
Algorithm - Selection
Algorithm Tuning
-
-

Compiler setup.

-

You shouldn't need to do anything special to configure boost.regex for use with - your compiler - the boost.config subsystem - should already take care of it, if you do have problems (or you are using a - particularly obscure compiler or platform) then boost.config has - a configure script.

-

Locale and traits class selection.

-

The following macros (see user.hpp) - control how boost.regex interacts with the user's locale:

- - - - - - - - - - - - - -
BOOST_REGEX_USE_C_LOCALE - Forces boost.regex to use the global C locale in its traits class support: this - is now deprecated in favour of the C++ locale.
BOOST_REGEX_USE_CPP_LOCALEForces boost.regex to use std::locale in its default traits class, regular - expressions can then be imbued with an instance specific locale.  - This is the default behaviour on non-Windows platforms.
BOOST_REGEX_NO_W32Tells boost.regex not to use any Win32 API's even when available (implies - BOOST_REGEX_USE_CPP_LOCALE unless BOOST_REGEX_USE_C_LOCALE is set).
-
-
-

Linkage Options

- - - - - - - - - -
BOOST_REGEX_DYN_LINKFor Microsoft and Borland C++ builds, this tells boost.regex that it should - link to the dll build of the boost.regex.  By default boost.regex will - link to its static library build, even if the dynamic C runtime library is in - use.
BOOST_REGEX_NO_LIBFor Microsoft and Borland C++ builds, this tells boost.regex that it should - not automatically select the library to link to.
-
-
-

Algorithm Selection

- - - - - - - - - -
BOOST_REGEX_RECURSIVETells boost.regex to use a stack-recursive matching algorithm.  This is - generally the fastest option (although there is very little in it), but can - cause stack overflow in extreme cases, on Win32 this can be handled safely, but - this is not the case on other platforms.
BOOST_REGEX_NON_RECURSIVETells boost.regex to use a non-stack recursive matching algorithm, this can be - slightly slower than the alternative, but is always safe no matter how - pathological the regular expression.  This is the default on non-Win32 - platforms.
-
-
-

Algorithm Tuning

-

The following option applies only if BOOST_REGEX_RECURSIVE is set.

- - - - - -
BOOST_REGEX_HAS_MS_STACK_GUARDTells boost.regex that Microsoft style __try - __except blocks are supported, - and can be used to safely trap stack overflow.
-
-
-

The following options apply only if BOOST_REGEX_NON_RECURSIVE is set.

- - - - - - - - - - - - - -
BOOST_REGEX_BLOCKSIZEIn non-recursive mode, boost.regex uses largish blocks of memory to act as a - stack for the state machine, the larger the block size then the fewer - allocations that will take place.  This defaults to 4096 bytes, which is - large enough to match the vast majority of regular expressions without - further allocations, however, you can choose smaller or larger values depending - upon your platform's characteristics.
BOOST_REGEX_MAX_BLOCKSTells boost.regex how many blocks of size BOOST_REGEX_BLOCKSIZE it is - permitted to use.  If this value is exceeded then boost.regex will stop - trying to find a match and throw a std::runtime_error.  Defaults to 1024, - don't forget to tweak this value if you alter BOOST_REGEX_BLOCKSIZE by much.
BOOST_REGEX_MAX_CACHE_BLOCKSTells boost.regex how many memory blocks to store in its internal cache - - memory blocks are taken from this cache rather than by calling ::operator - new.  Generally speeking this can be an order of magnitude faster than - calling ::operator new each time a memory block is required, but has the - downside that boost.regex can end up caching a large chunk of memory (by - default up to 16 blocks each of BOOST_REGEX_BLOCKSIZE size).  If memory is - tight then try defining this to 0 (disables all caching), or if that is too - slow, then a value of 1 or 2, may be sufficient.  On the other hand, on - large multi-processor, multi-threaded systems, you may find that a higher value - is in order.
-
-
-
-

Revised  - - 23 June 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/contacts.html b/doc/contacts.html deleted file mode 100644 index 645460ec..00000000 --- a/doc/contacts.html +++ /dev/null @@ -1,87 +0,0 @@ - - - - Boost.Regex: Contacts - - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Contacts and Acknowledgements

-
-

Boost.Regex Index

-
-
-
-
-

The author can be contacted at john@johnmaddock.co.uk; the home page for - this library is at www.boost.org.

-

I am indebted to Robert Sedgewick's - "Algorithms in C++" for forcing me to think about algorithms and their - performance, and to the folks at boost for - forcing me to think, period.

-

Eric Niebler, author of the - GRETA regular expression component, has shared several important ideas, - in a series of long discussions.

-

Pete Becker, of Dinkumware Ltd, has - helped enormously with the standardisation proposal language.

-

The following people have all contributed useful comments or fixes: Dave - Abrahams, Mike Allison, Edan Ayal, Jayashree Balasubramanian, Jan Bölsche, - Beman Dawes, Paul Baxter, David Bergman, David Dennerline, Edward Diener, Peter - Dimov, Robert Dunn, Fabio Forno, Tobias Gabrielsson, Rob Gillen, Marc Gregoire, - Chris Hecker, Nick Hodapp, Jesse Jones, Martin Jost, Boris Krasnovskiy, Jan - Hermelink, Max Leung, Wei-hao Lin, Jens Maurer, Richard Peters, Heiko Schmidt, - Jason Shirk, Gerald Slacik, Scobie Smith, Mike Smyth, Alexander Sokolovsky, - Hervé Poirier, Michael Raykh, Marc Recht, Scott VanCamp, Bruno Voigt, Alexey - Voinov, Jerry Waldorf, Rob Ward, Lealon Watts, John Wismar, Thomas Witt and - Yuval Yosef. I am also grateful to the manuals supplied with the Henry Spencer, - Perl and GNU regular expression libraries - wherever possible I have tried to - maintain compatibility with these libraries and with the POSIX standard - the - code however is entirely my own, including any bugs! I can absolutely guarantee - that I will not fix any bugs I don't know about, so if you have any comments or - spot any bugs, please get in touch.

-

Useful further information can be found at:

-

Short tutorials on regular expressions can be - found here and here.

-

The main book on regular expressions is - Mastering Regular Expressions, published by O'Reilly.

-

Information on the - Boost.regex standardization proposal, along with other - standard library extension proposals can be found on the - C++ Committees web pages.

-

The Open Unix - Specification contains a wealth of useful material, including the - regular expression syntax, and specifications for - <regex.h> and - <nl_types.h>.

-

The Pattern Matching Pointers - site is a "must visit" resource for anyone interested in pattern matching.

-

Glimpse and Agrep, use a - simplified regular expression syntax to achieve faster search times.

-

Udi Manber and - Ricardo Baeza-Yates both have a selection of useful pattern matching - papers available from their respective web sites.

-

-
-

Revised - - 24 Oct 2003 -

-

© Copyright John Maddock 1998- - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/error_type.html b/doc/error_type.html deleted file mode 100644 index 6ca14383..00000000 --- a/doc/error_type.html +++ /dev/null @@ -1,139 +0,0 @@ - - - - Boost.Regex: error_type - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

error_type

-
-

Boost.Regex Index

-
-

-
-

Contents

-
-
Synopsis
Description
-

Synopsis

-

Type error_type represents the different types of errors that can be raised by - the library when parsing a regular expression.

-
-namespace boost{ namespace regex_constants{
-
-typedef implementation-specific-type error_type;
-
-static const error_type error_collate;
-static const error_type error_ctype;
-static const error_type error_escape;
-static const error_type error_backref;
-static const error_type error_brack;
-static const error_type error_paren;
-static const error_type error_brace;
-static const error_type error_badbrace;
-static const error_type error_range;
-static const error_type error_space;
-static const error_type error_badrepeat;
-static const error_type error_complexity;
-static const error_type error_stack;
-static const error_type error_bad_pattern;
-
-} // namespace regex_constants
-} // namespace boost
-
-

 

-

Description

-

The type error_type is an implementation-specific enumeration type that may - take one of the following values:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ConstantMeaning
error_collateAn invalid collating element was specified in a [[.name.]] block.
error_ctypeAn invalid character class name was specified in a [[:name:]] block.
error_escapeAn invalid or trailing escape was encountered.
error_backrefA back-reference to a non-existent marked sub-expression was encountered.
error_brackAn invalid character set [...] was encountered.
error_paren -

Mismatched '(' and ')'.

-
error_braceMismatched '{' and '}'.
error_badbraceInvalid contents of a {...} block.
error_rangeA character range was invalid, for example [d-a].
error_spaceOut of memory.
error_badrepeatAn attempt to repeat something that can not be repeated - for example a*+
error_complexityThe expression became too complex to handle.
error_stackOut of program stack space.
error_bad_patternOther unspecified errors.
-

-

-
-

-

Revised  - - 24 June 2004  - -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - diff --git a/doc/examples.html b/doc/examples.html deleted file mode 100644 index 737b11ee..00000000 --- a/doc/examples.html +++ /dev/null @@ -1,117 +0,0 @@ - - - - Boost.Regex: Examples - - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Examples

-
-

Boost.Regex Index

-
-
-
-
-

Test Programs

-

regress:

-

A regression test application that gives the matching/searching algorithms a - full workout. The presence of this program is your guarantee that the library - will behave as claimed - at least as far as those items tested are concerned - - if anyone spots anything that isn't being tested I'd be glad to hear about it.

-

Directory: libs/regex/test/regress.

-

Files: basic_tests.cpp - test_deprecated.cpp main.cpp.

-

bad_expression_test:

-

Verifies that "bad" regular expressions don't cause the matcher to go into - infinite loops, but to throw an exception instead.

-

Directory: libs/regex/test/pathology.

-

Files: bad_expression_test.cpp.

-

recursion_test:

-

Verifies that the matcher can't overrun the stack (no matter what the - expression).

-

Directory: libs/regex/test/pathology.

-

Files: recursion_test.cpp.

-

concepts:

-

Verifies that the library meets all documented concepts (a compile only test).

-

Directory: libs/regex/test/concepts.

-

Files: concept_check.cpp.

-

captures_test:

-

Test code for captures.

-

Directory: libs/regex/test/captures.

-

Files: captures_test.cpp.

-

Example programs

-

grep

-

A simple grep implementation, run with the -h command line option to find out - its usage.

-

Files: grep.cpp

-

timer.exe

-

A simple interactive expression matching application, the results of all - matches are timed, allowing the programmer to optimize their regular - expressions where performance is critical.

-

Files: regex_timer.cpp.

-

Code snippets

-

The snippets examples contain the code examples used in the documentation:

-

captures_example.cpp: - Demonstrates the use of captures.

-

credit_card_example.cpp: - Credit card number formatting code.

-

partial_regex_grep.cpp: - Search example using partial matches.

-

partial_regex_match.cpp: - regex_match example using partial matches.

-

regex_iterator_example.cpp: - Iterating through a series of matches.

-

regex_match_example.cpp: - ftp based regex_match example.

-

regex_merge_example.cpp: - regex_merge example: converts a C++ file to syntax highlighted HTML.

-

regex_replace_example.cpp: - regex_replace example: converts a C++ file to syntax highlighted HTML

-

regex_search_example.cpp: - regex_search example: searches a cpp file for class definitions.

-

regex_token_iterator_eg_1.cpp: - split a string into a series of tokens.

-

regex_token_iterator_eg_2.cpp: - enumerate the linked URL's in a HTML file.

-

The following are deprecated:

-

regex_grep_example_1.cpp: - regex_grep example 1: searches a cpp file for class definitions.

-

regex_grep_example_2.cpp: - regex_grep example 2: searches a cpp file for class definitions, using a global - callback function.

-

regex_grep_example_3.cpp: - regex_grep example 3: searches a cpp file for class definitions, using a bound - member function callback.

-

regex_grep_example_4.cpp: - regex_grep example 4: searches a cpp file for class definitions, using a C++ - Builder closure as a callback.

-

regex_split_example_1.cpp: - regex_split example: split a string into tokens.

-

regex_split_example_2.cpp - : regex_split example: spit out linked URL's.

-

-
-

Revised  - - 28 June 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/faq.html b/doc/faq.html deleted file mode 100644 index e9557443..00000000 --- a/doc/faq.html +++ /dev/null @@ -1,114 +0,0 @@ - - - - Boost.Regex: FAQ - - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

FAQ

-
-

Boost.Regex Index

-
-
-
-
- -

 Q. Why can't I - use the "convenience" versions of regex_match / regex_search / regex_grep / - regex_format / regex_merge?

-

A. These versions may or may not be available depending upon the capabilities - of your compiler, the rules determining the format of these functions are quite - complex - and only the versions visible to a standard compliant compiler are - given in the help. To find out what your compiler supports, run - <boost/regex.hpp> through your C++ pre-processor, and search the output - file for the function that you are interested in.

-

Q. I can't get regex++ to work with - escape characters, what's going on?

-

A. If you embed regular expressions in C++ code, then remember that escape - characters are processed twice: once by the C++ compiler, and once by the - regex++ expression compiler, so to pass the regular expression \d+ to regex++, - you need to embed "\\d+" in your code. Likewise to match a literal backslash - you will need to embed "\\\\" in your code. -

-

Q. Why does using parentheses in a POSIX regular expression - change the result of a match?

-

For POSIX (extended and basic) regular expressions, but not for perl regexes, - parentheses don't only mark; they determine what the best match is as well. - When the expression is compiled as a POSIX basic or extended regex then - Boost.regex follows the POSIX standard leftmost longest rule for determining - what matched. So if there is more than one possible match after considering the - whole expression, it looks next at the first sub-expression and then the second - sub-expression and so on. So...

-
-"(0*)([0-9]*)" against "00123" would produce
-$1 = "00"
-$2 = "123"
-
-

whereas

-
-"0*([0-9]*)" against "00123" would produce
-$1 = "00123"
-
-

If you think about it, had $1 only matched the "123", this would be "less good" - than the match "00123" which is both further to the left and longer. If you - want $1 to match only the "123" part, then you need to use something like:

-
-"0*([1-9][0-9]*)"
-
-

as the expression.

-

Q. Why don't character ranges work properly (POSIX mode - only)?
- A. The POSIX standard specifies that character range expressions are locale - sensitive - so for example the expression [A-Z] will match any collating - element that collates between 'A' and 'Z'. That means that for most locales - other than "C" or "POSIX", [A-Z] would match the single character 't' for - example, which is not what most people expect - or at least not what most - people have come to expect from regular expression engines. For this reason, - the default behaviour of boost.regex (perl mode) is to turn locale sensitive - collation off by not setting the regex_constants::collate compile time flag. - However if you set a non-default compile time flag - for example - regex_constants::extended or regex_constants::basic, then locale dependent - collation will be enabled, this also applies to the POSIX API functions which - use either regex_constants::extended or regex_constants::basic internally. [Note - - when regex_constants::nocollate is in effect, the library behaves "as if" the - LC_COLLATE locale category were always "C", regardless of what it's actually set - to - end note].

-

Q. Why are there no throw specifications on any of the - functions? What exceptions can the library throw?

-

A. Not all compilers support (or honor) throw specifications, others support - them but with reduced efficiency. Throw specifications may be added at a later - date as compilers begin to handle this better. The library should throw only - three types of exception: boost::bad_expression can be thrown by basic_regex - when compiling a regular expression, std::runtime_error can be thrown when a - call to basic_regex::imbue tries to open a message catalogue that doesn't - exist, or when a call to regex_search or regex_match results in an - "everlasting" search, or when a call to RegEx::GrepFiles or - RegEx::FindFiles tries to open a file that cannot be opened, finally - std::bad_alloc can be thrown by just about any of the functions in this - library.

-

-
-

Revised - - 24 Oct 2003 -

-

© Copyright John Maddock 1998- - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/format_boost_syntax.html b/doc/format_boost_syntax.html deleted file mode 100644 index d497ddeb..00000000 --- a/doc/format_boost_syntax.html +++ /dev/null @@ -1,163 +0,0 @@ - - - - Boost.Regex: Boost-Extended Format String Syntax - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Boost-Extended Format String Syntax

-
-

Boost.Regex Index

-
-

-

Boost-Extended format strings treat all characters as literals except for - '$', '\', '(', ')', '?', and ':'.

-

Grouping

-

The characters '(' and ')' perform lexical grouping, use \( and \) if you want - to output literal parentheses.

-

Conditionals

-

The character '?' begins a conditional expression, the general form is:

-
?Ntrue-expression:false-expression
-

where N is a decimal digit.

-

If sub-expression N was matched, then true-expression is evaluated and - sent to output, otherwise false-expression is evaluated and sent to output.

-

You will normally need to surround a conditional-expression with parentheses in - order to prevent ambiguities.

-

Placeholder Sequences

-

Placeholder sequences specify that some part of what matched the regular - expression should be sent to output as follows:

-

- - - - - - - - - - - - - - - - - - - - - - - - - -
PlaceholderMeaning
$&Outputs what matched the whole expression.
$`Outputs the text between the end of the last match found (or the start of the - text if no previous match was found), and the start of the current match.
$'Outputs all the text following the end of the current match.
$$Outputs a literal '$'
$nOutputs what matched the n'th sub-expression.
-

-

Any $-placeholder sequence not listed above, results in '$' being treated as a - literal.

-

Escape Sequences

-

An escape character followed by any character x, outputs that - character unless x is one of the escape sequences shown below.

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
EscapeMeaning
\aOutputs the bell character: '\a'.
\eOutputs the ANSI escape character (code point 27).
\fOutputs a form feed character: '\f'
\nOutputs a newline character: '\n'.
\rOutputs a carriage return character: '\r'.
\tOutputs a tab character: '\t'.
\vOutputs a vertical tab character: '\v'.
\xDDOutputs the character whose hexadecimal code point is 0xDD
\x{DDDD}Outputs the character whose hexadecimal code point is 0xDDDD
\cXOutputs the ANSI escape sequence "escape-X".
\DIf D is a decimal digit in the range 1-9, then outputs the text that - matched sub-expression D.
\lCauses the next character to be output in lower case.
\uCauses the next character to be output in upper case.
\LCauses all subsequent characters to be output in lower case, until a \E is - found.
\UCauses all subsequent characters to be output in upper case, until a \E is - found.
\ETerminates a \L or \U sequence.
-

-

-


-

-

-

Revised - - 24 Nov 2004 -

-

© Copyright John Maddock 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/format_perl_syntax.html b/doc/format_perl_syntax.html deleted file mode 100644 index 481a141a..00000000 --- a/doc/format_perl_syntax.html +++ /dev/null @@ -1,150 +0,0 @@ - - - - Boost.Regex: Perl-Style Format String Syntax - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Perl-Style Format String Syntax

-
-

Boost.Regex Index

-
-

-

Perl-style format strings treat all characters as literals except '$' and '\' - which start placeholder and escape sequences respectively.

-

Placeholder sequences specify that some part of what matched the regular - expression should be sent to output as follows:

-

- - - - - - - - - - - - - - - - - - - - - - - - - -
PlaceholderMeaning
$&Outputs what matched the whole expression.
$`Outputs the text between the end of the last match found (or the start of the - text if no previous match was found), and the start of the current match.
$'Outputs all the text following the end of the current match.
$$Outputs a literal '$'
$nOutputs what matched the n'th sub-expression.
-

-

Any $-placeholder sequence not listed above, results in '$' being treated as a - literal.

-

An escape character followed by any character x, outputs that - character unless x is one of the escape sequences shown below.

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
EscapeMeaning
\aOutputs the bell character: '\a'.
\eOutputs the ANSI escape character (code point 27).
\fOutputs a form feed character: '\f'
\nOutputs a newline character: '\n'.
\rOutputs a carriage return character: '\r'.
\tOutputs a tab character: '\t'.
\vOutputs a vertical tab character: '\v'.
\xDDOutputs the character whose hexadecimal code point is 0xDD
\x{DDDD}Outputs the character whose hexadecimal code point is 0xDDDD
\cXOutputs the ANSI escape sequence "escape-X".
\DIf D is a decimal digit in the range 1-9, then outputs the text that - matched sub-expression D.
\lCauses the next character to be output in lower case.
\uCauses the next character to be output in upper case.
\LCauses all subsequent characters to be output in lower case, until a \E is - found.
\UCauses all subsequent characters to be output in upper case, until a \E is - found.
\ETerminates a \L or \U sequence.
-

-

-


-

-

-

Revised - - 24 Nov 2004 -

-

© Copyright John Maddock 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/format_sed_syntax.html b/doc/format_sed_syntax.html deleted file mode 100644 index ee0a331b..00000000 --- a/doc/format_sed_syntax.html +++ /dev/null @@ -1,109 +0,0 @@ - - - - Boost.Regex: Sed-Style Format String Syntax - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Sed-Style Format String Syntax

-
-

Boost.Regex Index

-
-

-

Sed-style format strings treat all characters as literals except:

-

- - - - - - - - - -
&The ampersand character is replaced in the output stream by the whole of - what matched the regular expression.  Use \& to output a literal - '&' character.
\Specifies an escape sequence.
-

-

-

An escape character followed by any character x, outputs that - character unless x is one of the escape sequences shown below.

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
EscapeMeaning
\aOutputs the bell character: '\a'.
\eOutputs the ANSI escape character (code point 27).
\fOutputs a form feed character: '\f'
\nOutputs a newline character: '\n'.
\rOutputs a carriage return character: '\r'.
\tOutputs a tab character: '\t'.
\vOutputs a vertical tab character: '\v'.
\xDDOutputs the character whose hexadecimal code point is 0xDD
\x{DDDD}Outputs the character whose hexadecimal code point is 0xDDDD
\cXOutputs the ANSI escape sequence "escape-X".
\DIf D is a decimal digit in the range 1-9, then outputs the text that - matched sub-expression D.
-

-

-


-

-

-

Revised - - 24 Nov 2004 -

-

© Copyright John Maddock 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/format_syntax.html b/doc/format_syntax.html deleted file mode 100644 index c7061d21..00000000 --- a/doc/format_syntax.html +++ /dev/null @@ -1,52 +0,0 @@ - - - - Boost.Regex: Format String Syntax - - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Format String Syntax

-
-

Boost.Regex Index

-
-
-
-
-

Format strings are used by the algorithm regex_replace and - by match_results::format, and are used to - transform one string into another.

-

- There are three kinds of format string: Sed, Perl and Boost-extended.

-

Alternatively, when the flag format_literal is passed to one of these - functions, then the format string is treated as a string literal, and is copied - unchanged to the output.

-

Sed Style Format Strings
- Perl Style Format Strings
- Boost-Extended Format Strings

-

-
-

Revised - - 24 Nov 2004 -

-

© Copyright John Maddock 1998- - - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/gcc-performance.html b/doc/gcc-performance.html deleted file mode 100644 index 5dcea95a..00000000 --- a/doc/gcc-performance.html +++ /dev/null @@ -1,543 +0,0 @@ - - - - Regular Expression Performance Comparison (gcc 3.2) - - - - - - - -

Regular Expression Performance Comparison

-

The following tables provide comparisons between the following regular - expression libraries:

-

The Boost regex library.

-

The GNU regular expression library.

-

Philip Hazel's PCRE library.

-

Details

-

Machine: Intel Pentium 4 2.8GHz PC.

-

Compiler: GNU C++ version 3.2 20020927 (prerelease).

-

C++ Standard Library: GNU libstdc++ version 20020927.

-

OS: Cygwin.

-

Boost version: 1.31.0.

-

PCRE version: 4.1.

-

As ever care should be taken in interpreting the results, only sensible regular - expressions (rather than pathological cases) are given, most are taken from the - Boost regex examples, or from the Library of - Regular Expressions. In addition, some variation in the relative - performance of these libraries can be expected on other machines - as memory - access and processor caching effects can be quite large for most finite state - machine algorithms. In each case the first figure given is the relative time - taken (so a value of 1.0 is as good as it gets), while the second figure is the - actual time taken.

-

Averages

-

The following are the average relative scores for all the tests: the perfect - regular expression library would score 1, in practice anything less than 2 - is pretty good.

- - - - - - - - - - - - - -
BoostBoost + C++ localePOSIXPCRE
1.45031.49124108.3721.56255
-
-
-

Comparison 1: Long Search

-

For each of the following regular expressions the time taken to find all - occurrences of the expression within a long English language text was measured - (mtent12.txt - from Project Gutenberg, 19Mb). 

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionBoostBoost + C++ localePOSIXPCRE
Twain3.49
- (0.205s)
4.09
- (0.24s)
65.2
- (3.83s)
1
- (0.0588s)
Huck[[:alpha:]]+3.86
- (0.203s)
4.52
- (0.238s)
100
- (5.26s)
1
- (0.0526s)
[[:alpha:]]+ing1.01
- (1.23s)
1
- (1.22s)
4.95
- (6.04s)
4.67
- (5.71s)
^[^ ]*?Twain1
- (0.31s)
1.05
- (0.326s)
NA3.32
- (1.03s)
Tom|Sawyer|Huckleberry|Finn1.02
- (0.125s)
1
- (0.123s)
165
- (20.3s)
1.08
- (0.133s)
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)1
- (0.345s)
1.03
- (0.355s)
NA1.71
- (0.59s)
-
-
-

Comparison 2: Medium Sized Search

-

For each of the following regular expressions the time taken to find all - occurrences of the expression within a medium sized English language text was - measured (the first 50K from mtent12.txt). 

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionBoostBoost + C++ localePOSIXPCRE
Twain1.8
- (0.000519s)
2.14
- (0.000616s)
9.08
- (0.00262s)
1
- (0.000289s)
Huck[[:alpha:]]+3.65
- (0.000499s)
4.36
- (0.000597s)
1
- (0.000137s)
1.43
- (0.000196s)
[[:alpha:]]+ing1
- (0.00258s)
1
- (0.00258s)
5.28
- (0.0136s)
5.63
- (0.0145s)
^[^ ]*?Twain1
- (0.000929s)
1.03
- (0.000957s)
NA2.82
- (0.00262s)
Tom|Sawyer|Huckleberry|Finn1
- (0.000812s)
1
- (0.000812s)
60.1
- (0.0488s)
1.28
- (0.00104s)
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)1.02
- (0.00178s)
1
- (0.00174s)
242
- (0.421s)
1.3
- (0.00227s)
-
-
-

Comparison 3: C++ Code Search

-

For each of the following regular expressions the time taken to find all - occurrences of the expression within the C++ source file - boost/crc.hpp was measured. 

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionBoostBoost + C++ localePOSIXPCRE
^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\<\w+\>([ - ]*\([^)]*\))?[[:space:]]*)*(\<\w*\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\{|:[^;\{()]*\{)1.04
- (0.000144s)
1
- (0.000139s)
862
- (0.12s)
4.56
- (0.000636s)
(^[ - ]*#(?:[^\\\n]|\\[^\n_[:punct:][:alnum:]]*[\n[:punct:][:word:]])*)|(//[^\n]*|/\*.*?\*/)|\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\>|('(?:[^\\']|\\.)*'|"(?:[^\\"]|\\.)*")|\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\>1
- (0.0139s)
1.01
- (0.0141s)
NA1.55
- (0.0216s)
^[ ]*#[ ]*include[ ]+("[^"]+"|<[^>]+>)1.04
- (0.000332s)
1
- (0.000318s)
130
- (0.0413s)
1.72
- (0.000547s)
^[ ]*#[ ]*include[ ]+("boost/[^"]+"|<boost/[^>]+>)1.02
- (0.000323s)
1
- (0.000318s)
150
- (0.0476s)
1.72
- (0.000547s)
-
-

-

Comparison 4: HTML Document Search -

-

For each of the following regular expressions the time taken to find all - occurrences of the expression within the html file libs/libraries.htm - was measured. 

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionBoostBoost + C++ localePOSIXPCRE
beman|john|dave1.03
- (0.000367s)
1
- (0.000357s)
47.4
- (0.0169s)
1.16
- (0.000416s)
<p>.*?</p>1.25
- (0.000459s)
1
- (0.000367s)
NA1.03
- (0.000376s)
<a[^>]+href=("[^"]*"|[^[:space:]]+)[^>]*>1
- (0.000509s)
1.02
- (0.000518s)
305
- (0.155s)
1.1
- (0.000558s)
<h[12345678][^>]*>.*?</h[12345678]>1.04
- (0.00025s)
1
- (0.00024s)
NA1.16
- (0.000279s)
<img[^>]+src=("[^"]*"|[^[:space:]]+)[^>]*>2.22
- (0.000489s)
1.69
- (0.000372s)
148
- (0.0326s)
1
- (0.00022s)
<font[^>]+face=("[^"]*"|[^[:space:]]+)[^>]*>.*?</font>1.71
- (0.000371s)
1.75
- (0.000381s)
NA1
- (0.000218s)
-
-
-

Comparison 5: Simple Matches

-

For each of the following regular expressions the time taken to match against - the text indicated was measured. 

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionTextBoostBoost + C++ localePOSIXPCRE
abcabc1.36
- (2.15e-07s)
1.36
- (2.15e-07s)
2.76
- (4.34e-07s)
1
- (1.58e-07s)
^([0-9]+)(\-| |$)(.*)$100- this is a line of ftp response which contains a message string1.55
- (7.26e-07s)
1.51
- (7.07e-07s)
319
- (0.000149s)
1
- (4.67e-07s)
([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}1234-5678-1234-4561.96
- (9.54e-07s)
1.96
- (9.54e-07s)
44.5
- (2.17e-05s)
1
- (4.87e-07s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$john@johnmaddock.co.uk1.22
- (1.51e-06s)
1.23
- (1.53e-06s)
162
- (0.000201s)
1
- (1.24e-06s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$foo12@foo.edu1.28
- (1.47e-06s)
1.3
- (1.49e-06s)
104
- (0.00012s)
1
- (1.15e-06s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$bob.smith@foo.tv1.28
- (1.47e-06s)
1.3
- (1.49e-06s)
113
- (0.00013s)
1
- (1.15e-06s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$EH10 2QQ1.38
- (4.68e-07s)
1.41
- (4.77e-07s)
13.5
- (4.59e-06s)
1
- (3.39e-07s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$G1 1AA1.28
- (4.35e-07s)
1.25
- (4.25e-07s)
11.7
- (3.97e-06s)
1
- (3.39e-07s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$SW1 1ZZ1.32
- (4.53e-07s)
1.31
- (4.49e-07s)
12.2
- (4.2e-06s)
1
- (3.44e-07s)
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$4/1/20011.16
- (3.82e-07s)
1.2
- (3.96e-07s)
13.9
- (4.59e-06s)
1
- (3.29e-07s)
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$12/12/20011.38
- (4.49e-07s)
1.38
- (4.49e-07s)
16
- (5.2e-06s)
1
- (3.25e-07s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$1231.19
- (7.64e-07s)
1.16
- (7.45e-07s)
7.51
- (4.81e-06s)
1
- (6.4e-07s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$+3.141591.32
- (8.97e-07s)
1.31
- (8.88e-07s)
14
- (9.48e-06s)
1
- (6.78e-07s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$-3.141591.32
- (8.97e-07s)
1.31
- (8.88e-07s)
14
- (9.48e-06s)
1
- (6.78e-07s)
-
-
-
-

© Copyright John Maddock 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - diff --git a/doc/headers.html b/doc/headers.html deleted file mode 100644 index 031d33fe..00000000 --- a/doc/headers.html +++ /dev/null @@ -1,48 +0,0 @@ - - - - Boost.Regex: Headers - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Headers

-
-

Boost.Regex Index

-
-

-
-

-

There are two main headers used by this library: <boost/regex.hpp> - provides full access to the main template library, while - <boost/cregex.hpp> provides access to the (deprecated) high level class - RegEx, and the POSIX API functions. -

-

There is also a header containing only forward declarations - <boost/regex_fwd.hpp> for use when an interface is dependent upon - boost::basic_regex, but otherwise does not need the full definitions.

-

-


-

-

Revised  - - 28 June 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/history.html b/doc/history.html deleted file mode 100644 index 72a50f03..00000000 --- a/doc/history.html +++ /dev/null @@ -1,177 +0,0 @@ - - - - Boost.Regex: History - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

History

-
-

Boost.Regex Index

-
-

-
-

-

Boost 1.34

- -

Boost 1.33.1

- -

Boost 1.33.0.

- -

Boost 1.32.1.

- -

Boost 1.31.0.

- -

-


-

-

Revised  - - 28 June 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/icu_strings.html b/doc/icu_strings.html deleted file mode 100644 index 39548198..00000000 --- a/doc/icu_strings.html +++ /dev/null @@ -1,468 +0,0 @@ - - - - Boost.Regex: Working With Unicode and ICU String Types - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Working With Unicode and ICU String Types.

-
-

Boost.Regex Index

-
-

-
-

-

Contents

-
-
Introduction
-
Unicode regular expression types
-
Regular Expression Algorithms -
-
-
u32regex_match
-
u32regex_search
-
u32regex_replace
-
-
- -
Iterators -
-
-
u32regex_iterator
-
u32regex_token_iterator
-
-
- -
-

Introduction

-

The header:

-
<boost/regex/icu.hpp>
-

contains the data types and algorithms necessary for working with regular - expressions in a Unicode aware environment.  -

-

In order to use this header you will need - the ICU library, and you will need to have built the Boost.Regex library - with ICU support enabled.

-

The header will enable you to:

- -

Unicode regular expression types

-

Header <boost/regex/icu.hpp> provides a regular expression traits - class that handles UTF-32 characters:

-
class icu_regex_traits;
-

and a regular expression type based upon that:

-
typedef basic_regex<UChar32,icu_regex_traits> u32regex;
-

The type u32regex is the regular expression type to use for all Unicode - regular expressions; internally it uses UTF-32 code points, but can be created - from, and used to search, either UTF-8, or UTF-16 encoded strings as well as - UTF-32 ones.

-

The constructors, and - assign member functions of u32regex, require UTF-32 encoded strings, but - there are a series of overloaded algorithms called make_u32regex which allow - regular expressions to be created from UTF-8, UTF-16, or UTF-32 encoded - strings:

-
template <class InputIterator> 
-u32regex make_u32regex(InputIterator i, InputIterator j, boost::regex_constants::syntax_option_type opt);
-
-

Effects: Creates a regular expression object from the iterator - sequence [i,j). The character encoding of the sequence is determined based upon - sizeof(*i): 1 implies UTF-8, 2 implies UTF-16, and 4 implies UTF-32.

-
u32regex make_u32regex(const char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl);
-
-

Effects: Creates a regular expression object from the - Null-terminated UTF-8 character sequence p.

-
u32regex make_u32regex(const unsigned char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl);
-

Effects: Creates a regular expression object from the - Null-terminated UTF-8 character sequence p.u32regex - make_u32regex(const wchar_t* p, boost::regex_constants::syntax_option_type opt - = boost::regex_constants::perl);

-

Effects: Creates a regular expression object from the - Null-terminated character sequence p.  The character encoding of - the sequence is determined based upon sizeof(wchar_t): 1 implies - UTF-8, 2 implies UTF-16, and 4 implies UTF-32.

-
u32regex make_u32regex(const UChar* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl);
-

Effects: Creates a regular expression object from the - Null-terminated UTF-16 character sequence p.

-
template<class C, class T, class A>
-u32regex make_u32regex(const std::basic_string<C, T, A>& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl);
-

Effects: Creates a regular expression object from the string s.  - The character encoding of the string is determined based upon sizeof(C): - 1 implies UTF-8, 2 implies UTF-16, and 4 implies UTF-32.

-
u32regex make_u32regex(const UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl);
-

Effects: Creates a regular expression object from the UTF-16 - encoding string s.

-

Regular Expression Algorithms

-

The regular expression algorithms regex_match, - regex_search and regex_replace all - expect that the character sequence upon which they operate, is encoded in the - same character encoding as the regular expression object with which they are - used.  For Unicode regular expressions that behavior is undesirable: while - we may want to process the data in UTF-32 "chunks", the actual data is much - more likely to be encoded as either UTF-8 or UTF-16.  Therefore the header - <boost/regex/icu.hpp> provides a series of thin wrappers around these - algorithms, called u32regex_match, u32regex_search, and u32regex_replace.  - These wrappers use iterator-adapters internally to make external UTF-8 or - UTF-16 data look as though it's really a UTF-32 sequence, that can then be - passed on to the "real" algorithm.

-

u32regex_match

-

For each regex_match algorithm defined by - <boost/regex.hpp>, then <boost/regex/icu.hpp> defines an overloaded - algorithm that takes the same arguments, but which is called u32regex_match, - and which will accept UTF-8, UTF-16 or UTF-32 encoded data, as well as an - ICU UnicodeString as input.

-

Example: match a password, encoded in a UTF-16 UnicodeString:

-
//
-// Find out if *password* meets our password requirements,
-// as defined by the regular expression *requirements*.
-//
-bool is_valid_password(const UnicodeString& password, const UnicodeString& requirements)
-{
-   return boost::u32regex_match(password, boost::make_u32regex(requirements));
-}
-
-

-

Example: match a UTF-8 encoded filename:

-
//
-// Extract filename part of a path from a UTF-8 encoded std::string and return the result
-// as another std::string:
-//
-std::string get_filename(const std::string& path)
-{
-   boost::u32regex r = boost::make_u32regex("(?:\\A|.*\\\\)([^\\\\]+)");
-   boost::smatch what;
-   if(boost::u32regex_match(path, what, r))
-   {
-      // extract $1 as a std::string:
-      return what.str(1);
-   }
-   else
-   {
-      throw std::runtime_error("Invalid pathname");
-   }
-}
-
-

u32regex_search

-

For each regex_search algorithm defined by - <boost/regex.hpp>, then <boost/regex/icu.hpp> defines an overloaded - algorithm that takes the same arguments, but which is called u32regex_search, - and which will accept UTF-8, UTF-16 or UTF-32 encoded data, as well as an - ICU UnicodeString as input.

-

Example: search for a character sequence in a specific - language block: -

-
UnicodeString extract_greek(const UnicodeString& text)
-{
-   // searches through some UTF-16 encoded text for a block encoded in Greek,
-   // this expression is imperfect, but the best we can do for now - searching
-   // for specific scripts is actually pretty hard to do right.
-   //
-   // Here we search for a character sequence that begins with a Greek letter,
-   // and continues with characters that are either not-letters ( [^[:L*:]] )
-   // or are characters in the Greek character block ( [\\x{370}-\\x{3FF}] ).
-   //
-   boost::u32regex r = boost::make_u32regex(L"[\\x{370}-\\x{3FF}](?:[^[:L*:]]|[\\x{370}-\\x{3FF}])*");
-   boost::u16match what;
-   if(boost::u32regex_search(text, what, r))
-   {
-      // extract $0 as a UnicodeString:
-      return UnicodeString(what[0].first, what.length(0));
-   }
-   else
-   {
-      throw std::runtime_error("No Greek found!");
-   }
-}
-

u32regex_replace

-

For each regex_replace algorithm defined by - <boost/regex.hpp>, then <boost/regex/icu.hpp> defines an overloaded - algorithm that takes the same arguments, but which is called u32regex_replace, - and which will accept UTF-8, UTF-16 or UTF-32 encoded data, as well as an - ICU UnicodeString as input.  The input sequence and the format string - specifier passed to the algorithm, can be encoded independently (for example - one can be UTF-8, the other in UTF-16), but the result string / output iterator - argument must use the same character encoding as the text being searched.

-

Example: Credit card number reformatting:

-
//
-// Take a credit card number as a string of digits, 
-// and reformat it as a human readable string with "-"
-// separating each group of four digits; 
-// note that we're mixing a UTF-32 regex, with a UTF-16
-// string and a UTF-8 format specifier, and it still all 
-// just works:
-//
-const boost::u32regex e = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
-const char* human_format = "$1-$2-$3-$4";
-
-UnicodeString human_readable_card_number(const UnicodeString& s)
-{
-   return boost::u32regex_replace(s, e, human_format);
-}
-

-

Iterators

-

u32regex_iterator

-

Type u32regex_iterator is in all respects the same as - regex_iterator except that since the regular expression type is always - u32regex it only takes one template parameter (the iterator type). It also - calls u32regex_search internally, allowing it to interface correctly with - UTF-8, UTF-16, and UTF-32 data:

-
-template <class BidirectionalIterator>
-class u32regex_iterator
-{
-   // for members see regex_iterator
-};
-
-typedef u32regex_iterator<const char*>     utf8regex_iterator;
-typedef u32regex_iterator<const UChar*>    utf16regex_iterator;
-typedef u32regex_iterator<const UChar32*>  utf32regex_iterator;
-
-

In order to simplify the construction of a u32regex_iterator from a string, - there are a series of non-member helper functions called - make_u32regex_iterator:

-
-u32regex_iterator<const char*> 
-   make_u32regex_iterator(const char* s, 
-                          const u32regex& e, 
-                          regex_constants::match_flag_type m = regex_constants::match_default);
-                          
-u32regex_iterator<const wchar_t*> 
-   make_u32regex_iterator(const wchar_t* s, 
-                          const u32regex& e, 
-                          regex_constants::match_flag_type m = regex_constants::match_default);
-                          
-u32regex_iterator<const UChar*> 
-   make_u32regex_iterator(const UChar* s, 
-                          const u32regex& e, 
-                          regex_constants::match_flag_type m = regex_constants::match_default);
-                          
-template <class charT, class Traits, class Alloc>
-u32regex_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator> 
-   make_u32regex_iterator(const std::basic_string<charT, Traits, Alloc>& s, 
-                          const u32regex& e, 
-                          regex_constants::match_flag_type m = regex_constants::match_default);
-                          
-u32regex_iterator<const UChar*> 
-   make_u32regex_iterator(const UnicodeString& s, 
-                          const u32regex& e, 
-                          regex_constants::match_flag_type m = regex_constants::match_default);
-

-

Each of these overloads returns an iterator that enumerates all occurrences of - expression e, in text s, using match_flags m.

-

Example: search for international currency symbols, along with - their associated numeric value:

-
-void enumerate_currencies(const std::string& text)
-{
-   // enumerate and print all the currency symbols, along
-   // with any associated numeric values:
-   const char* re = 
-      "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
-      "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
-      "(?(1)"
-         "|(?(2)"
-            "[[:Cf:][:Cc:][:Z*:]]*"
-         ")"
-         "[[:Sc:]]"
-      ")";
-   boost::u32regex r = boost::make_u32regex(re);
-   boost::u32regex_iterator<std::string::const_iterator> i(boost::make_u32regex_iterator(text, r)), j;
-   while(i != j)
-   {
-      std::cout << (*i)[0] << std::endl;
-      ++i;
-   }
-}
-

-

Calling -

-
enumerate_currencies(" $100.23 or £198.12 ");
-

Yields the output:

-
$100.23
£198.12
-

Provided of course that the input is encoded as UTF-8.

-

u32regex_token_iterator

-

Type u32regex_token_iterator is in all respects the same as - regex_token_iterator except that since the regular expression type is - always u32regex it only takes one template parameter (the iterator type).  - It also calls u32regex_search internally, allowing it to interface correctly - with UTF-8, UTF-16, and UTF-32 data:

-
template <class BidirectionalIterator>
-class u32regex_token_iterator
-{
-   // for members see regex_token_iterator
-};
-
-typedef u32regex_token_iterator<const char*>     utf8regex_token_iterator;
-typedef u32regex_token_iterator<const UChar*>    utf16regex_token_iterator;
-typedef u32regex_token_iterator<const UChar32*>  utf32regex_token_iterator;
-
-

In order to simplify the construction of a u32regex_token_iterator from a - string, there are a series of non-member helper functions called - make_u32regex_token_iterator:

-
-u32regex_token_iterator<const char*> 
-   make_u32regex_token_iterator(const char* s, 
-                                const u32regex& e, 
-                                int sub, 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-                               
-u32regex_token_iterator<const wchar_t*> 
-   make_u32regex_token_iterator(const wchar_t* s, 
-                                const u32regex& e, 
-                                int sub, 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-                                
-u32regex_token_iterator<const UChar*> 
-   make_u32regex_token_iterator(const UChar* s, 
-                                const u32regex& e, 
-                                int sub, 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-                                
-template <class charT, class Traits, class Alloc>
-u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator> 
-   make_u32regex_token_iterator(const std::basic_string<charT, Traits, Alloc>& s, 
-                                const u32regex& e, 
-                                int sub, 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-                                
-u32regex_token_iterator<const UChar*> 
-   make_u32regex_token_iterator(const UnicodeString& s, 
-                                const u32regex& e, 
-                                int sub, 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-

-

Each of these overloads returns an iterator that enumerates all occurrences of - marked sub-expression sub in regular expression e, found - in text s, using match_flags m.

-
-template <std::size_t N>
-u32regex_token_iterator<const char*> 
-   make_u32regex_token_iterator(const char* p, 
-                                const u32regex& e, 
-                                const int (&submatch)[N], 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-                                
-template <std::size_t N>
-u32regex_token_iterator<const wchar_t*> 
-   make_u32regex_token_iterator(const wchar_t* p, 
-                                const u32regex& e, 
-                                const int (&submatch)[N], 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-                                
-template <std::size_t N>
-u32regex_token_iterator<const UChar*> 
-   make_u32regex_token_iterator(const UChar* p, 
-                                const u32regex& e, 
-                                const int (&submatch)[N], 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-                                
-template <class charT, class Traits, class Alloc, std::size_t N>
-u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator> 
-   make_u32regex_token_iterator(const std::basic_string<charT, Traits, Alloc>& p, 
-                                const u32regex& e, 
-                                const int (&submatch)[N], 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-                                
-template <std::size_t N>
-u32regex_token_iterator<const UChar*> 
-   make_u32regex_token_iterator(const UnicodeString& s, 
-                                const u32regex& e, 
-                                const int (&submatch)[N], 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-
-

Each of these overloads returns an iterator that enumerates one sub-expression - for each submatch in regular expression e, found in - text s, using match_flags m.

-
-u32regex_token_iterator<const char*> 
-   make_u32regex_token_iterator(const char* p, 
-                                const u32regex& e, 
-                                const std::vector<int>& submatch, 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-                                
-u32regex_token_iterator<const wchar_t*> 
-   make_u32regex_token_iterator(const wchar_t* p, 
-                                const u32regex& e, 
-                                const std::vector<int>& submatch, 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-                                
-u32regex_token_iterator<const UChar*> 
-   make_u32regex_token_iterator(const UChar* p, 
-                                const u32regex& e, 
-                                const std::vector<int>& submatch, 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-                                
-template <class charT, class Traits, class Alloc>
-u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator> 
-   make_u32regex_token_iterator(const std::basic_string<charT, Traits, Alloc>& p, 
-                                const u32regex& e, 
-                                const std::vector<int>& submatch, 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-                                
-u32regex_token_iterator<const UChar*> 
-   make_u32regex_token_iterator(const UnicodeString& s, 
-                                const u32regex& e, 
-                                const std::vector<int>& submatch, 
-                                regex_constants::match_flag_type m = regex_constants::match_default);
-
-

Each of these overloads returns an iterator that enumerates one sub-expression - for each submatch in regular expression e, found in - text s, using match_flags m.

-

Example: search for international currency symbols, along with - their associated numeric value:

-
-void enumerate_currencies2(const std::string& text)
-{
-   // enumerate and print all the currency symbols, along
-   // with any associated numeric values:
-   const char* re = 
-      "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
-      "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
-      "(?(1)"
-         "|(?(2)"
-            "[[:Cf:][:Cc:][:Z*:]]*"
-         ")"
-         "[[:Sc:]]"
-      ")";
-   boost::u32regex r = boost::make_u32regex(re);
-   boost::u32regex_token_iterator<std::string::const_iterator> 
-      i(boost::make_u32regex_token_iterator(text, r, 1)), j;
-   while(i != j)
-   {
-      std::cout << *i << std::endl;
-      ++i;
-   }
-}
-
-

-


-

Revised  - - 05 Jan 2005  -

-

© Copyright John Maddock 2005

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - - diff --git a/doc/implementation.html b/doc/implementation.html deleted file mode 100644 index d2a9b5f2..00000000 --- a/doc/implementation.html +++ /dev/null @@ -1,43 +0,0 @@ - - - - Boost.Regex: Implementation - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Implementation

-
-

Boost.Regex Index

-
-

-
-

-

Todo.

-

-


-

-

Revised - - 24 Oct 2003 -

-

© Copyright John Maddock 1998- - - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - diff --git a/doc/index.html b/doc/index.html deleted file mode 100644 index ddd57e4c..00000000 --- a/doc/index.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - - -

- Automatic redirection failed, please go to html/index.html. -

-

Copyright John Maddock 2001

-

Distributed under the Boost Software License, Version 1.0. (See accompanying file - LICENSE_1_0.txt or copy at www.boost.org/LICENSE_1_0.txt).

- - - - - diff --git a/doc/install.html b/doc/install.html deleted file mode 100644 index 6f43c55b..00000000 --- a/doc/install.html +++ /dev/null @@ -1,260 +0,0 @@ - - - - Boost.Regex: Installation - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Installation

-
-

Boost.Regex Index

-
-

-
-

When you extract the library from its zip file, you must preserve its internal - directory structure (for example by using the -d option when extracting). If - you didn't do that when extracting, then you'd better stop reading this, delete - the files you just extracted, and try again! -

-

This library should not need configuring before use; most popular - compilers/standard libraries/platforms are already supported "as is". If you do - experience configuration problems, or just want to test the configuration with - your compiler, then the process is the same as for all of boost; see the - configuration library documentation.

-

The library will encase all code inside namespace boost. -

-

Unlike some other template libraries, this library consists of a mixture of - template code (in the headers) and static code and data (in cpp files). - Consequently it is necessary to build the library's support code into a library - or archive file before you can use it, instructions for specific platforms are - as follows: -

-

Building with bjam

-

This is now the preferred method for building and installing this library, - please refer to the getting started - guide for more information.

-

Building With Unicode and ICU Support

-

A default build of this library does not enable Unicode - support via ICU.  There is no need to enable this support if you - don't need it, but if you use ICU for your Unicode support already, and want to - work with Unicode-aware regular expressions then read on.

-

Most of the information you will need is in the - getting started guide, the only additional step you need to take is to - tell bjam that you want Boost.Regex to use ICU and optionally to tell bjam - where ICU is located.

-

If you're building on a Unix-like platform, and ICU is already installed in - your compiler's search path (with an install prefix of /usr or /usr/local - for example), then set the environment variable HAVE_ICU to enable ICU - support.  For example you might build with the command line:

-
bjam -sHAVE_ICU=1 --toolset=toolset-name install
-

If ICU is not already in your compiler's path then you need to set the - environment variable ICU_PATH to point to the root directory of your ICU - installation, for example if ICU was installed to /usr/local/icu/3.3 you might - use:

-
bjam -sICU_PATH=/usr/local/icu/3.3 --toolset=toolset-name install
-

Note that ICU is a C++ library just like Boost is, as such your copy of ICU - must have been built with the same C++ compiler (and compiler version) that you - are using to build Boost.  Boost.Regex will not work correctly unless - you ensure that this is the case: it is up to you to ensure that - the version of ICU you are using is binary compatible with the toolset you use - to build Boost.

-

Building via makefiles

-

Borland C++ Builder: -

- -
make -fbcb5.mak
-

The build process will build a variety of .lib and .dll files (the exact number - depends upon the version of Borland's tools you are using) the .lib and dll - files will be in a sub-directory called bcb4 or bcb5 depending upon the - makefile used. To install the libraries into your development system use:

-
make -fbcb5.mak install
-

library files will be copied to <BCROOT>/lib and the dll's to - <BCROOT>/bin, where <BCROOT> corresponds to the install path of - your Borland C++ tools. -

-

You may also remove temporary files created during the build process (excluding - lib and dll files) by using:

-
make -fbcb5.mak clean
-

Finally when you use regex++ it is only necessary for you to add the - <boost> root directory to your list of include directories for that - project. It is not necessary for you to manually add a .lib file to the - project; the headers will automatically select the correct .lib file for your - build mode and tell the linker to include it. There is one caveat however: the - library can not tell the difference between VCL and non-VCL enabled builds when - building a GUI application from the command line, if you build from the command - line with the 5.5 command line tools then you must define the pre-processor - symbol _NO_VCL in order to ensure that the correct link libraries are selected: - the C++ Builder IDE normally sets this automatically. Hint, users of the 5.5 - command line tools may want to add a -D_NO_VCL to bcc32.cfg in order to set - this option permanently. -

-

If you would prefer to do a dynamic link to the regex libraries when using the - dll runtime then define BOOST_REGEX_DYN_LINK (you must do this if you want to - use boost.regex in multiple dll's), otherwise Boost.regex will be statically - linked by default. 

-

If you want to suppress automatic linking altogether (and supply your own - custom build of the lib) then define BOOST_REGEX_NO_LIB.

-

If you are building with C++ Builder 6, you will find that - <boost/regex.hpp> can not be used in a pre-compiled header (the actual - problem is in <locale> which gets included by <boost/regex.hpp>), - if this causes problems for you, then try defining BOOST_NO_STD_LOCALE when - building, this will disable some features throughout boost, but may save you a - lot in compile times!

-

Microsoft Visual C++ 6, 7, 7.1 and 8

-

You need version 6 or later of MSVC to build this library. If you are using VC5 then you - may want to look at one of the previous releases of this - library -

-

Open up a command prompt, which has the necessary MSVC environment variables - defined (for example by using the batch file Vcvars32.bat installed by the - Visual Studio installation), and change to the <boost>\libs\regex\build - directory. -

-

Select the correct makefile - vc6.mak for "vanilla" Visual C++ 6 or - vc6-stlport.mak if you are using STLPort.

-

Invoke the makefile like this:

-
nmake -fvc6.mak
-

You will now have a collection of lib and dll files in a "vc6" subdirectory, to - install these into your development system use:

-
nmake -fvc6.mak install
-

The lib files will be copied to your <VC6>\lib directory and the dll - files to <VC6>\bin, where <VC6> is the root of your Visual C++ 6 - installation.

-

You can delete all the temporary files created during the build (excluding lib - and dll files) using:

-
nmake -fvc6.mak clean 
-

If you want to build with ICU support, then you need to pass the path to your - ICU directory to the makefile, for example with: -

-
nmake ICU_PATH=c:\open-source\icu -fvc71.mak install
-

Finally when you use regex++ it is only necessary for you to add the - <boost> root directory to your list of include directories for that - project. It is not necessary for you to manually add a .lib file to the - project; the headers will automatically select the correct .lib file for your - build mode and tell the linker to include it. -

-

Note that if you want to dynamically link to the regex library when using the - dynamic C++ runtime, define BOOST_REGEX_DYN_LINK when building your project.

-

If you want to add the source directly to your project then define - BOOST_REGEX_NO_LIB to disable automatic library selection.

-

There are several important caveats to remember when using boost.regex with - Microsoft's Compiler:

- -

GCC(2.95 and 3.x) -

-

You can build with gcc using the normal boost Jamfile in - <boost>/libs/regex/build, alternatively there is a conservative makefile - for the g++ compiler. From the command prompt change to the - <boost>/libs/regex/build directory and type: -

-
make -fgcc.mak 
-

At the end of the build process you should have a gcc sub-directory containing - release and debug versions of the library (libboost_regex.a and - libboost_regex_debug.a). When you build projects that use regex++, you will - need to add the boost install directory to your list of include paths and add - <boost>/libs/regex/build/gcc/libboost_regex.a to your list of library - files. -

-

There is also a makefile to build the library as a shared library:

-
make -fgcc-shared.mak
-

which will build libboost_regex.so and libboost_regex_debug.so.

-

Both of these makefiles support the following environment variables:

-

ICU_PATH: tells the makefile to build with Unicode support, set to the path - where your ICU installation is located, for example with: make - ICU_PATH=/usr/local install -fgcc.mak

-

CXXFLAGS: extra compiler options - note that this applies to both the debug and - release builds.

-

INCLUDES: additional include directories.

-

LDFLAGS: additional linker options.

-

LIBS: additional library files.

-

For the more adventurous there is a configure script in - <boost>/libs/config; see the config - library documentation.

-

Sun Workshop 6.1

-

There is a makefile for the sun (6.1) compiler (C++ version 3.12). From the - command prompt change to the <boost>/libs/regex/build directory and type: -

-
dmake -f sunpro.mak 
-

At the end of the build process you should have a sunpro sub-directory - containing single and multithread versions of the library (libboost_regex.a, - libboost_regex.so, libboost_regex_mt.a and libboost_regex_mt.so). When you - build projects that use regex++, you will need to add the boost install - directory to your list of include paths and add - <boost>/libs/regex/build/sunpro/ to your library search path. -

-

This makefile supports the following environment variables:

-

CXXFLAGS: extra compiler options - note that this applies to both the single - and multithreaded builds.

-

INCLUDES: additional include directories.

-

LDFLAGS: additional linker options.

-

LIBS: additional library files.

-

LIBSUFFIX: a suffix to mangle the library name with (defaults to nothing).

-

This makefile does not set any architecture specific options like -xarch=v9, - you can set these by defining the appropriate macros, for example:

-
dmake CXXFLAGS="-xarch=v9" LDFLAGS="-xarch=v9" LIBSUFFIX="_v9" -f sunpro.mak
-

will build v9 variants of the regex library named libboost_regex_v9.a etc.

-

Makefiles for Other compilers: -

-

There is a generic makefile (generic.mak ) - provided in <boost-root>/libs/regex/build - see that makefile for details - of environment variables that need to be set before use. -


-

-

Revised  - - 09 Jan 2005  -

-

© Copyright John Maddock 1998- - 2005

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/introduction.html b/doc/introduction.html deleted file mode 100644 index 3a4fcc7c..00000000 --- a/doc/introduction.html +++ /dev/null @@ -1,181 +0,0 @@ - - - - Boost.Regex: Introduction - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Introduction

-
-

Boost.Regex Index

-
-

-
-

-

Regular expressions are a form of pattern-matching that are often used in text - processing; many users will be familiar with the Unix utilities grep, sed - and awk, and the programming language Perl, each of which make - extensive use of regular expressions. Traditionally C++ users have been limited - to the POSIX C API's for manipulating regular expressions, and while regex++ - does provide these API's, they do not represent the best way to use the - library. For example regex++ can cope with wide character strings, or search - and replace operations (in a manner analogous to either sed or Perl), something - that traditional C libraries can not do.

-

The class boost::basic_regex is the key class in - this library; it represents a "machine readable" regular expression, and is - very closely modeled on std::basic_string, think of it as a string plus the - actual state-machine required by the regular expression algorithms. Like - std::basic_string there are two typedefs that are almost always the means by - which this class is referenced:

-
namespace boost{
-
-template <class charT, 
-          class traits = regex_traits<charT> >
-class basic_regex;
-
-typedef basic_regex<char> regex;
-typedef basic_regex<wchar_t> wregex;
-
-}
-

To see how this library can be used, imagine that we are writing a credit card - processing application. Credit card numbers generally come as a string of - 16-digits, separated into groups of 4-digits, and separated by either a space - or a hyphen. Before storing a credit card number in a database (not necessarily - something your customers will appreciate!), we may want to verify that the - number is in the correct format. To match any digit we could use the regular - expression [0-9], however ranges of characters like this are actually locale - dependent. Instead we should use the POSIX standard form [[:digit:]], or the - regex++ and Perl shorthand for this \d (note that many older libraries tended - to be hard-coded to the C-locale, consequently this was not an issue for them). - That leaves us with the following regular expression to validate credit card - number formats:

-
(\d{4}[- ]){3}\d{4}
-

Here the parentheses act to group (and mark for future reference) - sub-expressions, and the {4} means "repeat exactly 4 times". This is an example - of the extended regular expression syntax used by Perl, awk and egrep. Regex++ - also supports the older "basic" syntax used by sed and grep, but this is - generally less useful, unless you already have some basic regular expressions - that you need to reuse.

-

Now let's take that expression and place it in some C++ code to validate the - format of a credit card number:

-
bool validate_card_format(const std::string& s)
-{
-   static const boost::regex e("(\\d{4}[- ]){3}\\d{4}");
-   return regex_match(s, e);
-}
-

Note how we had to add some extra escapes to the expression: remember that the - escape is seen once by the C++ compiler, before it gets to be seen by the - regular expression engine, consequently escapes in regular expressions have to - be doubled up when embedding them in C/C++ code. Also note that all the - examples assume that your compiler supports Koenig lookup, if yours doesn't - (for example VC6), then you will have to add some boost:: prefixes to some of - the function calls in the examples.

-

Those of you who are familiar with credit card processing, will have realized - that while the format used above is suitable for human readable card numbers, - it does not represent the format required by online credit card systems; these - require the number as a string of 16 (or possibly 15) digits, without any - intervening spaces. What we need is a means to convert easily between the two - formats, and this is where search and replace comes in. Those who are familiar - with the utilities sed and Perl will already be ahead here; we - need two strings - one a regular expression - the other a "format - string" that provides a description of the text to replace the match - with. In regex++ this search and replace operation is performed with the - algorithm regex_replace, for our credit card - example we can write two algorithms like this to provide the format - conversions:

-
// match any format with the regular expression:
-const boost::regex e("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
-const std::string machine_format("\\1\\2\\3\\4");
-const std::string human_format("\\1-\\2-\\3-\\4");
-
-std::string machine_readable_card_number(const std::string s)
-{
-   return regex_replace(s, e, machine_format, boost::match_default | boost::format_sed);
-}
-
-std::string human_readable_card_number(const std::string s)
-{
-   return regex_replace(s, e, human_format, boost::match_default | boost::format_sed);
-}
-

Here we've used marked sub-expressions in the regular expression to split out - the four parts of the card number as separate fields, the format string then - uses the sed-like syntax to replace the matched text with the reformatted - version.

-

In the examples above, we haven't directly manipulated the results of a regular - expression match, however in general the result of a match contains a number of - sub-expression matches in addition to the overall match. When the library needs - to report a regular expression match it does so using an instance of the class - match_results, as before there are typedefs of this class for the most - common cases: -

-
namespace boost{
-typedef match_results<const char*> cmatch;
-typedef match_results<const wchar_t*> wcmatch;
-typedef match_results<std::string::const_iterator> smatch;
-typedef match_results<std::wstring::const_iterator> wsmatch; 
-}
-

The algorithms regex_search and regex_match - make use of match_results to report what matched; the difference between these - algorithms is that regex_match will only find - matches that consume all of the input text, whereas - regex_search will search for a match anywhere within the text - being matched.

-

Note that these algorithms are not restricted to searching regular C-strings, - any bidirectional iterator type can be searched, allowing for the possibility - of seamlessly searching almost any kind of data. -

-

For search and replace operations, in addition to the algorithm - regex_replace that we have already seen, the match_results - class has a format member that takes the result of a match and a format string, - and produces a new string by merging the two.

-

For iterating through all occurrences of an expression within a text, there are - two iterator types: regex_iterator will - enumerate over the match_results objects - found, while regex_token_iterator will - enumerate a series of strings (similar to perl style split operations).

-

For those that dislike templates, there is a high level wrapper class RegEx - that is an encapsulation of the lower level template code - it provides a - simplified interface for those that don't need the full power of the library, - and supports only narrow characters, and the "extended" regular expression - syntax. This class is now deprecated as it does not form part of the regular - expressions C++ standard library proposal. -

-

The POSIX API functions: regcomp, regexec, regfree - and regerror, are available in both narrow character and Unicode versions, and - are provided for those who need compatibility with these API's. -

-

Finally, note that the library now has run-time localization - support, and recognizes the full POSIX regular expression syntax - including - advanced features like multi-character collating elements and equivalence - classes - as well as providing compatibility with other regular expression - libraries including GNU and BSD4 regex packages, and to a more limited extent - Perl 5. -

-

-


-

-

Revised - - 24 Oct 2003 -

-

© Copyright John Maddock 1998- - - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - diff --git a/doc/localisation.html b/doc/localisation.html deleted file mode 100644 index 31bd8663..00000000 --- a/doc/localisation.html +++ /dev/null @@ -1,808 +0,0 @@ - - - - Boost.Regex: Localisation - - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Localisation

-
-

Boost.Regex Index

-
-
-
-
-

Boost.regex provides extensive support for run-time localization, the - localization model used can be split into two parts: front-end and back-end.

-

Front-end localization deals with everything which the user sees - error - messages, and the regular expression syntax itself. For example a French - application could change [[:word:]] to [[:mot:]] and \w to \m. Modifying the - front end locale requires active support from the developer, by providing the - library with a message catalogue to load, containing the localized strings. - Front-end locale is affected by the LC_MESSAGES category only.

-

Back-end localization deals with everything that occurs after the expression - has been parsed - in other words everything that the user does not see or - interact with directly. It deals with case conversion, collation, and character - class membership. The back-end locale does not require any intervention from - the developer - the library will acquire all the information it requires for - the current locale from the underlying operating system / run time library. - This means that if the program user does not interact with regular expressions - directly - for example if the expressions are embedded in your C++ code - then - no explicit localization is required, as the library will take care of - everything for you. For example embedding the expression [[:word:]]+ in your - code will always match a whole word, if the program is run on a machine with, - for example, a Greek locale, then it will still match a whole word, but in - Greek characters rather than Latin ones. The back-end locale is affected by the - LC_CTYPE and LC_COLLATE categories.

-

There are three separate localization mechanisms supported by boost.regex:

-

Win32 localization model.

-

This is the default model when the library is compiled under Win32, and is - encapsulated by the traits class w32_regex_traits. When this model is in effect - each basic_regex object gets its own LCID, by default this is the user's - default setting as returned by GetUserDefaultLCID, but you can call imbue - on the basic_regex object to set its locale to some other LCID if you wish. - All the settings used by boost.regex are acquired directly from the operating - system bypassing the C run time library. Front-end localization requires a - resource dll, containing a string table with the user-defined strings. The - traits class exports the function:

-

static std::string set_message_catalogue(const std::string& s);

-

which needs to be called with a string identifying the name of the resource - dll, before your code compiles any regular expressions (but not - necessarily before you construct any basic_regex instances):

-

- boost::w32_regex_traits<char>::set_message_catalogue("mydll.dll");

-

- The library provides full Unicode support under NT, under Windows 9x the - library degrades gracefully - characters 0 to 255 are supported, the remainder - are treated as "unknown" graphic characters.

-

C localization model.

-

This model has been deprecated in favor of the C++ locale for all non-Windows - compilers that support it.  This locale is encapsulated by the traits - class c_regex_traits, Win32 users can force this model to take effect by - defining the pre-processor symbol BOOST_REGEX_USE_C_LOCALE. When this model is - in effect there is a single global locale, as set by setlocale. All - settings are acquired from your run time library, consequently Unicode support - is dependent upon your run time library implementation.

-

Front end localization is not supported.

-

Note that calling setlocale invalidates all compiled regular - expressions, calling setlocale(LC_ALL, "C") will make this library - behave equivalent to most traditional regular expression libraries including - version 1 of this library.

-

C++ localization model.

-

This model is the default for non-Windows compilers.

-

- When this model is in effect each instance of basic_regex<> has its own - instance of std::locale, class basic_regex<> also has a member function imbue - which allows the locale for the expression to be set on a per-instance basis. - Front end localization requires a POSIX message catalogue, which will be loaded - via the std::messages facet of the expression's locale, the traits class - exports the symbol:

-

static std::string set_message_catalogue(const std::string& s);

-

which needs to be called with a string identifying the name of the message - catalogue, before your code compiles any regular expressions (but not - necessarily before you construct any basic_regex instances):

-

- boost::cpp_regex_traits<char>::set_message_catalogue("mycatalogue");

-

Note that calling basic_regex<>::imbue will invalidate any expression - currently compiled in that instance of basic_regex<>.

-

Finally note that if you build the library with a non-default localization - model, then the appropriate pre-processor symbol (BOOST_REGEX_USE_C_LOCALE or - BOOST_REGEX_USE_CPP_LOCALE) must be defined both when you build the support - library, and when you include <boost/regex.hpp> or - <boost/cregex.hpp> in your code. The best way to ensure this is to add - the #define to <boost/regex/user.hpp>.

-

Providing a message catalogue:

-

- In order to localize the front end of the library, you need to provide the - library with the appropriate message strings contained either in a resource - dll's string table (Win32 model), or a POSIX message catalogue (C++ models). In - the latter case the messages must appear in message set zero of the catalogue. - The messages and their id's are as follows:
-  

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 Message idMeaningDefault value 
 101The character used to start a sub-expression."(" 
 102The character used to end a sub-expression - declaration.")" 
 103The character used to denote an end of line - assertion."$" 
 104The character used to denote the start of line - assertion."^" 
 105The character used to denote the "match any character - expression"."." 
 106The match zero or more times repetition operator."*" 
 107The match one or more repetition operator."+" 
 108The match zero or one repetition operator."?" 
 109The character set opening character."[" 
 110The character set closing character."]" 
 111The alternation operator."|" 
 112The escape character."\\" 
 113The hash character (not currently used)."#" 
 114The range operator."-" 
 115The repetition operator opening character."{" 
 116The repetition operator closing character."}" 
 117The digit characters."0123456789" 
 118The character which when preceded by an escape - character represents the word boundary assertion."b" 
 119The character which when preceded by an escape - character represents the non-word boundary assertion."B" 
 120The character which when preceded by an escape - character represents the word-start boundary assertion."<" 
 121The character which when preceded by an escape - character represents the word-end boundary assertion.">" 
 122The character which when preceded by an escape - character represents any word character."w" 
 123The character which when preceded by an escape - character represents a non-word character."W" 
 124The character which when preceded by an escape - character represents a start of buffer assertion."`A" 
 125The character which when preceded by an escape - character represents an end of buffer assertion."'z" 
 126The newline character."\n" 
 127The comma separator."," 
 128The character which when preceded by an escape - character represents the bell character."a" 
 129The character which when preceded by an escape - character represents the form feed character."f" 
 130The character which when preceded by an escape - character represents the newline character."n" 
 131The character which when preceded by an escape - character represents the carriage return character."r" 
 132The character which when preceded by an escape - character represents the tab character."t" 
 133The character which when preceded by an escape - character represents the vertical tab character."v" 
 134The character which when preceded by an escape - character represents the start of a hexadecimal character constant."x" 
 135The character which when preceded by an escape - character represents the start of an ASCII escape character."c" 
 136The colon character.":" 
 137The equals character."=" 
 138The character which when preceded by an escape - character represents the ASCII escape character."e" 
 139The character which when preceded by an escape - character represents any lower case character."l" 
 140The character which when preceded by an escape - character represents any non-lower case character."L" 
 141The character which when preceded by an escape - character represents any upper case character."u" 
 142The character which when preceded by an escape - character represents any non-upper case character."U" 
 143The character which when preceded by an escape - character represents any space character."s" 
 144The character which when preceded by an escape - character represents any non-space character."S" 
 145The character which when preceded by an escape - character represents any digit character."d" 
 146The character which when preceded by an escape - character represents any non-digit character."D" 
 147The character which when preceded by an escape - character represents the end quote operator."E" 
 148The character which when preceded by an escape - character represents the start quote operator."Q" 
 149The character which when preceded by an escape - character represents a Unicode combining character sequence."X" 
 150The character which when preceded by an escape - character represents any single character."C" 
 151The character which when preceded by an escape - character represents end of buffer operator."Z" 
 152The character which when preceded by an escape - character represents the continuation assertion."G" 
 153The character which when preceded by (? indicates a zero width negated - forward lookahead assert.! 
-
-
-

Custom error messages are loaded as follows: 

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 Message IDError message IDDefault string 
 201REG_NOMATCH"No match" 
 202REG_BADPAT"Invalid regular expression" 
 203REG_ECOLLATE"Invalid collation character" 
 204REG_ECTYPE"Invalid character class name" 
 205REG_EESCAPE"Trailing backslash" 
 206REG_ESUBREG"Invalid back reference" 
 207REG_EBRACK"Unmatched [ or [^" 
 208REG_EPAREN"Unmatched ( or \\(" 
 209REG_EBRACE"Unmatched \\{" 
 210REG_BADBR"Invalid content of \\{\\}" 
 211REG_ERANGE"Invalid range end" 
 212REG_ESPACE"Memory exhausted" 
 213REG_BADRPT"Invalid preceding regular expression" 
 214REG_EEND"Premature end of regular expression" 
 215REG_ESIZE"Regular expression too big" 
 216REG_ERPAREN"Unmatched ) or \\)" 
 217REG_EMPTY"Empty expression" 
 218REG_E_UNKNOWN"Unknown error" 
-
-
-

Custom character class names are loaded as follows: 

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 Message IDDescriptionEquivalent default class name 
 300The character class name for alphanumeric characters."alnum" 
 301The character class name for alphabetic characters."alpha" 
 302The character class name for control characters."cntrl" 
 303The character class name for digit characters."digit" 
 304The character class name for graphics characters."graph" 
 305The character class name for lower case characters."lower" 
 306The character class name for printable characters."print" 
 307The character class name for punctuation characters."punct" 
 308The character class name for space characters."space" 
 309The character class name for upper case characters."upper" 
 310The character class name for hexadecimal characters."xdigit" 
 311The character class name for blank characters."blank" 
 312The character class name for word characters."word" 
 313The character class name for Unicode characters."unicode" 
-
-
-

Finally, custom collating element names are loaded starting from message id - 400, and terminating when the first load thereafter fails. Each message looks - something like: "tagname string" where tagname is the name used inside - [[.tagname.]] and string is the actual text of the collating element. - Note that the value of collating element [[.zero.]] is used for the conversion - of strings to numbers - if you replace this with another value then that will - be used for string parsing - for example use the Unicode character 0x0660 for - [[.zero.]] if you want to use Unicode Arabic-Indic digits in your regular - expressions in place of Latin digits.

-

Note that the POSIX defined names for character classes and collating elements - are always available - even if custom names are defined, in contrast, custom - error messages, and custom syntax messages replace the default ones.

-

-
-

Revised  - - 26 June 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/match_flag_type.html b/doc/match_flag_type.html deleted file mode 100644 index 64f61402..00000000 --- a/doc/match_flag_type.html +++ /dev/null @@ -1,295 +0,0 @@ - - - - Boost.Regex: match_flag_type - - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

match_flag_type

-
-

Boost.Regex Index

-
-
-
-
-

Synopsis

-

The type match_flag_type is an implementation specific bitmask - type (17.3.2.1.2) that controls how a regular expression is matched against a - character sequence.  The behavior of the format flags is described in more - detail in the format syntax guide.

-
-namespace boost{ namespace regex_constants{
-
-typedef implementation-specific-bitmask-type match_flag_type;
-
-static const match_flag_type match_default = 0;
-static const match_flag_type match_not_bob;
-static const match_flag_type match_not_eob;
-static const match_flag_type match_not_bol;
-static const match_flag_type match_not_eol;
-static const match_flag_type match_not_bow;
-static const match_flag_type match_not_eow;
-static const match_flag_type match_any;
-static const match_flag_type match_not_null;
-static const match_flag_type match_continuous;
-static const match_flag_type match_partial;
-static const match_flag_type match_single_line;
-static const match_flag_type match_prev_avail;
-static const match_flag_type match_not_dot_newline;
-static const match_flag_type match_not_dot_null;
-
-static const match_flag_type format_default = 0;
-static const match_flag_type format_sed;
-static const match_flag_type format_perl;
-static const match_flag_type format_literal; 
-static const match_flag_type format_no_copy; -static const match_flag_type format_first_only; -static const match_flag_type format_all; - -} // namespace regex_constants -} // namespace boost -
-

Description

-

The type match_flag_type is an implementation specific bitmask - type (17.3.2.1.2). When matching a regular expression against a sequence of - characters [first, last) then setting its elements has the effects listed in - the table below:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Element

-
-

Effect if set

-
-

match_default

-
-

Specifies that matching of regular expressions proceeds without any - modification of the normal rules used in ECMA-262, ECMAScript Language - Specification, Chapter 15 part 10, RegExp (Regular Expression) Objects (FWD.1)

-
match_not_bobSpecifies that the expressions "\A" and - "\`" should not match against the sub-sequence [first,first).
match_not_eobSpecifies that the expressions "\'", "\z" and - "\Z" should not match against the sub-sequence [last,last).
-

match_not_bol

-
-

Specifies that the expression "^" should not be matched against the - sub-sequence [first,first).

-
-

match_not_eol

-
-

Specifies that the expression "$" should not be matched against the - sub-sequence [last,last).

-
-

match_not_bow

-
-

Specifies that the expressions "\<" and "\b" should not be matched - against the sub-sequence [first,first).

-
-

match_not_eow

-
-

Specifies that the expressions "\>" and "\b" should not be matched - against the sub-sequence [last,last).

-
-

match_any

-
-

Specifies that if more than one match is possible then any match is an - acceptable result: this will still find the leftmost match, but may not find - the "best" match at that position.  Use this flag if you care about the - speed of matching, but don't care what was matched (only whether there is one - or not).

-
-

match_not_null

-
-

Specifies that the expression can not be matched against an empty sequence.

-
-

match_continuous

-
-

Specifies that the expression must match a sub-sequence that begins at first.

-
-

match_partial

-
-

Specifies that if no match can be found, then it is acceptable to return a - match [from, last) such that from != last, if there could exist some longer - sequence of characters [from,to) of which [from,last) is a prefix, and which - would result in a full match.

-

This flag is used when matching incomplete or very long texts, see the - partial matches documentation for more information.

-
match_extraInstructs the matching engine to retain all available - capture information; if a capturing group is repeated then information - about every repeat is available via match_results::captures() - or sub_match_captures().
match_single_lineEquivalent to the inverse of Perl's /m modifier; - prevents ^ from matching after an embedded newline character (so that it only - matches at the start of the text being matched), and $ from matching before an - embedded newline (so that it only matches at the end of the text being - matched).
-

match_prev_avail

-
-

Specifies that --first is a valid iterator position, when this - flag is set then the flags match_not_bol and match_not_bow - are ignored by the regular expression algorithms (RE.7) and iterators (RE.8).

-
match_not_dot_newlineSpecifies that the expression "." does not match a - newline character.  This is the inverse of Perl's /s modifier.
match_not_dot_nullSpecifies that the expression "." does not match a - null character '\0'.
-

format_default

-
-

Specifies that when a regular expression match is to be replaced by a new - string, that the new string is constructed using the rules used by the - ECMAScript replace function in ECMA-262, ECMAScript Language Specification, - Chapter 15 part 5.4.11 String.prototype.replace. (FWD.1). In addition during - search and replace operations then all non-overlapping occurrences of the - regular expression are located and replaced, and sections of the input that did - not match the expression, are copied unchanged to the output string.

-
-

format_sed

-
-

Specifies that when a regular expression match is to be replaced by a new - string, that the new string is constructed using the rules used by the Unix sed - utility in IEEE Std 1003.1-2001, Portable Operating System Interface (POSIX), - Shells and Utilities.

-
-

format_perl

-
-

- Specifies that when a regular expression match is to be replaced by a new - string, that the new string is constructed using the same rules as Perl 5.

-
format_literalSpecifies that when a regular expression match is to - be replaced by a new string, that the new string is a literal copy of the - replacement text.
format_allSpecifies that all syntax extensions are - enabled, including conditional (?ddexpression1:expression2) replacements: see - the format string guide for more details.
-

format_no_copy

-
-

When specified during a search and replace operation, then sections of the - character container sequence being searched that do match the regular - expression, are not copied to the output string.

-
-

format_first_only

-
-

When specified during a search and replace operation, then only the first - occurrence of the regular expression is replaced.

-
-
-
-

-
-
-
-

Revised - - 04 Feb 2004 -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/match_results.html b/doc/match_results.html deleted file mode 100644 index 6634b90a..00000000 --- a/doc/match_results.html +++ /dev/null @@ -1,459 +0,0 @@ - - - - Boost.Regex: class match_results - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

class match_results

-
-

Boost.Regex Index

-
-
-
-
-

Contents

-
-
Synopsis
Description
-
-

Synopsis

-

#include <boost/regex.hpp>

-

Regular expressions are different from many simple pattern-matching algorithms - in that as well as finding an overall match they can also produce - sub-expression matches: each sub-expression being delimited in the pattern by a - pair of parentheses (...). There has to be some method for reporting - sub-expression matches back to the user: this is achieved by defining a - class match_results that acts as an indexed collection of sub-expression - matches, each sub-expression match being contained in an object of type - sub_match.

-

Template class match_results denotes a collection of character sequences - representing the result of a regular expression match. Objects of type - match_results are passed to the algorithms regex_match - and regex_search, and are returned by the - iterator regex_iterator .  Storage for - the collection is allocated and freed as necessary by the member functions of - class match_results.

-

The template class match_results conforms to the requirements of a Sequence, as - specified in (lib.sequence.reqmts), except that only operations defined for - const-qualified Sequences are supported.

-

Class template match_results is most commonly used as one of the typedefs - cmatch, wcmatch, smatch, or wsmatch:

-
template <class BidirectionalIterator,
-          class Allocator = std::allocator<sub_match<BidirectionalIterator> > >
-class match_results;
-
-typedef match_results<const char*> cmatch;
-typedef match_results<const wchar_t*> wcmatch;
-typedef match_results<string::const_iterator> smatch;
-typedef match_results<wstring::const_iterator> wsmatch;
-
-template <class BidirectionalIterator,
-          class Allocator = std::allocator<sub_match<BidirectionalIterator> > >
-class match_results
-{ 
-public: 
-   typedef          sub_match<BidirectionalIterator>                        value_type;
-   typedef          const value_type&                                       const_reference;
-   typedef          const_reference                                         reference;
-   typedef          implementation defined                                  const_iterator;
-   typedef          const_iterator                                          iterator;
-   typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
-   typedef typename Allocator::size_type                                    size_type;
-   typedef          Allocator                                               allocator_type;
-   typedef typename iterator_traits<BidirectionalIterator>::value_type      char_type;
-   typedef          basic_string<char_type>                                 string_type;
-
-   // construct/copy/destroy:
-   explicit match_results(const Allocator& a = Allocator());
-   match_results(const match_results& m);
-   match_results& operator=(const match_results& m); 
-   ~match_results();
-
-   // size:
-   size_type size() const;
-   size_type max_size() const;
-   bool empty() const;
-   // element access:
-   difference_type length(int sub = 0) const;
-   difference_type position(unsigned int sub = 0) const;
-   string_type str(int sub = 0) const;
-   const_reference operator[](int n) const;
-
-   const_reference prefix() const;
-
-   const_reference suffix() const;
-   const_iterator begin() const;
-   const_iterator end() const;
-   // format:
-   template <class OutputIterator>
-   OutputIterator format(OutputIterator out,
-                         const string_type& fmt,
-                         match_flag_type flags = format_default) const;
-   string_type format(const string_type& fmt,
-                      match_flag_type flags = format_default) const;
-
-   allocator_type get_allocator() const;
-   void swap(match_results& that);
-
-#ifdef BOOST_REGEX_MATCH_EXTRA
-   typedef typename value_type::capture_sequence_type capture_sequence_type;
-   const capture_sequence_type& captures(std::size_t i)const;
-#endif
-
-};
-
-template <class BidirectionalIterator, class Allocator>
-bool operator == (const match_results<BidirectionalIterator, Allocator>& m1,
-                  const match_results<BidirectionalIterator, Allocator>& m2);
-template <class BidirectionalIterator, class Allocator>
-bool operator != (const match_results<BidirectionalIterator, Allocator>& m1,
-                  const match_results<BidirectionalIterator, Allocator>& m2);
-
-template <class charT, class traits, class BidirectionalIterator, class Allocator>
-basic_ostream<charT, traits>&
-   operator << (basic_ostream<charT, traits>& os,
-                const match_results<BidirectionalIterator, Allocator>& m);
-
-template <class BidirectionalIterator, class Allocator>
-void swap(match_results<BidirectionalIterator, Allocator>& m1,
-          match_results<BidirectionalIterator, Allocator>& m2);
-
-

Description

-

match_results constructors

-

In all match_results constructors, a copy of the Allocator - argument is used for any memory allocation performed by the constructor or - member functions during the lifetime of the object.

-

-match_results(const Allocator& a = Allocator());
-
- -

Effects: Constructs an object of class match_results. The postconditions - of this function are indicated in the table:

-

-
- - - - - - - - - - - - - - - - - - -
-

Element

-
-

Value

-
-

empty()

-
-

true

-
-

size()

-
-

0

-
-

str()

-
-

basic_string<charT>()

-
-
-

 

-

-match_results(const match_results& m);
-
- -

Effects: Constructs an object of class match_results, as a copy of m.

-

-match_results& operator=(const match_results& m);
-
- -

Effects: Assigns m to *this. The postconditions of this function are - indicated in the table:

-

-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Element

-
-

Value

-
-

empty()

-
-

m.empty().

-
-

size()

-
-

m.size().

-
-

str(n)

-
-

m.str(n) for all integers n < m.size().

-
-

prefix()

-
-

m.prefix().

-
-

suffix()

-
-

m.suffix().

-
-

(*this)[n]

-
-

m[n] for all integers n < m.size().

-
-

length(n)

-
-

m.length(n) for all integers n < m.size().

-
-

position(n)

-
-

m.position(n) for all integers n < m.size().

-
-
-

match_results size

-

-size_type size()const;
-
- -

Effects: Returns the number of sub_match elements stored in *this; that - is the number of marked sub-expressions in the regular expression that was - matched plus one.

-

-size_type max_size()const;
-
- -

Effects: Returns the maximum number of sub_match elements that can be - stored in *this.

-

-bool empty()const;
-
- -

Effects: Returns size() == 0.

-

match_results element access

-

-difference_type length(int sub = 0)const;
-
- -

Effects: Returns the length of sub-expression sub, that is to - say: (*this)[sub].length().

-

-difference_type position(unsigned int sub = 0)const;
-
- -

Effects: Returns the starting location of sub-expression sub, - or -1 if sub was not matched.  Note that if this represents a - partial match , then position() will return the location of - the partial match even though (*this)[0].matched is false.

-

-string_type str(int sub = 0)const;
-
- -

Effects: Returns sub-expression sub as a string:  string_type((*this)[sub]).

-

-const_reference operator[](int n) const;
-
- -

Effects: Returns a reference to the sub_match object - representing the character sequence that matched marked sub-expression n. - If n == 0 then returns a reference to a sub_match object - representing the character sequence that matched the whole regular - expression.  If n is out of range, or if n is an - unmatched sub-expression, then returns a sub_match object whose matched - member is false.

-

-const_reference prefix()const;
-
- -

Effects: Returns a reference to the sub_match object - representing the character sequence from the start of the string being - matched/searched, to the start of the match found.

-

-const_reference suffix()const;
-
- -

Effects: Returns a reference to the sub_match object - representing the character sequence from the end of the match found to the end - of the string being matched/searched.

-

-const_iterator begin()const;
-
- -

Effects: Returns a starting iterator that enumerates over all the marked - sub-expression matches stored in *this.

-

-const_iterator end()const;
-
- -

Effects: Returns a terminating iterator that enumerates over all the - marked sub-expression matches stored in *this.

-

match_results reformatting

-
template <class OutputIterator>
-OutputIterator format(OutputIterator out,
-                      const string_type& fmt,
-                      match_flag_type flags = format_default) const;
-
- -

Requires: The type OutputIterator conforms to the Output Iterator - requirements (24.1.2).

- -

Effects: Copies the character sequence [fmt.begin(), fmt.end()) to - OutputIterator out. For each format specifier or escape sequence in fmt, - replace that sequence with either the character(s) it represents, or the - sequence of characters within *this to which it refers. The bitmasks specified - in flags determines what - format specifiers or escape sequences are recognized, by default this is - the format used by ECMA-262, ECMAScript Language Specification, Chapter 15 part - 5.4.11 String.prototype.replace.

- -

Returns: out.

-

-string_type format(const string_type& fmt,
-                   match_flag_type flags = format_default) const;
-
- -

Effects: Returns a copy of the string fmt. For each format - specifier or escape sequence in fmt, replace that sequence with either - the character(s) it represents, or the sequence of characters within *this to - which it refers. The bitmasks specified in flags - determines what format specifiers or escape sequences - are recognized, by default this is the format used by ECMA-262, - ECMAScript Language Specification, Chapter 15 part 5.4.11 - String.prototype.replace.

-

Allocator access

-
allocator_type get_allocator()const;
-
- -

Effects: Returns a copy of the Allocator that was passed to the object's - constructor.

-

Swap

-
void swap(match_results& that);
-
- -

Effects: Swaps the contents of the two sequences.

- -

Postcondition: *this contains the sequence of matched - sub-expressions that were in that, that contains the - sequence of matched sub-expressions that were in *this.

- -

Complexity: constant time.

-

Captures

-
typedef typename value_type::capture_sequence_type capture_sequence_type;
-

Defines an implementation-specific type that satisfies the requirements of - a standard library Sequence (21.1.1 including the optional Table 68 - operations), whose value_type is a sub_match<BidirectionalIterator>. This - type happens to be std::vector<sub_match<BidirectionalIterator> >, - but you shouldn't actually rely on that.

-
const capture_sequence_type& captures(std::size_t i)const; 
-

Effects: returns a sequence containing all the captures - obtained for sub-expression i.

-

Returns: (*this)[i].captures();

-

Preconditions: the library must be built and used with - BOOST_REGEX_MATCH_EXTRA defined, and you must pass the flag - match_extra to the regex matching functions (regex_match, - regex_search, regex_iterator - or regex_token_iterator) in order for - this member function to be defined and return useful information.

-

Rationale: Enabling this feature has several consequences: -

- -

match_results non-members

-
template <class BidirectionalIterator, class Allocator>
-bool operator == (const match_results<BidirectionalIterator, Allocator>& m1,
-                  const match_results<BidirectionalIterator, Allocator>& m2);
-

Effects: Compares the two sequences for equality.

-
template <class BidirectionalIterator, class Allocator>
-bool operator != (const match_results<BidirectionalIterator, Allocator>& m1,
-                  const match_results<BidirectionalIterator, Allocator>& m2);
-

Effects: Compares the two sequences for inequality.

-
template <class charT, class traits, class BidirectionalIterator, class Allocator>
-basic_ostream<charT, traits>&
-   operator << (basic_ostream<charT, traits>& os,
-                const match_results<BidirectionalIterator, Allocator>& m);
-

Effects: Writes the contents of m to the stream os as - if by calling os << m.str(); Returns os.

-
template <class BidirectionalIterator, class Allocator>
-void swap(match_results<BidirectionalIterator, Allocator>& m1,
-          match_results<BidirectionalIterator, Allocator>& m2);
-

Effects: Swaps the contents of the two sequences.

-

-
-

Revised - - 24 Oct 2003 -

-

© Copyright John Maddock 1998- - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/mfc_strings.html b/doc/mfc_strings.html deleted file mode 100644 index f1d733b7..00000000 --- a/doc/mfc_strings.html +++ /dev/null @@ -1,294 +0,0 @@ - - - - Boost.Regex: Working With MFC/ATL String Types - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Working With MFC/ATL String Types.

-
-

Boost.Regex Index

-
-

-
-

Contents

-
-
Introduction
Types
Regular - Expression Creation
Overloaded Algorithms -
-
-
regex_match
regex_search -
regex_replace
-
-
Iterators -
-
-
regex_iterator creation helper
- regex_token_iterator creation helpers
-
-
-
-

Introduction

-

The header <boost/regex/mfc.hpp> provides Boost.Regex support for MFC - string types: note that this support requires Visual Studio .NET (Visual C++ 7) - or later, where all of the MFC and ATL string types are based around - the CSimpleStringT class template. 

-

In the following documentation, whenever you see CSimpleStringT<charT>, - then you can substitute any of the following MFC/ATL types (all of which - inherit from CSimpleStringT):

-

CString
- CStringA
- CStringW
- CAtlString
- CAtlStringA
- CAtlStringW
- CStringT<charT,traits>
- CFixedStringT<charT,N>
- CSimpleStringT<charT>

-

Types

-

The following typedefs are provided for the convenience of those working with - TCHAR's:

-
typedef basic_regex<TCHAR> tregex; 
-typedef match_results<TCHAR const*> tmatch; 
-typedef regex_iterator<TCHAR const*> tregex_iterator; 
-typedef regex_token_iterator<TCHAR const*> tregex_token_iterator; 
-
-

If you are working with explicitly narrow or wide characters rather than TCHAR, - then use the regular Boost.Regex types instead.

-

Regular Expression Creation

-

The following helper function is available to assist in the creation of a - regular expression from an MFC/ATL string type:

-
template <class charT>
-basic_regex<charT> 
-   make_regex(const ATL::CSimpleStringT<charT>& s, 
-              ::boost::regex_constants::syntax_option_type f = boost::regex_constants::normal);
-

Effects: returns basic_regex<charT>(s.GetString(), - s.GetString() + s.GetLength(), f);

-

Overloaded Algorithms

-

For each regular expression algorithm that's overloaded for a std::basic_string - argument, there is also one overloaded for the MFC/ATL string types.  - These algorithm signatures all look a lot more complex than they actually - are, but for completeness here they are anyway:

-

regex_match

-

There are two overloads, the first reports what matched in a match_results - structure, the second does not.  -

-

All the usual caveats for regex_match apply, in - particular the algorithm will only report a successful match if all of the - input text matches the expression, if this isn't what you want then - use regex_search instead.

-
template <class charT, class T, class A>
-bool regex_match(
-   const ATL::CSimpleStringT<charT>& s, 
-   match_results<const charT*, A>& what, 
-   const basic_regex<charT, T>& e, 
-   boost::regex_constants::match_flag_type f = boost::regex_constants::match_default); 
-

-

Effects: returns ::boost::regex_match(s.GetString(), - s.GetString() + s.GetLength(), what, e, f);

-

Example:

-
//
-// Extract filename part of a path from a CString and return the result
-// as another CString:
-//
-CString get_filename(const CString& path)
-{
-   boost::tregex r(__T("(?:\\A|.*\\\\)([^\\\\]+)"));
-   boost::tmatch what;
-   if(boost::regex_match(path, what, r))
-   {
-      // extract $1 as a CString:
-      return CString(what[1].first, what.length(1));
-   }
-   else
-   {
-      throw std::runtime_error("Invalid pathname");
-   }
-}
-      
-
-
template <class charT, class T>
-bool regex_match(
-   const ATL::CSimpleStringT<charT>& s,
-   const basic_regex<charT, T>& e,
-   boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
-

-

Effects: returns ::boost::regex_match(s.GetString(), - s.GetString() + s.GetLength(), e, f);

-

Example:

-
//
-// Find out if *password* meets our password requirements,
-// as defined by the regular expression *requirements*.
-//
-bool is_valid_password(const CString& password, const CString& requirements)
-{
-   return boost::regex_match(password, boost::make_regex(requirements));
-}      
-
-

regex_search

-

There are two additional overloads for regex_search, - the first reports what matched, the second does not:

-
template <class charT, class A, class T>
-bool regex_search(const ATL::CSimpleStringT<charT>& s,
-                  match_results<const charT*, A>& what,
-                  const basic_regex<charT, T>& e,
-                  boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
-

Effects: returns ::boost::regex_search(s.GetString(), - s.GetString() + s.GetLength(), what, e, f);

-

Example: Postcode extraction from an address string.

-
CString extract_postcode(const CString& address)
-{
-   // searches through address for a UK postcode and returns the result,
-   // the expression used is by Phil A. on www.regxlib.com:
-   boost::tregex r(__T("^(([A-Z]{1,2}[0-9]{1,2})|([A-Z]{1,2}[0-9][A-Z]))\\s?([0-9][A-Z]{2})$"));
-   boost::tmatch what;
-   if(boost::regex_search(address, what, r))
-   {
-      // extract $0 as a CString:
-      return CString(what[0].first, what.length());
-   }
-   else
-   {
-      throw std::runtime_error("No postcode found");
-   }
-}      
-
-
template <class charT, class T>
-inline bool regex_search(const ATL::CSimpleStringT<charT>& s,
-                 const basic_regex<charT, T>& e,
-                 boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
-
-

Effects: returns ::boost::regex_search(s.GetString(), - s.GetString() + s.GetLength(), e, f);

-
-

regex_replace

-

There are two additional overloads for regex_replace, - the first sends output to an output iterator, while the second creates a new - string:

-
template <class OutputIterator, class BidirectionalIterator, class traits, class
-          charT>
-OutputIterator regex_replace(OutputIterator out,
-                           BidirectionalIterator first,
-                           BidirectionalIterator last,
-                           const basic_regex<charT, traits>& e,
-                           const ATL::CSimpleStringT<charT>& fmt,
-                           match_flag_type flags = match_default)
-
-

Effects: returns ::boost::regex_replace(out, - first, last, e, fmt.GetString(), flags);

-
-template <class traits, class charT>
-ATL::CSimpleStringT<charT> regex_replace(const ATL::CSimpleStringT<charT>& s,
-                            const basic_regex<charT, traits>& e,
-                            const ATL::CSimpleStringT<charT>& fmt,
-                            match_flag_type flags = match_default)
-

Effects: returns a new string created using - regex_replace, and the same memory manager as string s.

-

Example:

-
//
-// Take a credit card number as a string of digits, 
-// and reformat it as a human readable string with "-"
-// separating each group of four digits:
-//
-const boost::tregex e(__T("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z"));
-const CString human_format = __T("$1-$2-$3-$4");
-
-CString human_readable_card_number(const CString& s)
-{
-   return boost::regex_replace(s, e, human_format);
-}
-      
-

Iterators

-

The following helper functions are provided to ease the conversion from an - MFC/ATL string to a regex_iterator or - regex_token_iterator:

-

regex_iterator creation helper

-
template <class charT>
-regex_iterator<charT const*> 
-   make_regex_iterator(
-      const ATL::CSimpleStringT<charT>& s, 
-      const basic_regex<charT>& e, 
-      ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
-
-

Effects: returns regex_iterator(s.GetString(), - s.GetString() + s.GetLength(), e, f);

-

Example:

-
void enumerate_links(const CString& html)
-{
-   // enumerate and print all the  links in some HTML text,
-   // the expression used is by Andew Lee on www.regxlib.com:
-   boost::tregex r(__T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
-   boost::tregex_iterator i(boost::make_regex_iterator(html, r)), j;
-   while(i != j)
-   {
-      std::cout << (*i)[1] << std::endl;
-      ++i;
-   }
-}
-      
-
-

regex_token_iterator creation helpers

-
template <class charT> 
-regex_token_iterator<charT const*> 
-   make_regex_token_iterator(
-      const ATL::CSimpleStringT<charT>& s, 
-      const basic_regex<charT>& e, 
-      int sub = 0, 
-      ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
-
-

Effects: returns regex_token_iterator(s.GetString(), - s.GetString() + s.GetLength(), e, sub, f);

-
template <class charT> 
-regex_token_iterator<charT const*> 
-   make_regex_token_iterator(
-      const ATL::CSimpleStringT<charT>& s, 
-      const basic_regex<charT>& e, 
-      const std::vector<int>& subs, 
-      ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
-
-

Effects: returns regex_token_iterator(s.GetString(), - s.GetString() + s.GetLength(), e, subs, f);

-
template <class charT, std::size_t N> 
-regex_token_iterator<charT const*> 
-   make_regex_token_iterator(
-      const ATL::CSimpleStringT<charT>& s, 
-      const basic_regex<charT>& e, 
-      const int (& subs)[N], 
-      ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
-
-

Effects: returns regex_token_iterator(s.GetString(), - s.GetString() + s.GetLength(), e, subs, f);

-

Example:

-
void enumerate_links2(const CString& html)
-{
-   // enumerate and print all the  links in some HTML text,
-   // the expression used is by Andew Lee on www.regxlib.com:
-   boost::tregex r(__T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
-   boost::tregex_token_iterator i(boost::make_regex_token_iterator(html, r, 1)), j;
-   while(i != j)
-   {
-      std::cout << *i << std::endl;
-      ++i;
-   }
-}      
-
-

Revised  - - 21 Dec 2004 -

-

© Copyright John Maddock 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/non_standard_strings.html b/doc/non_standard_strings.html deleted file mode 100644 index 5196abcf..00000000 --- a/doc/non_standard_strings.html +++ /dev/null @@ -1,53 +0,0 @@ - - - - Boost.Regex: Working With Non-Standard String Types - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Working With Non-Standard String Types.

-
-

Boost.Regex Index

-
-

-
-

-

The Boost.Regex algorithms and iterators are all iterator-based, with - convenience overloads of the algorithms provided that convert standard library - string types to iterator pairs internally.  If you want to search a - non-standard string type then the trick is to convert that string into an - iterator pair: so far I haven't come across any string types that can't be - handled this way, even if they're not officially iterator based.  - Certainly any string type that provides access to its internal buffer, along - with its length, can be converted into a pair of pointers (which can be used - as iterators).

-

Some non-standard string types are sufficiently common that wrappers have been - provided for them:

-

MFC/ATL Strings.
- ICU Strings.

-

-


-

-

Revised - - 24 Nov 2004 -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/partial_matches.html b/doc/partial_matches.html deleted file mode 100644 index f523fc9a..00000000 --- a/doc/partial_matches.html +++ /dev/null @@ -1,195 +0,0 @@ - - - - Boost.Regex: Partial Matches - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Partial Matches

-
-

Boost.Regex Index

-
-

-
-

-

The match-flag match_partial can - be passed to the following algorithms: regex_match, - regex_search, and regex_grep, - and used with the iterator regex_iterator. - When used it indicates that partial as well as full matches should be found. A - partial match is one that matched one or more characters at the end of the text - input, but did not match all of the regular expression (although it may have - done so had more input been available). Partial matches are typically used when - either validating data input (checking each character as it is entered on the - keyboard), or when searching texts that are either too long to load into memory - (or even into a memory mapped file), or are of indeterminate length (for - example the source may be a socket or similar). Partial and full matches can be - differentiated as shown in the following table (the variable M represents an - instance of match_results<> as filled in - by regex_match, regex_search or regex_grep):
-

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | Result | M[0].matched | M[0].first | M[0].second
No match | False | Undefined | Undefined | Undefined
Partial match | True | False | Start of partial match. | End of partial match (end of text).
Full match | True | True | Start of full match. | End of full match.
-

-

Be aware that using partial matches can sometimes result in somewhat imperfect - behavior:

- -

The following example - tests to see whether the text could be a valid credit card number. As the user - presses a key, the character entered would be added to the string being built - up, and passed to is_possible_card_number. If this returns true - then the text could be a valid card number, so the user interface's OK button - would be enabled. If it returns false, then this is not yet a valid card - number, but could be with more input, so the user interface would disable the - OK button. Finally, if the procedure throws an exception the input could never - become a valid number, and the inputted character must be discarded, and a - suitable error indication displayed to the user.

-
#include <string>
-#include <iostream>
-#include <boost/regex.hpp>
-
-boost::regex e("(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})");
-
-bool is_possible_card_number(const std::string& input)
-{
-   //
-   // return false for partial match, true for full match, or throw for
-   // impossible match based on what we have so far...
-   boost::match_results<std::string::const_iterator> what;
-   if(0 == boost::regex_match(input, what, e, boost::match_default | boost::match_partial))
-   {
-      // the input so far could not possibly be valid so reject it:
-      throw std::runtime_error("Invalid data entered - this could not possibly be a valid card number");
-   }
-   // OK so far so good, but have we finished?
-   if(what[0].matched)
-   {
-      // excellent, we have a result:
-      return true;
-   }
-   // what we have so far is only a partial match...
-   return false;
-}
-

In the following example, - text input is taken from a stream containing an unknown amount of text; this - example simply counts the number of html tags encountered in the stream. The - text is loaded into a buffer and searched a part at a time, if a partial match - was encountered, then the partial match gets searched a second time as the - start of the next batch of text:

-
#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <boost/regex.hpp>
-
-// match some kind of html tag:
-boost::regex e("<[^>]*>");
-// count how many:
-unsigned int tags = 0;
-// saved position of partial match:
-char* next_pos = 0;
-
-bool grep_callback(const boost::match_results<char*>& m)
-{
-   if(m[0].matched == false)
-   {
-      // save position and return:
-      next_pos = m[0].first;
-   }
-   else
-      ++tags;
-   return true;
-}
-
-void search(std::istream& is)
-{
-   char buf[4096];
-   next_pos = buf + sizeof(buf);
-   bool have_more = true;
-   while(have_more)
-   {
-      // how much do we copy forward from last try:
-      unsigned leftover = (buf + sizeof(buf)) - next_pos;
-      // and how much is left to fill:
-      unsigned size = next_pos - buf;
-      // copy forward whatever we have left:
-      memcpy(buf, next_pos, leftover);
-      // fill the rest from the stream:
-      unsigned read = is.readsome(buf + leftover, size);
-      // check to see if we've run out of text:
-      have_more = read == size;
-      // reset next_pos:
-      next_pos = buf + sizeof(buf);
-      // and then grep:
-      boost::regex_grep(grep_callback,
-                        buf,
-                        buf + read + leftover,
-                        e,
-                        boost::match_default | boost::match_partial);
-   }
-}
-

-


-

-

Revised - - 24 Oct 2003 -

-

© Copyright John Maddock 1998- - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/performance.html b/doc/performance.html deleted file mode 100644 index c7897ff3..00000000 --- a/doc/performance.html +++ /dev/null @@ -1,52 +0,0 @@ - - - - Boost.Regex: Performance - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Performance

-
-

Boost.Regex Index

-
-

-
-

-

The performance of Boost.regex in both recursive and non-recursive modes should - be broadly comparable to other regular expression libraries: recursive mode is - slightly faster (especially where memory allocation requires thread - synchronisation), but not by much.  The following pages compare - Boost.regex with various other regular expression libraries for the following - compilers:

-

Visual Studio.Net 2003 (recursive Boost.regex - implementation).

-

Gcc 3.2 (cygwin) (non-recursive Boost.regex - implementation).

-

-


-

-

Revised - - 24 Oct 2003 -

-

© Copyright John Maddock 1998- - - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - diff --git a/doc/posix_api.html b/doc/posix_api.html deleted file mode 100644 index 967b8407..00000000 --- a/doc/posix_api.html +++ /dev/null @@ -1,286 +0,0 @@ - - - - Boost.Regex: POSIX API Compatibility Functions - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

POSIX API Compatibility Functions

-
-

Boost.Regex Index

-
-

-
-

-
#include <boost/cregex.hpp>
-or:
-#include <boost/regex.h>
-

The following functions are available for users who need a POSIX compatible C - library, they are available in both Unicode and narrow character versions, the - standard POSIX API names are macros that expand to one version or the other - depending upon whether UNICODE is defined or not. -

-

Important: Note that all the symbols defined here are enclosed inside - namespace boost when used in C++ programs, unless you use #include - <boost/regex.h> instead - in which case the symbols are still defined in - namespace boost, but are made available in the global namespace as well.

-

The functions are defined as: -

-
extern "C" {
-int regcompA(regex_tA*, const char*, int);
-unsigned int regerrorA(int, const regex_tA*, char*, unsigned int);
-int regexecA(const regex_tA*, const char*, unsigned int, regmatch_t*, int);
-void regfreeA(regex_tA*);
-
-int regcompW(regex_tW*, const wchar_t*, int);
-unsigned int regerrorW(int, const regex_tW*, wchar_t*, unsigned int);
-int regexecW(const regex_tW*, const wchar_t*, unsigned int, regmatch_t*, int);
-void regfreeW(regex_tW*);
-
-#ifdef UNICODE
-#define regcomp regcompW
-#define regerror regerrorW
-#define regexec regexecW
-#define regfree regfreeW
-#define regex_t regex_tW
-#else
-#define regcomp regcompA
-#define regerror regerrorA
-#define regexec regexecA
-#define regfree regfreeA
-#define regex_t regex_tA
-#endif
-}
-

All the functions operate on structure regex_t, which exposes two public - members: -

-

unsigned int re_nsub this is filled in by regcomp and indicates - the number of sub-expressions contained in the regular expression. -

-

const TCHAR* re_endp points to the end of the expression to compile when - the flag REG_PEND is set. -

-

Footnote: regex_t is actually a #define - it is either regex_tA or regex_tW - depending upon whether UNICODE is defined or not, TCHAR is either char or - wchar_t again depending upon the macro UNICODE. -

-

regcomp

-

regcomp takes a pointer to a regex_t, a pointer to the expression - to compile and a flags parameter which can be a combination of: -
-   -

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 REG_EXTENDED: Compiles modern regular expressions. Equivalent to - regbase::char_classes | regbase::intervals | regbase::bk_refs. 
 REG_BASIC: Compiles basic (obsolete) regular expression syntax. - Equivalent to regbase::char_classes | regbase::intervals | regbase::limited_ops - | regbase::bk_braces | regbase::bk_parens | regbase::bk_refs. 
 REG_NOSPEC: All characters are ordinary, the expression is a - literal string. 
 REG_ICASE: Compiles for matching that ignores character case. 
 REG_NOSUB: Has no effect in this library. 
 REG_NEWLINE: When this flag is set a dot does not match the - newline character. 
 REG_PEND: When this flag is set the re_endp member of the - regex_t structure must point to the end of the regular expression to compile. 
 REG_NOCOLLATE: When this flag is set then locale dependent collation - for character ranges is turned off. 
 REG_ESCAPE_IN_LISTS: When this flag is set, then escape sequences are - permitted in bracket expressions (character sets). 
 REG_NEWLINE_ALT: When this flag is set then the newline character is - equivalent to the alternation operator |. 
 REG_PERL: Compiles Perl like regular expressions. 
 REG_AWK: A shortcut for awk-like behavior: REG_EXTENDED | - REG_ESCAPE_IN_LISTS 
 REG_GREP: A shortcut for grep like behavior: REG_BASIC | - REG_NEWLINE_ALT 
 REG_EGREP: A shortcut for egrep like behavior: - REG_EXTENDED | REG_NEWLINE_ALT 
-

-

regerror

-

regerror takes the following parameters, it maps an error code to a human - readable string: -
-

-

- - - - - - - - - - - - - - - - - - - - - - - - - -
 int code: The error code. 
 const regex_t* e: The regular expression (can be null). 
 char* buf: The buffer to fill in with the error message. 
 unsigned int buf_size: The length of buf. 
-

-

If the error code is OR'ed with REG_ITOA then the message that results is the - printable name of the code rather than a message, for example "REG_BADPAT". If - the code is REG_ATOI then e must not be null and e->re_endp must - point to the printable name of an error code; the return value is then the - value of the error code. For any other value of code, the return value - is the number of characters in the error message; if the return value is - greater than or equal to buf_size then regerror will have to be - called again with a larger buffer.

-

regexec

-

regexec finds the first occurrence of expression e within string buf. - If len is non-zero then *m is filled in with what matched the - regular expression, m[0] contains what matched the whole string, m[1] - the first sub-expression etc, see regmatch_t in the header file - declaration for more details. The eflags parameter can be a combination - of: -
-   -

-

- - - - - - - - - - - - - - - - - - - -
 REG_NOTBOL: Parameter buf does not represent the start of - a line. 
 REG_NOTEOL: Parameter buf does not terminate at the end of - a line. 
 REG_STARTEND: The string searched starts at buf + pmatch[0].rm_so - and ends at buf + pmatch[0].rm_eo. 
-

-

regfree

-

Finally regfree frees all the memory that was allocated by regcomp. -

-

Footnote: this is an abridged reference to the POSIX API functions, it is - provided for compatibility with other libraries, rather than an API to be used - in new code (unless you need access from a language other than C++). This - version of these functions should also happily coexist with other versions, as - the names used are macros that expand to the actual function names. -

-


-

-

Revised - - 24 Oct 2003 -

-

© Copyright John Maddock 1998- - - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - diff --git a/doc/redistributables.html b/doc/redistributables.html deleted file mode 100644 index cdad4739..00000000 --- a/doc/redistributables.html +++ /dev/null @@ -1,55 +0,0 @@ - - - - Boost.Regex: Redistributables and Library Names - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Redistributables and Library Names

-
-

Boost.Regex Index

-
-

-
-

-

If you are using Microsoft or Borland C++ and link to a dll version of the run - time library, then you can choose to also link to a dll version of boost.regex - by defining the symbol BOOST_REGEX_DYN_LINK when you compile your code. While - these dll's are redistributable, there are no "standard" versions, so when - installing on the user's PC, you should place these in a directory private to - your application, and not in the PC's directory path. Note that if you link to - a static version of your run time library, then you will also link to a static - version of boost.regex and no dll's will need to be distributed. The possible - boost.regex dll and library names are computed according to the - formula given in the getting started guide. -

-

Note: you can disable automatic library selection by defining the symbol - BOOST_REGEX_NO_LIB when compiling, this is useful if you want to build - Boost.Regex yourself in your IDE, or if you need to debug boost.regex. -

-

-


-

-

Revised  - - 28 June 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/reg_expression.html b/doc/reg_expression.html deleted file mode 100644 index 15962278..00000000 --- a/doc/reg_expression.html +++ /dev/null @@ -1,44 +0,0 @@ - - - - Boost.Regex: Class reg_expression (deprecated) - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Class reg_expression (deprecated)

-
-

Boost.Regex Index

-
-

-
-

-

The use of class template reg_expression is deprecated: use - basic_regex instead.

-

-


-

-

Revised - - 24 Oct 2003 -

-

© Copyright John Maddock 1998- - - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - diff --git a/doc/regbase.html b/doc/regbase.html deleted file mode 100644 index e2cf5c54..00000000 --- a/doc/regbase.html +++ /dev/null @@ -1,82 +0,0 @@ - - - - -Boost.Regex: regbase - - - - -

- - - - - - - -
-

-"C++

-
-

Boost.Regex

- -

regbase

-
-

-"Boost.Regex

-
- -
-
- - -
-

Use of the type boost::regbase is now deprecated, -and the type does not form a part of the -regular expression standardization proposal.  This type -still exists as a base class of boost::basic_regex, -and you can still refer to -boost::regbase::constant_name in your code, however for -maximum portability to other std regex implementations you should -instead use either:

- -
-boost::regex_constants::constant_name
-
- -

or

- -
-boost::regex::constant_name
-
- -

or

- -
-boost::wregex::constant_name
-
- -

- -
-
-
- - -

Revised - - 24 Oct 2003 -

-

© Copyright John Maddock 1998- - - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - - diff --git a/doc/regex.html b/doc/regex.html deleted file mode 100644 index 7a5f29de..00000000 --- a/doc/regex.html +++ /dev/null @@ -1,481 +0,0 @@ - - - - Boost.Regex: class RegEx (deprecated) - - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

class RegEx (deprecated)

-
-

Boost.Regex Index

-
-
-
-
-

The high level wrapper class RegEx is now deprecated and does not form a part - of the regular - expression standardization proposal.  This type still exists, and - existing code will continue to compile, however the following documentation is - unlikely to be further updated.

-
-#include <boost/cregex.hpp>
-
-

The class RegEx provides a high level simplified interface to the regular - expression library, this class only handles narrow character strings, and - regular expressions always follow the "normal" syntax - that is the same as the - perl / ECMAScript syntax.

-
-typedef bool (*GrepCallback)(const RegEx& expression);
-typedef bool (*GrepFileCallback)(const char* file, const RegEx& expression);
-typedef bool (*FindFilesCallback)(const char* file);
-
-class  RegEx
-{
-public:
-   RegEx();
-   RegEx(const RegEx& o);
-   ~RegEx();
-   RegEx(const char* c, bool icase = false);
-   explicit RegEx(const std::string& s, bool icase = false);
-   RegEx& operator=(const RegEx& o);
-   RegEx& operator=(const char* p);
-   RegEx& operator=(const std::string& s);
-   unsigned int SetExpression(const char* p, bool icase = false);
-   unsigned int SetExpression(const std::string& s, bool icase = false);
-   std::string Expression()const;
-   //
-   // now matching operators: 
-   // 
-   bool Match(const char* p, boost::match_flag_type flags = match_default);
-   bool Match(const std::string& s, boost::match_flag_type flags = match_default); 
-   bool Search(const char* p, boost::match_flag_type flags = match_default); 
-   bool Search(const std::string& s, boost::match_flag_type flags = match_default); 
-   unsigned int Grep(GrepCallback cb, const char* p, boost::match_flag_type flags = match_default); 
-   unsigned int Grep(GrepCallback cb, const std::string& s, boost::match_flag_type flags = match_default); 
-   unsigned int Grep(std::vector<std::string>& v, const char* p, boost::match_flag_type flags = match_default); 
-   unsigned int Grep(std::vector<std::string>& v, const std::string& s, boost::match_flag_type flags = match_default); 
-   unsigned int Grep(std::vector<unsigned int>& v, const char* p, boost::match_flag_type flags = match_default); 
-   unsigned int Grep(std::vector<unsigned int>& v, const std::string& s, boost::match_flag_type flags = match_default); 
-   unsigned int GrepFiles(GrepFileCallback cb, const char* files, bool recurse = false, boost::match_flag_type flags = match_default); 
-   unsigned int GrepFiles(GrepFileCallback cb, const std::string& files, bool recurse = false, boost::match_flag_type flags = match_default); 
-   unsigned int FindFiles(FindFilesCallback cb, const char* files, bool recurse = false, boost::match_flag_type flags = match_default); 
-   unsigned int FindFiles(FindFilesCallback cb, const std::string& files, bool recurse = false, boost::match_flag_type flags = match_default); 
-   std::string Merge(const std::string& in, const std::string& fmt, bool copy = true, boost::match_flag_type flags = match_default); 
-   std::string Merge(const char* in, const char* fmt, bool copy = true, boost::match_flag_type flags = match_default); 
-   unsigned Split(std::vector<std::string>& v, std::string& s, boost::match_flag_type flags = match_default, unsigned max_count = ~0); 
-   // 
-   // now operators for returning what matched in more detail: 
-   // 
-   unsigned int Position(int i = 0)const; 
-   unsigned int Length(int i = 0)const; 
-   bool Matched(int i = 0)const;
-   unsigned int Line()const; 
-   unsigned int Marks() const; 
-   std::string What(int i)const; 
-   std::string operator[](int i)const ; 
-
-   static const unsigned int npos;
-};    
-
-

Member functions for class RegEx are defined as follows:
-  

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 RegEx();Default constructor, constructs an instance of RegEx - without any valid expression. 
 RegEx(const RegEx& o);Copy constructor, all the properties of parameter o - are copied. 
 RegEx(const char* c, bool icase - = false);Constructs an instance of RegEx, setting the - expression to c, if icase is true then matching is - insensitive to case, otherwise it is sensitive to case. Throws bad_expression - on failure. 
 RegEx(const std::string& s, bool icase - = false);Constructs an instance of RegEx, setting the - expression to s, if icase is true then matching is - insensitive to case, otherwise it is sensitive to case. Throws bad_expression - on failure. 
 RegEx& operator=(const RegEx& - o);Default assignment operator. 
 RegEx& operator=(const char* - p);Assignment operator, equivalent to calling SetExpression(p, - false). Throws bad_expression on failure. 
 RegEx& operator=(const std::string& - s);Assignment operator, equivalent to calling SetExpression(s, - false). Throws bad_expression on failure. 
 unsigned int SetExpression(const char* - p, bool icase = false);Sets the current expression to p, if icase - is true then matching is insensitive to case, otherwise it is sensitive - to case. Throws bad_expression on failure. 
 unsigned int SetExpression(const - std::string& s, bool icase = false);Sets the current expression to s, if icase - is true then matching is insensitive to case, otherwise it is sensitive - to case. Throws bad_expression on failure. 
 std::string Expression()const;Returns a copy of the current regular expression. 
 bool Match(const char* p, - boost::match_flag_type flags = match_default);Attempts to match the current expression against the - text p using the match flags flags - see - match flags. Returns true if the expression matches the whole of - the input string. 
 bool Match(const std::string& s, - boost::match_flag_type flags = match_default) ;Attempts to match the current expression against the - text s using the match flags flags - see - match flags. Returns true if the expression matches the whole of - the input string. 
 bool Search(const char* p, - boost::match_flag_type flags = match_default);Attempts to find a match for the current expression - somewhere in the text p using the match flags flags - see - match flags. Returns true if the match succeeds. 
 bool Search(const std::string& s, - boost::match_flag_type flags = match_default) ;Attempts to find a match for the current expression - somewhere in the text s using the match flags flags - see - match flags. Returns true if the match succeeds. 
 unsigned int Grep(GrepCallback cb, const - char* p, boost::match_flag_type flags = match_default);Finds all matches of the current expression in the - text p using the match flags flags - see - match flags. For each match found calls the call-back function cb - as: cb(*this); -

If at any stage the call-back function returns false then the grep operation - terminates, otherwise continues until no further matches are found. Returns the - number of matches found.

-
 
 unsigned int Grep(GrepCallback cb, const - std::string& s, boost::match_flag_type flags = match_default);Finds all matches of the current expression in the - text s using the match flags flags - see - match flags. For each match found calls the call-back function cb - as: cb(*this); -

If at any stage the call-back function returns false then the grep operation - terminates, otherwise continues until no further matches are found. Returns the - number of matches found.

-
 
 unsigned int Grep(std::vector<std::string>& - v, const char* p, boost::match_flag_type flags = match_default);Finds all matches of the current expression in the - text p using the match flags flags - see - match flags. For each match pushes a copy of what matched onto v. - Returns the number of matches found. 
 unsigned int Grep(std::vector<std::string>& - v, const std::string& s, boost::match_flag_type flags = - match_default);Finds all matches of the current expression in the - text s using the match flags flags - see - match flags. For each match pushes a copy of what matched onto v. - Returns the number of matches found. 
 unsigned int Grep(std::vector<unsigned - int>& v, const char* p, boost::match_flag_type - flags = match_default);Finds all matches of the current expression in the - text p using the match flags flags - see - match flags. For each match pushes the starting index of what matched - onto v. Returns the number of matches found. 
 unsigned int Grep(std::vector<unsigned - int>& v, const std::string& s, boost::match_flag_type - flags = match_default);Finds all matches of the current expression in the - text s using the match flags flags - see - match flags. For each match pushes the starting index of what matched - onto v. Returns the number of matches found. 
 unsigned int GrepFiles(GrepFileCallback - cb, const char* files, bool recurse = false, - boost::match_flag_type flags = match_default);Finds all matches of the current expression in the - files files using the match flags flags - see - match flags. For each match calls the call-back function cb.  -

If the call-back returns false then the algorithm returns without considering - further matches in the current file, or any further files. 

-

The parameter files can include wild card characters '*' and '?', if the - parameter recurse is true then searches sub-directories for matching - file names. 

-

Returns the total number of matches found.

-

May throw an exception derived from std::runtime_error if file io fails.

-
 
 unsigned int GrepFiles(GrepFileCallback - cb, const std::string& files, bool recurse = false, - boost::match_flag_type flags = match_default);Finds all matches of the current expression in the - files files using the match flags flags - see - match flags. For each match calls the call-back function cb.  -

If the call-back returns false then the algorithm returns without considering - further matches in the current file, or any further files. 

-

The parameter files can include wild card characters '*' and '?', if the - parameter recurse is true then searches sub-directories for matching - file names. 

-

Returns the total number of matches found.

-

May throw an exception derived from std::runtime_error if file io fails.

-
 
 unsigned int FindFiles(FindFilesCallback - cb, const char* files, bool recurse = false, - boost::match_flag_type flags = match_default);Searches files to find all those which contain - at least one match of the current expression using the match flags flags - - see match flags. For each matching file - calls the call-back function cb.  -

If the call-back returns false then the algorithm returns without considering - any further files. 

-

The parameter files can include wild card characters '*' and '?', if the - parameter recurse is true then searches sub-directories for matching - file names. 

-

Returns the total number of files found.

-

May throw an exception derived from std::runtime_error if file io fails.

-
 
 unsigned int FindFiles(FindFilesCallback - cb, const std::string& files, bool recurse = false, - boost::match_flag_type flags = match_default);Searches files to find all those which contain - at least one match of the current expression using the match flags flags - - see match flags. For each matching file - calls the call-back function cb.  -

If the call-back returns false then the algorithm returns without considering - any further files. 

-

The parameter files can include wild card characters '*' and '?', if the - parameter recurse is true then searches sub-directories for matching - file names. 

-

Returns the total number of files found.

-

May throw an exception derived from std::runtime_error if file io fails.

-
 
 std::string Merge(const std::string& in, const - std::string& fmt, bool copy = true, boost::match_flag_type - flags = match_default);Performs a search and replace operation: searches - through the string in for all occurrences of the current expression, for - each occurrence replaces the match with the format string fmt. Uses flags - to determine what gets matched, and how the format string should be treated. If - copy is true then all unmatched sections of input are copied unchanged - to output, if the flag format_first_only is set then only the first - occurrence of the pattern found is replaced. Returns the new string. See - also format string syntax, match flags - and format flags. 
 std::string Merge(const char* in, const - char* fmt, bool copy = true, boost::match_flag_type flags = - match_default);Performs a search and replace operation: searches - through the string in for all occurrences of the current expression, for - each occurrence replaces the match with the format string fmt. Uses flags - to determine what gets matched, and how the format string should be treated. If - copy is true then all unmatched sections of input are copied unchanged - to output, if the flag format_first_only is set then only the first - occurrence of the pattern found is replaced. Returns the new string. See - also format string syntax, match flags - and format flags. 
 unsigned Split(std::vector<std::string>& v, - std::string& s, boost::match_flag_type flags = match_default, unsigned - max_count = ~0);Splits the input string and pushes each one onto the vector. If - the expression contains no marked sub-expressions, then one string is outputted - for each section of the input that does not match the expression. If the - expression does contain marked sub-expressions, then outputs one string for - each marked sub-expression each time a match occurs. Outputs no more than max_count - strings. Before returning, deletes from the input string s all of the - input that has been processed (all of the string if max_count was not - reached). Returns the number of strings pushed onto the vector. 
 unsigned int Position(int i = 0)const;Returns the position of what matched sub-expression i. - If i = 0 then returns the position of the whole match. Returns - RegEx::npos if the supplied index is invalid, or if the specified - sub-expression did not participate in the match. 
 unsigned int Length(int i = 0)const;Returns the length of what matched sub-expression i. - If i = 0 then returns the length of the whole match. Returns RegEx::npos - if the supplied index is invalid, or if the specified sub-expression did not - participate in the match. 
 bool Matched(int i = 0)const;Returns true if sub-expression i was matched, false otherwise. 
 unsigned int Line()const;Returns the line on which the match occurred, indexes - start from 1 not zero, if no match occurred then returns RegEx::npos. 
 unsigned int Marks() const;Returns the number of marked sub-expressions - contained in the expression. Note that this includes the whole match - (sub-expression zero), so the value returned is always >= 1. 
 std::string What(int i)const;Returns a copy of what matched sub-expression i. - If i = 0 then returns a copy of the whole match. Returns a null string - if the index is invalid or if the specified sub-expression did not participate - in a match. 
 std::string operator[](int i)const - ;Returns what(i); -

Can be used to simplify access to sub-expression matches, and make usage more - perl-like.

-
 
-
-
-

-
-

Revised - - 04 Feb 2004 -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/regex_format.html b/doc/regex_format.html deleted file mode 100644 index 74f21e9c..00000000 --- a/doc/regex_format.html +++ /dev/null @@ -1,204 +0,0 @@ - - - - -Boost.Regex: Algorithm regex_format (deprecated) - - - - -

- - - - - - - -
-

-"C++

-
-

Boost.Regex

- -

Algorithm regex_format (deprecated)

-
-

-"Boost.Regex

-
- -
-
- - -
-

The algorithm regex_format is deprecated; new code should use -match_results::format instead.  Existing code will continue to -compile, the following documentation is taken from the previous -version of boost.regex and will not be further updated:

- -

Algorithm regex_format

- -
-#include <boost/regex.hpp>
-
- -

The algorithm regex_format takes the results of a match and -creates a new string based upon a -format string, regex_format can be used for search and replace -operations:

- -
-template <class OutputIterator, class iterator, class Allocator, class charT>
-OutputIterator regex_format(OutputIterator out,
-                            const match_results<iterator, Allocator>& m,
-                            const charT* fmt,
-                            match_flag_type flags = 0);
-template <class OutputIterator, class iterator, class Allocator, class charT>
-OutputIterator regex_format(OutputIterator out,
-                            const match_results<iterator, Allocator>& m,
-                            const std::basic_string<charT>& fmt,
-                            match_flag_type flags = 0);
-
- -

The library also defines the following convenience variation of -regex_format, which returns the result directly as a string, rather -than outputting to an iterator [note - this version may not be -available, or may be available in a more limited form, depending -upon your compiler's capabilities]:

- -
-template <class iterator, class Allocator, class charT>
-std::basic_string<charT> regex_format
-                                 (const match_results<iterator, Allocator>& m, 
-                                  const charT* fmt,
-                                  match_flag_type flags = 0);
-
-template <class iterator, class Allocator, class charT>
-std::basic_string<charT> regex_format
-                                 (const match_results<iterator, Allocator>& m, 
-                                  const std::basic_string<charT>& fmt,
-                                  match_flag_type flags = 0);
-
- -

Parameters to the main version of the function are passed as -follows:

- -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 OutputIterator outAn output iterator type, the output -string is sent to this iterator. Typically this would be a -std::ostream_iterator. 
 const -match_results<iterator, Allocator>& mAn instance of match_results<> -obtained from one of the matching algorithms above, and denoting -what matched. 
 const charT* fmtA format string that determines how -the match is transformed into the new string. 
 unsigned flagsOptional flags which describe how the -format string is to be interpreted. 
- -
-
- - -

Format flags are defined as -follows:

- -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 format_allEnables all syntax options (perl-like -plus extensions). 
 format_sedAllows only a sed-like syntax. 
 format_perlAllows only a perl-like syntax. 
 format_no_copyDisables copying of unmatched sections -to the output string during -regex_merge operations. 
 format_first_onlyWhen this flag is set only the first occurrence will be replaced -(applies to regex_merge only). 
- -
-
- - -

The format string syntax (and available options) is described -more fully under format strings -.

- -

- -
-

Revised - - 24 Oct 2003 -

-

© Copyright John Maddock 1998- - - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - - diff --git a/doc/regex_grep.html b/doc/regex_grep.html deleted file mode 100644 index ac1d804b..00000000 --- a/doc/regex_grep.html +++ /dev/null @@ -1,377 +0,0 @@ - - - - Boost.Regex: Algorithm regex_grep (deprecated) - - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Algorithm regex_grep (deprecated)

-
-

Boost.Regex Index

-
-
-
-
-

The algorithm regex_grep is deprecated in favor of regex_iterator - which provides a more convenient and standard library friendly interface.

-

The following documentation is taken unchanged from the previous boost release, - and will not be updated in future.

-
-
-#include <boost/regex.hpp>
-
-

regex_grep allows you to search through a bidirectional-iterator range and - locate all the (non-overlapping) matches with a given regular expression. The - function is declared as:

-
-template <class Predicate, class iterator, class charT, class traits>
-unsigned int regex_grep(Predicate foo,
-                         iterator first,
-                         iterator last,
-                         const basic_regex<charT, traits>& e,
-                         boost::match_flag_type flags = match_default)
-
-

The library also defines the following convenience versions, which take either - a const charT*, or a const std::basic_string<>& in place of a pair of - iterators [note - these versions may not be available, or may be available in a - more limited form, depending upon your compiler's capabilities]:

-
-template <class Predicate, class charT, class traits>
-unsigned int regex_grep(Predicate foo, 
-              const charT* str, 
-              const basic_regex<charT, traits>& e, 
-              boost::match_flag_type flags = match_default);
-
-template <class Predicate, class ST, class SA, class charT, class traits>
-unsigned int regex_grep(Predicate foo, 
-              const std::basic_string<charT, ST, SA>& s, 
-              const basic_regex<charT, traits>& e, 
-              boost::match_flag_type flags = match_default);
-
-

The parameters for the primary version of regex_grep have the following - meanings: 

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 fooA predicate function object or function pointer, see - below for more information. 
 firstThe start of the range to search. 
 lastThe end of the range to search. 
 eThe regular expression to search for. 
 flagsThe flags that determine how matching is carried out, - one of the match_flags enumerators. 
-
-
-

The algorithm finds all of the non-overlapping matches of the expression e, for - each match it fills a match_results<iterator> - structure, which contains information on what matched, and calls the predicate - foo, passing the match_results<iterator> as a single argument. If the - predicate returns true, then the grep operation continues, otherwise it - terminates without searching for further matches. The function returns the - number of matches found.

-

The general form of the predicate is:

-
-struct grep_predicate
-{
-   bool operator()(const match_results<iterator_type>& m);
-};
-
-

For example the regular expression "a*b" would find one match in the string - "aaaaab" and two in the string "aaabb".

-

Remember this algorithm can be used for a lot more than implementing a version - of grep, the predicate can be and do anything that you want, grep utilities - would output the results to the screen, another program could index a file - based on a regular expression and store a set of bookmarks in a list, or a text - file conversion utility would output to file. The results of one regex_grep can - even be chained into another regex_grep to create recursive parsers.

-

The algorithm may throw std::runtime_error if the complexity - of matching the expression against an N character string begins to exceed O(N^2), - or if the program runs out of stack space while matching the expression (if - Boost.regex is configured in recursive mode), - or if the matcher exhausts its permitted memory allocation (if Boost.regex is - configured in non-recursive mode).

-

Example: convert - the example from regex_search to use regex_grep instead:

-
-#include <string> 
-#include <map> 
-#include <boost/regex.hpp> 
-
-// IndexClasses: 
-// takes the contents of a file in the form of a string 
-// and searches for all the C++ class definitions, storing 
-// their locations in a map of strings/int's 
-typedef std::map<std::string, int, std::less<std::string> > map_type; 
-
-const char* re = 
-   // possibly leading whitespace:   
-   "^[[:space:]]*" 
-   // possible template declaration:
-   "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
-   // class or struct:
-   "(class|struct)[[:space:]]*" 
-   // leading declspec macros etc:
-   "("
-      "\\<\\w+\\>"
-      "("
-         "[[:blank:]]*\\([^)]*\\)"
-      ")?"
-      "[[:space:]]*"
-   ")*" 
-   // the class name
-   "(\\<\\w*\\>)[[:space:]]*" 
-   // template specialisation parameters
-   "(<[^;:{]+>)?[[:space:]]*"
-   // terminate in { or :
-   "(\\{|:[^;\\{()]*\\{)";
-
-boost::regex expression(re); 
-class IndexClassesPred 
-{ 
-   map_type& m; 
-   std::string::const_iterator base; 
-public: 
-   IndexClassesPred(map_type& a, std::string::const_iterator b) : m(a), base(b) {} 
-   bool operator()(const  smatch& what) 
-   { 
-      // what[0] contains the whole string 
-      // what[5] contains the class name. 
-      // what[6] contains the template specialisation if any. 
-      // add class name and position to map: 
-      m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
-                what[5].first - base; 
-      return true; 
-   } 
-}; 
-void IndexClasses(map_type& m, const std::string& file) 
-{ 
-   std::string::const_iterator start, end; 
-   start = file.begin(); 
-   end = file.end(); 
-   regex_grep(IndexClassesPred(m, start), start, end, expression); 
-}
-
-

Example: Use - regex_grep to call a global callback function:

-
-#include <string> 
-#include <map> 
-#include <boost/regex.hpp> 
-
-// purpose: 
-// takes the contents of a file in the form of a string 
-// and searches for all the C++ class definitions, storing 
-// their locations in a map of strings/int's 
-typedef std::map<std::string, int, std::less<std::string> > map_type; 
-
-const char* re = 
-   // possibly leading whitespace:   
-   "^[[:space:]]*" 
-   // possible template declaration:
-   "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
-   // class or struct:
-   "(class|struct)[[:space:]]*" 
-   // leading declspec macros etc:
-   "("
-      "\\<\\w+\\>"
-      "("
-         "[[:blank:]]*\\([^)]*\\)"
-      ")?"
-      "[[:space:]]*"
-   ")*" 
-   // the class name
-   "(\\<\\w*\\>)[[:space:]]*" 
-   // template specialisation parameters
-   "(<[^;:{]+>)?[[:space:]]*"
-   // terminate in { or :
-   "(\\{|:[^;\\{()]*\\{)";
-
-boost::regex expression(re);
-map_type class_index; 
-std::string::const_iterator base; 
-
-bool grep_callback(const  boost::smatch& what) 
-{ 
-   // what[0] contains the whole string 
-   // what[5] contains the class name. 
-   // what[6] contains the template specialisation if any. 
-   // add class name and position to map: 
-   class_index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
-                what[5].first - base; 
-   return true; 
-} 
-void IndexClasses(const std::string& file) 
-{ 
-   std::string::const_iterator start, end; 
-   start = file.begin(); 
-   end = file.end(); 
-   base = start; 
-   regex_grep(grep_callback, start, end, expression, match_default); 
-}
- 
-
-

Example: use - regex_grep to call a class member function, use the standard library adapters std::mem_fun - and std::bind1st to convert the member function into a predicate:

-
-#include <string> 
-#include <map> 
-#include <boost/regex.hpp> 
-#include <functional> 
-// purpose: 
-// takes the contents of a file in the form of a string 
-// and searches for all the C++ class definitions, storing 
-// their locations in a map of strings/int's 
-
-typedef std::map<std::string, int, std::less<std::string> > map_type; 
-class class_index 
-{ 
-   boost::regex expression; 
-   map_type index; 
-   std::string::const_iterator base; 
-   bool  grep_callback(boost::smatch what); 
-public: 
-   void IndexClasses(const std::string& file); 
-   class_index() 
-      : index(), 
-        expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" 
-                   "(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?" 
-                   "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?" 
-                   "(\\{|:[^;\\{()]*\\{)" 
-                   ){} 
-}; 
-bool  class_index::grep_callback(boost::smatch what) 
-{ 
-   // what[0] contains the whole string 
-   // what[5] contains the class name. 
-   // what[6] contains the template specialisation if any. 
-   // add class name and position to map: 
-   index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
-               what[5].first - base; 
-   return true; 
-} 
-
-void class_index::IndexClasses(const std::string& file) 
-{ 
-   std::string::const_iterator start, end; 
-   start = file.begin(); 
-   end = file.end(); 
-   base = start; 
-   regex_grep(std::bind1st(std::mem_fun(&class_index::grep_callback), this), 
-              start, 
-              end, 
-              expression); 
-} 
- 
-
-

Finally, C++ - Builder users can use C++ Builder's closure type as a callback argument:

-
-#include <string> 
-#include <map> 
-#include <boost/regex.hpp> 
-#include <functional> 
-// purpose: 
-// takes the contents of a file in the form of a string 
-// and searches for all the C++ class definitions, storing 
-// their locations in a map of strings/int's 
-
-typedef std::map<std::string, int, std::less<std::string> > map_type; 
-class class_index 
-{ 
-   boost::regex expression; 
-   map_type index; 
-   std::string::const_iterator base; 
-   typedef  boost::smatch arg_type; 
-   bool grep_callback(const arg_type& what); 
-public: 
-   typedef bool (__closure* grep_callback_type)(const arg_type&); 
-   void IndexClasses(const std::string& file); 
-   class_index() 
-      : index(), 
-        expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" 
-                   "(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?" 
-                   "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?" 
-                   "(\\{|:[^;\\{()]*\\{)" 
-                   ){} 
-}; 
-
-bool class_index::grep_callback(const arg_type& what) 
-{ 
-   // what[0] contains the whole string    
-// what[5] contains the class name.    
-// what[6] contains the template specialisation if any.    
-// add class name and position to map:    
-index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
-               what[5].first - base; 
-   return true; 
-} 
-
-void class_index::IndexClasses(const std::string& file) 
-{ 
-   std::string::const_iterator start, end; 
-   start = file.begin(); 
-   end = file.end(); 
-   base = start; 
-   class_index::grep_callback_type cl = &(this->grep_callback); 
-   regex_grep(cl, 
-            start, 
-            end, 
-            expression); 
-}
-
-

-
-

Revised - - 26 June 2004 -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/regex_iterator.html b/doc/regex_iterator.html deleted file mode 100644 index f2e647f5..00000000 --- a/doc/regex_iterator.html +++ /dev/null @@ -1,456 +0,0 @@ - - - - Boost.Regex: regex_iterator - - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

regex_iterator

-
-

Boost.Regex Index

-
-
-
-
-

Contents

-
-
Synopsis
Description
- Examples
-
-

Synopsis

-

The iterator type regex_iterator will enumerate all of the regular expression - matches found in some sequence: dereferencing a regex_iterator yields a - reference to a match_results object.

-
-template <class BidirectionalIterator, 
-          class charT = iterator_traits<BidirectionalIterator>::value_type,
-          class traits = regex_traits<charT> >
-class regex_iterator 
-{
-public:
-   typedef          basic_regex<charT, traits>                              regex_type;
-   typedef          match_results<BidirectionalIterator>                    value_type;
-   typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
-   typedef          const value_type*                                       pointer;
-   typedef          const value_type&                                       reference;
-   typedef          std::forward_iterator_tag                               iterator_category;
-   
-   regex_iterator();
-   regex_iterator(BidirectionalIterator a, BidirectionalIterator b, 
-                  const regex_type& re, 
-                  match_flag_type m = match_default);
-   regex_iterator(const regex_iterator&);
-   regex_iterator& operator=(const regex_iterator&);
-   bool operator==(const regex_iterator&)const;
-   bool operator!=(const regex_iterator&)const;
-   const value_type& operator*()const;
-   const value_type* operator->()const;
-   regex_iterator& operator++();
-   regex_iterator operator++(int);
-};
-
-typedef
-regex_iterator<const
-
-char*> cregex_iterator; typedef                  regex_iterator<std::string::const_iterator>
-sregex_iterator; #ifndef  BOOST_NO_WREGEX
-typedef regex_iterator<const
-wchar_t*> wcregex_iterator; typedef               regex_iterator<std::wstring::const_iterator>
-wsregex_iterator; #endif template
-<class
-
-charT, class traits> regex_iterator<const charT*,
-charT, traits> 
-   make_regex_iterator(const charT* p, const basic_regex<charT, traits>& e, regex_constants::match_flag_type m =       regex_constants::match_default); template  <class
-   
-charT, class traits, class ST, class SA> regex_iterator<typename std::basic_string<charT,
-ST, SA>::const_iterator, charT, traits> 
-   make_regex_iterator(const std::basic_string<charT, ST, SA>& p, const basic_regex<charT, traits>& e, regex_constants::match_flag_type m = regex_constants::match_default);
-
-
-

Description

-

A regex_iterator is constructed from a pair of iterators, and enumerates all - occurrences of a regular expression within that iterator range.

-

-regex_iterator();
-
- -

Effects: constructs an end of sequence regex_iterator.

-
regex_iterator(BidirectionalIterator a, BidirectionalIterator b, 
-               const regex_type& re, 
-               match_flag_type m = match_default);
-
- -

Effects: constructs a regex_iterator that will enumerate all occurrences - of the expression re, within the sequence [a,b), and found - using match flags m.  The object re must exist for the - lifetime of the regex_iterator.

-

Throws: std::runtime_error if the complexity of - matching the expression against an N character string begins to exceed O(N²), - or if the program runs out of stack space while matching the expression (if - Boost.regex is configured in recursive mode), - or if the matcher exhausts its permitted memory allocation (if Boost.regex is - configured in non-recursive mode).

-

-regex_iterator(const regex_iterator& that);
-
- -

Effects: constructs a copy of that.

- -

Postconditions: *this == that.

-

-regex_iterator& operator=(const regex_iterator&);
-
- -

Effects: sets the members of *this equal to those in that.

- -

Postconditions: *this == that.

-

-bool operator==(const regex_iterator& that)const;
-
- -

Effects: returns true if *this is equal to that.

-

-bool operator!=(const regex_iterator&)const;
-
- -

Effects: returns !(*this == that).

-

-const value_type& operator*()const;
-
-

Effects: dereferencing a regex_iterator object it yields a - const reference to a match_results object, - whose members are set as follows:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Element

-
-

Value

-
-

(*it).size()

-
-

re.mark_count()

-
-

(*it).empty()

-
-

false

-
-

(*it).prefix().first

-
-

The end of the last match found, or the start of the underlying sequence if - this is the first match enumerated

-
-

(*it).prefix().last

-
-

The same as the start of the match found:
- (*it)[0].first

-
-

(*it).prefix().matched

-
-

True if the prefix did not match an empty string:
- (*it).prefix().first != (*it).prefix().second

-
-

(*it).suffix().first

-
-

The same as the end of the match found:
- (*it)[0].second

-
-

(*it).suffix().last

-
-

The end of the underlying sequence.

-
-

(*it).suffix().matched

-
-

True if the suffix did not match an empty string:
- (*it).suffix().first != (*it).suffix().second

-
-

(*it)[0].first

-
-

The start of the sequence of characters that matched the regular expression

-
-

(*it)[0].second

-
-

The end of the sequence of characters that matched the regular expression

-
-

(*it)[0].matched

-
-

true if a full match was found, and false if it was a - partial match (found as a result of the match_partial flag being - set).

-
-

(*it)[n].first

-
-

For all integers n < (*it).size(), the start of the sequence that matched - sub-expression n. Alternatively, if sub-expression n did not participate - in the match, then last.

-
-

(*it)[n].second

-
-

For all integers n < (*it).size(), the end of the sequence that matched - sub-expression n. Alternatively, if sub-expression n did not participate - in the match, then last.

-
-

(*it)[n].matched

-
-

For all integers n < (*it).size(), true if sub-expression n participated - in the match, false otherwise.

-
(*it).position(n): For all integers n < (*it).size(), the - distance from the start of the underlying sequence to the start of - sub-expression match n.
-
-
-

-const value_type* operator->()const;
-
- -

Effects: returns &(*this).

-

-regex_iterator& operator++();
-
-

Effects: moves the iterator to the next match in the - underlying sequence, or the end of sequence iterator if none is found. -  When the last match found matched a zero length string, then the - regex_iterator will find the next match as follows: if there exists a non-zero - length match that starts at the same location as the last one, then returns it, - otherwise starts looking for the next (possibly zero length) match from one - position to the right of the last match.

-

Throws: std::runtime_error if the complexity of - matching the expression against an N character string begins to exceed O(N²), - or if the program runs out of stack space while matching the expression (if - Boost.regex is configured in recursive mode), - or if the matcher exhausts its permitted memory allocation (if Boost.regex is - configured in non-recursive mode).

- -

Returns: *this.

-

-regex_iterator operator++(int);
-
- -

Effects: constructs a copy result of *this, - then calls ++(*this).

- -

Returns: result.

-
template <class charT, class traits> regex_iterator<const charT*, charT, traits>
-make_regex_iterator(const charT* 
-   p, const basic_regex<charT, 
-                       traits>& e, regex_constants::match_flag_type m 
-                       = regex_constants::match_default); template <class
-                       
-charT, class traits, class ST, class SA> regex_iterator<typename std::basic_string<charT,
-ST, SA>::const_iterator, charT, traits> 
-   make_regex_iterator(const std::basic_string<charT, ST, SA>& p, 
-                       const basic_regex<charT, traits>& e, 
-                       regex_constants::match_flag_type m = regex_constants::match_default);
-
-

Effects: returns an iterator that enumerates all occurrences of - expression e in text p using match_flags m.

-

Examples

-

The following example - takes a C++ source file and builds up an index of class names, and the location - of that class in the file.

-
-#include <string>
-#include <map>
-#include <fstream>
-#include <iostream>
-#include <boost/regex.hpp>
-
-using namespace std;
-
-// purpose:
-// takes the contents of a file in the form of a string
-// and searches for all the C++ class definitions, storing
-// their locations in a map of strings/int's
-
-typedef std::map<std::string, std::string::difference_type, std::less<std::string> > map_type;
-
-const char* re = 
-   // possibly leading whitespace:   
-   "^[[:space:]]*" 
-   // possible template declaration:
-   "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
-   // class or struct:
-   "(class|struct)[[:space:]]*" 
-   // leading declspec macros etc:
-   "("
-      "\\<\\w+\\>"
-      "("
-         "[[:blank:]]*\\([^)]*\\)"
-      ")?"
-      "[[:space:]]*"
-   ")*" 
-   // the class name
-   "(\\<\\w*\\>)[[:space:]]*" 
-   // template specialisation parameters
-   "(<[^;:{]+>)?[[:space:]]*"
-   // terminate in { or :
-   "(\\{|:[^;\\{()]*\\{)";
-
-
-boost::regex expression(re);
-map_type class_index;
-
-bool regex_callback(const boost::match_results<std::string::const_iterator>& what)
-{
-   // what[0] contains the whole string
-   // what[5] contains the class name.
-   // what[6] contains the template specialisation if any.
-   // add class name and position to map:
-   class_index[what[5].str() + what[6].str()] = what.position(5);
-   return true;
-}
-
-void load_file(std::string& s, std::istream& is)
-{
-   s.erase();
-   s.reserve(is.rdbuf()->in_avail());
-   char c;
-   while(is.get(c))
-   {
-      if(s.capacity() == s.size())
-         s.reserve(s.capacity() * 3);
-      s.append(1, c);
-   }
-}
-
-int main(int argc, const char** argv)
-{
-   std::string text;
-   for(int i = 1; i < argc; ++i)
-   {
-      cout << "Processing file " << argv[i] << endl;
-      std::ifstream fs(argv[i]);
-      load_file(text, fs);
-      // construct our iterators:
-      boost::sregex_iterator m1(text.begin(), text.end(), expression);
-      boost::sregex_iterator m2;
-      std::for_each(m1, m2, &regex_callback);
-      // copy results:
-      cout << class_index.size() << " matches found" << endl;
-      map_type::iterator c, d;
-      c = class_index.begin();
-      d = class_index.end();
-      while(c != d)
-      {
-         cout << "class \"" << (*c).first << "\" found at index: " << (*c).second << endl;
-         ++c;
-      }
-      class_index.erase(class_index.begin(), class_index.end());
-   }
-   return 0;
-}
-
-
-

Revised   - - 06 Jan 05  -

-

© Copyright John Maddock 1998- - 2005

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/regex_match.html b/doc/regex_match.html deleted file mode 100644 index 31c5eba6..00000000 --- a/doc/regex_match.html +++ /dev/null @@ -1,318 +0,0 @@ - - - - Boost.Regex: Algorithm regex_match - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Algorithm regex_match

-
-

Boost.Regex Index

-
-

-
-

-

Contents

-
-
Synopsis
Description
- Examples
-
-

Synopsis

-
#include <boost/regex.hpp> 
-

- The algorithm regex_match determines whether a given regular expression - matches all of a given character sequence denoted by a pair of - bidirectional-iterators. The algorithm is defined as follows; the main use of - this function is data input validation. -

Note that the result is true only if the expression matches the whole of - the input sequence.  If you want to search for an expression - somewhere within the sequence then use regex_search.  - If you want to match a prefix of the character string then use - regex_search with the flag match_continuous - set. -

-template <class BidirectionalIterator, class Allocator, class charT, class traits>
-bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
-                 match_results<BidirectionalIterator, Allocator>& m,
-                 const basic_regex <charT, traits>& e,
-                 match_flag_type flags = match_default); 
-
-template <class BidirectionalIterator, class charT, class traits>
-bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
-                 const basic_regex <charT, traits>& e,
-                 match_flag_type flags = match_default); 
-
-template <class charT, class Allocator, class traits>
-bool regex_match(const charT* str, match_results<const charT*, Allocator>& m,
-                 const basic_regex <charT, traits>& e,
-                 match_flag_type flags = match_default); 
-
-template <class ST, class SA, class Allocator, class charT, class traits>
-bool regex_match(const basic_string<charT, ST, SA>& s,
-                 match_results<typename basic_string<charT, ST, SA>::const_iterator, Allocator>& m, 
-                 const basic_regex <charT, traits>& e, 
-                 match_flag_type flags = match_default); 
-
-template <class charT, class traits>
-bool regex_match(const charT* str,
-                 const basic_regex <charT, traits>& e,
-                 match_flag_type flags = match_default); 
-
-template <class ST, class SA, class charT, class traits>
-bool regex_match(const basic_string<charT, ST, SA>& s,
-                 const basic_regex <charT, traits>& e,
-                 match_flag_type flags = match_default);
-
-

Description

-
template <class BidirectionalIterator, class Allocator, class charT, class traits>
-bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
-                 match_results<BidirectionalIterator, Allocator>& m,
-                 const basic_regex <charT, traits>& e,
-                 match_flag_type flags = match_default);
-

Requires: Type BidirectionalIterator meets the requirements of a - Bidirectional Iterator (24.1.4).

-

Effects: Determines whether there is an exact match between the regular - expression e, and all of the character sequence [first, last), parameter - flags is used to control how the expression - is matched against the character sequence. Returns true if such a match - exists, false otherwise.

-

Throws: std::runtime_error if the complexity of - matching the expression against an N character string begins to exceed O(N²), - or if the program runs out of stack space while matching the expression (if - Boost.regex is configured in recursive mode), - or if the matcher exhausts its permitted memory allocation (if Boost.regex is - configured in non-recursive mode).

-

Postconditions: If the function returns false, then the effect on - parameter m is undefined, otherwise the effects on parameter m are - given in the table:

-

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Element -

-
-

Value -

-
-

m.size()

-
-

e.mark_count()

-
-

m.empty()

-
-

false

-
-

m.prefix().first

-
-

first

-
-

m.prefix().last

-
-

first

-
-

m.prefix().matched

-
-

false

-
-

m.suffix().first

-
-

last

-
-

m.suffix().last

-
-

last

-
-

m.suffix().matched

-
-

false

-
-

m[0].first

-
-

first

-
-

m[0].second

-
-

last

-
-

m[0].matched

-
-

true if a full match was found, and false if it was - a partial match (found as a result of the match_partial flag being - set).

-
-

m[n].first

-
-

For all integers n < m.size(), the start of the sequence that matched - sub-expression n. Alternatively, if sub-expression n did not participate - in the match, then last.

-
-

m[n].second

-
-

For all integers n < m.size(), the end of the sequence that matched - sub-expression n. Alternatively, if sub-expression n did not participate - in the match, then last.

-
-

m[n].matched

-
-

For all integers n < m.size(), true if sub-expression n participated - in the match, false otherwise.

-
-
-

-
-
 
-
template <class BidirectionalIterator, class charT, class traits>
-bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
-                 const basic_regex <charT, traits>& e,
-                 match_flag_type flags = match_default);
-

Effects: Behaves "as if" by constructing an instance of - match_results<BidirectionalIterator> what, - and then returning the result of regex_match(first, last, what, e, flags).

-
template <class charT, class Allocator, class traits>
-bool regex_match(const charT* str, match_results<const charT*, Allocator>& m,
-                 const basic_regex <charT, traits>& e,
-                 match_flag_type flags = match_default);
-

Effects: Returns the result of regex_match(str, str + - char_traits<charT>::length(str), m, e, flags).

-
template <class ST, class SA, class Allocator,
-          class charT, class traits>
-bool regex_match(const basic_string<charT, ST, SA>& s,
-                 match_results<typename basic_string<charT, ST, SA>::const_iterator, Allocator>& m, 
-                 const basic_regex <charT, traits>& e, 
-                 match_flag_type flags = match_default);
-

Effects: Returns the result of regex_match(s.begin(), s.end(), m, e, - flags).

-
template <class charT, class traits>
-bool regex_match(const charT* str,
-                 const basic_regex <charT, traits>& e,
-                 match_flag_type flags = match_default);
-

Effects: Returns the result of regex_match(str, str + - char_traits<charT>::length(str), e, flags).

-
template <class ST, class SA, class charT, class traits>
-bool regex_match(const basic_string<charT, ST, SA>& s,
-                 const basic_regex <charT, traits>& e,
-                 match_flag_type flags = match_default);
-

Effects: Returns the result of regex_match(s.begin(), s.end(), e, - flags). -

Examples

-

The following example - processes an ftp response: -

-
#include <stdlib.h> 
-#include <boost/regex.hpp> 
-#include <string> 
-#include <iostream> 
-
-using namespace boost; 
-
-regex expression("([0-9]+)(\\-| |$)(.*)"); 
-
-// process_ftp: 
-// on success returns the ftp response code, and fills 
-// msg with the ftp response message. 
-int process_ftp(const char* response, std::string* msg) 
-{ 
-   cmatch what; 
-   if(regex_match(response, what, expression)) 
-   { 
-      // what[0] contains the whole string 
-      // what[1] contains the response code 
-      // what[2] contains the separator character 
-      // what[3] contains the text message. 
-      if(msg) 
-         msg->assign(what[3].first, what[3].second); 
-      return std::atoi(what[1].first); 
-   } 
-   // failure did not match 
-   if(msg) 
-      msg->erase(); 
-   return -1; 
-}
-      

-


-

-

Revised  - - 26 June 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/regex_merge.html b/doc/regex_merge.html deleted file mode 100644 index bbfcc23b..00000000 --- a/doc/regex_merge.html +++ /dev/null @@ -1,45 +0,0 @@ - - - - Boost.Regex: Algorithm regex_merge (deprecated) - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Algorithm regex_merge (deprecated)

-
-

Boost.Regex Index

-
-

-
-

-

Algorithm regex_merge has been renamed regex_replace, - existing code will continue to compile, but new code should use - regex_replace instead.

-

-


-

-

Revised - - 24 Oct 2003 -

-

© Copyright John Maddock 1998- - - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - diff --git a/doc/regex_replace.html b/doc/regex_replace.html deleted file mode 100644 index fd0392eb..00000000 --- a/doc/regex_replace.html +++ /dev/null @@ -1,256 +0,0 @@ - - - - Boost.Regex: Algorithm regex_replace - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Algorithm regex_replace

-
-

Boost.Regex Index

-
-

-
-

Contents

-
-
Synopsis
Description
- Examples
-

Synopsis

-
#include <boost/regex.hpp> 
-

The algorithm regex_replace searches through a string finding - all the matches to the regular expression: for each match it then calls - match_results::format to format the string and sends the result to the - output iterator. Sections of text that do not match are copied to the output - unchanged only if the flags parameter does not have the flag - format_no_copy set. If the flag format_first_only - is set then only the first occurrence is replaced rather than all - occurrences. 

template <class OutputIterator, class BidirectionalIterator, class traits, class charT>
-OutputIterator regex_replace(OutputIterator out,
-                           BidirectionalIterator first,
-                           BidirectionalIterator last,
-                           const basic_regex<charT, traits>& e,
-                           const basic_string<charT>& fmt,
-                           match_flag_type flags = match_default);
-
-template <class traits, class charT>
-basic_string<charT> regex_replace(const basic_string<charT>& s,
-                            const basic_regex<charT, traits>& e,
-                            const basic_string<charT>& fmt,
-                            match_flag_type flags = match_default);
-
-
-

Description

-
template <class OutputIterator, class BidirectionalIterator, class traits, class charT>
-OutputIterator regex_replace(OutputIterator out,
-                           BidirectionalIterator first,
-                           BidirectionalIterator last,
-                           const basic_regex<charT, traits>& e,
-                           const basic_string<charT>& fmt,
-                           match_flag_type flags = match_default);
-

Enumerates all the occurrences of expression e in the sequence [first, - last), replacing each occurrence with the string that results by merging the - match found with the format string fmt, and copies the resulting - string to out.

-

If the flag format_no_copy is set in flags then unmatched sections of - text are not copied to output.  -

-

If the flag format_first_only is set in flags then only the first - occurrence of e is replaced.  -

-

The manner in which the format string fmt is interpreted, along with - the rules used for finding matches, are determined by the - flags set in flags.

-

Effects: Constructs a - regex_iterator - object: -

-
regex_iterator<BidirectionalIterator, charT, traits> 
i(first, last, e, flags),
-

and uses - - i - to enumerate through all of the matches m of type - - match_results - <BidirectionalIterator> that - occur within the sequence [first, last). -

-

If no such matches are found - and

-
!(flags & format_no_copy) 
-

then calls -

-
std::copy(first, last, out). 
-

Otherwise, for each match found, - if

-
!(flags & format_no_copy) 
-

calls -

-
std::copy(m.prefix().first, m.prefix().last, out), 
-

and then calls -

-
m.format(out, fmt, flags). 
-

Finally - if

-
!(flags & format_no_copy) 
-

calls -

-
std::copy(last_m.suffix().first, last_m.suffix().last, out) 
-

where - - last_m - - is a copy of the last match found. -

-

If - flags & -format_first_only - is non-zero then only the first match found is replaced.

-

Throws: std::runtime_error if the complexity of - matching the expression against an N character string begins to exceed O(N²), - or if the program runs out of stack space while matching the expression (if - Boost.regex is configured in recursive mode), - or if the matcher exhausts its permitted memory allocation (if Boost.regex is - configured in non-recursive mode).

-

Returns: out. -

-
template <class traits, class charT>
-basic_string<charT> regex_replace(const basic_string<charT>& s,
-                            const basic_regex<charT, traits>& e,
-                            const basic_string<charT>& fmt,
-                            match_flag_type flags = match_default);
-

Effects: Constructs an object basic_string<charT> result, - calls regex_replace(back_inserter(result), s.begin(), s.end(), e, fmt, - flags), and then returns result. -

Examples

-

The following example - takes C/C++ source code as input, and outputs syntax highlighted HTML code.

-

-
#include <fstream>
-#include <sstream>
-#include <string>
-#include <iterator>
-#include <boost/regex.hpp>
-#include <fstream>
-#include <iostream>
-
-// purpose:
-// takes the contents of a file and transform to
-// syntax highlighted code in html format
-
-boost::regex e1, e2;
-extern const char* expression_text;
-extern const char* format_string;
-extern const char* pre_expression;
-extern const char* pre_format;
-extern const char* header_text;
-extern const char* footer_text;
-
-void load_file(std::string& s, std::istream& is)
-{
-   s.erase();
-   s.reserve(is.rdbuf()->in_avail());
-   char c;
-   while(is.get(c))
-   {
-      if(s.capacity() == s.size())
-         s.reserve(s.capacity() * 3);
-      s.append(1, c);
-   }
-}
-
-int main(int argc, const char** argv)
-{
-   try{
-   e1.assign(expression_text);
-   e2.assign(pre_expression);
-   for(int i = 1; i < argc; ++i)
-   {
-      std::cout << "Processing file " << argv[i] << std::endl;
-      std::ifstream fs(argv[i]);
-      std::string in;
-      load_file(in, fs);
-      std::string out_name(std::string(argv[i]) + std::string(".htm"));
-      std::ofstream os(out_name.c_str());
-      os << header_text;
-      // strip '<' and '>' first by outputting to a
-      // temporary string stream
-      std::ostringstream t(std::ios::out | std::ios::binary);
-      std::ostream_iterator<char, char> oi(t);
-      boost::regex_replace(oi, in.begin(), in.end(),
-      e2, pre_format, boost::match_default | boost::format_all);
-      // then output to final output stream
-      // adding syntax highlighting:
-      std::string s(t.str());
-      std::ostream_iterator<char, char> out(os);
-      boost::regex_replace(out, s.begin(), s.end(),
-      e1, format_string, boost::match_default | boost::format_all);
-      os << footer_text;
-   }
-   }
-   catch(...)
-   { return -1; }
-   return 0;
-}
-
-extern const char* pre_expression = "(<)|(>)|(&)|\\r";
-extern const char* pre_format = "(?1<)(?2>)(?3&amp;)";
-
-
-const char* expression_text = // preprocessor directives: index 1
-                              "(^[[:blank:]]*#(?:[^\\\\\\n]|\\\\[^\\n[:punct:][:word:]]*[\\n[:punct:][:word:]])*)|"
-                              // comment: index 2
-                              "(//[^\\n]*|/\\*.*?\\*/)|"
-                              // literals: index 3
-                              "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
-                              // string literals: index 4
-                              "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
-                              // keywords: index 5
-                              "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
-                              "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
-                              "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
-                              "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
-                              "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
-                              "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
-                              "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
-                              "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
-                              "|using|virtual|void|volatile|wchar_t|while)\\>"
-                              ;
-
-const char* format_string = "(?1<font color=\"#008040\">$&</font>)"
-                            "(?2<I><font color=\"#000080\">$&</font></I>)"
-                            "(?3<font color=\"#0000A0\">$&</font>)"
-                            "(?4<font color=\"#0000FF\">$&</font>)"
-                            "(?5<B>$&</B>)";
-
-const char* header_text = "<HTML>\n<HEAD>\n"
-                          "<TITLE>Auto-generated html formated source</TITLE>\n"
-                          "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=windows-1252\">\n"
-                          "</HEAD>\n"
-                          "<BODY LINK=\"#0000ff\" VLINK=\"#800080\" BGCOLOR=\"#ffffff\">\n"
-                          "<P> </P>\n<PRE>";
-
-const char* footer_text = "</PRE>\n</BODY>\n\n";
-      
-
-

Revised  - - 26 June 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/regex_search.html b/doc/regex_search.html deleted file mode 100644 index 22691687..00000000 --- a/doc/regex_search.html +++ /dev/null @@ -1,315 +0,0 @@ - - - - Boost.Regex: Algorithm regex_search - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Algorithm regex_search

-
-

Boost.Regex Index

-
-

-
-

Contents

-
-
Synopsis
Description
- Examples
-

Synopsis

-
#include <boost/regex.hpp> 
-

-

The algorithm regex_search will search a range denoted by a pair of - bidirectional-iterators for a given regular expression. The algorithm uses - various heuristics to reduce the search time by only checking for a match if a - match could conceivably start at that position. The algorithm is defined as - follows: -

template <class BidirectionalIterator, 
-          class Allocator, class charT, class traits>
-bool regex_search(BidirectionalIterator first, BidirectionalIterator last,
-                  match_results<BidirectionalIterator, Allocator>& m,
-                  const basic_regex<charT, traits>& e,
-                  match_flag_type flags = match_default);
-                  
-template <class ST, class SA, 
-          class Allocator, class charT, class traits> 
-bool regex_search(const basic_string<charT, ST, SA>& s, 
-                  match_results<
-                      typename basic_string<charT, ST,SA>::const_iterator, 
-                      Allocator>& m, 
-                  const basic_regex<charT, traits>& e, 
-                  match_flag_type flags = match_default); 
-          
-template<class charT, class Allocator, class traits> 
-bool regex_search(const charT* str, 
-                  match_results<const charT*, Allocator>& m, 
-                  const basic_regex<charT, traits>& e,
-                  match_flag_type flags = match_default);
-                  
-template <class BidirectionalIterator, class charT, class traits>                
-bool regex_search(BidirectionalIterator first, BidirectionalIterator last, 
-                  const basic_regex<charT, traits>& e, 
-                  match_flag_type flags = match_default); 
-                  
-template <class charT, class traits> 
-bool regex_search(const charT* str, 
-                  const basic_regex<charT, traits>& e, 
-                  match_flag_type flags = match_default); 
-                  
-template<class ST, class SA, class charT, class traits>
-bool regex_search(const basic_string<charT, ST, SA>& s,
-                  const basic_regex<charT, traits>& e,
-                  match_flag_type flags = match_default);
-
-

Description

-
template <class BidirectionalIterator, class Allocator, class charT, class traits>
-bool regex_search(BidirectionalIterator first, BidirectionalIterator last,
-                  match_results<BidirectionalIterator, Allocator>& m,
-                  const basic_regex<charT, traits>& e,
-                  match_flag_type flags = match_default);
-

Requires: Type BidirectionalIterator meets the requirements of a - Bidirectional Iterator (24.1.4).

-

Effects: Determines whether there is some sub-sequence within - [first,last) that matches the regular expression e; parameter flags - is used to control how the expression is matched against the character - sequence. Returns true if such a sequence exists, false otherwise.

-

Throws: std::runtime_error if the complexity of - matching the expression against an N character string begins to exceed O(N^2), - or if the program runs out of stack space while matching the expression (if - Boost.regex is configured in recursive mode), - or if the matcher exhausts its permitted memory allocation (if Boost.regex is - configured in non-recursive mode).

-

Postconditions: If the function returns false, then the effect on - parameter m is undefined, otherwise the effects on parameter m are - given in the table:

-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Element

-
-

Value -

-
-

m.size()

-
-

e.mark_count()

-
-

m.empty()

-
-

false

-
-

m.prefix().first

-
-

first

-
-

m.prefix().second

-
-

m[0].first

-
-

m.prefix().matched

-
-

m.prefix().first != m.prefix().second

-
-

m.suffix().first

-
-

m[0].second

-
-

m.suffix().second

-
-

last

-
-

m.suffix().matched

-
-

m.suffix().first != m.suffix().second

-
-

m[0].first

-
-

The start of the sequence of characters that matched the regular expression

-
-

m[0].second

-
-

The end of the sequence of characters that matched the regular expression

-
-

m[0].matched

-
-

true if a full match was found, and false if it was - a partial match (found as a result of the match_partial flag being - set).

-
-

m[n].first

-
-

For all integers n < m.size(), the start of the sequence that matched - sub-expression n. Alternatively, if sub-expression n did not participate - in the match, then last.

-
-

m[n].second

-
-

For all integers n < m.size(), the end of the sequence that matched - sub-expression n. Alternatively, if sub-expression n did not participate - in the match, then last.

-
-

m[n].matched

-
-

For all integers n < m.size(), true if sub-expression n participated - in the match, false otherwise.

-
-
-
-
template <class charT, class Allocator, class traits>
-bool regex_search(const charT* str, match_results<const charT*, Allocator>& m,
-                  const basic_regex<charT, traits>& e,
-                  match_flag_type flags = match_default);
-

Effects: Returns the result of regex_search(str, str + - char_traits<charT>::length(str), m, e, flags).

-
template <class ST, class SA, class Allocator, class charT,
-          class traits>
-bool regex_search(const basic_string<charT, ST, SA>& s,
-                  match_results<typename basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
-                  const basic_regex<charT, traits>& e,
-                  match_flag_type flags = match_default);
-

Effects: Returns the result of regex_search(s.begin(), s.end(), m, - e, flags).

-
template <class BidirectionalIterator, class charT, class traits>
-bool regex_search(BidirectionalIterator first, BidirectionalIterator last,
-                  const basic_regex<charT, traits>& e,
-                  match_flag_type flags = match_default);
-

Effects: Behaves "as if" by constructing an instance of - match_results<BidirectionalIterator> what, - and then returning the result of regex_search(first, last, what, e, flags).

-
template <class charT, class traits>
-bool regex_search(const charT* str,
-                  const basic_regex<charT, traits>& e,
-                  match_flag_type flags = match_default);
-

Effects: Returns the result of regex_search(str, str + - char_traits<charT>::length(str), e, flags).

-
template <class ST, class SA, class charT, class traits>
-bool regex_search(const basic_string<charT, ST, SA>& s,
-                  const basic_regex<charT, traits>& e,
-                  match_flag_type flags = match_default);
-

Effects: Returns the result of regex_search(s.begin(), s.end(), e, - flags). -

Examples

-

The following example - takes the contents of a file in the form of a string, and searches for all the - C++ class declarations in the file. The code will work regardless of the way - that std::string is implemented, for example it could easily be modified to - work with the SGI rope class, which uses a non-contiguous storage strategy.

-

-
#include <string> 
-#include <map> 
-#include <boost/regex.hpp> 
-
-// purpose: 
-// takes the contents of a file in the form of a string 
-// and searches for all the C++ class definitions, storing 
-// their locations in a map of strings/int's 
-typedef std::map<std::string, int, std::less<std::string> > map_type; 
-
-boost::regex expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)"); 
-
-void IndexClasses(map_type& m, const std::string& file) 
-{ 
-   std::string::const_iterator start, end; 
-   start = file.begin(); 
-   end = file.end(); 
-      boost::match_results<std::string::const_iterator> what; 
-   boost::match_flag_type flags = boost::match_default; 
-   while(regex_search(start, end, what, expression, flags)) 
-   { 
-      // what[0] contains the whole string 
-      // what[5] contains the class name. 
-      // what[6] contains the template specialisation if any. 
-      // add class name and position to map: 
-      m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
-                what[5].first - file.begin(); 
-      // update search position: 
-      start = what[0].second; 
-      // update flags: 
-      flags |= boost::match_prev_avail; 
-      flags |= boost::match_not_bob; 
-   } 
-}
-     
-
-

Revised - - 23 June 2004 -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/regex_split.html b/doc/regex_split.html deleted file mode 100644 index a3b7b293..00000000 --- a/doc/regex_split.html +++ /dev/null @@ -1,145 +0,0 @@ - - - - Boost.Regex: Algorithm regex_split (deprecated) - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Algorithm regex_split (deprecated)

-
-

Boost.Regex Index

-
-

-
-

-

The algorithm regex_split has been deprecated in favor of the iterator - regex_token_iterator which has a more flexible and powerful interface, - as well as following the more usual standard library "pull" rather than "push" - semantics.

-

Code which uses regex_split will continue to compile; the following - documentation is taken from the previous boost.regex version:

-

Algorithm regex_split

-
#include <boost/regex.hpp> 
-

Algorithm regex_split performs a similar operation to the perl split operation, - and comes in three overloaded forms: -

-
template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2>
-std::size_t regex_split(OutputIterator out, 
-                        std::basic_string<charT, Traits1, Alloc1>& s, 
-                      const basic_regex<charT, Traits2>& e,
-                      boost::match_flag_type flags,
-                        std::size_t max_split);
-
-template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2>
-std::size_t regex_split(OutputIterator out, 
-                        std::basic_string<charT, Traits1, Alloc1>& s, 
-                      const basic_regex<charT, Traits2>& e,
-                        boost::match_flag_type flags = match_default);
-
-template <class OutputIterator, class charT, class Traits1, class Alloc1>
-std::size_t regex_split(OutputIterator out, 
-                        std::basic_string<charT, Traits1, Alloc1>& s);
-

Effects: Each version of the algorithm takes an - output-iterator for output, and a string for input. If the expression contains - no marked sub-expressions, then the algorithm writes one string onto the - output-iterator for each section of input that does not match the expression. - If the expression does contain marked sub-expressions, then each time a match - is found, one string for each marked sub-expression will be written to the - output-iterator. No more than max_split strings will be written to the - output-iterator. Before returning, all the input processed will be deleted from - the string s (if max_split is not reached then all of s will - be deleted). Returns the number of strings written to the output-iterator. If - the parameter max_split is not specified then it defaults to UINT_MAX. - If no expression is specified, then it defaults to "\s+", and splitting occurs - on whitespace. -

-

Throws: std::runtime_error if the complexity of - matching the expression against an N character string begins to exceed O(N^2), - or if the program runs out of stack space while matching the expression (if - Boost.regex is configured in recursive mode), - or if the matcher exhausts its permitted memory allocation (if Boost.regex is - configured in non-recursive mode).

-

Example: the - following function will split the input string into a series of tokens, and - remove each token from the string s: -

-
unsigned tokenise(std::list<std::string>& l, std::string& s)
-{
-   return boost::regex_split(std::back_inserter(l), s);
-}
-

Example: the - following short program will extract all of the URLs from an HTML file, and - print them out to cout: -

-
#include <list>
-#include <fstream>
-#include <iostream>
-#include <boost/regex.hpp>
-
-boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
-               boost::regbase::normal | boost::regbase::icase);
-
-void load_file(std::string& s, std::istream& is)
-{
-   s.erase();
-   //
-   // attempt to grow string buffer to match file size,
-   // this doesn't always work...
-   s.reserve(is.rdbuf()->in_avail());
-   char c;
-   while(is.get(c))
-   {
-      // use logarithmic growth strategy, in case
-      // in_avail (above) returned zero:
-      if(s.capacity() == s.size())
-         s.reserve(s.capacity() * 3);
-      s.append(1, c);
-   }
-}
-
-
-int main(int argc, char** argv)
-{
-   std::string s;
-   std::list<std::string> l;
-
-   for(int i = 1; i < argc; ++i)
-   {
-      std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
-      s.erase();
-      std::ifstream is(argv[i]);
-      load_file(s, is);
-      boost::regex_split(std::back_inserter(l), s, e);
-      while(l.size())
-      {
-         s = *(l.begin());
-         l.pop_front();
-         std::cout << s << std::endl;
-      }
-   }
-   return 0;
-}
-
-

Revised  - - 26 June 2004 -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/regex_token_iterator.html b/doc/regex_token_iterator.html deleted file mode 100644 index 9bd59050..00000000 --- a/doc/regex_token_iterator.html +++ /dev/null @@ -1,381 +0,0 @@ - - - - Boost.Regex: regex_token_iterator - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

regex_token_iterator

-
-

Boost.Regex Index

-
-

-
-

Contents

-
-
Synopsis
Description
- Examples
-

Synopsis

-

The template class regex_token_iterator is an iterator adapter; - that is to say it represents a new view of an existing iterator sequence, by - enumerating all the occurrences of a regular expression within that sequence, - and presenting one or more character sequences for each match found. Each - position enumerated by the iterator is a sub_match - object that represents what matched a particular sub-expression within the - regular expression. When class regex_token_iterator is used to - enumerate a single sub-expression with index -1, then the iterator performs - field splitting: that is to say it enumerates one character sequence for each - section of the character container sequence that does not match the regular - expression specified.

-
-template <class BidirectionalIterator, 
-          class charT = iterator_traits<BidirectionalIterator>::value_type,
-          class traits = regex_traits<charT> >
-class regex_token_iterator 
-{
-public:
-   typedef          basic_regex<charT, traits>                              regex_type;
-   typedef          sub_match<BidirectionalIterator>                        value_type;
-   typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
-   typedef          const value_type*                                       pointer;
-   typedef          const value_type&                                       reference;
-   typedef          std::forward_iterator_tag                               iterator_category;
-   
-   regex_token_iterator();
-   regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
-                        int submatch = 0, match_flag_type m = match_default);
-   regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
-                        const std::vector<int>& submatches, match_flag_type m = match_default);
-   template <std::size_t N>
-   regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
-                        const int (&submatches)[N], match_flag_type m = match_default);
-   regex_token_iterator(const regex_token_iterator&);
-   regex_token_iterator& operator=(const regex_token_iterator&);
-   bool operator==(const regex_token_iterator&)const;
-   bool operator!=(const regex_token_iterator&)const;
-   const value_type& operator*()const;
-   const value_type* operator->()const;
-   regex_token_iterator& operator++();
-   regex_token_iterator operator++(int);
-};
-
-typedef regex_token_iterator<const char*>                   cregex_token_iterator;
-typedef regex_token_iterator<std::string::const_iterator>   sregex_token_iterator;
-#ifndef BOOST_NO_WREGEX
-typedef regex_token_iterator<const wchar_t*>                wcregex_token_iterator;
-typedef regex_token_iterator<std::wstring::const_iterator>  wsregex_token_iterator;
-#endif
-
-template <class charT, class traits>
-regex_token_iterator<const charT*, charT, traits> 
-   make_regex_token_iterator(const charT* p, 
-                             const basic_regex<charT, traits>& e, 
-                             int submatch = 0, 
-                             regex_constants::match_flag_type m = regex_constants::match_default);
-                             
-template <class charT, class traits, class ST, class SA>
-regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> 
-   make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p, 
-                             const basic_regex<charT, traits>& e, 
-                             int submatch = 0, 
-                             regex_constants::match_flag_type m = regex_constants::match_default);
-                             
-template <class charT, class traits, std::size_t N>
-regex_token_iterator<const charT*, charT, traits> 
-  make_regex_token_iterator(const charT* p, 
-                            const basic_regex<charT, traits>& e, 
-                            const int (&submatch)[N], 
-                            regex_constants::match_flag_type m = regex_constants::match_default);
-                            
-template <class charT, class traits, class ST, class SA, std::size_t N>
-regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> 
-   make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p, 
-                             const basic_regex<charT, traits>& e, 
-                             const int (&submatch)[N], 
-                             regex_constants::match_flag_type m = regex_constants::match_default);
-                             
-template <class charT, class traits>
-regex_token_iterator<const charT*, charT, traits> 
-   make_regex_token_iterator(const charT* p, 
-                             const basic_regex<charT, traits>& e, 
-                             const std::vector<int>& submatch, 
-                             regex_constants::match_flag_type m = regex_constants::match_default);
-                             
-template <class charT, class traits, class ST, class SA>
-regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> 
-   make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p, 
-                             const basic_regex<charT, traits>& e, 
-                             const std::vector<int>& submatch, 
-                             regex_constants::match_flag_type m = regex_constants::match_default);
-
-

Description

-
regex_token_iterator();
-

Effects: constructs an end of sequence iterator.

-
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
-                     int submatch = 0, match_flag_type m = match_default);
-

Preconditions: !re.empty().  Object re shall exist - for the lifetime of the iterator constructed from it.

-

Effects: constructs a regex_token_iterator that will enumerate one - string for each regular expression match of the expression re found - within the sequence [a,b), using match flags m.  The - string enumerated is the sub-expression submatch for each match - found; if submatch is -1, then enumerates all the text sequences that - did not match the expression re (that is to say, it performs field splitting).

-

Throws: std::runtime_error if the complexity of - matching the expression against an N character string begins to exceed O(N^2), - or if the program runs out of stack space while matching the expression (if - Boost.regex is configured in recursive mode), - or if the matcher exhausts its permitted memory allocation (if Boost.regex is - configured in non-recursive mode).

-
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
-                     const std::vector<int>& submatches, match_flag_type m = match_default);
-

Preconditions: submatches.size() && !re.empty().  - Object re shall exist for the lifetime of the iterator constructed from it.

-

Effects: constructs a regex_token_iterator that will enumerate submatches.size() - strings for each regular expression match of the expression re found - within the sequence [a,b), using match flags m.  For - each match found one string will be enumerated for each sub-expression - index contained within submatches vector; if submatches[0] - is -1, then the first string enumerated for each match will be all of the text - from end of the last match to the start of the current match, in addition there - will be one extra string enumerated when no more matches can be found: from the - end of the last match found, to the end of the underlying sequence.

-

Throws: std::runtime_error if the complexity of - matching the expression against an N character string begins to exceed O(N^2), - or if the program runs out of stack space while matching the expression (if - Boost.regex is configured in recursive mode), - or if the matcher exhausts its permitted memory allocation (if Boost.regex is - configured in non-recursive mode).

-
template <std::size_t N>
-regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
-                     const int (&submatches)[N], match_flag_type m = match_default);
-

Preconditions: !re.empty().  Object re shall exist - for the lifetime of the iterator constructed from it.

-

Effects: constructs a regex_token_iterator that will - enumerate N strings for each regular expression match of the - expression re found within the sequence [a,b), using match - flags m.  For each match found one string will be - enumerated for each sub-expression index contained within the submatches - array; if submatches[0] is -1, then the first string enumerated - for each match will be all of the text from end of the last match to the start - of the current match, in addition there will be one extra string enumerated - when no more matches can be found: from the end of the last match found, to the - end of the underlying sequence.

-

Throws: std::runtime_error if the complexity of - matching the expression against an N character string begins to exceed O(N^2), - or if the program runs out of stack space while matching the expression (if - Boost.regex is configured in recursive mode), - or if the matcher exhausts its permitted memory allocation (if Boost.regex is - configured in non-recursive mode).

-
regex_token_iterator(const regex_token_iterator& that);
-

Effects: constructs a copy of that.

-

Postconditions: *this == that.

-
regex_token_iterator& operator=(const regex_token_iterator& that);
-

Effects: sets *this to be equal to that.

-

Postconditions: *this == that.

-
bool operator==(const regex_token_iterator& that)const;
-

- Effects: returns true if *this is the same position as that.

-
bool operator!=(const regex_token_iterator& that)const;
-

- Effects: returns !(*this == that).

-
const value_type& operator*()const;
-

- Effects: returns the current character sequence being enumerated.

-
const value_type* operator->()const;
-

- Effects: returns &(*this).

-
regex_token_iterator& operator++();
-

- Effects: Moves on to the next character sequence to be enumerated.

-

Throws: std::runtime_error if the complexity of - matching the expression against an N character string begins to exceed O(N^2), - or if the program runs out of stack space while matching the expression (if - Boost.regex is configured in recursive mode), - or if the matcher exhausts its permitted memory allocation (if Boost.regex is - configured in non-recursive mode).

- -

- Returns: *this.

regex_token_iterator operator++(int);
-

Effects: constructs a copy result of *this, - then calls ++(*this).

-

Returns: result. -

template <class charT, class traits>
-regex_token_iterator<const charT*, charT, traits> 
-   make_regex_token_iterator(const charT* p, 
-                             const basic_regex<charT, traits>& e, 
-                             int submatch = 0, 
-                             regex_constants::match_flag_type m = regex_constants::match_default);
-                             
-template <class charT, class traits, class ST, class SA>
-regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> 
-   make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p, 
-                             const basic_regex<charT, traits>& e, 
-                             int submatch = 0, 
-                             regex_constants::match_flag_type m = regex_constants::match_default);
-                             
-template <class charT, class traits, std::size_t N>
-regex_token_iterator<const charT*, charT, traits> 
-  make_regex_token_iterator(const charT* p, 
-                            const basic_regex<charT, traits>& e, 
-                            const int (&submatch)[N], 
-                            regex_constants::match_flag_type m = regex_constants::match_default);
-                            
-template <class charT, class traits, class ST, class SA, std::size_t N>
-regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> 
-   make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p, 
-                             const basic_regex<charT, traits>& e, 
-                             const int (&submatch)[N], 
-                             regex_constants::match_flag_type m = regex_constants::match_default);
-                             
-template <class charT, class traits>
-regex_token_iterator<const charT*, charT, traits> 
-   make_regex_token_iterator(const charT* p, 
-                             const basic_regex<charT, traits>& e, 
-                             const std::vector<int>& submatch, 
-                             regex_constants::match_flag_type m = regex_constants::match_default);
-                             
-template <class charT, class traits, class ST, class SA>
-regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> 
-   make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p, 
-                             const basic_regex<charT, traits>& e, 
-                             const std::vector<int>& submatch, 
-                             regex_constants::match_flag_type m = regex_constants::match_default);
-
-

Effects: returns a regex_token_iterator that enumerates - one sub_match for each value in submatch for - each occurrence of regular expression e in string p, matched - using match_flags m.

-

-

Examples

-

The following example - takes a string and splits it into a series of tokens:

-
-#include <iostream>
-#include <boost/regex.hpp>
-
-using namespace std;
-
-int main(int argc, char**)
-{
-   string s;
-   do{
-      if(argc == 1)
-      {
-         cout << "Enter text to split (or \"quit\" to exit): ";
-         getline(cin, s);
-         if(s == "quit") break;
-      }
-      else
-         s = "This is a string of tokens";
-
-      boost::regex re("\\s+");
-      boost::sregex_token_iterator i(s.begin(), s.end(), re, -1);
-      boost::sregex_token_iterator j;
-
-      unsigned count = 0;
-      while(i != j)
-      {
-         cout << *i++ << endl;
-         count++;
-      }
-      cout << "There were " << count << " tokens found." << endl;
-
-   }while(argc == 1);
-   return 0;
-}
-
-
-

The following example - takes an HTML file and outputs a list of all the linked files:

-
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <boost/regex.hpp>
-
-boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
-               boost::regex::normal | boost::regbase::icase);
-
-void load_file(std::string& s, std::istream& is)
-{
-   s.erase();
-   //
-   // attempt to grow string buffer to match file size,
-   // this doesn't always work...
-   s.reserve(is.rdbuf()->in_avail());
-   char c;
-   while(is.get(c))
-   {
-      // use logarithmic growth strategy, in case
-      // in_avail (above) returned zero:
-      if(s.capacity() == s.size())
-         s.reserve(s.capacity() * 3);
-      s.append(1, c);
-   }
-}
-
-int main(int argc, char** argv)
-{
-   std::string s;
-   int i;
-   for(i = 1; i < argc; ++i)
-   {
-      std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
-      s.erase();
-      std::ifstream is(argv[i]);
-      load_file(s, is);
-      boost::sregex_token_iterator i(s.begin(), s.end(), e, 1);
-      boost::sregex_token_iterator j;
-      while(i != j)
-      {
-         std::cout << *i++ << std::endl;
-      }
-   }
-   //
-   // alternative method:
-   // test the array-literal constructor, and split out the whole
-   // match as well as $1....
-   //
-   for(i = 1; i < argc; ++i)
-   {
-      std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
-      s.erase();
-      std::ifstream is(argv[i]);
-      load_file(s, is);
-      const int subs[] = {1, 0,};
-      boost::sregex_token_iterator i(s.begin(), s.end(), e, subs);
-      boost::sregex_token_iterator j;
-      while(i != j)
-      {
-         std::cout << *i++ << std::endl;
-      }
-   }
-
-   return 0;
-}
-
-
-

Revised  - - 26 June 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - diff --git a/doc/regex_traits.html b/doc/regex_traits.html deleted file mode 100644 index dc1708f7..00000000 --- a/doc/regex_traits.html +++ /dev/null @@ -1,87 +0,0 @@ - - - - Boost.Regex: class regex_traits - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

class regex_traits

-
-

Boost.Regex Index

-
-

-
-

Contents

-
-
Synopsis
Description
-
-

Synopsis

-
-namespace boost{
-
-template <class charT, class implementationT = sensible_default_choice>
-struct regex_traits : public implementationT
-{
-   regex_traits() : implementationT() {}
-};
-
-template <class charT>
-struct c_regex_traits;
-
-template <class charT>
-struct cpp_regex_traits;
-
-template <class charT>
-struct w32_regex_traits;
-
-} // namespace boost
-
-

Description

-

The class regex_traits is just a thin wrapper around an actual implementation - class, which may be one of:

- -

The default behavior can be altered by defining one of the following - configuration macros in boost/regex/user.hpp:

- -

All these traits classes fulfil the traits class - requirements.

-
-

-

Revised - - 24 June 2004  -

-

© Copyright John Maddock 1998- - - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/standards.html b/doc/standards.html deleted file mode 100644 index d5bc8c46..00000000 --- a/doc/standards.html +++ /dev/null @@ -1,237 +0,0 @@ - - - - Boost.Regex: Standards Conformance - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Standards Conformance

-
-

Boost.Regex Index

-
-

-
-

C++

-

Boost.regex is intended to conform to the - regular expression standardization proposal, which will appear in a - future C++ standard technical report (and hopefully in a future version of the - standard). 

-

ECMAScript / JavaScript

-

All of the ECMAScript regular expression syntax features are supported, except - that:

-

Negated class escapes (\S, \D and \W) are not permitted inside character class - definitions ( [...] ).

-

The escape sequence \u matches any upper case character (the same as - [[:upper:]]) rather than a Unicode escape sequence; use \x{DDDD} for - Unicode escape sequences.

-

Perl

-

Almost all Perl features are supported, except for:

-

- - - - - - - - - -
(?{code})Not implementable in a compiled strongly typed language.
(??{code})Not implementable in a compiled strongly typed language.
-

-

POSIX

-

All the POSIX basic and extended regular expression features are supported, - except that:

-

No character collating names are recognized except those specified in the POSIX - standard for the C locale, unless they are explicitly registered with the - traits class.

-

Character equivalence classes ( [[=a=]] etc) are probably buggy except on - Win32.  Implementing this feature requires knowledge of the format of the - string sort keys produced by the system; if you need this, and the default - implementation doesn't work on your platform, then you will need to supply a - custom traits class.

-

Unicode

-

The following comments refer to Unicode - Technical - Standard -#18: Unicode Regular Expressions version 9.

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#FeatureSupport
1.1Hex NotationYes: use \x{DDDD} to refer to code point UDDDD.
1.2Character PropertiesAll the names listed under the General - Category Property are supported.  Script names and Other Names are - not currently supported.
1.3Subtraction and Intersection -

Indirectly supported by forward-lookahead: -

-

(?=[[:X:]])[[:Y:]]

-

Gives the intersection of character properties X and Y.

-

(?![[:X:]])[[:Y:]]

-

Gives everything in Y that is not in X (subtraction).

-
1.4Simple Word BoundariesConforming: non-spacing marks are included in the set of word characters.
1.5Caseless MatchingSupported, note that at this level, case transformations are 1:1, many to many - case folding operations are not supported (for example "ß" to "SS").
1.6Line BoundariesSupported, except that "." matches only one character of "\r\n". Other than - that line boundaries match correctly; including not matching in the middle of a - "\r\n" sequence.
1.7Code PointsSupported: provided you use the u32* algorithms, - then UTF-8, UTF-16 and UTF-32 are all treated as sequences of 32-bit code - points.
2.1Canonical EquivalenceNot supported: it is up to the user of the library to convert all text into - the same canonical form as the regular expression.
2.2Default Grapheme ClustersNot supported.
2.3 -

Default Word Boundaries

-
Not supported.
2.4 -

Default Loose Matches

-
Not Supported.
2.5Name PropertiesSupported: the expression "[[:name:]]" or \N{name} matches the named character - "name".
2.6Wildcard propertiesNot Supported.
3.1Tailored Punctuation.Not Supported.
3.2Tailored Grapheme ClustersNot Supported.
3.3Tailored Word Boundaries.Not Supported.
3.4Tailored Loose MatchesPartial support: [[=c=]] matches characters with the same primary equivalence - class as "c".
3.5Tailored RangesSupported: [a-b] matches any character that collates in the range a to b, when - the expression is constructed with the collate - flag set.
3.6Context MatchesNot Supported.
3.7Incremental MatchesSupported: pass the flag match_partial to - the regex algorithms.
3.8Unicode Set SharingNot Supported.
3.9Possible Match SetsNot supported, however this information is used internally to optimise the - matching of regular expressions, and return quickly if no match is possible.
3.10Folded MatchingPartial Support:  It is possible to achieve a similar effect by using a - custom regular expression traits class.
3.11Custom Submatch EvaluationNot Supported.
-

-
-

Revised  - - 28 June 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - - diff --git a/doc/sub_match.html b/doc/sub_match.html deleted file mode 100644 index 388d6773..00000000 --- a/doc/sub_match.html +++ /dev/null @@ -1,571 +0,0 @@ - - - - Boost.Regex: sub_match - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

sub_match

-
-

Boost.Regex Index

-
-

-
-

Synopsis

-

#include <boost/regex.hpp> -

-

Regular expressions are different from many simple pattern-matching algorithms - in that as well as finding an overall match they can also produce - sub-expression matches: each sub-expression being delimited in the pattern by a - pair of parentheses (...). There has to be some method for reporting - sub-expression matches back to the user: this is achieved by defining a - class match_results that acts as an - indexed collection of sub-expression matches, each sub-expression match being - contained in an object of type sub_match - . -

Objects of type sub_match may only be obtained by subscripting an object - of type match_results - . -

Objects of type sub_match may be compared to objects of type std::basic_string, - or const charT* or const charT - . -

Objects of type sub_match may be added to objects of type std::basic_string, - or const charT* or const charT, to produce a new std::basic_string - - object. -

When the marked sub-expression denoted by an object of type sub_match<> - participated in a regular expression match then member matched evaluates - to true, and members first and second denote the - range of characters [first,second) which formed that match. - Otherwise matched is false, and members first and second - contain undefined values.

-

When the marked sub-expression denoted by an object of type sub_match<> - was repeated, then the sub_match object represents the match obtained by the - last repeat.  The complete set of all the captures obtained for all the - repeats, may be accessed via the captures() member function (Note: this has - serious performance implications, you have to explicitly enable this feature).

-

If an object of type sub_match<> represents sub-expression 0 - - that is to say the whole match - then member matched is always - true, unless a partial match was obtained as a result of the flag match_partial - being passed to a regular expression algorithm, in which case member matched - is false, and members first and second represent the - character range that formed the partial match.

-
namespace boost{
-
-template <class BidirectionalIterator>
-class sub_match;
-
-typedef sub_match<const char*> csub_match;
-typedef sub_match<const wchar_t*> wcsub_match;
-typedef sub_match<std::string::const_iterator> ssub_match;
-typedef sub_match<std::wstring::const_iterator> wssub_match;
-
-template <class BidirectionalIterator>
-class sub_match : public std::pair<BidirectionalIterator, BidirectionalIterator>
-{
-public:
-   typedef typename iterator_traits<BidirectionalIterator>::value_type       value_type;
-   typedef typename iterator_traits<BidirectionalIterator>::difference_type  difference_type;
-   typedef          BidirectionalIterator                                    iterator;
-
-   bool matched;
-
-   difference_type length()const;
-   operator basic_string<value_type>()const;
-   basic_string<value_type> str()const;
-
-   int compare(const sub_match& s)const;
-   int compare(const basic_string<value_type>& s)const;
-   int compare(const value_type* s)const;
-#ifdef BOOST_REGEX_MATCH_EXTRA
-   typedef implementation-private capture_sequence_type;
-   const capture_sequence_type& captures()const;
-#endif
-};
-//
-// comparisons to another sub_match:
-//
-template <class BidirectionalIterator>
-bool operator == (const sub_match<BidirectionalIterator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-template <class BidirectionalIterator>
-bool operator != (const sub_match<BidirectionalIterator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-template <class BidirectionalIterator>
-bool operator < (const sub_match<BidirectionalIterator>& lhs,
-                 const sub_match<BidirectionalIterator>& rhs);
-template <class BidirectionalIterator>
-bool operator <= (const sub_match<BidirectionalIterator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-template <class BidirectionalIterator>
-bool operator >= (const sub_match<BidirectionalIterator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-template <class BidirectionalIterator>
-bool operator > (const sub_match<BidirectionalIterator>& lhs,
-                 const sub_match<BidirectionalIterator>& rhs);
-
-
-//
-// comparisons to a basic_string:
-//
-template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator == (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator != (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator < (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
-                 const sub_match<BidirectionalIterator>& rhs);
-template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator > (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
-                 const sub_match<BidirectionalIterator>& rhs);
-template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator >= (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator <= (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-
-template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator == (const sub_match<BidirectionalIterator>& lhs,
-                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
-template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator != (const sub_match<BidirectionalIterator>& lhs,
-                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
-template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator < (const sub_match<BidirectionalIterator>& lhs,
-                 const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
-template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator > (const sub_match<BidirectionalIterator>& lhs,
-                 const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
-template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator >= (const sub_match<BidirectionalIterator>& lhs,
-                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
-template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator <= (const sub_match<BidirectionalIterator>& lhs,
-                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
-
-//
-// comparisons to a pointer to a character array:
-//
-template <class BidirectionalIterator> 
-bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-template <class BidirectionalIterator> 
-bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-template <class BidirectionalIterator> 
-bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
-                 const sub_match<BidirectionalIterator>& rhs); 
-template <class BidirectionalIterator> 
-bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
-                 const sub_match<BidirectionalIterator>& rhs); 
-template <class BidirectionalIterator> 
-bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-template <class BidirectionalIterator> 
-bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-
-template <class BidirectionalIterator> 
-bool operator == (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
-template <class BidirectionalIterator> 
-bool operator != (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
-template <class BidirectionalIterator> 
-bool operator < (const sub_match<BidirectionalIterator>& lhs, 
-                 typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
-template <class BidirectionalIterator> 
-bool operator > (const sub_match<BidirectionalIterator>& lhs, 
-                 typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
-template <class BidirectionalIterator> 
-bool operator >= (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
-template <class BidirectionalIterator> 
-bool operator <= (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
-
-//
-// comparisons to a single character:
-//
-template <class BidirectionalIterator> 
-bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-template <class BidirectionalIterator> 
-bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-template <class BidirectionalIterator> 
-bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
-                 const sub_match<BidirectionalIterator>& rhs); 
-template <class BidirectionalIterator> 
-bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
-                 const sub_match<BidirectionalIterator>& rhs); 
-template <class BidirectionalIterator> 
-bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-template <class BidirectionalIterator> 
-bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-
-template <class BidirectionalIterator> 
-bool operator == (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
-template <class BidirectionalIterator> 
-bool operator != (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
-template <class BidirectionalIterator> 
-bool operator < (const sub_match<BidirectionalIterator>& lhs, 
-                 typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
-template <class BidirectionalIterator> 
-bool operator > (const sub_match<BidirectionalIterator>& lhs, 
-                 typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
-template <class BidirectionalIterator> 
-bool operator >= (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
-template <class BidirectionalIterator> 
-bool operator <= (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
-// 
-// addition operators: 
-//
-template <class BidirectionalIterator, class traits, class Allocator> 
-std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator> 
-    operator + (const std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& s, 
-                const sub_match<BidirectionalIterator>& m); 
-template <class BidirectionalIterator, class traits, class Allocator> 
-std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>
-    operator + (const sub_match<BidirectionalIterator>& m, 
-                const std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& s); 
-template <class BidirectionalIterator> std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type> 
-    operator + (typename iterator_traits<BidirectionalIterator>::value_type const* s, 
-                const sub_match<BidirectionalIterator>& m); 
-template <class BidirectionalIterator> std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type> 
-    operator + (const sub_match<BidirectionalIterator>& m, 
-                typename iterator_traits<BidirectionalIterator>::value_type const * s);
-template <class BidirectionalIterator> 
-std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type> 
-    operator + (typename iterator_traits<BidirectionalIterator>::value_type const& s, 
-                const sub_match<BidirectionalIterator>& m); 
-template <class BidirectionalIterator> 
-std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type> 
-    operator + (const sub_match<BidirectionalIterator>& m, 
-                typename iterator_traits<BidirectionalIterator>::value_type const& s); 
-template <class BidirectionalIterator> 
-std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type> 
-    operator + (const sub_match<BidirectionalIterator>& m1,
-                const sub_match<BidirectionalIterator>& m2);
-
-//
-// stream inserter:
-//
-template <class charT, class traits, class BidirectionalIterator>
-basic_ostream<charT, traits>&
-   operator << (basic_ostream<charT, traits>& os,
-                const sub_match<BidirectionalIterator>& m);
-
-} // namespace boost
-

Description

-

sub_match members

-
typedef typename std::iterator_traits<iterator>::value_type value_type;
-

The type pointed to by the iterators.

-
typedef typename std::iterator_traits<iterator>::difference_type difference_type;
-

A type that represents the difference between two iterators.

-
typedef iterator iterator_type;
-

The iterator type.

-
iterator first
-

An iterator denoting the position of the start of the match.

-
iterator second
-

An iterator denoting the position of the end of the match.

-
bool matched
-

A Boolean value denoting whether this sub-expression participated in the match.

-
difference_type length()const;
-

Effects: returns the length of this matched sub-expression, or 0 if this - sub-expression was not matched: (matched ? distance(first, second) : 0).

-
operator basic_string<value_type>()const;
-

Effects: converts *this into a string: returns (matched ? - basic_string<value_type>(first, second) : - basic_string<value_type>()).

-
basic_string<value_type> str()const;
-

Effects: returns a string representation of *this:  (matched ? - basic_string<value_type>(first, second) : - basic_string<value_type>()).

-
int compare(const sub_match& s)const;
-

Effects: performs a lexical comparison to s: returns str().compare(s.str()).

-
int compare(const basic_string<value_type>& s)const;
-

Effects: compares *this to the string s: returns str().compare(s).

-
int compare(const value_type* s)const;
-

Effects: compares *this to the null-terminated string s: returns - str().compare(s).

-
typedef implementation-private capture_sequence_type;
-

Defines an implementation-specific type that satisfies the requirements of - a standard library Sequence (21.1.1 including the optional Table 68 - operations), whose value_type is a sub_match<BidirectionalIterator>. This - type happens to be std::vector<sub_match<BidirectionalIterator> >, - but you shouldn't actually rely on that.

-
const capture_sequence_type& captures()const; 
-

Effects: returns a sequence containing all the captures - obtained for this sub-expression.

-

Preconditions: the library must be built and used with - BOOST_REGEX_MATCH_EXTRA defined, and you must pass the flag - match_extra to the regex matching functions (regex_match, - regex_search, regex_iterator - or regex_token_iterator) in order for - this member function to be defined and return useful information.

-

Rationale: Enabling this feature has several consequences: -

- -

sub_match non-member operators

-
Comparisons against self
-
template <class BidirectionalIterator>
-bool operator == (const sub_match<BidirectionalIterator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-

Effects: returns lhs.compare(rhs) == 0.

-
template <class BidirectionalIterator>
-bool operator != (const sub_match<BidirectionalIterator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-

Effects: returns lhs.compare(rhs) != 0.

-
template <class BidirectionalIterator>
-bool operator < (const sub_match<BidirectionalIterator>& lhs,
-                 const sub_match<BidirectionalIterator>& rhs);
-

Effects: returns lhs.compare(rhs) < 0.

-
template <class BidirectionalIterator>
-bool operator <= (const sub_match<BidirectionalIterator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-

Effects: returns lhs.compare(rhs) <= 0.

-
template <class BidirectionalIterator>
-bool operator >= (const sub_match<BidirectionalIterator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-

Effects: returns lhs.compare(rhs) >= 0.

-
template <class BidirectionalIterator>
-bool operator > (const sub_match<BidirectionalIterator>& lhs,
-                 const sub_match<BidirectionalIterator>& rhs);
-

Effects: returns lhs.compare(rhs) > 0.

-
Comparisons with std::basic_string
-

-template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator == (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits,
-                  Allocator>& lhs, const sub_match<BidirectionalIterator>& rhs);
-
-

Effects: returns lhs == rhs.str().

-
template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator != (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-

Effects: returns lhs != rhs.str().

-
template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator < (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
-                 const sub_match<BidirectionalIterator>& rhs);
-

Effects: returns lhs < rhs.str().

-
template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator > (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
-                 const sub_match<BidirectionalIterator>& rhs);
-

Effects: returns lhs > rhs.str().

-
template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator >= (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-

Effects: returns lhs >= rhs.str().

-
template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator <= (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
-                  const sub_match<BidirectionalIterator>& rhs);
-

Effects: returns lhs <= rhs.str().

-
template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator == (const sub_match<BidirectionalIterator>& lhs,
-                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
-

Effects: returns lhs.str() == rhs.

-
template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator != (const sub_match<BidirectionalIterator>& lhs,
-                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
-

Effects: returns lhs.str() != rhs.

-
template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator < (const sub_match<BidirectionalIterator>& lhs,
-                 const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
-

Effects: returns lhs.str() < rhs.

-
template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator > (const sub_match<BidirectionalIterator>& lhs,
-                 const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
-

Effects: returns lhs.str() > rhs.

-
template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator >= (const sub_match<BidirectionalIterator>& lhs,
-                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
-

Effects: returns lhs.str() >= rhs.

-
template <class BidirectionalIterator, class traits, class Allocator> 
-bool operator <= (const sub_match<BidirectionalIterator>& lhs,
-                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
-

Effects: returns lhs.str() <= rhs.

-
Comparisons with null-terminated strings
-
template <class BidirectionalIterator> 
-bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-

Effects: returns lhs == rhs.str().

-
template <class BidirectionalIterator> 
-bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-

Effects: returns lhs != rhs.str().

-
template <class BidirectionalIterator> 
-bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
-                 const sub_match<BidirectionalIterator>& rhs); 
-

Effects: returns lhs < rhs.str().

-
template <class BidirectionalIterator> 
-bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
-                 const sub_match<BidirectionalIterator>& rhs); 
-

Effects: returns lhs > rhs.str().

-
template <class BidirectionalIterator> 
-bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-

Effects: returns lhs >= rhs.str().

-
template <class BidirectionalIterator> 
-bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-

Effects: returns lhs <= rhs.str().

-
template <class BidirectionalIterator> 
-bool operator == (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
-

Effects: returns lhs.str() == rhs.

-
template <class BidirectionalIterator> 
-bool operator != (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
-

Effects: returns lhs.str() != rhs.

-
template <class BidirectionalIterator> 
-bool operator < (const sub_match<BidirectionalIterator>& lhs, 
-                 typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
-

Effects: returns lhs.str() < rhs.

-
template <class BidirectionalIterator> 
-bool operator > (const sub_match<BidirectionalIterator>& lhs, 
-                 typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
-

Effects: returns lhs.str() > rhs.

-
template <class BidirectionalIterator> 
-bool operator >= (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
-

Effects: returns lhs.str() >= rhs.

-
template <class BidirectionalIterator> 
-bool operator <= (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
-

Effects: returns lhs.str() <= rhs.

-
Comparisons with a single character
-
template <class BidirectionalIterator> 
-bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-

Effects: returns lhs == rhs.str().

-
template <class BidirectionalIterator> 
-bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-

Effects: returns lhs != rhs.str().

-
template <class BidirectionalIterator> 
-bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
-                 const sub_match<BidirectionalIterator>& rhs); 
-

Effects: returns lhs < rhs.str().

-
template <class BidirectionalIterator> 
-bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
-                 const sub_match<BidirectionalIterator>& rhs); 
-

Effects: returns lhs > rhs.str().

-
template <class BidirectionalIterator> 
-bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-

Effects: returns lhs >= rhs.str().

-
template <class BidirectionalIterator> 
-bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
-                  const sub_match<BidirectionalIterator>& rhs); 
-

Effects: returns lhs <= rhs.str().

-
template <class BidirectionalIterator> 
-bool operator == (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
-

Effects: returns lhs.str() == rhs.

-
template <class BidirectionalIterator> 
-bool operator != (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
-

Effects: returns lhs.str() != rhs.

-
template <class BidirectionalIterator> 
-bool operator < (const sub_match<BidirectionalIterator>& lhs, 
-                 typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
-

Effects: returns lhs.str() < rhs.

-
template <class BidirectionalIterator> 
-bool operator > (const sub_match<BidirectionalIterator>& lhs, 
-                 typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
-

Effects: returns lhs.str() > rhs.

-
template <class BidirectionalIterator> 
-bool operator >= (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
-

Effects: returns lhs.str() >= rhs.

-
template <class BidirectionalIterator> 
-bool operator <= (const sub_match<BidirectionalIterator>& lhs, 
-                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
-

Effects: returns lhs.str() <= rhs.

-
Addition operators
-

The addition operators for sub_match allow you to add a sub_match to any type - to which you can add a std::string and obtain a new string as the result.

-
template <class BidirectionalIterator, class traits, class Allocator> 
-std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator> 
-    operator + (const std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& s, 
-                const sub_match<BidirectionalIterator>& m); 
-

Effects: returns s + m.str().

-
template <class BidirectionalIterator, class traits, class Allocator> 
-std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>
-    operator + (const sub_match<BidirectionalIterator>& m, 
-                const std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& s); 
-

Effects: returns m.str() + s.

-
template <class BidirectionalIterator> std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type> 
-    operator + (typename iterator_traits<BidirectionalIterator>::value_type const* s, 
-                const sub_match<BidirectionalIterator>& m); 
-

Effects: returns s + m.str().

-
template <class BidirectionalIterator> std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type> 
-    operator + (const sub_match<BidirectionalIterator>& m, 
-                typename iterator_traits<BidirectionalIterator>::value_type const * s);
-

Effects: returns m.str() + s.

-
template <class BidirectionalIterator> 
-std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type> 
-    operator + (typename iterator_traits<BidirectionalIterator>::value_type const& s, 
-                const sub_match<BidirectionalIterator>& m); 
-

Effects: returns s + m.str().

-
template <class BidirectionalIterator> 
-std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type> 
-    operator + (const sub_match<BidirectionalIterator>& m, 
-                typename iterator_traits<BidirectionalIterator>::value_type const& s); 
-

Effects: returns m.str() + s.

-
template <class BidirectionalIterator> 
-std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type> 
-    operator + (const sub_match<BidirectionalIterator>& m1,
-                const sub_match<BidirectionalIterator>& m2);
-

Effects: returns m1.str() + m2.str().

-
Stream inserter
-
template <class charT, class traits, class BidirectionalIterator>
-basic_ostream<charT, traits>&
-   operator << (basic_ostream<charT, traits>& os,
-                const sub_match<BidirectionalIterator>& m);
-

- Effects: returns (os << m.str()). -


-

Revised - - 22 Dec 2004 -

-

© Copyright John Maddock 1998- - - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/syntax.html b/doc/syntax.html deleted file mode 100644 index e2b62f87..00000000 --- a/doc/syntax.html +++ /dev/null @@ -1,55 +0,0 @@ - - - - Boost.Regex: Regular Expression Syntax - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Regular Expression Syntax

-
-

Boost.Regex Index

-
-

-
-

This section covers the regular expression syntax used by this library; this is - a programmer's guide: the actual syntax presented to your program's users will - depend upon the flags used during - expression compilation. -

-

There are three main syntax options available, depending upon how - you construct the regular expression object:

- -

You can also construct a regular expression that treats every character as a - literal, but that's not really a "syntax"!

-
-

Revised  - - 10 Sept 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/syntax_basic.html b/doc/syntax_basic.html deleted file mode 100644 index 14096c38..00000000 --- a/doc/syntax_basic.html +++ /dev/null @@ -1,238 +0,0 @@ - - - - Boost.Regex: POSIX-Basic Regular Expression Syntax - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

POSIX Basic Regular Expression Syntax

-
-

Boost.Regex Index

-
-

-
-

Contents

-
-
Synopsis
POSIX Basic Syntax
- Variations -
-
-
Grep
Emacs
-
-
Options
References
-
-

Synopsis

-

The POSIX-Basic regular expression syntax is used by the Unix utility sed, - and variations are used by grep and emacs.  You can - construct POSIX basic regular expressions in Boost.Regex by passing the flag basic - to the regex constructor, for example:

-
// e1 is a case sensitive POSIX-Basic expression:
-boost::regex e1(my_expression, boost::regex::basic);
-// e2 a case insensitive POSIX-Basic expression:
-boost::regex e2(my_expression, boost::regex::basic|boost::regex::icase);
-

POSIX Basic Syntax

-

In POSIX-Basic regular expressions, all characters match themselves except - for the following special characters:

-
.[\*^$
-

Wildcard:

-

The single character '.' when used outside of a character set will match any - single character except:

-

The NULL character when the flag match_not_dot_null is passed to the - matching algorithms.

-

The newline character when the flag match_not_dot_newline is passed to - the matching algorithms.

-

Anchors:

-

A '^' character shall match the start of a line when used as the first - character of an expression, or the first character of a sub-expression.

-

A '$' character shall match the end of a line when used as the last character - of an expression, or the last character of a sub-expression.

-

Marked sub-expressions:

-

A section beginning \( and ending \) acts as a marked sub-expression.  - Whatever matched the sub-expression is split out in a separate field by the - matching algorithms.  Marked sub-expressions can also be repeated, or - referred-to by a back-reference.

-

Repeats:

-

Any atom (a single character, a marked sub-expression, or a character class) - can be repeated with the * operator.

-

For example a* will match any number of letter a's repeated zero or more times - (an atom repeated zero times matches an empty string), so the expression a*b - will match any of the following:

-
b
-ab
-aaaaaaaab
-

An atom can also be repeated with a bounded repeat:

-

a\{n\}  Matches 'a' repeated exactly n times.

-

a\{n,\}  Matches 'a' repeated n or more times.

-

a\{n, m\}  Matches 'a' repeated between n and m times - inclusive.

-

For example:

-
^a\{2,3\}$
-

Will match either of:

-
aa
-aaa
-

But neither of:

-
a
-aaaa
-

It is an error to use a repeat operator, if the preceding construct can not be - repeated, for example:

-
a\(*\)
-

Will raise an error, as there is nothing for the * operator to be applied to.

-

Back references:

-

An escape character followed by a digit n, where n is in the - range 1-9, matches the same string that was matched by sub-expression n.  - For example the expression:

-
^\(a*\).*\1$
-

Will match the string:

-
aaabbaaa
-

But not the string:

-
aaabba
-

Character sets:

-

A character set is a bracket-expression starting with [ and ending with ], it - defines a set of characters, and matches any single character that is a member - of that set.

-

A bracket expression may contain any combination of the following:

-
-
Single characters:
-

For example [abc], will match any of the characters 'a', 'b', or 'c'.

-
Character ranges:
-

For example [a-c] will match any single character in the range 'a' to - 'c'.  By default, for POSIX-Basic regular expressions, a character x - is within the range y to z, if it collates within that - range; this results in locale specific behavior.  This behavior can - be turned off by unsetting the collate - option flag - in which case whether a character appears within a range is - determined by comparing the code points of the characters only.

-
Negation:
-

If the bracket-expression begins with the ^ character, then it matches the - complement of the characters it contains, for example [^a-c] matches any - character that is not in the range a-c.

-
Character classes:
-

An expression of the form [[:name:]] matches the named character class "name", - for example [[:lower:]] matches any lower case character.  See - character class names.

-
Collating Elements:
-

An expression of the form [[.col.]] matches the collating element col.  - A collating element is any single character, or any sequence of characters that - collates as a single unit.  Collating elements may also be used as the end - point of a range, for example: [[.ae.]-c] matches the character sequence "ae", - plus any single character in the range "ae"-c, assuming that "ae" is treated - as a single collating element in the current locale.

-

Collating elements may be used in place of escapes (which are not normally - allowed inside character sets), for example [[.^.]abc] would match either one - of the characters 'abc^'.

-

As an extension, a collating element may also be specified via its - symbolic name, for example:

-

[[.NUL.]]

-

matches a NUL character.

-
Equivalence classes:
-

- An expression of the form [[=col=]], matches any character or collating element - whose primary sort key is the same as that for collating element col, - as with collating elements the name col may be a - symbolic name.  A primary sort key is one that ignores case, - accentation, or locale-specific tailorings; so for example [[=a=]] matches any - of the characters: a, à, á, â, ã, ä, å, A, À, Á, Â, Ã, Ä and Å.  - Unfortunately implementation of this is reliant on the platform's collation and - localisation support; this feature can not be relied upon to work portably - across all platforms, or even all locales on one platform.

-
-
Combinations:
-

All of the above can be combined in one character set declaration, for example: - [[:digit:]a-c[.NUL.]].

-

Escapes

-

With the exception of the escape sequences \{, \}, \(, and \), which are - documented above, an escape followed by any character matches that - character.  This can be used to make the special characters .[\*^$, - "ordinary".  Note that the escape character loses its special meaning - inside a character set, so [\^] will match either a literal '\' or a '^'.

-

What Gets Matched

-

When there is more than one way to match a regular expression, the "best" - possible match is obtained using the leftmost-longest - rule.

-

Variations

-

Grep

-

When an expression is compiled with the flag grep set, then the - expression is treated as a newline separated list of POSIX-Basic - expressions, a match is found if any of the expressions in the list match, for - example:

-
boost::regex e("abc\ndef", boost::regex::grep);
-

will match either of the POSIX-Basic expressions "abc" or "def".

-

As its name suggests, this behavior is consistent with the Unix utility grep.

-

emacs

-

In addition to the POSIX-Basic features the following - characters are also special:

-
-

+ repeats the preceding atom one or more times.

-

? repeats the preceding atom zero or one times.

-

*? A non-greedy version of *.

-

+? A non-greedy version of +.

-

?? A non-greedy version of ?.

-
-

And the following escape sequences are also recognised:

-
-

\| specifies an alternative.

-

\(?:  ...  \) is a non-marking grouping construct - allows you to - lexically group something without spitting out an extra sub-expression.

-

\w  matches any word character.

-

\W matches any non-word character.

-

\sx matches any character in the syntax group x, the following emacs - groupings are supported: 's', ' ', '_', 'w', '.', ')', '(', '"', '\'', '>' - and '<'.  Refer to the emacs docs for details.

-

\Sx matches any character not in the syntax grouping x.

-

\c and \C are not supported.

-

\` matches zero characters only at the start of a buffer (or string being - matched).

-

\' matches zero characters only at the end of a buffer (or string being - matched).

-

\b matches zero characters at a word boundary.

-

\B matches zero characters, not at a word boundary.

-

\< matches zero characters only at the start of a word.

-

\> matches zero characters only at the end of a word.

-
-

Finally, you should note that emacs style regular expressions are - matched according to the Perl "depth first search" - rules.  Emacs expressions are matched this way because they contain - Perl-like extensions, that do not interact well with the - POSIX-style leftmost-longest rule.

-

Options

-

There are a variety of flags that - may be combined with the basic and grep options when - constructing the regular expression, in particular note that the - newline_alt, no_char_classes, no_intervals, bk_plus_qm and bk_vbar options - all alter the syntax, while the collate - and icase options modify how the case and locale sensitivity are to be - applied.

-

References

-

IEEE - Std 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions - and Headers, Section 9, Regular Expressions (FWD.1).

-

IEEE - Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and - Utilities, Section 4, Utilities, grep (FWD.1).

-

Emacs Version 21.3.

-

-


-

-

Revised  - - 21 Aug 2004  -

-

© Copyright John Maddock 2004

- -

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt).

-
- - diff --git a/doc/syntax_extended.html b/doc/syntax_extended.html deleted file mode 100644 index d9253166..00000000 --- a/doc/syntax_extended.html +++ /dev/null @@ -1,520 +0,0 @@ - - - - Boost.Regex: POSIX-Extended Regular Expression Syntax - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

POSIX-Extended Regular Expression Syntax

-
-

Boost.Regex Index

-
-

-
-

Contents

-
-
Synopsis
POSIX Extended Syntax -
Variations -
-
-
egrep
awk 
-
-
Options
References
-
-

Synopsis

-

The POSIX-Extended regular expression syntax is supported by the POSIX C - regular expression API's, and variations are used by the utilities egrep - and awk. You can construct POSIX extended regular expressions in - Boost.Regex by passing the flag extended to the regex constructor, for - example:

-
// e1 is a case sensitive POSIX-Extended expression:
-boost::regex e1(my_expression, boost::regex::extended);
-// e2 a case insensitive POSIX-Extended expression:
-boost::regex e2(my_expression, boost::regex::extended|boost::regex::icase);
-

POSIX Extended Syntax

-

In POSIX-Extended regular expressions, all characters match themselves except - for the following special characters:

-
.[{()\*+?|^$
-

Wildcard:

-

The single character '.' when used outside of a character set will match any - single character except:

-

The NULL character when the flag match_not_dot_null is passed to the - matching algorithms.

-

The newline character when the flag match_not_dot_newline is passed to - the matching algorithms.

-

Anchors:

-

A '^' character shall match the start of a line when used as the first - character of an expression, or the first character of a sub-expression.

-

A '$' character shall match the end of a line when used as the last character - of an expression, or the last character of a sub-expression.

-

Marked sub-expressions:

-

A section beginning ( and ending ) acts as a marked sub-expression.  - Whatever matched the sub-expression is split out in a separate field by the - matching algorithms.  Marked sub-expressions can also be repeated, or - referred to by a back-reference.

-

Repeats:

-

Any atom (a single character, a marked sub-expression, or a character class) - can be repeated with the *, +, ?, and {} operators.

-

The * operator will match the preceding atom zero or more times, for example - the expression a*b will match any of the following:

-
b
-ab
-aaaaaaaab
-

The + operator will match the preceding atom one or more times, for example the - expression a+b will match any of the following:

-
ab
-aaaaaaaab
-

But will not match:

-
b
-

The ? operator will match the preceding atom zero or one times, for - example the expression ca?b will match any of the following:

-
cb
-cab
-

But will not match:

-
caab
-

An atom can also be repeated with a bounded repeat:

-

a{n}  Matches 'a' repeated exactly n times.

-

a{n,}  Matches 'a' repeated n or more times.

-

a{n, m}  Matches 'a' repeated between n and m times - inclusive.

-

For example:

-
^a{2,3}$
-

Will match either of:

-
aa
-aaa
-

But neither of:

-
a
-aaaa
-

It is an error to use a repeat operator, if the preceding construct can not be - repeated, for example:

-
a(*)
-

Will raise an error, as there is nothing for the * operator to be applied to.

-

Back references:

-

An escape character followed by a digit n, where n is in the - range 1-9, matches the same string that was matched by sub-expression n.  - For example the expression:

-
^(a*).*\1$
-

Will match the string:

-
aaabbaaa
-

But not the string:

-
aaabba
-

Caution: the POSIX standard does not support back-references - for "extended" regular expressions, this is a compatible extension to that - standard.

-

Alternation

-

The | operator will match either of its arguments, so for example: abc|def will - match either "abc" or "def".  -

-

Parenthesis can be used to group alternations, for example: ab(d|ef) will match - either of "abd" or "abef".

-

Character sets:

-

A character set is a bracket-expression starting with [ and ending with ], it - defines a set of characters, and matches any single character that is a member - of that set.

-

A bracket expression may contain any combination of the following:

-
-
Single characters:
-

For example [abc], will match any of the characters 'a', 'b', or 'c'.

-
Character ranges:
-

For example [a-c] will match any single character in the range 'a' to - 'c'.  By default, for POSIX-Extended regular expressions, a character x - is within the range y to z, if it collates within that - range; this results in locale specific behavior.  - This behavior can be turned off by unsetting the - collate option flag - in which case whether a character appears - within a range is determined by comparing the code points of the characters - only.

-
Negation:
-

If the bracket-expression begins with the ^ character, then it matches the - complement of the characters it contains, for example [^a-c] matches any - character that is not in the range a-c.

-
Character classes:
-

An expression of the form [[:name:]] matches the named character class "name", - for example [[:lower:]] matches any lower case character.  See - character class names.

-
Collating Elements:
-

An expression of the form [[.col.]] matches the collating element col.  - A collating element is any single character, or any sequence of characters that - collates as a single unit.  Collating elements may also be used as the end - point of a range, for example: [[.ae.]-c] matches the character sequence "ae", - plus any single character in the range "ae"-c, assuming that "ae" is treated as - a single collating element in the current locale.

-

Collating elements may be used in place of escapes (which are not normally - allowed inside character sets), for example [[.^.]abc] would match either one - of the characters 'abc^'.

-

As an extension, a collating element may also be specified via its - symbolic name, for example:

-

[[.NUL.]]

-

matches a NUL character.

-
Equivalence classes:
-

- An expression of the form [[=col=]], matches any character or collating element - whose primary sort key is the same as that for collating element col, - as with collating elements the name col may be a - symbolic name.  A primary sort key is one that ignores case, - accentation, or locale-specific tailorings; so for example [[=a=]] matches any - of the characters: a, à, á, â, ã, ä, å, A, À, Á, Â, Ã, Ä and Å.  - Unfortunately implementation of this is reliant on the platform's collation and - localisation support; this feature can not be relied upon to work portably - across all platforms, or even all locales on one platform.

-
-
Combinations:
-

All of the above can be combined in one character set declaration, for example: - [[:digit:]a-c[.NUL.]].

-

Escapes

-

The POSIX standard defines no escape sequences for POSIX-Extended regular - expressions, except that:

- -

However, that's rather restrictive, so the following standard-compatible - extensions are also supported by Boost.Regex:

-
-
Escapes matching a specific character
-

The following escape sequences are all synonyms for single characters:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Escape | Character
\a | '\a'
\e | 0x1B
\f | \f
\n | \n
\r | \r
\t | \t
\v | \v
\b | \b (but only inside a character class declaration).
\cX | An ASCII escape sequence - the character whose code point is X % 32
\xdd | A hexadecimal escape sequence - matches the single character whose code point - is 0xdd.
\x{dddd} | A hexadecimal escape sequence - matches the single character whose code point - is 0xdddd.
\0ddd | An octal escape sequence - matches the single character whose code point is - 0ddd.
\N{Name} | Matches the single character which has the symbolic - name name.  For example \N{newline} matches the single - character \n.
-

-
"Single character" character classes:
-

Any escaped character x, if x is the name of a character - class shall match any character that is a member of that class, and any escaped - character X, if x is the name of a character class, shall - match any character not in that class.

-

The following are supported by default:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Escape sequence | Equivalent to
\d | [[:digit:]]
\l | [[:lower:]]
\s | [[:space:]]
\u | [[:upper:]]
\w | [[:word:]]
\D | [^[:digit:]]
\L | [^[:lower:]]
\S | [^[:space:]]
\U | [^[:upper:]]
\W | [^[:word:]]
-

-
-
Character Properties
- -

The character property names in the following table are all - equivalent to the names used in character - classes.

-
- - - - - - - - - - - - - - - - - - - - - - - - - - -
Form | Description | Equivalent character set form
\pX | Matches any character that has the property X. | [[:X:]]
\p{Name} | Matches any character that has the property Name. | [[:Name:]]
\PX | Matches any character that does not have the property X. | [^[:X:]]
\P{Name} | Matches any character that does not have the property Name. | [^[:Name:]]
-
-
Word Boundaries
-

The following escape sequences match the boundaries of words:

-

- - - - - - - - - - - - - - - - - -
\<Matches the start of a word.
\>Matches the end of a word.
\bMatches a word boundary (the start or end of a word).
\BMatches only when not at a word boundary.
-

-
Buffer boundaries
-

The following match only at buffer boundaries: a "buffer" in this context is - the whole of the input text that is being matched against (note that ^ and - $ may match embedded newlines within the text).

-

- - - - - - - - - - - - - - - - - - - - - -
\` | Matches at the start of a buffer only.
\' | Matches at the end of a buffer only.
\A | Matches at the start of a buffer only (the same as \`).
\z | Matches at the end of a buffer only (the same as \').
\Z | Matches an optional sequence of newlines at the end of a buffer: equivalent to - the regular expression \n*\z
-

-
Continuation Escape
-

The sequence \G matches only at the end of the last match found, or at the - start of the text being matched if no previous match was found.  This - escape is useful if you're iterating over the matches contained within a text, and - you want each subsequent match to start where the last one ended.

-
Quoting escape
-

The escape sequence \Q begins a "quoted sequence": all the subsequent - characters are treated as literals, until either the end of the regular - expression or \E is found.  For example the expression: \Q\*+\Ea+ would - match either of:

-
\*+a
\*+aaa
-
Unicode escapes
-

- - - - - - - - - -
\C | Matches a single code point: in Boost regex this has exactly the same effect - as a "." operator.
\X | Matches a combining character sequence: that is any non-combining character - followed by a sequence of zero or more combining characters.
-

-
Any other escape
-

Any other escape sequence matches the character that is escaped, for example \@ - matches a literal '@'.

-
-

Operator precedence

-

 The order of precedence of operators is as shown in the following - table:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Collation-related bracket symbols | [==] [::] [..]
Escaped characters | \
Character set (bracket expression) | []
Grouping | ()
Single-character-ERE duplication | * + ? {m,n}
Concatenation |
Anchoring | ^$
Alternation | |
-

-
-

What Gets Matched

-

When there is more than one way to match a regular expression, the "best" - possible match is obtained using the leftmost-longest - rule.

-

Variations

-

Egrep

-

When an expression is compiled with the flag egrep set, then the - expression is treated as a newline separated list of POSIX-Extended - expressions, a match is found if any of the expressions in the list match, for - example:

-
boost::regex e("abc\ndef", boost::regex::egrep);
-

will match either of the POSIX-Extended expressions "abc" or "def".

-

As its name suggests, this behavior is consistent with the Unix utility egrep, - and with grep when used with the -E option.

-

awk

-

In addition to the POSIX-Extended features the - escape character is special inside a character class declaration. 

-

In addition, some escape sequences that are not defined as part of - POSIX-Extended specification are required to be supported - however Boost.Regex - supports these by default anyway.

-

Options

-

There are a variety of flags that - may be combined with the extended and egrep options when - constructing the regular expression, in particular note that the - newline_alt option alters the syntax, while the - collate, nosubs and icase options modify how the case and locale - sensitivity are to be applied.

-

References

-

IEEE - Std 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions - and Headers, Section 9, Regular Expressions.

-

IEEE - Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and - Utilities, Section 4, Utilities, egrep.

-

IEEE - Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and - Utilities, Section 4, Utilities, awk.

-
-

-

Revised  - - 21 Aug 2004  -

-

© Copyright John Maddock 2004

- -

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt).

-
- - diff --git a/doc/syntax_leftmost_longest.html b/doc/syntax_leftmost_longest.html deleted file mode 100644 index 6330fc5f..00000000 --- a/doc/syntax_leftmost_longest.html +++ /dev/null @@ -1,65 +0,0 @@ - - - - Boost.Regex: Index - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

The "Leftmost Longest" Rule

-
-

Boost.Regex Index

-
-

-
-

-

Often there is more than one way of matching a regular expression at a - particular location, for POSIX basic and extended regular expressions, the - "best" match is determined as follows:

-
    -
  1. Find the leftmost match, if there is only one match possible at this location then return it.
  2. Find the longest of the possible matches, along with any ties.  If there is only one such possible match then return it.
  3. If there are no marked sub-expressions, then all the remaining alternatives are indistinguishable; return the first of these found.
  4. Find the match which has matched the first sub-expression in the leftmost position, along with any ties.  If there is only one such match possible then return it.
  5. Find the match which has the longest match for the first sub-expression, along with any ties.  If there is only one such match then return it.
  6. Repeat steps 4 and 5 for each additional marked sub-expression.
  7. If there is still more than one possible match remaining, then they are indistinguishable; return the first one found.
-

-


-

-

Revised - - 16 Dec 2004

-

© Copyright John Maddock 1998- - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - diff --git a/doc/syntax_option_type.html b/doc/syntax_option_type.html deleted file mode 100644 index fbd5cdbe..00000000 --- a/doc/syntax_option_type.html +++ /dev/null @@ -1,543 +0,0 @@ - - - - Boost.Regex: syntax_option_type - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

syntax_option_type

-
-

Boost.Regex Index

-
-

-
-

Contents

-
-
Synopsis
Description -
-
-
Options for Perl Regular Expressions
Options - for POSIX Extended Regular Expressions
Options for POSIX - Basic Regular Expressions
Options for String Literals
-
-
-
-

Synopsis

-

Type syntax_option_type is an implementation specific bitmask type that - controls how a regular expression string is to be interpreted.  For - convenience note that all the constants listed here, are also duplicated within - the scope of class template basic_regex.

-
namespace std{ namespace regex_constants{
-
-typedef implementation-specific-bitmask-type syntax_option_type;
-// these flags are standardized: -static const syntax_option_type normal; -static const syntax_option_type ECMAScript = normal; -static const syntax_option_type JavaScript = normal; -static const syntax_option_type JScript = normal; -static const syntax_option_type perl = normal;
static const syntax_option_type basic; -static const syntax_option_type sed = basic; -static const syntax_option_type extended; -static const syntax_option_type awk; -static const syntax_option_type grep; -static const syntax_option_type egrep; -static const syntax_option_type icase; -static const syntax_option_type nosubs; -static const syntax_option_type optimize; -static const syntax_option_type collate; -// other boost.regex specific options are listed below
-} // namespace regex_constants -} // namespace std
-

Description

-

The type syntax_option_type is an implementation specific bitmask - type (17.3.2.1.2). Setting its elements has the effects listed in the table - below, a valid value of type syntax_option_type will always have - exactly one of the elements normal, basic, extended, awk, grep, egrep, sed, - literal or perl set.

-

Note that for convenience all the constants listed here are duplicated within - the scope of class template basic_regex, so you can use any of:

-
boost::regex_constants::constant_name
-

or

-
boost::regex::constant_name
-

or

-
boost::wregex::constant_name
-

in an interchangeable manner.

-

Options for Perl Regular Expressions:

-

One of the following must always be set for perl regular expressions:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ElementStandardizedEffect when set
-

ECMAScript

-
Yes -

Specifies that the grammar recognized by the regular expression engine uses its - normal semantics: that is the same as that given in the ECMA-262, ECMAScript - Language Specification, Chapter 15 part 10, RegExp (Regular Expression) Objects - (FWD.1).

-

boost.regex also recognizes all of the perl-compatible (?...) extensions in - this mode.

-
perlNoAs above.
normalNoAs above.
JavaScriptNoAs above.
JScriptNoAs above.
-

-

The following options may also be set when using perl-style regular - expressions:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ElementStandardizedEffect when set
icaseYes -

Specifies that matching of regular expressions against a character container - sequence shall be performed without regard to case.

-
nosubsYes -

Specifies that when a regular expression is matched against a character - container sequence, then no sub-expression matches are to be stored in the - supplied match_results structure.

-
optimizeYes -

Specifies that the regular expression engine should pay more attention to the - speed with which regular expressions are matched, and less to the speed with - which regular expression objects are constructed. Otherwise it has no - detectable effect on the program output.  This currently has no effect for - Boost.Regex.

-
collateYes -

Specifies that character ranges of the form "[a-b]" should be locale sensitive.

-
newline_altNoSpecifies that the \n character has the same effect as the alternation - operator |.  Allows newline separated lists to be used as a list of - alternatives.
no_exceptNoPrevents basic_regex from throwing an exception when an invalid expression is - encountered.
no_mod_mNoNormally Boost.Regex behaves as if the Perl m-modifier is on: so the - assertions ^ and $ match after and before embedded newlines respectively, - setting this flag is equivalent to prefixing the expression with (?-m).
no_mod_sNoNormally whether Boost.Regex will match "." against a newline character is - determined by the match flag match_not_dot_newline.  - Specifying this flag is equivalent to prefixing the expression with (?-s) and - therefore causes "." not to match a newline character regardless of whether - match_not_dot_newline is set in the match flags.
mod_sNoNormally whether Boost.Regex will match "." against a newline character is - determined by the match flag match_not_dot_newline.  - Specifying this flag is equivalent to prefixing the expression with (?s) and - therefore causes "." to match a newline character regardless of whether - match_not_dot_newline is set in the match flags.
mod_xNoTurns on the perl x-modifier: causes unescaped whitespace in the expression to - be ignored.
-

-

Options for POSIX Extended Regular Expressions:

-

Exactly one of the following must always be set for POSIX extended regular - expressions:

-

- - - - - - - - - - - - - - - - - - - - - -
ElementStandardizedEffect when set
extendedYes -

Specifies that the grammar recognized by the regular expression engine is the - same as that used by POSIX extended regular expressions in IEEE Std - 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions and - Headers, Section 9, Regular Expressions (FWD.1).  -

-

In addition some perl-style escape sequences are supported (The POSIX standard - specifies that only "special" characters may be escaped, all other escape - sequences result in undefined behavior).

-
egrepYes -

Specifies that the grammar recognized by the regular expression engine is the - same as that used by POSIX utility grep when given the -E option in IEEE Std - 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and - Utilities, Section 4, Utilities, grep (FWD.1).

-

That is to say, the same as POSIX extended syntax, but with the newline - character acting as an alternation character in addition to "|".

-
awkYes -

Specifies that the grammar recognized by the regular expression engine is the - same as that used by POSIX utility awk in IEEE Std 1003.1-2001, Portable - Operating System Interface (POSIX ), Shells and Utilities, Section 4, awk - (FWD.1).

-

That is to say: the same as POSIX extended syntax, but with escape sequences in - character classes permitted.

-

In addition some perl-style escape sequences are supported (actually the awk - syntax only requires \a \b \t \v \f \n and \r to be recognised, all other - Perl-style escape sequences invoke undefined behavior according to the POSIX - standard, but are in fact recognised by Boost.Regex).

-
-

-

The following options may also be set when using POSIX extended regular - expressions:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -   - - - -
ElementStandardizedEffect when set
icaseYes -

Specifies that matching of regular expressions against a character container - sequence shall be performed without regard to case.

-
nosubsYes -

Specifies that when a regular expression is matched against a character - container sequence, then no sub-expression matches are to be stored in the - supplied match_results structure.

-
optimizeYes -

Specifies that the regular expression engine should pay more attention to the - speed with which regular expressions are matched, and less to the speed with - which regular expression objects are constructed. Otherwise it has no - detectable effect on the program output.  This currently has no effect for - boost.regex.

-
collateYes -

Specifies that character ranges of the form "[a-b]" should be locale - sensitive.  This bit is on by default for - POSIX-Extended regular expressions, but can be unset to force ranges to be - compared by code point only.

-
newline_altNoSpecifies that the \n character has the same effect as the alternation - operator |.  Allows newline separated lists to be used as a list of - alternatives.
no_escape_in_listsNoWhen set this makes the escape character ordinary inside lists, so that [\b] - would match either '\' or 'b'. This bit is on by default for - POSIX-Extended regular expressions, but can be unset to force escapes to be - recognised inside lists.
no_bk_refsNoWhen set then backreferences are disabled.  This bit is - on by default for POSIX-Extended regular expressions, but can be - unset to turn support for backreferences on.
no_except - NoPrevents basic_regex from throwing an exception when an invalid expression is - encountered.
-

-

Options for POSIX Basic Regular Expressions:

-

Exactly one of the following must always be set for POSIX basic regular - expressions:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - -
ElementStandardizedEffect When Set
basicYes -

Specifies that the grammar recognized by the regular expression engine is the - same as that used by POSIX basic regular - expressions in IEEE Std 1003.1-2001, Portable Operating System Interface - (POSIX ), Base Definitions and Headers, Section 9, Regular Expressions (FWD.1). -

-
sedNoAs Above.
grepYes -

Specifies that the grammar recognized by the regular expression engine is the - same as that used by POSIX utility grep in - IEEE Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and - Utilities, Section 4, Utilities, grep (FWD.1).

-

That is to say, the same as POSIX basic syntax, but with the newline character - acting as an alternation character; the expression is treated as a newline - separated list of alternatives.

-
emacsNoSpecifies that the grammar recognised is the superset of the POSIX-Basic - syntax used by the emacs program.
-

-

The following options may also be set when using POSIX basic regular - expressions:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ElementStandardizedEffect when set
icaseYes -

Specifies that matching of regular expressions against a character container - sequence shall be performed without regard to case.

-
nosubsYes -

Specifies that when a regular expression is matched against a character - container sequence, then no sub-expression matches are to be stored in the - supplied match_results structure.

-
optimizeYes -

Specifies that the regular expression engine should pay more attention to the - speed with which regular expressions are matched, and less to the speed with - which regular expression objects are constructed. Otherwise it has no - detectable effect on the program output.  This currently has no effect for - boost.regex.

-
collateYes -

Specifies that character ranges of the form "[a-b]" should be locale - sensitive.  This bit is on by default for - POSIX-Basic regular expressions, but can be unset to force ranges to be - compared by code point only.

-
newline_altNoSpecifies that the \n character has the same effect as the alternation - operator |.  Allows newline separated lists to be used as a list of - alternatives.  This bit is already set, if you use the grep option.
no_char_classesNoWhen set then character classes such as [[:alnum:]] are not allowed.
no_escape_in_listsNoWhen set this makes the escape character ordinary inside lists, so that [\b] - would match either '\' or 'b'. This bit is on by default for - POSIX-basic regular expressions, but can be unset to force escapes to be - recognised inside lists.
no_intervalsNoWhen set then bounded repeats such as a{2,3} are not permitted.
bk_plus_qmNoWhen set then \? acts as a zero-or-one repeat operator, and \+ acts as a - one-or-more repeat operator.
bk_vbarNoWhen set then \| acts as the alternation operator.
no_except - NoPrevents basic_regex from throwing an exception when an invalid expression is - encountered.
-

-

Options for Literal Strings:

-

The following must always be set to interpret the expression as a string - literal:

-

- - - - - - - - - - - -
ElementStandardizedEffect when set
literalYesTreat the string as a literal (no special characters).
-

-

The following options may also be combined with the literal flag:

-

- - - - - - - - - - - - - - - - -
ElementStandardizedEffect when set
icaseYes -

Specifies that matching of regular expressions against a character container - sequence shall be performed without regard to case.

-
optimizeYes -

Specifies that the regular expression engine should pay more attention to the - speed with which regular expressions are matched, and less to the speed with - which regular expression objects are constructed. Otherwise it has no - detectable effect on the program output.  This currently has no effect for - boost.regex.

-
-

-

 

-
-

Revised  - - 23 June 2004  -

-

© Copyright John Maddock 1998- - 2004

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/syntax_perl.html b/doc/syntax_perl.html deleted file mode 100644 index 3eda0385..00000000 --- a/doc/syntax_perl.html +++ /dev/null @@ -1,626 +0,0 @@ - - - - Boost.Regex: Perl Regular Expression Syntax - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

- Perl Regular Expression Syntax

-
-

Boost.Regex Index

-
-

-
-

Contents

-
-
Synopsis
Perl Syntax
- What Gets Matched
Variations -
-
Options
Modifiers
References
-
-

Synopsis

-

The Perl regular expression syntax is based on that used by the programming - language Perl .  Perl regular expressions are the default - behavior in Boost.Regex or you can pass the flag perl to the - regex constructor, for example:

-
// e1 is a case sensitive Perl regular expression: 
-// since Perl is the default option there's no need to explicitly specify the syntax used here:
-boost::regex e1(my_expression);
-// e2 a case insensitive Perl regular expression:
-boost::regex e2(my_expression, boost::regex::perl|boost::regex::icase);
-

Perl Regular Expression Syntax

-

In Perl regular expressions, all characters match themselves except for - the following special characters:

-
.[{()\*+?|^$
-

Wildcard:

-

The single character '.' when used outside of a character set will match any - single character except:

-

The NULL character when the flag match_not_dot_null is passed to the - matching algorithms.

-

The newline character when the flag match_not_dot_newline is passed to - the matching algorithms.

-

Anchors:

-

A '^' character shall match the start of a line.

-

A '$' character shall match the end of a line.

-

Marked sub-expressions:

-

A section beginning ( and ending ) acts as a marked sub-expression.  - Whatever matched the sub-expression is split out in a separate field by the - matching algorithms.  Marked sub-expressions can also be repeated, or - referred to by a back-reference.

-

Non-marking grouping:

-

A marked sub-expression is useful to lexically group part of a regular - expression, but has the side-effect of spitting out an extra field in the - result.  As an alternative you can lexically group part of a regular - expression, without generating a marked sub-expression by using (?: and ) , for - example (?:ab)+ will repeat "ab" without splitting out any separate - sub-expressions.

-

Repeats:

-

Any atom (a single character, a marked sub-expression, or a character class) - can be repeated with the *, +, ?, and {} operators.

-

The * operator will match the preceding atom zero or more times, for example - the expression a*b will match any of the following:

-
b
-ab
-aaaaaaaab
-

The + operator will match the preceding atom one or more times, for example the - expression a+b will match any of the following:

-
ab
-aaaaaaaab
-

But will not match:

-
b
-

The ? operator will match the preceding atom zero or one times, for - example the expression ca?b will match any of the following:

-
cb
-cab
-

But will not match:

-
caab
-

An atom can also be repeated with a bounded repeat:

-

a{n}  Matches 'a' repeated exactly n times.

-

a{n,}  Matches 'a' repeated n or more times.

-

a{n, m}  Matches 'a' repeated between n and m times - inclusive.

-

For example:

-
^a{2,3}$
-

Will match either of:

-
aa
-aaa
-

But neither of:

-
a
-aaaa
-

It is an error to use a repeat operator, if the preceding construct can not be - repeated, for example:

-
a(*)
-

Will raise an error, as there is nothing for the * operator to be applied to.

-

Non greedy repeats

-

The normal repeat operators are "greedy", that is to say they will consume as - much input as possible.  There are non-greedy versions available that will - consume as little input as possible while still producing a match.

-

*? Matches the previous atom zero or more times, while consuming as little - input as possible.

-

+? Matches the previous atom one or more times, while consuming as little input - as possible.

-

?? Matches the previous atom zero or one times, while consuming as little input - as possible.

-

{n,}? Matches the previous atom n or more times, while consuming - as little input as possible.

-

{n,m}? Matches the previous atom between n and m times, - while consuming as little input as possible.

-

Back references:

-

An escape character followed by a digit n, where n is in the - range 1-9, matches the same string that was matched by sub-expression n.  - For example the expression:

-
^(a*).*\1$
-

Will match the string:

-
aaabbaaa
-

But not the string:

-
aaabba
-

Alternation

-

The | operator will match either of its arguments, so for example: abc|def will - match either "abc" or "def".  -

-

Parentheses can be used to group alternations, for example: ab(d|ef) will match - either of "abd" or "abef".

-

Empty alternatives are not allowed (these are almost always a mistake), - but if you really want an empty alternative use (?:) as a placeholder, for - example:

-
-

"|abc" is not a valid expression, but
- "(?:)|abc" is and is equivalent, also the expression:
- "(?:abc)??" has exactly the same effect.

-
-

Character sets:

-

A character set is a bracket-expression starting with [ and ending with ], it - defines a set of characters, and matches any single character that is a member - of that set.

-

A bracket expression may contain any combination of the following:

-
-
Single characters:
-

For example [abc], will match any of the characters 'a', 'b', or 'c'.

-
Character ranges:
-

For example [a-c] will match any single character in the range 'a' to - 'c'.  By default, for POSIX-Perl regular expressions, a character x - is within the range y to z, if it collates within that - range; this results in locale specific behavior.  This behavior can - be turned off by unsetting the collate - option flag - in which case whether a character appears within a range is - determined by comparing the code points of the characters only

-
Negation:
-

If the bracket-expression begins with the ^ character, then it matches the - complement of the characters it contains, for example [^a-c] matches any - character that is not in the range a-c.

-
Character classes:
-

An expression of the form [[:name:]] matches the named character class "name", - for example [[:lower:]] matches any lower case character.  See - character class names.

-
Collating Elements:
-

An expression of the form [[.col.]] matches the collating element col.  - A collating element is any single character, or any sequence of characters that - collates as a single unit.  Collating elements may also be used as the end - point of a range, for example: [[.ae.]-c] matches the character sequence "ae", - plus any single character in the range "ae"-c, assuming that "ae" is treated as - a single collating element in the current locale.

-

As an extension, a collating element may also be specified via its - symbolic name, for example:

-

[[.NUL.]]

-

matches a NUL character.

-
Equivalence classes:
-

- An expression of the form [[=col=]], matches any character or collating element - whose primary sort key is the same as that for collating element col, - as with collating elements the name col may be a - symbolic name.  A primary sort key is one that ignores case, - accentation, or locale-specific tailorings; so for example [[=a=]] matches any - of the characters: a, à, á, â, ã, ä, å, A, À, Á, Â, Ã, Ä and Å.  - Unfortunately implementation of this is reliant on the platform's collation and - localisation support; this feature can not be relied upon to work portably - across all platforms, or even all locales on one platform.

-
Escapes:
-

All the escape sequences that match a single character, or a single character - class are permitted within a character class definition, except the - negated character classes (\D \W etc).

-
-
Combinations:
-

All of the above can be combined in one character set declaration, for example: - [[:digit:]a-c[.NUL.]].

-

Escapes

-

Any special character preceded by an escape shall match itself. -

-

The following escape sequences are also supported:

-
-
Escapes matching a specific character
-

The following escape sequences are all synonyms for single characters:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
EscapeCharacter
\a'\a'
\e0x1B
\f\f
\n\n
\r\r
\t\t
\v\v
\b\b (but only inside a character class declaration).
\cXAn ASCII escape sequence - the character whose code point is X % 32
\xddA hexadecimal escape sequence - matches the single character whose code point - is 0xdd.
\x{dddd}A hexadecimal escape sequence - matches the single character whose code point - is 0xdddd.
\0dddAn octal escape sequence - matches the single character whose code point is - 0ddd.
\N{name}Matches the single character which has the symbolic - name name.  For example \N{newline} matches the single - character \n.
-

-
"Single character" character classes:
-

Any escaped character x, if x is the name of a character - class shall match any character that is a member of that class, and any escaped - character X, if x is the name of a character class, shall - match any character not in that class.

-

The following are supported by default:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Escape sequenceEquivalent to
\d[[:digit:]]
\l[[:lower:]]
\s[[:space:]]
\u[[:upper:]]
\w[[:word:]]
\D[^[:digit:]]
\L[^[:lower:]]
\S[^[:space:]]
\U[^[:upper:]]
\W[^[:word:]]
-

-
Character Properties
-

The character property names in the following table are all equivalent to the - names used in character classes.

-

- - - - - - - - - - - - - - - - - - - - - - - - - - -
FormDescriptionEquivalent character set form
\pXMatches any character that has the property X.[[:X:]]
\p{Name}Matches any character that has the property Name.[[:Name:]]
\PXMatches any character that does not have the property X.[^[:X:]]
\P{Name}Matches any character that does not have the property Name.[^[:Name:]]
-

-
Word Boundaries
-

The following escape sequences match the boundaries of words:

-

- - - - - - - - - - - - - - - - - -
\<Matches the start of a word.
\>Matches the end of a word.
\bMatches a word boundary (the start or end of a word).
\BMatches only when not at a word boundary.
-

-
Buffer boundaries
-

The following match only at buffer boundaries: a "buffer" in this context is - the whole of the input text that is being matched against (note that ^ and - $ may match embedded newlines within the text).

-

- - - - - - - - - - - - - - - - - - - - - -
\`Matches at the start of a buffer only.
\'Matches at the end of a buffer only.
\AMatches at the start of a buffer only (the same as \`).
\zMatches at the end of a buffer only (the same as \').
\ZMatches an optional sequence of newlines at the end of a buffer: equivalent to - the regular expression \n*\z
-

-
Continuation Escape
-

The sequence \G matches only at the end of the last match found, or at the - start of the text being matched if no previous match was found.  This - escape is useful if you're iterating over the matches contained within a text, and - you want each subsequent match to start where the last one ended.

-
Quoting escape
-

The escape sequence \Q begins a "quoted sequence": all the subsequent - characters are treated as literals, until either the end of the regular - expression or \E is found.  For example the expression: \Q\*+\Ea+ would - match either of:

-
\*+a
\*+aaa
-
Unicode escapes
-

- - - - - - - - - -
\CMatches a single code point: in Boost regex this has exactly the same effect - as a "." operator.
\XMatches a combining character sequence: that is any non-combining character - followed by a sequence of zero or more combining characters.
-

-
Any other escape
-

Any other escape sequence matches the character that is escaped, for example \@ - matches a literal '@'.

-
-

Perl Extended Patterns

-

Perl-specific extensions to the regular expression syntax all start - with (?.

-
-
Comments
-

(?# ... ) is treated as a comment, its contents are ignored.

-
Modifiers
-

(?imsx-imsx ... ) alters which of the perl modifiers are in effect - within the pattern, changes take effect from the point that the block is first - seen and extend to any enclosing ).  Letters before a '-' turn that perl - modifier on, letters afterward, turn it off.

-

(?imsx-imsx:pattern) applies the specified modifiers to pattern - only.

-
Non-marking grouping
-

(?:pattern) lexically groups pattern, without generating an - additional sub-expression.

-
Lookahead
-

(?=pattern) consumes zero characters, only if pattern matches.

-

(?!pattern) consumes zero characters, only if pattern does - not match.

-

Lookahead is typically used to create the logical AND of two regular - expressions, for example if a password must contain a lower case letter, an - upper case letter, a punctuation symbol, and be at least 6 characters long, - then the expression:

-
(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}
-

could be used to validate the password.

-
Lookbehind
-

(?<=pattern) consumes zero characters, only if pattern could - be matched against the characters preceding the current position (pattern - must be of fixed length).

-

(?<!pattern) consumes zero characters, only if pattern could - not be matched against the characters preceding the current position (pattern - must be of fixed length).

-
Independent sub-expressions
-

(?>pattern) pattern is matched independently of the - surrounding patterns, the expression will never backtrack into pattern.  - Independent sub-expressions are typically used to improve performance; only the - best possible match for pattern will be considered, if this doesn't - allow the expression as a whole to match then no match is found at all.

-
Conditional Expressions
-

(?(condition)yes-pattern|no-pattern) attempts to match yes-pattern - if the condition is true, otherwise attempts to match no-pattern.

-

(?(condition)yes-pattern) attempts to match yes-pattern if - the condition is true, otherwise fails.

-

Condition may be either a forward lookahead assert, or the - index of a marked sub-expression (the condition becomes true if the - sub-expression has been matched).

-
-

Operator precedence

-

 The order of precedence of operators is as shown in the following - table:

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Collation-related bracket symbols[==] [::] [..]
Escaped characters - \
Character set (bracket expression) - []
Grouping()
Single-character-ERE duplication - * + ? {m,n}
Concatenation
Anchoring^$
Alternation|
-

-
-

What gets matched

-

If you view the regular expression as a directed (possibly cyclic) graph, then - the best match found is the first match found by a depth-first-search performed - on that graph, while matching the input text.

-

Alternatively:

-

the best match found is the leftmost match, with individual elements matched as - follows;

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ConstructWhat gets matched
AtomA AtomBLocates the best match for AtomA that has a following match for AtomB.
Expression1 | Expression2If Expression1 can be matched then returns that match, otherwise attempts to - match Expression2.
S{N}Matches S repeated exactly N times.
S{N,M}Matches S repeated between N and M times, and as many times as possible.
S{N,M}?Matches S repeated between N and M times, and as few times as possible.
S?, S*, S+ The same as S{0,1}, S{0,UINT_MAX}, - S{1,UINT_MAX} respectively. -
S??, S*?, S+?The same as S{0,1}?, S{0,UINT_MAX}?, S{1,UINT_MAX}? - respectively. -
(?>S) - Matches the best match for S, and only that.
- (?=S), (?<=S) - Matches only the best match for S (this is only visible if there are capturing - parentheses within S).
(?!S), (?<!S)Considers only whether a match for S exists or not.
(?(condition)yes-pattern | no-pattern)If condition is true, then only yes-pattern is considered, - otherwise only no-pattern is considered.
-

-

Variations

-

The options normal, ECMAScript, JavaScript - and JScript are all synonyms for Perl.

-

Options

-

There are a variety of flags that - may be combined with the Perl option when constructing the regular - expression, in particular note that the newline_alt - option alters the syntax, while the collate, - nosubs and icase options modify how the case and locale sensitivity - are to be applied.

-

Modifiers

-

The perl smix modifiers can either be applied using a (?smix-smix) - prefix to the regular expression, or with one of the regex-compile time flags - no_mod_m, mod_x, mod_s, and no_mod_s. -

-

References

-

Perl 5.8.

-
-

-

Revised  - - 21 Aug 2004  -

-

© Copyright John Maddock 2004

- -

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt).

-
- - diff --git a/doc/thread_safety.html b/doc/thread_safety.html deleted file mode 100644 index 894a7688..00000000 --- a/doc/thread_safety.html +++ /dev/null @@ -1,70 +0,0 @@ - - - - Boost.Regex: Thread Safety - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Thread Safety

-
-

Boost.Regex Index

-
-

-
-

The regex library is thread safe when Boost is: you can verify that Boost is in - thread safe mode by checking to see if BOOST_HAS_THREADS is defined: this macro - is set automatically by the config system when threading support is turned on - in your compiler. -

-

Class basic_regex<> and its typedefs regex - and wregex are thread safe, in that compiled regular expressions can safely be - shared between threads. The matching algorithms regex_match, - regex_search, regex_grep, - regex_format and regex_merge - are all re-entrant and thread safe. Class match_results - is now thread safe, in that the results of a match can be safely copied from - one thread to another (for example one thread may find matches and push - match_results instances onto a queue, while another thread pops them off the - other end), otherwise use a separate instance of match_results - per thread. -

-

The POSIX API functions are all re-entrant and - thread safe, regular expressions compiled with regcomp can also be - shared between threads. -

-

The class RegEx is only thread safe if each thread - gets its own RegEx instance (apartment threading) - this is a consequence of - RegEx handling both compiling and matching regular expressions. -

-

Finally note that changing the global locale invalidates all compiled regular - expressions, therefore calling set_locale from one thread while another - uses regular expressions will produce unpredictable results. -

-

- There is also a requirement that there is only one thread executing prior to - the start of main().

-
-

Revised - - 24 Oct 2003 -

-

© Copyright John Maddock 1998- - - 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - diff --git a/doc/uarrow.gif b/doc/uarrow.gif deleted file mode 100644 index 6afd20c3857127c21fc9bcd52ec347e32c21578c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1666 zcmZ?wbhEHb)Me0S_|CwP5S3S3-SznXe?>*j_wTyvm7!+LipuF=EDz;>HaU#`S8h zGbFWQMU_0|jnYg*R~!4zoU;5Lr*b+kNLjFqO}~hCs-EvWIiqyb(4{u9n_SYiN0ser zo_b{M{x_V;vAo&^{Mxyq#wF4=oeHiqM6_c}V>j8vuGVp>F%O-V(SCB__7}X`>4L_U zVm6)9HjN6d6V!a?>4h#eja{wfQZZ%y0 zcBxI;Cfl@)`abn8dE3M44hNU*NNPV`GVOBRyc?a%?}et+Ma?OV1z ztZr|7`^k*Hb7j4o3a6defBuPS+ET~7&h+g3q~w&@bLM{g z{v)%b=KJ^W@87?F{`~p9d-twgyLRr}xnswU?cKX~>(;Gn*REZ+| zz|hB_12PztCmcBbGstqvcx+g3u$ey;0G87DHmwvOZbsa=mLSozov4QVirN zkYH*$(Ipxfv7$$*g>5oV5eFyJlXU-iJ*75&36GSIaY!gPakWlRX$xFzo8-`7pp?lY znV<6SW%6gfo)61`o?wS!MoswK)ayS4bmo_)pa{hw_vEcTxM zPDanKulTRm%EV*qzc;dPkof=bLaW;>i<48e&;Q%IfLkeak%MEWvmvi+)u$)tnah)3 zzYobUlt|ZDsp2B;lOu4zr|;?Xi81Aljx7>18v}w8=dCnx5x37caI~QDN1{wB=fsDu zys-rjSlMMb6pskc%4B2mG~_!tncsDW+KG_rFW=`ov1BY|m#=3r>d5VUbL<2|ufd}y zN(W?>L__VgzDUVA*Qs(Sw#pQ7C?(vR=Ao3Rl+=-VrMi-oT@Uca~Kt?WUK2AMe@GvcE{Cd?3LKD+Sc5{|74 z97Oz04zdV)C~!SoA`+l@KtO%#7iQtykpJx0QyQw=S@@C{Bm{XmvjpCgs&PoWyynza z@uT9&9F9kLS>?oz@Jp^ZI4$|G+;eeJr9BJYY?{Avi<7`&Kf{hFC9km6PN_VMY=XP) zNx5#;y81nfPvKKUYHVPNAq&@R!H??_wKsmy<&~@n@K7`k-1@Aaqm`jm%<+RYi=e9D zj7f$i_#F_GNyn$i)?1(nYpwDZ(GJJ{0WJvobi zo9%_7!m6+A3JI)9KcCK6Z~8WwZ_~3MkMqrz&zGy7qPKj`1lHGq4R j5{XCdyxsBG&w8Q7<)tgD4zTO*shVAswMUtek--`O>qI)6 diff --git a/doc/unicode.html b/doc/unicode.html deleted file mode 100644 index 9e22faf3..00000000 --- a/doc/unicode.html +++ /dev/null @@ -1,66 +0,0 @@ - - - - Boost.Regex: Index - - - - -

- - - - - - -
-

C++ Boost

-
-

Boost.Regex

-

Unicode Regular Expressions.

-
-

Boost.Regex Index

-
-

-
-

-

There are two ways to use Boost.Regex with Unicode strings:

-

Rely on wchar_t

-

If your platform's wchar_t type can hold Unicode strings, and your - platform's C/C++ runtime correctly handles wide character constants (when - passed to std::iswspace, std::iswlower, etc.), then you can use boost::wregex to - process Unicode.  However, there are several disadvantages to this - approach:

-
    -
  • - It's not portable: there's no guarantee on the width of wchar_t, or even - whether the runtime treats wide characters as Unicode at all; most Windows - compilers do so, but many Unix systems do not.
  • -
  • - There's no support for Unicode-specific character classes: [[:Nd:]], [[:Po:]] - etc.
  • -
  • - You can only search strings that are encoded as sequences of wide characters; - it is not possible to search UTF-8, or even UTF-16 on many platforms.
-

Use a Unicode Aware Regular Expression Type.

-

If you have the ICU - library, then Boost.Regex can be configured - to make use of it, and provide a distinct regular expression type - (boost::u32regex), that supports both Unicode specific character properties, - and the searching of text that is encoded in either UTF-8, UTF-16, or - UTF-32.  See: ICU string class support.

-

-


-

-

-

Revised  - - 04 Jan 2005  -

-

© Copyright John Maddock 2005

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - - diff --git a/doc/vc71-performance.html b/doc/vc71-performance.html deleted file mode 100644 index c847845a..00000000 --- a/doc/vc71-performance.html +++ /dev/null @@ -1,703 +0,0 @@ - - - Regular Expression Performance Comparison - - - - - - -

Regular Expression Performance Comparison

-

- The following tables provide comparisons between the following regular - expression libraries:

-

GRETA.

-

The Boost regex library.

-

Henry Spencer's regular expression library - - this is provided for comparison as a typical non-backtracking implementation.

-

Philip Hazel's PCRE library.

-

Details

-

Machine: Intel Pentium 4 2.8GHz PC.

-

Compiler: Microsoft Visual C++ version 7.1.

-

C++ Standard Library: Dinkumware standard library version 313.

-

OS: Win32.

-

Boost version: 1.31.0.

-

PCRE version: 3.9.

-

- As ever, care should be taken in interpreting the results: only sensible regular - expressions (rather than pathological cases) are given; most are taken from the - Boost regex examples, or from the Library of - Regular Expressions. In addition, some variation in the relative - performance of these libraries can be expected on other machines - as memory - access and processor caching effects can be quite large for most finite state - machine algorithms.

-

Averages

-

The following are the average relative scores for all the tests: the perfect - regular expression library would score 1; in practice any small number - (say less than 4 or 5) is pretty good.

-

- - - - - - - - - - - - - - - - -
GRETAGRETA
- (non-recursive mode)
BoostBoost + C++ localePOSIXPCRE
2.316196.142032.306681.94363124.7522.09365
-

-

Comparison 1: Long Search

-

For each of the following regular expressions the time taken to find all - occurrences of the expression within a long English language text was measured - (mtent12.txt - from Project Gutenberg, 19Mb). 

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionGRETAGRETA
- (non-recursive mode)
BoostBoost + C++ localePOSIXPCRE
Twain1
- (0.0407s)
1
- (0.0407s)
4.18
- (0.17s)
4.18
- (0.17s)
135
- (5.48s)
1.37
- (0.0557s)
Huck[[:alpha:]]+1.02
- (0.0381s)
1
- (0.0375s)
4.53
- (0.17s)
4.54
- (0.17s)
166
- (6.23s)
1.34
- (0.0501s)
[[:alpha:]]+ing4.3
- (4.18s)
9.93
- (9.65s)
1.15
- (1.12s)
1
- (0.972s)
8.15
- (7.92s)
5.85
- (5.69s)
^[^ ]*?Twain6.25
- (1.84s)
20.9
- (6.16s)
1.56
- (0.461s)
1
- (0.295s)
NA2.58
- (0.761s)
Tom|Sawyer|Huckleberry|Finn6.53
- (0.711s)
11.5
- (1.25s)
2.3
- (0.251s)
1
- (0.109s)
196
- (21.4s)
1.77
- (0.193s)
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)3.88
- (0.972s)
6.48
- (1.62s)
1.66
- (0.416s)
1
- (0.251s)
NA2.48
- (0.62s)
-

-

Comparison 2: Medium Sized Search

-

For each of the following regular expressions the time taken to find all - occurrences of the expression within a medium sized English language text was - measured (the first 50K from mtent12.txt). 

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionGRETAGRETA
- (non-recursive mode)
BoostBoost + C++ localePOSIXPCRE
Twain1
- (9.05e-005s)
1.03
- (9.29e-005s)
4.92
- (0.000445s)
4.92
- (0.000445s)
43.2
- (0.00391s)
3.18
- (0.000288s)
Huck[[:alpha:]]+1
- (8.56e-005s)
1
- (8.56e-005s)
4.97
- (0.000425s)
4.98
- (0.000426s)
2.8
- (0.000239s)
2.2
- (0.000188s)
[[:alpha:]]+ing5.29
- (0.011s)
11.8
- (0.0244s)
1.19
- (0.00246s)
1
- (0.00207s)
8.77
- (0.0182s)
6.88
- (0.0142s)
^[^ ]*?Twain5.98
- (0.00462s)
20.2
- (0.0156s)
1.54
- (0.00119s)
1
- (0.000772s)
NA2.53
- (0.00195s)
Tom|Sawyer|Huckleberry|Finn3.42
- (0.00207s)
6.31
- (0.00383s)
1.71
- (0.00104s)
1
- (0.000606s)
81.5
- (0.0494s)
1.96
- (0.00119s)
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)1.97
- (0.00266s)
3.77
- (0.00509s)
1.38
- (0.00186s)
1
- (0.00135s)
297
- (0.401s)
1.77
- (0.00238s)
-

-

Comparison 3: C++ Code Search

-

For each of the following regular expressions the time taken to find all - occurrences of the expression within the C++ source file - boost/crc.hpp was measured. 

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionGRETAGRETA
- (non-recursive mode)
BoostBoost + C++ localePOSIXPCRE
^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\<\w+\>([ - ]*\([^)]*\))?[[:space:]]*)*(\<\w*\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\{|:[^;\{()]*\{)6.67
- (0.00147s)
36.9
- (0.00813s)
1.03
- (0.000227s)
1
- (0.00022s)
557
- (0.123s)
2.57
- (0.000566s)
(^[ - ]*#(?:[^\\\n]|\\[^\n_[:punct:][:alnum:]]*[\n[:punct:][:word:]])*)|(//[^\n]*|/\*.*?\*/)|\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\>|('(?:[^\\']|\\.)*'|"(?:[^\\"]|\\.)*")|\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\>1
- (0.00555s)
3.32
- (0.0185s)
2.53
- (0.0141s)
1.94
- (0.0108s)
NA3.38
- (0.0188s)
^[ ]*#[ ]*include[ ]+("[^"]+"|<[^>]+>)4.77
- (0.00156s)
24.8
- (0.00814s)
1.13
- (0.000372s)
1
- (0.000328s)
120
- (0.0394s)
1.58
- (0.000518s)
^[ ]*#[ ]*include[ ]+("boost/[^"]+"|<boost/[^>]+>)4.72
- (0.00154s)
24.8
- (0.00813s)
1.12
- (0.000367s)
1
- (0.000328s)
143
- (0.0469s)
1.58
- (0.000518s)
-

-

-

Comparison 4: HTML Document Search

- -

For each of the following regular expressions the time taken to find all - occurrences of the expression within the html file libs/libraries.htm - was measured. 

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionGRETAGRETA
- (non-recursive mode)
BoostBoost + C++ localePOSIXPCRE
beman|john|dave4.07
- (0.00111s)
7.14
- (0.00195s)
1.75
- (0.000479s)
1
- (0.000273s)
54.3
- (0.0149s)
1.83
- (0.000499s)
<p>.*?</p>1
- (6.59e-005s)
1.04
- (6.84e-005s)
4.15
- (0.000273s)
4.23
- (0.000279s)
NA4.23
- (0.000279s)
<a[^>]+href=("[^"]*"|[^[:space:]]+)[^>]*>1.39
- (0.000626s)
1.83
- (0.000821s)
1.41
- (0.000636s)
1
- (0.00045s)
351
- (0.158s)
1.13
- (0.000509s)
<h[12345678][^>]*>.*?</h[12345678]>1
- (0.000142s)
1.21
- (0.000171s)
2.62
- (0.000372s)
1.48
- (0.00021s)
NA1.73
- (0.000245s)
<img[^>]+src=("[^"]*"|[^[:space:]]+)[^>]*>1
- (5.38e-005s)
1.05
- (5.63e-005s)
5
- (0.000269s)
5.18
- (0.000278s)
604
- (0.0325s)
4.05
- (0.000218s)
<font[^>]+face=("[^"]*"|[^[:space:]]+)[^>]*>.*?</font>1
- (6.05e-005s)
1.09
- (6.59e-005s)
4.45
- (0.000269s)
4.69
- (0.000284s)
NA3.64
- (0.00022s)
-

-

Comparison 5: Simple Matches

-

- For each of the following regular expressions the time taken to match against - the text indicated was measured. 

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExpressionTextGRETAGRETA
- (non-recursive mode)
BoostBoost + C++ localePOSIXPCRE
abcabc1.32
- (2.24e-007s)
1.86
- (3.15e-007s)
1.25
- (2.12e-007s)
1.24
- (2.1e-007s)
2.98
- (5.05e-007s)
1
- (1.7e-007s)
^([0-9]+)(\-| |$)(.*)$100- this is a line of ftp response which contains a message string1.32
- (5.91e-007s)
1.96
- (8.78e-007s)
2.68
- (1.2e-006s)
1.53
- (6.88e-007s)
332
- (0.000149s)
1
- (4.49e-007s)
([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}1234-5678-1234-4561.44
- (7.16e-007s)
2.04
- (1.01e-006s)
3.35
- (1.66e-006s)
2.15
- (1.07e-006s)
31.4
- (1.56e-005s)
1
- (4.96e-007s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$john@johnmaddock.co.uk1
- (1.18e-006s)
1.42
- (1.68e-006s)
2.06
- (2.44e-006s)
1.35
- (1.6e-006s)
165
- (0.000196s)
1.06
- (1.26e-006s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$foo12@foo.edu1
- (1.09e-006s)
1.44
- (1.57e-006s)
2.21
- (2.4e-006s)
1.41
- (1.53e-006s)
108
- (0.000117s)
1.04
- (1.13e-006s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$bob.smith@foo.tv1
- (1.07e-006s)
1.43
- (1.53e-006s)
2.21
- (2.37e-006s)
1.45
- (1.55e-006s)
123
- (0.000132s)
1.05
- (1.13e-006s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$EH10 2QQ1
- (3.19e-007s)
1.67
- (5.34e-007s)
1.58
- (5.05e-007s)
1.4
- (4.49e-007s)
10.4
- (3.32e-006s)
1.15
- (3.68e-007s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$G1 1AA1
- (3.29e-007s)
1.65
- (5.44e-007s)
1.51
- (4.96e-007s)
1.36
- (4.49e-007s)
8.46
- (2.79e-006s)
1.1
- (3.63e-007s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$SW1 1ZZ1
- (3.25e-007s)
1.64
- (5.34e-007s)
1.56
- (5.05e-007s)
1.38
- (4.49e-007s)
9.29
- (3.02e-006s)
1.13
- (3.68e-007s)
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$4/1/20011
- (3.44e-007s)
1.55
- (5.34e-007s)
2.36
- (8.12e-007s)
2.2
- (7.55e-007s)
19.6
- (6.72e-006s)
1.81
- (6.21e-007s)
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$12/12/20011.05
- (6.59e-007s)
1.66
- (1.05e-006s)
1.44
- (9.07e-007s)
1.23
- (7.73e-007s)
11.6
- (7.34e-006s)
1
- (6.3e-007s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$1231
- (5.72e-007s)
1.59
- (9.07e-007s)
1.6
- (9.16e-007s)
1.49
- (8.5e-007s)
6.14
- (3.51e-006s)
1.22
- (6.97e-007s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$+3.141591
- (6.78e-007s)
1.52
- (1.03e-006s)
1.47
- (9.94e-007s)
1.31
- (8.88e-007s)
10.8
- (7.34e-006s)
1.08
- (7.35e-007s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$-3.141591
- (6.78e-007s)
1.52
- (1.03e-006s)
1.46
- (9.92e-007s)
1.32
- (8.98e-007s)
10.5
- (7.11e-006s)
1.11
- (7.54e-007s)
-

-
-

© Copyright John Maddock 2003

-

Use, modification and distribution are subject to the Boost Software License, - Version 1.0. (See accompanying file LICENSE_1_0.txt - or copy at http://www.boost.org/LICENSE_1_0.txt)

- - -