From c8f2ed3bdb3970d9269ca8ae375c4ba31488bf94 Mon Sep 17 00:00:00 2001 From: John Maddock Date: Wed, 23 Apr 2003 10:49:57 +0000 Subject: [PATCH] Added new docs, Moved source more into line with the std lib proposal. [SVN r18292] --- appendix.htm | 1304 --------- doc/Attic/bad_expression.html | 77 + doc/Attic/basic_regex.html | 944 +++++++ doc/Attic/contacts.html | 86 + doc/Attic/examples.html | 107 + doc/Attic/faq.html | 118 + doc/Attic/format_syntax.html | 217 ++ doc/Attic/headers.html | 51 + doc/Attic/history.html | 44 + doc/Attic/implementation.html | 44 + doc/Attic/install.html | 236 ++ doc/Attic/introduction.html | 174 ++ doc/Attic/localisation.html | 1126 ++++++++ doc/Attic/match_flag_type.html | 266 ++ doc/Attic/match_results.html | 390 +++ doc/Attic/partial_matches.html | 184 ++ doc/Attic/posix_api.html | 288 ++ doc/Attic/redistributables.html | 83 + doc/Attic/reg_expression.html | 45 + doc/Attic/regbase.html | 55 + doc/Attic/regex.html | 492 ++++ doc/Attic/regex_format.html | 165 ++ doc/Attic/regex_grep.html | 379 +++ doc/Attic/regex_match.html | 325 +++ doc/Attic/regex_merge.html | 46 + doc/Attic/regex_replace.html | 208 ++ doc/Attic/regex_search.html | 332 +++ doc/Attic/regex_split.html | 143 + doc/Attic/regex_traits.html | 47 + doc/Attic/sub_match.html | 427 +++ doc/Attic/syntax.html | 783 ++++++ doc/Attic/syntax_option_type.html | 334 +++ doc/Attic/thread_safety.html | 66 + doc/Attic/uarrow.gif | Bin 0 -> 1666 bytes doc/bad_expression.html | 77 + doc/basic_regex.html | 944 +++++++ doc/contacts.html | 86 + doc/examples.html | 107 + doc/faq.html | 118 + doc/format_syntax.html | 217 ++ doc/headers.html | 51 + doc/history.html | 44 + doc/implementation.html | 44 + doc/index.html | 119 + doc/install.html | 236 ++ doc/introduction.html | 174 ++ doc/localisation.html | 1126 ++++++++ doc/match_flag_type.html | 266 ++ doc/match_results.html | 390 +++ doc/partial_matches.html | 184 ++ doc/posix_api.html | 288 ++ doc/redistributables.html | 83 + 
doc/reg_expression.html | 45 + doc/regbase.html | 55 + doc/regex.html | 492 ++++ doc/regex_format.html | 165 ++ doc/regex_grep.html | 379 +++ doc/regex_match.html | 325 +++ doc/regex_merge.html | 46 + doc/regex_replace.html | 208 ++ doc/regex_search.html | 332 +++ doc/regex_split.html | 143 + doc/regex_traits.html | 47 + doc/sub_match.html | 427 +++ doc/syntax.html | 783 ++++++ doc/syntax_option_type.html | 334 +++ doc/thread_safety.html | 66 + doc/uarrow.gif | Bin 0 -> 1666 bytes example/Jamfile | 1 + example/jgrep/main.cpp | 6 +- example/snippets/credit_card_example.cpp | 4 +- example/snippets/regex_replace_example.cpp | 137 + example/snippets/regex_split_example_2.cpp | 2 +- faq.htm | 205 -- format_string.htm | 243 -- hl_ref.htm | 572 ---- include/boost/regex/v4/basic_regex.hpp | 64 +- include/boost/regex/v4/match_flags.hpp | 51 +- include/boost/regex/v4/perl_matcher.hpp | 4 +- .../boost/regex/v4/perl_matcher_common.hpp | 15 +- .../regex/v4/perl_matcher_non_recursive.hpp | 7 +- .../boost/regex/v4/perl_matcher_recursive.hpp | 19 +- include/boost/regex/v4/regbase.hpp | 61 +- include/boost/regex/v4/regex.hpp | 3 + include/boost/regex/v4/regex_compile.hpp | 88 +- include/boost/regex/v4/regex_format.hpp | 6 +- include/boost/regex/v4/regex_grep.hpp | 2 +- include/boost/regex/v4/regex_merge.hpp | 21 +- include/boost/regex/v4/regex_replace.hpp | 91 + include/boost/regex/v4/regex_search.hpp | 85 +- include/boost/regex/v4/sub_match.hpp | 51 + index.htm | 150 - introduction.htm | 476 ---- performance/Jamfile | 43 + performance/command_line.cpp | 51 +- performance/input.html | 119 +- performance/main.cpp | 2 +- performance/time_boost.cpp | 4 +- performance/time_localised_boost.cpp | 4 +- posix_ref.htm | 314 --- src/cregex.cpp | 6 +- src/posix_api.cpp | 17 +- src/wide_posix_api.cpp | 17 +- syntax.htm | 742 ----- template_class_ref.htm | 2479 ----------------- test/regress/parse.cpp | 42 +- test/regress/regex_test.cpp | 18 +- test/regress/tests.cpp | 35 +- 
test/regress/tests.txt | 62 +- test/regress/wregex_test.cpp | 18 +- traits_class_ref.htm | 1016 ------- 111 files changed, 17582 insertions(+), 7758 deletions(-) delete mode 100644 appendix.htm create mode 100644 doc/Attic/bad_expression.html create mode 100644 doc/Attic/basic_regex.html create mode 100644 doc/Attic/contacts.html create mode 100644 doc/Attic/examples.html create mode 100644 doc/Attic/faq.html create mode 100644 doc/Attic/format_syntax.html create mode 100644 doc/Attic/headers.html create mode 100644 doc/Attic/history.html create mode 100644 doc/Attic/implementation.html create mode 100644 doc/Attic/install.html create mode 100644 doc/Attic/introduction.html create mode 100644 doc/Attic/localisation.html create mode 100644 doc/Attic/match_flag_type.html create mode 100644 doc/Attic/match_results.html create mode 100644 doc/Attic/partial_matches.html create mode 100644 doc/Attic/posix_api.html create mode 100644 doc/Attic/redistributables.html create mode 100644 doc/Attic/reg_expression.html create mode 100644 doc/Attic/regbase.html create mode 100644 doc/Attic/regex.html create mode 100644 doc/Attic/regex_format.html create mode 100644 doc/Attic/regex_grep.html create mode 100644 doc/Attic/regex_match.html create mode 100644 doc/Attic/regex_merge.html create mode 100644 doc/Attic/regex_replace.html create mode 100644 doc/Attic/regex_search.html create mode 100644 doc/Attic/regex_split.html create mode 100644 doc/Attic/regex_traits.html create mode 100644 doc/Attic/sub_match.html create mode 100644 doc/Attic/syntax.html create mode 100644 doc/Attic/syntax_option_type.html create mode 100644 doc/Attic/thread_safety.html create mode 100644 doc/Attic/uarrow.gif create mode 100644 doc/bad_expression.html create mode 100644 doc/basic_regex.html create mode 100644 doc/contacts.html create mode 100644 doc/examples.html create mode 100644 doc/faq.html create mode 100644 doc/format_syntax.html create mode 100644 doc/headers.html create mode 100644 
doc/history.html create mode 100644 doc/implementation.html create mode 100644 doc/index.html create mode 100644 doc/install.html create mode 100644 doc/introduction.html create mode 100644 doc/localisation.html create mode 100644 doc/match_flag_type.html create mode 100644 doc/match_results.html create mode 100644 doc/partial_matches.html create mode 100644 doc/posix_api.html create mode 100644 doc/redistributables.html create mode 100644 doc/reg_expression.html create mode 100644 doc/regbase.html create mode 100644 doc/regex.html create mode 100644 doc/regex_format.html create mode 100644 doc/regex_grep.html create mode 100644 doc/regex_match.html create mode 100644 doc/regex_merge.html create mode 100644 doc/regex_replace.html create mode 100644 doc/regex_search.html create mode 100644 doc/regex_split.html create mode 100644 doc/regex_traits.html create mode 100644 doc/sub_match.html create mode 100644 doc/syntax.html create mode 100644 doc/syntax_option_type.html create mode 100644 doc/thread_safety.html create mode 100644 doc/uarrow.gif create mode 100644 example/snippets/regex_replace_example.cpp delete mode 100644 faq.htm delete mode 100644 format_string.htm delete mode 100644 hl_ref.htm create mode 100644 include/boost/regex/v4/regex_replace.hpp delete mode 100644 index.htm delete mode 100644 introduction.htm create mode 100644 performance/Jamfile delete mode 100644 posix_ref.htm delete mode 100644 syntax.htm delete mode 100644 template_class_ref.htm delete mode 100644 traits_class_ref.htm diff --git a/appendix.htm b/appendix.htm deleted file mode 100644 index ba0b3bdf..00000000 --- a/appendix.htm +++ /dev/null @@ -1,1304 +0,0 @@ - - - - - - -Regex++, Appendices - - - - -

 

- - - - - - -

C++ Boost

-

Regex++, Appendices.

-

Copyright (c) 1998-2001

-

Dr John Maddock

-

Permission to use, copy, modify, - distribute and sell this software and its documentation - for any purpose is hereby granted without fee, provided - that the above copyright notice appear in all copies and - that both that copyright notice and this permission - notice appear in supporting documentation. Dr John - Maddock makes no representations about the suitability of - this software for any purpose. It is provided "as is" - without express or implied warranty.

-
- -
- -

Appendix 1: Implementation notes

- -

This is the first port of regex++ to the boost library, and is -based on regex++ 2.x, see changes.txt for a full list of changes -from the previous version. There are no known functionality bugs -except that POSIX style equivalence classes are only guaranteed -correct if the Win32 localization model is used (the default for -Win32 builds of the library).

- -

There are some aspects of the code that C++ puritans will -consider to be poor style, in particular the use of goto in some -of the algorithms. The code could be cleaned up, by changing to a -recursive implementation, although it is likely to be slower in -that case.

- -

The performance of the algorithms should be satisfactory in -most cases. For example the times taken to match the ftp response -expression "^([0-9]+)(\-| |$)(.*)$" against the string -"100- this is a line of ftp response which contains a -message string" are: BSD implementation 450 micro seconds, -GNU implementation 271 micro seconds, regex++ 127 micro seconds (Pentium -P90, Win32 console app under MS Windows 95).

- -

However it should be noted that there are some "pathological" -expressions which may require exponential time for matching; -these all involve nested repetition operators, for example -attempting to match the expression "(a*a)*b" against N -letter a's requires time proportional to $2^N$. -These expressions can (almost) always be rewritten in such a way -as to avoid the problem, for example "(a*a)*b" could be -rewritten as "a*b" which requires only time linearly -proportional to N to solve. In the general case, non-nested -repeat expressions require time proportional to $N^2$, -however if the clauses are mutually exclusive then they can be -matched in linear time - this is the case with "a*b", -for each character the matcher will either match an "a" -or a "b" or fail, whereas with "a*a" the -matcher can't tell which branch to take (the first "a" -or the second) and so has to try both. Be careful how you -write your regular expressions and avoid nested repeats if you -can! New to this version, some previously pathological cases have -been fixed - in particular searching for expressions which -contain leading repeats and/or leading literal strings should be -much faster than before. Literal strings are now searched for -using the Knuth/Morris/Pratt algorithm (this is used in -preference to the Boyer/Moore algorithm because it allows the -tracking of newline characters).

- -

Some aspects of the POSIX regular expression syntax are -implementation defined:

- - - -
- -

Appendix 2: Thread safety

- -

Class reg_expression<> and its typedefs regex and wregex -are thread safe, in that compiled regular expressions can safely -be shared between threads. The matching algorithms regex_match, -regex_search, regex_grep, regex_format and regex_merge are all re-entrant -and thread safe. Class match_results is now thread safe, in that -the results of a match can be safely copied from one thread to -another (for example one thread may find matches and push -match_results instances onto a queue, while another thread pops -them off the other end), otherwise use a separate instance of -match_results per thread.

- -

The POSIX API functions are all re-entrant and thread safe, -regular expressions compiled with regcomp can also be -shared between threads.

- -

The class RegEx is only thread safe if each thread gets its -own RegEx instance (apartment threading) - this is a consequence -of RegEx handling both compiling and matching regular expressions. -

- -

Finally note that changing the global locale invalidates all -compiled regular expressions, therefore calling set_locale -from one thread while another uses regular expressions will -produce unpredictable results.

- -

There is also a requirement that there is only one thread -executing prior to the start of main().

- -
- -

Appendix 3: Localization

- -

 Regex++ provides extensive support for run-time -localization, the localization model used can be split into two -parts: front-end and back-end.

- -

Front-end localization deals with everything which the user -sees - error messages, and the regular expression syntax itself. -For example a French application could change [[:word:]] to [[:mot:]] -and \w to \m. Modifying the front end locale requires active -support from the developer, by providing the library with a -message catalogue to load, containing the localized strings. -Front-end locale is affected by the LC_MESSAGES category only.

- -

Back-end localization deals with everything that occurs after -the expression has been parsed - in other words everything that -the user does not see or interact with directly. It deals with -case conversion, collation, and character class membership. The -back-end locale does not require any intervention from the -developer - the library will acquire all the information it -requires for the current locale from the underlying operating -system / run time library. This means that if the program user -does not interact with regular expressions directly - for example -if the expressions are embedded in your C++ code - then no -explicit localization is required, as the library will take care -of everything for you. For example embedding the expression [[:word:]]+ -in your code will always match a whole word, if the program is -run on a machine with, for example, a Greek locale, then it will -still match a whole word, but in Greek characters rather than -Latin ones. The back-end locale is affected by the LC_CTYPE and -LC_COLLATE categories.

- -

There are three separate localization mechanisms supported by -regex++:

- -

Win32 localization model.

- -

This is the default model when the library is compiled under -Win32, and is encapsulated by the traits class w32_regex_traits. -When this model is in effect there is a single global locale as -defined by the user's control panel settings, and returned by -GetUserDefaultLCID. All the settings used by regex++ are acquired -directly from the operating system bypassing the C run time -library. Front-end localization requires a resource dll, -containing a string table with the user-defined strings. The -traits class exports the function:

- -

static std::string set_message_catalogue(const std::string& -s);

- -

which needs to be called with a string identifying the name of -the resource dll, before your code compiles any regular -expressions (but not necessarily before you construct any reg_expression -instances):

- -

boost::w32_regex_traits<char>::set_message_catalogue("mydll.dll"); -

- -

Note that this API sets the dll name for both the -narrow and wide character specializations of w32_regex_traits.

- -

This model does not currently support thread specific locales -(via SetThreadLocale under Windows NT), the library provides full -Unicode support under NT, under Windows 9x the library degrades -gracefully - characters 0 to 255 are supported, the remainder are -treated as "unknown" graphic characters.

- -

C localization model.

- -

This is the default model when the library is compiled under -an operating system other than Win32, and is encapsulated by the -traits class c_regex_traits, -Win32 users can force this model to take effect by defining the -pre-processor symbol BOOST_REGEX_USE_C_LOCALE. When this model is -in effect there is a single global locale, as set by setlocale. -All settings are acquired from your run time library, -consequently Unicode support is dependent upon your run time -library implementation. Front end localization requires a POSIX -message catalogue. The traits class exports the function:

- -

static std::string set_message_catalogue(const std::string& -s);

- -

which needs to be called with a string identifying the name of -the message catalogue, before your code compiles any -regular expressions (but not necessarily before you construct any -reg_expression instances):

- -

boost::c_regex_traits<char>::set_message_catalogue("mycatalogue"); -

- -

Note that this API sets the message catalogue name for both the -narrow and wide character specializations of c_regex_traits. If -your run time library does not support POSIX message catalogues, -then you can either provide your own implementation of -<nl_types.h> or define BOOST_RE_NO_CAT to disable front-end -localization via message catalogues.

- -

Note that calling setlocale invalidates all compiled -regular expressions, calling setlocale(LC_ALL, "C") -will make this library behave equivalently to most traditional -regular expression libraries including version 1 of this library. -

- -

C++ localization model. -

- -

This model is only in effect if the library is built with the -pre-processor symbol BOOST_REGEX_USE_CPP_LOCALE defined. When -this model is in effect each instance of reg_expression<> -has its own instance of std::locale, class reg_expression<> -also has a member function imbue which allows the locale -for the expression to be set on a per-instance basis. Front end -localization requires a POSIX message catalogue, which will be -loaded via the std::messages facet of the expression's locale, -the traits class exports the symbol:

- -

static std::string set_message_catalogue(const std::string& -s);

- -

which needs to be called with a string identifying the name of -the message catalogue, before your code compiles any -regular expressions (but not necessarily before you construct any -reg_expression instances):

- -

boost::cpp_regex_traits<char>::set_message_catalogue("mycatalogue"); -

- -

Note that calling reg_expression<>::imbue will -invalidate any expression currently compiled in that instance of -reg_expression<>. This model is the one which most closely fits -the ethos of the C++ standard library, however it is the model -which will produce the slowest code, and which is the least well -supported by current standard library implementations, for -example I have yet to find an implementation of std::locale which -supports either message catalogues, or locales other than "C" -or "POSIX".

- -

Finally note that if you build the library with a non-default -localization model, then the appropriate pre-processor symbol (BOOST_REGEX_USE_C_LOCALE -or BOOST_REGEX_USE_CPP_LOCALE) must be defined both when you -build the support library, and when you include <boost/regex.hpp> -or <boost/cregex.hpp> in your code. The best way to ensure -this is to add the #define to <boost/regex/detail/regex_options.hpp>. -

- -

Providing a message catalogue:

- -

In order to localize the front end of the library, you need to -provide the library with the appropriate message strings -contained either in a resource dll's string table (Win32 model), -or a POSIX message catalogue (C or C++ models). In the latter -case the messages must appear in message set zero of the -catalogue. The messages and their id's are as follows:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 Message id Meaning Default value  
 101 The character used to start - a sub-expression. "("  
 102 The character used to end a - sub-expression declaration. ")"  
 103 The character used to denote - an end of line assertion. "$"  
 104 The character used to denote - the start of line assertion. "^"  
 105 The character used to denote - the "match any character expression". "."  
 106 The match zero or more times - repetition operator. "*"  
 107 The match one or more - repetition operator. "+"  
 108 The match zero or one - repetition operator. "?"  
 109 The character set opening - character. "["  
 110 The character set closing - character. "]"  
 111 The alternation operator. "|"  
 112 The escape character. "\\"  
 113 The hash character (not - currently used). "#"  
 114 The range operator. "-"  
 115 The repetition operator - opening character. "{"  
 116 The repetition operator - closing character. "}"  
 117 The digit characters. "0123456789"  
 118 The character which when - preceded by an escape character represents the word - boundary assertion. "b"  
 119 The character which when - preceded by an escape character represents the non-word - boundary assertion. "B"  
 120 The character which when - preceded by an escape character represents the word-start - boundary assertion. "<"  
 121 The character which when - preceded by an escape character represents the word-end - boundary assertion. ">"  
 122 The character which when - preceded by an escape character represents any word - character. "w"  
 123 The character which when - preceded by an escape character represents a non-word - character. "W"  
 124 The character which when - preceded by an escape character represents a start of - buffer assertion. "`A"  
 125 The character which when - preceded by an escape character represents an end of - buffer assertion. "'z"  
 126 The newline character. "\n"  
 127 The comma separator. ","  
 128 The character which when - preceded by an escape character represents the bell - character. "a"  
 129 The character which when - preceded by an escape character represents the form feed - character. "f"  
 130 The character which when - preceded by an escape character represents the newline - character. "n"  
 131 The character which when - preceded by an escape character represents the carriage - return character. "r"  
 132 The character which when - preceded by an escape character represents the tab - character. "t"  
 133 The character which when - preceded by an escape character represents the vertical - tab character. "v"  
 134 The character which when - preceded by an escape character represents the start of a - hexadecimal character constant. "x"  
 135 The character which when - preceded by an escape character represents the start of - an ASCII escape character. "c"  
 136 The colon character. ":"  
 137 The equals character. "="  
 138 The character which when - preceded by an escape character represents the ASCII - escape character. "e"  
 139 The character which when - preceded by an escape character represents any lower case - character. "l"  
 140 The character which when - preceded by an escape character represents any non-lower - case character. "L"  
 141 The character which when - preceded by an escape character represents any upper case - character. "u"  
 142 The character which when - preceded by an escape character represents any non-upper - case character. "U"  
 143 The character which when - preceded by an escape character represents any space - character. "s"  
 144 The character which when - preceded by an escape character represents any non-space - character. "S"  
 145 The character which when - preceded by an escape character represents any digit - character. "d"  
 146 The character which when - preceded by an escape character represents any non-digit - character. "D"  
 147 The character which when - preceded by an escape character represents the end quote - operator. "E"  
 148 The character which when - preceded by an escape character represents the start - quote operator. "Q"  
 149 The character which when - preceded by an escape character represents a Unicode - combining character sequence. "X"  
 150 The character which when - preceded by an escape character represents any single - character. "C"  
 151 The character which when - preceded by an escape character represents end of buffer - operator. "Z"  
 152 The character which when - preceded by an escape character represents the - continuation assertion. "G"  
 153 The character which when preceded by (? indicates a - zero width negated forward lookahead assert. "!" 
- -


- -

Custom error messages are loaded as follows:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 Message ID Error message ID Default string  
 201 REG_NOMATCH "No match"  
 202 REG_BADPAT "Invalid regular - expression"  
 203 REG_ECOLLATE "Invalid collation - character"  
 204 REG_ECTYPE "Invalid character - class name"  
 205 REG_EESCAPE "Trailing backslash" -  
 206 REG_ESUBREG "Invalid back reference" -  
 207 REG_EBRACK "Unmatched [ or [^" -  
 208 REG_EPAREN "Unmatched ( or \\(" -  
 209 REG_EBRACE "Unmatched \\{"  
 210 REG_BADBR "Invalid content of - \\{\\}"  
 211 REG_ERANGE "Invalid range end" -  
 212 REG_ESPACE "Memory exhausted" -  
 213 REG_BADRPT "Invalid preceding - regular expression"  
 214 REG_EEND "Premature end of - regular expression"  
 215 REG_ESIZE "Regular expression too - big"  
 216 REG_ERPAREN "Unmatched ) or \\)" -  
 217 REG_EMPTY "Empty expression" -  
 218 REG_E_UNKNOWN "Unknown error"  
- -


- -

Custom character class names are loaded as follows:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 Message ID Description Equivalent default class - name  
 300 The character class name for - alphanumeric characters. "alnum"  
 301 The character class name for - alphabetic characters. "alpha"  
 302 The character class name for - control characters. "cntrl"  
 303 The character class name for - digit characters. "digit"  
 304 The character class name for - graphics characters. "graph"  
 305 The character class name for - lower case characters. "lower"  
 306 The character class name for - printable characters. "print"  
 307 The character class name for - punctuation characters. "punct"  
 308 The character class name for - space characters. "space"  
 309 The character class name for - upper case characters. "upper"  
 310 The character class name for - hexadecimal characters. "xdigit"  
 311 The character class name for - blank characters. "blank"  
 312 The character class name for - word characters. "word"  
 313 The character class name for - Unicode characters. "unicode"  
- -


- -

Finally, custom collating element names are loaded starting -from message id 400, and terminating when the first load -thereafter fails. Each message looks something like: "tagname -string" where tagname is the name used inside [[.tagname.]] -and string is the actual text of the collating element. -Note that the value of collating element [[.zero.]] is used for -the conversion of strings to numbers - if you replace this with -another value then that will be used for string parsing - for -example use the Unicode character 0x0660 for [[.zero.]] if you -want to use Unicode Arabic-Indic digits in your regular -expressions in place of Latin digits.

- -

Note that the POSIX defined names for character classes and -collating elements are always available - even if custom names -are defined, in contrast, custom error messages, and custom -syntax messages replace the default ones.

- -
- -

Appendix 4: Example Applications

- -

There are three demo applications that ship with this library, -they all come with makefiles for Borland, Microsoft and gcc -compilers, otherwise you will have to create your own makefiles.

- -
regress.exe:
- -

A regression test application that gives the matching/searching -algorithms a full workout. The presence of this program is your -guarantee that the library will behave as claimed - at least as -far as those items tested are concerned - if anyone spots -anything that isn't being tested I'd be glad to hear about it.

- -

Files: parse.cpp, regress.cpp, tests.cpp.

- -
jgrep.exe
- -

A simple grep implementation, run with no command line options -to find out its usage. Look at fileiter.cpp/fileiter.hpp -and the mapfile class to see an example of a "smart" -bidirectional iterator that can be used with regex++ or any other -STL algorithm.

- -

Files: jgrep.cpp, main.cpp.

- -
timer.exe
- -

A simple interactive expression matching application, the -results of all matches are timed, allowing the programmer to -optimize their regular expressions where performance is critical. -

- -

Files: regex_timer.cpp. -

- -

The snippets examples contain the code examples used in the -documentation:

- -

regex_match_example.cpp: -ftp based regex_match example.

- -

regex_search_example.cpp: -regex_search example: searches a cpp file for class definitions.

- -

regex_grep_example_1.cpp: -regex_grep example 1: searches a cpp file for class definitions.

- -

regex_merge_example.cpp: -regex_merge example: converts a C++ file to syntax highlighted -HTML.

- -

regex_grep_example_2.cpp: -regex_grep example 2: searches a cpp file for class definitions, -using a global callback function.

- -

regex_grep_example_3.cpp: -regex_grep example 3: searches a cpp file for class definitions, -using a bound member function callback.

- -

regex_grep_example_4.cpp: -regex_grep example 4: searches a cpp file for class definitions, -using a C++ Builder closure as a callback.

- -

regex_split_example_1.cpp: -regex_split example: split a string into tokens.

- -

regex_split_example_2.cpp: -regex_split example: spit out linked URL's.

- -
- -

Appendix 5: Header Files

- -

There are two main headers used by this library: <boost/regex.hpp> -provides full access to the entire library, while <boost/cregex.hpp> -provides access to just the high level class RegEx, and the POSIX -API functions.

- -
- -

Appendix 6: Redistributables

- -

 If you are using Microsoft or Borland C++ and link to a -dll version of the run time library, then you will also link to -one of the dll versions of regex++. While these dll's are -redistributable, there are no "standard" versions, so -when installing on the users PC, you should place these in a -directory private to your application, and not in the PC's -directory path. Note that if you link to a static version of your -run time library, then you will also link to a static version of -regex++ and no dll's will need to be distributed. The possible -regex++ dll and library names are computed according to the -following formula:
-

- -

"boost_regex_"
-+ BOOST_LIB_TOOLSET
-+ "_"
-+ BOOST_LIB_THREAD_OPT
-+ BOOST_LIB_RT_OPT
-+ BOOST_LIB_LINK_OPT
-+ BOOST_LIB_DEBUG_OPT
-
-These are defined as:
-
-BOOST_LIB_TOOLSET: The compiler toolset name (vc6, vc7, bcb5 etc).
-
-BOOST_LIB_THREAD_OPT: "s" for single thread builds,
-"m" for multithread builds.
-
-BOOST_LIB_RT_OPT: "s" for static runtime,
-"d" for dynamic runtime.
-
-BOOST_LIB_LINK_OPT: "s" for static link,
-"i" for dynamic link.
-
-BOOST_LIB_DEBUG_OPT: nothing for release builds,
-"d" for debug builds,
-"dd" for debug-diagnostic builds (_STLP_DEBUG).

- -

Note: you can disable automatic library selection by defining -the symbol BOOST_REGEX_NO_LIB when compiling, this is useful if -you want to statically link even though you're using the dll -version of your run time library, or if you need to debug regex++. -

- -
- -

Notes for upgraders

- -

This version of regex++ is the first to be ported to the boost project, and as a result -has a number of changes to comply with the boost coding -guidelines.

- -

Headers have been changed from <header> or <header.h> -to <boost/header.hpp>

- -

The library namespace has changed from "jm", to -"boost".

- -

The reg_xxx algorithms have been renamed regex_xxx (to improve -naming consistency).

- -

Algorithm query_match has been renamed regex_match, and only -returns true if the expression matches the whole of the input -string (think input data validation).

- -

Compiling existing code:

- -

The directory, libs/regex/old_include contains a set of -headers that make this version of regex++ compatible with -previous ones, either add this directory to your include path, or -copy these headers to the root directory of your boost -installation. The contents of these headers are deprecated and -undocumented - really these are just here for existing code - for -new projects use the new header forms.

- -
- -

Further Information (Contacts and -Acknowledgements)

- -

The author can be contacted at John_Maddock@compuserve.com, -the home page for this library is at http://ourworld.compuserve.com/homepages/John_Maddock/regexpp.htm, -and the official boost version can be obtained from www.boost.org/libraries.htm.

- -

I am indebted to Robert Sedgewick's "Algorithms in C++" -for forcing me to think about algorithms and their performance, -and to the folks at boost for forcing me to think, period. -The following people have all contributed useful comments or -fixes: Dave Abrahams, Mike Allison, Edan Ayal, Jayashree -Balasubramanian, Jan Bölsche, Beman Dawes, Paul Baxter, David -Bergman, David Dennerline, Edward Diener, Peter Dimov, Robert -Dunn, Fabio Forno, Tobias Gabrielsson, Rob Gillen, Marc Gregoire, -Chris Hecker, Nick Hodapp, Jesse Jones, Martin Jost, Boris -Krasnovskiy, Jan Hermelink, Max Leung, Wei-hao Lin, Jens Maurer, -Richard Peters, Heiko Schmidt, Jason Shirk, Gerald Slacik, Scobie -Smith, Mike Smyth, Alexander Sokolovsky, Hervé Poirier, Michael -Raykh, Marc Recht, Scott VanCamp, Bruno Voigt, Alexey Voinov, -Jerry Waldorf, Rob Ward, Lealon Watts, Thomas Witt and Yuval -Yosef. I am also grateful to the manuals supplied with the Henry -Spencer, Perl and GNU regular expression libraries - wherever -possible I have tried to maintain compatibility with these -libraries and with the POSIX standard - the code however is -entirely my own, including any bugs! I can absolutely guarantee -that I will not fix any bugs I don't know about, so if you have -any comments or spot any bugs, please get in touch.

- -

Useful further information can be found at:

- -

A short tutorial on regular expressions can -be found here.

- -

The Open -Unix Specification contains a wealth of useful material, -including the regular expression syntax, and specifications for <regex.h> -and <nl_types.h>. -

- -

The Pattern -Matching Pointers site is a "must visit" resource -for anyone interested in pattern matching.

- -

Glimpse and Agrep, -use a simplified regular expression syntax to achieve faster -search times.

- -

Udi Manber -and Ricardo Baeza-Yates -both have a selection of useful pattern matching papers available -from their respective web sites.

- -
- -

Copyright Dr -John Maddock 1998-2000 all rights reserved.

- - diff --git a/doc/Attic/bad_expression.html b/doc/Attic/bad_expression.html new file mode 100644 index 00000000..cf840f34 --- /dev/null +++ b/doc/Attic/bad_expression.html @@ -0,0 +1,77 @@ + + + + Boost.Regex: bad_expression + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

class bad_expression

+
+

Boost.Regex Index

+
+

+
+

+

Synopsis

+

#include <boost/pat_except.hpp> +

+

The class bad_expression defines the type of objects thrown as + exceptions to report errors during the conversion from a string representing a + regular expression to a finite state machine.  

+
namespace boost{
+
+class bad_pattern : public std::runtime_error
+{
+public:
+   explicit bad_pattern(const std::string& s) : std::runtime_error(s){};
+};
+
+class bad_expression : public bad_pattern
+{
+public:
+   bad_expression(const std::string& s) : bad_pattern(s) {}
+};
+
+
+} // namespace boost
+

Description

+
bad_expression(const string& what_arg); 
+

Effects: Constructs an object of class bad_expression.

+ +

+ Postcondition: strcmp(what(), what_arg.c_str()) == 0. +

Footnotes: the class bad_pattern forms the base class for all + pattern-matching exceptions, of which bad_expression is one. The choice + of std::runtime_error as the base class for bad_pattern + is moot; depending upon how the library is used, exceptions may be either logic + errors (programmer supplied expressions) or run time errors (user supplied + expressions). +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/basic_regex.html b/doc/Attic/basic_regex.html new file mode 100644 index 00000000..0a7a2775 --- /dev/null +++ b/doc/Attic/basic_regex.html @@ -0,0 +1,944 @@ + + + + Boost.Regex: basic_regex + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

basic_regex

+
+

Boost.Regex Index

+
+

+
+

+

Synopsis

+
#include <boost/regex.hpp> 
+

The template class basic_regex encapsulates regular expression parsing + and compilation. The class takes three template parameters: +

+

charT: determines the character type, i.e. either char or + wchar_t. +

+

traits: determines the behaviour of the character type, for + example which character class names are recognized. A default traits class is + provided: regex_traits<charT>. +

+

Allocator: the allocator class used to allocate memory by the + class. +

+

For ease of use there are two typedefs that define the two standard basic_regex + instances; unless you want to use custom traits classes or allocators, you + won't need to use anything other than these: +

+
namespace boost{
+template <class charT, class traits = regex_traits<charT>, class Allocator = std::allocator<charT>  >
+class basic_regex;
+typedef basic_regex<char> regex;
+typedef basic_regex<wchar_t> wregex;
+}
+

The definition of basic_regex follows: it is based very closely on + class basic_string, and fulfils the requirements for a constant-container of charT. +

+
namespace boost{
+
+template <class charT,
+          class traits = regex_traits<charT>,
+          class Allocator = allocator<charT> >
+class basic_regex
+{
+public:
+   // types:
+   typedef          charT                                value_type;
+   typedef          implementation defined               const_iterator;
+   typedef          const_iterator                       iterator;
+   typedef typename Allocator::reference                 reference;
+   typedef typename Allocator::const_reference           const_reference;
+   typedef typename Allocator::difference_type           difference_type;
+   typedef typename Allocator::size_type                 size_type;
+   typedef          Allocator                            allocator_type;
+   typedef          regex_constants::syntax_option_type  flag_type;
+   typedef typename traits::locale_type                  locale_type;
+
+   // constants:
+   static const regex_constants::syntax_option_type normal = regex_constants::normal;
+   static const regex_constants::syntax_option_type icase = regex_constants::icase;
+   static const regex_constants::syntax_option_type nosubs = regex_constants::nosubs;
+   static const regex_constants::syntax_option_type optimize = regex_constants::optimize;
+   static const regex_constants::syntax_option_type collate = regex_constants::collate;
+   static const regex_constants::syntax_option_type ECMAScript = normal;
+   static const regex_constants::syntax_option_type JavaScript = normal;
+   static const regex_constants::syntax_option_type JScript = normal;
+   // these flags are optional, if the functionality is supported
+   // then the flags shall take these names.
+   static const regex_constants::syntax_option_type basic = regex_constants::basic;
+   static const regex_constants::syntax_option_type extended = regex_constants::extended;
+   static const regex_constants::syntax_option_type awk = regex_constants::awk;
+   static const regex_constants::syntax_option_type grep = regex_constants::grep;
+   static const regex_constants::syntax_option_type egrep = regex_constants::egrep;
+   static const regex_constants::syntax_option_type sed = regex_constants::sed;
+   static const regex_constants::syntax_option_type perl = regex_constants::perl;
+
+   // construct/copy/destroy:
+   explicit basic_regex(const Allocator& a = Allocator());
+   explicit basic_regex(const charT* p, flag_type f = regex_constants::normal,
+                        const Allocator& a = Allocator());
+   basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal,
+               const Allocator& a = Allocator());
+   basic_regex(const charT* p, size_type len, flag_type f,
+               const Allocator& a = Allocator());
+   basic_regex(const basic_regex&);
+   template <class ST, class SA>
+   explicit basic_regex(const basic_string<charT, ST, SA>& p,
+                        flag_type f = regex_constants::normal,
+                        const Allocator& a = Allocator());
+   template <class InputIterator>
+   basic_regex(InputIterator first, InputIterator last,
+               flag_type f = regex_constants::normal,
+               const Allocator& a = Allocator());
+
+   ~basic_regex();
+   basic_regex& operator=(const basic_regex&);
+   basic_regex& operator=(const charT* ptr);
+   template <class ST, class SA>
+   basic_regex& operator=(const basic_string<charT, ST, SA>& p);
+
+   // iterators:
+   const_iterator begin() const;
+   const_iterator end() const;
+   // capacity:
+   size_type size() const;
+   size_type max_size() const;
+   bool empty() const;
+   unsigned mark_count() const;
+
+   //
+   // modifiers:
+   basic_regex& assign(const basic_regex& that);
+   basic_regex& assign(const charT* ptr, flag_type f = regex_constants::normal);
+   basic_regex& assign(const charT* first, const charT* last,
+                       flag_type f = regex_constants::normal);
+   template <class string_traits, class A>
+   basic_regex& assign(const basic_string<charT, string_traits, A>& s,
+                       flag_type f = regex_constants::normal);
+   template <class InputIterator>
+   basic_regex& assign(InputIterator first, InputIterator last,
+                       flag_type f = regex_constants::normal);
+
+   // const operations:
+   Allocator get_allocator() const;
+   flag_type getflags() const;
+   basic_string<charT> str() const;
+   int compare(basic_regex&) const;
+   // locale:
+   locale_type imbue(locale_type loc);
+   locale_type getloc() const;
+   // swap
+   void swap(basic_regex&) throw();
+};
+
+template <class charT, class traits, class Allocator>
+bool operator == (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+template <class charT, class traits, class Allocator>
+bool operator != (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+template <class charT, class traits, class Allocator>
+bool operator < (const basic_regex<charT, traits, Allocator>& lhs,
+                 const basic_regex<charT, traits, Allocator>& rhs);
+template <class charT, class traits, class Allocator>
+bool operator <= (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+template <class charT, class traits, class Allocator>
+bool operator >= (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+template <class charT, class traits, class Allocator>
+bool operator > (const basic_regex<charT, traits, Allocator>& lhs,
+                 const basic_regex<charT, traits, Allocator>& rhs);
+
+template <class charT, class io_traits, class re_traits, class Allocator>
+basic_ostream<charT, io_traits>&
+   operator << (basic_ostream<charT, io_traits>& os,
+                const basic_regex<charT, re_traits, Allocator>& e);
+
+template <class charT, class traits, class Allocator>
+void swap(basic_regex<charT, traits, Allocator>& e1,
+          basic_regex<charT, traits, Allocator>& e2);
+
+typedef basic_regex<char> regex;
+typedef basic_regex<wchar_t> wregex;
+
+} // namespace boost
+

Description

+

Class basic_regex has the following public member functions: +

+

basic_regex constants

+
static const regex_constants::syntax_option_type normal = regex_constants::normal;
+static const regex_constants::syntax_option_type icase = regex_constants::icase;
+static const regex_constants::syntax_option_type nosubs = regex_constants::nosubs;
+static const regex_constants::syntax_option_type optimize = regex_constants::optimize;
+static const regex_constants::syntax_option_type collate = regex_constants::collate;
+static const regex_constants::syntax_option_type ECMAScript = normal;
+static const regex_constants::syntax_option_type JavaScript = normal;
+static const regex_constants::syntax_option_type JScript = normal;
+static const regex_constants::syntax_option_type basic = regex_constants::basic;
+static const regex_constants::syntax_option_type extended = regex_constants::extended;
+static const regex_constants::syntax_option_type awk = regex_constants::awk;
+static const regex_constants::syntax_option_type grep = regex_constants::grep;
+static const regex_constants::syntax_option_type egrep = regex_constants::egrep;
+static const regex_constants::syntax_option_type sed = regex_constants::sed;
+static const regex_constants::syntax_option_type perl = regex_constants::perl;
+

The static constant members are provided as synonyms for the constants declared + in namespace boost::regex_constants; for each constant of type syntax_option_type + declared in namespace boost::regex_constants then a constant with + the same name, type and value is declared within the scope of basic_regex.

+

basic_regex constructors

+

In all basic_regex constructors, a copy of the Allocator + argument is used for any memory allocation performed by the constructor or + member functions during the lifetime of the object. +

+
basic_regex(const Allocator& a = Allocator());
+ +

+ Effects: Constructs an object of class basic_regex. The + postconditions of this function are indicated in the table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

true

+
+

size()

+
+

0

+
+

str()

+
+

basic_string<charT>()

+

+
+

+
basic_regex(const charT* p, flag_type f = regex_constants::normal, const Allocator& a = Allocator());
+ +

+ Requires: p shall not be a null pointer.

+

+ Throws: bad_expression if p is not a valid regular + expression.

+

+ Effects: Constructs an object of class basic_regex; the + object's internal finite state machine is constructed from the regular + expression contained in the null-terminated string p, and interpreted + according to the option flags specified + in f. The postconditions of this function are indicated in the + table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

false

+
+

size()

+
+

char_traits<charT>::length(p)

+
+

str()

+
+

basic_string<charT>(p)

+
+

getflags()

+
+

f

+
+

mark_count()

+
+

The number of marked sub-expressions within the expression.

+

+
+

+
basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal, const Allocator& a = Allocator());
+ +

+ Requires: p1 and p2 are not null pointers, p1 < p2.

+

+ Throws: bad_expression if [p1,p2) is not a valid regular + expression.

+

+ Effects: Constructs an object of class basic_regex; the + object's internal finite state machine is constructed from the regular + expression contained in the sequence of characters [p1,p2), and interpreted + according to the option flags specified in f. + The postconditions of this function are indicated in the table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

false

+
+

size()

+
+

std::distance(p1,p2)

+
+

str()

+
+

basic_string<charT>(p1,p2)

+
+

getflags()

+
+

f

+
+

mark_count()

+
+

The number of marked sub-expressions within the expression.

+

+
+

+
basic_regex(const charT* p, size_type len, flag_type f, const Allocator& a = Allocator());
+ +

+ Requires: p shall not be a null pointer, len < max_size().

+

+ Throws: bad_expression if p is not a valid regular + expression.

+

+ Effects: Constructs an object of class basic_regex; the + object's internal finite state machine is constructed from the regular + expression contained in the sequence of characters [p, p+len), and interpreted + according to the option flags specified in f. + The postconditions of this function are indicated in the table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

false

+
+

size()

+
+

len

+
+

str()

+
+

basic_string<charT>(p, len)

+
+

getflags()

+
+

f

+
+

mark_count()

+
+

The number of marked sub-expressions within the expression.

+

+
+

+
basic_regex(const basic_regex& e);
+ +

+ Effects: Constructs an object of class basic_regex as a + copy of the object e. The postconditions of this function are indicated + in the table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

e.empty()

+
+

size()

+
+

e.size()

+
+

str()

+
+

e.str()

+
+

getflags()

+
+

e.getflags()

+
+

mark_count()

+
+

e.mark_count()

+

+
+

+
template <class ST, class SA>
+basic_regex(const basic_string<charT, ST, SA>& s,
+            flag_type f = regex_constants::normal, const Allocator& a = Allocator());
+ +

+ Throws: bad_expression if s is not a valid regular + expression.

+

+ Effects: Constructs an object of class basic_regex; the + object's internal finite state machine is constructed from the regular + expression contained in the string s, and interpreted according to the + option flags specified in f. The postconditions of this function + are indicated in the table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

false

+
+

size()

+
+

s.size()

+
+

str()

+
+

s

+
+

getflags()

+
+

f

+
+

mark_count()

+
+

The number of marked sub-expressions within the expression.

+

+
+

+
template <class ForwardIterator>
+basic_regex(ForwardIterator first, ForwardIterator last,
+            flag_type f = regex_constants::normal, const Allocator& a = Allocator());
+ +

+ Throws: bad_expression if the sequence [first, last) + is not a valid regular expression.

+

+ Effects: Constructs an object of class basic_regex; the + object's internal finite state machine is constructed from the regular + expression contained in the sequence of characters [first, last), and + interpreted according to the option flags + specified in f. The postconditions of this function are indicated in the + table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

false

+
+

size()

+
+

distance(first,last)

+
+

str()

+
+

basic_string<charT>(first,last)

+
+

getflags()

+
+

f

+
+

mark_count()

+
+

The number of marked sub-expressions within the expression.

+

+
+

+
basic_regex& operator=(const basic_regex& e);
+ +

+ Effects: Returns the result of assign(e.str(), e.getflags()).

basic_regex& operator=(const charT* ptr);
+ +

+ Requires: ptr shall not be a null pointer.

+

+ Effects: Returns the result of assign(ptr).

template <class ST, class SA>
+basic_regex& operator=(const basic_string<charT, ST, SA>& p);
+ +

+ Effects: Returns the result of assign(p).

+

basic_regex iterators

+
const_iterator begin() const;
+ +

+ Effects: Returns a starting iterator to a sequence of characters + representing the regular expression.

const_iterator end() const;
+ +

+ Effects: Returns a termination iterator to a sequence of characters + representing the regular expression.

+

basic_regex capacity

+
size_type size() const;
+ +

+ Effects: Returns the length of the sequence of characters representing + the regular expression.

size_type max_size() const;
+ +

+ Effects: Returns the maximum length of the sequence of characters + representing the regular expression.

bool empty() const;
+ +

+ Effects: Returns true if the object does not contain a valid + regular expression, otherwise false.

unsigned mark_count() const;
+ +

+ Effects: Returns the number of marked sub-expressions within the regular + expression.

+

basic_regex assign

+
basic_regex& assign(const basic_regex& that);
+ +

+ Effects: Returns assign(that.str(), that.getflags()).

basic_regex& assign(const charT* ptr, flag_type f = regex_constants::normal);
+ +

+ Effects: Returns assign(string_type(ptr), f).

basic_regex& assign(const charT* first, const charT* last,
+                    flag_type f = regex_constants::normal);
+ +

+ Effects: Returns assign(string_type(first, last), f).

template <class string_traits, class A>
+basic_regex& assign(const basic_string<charT, string_traits, A>& s,
+                    flag_type f = regex_constants::normal);
+ +

+ Throws: bad_expression if s is not a valid regular + expression.

+

+ Returns: *this.

+

+ Effects: Assigns the regular expression contained in the string s, + interpreted according to the option flags specified + in f. The postconditions of this function are indicated in the + table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

false

+
+

size()

+
+

s.size()

+
+

str()

+
+

s

+
+

getflags()

+
+

f

+
+

mark_count()

+
+

The number of marked sub-expressions within the expression.

+

+
+

+
template <class InputIterator>
+basic_regex& assign(InputIterator first, InputIterator last,
+                    flag_type f = regex_constants::normal);
+ +

+ Requires: The type InputIterator corresponds to the Input Iterator + requirements (24.1.1).

+

+ Effects: Returns assign(string_type(first, last), f).

+

basic_regex constant operations

+
Allocator get_allocator() const;
+ +

+ Effects: Returns a copy of the Allocator that was passed to the object's + constructor.

flag_type getflags() const;
+ +

+ Effects: Returns a copy of the regular expression syntax flags that were + passed to the object's constructor, or the last call to assign.

basic_string<charT> str() const;
+ +

+ Effects: Returns a copy of the character sequence passed to the object's + constructor, or the last call to assign.

int compare(basic_regex& e)const;
+ +

+ Effects: If getflags() == e.getflags() then returns str().compare(e.str()), + otherwise returns getflags() - e.getflags().

+

basic_regex locale

+
locale_type imbue(locale_type l);
+ +

+ Effects: Returns the result of traits_inst.imbue(l) where + traits_inst is a (default initialized) instance of the template + parameter traits stored within the object. Calls to imbue + invalidate any currently contained regular expression.

+

+ Postcondition: empty() == true.

locale_type getloc() const;
+ +

+ Effects: Returns the result of traits_inst.getloc() where + traits_inst is a (default initialized) instance of the template + parameter traits stored within the object.

+

basic_regex swap

+
void swap(basic_regex& e) throw();
+ +

+ Effects: Swaps the contents of the two regular expressions.

+

+ Postcondition: *this contains the characters that were in e, + e contains the regular expression that was in *this.

+

+ Complexity: constant time.

+

basic_regex non-member functions

+
basic_regex non-member comparison operators 
+
template <class charT, class traits, class Allocator>
+bool operator == (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: Returns lhs.compare(rhs) == 0.

template <class charT, class traits, class Allocator>
+bool operator != (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: Returns lhs.compare(rhs) != 0.

template <class charT, class traits, class Allocator>
+bool operator < (const basic_regex<charT, traits, Allocator>& lhs,
+                 const basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: Returns lhs.compare(rhs) < 0.

template <class charT, class traits, class Allocator>
+bool operator <= (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: Returns lhs.compare(rhs) <= 0.

template <class charT, class traits, class Allocator>
+bool operator >= (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: Returns lhs.compare(rhs) >= 0.

template <class charT, class traits, class Allocator>
+bool operator > (const basic_regex<charT, traits, Allocator>& lhs,
+                 const basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: Returns lhs.compare(rhs) > 0.

+
basic_regex inserter.
+
template <class charT, class io_traits, class re_traits, class Allocator>
+basic_ostream<charT, io_traits>&
+   operator << (basic_ostream<charT, io_traits>& os,
+                const basic_regex<charT, re_traits, Allocator>& e);
+ +

+ Effects: Returns (os << e.str()).

+
basic_regex non-member swap
+
template <class charT, class traits, class Allocator>
+void swap(basic_regex<charT, traits, Allocator>& lhs,
+          basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: calls lhs.swap(rhs).

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/contacts.html b/doc/Attic/contacts.html new file mode 100644 index 00000000..1f256990 --- /dev/null +++ b/doc/Attic/contacts.html @@ -0,0 +1,86 @@ + + + + Boost.Regex: Contacts + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Contacts and Acknowledgements

+
+

Boost.Regex Index

+
+

+
+

+

The author can be contacted at John_Maddock@compuserve.com, + the home page for this library is at + http://ourworld.compuserve.com/homepages/John_Maddock/regexpp.htm, and + the official boost version can be obtained from www.boost.org/libraries.htm. +

+

I am indebted to Robert Sedgewick's "Algorithms in C++" for forcing me to think + about algorithms and their performance, and to the folks at boost for forcing + me to think, period. The following people have all contributed useful + comments or fixes: Dave Abrahams, Mike Allison, Edan Ayal, Jayashree + Balasubramanian, Jan Bölsche, Beman Dawes, Paul Baxter, David Bergman, David + Dennerline, Edward Diener, Peter Dimov, Robert Dunn, Fabio Forno, Tobias + Gabrielsson, Rob Gillen, Marc Gregoire, Chris Hecker, Nick Hodapp, Jesse Jones, + Martin Jost, Boris Krasnovskiy, Jan Hermelink, Max Leung, Wei-hao Lin, Jens + Maurer, Richard Peters, Heiko Schmidt, Jason Shirk, Gerald Slacik, Scobie + Smith, Mike Smyth, Alexander Sokolovsky, Hervé Poirier, Michael Raykh, Marc + Recht, Scott VanCamp, Bruno Voigt, Alexey Voinov, Jerry Waldorf, Rob Ward, + Lealon Watts, Thomas Witt and Yuval Yosef. I am also grateful to the manuals + supplied with the Henry Spencer, Perl and GNU regular expression libraries - + wherever possible I have tried to maintain compatibility with these libraries + and with the POSIX standard - the code however is entirely my own, including + any bugs! I can absolutely guarantee that I will not fix any bugs I don't know + about, so if you have any comments or spot any bugs, please get in touch. +

+

Useful further information can be found at: +

+

A short tutorial on regular expressions + can be found here.

+

The Open Unix + Specification contains a wealth of useful material, including the + regular expression syntax, and specifications for + <regex.h> and + <nl_types.h>. +

+

The Pattern Matching Pointers + site is a "must visit" resource for anyone interested in pattern matching. +

+

Glimpse and Agrep, use a + simplified regular expression syntax to achieve faster search times. +

+

Udi Manber and + Ricardo Baeza-Yates + both have a selection of useful pattern matching papers available from their + respective web sites. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/examples.html b/doc/Attic/examples.html new file mode 100644 index 00000000..b8d61d2d --- /dev/null +++ b/doc/Attic/examples.html @@ -0,0 +1,107 @@ + + + + Boost.Regex: Examples + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Examples

+
+

Boost.Regex Index

+
+

+
+

+

There are three demo applications that ship with this library; they all come + with makefiles for Borland, Microsoft and gcc compilers; otherwise you will + have to create your own makefiles. +

+
regress.exe: +
+

A regression test application that gives the matching/searching algorithms a + full workout. The presence of this program is your guarantee that the library + will behave as claimed - at least as far as those items tested are concerned - + if anyone spots anything that isn't being tested I'd be glad to hear about it. +

+

Files: parse.cpp, + regress.cpp, tests.cpp. +

+
jgrep.exe +
+

A simple grep implementation, run with no command line options to find out its + usage. Look at fileiter.cpp/fileiter.hpp and + the mapfile class to see an example of a "smart" bidirectional iterator that + can be used with boost.regex or any other STL algorithm. +

+

Files: jgrep.cpp, + main.cpp. +

+
timer.exe +
+

A simple interactive expression matching application, the results of all + matches are timed, allowing the programmer to optimize their regular + expressions where performance is critical. +

+

Files: regex_timer.cpp. +

+
Code snippets
+

The snippets examples contain the code examples used in the documentation:

+

credit_card_example.cpp: + Credit card number formatting code.

+

partial_regex_grep.cpp: + Search example using partial matches.

+

partial_regex_match.cpp: + regex_match example using partial matches.

+

regex_grep_example_1.cpp: + regex_grep example 1: searches a cpp file for class definitions.

+

regex_grep_example_2.cpp: + regex_grep example 2: searches a cpp file for class definitions, using a global + callback function. +

+

regex_grep_example_3.cpp: + regex_grep example 3: searches a cpp file for class definitions, using a bound + member function callback.

+

regex_grep_example_4.cpp: + regex_grep example 4: searches a cpp file for class definitions, using a C++ + Builder closure as a callback.

+

regex_match_example.cpp: + ftp based regex_match example.

+

regex_merge_example.cpp: + regex_merge example: converts a C++ file to syntax highlighted HTML.

+

regex_replace_example.cpp: + regex_replace example: converts a C++ file to syntax highlighted HTML

+

regex_search_example.cpp: + regex_search example: searches a cpp file for class definitions.

+

regex_split_example_1.cpp: + regex_split example: split a string into tokens.

+

regex_split_example_2.cpp + : regex_split example: spit out linked URL's. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/faq.html b/doc/Attic/faq.html new file mode 100644 index 00000000..675a2405 --- /dev/null +++ b/doc/Attic/faq.html @@ -0,0 +1,118 @@ + + + + Boost.Regex: FAQ + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

FAQ

+
+

Boost.Regex Index

+
+

+
+

+ +

 Q. Why can't I use the "convenience" versions of + regex_match / regex_search / regex_grep / regex_format / regex_merge? +

+
+

A. These versions may or may not be available depending upon the capabilities + of your compiler, the rules determining the format of these functions are quite + complex - and only the versions visible to a standard compliant compiler are + given in the help. To find out what your compiler supports, run + <boost/regex.hpp> through your C++ pre-processor, and search the output + file for the function that you are interested in. +

+

Q. I can't get regex++ to work with escape characters, what's going on? +

+
+

A. If you embed regular expressions in C++ code, then remember that escape + characters are processed twice: once by the C++ compiler, and once by the + regex++ expression compiler, so to pass the regular expression \d+ to regex++, + you need to embed "\\d+" in your code. Likewise to match a literal backslash + you will need to embed "\\\\" in your code. +

+

Q. Why does using parentheses in a POSIX regular expression change the result + of a match?

+

For POSIX (extended and basic) regular expressions, but not for perl regexes, + parentheses don't only mark; they determine what the best match is as well. + When the expression is compiled as a POSIX basic or extended regex then + Boost.regex follows the POSIX standard leftmost longest rule for determining + what matched. So if there is more than one possible match after considering the + whole expression, it looks next at the first sub-expression and then the second + sub-expression and so on. So...

+
"(0*)([0-9]*)" against "00123" would produce
+$1 = "00"
+$2 = "123"
+

whereas

+
"0*([0-9]*)" against "00123" would produce
+$1 = "00123"
+

If you think about it, had $1 only matched the "123", this would be "less good" + than the match "00123" which is both further to the left and longer. If you + want $1 to match only the "123" part, then you need to use something like:

+
"0*([1-9][0-9]*)"
+

as the expression.

+

Q. Why don't character ranges work properly (POSIX mode + only)? +
+ A. The POSIX standard specifies that character range expressions are locale + sensitive - so for example the expression [A-Z] will match any collating + element that collates between 'A' and 'Z'. That means that for most locales + other than "C" or "POSIX", [A-Z] would match the single character 't' for + example, which is not what most people expect - or at least not what most + people have come to expect from regular expression engines. For this reason, + the default behaviour of boost.regex (perl mode) is to turn locale sensitive + collation off by not setting the regex_constants::collate compile time flag. + However if you set a non-default compile time flag - for example + regex_constants::extended or regex_constants::basic, then locale dependent + collation will be enabled, this also applies to the POSIX API functions which + use either regex_constants::extended or regex_constants::basic internally. [Note + - when regex_constants::nocollate is in effect, the library behaves "as if" the + LC_COLLATE locale category were always "C", regardless of what it's actually set + to - end note]. +

+

Q. Why are there no throw specifications on any of the + functions? What exceptions can the library throw? +

+

+ A. Not all compilers support (or honor) throw specifications, others support + them but with reduced efficiency. Throw specifications may be added at a later + date as compilers begin to handle this better. The library should throw only + three types of exception: boost::bad_expression can be thrown by basic_regex + when compiling a regular expression, std::runtime_error can be thrown when a + call to basic_regex::imbue tries to open a message catalogue that doesn't + exist, or when a call to regex_search or regex_match results in an + "everlasting" search, or when a call to RegEx::GrepFiles or + RegEx::FindFiles tries to open a file that cannot be opened, finally + std::bad_alloc can be thrown by just about any of the functions in this + library. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/format_syntax.html b/doc/Attic/format_syntax.html new file mode 100644 index 00000000..dc71d6d8 --- /dev/null +++ b/doc/Attic/format_syntax.html @@ -0,0 +1,217 @@ + + + + Boost.Regex: Format String Syntax + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Format String Syntax

+
+

Boost.Regex Index

+
+

+
+

+

Format strings are used by the algorithm + regex_merge and by match_results::format, + and are used to transform one string into another. +

+

There are three kinds of format string: sed, perl and extended; the extended + syntax is a superset of the others so this is covered first. +

+

Extended format syntax +

+

In format strings, all characters are treated as literals except: ()$\?: +

+

To use any of these as literals you must prefix them with the escape character + \ +

+

The following special sequences are recognized: 
+   +
+ Grouping: +

+

Use the parenthesis characters ( and ) to group sub-expressions within the + format string, use \( and \) to represent literal '(' and ')'. 
+   +
+ Sub-expression expansions: +

+

The following perl like expressions expand to a particular matched + sub-expression: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 $`Expands to all the text from the end of the previous + match to the start of the current match, if there was no previous match in the + current operation, then everything from the start of the input string to the + start of the match. 
 $'Expands to all the text from the end of the match to + the end of the input string. 
 $&Expands to all of the current match. 
 $0Expands to all of the current match. 
 $NExpands to the text that matched sub-expression N. 
+

+

Conditional expressions: +

+

Conditional expressions allow two different format strings to be selected + dependent upon whether a sub-expression participated in the match or not: +

+

?Ntrue_expression:false_expression +

+

Executes true_expression if sub-expression N participated in the match, + otherwise executes false_expression. +

+

Example: suppose we search for "(while)|(for)" then the format string + "?1WHILE:FOR" would output what matched, but in upper case. 
+   +
+ Escape sequences: +

+

The following escape sequences are also allowed: +
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 \aThe bell character. 
 \fThe form feed character. 
 \nThe newline character. 
 \rThe carriage return character. 
 \tThe tab character. 
 \vA vertical tab character. 
 \xA hexadecimal character - for example \x0D. 
 \x{}A possible unicode hexadecimal character - for + example \x{1A0} 
 \cxThe ASCII escape character x, for example \c@ is + equivalent to escape-@. 
 \eThe ASCII escape character. 
 \ddAn octal character constant, for example \10. 
+

+

Perl format strings +

+

Perl format strings are the same as the default syntax except that the + characters ()?: have no special meaning. +

+

Sed format strings +

+

Sed format strings use only the characters \ and & as special characters. +

+

\n where n is a digit, is expanded to the nth sub-expression. +

+

& is expanded to the whole of the match (equivalent to \0). +

+

+ Other escape sequences are expanded as per the default syntax. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/headers.html b/doc/Attic/headers.html new file mode 100644 index 00000000..f6651d3d --- /dev/null +++ b/doc/Attic/headers.html @@ -0,0 +1,51 @@ + + + + Boost.Regex: Headers + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Headers

+
+

Boost.Regex Index

+
+

+
+

+

There are two main headers used by this library: <boost/regex.hpp> + provides full access to the entire library, while <boost/cregex.hpp> + provides access to just the high level class RegEx, and the POSIX API + functions. +

+

There is also a header containing only forward declarations + <boost/regex_fwd.hpp> for use when an interface is dependent upon + boost::basic_regex, but otherwise does not need the full definitions.

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/history.html b/doc/Attic/history.html new file mode 100644 index 00000000..126519f2 --- /dev/null +++ b/doc/Attic/history.html @@ -0,0 +1,44 @@ + + + + Boost.Regex: History + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

History

+
+

Boost.Regex Index

+
+

+
+

+

Todo.

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/implementation.html b/doc/Attic/implementation.html new file mode 100644 index 00000000..044fff31 --- /dev/null +++ b/doc/Attic/implementation.html @@ -0,0 +1,44 @@ + + + + Boost.Regex: Implementation + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Implementation

+
+

Boost.Regex Index

+
+

+
+

+

Todo.

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/install.html b/doc/Attic/install.html new file mode 100644 index 00000000..b816b2c8 --- /dev/null +++ b/doc/Attic/install.html @@ -0,0 +1,236 @@ + + + + Boost.Regex: Index + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Installation

+
+

Boost.Regex Index

+
+

+
+

+

[ Important: If you are upgrading from the + 2.x version of this library then you will find a number of changes to the + documented header names and library interfaces, existing code should still + compile unchanged however - see + Note for Upgraders. ]

+

When you extract the library from its zip file, you must preserve its internal + directory structure (for example by using the -d option when extracting). If + you didn't do that when extracting, then you'd better stop reading this, delete + the files you just extracted, and try again! +

+

This library should not need configuring before use; most popular + compilers/standard libraries/platforms are already supported "as is". If you do + experience configuration problems, or just want to test the configuration with + your compiler, then the process is the same as for all of boost; see the + configuration library documentation.

+

The library will encase all code inside namespace boost. +

+

Unlike some other template libraries, this library consists of a mixture of + template code (in the headers) and static code and data (in cpp files). + Consequently it is necessary to build the library's support code into a library + or archive file before you can use it, instructions for specific platforms are + as follows: +

+

Borland C++ Builder: +

+ +
make -fbcb5.mak
+

The build process will build a variety of .lib and .dll files (the exact number + depends upon the version of Borland's tools you are using) the .lib and dll + files will be in a sub-directory called bcb4 or bcb5 depending upon the + makefile used. To install the libraries into your development system use:

+

make -fbcb5.mak install

+

library files will be copied to <BCROOT>/lib and the dll's to + <BCROOT>/bin, where <BCROOT> corresponds to the install path of + your Borland C++ tools. +

+

You may also remove temporary files created during the build process (excluding + lib and dll files) by using:

+

make -fbcb5.mak clean

+

Finally when you use regex++ it is only necessary for you to add the + <boost> root directory to your list of include directories for that + project. It is not necessary for you to manually add a .lib file to the + project; the headers will automatically select the correct .lib file for your + build mode and tell the linker to include it. There is one caveat however: the + library can not tell the difference between VCL and non-VCL enabled builds when + building a GUI application from the command line, if you build from the command + line with the 5.5 command line tools then you must define the pre-processor + symbol _NO_VCL in order to ensure that the correct link libraries are selected: + the C++ Builder IDE normally sets this automatically. Hint, users of the 5.5 + command line tools may want to add a -D_NO_VCL to bcc32.cfg in order to set + this option permanently. +

+

If you would prefer to do a static link to the regex libraries even when using + the dll runtime then define BOOST_REGEX_STATIC_LINK, and if you want to + suppress automatic linking altogether (and supply your own custom build of the + lib) then define BOOST_REGEX_NO_LIB.

+

If you are building with C++ Builder 6, you will find that + <boost/regex.hpp> can not be used in a pre-compiled header (the actual + problem is in <locale> which gets included by <boost/regex.hpp>), + if this causes problems for you, then try defining BOOST_NO_STD_LOCALE when + building, this will disable some features throughout boost, but may save you a + lot in compile times!

+

Microsoft Visual C++ 6 and 7

+

You need version 6 of MSVC to build this library. If you are using VC5 then you + may want to look at one of the previous releases of this + library +

+

Open up a command prompt, which has the necessary MSVC environment variables + defined (for example by using the batch file Vcvars32.bat installed by the + Visual Studio installation), and change to the <boost>\libs\regex\build + directory. +

+

Select the correct makefile - vc6.mak for "vanilla" Visual C++ 6 or + vc6-stlport.mak if you are using STLPort.

+

Invoke the makefile like this:

+

nmake -fvc6.mak

+

You will now have a collection of lib and dll files in a "vc6" subdirectory, to + install these into your development system use:

+

nmake -fvc6.mak install

+

The lib files will be copied to your <VC6>\lib directory and the dll + files to <VC6>\bin, where <VC6> is the root of your Visual C++ 6 + installation.

+

You can delete all the temporary files created during the build (excluding lib + and dll files) using:

+

nmake -fvc6.mak clean +

+

Finally when you use regex++ it is only necessary for you to add the + <boost> root directory to your list of include directories for that + project. It is not necessary for you to manually add a .lib file to the + project; the headers will automatically select the correct .lib file for your + build mode and tell the linker to include it. +

+

Note that if you want to statically link to the regex library when using the + dynamic C++ runtime, define BOOST_REGEX_STATIC_LINK when building your project + (this only has an effect for release builds). If you want to add the source + directly to your project then define BOOST_REGEX_NO_LIB to disable automatic + library selection.

+

Important: there have been some reports of + compiler-optimisation bugs affecting this library, (particularly with VC6 + versions prior to service patch 5) the workaround is to build the library using + /Oityb1 rather than /O2. That is to use all optimisation settings except /Oa. + This problem is reported to affect some standard library code as well (in fact + I'm not sure if the problem is with the regex code or the underlying standard + library), so it's probably worthwhile applying this workaround in normal + practice in any case.

+

Note: if you have replaced the C++ standard library that comes with VC6, then + when you build the library you must ensure that the environment variables + "INCLUDE" and "LIB" have been updated to reflect the include and library paths + for the new library - see vcvars32.bat (part of your Visual Studio + installation) for more details. Alternatively if STLPort is in c:/stlport then + you could use:

+

nmake INCLUDES="-Ic:/stlport/stlport" XLFLAGS="/LIBPATH:c:/stlport/lib" + -fvc6-stlport.mak

+

If you are building with the full STLPort v4.x, then use the vc6-stlport.mak + file provided and set the environment variable STLPORT_PATH to point to the + location of your STLport installation (Note that the full STLPort libraries + appear not to support single-thread static builds). +
+   +
+   +

+

GCC(2.95 and 3.x) +

+

There is a conservative makefile for the g++ compiler. From the command prompt + change to the <boost>/libs/regex/build directory and type: +

+

make -fgcc.mak +

+

At the end of the build process you should have a gcc sub-directory containing + release and debug versions of the library (libboost_regex.a and + libboost_regex_debug.a). When you build projects that use regex++, you will + need to add the boost install directory to your list of include paths and add + <boost>/libs/regex/build/gcc/libboost_regex.a to your list of library + files. +

+

There is also a makefile to build the library as a shared library:

+

make -fgcc-shared.mak

+

which will build libboost_regex.so and libboost_regex_debug.so.

+

Both of these makefiles support the following environment variables:

+

CXXFLAGS: extra compiler options - note that this applies to both the debug and + release builds.

+

INCLUDES: additional include directories.

+

LDFLAGS: additional linker options.

+

LIBS: additional library files.

+

For the more adventurous there is a configure script in + <boost>/libs/config; see the config library + documentation.

+

Sun Workshop 6.1

+

There is a makefile for the sun (6.1) compiler (C++ version 3.12). From the + command prompt change to the <boost>/libs/regex/build directory and type: +

+

dmake -f sunpro.mak +

+

At the end of the build process you should have a sunpro sub-directory + containing single and multithread versions of the library (libboost_regex.a, + libboost_regex.so, libboost_regex_mt.a and libboost_regex_mt.so). When you + build projects that use regex++, you will need to add the boost install + directory to your list of include paths and add + <boost>/libs/regex/build/sunpro/ to your library search path. +

+

This makefile supports the following environment variables:

+

CXXFLAGS: extra compiler options - note that this applies to both the single + and multithreaded builds.

+

INCLUDES: additional include directories.

+

LDFLAGS: additional linker options.

+

LIBS: additional library files.

+

LIBSUFFIX: a suffix to mangle the library name with (defaults to nothing).

+

This makefile does not set any architecture specific options like -xarch=v9, + you can set these by defining the appropriate macros, for example:

+

dmake CXXFLAGS="-xarch=v9" LDFLAGS="-xarch=v9" LIBSUFFIX="_v9" -f sunpro.mak

+

will build v9 variants of the regex library named libboost_regex_v9.a etc.

+

Other compilers: +

+

There is a generic makefile (generic.mak) + provided in <boost-root>/libs/regex/build - see that makefile for details + of environment variables that need to be set before use. Alternatively you can + use the Jam based build system. If + you need to configure the library for your platform, then refer to the + config library documentation + . +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/introduction.html b/doc/Attic/introduction.html new file mode 100644 index 00000000..ea0d588a --- /dev/null +++ b/doc/Attic/introduction.html @@ -0,0 +1,174 @@ + + + + Boost.Regex: Introduction + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Introduction

+
+

Boost.Regex Index

+
+

+
+

+

Regular expressions are a form of pattern-matching that is often used in text + processing; many users will be familiar with the Unix utilities grep, sed + and awk, and the programming language perl, each of which makes + extensive use of regular expressions. Traditionally C++ users have been limited + to the POSIX C API's for manipulating regular expressions, and while regex++ + does provide these API's, they do not represent the best way to use the + library. For example regex++ can cope with wide character strings, or search + and replace operations (in a manner analogous to either sed or perl), something + that traditional C libraries can not do.

+

The class boost::basic_regex is the key class in + this library; it represents a "machine readable" regular expression, and is + very closely modelled on std::basic_string, think of it as a string plus the + actual state-machine required by the regular expression algorithms. Like + std::basic_string there are two typedefs that are almost always the means by + which this class is referenced:

+
namespace boost{
+
+template <class charT, 
+          class traits = regex_traits<charT>, 
+          class Allocator = std::allocator<charT> >
+class basic_regex;
+
+typedef basic_regex<char> regex;
+typedef basic_regex<wchar_t> wregex;
+
+}
+

To see how this library can be used, imagine that we are writing a credit card + processing application. Credit card numbers generally come as a string of + 16-digits, separated into groups of 4-digits, and separated by either a space + or a hyphen. Before storing a credit card number in a database (not necessarily + something your customers will appreciate!), we may want to verify that the + number is in the correct format. To match any digit we could use the regular + expression [0-9], however ranges of characters like this are actually locale + dependent. Instead we should use the POSIX standard form [[:digit:]], or the + regex++ and perl shorthand for this \d (note that many older libraries tended + to be hard-coded to the C-locale, consequently this was not an issue for them). + That leaves us with the following regular expression to validate credit card + number formats:

+

(\d{4}[- ]){3}\d{4}

+

Here the parentheses act to group (and mark for future reference) + sub-expressions, and the {4} means "repeat exactly 4 times". This is an example + of the extended regular expression syntax used by perl, awk and egrep. Regex++ + also supports the older "basic" syntax used by sed and grep, but this is + generally less useful, unless you already have some basic regular expressions + that you need to reuse.

+

Now let's take that expression and place it in some C++ code to validate the + format of a credit card number:

+
bool validate_card_format(const std::string s)
+{
+   static const boost::regex e("(\\d{4}[- ]){3}\\d{4}");
+   return regex_match(s, e);
+}
+

Note how we had to add some extra escapes to the expression: remember that the + escape is seen once by the C++ compiler, before it gets to be seen by the + regular expression engine, consequently escapes in regular expressions have to + be doubled up when embedding them in C/C++ code. Also note that all the + examples assume that your compiler supports Koenig lookup, if yours doesn't + (for example VC6), then you will have to add some boost:: prefixes to some of + the function calls in the examples.

+

Those of you who are familiar with credit card processing, will have realised + that while the format used above is suitable for human readable card numbers, + it does not represent the format required by online credit card systems; these + require the number as a string of 16 (or possibly 15) digits, without any + intervening spaces. What we need is a means to convert easily between the two + formats, and this is where search and replace comes in. Those who are familiar + with the utilities sed and perl will already be ahead here; we + need two strings - one a regular expression - the other a "format + string" that provides a description of the text to replace the match + with. In regex++ this search and replace operation is performed with the + algorithm regex_replace, for our credit card example we can write two algorithms + like this to provide the format conversions:

+
// match any format with the regular expression:
+const boost::regex e("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
+const std::string machine_format("\\1\\2\\3\\4");
+const std::string human_format("\\1-\\2-\\3-\\4");
+
+std::string machine_readable_card_number(const std::string s)
+{
+   return regex_replace(s, e, machine_format, boost::match_default | boost::format_sed);
+}
+
+std::string human_readable_card_number(const std::string s)
+{
+   return regex_replace(s, e, human_format, boost::match_default | boost::format_sed);
+}
+

Here we've used marked sub-expressions in the regular expression to split out + the four parts of the card number as separate fields, the format string then + uses the sed-like syntax to replace the matched text with the reformatted + version.

+

In the examples above, we haven't directly manipulated the results of a regular + expression match, however in general the result of a match contains a number of + sub-expression matches in addition to the overall match. When the library needs + to report a regular expression match it does so using an instance of the class + match_results, as before there are typedefs of this class for the most + common cases: +

+
namespace boost{
+typedef match_results<const char*> cmatch;
+typedef match_results<const wchar_t*> wcmatch;
+typedef match_results<std::string::const_iterator> smatch;
+typedef match_results<std::wstring::const_iterator> wsmatch; 
+}
+

The algorithms regex_search and + regex_grep (i.e. finding all matches in a string) make use of + match_results to report what matched.

+

Note that these algorithms are not restricted to searching regular C-strings, + any bidirectional iterator type can be searched, allowing for the possibility + of seamlessly searching almost any kind of data. +

+

For search and replace operations in addition to the algorithm + regex_replace that we have already seen, the algorithm + regex_format takes the result of a match and a format string, and + produces a new string by merging the two.

+

For those that dislike templates, there is a high level wrapper class RegEx + that is an encapsulation of the lower level template code - it provides a + simplified interface for those that don't need the full power of the library, + and supports only narrow characters, and the "extended" regular expression + syntax. +

+

The POSIX API functions: regcomp, regexec, regfree + and regerror, are available in both narrow character and Unicode versions, and + are provided for those who need compatibility with these API's. +

+

Finally, note that the library now has run-time localization + support, and recognizes the full POSIX regular expression syntax - including + advanced features like multi-character collating elements and equivalence + classes - as well as providing compatibility with other regular expression + libraries including GNU and BSD4 regex packages, and to a more limited extent + perl 5. +

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998-2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + + diff --git a/doc/Attic/localisation.html b/doc/Attic/localisation.html new file mode 100644 index 00000000..45986f40 --- /dev/null +++ b/doc/Attic/localisation.html @@ -0,0 +1,1126 @@ + + + + Boost.Regex: Localisation + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Localisation

+
+

Boost.Regex Index

+
+

+
+

+

Boost.regex provides extensive support for run-time localization, the + localization model used can be split into two parts: front-end and back-end.

+

+

Front-end localization deals with everything which the user sees - error + messages, and the regular expression syntax itself. For example a French + application could change [[:word:]] to [[:mot:]] and \w to \m. Modifying the + front end locale requires active support from the developer, by providing the + library with a message catalogue to load, containing the localized strings. + Front-end locale is affected by the LC_MESSAGES category only. +

+

Back-end localization deals with everything that occurs after the expression + has been parsed - in other words everything that the user does not see or + interact with directly. It deals with case conversion, collation, and character + class membership. The back-end locale does not require any intervention from + the developer - the library will acquire all the information it requires for + the current locale from the underlying operating system / run time library. + This means that if the program user does not interact with regular expressions + directly - for example if the expressions are embedded in your C++ code - then + no explicit localization is required, as the library will take care of + everything for you. For example embedding the expression [[:word:]]+ in your + code will always match a whole word, if the program is run on a machine with, + for example, a Greek locale, then it will still match a whole word, but in + Greek characters rather than Latin ones. The back-end locale is affected by the + LC_CTYPE and LC_COLLATE categories. +

+

There are three separate localization mechanisms supported by boost.regex:

+

Win32 localization model. +

+

This is the default model when the library is compiled under Win32, and is + encapsulated by the traits class w32_regex_traits. When this model is in effect + there is a single global locale as defined by the user's control panel + settings, and returned by GetUserDefaultLCID. All the settings used by + boost.regex are acquired directly from the operating system bypassing the C run + time library. Front-end localization requires a resource dll, containing a + string table with the user-defined strings. The traits class exports the + function: +

+

static std::string set_message_catalogue(const std::string& s); +

+

which needs to be called with a string identifying the name of the resource + dll, before your code compiles any regular expressions (but not + necessarily before you construct any reg_expression instances): +

+

boost::w32_regex_traits<char>::set_message_catalogue("mydll.dll"); +

+

Note that this API sets the dll name for both the narrow and wide + character specializations of w32_regex_traits. +

+

This model does not currently support thread specific locales (via + SetThreadLocale under Windows NT), the library provides full Unicode support + under NT, under Windows 9x the library degrades gracefully - characters 0 to + 255 are supported, the remainder are treated as "unknown" graphic characters. +

+

C localization model. +

+

This is the default model when the library is compiled under an operating + system other than Win32, and is encapsulated by the traits class c_regex_traits, + Win32 users can force this model to take effect by defining the pre-processor + symbol BOOST_REGEX_USE_C_LOCALE. When this model is in effect there is a single + global locale, as set by setlocale. All settings are acquired from your + run time library, consequently Unicode support is dependent upon your run time + library implementation. Front end localization requires a POSIX message + catalogue. The traits class exports the function: +

+

static std::string set_message_catalogue(const std::string& s); +

+

which needs to be called with a string identifying the name of the message + catalogue, before your code compiles any regular expressions (but not + necessarily before you construct any reg_expression instances): +

+

boost::c_regex_traits<char>::set_message_catalogue("mycatalogue"); +

+

Note that this API sets the message catalogue name for both the narrow and wide + character specializations of c_regex_traits. If your run time library does not + support POSIX message catalogues, then you can either provide your own + implementation of <nl_types.h> or define BOOST_RE_NO_CAT to disable + front-end localization via message catalogues. +

+

Note that calling setlocale invalidates all compiled regular + expressions, calling setlocale(LC_ALL, "C") will make this library + behave equivalent to most traditional regular expression libraries including + version 1 of this library. +

+

C++ localization model. +

+

This model is only in effect if the library is built with the pre-processor + symbol BOOST_REGEX_USE_CPP_LOCALE defined. When this model is in effect each + instance of reg_expression<> has its own instance of std::locale, class + reg_expression<> also has a member function imbue which allows the + locale for the expression to be set on a per-instance basis. Front end + localization requires a POSIX message catalogue, which will be loaded via the + std::messages facet of the expression's locale, the traits class exports the + symbol: +

+

static std::string set_message_catalogue(const std::string& s); +

+

which needs to be called with a string identifying the name of the message + catalogue, before your code compiles any regular expressions (but not + necessarily before you construct any reg_expression instances): +

+

boost::cpp_regex_traits<char>::set_message_catalogue("mycatalogue"); +

+

Note that calling reg_expression<>::imbue will invalidate any expression + currently compiled in that instance of reg_expression<>. This model is + the one which most closely fits the ethos of the C++ standard library; however, it is + the model which will produce the slowest code, and which is the least well + supported by current standard library implementations; for example, I have yet + to find an implementation of std::locale which supports either message + catalogues, or locales other than "C" or "POSIX". +

+

Finally note that if you build the library with a non-default localization + model, then the appropriate pre-processor symbol (BOOST_REGEX_USE_C_LOCALE or + BOOST_REGEX_USE_CPP_LOCALE) must be defined both when you build the support + library, and when you include <boost/regex.hpp> or + <boost/cregex.hpp> in your code. The best way to ensure this is to add + the #define to <boost/regex/user.hpp>. +

+

Providing a message catalogue: +

+

In order to localize the front end of the library, you need to provide the + library with the appropriate message strings contained either in a resource + dll's string table (Win32 model), or a POSIX message catalogue (C or C++ + models). In the latter case the messages must appear in message set zero of the + catalogue. The messages and their id's are as follows: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Message id + Meaning + Default value +  
 101 + The character used to start a sub-expression. + "(" +  
 102 + The character used to end a sub-expression + declaration. + ")" +  
 103 + The character used to denote an end of line + assertion. + "$" +  
 104 + The character used to denote the start of line + assertion. + "^" +  
 105 + The character used to denote the "match any character + expression". + "." +  
 106 + The match zero or more times repetition operator. + "*" +  
 107 + The match one or more repetition operator. + "+" +  
 108 + The match zero or one repetition operator. + "?" +  
 109 + The character set opening character. + "[" +  
 110 + The character set closing character. + "]" +  
 111 + The alternation operator. + "|" +  
 112 + The escape character. + "\\" +  
 113 + The hash character (not currently used). + "#" +  
 114 + The range operator. + "-" +  
 115 + The repetition operator opening character. + "{" +  
 116 + The repetition operator closing character. + "}" +  
 117 + The digit characters. + "0123456789" +  
 118 + The character which when preceded by an escape + character represents the word boundary assertion. + "b" +  
 119 + The character which when preceded by an escape + character represents the non-word boundary assertion. + "B" +  
 120 + The character which when preceded by an escape + character represents the word-start boundary assertion. + "<" +  
 121 + The character which when preceded by an escape + character represents the word-end boundary assertion. + ">" +  
 122 + The character which when preceded by an escape + character represents any word character. + "w" +  
 123 + The character which when preceded by an escape + character represents a non-word character. + "W" +  
 124 + The character which when preceded by an escape + character represents a start of buffer assertion. + "`A" +  
 125 + The character which when preceded by an escape + character represents an end of buffer assertion. + "'z" +  
 126 + The newline character. + "\n" +  
 127 + The comma separator. + "," +  
 128 + The character which when preceded by an escape + character represents the bell character. + "a" +  
 129 + The character which when preceded by an escape + character represents the form feed character. + "f" +  
 130 + The character which when preceded by an escape + character represents the newline character. + "n" +  
 131 + The character which when preceded by an escape + character represents the carriage return character. + "r" +  
 132 + The character which when preceded by an escape + character represents the tab character. + "t" +  
 133 + The character which when preceded by an escape + character represents the vertical tab character. + "v" +  
 134 + The character which when preceded by an escape + character represents the start of a hexadecimal character constant. + "x" +  
 135 + The character which when preceded by an escape + character represents the start of an ASCII escape character. + "c" +  
 136 + The colon character. + ":" +  
 137 + The equals character. + "=" +  
 138 + The character which when preceded by an escape + character represents the ASCII escape character. + "e" +  
 139 + The character which when preceded by an escape + character represents any lower case character. + "l" +  
 140 + The character which when preceded by an escape + character represents any non-lower case character. + "L" +  
 141 + The character which when preceded by an escape + character represents any upper case character. + "u" +  
 142 + The character which when preceded by an escape + character represents any non-upper case character. + "U" +  
 143 + The character which when preceded by an escape + character represents any space character. + "s" +  
 144 + The character which when preceded by an escape + character represents any non-space character. + "S" +  
 145 + The character which when preceded by an escape + character represents any digit character. + "d" +  
 146 + The character which when preceded by an escape + character represents any non-digit character. + "D" +  
 147 + The character which when preceded by an escape + character represents the end quote operator. + "E" +  
 148 + The character which when preceded by an escape + character represents the start quote operator. + "Q" +  
 149 + The character which when preceded by an escape + character represents a Unicode combining character sequence. + "X" +  
 150 + The character which when preceded by an escape + character represents any single character. + "C" +  
 151 + The character which when preceded by an escape + character represents end of buffer operator. + "Z" +  
 152 + The character which when preceded by an escape + character represents the continuation assertion. + "G" +  
 153 The character which when preceded by (? indicates a zero width negated + forward lookahead assert. "!" 
+

+


+   +

+

Custom error messages are loaded as follows: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Message ID + Error message ID + Default string +  
 201 + REG_NOMATCH + "No match" +  
 202 + REG_BADPAT + "Invalid regular expression" +  
 203 + REG_ECOLLATE + "Invalid collation character" +  
 204 + REG_ECTYPE + "Invalid character class name" +  
 205 + REG_EESCAPE + "Trailing backslash" +  
 206 + REG_ESUBREG + "Invalid back reference" +  
 207 + REG_EBRACK + "Unmatched [ or [^" +  
 208 + REG_EPAREN + "Unmatched ( or \\(" +  
 209 + REG_EBRACE + "Unmatched \\{" +  
 210 + REG_BADBR + "Invalid content of \\{\\}" +  
 211 + REG_ERANGE + "Invalid range end" +  
 212 + REG_ESPACE + "Memory exhausted" +  
 213 + REG_BADRPT + "Invalid preceding regular expression" +  
 214 + REG_EEND + "Premature end of regular expression" +  
 215 + REG_ESIZE + "Regular expression too big" +  
 216 + REG_ERPAREN + "Unmatched ) or \\)" +  
 217 + REG_EMPTY + "Empty expression" +  
 218 + REG_E_UNKNOWN + "Unknown error" +  
+

+


+   +

+

Custom character class names are loaded as follows: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Message ID + Description + Equivalent default class name +  
 300 + The character class name for alphanumeric characters. + "alnum" +  
 301 + The character class name for alphabetic characters. + "alpha" +  
 302 + The character class name for control characters. + "cntrl" +  
 303 + The character class name for digit characters. + "digit" +  
 304 + The character class name for graphics characters. + "graph" +  
 305 + The character class name for lower case characters. + "lower" +  
 306 + The character class name for printable characters. + "print" +  
 307 + The character class name for punctuation characters. + "punct" +  
 308 + The character class name for space characters. + "space" +  
 309 + The character class name for upper case characters. + "upper" +  
 310 + The character class name for hexadecimal characters. + "xdigit" +  
 311 + The character class name for blank characters. + "blank" +  
 312 + The character class name for word characters. + "word" +  
 313 + The character class name for Unicode characters. + "unicode" +  
+

+


+   +

+

Finally, custom collating element names are loaded starting from message id + 400, and terminating when the first load thereafter fails. Each message looks + something like: "tagname string" where tagname is the name used inside + [[.tagname.]] and string is the actual text of the collating element. + Note that the value of collating element [[.zero.]] is used for the conversion + of strings to numbers - if you replace this with another value then that will + be used for string parsing - for example use the Unicode character 0x0660 for + [[.zero.]] if you want to use Unicode Arabic-Indic digits in your regular + expressions in place of Latin digits. +

+

+ Note that the POSIX defined names for character classes and collating elements + are always available - even if custom names are defined, in contrast, custom + error messages, and custom syntax messages replace the default ones. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/match_flag_type.html b/doc/Attic/match_flag_type.html new file mode 100644 index 00000000..598f6c83 --- /dev/null +++ b/doc/Attic/match_flag_type.html @@ -0,0 +1,266 @@ + + + + Boost.Regex: match_flag_type + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

match_flag_type

+
+

Boost.Regex Index

+
+

+
+

+

Synopsis

+

The type match_flag_type is an implementation defined bitmask type + (17.3.2.1.2) that controls how a regular expression is matched against a + character sequence.

+
namespace std{ namespace regex_constants{
+
+typedef bitmask_type match_flag_type;
+
+static const match_flag_type match_default = 0;
+static const match_flag_type match_not_bob;
+static const match_flag_type match_not_eob;
+static const match_flag_type match_not_bol;
+static const match_flag_type match_not_eol;
+static const match_flag_type match_not_bow;
+static const match_flag_type match_not_eow;
+static const match_flag_type match_any;
+static const match_flag_type match_not_null;
+static const match_flag_type match_continuous;
+static const match_flag_type match_partial;
+static const match_flag_type match_prev_avail;
+static const match_flag_type match_not_dot_newline;
+static const match_flag_type match_not_dot_null;
+
+static const match_flag_type format_default = 0;
+static const match_flag_type format_sed;
+static const match_flag_type format_perl;
+static const match_flag_type format_no_copy;
+static const match_flag_type format_first_only;
+static const match_flag_type format_all;
+
+} // namespace regex_constants
+} // namespace std
+

Description

+

The type match_flag_type is an implementation defined bitmask type + (17.3.2.1.2). When matching a regular expression against a sequence of + characters [first, last) then setting its elements has the effects listed in + the table below:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Element

+
+

Effect if set

+
+

match_default

+
+

Specifies that matching of regular expressions proceeds without any + modification of the normal rules used in ECMA-262, ECMAScript Language + Specification, Chapter 15 part 10, RegExp (Regular Expression) Objects (FWD.1)

+
match_not_bobSpecifies that the expression "\A" should not match + against the sub-sequence [first,first).
match_not_eobSpecifies that the expressions "\z" and + "\Z" should not match against the sub-sequence [last,last).
+

match_not_bol

+
+

Specifies that the expression "^" should not be matched against the + sub-sequence [first,first).

+
+

match_not_eol

+
+

Specifies that the expression "$" should not be matched against the + sub-sequence [last,last).

+
+

match_not_bow

+
+

Specifies that the expression "\b" should not be matched against the + sub-sequence [first,first).

+
+

match_not_eow

+
+

Specifies that the expression "\b" should not be matched against the + sub-sequence [last,last).

+
+

match_any

+
+

Specifies that if more than one match is possible then any match is an + acceptable result.

+
+

match_not_null

+
+

Specifies that the expression can not be matched against an empty sequence.

+
+

match_continuous

+
+

Specifies that the expression must match a sub-sequence that begins at first.

+
+

match_partial

+
+

Specifies that if no match can be found, then it is acceptable to return a + match [from, last) where from!=last, if there exists some sequence of + characters [from,to) of which [from,last) is a prefix, and which would result + in a full match.

+
+

match_prev_avail

+
+

Specifies that --first is a valid iterator position, when this + flag is set then the flags match_not_bol and match_not_bow + are ignored by the regular expression algorithms (RE.7) and iterators (RE.8).

+
match_not_dot_newlineSpecifies that the expression "." does not match a + newline character.
match_not_dot_null Specifies that the expression "." does not match a + null character '\0'.
+

format_default

+
+

Specifies that when a regular expression match is to be replaced by a new + string, that the new string is constructed using the rules used by the + ECMAScript replace function in ECMA-262, ECMAScript Language Specification, + Chapter 15 part 5.4.11 String.prototype.replace. (FWD.1). In addition during + search and replace operations then all non-overlapping occurrences of the + regular expression are located and replaced, and sections of the input that did + not match the expression, are copied unchanged to the output string.

+
+

format_sed

+
+

Specifies that when a regular expression match is to be replaced by a new + string, that the new string is constructed using the rules used by the Unix sed + utility in IEEE Std 1003.1-2001, Portable Operating System Interface (POSIX), + Shells and Utilities.

+
+

format_perl

+
+

Specifies that when a regular expression match is to be replaced by a new + string, that the new string is constructed using an implementation defined + superset of the rules used by the ECMAScript replace function in ECMA-262, + ECMAScript Language Specification, Chapter 15 part 5.4.11 + String.prototype.replace (FWD.1).

+
format_allSpecifies that all syntax extensions are + enabled, including conditional (?ddexpression1:expression2) replacements.
+

format_no_copy

+
+

When specified during a search and replace operation, then sections of the + character container sequence being searched that do not match the regular + expression are not copied to the output string.

+
+

format_first_only

+
+

When specified during a search and replace operation, then only the first + occurrence of the regular expression is replaced.

+
+

+

+


+

+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/match_results.html b/doc/Attic/match_results.html new file mode 100644 index 00000000..9a0d1afc --- /dev/null +++ b/doc/Attic/match_results.html @@ -0,0 +1,390 @@ + + + + Boost.Regex: class match_results + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

class match_results

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
Description
+
+

Synopsis

+

#include <boost/regex.hpp> +

+

Regular expressions are different from many simple pattern-matching algorithms + in that as well as finding an overall match they can also produce + sub-expression matches: each sub-expression being delimited in the pattern by a + pair of parentheses (...). There has to be some method for reporting + sub-expression matches back to the user: this is achieved by defining a + class match_results that acts as an indexed collection of sub-expression + matches, each sub-expression match being contained in an object of type + sub_match + . +

Template class match_results denotes a collection of character sequences + representing the result of a regular expression match. Objects of type + match_results are passed to the algorithms regex_match + and regex_search, and are returned by the iterator + regex_iterator + .  Storage for the collection is allocated and freed as necessary by the + member functions of class match_results. +

The template class match_results conforms to the requirements of a Sequence, as + specified in (lib.sequence.reqmts), except that only operations defined for + const-qualified Sequences are supported.

+

Class template match_results is most commonly used as one of the typedefs + cmatch, wcmatch, smatch, or wsmatch:

+
template <class BidirectionalIterator,
+          class Allocator = allocator<sub_match<BidirectionalIterator> >
+class match_results;
+
+typedef match_results<const char*> cmatch;
+typedef match_results<const wchar_t*> wcmatch;
+typedef match_results<string::const_iterator> smatch;
+typedef match_results<wstring::const_iterator> wsmatch;
+
+template <class BidirectionalIterator,
+          class Allocator = allocator<sub_match<BidirectionalIterator> >
+class match_results
+{ 
+public: 
+   typedef          sub_match<BidirectionalIterator>                        value_type;
+   typedef          const value_type&                                       const_reference;
+   typedef          const_reference                                         reference;
+   typedef          implementation defined                                  const_iterator;
+   typedef          const_iterator                                          iterator;
+   typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
+   typedef typename Allocator::size_type                                    size_type;
+   typedef          Allocator                                               allocator_type;
+   typedef typename iterator_traits<BidirectionalIterator>::value_type      char_type;
+   typedef          basic_string<char_type>                                 string_type;
+
+   // construct/copy/destroy:
+   explicit match_results(const Allocator& a = Allocator());
+   match_results(const match_results& m);
+   match_results& operator=(const match_results& m); 
+   ~match_results();
+
+   // size:
+   size_type size() const;
+   size_type max_size() const;
+   bool empty() const;
+   // element access:
+   difference_type length(int sub = 0) const;
+   difference_type position(unsigned int sub = 0) const;
+   string_type str(int sub = 0) const;
+   const_reference operator[](int n) const;
+
+   const_reference prefix() const;
+
+   const_reference suffix() const;
+   const_iterator begin() const;
+   const_iterator end() const;
+   // format:
+   template <class OutputIterator>
+   OutputIterator format(OutputIterator out,
+                         const string_type& fmt,
+                         match_flag_type flags = format_default) const;
+   string_type format(const string_type& fmt,
+                      match_flag_type flags = format_default) const;
+
+   allocator_type get_allocator() const;
+   void swap(match_results& that);
+};
+
+template <class BidirectionalIterator, class Allocator>
+bool operator == (const match_results<BidirectionalIterator, Allocator>& m1,
+                  const match_results<BidirectionalIterator, Allocator>& m2);
+template <class BidirectionalIterator, class Allocator>
+bool operator != (const match_results<BidirectionalIterator, Allocator>& m1,
+                  const match_results<BidirectionalIterator, Allocator>& m2);
+
+template <class charT, class traits, class BidirectionalIterator, class Allocator>
+basic_ostream<charT, traits>&
+   operator << (basic_ostream<charT, traits>& os,
+                const match_results<BidirectionalIterator, Allocator>& m);
+
+template <class BidirectionalIterator, class Allocator>
+void swap(match_results<BidirectionalIterator, Allocator>& m1,
+          match_results<BidirectionalIterator, Allocator>& m2);
+

Description

+

match_results constructors

+

In all match_results constructors, a copy of the Allocator + argument is used for any memory allocation performed by the constructor or + member functions during the lifetime of the object. +

+
match_results(const Allocator& a = Allocator());
+ +

+ Effects: Constructs an object of class match_results. The postconditions + of this function are indicated in Table RE16:

+
+ Table RE16--match_results(const Allocator&) effects
+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

true

+
+

size()

+
+

0

+
+

str()

+
+

basic_string<charT>()

+

+
+

+
match_results(const match_results& m);
+ +

+ Effects: Constructs an object of class match_results, as a copy of + m.

match_results& operator=(const match_results& m);
+ +

+ Effects: Assigns m to *this. The postconditions of this function are + indicated in Table RE17:

+
Table RE17--match_results assignment effects
+

+

+

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value +

+
+

empty()

+
+

m.empty().

+
+

size()

+
+

m.size().

+
+

str(n)

+
+

m.str(n) for all integers n < m.size().

+
+

prefix()

+
+

m.prefix().

+
+

suffix()

+
+

m.suffix().

+
+

(*this)[n]

+
+

m[n] for all integers n < m.size().

+
+

length(n)

+
+

m.length(n) for all integers n < m.size().

+
+

position(n)

+
+

m.position(n) for all integers n < m.size().

+

+
+

+
+

match_results size

+
size_type size()const;
+ +

+ Effects: Returns the number of sub_match elements stored in *this.

size_type max_size()const;
+ +

+ Effects: Returns the maximum number of sub_match elements that can be + stored in *this.

bool empty()const;
+ +

+ Effects: Returns size() == 0.

+

match_results element access

+
difference_type length(int sub = 0)const;
+ +

+ Effects: Returns (*this)[sub].length().

difference_type position(unsigned int sub = 0)const;
+ +

+ Effects: Returns std::distance(prefix().first, + (*this)[sub].first).

string_type str(int sub = 0)const;
+ +

+ Effects: Returns string_type((*this)[sub]).

const_reference operator[](int n) const;
+ +

+ Effects: Returns a reference to the sub_match object + representing the character sequence that matched marked sub-expression n. + If n == 0 then returns a reference to a sub_match object + representing the character sequence that matched the whole regular + expression.

const_reference prefix()const;
+ +

+ Effects: Returns a reference to the sub_match object + representing the character sequence from the start of the string being + matched/searched, to the start of the match found.

const_reference suffix()const;
+ +

+ Effects: Returns a reference to the sub_match object + representing the character sequence from the end of the match found to the end + of the string being matched/searched.

const_iterator begin()const;
+ +

+ Effects: Returns a starting iterator that enumerates over all the marked + sub-expression matches stored in *this.

const_iterator end()const;
+ +

+ Effects: Returns a terminating iterator that enumerates over all the + marked sub-expression matches stored in *this.

+

match_results reformatting

+
template <class OutputIterator>
+OutputIterator format(OutputIterator out,
+                      const string_type& fmt,
+                      match_flag_type flags = format_default);
+ +

+ Requires: The type OutputIterator conforms to the Output Iterator + requirements (24.1.2).

+

+ Effects: Copies the character sequence [fmt.begin(), fmt.end()) to + OutputIterator out. For each format specifier or escape sequence in fmt, + replace that sequence with either the character(s) it represents, or the + sequence of characters within *this to which it refers. The bitmasks specified + in flags determine what + format specifiers or escape sequences are recognized; by default this is + the format used by ECMA-262, ECMAScript Language Specification, Chapter 15 part + 5.4.11 String.prototype.replace.

+

+ Returns: out.

string_type format(const string_type& fmt,
+                   match_flag_type flags = format_default);
+ +

+ Effects: Returns a copy of the string fmt. For each format + specifier or escape sequence in fmt, replace that sequence with either + the character(s) it represents, or the sequence of characters within *this to + which it refers. The bitmasks specified in flags + determine what format specifiers or escape sequences + are recognized; by default this is the format used by ECMA-262, + ECMAScript Language Specification, Chapter 15 part 5.4.11 + String.prototype.replace.

allocator_type get_allocator()const;
+ +

+ Effects: Returns a copy of the Allocator that was passed to the object's + constructor.

void swap(match_results& that);
+ +

+ Effects: Swaps the contents of the two sequences.

+

+ Postcondition: *this contains the sequence of matched + sub-expressions that were in that, that contains the + sequence of matched sub-expressions that were in *this.

+

+ Complexity: constant time. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/partial_matches.html b/doc/Attic/partial_matches.html new file mode 100644 index 00000000..130c837d --- /dev/null +++ b/doc/Attic/partial_matches.html @@ -0,0 +1,184 @@ + + + + Boost.Regex: Partial Matches + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Partial Matches

+
+

Boost.Regex Index

+
+

+
+

+

The match-flag match_partial can + be passed to the following algorithms: regex_match, + regex_search, and regex_grep. + When used it indicates that partial as well as full matches should be found. A + partial match is one that matched one or more characters at the end of the text + input, but did not match all of the regular expression (although it may have + done so had more input been available). Partial matches are typically used when + either validating data input (checking each character as it is entered on the + keyboard), or when searching texts that are either too long to load into memory + (or even into a memory mapped file), or are of indeterminate length (for + example the source may be a socket or similar). Partial and full matches can be + differentiated as shown in the following table (the variable M represents an + instance of match_results<> as filled in + by regex_match, regex_search or regex_grep):
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  | Result | M[0].matched | M[0].first | M[0].second
No match | False | Undefined | Undefined | Undefined
Partial match | True | False | Start of partial match. | End of partial match (end of text).
Full match | True | True | Start of full match. | End of full match.
+

+

The following example + tests to see whether the text could be a valid credit card number, as the user + presses a key, the character entered would be added to the string being built + up, and passed to is_possible_card_number. If this returns true + then the text could be a valid card number, so the user interface's OK button + would be enabled. If it returns false, then this is not yet a valid card + number, but could be with more input, so the user interface would disable the + OK button. Finally, if the procedure throws an exception the input could never + become a valid number, and the inputted character must be discarded, and a + suitable error indication displayed to the user.

+
#include <string>
+#include <iostream>
+#include <stdexcept>
+#include <boost/regex.hpp>
+
+boost::regex e("(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})");
+
+//
+// Checks the text entered so far against the card number expression.
+// Returns true for a full match, false for a partial match (the text
+// could still become a valid number with more input), and throws
+// std::runtime_error when the text can never become a valid number.
+//
+bool is_possible_card_number(const std::string& input)
+{
+   boost::match_results<std::string::const_iterator> what;
+   if(!boost::regex_match(input, what, e, boost::match_default | boost::match_partial))
+   {
+      // the input so far could not possibly be valid so reject it:
+      throw std::runtime_error("Invalid data entered - this could not possibly be a valid card number");
+   }
+   // OK so far so good, but have we finished?  what[0].matched is true
+   // for a full match and false when we only have a partial match:
+   return what[0].matched;
+}
+

In the following example, + text input is taken from a stream containing an unknown amount of text; this + example simply counts the number of html tags encountered in the stream. The + text is loaded into a buffer and searched a part at a time, if a partial match + was encountered, then the partial match gets searched a second time as the + start of the next batch of text:

+
#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <cstring>
+#include <boost/regex.hpp>
+
+// match some kind of html tag:
+boost::regex e("<[^>]*>");
+// count how many complete tags have been found:
+unsigned int tags = 0;
+// saved position of partial match (end of buffer when there is none):
+char* next_pos = 0;
+
+// Called by regex_grep for every match found, full or partial.
+bool grep_callback(const boost::match_results<char*>& m)
+{
+   if(m[0].matched == false)
+   {
+      // partial match: save its start position so that search() can
+      // carry the text forward into the next batch, and return:
+      next_pos = m[0].first;
+   }
+   else
+      ++tags;
+   return true;
+}
+
+// Counts the html tags in the stream, reading it through a fixed size
+// buffer and re-searching any trailing partial match together with the
+// next batch of text.
+void search(std::istream& is)
+{
+   char buf[4096];
+   char* const buf_end = buf + sizeof(buf);
+   next_pos = buf_end;
+   bool have_more = true;
+   while(have_more)
+   {
+      // a partial match that fills the whole buffer leaves no room to
+      // read any more text, so discard it rather than loop forever:
+      if(next_pos == buf)
+         next_pos = buf_end;
+      // how much do we copy forward from last try:
+      std::size_t leftover = buf_end - next_pos;
+      // and how much is left to fill:
+      std::size_t size = next_pos - buf;
+      // copy forward whatever we have left, the two ranges overlap when
+      // the partial match is longer than half the buffer, so this has to
+      // be memmove rather than memcpy:
+      std::memmove(buf, next_pos, leftover);
+      // fill the rest from the stream, read() keeps going until the
+      // request is satisfied or the stream ends, whereas readsome() only
+      // hands back whatever happens to be buffered already:
+      is.read(buf + leftover, static_cast<std::streamsize>(size));
+      std::size_t read = static_cast<std::size_t>(is.gcount());
+      // check to see if we've run out of text:
+      have_more = read == size;
+      // reset next_pos:
+      next_pos = buf_end;
+      // and then grep:
+      boost::regex_grep(grep_callback,
+                        buf,
+                        buf + read + leftover,
+                        e,
+                        boost::match_default | boost::match_partial);
+   }
+}
+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/posix_api.html b/doc/Attic/posix_api.html new file mode 100644 index 00000000..89ae0a2f --- /dev/null +++ b/doc/Attic/posix_api.html @@ -0,0 +1,288 @@ + + + + Boost.Regex: POSIX API Compatibility Functions + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

POSIX API Compatibility Functions

+
+

Boost.Regex Index

+
+

+
+

+
#include <boost/cregex.hpp>
+or:
+#include <boost/regex.h>
+

The following functions are available for users who need a POSIX compatible C + library, they are available in both Unicode and narrow character versions, the + standard POSIX API names are macros that expand to one version or the other + depending upon whether UNICODE is defined or not. +

+

Important: Note that all the symbols defined here are enclosed inside + namespace boost when used in C++ programs, unless you use #include + <boost/regex.h> instead - in which case the symbols are still defined in + namespace boost, but are made available in the global namespace as well.

+

The functions are defined as: +

+
extern "C" {
+int regcompA(regex_tA*, const char*, int);
+unsigned int regerrorA(int, const regex_tA*, char*, unsigned int);
+int regexecA(const regex_tA*, const char*, unsigned int, regmatch_t*, int);
+void regfreeA(regex_tA*);
+
+int regcompW(regex_tW*, const wchar_t*, int);
+unsigned int regerrorW(int, const regex_tW*, wchar_t*, unsigned int);
+int regexecW(const regex_tW*, const wchar_t*, unsigned int, regmatch_t*, int);
+void regfreeW(regex_tW*);
+
+#ifdef UNICODE
+#define regcomp regcompW
+#define regerror regerrorW
+#define regexec regexecW
+#define regfree regfreeW
+#define regex_t regex_tW
+#else
+#define regcomp regcompA
+#define regerror regerrorA
+#define regexec regexecA
+#define regfree regfreeA
+#define regex_t regex_tA
+#endif
+}
+

All the functions operate on structure regex_t, which exposes two public + members: +

+

unsigned int re_nsub this is filled in by regcomp and indicates + the number of sub-expressions contained in the regular expression. +

+

const TCHAR* re_endp points to the end of the expression to compile when + the flag REG_PEND is set. +

+

Footnote: regex_t is actually a #define - it is either regex_tA or regex_tW + depending upon whether UNICODE is defined or not, TCHAR is either char or + wchar_t again depending upon the macro UNICODE. +

+

regcomp

+

regcomp takes a pointer to a regex_t, a pointer to the expression + to compile and a flags parameter which can be a combination of: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 REG_EXTENDEDCompiles modern regular expressions. Equivalent to + regbase::char_classes | regbase::intervals | regbase::bk_refs. 
 REG_BASICCompiles basic (obsolete) regular expression syntax. + Equivalent to regbase::char_classes | regbase::intervals | regbase::limited_ops + | regbase::bk_braces | regbase::bk_parens | regbase::bk_refs. 
 REG_NOSPECAll characters are ordinary, the expression is a + literal string. 
 REG_ICASECompiles for matching that ignores character case. 
 REG_NOSUBHas no effect in this library. 
 REG_NEWLINEWhen this flag is set a dot does not match the + newline character. 
 REG_PENDWhen this flag is set the re_endp parameter of the + regex_t structure must point to the end of the regular expression to compile. 
 REG_NOCOLLATEWhen this flag is set then locale dependent collation + for character ranges is turned off. 
 REG_ESCAPE_IN_LISTS
+ , , , +
When this flag is set, then escape sequences are + permitted in bracket expressions (character sets). 
 REG_NEWLINE_ALT When this flag is set then the newline character is + equivalent to the alternation operator |. 
 REG_PERL  A shortcut for perl-like behavior: REG_EXTENDED + | REG_NOCOLLATE | REG_ESCAPE_IN_LISTS 
 REG_AWKA shortcut for awk-like behavior: REG_EXTENDED | + REG_ESCAPE_IN_LISTS 
 REG_GREPA shortcut for grep like behavior: REG_BASIC | + REG_NEWLINE_ALT 
 REG_EGREP A shortcut for egrep like behavior: + REG_EXTENDED | REG_NEWLINE_ALT 
+

+

regerror

+

regerror takes the following parameters, it maps an error code to a human + readable string: +
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
 int codeThe error code. 
 const regex_t* eThe regular expression (can be null). 
 char* bufThe buffer to fill in with the error message. 
 unsigned int buf_sizeThe length of buf. 
+

+

If the error code is OR'ed with REG_ITOA then the message that results is the + printable name of the code rather than a message, for example "REG_BADPAT". If + the code is REG_ATOI then e must not be null and e->re_endp must + point to the printable name of an error code, the return value is then the + value of the error code. For any other value of code, the return value + is the number of characters in the error message, if the return value is + greater than or equal to buf_size then regerror will have to be + called again with a larger buffer.

+

regexec

+

regexec finds the first occurrence of expression e within string buf. + If len is non-zero then *m is filled in with what matched the + regular expression, m[0] contains what matched the whole string, m[1] + the first sub-expression etc, see regmatch_t in the header file + declaration for more details. The eflags parameter can be a combination + of: +
+   +

+

+ + + + + + + + + + + + + + + + + + + +
 REG_NOTBOLParameter buf does not represent the start of + a line. 
 REG_NOTEOLParameter buf does not terminate at the end of + a line. 
 REG_STARTENDThe string searched starts at buf + pmatch[0].rm_so + and ends at buf + pmatch[0].rm_eo. 
+

+

regfree

+

Finally regfree frees all the memory that was allocated by regcomp. +

+

Footnote: this is an abridged reference to the POSIX API functions, it is + provided for compatibility with other libraries, rather than an API to be used + in new code (unless you need access from a language other than C++). This + version of these functions should also happily coexist with other versions, as + the names used are macros that expand to the actual function names. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/redistributables.html b/doc/Attic/redistributables.html new file mode 100644 index 00000000..24af723f --- /dev/null +++ b/doc/Attic/redistributables.html @@ -0,0 +1,83 @@ + + + + Boost.Regex: Redistributables and Library Names + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Redistributables and Library Names

+
+

Boost.Regex Index

+
+

+
+

+

If you are using Microsoft or Borland C++ and link to a dll version of the run + time library, then you will also link to one of the dll versions of boost.regex. + While these dll's are redistributable, there are no "standard" versions, so + when installing on the user's PC, you should place these in a directory private + to your application, and not in the PC's directory path. Note that if you link + to a static version of your run time library, then you will also link to a + static version of boost.regex and no dll's will need to be distributed. The + possible boost.regex dll and library names are computed according to the following + formula:
+

+

+

"boost_regex_"
+ + BOOST_LIB_TOOLSET
+ + "_"
+ + BOOST_LIB_THREAD_OPT
+ + BOOST_LIB_RT_OPT
+ + BOOST_LIB_LINK_OPT
+ + BOOST_LIB_DEBUG_OPT
+
+ These are defined as:
+
+ BOOST_LIB_TOOLSET: The compiler toolset name (vc6, vc7, bcb5 etc).
+
+ BOOST_LIB_THREAD_OPT: "s" for single thread builds,
+ "m" for multithread builds.
+
+ BOOST_LIB_RT_OPT: "s" for static runtime,
+ "d" for dynamic runtime.
+
+ BOOST_LIB_LINK_OPT: "s" for static link,
+ "i" for dynamic link.
+
+ BOOST_LIB_DEBUG_OPT: nothing for release builds,
+ "d" for debug builds,
+ "dd" for debug-diagnostic builds (_STLP_DEBUG).

+

+ Note: you can disable automatic library selection by defining the symbol + BOOST_REGEX_NO_LIB when compiling, this is useful if you want to statically + link even though you're using the dll version of your run time library, or if + you need to debug boost.regex. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/reg_expression.html b/doc/Attic/reg_expression.html new file mode 100644 index 00000000..79263c8b --- /dev/null +++ b/doc/Attic/reg_expression.html @@ -0,0 +1,45 @@ + + + + Boost.Regex: Class reg_expression (deprecated) + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Class reg_expression (deprecated)

+
+

Boost.Regex Index

+
+

+
+

+

The use of class template reg_expression is deprecated: use + basic_regex instead.

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/regbase.html b/doc/Attic/regbase.html new file mode 100644 index 00000000..6bd38353 --- /dev/null +++ b/doc/Attic/regbase.html @@ -0,0 +1,55 @@ + + + + Boost.Regex: regbase + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

regbase

+
+

Boost.Regex Index

+
+

+
+

+

Use of the type boost::regbase is now deprecated, and the type does not form a + part of the + regular expression standardization proposal.  This type still + exists as a base class of boost::basic_regex, and you can still refer to + boost::regbase::constant_name in your code, however for maximum portability to + other std regex implementations you should instead use either:

+
boost::regex_constants::constant_name
+

or

+
boost::regex::constant_name
+

or

+
boost::wregex::constant_name
+

+


+

+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/regex.html b/doc/Attic/regex.html new file mode 100644 index 00000000..f36bb13e --- /dev/null +++ b/doc/Attic/regex.html @@ -0,0 +1,492 @@ + + + + Boost.Regex: class RegEx (deprecated) + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

class RegEx (deprecated)

+
+

Boost.Regex Index

+
+

+
+

+

The high level wrapper class RegEx is now deprecated and does not form a part + of the regular + expression standardization proposal.  This type still exists, and + existing code will continue to compile, however the following documentation is + unlikely to be further updated.

+
#include <boost/cregex.hpp> 
+

The class RegEx provides a high level simplified interface to the regular + expression library, this class only handles narrow character strings, and + regular expressions always follow the "normal" syntax - that is the same as the + perl / ECMAScript syntax. +

+
typedef bool (*GrepCallback)(const RegEx& expression);
+typedef bool (*GrepFileCallback)(const char* file, const RegEx& expression);
+typedef bool (*FindFilesCallback)(const char* file);
+
+class  RegEx
+{
+public:
+   RegEx();
+   RegEx(const RegEx& o);
+   ~RegEx();
+   RegEx(const char* c, bool icase = false);
+   explicit RegEx(const std::string& s, bool icase = false);
+   RegEx& operator=(const RegEx& o);
+   RegEx& operator=(const char* p);
+   RegEx& operator=(const std::string& s);
+   unsigned int SetExpression(const char* p, bool icase = false);
+   unsigned int SetExpression(const std::string& s, bool icase = false);
+   std::string Expression()const;
+   //
+   // now matching operators: 
+   // 
+   bool Match(const char* p, unsigned int flags = match_default);
+   bool Match(const std::string& s, unsigned int flags = match_default); 
+   bool Search(const char* p, unsigned int flags = match_default); 
+   bool Search(const std::string& s, unsigned int flags = match_default); 
+   unsigned int Grep(GrepCallback cb, const char* p, unsigned int flags = match_default); 
+   unsigned int Grep(GrepCallback cb, const std::string& s, unsigned int flags = match_default); 
+   unsigned int Grep(std::vector<std::string>& v, const char* p, unsigned int flags = match_default); 
+   unsigned int Grep(std::vector<std::string>& v, const std::string& s, unsigned int flags = match_default); 
+   unsigned int Grep(std::vector<unsigned int>& v, const char* p, unsigned int flags = match_default); 
+   unsigned int Grep(std::vector<unsigned int>& v, const std::string& s, unsigned int flags = match_default); 
+   unsigned int GrepFiles(GrepFileCallback cb, const char* files, bool recurse = false, unsigned int flags = match_default); 
+   unsigned int GrepFiles(GrepFileCallback cb, const std::string& files, bool recurse = false, unsigned int flags = match_default); 
+   unsigned int FindFiles(FindFilesCallback cb, const char* files, bool recurse = false, unsigned int flags = match_default); 
+   unsigned int FindFiles(FindFilesCallback cb, const std::string& files, bool recurse = false, unsigned int flags = match_default); 
+   std::string Merge(const std::string& in, const std::string& fmt, bool copy = true, unsigned int flags = match_default); 
+   std::string Merge(const char* in, const char* fmt, bool copy = true, unsigned int flags = match_default); 
+   unsigned Split(std::vector<std::string>& v, std::string& s, unsigned flags = match_default, unsigned max_count = ~0); 
+   // 
+   // now operators for returning what matched in more detail: 
+   // 
+   unsigned int Position(int i = 0)const; 
+   unsigned int Length(int i = 0)const; 
+   bool Matched(int i = 0)const;
+   unsigned int Line()const; 
+   unsigned int Marks() const; 
+   std::string What(int i)const; 
+   std::string operator[](int i)const ; 
+
+   static const unsigned int npos;
+};     
+

Member functions for class RegEx are defined as follows: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 RegEx();Default constructor, constructs an instance of RegEx + without any valid expression. 
 RegEx(const RegEx& o);Copy constructor, all the properties of parameter o + are copied. 
 RegEx(const char* c, bool icase + = false);Constructs an instance of RegEx, setting the + expression to c, if icase is true then matching is + insensitive to case, otherwise it is sensitive to case. Throws bad_expression + on failure. 
 RegEx(const std::string& s, bool icase + = false);Constructs an instance of RegEx, setting the + expression to s, if icase is true then matching is + insensitive to case, otherwise it is sensitive to case. Throws bad_expression + on failure. 
 RegEx& operator=(const RegEx& + o);Default assignment operator. 
 RegEx& operator=(const char* + p);Assignment operator, equivalent to calling SetExpression(p, + false). Throws bad_expression on failure. 
 RegEx& operator=(const std::string& + s);Assignment operator, equivalent to calling SetExpression(s, + false). Throws bad_expression on failure. 
 unsigned int SetExpression(const char* + p, bool icase = false);Sets the current expression to p, if icase + is true then matching is insensitive to case, otherwise it is sensitive + to case. Throws bad_expression on failure. 
 unsigned int SetExpression(const + std::string& s, bool icase = false);Sets the current expression to s, if icase + is true then matching is insensitive to case, otherwise it is sensitive + to case. Throws bad_expression on failure. 
 std::string Expression()const;Returns a copy of the current regular expression. 
 bool Match(const char* p, unsigned + int flags = match_default);Attempts to match the current expression against the + text p using the match flags flags - see + match flags. Returns true if the expression matches the whole of + the input string. 
 bool Match(const std::string& s, unsigned + int flags = match_default) ;Attempts to match the current expression against the + text s using the match flags flags - see + match flags. Returns true if the expression matches the whole of + the input string. 
 bool Search(const char* p, unsigned + int flags = match_default);Attempts to find a match for the current expression + somewhere in the text p using the match flags flags - see + match flags. Returns true if the match succeeds. 
 bool Search(const std::string& s, unsigned + int flags = match_default) ;Attempts to find a match for the current expression + somewhere in the text s using the match flags flags - see + match flags. Returns true if the match succeeds. 
 unsigned int Grep(GrepCallback cb, const + char* p, unsigned int flags = match_default);Finds all matches of the current expression in the + text p using the match flags flags - see + match flags. For each match found calls the call-back function cb + as: cb(*this); +

If at any stage the call-back function returns false then the grep operation + terminates, otherwise continues until no further matches are found. Returns the + number of matches found.

+
 
 unsigned int Grep(GrepCallback cb, const + std::string& s, unsigned int flags = match_default);Finds all matches of the current expression in the + text s using the match flags flags - see + match flags. For each match found calls the call-back function cb + as: cb(*this); +

If at any stage the call-back function returns false then the grep operation + terminates, otherwise continues until no further matches are found. Returns the + number of matches found. +

+
 
 unsigned int Grep(std::vector<std::string>& + v, const char* p, unsigned int flags = + match_default);Finds all matches of the current expression in the + text p using the match flags flags - see + match flags. For each match pushes a copy of what matched onto v. + Returns the number of matches found. 
 unsigned int Grep(std::vector<std::string>& + v, const std::string& s, unsigned int flags = + match_default);Finds all matches of the current expression in the + text s using the match flags flags - see + match flags. For each match pushes a copy of what matched onto v. + Returns the number of matches found. 
 unsigned int Grep(std::vector<unsigned + int>& v, const char* p, unsigned int flags + = match_default);Finds all matches of the current expression in the + text p using the match flags flags - see + match flags. For each match pushes the starting index of what matched + onto v. Returns the number of matches found. 
 unsigned int Grep(std::vector<unsigned + int>& v, const std::string& s, unsigned int + flags = match_default);Finds all matches of the current expression in the + text s using the match flags flags - see + match flags. For each match pushes the starting index of what matched + onto v. Returns the number of matches found. 
 unsigned int GrepFiles(GrepFileCallback + cb, const char* files, bool recurse = false, unsigned + int flags = match_default);Finds all matches of the current expression in the + files files using the match flags flags - see + match flags. For each match calls the call-back function cb.  +

If the call-back returns false then the algorithm returns without considering + further matches in the current file, or any further files.  +

+

The parameter files can include wild card characters '*' and '?', if the + parameter recurse is true then searches sub-directories for matching + file names.  +

+

Returns the total number of matches found.

+

May throw an exception derived from std::runtime_error if file io fails.

+
 
 unsigned int GrepFiles(GrepFileCallback + cb, const std::string& files, bool recurse = false, unsigned + int flags = match_default);Finds all matches of the current expression in the + files files using the match flags flags - see + match flags. For each match calls the call-back function cb.  +

If the call-back returns false then the algorithm returns without considering + further matches in the current file, or any further files.  +

+

The parameter files can include wild card characters '*' and '?', if the + parameter recurse is true then searches sub-directories for matching + file names.  +

+

Returns the total number of matches found.

+

May throw an exception derived from std::runtime_error if file io fails.

+
 
 unsigned int FindFiles(FindFilesCallback + cb, const char* files, bool recurse = false, unsigned + int flags = match_default);Searches files to find all those which contain + at least one match of the current expression using the match flags flags + - see match flags. For each + matching file calls the call-back function cb.  +

If the call-back returns false then the algorithm returns without considering + any further files.  +

+

The parameter files can include wild card characters '*' and '?', if the + parameter recurse is true then searches sub-directories for matching + file names.  +

+

Returns the total number of files found.

+

May throw an exception derived from std::runtime_error if file io fails.

+
 
 unsigned int FindFiles(FindFilesCallback + cb, const std::string& files, bool recurse = false, unsigned + int flags = match_default);Searches files to find all those which contain + at least one match of the current expression using the match flags flags + - see match flags. For each + matching file calls the call-back function cb.  +

If the call-back returns false then the algorithm returns without considering + any further files.  +

+

The parameter files can include wild card characters '*' and '?', if the + parameter recurse is true then searches sub-directories for matching + file names.  +

+

Returns the total number of files found.

+

May throw an exception derived from std::runtime_error if file io fails.

+
 
 std::string Merge(const std::string& in, const + std::string& fmt, bool copy = true, unsigned int + flags = match_default);Performs a search and replace operation: searches + through the string in for all occurrences of the current expression, for + each occurrence replaces the match with the format string fmt. Uses flags + to determine what gets matched, and how the format string should be treated. If + copy is true then all unmatched sections of input are copied unchanged + to output, if the flag format_first_only is set then only the first + occurrence of the pattern found is replaced. Returns the new string. See + also format string syntax, match + flags and format flags. 
 std::string Merge(const char* in, const + char* fmt, bool copy = true, unsigned int flags = + match_default);Performs a search and replace operation: searches + through the string in for all occurrences of the current expression, for + each occurrence replaces the match with the format string fmt. Uses flags + to determine what gets matched, and how the format string should be treated. If + copy is true then all unmatched sections of input are copied unchanged + to output, if the flag format_first_only is set then only the first + occurrence of the pattern found is replaced. Returns the new string. See + also format string syntax, match + flags and format flags. 
 unsigned Split(std::vector<std::string>& v, + std::string& s, unsigned flags = match_default, unsigned max_count + = ~0);Splits the input string and pushes each one onto the vector. If + the expression contains no marked sub-expressions, then one string is outputted + for each section of the input that does not match the expression. If the + expression does contain marked sub-expressions, then outputs one string for + each marked sub-expression each time a match occurs. Outputs no more than max_count + strings. Before returning, deletes from the input string s all of + the input that has been processed (all of the string if max_count was + not reached). Returns the number of strings pushed onto the vector. 
 unsigned int Position(int i = 0)const;Returns the position of what matched sub-expression i. + If i = 0 then returns the position of the whole match. Returns + RegEx::npos if the supplied index is invalid, or if the specified + sub-expression did not participate in the match. 
 unsigned int Length(int i = 0)const;Returns the length of what matched sub-expression i. + If i = 0 then returns the length of the whole match. Returns RegEx::npos + if the supplied index is invalid, or if the specified sub-expression did not + participate in the match. 
 bool Matched(int i = 0)const;Returns true if sub-expression i was matched, false otherwise. 
 unsigned int Line()const;Returns the line on which the match occurred, indexes + start from 1 not zero, if no match occurred then returns RegEx::npos. 
 unsigned int Marks() const;Returns the number of marked sub-expressions + contained in the expression. Note that this includes the whole match + (sub-expression zero), so the value returned is always >= 1. 
 std::string What(int i)const;Returns a copy of what matched sub-expression i. + If i = 0 then returns a copy of the whole match. Returns a null string + if the index is invalid or if the specified sub-expression did not participate + in a match. 
 std::string operator[](int i)const + ;Returns What(i); +

Can be used to simplify access to sub-expression matches, and make usage more + perl-like.

+
 
+

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/regex_format.html b/doc/Attic/regex_format.html new file mode 100644 index 00000000..06de5621 --- /dev/null +++ b/doc/Attic/regex_format.html @@ -0,0 +1,165 @@ + + + + Boost.Regex: Algorithm regex_format (deprecated) + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_format (deprecated)

+
+

Boost.Regex Index

+
+

+
+

+

The algorithm regex_format is deprecated, new code should use + match_results::format instead.  Existing code will continue to compile, + the following documentation is taken from the previous version of boost.regex + and will not be further updated:

+

Algorithm regex_format

+

+
#include <boost/regex.hpp> 
+

The algorithm regex_format takes the results of a match and creates a new + string based upon a format string, + regex_format can be used for search and replace operations: +

+
template <class OutputIterator, class iterator, class Allocator, class charT>
+OutputIterator regex_format(OutputIterator out,
+                            const match_results<iterator, Allocator>& m,
+                            const charT* fmt,
+                            match_flag_type flags = 0);
+
+template <class OutputIterator, class iterator, class Allocator, class charT>
+OutputIterator regex_format(OutputIterator out,
+                            const match_results<iterator, Allocator>& m,
+                            const std::basic_string<charT>& fmt,
+                            match_flag_type flags = 0);
+

The library also defines the following convenience variation of regex_format, + which returns the result directly as a string, rather than outputting to an + iterator [note - this version may not be available, or may be available in a + more limited form, depending upon your compiler's capabilities]: +

+
template <class iterator, class Allocator, class charT>
+std::basic_string<charT> regex_format
+                                 (const match_results<iterator, Allocator>& m, 
+                                  const charT* fmt,
+                                  match_flag_type flags = 0);
+
+template <class iterator, class Allocator, class charT>
+std::basic_string<charT> regex_format
+                                 (const match_results<iterator, Allocator>& m, 
+                                  const std::basic_string<charT>& fmt,
+                                  match_flag_type flags = 0);
+

Parameters to the main version of the function are passed as follows: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
 OutputIterator outAn output iterator type, the output string is sent to + this iterator. Typically this would be a std::ostream_iterator. 
 const match_results<iterator, + Allocator>& mAn instance of match_results<> obtained from + one of the matching algorithms above, and denoting what matched. 
 const charT* fmtA format string that determines how the match is + transformed into the new string. 
 unsigned flagsOptional flags which describe how the format string + is to be interpreted. 
+

+

Format flags are defined as follows: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 format_allEnables all syntax options (perl-like plus + extensions). 
 format_sedAllows only a sed-like syntax. 
 format_perlAllows only a perl-like syntax. 
 format_no_copyDisables copying of unmatched sections to the output + string during regex_merge operations. 
 format_first_onlyWhen this flag is set only the first occurrence will be replaced (applies to + regex_merge only). 
+

+


+   +

+

The format string syntax (and available options) is described more fully under + format strings + . +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/regex_grep.html b/doc/Attic/regex_grep.html new file mode 100644 index 00000000..131f7c84 --- /dev/null +++ b/doc/Attic/regex_grep.html @@ -0,0 +1,379 @@ + + + + Boost.Regex: Algorithm regex_grep (deprecated) + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_grep (deprecated)

+
+

Boost.Regex Index

+
+

+
+

+

The algorithm regex_grep is deprecated in favour of regex_iterator + which provides a more convenient and standard library friendly interface.

+

The following documentation is taken unchanged from the previous boost release, + and will not be updated in future.

+
+
#include <boost/regex.hpp> 
+

regex_grep allows you to search through a bidirectional-iterator range and + locate all the (non-overlapping) matches with a given regular expression. The + function is declared as: +

+
template <class Predicate, class iterator, class charT, class traits, class Allocator>
+unsigned int regex_grep(Predicate foo,
+                         iterator first,
+                         iterator last,
+                         const basic_regex<charT, traits, Allocator>& e,
+                         unsigned flags = match_default)
+

The library also defines the following convenience versions, which take either + a const charT*, or a const std::basic_string<>& in place of a pair of + iterators [note - these versions may not be available, or may be available in a + more limited form, depending upon your compiler's capabilities]: +

+
template <class Predicate, class charT, class Allocator, class traits>
+unsigned int regex_grep(Predicate foo, 
+              const charT* str, 
+              const basic_regex<charT, traits, Allocator>& e, 
+              unsigned flags = match_default);
+
+template <class Predicate, class ST, class SA, class Allocator, class charT, class traits>
+unsigned int regex_grep(Predicate foo, 
+              const std::basic_string<charT, ST, SA>& s, 
+              const basic_regex<charT, traits, Allocator>& e, 
+              unsigned flags = match_default);
+

The parameters for the primary version of regex_grep have the following + meanings: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 fooA predicate function object or function pointer, see + below for more information. 
 firstThe start of the range to search. 
 lastThe end of the range to search. 
 eThe regular expression to search for. 
 flagsThe flags that determine how matching is carried out, + one of the match_flags enumerators. 
+

+

 The algorithm finds all of the non-overlapping matches of the expression + e, for each match it fills a match_results<iterator, + Allocator> structure, which contains information on what matched, and calls + the predicate foo, passing the match_results<iterator, Allocator> as a + single argument. If the predicate returns true, then the grep operation + continues, otherwise it terminates without searching for further matches. The + function returns the number of matches found.

+

The general form of the predicate is: +

+
struct grep_predicate
+{
+   bool operator()(const match_results<iterator_type, typename expression_type::alloc_type::template rebind<sub_match<BidirectionalIterator> >::other>& m);
+};
+

Note that in almost every case the allocator parameter can be omitted, when + specifying the match_results type, + alternatively one of the typedefs cmatch, wcmatch, smatch or wsmatch can be + used. +

+

For example the regular expression "a*b" would find one match in the string + "aaaaab" and two in the string "aaabb". +

+

Remember this algorithm can be used for a lot more than implementing a version + of grep, the predicate can be and do anything that you want, grep utilities + would output the results to the screen, another program could index a file + based on a regular expression and store a set of bookmarks in a list, or a text + file conversion utility would output to file. The results of one regex_grep can + even be chained into another regex_grep to create recursive parsers. +

+

Example: convert the + example from regex_search to use regex_grep instead: +

+
#include <string> 
+#include <map> 
+#include <boost/regex.hpp> 
+
+// IndexClasses: 
+// takes the contents of a file in the form of a string 
+// and searches for all the C++ class definitions, storing 
+// their locations in a map of strings/int's 
+
+typedef std::map<std::string, int, std::less<std::string> > map_type; 
+
+const char* re = 
+   // possibly leading whitespace:   
+   "^[[:space:]]*" 
+   // possible template declaration:
+   "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
+   // class or struct:
+   "(class|struct)[[:space:]]*" 
+   // leading declspec macros etc:
+   "("
+      "\\<\\w+\\>"
+      "("
+         "[[:blank:]]*\\([^)]*\\)"
+      ")?"
+      "[[:space:]]*"
+   ")*" 
+   // the class name
+   "(\\<\\w*\\>)[[:space:]]*" 
+   // template specialisation parameters
+   "(<[^;:{]+>)?[[:space:]]*"
+   // terminate in { or :
+   "(\\{|:[^;\\{()]*\\{)";
+
+boost::regex expression(re); 
+
+class IndexClassesPred 
+{ 
+   map_type& m; 
+   std::string::const_iterator base; 
+public: 
+   IndexClassesPred(map_type& a, std::string::const_iterator b) : m(a), base(b) {} 
+   bool operator()(const  smatch& what) 
+   { 
+      // what[0] contains the whole string 
+      // what[5] contains the class name. 
+      // what[6] contains the template specialisation if any. 
+      // add class name and position to map: 
+      m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
+                what[5].first - base; 
+      return true; 
+   } 
+}; 
+
+void IndexClasses(map_type& m, const std::string& file) 
+{ 
+   std::string::const_iterator start, end; 
+   start = file.begin(); 
+   end = file.end(); 
+   regex_grep(IndexClassesPred(m, start), start, end, expression); 
+} 
+

Example: Use + regex_grep to call a global callback function: +

+
#include <string> 
+#include <map> 
+#include <boost/regex.hpp> 
+
+// purpose: 
+// takes the contents of a file in the form of a string 
+// and searches for all the C++ class definitions, storing 
+// their locations in a map of strings/int's 
+
+typedef std::map<std::string, int, std::less<std::string> > map_type; 
+
+const char* re = 
+   // possibly leading whitespace:   
+   "^[[:space:]]*" 
+   // possible template declaration:
+   "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
+   // class or struct:
+   "(class|struct)[[:space:]]*" 
+   // leading declspec macros etc:
+   "("
+      "\\<\\w+\\>"
+      "("
+         "[[:blank:]]*\\([^)]*\\)"
+      ")?"
+      "[[:space:]]*"
+   ")*" 
+   // the class name
+   "(\\<\\w*\\>)[[:space:]]*" 
+   // template specialisation parameters
+   "(<[^;:{]+>)?[[:space:]]*"
+   // terminate in { or :
+   "(\\{|:[^;\\{()]*\\{)";
+
+boost::regex expression(re);
+map_type class_index; 
+std::string::const_iterator base; 
+
+bool grep_callback(const  boost::smatch& what) 
+{ 
+   // what[0] contains the whole string 
+   // what[5] contains the class name. 
+   // what[6] contains the template specialisation if any. 
+   // add class name and position to map: 
+   class_index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
+                what[5].first - base; 
+   return true; 
+} 
+
+void IndexClasses(const std::string& file) 
+{ 
+   std::string::const_iterator start, end; 
+   start = file.begin(); 
+   end = file.end(); 
+   base = start; 
+   regex_grep(grep_callback, start, end, expression, match_default); 
+}
+  
+

Example: use + regex_grep to call a class member function, use the standard library adapters std::mem_fun + and std::bind1st to convert the member function into a predicate: +

+
#include <string> 
+#include <map> 
+#include <boost/regex.hpp> 
+#include <functional> 
+
+// purpose: 
+// takes the contents of a file in the form of a string 
+// and searches for all the C++ class definitions, storing 
+// their locations in a map of strings/int's 
+
+typedef std::map<std::string, int, std::less<std::string> > map_type; 
+
+class class_index 
+{ 
+   boost::regex expression; 
+   map_type index; 
+   std::string::const_iterator base; 
+   bool  grep_callback(boost::smatch what); 
+public: 
+   void IndexClasses(const std::string& file); 
+   class_index() 
+      : index(), 
+        expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" 
+                   "(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?" 
+                   "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?" 
+                   "(\\{|:[^;\\{()]*\\{)" 
+                   ){} 
+}; 
+
+bool  class_index::grep_callback(boost::smatch what) 
+{ 
+   // what[0] contains the whole string 
+   // what[5] contains the class name. 
+   // what[6] contains the template specialisation if any. 
+   // add class name and position to map: 
+   index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
+               what[5].first - base; 
+   return true; 
+} 
+
+void class_index::IndexClasses(const std::string& file) 
+{ 
+   std::string::const_iterator start, end; 
+   start = file.begin(); 
+   end = file.end(); 
+   base = start; 
+   regex_grep(std::bind1st(std::mem_fun(&class_index::grep_callback), this), 
+              start, 
+              end, 
+              expression); 
+} 
+  
+

Finally, C++ Builder + users can use C++ Builder's closure type as a callback argument: +

+
#include <string> 
+#include <map> 
+#include <boost/regex.hpp> 
+#include <functional> 
+
+// purpose: 
+// takes the contents of a file in the form of a string 
+// and searches for all the C++ class definitions, storing 
+// their locations in a map of strings/int's 
+
+typedef std::map<std::string, int, std::less<std::string> > map_type; 
+class class_index 
+{ 
+   boost::regex expression; 
+   map_type index; 
+   std::string::const_iterator base; 
+   typedef  boost::smatch arg_type; 
+   bool grep_callback(const arg_type& what); 
+public: 
+   typedef bool (__closure* grep_callback_type)(const arg_type&); 
+   void IndexClasses(const std::string& file); 
+   class_index() 
+      : index(), 
+        expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" 
+                   "(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?" 
+                   "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?" 
+                   "(\\{|:[^;\\{()]*\\{)" 
+                   ){} 
+}; 
+
+bool class_index::grep_callback(const arg_type& what) 
+{ 
+   // what[0] contains the whole string    
+// what[5] contains the class name.    
+// what[6] contains the template specialisation if any.    
+// add class name and position to map:    
+index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
+               what[5].first - base; 
+   return true; 
+} 
+
+void class_index::IndexClasses(const std::string& file) 
+{ 
+   std::string::const_iterator start, end; 
+   start = file.begin(); 
+   end = file.end(); 
+   base = start; 
+   class_index::grep_callback_type cl = &(this->grep_callback); 
+   regex_grep(cl, 
+            start, 
+            end, 
+            expression); 
+} 
+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/regex_match.html b/doc/Attic/regex_match.html new file mode 100644 index 00000000..890c65d6 --- /dev/null +++ b/doc/Attic/regex_match.html @@ -0,0 +1,325 @@ + + + + Boost.Regex: Algorithm regex_match + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_match

+
+

Boost.Regex Index

+
+

+
+

+

Contents

+
+
Synopsis
+
Description
+
Examples
+
+

Synopsis

+
#include <boost/regex.hpp> 
+

The algorithm regex_match determines whether a given regular expression + matches a given sequence denoted by a pair of bidirectional-iterators, the + algorithm is defined as follows, note that the result is true + only if the expression matches the whole of the input sequence, + the main use of this function is data input validation. +

template <class BidirectionalIterator, class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
+                 match_results<BidirectionalIterator, Allocator>& m,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+                 
+template <class BidirectionalIterator, class charT, class traits, class Allocator2>
+bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+                 
+template <class charT, class Allocator, class traits, class Allocator2>
+bool regex_match(const charT* str, match_results<const charT*, Allocator>& m,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+                 
+template <class ST, class SA, class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_match(const basic_string<charT, ST, SA>& s,
+                 match_results<typename basic_string<charT, ST, SA>::const_iterator, Allocator>& m, 
+                 const basic_regex<charT, traits, Allocator2>& e, 
+                 match_flag_type flags = match_default);
+                 
+template <class charT, class traits, class Allocator2>
+bool regex_match(const charT* str,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+                 
+template <class ST, class SA, class charT, class traits, class Allocator2>
+bool regex_match(const basic_string<charT, ST, SA>& s,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+
+

Description

+
template <class BidirectionalIterator, class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
+                 match_results<BidirectionalIterator, Allocator>& m,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+ +

+ Requires: Type BidirectionalIterator meets the requirements of a + Bidirectional Iterator (24.1.4).

+ +

+ Effects: Determines whether there is an exact match between the regular + expression e, and all of the character sequence [first, last), parameter + flags is used to control how the expression + is matched against the character sequence. Returns true if such a match + exists, false otherwise.

+

+ Postconditions: If the function returns false, then the effect on + parameter m is undefined, otherwise the effects on parameter m are + given in the table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

m.size()

+
+

e.mark_count()

+
+

m.empty()

+
+

false

+
+

m.prefix().first

+
+

first

+
+

m.prefix().second

+
+

first

+
+

m.prefix().matched

+
+

false

+
+

m.suffix().first

+
+

last

+
+

m.suffix().second

+
+

last

+
+

m.suffix().matched

+
+

false

+
+

m[0].first

+
+

first

+
+

m[0].second

+
+

last

+
+

m[0].matched

+
+

+ true if a full match was found, and false if it was a + partial match (found as a result of the match_partial flag being + set).

+

m[n].first

+
+

For all integers n < m.size(), the start of the sequence that matched + sub-expression n. Alternatively, if sub-expression n did not participate + in the match, then last.

+
+

m[n].second

+
+

For all integers n < m.size(), the end of the sequence that matched + sub-expression n. Alternatively, if sub-expression n did not participate + in the match, then last.

+
+

m[n].matched

+
+

For all integers n < m.size(), true if sub-expression n participated + in the match, false otherwise.

+

+
+

+
template <class BidirectionalIterator, class charT, class traits, class Allocator2>
+bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+ +

+ Effects: Behaves "as if" by constructing an instance of + match_results<BidirectionalIterator> what, + and then returning the result of regex_match(first, last, what, e, flags).

template <class charT, class Allocator, class traits, class Allocator2>
+bool regex_match(const charT* str, match_results<const charT*, Allocator>& m,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_match(str, str + + char_traits<charT>::length(str), m, e, flags).

template <class ST, class SA, class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_match(const basic_string<charT, ST, SA>& s,
+                 match_results<typename basic_string<charT, ST, SA>::const_iterator, Allocator>& m, 
+                 const basic_regex<charT, traits, Allocator2>& e, 
+                 match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_match(s.begin(), s.end(), m, e, + flags).

template <class charT, class traits, class Allocator2>
+bool regex_match(const charT* str,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_match(str, str + + char_traits<charT>::length(str), e, flags).

template <class ST, class SA, class charT, class traits, class Allocator2>
+bool regex_match(const basic_string<charT, ST, SA>& s,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_match(s.begin(), s.end(), e, + flags). +

Examples

+

The following example + processes an ftp response: +

+
#include <stdlib.h> 
+#include <boost/regex.hpp> 
+#include <string> 
+#include <iostream> 
+
+using namespace boost; 
+
+regex expression("([0-9]+)(\\-| |$)(.*)"); 
+
+// process_ftp: 
+// on success returns the ftp response code, and fills 
+// msg with the ftp response message. 
+int process_ftp(const char* response, std::string* msg) 
+{ 
+   cmatch what; 
+   if(regex_match(response, what, expression)) 
+   { 
+      // what[0] contains the whole string 
+      // what[1] contains the response code 
+      // what[2] contains the separator character 
+      // what[3] contains the text message. 
+      if(msg) 
+         msg->assign(what[3].first, what[3].second); 
+      return std::atoi(what[1].first); 
+   } 
+   // failure did not match 
+   if(msg) 
+      msg->erase(); 
+   return -1; 
+}
+      

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/regex_merge.html b/doc/Attic/regex_merge.html new file mode 100644 index 00000000..df7cb75e --- /dev/null +++ b/doc/Attic/regex_merge.html @@ -0,0 +1,46 @@ + + + + Boost.Regex: Algorithm regex_merge (deprecated) + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_merge (deprecated)

+
+

Boost.Regex Index

+
+

+
+

+

Algorithm regex_merge has been renamed regex_replace, + existing code will continue to compile, but new code should use + regex_replace instead.

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/regex_replace.html b/doc/Attic/regex_replace.html new file mode 100644 index 00000000..aed7b8fa --- /dev/null +++ b/doc/Attic/regex_replace.html @@ -0,0 +1,208 @@ + + + + Boost.Regex: Algorithm regex_replace + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_replace

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
Description
+ Examples
+

Synopsis

+
#include <boost/regex.hpp> 
+

The algorithm regex_replace searches through a string finding + all the matches to the regular expression: for each match it then calls + match_results::format to format the string and sends the result to the + output iterator. Sections of text that do not match are copied to the output + unchanged only if the flags parameter does not have the flag + format_no_copy set. If the flag format_first_only + is set then only the first occurrence is replaced rather than all + occurrences. 

template <class OutputIterator, class BidirectionalIterator, class traits,
+          class Allocator, class charT>
+OutputIterator regex_replace(OutputIterator out,
+                           BidirectionalIterator first,
+                           BidirectionalIterator last,
+                           const basic_regex<charT, traits, Allocator>& e,
+                           const basic_string<charT>& fmt,
+                           match_flag_type flags = match_default);
+
+template <class traits, class Allocator, class charT>
+basic_string<charT> regex_replace(const basic_string<charT>& s,
+                            const basic_regex<charT, traits, Allocator>& e,
+                            const basic_string<charT>& fmt,
+                            match_flag_type flags = match_default);
+
+
+

Description

+
template <class OutputIterator, class BidirectionalIterator, class traits,
+          class Allocator, class charT>
+OutputIterator regex_replace(OutputIterator out,
+                           BidirectionalIterator first,
+                           BidirectionalIterator last,
+                           const basic_regex<charT, traits, Allocator>& e,
+                           const basic_string<charT>& fmt,
+                           match_flag_type flags = match_default);
+ +

+ Effects: Finds all the non-overlapping matches m of type match_results<BidirectionalIterator> + that occur within the sequence [first, last). If no such matches are + found and !(flags & format_no_copy) then calls std::copy(first, + last, out). Otherwise, for each match found, if !(flags & + format_no_copy) calls std::copy(m.prefix().first, m.prefix().second, + out), and then calls m.format(out, fmt, flags). Finally + if !(flags & format_no_copy) calls std::copy(last_m.suffix().first, + last_m.suffix().second, out) where last_m is a copy of the + last match found. If flags & format_first_only is non-zero + then only the first match found is replaced.

+ +

+ Returns: out.

template <class traits, class Allocator, class charT>
+basic_string<charT> regex_replace(const basic_string<charT>& s,
+                            const basic_regex<charT, traits, Allocator>& e,
+                            const basic_string<charT>& fmt,
+                            match_flag_type flags = match_default);
+ +

+ Effects: Constructs an object basic_string<charT> result, + calls regex_replace(back_inserter(result), s.begin(), s.end(), e, fmt, + flags), and then returns result. +

Examples

+

The following example takes + C/C++ source code as input, and outputs syntax highlighted HTML code.

+

+
#include <fstream>
+#include <sstream>
+#include <string>
+#include <iterator>
+#include <boost/regex.hpp>
+#include <fstream>
+#include <iostream>
+
+// purpose:
+// takes the contents of a file and transform to
+// syntax highlighted code in html format
+
+boost::regex e1, e2;
+extern const char* expression_text;
+extern const char* format_string;
+extern const char* pre_expression;
+extern const char* pre_format;
+extern const char* header_text;
+extern const char* footer_text;
+
+void load_file(std::string& s, std::istream& is)
+{
+   s.erase();
+   s.reserve(is.rdbuf()->in_avail());
+   char c;
+   while(is.get(c))
+   {
+      if(s.capacity() == s.size())
+         s.reserve(s.capacity() * 3);
+      s.append(1, c);
+   }
+}
+
+int main(int argc, const char** argv)
+{
+   try{
+   e1.assign(expression_text);
+   e2.assign(pre_expression);
+   for(int i = 1; i < argc; ++i)
+   {
+      std::cout << "Processing file " << argv[i] << std::endl;
+      std::ifstream fs(argv[i]);
+      std::string in;
+      load_file(in, fs);
+      std::string out_name(std::string(argv[i]) + std::string(".htm"));
+      std::ofstream os(out_name.c_str());
+      os << header_text;
+      // strip '<' and '>' first by outputting to a
+      // temporary string stream
+      std::ostringstream t(std::ios::out | std::ios::binary);
+      std::ostream_iterator<char, char> oi(t);
+      boost::regex_replace(oi, in.begin(), in.end(), e2, pre_format);
+      // then output to final output stream
+      // adding syntax highlighting:
+      std::string s(t.str());
+      std::ostream_iterator<char, char> out(os);
+      boost::regex_replace(out, s.begin(), s.end(), e1, format_string);
+      os << footer_text;
+   }
+   }
+   catch(...)
+   { return -1; }
+   return 0;
+}
+
+extern const char* pre_expression = "(<)|(>)|\\r";
+extern const char* pre_format = "(?1<)(?2>)";
+
+
+const char* expression_text = // preprocessor directives: index 1
+                              "(^[[:blank:]]*#(?:[^\\\\\\n]|\\\\[^\\n[:punct:][:word:]]*[\\n[:punct:][:word:]])*)|"
+                              // comment: index 2
+                              "(//[^\\n]*|/\\*.*?\\*/)|"
+                              // literals: index 3
+                              "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
+                              // string literals: index 4
+                              "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
+                              // keywords: index 5
+                              "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
+                              "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
+                              "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
+                              "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
+                              "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
+                              "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
+                              "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
+                              "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
+                              "|using|virtual|void|volatile|wchar_t|while)\\>"
+                              ;
+
+const char* format_string = "(?1<font color=\"#008040\">$&</font>)"
+                            "(?2<I><font color=\"#000080\">$&</font></I>)"
+                            "(?3<font color=\"#0000A0\">$&</font>)"
+                            "(?4<font color=\"#0000FF\">$&</font>)"
+                            "(?5<B>$&</B>)";
+
+const char* header_text = "<HTML>\n<HEAD>\n"
+                          "<TITLE>Auto-generated html formated source</TITLE>\n"
+                          "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=windows-1252\">\n"
+                          "</HEAD>\n"
+                          "<BODY LINK=\"#0000ff\" VLINK=\"#800080\" BGCOLOR=\"#ffffff\">\n"
+                          "<P> </P>\n<PRE>";
+
+const char* footer_text = "</PRE>\n</BODY>\n\n";
+      
+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/regex_search.html b/doc/Attic/regex_search.html new file mode 100644 index 00000000..84c7ae3c --- /dev/null +++ b/doc/Attic/regex_search.html @@ -0,0 +1,332 @@ + + + + Boost.Regex: Algorithm regex_search + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_search

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
Description
+ Examples
+

Synopsis

+
#include <boost/regex.hpp> 
+

+

The algorithm regex_search will search a range denoted by a pair of + bidirectional-iterators for a given regular expression. The algorithm uses + various heuristics to reduce the search time by only checking for a match if a + match could conceivably start at that position. The algorithm is defined as + follows: +

template <class BidirectionalIterator, 
+          class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_search(BidirectionalIterator first, BidirectionalIterator last,
+                  match_results<BidirectionalIterator, Allocator>& m,
+                  const basic_regex<charT, traits, Allocator2>& e,
+                  match_flag_type flags = match_default);
+                  
+template <class ST, class SA, 
+          class Allocator, class charT,
+          class traits, class Allocator2> 
+bool regex_search(const basic_string<charT, ST, SA>& s, 
+                  match_results<
+                      typename basic_string<charT, ST,SA>::const_iterator, 
+                      Allocator>& m, 
+                  const basic_regex<charT, traits, Allocator2>& e, 
+                  match_flag_type flags = match_default); 
+          
+template<class charT, class Allocator, class traits, 
+         class Allocator2> 
+bool regex_search(const charT* str, 
+                  match_results<const charT*, Allocator>& m, 
+                  const basic_regex<charT, traits, Allocator2>& e,
+                  match_flag_type flags = match_default);
+                  
+template <class BidirectionalIterator, class Allocator,
+          class charT, class traits>                
+bool regex_search(BidirectionalIterator first, BidirectionalIterator last, 
+                  const basic_regex<charT, traits, Allocator>& e, 
+                  match_flag_type flags = match_default); 
+                  
+template <class charT, class Allocator, 
+          class traits> 
+bool regex_search(const charT* str, 
+                  const basic_regex<charT, traits, Allocator>& e, 
+                  match_flag_type flags = match_default); 
+                  
+template<class ST, class SA,
+         class Allocator, class charT, 
+         class traits>
+bool regex_search(const basic_string<charT, ST, SA>& s,
+                  const basic_regex<charT, traits, Allocator>& e,
+                  match_flag_type flags = match_default);
+
+

Description

+
template <class BidirectionalIterator, class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_search(BidirectionalIterator first, BidirectionalIterator last,
+                  match_results<BidirectionalIterator, Allocator>& m,
+                  const basic_regex<charT, traits, Allocator2>& e,
+                  match_flag_type flags = match_default);
+ +

+ Requires: Type BidirectionalIterator meets the requirements of a + Bidirectional Iterator (24.1.4).

+ +

+ Effects: Determines whether there is some sub-sequence within + [first,last) that matches the regular expression e, parameter flags + is used to control how the expression is matched against the character + sequence. Returns true if such a sequence exists, false otherwise.

+

+ Postconditions: If the function returns false, then the effect on + parameter m is undefined, otherwise the effects on parameter m are + given in the table:

+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element

+

+ Value

+

m.size()

+
+

e.mark_count()

+
+

m.empty()

+
+

false

+
+

m.prefix().first

+
+

first

+
+

m.prefix().second

+
+

m[0].first

+
+

m.prefix().matched

+
+

m.prefix().first != m.prefix().second

+
+

m.suffix().first

+
+

m[0].second

+
+

m.suffix().second

+
+

last

+
+

m.suffix().matched

+
+

m.suffix().first != m.suffix().second

+
+

m[0].first

+
+

The start of the sequence of characters that matched the regular expression

+
+

m[0].second

+
+

The end of the sequence of characters that matched the regular expression

+
+

m[0].matched

+
+

+ true if a full match was found, and false if it was a + partial match (found as a result of the match_partial flag being + set).

+

m[n].first

+
+

For all integers n < m.size(), the start of the sequence that matched + sub-expression n. Alternatively, if sub-expression n did not participate + in the match, then last.

+
+

m[n].second

+
+

For all integers n < m.size(), the end of the sequence that matched + sub-expression n. Alternatively, if sub-expression n did not participate + in the match, then last.

+
+

m[n].matched

+
+

For all integers n < m.size(), true if sub-expression n participated + in the match, false otherwise.

+

+
+
+
+
template <class charT, class Allocator, class traits, class Allocator2>
+bool regex_search(const charT* str, match_results<const charT*, Allocator>& m,
+                  const basic_regex<charT, traits, Allocator2>& e,
+                  match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_search(str, str + + char_traits<charT>::length(str), m, e, flags).

template <class ST, class SA, class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_search(const basic_string<charT, ST, SA>& s,
+                  match_results<typename basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
+                  const basic_regex<charT, traits, Allocator2>& e,
+                  match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_search(s.begin(), s.end(), m, e, + flags).

template <class BidirectionalIterator, class Allocator, class charT,
+          class traits>
+bool regex_search(BidirectionalIterator first, BidirectionalIterator last,
+                  const basic_regex<charT, traits, Allocator>& e,
+                  match_flag_type flags = match_default);
+ +

+ Effects: Behaves "as if" by constructing an instance of + match_results<BidirectionalIterator> what, + and then returning the result of regex_search(first, last, what, e, flags).

template <class charT, class Allocator, class traits>
+bool regex_search(const charT* str,
+                  const basic_regex<charT, traits, Allocator>& e,
+                  match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_search(str, str + + char_traits<charT>::length(str), e, flags).

template <class ST, class SA, class Allocator, class charT,
+          class traits>
+bool regex_search(const basic_string<charT, ST, SA>& s,
+                  const basic_regex<charT, traits, Allocator>& e,
+                  match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_search(s.begin(), s.end(), e, + flags). +

Examples

+

The following example, + takes the contents of a file in the form of a string, and searches for all the + C++ class declarations in the file. The code will work regardless of the way + that std::string is implemented, for example it could easily be modified to + work with the SGI rope class, which uses a non-contiguous storage strategy.

+

+
#include <string> 
+#include <map> 
+#include <boost/regex.hpp> 
+
+// purpose: 
+// takes the contents of a file in the form of a string 
+// and searches for all the C++ class definitions, storing 
+// their locations in a map of strings/int's 
+typedef std::map<std::string, int, std::less<std::string> > map_type; 
+
+boost::regex expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)"); 
+
+void IndexClasses(map_type& m, const std::string& file) 
+{ 
+   std::string::const_iterator start, end; 
+   start = file.begin(); 
+   end = file.end(); 
+      boost::match_results<std::string::const_iterator> what; 
+   boost::match_flag_type flags = boost::match_default; 
+   while(regex_search(start, end, what, expression, flags)) 
+   { 
+      // what[0] contains the whole string 
+      // what[5] contains the class name. 
+      // what[6] contains the template specialisation if any. 
+      // add class name and position to map: 
+      m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
+                what[5].first - file.begin(); 
+      // update search position: 
+      start = what[0].second; 
+      // update flags: 
+      flags |= boost::match_prev_avail; 
+      flags |= boost::match_not_bob; 
+   } 
+}
+      
+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/regex_split.html b/doc/Attic/regex_split.html new file mode 100644 index 00000000..5f8f45b0 --- /dev/null +++ b/doc/Attic/regex_split.html @@ -0,0 +1,143 @@ + + + + Boost.Regex: Algorithm regex_split (deprecated) + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_split (deprecated)

+
+

Boost.Regex Index

+
+

+
+

+

The algorithm regex_split has been deprecated in favour of the iterator + regex_token_iterator which has a more flexible and powerful interface, + as well as following the more usual standard library "pull" rather than "push" + semantics.

+

Code which uses regex_split will continue to compile; the following + documentation is taken from the previous boost.regex version:

+

Algorithm regex_split

+
#include <boost/regex.hpp> 
+

Algorithm regex_split performs a similar operation to the perl split operation, + and comes in three overloaded forms: +

+
template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2, class Alloc2>
+std::size_t regex_split(OutputIterator out, 
+                        std::basic_string<charT, Traits1, Alloc1>& s, 
+                        const basic_regex<charT, Traits2, Alloc2>& e,
+                        unsigned flags,
+                        std::size_t max_split);
+
+template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2, class Alloc2>
+std::size_t regex_split(OutputIterator out, 
+                        std::basic_string<charT, Traits1, Alloc1>& s, 
+                        const basic_regex<charT, Traits2, Alloc2>& e,
+                        unsigned flags = match_default);
+
+template <class OutputIterator, class charT, class Traits1, class Alloc1>
+std::size_t regex_split(OutputIterator out, 
+                        std::basic_string<charT, Traits1, Alloc1>& s);
+

Each version takes an output-iterator for output, and a string for input. If + the expression contains no marked sub-expressions, then the algorithm writes + one string onto the output-iterator for each section of input that does not + match the expression. If the expression does contain marked sub-expressions, + then each time a match is found, one string for each marked sub-expression will + be written to the output-iterator. No more than max_split strings will + be written to the output-iterator. Before returning, all the input processed + will be deleted from the string s (if max_split is not reached + then all of s will be deleted). Returns the number of strings written to + the output-iterator. If the parameter max_split is not specified then it + defaults to UINT_MAX. If no expression is specified, then it defaults to "\s+", + and splitting occurs on whitespace. +

+

Example: the following + function will split the input string into a series of tokens, and remove each + token from the string s: +

+
unsigned tokenise(std::list<std::string>& l, std::string& s)
+{
+   return boost::regex_split(std::back_inserter(l), s);
+}
+

Example: the following + short program will extract all of the URL's from a html file, and print them + out to cout: +

+
#include <list>
+#include <fstream>
+#include <iostream>
+#include <boost/regex.hpp>
+
+boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
+               boost::regbase::normal | boost::regbase::icase);
+
+void load_file(std::string& s, std::istream& is)
+{
+   s.erase();
+   //
+   // attempt to grow string buffer to match file size,
+   // this doesn't always work...
+   s.reserve(is.rdbuf()->in_avail());
+   char c;
+   while(is.get(c))
+   {
+      // use logarithmic growth strategy, in case
+      // in_avail (above) returned zero:
+      if(s.capacity() == s.size())
+         s.reserve(s.capacity() * 3);
+      s.append(1, c);
+   }
+}
+
+
+int main(int argc, char** argv)
+{
+   std::string s;
+   std::list<std::string> l;
+
+   for(int i = 1; i < argc; ++i)
+   {
+      std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
+      s.erase();
+      std::ifstream is(argv[i]);
+      load_file(s, is);
+      boost::regex_split(std::back_inserter(l), s, e);
+      while(l.size())
+      {
+         s = *(l.begin());
+         l.pop_front();
+         std::cout << s << std::endl;
+      }
+   }
+   return 0;
+}
+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/regex_traits.html b/doc/Attic/regex_traits.html new file mode 100644 index 00000000..74b31383 --- /dev/null +++ b/doc/Attic/regex_traits.html @@ -0,0 +1,47 @@ + + + + Boost.Regex: class regex_traits + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

class regex_traits

+
+

Boost.Regex Index

+
+

+
+

+

Under construction.

+

The current boost.regex traits class design will be migrated to that specified + in the regular + expression standardization proposal

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/sub_match.html b/doc/Attic/sub_match.html new file mode 100644 index 00000000..09849bfa --- /dev/null +++ b/doc/Attic/sub_match.html @@ -0,0 +1,427 @@ + + + + Boost.Regex: sub_match + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

sub_match

+
+

Boost.Regex Index

+
+

+
+

+

Synopsis

+

#include <boost/regex.hpp> +

+

Regular expressions are different from many simple pattern-matching algorithms + in that as well as finding an overall match they can also produce + sub-expression matches: each sub-expression being delimited in the pattern by a + pair of parentheses (...). There has to be some method for reporting + sub-expression matches back to the user: this is achieved by defining a + class match_results that acts as an + indexed collection of sub-expression matches, each sub-expression match being + contained in an object of type sub_match + . +

Objects of type sub_match may only be obtained by subscripting an object + of type match_results + . +

When the marked sub-expression denoted by an object of type sub_match<> + participated in a regular expression match then member matched evaluates + to true, and members first and second denote the + range of characters [first,second) which formed that match. + Otherwise matched is false, and members first and second + contain undefined values.

+

If an object of type sub_match<> represents sub-expression 0 + - that is to say the whole match - then member matched is always + true, unless a partial match was obtained as a result of the flag match_partial + being passed to a regular expression algorithm, in which case member matched + is false, and members first and second represent the + character range that formed the partial match.

+
+namespace boost{
+      
+template <class BidirectionalIterator>
+class sub_match : public std::pair<BidirectionalIterator, BidirectionalIterator>
+{
+public:
+   typedef typename iterator_traits<BidirectionalIterator>::value_type       value_type;
+   typedef typename iterator_traits<BidirectionalIterator>::difference_type  difference_type;
+   typedef          BidirectionalIterator                                    iterator;
+
+   bool matched;
+
+   difference_type length()const;
+   operator basic_string<value_type>()const;
+   basic_string<value_type> str()const;
+
+   int compare(const sub_match& s)const;
+   int compare(const basic_string<value_type>& s)const;
+   int compare(const value_type* s)const;
+};
+
+template <class BidirectionalIterator>
+bool operator == (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator>
+bool operator != (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator>
+bool operator < (const sub_match<BidirectionalIterator>& lhs,
+                 const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator>
+bool operator <= (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator>
+bool operator >= (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator>
+bool operator > (const sub_match<BidirectionalIterator>& lhs,
+                 const sub_match<BidirectionalIterator>& rhs);
+
+
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator == (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator != (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator < (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
+                 const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator > (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
+                 const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator >= (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator <= (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator == (const sub_match<BidirectionalIterator>& lhs,
+                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator != (const sub_match<BidirectionalIterator>& lhs,
+                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator < (const sub_match<BidirectionalIterator>& lhs,
+                 const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator > (const sub_match<BidirectionalIterator>& lhs,
+                 const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator >= (const sub_match<BidirectionalIterator>& lhs,
+                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator <= (const sub_match<BidirectionalIterator>& lhs,
+                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
+
+template <class BidirectionalIterator> 
+bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+
+template <class BidirectionalIterator> 
+bool operator == (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+template <class BidirectionalIterator> 
+bool operator != (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+template <class BidirectionalIterator> 
+bool operator < (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+template <class BidirectionalIterator> 
+bool operator > (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+template <class BidirectionalIterator> 
+bool operator >= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+template <class BidirectionalIterator> 
+bool operator <= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+
+template <class BidirectionalIterator> 
+bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+
+template <class BidirectionalIterator> 
+bool operator == (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+template <class BidirectionalIterator> 
+bool operator != (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+template <class BidirectionalIterator> 
+bool operator < (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+template <class BidirectionalIterator> 
+bool operator > (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+template <class BidirectionalIterator> 
+bool operator >= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+template <class BidirectionalIterator> 
+bool operator <= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+
+template <class charT, class traits, class BidirectionalIterator>
+basic_ostream<charT, traits>&
+   operator << (basic_ostream<charT, traits>& os,
+                const sub_match<BidirectionalIterator>& m);
+
+} // namespace boost
+

Description

+

+ sub_match members

+
typedef typename std::iterator_traits<iterator>::value_type value_type;
+

The type pointed to by the iterators.

+
typedef typename std::iterator_traits<iterator>::difference_type difference_type;
+

A type that represents the difference between two iterators.

+
typedef iterator iterator_type;
+

The iterator type.

+
iterator first
+

An iterator denoting the position of the start of the match.

+
iterator second
+

An iterator denoting the position of the end of the match.

+
bool matched
+

A Boolean value denoting whether this sub-expression participated in the match.

+
difference_type length()const;
+ +

+ Effects: returns (matched ? distance(first, second) : 0).

operator basic_string<value_type>()const;
+ +

+ Effects: returns (matched ? basic_string<value_type>(first, + second) : basic_string<value_type>()).

basic_string<value_type> str()const;
+ +

+ Effects: returns (matched ? basic_string<value_type>(first, + second) : basic_string<value_type>()).

int compare(const sub_match& s)const;
+ +

+ Effects: returns str().compare(s.str()).

int compare(const basic_string<value_type>& s)const;
+ +

+ Effects: returns str().compare(s).

int compare(const value_type* s)const;
+ +

+ Effects: returns str().compare(s).

+

+ sub_match non-member operators

+
template <class BidirectionalIterator>
+bool operator == (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+ +

+ Effects: returns lhs.compare(rhs) == 0.

template <class BidirectionalIterator>
+bool operator != (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+ +

+ Effects: returns lhs.compare(rhs) != 0.

template <class BidirectionalIterator>
+bool operator < (const sub_match<BidirectionalIterator>& lhs,
+                 const sub_match<BidirectionalIterator>& rhs);
+ +

+ Effects: returns lhs.compare(rhs) < 0.

template <class BidirectionalIterator>
+bool operator <= (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+ +

+ Effects: returns lhs.compare(rhs) <= 0.

template <class BidirectionalIterator>
+bool operator >= (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+ +

+ Effects: returns lhs.compare(rhs) >= 0.

template <class BidirectionalIterator>
+bool operator > (const sub_match<BidirectionalIterator>& lhs,
+                 const sub_match<BidirectionalIterator>& rhs);
+ +

+ Effects: returns lhs.compare(rhs) > 0.

template <class BidirectionalIterator> 
+bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs == rhs.str().

template <class BidirectionalIterator> 
+bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs != rhs.str().

template <class BidirectionalIterator> 
+bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs < rhs.str().

template <class BidirectionalIterator> 
+bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs > rhs.str().

template <class BidirectionalIterator> 
+bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs >= rhs.str().

template <class BidirectionalIterator> 
+bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs <= rhs.str().

template <class BidirectionalIterator> 
+bool operator == (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+ +

+ Effects: returns lhs.str() == rhs.

template <class BidirectionalIterator> 
+bool operator != (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+ +

+ Effects: returns lhs.str() != rhs.

template <class BidirectionalIterator> 
+bool operator < (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+ +

+ Effects: returns lhs.str() < rhs.

template <class BidirectionalIterator> 
+bool operator > (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+ +

+ Effects: returns lhs.str() > rhs.

template <class BidirectionalIterator> 
+bool operator >= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+ +

+ Effects: returns lhs.str() >= rhs.

template <class BidirectionalIterator> 
+bool operator <= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+ +

+ Effects: returns lhs.str() <= rhs.

template <class BidirectionalIterator> 
+bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs == rhs.str().

template <class BidirectionalIterator> 
+bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs != rhs.str().

template <class BidirectionalIterator> 
+bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs < rhs.str().

template <class BidirectionalIterator> 
+bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs > rhs.str().

template <class BidirectionalIterator> 
+bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs >= rhs.str().

template <class BidirectionalIterator> 
+bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs <= rhs.str().

template <class BidirectionalIterator> 
+bool operator == (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+ +

+ Effects: returns lhs.str() == rhs.

template <class BidirectionalIterator> 
+bool operator != (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+ +

+ Effects: returns lhs.str() != rhs.

template <class BidirectionalIterator> 
+bool operator < (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+ +

+ Effects: returns lhs.str() < rhs.

template <class BidirectionalIterator> 
+bool operator > (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+ +

+ Effects: returns lhs.str() > rhs.

template <class BidirectionalIterator> 
+bool operator >= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+ +

+ Effects: returns lhs.str() >= rhs.

template <class BidirectionalIterator> 
+bool operator <= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+ +

+ Effects: returns lhs.str() <= rhs.

template <class charT, class traits, class BidirectionalIterator>
+basic_ostream<charT, traits>&
+   operator << (basic_ostream<charT, traits>& os,
+                const sub_match<BidirectionalIterator>& m);
+ +

+ Effects: returns (os << m.str()). +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/syntax.html b/doc/Attic/syntax.html new file mode 100644 index 00000000..e03e0fe6 --- /dev/null +++ b/doc/Attic/syntax.html @@ -0,0 +1,783 @@ + + + + Boost.Regex: Regular Expression Syntax + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Regular Expression Syntax

+
+

Boost.Regex Index

+
+

+
+

+

This section covers the regular expression syntax used by this library, this is + a programmer's guide, the actual syntax presented to your program's users will + depend upon the flags used during expression compilation. +

+

Literals +

+

All characters are literals except: ".", "|", "*", "?", "+", "(", ")", "{", + "}", "[", "]", "^", "$" and "\". These characters are literals when preceded by + a "\". A literal is a character that matches itself, or matches the result of + traits_type::translate(), where traits_type is the traits template parameter to + class reg_expression.

+

Wildcard +

+

The dot character "." matches any single character except : when match_not_dot_null + is passed to the matching algorithms, the dot does not match a null character; + when match_not_dot_newline is passed to the matching algorithms, then + the dot does not match a newline character. +

+

Repeats +

+

A repeat is an expression that is repeated an arbitrary number of times. An + expression followed by "*" can be repeated any number of times including zero. + An expression followed by "+" can be repeated any number of times, but at least + once, if the expression is compiled with the flag regex_constants::bk_plus_qm + then "+" is an ordinary character and "\+" represents a repeat of once or more. + An expression followed by "?" may be repeated zero or one times only, if the + expression is compiled with the flag regex_constants::bk_plus_qm then "?" is an + ordinary character and "\?" represents the repeat zero or once operator. When + it is necessary to specify the minimum and maximum number of repeats + explicitly, the bounds operator "{}" may be used, thus "a{2}" is the letter "a" + repeated exactly twice, "a{2,4}" represents the letter "a" repeated between 2 + and 4 times, and "a{2,}" represents the letter "a" repeated at least twice with + no upper limit. Note that there must be no white-space inside the {}, and there + is no upper limit on the values of the lower and upper bounds. When the + expression is compiled with the flag regex_constants::bk_braces then "{" and + "}" are ordinary characters and "\{" and "\}" are used to delimit bounds + instead. All repeat expressions refer to the shortest possible previous + sub-expression: a single character; a character set, or a sub-expression + grouped with "()" for example. +

+

Examples: +

+

"ba*" will match all of "b", "ba", "baaa" etc. +

+

"ba+" will match "ba" or "baaaa" for example but not "b". +

+

"ba?" will match "b" or "ba". +

+

"ba{2,4}" will match "baa", "baaa" and "baaaa". +

+

Non-greedy repeats +

+

Whenever the "extended" regular expression syntax is in use (the default) then + non-greedy repeats are possible by appending a '?' after the repeat; a + non-greedy repeat is one which will match the shortest possible string. +

+

For example to match html tag pairs one could use something like: +

+

"<\s*tagname[^>]*>(.*?)<\s*/tagname\s*>" +

+

In this case $1 will contain the text between the tag pairs, and will be the + shortest possible matching string.  +

+

Parenthesis +

+

Parentheses serve two purposes, to group items together into a sub-expression, + and to mark what generated the match. For example the expression "(ab)*" would + match all of the string "ababab". The matching algorithms + regex_match and regex_search + each take an instance of match_results + that reports what caused the match, on exit from these functions the + match_results contains information both on what the whole expression + matched and on what each sub-expression matched. In the example above + match_results[1] would contain a pair of iterators denoting the final "ab" of + the matching string. It is permissible for sub-expressions to match null + strings. If a sub-expression takes no part in a match - for example if it is + part of an alternative that is not taken - then both of the iterators that are + returned for that sub-expression point to the end of the input string, and the matched + parameter for that sub-expression is false. Sub-expressions are indexed + from left to right starting from 1, sub-expression 0 is the whole expression. +

+

Non-Marking Parenthesis +

+

Sometimes you need to group sub-expressions with parenthesis, but don't want + the parenthesis to spit out another marked sub-expression, in this case a + non-marking parenthesis (?:expression) can be used. For example the following + expression creates no sub-expressions: +

+

"(?:abc)*"

+

Forward Lookahead Asserts  +

+

There are two forms of these; one for positive forward lookahead asserts, and + one for negative lookahead asserts:

+

"(?=abc)" matches zero characters only if they are followed by the expression + "abc".

+

"(?!abc)" matches zero characters only if they are not followed by the + expression "abc".

+

Independent sub-expressions

+

"(?>expression)" matches "expression" as an independent atom (the algorithm + will not backtrack into it if a failure occurs later in the expression).

+

Alternatives +

+

Alternatives occur when the expression can match either one sub-expression or + another, each alternative is separated by a "|", or a "\|" if the flag + regex_constants::bk_vbar is set, or by a newline character if the flag + regex_constants::newline_alt is set. Each alternative is the largest possible + previous sub-expression; this is the opposite behaviour from repetition + operators. +

+

Examples: +

+

"a(b|c)" could match "ab" or "ac". +

+

"abc|def" could match "abc" or "def". +

+

Sets +

+

A set is a set of characters that can match any single character that is a + member of the set. Sets are delimited by "[" and "]" and can contain literals, + character ranges, character classes, collating elements and equivalence + classes. Set declarations that start with "^" contain the complement of the + elements that follow. +

+

Examples: +

+

Character literals: +

+

"[abc]" will match either of "a", "b", or "c". +

+

"[^abc]" will match any character other than "a", "b", or "c". +

+

Character ranges: +

+

"[a-z]" will match any character in the range "a" to "z". +

+

"[^A-Z]" will match any character other than those in the range "A" to "Z". +

+

Note that character ranges are highly locale dependent if the flag + regex_constants::collate is set: they match any character that collates between + the endpoints of the range, ranges will only behave according to ASCII rules + when the default "C" locale is in effect. For example if the library is + compiled with the Win32 localization model, then [a-z] will match the ASCII + characters a-z, and also 'A', 'B' etc, but not 'Z' which collates just after + 'z'. This locale specific behaviour is disabled by default (in perl mode), and + forces ranges to collate according to ASCII character code. +

+

Character classes are denoted using the syntax "[:classname:]" within a set + declaration, for example "[[:space:]]" is the set of all whitespace characters. + Character classes are only available if the flag regex_constants::char_classes + is set. The available character classes are: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 alnumAny alpha numeric character. 
 alphaAny alphabetical character a-z and A-Z. Other + characters may also be included depending upon the locale. 
 blankAny blank character, either a space or a tab. 
 cntrlAny control character. 
 digitAny digit 0-9. 
 graphAny graphical character. 
 lowerAny lower case character a-z. Other characters may + also be included depending upon the locale. 
 printAny printable character. 
 punctAny punctuation character. 
 spaceAny whitespace character. 
 upperAny upper case character A-Z. Other characters may + also be included depending upon the locale. 
 xdigitAny hexadecimal digit character, 0-9, a-f and A-F. 
 wordAny word character - all alphanumeric characters plus + the underscore. 
 unicodeAny character whose code is greater than 255, this + applies to the wide character traits classes only. 
+

+

There are some shortcuts that can be used in place of the character classes, + provided the flag regex_constants::escape_in_lists is set then you can use: +

+

\w in place of [:word:] +

+

\s in place of [:space:] +

+

\d in place of [:digit:] +

+

\l in place of [:lower:] +

+

\u in place of [:upper:]  +

+

Collating elements take the general form [.tagname.] inside a set declaration, + where tagname is either a single character, or a name of a collating + element, for example [[.a.]] is equivalent to [a], and [[.comma.]] is + equivalent to [,]. The library supports all the standard POSIX collating + element names, and in addition the following digraphs: "ae", "ch", "ll", "ss", + "nj", "dz", "lj", each in lower, upper and title case variations. + Multi-character collating elements can result in the set matching more than one + character, for example [[.ae.]] would match two characters, but note that + [^[.ae.]] would only match one character.  +

+

+ Equivalence classes take the general form [=tagname=] inside a set declaration, + where tagname is either a single character, or a name of a collating + element, and matches any character that is a member of the same primary + equivalence class as the collating element [.tagname.]. An equivalence class is + a set of characters that collate the same, a primary equivalence class is a set + of characters whose primary sort keys are all the same (for example strings are + typically collated by character, then by accent, and then by case; the primary + sort key then relates to the character, the secondary to the accentation, and + the tertiary to the case). If there is no equivalence class corresponding to tagname, + then [=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no + locale independent method of obtaining the primary sort key for a character, + except under Win32. For other operating systems the library will "guess" the + primary sort key from the full sort key (obtained from strxfrm), so + equivalence classes are probably best considered broken under any operating + system other than Win32.  +

+

To include a literal "-" in a set declaration then: make it the first character + after the opening "[" or "[^", the endpoint of a range, a collating element, or + if the flag regex_constants::escape_in_lists is set then precede with an escape + character as in "[\-]". To include a literal "[" or "]" or "^" in a set then + make them the endpoint of a range, a collating element, or precede with an + escape character if the flag regex_constants::escape_in_lists is set. +

+

Line anchors +

+

An anchor is something that matches the null string at the start or end of a + line: "^" matches the null string at the start of a line, "$" matches the null + string at the end of a line. +

+

Back references +

+

A back reference is a reference to a previous sub-expression that has already + been matched, the reference is to what the sub-expression matched, not to the + expression itself. A back reference consists of the escape character "\" + followed by a digit "1" to "9", "\1" refers to the first sub-expression, "\2" + to the second etc. For example the expression "(.*)\1" matches any string that + is repeated about its mid-point for example "abcabc" or "xyzxyz". A back + reference to a sub-expression that did not participate in any match, matches + the null string: NB this is different to some other regular expression + matchers. Back references are only available if the expression is compiled with + the flag regex_constants::bk_refs set. +

+

Characters by code +

+

This is an extension to the algorithm that is not available in other libraries, + it consists of the escape character followed by the digit "0" followed by the + octal character code. For example "\023" represents the character whose octal + code is 23. Where ambiguity could occur use parentheses to break the expression + up: "\0103" represents the character whose code is 103, "(\010)3" represents the + character 10 followed by "3". To match characters by their hexadecimal code, + use \x followed by a string of hexadecimal digits, optionally enclosed inside + {}, for example \xf0 or \x{aff}, notice the latter example is a Unicode + character.

+

Word operators +

+

The following operators are provided for compatibility with the GNU regular + expression library. +

+

"\w" matches any single character that is a member of the "word" character + class, this is identical to the expression "[[:word:]]". +

+

"\W" matches any single character that is not a member of the "word" character + class, this is identical to the expression "[^[:word:]]". +

+

"\<" matches the null string at the start of a word. +

+

"\>" matches the null string at the end of the word. +

+

"\b" matches the null string at either the start or the end of a word. +

+

"\B" matches a null string within a word. +

+

The start of the sequence passed to the matching algorithms is considered to be + a potential start of a word unless the flag match_not_bow is set. The end of + the sequence passed to the matching algorithms is considered to be a potential + end of a word unless the flag match_not_eow is set. +

+

Buffer operators +

+

The following operators are provided for compatibility with the GNU regular + expression library, and Perl regular expressions: +

+

"\`" matches the start of a buffer. +

+

"\A" matches the start of the buffer. +

+

"\'" matches the end of a buffer. +

+

"\z" matches the end of a buffer. +

+

"\Z" matches the end of a buffer, or possibly one or more new line characters + followed by the end of the buffer. +

+

A buffer is considered to consist of the whole sequence passed to the matching + algorithms, unless the flags match_not_bob or match_not_eob are set. +

+

Escape operator +

+

The escape character "\" has several meanings. +

+

Inside a set declaration the escape character is a normal character unless the + flag regex_constants::escape_in_lists is set in which case whatever follows the + escape is a literal character regardless of its normal meaning. +

+

The escape operator may introduce an operator for example: back references, or + a word operator. +

+

The escape operator may make the following character normal, for example "\*" + represents a literal "*" rather than the repeat operator. +

+

Single character escape sequences +

+

The following escape sequences are aliases for single characters: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Escape sequence + Character code + Meaning +  
 \a + 0x07 + Bell character. +  
 \f + 0x0C + Form feed. +  
 \n + 0x0A + Newline character. +  
 \r + 0x0D + Carriage return. +  
 \t + 0x09 + Tab character. +  
 \v + 0x0B + Vertical tab. +  
 \e + 0x1B + ASCII Escape character. +  
 \0dd + 0dd + An octal character code, where dd is one or + more octal digits. +  
 \xXX + 0xXX + A hexadecimal character code, where XX is one or more + hexadecimal digits. +  
 \x{XX} + 0xXX + A hexadecimal character code, where XX is one or more + hexadecimal digits, optionally a unicode character. +  
 \cZ + z-@ + An ASCII escape sequence control-Z, where Z is any + ASCII character greater than or equal to the character code for '@'. +  
+

+

Miscellaneous escape sequences: +

+

The following are provided mostly for perl compatibility, but note that there + are some differences in the meanings of \l \L \u and \U: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 \w + Equivalent to [[:word:]]. +  
 \W + Equivalent to [^[:word:]]. +  
 \s + Equivalent to [[:space:]]. +  
 \S + Equivalent to [^[:space:]]. +  
 \d + Equivalent to [[:digit:]]. +  
 \D + Equivalent to [^[:digit:]]. +  
 \l + Equivalent to [[:lower:]]. +  
 \L + Equivalent to [^[:lower:]]. +  
 \u + Equivalent to [[:upper:]]. +  
 \U + Equivalent to [^[:upper:]]. +  
 \C + Any single character, equivalent to '.'. +  
 \X + Match any Unicode combining character sequence, for + example "a\x 0301" (a letter a with an acute). +  
 \Q + The begin quote operator, everything that follows is + treated as a literal character until a \E end quote operator is found. +  
 \E + The end quote operator, terminates a sequence begun + with \Q. +  
+

+

What gets matched? +

+

+ When the expression is compiled as a perl-compatible regex then the matching + algorithms will perform a depth first search on the state machine and report + the first match found. +

+ When the expression is compiled as a POSIX-compatible regex then the matching + algorithms will match the first possible matching string, if more than one + string starting at a given location can match then it matches the longest + possible string, unless the flag match_any is set, in which case the first + match encountered is returned. Use of the match_any option can reduce the time + taken to find the match - but is only useful if the user is less concerned + about what matched - for example it would not be suitable for search and + replace operations. In cases where there are multiple possible matches all + starting at the same location, and all of the same length, then the match + chosen is the one with the longest first sub-expression, if that is the same + for two or more matches, then the second sub-expression will be examined and so + on. +

+ The examples in the following table illustrate the main differences between perl and + POSIX regular expression matching rules: +

+

+

+

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Expression

+
+

Text

+
+

POSIX leftmost longest match

+
+

ECMAScript depth first search match

+
+

+ a|ab +

+
+

+ xaby +

+
+

+ "ab"

+

+ "a"

+

+ .*([[:alnum:]]+).*

+

+ " abc def xyz "

+

$0 = " abc def xyz "
+ $1 = "abc"

+
+

$0 = " abc def xyz "
+ $1 = "z"

+
+

+ .*(a|xayy)

+

+ zzxayyzz

+

+ "zzxayy"

+

"zzxa"

+

+
+
+

+
+

These differences between perl matching rules, and POSIX matching rules, mean + that these two regular expression syntaxes differ not only in the features + offered, but also in the form that the state machine takes and/or the + algorithms used to traverse the state machine. +


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/syntax_option_type.html b/doc/Attic/syntax_option_type.html new file mode 100644 index 00000000..ece58de0 --- /dev/null +++ b/doc/Attic/syntax_option_type.html @@ -0,0 +1,334 @@ + + + + Boost.Regex: syntax_option_type + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

syntax_option_type

+
+

Boost.Regex Index

+
+

+
+

+

Synopsis

+

Type syntax_option_type is an implementation defined bitmask type that controls + how a regular expression string is to be interpreted.  For convenience + note that all the constants listed here are also duplicated within the scope + of class template basic_regex.

+
namespace std{ namespace regex_constants{
+
+typedef bitmask_type syntax_option_type;
+// these flags are standardized:
+static const syntax_option_type normal;
+static const syntax_option_type icase;
+static const syntax_option_type nosubs;
+static const syntax_option_type optimize;
+static const syntax_option_type collate;
+static const syntax_option_type ECMAScript = normal;
+static const syntax_option_type JavaScript = normal;
+static const syntax_option_type JScript = normal;
+static const syntax_option_type basic;
+static const syntax_option_type extended;
+static const syntax_option_type awk;
+static const syntax_option_type grep;
+static const syntax_option_type egrep;
+static const syntax_option_type sed = basic;
+static const syntax_option_type perl;
// these are boost.regex specific:
static const syntax_option_type escape_in_lists;
static const syntax_option_type char_classes;
static const syntax_option_type intervals;
static const syntax_option_type limited_ops;
static const syntax_option_type newline_alt;
static const syntax_option_type bk_plus_qm;
static const syntax_option_type bk_braces;
static const syntax_option_type bk_parens;
static const syntax_option_type bk_refs;
static const syntax_option_type bk_vbar;
static const syntax_option_type use_except;
static const syntax_option_type failbit;
static const syntax_option_type literal;
static const syntax_option_type nocollate;
static const syntax_option_type perlex;
static const syntax_option_type emacs;
+} // namespace regex_constants +} // namespace std
+

Description

+

The type syntax_option_type is an implementation defined bitmask + type (17.3.2.1.2). Setting its elements has the effects listed in the table + below, a valid value of type syntax_option_type will always have + exactly one of the elements normal, basic, extended, awk, grep, egrep, sed + or perl set.

+

Note that for convenience all the constants listed here are duplicated within + the scope of class template basic_regex, so you can use any of:

+
boost::regex_constants::constant_name
+

or

+
boost::regex::constant_name
+

or

+
boost::wregex::constant_name
+

in an interchangeable manner.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Element

+
+

Effect if set

+
+

normal

+
+

Specifies that the grammar recognized by the regular expression engine uses its + normal semantics: that is the same as that given in the ECMA-262, ECMAScript + Language Specification, Chapter 15 part 10, RegExp (Regular Expression) Objects + (FWD.1).

+

boost.regex also recognises most perl-compatible extensions in this mode.

+
+

icase

+
+

Specifies that matching of regular expressions against a character container + sequence shall be performed without regard to case.

+
+

nosubs

+
+

Specifies that when a regular expression is matched against a character + container sequence, then no sub-expression matches are to be stored in the + supplied match_results structure.

+
+

optimize

+
+

Specifies that the regular expression engine should pay more attention to the + speed with which regular expressions are matched, and less to the speed with + which regular expression objects are constructed. Otherwise it has no + detectable effect on the program output.  This currently has no effect for + boost.regex.

+
+

collate

+
+

Specifies that character ranges of the form "[a-b]" should be locale sensitive.

+
+

ECMAScript

+
+

The same as normal.

+
+

JavaScript

+
+

The same as normal.

+
+

JScript

+
+

The same as normal.

+
+

basic

+
+

Specifies that the grammar recognized by the regular expression engine is the + same as that used by POSIX basic regular expressions in IEEE Std 1003.1-2001, + Portable Operating System Interface (POSIX ), Base Definitions and Headers, + Section 9, Regular Expressions (FWD.1). +

+
+

extended

+
+

Specifies that the grammar recognized by the regular expression engine is the + same as that used by POSIX extended regular expressions in IEEE Std + 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions and + Headers, Section 9, Regular Expressions (FWD.1).

+
+

awk

+
+

Specifies that the grammar recognized by the regular expression engine is the + same as that used by POSIX utility awk in IEEE Std 1003.1-2001, Portable + Operating System Interface (POSIX ), Shells and Utilities, Section 4, awk + (FWD.1).

+

That is to say: the same as POSIX extended syntax, but with escape sequences in + character classes permitted.

+
+

grep

+
+

Specifies that the grammar recognized by the regular expression engine is the + same as that used by POSIX utility grep in IEEE Std 1003.1-2001, Portable + Operating System Interface (POSIX ), Shells and Utilities, Section 4, + Utilities, grep (FWD.1).

+

That is to say, the same as POSIX basic syntax, but with the newline character + acting as an alternation character in addition to "|".

+
+

egrep

+
+

Specifies that the grammar recognized by the regular expression engine is the + same as that used by POSIX utility grep when given the -E option in IEEE Std + 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and + Utilities, Section 4, Utilities, grep (FWD.1).

+

That is to say, the same as POSIX extended syntax, but with the newline + character acting as an alternation character in addition to "|".

+
+

sed

+
+

The same as basic.

+
+

perl

+
+

The same as normal.

+
+

+

+

The following constants are specific to this particular regular expression + implementation and do not appear in the + regular expression standardization proposal:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
regbase::escape_in_listsAllows the use of the escape "\" character in sets of + characters, for example [\]] represents the set of characters containing only + "]". If this flag is not set then "\" is an ordinary character inside sets.
regbase::char_classesWhen this bit is set, character classes [:classname:] + are allowed inside character set declarations, for example "[[:word:]]" + represents the set of all characters that belong to the character class "word".
regbase:: intervalsWhen this bit is set, repetition intervals are + allowed, for example "a{2,4}" represents a repeat of between 2 and 4 letter + a's.
regbase:: limited_opsWhen this bit is set all of "+", "?" and "|" are + ordinary characters in all situations.
regbase:: newline_altWhen this bit is set, then the newline character "\n" + has the same effect as the alternation operator "|".
regbase:: bk_plus_qmWhen this bit is set then "\+" represents the one or + more repetition operator and "\?" represents the zero or one repetition + operator. When this bit is not set then "+" and "?" are used instead.
regbase:: bk_bracesWhen this bit is set then "\{" and "\}" are used for + bounded repetitions and "{" and "}" are normal characters. This is the opposite + of default behavior.
regbase:: bk_parensWhen this bit is set then "\(" and "\)" are used to + group sub-expressions and "(" and ")" are ordinary characters, this is the + opposite of default behaviour.
regbase:: bk_refsWhen this bit is set then back references are + allowed.
regbase:: bk_vbarWhen this bit is set then "\|" represents the + alternation operator and "|" is an ordinary character. This is the opposite of + default behaviour.
regbase:: use_exceptWhen this bit is set then a bad_expression + exception will be thrown on error.  Use of this flag is deprecated - + reg_expression will always throw on error.
regbase:: failbitThis bit is set on error, if regbase::use_except is + not set, then this bit should be checked to see if a regular expression is + valid before usage.
regbase::literalAll characters in the string are treated as literals, + there are no special characters or escape sequences.
regbase::emacsProvides compatibility with the emacs + editor, equivalent to: bk_braces | bk_parens | bk_refs | bk_vbar.
+

+

+


+

+

Revised + + 11 April 2003 +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/thread_safety.html b/doc/Attic/thread_safety.html new file mode 100644 index 00000000..2537c2b2 --- /dev/null +++ b/doc/Attic/thread_safety.html @@ -0,0 +1,66 @@ + + + + Boost.Regex: Thread Safety + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Thread Safety

+
+

Boost.Regex Index

+
+

+
+

+

Class basic_regex<> and its typedefs regex and wregex are thread safe, in + that compiled regular expressions can safely be shared between threads. The + matching algorithms regex_match, regex_search, regex_grep, regex_format and + regex_merge are all re-entrant and thread safe. Class match_results is now + thread safe, in that the results of a match can be safely copied from one + thread to another (for example one thread may find matches and push + match_results instances onto a queue, while another thread pops them off the + other end), otherwise use a separate instance of match_results per thread. +

+

The POSIX API functions are all re-entrant and thread safe, regular expressions + compiled with regcomp can also be shared between threads. +

+

The class RegEx is only thread safe if each thread gets its own RegEx instance + (apartment threading) - this is a consequence of RegEx handling both compiling + and matching regular expressions. +

+

Finally note that changing the global locale invalidates all compiled regular + expressions, therefore calling set_locale from one thread while another + uses regular expressions will produce unpredictable results. +

+

+ There is also a requirement that there is only one thread executing prior to + the start of main(). +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/uarrow.gif b/doc/Attic/uarrow.gif new file mode 100644 index 0000000000000000000000000000000000000000..6afd20c3857127c21fc9bcd52ec347e32c21578c GIT binary patch literal 1666 zcmZ?wbhEHb)Me0S_|CwP5S3S3-SznXe?>*j_wTyvm7!+LipuF=EDz;>HaU#`S8h zGbFWQMU_0|jnYg*R~!4zoU;5Lr*b+kNLjFqO}~hCs-EvWIiqyb(4{u9n_SYiN0ser zo_b{M{x_V;vAo&^{Mxyq#wF4=oeHiqM6_c}V>j8vuGVp>F%O-V(SCB__7}X`>4L_U zVm6)9HjN6d6V!a?>4h#eja{wfQZZ%y0 zcBxI;Cfl@)`abn8dE3M44hNU*NNPV`GVOBRyc?a%?}et+Ma?OV1z ztZr|7`^k*Hb7j4o3a6defBuPS+ET~7&h+g3q~w&@bLM{g z{v)%b=KJ^W@87?F{`~p9d-twgyLRr}xnswU?cKX~>(;Gn*REZ+| zz|hB_12PztCmcBbGstqvcx+g3u$ey;0G87DHmwvOZbsa=mLSozov4QVirN zkYH*$(Ipxfv7$$*g>5oV5eFyJlXU-iJ*75&36GSIaY!gPakWlRX$xFzo8-`7pp?lY znV<6SW%6gfo)61`o?wS!MoswK)ayS4bmo_)pa{hw_vEcTxM zPDanKulTRm%EV*qzc;dPkof=bLaW;>i<48e&;Q%IfLkeak%MEWvmvi+)u$)tnah)3 zzYobUlt|ZDsp2B;lOu4zr|;?Xi81Aljx7>18v}w8=dCnx5x37caI~QDN1{wB=fsDu zys-rjSlMMb6pskc%4B2mG~_!tncsDW+KG_rFW=`ov1BY|m#=3r>d5VUbL<2|ufd}y zN(W?>L__VgzDUVA*Qs(Sw#pQ7C?(vR=Ao3Rl+=-VrMi-oT@Uca~Kt?WUK2AMe@GvcE{Cd?3LKD+Sc5{|74 z97Oz04zdV)C~!SoA`+l@KtO%#7iQtykpJx0QyQw=S@@C{Bm{XmvjpCgs&PoWyynza z@uT9&9F9kLS>?oz@Jp^ZI4$|G+;eeJr9BJYY?{Avi<7`&Kf{hFC9km6PN_VMY=XP) zNx5#;y81nfPvKKUYHVPNAq&@R!H??_wKsmy<&~@n@K7`k-1@Aaqm`jm%<+RYi=e9D zj7f$i_#F_GNyn$i)?1(nYpwDZ(GJJ{0WJvobi zo9%_7!m6+A3JI)9KcCK6Z~8WwZ_~3MkMqrz&zGy7qPKj`1lHGq4R j5{XCdyxsBG&w8Q7<)tgD4zTO*shVAswMUtek--`O>qI)6 literal 0 HcmV?d00001 diff --git a/doc/bad_expression.html b/doc/bad_expression.html new file mode 100644 index 00000000..cf840f34 --- /dev/null +++ b/doc/bad_expression.html @@ -0,0 +1,77 @@ + + + + Boost.Regex: bad_expression + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

class bad_expression

+
+

Boost.Regex Index

+
+

+
+

+

Synopsis

+

#include <boost/pat_except.hpp> +

+

The class bad_expression defines the type of objects thrown as + exceptions to report errors during the conversion from a string representing a + regular expression to a finite state machine.  

+
namespace boost{
+
+class bad_pattern : public std::runtime_error
+{
+public:
+   explicit bad_pattern(const std::string& s) : std::runtime_error(s){};
+};
+
+class bad_expression : public bad_pattern
+{
+public:
+   bad_expression(const std::string& s) : bad_pattern(s) {}
+};
+
+
+} // namespace boost
+

Description

+
bad_expression(const string& what_arg); 
+

Effects: Constructs an object of class bad_expression.

+ +

+ Postcondition: strcmp(what(), what_arg.c_str()) == 0. +

Footnotes: the class bad_pattern forms the base class for all + pattern-matching exceptions, of which bad_expression is one. The choice + of std::runtime_error as the base class for bad_pattern + is moot: depending upon how the library is used, exceptions may be either logic + errors (programmer supplied expressions) or run time errors (user supplied + expressions). +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/basic_regex.html b/doc/basic_regex.html new file mode 100644 index 00000000..0a7a2775 --- /dev/null +++ b/doc/basic_regex.html @@ -0,0 +1,944 @@ + + + + Boost.Regex: basic_regex + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

basic_regex

+
+

Boost.Regex Index

+
+

+
+

+

Synopsis

+
#include <boost/regex.hpp> 
+

The template class basic_regex encapsulates regular expression parsing + and compilation. The class takes three template parameters: +

+

charT: determines the character type, i.e. either char or + wchar_t. +

+

traits: determines the behaviour of the character type, for + example which character class names are recognized. A default traits class is + provided: regex_traits<charT>. +

+

Allocator: the allocator class used to allocate memory by the + class. +

+

For ease of use there are two typedefs that define the two standard basic_regex + instances, unless you want to use custom traits classes or allocators, you + won't need to use anything other than these: +

+
namespace boost{
+template <class charT, class traits = regex_traits<charT>, class Allocator = std::allocator<charT>  >
+class basic_regex;
+typedef basic_regex<char> regex;
+typedef basic_regex<wchar_t> wregex;
+}
+

The definition of basic_regex follows: it is based very closely on + class basic_string, and fulfils the requirements for a constant-container of charT. +

+
namespace boost{
+
+template <class charT,
+          class traits = regex_traits<charT>,
+          class Allocator = allocator<charT> >
+class basic_regex
+{
+public:
+   // types:
+   typedef          charT                                value_type;
+   typedef          implementation defined               const_iterator;
+   typedef          const_iterator                       iterator;
+   typedef typename Allocator::reference                 reference;
+   typedef typename Allocator::const_reference           const_reference;
+   typedef typename Allocator::difference_type           difference_type;
+   typedef typename Allocator::size_type                 size_type;
+   typedef          Allocator                            allocator_type;
+   typedef          regex_constants::syntax_option_type  flag_type;
+   typedef typename traits::locale_type                  locale_type;
+
+   // constants:
+   static const regex_constants::syntax_option_type normal = regex_constants::normal;
+   static const regex_constants::syntax_option_type icase = regex_constants::icase;
+   static const regex_constants::syntax_option_type nosubs = regex_constants::nosubs;
+   static const regex_constants::syntax_option_type optimize = regex_constants::optimize;
+   static const regex_constants::syntax_option_type collate = regex_constants::collate;
+   static const regex_constants::syntax_option_type ECMAScript = normal;
+   static const regex_constants::syntax_option_type JavaScript = normal;
+   static const regex_constants::syntax_option_type JScript = normal;
+   // these flags are optional, if the functionality is supported
+   // then the flags shall take these names.
+   static const regex_constants::syntax_option_type basic = regex_constants::basic;
+   static const regex_constants::syntax_option_type extended = regex_constants::extended;
+   static const regex_constants::syntax_option_type awk = regex_constants::awk;
+   static const regex_constants::syntax_option_type grep = regex_constants::grep;
+   static const regex_constants::syntax_option_type egrep = regex_constants::egrep;
+   static const regex_constants::syntax_option_type sed = regex_constants::sed;
+   static const regex_constants::syntax_option_type perl = regex_constants::perl;
+
+   // construct/copy/destroy:
+   explicit basic_regex(const Allocator& a = Allocator());
+   explicit basic_regex(const charT* p, flag_type f = regex_constants::normal,
+                        const Allocator& a = Allocator());
+   basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal,
+               const Allocator& a = Allocator());
+   basic_regex(const charT* p, size_type len, flag_type f,
+               const Allocator& a = Allocator());
+   basic_regex(const basic_regex&);
+   template <class ST, class SA>
+   explicit basic_regex(const basic_string<charT, ST, SA>& p,
+                        flag_type f = regex_constants::normal,
+                        const Allocator& a = Allocator());
+   template <class InputIterator>
+   basic_regex(InputIterator first, InputIterator last,
+               flag_type f = regex_constants::normal,
+               const Allocator& a = Allocator());
+
+   ~basic_regex();
+   basic_regex& operator=(const basic_regex&);
+   basic_regex& operator=(const charT* ptr);
+   template <class ST, class SA>
+   basic_regex& operator=(const basic_string<charT, ST, SA>& p);
+
+   // iterators:
+   const_iterator begin() const;
+   const_iterator end() const;
+   // capacity:
+   size_type size() const;
+   size_type max_size() const;
+   bool empty() const;
+   unsigned mark_count() const;
+
+   //
+   // modifiers:
+   basic_regex& assign(const basic_regex& that);
+   basic_regex& assign(const charT* ptr, flag_type f = regex_constants::normal);
+   basic_regex& assign(const charT* first, const charT* last,
+                       flag_type f = regex_constants::normal);
+   template <class string_traits, class A>
+   basic_regex& assign(const basic_string<charT, string_traits, A>& s,
+                       flag_type f = regex_constants::normal);
+   template <class InputIterator>
+   basic_regex& assign(InputIterator first, InputIterator last,
+                       flag_type f = regex_constants::normal);
+
+   // const operations:
+   Allocator get_allocator() const;
+   flag_type getflags() const;
+   basic_string<charT> str() const;
+   int compare(const basic_regex&) const;
+   // locale:
+   locale_type imbue(locale_type loc);
+   locale_type getloc() const;
+   // swap
+   void swap(basic_regex&) throw();
+};
+
+template <class charT, class traits, class Allocator>
+bool operator == (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+template <class charT, class traits, class Allocator>
+bool operator != (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+template <class charT, class traits, class Allocator>
+bool operator < (const basic_regex<charT, traits, Allocator>& lhs,
+                 const basic_regex<charT, traits, Allocator>& rhs);
+template <class charT, class traits, class Allocator>
+bool operator <= (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+template <class charT, class traits, class Allocator>
+bool operator >= (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+template <class charT, class traits, class Allocator>
+bool operator > (const basic_regex<charT, traits, Allocator>& lhs,
+                 const basic_regex<charT, traits, Allocator>& rhs);
+
+template <class charT, class io_traits, class re_traits, class Allocator>
+basic_ostream<charT, io_traits>&
+   operator << (basic_ostream<charT, io_traits>& os,
+                const basic_regex<charT, re_traits, Allocator>& e);
+
+template <class charT, class traits, class Allocator>
+void swap(basic_regex<charT, traits, Allocator>& e1,
+          basic_regex<charT, traits, Allocator>& e2);
+
+typedef basic_regex<char> regex;
+typedef basic_regex<wchar_t> wregex;
+
+} // namespace boost
+

Description

+

Class basic_regex has the following public member functions: +

+

basic_regex constants

+
static const regex_constants::syntax_option_type normal = regex_constants::normal;
+static const regex_constants::syntax_option_type icase = regex_constants::icase;
+static const regex_constants::syntax_option_type nosubs = regex_constants::nosubs;
+static const regex_constants::syntax_option_type optimize = regex_constants::optimize;
+static const regex_constants::syntax_option_type collate = regex_constants::collate;
+static const regex_constants::syntax_option_type ECMAScript = normal;
+static const regex_constants::syntax_option_type JavaScript = normal;
+static const regex_constants::syntax_option_type JScript = normal;
+static const regex_constants::syntax_option_type basic = regex_constants::basic;
+static const regex_constants::syntax_option_type extended = regex_constants::extended;
+static const regex_constants::syntax_option_type awk = regex_constants::awk;
+static const regex_constants::syntax_option_type grep = regex_constants::grep;
+static const regex_constants::syntax_option_type egrep = regex_constants::egrep;
+static const regex_constants::syntax_option_type sed = regex_constants::sed;
+static const regex_constants::syntax_option_type perl = regex_constants::perl;
+

The static constant members are provided as synonyms for the constants declared + in namespace boost::regex_constants; for each constant of type syntax_option_type + declared in namespace boost::regex_constants then a constant with + the same name, type and value is declared within the scope of basic_regex.

+

basic_regex constructors

+

In all basic_regex constructors, a copy of the Allocator + argument is used for any memory allocation performed by the constructor or + member functions during the lifetime of the object. +

+
basic_regex(const Allocator& a = Allocator());
+ +

+ Effects: Constructs an object of class basic_regex. The + postconditions of this function are indicated in the table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

true

+
+

size()

+
+

0

+
+

str()

+
+

basic_string<charT>()

+

+
+

+
basic_regex(const charT* p, flag_type f = regex_constants::normal, const Allocator& a = Allocator());
+ +

+ Requires: p shall not be a null pointer.

+

+ Throws: bad_expression if p is not a valid regular + expression.

+

+ Effects: Constructs an object of class basic_regex; the + object's internal finite state machine is constructed from the regular + expression contained in the null-terminated string p, and interpreted + according to the option flags specified + in f. The postconditions of this function are indicated in the + table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

false

+
+

size()

+
+

char_traits<charT>::length(p)

+
+

str()

+
+

basic_string<charT>(p)

+
+

getflags()

+
+

f

+
+

mark_count()

+
+

The number of marked sub-expressions within the expression.

+

+
+

+
basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal, const Allocator& a = Allocator());
+ +

+ Requires: p1 and p2 are not null pointers, p1 < p2.

+

+ Throws: bad_expression if [p1,p2) is not a valid regular + expression.

+

+ Effects: Constructs an object of class basic_regex; the + object's internal finite state machine is constructed from the regular + expression contained in the sequence of characters [p1,p2), and interpreted + according to the option flags specified in f. + The postconditions of this function are indicated in the table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

false

+
+

size()

+
+

std::distance(p1,p2)

+
+

str()

+
+

basic_string<charT>(p1,p2)

+
+

getflags()

+
+

f

+
+

mark_count()

+
+

The number of marked sub-expressions within the expression.

+

+
+

+
basic_regex(const charT* p, size_type len, flag_type f, const Allocator& a = Allocator());
+ +

+ Requires: p shall not be a null pointer, len < max_size().

+

+ Throws: bad_expression if p is not a valid regular + expression.

+

+ Effects: Constructs an object of class basic_regex; the + object's internal finite state machine is constructed from the regular + expression contained in the sequence of characters [p, p+len), and interpreted + according to the option flags specified in f. + The postconditions of this function are indicated in the table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

false

+
+

size()

+
+

len

+
+

str()

+
+

basic_string<charT>(p, len)

+
+

getflags()

+
+

f

+
+

mark_count()

+
+

The number of marked sub-expressions within the expression.

+

+
+

+
basic_regex(const basic_regex& e);
+ +

+ Effects: Constructs an object of class basic_regex as a + copy of the object e. The postconditions of this function are indicated + in the table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

e.empty()

+
+

size()

+
+

e.size()

+
+

str()

+
+

e.str()

+
+

getflags()

+
+

e.getflags()

+
+

mark_count()

+
+

e.mark_count()

+

+
+

+
template <class ST, class SA>
+basic_regex(const basic_string<charT, ST, SA>& s,
+            flag_type f = regex_constants::normal, const Allocator& a = Allocator());
+ +

+ Throws: bad_expression if s is not a valid regular + expression.

+

+ Effects: Constructs an object of class basic_regex; the + object's internal finite state machine is constructed from the regular + expression contained in the string s, and interpreted according to the + option flags specified in f. The postconditions of this function + are indicated in the table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

false

+
+

size()

+
+

s.size()

+
+

str()

+
+

s

+
+

getflags()

+
+

f

+
+

mark_count()

+
+

The number of marked sub-expressions within the expression.

+

+
+

+
template <class ForwardIterator>
+basic_regex(ForwardIterator first, ForwardIterator last,
+            flag_type f = regex_constants::normal, const Allocator& a = Allocator());
+ +

+ Throws: bad_expression if the sequence [first, last) + is not a valid regular expression.

+

+ Effects: Constructs an object of class basic_regex; the + object's internal finite state machine is constructed from the regular + expression contained in the sequence of characters [first, last), and + interpreted according to the option flags + specified in f. The postconditions of this function are indicated in the + table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

false

+
+

size()

+
+

distance(first,last)

+
+

str()

+
+

basic_string<charT>(first,last)

+
+

getflags()

+
+

f

+
+

mark_count()

+
+

The number of marked sub-expressions within the expression.

+

+
+

+
basic_regex& operator=(const basic_regex& e);
+ +

+ Effects: Returns the result of assign(e.str(), e.getflags()).

basic_regex& operator=(const charT* ptr);
+ +

+ Requires: ptr shall not be a null pointer.

+

+ Effects: Returns the result of assign(ptr).

template <class ST, class SA>
+basic_regex& operator=(const basic_string<charT, ST, SA>& p);
+ +

+ Effects: Returns the result of assign(p).

+

basic_regex iterators

+
const_iterator begin() const;
+ +

+ Effects: Returns a starting iterator to a sequence of characters + representing the regular expression.

const_iterator end() const;
+ +

+ Effects: Returns a termination iterator to a sequence of characters + representing the regular expression.

+

basic_regex capacity

+
size_type size() const;
+ +

+ Effects: Returns the length of the sequence of characters representing + the regular expression.

size_type max_size() const;
+ +

+ Effects: Returns the maximum length of the sequence of characters + representing the regular expression.

bool empty() const;
+ +

+ Effects: Returns true if the object does not contain a valid + regular expression, otherwise false.

unsigned mark_count() const;
+ +

+ Effects: Returns the number of marked sub-expressions within the regular + expression.

+

basic_regex assign

+
basic_regex& assign(const basic_regex& that);
+ +

+ Effects: Returns assign(that.str(), that.getflags()).

basic_regex& assign(const charT* ptr, flag_type f = regex_constants::normal);
+ +

+ Effects: Returns assign(string_type(ptr), f).

basic_regex& assign(const charT* first, const charT* last,
+                    flag_type f = regex_constants::normal);
+ +

+ Effects: Returns assign(string_type(first, last), f).

template <class string_traits, class A>
+basic_regex& assign(const basic_string<charT, string_traits, A>& s,
+                    flag_type f = regex_constants::normal);
+ +

+ Throws: bad_expression if s is not a valid regular + expression.

+

+ Returns: *this.

+

+ Effects: Assigns the regular expression contained in the string s, + interpreted according to the option flags specified + in f. The postconditions of this function are indicated in the + table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

false

+
+

size()

+
+

s.size()

+
+

str()

+
+

s

+
+

getflags()

+
+

f

+
+

mark_count()

+
+

The number of marked sub-expressions within the expression.

+

+
+

+
template <class InputIterator>
+basic_regex& assign(InputIterator first, InputIterator last,
+                    flag_type f = regex_constants::normal);
+ +

+ Requires: The type InputIterator corresponds to the Input Iterator + requirements (24.1.1).

+

+ Effects: Returns assign(string_type(first, last), f).

+

basic_regex constant operations

+
Allocator get_allocator() const;
+ +

+ Effects: Returns a copy of the Allocator that was passed to the object's + constructor.

flag_type getflags() const;
+ +

+ Effects: Returns a copy of the regular expression syntax flags that were + passed to the object's constructor, or the last call to assign.

basic_string<charT> str() const;
+ +

+ Effects: Returns a copy of the character sequence passed to the object's + constructor, or the last call to assign.

int compare(const basic_regex& e)const;
+ +

+ Effects: If getflags() == e.getflags() then returns str().compare(e.str()), + otherwise returns getflags() - e.getflags().

+

basic_regex locale

+
locale_type imbue(locale_type l);
+ +

+ Effects: Returns the result of traits_inst.imbue(l) where + traits_inst is a (default initialized) instance of the template + parameter traits stored within the object. Calls to imbue + invalidate any currently contained regular expression.

+

+ Postcondition: empty() == true.

locale_type getloc() const;
+ +

+ Effects: Returns the result of traits_inst.getloc() where + traits_inst is a (default initialized) instance of the template + parameter traits stored within the object.

+

basic_regex swap

+
void swap(basic_regex& e) throw();
+ +

+ Effects: Swaps the contents of the two regular expressions.

+

+ Postcondition: *this contains the regular expression that was in e, + e contains the regular expression that was in *this.

+

+ Complexity: constant time.

+

basic_regex non-member functions

+
basic_regex non-member comparison operators 
+
template <class charT, class traits, class Allocator>
+bool operator == (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: Returns lhs.compare(rhs) == 0.

template <class charT, class traits, class Allocator>
+bool operator != (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: Returns lhs.compare(rhs) != 0.

template <class charT, class traits, class Allocator>
+bool operator < (const basic_regex<charT, traits, Allocator>& lhs,
+                 const basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: Returns lhs.compare(rhs) < 0.

template <class charT, class traits, class Allocator>
+bool operator <= (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: Returns lhs.compare(rhs) <= 0.

template <class charT, class traits, class Allocator>
+bool operator >= (const basic_regex<charT, traits, Allocator>& lhs,
+                  const basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: Returns lhs.compare(rhs) >= 0.

template <class charT, class traits, class Allocator>
+bool operator > (const basic_regex<charT, traits, Allocator>& lhs,
+                 const basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: Returns lhs.compare(rhs) > 0.

+
basic_regex inserter.
+
template <class charT, class io_traits, class re_traits, class Allocator>
+basic_ostream<charT, io_traits>&
+   operator << (basic_ostream<charT, io_traits>& os,
+                const basic_regex<charT, re_traits, Allocator>& e);
+ +

+ Effects: Returns (os << e.str()).

+
basic_regex non-member swap
+
template <class charT, class traits, class Allocator>
+void swap(basic_regex<charT, traits, Allocator>& lhs,
+          basic_regex<charT, traits, Allocator>& rhs);
+ +

+ Effects: calls lhs.swap(rhs).

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/contacts.html b/doc/contacts.html new file mode 100644 index 00000000..1f256990 --- /dev/null +++ b/doc/contacts.html @@ -0,0 +1,86 @@ + + + + Boost.Regex: Contacts + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Contacts and Acknowledgements

+
+

Boost.Regex Index

+
+

+
+

+

The author can be contacted at John_Maddock@compuserve.com, + the home page for this library is at + http://ourworld.compuserve.com/homepages/John_Maddock/regexpp.htm, and + the official boost version can be obtained from www.boost.org/libraries.htm. +

+

I am indebted to Robert Sedgewick's "Algorithms in C++" for forcing me to think + about algorithms and their performance, and to the folks at boost for forcing + me to think, period. The following people have all contributed useful + comments or fixes: Dave Abrahams, Mike Allison, Edan Ayal, Jayashree + Balasubramanian, Jan Bölsche, Beman Dawes, Paul Baxter, David Bergman, David + Dennerline, Edward Diener, Peter Dimov, Robert Dunn, Fabio Forno, Tobias + Gabrielsson, Rob Gillen, Marc Gregoire, Chris Hecker, Nick Hodapp, Jesse Jones, + Martin Jost, Boris Krasnovskiy, Jan Hermelink, Max Leung, Wei-hao Lin, Jens + Maurer, Richard Peters, Heiko Schmidt, Jason Shirk, Gerald Slacik, Scobie + Smith, Mike Smyth, Alexander Sokolovsky, Hervé Poirier, Michael Raykh, Marc + Recht, Scott VanCamp, Bruno Voigt, Alexey Voinov, Jerry Waldorf, Rob Ward, + Lealon Watts, Thomas Witt and Yuval Yosef. I am also grateful to the manuals + supplied with the Henry Spencer, Perl and GNU regular expression libraries - + wherever possible I have tried to maintain compatibility with these libraries + and with the POSIX standard - the code however is entirely my own, including + any bugs! I can absolutely guarantee that I will not fix any bugs I don't know + about, so if you have any comments or spot any bugs, please get in touch. +

+

Useful further information can be found at: +

+

A short tutorial on regular expressions + can be found here.

+

The Open Unix + Specification contains a wealth of useful material, including the + regular expression syntax, and specifications for + <regex.h> and + <nl_types.h>. +

+

The Pattern Matching Pointers + site is a "must visit" resource for anyone interested in pattern matching. +

+

Glimpse and Agrep, use a + simplified regular expression syntax to achieve faster search times. +

+

Udi Manber and + Ricardo Baeza-Yates + both have a selection of useful pattern matching papers available from their + respective web sites. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/examples.html b/doc/examples.html new file mode 100644 index 00000000..b8d61d2d --- /dev/null +++ b/doc/examples.html @@ -0,0 +1,107 @@ + + + + Boost.Regex: Examples + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Examples

+
+

Boost.Regex Index

+
+

+
+

+

There are three demo applications that ship with this library, they all come + with makefiles for Borland, Microsoft and gcc compilers, otherwise you will + have to create your own makefiles. +

+
regress.exe: +
+

A regression test application that gives the matching/searching algorithms a + full workout. The presence of this program is your guarantee that the library + will behave as claimed - at least as far as those items tested are concerned - + if anyone spots anything that isn't being tested I'd be glad to hear about it. +

+

Files: parse.cpp, + regress.cpp, tests.cpp. +

+
jgrep.exe +
+

A simple grep implementation, run with no command line options to find out its + usage. Look at fileiter.cpp/fileiter.hpp and + the mapfile class to see an example of a "smart" bidirectional iterator that + can be used with boost.regex or any other STL algorithm. +

+

Files: jgrep.cpp, + main.cpp. +

+
timer.exe +
+

A simple interactive expression matching application, the results of all + matches are timed, allowing the programmer to optimize their regular + expressions where performance is critical. +

+

Files: regex_timer.cpp. +

+
Code snippets
+

The snippets examples contain the code examples used in the documentation:

+

credit_card_example.cpp: + Credit card number formatting code.

+

partial_regex_grep.cpp: + Search example using partial matches.

+

partial_regex_match.cpp: + regex_match example using partial matches.

+

regex_grep_example_1.cpp: + regex_grep example 1: searches a cpp file for class definitions.

+

regex_grep_example_2.cpp: + regex_grep example 2: searches a cpp file for class definitions, using a global + callback function. +

+

regex_grep_example_3.cpp: + regex_grep example 3: searches a cpp file for class definitions, using a bound + member function callback.

+

regex_grep_example_4.cpp: + regex_grep example 4: searches a cpp file for class definitions, using a C++ + Builder closure as a callback.

+

regex_match_example.cpp: + ftp based regex_match example.

+

regex_merge_example.cpp: + regex_merge example: converts a C++ file to syntax highlighted HTML.

+

regex_replace_example.cpp: + regex_replace example: converts a C++ file to syntax highlighted HTML

+

regex_search_example.cpp: + regex_search example: searches a cpp file for class definitions.

+

regex_split_example_1.cpp: + regex_split example: split a string into tokens.

+

regex_split_example_2.cpp + : regex_split example: spit out linked URL's. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/faq.html b/doc/faq.html new file mode 100644 index 00000000..675a2405 --- /dev/null +++ b/doc/faq.html @@ -0,0 +1,118 @@ + + + + Boost.Regex: FAQ + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

FAQ

+
+

Boost.Regex Index

+
+

+
+

+ +

 Q. Why can't I use the "convenience" versions of + regex_match / regex_search / regex_grep / regex_format / regex_merge? +

+
+

A. These versions may or may not be available depending upon the capabilities + of your compiler, the rules determining the format of these functions are quite + complex - and only the versions visible to a standard compliant compiler are + given in the help. To find out what your compiler supports, run + <boost/regex.hpp> through your C++ pre-processor, and search the output + file for the function that you are interested in. +

+

Q. I can't get regex++ to work with escape characters, what's going on? +

+
+

A. If you embed regular expressions in C++ code, then remember that escape + characters are processed twice: once by the C++ compiler, and once by the + regex++ expression compiler, so to pass the regular expression \d+ to regex++, + you need to embed "\\d+" in your code. Likewise to match a literal backslash + you will need to embed "\\\\" in your code. +

+

Q. Why does using parentheses in a POSIX regular expression change the result + of a match?

+

For POSIX (extended and basic) regular expressions, but not for perl regexes, + parentheses don't only mark; they determine what the best match is as well. + When the expression is compiled as a POSIX basic or extended regex then + Boost.regex follows the POSIX standard leftmost longest rule for determining + what matched. So if there is more than one possible match after considering the + whole expression, it looks next at the first sub-expression and then the second + sub-expression and so on. So...

+
"(0*)([0-9]*)" against "00123" would produce
+$1 = "00"
+$2 = "123"
+

whereas

+
"0*([0-9]*)" against "00123" would produce
+$1 = "00123"
+

If you think about it, had $1 only matched the "123", this would be "less good" + than the match "00123" which is both further to the left and longer. If you + want $1 to match only the "123" part, then you need to use something like:

+
"0*([1-9][0-9]*)"
+

as the expression.

+

Q. Why don't character ranges work properly (POSIX mode + only)? +
+ A. The POSIX standard specifies that character range expressions are locale + sensitive - so for example the expression [A-Z] will match any collating + element that collates between 'A' and 'Z'. That means that for most locales + other than "C" or "POSIX", [A-Z] would match the single character 't' for + example, which is not what most people expect - or at least not what most + people have come to expect from regular expression engines. For this reason, + the default behaviour of boost.regex (perl mode) is to turn locale sensitive + collation off by not setting the regex_constants::collate compile time flag. + However if you set a non-default compile time flag - for example + regex_constants::extended or regex_constants::basic, then locale dependent + collation will be enabled, this also applies to the POSIX API functions which + use either regex_constants::extended or regex_constants::basic internally. [Note + - when regex_constants::nocollate is in effect, the library behaves "as if" the + LC_COLLATE locale category were always "C", regardless of what it's actually set + to - end note]. +

+

Q. Why are there no throw specifications on any of the + functions? What exceptions can the library throw? +

+

+ A. Not all compilers support (or honor) throw specifications, others support + them but with reduced efficiency. Throw specifications may be added at a later + date as compilers begin to handle this better. The library should throw only + three types of exception: boost::bad_expression can be thrown by basic_regex + when compiling a regular expression, std::runtime_error can be thrown when a + call to basic_regex::imbue tries to open a message catalogue that doesn't + exist, or when a call to regex_search or regex_match results in an + "everlasting" search, or when a call to RegEx::GrepFiles or + RegEx::FindFiles tries to open a file that cannot be opened, finally + std::bad_alloc can be thrown by just about any of the functions in this + library. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/format_syntax.html b/doc/format_syntax.html new file mode 100644 index 00000000..dc71d6d8 --- /dev/null +++ b/doc/format_syntax.html @@ -0,0 +1,217 @@ + + + + Boost.Regex: Format String Syntax + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Format String Syntax

+
+

Boost.Regex Index

+
+

+
+

+

Format strings are used by the algorithm + regex_merge and by match_results::format, + and are used to transform one string into another. +

+

There are three kinds of format string: sed, perl and extended, the extended + syntax is a superset of the others so this is covered first. +

+

Extended format syntax +

+

In format strings, all characters are treated as literals except: ()$\?: +

+

To use any of these as literals you must prefix them with the escape character + \ +

+

The following special sequences are recognized: 
+   +
+ Grouping: +

+

Use the parenthesis characters ( and ) to group sub-expressions within the + format string, use \( and \) to represent literal '(' and ')'. 
+   +
+ Sub-expression expansions: +

+

The following perl like expressions expand to a particular matched + sub-expression: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 $`Expands to all the text from the end of the previous + match to the start of the current match, if there was no previous match in the + current operation, then everything from the start of the input string to the + start of the match. 
 $'Expands to all the text from the end of the match to + the end of the input string. 
 $&Expands to all of the current match. 
 $0Expands to all of the current match. 
 $NExpands to the text that matched sub-expression N. 
+

+

Conditional expressions: +

+

Conditional expressions allow two different format strings to be selected + dependent upon whether a sub-expression participated in the match or not: +

+

?Ntrue_expression:false_expression +

+

Executes true_expression if sub-expression N participated in the match, + otherwise executes false_expression. +

+

Example: suppose we search for "(while)|(for)" then the format string + "?1WHILE:FOR" would output what matched, but in upper case. 
+   +
+ Escape sequences: +

+

The following escape sequences are also allowed: +
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 \aThe bell character. 
 \fThe form feed character. 
 \nThe newline character. 
 \rThe carriage return character. 
 \tThe tab character. 
 \vA vertical tab character. 
 \xA hexadecimal character - for example \x0D. 
 \x{}A possible unicode hexadecimal character - for + example \x{1A0} 
 \cxThe ASCII escape character x, for example \c@ is + equivalent to escape-@. 
 \eThe ASCII escape character. 
 \ddAn octal character constant, for example \10. 
+

+

Perl format strings +

+

Perl format strings are the same as the default syntax except that the + characters ()?: have no special meaning. +

+

Sed format strings +

+

Sed format strings use only the characters \ and & as special characters. +

+

\n where n is a digit, is expanded to the nth sub-expression. +

+

& is expanded to the whole of the match (equivalent to \0). +

+

+ Other escape sequences are expanded as per the default syntax. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/headers.html b/doc/headers.html new file mode 100644 index 00000000..f6651d3d --- /dev/null +++ b/doc/headers.html @@ -0,0 +1,51 @@ + + + + Boost.Regex: Headers + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Headers

+
+

Boost.Regex Index

+
+

+
+

+

There are two main headers used by this library: <boost/regex.hpp> + provides full access to the entire library, while <boost/cregex.hpp> + provides access to just the high level class RegEx, and the POSIX API + functions. +

+

There is also a header containing only forward declarations + <boost/regex_fwd.hpp> for use when an interface is dependent upon + boost::basic_regex, but otherwise does not need the full definitions.

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/history.html b/doc/history.html new file mode 100644 index 00000000..126519f2 --- /dev/null +++ b/doc/history.html @@ -0,0 +1,44 @@ + + + + Boost.Regex: History + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

History

+
+

Boost.Regex Index

+
+

+
+

+

Todo.

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/implementation.html b/doc/implementation.html new file mode 100644 index 00000000..044fff31 --- /dev/null +++ b/doc/implementation.html @@ -0,0 +1,44 @@ + + + + Boost.Regex: Implementation + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Implementation

+
+

Boost.Regex Index

+
+

+
+

+

Todo.

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/index.html b/doc/index.html new file mode 100644 index 00000000..504d335d --- /dev/null +++ b/doc/index.html @@ -0,0 +1,119 @@ + + + + + Boost.Regex: Index + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Index

+
+

Boost.Regex Index

+
+
+
+
+

Contents

+
+
Overview
Installation
+
+
+
Borland C++ Builder
+ Microsoft Visual C++
GNU G++
+
Sun Forte Compiler
+ Other compilers (building with bjam)
+
+
+
Reference
+
+
+
Types
+
+
+
syntax_option_type
+ match_flag_type
class bad_expression
+
class regex_traits
+ class template basic_regex
class template + sub_match
class template + match_results
+
+
+
Algorithms
+
+
+
regex_match
regex_search
+
regex_replace
+
+
+
Iterators
+
+
+
regex_iterator
+
+
+
Misc.
+
+
+
POSIX API Compatibility Functions
+
Partial matches
+
Regular Expression Syntax
+
Format String Syntax
+
+
+
Deprecated interfaces
+
+
+
class regbase
class + template reg_expression
Algorithm + regex_grep
Algorithm regex_format
+
Algorithm regex_merge
+ Algorithm regex_split
+
class RegEx
+
+
+
+
+
FAQ
+
Appendix
+
+
+
Implementation
+
Thread Safety
+
Localisation
+
Examples
+
Headers
+
Redistributables and Library Names
+
History
+
Contacts and Acknowledgements
+
+
+
+
+

Revised + + 11 April 2003 +

+

© Copyright John Maddock 1998- + + 2003 +

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/install.html b/doc/install.html new file mode 100644 index 00000000..b816b2c8 --- /dev/null +++ b/doc/install.html @@ -0,0 +1,236 @@ + + + + Boost.Regex: Installation + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Installation

+
+

Boost.Regex Index

+
+

+
+

+

[ Important: If you are upgrading from the + 2.x version of this library then you will find a number of changes to the + documented header names and library interfaces, existing code should still + compile unchanged however - see + Note for Upgraders. ]

+

When you extract the library from its zip file, you must preserve its internal + directory structure (for example by using the -d option when extracting). If + you didn't do that when extracting, then you'd better stop reading this, delete + the files you just extracted, and try again! +

+

This library should not need configuring before use; most popular + compilers/standard libraries/platforms are already supported "as is". If you do + experience configuration problems, or just want to test the configuration with + your compiler, then the process is the same as for all of boost; see the + configuration library documentation.

+

The library will encase all code inside namespace boost. +

+

Unlike some other template libraries, this library consists of a mixture of + template code (in the headers) and static code and data (in cpp files). + Consequently it is necessary to build the library's support code into a library + or archive file before you can use it, instructions for specific platforms are + as follows: +

+

Borland C++ Builder: +

+
    +
  • + Open up a console window and change to the <boost>\libs\regex\build + directory. +
  • + Select the appropriate makefile (bcb4.mak for C++ Builder 4, bcb5.mak for C++ + Builder 5, and bcb6.mak for C++ Builder 6). +
  • + Invoke the makefile (pass the full path to your version of make if you have + more than one version installed, the makefile relies on the path to make to + obtain your C++ Builder installation directory and tools) for example: +
  • +
+
make -fbcb5.mak
+

The build process will build a variety of .lib and .dll files (the exact number + depends upon the version of Borland's tools you are using) the .lib and dll + files will be in a sub-directory called bcb4 or bcb5 depending upon the + makefile used. To install the libraries into your development system use:

+

make -fbcb5.mak install

+

library files will be copied to <BCROOT>/lib and the dll's to + <BCROOT>/bin, where <BCROOT> corresponds to the install path of + your Borland C++ tools. +

+

You may also remove temporary files created during the build process (excluding + lib and dll files) by using:

+

make -fbcb5.mak clean

+

Finally when you use regex++ it is only necessary for you to add the + <boost> root directory to your list of include directories for that + project. It is not necessary for you to manually add a .lib file to the + project; the headers will automatically select the correct .lib file for your + build mode and tell the linker to include it. There is one caveat however: the + library can not tell the difference between VCL and non-VCL enabled builds when + building a GUI application from the command line, if you build from the command + line with the 5.5 command line tools then you must define the pre-processor + symbol _NO_VCL in order to ensure that the correct link libraries are selected: + the C++ Builder IDE normally sets this automatically. Hint, users of the 5.5 + command line tools may want to add a -D_NO_VCL to bcc32.cfg in order to set + this option permanently. +

+

If you would prefer to do a static link to the regex libraries even when using + the dll runtime then define BOOST_REGEX_STATIC_LINK, and if you want to + suppress automatic linking altogether (and supply your own custom build of the + lib) then define BOOST_REGEX_NO_LIB.

+

If you are building with C++ Builder 6, you will find that + <boost/regex.hpp> can not be used in a pre-compiled header (the actual + problem is in <locale> which gets included by <boost/regex.hpp>), + if this causes problems for you, then try defining BOOST_NO_STD_LOCALE when + building, this will disable some features throughout boost, but may save you a + lot in compile times!

+

Microsoft Visual C++ 6 and 7

+

You need version 6 of MSVC to build this library. If you are using VC5 then you + may want to look at one of the previous releases of this + library +

+

Open up a command prompt, which has the necessary MSVC environment variables + defined (for example by using the batch file Vcvars32.bat installed by the + Visual Studio installation), and change to the <boost>\libs\regex\build + directory. +

+

Select the correct makefile - vc6.mak for "vanilla" Visual C++ 6 or + vc6-stlport.mak if you are using STLPort.

+

Invoke the makefile like this:

+

nmake -fvc6.mak

+

You will now have a collection of lib and dll files in a "vc6" subdirectory, to + install these into your development system use:

+

nmake -fvc6.mak install

+

The lib files will be copied to your <VC6>\lib directory and the dll + files to <VC6>\bin, where <VC6> is the root of your Visual C++ 6 + installation.

+

You can delete all the temporary files created during the build (excluding lib + and dll files) using:

+

nmake -fvc6.mak clean +

+

Finally when you use regex++ it is only necessary for you to add the + <boost> root directory to your list of include directories for that + project. It is not necessary for you to manually add a .lib file to the + project; the headers will automatically select the correct .lib file for your + build mode and tell the linker to include it. +

+

Note that if you want to statically link to the regex library when using the + dynamic C++ runtime, define BOOST_REGEX_STATIC_LINK when building your project + (this only has an effect for release builds). If you want to add the source + directly to your project then define BOOST_REGEX_NO_LIB to disable automatic + library selection.

+

Important: there have been some reports of + compiler-optimisation bugs affecting this library, (particularly with VC6 + versions prior to service patch 5) the workaround is to build the library using + /Oityb1 rather than /O2. That is to use all optimisation settings except /Oa. + This problem is reported to affect some standard library code as well (in fact + I'm not sure if the problem is with the regex code or the underlying standard + library), so it's probably worthwhile applying this workaround in normal + practice in any case.

+

Note: if you have replaced the C++ standard library that comes with VC6, then + when you build the library you must ensure that the environment variables + "INCLUDE" and "LIB" have been updated to reflect the include and library paths + for the new library - see vcvars32.bat (part of your Visual Studio + installation) for more details. Alternatively if STLPort is in c:/stlport then + you could use:

+

nmake INCLUDES="-Ic:/stlport/stlport" XLFLAGS="/LIBPATH:c:/stlport/lib" + -fvc6-stlport.mak

+

If you are building with the full STLPort v4.x, then use the vc6-stlport.mak + file provided and set the environment variable STLPORT_PATH to point to the + location of your STLport installation (Note that the full STLPort libraries + appear not to support single-thread static builds). +
+   +
+   +

+

GCC(2.95 and 3.x) +

+

There is a conservative makefile for the g++ compiler. From the command prompt + change to the <boost>/libs/regex/build directory and type: +

+

make -fgcc.mak +

+

At the end of the build process you should have a gcc sub-directory containing + release and debug versions of the library (libboost_regex.a and + libboost_regex_debug.a). When you build projects that use regex++, you will + need to add the boost install directory to your list of include paths and add + <boost>/libs/regex/build/gcc/libboost_regex.a to your list of library + files. +

+

There is also a makefile to build the library as a shared library:

+

make -fgcc-shared.mak

+

which will build libboost_regex.so and libboost_regex_debug.so.

+

Both of these makefiles support the following environment variables:

+

CXXFLAGS: extra compiler options - note that this applies to both the debug and + release builds.

+

INCLUDES: additional include directories.

+

LDFLAGS: additional linker options.

+

LIBS: additional library files.

+

For the more adventurous there is a configure script in + <boost>/libs/config; see the config library + documentation.

+

Sun Workshop 6.1

+

There is a makefile for the sun (6.1) compiler (C++ version 3.12). From the + command prompt change to the <boost>/libs/regex/build directory and type: +

+

dmake -f sunpro.mak +

+

At the end of the build process you should have a sunpro sub-directory + containing single and multithread versions of the library (libboost_regex.a, + libboost_regex.so, libboost_regex_mt.a and libboost_regex_mt.so). When you + build projects that use regex++, you will need to add the boost install + directory to your list of include paths and add + <boost>/libs/regex/build/sunpro/ to your library search path. +

+

This makefile supports the following environment variables:

+

CXXFLAGS: extra compiler options - note that this applies to both the single + and multithreaded builds.

+

INCLUDES: additional include directories.

+

LDFLAGS: additional linker options.

+

LIBS: additional library files.

+

LIBSUFFIX: a suffix to mangle the library name with (defaults to nothing).

+

This makefile does not set any architecture specific options like -xarch=v9, + you can set these by defining the appropriate macros, for example:

+

dmake CXXFLAGS="-xarch=v9" LDFLAGS="-xarch=v9" LIBSUFFIX="_v9" -f sunpro.mak

+

will build v9 variants of the regex library named libboost_regex_v9.a etc.

+

Other compilers: +

+

There is a generic makefile (generic.mak) + provided in <boost-root>/libs/regex/build - see that makefile for details + of environment variables that need to be set before use. Alternatively you can + use the Jam based build system. If + you need to configure the library for your platform, then refer to the + config library documentation + . +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/introduction.html b/doc/introduction.html new file mode 100644 index 00000000..ea0d588a --- /dev/null +++ b/doc/introduction.html @@ -0,0 +1,174 @@ + + + + Boost.Regex: Introduction + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Introduction

+
+

Boost.Regex Index

+
+

+
+

+

Regular expressions are a form of pattern-matching that is often used in text + processing; many users will be familiar with the Unix utilities grep, sed + and awk, and the programming language perl, each of which makes + extensive use of regular expressions. Traditionally C++ users have been limited + to the POSIX C APIs for manipulating regular expressions, and while regex++ + does provide these APIs, they do not represent the best way to use the + library. For example regex++ can cope with wide character strings, or search + and replace operations (in a manner analogous to either sed or perl), something + that traditional C libraries can not do.

+

The class boost::basic_regex is the key class in + this library; it represents a "machine readable" regular expression, and is + very closely modelled on std::basic_string, think of it as a string plus the + actual state-machine required by the regular expression algorithms. Like + std::basic_string there are two typedefs that are almost always the means by + which this class is referenced:

+
namespace boost{
+
+template <class charT, 
+          class traits = regex_traits<charT>, 
+          class Allocator = std::allocator<charT> >
+class basic_regex;
+
+typedef basic_regex<char> regex;
+typedef basic_regex<wchar_t> wregex;
+
+}
+

To see how this library can be used, imagine that we are writing a credit card + processing application. Credit card numbers generally come as a string of + 16-digits, separated into groups of 4-digits, and separated by either a space + or a hyphen. Before storing a credit card number in a database (not necessarily + something your customers will appreciate!), we may want to verify that the + number is in the correct format. To match any digit we could use the regular + expression [0-9], however ranges of characters like this are actually locale + dependent. Instead we should use the POSIX standard form [[:digit:]], or the + regex++ and perl shorthand for this \d (note that many older libraries tended + to be hard-coded to the C-locale, consequently this was not an issue for them). + That leaves us with the following regular expression to validate credit card + number formats:

+

(\d{4}[- ]){3}\d{4}

+

Here the parentheses act to group (and mark for future reference) + sub-expressions, and the {4} means "repeat exactly 4 times". This is an example + of the extended regular expression syntax used by perl, awk and egrep. Regex++ + also supports the older "basic" syntax used by sed and grep, but this is + generally less useful, unless you already have some basic regular expressions + that you need to reuse.

+

Now let's take that expression and place it in some C++ code to validate the + format of a credit card number:

+
bool validate_card_format(const std::string s)
+{
+   static const boost::regex e("(\\d{4}[- ]){3}\\d{4}");
+   return regex_match(s, e);
+}
+

Note how we had to add some extra escapes to the expression: remember that the + escape is seen once by the C++ compiler, before it gets to be seen by the + regular expression engine, consequently escapes in regular expressions have to + be doubled up when embedding them in C/C++ code. Also note that all the + examples assume that your compiler supports Koenig lookup, if yours doesn't + (for example VC6), then you will have to add some boost:: prefixes to some of + the function calls in the examples.

+

Those of you who are familiar with credit card processing, will have realised + that while the format used above is suitable for human readable card numbers, + it does not represent the format required by online credit card systems; these + require the number as a string of 16 (or possibly 15) digits, without any + intervening spaces. What we need is a means to convert easily between the two + formats, and this is where search and replace comes in. Those who are familiar + with the utilities sed and perl will already be ahead here; we + need two strings - one a regular expression - the other a "format + string" that provides a description of the text to replace the match + with. In regex++ this search and replace operation is performed with the + algorithm regex_replace, for our credit card example we can write two algorithms + like this to provide the format conversions:

+
// match any format with the regular expression:
+const boost::regex e("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
+const std::string machine_format("\\1\\2\\3\\4");
+const std::string human_format("\\1-\\2-\\3-\\4");
+
+std::string machine_readable_card_number(const std::string s)
+{
+   return regex_replace(s, e, machine_format, boost::match_default | boost::format_sed);
+}
+
+std::string human_readable_card_number(const std::string s)
+{
+   return regex_replace(s, e, human_format, boost::match_default | boost::format_sed);
+}
+

Here we've used marked sub-expressions in the regular expression to split out + the four parts of the card number as separate fields, the format string then + uses the sed-like syntax to replace the matched text with the reformatted + version.

+

In the examples above, we haven't directly manipulated the results of a regular + expression match, however in general the result of a match contains a number of + sub-expression matches in addition to the overall match. When the library needs + to report a regular expression match it does so using an instance of the class + match_results, as before there are typedefs of this class for the most + common cases: +

+
namespace boost{
+typedef match_results<const char*> cmatch;
+typedef match_results<const wchar_t*> wcmatch;
+typedef match_results<std::string::const_iterator> smatch;
+typedef match_results<std::wstring::const_iterator> wsmatch; 
+}
+

The algorithms regex_search and + regex_grep (i.e. finding all matches in a string) make use of + match_results to report what matched.

+

Note that these algorithms are not restricted to searching regular C-strings, + any bidirectional iterator type can be searched, allowing for the possibility + of seamlessly searching almost any kind of data. +

+

For search and replace operations in addition to the algorithm + regex_replace that we have already seen, the algorithm + regex_format takes the result of a match and a format string, and + produces a new string by merging the two.

+

For those that dislike templates, there is a high level wrapper class RegEx + that is an encapsulation of the lower level template code - it provides a + simplified interface for those that don't need the full power of the library, + and supports only narrow characters, and the "extended" regular expression + syntax. +

+

The POSIX API functions: regcomp, regexec, regfree + and regerror, are available in both narrow character and Unicode versions, and + are provided for those who need compatibility with these API's. +

+

Finally, note that the library now has run-time localization + support, and recognizes the full POSIX regular expression syntax - including + advanced features like multi-character collating elements and equivalence + classes - as well as providing compatibility with other regular expression + libraries including GNU and BSD4 regex packages, and to a more limited extent + perl 5. +

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998-2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + + diff --git a/doc/localisation.html b/doc/localisation.html new file mode 100644 index 00000000..45986f40 --- /dev/null +++ b/doc/localisation.html @@ -0,0 +1,1126 @@ + + + + Boost.Regex: Localisation + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Localisation

+
+

Boost.Regex Index

+
+

+
+

+

Boost.regex provides extensive support for run-time localization, the + localization model used can be split into two parts: front-end and back-end.

+

+

Front-end localization deals with everything which the user sees - error + messages, and the regular expression syntax itself. For example a French + application could change [[:word:]] to [[:mot:]] and \w to \m. Modifying the + front end locale requires active support from the developer, by providing the + library with a message catalogue to load, containing the localized strings. + Front-end locale is affected by the LC_MESSAGES category only. +

+

Back-end localization deals with everything that occurs after the expression + has been parsed - in other words everything that the user does not see or + interact with directly. It deals with case conversion, collation, and character + class membership. The back-end locale does not require any intervention from + the developer - the library will acquire all the information it requires for + the current locale from the underlying operating system / run time library. + This means that if the program user does not interact with regular expressions + directly - for example if the expressions are embedded in your C++ code - then + no explicit localization is required, as the library will take care of + everything for you. For example embedding the expression [[:word:]]+ in your + code will always match a whole word, if the program is run on a machine with, + for example, a Greek locale, then it will still match a whole word, but in + Greek characters rather than Latin ones. The back-end locale is affected by the + LC_CTYPE and LC_COLLATE categories. +

+

There are three separate localization mechanisms supported by boost.regex:

+

Win32 localization model. +

+

This is the default model when the library is compiled under Win32, and is + encapsulated by the traits class w32_regex_traits. When this model is in effect + there is a single global locale as defined by the user's control panel + settings, and returned by GetUserDefaultLCID. All the settings used by + boost.regex are acquired directly from the operating system bypassing the C run + time library. Front-end localization requires a resource dll, containing a + string table with the user-defined strings. The traits class exports the + function: +

+

static std::string set_message_catalogue(const std::string& s); +

+

which needs to be called with a string identifying the name of the resource + dll, before your code compiles any regular expressions (but not + necessarily before you construct any reg_expression instances): +

+

boost::w32_regex_traits<char>::set_message_catalogue("mydll.dll"); +

+

Note that this API sets the dll name for both the narrow and wide + character specializations of w32_regex_traits. +

+

This model does not currently support thread specific locales (via + SetThreadLocale under Windows NT), the library provides full Unicode support + under NT, under Windows 9x the library degrades gracefully - characters 0 to + 255 are supported, the remainder are treated as "unknown" graphic characters. +

+

C localization model. +

+

This is the default model when the library is compiled under an operating + system other than Win32, and is encapsulated by the traits class c_regex_traits, + Win32 users can force this model to take effect by defining the pre-processor + symbol BOOST_REGEX_USE_C_LOCALE. When this model is in effect there is a single + global locale, as set by setlocale. All settings are acquired from your + run time library, consequently Unicode support is dependent upon your run time + library implementation. Front end localization requires a POSIX message + catalogue. The traits class exports the function: +

+

static std::string set_message_catalogue(const std::string& s); +

+

which needs to be called with a string identifying the name of the message + catalogue, before your code compiles any regular expressions (but not + necessarily before you construct any reg_expression instances): +

+

boost::c_regex_traits<char>::set_message_catalogue("mycatalogue"); +

+

Note that this API sets the message catalogue name for both the narrow and wide + character specializations of c_regex_traits. If your run time library does not + support POSIX message catalogues, then you can either provide your own + implementation of <nl_types.h> or define BOOST_RE_NO_CAT to disable + front-end localization via message catalogues. +

+

Note that calling setlocale invalidates all compiled regular + expressions, calling setlocale(LC_ALL, "C") will make this library + behave equivalently to most traditional regular expression libraries including + version 1 of this library. +

+

C++ localization model. +

+

This model is only in effect if the library is built with the pre-processor + symbol BOOST_REGEX_USE_CPP_LOCALE defined. When this model is in effect each + instance of reg_expression<> has its own instance of std::locale, class + reg_expression<> also has a member function imbue which allows the + locale for the expression to be set on a per-instance basis. Front end + localization requires a POSIX message catalogue, which will be loaded via the + std::messages facet of the expression's locale, the traits class exports the + symbol: +

+

static std::string set_message_catalogue(const std::string& s); +

+

which needs to be called with a string identifying the name of the message + catalogue, before your code compiles any regular expressions (but not + necessarily before you construct any reg_expression instances): +

+

boost::cpp_regex_traits<char>::set_message_catalogue("mycatalogue"); +

+

Note that calling reg_expression<>::imbue will invalidate any expression + currently compiled in that instance of reg_expression<>. This model is + the one which most closely fits the ethos of the C++ standard library, however it is + the model which will produce the slowest code, and which is the least well + supported by current standard library implementations, for example I have yet + to find an implementation of std::locale which supports either message + catalogues, or locales other than "C" or "POSIX". +

+

Finally note that if you build the library with a non-default localization + model, then the appropriate pre-processor symbol (BOOST_REGEX_USE_C_LOCALE or + BOOST_REGEX_USE_CPP_LOCALE) must be defined both when you build the support + library, and when you include <boost/regex.hpp> or + <boost/cregex.hpp> in your code. The best way to ensure this is to add + the #define to <boost/regex/user.hpp>. +

+

Providing a message catalogue: +

+

In order to localize the front end of the library, you need to provide the + library with the appropriate message strings contained either in a resource + dll's string table (Win32 model), or a POSIX message catalogue (C or C++ + models). In the latter case the messages must appear in message set zero of the + catalogue. The messages and their id's are as follows: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Message id + Meaning + Default value +  
 101 + The character used to start a sub-expression. + "(" +  
 102 + The character used to end a sub-expression + declaration. + ")" +  
 103 + The character used to denote an end of line + assertion. + "$" +  
 104 + The character used to denote the start of line + assertion. + "^" +  
 105 + The character used to denote the "match any character + expression". + "." +  
 106 + The match zero or more times repetition operator. + "*" +  
 107 + The match one or more repetition operator. + "+" +  
 108 + The match zero or one repetition operator. + "?" +  
 109 + The character set opening character. + "[" +  
 110 + The character set closing character. + "]" +  
 111 + The alternation operator. + "|" +  
 112 + The escape character. + "\\" +  
 113 + The hash character (not currently used). + "#" +  
 114 + The range operator. + "-" +  
 115 + The repetition operator opening character. + "{" +  
 116 + The repetition operator closing character. + "}" +  
 117 + The digit characters. + "0123456789" +  
 118 + The character which when preceded by an escape + character represents the word boundary assertion. + "b" +  
 119 + The character which when preceded by an escape + character represents the non-word boundary assertion. + "B" +  
 120 + The character which when preceded by an escape + character represents the word-start boundary assertion. + "<" +  
 121 + The character which when preceded by an escape + character represents the word-end boundary assertion. + ">" +  
 122 + The character which when preceded by an escape + character represents any word character. + "w" +  
 123 + The character which when preceded by an escape + character represents a non-word character. + "W" +  
 124 + The character which when preceded by an escape + character represents a start of buffer assertion. + "`A" +  
 125 + The character which when preceded by an escape + character represents an end of buffer assertion. + "'z" +  
 126 + The newline character. + "\n" +  
 127 + The comma separator. + "," +  
 128 + The character which when preceded by an escape + character represents the bell character. + "a" +  
 129 + The character which when preceded by an escape + character represents the form feed character. + "f" +  
 130 + The character which when preceded by an escape + character represents the newline character. + "n" +  
 131 + The character which when preceded by an escape + character represents the carriage return character. + "r" +  
 132 + The character which when preceded by an escape + character represents the tab character. + "t" +  
 133 + The character which when preceded by an escape + character represents the vertical tab character. + "v" +  
 134 + The character which when preceded by an escape + character represents the start of a hexadecimal character constant. + "x" +  
 135 + The character which when preceded by an escape + character represents the start of an ASCII escape character. + "c" +  
 136 + The colon character. + ":" +  
 137 + The equals character. + "=" +  
 138 + The character which when preceded by an escape + character represents the ASCII escape character. + "e" +  
 139 + The character which when preceded by an escape + character represents any lower case character. + "l" +  
 140 + The character which when preceded by an escape + character represents any non-lower case character. + "L" +  
 141 + The character which when preceded by an escape + character represents any upper case character. + "u" +  
 142 + The character which when preceded by an escape + character represents any non-upper case character. + "U" +  
 143 + The character which when preceded by an escape + character represents any space character. + "s" +  
 144 + The character which when preceded by an escape + character represents any non-space character. + "S" +  
 145 + The character which when preceded by an escape + character represents any digit character. + "d" +  
 146 + The character which when preceded by an escape + character represents any non-digit character. + "D" +  
 147 + The character which when preceded by an escape + character represents the end quote operator. + "E" +  
 148 + The character which when preceded by an escape + character represents the start quote operator. + "Q" +  
 149 + The character which when preceded by an escape + character represents a Unicode combining character sequence. + "X" +  
 150 + The character which when preceded by an escape + character represents any single character. + "C" +  
 151 + The character which when preceded by an escape + character represents end of buffer operator. + "Z" +  
 152 + The character which when preceded by an escape + character represents the continuation assertion. + "G" +  
 153 + The character which when preceded by (? indicates a zero width negated + forward lookahead assert. + "!" +  
+

+


+   +

+

Custom error messages are loaded as follows: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Message ID + Error message ID + Default string +  
 201 + REG_NOMATCH + "No match" +  
 202 + REG_BADPAT + "Invalid regular expression" +  
 203 + REG_ECOLLATE + "Invalid collation character" +  
 204 + REG_ECTYPE + "Invalid character class name" +  
 205 + REG_EESCAPE + "Trailing backslash" +  
 206 + REG_ESUBREG + "Invalid back reference" +  
 207 + REG_EBRACK + "Unmatched [ or [^" +  
 208 + REG_EPAREN + "Unmatched ( or \\(" +  
 209 + REG_EBRACE + "Unmatched \\{" +  
 210 + REG_BADBR + "Invalid content of \\{\\}" +  
 211 + REG_ERANGE + "Invalid range end" +  
 212 + REG_ESPACE + "Memory exhausted" +  
 213 + REG_BADRPT + "Invalid preceding regular expression" +  
 214 + REG_EEND + "Premature end of regular expression" +  
 215 + REG_ESIZE + "Regular expression too big" +  
 216 + REG_ERPAREN + "Unmatched ) or \\)" +  
 217 + REG_EMPTY + "Empty expression" +  
 218 + REG_E_UNKNOWN + "Unknown error" +  
+

+


+   +

+

Custom character class names are loaded as follows: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Message ID + Description + Equivalent default class name +  
 300 + The character class name for alphanumeric characters. + "alnum" +  
 301 + The character class name for alphabetic characters. + "alpha" +  
 302 + The character class name for control characters. + "cntrl" +  
 303 + The character class name for digit characters. + "digit" +  
 304 + The character class name for graphics characters. + "graph" +  
 305 + The character class name for lower case characters. + "lower" +  
 306 + The character class name for printable characters. + "print" +  
 307 + The character class name for punctuation characters. + "punct" +  
 308 + The character class name for space characters. + "space" +  
 309 + The character class name for upper case characters. + "upper" +  
 310 + The character class name for hexadecimal characters. + "xdigit" +  
 311 + The character class name for blank characters. + "blank" +  
 312 + The character class name for word characters. + "word" +  
 313 + The character class name for Unicode characters. + "unicode" +  
+

+


+   +

+

Finally, custom collating element names are loaded starting from message id + 400, and terminating when the first load thereafter fails. Each message looks + something like: "tagname string" where tagname is the name used inside + [[.tagname.]] and string is the actual text of the collating element. + Note that the value of collating element [[.zero.]] is used for the conversion + of strings to numbers - if you replace this with another value then that will + be used for string parsing - for example use the Unicode character 0x0660 for + [[.zero.]] if you want to use Unicode Arabic-Indic digits in your regular + expressions in place of Latin digits. +

+

+ Note that the POSIX defined names for character classes and collating elements + are always available - even if custom names are defined, in contrast, custom + error messages, and custom syntax messages replace the default ones. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/match_flag_type.html b/doc/match_flag_type.html new file mode 100644 index 00000000..598f6c83 --- /dev/null +++ b/doc/match_flag_type.html @@ -0,0 +1,266 @@ + + + + Boost.Regex: match_flag_type + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

match_flag_type

+
+

Boost.Regex Index

+
+

+
+

+

Synopsis

+

The type match_flag_type is an implementation defined bitmask type + (17.3.2.1.2) that controls how a regular expression is matched against a + character sequence.

+
namespace std{ namespace regex_constants{
+
+typedef bitmask_type match_flag_type;
+
+static const match_flag_type match_default = 0;
+static const match_flag_type match_not_bob;
+static const match_flag_type match_not_eob;
+static const match_flag_type match_not_bol;
+static const match_flag_type match_not_eol;
+static const match_flag_type match_not_bow;
+static const match_flag_type match_not_eow;
+static const match_flag_type match_any;
+static const match_flag_type match_not_null;
+static const match_flag_type match_continuous;
+static const match_flag_type match_partial;
+static const match_flag_type match_prev_avail;
+static const match_flag_type match_not_dot_newline;
+static const match_flag_type match_not_dot_null;
+
+static const match_flag_type format_default = 0;
+static const match_flag_type format_sed;
+static const match_flag_type format_perl;
+static const match_flag_type format_no_copy;
+static const match_flag_type format_first_only;
+static const match_flag_type format_all;
+
+} // namespace regex_constants
+} // namespace std
+

Description

+

The type match_flag_type is an implementation defined bitmask type + (17.3.2.1.2). When matching a regular expression against a sequence of + characters [first, last) then setting its elements has the effects listed in + the table below:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Element

+
+

Effect if set

+
+

match_default

+
+

Specifies that matching of regular expressions proceeds without any + modification of the normal rules used in ECMA-262, ECMAScript Language + Specification, Chapter 15 part 10, RegExp (Regular Expression) Objects (FWD.1)

+
match_not_bob + Specifies that the expression "\A" should not match + against the sub-sequence [first,first).
match_not_eob + Specifies that the expressions "\z" and + "\Z" should not match against the sub-sequence [last,last).
+

match_not_bol

+
+

Specifies that the expression "^" should not be matched against the + sub-sequence [first,first).

+
+

match_not_eol

+
+

Specifies that the expression "$" should not be matched against the + sub-sequence [last,last).

+
+

match_not_bow

+
+

Specifies that the expression "\b" should not be matched against the + sub-sequence [first,first).

+
+

match_not_eow

+
+

Specifies that the expression "\b" should not be matched against the + sub-sequence [last,last).

+
+

match_any

+
+

Specifies that if more than one match is possible then any match is an + acceptable result.

+
+

match_not_null

+
+

Specifies that the expression can not be matched against an empty sequence.

+
+

match_continuous

+
+

Specifies that the expression must match a sub-sequence that begins at first.

+
+

match_partial

+
+

Specifies that if no match can be found, then it is acceptable to return a + match [from, last) where from!=last, if there exists some sequence of + characters [from,to) of which [from,last) is a prefix, and which would result + in a full match.

+
+

match_prev_avail

+
+

Specifies that --first is a valid iterator position, when this + flag is set then the flags match_not_bol and match_not_bow + are ignored by the regular expression algorithms (RE.7) and iterators (RE.8).

+
match_not_dot_newline + Specifies that the expression "." does not match a + newline character.
match_not_dot_null + Specifies that the expression "." does not match a + null character '\0'.
+

format_default

+
+

Specifies that when a regular expression match is to be replaced by a new + string, that the new string is constructed using the rules used by the + ECMAScript replace function in ECMA-262, ECMAScript Language Specification, + Chapter 15 part 5.4.11 String.prototype.replace. (FWD.1). In addition during + search and replace operations then all non-overlapping occurrences of the + regular expression are located and replaced, and sections of the input that did + not match the expression, are copied unchanged to the output string.

+
+

format_sed

+
+

Specifies that when a regular expression match is to be replaced by a new + string, that the new string is constructed using the rules used by the Unix sed + utility in IEEE Std 1003.1-2001, Portable Operating System Interface (POSIX), + Shells and Utilities.

+
+

format_perl

+
+

Specifies that when a regular expression match is to be replaced by a new + string, that the new string is constructed using an implementation defined + superset of the rules used by the ECMAScript replace function in ECMA-262, + ECMAScript Language Specification, Chapter 15 part 5.4.11 + String.prototype.replace (FWD.1).

+
format_all + Specifies that all syntax extensions are + enabled, including conditional (?ddexpression1:expression2) replacements.
+

format_no_copy

+
+

When specified during a search and replace operation, then sections of the + character container sequence being searched that do not match the regular + expression, are not copied to the output string.

+
+

format_first_only

+
+

When specified during a search and replace operation, then only the first + occurrence of the regular expression is replaced.

+
+

+

+


+

+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/match_results.html b/doc/match_results.html new file mode 100644 index 00000000..9a0d1afc --- /dev/null +++ b/doc/match_results.html @@ -0,0 +1,390 @@ + + + + Boost.Regex: class match_results + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

class match_results

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
Description
+
+

Synopsis

+

#include <boost/regex.hpp> +

+

Regular expressions are different from many simple pattern-matching algorithms + in that as well as finding an overall match they can also produce + sub-expression matches: each sub-expression being delimited in the pattern by a + pair of parentheses (...). There has to be some method for reporting + sub-expression matches back to the user: this is achieved by defining a + class match_results that acts as an indexed collection of sub-expression + matches, each sub-expression match being contained in an object of type + sub_match + . +

Template class match_results denotes a collection of character sequences + representing the result of a regular expression match. Objects of type + match_results are passed to the algorithms regex_match + and regex_search, and are returned by the iterator + regex_iterator + .  Storage for the collection is allocated and freed as necessary by the + member functions of class match_results. +

The template class match_results conforms to the requirements of a Sequence, as + specified in (lib.sequence.reqmts), except that only operations defined for + const-qualified Sequences are supported.

+

Class template match_results is most commonly used as one of the typedefs + cmatch, wcmatch, smatch, or wsmatch:

+
template <class BidirectionalIterator,
+          class Allocator = allocator<sub_match<BidirectionalIterator> >
+class match_results;
+
+typedef match_results<const char*> cmatch;
+typedef match_results<const wchar_t*> wcmatch;
+typedef match_results<string::const_iterator> smatch;
+typedef match_results<wstring::const_iterator> wsmatch;
+
+template <class BidirectionalIterator,
+          class Allocator = allocator<sub_match<BidirectionalIterator> >
+class match_results   // indexed collection of sub_match objects describing one match
+{ 
+public: 
+   typedef          sub_match<BidirectionalIterator>                        value_type;
+   typedef          const value_type&                                       const_reference;
+   typedef          const_reference                                         reference;
+   typedef          implementation defined                                  const_iterator;
+   typedef          const_iterator                                          iterator;
+   typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
+   typedef typename Allocator::size_type                                    size_type;
+   typedef          Allocator                                               allocator_type;
+   typedef typename iterator_traits<BidirectionalIterator>::value_type      char_type;
+   typedef          basic_string<char_type>                                 string_type;
+
+   // construct/copy/destroy (a copy of the Allocator argument is used for all allocation):
+   explicit match_results(const Allocator& a = Allocator());
+   match_results(const match_results& m);
+   match_results& operator=(const match_results& m); 
+   ~match_results();
+
+   // size (counted in sub_match elements):
+   size_type size() const;
+   size_type max_size() const;
+   bool empty() const;   // size() == 0
+   // element access (index 0 refers to the whole match):
+   difference_type length(int sub = 0) const;              // (*this)[sub].length()
+   difference_type position(unsigned int sub = 0) const;   // distance from prefix().first to (*this)[sub].first
+   string_type str(int sub = 0) const;                     // string_type((*this)[sub])
+   const_reference operator[](int n) const;
+
+   const_reference prefix() const;   // from the start of the text searched to the start of the match
+
+   const_reference suffix() const;   // from the end of the match to the end of the text searched
+   const_iterator begin() const;
+   const_iterator end() const;
+   // format (expands the format specifiers and escape sequences in fmt using *this):
+   template <class OutputIterator>
+   OutputIterator format(OutputIterator out,
+                         const string_type& fmt,
+                         match_flag_type flags = format_default) const;
+   string_type format(const string_type& fmt,
+                      match_flag_type flags = format_default) const;
+
+   allocator_type get_allocator() const;   // copy of the Allocator passed to the constructor
+   void swap(match_results& that);         // constant time
+};
+
+template <class BidirectionalIterator, class Allocator>
+bool operator == (const match_results<BidirectionalIterator, Allocator>& m1,
+                  const match_results<BidirectionalIterator, Allocator>& m2);
+template <class BidirectionalIterator, class Allocator>
+bool operator != (const match_results<BidirectionalIterator, Allocator>& m1,
+                  const match_results<BidirectionalIterator, Allocator>& m2);
+
+template <class charT, class traits, class BidirectionalIterator, class Allocator>
+basic_ostream<charT, traits>&
+   operator << (basic_ostream<charT, traits>& os,
+                const match_results<BidirectionalIterator, Allocator>& m);
+
+template <class BidirectionalIterator, class Allocator>
+void swap(match_results<BidirectionalIterator, Allocator>& m1,
+          match_results<BidirectionalIterator, Allocator>& m2);
+

Description

+

match_results constructors

+

In all match_results constructors, a copy of the Allocator + argument is used for any memory allocation performed by the constructor or + member functions during the lifetime of the object. +

+
match_results(const Allocator& a = Allocator());
+ +

+ Effects: Constructs an object of class match_results. The postconditions + of this function are indicated in Table RE16:

+
+ Table RE16--match_results(const Allocator&) effects
+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

empty()

+
+

true

+
+

size()

+
+

0

+
+

str()

+
+

basic_string<charT>()

+

+
+

+
match_results(const match_results& m);
+ +

+ Effects: Constructs an object of class match_results, as a copy of + m.

match_results& operator=(const match_results& m);
+ +

+ Effects: Assigns m to *this. The postconditions of this function are + indicated in Table RE17:

+
Table RE17--match_results assignment operator effects
+

+

+

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value +

+
+

empty()

+
+

m.empty().

+
+

size()

+
+

m.size().

+
+

str(n)

+
+

m.str(n) for all integers n < m.size().

+
+

prefix()

+
+

m.prefix().

+
+

suffix()

+
+

m.suffix().

+
+

(*this)[n]

+
+

m[n] for all integers n < m.size().

+
+

length(n)

+
+

m.length(n) for all integers n < m.size().

+
+

position(n)

+
+

m.position(n) for all integers n < m.size().

+

+
+

+
+

match_results size

+
size_type size()const;
+ +

+ Effects: Returns the number of sub_match elements stored in *this.

size_type max_size()const;
+ +

+ Effects: Returns the maximum number of sub_match elements that can be + stored in *this.

bool empty()const;
+ +

+ Effects: Returns size() == 0.

+

match_results element access

+
difference_type length(int sub = 0)const;
+ +

+ Effects: Returns (*this)[sub].length().

difference_type position(unsigned int sub = 0)const;
+ +

+ Effects: Returns std::distance(prefix().first, + (*this)[sub].first).

string_type str(int sub = 0)const;
+ +

+ Effects: Returns string_type((*this)[sub]).

const_reference operator[](int n) const;
+ +

+ Effects: Returns a reference to the sub_match object + representing the character sequence that matched marked sub-expression n. + If n == 0 then returns a reference to a sub_match object + representing the character sequence that matched the whole regular + expression.

const_reference prefix()const;
+ +

+ Effects: Returns a reference to the sub_match object + representing the character sequence from the start of the string being + matched/searched, to the start of the match found.

const_reference suffix()const;
+ +

+ Effects: Returns a reference to the sub_match object + representing the character sequence from the end of the match found to the end + of the string being matched/searched.

const_iterator begin()const;
+ +

+ Effects: Returns a starting iterator that enumerates over all the marked + sub-expression matches stored in *this.

const_iterator end()const;
+ +

+ Effects: Returns a terminating iterator that enumerates over all the + marked sub-expression matches stored in *this.

+

match_results reformatting

+
template <class OutputIterator>
+OutputIterator format(OutputIterator out,
+                      const string_type& fmt,
+                      match_flag_type flags = format_default) const;
+ +

+ Requires: The type OutputIterator conforms to the Output Iterator + requirements (24.1.2).

+

+ Effects: Copies the character sequence [fmt.begin(), fmt.end()) to + OutputIterator out. For each format specifier or escape sequence in fmt, + replace that sequence with either the character(s) it represents, or the + sequence of characters within *this to which it refers. The bitmasks specified + in flags determine which + format specifiers or escape sequences are recognized; by default this is + the format used by ECMA-262, ECMAScript Language Specification, Chapter 15 part + 5.4.11 String.prototype.replace.

+

+ Returns: out.

string_type format(const string_type& fmt,
+                   match_flag_type flags = format_default) const;
+ +

+ Effects: Returns a copy of the string fmt. For each format + specifier or escape sequence in fmt, replace that sequence with either + the character(s) it represents, or the sequence of characters within *this to + which it refers. The bitmasks specified in flags + determine which format specifiers or escape sequences + are recognized; by default this is the format used by ECMA-262, + ECMAScript Language Specification, Chapter 15 part 5.4.11 + String.prototype.replace.

allocator_type get_allocator()const;
+ +

+ Effects: Returns a copy of the Allocator that was passed to the object's + constructor.

void swap(match_results& that);
+ +

+ Effects: Swaps the contents of the two sequences.

+

+ Postcondition: *this contains the sequence of matched + sub-expressions that were in that, that contains the + sequence of matched sub-expressions that were in *this.

+

+ Complexity: constant time. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/partial_matches.html b/doc/partial_matches.html new file mode 100644 index 00000000..130c837d --- /dev/null +++ b/doc/partial_matches.html @@ -0,0 +1,184 @@ + + + + Boost.Regex: Partial Matches + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Partial Matches

+
+

Boost.Regex Index

+
+

+
+

+

The match-flag match_partial can + be passed to the following algorithms: regex_match, + regex_search, and regex_grep. + When used it indicates that partial as well as full matches should be found. A + partial match is one that matched one or more characters at the end of the text + input, but did not match all of the regular expression (although it may have + done so had more input been available). Partial matches are typically used when + either validating data input (checking each character as it is entered on the + keyboard), or when searching texts that are either too long to load into memory + (or even into a memory mapped file), or are of indeterminate length (for + example the source may be a socket or similar). Partial and full matches can be + differentiated as shown in the following table (the variable M represents an + instance of match_results<> as filled in + by regex_match, regex_search or regex_grep):
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  | Result | M[0].matched | M[0].first | M[0].second
No match | False | Undefined | Undefined | Undefined
Partial match | True | False | Start of partial match. | End of partial match (end of text).
Full match | True | True | Start of full match. | End of full match.
+

+

The following example + tests to see whether the text could be a valid credit card number. As the user + presses a key, the character entered would be added to the string being built + up, and passed to is_possible_card_number. If this returns true + then the text matches a complete card number, so the user interface's OK button + would be enabled. If it returns false, then this is not yet a valid card + number, but could be with more input, so the user interface would disable the + OK button. Finally, if the procedure throws an exception the input could never + become a valid number, so the character just entered must be discarded, and a + suitable error indication displayed to the user.

+
#include <string>
+#include <iostream>
+#include <stdexcept>
+#include <boost/regex.hpp>
+
+// a card number: 3 or 4 digits followed by three groups of 4 digits,
+// with an optional dash or space between the groups:
+boost::regex e("(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})");
+
+//
+// is_possible_card_number:
+// input is the text entered so far.
+// Returns true if input is a full match for e, false if it is only a
+// partial match (more input could still complete it), and throws
+// std::runtime_error if input could never become a match.
+//
+bool is_possible_card_number(const std::string& input)
+{
+   boost::match_results<std::string::const_iterator> what;
+   if(!boost::regex_match(input, what, e, boost::match_default | boost::match_partial))
+   {
+      // the input so far could not possibly be valid so reject it:
+      throw std::runtime_error("Invalid data entered - this could not possibly be a valid card number");
+   }
+   // OK so far so good, but have we finished?
+   // what[0].matched is true for a full match, and false when what we
+   // have so far is only a partial match:
+   return what[0].matched;
+}
+

In the following example, + text input is taken from a stream containing an unknown amount of text; this + example simply counts the number of html tags encountered in the stream. The + text is loaded into a buffer and searched a part at a time, if a partial match + was encountered, then the partial match gets searched a second time as the + start of the next batch of text:

+
#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <cstring>
+#include <boost/regex.hpp>
+
+// match some kind of html tag:
+boost::regex e("<[^>]*>");
+// count how many:
+unsigned int tags = 0;
+// saved position of partial match:
+char* next_pos = 0;
+
+//
+// grep_callback:
+// called by regex_grep for every match found; a full match is counted
+// as a tag, a partial match has its start position saved in next_pos so
+// that search() can re-examine it along with the next batch of text.
+// Always returns true so that the grep carries on searching.
+//
+bool grep_callback(const boost::match_results<char*>& m)
+{
+   if(m[0].matched == false)
+   {
+      // save position and return:
+      next_pos = m[0].first;
+   }
+   else
+      ++tags;
+   return true;
+}
+
+//
+// search:
+// reads the stream is a buffer-full at a time and counts the html tags
+// found in the global tags; text belonging to a partial match at the end
+// of one buffer is carried forward to the start of the next.
+// NOTE: a partial match that fills the whole buffer leaves no room to
+// read more text, so a single tag must be shorter than the buffer.
+//
+void search(std::istream& is)
+{
+   char buf[4096];
+   next_pos = buf + sizeof(buf);
+   bool have_more = true;
+   while(have_more)
+   {
+      // how much do we copy forward from last try:
+      unsigned leftover = (buf + sizeof(buf)) - next_pos;
+      // and how much is left to fill:
+      unsigned size = next_pos - buf;
+      // copy forward whatever we have left; the source and destination
+      // overlap when the leftover is more than half the buffer, so this
+      // has to be memmove rather than memcpy:
+      std::memmove(buf, next_pos, leftover);
+      // fill the rest from the stream; read() blocks until size characters
+      // have arrived or the stream ends, whereas readsome() only returns
+      // what is already buffered and so could end the search too early:
+      is.read(buf + leftover, size);
+      unsigned read = static_cast<unsigned>(is.gcount());
+      // check to see if we've run out of text:
+      have_more = read == size;
+      // reset next_pos:
+      next_pos = buf + sizeof(buf);
+      // and then grep:
+      boost::regex_grep(grep_callback,
+                        buf,
+                        buf + read + leftover,
+                        e,
+                        boost::match_default | boost::match_partial);
+   }
+}
+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/posix_api.html b/doc/posix_api.html new file mode 100644 index 00000000..89ae0a2f --- /dev/null +++ b/doc/posix_api.html @@ -0,0 +1,288 @@ + + + + Boost.Regex: POSIX API Compatibility Functions + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

POSIX API Compatibility Functions

+
+

Boost.Regex Index

+
+

+
+

+
#include <boost/cregex.hpp>
+or:
+#include <boost/regex.h>
+

The following functions are available for users who need a POSIX compatible C + library, they are available in both Unicode and narrow character versions, the + standard POSIX API names are macros that expand to one version or the other + depending upon whether UNICODE is defined or not. +

+

Important: Note that all the symbols defined here are enclosed inside + namespace boost when used in C++ programs, unless you use #include + <boost/regex.h> instead - in which case the symbols are still defined in + namespace boost, but are made available in the global namespace as well.

+

The functions are defined as: +

+
extern "C" {
+int regcompA(regex_tA*, const char*, int);   // narrow character versions
+unsigned int regerrorA(int, const regex_tA*, char*, unsigned int);
+int regexecA(const regex_tA*, const char*, unsigned int, regmatch_t*, int);
+void regfreeA(regex_tA*);
+
+int regcompW(regex_tW*, const wchar_t*, int);   // wide character versions
+unsigned int regerrorW(int, const regex_tW*, wchar_t*, unsigned int);
+int regexecW(const regex_tW*, const wchar_t*, unsigned int, regmatch_t*, int);
+void regfreeW(regex_tW*);
+
+#ifdef UNICODE   // the standard POSIX names expand to the wide character versions
+#define regcomp regcompW
+#define regerror regerrorW
+#define regexec regexecW
+#define regfree regfreeW
+#define regex_t regex_tW
+#else   // the standard POSIX names expand to the narrow character versions
+#define regcomp regcompA
+#define regerror regerrorA
+#define regexec regexecA
+#define regfree regfreeA
+#define regex_t regex_tA
+#endif
+}
+

All the functions operate on structure regex_t, which exposes two public + members: +

+

unsigned int re_nsub this is filled in by regcomp and indicates + the number of sub-expressions contained in the regular expression. +

+

const TCHAR* re_endp points to the end of the expression to compile when + the flag REG_PEND is set. +

+

Footnote: regex_t is actually a #define - it is either regex_tA or regex_tW + depending upon whether UNICODE is defined or not, TCHAR is either char or + wchar_t again depending upon the macro UNICODE. +

+

regcomp

+

regcomp takes a pointer to a regex_t, a pointer to the expression + to compile and a flags parameter which can be a combination of: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 REG_EXTENDED Compiles modern regular expressions. Equivalent to + regbase::char_classes | regbase::intervals | regbase::bk_refs. 
 REG_BASIC Compiles basic (obsolete) regular expression syntax. + Equivalent to regbase::char_classes | regbase::intervals | regbase::limited_ops + | regbase::bk_braces | regbase::bk_parens | regbase::bk_refs. 
 REG_NOSPEC All characters are ordinary, the expression is a + literal string. 
 REG_ICASE Compiles for matching that ignores character case. 
 REG_NOSUB Has no effect in this library. 
 REG_NEWLINE When this flag is set a dot does not match the + newline character. 
 REG_PEND When this flag is set the re_endp parameter of the + regex_t structure must point to the end of the regular expression to compile. 
 REG_NOCOLLATE When this flag is set then locale dependent collation + for character ranges is turned off. 
 REG_ESCAPE_IN_LISTS
+ , , , +
When this flag is set, then escape sequences are + permitted in bracket expressions (character sets). 
 REG_NEWLINE_ALT When this flag is set then the newline character is + equivalent to the alternation operator |. 
 REG_PERL  A shortcut for perl-like behavior: REG_EXTENDED + | REG_NOCOLLATE | REG_ESCAPE_IN_LISTS 
 REG_AWK A shortcut for awk-like behavior: REG_EXTENDED | + REG_ESCAPE_IN_LISTS 
 REG_GREP A shortcut for grep like behavior: REG_BASIC | + REG_NEWLINE_ALT 
 REG_EGREP A shortcut for egrep like behavior: + REG_EXTENDED | REG_NEWLINE_ALT 
+

+

regerror

+

regerror takes the following parameters, it maps an error code to a human + readable string: +
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
 int code: The error code. 
 const regex_t* e: The regular expression (can be null). 
 char* buf: The buffer to fill in with the error message. 
 unsigned int buf_size: The length of buf. 
+

+

If the error code is OR'ed with REG_ITOA then the message that results is the + printable name of the code rather than a message, for example "REG_BADPAT". If + the code is REG_ATOI then e must not be null and e->re_endp must + point to the printable name of an error code; the return value is then the + value of the error code. For any other value of code, the return value + is the number of characters in the error message; if the return value is + greater than or equal to buf_size then regerror will have to be + called again with a larger buffer.

+

regexec

+

regexec finds the first occurrence of expression e within string buf. + If len is non-zero then *m is filled in with what matched the + regular expression, m[0] contains what matched the whole string, m[1] + the first sub-expression etc, see regmatch_t in the header file + declaration for more details. The eflags parameter can be a combination + of: +
+   +

+

+ + + + + + + + + + + + + + + + + + + +
 REG_NOTBOL: Parameter buf does not represent the start of + a line. 
 REG_NOTEOL: Parameter buf does not terminate at the end of + a line. 
 REG_STARTEND: The string searched starts at buf + pmatch[0].rm_so + and ends at buf + pmatch[0].rm_eo. 
+

+

regfree

+

Finally regfree frees all the memory that was allocated by regcomp. +

+

Footnote: this is an abridged reference to the POSIX API functions, it is + provided for compatibility with other libraries, rather than an API to be used + in new code (unless you need access from a language other than C++). This + version of these functions should also happily coexist with other versions, as + the names used are macros that expand to the actual function names. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/redistributables.html b/doc/redistributables.html new file mode 100644 index 00000000..24af723f --- /dev/null +++ b/doc/redistributables.html @@ -0,0 +1,83 @@ + + + + Boost.Regex: Redistributables and Library Names + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Redistributables and Library Names

+
+

Boost.Regex Index

+
+

+
+

+

If you are using Microsoft or Borland C++ and link to a dll version of the run + time library, then you will also link to one of the dll versions of boost.regex. + While these dll's are redistributable, there are no "standard" versions, so + when installing on the user's PC, you should place these in a directory private + to your application, and not in the PC's directory path. Note that if you link + to a static version of your run time library, then you will also link to a + static version of boost.regex and no dll's will need to be distributed. The + possible boost.regex dll and library names are computed according to the following + formula:
+

+

+

"boost_regex_"
+ + BOOST_LIB_TOOLSET
+ + "_"
+ + BOOST_LIB_THREAD_OPT
+ + BOOST_LIB_RT_OPT
+ + BOOST_LIB_LINK_OPT
+ + BOOST_LIB_DEBUG_OPT
+
+ These are defined as:
+
+ BOOST_LIB_TOOLSET: The compiler toolset name (vc6, vc7, bcb5 etc).
+
+ BOOST_LIB_THREAD_OPT: "s" for single thread builds,
+ "m" for multithread builds.
+
+ BOOST_LIB_RT_OPT: "s" for static runtime,
+ "d" for dynamic runtime.
+
+ BOOST_LIB_LINK_OPT: "s" for static link,
+ "i" for dynamic link.
+
+ BOOST_LIB_DEBUG_OPT: nothing for release builds,
+ "d" for debug builds,
+ "dd" for debug-diagnostic builds (_STLP_DEBUG).

+

+ Note: you can disable automatic library selection by defining the symbol + BOOST_REGEX_NO_LIB when compiling, this is useful if you want to statically + link even though you're using the dll version of your run time library, or if + you need to debug boost.regex. +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/reg_expression.html b/doc/reg_expression.html new file mode 100644 index 00000000..79263c8b --- /dev/null +++ b/doc/reg_expression.html @@ -0,0 +1,45 @@ + + + + Boost.Regex: Class reg_expression (deprecated) + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Class reg_expression (deprecated)

+
+

Boost.Regex Index

+
+

+
+

+

The use of class template reg_expression is deprecated: use + basic_regex instead.

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/regbase.html b/doc/regbase.html new file mode 100644 index 00000000..6bd38353 --- /dev/null +++ b/doc/regbase.html @@ -0,0 +1,55 @@ + + + + Boost.Regex: regbase + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

regbase

+
+

Boost.Regex Index

+
+

+
+

+

Use of the type boost::regbase is now deprecated, and the type does not form a + part of the + regular expression standardization proposal.  This type still + exists as a base class of boost::basic_regex, and you can still refer to + boost::regbase::constant_name in your code, however for maximum portability to + other std regex implementations you should instead use either:

+
boost::regex_constants::constant_name
+

or

+
boost::regex::constant_name
+

or

+
boost::wregex::constant_name
+

+


+

+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/regex.html b/doc/regex.html new file mode 100644 index 00000000..f36bb13e --- /dev/null +++ b/doc/regex.html @@ -0,0 +1,492 @@ + + + + Boost.Regex: class RegEx (deprecated) + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

class RegEx (deprecated)

+
+

Boost.Regex Index

+
+

+
+

+

The high level wrapper class RegEx is now deprecated and does not form a part + of the regular + expression standardization proposal.  This type still exists, and + existing code will continue to compile, however the following documentation is + unlikely to be further updated.

+
#include <boost/cregex.hpp> 
+

The class RegEx provides a high level simplified interface to the regular + expression library, this class only handles narrow character strings, and + regular expressions always follow the "normal" syntax - that is the same as the + perl / ECMAScript syntax. +

+
typedef bool (*GrepCallback)(const RegEx& expression);
+typedef bool (*GrepFileCallback)(const char* file, const RegEx& expression);
+typedef bool (*FindFilesCallback)(const char* file);
+
+class  RegEx
+{
+public:
+   RegEx();
+   RegEx(const RegEx& o);
+   ~RegEx();
+   RegEx(const char* c, bool icase = false);
+   explicit RegEx(const std::string& s, bool icase = false);
+   RegEx& operator=(const RegEx& o);
+   RegEx& operator=(const char* p);
+   RegEx& operator=(const std::string& s);
+   unsigned int SetExpression(const char* p, bool icase = false);
+   unsigned int SetExpression(const std::string& s, bool icase = false);
+   std::string Expression()const;
+   //
+   // now matching operators: 
+   // 
+   bool Match(const char* p, unsigned int flags = match_default);
+   bool Match(const std::string& s, unsigned int flags = match_default); 
+   bool Search(const char* p, unsigned int flags = match_default); 
+   bool Search(const std::string& s, unsigned int flags = match_default); 
+   unsigned int Grep(GrepCallback cb, const char* p, unsigned int flags = match_default); 
+   unsigned int Grep(GrepCallback cb, const std::string& s, unsigned int flags = match_default); 
+   unsigned int Grep(std::vector<std::string>& v, const char* p, unsigned int flags = match_default); 
+   unsigned int Grep(std::vector<std::string>& v, const std::string& s, unsigned int flags = match_default); 
+   unsigned int Grep(std::vector<unsigned int>& v, const char* p, unsigned int flags = match_default); 
+   unsigned int Grep(std::vector<unsigned int>& v, const std::string& s, unsigned int flags = match_default); 
+   unsigned int GrepFiles(GrepFileCallback cb, const char* files, bool recurse = false, unsigned int flags = match_default); 
+   unsigned int GrepFiles(GrepFileCallback cb, const std::string& files, bool recurse = false, unsigned int flags = match_default); 
+   unsigned int FindFiles(FindFilesCallback cb, const char* files, bool recurse = false, unsigned int flags = match_default); 
+   unsigned int FindFiles(FindFilesCallback cb, const std::string& files, bool recurse = false, unsigned int flags = match_default); 
+   std::string Merge(const std::string& in, const std::string& fmt, bool copy = true, unsigned int flags = match_default); 
+   std::string Merge(const char* in, const char* fmt, bool copy = true, unsigned int flags = match_default); 
+   unsigned Split(std::vector<std::string>& v, std::string& s, unsigned flags = match_default, unsigned max_count = ~0); 
+   // 
+   // now operators for returning what matched in more detail: 
+   // 
+   unsigned int Position(int i = 0)const; 
+   unsigned int Length(int i = 0)const; 
+   bool Matched(int i = 0)const;
+   unsigned int Line()const; 
+   unsigned int Marks() const; 
+   std::string What(int i)const; 
+   std::string operator[](int i)const ; 
+
+   static const unsigned int npos;
+};     
+

Member functions for class RegEx are defined as follows: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 RegEx();Default constructor, constructs an instance of RegEx + without any valid expression. 
 RegEx(const RegEx& o);Copy constructor, all the properties of parameter o + are copied. 
 RegEx(const char* c, bool icase + = false);Constructs an instance of RegEx, setting the + expression to c, if icase is true then matching is + insensitive to case, otherwise it is sensitive to case. Throws bad_expression + on failure. 
 RegEx(const std::string& s, bool icase + = false);Constructs an instance of RegEx, setting the + expression to s, if icase is true then matching is + insensitive to case, otherwise it is sensitive to case. Throws bad_expression + on failure. 
 RegEx& operator=(const RegEx& + o);Default assignment operator. 
 RegEx& operator=(const char* + p);Assignment operator, equivalent to calling SetExpression(p, + false). Throws bad_expression on failure. 
 RegEx& operator=(const std::string& + s);Assignment operator, equivalent to calling SetExpression(s, + false). Throws bad_expression on failure. 
 unsigned int SetExpression(const char* + p, bool icase = false);Sets the current expression to p, if icase + is true then matching is insensitive to case, otherwise it is sensitive + to case. Throws bad_expression on failure. 
 unsigned int SetExpression(const + std::string& s, bool icase = false);Sets the current expression to s, if icase + is true then matching is insensitive to case, otherwise it is sensitive + to case. Throws bad_expression on failure. 
 std::string Expression()const;Returns a copy of the current regular expression. 
 bool Match(const char* p, unsigned + int flags = match_default);Attempts to match the current expression against the + text p using the match flags flags - see + match flags. Returns true if the expression matches the whole of + the input string. 
 bool Match(const std::string& s, unsigned + int flags = match_default) ;Attempts to match the current expression against the + text s using the match flags flags - see + match flags. Returns true if the expression matches the whole of + the input string. 
 bool Search(const char* p, unsigned + int flags = match_default);Attempts to find a match for the current expression + somewhere in the text p using the match flags flags - see + match flags. Returns true if the match succeeds. 
 bool Search(const std::string& s, unsigned + int flags = match_default) ;Attempts to find a match for the current expression + somewhere in the text s using the match flags flags - see + match flags. Returns true if the match succeeds. 
 unsigned int Grep(GrepCallback cb, const + char* p, unsigned int flags = match_default);Finds all matches of the current expression in the + text p using the match flags flags - see + match flags. For each match found calls the call-back function cb + as: cb(*this); +

If at any stage the call-back function returns false then the grep operation + terminates, otherwise continues until no further matches are found. Returns the + number of matches found.

+
 
 unsigned int Grep(GrepCallback cb, const + std::string& s, unsigned int flags = match_default);Finds all matches of the current expression in the + text s using the match flags flags - see + match flags. For each match found calls the call-back function cb + as: cb(*this); +

If at any stage the call-back function returns false then the grep operation + terminates, otherwise continues until no further matches are found. Returns the + number of matches found. +

+
 
 unsigned int Grep(std::vector<std::string>& + v, const char* p, unsigned int flags = + match_default);Finds all matches of the current expression in the + text p using the match flags flags - see + match flags. For each match pushes a copy of what matched onto v. + Returns the number of matches found. 
 unsigned int Grep(std::vector<std::string>& + v, const std::string& s, unsigned int flags = + match_default);Finds all matches of the current expression in the + text s using the match flags flags - see + match flags. For each match pushes a copy of what matched onto v. + Returns the number of matches found. 
 unsigned int Grep(std::vector<unsigned + int>& v, const char* p, unsigned int flags + = match_default);Finds all matches of the current expression in the + text p using the match flags flags - see + match flags. For each match pushes the starting index of what matched + onto v. Returns the number of matches found. 
 unsigned int Grep(std::vector<unsigned + int>& v, const std::string& s, unsigned int + flags = match_default);Finds all matches of the current expression in the + text s using the match flags flags - see + match flags. For each match pushes the starting index of what matched + onto v. Returns the number of matches found. 
 unsigned int GrepFiles(GrepFileCallback + cb, const char* files, bool recurse = false, unsigned + int flags = match_default);Finds all matches of the current expression in the + files files using the match flags flags - see + match flags. For each match calls the call-back function cb.  +

If the call-back returns false then the algorithm returns without considering + further matches in the current file, or any further files.  +

+

The parameter files can include wild card characters '*' and '?', if the + parameter recurse is true then searches sub-directories for matching + file names.  +

+

Returns the total number of matches found.

+

May throw an exception derived from std::runtime_error if file io fails.

+
 
 unsigned int GrepFiles(GrepFileCallback + cb, const std::string& files, bool recurse = false, unsigned + int flags = match_default);Finds all matches of the current expression in the + files files using the match flags flags - see + match flags. For each match calls the call-back function cb.  +

If the call-back returns false then the algorithm returns without considering + further matches in the current file, or any further files.  +

+

The parameter files can include wild card characters '*' and '?', if the + parameter recurse is true then searches sub-directories for matching + file names.  +

+

Returns the total number of matches found.

+

May throw an exception derived from std::runtime_error if file io fails.

+
 
 unsigned int FindFiles(FindFilesCallback + cb, const char* files, bool recurse = false, unsigned + int flags = match_default);Searches files to find all those which contain + at least one match of the current expression using the match flags flags + - see match flags. For each + matching file calls the call-back function cb.  +

If the call-back returns false then the algorithm returns without considering + any further files.  +

+

The parameter files can include wild card characters '*' and '?', if the + parameter recurse is true then searches sub-directories for matching + file names.  +

+

Returns the total number of files found.

+

May throw an exception derived from std::runtime_error if file io fails.

+
 
 unsigned int FindFiles(FindFilesCallback + cb, const std::string& files, bool recurse = false, unsigned + int flags = match_default);Searches files to find all those which contain + at least one match of the current expression using the match flags flags + - see match flags. For each + matching file calls the call-back function cb.  +

If the call-back returns false then the algorithm returns without considering + any further files.  +

+

The parameter files can include wild card characters '*' and '?', if the + parameter recurse is true then searches sub-directories for matching + file names.  +

+

Returns the total number of files found.

+

May throw an exception derived from std::runtime_error if file io fails.

+
 
 std::string Merge(const std::string& in, const + std::string& fmt, bool copy = true, unsigned int + flags = match_default);Performs a search and replace operation: searches + through the string in for all occurrences of the current expression, for + each occurrence replaces the match with the format string fmt. Uses flags + to determine what gets matched, and how the format string should be treated. If + copy is true then all unmatched sections of input are copied unchanged + to output, if the flag format_first_only is set then only the first + occurrence of the pattern found is replaced. Returns the new string. See + also format string syntax, match + flags and format flags. 
 std::string Merge(const char* in, const + char* fmt, bool copy = true, unsigned int flags = + match_default);Performs a search and replace operation: searches + through the string in for all occurrences of the current expression, for + each occurrence replaces the match with the format string fmt. Uses flags + to determine what gets matched, and how the format string should be treated. If + copy is true then all unmatched sections of input are copied unchanged + to output, if the flag format_first_only is set then only the first + occurrence of the pattern found is replaced. Returns the new string. See + also format string syntax, match + flags and format flags. 
 unsigned Split(std::vector<std::string>& v, + std::string& s, unsigned flags = match_default, unsigned max_count + = ~0);Splits the input string and pushes each one onto the vector. If + the expression contains no marked sub-expressions, then one string is outputted + for each section of the input that does not match the expression. If the + expression does contain marked sub-expressions, then outputs one string for + each marked sub-expression each time a match occurs. Outputs no more than max_count + strings. Before returning, deletes from the input string s all of + the input that has been processed (all of the string if max_count was + not reached). Returns the number of strings pushed onto the vector. 
 unsigned int Position(int i = 0)const;Returns the position of what matched sub-expression i. + If i = 0 then returns the position of the whole match. Returns + RegEx::npos if the supplied index is invalid, or if the specified + sub-expression did not participate in the match. 
 unsigned int Length(int i = 0)const;Returns the length of what matched sub-expression i. + If i = 0 then returns the length of the whole match. Returns RegEx::npos + if the supplied index is invalid, or if the specified sub-expression did not + participate in the match. 
 bool Matched(int i = 0)const;Returns true if sub-expression i was matched, false otherwise. 
 unsigned int Line()const;Returns the line on which the match occurred, indexes + start from 1 not zero, if no match occurred then returns RegEx::npos. 
 unsigned int Marks() const;Returns the number of marked sub-expressions + contained in the expression. Note that this includes the whole match + (sub-expression zero), so the value returned is always >= 1. 
 std::string What(int i)const;Returns a copy of what matched sub-expression i. + If i = 0 then returns a copy of the whole match. Returns a null string + if the index is invalid or if the specified sub-expression did not participate + in a match. 
 std::string operator[](int i)const + ;Returns What(i); +

Can be used to simplify access to sub-expression matches, and make usage more + perl-like.

+
 
+

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/regex_format.html b/doc/regex_format.html new file mode 100644 index 00000000..06de5621 --- /dev/null +++ b/doc/regex_format.html @@ -0,0 +1,165 @@ + + + + Boost.Regex: Algorithm regex_format (deprecated) + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_format (deprecated)

+
+

Boost.Regex Index

+
+

+
+

+

The algorithm regex_format is deprecated; new code should use + match_results::format instead.  Existing code will continue to compile; + the following documentation is taken from the previous version of boost.regex + and will not be further updated:

+

Algorithm regex_format

+

+
#include <boost/regex.hpp> 
+

The algorithm regex_format takes the results of a match and creates a new + string based upon a format string, + regex_format can be used for search and replace operations: +

+
template <class OutputIterator, class iterator, class Allocator, class charT>
+OutputIterator regex_format(OutputIterator out,
+                            const match_results<iterator, Allocator>& m,
+                            const charT* fmt,
+                            match_flag_type flags = 0);
+
+template <class OutputIterator, class iterator, class Allocator, class charT>
+OutputIterator regex_format(OutputIterator out,
+                            const match_results<iterator, Allocator>& m,
+                            const std::basic_string<charT>& fmt,
+                            match_flag_type flags = 0);
+

The library also defines the following convenience variation of regex_format, + which returns the result directly as a string, rather than outputting to an + iterator [note - this version may not be available, or may be available in a + more limited form, depending upon your compiler's capabilities]: +

+
template <class iterator, class Allocator, class charT>
+std::basic_string<charT> regex_format
+                                 (const match_results<iterator, Allocator>& m, 
+                                  const charT* fmt,
+                                  match_flag_type flags = 0);
+
+template <class iterator, class Allocator, class charT>
+std::basic_string<charT> regex_format
+                                 (const match_results<iterator, Allocator>& m, 
+                                  const std::basic_string<charT>& fmt,
+                                  match_flag_type flags = 0);
+

Parameters to the main version of the function are passed as follows: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
 OutputIterator outAn output iterator type, the output string is sent to + this iterator. Typically this would be a std::ostream_iterator. 
 const match_results<iterator, + Allocator>& mAn instance of match_results<> obtained from + one of the matching algorithms above, and denoting what matched. 
 const charT* fmtA format string that determines how the match is + transformed into the new string. 
 match_flag_type flagsOptional flags which describe how the format string + is to be interpreted. 
+

+

Format flags are defined as follows: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 format_allEnables all syntax options (perl-like plus + extensions). 
 format_sedAllows only a sed-like syntax. 
 format_perlAllows only a perl-like syntax. 
 format_no_copyDisables copying of unmatched sections to the output + string during regex_merge operations. 
 format_first_onlyWhen this flag is set only the first occurrence will be replaced (applies to + regex_merge only). 
+

+


+   +

+

The format string syntax (and available options) is described more fully under + format strings + . +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/regex_grep.html b/doc/regex_grep.html new file mode 100644 index 00000000..131f7c84 --- /dev/null +++ b/doc/regex_grep.html @@ -0,0 +1,379 @@ + + + + Boost.Regex: Algorithm regex_grep (deprecated) + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_grep (deprecated)

+
+

Boost.Regex Index

+
+

+
+

+

The algorithm regex_grep is deprecated in favour of regex_iterator + which provides a more convenient and standard library friendly interface.

+

The following documentation is taken unchanged from the previous boost release, + and will not be updated in future.

+
+
#include <boost/regex.hpp> 
+

regex_grep allows you to search through a bidirectional-iterator range and + locate all the (non-overlapping) matches with a given regular expression. The + function is declared as: +

+
template <class Predicate, class iterator, class charT, class traits, class Allocator>
+unsigned int regex_grep(Predicate foo,
+                         iterator first,
+                         iterator last,
+                         const basic_regex<charT, traits, Allocator>& e,
+                         unsigned flags = match_default)
+

The library also defines the following convenience versions, which take either + a const charT*, or a const std::basic_string<>& in place of a pair of + iterators [note - these versions may not be available, or may be available in a + more limited form, depending upon your compiler's capabilities]: +

+
template <class Predicate, class charT, class Allocator, class traits>
+unsigned int regex_grep(Predicate foo, 
+              const charT* str, 
+              const basic_regex<charT, traits, Allocator>& e, 
+              unsigned flags = match_default);
+
+template <class Predicate, class ST, class SA, class Allocator, class charT, class traits>
+unsigned int regex_grep(Predicate foo, 
+              const std::basic_string<charT, ST, SA>& s, 
+              const basic_regex<charT, traits, Allocator>& e, 
+              unsigned flags = match_default);
+

The parameters for the primary version of regex_grep have the following + meanings: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 fooA predicate function object or function pointer, see + below for more information. 
 firstThe start of the range to search. 
 lastThe end of the range to search. 
 eThe regular expression to search for. 
 flagsThe flags that determine how matching is carried out, + one of the match_flags enumerators. 
+

+

 The algorithm finds all of the non-overlapping matches of the expression + e, for each match it fills a match_results<iterator, + Allocator> structure, which contains information on what matched, and calls + the predicate foo, passing the match_results<iterator, Allocator> as a + single argument. If the predicate returns true, then the grep operation + continues, otherwise it terminates without searching for further matches. The + function returns the number of matches found.

+

The general form of the predicate is: +

+
struct grep_predicate
+{
+   bool operator()(const match_results<iterator_type, typename expression_type::alloc_type::template rebind<sub_match<BidirectionalIterator> >::other>& m);
+};
+

Note that in almost every case the allocator parameter can be omitted, when + specifying the match_results type, + alternatively one of the typedefs cmatch, wcmatch, smatch or wsmatch can be + used. +

+

For example the regular expression "a*b" would find one match in the string + "aaaaab" and two in the string "aaabb". +

+

Remember this algorithm can be used for a lot more than implementing a version + of grep, the predicate can be and do anything that you want, grep utilities + would output the results to the screen, another program could index a file + based on a regular expression and store a set of bookmarks in a list, or a text + file conversion utility would output to file. The results of one regex_grep can + even be chained into another regex_grep to create recursive parsers. +

+

Example: convert the + example from regex_search to use regex_grep instead: +

+
#include <string> 
+#include <map> 
+#include <boost/regex.hpp> 
+
+// IndexClasses: 
+// takes the contents of a file in the form of a string 
+// and searches for all the C++ class definitions, storing 
+// their locations in a map of strings/int's 
+
+typedef std::map<std::string, int, std::less<std::string> > map_type; 
+
+const char* re = 
+   // possibly leading whitespace:   
+   "^[[:space:]]*" 
+   // possible template declaration:
+   "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
+   // class or struct:
+   "(class|struct)[[:space:]]*" 
+   // leading declspec macros etc:
+   "("
+      "\\<\\w+\\>"
+      "("
+         "[[:blank:]]*\\([^)]*\\)"
+      ")?"
+      "[[:space:]]*"
+   ")*" 
+   // the class name
+   "(\\<\\w*\\>)[[:space:]]*" 
+   // template specialisation parameters
+   "(<[^;:{]+>)?[[:space:]]*"
+   // terminate in { or :
+   "(\\{|:[^;\\{()]*\\{)";
+
+boost::regex expression(re); 
+
+class IndexClassesPred 
+{ 
+   map_type& m; 
+   std::string::const_iterator base; 
+public: 
+   IndexClassesPred(map_type& a, std::string::const_iterator b) : m(a), base(b) {} 
+   bool operator()(const  smatch& what) 
+   { 
+      // what[0] contains the whole string 
+      // what[5] contains the class name. 
+      // what[6] contains the template specialisation if any. 
+      // add class name and position to map: 
+      m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
+                what[5].first - base; 
+      return true; 
+   } 
+}; 
+
+void IndexClasses(map_type& m, const std::string& file) 
+{ 
+   std::string::const_iterator start, end; 
+   start = file.begin(); 
+   end = file.end(); 
+   regex_grep(IndexClassesPred(m, start), start, end, expression); 
+} 
+

Example: Use + regex_grep to call a global callback function: +

+
#include <string> 
+#include <map> 
+#include <boost/regex.hpp> 
+
+// purpose: 
+// takes the contents of a file in the form of a string 
+// and searches for all the C++ class definitions, storing 
+// their locations in a map of strings/int's 
+
+typedef std::map<std::string, int, std::less<std::string> > map_type; 
+
+const char* re = 
+   // possibly leading whitespace:   
+   "^[[:space:]]*" 
+   // possible template declaration:
+   "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
+   // class or struct:
+   "(class|struct)[[:space:]]*" 
+   // leading declspec macros etc:
+   "("
+      "\\<\\w+\\>"
+      "("
+         "[[:blank:]]*\\([^)]*\\)"
+      ")?"
+      "[[:space:]]*"
+   ")*" 
+   // the class name
+   "(\\<\\w*\\>)[[:space:]]*" 
+   // template specialisation parameters
+   "(<[^;:{]+>)?[[:space:]]*"
+   // terminate in { or :
+   "(\\{|:[^;\\{()]*\\{)";
+
+boost::regex expression(re);
+map_type class_index; 
+std::string::const_iterator base; 
+
+bool grep_callback(const  boost::smatch& what) 
+{ 
+   // what[0] contains the whole string 
+   // what[5] contains the class name. 
+   // what[6] contains the template specialisation if any. 
+   // add class name and position to map: 
+   class_index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
+                what[5].first - base; 
+   return true; 
+} 
+
+void IndexClasses(const std::string& file) 
+{ 
+   std::string::const_iterator start, end; 
+   start = file.begin(); 
+   end = file.end(); 
+   base = start; 
+   regex_grep(grep_callback, start, end, expression, match_default); 
+}
+  
+

Example: use + regex_grep to call a class member function, use the standard library adapters std::mem_fun + and std::bind1st to convert the member function into a predicate: +

+
#include <string> 
+#include <map> 
+#include <boost/regex.hpp> 
+#include <functional> 
+
+// purpose: 
+// takes the contents of a file in the form of a string 
+// and searches for all the C++ class definitions, storing 
+// their locations in a map of strings/int's 
+
+typedef std::map<std::string, int, std::less<std::string> > map_type; 
+
+class class_index 
+{ 
+   boost::regex expression; 
+   map_type index; 
+   std::string::const_iterator base; 
+   bool  grep_callback(boost::smatch what); 
+public: 
+   void IndexClasses(const std::string& file); 
+   class_index() 
+      : index(), 
+        expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" 
+                   "(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?" 
+                   "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?" 
+                   "(\\{|:[^;\\{()]*\\{)" 
+                   ){} 
+}; 
+
+bool  class_index::grep_callback(boost::smatch what) 
+{ 
+   // what[0] contains the whole string 
+   // what[5] contains the class name. 
+   // what[6] contains the template specialisation if any. 
+   // add class name and position to map: 
+   index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
+               what[5].first - base; 
+   return true; 
+} 
+
+void class_index::IndexClasses(const std::string& file) 
+{ 
+   std::string::const_iterator start, end; 
+   start = file.begin(); 
+   end = file.end(); 
+   base = start; 
+   regex_grep(std::bind1st(std::mem_fun(&class_index::grep_callback), this), 
+              start, 
+              end, 
+              expression); 
+} 
+  
+

Finally, C++ Builder + users can use C++ Builder's closure type as a callback argument: +

+
#include <string> 
+#include <map> 
+#include <boost/regex.hpp> 
+#include <functional> 
+
+// purpose: 
+// takes the contents of a file in the form of a string 
+// and searches for all the C++ class definitions, storing 
+// their locations in a map of strings/int's 
+
+typedef std::map<std::string, int, std::less<std::string> > map_type; 
+class class_index 
+{ 
+   boost::regex expression; 
+   map_type index; 
+   std::string::const_iterator base; 
+   typedef  boost::smatch arg_type; 
+   bool grep_callback(const arg_type& what); 
+public: 
+   typedef bool (__closure* grep_callback_type)(const arg_type&); 
+   void IndexClasses(const std::string& file); 
+   class_index() 
+      : index(), 
+        expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" 
+                   "(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?" 
+                   "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?" 
+                   "(\\{|:[^;\\{()]*\\{)" 
+                   ){} 
+}; 
+
+bool class_index::grep_callback(const arg_type& what) 
+{ 
+   // what[0] contains the whole string    
+// what[5] contains the class name.    
+// what[6] contains the template specialisation if any.    
+// add class name and position to map:    
+index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
+               what[5].first - base; 
+   return true; 
+} 
+
+void class_index::IndexClasses(const std::string& file) 
+{ 
+   std::string::const_iterator start, end; 
+   start = file.begin(); 
+   end = file.end(); 
+   base = start; 
+   class_index::grep_callback_type cl = &(this->grep_callback); 
+   regex_grep(cl, 
+            start, 
+            end, 
+            expression); 
+} 
+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/regex_match.html b/doc/regex_match.html new file mode 100644 index 00000000..890c65d6 --- /dev/null +++ b/doc/regex_match.html @@ -0,0 +1,325 @@ + + + + Boost.Regex: Algorithm regex_match + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_match

+
+

Boost.Regex Index

+
+

+
+

+

Contents

+
+
Synopsis
+
Description
+
Examples
+
+

Synopsis

+
#include <boost/regex.hpp> 
+

The algorithm regex_match determines whether a given regular expression + matches a given sequence denoted by a pair of bidirectional-iterators. The + algorithm is defined as follows; note that the result is true + only if the expression matches the whole of the input sequence. + The main use of this function is data input validation. +

template <class BidirectionalIterator, class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
+                 match_results<BidirectionalIterator, Allocator>& m,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+                 
+template <class BidirectionalIterator, class charT, class traits, class Allocator2>
+bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+                 
+template <class charT, class Allocator, class traits, class Allocator2>
+bool regex_match(const charT* str, match_results<const charT*, Allocator>& m,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+                 
+template <class ST, class SA, class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_match(const basic_string<charT, ST, SA>& s,
+                 match_results<typename basic_string<charT, ST, SA>::const_iterator, Allocator>& m, 
+                 const basic_regex<charT, traits, Allocator2>& e, 
+                 match_flag_type flags = match_default);
+                 
+template <class charT, class traits, class Allocator2>
+bool regex_match(const charT* str,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+                 
+template <class ST, class SA, class charT, class traits, class Allocator2>
+bool regex_match(const basic_string<charT, ST, SA>& s,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+
+

Description

+
template <class BidirectionalIterator, class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
+                 match_results<BidirectionalIterator, Allocator>& m,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+ +

+ Requires: Type BidirectionalIterator meets the requirements of a + Bidirectional Iterator (24.1.4).

+ +

+ Effects: Determines whether there is an exact match between the regular + expression e, and all of the character sequence [first, last), parameter + flags is used to control how the expression + is matched against the character sequence. Returns true if such a match + exists, false otherwise.

+

+ Postconditions: If the function returns false, then the effect on + parameter m is undefined, otherwise the effects on parameter m are + given in the table:

+

+

+

+
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

m.size()

+
+

e.mark_count()

+
+

m.empty()

+
+

false

+
+

m.prefix().first

+
+

first

+
+

m.prefix().second

+
+

first

+
+

m.prefix().matched

+
+

false

+
+

m.suffix().first

+
+

last

+
+

m.suffix().second

+
+

last

+
+

m.suffix().matched

+
+

false

+
+

m[0].first

+
+

first

+
+

m[0].second

+
+

last

+
+

m[0].matched

+
+

+ true if a full match was found, and false if it was a + partial match (found as a result of the match_partial flag being + set).

+

m[n].first

+
+

For all integers n < m.size(), the start of the sequence that matched + sub-expression n. Alternatively, if sub-expression n did not participate + in the match, then last.

+
+

m[n].second

+
+

For all integers n < m.size(), the end of the sequence that matched + sub-expression n. Alternatively, if sub-expression n did not participate + in the match, then last.

+
+

m[n].matched

+
+

For all integers n < m.size(), true if sub-expression n participated + in the match, false otherwise.

+

+
+

+
template <class BidirectionalIterator, class charT, class traits, class Allocator2>
+bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+ +

+ Effects: Behaves "as if" by constructing an instance of + match_results<BidirectionalIterator> what, + and then returning the result of regex_match(first, last, what, e, flags).

template <class charT, class Allocator, class traits, class Allocator2>
+bool regex_match(const charT* str, match_results<const charT*, Allocator>& m,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_match(str, str + + char_traits<charT>::length(str), m, e, flags).

template <class ST, class SA, class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_match(const basic_string<charT, ST, SA>& s,
+                 match_results<typename basic_string<charT, ST, SA>::const_iterator, Allocator>& m, 
+                 const basic_regex<charT, traits, Allocator2>& e, 
+                 match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_match(s.begin(), s.end(), m, e, + flags).

template <class charT, class traits, class Allocator2>
+bool regex_match(const charT* str,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_match(str, str + + char_traits<charT>::length(str), e, flags).

template <class ST, class SA, class charT, class traits, class Allocator2>
+bool regex_match(const basic_string<charT, ST, SA>& s,
+                 const basic_regex<charT, traits, Allocator2>& e,
+                 match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_match(s.begin(), s.end(), e, + flags). +

Examples

+

The following example + processes an ftp response: +

+
+#include <cstdlib> 
+#include <boost/regex.hpp> 
+#include <string> 
+#include <iostream> 
+
+using namespace boost; 
+
+regex expression("([0-9]+)(\\-| |$)(.*)"); 
+
+// process_ftp: 
+// on success returns the ftp response code, and fills 
+// msg with the ftp response message. 
+int process_ftp(const char* response, std::string* msg) 
+{ 
+   cmatch what; 
+   if(regex_match(response, what, expression)) 
+   { 
+      // what[0] contains the whole string 
+      // what[1] contains the response code 
+      // what[2] contains the separator character 
+      // what[3] contains the text message. 
+      if(msg) 
+         msg->assign(what[3].first, what[3].second); 
+      return std::atoi(what[1].first); 
+   } 
+   // failure did not match 
+   if(msg) 
+      msg->erase(); 
+   return -1; 
+}
+      

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/regex_merge.html b/doc/regex_merge.html new file mode 100644 index 00000000..df7cb75e --- /dev/null +++ b/doc/regex_merge.html @@ -0,0 +1,46 @@ + + + + Boost.Regex: Algorithm regex_merge (deprecated) + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_merge (deprecated)

+
+

Boost.Regex Index

+
+

+
+

+

Algorithm regex_merge has been renamed regex_replace, + existing code will continue to compile, but new code should use + regex_replace instead.

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/regex_replace.html b/doc/regex_replace.html new file mode 100644 index 00000000..aed7b8fa --- /dev/null +++ b/doc/regex_replace.html @@ -0,0 +1,208 @@ + + + + Boost.Regex: Algorithm regex_replace + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_replace

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
Description
+ Examples
+

Synopsis

+
#include <boost/regex.hpp> 
+

The algorithm regex_replace searches through a string finding + all the matches to the regular expression: for each match it then calls + match_results::format to format the string and sends the result to the + output iterator. Sections of text that do not match are copied to the output + unchanged only if the flags parameter does not have the flag + format_no_copy set. If the flag format_first_only + is set then only the first occurrence is replaced rather than all + occurrences. 

template <class OutputIterator, class BidirectionalIterator, class traits,
+          class Allocator, class charT>
+OutputIterator regex_replace(OutputIterator out,
+                           BidirectionalIterator first,
+                           BidirectionalIterator last,
+                           const basic_regex<charT, traits, Allocator>& e,
+                           const basic_string<charT>& fmt,
+                           match_flag_type flags = match_default);
+
+template <class traits, class Allocator, class charT>
+basic_string<charT> regex_replace(const basic_string<charT>& s,
+                            const basic_regex<charT, traits, Allocator>& e,
+                            const basic_string<charT>& fmt,
+                            match_flag_type flags = match_default);
+
+
+

Description

+
template <class OutputIterator, class BidirectionalIterator, class traits,
+          class Allocator, class charT>
+OutputIterator regex_replace(OutputIterator out,
+                           BidirectionalIterator first,
+                           BidirectionalIterator last,
+                           const basic_regex<charT, traits, Allocator>& e,
+                           const basic_string<charT>& fmt,
+                           match_flag_type flags = match_default);
+ +

+ Effects: Finds all the non-overlapping matches m of type match_results<BidirectionalIterator> + that occur within the sequence [first, last). If no such matches are + found and !(flags & format_no_copy) then calls std::copy(first, + last, out). Otherwise, for each match found, if !(flags & + format_no_copy) calls std::copy(m.prefix().first, m.prefix().second, + out), and then calls m.format(out, fmt, flags). Finally + if !(flags & format_no_copy) calls std::copy(last_m.suffix().first, + last_m.suffix().second, out) where last_m is a copy of the + last match found. If flags & format_first_only is non-zero + then only the first match found is replaced.

+ +

+ Returns: out.

template <class traits, class Allocator, class charT>
+basic_string<charT> regex_replace(const basic_string<charT>& s,
+                            const basic_regex<charT, traits, Allocator>& e,
+                            const basic_string<charT>& fmt,
+                            match_flag_type flags = match_default);
+ +

+ Effects: Constructs an object basic_string<charT> result, + calls regex_replace(back_inserter(result), s.begin(), s.end(), e, fmt, + flags), and then returns result. +

Examples

+

The following example takes + C/C++ source code as input, and outputs syntax highlighted HTML code.

+

+
#include <fstream>
+#include <sstream>
+#include <string>
+#include <iterator>
+#include <boost/regex.hpp>
+#include <fstream>
+#include <iostream>
+
+// purpose:
+// takes the contents of a file and transform to
+// syntax highlighted code in html format
+
+boost::regex e1, e2;
+extern const char* expression_text;
+extern const char* format_string;
+extern const char* pre_expression;
+extern const char* pre_format;
+extern const char* header_text;
+extern const char* footer_text;
+
+void load_file(std::string& s, std::istream& is)
+{
+   s.erase();
+   s.reserve(is.rdbuf()->in_avail());
+   char c;
+   while(is.get(c))
+   {
+      if(s.capacity() == s.size())
+         s.reserve(s.capacity() * 3);
+      s.append(1, c);
+   }
+}
+
+int main(int argc, const char** argv)
+{
+   try{
+   e1.assign(expression_text);
+   e2.assign(pre_expression);
+   for(int i = 1; i < argc; ++i)
+   {
+      std::cout << "Processing file " << argv[i] << std::endl;
+      std::ifstream fs(argv[i]);
+      std::string in;
+      load_file(in, fs);
+      std::string out_name(std::string(argv[i]) + std::string(".htm"));
+      std::ofstream os(out_name.c_str());
+      os << header_text;
+      // strip '<' and '>' first by outputting to a
+      // temporary string stream
+      std::ostringstream t(std::ios::out | std::ios::binary);
+      std::ostream_iterator<char, char> oi(t);
+      boost::regex_replace(oi, in.begin(), in.end(), e2, pre_format);
+      // then output to final output stream
+      // adding syntax highlighting:
+      std::string s(t.str());
+      std::ostream_iterator<char, char> out(os);
+      boost::regex_replace(out, s.begin(), s.end(), e1, format_string);
+      os << footer_text;
+   }
+   }
+   catch(...)
+   { return -1; }
+   return 0;
+}
+
+extern const char* pre_expression = "(<)|(>)|\\r";
+extern const char* pre_format = "(?1&lt;)(?2&gt;)";
+
+
+const char* expression_text = // preprocessor directives: index 1
+                              "(^[[:blank:]]*#(?:[^\\\\\\n]|\\\\[^\\n[:punct:][:word:]]*[\\n[:punct:][:word:]])*)|"
+                              // comment: index 2
+                              "(//[^\\n]*|/\\*.*?\\*/)|"
+                              // literals: index 3
+                              "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
+                              // string literals: index 4
+                              "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
+                              // keywords: index 5
+                              "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
+                              "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
+                              "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
+                              "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
+                              "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
+                              "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
+                              "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
+                              "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
+                              "|using|virtual|void|volatile|wchar_t|while)\\>"
+                              ;
+
+const char* format_string = "(?1<font color=\"#008040\">$&</font>)"
+                            "(?2<I><font color=\"#000080\">$&</font></I>)"
+                            "(?3<font color=\"#0000A0\">$&</font>)"
+                            "(?4<font color=\"#0000FF\">$&</font>)"
+                            "(?5<B>$&</B>)";
+
+const char* header_text = "<HTML>\n<HEAD>\n"
+                          "<TITLE>Auto-generated html formated source</TITLE>\n"
+                          "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=windows-1252\">\n"
+                          "</HEAD>\n"
+                          "<BODY LINK=\"#0000ff\" VLINK=\"#800080\" BGCOLOR=\"#ffffff\">\n"
+                          "<P> </P>\n<PRE>";
+
+const char* footer_text = "</PRE>\n</BODY>\n\n";
+      
+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/regex_search.html b/doc/regex_search.html new file mode 100644 index 00000000..84c7ae3c --- /dev/null +++ b/doc/regex_search.html @@ -0,0 +1,332 @@ + + + + Boost.Regex: Algorithm regex_search + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_search

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
Description
+ Examples
+

Synopsis

+
#include <boost/regex.hpp> 
+

+

The algorithm regex_search will search a range denoted by a pair of + bidirectional-iterators for a given regular expression. The algorithm uses + various heuristics to reduce the search time by only checking for a match if a + match could conceivably start at that position. The algorithm is defined as + follows: +

template <class BidirectionalIterator, 
+          class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_search(BidirectionalIterator first, BidirectionalIterator last,
+                  match_results<BidirectionalIterator, Allocator>& m,
+                  const basic_regex<charT, traits, Allocator2>& e,
+                  match_flag_type flags = match_default);
+                  
+template <class ST, class SA, 
+          class Allocator, class charT,
+          class traits, class Allocator2> 
+bool regex_search(const basic_string<charT, ST, SA>& s, 
+                  match_results<
+                      typename basic_string<charT, ST,SA>::const_iterator, 
+                      Allocator>& m, 
+                  const basic_regex<charT, traits, Allocator2>& e, 
+                  match_flag_type flags = match_default); 
+          
+template<class charT, class Allocator, class traits, 
+         class Allocator2> 
+bool regex_search(const charT* str, 
+                  match_results<const charT*, Allocator>& m, 
+                  const basic_regex<charT, traits, Allocator2>& e,
+                  match_flag_type flags = match_default);
+                  
+template <class BidirectionalIterator, class Allocator,
+          class charT, class traits>                
+bool regex_search(BidirectionalIterator first, BidirectionalIterator last, 
+                  const basic_regex<charT, traits, Allocator>& e, 
+                  match_flag_type flags = match_default); 
+                  
+template <class charT, class Allocator, 
+          class traits> 
+bool regex_search(const charT* str, 
+                  const basic_regex<charT, traits, Allocator>& e, 
+                  match_flag_type flags = match_default); 
+                  
+template<class ST, class SA,
+         class Allocator, class charT, 
+         class traits>
+bool regex_search(const basic_string<charT, ST, SA>& s,
+                  const basic_regex<charT, traits, Allocator>& e,
+                  match_flag_type flags = match_default);
+
+

Description

+
template <class BidirectionalIterator, class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_search(BidirectionalIterator first, BidirectionalIterator last,
+                  match_results<BidirectionalIterator, Allocator>& m,
+                  const basic_regex<charT, traits, Allocator2>& e,
+                  match_flag_type flags = match_default);
+ +

+ Requires: Type BidirectionalIterator meets the requirements of a + Bidirectional Iterator (24.1.4).

+ +

+ Effects: Determines whether there is some sub-sequence within + [first,last) that matches the regular expression e, parameter flags + is used to control how the expression is matched against the character + sequence. Returns true if such a sequence exists, false otherwise.

+

+ Postconditions: If the function returns false, then the effect on + parameter m is undefined, otherwise the effects on parameter m are + given in the table:

+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element

+

+ Value

+

m.size()

+
+

e.mark_count()

+
+

m.empty()

+
+

false

+
+

m.prefix().first

+
+

first

+
+

m.prefix().second

+
+

m[0].first

+
+

m.prefix().matched

+
+

m.prefix().first != m.prefix().second

+
+

m.suffix().first

+
+

m[0].second

+
+

m.suffix().second

+
+

last

+
+

m.suffix().matched

+
+

m.suffix().first != m.suffix().second

+
+

m[0].first

+
+

The start of the sequence of characters that matched the regular expression

+
+

m[0].second

+
+

The end of the sequence of characters that matched the regular expression

+
+

m[0].matched

+
+

+ true if a full match was found, and false if it was a + partial match (found as a result of the match_partial flag being + set).

+

m[n].first

+
+

For all integers n < m.size(), the start of the sequence that matched + sub-expression n. Alternatively, if sub-expression n did not participate + in the match, then last.

+
+

m[n].second

+
+

For all integers n < m.size(), the end of the sequence that matched + sub-expression n. Alternatively, if sub-expression n did not participate + in the match, then last.

+
+

m[n].matched

+
+

For all integers n < m.size(), true if sub-expression n participated + in the match, false otherwise.

+

+
+
+
+
template <class charT, class Allocator, class traits, class Allocator2>
+bool regex_search(const charT* str, match_results<const charT*, Allocator>& m,
+                  const basic_regex<charT, traits, Allocator2>& e,
+                  match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_search(str, str + + char_traits<charT>::length(str), m, e, flags).

template <class ST, class SA, class Allocator, class charT,
+          class traits, class Allocator2>
+bool regex_search(const basic_string<charT, ST, SA>& s,
+                  match_results<typename basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
+                  const basic_regex<charT, traits, Allocator2>& e,
+                  match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_search(s.begin(), s.end(), m, e, + flags).

template <class BidirectionalIterator, class Allocator, class charT,
+          class traits>
+bool regex_search(BidirectionalIterator first, BidirectionalIterator last,
+                  const basic_regex<charT, traits, Allocator>& e,
+                  match_flag_type flags = match_default);
+ +

+ Effects: Behaves "as if" by constructing an instance of + match_results<BidirectionalIterator> what, + and then returning the result of regex_search(first, last, what, e, flags).

template <class charT, class Allocator, class traits>
+bool regex_search(const charT* str,
+                  const basic_regex<charT, traits, Allocator>& e,
+                  match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_search(str, str + + char_traits<charT>::length(str), e, flags).

template <class ST, class SA, class Allocator, class charT,
+          class traits>
+bool regex_search(const basic_string<charT, ST, SA>& s,
+                  const basic_regex<charT, traits, Allocator>& e,
+                  match_flag_type flags = match_default);
+ +

+ Effects: Returns the result of regex_search(s.begin(), s.end(), e, + flags). +

Examples

+

The following example, + takes the contents of a file in the form of a string, and searches for all the + C++ class declarations in the file. The code will work regardless of the way + that std::string is implemented, for example it could easily be modified to + work with the SGI rope class, which uses a non-contiguous storage strategy.

+

+
#include <string> 
+#include <map> 
+#include <boost/regex.hpp> 
+
+// purpose: 
+// takes the contents of a file in the form of a string 
+// and searches for all the C++ class definitions, storing 
+// their locations in a map of strings/int's 
+typedef std::map<std::string, int, std::less<std::string> > map_type; 
+
+boost::regex expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)"); 
+
+void IndexClasses(map_type& m, const std::string& file) 
+{ 
+   std::string::const_iterator start, end; 
+   start = file.begin(); 
+   end = file.end(); 
+      boost::match_results<std::string::const_iterator> what; 
+   unsigned int flags = boost::match_default; 
+   while(regex_search(start, end, what, expression, flags)) 
+   { 
+      // what[0] contains the whole string 
+      // what[5] contains the class name. 
+      // what[6] contains the template specialisation if any. 
+      // add class name and position to map: 
+      m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
+                what[5].first - file.begin(); 
+      // update search position: 
+      start = what[0].second; 
+      // update flags: 
+      flags |= boost::match_prev_avail; 
+      flags |= boost::match_not_bob; 
+   } 
+}
+      
+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/regex_split.html b/doc/regex_split.html new file mode 100644 index 00000000..5f8f45b0 --- /dev/null +++ b/doc/regex_split.html @@ -0,0 +1,143 @@ + + + + Boost.Regex: Algorithm regex_split (deprecated) + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Algorithm regex_split (deprecated)

+
+

Boost.Regex Index

+
+

+
+

+

The algorithm regex_split has been deprecated in favour of the iterator + regex_token_iterator which has a more flexible and powerful interface, + as well as following the more usual standard library "pull" rather than "push" + semantics.

+

Code which uses regex_split will continue to compile, the following + documentation is taken from the previous boost.regex version:

+

Algorithm regex_split

+
#include <boost/regex.hpp> 
+

Algorithm regex_split performs a similar operation to the perl split operation, + and comes in three overloaded forms: +

+
template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2, class Alloc2>
+std::size_t regex_split(OutputIterator out, 
+                        std::basic_string<charT, Traits1, Alloc1>& s, 
+                        const basic_regex<charT, Traits2, Alloc2>& e,
+                        unsigned flags,
+                        std::size_t max_split);
+
+template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2, class Alloc2>
+std::size_t regex_split(OutputIterator out, 
+                        std::basic_string<charT, Traits1, Alloc1>& s, 
+                        const basic_regex<charT, Traits2, Alloc2>& e,
+                        unsigned flags = match_default);
+
+template <class OutputIterator, class charT, class Traits1, class Alloc1>
+std::size_t regex_split(OutputIterator out, 
+                        std::basic_string<charT, Traits1, Alloc1>& s);
+

Each version takes an output-iterator for output, and a string for input. If + the expression contains no marked sub-expressions, then the algorithm writes + one string onto the output-iterator for each section of input that does not + match the expression. If the expression does contain marked sub-expressions, + then each time a match is found, one string for each marked sub-expression will + be written to the output-iterator. No more than max_split strings will + be written to the output-iterator. Before returning, all the input processed + will be deleted from the string s (if max_split is not reached + then all of s will be deleted). Returns the number of strings written to + the output-iterator. If the parameter max_split is not specified then it + defaults to UINT_MAX. If no expression is specified, then it defaults to "\s+", + and splitting occurs on whitespace. +

+

Example: the following + function will split the input string into a series of tokens, and remove each + token from the string s: +

+
unsigned tokenise(std::list<std::string>& l, std::string& s)
+{
+   return boost::regex_split(std::back_inserter(l), s);
+}
+

Example: the following + short program will extract all of the URL's from a html file, and print them + out to cout: +

+
#include <list>
+#include <fstream>
+#include <iostream>
+#include <boost/regex.hpp>
+
+boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
+               boost::regbase::normal | boost::regbase::icase);
+
+void load_file(std::string& s, std::istream& is)
+{
+   s.erase();
+   //
+   // attempt to grow string buffer to match file size,
+   // this doesn't always work...
+   s.reserve(is.rdbuf()->in_avail());
+   char c;
+   while(is.get(c))
+   {
+      // use logarithmic growth strategy, in case
+      // in_avail (above) returned zero:
+      if(s.capacity() == s.size())
+         s.reserve(s.capacity() * 3);
+      s.append(1, c);
+   }
+}
+
+
+int main(int argc, char** argv)
+{
+   std::string s;
+   std::list<std::string> l;
+
+   for(int i = 1; i < argc; ++i)
+   {
+      std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
+      s.erase();
+      std::ifstream is(argv[i]);
+      load_file(s, is);
+      boost::regex_split(std::back_inserter(l), s, e);
+      while(l.size())
+      {
+         s = *(l.begin());
+         l.pop_front();
+         std::cout << s << std::endl;
+      }
+   }
+   return 0;
+}
+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/regex_traits.html b/doc/regex_traits.html new file mode 100644 index 00000000..74b31383 --- /dev/null +++ b/doc/regex_traits.html @@ -0,0 +1,47 @@ + + + + Boost.Regex: class regex_traits + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

class regex_traits

+
+

Boost.Regex Index

+
+

+
+

+

Under construction.

+

The current boost.regex traits class design will be migrated to that specified + in the regular + expression standardization proposal

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/sub_match.html b/doc/sub_match.html new file mode 100644 index 00000000..09849bfa --- /dev/null +++ b/doc/sub_match.html @@ -0,0 +1,427 @@ + + + + Boost.Regex: sub_match + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

sub_match

+
+

Boost.Regex Index

+
+

+
+

+

Synopsis

+

#include <boost/regex.hpp> +

+

Regular expressions are different from many simple pattern-matching algorithms + in that as well as finding an overall match they can also produce + sub-expression matches: each sub-expression being delimited in the pattern by a + pair of parentheses (...). There has to be some method for reporting + sub-expression matches back to the user: this is achieved by defining a + class match_results that acts as an + indexed collection of sub-expression matches, each sub-expression match being + contained in an object of type sub_match + . +

Objects of type sub_match may only be obtained by subscripting an object + of type match_results + . +

When the marked sub-expression denoted by an object of type sub_match<> + participated in a regular expression match then member matched evaluates + to true, and members first and second denote the + range of characters [first,second) which formed that match. + Otherwise matched is false, and members first and second + contain undefined values.

+

If an object of type sub_match<> represents sub-expression 0 + - that is to say the whole match - then member matched is always + true, unless a partial match was obtained as a result of the flag match_partial + being passed to a regular expression algorithm, in which case member matched + is false, and members first and second represent the + character range that formed the partial match.

+
+namespace boost{
+      
+template <class BidirectionalIterator>
+class sub_match : public std::pair<BidirectionalIterator, BidirectionalIterator>
+{
+public:
+   typedef typename iterator_traits<BidirectionalIterator>::value_type       value_type;
+   typedef typename iterator_traits<BidirectionalIterator>::difference_type  difference_type;
+   typedef          BidirectionalIterator                                    iterator;
+
+   bool matched;
+
+   difference_type length()const;
+   operator basic_string<value_type>()const;
+   basic_string<value_type> str()const;
+
+   int compare(const sub_match& s)const;
+   int compare(const basic_string<value_type>& s)const;
+   int compare(const value_type* s)const;
+};
+
+template <class BidirectionalIterator>
+bool operator == (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator>
+bool operator != (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator>
+bool operator < (const sub_match<BidirectionalIterator>& lhs,
+                 const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator>
+bool operator <= (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator>
+bool operator >= (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator>
+bool operator > (const sub_match<BidirectionalIterator>& lhs,
+                 const sub_match<BidirectionalIterator>& rhs);
+
+
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator == (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator != (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator < (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
+                 const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator > (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
+                 const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator >= (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator <= (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator == (const sub_match<BidirectionalIterator>& lhs,
+                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator != (const sub_match<BidirectionalIterator>& lhs,
+                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator < (const sub_match<BidirectionalIterator>& lhs,
+                 const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator > (const sub_match<BidirectionalIterator>& lhs,
+                 const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator >= (const sub_match<BidirectionalIterator>& lhs,
+                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
+template <class BidirectionalIterator, class traits, class Allocator> 
+bool operator <= (const sub_match<BidirectionalIterator>& lhs,
+                  const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
+
+template <class BidirectionalIterator> 
+bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+
+template <class BidirectionalIterator> 
+bool operator == (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+template <class BidirectionalIterator> 
+bool operator != (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+template <class BidirectionalIterator> 
+bool operator < (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+template <class BidirectionalIterator> 
+bool operator > (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+template <class BidirectionalIterator> 
+bool operator >= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+template <class BidirectionalIterator> 
+bool operator <= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+
+template <class BidirectionalIterator> 
+bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+template <class BidirectionalIterator> 
+bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+
+template <class BidirectionalIterator> 
+bool operator == (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+template <class BidirectionalIterator> 
+bool operator != (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+template <class BidirectionalIterator> 
+bool operator < (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+template <class BidirectionalIterator> 
+bool operator > (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+template <class BidirectionalIterator> 
+bool operator >= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+template <class BidirectionalIterator> 
+bool operator <= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+
+template <class charT, class traits, class BidirectionalIterator>
+basic_ostream<charT, traits>&
+   operator << (basic_ostream<charT, traits>& os,
+                const sub_match<BidirectionalIterator>& m);
+
+} // namespace boost
+

Description

+

+ sub_match members

+
typedef typename std::iterator_traits<iterator>::value_type value_type;
+

The type pointed to by the iterators.

+
typedef typename std::iterator_traits<iterator>::difference_type difference_type;
+

A type that represents the difference between two iterators.

+
typedef iterator iterator_type;
+

The iterator type.

+
iterator first
+

An iterator denoting the position of the start of the match.

+
iterator second
+

An iterator denoting the position of the end of the match.

+
bool matched
+

A Boolean value denoting whether this sub-expression participated in the match.

+
difference_type length()const;
+ +

+ Effects: returns (matched ? distance(first, second) : 0).

operator basic_string<value_type>()const;
+ +

+ Effects: returns (matched ? basic_string<value_type>(first, + second) : basic_string<value_type>()).

basic_string<value_type> str()const;
+ +

+ Effects: returns (matched ? basic_string<value_type>(first, + second) : basic_string<value_type>()).

int compare(const sub_match& s)const;
+ +

+ Effects: returns str().compare(s.str()).

int compare(const basic_string<value_type>& s)const;
+ +

+ Effects: returns str().compare(s).

int compare(const value_type* s)const;
+ +

+ Effects: returns str().compare(s).

+

+ sub_match non-member operators

+
template <class BidirectionalIterator>
+bool operator == (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+ +

+ Effects: returns lhs.compare(rhs) == 0.

template <class BidirectionalIterator>
+bool operator != (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+ +

+ Effects: returns lhs.compare(rhs) != 0.

template <class BidirectionalIterator>
+bool operator < (const sub_match<BidirectionalIterator>& lhs,
+                 const sub_match<BidirectionalIterator>& rhs);
+ +

+ Effects: returns lhs.compare(rhs) < 0.

template <class BidirectionalIterator>
+bool operator <= (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+ +

+ Effects: returns lhs.compare(rhs) <= 0.

template <class BidirectionalIterator>
+bool operator >= (const sub_match<BidirectionalIterator>& lhs,
+                  const sub_match<BidirectionalIterator>& rhs);
+ +

+ Effects: returns lhs.compare(rhs) >= 0.

template <class BidirectionalIterator>
+bool operator > (const sub_match<BidirectionalIterator>& lhs,
+                 const sub_match<BidirectionalIterator>& rhs);
+ +

+ Effects: returns lhs.compare(rhs) > 0.

template <class BidirectionalIterator> 
+bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs == rhs.str().

template <class BidirectionalIterator> 
+bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs != rhs.str().

template <class BidirectionalIterator> 
+bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs < rhs.str().

template <class BidirectionalIterator> 
+bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs > rhs.str().

template <class BidirectionalIterator> 
+bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs >= rhs.str().

template <class BidirectionalIterator> 
+bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs <= rhs.str().

template <class BidirectionalIterator> 
+bool operator == (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+ +

+ Effects: returns lhs.str() == rhs.

template <class BidirectionalIterator> 
+bool operator != (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+ +

+ Effects: returns lhs.str() != rhs.

template <class BidirectionalIterator> 
+bool operator < (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+ +

+ Effects: returns lhs.str() < rhs.

template <class BidirectionalIterator> 
+bool operator > (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+ +

+ Effects: returns lhs.str() > rhs.

template <class BidirectionalIterator> 
+bool operator >= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+ +

+ Effects: returns lhs.str() >= rhs.

template <class BidirectionalIterator> 
+bool operator <= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const* rhs); 
+ +

+ Effects: returns lhs.str() <= rhs.

template <class BidirectionalIterator> 
+bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs == rhs.str().

template <class BidirectionalIterator> 
+bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs != rhs.str().

template <class BidirectionalIterator> 
+bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs < rhs.str().

template <class BidirectionalIterator> 
+bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                 const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs > rhs.str().

template <class BidirectionalIterator> 
+bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs >= rhs.str().

template <class BidirectionalIterator> 
+bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs, 
+                  const sub_match<BidirectionalIterator>& rhs); 
+ +

+ Effects: returns lhs <= rhs.str().

template <class BidirectionalIterator> 
+bool operator == (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+ +

+ Effects: returns lhs.str() == rhs.

template <class BidirectionalIterator> 
+bool operator != (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+ +

+ Effects: returns lhs.str() != rhs.

template <class BidirectionalIterator> 
+bool operator < (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+ +

+ Effects: returns lhs.str() < rhs.

template <class BidirectionalIterator> 
+bool operator > (const sub_match<BidirectionalIterator>& lhs, 
+                 typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+ +

+ Effects: returns lhs.str() > rhs.

template <class BidirectionalIterator> 
+bool operator >= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+ +

+ Effects: returns lhs.str() >= rhs.

template <class BidirectionalIterator> 
+bool operator <= (const sub_match<BidirectionalIterator>& lhs, 
+                  typename iterator_traits<BidirectionalIterator>::value_type const& rhs); 
+ +

+ Effects: returns lhs.str() <= rhs.

template <class charT, class traits, class BidirectionalIterator>
+basic_ostream<charT, traits>&
+   operator << (basic_ostream<charT, traits>& os,
+                const sub_match<BidirectionalIterator>& m);
+ +

+ Effects: returns (os << m.str()). +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/syntax.html b/doc/syntax.html new file mode 100644 index 00000000..e03e0fe6 --- /dev/null +++ b/doc/syntax.html @@ -0,0 +1,783 @@ + + + + Boost.Regex: Regular Expression Syntax + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Regular Expression Syntax

+
+

Boost.Regex Index

+
+

+
+

+

This section covers the regular expression syntax used by this library. This is + a programmer's guide; the actual syntax presented to your program's users will + depend upon the flags used during expression compilation. +

+

Literals +

+

All characters are literals except: ".", "|", "*", "?", "+", "(", ")", "{", + "}", "[", "]", "^", "$" and "\". These characters are literals when preceded by + a "\". A literal is a character that matches itself, or matches the result of + traits_type::translate(), where traits_type is the traits template parameter to + class reg_expression.

+

Wildcard +

+

The dot character "." matches any single character except : when match_not_dot_null + is passed to the matching algorithms, the dot does not match a null character; + when match_not_dot_newline is passed to the matching algorithms, then + the dot does not match a newline character. +

+

Repeats +

+

A repeat is an expression that is repeated an arbitrary number of times. An + expression followed by "*" can be repeated any number of times including zero. + An expression followed by "+" can be repeated any number of times, but at least + once, if the expression is compiled with the flag regex_constants::bk_plus_qm + then "+" is an ordinary character and "\+" represents a repeat of once or more. + An expression followed by "?" may be repeated zero or one times only, if the + expression is compiled with the flag regex_constants::bk_plus_qm then "?" is an + ordinary character and "\?" represents the repeat zero or once operator. When + it is necessary to specify the minimum and maximum number of repeats + explicitly, the bounds operator "{}" may be used, thus "a{2}" is the letter "a" + repeated exactly twice, "a{2,4}" represents the letter "a" repeated between 2 + and 4 times, and "a{2,}" represents the letter "a" repeated at least twice with + no upper limit. Note that there must be no white-space inside the {}, and there + is no upper limit on the values of the lower and upper bounds. When the + expression is compiled with the flag regex_constants::bk_braces then "{" and + "}" are ordinary characters and "\{" and "\}" are used to delimit bounds + instead. All repeat expressions refer to the shortest possible previous + sub-expression: a single character; a character set, or a sub-expression + grouped with "()" for example. +

+

Examples: +

+

"ba*" will match all of "b", "ba", "baaa" etc. +

+

"ba+" will match "ba" or "baaaa" for example but not "b". +

+

"ba?" will match "b" or "ba". +

+

"ba{2,4}" will match "baa", "baaa" and "baaaa". +

+

Non-greedy repeats +

+

Whenever the "extended" regular expression syntax is in use (the default) then + non-greedy repeats are possible by appending a '?' after the repeat; a + non-greedy repeat is one which will match the shortest possible string. +

+

For example to match html tag pairs one could use something like: +

+

"<\s*tagname[^>]*>(.*?)<\s*/tagname\s*>" +

+

In this case $1 will contain the text between the tag pairs, and will be the + shortest possible matching string.  +

+

Parenthesis +

+

Parentheses serve two purposes, to group items together into a sub-expression, + and to mark what generated the match. For example the expression "(ab)*" would + match all of the string "ababab". The matching algorithms + regex_match and regex_search + each take an instance of match_results + that reports what caused the match, on exit from these functions the + match_results contains information both on what the whole expression + matched and on what each sub-expression matched. In the example above + match_results[1] would contain a pair of iterators denoting the final "ab" of + the matching string. It is permissible for sub-expressions to match null + strings. If a sub-expression takes no part in a match - for example if it is + part of an alternative that is not taken - then both of the iterators that are + returned for that sub-expression point to the end of the input string, and the matched + parameter for that sub-expression is false. Sub-expressions are indexed + from left to right starting from 1, sub-expression 0 is the whole expression. +

+

Non-Marking Parenthesis +

+

Sometimes you need to group sub-expressions with parenthesis, but don't want + the parenthesis to spit out another marked sub-expression, in this case a + non-marking parenthesis (?:expression) can be used. For example the following + expression creates no sub-expressions: +

+

"(?:abc)*"

+

Forward Lookahead Asserts  +

+

There are two forms of these; one for positive forward lookahead asserts, and + one for negative lookahead asserts:

+

"(?=abc)" matches zero characters only if they are followed by the expression + "abc".

+

"(?!abc)" matches zero characters only if they are not followed by the + expression "abc".

+

Independent sub-expressions

+

"(?>expression)" matches "expression" as an independent atom (the algorithm + will not backtrack into it if a failure occurs later in the expression).

+

Alternatives +

+

Alternatives occur when the expression can match either one sub-expression or + another, each alternative is separated by a "|", or a "\|" if the flag + regex_constants::bk_vbar is set, or by a newline character if the flag + regex_constants::newline_alt is set. Each alternative is the largest possible + previous sub-expression; this is the opposite behaviour from repetition + operators. +

+

Examples: +

+

"a(b|c)" could match "ab" or "ac". +

+

"abc|def" could match "abc" or "def". +

+

Sets +

+

A set is a set of characters that can match any single character that is a + member of the set. Sets are delimited by "[" and "]" and can contain literals, + character ranges, character classes, collating elements and equivalence + classes. Set declarations that start with "^" contain the complement of the + elements that follow. +

+

Examples: +

+

Character literals: +

+

"[abc]" will match either of "a", "b", or "c". +

+

"[^abc]" will match any character other than "a", "b", or "c". +

+

Character ranges: +

+

"[a-z]" will match any character in the range "a" to "z". +

+

"[^A-Z]" will match any character other than those in the range "A" to "Z". +

+

Note that character ranges are highly locale dependent if the flag + regex_constants::collate is set: they match any character that collates between + the endpoints of the range, ranges will only behave according to ASCII rules + when the default "C" locale is in effect. For example if the library is + compiled with the Win32 localization model, then [a-z] will match the ASCII + characters a-z, and also 'A', 'B' etc, but not 'Z' which collates just after + 'z'. This locale specific behaviour is disabled by default (in perl mode), and + forces ranges to collate according to ASCII character code. +

+

Character classes are denoted using the syntax "[:classname:]" within a set + declaration, for example "[[:space:]]" is the set of all whitespace characters. + Character classes are only available if the flag regex_constants::char_classes + is set. The available character classes are: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 alnumAny alpha numeric character. 
 alphaAny alphabetical character a-z and A-Z. Other + characters may also be included depending upon the locale. 
 blankAny blank character, either a space or a tab. 
 cntrlAny control character. 
 digitAny digit 0-9. 
 graphAny graphical character. 
 lowerAny lower case character a-z. Other characters may + also be included depending upon the locale. 
 printAny printable character. 
 punctAny punctuation character. 
 spaceAny whitespace character. 
 upperAny upper case character A-Z. Other characters may + also be included depending upon the locale. 
 xdigitAny hexadecimal digit character, 0-9, a-f and A-F. 
 wordAny word character - all alphanumeric characters plus + the underscore. 
 unicodeAny character whose code is greater than 255, this + applies to the wide character traits classes only. 
+

+

There are some shortcuts that can be used in place of the character classes, + provided the flag regex_constants::escape_in_lists is set then you can use: +

+

\w in place of [:word:] +

+

\s in place of [:space:] +

+

\d in place of [:digit:] +

+

\l in place of [:lower:] +

+

\u in place of [:upper:]  +

+

Collating elements take the general form [.tagname.] inside a set declaration, + where tagname is either a single character, or a name of a collating + element, for example [[.a.]] is equivalent to [a], and [[.comma.]] is + equivalent to [,]. The library supports all the standard POSIX collating + element names, and in addition the following digraphs: "ae", "ch", "ll", "ss", + "nj", "dz", "lj", each in lower, upper and title case variations. + Multi-character collating elements can result in the set matching more than one + character, for example [[.ae.]] would match two characters, but note that + [^[.ae.]] would only match one character.  +

+

+ Equivalence classes take the general form [=tagname=] inside a set declaration, + where tagname is either a single character, or a name of a collating + element, and matches any character that is a member of the same primary + equivalence class as the collating element [.tagname.]. An equivalence class is + a set of characters that collate the same, a primary equivalence class is a set + of characters whose primary sort keys are all the same (for example strings are + typically collated by character, then by accent, and then by case; the primary + sort key then relates to the character, the secondary to the accentation, and + the tertiary to the case). If there is no equivalence class corresponding to tagname + , then [=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no + locale independent method of obtaining the primary sort key for a character, + except under Win32. For other operating systems the library will "guess" the + primary sort key from the full sort key (obtained from strxfrm), so + equivalence classes are probably best considered broken under any operating + system other than Win32.  +

+

To include a literal "-" in a set declaration then: make it the first character + after the opening "[" or "[^", the endpoint of a range, a collating element, or + if the flag regex_constants::escape_in_lists is set then precede with an escape + character as in "[\-]". To include a literal "[" or "]" or "^" in a set then + make them the endpoint of a range, a collating element, or precede with an + escape character if the flag regex_constants::escape_in_lists is set. +

+

Line anchors +

+

An anchor is something that matches the null string at the start or end of a + line: "^" matches the null string at the start of a line, "$" matches the null + string at the end of a line. +

+

Back references +

+

A back reference is a reference to a previous sub-expression that has already + been matched, the reference is to what the sub-expression matched, not to the + expression itself. A back reference consists of the escape character "\" + followed by a digit "1" to "9", "\1" refers to the first sub-expression, "\2" + to the second etc. For example the expression "(.*)\1" matches any string that + is repeated about its mid-point for example "abcabc" or "xyzxyz". A back + reference to a sub-expression that did not participate in any match, matches + the null string: NB this is different to some other regular expression + matchers. Back references are only available if the expression is compiled with + the flag regex_constants::bk_refs set. +

+

Characters by code +

+

This is an extension to the algorithm that is not available in other libraries, + it consists of the escape character followed by the digit "0" followed by the + octal character code. For example "\023" represents the character whose octal + code is 23. Where ambiguity could occur use parentheses to break the expression + up: "\0103" represents the character whose code is 103, "(\010)3" represents the + character 10 followed by "3". To match characters by their hexadecimal code, + use \x followed by a string of hexadecimal digits, optionally enclosed inside + {}, for example \xf0 or \x{aff}, notice the latter example is a Unicode + character.

+

Word operators +

+

The following operators are provided for compatibility with the GNU regular + expression library. +

+

"\w" matches any single character that is a member of the "word" character + class, this is identical to the expression "[[:word:]]". +

+

"\W" matches any single character that is not a member of the "word" character + class, this is identical to the expression "[^[:word:]]". +

+

"\<" matches the null string at the start of a word. +

+

"\>" matches the null string at the end of the word. +

+

"\b" matches the null string at either the start or the end of a word. +

+

"\B" matches a null string within a word. +

+

The start of the sequence passed to the matching algorithms is considered to be + a potential start of a word unless the flag match_not_bow is set. The end of + the sequence passed to the matching algorithms is considered to be a potential + end of a word unless the flag match_not_eow is set. +

+

Buffer operators +

+

The following operators are provided for compatibility with the GNU regular + expression library, and Perl regular expressions: +

+

"\`" matches the start of a buffer. +

+

"\A" matches the start of the buffer. +

+

"\'" matches the end of a buffer. +

+

"\z" matches the end of a buffer. +

+

"\Z" matches the end of a buffer, or possibly one or more new line characters + followed by the end of the buffer. +

+

A buffer is considered to consist of the whole sequence passed to the matching + algorithms, unless the flags match_not_bob or match_not_eob are set. +

+

Escape operator +

+

The escape character "\" has several meanings. +

+

Inside a set declaration the escape character is a normal character unless the + flag regex_constants::escape_in_lists is set in which case whatever follows the + escape is a literal character regardless of its normal meaning. +

+

The escape operator may introduce an operator for example: back references, or + a word operator. +

+

The escape operator may make the following character normal, for example "\*" + represents a literal "*" rather than the repeat operator. +

+

Single character escape sequences +

+

The following escape sequences are aliases for single characters: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Escape sequence + Character code + Meaning +  
 \a + 0x07 + Bell character. +  
 \f + 0x0C + Form feed. +  
 \n + 0x0A + Newline character. +  
 \r + 0x0D + Carriage return. +  
 \t + 0x09 + Tab character. +  
 \v + 0x0B + Vertical tab. +  
 \e + 0x1B + ASCII Escape character. +  
 \0dd + 0dd + An octal character code, where dd is one or + more octal digits. +  
 \xXX + 0xXX + A hexadecimal character code, where XX is one or more + hexadecimal digits. +  
 \x{XX} + 0xXX + A hexadecimal character code, where XX is one or more + hexadecimal digits, optionally a unicode character. +  
 \cZ + z-@ + An ASCII escape sequence control-Z, where Z is any + ASCII character greater than or equal to the character code for '@'. +  
+

+

Miscellaneous escape sequences: +

+

The following are provided mostly for perl compatibility, but note that there + are some differences in the meanings of \l \L \u and \U: +
+   +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 \w + Equivalent to [[:word:]]. +  
 \W + Equivalent to [^[:word:]]. +  
 \s + Equivalent to [[:space:]]. +  
 \S + Equivalent to [^[:space:]]. +  
 \d + Equivalent to [[:digit:]]. +  
 \D + Equivalent to [^[:digit:]]. +  
 \l + Equivalent to [[:lower:]]. +  
 \L + Equivalent to [^[:lower:]]. +  
 \u + Equivalent to [[:upper:]]. +  
 \U + Equivalent to [^[:upper:]]. +  
 \C + Any single character, equivalent to '.'. +  
 \X + Match any Unicode combining character sequence, for + example "a\x 0301" (a letter a with an acute). +  
 \Q + The begin quote operator, everything that follows is + treated as a literal character until a \E end quote operator is found. +  
 \E + The end quote operator, terminates a sequence begun + with \Q. +  
+

+

What gets matched? +

+

+ When the expression is compiled as a perl-compatible regex then the matching + algorithms will perform a depth first search on the state machine and report + the first match found. +

+ When the expression is compiled as a POSIX-compatible regex then the matching + algorithms will match the first possible matching string, if more than one + string starting at a given location can match then it matches the longest + possible string, unless the flag match_any is set, in which case the first + match encountered is returned. Use of the match_any option can reduce the time + taken to find the match - but is only useful if the user is less concerned + about what matched - for example it would not be suitable for search and + replace operations. In cases where there are multiple possible matches all + starting at the same location, and all of the same length, then the match + chosen is the one with the longest first sub-expression, if that is the same + for two or more matches, then the second sub-expression will be examined and so + on. +

+ The following table of examples illustrates the main differences between perl and + POSIX regular expression matching rules: +

+

+

+

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Expression

+
+

Text

+
+

POSIX leftmost longest match

+
+

ECMAScript depth first search match

+
+

+ a|ab +

+
+

+ xaby +

+
+

+ "ab"

+

+ "a"

+

+ .*([[:alnum:]]+).*

+

+ " abc def xyz "

+

$0 = " abc def xyz "
+ $1 = "abc"

+
+

$0 = " abc def xyz "
+ $1 = "z"

+
+

+ .*(a|xayy)

+

+ zzxayyzz

+

+ "zzxayy"

+

"zzxa"

+

+
+
+

+
+

These differences between perl matching rules, and POSIX matching rules, mean + that these two regular expression syntaxes differ not only in the features + offered, but also in the form that the state machine takes and/or the + algorithms used to traverse the state machine. +


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/syntax_option_type.html b/doc/syntax_option_type.html new file mode 100644 index 00000000..ece58de0 --- /dev/null +++ b/doc/syntax_option_type.html @@ -0,0 +1,334 @@ + + + + Boost.Regex: syntax_option_type + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

syntax_option_type

+
+

Boost.Regex Index

+
+

+
+

+

Synopsis

+

Type syntax_option_type is an implementation defined bitmask type that controls + how a regular expression string is to be interpreted.  For convenience + note that all the constants listed here are also duplicated within the scope + of class template basic_regex.

+
namespace std{ namespace regex_constants{
+
+typedef bitmask_type syntax_option_type;
+// these flags are standardized:
+static const syntax_option_type normal;
+static const syntax_option_type icase;
+static const syntax_option_type nosubs;
+static const syntax_option_type optimize;
+static const syntax_option_type collate;
+static const syntax_option_type ECMAScript = normal;
+static const syntax_option_type JavaScript = normal;
+static const syntax_option_type JScript = normal;
+static const syntax_option_type basic;
+static const syntax_option_type extended;
+static const syntax_option_type awk;
+static const syntax_option_type grep;
+static const syntax_option_type egrep;
+static const syntax_option_type sed = basic;
+static const syntax_option_type perl;
// these are boost.regex specific:
static const syntax_option_type escape_in_lists;
static const syntax_option_type char_classes;
static const syntax_option_type intervals;
static const syntax_option_type limited_ops;
static const syntax_option_type newline_alt;
static const syntax_option_type bk_plus_qm;
static const syntax_option_type bk_braces;
static const syntax_option_type bk_parens;
static const syntax_option_type bk_refs;
static const syntax_option_type bk_vbar;
static const syntax_option_type use_except;
static const syntax_option_type failbit;
static const syntax_option_type literal;
static const syntax_option_type nocollate;
static const syntax_option_type perlex;
static const syntax_option_type emacs;
+} // namespace regex_constants +} // namespace std
+

Description

+

The type syntax_option_type is an implementation defined bitmask + type (17.3.2.1.2). Setting its elements has the effects listed in the table + below, a valid value of type syntax_option_type will always have + exactly one of the elements normal, basic, extended, awk, grep, egrep, sed + or perl set.

+

Note that for convenience all the constants listed here are duplicated within + the scope of class template basic_regex, so you can use any of:

+
boost::regex_constants::constant_name
+

or

+
boost::regex::constant_name
+

or

+
boost::wregex::constant_name
+

in an interchangeable manner.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Element

+
+

Effect if set

+
+

normal

+
+

Specifies that the grammar recognized by the regular expression engine uses its + normal semantics: that is the same as that given in the ECMA-262, ECMAScript + Language Specification, Chapter 15 part 10, RegExp (Regular Expression) Objects + (FWD.1).

+

boost.regex also recognises most perl-compatible extensions in this mode.

+
+

icase

+
+

Specifies that matching of regular expressions against a character container + sequence shall be performed without regard to case.

+
+

nosubs

+
+

Specifies that when a regular expression is matched against a character + container sequence, then no sub-expression matches are to be stored in the + supplied match_results structure.

+
+

optimize

+
+

Specifies that the regular expression engine should pay more attention to the + speed with which regular expressions are matched, and less to the speed with + which regular expression objects are constructed. Otherwise it has no + detectable effect on the program output.  This currently has no effect for + boost.regex.

+
+

collate

+
+

Specifies that character ranges of the form "[a-b]" should be locale sensitive.

+
+

ECMAScript

+
+

The same as normal.

+
+

JavaScript

+
+

The same as normal.

+
+

JScript

+
+

The same as normal.

+
+

basic

+
+

Specifies that the grammar recognized by the regular expression engine is the + same as that used by POSIX basic regular expressions in IEEE Std 1003.1-2001, + Portable Operating System Interface (POSIX ), Base Definitions and Headers, + Section 9, Regular Expressions (FWD.1). +

+
+

extended

+
+

Specifies that the grammar recognized by the regular expression engine is the + same as that used by POSIX extended regular expressions in IEEE Std + 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions and + Headers, Section 9, Regular Expressions (FWD.1).

+
+

awk

+
+

Specifies that the grammar recognized by the regular expression engine is the + same as that used by POSIX utility awk in IEEE Std 1003.1-2001, Portable + Operating System Interface (POSIX ), Shells and Utilities, Section 4, awk + (FWD.1).

+

That is to say: the same as POSIX extended syntax, but with escape sequences in + character classes permitted.

+
+

grep

+
+

Specifies that the grammar recognized by the regular expression engine is the + same as that used by POSIX utility grep in IEEE Std 1003.1-2001, Portable + Operating System Interface (POSIX ), Shells and Utilities, Section 4, + Utilities, grep (FWD.1).

+

That is to say, the same as POSIX basic syntax, but with the newline character + acting as an alternation character in addition to "|".

+
+

egrep

+
+

Specifies that the grammar recognized by the regular expression engine is the + same as that used by POSIX utility grep when given the -E option in IEEE Std + 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and + Utilities, Section 4, Utilities, grep (FWD.1).

+

That is to say, the same as POSIX extended syntax, but with the newline + character acting as an alternation character in addition to "|".

+
+

sed

+
+

The same as basic.

+
+

perl

+
+

The same as normal.

+
+

+

+

The following constants are specific to this particular regular expression + implementation and do not appear in the + regular expression standardization proposal:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
regbase::escape_in_listsAllows the use of the escape "\" character in sets of + characters, for example [\]] represents the set of characters containing only + "]". If this flag is not set then "\" is an ordinary character inside sets.
regbase::char_classesWhen this bit is set, character classes [:classname:] + are allowed inside character set declarations, for example "[[:word:]]" + represents the set of all characters that belong to the character class "word".
regbase:: intervalsWhen this bit is set, repetition intervals are + allowed, for example "a{2,4}" represents a repeat of between 2 and 4 letter + a's.
regbase:: limited_opsWhen this bit is set all of "+", "?" and "|" are + ordinary characters in all situations.
regbase:: newline_altWhen this bit is set, then the newline character "\n" + has the same effect as the alternation operator "|".
regbase:: bk_plus_qmWhen this bit is set then "\+" represents the one or + more repetition operator and "\?" represents the zero or one repetition + operator. When this bit is not set then "+" and "?" are used instead.
regbase:: bk_bracesWhen this bit is set then "\{" and "\}" are used for + bounded repetitions and "{" and "}" are normal characters. This is the opposite + of default behavior.
regbase:: bk_parensWhen this bit is set then "\(" and "\)" are used to + group sub-expressions and "(" and ")" are ordinary characters, this is the + opposite of default behaviour.
regbase:: bk_refsWhen this bit is set then back references are + allowed.
regbase:: bk_vbarWhen this bit is set then "\|" represents the + alternation operator and "|" is an ordinary character. This is the opposite of + default behaviour.
regbase:: use_exceptWhen this bit is set then a bad_expression + exception will be thrown on error.  Use of this flag is deprecated - + reg_expression will always throw on error.
regbase:: failbitThis bit is set on error, if regbase::use_except is + not set, then this bit should be checked to see if a regular expression is + valid before usage.
regbase::literalAll characters in the string are treated as literals, + there are no special characters or escape sequences.
regbase::emacsProvides compatibility with the emacs + editor, equivalent to: bk_braces | bk_parens | bk_refs | bk_vbar.
+

+

+


+

+

Revised + + 11 April 2003 +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/thread_safety.html b/doc/thread_safety.html new file mode 100644 index 00000000..2537c2b2 --- /dev/null +++ b/doc/thread_safety.html @@ -0,0 +1,66 @@ + + + + Boost.Regex: Thread Safety + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Thread Safety

+
+

Boost.Regex Index

+
+

+
+

+

Class basic_regex<> and its typedefs regex and wregex are thread safe, in + that compiled regular expressions can safely be shared between threads. The + matching algorithms regex_match, regex_search, regex_grep, regex_format and + regex_merge are all re-entrant and thread safe. Class match_results is now + thread safe, in that the results of a match can be safely copied from one + thread to another (for example one thread may find matches and push + match_results instances onto a queue, while another thread pops them off the + other end), otherwise use a separate instance of match_results per thread. +

+

The POSIX API functions are all re-entrant and thread safe, regular expressions + compiled with regcomp can also be shared between threads. +

+

The class RegEx is only thread safe if each thread gets its own RegEx instance + (apartment threading) - this is a consequence of RegEx handling both compiling + and matching regular expressions. +

+

Finally note that changing the global locale invalidates all compiled regular + expressions, therefore calling set_locale from one thread while another + uses regular expressions will produce unpredictable results. +

+

+ There is also a requirement that there is only one thread executing prior to + the start of main(). +

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/uarrow.gif b/doc/uarrow.gif new file mode 100644 index 0000000000000000000000000000000000000000..6afd20c3857127c21fc9bcd52ec347e32c21578c GIT binary patch literal 1666 zcmZ?wbhEHb)Me0S_|CwP5S3S3-SznXe?>*j_wTyvm7!+LipuF=EDz;>HaU#`S8h zGbFWQMU_0|jnYg*R~!4zoU;5Lr*b+kNLjFqO}~hCs-EvWIiqyb(4{u9n_SYiN0ser zo_b{M{x_V;vAo&^{Mxyq#wF4=oeHiqM6_c}V>j8vuGVp>F%O-V(SCB__7}X`>4L_U zVm6)9HjN6d6V!a?>4h#eja{wfQZZ%y0 zcBxI;Cfl@)`abn8dE3M44hNU*NNPV`GVOBRyc?a%?}et+Ma?OV1z ztZr|7`^k*Hb7j4o3a6defBuPS+ET~7&h+g3q~w&@bLM{g z{v)%b=KJ^W@87?F{`~p9d-twgyLRr}xnswU?cKX~>(;Gn*REZ+| zz|hB_12PztCmcBbGstqvcx+g3u$ey;0G87DHmwvOZbsa=mLSozov4QVirN zkYH*$(Ipxfv7$$*g>5oV5eFyJlXU-iJ*75&36GSIaY!gPakWlRX$xFzo8-`7pp?lY znV<6SW%6gfo)61`o?wS!MoswK)ayS4bmo_)pa{hw_vEcTxM zPDanKulTRm%EV*qzc;dPkof=bLaW;>i<48e&;Q%IfLkeak%MEWvmvi+)u$)tnah)3 zzYobUlt|ZDsp2B;lOu4zr|;?Xi81Aljx7>18v}w8=dCnx5x37caI~QDN1{wB=fsDu zys-rjSlMMb6pskc%4B2mG~_!tncsDW+KG_rFW=`ov1BY|m#=3r>d5VUbL<2|ufd}y zN(W?>L__VgzDUVA*Qs(Sw#pQ7C?(vR=Ao3Rl+=-VrMi-oT@Uca~Kt?WUK2AMe@GvcE{Cd?3LKD+Sc5{|74 z97Oz04zdV)C~!SoA`+l@KtO%#7iQtykpJx0QyQw=S@@C{Bm{XmvjpCgs&PoWyynza z@uT9&9F9kLS>?oz@Jp^ZI4$|G+;eeJr9BJYY?{Avi<7`&Kf{hFC9km6PN_VMY=XP) zNx5#;y81nfPvKKUYHVPNAq&@R!H??_wKsmy<&~@n@K7`k-1@Aaqm`jm%<+RYi=e9D zj7f$i_#F_GNyn$i)?1(nYpwDZ(GJJ{0WJvobi zo9%_7!m6+A3JI)9KcCK6Z~8WwZ_~3MkMqrz&zGy7qPKj`1lHGq4R j5{XCdyxsBG&w8Q7<)tgD4zTO*shVAswMUtek--`O>qI)6 literal 0 HcmV?d00001 diff --git a/example/Jamfile b/example/Jamfile index f57f7a32..b4979bfa 100644 --- a/example/Jamfile +++ b/example/Jamfile @@ -38,6 +38,7 @@ test-suite regex-examples : [ regex-test-run snippets/regex_grep_example_4.cpp : $(BOOST_ROOT)/boost/rational.hpp ] [ regex-test-run snippets/regex_match_example.cpp : -auto ] [ regex-test-run snippets/regex_merge_example.cpp : $(BOOST_ROOT)/boost/rational.hpp ] +[ regex-test-run snippets/regex_replace_example.cpp : $(BOOST_ROOT)/boost/rational.hpp ] [ regex-test-run snippets/regex_search_example.cpp : $(BOOST_ROOT)/boost/rational.hpp ] [ regex-test-run 
snippets/regex_split_example_1.cpp : -auto ] [ regex-test-run snippets/regex_split_example_2.cpp : $(BOOST_ROOT)/libs/regex/index.htm ] diff --git a/example/jgrep/main.cpp b/example/jgrep/main.cpp index 1277f7fa..9fc8b697 100644 --- a/example/jgrep/main.cpp +++ b/example/jgrep/main.cpp @@ -232,14 +232,14 @@ void HandleArg(const char* arg) { if(words_only == 0) { - e.set_expression(arg, use_case ? regbase::normal : regbase::normal | regbase::icase); + e.set_expression(arg, use_case ? regex::normal : regbase::normal | regbase::icase); //ei.set_expression(arg); } else { char* buf = new char[std::strlen(arg) + 8]; std::sprintf(buf, "\\<%s\\>", arg); - e.set_expression(buf, use_case ? regbase::normal : regbase::normal | regbase::icase); + e.set_expression(buf, use_case ? regex::normal : regbase::normal | regbase::icase); //ei.set_expression(buf); delete[] buf; } @@ -261,7 +261,7 @@ void HandleArg(const char* arg) } if(words_only) std::strcat(buf2, "\\>"); - e.set_expression(buf2, use_case ? regbase::normal : regbase::normal | regbase::icase); + e.set_expression(buf2, use_case ? 
regex::normal : regbase::normal | regbase::icase); //ei.set_expression(buf2); delete[] buf2; } diff --git a/example/snippets/credit_card_example.cpp b/example/snippets/credit_card_example.cpp index 08ab41d2..7a59a99a 100644 --- a/example/snippets/credit_card_example.cpp +++ b/example/snippets/credit_card_example.cpp @@ -35,12 +35,12 @@ const std::string human_format("\\1-\\2-\\3-\\4"); std::string machine_readable_card_number(const std::string& s) { - return boost::regex_merge(s, e, machine_format, boost::match_default | boost::format_sed); + return boost::regex_replace(s, e, machine_format, boost::match_default | boost::format_sed); } std::string human_readable_card_number(const std::string& s) { - return boost::regex_merge(s, e, human_format, boost::match_default | boost::format_sed); + return boost::regex_replace(s, e, human_format, boost::match_default | boost::format_sed); } #include diff --git a/example/snippets/regex_replace_example.cpp b/example/snippets/regex_replace_example.cpp new file mode 100644 index 00000000..ec2c0626 --- /dev/null +++ b/example/snippets/regex_replace_example.cpp @@ -0,0 +1,137 @@ +/* + * + * Copyright (c) 1998-2002 + * Dr John Maddock + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear + * in supporting documentation. Dr John Maddock makes no representations + * about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_replace_example.cpp + * VERSION see + * DESCRIPTION: regex_replace example: + * converts a C++ file to syntax highlighted HTML. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +// purpose: +// takes the contents of a file and transform to +// syntax highlighted code in html format + +boost::regex e1, e2; +extern const char* expression_text; +extern const char* format_string; +extern const char* pre_expression; +extern const char* pre_format; +extern const char* header_text; +extern const char* footer_text; + +void load_file(std::string& s, std::istream& is) +{ + s.erase(); + s.reserve(is.rdbuf()->in_avail()); + char c; + while(is.get(c)) + { + if(s.capacity() == s.size()) + s.reserve(s.capacity() * 3); + s.append(1, c); + } +} + +int main(int argc, const char** argv) +{ + try{ + e1.assign(expression_text); + e2.assign(pre_expression); + for(int i = 1; i < argc; ++i) + { + std::cout << "Processing file " << argv[i] << std::endl; + std::ifstream fs(argv[i]); + std::string in; + load_file(in, fs); + std::string out_name = std::string(argv[i]) + std::string(".htm"); + std::ofstream os(out_name.c_str()); + os << header_text; + // strip '<' and '>' first by outputting to a + // temporary string stream + std::ostringstream t(std::ios::out | std::ios::binary); + std::ostream_iterator oi(t); + boost::regex_replace(oi, in.begin(), in.end(), e2, pre_format); + // then output to final output stream + // adding syntax highlighting: + std::string s(t.str()); + std::ostream_iterator out(os); + boost::regex_replace(out, s.begin(), s.end(), e1, format_string); + os << footer_text; + } + } + catch(...) 
+ { return -1; } + return 0; +} + +extern const char* pre_expression = "(<)|(>)|\\r"; +extern const char* pre_format = "(?1<)(?2>)"; + + +const char* expression_text = // preprocessor directives: index 1 + "(^[[:blank:]]*#(?:[^\\\\\\n]|\\\\[^\\n[:punct:][:word:]]*[\\n[:punct:][:word:]])*)|" + // comment: index 2 + "(//[^\\n]*|/\\*.*?\\*/)|" + // literals: index 3 + "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|" + // string literals: index 4 + "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|" + // keywords: index 5 + "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import" + "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall" + "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool" + "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete" + "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto" + "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected" + "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast" + "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned" + "|using|virtual|void|volatile|wchar_t|while)\\>" + ; + +const char* format_string = "(?1$&)" + "(?2$&)" + "(?3$&)" + "(?4$&)" + "(?5$&)"; + +const char* header_text = "\n\n" + "Auto-generated html formated source\n" + "\n" + "\n" + "\n" + "

\n
";
+
+const char* footer_text = "
\n\n\n"; + + + + + + + + + + diff --git a/example/snippets/regex_split_example_2.cpp b/example/snippets/regex_split_example_2.cpp index 94d63295..9d237d38 100644 --- a/example/snippets/regex_split_example_2.cpp +++ b/example/snippets/regex_split_example_2.cpp @@ -28,7 +28,7 @@ #include boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"", - boost::regbase::normal | boost::regbase::icase); + boost::regex::normal | boost::regbase::icase); void load_file(std::string& s, std::istream& is) { diff --git a/faq.htm b/faq.htm deleted file mode 100644 index fb3795b6..00000000 --- a/faq.htm +++ /dev/null @@ -1,205 +0,0 @@ - - - - - - -Regex++ - FAQ - - - - -

 

- - - - - - -

C++ Boost

-

Regex++, FAQ.

-

Copyright (c) 1998-2001

-

Dr John Maddock

-

Permission to use, copy, modify, - distribute and sell this software and its documentation - for any purpose is hereby granted without fee, provided - that the above copyright notice appear in all copies and - that both that copyright notice and this permission - notice appear in supporting documentation. Dr John - Maddock makes no representations about the suitability of - this software for any purpose. It is provided "as is" - without express or implied warranty.

-
- -

Q. Why does using parenthesis in a -regular expression change the result of a match?

- -

Parentheses don't only mark; they determine what the best -match is as well. regex++ tries to follow the POSIX standard -leftmost longest rule for determining what matched. So if there -is more than one possible match after considering the whole -expression, it looks next at the first sub-expression and then -the second sub-expression and so on. So...

- -
"(0*)([0-9]*)" against "00123" would produce
-$1 = "00"
-$2 = "123"
- -

where as

- -
"0*([0-9)*" against "00123" would produce
-$1 = "00123"
- -

If you think about it, had $1 only matched the "123", -this would be "less good" than the match "00123" -which is both further to the left and longer. If you want $1 to -match only the "123" part, then you need to use -something like:

- -
"0*([1-9][0-9]*)"
- -

as the expression.

- -

Q. Configure says that my compiler is -unable to merge template instances, what does this mean?

- -

A. When you compile template code, you can end up with the -same template instances in multiple translation units - this will -lead to link time errors unless your compiler/linker is smart -enough to merge these template instances into a single record in -the executable file. If you see this warning after running -configure, then you can still link to libregex++.a if:

- -
    -
  1. You use only the low-level template classes (reg_expression<> - match_results<> etc), from a single translation - unit, and use no other part of regex++.
  2. -
  3. You use only the POSIX API functions (regcomp regexec etc), - and no other part of regex++.
  4. -
  5. You use only the high level class RegEx, and no other - part of regex++.
  6. -
- -

Another option is to create a master include file, which -#include's all the regex++ source files, and all the source files -in which you use regex++. You then compile and link this master -file as a single translation unit.

- -

Q. Configure says that my compiler is -unable to merge template instances from archive files, what does -this mean?

- -

A. When you compile template code, you can end up with the -same template instances in multiple translation units - this will -lead to link time errors unless your compiler/linker is smart -enough to merge these template instances into a single record in -the executable file. Some compilers are able to do this for -normal .cpp or .o files, but fail if the object file has been -placed in a library archive. If you see this warning after -running configure, then you can still link to libregex++.a if:

- -
    -
  1. You use only the low-level template classes (reg_expression<> - match_results<> etc), and use no other part of - regex++.
  2. -
  3. You use only the POSIX API functions (regcomp regexec etc), - and no other part of regex++.
  4. -
  5. You use only the high level class RegEx, and no other - part of regex++.
  6. -
- -

Another option is to add the regex++ source files directly to -your project instead of linking to libregex++.a, generally you -should do this only if you are getting link time errors with -libregex++.a.

- -

Q. Configure says that my compiler can't -merge templates containing switch statements, what does this -mean?

- -

A. Some compilers can't merge templates that contain static -data - this includes switch statements which implicitly generate -static data as well as code. Principally this affects the egcs -compiler - but note gcc 2.81 also suffers from this problem - the -compiler will compile and link the code - but the code will not -run because the code and the static data it uses have become -separated. The default behaviour of regex++ is to try and fix -this problem by declaring "problem" templates inside -unnamed namespaces, so that the templates have internal linkage. -Note that this can result in a great deal of code bloat. If the -compiler doesn't support namespaces, or if code bloat becomes a -problem, then follow the guidelines above for placing all the -templates used in a single translation unit, and edit boost/regex/config.hpp -so that BOOST_REGEX_NO_TEMPLATE_SWITCH_MERGE is no longer defined. -

- -

Q. I can't get regex++ to work with -escape characters, what's going on?

- -

A. If you embed regular expressions in C++ code, then remember -that escape characters are processed twice: once by the C++ -compiler, and once by the regex++ expression compiler, so to pass -the regular expression \d+ to regex++, you need to embed "\\d+" -in your code. Likewise to match a literal backslash you will need -to embed "\\\\" in your code.

- -

Q. Why don't character ranges work -properly?
-A. The POSIX standard specifies that character range expressions -are locale sensitive - so for example the expression [A-Z] will -match any collating element that collates between 'A' and 'Z'. -That means that for most locales other than "C" or -"POSIX", [A-Z] would match the single character 't' for -example, which is not what most people expect - or at least not -what most people have come to expect from regular expression -engines. For this reason, the default behaviour of regex++ is to -turn locale sensitive collation off by setting the regbase::nocollate -compile time flag (this is set by regbase::normal). However if -you set a non-default compile time flag - for example regbase::extended -or regbase::basic, then locale dependent collation will be -enabled, this also applies to the POSIX API functions which use -either regbase::extended or regbase::basic internally, in the -latter case use REG_NOCOLLATE in combination with either -REG_BASIC or REG_EXTENDED when invoking regcomp if you don't want -locale sensitive collation. [Note - when regbase::nocollate is in -effect, the library behaves "as if" the LC_COLLATE -locale category were always "C", regardless of what it's -actually set to - end note].

- -

 Q. Why can't I use the "convenience" -versions of query_match/reg_search/reg_grep/reg_format/reg_merge? -

- -

A. These versions may or may not be available depending upon -the capabilities of your compiler, the rules determining the -format of these functions are quite complex - and only the -versions visible to a standard compliant compiler are given in -the help. To find out what your compiler supports, run <boost/regex.hpp> -through your C++ pre-processor, and search the output file for -the function that you are interested in.

- -

Q. Why are there no throw specifications -on any of the functions? What exceptions can the library throw? -

- -

A. Not all compilers support (or honor) throw specifications, -others support them but with reduced efficiency. Throw -specifications may be added at a later date as compilers begin to -handle this better. The library should throw only three types of -exception: boost::bad_expression can be thrown by reg_expression -when compiling a regular expression, std::runtime_error can be -thrown when a call to reg_expression::imbue tries to open a -message catalogue that doesn't exist or when a call to RegEx::GrepFiles -or RegEx::FindFiles tries to open a file that cannot be opened, -finally std::bad_alloc can be thrown by just about any of the -functions in this library.

- -
- -

Copyright Dr -John Maddock 1998-2000 all rights reserved.

- - diff --git a/format_string.htm b/format_string.htm deleted file mode 100644 index 41a33842..00000000 --- a/format_string.htm +++ /dev/null @@ -1,243 +0,0 @@ - - - - - - -Regex++, Format String Reference - - - - -

 

- - - - - - -

C++ Boost

-

Regex++, Format - String Reference.

-

Copyright (c) 1998-2001

-

Dr John Maddock

-

Permission to use, copy, modify, - distribute and sell this software and its documentation - for any purpose is hereby granted without fee, provided - that the above copyright notice appear in all copies and - that both that copyright notice and this permission - notice appear in supporting documentation. Dr John - Maddock makes no representations about the suitability of - this software for any purpose. It is provided "as is" - without express or implied warranty.

-
- -
- -

Format String Syntax

- -

Format strings are used by the algorithms regex_format and regex_merge, and are -used to transform one string into another.

- -

There are three kinds of format string: sed, perl and extended; -the extended syntax is the default so this is covered first.

- -

Extended format syntax

- -

In format strings, all characters are treated as literals -except: ()$\?:

- -

To use any of these as literals you must prefix them with the -escape character \

- -

The following special sequences are recognized:

- -

Grouping:

- -

Use the parenthesis characters ( and ) to group sub-expressions -within the format string, use \( and \) to represent literal '(' -and ')'.

- -

Sub-expression expansions:

- -

The following perl like expressions expand to a particular -matched sub-expression:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 $`Expands to all the text from - the end of the previous match to the start of the current - match, if there was no previous match in the current - operation, then everything from the start of the input - string to the start of the match. 
 $'Expands to all the text from - the end of the match to the end of the input string. 
 $&Expands to all of the - current match. 
 $0Expands to all of the - current match. 
 $NExpands to the text that - matched sub-expression N. 
- -


- -

Conditional expressions:

- -

Conditional expressions allow two different format strings to -be selected dependent upon whether a sub-expression participated -in the match or not:

- -

?Ntrue_expression:false_expression

- -

Executes true_expression if sub-expression N -participated in the match, otherwise executes false_expression.

- -

Example: suppose we search for "(while)|(for)" then -the format string "?1WHILE:FOR" would output what -matched, but in upper case.

- -

Escape sequences:

- -

The following escape sequences are also allowed:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 \aThe bell character. 
 \fThe form feed character. 
 \nThe newline character. 
 \rThe carriage return - character. 
 \tThe tab character. 
 \vA vertical tab character. 
 \xA hexadecimal character - - for example \x0D. 
 \x{}A possible unicode - hexadecimal character - for example \x{1A0} 
 \cxThe ASCII escape character - x, for example \c@ is equivalent to escape-@. 
 \eThe ASCII escape character. 
 \ddAn octal character constant, - for example \10. 
- -


- -

Perl format strings

- -

Perl format strings are the same as the default syntax except -that the characters ()?: have no special meaning.

- -

Sed format strings

- -

Sed format strings use only the characters \ and & as -special characters.

- -

\n where n is a digit, is expanded to the nth sub-expression.

- -

& is expanded to the whole of the match (equivalent to \0). -

- -

Other escape sequences are expanded as per the default syntax. -
-

- -
- -

Copyright Dr -John Maddock 1998-2000 all rights reserved.

- - diff --git a/hl_ref.htm b/hl_ref.htm deleted file mode 100644 index 44b803a1..00000000 --- a/hl_ref.htm +++ /dev/null @@ -1,572 +0,0 @@ - - - - - - -Regex++, RegEx Class Reference - - - - -

 

- - - - - - -

C++ Boost

-

Regex++, RegEx Class - Reference.

-

Copyright (c) 1998-2001

-

Dr John Maddock

-

Permission to use, copy, modify, - distribute and sell this software and its documentation - for any purpose is hereby granted without fee, provided - that the above copyright notice appear in all copies and - that both that copyright notice and this permission - notice appear in supporting documentation. Dr John - Maddock makes no representations about the suitability of - this software for any purpose. It is provided "as is" - without express or implied warranty.

-
- -
- -

Class RegEx

- -

#include <boost/cregex.hpp>

- -

The class RegEx provides a high level simplified interface to -the regular expression library, this class only handles narrow -character strings, and regular expressions always follow the -"normal" syntax - that is the same as the standard -POSIX extended syntax, but with locale specific collation -disabled, and escape characters inside character set declarations -are allowed.

- -
typedef bool (*GrepCallback)(const RegEx& expression);
-typedef bool (*GrepFileCallback)(const char* file, const RegEx& expression);
-typedef bool (*FindFilesCallback)(const char* file);
-
-class  RegEx
-{
-public:
-   RegEx();
-   RegEx(const RegEx& o);
-   ~RegEx();
-   RegEx(const char* c, bool icase = false);
-   explicit RegEx(const std::string& s, bool icase = false);
-   RegEx& operator=(const RegEx& o);
-   RegEx& operator=(const char* p);
-   RegEx& operator=(const std::string& s);
-   unsigned int SetExpression(const char* p, bool icase = false);
-   unsigned int SetExpression(const std::string& s, bool icase = false);
-   std::string Expression()const;
-   //
-   // now matching operators: 
-   // 
-   bool Match(const char* p, unsigned int flags = match_default);
-   bool Match(const std::string& s, unsigned int flags = match_default); 
-   bool Search(const char* p, unsigned int flags = match_default); 
-   bool Search(const std::string& s, unsigned int flags = match_default); 
-   unsigned int Grep(GrepCallback cb, const char* p, unsigned int flags = match_default); 
-   unsigned int Grep(GrepCallback cb, const std::string& s, unsigned int flags = match_default); 
-   unsigned int Grep(std::vector<std::string>& v, const char* p, unsigned int flags = match_default); 
-   unsigned int Grep(std::vector<std::string>& v, const std::string& s, unsigned int flags = match_default); 
-   unsigned int Grep(std::vector<unsigned int>& v, const char* p, unsigned int flags = match_default); 
-   unsigned int Grep(std::vector<unsigned int>& v, const std::string& s, unsigned int flags = match_default); 
-   unsigned int GrepFiles(GrepFileCallback cb, const char* files, bool recurse = false, unsigned int flags = match_default); 
-   unsigned int GrepFiles(GrepFileCallback cb, const std::string& files, bool recurse = false, unsigned int flags = match_default); 
-   unsigned int FindFiles(FindFilesCallback cb, const char* files, bool recurse = false, unsigned int flags = match_default); 
-   unsigned int FindFiles(FindFilesCallback cb, const std::string& files, bool recurse = false, unsigned int flags = match_default); 
-   std::string Merge(const std::string& in, const std::string& fmt, bool copy = true, unsigned int flags = match_default); 
-   std::string Merge(const char* in, const char* fmt, bool copy = true, unsigned int flags = match_default); 
-   unsigned Split(std::vector<std::string>& v, std::string& s, unsigned flags = match_default, unsigned max_count = ~0); 
-   // 
-   // now operators for returning what matched in more detail: 
-   // 
-   unsigned int Position(int i = 0)const; 
-   unsigned int Length(int i = 0)const; 
-   bool Matched(int i = 0)const;
-   unsigned int Line()const; 
-   unsigned int Marks() const; 
-   std::string What(int i)const; 
-   std::string operator[](int i)const ; 
-
-   static const unsigned int npos;
-};     
- -

Member functions for class RegEx are defined as follows:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 RegEx();Default constructor, - constructs an instance of RegEx without any valid - expression. 
 RegEx(const - RegEx& o);Copy constructor, all the - properties of parameter o are copied. 
 RegEx(const char* - c, bool icase = false);Constructs an instance of - RegEx, setting the expression to c, if icase - is true then matching is insensitive to case, - otherwise it is sensitive to case. Throws bad_expression - on failure. 
 RegEx(const std::string& - s, bool icase = false);Constructs an instance of - RegEx, setting the expression to s, if icase is - true then matching is insensitive to case, - otherwise it is sensitive to case. Throws bad_expression - on failure. 
 RegEx& operator=(const - RegEx& o);Default assignment operator. 
 RegEx& operator=(const - char* p);Assignment operator, - equivalent to calling SetExpression(p, false). - Throws bad_expression on failure. 
 RegEx& operator=(const - std::string& s);Assignment operator, - equivalent to calling SetExpression(s, false). - Throws bad_expression on failure. 
 unsigned int - SetExpression(const char* p, bool icase = false);Sets the current expression - to p, if icase is true then matching - is insensitive to case, otherwise it is sensitive to case. - Throws bad_expression on failure. 
 unsigned int - SetExpression(const std::string& s, bool - icase = false);Sets the current expression - to s, if icase is true then matching - is insensitive to case, otherwise it is sensitive to case. - Throws bad_expression on failure. 
 std::string Expression()const;Returns a copy of the - current regular expression. 
 bool Match(const - char* p, unsigned int flags = - match_default);Attempts to match the - current expression against the text p using the - match flags flags - see match flags. - Returns true if the expression matches the whole - of the input string. 
 bool Match(const - std::string& s, unsigned int flags = - match_default) ;Attempts to match the - current expression against the text s using the - match flags flags - see match flags. - Returns true if the expression matches the whole - of the input string. 
 bool Search(const - char* p, unsigned int flags = - match_default);Attempts to find a match for - the current expression somewhere in the text p - using the match flags flags - see match flags. - Returns true if the match succeeds. 
 bool Search(const - std::string& s, unsigned int flags = - match_default) ;Attempts to find a match for - the current expression somewhere in the text s - using the match flags flags - see match flags. - Returns true if the match succeeds. 
 unsigned int - Grep(GrepCallback cb, const char* p, unsigned - int flags = match_default);Finds all matches of the - current expression in the text p using the match - flags flags - see match flags. - For each match found calls the call-back function cb - as: cb(*this);

If at any stage the call-back function - returns false then the grep operation terminates, - otherwise continues until no further matches are found. - Returns the number of matches found.

-
 
 unsigned int - Grep(GrepCallback cb, const std::string& s, unsigned - int flags = match_default);Finds all matches of the - current expression in the text s using the match - flags flags - see match flags. - For each match found calls the call-back function cb - as: cb(*this);

If at any stage the call-back function - returns false then the grep operation terminates, - otherwise continues until no further matches are found. - Returns the number of matches found.

-
 
 unsigned int - Grep(std::vector<std::string>& v, const char* - p, unsigned int flags = match_default);Finds all matches of the - current expression in the text p using the match - flags flags - see match flags. - For each match pushes a copy of what matched onto v. - Returns the number of matches found. 
 unsigned int - Grep(std::vector<std::string>& v, const - std::string& s, unsigned int flags = - match_default);Finds all matches of the - current expression in the text s using the match - flags flags - see match flags. - For each match pushes a copy of what matched onto v. - Returns the number of matches found. 
 unsigned int - Grep(std::vector<unsigned int>& v, const - char* p, unsigned int flags = - match_default);Finds all matches of the - current expression in the text p using the match - flags flags - see match flags. - For each match pushes the starting index of what matched - onto v. Returns the number of matches found. 
 unsigned int - Grep(std::vector<unsigned int>& v, const - std::string& s, unsigned int flags = - match_default);Finds all matches of the - current expression in the text s using the match - flags flags - see match flags. - For each match pushes the starting index of what matched - onto v. Returns the number of matches found. 
 unsigned int - GrepFiles(GrepFileCallback cb, const char* - files, bool recurse = false, unsigned - int flags = match_default);Finds all matches of the - current expression in the files files using the - match flags flags - see match flags. - For each match calls the call-back function cb. 

If - the call-back returns false then the algorithm returns - without considering further matches in the current file, - or any further files. 

-

The parameter files can include wild card - characters '*' and '?', if the parameter recurse - is true then searches sub-directories for matching file - names. 

-

Returns the total number of matches found.

-

May throw an exception derived from std::runtime_error - if file io fails.

-
 
 unsigned int - GrepFiles(GrepFileCallback cb, const std::string& - files, bool recurse = false, unsigned - int flags = match_default);Finds all matches of the - current expression in the files files using the - match flags flags - see match flags. - For each match calls the call-back function cb. 

If - the call-back returns false then the algorithm returns - without considering further matches in the current file, - or any further files. 

-

The parameter files can include wild card - characters '*' and '?', if the parameter recurse - is true then searches sub-directories for matching file - names. 

-

Returns the total number of matches found.

-

May throw an exception derived from std::runtime_error - if file io fails.

-
 
 unsigned int - FindFiles(FindFilesCallback cb, const char* - files, bool recurse = false, unsigned - int flags = match_default);Searches files to - find all those which contain at least one match of the - current expression using the match flags flags - - see match - flags. For each matching file calls the call-back - function cb. 

If the call-back returns false then - the algorithm returns without considering any further - files. 

-

The parameter files can include wild card - characters '*' and '?', if the parameter recurse - is true then searches sub-directories for matching file - names. 

-

Returns the total number of files found.

-

May throw an exception derived from std::runtime_error - if file io fails.

-
 
 unsigned int - FindFiles(FindFilesCallback cb, const std::string& - files, bool recurse = false, unsigned - int flags = match_default);Searches files to - find all those which contain at least one match of the - current expression using the match flags flags - - see match - flags. For each matching file calls the call-back - function cb. 

If the call-back returns false then - the algorithm returns without considering any further - files. 

-

The parameter files can include wild card - characters '*' and '?', if the parameter recurse - is true then searches sub-directories for matching file - names. 

-

Returns the total number of files found.

-

May throw an exception derived from std::runtime_error - if file io fails.

-
 
 std::string Merge(const - std::string& in, const std::string& fmt, bool - copy = true, unsigned int flags = - match_default);Performs a search and - replace operation: searches through the string in - for all occurrences of the current expression, for each - occurrence replaces the match with the format string fmt. - Uses flags to determine what gets matched, and how - the format string should be treated. If copy is - true then all unmatched sections of input are copied - unchanged to output, if the flag format_first_only - is set then only the first occurrence of the pattern found - is replaced. Returns the new string. See also format string - syntax, match - flags and format flags. 
 std::string Merge(const - char* in, const char* fmt, bool copy = true, - unsigned int flags = match_default);Performs a search and - replace operation: searches through the string in - for all occurrences of the current expression, for each - occurrence replaces the match with the format string fmt. - Uses flags to determine what gets matched, and how - the format string should be treated. If copy is - true then all unmatched sections of input are copied - unchanged to output, if the flag format_first_only - is set then only the first occurrence of the pattern found - is replaced. Returns the new string. See also format string - syntax, match - flags and format flags. 
 unsigned Split(std::vector<std::string>& - v, std::string& s, unsigned flags = - match_default, unsigned max_count = ~0);Splits the input string and pushes each - one onto the vector. If the expression contains no marked - sub-expressions, then one string is outputted for each - section of the input that does not match the expression. - If the expression does contain marked sub-expressions, - then outputs one string for each marked sub-expression - each time a match occurs. Outputs no more than max_count - strings. Before returning, deletes from the input - string s all of the input that has been processed - (all of the string if max_count was not reached). - Returns the number of strings pushed onto the vector. 
 unsigned int - Position(int i = 0)const;Returns the position of what - matched sub-expression i. If i = 0 then - returns the position of the whole match. Returns RegEx::npos - if the supplied index is invalid, or if the specified sub-expression - did not participate in the match. 
 unsigned int - Length(int i = 0)const;Returns the length of what - matched sub-expression i. If i = 0 then - returns the length of the whole match. Returns RegEx::npos - if the supplied index is invalid, or if the specified sub-expression - did not participate in the match. 
 bool Matched(int i - = 0)const;Returns true if sub-expression i was - matched, false otherwise. 
 unsigned int - Line()const;Returns the line on which - the match occurred, indexes start from 1 not zero, if no - match occurred then returns RegEx::npos. 
 unsigned int Marks() - const;Returns the number of marked - sub-expressions contained in the expression. Note that - this includes the whole match (sub-expression zero), so - the value returned is always >= 1. 
 std::string What(int - i)const;Returns a copy of what - matched sub-expression i. If i = 0 then - returns a copy of the whole match. Returns a null string - if the index is invalid or if the specified sub-expression - did not participate in a match. 
 std::string operator[](int - i)const ;Returns What(i);

Can - be used to simplify access to sub-expression matches, and - make usage more perl-like.

-
 
- -
- -

Copyright Dr -John Maddock 1998-2000 all rights reserved.

- - diff --git a/include/boost/regex/v4/basic_regex.hpp b/include/boost/regex/v4/basic_regex.hpp index efca053c..f84fbc41 100644 --- a/include/boost/regex/v4/basic_regex.hpp +++ b/include/boost/regex/v4/basic_regex.hpp @@ -64,7 +64,7 @@ public: typedef typename Allocator::size_type size_type; typedef Allocator allocator_type; typedef Allocator alloc_type; - typedef regbase::flag_type flag_type; + typedef regex_constants::syntax_option_type flag_type; // locale_type // placeholder for actual locale type used by the // traits class to localise *this. @@ -72,15 +72,15 @@ public: public: explicit reg_expression(const Allocator& a = Allocator()); - explicit reg_expression(const charT* p, flag_type f = regbase::normal, const Allocator& a = Allocator()); - reg_expression(const charT* p1, const charT* p2, flag_type f = regbase::normal, const Allocator& a = Allocator()); + explicit reg_expression(const charT* p, flag_type f = regex_constants::normal, const Allocator& a = Allocator()); + reg_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal, const Allocator& a = Allocator()); reg_expression(const charT* p, size_type len, flag_type f, const Allocator& a = Allocator()); reg_expression(const reg_expression&); ~reg_expression(); reg_expression& BOOST_REGEX_CALL operator=(const reg_expression&); reg_expression& BOOST_REGEX_CALL operator=(const charT* ptr) { - set_expression(ptr, regbase::normal | regbase::use_except); + set_expression(ptr, regex_constants::normal | regex_constants::use_except); return *this; } @@ -88,84 +88,84 @@ public: // assign: reg_expression& assign(const reg_expression& that) { return *this = that; } - reg_expression& assign(const charT* ptr, flag_type f = regbase::normal) + reg_expression& assign(const charT* ptr, flag_type f = regex_constants::normal) { - set_expression(ptr, f | regbase::use_except); + set_expression(ptr, f | regex_constants::use_except); return *this; } reg_expression& assign(const charT* first, const 
charT* last, - flag_type f = regbase::normal) + flag_type f = regex_constants::normal) { - set_expression(first, last, f | regbase::use_except); + set_expression(first, last, f | regex_constants::use_except); return *this; } #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !(defined(__IBMCPP__) && (__IBMCPP__ <= 502)) template - unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string& p, flag_type f = regbase::normal) + unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string& p, flag_type f = regex_constants::normal) { return set_expression(p.data(), p.data() + p.size(), f); } template - explicit reg_expression(const std::basic_string& p, flag_type f = regbase::normal, const Allocator& a = Allocator()) - : data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0) { set_expression(p, f | regbase::use_except); } + explicit reg_expression(const std::basic_string& p, flag_type f = regex_constants::normal, const Allocator& a = Allocator()) + : data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0) { set_expression(p, f | regex_constants::use_except); } template - reg_expression(I first, I last, flag_type f = regbase::normal, const Allocator& al = Allocator()) + reg_expression(I first, I last, flag_type f = regex_constants::normal, const Allocator& al = Allocator()) : data(al), pkmp(0), error_code_(REG_EMPTY), _expression(0) { size_type len = last-first; scoped_array a(new charT[len]); std::copy(first, last, a.get()); - set_expression(a.get(), a.get() + len, f | regbase::use_except); + set_expression(a.get(), a.get() + len, f | regex_constants::use_except); } template reg_expression& BOOST_REGEX_CALL operator=(const std::basic_string& p) { - set_expression(p.c_str(), p.c_str() + p.size(), regbase::normal | regbase::use_except); + set_expression(p.c_str(), p.c_str() + p.size(), regex_constants::normal | regex_constants::use_except); return *this; } template reg_expression& BOOST_REGEX_CALL assign( const std::basic_string& s, - flag_type f = 
regbase::normal) + flag_type f = regex_constants::normal) { - set_expression(s.c_str(), s.c_str() + s.size(), f | regbase::use_except); + set_expression(s.c_str(), s.c_str() + s.size(), f | regex_constants::use_except); return *this; } template reg_expression& BOOST_REGEX_CALL assign(fwd_iterator first, fwd_iterator last, - flag_type f = regbase::normal) + flag_type f = regex_constants::normal) { size_type len = last-first; scoped_array a(new charT[len]); std::copy(first, last, a.get()); - set_expression(a.get(), a.get() + len, f | regbase::use_except); + set_expression(a.get(), a.get() + len, f | regex_constants::use_except); return *this; } #else - unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string& p, flag_type f = regbase::normal) - { return set_expression(p.data(), p.data() + p.size(), f | regbase::use_except); } + unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string& p, flag_type f = regex_constants::normal) + { return set_expression(p.data(), p.data() + p.size(), f | regex_constants::use_except); } - reg_expression(const std::basic_string& p, flag_type f = regbase::normal, const Allocator& a = Allocator()) - : data(a), pkmp(0) { set_expression(p, f | regbase::use_except); } + reg_expression(const std::basic_string& p, flag_type f = regex_constants::normal, const Allocator& a = Allocator()) + : data(a), pkmp(0) { set_expression(p, f | regex_constants::use_except); } reg_expression& BOOST_REGEX_CALL operator=(const std::basic_string& p) { - set_expression(p.c_str(), p.c_str() + p.size(), regbase::normal | regbase::use_except); + set_expression(p.c_str(), p.c_str() + p.size(), regex_constants::normal | regex_constants::use_except); return *this; } reg_expression& BOOST_REGEX_CALL assign( const std::basic_string& s, - flag_type f = regbase::normal) + flag_type f = regex_constants::normal) { - set_expression(s.c_str(), s.c_str() + s.size(), f | regbase::use_except); + set_expression(s.c_str(), s.c_str() + s.size(), f | 
regex_constants::use_except); return *this; } @@ -234,8 +234,8 @@ public: // but are available for compatibility with earlier versions. allocator_type BOOST_REGEX_CALL allocator()const; const charT* BOOST_REGEX_CALL expression()const { return (this->error_code() ? 0 : _expression); } - unsigned int BOOST_REGEX_CALL set_expression(const charT* p, const charT* end, flag_type f = regbase::normal); - unsigned int BOOST_REGEX_CALL set_expression(const charT* p, flag_type f = regbase::normal) { return set_expression(p, p + traits_type::length(p), f); } + unsigned int BOOST_REGEX_CALL set_expression(const charT* p, const charT* end, flag_type f = regex_constants::normal); + unsigned int BOOST_REGEX_CALL set_expression(const charT* p, flag_type f = regex_constants::normal) { return set_expression(p, p + traits_type::length(p), f); } // // this should be private but template friends don't work: const traits_type& get_traits()const { return traits_inst; } @@ -336,9 +336,9 @@ public: typedef typename reg_expression::size_type size_type; explicit basic_regex(const Allocator& a = Allocator()) : reg_expression(a){} - explicit basic_regex(const charT* p, flag_type f = regbase::normal, const Allocator& a = Allocator()) + explicit basic_regex(const charT* p, flag_type f = regex_constants::normal, const Allocator& a = Allocator()) : reg_expression(p,f,a){} - basic_regex(const charT* p1, const charT* p2, flag_type f = regbase::normal, const Allocator& a = Allocator()) + basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal, const Allocator& a = Allocator()) : reg_expression(p1,p2,f,a){} basic_regex(const charT* p, size_type len, flag_type f, const Allocator& a = Allocator()) : reg_expression(p,len,f,a){} @@ -357,11 +357,11 @@ public: } #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !(defined(__IBMCPP__) && (__IBMCPP__ <= 502)) template - explicit basic_regex(const std::basic_string& p, flag_type f = regbase::normal, const Allocator& a = Allocator()) + 
explicit basic_regex(const std::basic_string& p, flag_type f = regex_constants::normal, const Allocator& a = Allocator()) : reg_expression(p,f,a){} template - basic_regex(I first, I last, flag_type f = regbase::normal, const Allocator& al = Allocator()) + basic_regex(I first, I last, flag_type f = regex_constants::normal, const Allocator& al = Allocator()) : reg_expression(first, last, f, a){} template @@ -371,7 +371,7 @@ public: return *this; } #else - basic_regex(const std::basic_string& p, flag_type f = regbase::normal, const Allocator& a = Allocator()) + basic_regex(const std::basic_string& p, flag_type f = regex_constants::normal, const Allocator& a = Allocator()) : reg_expression(p,f,a){} basic_regex& BOOST_REGEX_CALL operator=(const std::basic_string& p) diff --git a/include/boost/regex/v4/match_flags.hpp b/include/boost/regex/v4/match_flags.hpp index 0badb03d..8e033b20 100644 --- a/include/boost/regex/v4/match_flags.hpp +++ b/include/boost/regex/v4/match_flags.hpp @@ -29,6 +29,7 @@ #ifdef __cplusplus namespace boost{ + namespace regex_constants{ #endif typedef enum _match_flags @@ -54,12 +55,14 @@ typedef enum _match_flags match_all = match_stop << 1, // must find the whole of input even if match_any is set match_perl = match_all << 1, // Use perl matching rules match_posix = match_perl << 1, // Use POSIX matching rules - match_max = match_posix, + match_nosubs = match_posix << 1, // don't trap marked subs + match_max = match_nosubs, - format_all = 0, // enable all extentions to sytax + format_perl = 0, // perl style replacement + format_default = 0, // ditto. format_sed = match_max << 1, // sed style replacement. - format_perl = format_sed << 1, // perl style replacement. - format_no_copy = format_perl << 1, // don't copy non-matching segments. + format_all = format_sed << 1, // enable all extentions to sytax. + format_no_copy = format_all << 1, // don't copy non-matching segments. format_first_only = format_no_copy << 1, // Only replace first occurance. 
format_is_if = format_first_only << 1 // internal use only. @@ -67,7 +70,6 @@ typedef enum _match_flags #if defined(BOOST_MSVC) && (BOOST_MSVC <= 1200) typedef unsigned long match_flag_type; -} // namespace boost #else typedef match_flags match_flag_type; @@ -87,10 +89,43 @@ inline match_flags& operator|=(match_flags& m1, match_flags m2) { m1 = m1|m2; return m1; } inline match_flags& operator^=(match_flags& m1, match_flags m2) { m1 = m1^m2; return m1; } +#endif +#endif +#ifdef __cplusplus +} // namespace regex_constants +// +// import names into boost for backwards compatiblity: +// +using regex_constants::match_flag_type; +using regex_constants::match_default; +using regex_constants::match_not_bol; +using regex_constants::match_not_eol; +using regex_constants::match_not_bob; +using regex_constants::match_not_eob; +using regex_constants::match_not_bow; +using regex_constants::match_not_eow; +using regex_constants::match_not_dot_newline; +using regex_constants::match_not_dot_null; +using regex_constants::match_prev_avail; +using regex_constants::match_init; +using regex_constants::match_any; +using regex_constants::match_not_null; +using regex_constants::match_continuous; +using regex_constants::match_partial; +using regex_constants::match_stop; +using regex_constants::match_all; +using regex_constants::match_perl; +using regex_constants::match_posix; +using regex_constants::match_nosubs; +using regex_constants::match_max; +using regex_constants::format_all; +using regex_constants::format_sed; +using regex_constants::format_perl; +using regex_constants::format_no_copy; +using regex_constants::format_first_only; +using regex_constants::format_is_if; } // namespace boost #endif // __cplusplus -#endif // BOOST_MSVC - -#endif +#endif // include guard diff --git a/include/boost/regex/v4/perl_matcher.hpp b/include/boost/regex/v4/perl_matcher.hpp index f99c740d..fcbd9882 100644 --- a/include/boost/regex/v4/perl_matcher.hpp +++ b/include/boost/regex/v4/perl_matcher.hpp @@ 
-65,7 +65,7 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next, const charT* p = reinterpret_cast(set_+1); iterator ptr; unsigned int i; - bool icase = e.flags() & regbase::icase; + bool icase = e.flags() & regex_constants::icase; if(next == last) return next; @@ -119,7 +119,7 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next, // try and match a range, NB only a single character can match if(set_->cranges) { - if(e.flags() & regbase::nocollate) + if((e.flags() & regex_constants::collate) == 0) s1 = s2; else traits_inst.transform(s1, s2); diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index 001df9bf..80501227 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -44,11 +44,11 @@ perl_matcher::perl_matcher(BidiIter pstate = 0; m_match_flags = f; - icase = re.flags() & regbase::icase; + icase = re.flags() & regex_constants::icase; estimate_max_state_count(static_cast(0)); if(!(m_match_flags & (match_perl|match_posix))) { - if(re.flags() & regbase::perlex) + if(re.flags() & regex_constants::perlex) m_match_flags |= match_perl; else m_match_flags |= match_posix; @@ -104,7 +104,7 @@ bool perl_matcher::match() position = base; search_base = base; state_count = 0; - m_presult->set_size(re.mark_count(), base, last); + m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), base, last); m_presult->set_base(base); if(m_match_flags & match_posix) m_result = *m_presult; @@ -156,7 +156,7 @@ bool perl_matcher::find() position = base; search_base = base; pstate = access::first(re); - m_presult->set_size(re.mark_count(), base, last); + m_presult->set_size((m_match_flags & match_nosubs) ? 
1 : re.mark_count(), base, last); m_presult->set_base(base); m_match_flags |= match_init; } @@ -174,7 +174,7 @@ bool perl_matcher::find() ++position; } // reset $` start: - m_presult->set_size(re.mark_count(), search_base, last); + m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last); if(base != search_base) m_match_flags |= match_prev_avail; } @@ -242,7 +242,8 @@ bool perl_matcher::match_endmark() int index = static_cast(pstate)->index; if(index > 0) { - m_presult->set_second(position, index); + if((m_match_flags & match_nosubs) == 0) + m_presult->set_second(position, index); } else if(index < 0) { @@ -706,7 +707,7 @@ bool perl_matcher::find_restart_lit int len = info->len; const char_type* x = info->pstr; int j = 0; - bool icase = re.flags() & regbase::icase; + bool icase = re.flags() & regex_constants::icase; while (position != last) { while((j > -1) && (x[j] != traits_inst.translate(*position, icase))) diff --git a/include/boost/regex/v4/perl_matcher_non_recursive.hpp b/include/boost/regex/v4/perl_matcher_non_recursive.hpp index 1659a1b0..e7f26cec 100644 --- a/include/boost/regex/v4/perl_matcher_non_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -311,8 +311,11 @@ bool perl_matcher::match_startmark( default: { assert(index > 0); - push_matched_paren(index, (*m_presult)[index]); - m_presult->set_first(position, index); + if((m_match_flags & match_nosubs) == 0) + { + push_matched_paren(index, (*m_presult)[index]); + m_presult->set_first(position, index); + } pstate = pstate->next.p; break; } diff --git a/include/boost/regex/v4/perl_matcher_recursive.hpp b/include/boost/regex/v4/perl_matcher_recursive.hpp index d7dfcfdf..a832257f 100644 --- a/include/boost/regex/v4/perl_matcher_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_recursive.hpp @@ -105,12 +105,19 @@ bool perl_matcher::match_startmark( default: { assert(index > 0); - backup_subex sub(*m_presult, index); - 
m_presult->set_first(position, index); - pstate = pstate->next.p; - r = match_all_states(); - if(r == false) - sub.restore(*m_presult); + if((m_match_flags & match_nosubs) == 0) + { + backup_subex sub(*m_presult, index); + m_presult->set_first(position, index); + pstate = pstate->next.p; + r = match_all_states(); + if(r == false) + sub.restore(*m_presult); + } + else + { + pstate = pstate->next.p; + } break; } } diff --git a/include/boost/regex/v4/regbase.hpp b/include/boost/regex/v4/regbase.hpp index f9f202f8..2e3294b0 100644 --- a/include/boost/regex/v4/regbase.hpp +++ b/include/boost/regex/v4/regbase.hpp @@ -52,18 +52,24 @@ public: failbit = use_except << 1, // error flag literal = failbit << 1, // all characters are literals icase = literal << 1, // characters are matched regardless of case - nocollate = icase << 1, // don't use locale specific collation - perlex = nocollate << 1, // perl extensions + nocollate = 0, // don't use locale specific collation (deprecated) + collate = icase << 1, // use locale specific collation + perlex = collate << 1, // perl extensions + nosubs = perlex << 1, // don't mark sub-expressions + optimize = 0, // not really supported - basic = char_classes | intervals | limited_ops | bk_braces | bk_parens | bk_refs, - extended = char_classes | intervals | bk_refs, + basic = char_classes | intervals | limited_ops | bk_braces | bk_parens | bk_refs | collate, + extended = char_classes | intervals | bk_refs | collate, normal = perlex | escape_in_lists | char_classes | intervals | bk_refs | nocollate, emacs = bk_braces | bk_parens | bk_refs | bk_vbar, awk = extended | escape_in_lists, grep = basic | newline_alt, egrep = extended | newline_alt, sed = basic, - perl = normal + perl = normal, + ECMAScript = normal, + JavaScript = normal, + JScript = normal }; typedef unsigned int flag_type; @@ -91,6 +97,51 @@ protected: flag_type _flags; }; +// +// provide std lib proposal compatible constants: +// +namespace regex_constants{ + + enum flag_type_ 
+ { + escape_in_lists = ::boost::regbase::escape_in_lists, + char_classes = ::boost::regbase::char_classes, + intervals = ::boost::regbase::intervals, + limited_ops = ::boost::regbase::limited_ops, + newline_alt = ::boost::regbase::newline_alt, + bk_plus_qm = ::boost::regbase::bk_plus_qm, + bk_braces = ::boost::regbase::bk_braces, + bk_parens = ::boost::regbase::bk_parens, + bk_refs = ::boost::regbase::bk_refs, + bk_vbar = ::boost::regbase::bk_vbar, + + use_except = ::boost::regbase::use_except, + failbit = ::boost::regbase::failbit, + literal = ::boost::regbase::literal, + icase = ::boost::regbase::icase, + nocollate = ::boost::regbase::nocollate, + collate = ::boost::regbase::collate, + perlex = ::boost::regbase::perlex, + nosubs = ::boost::regbase::nosubs, + optimize = ::boost::regbase::optimize, + + basic = ::boost::regbase::basic, + extended = ::boost::regbase::extended, + normal = ::boost::regbase::normal, + emacs = ::boost::regbase::emacs, + awk = ::boost::regbase::awk, + grep = ::boost::regbase::grep, + egrep = ::boost::regbase::egrep, + sed = basic, + perl = normal, + ECMAScript = normal, + JavaScript = normal, + JScript = normal + }; + typedef ::boost::regbase::flag_type syntax_option_type; + +} // namespace regex_constants + } // namespace boost #ifdef __BORLANDC__ diff --git a/include/boost/regex/v4/regex.hpp b/include/boost/regex/v4/regex.hpp index 6abfb0f6..c9a7a86d 100644 --- a/include/boost/regex/v4/regex.hpp +++ b/include/boost/regex/v4/regex.hpp @@ -145,6 +145,9 @@ typedef match_results wsmatch; #ifndef BOOST_REGEX_V4_REGEX_GREP_HPP #include #endif +#ifndef BOOST_REGEX_V4_REGEX_REPLACE_HPP +#include +#endif #ifndef BOOST_REGEX_V4_REGEX_MERGE_HPP #include #endif diff --git a/include/boost/regex/v4/regex_compile.hpp b/include/boost/regex/v4/regex_compile.hpp index 2fe1f14f..28f81d31 100644 --- a/include/boost/regex/v4/regex_compile.hpp +++ b/include/boost/regex/v4/regex_compile.hpp @@ -51,7 +51,7 @@ bool BOOST_REGEX_CALL re_maybe_set_member(charT c, 
const reg_expression& e) { const charT* p = reinterpret_cast(set_+1); - bool icase = e.flags() & regbase::icase; + bool icase = e.flags() & regex_constants::icase; charT col = e.get_traits().translate(c, icase); for(unsigned int i = 0; i < set_->csingles; ++i) { @@ -91,21 +91,21 @@ template reg_expression::reg_expression(const charT* p, flag_type f, const Allocator& a) : data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0) { - set_expression(p, f | regbase::use_except); + set_expression(p, f | regex_constants::use_except); } template reg_expression::reg_expression(const charT* p1, const charT* p2, flag_type f, const Allocator& a) : data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0) { - set_expression(p1, p2, f | regbase::use_except); + set_expression(p1, p2, f | regex_constants::use_except); } template reg_expression::reg_expression(const charT* p, size_type len, flag_type f, const Allocator& a) : data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0) { - set_expression(p, p + len, f | regbase::use_except); + set_expression(p, p + len, f | regex_constants::use_except); } template @@ -118,11 +118,11 @@ reg_expression::reg_expression(const reg_expression& BOOST_REGEX_CALL reg_expression::probe_start( case re_detail::syntax_element_literal: // only the first character of the literal can match: // note these have already been translated: - if(*reinterpret_cast(static_cast(node)+1) == traits_inst.translate(cc, (_flags & regbase::icase))) + if(*reinterpret_cast(static_cast(node)+1) == traits_inst.translate(cc, (_flags & regex_constants::icase))) return true; return false; case re_detail::syntax_element_end_line: // next character (if there is one!) 
must be a newline: - if(traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase)))) + if(traits_inst.is_separator(traits_inst.translate(cc, (_flags & regex_constants::icase)))) return true; return false; case re_detail::syntax_element_wild: @@ -512,10 +512,10 @@ bool BOOST_REGEX_CALL reg_expression::probe_start( return true; case re_detail::syntax_element_within_word: case re_detail::syntax_element_word_start: - return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word); + return traits_inst.is_class(traits_inst.translate(cc, (_flags & regex_constants::icase)), traits_type::char_class_word); case re_detail::syntax_element_word_end: // what follows must not be a word character, - return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word) ? false : true; + return traits_inst.is_class(traits_inst.translate(cc, (_flags & regex_constants::icase)), traits_type::char_class_word) ? false : true; case re_detail::syntax_element_buffer_end: // we can be null, nothing must follow, // NB we assume that this is followed by @@ -527,7 +527,7 @@ bool BOOST_REGEX_CALL reg_expression::probe_start( // NB we assume that this is followed by // re_detail::syntax_element_match, if its not then we can // never match anything anyway!! - return traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase))); + return traits_inst.is_separator(traits_inst.translate(cc, (_flags & regex_constants::icase))); case re_detail::syntax_element_backref: // there's no easy way to determine this // which is not to say it can't be done! 
@@ -540,7 +540,7 @@ bool BOOST_REGEX_CALL reg_expression::probe_start( return re_detail::re_maybe_set_member(cc, static_cast(node), *this) || (re_detail::re_is_set_member(static_cast(&cc), static_cast(&cc+1), static_cast(node), *this) != &cc); case re_detail::syntax_element_set: // set all the elements that are set in corresponding set: - c = (traits_size_type)(traits_uchar_type)traits_inst.translate(cc, (_flags & regbase::icase)); + c = (traits_size_type)(traits_uchar_type)traits_inst.translate(cc, (_flags & regex_constants::icase)); return static_cast(node)->_map[c] != 0; case re_detail::syntax_element_jump: if(static_cast(node)->alt.p < node) @@ -583,7 +583,7 @@ bool BOOST_REGEX_CALL reg_expression::probe_start( else return probe_start(node->next.p, cc, static_cast(node)->alt.p); case re_detail::syntax_element_combining: - return !traits_inst.is_combining(traits_inst.translate(cc, (_flags & regbase::icase))); + return !traits_inst.is_combining(traits_inst.translate(cc, (_flags & regex_constants::icase))); } return false; } @@ -777,7 +777,7 @@ re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression_map[(traits_uchar_type)traits_inst.translate((charT)i, (_flags & regbase::icase))] = re_detail::mask_all; + dat->_map[(traits_uchar_type)traits_inst.translate((charT)i, (_flags & regex_constants::icase))] = re_detail::mask_all; } } @@ -1317,7 +1317,7 @@ unsigned int BOOST_REGEX_CALL reg_expression::set_expr #endif #ifdef __OpenBSD__ // strxfrm not working on OpenBSD?? 
- f |= regbase::nocollate; + f &= ~regex_constants::collate; #endif if(p == expression()) @@ -1357,11 +1357,11 @@ unsigned int BOOST_REGEX_CALL reg_expression::set_expr ++marks; dat = 0; - if(_flags & regbase::literal) + if(_flags & regex_constants::literal) { while(ptr != end) { - dat = add_literal(dat, traits_inst.translate(*ptr, (_flags & regbase::icase))); + dat = add_literal(dat, traits_inst.translate(*ptr, (_flags & regex_constants::icase))); ++ptr; } } @@ -1384,8 +1384,16 @@ unsigned int BOOST_REGEX_CALL reg_expression::set_expr open_bracked_jump: // extend: dat = add_simple(dat, re_detail::syntax_element_startmark, sizeof(re_detail::re_brace)); - markid.push(marks); - static_cast(dat)->index = marks++; + if(_flags & nosubs) + { + markid.push(0); + static_cast(dat)->index = 0; + } + else + { + markid.push(marks); + static_cast(dat)->index = marks++; + } mark.push(data.index(dat)); ++ptr; // @@ -1811,7 +1819,7 @@ unsigned int BOOST_REGEX_CALL reg_expression::set_expr dat = compile_set(ptr, end); if(dat == 0) { - if((_flags & regbase::failbit) == 0) + if((_flags & regex_constants::failbit) == 0) fail(REG_EBRACK); return error_code(); } @@ -1963,7 +1971,7 @@ unsigned int BOOST_REGEX_CALL reg_expression::set_expr fixup_apply(static_cast(data.data()), marks); // check for error during fixup: - if(_flags & regbase::failbit) + if(_flags & regex_constants::failbit) return error_code(); // @@ -1985,7 +1993,7 @@ unsigned int BOOST_REGEX_CALL reg_expression::set_expr { charT* p1 = reinterpret_cast(reinterpret_cast(sbase) + sizeof(re_detail::re_literal)); charT* p2 = p1 + static_cast(sbase)->length; - pkmp = re_detail::kmp_compile(p1, p2, charT(), re_detail::kmp_translator(_flags®base::icase, &traits_inst), data.allocator()); + pkmp = re_detail::kmp_compile(p1, p2, charT(), re_detail::kmp_translator(_flags®ex_constants::icase, &traits_inst), data.allocator()); } } return error_code(); @@ -2022,7 +2030,7 @@ re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression(dat) 
- reinterpret_cast(data.data()); - *reinterpret_cast(data.extend(sizeof(charT))) = traits_inst.translate(c, (_flags & regbase::icase)); + *reinterpret_cast(data.extend(sizeof(charT))) = traits_inst.translate(c, (_flags & regex_constants::icase)); dat = reinterpret_cast(reinterpret_cast(data.data()) + pos); ++(static_cast(dat)->length); } @@ -2031,7 +2039,7 @@ re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression(dat)->length = 1; - *reinterpret_cast(reinterpret_cast(dat)+1) = traits_inst.translate(c, (_flags & regbase::icase)); + *reinterpret_cast(reinterpret_cast(dat)+1) = traits_inst.translate(c, (_flags & regex_constants::icase)); } return dat; } @@ -2082,7 +2090,7 @@ unsigned int BOOST_REGEX_CALL reg_expression::fixup_le leading_lit = false; const charT* p1 = _leading_string; const charT* p2 = _leading_string + _leading_string_len; - pkmp = re_detail::kmp_compile(p1, p2, charT(), re_detail::kmp_translator(_flags®base::icase, &traits_inst), data.allocator()); + pkmp = re_detail::kmp_compile(p1, p2, charT(), re_detail::kmp_translator(_flags®ex_constants::icase, &traits_inst), data.allocator()); } leading_lit = false; break; @@ -2140,16 +2148,16 @@ void BOOST_REGEX_CALL reg_expression::fail(unsigned in error_code_ = err; if(err) { - _flags |= regbase::failbit; + _flags |= regex_constants::failbit; #ifndef BOOST_NO_EXCEPTIONS - if(_flags & regbase::use_except) + if(_flags & regex_constants::use_except) { re_detail::raise_error(traits_inst, err); } #endif } else - _flags &= ~regbase::failbit; + _flags &= ~regex_constants::failbit; } diff --git a/include/boost/regex/v4/regex_format.hpp b/include/boost/regex/v4/regex_format.hpp index cafc9656..697a8ad5 100644 --- a/include/boost/regex/v4/regex_format.hpp +++ b/include/boost/regex/v4/regex_format.hpp @@ -337,7 +337,7 @@ expand_sub: continue; } case traits_type::syntax_open_bracket: - if(flags & (format_sed|format_perl)) + if(0 == (flags & format_all)) { *out = *fmt; ++out; @@ -351,7 +351,7 @@ expand_sub: continue; 
} case traits_type::syntax_close_bracket: - if(flags & (format_sed|format_perl)) + if(0 == (flags & format_all)) { *out = *fmt; ++out; @@ -375,7 +375,7 @@ expand_sub: continue; case traits_type::syntax_question: { - if(flags & (format_sed|format_perl)) + if(0 == (flags & format_all)) { *out = *fmt; ++out; diff --git a/include/boost/regex/v4/regex_grep.hpp b/include/boost/regex/v4/regex_grep.hpp index 2e48040d..a1b382dc 100644 --- a/include/boost/regex/v4/regex_grep.hpp +++ b/include/boost/regex/v4/regex_grep.hpp @@ -42,7 +42,7 @@ inline unsigned int regex_grep(Predicate foo, const reg_expression& e, match_flag_type flags = match_default) { - if(e.flags() & regbase::failbit) + if(e.flags() & regex_constants::failbit) return false; typedef detail::rebind_allocator, Allocator> binder; typedef typename binder::type match_allocator_type; diff --git a/include/boost/regex/v4/regex_merge.hpp b/include/boost/regex/v4/regex_merge.hpp index 7fca3b4e..f2286533 100644 --- a/include/boost/regex/v4/regex_merge.hpp +++ b/include/boost/regex/v4/regex_merge.hpp @@ -33,17 +33,14 @@ namespace boost{ #endif template -OutputIterator regex_merge(OutputIterator out, +inline OutputIterator regex_merge(OutputIterator out, Iterator first, Iterator last, const reg_expression& e, const charT* fmt, match_flag_type flags = match_default) { - Iterator l = first; - re_detail::merge_out_predicate oi(out, l, fmt, flags, e.get_traits()); - regex_grep(oi, first, last, e, flags); - return (flags & format_no_copy) ? 
out : re_detail::re_copy_out(out, l, last); + return regex_replace(out, first, last, e, fmt, flags); } template @@ -58,27 +55,21 @@ inline OutputIterator regex_merge(OutputIterator out, } template -std::basic_string regex_merge(const std::basic_string& s, +inline std::basic_string regex_merge(const std::basic_string& s, const reg_expression& e, const charT* fmt, match_flag_type flags = match_default) { - std::basic_string result; - re_detail::string_out_iterator > i(result); - regex_merge(i, s.begin(), s.end(), e, fmt, flags); - return result; + return regex_replace(s, e, fmt, flags); } template -std::basic_string regex_merge(const std::basic_string& s, +inline std::basic_string regex_merge(const std::basic_string& s, const reg_expression& e, const std::basic_string& fmt, match_flag_type flags = match_default) { - std::basic_string result; - re_detail::string_out_iterator > i(result); - regex_merge(i, s.begin(), s.end(), e, fmt.c_str(), flags); - return result; + return regex_replace(s, e, fmt, flags); } #ifdef __BORLANDC__ diff --git a/include/boost/regex/v4/regex_replace.hpp b/include/boost/regex/v4/regex_replace.hpp new file mode 100644 index 00000000..37383da2 --- /dev/null +++ b/include/boost/regex/v4/regex_replace.hpp @@ -0,0 +1,91 @@ +/* + * + * Copyright (c) 1998-2002 + * Dr John Maddock + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear + * in supporting documentation. Dr John Maddock makes no representations + * about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. 
+ * FILE regex_format.hpp + * VERSION see + * DESCRIPTION: Provides formatting output routines for search and replace + * operations. Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_V4_REGEX_REPLACE_HPP +#define BOOST_REGEX_V4_REGEX_REPLACE_HPP + + +namespace boost{ + +#ifdef __BORLANDC__ + #pragma option push -a8 -b -Vx -Ve -pc -w-8037 +#endif + +template +OutputIterator regex_replace(OutputIterator out, + Iterator first, + Iterator last, + const reg_expression& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + Iterator l = first; + re_detail::merge_out_predicate oi(out, l, fmt, flags, e.get_traits()); + regex_grep(oi, first, last, e, flags); + return (flags & format_no_copy) ? out : re_detail::re_copy_out(out, l, last); +} + +template +inline OutputIterator regex_replace(OutputIterator out, + Iterator first, + Iterator last, + const reg_expression& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + return regex_replace(out, first, last, e, fmt.c_str(), flags); +} + +template +std::basic_string regex_replace(const std::basic_string& s, + const reg_expression& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + std::basic_string result; + re_detail::string_out_iterator > i(result); + regex_replace(i, s.begin(), s.end(), e, fmt, flags); + return result; +} + +template +std::basic_string regex_replace(const std::basic_string& s, + const reg_expression& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + std::basic_string result; + re_detail::string_out_iterator > i(result); + regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags); + return result; +} + +#ifdef __BORLANDC__ + #pragma option pop +#endif + +} // namespace boost + +#endif // BOOST_REGEX_V4_REGEX_REPLACE_HPP + diff --git a/include/boost/regex/v4/regex_search.hpp b/include/boost/regex/v4/regex_search.hpp index 01b27d5e..15100804 100644 
--- a/include/boost/regex/v4/regex_search.hpp +++ b/include/boost/regex/v4/regex_search.hpp @@ -36,7 +36,7 @@ bool regex_search(BidiIterator first, BidiIterator last, const reg_expression& e, match_flag_type flags = match_default) { - if(e.flags() & regbase::failbit) + if(e.flags() & regex_constants::failbit) return false; re_detail::perl_matcher matcher(first, last, m, e, flags); @@ -103,6 +103,89 @@ inline bool regex_search(const std::basic_string& s, #endif +#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING +template +bool regex_search(BidiIterator first, BidiIterator last, + const reg_expression& e, + match_flag_type flags = match_default) +{ + if(e.flags() & regex_constants::failbit) + return false; + + match_results m; + typedef typename match_results::allocator_type match_alloc_type; + re_detail::perl_matcher matcher(first, last, m, e, flags); + return matcher.find(); +} + +template +inline bool regex_search(const charT* str, + const reg_expression& e, + match_flag_type flags = match_default) +{ + return regex_search(str, str + traits::length(str), e, flags); +} + +template +inline bool regex_search(const std::basic_string& s, + const reg_expression& e, + match_flag_type flags = match_default) +{ + return regex_search(s.begin(), s.end(), e, flags); +} +#else // non-template function overloads +inline bool regex_search(const char* first, const char* last, + const regex& e, + match_flag_type flags = match_default) +{ + cmatch m; + return regex_search(first, last, m, e, flags); +} + +#ifndef BOOST_NO_WREGEX +inline bool regex_search(const wchar_t* first, const wchar_t* last, + const wregex& e, + match_flag_type flags = match_default) +{ + wcmatch m; + return regex_search(first, last, m, e, flags); +} +#endif +inline bool regex_search(const char* str, + const regex& e, + match_flag_type flags = match_default) +{ + cmatch m; + return regex_search(str, str + regex::traits_type::length(str), m, e, flags); +} +#ifndef BOOST_NO_WREGEX +inline bool regex_search(const 
wchar_t* str, + const wregex& e, + match_flag_type flags = match_default) +{ + wcmatch m; + return regex_search(str, str + wregex::traits_type::length(str), m, e, flags); +} +#endif +inline bool regex_search(const std::string& s, + const regex& e, + match_flag_type flags = match_default) +{ + smatch m; + return regex_search(s.begin(), s.end(), m, e, flags); +} +#if !defined(BOOST_NO_WREGEX) +inline bool regex_search(const std::basic_string& s, + const wregex& e, + match_flag_type flags = match_default) +{ + wsmatch m; + return regex_search(s.begin(), s.end(), m, e, flags); +} +#endif + +#endif + #ifdef __BORLANDC__ #pragma option pop #endif diff --git a/include/boost/regex/v4/sub_match.hpp b/include/boost/regex/v4/sub_match.hpp index 6b07a98a..b74eb224 100644 --- a/include/boost/regex/v4/sub_match.hpp +++ b/include/boost/regex/v4/sub_match.hpp @@ -205,6 +205,57 @@ bool operator >= (typename re_detail::regex_iterator_traits& m) { return m.str().compare(s) <= 0; } +// comparison to const charT& part 1: +template +bool operator == (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) == 0; } +template +bool operator != (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) != 0; } +template +bool operator > (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) > 0; } +template +bool operator < (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) < 0; } +template +bool operator >= (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) >= 0; } +template +bool operator <= (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const& s) +{ return 
m.str().compare(0, m.length(), &s, 1) <= 0; } +// comparison to const charT* part 2: +template +bool operator == (typename re_detail::regex_iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) == 0; } +template +bool operator != (typename re_detail::regex_iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) != 0; } +template +bool operator < (typename re_detail::regex_iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) > 0; } +template +bool operator > (typename re_detail::regex_iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) < 0; } +template +bool operator <= (typename re_detail::regex_iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) >= 0; } +template +bool operator >= (typename re_detail::regex_iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) <= 0; } + #ifndef BOOST_NO_STD_LOCALE template std::basic_ostream& diff --git a/index.htm b/index.htm deleted file mode 100644 index f313dd7c..00000000 --- a/index.htm +++ /dev/null @@ -1,150 +0,0 @@ - - - - - - - -regex++, Index - - - - -

 

- - - - - - -

C++ Boost

-

Regex++, Index.

-

(Version 3.31, 16th Dec 2001)  -

-

Copyright (c) 1998-2001

-

Dr John Maddock

-

Permission to use, copy, modify, - distribute and sell this software and its documentation - for any purpose is hereby granted without fee, provided - that the above copyright notice appear in all copies and - that both that copyright notice and this permission - notice appear in supporting documentation. Dr John - Maddock makes no representations about the suitability of - this software for any purpose. It is provided "as is" - without express or implied warranty.

-
- -
- -

Contents

- - - -
- -

Copyright Dr -John Maddock 1998-2001 all rights reserved.

- - diff --git a/introduction.htm b/introduction.htm deleted file mode 100644 index bcac99bb..00000000 --- a/introduction.htm +++ /dev/null @@ -1,476 +0,0 @@ - - - - - - - -regex++, Introduction - - - - -

 

- - - - - - -

C++ Boost

-

Regex++, Introduction.

-

Copyright (c) 1998-2001

-

Dr John Maddock

-

Permission to use, copy, modify, - distribute and sell this software and its documentation - for any purpose is hereby granted without fee, provided - that the above copyright notice appear in all copies and - that both that copyright notice and this permission - notice appear in supporting documentation. Dr John - Maddock makes no representations about the suitability of - this software for any purpose. It is provided "as is" - without express or implied warranty.

-
- -
- -

Introduction

- -

Regular expressions are a form of pattern-matching that is -often used in text processing; many users will be familiar with -the Unix utilities grep, sed and awk, and -the programming language perl, each of which makes -extensive use of regular expressions. Traditionally C++ users -have been limited to the POSIX C APIs for manipulating regular -expressions, and while regex++ does provide these APIs, they do -not represent the best way to use the library. For example regex++ -can cope with wide character strings, or search and replace -operations (in a manner analogous to either sed or perl), -something that traditional C libraries cannot do.

- -

The class boost::reg_expression -is the key class in this library; it represents a "machine -readable" regular expression, and is very closely modelled -on std::basic_string; think of it as a string plus the actual -state-machine required by the regular expression algorithms. As with -std::basic_string, there are two typedefs that are almost always -the means by which this class is referenced:

- -
namespace boost{
-
-template <class charT, 
-          class traits = regex_traits<charT>, 
-          class Allocator = std::allocator<charT> >
-class reg_expression;
-
-typedef reg_expression<char> regex;
-typedef reg_expression<wchar_t> wregex;
-
-}
- -

To see how this library can be used, imagine that we are -writing a credit card processing application. Credit card numbers -generally come as a string of 16-digits, separated into groups of -4-digits, and separated by either a space or a hyphen. Before -storing a credit card number in a database (not necessarily -something your customers will appreciate!), we may want to verify -that the number is in the correct format. To match any digit we -could use the regular expression [0-9], however ranges of -characters like this are actually locale dependent. Instead we -should use the POSIX standard form [[:digit:]], or the regex++ -and perl shorthand for this \d (note that many older libraries -tended to be hard-coded to the C-locale, consequently this was -not an issue for them). That leaves us with the following regular -expression to validate credit card number formats:

- -

(\d{4}[- ]){3}\d{4}

- -

Here the parentheses act to group (and mark for future -reference) sub-expressions, and the {4} means "repeat -exactly 4 times". This is an example of the extended regular -expression syntax used by perl, awk and egrep. Regex++ also -supports the older "basic" syntax used by sed and grep, -but this is generally less useful, unless you already have some -basic regular expressions that you need to reuse.

- -

Now let's take that expression and place it in some C++ code to -validate the format of a credit card number:

- -
bool validate_card_format(const std::string s)
-{
-   static const boost::regex e("(\\d{4}[- ]){3}\\d{4}");
-   return regex_match(s, e);
-}
- -

Note how we had to add some extra escapes to the expression: -remember that the escape is seen once by the C++ compiler, before -it gets to be seen by the regular expression engine, consequently -escapes in regular expressions have to be doubled up when -embedding them in C/C++ code. Also note that all the examples -assume that your compiler supports Koenig lookup, if yours -doesn't (for example VC6), then you will have to add some boost:: -prefixes to some of the function calls in the examples.

- -

Those of you who are familiar with credit card processing, -will have realised that while the format used above is suitable -for human readable card numbers, it does not represent the format -required by online credit card systems; these require the number -as a string of 16 (or possibly 15) digits, without any -intervening spaces. What we need is a means to convert easily -between the two formats, and this is where search and replace -comes in. Those who are familiar with the utilities sed -and perl will already be ahead here; we need two strings - -one a regular expression - the other a "format string" that provides a -description of the text to replace the match with. In regex++ -this search and replace operation is performed with the algorithm -regex_merge, for our credit card example we can write two -algorithms like this to provide the format conversions:

- -
-// match any format with the regular expression:
-const boost::regex e("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
-const std::string machine_format("\\1\\2\\3\\4");
-const std::string human_format("\\1-\\2-\\3-\\4");
-
-std::string machine_readable_card_number(const std::string s)
-{
-   return regex_merge(s, e, machine_format, boost::match_default | boost::format_sed);
-}
-
-std::string human_readable_card_number(const std::string s)
-{
-   return regex_merge(s, e, human_format, boost::match_default | boost::format_sed);
-}
- -

Here we've used marked sub-expressions in the regular -expression to split out the four parts of the card number as -separate fields, the format string then uses the sed-like syntax -to replace the matched text with the reformatted version.

- -

In the examples above, we haven't directly manipulated the -results of a regular expression match, however in general the -result of a match contains a number of sub-expression matches in -addition to the overall match. When the library needs to report a -regular expression match it does so using an instance of the -class match_results, -as before there are typedefs of this class for the most common -cases:

- -
namespace boost{
-typedef match_results<const char*> cmatch;
-typedef match_results<const wchar_t*> wcmatch;
-typedef match_results<std::string::const_iterator> smatch;
-typedef match_results<std::wstring::const_iterator> wsmatch; 
-}
- -

The algorithms regex_search -and regex_grep (i.e. -finding all matches in a string) make use of match_results to -report what matched.

- -

Note that these algorithms are not restricted to searching -regular C-strings, any bidirectional iterator type can be -searched, allowing for the possibility of seamlessly searching -almost any kind of data.

- -

For search and replace operations in addition to the algorithm -regex_merge that -we have already seen, the algorithm regex_format takes -the result of a match and a format string, and produces a new -string by merging the two.

- -

For those that dislike templates, there is a high level -wrapper class RegEx that is an encapsulation of the lower level -template code - it provides a simplified interface for those that -don't need the full power of the library, and supports only -narrow characters, and the "extended" regular -expression syntax.

- -

The POSIX API functions: -regcomp, regexec, regfree and regerror, are available in both -narrow character and Unicode versions, and are provided for those -who need compatibility with these API's.

- -

Finally, note that the library now has run-time localization support, and -recognizes the full POSIX regular expression syntax - including -advanced features like multi-character collating elements and -equivalence classes - as well as providing compatibility with -other regular expression libraries including GNU and BSD4 regex -packages, and to a more limited extent perl 5.

- -

Installation and Configuration -Options

- -

[ Important: If you are -upgrading from the 2.x version of this library then you will find -a number of changes to the documented header names and library -interfaces, existing code should still compile unchanged however -- see Note -for Upgraders. ]

- -

When you extract the library from its zip file, you must -preserve its internal directory structure (for example by using -the -d option when extracting). If you didn't do that when -extracting, then you'd better stop reading this, delete the files -you just extracted, and try again!

- -

This library should not need configuring before use; most -popular compilers/standard libraries/platforms are already -supported "as is". If you do experience configuration -problems, or just want to test the configuration with your -compiler, then the process is the same as for all of boost; see -the configuration library -documentation.

- -

The library will encase all code inside namespace boost.

- -

Unlike some other template libraries, this library consists of -a mixture of template code (in the headers) and static code and -data (in cpp files). Consequently it is necessary to build the -library's support code into a library or archive file before you -can use it, instructions for specific platforms are as follows:

- -

Borland C++ Builder:

- -
    -
  • Open up a console window and change to the - <boost>\libs\regex\build directory.
  • -
  • Select the appropriate makefile (bcb4.mak for C++ Builder - 4, bcb5.mak for C++ Builder 5, and bcb6.mak for C++ - Builder 6).
  • -
  • Invoke the makefile (pass the full path to your version - of make if you have more than one version installed, the - makefile relies on the path to make to obtain your C++ - Builder installation directory and tools) for example:
  • -
- -
make -fbcb5.mak
- -

The build process will build a variety of .lib and .dll files -(the exact number depends upon the version of Borland's tools you -are using) the .lib and dll files will be in a sub-directory -called bcb4 or bcb5 depending upon the makefile used. To install -the libraries into your development system use:

- -

make -fbcb5.mak install

- -

library files will be copied to <BCROOT>/lib and the -dll's to <BCROOT>/bin, where <BCROOT> corresponds to -the install path of your Borland C++ tools.

- -

You may also remove temporary files created during the build -process (excluding lib and dll files) by using:

- -

make -fbcb5.mak clean

- -

Finally when you use regex++ it is only necessary for you to -add the <boost> root directory to your list of include -directories for that project. It is not necessary for you to -manually add a .lib file to the project; the headers will -automatically select the correct .lib file for your build mode -and tell the linker to include it. There is one caveat however: -the library can not tell the difference between VCL and non-VCL -enabled builds when building a GUI application from the command -line, if you build from the command line with the 5.5 command -line tools then you must define the pre-processor symbol _NO_VCL -in order to ensure that the correct link libraries are selected: -the C++ Builder IDE normally sets this automatically. Hint, users -of the 5.5 command line tools may want to add a -D_NO_VCL to bcc32.cfg -in order to set this option permanently.

- -

If you would prefer to do a static link to the regex libraries -even when using the dll runtime then define -BOOST_REGEX_STATIC_LINK, and if you want to suppress automatic -linking altogether (and supply your own custom build of the lib) -then define BOOST_REGEX_NO_LIB.

- -

If you are building with C++ Builder 6, you will find that -<boost/regex.hpp> can not be used in a pre-compiled header -(the actual problem is in <locale> which gets included by -<boost/regex.hpp>), if this causes problems for you, then -try defining BOOST_NO_STD_LOCALE when building, this will disable -some features throughout boost, but may save you a lot in compile -times!

- -

Microsoft Visual C++ 6 and 7

- -

You need version 6 of MSVC to build this library. If you are -using VC5 then you may want to look at one of the previous -releases of this library -

- -

Open up a command prompt, which has the necessary MSVC -environment variables defined (for example by using the batch -file Vcvars32.bat installed by the Visual Studio installation), -and change to the <boost>\libs\regex\build directory.

- -

Select the correct makefile - vc6.mak for "vanilla" -Visual C++ 6 or vc6-stlport.mak if you are using STLPort.

- -

Invoke the makefile like this:

- -

nmake -fvc6.mak

- -

You will now have a collection of lib and dll files in a -"vc6" subdirectory, to install these into your -development system use:

- -

nmake -fvc6.mak install

- -

The lib files will be copied to your <VC6>\lib directory -and the dll files to <VC6>\bin, where <VC6> is the -root of your Visual C++ 6 installation.

- -

You can delete all the temporary files created during the -build (excluding lib and dll files) using:

- -

nmake -fvc6.mak clean

- -

Finally when you use regex++ it is only necessary for you to -add the <boost> root directory to your list of include -directories for that project. It is not necessary for you to -manually add a .lib file to the project; the headers will -automatically select the correct .lib file for your build mode -and tell the linker to include it.

- -

Note that if you want to statically link to the regex library -when using the dynamic C++ runtime, define -BOOST_REGEX_STATIC_LINK when building your project (this only has -an effect for release builds). If you want to add the source -directly to your project then define BOOST_REGEX_NO_LIB to -disable automatic library selection.

- -

Important: there have been some -reports of compiler-optimisation bugs affecting this library, (particularly -with VC6 versions prior to service patch 5) the workaround is to -build the library using /Oityb1 rather than /O2. That is to use -all optimisation settings except /Oa. This problem is reported to -affect some standard library code as well (in fact I'm not sure -if the problem is with the regex code or the underlying standard -library), so it's probably worthwhile applying this workaround in -normal practice in any case.

- -

Note: if you have replaced the C++ standard library that comes -with VC6, then when you build the library you must ensure that -the environment variables "INCLUDE" and "LIB" -have been updated to reflect the include and library paths for -the new library - see vcvars32.bat (part of your Visual Studio -installation) for more details. Alternatively if STLPort is in c:/stlport -then you could use:

- -

nmake INCLUDES="-Ic:/stlport/stlport" XLFLAGS="/LIBPATH:c:/stlport/lib" --fvc6-stlport.mak

- -

If you are building with the full STLPort v4.x, then use the -vc6-stlport.mak file provided and set the environment variable -STLPORT_PATH to point to the location of your STLport -installation (Note that the full STLPort libraries appear not to -support single-thread static builds).

- -

GCC(2.95)

- -

There is a conservative makefile for the g++ compiler. From -the command prompt change to the <boost>/libs/regex/build -directory and type:

- -

make -fgcc.mak

- -

At the end of the build process you should have a gcc sub-directory -containing release and debug versions of the library (libboost_regex.a -and libboost_regex_debug.a). When you build projects that use -regex++, you will need to add the boost install directory to your -list of include paths and add <boost>/libs/regex/build/gcc/libboost_regex.a -to your list of library files.

- -

There is also a makefile to build the library as a shared -library:

- -

make -fgcc-shared.mak

- -

which will build libboost_regex.so and libboost_regex_debug.so.

- -

Both of these makefiles support the following environment -variables:

- -

CXXFLAGS: extra compiler options - note that this applies to -both the debug and release builds.

- -

INCLUDES: additional include directories.

- -

LDFLAGS: additional linker options.

- -

LIBS: additional library files.

- -

For the more adventurous there is a configure script in -<boost>/libs/config; see the config -library documentation.

- -

Sun Workshop 6.1

- -

There is a makefile for the sun (6.1) compiler (C++ version 3.12). -From the command prompt change to the <boost>/libs/regex/build -directory and type:

- -

dmake -f sunpro.mak

- -

At the end of the build process you should have a sunpro sub-directory -containing single and multithread versions of the library (libboost_regex.a, -libboost_regex.so, libboost_regex_mt.a and libboost_regex_mt.so). -When you build projects that use regex++, you will need to add -the boost install directory to your list of include paths and add -<boost>/libs/regex/build/sunpro/ to your library search -path.

- -

Both of these makefiles support the following environment -variables:

- -

CXXFLAGS: extra compiler options - note that this applies to -both the single and multithreaded builds.

- -

INCLUDES: additional include directories.

- -

LDFLAGS: additional linker options.

- -

LIBS: additional library files.

- -

LIBSUFFIX: a suffix to mangle the library name with (defaults -to nothing).

- -

This makefile does not set any architecture specific options -like -xarch=v9, you can set these by defining the appropriate -macros, for example:

- -

dmake CXXFLAGS="-xarch=v9" LDFLAGS="-xarch=v9" -LIBSUFFIX="_v9" -f sunpro.mak

- -

will build v9 variants of the regex library named -libboost_regex_v9.a etc.

- -

Other compilers:

- -

There is a generic makefile (generic.mak) -provided in <boost-root>/libs/regex/build - see that -makefile for details of environment variables that need to be set -before use. Alternatively you can use the Jam based build system. -If you need to configure the library for your platform, then -refer to the config library -documentation.

- -
- -

Copyright Dr -John Maddock 1998-2001 all rights reserved.

- - diff --git a/performance/Jamfile b/performance/Jamfile new file mode 100644 index 00000000..d3a58ee6 --- /dev/null +++ b/performance/Jamfile @@ -0,0 +1,43 @@ + +subproject libs/regex/performance ; + +SOURCES = command_line main time_boost time_greta time_localised_boost time_pcre time_posix time_safe_greta ; + +if $(HS_REGEX_PATH) +{ + HS_SOURCES = $(HS_REGEX_PATH)/regcomp.c $(HS_REGEX_PATH)/regerror.c $(HS_REGEX_PATH)/regexec.c $(HS_REGEX_PATH)/regfree.c ; + POSIX_OPTS = BOOST_HAS_POSIX=1 $(HS_REGEX_PATH) ; +} +else if $(USE_POSIX) +{ + POSIX_OPTS = BOOST_HAS_POSIX=1 ; +} + +if $(PCRE_PATH) +{ + PCRE_SOURCES = $(PCRE_PATH)/chartables.c $(PCRE_PATH)/get.c $(PCRE_PATH)/pcre.c $(PCRE_PATH)/study.c ; + PCRE_OPTS = BOOST_HAS_PCRE=1 $(PCRE_PATH) ; +} +else if $(USE_PCRE) +{ + PCRE_OPTS = BOOST_HAS_PCRE=1 pcre ; +} + + +exe regex_comparison : + $(SOURCES).cpp + $(HS_SOURCES) + $(PCRE_SOURCES) + ../build/boost_regex + ../../test/build/boost_prg_exec_monitor + : + $(BOOST_ROOT) + BOOST_REGEX_NO_LIB=1 + BOOST_REGEX_STATIC_LINK=1 + $(POSIX_OPTS) + $(PCRE_OPTS) + ; + + + + diff --git a/performance/command_line.cpp b/performance/command_line.cpp index 6500a895..7e502154 100644 --- a/performance/command_line.cpp +++ b/performance/command_line.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include "regex_comparison.hpp" // @@ -190,6 +192,13 @@ void print_result(std::ostream& os, double time, double best) os << ""; } +std::string html_quote(const std::string& in) +{ + static const boost::regex e("(<)|(>)|(&)|(\")"); + static const std::string format("(?1<)(?2>)(?3&)(?4")"); + return regex_replace(in, e, format); +} + void output_html_results(bool show_description, const std::string& tagname) { std::stringstream os; @@ -230,9 +239,9 @@ void output_html_results(bool show_description, const std::string& tagname) last = result_list.end(); while(first != last) { - os << "" << first->expression << ""; + os << "" << html_quote(first->expression) << ""; 
if(show_description) - os << "" << first->description << ""; + os << "" << html_quote(first->description) << ""; #if defined(BOOST_HAS_GRETA) if(time_greta == true) print_result(os, first->greta_time, first->factor); @@ -264,7 +273,7 @@ void output_html_results(bool show_description, const std::string& tagname) std::string result = os.str(); - unsigned int pos = html_contents.find(tagname); + std::string::size_type pos = html_contents.find(tagname); if(pos != std::string::npos) { html_contents.replace(pos, tagname.size(), result); @@ -275,6 +284,42 @@ void output_final_html() { if(html_out_file.size()) { + // + // start with search and replace ops: + // + std::string::size_type pos; + pos = html_contents.find("%compiler%"); + if(pos != std::string::npos) + { + html_contents.replace(pos, 10, BOOST_COMPILER); + } + pos = html_contents.find("%library%"); + if(pos != std::string::npos) + { + html_contents.replace(pos, 9, BOOST_STDLIB); + } + pos = html_contents.find("%os%"); + if(pos != std::string::npos) + { + html_contents.replace(pos, 4, BOOST_PLATFORM); + } + pos = html_contents.find("%boost%"); + if(pos != std::string::npos) + { + html_contents.replace(pos, 7, BOOST_STRINGIZE(BOOST_VERSION)); + } + pos = html_contents.find("%pcre%"); + if(pos != std::string::npos) + { +#ifdef PCRE_MINOR + html_contents.replace(pos, 6, BOOST_STRINGIZE(PCRE_MAJOR.PCRE_MINOR)); +#else + html_contents.replace(pos, 6, "N/A"); +#endif + } + // + // now right the output to file: + // std::ofstream os(html_out_file.c_str()); os << html_contents; } diff --git a/performance/input.html b/performance/input.html index b218c211..f44bcfce 100644 --- a/performance/input.html +++ b/performance/input.html @@ -1,59 +1,64 @@ - - Regular Expression Performance Comparison - - - - - - -

Regular Expression Performance Comparison

-

The Boost and GRETA regular expression libraries have slightly different - interfaces, and it has been suggested that GRETA's interface allows for a more - efficient implementation. The following tables provide comparisons between:

-

GRETA.

-

The Boost regex library.

-

Henry Spencer's regular expression library - - this is provided for comparison as a typical non-backtracking implementation.

-

- Times were obtained on a 2.8GHz Pentium 4 PC running Windows XP, and the - code was compiled with Visual C++ 7.1 with all optimisations turned on. As ever - care should be taken in interpreting the results, only sensible regular - expressions (rather than pathological cases) are given, most are taken from the - Boost regex examples, or from the Library of - Regular Expressions. In addition, some variation in the relative - performance of these libraries can be expected on other machines - as memory - access and processor caching effects can be quite large for most finite state - machine algorithms.

-

Comparison 1: Long Search

-

For each of the following regular expressions the time taken to find all - occurrences of the expression within a long English language text was measured - (mtent12.txt - from Project Gutenberg, 19Mb). 

-

%long_twain_search%

-

Comparison 2: Medium Sized Search

-

For each of the following regular expressions the time taken to find all - occurrences of the expression within a medium sized English language text was - measured (the first 50K from mtent12.txt). 

-

%short_twain_search%

-

Comparison 3: C++ Code Search

-

For each of the following regular expressions the time taken to find all - occurrences of the expression within the C++ source file - boost/crc.hpp was measured. 

-

%code_search%

-

-

Comparison 4: HTML Document Search

- -

For each of the following regular expressions the time taken to find all - occurrences of the expression within the html file libs/libraries.htm - was measured. 

-

%html_search%

-

Comparison 3: Simple Matches

-

- For each of the following regular expressions the time taken to match against - the text indicated was measured. 

-

%short_matches%

-
-

Copyright John Maddock April 2003, all rights reserved.

- + + Regular Expression Performance Comparison + + + + + + +

Regular Expression Performance Comparison

+

The Boost and GRETA regular expression libraries have slightly different + interfaces, and it has been suggested that GRETA's interface allows for a more + efficient implementation. The following tables provide comparisons between:

+

GRETA.

+

The Boost regex library.

+

Henry Spencer's regular expression library + - this is provided for comparison as a typical non-backtracking implementation.

+

Details

+

Machine: Intel Pentium 4 2.8GHz PC.

+

Compiler: %compiler%.

+

C++ Standard Library: %library%.

+

OS: %os%.

+

Boost version: %boost%.

+

PCRE version: %pcre%.

+

+ As ever care should be taken in interpreting the results, only sensible regular + expressions (rather than pathological cases) are given, most are taken from the + Boost regex examples, or from the Library of + Regular Expressions. In addition, some variation in the relative + performance of these libraries can be expected on other machines - as memory + access and processor caching effects can be quite large for most finite state + machine algorithms.

+

Comparison 1: Long Search

+

For each of the following regular expressions the time taken to find all + occurrences of the expression within a long English language text was measured + (mtent12.txt + from Project Gutenberg, 19Mb). 

+

%long_twain_search%

+

Comparison 2: Medium Sized Search

+

For each of the following regular expressions the time taken to find all + occurrences of the expression within a medium sized English language text was + measured (the first 50K from mtent12.txt). 

+

%short_twain_search%

+

Comparison 3: C++ Code Search

+

For each of the following regular expressions the time taken to find all + occurrences of the expression within the C++ source file + boost/crc.hpp was measured. 

+

%code_search%

+

+

Comparison 4: HTML Document Search

+ +

For each of the following regular expressions the time taken to find all + occurrences of the expression within the html file libs/libraries.htm + was measured. 

+

%html_search%

+

Comparison 5: Simple Matches

+

+ For each of the following regular expressions the time taken to match against + the text indicated was measured. 

+

%short_matches%

+
+

Copyright John Maddock April 2003, all rights reserved.

+ diff --git a/performance/main.cpp b/performance/main.cpp index 0b6db1b7..96ecbaf8 100644 --- a/performance/main.cpp +++ b/performance/main.cpp @@ -232,7 +232,7 @@ int cpp_main(int argc, char * argv[]) if(test_long_twain) { - load_file(file_contents, "mtent12.txt"); + load_file(file_contents, "mtent13.txt"); test_find_all("Twain", file_contents); test_find_all("Huck[[:alpha:]]+", file_contents); diff --git a/performance/time_boost.cpp b/performance/time_boost.cpp index 2bb0b58b..9dc3e791 100644 --- a/performance/time_boost.cpp +++ b/performance/time_boost.cpp @@ -21,7 +21,7 @@ namespace b{ double time_match(const std::string& re, const std::string& text, bool icase) { - boost::regex e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl)); + boost::regex e(re, (icase ? boost::regex::perl | boost::regex::icase : boost::regex::perl)); boost::smatch what; boost::timer tim; int iter = 1; @@ -59,7 +59,7 @@ bool dummy_grep_proc(const boost::smatch&) double time_find_all(const std::string& re, const std::string& text, bool icase) { - boost::regex e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl)); + boost::regex e(re, (icase ? boost::regex::perl | boost::regex::icase : boost::regex::perl)); boost::smatch what; boost::timer tim; int iter = 1; diff --git a/performance/time_localised_boost.cpp b/performance/time_localised_boost.cpp index 34b67424..d1aeac89 100644 --- a/performance/time_localised_boost.cpp +++ b/performance/time_localised_boost.cpp @@ -21,7 +21,7 @@ namespace bl{ double time_match(const std::string& re, const std::string& text, bool icase) { - boost::reg_expression > e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl)); + boost::reg_expression > e(re, (icase ? 
boost::regex::perl | boost::regex::icase : boost::regex::perl)); boost::smatch what; boost::timer tim; int iter = 1; @@ -59,7 +59,7 @@ bool dummy_grep_proc(const boost::smatch&) double time_find_all(const std::string& re, const std::string& text, bool icase) { - boost::reg_expression > e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl)); + boost::reg_expression > e(re, (icase ? boost::regex::perl | boost::regex::icase : boost::regex::perl)); boost::smatch what; boost::timer tim; int iter = 1; diff --git a/posix_ref.htm b/posix_ref.htm deleted file mode 100644 index ffe2e677..00000000 --- a/posix_ref.htm +++ /dev/null @@ -1,314 +0,0 @@ - - - - - - -Regex++, POSIX API Reference - - - - -

 

- - - - - - -

C++ Boost

-

Regex++, POSIX API - Reference.

-

Copyright (c) 1998-2001

-

Dr John Maddock

-

Permission to use, copy, modify, - distribute and sell this software and its documentation - for any purpose is hereby granted without fee, provided - that the above copyright notice appear in all copies and - that both that copyright notice and this permission - notice appear in supporting documentation. Dr John - Maddock makes no representations about the suitability of - this software for any purpose. It is provided "as is" - without express or implied warranty.

-
- -
- -

POSIX compatibility library

- -
#include <boost/cregex.hpp>
-or:
-#include <boost/regex.h>
- -

The following functions are available for users who need a -POSIX compatible C library, they are available in both Unicode -and narrow character versions, the standard POSIX API names are -macros that expand to one version or the other depending upon -whether UNICODE is defined or not.

- -

Important: Note that all the symbols defined here are -enclosed inside namespace boost when used in C++ programs, -unless you use #include <boost/regex.h> instead - in which -case the symbols are still defined in namespace boost, but are -made available in the global namespace as well.

- -

The functions are defined as:

- -
extern "C" {
-int regcompA(regex_tA*, const char*, int);
-unsigned int regerrorA(int, const regex_tA*, char*, unsigned int);
-int regexecA(const regex_tA*, const char*, unsigned int, regmatch_t*, int);
-void regfreeA(regex_tA*);
-
-int regcompW(regex_tW*, const wchar_t*, int);
-unsigned int regerrorW(int, const regex_tW*, wchar_t*, unsigned int);
-int regexecW(const regex_tW*, const wchar_t*, unsigned int, regmatch_t*, int);
-void regfreeW(regex_tW*);
-
-#ifdef UNICODE
-#define regcomp regcompW
-#define regerror regerrorW
-#define regexec regexecW
-#define regfree regfreeW
-#define regex_t regex_tW
-#else
-#define regcomp regcompA
-#define regerror regerrorA
-#define regexec regexecA
-#define regfree regfreeA
-#define regex_t regex_tA
-#endif
-}
- -

All the functions operate on structure regex_t, which -exposes two public members:

- -

unsigned int re_nsub this is filled in by regcomp -and indicates the number of sub-expressions contained in the -regular expression.

- -

const TCHAR* re_endp points to the end of the -expression to compile when the flag REG_PEND is set.

- -

Footnote: regex_t is actually a #define - it is either -regex_tA or regex_tW depending upon whether UNICODE is defined or -not, TCHAR is either char or wchar_t again depending upon the -macro UNICODE.

- -

regcomp takes a pointer to a regex_t, a pointer -to the expression to compile and a flags parameter which can be a -combination of:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 REG_EXTENDEDCompiles modern regular - expressions. Equivalent to regbase::char_classes | - regbase::intervals | regbase::bk_refs. 
 REG_BASICCompiles basic (obsolete) - regular expression syntax. Equivalent to regbase::char_classes - | regbase::intervals | regbase::limited_ops | regbase::bk_braces - | regbase::bk_parens | regbase::bk_refs. 
 REG_NOSPECAll characters are ordinary, - the expression is a literal string. 
 REG_ICASECompiles for matching that - ignores character case. 
 REG_NOSUBHas no effect in this - library. 
 REG_NEWLINEWhen this flag is set a dot - does not match the newline character. 
 REG_PENDWhen this flag is set the - re_endp parameter of the regex_t structure must point to - the end of the regular expression to compile. 
 REG_NOCOLLATEWhen this flag is set then - locale dependent collation for character ranges is turned - off. 
 REG_ESCAPE_IN_LISTS
- , , ,
When this flag is set, then - escape sequences are permitted in bracket expressions (character - sets). 
 REG_NEWLINE_ALT When this flag is set then - the newline character is equivalent to the alternation - operator |. 
 REG_PERL  A shortcut for perl-like - behavior: REG_EXTENDED | REG_NOCOLLATE | - REG_ESCAPE_IN_LISTS 
 REG_AWKA shortcut for awk-like - behavior: REG_EXTENDED | REG_ESCAPE_IN_LISTS 
 REG_GREPA shortcut for grep like - behavior: REG_BASIC | REG_NEWLINE_ALT 
 REG_EGREP A shortcut for egrep - like behavior: REG_EXTENDED | REG_NEWLINE_ALT 
- -


- -

regerror takes the following parameters, it maps an -error code to a human readable string:

- - - - - - - - - - - - - - - - - - - - - - - - - - -
 int codeThe error code. 
 const regex_t* eThe regular expression (can - be null). 
 char* bufThe buffer to fill in with - the error message. 
 unsigned int buf_sizeThe length of buf. 
- -

If the error code is OR'ed with REG_ITOA then the message that -results is the printable name of the code rather than a message, -for example "REG_BADPAT". If the code is REG_ATOI then e -must not be null and e->re_endp must point to the -printable name of an error code, the return value is then the -value of the error code. For any other value of code, the -return value is the number of characters in the error message, if -the return value is greater than or equal to buf_size then -regerror will have to be called again with a larger buffer.

- -

regexec finds the first occurrence of expression e -within string buf. If len is non-zero then *m -is filled in with what matched the regular expression, m[0] -contains what matched the whole string, m[1] the first sub-expression -etc, see regmatch_t in the header file declaration for -more details. The eflags parameter can be a combination of: -

- - - - - - - - - - - - - - - - - - - - -
 REG_NOTBOLParameter buf does - not represent the start of a line. 
 REG_NOTEOLParameter buf does - not terminate at the end of a line. 
 REG_STARTENDThe string searched starts - at buf + pmatch[0].rm_so and ends at buf + pmatch[0].rm_eo. 
- -


- -

Finally regfree frees all the memory that was allocated -by regcomp.

- -

Footnote: this is an abridged reference to the POSIX API -functions, it is provided for compatibility with other libraries, -rather than an API to be used in new code (unless you need access -from a language other than C++). This version of these functions -should also happily coexist with other versions, as the names -used are macros that expand to the actual function names.
-

- -
- -

Copyright Dr -John Maddock 1998-2000 all rights reserved.

- - diff --git a/src/cregex.cpp b/src/cregex.cpp index 7196a7a3..9ccfadcb 100644 --- a/src/cregex.cpp +++ b/src/cregex.cpp @@ -193,7 +193,7 @@ RegEx& RegEx::operator=(const char* p) unsigned int RegEx::SetExpression(const char* p, bool icase) { BOOST_RE_GUARD_STACK - boost::uint_fast32_t f = icase ? regbase::normal | regbase::use_except | regbase::icase : regbase::normal | regbase::use_except; + boost::uint_fast32_t f = icase ? regex::normal | regex::use_except | regex::icase : regex::normal | regex::use_except; return pdata->e.set_expression(p, f); } @@ -459,7 +459,7 @@ std::string RegEx::Merge(const std::string& in, const std::string& fmt, std::string result; re_detail::string_out_iterator i(result); if(!copy) flags |= format_no_copy; - regex_merge(i, in.begin(), in.end(), pdata->e, fmt.c_str(), flags); + regex_replace(i, in.begin(), in.end(), pdata->e, fmt.c_str(), flags); return result; } @@ -469,7 +469,7 @@ std::string RegEx::Merge(const char* in, const char* fmt, std::string result; if(!copy) flags |= format_no_copy; re_detail::string_out_iterator i(result); - regex_merge(i, in, in + std::strlen(in), pdata->e, fmt, flags); + regex_replace(i, in, in + std::strlen(in), pdata->e, fmt, flags); return result; } diff --git a/src/posix_api.cpp b/src/posix_api.cpp index 7844759a..4c2025c2 100644 --- a/src/posix_api.cpp +++ b/src/posix_api.cpp @@ -58,27 +58,30 @@ BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompA(regex_tA* expression, const char #endif } // set default flags: - boost::uint_fast32_t flags = (f & REG_EXTENDED) ? regbase::extended : regbase::basic; + boost::uint_fast32_t flags = (f & REG_EXTENDED) ? regex::extended : regex::basic; expression->eflags = (f & REG_NEWLINE) ? 
match_not_dot_newline : match_default; // and translate those that are actually set: if(f & REG_NOCOLLATE) - flags |= regbase::nocollate; + { + flags |= regex::nocollate; + flags &= ~regex::collate; + } if(f & REG_NOSUB) expression->eflags |= match_any; if(f & REG_NOSPEC) - flags |= regbase::literal; + flags |= regex::literal; if(f & REG_ICASE) - flags |= regbase::icase; + flags |= regex::icase; if(f & REG_ESCAPE_IN_LISTS) - flags |= regbase::escape_in_lists; + flags |= regex::escape_in_lists; if(f & REG_NEWLINE_ALT) - flags |= regbase::newline_alt; + flags |= regex::newline_alt; #ifndef BOOST_REGEX_V3 if(f & REG_PERLEX) - flags |= regbase::perlex; + flags |= regex::perlex; #endif const char* p2; diff --git a/src/wide_posix_api.cpp b/src/wide_posix_api.cpp index 8f92d1fb..841c92b2 100644 --- a/src/wide_posix_api.cpp +++ b/src/wide_posix_api.cpp @@ -66,27 +66,30 @@ BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompW(regex_tW* expression, const wcha #endif } // set default flags: - boost::uint_fast32_t flags = (f & REG_EXTENDED) ? regbase::extended : regbase::basic; + boost::uint_fast32_t flags = (f & REG_EXTENDED) ? wregex::extended : wregex::basic; expression->eflags = (f & REG_NEWLINE) ? 
match_not_dot_newline : match_default; // and translate those that are actually set: if(f & REG_NOCOLLATE) - flags |= regbase::nocollate; + { + flags |= wregex::nocollate; + flags &= ~wregex::collate; + } if(f & REG_NOSUB) expression->eflags |= match_any; if(f & REG_NOSPEC) - flags |= regbase::literal; + flags |= wregex::literal; if(f & REG_ICASE) - flags |= regbase::icase; + flags |= wregex::icase; if(f & REG_ESCAPE_IN_LISTS) - flags |= regbase::escape_in_lists; + flags |= wregex::escape_in_lists; if(f & REG_NEWLINE_ALT) - flags |= regbase::newline_alt; + flags |= wregex::newline_alt; #ifndef BOOST_REGEX_V3 if(f & REG_PERLEX) - flags |= regbase::perlex; + flags |= wregex::perlex; #endif const wchar_t* p2; diff --git a/syntax.htm b/syntax.htm deleted file mode 100644 index 327071e5..00000000 --- a/syntax.htm +++ /dev/null @@ -1,742 +0,0 @@ - - - - - - -Regex++, Regular Expression Syntax - - - - -

 

- - - - - - -

C++ Boost

-

Regex++, Regular - Expression Syntax.

-

Copyright (c) 1998-2001

-

Dr John Maddock

-

Permission to use, copy, modify, - distribute and sell this software and its documentation - for any purpose is hereby granted without fee, provided - that the above copyright notice appear in all copies and - that both that copyright notice and this permission - notice appear in supporting documentation. Dr John - Maddock makes no representations about the suitability of - this software for any purpose. It is provided "as is" - without express or implied warranty.

-
- -
- -

Regular expression syntax

- -

This section covers the regular expression syntax used by this -library, this is a programmers guide, the actual syntax presented -to your program's users will depend upon the flags used during -expression compilation.

- -

Literals

- -

All characters are literals except: ".", "|", -"*", "?", "+", "(", -")", "{", "}", "[", -"]", "^", "$" and "\". -These characters are literals when preceded by a "\". A -literal is a character that matches itself, or matches the result -of traits_type::translate(), where traits_type is the traits -template parameter to class reg_expression.

- -

Wildcard

- -

The dot character "." matches any single character -except: when match_not_dot_null is passed to the matching -algorithms, the dot does not match a null character; when match_not_dot_newline -is passed to the matching algorithms, then the dot does not match -a newline character.

- -

Repeats

- -

A repeat is an expression that is repeated an arbitrary number -of times. An expression followed by "*" can be repeated -any number of times including zero. An expression followed by -"+" can be repeated any number of times, but at least -once, if the expression is compiled with the flag regbase::bk_plus_qm -then "+" is an ordinary character and "\+" -represents a repeat of once or more. An expression followed by -"?" may be repeated zero or one times only, if the -expression is compiled with the flag regbase::bk_plus_qm then -"?" is an ordinary character and "\?" -represents the repeat zero or once operator. When it is necessary -to specify the minimum and maximum number of repeats explicitly, -the bounds operator "{}" may be used, thus "a{2}" -is the letter "a" repeated exactly twice, "a{2,4}" -represents the letter "a" repeated between 2 and 4 -times, and "a{2,}" represents the letter "a" -repeated at least twice with no upper limit. Note that there must -be no white-space inside the {}, and there is no upper limit on -the values of the lower and upper bounds. When the expression is -compiled with the flag regbase::bk_braces then "{" and -"}" are ordinary characters and "\{" and -"\}" are used to delimit bounds instead. All repeat -expressions refer to the shortest possible previous sub-expression: -a single character; a character set, or a sub-expression grouped -with "()" for example.

- -

Examples:

- -

"ba*" will match all of "b", "ba", -"baaa" etc.

- -

"ba+" will match "ba" or "baaaa" -for example but not "b".

- -

"ba?" will match "b" or "ba".

- -

"ba{2,4}" will match "baa", "baaa" -and "baaaa".

- -

Non-greedy repeats

- -

Whenever the "extended" regular expression syntax is -in use (the default) then non-greedy repeats are possible by -appending a '?' after the repeat; a non-greedy repeat is one -which will match the shortest possible string.

- -

For example to match html tag pairs one could use something -like:

- -

"<\s*tagname[^>]*>(.*?)<\s*/tagname\s*>" -

- -

In this case $1 will contain the text between the tag pairs, -and will be the shortest possible matching string.

- -

Parenthesis

- -

Parentheses serve two purposes, to group items together into a -sub-expression, and to mark what generated the match. For example -the expression "(ab)*" would match all of the string -"ababab". The matching algorithms regex_match and regex_search each -take an instance of match_results -that reports what caused the match, on exit from these functions -the match_results -contains information both on what the whole expression matched -and on what each sub-expression matched. In the example above -match_results[1] would contain a pair of iterators denoting the -final "ab" of the matching string. It is permissible -for sub-expressions to match null strings. If a sub-expression -takes no part in a match - for example if it is part of an -alternative that is not taken - then both of the iterators that -are returned for that sub-expression point to the end of the -input string, and the matched parameter for that sub-expression -is false. Sub-expressions are indexed from left to right -starting from 1, sub-expression 0 is the whole expression.

- -

Non-Marking Parenthesis

- -

Sometimes you need to group sub-expressions with parentheses, -but don't want the parentheses to spit out another marked sub-expression, -in this case a non-marking parenthesis (?:expression) can be used. -For example the following expression creates no sub-expressions:

- -

"(?:abc)*"

- -

Forward Lookahead Asserts 

- -

There are two forms of these; one for positive forward -lookahead asserts, and one for negative lookahead asserts:

- -

"(?=abc)" matches zero characters only if they are -followed by the expression "abc".

- -

"(?!abc)" matches zero characters only if they are -not followed by the expression "abc".

- -

Alternatives

- -

Alternatives occur when the expression can match either one -sub-expression or another, each alternative is separated by a -"|", or a "\|" if the flag regbase::bk_vbar -is set, or by a newline character if the flag regbase::newline_alt -is set. Each alternative is the largest possible previous sub-expression; -this is the opposite behaviour from repetition operators.

- -

Examples:

- -

"a(b|c)" could match "ab" or "ac". -

- -

"abc|def" could match "abc" or "def". -

- -

Sets

- -

A set is a set of characters that can match any single -character that is a member of the set. Sets are delimited by -"[" and "]" and can contain literals, -character ranges, character classes, collating elements and -equivalence classes. Set declarations that start with "^" -contain the complement of the elements that follow.

- -

Examples:

- -

Character literals:

- -

"[abc]" will match either of "a", "b", -or "c".

- -

"[^abc]" will match any character other than "a", -"b", or "c".

- -

Character ranges:

- -

"[a-z]" will match any character in the range "a" -to "z".

- -

"[^A-Z]" will match any character other than those -in the range "A" to "Z".

- -

Note that character ranges are highly locale dependent: they -match any character that collates between the endpoints of the -range, ranges will only behave according to ASCII rules when the -default "C" locale is in effect. For example if the -library is compiled with the Win32 localization model, then [a-z] -will match the ASCII characters a-z, and also 'A', 'B' etc, but -not 'Z' which collates just after 'z'. This locale specific -behaviour can be disabled by specifying regbase::nocollate when -compiling, this is the default behaviour when using regbase::normal, -and forces ranges to collate according to ASCII character code. -Likewise, if you use the POSIX C API functions then setting -REG_NOCOLLATE turns off locale dependent collation.

- -

Character classes are denoted using the syntax "[:classname:]" -within a set declaration, for example "[[:space:]]" is -the set of all whitespace characters. Character classes are only -available if the flag regbase::char_classes is set. The available -character classes are:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 alnumAny alpha numeric character. 
 alphaAny alphabetical character a-z - and A-Z. Other characters may also be included depending - upon the locale. 
 blankAny blank character, either - a space or a tab. 
 cntrlAny control character. 
 digitAny digit 0-9. 
 graphAny graphical character. 
 lowerAny lower case character a-z. - Other characters may also be included depending upon the - locale. 
 printAny printable character. 
 punctAny punctuation character. 
 spaceAny whitespace character. 
 upperAny upper case character A-Z. - Other characters may also be included depending upon the - locale. 
 xdigitAny hexadecimal digit - character, 0-9, a-f and A-F. 
 wordAny word character - all - alphanumeric characters plus the underscore. 
 unicodeAny character whose code is - greater than 255, this applies to the wide character - traits classes only. 
- -

There are some shortcuts that can be used in place of the -character classes, provided the flag regbase::escape_in_lists is -set then you can use:

- -

\w in place of [:word:]

- -

\s in place of [:space:]

- -

\d in place of [:digit:]

- -

\l in place of [:lower:]

- -

\u in place of [:upper:]

- -

Collating elements take the general form [.tagname.] inside a -set declaration, where tagname is either a single -character, or a name of a collating element, for example [[.a.]] -is equivalent to [a], and [[.comma.]] is equivalent to [,]. The -library supports all the standard POSIX collating element names, -and in addition the following digraphs: "ae", "ch", -"ll", "ss", "nj", "dz", -"lj", each in lower, upper and title case variations. -Multi-character collating elements can result in the set matching -more than one character, for example [[.ae.]] would match two -characters, but note that [^[.ae.]] would only match one -character.

- -

Equivalence classes take the general form [=tagname=] inside a -set declaration, where tagname is either a single -character, or a name of a collating element, and matches any -character that is a member of the same primary equivalence class -as the collating element [.tagname.]. An equivalence class is a -set of characters that collate the same, a primary equivalence -class is a set of characters whose primary sort keys are all the -same (for example strings are typically collated by character, -then by accent, and then by case; the primary sort key then -relates to the character, the secondary to the accentuation, and -the tertiary to the case). If there is no equivalence class -corresponding to tagname, then [=tagname=] is exactly the -same as [.tagname.]. Unfortunately there is no locale independent -method of obtaining the primary sort key for a character, except -under Win32. For other operating systems the library will "guess" -the primary sort key from the full sort key (obtained from strxfrm), -so equivalence classes are probably best considered broken under -any operating system other than Win32.

- -

To include a literal "-" in a set declaration then: -make it the first character after the opening "[" or -"[^", the endpoint of a range, a collating element, or -if the flag regbase::escape_in_lists is set then precede with an -escape character as in "[\-]". To include a literal -"[" or "]" or "^" in a set then -make them the endpoint of a range, a collating element, or -precede with an escape character if the flag regbase::escape_in_lists -is set.

- -

Line anchors

- -

An anchor is something that matches the null string at the -start or end of a line: "^" matches the null string at -the start of a line, "$" matches the null string at the -end of a line.

- -

Back references

- -

A back reference is a reference to a previous sub-expression -that has already been matched, the reference is to what the sub-expression -matched, not to the expression itself. A back reference consists -of the escape character "\" followed by a digit "1" -to "9", "\1" refers to the first sub-expression, -"\2" to the second etc. For example the expression -"(.*)\1" matches any string that is repeated about its -mid-point for example "abcabc" or "xyzxyz". A -back reference to a sub-expression that did not participate in -any match, matches the null string: NB this is different to some -other regular expression matchers. Back references are only -available if the expression is compiled with the flag regbase::bk_refs -set.

- -

Characters by code

- -

This is an extension to the algorithm that is not available in -other libraries, it consists of the escape character followed by -the digit "0" followed by the octal character code. For -example "\023" represents the character whose octal -code is 23. Where ambiguity could occur use parentheses to break -the expression up: "\0103" represents the character -whose code is 103, "(\010)3" represents the character 10 -followed by "3". To match characters by their -hexadecimal code, use \x followed by a string of hexadecimal -digits, optionally enclosed inside {}, for example \xf0 or -\x{aff}, notice the latter example is a Unicode character.

- -

Word operators

- -

The following operators are provided for compatibility with -the GNU regular expression library.

- -

"\w" matches any single character that is a member -of the "word" character class, this is identical to the -expression "[[:word:]]".

- -

"\W" matches any single character that is not a -member of the "word" character class, this is identical -to the expression "[^[:word:]]".

- -

"\<" matches the null string at the start of a -word.

- -

"\>" matches the null string at the end of the -word.

- -

"\b" matches the null string at either the start or -the end of a word.

- -

"\B" matches a null string within a word.

- -

The start of the sequence passed to the matching algorithms is -considered to be a potential start of a word unless the flag -match_not_bow is set. The end of the sequence passed to the -matching algorithms is considered to be a potential end of a word -unless the flag match_not_eow is set.

- -

Buffer operators

- -

The following operators are provided for compatibility with the -GNU regular expression library, and Perl regular expressions:

- -

"\`" matches the start of a buffer.

- -

"\A" matches the start of the buffer.

- -

"\'" matches the end of a buffer.

- -

"\z" matches the end of a buffer.

- -

"\Z" matches the end of a buffer, or possibly one or -more new line characters followed by the end of the buffer.

- -

A buffer is considered to consist of the whole sequence passed -to the matching algorithms, unless the flags match_not_bob or -match_not_eob are set.

- -

Escape operator

- -

The escape character "\" has several meanings.

- -

Inside a set declaration the escape character is a normal -character unless the flag regbase::escape_in_lists is set in -which case whatever follows the escape is a literal character -regardless of its normal meaning.

- -

The escape operator may introduce an operator for example: -back references, or a word operator.

- -

The escape operator may make the following character normal, -for example "\*" represents a literal "*" -rather than the repeat operator.

- -

Single character escape sequences

- -

The following escape sequences are aliases for single -characters:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 Escape sequence Character code Meaning  
 \a 0x07 Bell character.  
 \f 0x0C Form feed.  
 \n 0x0A Newline character.  
 \r 0x0D Carriage return.  
 \t 0x09 Tab character.  
 \v 0x0B Vertical tab.  
 \e 0x1B ASCII Escape character.  
 \0dd 0dd An octal character code, - where dd is one or more octal digits.  
 \xXX 0xXX A hexadecimal character - code, where XX is one or more hexadecimal digits.  
 \x{XX} 0xXX A hexadecimal character - code, where XX is one or more hexadecimal digits, - optionally a unicode character.  
 \cZ z-@ An ASCII escape sequence - control-Z, where Z is any ASCII character greater than or - equal to the character code for '@'.  
- -


- -

Miscellaneous escape sequences:

- -

The following are provided mostly for perl compatibility, but -note that there are some differences in the meanings of \l \L \u -and \U:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 \w Equivalent to [[:word:]].  
 \W Equivalent to [^[:word:]].  
 \s Equivalent to [[:space:]].  
 \S Equivalent to [^[:space:]].  
 \d Equivalent to [[:digit:]].  
 \D Equivalent to [^[:digit:]].  
 \l Equivalent to [[:lower:]].  
 \L Equivalent to [^[:lower:]].  
 \u Equivalent to [[:upper:]].  
 \U Equivalent to [^[:upper:]].  
 \C Any single character, - equivalent to '.'.  
 \X Match any Unicode combining - character sequence, for example "a\x 0301" (a - letter a with an acute).  
 \Q The begin quote operator, - everything that follows is treated as a literal character - until a \E end quote operator is found.  
 \E The end quote operator, - terminates a sequence begun with \Q.  
- -


- -

What gets matched?

- -

The regular expression library will match the first possible -matching string, if more than one string starting at a given -location can match then it matches the longest possible string, -unless the flag match_any is set, in which case the first match -encountered is returned. Use of the match_any option can reduce -the time taken to find the match - but is only useful if the user -is less concerned about what matched - for example it would not -be suitable for search and replace operations. In cases where -there are multiple possible matches all starting at the same -location, and all of the same length, then the match chosen is -the one with the longest first sub-expression, if that is the -same for two or more matches, then the second sub-expression will -be examined and so on.
-

- -
- -

Copyright Dr -John Maddock 1998-2000 all rights reserved.

- - diff --git a/template_class_ref.htm b/template_class_ref.htm deleted file mode 100644 index ccd0d3c9..00000000 --- a/template_class_ref.htm +++ /dev/null @@ -1,2479 +0,0 @@ - - - - - - -Regex++, template class and algorithm reference - - - - -

 

- - - - - - -

C++ Boost

-

Regex++ template - class reference.

-

Copyright (c) 1998-2001

-

Dr John Maddock

-

Permission to use, copy, modify, - distribute and sell this software and its documentation - for any purpose is hereby granted without fee, provided - that the above copyright notice appear in all copies and - that both that copyright notice and this permission - notice appear in supporting documentation. Dr John - Maddock makes no representations about the suitability of - this software for any purpose. It is provided "as is" - without express or implied warranty.

-
- -
- -

class regbase

- -

#include <boost/regex.hpp> -

- -

Class regbase is the template argument independent base class -for reg_expression, the only public members are the flag_type -enumerated values that determine how regular expressions are -interpreted.

- -
class regbase
-{
-public:
-   enum flag_type_
-   {
-      escape_in_lists = 1,                          // '\\' special inside [...] 
-      char_classes = escape_in_lists << 1,          // [[:CLASS:]] allowed 
-      intervals = char_classes << 1,                // {x,y} allowed 
-      limited_ops = intervals << 1,                 // all of + ? and | are normal characters 
-      newline_alt = limited_ops << 1,               // \n is the same as | 
-      bk_plus_qm = newline_alt << 1,                // uses \+ and \? 
-      bk_braces = bk_plus_qm << 1,                  // uses \{ and \} 
-      bk_parens = bk_braces << 1,                   // uses \( and \) 
-      bk_refs = bk_parens << 1,                     // \d allowed 
-      bk_vbar = bk_refs << 1,                       // uses \| 
-      use_except = bk_vbar << 1,                    // exception on error 
-      failbit = use_except << 1,                    // error flag 
-      literal = failbit << 1,                       // all characters are literals 
-      icase = literal << 1,                         // characters are matched regardless of case 
-      nocollate = icase << 1,                       // don't use locale specific collation 
-
-      basic = char_classes | intervals | limited_ops | bk_braces | bk_parens | bk_refs,
-      extended = char_classes | intervals | bk_refs,
-      normal = escape_in_lists | char_classes | intervals | bk_refs | nocollate,
-      emacs = bk_braces | bk_parens | bk_refs | bk_vbar,
-      awk = extended | escape_in_lists,
-      grep = basic | newline_alt,
-      egrep = extended | newline_alt,
-      sed = basic,
-      perl = normal
-   }; 
-   typedef unsigned int flag_type;
-};   
- -

 

- -

The enumerated type regbase::flag_type determines the -syntax rules for regular expression compilation, the various -flags have the following effects:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 regbase::escape_in_listsAllows the use of the escape - "\" character in sets of characters, for - example [\]] represents the set of characters containing - only "]". If this flag is not set then "\" - is an ordinary character inside sets. 
 regbase::char_classesWhen this bit is set, - character classes [:classname:] are allowed inside - character set declarations, for example "[[:word:]]" - represents the set of all characters that belong to the - character class "word". 
 regbase:: intervalsWhen this bit is set, - repetition intervals are allowed, for example "a{2,4}" - represents a repeat of between 2 and 4 letter a's. 
 regbase:: limited_opsWhen this bit is set all of - "+", "?" and "|" are - ordinary characters in all situations. 
 regbase:: newline_altWhen this bit is set, then - the newline character "\n" has the same effect - as the alternation operator "|". 
 regbase:: bk_plus_qmWhen this bit is set then - "\+" represents the one or more repetition - operator and "\?" represents the zero or one - repetition operator. When this bit is not set then - "+" and "?" are used instead. 
 regbase:: bk_bracesWhen this bit is set then - "\{" and "\}" are used for bounded - repetitions and "{" and "}" are - normal characters. This is the opposite of default - behavior. 
 regbase:: bk_parensWhen this bit is set then - "\(" and "\)" are used to group sub-expressions - and "(" and ")" are ordinary - characters, this is the opposite of default behaviour. 
 regbase:: bk_refsWhen this bit is set then - back references are allowed. 
 regbase:: bk_vbarWhen this bit is set then - "\|" represents the alternation operator and - "|" is an ordinary character. This is the - opposite of default behaviour. 
 regbase:: use_exceptWhen this bit is set then a bad_expression exception will - be thrown on error.  Use of this flag is deprecated - - reg_expression will always throw on error. 
 regbase:: failbitThis bit is set on error, if - regbase::use_except is not set, then this bit should be - checked to see if a regular expression is valid before - usage. 
 regbase::literalAll characters in the string - are treated as literals, there are no special characters - or escape sequences. 
 regbase::icaseAll characters in the string - are matched regardless of case. 
 regbase::nocollateLocale specific collation is - disabled when dealing with ranges in character set - declarations.  For example when this bit is set the - expression [a-c] would match the characters a, b and c - only regardless of locale, whereas when this is not set, - then [a-c] matches any character which collates in the - range a to c. 
 regbase::basicEquivalent to the POSIX - basic regular expression syntax: char_classes | intervals - | limited_ops | bk_braces | bk_parens | bk_refs. 
 regbase::extendedEquivalent to the POSIX - extended regular expression syntax: char_classes | - intervals | bk_refs. 
 regbase::normalThis is the - default setting, and represents how most people expect - the library to behave. Equivalent to the POSIX extended - syntax, but with locale specific collation disabled, and - escape characters inside set declarations enabled: - regbase::escape_in_lists | regbase::char_classes | - regbase::intervals | regbase::bk_refs | regbase::nocollate. 
 regbase::emacsProvides - compatibility with the emacs editor, equivalent to: - bk_braces | bk_parens | bk_refs | bk_vbar. 
 regbase::awk Provides - compatibility with the Unix utility Awk, the same as POSIX - extended regular expressions, but allows escapes inside - bracket-expressions (character sets). Equivalent to - extended | escape_in_lists. 
 regbase::grepProvides - compatibility with the Unix grep utility, the same as - POSIX basic regular expressions, but with the newline - character equivalent to the alternation operator. The - same as basic | newline_alt. 
 regbase::egrepProvides - compatibility with the Unix egrep utility, the same as - POSIX extended regular expressions, but with the newline - character equivalent to the alternation operator. The - same as extended | newline_alt. 
 regbase::sedProvides - compatibility with the Unix sed utility, the same as POSIX - basic regular expressions. 
 regbase::perlProvides - compatibility with the perl programming language, the - same as regbase::normal. 
- -
- -

Exception classes.

- -

#include <boost/pat_except.hpp> -

- -

An instance of bad_expression is thrown whenever a bad -regular expression is encountered.

- -
namespace boost{
-
-class bad_pattern : public std::runtime_error
-{
-public:
-   explicit bad_pattern(const std::string& s) : std::runtime_error(s){};
-};
-
-class bad_expression : public bad_pattern
-{
-public:
-   bad_expression(const std::string& s) : bad_pattern(s) {}
-};
-
-
-} // namespace boost
- -

Footnotes: the class bad_pattern forms the base class -for all pattern-matching exceptions, of which bad_expression -is one. The choice of std::runtime_error as the base class -for bad_pattern is moot: depending upon how the library is -used, exceptions may be either logic errors (programmer supplied -expressions) or run time errors (user supplied expressions).

- -
- -

Class reg_expression

- -

#include <boost/regex.hpp> -

- -

The template class reg_expression encapsulates regular -expression parsing and compilation. The class derives from class regbase and takes three template -parameters:

- -

charT: determines the character type, i.e. -either char or wchar_t.

- -

traits: determines the behaviour of the -character type, for example whether character matching is case -sensitive or not, and which character class names are recognized. -A default traits class is provided: regex_traits<charT>. -

- -

Allocator: the allocator class used to allocate -memory by the class.

- -

For ease of use there are two typedefs that define the two -standard reg_expression instances, unless you want to use -custom allocators, you won't need to use anything other than -these:

- -
namespace boost{
-template <class charT, class traits = regex_traits<charT>, class Allocator = std::allocator<charT>  >
-class reg_expression;
-typedef reg_expression<char> regex;
-typedef reg_expression<wchar_t> wregex;
-}
- -

The definition of reg_expression follows: it is based -very closely on class basic_string, and fulfils the requirements -for a container of charT.

- -
namespace boost{
-template <class charT, class traits = regex_traits<charT>, class Allocator = std::allocator<charT>  >
-class reg_expression : public regbase
-{
-public: 
-   // typedefs:  
-   typedef charT char_type; 
-   typedef traits traits_type; 
-   // locale_type 
-   // placeholder for actual locale type used by the 
-   // traits class to localise *this. 
-   typedef typename traits::locale_type locale_type; 
-   // value_type 
-   typedef charT value_type; 
-   // reference, const_reference 
-   typedef charT& reference; 
-   typedef const charT& const_reference; 
-   // iterator, const_iterator 
-   typedef const charT* const_iterator; 
-   typedef const_iterator iterator; 
-   // difference_type 
-   typedef typename Allocator::difference_type difference_type; 
-   // size_type 
-   typedef typename Allocator::size_type size_type; 
-   // allocator_type 
-   typedef Allocator allocator_type; 
-   typedef Allocator alloc_type; 
-   // flag_type 
-   typedef boost::int_fast32_t flag_type; 
-public: 
-   // constructorsexplicit reg_expression(const Allocator& a = Allocator()); 
-   explicit reg_expression(const charT* p, flag_type f = regbase::normal, const Allocator& a = Allocator()); 
-   reg_expression(const charT* p1, const charT* p2, flag_type f = regbase::normal, const Allocator& a = Allocator()); 
-   reg_expression(const charT* p, size_type len, flag_type f, const Allocator& a = Allocator()); 
-   reg_expression(const reg_expression&); 
-   template <class ST, class SA> 
-   explicit reg_expression(const std::basic_string<charT, ST, SA>& p, flag_type f = regbase::normal, const Allocator& a = Allocator()); 
-   template <class I> 
-   reg_expression(I first, I last, flag_type f = regbase::normal, const Allocator& a = Allocator()); 
-   ~reg_expression(); 
-   reg_expression& operator=(const reg_expression&); 
-   reg_expression& operator=(const charT* ptr); 
-   template <class ST, class SA> 
-   reg_expression& operator=(const std::basic_string<charT, ST, SA>& p); 
-   // 
-   // assign: 
-   reg_expression& assign(const reg_expression& that); 
-   reg_expression& assign(const charT* ptr, flag_type f = regbase::normal); 
-   reg_expression& assign(const charT* first, const charT* last, flag_type f = regbase::normal); 
-   template <class string_traits, class A> 
-   reg_expression& assign( 
-       const std::basic_string<charT, string_traits, A>& s, 
-       flag_type f = regbase::normal); 
-   template <class iterator> 
-   reg_expression& assign(iterator first, 
-                          iterator last, 
-                          flag_type f = regbase::normal); 
-   // 
-   // allocator access: 
-   Allocator get_allocator()const; 
-   // 
-   // locale: 
-   locale_type imbue(locale_type l); 
-   locale_type getloc()const; 
-   // 
-   // flags: 
-   flag_type getflags()const; 
-   // 
-   // str: 
-   std::basic_string<charT> str()const; 
-   // 
-   // begin, end: 
-   const_iterator begin()const; 
-   const_iterator end()const; 
-   // 
-   // swap: 
-   void swap(reg_expression&)throw(); 
-   // 
-   // size: 
-   size_type size()const; 
-   // 
-   // max_size: 
-   size_type max_size()const; 
-   // 
-   // empty: 
-   bool empty()const; 
-   unsigned mark_count()const; 
-   bool operator==(const reg_expression&)const; 
-   bool operator<(const reg_expression&)const; 
-};
-} // namespace boost 
- -

Class reg_expression has the following public member functions: -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 reg_expression(Allocator a = - Allocator()); Constructs a default - instance of reg_expression without any expression. 
 reg_expression(charT* p, unsigned - f = regbase::normal, Allocator a = Allocator()); Constructs an instance - of reg_expression from the expression denoted by the null - terminated string p, using the flags f to - determine regular expression syntax. See class regbase for allowable flag values. 
 reg_expression(charT* p1, - charT* p2, unsigned f = regbase::normal, Allocator - a = Allocator()); Constructs an instance - of reg_expression from the expression denoted by pair of - input-iterators p1 and p2, using the flags f - to determine regular expression syntax. See class regbase for allowable flag values. 
 reg_expression(charT* p, - size_type len, unsigned f, Allocator a = Allocator()); Constructs an instance - of reg_expression from the expression denoted by the - string p of length len, using the flags f - to determine regular expression syntax. See class regbase for allowable flag values. 
 template <class - ST, class SA>
- reg_expression(const std::basic_string<charT, - ST, SA>& p, boost::int_fast32_t f = regbase::normal, - const Allocator& a = Allocator());
 Constructs an instance - of reg_expression from the expression denoted by the - string p, using the flags f to determine - regular expression syntax. See class regbase - for allowable flag values.

Note - this member may not - be available depending upon your compiler capabilities.

-
 
 template <class I>
- reg_expression(I first, I last, flag_type f = regbase::normal, - const Allocator& a = Allocator());
 Constructs an instance - of reg_expression from the expression denoted by the pair of - input-iterators first and last, using the flags f - to determine regular expression syntax. See class regbase for allowable flag values. 
 reg_expression(const - reg_expression&);Copy constructor - copies an - existing regular expression. 
 reg_expression& operator=(const - reg_expression&);Copies an existing regular - expression. 
 reg_expression& operator=(const - charT* ptr);Equivalent to assign(ptr); 
 template <class ST, class - SA>

reg_expression& operator=(const std::basic_string<charT, - ST, SA>& p);

-
Equivalent to assign(p); 
 reg_expression& assign(const - reg_expression& that);Copies the regular - expression contained by that, throws bad_expression if that - does not contain a valid expression. Returns *this. 
 reg_expression& assign(const - charT* p, flag_type f = regbase::normal);Compiles a regular - expression from the expression denoted by the null - terminated string p, using the flags f to - determine regular expression syntax. See class regbase for allowable flag values. - Throws bad_expression if p - does not contain a valid expression. Returns *this. 
 reg_expression& assign(const - charT* first, const charT* last, flag_type f = - regbase::normal);Compiles a regular - expression from the expression denoted by the pair of - input-iterators first-last, using the flags f - to determine regular expression syntax. See class regbase for allowable flag values. - Throws bad_expression if first-last - does not contain a valid expression. Returns *this. 
 template <class - string_traits, class A>
- reg_expression& assign(const std::basic_string<charT, - string_traits, A>& s, flag_type f = regbase::normal);
Compiles a regular - expression from the expression denoted by the string s, - using the flags f to determine regular expression - syntax. See class regbase for - allowable flag values. Throws bad_expression - if s does not contain a valid expression. Returns - *this. 
 template <class - iterator>
- reg_expression& assign(iterator first, iterator last, - flag_type f = regbase::normal);
Compiles a regular - expression from the expression denoted by the pair of - input-iterators first-last, using the flags f - to determine regular expression syntax. See class regbase for allowable flag values. - Throws bad_expression if first-last - does not contain a valid expression. Returns *this. 
 Allocator get_allocator()const;Returns the allocator used - by the expression. 
 locale_type imbue(const - locale_type& l);Imbues the expression with - the specified locale, and invalidates the current - expression. May throw std::runtime_error if the call - results in an attempt to open a non-existent message - catalogue. 
 locale_type getloc()const;Returns the locale used by - the expression. 
 flag_type getflags()const;Returns the flags used to - compile the current expression. 
 std::basic_string<charT> - str()const;Returns the current - expression as a string. 
 const_iterator begin()const;Returns a pointer to the - first character of the current expression. 
 const_iterator end()const;Returns a pointer to the end - of the current expression. 
 size_type size()const;Returns the length of the - current expression. 
 size_type max_size()const;Returns the maximum length - of a regular expression text. 
 bool empty()const;Returns true if the object - contains no valid expression. 
 unsigned mark_count()const - ;Returns the number of sub-expressions - in the compiled regular expression. Note that this - includes the whole match (subexpression zero), so the - value returned is always >= 1. 
- -
- -

Class regex_traits

- -

#include <boost/regex/regex_traits.hpp> -

- -

This is a preliminary version of the regular expression -traits class, and is subject to change.

- -

The purpose of the traits class is to make it easier to -customise the behaviour of reg_expression and the -associated matching algorithms. Custom traits classes can handle -special character sets or define additional character classes, -for example one could define [[:kanji:]] as the set of all (Unicode) -kanji characters. This library provides three traits classes and -a wrapper class regex_traits, which inherits from one of -these depending upon the default localisation model in use, class -c_regex_traits encapsulates the global C locale, class w32_regex_traits -encapsulates the global Win32 locale (only available on Win32 -systems), and class cpp_regex_traits encapsulates the C++ -locale (only provided if std::locale is supported):

- -
template <class charT> class c_regex_traits;
-template<> class c_regex_traits<char> { /*details*/ };
-template<> class c_regex_traits<wchar_t> { /*details*/ };
-
-template <class charT> class w32_regex_traits;
-template<> class w32_regex_traits<char> { /*details*/ };
-template<> class w32_regex_traits<wchar_t> { /*details*/ };
-
-template <class charT> class cpp_regex_traits;
-template<> class cpp_regex_traits<char> { /*details*/ };
-template<> class cpp_regex_traits<wchar_t> { /*details*/ };
-
-template <class charT> class regex_traits : public base_type { /*details*/ };
- -

Where "base_type" defaults to w32_regex_traits -on Win32 systems, and c_regex_traits otherwise. The -default behaviour can be changed by defining one of -BOOST_REGEX_USE_C_LOCALE (forces use of c_regex_traits by -default), or BOOST_REGEX_USE_CPP_LOCALE (forces use of cpp_regex_traits -by default). Alternatively a specific traits class can be passed -to the reg_expression template.

- -

The requirements for custom traits classes are documented separately here....

- -

There is also an example of a custom traits class supplied by Christian Engström, -see iso8859_1_regex_traits.cpp -and iso8859_1_regex_traits.hpp, -see the -readme file for more details.

- -
- -

Class match_results

- -

#include <boost/regex.hpp> -

- -

Regular expressions are different from many simple pattern-matching -algorithms in that as well as finding an overall match they can -also produce sub-expression matches: each sub-expression being -delimited in the pattern by a pair of parentheses (...). There -has to be some method for reporting sub-expression matches back -to the user: this is achieved by defining a class match_results -that acts as an indexed collection of sub-expression matches, -each sub-expression match being contained in an object of type sub_match. -

- -
// 
-// class sub_match: 
-// denotes one sub-expression match. 
-//         
-template <class iterator>
-struct sub_match
-{
-   typedef typename std::iterator_traits<iterator>::value_type       value_type;
-   typedef typename std::iterator_traits<iterator>::difference_type  difference_type;
-   typedef iterator                                                  iterator_type;
-   
-   iterator first;
-   iterator second;
-   bool matched;
-
-   operator std::basic_string<value_type>()const;
-
-   bool operator==(const sub_match& that)const;
-   bool operator !=(const sub_match& that)const;
-   difference_type length()const;
-};
-
-// 
-// class match_results: 
-// contains an indexed collection of matched sub-expressions. 
-// 
-template <class iterator, class Allocator = std::allocator<typename std::iterator_traits<iterator>::value_type > > 
-class match_results 
-{ 
-public: 
-   typedef Allocator                                                 alloc_type; 
-   typedef typename Allocator::template Rebind<iterator>::size_type  size_type; 
-   typedef typename std::iterator_traits<iterator>::value_type       char_type; 
-   typedef sub_match<iterator>                                       value_type; 
-   typedef typename std::iterator_traits<iterator>::difference_type  difference_type; 
-   typedef iterator                                                  iterator_type; 
-   explicit match_results(const Allocator& a = Allocator()); 
-   match_results(const match_results& m); 
-   match_results& operator=(const match_results& m); 
-   ~match_results(); 
-   size_type size()const; 
-   const sub_match<iterator>& operator[](int n) const; 
-   Allocator allocator()const; 
-   difference_type length(int sub = 0)const; 
-   difference_type position(unsigned int sub = 0)const; 
-   unsigned int line()const; 
-   iterator line_start()const; 
-   std::basic_string<char_type> str(int sub = 0)const; 
-   void swap(match_results& that); 
-   bool operator==(const match_results& that)const; 
-   bool operator<(const match_results& that)const; 
-};
-typedef match_results<const char*> cmatch;
-typedef match_results<const wchar_t*> wcmatch; 
-typedef match_results<std::string::const_iterator> smatch;
-typedef match_results<std::wstring::const_iterator> wsmatch; 
- -

Class match_results is used for reporting what matched a -regular expression, it is passed to the matching algorithms regex_match and regex_search, -and is used by regex_grep to notify the -callback function (or function object) what matched. Note that -the default allocator parameter has been chosen to match the -default allocator parameter to reg_expression. match_results has -the following public member functions:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 match_results(Allocator a = - Allocator());Constructs an instance of - match_results, using allocator instance a. 
 match_results(const - match_results& m);Copy constructor. 
 match_results& operator=(const - match_results& m);Assignment operator. 
 const - sub_match<iterator>& operator[](size_type - n) const;Returns what matched, item 0 - represents the whole string, item 1 the first sub-expression - and so on. 
 Allocator& allocator()const;Returns the allocator used - by the class. 
 difference_type length(unsigned - int sub = 0);Returns the length of the - matched subexpression, defaults to the length of the - whole match, in effect this is equivalent to operator[](sub).second - - operator[](sub).first. 
 difference_type position(unsigned - int sub = 0);Returns the position of the - matched sub-expression, defaults to the position of the - whole match. The returned value is the position of the - match relative to the start of the string. 
 unsigned int - line()const;Returns the index of the - line on which the match occurred, indices start with 1, - not zero. Equivalent to the number of newline characters - prior to operator[](0).first plus one. 
 iterator line_start()const;Returns an iterator denoting - the start of the line on which the match occurred. 
 size_type size()const;Returns how many sub-expressions - are present in the match, including sub-expression zero (the - whole match). This is the case even if no matches were - found in the search operation - you must use the returned - value from regex_search / regex_match to determine whether - any match occurred. 
- -


- -

The operator[] member function needs further explanation: it -returns a const reference to a structure of type -sub_match<iterator>, which has the following public members: -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 typedef typename - std::iterator_traits<iterator>::value_type - value_type;The type pointed to by the - iterators. 
 typedef typename - std::iterator_traits<iterator>::difference_type - difference_type;A type that represents the - difference between two iterators. 
 typedef iterator - iterator_type;The iterator type. 
 iterator firstAn iterator denoting the - position of the start of the match. 
 iterator secondAn iterator denoting the - position of the end of the match. 
 bool matchedA Boolean value denoting - whether this sub-expression participated in the match. 
 difference_type length()const;Returns the length of the - sub-expression match. 
 operator std::basic_string<value_type> - ()const;Converts the sub-expression - match into an instance of std::basic_string<>. Note - that this member may be either absent, or present to a - more limited degree depending upon your compiler - capabilities. 
- -

Operator[] takes an integer as an argument that denotes the -sub-expression for which to return information, the argument can -take the following special values:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 -2Returns everything from the - end of the match, to the end of the input string, - equivalent to $' in perl. If this is a null string, then: -

first == second

-

And

-

matched == false.

-
 
 -1Returns everything from the - start of the input string (or the end of the last match - if this is a grep operation), to the start of this match. - Equivalent to $` in perl. If this is a null string, then: -

first == second

-

And

-

matched == false.

-
 
 0Returns the whole of what - matched, equivalent to $& in perl. The matched - parameter is always true. 
 0 < N < size()Returns what matched sub-expression - N, if this sub-expression did not participate in the - match then 

matched == false

-

otherwise:

-

matched == true.

-
 
 N < -2 or N >= size()Represents an out-of-range - non-existent sub-expression. Returns a "null" - match in which

first == second

-

And

-

matched == false.

-
 
- -

Note that as well as being parameterised for an allocator, -match_results<> also takes an iterator type, this allows -any pair of iterators to be searched for a given regular -expression, provided the iterators have at least bi-directional -properties.

- -
- -

Algorithm regex_match

- -

#include <boost/regex.hpp> -

- -

The algorithm regex_match determines whether a given regular -expression matches a given sequence denoted by a pair of -bidirectional-iterators, the algorithm is defined as follows, note -that the result is true only if the expression matches the whole -of the input sequence, the main use of this function is data -input validation:

- -
template <class iterator, class Allocator, class charT, class traits, class Allocator2>
-bool regex_match(iterator first, 
-                 iterator last, 
-                 match_results<iterator, Allocator>& m, 
-                 const reg_expression<charT, traits, Allocator2>& e, 
-                 unsigned flags = match_default);
- -

The library also defines the following convenience versions, -which take either a const charT*, or a const std::basic_string<>& -in place of a pair of iterators [note - these versions may not be -available, or may be available in a more limited form, depending -upon your compiler's capabilities]:

- -
template <class charT, class Allocator, class traits, class Allocator2>
-bool regex_match(const charT* str, 
-                 match_results<const charT*, Allocator>& m, 
-                 const reg_expression<charT, traits, Allocator2>& e, 
-                 unsigned flags = match_default)
-
-template <class ST, class SA, class Allocator, class charT, class traits, class Allocator2>
-bool regex_match(const std::basic_string<charT, ST, SA>& s, 
-                 match_results<typename std::basic_string<charT, ST, SA>::const_iterator, Allocator>& m, 
-                 const reg_expression<charT, traits, Allocator2>& e, 
-                 unsigned flags = match_default);
- -

Finally there is a set of convenience versions that simply -return true or false and do not indicate what matched:

- -
template <class iterator, class Allocator, class charT, class traits, class Allocator2>
-bool regex_match(iterator first, 
-                 iterator last, 
-                 const reg_expression<charT, traits, Allocator2>& e, 
-                 unsigned flags = match_default);
-
-template <class charT, class Allocator, class traits, class Allocator2>
-bool regex_match(const charT* str, 
-                 const reg_expression<charT, traits, Allocator2>& e, 
-                 unsigned flags = match_default)
-
-template <class ST, class SA, class Allocator, class charT, class traits, class Allocator2>
-bool regex_match(const std::basic_string<charT, ST, SA>& s, 
-                 const reg_expression<charT, traits, Allocator2>& e, 
-                 unsigned flags = match_default);
- -

The parameters for the main function version are as follows:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 iterator firstDenotes the start of the range to be matched. 
 iterator lastDenotes the end of the range - to be matched. 
 match_results<iterator, - Allocator>& mAn instance of match_results - in which what matched will be reported. On exit if a - match occurred then m[0] denotes the whole of the string - that matched, m[0].first must be equal to first, m[0].second - will be less than or equal to last. m[1] denotes the - first subexpression m[2] the second subexpression and so - on. If no match occurred then m[0].first = m[0].second = - last.

Note that since the match_results structure - stores only iterators, and not strings, the iterators/strings - passed to regex_match must be valid for as long as the - result is to be used. For that reason never pass - temporary string objects to regex_match.

-
 
 const - reg_expression<charT, traits, Allocator2>& eContains the regular - expression to be matched. 
 unsigned flags = - match_defaultDetermines the semantics - used for matching, a combination of one or more match_flags enumerators. 
- -

regex_match returns false if no match occurs or true if it -does. A match only occurs if it starts at first and -finishes at last. Example: the following example -processes an ftp response:

- -
#include <stdlib.h> 
-#include <boost/regex.hpp> 
-#include <string> 
-#include <iostream> 
-
-using namespace boost; 
-
-regex expression("([0-9]+)(\\-| |$)(.*)"); 
-
-// process_ftp: 
-// on success returns the ftp response code, and fills 
-// msg with the ftp response message. 
-int process_ftp(const char* response, std::string* msg) 
-{ 
-   cmatch what; 
-   if(regex_match(response, what, expression)) 
-   { 
-      // what[0] contains the whole string 
-      // what[1] contains the response code 
-      // what[2] contains the separator character 
-      // what[3] contains the text message. 
-      if(msg) 
-         msg->assign(what[3].first, what[3].second); 
-      return std::atoi(what[1].first); 
-   } 
-   // failure did not match 
-   if(msg) 
-      msg->erase(); 
-   return -1; 
-}
- -

The value of the flags parameter -passed to the algorithm must be a combination of one or more of -the following values:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 match_defaultThe default value, indicates - that first represents the start of a line, the - start of a buffer, and (possibly) the start of a word. - Also implies that last represents the end of a - line, the end of the buffer and (possibly) the end of a - word. Implies that a dot sub-expression "." - will match both the newline character and a null. 
 match_not_bolWhen this flag is set then first - does not represent the start of a new line. 
 match_not_eolWhen this flag is set then last - does not represent the end of a line. 
 match_not_bobWhen this flag is set then first - is not the beginning of a buffer. 
 match_not_eobWhen this flag is set then last - does not represent the end of a buffer. 
 match_not_bowWhen this flag is set then first - can never match the start of a word. 
 match_not_eowWhen this flag is set then last - can never match the end of a word. 
 match_not_dot_newlineWhen this flag is set then a - dot expression "." can not match the newline - character. 
 match_not_dot_nullWhen this flag is set then a - dot expression "." can not match a null - character. 
 match_prev_availWhen this flag - is set, then *--first is a valid expression and - the flags match_not_bol and match_not_bow have no effect, - since the value of the previous character can be used to - check these. 
 match_anyWhen this flag - is set, then the first string matched is returned, rather - than the longest possible match. This flag can - significantly reduce the time taken to find a match, but - what matches is undefined. 
 match_not_nullWhen this flag - is set, then the expression will never match a null - string. 
 match_continuousWhen this flag - is set, then during a grep operation, each successive - match must start from where the previous match finished. 
 match_partialWhen this flag - is set, the regex algorithms will report partial matches - that is - where one or more characters at the end of the text input - matched some prefix of the regular expression. 
- -

 

- -
- -

Algorithm regex_search

- -

 #include <boost/regex.hpp> -

- -

The algorithm regex_search will search a range denoted by a -pair of bidirectional-iterators for a given regular expression. -The algorithm uses various heuristics to reduce the search time -by only checking for a match if a match could conceivably start -at that position. The algorithm is defined as follows:

- -
template <class iterator, class Allocator, class charT, class traits, class Allocator2>
-bool regex_search(iterator first, 
-                iterator last, 
-                match_results<iterator, Allocator>& m, 
-                const reg_expression<charT, traits, Allocator2>& e, 
-                unsigned flags = match_default);
- -

The library also defines the following convenience versions, -which take either a const charT*, or a const std::basic_string<>& -in place of a pair of iterators [note - these versions may not be -available, or may be available in a more limited form, depending -upon your compiler's capabilities]:

- -
template <class charT, class Allocator, class traits, class Allocator2>
-bool regex_search(const charT* str, 
-                match_results<const charT*, Allocator>& m, 
-                const reg_expression<charT, traits, Allocator2>& e, 
-                unsigned flags = match_default);
-
-template <class ST, class SA, class Allocator, class charT, class traits, class Allocator2>
-bool regex_search(const std::basic_string<charT, ST, SA>& s, 
-                match_results<typename std::basic_string<charT, ST, SA>::const_iterator, Allocator>& m, 
-                const reg_expression<charT, traits, Allocator2>& e, 
-                unsigned flags = match_default);
- -

The parameters for the main function version are as follows:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 iterator firstThe starting position of the - range to search. 
 iterator lastThe ending position of the - range to search. 
 match_results<iterator, - Allocator>& mAn instance of match_results - in which what matched will be reported. On exit if a - match occurred then m[0] denotes the whole of the string - that matched, m[0].first and m[0].second will be less - than or equal to last. m[1] denotes the first sub-expression - m[2] the second sub-expression and so on. If no match - occurred then m[0].first = m[0].second = last.

Note - that since the match_results structure stores only - iterators, and not strings, the iterators/strings passed - to regex_search must be valid for as long as the result - is to be used. For that reason never pass temporary - string objects to regex_search.

-
 
 const - reg_expression<charT, traits, Allocator2>& eThe regular expression to - search for. 
 unsigned flags = - match_defaultThe flags that determine - what gets matched, a combination of one or more match_flags enumerators. 
- -


- -

Example: the following example, -takes the contents of a file in the form of a string, and -searches for all the C++ class declarations in the file. The code -will work regardless of the way that std::string is implemented, -for example it could easily be modified to work with the SGI rope -class, which uses a non-contiguous storage strategy.

- -
#include <string> 
-#include <map> 
-#include <boost/regex.hpp> 
-
-// purpose: 
-// takes the contents of a file in the form of a string 
-// and searches for all the C++ class definitions, storing 
-// their locations in a map of strings/int's 
-typedef std::map<std::string, int, std::less<std::string> > map_type; 
-
-boost::regex expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)"); 
-
-void IndexClasses(map_type& m, const std::string& file) 
-{ 
-   std::string::const_iterator start, end; 
-   start = file.begin(); 
-   end = file.end(); 
-      boost::match_results<std::string::const_iterator> what; 
-   unsigned int flags = boost::match_default; 
-   while(regex_search(start, end, what, expression, flags)) 
-   { 
-      // what[0] contains the whole string 
-      // what[5] contains the class name. 
-      // what[6] contains the template specialisation if any. 
-      // add class name and position to map: 
-      m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
-                what[5].first - file.begin(); 
-      // update search position: 
-      start = what[0].second; 
-      // update flags: 
-      flags |= boost::match_prev_avail; 
-      flags |= boost::match_not_bob; 
-   } 
-}
- 
- -
- -

Algorithm regex_grep

- -

#include <boost/regex.hpp> -

- -

 Regex_grep allows you to search through a bidirectional-iterator -range and locate all the (non-overlapping) matches with a given -regular expression. The function is declared as:

- -
template <class Predicate, class iterator, class charT, class traits, class Allocator>
-unsigned int regex_grep(Predicate foo, 
-                        iterator first, 
-                        iterator last, 
-                        const reg_expression<charT, traits, Allocator>& e, 
-                        unsigned flags = match_default)
- -

The library also defines the following convenience versions, -which take either a const charT*, or a const std::basic_string<>& -in place of a pair of iterators [note - these versions may not be -available, or may be available in a more limited form, depending -upon your compiler's capabilities]:

- -
template <class Predicate, class charT, class Allocator, class traits>
-unsigned int regex_grep(Predicate foo, 
-              const charT* str, 
-              const reg_expression<charT, traits, Allocator>& e, 
-              unsigned flags = match_default);
-
-template <class Predicate, class ST, class SA, class Allocator, class charT, class traits>
-unsigned int regex_grep(Predicate foo, 
-              const std::basic_string<charT, ST, SA>& s, 
-              const reg_expression<charT, traits, Allocator>& e, 
-              unsigned flags = match_default);
- -

The parameters for the primary version of regex_grep have the -following meanings:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 foo: A predicate function object - or function pointer, see below for more information. 
 first: The start of the range to - search. 
 last: The end of the range to - search. 
 e: The regular expression to - search for. 
 flags: The flags that determine how - matching is carried out, a combination of one or more match_flags - enumerators. 
- -

 The algorithm finds all of the non-overlapping matches -of the expression e, for each match it fills a match_results<iterator, Allocator> -structure, which contains information on what matched, and calls -the predicate foo, passing the match_results<iterator, -Allocator> as a single argument. If the predicate returns -true, then the grep operation continues, otherwise it terminates -without searching for further matches. The function returns the -number of matches found.

- -

The general form of the predicate is:

- -
struct grep_predicate
-{
-   bool operator()(const match_results<iterator_type, expression_type::alloc_type>& m);
-};
- -

For example the regular expression "a*b" would find -one match in the string "aaaaab" and two in the string -"aaabb".

- -

Remember this algorithm can be used for a lot more than -implementing a version of grep, the predicate can be and do -anything that you want, grep utilities would output the results -to the screen, another program could index a file based on a -regular expression and store a set of bookmarks in a list, or a -text file conversion utility would output to file. The results of -one regex_grep can even be chained into another regex_grep to -create recursive parsers.

- -

Example: -convert the example from regex_search to use regex_grep -instead:

- -
#include <string> 
-#include <map> 
-#include <boost/regex.hpp> 
-
-// IndexClasses: 
-// takes the contents of a file in the form of a string 
-// and searches for all the C++ class definitions, storing 
-// their locations in a map of strings/int's 
-
-typedef std::map<std::string, int, std::less<std::string> > map_type; 
-
-boost::regex expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" 
-                 "(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)" 
-                 "[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)"); 
-
-class IndexClassesPred 
-{ 
-   map_type& m; 
-   std::string::const_iterator base; 
-public: 
-   IndexClassesPred(map_type& a, std::string::const_iterator b) : m(a), base(b) {} 
-   bool operator()(const match_results<std::string::const_iterator, regex::alloc_type>& what) 
-   { 
-      // what[0] contains the whole string 
-      // what[5] contains the class name. 
-      // what[6] contains the template specialisation if any. 
-      // add class name and position to map: 
-      m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
-                what[5].first - base; 
-      return true; 
-   } 
-}; 
-
-void IndexClasses(map_type& m, const std::string& file) 
-{ 
-   std::string::const_iterator start, end; 
-   start = file.begin(); 
-   end = file.end(); 
-   regex_grep(IndexClassesPred(m, start), start, end, expression); 
-} 
- -

Example: -Use regex_grep to call a global callback function:

- -
#include <string> 
-#include <map> 
-#include <boost/regex.hpp> 
-
-// purpose: 
-// takes the contents of a file in the form of a string 
-// and searches for all the C++ class definitions, storing 
-// their locations in a map of strings/int's 
-
-typedef std::map<std::string, int, std::less<std::string> > map_type; 
-
-boost::regex expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)"); 
-
-map_type class_index; 
-std::string::const_iterator base; 
-
-bool grep_callback(const boost::match_results<std::string::const_iterator, boost::regex::alloc_type>& what) 
-{ 
-   // what[0] contains the whole string 
-   // what[5] contains the class name. 
-   // what[6] contains the template specialisation if any. 
-   // add class name and position to map: 
-   class_index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
-                what[5].first - base; 
-   return true; 
-} 
-
-void IndexClasses(const std::string& file) 
-{ 
-   std::string::const_iterator start, end; 
-   start = file.begin(); 
-   end = file.end(); 
-   base = start; 
-   regex_grep(grep_callback, start, end, expression, match_default); 
-}
-  
- -

Example: -use regex_grep to call a class member function, use the standard -library adapters std::mem_fun and std::bind1st to -convert the member function into a predicate:

- -
#include <string> 
-#include <map> 
-#include <boost/regex.hpp> 
-#include <functional> 
-
-// purpose: 
-// takes the contents of a file in the form of a string 
-// and searches for all the C++ class definitions, storing 
-// their locations in a map of strings/int's 
-
-typedef std::map<std::string, int, std::less<std::string> > map_type; 
-
-class class_index 
-{ 
-   boost::regex expression; 
-   map_type index; 
-   std::string::const_iterator base; 
-   bool grep_callback(boost::match_results<std::string::const_iterator, boost::regex::alloc_type> what); 
-public: 
-   void IndexClasses(const std::string& file); 
-   class_index() 
-      : index(), 
-        expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" 
-                   "(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?" 
-                   "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?" 
-                   "(\\{|:[^;\\{()]*\\{)" 
-                   ){} 
-}; 
-
-bool class_index::grep_callback(boost::match_results<std::string::const_iterator, boost::regex::alloc_type> what) 
-{ 
-   // what[0] contains the whole string 
-   // what[5] contains the class name. 
-   // what[6] contains the template specialisation if any. 
-   // add class name and position to map: 
-   index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
-               what[5].first - base; 
-   return true; 
-} 
-
-void class_index::IndexClasses(const std::string& file) 
-{ 
-   std::string::const_iterator start, end; 
-   start = file.begin(); 
-   end = file.end(); 
-   base = start; 
-   regex_grep(std::bind1st(std::mem_fun(&class_index::grep_callback), this), 
-              start, 
-              end, 
-              expression); 
-} 
-  
- -

Finally, -C++ Builder users can use C++ Builder's closure type as a -callback argument:

- -
#include <string> 
-#include <map> 
-#include <boost/regex.hpp> 
-#include <functional> 
-
-// purpose: 
-// takes the contents of a file in the form of a string 
-// and searches for all the C++ class definitions, storing 
-// their locations in a map of strings/int's 
-
-typedef std::map<std::string, int, std::less<std::string> > map_type; 
-class class_index 
-{ 
-   boost::regex expression; 
-   map_type index; 
-   std::string::const_iterator base; 
-   typedef boost::match_results<std::string::const_iterator, boost::regex::alloc_type> arg_type; 
-   bool grep_callback(const arg_type& what); 
-public: 
-   typedef bool (__closure* grep_callback_type)(const arg_type&); 
-   void IndexClasses(const std::string& file); 
-   class_index() 
-      : index(), 
-        expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" 
-                   "(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?" 
-                   "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?" 
-                   "(\\{|:[^;\\{()]*\\{)" 
-                   ){} 
-}; 
-
-bool class_index::grep_callback(const arg_type& what) 
-{ 
-   // what[0] contains the whole string    
-// what[5] contains the class name.    
-// what[6] contains the template specialisation if any.    
-// add class name and position to map:    
-index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] = 
-               what[5].first - base; 
-   return true; 
-} 
-
-void class_index::IndexClasses(const std::string& file) 
-{ 
-   std::string::const_iterator start, end; 
-   start = file.begin(); 
-   end = file.end(); 
-   base = start; 
-   class_index::grep_callback_type cl = &(this->grep_callback); 
-   regex_grep(cl, 
-            start, 
-            end, 
-            expression); 
-} 
- -
- -

 Algorithm regex_format

- -

#include <boost/regex.hpp> -

- -

The algorithm regex_format takes the results of a match and -creates a new string based upon a format string, -regex_format can be used for search and replace operations:

- -
template <class OutputIterator, class iterator, class Allocator, class charT>
-OutputIterator regex_format(OutputIterator out,
-                            const match_results<iterator, Allocator>& m,
-                            const charT* fmt,
-                            unsigned flags = 0);
-
-template <class OutputIterator, class iterator, class Allocator, class charT>
-OutputIterator regex_format(OutputIterator out,
-                            const match_results<iterator, Allocator>& m,
-                            const std::basic_string<charT>& fmt,
-                            unsigned flags = 0);
- -

The library also defines the following convenience variation -of regex_format, which returns the result directly as a string, -rather than outputting to an iterator [note - this version may -not be available, or may be available in a more limited form, -depending upon your compiler's capabilities]:

- -
template <class iterator, class Allocator, class charT>
-std::basic_string<charT> regex_format
-                                 (const match_results<iterator, Allocator>& m, 
-                                  const charT* fmt,
-                                  unsigned flags = 0);
-
-template <class iterator, class Allocator, class charT>
-std::basic_string<charT> regex_format
-                                 (const match_results<iterator, Allocator>& m, 
-                                  const std::basic_string<charT>& fmt,
-                                  unsigned flags = 0);
- -

Parameters to the main version of the function are passed as -follows:

- - - - - - - - - - - - - - - - - - - - - - - - - - -
 OutputIterator outAn output iterator type, the - output string is sent to this iterator. Typically this - would be a std::ostream_iterator. 
 const - match_results<iterator, Allocator>& mAn instance of - match_results<> obtained from one of the matching - algorithms above, and denoting what matched. 
 const charT* fmtA format string that - determines how the match is transformed into the new - string. 
 unsigned flagsOptional flags which - describe how the format string is to be interpreted. 
- -

Format flags are defined as follows: -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 format_all: Enables all syntax options (perl-like - plus extensions). 
 format_sed: Allows only a sed-like - syntax. 
 format_perl: Allows only a perl-like - syntax. 
 format_no_copy: Disables copying of - unmatched sections to the output string during regex_merge operations. 
 format_first_only: When this flag is set only the first occurrence will - be replaced (applies to regex_merge only). 
- -


- -

The format string syntax (and available options) is described -more fully under format -strings.

- -
- -

Algorithm regex_merge

- -

#include <boost/regex.hpp> -

- -

The algorithm regex_merge is a combination of regex_grep and regex_format. -That is, it greps through the string finding all the matches to -the regular expression, for each match it then calls regex_format to format the string and -sends the result to the output iterator. Sections of text that do -not match are copied to the output unchanged only if the flags -parameter does not have the flag format_no_copy -set. If the flag format_first_only is -set then only the first occurrence is replaced rather than all -occurrences.

- -
template <class OutputIterator, class iterator, class traits, class Allocator, class charT>
-OutputIterator regex_merge(OutputIterator out, 
-                          iterator first,
-                          iterator last,
-                          const reg_expression<charT, traits, Allocator>& e, 
-                          const charT* fmt, 
-                          unsigned int flags = match_default);
-
-template <class OutputIterator, class iterator, class traits, class Allocator, class charT>
-OutputIterator regex_merge(OutputIterator out, 
-                           iterator first,
-                           iterator last,
-                           const reg_expression<charT, traits, Allocator>& e, 
-                           std::basic_string<charT>& fmt, 
-                           unsigned int flags = match_default);
- -

The library also defines the following convenience variation -of regex_merge, which returns the result directly as a string, -rather than outputting to an iterator [note - this version may -not be available, or may be available in a more limited form, -depending upon your compiler's capabilities]:

- -
template <class traits, class Allocator, class charT>
-std::basic_string<charT> regex_merge(const std::basic_string<charT>& text,
-                                     const reg_expression<charT, traits, Allocator>& e, 
-                                     const charT* fmt, 
-                                     unsigned int flags = match_default);
-
-template <class traits, class Allocator, class charT>
-std::basic_string<charT> regex_merge(const std::basic_string<charT>& text,
-                                     const reg_expression<charT, traits, Allocator>& e, 
-                                     const std::basic_string<charT>& fmt, 
-                                     unsigned int flags = match_default);
- -

Parameters to the main version of the function are passed as -follows:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 OutputIterator outAn output iterator type, the - output string is sent to this iterator. Typically this - would be a std::ostream_iterator. 
 iterator firstThe start of the range of - text to grep (bidirectional-iterator). 
 iterator lastThe end of the range of text - to grep (bidirectional-iterator). 
 const - reg_expression<charT, traits, Allocator>& eThe expression to search for. 
 const charT* fmtThe format string to be - applied to sections of text that match. 
 unsigned int - flags = match_defaultFlags which determine how - the expression is matched - see match_flags, - and how the format string is interpreted - see format_flags. 
- -

Example: the following example takes -C/C++ source code as input, and outputs syntax highlighted HTML -code.

- -
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <iterator>
-#include <boost/regex.hpp>
-#include <fstream>
-#include <iostream>
-
-// purpose:
-// takes the contents of a file and transform to
-// syntax highlighted code in html format
-
-boost::regex e1, e2;
-extern const char* expression_text;
-extern const char* format_string;
-extern const char* pre_expression;
-extern const char* pre_format;
-extern const char* header_text;
-extern const char* footer_text;
-
-void load_file(std::string& s, std::istream& is)
-{
-   s.erase();
-   s.reserve(is.rdbuf()->in_avail());
-   char c;
-   while(is.get(c))
-   {
-      if(s.capacity() == s.size())
-         s.reserve(s.capacity() * 3);
-      s.append(1, c);
-   }
-}
-
-int main(int argc, const char** argv)
-{
-   try{
-   e1.assign(expression_text);
-   e2.assign(pre_expression);
-   for(int i = 1; i < argc; ++i)
-   {
-      std::cout << "Processing file " << argv[i] << std::endl;
-      std::ifstream fs(argv[i]);
-      std::string in;
-      load_file(in, fs);
-      std::string out_name(std::string(argv[i]) + std::string(".htm"));
-      std::ofstream os(out_name.c_str());
-      os << header_text;
-      // strip '<' and '>' first by outputting to a
-      // temporary string stream
-      std::ostringstream t(std::ios::out | std::ios::binary);
-      std::ostream_iterator<char, char> oi(t);
-      boost::regex_merge(oi, in.begin(), in.end(), e2, pre_format);
-      // then output to final output stream
-      // adding syntax highlighting:
-      std::string s(t.str());
-      std::ostream_iterator<char, char> out(os);
-      boost::regex_merge(out, s.begin(), s.end(), e1, format_string);
-      os << footer_text;
-   }
-   }
-   catch(...)
-   { return -1; }
-   return 0;
-}
-
-extern const char* pre_expression = "(<)|(>)|\\r";
-extern const char* pre_format = "(?1&lt;)(?2&gt;)";
-
-
-const char* expression_text = // preprocessor directives: index 1
-                              "(^[[:blank:]]*#(?:[^\\\\\\n]|\\\\[^\\n[:punct:][:word:]]*[\\n[:punct:][:word:]])*)|"
-                              // comment: index 2
-                              "(//[^\\n]*|/\\*.*?\\*/)|"
-                              // literals: index 3
-                              "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
-                              // string literals: index 4
-                              "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
-                              // keywords: index 5
-                              "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
-                              "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
-                              "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
-                              "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
-                              "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
-                              "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
-                              "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
-                              "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
-                              "|using|virtual|void|volatile|wchar_t|while)\\>"
-                              ;
-
-const char* format_string = "(?1<font color=\"#008040\">$&</font>)"
-                            "(?2<I><font color=\"#000080\">$&</font></I>)"
-                            "(?3<font color=\"#0000A0\">$&</font>)"
-                            "(?4<font color=\"#0000FF\">$&</font>)"
-                            "(?5<B>$&</B>)";
-
-const char* header_text = "<HTML>\n<HEAD>\n"
-                          "<TITLE>Auto-generated html formated source</TITLE>\n"
-                          "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=windows-1252\">\n"
-                          "</HEAD>\n"
-                          "<BODY LINK=\"#0000ff\" VLINK=\"#800080\" BGCOLOR=\"#ffffff\">\n"
-                          "<P> </P>\n<PRE>";
-
-const char* footer_text = "</PRE>\n</BODY>\n\n";
- -
- -

Algorithm regex_split

- -

#include <boost/regex.hpp> -

- -

Algorithm regex_split performs a similar operation to the perl -split operation, and comes in three overloaded forms:

- -
template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2, class Alloc2>
-std::size_t regex_split(OutputIterator out, 
-                        std::basic_string<charT, Traits1, Alloc1>& s, 
-                        const reg_expression<charT, Traits2, Alloc2>& e,
-                        unsigned flags,
-                        std::size_t max_split);
-
-template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2, class Alloc2>
-std::size_t regex_split(OutputIterator out, 
-                        std::basic_string<charT, Traits1, Alloc1>& s, 
-                        const reg_expression<charT, Traits2, Alloc2>& e,
-                        unsigned flags = match_default);
-
-template <class OutputIterator, class charT, class Traits1, class Alloc1>
-std::size_t regex_split(OutputIterator out, 
-                        std::basic_string<charT, Traits1, Alloc1>& s);
- -

Each version takes an output-iterator for output, and a string -for input. If the expression contains no marked sub-expressions, -then the algorithm writes one string onto the output-iterator for -each section of input that does not match the expression. If the -expression does contain marked sub-expressions, then each time a -match is found, one string for each marked sub-expression will be -written to the output-iterator. No more than max_split strings -will be written to the output-iterator. Before returning, all the -input processed will be deleted from the string s (if max_split -is not reached then all of s will be deleted). Returns -the number of strings written to the output-iterator. If the -parameter max_split is not specified then it defaults to -UINT_MAX. If no expression is specified, then it defaults to -"\s+", and splitting occurs on whitespace.

- -

Example: -the following function will split the input string into a series -of tokens, and remove each token from the string s:

- -
unsigned tokenise(std::list<std::string>& l, std::string& s)
-{
-   return boost::regex_split(std::back_inserter(l), s);
-}
- -

Example: -the following short program will extract all of the URL's from a -html file, and print them out to cout:

- -
#include <list>
-#include <fstream>
-#include <iostream>
-#include <boost/regex.hpp>
-
-boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
-               boost::regbase::normal | boost::regbase::icase);
-
-void load_file(std::string& s, std::istream& is)
-{
-   s.erase();
-   //
-   // attempt to grow string buffer to match file size,
-   // this doesn't always work...
-   s.reserve(is.rdbuf()->in_avail());
-   char c;
-   while(is.get(c))
-   {
-      // use logarithmic growth strategy, in case
-      // in_avail (above) returned zero:
-      if(s.capacity() == s.size())
-         s.reserve(s.capacity() * 3);
-      s.append(1, c);
-   }
-}
-
-
-int main(int argc, char** argv)
-{
-   std::string s;
-   std::list<std::string> l;
-
-   for(int i = 1; i < argc; ++i)
-   {
-      std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
-      s.erase();
-      std::ifstream is(argv[i]);
-      load_file(s, is);
-      boost::regex_split(std::back_inserter(l), s, e);
-      while(l.size())
-      {
-         s = *(l.begin());
-         l.pop_front();
-         std::cout << s << std::endl;
-      }
-   }
-   return 0;
-}
- -
- -

Partial Matches

- -

The match-flag match_partial can be passed to the -following algorithms: regex_match, regex_search, and regex_grep. -When used it indicates that partial as well as full matches -should be found. A partial match is one that matched one or more -characters at the end of the text input, but did not match all of -the regular expression (although it may have done so had more -input been available). Partial matches are typically used when -either validating data input (checking each character as it is -entered on the keyboard), or when searching texts that are either -too long to load into memory (or even into a memory mapped file), -or are of indeterminate length (for example the source may be a -socket or similar). Partial and full matches can be -differentiated as shown in the following table (the variable M -represents an instance of match_results<> as filled in by -regex_match, regex_search or regex_grep):
-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | Result | M[0].matched | M[0].first | M[0].second
No match | False | Undefined | Undefined | Undefined
Partial match | True | False | Start of partial match. | End of partial match (end of - text).
Full match | True | True | Start of full match. | End of full match.
- -

The following example tests -to see whether the text could be a valid credit card number, as -the user presses a key, the character entered would be added to -the string being built up, and passed to is_possible_card_number. -If this returns true then the text could be a valid card number, -so the user interface's OK button would be enabled. If it returns -false, then this is not yet a valid card number, but could be -with more input, so the user interface would disable the OK -button. Finally, if the procedure throws an exception the input -could never become a valid number, and the inputted character -must be discarded, and a suitable error indication displayed to -the user.

- -
#include <string>
-#include <iostream>
-#include <boost/regex.hpp>
-
-boost::regex e("(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})");
-
-bool is_possible_card_number(const std::string& input)
-{
-   //
-   // return false for partial match, true for full match, or throw for
-   // impossible match based on what we have so far...
-   boost::match_results<std::string::const_iterator> what;
-   if(0 == boost::regex_match(input, what, e, boost::match_default | boost::match_partial))
-   {
-      // the input so far could not possibly be valid so reject it:
-      throw std::runtime_error("Invalid data entered - this could not possibly be a valid card number");
-   }
-   // OK so far so good, but have we finished?
-   if(what[0].matched)
-   {
-      // excellent, we have a result:
-      return true;
-   }
-   // what we have so far is only a partial match...
-   return false;
-}
- -

In the following example, text -input is taken from a stream containing an unknown amount of -text; this example simply counts the number of html tags -encountered in the stream. The text is loaded into a buffer and -searched a part at a time, if a partial match was encountered, -then the partial match gets searched a second time as the start -of the next batch of text:

- -
#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <boost/regex.hpp>
-
-// match some kind of html tag:
-boost::regex e("<[^>]*>");
-// count how many:
-unsigned int tags = 0;
-// saved position of partial match:
-char* next_pos = 0;
-
-bool grep_callback(const boost::match_results<char*>& m)
-{
-   if(m[0].matched == false)
-   {
-      // save position and return:
-      next_pos = m[0].first;
-   }
-   else
-      ++tags;
-   return true;
-}
-
-void search(std::istream& is)
-{
-   char buf[4096];
-   next_pos = buf + sizeof(buf);
-   bool have_more = true;
-   while(have_more)
-   {
-      // how much do we copy forward from last try:
-      unsigned leftover = (buf + sizeof(buf)) - next_pos;
-      // and how much is left to fill:
-      unsigned size = next_pos - buf;
-      // copy forward whatever we have left:
-      memcpy(buf, next_pos, leftover);
-      // fill the rest from the stream:
-      unsigned read = is.readsome(buf + leftover, size);
-      // check to see if we've run out of text:
-      have_more = read == size;
-      // reset next_pos:
-      next_pos = buf + sizeof(buf);
-      // and then grep:
-      boost::regex_grep(grep_callback,
-                        buf,
-                        buf + read + leftover,
-                        e,
-                        boost::match_default | boost::match_partial);
-   }
-}
- -
- -

Copyright Dr -John Maddock 1998-2001 all rights reserved.

- - diff --git a/test/regress/parse.cpp b/test/regress/parse.cpp index 21da3695..caf490fb 100644 --- a/test/regress/parse.cpp +++ b/test/regress/parse.cpp @@ -52,26 +52,28 @@ flag_info flag_data[] = { { BOOST_RE_STR("REG_NOTEOL"), 10, REG_NOTEOL, 1 }, { BOOST_RE_STR("REG_STARTEND"), 12, REG_STARTEND, 1 }, - { BOOST_RE_STR("basic"), 5, regbase::basic, 2 }, - { BOOST_RE_STR("escape_in_lists"), 15, regbase::escape_in_lists, 2 }, - { BOOST_RE_STR("char_classes"), 12, regbase::char_classes, 2 }, - { BOOST_RE_STR("intervals"), 9, regbase::intervals, 2 }, - { BOOST_RE_STR("limited_ops"), 11, regbase::limited_ops, 2 }, - { BOOST_RE_STR("newline_alt"), 11, regbase::newline_alt, 2 }, - { BOOST_RE_STR("bk_plus_qm"), 10, regbase::bk_plus_qm, 2 }, - { BOOST_RE_STR("bk_braces"), 9, regbase::bk_braces, 2 }, - { BOOST_RE_STR("bk_parens"), 9, regbase::bk_parens, 2 }, - { BOOST_RE_STR("bk_refs"), 7, regbase::bk_refs, 2 }, - { BOOST_RE_STR("bk_vbar"), 7, regbase::bk_vbar, 2 }, - { BOOST_RE_STR("use_except"), 10, regbase::use_except, 2 }, - { BOOST_RE_STR("literal"), 7, regbase::literal, 2 }, + { BOOST_RE_STR("basic"), 5, regex_constants::basic, 2 }, + { BOOST_RE_STR("escape_in_lists"), 15, regex_constants::escape_in_lists, 2 }, + { BOOST_RE_STR("char_classes"), 12, regex_constants::char_classes, 2 }, + { BOOST_RE_STR("intervals"), 9, regex_constants::intervals, 2 }, + { BOOST_RE_STR("limited_ops"), 11, regex_constants::limited_ops, 2 }, + { BOOST_RE_STR("newline_alt"), 11, regex_constants::newline_alt, 2 }, + { BOOST_RE_STR("bk_plus_qm"), 10, regex_constants::bk_plus_qm, 2 }, + { BOOST_RE_STR("bk_braces"), 9, regex_constants::bk_braces, 2 }, + { BOOST_RE_STR("bk_parens"), 9, regex_constants::bk_parens, 2 }, + { BOOST_RE_STR("bk_refs"), 7, regex_constants::bk_refs, 2 }, + { BOOST_RE_STR("bk_vbar"), 7, regex_constants::bk_vbar, 2 }, + { BOOST_RE_STR("use_except"), 10, regex_constants::use_except, 2 }, + { BOOST_RE_STR("literal"), 7, regex_constants::literal, 2 }, + { 
BOOST_RE_STR("nosubs"), 6, regex_constants::nosubs, 2 }, + { BOOST_RE_STR("optimize"), 8, regex_constants::optimize, 2 }, #ifndef BOOST_REGEX_V3 - { BOOST_RE_STR("perlex"), 6, regbase::perlex, 2 }, + { BOOST_RE_STR("perlex"), 6, regex_constants::perlex, 2 }, #endif - { BOOST_RE_STR("normal"), 6, regbase::normal, 2 }, - { BOOST_RE_STR("basic"), 5, regbase::basic, 2 }, - { BOOST_RE_STR("extended"), 8, regbase::extended, 2 }, - { BOOST_RE_STR("perl"), 6, regbase::perl, 2 }, + { BOOST_RE_STR("normal"), 6, regex_constants::normal, 2 }, + { BOOST_RE_STR("basic"), 5, regex_constants::basic, 2 }, + { BOOST_RE_STR("extended"), 8, regex_constants::extended, 2 }, + { BOOST_RE_STR("perl"), 6, regex_constants::perl, 2 }, { BOOST_RE_STR("match_default"), 13, match_default, 3 }, { BOOST_RE_STR("match_not_bol"), 13, match_not_bol, 3 }, @@ -87,7 +89,9 @@ flag_info flag_data[] = { { BOOST_RE_STR("match_not_null"), 14, match_not_null, 3 }, { BOOST_RE_STR("match_continuous"), 16, match_continuous, 3 }, { BOOST_RE_STR("match_partial"), 13, match_partial, 3 }, + { BOOST_RE_STR("match_nosubs"), 12, match_nosubs, 3 }, + { BOOST_RE_STR("format_all"), 10, format_all, 3 }, { BOOST_RE_STR("format_sed"), 10, format_sed, 3 }, { BOOST_RE_STR("format_perl"), 11, format_perl, 3 }, { BOOST_RE_STR("format_no_copy"), 14, format_no_copy, 3 }, @@ -116,7 +120,7 @@ typedef basic_regex re_parse_t; typedef match_results parse_grep; typedef string_type::const_iterator parse_iterator; -re_parse_t parse_expression(expression_text, regbase::normal); +re_parse_t parse_expression(expression_text, regex_constants::normal); // // now define our grep predicate function object: diff --git a/test/regress/regex_test.cpp b/test/regress/regex_test.cpp index b91bc1a7..82e605af 100644 --- a/test/regress/regex_test.cpp +++ b/test/regress/regex_test.cpp @@ -55,11 +55,11 @@ typedef bool (*pred1_type)(const match_results&); typedef bool (*pred2_type)(const match_results&); //check that all the defined flags are available: 
-regbase::flag_type f = regbase::escape_in_lists | regbase::char_classes | regbase::intervals | regbase::limited_ops - | regbase::newline_alt | regbase::bk_plus_qm | regbase::bk_braces - | regbase::bk_parens | regbase::bk_refs | regbase::bk_vbar | regbase::use_except - | regbase::failbit | regbase::literal | regbase::icase | regbase::nocollate | regbase::basic - | regbase::extended | regbase::normal | regbase::emacs | regbase::awk | regbase::grep | regbase::egrep | regbase::sed; +regex::flag_type f = regex::escape_in_lists | regex::char_classes | regex::intervals | regex::limited_ops + | regex::newline_alt | regex::bk_plus_qm | regex::bk_braces + | regex::bk_parens | regex::bk_refs | regex::bk_vbar | regex::use_except + | regex::failbit | regex::literal | regex::icase | regex::nocollate | regex::basic + | regex::extended | regex::normal | regex::emacs | regex::awk | regex::grep | regex::egrep | regex::sed; template class reg_expression; template struct sub_match; @@ -92,13 +92,13 @@ template test_char_type* regex_format(test_char_type*, const match_results& m, const test_string_type& fmt, unsigned flags); -template test_char_type* regex_merge(test_char_type*, +template test_char_type* regex_replace(test_char_type*, ra_it, ra_it, const reg_expression&, const test_char_type*, unsigned int flags); -template test_char_type* regex_merge(test_char_type*, +template test_char_type* regex_replace(test_char_type*, ra_it, ra_it, const reg_expression& e, @@ -159,11 +159,11 @@ template test_string_type regex_format (const match_results&, const test_string_type&, unsigned flags); -template test_string_type regex_merge(const test_string_type&, +template test_string_type regex_replace(const test_string_type&, const reg_expression&, const test_char_type*, unsigned int flags); -template test_string_type regex_merge(const test_string_type&, +template test_string_type regex_replace(const test_string_type&, const reg_expression&, const test_string_type&, unsigned int flags); diff --git 
a/test/regress/tests.cpp b/test/regress/tests.cpp index e22fed4d..4fc35c47 100644 --- a/test/regress/tests.cpp +++ b/test/regress/tests.cpp @@ -70,7 +70,7 @@ void cpp_eh_tests(const basic_regex& ) #endif A a; basic_regex e(a); - e.set_expression(expression.c_str(), flags[2] | regbase::use_except); + e.set_expression(expression.c_str(), flags[2] | regex::use_except); #ifndef BOOST_NO_EXCEPTIONS } catch(const boost::bad_expression&) @@ -92,7 +92,7 @@ void cpp_eh_tests(const basic_regex& ) #endif { A a; - basic_regex e(expression.c_str(), flags[2] | regbase::use_except, a); + basic_regex e(expression.c_str(), flags[2] | regex::use_except, a); } #ifndef BOOST_NO_EXCEPTIONS catch(const boost::bad_expression&) @@ -360,14 +360,24 @@ void cpp_tests(const basic_regex& e, bool recurse = true) // // now try comparison operators: string_type s(m[0]); - if((s != m[0]) || (m[0] != s)) + if((s != m[0]) || (m[0] != s) + || !(s == m[0]) || !(m[0] == s) + || (s < m[0]) || (m[0] < s) + || (s > m[0]) || (m[0] > s) + || !(s <= m[0]) || !(m[0] <= s) + || !(s >= m[0]) || !(m[0] >= s)) { begin_error(); cout << "string comparison failed for result" << std::endl; } if(s.find_first_of((string_type::value_type)0) == string_type::npos) { - if((m[0] != s.c_str()) || (s.c_str() != m[0])) + if((m[0] != s.c_str()) || (s.c_str() != m[0]) + || !(m[0] == s.c_str()) || !(s.c_str() == m[0]) + || (m[0] > s.c_str()) || (s.c_str() > m[0]) + || (m[0] < s.c_str()) || (s.c_str() < m[0]) + || !(m[0] >= s.c_str()) || !(s.c_str() >= m[0]) + || !(m[0] <= s.c_str()) || !(s.c_str() <= m[0])) { begin_error(); cout << "string comparison failed for result" << std::endl; @@ -395,6 +405,12 @@ void cpp_tests(const basic_regex& e, bool recurse = true) begin_error(); cout << "regex++ API result mismatch in regex_search(const std::string&, match_results&, const basic_regex&, int)" << endl; } + if(!regex_search(s, e, static_cast(flags[3])) + || !regex_search(s.begin(), s.end(), e, static_cast(flags[3]))) + { + 
begin_error(); + cout << "regex++ API result mismatch in regex_search(const std::string&, const basic_regex&, int)" << endl; + } // // partial match should give same result as full match // provided a full match is expected: @@ -431,6 +447,11 @@ void cpp_tests(const basic_regex& e, bool recurse = true) begin_error(); cout << "regex++ API result mismatch in regex_search(const char_t*, match_results&, const basic_regex&, int)" << endl; } + if(!regex_search(search_text.c_str(), e, static_cast(flags[3]))) + { + begin_error(); + cout << "regex++ API result mismatch in regex_search(const char_t*, const basic_regex&, int)" << endl; + } } } if((false == recurse) && (matches[0] == 0) && (matches[1] == static_cast(search_text.size()))) @@ -718,9 +739,9 @@ void run_tests() try { #endif - unsigned int f = flags[2] & ~regbase::use_except; + unsigned int f = flags[2] & ~regex::use_except; if(flags[0] & REG_ICASE) - f |= regbase::icase; + f |= regex::icase; re_type e(expression.c_str(), f); cpp_tests(e, true); #ifndef BOOST_NO_EXCEPTIONS @@ -748,7 +769,7 @@ void run_tests() try { #endif - if(((flags[3] & match_partial) == 0) && (flags[2] == regbase::normal) && (has_nulls(search_text.begin(), search_text.end()) == false)) + if(((flags[3] & match_partial) == 0) && (flags[2] == regex::normal) && (has_nulls(search_text.begin(), search_text.end()) == false)) { RegEx e; e.SetExpression(expression.c_str(), flags[0] & REG_ICASE); diff --git a/test/regress/tests.txt b/test/regress/tests.txt index 0ae474d6..6bf003c9 100644 --- a/test/regress/tests.txt +++ b/test/regress/tests.txt @@ -802,7 +802,6 @@ a+ "...aaa,,," \x{21} "!" 
a+ "...aaa,,," \c@ \0 a+ "...aaa,,," \e \27 a+ "...aaa,,," \0101 A -a+ "...aaa,,," (\0101) A - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_sed format_no_copy (a+)(b+) ...aabb,, \0 aabb @@ -824,8 +823,9 @@ a+ "...aaa,,," (\0101) A (a+)(b+) ...aabb,, & & (a+)(b+) ...aabb,, \0 \0 (a+)(b+) ...aabb,, ()?: ()?: +a+ "...aaa,,," \0101 A -- match_default normal REG_EXTENDED REG_STARTEND REG_MERGE +- match_default format_all normal REG_EXTENDED REG_STARTEND REG_MERGE ; move to copying unmatched data: a+ "...aaa,,," bbb "...bbb,,," a+(b+) "...aaabb,,," $1 "...bb,,," @@ -836,7 +836,7 @@ a+(b+) "...aaabb,,,ab*abbb?" $1 "...bb,,,b*bbb?" (a+)|(b+) "...aaabb,,,ab*abbb?" (?1A:B)C "...ACBC,,,ACBC*ACBC?" (a+)|(b+) "...aaabb,,,ab*abbb?" ?1:B "...B,,,B*B?" -- match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_first_only +- match_default format_all normal REG_EXTENDED REG_STARTEND REG_MERGE format_first_only ; move to copying unmatched data, but replace first occurance only: a+ "...aaa,,," bbb "...bbb,,," a+(b+) "...aaabb,,," $1 "...bb,,," @@ -1133,6 +1133,8 @@ a()b\1 ab 0 2 1 1 "(.*\r\n){3}.* abcdefghijklmnopqrstuvwxyz.*\r\n" "00001 01 \r\n00002 02 1 2 3 4 5 6 7 8 9 0\r\n00003 03 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890\r\n00004 04 \r\n00005 05 \r\n00006 06 Seite: 0001\r\n00007 07 StartSeitEEnde: 0001\r\n00008 08 StartSeiTe Ende: 0001\r\n00009 09 Start seiteEnde: 0001\r\n00010 10 28.2.03\r\n00011 11 Page: 0001\r\n00012 12 Juhu die Erste: 0001\r\n00013 13 Es war einmal! 
0001\r\n00014 14 ABCDEFGHIJKLMNOPQRSTUVWXYZ0001\r\n00015 15 abcdefghijklmnopqrstuvwxyz0001\r\n00016 16 lars.schmeiser@gft.com\r\n00017 17 \r\n00018 18 \r\n00019 19 \r\n00020 20 \r\n00021 21 1 2 3 4 5 6 7 8 9 0\r\n00022 22 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890\r\n00023 01 \r\n00024 02 1 2 3 4 5 6 7 8 9 0\r\n00025 03 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890\r\n00026 04 \r\n00027 05 \r\n00028 06 Seite: 0002\r\n00029 07 StartSeitEEnde: 0002\r\n00030 08 StartSeiTe Ende: 0002\r\n00031 09 Start seiteEnde: 0002\r\n00032 10 28.02.2003\r\n00033 11 Page: 0002\r\n00034 12 Juhu die Erste: 0002\r\n00035 13 Es war einmal! 0002\r\n00036 14 ABCDEFGHIJKLMNOPQRSTUVWXYZ0002\r\n00037 15 abcdefghijklmnopqrstuvwxyz0002\r\n00038 16 lars.schmeiser@194.1.12.111\r\n00039 17 \r\n00040 18 \r\n00041 19 \r\n00042 20 \r\n00043 21 1 2 3 4 5 6 7 8 9 0\r\n" 753 1076 934 1005 +- normal REG_PERL + ; new (?: construct ) (?>^abc) abc 0 3 (?>^abc) def\nabc 4 7 @@ -1172,4 +1174,58 @@ a()b\1 ab 0 2 1 1 ((?>Z)+|A)* ZABCDEFG 0 2 1 2 ((?>)+|A)* ! 
+; subtleties of matching with no sub-expressions marked +- normal match_nosubs REG_NO_POSIX_TEST +a(b?c)+d accd 0 4 +(wee|week)(knights|night) weeknights 0 10 +.* abc 0 3 +a(b|(c))d abd 0 3 +a(b|(c))d acd 0 3 +a(b*|c|e)d abbd 0 4 +a(b*|c|e)d acd 0 3 +a(b*|c|e)d ad 0 2 +a(b?)c abc 0 3 +a(b?)c ac 0 2 +a(b+)c abc 0 3 +a(b+)c abbbc 0 5 +a(b*)c ac 0 2 +(a|ab)(bc([de]+)f|cde) abcdef 0 6 +a([bc]?)c abc 0 3 +a([bc]?)c ac 0 2 +a([bc]+)c abc 0 3 +a([bc]+)c abcc 0 4 +a([bc]+)bc abcbc 0 5 +a(bb+|b)b abb 0 3 +a(bbb+|bb+|b)b abb 0 3 +a(bbb+|bb+|b)b abbb 0 4 +a(bbb+|bb+|b)bb abbb 0 4 +(.*).* abcdef 0 6 +(a*)* bc 0 0 + +- normal nosubs REG_NO_POSIX_TEST +a(b?c)+d accd 0 4 +(wee|week)(knights|night) weeknights 0 10 +.* abc 0 3 +a(b|(c))d abd 0 3 +a(b|(c))d acd 0 3 +a(b*|c|e)d abbd 0 4 +a(b*|c|e)d acd 0 3 +a(b*|c|e)d ad 0 2 +a(b?)c abc 0 3 +a(b?)c ac 0 2 +a(b+)c abc 0 3 +a(b+)c abbbc 0 5 +a(b*)c ac 0 2 +(a|ab)(bc([de]+)f|cde) abcdef 0 6 +a([bc]?)c abc 0 3 +a([bc]?)c ac 0 2 +a([bc]+)c abc 0 3 +a([bc]+)c abcc 0 4 +a([bc]+)bc abcbc 0 5 +a(bb+|b)b abb 0 3 +a(bbb+|bb+|b)b abb 0 3 +a(bbb+|bb+|b)b abbb 0 4 +a(bbb+|bb+|b)bb abbb 0 4 +(.*).* abcdef 0 6 +(a*)* bc 0 0 diff --git a/test/regress/wregex_test.cpp b/test/regress/wregex_test.cpp index 258facf7..68e69397 100644 --- a/test/regress/wregex_test.cpp +++ b/test/regress/wregex_test.cpp @@ -63,11 +63,11 @@ typedef bool (*pred1_type)(const match_results&); typedef bool (*pred2_type)(const match_results&); //check that all the defined flags are available: -regbase::flag_type f = regbase::escape_in_lists | regbase::char_classes | regbase::intervals | regbase::limited_ops - | regbase::newline_alt | regbase::bk_plus_qm | regbase::bk_braces - | regbase::bk_parens | regbase::bk_refs | regbase::bk_vbar | regbase::use_except - | regbase::failbit | regbase::literal | regbase::icase | regbase::nocollate | regbase::basic - | regbase::extended | regbase::normal | regbase::emacs | regbase::awk | regbase::grep | regbase::egrep | regbase::sed; 
+wregex::flag_type f = wregex::escape_in_lists | wregex::char_classes | wregex::intervals | wregex::limited_ops + | wregex::newline_alt | wregex::bk_plus_qm | wregex::bk_braces + | wregex::bk_parens | wregex::bk_refs | wregex::bk_vbar | wregex::use_except + | wregex::failbit | wregex::literal | wregex::icase | wregex::nocollate | wregex::basic + | wregex::extended | wregex::normal | wregex::emacs | wregex::awk | wregex::grep | wregex::egrep | wregex::sed; template class reg_expression; template struct sub_match; @@ -100,13 +100,13 @@ template char_type* regex_format(char_type*, const match_results& m, const test_string_type& fmt, unsigned flags); -template char_type* regex_merge(char_type*, +template char_type* regex_replace(char_type*, ra_it, ra_it, const reg_expression&, const char_type*, unsigned int flags); -template char_type* regex_merge(char_type*, +template char_type* regex_replace(char_type*, ra_it, ra_it, const reg_expression& e, @@ -167,11 +167,11 @@ template test_string_type regex_format (const match_results&, const test_string_type&, unsigned flags); -template test_string_type regex_merge(const test_string_type&, +template test_string_type regex_replace(const test_string_type&, const reg_expression&, const char_type*, unsigned int flags); -template test_string_type regex_merge(const test_string_type&, +template test_string_type regex_replace(const test_string_type&, const reg_expression&, const test_string_type&, unsigned int flags); diff --git a/traits_class_ref.htm b/traits_class_ref.htm deleted file mode 100644 index 669f5a87..00000000 --- a/traits_class_ref.htm +++ /dev/null @@ -1,1016 +0,0 @@ - - - - - - - - regex++ traits-class reference - - - - - - - - - -

C++ Boost

-

Regex++, Traits Class - Reference.

-

Copyright (c) 1998-2001

-

Dr John Maddock

-

Permission to use, copy, modify, - distribute and sell this software and its documentation - for any purpose is hereby granted without fee, provided - that the above copyright notice appear in all copies and - that both that copyright notice and this permission - notice appear in supporting documentation. Dr John - Maddock makes no representations about the suitability of - this software for any purpose. It is provided "as is" - without express or implied warranty.

-
- -
- -

This section describes the traits class requirements of the -reg_expression template class. These requirements are somewhat -complex (sorry), and subject to change as users ask for new -features; however, I will try to keep them stable for a while, and -ideally the requirements should lessen rather than increase.

- -

The reg_expression traits classes encapsulate both the -properties of a character type, and the properties of the locale -associated with that type. The associated locale may be defined -at run-time (via std::locale), or hard-coded into the traits -class and determined at compile time.

- -

The following example class illustrates the interface required -by a "typical" traits class for use with class -reg_expression:

- -
-class mytraits
-{
-   typedef implementation_defined char_type;
-   typedef implementation_defined uchar_type;
-   typedef implementation_defined size_type;
-   typedef implementation_defined string_type;
-   typedef implementation_defined locale_type;
-   typedef implementation_defined uint32_t;
-   struct sentry
-   {
-      sentry(const mytraits&);
-      operator void*() { return this; }
-   };
-
-   enum char_syntax_type
-   {
-      syntax_char = 0,
-      syntax_open_bracket = 1,                  // (
-      syntax_close_bracket = 2,                 // )
-      syntax_dollar = 3,                        // $
-      syntax_caret = 4,                         // ^
-      syntax_dot = 5,                           // .
-      syntax_star = 6,                          // *
-      syntax_plus = 7,                          // +
-      syntax_question = 8,                      // ?
-      syntax_open_set = 9,                      // [
-      syntax_close_set = 10,                    // ]
-      syntax_or = 11,                           // |
-      syntax_slash = 12,                        //
-      syntax_hash = 13,                         // #
-      syntax_dash = 14,                         // -
-      syntax_open_brace = 15,                   // {
-      syntax_close_brace = 16,                  // }
-      syntax_digit = 17,                        // 0-9
-      syntax_b = 18,                            // for \b
-      syntax_B = 19,                            // for \B
-      syntax_left_word = 20,                    // for \<
-      syntax_right_word = 21,                   // for \>
-      syntax_w = 22,                            // for \w
-      syntax_W = 23,                            // for \W
-      syntax_start_buffer = 24,                 // for \`
-      syntax_end_buffer = 25,                   // for \'
-      syntax_newline = 26,                      // for newline alt
-      syntax_comma = 27,                        // for {x,y}
-
-      syntax_a = 28,                            // for \a
-      syntax_f = 29,                            // for \f
-      syntax_n = 30,                            // for \n
-      syntax_r = 31,                            // for \r
-      syntax_t = 32,                            // for \t
-      syntax_v = 33,                            // for \v
-      syntax_x = 34,                            // for \xdd
-      syntax_c = 35,                            // for \cx
-      syntax_colon = 36,                        // for [:...:]
-      syntax_equal = 37,                        // for [=...=]
-   
-      // perl ops:
-      syntax_e = 38,                            // for \e
-      syntax_l = 39,                            // for \l
-      syntax_L = 40,                            // for \L
-      syntax_u = 41,                            // for \u
-      syntax_U = 42,                            // for \U
-      syntax_s = 43,                            // for \s
-      syntax_S = 44,                            // for \S
-      syntax_d = 45,                            // for \d
-      syntax_D = 46,                            // for \D
-      syntax_E = 47,                            // for \Q\E
-      syntax_Q = 48,                            // for \Q\E
-      syntax_X = 49,                            // for \X
-      syntax_C = 50,                            // for \C
-      syntax_Z = 51,                            // for \Z
-      syntax_G = 52,                            // for \G
-      syntax_bang = 53,                         // reserved for future use '!'
-      syntax_and = 54,                          // reserved for future use '&'
-   };
-
-   enum{
-      char_class_none = 0,
-      char_class_alpha,
-      char_class_cntrl,
-      char_class_digit,
-      char_class_lower,
-      char_class_punct,
-      char_class_space,
-      char_class_upper,
-      char_class_xdigit,
-      char_class_blank,
-      char_class_unicode,
-      char_class_alnum,
-      char_class_graph,
-      char_class_print,
-      char_class_word
-   };
-
-   static size_t length(const char_type* p);
-   unsigned int syntax_type(size_type c)const;
-   char_type translate(char_type c, bool icase)const;
-   void transform(string_type& out, const string_type& in)const;
-   void transform_primary(string_type& out, const string_type& in)const;
-   bool is_separator(char_type c)const;
-   bool is_combining(char_type)const;
-   bool is_class(char_type c, uint32_t f)const;
-   int toi(char_type c)const;
-   int toi(const char_type*& first, const char_type* last, int radix)const;
-   uint32_t lookup_classname(const char_type* first, const char_type* last)const;
-   bool lookup_collatename(string_type& buf, const char_type* first, const char_type* last)const;
-   locale_type imbue(locale_type l);
-   locale_type getloc()const;
-   std::string error_string(unsigned id)const;
-
-   mytraits();
-   ~mytraits();
-};
-
- -

The member types required by a traits class are defined as -follows:
-  

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  Member - name Description -  
  char_type The - character type encapsulated by this traits class, must be - a POD type, and be convertible to uchar_type.  
  uchar_type - The - unsigned type corresponding to char_type, must be - convertible to size_type.  
  size_type An - unsigned integral type, with at least as much precision - as uchar_type.  
  string_type - A type - that offers the same facilities as std::basic_string<char_type>. - This is used for collating elements and sort strings; if - char_type has no locale-dependent collation (it is not a - "character"), then it could be something - simpler than std::basic_string.  
  locale_type - A type - that encapsulates the locale used by the traits class, - probably std::locale but could be a platform specific - type, or a dummy type if per-instance locales are not - supported by the traits class.  
  uint32_t An - unsigned integral type with at least 32-bits of - precision, used as a bitmask type for character - classification.  
  sentry A class or - struct type which is constructible from an instance of - the traits class, and is convertible to void*. An - instance of type sentry will be constructed before - compiling each regular expression, it provides an - opportunity to carry out prefix/suffix operations on the - traits class. 

For example a traits class that - encapsulates the global locale, can use this as an - opportunity to synchronize with the global locale (by - updating any cached data).

-
 
- -


- The following member constants are used to represent the -locale independent syntax of a regular expression; the member -function syntax_type returns one of these values, and is -used to convert a locale dependent regular expression, into a -locale-independent sequence of tokens.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  Member - constant  English - language representation   
  syntax_char  - All non-special - characters.   
  syntax_open_bracket  -  
  syntax_close_bracket  -  
  syntax_dollar  -  
  syntax_caret  -  
  syntax_dot  -  
  syntax_star  -  
  syntax_plus  -  
  syntax_question  -  
  syntax_open_set  -  
  syntax_close_set  -  
  syntax_or  -  
  syntax_slash  -  
  syntax_hash  -  
  syntax_dash  -  
  syntax_open_brace  -  
  syntax_close_brace  -  
  syntax_digit  - 0123456789  -  
  syntax_b  -  
  syntax_B  -  
  syntax_left_word  - <  -  
  syntax_right_word  -    
  syntax_w  -  
  syntax_W  -  
  syntax_start_buffer  -  
  syntax_end_buffer  -  
  syntax_newline  - \n   
  syntax_comma  -  
  syntax_a  -  
  syntax_f  -  
  syntax_n  -  
  syntax_r  -  
  syntax_t  -  
  syntax_v  -  
  syntax_x  -  
  syntax_c  -  
  syntax_colon  -  
  syntax_equal  -  
  syntax_e  -  
  syntax_l  -  
  syntax_L  -  
  syntax_u  -  
  syntax_U  -  
  syntax_s  -  
  syntax_S  -  
  syntax_d  -  
  syntax_D  -  
  syntax_E  -  
  syntax_Q  -  
  syntax_X  -  
  syntax_C  -  
  syntax_Z  -  
  syntax_G  -  
  syntax_bang  -  
  syntax_and  - &  -  
- -

The following member constants are used to represent -particular character classifications:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  Member - constant  Description -  
  char_class_none  - No - classification, must be zero.  
  char_class_alpha  - All - alphabetic characters.  
  char_class_cntrl  - All - control characters.  
  char_class_digit  - All - decimal digits.  
  char_class_lower  - All lower - case characters.  
  char_class_punct  - All - punctuation characters.  
  char_class_space  - All white-space - characters.  
  char_class_upper  - All upper - case characters.  
  char_class_xdigit  - All - hexadecimal digit characters.  
  char_class_blank  - All blank - characters (space + tab).  
  char_class_unicode  - All - extended unicode characters - those that can not be - represented as a single narrow character.  
  char_class_alnum  - All alpha-numeric - characters.  
  char_class_graph  - All - graphic characters.  
  char_class_print  - All - printable characters.  
  char_class_word  - All word - characters (alphanumeric characters + the underscore).  
- -

The following member functions are required by all regular -expression traits classes; those members that are declared here -as const could be declared static instead if the -class does not contain instance data:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  Member - function Description -  
  static - size_t length(const char_type* p); Returns - the length of the null-terminated string p.  
  unsigned - int syntax_type(size_type c)const;  Converts - an input character into a locale independent token (one - of the syntax_xxx member constants). Called when parsing - the regular expression into a locale-independent parse - tree. 

Example: in English language regular - expressions we would use "[[:word:]]" to - represent the character class of all word characters, and - "\w" as a shortcut for this. Consequently - syntax_type('w') returns syntax_w. In French language - regular expressions, we would use "[[:mot:]]" - in place of "[[:word:]]" and therefore "\m" - in place of "\w", therefore it is syntax_type('m') - that returns syntax_w.

-
 
  char_type - translate(char_type c, bool icase)const;  Translates - an input character into a unique identifier that - represents the equivalence class that that character - belongs to. If icase is true, then the returned value is - insensitive to case. 

[An equivalence class is - the set of all characters that must be treated as being - equivalent to each other.]

-
 
  void - transform(string_type& out, const string_type& in)const; -  Transforms - the string in, into a locale-dependent sort key, - and stores the result in out.  
  void - transform_primary(string_type& out, const - string_type& in)const;  Transforms - the string in, into a locale-dependent primary - sort key, and stores the result in out.  
  bool - is_separator(char_type c)const;  Returns - true only if c is a line separator.  
  bool - is_combining(char_type c)const;  Returns - true only if c is a unicode combining character.  
  bool - is_class(char_type c, uint32_t f)const;  Returns - true only if c is a member of one of the character - classes represented by the bitmap f.  
  int toi(char_type - c)const;  Converts - the character c to a decimal integer. 

[Precondition: - is_class(c,char_class_digit)==true]

-
 
  int toi(const - char_type*& first, const char_type* last, int radix)const; -  Converts - the string [first-last) into an integral value using base - radix. Stops when it finds the first non-digit - character, and sets first to point to that - character. 

[Precondition: is_class(*first,char_class_digit)==true] -

-
 
  uint32_t - lookup_classname(const char_type* first, const char_type* - last)const;  Returns - the bitmap representing the character class [first-last), - or char_class_none if [first-last) is not recognized as a - character class name.  
  bool - lookup_collatename(string_type& buf, const char_type* - first, const char_type* last)const; If the - sequence [first-last) is the name of a known collating - element, then stores the collating element in buf, and - returns true, otherwise returns false.  
  locale_type - imbue(locale_type l);  Imbues - the class with the locale l.  
  locale_type - getloc()const;  Returns - the traits-class locale.  
  std::string - error_string(unsigned id)const;  Returns - the locale-dependent error-string associated with the - error-number id. The parameter id is one of - the REG_XXX error codes described by the POSIX standard, - and defined in <boost/cregex.hpp>.  
  mytraits(); -  Constructor. -  
  ~ mytraits(); -  Destructor. -  
- -

There is also an example of a custom traits class supplied by Christian Engström, -see iso8859_1_regex_traits.cpp -and iso8859_1_regex_traits.hpp. -This example inherits from c_regex_traits and provides its own -implementations of two locale-specific functions. This ensures -that the class gives consistent behaviour (albeit tied to one -locale) on all platforms. A fuller description by the author is -available in the readme file.
-

- -
- -

Copyright Dr -John Maddock 1998-2001 all rights reserved.

- -