From 0c7dbadc30a6da9fdf0f47cf883bb94603ff4da2 Mon Sep 17 00:00:00 2001 From: John Maddock Date: Fri, 10 Sep 2004 12:16:52 +0000 Subject: [PATCH] Started to improve docs. Added more tests. Added code to speed up traits class construction and usage. Tweaked extern template code. [SVN r25002] --- build/bcb6.mak | 2 +- build/gcc-shared.mak | 2 +- build/gcc.mak | 2 +- build/generic.mak | 2 +- build/sunpro.mak | 2 +- build/vc6-stlport.mak | 2 +- build/vc6.mak | 2 +- build/vc7-stlport.mak | 2 +- build/vc7.mak | 2 +- build/vc71-stlport.mak | 2 +- build/vc71.mak | 2 +- doc/Attic/character_class_names.html | 144 ++++ doc/Attic/collating_names.html | 358 +++++++++ doc/Attic/syntax.html | 751 +----------------- doc/Attic/syntax_basic.html | 226 ++++++ doc/Attic/syntax_extended.html | 471 +++++++++++ doc/Attic/syntax_option_type.html | 33 +- doc/Attic/syntax_perl.html | 502 ++++++++++++ doc/character_class_names.html | 144 ++++ doc/collating_names.html | 358 +++++++++ doc/index.html | 14 + doc/syntax.html | 751 +----------------- doc/syntax_basic.html | 226 ++++++ doc/syntax_extended.html | 471 +++++++++++ doc/syntax_option_type.html | 33 +- doc/syntax_perl.html | 502 ++++++++++++ include/boost/regex/v4/basic_regex_parser.hpp | 201 ++++- include/boost/regex/v4/cpp_regex_traits.hpp | 63 +- include/boost/regex/v4/instances.hpp | 4 +- include/boost/regex/v4/match_results.hpp | 4 +- include/boost/regex/v4/mem_block_cache.hpp | 2 +- .../boost/regex/v4/perl_matcher_common.hpp | 14 +- include/boost/regex/v4/regbase.hpp | 3 +- include/boost/regex/v4/regex.hpp | 12 +- include/boost/regex/v4/regex_traits.hpp | 2 +- include/boost/regex/v4/w32_regex_traits.hpp | 7 +- performance/Jamfile | 3 +- performance/command_line.cpp | 5 + performance/main.cpp | 16 + performance/regex_comparison.hpp | 11 + performance/time_dynamic_xpressive.cpp | 129 +++ performance/time_posix.cpp | 2 +- src/static_mutex.cpp | 2 +- test/Jamfile | 2 + test/object_cache/object_cache_test.cpp | 75 ++++ 
test/regress/main.cpp | 3 + test/regress/test.hpp | 3 + test/regress/test_emacs.cpp | 157 ++++ test/regress/test_mfc.cpp | 2 +- test/regress/test_operators.cpp | 146 ++++ test/regress/test_overloads.cpp | 46 ++ test/static_mutex/static_mutex_test.cpp | 2 +- 52 files changed, 4388 insertions(+), 1534 deletions(-) create mode 100644 doc/Attic/character_class_names.html create mode 100644 doc/Attic/collating_names.html create mode 100644 doc/Attic/syntax_basic.html create mode 100644 doc/Attic/syntax_extended.html create mode 100644 doc/Attic/syntax_perl.html create mode 100644 doc/character_class_names.html create mode 100644 doc/collating_names.html create mode 100644 doc/syntax_basic.html create mode 100644 doc/syntax_extended.html create mode 100644 doc/syntax_perl.html create mode 100644 performance/time_dynamic_xpressive.cpp create mode 100644 test/object_cache/object_cache_test.cpp create mode 100644 test/regress/test_emacs.cpp create mode 100644 test/regress/test_operators.cpp create mode 100644 test/regress/test_overloads.cpp diff --git a/build/bcb6.mak b/build/bcb6.mak index d96ec23c..20733217 100644 --- a/build/bcb6.mak +++ b/build/bcb6.mak @@ -35,7 +35,7 @@ BCROOT=$(MAKEDIR)\.. 
!endif -ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp +ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp 
../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp all : bcb bcb\libboost_regex-bcb-s-1_31 bcb\libboost_regex-bcb-s-1_31.lib bcb\libboost_regex-bcb-mt-s-1_31 bcb\libboost_regex-bcb-mt-s-1_31.lib bcb\boost_regex-bcb-mt-1_31 bcb\boost_regex-bcb-mt-1_31.lib bcb\boost_regex-bcb-1_31 bcb\boost_regex-bcb-1_31.lib bcb\libboost_regex-bcb-mt-1_31 bcb\libboost_regex-bcb-mt-1_31.lib bcb\libboost_regex-bcb-1_31 bcb\libboost_regex-bcb-1_31.lib bcb\libboost_regex-bcb-sd-1_31 bcb\libboost_regex-bcb-sd-1_31.lib bcb\libboost_regex-bcb-mt-sd-1_31 bcb\libboost_regex-bcb-mt-sd-1_31.lib bcb\boost_regex-bcb-mt-d-1_31 bcb\boost_regex-bcb-mt-d-1_31.lib bcb\boost_regex-bcb-d-1_31 bcb\boost_regex-bcb-d-1_31.lib bcb\libboost_regex-bcb-mt-d-1_31 bcb\libboost_regex-bcb-mt-d-1_31.lib bcb\libboost_regex-bcb-d-1_31 bcb\libboost_regex-bcb-d-1_31.lib diff --git a/build/gcc-shared.mak b/build/gcc-shared.mak index 8af25e6b..a92e1c0e 100644 --- a/build/gcc-shared.mak +++ b/build/gcc-shared.mak @@ -29,7 +29,7 @@ C1=-c -O2 -I../../../ -fPIC C2=-c -g -I../../../ -fPIC -ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp 
../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp +ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp 
../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp 
../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp all : gcc gcc gcc/boost_regex-gcc-1_31_shared ./gcc/libboost_regex-gcc-1_31.so gcc gcc/boost_regex-gcc-d-1_31_shared ./gcc/libboost_regex-gcc-d-1_31.so diff --git a/build/gcc.mak b/build/gcc.mak index b87ee53b..c4e24aa1 100644 --- a/build/gcc.mak +++ b/build/gcc.mak @@ -30,7 +30,7 @@ C2=-c -g -I../../../ -ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp 
../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp +ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp 
../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp all : gcc gcc gcc/boost_regex-gcc-1_31 ./gcc/libboost_regex-gcc-1_31.a gcc gcc/boost_regex-gcc-d-1_31 ./gcc/libboost_regex-gcc-d-1_31.a diff --git a/build/generic.mak b/build/generic.mak index f2a71305..cb7617ee 100644 --- a/build/generic.mak +++ b/build/generic.mak @@ -26,7 +26,7 @@ C1=-c -O2 -I../../../ -ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp 
../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp +ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp 
../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp all : $(DIRNAME) $(DIRNAME) 
$(DIRNAME)/boost_regex ./$(DIRNAME)/libboost_regex.so diff --git a/build/sunpro.mak b/build/sunpro.mak index d619949d..84114f3e 100644 --- a/build/sunpro.mak +++ b/build/sunpro.mak @@ -34,7 +34,7 @@ SUNWS_CACHE_NAME=SunWS_cache -ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp 
../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp +ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp 
../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp all : sunpro sunpro/libboost_regex$(LIBSUFFIX) sunpro/libboost_regex$(LIBSUFFIX).a sunpro/libboost_regex_mt$(LIBSUFFIX) sunpro/libboost_regex_mt$(LIBSUFFIX).a sunpro/shared_libboost_regex$(LIBSUFFIX) sunpro/libboost_regex$(LIBSUFFIX).so sunpro/shared_libboost_regex_mt$(LIBSUFFIX) sunpro/libboost_regex_mt$(LIBSUFFIX).so diff --git a/build/vc6-stlport.mak b/build/vc6-stlport.mak index 38f8dcb9..e830a7fe 100644 --- a/build/vc6-stlport.mak +++ b/build/vc6-stlport.mak @@ -40,7 +40,7 @@ NULL=nul !ENDIF -ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp 
../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp +ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp 
../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp all : main_dir libboost_regex-vc6-mt-sp-1_31_dir ./vc6-stlport/libboost_regex-vc6-mt-sp-1_31.lib boost_regex-vc6-mt-p-1_31_dir 
./vc6-stlport/boost_regex-vc6-mt-p-1_31.lib libboost_regex-vc6-mt-p-1_31_dir ./vc6-stlport/libboost_regex-vc6-mt-p-1_31.lib boost_regex-vc6-mt-gdp-1_31_dir ./vc6-stlport/boost_regex-vc6-mt-gdp-1_31.lib libboost_regex-vc6-mt-sgdp-1_31_dir ./vc6-stlport/libboost_regex-vc6-mt-sgdp-1_31.lib libboost_regex-vc6-mt-gdp-1_31_dir ./vc6-stlport/libboost_regex-vc6-mt-gdp-1_31.lib diff --git a/build/vc6.mak b/build/vc6.mak index f4ffcc49..819f29fe 100644 --- a/build/vc6.mak +++ b/build/vc6.mak @@ -36,7 +36,7 @@ NULL=nul !ENDIF -ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp 
../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp +ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp 
../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp all : main_dir libboost_regex-vc6-s-1_31_dir ./vc6/libboost_regex-vc6-s-1_31.lib libboost_regex-vc6-mt-s-1_31_dir ./vc6/libboost_regex-vc6-mt-s-1_31.lib libboost_regex-vc6-sgd-1_31_dir ./vc6/libboost_regex-vc6-sgd-1_31.lib libboost_regex-vc6-mt-sgd-1_31_dir ./vc6/libboost_regex-vc6-mt-sgd-1_31.lib boost_regex-vc6-mt-gd-1_31_dir ./vc6/boost_regex-vc6-mt-gd-1_31.lib boost_regex-vc6-mt-1_31_dir ./vc6/boost_regex-vc6-mt-1_31.lib libboost_regex-vc6-mt-1_31_dir ./vc6/libboost_regex-vc6-mt-1_31.lib libboost_regex-vc6-mt-gd-1_31_dir ./vc6/libboost_regex-vc6-mt-gd-1_31.lib diff --git a/build/vc7-stlport.mak b/build/vc7-stlport.mak index 1c022bcd..f537d555 100644 --- a/build/vc7-stlport.mak +++ b/build/vc7-stlport.mak @@ -40,7 +40,7 @@ NULL=nul !ENDIF -ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp 
../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp 
../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp +ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp 
../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp all : main_dir libboost_regex-vc7-mt-sp-1_31_dir ./vc7-stlport/libboost_regex-vc7-mt-sp-1_31.lib boost_regex-vc7-mt-p-1_31_dir ./vc7-stlport/boost_regex-vc7-mt-p-1_31.lib libboost_regex-vc7-mt-p-1_31_dir ./vc7-stlport/libboost_regex-vc7-mt-p-1_31.lib boost_regex-vc7-mt-gdp-1_31_dir ./vc7-stlport/boost_regex-vc7-mt-gdp-1_31.lib libboost_regex-vc7-mt-sgdp-1_31_dir ./vc7-stlport/libboost_regex-vc7-mt-sgdp-1_31.lib libboost_regex-vc7-mt-gdp-1_31_dir ./vc7-stlport/libboost_regex-vc7-mt-gdp-1_31.lib diff --git a/build/vc7.mak b/build/vc7.mak index 489d99e9..7fff5206 100644 --- a/build/vc7.mak +++ b/build/vc7.mak @@ -36,7 +36,7 @@ NULL=nul !ENDIF -ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp 
../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp +ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp 
../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp all : main_dir libboost_regex-vc7-s-1_31_dir ./vc7/libboost_regex-vc7-s-1_31.lib libboost_regex-vc7-mt-s-1_31_dir ./vc7/libboost_regex-vc7-mt-s-1_31.lib libboost_regex-vc7-sgd-1_31_dir ./vc7/libboost_regex-vc7-sgd-1_31.lib libboost_regex-vc7-mt-sgd-1_31_dir 
./vc7/libboost_regex-vc7-mt-sgd-1_31.lib boost_regex-vc7-mt-gd-1_31_dir ./vc7/boost_regex-vc7-mt-gd-1_31.lib boost_regex-vc7-mt-1_31_dir ./vc7/boost_regex-vc7-mt-1_31.lib libboost_regex-vc7-mt-1_31_dir ./vc7/libboost_regex-vc7-mt-1_31.lib libboost_regex-vc7-mt-gd-1_31_dir ./vc7/libboost_regex-vc7-mt-gd-1_31.lib diff --git a/build/vc71-stlport.mak b/build/vc71-stlport.mak index d0e3e743..e2adc02c 100644 --- a/build/vc71-stlport.mak +++ b/build/vc71-stlport.mak @@ -40,7 +40,7 @@ NULL=nul !ENDIF -ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp 
../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp +ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp 
../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp all : main_dir libboost_regex-vc71-mt-sp-1_31_dir ./vc71-stlport/libboost_regex-vc71-mt-sp-1_31.lib boost_regex-vc71-mt-p-1_31_dir ./vc71-stlport/boost_regex-vc71-mt-p-1_31.lib libboost_regex-vc71-mt-p-1_31_dir ./vc71-stlport/libboost_regex-vc71-mt-p-1_31.lib boost_regex-vc71-mt-gdp-1_31_dir ./vc71-stlport/boost_regex-vc71-mt-gdp-1_31.lib libboost_regex-vc71-mt-sgdp-1_31_dir ./vc71-stlport/libboost_regex-vc71-mt-sgdp-1_31.lib libboost_regex-vc71-mt-gdp-1_31_dir ./vc71-stlport/libboost_regex-vc71-mt-gdp-1_31.lib diff --git a/build/vc71.mak b/build/vc71.mak index 1cdb339a..4924564e 100644 --- a/build/vc71.mak +++ b/build/vc71.mak @@ -36,7 +36,7 @@ NULL=nul !ENDIF -ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp 
../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp 
../../../boost/regex/config/regex_library_include.hpp +ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp 
../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp ../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp all : main_dir libboost_regex-vc71-s-1_31_dir ./vc71/libboost_regex-vc71-s-1_31.lib libboost_regex-vc71-mt-s-1_31_dir ./vc71/libboost_regex-vc71-mt-s-1_31.lib libboost_regex-vc71-sgd-1_31_dir ./vc71/libboost_regex-vc71-sgd-1_31.lib libboost_regex-vc71-mt-sgd-1_31_dir ./vc71/libboost_regex-vc71-mt-sgd-1_31.lib boost_regex-vc71-mt-gd-1_31_dir ./vc71/boost_regex-vc71-mt-gd-1_31.lib boost_regex-vc71-mt-1_31_dir ./vc71/boost_regex-vc71-mt-1_31.lib libboost_regex-vc71-mt-1_31_dir ./vc71/libboost_regex-vc71-mt-1_31.lib libboost_regex-vc71-mt-gd-1_31_dir ./vc71/libboost_regex-vc71-mt-gd-1_31.lib diff --git a/doc/Attic/character_class_names.html b/doc/Attic/character_class_names.html new file mode 100644 index 00000000..1049e7e4 --- /dev/null +++ b/doc/Attic/character_class_names.html @@ -0,0 +1,144 @@ + + + + Boost.Regex: Character Class Names + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Character Class Names.

+
+

Boost.Regex Index

+
+

+
+

+

The following character class names are supported by Boost.Regex:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Name | POSIX-standard name | Description
alnum | Yes | Any alpha-numeric character.
alpha | Yes | Any alphabetic character.
blank | Yes | Any whitespace character that is not a line separator.
cntrl | Yes | Any control character.
d | No | Any decimal digit.
digit | Yes | Any decimal digit.
graph | Yes | Any graphical character.
l | No | Any lower case character.
lower | Yes | Any lower case character.
print | Yes | Any printable character.
punct | Yes | Any punctuation character.
s | No | Any whitespace character.
space | Yes | Any whitespace character.
unicode | No | Any extended character whose code point is above 255 in value.
u | No | Any upper case character.
upper | Yes | Any upper case character.
w | No | Any word character (alphanumeric characters plus the underscore).
word | No | Any word character (alphanumeric characters plus the underscore).
xdigit | Yes | Any hexadecimal digit character.
+

+

+


+

+

Revised  + + 09 Aug 2004  +

+

© Copyright John Maddock 2004

+

Use, modification and distribution are subject to the Boost Software License, + Version 1.0. (See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt)

+ + diff --git a/doc/Attic/collating_names.html b/doc/Attic/collating_names.html new file mode 100644 index 00000000..c93ef6cf --- /dev/null +++ b/doc/Attic/collating_names.html @@ -0,0 +1,358 @@ + + + + Boost.Regex: Collating Element Names + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Collating Element Names

+
+

Boost.Regex Index

+
+

+
+

+

+ The following are treated as valid digraphs when used as a collating name:

+

"ae", "Ae", "AE", "ch", "Ch", "CH", "ll", "Ll", "LL", "ss", "Ss", "SS", "nj", + "Nj", "NJ", "dz", "Dz", "DZ", "lj", "Lj", "LJ".

+

The following symbolic names are recognised as valid collating element names, + in addition to any single character:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameCharacter
NUL\x00
SOH\x01
STX\x02
ETX\x03
EOT\x04
ENQ\x05
ACK\x06
alert\x07
backspace\x08
tab\t
newline\n
vertical-tab\v
form-feed\f
carriage-return\r
SO\x0E
SI\x0F
DLE\x10
DC1\x11
DC2\x12
DC3\x13
DC4\x14
NAK\x15
SYN\x16
ETB\x17
CAN\x18
EM\x19
SUB\x1A
ESC\x1B
IS4\x1C
IS3\x1D
IS2\x1E
IS1\x1F
space\x20
exclamation-mark!
quotation-mark"
number-sign#
dollar-sign$
percent-sign%
ampersand&
apostrophe'
left-parenthesis(
right-parenthesis)
asterisk*
plus-sign+
comma,
hyphen-
period.
slash/
zero0
one1
two2
three3
four4
five5
six6
seven7
eight8
nine9
colon:
semicolon;
less-than-sign<
equals-sign=
greater-than-sign>
question-mark?
commercial-at@
left-square-bracket[
backslash\
right-square-bracket]
circumflex^
underscore_
grave-accent`
left-curly-bracket{
vertical-line|
right-curly-bracket}
tilde~
DEL\x7F
+

+

+


+

+

Revised + + 24 Oct 2003 +

+

© Copyright John Maddock 1998- + 2003

+

Use, modification and distribution are subject to the Boost Software License, + Version 1.0. (See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt)

+ + diff --git a/doc/Attic/syntax.html b/doc/Attic/syntax.html index d7e048a8..e757104b 100644 --- a/doc/Attic/syntax.html +++ b/doc/Attic/syntax.html @@ -25,742 +25,29 @@

This section covers the regular expression syntax used by this library, this is a programmers guide, the actual syntax presented to your program's users will - depend upon the flags used during expression compilation. -

-

Literals -

-

All characters are literals except: ".", "|", "*", "?", "+", "(", ")", "{", - "}", "[", "]", "^", "$" and "\". These characters are literals when preceded by - a "\". A literal is a character that matches itself, or matches the result of - traits_type::translate(), where traits_type is the traits template parameter to - class basic_regex.

-

Wildcard -

-

The dot character "." matches any single character except : when match_not_dot_null - is passed to the matching algorithms, the dot does not match a null character; - when match_not_dot_newline is passed to the matching algorithms, then - the dot does not match a newline character. -

-

Repeats -

-

A repeat is an expression that is repeated an arbitrary number of times. An - expression followed by "*" can be repeated any number of times including zero. - An expression followed by "+" can be repeated any number of times, but at least - once, if the expression is compiled with the flag regex_constants::bk_plus_qm - then "+" is an ordinary character and "\+" represents a repeat of once or more. - An expression followed by "?" may be repeated zero or one times only, if the - expression is compiled with the flag regex_constants::bk_plus_qm then "?" is an - ordinary character and "\?" represents the repeat zero or once operator. When - it is necessary to specify the minimum and maximum number of repeats - explicitly, the bounds operator "{}" may be used, thus "a{2}" is the letter "a" - repeated exactly twice, "a{2,4}" represents the letter "a" repeated between 2 - and 4 times, and "a{2,}" represents the letter "a" repeated at least twice with - no upper limit. Note that there must be no white-space inside the {}, and there - is no upper limit on the values of the lower and upper bounds. When the - expression is compiled with the flag regex_constants::bk_braces then "{" and - "}" are ordinary characters and "\{" and "\}" are used to delimit bounds - instead. All repeat expressions refer to the shortest possible previous - sub-expression: a single character; a character set, or a sub-expression - grouped with "()" for example. -

-

Examples: -

-

"ba*" will match all of "b", "ba", "baaa" etc. -

-

"ba+" will match "ba" or "baaaa" for example but not "b". -

-

"ba?" will match "b" or "ba". -

-

"ba{2,4}" will match "baa", "baaa" and "baaaa". -

-

Non-greedy repeats -

-

Whenever the "extended" regular expression syntax is in use (the default) then - non-greedy repeats are possible by appending a '?' after the repeat; a - non-greedy repeat is one which will match the shortest possible string. -

-

For example to match html tag pairs one could use something like: -

-

"<\s*tagname[^>]*>(.*?)<\s*/tagname\s*>" -

-

In this case $1 will contain the text between the tag pairs, and will be the - shortest possible matching string.  -

-

Parenthesis -

-

Parentheses serve two purposes, to group items together into a sub-expression, - and to mark what generated the match. For example the expression "(ab)*" would - match all of the string "ababab". The matching algorithms - regex_match and regex_search each take - an instance of match_results that reports what - caused the match, on exit from these functions the match_results - contains information both on what the whole expression matched and on what each - sub-expression matched. In the example above match_results[1] would contain a - pair of iterators denoting the final "ab" of the matching string. It is - permissible for sub-expressions to match null strings. If a sub-expression - takes no part in a match - for example if it is part of an alternative that is - not taken - then both of the iterators that are returned for that - sub-expression point to the end of the input string, and the matched parameter - for that sub-expression is false. Sub-expressions are indexed from left - to right starting from 1, sub-expression 0 is the whole expression. -

-

Non-Marking Parenthesis -

-

Sometimes you need to group sub-expressions with parenthesis, but don't want - the parenthesis to spit out another marked sub-expression, in this case a - non-marking parenthesis (?:expression) can be used. For example the following - expression creates no sub-expressions: -

-

"(?:abc)*"

-

Forward Lookahead Asserts  -

-

There are two forms of these; one for positive forward lookahead asserts, and - one for negative lookahead asserts:

-

"(?=abc)" matches zero characters only if they are followed by the expression - "abc".

-

"(?!abc)" matches zero characters only if they are not followed by the - expression "abc".

-

Independent sub-expressions

-

"(?>expression)" matches "expression" as an independent atom (the algorithm - will not backtrack into it if a failure occurs later in the expression).

-

Alternatives -

-

Alternatives occur when the expression can match either one sub-expression or - another, each alternative is separated by a "|", or a "\|" if the flag - regex_constants::bk_vbar is set, or by a newline character if the flag - regex_constants::newline_alt is set. Each alternative is the largest possible - previous sub-expression; this is the opposite behavior from repetition - operators. -

-

Examples: -

-

"a(b|c)" could match "ab" or "ac". -

-

"abc|def" could match "abc" or "def". -

-

Sets -

-

A set is a set of characters that can match any single character that is a - member of the set. Sets are delimited by "[" and "]" and can contain literals, - character ranges, character classes, collating elements and equivalence - classes. Set declarations that start with "^" contain the complement of the - elements that follow. -

-

Examples: -

-

Character literals: -

-

"[abc]" will match either of "a", "b", or "c". -

-

"[^abc]" will match any character other than "a", "b", or "c". -

-

Character ranges: -

-

"[a-z]" will match any character in the range "a" to "z". -

-

"[^A-Z]" will match any character other than those in the range "A" to "Z". -

-

Note that character ranges are highly locale dependent if the flag - regex_constants::collate is set: they match any character that collates between - the endpoints of the range, ranges will only behave according to ASCII rules - when the default "C" locale is in effect. For example if the library is - compiled with the Win32 localization model, then [a-z] will match the ASCII - characters a-z, and also 'A', 'B' etc, but not 'Z' which collates just after - 'z'. This locale specific behavior is disabled by default (in perl mode), and - forces ranges to collate according to ASCII character code. -

-

Character classes are denoted using the syntax "[:classname:]" within a set - declaration, for example "[[:space:]]" is the set of all whitespace characters. - Character classes are only available if the flag regex_constants::char_classes - is set. The available character classes are: -
-   -

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 alnumAny alpha numeric character. 
 alphaAny alphabetical character a-z and A-Z. Other - characters may also be included depending upon the locale. 
 blankAny blank character, either a space or a tab. 
 cntrlAny control character. 
 digitAny digit 0-9. 
 graphAny graphical character. 
 lowerAny lower case character a-z. Other characters may - also be included depending upon the locale. 
 printAny printable character. 
 punctAny punctuation character. 
 spaceAny whitespace character. 
 upperAny upper case character A-Z. Other characters may - also be included depending upon the locale. 
 xdigitAny hexadecimal digit character, 0-9, a-f and A-F. 
 wordAny word character - all alphanumeric characters plus - the underscore. 
 UnicodeAny character whose code is greater than 255, this - applies to the wide character traits classes only. 
-

-

There are some shortcuts that can be used in place of the character classes, - provided the flag regex_constants::escape_in_lists is set then you can use: -

-

\w in place of [:word:] -

-

\s in place of [:space:] -

-

\d in place of [:digit:] -

-

\l in place of [:lower:] -

-

\u in place of [:upper:]  -

-

Collating elements take the general form [.tagname.] inside a set declaration, - where tagname is either a single character, or a name of a collating - element, for example [[.a.]] is equivalent to [a], and [[.comma.]] is - equivalent to [,]. The library supports all the standard POSIX collating - element names, and in addition the following digraphs: "ae", "ch", "ll", "ss", - "nj", "dz", "lj", each in lower, upper and title case variations. - Multi-character collating elements can result in the set matching more than one - character, for example [[.ae.]] would match two characters, but note that - [^[.ae.]] would only match one character.  -

-

- Equivalence classes take the generalform[=tagname=] inside a set declaration, - where tagname is either a single character, or a name of a collating - element, and matches any character that is a member of the same primary - equivalence class as the collating element [.tagname.]. An equivalence class is - a set of characters that collate the same, a primary equivalence class is a set - of characters whose primary sort key are all the same (for example strings are - typically collated by character, then by accent, and then by case; the primary - sort key then relates to the character, the secondary to the accentation, and - the tertiary to the case). If there is no equivalence class corresponding to tagname - ,then[=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no - locale independent method of obtaining the primary sort key for a character, - except under Win32. For other operating systems the library will "guess" the - primary sort key from the full sort key (obtained from strxfrm), so - equivalence classes are probably best considered broken under any operating - system other than Win32.  -

-

To include a literal "-" in a set declaration then: make it the first character - after the opening "[" or "[^", the endpoint of a range, a collating element, or - if the flag regex_constants::escape_in_lists is set then precede with an escape - character as in "[\-]". To include a literal "[" or "]" or "^" in a set then - make them the endpoint of a range, a collating element, or precede with an - escape character if the flag regex_constants::escape_in_lists is set. -

-

Line anchors -

-

An anchor is something that matches the null string at the start or end of a - line: "^" matches the null string at the start of a line, "$" matches the null - string at the end of a line. -

-

Back references -

-

A back reference is a reference to a previous sub-expression that has already - been matched, the reference is to what the sub-expression matched, not to the - expression itself. A back reference consists of the escape character "\" - followed by a digit "1" to "9", "\1" refers to the first sub-expression, "\2" - to the second etc. For example the expression "(.*)\1" matches any string that - is repeated about its mid-point for example "abcabc" or "xyzxyz". A back - reference to a sub-expression that did not participate in any match, matches - the null string: NB this is different to some other regular expression - matchers. Back references are only available if the expression is compiled with - the flag regex_constants::bk_refs set. -

-

Characters by code -

-

This is an extension to the algorithm that is not available in other libraries, - it consists of the escape character followed by the digit "0" followed by the - octal character code. For example "\023" represents the character whose octal - code is 23. Where ambiguity could occur use parentheses to break the expression - up: "\0103" represents the character whose code is 103, "(\010)3" represents the - character 10 followed by "3". To match characters by their hexadecimal code, - use \x followed by a string of hexadecimal digits, optionally enclosed inside - {}, for example \xf0 or \x{aff}, notice the latter example is a Unicode - character.

-

Word operators -

-

The following operators are provided for compatibility with the GNU regular - expression library. -

-

"\w" matches any single character that is a member of the "word" character - class, this is identical to the expression "[[:word:]]". -

-

"\W" matches any single character that is not a member of the "word" character - class, this is identical to the expression "[^[:word:]]". -

-

"\<" matches the null string at the start of a word. -

-

"\>" matches the null string at the end of the word. -

-

"\b" matches the null string at either the start or the end of a word. -

-

"\B" matches a null string within a word. -

-

The start of the sequence passed to the matching algorithms is considered to be - a potential start of a word unless the flag match_not_bow is set. The end of - the sequence passed to the matching algorithms is considered to be a potential - end of a word unless the flag match_not_eow is set. -

-

Buffer operators -

-

The following operators are provided for compatibility with the GNU regular - expression library, and Perl regular expressions: -

-

"\`" matches the start of a buffer. -

-

"\A" matches the start of the buffer. -

-

"\'" matches the end of a buffer. -

-

"\z" matches the end of a buffer. -

-

"\Z" matches the end of a buffer, or possibly one or more new line characters - followed by the end of the buffer. -

-

A buffer is considered to consist of the whole sequence passed to the matching - algorithms, unless the flags match_not_bob or match_not_eob are set. -

-

Escape operator -

-

The escape character "\" has several meanings. -

-

Inside a set declaration the escape character is a normal character unless the - flag regex_constants::escape_in_lists is set in which case whatever follows the - escape is a literal character regardless of its normal meaning. -

-

The escape operator may introduce an operator for example: back references, or - a word operator. -

-

The escape operator may make the following character normal, for example "\*" - represents a literal "*" rather than the repeat operator. -

-

Single character escape sequences -

-

The following escape sequences are aliases for single characters: -
-   -

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 Escape sequence - Character code - Meaning -  
 \a - 0x07 - Bell character. -  
 \f - 0x0C - Form feed. -  
 \n - 0x0A - Newline character. -  
 \r - 0x0D - Carriage return. -  
 \t - 0x09 - Tab character. -  
 \v - 0x0B - Vertical tab. -  
 \e - 0x1B - ASCII Escape character. -  
 \0dd - 0dd - An octal character code, where dd is one or - more octal digits. -  
 \xXX - 0xXX - A hexadecimal character code, where XX is one or more - hexadecimal digits. -  
 \x{XX} - 0xXX - A hexadecimal character code, where XX is one or more - hexadecimal digits, optionally a Unicode character. -  
 \cZ - z-@ - An ASCII escape sequence control-Z, where Z is any - ASCII character greater than or equal to the character code for '@'. -  
-

-

Miscellaneous escape sequences: -

-

The following are provided mostly for perl compatibility, but note that there - are some differences in the meanings of \l \L \u and \U: -
-   -

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 \w - Equivalent to [[:word:]]. -  
 \W - Equivalent to [^[:word:]]. -  
 \s - Equivalent to [[:space:]]. -  
 \S - Equivalent to [^[:space:]]. -  
 \d - Equivalent to [[:digit:]]. -  
 \D - Equivalent to [^[:digit:]]. -  
 \l - Equivalent to [[:lower:]]. -  
 \L - Equivalent to [^[:lower:]]. -  
 \u - Equivalent to [[:upper:]]. -  
 \U - Equivalent to [^[:upper:]]. -  
 \C - Any single character, equivalent to '.'. -  
 \X - Match any Unicode combining character sequence, for - example "a\x 0301" (a letter a with an acute). -  
 \Q - The begin quote operator, everything that follows is - treated as a literal character until a \E end quote operator is found. -  
 \E - The end quote operator, terminates a sequence begun - with \Q. -  
-

-

What gets matched? -

-

- When the expression is compiled as a Perl-compatible regex then the matching - algorithms will perform a depth first search on the state machine and report - the first match found.

-

- When the expression is compiled as a POSIX-compatible regex then the matching - algorithms will match the first possible matching string, if more than one - string starting at a given location can match then it matches the longest - possible string, unless the flag match_any is set, in which case the first - match encountered is returned. Use of the match_any option can reduce the time - taken to find the match - but is only useful if the user is less concerned - about what matched - for example it would not be suitable for search and - replace operations. In cases where there are multiple possible matches all - starting at the same location, and all of the same length, then the match - chosen is the one with the longest first sub-expression, if that is the same - for two or more matches, then the second sub-expression will be examined and so - on. -

-

- The following table examples illustrate the main differences between Perl and - POSIX regular expression matching rules: -

-

- - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Expression

-
-

Text

-
-

POSIX leftmost longest match

-
-

ECMAScript depth first search match

-
-

a|ab

-
-

xaby -

-
-

"ab"

-
-

"a"

-
-

.*([[:alnum:]]+).*

-
-

" abc def xyz "

-
-

$0 = " abc def xyz "
- $1 = "abc"

-
-

$0 = " abc def xyz "
- $1 = "z"

-
-

.*(a|xayy)

-
-

zzxayyzz

-
-

"zzxayy"

-
-

"zzxa"

-
-

These differences between Perl matching rules, and POSIX matching rules, mean - that these two regular expression syntaxes differ not only in the features - offered, but also in the form that the state machine takes and/or the - algorithms used to traverse the state machine.

+ depend upon the flags used during + expression compilation. +

+

There are three main syntax options available, depending upon how + you construct the regular expression object:

+ +

You can also construct a regular expression that treats every character as a + literal, but that's not really a "syntax"!


-

Revised +

Revised  - 24 Oct 2003 + 10 Sept 2004 

© Copyright John Maddock 1998- - 2003

+ 2004

Use, modification and distribution are subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

diff --git a/doc/Attic/syntax_basic.html b/doc/Attic/syntax_basic.html new file mode 100644 index 00000000..dc49248c --- /dev/null +++ b/doc/Attic/syntax_basic.html @@ -0,0 +1,226 @@ + + + + Boost.Regex: POSIX-Basic Regular Expression Syntax + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

POSIX Basic Regular Expression Syntax

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
POSIX Basic Syntax
+ Variations +
+
+
Grep
Emacs
+
+
Options
References
+
+

Synopsis

+

The POSIX-Basic regular expression syntax is used by the Unix utility sed, + and variations are used by grep and emacs.  You can + construct POSIX basic regular expressions in Boost.Regex by passing the flag basic + to the regex constructor, for example:

+
// e1 is a case sensitive POSIX-Basic expression:
+boost::regex e1(my_expression, boost::regex::basic);
+// e2 a case insensitive POSIX-Basic expression:
+boost::regex e2(my_expression, boost::regex::basic|boost::regex::icase);
+

POSIX Basic Syntax

+

In POSIX-Basic regular expressions, all characters match themselves except + for the following special characters:

+
.[\*^$
+

Wildcard:

+

The single character '.' when used outside of a character set will match any + single character except:

+

The NULL character when the flag match_not_dot_null is passed to the + matching algorithms.

+

The newline character when the flag match_not_dot_newline is passed to + the matching algorithms.

+

Anchors:

+

A '^' character shall match the start of a line when used as the first + character of an expression, or the first character of a sub-expression.

+

A '$' character shall match the end of a line when used as the last character + of an expression, or the last character of a sub-expression.

+

Marked sub-expressions:

+

A section beginning \( and ending \) acts as a marked sub-expression.  + Whatever matched the sub-expression is split out in a separate field by the + matching algorithms.  Marked sub-expressions can also be repeated, or referred-to by a back-reference.

+

Repeats:

+

Any atom (a single character, a marked sub-expression, or a character class) + can be repeated with the * operator.

+

For example a* will match any number of letter a's repeated zero or more times + (an atom repeated zero times matches an empty string), so the expression a*b + will match any of the following:

+
b
+ab
+aaaaaaaab
+

An atom can also be repeated with a bounded repeat:

+

a\{n\}  Matches 'a' repeated exactly n times.

+

a\{n,\}  Matches 'a' repeated n or more times.

+

a\{n, m\}  Matches 'a' repeated between n and m times + inclusive.

+

For example:

+
^a\{2,3\}$
+

Will match either of:

+
aa
+aaa
+

But neither of:

+
a
+aaaa
+

It is an error to use a repeat operator, if the preceding construct can not be + repeated, for example:

+
a\(*\)
+

Will raise an error, as there is nothing for the * operator to be applied to.

+

Back references:

+

An escape character followed by a digit n, where n is in the + range 1-9, matches the same string that was matched by sub-expression n.  + For example the expression:

+
^\(a*\).*\1$
+

Will match the string:

+
aaabbaaa
+

But not the string:

+
aaabba
+

Character sets:

+

A character set is a bracket-expression starting with [ and ending with ], it + defines a set of characters, and matches any single character that is a member + of that set.

+

A bracket expression may contain any combination of the following:

+
+
Single characters:
+

For example [abc], will match any of the characters 'a', 'b', or 'c'.

+
Character ranges:
+

For example [a-c] will match any single character in the range 'a' to + 'c'.  By default, for POSIX-Basic regular expressions, a character x + is within the range y to z, if it collates within that + range; this results in locale specific behavior.  This behavior can + be turned off by unsetting the collate + option flag - in which case whether a character appears within a range is + determined by comparing the code points of the characters only

+
Negation:
+

If the bracket-expression begins with the ^ character, then it matches the + complement of the characters it contains, for example [^a-c] matches any + character that is not in the range a-c.

+
Character classes:
+

An expression of the form [[:name:]] matches the named character class "name", + for example [[:lower:]] matches any lower case character.  See + character class names.

+
Collating Elements:
+

An expression of the form [[.col.]] matches the collating element col.  + A collating element is any single character, or any sequence of characters that + collates as a single unit.  Collating elements may also be used as the end + point of a range, for example: [[.ae.]-c] matches the character sequence "ae", + plus any single character in the range "ae"-c, assuming that "ae" is treated + as a single collating element in the current locale.

+

As an extension, a collating element may also be specified via its + symbolic name, for example:

+

[[.NUL.]]

+

matches a NUL character.

+
Equivalence classes:
+

+ An expression of the form [[=col=]], matches any character or collating element + whose primary sort key is the same as that for collating element col, + as with collating elements the name col may be a + symbolic name.  A primary sort key is one that ignores case, + accentation, or locale-specific tailorings; so for example [[=a=]] matches any + of the characters: a, à, á, â, ã, ä, å, A, À, Á, Â, Ã, Ä and Å.  + Unfortunately implementation of this is reliant on the platform's collation and + localisation support; this feature can not be relied upon to work portably + across all platforms, or even all locales on one platform.

+
+
Combinations:
+

All of the above can be combined in one character set declaration, for example: + [[:digit:]a-c[.NUL.]].

+

Escapes

+

With the exception of the escape sequences \{, \}, \(, and \), which are + documented above, an escape followed by any character matches that + character.  This can be used to make the special characters .[\*^$, + "ordinary".  Note that the escape character loses its special meaning + inside a character set, so [\^] will match either a literal '\' or a '^'.

+

Variations

+

Grep

+

When an expression is compiled with the flag grep set, then the + expression is treated as a newline separated list of POSIX-Basic + expressions, a match is found if any of the expressions in the list match, for + example:

+
boost::regex e("abc\ndef", boost::regex::grep);
+

will match either of the POSIX-Basic expressions "abc" or "def".

+

As its name suggests, this behavior is consistent with the Unix utility grep.

+

emacs

+

In addition to the POSIX-Basic features the following + characters are also special:

+
+

+ repeats the preceding atom one or more times.

+

? repeats the preceding atom zero or one times.

+

*? A non-greedy version of *.

+

+? A non-greedy version of +.

+

?? A non-greedy version of ?.

+
+

And the following escape sequences are also recognised:

+
+

\| specifies an alternative.

+

\(?:  ...  \) is a non-marking grouping construct - allows you to + lexically group something without spitting out an extra sub-expression.

+

\w  matches any word character.

+

\W matches any non-word character.

+

\sx matches any character in the syntax group x, the following emacs + groupings are supported: 's', ' ', '_', 'w', '.', ')', '(', '"', '\'', '>' + and '<'.  Refer to the emacs docs for details.

+

\Sx matches any character not in the syntax grouping x.

+

\c and \C are not supported.

+

\` matches zero characters only at the start of a buffer (or string being + matched).

+

\' matches zero characters only at the end of a buffer (or string being + matched).

+

\b matches zero characters at a word boundary.

+

\B matches zero characters, not at a word boundary.

+

\< matches zero characters only at the start of a word.

+

\> matches zero characters only at the end of a word.

+
+

Options

+

There are a variety of flags that + may be combined with the basic and grep options when + constructing the regular expression, in particular note that the + newline_alt, no_char_classes, no_intervals, bk_plus_qm and bk_vbar options + all alter the syntax, while the collate + and icase options modify how the case and locale sensitivity are to be + applied.

+

References

+

IEEE + Std 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions + and Headers, Section 9, Regular Expressions (FWD.1).

+

IEEE + Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and + Utilities, Section 4, Utilities, grep (FWD.1).

+

Emacs Version 21.3.

+

+


+

+

Revised  + + 21 Aug 2004  +

+

© Copyright John Maddock 2004

+ +

Use, modification and distribution are subject to the Boost Software License, + Version 1.0. (See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt).

+
+ + + diff --git a/doc/Attic/syntax_extended.html b/doc/Attic/syntax_extended.html new file mode 100644 index 00000000..323b3e13 --- /dev/null +++ b/doc/Attic/syntax_extended.html @@ -0,0 +1,471 @@ + + + + Boost.Regex: POSIX-Extended Regular Expression Syntax + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

POSIX-Extended Regular Expression Syntax

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
POSIX Extended Syntax +
Variations +
+
+
egrep
awk 
+
+
Options
References
+
+

Synopsis

+

The POSIX-Extended regular expression syntax is supported by the POSIX C + regular expression API's, and variations are used by the utilities egrep + and awk. You can construct POSIX extended regular expressions in + Boost.Regex by passing the flag extended to the regex constructor, for + example:

+
// e1 is a case sensitive POSIX-Extended expression:
+boost::regex e1(my_expression, boost::regex::extended);
+// e2 a case insensitive POSIX-Extended expression:
+boost::regex e2(my_expression, boost::regex::extended|boost::regex::icase);
+

POSIX Extended Syntax

+

In POSIX-Extended regular expressions, all characters match themselves except + for the following special characters:

+
.[{()\*+?|^$
+

Wildcard:

+

The single character '.' when used outside of a character set will match any + single character except:

+

The NULL character when the flag match_not_dot_null is passed to the + matching algorithms.

+

The newline character when the flag match_not_dot_newline is passed to + the matching algorithms.

+

Anchors:

+

A '^' character shall match the start of a line when used as the first + character of an expression, or the first character of a sub-expression.

+

A '$' character shall match the end of a line when used as the last character + of an expression, or the last character of a sub-expression.

+

Marked sub-expressions:

+

A section beginning ( and ending ) acts as a marked sub-expression.  + Whatever matched the sub-expression is split out in a separate field by the + matching algorithms.  Marked sub-expressions can also be repeated, or referred + to by a back-reference.

+

Repeats:

+

Any atom (a single character, a marked sub-expression, or a character class) + can be repeated with the *, +, ?, and {} operators.

+

The * operator will match the preceding atom zero or more times, for example + the expression a*b will match any of the following:

+
b
+ab
+aaaaaaaab
+

The + operator will match the preceding atom one or more times, for example + the expression a+b will match any of the following:

+
ab
+aaaaaaaab
+

But will not match:

+
b
+

The ? operator will match the preceding atom zero or one times, for + example the expression ca?b will match any of the following:

+
cb
+cab
+

But will not match:

+
caab
+

An atom can also be repeated with a bounded repeat:

+

a{n}  Matches 'a' repeated exactly n times.

+

a{n,}  Matches 'a' repeated n or more times.

+

a{n, m}  Matches 'a' repeated between n and m times + inclusive.

+

For example:

+
^a{2,3}$
+

Will match either of:

+
aa
+aaa
+

But neither of:

+
a
+aaaa
+

It is an error to use a repeat operator, if the preceding construct can not be + repeated, for example:

+
a(*)
+

Will raise an error, as there is nothing for the * operator to be applied to.

+

Back references:

+

An escape character followed by a digit n, where n is in the + range 1-9, matches the same string that was matched by sub-expression n.  + For example the expression:

+
^(a*).*\1$
+

Will match the string:

+
aaabbaaa
+

But not the string:

+
aaabba
+

Caution: the POSIX standard does not support back-references + for "extended" regular expressions, this is a compatible extension to that + standard.

+

Alternation

+

The | operator will match either of its arguments, so for example: abc|def will + match either "abc" or "def".  +

+

Parentheses can be used to group alternations, for example: ab(d|ef) will match + either of "abd" or "abef".

+

Character sets:

+

A character set is a bracket-expression starting with [ and ending with ], it + defines a set of characters, and matches any single character that is a member + of that set.

+

A bracket expression may contain any combination of the following:

+
+
Single characters:
+

For example [abc], will match any of the characters 'a', 'b', or 'c'.

+
Character ranges:
+

For example [a-c] will match any single character in the range 'a' to + 'c'.  By default, for POSIX-Extended regular expressions, a character x + is within the range y to z, if it collates within that + range; this results in locale specific behavior.  This behavior can + be turned off by unsetting the collate + option flag - in which case whether a character appears within a range is + determined by comparing the code points of the characters only

+
Negation:
+

If the bracket-expression begins with the ^ character, then it matches the + complement of the characters it contains, for example [^a-c] matches any + character that is not in the range a-c.

+
Character classes:
+

An expression of the form [[:name:]] matches the named character class "name", + for example [[:lower:]] matches any lower case character.  See + character class names.

+
Collating Elements:
+

An expression of the form [[.col.]] matches the collating element col.  + A collating element is any single character, or any sequence of characters that + collates as a single unit.  Collating elements may also be used as the end + point of a range, for example: [[.ae.]-c] matches the character sequence "ae", + plus any single character in the range "ae"-c, assuming that "ae" is treated + as a single collating element in the current locale.

+

As an extension, a collating element may also be specified via its + symbolic name, for example:

+

[[.NUL.]]

+

matches a NUL character.

+
Equivalence classes:
+

+ An expression of the form [[=col=]], matches any character or collating element + whose primary sort key is the same as that for collating element col, + as with collating elements the name col may be a + symbolic name.  A primary sort key is one that ignores case, + accentation, or locale-specific tailorings; so for example [[=a=]] matches any + of the characters: a, à, á, â, ã, ä, å, A, À, Á, Â, Ã, Ä and Å.  + Unfortunately implementation of this is reliant on the platform's collation and + localisation support; this feature can not be relied upon to work portably + across all platforms, or even all locales on one platform.

+
+
Combinations:
+

All of the above can be combined in one character set declaration, for example: + [[:digit:]a-c[.NUL.]].

+

Operator precedence

+

 The order of precedence of operators is as shown in the following + table:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Collation-related bracket symbols[==] [::] [..]
Escaped characters + \
Character set (bracket expression) + []
Grouping()
Single-character-ERE duplication + * + ? {m,n}
Concatenation
Anchoring^$
Alternation|
+

+

Escapes

+

The POSIX standard defines no escape sequences for POSIX-Extended regular + expressions, except that:

+ +

However, that's rather restrictive, so the following standard-compatible + extensions are also supported by Boost.Regex:

+
+
Escapes matching a specific character
+

The following escape sequences are all synonyms for single characters:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
EscapeCharacter
\a'\a'
\e0x1B
\f\f
\n\n
\r\r
\t\t
\v\v
\b\b (but only inside a character class declaration).
\cXAn ASCII escape sequence - the character whose code point is X % 32
\xddA hexadecimal escape sequence - matches the single character whose code point + is 0xdd.
\x{dddd}A hexadecimal escape sequence - matches the single character whose code point + is 0xdddd.
\0dddAn octal escape sequence - matches the single character whose code point is + 0ddd.
+

+
"Single character" character classes:
+

Any escaped character x, if x is the name of a character + class shall match any character that is a member of that class, and any escaped + character X, if x is the name of a character class, shall + match any character not in that class.

+

The following are supported by default:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Escape sequenceEquivalent to
\d[[:digit:]]
\l[[:lower:]]
\s[[:space:]]
\u[[:upper:]]
\w[[:word:]]
\D[^[:digit:]]
\L[^[:lower:]]
\S[^[:space:]]
\U[^[:upper:]]
\W[^[:word:]]
+

+
Word Boundaries
+

The following escape sequences match the boundaries of words:

+

+ + + + + + + + + + + + + + + + + +
\<Matches the start of a word.
\>Matches the end of a word.
\bMatches a word boundary (the start or end of a word).
\BMatches only when not at a word boundary.
+

+
Buffer boundaries
+

The following match only at buffer boundaries: a "buffer" in this context is + the whole of the input text that is being matched against (note that ^ and + $ may match embedded newlines within the text).

+

+ + + + + + + + + + + + + + + + + + + + + +
\`Matches at the start of a buffer only.
\'Matches at the end of a buffer only.
\AMatches at the start of a buffer only (the same as \`).
\zMatches at the end of a buffer only (the same as \').
\ZMatches an optional sequence of newlines at the end of a buffer: equivalent to + the regular expression \n*\z
+

+
Continuation Escape
+

The sequence \G matches only at the end of the last match found, or at the + start of the text being matched if no previous match was found.  This + escape is useful if you're iterating over the matches contained within a text, and + you want each subsequent match to start where the last one ended.

+
Quoting escape
+

The escape sequence \Q begins a "quoted sequence": all the subsequent + characters are treated as literals, until either the end of the regular + expression or \E is found.  For example the expression: \Q\*+\Ea+ would + match either of:

+
\*+a
\*+aaa
+
Unicode escapes
+

+ + + + + + + + + +
\CMatches a single code point: in Boost regex this has exactly the same effect + as a "." operator.
\XMatches a combining character sequence: that is any non-combining character + followed by a sequence of zero or more combining characters.
+

+
Any other escape
+

Any other escape sequence matches the character that is escaped, for example \@ + matches a literal '@'.

+
+

Variations

+

Egrep

+

When an expression is compiled with the flag egrep set, then the + expression is treated as a newline separated list of POSIX-Extended + expressions, a match is found if any of the expressions in the list match, for + example:

+
boost::regex e("abc\ndef", boost::regex::egrep);
+

will match either of the POSIX-Extended expressions "abc" or "def".

+

As its name suggests, this behavior is consistent with the Unix utility egrep, + and with grep when used with the -E option.

+

awk

+

In addition to the POSIX-Extended features the + escape character is special inside a character class declaration. 

+

In addition, some escape sequences that are not defined as part of the + POSIX-Extended specification are required to be supported - however Boost.Regex + supports these by default anyway.

+

Options

+

There are a variety of flags that + may be combined with the extended and egrep options when + constructing the regular expression, in particular note that the + newline_alt option alters the syntax, while the + collate, nosubs and icase options modify how the case and locale + sensitivity are to be applied.

+

References

+

IEEE + Std 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions + and Headers, Section 9, Regular Expressions.

+

IEEE + Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and + Utilities, Section 4, Utilities, egrep.

+

IEEE + Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and + Utilities, Section 4, Utilities, awk.

+
+

+

Revised  + + 21 Aug 2004  +

+

© Copyright John Maddock 2004

+ +

Use, modification and distribution are subject to the Boost Software License, + Version 1.0. (See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt).

+
+ + + diff --git a/doc/Attic/syntax_option_type.html b/doc/Attic/syntax_option_type.html index 6da58afc..291c24c0 100644 --- a/doc/Attic/syntax_option_type.html +++ b/doc/Attic/syntax_option_type.html @@ -175,7 +175,7 @@ static const syntax_option_type collate; No Normally Boost.Regex behaves as if the Perl m-modifier is on: so the assertions ^ and $ match after and before embedded newlines respectively, - setting this flags is eqivalent to prefixing the expression with (?-m). + setting this flags is equivalent to prefixing the expression with (?-m). no_mod_s @@ -251,7 +251,7 @@ static const syntax_option_type collate; character classes permitted.

In addition some perl-style escape sequences are supported (actually the awk syntax requires \a \b \t \v \f \n and \r to be recognised, but other - escape sequences invoke undefined behaviour according to the POSIX standard).

+ escape sequences invoke undefined behavior according to the POSIX standard).

@@ -324,9 +324,9 @@ static const syntax_option_type collate; Yes

Specifies that the grammar recognized by the regular expression engine is the - same as that used by POSIX basic regular expressions in IEEE Std 1003.1-2001, - Portable Operating System Interface (POSIX ), Base Definitions and Headers, - Section 9, Regular Expressions (FWD.1). + same as that used by POSIX basic regular + expressions in IEEE Std 1003.1-2001, Portable Operating System Interface + (POSIX ), Base Definitions and Headers, Section 9, Regular Expressions (FWD.1).

@@ -340,13 +340,20 @@ static const syntax_option_type collate; Yes

Specifies that the grammar recognized by the regular expression engine is the - same as that used by POSIX utility grep in IEEE Std 1003.1-2001, Portable - Operating System Interface (POSIX ), Shells and Utilities, Section 4, - Utilities, grep (FWD.1).

+ same as that used by POSIX utility grep in + IEEE Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and + Utilities, Section 4, Utilities, grep (FWD.1).

That is to say, the same as POSIX basic syntax, but with the newline character - acting as an alternation character in addition to "|".

+ acting as an alternation character; the expression is treated as a newline + separated list of alternatives.

+ + emacs + No + Specifies that the grammar recognised is the superset of the POSIX-Basic + syntax used by the emacs program. +

The following options may also be set when using POSIX basic regular @@ -390,7 +397,10 @@ static const syntax_option_type collate; collate Yes -

Specifies that character ranges of the form "[a-b]" should be locale sensitive.

+

Specifies that character ranges of the form "[a-b]" should be locale + sensitive.  This bit is on by default for + POSIX-Basic regular expressions, but can be unset to force ranges to be + compared by code point only.

@@ -398,7 +408,7 @@ static const syntax_option_type collate; No Specifies that the \n character has the same effect as the alternation operator |.  Allows newline separated lists to be used as a list of - alternatives. + alternatives.  This bit is already set, if you use the grep option. no_char_classes @@ -482,3 +492,4 @@ static const syntax_option_type collate; or copy at http://www.boost.org/LICENSE_1_0.txt)

+ diff --git a/doc/Attic/syntax_perl.html b/doc/Attic/syntax_perl.html new file mode 100644 index 00000000..2af1260b --- /dev/null +++ b/doc/Attic/syntax_perl.html @@ -0,0 +1,502 @@ + + + + Boost.Regex: Perl Regular Expression Syntax + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

+ Perl Regular Expression Syntax

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
Perl Syntax
+ Variations +
+
Options
Modifiers
References
+
+

Synopsis

+

The Perl regular expression syntax is based on that used by the programming + language Perl .  Perl regular expressions are the default + behavior in Boost.Regex or you can pass the flag perl to the + regex constructor, for example:

+
// e1 is a case sensitive Perl regular expression: 
+// since Perl is the default option there's no need to explicitly specify the syntax used here:
+boost::regex e1(my_expression);
+// e2 a case insensitive Perl regular expression:
+boost::regex e2(my_expression, boost::regex::perl|boost::regex::icase);
+

Perl Regular Expression Syntax

+

In Perl regular expressions, all characters match themselves except for + the following special characters:

+
.[{()\*+?|^$
+

Wildcard:

+

The single character '.' when used outside of a character set will match any + single character except:

+

The NULL character when the flag match_not_dot_null is passed to the + matching algorithms.

+

The newline character when the flag match_not_dot_newline is passed to + the matching algorithms.

+

Anchors:

+

A '^' character shall match the start of a line.

+

A '$' character shall match the end of a line.

+

Marked sub-expressions:

+

A section beginning ( and ending ) acts as a marked sub-expression.  + Whatever matched the sub-expression is split out in a separate field by the + matching algorithms.  Marked sub-expressions can also be repeated, or referred + to by a back-reference.

+

Non-marking grouping:

+

A marked sub-expression is useful to lexically group part of a regular + expression, but has the side-effect of spitting out an extra field in the + result.  As an alternative you can lexically group part of a regular + expression, without generating a marked sub-expression by using (?: and ) , for + example (?:ab)+ will repeat "ab" without splitting out any separate + sub-expressions.

+

Repeats:

+

Any atom (a single character, a marked sub-expression, or a character class) + can be repeated with the *, +, ?, and {} operators.

+

The * operator will match the preceding atom zero or more times, for example + the expression a*b will match any of the following:

+
b
+ab
+aaaaaaaab
+

The + operator will match the preceding atom one or more times, for example + the expression a+b will match any of the following:

+
ab
+aaaaaaaab
+

But will not match:

+
b
+

The ? operator will match the preceding atom zero or one times, for + example the expression ca?b will match any of the following:

+
cb
+cab
+

But will not match:

+
caab
+

An atom can also be repeated with a bounded repeat:

+

a{n}  Matches 'a' repeated exactly n times.

+

a{n,}  Matches 'a' repeated n or more times.

+

a{n, m}  Matches 'a' repeated between n and m times + inclusive.

+

For example:

+
^a{2,3}$
+

Will match either of:

+
aa
+aaa
+

But neither of:

+
a
+aaaa
+

It is an error to use a repeat operator, if the preceding construct can not be + repeated, for example:

+
a(*)
+

Will raise an error, as there is nothing for the * operator to be applied to.

+

Non greedy repeats

+

The normal repeat operators are "greedy", that is to say they will consume as + much input as possible.  There are non-greedy versions available that will + consume as little input as possible while still producing a match.

+

*? Matches the previous atom zero or more times, while consuming as little + input as possible.

+

+? Matches the previous atom one or more times, while consuming as little input + as possible.

+

?? Matches the previous atom zero or one times, while consuming as little input + as possible.

+

{n,}? Matches the previous atom n or more times, while consuming + as little input as possible.

+

{n,m}? Matches the previous atom between n and m times, + while consuming as little input as possible.

+

Back references:

+

An escape character followed by a digit n, where n is in the + range 1-9, matches the same string that was matched by sub-expression n.  + For example the expression:

+
^(a*).*\1$
+

Will match the string:

+
aaabbaaa
+

But not the string:

+
aaabba
+

Alternation

+

The | operator will match either of its arguments, so for example: abc|def will + match either "abc" or "def".  +

+

Parentheses can be used to group alternations, for example: ab(d|ef) will match + either of "abd" or "abef".

+

Character sets:

+

A character set is a bracket-expression starting with [ and ending with ], it + defines a set of characters, and matches any single character that is a member + of that set.

+

A bracket expression may contain any combination of the following:

+
+
Single characters:
+

For example [abc], will match any of the characters 'a', 'b', or 'c'.

+
Character ranges:
+

For example [a-c] will match any single character in the range 'a' to + 'c'.  By default, for Perl regular expressions, a character x + is within the range y to z, if it collates within that + range; this results in locale specific behavior.  This behavior can + be turned off by unsetting the collate + option flag - in which case whether a character appears within a range is + determined by comparing the code points of the characters only.

+
Negation:
+

If the bracket-expression begins with the ^ character, then it matches the + complement of the characters it contains, for example [^a-c] matches any + character that is not in the range a-c.

+
Character classes:
+

An expression of the form [[:name:]] matches the named character class "name", + for example [[:lower:]] matches any lower case character.  See + character class names.

+
Collating Elements:
+

An expression of the form [[.col.]] matches the collating element col.  + A collating element is any single character, or any sequence of characters that + collates as a single unit.  Collating elements may also be used as the end + point of a range, for example: [[.ae.]-c] matches the character sequence "ae", + plus any single character in the range "ae"-c, assuming that "ae" is treated + as a single collating element in the current locale.

+

As an extension, a collating element may also be specified via its + symbolic name, for example:

+

[[.NUL.]]

+

matches a NUL character.

+
Equivalence classes:
+

+ An expression of the form [[=col=]] matches any character or collating element + whose primary sort key is the same as that for collating element col; + as with collating elements, the name col may be a + symbolic name.  A primary sort key is one that ignores case, + accentation, or locale-specific tailorings; so for example [[=a=]] matches any + of the characters: a, à, á, â, ã, ä, å, A, À, Á, Â, Ã, Ä and Å.  + Unfortunately implementation of this is reliant on the platform's collation and + localisation support; this feature can not be relied upon to work portably + across all platforms, or even all locales on one platform.

+
Escapes:
+

All the escape sequences that match a single character, or a single character + class are permitted within a character class definition, except the + negated character classes (\D \W etc).

+
+
Combinations:
+

All of the above can be combined in one character set declaration, for example: + [[:digit:]a-c[.NUL.]].

+

Operator precedence

+

 The order of precedence of operators is as shown in the following + table:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Collation-related bracket symbols[==] [::] [..]
Escaped characters + \
Character set (bracket expression) + []
Grouping()
Single-character-ERE duplication + * + ? {m,n}
Concatenation
Anchoring^$
Alternation|
+

+

Escapes

+

Any special character preceded by an escape shall match itself. +

+

The following escape sequences are also supported:

+
+
Escapes matching a specific character
+

The following escape sequences are all synonyms for single characters:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
EscapeCharacter
\a'\a'
\e0x1B
\f\f
\n\n
\r\r
\t\t
\v\v
\b\b (but only inside a character class declaration).
\cXAn ASCII escape sequence - the character whose code point is X % 32
\xddA hexadecimal escape sequence - matches the single character whose code point + is 0xdd.
\x{dddd}A hexadecimal escape sequence - matches the single character whose code point + is 0xdddd.
\0dddAn octal escape sequence - matches the single character whose code point is + 0ddd.
+

+
"Single character" character classes:
+

Any escaped character x, if x is the name of a character + class shall match any character that is a member of that class, and any escaped + character X, if x is the name of a character class, shall + match any character not in that class.

+

The following are supported by default:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Escape sequenceEquivalent to
\d[[:digit:]]
\l[[:lower:]]
\s[[:space:]]
\u[[:upper:]]
\w[[:word:]]
\D[^[:digit:]]
\L[^[:lower:]]
\S[^[:space:]]
\U[^[:upper:]]
\W[^[:word:]]
+

+
Word Boundaries
+

The following escape sequences match the boundaries of words:

+

+ + + + + + + + + + + + + + + + + +
\<Matches the start of a word.
\>Matches the end of a word.
\bMatches a word boundary (the start or end of a word).
\BMatches only when not at a word boundary.
+

+
Buffer boundaries
+

The following match only at buffer boundaries: a "buffer" in this context is + the whole of the input text that is being matched against (note that ^ and + $ may match embedded newlines within the text).

+

+ + + + + + + + + + + + + + + + + + + + + +
\`Matches at the start of a buffer only.
\'Matches at the end of a buffer only.
\AMatches at the start of a buffer only (the same as \`).
\zMatches at the end of a buffer only (the same as \').
\ZMatches an optional sequence of newlines at the end of a buffer: equivalent to + the regular expression \n*\z
+

+
Continuation Escape
+

The sequence \G matches only at the end of the last match found, or at the + start of the text being matched if no previous match was found.  This + escape is useful if you're iterating over the matches contained within a text, and + you want each subsequent match to start where the last one ended.

+
Quoting escape
+

The escape sequence \Q begins a "quoted sequence": all the subsequent + characters are treated as literals, until either the end of the regular + expression or \E is found.  For example the expression: \Q\*+\Ea+ would + match either of:

+
\*+a
\*+aaa
+
Unicode escapes
+

+ + + + + + + + + +
\CMatches a single code point: in Boost regex this has exactly the same effect + as a "." operator.
\XMatches a combining character sequence: that is any non-combining character + followed by a sequence of zero or more combining characters.
+

+
Any other escape
+

Any other escape sequence matches the character that is escaped, for example \@ + matches a literal '@'.

+
+

Perl Extended Patterns

+

Perl-specific extensions to the regular expression syntax all start + with (?.

+
+
Comments
+

(?# ... ) is treated as a comment, its contents are ignored.

+
Modifiers
+

(?imsx-imsx ... ) alters which of the perl modifiers are in effect + within the pattern, changes take effect from the point that the block is first + seen and extend to any enclosing ).  Letters before a '-' turn that perl + modifier on, letters afterward, turn it off.

+

(?imsx-imsx:pattern) applies the specified modifiers to pattern + only.

+
Non-marking grouping
+

(?:pattern) lexically groups pattern, without generating an + additional sub-expression.

+
Lookahead
+

(?=pattern) consumes zero characters, only if pattern matches.

+

(?!pattern) consumes zero characters, only if pattern does + not match.

+
Lookbehind
+

(?<=pattern) consumes zero characters, only if pattern could + be matched against the characters preceding the current position (pattern + must be of fixed length).

+

(?<!pattern) consumes zero characters, only if pattern could + not be matched against the characters preceding the current position (pattern + must be of fixed length).

+
Independent sub-expressions
+

(?>pattern) pattern is matched independently of the + surrounding patterns, the expression will never backtrack into pattern.

+
Conditional Expressions
+

(?(condition)yes-pattern|no-pattern) attempts to match yes-pattern + if the condition is true, otherwise attempts to match no-pattern.

+

(?(condition)yes-pattern) attempts to match yes-pattern if + the condition is true, otherwise fails.

+

Condition may be either a forward lookahead assert, or the + index of a marked sub-expression (the condition becomes true if the + sub-expression has been matched).

+
+

Variations

+

The options normal, ECMAScript, JavaScript + and JScript are all synonyms for Perl.

+

Options

+

There are a variety of flags that + may be combined with the Perl option when constructing the regular + expression, in particular note that the newline_alt + option alters the syntax, while the collate, + nosubs and icase options modify how the case and locale sensitivity + are to be applied.

+

Modifiers

+

The perl smix modifiers can either be applied using a (?smix-smix) + prefix to the regular expression, or with one of the regex-compile time flags + no_mod_m, mod_x, mod_s, and no_mod_s. +

+

References

+

Perl 5.6.

+
+

+

Revised  + + 21 Aug 2004  +

+

© Copyright John Maddock 2004

+ +

Use, modification and distribution are subject to the Boost Software License, + Version 1.0. (See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt).

+
+ + + diff --git a/doc/character_class_names.html b/doc/character_class_names.html new file mode 100644 index 00000000..1049e7e4 --- /dev/null +++ b/doc/character_class_names.html @@ -0,0 +1,144 @@ + + + + Boost.Regex: Character Class Names + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Character Class Names.

+
+

Boost.Regex Index

+
+

+
+

+

The following character class names are supported by Boost.Regex:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NamePOSIX-standard nameDescription
alnumYesAny alpha-numeric character.
alphaYesAny alphabetic character.
blankYesAny whitespace character that is not a line separator.
cntrlYesAny control character.
dNoAny decimal digit
digitYesAny decimal digit.
graphYesAny graphical character.
lNoAny lower case character.
lowerYesAny lower case character.
printYesAny printable character.
punctYesAny punctuation character.
sNoAny whitespace character.
spaceYesAny whitespace character.
unicodeNoAny extended character whose code point is above 255 in value.
uNoAny upper case character.
upperYesAny upper case character.
wNoAny word character (alphanumeric characters plus the underscore).
wordNoAny word character (alphanumeric characters plus the underscore).
xdigitYesAny hexadecimal digit character.
+

+

+


+

+

Revised  + + 09 Aug 2004  +

+

© Copyright John Maddock 2004

+

Use, modification and distribution are subject to the Boost Software License, + Version 1.0. (See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt)

+ + diff --git a/doc/collating_names.html b/doc/collating_names.html new file mode 100644 index 00000000..c93ef6cf --- /dev/null +++ b/doc/collating_names.html @@ -0,0 +1,358 @@ + + + + Boost.Regex: Collating Element Names + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Collating Element Names

+
+

Boost.Regex Index

+
+

+
+

+

+ The following are treated as valid digraphs when used as a collating name:

+

"ae", "Ae", "AE", "ch", "Ch", "CH", "ll", "Ll", "LL", "ss", "Ss", "SS", "nj", + "Nj", "NJ", "dz", "Dz", "DZ", "lj", "Lj", "LJ".

+

The following symbolic names are recognised as valid collating element names, + in addition to any single character:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameCharacter
NUL\x00
SOH\x01
STX\x02
ETX\x03
EOT\x04
ENQ\x05
ACK\x06
alert\x07
backspace\x08
tab\t
newline\n
vertical-tab\v
form-feed\f
carriage-return\r
SO\xE
SI\xF
DLE\x10
DC1\x11
DC2\x12
DC3\x13
DC4\x14
NAK\x15
SYN\x16
ETB\x17
CAN\x18
EM\x19
SUB\x1A
ESC\x1B
IS4\x1C
IS3\x1D
IS2\x1E
IS1\x1F
space\x20
exclamation-mark!
quotation-mark"
number-sign#
dollar-sign$
percent-sign%
ampersand&
apostrophe'
left-parenthesis(
right-parenthesis)
asterisk*
plus-sign+
comma,
hyphen-
period.
slash/
zero0
one1
two2
three3
four4
five5
six6
seven7
eight8
nine9
colon:
semicolon;
less-than-sign<
equals-sign=
greater-than-sign>
question-mark?
commercial-at@
left-square-bracket[
backslash\
right-square-bracket]
circumflex^
underscore_
grave-accent`
left-curly-bracket{
vertical-line|
right-curly-bracket}
tilde~
DEL\x7F
+

+

+


+

+

Revised + + 24 Oct 2003 +

+

© Copyright John Maddock 1998- + 2003

+

Use, modification and distribution are subject to the Boost Software License, + Version 1.0. (See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt)

+ + diff --git a/doc/index.html b/doc/index.html index 74c79f53..2818bfc8 100644 --- a/doc/index.html +++ b/doc/index.html @@ -36,6 +36,20 @@ Sun Forte Compiler
Other compilers (building with bjam)
+
Backgrounders +
+
+
Regular Expression Syntax +
+
+
Perl Regular Expressions
+
POSIX-Extended Regular Expressions
+
POSIX-Basic Regular Expressions
+
+
+ +
+
Reference
diff --git a/doc/syntax.html b/doc/syntax.html index d7e048a8..e757104b 100644 --- a/doc/syntax.html +++ b/doc/syntax.html @@ -25,742 +25,29 @@

This section covers the regular expression syntax used by this library, this is a programmers guide, the actual syntax presented to your program's users will - depend upon the flags used during expression compilation. -

-

Literals -

-

All characters are literals except: ".", "|", "*", "?", "+", "(", ")", "{", - "}", "[", "]", "^", "$" and "\". These characters are literals when preceded by - a "\". A literal is a character that matches itself, or matches the result of - traits_type::translate(), where traits_type is the traits template parameter to - class basic_regex.

-

Wildcard -

-

The dot character "." matches any single character except : when match_not_dot_null - is passed to the matching algorithms, the dot does not match a null character; - when match_not_dot_newline is passed to the matching algorithms, then - the dot does not match a newline character. -

-

Repeats -

-

A repeat is an expression that is repeated an arbitrary number of times. An - expression followed by "*" can be repeated any number of times including zero. - An expression followed by "+" can be repeated any number of times, but at least - once, if the expression is compiled with the flag regex_constants::bk_plus_qm - then "+" is an ordinary character and "\+" represents a repeat of once or more. - An expression followed by "?" may be repeated zero or one times only, if the - expression is compiled with the flag regex_constants::bk_plus_qm then "?" is an - ordinary character and "\?" represents the repeat zero or once operator. When - it is necessary to specify the minimum and maximum number of repeats - explicitly, the bounds operator "{}" may be used, thus "a{2}" is the letter "a" - repeated exactly twice, "a{2,4}" represents the letter "a" repeated between 2 - and 4 times, and "a{2,}" represents the letter "a" repeated at least twice with - no upper limit. Note that there must be no white-space inside the {}, and there - is no upper limit on the values of the lower and upper bounds. When the - expression is compiled with the flag regex_constants::bk_braces then "{" and - "}" are ordinary characters and "\{" and "\}" are used to delimit bounds - instead. All repeat expressions refer to the shortest possible previous - sub-expression: a single character; a character set, or a sub-expression - grouped with "()" for example. -

-

Examples: -

-

"ba*" will match all of "b", "ba", "baaa" etc. -

-

"ba+" will match "ba" or "baaaa" for example but not "b". -

-

"ba?" will match "b" or "ba". -

-

"ba{2,4}" will match "baa", "baaa" and "baaaa". -

-

Non-greedy repeats -

-

Whenever the "extended" regular expression syntax is in use (the default) then - non-greedy repeats are possible by appending a '?' after the repeat; a - non-greedy repeat is one which will match the shortest possible string. -

-

For example to match html tag pairs one could use something like: -

-

"<\s*tagname[^>]*>(.*?)<\s*/tagname\s*>" -

-

In this case $1 will contain the text between the tag pairs, and will be the - shortest possible matching string.  -

-

Parenthesis -

-

Parentheses serve two purposes, to group items together into a sub-expression, - and to mark what generated the match. For example the expression "(ab)*" would - match all of the string "ababab". The matching algorithms - regex_match and regex_search each take - an instance of match_results that reports what - caused the match, on exit from these functions the match_results - contains information both on what the whole expression matched and on what each - sub-expression matched. In the example above match_results[1] would contain a - pair of iterators denoting the final "ab" of the matching string. It is - permissible for sub-expressions to match null strings. If a sub-expression - takes no part in a match - for example if it is part of an alternative that is - not taken - then both of the iterators that are returned for that - sub-expression point to the end of the input string, and the matched parameter - for that sub-expression is false. Sub-expressions are indexed from left - to right starting from 1, sub-expression 0 is the whole expression. -

-

Non-Marking Parenthesis -

-

Sometimes you need to group sub-expressions with parenthesis, but don't want - the parenthesis to spit out another marked sub-expression, in this case a - non-marking parenthesis (?:expression) can be used. For example the following - expression creates no sub-expressions: -

-

"(?:abc)*"

-

Forward Lookahead Asserts  -

-

There are two forms of these; one for positive forward lookahead asserts, and - one for negative lookahead asserts:

-

"(?=abc)" matches zero characters only if they are followed by the expression - "abc".

-

"(?!abc)" matches zero characters only if they are not followed by the - expression "abc".

-

Independent sub-expressions

-

"(?>expression)" matches "expression" as an independent atom (the algorithm - will not backtrack into it if a failure occurs later in the expression).

-

Alternatives -

-

Alternatives occur when the expression can match either one sub-expression or - another, each alternative is separated by a "|", or a "\|" if the flag - regex_constants::bk_vbar is set, or by a newline character if the flag - regex_constants::newline_alt is set. Each alternative is the largest possible - previous sub-expression; this is the opposite behavior from repetition - operators. -

-

Examples: -

-

"a(b|c)" could match "ab" or "ac". -

-

"abc|def" could match "abc" or "def". -

-

Sets -

-

A set is a set of characters that can match any single character that is a - member of the set. Sets are delimited by "[" and "]" and can contain literals, - character ranges, character classes, collating elements and equivalence - classes. Set declarations that start with "^" contain the complement of the - elements that follow. -

-

Examples: -

-

Character literals: -

-

"[abc]" will match either of "a", "b", or "c". -

-

"[^abc] will match any character other than "a", "b", or "c". -

-

Character ranges: -

-

"[a-z]" will match any character in the range "a" to "z". -

-

"[^A-Z]" will match any character other than those in the range "A" to "Z". -

-

Note that character ranges are highly locale dependent if the flag - regex_constants::collate is set: they match any character that collates between - the endpoints of the range, ranges will only behave according to ASCII rules - when the default "C" locale is in effect. For example if the library is - compiled with the Win32 localization model, then [a-z] will match the ASCII - characters a-z, and also 'A', 'B' etc, but not 'Z' which collates just after - 'z'. This locale specific behavior is disabled by default (in perl mode), and - forces ranges to collate according to ASCII character code. -

-

Character classes are denoted using the syntax "[:classname:]" within a set - declaration, for example "[[:space:]]" is the set of all whitespace characters. - Character classes are only available if the flag regex_constants::char_classes - is set. The available character classes are: -
-   -

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 alnumAny alpha numeric character. 
 alphaAny alphabetical character a-z and A-Z. Other - characters may also be included depending upon the locale. 
 blankAny blank character, either a space or a tab. 
 cntrlAny control character. 
 digitAny digit 0-9. 
 graphAny graphical character. 
 lowerAny lower case character a-z. Other characters may - also be included depending upon the locale. 
 printAny printable character. 
 punctAny punctuation character. 
 spaceAny whitespace character. 
 upperAny upper case character A-Z. Other characters may - also be included depending upon the locale. 
 xdigitAny hexadecimal digit character, 0-9, a-f and A-F. 
 wordAny word character - all alphanumeric characters plus - the underscore. 
 UnicodeAny character whose code is greater than 255, this - applies to the wide character traits classes only. 
-

-

There are some shortcuts that can be used in place of the character classes, - provided the flag regex_constants::escape_in_lists is set then you can use: -

-

\w in place of [:word:] -

-

\s in place of [:space:] -

-

\d in place of [:digit:] -

-

\l in place of [:lower:] -

-

\u in place of [:upper:]  -

-

Collating elements take the general form [.tagname.] inside a set declaration, - where tagname is either a single character, or a name of a collating - element, for example [[.a.]] is equivalent to [a], and [[.comma.]] is - equivalent to [,]. The library supports all the standard POSIX collating - element names, and in addition the following digraphs: "ae", "ch", "ll", "ss", - "nj", "dz", "lj", each in lower, upper and title case variations. - Multi-character collating elements can result in the set matching more than one - character, for example [[.ae.]] would match two characters, but note that - [^[.ae.]] would only match one character.  -

-

- Equivalence classes take the generalform[=tagname=] inside a set declaration, - where tagname is either a single character, or a name of a collating - element, and matches any character that is a member of the same primary - equivalence class as the collating element [.tagname.]. An equivalence class is - a set of characters that collate the same, a primary equivalence class is a set - of characters whose primary sort key are all the same (for example strings are - typically collated by character, then by accent, and then by case; the primary - sort key then relates to the character, the secondary to the accentation, and - the tertiary to the case). If there is no equivalence class corresponding to tagname - ,then[=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no - locale independent method of obtaining the primary sort key for a character, - except under Win32. For other operating systems the library will "guess" the - primary sort key from the full sort key (obtained from strxfrm), so - equivalence classes are probably best considered broken under any operating - system other than Win32.  -

-

To include a literal "-" in a set declaration then: make it the first character - after the opening "[" or "[^", the endpoint of a range, a collating element, or - if the flag regex_constants::escape_in_lists is set then precede with an escape - character as in "[\-]". To include a literal "[" or "]" or "^" in a set then - make them the endpoint of a range, a collating element, or precede with an - escape character if the flag regex_constants::escape_in_lists is set. -

-

Line anchors -

-

An anchor is something that matches the null string at the start or end of a - line: "^" matches the null string at the start of a line, "$" matches the null - string at the end of a line. -

-

Back references -

-

A back reference is a reference to a previous sub-expression that has already - been matched, the reference is to what the sub-expression matched, not to the - expression itself. A back reference consists of the escape character "\" - followed by a digit "1" to "9", "\1" refers to the first sub-expression, "\2" - to the second etc. For example the expression "(.*)\1" matches any string that - is repeated about its mid-point for example "abcabc" or "xyzxyz". A back - reference to a sub-expression that did not participate in any match, matches - the null string: NB this is different to some other regular expression - matchers. Back references are only available if the expression is compiled with - the flag regex_constants::bk_refs set. -

-

Characters by code -

-

This is an extension to the algorithm that is not available in other libraries, - it consists of the escape character followed by the digit "0" followed by the - octal character code. For example "\023" represents the character whose octal - code is 23. Where ambiguity could occur use parentheses to break the expression - up: "\0103" represents the character whose code is 103, "(\010)3 represents the - character 10 followed by "3". To match characters by their hexadecimal code, - use \x followed by a string of hexadecimal digits, optionally enclosed inside - {}, for example \xf0 or \x{aff}, notice the latter example is a Unicode - character.

-

Word operators -

-

The following operators are provided for compatibility with the GNU regular - expression library. -

-

"\w" matches any single character that is a member of the "word" character - class, this is identical to the expression "[[:word:]]". -

-

"\W" matches any single character that is not a member of the "word" character - class, this is identical to the expression "[^[:word:]]". -

-

"\<" matches the null string at the start of a word. -

-

"\>" matches the null string at the end of the word. -

-

"\b" matches the null string at either the start or the end of a word. -

-

"\B" matches a null string within a word. -

-

The start of the sequence passed to the matching algorithms is considered to be - a potential start of a word unless the flag match_not_bow is set. The end of - the sequence passed to the matching algorithms is considered to be a potential - end of a word unless the flag match_not_eow is set. -

-

Buffer operators -

-

The following operators are provided for compatibility with the GNU regular - expression library, and Perl regular expressions: -

-

"\`" matches the start of a buffer. -

-

"\A" matches the start of the buffer. -

-

"\'" matches the end of a buffer. -

-

"\z" matches the end of a buffer. -

-

"\Z" matches the end of a buffer, or possibly one or more new line characters - followed by the end of the buffer. -

-

A buffer is considered to consist of the whole sequence passed to the matching - algorithms, unless the flags match_not_bob or match_not_eob are set. -

-

Escape operator -

-

The escape character "\" has several meanings. -

-

Inside a set declaration the escape character is a normal character unless the - flag regex_constants::escape_in_lists is set in which case whatever follows the - escape is a literal character regardless of its normal meaning. -

-

The escape operator may introduce an operator for example: back references, or - a word operator. -

-

The escape operator may make the following character normal, for example "\*" - represents a literal "*" rather than the repeat operator. -

-

Single character escape sequences -

-

The following escape sequences are aliases for single characters: -
-   -

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 Escape sequence - Character code - Meaning -  
 \a - 0x07 - Bell character. -  
 \f - 0x0C - Form feed. -  
 \n - 0x0A - Newline character. -  
 \r - 0x0D - Carriage return. -  
 \t - 0x09 - Tab character. -  
 \v - 0x0B - Vertical tab. -  
 \e - 0x1B - ASCII Escape character. -  
 \0dd - 0dd - An octal character code, where dd is one or - more octal digits. -  
 \xXX - 0xXX - A hexadecimal character code, where XX is one or more - hexadecimal digits. -  
 \x{XX} - 0xXX - A hexadecimal character code, where XX is one or more - hexadecimal digits, optionally a Unicode character. -  
 \cZ - z-@ - An ASCII escape sequence control-Z, where Z is any - ASCII character greater than or equal to the character code for '@'. -  
-

-

Miscellaneous escape sequences: -

-

The following are provided mostly for perl compatibility, but note that there - are some differences in the meanings of \l \L \u and \U: -
-   -

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 \w - Equivalent to [[:word:]]. -  
 \W - Equivalent to [^[:word:]]. -  
 \s - Equivalent to [[:space:]]. -  
 \S - Equivalent to [^[:space:]]. -  
 \d - Equivalent to [[:digit:]]. -  
 \D - Equivalent to [^[:digit:]]. -  
 \l - Equivalent to [[:lower:]]. -  
 \L - Equivalent to [^[:lower:]]. -  
 \u - Equivalent to [[:upper:]]. -  
 \U - Equivalent to [^[:upper:]]. -  
 \C - Any single character, equivalent to '.'. -  
 \X - Match any Unicode combining character sequence, for - example "a\x 0301" (a letter a with an acute). -  
 \Q - The begin quote operator, everything that follows is - treated as a literal character until a \E end quote operator is found. -  
 \E - The end quote operator, terminates a sequence begun - with \Q. -  
-

-

What gets matched? -

-

- When the expression is compiled as a Perl-compatible regex then the matching - algorithms will perform a depth first search on the state machine and report - the first match found.

-

- When the expression is compiled as a POSIX-compatible regex then the matching - algorithms will match the first possible matching string, if more than one - string starting at a given location can match then it matches the longest - possible string, unless the flag match_any is set, in which case the first - match encountered is returned. Use of the match_any option can reduce the time - taken to find the match - but is only useful if the user is less concerned - about what matched - for example it would not be suitable for search and - replace operations. In cases where their are multiple possible matches all - starting at the same location, and all of the same length, then the match - chosen is the one with the longest first sub-expression, if that is the same - for two or more matches, then the second sub-expression will be examined and so - on. -

-

- The following table examples illustrate the main differences between Perl and - POSIX regular expression matching rules: -

-

- - - - - - - - - - - - - - - - - - - - - - - - - - -
-

Expression

-
-

Text

-
-

POSIX leftmost longest match

-
-

ECMAScript depth first search match

-
-

a|ab

-
-

xaby -

-
-

"ab"

-
-

"a"

-
-

.*([[:alnum:]]+).*

-
-

" abc def xyz "

-
-

$0 = " abc def xyz "
- $1 = "abc"

-
-

$0 = " abc def xyz "
- $1 = "z"

-
-

.*(a|xayy)

-
-

zzxayyzz

-
-

"zzxayy"

-
-

"zzxa"

-
-

These differences between Perl matching rules, and POSIX matching rules, mean - that these two regular expression syntaxes differ not only in the features - offered, but also in the form that the state machine takes and/or the - algorithms used to traverse the state machine.

+ depend upon the flags used during + expression compilation. +

+

There are three main syntax options available, depending upon how + you construct the regular expression object:

+ +

You can also construct a regular expression that treats every character as a + literal, but that's not really a "syntax"!


-

Revised +

Revised  - 24 Oct 2003 + 10 Sept 2004 

© Copyright John Maddock 1998- - 2003

+ 2004

Use, modification and distribution are subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

diff --git a/doc/syntax_basic.html b/doc/syntax_basic.html new file mode 100644 index 00000000..dc49248c --- /dev/null +++ b/doc/syntax_basic.html @@ -0,0 +1,226 @@ + + + + Boost.Regex: POSIX-Basic Regular Expression Syntax + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

POSIX Basic Regular Expression Syntax

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
POSIX Basic Syntax
+ Variations +
+
+
Grep
Emacs
+
+
Options
References
+
+

Synopsis

+

The POSIX-Basic regular expression syntax is used by the Unix utility sed, + and variations are used by grep and emacs.  You can + construct POSIX basic regular expressions in Boost.Regex by passing the flag basic + to the regex constructor, for example:

+
// e1 is a case sensitive POSIX-Basic expression:
+boost::regex e1(my_expression, boost::regex::basic);
+// e2 a case insensitive POSIX-Basic expression:
+boost::regex e2(my_expression, boost::regex::basic|boost::regex::icase);
+

POSIX Basic Syntax

+

In POSIX-Basic regular expressions, all characters match themselves except + for the following special characters:

+
.[\*^$
+

Wildcard:

+

The single character '.' when used outside of a character set will match any + single character except:

+

The NULL character when the flag match_not_dot_null is passed to the + matching algorithms.

+

The newline character when the flag match_not_dot_newline is passed to + the matching algorithms.

+

Anchors:

+

A '^' character shall match the start of a line when used as the first + character of an expression, or the first character of a sub-expression.

+

A '$' character shall match the end of a line when used as the last character + of an expression, or the last character of a sub-expression.

+

Marked sub-expressions:

+

A section beginning \( and ending \) acts as a marked sub-expression.  + Whatever matched the sub-expression is split out in a separate field by the + matching algorithms.  Marked sub-expressions can also be repeated, or referred to by a back-reference.

+

Repeats:

+

Any atom (a single character, a marked sub-expression, or a character class) + can be repeated with the * operator.

+

For example a* will match any number of letter a's repeated zero or more times + (an atom repeated zero times matches an empty string), so the expression a*b + will match any of the following:

+
b
+ab
+aaaaaaaab
+

An atom can also be repeated with a bounded repeat:

+

a\{n\}  Matches 'a' repeated exactly n times.

+

a\{n,\}  Matches 'a' repeated n or more times.

+

a\{n, m\}  Matches 'a' repeated between n and m times + inclusive.

+

For example:

+
^a\{2,3\}$
+

Will match either of:

+
aa
+aaa
+

But neither of:

+
a
+aaaa
+

It is an error to use a repeat operator, if the preceding construct can not be + repeated, for example:

+
a\(*\)
+

Will raise an error, as there is nothing for the * operator to be applied to.

+

Back references:

+

An escape character followed by a digit n, where n is in the + range 1-9, matches the same string that was matched by sub-expression n.  + For example the expression:

+
^\(a*\).*\1$
+

Will match the string:

+
aaabbaaa
+

But not the string:

+
aaabba
+

Character sets:

+

A character set is a bracket-expression starting with [ and ending with ], it + defines a set of characters, and matches any single character that is a member + of that set.

+

A bracket expression may contain any combination of the following:

+
+
Single characters:
+

For example [abc], will match any of the characters 'a', 'b', or 'c'.

+
Character ranges:
+

For example [a-c] will match any single character in the range 'a' to + 'c'.  By default, for POSIX-Basic regular expressions, a character x + is within the range y to z, if it collates within that + range; this results in locale specific behavior.  This behavior can + be turned off by unsetting the collate + option flag - in which case whether a character appears within a range is + determined by comparing the code points of the characters only

+
Negation:
+

If the bracket-expression begins with the ^ character, then it matches the + complement of the characters it contains, for example [^a-c] matches any + character that is not in the range a-c.

+
Character classes:
+

An expression of the form [[:name:]] matches the named character class "name", + for example [[:lower:]] matches any lower case character.  See + character class names.

+
Collating Elements:
+

An expression of the form [[.col.]] matches the collating element col.  + A collating element is any single character, or any sequence of characters that + collates as a single unit.  Collating elements may also be used as the end + point of a range, for example: [[.ae.]-c] matches the character sequence "ae", + plus any single character in the range "ae"-c, assuming that "ae" is treated + as a single collating element in the current locale.

+

As an extension, a collating element may also be specified via its + symbolic name, for example:

+

[[.NUL.]]

+

matches a NUL character.

+
Equivalence classes:
+

+ An expression of the form [[=col=]], matches any character or collating element + whose primary sort key is the same as that for collating element col, + as with collating elements the name col may be a + symbolic name.  A primary sort key is one that ignores case, + accentation, or locale-specific tailorings; so for example [[=a=]] matches any + of the characters: a, à, á, â, ã, ä, å, A, À, Á, Â, Ã, Ä and Å.  + Unfortunately implementation of this is reliant on the platform's collation and + localisation support; this feature can not be relied upon to work portably + across all platforms, or even all locales on one platform.

+
+
Combinations:
+

All of the above can be combined in one character set declaration, for example: + [[:digit:]a-c[.NUL.]].

+

Escapes

+

With the exception of the escape sequences \{, \}, \(, and \), which are + documented above, an escape followed by any character matches that + character.  This can be used to make the special characters .[\*^$, + "ordinary".  Note that the escape character loses its special meaning + inside a character set, so [\^] will match either a literal '\' or a '^'.

+

Variations

+

Grep

+

When an expression is compiled with the flag grep set, then the + expression is treated as a newline separated list of POSIX-Basic + expressions, a match is found if any of the expressions in the list match, for + example:

+
boost::regex e("abc\ndef", boost::regex::grep);
+

will match either of the POSIX-Basic expressions "abc" or "def".

+

As its name suggests, this behavior is consistent with the Unix utility grep.

+

emacs

+

In addition to the POSIX-Basic features the following + characters are also special:

+
+

+ repeats the preceding atom one or more times.

+

? repeats the preceding atom zero or one times.

+

*? A non-greedy version of *.

+

+? A non-greedy version of +.

+

?? A non-greedy version of ?.

+
+

And the following escape sequences are also recognised:

+
+

\| specifies an alternative.

+

\(?:  ...  \) is a non-marking grouping construct - allows you to + lexically group something without spitting out an extra sub-expression.

+

\w  matches any word character.

+

\W matches any non-word character.

+

\sx matches any character in the syntax group x, the following emacs + groupings are supported: 's', ' ', '_', 'w', '.', ')', '(', '"', '\'', '>' + and '<'.  Refer to the emacs docs for details.

+

\Sx matches any character not in the syntax grouping x.

+

\c and \C are not supported.

+

\` matches zero characters only at the start of a buffer (or string being + matched).

+

\' matches zero characters only at the end of a buffer (or string being + matched).

+

\b matches zero characters at a word boundary.

+

\B matches zero characters, not at a word boundary.

+

\< matches zero characters only at the start of a word.

+

\> matches zero characters only at the end of a word.

+
+

Options

+

There are a variety of flags that + may be combined with the basic and grep options when + constructing the regular expression, in particular note that the + newline_alt, no_char_classes, no_intervals, bk_plus_qm and bk_vbar options + all alter the syntax, while the collate + and icase options modify how the case and locale sensitivity are to be + applied.

+

References

+

IEEE + Std 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions + and Headers, Section 9, Regular Expressions (FWD.1).

+

IEEE + Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and + Utilities, Section 4, Utilities, grep (FWD.1).

+

Emacs Version 21.3.

+

+


+

+

Revised  + + 21 Aug 2004  +

+

© Copyright John Maddock 2004

+ +

Use, modification and distribution are subject to the Boost Software License, + Version 1.0. (See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt).

+
+ + + diff --git a/doc/syntax_extended.html b/doc/syntax_extended.html new file mode 100644 index 00000000..323b3e13 --- /dev/null +++ b/doc/syntax_extended.html @@ -0,0 +1,471 @@ + + + + Boost.Regex: POSIX-Extended Regular Expression Syntax + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

POSIX-Extended Regular Expression Syntax

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
POSIX Extended Syntax +
Variations +
+
+
egrep
awk 
+
+
Options
References
+
+

Synopsis

+

The POSIX-Extended regular expression syntax is supported by the POSIX C + regular expression API's, and variations are used by the utilities egrep + and awk. You can construct POSIX extended regular expressions in + Boost.Regex by passing the flag extended to the regex constructor, for + example:

+
// e1 is a case sensitive POSIX-Extended expression:
+boost::regex e1(my_expression, boost::regex::extended);
+// e2 a case insensitive POSIX-Extended expression:
+boost::regex e2(my_expression, boost::regex::extended|boost::regex::icase);
+

POSIX Extended Syntax

+

In POSIX-Extended regular expressions, all characters match themselves except + for the following special characters:

+
.[{()\*+?|^$
+

Wildcard:

+

The single character '.' when used outside of a character set will match any + single character except:

+

The NULL character when the flag match_not_dot_null is passed to the + matching algorithms.

+

The newline character when the flag match_not_dot_newline is passed to + the matching algorithms.

+

Anchors:

+

A '^' character shall match the start of a line when used as the first + character of an expression, or the first character of a sub-expression.

+

A '$' character shall match the end of a line when used as the last character + of an expression, or the last character of a sub-expression.

+

Marked sub-expressions:

+

A section beginning ( and ending ) acts as a marked sub-expression.  + Whatever matched the sub-expression is split out in a separate field by the + matching algorithms.  Marked sub-expressions can also be repeated, or referred + to by a back-reference.

+

Repeats:

+

Any atom (a single character, a marked sub-expression, or a character class) + can be repeated with the *, +, ?, and {} operators.

+

The * operator will match the preceding atom zero or more times, for example + the expression a*b will match any of the following:

+
b
+ab
+aaaaaaaab
+

The + operator will match the preceding atom one or more times, for example + the expression a+b will match any of the following:

+
ab
+aaaaaaaab
+

But will not match:

+
b
+

The ? operator will match the preceding atom zero or one times, for + example the expression ca?b will match any of the following:

+
cb
+cab
+

But will not match:

+
caab
+

An atom can also be repeated with a bounded repeat:

+

a{n}  Matches 'a' repeated exactly n times.

+

a{n,}  Matches 'a' repeated n or more times.

+

a{n, m}  Matches 'a' repeated between n and m times + inclusive.

+

For example:

+
^a{2,3}$
+

Will match either of:

+
aa
+aaa
+

But neither of:

+
a
+aaaa
+

It is an error to use a repeat operator, if the preceding construct can not be + repeated, for example:

+
a(*)
+

Will raise an error, as there is nothing for the * operator to be applied to.

+

Back references:

+

An escape character followed by a digit n, where n is in the + range 1-9, matches the same string that was matched by sub-expression n.  + For example the expression:

+
^(a*).*\1$
+

Will match the string:

+
aaabbaaa
+

But not the string:

+
aaabba
+

Caution: the POSIX standard does not support back-references + for "extended" regular expressions, this is a compatible extension to that + standard.

+

Alternation

+

The | operator will match either of its arguments, so for example: abc|def will + match either "abc" or "def".  +

+

Parentheses can be used to group alternations, for example: ab(d|ef) will match + either of "abd" or "abef".

+

Character sets:

+

A character set is a bracket-expression starting with [ and ending with ], it + defines a set of characters, and matches any single character that is a member + of that set.

+

A bracket expression may contain any combination of the following:

+
+
Single characters:
+

For example [abc], will match any of the characters 'a', 'b', or 'c'.

+
Character ranges:
+

For example [a-c] will match any single character in the range 'a' to + 'c'.  By default, for POSIX-Extended regular expressions, a character x + is within the range y to z, if it collates within that + range; this results in locale specific behavior.  This behavior can + be turned off by unsetting the collate + option flag - in which case whether a character appears within a range is + determined by comparing the code points of the characters only

+
Negation:
+

If the bracket-expression begins with the ^ character, then it matches the + complement of the characters it contains, for example [^a-c] matches any + character that is not in the range a-c.

+
Character classes:
+

An expression of the form [[:name:]] matches the named character class "name", + for example [[:lower:]] matches any lower case character.  See + character class names.

+
Collating Elements:
+

An expression of the form [[.col.]] matches the collating element col.  + A collating element is any single character, or any sequence of characters that + collates as a single unit.  Collating elements may also be used as the end + point of a range, for example: [[.ae.]-c] matches the character sequence "ae", + plus any single character in the range "ae"-c, assuming that "ae" is treated + as a single collating element in the current locale.

+

As an extension, a collating element may also be specified via its + symbolic name, for example:

+

[[.NUL.]]

+

matches a NUL character.

+
Equivalence classes:
+

+ An expression of the form [[=col=]], matches any character or collating element + whose primary sort key is the same as that for collating element col, + as with collating elements the name col may be a + symbolic name.  A primary sort key is one that ignores case, + accentation, or locale-specific tailorings; so for example [[=a=]] matches any + of the characters: a, à, á, â, ã, ä, å, A, À, Á, Â, Ã, Ä and Å.  + Unfortunately implementation of this is reliant on the platform's collation and + localisation support; this feature can not be relied upon to work portably + across all platforms, or even all locales on one platform.

+
+
Combinations:
+

All of the above can be combined in one character set declaration, for example: + [[:digit:]a-c[.NUL.]].

+

Operator precedence

+

 The order of precedence of operators is as shown in the following + table:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Collation-related bracket symbols[==] [::] [..]
Escaped characters + \
Character set (bracket expression) + []
Grouping()
Single-character-ERE duplication + * + ? {m,n}
Concatenation
Anchoring^$
Alternation|
+

+

Escapes

+

The POSIX standard defines no escape sequences for POSIX-Extended regular + expressions, except that:

+
    +
  • + Any special character preceded by an escape shall match itself. +
  • + The effect of any ordinary character being preceded by an escape is undefined. +
  • + An escape inside a character class declaration shall match itself (in other + words the escape character is not "special" inside a character class + declaration).
+

However, that's rather restrictive, so the following standard-compatible + extensions are also supported by Boost.Regex:

+
+
Escapes matching a specific character
+

The following escape sequences are all synonyms for single characters:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
EscapeCharacter
\a'\a'
\e0x1B
\f\f
\n\n
\r\r
\t\t
\v\v
\b\b (but only inside a character class declaration).
\cXAn ASCII escape sequence - the character whose code point is X % 32
\xddA hexadecimal escape sequence - matches the single character whose code point + is 0xdd.
\x{dddd}A hexadecimal escape sequence - matches the single character whose code point + is 0xdddd.
\0dddAn octal escape sequence - matches the single character whose code point is + 0ddd.
+

+
"Single character" character classes:
+

Any escaped character x, if x is the name of a character + class shall match any character that is a member of that class, and any escaped + character X, if x is the name of a character class, shall + match any character not in that class.

+

The following are supported by default:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Escape sequenceEquivalent to
\d[[:digit:]]
\l[[:lower:]]
\s[[:space:]]
\u[[:upper:]]
\w[[:word:]]
\D[^[:digit:]]
\L[^[:lower:]]
\S[^[:space:]]
\U[^[:upper:]]
\W[^[:word:]]
+

+
Word Boundaries
+

The following escape sequences match the boundaries of words:

+

+ + + + + + + + + + + + + + + + + +
\<Matches the start of a word.
\>Matches the end of a word.
\bMatches a word boundary (the start or end of a word).
\BMatches only when not at a word boundary.
+

+
Buffer boundaries
+

The following match only at buffer boundaries: a "buffer" in this context is + the whole of the input text that is being matched against (note that ^ and + $ may match embedded newlines within the text).

+

+ + + + + + + + + + + + + + + + + + + + + +
\`Matches at the start of a buffer only.
\'Matches at the end of a buffer only.
\AMatches at the start of a buffer only (the same as \`).
\zMatches at the end of a buffer only (the same as \').
\ZMatches an optional sequence of newlines at the end of a buffer: equivalent to + the regular expression \n*\z
+

+
Continuation Escape
+

The sequence \G matches only at the end of the last match found, or at the + start of the text being matched if no previous match was found.  This + escape is useful if you're iterating over the matches contained within a text, and + you want each subsequent match to start where the last one ended.

+
Quoting escape
+

The escape sequence \Q begins a "quoted sequence": all the subsequent + characters are treated as literals, until either the end of the regular + expression or \E is found.  For example the expression: \Q\*+\Ea+ would + match either of:

+
\*+a
\*+aaa
+
Unicode escapes
+

+ + + + + + + + + +
\CMatches a single code point: in Boost regex this has exactly the same effect + as a "." operator.
\XMatches a combining character sequence: that is any non-combining character + followed by a sequence of zero or more combining characters.
+

+
Any other escape
+

Any other escape sequence matches the character that is escaped, for example \@ + matches a literal '@'.

+
+

Variations

+

Egrep

+

When an expression is compiled with the flag egrep set, then the + expression is treated as a newline separated list of POSIX-Extended + expressions, a match is found if any of the expressions in the list match, for + example:

+
boost::regex e("abc\ndef", boost::regex::egrep);
+

will match either of the POSIX-Extended expressions "abc" or "def".

+

As its name suggests, this behavior is consistent with the Unix utility egrep, + and with grep when used with the -E option.

+

awk

+

In addition to the POSIX-Extended features the + escape character is special inside a character class declaration. 

+

In addition, some escape sequences that are not defined as part of the + POSIX-Extended specification are required to be supported - however Boost.Regex + supports these by default anyway.

+

Options

+

There are a variety of flags that + may be combined with the extended and egrep options when + constructing the regular expression, in particular note that the + newline_alt option alters the syntax, while the + collate, nosubs and icase options modify how the case and locale + sensitivity are to be applied.

+

References

+

IEEE + Std 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions + and Headers, Section 9, Regular Expressions.

+

IEEE + Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and + Utilities, Section 4, Utilities, egrep.

+

IEEE + Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and + Utilities, Section 4, Utilities, awk.

+
+

+

Revised  + + 21 Aug 2004  +

+

© Copyright John Maddock 2004

+ +

Use, modification and distribution are subject to the Boost Software License, + Version 1.0. (See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt).

+
+ + + diff --git a/doc/syntax_option_type.html b/doc/syntax_option_type.html index 6da58afc..291c24c0 100644 --- a/doc/syntax_option_type.html +++ b/doc/syntax_option_type.html @@ -175,7 +175,7 @@ static const syntax_option_type collate; No Normally Boost.Regex behaves as if the Perl m-modifier is on: so the assertions ^ and $ match after and before embedded newlines respectively, - setting this flags is eqivalent to prefixing the expression with (?-m). + setting this flags is equivalent to prefixing the expression with (?-m). no_mod_s @@ -251,7 +251,7 @@ static const syntax_option_type collate; character classes permitted.

In addition some perl-style escape sequences are supported (actually the awk syntax requires \a \b \t \v \f \n and \r to be recognised, but other - escape sequences invoke undefined behaviour according to the POSIX standard).

+ escape sequences invoke undefined behavior according to the POSIX standard).

@@ -324,9 +324,9 @@ static const syntax_option_type collate; Yes

Specifies that the grammar recognized by the regular expression engine is the - same as that used by POSIX basic regular expressions in IEEE Std 1003.1-2001, - Portable Operating System Interface (POSIX ), Base Definitions and Headers, - Section 9, Regular Expressions (FWD.1). + same as that used by POSIX basic regular + expressions in IEEE Std 1003.1-2001, Portable Operating System Interface + (POSIX ), Base Definitions and Headers, Section 9, Regular Expressions (FWD.1).

@@ -340,13 +340,20 @@ static const syntax_option_type collate; Yes

Specifies that the grammar recognized by the regular expression engine is the - same as that used by POSIX utility grep in IEEE Std 1003.1-2001, Portable - Operating System Interface (POSIX ), Shells and Utilities, Section 4, - Utilities, grep (FWD.1).

+ same as that used by POSIX utility grep in + IEEE Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and + Utilities, Section 4, Utilities, grep (FWD.1).

That is to say, the same as POSIX basic syntax, but with the newline character - acting as an alternation character in addition to "|".

+ acting as an alternation character; the expression is treated as a newline + separated list of alternatives.

+ + emacs + No + Specifies that the grammar recognised is the superset of the POSIX-Basic + syntax used by the emacs program. +

The following options may also be set when using POSIX basic regular @@ -390,7 +397,10 @@ static const syntax_option_type collate; collate Yes -

Specifies that character ranges of the form "[a-b]" should be locale sensitive.

+

Specifies that character ranges of the form "[a-b]" should be locale + sensitive.  This bit is on by default for + POSIX-Basic regular expressions, but can be unset to force ranges to be + compared by code point only.

@@ -398,7 +408,7 @@ static const syntax_option_type collate; No Specifies that the \n character has the same effect as the alternation operator |.  Allows newline separated lists to be used as a list of - alternatives. + alternatives.  This bit is already set, if you use the grep option. no_char_classes @@ -482,3 +492,4 @@ static const syntax_option_type collate; or copy at http://www.boost.org/LICENSE_1_0.txt)

+ diff --git a/doc/syntax_perl.html b/doc/syntax_perl.html new file mode 100644 index 00000000..2af1260b --- /dev/null +++ b/doc/syntax_perl.html @@ -0,0 +1,502 @@ + + + + Boost.Regex: Perl Regular Expression Syntax + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

+ Perl Regular Expression Syntax

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
Perl Syntax
+ Variations +
+
Options
Modifiers
References
+
+

Synopsis

+

The Perl regular expression syntax is based on that used by the programming + language Perl .  Perl regular expressions are the default + behavior in Boost.Regex or you can pass the flag perl to the + regex constructor, for example:

+
// e1 is a case sensitive Perl regular expression: 
+// since Perl is the default option there's no need to explicitly specify the syntax used here:
+boost::regex e1(my_expression);
+// e2 a case insensitive Perl regular expression:
+boost::regex e2(my_expression, boost::regex::perl|boost::regex::icase);
+

Perl Regular Expression Syntax

+

In Perl regular expressions, all characters match themselves except for + the following special characters:

+
.[{()\*+?|^$
+

Wildcard:

+

The single character '.' when used outside of a character set will match any + single character except:

+

The NULL character when the flag match_not_dot_null is passed to the + matching algorithms.

+

The newline character when the flag match_not_dot_newline is passed to + the matching algorithms.

+

Anchors:

+

A '^' character shall match the start of a line.

+

A '$' character shall match the end of a line.

+

Marked sub-expressions:

+

A section beginning ( and ending ) acts as a marked sub-expression.  + Whatever matched the sub-expression is split out in a separate field by the + matching algorithms.  Marked sub-expressions can also be repeated, or referred + to by a back-reference.

+

Non-marking grouping:

+

A marked sub-expression is useful to lexically group part of a regular + expression, but has the side-effect of spitting out an extra field in the + result.  As an alternative you can lexically group part of a regular + expression, without generating a marked sub-expression by using (?: and ) , for + example (?:ab)+ will repeat "ab" without splitting out any separate + sub-expressions.

+

Repeats:

+

Any atom (a single character, a marked sub-expression, or a character class) + can be repeated with the *, +, ?, and {} operators.

+

The * operator will match the preceding atom zero or more times, for example + the expression a*b will match any of the following:

+
b
+ab
+aaaaaaaab
+

The + operator will match the preceding atom one or more times, for example + the expression a+b will match any of the following:

+
ab
+aaaaaaaab
+

But will not match:

+
b
+

The ? operator will match the preceding atom zero or one times, for + example the expression ca?b will match any of the following:

+
cb
+cab
+

But will not match:

+
caab
+

An atom can also be repeated with a bounded repeat:

+

a{n}  Matches 'a' repeated exactly n times.

+

a{n,}  Matches 'a' repeated n or more times.

+

a{n, m}  Matches 'a' repeated between n and m times + inclusive.

+

For example:

+
^a{2,3}$
+

Will match either of:

+
aa
+aaa
+

But neither of:

+
a
+aaaa
+

It is an error to use a repeat operator, if the preceding construct can not be + repeated, for example:

+
a(*)
+

Will raise an error, as there is nothing for the * operator to be applied to.

+

Non greedy repeats

+

The normal repeat operators are "greedy", that is to say they will consume as + much input as possible.  There are non-greedy versions available that will + consume as little input as possible while still producing a match.

+

*? Matches the previous atom zero or more times, while consuming as little + input as possible.

+

+? Matches the previous atom one or more times, while consuming as little input + as possible.

+

?? Matches the previous atom zero or one times, while consuming as little input + as possible.

+

{n,}? Matches the previous atom n or more times, while consuming + as little input as possible.

+

{n,m}? Matches the previous atom between n and m times, + while consuming as little input as possible.

+

Back references:

+

An escape character followed by a digit n, where n is in the + range 1-9, matches the same string that was matched by sub-expression n.  + For example the expression:

+
^(a*).*\1$
+

Will match the string:

+
aaabbaaa
+

But not the string:

+
aaabba
+

Alternation

+

The | operator will match either of its arguments, so for example: abc|def will + match either "abc" or "def".  +

+

Parentheses can be used to group alternations, for example: ab(d|ef) will match + either of "abd" or "abef".

+

Character sets:

+

A character set is a bracket-expression starting with [ and ending with ], it + defines a set of characters, and matches any single character that is a member + of that set.

+

A bracket expression may contain any combination of the following:

+
+
Single characters:
+

For example [abc], will match any of the characters 'a', 'b', or 'c'.

+
Character ranges:
+

For example [a-c] will match any single character in the range 'a' to + 'c'.  By default, for Perl regular expressions, a character x + is within the range y to z, if it collates within that + range; this results in locale specific behavior.  This behavior can + be turned off by unsetting the collate + option flag - in which case whether a character appears within a range is + determined by comparing the code points of the characters only.

+
Negation:
+

If the bracket-expression begins with the ^ character, then it matches the + complement of the characters it contains, for example [^a-c] matches any + character that is not in the range a-c.

+
Character classes:
+

An expression of the form [[:name:]] matches the named character class "name", + for example [[:lower:]] matches any lower case character.  See + character class names.

+
Collating Elements:
+

An expression of the form [[.col.]] matches the collating element col.  + A collating element is any single character, or any sequence of characters that + collates as a single unit.  Collating elements may also be used as the end + point of a range, for example: [[.ae.]-c] matches the character sequence "ae", + plus any single character in the range "ae"-c, assuming that "ae" is treated + as a single collating element in the current locale.

+

As an extension, a collating element may also be specified via its + symbolic name, for example:

+

[[.NUL.]]

+

matches a NUL character.

+
Equivalence classes:
+

+ An expression of the form [[=col=]], matches any character or collating element + whose primary sort key is the same as that for collating element col, + as with collating elements the name col may be a + symbolic name.  A primary sort key is one that ignores case, + accentation, or locale-specific tailorings; so for example [[=a=]] matches any + of the characters: a, à, á, â, ã, ä, å, A, À, Á, Â, Ã, Ä and Å.  + Unfortunately implementation of this is reliant on the platform's collation and + localisation support; this feature can not be relied upon to work portably + across all platforms, or even all locales on one platform.

+
Escapes:
+

All the escape sequences that match a single character, or a single character + class are permitted within a character class definition, except the + negated character classes (\D \W etc).

+
+
Combinations:
+

All of the above can be combined in one character set declaration, for example: + [[:digit:]a-c[.NUL.]].

+

Operator precedence

+

 The order of precedence of operators is as shown in the following + table:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Collation-related bracket symbols[==] [::] [..]
Escaped characters + \
Character set (bracket expression) + []
Grouping()
Single-character-ERE duplication + * + ? {m,n}
Concatenation
Anchoring^$
Alternation|
+

+

Escapes

+

Any special character preceded by an escape shall match itself. +

+

The following escape sequences are also supported:

+
+
Escapes matching a specific character
+

The following escape sequences are all synonyms for single characters:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
EscapeCharacter
\a'\a'
\e0x1B
\f\f
\n\n
\r\r
\t\t
\v\v
\b\b (but only inside a character class declaration).
\cXAn ASCII escape sequence - the character whose code point is X % 32
\xddA hexadecimal escape sequence - matches the single character whose code point + is 0xdd.
\x{dddd}A hexadecimal escape sequence - matches the single character whose code point + is 0xdddd.
\0dddAn octal escape sequence - matches the single character whose code point is + 0ddd.
+

+
"Single character" character classes:
+

Any escaped character x, if x is the name of a character + class shall match any character that is a member of that class, and any escaped + character X, if x is the name of a character class, shall + match any character not in that class.

+

The following are supported by default:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Escape sequenceEquivalent to
\d[[:digit:]]
\l[[:lower:]]
\s[[:space:]]
\u[[:upper:]]
\w[[:word:]]
\D[^[:digit:]]
\L[^[:lower:]]
\S[^[:space:]]
\U[^[:upper:]]
\W[^[:word:]]
+

+
Word Boundaries
+

The following escape sequences match the boundaries of words:

+

+ + + + + + + + + + + + + + + + + +
\<Matches the start of a word.
\>Matches the end of a word.
\bMatches a word boundary (the start or end of a word).
\BMatches only when not at a word boundary.
+

+
Buffer boundaries
+

The following match only at buffer boundaries: a "buffer" in this context is + the whole of the input text that is being matched against (note that ^ and + $ may match embedded newlines within the text).

+

+ + + + + + + + + + + + + + + + + + + + + +
\`Matches at the start of a buffer only.
\'Matches at the end of a buffer only.
\AMatches at the start of a buffer only (the same as \`).
\zMatches at the end of a buffer only (the same as \').
\ZMatches an optional sequence of newlines at the end of a buffer: equivalent to + the regular expression \n*\z
+

+
Continuation Escape
+

The sequence \G matches only at the end of the last match found, or at the + start of the text being matched if no previous match was found.  This + escape is useful if you're iterating over the matches contained within a text, and + you want each subsequent match to start where the last one ended.

+
Quoting escape
+

The escape sequence \Q begins a "quoted sequence": all the subsequent + characters are treated as literals, until either the end of the regular + expression or \E is found.  For example the expression: \Q\*+\Ea+ would + match either of:

+
\*+a
\*+aaa
+
Unicode escapes
+

+ + + + + + + + + +
\CMatches a single code point: in Boost regex this has exactly the same effect + as a "." operator.
\XMatches a combining character sequence: that is any non-combining character + followed by a sequence of zero or more combining characters.
+

+
Any other escape
+

Any other escape sequence matches the character that is escaped, for example \@ + matches a literal '@'.

+
+

Perl Extended Patterns

+

Perl-specific extensions to the regular expression syntax all start + with (?.

+
+
Comments
+

(?# ... ) is treated as a comment, its contents are ignored.

+
Modifiers
+

(?imsx-imsx ... ) alters which of the perl modifiers are in effect + within the pattern, changes take effect from the point that the block is first + seen and extend to any enclosing ).  Letters before a '-' turn that perl + modifier on, letters afterward, turn it off.

+

(?imsx-imsx:pattern) applies the specified modifiers to pattern + only.

+
Non-marking grouping
+

(?:pattern) lexically groups pattern, without generating an + additional sub-expression.

+
Lookahead
+

(?=pattern) consumes zero characters, only if pattern matches.

+

(?!pattern) consumes zero characters, only if pattern does + not match.

+
Lookbehind
+

(?<=pattern) consumes zero characters, only if pattern could + be matched against the characters preceding the current position (pattern + must be of fixed length).

+

(?<!pattern) consumes zero characters, only if pattern could + not be matched against the characters preceding the current position (pattern + must be of fixed length).

+
Independent sub-expressions
+

(?>pattern) pattern is matched independently of the + surrounding patterns, the expression will never backtrack into pattern.

+
Conditional Expressions
+

(?(condition)yes-pattern|no-pattern) attempts to match yes-pattern + if the condition is true, otherwise attempts to match no-pattern.

+

(?(condition)yes-pattern) attempts to match yes-pattern if + the condition is true, otherwise fails.

+

Condition may be either a forward lookahead assert, or the + index of a marked sub-expression (the condition becomes true if the + sub-expression has been matched).

+
+

Variations

+

The options normal, ECMAScript, JavaScript + and JScript are all synonyms for Perl.

+

Options

+

There are a variety of flags that + may be combined with the Perl option when constructing the regular + expression, in particular note that the newline_alt + option alters the syntax, while the collate, + nosubs and icase options modify how the case and locale sensitivity + are to be applied.

+

Modifiers

+

The perl smix modifiers can either be applied using a (?smix-smix) + prefix to the regular expression, or with one of the regex-compile time flags + no_mod_m, mod_x, mod_s, and no_mod_s. +

+

References

+

Perl 5.6.

+
+

+

Revised  + + 21 Aug 2004  +

+

© Copyright John Maddock 2004

+ +

Use, modification and distribution are subject to the Boost Software License, + Version 1.0. (See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt).

+
+ + + diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index ed3d06b9..b8ae7b29 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -51,6 +51,7 @@ public: bool parse_inner_set(basic_char_set& char_set); bool parse_QE(); bool parse_perl_extension(); + bool add_emacs_code(bool negate); digraph get_next_set_literal(basic_char_set& char_set); charT unescape_character(); regex_constants::syntax_option_type parse_options(); @@ -183,6 +184,22 @@ bool basic_regex_parser::parse_basic() ++m_position; return parse_repeat(); } + case regex_constants::syntax_plus: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(1); + } + case regex_constants::syntax_question: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(0, 1); + } case regex_constants::syntax_open_set: return parse_set(); default: @@ -301,7 +318,10 @@ bool basic_regex_parser::parse_open_paren() // // begin by checking for a perl-style (?...) 
extension: // - if((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0) + if( + ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0) + || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) + ) { if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question) return parse_perl_extension(); @@ -377,7 +397,7 @@ template bool basic_regex_parser::parse_basic_escape() { ++m_position; - bool result; + bool result = true; switch(this->m_traits.escape_syntax_type(*m_position)) { case regex_constants::syntax_open_mark: @@ -418,7 +438,97 @@ bool basic_regex_parser::parse_basic_escape() break; case regex_constants::syntax_digit: return parse_backref(); + case regex_constants::escape_type_start_buffer: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_buffer_start); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_end_buffer: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_buffer_end); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_word_assert: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_boundary); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_not_word_assert: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_within_word); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_left_word: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_start); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_right_word: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_end); + } + else + result = 
parse_literal(); + break; default: + if(this->flags() & regbase::emacs_ex) + { + bool negate = true; + switch(*m_position) + { + case 'w': + negate = false; + // fall through: + case 'W': + { + basic_char_set char_set; + if(negate) + char_set.negate(); + char_set.add_class(this->m_word_mask); + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; + return true; + } + case 's': + negate = false; + // fall through: + case 'S': + return add_emacs_code(negate); + case 'c': + case 'C': + // not supported yet: + fail(regex_constants::error_escape, m_position - m_base); + return false; + default: + break; + } + } result = parse_literal(); break; } @@ -447,7 +557,7 @@ bool basic_regex_parser::parse_extended_escape() char_set.add_class(m); if(0 == this->append_set(char_set)) { - fail(regex_constants::error_range, m_position - m_base); + fail(regex_constants::error_ctype, m_position - m_base); return false; } ++m_position; @@ -533,7 +643,11 @@ bool basic_regex_parser::parse_repeat(std::size_t low, std::size_ // when we get to here we may have a non-greedy ? mark still to come: // if((m_position != m_end) - && (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))) + && ( + (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex))) + ) + ) { // OK we have a perl regex, check for a '?': if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question) @@ -1622,6 +1736,85 @@ bool basic_regex_parser::parse_perl_extension() return true; } +template +bool basic_regex_parser::add_emacs_code(bool negate) +{ + // + // parses an emacs style \sx or \Sx construct. 
+ // + if(++m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + basic_char_set char_set; + if(negate) + char_set.negate(); + + static const charT s_punct[] = { 'p', 'u', 'n', 'c', 't', }; + + switch(*m_position) + { + case 's': + case ' ': + char_set.add_class(this->m_mask_space); + break; + case 'w': + char_set.add_class(this->m_word_mask); + break; + case '_': + char_set.add_single(digraph(charT('$'))); + char_set.add_single(digraph(charT('&'))); + char_set.add_single(digraph(charT('*'))); + char_set.add_single(digraph(charT('+'))); + char_set.add_single(digraph(charT('-'))); + char_set.add_single(digraph(charT('_'))); + char_set.add_single(digraph(charT('<'))); + char_set.add_single(digraph(charT('>'))); + break; + case '.': + char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5)); + break; + case '(': + char_set.add_single(digraph(charT('('))); + char_set.add_single(digraph(charT('['))); + char_set.add_single(digraph(charT('{'))); + break; + case ')': + char_set.add_single(digraph(charT(')'))); + char_set.add_single(digraph(charT(']'))); + char_set.add_single(digraph(charT('}'))); + break; + case '"': + char_set.add_single(digraph(charT('"'))); + char_set.add_single(digraph(charT('\''))); + char_set.add_single(digraph(charT('`'))); + break; + case '\'': + char_set.add_single(digraph(charT('\''))); + char_set.add_single(digraph(charT(','))); + char_set.add_single(digraph(charT('#'))); + break; + case '<': + char_set.add_single(digraph(charT(';'))); + break; + case '>': + char_set.add_single(digraph(charT('\n'))); + char_set.add_single(digraph(charT('\f'))); + break; + default: + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; + return true; +} + template regex_constants::syntax_option_type basic_regex_parser::parse_options() { 
diff --git a/include/boost/regex/v4/cpp_regex_traits.hpp b/include/boost/regex/v4/cpp_regex_traits.hpp index b3df768f..56004d86 100644 --- a/include/boost/regex/v4/cpp_regex_traits.hpp +++ b/include/boost/regex/v4/cpp_regex_traits.hpp @@ -30,11 +30,14 @@ #include #endif #ifdef BOOST_HAS_THREADS -#include +#include #endif #ifndef BOOST_REGEX_PRIMARY_TRANSFORM #include #endif +#ifndef BOOST_REGEX_OBJECT_CACHE_HPP +#include +#endif #ifdef BOOST_HAS_ABI_HEADERS # include BOOST_ABI_PREFIX @@ -165,6 +168,25 @@ struct cpp_regex_traits_base std::messages const* m_pmessages; #endif std::collate const* m_pcollate; + + bool operator<(const cpp_regex_traits_base& b)const + { + if(m_pctype == b.m_pctype) + { + if(m_pmessages == b.m_pmessages) + { + return m_pcollate < b.m_pcollate; + } + return m_pmessages < b.m_pmessages; + } + return m_pctype < b.m_pctype; + } + bool operator==(const cpp_regex_traits_base& b)const + { + return (m_pctype == b.m_pctype) + && (m_pmessages == b.m_pmessages) + && (m_pcollate == b.m_pcollate); + } }; template @@ -191,7 +213,17 @@ class cpp_regex_traits_char_layer : public cpp_regex_traits_base typedef std::map map_type; typedef typename map_type::const_iterator map_iterator_type; public: - cpp_regex_traits_char_layer(const std::locale& l); + cpp_regex_traits_char_layer(const std::locale& l) + : cpp_regex_traits_base(l) + { + init(); + } + cpp_regex_traits_char_layer(const cpp_regex_traits_base& b) + : cpp_regex_traits_base(b) + { + init(); + } + void init(); regex_constants::syntax_type syntax_type(charT c)const { @@ -217,8 +249,7 @@ private: }; template -cpp_regex_traits_char_layer::cpp_regex_traits_char_layer(const std::locale& l) - : cpp_regex_traits_base(l) +void cpp_regex_traits_char_layer::init() { // we need to start by initialising our syntax map so we know which // character is used for which purpose: @@ -307,6 +338,11 @@ public: { init(); } + cpp_regex_traits_char_layer(const cpp_regex_traits_base& l) + : cpp_regex_traits_base(l) + { + 
init(); + } regex_constants::syntax_type syntax_type(char c)const { @@ -393,7 +429,16 @@ public: typedef std::basic_string string_type; typedef charT char_type; //cpp_regex_traits_implementation(); - cpp_regex_traits_implementation(const std::locale& l); + cpp_regex_traits_implementation(const std::locale& l) + : cpp_regex_traits_char_layer(l), m_is(&m_sbuf) + { + init(); + } + cpp_regex_traits_implementation(const cpp_regex_traits_base& l) + : cpp_regex_traits_char_layer(l), m_is(&m_sbuf) + { + init(); + } std::string error_string(regex_constants::error_type n) const { if(!m_error_strings.empty()) @@ -429,6 +474,7 @@ private: // helpers: // char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const; + void init(); #ifdef BOOST_REGEX_BUGGY_CTYPE_FACET public: bool isctype(charT c, char_class_type m)const; @@ -605,8 +651,7 @@ typename cpp_regex_traits_implementation::string_type } template -cpp_regex_traits_implementation::cpp_regex_traits_implementation(const std::locale& l) -: cpp_regex_traits_char_layer(l), m_is(&m_sbuf) +void cpp_regex_traits_implementation::init() { #ifndef BOOST_NO_STD_MESSAGES #ifndef __IBMCPP__ @@ -798,8 +843,8 @@ bool cpp_regex_traits_implementation::isctype(const charT c, char_class_t template boost::shared_ptr > create_cpp_regex_traits(const std::locale& l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT)) { - // TODO: create a cache for previously constructed objects. 
- return boost::shared_ptr >(new cpp_regex_traits_implementation(l)); + cpp_regex_traits_base key(l); + return ::boost::object_cache, cpp_regex_traits_implementation >::get(key, 5); } } // re_detail diff --git a/include/boost/regex/v4/instances.hpp b/include/boost/regex/v4/instances.hpp index 762e39be..7ab53716 100644 --- a/include/boost/regex/v4/instances.hpp +++ b/include/boost/regex/v4/instances.hpp @@ -57,7 +57,7 @@ template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T >; # include BOOST_ABI_SUFFIX #endif -#elif defined(BOOST_MSVC) +#elif defined(BOOST_MSVC) || defined(BOOST_INTEL) || defined(__GNUC__) # ifndef BOOST_REGEX_INSTANTIATE # define template extern template @@ -69,6 +69,8 @@ template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T >; # endif template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T >; +template class BOOST_REGEX_DECL match_results< const BOOST_REGEX_CHAR_T* >; +template class BOOST_REGEX_DECL match_results< std::basic_string::const_iterator >; # ifdef BOOST_MSVC # pragma warning(pop) diff --git a/include/boost/regex/v4/match_results.hpp b/include/boost/regex/v4/match_results.hpp index 8bdefac1..eef5f587 100644 --- a/include/boost/regex/v4/match_results.hpp +++ b/include/boost/regex/v4/match_results.hpp @@ -71,7 +71,7 @@ public: // size: size_type size() const - { return m_subs.size() - 2; } + { return empty() ? 
0 : m_subs.size() - 2; } size_type max_size() const { return m_subs.max_size(); } bool empty() const @@ -235,7 +235,7 @@ public: size_type len = m_subs.size(); if(len > n + 2) { - m_subs.erase(m_subs.begin()+n+2); + m_subs.erase(m_subs.begin()+n+2, m_subs.end()); std::fill(m_subs.begin(), m_subs.end(), v); } else diff --git a/include/boost/regex/v4/mem_block_cache.hpp b/include/boost/regex/v4/mem_block_cache.hpp index a021943a..d9cba994 100644 --- a/include/boost/regex/v4/mem_block_cache.hpp +++ b/include/boost/regex/v4/mem_block_cache.hpp @@ -20,7 +20,7 @@ #include #ifdef BOOST_HAS_THREADS -#include +#include #endif #ifdef BOOST_HAS_ABI_HEADERS diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index 74c41da8..5d30e83f 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -54,7 +54,9 @@ perl_matcher::perl_matcher(BidiIterator first, estimate_max_state_count(static_cast(0)); if(!(m_match_flags & (match_perl|match_posix))) { - if((re.flags() & regbase::no_perl_ex) == 0) + if((re.flags() & (regbase::main_option_type|regbase::no_perl_ex)) == 0) + m_match_flags |= match_perl; + else if((re.flags() & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) m_match_flags |= match_perl; else m_match_flags |= match_posix; @@ -80,15 +82,17 @@ perl_matcher::perl_matcher(BidiIterator first, template void perl_matcher::estimate_max_state_count(std::random_access_iterator_tag*) { + static const difference_type k = 100000; difference_type dist = boost::re_detail::distance(base, last); traits_size_type states = static_cast(re.size()); states *= states; - difference_type lim = (std::numeric_limits::max)() - 100000 - states; - if(dist > (difference_type)(lim / states)) - max_state_count = lim; + difference_type lim = ((std::numeric_limits::max)() - k) / states; + if(dist >= lim) + max_state_count = 
(std::numeric_limits::max)(); else - max_state_count = 100000 + states * dist; + max_state_count = k + states * dist; } + template void perl_matcher::estimate_max_state_count(void*) { diff --git a/include/boost/regex/v4/regbase.hpp b/include/boost/regex/v4/regbase.hpp index 0db47417..7e62eab7 100644 --- a/include/boost/regex/v4/regbase.hpp +++ b/include/boost/regex/v4/regbase.hpp @@ -64,6 +64,7 @@ public: no_intervals = 1 << 9, // {x,y} not allowed bk_plus_qm = 1 << 10, // uses \+ and \? bk_vbar = 1 << 11, // use \| for alternatives + emacs_ex = 1 << 12, // enables emacs extensions // // options common to all groups: @@ -83,7 +84,7 @@ public: basic = basic_syntax_group | collate | no_escape_in_lists, extended = no_bk_refs | collate | no_perl_ex | no_escape_in_lists, normal = 0, - emacs = basic | no_char_classes | no_intervals, + emacs = basic_syntax_group | collate | emacs_ex | bk_vbar, awk = no_bk_refs | collate | no_perl_ex, grep = basic | newline_alt, egrep = extended | newline_alt, diff --git a/include/boost/regex/v4/regex.hpp b/include/boost/regex/v4/regex.hpp index e88461dc..9a879a3c 100644 --- a/include/boost/regex/v4/regex.hpp +++ b/include/boost/regex/v4/regex.hpp @@ -81,6 +81,12 @@ #ifndef BOOST_REGEX_V4_MATCH_RESULTS_HPP #include #endif +#ifndef BOOST_REGEX_V4_PROTECTED_CALL_HPP +#include +#endif +#ifndef BOOST_REGEX_MATCHER_HPP +#include +#endif // // template instances: @@ -136,12 +142,6 @@ typedef match_results wsmatch; #endif } // namespace boost -#ifndef BOOST_REGEX_V4_PROTECTED_CALL_HPP -#include -#endif -#ifndef BOOST_REGEX_MATCHER_HPP -#include -#endif #ifndef BOOST_REGEX_MATCH_HPP #include #endif diff --git a/include/boost/regex/v4/regex_traits.hpp b/include/boost/regex/v4/regex_traits.hpp index 4d9b50f3..044192d5 100644 --- a/include/boost/regex/v4/regex_traits.hpp +++ b/include/boost/regex/v4/regex_traits.hpp @@ -53,7 +53,7 @@ #include #endif -#include "boost/mpl/aux_/has_xxx.hpp" +#include "boost/mpl/has_xxx.hpp" #include #ifdef 
BOOST_HAS_ABI_HEADERS diff --git a/include/boost/regex/v4/w32_regex_traits.hpp b/include/boost/regex/v4/w32_regex_traits.hpp index cdbfbe5e..c9c42dea 100644 --- a/include/boost/regex/v4/w32_regex_traits.hpp +++ b/include/boost/regex/v4/w32_regex_traits.hpp @@ -23,11 +23,14 @@ #include #endif #ifdef BOOST_HAS_THREADS -#include +#include #endif #ifndef BOOST_REGEX_PRIMARY_TRANSFORM #include #endif +#ifndef BOOST_REGEX_OBJECT_CACHE_HPP +#include +#endif #ifdef BOOST_HAS_ABI_HEADERS # include BOOST_ABI_PREFIX @@ -534,7 +537,7 @@ template boost::shared_ptr > create_w32_regex_traits(::boost::re_detail::lcid_type l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT)) { // TODO: create a cache for previously constructed objects. - return boost::shared_ptr >(new w32_regex_traits_implementation(l)); + return boost::object_cache< ::boost::re_detail::lcid_type, w32_regex_traits_implementation >::get(l, 5); } } // re_detail diff --git a/performance/Jamfile b/performance/Jamfile index 6be58f0a..48a395ff 100644 --- a/performance/Jamfile +++ b/performance/Jamfile @@ -2,7 +2,7 @@ subproject libs/regex/performance ; -SOURCES = command_line main time_boost time_greta time_localised_boost time_pcre time_posix time_safe_greta ; +SOURCES = command_line main time_boost time_greta time_localised_boost time_pcre time_dynamic_xpressive time_posix time_safe_greta ; if $(HS_REGEX_PATH) { @@ -42,3 +42,4 @@ exe regex_comparison : + diff --git a/performance/command_line.cpp b/performance/command_line.cpp index f444d63d..79b90a19 100644 --- a/performance/command_line.cpp +++ b/performance/command_line.cpp @@ -33,6 +33,7 @@ bool time_greta = false; bool time_safe_greta = false; bool time_posix = false; bool time_pcre = false; +bool time_xpressive = false; bool test_matches = false; bool test_code = false; @@ -79,6 +80,10 @@ int handle_argument(const std::string& what) #ifdef BOOST_HAS_PCRE else if(what == "-pcre") time_pcre = true; +#endif +#ifdef BOOST_HAS_XPRESSIVE + else if(what == "-xpressive") + 
time_xpressive = true; #endif else if(what == "-all") { diff --git a/performance/main.cpp b/performance/main.cpp index 33280a9a..486cb8da 100644 --- a/performance/main.cpp +++ b/performance/main.cpp @@ -65,6 +65,14 @@ void test_match(const std::string& re, const std::string& text, const std::strin r.pcre_time = time; std::cout << "\tPCRE regex: " << time << "s\n"; } +#endif +#ifdef BOOST_HAS_XPRESSIVE + if(time_xpressive == true) + { + time = dxpr::time_match(re, text, icase); + r.xpressive_time = time; + std::cout << "\txpressive regex: " << time << "s\n"; + } #endif r.finalise(); result_list.push_back(r); @@ -118,6 +126,14 @@ void test_find_all(const std::string& re, const std::string& text, const std::st r.pcre_time = time; std::cout << "\tPCRE regex: " << time << "s\n"; } +#endif +#ifdef BOOST_HAS_XPRESSIVE + if(time_xpressive == true) + { + time = dxpr::time_find_all(re, text, icase); + r.xpressive_time = time; + std::cout << "\txpressive regex: " << time << "s\n"; + } #endif r.finalise(); result_list.push_back(r); diff --git a/performance/regex_comparison.hpp b/performance/regex_comparison.hpp index 5e309ff4..a16dc138 100644 --- a/performance/regex_comparison.hpp +++ b/performance/regex_comparison.hpp @@ -26,6 +26,7 @@ extern bool time_greta; extern bool time_safe_greta; extern bool time_posix; extern bool time_pcre; +extern bool time_xpressive; extern bool test_matches; extern bool test_short_twain; @@ -53,6 +54,7 @@ struct results double safe_greta_time; double posix_time; double pcre_time; + double xpressive_time; double factor; std::string expression; std::string description; @@ -63,6 +65,7 @@ struct results safe_greta_time(-1), posix_time(-1), pcre_time(-1), + xpressive_time(-1), factor(std::numeric_limits::max()), expression(ex), description(desc) @@ -81,6 +84,8 @@ struct results factor = posix_time; if((pcre_time >= 0) && (pcre_time < factor)) factor = pcre_time; + if((xpressive_time >= 0) && (xpressive_time < factor)) + factor = xpressive_time; } }; 
@@ -123,6 +128,12 @@ double time_match(const std::string& re, const std::string& text, bool icase); double time_find_all(const std::string& re, const std::string& text, bool icase); } +namespace dxpr { +// xpressive tests: +double time_match(const std::string& re, const std::string& text, bool icase); +double time_find_all(const std::string& re, const std::string& text, bool icase); +} + void test_match(const std::string& re, const std::string& text, const std::string& description, bool icase = false); void test_find_all(const std::string& re, const std::string& text, const std::string& description, bool icase = false); inline void test_match(const std::string& re, const std::string& text, bool icase = false) diff --git a/performance/time_dynamic_xpressive.cpp b/performance/time_dynamic_xpressive.cpp new file mode 100644 index 00000000..cf81ec63 --- /dev/null +++ b/performance/time_dynamic_xpressive.cpp @@ -0,0 +1,129 @@ +/* +* +* Copyright (c) 2002 +* Dr John Maddock +* +* All rights reserved. +* May not be transfered or disclosed to a third party without +* prior consent of the author. +* +*/ + +#include "regex_comparison.hpp" + +#ifdef BOOST_HAS_XPRESSIVE +#include +#include +#include + +namespace dxpr +{ + +double time_match(const std::string& re, const std::string& text, bool icase) +{ + boost::xpressive::sregex e; + e = (icase ? 
+ boost::xpressive::sregex(boost::xpressive::sregex::compile(re)) + : boost::xpressive::sregex(boost::xpressive::sregex::compile(re, boost::xpressive::regex_constants::icase))); + boost::xpressive::smatch what; + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + assert(boost::xpressive::regex_match( text, what, e )); + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + boost::xpressive::regex_match( text, what, e ); + } + result = tim.elapsed(); + iter *= 2; + } while(result < 0.5); + iter /= 2; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + boost::xpressive::regex_match( text, what, e ); + } + run = tim.elapsed(); + result = (std::min)(run, result); + } + return result / iter; +} + +struct noop +{ + void operator()( boost::xpressive::smatch const & ) const + { + } +}; + +double time_find_all(const std::string& re, const std::string& text, bool icase) +{ + boost::xpressive::sregex e; + e = (icase ? 
+ boost::xpressive::sregex(boost::xpressive::sregex::compile(re)) + : boost::xpressive::sregex(boost::xpressive::sregex::compile(re, boost::xpressive::regex_constants::icase))); + boost::xpressive::smatch what; + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + boost::xpressive::sregex_iterator begin( text.begin(), text.end(), e ), end; + std::for_each( begin, end, noop() ); + } + result = tim.elapsed(); + iter *= 2; + }while(result < 0.5); + iter /= 2; + + if(result >10) + return result / iter; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + boost::xpressive::sregex_iterator begin( text.begin(), text.end(), e ), end; + std::for_each( begin, end, noop() ); + } + run = tim.elapsed(); + result = (std::min)(run, result); + } + return result / iter; +} + +} + +#else + +namespace dxpr{ + +double time_match(const std::string& re, const std::string& text, bool icase) +{ + return -1; +} +double time_find_all(const std::string& re, const std::string& text, bool icase) +{ + return -1; +} + +} +#endif + + diff --git a/performance/time_posix.cpp b/performance/time_posix.cpp index 6972ec8c..54e5cb98 100644 --- a/performance/time_posix.cpp +++ b/performance/time_posix.cpp @@ -27,7 +27,7 @@ double time_match(const std::string& re, const std::string& text, bool icase) int counter, repeats; double result = 0; double run; - if(0 != regcomp(&e, re.c_str(), (icase ? REG_ICASE | REG_EXTENDED : REG_EXTENDED))) + if(0 != ::regcomp(&e, re.c_str(), (icase ? 
REG_ICASE | REG_EXTENDED : REG_EXTENDED))) return -1; do { diff --git a/src/static_mutex.cpp b/src/static_mutex.cpp index 65fa20d5..03e292c1 100644 --- a/src/static_mutex.cpp +++ b/src/static_mutex.cpp @@ -21,7 +21,7 @@ #ifdef BOOST_HAS_THREADS -#include +#include #if defined(BOOST_HAS_WINTHREADS) #define NOMINMAX diff --git a/test/Jamfile b/test/Jamfile index d24f08a0..a7173d61 100644 --- a/test/Jamfile +++ b/test/Jamfile @@ -14,6 +14,7 @@ test_anchors.cpp test_asserts.cpp test_backrefs.cpp test_deprecated.cpp +test_emacs.cpp test_escapes.cpp test_grep.cpp test_locale.cpp @@ -163,3 +164,4 @@ test-suite regex + diff --git a/test/object_cache/object_cache_test.cpp b/test/object_cache/object_cache_test.cpp new file mode 100644 index 00000000..3b82c3ff --- /dev/null +++ b/test/object_cache/object_cache_test.cpp @@ -0,0 +1,75 @@ +/* + * + * Copyright (c) 2004 + * Dr John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE object_cache_test.cpp + * VERSION see + * DESCRIPTION: Test code for a generic object cache. 
+ */ +#include +#include + +class test_object +{ +public: + test_object(int i) + : m_value(i) + { + ++s_count; + } + int value()const + { + return m_value; + } + static int count() + { + return s_count; + } +private: + int m_value; + static int s_count; +}; + +int test_object::s_count = 0; + +static const int max_cache_size = 5; + +int test_main(int /*argc*/, char * /*argv*/[]) +{ + int i; + for(i = 0; i < 20; ++i) + { + boost::shared_ptr p = boost::object_cache::get(i, max_cache_size); + BOOST_TEST(p->value() == i); + p = boost::object_cache::get(i, max_cache_size); + BOOST_TEST(p->value() == i); + if(i) + { + p = boost::object_cache::get(i-1, max_cache_size); + BOOST_TEST(p->value() == i-1); + } + } + int current_count = test_object::count(); + for(int j = 0; j < 10; ++j) + { + for(i = 20 - max_cache_size; i < 20; ++i) + { + boost::shared_ptr p = boost::object_cache::get(i, max_cache_size); + BOOST_TEST(p->value() == i); + p = boost::object_cache::get(i, max_cache_size); + BOOST_TEST(p->value() == i); + } + } + BOOST_TEST(current_count == test_object::count()); + return 0; +} + diff --git a/test/regress/main.cpp b/test/regress/main.cpp index 509dbeaf..c14b425f 100644 --- a/test/regress/main.cpp +++ b/test/regress/main.cpp @@ -49,6 +49,9 @@ int cpp_main(int /*argc*/, char * /*argv*/[]) test_options(); test_options2(); test_en_locale(); + test_emacs(); + test_operators(); + test_overloads(); return error_count; } diff --git a/test/regress/test.hpp b/test/regress/test.hpp index f3ae6b72..944e4b32 100644 --- a/test/regress/test.hpp +++ b/test/regress/test.hpp @@ -208,5 +208,8 @@ void test_conditionals(); void test_options(); void test_options2(); void test_en_locale(); +void test_emacs(); +void test_operators(); +void test_overloads(); #endif diff --git a/test/regress/test_emacs.cpp b/test/regress/test_emacs.cpp new file mode 100644 index 00000000..3b125731 --- /dev/null +++ b/test/regress/test_emacs.cpp @@ -0,0 +1,157 @@ +/* + * + * Copyright (c) 2004 + * Dr John 
Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + +#include "test.hpp" + +#ifdef BOOST_MSVC +#pragma warning(disable:4127) +#endif + +void test_emacs() +{ + using namespace boost::regex_constants; + // now try operator + : + TEST_REGEX_SEARCH("ab+", emacs, "a", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("ab+", emacs, "ab", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("ab+", emacs, "sssabbbbbbsss", match_default, make_array(3, 10, -2, -2)); + TEST_REGEX_SEARCH("ab+c+", emacs, "abbb", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("ab+c+", emacs, "accc", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("ab+c+", emacs, "abbcc", match_default, make_array(0, 5, -2, -2)); + TEST_INVALID_REGEX("\\<+", emacs); + TEST_INVALID_REGEX("\\>+", emacs); + TEST_REGEX_SEARCH("\n+", emacs, "\n\n", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("\\+", emacs, "+", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("\\+", emacs, "++", match_default, make_array(0, 1, -2, 1, 2, -2, -2)); + TEST_REGEX_SEARCH("\\++", emacs, "++", match_default, make_array(0, 2, -2, -2)); + + // now try operator ? 
+ TEST_REGEX_SEARCH("a?", emacs, "b", match_default, make_array(0, 0, -2, 1, 1, -2, -2)); + TEST_REGEX_SEARCH("ab?", emacs, "a", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("ab?", emacs, "ab", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("ab?", emacs, "sssabbbbbbsss", match_default, make_array(3, 5, -2, -2)); + TEST_REGEX_SEARCH("ab?c?", emacs, "a", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("ab?c?", emacs, "abbb", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("ab?c?", emacs, "accc", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("ab?c?", emacs, "abcc", match_default, make_array(0, 3, -2, -2)); + TEST_INVALID_REGEX("\\?", emacs); + TEST_REGEX_SEARCH("\n?", emacs, "\n\n", match_default, make_array(0, 1, -2, 1, 2, -2, 2, 2, -2, -2)); + TEST_REGEX_SEARCH("\\?", emacs, "?", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("\\?", emacs, "?", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("\\??", emacs, "??", match_default, make_array(0, 1, -2, 1, 2, -2, 2, 2, -2, -2)); + + TEST_REGEX_SEARCH("a*?", emacs, "aa", match_default, make_array(0, 0, -2, 0, 1, -2, 1, 1, -2, 1, 2, -2, 2, 2, -2, -2)); + TEST_REGEX_SEARCH("^a*?$", emacs, "aa", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("^.*?$", emacs, "aa", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("^\\(a\\)*?$", emacs, "aa", match_default, make_array(0, 2, 1, 2, -2, -2)); + TEST_REGEX_SEARCH("^[ab]*?$", emacs, "aa", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a??", emacs, "aa", match_default, make_array(0, 0, -2, 0, 1, -2, 1, 1, -2, 1, 2, -2, 2, 2, -2, -2)); + TEST_REGEX_SEARCH("a+?", emacs, "aa", match_default, make_array(0, 1, -2, 1, 2, -2, -2)); + TEST_REGEX_SEARCH("a\\{1,3\\}?", emacs, "aaa", match_default, make_array(0, 1, -2, 1, 2, -2, 2, 3, -2, -2)); + TEST_REGEX_SEARCH("\\w+?w", emacs, "...ccccccwcccccw", match_default, make_array(3, 10, -2, 10, 16, -2, 
-2)); + TEST_REGEX_SEARCH("\\W+\\w+?w", emacs, "...ccccccwcccccw", match_default, make_array(0, 10, -2, -2)); + TEST_REGEX_SEARCH("abc\\|\\w+?", emacs, "abd", match_default, make_array(0, 1, -2, 1, 2, -2, 2, 3, -2, -2)); + TEST_REGEX_SEARCH("abc\\|\\w+?", emacs, "abcd", match_default, make_array(0, 3, -2, 3, 4, -2, -2)); + TEST_REGEX_SEARCH("<\\ss*tag[^>]*>\\(.*?\\)<\\ss*/tag\\ss*>", emacs, " here is some text ", match_default, make_array(1, 29, 6, 23, -2, 30, 41, 35, 35, -2, -2)); + TEST_REGEX_SEARCH("<\\ss*tag[^>]*>\\(.*?\\)<\\ss*/tag\\ss*>", emacs, " < tag attr=\"something\">here is some text< /tag > ", match_default, make_array(1, 49, 24, 41, -2, 50, 61, 55, 55, -2, -2)); + TEST_INVALID_REGEX("a\\{1,3\\}\\{1\\}", emacs); + TEST_INVALID_REGEX("a**", emacs); + TEST_INVALID_REGEX("a++", emacs); + + TEST_REGEX_SEARCH("\\", emacs, "abc", match_default, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("abc\\>", emacs, "abcd", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("abc\\>", emacs, "abc\n", match_default, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("abc\\>", emacs, "abc::", match_default, make_array(0,3, -2, -2)); + TEST_REGEX_SEARCH("abc\\(?:\\>..\\|$\\)", emacs, "abc::", match_default, make_array(0, 5, -2, -2)); + TEST_REGEX_SEARCH("\\>", emacs, " ", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH(".\\>.", emacs, " ", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("abc\\>", emacs, "abc", match_default|match_not_eow, make_array(-2, -2)); + // word boundary: + TEST_REGEX_SEARCH("\\babcd", emacs, " abcd", match_default, make_array(2, 6, -2, -2)); + TEST_REGEX_SEARCH("\\bab", emacs, "cab", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("\\bab", emacs, "\nab", match_default, make_array(1, 3, -2, -2)); + TEST_REGEX_SEARCH("\\btag", emacs, "::tag", match_default, make_array(2, 5, -2, -2)); + TEST_REGEX_SEARCH("abc\\b", emacs, "abc", match_default, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("abc\\b", emacs, "abcd", 
match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("abc\\b", emacs, "abc\n", match_default, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("abc\\b", emacs, "abc::", match_default, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("\\babcd", emacs, "abcd", match_default|match_not_bow, make_array(-2, -2)); + // within word: + TEST_REGEX_SEARCH("\\B", emacs, "ab", match_default, make_array(1, 1, -2, -2)); + TEST_REGEX_SEARCH("a\\Bb", emacs, "ab", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a\\B", emacs, "ab", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("a\\B", emacs, "a", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("a\\B", emacs, "a ", match_default, make_array(-2, -2)); + // buffer operators: + TEST_REGEX_SEARCH("\\`abc", emacs, "abc", match_default, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("\\`abc", emacs, "\nabc", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("\\`abc", emacs, " abc", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("abc\\'", emacs, "abc", match_default, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("abc\\'", emacs, "abc\n", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("abc\\'", emacs, "abc ", match_default, make_array(-2, -2)); + + TEST_REGEX_SEARCH("a\\|b", emacs, "a", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("a\\|b", emacs, "b", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("a\\|b\\|c", emacs, "c", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("a\\|\\(b\\)\\|.", emacs, "b", match_default, make_array(0, 1, 0, 1, -2, -2)); + TEST_REGEX_SEARCH("\\(a\\)\\|b\\|.", emacs, "a", match_default, make_array(0, 1, 0, 1, -2, -2)); + TEST_REGEX_SEARCH("a\\(b\\|c\\)", emacs, "ab", match_default, make_array(0, 2, 1, 2, -2, -2)); + TEST_REGEX_SEARCH("a\\(b\\|c\\)", emacs, "ac", match_default, make_array(0, 2, 1, 2, -2, -2)); + TEST_REGEX_SEARCH("a\\(b\\|c\\)", emacs, "ad", match_default, make_array(-2, -2)); + 
TEST_REGEX_SEARCH("\\(a\\|b\\|c\\)", emacs, "c", match_default, make_array(0, 1, 0, 1, -2, -2)); + TEST_REGEX_SEARCH("\\(a\\|\\(b\\)\\|.\\)", emacs, "b", match_default, make_array(0, 1, 0, 1, 0, 1, -2, -2)); + TEST_INVALID_REGEX("\\|c", emacs); + TEST_INVALID_REGEX("c\\|", emacs); + TEST_INVALID_REGEX("\\(\\|\\)", emacs); + TEST_INVALID_REGEX("\\(a\\|\\)", emacs); + TEST_INVALID_REGEX("\\(\\|a\\)", emacs); + + TEST_REGEX_SEARCH("\\(?:abc\\)+", emacs, "xxabcabcxx", match_default, make_array(2, 8, -2, -2)); + TEST_REGEX_SEARCH("\\(?:a+\\)\\(b+\\)", emacs, "xaaabbbx", match_default, make_array(1, 7, 4, 7, -2, -2)); + TEST_REGEX_SEARCH("\\(a+\\)\\(?:b+\\)", emacs, "xaaabbba", match_default, make_array(1, 7, 1, 4, -2, -2)); + TEST_REGEX_SEARCH("\\(?:\\(a+\\)b+\\)", emacs, "xaaabbba", match_default, make_array(1, 7, 1, 4, -2, -2)); + TEST_REGEX_SEARCH("\\(?:a+\\(b+\\)\\)", emacs, "xaaabbba", match_default, make_array(1, 7, 4, 7, -2, -2)); + TEST_REGEX_SEARCH("a+\\(?#b+\\)b+", emacs, "xaaabbba", match_default, make_array(1, 7, -2, -2)); + TEST_REGEX_SEARCH("\\(a\\)\\(?:b\\|$\\)", emacs, "ab", match_default, make_array(0, 2, 0, 1, -2, -2)); + TEST_REGEX_SEARCH("\\(a\\)\\(?:b\\|$\\)", emacs, "a", match_default, make_array(0, 1, 0, 1, -2, -2)); + + TEST_REGEX_SEARCH("\\ss+", emacs, "a b", match_default, make_array(1, 3, -2, -2)); + TEST_REGEX_SEARCH("\\Ss+", emacs, " ab ", match_default, make_array(1, 3, -2, -2)); + TEST_REGEX_SEARCH("\\sw+", emacs, " ab ", match_default, make_array(1, 3, -2, -2)); + TEST_REGEX_SEARCH("\\Sw+", emacs, "a b", match_default, make_array(1, 3, -2, -2)); + TEST_REGEX_SEARCH("\\s_+", emacs, " $&*+-_<> ", match_default, make_array(1, 9, -2, -2)); + TEST_REGEX_SEARCH("\\S_+", emacs, "$&*+-_<>b", match_default, make_array(8, 9, -2, -2)); + TEST_REGEX_SEARCH("\\s.+", emacs, " .,;!? 
", match_default, make_array(1, 6, -2, -2)); + TEST_REGEX_SEARCH("\\S.+", emacs, ".,;!?b", match_default, make_array(5, 6, -2, -2)); + TEST_REGEX_SEARCH("\\s(+", emacs, "([{ ", match_default, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("\\S(+", emacs, "([{ ", match_default, make_array(3, 4, -2, -2)); + TEST_REGEX_SEARCH("\\s)+", emacs, ")]} ", match_default, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("\\S)+", emacs, ")]} ", match_default, make_array(3, 4, -2, -2)); + TEST_REGEX_SEARCH("\\s\"+", emacs, "\"'` ", match_default, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("\\S\"+", emacs, "\"'` ", match_default, make_array(3, 4, -2, -2)); + TEST_REGEX_SEARCH("\\s'+", emacs, "',# ", match_default, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("\\S'+", emacs, "',# ", match_default, make_array(3, 4, -2, -2)); + TEST_REGEX_SEARCH("\\s<+", emacs, "; ", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("\\S<+", emacs, "; ", match_default, make_array(1, 2, -2, -2)); + TEST_REGEX_SEARCH("\\s>+", emacs, "\n\f ", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("\\S>+", emacs, "\n\f ", match_default, make_array(2, 3, -2, -2)); +} + diff --git a/test/regress/test_mfc.cpp b/test/regress/test_mfc.cpp index 39afcb66..f75e5079 100644 --- a/test/regress/test_mfc.cpp +++ b/test/regress/test_mfc.cpp @@ -21,7 +21,7 @@ // #include -#if (defined(BOOST_MSVC) || defined(__ICL)) && (_MSC_VER >= 1300) +#if (defined(BOOST_MSVC) || defined(__ICL)) && (_MSC_VER >= 1300) && (_MSC_VER < 1400) # define TEST_MFC #endif diff --git a/test/regress/test_operators.cpp b/test/regress/test_operators.cpp new file mode 100644 index 00000000..010b090b --- /dev/null +++ b/test/regress/test_operators.cpp @@ -0,0 +1,146 @@ + +#include "test.hpp" + +template +void test_less(const T1& t1, const T2& t2) +{ + if(!(t1 < t2)) + { + BOOST_REGEX_TEST_ERROR("Failed < comparison", char); + } + if(!(t1 <= t2)) + { + BOOST_REGEX_TEST_ERROR("Failed <= comparison", char); + } + if(!(t1 != 
t2)) + { + BOOST_REGEX_TEST_ERROR("Failed != comparison", char); + } + if(t1 == t2) + { + BOOST_REGEX_TEST_ERROR("Failed == comparison", char); + } + if(t1 >= t2) + { + BOOST_REGEX_TEST_ERROR("Failed >= comparison", char); + } + if(t1 > t2) + { + BOOST_REGEX_TEST_ERROR("Failed > comparison", char); + } +} + +template +void test_greater(const T1& t1, const T2& t2) +{ + if(t1 < t2) + { + BOOST_REGEX_TEST_ERROR("Failed < comparison", char); + } + if(t1 <= t2) + { + BOOST_REGEX_TEST_ERROR("Failed <= comparison", char); + } + if(!(t1 != t2)) + { + BOOST_REGEX_TEST_ERROR("Failed != comparison", char); + } + if(t1 == t2) + { + BOOST_REGEX_TEST_ERROR("Failed == comparison", char); + } + if(!(t1 >= t2)) + { + BOOST_REGEX_TEST_ERROR("Failed >= comparison", char); + } + if(!(t1 > t2)) + { + BOOST_REGEX_TEST_ERROR("Failed > comparison", char); + } +} + +template +void test_equal(const T1& t1, const T2& t2) +{ + if(t1 < t2) + { + BOOST_REGEX_TEST_ERROR("Failed < comparison", char); + } + if(!(t1 <= t2)) + { + BOOST_REGEX_TEST_ERROR("Failed <= comparison", char); + } + if(t1 != t2) + { + BOOST_REGEX_TEST_ERROR("Failed != comparison", char); + } + if(!(t1 == t2)) + { + BOOST_REGEX_TEST_ERROR("Failed == comparison", char); + } + if(!(t1 >= t2)) + { + BOOST_REGEX_TEST_ERROR("Failed >= comparison", char); + } + if(t1 > t2) + { + BOOST_REGEX_TEST_ERROR("Failed > comparison", char); + } +} + +template +void test_plus(const T1& t1, const T2& t2, const T3& t3) +{ + if(t1 + t2 != t3) + { + BOOST_REGEX_TEST_ERROR("Failed addition", char); + } + if(t3 != t1 + t2) + { + BOOST_REGEX_TEST_ERROR("Failed addition", char); + } +} + +void test_operators() +{ + test_info::set_typename("sub_match operators"); + + std::string s1("a"); + std::string s2("b"); + boost::sub_match sub1, sub2; + sub1.first = s1.begin(); + sub1.second = s1.end(); + sub1.matched = true; + sub2.first = s2.begin(); + sub2.second = s2.end(); + sub2.matched = true; + + test_less(sub1, sub2); + test_less(sub1, s2.c_str()); + 
test_less(s1.c_str(), sub2); + test_less(sub1, *s2.c_str()); + test_less(*s1.c_str(), sub2); + test_less(sub1, s2); + //test_less(s1, sub2); + test_greater(sub2, sub1); + test_greater(sub2, s1.c_str()); + test_greater(s2.c_str(), sub1); + test_greater(sub2, *s1.c_str()); + test_greater(*s2.c_str(), sub1); + test_greater(sub2, s1); + //test_greater(s2, sub1); + test_equal(sub1, sub1); + test_equal(sub1, s1.c_str()); + test_equal(s1.c_str(), sub1); + test_equal(sub1, *s1.c_str()); + test_equal(*s1.c_str(), sub1); + test_equal(sub1, s1); + //test_equal(s1, sub1); + test_plus(sub2, sub1, "ba"); + test_plus(sub2, s1.c_str(), "ba"); + test_plus(s2.c_str(), sub1, "ba"); + test_plus(sub2, *s1.c_str(), "ba"); + test_plus(*s2.c_str(), sub1, "ba"); + test_plus(sub2, s1, "ba"); + //test_plus(s2, sub1, "ba"); +} \ No newline at end of file diff --git a/test/regress/test_overloads.cpp b/test/regress/test_overloads.cpp new file mode 100644 index 00000000..1ac3c95b --- /dev/null +++ b/test/regress/test_overloads.cpp @@ -0,0 +1,46 @@ + +#include "test.hpp" + +#define BOOST_REGEX_TEST(x)\ + if(!(x)){ BOOST_REGEX_TEST_ERROR("Error in: " BOOST_STRINGIZE(x), char); } + +void test_overloads() +{ + test_info::set_typename("sub_match operators"); + + // test all the available overloads with *one* simple + // expression, doing all these tests with all the test + // cases would just take to long... 
+ + boost::regex e("abc"); + std::string s("abc"); + const std::string& cs = s; + boost::smatch sm; + boost::cmatch cm; + // regex_match: + BOOST_REGEX_TEST(boost::regex_match(cs.begin(), cs.end(), sm, e)) + BOOST_REGEX_TEST(boost::regex_match(cs.begin(), cs.end(), sm, e, boost::regex_constants::match_default)) + BOOST_REGEX_TEST(boost::regex_match(cs.begin(), cs.end(), e)) + BOOST_REGEX_TEST(boost::regex_match(cs.begin(), cs.end(), e, boost::regex_constants::match_default)) + BOOST_REGEX_TEST(boost::regex_match(s.c_str(), cm, e)) + BOOST_REGEX_TEST(boost::regex_match(s.c_str(), cm, e, boost::regex_constants::match_default)) + BOOST_REGEX_TEST(boost::regex_match(s.c_str(), e)) + BOOST_REGEX_TEST(boost::regex_match(s.c_str(), e, boost::regex_constants::match_default)) + BOOST_REGEX_TEST(boost::regex_match(s, sm, e)) + BOOST_REGEX_TEST(boost::regex_match(s, sm, e, boost::regex_constants::match_default)) + BOOST_REGEX_TEST(boost::regex_match(s, e)) + BOOST_REGEX_TEST(boost::regex_match(s, e, boost::regex_constants::match_default)) + // regex_search: + BOOST_REGEX_TEST(boost::regex_search(cs.begin(), cs.end(), sm, e)) + BOOST_REGEX_TEST(boost::regex_search(cs.begin(), cs.end(), sm, e, boost::regex_constants::match_default)) + BOOST_REGEX_TEST(boost::regex_search(cs.begin(), cs.end(), e)) + BOOST_REGEX_TEST(boost::regex_search(cs.begin(), cs.end(), e, boost::regex_constants::match_default)) + BOOST_REGEX_TEST(boost::regex_search(s.c_str(), cm, e)) + BOOST_REGEX_TEST(boost::regex_search(s.c_str(), cm, e, boost::regex_constants::match_default)) + BOOST_REGEX_TEST(boost::regex_search(s.c_str(), e)) + BOOST_REGEX_TEST(boost::regex_search(s.c_str(), e, boost::regex_constants::match_default)) + BOOST_REGEX_TEST(boost::regex_search(s, sm, e)) + BOOST_REGEX_TEST(boost::regex_search(s, sm, e, boost::regex_constants::match_default)) + BOOST_REGEX_TEST(boost::regex_search(s, e)) + BOOST_REGEX_TEST(boost::regex_search(s, e, boost::regex_constants::match_default)) +} \ No newline at 
end of file diff --git a/test/static_mutex/static_mutex_test.cpp b/test/static_mutex/static_mutex_test.cpp index ab76df8c..2899ca4d 100644 --- a/test/static_mutex/static_mutex_test.cpp +++ b/test/static_mutex/static_mutex_test.cpp @@ -17,7 +17,7 @@ */ #include -#include +#include #include #include