forked from boostorg/regex
Started to improve docs.
Added more tests. Added code to speed up traits class construction and usage. Tweaked extern template code. [SVN r25002]
This commit is contained in:
@ -35,7 +35,7 @@ BCROOT=$(MAKEDIR)\..
|
||||
!endif
|
||||
|
||||
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
|
||||
all : bcb bcb\libboost_regex-bcb-s-1_31 bcb\libboost_regex-bcb-s-1_31.lib bcb\libboost_regex-bcb-mt-s-1_31 bcb\libboost_regex-bcb-mt-s-1_31.lib bcb\boost_regex-bcb-mt-1_31 bcb\boost_regex-bcb-mt-1_31.lib bcb\boost_regex-bcb-1_31 bcb\boost_regex-bcb-1_31.lib bcb\libboost_regex-bcb-mt-1_31 bcb\libboost_regex-bcb-mt-1_31.lib bcb\libboost_regex-bcb-1_31 bcb\libboost_regex-bcb-1_31.lib bcb\libboost_regex-bcb-sd-1_31 bcb\libboost_regex-bcb-sd-1_31.lib bcb\libboost_regex-bcb-mt-sd-1_31 bcb\libboost_regex-bcb-mt-sd-1_31.lib bcb\boost_regex-bcb-mt-d-1_31 bcb\boost_regex-bcb-mt-d-1_31.lib bcb\boost_regex-bcb-d-1_31 bcb\boost_regex-bcb-d-1_31.lib bcb\libboost_regex-bcb-mt-d-1_31 bcb\libboost_regex-bcb-mt-d-1_31.lib bcb\libboost_regex-bcb-d-1_31 bcb\libboost_regex-bcb-d-1_31.lib
|
||||
|
||||
|
@ -29,7 +29,7 @@ C1=-c -O2 -I../../../ -fPIC
|
||||
C2=-c -g -I../../../ -fPIC
|
||||
|
||||
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
|
||||
all : gcc gcc gcc/boost_regex-gcc-1_31_shared ./gcc/libboost_regex-gcc-1_31.so gcc gcc/boost_regex-gcc-d-1_31_shared ./gcc/libboost_regex-gcc-d-1_31.so
|
||||
|
||||
|
@ -30,7 +30,7 @@ C2=-c -g -I../../../
|
||||
|
||||
|
||||
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
|
||||
all : gcc gcc gcc/boost_regex-gcc-1_31 ./gcc/libboost_regex-gcc-1_31.a gcc gcc/boost_regex-gcc-d-1_31 ./gcc/libboost_regex-gcc-d-1_31.a
|
||||
|
||||
|
@ -26,7 +26,7 @@ C1=-c -O2 -I../../../
|
||||
|
||||
|
||||
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
|
||||
all : $(DIRNAME) $(DIRNAME) $(DIRNAME)/boost_regex ./$(DIRNAME)/libboost_regex.so
|
||||
|
||||
|
@ -34,7 +34,7 @@ SUNWS_CACHE_NAME=SunWS_cache
|
||||
|
||||
|
||||
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
|
||||
all : sunpro sunpro/libboost_regex$(LIBSUFFIX) sunpro/libboost_regex$(LIBSUFFIX).a sunpro/libboost_regex_mt$(LIBSUFFIX) sunpro/libboost_regex_mt$(LIBSUFFIX).a sunpro/shared_libboost_regex$(LIBSUFFIX) sunpro/libboost_regex$(LIBSUFFIX).so sunpro/shared_libboost_regex_mt$(LIBSUFFIX) sunpro/libboost_regex_mt$(LIBSUFFIX).so
|
||||
|
||||
|
@ -40,7 +40,7 @@ NULL=nul
|
||||
!ENDIF
|
||||
|
||||
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
|
||||
all : main_dir libboost_regex-vc6-mt-sp-1_31_dir ./vc6-stlport/libboost_regex-vc6-mt-sp-1_31.lib boost_regex-vc6-mt-p-1_31_dir ./vc6-stlport/boost_regex-vc6-mt-p-1_31.lib libboost_regex-vc6-mt-p-1_31_dir ./vc6-stlport/libboost_regex-vc6-mt-p-1_31.lib boost_regex-vc6-mt-gdp-1_31_dir ./vc6-stlport/boost_regex-vc6-mt-gdp-1_31.lib libboost_regex-vc6-mt-sgdp-1_31_dir ./vc6-stlport/libboost_regex-vc6-mt-sgdp-1_31.lib libboost_regex-vc6-mt-gdp-1_31_dir ./vc6-stlport/libboost_regex-vc6-mt-gdp-1_31.lib
|
||||
|
||||
|
@ -36,7 +36,7 @@ NULL=nul
|
||||
!ENDIF
|
||||
|
||||
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
|
||||
all : main_dir libboost_regex-vc6-s-1_31_dir ./vc6/libboost_regex-vc6-s-1_31.lib libboost_regex-vc6-mt-s-1_31_dir ./vc6/libboost_regex-vc6-mt-s-1_31.lib libboost_regex-vc6-sgd-1_31_dir ./vc6/libboost_regex-vc6-sgd-1_31.lib libboost_regex-vc6-mt-sgd-1_31_dir ./vc6/libboost_regex-vc6-mt-sgd-1_31.lib boost_regex-vc6-mt-gd-1_31_dir ./vc6/boost_regex-vc6-mt-gd-1_31.lib boost_regex-vc6-mt-1_31_dir ./vc6/boost_regex-vc6-mt-1_31.lib libboost_regex-vc6-mt-1_31_dir ./vc6/libboost_regex-vc6-mt-1_31.lib libboost_regex-vc6-mt-gd-1_31_dir ./vc6/libboost_regex-vc6-mt-gd-1_31.lib
|
||||
|
||||
|
@ -40,7 +40,7 @@ NULL=nul
|
||||
!ENDIF
|
||||
|
||||
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
|
||||
all : main_dir libboost_regex-vc7-mt-sp-1_31_dir ./vc7-stlport/libboost_regex-vc7-mt-sp-1_31.lib boost_regex-vc7-mt-p-1_31_dir ./vc7-stlport/boost_regex-vc7-mt-p-1_31.lib libboost_regex-vc7-mt-p-1_31_dir ./vc7-stlport/libboost_regex-vc7-mt-p-1_31.lib boost_regex-vc7-mt-gdp-1_31_dir ./vc7-stlport/boost_regex-vc7-mt-gdp-1_31.lib libboost_regex-vc7-mt-sgdp-1_31_dir ./vc7-stlport/libboost_regex-vc7-mt-sgdp-1_31.lib libboost_regex-vc7-mt-gdp-1_31_dir ./vc7-stlport/libboost_regex-vc7-mt-gdp-1_31.lib
|
||||
|
||||
|
@ -36,7 +36,7 @@ NULL=nul
|
||||
!ENDIF
|
||||
|
||||
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
|
||||
all : main_dir libboost_regex-vc7-s-1_31_dir ./vc7/libboost_regex-vc7-s-1_31.lib libboost_regex-vc7-mt-s-1_31_dir ./vc7/libboost_regex-vc7-mt-s-1_31.lib libboost_regex-vc7-sgd-1_31_dir ./vc7/libboost_regex-vc7-sgd-1_31.lib libboost_regex-vc7-mt-sgd-1_31_dir ./vc7/libboost_regex-vc7-mt-sgd-1_31.lib boost_regex-vc7-mt-gd-1_31_dir ./vc7/boost_regex-vc7-mt-gd-1_31.lib boost_regex-vc7-mt-1_31_dir ./vc7/boost_regex-vc7-mt-1_31.lib libboost_regex-vc7-mt-1_31_dir ./vc7/libboost_regex-vc7-mt-1_31.lib libboost_regex-vc7-mt-gd-1_31_dir ./vc7/libboost_regex-vc7-mt-gd-1_31.lib
|
||||
|
||||
|
@ -40,7 +40,7 @@ NULL=nul
|
||||
!ENDIF
|
||||
|
||||
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
|
||||
all : main_dir libboost_regex-vc71-mt-sp-1_31_dir ./vc71-stlport/libboost_regex-vc71-mt-sp-1_31.lib boost_regex-vc71-mt-p-1_31_dir ./vc71-stlport/boost_regex-vc71-mt-p-1_31.lib libboost_regex-vc71-mt-p-1_31_dir ./vc71-stlport/libboost_regex-vc71-mt-p-1_31.lib boost_regex-vc71-mt-gdp-1_31_dir ./vc71-stlport/boost_regex-vc71-mt-gdp-1_31.lib libboost_regex-vc71-mt-sgdp-1_31_dir ./vc71-stlport/libboost_regex-vc71-mt-sgdp-1_31.lib libboost_regex-vc71-mt-gdp-1_31_dir ./vc71-stlport/libboost_regex-vc71-mt-gdp-1_31.lib
|
||||
|
||||
|
@ -36,7 +36,7 @@ NULL=nul
|
||||
!ENDIF
|
||||
|
||||
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cstring.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
ALL_HEADER= ../../../boost/regex/concepts.hpp ../../../boost/regex/config.hpp ../../../boost/regex/mfc.hpp ../../../boost/regex/pattern_except.hpp ../../../boost/regex/regex_traits.hpp ../../../boost/regex/static_mutex.hpp ../../../boost/regex/user.hpp ../../../boost/regex/v4/basic_regex.hpp ../../../boost/regex/v4/basic_regex_creator.hpp ../../../boost/regex/v4/basic_regex_parser.hpp ../../../boost/regex/v4/c_regex_traits.hpp ../../../boost/regex/v4/char_regex_traits.hpp ../../../boost/regex/v4/cpp_regex_traits.hpp ../../../boost/regex/v4/cregex.hpp ../../../boost/regex/v4/error_type.hpp ../../../boost/regex/v4/fileiter.hpp ../../../boost/regex/v4/instances.hpp ../../../boost/regex/v4/iterator_category.hpp ../../../boost/regex/v4/iterator_traits.hpp ../../../boost/regex/v4/match_flags.hpp ../../../boost/regex/v4/match_results.hpp ../../../boost/regex/v4/mem_block_cache.hpp ../../../boost/regex/v4/perl_matcher.hpp ../../../boost/regex/v4/perl_matcher_common.hpp ../../../boost/regex/v4/perl_matcher_non_recursive.hpp ../../../boost/regex/v4/perl_matcher_recursive.hpp ../../../boost/regex/v4/primary_transform.hpp ../../../boost/regex/v4/protected_call.hpp ../../../boost/regex/v4/regbase.hpp ../../../boost/regex/v4/regex.hpp ../../../boost/regex/v4/regex_cstring.hpp ../../../boost/regex/v4/regex_format.hpp ../../../boost/regex/v4/regex_fwd.hpp ../../../boost/regex/v4/regex_grep.hpp ../../../boost/regex/v4/regex_iterator.hpp ../../../boost/regex/v4/regex_kmp.hpp ../../../boost/regex/v4/regex_match.hpp ../../../boost/regex/v4/regex_merge.hpp ../../../boost/regex/v4/regex_raw_buffer.hpp ../../../boost/regex/v4/regex_replace.hpp ../../../boost/regex/v4/regex_search.hpp ../../../boost/regex/v4/regex_split.hpp ../../../boost/regex/v4/regex_stack.hpp ../../../boost/regex/v4/regex_token_iterator.hpp ../../../boost/regex/v4/regex_traits.hpp ../../../boost/regex/v4/regex_traits_defaults.hpp ../../../boost/regex/v4/regex_workaround.hpp ../../../boost/regex/v4/states.hpp 
../../../boost/regex/v4/sub_match.hpp ../../../boost/regex/v4/syntax_type.hpp ../../../boost/regex/v4/w32_regex_traits.hpp ../../../boost/regex/config/borland.hpp ../../../boost/regex/config/cwchar.hpp ../../../boost/regex/config/regex_library_include.hpp
|
||||
|
||||
all : main_dir libboost_regex-vc71-s-1_31_dir ./vc71/libboost_regex-vc71-s-1_31.lib libboost_regex-vc71-mt-s-1_31_dir ./vc71/libboost_regex-vc71-mt-s-1_31.lib libboost_regex-vc71-sgd-1_31_dir ./vc71/libboost_regex-vc71-sgd-1_31.lib libboost_regex-vc71-mt-sgd-1_31_dir ./vc71/libboost_regex-vc71-mt-sgd-1_31.lib boost_regex-vc71-mt-gd-1_31_dir ./vc71/boost_regex-vc71-mt-gd-1_31.lib boost_regex-vc71-mt-1_31_dir ./vc71/boost_regex-vc71-mt-1_31.lib libboost_regex-vc71-mt-1_31_dir ./vc71/libboost_regex-vc71-mt-1_31.lib libboost_regex-vc71-mt-gd-1_31_dir ./vc71/libboost_regex-vc71-mt-gd-1_31.lib
|
||||
|
||||
|
144
doc/Attic/character_class_names.html
Normal file
144
doc/Attic/character_class_names.html
Normal file
@ -0,0 +1,144 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Character Class Names</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Character Class Names</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>The following character class names are supported by Boost.Regex:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Name</STRONG></TD>
|
||||
<TD><STRONG>POSIX-standard name</STRONG></TD>
|
||||
<TD><STRONG>Description</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>alnum</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any alpha-numeric character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>alpha</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any alphabetic character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>blank</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any whitespace character that is not a line separator.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>cntrl</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any control character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>d</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any decimal digit.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>digit</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any decimal digit.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>graph</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any graphical character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>l</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any lower case character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>lower</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any lower case character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>print</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any printable character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>punct</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any punctuation character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>s</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any whitespace character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>space</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any whitespace character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>unicode</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any extended character whose code point is above 255 in value.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>u</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any upper case character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>upper</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any upper case character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>w</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any word character (alphanumeric characters plus the underscore).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>word</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any word character (alphanumeric characters plus the underscore).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>xdigit</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any hexadecimal digit character.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
09 Aug 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 2004</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
358
doc/Attic/collating_names.html
Normal file
358
doc/Attic/collating_names.html
Normal file
@ -0,0 +1,358 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Collating Element Names</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Collating Element Names</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>
|
||||
The following are treated as valid digraphs when used as a collating name:</P>
|
||||
<P>"ae", "Ae", "AE", "ch", "Ch", "CH", "ll", "Ll", "LL", "ss", "Ss", "SS", "nj",
|
||||
"Nj", "NJ", "dz", "Dz", "DZ", "lj", "Lj", "LJ".</P>
|
||||
<P>The following symbolic names are recognised as valid collating element names,
|
||||
in addition to any single character:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="50%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Name</STRONG></TD>
|
||||
<TD><STRONG>Character</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>NUL</TD>
|
||||
<TD>\x00</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SOH</TD>
|
||||
<TD>\x01</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>STX</TD>
|
||||
<TD>\x02</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ETX</TD>
|
||||
<TD>\x03</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>EOT</TD>
|
||||
<TD>\x04</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ENQ</TD>
|
||||
<TD>\x05</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ACK</TD>
|
||||
<TD>\x06</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>alert</TD>
|
||||
<TD>\x07</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>backspace</TD>
|
||||
<TD>\x08</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>tab</TD>
|
||||
<TD>\t</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>newline</TD>
|
||||
<TD>\n</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>vertical-tab</TD>
|
||||
<TD>\v</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>form-feed</TD>
|
||||
<TD>\f</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>carriage-return</TD>
|
||||
<TD>\r</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SO</TD>
|
||||
<TD>\xE</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SI</TD>
|
||||
<TD>\xF</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DLE</TD>
|
||||
<TD>\x10</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DC1</TD>
|
||||
<TD>\x11</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DC2</TD>
|
||||
<TD>\x12</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DC3</TD>
|
||||
<TD>\x13</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DC4</TD>
|
||||
<TD>\x14</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>NAK</TD>
|
||||
<TD>\x15</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SYN</TD>
|
||||
<TD>\x16</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ETB</TD>
|
||||
<TD>\x17</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>CAN</TD>
|
||||
<TD>\x18</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>EM</TD>
|
||||
<TD>\x19</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SUB</TD>
|
||||
<TD>\x1A</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ESC</TD>
|
||||
<TD>\x1B</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>IS4</TD>
|
||||
<TD>\x1C</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>IS3</TD>
|
||||
<TD>\x1D</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>IS2</TD>
|
||||
<TD>\x1E</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>IS1</TD>
|
||||
<TD>\x1F</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>space</TD>
|
||||
<TD>\x20</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>exclamation-mark</TD>
|
||||
<TD>!</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>quotation-mark</TD>
|
||||
<TD>"</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>number-sign</TD>
|
||||
<TD>#</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>dollar-sign</TD>
|
||||
<TD>$</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>percent-sign</TD>
|
||||
<TD>%</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ampersand</TD>
|
||||
<TD>&amp;</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>apostrophe</TD>
|
||||
<TD>'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>left-parenthesis</TD>
|
||||
<TD>(</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>right-parenthesis</TD>
|
||||
<TD>)</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>asterisk</TD>
|
||||
<TD>*</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>plus-sign</TD>
|
||||
<TD>+</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>comma</TD>
|
||||
<TD>,</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>hyphen</TD>
|
||||
<TD>-</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>period</TD>
|
||||
<TD>.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>slash</TD>
|
||||
<TD>/</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>zero</TD>
|
||||
<TD>0</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>one</TD>
|
||||
<TD>1</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>two</TD>
|
||||
<TD>2</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>three</TD>
|
||||
<TD>3</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>four</TD>
|
||||
<TD>4</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>five</TD>
|
||||
<TD>5</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>six</TD>
|
||||
<TD>6</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>seven</TD>
|
||||
<TD>7</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>eight</TD>
|
||||
<TD>8</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>nine</TD>
|
||||
<TD>9</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>colon</TD>
|
||||
<TD>:</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>semicolon</TD>
|
||||
<TD>;</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>less-than-sign</TD>
|
||||
<TD>&lt;</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>equals-sign</TD>
|
||||
<TD>=</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>greater-than-sign</TD>
|
||||
<TD>&gt;</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>question-mark</TD>
|
||||
<TD>?</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>commercial-at</TD>
|
||||
<TD>@</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>left-square-bracket</TD>
|
||||
<TD>[</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>backslash</TD>
|
||||
<TD>\</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>right-square-bracket</TD>
|
||||
<TD>]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>circumflex</TD>
|
||||
<TD>^</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>underscore</TD>
|
||||
<TD>_</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>grave-accent</TD>
|
||||
<TD>`</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>left-curly-bracket</TD>
|
||||
<TD>{</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>vertical-line</TD>
|
||||
<TD>|</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>right-curly-bracket</TD>
|
||||
<TD>}</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>tilde</TD>
|
||||
<TD>~</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DEL</TD>
|
||||
<TD>\x7F</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -25,742 +25,29 @@
|
||||
<HR>
|
||||
<P>This section covers the regular expression syntax used by this library. This is
|
||||
a programmer's guide; the actual syntax presented to your program's users will
|
||||
depend upon the flags used during expression compilation.
|
||||
</P>
|
||||
<H3>Literals
|
||||
</H3>
|
||||
<P>All characters are literals except: ".", "|", "*", "?", "+", "(", ")", "{",
|
||||
"}", "[", "]", "^", "$" and "\". These characters are literals when preceded by
|
||||
a "\". A literal is a character that matches itself, or matches the result of
|
||||
traits_type::translate(), where traits_type is the traits template parameter to
|
||||
class basic_regex.</P>
|
||||
<H3>Wildcard
|
||||
</H3>
|
||||
<P>The dot character "." matches any single character except: when <I>match_not_dot_null</I>
|
||||
is passed to the matching algorithms, the dot does not match a null character;
|
||||
when <I>match_not_dot_newline</I> is passed to the matching algorithms, then
|
||||
the dot does not match a newline character.
|
||||
</P>
|
||||
<H3>Repeats
|
||||
</H3>
|
||||
<P>A repeat is an expression that is repeated an arbitrary number of times. An
|
||||
expression followed by "*" can be repeated any number of times including zero.
|
||||
An expression followed by "+" can be repeated any number of times, but at least
|
||||
once, if the expression is compiled with the flag regex_constants::bk_plus_qm
|
||||
then "+" is an ordinary character and "\+" represents a repeat of once or more.
|
||||
An expression followed by "?" may be repeated zero or one times only, if the
|
||||
expression is compiled with the flag regex_constants::bk_plus_qm then "?" is an
|
||||
ordinary character and "\?" represents the repeat zero or once operator. When
|
||||
it is necessary to specify the minimum and maximum number of repeats
|
||||
explicitly, the bounds operator "{}" may be used, thus "a{2}" is the letter "a"
|
||||
repeated exactly twice, "a{2,4}" represents the letter "a" repeated between 2
|
||||
and 4 times, and "a{2,}" represents the letter "a" repeated at least twice with
|
||||
no upper limit. Note that there must be no white-space inside the {}, and there
|
||||
is no upper limit on the values of the lower and upper bounds. When the
|
||||
expression is compiled with the flag regex_constants::bk_braces then "{" and
|
||||
"}" are ordinary characters and "\{" and "\}" are used to delimit bounds
|
||||
instead. All repeat expressions refer to the shortest possible previous
|
||||
sub-expression: a single character; a character set, or a sub-expression
|
||||
grouped with "()" for example.
|
||||
</P>
|
||||
<P>Examples:
|
||||
</P>
|
||||
<P>"ba*" will match all of "b", "ba", "baaa" etc.
|
||||
</P>
|
||||
<P>"ba+" will match "ba" or "baaaa" for example but not "b".
|
||||
</P>
|
||||
<P>"ba?" will match "b" or "ba".
|
||||
</P>
|
||||
<P>"ba{2,4}" will match "baa", "baaa" and "baaaa".
|
||||
</P>
|
||||
<H3>Non-greedy repeats
|
||||
</H3>
|
||||
<P>Whenever the "extended" regular expression syntax is in use (the default) then
|
||||
non-greedy repeats are possible by appending a '?' after the repeat; a
|
||||
non-greedy repeat is one which will match the <I>shortest</I> possible string.
|
||||
</P>
|
||||
<P>For example to match html tag pairs one could use something like:
|
||||
</P>
|
||||
<P>"&lt;\s*tagname[^&gt;]*&gt;(.*?)&lt;\s*/tagname\s*&gt;"
|
||||
</P>
|
||||
<P>In this case $1 will contain the text between the tag pairs, and will be the
|
||||
shortest possible matching string.
|
||||
</P>
|
||||
<H3>Parentheses
|
||||
</H3>
|
||||
<P>Parentheses serve two purposes, to group items together into a sub-expression,
|
||||
and to mark what generated the match. For example the expression "(ab)*" would
|
||||
match all of the string "ababab". The matching algorithms <A href="regex_match.html">
|
||||
regex_match</A> and <A href="regex_search.html">regex_search</A> each take
|
||||
an instance of <A href="match_results.html">match_results</A> that reports what
|
||||
caused the match, on exit from these functions the <A href="match_results.html">match_results</A>
|
||||
contains information both on what the whole expression matched and on what each
|
||||
sub-expression matched. In the example above match_results[1] would contain a
|
||||
pair of iterators denoting the final "ab" of the matching string. It is
|
||||
permissible for sub-expressions to match null strings. If a sub-expression
|
||||
takes no part in a match - for example if it is part of an alternative that is
|
||||
not taken - then both of the iterators that are returned for that
|
||||
sub-expression point to the end of the input string, and the <I>matched</I> parameter
|
||||
for that sub-expression is <I>false</I>. Sub-expressions are indexed from left
|
||||
to right starting from 1, sub-expression 0 is the whole expression.
|
||||
</P>
|
||||
<H3>Non-Marking Parenthesis
|
||||
</H3>
|
||||
<P>Sometimes you need to group sub-expressions with parenthesis, but don't want
|
||||
the parenthesis to spit out another marked sub-expression, in this case a
|
||||
non-marking parenthesis (?:expression) can be used. For example the following
|
||||
expression creates no sub-expressions:
|
||||
</P>
|
||||
<P>"(?:abc)*"</P>
|
||||
<H3>Forward Lookahead Asserts
|
||||
</H3>
|
||||
<P>There are two forms of these; one for positive forward lookahead asserts, and
|
||||
one for negative lookahead asserts:</P>
|
||||
<P>"(?=abc)" matches zero characters only if they are followed by the expression
|
||||
"abc".</P>
|
||||
<P>"(?!abc)" matches zero characters only if they are not followed by the
|
||||
expression "abc".</P>
|
||||
<H3>Independent sub-expressions</H3>
|
||||
<P>"(?>expression)" matches "expression" as an independent atom (the algorithm
|
||||
will not backtrack into it if a failure occurs later in the expression).</P>
|
||||
<H3>Alternatives
|
||||
</H3>
|
||||
<P>Alternatives occur when the expression can match either one sub-expression or
|
||||
another, each alternative is separated by a "|", or a "\|" if the flag
|
||||
regex_constants::bk_vbar is set, or by a newline character if the flag
|
||||
regex_constants::newline_alt is set. Each alternative is the largest possible
|
||||
previous sub-expression; this is the opposite behavior from repetition
|
||||
operators.
|
||||
</P>
|
||||
<P>Examples:
|
||||
</P>
|
||||
<P>"a(b|c)" could match "ab" or "ac".
|
||||
</P>
|
||||
<P>"abc|def" could match "abc" or "def".
|
||||
</P>
|
||||
<H3>Sets
|
||||
</H3>
|
||||
<P>A set is a set of characters that can match any single character that is a
|
||||
member of the set. Sets are delimited by "[" and "]" and can contain literals,
|
||||
character ranges, character classes, collating elements and equivalence
|
||||
classes. Set declarations that start with "^" contain the complement of the
|
||||
elements that follow.
|
||||
</P>
|
||||
<P>Examples:
|
||||
</P>
|
||||
<P>Character literals:
|
||||
</P>
|
||||
<P>"[abc]" will match either of "a", "b", or "c".
|
||||
</P>
|
||||
<P>"[^abc]" will match any character other than "a", "b", or "c".
|
||||
</P>
|
||||
<P>Character ranges:
|
||||
</P>
|
||||
<P>"[a-z]" will match any character in the range "a" to "z".
|
||||
</P>
|
||||
<P>"[^A-Z]" will match any character other than those in the range "A" to "Z".
|
||||
</P>
|
||||
<P>Note that character ranges are highly locale dependent if the flag
|
||||
regex_constants::collate is set: they match any character that collates between
|
||||
the endpoints of the range, ranges will only behave according to ASCII rules
|
||||
when the default "C" locale is in effect. For example if the library is
|
||||
compiled with the Win32 localization model, then [a-z] will match the ASCII
|
||||
characters a-z, and also 'A', 'B' etc, but not 'Z' which collates just after
|
||||
'z'. This locale-specific behavior is disabled by default (in perl mode), which
|
||||
forces ranges to collate according to ASCII character code.
|
||||
</P>
|
||||
<P>Character classes are denoted using the syntax "[:classname:]" within a set
|
||||
declaration, for example "[[:space:]]" is the set of all whitespace characters.
|
||||
Character classes are only available if the flag regex_constants::char_classes
|
||||
is set. The available character classes are:
|
||||
<BR>
|
||||
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="0" cellPadding="7" width="100%" border="0">
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="50%">alnum</TD>
|
||||
<TD vAlign="top" width="50%">Any alpha numeric character.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">alpha</TD>
|
||||
<TD vAlign="top" width="50%">Any alphabetical character a-z and A-Z. Other
|
||||
characters may also be included depending upon the locale.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">blank</TD>
|
||||
<TD vAlign="top" width="50%">Any blank character, either a space or a tab.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">cntrl</TD>
|
||||
<TD vAlign="top" width="50%">Any control character.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">digit</TD>
|
||||
<TD vAlign="top" width="50%">Any digit 0-9.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">graph</TD>
|
||||
<TD vAlign="top" width="50%">Any graphical character.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">lower</TD>
|
||||
<TD vAlign="top" width="50%">Any lower case character a-z. Other characters may
|
||||
also be included depending upon the locale.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">print</TD>
|
||||
<TD vAlign="top" width="50%">Any printable character.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">punct</TD>
|
||||
<TD vAlign="top" width="50%">Any punctuation character.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">space</TD>
|
||||
<TD vAlign="top" width="50%">Any whitespace character.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">upper</TD>
|
||||
<TD vAlign="top" width="50%">Any upper case character A-Z. Other characters may
|
||||
also be included depending upon the locale.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">xdigit</TD>
|
||||
<TD vAlign="top" width="50%">Any hexadecimal digit character, 0-9, a-f and A-F.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">word</TD>
|
||||
<TD vAlign="top" width="50%">Any word character - all alphanumeric characters plus
|
||||
the underscore.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">Unicode</TD>
|
||||
<TD vAlign="top" width="50%">Any character whose code is greater than 255, this
|
||||
applies to the wide character traits classes only.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>There are some shortcuts that can be used in place of the character classes,
|
||||
provided the flag regex_constants::escape_in_lists is set then you can use:
|
||||
</P>
|
||||
<P>\w in place of [:word:]
|
||||
</P>
|
||||
<P>\s in place of [:space:]
|
||||
</P>
|
||||
<P>\d in place of [:digit:]
|
||||
</P>
|
||||
<P>\l in place of [:lower:]
|
||||
</P>
|
||||
<P>\u in place of [:upper:]
|
||||
</P>
|
||||
<P>Collating elements take the general form [.tagname.] inside a set declaration,
|
||||
where <I>tagname</I> is either a single character, or a name of a collating
|
||||
element, for example [[.a.]] is equivalent to [a], and [[.comma.]] is
|
||||
equivalent to [,]. The library supports all the standard POSIX collating
|
||||
element names, and in addition the following digraphs: "ae", "ch", "ll", "ss",
|
||||
"nj", "dz", "lj", each in lower, upper and title case variations.
|
||||
Multi-character collating elements can result in the set matching more than one
|
||||
character, for example [[.ae.]] would match two characters, but note that
|
||||
[^[.ae.]] would only match one character.
|
||||
</P>
|
||||
<P>
|
||||
Equivalence classes take the general form [=tagname=] inside a set declaration,
|
||||
where <I>tagname</I> is either a single character, or a name of a collating
|
||||
element, and matches any character that is a member of the same primary
|
||||
equivalence class as the collating element [.tagname.]. An equivalence class is
|
||||
a set of characters that collate the same, a primary equivalence class is a set
|
||||
of characters whose primary sort keys are all the same (for example strings are
|
||||
typically collated by character, then by accent, and then by case; the primary
|
||||
sort key then relates to the character, the secondary to the accentuation, and
|
||||
the tertiary to the case). If there is no equivalence class corresponding to <I>tagname</I>
|
||||
, then [=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
|
||||
locale independent method of obtaining the primary sort key for a character,
|
||||
except under Win32. For other operating systems the library will "guess" the
|
||||
primary sort key from the full sort key (obtained from <I>strxfrm</I>), so
|
||||
equivalence classes are probably best considered broken under any operating
|
||||
system other than Win32.
|
||||
</P>
|
||||
<P>To include a literal "-" in a set declaration then: make it the first character
|
||||
after the opening "[" or "[^", the endpoint of a range, a collating element, or
|
||||
if the flag regex_constants::escape_in_lists is set then precede with an escape
|
||||
character as in "[\-]". To include a literal "[" or "]" or "^" in a set then
|
||||
make them the endpoint of a range, a collating element, or precede with an
|
||||
escape character if the flag regex_constants::escape_in_lists is set.
|
||||
</P>
|
||||
<H3>Line anchors
|
||||
</H3>
|
||||
<P>An anchor is something that matches the null string at the start or end of a
|
||||
line: "^" matches the null string at the start of a line, "$" matches the null
|
||||
string at the end of a line.
|
||||
</P>
|
||||
<H3>Back references
|
||||
</H3>
|
||||
<P>A back reference is a reference to a previous sub-expression that has already
|
||||
been matched, the reference is to what the sub-expression matched, not to the
|
||||
expression itself. A back reference consists of the escape character "\"
|
||||
followed by a digit "1" to "9", "\1" refers to the first sub-expression, "\2"
|
||||
to the second etc. For example the expression "(.*)\1" matches any string that
|
||||
is repeated about its mid-point for example "abcabc" or "xyzxyz". A back
|
||||
reference to a sub-expression that did not participate in any match, matches
|
||||
the null string: NB this is different to some other regular expression
|
||||
matchers. Back references are only available if the expression is compiled with
|
||||
the flag regex_constants::bk_refs set.
|
||||
</P>
|
||||
<H3>Characters by code
|
||||
</H3>
|
||||
<P>This is an extension to the algorithm that is not available in other libraries,
|
||||
it consists of the escape character followed by the digit "0" followed by the
|
||||
octal character code. For example "\023" represents the character whose octal
|
||||
code is 23. Where ambiguity could occur use parentheses to break the expression
|
||||
up: "\0103" represents the character whose code is 103, "(\010)3" represents the
|
||||
character 10 followed by "3". To match characters by their hexadecimal code,
|
||||
use \x followed by a string of hexadecimal digits, optionally enclosed inside
|
||||
{}, for example \xf0 or \x{aff}, notice the latter example is a Unicode
|
||||
character.</P>
|
||||
<H3>Word operators
|
||||
</H3>
|
||||
<P>The following operators are provided for compatibility with the GNU regular
|
||||
expression library.
|
||||
</P>
|
||||
<P>"\w" matches any single character that is a member of the "word" character
|
||||
class, this is identical to the expression "[[:word:]]".
|
||||
</P>
|
||||
<P>"\W" matches any single character that is not a member of the "word" character
|
||||
class, this is identical to the expression "[^[:word:]]".
|
||||
</P>
|
||||
<P>"\&lt;" matches the null string at the start of a word.
|
||||
</P>
|
||||
<P>"\&gt;" matches the null string at the end of the word.
|
||||
</P>
|
||||
<P>"\b" matches the null string at either the start or the end of a word.
|
||||
</P>
|
||||
<P>"\B" matches a null string within a word.
|
||||
</P>
|
||||
<P>The start of the sequence passed to the matching algorithms is considered to be
|
||||
a potential start of a word unless the flag match_not_bow is set. The end of
|
||||
the sequence passed to the matching algorithms is considered to be a potential
|
||||
end of a word unless the flag match_not_eow is set.
|
||||
</P>
|
||||
<H3>Buffer operators
|
||||
</H3>
|
||||
<P>The following operators are provided for compatibility with the GNU regular
|
||||
expression library, and Perl regular expressions:
|
||||
</P>
|
||||
<P>"\`" matches the start of a buffer.
|
||||
</P>
|
||||
<P>"\A" matches the start of the buffer.
|
||||
</P>
|
||||
<P>"\'" matches the end of a buffer.
|
||||
</P>
|
||||
<P>"\z" matches the end of a buffer.
|
||||
</P>
|
||||
<P>"\Z" matches the end of a buffer, or possibly one or more new line characters
|
||||
followed by the end of the buffer.
|
||||
</P>
|
||||
<P>A buffer is considered to consist of the whole sequence passed to the matching
|
||||
algorithms, unless the flags match_not_bob or match_not_eob are set.
|
||||
</P>
|
||||
<H3>Escape operator
|
||||
</H3>
|
||||
<P>The escape character "\" has several meanings.
|
||||
</P>
|
||||
<P>Inside a set declaration the escape character is a normal character unless the
|
||||
flag regex_constants::escape_in_lists is set in which case whatever follows the
|
||||
escape is a literal character regardless of its normal meaning.
|
||||
</P>
|
||||
<P>The escape operator may introduce an operator for example: back references, or
|
||||
a word operator.
|
||||
</P>
|
||||
<P>The escape operator may make the following character normal, for example "\*"
|
||||
represents a literal "*" rather than the repeat operator.
|
||||
</P>
|
||||
<H4>Single character escape sequences
|
||||
</H4>
|
||||
<P>The following escape sequences are aliases for single characters:
|
||||
<BR>
|
||||
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="0" cellPadding="7" width="100%" border="0">
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="33%">Escape sequence
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Character code
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Meaning
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\a
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x07
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Bell character.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\f
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x0C
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Form feed.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\n
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x0A
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Newline character.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\r
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x0D
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Carriage return.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\t
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x09
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Tab character.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\v
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x0B
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Vertical tab.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\e
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x1B
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">ASCII Escape character.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\0dd
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0dd
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">An octal character code, where <I>dd</I> is one or
|
||||
more octal digits.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\xXX
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0xXX
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">A hexadecimal character code, where XX is one or more
|
||||
hexadecimal digits.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\x{XX}
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0xXX
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">A hexadecimal character code, where XX is one or more
|
||||
hexadecimal digits, optionally a Unicode character.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\cZ
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">z-@
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">An ASCII escape sequence control-Z, where Z is any
|
||||
ASCII character greater than or equal to the character code for '@'.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H4>Miscellaneous escape sequences:
|
||||
</H4>
|
||||
<P>The following are provided mostly for perl compatibility, but note that there
|
||||
are some differences in the meanings of \l \L \u and \U:
|
||||
<BR>
|
||||
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table4" cellSpacing="0" cellPadding="6" width="100%" border="0">
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\w
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [[:word:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\W
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [^[:word:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\s
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [[:space:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\S
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [^[:space:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\d
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [[:digit:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\D
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [^[:digit:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\l
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [[:lower:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\L
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [^[:lower:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\u
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [[:upper:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\U
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [^[:upper:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\C
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Any single character, equivalent to '.'.
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\X
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Match any Unicode combining character sequence, for
|
||||
example "a\x 0301" (a letter a with an acute).
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\Q
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">The begin quote operator, everything that follows is
|
||||
treated as a literal character until a \E end quote operator is found.
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\E
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">The end quote operator, terminates a sequence begun
|
||||
with \Q.
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H3>What gets matched?
|
||||
</H3>
|
||||
<P>
|
||||
When the expression is compiled as a Perl-compatible regex then the matching
|
||||
algorithms will perform a depth first search on the state machine and report
|
||||
the first match found.</P>
|
||||
<P>
|
||||
When the expression is compiled as a POSIX-compatible regex then the matching
|
||||
algorithms will match the first possible matching string, if more than one
|
||||
string starting at a given location can match then it matches the longest
|
||||
possible string, unless the flag match_any is set, in which case the first
|
||||
match encountered is returned. Use of the match_any option can reduce the time
|
||||
taken to find the match - but is only useful if the user is less concerned
|
||||
about what matched - for example it would not be suitable for search and
|
||||
replace operations. In cases where there are multiple possible matches all
|
||||
starting at the same location, and all of the same length, then the match
|
||||
chosen is the one with the longest first sub-expression, if that is the same
|
||||
for two or more matches, then the second sub-expression will be examined and so
|
||||
on.
|
||||
</P>
|
||||
<P>
|
||||
The examples in the following table illustrate the main differences between Perl and
|
||||
POSIX regular expression matching rules:
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
|
||||
<TBODY>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>Expression</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>Text</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>POSIX leftmost longest match</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>ECMAScript depth first search match</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>a|ab</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> xaby</CODE>
|
||||
</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> "ab"</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> "a"</CODE></P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> .*([[:alnum:]]+).*</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> " abc def xyz "</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>$0 = " abc def xyz "<BR>
|
||||
$1 = "abc"</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>$0 = " abc def xyz "<BR>
|
||||
$1 = "z"</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> .*(a|xayy)</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> zzxayyzz</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> "zzxayy"</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>"zzxa"</CODE></P>
|
||||
</TD>
|
||||
</TR>
|
||||
</TBODY></TABLE>
|
||||
<P>These differences between Perl matching rules, and POSIX matching rules, mean
|
||||
that these two regular expression syntaxes differ not only in the features
|
||||
offered, but also in the form that the state machine takes and/or the
|
||||
algorithms used to traverse the state machine.</P>
|
||||
depend upon the <A href="syntax_option_type.html">flags</A> used during
|
||||
expression compilation.
|
||||
</P>
|
||||
<P>There are three main syntax options available, depending upon how
|
||||
you construct the regular expression object:</P>
|
||||
<UL>
|
||||
<LI>
|
||||
<A href="syntax_perl.html">Perl</A> (this is the default behavior).</LI>
|
||||
<LI>
|
||||
<A href="syntax_extended.html">POSIX extended</A> (including the <A href="syntax_extended.html#egrep">
|
||||
egrep</A> and <A href="syntax_extended.html#awk">awk</A> variations).</LI>
|
||||
<LI>
|
||||
<A href="syntax_basic.html">POSIX Basic</A> (including the <A href="syntax_basic.html#grep">
|
||||
grep</A> and <A href="syntax_basic.html#emacs">emacs</A> variations).</LI></UL>
|
||||
<P>You can also construct a regular expression that treats every character as a <A href="syntax_option_type.html#literals">
|
||||
literal</A>, but that's not really a "syntax"!</P>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
10 Sept 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
226
doc/Attic/syntax_basic.html
Normal file
226
doc/Attic/syntax_basic.html
Normal file
@ -0,0 +1,226 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: POSIX-Basic Regular Expression Syntax</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../c++boost.gif" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">POSIX Basic Regular Expression Syntax</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A> <dt><A href="#Basic">POSIX Basic Syntax</A> <dt><A href="#variations">
|
||||
Variations</A>
|
||||
<dd>
|
||||
<dl>
|
||||
<dt><A href="#grep">Grep</A> <dt><A href="#emacs">Emacs</A></dt>
|
||||
</dl>
|
||||
<dt><A href="#options">Options</A> <dt><A href="#refs">References</A></dt>
|
||||
</dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<P>The POSIX-Basic regular expression syntax is used by the Unix utility <EM>sed</EM>,
|
||||
and variations are used by <EM>grep</EM> and <EM>emacs</EM>. You can
|
||||
construct POSIX basic regular expressions in Boost.Regex by passing the flag <EM>basic</EM>
|
||||
to the regex constructor, for example:</P>
|
||||
<PRE>// e1 is a case sensitive POSIX-Basic expression:
|
||||
boost::regex e1(my_expression, boost::regex::basic);
|
||||
// e2 a case insensitive POSIX-Basic expression:
|
||||
boost::regex e2(my_expression, boost::regex::basic|boost::regex::icase);</PRE>
|
||||
<H3>POSIX Basic Syntax<A name="Basic"></A></H3>
|
||||
<P>In POSIX-Basic regular expressions, all characters match themselves except
|
||||
for the following special characters:</P>
|
||||
<PRE>.[\*^$</PRE>
|
||||
<H4>Wildcard:</H4>
|
||||
<P>The single character '.' when used outside of a character set will match any
|
||||
single character except:</P>
|
||||
<P>The NULL character when the flag <EM>match_not_dot_null</EM> is passed to the
|
||||
matching algorithms.</P>
|
||||
<P>The newline character when the flag <EM>match_not_dot_newline</EM> is passed to
|
||||
the matching algorithms.</P>
|
||||
<H4>Anchors:</H4>
|
||||
<P>A '^' character shall match the start of a line when used as the first
|
||||
character of an expression, or the first character of a sub-expression.</P>
|
||||
<P>A '$' character shall match the end of a line when used as the last character
|
||||
of an expression, or the last character of a sub-expression.</P>
|
||||
<H4>Marked sub-expressions:</H4>
|
||||
<P>A section beginning \( and ending \) acts as a marked sub-expression.
|
||||
Whatever matched the sub-expression is split out in a separate field by the
|
||||
matching algorithms. Marked sub-expressions can also be repeated, or referred to by a back-reference.</P>
|
||||
<H4>Repeats:</H4>
|
||||
<P>Any atom (a single character, a marked sub-expression, or a character class)
|
||||
can be repeated with the * operator.</P>
|
||||
<P>For example a* will match any number of letter a's repeated zero or more times
|
||||
(an atom repeated zero times matches an empty string), so the expression a*b
|
||||
will match any of the following:</P>
|
||||
<PRE>b
|
||||
ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>An atom can also be repeated with a bounded repeat:</P>
|
||||
<P>a\{n\} Matches 'a' repeated exactly <EM>n</EM> times.</P>
|
||||
<P>a\{n,\} Matches 'a' repeated <EM>n</EM> or more times.</P>
|
||||
<P>a\{n, m\} Matches 'a' repeated between <EM>n</EM> and <EM>m</EM> times
|
||||
inclusive.</P>
|
||||
<P>For example:</P>
|
||||
<PRE>^a\{2,3\}$</PRE>
|
||||
<P>Will match either of:</P>
|
||||
<PRE>aa
|
||||
aaa</PRE>
|
||||
<P>But neither of:</P>
|
||||
<PRE>a
|
||||
aaaa</PRE>
|
||||
<P>It is an error to use a repeat operator, if the preceding construct can not be
|
||||
repeated, for example:</P>
|
||||
<PRE>a\(*\)</PRE>
|
||||
<P>Will raise an error, as there is nothing for the * operator to be applied to.</P>
|
||||
<H4>Back references:</H4>
|
||||
<P>An escape character followed by a digit <EM>n</EM>, where <EM>n </EM>is in the
|
||||
range 1-9, matches the same string that was matched by sub-expression <EM>n</EM>.
|
||||
For example the expression:</P>
|
||||
<PRE>^\(a*\).*\1$</PRE>
|
||||
<P>Will match the string:</P>
|
||||
<PRE>aaabbaaa</PRE>
|
||||
<P>But not the string:</P>
|
||||
<PRE>aaabba</PRE>
|
||||
<H4>Character sets:</H4>
|
||||
<P>A character set is a bracket-expression starting with [ and ending with ], it
|
||||
defines a set of characters, and matches any single character that is a member
|
||||
of that set.</P>
|
||||
<P>A bracket expression may contain any combination of the following:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Single characters:</H5>
|
||||
<P>For example [abc], will match any of the characters 'a', 'b', or 'c'.</P>
|
||||
<H5>Character ranges:</H5>
|
||||
<P>For example [a-c] will match any single character in the range 'a' to
|
||||
'c'. By default, for POSIX-Basic regular expressions, a character <EM>x</EM>
|
||||
is within the range <EM>y</EM> to <EM>z</EM>, if it collates within that
|
||||
range; this results in locale specific behavior. This behavior can
|
||||
be turned off by unsetting the <EM><A href="syntax_option_type.html#basic">collate</A></EM>
|
||||
option flag - in which case whether a character appears within a range is
|
||||
determined by comparing the code points of the characters only.</P>
|
||||
<H5>Negation:</H5>
|
||||
<P>If the bracket-expression begins with the ^ character, then it matches the
|
||||
complement of the characters it contains, for example [^a-c] matches any
|
||||
character that is not in the range a-c.</P>
|
||||
<H5>Character classes:</H5>
|
||||
<P>An expression of the form [[:name:]] matches the named character class "name",
|
||||
for example [[:lower:]] matches any lower case character. See <A href="character_class_names.html">
|
||||
character class names</A>.</P>
|
||||
<H5>Collating Elements:</H5>
|
||||
<P>An expression of the form [[.col.]] matches the collating element <EM>col</EM>.
|
||||
A collating element is any single character, or any sequence of characters that
|
||||
collates as a single unit. Collating elements may also be used as the end
|
||||
point of a range, for example: [[.ae.]-c] matches the character sequence "ae",
|
||||
plus any single character in the range "ae"-c, assuming that "ae" is treated
|
||||
as a single collating element in the current locale.</P>
|
||||
<P>As an extension, a collating element may also be specified via its <A href="collating_names.html">
|
||||
symbolic name</A>, for example:</P>
|
||||
<P>[[.NUL.]]</P>
|
||||
<P>matches a NUL character.</P>
|
||||
<H5>Equivalence classes:</H5>
|
||||
<P>
|
||||
An expression of the form [[=col=]], matches any character or collating element
|
||||
whose primary sort key is the same as that for collating element <EM>col</EM>,
|
||||
as with collating elements the name <EM>col</EM> may be a <A href="collating_names.html">
|
||||
symbolic name</A>. A primary sort key is one that ignores case,
|
||||
accentation, or locale-specific tailorings; so for example [[=a=]] matches any
|
||||
of the characters: a, &Agrave;, &Aacute;, &Acirc;, &Atilde;, &Auml;, &Aring;, A, &agrave;, &aacute;, &acirc;, &atilde;, &auml; and &aring;.
|
||||
Unfortunately implementation of this is reliant on the platform's collation and
|
||||
localisation support; this feature can not be relied upon to work portably
|
||||
across all platforms, or even all locales on one platform.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H5>Combinations:</H5>
|
||||
<P>All of the above can be combined in one character set declaration, for example:
|
||||
[[:digit:]a-c[.NUL.]].</P>
|
||||
<H4>Escapes</H4>
|
||||
<P>With the exception of the escape sequences \{, \}, \(, and \), which are
|
||||
documented above, an escape followed by any character matches that
|
||||
character. This can be used to make the special characters .[\*^$,
|
||||
"ordinary". Note that the escape character loses its special meaning
|
||||
inside a character set, so [\^] will match either a literal '\' or a '^'.</P>
|
||||
<H3><A name="variations"></A>Variations</H3>
|
||||
<H4><A name="grep"></A>Grep</H4>
|
||||
<P>When an expression is compiled with the flag <EM>grep</EM> set, then the
|
||||
expression is treated as a newline separated list of <A href="#Basic">POSIX-Basic</A>
|
||||
expressions, a match is found if any of the expressions in the list match, for
|
||||
example:</P>
|
||||
<PRE>boost::regex e("abc\ndef", boost::regex::grep);</PRE>
|
||||
<P>will match either of the POSIX-Basic expressions "abc" or "def".</P>
|
||||
<P>As its name suggests, this behavior is consistent with the Unix utility <EM>grep</EM>.</P>
|
||||
<H4><A name="emacs"></A>emacs</H4>
|
||||
<P>In addition to the <A href="#Basic">POSIX-Basic features</A> the following
|
||||
characters are also special:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<P>+ repeats the preceding atom one or more times.</P>
|
||||
<P>? repeats the preceding atom zero or one times.</P>
|
||||
<P>*? A non-greedy version of *.</P>
|
||||
<P>+? A non-greedy version of +.</P>
|
||||
<P>?? A non-greedy version of ?.</P>
|
||||
</BLOCKQUOTE>
|
||||
<P>And the following escape sequences are also recognised:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<P>\| specifies an alternative.</P>
|
||||
<P>\(?: ... \) is a non-marking grouping construct - allows you to
|
||||
lexically group something without spitting out an extra sub-expression.</P>
|
||||
<P>\w matches any word character.</P>
|
||||
<P>\W matches any non-word character.</P>
|
||||
<P>\sx matches any character in the syntax group <EM>x</EM>, the following emacs
|
||||
groupings are supported: 's', ' ', '_', 'w', '.', ')', '(', '"', '\'', '>'
|
||||
and '<'. Refer to the emacs docs for details.</P>
|
||||
<P>\Sx matches any character not in the syntax grouping <EM>x</EM>.</P>
|
||||
<P>\c and \C are not supported.</P>
|
||||
<P>\` matches zero characters only at the start of a buffer (or string being
|
||||
matched).</P>
|
||||
<P>\' matches zero characters only at the end of a buffer (or string being
|
||||
matched).</P>
|
||||
<P>\b matches zero characters at a word boundary.</P>
|
||||
<P>\B matches zero characters, not at a word boundary.</P>
|
||||
<P>\< matches zero characters only at the start of a word.</P>
|
||||
<P>\> matches zero characters only at the end of a word.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H3><A name="options"></A>Options</H3>
|
||||
<P>There are a <A href="syntax_option_type.html#basic">variety of flags</A> that
|
||||
may be combined with the <EM>basic</EM> and <EM>grep</EM> options when
|
||||
constructing the regular expression, in particular note that the <A href="syntax_option_type.html#basic">
|
||||
newline_alt, no_char_classes, no_intervals, bk_plus_qm and bk_plus_vbar</A> options
|
||||
all alter the syntax, while the <A href="syntax_option_type.html#basic">collate
|
||||
and icase</A> options modify how the case and locale sensitivity are to be
|
||||
applied.</P>
|
||||
<H3><A name="refs"></A>References</H3>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap09.html">IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions
|
||||
and Headers, Section 9, Regular Expressions (FWD.1).</A></P>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/utilities/grep.html">IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and
|
||||
Utilities, Section 4, Utilities, grep (FWD.1).</A></P>
|
||||
<P><A href="http://www.gnu.org/software/emacs/">Emacs Version 21.3</A>.</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
21 Aug 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a> 2004</I></P>
|
||||
<I>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>).</I></P>
|
||||
</I>
|
||||
</body>
|
||||
</html>
|
||||
|
471
doc/Attic/syntax_extended.html
Normal file
471
doc/Attic/syntax_extended.html
Normal file
@ -0,0 +1,471 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: POSIX-Extended Regular Expression Syntax</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../c++boost.gif" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">POSIX-Extended Regular Expression Syntax</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A> <dt><A href="#extended">POSIX Extended Syntax</A>
|
||||
<dt><A href="#variations">Variations</A>
|
||||
<dd>
|
||||
<dl>
|
||||
<dt><A href="#egrep">egrep</A> <dt><A href="#awk">awk</A> </dt>
|
||||
</dl>
|
||||
<dt><A href="#options">Options</A> <dt><A href="#refs">References</A></dt>
|
||||
</dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<P>The POSIX-Extended regular expression syntax is supported by the POSIX C
|
||||
regular expression APIs, and variations are used by the utilities <EM>egrep</EM>
|
||||
and <EM>awk</EM>. You can construct POSIX extended regular expressions in
|
||||
Boost.Regex by passing the flag <EM>extended</EM> to the regex constructor, for
|
||||
example:</P>
|
||||
<PRE>// e1 is a case sensitive POSIX-Extended expression:
|
||||
boost::regex e1(my_expression, boost::regex::extended);
|
||||
// e2 a case insensitive POSIX-Extended expression:
|
||||
boost::regex e2(my_expression, boost::regex::extended|boost::regex::icase);</PRE>
|
||||
<H3>POSIX Extended Syntax<A name="extended"></A></H3>
|
||||
<P>In POSIX-Extended regular expressions, all characters match themselves except
|
||||
for the following special characters:</P>
|
||||
<PRE>.[{()\*+?|^$</PRE>
|
||||
<H4>Wildcard:</H4>
|
||||
<P>The single character '.' when used outside of a character set will match any
|
||||
single character except:</P>
|
||||
<P>The NULL character when the flag <EM>match_not_dot_null</EM> is passed to the
|
||||
matching algorithms.</P>
|
||||
<P>The newline character when the flag <EM>match_not_dot_newline</EM> is passed to
|
||||
the matching algorithms.</P>
|
||||
<H4>Anchors:</H4>
|
||||
<P>A '^' character shall match the start of a line when used as the first
|
||||
character of an expression, or the first character of a sub-expression.</P>
|
||||
<P>A '$' character shall match the end of a line when used as the last character
|
||||
of an expression, or the last character of a sub-expression.</P>
|
||||
<H4>Marked sub-expressions:</H4>
|
||||
<P>A section beginning ( and ending ) acts as a marked sub-expression.
|
||||
Whatever matched the sub-expression is split out in a separate field by the
|
||||
matching algorithms. Marked sub-expressions can also be repeated, or referred
|
||||
to by a back-reference.</P>
|
||||
<H4>Repeats:</H4>
|
||||
<P>Any atom (a single character, a marked sub-expression, or a character class)
|
||||
can be repeated with the *, +, ?, and {} operators.</P>
|
||||
<P>The * operator will match the preceding atom zero or more times, for example
|
||||
the expression a*b will match any of the following:</P>
|
||||
<PRE>b
|
||||
ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>The + operator will match the preceding atom one or more times, for example
|
||||
the expression a+b will match any of the following:</P>
|
||||
<PRE>ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>But will not match:</P>
|
||||
<PRE>b</PRE>
|
||||
<P>The ? operator will match the preceding atom zero or one times, for
|
||||
example the expression ca?b will match any of the following:</P>
|
||||
<PRE>cb
|
||||
cab</PRE>
|
||||
<P>But will not match:</P>
|
||||
<PRE>caab</PRE>
|
||||
<P>An atom can also be repeated with a bounded repeat:</P>
|
||||
<P>a{n} Matches 'a' repeated exactly <EM>n</EM> times.</P>
|
||||
<P>a{n,} Matches 'a' repeated <EM>n</EM> or more times.</P>
|
||||
<P>a{n, m} Matches 'a' repeated between <EM>n</EM> and <EM>m</EM> times
|
||||
inclusive.</P>
|
||||
<P>For example:</P>
|
||||
<PRE>^a{2,3}$</PRE>
|
||||
<P>Will match either of:</P>
|
||||
<PRE>aa
|
||||
aaa</PRE>
|
||||
<P>But neither of:</P>
|
||||
<PRE>a
|
||||
aaaa</PRE>
|
||||
<P>It is an error to use a repeat operator, if the preceding construct can not be
|
||||
repeated, for example:</P>
|
||||
<PRE>a(*)</PRE>
|
||||
<P>Will raise an error, as there is nothing for the * operator to be applied to.</P>
|
||||
<H4>Back references:</H4>
|
||||
<P>An escape character followed by a digit <EM>n</EM>, where <EM>n </EM>is in the
|
||||
range 1-9, matches the same string that was matched by sub-expression <EM>n</EM>.
|
||||
For example the expression:</P>
|
||||
<PRE>^(a*).*\1$</PRE>
|
||||
<P>Will match the string:</P>
|
||||
<PRE>aaabbaaa</PRE>
|
||||
<P>But not the string:</P>
|
||||
<PRE>aaabba</PRE>
|
||||
<P><EM><STRONG>Caution</STRONG>: the POSIX standard does not support back-references
|
||||
for "extended" regular expressions, this is a compatible extension to that
|
||||
standard.</EM></P>
|
||||
<H4>Alternation</H4>
|
||||
<P>The | operator will match either of its arguments, so for example: abc|def will
|
||||
match either "abc" or "def".
|
||||
</P>
|
||||
<P>Parentheses can be used to group alternations, for example: ab(d|ef) will match
|
||||
either of "abd" or "abef".</P>
|
||||
<H4>Character sets:</H4>
|
||||
<P>A character set is a bracket-expression starting with [ and ending with ], it
|
||||
defines a set of characters, and matches any single character that is a member
|
||||
of that set.</P>
|
||||
<P>A bracket expression may contain any combination of the following:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Single characters:</H5>
|
||||
<P>For example [abc], will match any of the characters 'a', 'b', or 'c'.</P>
|
||||
<H5>Character ranges:</H5>
|
||||
<P>For example [a-c] will match any single character in the range 'a' to
|
||||
'c'. By default, for POSIX-Extended regular expressions, a character <EM>x</EM>
|
||||
is within the range <EM>y</EM> to <EM>z</EM>, if it collates within that
|
||||
range; this results in locale specific behavior. This behavior can
|
||||
be turned off by unsetting the <EM><A href="syntax_option_type.html#extended">collate</A></EM>
|
||||
option flag - in which case whether a character appears within a range is
|
||||
determined by comparing the code points of the characters only.</P>
|
||||
<H5>Negation:</H5>
|
||||
<P>If the bracket-expression begins with the ^ character, then it matches the
|
||||
complement of the characters it contains, for example [^a-c] matches any
|
||||
character that is not in the range a-c.</P>
|
||||
<H5>Character classes:</H5>
|
||||
<P>An expression of the form [[:name:]] matches the named character class "name",
|
||||
for example [[:lower:]] matches any lower case character. See <A href="character_class_names.html">
|
||||
character class names</A>.</P>
|
||||
<H5>Collating Elements:</H5>
|
||||
<P>An expression of the form [[.col.]] matches the collating element <EM>col</EM>.
|
||||
A collating element is any single character, or any sequence of characters that
|
||||
collates as a single unit. Collating elements may also be used as the end
|
||||
point of a range, for example: [[.ae.]-c] matches the character sequence "ae",
|
||||
plus any single character in the range "ae"-c, assuming that "ae" is treated
|
||||
as a single collating element in the current locale.</P>
|
||||
<P>As an extension, a collating element may also be specified via its <A href="collating_names.html">
|
||||
symbolic name</A>, for example:</P>
|
||||
<P>[[.NUL.]]</P>
|
||||
<P>matches a NUL character.</P>
|
||||
<H5>Equivalence classes:</H5>
|
||||
<P>
|
||||
An expression of the form [[=col=]], matches any character or collating element
|
||||
whose primary sort key is the same as that for collating element <EM>col</EM>,
|
||||
as with collating elements the name <EM>col</EM> may be a <A href="collating_names.html">
|
||||
symbolic name</A>. A primary sort key is one that ignores case,
|
||||
accentation, or locale-specific tailorings; so for example [[=a=]] matches any
|
||||
of the characters: a, &Agrave;, &Aacute;, &Acirc;, &Atilde;, &Auml;, &Aring;, A, &agrave;, &aacute;, &acirc;, &atilde;, &auml; and &aring;.
|
||||
Unfortunately implementation of this is reliant on the platform's collation and
|
||||
localisation support; this feature can not be relied upon to work portably
|
||||
across all platforms, or even all locales on one platform.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H5>Combinations:</H5>
|
||||
<P>All of the above can be combined in one character set declaration, for example:
|
||||
[[:digit:]a-c[.NUL.]].</P>
|
||||
<H4>Operator precedence</H4>
|
||||
<P> The order of precedence of operators is as shown in the following
|
||||
table:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>Collation-related bracket symbols</TD>
|
||||
<TD>[==] [::] [..]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Escaped characters
|
||||
</TD>
|
||||
<TD>\</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Character set (bracket expression)
|
||||
</TD>
|
||||
<TD>[]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Grouping</TD>
|
||||
<TD>()</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Single-character-ERE duplication
|
||||
</TD>
|
||||
<TD>* + ? {m,n}</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Concatenation</TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Anchoring</TD>
|
||||
<TD>^$</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Alternation</TD>
|
||||
<TD>|</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H4>Escapes</H4>
|
||||
<P>The POSIX standard defines no escape sequences for POSIX-Extended regular
|
||||
expressions, except that:</P>
|
||||
<UL>
|
||||
<LI>
|
||||
Any special character preceded by an escape shall match itself.
|
||||
<LI>
|
||||
The effect of any ordinary character being preceded by an escape is undefined.
|
||||
<LI>
|
||||
An escape inside a character class declaration shall match itself (in other
|
||||
words the escape character is not "special" inside a character class
|
||||
declaration).</LI></UL>
|
||||
<P>However, that's rather restrictive, so the following standard-compatible
|
||||
extensions are also supported by Boost.Regex:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Escapes matching a specific character</H5>
|
||||
<P>The following escape sequences are all synonyms for single characters:</P>
|
||||
<P>
|
||||
<TABLE id="Table7" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Escape</STRONG></TD>
|
||||
<TD><STRONG>Character</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\a</TD>
|
||||
<TD>'\a'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\e</TD>
|
||||
<TD>0x1B</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\f</TD>
|
||||
<TD>\f</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\n</TD>
|
||||
<TD>\n</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\r</TD>
|
||||
<TD>\r</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\t</TD>
|
||||
<TD>\t</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\v</TD>
|
||||
<TD>\v</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\b</TD>
|
||||
<TD>\b (but only inside a character class declaration).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\cX</TD>
|
||||
<TD>An ASCII escape sequence - the character whose code point is X % 32</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\xdd</TD>
|
||||
<TD>A hexadecimal escape sequence - matches the single character whose code point
|
||||
is 0xdd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\x{dddd}</TD>
|
||||
<TD>A hexadecimal escape sequence - matches the single character whose code point
|
||||
is 0xdddd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\0ddd</TD>
|
||||
<TD>An octal escape sequence - matches the single character whose code point is
|
||||
0ddd.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>"Single character" character classes:</H5>
|
||||
<P>Any escaped character <EM>x</EM>, if <EM>x</EM> is the name of a character
|
||||
class shall match any character that is a member of that class, and any escaped
|
||||
character <EM>X</EM>, if <EM>x</EM> is the name of a character class, shall
|
||||
match any character not in that class.</P>
|
||||
<P>The following are supported by default:</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="1" cellPadding="1" width="300" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Escape sequence</STRONG></TD>
|
||||
<TD><STRONG>Equivalent to</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\d</TD>
|
||||
<TD>[[:digit:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\l</TD>
|
||||
<TD>[[:lower:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\s</TD>
|
||||
<TD>[[:space:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\u</TD>
|
||||
<TD>[[:upper:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\w</TD>
|
||||
<TD>[[:word:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\D</TD>
|
||||
<TD>[^[:digit:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\L</TD>
|
||||
<TD>[^[:lower:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\S</TD>
|
||||
<TD>[^[:space:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\U</TD>
|
||||
<TD>[^[:upper:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\W</TD>
|
||||
<TD>[^[:word:]]</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Word Boundaries</H5>
|
||||
<P>The following escape sequences match the boundaries of words:</P>
|
||||
<P>
|
||||
<TABLE id="Table4" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\&lt;</TD>
|
||||
<TD>Matches the start of a word.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\></TD>
|
||||
<TD>Matches the end of a word.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\b</TD>
|
||||
<TD>Matches a word boundary (the start or end of a word).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\B</TD>
|
||||
<TD>Matches only when not at a word boundary.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Buffer boundaries</H5>
|
||||
<P>The following match only at buffer boundaries: a "buffer" in this context is
|
||||
the whole of the input text that is being matched against (note that ^ and
|
||||
$ may match embedded newlines within the text).</P>
|
||||
<P>
|
||||
<TABLE id="Table5" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\`</TD>
|
||||
<TD>Matches at the start of a buffer only.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\'</TD>
|
||||
<TD>Matches at the end of a buffer only.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\A</TD>
|
||||
<TD>Matches at the start of a buffer only (the same as \`).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\z</TD>
|
||||
<TD>Matches at the end of a buffer only (the same as \').</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\Z</TD>
|
||||
<TD>Matches an optional sequence of newlines at the end of a buffer: equivalent to
|
||||
the regular expression \n*\z</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Continuation Escape</H5>
|
||||
<P>The sequence \G matches only at the end of the last match found, or at the
|
||||
start of the text being matched if no previous match was found. This
|
||||
escape is useful if you're iterating over the matches contained within a text, and
|
||||
you want each subsequent match to start where the last one ended.</P>
|
||||
<H5>Quoting escape</H5>
|
||||
<P>The escape sequence \Q begins a "quoted sequence": all the subsequent
|
||||
characters are treated as literals, until either the end of the regular
|
||||
expression or \E is found. For example the expression: \Q\*+\Ea+ would
|
||||
match either of:</P>
|
||||
<PRE>\*+a<BR>\*+aaa</PRE>
|
||||
<H5>Unicode escapes</H5>
|
||||
<P>
|
||||
<TABLE id="Table6" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\C</TD>
|
||||
<TD>Matches a single code point: in Boost regex this has exactly the same effect
|
||||
as a "." operator.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\X</TD>
|
||||
<TD>Matches a combining character sequence: that is any non-combining character
|
||||
followed by a sequence of zero or more combining characters.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Any other escape</H5>
|
||||
<P>Any other escape sequence matches the character that is escaped, for example \@
|
||||
matches a literal '@'.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H3><A name="variations"></A>Variations</H3>
|
||||
<H4>Egrep<A name="egrep"></A></H4>
|
||||
<P>When an expression is compiled with the flag <EM>egrep</EM> set, then the
|
||||
expression is treated as a newline separated list of POSIX-Extended
|
||||
expressions, a match is found if any of the expressions in the list match, for
|
||||
example:</P>
|
||||
<PRE>boost::regex e("abc\ndef", boost::regex::egrep);</PRE>
|
||||
<P>will match either of the POSIX-Extended expressions "abc" or "def".</P>
|
||||
<P>As its name suggests, this behavior is consistent with the Unix utility <EM>egrep</EM>,
|
||||
and with <EM>grep</EM> when used with the -E option.</P>
|
||||
<H4>awk<A name="awk"></A></H4>
|
||||
<P>In addition to the <A href="#extended">POSIX-Extended features</A> the
|
||||
escape character is special inside a character class declaration. </P>
|
||||
<P>In addition, some escape sequences that are not defined as part of
|
||||
POSIX-Extended specification are required to be supported - however Boost.Regex
|
||||
supports these by default anyway.</P>
|
||||
<H3><A name="options"></A>Options</H3>
|
||||
<P>There are a <A href="syntax_option_type.html#extended">variety of flags</A> that
|
||||
may be combined with the <EM>extended</EM> and <EM>egrep</EM> options when
|
||||
constructing the regular expression, in particular note that the <A href="syntax_option_type.html#extended">
|
||||
newline_alt</A> option alters the syntax, while the <A href="syntax_option_type.html#extended">
|
||||
collate, nosubs and icase</A> options modify how the case and locale
|
||||
sensitivity are to be applied.</P>
|
||||
<H3><A name="refs"></A>References</H3>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap09.html"> IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions
|
||||
and Headers, Section 9, Regular Expressions.</A></P>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/utilities/grep.html"> IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and
|
||||
Utilities, Section 4, Utilities, egrep.</A></P>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/utilities/awk.html">IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and
|
||||
Utilities, Section 4, Utilities, awk.</A></P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
21 Aug 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a> 2004</I></P>
|
||||
<I>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>).</I></P>
|
||||
</I>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -175,7 +175,7 @@ static const syntax_option_type collate;
|
||||
<TD>No</TD>
|
||||
<TD>Normally Boost.Regex behaves as if the Perl m-modifier is on: so the
|
||||
assertions ^ and $ match after and before embedded newlines respectively,
|
||||
setting this flags is eqivalent to prefixing the expression with (?-m).</TD>
|
||||
setting this flag is equivalent to prefixing the expression with (?-m).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_mod_s</TD>
|
||||
@ -251,7 +251,7 @@ static const syntax_option_type collate;
|
||||
character classes permitted.</P>
|
||||
<P>In addition some perl-style escape sequences are supported (actually the awk
|
||||
syntax requires \a \b \t \v \f \n and \r to be recognised, but other
|
||||
escape sequences invoke undefined behaviour according to the POSIX standard).</P>
|
||||
escape sequences invoke undefined behavior according to the POSIX standard).</P>
|
||||
</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
@ -324,9 +324,9 @@ static const syntax_option_type collate;
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the grammar recognized by the regular expression engine is the
|
||||
same as that used by POSIX basic regular expressions in IEEE Std 1003.1-2001,
|
||||
Portable Operating System Interface (POSIX ), Base Definitions and Headers,
|
||||
Section 9, Regular Expressions (FWD.1).
|
||||
same as that used by <A href="syntax_basic.html#Basic">POSIX basic regular
|
||||
expressions</A> in IEEE Std 1003.1-2001, Portable Operating System Interface
|
||||
(POSIX ), Base Definitions and Headers, Section 9, Regular Expressions (FWD.1).
|
||||
</P>
|
||||
</TD>
|
||||
</TR>
|
||||
@ -340,13 +340,20 @@ static const syntax_option_type collate;
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the grammar recognized by the regular expression engine is the
|
||||
same as that used by POSIX utility grep in IEEE Std 1003.1-2001, Portable
|
||||
Operating System Interface (POSIX ), Shells and Utilities, Section 4,
|
||||
Utilities, grep (FWD.1).</P>
|
||||
same as that used by <A href="syntax_basic.html#grep">POSIX utility grep</A> in
|
||||
IEEE Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and
|
||||
Utilities, Section 4, Utilities, grep (FWD.1).</P>
|
||||
<P>That is to say, the same as POSIX basic syntax, but with the newline character
|
||||
acting as an alternation character in addition to "|".</P>
|
||||
acting as an alternation character; the expression is treated as a newline
|
||||
separated list of alternatives.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>emacs</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Specifies that the grammar recognised is the superset of the POSIX-Basic
|
||||
syntax used by the <A href="syntax_basic.html#emacs">emacs</A> program.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>The following options may also be set when using POSIX basic regular
|
||||
@ -390,7 +397,10 @@ static const syntax_option_type collate;
|
||||
<TD>collate</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that character ranges of the form "[a-b]" should be locale sensitive.</P>
|
||||
<P>Specifies that character ranges of the form "[a-b]" should be locale
|
||||
sensitive. <STRONG>This bit is</STRONG> <STRONG>on by default</STRONG> for
|
||||
POSIX-Basic regular expressions, but can be unset to force ranges to be
|
||||
compared by code point only.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
@ -398,7 +408,7 @@ static const syntax_option_type collate;
|
||||
<TD>No</TD>
|
||||
<TD>Specifies that the \n character has the same effect as the alternation
|
||||
operator |. Allows newline separated lists to be used as a list of
|
||||
alternatives.</TD>
|
||||
alternatives. This bit is already set, if you use the <EM>grep</EM> option.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_char_classes</TD>
|
||||
@ -482,3 +492,4 @@ static const syntax_option_type collate;
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
502
doc/Attic/syntax_perl.html
Normal file
502
doc/Attic/syntax_perl.html
Normal file
@ -0,0 +1,502 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Perl Regular Expression Syntax</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../c++boost.gif" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">
|
||||
Perl Regular Expression Syntax</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A> <dt><A href="#Perl">Perl Syntax</A> <dt><A href="#variations">
|
||||
Variations</A>
|
||||
<dd>
|
||||
<dt><A href="#options">Options</A> <dt><A href="#mods">Modifiers</A> <dt><A href="#refs">References</A></dt>
|
||||
</dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<P>The Perl regular expression syntax is based on that used by the programming
|
||||
language <EM>Perl</EM>. Perl regular expressions are the default
|
||||
behavior in Boost.Regex or you can pass the flag <EM>perl</EM> to the
|
||||
regex constructor, for example:</P>
|
||||
<PRE>// e1 is a case sensitive Perl regular expression:
|
||||
// since Perl is the default option there's no need to explicitly specify the syntax used here:
|
||||
boost::regex e1(my_expression);
|
||||
// e2 is a case insensitive Perl regular expression:
|
||||
boost::regex e2(my_expression, boost::regex::perl|boost::regex::icase);</PRE>
|
||||
<H3>Perl Regular Expression Syntax<A name="Perl"></A></H3>
|
||||
<P>In Perl regular expressions, all characters match themselves except for
|
||||
the following special characters:</P>
|
||||
<PRE>.[{()\*+?|^$</PRE>
|
||||
<H4>Wildcard:</H4>
|
||||
<P>The single character '.' when used outside of a character set will match any
|
||||
single character except:</P>
|
||||
<P>The NULL character when the flag <EM>match_not_dot_null</EM> is passed to the
|
||||
matching algorithms.</P>
|
||||
<P>The newline character when the flag <EM>match_not_dot_newline</EM> is passed to
|
||||
the matching algorithms.</P>
|
||||
<H4>Anchors:</H4>
|
||||
<P>A '^' character shall match the start of a line.</P>
|
||||
<P>A '$' character shall match the end of a line.</P>
|
||||
<H4>Marked sub-expressions:</H4>
|
||||
<P>A section beginning ( and ending ) acts as a marked sub-expression.
|
||||
Whatever matched the sub-expression is split out in a separate field by the
|
||||
matching algorithms. Marked sub-expressions can also be repeated, or referred
|
||||
to by a back-reference.</P>
|
||||
<H4>Non-marking grouping:</H4>
|
||||
<P>A marked sub-expression is useful to lexically group part of a regular
|
||||
expression, but has the side-effect of spitting out an extra field in the
|
||||
result. As an alternative you can lexically group part of a regular
|
||||
expression, without generating a marked sub-expression by using (?: and ) , for
|
||||
example (?:ab)+ will repeat "ab" without splitting out any separate
|
||||
sub-expressions.</P>
|
||||
<H4>Repeats:</H4>
|
||||
<P>Any atom (a single character, a marked sub-expression, or a character class)
|
||||
can be repeated with the *, +, ?, and {} operators.</P>
|
||||
<P>The * operator will match the preceding atom zero or more times, for example
|
||||
the expression a*b will match any of the following:</P>
|
||||
<PRE>b
|
||||
ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>The + operator will match the preceding atom one or more times, for example
|
||||
the expression a+b will match any of the following:</P>
|
||||
<PRE>ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>But will not match:</P>
|
||||
<PRE>b</PRE>
|
||||
<P>The ? operator will match the preceding atom zero or one times, for
|
||||
example the expression ca?b will match any of the following:</P>
|
||||
<PRE>cb
|
||||
cab</PRE>
|
||||
<P>But will not match:</P>
|
||||
<PRE>caab</PRE>
|
||||
<P>An atom can also be repeated with a bounded repeat:</P>
|
||||
<P>a{n} Matches 'a' repeated exactly <EM>n</EM> times.</P>
|
||||
<P>a{n,} Matches 'a' repeated <EM>n</EM> or more times.</P>
|
||||
<P>a{n, m} Matches 'a' repeated between <EM>n</EM> and <EM>m</EM> times
|
||||
inclusive.</P>
|
||||
<P>For example:</P>
|
||||
<PRE>^a{2,3}$</PRE>
|
||||
<P>Will match either of:</P>
|
||||
<PRE>aa
|
||||
aaa</PRE>
|
||||
<P>But neither of:</P>
|
||||
<PRE>a
|
||||
aaaa</PRE>
|
||||
<P>It is an error to use a repeat operator, if the preceding construct can not be
|
||||
repeated, for example:</P>
|
||||
<PRE>a(*)</PRE>
|
||||
<P>Will raise an error, as there is nothing for the * operator to be applied to.</P>
|
||||
<H4>Non greedy repeats</H4>
|
||||
<P>The normal repeat operators are "greedy", that is to say they will consume as
|
||||
much input as possible. There are non-greedy versions available that will
|
||||
consume as little input as possible while still producing a match.</P>
|
||||
<P>*? Matches the previous atom zero or more times, while consuming as little
|
||||
input as possible.</P>
|
||||
<P>+? Matches the previous atom one or more times, while consuming as little input
|
||||
as possible.</P>
|
||||
<P>?? Matches the previous atom zero or one times, while consuming as little input
|
||||
as possible.</P>
|
||||
<P>{n,}? Matches the previous atom <EM>n</EM> or more times, while consuming
|
||||
as little input as possible.</P>
|
||||
<P>{n,m}? Matches the previous atom between <EM>n</EM> and <EM>m</EM> times,
|
||||
while consuming as little input as possible.</P>
|
||||
<H4>Back references:</H4>
|
||||
<P>An escape character followed by a digit <EM>n</EM>, where <EM>n </EM>is in the
|
||||
range 1-9, matches the same string that was matched by sub-expression <EM>n</EM>.
|
||||
For example the expression:</P>
|
||||
<PRE>^(a*).*\1$</PRE>
|
||||
<P>Will match the string:</P>
|
||||
<PRE>aaabbaaa</PRE>
|
||||
<P>But not the string:</P>
|
||||
<PRE>aaabba</PRE>
|
||||
<H4>Alternation</H4>
|
||||
<P>The | operator will match either of its arguments, so for example: abc|def will
|
||||
match either "abc" or "def".
|
||||
</P>
|
||||
<P>Parentheses can be used to group alternations, for example: ab(d|ef) will match
|
||||
either of "abd" or "abef".</P>
|
||||
<H4>Character sets:</H4>
|
||||
<P>A character set is a bracket-expression starting with [ and ending with ], it
|
||||
defines a set of characters, and matches any single character that is a member
|
||||
of that set.</P>
|
||||
<P>A bracket expression may contain any combination of the following:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Single characters:</H5>
|
||||
<P>For example [abc], will match any of the characters 'a', 'b', or 'c'.</P>
|
||||
<H5>Character ranges:</H5>
|
||||
<P>For example [a-c] will match any single character in the range 'a' to
|
||||
'c'. By default, for Perl regular expressions, a character <EM>x</EM>
|
||||
is within the range <EM>y</EM> to <EM>z</EM>, if it collates within that
|
||||
range; this results in locale specific behavior. This behavior can
|
||||
be turned off by unsetting the <EM><A href="syntax_option_type.html#Perl">collate</A></EM>
|
||||
option flag - in which case whether a character appears within a range is
|
||||
determined by comparing the code points of the characters only.</P>
|
||||
<H5>Negation:</H5>
|
||||
<P>If the bracket-expression begins with the ^ character, then it matches the
|
||||
complement of the characters it contains, for example [^a-c] matches any
|
||||
character that is not in the range a-c.</P>
|
||||
<H5>Character classes:</H5>
|
||||
<P>An expression of the form [[:name:]] matches the named character class "name",
|
||||
for example [[:lower:]] matches any lower case character. See <A href="character_class_names.html">
|
||||
character class names</A>.</P>
|
||||
<H5>Collating Elements:</H5>
|
||||
<P>An expression of the form [[.col.]] matches the collating element <EM>col</EM>.
|
||||
A collating element is any single character, or any sequence of characters that
|
||||
collates as a single unit. Collating elements may also be used as the end
|
||||
point of a range, for example: [[.ae.]-c] matches the character sequence "ae",
|
||||
plus any single character in the range "ae"-c, assuming that "ae" is treated
|
||||
as a single collating element in the current locale.</P>
|
||||
<P>As an extension, a collating element may also be specified via its <A href="collating_names.html">
|
||||
symbolic name</A>, for example:</P>
|
||||
<P>[[.NUL.]]</P>
|
||||
<P>matches a NUL character.</P>
|
||||
<H5>Equivalence classes:</H5>
|
||||
<P>
|
||||
An expression of the form [[=col=]], matches any character or collating element
|
||||
whose primary sort key is the same as that for collating element <EM>col</EM>,
|
||||
as with collating elements the name <EM>col</EM> may be a <A href="collating_names.html">
|
||||
symbolic name</A>. A primary sort key is one that ignores case,
|
||||
accentation, or locale-specific tailorings; so for example [[=a=]] matches any
|
||||
of the characters: a, &Agrave;, &Aacute;, &Acirc;, &Atilde;, &Auml;, &Aring;, A, &agrave;, &aacute;, &acirc;, &atilde;, &auml; and &aring;.
|
||||
Unfortunately implementation of this is reliant on the platform's collation and
|
||||
localisation support; this feature can not be relied upon to work portably
|
||||
across all platforms, or even all locales on one platform.</P>
|
||||
<H5>Escapes:</H5>
|
||||
<P>All the escape sequences that match a single character, or a single character
|
||||
class are permitted within a character class definition, <EM>except</EM> the
|
||||
negated character classes (\D \W etc).</P>
|
||||
</BLOCKQUOTE>
|
||||
<H5>Combinations:</H5>
|
||||
<P>All of the above can be combined in one character set declaration, for example:
|
||||
[[:digit:]a-c[.NUL.]].</P>
|
||||
<H4>Operator precedence</H4>
|
||||
<P> The order of precedence of operators is as shown in the following
|
||||
table:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>Collation-related bracket symbols</TD>
|
||||
<TD>[==] [::] [..]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Escaped characters
|
||||
</TD>
|
||||
<TD>\</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Character set (bracket expression)
|
||||
</TD>
|
||||
<TD>[]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Grouping</TD>
|
||||
<TD>()</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Single-character-ERE duplication
|
||||
</TD>
|
||||
<TD>* + ? {m,n}</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Concatenation</TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Anchoring</TD>
|
||||
<TD>^$</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Alternation</TD>
|
||||
<TD>|</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H4>Escapes</H4>
|
||||
<P>Any special character preceded by an escape shall match itself.
|
||||
</P>
|
||||
<P>The following escape sequences are also supported:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Escapes matching a specific character</H5>
|
||||
<P>The following escape sequences are all synonyms for single characters:</P>
|
||||
<P>
|
||||
<TABLE id="Table7" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Escape</STRONG></TD>
|
||||
<TD><STRONG>Character</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\a</TD>
|
||||
<TD>'\a'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\e</TD>
|
||||
<TD>0x1B</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\f</TD>
|
||||
<TD>\f</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\n</TD>
|
||||
<TD>\n</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\r</TD>
|
||||
<TD>\r</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\t</TD>
|
||||
<TD>\t</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\v</TD>
|
||||
<TD>\v</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\b</TD>
|
||||
<TD>\b (but only inside a character class declaration).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\cX</TD>
|
||||
<TD>An ASCII escape sequence - the character whose code point is X % 32</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\xdd</TD>
|
||||
<TD>A hexadecimal escape sequence - matches the single character whose code point
|
||||
is 0xdd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\x{dddd}</TD>
|
||||
<TD>A hexadecimal escape sequence - matches the single character whose code point
|
||||
is 0xdddd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\0ddd</TD>
|
||||
<TD>An octal escape sequence - matches the single character whose code point is
|
||||
0ddd.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>"Single character" character classes:</H5>
|
||||
<P>Any escaped character <EM>x</EM>, if <EM>x</EM> is the name of a character
|
||||
class shall match any character that is a member of that class, and any escaped
|
||||
character <EM>X</EM>, if <EM>x</EM> is the name of a character class, shall
|
||||
match any character not in that class.</P>
|
||||
<P>The following are supported by default:</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="1" cellPadding="1" width="300" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Escape sequence</STRONG></TD>
|
||||
<TD><STRONG>Equivalent to</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\d</TD>
|
||||
<TD>[[:digit:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\l</TD>
|
||||
<TD>[[:lower:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\s</TD>
|
||||
<TD>[[:space:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\u</TD>
|
||||
<TD>[[:upper:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\w</TD>
|
||||
<TD>[[:word:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\D</TD>
|
||||
<TD>[^[:digit:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\L</TD>
|
||||
<TD>[^[:lower:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\S</TD>
|
||||
<TD>[^[:space:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\U</TD>
|
||||
<TD>[^[:upper:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\W</TD>
|
||||
<TD>[^[:word:]]</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Word Boundaries</H5>
|
||||
<P>The following escape sequences match the boundaries of words:</P>
|
||||
<P>
|
||||
<TABLE id="Table4" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\&lt;</TD>
|
||||
<TD>Matches the start of a word.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\></TD>
|
||||
<TD>Matches the end of a word.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\b</TD>
|
||||
<TD>Matches a word boundary (the start or end of a word).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\B</TD>
|
||||
<TD>Matches only when not at a word boundary.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Buffer boundaries</H5>
|
||||
<P>The following match only at buffer boundaries: a "buffer" in this context is
|
||||
the whole of the input text that is being matched against (note that ^ and
|
||||
$ may match embedded newlines within the text).</P>
|
||||
<P>
|
||||
<TABLE id="Table5" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\`</TD>
|
||||
<TD>Matches at the start of a buffer only.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\'</TD>
|
||||
<TD>Matches at the end of a buffer only.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\A</TD>
|
||||
<TD>Matches at the start of a buffer only (the same as \`).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\z</TD>
|
||||
<TD>Matches at the end of a buffer only (the same as \').</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\Z</TD>
|
||||
<TD>Matches an optional sequence of newlines at the end of a buffer: equivalent to
|
||||
the regular expression \n*\z</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Continuation Escape</H5>
|
||||
<P>The sequence \G matches only at the end of the last match found, or at the
|
||||
start of the text being matched if no previous match was found. This
|
||||
escape is useful if you're iterating over the matches contained within a text, and
|
||||
you want each subsequent match to start where the last one ended.</P>
|
||||
<H5>Quoting escape</H5>
|
||||
<P>The escape sequence \Q begins a "quoted sequence": all the subsequent
|
||||
characters are treated as literals, until either the end of the regular
|
||||
expression or \E is found. For example the expression: \Q\*+\Ea+ would
|
||||
match either of:</P>
|
||||
<PRE>\*+a<BR>\*+aaa</PRE>
|
||||
<H5>Unicode escapes</H5>
|
||||
<P>
|
||||
<TABLE id="Table6" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\C</TD>
|
||||
<TD>Matches a single code point: in Boost regex this has exactly the same effect
|
||||
as a "." operator.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\X</TD>
|
||||
<TD>Matches a combining character sequence: that is any non-combining character
|
||||
followed by a sequence of zero or more combining characters.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Any other escape</H5>
|
||||
<P>Any other escape sequence matches the character that is escaped, for example \@
|
||||
matches a literal '@'.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H4 dir="ltr">Perl Extended Patterns</H4>
|
||||
<P dir="ltr">Perl-specific extensions to the regular expression syntax all start
|
||||
with (?.</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5 dir="ltr">Comments</H5>
|
||||
<P dir="ltr">(?# ... ) is treated as a comment, its contents are ignored.</P>
|
||||
<H5 dir="ltr">Modifiers</H5>
|
||||
<P dir="ltr">(?imsx-imsx ... ) alters which of the perl modifiers are in effect
|
||||
within the pattern, changes take effect from the point that the block is first
|
||||
seen and extend to any enclosing ). Letters before a '-' turn that perl
|
||||
modifier on, letters afterward, turn it off.</P>
|
||||
<P dir="ltr">(?imsx-imsx:pattern) applies the specified modifiers to <EM>pattern</EM>
|
||||
only.</P>
|
||||
<H5 dir="ltr">Non-marking grouping</H5>
|
||||
<P dir="ltr">(?:pattern) lexically groups <EM>pattern</EM>, without generating an
|
||||
additional sub-expression.</P>
|
||||
<H5 dir="ltr">Lookahead</H5>
|
||||
<P dir="ltr">(?=pattern) consumes zero characters, only if <EM>pattern</EM> matches.</P>
|
||||
<P dir="ltr">(?!pattern) consumes zero characters, only if <EM>pattern</EM> does
|
||||
not match.</P>
|
||||
<H5 dir="ltr">Lookbehind</H5>
|
||||
<P dir="ltr">(?&lt;=pattern) consumes zero characters, only if <EM>pattern</EM> could
|
||||
be matched against the characters preceding the current position (<EM>pattern</EM>
|
||||
must be of fixed length).</P>
|
||||
<P dir="ltr">(?&lt;!pattern) consumes zero characters, only if <EM>pattern</EM> could
|
||||
not be matched against the characters preceding the current position (<EM>pattern</EM>
|
||||
must be of fixed length).</P>
|
||||
<H5 dir="ltr">Independent sub-expressions</H5>
|
||||
<P dir="ltr">(?>pattern) <EM>pattern</EM> is matched independently of the
|
||||
surrounding patterns, the expression will never backtrack into <EM>pattern</EM>.</P>
|
||||
<H5 dir="ltr">Conditional Expressions</H5>
|
||||
<P dir="ltr">(?(condition)yes-pattern|no-pattern) attempts to match <EM>yes-pattern</EM>
|
||||
if the <EM>condition </EM>is true, otherwise attempts to match <EM>no-pattern</EM>.</P>
|
||||
<P dir="ltr">(?(condition)yes-pattern) attempts to match <EM>yes-pattern</EM> if
|
||||
the <EM>condition </EM>is true, otherwise fails.</P>
|
||||
<P dir="ltr"><EM>Condition</EM> may be either a forward lookahead assert, or the
|
||||
index of a marked sub-expression (the condition becomes true if the
|
||||
sub-expression has been matched).</P>
|
||||
</BLOCKQUOTE>
|
||||
<H3><A name="variations"></A>Variations</H3>
|
||||
<P>The options <A href="syntax_option_type.html#Perl"><EM>normal, ECMAScript, JavaScript</EM>
|
||||
and <EM>JScript</EM></A> are all synonyms for <EM>Perl</EM>.</P>
|
||||
<H3><A name="options"></A>Options</H3>
|
||||
<P>There are a <A href="syntax_option_type.html#Perl">variety of flags</A> that
|
||||
may be combined with the <EM>Perl</EM> option when constructing the regular
|
||||
expression, in particular note that the <A href="syntax_option_type.html#Perl">newline_alt</A>
|
||||
option alters the syntax, while the <A href="syntax_option_type.html#Perl">collate,
|
||||
nosubs and icase</A> options modify how the case and locale sensitivity
|
||||
are to be applied.</P>
|
||||
<H3><A name="mods"></A>Modifiers</H3>
|
||||
<P>The perl <EM>smix</EM> modifiers can either be applied using a (?smix-smix)
|
||||
prefix to the regular expression, or with one of the regex-compile time flags <EM><A href="syntax_option_type.html#Perl">
|
||||
no_mod_m, mod_x, mod_s, and no_mod_s</A></EM>.
|
||||
</P>
|
||||
<H3><A name="refs"></A>References</H3>
|
||||
<P><A href="http://www.perldoc.com/perl5.6/pod/perlre.html"> Perl 5.6.</A></P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
21 Aug 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a> 2004</I></P>
|
||||
<I>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>).</I></P>
|
||||
</I>
|
||||
</body>
|
||||
</html>
|
||||
|
144
doc/character_class_names.html
Normal file
144
doc/character_class_names.html
Normal file
@ -0,0 +1,144 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Character Class Names</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Character Class Names.</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>The following character class names are supported by Boost.Regex:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Name</STRONG></TD>
|
||||
<TD><STRONG>POSIX-standard name</STRONG></TD>
|
||||
<TD><STRONG>Description</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>alnum</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any alpha-numeric character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>alpha</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any alphabetic character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>blank</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any whitespace character that is not a line separator.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>cntrl</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any control character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>d</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any decimal digit.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>digit</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any decimal digit.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>graph</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any graphical character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>l</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any lower case character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>lower</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any lower case character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>print</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any printable character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>punct</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any punctuation character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>s</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any whitespace character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>space</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any whitespace character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>unicode</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any extended character whose code point is above 255 in value.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>u</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any upper case character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>upper</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any upper case character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>w</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any word character (alphanumeric characters plus the underscore).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>word</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any word character (alphanumeric characters plus the underscore).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>xdigit</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any hexadecimal digit character.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
09 Aug 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 2004</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
358
doc/collating_names.html
Normal file
358
doc/collating_names.html
Normal file
@ -0,0 +1,358 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Collating Element Names</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Collating Element Names</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>
|
||||
The following are treated as valid digraphs when used as a collating name:</P>
|
||||
<P>"ae", "Ae", "AE", "ch", "Ch", "CH", "ll", "Ll", "LL", "ss", "Ss", "SS", "nj",
|
||||
"Nj", "NJ", "dz", "Dz", "DZ", "lj", "Lj", "LJ".</P>
|
||||
<P>The following symbolic names are recognised as valid collating element names,
|
||||
in addition to any single character:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="50%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Name</STRONG></TD>
|
||||
<TD><STRONG>Character</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>NUL</TD>
|
||||
<TD>\x00</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SOH</TD>
|
||||
<TD>\x01</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>STX</TD>
|
||||
<TD>\x02</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ETX</TD>
|
||||
<TD>\x03</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>EOT</TD>
|
||||
<TD>\x04</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ENQ</TD>
|
||||
<TD>\x05</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ACK</TD>
|
||||
<TD>\x06</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>alert</TD>
|
||||
<TD>\x07</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>backspace</TD>
|
||||
<TD>\x08</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>tab</TD>
|
||||
<TD>\t</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>newline</TD>
|
||||
<TD>\n</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>vertical-tab</TD>
|
||||
<TD>\v</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>form-feed</TD>
|
||||
<TD>\f</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>carriage-return</TD>
|
||||
<TD>\r</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SO</TD>
|
||||
<TD>\xE</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SI</TD>
|
||||
<TD>\xF</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DLE</TD>
|
||||
<TD>\x10</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DC1</TD>
|
||||
<TD>\x11</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DC2</TD>
|
||||
<TD>\x12</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DC3</TD>
|
||||
<TD>\x13</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DC4</TD>
|
||||
<TD>\x14</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>NAK</TD>
|
||||
<TD>\x15</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SYN</TD>
|
||||
<TD>\x16</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ETB</TD>
|
||||
<TD>\x17</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>CAN</TD>
|
||||
<TD>\x18</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>EM</TD>
|
||||
<TD>\x19</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SUB</TD>
|
||||
<TD>\x1A</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ESC</TD>
|
||||
<TD>\x1B</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>IS4</TD>
|
||||
<TD>\x1C</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>IS3</TD>
|
||||
<TD>\x1D</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>IS2</TD>
|
||||
<TD>\x1E</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>IS1</TD>
|
||||
<TD>\x1F</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>space</TD>
|
||||
<TD>\x20</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>exclamation-mark</TD>
|
||||
<TD>!</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>quotation-mark</TD>
|
||||
<TD>"</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>number-sign</TD>
|
||||
<TD>#</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>dollar-sign</TD>
|
||||
<TD>$</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>percent-sign</TD>
|
||||
<TD>%</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ampersand</TD>
|
||||
<TD>&amp;</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>apostrophe</TD>
|
||||
<TD>'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>left-parenthesis</TD>
|
||||
<TD>(</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>right-parenthesis</TD>
|
||||
<TD>)</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>asterisk</TD>
|
||||
<TD>*</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>plus-sign</TD>
|
||||
<TD>+</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>comma</TD>
|
||||
<TD>,</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>hyphen</TD>
|
||||
<TD>-</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>period</TD>
|
||||
<TD>.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>slash</TD>
|
||||
<TD>/</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>zero</TD>
|
||||
<TD>0</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>one</TD>
|
||||
<TD>1</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>two</TD>
|
||||
<TD>2</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>three</TD>
|
||||
<TD>3</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>four</TD>
|
||||
<TD>4</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>five</TD>
|
||||
<TD>5</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>six</TD>
|
||||
<TD>6</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>seven</TD>
|
||||
<TD>7</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>eight</TD>
|
||||
<TD>8</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>nine</TD>
|
||||
<TD>9</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>colon</TD>
|
||||
<TD>:</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>semicolon</TD>
|
||||
<TD>;</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>less-than-sign</TD>
|
||||
<TD>&lt;</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>equals-sign</TD>
|
||||
<TD>=</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>greater-than-sign</TD>
|
||||
<TD>&gt;</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>question-mark</TD>
|
||||
<TD>?</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>commercial-at</TD>
|
||||
<TD>@</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>left-square-bracket</TD>
|
||||
<TD>[</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>backslash</TD>
|
||||
<TD>\</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>right-square-bracket</TD>
|
||||
<TD>]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>circumflex</TD>
|
||||
<TD>^</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>underscore</TD>
|
||||
<TD>_</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>grave-accent</TD>
|
||||
<TD>`</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>left-curly-bracket</TD>
|
||||
<TD>{</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>vertical-line</TD>
|
||||
<TD>|</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>right-curly-bracket</TD>
|
||||
<TD>}</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>tilde</TD>
|
||||
<TD>~</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DEL</TD>
|
||||
<TD>\x7F</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -36,6 +36,20 @@
|
||||
Sun Forte Compiler</a> <dt><a href="install.html#other">Other compilers (building
|
||||
with bjam)</a></dt>
|
||||
</dl>
|
||||
<dt>Backgrounders
|
||||
<dd>
|
||||
<dl class="index">
|
||||
<dt><a href="syntax.html">Regular Expression Syntax</a>
|
||||
<dd>
|
||||
<dl class="index">
|
||||
<dt><a href="syntax_perl.html">Perl Regular Expressions</a></dt>
|
||||
<dt><a href="syntax_extended.html">POSIX-Extended Regular Expressions</a></dt>
|
||||
<dt><a href="syntax_basic.html">POSIX-Basic Regular Expressions</a></dt>
|
||||
</dl>
|
||||
</dd>
|
||||
</dt>
|
||||
</dl>
|
||||
</dd></dt>
|
||||
<dt>Reference
|
||||
<dd>
|
||||
<dl class="index">
|
||||
|
751
doc/syntax.html
751
doc/syntax.html
@ -25,742 +25,29 @@
|
||||
<HR>
|
||||
<P>This section covers the regular expression syntax used by this library, this is
|
||||
a programmer's guide; the actual syntax presented to your program's users will
|
||||
depend upon the flags used during expression compilation.
|
||||
</P>
|
||||
<H3>Literals
|
||||
</H3>
|
||||
<P>All characters are literals except: ".", "|", "*", "?", "+", "(", ")", "{",
|
||||
"}", "[", "]", "^", "$" and "\". These characters are literals when preceded by
|
||||
a "\". A literal is a character that matches itself, or matches the result of
|
||||
traits_type::translate(), where traits_type is the traits template parameter to
|
||||
class basic_regex.</P>
|
||||
<H3>Wildcard
|
||||
</H3>
|
||||
<P>The dot character "." matches any single character, with two exceptions: when <I>match_not_dot_null</I>
|
||||
is passed to the matching algorithms, the dot does not match a null character;
|
||||
when <I>match_not_dot_newline</I> is passed to the matching algorithms, then
|
||||
the dot does not match a newline character.
|
||||
</P>
|
||||
<H3>Repeats
|
||||
</H3>
|
||||
<P>A repeat is an expression that is repeated an arbitrary number of times. An
|
||||
expression followed by "*" can be repeated any number of times including zero.
|
||||
An expression followed by "+" can be repeated any number of times, but at least
|
||||
once, if the expression is compiled with the flag regex_constants::bk_plus_qm
|
||||
then "+" is an ordinary character and "\+" represents a repeat of once or more.
|
||||
An expression followed by "?" may be repeated zero or one times only, if the
|
||||
expression is compiled with the flag regex_constants::bk_plus_qm then "?" is an
|
||||
ordinary character and "\?" represents the repeat zero or once operator. When
|
||||
it is necessary to specify the minimum and maximum number of repeats
|
||||
explicitly, the bounds operator "{}" may be used, thus "a{2}" is the letter "a"
|
||||
repeated exactly twice, "a{2,4}" represents the letter "a" repeated between 2
|
||||
and 4 times, and "a{2,}" represents the letter "a" repeated at least twice with
|
||||
no upper limit. Note that there must be no white-space inside the {}, and there
|
||||
is no upper limit on the values of the lower and upper bounds. When the
|
||||
expression is compiled with the flag regex_constants::bk_braces then "{" and
|
||||
"}" are ordinary characters and "\{" and "\}" are used to delimit bounds
|
||||
instead. All repeat expressions refer to the shortest possible previous
|
||||
sub-expression: a single character; a character set, or a sub-expression
|
||||
grouped with "()" for example.
|
||||
</P>
|
||||
<P>Examples:
|
||||
</P>
|
||||
<P>"ba*" will match all of "b", "ba", "baaa" etc.
|
||||
</P>
|
||||
<P>"ba+" will match "ba" or "baaaa" for example but not "b".
|
||||
</P>
|
||||
<P>"ba?" will match "b" or "ba".
|
||||
</P>
|
||||
<P>"ba{2,4}" will match "baa", "baaa" and "baaaa".
|
||||
</P>
|
||||
<H3>Non-greedy repeats
|
||||
</H3>
|
||||
<P>Whenever the "extended" regular expression syntax is in use (the default) then
|
||||
non-greedy repeats are possible by appending a '?' after the repeat; a
|
||||
non-greedy repeat is one which will match the <I>shortest</I> possible string.
|
||||
</P>
|
||||
<P>For example to match html tag pairs one could use something like:
|
||||
</P>
|
||||
<P>"&lt;\s*tagname[^&gt;]*&gt;(.*?)&lt;\s*/tagname\s*&gt;"
|
||||
</P>
|
||||
<P>In this case $1 will contain the text between the tag pairs, and will be the
|
||||
shortest possible matching string.
|
||||
</P>
|
||||
<H3>Parenthesis
|
||||
</H3>
|
||||
<P>Parentheses serve two purposes, to group items together into a sub-expression,
|
||||
and to mark what generated the match. For example the expression "(ab)*" would
|
||||
match all of the string "ababab". The matching algorithms <A href="regex_match.html">
|
||||
regex_match</A> and <A href="regex_search.html">regex_search</A> each take
|
||||
an instance of <A href="match_results.html">match_results</A> that reports what
|
||||
caused the match, on exit from these functions the <A href="match_results.html">match_results</A>
|
||||
contains information both on what the whole expression matched and on what each
|
||||
sub-expression matched. In the example above match_results[1] would contain a
|
||||
pair of iterators denoting the final "ab" of the matching string. It is
|
||||
permissible for sub-expressions to match null strings. If a sub-expression
|
||||
takes no part in a match - for example if it is part of an alternative that is
|
||||
not taken - then both of the iterators that are returned for that
|
||||
sub-expression point to the end of the input string, and the <I>matched</I> parameter
|
||||
for that sub-expression is <I>false</I>. Sub-expressions are indexed from left
|
||||
to right starting from 1, sub-expression 0 is the whole expression.
|
||||
</P>
|
||||
<H3>Non-Marking Parenthesis
|
||||
</H3>
|
||||
<P>Sometimes you need to group sub-expressions with parentheses, but don't want
|
||||
the parentheses to produce another marked sub-expression; in this case a
|
||||
non-marking parenthesis (?:expression) can be used. For example the following
|
||||
expression creates no sub-expressions:
|
||||
</P>
|
||||
<P>"(?:abc)*"</P>
|
||||
<H3>Forward Lookahead Asserts
|
||||
</H3>
|
||||
<P>There are two forms of these; one for positive forward lookahead asserts, and
|
||||
one for negative lookahead asserts:</P>
|
||||
<P>"(?=abc)" matches zero characters only if they are followed by the expression
|
||||
"abc".</P>
|
||||
<P>"(?!abc)" matches zero characters only if they are not followed by the
|
||||
expression "abc".</P>
|
||||
<H3>Independent sub-expressions</H3>
|
||||
<P>"(?>expression)" matches "expression" as an independent atom (the algorithm
|
||||
will not backtrack into it if a failure occurs later in the expression).</P>
|
||||
<H3>Alternatives
|
||||
</H3>
|
||||
<P>Alternatives occur when the expression can match either one sub-expression or
|
||||
another, each alternative is separated by a "|", or a "\|" if the flag
|
||||
regex_constants::bk_vbar is set, or by a newline character if the flag
|
||||
regex_constants::newline_alt is set. Each alternative is the largest possible
|
||||
previous sub-expression; this is the opposite behavior from repetition
|
||||
operators.
|
||||
</P>
|
||||
<P>Examples:
|
||||
</P>
|
||||
<P>"a(b|c)" could match "ab" or "ac".
|
||||
</P>
|
||||
<P>"abc|def" could match "abc" or "def".
|
||||
</P>
|
||||
<H3>Sets
|
||||
</H3>
|
||||
<P>A set is a set of characters that can match any single character that is a
|
||||
member of the set. Sets are delimited by "[" and "]" and can contain literals,
|
||||
character ranges, character classes, collating elements and equivalence
|
||||
classes. Set declarations that start with "^" contain the complement of the
|
||||
elements that follow.
|
||||
</P>
|
||||
<P>Examples:
|
||||
</P>
|
||||
<P>Character literals:
|
||||
</P>
|
||||
<P>"[abc]" will match either of "a", "b", or "c".
|
||||
</P>
|
||||
<P>"[^abc]" will match any character other than "a", "b", or "c".
|
||||
</P>
|
||||
<P>Character ranges:
|
||||
</P>
|
||||
<P>"[a-z]" will match any character in the range "a" to "z".
|
||||
</P>
|
||||
<P>"[^A-Z]" will match any character other than those in the range "A" to "Z".
|
||||
</P>
|
||||
<P>Note that character ranges are highly locale dependent if the flag
|
||||
regex_constants::collate is set: they match any character that collates between
|
||||
the endpoints of the range, ranges will only behave according to ASCII rules
|
||||
when the default "C" locale is in effect. For example if the library is
|
||||
compiled with the Win32 localization model, then [a-z] will match the ASCII
|
||||
characters a-z, and also 'A', 'B' etc, but not 'Z' which collates just after
|
||||
'z'. This locale specific behavior is disabled by default (in perl mode), and
|
||||
ranges then collate according to ASCII character code.
|
||||
</P>
|
||||
<P>Character classes are denoted using the syntax "[:classname:]" within a set
|
||||
declaration, for example "[[:space:]]" is the set of all whitespace characters.
|
||||
Character classes are only available if the flag regex_constants::char_classes
|
||||
is set. The available character classes are:
|
||||
<BR>
|
||||
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="0" cellPadding="7" width="100%" border="0">
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="50%">alnum</TD>
|
||||
<TD vAlign="top" width="50%">Any alpha numeric character.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">alpha</TD>
|
||||
<TD vAlign="top" width="50%">Any alphabetical character a-z and A-Z. Other
|
||||
characters may also be included depending upon the locale.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">blank</TD>
|
||||
<TD vAlign="top" width="50%">Any blank character, either a space or a tab.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">cntrl</TD>
|
||||
<TD vAlign="top" width="50%">Any control character.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">digit</TD>
|
||||
<TD vAlign="top" width="50%">Any digit 0-9.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">graph</TD>
|
||||
<TD vAlign="top" width="50%">Any graphical character.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">lower</TD>
|
||||
<TD vAlign="top" width="50%">Any lower case character a-z. Other characters may
|
||||
also be included depending upon the locale.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">print</TD>
|
||||
<TD vAlign="top" width="50%">Any printable character.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">punct</TD>
|
||||
<TD vAlign="top" width="50%">Any punctuation character.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">space</TD>
|
||||
<TD vAlign="top" width="50%">Any whitespace character.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">upper</TD>
|
||||
<TD vAlign="top" width="50%">Any upper case character A-Z. Other characters may
|
||||
also be included depending upon the locale.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">xdigit</TD>
|
||||
<TD vAlign="top" width="50%">Any hexadecimal digit character, 0-9, a-f and A-F.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">word</TD>
|
||||
<TD vAlign="top" width="50%">Any word character - all alphanumeric characters plus
|
||||
the underscore.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">Unicode</TD>
|
||||
<TD vAlign="top" width="50%">Any character whose code is greater than 255, this
|
||||
applies to the wide character traits classes only.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>There are some shortcuts that can be used in place of the character classes,
|
||||
provided the flag regex_constants::escape_in_lists is set then you can use:
|
||||
</P>
|
||||
<P>\w in place of [:word:]
|
||||
</P>
|
||||
<P>\s in place of [:space:]
|
||||
</P>
|
||||
<P>\d in place of [:digit:]
|
||||
</P>
|
||||
<P>\l in place of [:lower:]
|
||||
</P>
|
||||
<P>\u in place of [:upper:]
|
||||
</P>
|
||||
<P>Collating elements take the general form [.tagname.] inside a set declaration,
|
||||
where <I>tagname</I> is either a single character, or a name of a collating
|
||||
element, for example [[.a.]] is equivalent to [a], and [[.comma.]] is
|
||||
equivalent to [,]. The library supports all the standard POSIX collating
|
||||
element names, and in addition the following digraphs: "ae", "ch", "ll", "ss",
|
||||
"nj", "dz", "lj", each in lower, upper and title case variations.
|
||||
Multi-character collating elements can result in the set matching more than one
|
||||
character, for example [[.ae.]] would match two characters, but note that
|
||||
[^[.ae.]] would only match one character.
|
||||
</P>
|
||||
<P>
|
||||
Equivalence classes take the general form [=tagname=] inside a set declaration,
|
||||
where <I>tagname</I> is either a single character, or a name of a collating
|
||||
element, and matches any character that is a member of the same primary
|
||||
equivalence class as the collating element [.tagname.]. An equivalence class is
|
||||
a set of characters that collate the same, a primary equivalence class is a set
|
||||
of characters whose primary sort key are all the same (for example strings are
|
||||
typically collated by character, then by accent, and then by case; the primary
|
||||
sort key then relates to the character, the secondary to the accentation, and
|
||||
the tertiary to the case). If there is no equivalence class corresponding to <I>tagname</I>
|
||||
, then [=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
|
||||
locale independent method of obtaining the primary sort key for a character,
|
||||
except under Win32. For other operating systems the library will "guess" the
|
||||
primary sort key from the full sort key (obtained from <I>strxfrm</I>), so
|
||||
equivalence classes are probably best considered broken under any operating
|
||||
system other than Win32.
|
||||
</P>
|
||||
<P>To include a literal "-" in a set declaration then: make it the first character
|
||||
after the opening "[" or "[^", the endpoint of a range, a collating element, or
|
||||
if the flag regex_constants::escape_in_lists is set then precede with an escape
|
||||
character as in "[\-]". To include a literal "[" or "]" or "^" in a set then
|
||||
make them the endpoint of a range, a collating element, or precede with an
|
||||
escape character if the flag regex_constants::escape_in_lists is set.
|
||||
</P>
|
||||
<H3>Line anchors
|
||||
</H3>
|
||||
<P>An anchor is something that matches the null string at the start or end of a
|
||||
line: "^" matches the null string at the start of a line, "$" matches the null
|
||||
string at the end of a line.
|
||||
</P>
|
||||
<H3>Back references
|
||||
</H3>
|
||||
<P>A back reference is a reference to a previous sub-expression that has already
|
||||
been matched, the reference is to what the sub-expression matched, not to the
|
||||
expression itself. A back reference consists of the escape character "\"
|
||||
followed by a digit "1" to "9", "\1" refers to the first sub-expression, "\2"
|
||||
to the second etc. For example the expression "(.*)\1" matches any string that
|
||||
is repeated about its mid-point for example "abcabc" or "xyzxyz". A back
|
||||
reference to a sub-expression that did not participate in any match, matches
|
||||
the null string: NB this is different to some other regular expression
|
||||
matchers. Back references are only available if the expression is compiled with
|
||||
the flag regex_constants::bk_refs set.
|
||||
</P>
|
||||
<H3>Characters by code
|
||||
</H3>
|
||||
<P>This is an extension to the algorithm that is not available in other libraries,
|
||||
it consists of the escape character followed by the digit "0" followed by the
|
||||
octal character code. For example "\023" represents the character whose octal
|
||||
code is 23. Where ambiguity could occur use parentheses to break the expression
|
||||
up: "\0103" represents the character whose code is 103, "(\010)3" represents the
|
||||
character 10 followed by "3". To match characters by their hexadecimal code,
|
||||
use \x followed by a string of hexadecimal digits, optionally enclosed inside
|
||||
{}, for example \xf0 or \x{aff}, notice the latter example is a Unicode
|
||||
character.</P>
|
||||
<H3>Word operators
|
||||
</H3>
|
||||
<P>The following operators are provided for compatibility with the GNU regular
|
||||
expression library.
|
||||
</P>
|
||||
<P>"\w" matches any single character that is a member of the "word" character
|
||||
class, this is identical to the expression "[[:word:]]".
|
||||
</P>
|
||||
<P>"\W" matches any single character that is not a member of the "word" character
|
||||
class, this is identical to the expression "[^[:word:]]".
|
||||
</P>
|
||||
<P>"\<" matches the null string at the start of a word.
|
||||
</P>
|
||||
<P>"\>" matches the null string at the end of the word.
|
||||
</P>
|
||||
<P>"\b" matches the null string at either the start or the end of a word.
|
||||
</P>
|
||||
<P>"\B" matches a null string within a word.
|
||||
</P>
|
||||
<P>The start of the sequence passed to the matching algorithms is considered to be
|
||||
a potential start of a word unless the flag match_not_bow is set. The end of
|
||||
the sequence passed to the matching algorithms is considered to be a potential
|
||||
end of a word unless the flag match_not_eow is set.
|
||||
</P>
|
||||
<H3>Buffer operators
|
||||
</H3>
|
||||
<P>The following operators are provided for compatibility with the GNU regular
|
||||
expression library, and Perl regular expressions:
|
||||
</P>
|
||||
<P>"\`" matches the start of a buffer.
|
||||
</P>
|
||||
<P>"\A" matches the start of the buffer.
|
||||
</P>
|
||||
<P>"\'" matches the end of a buffer.
|
||||
</P>
|
||||
<P>"\z" matches the end of a buffer.
|
||||
</P>
|
||||
<P>"\Z" matches the end of a buffer, or possibly one or more new line characters
|
||||
followed by the end of the buffer.
|
||||
</P>
|
||||
<P>A buffer is considered to consist of the whole sequence passed to the matching
|
||||
algorithms, unless the flags match_not_bob or match_not_eob are set.
|
||||
</P>
|
||||
<H3>Escape operator
|
||||
</H3>
|
||||
<P>The escape character "\" has several meanings.
|
||||
</P>
|
||||
<P>Inside a set declaration the escape character is a normal character unless the
|
||||
flag regex_constants::escape_in_lists is set in which case whatever follows the
|
||||
escape is a literal character regardless of its normal meaning.
|
||||
</P>
|
||||
<P>The escape operator may introduce an operator for example: back references, or
|
||||
a word operator.
|
||||
</P>
|
||||
<P>The escape operator may make the following character normal, for example "\*"
|
||||
represents a literal "*" rather than the repeat operator.
|
||||
</P>
|
||||
<H4>Single character escape sequences
|
||||
</H4>
|
||||
<P>The following escape sequences are aliases for single characters:
|
||||
<BR>
|
||||
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="0" cellPadding="7" width="100%" border="0">
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="33%">Escape sequence
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Character code
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Meaning
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\a
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x07
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Bell character.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\f
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x0C
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Form feed.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\n
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x0A
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Newline character.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\r
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x0D
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Carriage return.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\t
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x09
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Tab character.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\v
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x0B
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">Vertical tab.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\e
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0x1B
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">ASCII Escape character.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\0dd
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0dd
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">An octal character code, where <I>dd</I> is one or
|
||||
more octal digits.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\xXX
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0xXX
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">A hexadecimal character code, where XX is one or more
|
||||
hexadecimal digits.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\x{XX}
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">0xXX
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">A hexadecimal character code, where XX is one or more
|
||||
hexadecimal digits, optionally a Unicode character.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="33%">\cZ
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">z-@
|
||||
</TD>
|
||||
<TD vAlign="top" width="33%">An ASCII escape sequence control-Z, where Z is any
|
||||
ASCII character greater than or equal to the character code for '@'.
|
||||
</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H4>Miscellaneous escape sequences:
|
||||
</H4>
|
||||
<P>The following are provided mostly for perl compatibility, but note that there
|
||||
are some differences in the meanings of \l \L \u and \U:
|
||||
<BR>
|
||||
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table4" cellSpacing="0" cellPadding="6" width="100%" border="0">
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\w
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [[:word:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\W
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [^[:word:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\s
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [[:space:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\S
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [^[:space:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\d
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [[:digit:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\D
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [^[:digit:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\l
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [[:lower:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\L
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [^[:lower:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\u
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [[:upper:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\U
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Equivalent to [^[:upper:]].
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\C
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Any single character, equivalent to '.'.
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\X
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">Match any Unicode combining character sequence, for
|
||||
example "a\x 0301" (a letter a with an acute).
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\Q
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">The begin quote operator, everything that follows is
|
||||
treated as a literal character until a \E end quote operator is found.
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">\E
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">The end quote operator, terminates a sequence begun
|
||||
with \Q.
|
||||
</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H3>What gets matched?
|
||||
</H3>
|
||||
<P>
|
||||
When the expression is compiled as a Perl-compatible regex then the matching
|
||||
algorithms will perform a depth first search on the state machine and report
|
||||
the first match found.</P>
|
||||
<P>
|
||||
When the expression is compiled as a POSIX-compatible regex then the matching
|
||||
algorithms will match the first possible matching string, if more than one
|
||||
string starting at a given location can match then it matches the longest
|
||||
possible string, unless the flag match_any is set, in which case the first
|
||||
match encountered is returned. Use of the match_any option can reduce the time
|
||||
taken to find the match - but is only useful if the user is less concerned
|
||||
about what matched - for example it would not be suitable for search and
|
||||
replace operations. In cases where there are multiple possible matches all
|
||||
starting at the same location, and all of the same length, then the match
|
||||
chosen is the one with the longest first sub-expression, if that is the same
|
||||
for two or more matches, then the second sub-expression will be examined and so
|
||||
on.
|
||||
</P>
|
||||
<P>
|
||||
The examples in the following table illustrate the main differences between Perl and
|
||||
POSIX regular expression matching rules:
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
|
||||
<TBODY>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>Expression</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>Text</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>POSIX leftmost longest match</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>ECMAScript depth first search match</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>a|ab</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> xaby</CODE>
|
||||
</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> "ab"</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> "a"</CODE></P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> .*([[:alnum:]]+).*</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> " abc def xyz "</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>$0 = " abc def xyz "<BR>
|
||||
$1 = "abc"</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>$0 = " abc def xyz "<BR>
|
||||
$1 = "z"</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> .*(a|xayy)</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> zzxayyzz</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> "zzxayy"</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>"zzxa"</CODE></P>
|
||||
</TD>
|
||||
</TR>
|
||||
</TBODY></CODE></TD></TR></TABLE>
|
||||
<P>These differences between Perl matching rules, and POSIX matching rules, mean
|
||||
that these two regular expression syntaxes differ not only in the features
|
||||
offered, but also in the form that the state machine takes and/or the
|
||||
algorithms used to traverse the state machine.</P>
|
||||
depend upon the <A href="syntax_option_type.html">flags</A> used during
|
||||
expression compilation.
|
||||
</P>
|
||||
<P>There are three main syntax options available, depending upon how
|
||||
you construct the regular expression object:</P>
|
||||
<UL>
|
||||
<LI>
|
||||
<A href="syntax_perl.html">Perl</A> (this is the default behavior).</LI>
|
||||
<LI>
|
||||
<A href="syntax_extended.html">POSIX extended</A> (including the <A href="syntax_extended.html#egrep">
|
||||
egrep</A> and <A href="syntax_extended.html#awk">awk</A> variations).</LI>
|
||||
<LI>
|
||||
<A href="syntax_basic.html">POSIX Basic</A> (including the <A href="syntax_basic.html#grep">
|
||||
grep</A> and <A href="syntax_basic.html#emacs">emacs</A> variations).</LI></UL>
|
||||
<P>You can also construct a regular expression that treats every character as a <A href="syntax_option_type.html#literals">
|
||||
literal</A>, but that's not really a "syntax"!</P>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
10 Sept 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
226
doc/syntax_basic.html
Normal file
226
doc/syntax_basic.html
Normal file
@ -0,0 +1,226 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: POSIX-Basic Regular Expression Syntax</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../c++boost.gif" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">POSIX Basic Regular Expression Syntax</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A> <dt><A href="#Basic">POSIX Basic Syntax</A> <dt><A href="#variations">
|
||||
Variations</A>
|
||||
<dd>
|
||||
<dl>
|
||||
<dt><A href="#grep">Grep</A> <dt><A href="#emacs">Emacs</A></dt>
|
||||
</dl>
|
||||
<dt><A href="#options">Options</A> <dt><A href="#refs">References</A></dt>
|
||||
</dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<P>The POSIX-Basic regular expression syntax is used by the Unix utility <EM>sed</EM>,
|
||||
and variations are used by <EM>grep</EM> and <EM>emacs</EM>. You can
|
||||
construct POSIX basic regular expressions in Boost.Regex by passing the flag <EM>basic</EM>
|
||||
to the regex constructor, for example:</P>
|
||||
<PRE>// e1 is a case sensitive POSIX-Basic expression:
|
||||
boost::regex e1(my_expression, boost::regex::basic);
|
||||
// e2 a case insensitive POSIX-Basic expression:
|
||||
boost::regex e2(my_expression, boost::regex::basic|boost::regex::icase);</PRE>
|
||||
<H3>POSIX Basic Syntax<A name="Basic"></A></H3>
|
||||
<P>In POSIX-Basic regular expressions, all characters match themselves except
|
||||
for the following special characters:</P>
|
||||
<PRE>.[\*^$</PRE>
|
||||
<H4>Wildcard:</H4>
|
||||
<P>The single character '.' when used outside of a character set will match any
|
||||
single character except:</P>
|
||||
<P>The NULL character when the flag <EM>match_not_dot_null</EM> is passed to the
|
||||
matching algorithms.</P>
|
||||
<P>The newline character when the flag <EM>match_not_dot_newline</EM> is passed to
|
||||
the matching algorithms.</P>
|
||||
<H4>Anchors:</H4>
|
||||
<P>A '^' character shall match the start of a line when used as the first
|
||||
character of an expression, or the first character of a sub-expression.</P>
|
||||
<P>A '$' character shall match the end of a line when used as the last character
|
||||
of an expression, or the last character of a sub-expression.</P>
|
||||
<H4>Marked sub-expressions:</H4>
|
||||
<P>A section beginning \( and ending \) acts as a marked sub-expression.
|
||||
Whatever matched the sub-expression is split out in a separate field by the
|
||||
matching algorithms. Marked sub-expressions can also be repeated, or referred to by a back-reference.</P>
|
||||
<H4>Repeats:</H4>
|
||||
<P>Any atom (a single character, a marked sub-expression, or a character class)
|
||||
can be repeated with the * operator.</P>
|
||||
<P>For example a* will match any number of letter a's repeated zero or more times
|
||||
(an atom repeated zero times matches an empty string), so the expression a*b
|
||||
will match any of the following:</P>
|
||||
<PRE>b
|
||||
ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>An atom can also be repeated with a bounded repeat:</P>
|
||||
<P>a\{n\} Matches 'a' repeated exactly <EM>n</EM> times.</P>
|
||||
<P>a\{n,\} Matches 'a' repeated <EM>n</EM> or more times.</P>
|
||||
<P>a\{n, m\} Matches 'a' repeated between <EM>n</EM> and <EM>m</EM> times
|
||||
inclusive.</P>
|
||||
<P>For example:</P>
|
||||
<PRE>^a\{2,3\}$</PRE>
|
||||
<P>Will match either of:</P>
|
||||
<PRE>aa
|
||||
aaa</PRE>
|
||||
<P>But neither of:</P>
|
||||
<PRE>a
|
||||
aaaa</PRE>
|
||||
<P>It is an error to use a repeat operator, if the preceding construct can not be
|
||||
repeated, for example:</P>
|
||||
<PRE>a\(*\)</PRE>
|
||||
<P>Will raise an error, as there is nothing for the * operator to be applied to.</P>
|
||||
<H4>Back references:</H4>
|
||||
<P>An escape character followed by a digit <EM>n</EM>, where <EM>n </EM>is in the
|
||||
range 1-9, matches the same string that was matched by sub-expression <EM>n</EM>.
|
||||
For example the expression:</P>
|
||||
<PRE>^\(a*\).*\1$</PRE>
|
||||
<P>Will match the string:</P>
|
||||
<PRE>aaabbaaa</PRE>
|
||||
<P>But not the string:</P>
|
||||
<PRE>aaabba</PRE>
|
||||
<H4>Character sets:</H4>
|
||||
<P>A character set is a bracket-expression starting with [ and ending with ], it
|
||||
defines a set of characters, and matches any single character that is a member
|
||||
of that set.</P>
|
||||
<P>A bracket expression may contain any combination of the following:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Single characters:</H5>
|
||||
<P>For example [abc], will match any of the characters 'a', 'b', or 'c'.</P>
|
||||
<H5>Character ranges:</H5>
|
||||
<P>For example [a-c] will match any single character in the range 'a' to
|
||||
'c'. By default, for POSIX-Basic regular expressions, a character <EM>x</EM>
|
||||
is within the range <EM>y</EM> to <EM>z</EM>, if it collates within that
|
||||
range; this results in locale specific behavior. This behavior can
|
||||
be turned off by unsetting the <EM><A href="syntax_option_type.html#basic">collate</A></EM>
|
||||
option flag - in which case whether a character appears within a range is
|
||||
determined by comparing the code points of the characters only.</P>
|
||||
<H5>Negation:</H5>
|
||||
<P>If the bracket-expression begins with the ^ character, then it matches the
|
||||
complement of the characters it contains, for example [^a-c] matches any
|
||||
character that is not in the range a-c.</P>
|
||||
<H5>Character classes:</H5>
|
||||
<P>An expression of the form [[:name:]] matches the named character class "name",
|
||||
for example [[:lower:]] matches any lower case character. See <A href="character_class_names.html">
|
||||
character class names</A>.</P>
|
||||
<H5>Collating Elements:</H5>
|
||||
<P>An expression of the form [[.col.]] matches the collating element <EM>col</EM>.
|
||||
A collating element is any single character, or any sequence of characters that
|
||||
collates as a single unit. Collating elements may also be used as the end
|
||||
point of a range, for example: [[.ae.]-c] matches the character sequence "ae",
|
||||
plus any single character in the range "ae"-c, assuming that "ae" is treated
|
||||
as a single collating element in the current locale.</P>
|
||||
<P>As an extension, a collating element may also be specified via its <A href="collating_names.html">
|
||||
symbolic name</A>, for example:</P>
|
||||
<P>[[.NUL.]]</P>
|
||||
<P>matches a NUL character.</P>
|
||||
<H5>Equivalence classes:</H5>
|
||||
<P>
|
||||
An expression of the form [[=col=]], matches any character or collating element
|
||||
whose primary sort key is the same as that for collating element <EM>col</EM>,
|
||||
as with collating elements the name <EM>col</EM> may be a <A href="collating_names.html">
|
||||
symbolic name</A>. A primary sort key is one that ignores case,
|
||||
accentation, or locale-specific tailorings; so for example [[=a=]] matches any
|
||||
of the characters: a, &Agrave;, &Aacute;, &Acirc;, &Atilde;, &Auml;, &Aring;, A, &agrave;, &aacute;, &acirc;, &atilde;, &auml; and &aring;.
|
||||
Unfortunately implementation of this is reliant on the platform's collation and
|
||||
localisation support; this feature can not be relied upon to work portably
|
||||
across all platforms, or even all locales on one platform.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H5>Combinations:</H5>
|
||||
<P>All of the above can be combined in one character set declaration, for example:
|
||||
[[:digit:]a-c[.NUL.]].</P>
|
||||
<H4>Escapes</H4>
|
||||
<P>With the exception of the escape sequences \{, \}, \(, and \), which are
|
||||
documented above, an escape followed by any character matches that
|
||||
character. This can be used to make the special characters .[\*^$,
|
||||
"ordinary". Note that the escape character loses its special meaning
|
||||
inside a character set, so [\^] will match either a literal '\' or a '^'.</P>
|
||||
<H3><A name="variations"></A>Variations</H3>
|
||||
<H4><A name="grep"></A>Grep</H4>
|
||||
<P>When an expression is compiled with the flag <EM>grep</EM> set, then the
|
||||
expression is treated as a newline separated list of <A href="#Basic">POSIX-Basic</A>
|
||||
expressions, a match is found if any of the expressions in the list match, for
|
||||
example:</P>
|
||||
<PRE>boost::regex e("abc\ndef", boost::regex::grep);</PRE>
|
||||
<P>will match either of the POSIX-Basic expressions "abc" or "def".</P>
|
||||
<P>As its name suggests, this behavior is consistent with the Unix utility <EM>grep</EM>.</P>
|
||||
<H4><A name="emacs"></A>emacs</H4>
|
||||
<P>In addition to the <A href="#Basic">POSIX-Basic features</A> the following
|
||||
characters are also special:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<P>+ repeats the preceding atom one or more times.</P>
|
||||
<P>? repeats the preceding atom zero or one times.</P>
|
||||
<P>*? A non-greedy version of *.</P>
|
||||
<P>+? A non-greedy version of +.</P>
|
||||
<P>?? A non-greedy version of ?.</P>
|
||||
</BLOCKQUOTE>
|
||||
<P>And the following escape sequences are also recognised:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<P>\| specifies an alternative.</P>
|
||||
<P>\(?: ... \) is a non-marking grouping construct - allows you to
|
||||
lexically group something without spitting out an extra sub-expression.</P>
|
||||
<P>\w matches any word character.</P>
|
||||
<P>\W matches any non-word character.</P>
|
||||
<P>\sx matches any character in the syntax group <EM>x</EM>, the following emacs
|
||||
groupings are supported: 's', ' ', '_', 'w', '.', ')', '(', '"', '\'', '>'
|
||||
and '<'. Refer to the emacs docs for details.</P>
|
||||
<P>\Sx matches any character not in the syntax grouping <EM>x</EM>.</P>
|
||||
<P>\c and \C are not supported.</P>
|
||||
<P>\` matches zero characters only at the start of a buffer (or string being
|
||||
matched).</P>
|
||||
<P>\' matches zero characters only at the end of a buffer (or string being
|
||||
matched).</P>
|
||||
<P>\b matches zero characters at a word boundary.</P>
|
||||
<P>\B matches zero characters, not at a word boundary.</P>
|
||||
<P>\< matches zero characters only at the start of a word.</P>
|
||||
<P>\> matches zero characters only at the end of a word.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H3><A name="options"></A>Options</H3>
|
||||
<P>There are a <A href="syntax_option_type.html#basic">variety of flags</A> that
|
||||
may be combined with the <EM>basic</EM> and <EM>grep</EM> options when
|
||||
constructing the regular expression, in particular note that the <A href="syntax_option_type.html#basic">
|
||||
newline_alt, no_char_classes, no_intervals, bk_plus_qm and bk_plus_vbar</A> options
|
||||
all alter the syntax, while the <A href="syntax_option_type.html#basic">collate
|
||||
and icase</A> options modify how the case and locale sensitivity are to be
|
||||
applied.</P>
|
||||
<H3><A name="refs"></A>References</H3>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap09.html">IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions
|
||||
and Headers, Section 9, Regular Expressions (FWD.1).</A></P>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/utilities/grep.html">IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and
|
||||
Utilities, Section 4, Utilities, grep (FWD.1).</A></P>
|
||||
<P><A href="http://www.gnu.org/software/emacs/">Emacs Version 21.3</A>.</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
21 Aug 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a> 2004</I></P>
|
||||
<I>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>).</I></P>
|
||||
</I>
|
||||
</body>
|
||||
</html>
|
||||
|
471
doc/syntax_extended.html
Normal file
471
doc/syntax_extended.html
Normal file
@ -0,0 +1,471 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: POSIX-Extended Regular Expression Syntax</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../c++boost.gif" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">POSIX-Extended Regular Expression Syntax</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A> <dt><A href="#extended">POSIX Extended Syntax</A>
|
||||
<dt><A href="#variations">Variations</A>
|
||||
<dd>
|
||||
<dl>
|
||||
<dt><A href="#egrep">egrep</A> <dt><A href="#awk">awk</A> </dt>
|
||||
</dl>
|
||||
<dt><A href="#options">Options</A> <dt><A href="#refs">References</A></dt>
|
||||
</dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<P>The POSIX-Extended regular expression syntax is supported by the POSIX C
|
||||
regular expression APIs, and variations are used by the utilities <EM>egrep</EM>
|
||||
and <EM>awk</EM>. You can construct POSIX extended regular expressions in
|
||||
Boost.Regex by passing the flag <EM>extended</EM> to the regex constructor, for
|
||||
example:</P>
|
||||
<PRE>// e1 is a case sensitive POSIX-Extended expression:
|
||||
boost::regex e1(my_expression, boost::regex::extended);
|
||||
// e2 a case insensitive POSIX-Extended expression:
|
||||
boost::regex e2(my_expression, boost::regex::extended|boost::regex::icase);</PRE>
|
||||
<H3>POSIX Extended Syntax<A name="extended"></A></H3>
|
||||
<P>In POSIX-Extended regular expressions, all characters match themselves except
|
||||
for the following special characters:</P>
|
||||
<PRE>.[{()\*+?|^$</PRE>
|
||||
<H4>Wildcard:</H4>
|
||||
<P>The single character '.' when used outside of a character set will match any
|
||||
single character except:</P>
|
||||
<P>The NULL character when the flag <EM>match_not_dot_null</EM> is passed to the
|
||||
matching algorithms.</P>
|
||||
<P>The newline character when the flag <EM>match_not_dot_newline</EM> is passed to
|
||||
the matching algorithms.</P>
|
||||
<H4>Anchors:</H4>
|
||||
<P>A '^' character shall match the start of a line when used as the first
|
||||
character of an expression, or the first character of a sub-expression.</P>
|
||||
<P>A '$' character shall match the end of a line when used as the last character
|
||||
of an expression, or the last character of a sub-expression.</P>
|
||||
<H4>Marked sub-expressions:</H4>
|
||||
<P>A section beginning ( and ending ) acts as a marked sub-expression.
|
||||
Whatever matched the sub-expression is split out in a separate field by the
|
||||
matching algorithms. Marked sub-expressions can also be repeated, or referred
|
||||
to by a back-reference.</P>
|
||||
<H4>Repeats:</H4>
|
||||
<P>Any atom (a single character, a marked sub-expression, or a character class)
|
||||
can be repeated with the *, +, ?, and {} operators.</P>
|
||||
<P>The * operator will match the preceding atom zero or more times, for example
|
||||
the expression a*b will match any of the following:</P>
|
||||
<PRE>b
|
||||
ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>The + operator will match the preceding atom one or more times, for example
|
||||
the expression a+b will match any of the following:</P>
|
||||
<PRE>ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>But will not match:</P>
|
||||
<PRE>b</PRE>
|
||||
<P>The ? operator will match the preceding atom zero or one times, for
|
||||
example the expression ca?b will match any of the following:</P>
|
||||
<PRE>cb
|
||||
cab</PRE>
|
||||
<P>But will not match:</P>
|
||||
<PRE>caab</PRE>
|
||||
<P>An atom can also be repeated with a bounded repeat:</P>
|
||||
<P>a{n} Matches 'a' repeated exactly <EM>n</EM> times.</P>
|
||||
<P>a{n,} Matches 'a' repeated <EM>n</EM> or more times.</P>
|
||||
<P>a{n, m} Matches 'a' repeated between <EM>n</EM> and <EM>m</EM> times
|
||||
inclusive.</P>
|
||||
<P>For example:</P>
|
||||
<PRE>^a{2,3}$</PRE>
|
||||
<P>Will match either of:</P>
|
||||
<PRE>aa
|
||||
aaa</PRE>
|
||||
<P>But neither of:</P>
|
||||
<PRE>a
|
||||
aaaa</PRE>
|
||||
<P>It is an error to use a repeat operator, if the preceding construct can not be
|
||||
repeated, for example:</P>
|
||||
<PRE>a(*)</PRE>
|
||||
<P>Will raise an error, as there is nothing for the * operator to be applied to.</P>
|
||||
<H4>Back references:</H4>
|
||||
<P>An escape character followed by a digit <EM>n</EM>, where <EM>n </EM>is in the
|
||||
range 1-9, matches the same string that was matched by sub-expression <EM>n</EM>.
|
||||
For example the expression:</P>
|
||||
<PRE>^(a*).*\1$</PRE>
|
||||
<P>Will match the string:</P>
|
||||
<PRE>aaabbaaa</PRE>
|
||||
<P>But not the string:</P>
|
||||
<PRE>aaabba</PRE>
|
||||
<P><EM><STRONG>Caution</STRONG>: the POSIX standard does not support back-references
|
||||
for "extended" regular expressions, this is a compatible extension to that
|
||||
standard.</EM></P>
|
||||
<H4>Alternation</H4>
|
||||
<P>The | operator will match either of its arguments, so for example: abc|def will
|
||||
match either "abc" or "def".
|
||||
</P>
|
||||
<P>Parentheses can be used to group alternations, for example: ab(d|ef) will match
|
||||
either of "abd" or "abef".</P>
|
||||
<H4>Character sets:</H4>
|
||||
<P>A character set is a bracket-expression starting with [ and ending with ], it
|
||||
defines a set of characters, and matches any single character that is a member
|
||||
of that set.</P>
|
||||
<P>A bracket expression may contain any combination of the following:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Single characters:</H5>
|
||||
<P>For example [abc], will match any of the characters 'a', 'b', or 'c'.</P>
|
||||
<H5>Character ranges:</H5>
|
||||
<P>For example [a-c] will match any single character in the range 'a' to
|
||||
'c'. By default, for POSIX-Extended regular expressions, a character <EM>x</EM>
|
||||
is within the range <EM>y</EM> to <EM>z</EM>, if it collates within that
|
||||
range; this results in locale specific behavior. This behavior can
|
||||
be turned off by unsetting the <EM><A href="syntax_option_type.html#extended">collate</A></EM>
|
||||
option flag - in which case whether a character appears within a range is
|
||||
determined by comparing the code points of the characters only.</P>
|
||||
<H5>Negation:</H5>
|
||||
<P>If the bracket-expression begins with the ^ character, then it matches the
|
||||
complement of the characters it contains, for example [^a-c] matches any
|
||||
character that is not in the range a-c.</P>
|
||||
<H5>Character classes:</H5>
|
||||
<P>An expression of the form [[:name:]] matches the named character class "name",
|
||||
for example [[:lower:]] matches any lower case character. See <A href="character_class_names.html">
|
||||
character class names</A>.</P>
|
||||
<H5>Collating Elements:</H5>
|
||||
<P>An expression of the form [[.col.]] matches the collating element <EM>col</EM>.
|
||||
A collating element is any single character, or any sequence of characters that
|
||||
collates as a single unit. Collating elements may also be used as the end
|
||||
point of a range, for example: [[.ae.]-c] matches the character sequence "ae",
|
||||
plus any single character in the range "ae"-c, assuming that "ae" is treated
|
||||
as a single collating element in the current locale.</P>
|
||||
<P>As an extension, a collating element may also be specified via its <A href="collating_names.html">
|
||||
symbolic name</A>, for example:</P>
|
||||
<P>[[.NUL.]]</P>
|
||||
<P>matches a NUL character.</P>
|
||||
<H5>Equivalence classes:</H5>
|
||||
<P>
|
||||
An expression of the form [[=col=]], matches any character or collating element
|
||||
whose primary sort key is the same as that for collating element <EM>col</EM>,
|
||||
as with collating elements the name <EM>col</EM> may be a <A href="collating_names.html">
|
||||
symbolic name</A>. A primary sort key is one that ignores case,
|
||||
accentation, or locale-specific tailorings; so for example [[=a=]] matches any
|
||||
of the characters: a, &Agrave;, &Aacute;, &Acirc;, &Atilde;, &Auml;, &Aring;, A, &agrave;, &aacute;, &acirc;, &atilde;, &auml; and &aring;.
|
||||
Unfortunately implementation of this is reliant on the platform's collation and
|
||||
localisation support; this feature can not be relied upon to work portably
|
||||
across all platforms, or even all locales on one platform.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H5>Combinations:</H5>
|
||||
<P>All of the above can be combined in one character set declaration, for example:
|
||||
[[:digit:]a-c[.NUL.]].</P>
|
||||
<H4>Operator precedence</H4>
|
||||
<P> The order of precedence for operators is as shown in the following
|
||||
table:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>Collation-related bracket symbols</TD>
|
||||
<TD>[==] [::] [..]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Escaped characters
|
||||
</TD>
|
||||
<TD>\</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Character set (bracket expression)
|
||||
</TD>
|
||||
<TD>[]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Grouping</TD>
|
||||
<TD>()</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Single-character-ERE duplication
|
||||
</TD>
|
||||
<TD>* + ? {m,n}</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Concatenation</TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Anchoring</TD>
|
||||
<TD>^$</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Alternation</TD>
|
||||
<TD>|</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H4>Escapes</H4>
|
||||
<P>The POSIX standard defines no escape sequences for POSIX-Extended regular
|
||||
expressions, except that:</P>
|
||||
<UL>
|
||||
<LI>
|
||||
Any special character preceded by an escape shall match itself.
|
||||
<LI>
|
||||
The effect of any ordinary character being preceded by an escape is undefined.
|
||||
<LI>
|
||||
An escape inside a character class declaration shall match itself (in other
|
||||
words the escape character is not "special" inside a character class
|
||||
declaration).</LI></UL>
|
||||
<P>However, that's rather restrictive, so the following standard-compatible
|
||||
extensions are also supported by Boost.Regex:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Escapes matching a specific character</H5>
|
||||
<P>The following escape sequences are all synonyms for single characters:</P>
|
||||
<P>
|
||||
<TABLE id="Table7" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Escape</STRONG></TD>
|
||||
<TD><STRONG>Character</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\a</TD>
|
||||
<TD>'\a'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\e</TD>
|
||||
<TD>0x1B</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\f</TD>
|
||||
<TD>\f</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\n</TD>
|
||||
<TD>\n</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\r</TD>
|
||||
<TD>\r</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\t</TD>
|
||||
<TD>\t</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\v</TD>
|
||||
<TD>\v</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\b</TD>
|
||||
<TD>\b (but only inside a character class declaration).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\cX</TD>
|
||||
<TD>An ASCII escape sequence - the character whose code point is X % 32</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\xdd</TD>
|
||||
<TD>A hexadecimal escape sequence - matches the single character whose code point
|
||||
is 0xdd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\x{dddd}</TD>
|
||||
<TD>A hexadecimal escape sequence - matches the single character whose code point
|
||||
is 0xdddd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\0ddd</TD>
|
||||
<TD>An octal escape sequence - matches the single character whose code point is
|
||||
0ddd.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>"Single character" character classes:</H5>
|
||||
<P>Any escaped character <EM>x</EM>, if <EM>x</EM> is the name of a character
|
||||
class shall match any character that is a member of that class, and any escaped
|
||||
character <EM>X</EM>, if <EM>x</EM> is the name of a character class, shall
|
||||
match any character not in that class.</P>
|
||||
<P>The following are supported by default:</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="1" cellPadding="1" width="300" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Escape sequence</STRONG></TD>
|
||||
<TD><STRONG>Equivalent to</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\d</TD>
|
||||
<TD>[[:digit:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\l</TD>
|
||||
<TD>[[:lower:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\s</TD>
|
||||
<TD>[[:space:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\u</TD>
|
||||
<TD>[[:upper:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\w</TD>
|
||||
<TD>[[:word:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\D</TD>
|
||||
<TD>[^[:digit:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\L</TD>
|
||||
<TD>[^[:lower:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\S</TD>
|
||||
<TD>[^[:space:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\U</TD>
|
||||
<TD>[^[:upper:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\W</TD>
|
||||
<TD>[^[:word:]]</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Word Boundaries</H5>
|
||||
<P>The following escape sequences match the boundaries of words:</P>
|
||||
<P>
|
||||
<TABLE id="Table4" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\&lt;</TD>
|
||||
<TD>Matches the start of a word.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\&gt;</TD>
|
||||
<TD>Matches the end of a word.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\b</TD>
|
||||
<TD>Matches a word boundary (the start or end of a word).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\B</TD>
|
||||
<TD>Matches only when not at a word boundary.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Buffer boundaries</H5>
|
||||
<P>The following match only at buffer boundaries: a "buffer" in this context is
|
||||
the whole of the input text that is being matched against (note that ^ and
|
||||
$ may match embedded newlines within the text).</P>
|
||||
<P>
|
||||
<TABLE id="Table5" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\`</TD>
|
||||
<TD>Matches at the start of a buffer only.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\'</TD>
|
||||
<TD>Matches at the end of a buffer only.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\A</TD>
|
||||
<TD>Matches at the start of a buffer only (the same as \`).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\z</TD>
|
||||
<TD>Matches at the end of a buffer only (the same as \').</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\Z</TD>
|
||||
<TD>Matches an optional sequence of newlines at the end of a buffer: equivalent to
|
||||
the regular expression \n*\z</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Continuation Escape</H5>
|
||||
<P>The sequence \G matches only at the end of the last match found, or at the
|
||||
start of the text being matched if no previous match was found. This
|
||||
escape is useful if you're iterating over the matches contained within a text, and
|
||||
you want each subsequent match to start where the last one ended.</P>
|
||||
<H5>Quoting escape</H5>
|
||||
<P>The escape sequence \Q begins a "quoted sequence": all the subsequent
|
||||
characters are treated as literals, until either the end of the regular
|
||||
expression or \E is found. For example the expression: \Q\*+\Ea+ would
|
||||
match either of:</P>
|
||||
<PRE>\*+a<BR>\*+aaa</PRE>
|
||||
<H5>Unicode escapes</H5>
|
||||
<P>
|
||||
<TABLE id="Table6" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\C</TD>
|
||||
<TD>Matches a single code point: in Boost regex this has exactly the same effect
|
||||
as a "." operator.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\X</TD>
|
||||
<TD>Matches a combining character sequence: that is any non-combining character
|
||||
followed by a sequence of zero or more combining characters.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Any other escape</H5>
|
||||
<P>Any other escape sequence matches the character that is escaped, for example \@
|
||||
matches a literal '@'.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H3><A name="variations"></A>Variations</H3>
|
||||
<H4>Egrep<A name="egrep"></A></H4>
|
||||
<P>When an expression is compiled with the flag <EM>egrep</EM> set, then the
|
||||
expression is treated as a newline separated list of POSIX-Extended
|
||||
expressions, a match is found if any of the expressions in the list match, for
|
||||
example:</P>
|
||||
<PRE>boost::regex e("abc\ndef", boost::regex::egrep);</PRE>
|
||||
<P>will match either of the POSIX-Extended expressions "abc" or "def".</P>
|
||||
<P>As its name suggests, this behavior is consistent with the Unix utility <EM>egrep</EM>,
|
||||
and with <EM>grep</EM> when used with the -E option.</P>
|
||||
<H4>awk<A name="awk"></A></H4>
|
||||
<P>In addition to the <A href="#extended">POSIX-Extended features</A> the
|
||||
escape character is special inside a character class declaration. </P>
|
||||
<P>In addition, some escape sequences that are not defined as part of
|
||||
POSIX-Extended specification are required to be supported - however Boost.Regex
|
||||
supports these by default anyway.</P>
|
||||
<H3><A name="options"></A>Options</H3>
|
||||
<P>There are a <A href="syntax_option_type.html#extended">variety of flags</A> that
|
||||
may be combined with the <EM>extended</EM> and <EM>egrep</EM> options when
|
||||
constructing the regular expression, in particular note that the <A href="syntax_option_type.html#extended">
|
||||
newline_alt</A> option alters the syntax, while the <A href="syntax_option_type.html#extended">
|
||||
collate, nosubs and icase</A> options modify how the case and locale
|
||||
sensitivity are to be applied.</P>
|
||||
<H3><A name="refs"></A>References</H3>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap09.html"> IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions
|
||||
and Headers, Section 9, Regular Expressions.</A></P>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/utilities/grep.html"> IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and
|
||||
Utilities, Section 4, Utilities, egrep.</A></P>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/utilities/awk.html">IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and
|
||||
Utilities, Section 4, Utilities, awk.</A></P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
21 Aug 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a> 2004</I></P>
|
||||
<I>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>).</I></P>
|
||||
</I>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -175,7 +175,7 @@ static const syntax_option_type collate;
|
||||
<TD>No</TD>
|
||||
<TD>Normally Boost.Regex behaves as if the Perl m-modifier is on: so the
|
||||
assertions ^ and $ match after and before embedded newlines respectively,
|
||||
setting this flags is eqivalent to prefixing the expression with (?-m).</TD>
|
||||
setting this flags is equivalent to prefixing the expression with (?-m).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_mod_s</TD>
|
||||
@ -251,7 +251,7 @@ static const syntax_option_type collate;
|
||||
character classes permitted.</P>
|
||||
<P>In addition some perl-style escape sequences are supported (actually the awk
|
||||
syntax requires \a \b \t \v \f \n and \r to be recognised, but other
|
||||
escape sequences invoke undefined behaviour according to the POSIX standard).</P>
|
||||
escape sequences invoke undefined behavior according to the POSIX standard).</P>
|
||||
</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
@ -324,9 +324,9 @@ static const syntax_option_type collate;
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the grammar recognized by the regular expression engine is the
|
||||
same as that used by POSIX basic regular expressions in IEEE Std 1003.1-2001,
|
||||
Portable Operating System Interface (POSIX ), Base Definitions and Headers,
|
||||
Section 9, Regular Expressions (FWD.1).
|
||||
same as that used by <A href="syntax_basic.html#Basic">POSIX basic regular
|
||||
expressions</A> in IEEE Std 1003.1-2001, Portable Operating System Interface
|
||||
(POSIX ), Base Definitions and Headers, Section 9, Regular Expressions (FWD.1).
|
||||
</P>
|
||||
</TD>
|
||||
</TR>
|
||||
@ -340,13 +340,20 @@ static const syntax_option_type collate;
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the grammar recognized by the regular expression engine is the
|
||||
same as that used by POSIX utility grep in IEEE Std 1003.1-2001, Portable
|
||||
Operating System Interface (POSIX ), Shells and Utilities, Section 4,
|
||||
Utilities, grep (FWD.1).</P>
|
||||
same as that used by <A href="syntax_basic.html#grep">POSIX utility grep</A> in
|
||||
IEEE Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and
|
||||
Utilities, Section 4, Utilities, grep (FWD.1).</P>
|
||||
<P>That is to say, the same as POSIX basic syntax, but with the newline character
|
||||
acting as an alternation character in addition to "|".</P>
|
||||
acting as an alternation character; the expression is treated as a newline
|
||||
separated list of alternatives.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>emacs</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Specifies that the grammar recognised is the superset of the POSIX-Basic
|
||||
syntax used by the <A href="syntax_basic.html#emacs">emacs</A> program.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>The following options may also be set when using POSIX basic regular
|
||||
@ -390,7 +397,10 @@ static const syntax_option_type collate;
|
||||
<TD>collate</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that character ranges of the form "[a-b]" should be locale sensitive.</P>
|
||||
<P>Specifies that character ranges of the form "[a-b]" should be locale
|
||||
sensitive. <STRONG>This bit is</STRONG> <STRONG>on by default</STRONG> for
|
||||
POSIX-Basic regular expressions, but can be unset to force ranges to be
|
||||
compared by code point only.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
@ -398,7 +408,7 @@ static const syntax_option_type collate;
|
||||
<TD>No</TD>
|
||||
<TD>Specifies that the \n character has the same effect as the alternation
|
||||
operator |. Allows newline separated lists to be used as a list of
|
||||
alternatives.</TD>
|
||||
alternatives. This bit is already set, if you use the <EM>grep</EM> option.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_char_classes</TD>
|
||||
@ -482,3 +492,4 @@ static const syntax_option_type collate;
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
502
doc/syntax_perl.html
Normal file
502
doc/syntax_perl.html
Normal file
@ -0,0 +1,502 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Perl Regular Expression Syntax</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../c++boost.gif" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">
|
||||
Perl Regular Expression Syntax</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A> <dt><A href="#Perl">Perl Syntax</A> <dt><A href="#variations">
|
||||
Variations</A>
|
||||
<dd>
|
||||
<dt><A href="#options">Options</A> <dt><A href="#mods">Modifiers</A> <dt><A href="#refs">References</A></dt>
|
||||
</dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<P>The Perl regular expression syntax is based on that used by the programming
|
||||
language <EM>Perl</EM>. Perl regular expressions are the default
|
||||
behavior in Boost.Regex or you can pass the flag <EM>perl</EM> to the
|
||||
regex constructor, for example:</P>
|
||||
<PRE>// e1 is a case sensitive Perl regular expression:
|
||||
// since Perl is the default option there's no need to explicitly specify the syntax used here:
|
||||
boost::regex e1(my_expression);
|
||||
// e2 a case insensitive Perl regular expression:
|
||||
boost::regex e2(my_expression, boost::regex::perl|boost::regex::icase);</PRE>
|
||||
<H3>Perl Regular Expression Syntax<A name="Perl"></A></H3>
|
||||
<P>In Perl regular expressions, all characters match themselves except for
|
||||
the following special characters:</P>
|
||||
<PRE>.[{()\*+?|^$</PRE>
|
||||
<H4>Wildcard:</H4>
|
||||
<P>The single character '.' when used outside of a character set will match any
|
||||
single character except:</P>
|
||||
<P>The NULL character when the flag <EM>match_not_dot_null</EM> is passed to the
|
||||
matching algorithms.</P>
|
||||
<P>The newline character when the flag <EM>match_not_dot_newline</EM> is passed to
|
||||
the matching algorithms.</P>
|
||||
<H4>Anchors:</H4>
|
||||
<P>A '^' character shall match the start of a line.</P>
|
||||
<P>A '$' character shall match the end of a line.</P>
|
||||
<H4>Marked sub-expressions:</H4>
|
||||
<P>A section beginning ( and ending ) acts as a marked sub-expression.
|
||||
Whatever matched the sub-expression is split out in a separate field by the
|
||||
matching algorithms. Marked sub-expressions can also be repeated, or referred
|
||||
to by a back-reference.</P>
|
||||
<H4>Non-marking grouping:</H4>
|
||||
<P>A marked sub-expression is useful to lexically group part of a regular
|
||||
expression, but has the side-effect of splitting out an extra field in the
|
||||
result. As an alternative you can lexically group part of a regular
|
||||
expression, without generating a marked sub-expression by using (?: and ) , for
|
||||
example (?:ab)+ will repeat "ab" without splitting out any separate
|
||||
sub-expressions.</P>
|
||||
<H4>Repeats:</H4>
|
||||
<P>Any atom (a single character, a marked sub-expression, or a character class)
|
||||
can be repeated with the *, +, ?, and {} operators.</P>
|
||||
<P>The * operator will match the preceding atom zero or more times, for example
|
||||
the expression a*b will match any of the following:</P>
|
||||
<PRE>b
|
||||
ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>The + operator will match the preceding atom one or more times, for example
|
||||
the expression a+b will match any of the following:</P>
|
||||
<PRE>ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>But will not match:</P>
|
||||
<PRE>b</PRE>
|
||||
<P>The ? operator will match the preceding atom zero or one times, for
|
||||
example the expression ca?b will match any of the following:</P>
|
||||
<PRE>cb
|
||||
cab</PRE>
|
||||
<P>But will not match:</P>
|
||||
<PRE>caab</PRE>
|
||||
<P>An atom can also be repeated with a bounded repeat:</P>
|
||||
<P>a{n} Matches 'a' repeated exactly <EM>n</EM> times.</P>
|
||||
<P>a{n,} Matches 'a' repeated <EM>n</EM> or more times.</P>
|
||||
<P>a{n, m} Matches 'a' repeated between <EM>n</EM> and <EM>m</EM> times
|
||||
inclusive.</P>
|
||||
<P>For example:</P>
|
||||
<PRE>^a{2,3}$</PRE>
|
||||
<P>Will match either of:</P>
|
||||
<PRE>aa
|
||||
aaa</PRE>
|
||||
<P>But neither of:</P>
|
||||
<PRE>a
|
||||
aaaa</PRE>
|
||||
<P>It is an error to use a repeat operator, if the preceding construct can not be
|
||||
repeated, for example:</P>
|
||||
<PRE>a(*)</PRE>
|
||||
<P>Will raise an error, as there is nothing for the * operator to be applied to.</P>
|
||||
<H4>Non greedy repeats</H4>
|
||||
<P>The normal repeat operators are "greedy", that is to say they will consume as
|
||||
much input as possible. There are non-greedy versions available that will
|
||||
consume as little input as possible while still producing a match.</P>
|
||||
<P>*? Matches the previous atom zero or more times, while consuming as little
|
||||
input as possible.</P>
|
||||
<P>+? Matches the previous atom one or more times, while consuming as little input
|
||||
as possible.</P>
|
||||
<P>?? Matches the previous atom zero or one times, while consuming as little input
|
||||
as possible.</P>
|
||||
<P>{n,}? Matches the previous atom <EM>n</EM> or more times, while consuming
|
||||
as little input as possible.</P>
|
||||
<P>{n,m}? Matches the previous atom between <EM>n</EM> and <EM>m</EM> times,
|
||||
while consuming as little input as possible.</P>
|
||||
<H4>Back references:</H4>
|
||||
<P>An escape character followed by a digit <EM>n</EM>, where <EM>n </EM>is in the
|
||||
range 1-9, matches the same string that was matched by sub-expression <EM>n</EM>.
|
||||
For example the expression:</P>
|
||||
<PRE>^(a*).*\1$</PRE>
|
||||
<P>Will match the string:</P>
|
||||
<PRE>aaabbaaa</PRE>
|
||||
<P>But not the string:</P>
|
||||
<PRE>aaabba</PRE>
|
||||
<H4>Alternation</H4>
|
||||
<P>The | operator will match either of its arguments, so for example: abc|def will
|
||||
match either "abc" or "def".
|
||||
</P>
|
||||
<P>Parentheses can be used to group alternations, for example: ab(d|ef) will match
|
||||
either of "abd" or "abef".</P>
|
||||
<H4>Character sets:</H4>
|
||||
<P>A character set is a bracket-expression starting with [ and ending with ], it
|
||||
defines a set of characters, and matches any single character that is a member
|
||||
of that set.</P>
|
||||
<P>A bracket expression may contain any combination of the following:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Single characters:</H5>
|
||||
<P>For example [abc], will match any of the characters 'a', 'b', or 'c'.</P>
|
||||
<H5>Character ranges:</H5>
|
||||
<P>For example [a-c] will match any single character in the range 'a' to
|
||||
'c'. By default, for Perl regular expressions, a character <EM>x</EM>
|
||||
is within the range <EM>y</EM> to <EM>z</EM>, if it collates within that
|
||||
range; this results in locale specific behavior. This behavior can
|
||||
be turned off by unsetting the <EM><A href="syntax_option_type.html#Perl">collate</A></EM>
|
||||
option flag - in which case whether a character appears within a range is
|
||||
determined by comparing the code points of the characters only.</P>
|
||||
<H5>Negation:</H5>
|
||||
<P>If the bracket-expression begins with the ^ character, then it matches the
|
||||
complement of the characters it contains, for example [^a-c] matches any
|
||||
character that is not in the range a-c.</P>
|
||||
<H5>Character classes:</H5>
|
||||
<P>An expression of the form [[:name:]] matches the named character class "name",
|
||||
for example [[:lower:]] matches any lower case character. See <A href="character_class_names.html">
|
||||
character class names</A>.</P>
|
||||
<H5>Collating Elements:</H5>
|
||||
<P>An expression of the form [[.col.]] matches the collating element <EM>col</EM>.
|
||||
A collating element is any single character, or any sequence of characters that
|
||||
collates as a single unit. Collating elements may also be used as the end
|
||||
point of a range, for example: [[.ae.]-c] matches the character sequence "ae",
|
||||
plus any single character in the range "ae"-c, assuming that "ae" is treated
|
||||
as a single collating element in the current locale.</P>
|
||||
<P>As an extension, a collating element may also be specified via its <A href="collating_names.html">
|
||||
symbolic name</A>, for example:</P>
|
||||
<P>[[.NUL.]]</P>
|
||||
<P>matches a NUL character.</P>
|
||||
<H5>Equivalence classes:</H5>
|
||||
<P>
|
||||
An expression of the form [[=col=]], matches any character or collating element
|
||||
whose primary sort key is the same as that for collating element <EM>col</EM>,
|
||||
as with collating elements the name <EM>col</EM> may be a <A href="collating_names.html">
|
||||
symbolic name</A>. A primary sort key is one that ignores case,
|
||||
accentation, or locale-specific tailorings; so for example [[=a=]] matches any
|
||||
of the characters: a, &Agrave;, &Aacute;, &Acirc;, &Atilde;, &Auml;, &Aring;, A, &agrave;, &aacute;, &acirc;, &atilde;, &auml; and &aring;.
|
||||
Unfortunately implementation of this is reliant on the platform's collation and
|
||||
localisation support; this feature can not be relied upon to work portably
|
||||
across all platforms, or even all locales on one platform.</P>
|
||||
<H5>Escapes:</H5>
|
||||
<P>All the escape sequences that match a single character, or a single character
|
||||
class are permitted within a character class definition, <EM>except</EM> the
|
||||
negated character classes (\D \W etc).</P>
|
||||
</BLOCKQUOTE>
|
||||
<H5>Combinations:</H5>
|
||||
<P>All of the above can be combined in one character set declaration, for example:
|
||||
[[:digit:]a-c[.NUL.]].</P>
|
||||
<H4>Operator precedence</H4>
|
||||
<P> The order of precedence of operators is as shown in the following
|
||||
table:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>Collation-related bracket symbols</TD>
|
||||
<TD>[==] [::] [..]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Escaped characters
|
||||
</TD>
|
||||
<TD>\</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Character set (bracket expression)
|
||||
</TD>
|
||||
<TD>[]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Grouping</TD>
|
||||
<TD>()</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Single-character-ERE duplication
|
||||
</TD>
|
||||
<TD>* + ? {m,n}</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Concatenation</TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Anchoring</TD>
|
||||
<TD>^$</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Alternation</TD>
|
||||
<TD>|</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H4>Escapes</H4>
|
||||
<P>Any special character preceded by an escape shall match itself.
|
||||
</P>
|
||||
<P>The following escape sequences are also supported:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Escapes matching a specific character</H5>
|
||||
<P>The following escape sequences are all synonyms for single characters:</P>
|
||||
<P>
|
||||
<TABLE id="Table7" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Escape</STRONG></TD>
|
||||
<TD><STRONG>Character</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\a</TD>
|
||||
<TD>'\a'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\e</TD>
|
||||
<TD>0x1B</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\f</TD>
|
||||
<TD>\f</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\n</TD>
|
||||
<TD>\n</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\r</TD>
|
||||
<TD>\r</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\t</TD>
|
||||
<TD>\t</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\v</TD>
|
||||
<TD>\v</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\b</TD>
|
||||
<TD>\b (but only inside a character class declaration).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\cX</TD>
|
||||
<TD>An ASCII escape sequence - the character whose code point is X % 32</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\xdd</TD>
|
||||
<TD>A hexadecimal escape sequence - matches the single character whose code point
|
||||
is 0xdd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\x{dddd}</TD>
|
||||
<TD>A hexadecimal escape sequence - matches the single character whose code point
|
||||
is 0xdddd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\0ddd</TD>
|
||||
<TD>An octal escape sequence - matches the single character whose code point is
|
||||
0ddd.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>"Single character" character classes:</H5>
|
||||
<P>Any escaped character <EM>x</EM>, if <EM>x</EM> is the name of a character
|
||||
class shall match any character that is a member of that class, and any escaped
|
||||
character <EM>X</EM>, if <EM>x</EM> is the name of a character class, shall
|
||||
match any character not in that class.</P>
|
||||
<P>The following are supported by default:</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="1" cellPadding="1" width="300" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Escape sequence</STRONG></TD>
|
||||
<TD><STRONG>Equivalent to</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\d</TD>
|
||||
<TD>[[:digit:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\l</TD>
|
||||
<TD>[[:lower:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\s</TD>
|
||||
<TD>[[:space:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\u</TD>
|
||||
<TD>[[:upper:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\w</TD>
|
||||
<TD>[[:word:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\D</TD>
|
||||
<TD>[^[:digit:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\L</TD>
|
||||
<TD>[^[:lower:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\S</TD>
|
||||
<TD>[^[:space:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\U</TD>
|
||||
<TD>[^[:upper:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\W</TD>
|
||||
<TD>[^[:word:]]</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Word Boundaries</H5>
|
||||
<P>The following escape sequences match the boundaries of words:</P>
|
||||
<P>
|
||||
<TABLE id="Table4" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\&lt;</TD>
|
||||
<TD>Matches the start of a word.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\></TD>
|
||||
<TD>Matches the end of a word.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\b</TD>
|
||||
<TD>Matches a word boundary (the start or end of a word).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\B</TD>
|
||||
<TD>Matches only when not at a word boundary.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Buffer boundaries</H5>
|
||||
<P>The following match only at buffer boundaries: a "buffer" in this context is
|
||||
the whole of the input text that is being matched against (note that ^ and
|
||||
$ may match embedded newlines within the text).</P>
|
||||
<P>
|
||||
<TABLE id="Table5" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\`</TD>
|
||||
<TD>Matches at the start of a buffer only.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\'</TD>
|
||||
<TD>Matches at the end of a buffer only.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\A</TD>
|
||||
<TD>Matches at the start of a buffer only (the same as \`).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\z</TD>
|
||||
<TD>Matches at the end of a buffer only (the same as \').</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\Z</TD>
|
||||
<TD>Matches an optional sequence of newlines at the end of a buffer: equivalent to
|
||||
the regular expression \n*\z</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Continuation Escape</H5>
|
||||
<P>The sequence \G matches only at the end of the last match found, or at the
|
||||
start of the text being matched if no previous match was found. This
|
||||
escape is useful if you're iterating over the matches contained within a text, and
|
||||
you want each subsequent match to start where the last one ended.</P>
|
||||
<H5>Quoting escape</H5>
|
||||
<P>The escape sequence \Q begins a "quoted sequence": all the subsequent
|
||||
characters are treated as literals, until either the end of the regular
|
||||
expression or \E is found. For example the expression: \Q\*+\Ea+ would
|
||||
match either of:</P>
|
||||
<PRE>\*+a<BR>\*+aaa</PRE>
|
||||
<H5>Unicode escapes</H5>
|
||||
<P>
|
||||
<TABLE id="Table6" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\C</TD>
|
||||
<TD>Matches a single code point: in Boost regex this has exactly the same effect
|
||||
as a "." operator.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\X</TD>
|
||||
<TD>Matches a combining character sequence: that is any non-combining character
|
||||
followed by a sequence of zero or more combining characters.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Any other escape</H5>
|
||||
<P>Any other escape sequence matches the character that is escaped, for example \@
|
||||
matches a literal '@'.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H4 dir="ltr">Perl Extended Patterns</H4>
|
||||
<P dir="ltr">Perl-specific extensions to the regular expression syntax all start
|
||||
with (?.</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5 dir="ltr">Comments</H5>
|
||||
<P dir="ltr">(?# ... ) is treated as a comment, its contents are ignored.</P>
|
||||
<H5 dir="ltr">Modifiers</H5>
|
||||
<P dir="ltr">(?imsx-imsx ... ) alters which of the perl modifiers are in effect
|
||||
within the pattern, changes take effect from the point that the block is first
|
||||
seen and extend to any enclosing ). Letters before a '-' turn that perl
|
||||
modifier on, letters afterward turn it off.</P>
|
||||
<P dir="ltr">(?imsx-imsx:pattern) applies the specified modifiers to <EM>pattern</EM>
|
||||
only.</P>
|
||||
<H5 dir="ltr">Non-marking grouping</H5>
|
||||
<P dir="ltr">(?:pattern) lexically groups <EM>pattern</EM>, without generating an
|
||||
additional sub-expression.</P>
|
||||
<H5 dir="ltr">Lookahead</H5>
|
||||
<P dir="ltr">(?=pattern) consumes zero characters, only if <EM>pattern</EM> matches.</P>
|
||||
<P dir="ltr">(?!pattern) consumes zero characters, only if <EM>pattern</EM> does
|
||||
not match.</P>
|
||||
<H5 dir="ltr">Lookbehind</H5>
|
||||
<P dir="ltr">(?&lt;=pattern) consumes zero characters, only if <EM>pattern</EM> could
|
||||
be matched against the characters preceding the current position (<EM>pattern</EM>
|
||||
must be of fixed length).</P>
|
||||
<P dir="ltr">(?&lt;!pattern) consumes zero characters, only if <EM>pattern</EM> could
|
||||
not be matched against the characters preceding the current position (<EM>pattern</EM>
|
||||
must be of fixed length).</P>
|
||||
<H5 dir="ltr">Independent sub-expressions</H5>
|
||||
<P dir="ltr">(?>pattern) <EM>pattern</EM> is matched independently of the
|
||||
surrounding patterns, the expression will never backtrack into <EM>pattern</EM>.</P>
|
||||
<H5 dir="ltr">Conditional Expressions</H5>
|
||||
<P dir="ltr">(?(condition)yes-pattern|no-pattern) attempts to match <EM>yes-pattern</EM>
|
||||
if the <EM>condition </EM>is true, otherwise attempts to match <EM>no-pattern</EM>.</P>
|
||||
<P dir="ltr">(?(condition)yes-pattern) attempts to match <EM>yes-pattern</EM> if
|
||||
the <EM>condition </EM>is true, otherwise fails.</P>
|
||||
<P dir="ltr"><EM>Condition</EM> may be either a forward lookahead assert, or the
|
||||
index of a marked sub-expression (the condition becomes true if the
|
||||
sub-expression has been matched).</P>
|
||||
</BLOCKQUOTE>
|
||||
<H3><A name="variations"></A>Variations</H3>
|
||||
<P>The options <A href="syntax_option_type.html#perl"><EM>normal, ECMAScript, JavaScript</EM>
|
||||
and <EM>JScript</EM></A> are all synonyms for <EM>Perl</EM>.</P>
|
||||
<H3><A name="options"></A>Options</H3>
|
||||
<P>There are a <A href="syntax_option_type.html#Perl">variety of flags</A> that
|
||||
may be combined with the <EM>Perl</EM> option when constructing the regular
|
||||
expression, in particular note that the <A href="syntax_option_type.html#Perl">newline_alt</A>
|
||||
option alters the syntax, while the <A href="syntax_option_type.html#Perl">collate,
|
||||
nosubs and icase</A> options modify how the case and locale sensitivity
|
||||
are to be applied.</P>
|
||||
<H3><A name="mods"></A>Modifiers</H3>
|
||||
<P>The perl <EM>smix</EM> modifiers can either be applied using a (?smix-smix)
|
||||
prefix to the regular expression, or with one of the regex-compile time flags <EM><A href="syntax_option_type.html#Perl">
|
||||
no_mod_m, mod_x, mod_s, and no_mod_s</A></EM>.
|
||||
</P>
|
||||
<H3><A name="refs"></A>References</H3>
|
||||
<P><A href="http://www.perldoc.com/perl5.6/pod/perlre.html"> Perl 5.6.</A></P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
21 Aug 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a> 2004</I></P>
|
||||
<I>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>).</I></P>
|
||||
</I>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -51,6 +51,7 @@ public:
|
||||
bool parse_inner_set(basic_char_set<charT, traits>& char_set);
|
||||
bool parse_QE();
|
||||
bool parse_perl_extension();
|
||||
bool add_emacs_code(bool negate);
|
||||
digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
|
||||
charT unescape_character();
|
||||
regex_constants::syntax_option_type parse_options();
|
||||
@ -183,6 +184,22 @@ bool basic_regex_parser<charT, traits>::parse_basic()
|
||||
++m_position;
|
||||
return parse_repeat();
|
||||
}
|
||||
case regex_constants::syntax_plus:
|
||||
if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
|
||||
return parse_literal();
|
||||
else
|
||||
{
|
||||
++m_position;
|
||||
return parse_repeat(1);
|
||||
}
|
||||
case regex_constants::syntax_question:
|
||||
if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
|
||||
return parse_literal();
|
||||
else
|
||||
{
|
||||
++m_position;
|
||||
return parse_repeat(0, 1);
|
||||
}
|
||||
case regex_constants::syntax_open_set:
|
||||
return parse_set();
|
||||
default:
|
||||
@ -301,7 +318,10 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
||||
//
|
||||
// begin by checking for a perl-style (?...) extension:
|
||||
//
|
||||
if((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
|
||||
if(
|
||||
((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
|
||||
|| ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
|
||||
)
|
||||
{
|
||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
|
||||
return parse_perl_extension();
|
||||
@ -377,7 +397,7 @@ template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::parse_basic_escape()
|
||||
{
|
||||
++m_position;
|
||||
bool result;
|
||||
bool result = true;
|
||||
switch(this->m_traits.escape_syntax_type(*m_position))
|
||||
{
|
||||
case regex_constants::syntax_open_mark:
|
||||
@ -418,7 +438,97 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
|
||||
break;
|
||||
case regex_constants::syntax_digit:
|
||||
return parse_backref();
|
||||
case regex_constants::escape_type_start_buffer:
|
||||
if(this->flags() & regbase::emacs_ex)
|
||||
{
|
||||
++m_position;
|
||||
this->append_state(syntax_element_buffer_start);
|
||||
}
|
||||
else
|
||||
result = parse_literal();
|
||||
break;
|
||||
case regex_constants::escape_type_end_buffer:
|
||||
if(this->flags() & regbase::emacs_ex)
|
||||
{
|
||||
++m_position;
|
||||
this->append_state(syntax_element_buffer_end);
|
||||
}
|
||||
else
|
||||
result = parse_literal();
|
||||
break;
|
||||
case regex_constants::escape_type_word_assert:
|
||||
if(this->flags() & regbase::emacs_ex)
|
||||
{
|
||||
++m_position;
|
||||
this->append_state(syntax_element_word_boundary);
|
||||
}
|
||||
else
|
||||
result = parse_literal();
|
||||
break;
|
||||
case regex_constants::escape_type_not_word_assert:
|
||||
if(this->flags() & regbase::emacs_ex)
|
||||
{
|
||||
++m_position;
|
||||
this->append_state(syntax_element_within_word);
|
||||
}
|
||||
else
|
||||
result = parse_literal();
|
||||
break;
|
||||
case regex_constants::escape_type_left_word:
|
||||
if(this->flags() & regbase::emacs_ex)
|
||||
{
|
||||
++m_position;
|
||||
this->append_state(syntax_element_word_start);
|
||||
}
|
||||
else
|
||||
result = parse_literal();
|
||||
break;
|
||||
case regex_constants::escape_type_right_word:
|
||||
if(this->flags() & regbase::emacs_ex)
|
||||
{
|
||||
++m_position;
|
||||
this->append_state(syntax_element_word_end);
|
||||
}
|
||||
else
|
||||
result = parse_literal();
|
||||
break;
|
||||
default:
|
||||
if(this->flags() & regbase::emacs_ex)
|
||||
{
|
||||
bool negate = true;
|
||||
switch(*m_position)
|
||||
{
|
||||
case 'w':
|
||||
negate = false;
|
||||
// fall through:
|
||||
case 'W':
|
||||
{
|
||||
basic_char_set<charT, traits> char_set;
|
||||
if(negate)
|
||||
char_set.negate();
|
||||
char_set.add_class(this->m_word_mask);
|
||||
if(0 == this->append_set(char_set))
|
||||
{
|
||||
fail(regex_constants::error_ctype, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
++m_position;
|
||||
return true;
|
||||
}
|
||||
case 's':
|
||||
negate = false;
|
||||
// fall through:
|
||||
case 'S':
|
||||
return add_emacs_code(negate);
|
||||
case 'c':
|
||||
case 'C':
|
||||
// not supported yet:
|
||||
fail(regex_constants::error_escape, m_position - m_base);
|
||||
return false;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
result = parse_literal();
|
||||
break;
|
||||
}
|
||||
@ -447,7 +557,7 @@ bool basic_regex_parser<charT, traits>::parse_extended_escape()
|
||||
char_set.add_class(m);
|
||||
if(0 == this->append_set(char_set))
|
||||
{
|
||||
fail(regex_constants::error_range, m_position - m_base);
|
||||
fail(regex_constants::error_ctype, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
++m_position;
|
||||
@ -533,7 +643,11 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
|
||||
// when we get to here we may have a non-greedy ? mark still to come:
|
||||
//
|
||||
if((m_position != m_end)
|
||||
&& (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))))
|
||||
&& (
|
||||
(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
|
||||
|| ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
|
||||
)
|
||||
)
|
||||
{
|
||||
// OK we have a perl regex, check for a '?':
|
||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
|
||||
@ -1622,6 +1736,85 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
|
||||
{
|
||||
//
|
||||
// parses an emacs style \sx or \Sx construct.
|
||||
//
|
||||
if(++m_position == m_end)
|
||||
{
|
||||
fail(regex_constants::error_escape, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
basic_char_set<charT, traits> char_set;
|
||||
if(negate)
|
||||
char_set.negate();
|
||||
|
||||
static const charT s_punct[] = { 'p', 'u', 'n', 'c', 't', };
|
||||
|
||||
switch(*m_position)
|
||||
{
|
||||
case 's':
|
||||
case ' ':
|
||||
char_set.add_class(this->m_mask_space);
|
||||
break;
|
||||
case 'w':
|
||||
char_set.add_class(this->m_word_mask);
|
||||
break;
|
||||
case '_':
|
||||
char_set.add_single(digraph<charT>(charT('$')));
|
||||
char_set.add_single(digraph<charT>(charT('&')));
|
||||
char_set.add_single(digraph<charT>(charT('*')));
|
||||
char_set.add_single(digraph<charT>(charT('+')));
|
||||
char_set.add_single(digraph<charT>(charT('-')));
|
||||
char_set.add_single(digraph<charT>(charT('_')));
|
||||
char_set.add_single(digraph<charT>(charT('<')));
|
||||
char_set.add_single(digraph<charT>(charT('>')));
|
||||
break;
|
||||
case '.':
|
||||
char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
|
||||
break;
|
||||
case '(':
|
||||
char_set.add_single(digraph<charT>(charT('(')));
|
||||
char_set.add_single(digraph<charT>(charT('[')));
|
||||
char_set.add_single(digraph<charT>(charT('{')));
|
||||
break;
|
||||
case ')':
|
||||
char_set.add_single(digraph<charT>(charT(')')));
|
||||
char_set.add_single(digraph<charT>(charT(']')));
|
||||
char_set.add_single(digraph<charT>(charT('}')));
|
||||
break;
|
||||
case '"':
|
||||
char_set.add_single(digraph<charT>(charT('"')));
|
||||
char_set.add_single(digraph<charT>(charT('\'')));
|
||||
char_set.add_single(digraph<charT>(charT('`')));
|
||||
break;
|
||||
case '\'':
|
||||
char_set.add_single(digraph<charT>(charT('\'')));
|
||||
char_set.add_single(digraph<charT>(charT(',')));
|
||||
char_set.add_single(digraph<charT>(charT('#')));
|
||||
break;
|
||||
case '<':
|
||||
char_set.add_single(digraph<charT>(charT(';')));
|
||||
break;
|
||||
case '>':
|
||||
char_set.add_single(digraph<charT>(charT('\n')));
|
||||
char_set.add_single(digraph<charT>(charT('\f')));
|
||||
break;
|
||||
default:
|
||||
fail(regex_constants::error_ctype, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
if(0 == this->append_set(char_set))
|
||||
{
|
||||
fail(regex_constants::error_ctype, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
++m_position;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
|
||||
{
|
||||
|
@ -30,11 +30,14 @@
|
||||
#include <boost/regex/v4/regex_traits_defaults.hpp>
|
||||
#endif
|
||||
#ifdef BOOST_HAS_THREADS
|
||||
#include <boost/regex/static_mutex.hpp>
|
||||
#include <boost/regex/pending/static_mutex.hpp>
|
||||
#endif
|
||||
#ifndef BOOST_REGEX_PRIMARY_TRANSFORM
|
||||
#include <boost/regex/v4/primary_transform.hpp>
|
||||
#endif
|
||||
#ifndef BOOST_REGEX_OBJECT_CACHE_HPP
|
||||
#include <boost/regex/pending/object_cache.hpp>
|
||||
#endif
|
||||
|
||||
#ifdef BOOST_HAS_ABI_HEADERS
|
||||
# include BOOST_ABI_PREFIX
|
||||
@ -165,6 +168,25 @@ struct cpp_regex_traits_base
|
||||
std::messages<charT> const* m_pmessages;
|
||||
#endif
|
||||
std::collate<charT> const* m_pcollate;
|
||||
|
||||
bool operator<(const cpp_regex_traits_base& b)const
|
||||
{
|
||||
if(m_pctype == b.m_pctype)
|
||||
{
|
||||
if(m_pmessages == b.m_pmessages)
|
||||
{
|
||||
return m_pcollate < b.m_pcollate;
|
||||
}
|
||||
return m_pmessages < b.m_pmessages;
|
||||
}
|
||||
return m_pctype < b.m_pctype;
|
||||
}
|
||||
bool operator==(const cpp_regex_traits_base& b)const
|
||||
{
|
||||
return (m_pctype == b.m_pctype)
|
||||
&& (m_pmessages == b.m_pmessages)
|
||||
&& (m_pcollate == b.m_pcollate);
|
||||
}
|
||||
};
|
||||
|
||||
template <class charT>
|
||||
@ -191,7 +213,17 @@ class cpp_regex_traits_char_layer : public cpp_regex_traits_base<charT>
|
||||
typedef std::map<charT, regex_constants::syntax_type> map_type;
|
||||
typedef typename map_type::const_iterator map_iterator_type;
|
||||
public:
|
||||
cpp_regex_traits_char_layer(const std::locale& l);
|
||||
cpp_regex_traits_char_layer(const std::locale& l)
|
||||
: cpp_regex_traits_base<charT>(l)
|
||||
{
|
||||
init();
|
||||
}
|
||||
cpp_regex_traits_char_layer(const cpp_regex_traits_base<charT>& b)
|
||||
: cpp_regex_traits_base<charT>(b)
|
||||
{
|
||||
init();
|
||||
}
|
||||
void init();
|
||||
|
||||
regex_constants::syntax_type syntax_type(charT c)const
|
||||
{
|
||||
@ -217,8 +249,7 @@ private:
|
||||
};
|
||||
|
||||
template <class charT>
|
||||
cpp_regex_traits_char_layer<charT>::cpp_regex_traits_char_layer(const std::locale& l)
|
||||
: cpp_regex_traits_base<charT>(l)
|
||||
void cpp_regex_traits_char_layer<charT>::init()
|
||||
{
|
||||
// we need to start by initialising our syntax map so we know which
|
||||
// character is used for which purpose:
|
||||
@ -307,6 +338,11 @@ public:
|
||||
{
|
||||
init();
|
||||
}
|
||||
cpp_regex_traits_char_layer(const cpp_regex_traits_base<char>& l)
|
||||
: cpp_regex_traits_base<char>(l)
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
regex_constants::syntax_type syntax_type(char c)const
|
||||
{
|
||||
@ -393,7 +429,16 @@ public:
|
||||
typedef std::basic_string<charT> string_type;
|
||||
typedef charT char_type;
|
||||
//cpp_regex_traits_implementation();
|
||||
cpp_regex_traits_implementation(const std::locale& l);
|
||||
cpp_regex_traits_implementation(const std::locale& l)
|
||||
: cpp_regex_traits_char_layer<charT>(l), m_is(&m_sbuf)
|
||||
{
|
||||
init();
|
||||
}
|
||||
cpp_regex_traits_implementation(const cpp_regex_traits_base<charT>& l)
|
||||
: cpp_regex_traits_char_layer<charT>(l), m_is(&m_sbuf)
|
||||
{
|
||||
init();
|
||||
}
|
||||
std::string error_string(regex_constants::error_type n) const
|
||||
{
|
||||
if(!m_error_strings.empty())
|
||||
@ -429,6 +474,7 @@ private:
|
||||
// helpers:
|
||||
//
|
||||
char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const;
|
||||
void init();
|
||||
#ifdef BOOST_REGEX_BUGGY_CTYPE_FACET
|
||||
public:
|
||||
bool isctype(charT c, char_class_type m)const;
|
||||
@ -605,8 +651,7 @@ typename cpp_regex_traits_implementation<charT>::string_type
|
||||
}
|
||||
|
||||
template <class charT>
|
||||
cpp_regex_traits_implementation<charT>::cpp_regex_traits_implementation(const std::locale& l)
|
||||
: cpp_regex_traits_char_layer<charT>(l), m_is(&m_sbuf)
|
||||
void cpp_regex_traits_implementation<charT>::init()
|
||||
{
|
||||
#ifndef BOOST_NO_STD_MESSAGES
|
||||
#ifndef __IBMCPP__
|
||||
@ -798,8 +843,8 @@ bool cpp_regex_traits_implementation<charT>::isctype(const charT c, char_class_t
|
||||
template <class charT>
|
||||
boost::shared_ptr<cpp_regex_traits_implementation<charT> > create_cpp_regex_traits(const std::locale& l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT))
|
||||
{
|
||||
// TODO: create a cache for previously constructed objects.
|
||||
return boost::shared_ptr<cpp_regex_traits_implementation<charT> >(new cpp_regex_traits_implementation<charT>(l));
|
||||
cpp_regex_traits_base<charT> key(l);
|
||||
return ::boost::object_cache<cpp_regex_traits_base<charT>, cpp_regex_traits_implementation<charT> >::get(key, 5);
|
||||
}
|
||||
|
||||
} // re_detail
|
||||
|
@ -57,7 +57,7 @@ template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T >;
|
||||
# include BOOST_ABI_SUFFIX
|
||||
#endif
|
||||
|
||||
#elif defined(BOOST_MSVC)
|
||||
#elif defined(BOOST_MSVC) || defined(BOOST_INTEL) || defined(__GNUC__)
|
||||
|
||||
# ifndef BOOST_REGEX_INSTANTIATE
|
||||
# define template extern template
|
||||
@ -69,6 +69,8 @@ template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T >;
|
||||
# endif
|
||||
|
||||
template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T >;
|
||||
template class BOOST_REGEX_DECL match_results< const BOOST_REGEX_CHAR_T* >;
|
||||
template class BOOST_REGEX_DECL match_results< std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator >;
|
||||
|
||||
# ifdef BOOST_MSVC
|
||||
# pragma warning(pop)
|
||||
|
@ -71,7 +71,7 @@ public:
|
||||
|
||||
// size:
|
||||
size_type size() const
|
||||
{ return m_subs.size() - 2; }
|
||||
{ return empty() ? 0 : m_subs.size() - 2; }
|
||||
size_type max_size() const
|
||||
{ return m_subs.max_size(); }
|
||||
bool empty() const
|
||||
@ -235,7 +235,7 @@ public:
|
||||
size_type len = m_subs.size();
|
||||
if(len > n + 2)
|
||||
{
|
||||
m_subs.erase(m_subs.begin()+n+2);
|
||||
m_subs.erase(m_subs.begin()+n+2, m_subs.end());
|
||||
std::fill(m_subs.begin(), m_subs.end(), v);
|
||||
}
|
||||
else
|
||||
|
@ -20,7 +20,7 @@
|
||||
|
||||
#include <new>
|
||||
#ifdef BOOST_HAS_THREADS
|
||||
#include <boost/regex/static_mutex.hpp>
|
||||
#include <boost/regex/pending/static_mutex.hpp>
|
||||
#endif
|
||||
|
||||
#ifdef BOOST_HAS_ABI_HEADERS
|
||||
|
@ -54,7 +54,9 @@ perl_matcher<BidiIterator, Allocator, traits>::perl_matcher(BidiIterator first,
|
||||
estimate_max_state_count(static_cast<category*>(0));
|
||||
if(!(m_match_flags & (match_perl|match_posix)))
|
||||
{
|
||||
if((re.flags() & regbase::no_perl_ex) == 0)
|
||||
if((re.flags() & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
|
||||
m_match_flags |= match_perl;
|
||||
else if((re.flags() & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
|
||||
m_match_flags |= match_perl;
|
||||
else
|
||||
m_match_flags |= match_posix;
|
||||
@ -80,15 +82,17 @@ perl_matcher<BidiIterator, Allocator, traits>::perl_matcher(BidiIterator first,
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
|
||||
{
|
||||
static const difference_type k = 100000;
|
||||
difference_type dist = boost::re_detail::distance(base, last);
|
||||
traits_size_type states = static_cast<traits_size_type>(re.size());
|
||||
states *= states;
|
||||
difference_type lim = (std::numeric_limits<difference_type>::max)() - 100000 - states;
|
||||
if(dist > (difference_type)(lim / states))
|
||||
max_state_count = lim;
|
||||
difference_type lim = ((std::numeric_limits<difference_type>::max)() - k) / states;
|
||||
if(dist >= lim)
|
||||
max_state_count = (std::numeric_limits<difference_type>::max)();
|
||||
else
|
||||
max_state_count = 100000 + states * dist;
|
||||
max_state_count = k + states * dist;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
|
||||
{
|
||||
|
@ -64,6 +64,7 @@ public:
|
||||
no_intervals = 1 << 9, // {x,y} not allowed
|
||||
bk_plus_qm = 1 << 10, // uses \+ and \?
|
||||
bk_vbar = 1 << 11, // use \| for alternatives
|
||||
emacs_ex = 1 << 12, // enables emacs extensions
|
||||
|
||||
//
|
||||
// options common to all groups:
|
||||
@ -83,7 +84,7 @@ public:
|
||||
basic = basic_syntax_group | collate | no_escape_in_lists,
|
||||
extended = no_bk_refs | collate | no_perl_ex | no_escape_in_lists,
|
||||
normal = 0,
|
||||
emacs = basic | no_char_classes | no_intervals,
|
||||
emacs = basic_syntax_group | collate | emacs_ex | bk_vbar,
|
||||
awk = no_bk_refs | collate | no_perl_ex,
|
||||
grep = basic | newline_alt,
|
||||
egrep = extended | newline_alt,
|
||||
|
@ -81,6 +81,12 @@
|
||||
#ifndef BOOST_REGEX_V4_MATCH_RESULTS_HPP
|
||||
#include <boost/regex/v4/match_results.hpp>
|
||||
#endif
|
||||
#ifndef BOOST_REGEX_V4_PROTECTED_CALL_HPP
|
||||
#include <boost/regex/v4/protected_call.hpp>
|
||||
#endif
|
||||
#ifndef BOOST_REGEX_MATCHER_HPP
|
||||
#include <boost/regex/v4/perl_matcher.hpp>
|
||||
#endif
|
||||
|
||||
//
|
||||
// template instances:
|
||||
@ -136,12 +142,6 @@ typedef match_results<std::wstring::const_iterator> wsmatch;
|
||||
#endif
|
||||
|
||||
} // namespace boost
|
||||
#ifndef BOOST_REGEX_V4_PROTECTED_CALL_HPP
|
||||
#include <boost/regex/v4/protected_call.hpp>
|
||||
#endif
|
||||
#ifndef BOOST_REGEX_MATCHER_HPP
|
||||
#include <boost/regex/v4/perl_matcher.hpp>
|
||||
#endif
|
||||
#ifndef BOOST_REGEX_MATCH_HPP
|
||||
#include <boost/regex/v4/regex_match.hpp>
|
||||
#endif
|
||||
|
@ -53,7 +53,7 @@
|
||||
#include <boost/regex_fwd.hpp>
|
||||
#endif
|
||||
|
||||
#include "boost/mpl/aux_/has_xxx.hpp"
|
||||
#include "boost/mpl/has_xxx.hpp"
|
||||
#include <boost/static_assert.hpp>
|
||||
|
||||
#ifdef BOOST_HAS_ABI_HEADERS
|
||||
|
@ -23,11 +23,14 @@
|
||||
#include <boost/regex/v4/regex_traits_defaults.hpp>
|
||||
#endif
|
||||
#ifdef BOOST_HAS_THREADS
|
||||
#include <boost/regex/static_mutex.hpp>
|
||||
#include <boost/regex/pending/static_mutex.hpp>
|
||||
#endif
|
||||
#ifndef BOOST_REGEX_PRIMARY_TRANSFORM
|
||||
#include <boost/regex/v4/primary_transform.hpp>
|
||||
#endif
|
||||
#ifndef BOOST_REGEX_OBJECT_CACHE_HPP
|
||||
#include <boost/regex/pending/object_cache.hpp>
|
||||
#endif
|
||||
|
||||
#ifdef BOOST_HAS_ABI_HEADERS
|
||||
# include BOOST_ABI_PREFIX
|
||||
@ -534,7 +537,7 @@ template <class charT>
|
||||
boost::shared_ptr<w32_regex_traits_implementation<charT> > create_w32_regex_traits(::boost::re_detail::lcid_type l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT))
|
||||
{
|
||||
// TODO: create a cache for previously constructed objects.
|
||||
return boost::shared_ptr<w32_regex_traits_implementation<charT> >(new w32_regex_traits_implementation<charT>(l));
|
||||
return boost::object_cache< ::boost::re_detail::lcid_type, w32_regex_traits_implementation<charT> >::get(l, 5);
|
||||
}
|
||||
|
||||
} // re_detail
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
subproject libs/regex/performance ;
|
||||
|
||||
SOURCES = command_line main time_boost time_greta time_localised_boost time_pcre time_posix time_safe_greta ;
|
||||
SOURCES = command_line main time_boost time_greta time_localised_boost time_pcre time_dynamic_xpressive time_posix time_safe_greta ;
|
||||
|
||||
if $(HS_REGEX_PATH)
|
||||
{
|
||||
@ -42,3 +42,4 @@ exe regex_comparison :
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -33,6 +33,7 @@ bool time_greta = false;
|
||||
bool time_safe_greta = false;
|
||||
bool time_posix = false;
|
||||
bool time_pcre = false;
|
||||
bool time_xpressive = false;
|
||||
|
||||
bool test_matches = false;
|
||||
bool test_code = false;
|
||||
@ -79,6 +80,10 @@ int handle_argument(const std::string& what)
|
||||
#ifdef BOOST_HAS_PCRE
|
||||
else if(what == "-pcre")
|
||||
time_pcre = true;
|
||||
#endif
|
||||
#ifdef BOOST_HAS_XPRESSIVE
|
||||
else if(what == "-xpressive")
|
||||
time_xpressive = true;
|
||||
#endif
|
||||
else if(what == "-all")
|
||||
{
|
||||
|
@ -65,6 +65,14 @@ void test_match(const std::string& re, const std::string& text, const std::strin
|
||||
r.pcre_time = time;
|
||||
std::cout << "\tPCRE regex: " << time << "s\n";
|
||||
}
|
||||
#endif
|
||||
#ifdef BOOST_HAS_XPRESSIVE
|
||||
if(time_xpressive == true)
|
||||
{
|
||||
time = dxpr::time_match(re, text, icase);
|
||||
r.xpressive_time = time;
|
||||
std::cout << "\txpressive regex: " << time << "s\n";
|
||||
}
|
||||
#endif
|
||||
r.finalise();
|
||||
result_list.push_back(r);
|
||||
@ -118,6 +126,14 @@ void test_find_all(const std::string& re, const std::string& text, const std::st
|
||||
r.pcre_time = time;
|
||||
std::cout << "\tPCRE regex: " << time << "s\n";
|
||||
}
|
||||
#endif
|
||||
#ifdef BOOST_HAS_XPRESSIVE
|
||||
if(time_xpressive == true)
|
||||
{
|
||||
time = dxpr::time_find_all(re, text, icase);
|
||||
r.xpressive_time = time;
|
||||
std::cout << "\txpressive regex: " << time << "s\n";
|
||||
}
|
||||
#endif
|
||||
r.finalise();
|
||||
result_list.push_back(r);
|
||||
|
@ -26,6 +26,7 @@ extern bool time_greta;
|
||||
extern bool time_safe_greta;
|
||||
extern bool time_posix;
|
||||
extern bool time_pcre;
|
||||
extern bool time_xpressive;
|
||||
|
||||
extern bool test_matches;
|
||||
extern bool test_short_twain;
|
||||
@ -53,6 +54,7 @@ struct results
|
||||
double safe_greta_time;
|
||||
double posix_time;
|
||||
double pcre_time;
|
||||
double xpressive_time;
|
||||
double factor;
|
||||
std::string expression;
|
||||
std::string description;
|
||||
@ -63,6 +65,7 @@ struct results
|
||||
safe_greta_time(-1),
|
||||
posix_time(-1),
|
||||
pcre_time(-1),
|
||||
xpressive_time(-1),
|
||||
factor(std::numeric_limits<double>::max()),
|
||||
expression(ex),
|
||||
description(desc)
|
||||
@ -81,6 +84,8 @@ struct results
|
||||
factor = posix_time;
|
||||
if((pcre_time >= 0) && (pcre_time < factor))
|
||||
factor = pcre_time;
|
||||
if((xpressive_time >= 0) && (xpressive_time < factor))
|
||||
factor = xpressive_time;
|
||||
}
|
||||
};
|
||||
|
||||
@ -123,6 +128,12 @@ double time_match(const std::string& re, const std::string& text, bool icase);
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase);
|
||||
|
||||
}
|
||||
namespace dxpr {
|
||||
// xpressive tests:
// Timing entry points for the xpressive (dynamic regex) benchmark.
// Each takes the pattern `re`, the subject `text` and a case-insensitivity
// flag, and returns the measured time per iteration in seconds.  The
// implementation returns -1 when built without BOOST_HAS_XPRESSIVE.
|
||||
double time_match(const std::string& re, const std::string& text, bool icase);
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase);
|
||||
}
|
||||
|
||||
void test_match(const std::string& re, const std::string& text, const std::string& description, bool icase = false);
|
||||
void test_find_all(const std::string& re, const std::string& text, const std::string& description, bool icase = false);
|
||||
inline void test_match(const std::string& re, const std::string& text, bool icase = false)
|
||||
|
129
performance/time_dynamic_xpressive.cpp
Normal file
129
performance/time_dynamic_xpressive.cpp
Normal file
@ -0,0 +1,129 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2002
|
||||
* Dr John Maddock
|
||||
*
|
||||
* All rights reserved.
|
||||
* May not be transferred or disclosed to a third party without
|
||||
* prior consent of the author.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "regex_comparison.hpp"
|
||||
|
||||
#ifdef BOOST_HAS_XPRESSIVE
|
||||
#include <cassert>
|
||||
#include <boost/timer.hpp>
|
||||
#include <boost/xpressive/xpressive.hpp>
|
||||
|
||||
namespace dxpr
|
||||
{
|
||||
|
||||
double time_match(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
boost::xpressive::sregex e;
|
||||
e = (icase ?
|
||||
boost::xpressive::sregex(boost::xpressive::sregex::compile(re))
|
||||
: boost::xpressive::sregex(boost::xpressive::sregex::compile(re, boost::xpressive::regex_constants::icase)));
|
||||
boost::xpressive::smatch what;
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
assert(boost::xpressive::regex_match( text, what, e ));
|
||||
do
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
boost::xpressive::regex_match( text, what, e );
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
} while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
boost::xpressive::regex_match( text, what, e );
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = (std::min)(run, result);
|
||||
}
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
struct noop
|
||||
{
|
||||
void operator()( boost::xpressive::smatch const & ) const
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
boost::xpressive::sregex e;
|
||||
e = (icase ?
|
||||
boost::xpressive::sregex(boost::xpressive::sregex::compile(re))
|
||||
: boost::xpressive::sregex(boost::xpressive::sregex::compile(re, boost::xpressive::regex_constants::icase)));
|
||||
boost::xpressive::smatch what;
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
do
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
boost::xpressive::sregex_iterator begin( text.begin(), text.end(), e ), end;
|
||||
std::for_each( begin, end, noop() );
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
}while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
if(result >10)
|
||||
return result / iter;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
boost::xpressive::sregex_iterator begin( text.begin(), text.end(), e ), end;
|
||||
std::for_each( begin, end, noop() );
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = (std::min)(run, result);
|
||||
}
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
namespace dxpr{

// Stand-ins used when BOOST_HAS_XPRESSIVE is not defined: there is nothing
// to time, so both functions ignore their arguments and report -1.

double time_match(const std::string& /*re*/, const std::string& /*text*/, bool /*icase*/)
{
   return -1.0;
}

double time_find_all(const std::string& /*re*/, const std::string& /*text*/, bool /*icase*/)
{
   return -1.0;
}

}
|
||||
#endif
|
||||
|
||||
|
@ -27,7 +27,7 @@ double time_match(const std::string& re, const std::string& text, bool icase)
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
if(0 != regcomp(&e, re.c_str(), (icase ? REG_ICASE | REG_EXTENDED : REG_EXTENDED)))
|
||||
if(0 != ::regcomp(&e, re.c_str(), (icase ? REG_ICASE | REG_EXTENDED : REG_EXTENDED)))
|
||||
return -1;
|
||||
do
|
||||
{
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
#ifdef BOOST_HAS_THREADS
|
||||
|
||||
#include <boost/regex/static_mutex.hpp>
|
||||
#include <boost/regex/pending/static_mutex.hpp>
|
||||
|
||||
#if defined(BOOST_HAS_WINTHREADS)
|
||||
#define NOMINMAX
|
||||
|
@ -14,6 +14,7 @@ test_anchors.cpp
|
||||
test_asserts.cpp
|
||||
test_backrefs.cpp
|
||||
test_deprecated.cpp
|
||||
test_emacs.cpp
|
||||
test_escapes.cpp
|
||||
test_grep.cpp
|
||||
test_locale.cpp
|
||||
@ -163,3 +164,4 @@ test-suite regex
|
||||
|
||||
|
||||
|
||||
|
||||
|
75
test/object_cache/object_cache_test.cpp
Normal file
75
test/object_cache/object_cache_test.cpp
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2004
|
||||
* Dr John Maddock
|
||||
*
|
||||
* Use, modification and distribution are subject to the
|
||||
* Boost Software License, Version 1.0. (See accompanying file
|
||||
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* LOCATION: see http://www.boost.org for most recent version.
|
||||
* FILE object_cache_test.cpp
|
||||
* VERSION see <boost/version.hpp>
|
||||
* DESCRIPTION: Test code for a generic object cache.
|
||||
*/
|
||||
#include <boost/regex/pending/object_cache.hpp>
|
||||
#include <boost/test/included/test_exec_monitor.hpp>
|
||||
|
||||
// Cached value type used by the object_cache test.  Each object remembers
// the int it was built from, and a class-wide counter records how many
// objects have been built through the int constructor.
class test_object
{
   int m_value;          // the key this object was constructed from
   static int s_count;   // number of int-constructor calls so far

public:
   test_object(int key) : m_value(key) { s_count += 1; }

   int value()const { return m_value; }

   static int count() { return s_count; }
};

int test_object::s_count = 0;
|
||||
|
||||
static const int max_cache_size = 5;
|
||||
|
||||
int test_main(int /*argc*/, char * /*argv*/[])
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < 20; ++i)
|
||||
{
|
||||
boost::shared_ptr<test_object> p = boost::object_cache<int, test_object>::get(i, max_cache_size);
|
||||
BOOST_TEST(p->value() == i);
|
||||
p = boost::object_cache<int, test_object>::get(i, max_cache_size);
|
||||
BOOST_TEST(p->value() == i);
|
||||
if(i)
|
||||
{
|
||||
p = boost::object_cache<int, test_object>::get(i-1, max_cache_size);
|
||||
BOOST_TEST(p->value() == i-1);
|
||||
}
|
||||
}
|
||||
int current_count = test_object::count();
|
||||
for(int j = 0; j < 10; ++j)
|
||||
{
|
||||
for(i = 20 - max_cache_size; i < 20; ++i)
|
||||
{
|
||||
boost::shared_ptr<test_object> p = boost::object_cache<int, test_object>::get(i, max_cache_size);
|
||||
BOOST_TEST(p->value() == i);
|
||||
p = boost::object_cache<int, test_object>::get(i, max_cache_size);
|
||||
BOOST_TEST(p->value() == i);
|
||||
}
|
||||
}
|
||||
BOOST_TEST(current_count == test_object::count());
|
||||
return 0;
|
||||
}
|
||||
|
@ -49,6 +49,9 @@ int cpp_main(int /*argc*/, char * /*argv*/[])
|
||||
test_options();
|
||||
test_options2();
|
||||
test_en_locale();
|
||||
test_emacs();
|
||||
test_operators();
|
||||
test_overloads();
|
||||
return error_count;
|
||||
}
|
||||
|
||||
|
@ -208,5 +208,8 @@ void test_conditionals();
|
||||
void test_options();
|
||||
void test_options2();
|
||||
void test_en_locale();
|
||||
void test_emacs();
|
||||
void test_operators();
|
||||
void test_overloads();
|
||||
|
||||
#endif
|
||||
|
157
test/regress/test_emacs.cpp
Normal file
157
test/regress/test_emacs.cpp
Normal file
@ -0,0 +1,157 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2004
|
||||
* Dr John Maddock
|
||||
*
|
||||
* Use, modification and distribution are subject to the
|
||||
* Boost Software License, Version 1.0. (See accompanying file
|
||||
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
*
|
||||
*/
|
||||
|
||||
#include "test.hpp"
|
||||
|
||||
#ifdef BOOST_MSVC
|
||||
#pragma warning(disable:4127)
|
||||
#endif
|
||||
|
||||
void test_emacs()
|
||||
{
|
||||
using namespace boost::regex_constants;
|
||||
// now try operator + :
|
||||
TEST_REGEX_SEARCH("ab+", emacs, "a", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("ab+", emacs, "ab", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab+", emacs, "sssabbbbbbsss", match_default, make_array(3, 10, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab+c+", emacs, "abbb", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("ab+c+", emacs, "accc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("ab+c+", emacs, "abbcc", match_default, make_array(0, 5, -2, -2));
|
||||
TEST_INVALID_REGEX("\\<+", emacs);
|
||||
TEST_INVALID_REGEX("\\>+", emacs);
|
||||
TEST_REGEX_SEARCH("\n+", emacs, "\n\n", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\+", emacs, "+", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\+", emacs, "++", match_default, make_array(0, 1, -2, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\++", emacs, "++", match_default, make_array(0, 2, -2, -2));
|
||||
|
||||
// now try operator ?
|
||||
TEST_REGEX_SEARCH("a?", emacs, "b", match_default, make_array(0, 0, -2, 1, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab?", emacs, "a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab?", emacs, "ab", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab?", emacs, "sssabbbbbbsss", match_default, make_array(3, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab?c?", emacs, "a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab?c?", emacs, "abbb", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab?c?", emacs, "accc", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab?c?", emacs, "abcc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_INVALID_REGEX("\\<?", emacs);
|
||||
TEST_INVALID_REGEX("\\>?", emacs);
|
||||
TEST_REGEX_SEARCH("\n?", emacs, "\n\n", match_default, make_array(0, 1, -2, 1, 2, -2, 2, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\?", emacs, "?", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\?", emacs, "?", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\??", emacs, "??", match_default, make_array(0, 1, -2, 1, 2, -2, 2, 2, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("a*?", emacs, "aa", match_default, make_array(0, 0, -2, 0, 1, -2, 1, 1, -2, 1, 2, -2, 2, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("^a*?$", emacs, "aa", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("^.*?$", emacs, "aa", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("^\\(a\\)*?$", emacs, "aa", match_default, make_array(0, 2, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("^[ab]*?$", emacs, "aa", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a??", emacs, "aa", match_default, make_array(0, 0, -2, 0, 1, -2, 1, 1, -2, 1, 2, -2, 2, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a+?", emacs, "aa", match_default, make_array(0, 1, -2, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\{1,3\\}?", emacs, "aaa", match_default, make_array(0, 1, -2, 1, 2, -2, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\w+?w", emacs, "...ccccccwcccccw", match_default, make_array(3, 10, -2, 10, 16, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\W+\\w+?w", emacs, "...ccccccwcccccw", match_default, make_array(0, 10, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\|\\w+?", emacs, "abd", match_default, make_array(0, 1, -2, 1, 2, -2, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\|\\w+?", emacs, "abcd", match_default, make_array(0, 3, -2, 3, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("<\\ss*tag[^>]*>\\(.*?\\)<\\ss*/tag\\ss*>", emacs, " <tag>here is some text</tag> <tag></tag>", match_default, make_array(1, 29, 6, 23, -2, 30, 41, 35, 35, -2, -2));
|
||||
TEST_REGEX_SEARCH("<\\ss*tag[^>]*>\\(.*?\\)<\\ss*/tag\\ss*>", emacs, " < tag attr=\"something\">here is some text< /tag > <tag></tag>", match_default, make_array(1, 49, 24, 41, -2, 50, 61, 55, 55, -2, -2));
|
||||
TEST_INVALID_REGEX("a\\{1,3\\}\\{1\\}", emacs);
|
||||
TEST_INVALID_REGEX("a**", emacs);
|
||||
TEST_INVALID_REGEX("a++", emacs);
|
||||
|
||||
TEST_REGEX_SEARCH("\\<abcd", emacs, " abcd", match_default, make_array(2, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\<ab", emacs, "cab", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\<ab", emacs, "\nab", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\<tag", emacs, "::tag", match_default, make_array(2, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\<abcd", emacs, "abcd", match_default|match_not_bow, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\<abcd", emacs, " abcd", match_default|match_not_bow, make_array(2, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\<", emacs, "ab ", match_default|match_not_bow, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH(".\\<.", emacs, "ab", match_default|match_not_bow, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH(".\\<.", emacs, " b", match_default|match_not_bow, make_array(0, 2, -2, -2));
|
||||
// word end:
|
||||
TEST_REGEX_SEARCH("abc\\>", emacs, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\>", emacs, "abcd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\>", emacs, "abc\n", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\>", emacs, "abc::", match_default, make_array(0,3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\(?:\\>..\\|$\\)", emacs, "abc::", match_default, make_array(0, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\>", emacs, " ", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH(".\\>.", emacs, " ", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\>", emacs, "abc", match_default|match_not_eow, make_array(-2, -2));
|
||||
// word boundary:
|
||||
TEST_REGEX_SEARCH("\\babcd", emacs, " abcd", match_default, make_array(2, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\bab", emacs, "cab", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\bab", emacs, "\nab", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\btag", emacs, "::tag", match_default, make_array(2, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\b", emacs, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\b", emacs, "abcd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\b", emacs, "abc\n", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\b", emacs, "abc::", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\babcd", emacs, "abcd", match_default|match_not_bow, make_array(-2, -2));
|
||||
// within word:
|
||||
TEST_REGEX_SEARCH("\\B", emacs, "ab", match_default, make_array(1, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\Bb", emacs, "ab", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\B", emacs, "ab", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\B", emacs, "a", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a\\B", emacs, "a ", match_default, make_array(-2, -2));
|
||||
// buffer operators:
|
||||
TEST_REGEX_SEARCH("\\`abc", emacs, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\`abc", emacs, "\nabc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\`abc", emacs, " abc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\'", emacs, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\'", emacs, "abc\n", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\'", emacs, "abc ", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("a\\|b", emacs, "a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\|b", emacs, "b", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\|b\\|c", emacs, "c", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\|\\(b\\)\\|.", emacs, "b", match_default, make_array(0, 1, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\(a\\)\\|b\\|.", emacs, "a", match_default, make_array(0, 1, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\(b\\|c\\)", emacs, "ab", match_default, make_array(0, 2, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\(b\\|c\\)", emacs, "ac", match_default, make_array(0, 2, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\(b\\|c\\)", emacs, "ad", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\(a\\|b\\|c\\)", emacs, "c", match_default, make_array(0, 1, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\(a\\|\\(b\\)\\|.\\)", emacs, "b", match_default, make_array(0, 1, 0, 1, 0, 1, -2, -2));
|
||||
TEST_INVALID_REGEX("\\|c", emacs);
|
||||
TEST_INVALID_REGEX("c\\|", emacs);
|
||||
TEST_INVALID_REGEX("\\(\\|\\)", emacs);
|
||||
TEST_INVALID_REGEX("\\(a\\|\\)", emacs);
|
||||
TEST_INVALID_REGEX("\\(\\|a\\)", emacs);
|
||||
|
||||
TEST_REGEX_SEARCH("\\(?:abc\\)+", emacs, "xxabcabcxx", match_default, make_array(2, 8, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\(?:a+\\)\\(b+\\)", emacs, "xaaabbbx", match_default, make_array(1, 7, 4, 7, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\(a+\\)\\(?:b+\\)", emacs, "xaaabbba", match_default, make_array(1, 7, 1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\(?:\\(a+\\)b+\\)", emacs, "xaaabbba", match_default, make_array(1, 7, 1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\(?:a+\\(b+\\)\\)", emacs, "xaaabbba", match_default, make_array(1, 7, 4, 7, -2, -2));
|
||||
TEST_REGEX_SEARCH("a+\\(?#b+\\)b+", emacs, "xaaabbba", match_default, make_array(1, 7, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\(a\\)\\(?:b\\|$\\)", emacs, "ab", match_default, make_array(0, 2, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\(a\\)\\(?:b\\|$\\)", emacs, "a", match_default, make_array(0, 1, 0, 1, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("\\ss+", emacs, "a b", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\Ss+", emacs, " ab ", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\sw+", emacs, " ab ", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\Sw+", emacs, "a b", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\s_+", emacs, " $&*+-_<> ", match_default, make_array(1, 9, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\S_+", emacs, "$&*+-_<>b", match_default, make_array(8, 9, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\s.+", emacs, " .,;!? ", match_default, make_array(1, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\S.+", emacs, ".,;!?b", match_default, make_array(5, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\s(+", emacs, "([{ ", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\S(+", emacs, "([{ ", match_default, make_array(3, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\s)+", emacs, ")]} ", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\S)+", emacs, ")]} ", match_default, make_array(3, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\s\"+", emacs, "\"'` ", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\S\"+", emacs, "\"'` ", match_default, make_array(3, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\s'+", emacs, "',# ", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\S'+", emacs, "',# ", match_default, make_array(3, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\s<+", emacs, "; ", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\S<+", emacs, "; ", match_default, make_array(1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\s>+", emacs, "\n\f ", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\S>+", emacs, "\n\f ", match_default, make_array(2, 3, -2, -2));
|
||||
}
|
||||
|
@ -21,7 +21,7 @@
|
||||
//
|
||||
#include <boost/config.hpp>
|
||||
|
||||
#if (defined(BOOST_MSVC) || defined(__ICL)) && (_MSC_VER >= 1300)
|
||||
#if (defined(BOOST_MSVC) || defined(__ICL)) && (_MSC_VER >= 1300) && (_MSC_VER < 1400)
|
||||
# define TEST_MFC
|
||||
#endif
|
||||
|
||||
|
146
test/regress/test_operators.cpp
Normal file
146
test/regress/test_operators.cpp
Normal file
@ -0,0 +1,146 @@
|
||||
|
||||
#include "test.hpp"
|
||||
|
||||
template <class T1, class T2>
|
||||
void test_less(const T1& t1, const T2& t2)
|
||||
{
|
||||
if(!(t1 < t2))
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed < comparison", char);
|
||||
}
|
||||
if(!(t1 <= t2))
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed <= comparison", char);
|
||||
}
|
||||
if(!(t1 != t2))
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed != comparison", char);
|
||||
}
|
||||
if(t1 == t2)
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed == comparison", char);
|
||||
}
|
||||
if(t1 >= t2)
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed >= comparison", char);
|
||||
}
|
||||
if(t1 > t2)
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed > comparison", char);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T1, class T2>
|
||||
void test_greater(const T1& t1, const T2& t2)
|
||||
{
|
||||
if(t1 < t2)
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed < comparison", char);
|
||||
}
|
||||
if(t1 <= t2)
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed <= comparison", char);
|
||||
}
|
||||
if(!(t1 != t2))
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed != comparison", char);
|
||||
}
|
||||
if(t1 == t2)
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed == comparison", char);
|
||||
}
|
||||
if(!(t1 >= t2))
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed >= comparison", char);
|
||||
}
|
||||
if(!(t1 > t2))
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed > comparison", char);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T1, class T2>
|
||||
void test_equal(const T1& t1, const T2& t2)
|
||||
{
|
||||
if(t1 < t2)
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed < comparison", char);
|
||||
}
|
||||
if(!(t1 <= t2))
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed <= comparison", char);
|
||||
}
|
||||
if(t1 != t2)
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed != comparison", char);
|
||||
}
|
||||
if(!(t1 == t2))
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed == comparison", char);
|
||||
}
|
||||
if(!(t1 >= t2))
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed >= comparison", char);
|
||||
}
|
||||
if(t1 > t2)
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed > comparison", char);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T1, class T2, class T3>
|
||||
void test_plus(const T1& t1, const T2& t2, const T3& t3)
|
||||
{
|
||||
if(t1 + t2 != t3)
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed addition", char);
|
||||
}
|
||||
if(t3 != t1 + t2)
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("Failed addition", char);
|
||||
}
|
||||
}
|
||||
|
||||
void test_operators()
|
||||
{
|
||||
test_info<char>::set_typename("sub_match operators");
|
||||
|
||||
std::string s1("a");
|
||||
std::string s2("b");
|
||||
boost::sub_match<std::string::const_iterator> sub1, sub2;
|
||||
sub1.first = s1.begin();
|
||||
sub1.second = s1.end();
|
||||
sub1.matched = true;
|
||||
sub2.first = s2.begin();
|
||||
sub2.second = s2.end();
|
||||
sub2.matched = true;
|
||||
|
||||
test_less(sub1, sub2);
|
||||
test_less(sub1, s2.c_str());
|
||||
test_less(s1.c_str(), sub2);
|
||||
test_less(sub1, *s2.c_str());
|
||||
test_less(*s1.c_str(), sub2);
|
||||
test_less(sub1, s2);
|
||||
//test_less(s1, sub2);
|
||||
test_greater(sub2, sub1);
|
||||
test_greater(sub2, s1.c_str());
|
||||
test_greater(s2.c_str(), sub1);
|
||||
test_greater(sub2, *s1.c_str());
|
||||
test_greater(*s2.c_str(), sub1);
|
||||
test_greater(sub2, s1);
|
||||
//test_greater(s2, sub1);
|
||||
test_equal(sub1, sub1);
|
||||
test_equal(sub1, s1.c_str());
|
||||
test_equal(s1.c_str(), sub1);
|
||||
test_equal(sub1, *s1.c_str());
|
||||
test_equal(*s1.c_str(), sub1);
|
||||
test_equal(sub1, s1);
|
||||
//test_equal(s1, sub1);
|
||||
test_plus(sub2, sub1, "ba");
|
||||
test_plus(sub2, s1.c_str(), "ba");
|
||||
test_plus(s2.c_str(), sub1, "ba");
|
||||
test_plus(sub2, *s1.c_str(), "ba");
|
||||
test_plus(*s2.c_str(), sub1, "ba");
|
||||
test_plus(sub2, s1, "ba");
|
||||
//test_plus(s2, sub1, "ba");
|
||||
}
|
46
test/regress/test_overloads.cpp
Normal file
46
test/regress/test_overloads.cpp
Normal file
@ -0,0 +1,46 @@
|
||||
|
||||
#include "test.hpp"
|
||||
|
||||
// Reports a test error quoting the expression x when x evaluates to false.
// Expands to a complete if-block, so uses do not need a trailing semicolon.
#define BOOST_REGEX_TEST(x) if(!(x)){ BOOST_REGEX_TEST_ERROR("Error in: " BOOST_STRINGIZE(x), char); }
|
||||
|
||||
void test_overloads()
|
||||
{
|
||||
test_info<char>::set_typename("sub_match operators");
|
||||
|
||||
// test all the available overloads with *one* simple
|
||||
// expression, doing all these tests with all the test
|
||||
// cases would just take to long...
|
||||
|
||||
boost::regex e("abc");
|
||||
std::string s("abc");
|
||||
const std::string& cs = s;
|
||||
boost::smatch sm;
|
||||
boost::cmatch cm;
|
||||
// regex_match:
|
||||
BOOST_REGEX_TEST(boost::regex_match(cs.begin(), cs.end(), sm, e))
|
||||
BOOST_REGEX_TEST(boost::regex_match(cs.begin(), cs.end(), sm, e, boost::regex_constants::match_default))
|
||||
BOOST_REGEX_TEST(boost::regex_match(cs.begin(), cs.end(), e))
|
||||
BOOST_REGEX_TEST(boost::regex_match(cs.begin(), cs.end(), e, boost::regex_constants::match_default))
|
||||
BOOST_REGEX_TEST(boost::regex_match(s.c_str(), cm, e))
|
||||
BOOST_REGEX_TEST(boost::regex_match(s.c_str(), cm, e, boost::regex_constants::match_default))
|
||||
BOOST_REGEX_TEST(boost::regex_match(s.c_str(), e))
|
||||
BOOST_REGEX_TEST(boost::regex_match(s.c_str(), e, boost::regex_constants::match_default))
|
||||
BOOST_REGEX_TEST(boost::regex_match(s, sm, e))
|
||||
BOOST_REGEX_TEST(boost::regex_match(s, sm, e, boost::regex_constants::match_default))
|
||||
BOOST_REGEX_TEST(boost::regex_match(s, e))
|
||||
BOOST_REGEX_TEST(boost::regex_match(s, e, boost::regex_constants::match_default))
|
||||
// regex_search:
|
||||
BOOST_REGEX_TEST(boost::regex_search(cs.begin(), cs.end(), sm, e))
|
||||
BOOST_REGEX_TEST(boost::regex_search(cs.begin(), cs.end(), sm, e, boost::regex_constants::match_default))
|
||||
BOOST_REGEX_TEST(boost::regex_search(cs.begin(), cs.end(), e))
|
||||
BOOST_REGEX_TEST(boost::regex_search(cs.begin(), cs.end(), e, boost::regex_constants::match_default))
|
||||
BOOST_REGEX_TEST(boost::regex_search(s.c_str(), cm, e))
|
||||
BOOST_REGEX_TEST(boost::regex_search(s.c_str(), cm, e, boost::regex_constants::match_default))
|
||||
BOOST_REGEX_TEST(boost::regex_search(s.c_str(), e))
|
||||
BOOST_REGEX_TEST(boost::regex_search(s.c_str(), e, boost::regex_constants::match_default))
|
||||
BOOST_REGEX_TEST(boost::regex_search(s, sm, e))
|
||||
BOOST_REGEX_TEST(boost::regex_search(s, sm, e, boost::regex_constants::match_default))
|
||||
BOOST_REGEX_TEST(boost::regex_search(s, e))
|
||||
BOOST_REGEX_TEST(boost::regex_search(s, e, boost::regex_constants::match_default))
|
||||
}
|
@ -17,7 +17,7 @@
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <boost/regex/static_mutex.hpp>
|
||||
#include <boost/regex/pending/static_mutex.hpp>
|
||||
#include <boost/thread/thread.hpp>
|
||||
#include <boost/timer.hpp>
|
||||
|
||||
|
Reference in New Issue
Block a user