diff --git a/.travis.yml b/.travis.yml index bad8a07e..060a16aa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,18 +4,11 @@ language: cpp -sudo: false - -python: "2.7" - -os: - - linux - - osx - branches: only: - master - develop + - /feature\/.*/ env: matrix: @@ -30,25 +23,15 @@ matrix: - os: linux env: TEST_CMAKE=true # variables unused - just for identification in travis ci gui script: - - git submodule update --init tools/cmake - - git submodule update --init libs/conversion - - git submodule update --init libs/function_types - - git submodule update --init libs/fusion - - git submodule update --init libs/typeof - mkdir __build__ && cd __build__ - - cmake .. -DBOOST_ENABLE_CMAKE=ON -DBOOST_REGEX_INCLUDE_EXAMPLES=ON + - cmake .. -DBOOST_ENABLE_CMAKE=ON -DBOOST_INCLUDE_LIBRARIES=regex -DBOOST_REGEX_INCLUDE_EXAMPLES=ON - cmake --build . - os: linux env: TEST_CMAKE=true BUILD_SHARED_LIBS=On # variables unused - just for identification in travis ci gui script: - - git submodule update --init tools/cmake - - git submodule update --init libs/conversion - - git submodule update --init libs/function_types - - git submodule update --init libs/fusion - - git submodule update --init libs/typeof - mkdir __build__ && cd __build__ - - cmake .. -DBUILD_SHARED_LIBS=ON -DBOOST_ENABLE_CMAKE=ON -DBOOST_REGEX_INCLUDE_EXAMPLES=ON + - cmake .. -DBUILD_SHARED_LIBS=ON -DBOOST_ENABLE_CMAKE=ON -DBOOST_INCLUDE_LIBRARIES=regex -DBOOST_REGEX_INCLUDE_EXAMPLES=ON - cmake --build . - os: linux @@ -265,63 +248,27 @@ matrix: - os: osx env: TOOLSET=clang COMPILER=clang++ CXXSTD=11 osx_image: xcode6.4 - + # On this image, git doesn't support --jobs 3 + install: + - BOOST_BRANCH=develop && [ "$TRAVIS_BRANCH" == "master" ] && BOOST_BRANCH=master || true + - cd .. 
+ - git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root + - cd boost-root + - git submodule update --init tools/boostdep + - cp -r $TRAVIS_BUILD_DIR/* libs/regex + - python tools/boostdep/depinst/depinst.py -I example regex + - ./bootstrap.sh + - ./b2 headers install: - BOOST_BRANCH=develop && [ "$TRAVIS_BRANCH" == "master" ] && BOOST_BRANCH=master || true - cd .. - - git clone -b $BOOST_BRANCH https://github.com/boostorg/boost.git boost-root + - git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root - cd boost-root - - git submodule update --init tools/build - - git submodule update --init tools/boost_install - - git submodule update --init libs/headers - - git submodule update --init libs/config - - git submodule update --init libs/core - - git submodule update --init libs/container_hash - - git submodule update --init libs/detail - - git submodule update --init libs/smart_ptr - - git submodule update --init libs/predef - - git submodule update --init libs/assert - - git submodule update --init libs/throw_exception - - git submodule update --init libs/mpl - - git submodule update --init libs/type_traits - - git submodule update --init libs/static_assert - - git submodule update --init libs/integer - - git submodule update --init libs/preprocessor - - git submodule update --init libs/functional - - git submodule update --init libs/program_options - - git submodule update --init libs/chrono - - git submodule update --init libs/system - - git submodule update --init libs/thread - - git submodule update --init libs/winapi - - git submodule update --init libs/move - - git submodule update --init libs/date_time - - git submodule update --init libs/ratio - - git submodule update --init libs/iterator - - git submodule update --init libs/range - - git submodule update --init libs/any - - git submodule update --init libs/concept_check - - git submodule update --init libs/array - - git submodule update --init 
libs/timer - - git submodule update --init libs/bind - - git submodule update --init libs/utility - - git submodule update --init libs/io - - git submodule update --init libs/intrusive - - git submodule update --init libs/container - - git submodule update --init libs/tuple - - git submodule update --init libs/exception - - git submodule update --init libs/function - - git submodule update --init libs/type_index - - git submodule update --init libs/lexical_cast - - git submodule update --init libs/numeric - - git submodule update --init libs/math - - git submodule update --init libs/tokenizer - - git submodule update --init libs/optional - - git submodule update --init libs/atomic - - git submodule update --init libs/rational - - git submodule update --init libs/algorithm + - git submodule update --init tools/boostdep - cp -r $TRAVIS_BUILD_DIR/* libs/regex + - python tools/boostdep/depinst/depinst.py -I example -g "--jobs 3" regex - ./bootstrap.sh - ./b2 headers diff --git a/CMakeLists.txt b/CMakeLists.txt index d7e190be..a61ba77c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,11 +41,10 @@ ##### Current Limitations: # # - Doesn't compile or run tests -# - Doesn't support installation # -cmake_minimum_required( VERSION 3.5 ) -project( BoostRegex LANGUAGES CXX ) +cmake_minimum_required( VERSION 3.5...3.16 ) +project( boost_regex VERSION "${BOOST_SUPERPROJECT_VERSION}" LANGUAGES CXX ) option( BOOST_REGEX_INCLUDE_EXAMPLES "Also build (some) boost regex examples" OFF ) option( BOOST_REGEX_USE_ICU "Enable ICU support in boost regex" OFF ) @@ -55,20 +54,11 @@ file( GLOB BOOST_REGEX_SRC ./src/*.cpp ) add_library( boost_regex ${BOOST_REGEX_SRC} ) add_library( Boost::regex ALIAS boost_regex ) -# Currently, installation isn't supported directly, -# but someone else might install this target from the parent -# CMake script, so lets proactively differentiate between -# the include directory during regular use (BUILD_INTERFACE) -# and after installation 
-target_include_directories( boost_regex - PUBLIC - $ - $ -) +target_include_directories( boost_regex PUBLIC include ) target_compile_definitions( boost_regex PUBLIC - # No need for autolink and we don't mangle library name anyway + # No need for autolink BOOST_REGEX_NO_LIB $<$,SHARED_LIBRARY>:BOOST_REGEX_DYN_LINK=1> $<$,STATIC_LIBRARY>:BOOST_REGEX_STATIC_LINK=1> diff --git a/include/boost/regex/v4/basic_regex.hpp b/include/boost/regex/v4/basic_regex.hpp index b28b17c7..c56dd8ef 100644 --- a/include/boost/regex/v4/basic_regex.hpp +++ b/include/boost/regex/v4/basic_regex.hpp @@ -70,13 +70,14 @@ void bubble_down_one(I first, I last) } } +static const int hash_value_mask = 1 << (std::numeric_limits::digits - 1); + template inline int hash_value_from_capture_name(Iterator i, Iterator j) { std::size_t r = boost::hash_range(i, j); - r %= ((std::numeric_limits::max)() - 10001); - r += 10000; - return static_cast(r); + r %= ((std::numeric_limits::max)()); + return static_cast(r) | hash_value_mask; } class named_subexpressions diff --git a/include/boost/regex/v4/basic_regex_creator.hpp b/include/boost/regex/v4/basic_regex_creator.hpp index 5b866b2e..611d34b4 100644 --- a/include/boost/regex/v4/basic_regex_creator.hpp +++ b/include/boost/regex/v4/basic_regex_creator.hpp @@ -20,6 +20,8 @@ #ifndef BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP #define BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP +#include + #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable: 4103) @@ -239,7 +241,7 @@ protected: bool m_icase; // true for case insensitive matches unsigned m_repeater_id; // the state_id of the next repeater bool m_has_backrefs; // true if there are actually any backrefs - unsigned m_backrefs; // bitmask of permitted backrefs + indexed_bit_flag m_backrefs; // bitmask of permitted backrefs boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for; bool m_has_recursions; // set when we have recursive expresisons to fixup std::vector m_recursion_checks; // 
notes which recursions we've followed while analysing this expression @@ -268,7 +270,7 @@ private: template basic_regex_creator::basic_regex_creator(regex_data* data) : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_icase(false), m_repeater_id(0), - m_has_backrefs(false), m_backrefs(0), m_bad_repeats(0), m_has_recursions(false), m_word_mask(0), m_mask_space(0), m_lower_mask(0), m_upper_mask(0), m_alpha_mask(0) + m_has_backrefs(false), m_bad_repeats(0), m_has_recursions(false), m_word_mask(0), m_mask_space(0), m_lower_mask(0), m_upper_mask(0), m_alpha_mask(0) { m_pdata->m_data.clear(); m_pdata->m_status = ::boost::regex_constants::error_ok; @@ -764,7 +766,7 @@ void basic_regex_creator::fixup_recursions(re_syntax_base* state) if(idx < 0) { idx = -idx-1; - if(idx >= 10000) + if(idx >= hash_value_mask) { idx = m_pdata->get_id(idx); if(idx <= 0) @@ -796,7 +798,7 @@ void basic_regex_creator::fixup_recursions(re_syntax_base* state) bool ok = false; re_syntax_base* p = base; std::ptrdiff_t idx = static_cast(state)->alt.i; - if(idx > 10000) + if(idx >= hash_value_mask) { // // There may be more than one capture group with this hash, just do what Perl diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index 9cf2bad0..ff52f329 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -558,8 +558,8 @@ bool basic_regex_parser::parse_open_paren() // // allow backrefs to this mark: // - if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT)) - this->m_backrefs |= 1u << (markid - 1); + if(markid > 0) + this->m_backrefs.set(markid); return true; } @@ -925,7 +925,7 @@ escape_type_class_jump: } if(negative) i = 1 + (static_cast(m_mark_count) - i); - if(((i > 0) && (i < std::numeric_limits::digits) && (i - 1 < static_cast(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && 
(static_cast(this->m_pdata->get_id(i))-1 < static_cast(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1))))) + if(((i < hash_value_mask) && (i > 0) && (this->m_backrefs.test(i))) || ((i >= hash_value_mask) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs.test(this->m_pdata->get_id(i))))) { m_position = pc; re_brace* pb = static_cast(this->append_state(syntax_element_backref, sizeof(re_brace))); @@ -1957,7 +1957,7 @@ bool basic_regex_parser::parse_backref() charT c = unescape_character(); this->append_literal(c); } - else if((i > 0) && (this->m_backrefs & (1u << (i-1)))) + else if((i > 0) && (this->m_backrefs.test(i))) { m_position = pc; re_brace* pb = static_cast(this->append_state(syntax_element_backref, sizeof(re_brace))); @@ -2731,8 +2731,7 @@ option_group_jump: // // allow backrefs to this mark: // - if(markid < (int)(sizeof(unsigned) * CHAR_BIT)) - this->m_backrefs |= 1u << (markid - 1); + this->m_backrefs.set(markid); } return true; } diff --git a/include/boost/regex/v4/indexed_bit_flag.hpp b/include/boost/regex/v4/indexed_bit_flag.hpp new file mode 100644 index 00000000..c9d32c59 --- /dev/null +++ b/include/boost/regex/v4/indexed_bit_flag.hpp @@ -0,0 +1,54 @@ +/* + * + * Copyright (c) 2020 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE indexed_bit_flag.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares class indexed_bit_flag. 
+ */ + +#include <boost/regex/config.hpp> +#include <set> + +#ifndef BOOST_REGEX_V4_INDEXED_BIT_FLAG_HPP +#define BOOST_REGEX_V4_INDEXED_BIT_FLAG_HPP + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +class indexed_bit_flag // records which indices have been flagged: a 64-bit mask for small indices, a std::set for the rest +{ + boost::uint64_t low_mask; // bit i is set when index i (0 to 62) has been flagged + std::set<std::size_t> mask_set; // flagged indices of 63 and above +public: + indexed_bit_flag() : low_mask(0) {} + void set(std::size_t i) // flag index i + { + if (i < std::numeric_limits<boost::uint64_t>::digits - 1) + low_mask |= static_cast<boost::uint64_t>(1u) << i; + else + mask_set.insert(i); + } + bool test(std::size_t i) const // true when index i was previously passed to set(); const because it only reads + { + if (i < std::numeric_limits<boost::uint64_t>::digits - 1) + return (low_mask & (static_cast<boost::uint64_t>(1u) << i)) != 0; + else + return mask_set.find(i) != mask_set.end(); + } +}; + +} // namespace BOOST_REGEX_DETAIL_NS +} // namespace boost + + +#endif diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index 00edd63e..7d6ff488 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -619,7 +619,7 @@ bool perl_matcher::match_backref() // or PCRE. // int index = static_cast(pstate)->index; - if(index >= 10000) + if(index >= hash_value_mask) { named_subexpressions::range_type r = re.get_data().equal_range(index); BOOST_ASSERT(r.first != r.second); @@ -768,7 +768,7 @@ inline bool perl_matcher::match_assert_backref( { // Have we matched subexpression "index"? // Check if index is a hash value: - if(index >= 10000) + if(index >= hash_value_mask) { named_subexpressions::range_type r = re.get_data().equal_range(index); while(r.first != r.second) @@ -792,7 +792,7 @@ inline bool perl_matcher::match_assert_backref( // Have we recursed into subexpression "index"? // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1. int idx = -(index+1); - if(idx >= 10000) + if(idx >= hash_value_mask) { named_subexpressions::range_type r = re.get_data().equal_range(idx); int stack_index = recursion_stack.empty() ? 
-1 : recursion_stack.back().idx; diff --git a/test/regress/main.cpp b/test/regress/main.cpp index e3e3dd7f..87ad9ff4 100644 --- a/test/regress/main.cpp +++ b/test/regress/main.cpp @@ -139,10 +139,10 @@ int cpp_main(int /*argc*/, char * /*argv*/[]) int* get_array_data() { - static boost::thread_specific_ptr > tp; + static boost::thread_specific_ptr > tp; if(tp.get() == 0) - tp.reset(new boost::array); + tp.reset(new boost::array); return tp.get()->data(); } @@ -160,9 +160,9 @@ const int* make_array(int first, ...) #ifdef TEST_THREADS int* data = get_array_data(); #else - static int data[200]; + static int data[800]; #endif - std::fill_n(data, 200, -2); + std::fill_n(data, 800, -2); va_list ap; va_start(ap, first); // diff --git a/test/regress/test_backrefs.cpp b/test/regress/test_backrefs.cpp index 58f4dedb..be9f54ca 100644 --- a/test/regress/test_backrefs.cpp +++ b/test/regress/test_backrefs.cpp @@ -103,5 +103,10 @@ void test_backrefs() TEST_REGEX_SEARCH("a(?'foo'(?'bar'(?'bb'(?'aa'b*))))c\\g{foo}d", perl, "abbcbbbd", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("^(?'foo'.)\\g{foo}", perl, "abc", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("a(?'foo'[bc])\\g{foo}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2)); + + // Bug cases from https://github.com/boostorg/regex/issues/75 + TEST_REGEX_SEARCH("(?:(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)\\g{-1}|WORKING)", perl, "WORKING", match_default, make_array(0, 7, -2, -2)); + TEST_REGEX_SEARCH("(?:(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)\\g{-1}|WORKING)", perl, "WORKING", match_default, 
make_array(0, 7, -2, -2)); + }