diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7cc86d2c..3d2016cc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,12 +13,12 @@ on: release: types: [published, created, edited] jobs: - ubuntu-focal: - runs-on: ubuntu-20.04 + ubuntu-jammy: + runs-on: ubuntu-22.04 strategy: fail-fast: false matrix: - compiler: [ g++-9, g++-10, clang++-9, clang++-10 ] + compiler: [ g++-11, g++-12, g++-13 ] standard: [ c++11, c++14, c++17, c++2a ] steps: - uses: actions/checkout@v2 @@ -34,7 +34,7 @@ jobs: - name: Add repository run: sudo apt-add-repository -y "ppa:ubuntu-toolchain-r/test" - name: Install packages - run: sudo apt install g++-9 g++-10 clang-9 clang-10 libicu-dev + run: sudo apt install g++-11 g++-12 g++-13 - name: Checkout main boost run: git clone -b develop --depth 1 https://github.com/boostorg/boost.git ../boost-root - name: Update tools/boostdep @@ -55,11 +55,8 @@ jobs: - name: Generate user config run: 'echo "using $TOOLSET : : ${{ matrix.compiler }} : -std=${{ matrix.standard }} ;" > ~/user-config.jam' working-directory: ../boost-root - - name: Config info install - run: ../../../b2 config_info_travis_install toolset=$TOOLSET - working-directory: ../boost-root/libs/config/test - name: Config info - run: ./config_info_travis + run: ../../../b2 print_config_info toolset=$TOOLSET -std=${{ matrix.standard }} working-directory: ../boost-root/libs/config/test - name: Test run: ../../../b2 toolset=$TOOLSET @@ -89,12 +86,12 @@ jobs: - name: Test run: ${{ matrix.compiler }} -std=${{ matrix.standard }} -I../../include *.cpp ../../src/*.cpp -o regress && ./regress working-directory: ./test/regress - ubuntu-bionic: - runs-on: ubuntu-18.04 + ubuntu-jammy-clang: + runs-on: ubuntu-22.04 strategy: fail-fast: false matrix: - compiler: [ g++-7, g++-8, clang++-7, clang++-8 ] + compiler: [ clang++-15 ] standard: [ c++11, c++14, c++17 ] steps: - uses: actions/checkout@v2 @@ -110,7 +107,7 @@ jobs: - name: Add repository run: sudo 
apt-add-repository -y "ppa:ubuntu-toolchain-r/test" - name: Install packages - run: sudo apt install g++-7 g++-8 clang-7 clang-8 libicu-dev + run: sudo apt install clang-15 - name: Checkout main boost run: git clone -b develop --depth 1 https://github.com/boostorg/boost.git ../boost-root - name: Update tools/boostdep @@ -131,11 +128,8 @@ jobs: - name: Generate user config run: 'echo "using $TOOLSET : : ${{ matrix.compiler }} : -std=${{ matrix.standard }} ;" > ~/user-config.jam' working-directory: ../boost-root - - name: Config info install - run: ../../../b2 config_info_travis_install toolset=$TOOLSET - working-directory: ../boost-root/libs/config/test - name: Config info - run: ./config_info_travis + run: ../../../b2 print_config_info toolset=$TOOLSET -std=${{ matrix.standard }} working-directory: ../boost-root/libs/config/test - name: Test run: ../../../b2 toolset=$TOOLSET define=CI_SUPPRESS_KNOWN_ISSUES define=SLOW_COMPILER diff --git a/include/boost/regex/config.hpp b/include/boost/regex/config.hpp index bed485fa..ee867e1c 100644 --- a/include/boost/regex/config.hpp +++ b/include/boost/regex/config.hpp @@ -90,6 +90,11 @@ # endif #endif +// Default cap on non-avoidable recursion depth; only set here when the user has not already defined it (see user.hpp): +#ifndef BOOST_REGEX_MAX_RECURSION_DEPTH +# define BOOST_REGEX_MAX_RECURSION_DEPTH 100 +#endif + /**************************************************************************** * diff --git a/include/boost/regex/user.hpp b/include/boost/regex/user.hpp index 4b159bc5..d5dc5206 100644 --- a/include/boost/regex/user.hpp +++ b/include/boost/regex/user.hpp @@ -93,3 +93,7 @@ // define this if you want regex to use __cdecl calling convensions, even when __fastcall is available: // #define BOOST_REGEX_NO_FASTCALL + +// define this to control how deep non-avoidable recursive function calls can go, typical expressions +// and format strings should only recurse a handful of times, this is mainly to prevent DOS type attacks. 
+// #define BOOST_REGEX_MAX_RECURSION_DEPTH 100 diff --git a/include/boost/regex/v5/basic_regex_creator.hpp b/include/boost/regex/v5/basic_regex_creator.hpp index bb76c7c1..6e49ffe9 100644 --- a/include/boost/regex/v5/basic_regex_creator.hpp +++ b/include/boost/regex/v5/basic_regex_creator.hpp @@ -248,7 +248,7 @@ private: void fixup_recursions(re_syntax_base* state); void create_startmaps(re_syntax_base* state); int calculate_backstep(re_syntax_base* state); - void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask); + void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask, unsigned recursion_count = 0); unsigned get_restart_type(re_syntax_base* state); void set_all_masks(unsigned char* bits, unsigned char); bool is_bad_repeat(re_syntax_base* pt); @@ -1074,7 +1074,7 @@ struct recursion_saver }; template -void basic_regex_creator::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask) +void basic_regex_creator::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask, unsigned recursion_count) { recursion_saver saved_recursions(&m_recursion_checks); int not_last_jump = 1; @@ -1085,6 +1085,28 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, // track case sensitivity: bool l_icase = m_icase; + if (recursion_count > BOOST_REGEX_MAX_RECURSION_DEPTH) + { + // Oops error: + if (0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_complexity; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if (0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Expression complexity exceeded."; + boost::regex_error e(message, 
boost::regex_constants::error_complexity, 0); + e.raise(); + } + return; + } + while(state) { switch(state->type) @@ -1122,7 +1144,7 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, } // now figure out if we can match a NULL string at this point: if(pnull) - create_startmap(state->next.p, 0, pnull, mask); + create_startmap(state->next.p, 0, pnull, mask, recursion_count + 1); return; } case syntax_element_recurse: @@ -1184,7 +1206,7 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, case syntax_element_word_start: { // recurse, then AND with all the word characters: - create_startmap(state->next.p, l_map, pnull, mask); + create_startmap(state->next.p, l_map, pnull, mask, recursion_count + 1); if(l_map) { l_map[0] |= mask_init; @@ -1199,7 +1221,7 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, case syntax_element_word_end: { // recurse, then AND with all the word characters: - create_startmap(state->next.p, l_map, pnull, mask); + create_startmap(state->next.p, l_map, pnull, mask, recursion_count + 1); if(l_map) { l_map[0] |= mask_init; @@ -1291,11 +1313,11 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, return; } set_bad_repeat(state); - create_startmap(state->next.p, l_map, pnull, mask); + create_startmap(state->next.p, l_map, pnull, mask, recursion_count + 1); if((state->type == syntax_element_alt) || (static_cast(state)->min == 0) || (not_last_jump == 0)) - create_startmap(rep->alt.p, l_map, pnull, mask); + create_startmap(rep->alt.p, l_map, pnull, mask, recursion_count + 1); /* sibling branch: same depth as the call above */ } } return; @@ -1354,7 +1376,7 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, if(ok && ((m_recursion_checks[static_cast(state)->index] & 2u) == 0)) { m_recursion_checks[static_cast(state)->index] |= 2u; - create_startmap(p->next.p, l_map, pnull, mask); + create_startmap(p->next.p, l_map, pnull, mask, recursion_count + 1); } } state = state->next.p; diff --git a/include/boost/regex/v5/regex_format.hpp 
b/include/boost/regex/v5/regex_format.hpp index 4c82d185..c13d4b26 100644 --- a/include/boost/regex/v5/regex_format.hpp +++ b/include/boost/regex/v5/regex_format.hpp @@ -97,11 +97,11 @@ private: void put(char_type c); void put(const sub_match_type& sub); - void format_all(); + void format_all(unsigned recursion_count = 0); void format_perl(); void format_escape(); - void format_conditional(); - void format_until_scope_end(); + void format_conditional(unsigned recursion_count); + void format_until_scope_end(unsigned recursion_count); bool handle_perl_verb(bool have_brace); inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j, const std::integral_constant&) @@ -199,8 +199,13 @@ OutputIterator basic_regex_formatter -void basic_regex_formatter::format_all() +void basic_regex_formatter::format_all(unsigned recursion_count) { + if (recursion_count > BOOST_REGEX_MAX_RECURSION_DEPTH) + { + // We need to protect ourselves from bad format strings used as DOS attacks: + BOOST_THROW_EXCEPTION(std::runtime_error("Excessive recursion in format string, this looks like a deliberately malformed expression.")); + } // over and over: while(m_position != m_end) { @@ -224,7 +229,7 @@ void basic_regex_formatter::format ++m_position; bool have_conditional = m_have_conditional; m_have_conditional = false; - format_until_scope_end(); + format_until_scope_end(recursion_count); m_have_conditional = have_conditional; if(m_position == m_end) return; @@ -255,7 +260,7 @@ void basic_regex_formatter::format if(m_flags & boost::regex_constants::format_all) { ++m_position; - format_conditional(); + format_conditional(recursion_count); break; } put(*m_position); @@ -644,7 +649,7 @@ void basic_regex_formatter::format } template -void basic_regex_formatter::format_conditional() +void basic_regex_formatter::format_conditional(unsigned recursion_count) { if(m_position == m_end) { @@ -692,7 +697,7 @@ void basic_regex_formatter::format if(m_results[v].matched) { 
m_have_conditional = true; - format_all(); + format_all(++recursion_count); m_have_conditional = false; if((m_position != m_end) && (*m_position == static_cast(':'))) { @@ -702,7 +707,7 @@ void basic_regex_formatter::format output_state saved_state = m_state; m_state = output_none; // format the rest of this scope: - format_until_scope_end(); + format_until_scope_end(recursion_count); // restore output state: m_state = saved_state; } @@ -714,7 +719,7 @@ void basic_regex_formatter::format m_state = output_none; // format until ':' or ')': m_have_conditional = true; - format_all(); + format_all(++recursion_count); m_have_conditional = false; // restore state: m_state = saved_state; @@ -723,17 +728,17 @@ void basic_regex_formatter::format // skip the ':': ++m_position; // format the rest of this scope: - format_until_scope_end(); + format_until_scope_end(recursion_count); } } } template -void basic_regex_formatter::format_until_scope_end() +void basic_regex_formatter::format_until_scope_end(unsigned recursion_count) { do { - format_all(); + format_all(++recursion_count); if((m_position == m_end) || (*m_position == static_cast(')'))) return; put(*m_position++); diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index edbe190a..e0363d6b 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -78,149 +78,60 @@ lib boost_regex_recursive : local regress-sources = regress/$(R_SOURCE) ; -test-suite regex - : - [ run regress/$(R_SOURCE) ../build//boost_regex ../build//icu_options - : # command line - : # input files - : # requirements - : regex_regress ] +run regress/$(R_SOURCE) ../build//boost_regex ../build//icu_options + : # command line + : # input files + : # requirements + : regex_regress ; - [ run regress/$(R_SOURCE) ../build//boost_regex - ../../thread/build//boost_thread ../build//icu_options - : # command line - : # input files - : # requirements - TEST_THREADS - : regex_regress_threaded ] +run regress/$(R_SOURCE) ../build//boost_regex + ../../thread/build//boost_thread 
../build//icu_options + : # command line + : # input files + : # requirements + TEST_THREADS + : regex_regress_threaded ; - [ run regress/$(R_SOURCE) ../build//boost_regex - ../../thread/build//boost_thread ../build//icu_options - : # command line - : # input files - : # requirements - TEST_THREADS - BOOST_REGEX_MAX_CACHE_BLOCKS=0 - : regex_regress_threaded_no_cache ] - - [ regex-test posix_api_check : c_compiler_checks/posix_api_check.c ] - - [ compile c_compiler_checks/wide_posix_api_check.c - : : wide_posix_api_check_c ] - - [ regex-test posix_api_check_cpp : c_compiler_checks/posix_api_check.cpp ] - - [ regex-test wide_posix_api_check_cpp - : c_compiler_checks/wide_posix_api_check.cpp ] - - [ run pathology/bad_expression_test.cpp : : : - [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] - ] - - [ run pathology/recursion_test.cpp : : : - [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] - ] - - [ run named_subexpressions/named_subexpressions_test.cpp : : : - [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] - ] - - [ run unicode/unicode_iterator_test.cpp : : : - [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] - release TEST_UTF8 : unicode_iterator_test_utf8 ] - [ run unicode/unicode_iterator_test.cpp : : : - [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] - release TEST_UTF16 : unicode_iterator_test_utf16 ] - [ run unicode/unicode_casefold_test.cpp - ../build//boost_regex ../build//icu_options - ] - [ run static_mutex/static_mutex_test.cpp - ../../thread/build//boost_thread ../build//boost_regex - ] - [ run object_cache/object_cache_test.cpp : : : - [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] - ] - - [ run config_info/regex_config_info.cpp - ../build//boost_regex/static - : # command line - : # input files - : always_show_run_output - ] - [ run config_info/regex_config_info.cpp ../build//boost_regex - : # command 
line - : # input files - : always_show_run_output - : regex_dll_config_info - ] - - [ run collate_info/collate_info.cpp ../build//boost_regex - : : : always_show_run_output : test_collate_info ] - - - [ link concepts/concept_check.cpp : - [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] - ] - [ link concepts/concept_check.cpp : - BOOST_REGEX_STANDALONE [ check-target-builds ../build//is_legacy_03 : : no ] : standalone_concept_check - ] - [ link concepts/icu_concept_check.cpp : - BOOST_REGEX_STANDALONE [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] - ] - [ link concepts/icu_concept_check.cpp : - [ check-target-builds ../build//is_legacy_03 : : no ] : standalone_icu_concept_check - ] - [ link concepts/range_concept_check.cpp : - [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] - ] - [ run concepts/test_bug_11988.cpp : : : - [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] - ] - - [ run - # sources - captures/captures_test.cpp - ../build//icu_options - : # additional args - : # test-files - : # requirements - multi - BOOST_REGEX_MATCH_EXTRA=1 - BOOST_REGEX_NO_LIB=1 - [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] - : # test name - captures_test - ] - - [ run regress/$(R_SOURCE) .//boost_regex_recursive - ../build//icu_options - : # command line - : # input files - : # requirements - BOOST_REGEX_RECURSIVE=1 - BOOST_REGEX_CXX03=1 - : regex_regress_recursive ] - -[ run regress/$(R_SOURCE) ./noeh_test//boost_regex_noeh - ../build//icu_options - : # command line - : # input files - : # requirements - BOOST_NO_EXCEPTIONS=1 - off - static - shared - : regex_regress_noeh ] - -; +run regress/$(R_SOURCE) ../build//boost_regex + ../../thread/build//boost_thread ../build//icu_options + : # command line + : # input files + : # requirements + TEST_THREADS + BOOST_REGEX_MAX_CACHE_BLOCKS=0 + : regex_regress_threaded_no_cache ; +regex-test posix_api_check : 
c_compiler_checks/posix_api_check.c ; +compile c_compiler_checks/wide_posix_api_check.c : : wide_posix_api_check_c ; +regex-test posix_api_check_cpp : c_compiler_checks/posix_api_check.cpp ; +regex-test wide_posix_api_check_cpp : c_compiler_checks/wide_posix_api_check.cpp ; +run pathology/bad_expression_test.cpp : : : [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] ; +run pathology/recursion_test.cpp : : : [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] ; +run named_subexpressions/named_subexpressions_test.cpp : : : [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] ; +run unicode/unicode_iterator_test.cpp : : : [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] release TEST_UTF8 : unicode_iterator_test_utf8 ; +run unicode/unicode_iterator_test.cpp : : : [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] release TEST_UTF16 : unicode_iterator_test_utf16 ; +run unicode/unicode_casefold_test.cpp ../build//boost_regex ../build//icu_options ; +run static_mutex/static_mutex_test.cpp ../../thread/build//boost_thread ../build//boost_regex ; +run object_cache/object_cache_test.cpp : : : [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] ; +run config_info/regex_config_info.cpp ../build//boost_regex/static : : : always_show_run_output ; +run config_info/regex_config_info.cpp ../build//boost_regex : : : always_show_run_output : regex_dll_config_info ; +run collate_info/collate_info.cpp ../build//boost_regex : : : always_show_run_output : test_collate_info ; +link concepts/concept_check.cpp : [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] ; +link concepts/concept_check.cpp : BOOST_REGEX_STANDALONE [ check-target-builds ../build//is_legacy_03 : : no ] : standalone_concept_check ; +link concepts/icu_concept_check.cpp : BOOST_REGEX_STANDALONE [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] ; 
+link concepts/icu_concept_check.cpp : [ check-target-builds ../build//is_legacy_03 : : no ] : standalone_icu_concept_check ; +link concepts/range_concept_check.cpp : [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] ; +run concepts/test_bug_11988.cpp : : : [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] ; +run captures/captures_test.cpp ../build//icu_options : : : multi BOOST_REGEX_MATCH_EXTRA=1 BOOST_REGEX_NO_LIB=1 + [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] : captures_test ; +run regress/$(R_SOURCE) .//boost_regex_recursive ../build//icu_options : : : BOOST_REGEX_RECURSIVE=1 BOOST_REGEX_CXX03=1 : regex_regress_recursive ; +run regress/$(R_SOURCE) ./noeh_test//boost_regex_noeh ../build//icu_options : : : BOOST_NO_EXCEPTIONS=1 off static shared : regex_regress_noeh ; compile test_consolidated.cpp ; build-project ../example ; # `quick` target (for CI) run quick.cpp ../build//boost_regex ; - compile test_warnings.cpp : msvc:all msvc:on gcc:all gcc:on @@ -240,3 +151,4 @@ compile test_windows_defs_3.cpp ; compile test_windows_defs_4.cpp ; run issue153.cpp : : : msvc:-STACK:2097152 ; +run bad_format_string.cpp ; diff --git a/test/bad_format_string.cpp b/test/bad_format_string.cpp new file mode 100644 index 00000000..935bf53b --- /dev/null +++ b/test/bad_format_string.cpp @@ -0,0 +1,32 @@ +/* + * + * Copyright (c) 2024 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. 
(See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + +#include +#include +#include + +int main() +{ + try{ + std::string format_string("((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((("); + boost::regex e("foo"); + std::string in("foobar"); + std::string out; + boost::regex_replace(std::back_inserter(out), in.begin(), in.end(), + e, format_string, boost::match_default | boost::format_all); + BOOST_TEST(false); + } + catch (const std::runtime_error&) + { + std::cout << "OK" << std::endl; + } + return boost::report_errors(); +} diff --git a/test/regress/test_alt.cpp b/test/regress/test_alt.cpp index 7eb187fc..b96e70a5 100644 --- a/test/regress/test_alt.cpp +++ b/test/regress/test_alt.cpp @@ -51,5 +51,7 @@ void test_alt() TEST_REGEX_SEARCH("a\nb", grep, "a", match_default, make_array(0, 1, -2, -2)); TEST_REGEX_SEARCH("a\nb", egrep, "b", match_default, make_array(0, 1, -2, -2)); TEST_REGEX_SEARCH("a\nb", egrep, "a", match_default, make_array(0, 1, -2, -2)); + // DOS protection: + TEST_INVALID_REGEX("(|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||(?0))", perl); } diff --git a/test/regress/test_anchors.cpp b/test/regress/test_anchors.cpp index e695f920..b7d2a4f7 100644 --- a/test/regress/test_anchors.cpp +++ b/test/regress/test_anchors.cpp @@ -63,5 +63,7 @@ void test_anchors() TEST_REGEX_SEARCH_W(L"^.", boost::regex::extended, L"\x2028 \x2028", match_default, make_array(0, 1, -2, 1, 2, -2, -2)); TEST_REGEX_SEARCH_W(L".$", boost::regex::extended, L" \x2028 \x2028", match_default, make_array(0, 1, -2, 2, 3, -2, 3, 4, -2, -2)); #endif + // DOS attack prevention: + TEST_INVALID_REGEX("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$", perl); }