Set a limit on max recursions.

Applies to last remaining recursive calls in regex creation, and format string parsing.
Added tests, and updated CI.
This commit is contained in:
jzmaddock
2024-03-16 19:06:10 +00:00
parent 237e69caf6
commit b86985e65a
9 changed files with 146 additions and 171 deletions

View File

@ -13,12 +13,12 @@ on:
release:
types: [published, created, edited]
jobs:
ubuntu-focal:
runs-on: ubuntu-20.04
ubuntu-jammy:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
compiler: [ g++-9, g++-10, clang++-9, clang++-10 ]
compiler: [ g++-11, g++-12, g++-13 ]
standard: [ c++11, c++14, c++17, c++2a ]
steps:
- uses: actions/checkout@v2
@ -34,7 +34,7 @@ jobs:
- name: Add repository
run: sudo apt-add-repository -y "ppa:ubuntu-toolchain-r/test"
- name: Install packages
run: sudo apt install g++-9 g++-10 clang-9 clang-10 libicu-dev
run: sudo apt install g++-11 g++-12 g++-13
- name: Checkout main boost
run: git clone -b develop --depth 1 https://github.com/boostorg/boost.git ../boost-root
- name: Update tools/boostdep
@ -55,11 +55,8 @@ jobs:
- name: Generate user config
run: 'echo "using $TOOLSET : : ${{ matrix.compiler }} : <cxxflags>-std=${{ matrix.standard }} ;" > ~/user-config.jam'
working-directory: ../boost-root
- name: Config info install
run: ../../../b2 config_info_travis_install toolset=$TOOLSET
working-directory: ../boost-root/libs/config/test
- name: Config info
run: ./config_info_travis
run: ../../../b2 print_config_info toolset=$TOOLSET <cxxflags>-std=${{ matrix.standard }}
working-directory: ../boost-root/libs/config/test
- name: Test
run: ../../../b2 toolset=$TOOLSET
@ -89,12 +86,12 @@ jobs:
- name: Test
run: ${{ matrix.compiler }} -std=${{ matrix.standard }} -I../../include *.cpp ../../src/*.cpp -o regress && ./regress
working-directory: ./test/regress
ubuntu-bionic:
runs-on: ubuntu-18.04
ubuntu-jammy-clang:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
compiler: [ g++-7, g++-8, clang++-7, clang++-8 ]
compiler: [ clang++-15 ]
standard: [ c++11, c++14, c++17 ]
steps:
- uses: actions/checkout@v2
@ -110,7 +107,7 @@ jobs:
- name: Add repository
run: sudo apt-add-repository -y "ppa:ubuntu-toolchain-r/test"
- name: Install packages
run: sudo apt install g++-7 g++-8 clang-7 clang-8 libicu-dev
run: sudo apt install clang-15
- name: Checkout main boost
run: git clone -b develop --depth 1 https://github.com/boostorg/boost.git ../boost-root
- name: Update tools/boostdep
@ -131,11 +128,8 @@ jobs:
- name: Generate user config
run: 'echo "using $TOOLSET : : ${{ matrix.compiler }} : <cxxflags>-std=${{ matrix.standard }} ;" > ~/user-config.jam'
working-directory: ../boost-root
- name: Config info install
run: ../../../b2 config_info_travis_install toolset=$TOOLSET
working-directory: ../boost-root/libs/config/test
- name: Config info
run: ./config_info_travis
run: ../../../b2 print_config_info toolset=$TOOLSET <cxxflags>-std=${{ matrix.standard }}
working-directory: ../boost-root/libs/config/test
- name: Test
run: ../../../b2 toolset=$TOOLSET define=CI_SUPPRESS_KNOWN_ISSUES define=SLOW_COMPILER

View File

@ -90,6 +90,8 @@
# endif
#endif
#define BOOST_REGEX_MAX_RECURSION_DEPTH 100
/****************************************************************************
*

View File

@ -93,3 +93,7 @@
// define this if you want regex to use __cdecl calling convensions, even when __fastcall is available:
// #define BOOST_REGEX_NO_FASTCALL
// define this to control how deep non-avoidable recursive function calls can go, typical expressions
// and format strings should only recurse a handful of times, this is mainly to prevent DOS type attacks.
// #define BOOST_REGEX_MAX_RECURSION_DEPTH 100

View File

@ -248,7 +248,7 @@ private:
void fixup_recursions(re_syntax_base* state);
void create_startmaps(re_syntax_base* state);
int calculate_backstep(re_syntax_base* state);
void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask);
void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask, unsigned recursion_count = 0);
unsigned get_restart_type(re_syntax_base* state);
void set_all_masks(unsigned char* bits, unsigned char);
bool is_bad_repeat(re_syntax_base* pt);
@ -1074,7 +1074,7 @@ struct recursion_saver
};
template <class charT, class traits>
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask, unsigned recursion_count)
{
recursion_saver saved_recursions(&m_recursion_checks);
int not_last_jump = 1;
@ -1085,6 +1085,28 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
// track case sensitivity:
bool l_icase = m_icase;
if (recursion_count > BOOST_REGEX_MAX_RECURSION_DEPTH)
{
// Oops error:
if (0 == this->m_pdata->m_status) // update the error code if not already set
this->m_pdata->m_status = boost::regex_constants::error_complexity;
//
// clear the expression, we should be empty:
//
this->m_pdata->m_expression = 0;
this->m_pdata->m_expression_len = 0;
//
// and throw if required:
//
if (0 == (this->flags() & regex_constants::no_except))
{
std::string message = "Expression complexity exceeded.";
boost::regex_error e(message, boost::regex_constants::error_complexity, 0);
e.raise();
}
return;
}
while(state)
{
switch(state->type)
@ -1122,7 +1144,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
}
// now figure out if we can match a NULL string at this point:
if(pnull)
create_startmap(state->next.p, 0, pnull, mask);
create_startmap(state->next.p, 0, pnull, mask, ++recursion_count);
return;
}
case syntax_element_recurse:
@ -1184,7 +1206,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
case syntax_element_word_start:
{
// recurse, then AND with all the word characters:
create_startmap(state->next.p, l_map, pnull, mask);
create_startmap(state->next.p, l_map, pnull, mask, ++recursion_count);
if(l_map)
{
l_map[0] |= mask_init;
@ -1199,7 +1221,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
case syntax_element_word_end:
{
// recurse, then AND with all the word characters:
create_startmap(state->next.p, l_map, pnull, mask);
create_startmap(state->next.p, l_map, pnull, mask, ++recursion_count);
if(l_map)
{
l_map[0] |= mask_init;
@ -1291,11 +1313,11 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
return;
}
set_bad_repeat(state);
create_startmap(state->next.p, l_map, pnull, mask);
create_startmap(state->next.p, l_map, pnull, mask, ++recursion_count);
if((state->type == syntax_element_alt)
|| (static_cast<re_repeat*>(state)->min == 0)
|| (not_last_jump == 0))
create_startmap(rep->alt.p, l_map, pnull, mask);
create_startmap(rep->alt.p, l_map, pnull, mask, ++recursion_count);
}
}
return;
@ -1354,7 +1376,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
if(ok && ((m_recursion_checks[static_cast<re_brace*>(state)->index] & 2u) == 0))
{
m_recursion_checks[static_cast<re_brace*>(state)->index] |= 2u;
create_startmap(p->next.p, l_map, pnull, mask);
create_startmap(p->next.p, l_map, pnull, mask, ++recursion_count);
}
}
state = state->next.p;

View File

@ -97,11 +97,11 @@ private:
void put(char_type c);
void put(const sub_match_type& sub);
void format_all();
void format_all(unsigned recursion_count = 0);
void format_perl();
void format_escape();
void format_conditional();
void format_until_scope_end();
void format_conditional(unsigned recursion_count);
void format_until_scope_end(unsigned recursion_count);
bool handle_perl_verb(bool have_brace);
inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j, const std::integral_constant<bool, false>&)
@ -199,8 +199,13 @@ OutputIterator basic_regex_formatter<OutputIterator, Results, traits, ForwardIte
}
template <class OutputIterator, class Results, class traits, class ForwardIter>
void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format_all()
void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format_all(unsigned recursion_count)
{
if (recursion_count > BOOST_REGEX_MAX_RECURSION_DEPTH)
{
// We need to protect ourselves from bad format strings used as DOS attacks:
BOOST_THROW_EXCEPTION(std::runtime_error("Excessive recursion in format string, this looks like a deliberately malformed expression."));
}
// over and over:
while(m_position != m_end)
{
@ -224,7 +229,7 @@ void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format
++m_position;
bool have_conditional = m_have_conditional;
m_have_conditional = false;
format_until_scope_end();
format_until_scope_end(recursion_count);
m_have_conditional = have_conditional;
if(m_position == m_end)
return;
@ -255,7 +260,7 @@ void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format
if(m_flags & boost::regex_constants::format_all)
{
++m_position;
format_conditional();
format_conditional(recursion_count);
break;
}
put(*m_position);
@ -644,7 +649,7 @@ void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format
}
template <class OutputIterator, class Results, class traits, class ForwardIter>
void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format_conditional()
void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format_conditional(unsigned recursion_count)
{
if(m_position == m_end)
{
@ -692,7 +697,7 @@ void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format
if(m_results[v].matched)
{
m_have_conditional = true;
format_all();
format_all(++recursion_count);
m_have_conditional = false;
if((m_position != m_end) && (*m_position == static_cast<char_type>(':')))
{
@ -702,7 +707,7 @@ void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format
output_state saved_state = m_state;
m_state = output_none;
// format the rest of this scope:
format_until_scope_end();
format_until_scope_end(recursion_count);
// restore output state:
m_state = saved_state;
}
@ -714,7 +719,7 @@ void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format
m_state = output_none;
// format until ':' or ')':
m_have_conditional = true;
format_all();
format_all(++recursion_count);
m_have_conditional = false;
// restore state:
m_state = saved_state;
@ -723,17 +728,17 @@ void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format
// skip the ':':
++m_position;
// format the rest of this scope:
format_until_scope_end();
format_until_scope_end(recursion_count);
}
}
}
template <class OutputIterator, class Results, class traits, class ForwardIter>
void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format_until_scope_end()
void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format_until_scope_end(unsigned recursion_count)
{
do
{
format_all();
format_all(++recursion_count);
if((m_position == m_end) || (*m_position == static_cast<char_type>(')')))
return;
put(*m_position++);

View File

@ -78,149 +78,60 @@ lib boost_regex_recursive :
local regress-sources = regress/$(R_SOURCE) ;
test-suite regex
:
[ run regress/$(R_SOURCE) ../build//boost_regex ../build//icu_options
run regress/$(R_SOURCE) ../build//boost_regex ../build//icu_options
: # command line
: # input files
: # requirements
: regex_regress ]
: regex_regress ;
[ run regress/$(R_SOURCE) ../build//boost_regex
run regress/$(R_SOURCE) ../build//boost_regex
../../thread/build//boost_thread ../build//icu_options
: # command line
: # input files
: # requirements
<define>TEST_THREADS
: regex_regress_threaded ]
: regex_regress_threaded ;
[ run regress/$(R_SOURCE) ../build//boost_regex
run regress/$(R_SOURCE) ../build//boost_regex
../../thread/build//boost_thread ../build//icu_options
: # command line
: # input files
: # requirements
<define>TEST_THREADS
<define>BOOST_REGEX_MAX_CACHE_BLOCKS=0
: regex_regress_threaded_no_cache ]
[ regex-test posix_api_check : c_compiler_checks/posix_api_check.c ]
[ compile c_compiler_checks/wide_posix_api_check.c
: : wide_posix_api_check_c ]
[ regex-test posix_api_check_cpp : c_compiler_checks/posix_api_check.cpp ]
[ regex-test wide_posix_api_check_cpp
: c_compiler_checks/wide_posix_api_check.cpp ]
[ run pathology/bad_expression_test.cpp : : :
[ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ]
]
[ run pathology/recursion_test.cpp : : :
[ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ]
]
[ run named_subexpressions/named_subexpressions_test.cpp : : :
[ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ]
]
[ run unicode/unicode_iterator_test.cpp : : :
[ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ]
release <define>TEST_UTF8 : unicode_iterator_test_utf8 ]
[ run unicode/unicode_iterator_test.cpp : : :
[ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ]
release <define>TEST_UTF16 : unicode_iterator_test_utf16 ]
[ run unicode/unicode_casefold_test.cpp
../build//boost_regex ../build//icu_options
]
[ run static_mutex/static_mutex_test.cpp
../../thread/build//boost_thread ../build//boost_regex
]
[ run object_cache/object_cache_test.cpp : : :
[ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ]
]
[ run config_info/regex_config_info.cpp
../build//boost_regex/<link>static
: # command line
: # input files
: <test-info>always_show_run_output
]
[ run config_info/regex_config_info.cpp ../build//boost_regex
: # command line
: # input files
: <test-info>always_show_run_output
: regex_dll_config_info
]
[ run collate_info/collate_info.cpp ../build//boost_regex
: : : <test-info>always_show_run_output : test_collate_info ]
[ link concepts/concept_check.cpp :
[ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ]
]
[ link concepts/concept_check.cpp :
<define>BOOST_REGEX_STANDALONE [ check-target-builds ../build//is_legacy_03 : : <build>no ] : standalone_concept_check
]
[ link concepts/icu_concept_check.cpp :
<define>BOOST_REGEX_STANDALONE [ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ]
]
[ link concepts/icu_concept_check.cpp :
[ check-target-builds ../build//is_legacy_03 : : <build>no ] : standalone_icu_concept_check
]
[ link concepts/range_concept_check.cpp :
[ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ]
]
[ run concepts/test_bug_11988.cpp : : :
[ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ]
]
[ run
# sources
captures/captures_test.cpp
../build//icu_options
: # additional args
: # test-files
: # requirements
<threading>multi
<define>BOOST_REGEX_MATCH_EXTRA=1
<define>BOOST_REGEX_NO_LIB=1
[ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ]
: # test name
captures_test
]
[ run regress/$(R_SOURCE) .//boost_regex_recursive
../build//icu_options
: # command line
: # input files
: # requirements
<define>BOOST_REGEX_RECURSIVE=1
<define>BOOST_REGEX_CXX03=1
: regex_regress_recursive ]
[ run regress/$(R_SOURCE) ./noeh_test//boost_regex_noeh
../build//icu_options
: # command line
: # input files
: # requirements
<define>BOOST_NO_EXCEPTIONS=1
<exception-handling>off
<link>static
<runtime-link>shared
: regex_regress_noeh ]
;
: regex_regress_threaded_no_cache ;
regex-test posix_api_check : c_compiler_checks/posix_api_check.c ;
compile c_compiler_checks/wide_posix_api_check.c : : wide_posix_api_check_c ;
regex-test posix_api_check_cpp : c_compiler_checks/posix_api_check.cpp ;
regex-test wide_posix_api_check_cpp : c_compiler_checks/wide_posix_api_check.cpp ;
run pathology/bad_expression_test.cpp : : : [ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ] ;
run pathology/recursion_test.cpp : : : [ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ] ;
run named_subexpressions/named_subexpressions_test.cpp : : : [ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ] ;
run unicode/unicode_iterator_test.cpp : : : [ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ] release <define>TEST_UTF8 : unicode_iterator_test_utf8 ;
run unicode/unicode_iterator_test.cpp : : : [ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ] release <define>TEST_UTF16 : unicode_iterator_test_utf16 ;
run unicode/unicode_casefold_test.cpp ../build//boost_regex ../build//icu_options ;
run static_mutex/static_mutex_test.cpp ../../thread/build//boost_thread ../build//boost_regex ;
run object_cache/object_cache_test.cpp : : : [ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ] ;
run config_info/regex_config_info.cpp ../build//boost_regex/<link>static : : : <test-info>always_show_run_output ;
run config_info/regex_config_info.cpp ../build//boost_regex : : : <test-info>always_show_run_output : regex_dll_config_info ;
run collate_info/collate_info.cpp ../build//boost_regex : : : <test-info>always_show_run_output : test_collate_info ;
link concepts/concept_check.cpp : [ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ] ;
link concepts/concept_check.cpp : <define>BOOST_REGEX_STANDALONE [ check-target-builds ../build//is_legacy_03 : : <build>no ] : standalone_concept_check ;
link concepts/icu_concept_check.cpp : <define>BOOST_REGEX_STANDALONE [ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ] ;
link concepts/icu_concept_check.cpp : [ check-target-builds ../build//is_legacy_03 : : <build>no ] : standalone_icu_concept_check ;
link concepts/range_concept_check.cpp : [ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ] ;
run concepts/test_bug_11988.cpp : : : [ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ] ;
run captures/captures_test.cpp ../build//icu_options : : : <threading>multi <define>BOOST_REGEX_MATCH_EXTRA=1 <define>BOOST_REGEX_NO_LIB=1
[ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ] : captures_test ;
run regress/$(R_SOURCE) .//boost_regex_recursive ../build//icu_options : : : <define>BOOST_REGEX_RECURSIVE=1 <define>BOOST_REGEX_CXX03=1 : regex_regress_recursive ;
run regress/$(R_SOURCE) ./noeh_test//boost_regex_noeh ../build//icu_options : : : <define>BOOST_NO_EXCEPTIONS=1 <exception-handling>off <link>static <runtime-link>shared : regex_regress_noeh ;
compile test_consolidated.cpp ;
build-project ../example ;
# `quick` target (for CI)
run quick.cpp ../build//boost_regex ;
compile test_warnings.cpp
: <toolset>msvc:<warnings>all <toolset>msvc:<warnings-as-errors>on
<toolset>gcc:<warnings>all <toolset>gcc:<warnings-as-errors>on
@ -240,3 +151,4 @@ compile test_windows_defs_3.cpp ;
compile test_windows_defs_4.cpp ;
run issue153.cpp : : : <toolset>msvc:<linkflags>-STACK:2097152 ;
run bad_format_string.cpp ;

View File

@ -0,0 +1,32 @@
/*
*
* Copyright (c) 2024
* John Maddock
*
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*
*/
#include <iostream>
#include <boost/regex.hpp>
#include <boost/core/lightweight_test.hpp>
int main()
{
try{
std::string format_string("(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((");
boost::regex e("foo");
std::string in("foobar");
std::string out;
boost::regex_replace(std::back_inserter(out), in.begin(), in.end(),
e, format_string, boost::match_default | boost::format_all);
BOOST_TEST(false);
}
catch (const std::runtime_error&)
{
std::cout << "OK" << std::endl;
}
return boost::report_errors();
}

View File

@ -51,5 +51,7 @@ void test_alt()
TEST_REGEX_SEARCH("a\nb", grep, "a", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("a\nb", egrep, "b", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("a\nb", egrep, "a", match_default, make_array(0, 1, -2, -2));
// DOS protection:
TEST_INVALID_REGEX("(|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||(?0))", perl);
}

View File

@ -63,5 +63,7 @@ void test_anchors()
TEST_REGEX_SEARCH_W(L"^.", boost::regex::extended, L"\x2028 \x2028", match_default, make_array(0, 1, -2, 1, 2, -2, -2));
TEST_REGEX_SEARCH_W(L".$", boost::regex::extended, L" \x2028 \x2028", match_default, make_array(0, 1, -2, 2, 3, -2, 3, 4, -2, -2));
#endif
// DOS attack prevention:
TEST_INVALID_REGEX("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$", perl);
}