Compare commits

...

5 Commits

Author SHA1 Message Date
57ca08240e Make backstep calculation non-recursive.
Refs https://github.com/boostorg/regex/pull/236.
2024-12-16 16:58:25 +00:00
0cbaa4ef17 fix #232 (#234)
* add failing test case
* fix issue #232
Credit to OSS-Fuzz for finding the problematic test regexes and configurations.
2024-12-05 13:08:52 +00:00
f0afa5d9b8 #include cleanup (#231)
* update library headers to have self-contained #includes

* update gha ci to run for feature/** branches
2024-11-23 11:10:27 +00:00
9a8d2b13a3 Tentative fix for #227. (#230)
Fixes #227.
2024-11-06 09:20:07 +00:00
bd0e76f42f Correct example/Jamfile.v2 so that grep gets valid command line arguments. (#229)
* Correct example/Jamfile.v2 so that grep gets valid command line arguments.
2024-11-02 18:29:39 +00:00
30 changed files with 324 additions and 22 deletions

View File

@ -9,6 +9,7 @@ on:
branches:
- master
- develop
- feature/**
pull_request:
release:
types: [published, created, edited]

View File

@ -44,7 +44,7 @@ path-constant HERE : . ;
test-suite regex-examples :
[ regex-test-run timer/regex_timer.cpp /boost/smart_ptr//boost_smart_ptr : $(HERE)/timer/input_script.txt ]
[ regex-test-run grep/grep.cpp /boost/program_options//boost_program_options/<link>static : -n -b $(HERE)/../include/boost/regex.hpp : test_grep ]
[ regex-test-run grep/grep.cpp /boost/program_options//boost_program_options/<link>static : -n -b -E include $(HERE)/../include/boost/regex.hpp : test_grep ]
[ regex-test-run snippets/credit_card_example.cpp ]
[ regex-test-run snippets/mfc_example.cpp ]
[ regex-test-run snippets/icu_example.cpp ]

View File

@ -19,6 +19,15 @@
#ifndef BOOST_REGEX_V5_BASIC_REGEX_HPP
#define BOOST_REGEX_V5_BASIC_REGEX_HPP
#include <boost/regex/v5/regbase.hpp>
#include <boost/regex/v5/syntax_type.hpp>
#include <boost/regex/v5/regex_traits.hpp>
#include <boost/regex/v5/states.hpp>
#include <boost/regex/v5/regex_raw_buffer.hpp>
#include <algorithm>
#include <limits>
#include <memory>
#include <vector>
namespace boost{

View File

@ -28,6 +28,9 @@
#endif
#endif
#include <boost/regex/v5/basic_regex.hpp>
#include <vector>
#include <set>
namespace boost{
@ -971,7 +974,12 @@ template <class charT, class traits>
int basic_regex_creator<charT, traits>::calculate_backstep(re_syntax_base* state)
{
typedef typename traits::char_class_type m_type;
int result = 0;
int last_alternative_result = -1;
std::vector<std::tuple<int, re_syntax_base*>> stack;
while(state)
{
switch(state->type)
@ -990,9 +998,28 @@ int basic_regex_creator<charT, traits>::calculate_backstep(re_syntax_base* state
}
break;
case syntax_element_endmark:
if((static_cast<re_brace*>(state)->index == -1)
if ((static_cast<re_brace*>(state)->index == -1)
|| (static_cast<re_brace*>(state)->index == -2))
return result;
{
// We've finished the calculation, check against any previous alternatives:
if (last_alternative_result >= 0)
{
if (last_alternative_result != result)
return -1;
}
else
last_alternative_result = result;
if (stack.size())
{
// Skip to next alternative and calculate that as well:
std::tie(result, state) = stack.back();
stack.pop_back();
continue;
}
else
return result;
}
break;
case syntax_element_literal:
result += static_cast<re_literal*>(state)->length;
@ -1048,11 +1075,13 @@ int basic_regex_creator<charT, traits>::calculate_backstep(re_syntax_base* state
continue;
case syntax_element_alt:
{
int r1 = calculate_backstep(state->next.p);
int r2 = calculate_backstep(static_cast<re_alt*>(state)->alt.p);
if((r1 < 0) || (r1 != r2))
// Push the alternative if we haven't pushed too many already:
if(stack.size() > BOOST_REGEX_MAX_BLOCKS)
return -1;
return result + r1;
stack.push_back(std::make_tuple(result, static_cast<re_alt*>(state)->alt.p));
// and take the first one:
state = state->next.p;
continue;
}
default:
break;

View File

@ -19,6 +19,13 @@
#ifndef BOOST_REGEX_V5_BASIC_REGEX_PARSER_HPP
#define BOOST_REGEX_V5_BASIC_REGEX_PARSER_HPP
#include <boost/regex/v5/basic_regex_creator.hpp>
#include <climits>
#include <cstdint>
#include <limits>
#include <type_traits>
namespace boost{
namespace BOOST_REGEX_DETAIL_NS{

View File

@ -21,7 +21,12 @@
#include <boost/regex/config.hpp>
#include <boost/regex/v5/regex_workaround.hpp>
#include <boost/regex/v5/primary_transform.hpp>
#include <boost/regex/v5/regex_traits_defaults.hpp>
#include <cctype>
#include <cstdint>
#include <cwctype>
namespace boost{

View File

@ -20,6 +20,8 @@
#ifndef BOOST_REGEX_V5_CHAR_REGEX_TRAITS_HPP
#define BOOST_REGEX_V5_CHAR_REGEX_TRAITS_HPP
#include <boost/regex/v5/regex_traits.hpp>
namespace boost{
namespace deprecated{

View File

@ -19,6 +19,8 @@
#ifndef BOOST_REGEX_V5_ITERATOR_TRAITS_HPP
#define BOOST_REGEX_V5_ITERATOR_TRAITS_HPP
#include <iterator>
namespace boost{
namespace BOOST_REGEX_DETAIL_NS{

View File

@ -19,6 +19,14 @@
#ifndef BOOST_REGEX_V5_MATCH_RESULTS_HPP
#define BOOST_REGEX_V5_MATCH_RESULTS_HPP
#include <boost/regex/v5/match_flags.hpp>
#include <boost/regex/v5/sub_match.hpp>
#include <boost/regex/v5/basic_regex.hpp>
#include <boost/regex/v5/regex_format.hpp>
#include <string>
#include <vector>
namespace boost{
#ifdef BOOST_REGEX_MSVC
#pragma warning(push)

View File

@ -18,6 +18,8 @@
#ifndef BOOST_REGEX_V5_MEM_BLOCK_CACHE_HPP
#define BOOST_REGEX_V5_MEM_BLOCK_CACHE_HPP
#include <boost/regex/config.hpp>
#include <new>
#ifdef BOOST_HAS_THREADS
#include <mutex>
@ -31,6 +33,7 @@
#endif
#endif
namespace boost{
namespace BOOST_REGEX_DETAIL_NS{

View File

@ -12,7 +12,18 @@
#ifndef BOOST_REGEX_MATCHER_HPP
#define BOOST_REGEX_MATCHER_HPP
#include <boost/regex/v5/match_flags.hpp>
#include <boost/regex/v5/match_results.hpp>
#include <boost/regex/v5/regbase.hpp>
#include <boost/regex/v5/iterator_category.hpp>
#include <boost/regex/v5/states.hpp>
#include <boost/regex/v5/regex_traits.hpp>
#ifndef BOOST_REGEX_STANDALONE
#include <boost/throw_exception.hpp>
#endif
#include <climits>
#ifdef BOOST_REGEX_MSVC
# pragma warning(push)
@ -28,25 +39,34 @@
#endif
#endif
#ifndef BOOST_REGEX_STANDALONE
# define BOOST_REGEX_DETAIL_THROW(ex) boost::throw_exception(ex)
#else
# define BOOST_REGEX_DETAIL_THROW(ex) throw ex
#endif
namespace boost{
namespace BOOST_REGEX_DETAIL_NS{
//
// error checking API:
//
inline void verify_options(boost::regex_constants::syntax_option_type, match_flag_type mf)
inline void verify_options(boost::regex_constants::syntax_option_type, match_flag_type mf)
{
auto is_perl = (mf & match_perl);
auto is_posix = (mf & match_posix);
if (is_perl && is_posix)
{
BOOST_REGEX_DETAIL_THROW(std::logic_error("Usage Error: Can't mix Perl and POSIX matching rules"));
}
//
// can't mix match_extra with POSIX matching rules:
//
if ((mf & match_extra) && (mf & match_posix))
if ((mf & match_extra) && is_posix)
{
std::logic_error msg("Usage Error: Can't mix regular expression captures with POSIX matching rules");
#ifndef BOOST_REGEX_STANDALONE
throw_exception(msg);
#else
throw msg;
#endif
BOOST_REGEX_DETAIL_THROW(std::logic_error("Usage Error: Can't mix regular expression captures with POSIX matching rules"));
}
}
//

View File

@ -20,6 +20,22 @@
#ifndef BOOST_REGEX_V5_PERL_MATCHER_COMMON_HPP
#define BOOST_REGEX_V5_PERL_MATCHER_COMMON_HPP
#include <boost/regex/config.hpp>
#ifndef BOOST_REGEX_STANDALONE
#include <boost/config.hpp>
#if defined(BOOST_HAS_PRAGMA_ONCE)
#pragma once
#include <boost/regex/v5/perl_matcher.hpp>
#endif
#endif
#include <boost/regex/v5/basic_regex.hpp>
#include <boost/regex/v5/match_flags.hpp>
#include <boost/regex/v5/match_results.hpp>
#ifdef BOOST_REGEX_MSVC
# pragma warning(push)
#pragma warning(disable:4459)
@ -44,12 +60,7 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_r
if(e.empty())
{
// precondition failure: e is not a valid regex.
std::invalid_argument ex("Invalid regular expression object");
#ifndef BOOST_REGEX_STANDALONE
boost::throw_exception(ex);
#else
throw e;
#endif
BOOST_REGEX_DETAIL_THROW(std::invalid_argument("Invalid regular expression object"));
}
pstate = 0;
m_match_flags = f;
@ -82,7 +93,11 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_r
match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline);
// Disable match_any if requested in the state machine:
if(e.get_data().m_disable_match_any)
{
if (m_match_flags & match_posix)
BOOST_REGEX_DETAIL_THROW(std::logic_error("Invalid regex for POSIX-style matching"));
m_match_flags &= regex_constants::match_not_any;
}
}
#ifdef BOOST_REGEX_MSVC
# pragma warning(pop)

View File

@ -20,6 +20,18 @@
#ifndef BOOST_REGEX_V5_PERL_MATCHER_NON_RECURSIVE_HPP
#define BOOST_REGEX_V5_PERL_MATCHER_NON_RECURSIVE_HPP
#include <boost/regex/config.hpp>
#ifndef BOOST_REGEX_STANDALONE
#include <boost/config.hpp>
#if defined(BOOST_HAS_PRAGMA_ONCE)
#pragma once
#include <boost/regex/v5/perl_matcher.hpp>
#endif
#endif
#include <boost/regex/v5/mem_block_cache.hpp>
#ifdef BOOST_REGEX_MSVC
@ -1204,7 +1216,10 @@ bool perl_matcher<BidiIterator, Allocator, traits>::skip_until_paren(int index,
else if(pstate->type == syntax_element_startmark)
{
int idx = static_cast<const re_brace*>(pstate)->index;
pstate = pstate->next.p;
if(idx > 0)
match_startmark();
else
pstate = pstate->next.p;
skip_until_paren(idx, false);
continue;
}

View File

@ -21,6 +21,10 @@
#ifndef BOOST_REGEX_FORMAT_HPP
#define BOOST_REGEX_FORMAT_HPP
#include <boost/regex/v5/match_flags.hpp>
#include <boost/regex/v5/sub_match.hpp>
#include <boost/regex/v5/regex_traits_defaults.hpp>
#include <type_traits>
#include <functional>

View File

@ -19,6 +19,10 @@
#ifndef BOOST_REGEX_V5_REGEX_GREP_HPP
#define BOOST_REGEX_V5_REGEX_GREP_HPP
#include <boost/regex/v5/basic_regex.hpp>
#include <boost/regex/v5/match_flags.hpp>
#include <boost/regex/v5/match_results.hpp>
#include <boost/regex/v5/perl_matcher.hpp>
namespace boost{

View File

@ -19,6 +19,9 @@
#ifndef BOOST_REGEX_V5_REGEX_ITERATOR_HPP
#define BOOST_REGEX_V5_REGEX_ITERATOR_HPP
#include <boost/regex/v5/basic_regex.hpp>
#include <boost/regex/v5/match_results.hpp>
#include <memory>
namespace boost{

View File

@ -22,6 +22,9 @@
#ifndef BOOST_REGEX_MATCH_HPP
#define BOOST_REGEX_MATCH_HPP
#include <boost/regex/v5/match_results.hpp>
#include <boost/regex/v5/perl_matcher.hpp>
namespace boost{
//

View File

@ -27,6 +27,7 @@
#include <algorithm>
#include <cstddef>
#include <cstring>
namespace boost{
namespace BOOST_REGEX_DETAIL_NS{

View File

@ -22,6 +22,10 @@
#define BOOST_REGEX_V5_REGEX_REPLACE_HPP
#include <boost/regex/v5/basic_regex.hpp>
#include <boost/regex/v5/match_flags.hpp>
#include <boost/regex/v5/regex_iterator.hpp>
namespace boost{
template <class OutputIterator, class BidirectionalIterator, class traits, class charT, class Formatter>

View File

@ -20,6 +20,9 @@
#define BOOST_REGEX_V5_REGEX_SEARCH_HPP
#include <boost/regex/v5/match_results.hpp>
#include <boost/regex/v5/perl_matcher.hpp>
namespace boost{
template <class BidiIterator, class Allocator, class charT, class traits>

View File

@ -21,6 +21,9 @@
#ifndef BOOST_REGEX_SPLIT_HPP
#define BOOST_REGEX_SPLIT_HPP
#include <boost/regex/v5/basic_regex.hpp>
#include <boost/regex/v5/match_results.hpp>
namespace boost{
#ifdef BOOST_REGEX_MSVC

View File

@ -19,6 +19,10 @@
#ifndef BOOST_REGEX_V5_REGEX_TOKEN_ITERATOR_HPP
#define BOOST_REGEX_V5_REGEX_TOKEN_ITERATOR_HPP
#include <boost/regex/v5/basic_regex.hpp>
#include <boost/regex/v5/match_results.hpp>
#include <boost/regex/v5/sub_match.hpp>
#include <memory>
namespace boost{

View File

@ -19,6 +19,11 @@
#ifndef BOOST_REGEX_V5_STATES_HPP
#define BOOST_REGEX_V5_STATES_HPP
#include <boost/regex/v5/regex_raw_buffer.hpp>
#include <climits>
#include <cstddef>
namespace boost{
namespace BOOST_REGEX_DETAIL_NS{

View File

@ -19,6 +19,9 @@
#ifndef BOOST_REGEX_V5_SUB_MATCH_HPP
#define BOOST_REGEX_V5_SUB_MATCH_HPP
#include <iterator>
#include <utility>
namespace boost{
template <class BidiIterator>

View File

@ -19,6 +19,21 @@
#ifndef BOOST_REGEX_V5_U32REGEX_ITERATOR_HPP
#define BOOST_REGEX_V5_U32REGEX_ITERATOR_HPP
#include <boost/regex/config.hpp>
#ifndef BOOST_REGEX_STANDALONE
#include <boost/config.hpp>
#if defined(BOOST_HAS_PRAGMA_ONCE)
#pragma once
#include <boost/regex/v5/icu.hpp>
#endif
#endif
#include <boost/regex/v5/match_flags.hpp>
#include <boost/regex/v5/match_results.hpp>
namespace boost{
template <class BidirectionalIterator>

View File

@ -19,6 +19,18 @@
#ifndef BOOST_REGEX_V5_U32REGEX_TOKEN_ITERATOR_HPP
#define BOOST_REGEX_V5_U32REGEX_TOKEN_ITERATOR_HPP
#include <boost/regex/config.hpp>
#ifndef BOOST_REGEX_STANDALONE
#include <boost/config.hpp>
#if defined(BOOST_HAS_PRAGMA_ONCE)
#pragma once
#include <boost/regex/v5/icu.hpp>
#endif
#endif
namespace boost{
#ifdef BOOST_REGEX_MSVC

View File

@ -135,3 +135,7 @@ compile test_windows_defs_3.cpp ;
compile test_windows_defs_4.cpp ;
run issue153.cpp : : : "<toolset>msvc:<linkflags>-STACK:2097152" ;
run issue227.cpp ;
run issue232.cpp ;
run lookbehind_recursion_stress_test.cpp ;

19
test/issue227.cpp Normal file
View File

@ -0,0 +1,19 @@
/*
* Copyright (c) 2024
* Christian Mazakas
*
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*
*/
#include <boost/regex.hpp>
#include <string>
int main() {
boost::regex rx("(*ACCEPT)*+\\1((*ACCEPT)*+\\K)");
std::string str = "Z";
boost::smatch what;
boost::regex_search(str, what, rx, boost::match_default | boost::match_partial);
}

71
test/issue232.cpp Normal file
View File

@ -0,0 +1,71 @@
#include <boost/core/lightweight_test.hpp>
#include <boost/regex.hpp>
#include <cstddef>
#include <cstring>
#include <string>
#include <vector>
// Compiles the pattern stored in the character array `str` and runs
// boost::regex_match against a small fixed set of subject strings, first
// with Perl matching rules and then with POSIX matching rules.
//
// N0 is the deduced array size; N (default N0 - 1) is the number of pattern
// characters used, i.e. everything except the array's last element. Taking
// the length from the array size rather than from strlen keeps embedded
// '\0' characters as part of the pattern.
//
// The match results are deliberately ignored: the function only exercises
// the matcher. Exceptions thrown while matching with POSIX rules are
// swallowed on purpose; anything thrown elsewhere propagates to the caller.
template<std::size_t N0, std::size_t N = N0 - 1>
void tester( char const (&str)[ N0 ] )
{
   // Build the pattern buffer straight from the array range. Unlike a
   // memcpy into s.data(), this needs no <cstring> and stays well defined
   // when N == 0 (an empty vector may report a null data() pointer).
   std::vector<char> s(str, str + N);
   boost::regex rx(s.begin(), s.end());

   // Subjects: a run of 15 'H' characters, the empty string, a single space.
   std::vector<std::string> const wheres{ std::string(15, 'H'), "", " " };

   // Perl-style matching
   for (auto const& where : wheres) {
      boost::match_results<std::string::const_iterator> what;
      bool match = boost::regex_match(where, what, rx, boost::match_default | boost::match_partial | boost::match_any | boost::match_perl);
      (void) match;
   }

   // POSIX-style matching
   for (auto const& where : wheres) {
      try {
         boost::match_results<std::string::const_iterator> what;
         bool match = boost::regex_match(where, what, rx, boost::match_default | boost::match_partial | boost::match_any | boost::match_posix);
         (void) match;
      } catch(...) {
         // Best effort: a pattern may be rejected for POSIX-style matching;
         // that is not a failure of this test.
      }
   }
}
// Entry point for the issue #232 regression test.
// Runs tester() over four patterns and then checks two usage errors that
// regex_match is expected to report by throwing std::logic_error.
// Returns the failure count reported by boost::report_errors().
int main()
{
   // test strings derived from fuzzing
   // we keep a simple human-readable version
   char const str1[] = "(Y(*COMMIT)|\\K\\D|.)+";
   char const str2[] = "(Y(*COMMIT){||\\K\\D|||||||||\\K|||ss|||||.|\232*(?(50)\027\0204657|H)\020}\031\000.* 6.'?-i)+[L??.\000\000\000\004\000\000\000\000?..<[\000\024R]*+";
   char const str3[] = "(Y(*COMMIT)\xFF\x80|\\L\\K||||||||||.|||||\x84|||||\x00\x00\x10||||||.* .'?-i)[L??...-i)[L??...[\x00\x14R]*+";
   char const str4[] = "(Y(*COMMIT)\x96||.* .* .\\K|||\x9F||||\x9C|.|||||\x84\x99|||\x01\x00\x00\x00|||'?-i#PL\x00\x01.\x86??OMMIT)?...[\x00\x14R]*+";

   tester(str1);
   tester(str2);
   tester(str3);
   tester(str4);

   // prove that we catch certain impossible scenarios
   {
      // Asking for Perl and POSIX matching rules at the same time must throw.
      char const* str = "abcd";
      boost::regex rx(str);
      std::string where(15, 'H');
      BOOST_TEST_THROWS(boost::regex_match(where, rx, boost::match_posix | boost::match_perl), std::logic_error);
   }
   {
      // A pattern containing (*COMMIT) must throw when matched with POSIX rules.
      char const* str = "ab(*COMMIT)cd";
      boost::regex rx(str);
      std::string where(15, 'H');
      BOOST_TEST_THROWS(boost::regex_match(where, rx, boost::match_posix), std::logic_error);
   }

   return boost::report_errors();
}

View File

@ -0,0 +1,28 @@
/*
*
* Copyright (c) 2024
* John Maddock
*
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*
*/
#include <boost/regex.hpp>
// Stress test: compiles two lookbehind assertions whose inner group holds
// 1000 '|' separators -- first with empty alternatives, then with the
// single-character alternative "a" repeated.
// Nothing is matched; the program only has to construct both regex objects
// and exit normally.
// NOTE(review): presumably this targets the lookbehind backstep calculation
// on very wide alternations -- confirm against the library change it ships with.
int main()
{
   const unsigned separator_count = 1000;

   // "(?<=(" + 1000 * "|" + "))"
   std::string pattern("(?<=(");
   pattern += std::string(separator_count, '|');
   pattern.append("))");
   boost::regex empty_alternatives(pattern);

   // "(?<=(a" + 1000 * "|a" + "))"
   pattern = "(?<=(a";
   for (unsigned added = 0; added != separator_count; ++added)
   {
      pattern.append("|a");
   }
   pattern.append("))");
   boost::regex literal_alternatives(pattern);
}