Remove limit on the number of backrefs possible.

Changes named sub-expressions to use a different hashing scheme: the high-order bit is now always set, so that clashes between hashes and indexes don't happen until 2^30 or 2^62 sub-expressions in 32-bit and 64-bit code respectively.
Changes bitmask of seen sub-expressions to use dynamic storage for sub-expression indexes above 64.
Adds tests for the above.
Fixes https://github.com/boostorg/regex/issues/75.
This commit is contained in:
jzmaddock
2020-01-19 11:28:36 +00:00
parent b5d60694cc
commit 4bb4d392e4
7 changed files with 81 additions and 20 deletions

View File

@ -20,6 +20,8 @@
#ifndef BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP
#define BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP
#include <boost/regex/v4/indexed_bit_flag.hpp>
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
@ -239,7 +241,7 @@ protected:
bool m_icase; // true for case insensitive matches
unsigned m_repeater_id; // the state_id of the next repeater
bool m_has_backrefs; // true if there are actually any backrefs
unsigned m_backrefs; // bitmask of permitted backrefs
indexed_bit_flag m_backrefs; // bitmask of permitted backrefs
boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
bool m_has_recursions; // set when we have recursive expressions to fixup
std::vector<unsigned char> m_recursion_checks; // notes which recursions we've followed while analysing this expression
@ -267,7 +269,7 @@ private:
template <class charT, class traits>
basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
: m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0), m_has_recursions(false)
: m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_has_recursions(false)
{
m_pdata->m_data.clear();
m_pdata->m_status = ::boost::regex_constants::error_ok;
@ -763,7 +765,7 @@ void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
if(idx < 0)
{
idx = -idx-1;
if(idx >= 10000)
if(idx >= hash_value_mask)
{
idx = m_pdata->get_id(idx);
if(idx <= 0)
@ -795,7 +797,7 @@ void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
bool ok = false;
re_syntax_base* p = base;
std::ptrdiff_t idx = static_cast<re_jump*>(state)->alt.i;
if(idx > 10000)
if(idx >= hash_value_mask)
{
//
// There may be more than one capture group with this hash, just do what Perl