Remove limit on the number of backrefs possible.

Changes named sub-expressions to use a different hashing scheme: the high-order bit is now always set, so clashes between hashes and indexes don't happen until 2^30 or 2^62 sub-expressions in 32-bit and 64-bit code respectively.
Changes bitmask of seen sub-expressions to use dynamic storage for sub-expression indexes above 64.
Adds tests for the above.
Fixes https://github.com/boostorg/regex/issues/75.
This commit is contained in:
jzmaddock
2020-01-19 11:28:36 +00:00
parent b5d60694cc
commit 4bb4d392e4
7 changed files with 81 additions and 20 deletions

View File

@ -545,8 +545,8 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
//
// allow backrefs to this mark:
//
if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
this->m_backrefs |= 1u << (markid - 1);
if(markid > 0)
this->m_backrefs.set(markid);
return true;
}
@ -912,7 +912,7 @@ escape_type_class_jump:
}
if(negative)
i = 1 + m_mark_count - i;
if(((i > 0) && (i < std::numeric_limits<unsigned>::digits) && (i - 1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_pdata->get_id(i)-1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
if(((i > 0) && (this->m_backrefs.test(i)) || ((i >= hash_value_mask) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs.test(this->m_pdata->get_id(i))))))
{
m_position = pc;
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
@ -1944,7 +1944,7 @@ bool basic_regex_parser<charT, traits>::parse_backref()
charT c = unescape_character();
this->append_literal(c);
}
else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
else if((i > 0) && (this->m_backrefs.test(i)))
{
m_position = pc;
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
@ -2718,8 +2718,7 @@ option_group_jump:
//
// allow backrefs to this mark:
//
if(markid < (int)(sizeof(unsigned) * CHAR_BIT))
this->m_backrefs |= 1u << (markid - 1);
this->m_backrefs.set(markid);
}
return true;
}