forked from boostorg/regex
Allow back references to refer to a capture that hasn't happened yet ("forward reference").
Fixes https://github.com/boostorg/regex/issues/133.
This commit is contained in:
@ -195,7 +195,11 @@ You can also use the \g escape for the same function, for example:
|
|||||||
[[[^\g{one}]][Match whatever matched the sub-expression named "one"]]
|
[[[^\g{one}]][Match whatever matched the sub-expression named "one"]]
|
||||||
]
|
]
|
||||||
|
|
||||||
Finally the \k escape can be used to refer to named subexpressions, for example [^\k<two>] will match
|
Note that a back reference can also be a forward-reference to a sub-expression that has not yet
|
||||||
|
been seen - this only really makes sense within a repeat, so for example `(\2two|(one))+` will
|
||||||
|
match "oneonetwo".
|
||||||
|
|
||||||
|
Finally the \k escape can be used to refer to named subexpressions, for example [^\k<two>] will match
|
||||||
whatever matched the subexpression named "two".
|
whatever matched the subexpression named "two".
|
||||||
|
|
||||||
[h4 Alternation]
|
[h4 Alternation]
|
||||||
|
@ -232,7 +232,6 @@ protected:
|
|||||||
bool m_icase; // true for case insensitive matches
|
bool m_icase; // true for case insensitive matches
|
||||||
unsigned m_repeater_id; // the state_id of the next repeater
|
unsigned m_repeater_id; // the state_id of the next repeater
|
||||||
bool m_has_backrefs; // true if there are actually any backrefs
|
bool m_has_backrefs; // true if there are actually any backrefs
|
||||||
indexed_bit_flag m_backrefs; // bitmask of permitted backrefs
|
|
||||||
std::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
|
std::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
|
||||||
bool m_has_recursions; // set when we have recursive expressions to fixup
|
bool m_has_recursions; // set when we have recursive expressions to fixup
|
||||||
std::vector<unsigned char> m_recursion_checks; // notes which recursions we've followed while analysing this expression
|
std::vector<unsigned char> m_recursion_checks; // notes which recursions we've followed while analysing this expression
|
||||||
|
@ -98,6 +98,7 @@ private:
|
|||||||
std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
|
std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
|
||||||
bool m_has_case_change; // true if somewhere in the current block the case has changed
|
bool m_has_case_change; // true if somewhere in the current block the case has changed
|
||||||
unsigned m_recursion_count; // How many times we've called parse_all.
|
unsigned m_recursion_count; // How many times we've called parse_all.
|
||||||
|
unsigned m_max_backref; // Largest index of any backref.
|
||||||
#if defined(BOOST_REGEX_MSVC) && defined(_M_IX86)
|
#if defined(BOOST_REGEX_MSVC) && defined(_M_IX86)
|
||||||
// This is an ugly warning suppression workaround (for warnings *inside* std::vector
|
// This is an ugly warning suppression workaround (for warnings *inside* std::vector
|
||||||
// that can not otherwise be suppressed)...
|
// that can not otherwise be suppressed)...
|
||||||
@ -114,7 +115,7 @@ private:
|
|||||||
template <class charT, class traits>
|
template <class charT, class traits>
|
||||||
basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
|
basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
|
||||||
: basic_regex_creator<charT, traits>(data), m_parser_proc(), m_base(0), m_end(0), m_position(0),
|
: basic_regex_creator<charT, traits>(data), m_parser_proc(), m_base(0), m_end(0), m_position(0),
|
||||||
m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0)
|
m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0), m_max_backref(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -184,6 +185,13 @@ void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2,
|
|||||||
return;
|
return;
|
||||||
// fill in our sub-expression count:
|
// fill in our sub-expression count:
|
||||||
this->m_pdata->m_mark_count = 1u + (std::size_t)m_mark_count;
|
this->m_pdata->m_mark_count = 1u + (std::size_t)m_mark_count;
|
||||||
|
//
|
||||||
|
// Check we don't have backreferences to sub-expressions which don't exist:
|
||||||
|
//
|
||||||
|
if (m_max_backref > m_mark_count)
|
||||||
|
{
|
||||||
|
fail(regex_constants::error_backref, std::distance(m_base, m_position), "Found a backreference to a non-existant sub-expression.");
|
||||||
|
}
|
||||||
this->finalize(p1, p2);
|
this->finalize(p1, p2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -529,11 +537,6 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
|||||||
// restore the alternate insertion point:
|
// restore the alternate insertion point:
|
||||||
//
|
//
|
||||||
this->m_alt_insert_point = last_alt_point;
|
this->m_alt_insert_point = last_alt_point;
|
||||||
//
|
|
||||||
// allow backrefs to this mark:
|
|
||||||
//
|
|
||||||
if(markid > 0)
|
|
||||||
this->m_backrefs.set(markid);
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -899,12 +902,14 @@ escape_type_class_jump:
|
|||||||
}
|
}
|
||||||
if(negative)
|
if(negative)
|
||||||
i = 1 + (static_cast<std::intmax_t>(m_mark_count) - i);
|
i = 1 + (static_cast<std::intmax_t>(m_mark_count) - i);
|
||||||
if(((i < hash_value_mask) && (i > 0) && (this->m_backrefs.test((std::size_t)i))) || ((i >= hash_value_mask) && (this->m_pdata->get_id((int)i) > 0) && (this->m_backrefs.test(this->m_pdata->get_id((int)i)))))
|
if(((i < hash_value_mask) && (i > 0)) || ((i >= hash_value_mask) && (this->m_pdata->get_id((int)i) > 0)))
|
||||||
{
|
{
|
||||||
m_position = pc;
|
m_position = pc;
|
||||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
|
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
|
||||||
pb->index = (int)i;
|
pb->index = (int)i;
|
||||||
pb->icase = this->flags() & regbase::icase;
|
pb->icase = this->flags() & regbase::icase;
|
||||||
|
if ((i > m_max_backref) && (i < hash_value_mask))
|
||||||
|
m_max_backref = i;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1934,12 +1939,14 @@ bool basic_regex_parser<charT, traits>::parse_backref()
|
|||||||
charT c = unescape_character();
|
charT c = unescape_character();
|
||||||
this->append_literal(c);
|
this->append_literal(c);
|
||||||
}
|
}
|
||||||
else if((i > 0) && (this->m_backrefs.test((std::size_t)i)))
|
else if((i > 0))
|
||||||
{
|
{
|
||||||
m_position = pc;
|
m_position = pc;
|
||||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
|
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
|
||||||
pb->index = (int)i;
|
pb->index = (int)i;
|
||||||
pb->icase = this->flags() & regbase::icase;
|
pb->icase = this->flags() & regbase::icase;
|
||||||
|
if(i > m_max_backref)
|
||||||
|
m_max_backref = i;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -2695,10 +2702,6 @@ option_group_jump:
|
|||||||
{
|
{
|
||||||
if(this->flags() & regbase::save_subexpression_location)
|
if(this->flags() & regbase::save_subexpression_location)
|
||||||
this->m_pdata->m_subs.at((std::size_t)markid - 1).second = std::distance(m_base, m_position) - 1;
|
this->m_pdata->m_subs.at((std::size_t)markid - 1).second = std::distance(m_base, m_position) - 1;
|
||||||
//
|
|
||||||
// allow backrefs to this mark:
|
|
||||||
//
|
|
||||||
this->m_backrefs.set(markid);
|
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -1,54 +0,0 @@
|
|||||||
/*
|
|
||||||
*
|
|
||||||
* Copyright (c) 2020
|
|
||||||
* John Maddock
|
|
||||||
*
|
|
||||||
* Use, modification and distribution are subject to the
|
|
||||||
* Boost Software License, Version 1.0. (See accompanying file
|
|
||||||
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* LOCATION: see http://www.boost.org for most recent version.
|
|
||||||
* FILE indexed_bit_flag.hpp
|
|
||||||
* VERSION see <boost/version.hpp>
|
|
||||||
* DESCRIPTION: Declares class indexed_bit_flag.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <boost/regex/config.hpp>
|
|
||||||
#include <set>
|
|
||||||
|
|
||||||
#ifndef BOOST_REGEX_V5_INDEXED_BIT_FLAG_HPP
|
|
||||||
#define BOOST_REGEX_V5_INDEXED_BIT_FLAG_HPP
|
|
||||||
|
|
||||||
namespace boost{
|
|
||||||
namespace BOOST_REGEX_DETAIL_NS{
|
|
||||||
|
|
||||||
class indexed_bit_flag
|
|
||||||
{
|
|
||||||
std::uint64_t low_mask;
|
|
||||||
std::set<std::size_t> mask_set;
|
|
||||||
public:
|
|
||||||
indexed_bit_flag() : low_mask(0) {}
|
|
||||||
void set(std::size_t i)
|
|
||||||
{
|
|
||||||
if (i < std::numeric_limits<std::uint64_t>::digits - 1)
|
|
||||||
low_mask |= static_cast<std::uint64_t>(1u) << i;
|
|
||||||
else
|
|
||||||
mask_set.insert(i);
|
|
||||||
}
|
|
||||||
bool test(std::size_t i)
|
|
||||||
{
|
|
||||||
if (i < std::numeric_limits<std::uint64_t>::digits - 1)
|
|
||||||
return low_mask & static_cast<std::uint64_t>(1u) << i ? true : false;
|
|
||||||
else
|
|
||||||
return mask_set.find(i) != mask_set.end();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace BOOST_REGEX_DETAIL_NS
|
|
||||||
} // namespace boost
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
|
@ -19,13 +19,15 @@ void test_backrefs()
|
|||||||
{
|
{
|
||||||
using namespace boost::regex_constants;
|
using namespace boost::regex_constants;
|
||||||
TEST_INVALID_REGEX("a(b)\\2c", perl);
|
TEST_INVALID_REGEX("a(b)\\2c", perl);
|
||||||
TEST_INVALID_REGEX("a(b\\1)c", perl);
|
//TEST_INVALID_REGEX("a(b\\1)c", perl);
|
||||||
TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
||||||
TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbd", match_default, make_array(-2, -2));
|
TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbd", match_default, make_array(-2, -2));
|
||||||
TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbbd", match_default, make_array(-2, -2));
|
TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbbd", match_default, make_array(-2, -2));
|
||||||
TEST_REGEX_SEARCH("^(.)\\1", perl, "abc", match_default, make_array(-2, -2));
|
TEST_REGEX_SEARCH("^(.)\\1", perl, "abc", match_default, make_array(-2, -2));
|
||||||
TEST_REGEX_SEARCH("a([bc])\\1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
TEST_REGEX_SEARCH("a([bc])\\1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
||||||
TEST_REGEX_SEARCH("a\\([bc]\\)\\1d", basic, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
TEST_REGEX_SEARCH("a\\([bc]\\)\\1d", basic, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("(\\2two|(one))+", perl, "oneonetwo", match_default, make_array(0, 9, 3, 9, 0, 3, -2, -2));
|
||||||
|
TEST_INVALID_REGEX("(\\3two|(one))+", perl);
|
||||||
// strictly speaking this is at best ambiguous, at worst wrong, this is what most
|
// strictly speaking this is at best ambiguous, at worst wrong, this is what most
|
||||||
// re implementations will match though.
|
// re implementations will match though.
|
||||||
TEST_REGEX_SEARCH("a(([bc])\\2)*d", perl, "abbccd", match_default, make_array(0, 6, 3, 5, 3, 4, -2, -2));
|
TEST_REGEX_SEARCH("a(([bc])\\2)*d", perl, "abbccd", match_default, make_array(0, 6, 3, 5, 3, 4, -2, -2));
|
||||||
@ -59,7 +61,7 @@ void test_backrefs()
|
|||||||
// Now test the \g version:
|
// Now test the \g version:
|
||||||
//
|
//
|
||||||
TEST_INVALID_REGEX("a(b)\\g2c", perl);
|
TEST_INVALID_REGEX("a(b)\\g2c", perl);
|
||||||
TEST_INVALID_REGEX("a(b\\g1)c", perl);
|
//TEST_INVALID_REGEX("a(b\\g1)c", perl);
|
||||||
TEST_INVALID_REGEX("a(b\\g0)c", perl);
|
TEST_INVALID_REGEX("a(b\\g0)c", perl);
|
||||||
TEST_REGEX_SEARCH("a(b*)c\\g1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
TEST_REGEX_SEARCH("a(b*)c\\g1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
||||||
TEST_REGEX_SEARCH("a(b*)c\\g1d", perl, "abbcbd", match_default, make_array(-2, -2));
|
TEST_REGEX_SEARCH("a(b*)c\\g1d", perl, "abbcbd", match_default, make_array(-2, -2));
|
||||||
@ -67,8 +69,10 @@ void test_backrefs()
|
|||||||
TEST_REGEX_SEARCH("^(.)\\g1", perl, "abc", match_default, make_array(-2, -2));
|
TEST_REGEX_SEARCH("^(.)\\g1", perl, "abc", match_default, make_array(-2, -2));
|
||||||
TEST_REGEX_SEARCH("a([bc])\\g1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
TEST_REGEX_SEARCH("a([bc])\\g1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
||||||
TEST_INVALID_REGEX("a(b)\\g{2}c", perl);
|
TEST_INVALID_REGEX("a(b)\\g{2}c", perl);
|
||||||
TEST_INVALID_REGEX("a(b\\g{1})c", perl);
|
//TEST_INVALID_REGEX("a(b\\g{1})c", perl);
|
||||||
TEST_INVALID_REGEX("a(b\\g{0})c", perl);
|
TEST_INVALID_REGEX("a(b\\g{0})c", perl);
|
||||||
|
TEST_REGEX_SEARCH("(\\g{2}two|(one))+", perl, "oneonetwo", match_default, make_array(0, 9, 3, 9, 0, 3, -2, -2));
|
||||||
|
TEST_INVALID_REGEX("(\\g{3}two|(one))+", perl);
|
||||||
TEST_REGEX_SEARCH("a(b*)c\\g{1}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
TEST_REGEX_SEARCH("a(b*)c\\g{1}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
||||||
TEST_REGEX_SEARCH("a(b*)c\\g{1}d", perl, "abbcbd", match_default, make_array(-2, -2));
|
TEST_REGEX_SEARCH("a(b*)c\\g{1}d", perl, "abbcbd", match_default, make_array(-2, -2));
|
||||||
TEST_REGEX_SEARCH("a(b*)c\\g{1}d", perl, "abbcbbbd", match_default, make_array(-2, -2));
|
TEST_REGEX_SEARCH("a(b*)c\\g{1}d", perl, "abbcbbbd", match_default, make_array(-2, -2));
|
||||||
@ -76,7 +80,7 @@ void test_backrefs()
|
|||||||
TEST_REGEX_SEARCH("a([bc])\\g{1}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
TEST_REGEX_SEARCH("a([bc])\\g{1}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
||||||
// And again but with negative indexes:
|
// And again but with negative indexes:
|
||||||
TEST_INVALID_REGEX("a(b)\\g-2c", perl);
|
TEST_INVALID_REGEX("a(b)\\g-2c", perl);
|
||||||
TEST_INVALID_REGEX("a(b\\g-1)c", perl);
|
//TEST_INVALID_REGEX("a(b\\g-1)c", perl);
|
||||||
TEST_INVALID_REGEX("a(b\\g-0)c", perl);
|
TEST_INVALID_REGEX("a(b\\g-0)c", perl);
|
||||||
TEST_REGEX_SEARCH("a(b*)c\\g-1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
TEST_REGEX_SEARCH("a(b*)c\\g-1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
||||||
TEST_REGEX_SEARCH("a(b*)c\\g-1d", perl, "abbcbd", match_default, make_array(-2, -2));
|
TEST_REGEX_SEARCH("a(b*)c\\g-1d", perl, "abbcbd", match_default, make_array(-2, -2));
|
||||||
@ -84,7 +88,7 @@ void test_backrefs()
|
|||||||
TEST_REGEX_SEARCH("^(.)\\g1", perl, "abc", match_default, make_array(-2, -2));
|
TEST_REGEX_SEARCH("^(.)\\g1", perl, "abc", match_default, make_array(-2, -2));
|
||||||
TEST_REGEX_SEARCH("a([bc])\\g1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
TEST_REGEX_SEARCH("a([bc])\\g1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
||||||
TEST_INVALID_REGEX("a(b)\\g{-2}c", perl);
|
TEST_INVALID_REGEX("a(b)\\g{-2}c", perl);
|
||||||
TEST_INVALID_REGEX("a(b\\g{-1})c", perl);
|
//TEST_INVALID_REGEX("a(b\\g{-1})c", perl);
|
||||||
TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
||||||
TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbd", match_default, make_array(-2, -2));
|
TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbd", match_default, make_array(-2, -2));
|
||||||
TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbbbd", match_default, make_array(-2, -2));
|
TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbbbd", match_default, make_array(-2, -2));
|
||||||
|
Reference in New Issue
Block a user