Fix code to handle multiple named-subexpressions with the same name.

Updated test cases to match.

[SVN r65943]
This commit is contained in:
John Maddock
2010-10-13 16:53:13 +00:00
parent a35cb5b902
commit 29088ce346
9 changed files with 149 additions and 99 deletions

View File

@ -53,7 +53,7 @@ void bubble_down_one(I first, I last)
if(first != last)
{
I next = last - 1;
while((next != first) && !(*(next-1) < *next))
while((next != first) && (*next < *(next-1)))
{
(next-1)->swap(*next);
--next;
@ -61,22 +61,6 @@ void bubble_down_one(I first, I last)
}
}
//
// Class named_subexpressions
// Contains information about named subexpressions within the regex.
//
template <class charT>
class named_subexpressions_base
{
public:
virtual int get_id(const charT* i, const charT* j)const = 0;
virtual int get_id(std::size_t h)const = 0;
#ifdef __GNUC__
// warning supression:
virtual ~named_subexpressions_base(){}
#endif
};
template <class Iterator>
inline std::size_t hash_value_from_capture_name(Iterator i, Iterator j)
{
@ -86,13 +70,14 @@ inline std::size_t hash_value_from_capture_name(Iterator i, Iterator j)
return r;
}
template <class charT>
class named_subexpressions : public named_subexpressions_base<charT>
class named_subexpressions
{
public:
struct name
{
template <class charT>
name(const charT* i, const charT* j, int idx)
: /*n(i, j), */ index(idx)
: index(idx)
{
hash = hash_value_from_capture_name(i, j);
}
@ -100,31 +85,35 @@ class named_subexpressions : public named_subexpressions_base<charT>
: index(idx), hash(h)
{
}
//std::vector<charT> n;
int index;
std::size_t hash;
bool operator < (const name& other)const
{
return hash < other.hash; //std::lexicographical_compare(n.begin(), n.end(), other.n.begin(), other.n.end());
return hash < other.hash;
}
bool operator == (const name& other)const
{
return hash == other.hash; //n == other.n;
return hash == other.hash;
}
void swap(name& other)
{
//n.swap(other.n);
std::swap(index, other.index);
std::swap(hash, other.hash);
}
};
public:
typedef std::vector<name>::const_iterator const_iterator;
typedef std::pair<const_iterator, const_iterator> range_type;
named_subexpressions(){}
template <class charT>
void set_name(const charT* i, const charT* j, int index)
{
m_sub_names.push_back(name(i, j, index));
bubble_down_one(m_sub_names.begin(), m_sub_names.end());
}
template <class charT>
int get_id(const charT* i, const charT* j)const
{
name t(i, j, 0);
@ -135,72 +124,37 @@ public:
}
return -1;
}
template <class charT>
range_type equal_range(const charT* i, const charT* j)const
{
name t(i, j, 0);
return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t);
}
int get_id(std::size_t h)const
{
name t(h, 0);
typename std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t);
std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t);
if((pos != m_sub_names.end()) && (*pos == t))
{
return pos->index;
}
return -1;
}
range_type equal_range(std::size_t h)const
{
name t(h, 0);
return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t);
}
private:
std::vector<name> m_sub_names;
};
template <class charT, class Other>
class named_subexpressions_converter : public named_subexpressions_base<charT>
{
boost::shared_ptr<named_subexpressions<Other> > m_converter;
public:
named_subexpressions_converter(boost::shared_ptr<named_subexpressions<Other> > s)
: m_converter(s) {}
int get_id(const charT* i, const charT* j)const
{
if(i == j)
return -1;
std::vector<Other> v;
while(i != j)
{
v.push_back(*i);
++i;
}
return m_converter->get_id(&v[0], &v[0] + v.size());
}
int get_id(std::size_t h)const
{
return m_converter->get_id(h);
}
};
template <class To>
inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs_imp(
boost::shared_ptr<named_subexpressions<To> > s,
boost::integral_constant<bool,true> const&)
{
return s;
}
template <class To, class From>
inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs_imp(
boost::shared_ptr<named_subexpressions<From> > s,
boost::integral_constant<bool,false> const&)
{
return boost::shared_ptr<named_subexpressions_converter<To, From> >(new named_subexpressions_converter<To, From>(s));
}
template <class To, class From>
inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs(
boost::shared_ptr<named_subexpressions<From> > s)
{
typedef typename boost::is_same<To, From>::type tag_type;
return convert_to_named_subs_imp<To>(s, tag_type());
}
//
// class regex_data:
// represents the data we wish to expose to the matching algorithms.
//
template <class charT, class traits>
struct regex_data : public named_subexpressions<charT>
struct regex_data : public named_subexpressions
{
typedef regex_constants::syntax_option_type flag_type;
typedef std::size_t size_type;
@ -672,7 +626,7 @@ public:
BOOST_ASSERT(0 != m_pimpl.get());
return m_pimpl->get_data();
}
boost::shared_ptr<re_detail::named_subexpressions<charT> > get_named_subs()const
boost::shared_ptr<re_detail::named_subexpressions > get_named_subs()const
{
return m_pimpl;
}

View File

@ -806,7 +806,13 @@ void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
re_syntax_base* p = base;
std::ptrdiff_t idx = static_cast<re_jump*>(state)->alt.i;
if(idx > 10000)
{
//
// There may be more than one capture group with this hash, just do what Perl
// does and recurse to the leftmost:
//
idx = m_pdata->get_id(idx);
}
while(p)
{
if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == idx))

View File

@ -820,7 +820,11 @@ escape_type_class_jump:
return false;
}
// maybe have \g{ddd}
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
regex_constants::syntax_type syn_end = 0;
if((syn == regex_constants::syntax_open_brace)
|| (syn == regex_constants::escape_type_left_word)
|| (syn == regex_constants::escape_type_end_buffer))
{
if(++m_position == m_end)
{
@ -828,6 +832,18 @@ escape_type_class_jump:
return false;
}
have_brace = true;
switch(syn)
{
case regex_constants::syntax_open_brace:
syn_end = regex_constants::syntax_close_brace;
break;
case regex_constants::escape_type_left_word:
syn_end = regex_constants::escape_type_right_word;
break;
default:
syn_end = regex_constants::escape_type_end_buffer;
break;
}
}
negative = (*m_position == static_cast<charT>('-'));
if((negative) && (++m_position == m_end))
@ -837,18 +853,20 @@ escape_type_class_jump:
}
const charT* pc = m_position;
int i = this->m_traits.toi(pc, m_end, 10);
if(i < 0)
if((i < 0) && syn_end)
{
// Check for a named capture:
// Check for a named capture, get the leftmost one if there is more than one:
const charT* base = m_position;
while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
{
++m_position;
i = this->m_pdata->get_id(base, m_position);
}
i = hash_value_from_capture_name(base, m_position);
pc = m_position;
}
if(negative)
i = 1 + m_mark_count - i;
if((i > 0) && (this->m_backrefs & (1u << (i-1))))
if(((i > 0) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
{
m_position = pc;
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
@ -863,7 +881,7 @@ escape_type_class_jump:
m_position = pc;
if(have_brace)
{
if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
{
fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
return false;

View File

@ -38,7 +38,6 @@ namespace boost{
namespace re_detail{
template <class charT>
class named_subexpressions;
}
@ -69,7 +68,7 @@ public:
typedef typename re_detail::regex_iterator_traits<
BidiIterator>::value_type char_type;
typedef std::basic_string<char_type> string_type;
typedef re_detail::named_subexpressions_base<char_type> named_sub_type;
typedef re_detail::named_subexpressions named_sub_type;
// construct/copy/destroy:
explicit match_results(const Allocator& a = Allocator())
@ -225,10 +224,15 @@ public:
//
const_reference named_subexpression(const char_type* i, const char_type* j) const
{
//
// Scan for the leftmost *matched* subexpression with the specified named:
//
if(m_is_singular)
raise_logic_error();
int index = m_named_subs->get_id(i, j);
return index > 0 ? (*this)[index] : m_null;
re_detail::named_subexpressions::range_type r = m_named_subs->equal_range(i, j);
while((r.first != r.second) && ((*this)[r.first->index].matched == false))
++r.first;
return r.first != r.second ? (*this)[r.first->index] : m_null;
}
template <class charT>
const_reference named_subexpression(const charT* i, const charT* j) const
@ -243,10 +247,20 @@ public:
}
int named_subexpression_index(const char_type* i, const char_type* j) const
{
//
// Scan for the leftmost *matched* subexpression with the specified named.
// If none found then return the leftmost expression with that name,
// otherwise an invalid index:
//
if(m_is_singular)
raise_logic_error();
int index = m_named_subs->get_id(i, j);
return index > 0 ? index : -20;
re_detail::named_subexpressions::range_type s, r;
s = r = m_named_subs->equal_range(i, j);
while((r.first != r.second) && ((*this)[r.first->index].matched == false))
++r.first;
if(r.first == r.second)
r = s;
return r.first != r.second ? r.first->index : -20;
}
template <class charT>
int named_subexpression_index(const charT* i, const charT* j) const

View File

@ -200,7 +200,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
m_match_flags |= regex_constants::match_all;
m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last);
m_presult->set_base(base);
m_presult->set_named_subs(re_detail::convert_to_named_subs<typename match_results<BidiIterator>::char_type>(this->re.get_named_subs()));
m_presult->set_named_subs(this->re.get_named_subs());
if(m_match_flags & match_posix)
m_result = *m_presult;
verify_options(re.flags(), m_match_flags);
@ -262,7 +262,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
pstate = re.get_first_state();
m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), base, last);
m_presult->set_base(base);
m_presult->set_named_subs(re_detail::convert_to_named_subs<typename match_results<BidiIterator>::char_type>(this->re.get_named_subs()));
m_presult->set_named_subs(this->re.get_named_subs());
m_match_flags |= regex_constants::match_init;
}
else
@ -588,8 +588,23 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_backref()
// in the match, this is in line with ECMAScript, but not Perl
// or PCRE.
//
BidiIterator i = (*m_presult)[static_cast<const re_brace*>(pstate)->index].first;
BidiIterator j = (*m_presult)[static_cast<const re_brace*>(pstate)->index].second;
int index = static_cast<const re_brace*>(pstate)->index;
if(index >= 10000)
{
named_subexpressions::range_type r = re.get_data().equal_range(index);
BOOST_ASSERT(r.first != r.second);
do
{
index = r.first->index;
++r.first;
}while((r.first != r.second) && ((*m_presult)[index].matched != true));
}
if((m_match_flags & match_perl) && !(*m_presult)[index].matched)
return false;
BidiIterator i = (*m_presult)[index].first;
BidiIterator j = (*m_presult)[index].second;
while(i != j)
{
if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase)))
@ -713,7 +728,7 @@ inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref(
{
// return true if marked sub-expression N has been matched:
int index = static_cast<const re_brace*>(pstate)->index;
bool result;
bool result = false;
if(index == 9999)
{
// Magic value for a (DEFINE) block:
@ -721,11 +736,25 @@ inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref(
}
else if(index > 0)
{
// Have we matched subexpression "index"?
// Check if index is a hash value:
if(index >= 10000)
index = re.get_data().get_id(index);
// Have we matched subexpression "index"?
result = (*m_presult)[index].matched;
{
named_subexpressions::range_type r = re.get_data().equal_range(index);
while(r.first != r.second)
{
if((*m_presult)[r.first->index].matched)
{
result = true;
break;
}
++r.first;
}
}
else
{
result = (*m_presult)[index].matched;
}
pstate = pstate->next.p;
}
else
@ -734,8 +763,20 @@ inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref(
// If index == 0 then check for any recursion at all, otherwise for recursion to -index-1.
int idx = -index-1;
if(idx >= 10000)
idx = re.get_data().get_id(idx);
result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0));
{
named_subexpressions::range_type r = re.get_data().equal_range(idx);
int stack_index = recursion_stack.empty() ? -1 : recursion_stack.back().idx;
while(r.first != r.second)
{
result |= (stack_index == r.first->index);
if(result)break;
++r.first;
}
}
else
{
result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0));
}
pstate = pstate->next.p;
}
return result;