forked from boostorg/regex
Added support for (?(DEFINE)...) blocks and updated the docs accordingly.
Added support for ICU libraries which may be named icui18n.lib on some Win32 platforms. [SVN r55267]
This commit is contained in:
@ -20,6 +20,7 @@
|
||||
#define BOOST_REGEX_V4_BASIC_REGEX_HPP
|
||||
|
||||
#include <boost/type_traits/is_same.hpp>
|
||||
#include <boost/functional/hash.hpp>
|
||||
|
||||
#ifdef BOOST_MSVC
|
||||
#pragma warning(push)
|
||||
@ -68,34 +69,53 @@ template <class charT>
|
||||
class named_subexpressions_base
|
||||
{
|
||||
public:
|
||||
virtual int get_id(const charT* i, const charT* j) = 0;
|
||||
virtual int get_id(const charT* i, const charT* j)const = 0;
|
||||
virtual int get_id(std::size_t hash)const = 0;
|
||||
#ifdef __GNUC__
|
||||
// warning suppression:
|
||||
virtual ~named_subexpressions_base(){}
|
||||
#endif
|
||||
};
|
||||
|
||||
template <class Iterator>
|
||||
inline std::size_t hash_value_from_capture_name(Iterator i, Iterator j)
|
||||
{
|
||||
std::size_t r = boost::hash_range(i, j);
|
||||
r %= ((std::numeric_limits<int>::max)() - 10001);
|
||||
r += 10000;
|
||||
return r;
|
||||
}
|
||||
|
||||
template <class charT>
|
||||
class named_subexpressions : public named_subexpressions_base<charT>
|
||||
{
|
||||
struct name
|
||||
{
|
||||
name(const charT* i, const charT* j, int idx)
|
||||
: n(i, j), index(idx) {}
|
||||
std::vector<charT> n;
|
||||
: /*n(i, j), */ index(idx)
|
||||
{
|
||||
hash = hash_value_from_capture_name(i, j);
|
||||
}
|
||||
name(std::size_t h, int idx)
|
||||
: index(idx), hash(h)
|
||||
{
|
||||
}
|
||||
//std::vector<charT> n;
|
||||
int index;
|
||||
std::size_t hash;
|
||||
bool operator < (const name& other)const
|
||||
{
|
||||
return std::lexicographical_compare(n.begin(), n.end(), other.n.begin(), other.n.end());
|
||||
return hash < other.hash; //std::lexicographical_compare(n.begin(), n.end(), other.n.begin(), other.n.end());
|
||||
}
|
||||
bool operator == (const name& other)const
|
||||
{
|
||||
return n == other.n;
|
||||
return hash == other.hash; //n == other.n;
|
||||
}
|
||||
void swap(name& other)
|
||||
{
|
||||
n.swap(other.n);
|
||||
//n.swap(other.n);
|
||||
std::swap(index, other.index);
|
||||
std::swap(hash, other.hash);
|
||||
}
|
||||
};
|
||||
public:
|
||||
@ -105,7 +125,7 @@ public:
|
||||
m_sub_names.push_back(name(i, j, index));
|
||||
bubble_down_one(m_sub_names.begin(), m_sub_names.end());
|
||||
}
|
||||
int get_id(const charT* i, const charT* j)
|
||||
int get_id(const charT* i, const charT* j)const
|
||||
{
|
||||
name t(i, j, 0);
|
||||
typename std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t);
|
||||
@ -115,6 +135,16 @@ public:
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
int get_id(std::size_t h)const
|
||||
{
|
||||
name t(h, 0);
|
||||
typename std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t);
|
||||
if((pos != m_sub_names.end()) && (*pos == t))
|
||||
{
|
||||
return pos->index;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
private:
|
||||
std::vector<name> m_sub_names;
|
||||
};
|
||||
@ -126,7 +156,7 @@ class named_subexpressions_converter : public named_subexpressions_base<charT>
|
||||
public:
|
||||
named_subexpressions_converter(boost::shared_ptr<named_subexpressions<Other> > s)
|
||||
: m_converter(s) {}
|
||||
virtual int get_id(const charT* i, const charT* j)
|
||||
int get_id(const charT* i, const charT* j)const
|
||||
{
|
||||
if(i == j)
|
||||
return -1;
|
||||
@ -138,6 +168,10 @@ public:
|
||||
}
|
||||
return m_converter->get_id(&v[0], &v[0] + v.size());
|
||||
}
|
||||
int get_id(std::size_t h)const
|
||||
{
|
||||
return m_converter->get_id(h);
|
||||
}
|
||||
};
|
||||
|
||||
template <class To>
|
||||
|
@ -762,29 +762,57 @@ void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
|
||||
{
|
||||
switch(state->type)
|
||||
{
|
||||
case syntax_element_assert_backref:
|
||||
{
|
||||
// just check that the index is valid:
|
||||
int id = static_cast<const re_brace*>(state)->index;
|
||||
if(id < 0)
|
||||
{
|
||||
id = -id-1;
|
||||
if(id >= 10000)
|
||||
{
|
||||
id = m_pdata->get_id(id);
|
||||
if(id <= 0)
|
||||
{
|
||||
// check of sub-expression that doesn't exist:
|
||||
if(0 == this->m_pdata->m_status) // update the error code if not already set
|
||||
this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
|
||||
//
|
||||
// clear the expression, we should be empty:
|
||||
//
|
||||
this->m_pdata->m_expression = 0;
|
||||
this->m_pdata->m_expression_len = 0;
|
||||
//
|
||||
// and throw if required:
|
||||
//
|
||||
if(0 == (this->flags() & regex_constants::no_except))
|
||||
{
|
||||
std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern);
|
||||
boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
|
||||
e.raise();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case syntax_element_recurse:
|
||||
{
|
||||
bool ok = false;
|
||||
re_syntax_base* p = base;
|
||||
/*
|
||||
if(static_cast<re_jump*>(state)->alt.i == 0)
|
||||
int id = static_cast<re_jump*>(state)->alt.i;
|
||||
if(id > 10000)
|
||||
id = m_pdata->get_id(id);
|
||||
while(p)
|
||||
{
|
||||
ok = true;
|
||||
static_cast<re_jump*>(state)->alt.p = p;
|
||||
}
|
||||
else
|
||||
{*/
|
||||
while(p)
|
||||
if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == id))
|
||||
{
|
||||
if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == static_cast<re_jump*>(state)->alt.i))
|
||||
{
|
||||
static_cast<re_jump*>(state)->alt.p = p;
|
||||
ok = true;
|
||||
break;
|
||||
}
|
||||
p = p->next.p;
|
||||
static_cast<re_jump*>(state)->alt.p = p;
|
||||
ok = true;
|
||||
break;
|
||||
}
|
||||
//}
|
||||
p = p->next.p;
|
||||
}
|
||||
if(!ok)
|
||||
{
|
||||
// recursion to sub-expression that doesn't exist:
|
||||
|
@ -1961,8 +1961,27 @@ insert_recursion:
|
||||
int v = this->m_traits.toi(m_position, m_end, 10);
|
||||
if(*m_position == charT('R'))
|
||||
{
|
||||
++m_position;
|
||||
v = -this->m_traits.toi(m_position, m_end, 10);
|
||||
if(++m_position == m_end)
|
||||
{
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
if(*m_position == charT('&'))
|
||||
{
|
||||
const charT* base = ++m_position;
|
||||
while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
||||
++m_position;
|
||||
if(m_position == m_end)
|
||||
{
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
|
||||
}
|
||||
else
|
||||
{
|
||||
v = -this->m_traits.toi(m_position, m_end, 10);
|
||||
}
|
||||
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
|
||||
br->index = v < 0 ? (v - 1) : 0;
|
||||
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
|
||||
@ -1976,6 +1995,58 @@ insert_recursion:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if((*m_position == charT('\'')) || (*m_position == charT('<')))
|
||||
{
|
||||
const charT* base = ++m_position;
|
||||
while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
|
||||
++m_position;
|
||||
if(m_position == m_end)
|
||||
{
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
v = static_cast<int>(hash_value_from_capture_name(base, m_position));
|
||||
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
|
||||
br->index = v;
|
||||
if((*m_position != charT('>')) && (*m_position != charT('\'')) || (++m_position == m_end))
|
||||
{
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
|
||||
{
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
if(++m_position == m_end)
|
||||
{
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if(*m_position == charT('D'))
|
||||
{
|
||||
const char* def = "DEFINE";
|
||||
while(*def && (m_position != m_end) && (*m_position == charT(*def)))
|
||||
++m_position, ++def;
|
||||
if((m_position == m_end) || *def)
|
||||
{
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
|
||||
br->index = 9999; // special magic value!
|
||||
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
|
||||
{
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
if(++m_position == m_end)
|
||||
{
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if(v > 0)
|
||||
{
|
||||
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
|
||||
@ -2081,6 +2152,43 @@ named_capture_jump:
|
||||
}
|
||||
goto insert_recursion;
|
||||
}
|
||||
if(*m_position == charT('&'))
|
||||
{
|
||||
++m_position;
|
||||
const charT* base = m_position;
|
||||
while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
||||
++m_position;
|
||||
if(m_position == m_end)
|
||||
{
|
||||
fail(regex_constants::error_backref, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
v = static_cast<int>(hash_value_from_capture_name(base, m_position));
|
||||
goto insert_recursion;
|
||||
}
|
||||
if(*m_position == charT('P'))
|
||||
{
|
||||
++m_position;
|
||||
if(m_position == m_end)
|
||||
{
|
||||
fail(regex_constants::error_backref, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
if(*m_position == charT('>'))
|
||||
{
|
||||
++m_position;
|
||||
const charT* base = m_position;
|
||||
while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
||||
++m_position;
|
||||
if(m_position == m_end)
|
||||
{
|
||||
fail(regex_constants::error_backref, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
v = static_cast<int>(hash_value_from_capture_name(base, m_position));
|
||||
goto insert_recursion;
|
||||
}
|
||||
}
|
||||
//
|
||||
// let's assume that we have a (?imsx) group and try to parse it:
|
||||
//
|
||||
@ -2183,9 +2291,20 @@ option_group_jump:
|
||||
}
|
||||
else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
|
||||
{
|
||||
// Can't have seen more than one alternative:
|
||||
fail(regex_constants::error_bad_pattern, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
// We must *not* have seen an alternative inside a (DEFINE) block:
|
||||
b = this->getaddress(b->next.i, b);
|
||||
if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
|
||||
{
|
||||
fail(regex_constants::error_bad_pattern, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// check for invalid repetition of next state:
|
||||
b = this->getaddress(expected_alt_point);
|
||||
b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
|
||||
|
@ -714,8 +714,16 @@ inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref(
|
||||
// return true if marked sub-expression N has been matched:
|
||||
int index = static_cast<const re_brace*>(pstate)->index;
|
||||
bool result;
|
||||
if(index > 0)
|
||||
if(index == 9999)
|
||||
{
|
||||
// Magic value for a (DEFINE) block:
|
||||
return false;
|
||||
}
|
||||
else if(index > 0)
|
||||
{
|
||||
// Check if index is a hash value:
|
||||
if(index >= 10000)
|
||||
index = re.get_data().get_id(index);
|
||||
// Have we matched subexpression "index"?
|
||||
result = (*m_presult)[index].matched;
|
||||
pstate = pstate->next.p;
|
||||
@ -724,7 +732,10 @@ inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref(
|
||||
{
|
||||
// Have we recursed into subexpression "index"?
|
||||
// If index == 0 then check for any recursion at all, otherwise for recursion to -index-1.
|
||||
result = recursion_stack_position && ((recursion_stack[recursion_stack_position-1].id == -index-1) || (index == 0));
|
||||
int id = -index-1;
|
||||
if(id >= 10000)
|
||||
id = re.get_data().get_id(id);
|
||||
result = recursion_stack_position && ((recursion_stack[recursion_stack_position-1].id == id) || (index == 0));
|
||||
pstate = pstate->next.p;
|
||||
}
|
||||
return result;
|
||||
|
Reference in New Issue
Block a user