forked from boostorg/regex
Add support for named sub-expressions.
[SVN r52823]
This commit is contained in:
@ -844,6 +844,42 @@ struct BoostRegexConcept
|
||||
m_string = m_char + m_sub;
|
||||
ignore_unused_variable_warning(m_string);
|
||||
|
||||
// Named sub-expressions:
|
||||
m_sub = m_cresults[&m_char];
|
||||
ignore_unused_variable_warning(m_sub);
|
||||
m_sub = m_cresults[m_string];
|
||||
ignore_unused_variable_warning(m_sub);
|
||||
m_sub = m_cresults[""];
|
||||
ignore_unused_variable_warning(m_sub);
|
||||
m_sub = m_cresults[std::string("")];
|
||||
ignore_unused_variable_warning(m_sub);
|
||||
m_string = m_cresults.str(&m_char);
|
||||
ignore_unused_variable_warning(m_string);
|
||||
m_string = m_cresults.str(m_string);
|
||||
ignore_unused_variable_warning(m_string);
|
||||
m_string = m_cresults.str("");
|
||||
ignore_unused_variable_warning(m_string);
|
||||
m_string = m_cresults.str(std::string(""));
|
||||
ignore_unused_variable_warning(m_string);
|
||||
|
||||
typename match_results_type::difference_type diff;
|
||||
diff = m_cresults.length(&m_char);
|
||||
ignore_unused_variable_warning(diff);
|
||||
diff = m_cresults.length(m_string);
|
||||
ignore_unused_variable_warning(diff);
|
||||
diff = m_cresults.length("");
|
||||
ignore_unused_variable_warning(diff);
|
||||
diff = m_cresults.length(std::string(""));
|
||||
ignore_unused_variable_warning(diff);
|
||||
diff = m_cresults.position(&m_char);
|
||||
ignore_unused_variable_warning(diff);
|
||||
diff = m_cresults.position(m_string);
|
||||
ignore_unused_variable_warning(diff);
|
||||
diff = m_cresults.position("");
|
||||
ignore_unused_variable_warning(diff);
|
||||
diff = m_cresults.position(std::string(""));
|
||||
ignore_unused_variable_warning(diff);
|
||||
|
||||
#ifndef BOOST_NO_STD_LOCALE
|
||||
m_stream << m_sub;
|
||||
m_stream << m_cresults;
|
||||
|
@ -19,6 +19,8 @@
|
||||
#ifndef BOOST_REGEX_V4_BASIC_REGEX_HPP
|
||||
#define BOOST_REGEX_V4_BASIC_REGEX_HPP
|
||||
|
||||
#include <boost/type_traits/is_same.hpp>
|
||||
|
||||
#ifdef BOOST_MSVC
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable: 4103)
|
||||
@ -44,12 +46,123 @@ namespace re_detail{
|
||||
template <class charT, class traits>
|
||||
class basic_regex_parser;
|
||||
|
||||
//
// bubble_down_one:
// Moves the last element of [first, last) towards the front, swapping it
// with its predecessor (via the element's member swap) for as long as the
// predecessor does not compare strictly less than it.
//
template <class I>
void bubble_down_one(I first, I last)
{
   if(first == last)
      return;   // empty range: nothing to move
   for(I cur = last - 1; cur != first; --cur)
   {
      I prev = cur - 1;
      if(*prev < *cur)
         break;   // predecessor is strictly smaller: element is in place
      prev->swap(*cur);
   }
}
|
||||
|
||||
//
|
||||
// Class named_subexpressions
|
||||
// Contains information about named subexpressions within the regex.
|
||||
//
|
||||
//
// named_subexpressions_base:
// Abstract interface used to look up the id of a named sub-expression
// from the character range [i, j) that spells its name.
//
template <class charT>
class named_subexpressions_base
{
public:
   // Virtual destructor so that an implementation can be destroyed safely
   // through a pointer to this interface.
   virtual ~named_subexpressions_base() {}
   // Returns the id associated with the name [i, j).
   virtual int get_id(const charT* i, const charT* j) = 0;
};
|
||||
|
||||
template <class charT>
|
||||
class named_subexpressions : public named_subexpressions_base<charT>
|
||||
{
|
||||
struct name
|
||||
{
|
||||
name(const charT* i, const charT* j, int idx)
|
||||
: n(i, j), index(idx) {}
|
||||
std::vector<charT> n;
|
||||
int index;
|
||||
bool operator < (const name& other)const
|
||||
{
|
||||
return std::lexicographical_compare(n.begin(), n.end(), other.n.begin(), other.n.end());
|
||||
}
|
||||
bool operator == (const name& other)const
|
||||
{
|
||||
return n == other.n;
|
||||
}
|
||||
void swap(name& other)
|
||||
{
|
||||
n.swap(other.n);
|
||||
std::swap(index, other.index);
|
||||
}
|
||||
};
|
||||
public:
|
||||
named_subexpressions(){}
|
||||
void set_name(const charT* i, const charT* j, int index)
|
||||
{
|
||||
m_sub_names.push_back(name(i, j, index));
|
||||
bubble_down_one(m_sub_names.begin(), m_sub_names.end());
|
||||
}
|
||||
int get_id(const charT* i, const charT* j)
|
||||
{
|
||||
name t(i, j, 0);
|
||||
typename std::vector<name>::const_iterator pos = lower_bound(m_sub_names.begin(), m_sub_names.end(), t);
|
||||
if((pos != m_sub_names.end()) && (*pos == t))
|
||||
{
|
||||
return pos->index;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
private:
|
||||
std::vector<name> m_sub_names;
|
||||
};
|
||||
|
||||
template <class charT, class Other>
|
||||
class named_subexpressions_converter : public named_subexpressions_base<charT>
|
||||
{
|
||||
boost::shared_ptr<named_subexpressions<Other> > m_converter;
|
||||
public:
|
||||
named_subexpressions_converter(boost::shared_ptr<named_subexpressions<Other> > s)
|
||||
: m_converter(s) {}
|
||||
virtual int get_id(const charT* i, const charT* j)
|
||||
{
|
||||
if(i == j)
|
||||
return -1;
|
||||
std::vector<Other> v;
|
||||
while(i != j)
|
||||
{
|
||||
v.push_back(*i);
|
||||
++i;
|
||||
}
|
||||
return m_converter->get_id(&v[0], &v[0] + v.size());
|
||||
}
|
||||
};
|
||||
|
||||
template <class To>
|
||||
inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs_imp(
|
||||
boost::shared_ptr<named_subexpressions<To> > s,
|
||||
boost::integral_constant<bool,true> const&)
|
||||
{
|
||||
return s;
|
||||
}
|
||||
template <class To, class From>
|
||||
inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs_imp(
|
||||
boost::shared_ptr<named_subexpressions<From> > s,
|
||||
boost::integral_constant<bool,false> const&)
|
||||
{
|
||||
return boost::shared_ptr<named_subexpressions_converter<To, From> >(new named_subexpressions_converter<To, From>(s));
|
||||
}
|
||||
template <class To, class From>
|
||||
inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs(
|
||||
boost::shared_ptr<named_subexpressions<From> > s)
|
||||
{
|
||||
typedef typename boost::is_same<To, From>::type tag_type;
|
||||
return convert_to_named_subs_imp<To>(s, tag_type());
|
||||
}
|
||||
//
|
||||
// class regex_data:
|
||||
// represents the data we wish to expose to the matching algorithms.
|
||||
//
|
||||
template <class charT, class traits>
|
||||
struct regex_data
|
||||
struct regex_data : public named_subexpressions<charT>
|
||||
{
|
||||
typedef regex_constants::syntax_option_type flag_type;
|
||||
typedef std::size_t size_type;
|
||||
@ -520,6 +633,10 @@ public:
|
||||
BOOST_ASSERT(0 != m_pimpl.get());
|
||||
return m_pimpl->get_data();
|
||||
}
|
||||
// Returns m_pimpl converted to a shared pointer to
// re_detail::named_subexpressions<charT>.
boost::shared_ptr<re_detail::named_subexpressions<charT> > get_named_subs()const
{
   boost::shared_ptr<re_detail::named_subexpressions<charT> > subs(m_pimpl);
   return subs;
}
|
||||
|
||||
private:
|
||||
shared_ptr<re_detail::basic_regex_implementation<charT, traits> > m_pimpl;
|
||||
|
@ -777,6 +777,15 @@ escape_type_class_jump:
|
||||
}
|
||||
const charT* pc = m_position;
|
||||
int i = this->m_traits.toi(pc, m_end, 10);
|
||||
if(i < 0)
|
||||
{
|
||||
// Check for a named capture:
|
||||
const charT* base = m_position;
|
||||
while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
|
||||
++m_position;
|
||||
i = this->m_pdata->get_id(base, m_position);
|
||||
pc = m_position;
|
||||
}
|
||||
if(negative)
|
||||
i = 1 + m_mark_count - i;
|
||||
if((i > 0) && (this->m_backrefs & (1u << (i-1))))
|
||||
@ -1784,6 +1793,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
regex_constants::syntax_option_type old_flags = this->flags();
|
||||
bool old_case_change = m_has_case_change;
|
||||
m_has_case_change = false;
|
||||
charT name_delim;
|
||||
//
|
||||
// select the actual extension used:
|
||||
//
|
||||
@ -1825,8 +1835,10 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
pb->index = markid = -1;
|
||||
else
|
||||
{
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
// Probably a named capture which also starts (?< :
|
||||
name_delim = '>';
|
||||
--m_position;
|
||||
goto named_capture_jump;
|
||||
}
|
||||
++m_position;
|
||||
jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
|
||||
@ -1903,7 +1915,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
|
||||
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
|
||||
{
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
fail(regex_constants::error_paren, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
m_position -= 2;
|
||||
@ -1914,6 +1926,40 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
case regex_constants::syntax_close_mark:
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
case regex_constants::escape_type_end_buffer:
|
||||
{
|
||||
name_delim = *m_position;
|
||||
named_capture_jump:
|
||||
markid = 0;
|
||||
if(0 == (this->flags() & regbase::nosubs))
|
||||
{
|
||||
markid = ++m_mark_count;
|
||||
#ifndef BOOST_NO_STD_DISTANCE
|
||||
if(this->flags() & regbase::save_subexpression_location)
|
||||
this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
|
||||
#else
|
||||
if(this->flags() & regbase::save_subexpression_location)
|
||||
this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
|
||||
#endif
|
||||
}
|
||||
pb->index = markid;
|
||||
const charT* base = ++m_position;
|
||||
if(m_position == m_end)
|
||||
{
|
||||
fail(regex_constants::error_paren, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
while((m_position != m_end) && (*m_position != name_delim))
|
||||
++m_position;
|
||||
if(m_position == m_end)
|
||||
{
|
||||
fail(regex_constants::error_paren, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
this->m_pdata->set_name(base, m_position, markid);
|
||||
++m_position;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
//
|
||||
// let's assume that we have a (?imsx) group and try to parse it:
|
||||
@ -2043,6 +2089,22 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
// and the case change data:
|
||||
//
|
||||
m_has_case_change = old_case_change;
|
||||
|
||||
if(markid > 0)
|
||||
{
|
||||
#ifndef BOOST_NO_STD_DISTANCE
|
||||
if(this->flags() & regbase::save_subexpression_location)
|
||||
this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
|
||||
#else
|
||||
if(this->flags() & regbase::save_subexpression_location)
|
||||
this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
|
||||
#endif
|
||||
//
|
||||
// allow backrefs to this mark:
|
||||
//
|
||||
if((markid > 0) && (markid < (int)(sizeof(unsigned) * CHAR_BIT)))
|
||||
this->m_backrefs |= 1u << (markid - 1);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -36,6 +36,13 @@ namespace boost{
|
||||
#pragma warning(disable : 4251 4231 4660)
|
||||
#endif
|
||||
|
||||
namespace re_detail{

// Forward declarations only.
template <class charT>
class named_subexpressions;

// match_results names this interface in its named_sub_type typedef,
// so it is declared here as well.
template <class charT>
class named_subexpressions_base;

}
|
||||
|
||||
template <class BidiIterator, class Allocator>
|
||||
class match_results
|
||||
{
|
||||
@ -62,13 +69,14 @@ public:
|
||||
typedef typename re_detail::regex_iterator_traits<
|
||||
BidiIterator>::value_type char_type;
|
||||
typedef std::basic_string<char_type> string_type;
|
||||
typedef re_detail::named_subexpressions_base<char_type> named_sub_type;
|
||||
|
||||
// construct/copy/destroy:
|
||||
explicit match_results(const Allocator& a = Allocator())
|
||||
#ifndef BOOST_NO_STD_ALLOCATOR
|
||||
: m_subs(a), m_base() {}
|
||||
: m_subs(a), m_base(), m_last_closed_paren(0) {}
|
||||
#else
|
||||
: m_subs(), m_base() { (void)a; }
|
||||
: m_subs(), m_base(), m_last_closed_paren(0) { (void)a; }
|
||||
#endif
|
||||
match_results(const match_results& m)
|
||||
: m_subs(m.m_subs), m_base(m.m_base) {}
|
||||
@ -95,6 +103,24 @@ public:
|
||||
return m_subs[sub].length();
|
||||
return 0;
|
||||
}
|
||||
// length() of the sub-expression whose name is the null-terminated
// string "sub" (same character type as the results).
difference_type length(const char_type* sub) const
{
   const char_type* last = sub;
   for(; *last; ++last) {}
   return length(named_subexpression_index(sub, last));
}
// As above, for a name spelled in a different character type.
template <class charT>
difference_type length(const charT* sub) const
{
   const charT* last = sub;
   for(; *last; ++last) {}
   return length(named_subexpression_index(sub, last));
}
// As above, for a name held in a std::basic_string; the name is read
// through c_str(), i.e. up to the first null character.
template <class charT, class Traits, class A>
difference_type length(const std::basic_string<charT, Traits, A>& sub) const
{
   const charT* text = sub.c_str();
   return length(text);
}
|
||||
difference_type position(size_type sub = 0) const
|
||||
{
|
||||
sub += 2;
|
||||
@ -108,6 +134,24 @@ public:
|
||||
}
|
||||
return ~static_cast<difference_type>(0);
|
||||
}
|
||||
// position() of the sub-expression whose name is the null-terminated
// string "sub" (same character type as the results).
difference_type position(const char_type* sub) const
{
   const char_type* last = sub;
   for(; *last; ++last) {}
   return position(named_subexpression_index(sub, last));
}
// As above, for a name spelled in a different character type.
template <class charT>
difference_type position(const charT* sub) const
{
   const charT* last = sub;
   for(; *last; ++last) {}
   return position(named_subexpression_index(sub, last));
}
// As above, for a name held in a std::basic_string; the name is read
// through c_str(), i.e. up to the first null character.
template <class charT, class Traits, class A>
difference_type position(const std::basic_string<charT, Traits, A>& sub) const
{
   const charT* text = sub.c_str();
   return position(text);
}
|
||||
string_type str(int sub = 0) const
|
||||
{
|
||||
sub += 2;
|
||||
@ -122,6 +166,25 @@ public:
|
||||
}
|
||||
return result;
|
||||
}
|
||||
// The overloads below return, as a string, the sub-expression selected
// by name; each one defers to the matching operator[] overload.
string_type str(const char_type* sub) const
{
   const_reference match = (*this)[sub];
   return match.str();
}
template <class Traits, class A>
string_type str(const std::basic_string<char_type, Traits, A>& sub) const
{
   const_reference match = (*this)[sub];
   return match.str();
}
template <class charT>
string_type str(const charT* sub) const
{
   const_reference match = (*this)[sub];
   return match.str();
}
template <class charT, class Traits, class A>
string_type str(const std::basic_string<charT, Traits, A>& sub) const
{
   const_reference match = (*this)[sub];
   return match.str();
}
|
||||
const_reference operator[](int sub) const
|
||||
{
|
||||
sub += 2;
|
||||
@ -131,6 +194,75 @@ public:
|
||||
}
|
||||
return m_null;
|
||||
}
|
||||
//
|
||||
// Named sub-expressions:
|
||||
//
|
||||
const_reference named_subexpression(const char_type* i, const char_type* j) const
|
||||
{
|
||||
int index = m_named_subs->get_id(i, j);
|
||||
return index > 0 ? (*this)[index] : m_null;
|
||||
}
|
||||
template <class charT>
|
||||
const_reference named_subexpression(const charT* i, const charT* j) const
|
||||
{
|
||||
BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type));
|
||||
if(i == j)
|
||||
return m_null;
|
||||
std::vector<char_type> s;
|
||||
while(i != j)
|
||||
s.insert(s.end(), *i++);
|
||||
return named_subexpression(&*s.begin(), &*s.begin() + s.size());
|
||||
}
|
||||
int named_subexpression_index(const char_type* i, const char_type* j) const
|
||||
{
|
||||
int index = m_named_subs->get_id(i, j);
|
||||
return index > 0 ? index : -20;
|
||||
}
|
||||
template <class charT>
|
||||
int named_subexpression_index(const charT* i, const charT* j) const
|
||||
{
|
||||
BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type));
|
||||
if(i == j)
|
||||
return -20;
|
||||
std::vector<char_type> s;
|
||||
while(i != j)
|
||||
s.insert(s.end(), *i++);
|
||||
return named_subexpression_index(&*s.begin(), &*s.begin() + s.size());
|
||||
}
|
||||
template <class Traits, class A>
|
||||
const_reference operator[](const std::basic_string<char_type, Traits, A>& s) const
|
||||
{
|
||||
return named_subexpression(s.c_str(), s.c_str() + s.size());
|
||||
}
|
||||
const_reference operator[](const char_type* p) const
|
||||
{
|
||||
const char_type* e = p;
|
||||
while(*e) ++e;
|
||||
return named_subexpression(p, e);
|
||||
}
|
||||
|
||||
template <class charT>
|
||||
const_reference operator[](const charT* p) const
|
||||
{
|
||||
BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type));
|
||||
if(*p == 0)
|
||||
return m_null;
|
||||
std::vector<char_type> s;
|
||||
while(*p)
|
||||
s.insert(s.end(), *p++);
|
||||
return named_subexpression(&*s.begin(), &*s.begin() + s.size());
|
||||
}
|
||||
template <class charT, class Traits, class A>
|
||||
const_reference operator[](const std::basic_string<charT, Traits, A>& ns) const
|
||||
{
|
||||
BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type));
|
||||
if(ns.empty())
|
||||
return m_null;
|
||||
std::vector<char_type> s;
|
||||
for(unsigned i = 0; i < ns.size(); ++i)
|
||||
s.insert(s.end(), ns[i]);
|
||||
return named_subexpression(&*s.begin(), &*s.begin() + s.size());
|
||||
}
|
||||
|
||||
const_reference prefix() const
|
||||
{
|
||||
@ -186,6 +318,10 @@ public:
|
||||
::boost::re_detail::regex_format_imp(i, *this, fmt.data(), fmt.data() + fmt.size(), flags, re.get_traits());
|
||||
return result;
|
||||
}
|
||||
// Returns the sub-expression whose index is stored in
// m_last_closed_paren, or m_null while that index is still 0.
const_reference get_last_closed_paren()const
{
   if(m_last_closed_paren == 0)
      return m_null;
   return (*this)[m_last_closed_paren];
}
|
||||
|
||||
allocator_type get_allocator() const
|
||||
{
|
||||
@ -232,6 +368,8 @@ public:
|
||||
|
||||
void BOOST_REGEX_CALL set_second(BidiIterator i, size_type pos, bool m = true, bool escape_k = false)
|
||||
{
|
||||
if(pos)
|
||||
m_last_closed_paren = pos;
|
||||
pos += 2;
|
||||
BOOST_ASSERT(m_subs.size() > pos);
|
||||
m_subs[pos].second = i;
|
||||
@ -261,6 +399,7 @@ public:
|
||||
m_subs.insert(m_subs.end(), n+2-len, v);
|
||||
}
|
||||
m_subs[1].first = i;
|
||||
m_last_closed_paren = 0;
|
||||
}
|
||||
void BOOST_REGEX_CALL set_base(BidiIterator pos)
|
||||
{
|
||||
@ -301,11 +440,17 @@ public:
|
||||
}
|
||||
void BOOST_REGEX_CALL maybe_assign(const match_results<BidiIterator, Allocator>& m);
|
||||
|
||||
// Installs "subs" as the table used to resolve sub-expression names.
void BOOST_REGEX_CALL set_named_subs(boost::shared_ptr<named_sub_type> subs)
{
   // "subs" is our own copy, so exchanging it with the member has the
   // same effect as assigning it.
   m_named_subs.swap(subs);
}
|
||||
|
||||
private:
|
||||
vector_type m_subs; // subexpressions
|
||||
BidiIterator m_base; // where the search started from
|
||||
sub_match<BidiIterator> m_null; // a null match
|
||||
boost::shared_ptr<named_sub_type> m_named_subs;
|
||||
int m_last_closed_paren;
|
||||
};
|
||||
|
||||
template <class BidiIterator, class Allocator>
|
||||
|
@ -200,6 +200,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
|
||||
m_match_flags |= regex_constants::match_all;
|
||||
m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last);
|
||||
m_presult->set_base(base);
|
||||
m_presult->set_named_subs(re_detail::convert_to_named_subs<typename match_results<BidiIterator>::char_type>(this->re.get_named_subs()));
|
||||
if(m_match_flags & match_posix)
|
||||
m_result = *m_presult;
|
||||
verify_options(re.flags(), m_match_flags);
|
||||
@ -261,6 +262,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
|
||||
pstate = re.get_first_state();
|
||||
m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), base, last);
|
||||
m_presult->set_base(base);
|
||||
m_presult->set_named_subs(re_detail::convert_to_named_subs<typename match_results<BidiIterator>::char_type>(this->re.get_named_subs()));
|
||||
m_match_flags |= regex_constants::match_init;
|
||||
}
|
||||
else
|
||||
|
@ -107,6 +107,7 @@ private:
|
||||
void format_escape();
|
||||
void format_conditional();
|
||||
void format_until_scope_end();
|
||||
bool handle_perl_verb(bool have_brace);
|
||||
|
||||
const traits& m_traits; // the traits class for localised formatting operations
|
||||
const Results& m_results; // the match_results being used.
|
||||
@ -250,6 +251,25 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_perl()
|
||||
case '$':
|
||||
put(*m_position++);
|
||||
break;
|
||||
case '+':
|
||||
if((++m_position != m_end) && (*m_position == '{'))
|
||||
{
|
||||
const char_type* base = ++m_position;
|
||||
while((m_position != m_end) && (*m_position != '}')) ++m_position;
|
||||
if(m_position != m_end)
|
||||
{
|
||||
// Named sub-expression:
|
||||
put(this->m_results.named_subexpression(base, m_position));
|
||||
++m_position;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_position = --base;
|
||||
}
|
||||
}
|
||||
put((this->m_results)[this->m_results.size() > 1 ? this->m_results.size() - 1 : 1]);
|
||||
break;
|
||||
case '{':
|
||||
have_brace = true;
|
||||
++m_position;
|
||||
@ -258,14 +278,18 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_perl()
|
||||
// see if we have a number:
|
||||
{
|
||||
std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end);
|
||||
len = (std::min)(static_cast<std::ptrdiff_t>(2), len);
|
||||
//len = (std::min)(static_cast<std::ptrdiff_t>(2), len);
|
||||
int v = m_traits.toi(m_position, m_position + len, 10);
|
||||
if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}'))))
|
||||
{
|
||||
// leave the $ as is, and carry on:
|
||||
m_position = --save_position;
|
||||
put(*m_position);
|
||||
++m_position;
|
||||
// Look for a Perl-5.10 verb:
|
||||
if(!handle_perl_verb(have_brace))
|
||||
{
|
||||
// leave the $ as is, and carry on:
|
||||
m_position = --save_position;
|
||||
put(*m_position);
|
||||
++m_position;
|
||||
}
|
||||
break;
|
||||
}
|
||||
// otherwise output sub v:
|
||||
@ -276,6 +300,123 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_perl()
|
||||
}
|
||||
}
|
||||
|
||||
template <class OutputIterator, class Results, class traits>
|
||||
bool basic_regex_formatter<OutputIterator, Results, traits>::handle_perl_verb(bool have_brace)
|
||||
{
|
||||
//
|
||||
// We may have a capitalised string containing a Perl action:
|
||||
//
|
||||
static const char_type MATCH[] = { 'M', 'A', 'T', 'C', 'H' };
|
||||
static const char_type PREMATCH[] = { 'P', 'R', 'E', 'M', 'A', 'T', 'C', 'H' };
|
||||
static const char_type POSTMATCH[] = { 'P', 'O', 'S', 'T', 'M', 'A', 'T', 'C', 'H' };
|
||||
static const char_type LAST_PAREN_MATCH[] = { 'L', 'A', 'S', 'T', '_', 'P', 'A', 'R', 'E', 'N', '_', 'M', 'A', 'T', 'C', 'H' };
|
||||
static const char_type LAST_SUBMATCH_RESULT[] = { 'L', 'A', 'S', 'T', '_', 'S', 'U', 'B', 'M', 'A', 'T', 'C', 'H', '_', 'R', 'E', 'S', 'U', 'L', 'T' };
|
||||
static const char_type LAST_SUBMATCH_RESULT_ALT[] = { '^', 'N' };
|
||||
|
||||
if(have_brace && (*m_position == '^'))
|
||||
++m_position;
|
||||
|
||||
int max_len = m_end - m_position;
|
||||
|
||||
if((max_len >= 5) && std::equal(m_position, m_position + 5, MATCH))
|
||||
{
|
||||
m_position += 5;
|
||||
if(have_brace)
|
||||
{
|
||||
if(*m_position == '}')
|
||||
++m_position;
|
||||
else
|
||||
{
|
||||
m_position -= 5;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
put(this->m_results[0]);
|
||||
return true;
|
||||
}
|
||||
if((max_len >= 8) && std::equal(m_position, m_position + 8, PREMATCH))
|
||||
{
|
||||
m_position += 8;
|
||||
if(have_brace)
|
||||
{
|
||||
if(*m_position == '}')
|
||||
++m_position;
|
||||
else
|
||||
{
|
||||
m_position -= 8;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
put(this->m_results.prefix());
|
||||
return true;
|
||||
}
|
||||
if((max_len >= 9) && std::equal(m_position, m_position + 9, POSTMATCH))
|
||||
{
|
||||
m_position += 9;
|
||||
if(have_brace)
|
||||
{
|
||||
if(*m_position == '}')
|
||||
++m_position;
|
||||
else
|
||||
{
|
||||
m_position -= 9;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
put(this->m_results.suffix());
|
||||
return true;
|
||||
}
|
||||
if((max_len >= 16) && std::equal(m_position, m_position + 16, LAST_PAREN_MATCH))
|
||||
{
|
||||
m_position += 16;
|
||||
if(have_brace)
|
||||
{
|
||||
if(*m_position == '}')
|
||||
++m_position;
|
||||
else
|
||||
{
|
||||
m_position -= 16;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
put((this->m_results)[this->m_results.size() > 1 ? this->m_results.size() - 1 : 1]);
|
||||
return true;
|
||||
}
|
||||
if((max_len >= 20) && std::equal(m_position, m_position + 20, LAST_SUBMATCH_RESULT))
|
||||
{
|
||||
m_position += 20;
|
||||
if(have_brace)
|
||||
{
|
||||
if(*m_position == '}')
|
||||
++m_position;
|
||||
else
|
||||
{
|
||||
m_position -= 20;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
put(this->m_results.get_last_closed_paren());
|
||||
return true;
|
||||
}
|
||||
if((max_len >= 2) && std::equal(m_position, m_position + 2, LAST_SUBMATCH_RESULT_ALT))
|
||||
{
|
||||
m_position += 2;
|
||||
if(have_brace)
|
||||
{
|
||||
if(*m_position == '}')
|
||||
++m_position;
|
||||
else
|
||||
{
|
||||
m_position -= 2;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
put(this->m_results.get_last_closed_paren());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <class OutputIterator, class Results, class traits>
|
||||
void basic_regex_formatter<OutputIterator, Results, traits>::format_escape()
|
||||
{
|
||||
|
@ -326,9 +326,9 @@ inline const charT* get_escape_R_string()
|
||||
#endif
|
||||
static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?',
|
||||
'|', '[', '\x0A', '\x0B', '\x0C', '\x85', '\\', 'x', '{', '2', '0', '2', '8', '}',
|
||||
'\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')' };
|
||||
'\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' };
|
||||
static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?',
|
||||
'|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')' };
|
||||
'|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' };
|
||||
|
||||
charT c = static_cast<charT>(0x2029u);
|
||||
bool b = (static_cast<unsigned>(c) == 0x2029u);
|
||||
|
Reference in New Issue
Block a user