forked from boostorg/regex
Fix gcc warnings from ICU wrappers.
Add optional support for marked sub-expression location information. Add support for ${n} in format replacement text. Fixes #2556. Fixes #2269. Fixes #2514. [SVN r50370]
This commit is contained in:
@ -74,6 +74,9 @@ struct regex_data
|
||||
unsigned int m_can_be_null; // whether we can match a null string
|
||||
re_detail::raw_storage m_data; // the buffer in which our states are constructed
|
||||
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
|
||||
std::vector<
|
||||
std::pair<
|
||||
std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*.
|
||||
};
|
||||
//
|
||||
// class basic_regex_implementation
|
||||
@ -122,6 +125,14 @@ public:
|
||||
{
|
||||
return this->m_expression;
|
||||
}
|
||||
//
// subexpression:
// Looks up the pair of offsets stored in m_subs for sub-expression n
// (n counts from 1; entry n - 1 is used) and returns them as a pair of
// iterators obtained by adding each offset to expression().
// Throws std::out_of_range when n is 0; m_subs.at() performs the
// remaining bounds check.
//
std::pair<const_iterator, const_iterator> BOOST_REGEX_CALL subexpression(std::size_t n)const
{
   if(0 == n)
      throw std::out_of_range("0 is not a valid subexpression index.");
   const std::pair<std::size_t, std::size_t>& offsets = this->m_subs.at(n - 1);
   return std::pair<const_iterator, const_iterator>(
      expression() + offsets.first,
      expression() + offsets.second);
}
|
||||
//
|
||||
// begin, end:
|
||||
const_iterator BOOST_REGEX_CALL begin()const
|
||||
@ -366,7 +377,13 @@ public:
|
||||
return m_pimpl.get() ? m_pimpl->str() : std::basic_string<charT>();
|
||||
}
|
||||
//
|
||||
// begin, end:
|
||||
// begin, end, subexpression:
|
||||
//
// subexpression:
// Forwards to the implementation object's subexpression(n) when one is
// attached; otherwise throws std::logic_error because there is nothing
// to query.
//
std::pair<const_iterator, const_iterator> BOOST_REGEX_CALL subexpression(std::size_t n)const
{
   if(m_pimpl.get())
      return m_pimpl->subexpression(n);
   throw std::logic_error("Can't access subexpressions in an invalid regex.");
}
|
||||
const_iterator BOOST_REGEX_CALL begin()const
|
||||
{
|
||||
return (m_pimpl.get() ? m_pimpl->begin() : 0);
|
||||
|
@ -368,7 +368,11 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
||||
//
|
||||
unsigned markid = 0;
|
||||
if(0 == (this->flags() & regbase::nosubs))
|
||||
{
|
||||
markid = ++m_mark_count;
|
||||
if(this->flags() & regbase::save_subexpression_location)
|
||||
this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
|
||||
}
|
||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
|
||||
pb->index = markid;
|
||||
std::ptrdiff_t last_paren_start = this->getoffset(pb);
|
||||
@ -415,6 +419,8 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
||||
return false;
|
||||
}
|
||||
BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
|
||||
if(markid && (this->flags() & regbase::save_subexpression_location))
|
||||
this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
|
||||
++m_position;
|
||||
//
|
||||
// append closing parenthesis state:
|
||||
|
@ -65,10 +65,16 @@ inline bool can_start(unsigned short c, const unsigned char* map, unsigned char
|
||||
#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
|
||||
//
// can_start (wchar_t overload):
// Returns true when c is at or beyond 1 << CHAR_BIT; such values are not
// looked up in map. Otherwise returns whether map[c] has any of the bits
// in mask set.
//
inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask)
{
   // The limit is built as an unsigned value and cast to wchar_t so both
   // sides of the comparison have the same type (no signed/unsigned
   // comparison warning). A single return replaces the previous pair of
   // back-to-back returns, the second of which could never execute.
   if(c >= static_cast<wchar_t>(1u << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
|
||||
#endif
|
||||
#endif
|
||||
#if !defined(BOOST_NO_INTRINSIC_WCHAR_T)
|
||||
//
// can_start (unsigned int overload):
// Values of c at or beyond 1 << CHAR_BIT are always reported as possible
// starts without consulting map; smaller values are tested by checking
// map[c] against mask.
//
inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask)
{
   const unsigned int table_limit = static_cast<unsigned int>(1u << CHAR_BIT);
   if(c >= table_limit)
      return true;
   return (map[c] & mask) != 0;
}
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
|
@ -84,6 +84,7 @@ public:
|
||||
nocollate = 0, // don't use locale specific collation (deprecated)
|
||||
collate = 1 << 21, // use locale specific collation
|
||||
nosubs = 1 << 22, // don't mark sub-expressions
|
||||
save_subexpression_location = 1 << 23, // save subexpression locations
|
||||
optimize = 0, // not really supported
|
||||
|
||||
|
||||
@ -141,6 +142,7 @@ namespace regex_constants{
|
||||
mod_x = ::boost::regbase::mod_x,
|
||||
mod_s = ::boost::regbase::mod_s,
|
||||
no_mod_s = ::boost::regbase::no_mod_s,
|
||||
save_subexpression_location = ::boost::regbase::save_subexpression_location,
|
||||
|
||||
basic = ::boost::regbase::basic,
|
||||
extended = ::boost::regbase::extended,
|
||||
|
@ -231,6 +231,8 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_perl()
|
||||
//
|
||||
// OK find out what kind it is:
|
||||
//
|
||||
bool have_brace = false;
|
||||
const char_type* save_position = m_position;
|
||||
switch(*m_position)
|
||||
{
|
||||
case '&':
|
||||
@ -248,22 +250,28 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_perl()
|
||||
case '$':
|
||||
put(*m_position++);
|
||||
break;
|
||||
case '{':
|
||||
have_brace = true;
|
||||
++m_position;
|
||||
// fall through....
|
||||
default:
|
||||
// see if we have a number:
|
||||
{
|
||||
std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end);
|
||||
len = (std::min)(static_cast<std::ptrdiff_t>(2), len);
|
||||
int v = m_traits.toi(m_position, m_position + len, 10);
|
||||
if(v < 0)
|
||||
if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}'))))
|
||||
{
|
||||
// leave the $ as is, and carry on:
|
||||
--m_position;
|
||||
m_position = --save_position;
|
||||
put(*m_position);
|
||||
++m_position;
|
||||
break;
|
||||
}
|
||||
// otherwise output sub v:
|
||||
put(this->m_results[v]);
|
||||
if(have_brace)
|
||||
++m_position;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -285,7 +285,7 @@ inline u32regex_token_iterator<const wchar_t*> make_u32regex_token_iterator(cons
|
||||
#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2)
|
||||
//
// make_u32regex_token_iterator (const UChar*, single sub-match index):
// Constructs a u32regex_token_iterator over [p, p + u_strlen(p)) for
// expression e, passing submatch and the match flags m through to the
// iterator's constructor.
//
inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const UChar* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default)
{
   // submatch must be handed to the iterator: a constructor call that
   // passes only (first, last, e, m) silently discards the caller's
   // requested sub-match.
   return u32regex_token_iterator<const UChar*>(p, p+u_strlen(p), e, submatch, m);
}
|
||||
#endif
|
||||
template <class charT, class Traits, class Alloc>
|
||||
@ -347,7 +347,7 @@ inline u32regex_token_iterator<const wchar_t*> make_u32regex_token_iterator(cons
|
||||
#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2)
|
||||
//
// make_u32regex_token_iterator (const UChar*, list of sub-match indexes):
// Constructs a u32regex_token_iterator over [p, p + u_strlen(p)) for
// expression e, passing the submatch vector and the match flags m
// through to the iterator's constructor.
//
inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const UChar* p, const u32regex& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default)
{
   // The submatch list must be handed to the iterator: a constructor call
   // that passes only (first, last, e, m) silently discards the caller's
   // requested sub-matches.
   return u32regex_token_iterator<const UChar*>(p, p+u_strlen(p), e, submatch, m);
}
|
||||
#endif
|
||||
template <class charT, class Traits, class Alloc>
|
||||
|
Reference in New Issue
Block a user