Fix gcc warnings from ICU wrappers.

Add optional support for marked sub-expression location information.
Add support for ${n} in format replacement text.
Fixes #2556.
Fixes #2269.
Fixes #2514.

[SVN r50370]
This commit is contained in:
John Maddock
2008-12-23 11:46:00 +00:00
parent c997a1fcc6
commit b4152cd74d
94 changed files with 1344 additions and 1068 deletions

View File

@ -74,6 +74,9 @@ struct regex_data
unsigned int m_can_be_null; // whether we can match a null string
re_detail::raw_storage m_data; // the buffer in which our states are constructed
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
std::vector<
std::pair<
std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*.
};
//
// class basic_regex_implementation
@ -122,6 +125,14 @@ public:
{
return this->m_expression;
}
//
// subexpression:
// Returns a pair of iterators into the expression text, built from the
// (first, second) offsets stored in m_subs for marked sub-expression n.
// n is 1-based: n == 0 throws std::out_of_range, and any n with no stored
// entry throws std::out_of_range from vector::at.
std::pair<const_iterator, const_iterator> BOOST_REGEX_CALL subexpression(std::size_t n)const
{
   if(0 == n)
      throw std::out_of_range("0 is not a valid subexpression index.");
   // m_subs is indexed from zero, sub-expressions are numbered from one:
   const std::pair<std::size_t, std::size_t>& offsets = this->m_subs.at(n - 1);
   const_iterator first = expression() + offsets.first;
   const_iterator last = expression() + offsets.second;
   return std::pair<const_iterator, const_iterator>(first, last);
}
//
// begin, end:
const_iterator BOOST_REGEX_CALL begin()const
@ -366,7 +377,13 @@ public:
return m_pimpl.get() ? m_pimpl->str() : std::basic_string<charT>();
}
//
// begin, end:
// begin, end, subexpression:
// Forwards to subexpression(n) on the implementation object held by m_pimpl;
// throws std::logic_error when m_pimpl holds no object.
std::pair<const_iterator, const_iterator> BOOST_REGEX_CALL subexpression(std::size_t n)const
{
   if(m_pimpl.get())
      return m_pimpl->subexpression(n);
   throw std::logic_error("Can't access subexpressions in an invalid regex.");
}
const_iterator BOOST_REGEX_CALL begin()const
{
return (m_pimpl.get() ? m_pimpl->begin() : 0);

View File

@ -368,7 +368,11 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
//
unsigned markid = 0;
if(0 == (this->flags() & regbase::nosubs))
{
markid = ++m_mark_count;
if(this->flags() & regbase::save_subexpression_location)
this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
}
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
pb->index = markid;
std::ptrdiff_t last_paren_start = this->getoffset(pb);
@ -415,6 +419,8 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
return false;
}
BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
if(markid && (this->flags() & regbase::save_subexpression_location))
this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
++m_position;
//
// append closing parenthesis state:

View File

@ -65,10 +65,16 @@ inline bool can_start(unsigned short c, const unsigned char* map, unsigned char
#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
// Overload for wchar_t: any c at or above 1 << CHAR_BIT yields true without
// consulting map; otherwise the result is map[c] & mask.
// NOTE(review): two return statements appear below and, as written, only the
// first can execute. The second differs only in casting the bound to wchar_t
// from an unsigned literal -- confirm which line is meant to remain.
inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask)
{
return ((c >= (1 << CHAR_BIT)) ? true : map[c] & mask);
return ((c >= static_cast<wchar_t>(1u << CHAR_BIT)) ? true : map[c] & mask);
}
#endif
#endif
#if !defined(BOOST_NO_INTRINSIC_WCHAR_T)
// Overload for unsigned int: values at or above 1 << CHAR_BIT are never
// looked up in map and always report true; smaller values report whether
// any bit of mask is set in map[c].
inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask)
{
   const unsigned int lookup_limit = 1u << CHAR_BIT;
   if(c >= lookup_limit)
      return true;
   return (map[c] & mask) != 0;
}
#endif
//

View File

@ -84,6 +84,7 @@ public:
nocollate = 0, // don't use locale specific collation (deprecated)
collate = 1 << 21, // use locale specific collation
nosubs = 1 << 22, // don't mark sub-expressions
save_subexpression_location = 1 << 23, // save subexpression locations
optimize = 0, // not really supported
@ -141,6 +142,7 @@ namespace regex_constants{
mod_x = ::boost::regbase::mod_x,
mod_s = ::boost::regbase::mod_s,
no_mod_s = ::boost::regbase::no_mod_s,
save_subexpression_location = ::boost::regbase::save_subexpression_location,
basic = ::boost::regbase::basic,
extended = ::boost::regbase::extended,

View File

@ -231,6 +231,8 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_perl()
//
// OK find out what kind it is:
//
bool have_brace = false;
const char_type* save_position = m_position;
switch(*m_position)
{
case '&':
@ -248,22 +250,28 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_perl()
case '$':
put(*m_position++);
break;
case '{':
have_brace = true;
++m_position;
// fall through....
default:
// see if we have a number:
{
std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end);
len = (std::min)(static_cast<std::ptrdiff_t>(2), len);
int v = m_traits.toi(m_position, m_position + len, 10);
if(v < 0)
if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}'))))
{
// leave the $ as is, and carry on:
--m_position;
m_position = --save_position;
put(*m_position);
++m_position;
break;
}
// otherwise output sub v:
put(this->m_results[v]);
if(have_brace)
++m_position;
}
}
}

View File

@ -285,7 +285,7 @@ inline u32regex_token_iterator<const wchar_t*> make_u32regex_token_iterator(cons
#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2)
// Builds a u32regex_token_iterator over the range [p, p + u_strlen(p)) for
// expression e with match flags m.
// NOTE(review): two return statements appear below and, as written, only the
// first can execute; it does not pass submatch to the iterator. The second
// forwards submatch -- confirm which line is meant to remain.
inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const UChar* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default)
{
return u32regex_token_iterator<const UChar*>(p, p+u_strlen(p), e, m);
return u32regex_token_iterator<const UChar*>(p, p+u_strlen(p), e, submatch, m);
}
#endif
template <class charT, class Traits, class Alloc>
@ -347,7 +347,7 @@ inline u32regex_token_iterator<const wchar_t*> make_u32regex_token_iterator(cons
#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2)
// Builds a u32regex_token_iterator over the range [p, p + u_strlen(p)) for
// expression e with match flags m; submatch is a vector of int.
// NOTE(review): two return statements appear below and, as written, only the
// first can execute; it does not pass submatch to the iterator. The second
// forwards submatch -- confirm which line is meant to remain.
inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const UChar* p, const u32regex& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default)
{
return u32regex_token_iterator<const UChar*>(p, p+u_strlen(p), e, m);
return u32regex_token_iterator<const UChar*>(p, p+u_strlen(p), e, submatch, m);
}
#endif
template <class charT, class Traits, class Alloc>