Almost complete regex implementation now...

[SVN r22718]
This commit is contained in:
John Maddock
2004-05-01 11:23:02 +00:00
parent 641d60b059
commit d2c3ec6d57
26 changed files with 587 additions and 210 deletions

View File

@ -53,6 +53,8 @@ public:
ogrep_predicate(unsigned int& i, const char* p, iterator start, iterator end) : lines(i), filename(p), last_line(-1), end_of_storage(end), last_line_start(start) {}
ogrep_predicate(const ogrep_predicate& o) : lines(o.lines), filename(o.filename), last_line(o.last_line), end_of_storage(o.end_of_storage), last_line_start(o.last_line_start) {}
bool operator () (const boost::match_results<iterator, Allocator>& i);
private:
void operator=(const ogrep_predicate&);
};
// ideally we'd ignore the allocator type and use a template member function

View File

@ -19,15 +19,7 @@
#include <boost/regex.hpp>
// case sensitive reg_expression determines our allocator type:
typedef boost::reg_expression<char> re_type;
typedef re_type::allocator_type allocator_type;
// now declare static (global) data, including an allocator
// instance which we'll pass to all instances that require an allocator.
extern allocator_type a;
typedef boost::basic_regex<char> re_type;
extern re_type e;
// flags for output:

View File

@ -46,10 +46,7 @@ using std::endl;
#include <algorithm>
#endif
allocator_type a;
re_type e(a);
//rei_type ei(a);
re_type e;
// flags for output:

View File

@ -145,7 +145,7 @@ int main(int argc, char**argv)
double tim;
bool result;
int iters = 100;
double wait_time = std::min(t.elapsed_min() * 1000, 1.0);
double wait_time = (std::min)(t.elapsed_min() * 1000, 1.0);
while(true)
{

View File

@ -174,6 +174,7 @@ using std::distance;
# ifdef BOOST_MSVC
// warning suppression with VC6:
# pragma warning(disable: 4800)
# pragma warning(disable: 4786)
# endif
# define BOOST_REGEX_MAKE_BOOL(x) static_cast<bool>(x)
#endif
@ -367,12 +368,14 @@ BOOST_REGEX_DECL void BOOST_REGEX_CALL reset_stack_guard_page();
namespace boost{
namespace re_detail{
BOOST_REGEX_DECL void BOOST_REGEX_CALL raise_runtime_error(const std::runtime_error& ex);
template <class traits>
void raise_error(const traits& t, unsigned code)
{
(void)t; // warning suppression
std::runtime_error e(t.error_string(code));
throw_exception(e);
::boost::re_detail::raise_runtime_error(e);
}
}

View File

@ -89,7 +89,7 @@ class static_mutex
{
public:
typedef scoped_static_mutex_lock scoped_lock;
volatile boost::int32_t m_mutex;
boost::int32_t m_mutex;
};
#define BOOST_STATIC_MUTEX_INIT { 0, }

View File

@ -198,6 +198,7 @@ protected:
re_syntax_base* m_last_state; // the last state we added
bool m_icase; // true for case insensitive matches
unsigned m_repeater_id; // the id of the next repeater
bool m_has_backrefs; // true if there are actually any backrefs
unsigned m_backrefs; // bitmask of permitted backrefs
boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
@ -211,17 +212,19 @@ private:
void fixup_pointers(re_syntax_base* state);
void create_startmaps(re_syntax_base* state);
void create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask);
int calculate_backstep(re_syntax_base* state);
void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask);
unsigned get_restart_type(re_syntax_base* state);
void set_all_masks(unsigned char* bits, unsigned char);
bool is_bad_repeat(re_syntax_base* pt);
void set_bad_repeat(re_syntax_base* pt);
syntax_element_type get_repeat_type(re_syntax_base* state);
void probe_leading_repeat(re_syntax_base* state);
};
template <class charT, class traits>
basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
: m_pdata(data), m_traits(data->m_traits), m_last_state(0), m_repeater_id(0), m_backrefs(0)
: m_pdata(data), m_traits(data->m_traits), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0)
{
m_pdata->m_data.clear();
static const charT w = 'w';
@ -244,6 +247,9 @@ basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits
template <class charT, class traits>
re_syntax_base* basic_regex_creator<charT, traits>::append_state(syntax_element_type t, std::size_t s)
{
// if the state is a backref then make a note of it:
if(t == syntax_element_backref)
this->m_has_backrefs = true;
// append a new state, start by aligning our last one:
m_pdata->m_data.align();
// set the offset to the next state in our last one:
@ -538,7 +544,7 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
return 0; // invalid or unsupported equivalence class
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
{
charT c(i);
charT c(static_cast<charT>(i));
string_type s2 = this->m_traits.transform_primary(&c, &c+1);
if(s == s2)
result->_map[i] = true;
@ -585,6 +591,8 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
// get the restart type:
m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
// optimise a leading repeat if there is one:
probe_leading_repeat(m_pdata->m_first_state);
}
template <class charT, class traits>
@ -645,6 +653,11 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
// adjust the type of the state to allow for faster matching:
state->type = this->get_repeat_type(state);
return;
case syntax_element_backstep:
// we need to calculate how big the backstep is:
static_cast<re_brace*>(state)->index
= this->calculate_backstep(state->next.p);
// fall through:
default:
state = state->next.p;
}
@ -652,7 +665,65 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
}
template <class charT, class traits>
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask)
int basic_regex_creator<charT, traits>::calculate_backstep(re_syntax_base* state)
{
   //
   // Walk the state list starting at "state" and add up how many
   // characters the states consume.  The caller stores the result in
   // the index of a syntax_element_backstep state.
   //
   // Returns the accumulated width when an endmark with index -1 or -2
   // is reached, or -1 if the width can not be determined (a repeat,
   // backref, combining state, nested backstep, a long set that is not
   // a singleton, or running off the end of the list).
   //
   typedef typename traits::char_class_type mask_type;
   int result = 0;
   while(state)
   {
      switch(state->type)
      {
      case syntax_element_startmark:
         if((static_cast<re_brace*>(state)->index == -1)
            || (static_cast<re_brace*>(state)->index == -2))
         {
            // a nested group marked -1/-2: follow the jump that comes
            // straight after the opening mark to its target and carry
            // on from the state after that, so nothing inside the
            // nested group is counted:
            state = static_cast<re_jump*>(state->next.p)->alt.p->next.p;
            continue;
         }
         else if(static_cast<re_brace*>(state)->index == -3)
         {
            // a group marked -3: step over the state that follows the
            // opening mark and count the contents as normal:
            state = state->next.p->next.p;
            continue;
         }
         break;
      case syntax_element_endmark:
         if((static_cast<re_brace*>(state)->index == -1)
            || (static_cast<re_brace*>(state)->index == -2))
            return result;
         // any other closing mark consumes nothing.  We must not fall
         // through to the literal case here: that would reinterpret
         // the re_brace as an re_literal and add a bogus length:
         break;
      case syntax_element_literal:
         result += static_cast<re_literal*>(state)->length;
         break;
      case syntax_element_wild:
      case syntax_element_set:
         result += 1;
         break;
      case syntax_element_backref:
      case syntax_element_rep:
      case syntax_element_combining:
      case syntax_element_dot_rep:
      case syntax_element_char_rep:
      case syntax_element_short_set_rep:
      case syntax_element_long_set_rep:
      case syntax_element_backstep:
         // no fixed width can be deduced for these:
         return -1;
      case syntax_element_long_set:
         if(static_cast<re_set_long<mask_type>*>(state)->singleton == 0)
            return -1;
         result += 1;
         break;
      case syntax_element_jump:
         // carry on from wherever the jump lands:
         state = static_cast<re_jump*>(state)->alt.p;
         continue;
      default:
         // everything else is treated as consuming no characters:
         break;
      }
      state = state->next.p;
   }
   // ran out of states without finding the closing mark:
   return -1;
}
template <class charT, class traits>
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
{
int not_last_jump = 1;
while(state)
@ -661,16 +732,16 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
{
case syntax_element_literal:
{
// don't set anything in *pnull, set each element in map
// don't set anything in *pnull, set each element in l_map
// that could match the first character in the literal:
if(map)
if(l_map)
{
map[0] |= mask_init;
l_map[0] |= mask_init;
charT first_char = *static_cast<charT*>(static_cast<void*>(static_cast<re_literal*>(state) + 1));
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
{
if(m_traits.translate(static_cast<charT>(i), m_icase) == first_char)
map[i] |= mask;
l_map[i] |= mask;
}
}
return;
@ -678,11 +749,11 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
case syntax_element_end_line:
{
// next character must be a line separator (if there is one):
if(map)
if(l_map)
{
map[0] |= mask_init;
map['\n'] |= mask;
map['\r'] |= mask;
l_map[0] |= mask_init;
l_map['\n'] |= mask;
l_map['\r'] |= mask;
}
// now figure out if we can match a NULL string at this point:
if(pnull)
@ -697,13 +768,13 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
case syntax_element_wild:
{
// can't be null, any character can match:
set_all_masks(map, mask);
set_all_masks(l_map, mask);
return;
}
case syntax_element_match:
{
// must be null, any character can match:
set_all_masks(map, mask);
set_all_masks(l_map, mask);
if(pnull)
*pnull |= mask;
return;
@ -711,14 +782,14 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
case syntax_element_word_start:
{
// recurse, then AND with all the word characters:
create_startmap(state->next.p, map, pnull, mask);
if(map)
create_startmap(state->next.p, l_map, pnull, mask);
if(l_map)
{
map[0] |= mask_init;
l_map[0] |= mask_init;
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
{
if(!m_traits.is_class(static_cast<charT>(i), m_word_mask))
map[i] &= static_cast<unsigned char>(~mask);
l_map[i] &= static_cast<unsigned char>(~mask);
}
}
return;
@ -726,14 +797,14 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
case syntax_element_word_end:
{
// recurse, then AND with all the word characters:
create_startmap(state->next.p, map, pnull, mask);
if(map)
create_startmap(state->next.p, l_map, pnull, mask);
if(l_map)
{
map[0] |= mask_init;
l_map[0] |= mask_init;
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
{
if(m_traits.is_class(static_cast<charT>(i), m_word_mask))
map[i] &= static_cast<unsigned char>(~mask);
l_map[i] &= static_cast<unsigned char>(~mask);
}
}
return;
@ -746,32 +817,32 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
return;
}
case syntax_element_long_set:
if(map)
if(l_map)
{
typedef typename traits::char_class_type mask_type;
if(static_cast<re_set_long<mask_type>*>(state)->singleton)
{
map[0] |= mask_init;
l_map[0] |= mask_init;
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
{
charT c = static_cast<charT>(i);
if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<mask_type>*>(state), *m_pdata))
map[i] |= mask;
l_map[i] |= mask;
}
}
else
set_all_masks(map, mask);
set_all_masks(l_map, mask);
}
return;
case syntax_element_set:
if(map)
if(l_map)
{
map[0] |= mask_init;
l_map[0] |= mask_init;
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
{
if(static_cast<re_set*>(state)->_map[
static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), this->m_icase))])
map[i] |= mask;
l_map[i] |= mask;
}
}
return;
@ -790,14 +861,14 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
re_alt* rep = static_cast<re_alt*>(state);
if(rep->_map[0] & mask_init)
{
if(map)
if(l_map)
{
// copy previous results:
map[0] |= mask_init;
l_map[0] |= mask_init;
for(unsigned int i = 0; i <= UCHAR_MAX; ++i)
{
if(rep->_map[i] & mask_any)
map[i] |= mask;
l_map[i] |= mask;
}
}
if(pnull)
@ -812,29 +883,53 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
// so take the union of the two options:
if(is_bad_repeat(state))
{
set_all_masks(map, mask);
set_all_masks(l_map, mask);
return;
}
set_bad_repeat(state);
create_startmap(state->next.p, map, pnull, mask);
if((state->type == syntax_element_alt)
create_startmap(state->next.p, l_map, pnull, mask);
if((state->type == syntax_element_alt)
|| (static_cast<re_repeat*>(state)->min == 0)
|| (not_last_jump == 0))
create_startmap(rep->alt.p, map, pnull, mask);
create_startmap(rep->alt.p, l_map, pnull, mask);
}
}
return;
case syntax_element_soft_buffer_end:
// match newline or null:
if(map)
if(l_map)
{
map[0] |= mask_init;
map['\n'] |= mask;
map['\r'] |= mask;
l_map[0] |= mask_init;
l_map['\n'] |= mask;
l_map['\r'] |= mask;
}
if(pnull)
*pnull |= mask;
return;
case syntax_element_endmark:
// need to handle independent subs as a special case:
if(static_cast<re_brace*>(state)->index == -3)
{
// can be null, any character can match:
set_all_masks(l_map, mask);
if(pnull)
*pnull |= mask;
return;
}
else
{
state = state->next.p;
break;
}
case syntax_element_startmark:
// need to handle independent subs as a special case:
if(static_cast<re_brace*>(state)->index == -3)
{
state = state->next.p->next.p;
break;
}
// otherwise fall through:
default:
state = state->next.p;
}
@ -962,6 +1057,48 @@ syntax_element_type basic_regex_creator<charT, traits>::get_repeat_type(re_synta
return state->type;
}
template <class charT, class traits>
void basic_regex_creator<charT, traits>::probe_leading_repeat(re_syntax_base* state)
{
   //
   // Scan forward from "state" over opening marks with a non-negative
   // index and over the zero-width states listed below.  If the first
   // state after those is a single character / set / dot repeat, and the
   // expression contains no backrefs, flag that repeat as "leading".
   // Any other state ends the scan without changing anything.
   //
   for(;;)
   {
      bool keep_scanning = false;
      switch(state->type)
      {
      case syntax_element_startmark:
         // only marks with a non-negative index are stepped over:
         keep_scanning = (static_cast<re_brace*>(state)->index >= 0);
         break;
      case syntax_element_endmark:
      case syntax_element_start_line:
      case syntax_element_end_line:
      case syntax_element_word_boundary:
      case syntax_element_within_word:
      case syntax_element_word_start:
      case syntax_element_word_end:
      case syntax_element_buffer_start:
      case syntax_element_buffer_end:
      case syntax_element_restart_continue:
         keep_scanning = true;
         break;
      case syntax_element_dot_rep:
      case syntax_element_char_rep:
      case syntax_element_short_set_rep:
      case syntax_element_long_set_rep:
         // found the leading repeat, mark it unless backrefs are in use:
         if(!this->m_has_backrefs)
            static_cast<re_repeat*>(state)->leading = true;
         break;
      default:
         break;
      }
      if(!keep_scanning)
         return;
      state = state->next.p;
      if(!state)
         return;
   }
}
} // namespace re_detail
} // namespace boost

View File

@ -249,16 +249,14 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
//
if((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
{
if(m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
return parse_perl_extension();
}
//
// update our mark count, and append the required state:
//
unsigned markid;
if(this->flags() & regbase::nosubs)
markid = 0;
else
unsigned markid = 0;
if(0 == (this->flags() & regbase::nosubs))
markid = ++m_mark_count;
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
pb->index = markid;
@ -1070,6 +1068,10 @@ bool basic_regex_parser<charT, traits>::parse_backref()
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_QE()
{
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable:4127)
#endif
//
// parse a \Q...\E sequence:
//
@ -1104,6 +1106,9 @@ bool basic_regex_parser<charT, traits>::parse_QE()
++start;
}
return true;
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
}
template <class charT, class traits>
@ -1114,7 +1119,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
//
// backup some state, and prepare the way:
//
int markid;
int markid = 0;
std::ptrdiff_t jump_offset = 0;
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
std::ptrdiff_t last_paren_start = this->getoffset(pb);
@ -1157,6 +1162,35 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size();
break;
case regex_constants::escape_type_left_word:
{
// a lookbehind assertion:
if(++m_position == m_end)
fail(REG_BADRPT, m_position - m_base);
regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
if(t == regex_constants::syntax_not)
pb->index = markid = -2;
else if(t == regex_constants::syntax_equal)
pb->index = markid = -1;
else
fail(REG_BADRPT, m_position - m_base);
++m_position;
jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
this->append_state(syntax_element_backstep, sizeof(re_brace));
this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size();
break;
}
case regex_constants::escape_type_right_word:
//
// an independent sub-expression:
//
pb->index = markid = -3;
++m_position;
jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size();
break;
default:
fail(REG_BADRPT, m_position - m_base);
}
@ -1180,6 +1214,11 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
this->m_pdata->m_data.align();
re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
if(this->m_last_state == jmp)
{
// Oops... we didn't have anything inside the assertion:
fail(REG_EMPTY, m_position - m_base);
}
}
//
// append closing parenthesis state:

View File

@ -29,6 +29,15 @@
#include <boost/regex/v4/primary_transform.hpp>
#endif
#ifdef BOOST_HAS_ABI_HEADERS
# include BOOST_ABI_PREFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable:4786)
#endif
namespace boost{
//
@ -58,8 +67,8 @@ public:
const charT* getnext() { return this->gptr(); }
protected:
std::basic_streambuf<charT, traits>* setbuf(char_type* s, streamsize n);
typename parser_buf<charT, traits>::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which);
typename parser_buf<charT, traits>::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which);
//typename parser_buf<charT, traits>::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which);
//typename parser_buf<charT, traits>::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which);
private:
parser_buf& operator=(const parser_buf&);
parser_buf(const parser_buf&);
@ -73,6 +82,7 @@ parser_buf<charT, traits>::setbuf(char_type* s, streamsize n)
return this;
}
#if 0
template<class charT, class traits>
typename parser_buf<charT, traits>::pos_type
parser_buf<charT, traits>::seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which)
@ -131,7 +141,7 @@ parser_buf<charT, traits>::seekpos(pos_type sp, ::std::ios_base::openmode which)
}
return pos_type(off_type(-1));
}
#endif
//
// class cpp_regex_traits_base:
@ -308,9 +318,25 @@ class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer<charT
{
public:
typedef typename cpp_regex_traits<charT>::char_class_type char_class_type;
typedef typename std::ctype<charT>::mask native_mask_type;
BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 16);
BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 17);
BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 18);
#ifdef __GNUC__
BOOST_STATIC_CONSTANT(native_mask_type,
mask_base =
std::ctype<charT>::alnum
| std::ctype<charT>::alpha
| std::ctype<charT>::cntrl
| std::ctype<charT>::digit
| std::ctype<charT>::graph
| std::ctype<charT>::lower
| std::ctype<charT>::print
| std::ctype<charT>::punct
| std::ctype<charT>::space
| std::ctype<charT>::upper
| std::ctype<charT>::xdigit);
#else
BOOST_STATIC_CONSTANT(char_class_type,
mask_base =
std::ctype<charT>::alnum
@ -324,6 +350,7 @@ public:
| std::ctype<charT>::space
| std::ctype<charT>::upper
| std::ctype<charT>::xdigit);
#endif
//BOOST_STATIC_ASSERT(0 == (mask_base & (mask_word | mask_unicode)));
@ -346,9 +373,9 @@ public:
char_class_type result = lookup_classname_imp(p1, p2);
if(result == 0)
{
string_type s(p1, p2);
this->m_pctype->tolower(&*s.begin(), &*s.end());
result = lookup_classname_imp(&*s.begin(), &*s.end());
string_type temp(p1, p2);
this->m_pctype->tolower(&*temp.begin(), &*temp.begin() + temp.size());
result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size());
}
return result;
}
@ -388,20 +415,20 @@ typename cpp_regex_traits_implementation<charT>::string_type
// the best we can do is translate to lower case, then get a regular sort key:
{
result.assign(p1, p2);
m_pctype->tolower(&*result.begin(), &*result.end());
result = this->m_pcollate->transform(&*result.begin(), &*result.end());
this->m_pctype->tolower(&*result.begin(), &*result.begin() + result.size());
result = this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size());
break;
}
case sort_fixed:
{
// get a regular sort key, and then truncate it:
result.assign(this->m_pcollate->transform(&*result.begin(), &*result.end()));
result.assign(this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size()));
result.erase(this->m_collate_delim);
break;
}
case sort_delim:
// get a regular sort key, and then truncate everything after the delim:
result.assign(this->m_pcollate->transform(&*result.begin(), &*result.end()));
result.assign(this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size()));
std::size_t i;
for(i = 0; i < result.size(); ++i)
{
@ -425,10 +452,30 @@ typename cpp_regex_traits_implementation<charT>::string_type
if(pos != m_custom_collate_names.end())
return pos->second;
}
#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
std::string name(p1, p2);
#else
std::string name;
const charT* p0 = p1;
while(p0 != p2)
name.append(1, char(*p0++));
#endif
name = lookup_default_collate_name(name);
#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
if(name.size())
return string_type(name.begin(), name.end());
#else
if(name.size())
{
string_type result;
typedef std::string::const_iterator iter;
iter b = name.begin();
iter e = name.end();
while(b != e)
result.append(1, charT(*b++));
return result;
}
#endif
if(p2 - p1 == 1)
return string_type(1, *p1);
return string_type();
@ -731,4 +778,12 @@ static_mutex& cpp_regex_traits<charT>::get_mutex_inst()
} // boost
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
# include BOOST_ABI_SUFFIX
#endif
#endif

View File

@ -333,6 +333,7 @@ private:
bool match_char_repeat();
bool match_dot_repeat_fast();
bool match_dot_repeat_slow();
bool match_backstep();
bool backtrack_till_match(unsigned count);
// find procs stored in s_find_vtable:

View File

@ -659,6 +659,17 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue()
return false;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep()
{
   //
   // Move "position" back by the number of characters stored in the
   // index of the current (backstep) state, then continue with the next
   // state.  Returns false, leaving position and pstate untouched, if
   // there are not that many characters between search_base and
   // position.
   //
   const std::ptrdiff_t step = static_cast<const re_brace*>(pstate)->index;
   //
   // The index is whatever calculate_backstep() produced, and that is -1
   // when no fixed width could be deduced.  Stepping "back" by a negative
   // amount would move forwards with no check against the end of the
   // input, so treat it as a failed match instead:
   //
   if(step < 0)
      return false;
   //
   // Walk back one character at a time on a copy: this costs no more
   // than "step" decrements whatever the iterator category, and avoids
   // measuring the whole distance from search_base.
   // NOTE(review): assumes position and search_base are BidiIterator.
   //
   BidiIterator new_position = position;
   for(std::ptrdiff_t moved = 0; moved < step; ++moved)
   {
      if(new_position == search_base)
         return false; // not enough input behind us
      --new_position;
   }
   position = new_position;
   pstate = pstate->next.p;
   return true;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
{
@ -737,7 +748,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_line()
return true;
while(position != last)
{
while((position != last) && (*position != '\n'))
while((position != last) && !is_separator(*position))
++position;
if(position == last)
return false;

View File

@ -113,7 +113,7 @@ struct saved_single_repeat : public saved_state
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
{
static matcher_proc_type const s_match_vtable[26] =
static matcher_proc_type const s_match_vtable[27] =
{
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
@ -141,6 +141,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
&perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
};
push_recursion_stopper();
@ -717,8 +718,9 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
#ifdef __BORLANDC__
#pragma option push -w-8008 -w-8066 -w-8004
#endif
typedef typename traits::char_class_type mask_type;
const re_repeat* rep = static_cast<const re_repeat*>(pstate);
const re_set_long<typename traits::char_class_type>* set = static_cast<const re_set_long<typename traits::char_class_type>*>(pstate->next.p);
const re_set_long<mask_type>* set = static_cast<const re_set_long<mask_type>*>(pstate->next.p);
std::size_t count = 0;
//
// start by working out how much we can skip:
@ -1207,6 +1209,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_short_set_repeat(bool
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat(bool r)
{
typedef typename traits::char_class_type mask_type;
saved_single_repeat<BidiIterator>* pmp = static_cast<saved_single_repeat<BidiIterator>*>(m_backup_state);
// if we have a match, just discard this state:
@ -1219,7 +1222,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat(bool
const re_repeat* rep = pmp->rep;
std::size_t count = pmp->count;
pstate = rep->next.p;
const re_set_long<typename traits::char_class_type>* set = static_cast<const re_set_long<typename traits::char_class_type>*>(pstate);
const re_set_long<mask_type>* set = static_cast<const re_set_long<mask_type>*>(pstate);
position = pmp->last_position;
assert(rep->type == syntax_element_long_set_rep);

View File

@ -48,7 +48,7 @@ public:
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
{
static matcher_proc_type const s_match_vtable[26] =
static matcher_proc_type const s_match_vtable[27] =
{
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
@ -76,6 +76,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
&perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
};
if(state_count > max_state_count)

View File

@ -17,6 +17,13 @@
* by the current locale.
*/
#ifndef BOOST_REGEX_PRIMARY_TRANSFORM
#define BOOST_REGEX_PRIMARY_TRANSFORM
#ifdef BOOST_HAS_ABI_HEADERS
# include BOOST_ABI_PREFIX
#endif
namespace boost{
namespace re_detail{
@ -31,6 +38,12 @@ enum{
template <class S, class charT>
unsigned count_chars(const S& s, charT c)
{
//
// Count how many occurrences of character c occur
// in string s: if c is a delimiter between collation
// fields, then this should be the same value for all
// sort keys:
//
unsigned int count = 0;
for(unsigned pos = 0; pos < s.size(); ++pos)
{
@ -53,20 +66,17 @@ unsigned find_sort_syntax(const traits* pt, charT* delim)
// Suppress incorrect warning for MSVC
(void)pt;
string_type a(1, (char_type)'a');
string_type sa;
pt->transform(sa, a);
char_type a[2] = {'a', '\0', };
string_type sa(pt->transform(a, a+1));
if(sa == a)
{
*delim = 0;
return sort_C;
}
string_type A(1, (char_type)'A');
string_type sA;
pt->transform(sA, A);
string_type c(1, (char_type)';');
string_type sc;
pt->transform(sc, c);
char_type A[2] = { 'A', '\0', };
string_type sA(pt->transform(A, A+1));
char_type c[2] = { ';', '\0', };
string_type sc(pt->transform(c, c+1));
int pos = 0;
while((pos <= static_cast<int>(sa.size())) && (pos <= static_cast<int>(sA.size())) && (sa[pos] == sA[pos])) ++pos;
@ -77,11 +87,11 @@ unsigned find_sort_syntax(const traits* pt, charT* delim)
return sort_unknown;
}
//
// at this point sa[pos] is either the end of a fixed with field
// at this point sa[pos] is either the end of a fixed width field
// or the character that acts as a delimiter:
//
charT maybe_delim = sa[pos];
if((pos != 0) && (count_chars(sa, maybe_delim) == count_chars(sA, maybe_delim)) && (count_chars(sa, maybe_delim) == count_chars(c, maybe_delim)))
if((pos != 0) && (count_chars(sa, maybe_delim) == count_chars(sA, maybe_delim)) && (count_chars(sa, maybe_delim) == count_chars(sc, maybe_delim)))
{
*delim = maybe_delim;
return sort_delim;
@ -89,7 +99,7 @@ unsigned find_sort_syntax(const traits* pt, charT* delim)
//
// OK, doesn't look like a delimiter, try for a fixed width field:
//
if((sa.size() == sA.size()) && (sa.size() == c.size()))
if((sa.size() == sA.size()) && (sa.size() == sc.size()))
{
// note assumes that the fixed width field is less than
// numeric_limits<charT>::max(), should be true for all types
@ -108,6 +118,11 @@ unsigned find_sort_syntax(const traits* pt, charT* delim)
} // namespace re_detail
} // namespace boost
#ifdef BOOST_HAS_ABI_HEADERS
# include BOOST_ABI_SUFFIX
#endif
#endif

View File

@ -41,9 +41,6 @@
#ifndef BOOST_REGEX_FWD_HPP
#include <boost/regex_fwd.hpp>
#endif
#ifndef BOOST_REGEX_STACK_HPP
#include <boost/regex/v4/regex_stack.hpp>
#endif
#ifndef BOOST_REGEX_RAW_BUFFER_HPP
#include <boost/regex/v4/regex_raw_buffer.hpp>
#endif

View File

@ -35,6 +35,10 @@
#include <boost/regex/v4/cpp_regex_traits.hpp>
#endif
#ifdef BOOST_HAS_ABI_HEADERS
# include BOOST_ABI_PREFIX
#endif
namespace boost{
template <class charT, class implementationT >
@ -45,5 +49,9 @@ struct regex_traits : public implementationT
} // namespace boost
#ifdef BOOST_HAS_ABI_HEADERS
# include BOOST_ABI_SUFFIX
#endif
#endif // include

View File

@ -19,6 +19,10 @@
#ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
#define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
#ifdef BOOST_HAS_ABI_HEADERS
# include BOOST_ABI_PREFIX
#endif
namespace boost{ namespace re_detail{
BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n);
@ -77,6 +81,11 @@ inline bool is_separator(charT c)
{
return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (static_cast<int>(c) == 0x2028) || (static_cast<int>(c) == 0x2029));
}
template <>
inline bool is_separator<char>(char c)
{
   // narrow character version: only line feed and carriage return
   // are tested for here.
   const bool is_line_feed = (c == '\n');
   const bool is_carriage_return = (c == '\r');
   return BOOST_REGEX_MAKE_BOOL(is_line_feed || is_carriage_return);
}
//
// get a default collating element:
@ -99,7 +108,7 @@ struct character_pointer_range
}
bool operator == (const character_pointer_range& r)const
{
return (std::distance(p1, p2) == std::distance(r.p1, r.p2)) && std::equal(p1, p2, r.p1);
return ((p2 - p1) == (r.p2 - r.p1)) && std::equal(p1, p2, r.p1);
}
};
template <class charT>
@ -183,4 +192,8 @@ int parse_value(const charT*& p1, const charT* p2, const traits& traits_inst, in
} // re_detail
} // boost
#ifdef BOOST_HAS_ABI_HEADERS
# include BOOST_ABI_SUFFIX
#endif
#endif

View File

@ -106,7 +106,9 @@ enum syntax_element_type
syntax_element_dot_rep = syntax_element_restart_continue + 1,
syntax_element_char_rep = syntax_element_dot_rep + 1,
syntax_element_short_set_rep = syntax_element_char_rep + 1,
syntax_element_long_set_rep = syntax_element_short_set_rep + 1
syntax_element_long_set_rep = syntax_element_short_set_rep + 1,
// a backstep for lookbehind repeats:
syntax_element_backstep = syntax_element_long_set_rep + 1
};
#ifdef BOOST_REGEX_DEBUG

View File

@ -49,6 +49,10 @@ bad_expression::~bad_expression() throw() {}
namespace re_detail{
//
// raise_runtime_error:
// Non-template helper that hands "ex" on to ::boost::throw_exception.
// NOTE(review): presumably kept out of line so that header-only template
// code can report errors through one exported entry point - confirm.
//
BOOST_REGEX_DECL void BOOST_REGEX_CALL raise_runtime_error(const std::runtime_error& ex)
{
::boost::throw_exception(ex);
}
//
// error checking API:
//

View File

@ -90,7 +90,7 @@ void scoped_static_mutex_lock::lock()
#if !defined(InterlockedCompareExchangePointer)
while(0 != InterlockedCompareExchange(reinterpret_cast<void**>((boost::uint_least16_t*)&(m_mutex.m_mutex)), (void*)1, 0))
#else
while(0 != InterlockedCompareExchange(reinterpret_cast<volatile LONG*>(&(m_mutex.m_mutex)), 1, 0))
while(0 != InterlockedCompareExchange(reinterpret_cast<LONG*>(&(m_mutex.m_mutex)), 1, 0))
#endif
{
Sleep(0);
@ -106,7 +106,7 @@ void scoped_static_mutex_lock::unlock()
#if !defined(InterlockedCompareExchangePointer)
InterlockedExchange((LONG*)&(m_mutex.m_mutex), 0);
#else
InterlockedExchange(reinterpret_cast<volatile LONG*>(&(m_mutex.m_mutex)), 0);
InterlockedExchange(reinterpret_cast<LONG*>(&(m_mutex.m_mutex)), 0);
#endif
m_have_lock = false;
}

View File

@ -1,7 +1,18 @@
#include <boost/detail/workaround.hpp>
#if BOOST_WORKAROUND(__BORLANDC__, < 0x560)
// we get unresolved externals from basic_string
// unless we do this, a well known Borland bug:
#define _RWSTD_COMPILE_INSTANTIATE
#endif
#include "test.hpp"
#ifdef BOOST_MSVC
#pragma warning(disable:4127)
#endif
void basic_tests()
{
using namespace boost::regex_constants;
@ -417,8 +428,10 @@ void test_anchors()
//
TEST_REGEX_SEARCH("^.", extended, " \n \r\n ", match_default, make_array(0, 1, -2, 3, 4, -2, 7, 8, -2, -2));
TEST_REGEX_SEARCH(".$", extended, " \n \r\n ", match_default, make_array(1, 2, -2, 4, 5, -2, 8, 9, -2, -2));
TEST_REGEX_SEARCH_W(L"^.", extended, L"\u2028 \u2028", match_default, make_array(0, 1, -2, 1, 2, -2, -2));
TEST_REGEX_SEARCH_W(L".$", extended, L" \u2028 \u2028", match_default, make_array(0, 1, -2, 2, 3, -2, 3, 4, -2, -2));
#if !BOOST_WORKAROUND(__BORLANDC__, < 0x560)
TEST_REGEX_SEARCH_W(L"^.", extended, L"\x2028 \x2028", match_default, make_array(0, 1, -2, 1, 2, -2, -2));
TEST_REGEX_SEARCH_W(L".$", extended, L" \x2028 \x2028", match_default, make_array(0, 1, -2, 2, 3, -2, 3, 4, -2, -2));
#endif
}
void test_backrefs()
@ -491,7 +504,9 @@ void test_character_escapes()
TEST_REGEX_SEARCH("a\\Q+*?\\\\Eb", perl, "a+*?\\b", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH("\\C+", perl, "abcde", match_default, make_array(0, 5, -2, -2));
TEST_REGEX_SEARCH("\\X+", perl, "abcde", match_default, make_array(0, 5, -2, -2));
TEST_REGEX_SEARCH_W(L"\\X", perl, L"a\u0300\u0301", match_default, make_array(0, 3, -2, -2));
#if !BOOST_WORKAROUND(__BORLANDC__, < 0x560)
TEST_REGEX_SEARCH_W(L"\\X", perl, L"a\x0300\x0301", match_default, make_array(0, 3, -2, -2));
#endif
}
void test_assertion_escapes()
@ -667,6 +682,7 @@ void test_tricky_cases()
void test_tricky_cases2()
{
using namespace boost::regex_constants;
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFF", match_default, make_array(0, 4, 0, 4, 0, 4, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2));
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "35", match_default, make_array(0, 2, 0, 2, -1, -1, 0, 2, -1, -1, -1, -1, -1, -1, -2, -2));
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFFu", match_default, make_array(0, 5, 0, 4, 0, 4, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2));
@ -679,14 +695,16 @@ void test_tricky_cases2()
// posix only:
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", awk, "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);", match_default, make_array(0, 53, 28, 42, -2, -2));
// now try and test some unicode specific characters:
TEST_REGEX_SEARCH_W(L"[[:unicode:]]+", perl, L"a\u0300\u0400z", match_default, make_array(1, 3, -2, -2));
TEST_REGEX_SEARCH_W(L"[\x10-\xff]", perl, L"\u0300\u0400", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH_W(L"[\01-\05]{5}", perl, L"\u0300\u0400\u0300\u0400\u0300\u0400", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH_W(L"[\x300-\x400]+", perl, L"\u0300\u0400\u0300\u0400\u0300\u0400", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH_W(L"[\\x{300}-\\x{400}]+", perl, L"\u0300\u0400\u0300\u0400\u0300\u0400", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH_W(L"\\x{300}\\x{400}+", perl, L"\u0300\u0400\u0400\u0400\u0400\u0400", match_default, make_array(0, 6, -2, -2));
#if !BOOST_WORKAROUND(__BORLANDC__, < 0x560)
TEST_REGEX_SEARCH_W(L"[[:unicode:]]+", perl, L"a\x0300\x0400z", match_default, make_array(1, 3, -2, -2));
TEST_REGEX_SEARCH_W(L"[\x10-\xff]", perl, L"\x0300\x0400", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH_W(L"[\01-\05]{5}", perl, L"\x0300\x0400\x0300\x0400\x0300\x0400", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH_W(L"[\x300-\x400]+", perl, L"\x0300\x0400\x0300\x0400\x0300\x0400", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH_W(L"[\\x{300}-\\x{400}]+", perl, L"\x0300\x0400\x0300\x0400\x0300\x0400", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH_W(L"\\x{300}\\x{400}+", perl, L"\x0300\x0400\x0400\x0400\x0400\x0400", match_default, make_array(0, 6, -2, -2));
#endif
// finally try some case insensitive matches:
TEST_REGEX_SEARCH("0123456789@abcdefghijklmnopqrstuvwxyz\\[\\\\\\]\\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\\{\\|\\}", perl|icase, "0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}", match_default, make_array(0, 72, -2, -2));
TEST_REGEX_SEARCH("0123456789@abcdefghijklmnopqrstuvwxyz\\[\\\\\\]\\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\\{\\|\\}", perl|icase, "0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}", match_default, make_array(0, 72, -2, -2));
TEST_REGEX_SEARCH("a", perl|icase, "A", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("A", perl|icase, "a", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("[abc]+", perl|icase, "abcABC", match_default, make_array(0, 6, -2, -2));
@ -763,6 +781,9 @@ void test_tricky_cases2()
TEST_REGEX_SEARCH("()\\1", perl, "a", match_default, make_array(0, 0, 0, 0, -2, 1, 1, 1, 1, -2, -2));
TEST_REGEX_SEARCH("a()\\1b", perl, "ab", match_default, make_array(0, 2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("a()b\\1", perl, "ab", match_default, make_array(0, 2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("([a-c]+)\\1", perl, "abcbc", match_default, make_array(1, 5, 1, 3, -2, -2));
TEST_REGEX_SEARCH(".+abc", perl, "xxxxxxxxyyyyyyyyab", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(.+)\\1", perl, "abcdxxxyyyxxxyyy", match_default, make_array(4, 16, 4, 10, -2, -2));
//
// the strings in the next test case are too long for most compilers to cope with,
@ -806,7 +827,7 @@ void test_tricky_cases2()
make_array(753, 1076, 934, 1005, -2, 2143, 2466, 2324, 2395, -2, -2));
test(char(0), test_regex_search_tag());
}while(0);
#ifndef BOOST_NO_WREGEX
#if !defined(BOOST_NO_WREGEX) && !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)
do{
std::string st(big_text);
test_info<wchar_t>::set_info(__FILE__, __LINE__,
@ -997,6 +1018,18 @@ void test_forward_lookahead_asserts()
TEST_REGEX_SEARCH("^(?=.*\\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$", perl, "abc3", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(?=.*\\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$", perl, "abC3", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("^(?=.*\\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$", perl, "ABCD3", match_default, make_array(-2, -2));
// lookbehind assertions, added 2004-04-30
TEST_REGEX_SEARCH("/\\*.*(?<=\\*)/", perl, "/**/", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("/\\*.*(?<=\\*)/", perl, "/*****/ ", match_default, make_array(0, 7, -2, -2));
TEST_REGEX_SEARCH("(?<=['\"]).*?(?=['\"])", perl, " 'ac' ", match_default, make_array(2, 4, -2, -2));
TEST_REGEX_SEARCH("(?<=['\"]).*?(?=['\"])", perl, " \"ac\" ", match_default, make_array(2, 4, -2, -2));
TEST_REGEX_SEARCH("(?<=['\"]).*?(?<!\\\\)(?=['\"])", perl, " \"ac\" ", match_default, make_array(2, 4, -2, -2));
TEST_REGEX_SEARCH("(?<=['\"]).*?(?<!\\\\)(?=['\"])", perl, " \"ac\\\"\" ", match_default, make_array(2, 6, -2, -2));
TEST_INVALID_REGEX("(?<=[abc]", perl);
TEST_INVALID_REGEX("(?<=", perl);
TEST_INVALID_REGEX("(?<", perl);
TEST_INVALID_REGEX("(?", perl);
}
void test_fast_repeats()
@ -1178,105 +1211,109 @@ void test_fast_repeats()
TEST_REGEX_SEARCH("ab[_[.ae.]]{2,5}?xy", perl, "ab______xy", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("ab[_[.ae.]]{2,5}xy", perl, "ab_xy", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("([5[.ae.]]*?).somesite", perl, "//555.somesite", match_default, make_array(2, 14, 2, 5, -2, -2));
//TEST_REGEX_SEARCH("", perl, "", match_default, make_array(-2, -2));
#if 0
- normal REG_PERL
; new (?: construct )
(?>^abc) abc 0 3
(?>^abc) def\nabc 4 7
(?>^abc) defabc -1 -1
(?>.*/)foo /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ -1 -1
(?>.*/)foo /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo 0 67
(?>(\.\d\d[1-9]?))\d+ 1.230003938 1 11 1 4
(?>(\.\d\d[1-9]?))\d+ 1.875000282 1 11 1 5
(?>(\.\d\d[1-9]?))\d+ 1.235 -1 -1
^((?>\w+)|(?>\s+))*$ "now is the time for all good men to come to the aid of the party" 0 64 59 64
^((?>\w+)|(?>\s+))*$ "this is not a line with only words and spaces!" -1 -1
((?>\d+))(\w) 12345a 0 6 0 5 5 6
((?>\d+))(\w) 12345+ -1 -1
((?>\d+))(\d) 12345 -1 -1
(?>a+)b aaab 0 4
((?>a+)b) aaab 0 4 0 4
(?>(a+))b aaab 0 4 0 3
(?>b)+ aaabbbccc 3 6
(?>a+|b+|c+)*c aaabbbbccccd 0 8
((?>[^()]+)|\([^()]*\))+ ((abc(ade)ufh()()x 2 18 17 18
\(((?>[^()]+)|\([^()]+\))+\) (abc) 0 5 1 4
\(((?>[^()]+)|\([^()]+\))+\) (abc(def)xyz) 0 13 9 12
\(((?>[^()]+)|\([^()]+\))+\) ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -1 -1
(?>a*)* a 0 1
(?>a*)* aa 0 2
(?>a*)* aaaa 0 4
(?>a*)* a 0 1
(?>a*)* aaabcde 0 3
((?>a*))* aaaaa 0 5 5 5
((?>a*))* aabbaa 0 2 2 2
((?>a*?))* aaaaa 0 0 0 0
((?>a*?))* aabbaa 0 0 0 0
"word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword" "word cat dog elephant mussel cow horse canary baboon snake shark otherword" 0 74
"word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword" "word cat dog elephant mussel cow horse canary baboon snake shark" -1 -1
"word (?>[a-zA-Z0-9]+ ){0,30}otherword" "word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope" -1 -1
"word (?>[a-zA-Z0-9]+ ){0,30}otherword" "word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I really really hope otherword" -1 -1
((?>Z)+|A)* ZABCDEFG 0 2 1 2
((?>)+|A)* !
; subtleties of matching with no sub-expressions marked
- normal match_nosubs REG_NO_POSIX_TEST
a(b?c)+d accd 0 4
(wee|week)(knights|night) weeknights 0 10
.* abc 0 3
a(b|(c))d abd 0 3
a(b|(c))d acd 0 3
a(b*|c|e)d abbd 0 4
a(b*|c|e)d acd 0 3
a(b*|c|e)d ad 0 2
a(b?)c abc 0 3
a(b?)c ac 0 2
a(b+)c abc 0 3
a(b+)c abbbc 0 5
a(b*)c ac 0 2
(a|ab)(bc([de]+)f|cde) abcdef 0 6
a([bc]?)c abc 0 3
a([bc]?)c ac 0 2
a([bc]+)c abc 0 3
a([bc]+)c abcc 0 4
a([bc]+)bc abcbc 0 5
a(bb+|b)b abb 0 3
a(bbb+|bb+|b)b abb 0 3
a(bbb+|bb+|b)b abbb 0 4
a(bbb+|bb+|b)bb abbb 0 4
(.*).* abcdef 0 6
(a*)* bc 0 0
- normal nosubs REG_NO_POSIX_TEST
a(b?c)+d accd 0 4
(wee|week)(knights|night) weeknights 0 10
.* abc 0 3
a(b|(c))d abd 0 3
a(b|(c))d acd 0 3
a(b*|c|e)d abbd 0 4
a(b*|c|e)d acd 0 3
a(b*|c|e)d ad 0 2
a(b?)c abc 0 3
a(b?)c ac 0 2
a(b+)c abc 0 3
a(b+)c abbbc 0 5
a(b*)c ac 0 2
(a|ab)(bc([de]+)f|cde) abcdef 0 6
a([bc]?)c abc 0 3
a([bc]?)c ac 0 2
a([bc]+)c abc 0 3
a([bc]+)c abcc 0 4
a([bc]+)bc abcbc 0 5
a(bb+|b)b abb 0 3
a(bbb+|bb+|b)b abb 0 3
a(bbb+|bb+|b)b abbb 0 4
a(bbb+|bb+|b)bb abbb 0 4
(.*).* abcdef 0 6
(a*)* bc 0 0
#endif
}
//
// Search tests for expressions that use the (?>...) construct, all
// compiled with the perl syntax option and run with match_default.
// NOTE(review): the make_array arguments presumably hold start/end
// offset pairs for each (sub-)match, with -2 acting as a separator and
// terminator and -1 marking an unmatched sub-expression - confirm
// against the test harness.
//
void test_independent_subs()
{
using namespace boost::regex_constants;
TEST_REGEX_SEARCH("(?>^abc)", perl, "abc", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("(?>^abc)", perl, "def\nabc", match_default, make_array(4, 7, -2, -2));
TEST_REGEX_SEARCH("(?>^abc)", perl, "defabc", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(?>.*/)foo", perl, "/this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(?>.*/)foo", perl, "/this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo", match_default, make_array(0, 67, -2, -2));
TEST_REGEX_SEARCH("(?>(\\.\\d\\d[1-9]?))\\d+", perl, "1.230003938", match_default, make_array(1, 11, 1, 4, -2, -2));
TEST_REGEX_SEARCH("(?>(\\.\\d\\d[1-9]?))\\d+", perl, "1.875000282", match_default, make_array(1, 11, 1, 5, -2, -2));
TEST_REGEX_SEARCH("(?>(\\.\\d\\d[1-9]?))\\d+", perl, "1.235", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^((?>\\w+)|(?>\\s+))*$", perl, "now is the time for all good men to come to the aid of the party", match_default, make_array(0, 64, 59, 64, -2, -2));
TEST_REGEX_SEARCH("^((?>\\w+)|(?>\\s+))*$", perl, "this is not a line with only words and spaces!", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("((?>\\d+))(\\w)", perl, "12345a", match_default, make_array(0, 6, 0, 5, 5, 6, -2, -2));
TEST_REGEX_SEARCH("((?>\\d+))(\\w)", perl, "12345+", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("((?>\\d+))(\\d)", perl, "12345", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(?>a+)b", perl, "aaab", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("((?>a+)b)", perl, "aaab", match_default, make_array(0, 4, 0, 4, -2, -2));
TEST_REGEX_SEARCH("(?>(a+))b", perl, "aaab", match_default, make_array(0, 4, 0, 3, -2, -2));
TEST_REGEX_SEARCH("(?>b)+", perl, "aaabbbccc", match_default, make_array(3, 6, -2, -2));
TEST_REGEX_SEARCH("(?>a+|b+|c+)*c", perl, "aaabbbbccccd", match_default, make_array(0, 8, -2, 8, 9, -2, 9, 10, -2, 10, 11, -2, -2));
TEST_REGEX_SEARCH("((?>[^()]+)|\\([^()]*\\))+", perl, "((abc(ade)ufh()()x", match_default, make_array(2, 18, 17, 18, -2, -2));
TEST_REGEX_SEARCH("\\(((?>[^()]+)|\\([^()]+\\))+\\)", perl, "(abc)", match_default, make_array(0, 5, 1, 4, -2, -2));
TEST_REGEX_SEARCH("\\(((?>[^()]+)|\\([^()]+\\))+\\)", perl, "(abc(def)xyz)", match_default, make_array(0, 13, 9, 12, -2, -2));
TEST_REGEX_SEARCH("\\(((?>[^()]+)|\\([^()]+\\))+\\)", perl, "((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", match_default, make_array(-2, -2));
// the remaining search expressions can match an empty string, so their
// expected tables list more than one match:
TEST_REGEX_SEARCH("(?>a*)*", perl, "a", match_default, make_array(0, 1, -2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("(?>a*)*", perl, "aa", match_default, make_array(0, 2, -2, 2, 2, -2, -2));
TEST_REGEX_SEARCH("(?>a*)*", perl, "aaaa", match_default, make_array(0, 4, -2, 4, 4, -2, -2));
// NOTE(review): the next case repeats the first "(?>a*)*" test against "a"
// verbatim - possibly a different input was intended.
TEST_REGEX_SEARCH("(?>a*)*", perl, "a", match_default, make_array(0, 1, -2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("(?>a*)*", perl, "aaabcde", match_default, make_array(0, 3, -2, 3, 3, -2, 4, 4, -2, 5, 5, -2, 6, 6, -2, 7, 7, -2, -2));
TEST_REGEX_SEARCH("((?>a*))*", perl, "aaaaa", match_default, make_array(0, 5, 5, 5, -2, 5, 5, 5, 5, -2, -2));
TEST_REGEX_SEARCH("((?>a*))*", perl, "aabbaa", match_default, make_array(0, 2, 2, 2, -2, 2, 2, 2, 2, -2, 3, 3, 3, 3, -2, 4, 6, 6, 6, -2, 6, 6, 6, 6, -2, -2));
TEST_REGEX_SEARCH("((?>a*?))*", perl, "aaaaa", match_default, make_array(0, 0, 0, 0, -2, 1, 1, 1, 1, -2, 2, 2, 2, 2, -2, 3, 3, 3, 3, -2, 4, 4, 4, 4, -2, 5, 5, 5, 5, -2, -2));
TEST_REGEX_SEARCH("((?>a*?))*", perl, "aabbaa", match_default, make_array(0, 0, 0, 0, -2, 1, 1, 1, 1, -2, 2, 2, 2, 2, -2, 3, 3, 3, 3, -2, 4, 4, 4, 4, -2, 5, 5, 5, 5, -2, 6, 6, 6, 6, -2, -2));
TEST_REGEX_SEARCH("word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword", perl, "word cat dog elephant mussel cow horse canary baboon snake shark otherword", match_default, make_array(0, 74, -2, -2));
TEST_REGEX_SEARCH("word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword", perl, "word cat dog elephant mussel cow horse canary baboon snake shark", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("word (?>[a-zA-Z0-9]+ ){0,30}otherword", perl, "word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("word (?>[a-zA-Z0-9]+ ){0,30}otherword", perl, "word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I really really hope otherword", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("((?>Z)+|A)+", perl, "ZABCDEFG", match_default, make_array(0, 2, 1, 2, -2, -2));
// this expression is expected to be reported as invalid rather than compile:
TEST_INVALID_REGEX("((?>)+|A)+", perl);
}
//
// Search tests that run with sub-expression marking turned off.
// The same list of expressions and inputs is run twice: once with
// match_nosubs added to the match flags, and once with nosubs added
// to the syntax options. Every expected table therefore contains only
// whole-match entries.
//
void test_nosubs()
{
using namespace boost::regex_constants;
// subtleties of matching with no sub-expressions marked
// first pass: match_nosubs supplied in the match flags
TEST_REGEX_SEARCH("a(b?c)+d", perl, "accd", match_default|match_nosubs, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("(wee|week)(knights|night)", perl, "weeknights", match_default|match_nosubs, make_array(0, 10, -2, -2));
TEST_REGEX_SEARCH(".*", perl, "abc", match_default|match_nosubs, make_array(0, 3, -2, 3, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|(c))d", perl, "abd", match_default|match_nosubs, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|(c))d", perl, "acd", match_default|match_nosubs, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(b*|c|e)d", perl, "abbd", match_default|match_nosubs, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("a(b*|c|e)d", perl, "acd", match_default|match_nosubs, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(b*|c|e)d", perl, "ad", match_default|match_nosubs, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("a(b?)c", perl, "abc", match_default|match_nosubs, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(b?)c", perl, "ac", match_default|match_nosubs, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("a(b+)c", perl, "abc", match_default|match_nosubs, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(b+)c", perl, "abbbc", match_default|match_nosubs, make_array(0, 5, -2, -2));
TEST_REGEX_SEARCH("a(b*)c", perl, "ac", match_default|match_nosubs, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("(a|ab)(bc([de]+)f|cde)", perl, "abcdef", match_default|match_nosubs, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH("a([bc]?)c", perl, "abc", match_default|match_nosubs, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a([bc]?)c", perl, "ac", match_default|match_nosubs, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("a([bc]+)c", perl, "abc", match_default|match_nosubs, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a([bc]+)c", perl, "abcc", match_default|match_nosubs, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("a([bc]+)bc", perl, "abcbc", match_default|match_nosubs, make_array(0, 5, -2, -2));
TEST_REGEX_SEARCH("a(bb+|b)b", perl, "abb", match_default|match_nosubs, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(bbb+|bb+|b)b", perl, "abb", match_default|match_nosubs, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(bbb+|bb+|b)b", perl, "abbb", match_default|match_nosubs, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("a(bbb+|bb+|b)bb", perl, "abbb", match_default|match_nosubs, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("(.*).*", perl, "abcdef", match_default|match_nosubs, make_array(0, 6, -2, 6, 6, -2, -2));
TEST_REGEX_SEARCH("(a*)*", perl, "bc", match_default|match_nosubs, make_array(0, 0, -2, 1, 1, -2, 2, 2, -2, -2));
// second pass: the same expressions and inputs, with nosubs supplied
// as a syntax option when the expression is compiled
TEST_REGEX_SEARCH("a(b?c)+d", perl|nosubs, "accd", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("(wee|week)(knights|night)", perl|nosubs, "weeknights", match_default, make_array(0, 10, -2, -2));
TEST_REGEX_SEARCH(".*", perl|nosubs, "abc", match_default, make_array(0, 3, -2, 3, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|(c))d", perl|nosubs, "abd", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|(c))d", perl|nosubs, "acd", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(b*|c|e)d", perl|nosubs, "abbd", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("a(b*|c|e)d", perl|nosubs, "acd", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(b*|c|e)d", perl|nosubs, "ad", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("a(b?)c", perl|nosubs, "abc", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(b?)c", perl|nosubs, "ac", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("a(b+)c", perl|nosubs, "abc", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(b+)c", perl|nosubs, "abbbc", match_default, make_array(0, 5, -2, -2));
TEST_REGEX_SEARCH("a(b*)c", perl|nosubs, "ac", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("(a|ab)(bc([de]+)f|cde)", perl|nosubs, "abcdef", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH("a([bc]?)c", perl|nosubs, "abc", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a([bc]?)c", perl|nosubs, "ac", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("a([bc]+)c", perl|nosubs, "abc", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a([bc]+)c", perl|nosubs, "abcc", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("a([bc]+)bc", perl|nosubs, "abcbc", match_default, make_array(0, 5, -2, -2));
TEST_REGEX_SEARCH("a(bb+|b)b", perl|nosubs, "abb", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(bbb+|bb+|b)b", perl|nosubs, "abb", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(bbb+|bb+|b)b", perl|nosubs, "abbb", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("a(bbb+|bb+|b)bb", perl|nosubs, "abbb", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("(.*).*", perl|nosubs, "abcdef", match_default, make_array(0, 6, -2, 6, 6, -2, -2));
TEST_REGEX_SEARCH("(a*)*", perl|nosubs, "bc", match_default, make_array(0, 0, -2, 1, 1, -2, 2, 2, -2, -2));
}

View File

@ -11,7 +11,9 @@
template <class charT>
class test_info
{
public:
typedef std::basic_string<charT> string_type;
private:
struct data_type
{
std::string file;

View File

@ -22,6 +22,8 @@ int cpp_main(int argc, char * argv[])
test_partial_match();
test_forward_lookahead_asserts();
test_fast_repeats();
test_independent_subs();
test_nosubs();
return error_count;
}

View File

@ -140,5 +140,7 @@ void test_partial_match();
void test_forward_lookahead_asserts();
void test_fast_repeats();
void test_tricky_cases2();
void test_independent_subs();
void test_nosubs();
#endif

View File

@ -0,0 +1,54 @@
#ifndef BOOST_REGEX_REGRESS_REGEX_REPLACE_HPP
#define BOOST_REGEX_REGRESS_REGEX_REPLACE_HPP
#include "info.hpp"
//
// Runs boost::regex_replace over the current test case held in
// test_info<charT> and reports an error if the output differs from
// the expected result string.
//
template<class charT, class traits>
void test_regex_replace(boost::basic_regex<charT, traits>& r)
{
   typedef std::basic_string<charT> string_type;
   // inputs for this test case:
   const string_type& subject = test_info<charT>::search_text();
   boost::regex_constants::match_flag_type flags = test_info<charT>::match_options();
   const string_type& format = test_info<charT>::format_string();
   // the text the replace operation is required to produce:
   const string_type& expected = test_info<charT>::result_string();

   string_type actual = boost::regex_replace(subject, r, format, flags);
   if(actual == expected)
   {
      return;
   }
   BOOST_REGEX_TEST_ERROR("regex_replace generated an incorrect string result", charT);
}
// empty tag type: passing an instance selects the regex_replace overload of test()
struct test_regex_replace_tag{};
//
// Tag-dispatched entry point for regex_replace tests: compiles the
// expression recorded in test_info<charT> into r, then runs the replace
// check. Any exception escaping either step is reported as a test error.
//
template<class charT, class traits>
void test(boost::basic_regex<charT, traits>& r, const test_regex_replace_tag&)
{
   const std::basic_string<charT>& pattern = test_info<charT>::expression();
   boost::regex_constants::syntax_option_type flags = test_info<charT>::syntax_options();
   try
   {
      r.assign(pattern, flags);
      test_regex_replace(r);
   }
   // handlers run from most to least specific type:
   catch(const boost::bad_expression& bad_expr)
   {
      BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done: " << bad_expr.what(), charT);
   }
   catch(const std::runtime_error& runtime_err)
   {
      BOOST_REGEX_TEST_ERROR("Received an unexpected std::runtime_error: " << runtime_err.what(), charT);
   }
   catch(const std::exception& std_err)
   {
      BOOST_REGEX_TEST_ERROR("Received an unexpected std::exception: " << std_err.what(), charT);
   }
   catch(...)
   {
      BOOST_REGEX_TEST_ERROR("Received an unexpected exception of unknown type", charT);
   }
}
#endif

View File

@ -134,7 +134,7 @@ void test_regex_grep(boost::basic_regex<charT, traits>& r)
boost::regex_constants::match_flag_type opts = test_info<charT>::match_options();
const int* answer_table = test_info<charT>::answer_table();
grep_test_predicate<charT, traits> pred(search_text.begin(), answer_table);
boost::regex_grep(pred, search_text, r, opts);
boost::regex_grep(pred, search_text.begin(), search_text.end(), r, opts);
}
template<class charT, class traits>
@ -158,7 +158,7 @@ void test_regex_match(boost::basic_regex<charT, traits>& r)
{
BOOST_REGEX_TEST_ERROR("boost::regex_match found a match when it should not have done so.", charT);
}
else if((answer_table[0] == 0) && (answer_table[1] == search_text.size()))
else if((answer_table[0] == 0) && (answer_table[1] == static_cast<int>(search_text.size())))
{
if(boost::regex_match(
search_text.begin(),