Almost complete POSIX regex support now...

[SVN r22624]
This commit is contained in:
John Maddock
2004-04-09 16:04:34 +00:00
parent 38b58f2007
commit 50b5391c8f
21 changed files with 1446 additions and 605 deletions

View File

@ -59,7 +59,7 @@ lib boost_regex : ../src/$(SOURCES) <template>regex-options
;
dll boost_regex : ../src/$(SOURCES).cpp <template>regex-dll-options
dll boost_regex : ../src/$(SOURCES) <template>regex-dll-options
:
common-variant-tag
:

View File

@ -60,6 +60,7 @@
# include <boost/throw_exception.hpp>
# include <boost/scoped_ptr.hpp>
# include <boost/shared_ptr.hpp>
# include <boost/mpl/bool_fwd.hpp>
# ifndef BOOST_NO_STD_LOCALE
# include <locale>
# endif

View File

@ -154,6 +154,11 @@ public:
{
return this->m_can_be_null;
}
// Read-only access to this implementation object viewed as a regex_data
// (obtained by static_cast from our own const this pointer).
const regex_data<charT, traits>& get_data()const
{
   return *static_cast<const regex_data<charT, traits>*>(
      static_cast<basic_regex_implementation<charT, traits> const*>(this));
}
};
} // namespace re_detail
@ -470,6 +475,11 @@ public:
assert(m_pimpl.get());
return m_pimpl->can_be_null();
}
// Forwards to get_data() on the shared implementation object;
// asserts that the implementation pointer is non-null first.
const re_detail::regex_data<charT, traits>& get_data()const
{
   re_detail::basic_regex_implementation<charT, traits>* impl = m_pimpl.get();
   assert(impl);
   return impl->get_data();
}
private:
shared_ptr<re_detail::basic_regex_implementation<charT, traits> > m_pimpl;

View File

@ -28,6 +28,106 @@ namespace boost{
namespace re_detail{
//
// digraph: a pair of characters; a second member of zero means
// that only the first character is present.
//
template <class charT>
struct digraph : public std::pair<charT, charT>
{
   // construct from up to two characters, missing ones default to zero:
   digraph(charT c1 = 0, charT c2 = 0)
      : std::pair<charT, charT>(c1, c2)
   {
   }
   // construct from a string that must hold either one or two characters:
   digraph(const std::basic_string<charT>& s)
      : std::pair<charT, charT>()
   {
      const typename std::basic_string<charT>::size_type len = s.size();
      BOOST_ASSERT(len <= 2);
      BOOST_ASSERT(len);
      this->first = s[0];
      if(len > 1)
         this->second = s[1];
      else
         this->second = 0;
   }
};
//
// basic_char_set: accumulates the contents of a character set while it
// is being parsed: single characters (or digraphs), ranges, character
// class masks, and whether the set is negated.
//
template <class charT, class traits>
class basic_char_set
{
public:
   typedef digraph<charT>                    digraph_type;
   typedef std::basic_string<charT>          string_type;
   typedef typename traits::char_class_type  mask_type;

   // a new set is empty, not negated, and holds no digraphs or classes:
   basic_char_set()
      : m_negate(false), m_has_digraphs(false), m_empty(true)
   {
      m_classes = 0;
   }

   // add one character (or digraph) to the set:
   void add_single(const digraph_type& s)
   {
      m_empty = false;
      if(s.second)
         m_has_digraphs = true;
      m_singles.push_back(s);
   }
   // add a range; the two end points are stored consecutively in m_ranges:
   void add_range(const digraph_type& first, const digraph_type& end)
   {
      m_empty = false;
      if(first.second || end.second)
         m_has_digraphs = true;
      m_ranges.push_back(first);
      m_ranges.push_back(end);
   }
   // merge a character class mask into the set:
   void add_class(mask_type m)
   {
      m_empty = false;
      m_classes |= m;
   }
   // mark the set as negated (this also counts as "something added"):
   void negate()
   {
      m_empty = false;
      m_negate = true;
   }

   //
   // accessor functions:
   //
   bool has_digraphs()const { return m_has_digraphs; }
   bool is_negated()const { return m_negate; }

   typedef typename std::vector<digraph_type>::const_iterator list_iterator;

   list_iterator singles_begin()const { return m_singles.begin(); }
   list_iterator singles_end()const { return m_singles.end(); }
   list_iterator ranges_begin()const { return m_ranges.begin(); }
   list_iterator ranges_end()const { return m_ranges.end(); }

   mask_type classes()const { return m_classes; }
   bool empty()const { return m_empty; }

private:
   std::vector<digraph_type> m_singles;         // the single characters to match
   std::vector<digraph_type> m_ranges;          // range end points, stored as consecutive pairs
   bool                      m_negate;          // true when the set is negated
   bool                      m_has_digraphs;    // true once any digraph has been added
   mask_type                 m_classes;         // union of the character classes to match
   bool                      m_empty;           // true until something is added (or negate() is called)
};
template <class charT, class traits>
class basic_regex_creator
{
@ -54,38 +154,63 @@ public:
m_pdata->m_flags = flags;
m_icase = flags & regex_constants::icase;
}
regbase::flag_type flags()
{
return m_pdata->m_flags;
}
re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
re_literal* append_literal(charT c);
re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set);
re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::false_*);
re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::true_*);
void finalize(const charT* p1, const charT* p2);
protected:
regex_data<charT, traits>* m_pdata; // pointer to the basic_regex_data struct we are filling in
const traits& m_traits; // convenience reference to traits class
re_syntax_base* m_last_state;// the last state we added
bool m_icase; // true for case insensitive matches
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
regex_data<charT, traits>* m_pdata; // pointer to the basic_regex_data struct we are filling in
const traits& m_traits; // convenience reference to traits class
re_syntax_base* m_last_state; // the last state we added
bool m_icase; // true for case insensitive matches
unsigned m_repeater_id; // the id of the next repeater
unsigned m_backrefs; // bitmask of permitted backrefs
boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character
typename traits::char_class_type m_upper_mask; // mask used to determine if a character is an uppercase character
typename traits::char_class_type m_alpha_mask; // mask used to determine if a character is an alphabetic character
private:
basic_regex_creator& operator=(const basic_regex_creator&);
basic_regex_creator(const basic_regex_creator&);
void fixup_pointers(re_syntax_base* state);
void create_startmaps(re_syntax_base* state);
void create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask, re_syntax_base* terminal);
void create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask);
unsigned get_restart_type(re_syntax_base* state);
void set_all_masks(unsigned char* bits, unsigned char);
bool is_bad_repeat(re_syntax_base* pt);
void set_bad_repeat(re_syntax_base* pt);
};
template <class charT, class traits>
basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
: m_pdata(data), m_traits(data->m_traits), m_last_state(0)
: m_pdata(data), m_traits(data->m_traits), m_last_state(0), m_repeater_id(0), m_backrefs(0)
{
m_pdata->m_data.clear();
static const charT w = 'w';
static const charT s = 's';
static const charT l[] = { 'l', 'o', 'w', 'e', 'r', };
static const charT u[] = { 'u', 'p', 'p', 'e', 'r', };
static const charT a[] = { 'a', 'l', 'p', 'h', 'a', };
m_word_mask = m_traits.lookup_classname(&w, &w +1);
m_mask_space = m_traits.lookup_classname(&s, &s +1);
m_lower_mask = m_traits.lookup_classname(l, l + 5);
m_upper_mask = m_traits.lookup_classname(u, u + 5);
m_alpha_mask = m_traits.lookup_classname(a, a + 5);
BOOST_ASSERT(m_word_mask);
BOOST_ASSERT(m_mask_space);
BOOST_ASSERT(m_lower_mask);
BOOST_ASSERT(m_upper_mask);
BOOST_ASSERT(m_alpha_mask);
}
template <class charT, class traits>
@ -148,6 +273,213 @@ re_literal* basic_regex_creator<charT, traits>::append_literal(charT c)
return result;
}
//
// Append a set state for char_set, choosing between the two tagged
// overloads: sets holding digraphs always use the mpl::false_ overload,
// otherwise the choice depends on whether charT is exactly one byte wide.
//
template <class charT, class traits>
inline re_syntax_base* basic_regex_creator<charT, traits>::append_set(
   const basic_char_set<charT, traits>& char_set)
{
   typedef mpl::bool_<sizeof(charT) == 1> narrow_char_tag;
   if(char_set.has_digraphs())
      return append_set(char_set, static_cast<mpl::false_*>(0));
   return append_set(char_set, static_cast<narrow_char_tag*>(0));
}
//
// Append a syntax_element_long_set state (re_set_long) for char_set.
//
// The fixed size header is followed in the raw state buffer by:
//   * each single, as a null terminated string of translated characters,
//   * each range, as two consecutive null terminated strings (start, end);
//     these are collation sort keys when regex_constants::collate is set.
//
// Returns the new state, or 0 if a range has its start ordered after its end.
//
template <class charT, class traits>
re_syntax_base* basic_regex_creator<charT, traits>::append_set(
   const basic_char_set<charT, traits>& char_set, mpl::false_*)
{
   typedef std::basic_string<charT> string_type;
   typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
   typedef typename traits::char_class_type mask_type;

   re_set_long<mask_type>* result = static_cast<re_set_long<mask_type>*>(append_state(syntax_element_long_set, sizeof(re_set_long<mask_type>)));
   //
   // fill in the basics:
   //
   result->csingles = static_cast<unsigned int>(std::distance(char_set.singles_begin(), char_set.singles_end()));
   // m_ranges stores two entries (start, end) per range, hence the divide by two:
   result->cranges = static_cast<unsigned int>(std::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
   result->cequivalents = 0;
   result->cclasses = char_set.classes();
   if(flags() & regbase::icase)
   {
      // adjust classes as needed: when ignoring case, asking for all of
      // "lower" or all of "upper" is widened to include "alpha":
      if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
         result->cclasses |= m_alpha_mask;
   }

   result->isnot = char_set.is_negated();
   result->singleton = !char_set.has_digraphs();
   //
   // remember where the state is for later; the result pointer is
   // re-fetched from this offset once the buffer has been extended:
   //
   std::ptrdiff_t offset = getoffset(result);
   //
   // now extend with all the singles:
   //
   item_iterator first, last;
   first = char_set.singles_begin();
   last = char_set.singles_end();
   while(first != last)
   {
      // room for one or two characters plus the terminating null:
      charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (first->second ? 3 : 2)));
      p[0] = m_traits.translate(first->first, m_icase);
      if(first->second)
      {
         p[1] = m_traits.translate(first->second, m_icase);
         p[2] = 0;
      }
      else
         p[1] = 0;
      ++first;
   }
   //
   // now extend with all the ranges:
   //
   first = char_set.ranges_begin();
   last = char_set.ranges_end();
   while(first != last)
   {
      // first grab the endpoints of the range:
      digraph<charT> c1 = *first;
      c1.first = this->m_traits.translate(c1.first, this->m_icase);
      c1.second = this->m_traits.translate(c1.second, this->m_icase);
      ++first;
      digraph<charT> c2 = *first;
      c2.first = this->m_traits.translate(c2.first, this->m_icase);
      c2.second = this->m_traits.translate(c2.second, this->m_icase);
      ++first;
      string_type s1, s2;
      // different actions now depending upon whether collation is turned on:
      if(flags() & regex_constants::collate)
      {
         // we need to transform our range into sort keys.  Copy each end
         // point into a real array first: forming a pointer range across
         // the two separate members of a std::pair is not guaranteed valid.
         charT a1[3] = { c1.first, c1.second, charT(0), };
         charT a2[3] = { c2.first, c2.second, charT(0), };
         s1 = this->m_traits.transform(a1, (a1[1] ? a1 + 2 : a1 + 1));
         s2 = this->m_traits.transform(a2, (a2[1] ? a2 + 2 : a2 + 1));
      }
      else
      {
         // build the one or two character strings element by element,
         // again without assuming anything about std::pair's layout:
         s1.insert(s1.end(), c1.first);
         if(c1.second)
            s1.insert(s1.end(), c1.second);
         s2.insert(s2.end(), c2.first);
         if(c2.second)
            s2.insert(s2.end(), c2.second);
      }
      if(s1 > s2)
      {
         // Oops error: range start sorts after range end.
         return 0;
      }
      // store both strings back to back, each with its terminating null:
      charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );
      std::memcpy(p, s1.c_str(), sizeof(charT) * (s1.size() + 1));
      p += s1.size() + 1;
      std::memcpy(p, s2.c_str(), sizeof(charT) * (s2.size() + 1));
   }
   //
   // finally reset the address of our last state:
   //
   m_last_state = result = static_cast<re_set_long<mask_type>*>(getaddress(offset));
   return result;
}
//
// Append a syntax_element_set state (re_set) for char_set: a table with
// one entry per possible unsigned char value, true where that character
// is a member of the set.  This overload is the one selected by the
// untagged append_set() when sizeof(charT) == 1 and there are no digraphs.
//
// Returns the new state, or 0 if a range has its start ordered after its end.
//
template <class charT, class traits>
re_syntax_base* basic_regex_creator<charT, traits>::append_set(
   const basic_char_set<charT, traits>& char_set, mpl::true_*)
{
   typedef std::basic_string<charT> string_type;
   typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;

   re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set)));
   bool negate = char_set.is_negated();
   std::memset(result->_map, 0, sizeof(result->_map));
   //
   // handle singles first:
   //
   item_iterator first, last;
   first = char_set.singles_begin();
   last = char_set.singles_end();
   while(first != last)
   {
      // the translated single is the same for every table entry, so compute it once:
      const charT single = this->m_traits.translate(first->first, this->m_icase);
      for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
      {
         if(this->m_traits.translate(static_cast<charT>(i), this->m_icase) == single)
            result->_map[i] = true;
      }
      ++first;
   }
   //
   // OK now handle ranges:
   //
   first = char_set.ranges_begin();
   last = char_set.ranges_end();
   while(first != last)
   {
      // first grab the endpoints of the range:
      charT c1 = this->m_traits.translate(first->first, this->m_icase);
      ++first;
      charT c2 = this->m_traits.translate(first->first, this->m_icase);
      ++first;
      // different actions now depending upon whether collation is turned on:
      if(flags() & regex_constants::collate)
      {
         // we need to transform our range into sort keys:
         string_type s1 = this->m_traits.transform(&c1, &c1 +1);
         string_type s2 = this->m_traits.transform(&c2, &c2 +1);
         if(s1 > s2)
         {
            // Oops error:
            return 0;
         }
         for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
         {
            charT c3 = static_cast<charT>(i);
            string_type s3 = this->m_traits.transform(&c3, &c3 +1);
            if((s1 <= s3) && (s3 <= s2))
               result->_map[i] = true;
         }
      }
      else
      {
         // The table is indexed by unsigned char value, so the end points
         // must be ordered as unsigned chars too.  Comparing them as a
         // (possibly signed) charT would let a high-bit start and low end
         // through, producing a negative length for memset below.
         const unsigned char lo = static_cast<unsigned char>(c1);
         const unsigned char hi = static_cast<unsigned char>(c2);
         if(lo > hi)
         {
            // Oops error:
            return 0;
         }
         // everything in range matches:
         std::memset(result->_map + lo, true, 1u + hi - lo);
      }
   }
   //
   // and now the classes:
   //
   typedef typename traits::char_class_type mask_type;
   mask_type m = char_set.classes();
   if(flags() & regbase::icase)
   {
      // adjust m as needed: when ignoring case, asking for all of
      // "lower" or all of "upper" is widened to include "alpha":
      if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
         m |= m_alpha_mask;
   }
   if(m != 0)
   {
      for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
      {
         if(this->m_traits.is_class(static_cast<charT>(i), m))
            result->_map[i] = true;
      }
   }
   // negation is applied last, over the finished table:
   if(negate)
   {
      for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
      {
         result->_map[i] = !(result->_map[i]);
      }
   }
   return result;
}
template <class charT, class traits>
void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
{
@ -174,7 +506,8 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap));
m_pdata->m_can_be_null = 0;
create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all, 0);
m_bad_repeats = 0;
create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
// get the restart type:
m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
}
@ -186,12 +519,15 @@ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
{
switch(state->type)
{
case syntax_element_alt:
case syntax_element_rep:
case syntax_element_dot_rep:
case syntax_element_char_rep:
case syntax_element_short_set_rep:
case syntax_element_long_set_rep:
// set the id of this repeat:
static_cast<re_repeat*>(state)->id = m_repeater_id++;
// fall through:
case syntax_element_alt:
std::memset(static_cast<re_alt*>(state)->_map, 0, sizeof(static_cast<re_alt*>(state)->_map));
static_cast<re_alt*>(state)->can_be_null = 0;
// fall through:
@ -227,8 +563,10 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
// create other startmaps *first*, since we can use the
// results from these when creating out own:
create_startmaps(state->next.p);
create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take, state);
create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip, state);
m_bad_repeats = 0;
create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take);
m_bad_repeats = 0;
create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
return;
default:
state = state->next.p;
@ -237,9 +575,10 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
}
template <class charT, class traits>
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask, re_syntax_base* terminal)
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask)
{
while(state && (state != terminal))
int not_last_jump = 1;
while(state)
{
switch(state->type)
{
@ -270,34 +609,20 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
}
// now figure out if we can match a NULL string at this point:
if(pnull)
create_startmap(state->next.p, 0, pnull, mask, terminal);
create_startmap(state->next.p, 0, pnull, mask);
return;
}
case syntax_element_backref:
case syntax_element_wild:
{
// can't be null, any character can match:
if(map)
{
map[0] |= mask_init;
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
{
map[i] |= mask;
}
}
set_all_masks(map, mask);
return;
}
case syntax_element_match:
{
// must be null, any character can match:
if(map)
{
map[0] |= mask_init;
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
{
map[i] |= mask;
}
}
set_all_masks(map, mask);
if(pnull)
*pnull |= mask;
return;
@ -305,7 +630,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
case syntax_element_word_start:
{
// recurse, then AND with all the word characters:
create_startmap(state->next.p, map, pnull, mask, terminal);
create_startmap(state->next.p, map, pnull, mask);
if(map)
{
map[0] |= mask_init;
@ -320,7 +645,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
case syntax_element_word_end:
{
// recurse, then AND with all the word characters:
create_startmap(state->next.p, map, pnull, mask, terminal);
create_startmap(state->next.p, map, pnull, mask);
if(map)
{
map[0] |= mask_init;
@ -340,13 +665,35 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
return;
}
case syntax_element_long_set:
assert(0);
if(map)
{
typedef typename traits::char_class_type mask_type;
map[0] |= mask_init;
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
{
charT c = static_cast<charT>(i);
if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<mask_type>*>(state), *m_pdata))
map[i] |= mask;
}
}
return;
case syntax_element_set:
assert(0);
if(map)
{
map[0] |= mask_init;
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
{
if(static_cast<re_set*>(state)->_map[
static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), this->m_icase))])
map[i] |= mask;
}
}
return;
case syntax_element_jump:
// take the jump:
state = static_cast<re_alt*>(state)->alt.p;
break;;
not_last_jump = -1;
break;
case syntax_element_alt:
case syntax_element_rep:
case syntax_element_dot_rep:
@ -360,6 +707,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
if(map)
{
// copy previous results:
map[0] |= mask_init;
for(unsigned int i = 0; i <= UCHAR_MAX; ++i)
{
if(rep->_map[i] & mask_any)
@ -376,8 +724,17 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
{
// we haven't created a startmap for this alternative yet
// so take the union of the two options:
create_startmap(state->next.p, map, pnull, mask, state);
create_startmap(rep->alt.p, map, pnull, mask, state);
if(is_bad_repeat(state))
{
set_all_masks(map, mask);
return;
}
set_bad_repeat(state);
create_startmap(state->next.p, map, pnull, mask);
if((state->type == syntax_element_alt)
|| (static_cast<re_repeat*>(state)->min == 0)
|| (not_last_jump == 0))
create_startmap(rep->alt.p, map, pnull, mask);
}
}
return;
@ -395,6 +752,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
default:
state = state->next.p;
}
++not_last_jump;
}
}
@ -416,8 +774,9 @@ unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* st
return regbase::restart_line;
case syntax_element_word_boundary:
case syntax_element_word_start:
return regbase::restart_line;
return regbase::restart_word;
case syntax_element_buffer_start:
case syntax_element_restart_continue:
return regbase::restart_continue;
default:
state = 0;
@ -427,6 +786,68 @@ unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* st
return regbase::restart_any;
}
//
// OR mask into every one of the (1 << CHAR_BIT) entries of bits, then
// flag the map as initialised by setting mask_init in bits[0].
// Does nothing when bits is null.
//
template <class charT, class traits>
void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask)
{
   if(0 == bits)
      return;
   if(bits[0] != 0)
   {
      // something has been written already: merge entry by entry.
      unsigned char* const stop = bits + (1u << CHAR_BIT);
      for(unsigned char* p = bits; p != stop; ++p)
         *p |= mask;
   }
   else
   {
      // bits[0] is still zero (so mask_init is not set): a plain fill is used instead.
      (std::memset)(bits, mask, 1u << CHAR_BIT);
   }
   bits[0] |= mask_init;
}
//
// Returns true if pt is a repeat state whose id is flagged in the
// m_bad_repeats bitmask, or whose id is too large to be tracked there.
// Any state that is not a repeat is never "bad".
//
template <class charT, class traits>
bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt)
{
   switch(pt->type)
   {
   case syntax_element_rep:
   case syntax_element_dot_rep:
   case syntax_element_char_rep:
   case syntax_element_short_set_rep:
   case syntax_element_long_set_rep:
      {
         unsigned id = static_cast<re_repeat*>(pt)->id;
         // valid bit positions are 0 .. bits-1, so id == bits is out of range too:
         if(id >= sizeof(m_bad_repeats) * CHAR_BIT)
            return true;  // run out of bits, assume we can't traverse this one.
         // shift a value as wide as the mask itself: "1u << id" is only an
         // unsigned int shift and is undefined once id reaches its width.
         const boost::uintmax_t one = 1u;
         return (m_bad_repeats & (one << id)) != 0;
      }
   default:
      return false;
   }
}
//
// Flags the repeat state pt in the m_bad_repeats bitmask.  Repeats whose
// id does not fit in the mask, and states that are not repeats, are ignored.
//
template <class charT, class traits>
void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt)
{
   switch(pt->type)
   {
   case syntax_element_rep:
   case syntax_element_dot_rep:
   case syntax_element_char_rep:
   case syntax_element_short_set_rep:
   case syntax_element_long_set_rep:
      {
         unsigned id = static_cast<re_repeat*>(pt)->id;
         // valid bit positions are 0 .. bits-1; shift a value as wide as the
         // mask, since "1u << id" is undefined once id reaches the width of unsigned:
         if(id < sizeof(m_bad_repeats) * CHAR_BIT)
         {
            const boost::uintmax_t one = 1u;
            m_bad_repeats |= (one << id);
         }
      }
      break;
   default:
      break;
   }
}
} // namespace re_detail
} // namespace boost

View File

@ -45,6 +45,12 @@ public:
bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
bool parse_repeat_range(bool isbasic);
bool parse_alt();
bool parse_set();
bool parse_backref();
void parse_set_literal(basic_char_set<charT, traits>& char_set);
bool parse_inner_set(basic_char_set<charT, traits>& char_set);
digraph<charT> get_next_set_literal();
charT unescape_character();
private:
typedef bool (basic_regex_parser::*parser_proc_type)();
@ -54,7 +60,6 @@ private:
const charT* m_position; // our current parser position
unsigned m_mark_count; // how many sub-expressions we have
std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
unsigned m_repeater_id; // the id of the next repeater
std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
basic_regex_parser& operator=(const basic_regex_parser&);
@ -63,7 +68,7 @@ private:
template <class charT, class traits>
basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
: basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_repeater_id(0), m_alt_insert_point(0)
: basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_alt_insert_point(0)
{
}
@ -151,6 +156,8 @@ bool basic_regex_parser<charT, traits>::parse_basic()
++m_position;
return parse_repeat();
}
case regex_constants::syntax_open_set:
return parse_set();
default:
return parse_literal();
}
@ -160,7 +167,7 @@ bool basic_regex_parser<charT, traits>::parse_basic()
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_extended()
{
bool result;
bool result = true;
switch(this->m_traits.syntax_type(*m_position))
{
case regex_constants::syntax_open_mark:
@ -205,6 +212,8 @@ bool basic_regex_parser<charT, traits>::parse_extended()
break;
case regex_constants::syntax_or:
return parse_alt();
case regex_constants::syntax_open_set:
return parse_set();
default:
result = parse_literal();
break;
@ -260,6 +269,11 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
// restore the alternate insertion point:
//
this->m_alt_insert_point = last_alt_point;
//
// allow backrefs to this mark:
//
if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
this->m_backrefs |= 1u << (markid - 1);
return true;
}
@ -276,7 +290,7 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
case regex_constants::syntax_close_mark:
return false;
case regex_constants::syntax_plus:
if(this->m_pdata->m_flags & regex_constants::bk_plus_qm)
if(this->flags() & regex_constants::bk_plus_qm)
{
++m_position;
return parse_repeat(1);
@ -284,7 +298,7 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
else
return parse_literal();
case regex_constants::syntax_question:
if(this->m_pdata->m_flags & regex_constants::bk_plus_qm)
if(this->flags() & regex_constants::bk_plus_qm)
{
++m_position;
return parse_repeat(0, 1);
@ -292,22 +306,24 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
else
return parse_literal();
case regex_constants::syntax_open_brace:
if(this->m_pdata->m_flags & regbase::no_intervals)
if(this->flags() & regbase::no_intervals)
return parse_literal();
++m_position;
return parse_repeat_range(true);
case regex_constants::syntax_close_brace:
if(this->m_pdata->m_flags & regbase::no_intervals)
if(this->flags() & regbase::no_intervals)
return parse_literal();
fail(REG_EBRACE, this->m_position - this->m_base);
result = false;
break;
case regex_constants::syntax_or:
if(this->m_pdata->m_flags & regbase::bk_vbar)
if(this->flags() & regbase::bk_vbar)
return parse_alt();
else
result = parse_literal();
break;
case regex_constants::syntax_digit:
return parse_backref();
default:
result = parse_literal();
break;
@ -319,8 +335,35 @@ template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_extended_escape()
{
++m_position;
bool negate = false; // in case this is a character class escape: \w \d etc
switch(this->m_traits.escape_syntax_type(*m_position))
{
case regex_constants::escape_type_not_class:
negate = true;
// fall through:
case regex_constants::escape_type_class:
{
typedef typename traits::char_class_type mask_type;
mask_type m = this->m_traits.lookup_classname(m_position, m_position+1);
if(m != 0)
{
basic_char_set<charT, traits> char_set;
if(negate)
char_set.negate();
char_set.add_class(m);
if(0 == this->append_set(char_set))
fail(REG_ERANGE, m_position - m_base);
++m_position;
return true;
}
//
// not a class, just a regular unknown escape:
//
this->append_literal(unescape_character());
break;
}
case regex_constants::syntax_digit:
return parse_backref();
case regex_constants::escape_type_left_word:
++m_position;
this->append_state(syntax_element_word_start);
@ -329,8 +372,29 @@ bool basic_regex_parser<charT, traits>::parse_extended_escape()
++m_position;
this->append_state(syntax_element_word_end);
break;
case regex_constants::escape_type_start_buffer:
++m_position;
this->append_state(syntax_element_buffer_start);
break;
case regex_constants::escape_type_end_buffer:
++m_position;
this->append_state(syntax_element_buffer_end);
break;
case regex_constants::escape_type_word_assert:
++m_position;
this->append_state(syntax_element_word_boundary);
break;
case regex_constants::escape_type_not_word_assert:
++m_position;
this->append_state(syntax_element_within_word);
break;
case regex_constants::escape_type_Z:
++m_position;
this->append_state(syntax_element_soft_buffer_end);
break;
default:
return parse_literal();
this->append_literal(unescape_character());
break;
}
return true;
}
@ -355,7 +419,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
// when we get to here we may have a non-greedy ? mark still to come:
//
if((m_position != m_end)
&& (0 == (this->m_pdata->m_flags & (regbase::main_option_type | regbase::no_perl_ex))))
&& (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))))
{
// OK we have a perl regex, check for a '?':
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
@ -417,7 +481,6 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
rep->max = high;
rep->greedy = greedy;
rep->leading = false;
rep->id = m_repeater_id++;
// store our repeater position for later:
std::ptrdiff_t rep_off = this->getoffset(rep);
// and append a back jump to the repeat:
@ -535,7 +598,7 @@ bool basic_regex_parser<charT, traits>::parse_alt()
//
// if we didn't actually add any trailing states then that's an error:
//
if(this->m_alt_insert_point == this->m_pdata->m_data.size())
if(this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
fail(REG_EMPTY, this->m_position - this->m_base);
//
// fix up the jump we added to point to the end of the states
@ -548,6 +611,311 @@ bool basic_regex_parser<charT, traits>::parse_alt()
return result;
}
//
// Parse a bracketed set.  On entry m_position is at the character that
// opened the set; it is skipped first.  Items are gathered into a
// basic_char_set which is handed to append_set() once the closing
// bracket is found.
//
// Returns true once the set has been closed (or parse_inner_set()
// reported that it consumed the construct itself), and false when the
// end of the expression is reached first.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_set()
{
   ++m_position;
   if(m_position == m_end)
      fail(REG_EBRACK, m_position - m_base);
   basic_char_set<charT, traits> char_set;
   const charT* const set_start = m_position;  // first character after the opening bracket
   const charT* first_item = m_position;       // first character after the opening bracket and any leading caret
   while(m_position != m_end)
   {
      switch(this->m_traits.syntax_type(*m_position))
      {
      case regex_constants::syntax_open_set:
         if(!parse_inner_set(char_set))
            return true;
         break;
      case regex_constants::syntax_close_set:
         if(m_position != first_item)
         {
            // a real closing bracket: emit the finished set.
            ++m_position;
            if(0 == this->append_set(char_set))
               fail(REG_ERANGE, m_position - m_base);
            return true;
         }
         // a closing bracket in first position is just a literal:
         parse_set_literal(char_set);
         break;
      case regex_constants::syntax_caret:
         if(m_position != set_start)
         {
            // a caret anywhere but first position is just a literal:
            parse_set_literal(char_set);
            break;
         }
         char_set.negate();
         ++m_position;
         first_item = m_position;
         break;
      default:
         parse_set_literal(char_set);
         break;
      }
   }
   return m_position != m_end;
}
//
// Handle an opening bracket found inside a set.  On entry m_position is
// at that inner bracket.
//
// Returns true when the caller (parse_set) should carry on parsing the
// enclosing set, and false when a word start / word end state was
// appended in place of the whole set.
//
// Whenever the end of the expression is hit, fail() is called and true is
// returned immediately with m_position == m_end, so that m_position is
// never advanced or dereferenced past m_end even if fail() returns.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
{
   //
   // we have either a character class [:name:]
   // a collating element [.name.]
   // or an equivalence class [=name=]
   //
   if(m_end == ++m_position)
   {
      fail(REG_EBRACK, m_position - m_base);
      return true;
   }
   switch(this->m_traits.syntax_type(*m_position))
   {
   case regex_constants::syntax_colon:
      {
      // check that character classes are actually enabled:
      if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
         == (regbase::basic_syntax_group  | regbase::no_char_classes))
      {
         // not enabled: back up and treat the bracket as a literal.
         --m_position;
         parse_set_literal(char_set);
         return true;
      }
      // skip the ':'
      if(m_end == ++m_position)
      {
         fail(REG_EBRACK, m_position - m_base);
         return true;
      }
      const charT* name_first = m_position;
      // skip at least one character, then find the matching ':]'
      if(m_end == ++m_position)
      {
         fail(REG_EBRACK, m_position - m_base);
         return true;
      }
      while((m_position != m_end)
         && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
         ++m_position;
      const charT* name_last = m_position;
      if(m_end == m_position)
      {
         fail(REG_EBRACK, m_position - m_base);
         return true;
      }
      if(m_end == ++m_position)
      {
         fail(REG_EBRACK, m_position - m_base);
         return true;
      }
      if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
         fail(REG_EBRACK, m_position - m_base);
      typedef typename traits::char_class_type mask_type;
      mask_type m = this->m_traits.lookup_classname(name_first, name_last);
      if(0 == m)
      {
         if(char_set.empty() && (name_last - name_first == 1))
         {
            // maybe a special case: a one character name in an otherwise
            // empty set, immediately followed by the set's closing bracket,
            // may denote a word start or word end assertion.
            ++m_position;
            if( (m_position != m_end)
               && (this->m_traits.syntax_type(*m_position)
                  == regex_constants::syntax_close_set))
            {
               if(this->m_traits.escape_syntax_type(*name_first)
                  == regex_constants::escape_type_left_word)
               {
                  ++m_position;
                  this->append_state(syntax_element_word_start);
                  return false;
               }
               if(this->m_traits.escape_syntax_type(*name_first)
                  == regex_constants::escape_type_right_word)
               {
                  ++m_position;
                  this->append_state(syntax_element_word_end);
                  return false;
               }
            }
         }
         fail(REG_ECTYPE, name_first - m_base);
         // the special case probe above may have left us at the end already:
         if(m_position == m_end)
            return true;
      }
      char_set.add_class(m);
      ++m_position;
      break;
      }
   default:
      // anything else: back up and treat the bracket as a literal.
      --m_position;
      parse_set_literal(char_set);
      break;
   }
   return true;
}
//
// Read one literal from inside a set and add it to char_set, either as a
// single or, when it is followed by a dash and a further literal, as a range.
// A dash directly before the closing bracket is left to be read as a literal.
//
// If the expression ends in the middle of an item, fail() is called and
// the function returns at once, so m_position is never dereferenced at
// m_end even if fail() returns.
//
template <class charT, class traits>
void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
{
   digraph<charT> start_range = get_next_set_literal();
   if(m_end == m_position)
   {
      fail(REG_EBRACK, m_position - m_base);
      return;
   }
   if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
   {
      // we have a range:
      if(m_end == ++m_position)
      {
         fail(REG_EBRACK, m_position - m_base);
         return;
      }
      if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
      {
         digraph<charT> end_range = get_next_set_literal();
         char_set.add_range(start_range, end_range);
         // a second dash straight after a range is an error; only look at
         // the next character if there is one:
         if((m_position != m_end)
            && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash))
            fail(REG_ERANGE, m_position - m_base);
         return;
      }
      // the dash was the last thing before the closing bracket: step back
      // onto it so that it gets read as a literal next time round.
      --m_position;
   }
   char_set.add_single(start_range);
}
//
// Read the next literal inside a set, advancing m_position past it.
// An escape character introduces an escape sequence (decoded by
// unescape_character()) unless regex_constants::no_escape_in_lists is
// set, in which case it is taken literally like any other character.
//
template <class charT, class traits>
digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal()
{
   digraph<charT> literal;
   if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape)
   {
      // ordinary character:
      literal = *m_position++;
      return literal;
   }
   // check to see if escapes are supported first:
   if(this->flags() & regex_constants::no_escape_in_lists)
   {
      literal = *m_position++;
      return literal;
   }
   // skip the escape character and decode what follows:
   ++m_position;
   literal = unescape_character();
   return literal;
}
//
// Decode one escape sequence.  On entry m_position refers to the character
// that follows the escape character; on success the decoded character is
// returned and m_position refers to the first character after the whole
// sequence.
//
// The character is classified with escape_syntax_type(), the classifier
// that the escape_type_* constants are compared against elsewhere in this
// file.
//
// fail() is treated as possibly returning normally, so every error path
// returns at once: after an error m_position is never dereferenced at
// m_end nor advanced beyond it.  The value returned on error is charT(0).
//
template <class charT, class traits>
charT basic_regex_parser<charT, traits>::unescape_character()
{
   charT result(0);
   if(m_position == m_end)
   {
      // nothing follows the escape character:
      fail(REG_EESCAPE, m_position - m_base);
      return result;
   }
   switch(this->m_traits.escape_syntax_type(*m_position))
   {
   case regex_constants::escape_type_control_a:
      result = charT('\a');
      break;
   case regex_constants::escape_type_e:
      result = charT(27);   // ASCII ESC
      break;
   case regex_constants::escape_type_control_f:
      result = charT('\f');
      break;
   case regex_constants::escape_type_control_n:
      result = charT('\n');
      break;
   case regex_constants::escape_type_control_r:
      result = charT('\r');
      break;
   case regex_constants::escape_type_control_t:
      result = charT('\t');
      break;
   case regex_constants::escape_type_control_v:
      result = charT('\v');
      break;
   case regex_constants::escape_type_word_assert:
      // where a literal is required this escape denotes backspace:
      result = charT('\b');
      break;
   case regex_constants::escape_type_ascii_control:
      // control character: the next character selects the code
      ++m_position;
      if(m_position == m_end)
      {
         fail(REG_EESCAPE, m_position - m_base);
         return result;
      }
      // NOTE(review): the upper bound of 125 also admits lower case
      // letters, which map to 33..58 rather than to control codes --
      // confirm that this is intended.
      if((*m_position < charT('@'))
         || (*m_position > charT(125)) )
      {
         fail(REG_EESCAPE, m_position - m_base);
         return result;
      }
      result = static_cast<charT>(*m_position - charT('@'));
      break;
   case regex_constants::escape_type_hex:
      ++m_position;
      if(m_position == m_end)
      {
         fail(REG_EESCAPE, m_position - m_base);
         return result;
      }
      // maybe have \x{ddd}
      if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
      {
         ++m_position;
         if(m_position == m_end)
         {
            fail(REG_EESCAPE, m_position - m_base);
            return result;
         }
         // toi is passed m_position itself and the position is re-tested
         // against m_end afterwards, so it appears to advance m_position
         // past the digits it consumes:
         int i = this->m_traits.toi(m_position, m_end, 16);
         if((m_position == m_end)
            || (i < 0)
            || (i > (std::numeric_limits<charT>::max)())
            || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
         {
            fail(REG_BADBR, m_position - m_base);
            return result;
         }
         // skip the closing brace:
         ++m_position;
         result = charT(i);
      }
      else
      {
         // no brace: at most two hex digits
         std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), m_end - m_position);
         int i = this->m_traits.toi(m_position, m_position + len, 16);
         if((i < 0)
            || (i >> (sizeof(charT) * CHAR_BIT)))
         {
            // not a number, or too wide for charT:
            fail(REG_EESCAPE, m_position - m_base);
            return result;
         }
         result = charT(i);
      }
      // the whole sequence has been consumed already, so do not fall
      // through to the increment at the end of the function:
      return result;
   case regex_constants::syntax_digit:
      {
      // an octal escape sequence, the first character must be a zero
      // followed by up to 3 octal digits:
      std::ptrdiff_t len = (std::min)(std::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
      int val = this->m_traits.toi(m_position, m_position + len, 8);
      if(val < 0)
      {
         fail(REG_EESCAPE, m_position - m_base);
         return result;
      }
      return static_cast<charT>(val);
      }
   default:
      // any other character stands for itself:
      result = *m_position;
      break;
   }
   // step over the single character that selected the escape:
   ++m_position;
   return result;
}
//
// Parse the single digit that follows an escape character.
//
//  * a non-zero digit naming a sub-expression recorded in m_backrefs
//    appends a back-reference state for it;
//  * a zero is not a back-reference but the start of an octal escape,
//    which is decoded by unescape_character() and appended as a literal;
//  * anything else is reported as REG_ESUBREG.
//
// Returns true when a state was appended and false when an error was
// reported.  Error offsets are measured from m_base, the start of the
// expression, like every other fail() call in this file.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_backref()
{
   if(m_position == m_end)
   {
      // nothing to read; do not let toi look past the end:
      fail(REG_EESCAPE, m_position - m_base);
      return false;
   }
   int i = this->m_traits.toi(m_position, m_position + 1, 10);
   if((i > 0) && (this->m_backrefs & (1u << (i-1))))
   {
      re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
      pb->index = i;
   }
   else if(i == 0)
   {
      // not a backref at all but an octal escape sequence:
      // step back onto the zero so that unescape_character sees it
      --m_position;
      charT c = unescape_character();
      this->append_literal(c);
   }
   else
   {
      // not a digit, or a sub-expression that m_backrefs does not record:
      fail(REG_ESUBREG, m_position - m_base);
      return false;
   }
   return true;
}
} // namespace re_detail
} // namespace boost

View File

@ -304,6 +304,27 @@ template <class charT>
class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer<charT>
{
public:
typedef typename cpp_regex_traits<charT>::char_class_type char_class_type;
BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 16);
BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 17);
BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 18);
BOOST_STATIC_CONSTANT(char_class_type,
mask_base =
std::ctype<charT>::alnum
| std::ctype<charT>::alpha
| std::ctype<charT>::cntrl
| std::ctype<charT>::digit
| std::ctype<charT>::graph
| std::ctype<charT>::lower
| std::ctype<charT>::print
| std::ctype<charT>::punct
| std::ctype<charT>::space
| std::ctype<charT>::upper
| std::ctype<charT>::xdigit);
//BOOST_STATIC_ASSERT(0 == (mask_base & (mask_word | mask_unicode)));
typedef std::basic_string<charT> string_type;
//cpp_regex_traits_implementation();
cpp_regex_traits_implementation(const std::locale& l);
@ -316,10 +337,25 @@ public:
}
return get_default_error_string(n);
}
//
// Translate the character class name [p1, p2) into its class mask.
// The name is looked up as given first; if that finds nothing (mask 0)
// a lower-cased copy of the name is tried.  Returns 0 when neither
// spelling is recognised.
//
char_class_type lookup_classname(const charT* p1, const charT* p2) const
{
   char_class_type result = lookup_classname_imp(p1, p2);
   // an empty name has no lower case form to retry, and taking the address
   // of the first character of an empty string is not valid:
   if((result == 0) && (p1 != p2))
   {
      string_type s(p1, p2);
      // form the end pointer from the start plus the length: the end
      // iterator itself must never be dereferenced
      charT* first = &*s.begin();
      charT* last = first + s.size();
      this->m_pctype->tolower(first, last);
      result = lookup_classname_imp(first, last);
   }
   return result;
}
re_detail::parser_buf<charT> m_sbuf; // buffer for parsing numbers.
std::basic_istream<charT> m_is; // stream for parsing numbers.
private:
std::map<int, std::string> m_error_strings; // error messages indexed by numeric ID
//
// helpers:
//
char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const;
};
template <class charT>
@ -349,7 +385,7 @@ cpp_regex_traits_implementation<charT>::cpp_regex_traits_implementation(const st
//
if((int)cat >= 0)
{
for(int i = 0; i <= boost::regex_constants::error_unknown; ++i)
for(boost::regex_constants::error_type i = 0; i <= boost::regex_constants::error_unknown; ++i)
{
const char* p = get_default_error_string(i);
string_type default_message;
@ -369,6 +405,39 @@ cpp_regex_traits_implementation<charT>::cpp_regex_traits_implementation(const st
}
}
//
// Map the class name [p1, p2) to its mask using the built-in name table.
//
// The table below is indexed by 1 + get_default_class_id(p1, p2).  That
// function returns -1 for a name it does not know, so slot 0 holds the
// "no such class" result of 0.  The remaining entries have to stay in the
// same order as the names inside get_default_class_id().
//
template <class charT>
typename cpp_regex_traits_implementation<charT>::char_class_type
cpp_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const
{
   typedef cpp_regex_traits_implementation<charT> impl_type;
   typedef std::ctype<char> ctype_type;

   static const char_class_type class_masks[] =
   {
      0,                                            // name not recognised
      ctype_type::alnum,
      ctype_type::alpha,
      impl_type::mask_blank,
      ctype_type::cntrl,
      ctype_type::digit,
      ctype_type::digit,
      ctype_type::graph,
      ctype_type::lower,
      ctype_type::lower,
      ctype_type::print,
      ctype_type::punct,
      ctype_type::space,
      ctype_type::space,
      ctype_type::upper,
      impl_type::mask_unicode,
      ctype_type::upper,
      ctype_type::alnum | impl_type::mask_word,
      ctype_type::alnum | impl_type::mask_word,
      ctype_type::xdigit,
   };
   const std::size_t slot = 1 + re_detail::get_default_class_id(p1, p2);
   assert(slot < sizeof(class_masks) / sizeof(class_masks[0]));
   return class_masks[slot];
}
template <class charT>
boost::shared_ptr<cpp_regex_traits_implementation<charT> > create_cpp_regex_traits(const std::locale& l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT))
{
@ -376,6 +445,15 @@ boost::shared_ptr<cpp_regex_traits_implementation<charT> > create_cpp_regex_trai
return boost::shared_ptr<cpp_regex_traits_implementation<charT> >(new cpp_regex_traits_implementation<charT>(l));
}
//
// helpers to suppress warnings:
//
// Tells whether c lies outside the first 256 character codes (0..255).
// The first code outside that range is 256 itself, hence ">=".
template <class charT>
inline bool is_extended(charT c)
{ return c >= 256; }
// A plain char is never reported as extended; having a separate overload
// avoids comparing a char against 256.
inline bool is_extended(char)
{ return false; }
} // re_detail
template <class charT>
@ -390,25 +468,6 @@ public:
typedef std::locale locale_type;
typedef boost::uint_least32_t char_class_type;
BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 16);
BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 17);
BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 18);
BOOST_STATIC_CONSTANT(char_class_type,
mask_base =
std::ctype<char>::alnum
| std::ctype<char>::alpha
| std::ctype<char>::cntrl
| std::ctype<char>::digit
| std::ctype<char>::graph
| std::ctype<char>::lower
| std::ctype<char>::print
| std::ctype<char>::punct
| std::ctype<char>::space
| std::ctype<char>::upper
| std::ctype<char>::xdigit);
//BOOST_STATIC_ASSERT(0 == (mask_base & (mask_word | mask_unicode)));
cpp_regex_traits()
: m_pimpl(re_detail::create_cpp_regex_traits<charT>(std::locale()))
{ }
@ -438,33 +497,7 @@ public:
}
char_class_type lookup_classname(const charT* p1, const charT* p2) const
{
static const char_class_type masks[] =
{
0,
std::ctype<char>::alnum,
std::ctype<char>::alpha,
cpp_regex_traits<charT>::mask_blank,
std::ctype<char>::cntrl,
std::ctype<char>::digit,
std::ctype<char>::digit,
std::ctype<char>::graph,
std::ctype<char>::lower,
std::ctype<char>::lower,
std::ctype<char>::print,
std::ctype<char>::punct,
std::ctype<char>::space,
std::ctype<char>::space,
cpp_regex_traits<charT>::mask_unicode,
std::ctype<char>::upper,
std::ctype<char>::upper,
std::ctype<char>::alnum | cpp_regex_traits<charT>::mask_word,
std::ctype<char>::alnum | cpp_regex_traits<charT>::mask_word,
std::ctype<char>::xdigit,
};
int id = re_detail::get_default_class_id(p1, p2);
assert(id >= -1);
assert(id < sizeof(masks) / sizeof(masks[0]));
return masks[1 + id];
return m_pimpl->lookup_classname(p1, p2);
}
string_type lookup_collatename(const charT* p1, const charT* p2) const
{
@ -472,16 +505,17 @@ public:
}
bool is_class(charT c, char_class_type f) const
{
if((f & cpp_regex_traits<charT>::mask_base)
typedef typename std::ctype<charT>::mask ctype_mask;
if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_base)
&& (m_pimpl->m_pctype->is(
static_cast<std::ctype<charT>::mask>(f & cpp_regex_traits<charT>::mask_base), c)))
static_cast<ctype_mask>(f & re_detail::cpp_regex_traits_implementation<charT>::mask_base), c)))
return true;
else if((f & cpp_regex_traits<charT>::mask_unicode) && (c >= 256))
else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_unicode) && re_detail::is_extended(c))
return true;
else if((f & cpp_regex_traits<charT>::mask_word) && (c == '_'))
else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_word) && (c == '_'))
return true;
else if((f & cpp_regex_traits<charT>::mask_blank)
&& m_pimpl->m_pctype->is(static_cast<std::ctype<charT>::mask>(f & cpp_regex_traits<charT>::mask_base), c)
else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_blank)
&& m_pimpl->m_pctype->is(std::ctype<charT>::space, c)
&& !re_detail::is_separator(c))
return true;
return false;
@ -515,6 +549,7 @@ private:
// catalog name handler:
//
static std::string& get_catalog_name_inst();
#ifdef BOOST_HAS_THREADS
static static_mutex& get_mutex_inst();
#endif

View File

@ -23,12 +23,15 @@
#ifndef BOOST_REGEX_ERROR_TYPE_HPP
#define BOOST_REGEX_ERROR_TYPE_HPP
#ifdef __cplusplus
namespace boost{
#endif
//
// start with the POSIX API versions of these:
//
typedef unsigned reg_error_t;
typedef reg_error_t reg_errcode_t; // backwards compatibility
static const reg_error_t REG_NOERROR = 0; /* Success. */
static const reg_error_t REG_NOMATCH = 1; /* Didn't find a match (for regexec). */
@ -57,6 +60,7 @@ static const reg_error_t REG_ESTACK = 19; /* out of stack space */
static const reg_error_t REG_E_UNKNOWN = 20; /* unknown error */
static const reg_error_t REG_ENOSYS = REG_E_UNKNOWN; /* Reserved. */
#ifdef __cplusplus
namespace regex_constants{
typedef ::boost::reg_error_t error_type;
@ -80,5 +84,6 @@ static const error_type error_unknown = REG_E_UNKNOWN;
}
}
#endif // __cplusplus
#endif

View File

@ -91,17 +91,17 @@ template <class iterator, class charT, class traits_type, class char_classT>
iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
iterator last,
const re_set_long<char_classT>* set_,
const basic_regex<charT, traits_type>& e)
const regex_data<charT, traits_type>& e)
{
const charT* p = reinterpret_cast<const charT*>(set_+1);
iterator ptr;
unsigned int i;
bool icase = e.flags() & regex_constants::icase;
bool icase = e.m_flags & regex_constants::icase;
if(next == last) return next;
typedef typename traits_type::string_type traits_string_type;
const traits_type& traits_inst = e.get_traits();
const traits_type& traits_inst = e.m_traits;
// dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
// referenced
@ -149,17 +149,17 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
// try and match a range, NB only a single character can match
if(set_->cranges)
{
if((e.flags() & regex_constants::collate) == 0)
if((e.m_flags & regex_constants::collate) == 0)
s1.assign(1, col);
else
s1 = traits_inst.transform(&col, &col + 1);
for(i = 0; i < set_->cranges; ++i)
{
if(STR_COMP(s1, p) <= 0)
if(STR_COMP(s1, p) >= 0)
{
while(*p)++p;
++p;
if(STR_COMP(s1, p) >= 0)
if(STR_COMP(s1, p) <= 0)
return set_->isnot ? next : ++next;
}
else
@ -412,7 +412,7 @@ private:
void push_assertion(const re_syntax_base* ps, bool positive);
void push_alt(const re_syntax_base* ps);
void push_repeater_count(int i, repeater_count<BidiIterator>** s);
void push_single_repeat(unsigned c, const re_repeat* r, BidiIterator last_position, int id);
void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int id);
void push_non_greedy_repeat(const re_syntax_base* ps);

View File

@ -208,10 +208,10 @@ bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
else
{
// start again:
search_base = position = (*m_presult)[0].second;
search_base = position = m_result[0].second;
// If last match was null and match_not_null was not set then increment
// our start position, otherwise we go into an infinite loop:
if(((m_match_flags & match_not_null) == 0) && (m_presult->length() == 0))
if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0))
{
if(position == last)
return false;
@ -590,7 +590,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set()
// let the traits class do the work:
if(position == last)
return false;
BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re);
BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data());
if(t != position)
{
pstate = pstate->next.p;

View File

@ -103,10 +103,10 @@ struct save_state_init
template <class BidiIterator>
struct saved_single_repeat : public saved_state
{
unsigned count;
std::size_t count;
const re_repeat* rep;
BidiIterator last_position;
saved_single_repeat(unsigned c, const re_repeat* r, BidiIterator lp, int arg_id)
saved_single_repeat(std::size_t c, const re_repeat* r, BidiIterator lp, int arg_id)
: saved_state(arg_id), count(c), rep(r), last_position(lp){}
};
@ -275,7 +275,7 @@ inline void perl_matcher<BidiIterator, Allocator, traits>::push_repeater_count(i
}
template <class BidiIterator, class Allocator, class traits>
inline void perl_matcher<BidiIterator, Allocator, traits>::push_single_repeat(unsigned c, const re_repeat* r, BidiIterator last_position, int id)
inline void perl_matcher<BidiIterator, Allocator, traits>::push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int id)
{
saved_single_repeat<BidiIterator>* pmp = static_cast<saved_single_repeat<BidiIterator>*>(m_backup_state);
--pmp;
@ -585,11 +585,11 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat()
const re_repeat* rep = static_cast<const re_repeat*>(pstate);
assert(1 == static_cast<const re_literal*>(rep->next.p)->length);
const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1);
unsigned count = 0;
std::size_t count = 0;
//
// start by working out how much we can skip:
//
unsigned desired = rep->greedy ? rep->max : rep->min;
std::size_t desired = rep->greedy ? rep->max : rep->min;
if(::boost::is_random_access_iterator<BidiIterator>::value)
{
BidiIterator end = position;
@ -652,11 +652,11 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat()
#endif
const re_repeat* rep = static_cast<const re_repeat*>(pstate);
const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map;
unsigned count = 0;
std::size_t count = 0;
//
// start by working out how much we can skip:
//
unsigned desired = rep->greedy ? rep->max : rep->min;
std::size_t desired = rep->greedy ? rep->max : rep->min;
if(::boost::is_random_access_iterator<BidiIterator>::value)
{
BidiIterator end = position;
@ -719,17 +719,17 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
#endif
const re_repeat* rep = static_cast<const re_repeat*>(pstate);
const re_set_long<typename traits::char_class_type>* set = static_cast<const re_set_long<typename traits::char_class_type>*>(pstate->next.p);
unsigned count = 0;
std::size_t count = 0;
//
// start by working out how much we can skip:
//
unsigned desired = rep->greedy ? rep->max : rep->min;
std::size_t desired = rep->greedy ? rep->max : rep->min;
if(::boost::is_random_access_iterator<BidiIterator>::value)
{
BidiIterator end = position;
std::advance(end, (std::min)((unsigned)re_detail::distance(position, last), desired));
BidiIterator origin(position);
while((position != end) && (position != re_is_set_member(position, last, set, re)))
while((position != end) && (position != re_is_set_member(position, last, set, re.get_data())))
{
++position;
}
@ -737,7 +737,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
}
else
{
while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re)))
while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data())))
{
++position;
++count;
@ -926,7 +926,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_greedy_single_repeat(
}
const re_repeat* rep = pmp->rep;
unsigned count = pmp->count;
std::size_t count = pmp->count;
assert(rep->next.p);
assert(rep->alt.p);
@ -975,7 +975,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_slow_dot_repeat(bool
}
const re_repeat* rep = pmp->rep;
unsigned count = pmp->count;
std::size_t count = pmp->count;
assert(rep->type == syntax_element_dot_rep);
assert(rep->next.p);
assert(rep->alt.p);
@ -1037,7 +1037,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_fast_dot_repeat(bool
}
const re_repeat* rep = pmp->rep;
unsigned count = pmp->count;
std::size_t count = pmp->count;
assert(count < rep->max);
position = pmp->last_position;
@ -1089,7 +1089,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_char_repeat(bool r)
}
const re_repeat* rep = pmp->rep;
unsigned count = pmp->count;
std::size_t count = pmp->count;
pstate = rep->next.p;
const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1);
position = pmp->last_position;
@ -1153,7 +1153,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_short_set_repeat(bool
}
const re_repeat* rep = pmp->rep;
unsigned count = pmp->count;
std::size_t count = pmp->count;
pstate = rep->next.p;
const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map;
position = pmp->last_position;
@ -1217,7 +1217,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat(bool
}
const re_repeat* rep = pmp->rep;
unsigned count = pmp->count;
std::size_t count = pmp->count;
pstate = rep->next.p;
const re_set_long<typename traits::char_class_type>* set = static_cast<const re_set_long<typename traits::char_class_type>*>(pstate);
position = pmp->last_position;
@ -1234,7 +1234,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat(bool
// wind forward until we can skip out of the repeat:
do
{
if(position == re_is_set_member(position, last, set, re))
if(position == re_is_set_member(position, last, set, re.get_data()))
{
// failed repeat match, discard this state and look for another:
destroy_single_repeat();

View File

@ -637,7 +637,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
BidiIterator end = position;
std::advance(end, (std::min)((unsigned)re_detail::distance(position, last), desired));
BidiIterator origin(position);
while((position != end) && (position != re_is_set_member(position, last, set, re)))
while((position != end) && (position != re_is_set_member(position, last, set, re.get_data())))
{
++position;
}
@ -645,7 +645,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
}
else
{
while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re)))
while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data())))
{
++position;
++count;
@ -665,7 +665,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
{
while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
{
if(position != re_is_set_member(position, last, set, re))
if(position != re_is_set_member(position, last, set, re.get_data()))
{
++position;
++count;
@ -685,7 +685,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
if(position == last)
return false;
position = save_pos;
if(position != re_is_set_member(position, last, set, re))
if(position != re_is_set_member(position, last, set, re.get_data()))
{
++position;
++count;

View File

@ -76,7 +76,7 @@ public:
basic = basic_syntax_group | collate,
basic = basic_syntax_group | collate | no_escape_in_lists,
extended = no_bk_refs | collate | no_perl_ex | no_escape_in_lists,
normal = 0,
emacs = basic | no_char_classes | no_intervals,
@ -123,6 +123,8 @@ namespace regex_constants{
bk_plus_qm = ::boost::regbase::bk_plus_qm,
bk_vbar = ::boost::regbase::bk_vbar,
no_intervals = ::boost::regbase::no_intervals,
no_char_classes = ::boost::regbase::no_char_classes,
no_escape_in_lists = ::boost::regbase::no_escape_in_lists,
basic = ::boost::regbase::basic,
extended = ::boost::regbase::extended,

View File

@ -54,23 +54,22 @@ inline unsigned int regex_grep(Predicate foo,
return count; // we've reached the end, don't try and find an extra null match.
if(m.length() == 0)
{
if(m[0].second == last)
return count;
// we found a NULL-match, now try to find
// a non-NULL one at the same position:
BidiIterator last_end(m[0].second);
if(last_end == last)
return count;
match_results<BidiIterator, match_allocator_type> m2(m);
matcher.setf(match_not_null | match_continuous);
if(matcher.find())
{
++count;
last_end = m[0].second;
if(0 == foo(m))
return count;
}
else
{
// reset match back to where it was:
m.set_second(last_end);
m = m2;
}
matcher.unsetf((match_not_null | match_continuous) & ~flags);
}

View File

@ -132,17 +132,19 @@ int get_default_class_id(const charT* p1, const charT* p2)
{data+40, data+45,}, // punct
{data+45, data+46,}, // s
{data+45, data+50,}, // space
{data+50, data+57,}, // unicode
{data+57, data+58,}, // u
{data+50, data+57,}, // unicode
{data+57, data+62,}, // upper
{data+62, data+63,}, // w
{data+62, data+66,}, // word
{data+66, data+72,}, // xdigit
};
static const character_pointer_range<charT>* ranges_begin = ranges;
static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0]));
character_pointer_range<charT> t = { p1, p2, };
const character_pointer_range<charT>* p = std::lower_bound(ranges, ranges + (sizeof(ranges)/sizeof(ranges[0])), t);
if(t == *p)
const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t);
if((p != ranges_end) && (t == *p))
return static_cast<int>(p - ranges);
return -1;
}

View File

@ -222,11 +222,15 @@ enum re_jump_size_type
/*** proc re_is_set_member *********************************************
Forward declaration: we'll need this one later...
***********************************************************************/
template<class charT, class traits>
struct regex_data;
template <class iterator, class charT, class traits_type, class char_classT>
iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
iterator last,
const re_set_long<char_classT>* set_,
const basic_regex<charT, traits_type>& e);
const regex_data<charT, traits_type>& e);
} // namespace re_detail

View File

@ -49,8 +49,8 @@ const char* get_default_syntax(regex_constants::syntax_type n)
">",
"",
"",
"A",
"z",
"A`",
"z'",
"\n",
",",
"a",

View File

@ -55,9 +55,8 @@ template test-dll
#
template regression-dll
: <template>test-dll # sources
regress/parse.cpp
regress/regress.cpp
regress/tests.cpp
regress/main.cpp
regress/basic_tests.cpp
<lib>../../test/build/boost_prg_exec_monitor
;
@ -66,14 +65,7 @@ test-suite regex
[ regex-test regex_regress
: <template>regression # sources
: # requirements
: regress/tests.txt # input files
]
[ regex-test regex_wide_regress
: <template>regression # sources
<template>../build/msvc-stlport-tricky
: <define>TEST_UNICODE=1 # requirements
: regress/tests.txt # input files
: # input files
]
[ regex-test posix_api_check
@ -115,15 +107,8 @@ test-suite regex
[ regex-test regex_regress_dll
: <template>regression-dll # sources
: # requirements
: regress/tests.txt # input files
]
[ regex-test regex_wide_regress_dll
: <template>regression-dll # sources
<template>../build/msvc-stlport-tricky
: <define>TEST_UNICODE=1 # requirements
: regress/tests.txt # input files
: # requirements
: # input files
]
[ compile concepts/concept_check.cpp

View File

@ -16,7 +16,7 @@ EX_SOURCES =
wide_posix_api.cpp
winstances.cpp ;
lib boost_regex_extra : ../../src/$(EX_SOURCES).cpp <template>../../build/regex-options
lib boost_regex_extra : ../../src/$(EX_SOURCES) <template>../../build/regex-options
:
<define>BOOST_REGEX_MATCH_EXTRA=1
:

View File

@ -181,6 +181,11 @@ void basic_tests()
TEST_INVALID_REGEX("a\\{1,b\\}", basic);
TEST_INVALID_REGEX("a\\{1,2v\\}", basic);
}
void test_alt()
{
using namespace boost::regex_constants;
// now test the alternation operator |
TEST_REGEX_SEARCH("a|b", perl, "a", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("a|b", perl, "b", match_default, make_array(0, 1, -2, -2));
@ -205,441 +210,430 @@ void basic_tests()
TEST_REGEX_SEARCH("a|", basic|bk_vbar, "a|", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "a", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "b", match_default, make_array(0, 1, -2, -2));
}
//
// Regression tests for bracket expressions: plain sets, negated sets,
// ranges, named character classes, escapes inside sets and the \w / \W
// single character escapes.
//
// In every TEST_REGEX_SEARCH the expected result starts with the begin and
// end offsets of the match in the subject string (for example
// "[[:alnum:]]+" in "-%@a0X_-" gives 3, 6); an expectation of just
// make_array(-2, -2) is used where no match is expected.  The -2 values
// look like terminators -- confirm against the macro's definition.
//
void test_sets()
{
   using namespace boost::regex_constants;
   // now test the set operator []
   TEST_REGEX_SEARCH("[abc]", extended, "a", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("[abc]", extended, "b", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("[abc]", extended, "c", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("[abc]", extended, "d", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("[^bcd]", extended, "a", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("[^bcd]", extended, "b", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("[^bcd]", extended, "d", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("[^bcd]", extended, "e", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("a[b]c", extended, "abc", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("a[ab]c", extended, "abc", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("a[^ab]c", extended, "adc", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("a[]b]c", extended, "a]c", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("a[[b]c", extended, "a[c", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("a[-b]c", extended, "a-c", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("a[^]b]c", extended, "adc", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("a[^-b]c", extended, "adc", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("a[b-]c", extended, "a-c", match_default, make_array(0, 3, -2, -2));
   TEST_INVALID_REGEX("a[b", extended);
   TEST_INVALID_REGEX("a[]", extended);
   // now some ranges:
   TEST_REGEX_SEARCH("[b-e]", extended, "a", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("[b-e]", extended, "b", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("[b-e]", extended, "e", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("[b-e]", extended, "f", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("[^b-e]", extended, "a", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("[^b-e]", extended, "b", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("[^b-e]", extended, "e", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("[^b-e]", extended, "f", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("a[1-3]c", extended, "a2c", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("a[-3]c", extended, "a-c", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("a[-3]c", extended, "a3c", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("a[^-3]c", extended, "a-c", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a[^-3]c", extended, "a3c", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a[^-3]c", extended, "axc", match_default, make_array(0, 3, -2, -2));
   TEST_INVALID_REGEX("a[3-1]c", extended);
   TEST_INVALID_REGEX("a[1-3-5]c", extended);
   TEST_INVALID_REGEX("a[1-", extended);
   // and some classes
   TEST_REGEX_SEARCH("a[[:alpha:]]c", extended, "abc", match_default, make_array(0, 3, -2, -2));
   TEST_INVALID_REGEX("a[[:unknown:]]c", extended);
   TEST_INVALID_REGEX("a[[:", extended);
   TEST_INVALID_REGEX("a[[:alpha", extended);
   TEST_INVALID_REGEX("a[[:alpha:]", extended);
   TEST_INVALID_REGEX("a[[:alpha,:]", extended);
   TEST_INVALID_REGEX("a[[:]:]]b", extended);
   TEST_INVALID_REGEX("a[[:-:]]b", extended);
   TEST_INVALID_REGEX("a[[:alph:]]", extended);
   TEST_INVALID_REGEX("a[[:alphabet:]]", extended);
   TEST_REGEX_SEARCH("[[:alnum:]]+", extended, "-%@a0X_-", match_default, make_array(3, 6, -2, -2));
   TEST_REGEX_SEARCH("[[:alpha:]]+", extended, " -%@aX_0-", match_default, make_array(4, 6, -2, -2));
   // the blank run is two spaces and a tab, i.e. offsets 1 to 4:
   TEST_REGEX_SEARCH("[[:blank:]]+", extended, "a  \tb", match_default, make_array(1, 4, -2, -2));
   TEST_REGEX_SEARCH("[[:cntrl:]]+", extended, " a\n\tb", match_default, make_array(2, 4, -2, -2));
   TEST_REGEX_SEARCH("[[:digit:]]+", extended, "a019b", match_default, make_array(1, 4, -2, -2));
   TEST_REGEX_SEARCH("[[:graph:]]+", extended, " a%b ", match_default, make_array(1, 4, -2, -2));
   TEST_REGEX_SEARCH("[[:lower:]]+", extended, "AabC", match_default, make_array(1, 3, -2, -2));
   TEST_REGEX_SEARCH("[[:print:]]+", extended, "AabC", match_default, make_array(0, 4, -2, -2));
   TEST_REGEX_SEARCH("[[:punct:]]+", extended, " %-&\t", match_default, make_array(1, 4, -2, -2));
   TEST_REGEX_SEARCH("[[:space:]]+", extended, "a \n\t\rb", match_default, make_array(1, 5, -2, -2));
   TEST_REGEX_SEARCH("[[:upper:]]+", extended, "aBCd", match_default, make_array(1, 3, -2, -2));
   TEST_REGEX_SEARCH("[[:xdigit:]]+", extended, "p0f3Cx", match_default, make_array(1, 5, -2, -2));
   //
   // escapes are supported in character classes if we have either
   // perl or awk regular expressions:
   //
   TEST_REGEX_SEARCH("[\\n]", perl, "\n", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("[\\n]", basic, "\n", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("[\\n]", basic, "\\", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("[[:class:]", basic|no_char_classes, ":", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("[[:class:]", basic|no_char_classes, "[", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("[[:class:]", basic|no_char_classes, "c", match_default, make_array(0, 1, -2, -2));
   //
   // test single character escapes:
   //
   TEST_REGEX_SEARCH("\\w", perl, "A", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\w", perl, "Z", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\w", perl, "a", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\w", perl, "z", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\w", perl, "_", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\w", perl, "}", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("\\w", perl, "`", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("\\w", perl, "[", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("\\w", perl, "@", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("\\W", perl, "a", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("\\W", perl, "z", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("\\W", perl, "A", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("\\W", perl, "Z", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("\\W", perl, "_", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("\\W", perl, "}", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\W", perl, "`", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\W", perl, "[", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\W", perl, "@", match_default, make_array(0, 1, -2, -2));
}
//
// test_anchors:
// Checks the line anchors ^ and $ under POSIX extended syntax for four
// combinations of match flags.  Every case below uses the same three
// subjects per anchor: the bare text, the text embedded in other
// characters, and the text next to an embedded newline.
//
// NOTE(review): the make_array() arguments look like (start, end) offset
// pairs for the expected match, closed by -2, -2, with a bare -2, -2
// meaning "no match expected" -- inferred from the data in this file,
// confirm against the macro definition.
//
void test_anchors()
{
   using namespace boost::regex_constants;

   // default flags: the anchors are expected to match at the ends of the
   // text and also either side of an embedded newline.
   TEST_REGEX_SEARCH("^ab", extended, "ab", match_default, make_array(0, 2, -2, -2));
   TEST_REGEX_SEARCH("^ab", extended, "xxabxx", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("^ab", extended, "xx\nabzz", match_default, make_array(3, 5, -2, -2));
   TEST_REGEX_SEARCH("ab$", extended, "ab", match_default, make_array(0, 2, -2, -2));
   TEST_REGEX_SEARCH("ab$", extended, "abxx", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("ab$", extended, "ab\nzz", match_default, make_array(0, 2, -2, -2));

   // match_not_bol | match_not_eol: no match is expected at the very start
   // or very end of the text, but the embedded newline cases still match.
   TEST_REGEX_SEARCH("^ab", extended, "ab", match_default | match_not_bol | match_not_eol, make_array(-2, -2));
   TEST_REGEX_SEARCH("^ab", extended, "xxabxx", match_default | match_not_bol | match_not_eol, make_array(-2, -2));
   TEST_REGEX_SEARCH("^ab", extended, "xx\nabzz", match_default | match_not_bol | match_not_eol, make_array(3, 5, -2, -2));
   TEST_REGEX_SEARCH("ab$", extended, "ab", match_default | match_not_bol | match_not_eol, make_array(-2, -2));
   TEST_REGEX_SEARCH("ab$", extended, "abxx", match_default | match_not_bol | match_not_eol, make_array(-2, -2));
   TEST_REGEX_SEARCH("ab$", extended, "ab\nzz", match_default | match_not_bol | match_not_eol, make_array(0, 2, -2, -2));

   // match_single_line: only the ends of the text are expected to match,
   // the embedded newline cases no longer do.
   TEST_REGEX_SEARCH("^ab", extended, "ab", match_default | match_single_line, make_array(0, 2, -2, -2));
   TEST_REGEX_SEARCH("^ab", extended, "xxabxx", match_default | match_single_line, make_array(-2, -2));
   TEST_REGEX_SEARCH("^ab", extended, "xx\nabzz", match_default | match_single_line, make_array(-2, -2));
   TEST_REGEX_SEARCH("ab$", extended, "ab", match_default | match_single_line, make_array(0, 2, -2, -2));
   TEST_REGEX_SEARCH("ab$", extended, "abxx", match_default | match_single_line, make_array(-2, -2));
   TEST_REGEX_SEARCH("ab$", extended, "ab\nzz", match_default | match_single_line, make_array(-2, -2));

   // all three flags together: nothing is expected to match.
   TEST_REGEX_SEARCH("^ab", extended, "ab", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
   TEST_REGEX_SEARCH("^ab", extended, "xxabxx", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
   TEST_REGEX_SEARCH("^ab", extended, "xx\nabzz", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
   TEST_REGEX_SEARCH("ab$", extended, "ab", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
   TEST_REGEX_SEARCH("ab$", extended, "abxx", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
   TEST_REGEX_SEARCH("ab$", extended, "ab\nzz", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
}
//
// test_backrefs:
// Checks back-references (\1, \2): references that must be rejected at
// compile time, references that must or must not match, and references
// to groups that sit inside a repeat.
//
// NOTE(review): the make_array() arguments look like (start, end) offset
// pairs for the whole match followed by one pair per marked
// sub-expression, closed by -2, -2 -- inferred from the data in this
// file, confirm against the macro definition.
//
void test_backrefs()
{
   using namespace boost::regex_constants;

   // these two expressions are expected to be rejected as invalid:
   TEST_INVALID_REGEX("a(b)\\2c", perl);
   TEST_INVALID_REGEX("a(b\\1)c", perl);

   // the reference has to repeat exactly what the group captured:
   TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
   TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("^(.)\\1", perl, "abc", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a([bc])\\1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));

   // strictly speaking this is at best ambiguous, at worst wrong; it is what
   // most regular expression implementations will match though.
   TEST_REGEX_SEARCH("a(([bc])\\2)*d", perl, "abbccd", match_default, make_array(0, 6, 3, 5, 3, 4, -2, -2));
   TEST_REGEX_SEARCH("a(([bc])\\2)*d", perl, "abbcbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a((b)*\\2)*d", perl, "abbbd", match_default, make_array(0, 5, 1, 4, 2, 3, -2, -2));

   // results expected under perl syntax:
   TEST_REGEX_SEARCH("(ab*)[ab]*\\1", perl, "ababaaa", match_default, make_array(0, 4, 0, 2, -2, -2));
   TEST_REGEX_SEARCH("(a)\\1bcd", perl, "aabcd", match_default, make_array(0, 5, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("(a)\\1bc*d", perl, "aabcd", match_default, make_array(0, 5, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("(a)\\1bc*d", perl, "aabd", match_default, make_array(0, 4, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("(a)\\1bc*d", perl, "aabcccd", match_default, make_array(0, 7, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("(a)\\1bc*[ce]d", perl, "aabcccd", match_default, make_array(0, 7, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("^(a)\\1b(c)*cd$", perl, "aabcccd", match_default, make_array(0, 7, 0, 1, 4, 5, -2, -2));

   // the first perl case again under POSIX extended syntax, where a
   // different (longer) overall match is expected:
   TEST_REGEX_SEARCH("(ab*)[ab]*\\1", extended, "ababaaa", match_default, make_array(0, 7, 0, 1, -2, -2));
}
//
// test_character_escapes:
// Checks escape sequences that stand for one literal character: numeric
// codes (\0ooo, \xhh), the C-style control escapes (\a \f \n \r \v \t)
// and the \cX control-character form.
//
void test_character_escapes()
{
   using namespace boost::regex_constants;

   // characters by code (leading-zero numeric escapes):
   TEST_REGEX_SEARCH("\\0101", perl, "A", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\00", perl, "\0", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\0", perl, "\0", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\0172", perl, "z", match_default, make_array(0, 1, -2, -2));

   // extra escape sequences:
   TEST_REGEX_SEARCH("\\a", perl, "\a", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\f", perl, "\f", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\n", perl, "\n", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\r", perl, "\r", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\v", perl, "\v", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\t", perl, "\t", match_default, make_array(0, 1, -2, -2));

   // updated tests for version 2: hexadecimal codes, either letter case
   TEST_REGEX_SEARCH("\\x41", perl, "A", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\xff", perl, "\xff", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\xFF", perl, "\xff", match_default, make_array(0, 1, -2, -2));

   // control characters written as \cX:
   TEST_REGEX_SEARCH("\\c@", perl, "\0", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\cA", perl, "\x1", match_default, make_array(0, 1, -2, -2));
   // NOTE(review): this expects \cz to match 0x3A ('z' - '@'), not the
   // ASCII control-Z code 0x1A -- confirm that is the intended mapping.
   TEST_REGEX_SEARCH("\\cz", perl, "\x3A", match_default, make_array(0, 1, -2, -2));
   // \c followed by '=' or '?' is expected to be rejected:
   TEST_INVALID_REGEX("\\c=", extended);
   TEST_INVALID_REGEX("\\c?", extended);

   // '=' and ':' outside of any escape are plain literals:
   TEST_REGEX_SEARCH("=:", perl, "=:", match_default, make_array(0, 2, -2, -2));
}
//
// test_assertion_escapes:
// Checks the zero-width assertions: word start (\< and [[:<:]]), word end
// (\> and [[:>:]]), word boundary (\b), within-word (\B) and the buffer
// start / end operators (\` and \').
//
// NOTE(review): the make_array() arguments look like (start, end) offset
// pairs for the expected match, closed by -2, -2, with a bare -2, -2
// meaning "no match expected" -- inferred from the data in this file,
// confirm against the macro definition.
//
void test_assertion_escapes()
{
   using namespace boost::regex_constants;

   // word start:
   // (the subject has two leading spaces, so the word begins at offset 2)
   TEST_REGEX_SEARCH("\\<abcd", perl, "  abcd", match_default, make_array(2, 6, -2, -2));
   TEST_REGEX_SEARCH("\\<ab", perl, "cab", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("\\<ab", perl, "\nab", match_default, make_array(1, 3, -2, -2));
   TEST_REGEX_SEARCH("\\<tag", perl, "::tag", match_default, make_array(2, 5, -2, -2));

   // word end:
   TEST_REGEX_SEARCH("abc\\>", perl, "abc", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("abc\\>", perl, "abcd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("abc\\>", perl, "abc\n", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("abc\\>", perl, "abc::", match_default, make_array(0, 3, -2, -2));

   // word boundary:
   // (again two leading spaces, so the word begins at offset 2)
   TEST_REGEX_SEARCH("\\babcd", perl, "  abcd", match_default, make_array(2, 6, -2, -2));
   TEST_REGEX_SEARCH("\\bab", perl, "cab", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("\\bab", perl, "\nab", match_default, make_array(1, 3, -2, -2));
   TEST_REGEX_SEARCH("\\btag", perl, "::tag", match_default, make_array(2, 5, -2, -2));
   TEST_REGEX_SEARCH("abc\\b", perl, "abc", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("abc\\b", perl, "abcd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("abc\\b", perl, "abc\n", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("abc\\b", perl, "abc::", match_default, make_array(0, 3, -2, -2));

   // within word:
   // (\B alone yields an empty match between the two letters)
   TEST_REGEX_SEARCH("\\B", perl, "ab", match_default, make_array(1, 1, -2, -2));
   TEST_REGEX_SEARCH("a\\Bb", perl, "ab", match_default, make_array(0, 2, -2, -2));
   TEST_REGEX_SEARCH("a\\B", perl, "ab", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("a\\B", perl, "a", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a\\B", perl, "a ", match_default, make_array(-2, -2));

   // buffer operators:
   // (unlike ^ and $, no match is expected next to a newline here)
   TEST_REGEX_SEARCH("\\`abc", perl, "abc", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("\\`abc", perl, "\nabc", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("\\`abc", perl, " abc", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("abc\\'", perl, "abc", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("abc\\'", perl, "abc\n", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("abc\\'", perl, "abc ", match_default, make_array(-2, -2));

   // word start, bracket-expression spelling:
   // (two leading spaces, so the word begins at offset 2)
   TEST_REGEX_SEARCH("[[:<:]]abcd", perl, "  abcd", match_default, make_array(2, 6, -2, -2));
   TEST_REGEX_SEARCH("[[:<:]]ab", perl, "cab", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("[[:<:]]ab", perl, "\nab", match_default, make_array(1, 3, -2, -2));
   TEST_REGEX_SEARCH("[[:<:]]tag", perl, "::tag", match_default, make_array(2, 5, -2, -2));

   // word end, bracket-expression spelling:
   TEST_REGEX_SEARCH("abc[[:>:]]", perl, "abc", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("abc[[:>:]]", perl, "abcd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("abc[[:>:]]", perl, "abc\n", match_default, make_array(0, 3, -2, -2));
   TEST_REGEX_SEARCH("abc[[:>:]]", perl, "abc::", match_default, make_array(0, 3, -2, -2));
}
void test_tricky_cases()
{
using namespace boost::regex_constants;
//TEST_REGEX_SEARCH("", perl, "", match_default, make_array(-2, -2));
//
// now follows various complex expressions designed to try and bust the matcher:
//
TEST_REGEX_SEARCH("a(((b)))c", perl, "abc", match_default, make_array(0, 3, 1, 2, 1, 2, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|(c))d", perl, "abd", match_default, make_array(0, 3, 1, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|(c))d", perl, "acd", match_default, make_array(0, 3, 1, 2, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b*|c)d", perl, "abbd", match_default, make_array(0, 4, 1, 3, -2, -2));
// just gotta have one DFA-buster, of course
TEST_REGEX_SEARCH("a[ab]{20}", perl, "aaaaabaaaabaaaabaaaab", match_default, make_array(0, 21, -2, -2));
// and an inline expansion in case somebody gets tricky
TEST_REGEX_SEARCH("a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]", perl, "aaaaabaaaabaaaabaaaab", match_default, make_array(0, 21, -2, -2));
// and in case somebody just slips in an NFA...
TEST_REGEX_SEARCH("a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)", perl, "aaaaabaaaabaaaabaaaabweeknights", match_default, make_array(0, 31, 21, 24, 24, 31, -2, -2));
// one really big one
TEST_REGEX_SEARCH("1234567890123456789012345678901234567890123456789012345678901234567890", perl, "a1234567890123456789012345678901234567890123456789012345678901234567890b", match_default, make_array(1, 71, -2, -2));
// fish for problems as brackets go past 8
TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn]", perl, "xacegikmoq", match_default, make_array(1, 8, -2, -2));
TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn][op]", perl, "xacegikmoq", match_default, make_array(1, 9, -2, -2));
TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn][op][qr]", perl, "xacegikmoqy", match_default, make_array(1, 10, -2, -2));
TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn][op][q]", perl, "xacegikmoqy", match_default, make_array(1, 10, -2, -2));
// and as parenthesis go past 9:
TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)", perl, "zabcdefghi", match_default, make_array(1, 9, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, -2, -2));
TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)(i)", perl, "zabcdefghij", match_default, make_array(1, 10, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, -2, -2));
TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)", perl, "zabcdefghijk", match_default, make_array(1, 11, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, -2, -2));
TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)", perl, "zabcdefghijkl", match_default, make_array(1, 12, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, -2, -2));
TEST_REGEX_SEARCH("(a)d|(b)c", perl, "abc", match_default, make_array(1, 3, -1, -1, 1, 2, -2, -2));
TEST_REGEX_SEARCH("_+((www)|(ftp)|(mailto)):_*", perl, "_wwwnocolon _mailto:", match_default, make_array(12, 20, 13, 19, -1, -1, -1, -1, 13, 19, -2, -2));
// subtleties of matching
TEST_REGEX_SEARCH("a(b)?c\\1d", perl, "acd", match_default, make_array(0, 3, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b?c)+d", perl, "accd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("(wee|week)(knights|night)", perl, "weeknights", match_default, make_array(0, 10, 0, 3, 3, 10, -2, -2));
TEST_REGEX_SEARCH(".*", perl, "abc", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|(c))d", perl, "abd", match_default, make_array(0, 3, 1, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|(c))d", perl, "acd", match_default, make_array(0, 3, 1, 2, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b*|c|e)d", perl, "abbd", match_default, make_array(0, 4, 1, 3, -2, -2));
TEST_REGEX_SEARCH("a(b*|c|e)d", perl, "acd", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b*|c|e)d", perl, "ad", match_default, make_array(0, 2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("a(b?)c", perl, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b?)c", perl, "ac", match_default, make_array(0, 2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("a(b+)c", perl, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b+)c", perl, "abbbc", match_default, make_array(0, 5, 1, 4, -2, -2));
TEST_REGEX_SEARCH("a(b*)c", perl, "ac", match_default, make_array(0, 2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("(a|ab)(bc([de]+)f|cde)", perl, "abcdef", match_default, make_array(0, 6, 0, 1, 1, 6, 3, 5, -2, -2));
TEST_REGEX_SEARCH("a([bc]?)c", perl, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a([bc]?)c", perl, "ac", match_default, make_array(0, 2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("a([bc]+)c", perl, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a([bc]+)c", perl, "abcc", match_default, make_array(0, 4, 1, 3, -2, -2));
TEST_REGEX_SEARCH("a([bc]+)bc", perl, "abcbc", match_default, make_array(0, 5, 1, 3, -2, -2));
TEST_REGEX_SEARCH("a(bb+|b)b", perl, "abb", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(bbb+|bb+|b)b", perl, "abb", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(bbb+|bb+|b)b", perl, "abbb", match_default, make_array(0, 4, 1, 3, -2, -2));
TEST_REGEX_SEARCH("a(bbb+|bb+|b)bb", perl, "abbb", match_default, make_array(0, 4, 1, 2, -2, -2));
TEST_REGEX_SEARCH("(.*).*", perl, "abcdef", match_default, make_array(0, 6, 0, 6, -2, -2));
TEST_REGEX_SEARCH("(a*)*", perl, "bc", match_default, make_array(0, 0, 0, 0, -2, -2));
TEST_REGEX_SEARCH("xyx*xz", perl, "xyxxxxyxxxz", match_default, make_array(5, 11, -2, -2));
// do we get the right subexpression when it is used more than once?
TEST_REGEX_SEARCH("a(b|c)*d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|c)*d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c)+d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|c)+d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c?)+d", perl, "ad", match_default, make_array(0, 2, 1, 1, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,0}d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,1}d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,1}d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,2}d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,2}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,}d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b|c){0,}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){1,1}d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|c){1,2}d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|c){1,2}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){1,}d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|c){1,}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,2}d", perl, "acbd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,2}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,4}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,4}d", perl, "abcbd", match_default, make_array(0, 5, 3, 4, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,4}d", perl, "abcbcd", match_default, make_array(0, 6, 4, 5, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|c){2,}d", perl, "abcbd", match_default, make_array(0, 5, 3, 4, -2, -2));
// perl only:
TEST_REGEX_SEARCH("a(b|c?)+d", perl, "abcd", match_default, make_array(0, 4, 3, 3, -2, -2));
TEST_REGEX_SEARCH("a(b+|((c)*))+d", perl, "abd", match_default, make_array(0, 3, 2, 2, 2, 2, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b+|((c)*))+d", perl, "abcd", match_default, make_array(0, 4, 3, 3, 3, 3, 2, 3, -2, -2));
// posix only:
TEST_REGEX_SEARCH("a(b|c?)+d", extended, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b|((c)*))+d", extended, "abcd", match_default, make_array(0, 4, 2, 3, 2, 3, 2, 3, -2, -2));
TEST_REGEX_SEARCH("a(b+|((c)*))+d", extended, "abd", match_default, make_array(0, 3, 1, 2, -1, -1, -1, -1, -2, -2));
TEST_REGEX_SEARCH("a(b+|((c)*))+d", extended, "abcd", match_default, make_array(0, 4, 2, 3, 2, 3, 2, 3, -2, -2));
// literals:
TEST_REGEX_SEARCH("\\**?/{}", literal, "\\**?/{}", match_default, make_array(0, 7, -2, -2));
// try to match C++ syntax elements:
// line comment:
TEST_REGEX_SEARCH("//[^\\n]*", perl, "++i //here is a line comment\n", match_default, make_array(4, 28, -2, -2));
// block comment:
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/* here is a block comment */", match_default, make_array(0, 29, 26, 27, -2, -2));
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/**/", match_default, make_array(0, 4, -1, -1, -2, -2));
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/***/", match_default, make_array(0, 5, -1, -1, -2, -2));
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/****/", match_default, make_array(0, 6, -1, -1, -2, -2));
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/*****/", match_default, make_array(0, 7, -1, -1, -2, -2));
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/*****/*/", match_default, make_array(0, 7, -1, -1, -2, -2));
// preprocessor directives:
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", perl, "#define some_symbol", match_default, make_array(0, 19, -1, -1, -2, -2));
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", perl, "#define some_symbol(x) #x", match_default, make_array(0, 25, -1, -1, -2, -2));
// perl only:
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", perl, "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);", match_default, make_array(0, 53, 30, 42, -2, -2));
// literals:
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFF", match_default, make_array(0, 4, 0, 4, 0, 4, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2));
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "35", match_default, make_array(0, 2, 0, 2, -1, -1, 0, 2, -1, -1, -1, -1, -1, -1, -2, -2));
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFFu", match_default, make_array(0, 5, 0, 4, 0, 4, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2));
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFFL", match_default, make_array(0, 5, 0, 4, 0, 4, -1, -1, 4, 5, -1, -1, -1, -1, -2, -2));
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFFFFFFFFFFFFFFFFuint64", match_default, make_array(0, 24, 0, 18, 0, 18, -1, -1, 19, 24, 19, 24, 22, 24, -2, -2));
// strings:
TEST_REGEX_SEARCH("'([^\\\\']|\\\\.)*'", perl, "'\\x3A'", match_default, make_array(0, 6, 4, 5, -2, -2));
TEST_REGEX_SEARCH("'([^\\\\']|\\\\.)*'", perl, "'\\''", match_default, make_array(0, 4, 1, 3, -2, -2));
TEST_REGEX_SEARCH("'([^\\\\']|\\\\.)*'", perl, "'\\n'", match_default, make_array(0, 4, 1, 3, -2, -2));
// posix only:
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", awk, "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);", match_default, make_array(0, 53, 28, 42, -2, -2));
// now try and test some unicode specific characters:
TEST_REGEX_SEARCH_W(L"[[:unicode:]]+", perl, L"a\u0300\u0400z", match_default, make_array(1, 3, -2, -2));
TEST_REGEX_SEARCH_W(L"[\x10-\xff]", perl, L"\u0300\u0400", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH_W(L"[\01-\05]{5}", perl, L"\u0300\u0400\u0300\u0400\u0300\u0400", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH_W(L"[\x300-\x400]+", perl, L"\u0300\u0400\u0300\u0400\u0300\u0400", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH_W(L"[\\x{300}-\\x{400}]+", perl, L"\u0300\u0400\u0300\u0400\u0300\u0400", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH_W(L"\\x{300}\\x{400}+", perl, L"\u0300\u0400\u0400\u0400\u0400\u0400", match_default, make_array(0, 6, -2, -2));
// finally try some case insensitive matches:
TEST_REGEX_SEARCH("0123456789@abcdefghijklmnopqrstuvwxyz\\[\\\\\\]\\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\\{\\|\\}", perl|icase, "0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}", match_default, make_array(0, 72, -2, -2));
TEST_REGEX_SEARCH("a", perl|icase, "A", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("A", perl|icase, "a", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("[abc]+", perl|icase, "abcABC", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH("[ABC]+", perl|icase, "abcABC", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH("[a-z]+", perl|icase, "abcABC", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH("[A-Z]+", perl|icase, "abzANZ", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH("[a-Z]+", perl|icase, "abzABZ", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH("[A-z]+", perl|icase, "abzABZ", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH("[[:lower:]]+", perl|icase, "abyzABYZ", match_default, make_array(0, 8, -2, -2));
TEST_REGEX_SEARCH("[[:upper:]]+", perl|icase, "abzABZ", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH("[[:word:]]+", perl|icase, "abcZZZ", match_default, make_array(0, 6, -2, -2));
TEST_REGEX_SEARCH("[[:alpha:]]+", perl|icase, "abyzABYZ", match_default, make_array(0, 8, -2, -2));
TEST_REGEX_SEARCH("[[:alnum:]]+", perl|icase, "09abyzABYZ", match_default, make_array(0, 10, -2, -2));
// known and suspected bugs:
TEST_REGEX_SEARCH("\\(", perl, "(", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\)", perl, ")", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\$", perl, "$", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\^", perl, "^", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\.", perl, ".", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\*", perl, "*", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\+", perl, "+", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\?", perl, "?", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\[", perl, "[", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\]", perl, "]", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\|", perl, "|", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\\\", perl, "\\", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("#", perl, "#", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\#", perl, "#", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("a-", perl, "a-", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("\\-", perl, "-", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\{", perl, "{", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\}", perl, "}", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("0", perl, "0", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("1", perl, "1", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("9", perl, "9", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("b", perl, "b", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("B", perl, "B", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("<", perl, "<", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH(">", perl, ">", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("w", perl, "w", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("W", perl, "W", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("`", perl, "`", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH(" ", perl, " ", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("\\n", perl, "\n", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH(",", perl, ",", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("a", perl, "a", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("f", perl, "f", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("n", perl, "n", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("r", perl, "r", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("t", perl, "t", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("v", perl, "v", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("c", perl, "c", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("x", perl, "x", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH(":", perl, ":", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("(\\.[[:alnum:]]+){2}", perl, "w.a.b ", match_default, make_array(1, 5, 3, 5, -2, -2));
#if 0
; now test the set operator []
- match_default normal REG_EXTENDED
; try some literals first
[abc] a 0 1
[abc] b 0 1
[abc] c 0 1
[abc] d -1 -1
[^bcd] a 0 1
[^bcd] b -1 -1
[^bcd] d -1 -1
[^bcd] e 0 1
a[b]c abc 0 3
a[ab]c abc 0 3
a[^ab]c adc 0 3
a[]b]c a]c 0 3
a[[b]c a[c 0 3
a[-b]c a-c 0 3
a[^]b]c adc 0 3
a[^-b]c adc 0 3
a[b-]c a-c 0 3
a[b !
a[] !
; then some ranges
[b-e] a -1 -1
[b-e] b 0 1
[b-e] e 0 1
[b-e] f -1 -1
[^b-e] a 0 1
[^b-e] b -1 -1
[^b-e] e -1 -1
[^b-e] f 0 1
a[1-3]c a2c 0 3
a[3-1]c !
a[1-3-5]c !
a[1- !
; and some classes
a[[:alpha:]]c abc 0 3
a[[:unknown:]]c !
a[[: !
a[[:alpha !
a[[:alpha:] !
a[[:alpha,:] !
a[[:]:]]b !
a[[:-:]]b !
a[[:alph:]] !
a[[:alphabet:]] !
[[:alnum:]]+ -%@a0X_- 3 6
[[:alpha:]]+ -%@aX_0- 3 5
[[:blank:]]+ "a \tb" 1 4
[[:cntrl:]]+ a\n\tb 1 3
[[:digit:]]+ a019b 1 4
[[:graph:]]+ " a%b " 1 4
[[:lower:]]+ AabC 1 3
; This test fails with STLPort, disable for now as this is a corner case anyway...
;[[:print:]]+ "\na b\n" 1 4
[[:punct:]]+ " %-&\t" 1 4
[[:space:]]+ "a \n\t\rb" 1 5
[[:upper:]]+ aBCd 1 3
[[:xdigit:]]+ p0f3Cx 1 5
; now test flag settings:
- escape_in_lists REG_NO_POSIX_TEST
[\n] \n 0 1
- REG_NO_POSIX_TEST
[\n] \n -1 -1
[\n] \\ 0 1
[[:class:] : 0 1
[[:class:] [ 0 1
[[:class:] c 0 1
; line anchors
- match_default normal REG_EXTENDED
^ab ab 0 2
^ab xxabxx -1 -1
^ab xx\nabzz 3 5
ab$ ab 0 2
ab$ abxx -1 -1
ab$ ab\nzz 0 2
- match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL
^ab ab -1 -1
^ab xxabxx -1 -1
^ab xx\nabzz 3 5
ab$ ab -1 -1
ab$ abxx -1 -1
ab$ ab\nzz 0 2
; line anchors, single line mode
- match_default normal match_single_line REG_NO_POSIX_TEST
^ab ab 0 2
^ab xxabxx -1 -1
^ab xx\nabzz -1 -1
ab$ ab 0 2
ab$ abxx -1 -1
ab$ ab\nzz -1 -1
- match_default match_not_bol match_not_eol normal REG_NO_POSIX_TEST match_single_line
^ab ab -1 -1
^ab xxabxx -1 -1
^ab xx\nabzz -1 -1
ab$ ab -1 -1
ab$ abxx -1 -1
ab$ ab\nzz -1 -1
; back references
- match_default normal REG_PERL
a(b)\2c !
a(b\1)c !
a(b*)c\1d abbcbbd 0 7 1 3
a(b*)c\1d abbcbd -1 -1
a(b*)c\1d abbcbbbd -1 -1
^(.)\1 abc -1 -1
a([bc])\1d abcdabbd 4 8 5 6
; strictly speaking this is at best ambiguous, at worst wrong, this is what most
; re implimentations will match though.
a(([bc])\2)*d abbccd 0 6 3 5 3 4
a(([bc])\2)*d abbcbd -1 -1
a((b)*\2)*d abbbd 0 5 1 4 2 3
; perl only:
(ab*)[ab]*\1 ababaaa 0 4 0 2
(a)\1bcd aabcd 0 5 0 1
(a)\1bc*d aabcd 0 5 0 1
(a)\1bc*d aabd 0 4 0 1
(a)\1bc*d aabcccd 0 7 0 1
(a)\1bc*[ce]d aabcccd 0 7 0 1
^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5
; posix only:
- match_default extended REG_EXTENDED
(ab*)[ab]*\1 ababaaa 0 7 0 1
;
; characters by code:
- match_default normal REG_PERL REG_STARTEND
\0101 A 0 1
\00 \0 0 1
\0 \0 0 1
\0172 z 0 1
;
; word operators:
\w a 0 1
\w z 0 1
\w A 0 1
\w Z 0 1
\w _ 0 1
\w } -1 -1
\w ` -1 -1
\w [ -1 -1
\w @ -1 -1
; non-word:
\W a -1 -1
\W z -1 -1
\W A -1 -1
\W Z -1 -1
\W _ -1 -1
\W } 0 1
\W ` 0 1
\W [ 0 1
\W @ 0 1
; word start:
\<abcd " abcd" 2 6
\<ab cab -1 -1
\<ab "\nab" 1 3
\<tag ::tag 2 5
;word end:
abc\> abc 0 3
abc\> abcd -1 -1
abc\> abc\n 0 3
abc\> abc:: 0 3
; word boundary:
\babcd " abcd" 2 6
\bab cab -1 -1
\bab "\nab" 1 3
\btag ::tag 2 5
abc\b abc 0 3
abc\b abcd -1 -1
abc\b abc\n 0 3
abc\b abc:: 0 3
; within word:
\B ab 1 1
a\Bb ab 0 2
a\B ab 0 1
a\B a -1 -1
a\B "a " -1 -1
;
; buffer operators:
\`abc abc 0 3
\`abc \nabc -1 -1
\`abc " abc" -1 -1
abc\' abc 0 3
abc\' abc\n -1 -1
abc\' "abc " -1 -1
;
; extra escape sequences:
\a \a 0 1
\f \f 0 1
\n \n 0 1
\r \r 0 1
\t \t 0 1
\v \v 0 1
;
; now follows various complex expressions designed to try and bust the matcher:
a(((b)))c abc 0 3 1 2 1 2 1 2
a(b|(c))d abd 0 3 1 2 -1 -1
a(b|(c))d acd 0 3 1 2 1 2
a(b*|c)d abbd 0 4 1 3
; just gotta have one DFA-buster, of course
a[ab]{20} aaaaabaaaabaaaabaaaab 0 21
; and an inline expansion in case somebody gets tricky
a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21
; and in case somebody just slips in an NFA...
a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31
; one really big one
1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71
; fish for problems as brackets go past 8
[ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8
[ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9
[ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10
[ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10
; and as parenthesis go past 9:
(a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9
(a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10
(a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11
(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12
(a)d|(b)c abc 1 3 -1 -1 1 2
"_+((www)|(ftp)|(mailto)):_*" "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19
; subtleties of matching
a(b)?c\1d acd 0 3 -1 -1
a(b?c)+d accd 0 4 2 3
(wee|week)(knights|night) weeknights 0 10 0 3 3 10
.* abc 0 3
a(b|(c))d abd 0 3 1 2 -1 -1
a(b|(c))d acd 0 3 1 2 1 2
a(b*|c|e)d abbd 0 4 1 3
a(b*|c|e)d acd 0 3 1 2
a(b*|c|e)d ad 0 2 1 1
a(b?)c abc 0 3 1 2
a(b?)c ac 0 2 1 1
a(b+)c abc 0 3 1 2
a(b+)c abbbc 0 5 1 4
a(b*)c ac 0 2 1 1
(a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5
a([bc]?)c abc 0 3 1 2
a([bc]?)c ac 0 2 1 1
a([bc]+)c abc 0 3 1 2
a([bc]+)c abcc 0 4 1 3
a([bc]+)bc abcbc 0 5 1 3
a(bb+|b)b abb 0 3 1 2
a(bbb+|bb+|b)b abb 0 3 1 2
a(bbb+|bb+|b)b abbb 0 4 1 3
a(bbb+|bb+|b)bb abbb 0 4 1 2
(.*).* abcdef 0 6 0 6
(a*)* bc 0 0 0 0
xyx*xz xyxxxxyxxxz 5 11
; do we get the right subexpression when it is used more than once?
a(b|c)*d ad 0 2 -1 -1
a(b|c)*d abcd 0 4 2 3
a(b|c)+d abd 0 3 1 2
a(b|c)+d abcd 0 4 2 3
a(b|c?)+d ad 0 2 1 1
a(b|c){0,0}d ad 0 2 -1 -1
a(b|c){0,1}d ad 0 2 -1 -1
a(b|c){0,1}d abd 0 3 1 2
a(b|c){0,2}d ad 0 2 -1 -1
a(b|c){0,2}d abcd 0 4 2 3
a(b|c){0,}d ad 0 2 -1 -1
a(b|c){0,}d abcd 0 4 2 3
a(b|c){1,1}d abd 0 3 1 2
a(b|c){1,2}d abd 0 3 1 2
a(b|c){1,2}d abcd 0 4 2 3
a(b|c){1,}d abd 0 3 1 2
a(b|c){1,}d abcd 0 4 2 3
a(b|c){2,2}d acbd 0 4 2 3
a(b|c){2,2}d abcd 0 4 2 3
a(b|c){2,4}d abcd 0 4 2 3
a(b|c){2,4}d abcbd 0 5 3 4
a(b|c){2,4}d abcbcd 0 6 4 5
a(b|c){2,}d abcd 0 4 2 3
a(b|c){2,}d abcbd 0 5 3 4
; perl only:
a(b|c?)+d abcd 0 4 3 3
a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1
a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3
; posix only:
- match_default extended REG_EXTENDED REG_STARTEND
a(b|c?)+d abcd 0 4 2 3
a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3
a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1
a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3
- match_default normal REG_EXTENDED REG_STARTEND REG_NOSPEC literal
\**?/{} \\**?/{} 0 7
- match_default normal REG_PERL
; try to match C++ syntax elements:
; line comment:
//[^\n]* "++i //here is a line comment\n" 4 28
; block comment:
/\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27
/\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1
/\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1
/\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1
/\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1
/\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1
; preprocessor directives:
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1
; perl only:
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 30 42
; literals:
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF 0 4 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 0 2 0 2 -1 -1 0 2 -1 -1 -1 -1 -1 -1
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 0 5 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 0 5 0 4 0 4 -1 -1 4 5 -1 -1 -1 -1
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 0 24 0 18 0 18 -1 -1 19 24 19 24 22 24
; strings:
'([^\\']|\\.)*' '\\x3A' 0 6 4 5
'([^\\']|\\.)*' '\\'' 0 4 1 3
'([^\\']|\\.)*' '\\n' 0 4 1 3
; posix only:
- match_default extended escape_in_lists REG_EXTENDED REG_NO_POSIX_TEST ; we disable POSIX testing because it can't handle escapes in sets
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 28 42
; now try and test some unicode specific characters:
- match_default normal REG_PERL REG_UNICODE_ONLY
[[:unicode:]]+ a\0300\0400z 1 3
[\x10-\xff] \39135\12409 -1 -1
[\01-\05]{5} \36865\36865\36865\36865\36865 -1 -1
; finally try some case insensitive matches:
- match_default normal REG_EXTENDED REG_ICASE
; upper and lower have no meaning here so they fail, however these
; may compile with other libraries...
;[[:lower:]] !
;[[:upper:]] !
0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72
; known and suspected bugs:
- match_default normal REG_EXTENDED
\( ( 0 1
\) ) 0 1
\$ $ 0 1
\^ ^ 0 1
\. . 0 1
\* * 0 1
\+ + 0 1
\? ? 0 1
\[ [ 0 1
\] ] 0 1
\| | 0 1
\\ \\ 0 1
# # 0 1
\# # 0 1
a- a- 0 2
\- - 0 1
\{ { 0 1
\} } 0 1
0 0 0 1
1 1 0 1
9 9 0 1
b b 0 1
B B 0 1
< < 0 1
> > 0 1
w w 0 1
W W 0 1
` ` 0 1
' ' 0 1
\n \n 0 1
, , 0 1
a a 0 1
f f 0 1
n n 0 1
r r 0 1
t t 0 1
v v 0 1
c c 0 1
x x 0 1
: : 0 1
(\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5
- match_default normal REG_EXTENDED REG_ICASE
a A 0 1
A a 0 1
[abc]+ abcABC 0 6
[ABC]+ abcABC 0 6
[a-z]+ abcABC 0 6
[A-Z]+ abzANZ 0 6
[a-Z]+ abzABZ 0 6
[A-z]+ abzABZ 0 6
[[:lower:]]+ abyzABYZ 0 8
[[:upper:]]+ abzABZ 0 6
[[:word:]]+ abcZZZ 0 6
[[:alpha:]]+ abyzABYZ 0 8
[[:alnum:]]+ 09abyzABYZ 0 10
; updated tests for version 2:
- match_default normal REG_EXTENDED
\x41 A 0 1
\xff \255 0 1
\xFF \255 0 1
- match_default normal REG_EXTENDED REG_NO_POSIX_TEST
\c@ \0 0 1
- match_default normal REG_EXTENDED
\cA \1 0 1
\cz \58 0 1
\c= !
\c? !
=: =: 0 2
; word start:
[[:<:]]abcd " abcd" 2 6
[[:<:]]ab cab -1 -1
[[:<:]]ab "\nab" 1 3
[[:<:]]tag ::tag 2 5
; word end:
abc[[:>:]] abc 0 3
abc[[:>:]] abcd -1 -1
abc[[:>:]] abc\n 0 3
abc[[:>:]] abc:: 0 3
; collating elements and rewritten set code:
- match_default normal REG_EXTENDED REG_STARTEND
[[.zero.]] 0 0 1

View File

@ -7,6 +7,13 @@ int error_count = 0;
// Test driver entry point: runs each test group once, in a fixed order,
// then returns the file-level error_count as the result.
// NOTE(review): error_count is presumably updated by the test groups on
// failure -- confirm against their definitions.
int cpp_main(int argc, char * argv[])
{
   // Table of test groups; executed front to back.
   typedef void (*test_group)();
   static const test_group groups[] = {
      &basic_tests,
      &test_alt,
      &test_sets,
      &test_anchors,
      &test_backrefs,
      &test_character_escapes,
      &test_assertion_escapes,
      &test_tricky_cases
   };
   const test_group* const last = groups + sizeof(groups) / sizeof(groups[0]);
   for(const test_group* current = groups; current != last; ++current)
   {
      (*current)();
   }
   return error_count;
}
@ -42,4 +49,5 @@ const int* make_array(int first, ...)
}
va_end(ap);
return data;
}
}

View File

@ -85,6 +85,13 @@ const int* make_array(int first, ...);
// define the test group procedures:
//
// Entry points for the individual test groups. Each takes no arguments and
// returns nothing; cpp_main calls every one of them once, in the order
// listed here.
void basic_tests();
void test_alt();
void test_sets();
void test_anchors();
void test_backrefs();
void test_character_escapes();
void test_assertion_escapes();
void test_tricky_cases();
#endif