mirror of
https://github.com/boostorg/regex.git
synced 2025-07-29 12:07:28 +02:00
Almost complete POSIX regex support now...
[SVN r22624]
This commit is contained in:
@ -59,7 +59,7 @@ lib boost_regex : ../src/$(SOURCES) <template>regex-options
|
||||
;
|
||||
|
||||
|
||||
dll boost_regex : ../src/$(SOURCES).cpp <template>regex-dll-options
|
||||
dll boost_regex : ../src/$(SOURCES) <template>regex-dll-options
|
||||
:
|
||||
common-variant-tag
|
||||
:
|
||||
|
@ -60,6 +60,7 @@
|
||||
# include <boost/throw_exception.hpp>
|
||||
# include <boost/scoped_ptr.hpp>
|
||||
# include <boost/shared_ptr.hpp>
|
||||
# include <boost/mpl/bool_fwd.hpp>
|
||||
# ifndef BOOST_NO_STD_LOCALE
|
||||
# include <locale>
|
||||
# endif
|
||||
|
@ -154,6 +154,11 @@ public:
|
||||
{
|
||||
return this->m_can_be_null;
|
||||
}
|
||||
const regex_data<charT, traits>& get_data()const
|
||||
{
|
||||
basic_regex_implementation<charT, traits> const* p = this;
|
||||
return *static_cast<const regex_data<charT, traits>*>(p);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace re_detail
|
||||
@ -470,6 +475,11 @@ public:
|
||||
assert(m_pimpl.get());
|
||||
return m_pimpl->can_be_null();
|
||||
}
|
||||
const re_detail::regex_data<charT, traits>& get_data()const
|
||||
{
|
||||
assert(m_pimpl.get());
|
||||
return m_pimpl->get_data();
|
||||
}
|
||||
|
||||
private:
|
||||
shared_ptr<re_detail::basic_regex_implementation<charT, traits> > m_pimpl;
|
||||
|
@ -28,6 +28,106 @@ namespace boost{
|
||||
|
||||
namespace re_detail{
|
||||
|
||||
//
// digraph:
// A pair of characters describing one element of a character set.
// The pair is publicly a std::pair, so the characters are reached
// through .first and .second.
//
template <class charT>
struct digraph : public std::pair<charT, charT>
{
   // Construct from up to two characters; both default to zero.
   digraph(charT c1 = 0, charT c2 = 0)
      : std::pair<charT, charT>(c1, c2)
   {
   }

   // Construct from a string that must hold one or two characters
   // (checked with BOOST_ASSERT). When only one character is present
   // the second member is set to zero.
   digraph(const std::basic_string<charT>& s)
      : std::pair<charT, charT>()
   {
      BOOST_ASSERT(s.size() <= 2);
      BOOST_ASSERT(s.size());
      this->first = s[0];
      if(s.size() > 1)
         this->second = s[1];
      else
         this->second = 0;
   }
};
|
||||
|
||||
template <class charT, class traits>
|
||||
class basic_char_set
|
||||
{
|
||||
public:
|
||||
typedef digraph<charT> digraph_type;
|
||||
typedef std::basic_string<charT> string_type;
|
||||
typedef typename traits::char_class_type mask_type;
|
||||
|
||||
basic_char_set()
|
||||
{
|
||||
m_negate = false;
|
||||
m_has_digraphs = false;
|
||||
m_classes = 0;
|
||||
m_empty = true;
|
||||
}
|
||||
|
||||
void add_single(const digraph_type& s)
|
||||
{
|
||||
m_singles.push_back(s);
|
||||
if(s.second)
|
||||
m_has_digraphs = true;
|
||||
m_empty = false;
|
||||
}
|
||||
void add_range(const digraph_type& first, const digraph_type& end)
|
||||
{
|
||||
m_ranges.push_back(first);
|
||||
m_ranges.push_back(end);
|
||||
if(first.second || end.second)
|
||||
m_has_digraphs = true;
|
||||
m_empty = false;
|
||||
}
|
||||
void add_class(mask_type m)
|
||||
{
|
||||
m_classes |= m;
|
||||
m_empty = false;
|
||||
}
|
||||
void negate()
|
||||
{
|
||||
m_negate = true;
|
||||
m_empty = false;
|
||||
}
|
||||
|
||||
//
|
||||
// accessor functions:
|
||||
//
|
||||
bool has_digraphs()const
|
||||
{
|
||||
return m_has_digraphs;
|
||||
}
|
||||
bool is_negated()const
|
||||
{
|
||||
return m_negate;
|
||||
}
|
||||
typedef typename std::vector<digraph_type>::const_iterator list_iterator;
|
||||
list_iterator singles_begin()const
|
||||
{
|
||||
return m_singles.begin();
|
||||
}
|
||||
list_iterator singles_end()const
|
||||
{
|
||||
return m_singles.end();
|
||||
}
|
||||
list_iterator ranges_begin()const
|
||||
{
|
||||
return m_ranges.begin();
|
||||
}
|
||||
list_iterator ranges_end()const
|
||||
{
|
||||
return m_ranges.end();
|
||||
}
|
||||
mask_type classes()const
|
||||
{
|
||||
return m_classes;
|
||||
}
|
||||
bool empty()const
|
||||
{
|
||||
return m_empty;
|
||||
}
|
||||
private:
|
||||
std::vector<digraph_type> m_singles; // a list of single characters to match
|
||||
std::vector<digraph_type> m_ranges; // a list of end points of our ranges
|
||||
bool m_negate; // true if the set is to be negated
|
||||
bool m_has_digraphs; // true if we have digraphs present
|
||||
mask_type m_classes; // character classes to match
|
||||
bool m_empty; // whether we've added anything yet
|
||||
};
|
||||
|
||||
template <class charT, class traits>
|
||||
class basic_regex_creator
|
||||
{
|
||||
@ -54,38 +154,63 @@ public:
|
||||
m_pdata->m_flags = flags;
|
||||
m_icase = flags & regex_constants::icase;
|
||||
}
|
||||
regbase::flag_type flags()
|
||||
{
|
||||
return m_pdata->m_flags;
|
||||
}
|
||||
re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
|
||||
re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
|
||||
re_literal* append_literal(charT c);
|
||||
re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set);
|
||||
re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::false_*);
|
||||
re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::true_*);
|
||||
void finalize(const charT* p1, const charT* p2);
|
||||
protected:
|
||||
regex_data<charT, traits>* m_pdata; // pointer to the basic_regex_data struct we are filling in
|
||||
const traits& m_traits; // convenience reference to traits class
|
||||
re_syntax_base* m_last_state;// the last state we added
|
||||
bool m_icase; // true for case insensitive matches
|
||||
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
|
||||
typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
|
||||
regex_data<charT, traits>* m_pdata; // pointer to the basic_regex_data struct we are filling in
|
||||
const traits& m_traits; // convenience reference to traits class
|
||||
re_syntax_base* m_last_state; // the last state we added
|
||||
bool m_icase; // true for case insensitive matches
|
||||
unsigned m_repeater_id; // the id of the next repeater
|
||||
unsigned m_backrefs; // bitmask of permitted backrefs
|
||||
boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
|
||||
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
|
||||
typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
|
||||
typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character
|
||||
typename traits::char_class_type m_upper_mask; // mask used to determine if a character is an uppercase character
|
||||
typename traits::char_class_type m_alpha_mask; // mask used to determine if a character is an alphabetic character
|
||||
private:
|
||||
basic_regex_creator& operator=(const basic_regex_creator&);
|
||||
basic_regex_creator(const basic_regex_creator&);
|
||||
|
||||
void fixup_pointers(re_syntax_base* state);
|
||||
void create_startmaps(re_syntax_base* state);
|
||||
void create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask, re_syntax_base* terminal);
|
||||
void create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask);
|
||||
unsigned get_restart_type(re_syntax_base* state);
|
||||
void set_all_masks(unsigned char* bits, unsigned char);
|
||||
bool is_bad_repeat(re_syntax_base* pt);
|
||||
void set_bad_repeat(re_syntax_base* pt);
|
||||
};
|
||||
|
||||
template <class charT, class traits>
|
||||
basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
|
||||
: m_pdata(data), m_traits(data->m_traits), m_last_state(0)
|
||||
: m_pdata(data), m_traits(data->m_traits), m_last_state(0), m_repeater_id(0), m_backrefs(0)
|
||||
{
|
||||
m_pdata->m_data.clear();
|
||||
static const charT w = 'w';
|
||||
static const charT s = 's';
|
||||
static const charT l[] = { 'l', 'o', 'w', 'e', 'r', };
|
||||
static const charT u[] = { 'u', 'p', 'p', 'e', 'r', };
|
||||
static const charT a[] = { 'a', 'l', 'p', 'h', 'a', };
|
||||
m_word_mask = m_traits.lookup_classname(&w, &w +1);
|
||||
m_mask_space = m_traits.lookup_classname(&s, &s +1);
|
||||
m_lower_mask = m_traits.lookup_classname(l, l + 5);
|
||||
m_upper_mask = m_traits.lookup_classname(u, u + 5);
|
||||
m_alpha_mask = m_traits.lookup_classname(a, a + 5);
|
||||
BOOST_ASSERT(m_word_mask);
|
||||
BOOST_ASSERT(m_mask_space);
|
||||
BOOST_ASSERT(m_lower_mask);
|
||||
BOOST_ASSERT(m_upper_mask);
|
||||
BOOST_ASSERT(m_alpha_mask);
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
@ -148,6 +273,213 @@ re_literal* basic_regex_creator<charT, traits>::append_literal(charT c)
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
inline re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
const basic_char_set<charT, traits>& char_set)
|
||||
{
|
||||
typedef mpl::bool_<sizeof(charT) == 1> truth_type;
|
||||
return char_set.has_digraphs()
|
||||
? append_set(char_set, static_cast<mpl::false_*>(0))
|
||||
: append_set(char_set, static_cast<truth_type*>(0));
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
const basic_char_set<charT, traits>& char_set, mpl::false_*)
|
||||
{
|
||||
typedef std::basic_string<charT> string_type;
|
||||
typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
|
||||
typedef typename traits::char_class_type mask_type;
|
||||
|
||||
re_set_long<mask_type>* result = static_cast<re_set_long<mask_type>*>(append_state(syntax_element_long_set, sizeof(re_set_long<mask_type>)));
|
||||
//
|
||||
// fill in the basics:
|
||||
//
|
||||
result->csingles = static_cast<unsigned int>(std::distance(char_set.singles_begin(), char_set.singles_end()));
|
||||
result->cranges = static_cast<unsigned int>(std::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
|
||||
result->cequivalents = 0;
|
||||
result->cclasses = char_set.classes();
|
||||
if(flags() & regbase::icase)
|
||||
{
|
||||
// adjust classes as needed:
|
||||
if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
|
||||
result->cclasses |= m_alpha_mask;
|
||||
}
|
||||
|
||||
result->isnot = char_set.is_negated();
|
||||
result->singleton = !char_set.has_digraphs();
|
||||
//
|
||||
// remember where the state is for later:
|
||||
//
|
||||
std::ptrdiff_t offset = getoffset(result);
|
||||
//
|
||||
// now extend with all the singles:
|
||||
//
|
||||
item_iterator first, last;
|
||||
first = char_set.singles_begin();
|
||||
last = char_set.singles_end();
|
||||
while(first != last)
|
||||
{
|
||||
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (first->second ? 3 : 2)));
|
||||
p[0] = m_traits.translate(first->first, m_icase);
|
||||
if(first->second)
|
||||
{
|
||||
p[1] = m_traits.translate(first->second, m_icase);
|
||||
p[2] = 0;
|
||||
}
|
||||
else
|
||||
p[1] = 0;
|
||||
++first;
|
||||
}
|
||||
//
|
||||
// now extend with all the ranges:
|
||||
//
|
||||
first = char_set.ranges_begin();
|
||||
last = char_set.ranges_end();
|
||||
while(first != last)
|
||||
{
|
||||
// first grab the endpoints of the range:
|
||||
digraph<charT> c1 = *first;
|
||||
c1.first = this->m_traits.translate(c1.first, this->m_icase);
|
||||
c1.second = this->m_traits.translate(c1.second, this->m_icase);
|
||||
++first;
|
||||
digraph<charT> c2 = *first;
|
||||
c2.first = this->m_traits.translate(c2.first, this->m_icase);
|
||||
c2.second = this->m_traits.translate(c2.second, this->m_icase);
|
||||
++first;
|
||||
string_type s1, s2;
|
||||
// different actions now depending upon whether collation is turned on:
|
||||
if(flags() & regex_constants::collate)
|
||||
{
|
||||
// we need to transform our range into sort keys:
|
||||
s1 = this->m_traits.transform(&c1.first, (c1.second ? &c1.second +1 : &c1.second));
|
||||
s2 = this->m_traits.transform(&c2.first, (c2.second ? &c2.second +1 : &c2.second));
|
||||
}
|
||||
else
|
||||
{
|
||||
if(c1.second)
|
||||
s1 = string_type(&c1.first, &c1.second+1);
|
||||
else
|
||||
s1 = string_type(1, c1.first);
|
||||
if(c2.second)
|
||||
s2 = string_type(&c2.first, &c2.second+1);
|
||||
else
|
||||
s2 = string_type(1, c2.first);
|
||||
}
|
||||
if(s1 > s2)
|
||||
{
|
||||
// Oops error:
|
||||
return 0;
|
||||
}
|
||||
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );
|
||||
std::memcpy(p, s1.c_str(), sizeof(charT) * (s1.size() + 1));
|
||||
p += s1.size() + 1;
|
||||
std::memcpy(p, s2.c_str(), sizeof(charT) * (s2.size() + 1));
|
||||
}
|
||||
//
|
||||
// finally reset the address of our last state:
|
||||
//
|
||||
m_last_state = result = static_cast<re_set_long<mask_type>*>(getaddress(offset));
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
const basic_char_set<charT, traits>& char_set, mpl::true_*)
|
||||
{
|
||||
typedef std::basic_string<charT> string_type;
|
||||
typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
|
||||
|
||||
re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set)));
|
||||
bool negate = char_set.is_negated();
|
||||
std::memset(result->_map, 0, sizeof(result->_map));
|
||||
//
|
||||
// handle singles first:
|
||||
//
|
||||
item_iterator first, last;
|
||||
first = char_set.singles_begin();
|
||||
last = char_set.singles_end();
|
||||
while(first != last)
|
||||
{
|
||||
for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i)
|
||||
{
|
||||
if(this->m_traits.translate(static_cast<charT>(i), this->m_icase)
|
||||
== this->m_traits.translate(first->first, this->m_icase))
|
||||
result->_map[i] = true;
|
||||
}
|
||||
++first;
|
||||
}
|
||||
//
|
||||
// OK now handle ranges:
|
||||
//
|
||||
first = char_set.ranges_begin();
|
||||
last = char_set.ranges_end();
|
||||
while(first != last)
|
||||
{
|
||||
// first grab the endpoints of the range:
|
||||
charT c1 = this->m_traits.translate(first->first, this->m_icase);
|
||||
++first;
|
||||
charT c2 = this->m_traits.translate(first->first, this->m_icase);
|
||||
++first;
|
||||
// different actions now depending upon whether collation is turned on:
|
||||
if(flags() & regex_constants::collate)
|
||||
{
|
||||
// we need to transform our range into sort keys:
|
||||
string_type s1 = this->m_traits.transform(&c1, &c1 +1);
|
||||
string_type s2 = this->m_traits.transform(&c2, &c2 +1);
|
||||
if(s1 > s2)
|
||||
{
|
||||
// Oops error:
|
||||
return 0;
|
||||
}
|
||||
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
{
|
||||
charT c3 = static_cast<charT>(i);
|
||||
string_type s3 = this->m_traits.transform(&c3, &c3 +1);
|
||||
if((s1 <= s3) && (s3 <= s2))
|
||||
result->_map[i] = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(c1 > c2)
|
||||
{
|
||||
// Oops error:
|
||||
return 0;
|
||||
}
|
||||
// everything in range matches:
|
||||
std::memset(result->_map + static_cast<unsigned char>(c1), true, 1 + static_cast<unsigned char>(c2) - static_cast<unsigned char>(c1));
|
||||
}
|
||||
}
|
||||
//
|
||||
// and now the classes:
|
||||
//
|
||||
typedef typename traits::char_class_type mask_type;
|
||||
mask_type m = char_set.classes();
|
||||
if(flags() & regbase::icase)
|
||||
{
|
||||
// adjust m as needed:
|
||||
if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
|
||||
m |= m_alpha_mask;
|
||||
}
|
||||
if(m != 0)
|
||||
{
|
||||
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
{
|
||||
if(this->m_traits.is_class(static_cast<charT>(i), m))
|
||||
result->_map[i] = true;
|
||||
}
|
||||
}
|
||||
if(negate)
|
||||
{
|
||||
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
{
|
||||
result->_map[i] = !(result->_map[i]);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
|
||||
{
|
||||
@ -174,7 +506,8 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
|
||||
std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap));
|
||||
m_pdata->m_can_be_null = 0;
|
||||
|
||||
create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all, 0);
|
||||
m_bad_repeats = 0;
|
||||
create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
|
||||
// get the restart type:
|
||||
m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
|
||||
}
|
||||
@ -186,12 +519,15 @@ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
|
||||
{
|
||||
switch(state->type)
|
||||
{
|
||||
case syntax_element_alt:
|
||||
case syntax_element_rep:
|
||||
case syntax_element_dot_rep:
|
||||
case syntax_element_char_rep:
|
||||
case syntax_element_short_set_rep:
|
||||
case syntax_element_long_set_rep:
|
||||
// set the id of this repeat:
|
||||
static_cast<re_repeat*>(state)->id = m_repeater_id++;
|
||||
// fall through:
|
||||
case syntax_element_alt:
|
||||
std::memset(static_cast<re_alt*>(state)->_map, 0, sizeof(static_cast<re_alt*>(state)->_map));
|
||||
static_cast<re_alt*>(state)->can_be_null = 0;
|
||||
// fall through:
|
||||
@ -227,8 +563,10 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
|
||||
// create other startmaps *first*, since we can use the
|
||||
// results from these when creating our own:
|
||||
create_startmaps(state->next.p);
|
||||
create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take, state);
|
||||
create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip, state);
|
||||
m_bad_repeats = 0;
|
||||
create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take);
|
||||
m_bad_repeats = 0;
|
||||
create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
|
||||
return;
|
||||
default:
|
||||
state = state->next.p;
|
||||
@ -237,9 +575,10 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask, re_syntax_base* terminal)
|
||||
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask)
|
||||
{
|
||||
while(state && (state != terminal))
|
||||
int not_last_jump = 1;
|
||||
while(state)
|
||||
{
|
||||
switch(state->type)
|
||||
{
|
||||
@ -270,34 +609,20 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
}
|
||||
// now figure out if we can match a NULL string at this point:
|
||||
if(pnull)
|
||||
create_startmap(state->next.p, 0, pnull, mask, terminal);
|
||||
create_startmap(state->next.p, 0, pnull, mask);
|
||||
return;
|
||||
}
|
||||
case syntax_element_backref:
|
||||
case syntax_element_wild:
|
||||
{
|
||||
// can't be null, any character can match:
|
||||
if(map)
|
||||
{
|
||||
map[0] |= mask_init;
|
||||
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
{
|
||||
map[i] |= mask;
|
||||
}
|
||||
}
|
||||
set_all_masks(map, mask);
|
||||
return;
|
||||
}
|
||||
case syntax_element_match:
|
||||
{
|
||||
// must be null, any character can match:
|
||||
if(map)
|
||||
{
|
||||
map[0] |= mask_init;
|
||||
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
{
|
||||
map[i] |= mask;
|
||||
}
|
||||
}
|
||||
set_all_masks(map, mask);
|
||||
if(pnull)
|
||||
*pnull |= mask;
|
||||
return;
|
||||
@ -305,7 +630,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
case syntax_element_word_start:
|
||||
{
|
||||
// recurse, then AND with all the word characters:
|
||||
create_startmap(state->next.p, map, pnull, mask, terminal);
|
||||
create_startmap(state->next.p, map, pnull, mask);
|
||||
if(map)
|
||||
{
|
||||
map[0] |= mask_init;
|
||||
@ -320,7 +645,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
case syntax_element_word_end:
|
||||
{
|
||||
// recurse, then AND with all the word characters:
|
||||
create_startmap(state->next.p, map, pnull, mask, terminal);
|
||||
create_startmap(state->next.p, map, pnull, mask);
|
||||
if(map)
|
||||
{
|
||||
map[0] |= mask_init;
|
||||
@ -340,13 +665,35 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
return;
|
||||
}
|
||||
case syntax_element_long_set:
|
||||
assert(0);
|
||||
if(map)
|
||||
{
|
||||
typedef typename traits::char_class_type mask_type;
|
||||
map[0] |= mask_init;
|
||||
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
{
|
||||
charT c = static_cast<charT>(i);
|
||||
if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<mask_type>*>(state), *m_pdata))
|
||||
map[i] |= mask;
|
||||
}
|
||||
}
|
||||
return;
|
||||
case syntax_element_set:
|
||||
assert(0);
|
||||
if(map)
|
||||
{
|
||||
map[0] |= mask_init;
|
||||
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
{
|
||||
if(static_cast<re_set*>(state)->_map[
|
||||
static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), this->m_icase))])
|
||||
map[i] |= mask;
|
||||
}
|
||||
}
|
||||
return;
|
||||
case syntax_element_jump:
|
||||
// take the jump:
|
||||
state = static_cast<re_alt*>(state)->alt.p;
|
||||
break;;
|
||||
not_last_jump = -1;
|
||||
break;
|
||||
case syntax_element_alt:
|
||||
case syntax_element_rep:
|
||||
case syntax_element_dot_rep:
|
||||
@ -360,6 +707,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
if(map)
|
||||
{
|
||||
// copy previous results:
|
||||
map[0] |= mask_init;
|
||||
for(unsigned int i = 0; i <= UCHAR_MAX; ++i)
|
||||
{
|
||||
if(rep->_map[i] & mask_any)
|
||||
@ -376,8 +724,17 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
{
|
||||
// we haven't created a startmap for this alternative yet
|
||||
// so take the union of the two options:
|
||||
create_startmap(state->next.p, map, pnull, mask, state);
|
||||
create_startmap(rep->alt.p, map, pnull, mask, state);
|
||||
if(is_bad_repeat(state))
|
||||
{
|
||||
set_all_masks(map, mask);
|
||||
return;
|
||||
}
|
||||
set_bad_repeat(state);
|
||||
create_startmap(state->next.p, map, pnull, mask);
|
||||
if((state->type == syntax_element_alt)
|
||||
|| (static_cast<re_repeat*>(state)->min == 0)
|
||||
|| (not_last_jump == 0))
|
||||
create_startmap(rep->alt.p, map, pnull, mask);
|
||||
}
|
||||
}
|
||||
return;
|
||||
@ -395,6 +752,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
default:
|
||||
state = state->next.p;
|
||||
}
|
||||
++not_last_jump;
|
||||
}
|
||||
}
|
||||
|
||||
@ -416,8 +774,9 @@ unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* st
|
||||
return regbase::restart_line;
|
||||
case syntax_element_word_boundary:
|
||||
case syntax_element_word_start:
|
||||
return regbase::restart_line;
|
||||
return regbase::restart_word;
|
||||
case syntax_element_buffer_start:
|
||||
case syntax_element_restart_continue:
|
||||
return regbase::restart_continue;
|
||||
default:
|
||||
state = 0;
|
||||
@ -427,6 +786,68 @@ unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* st
|
||||
return regbase::restart_any;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask)
|
||||
{
|
||||
//
|
||||
// set mask in all of bits elements,
|
||||
// if bits[0] has mask_init not set then we can
|
||||
// optimise this to a call to memset:
|
||||
//
|
||||
if(bits)
|
||||
{
|
||||
if(bits[0] == 0)
|
||||
(std::memset)(bits, mask, 1u << CHAR_BIT);
|
||||
else
|
||||
{
|
||||
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
bits[i] |= mask;
|
||||
}
|
||||
bits[0] |= mask_init;
|
||||
}
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt)
|
||||
{
|
||||
switch(pt->type)
|
||||
{
|
||||
case syntax_element_rep:
|
||||
case syntax_element_dot_rep:
|
||||
case syntax_element_char_rep:
|
||||
case syntax_element_short_set_rep:
|
||||
case syntax_element_long_set_rep:
|
||||
{
|
||||
unsigned id = static_cast<re_repeat*>(pt)->id;
|
||||
if(id > sizeof(m_bad_repeats) * CHAR_BIT)
|
||||
return true; // run out of bits, assume we can't traverse this one.
|
||||
return m_bad_repeats & (1u << id);
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt)
|
||||
{
|
||||
switch(pt->type)
|
||||
{
|
||||
case syntax_element_rep:
|
||||
case syntax_element_dot_rep:
|
||||
case syntax_element_char_rep:
|
||||
case syntax_element_short_set_rep:
|
||||
case syntax_element_long_set_rep:
|
||||
{
|
||||
unsigned id = static_cast<re_repeat*>(pt)->id;
|
||||
if(id <= sizeof(m_bad_repeats) * CHAR_BIT)
|
||||
m_bad_repeats |= (1u << id);
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace re_detail
|
||||
|
||||
} // namespace boost
|
||||
|
@ -45,6 +45,12 @@ public:
|
||||
bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
|
||||
bool parse_repeat_range(bool isbasic);
|
||||
bool parse_alt();
|
||||
bool parse_set();
|
||||
bool parse_backref();
|
||||
void parse_set_literal(basic_char_set<charT, traits>& char_set);
|
||||
bool parse_inner_set(basic_char_set<charT, traits>& char_set);
|
||||
digraph<charT> get_next_set_literal();
|
||||
charT unescape_character();
|
||||
|
||||
private:
|
||||
typedef bool (basic_regex_parser::*parser_proc_type)();
|
||||
@ -54,7 +60,6 @@ private:
|
||||
const charT* m_position; // our current parser position
|
||||
unsigned m_mark_count; // how many sub-expressions we have
|
||||
std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
|
||||
unsigned m_repeater_id; // the id of the next repeater
|
||||
std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
|
||||
|
||||
basic_regex_parser& operator=(const basic_regex_parser&);
|
||||
@ -63,7 +68,7 @@ private:
|
||||
|
||||
template <class charT, class traits>
|
||||
basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
|
||||
: basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_repeater_id(0), m_alt_insert_point(0)
|
||||
: basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_alt_insert_point(0)
|
||||
{
|
||||
}
|
||||
|
||||
@ -151,6 +156,8 @@ bool basic_regex_parser<charT, traits>::parse_basic()
|
||||
++m_position;
|
||||
return parse_repeat();
|
||||
}
|
||||
case regex_constants::syntax_open_set:
|
||||
return parse_set();
|
||||
default:
|
||||
return parse_literal();
|
||||
}
|
||||
@ -160,7 +167,7 @@ bool basic_regex_parser<charT, traits>::parse_basic()
|
||||
template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::parse_extended()
|
||||
{
|
||||
bool result;
|
||||
bool result = true;
|
||||
switch(this->m_traits.syntax_type(*m_position))
|
||||
{
|
||||
case regex_constants::syntax_open_mark:
|
||||
@ -205,6 +212,8 @@ bool basic_regex_parser<charT, traits>::parse_extended()
|
||||
break;
|
||||
case regex_constants::syntax_or:
|
||||
return parse_alt();
|
||||
case regex_constants::syntax_open_set:
|
||||
return parse_set();
|
||||
default:
|
||||
result = parse_literal();
|
||||
break;
|
||||
@ -260,6 +269,11 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
||||
// restore the alternate insertion point:
|
||||
//
|
||||
this->m_alt_insert_point = last_alt_point;
|
||||
//
|
||||
// allow backrefs to this mark:
|
||||
//
|
||||
if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
|
||||
this->m_backrefs |= 1u << (markid - 1);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -276,7 +290,7 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
|
||||
case regex_constants::syntax_close_mark:
|
||||
return false;
|
||||
case regex_constants::syntax_plus:
|
||||
if(this->m_pdata->m_flags & regex_constants::bk_plus_qm)
|
||||
if(this->flags() & regex_constants::bk_plus_qm)
|
||||
{
|
||||
++m_position;
|
||||
return parse_repeat(1);
|
||||
@ -284,7 +298,7 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
|
||||
else
|
||||
return parse_literal();
|
||||
case regex_constants::syntax_question:
|
||||
if(this->m_pdata->m_flags & regex_constants::bk_plus_qm)
|
||||
if(this->flags() & regex_constants::bk_plus_qm)
|
||||
{
|
||||
++m_position;
|
||||
return parse_repeat(0, 1);
|
||||
@ -292,22 +306,24 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
|
||||
else
|
||||
return parse_literal();
|
||||
case regex_constants::syntax_open_brace:
|
||||
if(this->m_pdata->m_flags & regbase::no_intervals)
|
||||
if(this->flags() & regbase::no_intervals)
|
||||
return parse_literal();
|
||||
++m_position;
|
||||
return parse_repeat_range(true);
|
||||
case regex_constants::syntax_close_brace:
|
||||
if(this->m_pdata->m_flags & regbase::no_intervals)
|
||||
if(this->flags() & regbase::no_intervals)
|
||||
return parse_literal();
|
||||
fail(REG_EBRACE, this->m_position - this->m_base);
|
||||
result = false;
|
||||
break;
|
||||
case regex_constants::syntax_or:
|
||||
if(this->m_pdata->m_flags & regbase::bk_vbar)
|
||||
if(this->flags() & regbase::bk_vbar)
|
||||
return parse_alt();
|
||||
else
|
||||
result = parse_literal();
|
||||
break;
|
||||
case regex_constants::syntax_digit:
|
||||
return parse_backref();
|
||||
default:
|
||||
result = parse_literal();
|
||||
break;
|
||||
@ -319,8 +335,35 @@ template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::parse_extended_escape()
|
||||
{
|
||||
++m_position;
|
||||
bool negate = false; // in case this is a character class escape: \w \d etc
|
||||
switch(this->m_traits.escape_syntax_type(*m_position))
|
||||
{
|
||||
case regex_constants::escape_type_not_class:
|
||||
negate = true;
|
||||
// fall through:
|
||||
case regex_constants::escape_type_class:
|
||||
{
|
||||
typedef typename traits::char_class_type mask_type;
|
||||
mask_type m = this->m_traits.lookup_classname(m_position, m_position+1);
|
||||
if(m != 0)
|
||||
{
|
||||
basic_char_set<charT, traits> char_set;
|
||||
if(negate)
|
||||
char_set.negate();
|
||||
char_set.add_class(m);
|
||||
if(0 == this->append_set(char_set))
|
||||
fail(REG_ERANGE, m_position - m_base);
|
||||
++m_position;
|
||||
return true;
|
||||
}
|
||||
//
|
||||
// not a class, just a regular unknown escape:
|
||||
//
|
||||
this->append_literal(unescape_character());
|
||||
break;
|
||||
}
|
||||
case regex_constants::syntax_digit:
|
||||
return parse_backref();
|
||||
case regex_constants::escape_type_left_word:
|
||||
++m_position;
|
||||
this->append_state(syntax_element_word_start);
|
||||
@ -329,8 +372,29 @@ bool basic_regex_parser<charT, traits>::parse_extended_escape()
|
||||
++m_position;
|
||||
this->append_state(syntax_element_word_end);
|
||||
break;
|
||||
case regex_constants::escape_type_start_buffer:
|
||||
++m_position;
|
||||
this->append_state(syntax_element_buffer_start);
|
||||
break;
|
||||
case regex_constants::escape_type_end_buffer:
|
||||
++m_position;
|
||||
this->append_state(syntax_element_buffer_end);
|
||||
break;
|
||||
case regex_constants::escape_type_word_assert:
|
||||
++m_position;
|
||||
this->append_state(syntax_element_word_boundary);
|
||||
break;
|
||||
case regex_constants::escape_type_not_word_assert:
|
||||
++m_position;
|
||||
this->append_state(syntax_element_within_word);
|
||||
break;
|
||||
case regex_constants::escape_type_Z:
|
||||
++m_position;
|
||||
this->append_state(syntax_element_soft_buffer_end);
|
||||
break;
|
||||
default:
|
||||
return parse_literal();
|
||||
this->append_literal(unescape_character());
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -355,7 +419,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
|
||||
// when we get to here we may have a non-greedy ? mark still to come:
|
||||
//
|
||||
if((m_position != m_end)
|
||||
&& (0 == (this->m_pdata->m_flags & (regbase::main_option_type | regbase::no_perl_ex))))
|
||||
&& (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))))
|
||||
{
|
||||
// OK we have a perl regex, check for a '?':
|
||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
|
||||
@ -417,7 +481,6 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
|
||||
rep->max = high;
|
||||
rep->greedy = greedy;
|
||||
rep->leading = false;
|
||||
rep->id = m_repeater_id++;
|
||||
// store our repeater position for later:
|
||||
std::ptrdiff_t rep_off = this->getoffset(rep);
|
||||
// and append a back jump to the repeat:
|
||||
@ -535,7 +598,7 @@ bool basic_regex_parser<charT, traits>::parse_alt()
|
||||
//
|
||||
// if we didn't actually add any trailing states then that's an error:
|
||||
//
|
||||
if(this->m_alt_insert_point == this->m_pdata->m_data.size())
|
||||
if(this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
|
||||
fail(REG_EMPTY, this->m_position - this->m_base);
|
||||
//
|
||||
// fix up the jump we added to point to the end of the states
|
||||
@ -548,6 +611,311 @@ bool basic_regex_parser<charT, traits>::parse_alt()
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::parse_set()
|
||||
{
|
||||
++m_position;
|
||||
if(m_position == m_end)
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
basic_char_set<charT, traits> char_set;
|
||||
|
||||
const charT* base = m_position; // where the '[' was
|
||||
const charT* item_base = m_position; // where the '[' or '^' was
|
||||
|
||||
while(m_position != m_end)
|
||||
{
|
||||
switch(this->m_traits.syntax_type(*m_position))
|
||||
{
|
||||
case regex_constants::syntax_caret:
|
||||
if(m_position == base)
|
||||
{
|
||||
char_set.negate();
|
||||
++m_position;
|
||||
item_base = m_position;
|
||||
}
|
||||
else
|
||||
parse_set_literal(char_set);
|
||||
break;
|
||||
case regex_constants::syntax_close_set:
|
||||
if(m_position == item_base)
|
||||
{
|
||||
parse_set_literal(char_set);
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
++m_position;
|
||||
if(0 == this->append_set(char_set))
|
||||
fail(REG_ERANGE, m_position - m_base);
|
||||
}
|
||||
return true;
|
||||
case regex_constants::syntax_open_set:
|
||||
if(parse_inner_set(char_set))
|
||||
break;
|
||||
return true;
|
||||
default:
|
||||
parse_set_literal(char_set);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return m_position != m_end;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
|
||||
{
|
||||
//
|
||||
// we have either a character class [:name:]
|
||||
// a collating element [.name.]
|
||||
// or an equivalence class [=name=]
|
||||
//
|
||||
if(m_end == ++m_position)
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
switch(this->m_traits.syntax_type(*m_position))
|
||||
{
|
||||
case regex_constants::syntax_colon:
|
||||
{
|
||||
// check that character classes are actually enabled:
|
||||
if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
|
||||
== (regbase::basic_syntax_group | regbase::no_char_classes))
|
||||
{
|
||||
--m_position;
|
||||
parse_set_literal(char_set);
|
||||
return true;
|
||||
}
|
||||
// skip the ':'
|
||||
if(m_end == ++m_position)
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
const charT* name_first = m_position;
|
||||
// skip at least one character, then find the matching ':]'
|
||||
if(m_end == ++m_position)
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
while((m_position != m_end)
|
||||
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
|
||||
++m_position;
|
||||
const charT* name_last = m_position;
|
||||
if(m_end == m_position)
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
if((m_end == ++m_position)
|
||||
|| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
typedef typename traits::char_class_type mask_type;
|
||||
mask_type m = this->m_traits.lookup_classname(name_first, name_last);
|
||||
if(0 == m)
|
||||
{
|
||||
if(char_set.empty() && (name_last - name_first == 1))
|
||||
{
|
||||
// maybe a special case:
|
||||
++m_position;
|
||||
if( (m_position != m_end)
|
||||
&& (this->m_traits.syntax_type(*m_position)
|
||||
== regex_constants::syntax_close_set))
|
||||
{
|
||||
if(this->m_traits.escape_syntax_type(*name_first)
|
||||
== regex_constants::escape_type_left_word)
|
||||
{
|
||||
++m_position;
|
||||
this->append_state(syntax_element_word_start);
|
||||
return false;
|
||||
}
|
||||
if(this->m_traits.escape_syntax_type(*name_first)
|
||||
== regex_constants::escape_type_right_word)
|
||||
{
|
||||
++m_position;
|
||||
this->append_state(syntax_element_word_end);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
fail(REG_ECTYPE, name_first - m_base);
|
||||
}
|
||||
char_set.add_class(m);
|
||||
++m_position;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
--m_position;
|
||||
parse_set_literal(char_set);
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
|
||||
{
|
||||
digraph<charT> start_range = get_next_set_literal();
|
||||
if(m_end == m_position)
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
|
||||
{
|
||||
// we have a range:
|
||||
if(m_end == ++m_position)
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
|
||||
{
|
||||
digraph<charT> end_range = get_next_set_literal();
|
||||
char_set.add_range(start_range, end_range);
|
||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
|
||||
fail(REG_ERANGE, m_position - m_base);
|
||||
return;
|
||||
}
|
||||
--m_position;
|
||||
}
|
||||
char_set.add_single(start_range);
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal()
|
||||
{
|
||||
digraph<charT> result;
|
||||
switch(this->m_traits.syntax_type(*m_position))
|
||||
{
|
||||
case regex_constants::syntax_escape:
|
||||
// check to see if escapes are supported first:
|
||||
if(this->flags() & regex_constants::no_escape_in_lists)
|
||||
{
|
||||
result = *m_position++;
|
||||
break;
|
||||
}
|
||||
++m_position;
|
||||
result = unescape_character();
|
||||
break;
|
||||
default:
|
||||
result = *m_position++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
charT basic_regex_parser<charT, traits>::unescape_character()
|
||||
{
|
||||
charT result(0);
|
||||
if(m_position == m_end)
|
||||
fail(REG_EESCAPE, m_position - m_base);
|
||||
switch(this->m_traits.syntax_type(*m_position))
|
||||
{
|
||||
case regex_constants::escape_type_control_a:
|
||||
result = charT('\a');
|
||||
break;
|
||||
case regex_constants::escape_type_e:
|
||||
result = charT(27);
|
||||
break;
|
||||
case regex_constants::escape_type_control_f:
|
||||
result = charT('\f');
|
||||
break;
|
||||
case regex_constants::escape_type_control_n:
|
||||
result = charT('\n');
|
||||
break;
|
||||
case regex_constants::escape_type_control_r:
|
||||
result = charT('\r');
|
||||
break;
|
||||
case regex_constants::escape_type_control_t:
|
||||
result = charT('\t');
|
||||
break;
|
||||
case regex_constants::escape_type_control_v:
|
||||
result = charT('\v');
|
||||
break;
|
||||
case regex_constants::escape_type_word_assert:
|
||||
result = charT('\b');
|
||||
break;
|
||||
case regex_constants::escape_type_ascii_control:
|
||||
++m_position;
|
||||
if(m_position == m_end)
|
||||
{
|
||||
fail(REG_EESCAPE, m_position - m_base);
|
||||
return result;
|
||||
}
|
||||
if((*m_position < charT('@'))
|
||||
|| (*m_position > charT(125)) )
|
||||
{
|
||||
fail(REG_EESCAPE, m_position - m_base);
|
||||
return result;
|
||||
}
|
||||
result = static_cast<charT>(*m_position - charT('@'));
|
||||
break;
|
||||
case regex_constants::escape_type_hex:
|
||||
++m_position;
|
||||
if(m_position == m_end)
|
||||
{
|
||||
fail(REG_EESCAPE, m_position - m_base);
|
||||
break;
|
||||
}
|
||||
// maybe have \x{ddd}
|
||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
|
||||
{
|
||||
++m_position;
|
||||
if(m_position == m_end)
|
||||
{
|
||||
fail(REG_EESCAPE, m_position - m_base);
|
||||
break;
|
||||
}
|
||||
int i = this->m_traits.toi(m_position, m_end, 16);
|
||||
if((m_position == m_end)
|
||||
|| (i < 0)
|
||||
|| (i > (std::numeric_limits<charT>::max)())
|
||||
|| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
|
||||
{
|
||||
fail(REG_BADBR, m_position - m_base);
|
||||
}
|
||||
++m_position;
|
||||
result = charT(i);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), m_end - m_position);
|
||||
int i = this->m_traits.toi(m_position, m_position + len, 16);
|
||||
if((i < 0)
|
||||
|| (i >> (sizeof(charT) * CHAR_BIT)))
|
||||
{
|
||||
fail(REG_EESCAPE, m_position - m_base);
|
||||
}
|
||||
result = charT(i);
|
||||
}
|
||||
return result;
|
||||
case regex_constants::syntax_digit:
|
||||
{
|
||||
// an octal escape sequence, the first character must be a zero
|
||||
// followed by up to 3 octal digits:
|
||||
std::ptrdiff_t len = (std::min)(std::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
|
||||
int val = this->m_traits.toi(m_position, m_position + len, 8);
|
||||
if(val < 0)
|
||||
fail(REG_EESCAPE, m_position - m_base);
|
||||
return static_cast<charT>(val);
|
||||
}
|
||||
default:
|
||||
result = *m_position;
|
||||
break;
|
||||
}
|
||||
++m_position;
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::parse_backref()
|
||||
{
|
||||
if(m_position == m_end)
|
||||
{
|
||||
fail(REG_EESCAPE, m_position - m_end);
|
||||
}
|
||||
int i = this->m_traits.toi(m_position, m_position + 1, 10);
|
||||
if((i > 0) && (this->m_backrefs & (1u << (i-1))))
|
||||
{
|
||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
|
||||
pb->index = i;
|
||||
}
|
||||
else if(i == 0)
|
||||
{
|
||||
// not a backref at all but an octal escape sequence:
|
||||
--m_position;
|
||||
charT c = unescape_character();
|
||||
this->append_literal(c);
|
||||
}
|
||||
else
|
||||
fail(REG_ESUBREG, m_position - m_end);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace re_detail
|
||||
} // namespace boost
|
||||
|
||||
|
@ -304,6 +304,27 @@ template <class charT>
|
||||
class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer<charT>
|
||||
{
|
||||
public:
|
||||
typedef typename cpp_regex_traits<charT>::char_class_type char_class_type;
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 16);
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 17);
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 18);
|
||||
BOOST_STATIC_CONSTANT(char_class_type,
|
||||
mask_base =
|
||||
std::ctype<charT>::alnum
|
||||
| std::ctype<charT>::alpha
|
||||
| std::ctype<charT>::cntrl
|
||||
| std::ctype<charT>::digit
|
||||
| std::ctype<charT>::graph
|
||||
| std::ctype<charT>::lower
|
||||
| std::ctype<charT>::print
|
||||
| std::ctype<charT>::punct
|
||||
| std::ctype<charT>::space
|
||||
| std::ctype<charT>::upper
|
||||
| std::ctype<charT>::xdigit);
|
||||
|
||||
//BOOST_STATIC_ASSERT(0 == (mask_base & (mask_word | mask_unicode)));
|
||||
|
||||
|
||||
typedef std::basic_string<charT> string_type;
|
||||
//cpp_regex_traits_implementation();
|
||||
cpp_regex_traits_implementation(const std::locale& l);
|
||||
@ -316,10 +337,25 @@ public:
|
||||
}
|
||||
return get_default_error_string(n);
|
||||
}
|
||||
char_class_type lookup_classname(const charT* p1, const charT* p2) const
{
   //
   // Looks up the character class named by [p1, p2).  The name is tried
   // as given first; if that finds nothing it is converted to lower case
   // with the locale's ctype facet and tried again.
   // Returns the class mask, or 0 when the name is not recognised.
   //
   char_class_type result = lookup_classname_imp(p1, p2);
   if((result == 0) && (p1 != p2))
   {
      string_type s(p1, p2);
      // form the range from the first element plus the length: the end
      // iterator must not be dereferenced (s is non-empty here, so
      // &s[0] is valid)
      charT* const first = &s[0];
      charT* const last = first + s.size();
      this->m_pctype->tolower(first, last);
      result = lookup_classname_imp(first, last);
   }
   return result;
}
|
||||
re_detail::parser_buf<charT> m_sbuf; // buffer for parsing numbers.
|
||||
std::basic_istream<charT> m_is; // stream for parsing numbers.
|
||||
private:
|
||||
std::map<int, std::string> m_error_strings; // error messages indexed by numberic ID
|
||||
//
|
||||
// helpers:
|
||||
//
|
||||
char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const;
|
||||
};
|
||||
|
||||
template <class charT>
|
||||
@ -349,7 +385,7 @@ cpp_regex_traits_implementation<charT>::cpp_regex_traits_implementation(const st
|
||||
//
|
||||
if((int)cat >= 0)
|
||||
{
|
||||
for(int i = 0; i <= boost::regex_constants::error_unknown; ++i)
|
||||
for(boost::regex_constants::error_type i = 0; i <= boost::regex_constants::error_unknown; ++i)
|
||||
{
|
||||
const char* p = get_default_error_string(i);
|
||||
string_type default_message;
|
||||
@ -369,6 +405,39 @@ cpp_regex_traits_implementation<charT>::cpp_regex_traits_implementation(const st
|
||||
}
|
||||
}
|
||||
|
||||
template <class charT>
|
||||
typename cpp_regex_traits_implementation<charT>::char_class_type
|
||||
cpp_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const
|
||||
{
|
||||
static const char_class_type masks[] =
|
||||
{
|
||||
0,
|
||||
std::ctype<char>::alnum,
|
||||
std::ctype<char>::alpha,
|
||||
cpp_regex_traits_implementation<charT>::mask_blank,
|
||||
std::ctype<char>::cntrl,
|
||||
std::ctype<char>::digit,
|
||||
std::ctype<char>::digit,
|
||||
std::ctype<char>::graph,
|
||||
std::ctype<char>::lower,
|
||||
std::ctype<char>::lower,
|
||||
std::ctype<char>::print,
|
||||
std::ctype<char>::punct,
|
||||
std::ctype<char>::space,
|
||||
std::ctype<char>::space,
|
||||
std::ctype<char>::upper,
|
||||
cpp_regex_traits_implementation<charT>::mask_unicode,
|
||||
std::ctype<char>::upper,
|
||||
std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word,
|
||||
std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word,
|
||||
std::ctype<char>::xdigit,
|
||||
};
|
||||
std::size_t id = 1 + re_detail::get_default_class_id(p1, p2);
|
||||
assert(id < sizeof(masks) / sizeof(masks[0]));
|
||||
return masks[id];
|
||||
}
|
||||
|
||||
|
||||
template <class charT>
|
||||
boost::shared_ptr<cpp_regex_traits_implementation<charT> > create_cpp_regex_traits(const std::locale& l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT))
|
||||
{
|
||||
@ -376,6 +445,15 @@ boost::shared_ptr<cpp_regex_traits_implementation<charT> > create_cpp_regex_trai
|
||||
return boost::shared_ptr<cpp_regex_traits_implementation<charT> >(new cpp_regex_traits_implementation<charT>(l));
|
||||
}
|
||||
|
||||
//
|
||||
// helpers to suppress warnings:
|
||||
//
|
||||
//
// is_extended: true when the value of c is 256 or more, i.e. c lies
// outside the range a single byte can represent.
//
template <class charT>
inline bool is_extended(charT c)
{ return c >= 256; }
//
// overload for plain char, which never counts as extended; it also keeps
// the comparison above from being instantiated for char.
//
inline bool is_extended(char)
{ return false; }
|
||||
|
||||
} // re_detail
|
||||
|
||||
template <class charT>
|
||||
@ -390,25 +468,6 @@ public:
|
||||
typedef std::locale locale_type;
|
||||
typedef boost::uint_least32_t char_class_type;
|
||||
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 16);
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 17);
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 18);
|
||||
BOOST_STATIC_CONSTANT(char_class_type,
|
||||
mask_base =
|
||||
std::ctype<char>::alnum
|
||||
| std::ctype<char>::alpha
|
||||
| std::ctype<char>::cntrl
|
||||
| std::ctype<char>::digit
|
||||
| std::ctype<char>::graph
|
||||
| std::ctype<char>::lower
|
||||
| std::ctype<char>::print
|
||||
| std::ctype<char>::punct
|
||||
| std::ctype<char>::space
|
||||
| std::ctype<char>::upper
|
||||
| std::ctype<char>::xdigit);
|
||||
|
||||
//BOOST_STATIC_ASSERT(0 == (mask_base & (mask_word | mask_unicode)));
|
||||
|
||||
cpp_regex_traits()
|
||||
: m_pimpl(re_detail::create_cpp_regex_traits<charT>(std::locale()))
|
||||
{ }
|
||||
@ -438,33 +497,7 @@ public:
|
||||
}
|
||||
char_class_type lookup_classname(const charT* p1, const charT* p2) const
|
||||
{
|
||||
static const char_class_type masks[] =
|
||||
{
|
||||
0,
|
||||
std::ctype<char>::alnum,
|
||||
std::ctype<char>::alpha,
|
||||
cpp_regex_traits<charT>::mask_blank,
|
||||
std::ctype<char>::cntrl,
|
||||
std::ctype<char>::digit,
|
||||
std::ctype<char>::digit,
|
||||
std::ctype<char>::graph,
|
||||
std::ctype<char>::lower,
|
||||
std::ctype<char>::lower,
|
||||
std::ctype<char>::print,
|
||||
std::ctype<char>::punct,
|
||||
std::ctype<char>::space,
|
||||
std::ctype<char>::space,
|
||||
cpp_regex_traits<charT>::mask_unicode,
|
||||
std::ctype<char>::upper,
|
||||
std::ctype<char>::upper,
|
||||
std::ctype<char>::alnum | cpp_regex_traits<charT>::mask_word,
|
||||
std::ctype<char>::alnum | cpp_regex_traits<charT>::mask_word,
|
||||
std::ctype<char>::xdigit,
|
||||
};
|
||||
int id = re_detail::get_default_class_id(p1, p2);
|
||||
assert(id >= -1);
|
||||
assert(id < sizeof(masks) / sizeof(masks[0]));
|
||||
return masks[1 + id];
|
||||
return m_pimpl->lookup_classname(p1, p2);
|
||||
}
|
||||
string_type lookup_collatename(const charT* p1, const charT* p2) const
|
||||
{
|
||||
@ -472,16 +505,17 @@ public:
|
||||
}
|
||||
bool is_class(charT c, char_class_type f) const
|
||||
{
|
||||
if((f & cpp_regex_traits<charT>::mask_base)
|
||||
typedef typename std::ctype<charT>::mask ctype_mask;
|
||||
if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_base)
|
||||
&& (m_pimpl->m_pctype->is(
|
||||
static_cast<std::ctype<charT>::mask>(f & cpp_regex_traits<charT>::mask_base), c)))
|
||||
static_cast<ctype_mask>(f & re_detail::cpp_regex_traits_implementation<charT>::mask_base), c)))
|
||||
return true;
|
||||
else if((f & cpp_regex_traits<charT>::mask_unicode) && (c >= 256))
|
||||
else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_unicode) && re_detail::is_extended(c))
|
||||
return true;
|
||||
else if((f & cpp_regex_traits<charT>::mask_word) && (c == '_'))
|
||||
else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_word) && (c == '_'))
|
||||
return true;
|
||||
else if((f & cpp_regex_traits<charT>::mask_blank)
|
||||
&& m_pimpl->m_pctype->is(static_cast<std::ctype<charT>::mask>(f & cpp_regex_traits<charT>::mask_base), c)
|
||||
else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_blank)
|
||||
&& m_pimpl->m_pctype->is(std::ctype<charT>::space, c)
|
||||
&& !re_detail::is_separator(c))
|
||||
return true;
|
||||
return false;
|
||||
@ -515,6 +549,7 @@ private:
|
||||
// catalog name handler:
|
||||
//
|
||||
static std::string& get_catalog_name_inst();
|
||||
|
||||
#ifdef BOOST_HAS_THREADS
|
||||
static static_mutex& get_mutex_inst();
|
||||
#endif
|
||||
|
@ -23,12 +23,15 @@
|
||||
#ifndef BOOST_REGEX_ERROR_TYPE_HPP
|
||||
#define BOOST_REGEX_ERROR_TYPE_HPP
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace boost{
|
||||
#endif
|
||||
|
||||
//
|
||||
// start with the POSIX API versions of these:
|
||||
//
|
||||
typedef unsigned reg_error_t;
|
||||
typedef reg_error_t reg_errcode_t; // backwards compatibility
|
||||
|
||||
static const reg_error_t REG_NOERROR = 0; /* Success. */
|
||||
static const reg_error_t REG_NOMATCH = 1; /* Didn't find a match (for regexec). */
|
||||
@ -57,6 +60,7 @@ static const reg_error_t REG_ESTACK = 19; /* out of stack space */
|
||||
static const reg_error_t REG_E_UNKNOWN = 20; /* unknown error */
|
||||
static const reg_error_t REG_ENOSYS = REG_E_UNKNOWN; /* Reserved. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace regex_constants{
|
||||
|
||||
typedef ::boost::reg_error_t error_type;
|
||||
@ -80,5 +84,6 @@ static const error_type error_unknown = REG_E_UNKNOWN;
|
||||
|
||||
}
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif
|
||||
|
@ -91,17 +91,17 @@ template <class iterator, class charT, class traits_type, class char_classT>
|
||||
iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
|
||||
iterator last,
|
||||
const re_set_long<char_classT>* set_,
|
||||
const basic_regex<charT, traits_type>& e)
|
||||
const regex_data<charT, traits_type>& e)
|
||||
{
|
||||
const charT* p = reinterpret_cast<const charT*>(set_+1);
|
||||
iterator ptr;
|
||||
unsigned int i;
|
||||
bool icase = e.flags() & regex_constants::icase;
|
||||
bool icase = e.m_flags & regex_constants::icase;
|
||||
|
||||
if(next == last) return next;
|
||||
|
||||
typedef typename traits_type::string_type traits_string_type;
|
||||
const traits_type& traits_inst = e.get_traits();
|
||||
const traits_type& traits_inst = e.m_traits;
|
||||
|
||||
// dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
|
||||
// referenced
|
||||
@ -149,17 +149,17 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
|
||||
// try and match a range, NB only a single character can match
|
||||
if(set_->cranges)
|
||||
{
|
||||
if((e.flags() & regex_constants::collate) == 0)
|
||||
if((e.m_flags & regex_constants::collate) == 0)
|
||||
s1.assign(1, col);
|
||||
else
|
||||
s1 = traits_inst.transform(&col, &col + 1);
|
||||
for(i = 0; i < set_->cranges; ++i)
|
||||
{
|
||||
if(STR_COMP(s1, p) <= 0)
|
||||
if(STR_COMP(s1, p) >= 0)
|
||||
{
|
||||
while(*p)++p;
|
||||
++p;
|
||||
if(STR_COMP(s1, p) >= 0)
|
||||
if(STR_COMP(s1, p) <= 0)
|
||||
return set_->isnot ? next : ++next;
|
||||
}
|
||||
else
|
||||
@ -412,7 +412,7 @@ private:
|
||||
void push_assertion(const re_syntax_base* ps, bool positive);
|
||||
void push_alt(const re_syntax_base* ps);
|
||||
void push_repeater_count(int i, repeater_count<BidiIterator>** s);
|
||||
void push_single_repeat(unsigned c, const re_repeat* r, BidiIterator last_position, int id);
|
||||
void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int id);
|
||||
void push_non_greedy_repeat(const re_syntax_base* ps);
|
||||
|
||||
|
||||
|
@ -208,10 +208,10 @@ bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
|
||||
else
|
||||
{
|
||||
// start again:
|
||||
search_base = position = (*m_presult)[0].second;
|
||||
search_base = position = m_result[0].second;
|
||||
// If last match was null and match_not_null was not set then increment
|
||||
// our start position, otherwise we go into an infinite loop:
|
||||
if(((m_match_flags & match_not_null) == 0) && (m_presult->length() == 0))
|
||||
if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0))
|
||||
{
|
||||
if(position == last)
|
||||
return false;
|
||||
@ -590,7 +590,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set()
|
||||
// let the traits class do the work:
|
||||
if(position == last)
|
||||
return false;
|
||||
BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re);
|
||||
BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data());
|
||||
if(t != position)
|
||||
{
|
||||
pstate = pstate->next.p;
|
||||
|
@ -103,10 +103,10 @@ struct save_state_init
|
||||
template <class BidiIterator>
|
||||
struct saved_single_repeat : public saved_state
|
||||
{
|
||||
unsigned count;
|
||||
std::size_t count;
|
||||
const re_repeat* rep;
|
||||
BidiIterator last_position;
|
||||
saved_single_repeat(unsigned c, const re_repeat* r, BidiIterator lp, int arg_id)
|
||||
saved_single_repeat(std::size_t c, const re_repeat* r, BidiIterator lp, int arg_id)
|
||||
: saved_state(arg_id), count(c), rep(r), last_position(lp){}
|
||||
};
|
||||
|
||||
@ -275,7 +275,7 @@ inline void perl_matcher<BidiIterator, Allocator, traits>::push_repeater_count(i
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
inline void perl_matcher<BidiIterator, Allocator, traits>::push_single_repeat(unsigned c, const re_repeat* r, BidiIterator last_position, int id)
|
||||
inline void perl_matcher<BidiIterator, Allocator, traits>::push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int id)
|
||||
{
|
||||
saved_single_repeat<BidiIterator>* pmp = static_cast<saved_single_repeat<BidiIterator>*>(m_backup_state);
|
||||
--pmp;
|
||||
@ -585,11 +585,11 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat()
|
||||
const re_repeat* rep = static_cast<const re_repeat*>(pstate);
|
||||
assert(1 == static_cast<const re_literal*>(rep->next.p)->length);
|
||||
const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1);
|
||||
unsigned count = 0;
|
||||
std::size_t count = 0;
|
||||
//
|
||||
// start by working out how much we can skip:
|
||||
//
|
||||
unsigned desired = rep->greedy ? rep->max : rep->min;
|
||||
std::size_t desired = rep->greedy ? rep->max : rep->min;
|
||||
if(::boost::is_random_access_iterator<BidiIterator>::value)
|
||||
{
|
||||
BidiIterator end = position;
|
||||
@ -652,11 +652,11 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat()
|
||||
#endif
|
||||
const re_repeat* rep = static_cast<const re_repeat*>(pstate);
|
||||
const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map;
|
||||
unsigned count = 0;
|
||||
std::size_t count = 0;
|
||||
//
|
||||
// start by working out how much we can skip:
|
||||
//
|
||||
unsigned desired = rep->greedy ? rep->max : rep->min;
|
||||
std::size_t desired = rep->greedy ? rep->max : rep->min;
|
||||
if(::boost::is_random_access_iterator<BidiIterator>::value)
|
||||
{
|
||||
BidiIterator end = position;
|
||||
@ -719,17 +719,17 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
|
||||
#endif
|
||||
const re_repeat* rep = static_cast<const re_repeat*>(pstate);
|
||||
const re_set_long<typename traits::char_class_type>* set = static_cast<const re_set_long<typename traits::char_class_type>*>(pstate->next.p);
|
||||
unsigned count = 0;
|
||||
std::size_t count = 0;
|
||||
//
|
||||
// start by working out how much we can skip:
|
||||
//
|
||||
unsigned desired = rep->greedy ? rep->max : rep->min;
|
||||
std::size_t desired = rep->greedy ? rep->max : rep->min;
|
||||
if(::boost::is_random_access_iterator<BidiIterator>::value)
|
||||
{
|
||||
BidiIterator end = position;
|
||||
std::advance(end, (std::min)((unsigned)re_detail::distance(position, last), desired));
|
||||
BidiIterator origin(position);
|
||||
while((position != end) && (position != re_is_set_member(position, last, set, re)))
|
||||
while((position != end) && (position != re_is_set_member(position, last, set, re.get_data())))
|
||||
{
|
||||
++position;
|
||||
}
|
||||
@ -737,7 +737,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
|
||||
}
|
||||
else
|
||||
{
|
||||
while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re)))
|
||||
while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data())))
|
||||
{
|
||||
++position;
|
||||
++count;
|
||||
@ -926,7 +926,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_greedy_single_repeat(
|
||||
}
|
||||
|
||||
const re_repeat* rep = pmp->rep;
|
||||
unsigned count = pmp->count;
|
||||
std::size_t count = pmp->count;
|
||||
assert(rep->next.p);
|
||||
assert(rep->alt.p);
|
||||
|
||||
@ -975,7 +975,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_slow_dot_repeat(bool
|
||||
}
|
||||
|
||||
const re_repeat* rep = pmp->rep;
|
||||
unsigned count = pmp->count;
|
||||
std::size_t count = pmp->count;
|
||||
assert(rep->type == syntax_element_dot_rep);
|
||||
assert(rep->next.p);
|
||||
assert(rep->alt.p);
|
||||
@ -1037,7 +1037,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_fast_dot_repeat(bool
|
||||
}
|
||||
|
||||
const re_repeat* rep = pmp->rep;
|
||||
unsigned count = pmp->count;
|
||||
std::size_t count = pmp->count;
|
||||
|
||||
assert(count < rep->max);
|
||||
position = pmp->last_position;
|
||||
@ -1089,7 +1089,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_char_repeat(bool r)
|
||||
}
|
||||
|
||||
const re_repeat* rep = pmp->rep;
|
||||
unsigned count = pmp->count;
|
||||
std::size_t count = pmp->count;
|
||||
pstate = rep->next.p;
|
||||
const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1);
|
||||
position = pmp->last_position;
|
||||
@ -1153,7 +1153,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_short_set_repeat(bool
|
||||
}
|
||||
|
||||
const re_repeat* rep = pmp->rep;
|
||||
unsigned count = pmp->count;
|
||||
std::size_t count = pmp->count;
|
||||
pstate = rep->next.p;
|
||||
const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map;
|
||||
position = pmp->last_position;
|
||||
@ -1217,7 +1217,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat(bool
|
||||
}
|
||||
|
||||
const re_repeat* rep = pmp->rep;
|
||||
unsigned count = pmp->count;
|
||||
std::size_t count = pmp->count;
|
||||
pstate = rep->next.p;
|
||||
const re_set_long<typename traits::char_class_type>* set = static_cast<const re_set_long<typename traits::char_class_type>*>(pstate);
|
||||
position = pmp->last_position;
|
||||
@ -1234,7 +1234,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat(bool
|
||||
// wind forward until we can skip out of the repeat:
|
||||
do
|
||||
{
|
||||
if(position == re_is_set_member(position, last, set, re))
|
||||
if(position == re_is_set_member(position, last, set, re.get_data()))
|
||||
{
|
||||
// failed repeat match, discard this state and look for another:
|
||||
destroy_single_repeat();
|
||||
|
@ -637,7 +637,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
|
||||
BidiIterator end = position;
|
||||
std::advance(end, (std::min)((unsigned)re_detail::distance(position, last), desired));
|
||||
BidiIterator origin(position);
|
||||
while((position != end) && (position != re_is_set_member(position, last, set, re)))
|
||||
while((position != end) && (position != re_is_set_member(position, last, set, re.get_data())))
|
||||
{
|
||||
++position;
|
||||
}
|
||||
@ -645,7 +645,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
|
||||
}
|
||||
else
|
||||
{
|
||||
while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re)))
|
||||
while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data())))
|
||||
{
|
||||
++position;
|
||||
++count;
|
||||
@ -665,7 +665,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
|
||||
{
|
||||
while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
|
||||
{
|
||||
if(position != re_is_set_member(position, last, set, re))
|
||||
if(position != re_is_set_member(position, last, set, re.get_data()))
|
||||
{
|
||||
++position;
|
||||
++count;
|
||||
@ -685,7 +685,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
|
||||
if(position == last)
|
||||
return false;
|
||||
position = save_pos;
|
||||
if(position != re_is_set_member(position, last, set, re))
|
||||
if(position != re_is_set_member(position, last, set, re.get_data()))
|
||||
{
|
||||
++position;
|
||||
++count;
|
||||
|
@ -76,7 +76,7 @@ public:
|
||||
|
||||
|
||||
|
||||
basic = basic_syntax_group | collate,
|
||||
basic = basic_syntax_group | collate | no_escape_in_lists,
|
||||
extended = no_bk_refs | collate | no_perl_ex | no_escape_in_lists,
|
||||
normal = 0,
|
||||
emacs = basic | no_char_classes | no_intervals,
|
||||
@ -123,6 +123,8 @@ namespace regex_constants{
|
||||
bk_plus_qm = ::boost::regbase::bk_plus_qm,
|
||||
bk_vbar = ::boost::regbase::bk_vbar,
|
||||
no_intervals = ::boost::regbase::no_intervals,
|
||||
no_char_classes = ::boost::regbase::no_char_classes,
|
||||
no_escape_in_lists = ::boost::regbase::no_escape_in_lists,
|
||||
|
||||
basic = ::boost::regbase::basic,
|
||||
extended = ::boost::regbase::extended,
|
||||
|
@ -54,23 +54,22 @@ inline unsigned int regex_grep(Predicate foo,
|
||||
return count; // we've reached the end, don't try and find an extra null match.
|
||||
if(m.length() == 0)
|
||||
{
|
||||
if(m[0].second == last)
|
||||
return count;
|
||||
// we found a NULL-match, now try to find
|
||||
// a non-NULL one at the same position:
|
||||
BidiIterator last_end(m[0].second);
|
||||
if(last_end == last)
|
||||
return count;
|
||||
match_results<BidiIterator, match_allocator_type> m2(m);
|
||||
matcher.setf(match_not_null | match_continuous);
|
||||
if(matcher.find())
|
||||
{
|
||||
++count;
|
||||
last_end = m[0].second;
|
||||
if(0 == foo(m))
|
||||
return count;
|
||||
}
|
||||
else
|
||||
{
|
||||
// reset match back to where it was:
|
||||
m.set_second(last_end);
|
||||
m = m2;
|
||||
}
|
||||
matcher.unsetf((match_not_null | match_continuous) & ~flags);
|
||||
}
|
||||
|
@ -132,17 +132,19 @@ int get_default_class_id(const charT* p1, const charT* p2)
|
||||
{data+40, data+45,}, // punct
|
||||
{data+45, data+46,}, // s
|
||||
{data+45, data+50,}, // space
|
||||
{data+50, data+57,}, // unicode
|
||||
{data+57, data+58,}, // u
|
||||
{data+50, data+57,}, // unicode
|
||||
{data+57, data+62,}, // upper
|
||||
{data+62, data+63,}, // w
|
||||
{data+62, data+66,}, // word
|
||||
{data+66, data+72,}, // xdigit
|
||||
};
|
||||
static const character_pointer_range<charT>* ranges_begin = ranges;
|
||||
static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0]));
|
||||
|
||||
character_pointer_range<charT> t = { p1, p2, };
|
||||
const character_pointer_range<charT>* p = std::lower_bound(ranges, ranges + (sizeof(ranges)/sizeof(ranges[0])), t);
|
||||
if(t == *p)
|
||||
const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t);
|
||||
if((p != ranges_end) && (t == *p))
|
||||
return static_cast<int>(p - ranges);
|
||||
return -1;
|
||||
}
|
||||
|
@ -222,11 +222,15 @@ enum re_jump_size_type
|
||||
/*** proc re_is_set_member *********************************************
|
||||
Forward declaration: we'll need this one later...
|
||||
***********************************************************************/
|
||||
|
||||
template<class charT, class traits>
|
||||
struct regex_data;
|
||||
|
||||
template <class iterator, class charT, class traits_type, class char_classT>
|
||||
iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
|
||||
iterator last,
|
||||
const re_set_long<char_classT>* set_,
|
||||
const basic_regex<charT, traits_type>& e);
|
||||
const regex_data<charT, traits_type>& e);
|
||||
|
||||
} // namespace re_detail
|
||||
|
||||
|
@ -49,8 +49,8 @@ const char* get_default_syntax(regex_constants::syntax_type n)
|
||||
">",
|
||||
"",
|
||||
"",
|
||||
"A",
|
||||
"z",
|
||||
"A`",
|
||||
"z'",
|
||||
"\n",
|
||||
",",
|
||||
"a",
|
||||
|
25
test/Jamfile
25
test/Jamfile
@ -55,9 +55,8 @@ template test-dll
|
||||
#
|
||||
template regression-dll
|
||||
: <template>test-dll # sources
|
||||
regress/parse.cpp
|
||||
regress/regress.cpp
|
||||
regress/tests.cpp
|
||||
regress/main.cpp
|
||||
regress/basic_tests.cpp
|
||||
<lib>../../test/build/boost_prg_exec_monitor
|
||||
;
|
||||
|
||||
@ -66,14 +65,7 @@ test-suite regex
|
||||
[ regex-test regex_regress
|
||||
: <template>regression # sources
|
||||
: # requirements
|
||||
: regress/tests.txt # input files
|
||||
]
|
||||
|
||||
[ regex-test regex_wide_regress
|
||||
: <template>regression # sources
|
||||
<template>../build/msvc-stlport-tricky
|
||||
: <define>TEST_UNICODE=1 # requirements
|
||||
: regress/tests.txt # input files
|
||||
: # input files
|
||||
]
|
||||
|
||||
[ regex-test posix_api_check
|
||||
@ -115,15 +107,8 @@ test-suite regex
|
||||
|
||||
[ regex-test regex_regress_dll
|
||||
: <template>regression-dll # sources
|
||||
: # requirements
|
||||
: regress/tests.txt # input files
|
||||
]
|
||||
|
||||
[ regex-test regex_wide_regress_dll
|
||||
: <template>regression-dll # sources
|
||||
<template>../build/msvc-stlport-tricky
|
||||
: <define>TEST_UNICODE=1 # requirements
|
||||
: regress/tests.txt # input files
|
||||
: # requirements
|
||||
: # input files
|
||||
]
|
||||
|
||||
[ compile concepts/concept_check.cpp
|
||||
|
@ -16,7 +16,7 @@ EX_SOURCES =
|
||||
wide_posix_api.cpp
|
||||
winstances.cpp ;
|
||||
|
||||
lib boost_regex_extra : ../../src/$(EX_SOURCES).cpp <template>../../build/regex-options
|
||||
lib boost_regex_extra : ../../src/$(EX_SOURCES) <template>../../build/regex-options
|
||||
:
|
||||
<define>BOOST_REGEX_MATCH_EXTRA=1
|
||||
:
|
||||
|
@ -181,6 +181,11 @@ void basic_tests()
|
||||
TEST_INVALID_REGEX("a\\{1,b\\}", basic);
|
||||
TEST_INVALID_REGEX("a\\{1,2v\\}", basic);
|
||||
|
||||
}
|
||||
|
||||
void test_alt()
|
||||
{
|
||||
using namespace boost::regex_constants;
|
||||
// now test the alternation operator |
|
||||
TEST_REGEX_SEARCH("a|b", perl, "a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a|b", perl, "b", match_default, make_array(0, 1, -2, -2));
|
||||
@ -205,441 +210,430 @@ void basic_tests()
|
||||
TEST_REGEX_SEARCH("a|", basic|bk_vbar, "a|", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "b", match_default, make_array(0, 1, -2, -2));
|
||||
}
|
||||
|
||||
// test_sets: regression cases for bracket expressions ("sets").
// Covers, in order: plain and negated sets (including literal ']', '[' and '-'
// placed at the edges of a set), character ranges, POSIX [[:class:]] names,
// backslash handling inside sets for the perl and basic syntaxes, and finally
// the \w / \W single character escapes.
// TEST_INVALID_REGEX cases are patterns expected to be rejected for the given syntax.
// NOTE(review): the make_array(...) arguments look like expected start/end offsets
// for the match and its sub-expressions, with -2 acting as a terminator and a bare
// (-2, -2) meaning "no match" -- confirm against the TEST_REGEX_SEARCH definition.
void test_sets()
|
||||
{
|
||||
using namespace boost::regex_constants;
|
||||
// now test the set operator []
|
||||
TEST_REGEX_SEARCH("[abc]", extended, "a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("[abc]", extended, "b", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("[abc]", extended, "c", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("[abc]", extended, "d", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("[^bcd]", extended, "a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("[^bcd]", extended, "b", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("[^bcd]", extended, "d", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("[^bcd]", extended, "e", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a[b]c", extended, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a[ab]c", extended, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a[^ab]c", extended, "adc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a[]b]c", extended, "a]c", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a[[b]c", extended, "a[c", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a[-b]c", extended, "a-c", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a[^]b]c", extended, "adc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a[^-b]c", extended, "adc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a[b-]c", extended, "a-c", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_INVALID_REGEX("a[b", extended);
|
||||
TEST_INVALID_REGEX("a[]", extended);
|
||||
|
||||
// now some ranges:
|
||||
TEST_REGEX_SEARCH("[b-e]", extended, "a", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("[b-e]", extended, "b", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("[b-e]", extended, "e", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("[b-e]", extended, "f", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("[^b-e]", extended, "a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("[^b-e]", extended, "b", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("[^b-e]", extended, "e", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("[^b-e]", extended, "f", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a[1-3]c", extended, "a2c", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a[-3]c", extended, "a-c", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a[-3]c", extended, "a3c", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a[^-3]c", extended, "a-c", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a[^-3]c", extended, "a3c", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a[^-3]c", extended, "axc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_INVALID_REGEX("a[3-1]c", extended);
|
||||
TEST_INVALID_REGEX("a[1-3-5]c", extended);
|
||||
TEST_INVALID_REGEX("a[1-", extended);
|
||||
|
||||
// and some classes
|
||||
TEST_REGEX_SEARCH("a[[:alpha:]]c", extended, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_INVALID_REGEX("a[[:unknown:]]c", extended);
|
||||
TEST_INVALID_REGEX("a[[:", extended);
|
||||
TEST_INVALID_REGEX("a[[:alpha", extended);
|
||||
TEST_INVALID_REGEX("a[[:alpha:]", extended);
|
||||
TEST_INVALID_REGEX("a[[:alpha,:]", extended);
|
||||
TEST_INVALID_REGEX("a[[:]:]]b", extended);
|
||||
TEST_INVALID_REGEX("a[[:-:]]b", extended);
|
||||
TEST_INVALID_REGEX("a[[:alph:]]", extended);
|
||||
TEST_INVALID_REGEX("a[[:alphabet:]]", extended);
|
||||
TEST_REGEX_SEARCH("[[:alnum:]]+", extended, "-%@a0X_-", match_default, make_array(3, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:alpha:]]+", extended, " -%@aX_0-", match_default, make_array(4, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:blank:]]+", extended, "a \tb", match_default, make_array(1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:cntrl:]]+", extended, " a\n\tb", match_default, make_array(2, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:digit:]]+", extended, "a019b", match_default, make_array(1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:graph:]]+", extended, " a%b ", match_default, make_array(1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:lower:]]+", extended, "AabC", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:print:]]+", extended, "AabC", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:punct:]]+", extended, " %-&\t", match_default, make_array(1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:space:]]+", extended, "a \n\t\rb", match_default, make_array(1, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:upper:]]+", extended, "aBCd", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:xdigit:]]+", extended, "p0f3Cx", match_default, make_array(1, 5, -2, -2));
|
||||
|
||||
//
|
||||
// escapes are supported in character classes if we have either
|
||||
// perl or awk regular expressions:
|
||||
//
|
||||
TEST_REGEX_SEARCH("[\\n]", perl, "\n", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("[\\n]", basic, "\n", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("[\\n]", basic, "\\", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:class:]", basic|no_char_classes, ":", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:class:]", basic|no_char_classes, "[", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:class:]", basic|no_char_classes, "c", match_default, make_array(0, 1, -2, -2));
|
||||
//
|
||||
// test single character escapes:
|
||||
//
|
||||
TEST_REGEX_SEARCH("\\w", perl, "A", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\w", perl, "Z", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\w", perl, "a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\w", perl, "z", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\w", perl, "_", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\w", perl, "}", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\w", perl, "`", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\w", perl, "[", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\w", perl, "@", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\W", perl, "a", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\W", perl, "z", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\W", perl, "A", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\W", perl, "Z", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\W", perl, "_", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\W", perl, "}", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\W", perl, "`", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\W", perl, "[", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\W", perl, "@", match_default, make_array(0, 1, -2, -2));
|
||||
}
|
||||
|
||||
// test_anchors: regression cases for the line anchors '^' and '$' under the
// extended syntax. The same six pattern/text pairs are repeated four times:
// with match_default alone, with match_not_bol | match_not_eol, with
// match_single_line, and with all three flags combined, so the effect of each
// flag combination on anchoring can be compared row by row.
// NOTE(review): a bare make_array(-2, -2) appears to mean "no match expected" --
// confirm against the TEST_REGEX_SEARCH definition.
void test_anchors()
|
||||
{
|
||||
// line anchors:
|
||||
using namespace boost::regex_constants;
|
||||
TEST_REGEX_SEARCH("^ab", extended, "ab", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("^ab", extended, "xxabxx", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("^ab", extended, "xx\nabzz", match_default, make_array(3, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab$", extended, "ab", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab$", extended, "abxx", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("ab$", extended, "ab\nzz", match_default, make_array(0, 2, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("^ab", extended, "ab", match_default | match_not_bol | match_not_eol, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("^ab", extended, "xxabxx", match_default | match_not_bol | match_not_eol, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("^ab", extended, "xx\nabzz", match_default | match_not_bol | match_not_eol, make_array(3, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab$", extended, "ab", match_default | match_not_bol | match_not_eol, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("ab$", extended, "abxx", match_default | match_not_bol | match_not_eol, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("ab$", extended, "ab\nzz", match_default | match_not_bol | match_not_eol, make_array(0, 2, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("^ab", extended, "ab", match_default | match_single_line, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("^ab", extended, "xxabxx", match_default | match_single_line, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("^ab", extended, "xx\nabzz", match_default | match_single_line, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("ab$", extended, "ab", match_default | match_single_line, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab$", extended, "abxx", match_default | match_single_line, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("ab$", extended, "ab\nzz", match_default | match_single_line, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("^ab", extended, "ab", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("^ab", extended, "xxabxx", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("^ab", extended, "xx\nabzz", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("ab$", extended, "ab", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("ab$", extended, "abxx", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("ab$", extended, "ab\nzz", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
|
||||
}
|
||||
|
||||
// test_backrefs: regression cases for back-references (\1, \2).
// Starts with two patterns expected to be rejected under the perl syntax,
// then checks back-references to plain, repeated and nested groups under perl.
// The final case runs the pattern of an earlier perl case under the extended
// syntax and expects a different (longer) overall match.
// NOTE(review): the make_array(...) arguments look like expected start/end offsets
// for the match and each sub-expression, -2 terminated -- confirm against the
// TEST_REGEX_SEARCH definition.
void test_backrefs()
|
||||
{
|
||||
using namespace boost::regex_constants;
|
||||
TEST_INVALID_REGEX("a(b)\\2c", perl);
|
||||
TEST_INVALID_REGEX("a(b\\1)c", perl);
|
||||
TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbbd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("^(.)\\1", perl, "abc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a([bc])\\1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
||||
// strictly speaking this is at best ambiguous, at worst wrong, this is what most
|
||||
// re implementations will match though.
|
||||
TEST_REGEX_SEARCH("a(([bc])\\2)*d", perl, "abbccd", match_default, make_array(0, 6, 3, 5, 3, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(([bc])\\2)*d", perl, "abbcbd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a((b)*\\2)*d", perl, "abbbd", match_default, make_array(0, 5, 1, 4, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("(ab*)[ab]*\\1", perl, "ababaaa", match_default, make_array(0, 4, 0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a)\\1bcd", perl, "aabcd", match_default, make_array(0, 5, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a)\\1bc*d", perl, "aabcd", match_default, make_array(0, 5, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a)\\1bc*d", perl, "aabd", match_default, make_array(0, 4, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a)\\1bc*d", perl, "aabcccd", match_default, make_array(0, 7, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a)\\1bc*[ce]d", perl, "aabcccd", match_default, make_array(0, 7, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("^(a)\\1b(c)*cd$", perl, "aabcccd", match_default, make_array(0, 7, 0, 1, 4, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("(ab*)[ab]*\\1", extended, "ababaaa", match_default, make_array(0, 7, 0, 1, -2, -2));
|
||||
}
|
||||
|
||||
// test_character_escapes: regression cases for escapes that denote a single
// literal character under the perl syntax: \0-prefixed numeric codes, the
// C-style escapes \a \f \n \r \v \t, \x hex codes and \c control characters.
// Two \c forms are expected to be rejected under the extended syntax, and the
// last case checks that "=:" is matched literally.
// NOTE(review): the make_array(...) arguments look like expected start/end offsets,
// -2 terminated -- confirm against the TEST_REGEX_SEARCH definition.
void test_character_escapes()
|
||||
{
|
||||
using namespace boost::regex_constants;
|
||||
// characters by code
|
||||
TEST_REGEX_SEARCH("\\0101", perl, "A", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\00", perl, "\0", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\0", perl, "\0", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\0172", perl, "z", match_default, make_array(0, 1, -2, -2));
|
||||
// extra escape sequences:
|
||||
TEST_REGEX_SEARCH("\\a", perl, "\a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\f", perl, "\f", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\n", perl, "\n", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\r", perl, "\r", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\v", perl, "\v", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\t", perl, "\t", match_default, make_array(0, 1, -2, -2));
|
||||
|
||||
// updated tests for version 2:
|
||||
TEST_REGEX_SEARCH("\\x41", perl, "A", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\xff", perl, "\xff", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\xFF", perl, "\xff", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\c@", perl, "\0", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\cA", perl, "\x1", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\cz", perl, "\x3A", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_INVALID_REGEX("\\c=", extended);
|
||||
TEST_INVALID_REGEX("\\c?", extended);
|
||||
TEST_REGEX_SEARCH("=:", perl, "=:", match_default, make_array(0, 2, -2, -2));
|
||||
}
|
||||
|
||||
// test_assertion_escapes: regression cases for zero-width assertions under the
// perl syntax: word start (\<), word end (\>), word boundary (\b), within-word
// (\B), and the buffer start/end operators (\` and \'). The last two groups
// repeat the word start / word end cases using the [[:<:]] and [[:>:]] spellings.
// NOTE(review): the make_array(...) arguments look like expected start/end offsets,
// -2 terminated, with a bare (-2, -2) meaning "no match" -- confirm against the
// TEST_REGEX_SEARCH definition.
void test_assertion_escapes()
|
||||
{
|
||||
using namespace boost::regex_constants;
|
||||
// word start:
|
||||
TEST_REGEX_SEARCH("\\<abcd", perl, " abcd", match_default, make_array(2, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\<ab", perl, "cab", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\<ab", perl, "\nab", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\<tag", perl, "::tag", match_default, make_array(2, 5, -2, -2));
|
||||
// word end:
|
||||
TEST_REGEX_SEARCH("abc\\>", perl, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\>", perl, "abcd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\>", perl, "abc\n", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\>", perl, "abc::", match_default, make_array(0,3, -2, -2));
|
||||
// word boundary:
|
||||
TEST_REGEX_SEARCH("\\babcd", perl, " abcd", match_default, make_array(2, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\bab", perl, "cab", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\bab", perl, "\nab", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\btag", perl, "::tag", match_default, make_array(2, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\b", perl, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\b", perl, "abcd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\b", perl, "abc\n", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\b", perl, "abc::", match_default, make_array(0, 3, -2, -2));
|
||||
// within word:
|
||||
TEST_REGEX_SEARCH("\\B", perl, "ab", match_default, make_array(1, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\Bb", perl, "ab", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\B", perl, "ab", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\B", perl, "a", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a\\B", perl, "a ", match_default, make_array(-2, -2));
|
||||
// buffer operators:
|
||||
TEST_REGEX_SEARCH("\\`abc", perl, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\`abc", perl, "\nabc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\`abc", perl, " abc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\'", perl, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\'", perl, "abc\n", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("abc\\'", perl, "abc ", match_default, make_array(-2, -2));
|
||||
|
||||
// word start:
|
||||
TEST_REGEX_SEARCH("[[:<:]]abcd", perl, " abcd", match_default, make_array(2, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:<:]]ab", perl, "cab", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("[[:<:]]ab", perl, "\nab", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:<:]]tag", perl, "::tag", match_default, make_array(2, 5, -2, -2));
|
||||
// word end
|
||||
TEST_REGEX_SEARCH("abc[[:>:]]", perl, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc[[:>:]]", perl, "abcd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("abc[[:>:]]", perl, "abc\n", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc[[:>:]]", perl, "abc::", match_default, make_array(0, 3, -2, -2));
|
||||
}
|
||||
|
||||
void test_tricky_cases()
|
||||
{
|
||||
using namespace boost::regex_constants;
|
||||
//TEST_REGEX_SEARCH("", perl, "", match_default, make_array(-2, -2));
|
||||
//
|
||||
// now follows various complex expressions designed to try and bust the matcher:
|
||||
//
|
||||
TEST_REGEX_SEARCH("a(((b)))c", perl, "abc", match_default, make_array(0, 3, 1, 2, 1, 2, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|(c))d", perl, "abd", match_default, make_array(0, 3, 1, 2, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|(c))d", perl, "acd", match_default, make_array(0, 3, 1, 2, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*|c)d", perl, "abbd", match_default, make_array(0, 4, 1, 3, -2, -2));
|
||||
// just gotta have one DFA-buster, of course
|
||||
TEST_REGEX_SEARCH("a[ab]{20}", perl, "aaaaabaaaabaaaabaaaab", match_default, make_array(0, 21, -2, -2));
|
||||
// and an inline expansion in case somebody gets tricky
|
||||
TEST_REGEX_SEARCH("a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]", perl, "aaaaabaaaabaaaabaaaab", match_default, make_array(0, 21, -2, -2));
|
||||
// and in case somebody just slips in an NFA...
|
||||
TEST_REGEX_SEARCH("a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)", perl, "aaaaabaaaabaaaabaaaabweeknights", match_default, make_array(0, 31, 21, 24, 24, 31, -2, -2));
|
||||
// one really big one
|
||||
TEST_REGEX_SEARCH("1234567890123456789012345678901234567890123456789012345678901234567890", perl, "a1234567890123456789012345678901234567890123456789012345678901234567890b", match_default, make_array(1, 71, -2, -2));
|
||||
// fish for problems as brackets go past 8
|
||||
TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn]", perl, "xacegikmoq", match_default, make_array(1, 8, -2, -2));
|
||||
TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn][op]", perl, "xacegikmoq", match_default, make_array(1, 9, -2, -2));
|
||||
TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn][op][qr]", perl, "xacegikmoqy", match_default, make_array(1, 10, -2, -2));
|
||||
TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn][op][q]", perl, "xacegikmoqy", match_default, make_array(1, 10, -2, -2));
|
||||
// and as parentheses go past 9:
|
||||
TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)", perl, "zabcdefghi", match_default, make_array(1, 9, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)(i)", perl, "zabcdefghij", match_default, make_array(1, 10, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)", perl, "zabcdefghijk", match_default, make_array(1, 11, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)", perl, "zabcdefghijkl", match_default, make_array(1, 12, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a)d|(b)c", perl, "abc", match_default, make_array(1, 3, -1, -1, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("_+((www)|(ftp)|(mailto)):_*", perl, "_wwwnocolon _mailto:", match_default, make_array(12, 20, 13, 19, -1, -1, -1, -1, 13, 19, -2, -2));
|
||||
// subtleties of matching
|
||||
TEST_REGEX_SEARCH("a(b)?c\\1d", perl, "acd", match_default, make_array(0, 3, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b?c)+d", perl, "accd", match_default, make_array(0, 4, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("(wee|week)(knights|night)", perl, "weeknights", match_default, make_array(0, 10, 0, 3, 3, 10, -2, -2));
|
||||
TEST_REGEX_SEARCH(".*", perl, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|(c))d", perl, "abd", match_default, make_array(0, 3, 1, 2, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|(c))d", perl, "acd", match_default, make_array(0, 3, 1, 2, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*|c|e)d", perl, "abbd", match_default, make_array(0, 4, 1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*|c|e)d", perl, "acd", match_default, make_array(0, 3, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*|c|e)d", perl, "ad", match_default, make_array(0, 2, 1, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b?)c", perl, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b?)c", perl, "ac", match_default, make_array(0, 2, 1, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b+)c", perl, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b+)c", perl, "abbbc", match_default, make_array(0, 5, 1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*)c", perl, "ac", match_default, make_array(0, 2, 1, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a|ab)(bc([de]+)f|cde)", perl, "abcdef", match_default, make_array(0, 6, 0, 1, 1, 6, 3, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("a([bc]?)c", perl, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a([bc]?)c", perl, "ac", match_default, make_array(0, 2, 1, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a([bc]+)c", perl, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a([bc]+)c", perl, "abcc", match_default, make_array(0, 4, 1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a([bc]+)bc", perl, "abcbc", match_default, make_array(0, 5, 1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(bb+|b)b", perl, "abb", match_default, make_array(0, 3, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(bbb+|bb+|b)b", perl, "abb", match_default, make_array(0, 3, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(bbb+|bb+|b)b", perl, "abbb", match_default, make_array(0, 4, 1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(bbb+|bb+|b)bb", perl, "abbb", match_default, make_array(0, 4, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("(.*).*", perl, "abcdef", match_default, make_array(0, 6, 0, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a*)*", perl, "bc", match_default, make_array(0, 0, 0, 0, -2, -2));
|
||||
TEST_REGEX_SEARCH("xyx*xz", perl, "xyxxxxyxxxz", match_default, make_array(5, 11, -2, -2));
|
||||
// do we get the right subexpression when it is used more than once?
|
||||
TEST_REGEX_SEARCH("a(b|c)*d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c)*d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c)+d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c)+d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c?)+d", perl, "ad", match_default, make_array(0, 2, 1, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){0,0}d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){0,1}d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){0,1}d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){0,2}d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){0,2}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){0,}d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){0,}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){1,1}d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){1,2}d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){1,2}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){1,}d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){1,}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){2,2}d", perl, "acbd", match_default, make_array(0, 4, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){2,2}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){2,4}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){2,4}d", perl, "abcbd", match_default, make_array(0, 5, 3, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){2,4}d", perl, "abcbcd", match_default, make_array(0, 6, 4, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){2,}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c){2,}d", perl, "abcbd", match_default, make_array(0, 5, 3, 4, -2, -2));
|
||||
// perl only:
|
||||
TEST_REGEX_SEARCH("a(b|c?)+d", perl, "abcd", match_default, make_array(0, 4, 3, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b+|((c)*))+d", perl, "abd", match_default, make_array(0, 3, 2, 2, 2, 2, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b+|((c)*))+d", perl, "abcd", match_default, make_array(0, 4, 3, 3, 3, 3, 2, 3, -2, -2));
|
||||
// posix only:
|
||||
TEST_REGEX_SEARCH("a(b|c?)+d", extended, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|((c)*))+d", extended, "abcd", match_default, make_array(0, 4, 2, 3, 2, 3, 2, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b+|((c)*))+d", extended, "abd", match_default, make_array(0, 3, 1, 2, -1, -1, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b+|((c)*))+d", extended, "abcd", match_default, make_array(0, 4, 2, 3, 2, 3, 2, 3, -2, -2));
|
||||
// literals:
|
||||
TEST_REGEX_SEARCH("\\**?/{}", literal, "\\**?/{}", match_default, make_array(0, 7, -2, -2));
|
||||
// try to match C++ syntax elements:
|
||||
// line comment:
|
||||
TEST_REGEX_SEARCH("//[^\\n]*", perl, "++i //here is a line comment\n", match_default, make_array(4, 28, -2, -2));
|
||||
// block comment:
|
||||
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/* here is a block comment */", match_default, make_array(0, 29, 26, 27, -2, -2));
|
||||
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/**/", match_default, make_array(0, 4, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/***/", match_default, make_array(0, 5, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/****/", match_default, make_array(0, 6, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/*****/", match_default, make_array(0, 7, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/*****/*/", match_default, make_array(0, 7, -1, -1, -2, -2));
|
||||
// preprocessor directives:
|
||||
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", perl, "#define some_symbol", match_default, make_array(0, 19, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", perl, "#define some_symbol(x) #x", match_default, make_array(0, 25, -1, -1, -2, -2));
|
||||
// perl only:
|
||||
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", perl, "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);", match_default, make_array(0, 53, 30, 42, -2, -2));
|
||||
// literals:
|
||||
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFF", match_default, make_array(0, 4, 0, 4, 0, 4, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "35", match_default, make_array(0, 2, 0, 2, -1, -1, 0, 2, -1, -1, -1, -1, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFFu", match_default, make_array(0, 5, 0, 4, 0, 4, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFFL", match_default, make_array(0, 5, 0, 4, 0, 4, -1, -1, 4, 5, -1, -1, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFFFFFFFFFFFFFFFFuint64", match_default, make_array(0, 24, 0, 18, 0, 18, -1, -1, 19, 24, 19, 24, 22, 24, -2, -2));
|
||||
// strings:
|
||||
TEST_REGEX_SEARCH("'([^\\\\']|\\\\.)*'", perl, "'\\x3A'", match_default, make_array(0, 6, 4, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("'([^\\\\']|\\\\.)*'", perl, "'\\''", match_default, make_array(0, 4, 1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("'([^\\\\']|\\\\.)*'", perl, "'\\n'", match_default, make_array(0, 4, 1, 3, -2, -2));
|
||||
// posix only:
|
||||
TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", awk, "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);", match_default, make_array(0, 53, 28, 42, -2, -2));
|
||||
// now try and test some unicode specific characters:
|
||||
TEST_REGEX_SEARCH_W(L"[[:unicode:]]+", perl, L"a\u0300\u0400z", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH_W(L"[\x10-\xff]", perl, L"\u0300\u0400", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH_W(L"[\01-\05]{5}", perl, L"\u0300\u0400\u0300\u0400\u0300\u0400", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH_W(L"[\x300-\x400]+", perl, L"\u0300\u0400\u0300\u0400\u0300\u0400", match_default, make_array(0, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH_W(L"[\\x{300}-\\x{400}]+", perl, L"\u0300\u0400\u0300\u0400\u0300\u0400", match_default, make_array(0, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH_W(L"\\x{300}\\x{400}+", perl, L"\u0300\u0400\u0400\u0400\u0400\u0400", match_default, make_array(0, 6, -2, -2));
|
||||
// finally try some case insensitive matches:
|
||||
TEST_REGEX_SEARCH("0123456789@abcdefghijklmnopqrstuvwxyz\\[\\\\\\]\\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\\{\\|\\}", perl|icase, "0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}", match_default, make_array(0, 72, -2, -2));
|
||||
TEST_REGEX_SEARCH("a", perl|icase, "A", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("A", perl|icase, "a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("[abc]+", perl|icase, "abcABC", match_default, make_array(0, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("[ABC]+", perl|icase, "abcABC", match_default, make_array(0, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("[a-z]+", perl|icase, "abcABC", match_default, make_array(0, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("[A-Z]+", perl|icase, "abzANZ", match_default, make_array(0, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("[a-Z]+", perl|icase, "abzABZ", match_default, make_array(0, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("[A-z]+", perl|icase, "abzABZ", match_default, make_array(0, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:lower:]]+", perl|icase, "abyzABYZ", match_default, make_array(0, 8, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:upper:]]+", perl|icase, "abzABZ", match_default, make_array(0, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:word:]]+", perl|icase, "abcZZZ", match_default, make_array(0, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:alpha:]]+", perl|icase, "abyzABYZ", match_default, make_array(0, 8, -2, -2));
|
||||
TEST_REGEX_SEARCH("[[:alnum:]]+", perl|icase, "09abyzABYZ", match_default, make_array(0, 10, -2, -2));
|
||||
|
||||
// known and suspected bugs:
|
||||
TEST_REGEX_SEARCH("\\(", perl, "(", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\)", perl, ")", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\$", perl, "$", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\^", perl, "^", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\.", perl, ".", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\*", perl, "*", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\+", perl, "+", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\?", perl, "?", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\[", perl, "[", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\]", perl, "]", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\|", perl, "|", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\\\", perl, "\\", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("#", perl, "#", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\#", perl, "#", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a-", perl, "a-", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\-", perl, "-", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\{", perl, "{", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\}", perl, "}", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("0", perl, "0", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("1", perl, "1", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("9", perl, "9", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("b", perl, "b", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("B", perl, "B", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("<", perl, "<", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH(">", perl, ">", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("w", perl, "w", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("W", perl, "W", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("`", perl, "`", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH(" ", perl, " ", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\n", perl, "\n", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH(",", perl, ",", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a", perl, "a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("f", perl, "f", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("n", perl, "n", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("r", perl, "r", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("t", perl, "t", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("v", perl, "v", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("c", perl, "c", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("x", perl, "x", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH(":", perl, ":", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(\\.[[:alnum:]]+){2}", perl, "w.a.b ", match_default, make_array(1, 5, 3, 5, -2, -2));
|
||||
#if 0
|
||||
|
||||
; now test the set operator []
|
||||
- match_default normal REG_EXTENDED
|
||||
; try some literals first
|
||||
[abc] a 0 1
|
||||
[abc] b 0 1
|
||||
[abc] c 0 1
|
||||
[abc] d -1 -1
|
||||
[^bcd] a 0 1
|
||||
[^bcd] b -1 -1
|
||||
[^bcd] d -1 -1
|
||||
[^bcd] e 0 1
|
||||
a[b]c abc 0 3
|
||||
a[ab]c abc 0 3
|
||||
a[^ab]c adc 0 3
|
||||
a[]b]c a]c 0 3
|
||||
a[[b]c a[c 0 3
|
||||
a[-b]c a-c 0 3
|
||||
a[^]b]c adc 0 3
|
||||
a[^-b]c adc 0 3
|
||||
a[b-]c a-c 0 3
|
||||
a[b !
|
||||
a[] !
|
||||
|
||||
; then some ranges
|
||||
[b-e] a -1 -1
|
||||
[b-e] b 0 1
|
||||
[b-e] e 0 1
|
||||
[b-e] f -1 -1
|
||||
[^b-e] a 0 1
|
||||
[^b-e] b -1 -1
|
||||
[^b-e] e -1 -1
|
||||
[^b-e] f 0 1
|
||||
a[1-3]c a2c 0 3
|
||||
a[3-1]c !
|
||||
a[1-3-5]c !
|
||||
a[1- !
|
||||
|
||||
; and some classes
|
||||
a[[:alpha:]]c abc 0 3
|
||||
a[[:unknown:]]c !
|
||||
a[[: !
|
||||
a[[:alpha !
|
||||
a[[:alpha:] !
|
||||
a[[:alpha,:] !
|
||||
a[[:]:]]b !
|
||||
a[[:-:]]b !
|
||||
a[[:alph:]] !
|
||||
a[[:alphabet:]] !
|
||||
[[:alnum:]]+ -%@a0X_- 3 6
|
||||
[[:alpha:]]+ -%@aX_0- 3 5
|
||||
[[:blank:]]+ "a \tb" 1 4
|
||||
[[:cntrl:]]+ a\n\tb 1 3
|
||||
[[:digit:]]+ a019b 1 4
|
||||
[[:graph:]]+ " a%b " 1 4
|
||||
[[:lower:]]+ AabC 1 3
|
||||
; This test fails with STLPort, disable for now as this is a corner case anyway...
|
||||
;[[:print:]]+ "\na b\n" 1 4
|
||||
[[:punct:]]+ " %-&\t" 1 4
|
||||
[[:space:]]+ "a \n\t\rb" 1 5
|
||||
[[:upper:]]+ aBCd 1 3
|
||||
[[:xdigit:]]+ p0f3Cx 1 5
|
||||
|
||||
; now test flag settings:
|
||||
- escape_in_lists REG_NO_POSIX_TEST
|
||||
[\n] \n 0 1
|
||||
- REG_NO_POSIX_TEST
|
||||
[\n] \n -1 -1
|
||||
[\n] \\ 0 1
|
||||
[[:class:] : 0 1
|
||||
[[:class:] [ 0 1
|
||||
[[:class:] c 0 1
|
||||
|
||||
; line anchors
|
||||
- match_default normal REG_EXTENDED
|
||||
^ab ab 0 2
|
||||
^ab xxabxx -1 -1
|
||||
^ab xx\nabzz 3 5
|
||||
ab$ ab 0 2
|
||||
ab$ abxx -1 -1
|
||||
ab$ ab\nzz 0 2
|
||||
- match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL
|
||||
^ab ab -1 -1
|
||||
^ab xxabxx -1 -1
|
||||
^ab xx\nabzz 3 5
|
||||
ab$ ab -1 -1
|
||||
ab$ abxx -1 -1
|
||||
ab$ ab\nzz 0 2
|
||||
|
||||
; line anchors, single line mode
|
||||
- match_default normal match_single_line REG_NO_POSIX_TEST
|
||||
^ab ab 0 2
|
||||
^ab xxabxx -1 -1
|
||||
^ab xx\nabzz -1 -1
|
||||
ab$ ab 0 2
|
||||
ab$ abxx -1 -1
|
||||
ab$ ab\nzz -1 -1
|
||||
- match_default match_not_bol match_not_eol normal REG_NO_POSIX_TEST match_single_line
|
||||
^ab ab -1 -1
|
||||
^ab xxabxx -1 -1
|
||||
^ab xx\nabzz -1 -1
|
||||
ab$ ab -1 -1
|
||||
ab$ abxx -1 -1
|
||||
ab$ ab\nzz -1 -1
|
||||
|
||||
; back references
|
||||
- match_default normal REG_PERL
|
||||
a(b)\2c !
|
||||
a(b\1)c !
|
||||
a(b*)c\1d abbcbbd 0 7 1 3
|
||||
a(b*)c\1d abbcbd -1 -1
|
||||
a(b*)c\1d abbcbbbd -1 -1
|
||||
^(.)\1 abc -1 -1
|
||||
a([bc])\1d abcdabbd 4 8 5 6
|
||||
; strictly speaking this is at best ambiguous, at worst wrong, this is what most
|
||||
; re implementations will match though.
|
||||
a(([bc])\2)*d abbccd 0 6 3 5 3 4
|
||||
|
||||
a(([bc])\2)*d abbcbd -1 -1
|
||||
a((b)*\2)*d abbbd 0 5 1 4 2 3
|
||||
; perl only:
|
||||
(ab*)[ab]*\1 ababaaa 0 4 0 2
|
||||
(a)\1bcd aabcd 0 5 0 1
|
||||
(a)\1bc*d aabcd 0 5 0 1
|
||||
(a)\1bc*d aabd 0 4 0 1
|
||||
(a)\1bc*d aabcccd 0 7 0 1
|
||||
(a)\1bc*[ce]d aabcccd 0 7 0 1
|
||||
^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5
|
||||
|
||||
; posix only:
|
||||
- match_default extended REG_EXTENDED
|
||||
(ab*)[ab]*\1 ababaaa 0 7 0 1
|
||||
|
||||
;
|
||||
; characters by code:
|
||||
- match_default normal REG_PERL REG_STARTEND
|
||||
\0101 A 0 1
|
||||
\00 \0 0 1
|
||||
\0 \0 0 1
|
||||
\0172 z 0 1
|
||||
|
||||
;
|
||||
; word operators:
|
||||
\w a 0 1
|
||||
\w z 0 1
|
||||
\w A 0 1
|
||||
\w Z 0 1
|
||||
\w _ 0 1
|
||||
\w } -1 -1
|
||||
\w ` -1 -1
|
||||
\w [ -1 -1
|
||||
\w @ -1 -1
|
||||
; non-word:
|
||||
\W a -1 -1
|
||||
\W z -1 -1
|
||||
\W A -1 -1
|
||||
\W Z -1 -1
|
||||
\W _ -1 -1
|
||||
\W } 0 1
|
||||
\W ` 0 1
|
||||
\W [ 0 1
|
||||
\W @ 0 1
|
||||
; word start:
|
||||
\<abcd " abcd" 2 6
|
||||
\<ab cab -1 -1
|
||||
\<ab "\nab" 1 3
|
||||
\<tag ::tag 2 5
|
||||
;word end:
|
||||
abc\> abc 0 3
|
||||
abc\> abcd -1 -1
|
||||
abc\> abc\n 0 3
|
||||
abc\> abc:: 0 3
|
||||
; word boundary:
|
||||
\babcd " abcd" 2 6
|
||||
\bab cab -1 -1
|
||||
\bab "\nab" 1 3
|
||||
\btag ::tag 2 5
|
||||
abc\b abc 0 3
|
||||
abc\b abcd -1 -1
|
||||
abc\b abc\n 0 3
|
||||
abc\b abc:: 0 3
|
||||
; within word:
|
||||
\B ab 1 1
|
||||
a\Bb ab 0 2
|
||||
a\B ab 0 1
|
||||
a\B a -1 -1
|
||||
a\B "a " -1 -1
|
||||
|
||||
;
|
||||
; buffer operators:
|
||||
\`abc abc 0 3
|
||||
\`abc \nabc -1 -1
|
||||
\`abc " abc" -1 -1
|
||||
abc\' abc 0 3
|
||||
abc\' abc\n -1 -1
|
||||
abc\' "abc " -1 -1
|
||||
|
||||
;
|
||||
; extra escape sequences:
|
||||
\a \a 0 1
|
||||
\f \f 0 1
|
||||
\n \n 0 1
|
||||
\r \r 0 1
|
||||
\t \t 0 1
|
||||
\v \v 0 1
|
||||
|
||||
|
||||
;
|
||||
; now follows various complex expressions designed to try and bust the matcher:
|
||||
a(((b)))c abc 0 3 1 2 1 2 1 2
|
||||
a(b|(c))d abd 0 3 1 2 -1 -1
|
||||
a(b|(c))d acd 0 3 1 2 1 2
|
||||
a(b*|c)d abbd 0 4 1 3
|
||||
; just gotta have one DFA-buster, of course
|
||||
a[ab]{20} aaaaabaaaabaaaabaaaab 0 21
|
||||
; and an inline expansion in case somebody gets tricky
|
||||
a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21
|
||||
; and in case somebody just slips in an NFA...
|
||||
a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31
|
||||
; one really big one
|
||||
1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71
|
||||
; fish for problems as brackets go past 8
|
||||
[ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8
|
||||
[ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9
|
||||
[ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10
|
||||
[ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10
|
||||
; and as parentheses go past 9:
|
||||
(a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9
|
||||
(a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10
|
||||
(a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11
|
||||
(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12
|
||||
(a)d|(b)c abc 1 3 -1 -1 1 2
|
||||
"_+((www)|(ftp)|(mailto)):_*" "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19
|
||||
|
||||
; subtleties of matching
|
||||
a(b)?c\1d acd 0 3 -1 -1
|
||||
a(b?c)+d accd 0 4 2 3
|
||||
(wee|week)(knights|night) weeknights 0 10 0 3 3 10
|
||||
.* abc 0 3
|
||||
a(b|(c))d abd 0 3 1 2 -1 -1
|
||||
a(b|(c))d acd 0 3 1 2 1 2
|
||||
a(b*|c|e)d abbd 0 4 1 3
|
||||
a(b*|c|e)d acd 0 3 1 2
|
||||
a(b*|c|e)d ad 0 2 1 1
|
||||
a(b?)c abc 0 3 1 2
|
||||
a(b?)c ac 0 2 1 1
|
||||
a(b+)c abc 0 3 1 2
|
||||
a(b+)c abbbc 0 5 1 4
|
||||
a(b*)c ac 0 2 1 1
|
||||
(a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5
|
||||
a([bc]?)c abc 0 3 1 2
|
||||
a([bc]?)c ac 0 2 1 1
|
||||
a([bc]+)c abc 0 3 1 2
|
||||
a([bc]+)c abcc 0 4 1 3
|
||||
a([bc]+)bc abcbc 0 5 1 3
|
||||
a(bb+|b)b abb 0 3 1 2
|
||||
a(bbb+|bb+|b)b abb 0 3 1 2
|
||||
a(bbb+|bb+|b)b abbb 0 4 1 3
|
||||
a(bbb+|bb+|b)bb abbb 0 4 1 2
|
||||
(.*).* abcdef 0 6 0 6
|
||||
(a*)* bc 0 0 0 0
|
||||
xyx*xz xyxxxxyxxxz 5 11
|
||||
|
||||
; do we get the right subexpression when it is used more than once?
|
||||
a(b|c)*d ad 0 2 -1 -1
|
||||
a(b|c)*d abcd 0 4 2 3
|
||||
a(b|c)+d abd 0 3 1 2
|
||||
a(b|c)+d abcd 0 4 2 3
|
||||
a(b|c?)+d ad 0 2 1 1
|
||||
a(b|c){0,0}d ad 0 2 -1 -1
|
||||
a(b|c){0,1}d ad 0 2 -1 -1
|
||||
a(b|c){0,1}d abd 0 3 1 2
|
||||
a(b|c){0,2}d ad 0 2 -1 -1
|
||||
a(b|c){0,2}d abcd 0 4 2 3
|
||||
a(b|c){0,}d ad 0 2 -1 -1
|
||||
a(b|c){0,}d abcd 0 4 2 3
|
||||
a(b|c){1,1}d abd 0 3 1 2
|
||||
a(b|c){1,2}d abd 0 3 1 2
|
||||
a(b|c){1,2}d abcd 0 4 2 3
|
||||
a(b|c){1,}d abd 0 3 1 2
|
||||
a(b|c){1,}d abcd 0 4 2 3
|
||||
a(b|c){2,2}d acbd 0 4 2 3
|
||||
a(b|c){2,2}d abcd 0 4 2 3
|
||||
a(b|c){2,4}d abcd 0 4 2 3
|
||||
a(b|c){2,4}d abcbd 0 5 3 4
|
||||
a(b|c){2,4}d abcbcd 0 6 4 5
|
||||
a(b|c){2,}d abcd 0 4 2 3
|
||||
a(b|c){2,}d abcbd 0 5 3 4
|
||||
; perl only:
|
||||
a(b|c?)+d abcd 0 4 3 3
|
||||
a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1
|
||||
a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3
|
||||
|
||||
; posix only:
|
||||
- match_default extended REG_EXTENDED REG_STARTEND
|
||||
a(b|c?)+d abcd 0 4 2 3
|
||||
a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3
|
||||
a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1
|
||||
a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3
|
||||
|
||||
|
||||
- match_default normal REG_EXTENDED REG_STARTEND REG_NOSPEC literal
|
||||
\**?/{} \\**?/{} 0 7
|
||||
|
||||
- match_default normal REG_PERL
|
||||
; try to match C++ syntax elements:
|
||||
; line comment:
|
||||
//[^\n]* "++i //here is a line comment\n" 4 28
|
||||
; block comment:
|
||||
/\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27
|
||||
/\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1
|
||||
/\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1
|
||||
/\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1
|
||||
/\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1
|
||||
/\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1
|
||||
; preprocessor directives:
|
||||
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1
|
||||
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1
|
||||
; perl only:
|
||||
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 30 42
|
||||
; literals:
|
||||
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF 0 4 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
|
||||
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 0 2 0 2 -1 -1 0 2 -1 -1 -1 -1 -1 -1
|
||||
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 0 5 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
|
||||
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 0 5 0 4 0 4 -1 -1 4 5 -1 -1 -1 -1
|
||||
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 0 24 0 18 0 18 -1 -1 19 24 19 24 22 24
|
||||
; strings:
|
||||
'([^\\']|\\.)*' '\\x3A' 0 6 4 5
|
||||
'([^\\']|\\.)*' '\\'' 0 4 1 3
|
||||
'([^\\']|\\.)*' '\\n' 0 4 1 3
|
||||
|
||||
; posix only:
|
||||
- match_default extended escape_in_lists REG_EXTENDED REG_NO_POSIX_TEST ; we disable POSIX testing because it can't handle escapes in sets
|
||||
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 28 42
|
||||
|
||||
|
||||
; now try and test some unicode specific characters:
|
||||
- match_default normal REG_PERL REG_UNICODE_ONLY
|
||||
[[:unicode:]]+ a\0300\0400z 1 3
|
||||
[\x10-\xff] \39135\12409 -1 -1
|
||||
[\01-\05]{5} \36865\36865\36865\36865\36865 -1 -1
|
||||
|
||||
; finally try some case insensitive matches:
|
||||
- match_default normal REG_EXTENDED REG_ICASE
|
||||
; upper and lower have no meaning here so they fail, however these
|
||||
; may compile with other libraries...
|
||||
;[[:lower:]] !
|
||||
;[[:upper:]] !
|
||||
0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72
|
||||
|
||||
; known and suspected bugs:
|
||||
- match_default normal REG_EXTENDED
|
||||
\( ( 0 1
|
||||
\) ) 0 1
|
||||
\$ $ 0 1
|
||||
\^ ^ 0 1
|
||||
\. . 0 1
|
||||
\* * 0 1
|
||||
\+ + 0 1
|
||||
\? ? 0 1
|
||||
\[ [ 0 1
|
||||
\] ] 0 1
|
||||
\| | 0 1
|
||||
\\ \\ 0 1
|
||||
# # 0 1
|
||||
\# # 0 1
|
||||
a- a- 0 2
|
||||
\- - 0 1
|
||||
\{ { 0 1
|
||||
\} } 0 1
|
||||
0 0 0 1
|
||||
1 1 0 1
|
||||
9 9 0 1
|
||||
b b 0 1
|
||||
B B 0 1
|
||||
< < 0 1
|
||||
> > 0 1
|
||||
w w 0 1
|
||||
W W 0 1
|
||||
` ` 0 1
|
||||
' ' 0 1
|
||||
\n \n 0 1
|
||||
, , 0 1
|
||||
a a 0 1
|
||||
f f 0 1
|
||||
n n 0 1
|
||||
r r 0 1
|
||||
t t 0 1
|
||||
v v 0 1
|
||||
c c 0 1
|
||||
x x 0 1
|
||||
: : 0 1
|
||||
(\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5
|
||||
|
||||
- match_default normal REG_EXTENDED REG_ICASE
|
||||
a A 0 1
|
||||
A a 0 1
|
||||
[abc]+ abcABC 0 6
|
||||
[ABC]+ abcABC 0 6
|
||||
[a-z]+ abcABC 0 6
|
||||
[A-Z]+ abzANZ 0 6
|
||||
[a-Z]+ abzABZ 0 6
|
||||
[A-z]+ abzABZ 0 6
|
||||
[[:lower:]]+ abyzABYZ 0 8
|
||||
[[:upper:]]+ abzABZ 0 6
|
||||
[[:word:]]+ abcZZZ 0 6
|
||||
[[:alpha:]]+ abyzABYZ 0 8
|
||||
[[:alnum:]]+ 09abyzABYZ 0 10
|
||||
|
||||
; updated tests for version 2:
|
||||
- match_default normal REG_EXTENDED
|
||||
\x41 A 0 1
|
||||
\xff \255 0 1
|
||||
\xFF \255 0 1
|
||||
- match_default normal REG_EXTENDED REG_NO_POSIX_TEST
|
||||
\c@ \0 0 1
|
||||
- match_default normal REG_EXTENDED
|
||||
\cA \1 0 1
|
||||
\cz \58 0 1
|
||||
\c= !
|
||||
\c? !
|
||||
=: =: 0 2
|
||||
|
||||
; word start:
|
||||
[[:<:]]abcd " abcd" 2 6
|
||||
[[:<:]]ab cab -1 -1
|
||||
[[:<:]]ab "\nab" 1 3
|
||||
[[:<:]]tag ::tag 2 5
|
||||
;word end:
|
||||
abc[[:>:]] abc 0 3
|
||||
abc[[:>:]] abcd -1 -1
|
||||
abc[[:>:]] abc\n 0 3
|
||||
abc[[:>:]] abc:: 0 3
|
||||
|
||||
; collating elements and rewritten set code:
|
||||
- match_default normal REG_EXTENDED REG_STARTEND
|
||||
[[.zero.]] 0 0 1
|
||||
|
@ -7,6 +7,13 @@ int error_count = 0;
|
||||
// Test driver entry point: calls each regex test group in a fixed order and
// returns the global error_count as the program result.
// argc and argv are accepted but not used in this body.
// NOTE(review): presumably each test group increments error_count when a
// check fails, so a non-zero return means failures — confirm against the
// TEST_REGEX_SEARCH machinery.
// NOTE(review): the name cpp_main suggests this is invoked by a test
// framework's execution monitor rather than directly by the C runtime — confirm.
int cpp_main(int argc, char * argv[])
|
||||
{
|
||||
basic_tests();
|
||||
test_alt();
|
||||
test_sets();
|
||||
test_anchors();
|
||||
test_backrefs();
|
||||
test_character_escapes();
|
||||
test_assertion_escapes();
|
||||
test_tricky_cases();
|
||||
return error_count; // result is whatever the test groups left in error_count
|
||||
}
|
||||
|
||||
@ -42,4 +49,5 @@ const int* make_array(int first, ...)
|
||||
}
|
||||
va_end(ap);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -85,6 +85,13 @@ const int* make_array(int first, ...);
|
||||
// define the test group procedures:
|
||||
//
|
||||
void basic_tests();
|
||||
void test_alt();
|
||||
void test_sets();
|
||||
void test_anchors();
|
||||
void test_backrefs();
|
||||
void test_character_escapes();
|
||||
void test_assertion_escapes();
|
||||
void test_tricky_cases();
|
||||
|
||||
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user