mirror of
https://github.com/boostorg/regex.git
synced 2025-07-29 12:07:28 +02:00
Almost complete implementation...
[SVN r22669]
This commit is contained in:
@ -236,9 +236,9 @@ namespace boost{ typedef wchar_t regex_wchar_type; }
|
||||
# if defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK)
|
||||
# define BOOST_DYN_LINK
|
||||
# endif
|
||||
#ifdef BOOST_REGEX_DIAG
|
||||
# define BOOST_LIB_DIAGNOSTIC
|
||||
#endif
|
||||
# ifdef BOOST_REGEX_DIAG
|
||||
# define BOOST_LIB_DIAGNOSTIC
|
||||
# endif
|
||||
# include <boost/config/auto_link.hpp>
|
||||
#endif
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
#define BOOST_REGEX_STATIC_MUTEX_HPP
|
||||
|
||||
#include <boost/config.hpp>
|
||||
#include <boost/regex/config.hpp> // dll import/export options.
|
||||
|
||||
#ifdef BOOST_HAS_PTHREADS
|
||||
#include <pthread.h>
|
||||
@ -35,7 +36,7 @@
|
||||
//
|
||||
namespace boost{
|
||||
|
||||
class scoped_static_mutex_lock;
|
||||
class BOOST_REGEX_DECL scoped_static_mutex_lock;
|
||||
|
||||
class static_mutex
|
||||
{
|
||||
@ -46,7 +47,7 @@ public:
|
||||
|
||||
#define BOOST_STATIC_MUTEX_INIT { PTHREAD_MUTEX_INITIALIZER, }
|
||||
|
||||
class scoped_static_mutex_lock
|
||||
class BOOST_REGEX_DECL scoped_static_mutex_lock
|
||||
{
|
||||
public:
|
||||
scoped_static_mutex_lock(static_mutex& mut, bool lk = true);
|
||||
@ -82,7 +83,7 @@ inline bool scoped_static_mutex_lock::locked()const
|
||||
|
||||
namespace boost{
|
||||
|
||||
class scoped_static_mutex_lock;
|
||||
class BOOST_REGEX_DECL scoped_static_mutex_lock;
|
||||
|
||||
class static_mutex
|
||||
{
|
||||
@ -93,7 +94,7 @@ public:
|
||||
|
||||
#define BOOST_STATIC_MUTEX_INIT { 0, }
|
||||
|
||||
class scoped_static_mutex_lock
|
||||
class BOOST_REGEX_DECL scoped_static_mutex_lock
|
||||
{
|
||||
public:
|
||||
scoped_static_mutex_lock(static_mutex& mut, bool lk = true);
|
||||
@ -134,10 +135,10 @@ inline bool scoped_static_mutex_lock::locked()const
|
||||
|
||||
namespace boost{
|
||||
|
||||
class scoped_static_mutex_lock;
|
||||
extern "C" void free_static_mutex();
|
||||
class BOOST_REGEX_DECL scoped_static_mutex_lock;
|
||||
extern "C" BOOST_REGEX_DECL void free_static_mutex();
|
||||
|
||||
class static_mutex
|
||||
class BOOST_REGEX_DECL static_mutex
|
||||
{
|
||||
public:
|
||||
typedef scoped_static_mutex_lock scoped_lock;
|
||||
@ -148,7 +149,7 @@ public:
|
||||
|
||||
#define BOOST_STATIC_MUTEX_INIT { }
|
||||
|
||||
class scoped_static_mutex_lock
|
||||
class BOOST_REGEX_DECL scoped_static_mutex_lock
|
||||
{
|
||||
public:
|
||||
scoped_static_mutex_lock(static_mutex& mut, bool lk = true);
|
||||
|
@ -68,7 +68,7 @@ struct regex_data
|
||||
//
|
||||
template <class charT, class traits>
|
||||
class basic_regex_implementation
|
||||
: protected regex_data<charT, traits>
|
||||
: public regex_data<charT, traits>
|
||||
{
|
||||
public:
|
||||
typedef regex_constants::syntax_option_type flag_type;
|
||||
|
@ -45,8 +45,8 @@ template <class charT, class traits>
|
||||
class basic_char_set
|
||||
{
|
||||
public:
|
||||
typedef digraph<charT> digraph_type;
|
||||
typedef std::basic_string<charT> string_type;
|
||||
typedef digraph<charT> digraph_type;
|
||||
typedef typename traits::string_type string_type;
|
||||
typedef typename traits::char_class_type mask_type;
|
||||
|
||||
basic_char_set()
|
||||
@ -68,8 +68,16 @@ public:
|
||||
{
|
||||
m_ranges.push_back(first);
|
||||
m_ranges.push_back(end);
|
||||
if(first.second || end.second)
|
||||
if(first.second)
|
||||
{
|
||||
m_has_digraphs = true;
|
||||
add_single(first);
|
||||
}
|
||||
if(end.second)
|
||||
{
|
||||
m_has_digraphs = true;
|
||||
add_single(end);
|
||||
}
|
||||
m_empty = false;
|
||||
}
|
||||
void add_class(mask_type m)
|
||||
@ -77,10 +85,20 @@ public:
|
||||
m_classes |= m;
|
||||
m_empty = false;
|
||||
}
|
||||
// Records the digraph s in the list of equivalence classes (m_equivalents).
// When s has a non-zero second character the set is flagged as containing
// digraphs and s is additionally registered through add_single().
// The set is marked as no longer empty in every case.
void add_equivalent(const digraph_type& s)
|
||||
{
|
||||
m_equivalents.push_back(s);
|
||||
// a non-zero second character means s is a two-character digraph:
if(s.second)
|
||||
{
|
||||
m_has_digraphs = true;
|
||||
add_single(s);
|
||||
}
|
||||
m_empty = false;
|
||||
}
|
||||
void negate()
|
||||
{
|
||||
m_negate = true;
|
||||
m_empty = false;
|
||||
//m_empty = false;
|
||||
}
|
||||
|
||||
//
|
||||
@ -111,6 +129,14 @@ public:
|
||||
{
|
||||
return m_ranges.end();
|
||||
}
|
||||
// Returns an iterator to the start of the stored equivalence classes
// (m_equivalents); pairs with equivalents_end().
list_iterator equivalents_begin()const
|
||||
{
|
||||
return m_equivalents.begin();
|
||||
}
|
||||
// Returns the past-the-end iterator of the stored equivalence classes
// (m_equivalents); pairs with equivalents_begin().
list_iterator equivalents_end()const
|
||||
{
|
||||
return m_equivalents.end();
|
||||
}
|
||||
mask_type classes()const
|
||||
{
|
||||
return m_classes;
|
||||
@ -126,6 +152,7 @@ private:
|
||||
bool m_has_digraphs; // true if we have digraphs present
|
||||
mask_type m_classes; // character classes to match
|
||||
bool m_empty; // whether we've added anything yet
|
||||
std::vector<digraph_type> m_equivalents; // a list of equivalence classes
|
||||
};
|
||||
|
||||
template <class charT, class traits>
|
||||
@ -189,6 +216,7 @@ private:
|
||||
void set_all_masks(unsigned char* bits, unsigned char);
|
||||
bool is_bad_repeat(re_syntax_base* pt);
|
||||
void set_bad_repeat(re_syntax_base* pt);
|
||||
syntax_element_type get_repeat_type(re_syntax_base* state);
|
||||
};
|
||||
|
||||
template <class charT, class traits>
|
||||
@ -297,7 +325,7 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
//
|
||||
result->csingles = static_cast<unsigned int>(std::distance(char_set.singles_begin(), char_set.singles_end()));
|
||||
result->cranges = static_cast<unsigned int>(std::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
|
||||
result->cequivalents = 0;
|
||||
result->cequivalents = static_cast<unsigned int>(std::distance(char_set.equivalents_begin(), char_set.equivalents_end()));
|
||||
result->cclasses = char_set.classes();
|
||||
if(flags() & regbase::icase)
|
||||
{
|
||||
@ -377,6 +405,27 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
std::memcpy(p, s2.c_str(), sizeof(charT) * (s2.size() + 1));
|
||||
}
|
||||
//
|
||||
// now process the equivalence classes:
|
||||
//
|
||||
first = char_set.equivalents_begin();
|
||||
last = char_set.equivalents_end();
|
||||
while(first != last)
|
||||
{
|
||||
string_type s;
|
||||
if(first->second)
|
||||
{
|
||||
charT cs[2] = { first->first, first->second, };
|
||||
s = m_traits.transform_primary(cs, cs+2);
|
||||
}
|
||||
else
|
||||
s = m_traits.transform_primary(&first->first, &first->first+1);
|
||||
if(s.empty())
|
||||
return 0; // invalid or unsupported equivalence class
|
||||
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
|
||||
std::memcpy(p, s.c_str(), sizeof(charT) * (s.size() + 1));
|
||||
++first;
|
||||
}
|
||||
//
|
||||
// finally reset the address of our last state:
|
||||
//
|
||||
m_last_state = result = static_cast<re_set_long<mask_type>*>(getaddress(offset));
|
||||
@ -470,6 +519,32 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
result->_map[i] = true;
|
||||
}
|
||||
}
|
||||
//
|
||||
// now process the equivalence classes:
|
||||
//
|
||||
first = char_set.equivalents_begin();
|
||||
last = char_set.equivalents_end();
|
||||
while(first != last)
|
||||
{
|
||||
string_type s;
|
||||
if(first->second)
|
||||
{
|
||||
charT cs[2] = { first->first, first->second, };
|
||||
s = m_traits.transform_primary(cs, cs+2);
|
||||
}
|
||||
else
|
||||
s = m_traits.transform_primary(&first->first, &first->first+1);
|
||||
if(s.empty())
|
||||
return 0; // invalid or unsupported equivalence class
|
||||
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
{
|
||||
charT c(i);
|
||||
string_type s2 = this->m_traits.transform_primary(&c, &c+1);
|
||||
if(s == s2)
|
||||
result->_map[i] = true;
|
||||
}
|
||||
++first;
|
||||
}
|
||||
if(negate)
|
||||
{
|
||||
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
@ -567,6 +642,8 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
|
||||
create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take);
|
||||
m_bad_repeats = 0;
|
||||
create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
|
||||
// adjust the type of the state to allow for faster matching:
|
||||
state->type = this->get_repeat_type(state);
|
||||
return;
|
||||
default:
|
||||
state = state->next.p;
|
||||
@ -613,6 +690,10 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
return;
|
||||
}
|
||||
case syntax_element_backref:
|
||||
// can be null, and any character can match:
|
||||
if(pnull)
|
||||
*pnull |= mask;
|
||||
// fall through:
|
||||
case syntax_element_wild:
|
||||
{
|
||||
// can't be null, any character can match:
|
||||
@ -668,13 +749,18 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
if(map)
|
||||
{
|
||||
typedef typename traits::char_class_type mask_type;
|
||||
map[0] |= mask_init;
|
||||
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
if(static_cast<re_set_long<mask_type>*>(state)->singleton)
|
||||
{
|
||||
charT c = static_cast<charT>(i);
|
||||
if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<mask_type>*>(state), *m_pdata))
|
||||
map[i] |= mask;
|
||||
map[0] |= mask_init;
|
||||
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
{
|
||||
charT c = static_cast<charT>(i);
|
||||
if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<mask_type>*>(state), *m_pdata))
|
||||
map[i] |= mask;
|
||||
}
|
||||
}
|
||||
else
|
||||
set_all_masks(map, mask);
|
||||
}
|
||||
return;
|
||||
case syntax_element_set:
|
||||
@ -772,7 +858,6 @@ unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* st
|
||||
continue;
|
||||
case syntax_element_start_line:
|
||||
return regbase::restart_line;
|
||||
case syntax_element_word_boundary:
|
||||
case syntax_element_word_start:
|
||||
return regbase::restart_word;
|
||||
case syntax_element_buffer_start:
|
||||
@ -848,6 +933,35 @@ void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt)
|
||||
}
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
syntax_element_type basic_regex_creator<charT, traits>::get_repeat_type(re_syntax_base* state)
|
||||
{
|
||||
typedef typename traits::char_class_type mask_type;
|
||||
if(state->type == syntax_element_rep)
|
||||
{
|
||||
// check to see if we are repeating a single state:
|
||||
if(state->next.p->next.p->next.p == static_cast<re_alt*>(state)->alt.p)
|
||||
{
|
||||
switch(state->next.p->type)
|
||||
{
|
||||
case re_detail::syntax_element_wild:
|
||||
return re_detail::syntax_element_dot_rep;
|
||||
case re_detail::syntax_element_literal:
|
||||
return re_detail::syntax_element_char_rep;
|
||||
case re_detail::syntax_element_set:
|
||||
return re_detail::syntax_element_short_set_rep;
|
||||
case re_detail::syntax_element_long_set:
|
||||
if(static_cast<re_detail::re_set_long<mask_type>*>(state->next.p)->singleton)
|
||||
return re_detail::syntax_element_long_set_rep;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return state->type;
|
||||
}
|
||||
|
||||
} // namespace re_detail
|
||||
|
||||
} // namespace boost
|
||||
|
@ -49,11 +49,15 @@ public:
|
||||
bool parse_backref();
|
||||
void parse_set_literal(basic_char_set<charT, traits>& char_set);
|
||||
bool parse_inner_set(basic_char_set<charT, traits>& char_set);
|
||||
digraph<charT> get_next_set_literal();
|
||||
bool parse_QE();
|
||||
bool parse_perl_extension();
|
||||
digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
|
||||
charT unescape_character();
|
||||
|
||||
private:
|
||||
typedef bool (basic_regex_parser::*parser_proc_type)();
|
||||
typedef typename traits::string_type string_type;
|
||||
typedef typename traits::char_class_type char_class_type;
|
||||
parser_proc_type m_parser_proc; // the main parser to use
|
||||
const charT* m_base; // the start of the string being parsed
|
||||
const charT* m_end; // the end of the string being parsed
|
||||
@ -235,13 +239,29 @@ bool basic_regex_parser<charT, traits>::parse_literal()
|
||||
template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::parse_open_paren()
|
||||
{
|
||||
//
|
||||
// skip the '(' and error check:
|
||||
//
|
||||
if(++m_position == m_end)
|
||||
fail(REG_EPAREN, m_position - m_base);
|
||||
//
|
||||
// begin by checking for a perl-style (?...) extension:
|
||||
//
|
||||
if((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
|
||||
{
|
||||
if(m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
|
||||
return parse_perl_extension();
|
||||
}
|
||||
//
|
||||
// update our mark count, and append the required state:
|
||||
//
|
||||
unsigned markid = ++m_mark_count;
|
||||
unsigned markid;
|
||||
if(this->flags() & regbase::nosubs)
|
||||
markid = 0;
|
||||
else
|
||||
markid = ++m_mark_count;
|
||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
|
||||
pb->index = markid;
|
||||
++m_position;
|
||||
std::ptrdiff_t last_paren_start = this->getoffset(pb);
|
||||
// back up insertion point for alternations, and set new point:
|
||||
std::ptrdiff_t last_alt_point = m_alt_insert_point;
|
||||
@ -392,6 +412,18 @@ bool basic_regex_parser<charT, traits>::parse_extended_escape()
|
||||
++m_position;
|
||||
this->append_state(syntax_element_soft_buffer_end);
|
||||
break;
|
||||
case regex_constants::escape_type_Q:
|
||||
return parse_QE();
|
||||
case regex_constants::escape_type_C:
|
||||
return parse_match_any();
|
||||
case regex_constants::escape_type_X:
|
||||
++m_position;
|
||||
this->append_state(syntax_element_combining);
|
||||
break;
|
||||
case regex_constants::escape_type_G:
|
||||
++m_position;
|
||||
this->append_state(syntax_element_restart_continue);
|
||||
break;
|
||||
default:
|
||||
this->append_literal(unescape_character());
|
||||
break;
|
||||
@ -465,6 +497,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
|
||||
case syntax_element_alt:
|
||||
case syntax_element_soft_buffer_end:
|
||||
case syntax_element_restart_continue:
|
||||
case syntax_element_jump:
|
||||
// can't legally repeat any of the above:
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
default:
|
||||
@ -653,6 +686,38 @@ bool basic_regex_parser<charT, traits>::parse_set()
|
||||
if(parse_inner_set(char_set))
|
||||
break;
|
||||
return true;
|
||||
case regex_constants::syntax_escape:
|
||||
{
|
||||
//
|
||||
// look ahead and see if this is a character class shortcut
|
||||
// \d \w \s etc...
|
||||
//
|
||||
++m_position;
|
||||
if(this->m_traits.escape_syntax_type(*m_position)
|
||||
== regex_constants::escape_type_class)
|
||||
{
|
||||
char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
|
||||
if(m)
|
||||
{
|
||||
char_set.add_class(m);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if(this->m_traits.escape_syntax_type(*m_position)
|
||||
== regex_constants::escape_type_not_class)
|
||||
{
|
||||
// negated character classes aren't supported:
|
||||
char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
|
||||
if(m)
|
||||
{
|
||||
fail(REG_EESCAPE, m_position - m_base);
|
||||
}
|
||||
}
|
||||
// not a character class, just a regular escape:
|
||||
--m_position;
|
||||
parse_set_literal(char_set);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
parse_set_literal(char_set);
|
||||
break;
|
||||
@ -673,6 +738,13 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
switch(this->m_traits.syntax_type(*m_position))
|
||||
{
|
||||
case regex_constants::syntax_dot:
|
||||
//
|
||||
// a collating element is treated as a literal:
|
||||
//
|
||||
--m_position;
|
||||
parse_set_literal(char_set);
|
||||
return true;
|
||||
case regex_constants::syntax_colon:
|
||||
{
|
||||
// check that character classes are actually enabled:
|
||||
@ -733,6 +805,37 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr
|
||||
++m_position;
|
||||
break;
|
||||
}
|
||||
case regex_constants::syntax_equal:
|
||||
{
|
||||
// skip the '='
|
||||
if(m_end == ++m_position)
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
const charT* name_first = m_position;
|
||||
// skip at least one character, then find the matching '=]'
|
||||
if(m_end == ++m_position)
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
while((m_position != m_end)
|
||||
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
|
||||
++m_position;
|
||||
const charT* name_last = m_position;
|
||||
if(m_end == m_position)
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
if((m_end == ++m_position)
|
||||
|| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
string_type m = this->m_traits.lookup_collatename(name_first, name_last);
|
||||
if((0 == m.size()) || (m.size() > 2))
|
||||
fail(REG_ECOLLATE, name_first - m_base);
|
||||
digraph<charT> d;
|
||||
d.first = m[0];
|
||||
if(m.size() > 1)
|
||||
d.second = m[1];
|
||||
else
|
||||
d.second = 0;
|
||||
char_set.add_equivalent(d);
|
||||
++m_position;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
--m_position;
|
||||
parse_set_literal(char_set);
|
||||
@ -744,7 +847,7 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr
|
||||
template <class charT, class traits>
|
||||
void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
|
||||
{
|
||||
digraph<charT> start_range = get_next_set_literal();
|
||||
digraph<charT> start_range = get_next_set_literal(char_set);
|
||||
if(m_end == m_position)
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
|
||||
@ -754,7 +857,7 @@ void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT,
|
||||
fail(REG_EBRACK, m_position - m_base);
|
||||
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
|
||||
{
|
||||
digraph<charT> end_range = get_next_set_literal();
|
||||
digraph<charT> end_range = get_next_set_literal(char_set);
|
||||
char_set.add_range(start_range, end_range);
|
||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
|
||||
fail(REG_ERANGE, m_position - m_base);
|
||||
@ -766,11 +869,22 @@ void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT,
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal()
|
||||
digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
|
||||
{
|
||||
typedef typename traits::string_type string_type;
|
||||
digraph<charT> result;
|
||||
switch(this->m_traits.syntax_type(*m_position))
|
||||
{
|
||||
case regex_constants::syntax_dash:
|
||||
if(!char_set.empty())
|
||||
{
|
||||
// see if we are at the end of the set:
|
||||
if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
|
||||
fail(REG_ERANGE, m_position - m_base);
|
||||
--m_position;
|
||||
}
|
||||
result.first = *m_position++;
|
||||
return result;
|
||||
case regex_constants::syntax_escape:
|
||||
// check to see if escapes are supported first:
|
||||
if(this->flags() & regex_constants::no_escape_in_lists)
|
||||
@ -781,6 +895,43 @@ digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal()
|
||||
++m_position;
|
||||
result = unescape_character();
|
||||
break;
|
||||
case regex_constants::syntax_open_set:
|
||||
{
|
||||
if(m_end == ++m_position)
|
||||
fail(REG_ECOLLATE, m_position - m_base);
|
||||
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
|
||||
{
|
||||
--m_position;
|
||||
result.first = *m_position;
|
||||
++m_position;
|
||||
return result;
|
||||
}
|
||||
if(m_end == ++m_position)
|
||||
fail(REG_ECOLLATE, m_position - m_base);
|
||||
const charT* name_first = m_position;
|
||||
// skip at least one character, then find the matching ':]'
|
||||
if(m_end == ++m_position)
|
||||
fail(REG_ECOLLATE, name_first - m_base);
|
||||
while((m_position != m_end)
|
||||
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
|
||||
++m_position;
|
||||
const charT* name_last = m_position;
|
||||
if(m_end == m_position)
|
||||
fail(REG_ECOLLATE, name_first - m_base);
|
||||
if((m_end == ++m_position)
|
||||
|| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
|
||||
fail(REG_ECOLLATE, name_first - m_base);
|
||||
++m_position;
|
||||
string_type s = this->m_traits.lookup_collatename(name_first, name_last);
|
||||
if(s.empty() || (s.size() > 2))
|
||||
fail(REG_ECOLLATE, name_first - m_base);
|
||||
result.first = s[0];
|
||||
if(s.size() > 1)
|
||||
result.second = s[1];
|
||||
else
|
||||
result.second = 0;
|
||||
return result;
|
||||
}
|
||||
default:
|
||||
result = *m_position++;
|
||||
}
|
||||
@ -916,6 +1067,133 @@ bool basic_regex_parser<charT, traits>::parse_backref()
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::parse_QE()
|
||||
{
|
||||
//
|
||||
// parse a \Q...\E sequence:
|
||||
//
|
||||
++m_position; // skip the Q
|
||||
const charT* start = m_position;
|
||||
const charT* end;
|
||||
do
|
||||
{
|
||||
while((m_position != m_end)
|
||||
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
|
||||
++m_position;
|
||||
if((m_position == m_end) || (++m_position == m_end)) // skip the escape
|
||||
{
|
||||
fail(REG_EESCAPE, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
// check to see if it's a \E:
|
||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_E)
|
||||
{
|
||||
++m_position;
|
||||
end = m_position - 2;
|
||||
break;
|
||||
}
|
||||
// otherwise go round again:
|
||||
}while(true);
|
||||
//
|
||||
// now add all the character between the two escapes as literals:
|
||||
//
|
||||
while(start != end)
|
||||
{
|
||||
this->append_literal(*start);
|
||||
++start;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
{
|
||||
if(++m_position == m_end)
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
//
|
||||
// backup some state, and prepare the way:
|
||||
//
|
||||
int markid;
|
||||
std::ptrdiff_t jump_offset = 0;
|
||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
|
||||
std::ptrdiff_t last_paren_start = this->getoffset(pb);
|
||||
// back up insertion point for alternations, and set new point:
|
||||
std::ptrdiff_t last_alt_point = m_alt_insert_point;
|
||||
this->m_pdata->m_data.align();
|
||||
m_alt_insert_point = this->m_pdata->m_data.size();
|
||||
//
|
||||
// select the actual extension used:
|
||||
//
|
||||
switch(this->m_traits.syntax_type(*m_position))
|
||||
{
|
||||
case regex_constants::syntax_colon:
|
||||
//
|
||||
// a non-capturing mark:
|
||||
//
|
||||
pb->index = markid = 0;
|
||||
++m_position;
|
||||
break;
|
||||
case regex_constants::syntax_hash:
|
||||
//
|
||||
// a comment; this actually becomes an empty non-capturing mark:
|
||||
//
|
||||
pb->index = markid = 0;
|
||||
while((m_position != m_end)
|
||||
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
||||
++m_position;
|
||||
break;
|
||||
case regex_constants::syntax_equal:
|
||||
pb->index = markid = -1;
|
||||
++m_position;
|
||||
jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
|
||||
this->m_pdata->m_data.align();
|
||||
m_alt_insert_point = this->m_pdata->m_data.size();
|
||||
break;
|
||||
case regex_constants::syntax_not:
|
||||
pb->index = markid = -2;
|
||||
++m_position;
|
||||
jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
|
||||
this->m_pdata->m_data.align();
|
||||
m_alt_insert_point = this->m_pdata->m_data.size();
|
||||
break;
|
||||
default:
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
}
|
||||
//
|
||||
// now recursively add more states, this will terminate when we get to a
|
||||
// matching ')' :
|
||||
//
|
||||
parse_all();
|
||||
//
|
||||
// we either have a ')' or we have run out of characters prematurely:
|
||||
//
|
||||
if(m_position == m_end)
|
||||
this->fail(REG_EPAREN, std::distance(m_base, m_end));
|
||||
BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
|
||||
++m_position;
|
||||
//
|
||||
// set up the jump pointer if we have one:
|
||||
//
|
||||
if(jump_offset)
|
||||
{
|
||||
this->m_pdata->m_data.align();
|
||||
re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
|
||||
jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
|
||||
}
|
||||
//
|
||||
// append closing parenthesis state:
|
||||
//
|
||||
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
|
||||
pb->index = markid;
|
||||
this->m_paren_start = last_paren_start;
|
||||
//
|
||||
// restore the alternate insertion point:
|
||||
//
|
||||
this->m_alt_insert_point = last_alt_point;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace re_detail
|
||||
} // namespace boost
|
||||
|
||||
|
@ -25,6 +25,9 @@
|
||||
#ifdef BOOST_HAS_THREADS
|
||||
#include <boost/regex/static_mutex.hpp>
|
||||
#endif
|
||||
#ifndef BOOST_REGEX_PRIMARY_TRANSFORM
|
||||
#include <boost/regex/v4/primary_transform.hpp>
|
||||
#endif
|
||||
|
||||
namespace boost{
|
||||
|
||||
@ -272,7 +275,7 @@ typename cpp_regex_traits_char_layer<charT>::string_type
|
||||
// specialised version for narrow characters:
|
||||
//
|
||||
template <>
|
||||
class cpp_regex_traits_char_layer<char> : public cpp_regex_traits_base<char>
|
||||
class BOOST_REGEX_DECL cpp_regex_traits_char_layer<char> : public cpp_regex_traits_base<char>
|
||||
{
|
||||
typedef std::string string_type;
|
||||
public:
|
||||
@ -326,6 +329,7 @@ public:
|
||||
|
||||
|
||||
typedef std::basic_string<charT> string_type;
|
||||
typedef charT char_type;
|
||||
//cpp_regex_traits_implementation();
|
||||
cpp_regex_traits_implementation(const std::locale& l);
|
||||
std::string error_string(regex_constants::error_type n) const
|
||||
@ -348,16 +352,88 @@ public:
|
||||
}
|
||||
return result;
|
||||
}
|
||||
string_type lookup_collatename(const charT* p1, const charT* p2) const;
|
||||
string_type transform_primary(const charT* p1, const charT* p2) const;
|
||||
string_type transform(const charT* p1, const charT* p2) const
|
||||
{
|
||||
return this->m_pcollate->transform(p1, p2);
|
||||
}
|
||||
re_detail::parser_buf<charT> m_sbuf; // buffer for parsing numbers.
|
||||
std::basic_istream<charT> m_is; // stream for parsing numbers.
|
||||
private:
|
||||
std::map<int, std::string> m_error_strings; // error messages indexed by numberic ID
|
||||
std::map<string_type, char_class_type> m_custom_class_names; // character class names
|
||||
std::map<string_type, string_type> m_custom_collate_names; // collating element names
|
||||
unsigned m_collate_type; // the form of the collation string
|
||||
charT m_collate_delim; // the collation group delimiter
|
||||
//
|
||||
// helpers:
|
||||
//
|
||||
char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const;
|
||||
};
|
||||
|
||||
template <class charT>
|
||||
typename cpp_regex_traits_implementation<charT>::string_type
|
||||
cpp_regex_traits_implementation<charT>::transform_primary(const charT* p1, const charT* p2) const
|
||||
{
|
||||
string_type result;
|
||||
//
|
||||
// What we do here depends upon the format of the sort key returned by
|
||||
// sort key returned by this->transform:
|
||||
//
|
||||
switch(m_collate_type)
|
||||
{
|
||||
case sort_C:
|
||||
case sort_unknown:
|
||||
// the best we can do is translate to lower case, then get a regular sort key:
|
||||
{
|
||||
result.assign(p1, p2);
|
||||
m_pctype->tolower(&*result.begin(), &*result.end());
|
||||
result = this->m_pcollate->transform(&*result.begin(), &*result.end());
|
||||
break;
|
||||
}
|
||||
case sort_fixed:
|
||||
{
|
||||
// get a regular sort key, and then truncate it:
|
||||
result.assign(this->m_pcollate->transform(&*result.begin(), &*result.end()));
|
||||
result.erase(this->m_collate_delim);
|
||||
break;
|
||||
}
|
||||
case sort_delim:
|
||||
// get a regular sort key, and then truncate everything after the delim:
|
||||
result.assign(this->m_pcollate->transform(&*result.begin(), &*result.end()));
|
||||
std::size_t i;
|
||||
for(i = 0; i < result.size(); ++i)
|
||||
{
|
||||
if(result[i] == m_collate_delim)
|
||||
break;
|
||||
}
|
||||
result.erase(i);
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class charT>
|
||||
typename cpp_regex_traits_implementation<charT>::string_type
|
||||
cpp_regex_traits_implementation<charT>::lookup_collatename(const charT* p1, const charT* p2) const
|
||||
{
|
||||
typedef typename std::map<string_type, string_type>::const_iterator iter_type;
|
||||
if(m_custom_collate_names.size())
|
||||
{
|
||||
iter_type pos = m_custom_collate_names.find(string_type(p1, p2));
|
||||
if(pos != m_custom_collate_names.end())
|
||||
return pos->second;
|
||||
}
|
||||
std::string name(p1, p2);
|
||||
name = lookup_default_collate_name(name);
|
||||
if(name.size())
|
||||
return string_type(name.begin(), name.end());
|
||||
if(p2 - p1 == 1)
|
||||
return string_type(1, *p1);
|
||||
return string_type();
|
||||
}
|
||||
|
||||
template <class charT>
|
||||
cpp_regex_traits_implementation<charT>::cpp_regex_traits_implementation(const std::locale& l)
|
||||
: cpp_regex_traits_char_layer<charT>(l), m_is(&m_sbuf)
|
||||
@ -385,6 +461,9 @@ cpp_regex_traits_implementation<charT>::cpp_regex_traits_implementation(const st
|
||||
//
|
||||
if((int)cat >= 0)
|
||||
{
|
||||
//
|
||||
// Error messages:
|
||||
//
|
||||
for(boost::regex_constants::error_type i = 0; i <= boost::regex_constants::error_unknown; ++i)
|
||||
{
|
||||
const char* p = get_default_error_string(i);
|
||||
@ -402,7 +481,38 @@ cpp_regex_traits_implementation<charT>::cpp_regex_traits_implementation(const st
|
||||
}
|
||||
m_error_strings[i] = result;
|
||||
}
|
||||
//
|
||||
// Custom class names:
|
||||
//
|
||||
static const char_class_type masks[] =
|
||||
{
|
||||
std::ctype<charT>::alnum,
|
||||
std::ctype<charT>::alpha,
|
||||
std::ctype<charT>::cntrl,
|
||||
std::ctype<charT>::digit,
|
||||
std::ctype<charT>::graph,
|
||||
std::ctype<charT>::lower,
|
||||
std::ctype<charT>::print,
|
||||
std::ctype<charT>::punct,
|
||||
std::ctype<charT>::space,
|
||||
std::ctype<charT>::upper,
|
||||
std::ctype<charT>::xdigit,
|
||||
cpp_regex_traits_implementation<charT>::mask_blank,
|
||||
cpp_regex_traits_implementation<charT>::mask_word,
|
||||
cpp_regex_traits_implementation<charT>::mask_unicode,
|
||||
};
|
||||
static const string_type null_string;
|
||||
for(unsigned int j = 0; j <= 13; ++j)
|
||||
{
|
||||
string_type s(this->m_pmessages->get(cat, 0, j+300, null_string));
|
||||
if(s.size())
|
||||
this->m_custom_class_names[s] = masks[j];
|
||||
}
|
||||
}
|
||||
//
|
||||
// get the collation format used by m_pcollate:
|
||||
//
|
||||
m_collate_type = re_detail::find_sort_syntax(this, &m_collate_delim);
|
||||
}
|
||||
|
||||
template <class charT>
|
||||
@ -432,6 +542,13 @@ typename cpp_regex_traits_implementation<charT>::char_class_type
|
||||
std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word,
|
||||
std::ctype<char>::xdigit,
|
||||
};
|
||||
if(m_custom_class_names.size())
|
||||
{
|
||||
typedef typename std::map<std::basic_string<charT>, char_class_type>::const_iterator map_iter;
|
||||
map_iter pos = m_custom_class_names.find(string_type(p1, p2));
|
||||
if(pos != m_custom_class_names.end())
|
||||
return pos->second;
|
||||
}
|
||||
std::size_t id = 1 + re_detail::get_default_class_id(p1, p2);
|
||||
assert(id < sizeof(masks) / sizeof(masks[0]));
|
||||
return masks[id];
|
||||
@ -491,9 +608,9 @@ public:
|
||||
{
|
||||
return m_pimpl->m_pcollate->transform(p1, p2);
|
||||
}
|
||||
string_type transform_primary(const charT* , const charT* ) const
|
||||
string_type transform_primary(const charT* p1, const charT* p2) const
|
||||
{
|
||||
return string_type();
|
||||
return m_pimpl->transform_primary(p1, p2);
|
||||
}
|
||||
char_class_type lookup_classname(const charT* p1, const charT* p2) const
|
||||
{
|
||||
@ -501,7 +618,7 @@ public:
|
||||
}
|
||||
string_type lookup_collatename(const charT* p1, const charT* p2) const
|
||||
{
|
||||
return string_type();
|
||||
return m_pimpl->lookup_collatename(p1, p2);
|
||||
}
|
||||
bool is_class(charT c, char_class_type f) const
|
||||
{
|
||||
|
@ -47,7 +47,11 @@ typedef size_t regsize_t;
|
||||
typedef struct
|
||||
{
|
||||
unsigned int re_magic;
|
||||
#ifdef __cplusplus
|
||||
std::size_t re_nsub; /* number of parenthesized subexpressions */
|
||||
#else
|
||||
size_t re_nsub;
|
||||
#endif
|
||||
const char* re_endp; /* end pointer for REG_PEND */
|
||||
void* guts; /* none of your business :-) */
|
||||
match_flag_type eflags; /* none of your business :-) */
|
||||
@ -57,7 +61,11 @@ typedef struct
|
||||
typedef struct
|
||||
{
|
||||
unsigned int re_magic;
|
||||
#ifdef __cplusplus
|
||||
std::size_t re_nsub; /* number of parenthesized subexpressions */
|
||||
#else
|
||||
size_t re_nsub;
|
||||
#endif
|
||||
const wchar_t* re_endp; /* end pointer for REG_PEND */
|
||||
void* guts; /* none of your business :-) */
|
||||
match_flag_type eflags; /* none of your business :-) */
|
||||
|
@ -52,13 +52,13 @@ static const reg_error_t REG_ESPACE = 12; /* Ran out of memory. */
|
||||
static const reg_error_t REG_BADRPT = 13; /* No preceding re for repetition op. */
|
||||
static const reg_error_t REG_EEND = 14; /* unexpected end of expression */
|
||||
static const reg_error_t REG_ESIZE = 15; /* expression too big */
|
||||
static const reg_error_t REG_ERPAREN = REG_EPAREN; /* unmatched right parenthesis */
|
||||
static const reg_error_t REG_ERPAREN = 8; /* = REG_EPAREN : unmatched right parenthesis */
|
||||
static const reg_error_t REG_EMPTY = 17; /* empty expression */
|
||||
static const reg_error_t REG_E_MEMORY = REG_ESIZE; /* out of memory */
|
||||
static const reg_error_t REG_E_MEMORY = 15; /* = REG_ESIZE : out of memory */
|
||||
static const reg_error_t REG_ECOMPLEXITY = 18; /* complexity too high */
|
||||
static const reg_error_t REG_ESTACK = 19; /* out of stack space */
|
||||
static const reg_error_t REG_E_UNKNOWN = 20; /* unknown error */
|
||||
static const reg_error_t REG_ENOSYS = REG_E_UNKNOWN; /* Reserved. */
|
||||
static const reg_error_t REG_ENOSYS = 20; /* = REG_E_UNKNOWN : Reserved. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace regex_constants{
|
||||
|
@ -153,7 +153,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
{
|
||||
if(state_count > max_state_count)
|
||||
raise_error(traits_inst, REG_ESPACE);
|
||||
if((m_match_flags & match_partial) && (position == last))
|
||||
if((m_match_flags & match_partial) && (position == last) && (position != search_base))
|
||||
m_has_partial_match = true;
|
||||
if(false == unwind(false))
|
||||
return m_recursive_result;
|
||||
|
@ -86,7 +86,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
++state_count;
|
||||
if(!(this->*proc)())
|
||||
{
|
||||
if((m_match_flags & match_partial) && (position == last))
|
||||
if((m_match_flags & match_partial) && (position == last) && (position != search_base))
|
||||
m_has_partial_match = true;
|
||||
return 0;
|
||||
}
|
||||
|
@ -96,7 +96,7 @@ enum{
|
||||
// this is used by basic_regex for expression storage
|
||||
//
|
||||
|
||||
class raw_storage
|
||||
class BOOST_REGEX_DECL raw_storage
|
||||
{
|
||||
public:
|
||||
typedef std::size_t size_type;
|
||||
|
@ -21,11 +21,11 @@
|
||||
|
||||
namespace boost{ namespace re_detail{
|
||||
|
||||
const char* get_default_syntax(regex_constants::syntax_type n);
|
||||
const char* get_default_error_string(regex_constants::error_type n);
|
||||
BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n);
|
||||
BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n);
|
||||
|
||||
// is charT c a combining character?
|
||||
bool is_combining_implementation(uint_least16_t s);
|
||||
BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s);
|
||||
|
||||
template <class charT>
|
||||
inline bool is_combining(charT c)
|
||||
@ -75,9 +75,14 @@ inline bool is_combining<wchar_t>(wchar_t c)
|
||||
template <class charT>
|
||||
inline bool is_separator(charT c)
|
||||
{
|
||||
return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r'));
|
||||
return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (static_cast<int>(c) == 0x2028) || (static_cast<int>(c) == 0x2029));
|
||||
}
|
||||
|
||||
//
|
||||
// get a default collating element:
|
||||
//
|
||||
BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name);
|
||||
|
||||
//
|
||||
// get the id of a character classification, the individual
|
||||
// traits classes then transform that id into a bitmask:
|
||||
|
@ -16,6 +16,7 @@
|
||||
* DESCRIPTION: Implements cpp_regex_traits<char> (and associated helper classes).
|
||||
*/
|
||||
|
||||
#define BOOST_REGEX_SOURCE
|
||||
#include <boost/regex/regex_traits.hpp>
|
||||
|
||||
namespace boost{ namespace re_detail{
|
||||
|
@ -1,5 +1,6 @@
|
||||
|
||||
|
||||
#define BOOST_REGEX_SOURCE
|
||||
#include <boost/regex/v4/regex_raw_buffer.hpp>
|
||||
|
||||
namespace boost{ namespace re_detail{
|
||||
|
@ -16,11 +16,12 @@
|
||||
* DESCRIPTION: Declares API's for access to regex_traits default properties.
|
||||
*/
|
||||
|
||||
#define BOOST_REGEX_SOURCE
|
||||
#include <boost/regex/regex_traits.hpp>
|
||||
|
||||
namespace boost{ namespace re_detail{
|
||||
|
||||
const char* get_default_syntax(regex_constants::syntax_type n)
|
||||
BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n)
|
||||
{
|
||||
// if the user hasn't supplied a message catalog, then this supplies
|
||||
// default "messages" for us to load in the range 1-100.
|
||||
@ -77,13 +78,13 @@ const char* get_default_syntax(regex_constants::syntax_type n)
|
||||
"X",
|
||||
"C",
|
||||
"Z",
|
||||
"G"
|
||||
"G",
|
||||
"!", };
|
||||
|
||||
return ((n >= (sizeof(messages) / sizeof(messages[1]))) ? "" : messages[n]);
|
||||
}
|
||||
|
||||
const char* get_default_error_string(regex_constants::error_type n)
|
||||
BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n)
|
||||
{
|
||||
static const char* const s_default_error_messages[] = {
|
||||
"Success", /* REG_NOERROR */
|
||||
@ -115,7 +116,7 @@ const char* get_default_error_string(regex_constants::error_type n)
|
||||
return (n > REG_E_UNKNOWN) ? s_default_error_messages[REG_E_UNKNOWN] : s_default_error_messages[n];
|
||||
}
|
||||
|
||||
bool is_combining_implementation(boost::uint_least16_t c)
|
||||
BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(boost::uint_least16_t c)
|
||||
{
|
||||
const boost::uint_least16_t combining_ranges[] = { 0x0300, 0x0361,
|
||||
0x0483, 0x0486,
|
||||
@ -164,5 +165,80 @@ bool is_combining_implementation(boost::uint_least16_t c)
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// these are the POSIX collating names:
|
||||
//
|
||||
// Table of single-character collating names, laid out so that an entry's
// index is the code of the character it names ("NUL" is entry 0, "space" is
// entry 32, "DEL" is entry 127). lookup_default_collate_name() returns that
// index as a one-character string and stops scanning at the empty string
// that terminates the table, so the order and the terminator must be kept.
BOOST_REGEX_DECL const char* def_coll_names[] = {
|
||||
"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "alert", "backspace", "tab", "newline",
|
||||
"vertical-tab", "form-feed", "carriage-return", "SO", "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK",
|
||||
"SYN", "ETB", "CAN", "EM", "SUB", "ESC", "IS4", "IS3", "IS2", "IS1", "space", "exclamation-mark",
|
||||
"quotation-mark", "number-sign", "dollar-sign", "percent-sign", "ampersand", "apostrophe",
|
||||
"left-parenthesis", "right-parenthesis", "asterisk", "plus-sign", "comma", "hyphen",
|
||||
"period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
|
||||
"colon", "semicolon", "less-than-sign", "equals-sign", "greater-than-sign",
|
||||
"question-mark", "commercial-at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
|
||||
"Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "left-square-bracket", "backslash",
|
||||
"right-square-bracket", "circumflex", "underscore", "grave-accent", "a", "b", "c", "d", "e", "f",
|
||||
"g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "left-curly-bracket",
|
||||
"vertical-line", "right-curly-bracket", "tilde", "DEL", "",
|
||||
};
|
||||
|
||||
// these multi-character collating elements
|
||||
// should keep most Western-European locales
|
||||
// happy - we should really localise these a
|
||||
// little more - but this will have to do for
|
||||
// now:
|
||||
|
||||
// Table of multi-character collating elements, each listed in lower-case,
// capitalised and upper-case form. lookup_default_collate_name() compares its
// argument against every entry, returns the matching entry unchanged, and
// stops at the empty string that terminates the table.
BOOST_REGEX_DECL const char* def_multi_coll[] = {
|
||||
"ae",
|
||||
"Ae",
|
||||
"AE",
|
||||
"ch",
|
||||
"Ch",
|
||||
"CH",
|
||||
"ll",
|
||||
"Ll",
|
||||
"LL",
|
||||
"ss",
|
||||
"Ss",
|
||||
"SS",
|
||||
"nj",
|
||||
"Nj",
|
||||
"NJ",
|
||||
"dz",
|
||||
"Dz",
|
||||
"DZ",
|
||||
"lj",
|
||||
"Lj",
|
||||
"LJ",
|
||||
"",
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Translates a collating-element name into the character(s) it stands for,
// using the built-in tables def_coll_names and def_multi_coll.
//
// name: the collating-element name to look up (compared exactly, so the
//       match is case sensitive).
// Returns: a one-character string for a name found in def_coll_names, the
//          matching entry itself for a name found in def_multi_coll, and an
//          empty string when the name is in neither table.
BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name)
|
||||
{
|
||||
unsigned int i = 0;
|
||||
// Both tables end with an empty string; the loops stop when the first
// character of the current entry is the null terminator.
while(*def_coll_names[i])
|
||||
{
|
||||
if(def_coll_names[i] == name)
|
||||
{
|
||||
// The position in def_coll_names doubles as the character value.
return std::string(1, char(i));
|
||||
}
|
||||
++i;
|
||||
}
|
||||
i = 0;
|
||||
while(*def_multi_coll[i])
|
||||
{
|
||||
if(def_multi_coll[i] == name)
|
||||
{
|
||||
return def_multi_coll[i];
|
||||
}
|
||||
++i;
|
||||
}
|
||||
// Not a name known to either default table.
return std::string();
|
||||
}
|
||||
|
||||
|
||||
} // re_detail
|
||||
} // boost
|
||||
|
@ -16,6 +16,7 @@
|
||||
* DESCRIPTION: Declares static_mutex lock type.
|
||||
*/
|
||||
|
||||
#define BOOST_REGEX_SOURCE
|
||||
#include <boost/config.hpp>
|
||||
|
||||
#ifdef BOOST_HAS_THREADS
|
||||
@ -86,7 +87,7 @@ void scoped_static_mutex_lock::lock()
|
||||
{
|
||||
if(0 == m_have_lock)
|
||||
{
|
||||
#if defined(BOOST_MSVC) && (BOOST_MSVC <=1200)
|
||||
#if !defined(InterlockedCompareExchangePointer)
|
||||
while(0 != InterlockedCompareExchange(reinterpret_cast<void**>((boost::uint_least16_t*)&(m_mutex.m_mutex)), (void*)1, 0))
|
||||
#else
|
||||
while(0 != InterlockedCompareExchange(reinterpret_cast<volatile LONG*>(&(m_mutex.m_mutex)), 1, 0))
|
||||
@ -102,7 +103,7 @@ void scoped_static_mutex_lock::unlock()
|
||||
{
|
||||
if(m_have_lock)
|
||||
{
|
||||
#if defined(BOOST_MSVC) && (BOOST_MSVC <=1200)
|
||||
#if !defined(InterlockedCompareExchangePointer)
|
||||
InterlockedExchange((LONG*)&(m_mutex.m_mutex), 0);
|
||||
#else
|
||||
InterlockedExchange(reinterpret_cast<volatile LONG*>(&(m_mutex.m_mutex)), 0);
|
||||
@ -121,7 +122,7 @@ void scoped_static_mutex_lock::unlock()
|
||||
boost::recursive_mutex* static_mutex::m_pmutex = 0;
|
||||
boost::once_flag static_mutex::m_once = BOOST_ONCE_INIT;
|
||||
|
||||
extern "C" void free_static_mutex()
|
||||
extern "C" BOOST_REGEX_DECL void free_static_mutex()
|
||||
{
|
||||
delete static_mutex::m_pmutex;
|
||||
static_mutex::m_pmutex = 0;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -14,6 +14,14 @@ int cpp_main(int argc, char * argv[])
|
||||
test_character_escapes();
|
||||
test_assertion_escapes();
|
||||
test_tricky_cases();
|
||||
test_tricky_cases2();
|
||||
test_grep();
|
||||
test_replace();
|
||||
test_non_greedy_repeats();
|
||||
test_non_marking_paren();
|
||||
test_partial_match();
|
||||
test_forward_lookahead_asserts();
|
||||
test_fast_repeats();
|
||||
return error_count;
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#define BOOST_REGEX_REGRESS_TEST_HPP
|
||||
#include "test_not_regex.hpp"
|
||||
#include "test_regex_search.hpp"
|
||||
#include "test_regex_replace.hpp"
|
||||
|
||||
|
||||
//
|
||||
@ -81,6 +82,45 @@ const int* make_array(int first, ...);
|
||||
TEST_REGEX_SEARCH_N(s, f, t, m, a);\
|
||||
TEST_REGEX_SEARCH_W(BOOST_JOIN(L, s), f, BOOST_JOIN(L, t), m, a)
|
||||
|
||||
//
|
||||
// define macros for testing regex replaces:
|
||||
//
|
||||
// Narrow-character replace test. The four literals s, t, fs and r are copied
// into std::string objects using sizeof - 1 as the length, so embedded nulls
// survive and only the trailing terminator is dropped. The strings are
// recorded, together with f, m and the call site's file and line, through
// test_info<char>::set_info, and the replace test is then dispatched via the
// test_regex_replace_tag overload of test().
// NOTE(review): s looks like the expression, t the text to search, fs the
// format string and r the expected result, with f / m the syntax and match
// flags - inferred from the local names only, confirm against test_info.
#define TEST_REGEX_REPLACE_N(s, f, t, m, fs, r)\
|
||||
do{\
|
||||
const char e[] = { s };\
|
||||
std::string se(e, sizeof(e) - 1);\
|
||||
const char st[] = { t };\
|
||||
std::string sst(st, sizeof(st) - 1);\
|
||||
const char ft[] = { fs };\
|
||||
std::string sft(ft, sizeof(ft) - 1);\
|
||||
const char rt[] = { r };\
|
||||
std::string srt(rt, sizeof(rt) - 1);\
|
||||
test_info<char>::set_info(__FILE__, __LINE__, se, f, sst, m, 0, sft, srt);\
|
||||
test(char(0), test_regex_replace_tag());\
|
||||
}while(0)
|
||||
|
||||
// Wide-character counterpart of TEST_REGEX_REPLACE_N: the same steps using
// wchar_t arrays and std::wstring, with lengths computed in characters
// (sizeof / sizeof(wchar_t)) minus the terminator. When BOOST_NO_WREGEX is
// defined the macro expands to nothing, so wide tests are omitted.
#ifndef BOOST_NO_WREGEX
|
||||
#define TEST_REGEX_REPLACE_W(s, f, t, m, fs, r)\
|
||||
do{\
|
||||
const wchar_t e[] = { s };\
|
||||
std::wstring se(e, (sizeof(e) / sizeof(wchar_t)) - 1);\
|
||||
const wchar_t st[] = { t };\
|
||||
std::wstring sst(st, (sizeof(st) / sizeof(wchar_t)) - 1);\
|
||||
const wchar_t ft[] = { fs };\
|
||||
std::wstring sft(ft, (sizeof(ft) / sizeof(wchar_t)) - 1);\
|
||||
const wchar_t rt[] = { r };\
|
||||
std::wstring srt(rt, (sizeof(rt) / sizeof(wchar_t)) - 1);\
|
||||
test_info<wchar_t>::set_info(__FILE__, __LINE__, se, f, sst, m, 0, sft, srt);\
|
||||
test(wchar_t(0), test_regex_replace_tag());\
|
||||
}while(0)
|
||||
#else
|
||||
#define TEST_REGEX_REPLACE_W(s, f, t, m, fs, r)
|
||||
#endif
|
||||
|
||||
// Runs one replace test twice: once with the narrow literals as given, and
// once with each literal pasted onto an L prefix (BOOST_JOIN) to form the
// wide-character version. The flag arguments f and m are passed unchanged.
#define TEST_REGEX_REPLACE(s, f, t, m, fs, r)\
|
||||
TEST_REGEX_REPLACE_N(s, f, t, m, fs, r);\
|
||||
TEST_REGEX_REPLACE_W(BOOST_JOIN(L, s), f, BOOST_JOIN(L, t), m, BOOST_JOIN(L, fs), BOOST_JOIN(L, r))
|
||||
|
||||
//
|
||||
// define the test group procedures:
|
||||
//
|
||||
@ -92,6 +132,13 @@ void test_backrefs();
|
||||
void test_character_escapes();
|
||||
void test_assertion_escapes();
|
||||
void test_tricky_cases();
|
||||
|
||||
void test_grep();
|
||||
void test_replace();
|
||||
void test_non_greedy_repeats();
|
||||
void test_non_marking_paren();
|
||||
void test_partial_match();
|
||||
void test_forward_lookahead_asserts();
|
||||
void test_fast_repeats();
|
||||
void test_tricky_cases2();
|
||||
|
||||
#endif
|
||||
|
@ -16,7 +16,10 @@ void test_sub_match(const boost::sub_match<BidirectionalIterator>& sub, Bidirect
|
||||
#pragma warning(disable:4244)
|
||||
#endif
|
||||
typedef typename boost::sub_match<BidirectionalIterator>::value_type charT;
|
||||
if(sub.matched == 0)
|
||||
if((sub.matched == 0)
|
||||
&&
|
||||
!((i == 0)
|
||||
&& (test_info<charT>::match_options() & boost::match_partial)) )
|
||||
{
|
||||
if(answer_table[2*i] >= 0)
|
||||
{
|
||||
@ -80,6 +83,101 @@ void test_simple_search(boost::basic_regex<charT, traits>& r)
|
||||
}
|
||||
}
|
||||
|
||||
// Checks boost::regex_iterator against the current test case: every match
// the iterator produces over the test's search text is compared with the
// next group of entries in the answer table.
//
// r: the compiled expression under test.
// The search text, match flags and answer table are read from
// test_info<charT>. Groups in the answer table are separated by -2.
template<class charT, class traits>
|
||||
void test_regex_iterator(boost::basic_regex<charT, traits>& r)
|
||||
{
|
||||
typedef typename std::basic_string<charT>::const_iterator const_iterator;
|
||||
typedef boost::regex_iterator<const_iterator> test_iterator;
|
||||
const std::basic_string<charT>& search_text = test_info<charT>::search_text();
|
||||
boost::regex_constants::match_flag_type opts = test_info<charT>::match_options();
|
||||
const int* answer_table = test_info<charT>::answer_table();
|
||||
// A default-constructed regex_iterator serves as the end-of-sequence marker.
test_iterator start(search_text.begin(), search_text.end(), r, opts), end;
|
||||
while(start != end)
|
||||
{
|
||||
test_result(*start, search_text.begin(), answer_table);
|
||||
++start;
|
||||
// move on the answer table to next set of answers;
|
||||
// (the post-increment leaves the pointer just past the -2 separator)
while(*answer_table++ != -2){}
|
||||
}
|
||||
// A non-negative entry here means the table still lists a match that the
// iterator never produced.
if(answer_table[0] >= 0)
|
||||
{
|
||||
// we should have had a match but didn't:
|
||||
BOOST_REGEX_TEST_ERROR("Expected match was not found.", charT);
|
||||
}
|
||||
}
|
||||
|
||||
// Callback object handed to boost::regex_grep by test_regex_grep. Each call
// checks one match against the current group of answer-table entries and
// then advances to the next group.
// The traits template parameter is not used by any member shown here.
template <class charT, class traits>
|
||||
struct grep_test_predicate
|
||||
{
|
||||
typedef typename std::basic_string<charT>::const_iterator test_iter;
|
||||
|
||||
// b: iterator to the start of the searched text, passed on to test_result.
// a: first entry of the answer table to check against.
grep_test_predicate(test_iter b, const int* a)
|
||||
: m_base(b), m_table(a)
|
||||
{}
|
||||
// Verifies `what` against the current answers and always returns true.
// NOTE(review): presumably true tells regex_grep to keep searching - confirm
// against the regex_grep documentation.
bool operator()(const boost::match_results<test_iter>& what)
|
||||
{
|
||||
test_result(what, m_base, m_table);
|
||||
// move on the answer table to next set of answers;
|
||||
while(*m_table++ != -2){}
|
||||
return true;
|
||||
}
|
||||
private:
|
||||
test_iter m_base;
|
||||
const int* m_table;
|
||||
};
|
||||
|
||||
// Checks boost::regex_grep against the current test case by running it over
// the test's search text with a grep_test_predicate, which validates each
// reported match against the answer table.
//
// r: the compiled expression under test.
// Unlike test_regex_iterator, nothing is checked after the call returns, so
// an expected match that regex_grep never reports is not flagged here.
template<class charT, class traits>
|
||||
void test_regex_grep(boost::basic_regex<charT, traits>& r)
|
||||
{
|
||||
// Note: this typedef is not referenced in the rest of the function.
typedef typename std::basic_string<charT>::const_iterator const_iterator;
|
||||
const std::basic_string<charT>& search_text = test_info<charT>::search_text();
|
||||
boost::regex_constants::match_flag_type opts = test_info<charT>::match_options();
|
||||
const int* answer_table = test_info<charT>::answer_table();
|
||||
grep_test_predicate<charT, traits> pred(search_text.begin(), answer_table);
|
||||
boost::regex_grep(pred, search_text, r, opts);
|
||||
}
|
||||
|
||||
// Checks boost::regex_match against the current test case, using the first
// pair of answer-table entries to decide what outcome to expect.
//
// r: the compiled expression under test.
// NOTE(review): answer_table[0] / answer_table[1] look like the start and
// end offsets of the overall match (negative meaning "no match") - inferred
// from how they are compared below, confirm against test_info/test_result.
template<class charT, class traits>
|
||||
void test_regex_match(boost::basic_regex<charT, traits>& r)
|
||||
{
|
||||
typedef typename std::basic_string<charT>::const_iterator const_iterator;
|
||||
const std::basic_string<charT>& search_text = test_info<charT>::search_text();
|
||||
boost::regex_constants::match_flag_type opts = test_info<charT>::match_options();
|
||||
const int* answer_table = test_info<charT>::answer_table();
|
||||
boost::match_results<const_iterator> what;
|
||||
// Case 1: first entry negative - regex_match must fail.
if(answer_table[0] < 0)
|
||||
{
|
||||
if(boost::regex_match(search_text, r, opts))
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("boost::regex_match found a match when it should not have done so.", charT);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Case 2: first entry positive - regex_match must still fail.
if((answer_table[0] > 0) && boost::regex_match(search_text, r, opts))
|
||||
{
|
||||
BOOST_REGEX_TEST_ERROR("boost::regex_match found a match when it should not have done so.", charT);
|
||||
}
|
||||
// Case 3: entries are 0 and the text length - regex_match must succeed and
// its results must agree with the answer table. Any other combination is
// not checked by this function.
// NOTE(review): this compares an int with an unsigned size_type.
else if((answer_table[0] == 0) && (answer_table[1] == search_text.size()))
|
||||
{
|
||||
if(boost::regex_match(
|
||||
search_text.begin(),
|
||||
search_text.end(),
|
||||
what,
|
||||
r,
|
||||
opts))
|
||||
{
|
||||
test_result(what, search_text.begin(), answer_table);
|
||||
}
|
||||
// answer_table[0] is 0 on this path, so this condition always holds.
else if(answer_table[0] >= 0)
|
||||
{
|
||||
// we should have had a match but didn't:
|
||||
BOOST_REGEX_TEST_ERROR("Expected match was not found.", charT);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class charT, class traits>
|
||||
void test(boost::basic_regex<charT, traits>& r, const test_regex_search_tag&)
|
||||
{
|
||||
@ -88,6 +186,9 @@ void test(boost::basic_regex<charT, traits>& r, const test_regex_search_tag&)
|
||||
try{
|
||||
r.assign(expression, syntax_options);
|
||||
test_simple_search(r);
|
||||
test_regex_iterator(r);
|
||||
test_regex_grep(r);
|
||||
test_regex_match(r);
|
||||
}
|
||||
catch(const boost::bad_expression& e)
|
||||
{
|
||||
|
Reference in New Issue
Block a user