merged changes in regex5 branch

[SVN r26692]
This commit is contained in:
John Maddock
2005-01-13 17:06:21 +00:00
parent de0ab9092a
commit 71a0e020e2
275 changed files with 37305 additions and 27154 deletions

View File

@ -24,7 +24,37 @@ namespace re_detail{
//
// error checking API:
//
BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex::flag_type ef, match_flag_type mf);
BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex_constants::syntax_option_type ef, match_flag_type mf);
//
// function can_start:
//
//
// Generic overload: decides whether character c may begin a match
// according to the start map.
//   c    - the candidate character
//   map  - lookup table indexed by character value, covering the
//          values 0 .. (1 << CHAR_BIT) - 1
//   mask - the bit(s) of the table entry to test
// Returns true when c lies outside the table (negative, or
// >= 1 << CHAR_BIT), since the table cannot rule such a character out;
// otherwise returns whether map[c] has any of the mask bits set.
//
template <class charT>
bool can_start(charT c, const unsigned char* map, unsigned char mask)
{
   // below the table: cannot be excluded
   if(c < static_cast<charT>(0))
      return true;
   // above the table: cannot be excluded either
   if(c >= static_cast<charT>(1 << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
//
// char overload: the character is reinterpreted as unsigned char so that
// it always indexes inside the map, whatever the signedness of char.
// Returns whether the selected map entry has any of the mask bits set.
//
inline bool can_start(char c, const unsigned char* map, unsigned char mask)
{
   const unsigned char index = static_cast<unsigned char>(c);
   return (map[index] & mask) != 0;
}
//
// signed char overload: the character is reinterpreted as unsigned char
// before indexing, so negative values select the upper half of the map.
// Returns whether the selected map entry has any of the mask bits set.
//
inline bool can_start(signed char c, const unsigned char* map, unsigned char mask)
{
   const unsigned char index = static_cast<unsigned char>(c);
   return (map[index] & mask) != 0;
}
//
// unsigned char overload: the character indexes the map directly.
// Returns whether the selected map entry has any of the mask bits set.
//
inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask)
{
   const unsigned char entry = map[c];
   return (entry & mask) != 0;
}
//
// unsigned short overload: values at or above 1 << CHAR_BIT have no slot
// in the map and so are always reported as possible starts; smaller
// values are looked up and tested against the mask.
//
inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask)
{
   if(c >= (1 << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
//
// wchar_t overload, only compiled when the surrounding guard finds that
// WCHAR_MIN is 0 and wchar_t is an intrinsic type, so no check for
// negative values is made here.  Values at or above 1 << CHAR_BIT have no
// slot in the map and are always reported as possible starts; smaller
// values are looked up and tested against the mask.
//
inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask)
{
   if(c >= (1 << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
#endif
//
@ -36,7 +66,9 @@ BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex::flag_type ef
// which succeeds when it should not.
//
#ifndef _RWSTD_VER
# define STR_COMP(s,p) s.compare(p)
//
// Three-way comparison of a std::basic_string against a null-terminated
// character array, delegated to basic_string::compare.
// Returns a negative value, zero, or a positive value as s orders before,
// equal to, or after p.
//
template <class C, class T, class A>
inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
{
   const int order = s.compare(p);
   return order;
}
#else
template <class C, class T, class A>
inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
@ -47,31 +79,43 @@ inline int string_compare(const std::string& s, const char* p)
//
// Wide-string overload: compares s against p as C strings via
// std::wcscmp, so the comparison stops at the first null character.
// Returns a negative value, zero, or a positive value as s orders before,
// equal to, or after p.
//
inline int string_compare(const std::wstring& s, const wchar_t* p)
{
   const wchar_t* const lhs = s.c_str();
   return std::wcscmp(lhs, p);
}
#endif
# define STR_COMP(s,p) string_compare(s,p)
#endif
#if !BOOST_WORKAROUND(BOOST_MSVC, < 1310)
//
// Three-way comparison of an arbitrary character sequence against a
// null-terminated character array.
//   s - any sequence providing size() and operator[]
//   p - null-terminated array to compare against
// Returns zero when the two are equal, otherwise the difference between
// the first pair of characters that differ (the terminating null of p
// counts as a character once s is exhausted).
//
// If s contains a null at the very position where p terminates, s is
// reported as the greater string and scanning stops: p must never be read
// beyond its terminator.
//
template <class Seq, class C>
inline int string_compare(const Seq& s, const C* p)
{
   const std::size_t len = s.size();
   std::size_t i = 0;
   while((i < len) && (p[i] == s[i]))
   {
      // p ends here while s carries on (embedded null in s): s is the
      // longer string, and p[i + 1] is not ours to read.
      if(p[i] == static_cast<C>(0))
         return 1;
      ++i;
   }
   return (i == len) ? -p[i] : s[i] - p[i];
}
#endif
# define STR_COMP(s,p) string_compare(s,p)
//
// Advances p over one null-terminated string.
//   p - pointer into a null-terminated character array
// Returns a pointer to the character immediately following the first
// null found at or after p.
//
template<class charT>
inline const charT* re_skip_past_null(const charT* p)
{
   // a single scan against a correctly typed terminator is sufficient
   const charT terminator = static_cast<charT>(0);
   while(*p != terminator)
      ++p;
   return p + 1;
}
template <class iterator, class charT, class traits_type, class Allocator>
template <class iterator, class charT, class traits_type, class char_classT>
iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
iterator last,
const re_set_long* set_,
const reg_expression<charT, traits_type, Allocator>& e)
const re_set_long<char_classT>* set_,
const regex_data<charT, traits_type>& e, bool icase)
{
const charT* p = reinterpret_cast<const charT*>(set_+1);
iterator ptr;
unsigned int i;
bool icase = e.flags() & regex_constants::icase;
//bool icase = e.m_flags & regex_constants::icase;
if(next == last) return next;
typedef typename traits_type::string_type traits_string_type;
const traits_type& traits_inst = e.get_traits();
const ::boost::regex_traits_wrapper<traits_type>& traits_inst = *(e.m_ptraits);
// dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
// referenced
@ -82,12 +126,12 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
for(i = 0; i < set_->csingles; ++i)
{
ptr = next;
if(*p == 0)
if(*p == static_cast<charT>(0))
{
// treat null string as special case:
if(traits_inst.translate(*ptr, icase) != *p)
{
while(*p == 0)++p;
while(*p == static_cast<charT>(0))++p;
continue;
}
return set_->isnot ? next : (ptr == next) ? ++next : ptr;
@ -102,7 +146,7 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
++ptr;
}
if(*p == 0) // if null we've matched
if(*p == static_cast<charT>(0)) // if null we've matched
return set_->isnot ? next : (ptr == next) ? ++next : ptr;
p = re_skip_past_null(p); // skip null
@ -114,23 +158,25 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
if(set_->cranges || set_->cequivalents)
{
traits_string_type s2(1, col);
traits_string_type s1;
//
// try and match a range, NB only a single character can match
if(set_->cranges)
{
if((e.flags() & regex_constants::collate) == 0)
s1 = s2;
if((e.m_flags & regex_constants::collate) == 0)
s1.assign(1, col);
else
traits_inst.transform(s1, s2);
{
charT a[2] = { col, charT(0), };
s1 = traits_inst.transform(a, a + 1);
}
for(i = 0; i < set_->cranges; ++i)
{
if(STR_COMP(s1, p) <= 0)
if(STR_COMP(s1, p) >= 0)
{
do{ ++p; }while(*p);
++p;
if(STR_COMP(s1, p) >= 0)
if(STR_COMP(s1, p) <= 0)
return set_->isnot ? next : ++next;
}
else
@ -148,7 +194,8 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
// try and match an equivalence class, NB only a single character can match
if(set_->cequivalents)
{
traits_inst.transform_primary(s1, s2);
charT a[2] = { col, charT(0), };
s1 = traits_inst.transform_primary(a, a +1);
for(i = 0; i < set_->cequivalents; ++i)
{
if(STR_COMP(s1, p) == 0)
@ -159,47 +206,19 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
}
}
}
if(traits_inst.is_class(col, set_->cclasses) == true)
if(traits_inst.isctype(col, set_->cclasses) == true)
return set_->isnot ? next : ++next;
return set_->isnot ? ++next : next;
}
//
// struct access_t:
// Derives from reg_expression and re-exports a set of its static helper
// functions as public static members, each one forwarding its arguments
// unchanged to the base class function of the same name.
// NOTE(review): presumably the base class members are not publicly
// accessible, which is why this forwarding layer exists - confirm against
// reg_expression.
//
template <class charT, class traits, class Allocator>
struct access_t : public reg_expression<charT, traits, Allocator>
{
   // the template parameters, re-exported under conventional names:
   typedef charT                                      char_type;
   typedef traits                                     traits_type;
   typedef Allocator                                  alloc_type;
   // the expression type whose internals are being exposed:
   typedef reg_expression<charT, traits, Allocator>   base_type;
   // tag type passed to the base class can_start to select an overload:
   typedef typename is_byte<charT>::width_type        width_type;

   static int repeat_count(const base_type& b)
   {
      return base_type::repeat_count(b);
   }

   static unsigned int restart_type(const base_type& b)
   {
      return base_type::restart_type(b);
   }

   static const re_syntax_base* first(const base_type& b)
   {
      return base_type::first(b);
   }

   static const unsigned char* get_map(const base_type& b)
   {
      return base_type::get_map(b);
   }

   static std::size_t leading_length(const base_type& b)
   {
      return base_type::leading_length(b);
   }

   static const kmp_info<charT>* get_kmp(const base_type& b)
   {
      return base_type::get_kmp(b);
   }

   // forwards to the base class can_start, supplying a default-constructed
   // width_type as the fourth argument:
   static bool can_start(char_type c, const unsigned char* _map, unsigned char mask)
   {
      return base_type::can_start(c, _map, mask, width_type());
   }
};
template <class BidiIterator>
class repeater_count
{
repeater_count** stack;
repeater_count* next;
int id;
unsigned count; // the number of iterations so far
BidiIterator start_pos; // where the last repeat started
std::size_t count; // the number of iterations so far
BidiIterator start_pos; // where the last repeat started
public:
repeater_count(repeater_count** s)
{
@ -230,10 +249,10 @@ public:
{
*stack = next;
}
unsigned get_count() { return count; }
std::size_t get_count() { return count; }
int get_id() { return id; }
int operator++() { return ++count; }
bool check_null_repeat(const BidiIterator& pos, unsigned max)
std::size_t operator++() { return ++count; }
bool check_null_repeat(const BidiIterator& pos, std::size_t max)
{
// this is called when we are about to start a new repeat,
// if the last one was NULL move our count to max,
@ -268,22 +287,25 @@ enum saved_state_type
saved_state_count = 14
};
template <class BidiIterator, class Allocator, class traits, class Allocator2>
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable : 4251 4231 4660)
#endif
template <class BidiIterator, class Allocator, class traits>
class perl_matcher
{
public:
typedef typename traits::char_type char_type;
typedef perl_matcher<BidiIterator, Allocator, traits, Allocator2> self_type;
typedef perl_matcher<BidiIterator, Allocator, traits> self_type;
typedef bool (self_type::*matcher_proc_type)(void);
typedef access_t<char_type, traits, Allocator2> access;
typedef typename traits::size_type traits_size_type;
typedef typename traits::uchar_type traits_uchar_type;
typedef typename is_byte<char_type>::width_type width_type;
typedef typename regex_iterator_traits<BidiIterator>::difference_type difference_type;
perl_matcher(BidiIterator first, BidiIterator end,
match_results<BidiIterator, Allocator>& what,
const reg_expression<char_type, traits, Allocator2>& e,
const basic_regex<char_type, traits>& e,
match_flag_type f);
bool match();
@ -334,7 +356,12 @@ private:
bool match_char_repeat();
bool match_dot_repeat_fast();
bool match_dot_repeat_slow();
bool backtrack_till_match(unsigned count);
bool match_backstep();
bool match_assert_backref();
bool match_toggle_case();
#ifdef BOOST_REGEX_RECURSIVE
bool backtrack_till_match(std::size_t count);
#endif
// find procs stored in s_find_vtable:
bool find_restart_any();
@ -361,9 +388,9 @@ private:
// where the current search started from, acts as base for $` during grep:
BidiIterator search_base;
// the expression being examined:
const reg_expression<char_type, traits, Allocator2>& re;
const basic_regex<char_type, traits>& re;
// the expression's traits class:
const traits& traits_inst;
const ::boost::regex_traits_wrapper<traits>& traits_inst;
// the next state in the machine being matched:
const re_syntax_base* pstate;
// matching flags in use:
@ -378,10 +405,16 @@ private:
bool m_has_partial_match;
// set to true whenever we get a match:
bool m_has_found_match;
// set to true whenever we're inside an independent sub-expression:
bool m_independent;
// the current repeat being examined:
repeater_count<BidiIterator>* next_count;
// the first repeat being examined (top of linked list):
repeater_count<BidiIterator> rep_obj;
// the mask to pass when matching word boundaries:
typename traits::char_class_type m_word_mask;
// the bitmask to use when determining whether a match_any matches a newline or not:
unsigned char match_any_mask;
#ifdef BOOST_REGEX_NON_RECURSIVE
//
@ -411,7 +444,7 @@ private:
void push_assertion(const re_syntax_base* ps, bool positive);
void push_alt(const re_syntax_base* ps);
void push_repeater_count(int i, repeater_count<BidiIterator>** s);
void push_single_repeat(unsigned c, const re_repeat* r, BidiIterator last_position, int id);
void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int id);
void push_non_greedy_repeat(const re_syntax_base* ps);
@ -426,11 +459,20 @@ private:
unsigned used_block_count;
#endif
// these operations aren't allowed, so are declared private:
perl_matcher& operator=(const perl_matcher&);
perl_matcher(const perl_matcher&);
// these operations aren't allowed, so are declared private,
// bodies are provided to keep explicit-instantiation requests happy:
perl_matcher& operator=(const perl_matcher&)
{
return *this;
}
perl_matcher(const perl_matcher& that)
: m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {}
};
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
} // namespace re_detail
#ifdef BOOST_HAS_ABI_HEADERS