Added new experimental captures support.

[SVN r21243]
This commit is contained in:
John Maddock
2003-12-13 12:28:48 +00:00
parent 6b95ac002e
commit bf9350aa16
28 changed files with 1593 additions and 612 deletions

View File

@ -55,12 +55,10 @@
# include <boost/detail/allocator.hpp>
# include <boost/regex/config/cstring.hpp>
# include <boost/throw_exception.hpp>
# include <boost/scoped_ptr.hpp>
# ifndef BOOST_NO_STD_LOCALE
# include <locale>
# endif
# ifdef BOOST_REGEX_MATCH_EXTRA
# include <boost/scoped_ptr.hpp>
# endif
#else
//
// C build,

View File

@ -85,3 +85,8 @@
// if you don't want boost.regex to cache memory.
// #define BOOST_REGEX_MAX_CACHE_BLOCKS 16
// define this if you want to be able to access extended capture
// information in your sub_match objects (caution: this will slow
// things down quite a bit).
// #define BOOST_REGEX_MATCH_EXTRA

View File

@ -55,7 +55,12 @@ typedef enum _match_flags
match_perl = match_all << 1, // Use perl matching rules
match_posix = match_perl << 1, // Use POSIX matching rules
match_nosubs = match_posix << 1, // don't trap marked subs
match_max = match_nosubs,
// NOTE: the matcher code that tests match_extra is only compiled when
// BOOST_REGEX_MATCH_EXTRA is defined.
match_extra = match_nosubs << 1, // include full capture information for repeated captures
match_single_line = match_extra << 1, // treat text as a single line and ignore any \n's when matching ^ and $.
match_unused1 = match_single_line << 1, // unused
match_unused2 = match_unused1 << 1, // unused
match_unused3 = match_unused2 << 1, // unused
match_max = match_unused3,
format_perl = 0, // perl style replacement
format_default = 0, // ditto.
@ -117,6 +122,8 @@ using regex_constants::match_all;
using regex_constants::match_perl;
using regex_constants::match_posix;
using regex_constants::match_nosubs;
using regex_constants::match_extra;
using regex_constants::match_single_line;
//using regex_constants::match_max;
using regex_constants::format_all;
using regex_constants::format_sed;

View File

@ -25,19 +25,19 @@
namespace boost{
template <class RandomAccessIterator
, class Allocator = BOOST_DEFAULT_ALLOCATOR(sub_match<RandomAccessIterator> )
template <class BidiIterator
, class Allocator = BOOST_DEFAULT_ALLOCATOR(sub_match<BidiIterator> )
>
class match_results
{
private:
#ifndef BOOST_NO_STD_ALLOCATOR
typedef std::vector<sub_match<RandomAccessIterator>, Allocator> vector_type;
typedef std::vector<sub_match<BidiIterator>, Allocator> vector_type;
#else
typedef std::vector<sub_match<RandomAccessIterator> > vector_type;
typedef std::vector<sub_match<BidiIterator> > vector_type;
#endif
public:
typedef sub_match<RandomAccessIterator> value_type;
typedef sub_match<BidiIterator> value_type;
#if !defined(BOOST_NO_STD_ALLOCATOR) && !(defined(BOOST_MSVC) && defined(_STLPORT_VERSION))
typedef typename Allocator::const_reference const_reference;
#else
@ -47,11 +47,11 @@ public:
typedef typename vector_type::const_iterator const_iterator;
typedef const_iterator iterator;
typedef typename re_detail::regex_iterator_traits<
RandomAccessIterator>::difference_type difference_type;
BidiIterator>::difference_type difference_type;
typedef typename Allocator::size_type size_type;
typedef Allocator allocator_type;
typedef typename re_detail::regex_iterator_traits<
RandomAccessIterator>::value_type char_type;
BidiIterator>::value_type char_type;
typedef std::basic_string<char_type> string_type;
// construct/copy/destroy:
@ -91,10 +91,10 @@ public:
sub += 2;
if(sub < m_subs.size())
{
const sub_match<RandomAccessIterator>& s = m_subs[sub];
const sub_match<BidiIterator>& s = m_subs[sub];
if(s.matched)
{
return boost::re_detail::distance((RandomAccessIterator)(m_base), (RandomAccessIterator)(s.first));
return boost::re_detail::distance((BidiIterator)(m_base), (BidiIterator)(s.first));
}
}
return ~static_cast<difference_type>(0);
@ -105,7 +105,7 @@ public:
string_type result;
if(sub < (int)m_subs.size() && (sub > 0))
{
const sub_match<RandomAccessIterator>& s = m_subs[sub];
const sub_match<BidiIterator>& s = m_subs[sub];
if(s.matched)
{
result = s;
@ -174,9 +174,18 @@ public:
bool operator!=(const match_results& that)const
{ return !(*this == that); }
#ifdef BOOST_REGEX_MATCH_EXTRA
typedef typename sub_match<BidiIterator>::capture_sequence_type capture_sequence_type;
const capture_sequence_type& captures(int i)const
{
   // Look up the i-th sub-expression and hand back its recorded
   // capture sequence (only available with BOOST_REGEX_MATCH_EXTRA).
   const sub_match<BidiIterator>& sub = (*this)[i];
   return sub.captures();
}
#endif
//
// private access functions:
void BOOST_REGEX_CALL set_second(RandomAccessIterator i)
void BOOST_REGEX_CALL set_second(BidiIterator i)
{
assert(m_subs.size() > 2);
m_subs[2].second = i;
@ -188,7 +197,7 @@ public:
m_null.matched = false;
}
void BOOST_REGEX_CALL set_second(RandomAccessIterator i, size_type pos, bool m = true)
void BOOST_REGEX_CALL set_second(BidiIterator i, size_type pos, bool m = true)
{
pos += 2;
assert(m_subs.size() > pos);
@ -203,7 +212,7 @@ public:
m_null.matched = false;
}
}
void BOOST_REGEX_CALL set_size(size_type n, RandomAccessIterator i, RandomAccessIterator j)
void BOOST_REGEX_CALL set_size(size_type n, BidiIterator i, BidiIterator j)
{
value_type v(j);
size_type len = m_subs.size();
@ -220,11 +229,11 @@ public:
}
m_subs[1].first = i;
}
void BOOST_REGEX_CALL set_base(RandomAccessIterator pos)
void BOOST_REGEX_CALL set_base(BidiIterator pos)
{
m_base = pos;
}
void BOOST_REGEX_CALL set_first(RandomAccessIterator i)
void BOOST_REGEX_CALL set_first(BidiIterator i)
{
// set up prefix:
m_subs[1].second = i;
@ -238,7 +247,7 @@ public:
m_subs[n].matched = false;
}
}
void BOOST_REGEX_CALL set_first(RandomAccessIterator i, size_type pos)
void BOOST_REGEX_CALL set_first(BidiIterator i, size_type pos)
{
assert(pos+2 < m_subs.size());
if(pos)
@ -246,22 +255,22 @@ public:
else
set_first(i);
}
void BOOST_REGEX_CALL maybe_assign(const match_results<RandomAccessIterator, Allocator>& m);
void BOOST_REGEX_CALL maybe_assign(const match_results<BidiIterator, Allocator>& m);
private:
vector_type m_subs; // subexpressions
RandomAccessIterator m_base; // where the search started from
sub_match<RandomAccessIterator> m_null; // a null match
BidiIterator m_base; // where the search started from
sub_match<BidiIterator> m_null; // a null match
};
template <class RandomAccessIterator, class Allocator>
void BOOST_REGEX_CALL match_results<RandomAccessIterator, Allocator>::maybe_assign(const match_results<RandomAccessIterator, Allocator>& m)
template <class BidiIterator, class Allocator>
void BOOST_REGEX_CALL match_results<BidiIterator, Allocator>::maybe_assign(const match_results<BidiIterator, Allocator>& m)
{
const_iterator p1, p2;
p1 = begin();
p2 = m.begin();
RandomAccessIterator base = (*this)[-1].first;
BidiIterator base = (*this)[-1].first;
std::size_t len1 = 0;
std::size_t len2 = 0;
std::size_t base1 = 0;
@ -276,8 +285,8 @@ void BOOST_REGEX_CALL match_results<RandomAccessIterator, Allocator>::maybe_assi
if(base1 < base2) return;
if(base2 < base1) break;
len1 = boost::re_detail::distance((RandomAccessIterator)p1->first, (RandomAccessIterator)p1->second);
len2 = boost::re_detail::distance((RandomAccessIterator)p2->first, (RandomAccessIterator)p2->second);
len1 = boost::re_detail::distance((BidiIterator)p1->first, (BidiIterator)p1->second);
len2 = boost::re_detail::distance((BidiIterator)p2->first, (BidiIterator)p2->second);
if((len1 != len2) || ((p1->matched == false) && (p2->matched == true)))
break;
if((p1->matched == true) && (p2->matched == false))
@ -293,24 +302,24 @@ void BOOST_REGEX_CALL match_results<RandomAccessIterator, Allocator>::maybe_assi
*this = m;
}
template <class RandomAccessIterator, class Allocator>
void swap(match_results<RandomAccessIterator, Allocator>& a, match_results<RandomAccessIterator, Allocator>& b)
template <class BidiIterator, class Allocator>
void swap(match_results<BidiIterator, Allocator>& a, match_results<BidiIterator, Allocator>& b)
{
a.swap(b);
}
#ifndef BOOST_NO_STD_LOCALE
template <class charT, class traits, class RandomAccessIterator, class Allocator>
template <class charT, class traits, class BidiIterator, class Allocator>
std::basic_ostream<charT, traits>&
operator << (std::basic_ostream<charT, traits>& os,
const match_results<RandomAccessIterator, Allocator>& s)
const match_results<BidiIterator, Allocator>& s)
{
return (os << s.str());
}
#else
template <class RandomAccessIterator, class Allocator>
template <class BidiIterator, class Allocator>
std::ostream& operator << (std::ostream& os,
const match_results<RandomAccessIterator, Allocator>& s)
const match_results<BidiIterator, Allocator>& s)
{
return (os << s.str());
}

View File

@ -13,7 +13,6 @@
#define BOOST_REGEX_MATCHER_HPP
#include <boost/regex/v4/iterator_category.hpp>
#include <boost/scoped_ptr.hpp>
#ifdef BOOST_HAS_ABI_HEADERS
# include BOOST_ABI_PREFIX
@ -22,7 +21,13 @@
namespace boost{
namespace re_detail{
//
//
// error checking API:
//
BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex::flag_type ef, match_flag_type mf);
//
// Unfortunately Rogue Wave's standard library appears to have a bug
// in std::basic_string::compare that results in erroneous answers
// in some cases (tested with Borland C++ 5.1, Rogue Wave lib version

View File

@ -117,6 +117,7 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match()
m_presult->set_base(base);
if(m_match_flags & match_posix)
m_result = *m_presult;
verify_options(re.flags(), m_match_flags);
if(0 == match_prefix())
return false;
return m_result[0].second == last;
@ -206,6 +207,7 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find()
m_result.set_base(base);
}
verify_options(re.flags(), m_match_flags);
// find out what kind of expression we have:
unsigned type = (m_match_flags & match_continuous) ?
static_cast<unsigned int>(regbase::restart_continue)
@ -253,6 +255,19 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_prefix()
m_presult->set_second(last, 0, false);
position = last;
}
#ifdef BOOST_REGEX_MATCH_EXTRA
if(m_has_found_match && (match_extra & m_match_flags))
{
//
// we have a match, reverse the capture information:
//
for(unsigned i = 0; i < m_presult->size(); ++i)
{
typename sub_match<BidiIterator>::capture_sequence_type & seq = ((*m_presult)[i]).get_captures();
std::reverse(seq.begin(), seq.end());
}
}
#endif
if(!m_has_found_match)
position = restart; // reset search postion
return m_has_found_match;
@ -297,15 +312,20 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_literal()
template <class BidiIterator, class Allocator, class traits, class Allocator2>
bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_start_line()
{
if((position == base) && ((m_match_flags & match_prev_avail) == 0))
if(position == base)
{
if((m_match_flags & match_not_bol) == 0)
if((m_match_flags & match_prev_avail) == 0)
{
pstate = pstate->next.p;
return true;
if((m_match_flags & match_not_bol) == 0)
{
pstate = pstate->next.p;
return true;
}
return false;
}
return false;
}
else if(m_match_flags & match_single_line)
return false;
// check the previous value character:
BidiIterator t(position);
@ -331,6 +351,8 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_end_line()
{
if(position != last)
{
if(m_match_flags & match_single_line)
return false;
// we're not yet at the end so *first is always valid:
if(traits_inst.is_separator(*position))
{
@ -387,6 +409,14 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_match()
m_result.maybe_assign(*m_presult);
return false;
}
#ifdef BOOST_REGEX_MATCH_EXTRA
if(match_extra & m_match_flags)
{
for(unsigned i = 0; i < m_presult->size(); ++i)
if((*m_presult)[i].matched)
((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
}
#endif
return true;
}

View File

@ -309,11 +309,38 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_startmark(
}
case -3:
{
// independent sub-expression:
// independent sub-expression, currently this is always recursive:
const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
pstate = pstate->next.p->next.p;
bool r = match_all_states();
pstate = next_pstate;
#ifdef BOOST_REGEX_MATCH_EXTRA
if(r && (m_match_flags & match_extra))
{
//
// our captures have been stored in *m_presult
// we need to unpack them, and insert them
// back in the right order when we unwind the stack:
//
match_results<BidiIterator, Allocator> tm(*m_presult);
unsigned i;
for(i = 0; i < tm.size(); ++i)
(*m_presult)[i].get_captures().clear();
// match everything else:
r = match_all_states();
// now place the stored captures back:
for(i = 0; i < tm.size(); ++i)
{
typedef typename sub_match<BidiIterator>::capture_sequence_type seq;
seq& s1 = (*m_presult)[i].get_captures();
const seq& s2 = tm[i].captures();
s1.insert(
s1.end(),
s2.begin(),
s2.end());
}
}
#endif
return r;
}
default:
@ -809,6 +836,13 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::unwind_paren(boo
m_presult->set_first(pmp->sub.first, pmp->index);
m_presult->set_second(pmp->sub.second, pmp->index, pmp->sub.matched);
}
#ifdef BOOST_REGEX_MATCH_EXTRA
//
// we have a match, push the capture information onto the stack:
//
else if(pmp->sub.matched && (match_extra & m_match_flags))
((*m_presult)[pmp->index]).get_captures().push_back(pmp->sub);
#endif
// unwind stack:
m_backup_state = pmp+1;
boost::re_detail::inplace_destroy(pmp);

View File

@ -35,13 +35,14 @@ class backup_subex
public:
template <class A>
backup_subex(const match_results<BidiIterator, A>& w, int i)
: index(i), sub(w[i]) {}
: index(i), sub(w[i], false) {}
template <class A>
void restore(match_results<BidiIterator, A>& w)
{
w.set_first(sub.first, index);
w.set_second(sub.second, index, sub.matched);
}
const sub_match<BidiIterator>& get() { return sub; }
};
template <class BidiIterator, class Allocator, class traits, class Allocator2>
@ -126,6 +127,33 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_startmark(
pstate = pstate->next.p->next.p;
r = match_all_states();
pstate = next_pstate;
#ifdef BOOST_REGEX_MATCH_EXTRA
if(r && (m_match_flags & match_extra))
{
//
// our captures have been stored in *m_presult
// we need to unpack them, and insert them
// back in the right order when we unwind the stack:
//
unsigned i;
match_results<BidiIterator, Allocator> tm(*m_presult);
for(i = 0; i < tm.size(); ++i)
(*m_presult)[i].get_captures().clear();
// match everything else:
r = match_all_states();
// now place the stored captures back:
for(i = 0; i < tm.size(); ++i)
{
typedef typename sub_match<BidiIterator>::capture_sequence_type seq;
seq& s1 = (*m_presult)[i].get_captures();
const seq& s2 = tm[i].captures();
s1.insert(
s1.end(),
s2.begin(),
s2.end());
}
}
#endif
break;
}
default:
@ -139,6 +167,13 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_startmark(
r = match_all_states();
if(r == false)
sub.restore(*m_presult);
#ifdef BOOST_REGEX_MATCH_EXTRA
//
// we have a match, push the capture information onto the stack:
//
else if(sub.get().matched && (match_extra & m_match_flags))
((*m_presult)[index]).get_captures().push_back(sub.get());
#endif
}
else
{

View File

@ -103,22 +103,14 @@ public:
if(regex_search(first, end, what, *pre, flags) == true)
{
N = 0;
#if 1
result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]);
#else
result = ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[(int)subs[N]].str()));
#endif
return true;
}
else if((subs[N] == -1) && (first != end))
{
#if 1
result.first = first;
result.second = end;
result.matched = (first != end);
#else
result = value_type(first, end);
#endif
return true;
}
return false;
@ -142,11 +134,7 @@ public:
if(N+1 < (int)subs.size())
{
++N;
#if 1
result =((subs[N] == -1) ? what.prefix() : what[subs[N]]);
#else
result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second));
#endif
return true;
}
if(what.prefix().first != what[0].second)
@ -155,23 +143,15 @@ public:
if(regex_search(last_end, end, what, *pre, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags)))
{
N =0;
#if 1
result =((subs[N] == -1) ? what.prefix() : what[subs[N]]);
#else
result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second));
#endif
return true;
}
else if((last_end != end) && (subs[0] == -1))
{
N =-1;
#if 1
result.first = last_end;
result.second = end;
result.matched = (last_end != end);
#else
result = value_type(last_end, end);
#endif
return true;
}
return false;
@ -189,11 +169,7 @@ private:
typedef shared_ptr<impl> pimpl;
public:
typedef basic_regex<charT, traits, Allocator> regex_type;
#if 1
typedef sub_match<BidirectionalIterator> value_type;
#else
typedef std::basic_string<charT> value_type;
#endif
typedef typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type
difference_type;
typedef const value_type* pointer;

View File

@ -83,6 +83,58 @@ struct sub_match : public std::pair<BidiIterator, BidiIterator>
bool operator>=(const sub_match& that)const
{ return compare(that) >= 0; }
#ifdef BOOST_REGEX_MATCH_EXTRA
typedef std::vector<sub_match<BidiIterator> > capture_sequence_type;
const capture_sequence_type& captures()const
{
   // Read-only view of the capture sequence; the sequence is created
   // empty on first access by get_captures().
   return get_captures();
}
//
// Private implementation API: DO NOT USE!
//
capture_sequence_type& get_captures()const
{
   // Allocate an empty sequence the first time it is asked for, so
   // that a valid reference can always be returned.
   if(m_captures.get() == 0)
   {
      m_captures.reset(new capture_sequence_type());
   }
   return *m_captures;
}
private:
// Capture sequence for this sub-expression. Allocated on first use;
// mutable so that the const accessors captures()/get_captures() can
// create it on demand.
mutable boost::scoped_ptr<capture_sequence_type> m_captures;
public:
#endif
// Copy constructor. The optional second argument selects whether the
// capture sequence of "that" is duplicated as well (the default) or
// left out; it is only named when BOOST_REGEX_MATCH_EXTRA is defined,
// since otherwise it would be unused.
sub_match(const sub_match& that, bool
#ifdef BOOST_REGEX_MATCH_EXTRA
   deep_copy
#endif
   = true
   )
   : std::pair<BidiIterator, BidiIterator>(that),
     matched(that.matched)
{
#ifdef BOOST_REGEX_MATCH_EXTRA
   // Duplicate the captures only on request, and only if the source
   // actually has a sequence allocated.
   if(deep_copy && that.m_captures)
   {
      m_captures.reset(new capture_sequence_type(*(that.m_captures)));
   }
#endif
}
// Assignment: copies the iterator pair, the matched flag and (when
// BOOST_REGEX_MATCH_EXTRA is defined) the capture sequence of "that".
// Returns *this.
sub_match& operator=(const sub_match& that)
{
   this->first = that.first;
   this->second = that.second;
   matched = that.matched;
#ifdef BOOST_REGEX_MATCH_EXTRA
   if(that.m_captures)
      get_captures() = *(that.m_captures);
   else if(m_captures)
   {
      // "that" has no capture sequence allocated: drop ours as well,
      // otherwise stale captures would survive the assignment (which
      // also corrupts swap-based algorithms such as std::reverse).
      m_captures->clear();
   }
#endif
   return *this;
}
#ifdef BOOST_OLD_REGEX_H
//
// the following are deprecated, do not use!!