mirror of
https://github.com/boostorg/regex.git
synced 2026-04-28 18:02:15 +02:00
Merge branch 'de_fuzz' into develop
This commit is contained in:
@@ -28,6 +28,8 @@ following special characters:
|
||||
|
||||
[pre .\[{}()\\\*+?|^$]
|
||||
|
||||
Other characters are special only in certain situations - for example `]` is special only after an opening `[`.
|
||||
|
||||
[h4 Wildcard]
|
||||
|
||||
The single character '.' when used outside of a character set will match
|
||||
|
||||
@@ -152,7 +152,7 @@
|
||||
# if defined(BOOST_REGEX_NO_W32) || BOOST_PLAT_WINDOWS_RUNTIME
|
||||
# define BOOST_REGEX_NO_FILEITER
|
||||
# endif
|
||||
#else // defined(_WIN32)
|
||||
#else /* defined(_WIN32) */
|
||||
# if !defined(BOOST_HAS_DIRENT_H)
|
||||
# define BOOST_REGEX_NO_FILEITER
|
||||
# endif
|
||||
|
||||
@@ -410,7 +410,7 @@ void copy_results(MR1& out, MR2 const& in)
|
||||
if(in[i].captures().size())
|
||||
{
|
||||
out[i].get_captures().assign(in[i].captures().size(), typename MR1::value_type());
|
||||
for(int j = 0; j < out[i].captures().size(); ++j)
|
||||
for(int j = 0; j < (int)out[i].captures().size(); ++j)
|
||||
{
|
||||
out[i].get_captures()[j].first = in[i].captures()[j].first.base();
|
||||
out[i].get_captures()[j].second = in[i].captures()[j].second.base();
|
||||
|
||||
@@ -117,7 +117,10 @@ inline unsigned utf8_trailing_byte_count(boost::uint8_t c)
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable:4100)
|
||||
#endif
|
||||
BOOST_NORETURN inline void invalid_utf32_code_point(::boost::uint32_t val)
|
||||
#ifndef BOOST_NO_EXCEPTIONS
|
||||
BOOST_NORETURN
|
||||
#endif
|
||||
inline void invalid_utf32_code_point(::boost::uint32_t val)
|
||||
{
|
||||
#ifndef BOOST_NO_STD_LOCALE
|
||||
std::stringstream ss;
|
||||
|
||||
@@ -77,15 +77,15 @@ public:
|
||||
|
||||
void add_single(const digraph_type& s)
|
||||
{
|
||||
m_singles.insert(m_singles.end(), s);
|
||||
m_singles.insert(s);
|
||||
if(s.second)
|
||||
m_has_digraphs = true;
|
||||
m_empty = false;
|
||||
}
|
||||
void add_range(const digraph_type& first, const digraph_type& end)
|
||||
{
|
||||
m_ranges.insert(m_ranges.end(), first);
|
||||
m_ranges.insert(m_ranges.end(), end);
|
||||
m_ranges.push_back(first);
|
||||
m_ranges.push_back(end);
|
||||
if(first.second)
|
||||
{
|
||||
m_has_digraphs = true;
|
||||
@@ -110,7 +110,7 @@ public:
|
||||
}
|
||||
void add_equivalent(const digraph_type& s)
|
||||
{
|
||||
m_equivalents.insert(m_equivalents.end(), s);
|
||||
m_equivalents.insert(s);
|
||||
if(s.second)
|
||||
{
|
||||
m_has_digraphs = true;
|
||||
@@ -136,11 +136,12 @@ public:
|
||||
return m_negate;
|
||||
}
|
||||
typedef typename std::vector<digraph_type>::const_iterator list_iterator;
|
||||
list_iterator singles_begin()const
|
||||
typedef typename std::set<digraph_type>::const_iterator set_iterator;
|
||||
set_iterator singles_begin()const
|
||||
{
|
||||
return m_singles.begin();
|
||||
}
|
||||
list_iterator singles_end()const
|
||||
set_iterator singles_end()const
|
||||
{
|
||||
return m_singles.end();
|
||||
}
|
||||
@@ -152,11 +153,11 @@ public:
|
||||
{
|
||||
return m_ranges.end();
|
||||
}
|
||||
list_iterator equivalents_begin()const
|
||||
set_iterator equivalents_begin()const
|
||||
{
|
||||
return m_equivalents.begin();
|
||||
}
|
||||
list_iterator equivalents_end()const
|
||||
set_iterator equivalents_end()const
|
||||
{
|
||||
return m_equivalents.end();
|
||||
}
|
||||
@@ -173,14 +174,14 @@ public:
|
||||
return m_empty;
|
||||
}
|
||||
private:
|
||||
std::vector<digraph_type> m_singles; // a list of single characters to match
|
||||
std::set<digraph_type> m_singles; // a list of single characters to match
|
||||
std::vector<digraph_type> m_ranges; // a list of end points of our ranges
|
||||
bool m_negate; // true if the set is to be negated
|
||||
bool m_has_digraphs; // true if we have digraphs present
|
||||
m_type m_classes; // character classes to match
|
||||
m_type m_negated_classes; // negated character classes to match
|
||||
bool m_empty; // whether we've added anything yet
|
||||
std::vector<digraph_type> m_equivalents; // a list of equivalence classes
|
||||
std::set<digraph_type> m_equivalents; // a list of equivalence classes
|
||||
};
|
||||
|
||||
template <class charT, class traits>
|
||||
@@ -239,7 +240,7 @@ protected:
|
||||
unsigned m_backrefs; // bitmask of permitted backrefs
|
||||
boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
|
||||
bool m_has_recursions; // set when we have recursive expresisons to fixup
|
||||
std::vector<bool> m_recursion_checks; // notes which recursions we've followed while analysing this expression
|
||||
std::vector<unsigned char> m_recursion_checks; // notes which recursions we've followed while analysing this expression
|
||||
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
|
||||
typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
|
||||
typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character
|
||||
@@ -365,6 +366,7 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
{
|
||||
typedef typename traits::string_type string_type;
|
||||
typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
|
||||
typedef typename basic_char_set<charT, traits>::set_iterator set_iterator;
|
||||
typedef typename traits::char_class_type m_type;
|
||||
|
||||
re_set_long<m_type>* result = static_cast<re_set_long<m_type>*>(append_state(syntax_element_long_set, sizeof(re_set_long<m_type>)));
|
||||
@@ -395,20 +397,25 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
// now extend with all the singles:
|
||||
//
|
||||
item_iterator first, last;
|
||||
first = char_set.singles_begin();
|
||||
last = char_set.singles_end();
|
||||
while(first != last)
|
||||
set_iterator sfirst, slast;
|
||||
sfirst = char_set.singles_begin();
|
||||
slast = char_set.singles_end();
|
||||
while(sfirst != slast)
|
||||
{
|
||||
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (first->second ? 3 : 2)));
|
||||
p[0] = m_traits.translate(first->first, m_icase);
|
||||
if(first->second)
|
||||
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (sfirst->first == static_cast<charT>(0) ? 1 : sfirst->second ? 3 : 2)));
|
||||
p[0] = m_traits.translate(sfirst->first, m_icase);
|
||||
if(sfirst->first == static_cast<charT>(0))
|
||||
{
|
||||
p[1] = m_traits.translate(first->second, m_icase);
|
||||
p[0] = 0;
|
||||
}
|
||||
else if(sfirst->second)
|
||||
{
|
||||
p[1] = m_traits.translate(sfirst->second, m_icase);
|
||||
p[2] = 0;
|
||||
}
|
||||
else
|
||||
p[1] = 0;
|
||||
++first;
|
||||
++sfirst;
|
||||
}
|
||||
//
|
||||
// now extend with all the ranges:
|
||||
@@ -472,24 +479,24 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
//
|
||||
// now process the equivalence classes:
|
||||
//
|
||||
first = char_set.equivalents_begin();
|
||||
last = char_set.equivalents_end();
|
||||
while(first != last)
|
||||
sfirst = char_set.equivalents_begin();
|
||||
slast = char_set.equivalents_end();
|
||||
while(sfirst != slast)
|
||||
{
|
||||
string_type s;
|
||||
if(first->second)
|
||||
if(sfirst->second)
|
||||
{
|
||||
charT cs[3] = { first->first, first->second, charT(0), };
|
||||
charT cs[3] = { sfirst->first, sfirst->second, charT(0), };
|
||||
s = m_traits.transform_primary(cs, cs+2);
|
||||
}
|
||||
else
|
||||
s = m_traits.transform_primary(&first->first, &first->first+1);
|
||||
s = m_traits.transform_primary(&sfirst->first, &sfirst->first+1);
|
||||
if(s.empty())
|
||||
return 0; // invalid or unsupported equivalence class
|
||||
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
|
||||
BOOST_REGEX_DETAIL_NS::copy(s.begin(), s.end(), p);
|
||||
p[s.size()] = charT(0);
|
||||
++first;
|
||||
++sfirst;
|
||||
}
|
||||
//
|
||||
// finally reset the address of our last state:
|
||||
@@ -518,7 +525,8 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
{
|
||||
typedef typename traits::string_type string_type;
|
||||
typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
|
||||
|
||||
typedef typename basic_char_set<charT, traits>::set_iterator set_iterator;
|
||||
|
||||
re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set)));
|
||||
bool negate = char_set.is_negated();
|
||||
std::memset(result->_map, 0, sizeof(result->_map));
|
||||
@@ -526,17 +534,18 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
// handle singles first:
|
||||
//
|
||||
item_iterator first, last;
|
||||
first = char_set.singles_begin();
|
||||
last = char_set.singles_end();
|
||||
while(first != last)
|
||||
set_iterator sfirst, slast;
|
||||
sfirst = char_set.singles_begin();
|
||||
slast = char_set.singles_end();
|
||||
while(sfirst != slast)
|
||||
{
|
||||
for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i)
|
||||
{
|
||||
if(this->m_traits.translate(static_cast<charT>(i), this->m_icase)
|
||||
== this->m_traits.translate(first->first, this->m_icase))
|
||||
== this->m_traits.translate(sfirst->first, this->m_icase))
|
||||
result->_map[i] = true;
|
||||
}
|
||||
++first;
|
||||
++sfirst;
|
||||
}
|
||||
//
|
||||
// OK now handle ranges:
|
||||
@@ -623,13 +632,13 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
//
|
||||
// now process the equivalence classes:
|
||||
//
|
||||
first = char_set.equivalents_begin();
|
||||
last = char_set.equivalents_end();
|
||||
while(first != last)
|
||||
sfirst = char_set.equivalents_begin();
|
||||
slast = char_set.equivalents_end();
|
||||
while(sfirst != slast)
|
||||
{
|
||||
string_type s;
|
||||
BOOST_ASSERT(static_cast<charT>(0) == first->second);
|
||||
s = m_traits.transform_primary(&first->first, &first->first+1);
|
||||
BOOST_ASSERT(static_cast<charT>(0) == sfirst->second);
|
||||
s = m_traits.transform_primary(&sfirst->first, &sfirst->first+1);
|
||||
if(s.empty())
|
||||
return 0; // invalid or unsupported equivalence class
|
||||
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
@@ -639,7 +648,7 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
if(s == s2)
|
||||
result->_map[i] = true;
|
||||
}
|
||||
++first;
|
||||
++sfirst;
|
||||
}
|
||||
if(negate)
|
||||
{
|
||||
@@ -690,7 +699,7 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
|
||||
|
||||
m_bad_repeats = 0;
|
||||
if(m_has_recursions)
|
||||
m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
|
||||
m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u);
|
||||
create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
|
||||
// get the restart type:
|
||||
m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
|
||||
@@ -792,50 +801,57 @@ void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
|
||||
//
|
||||
idx = m_pdata->get_id(static_cast<int>(idx));
|
||||
}
|
||||
while(p)
|
||||
if(idx < 0)
|
||||
{
|
||||
if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == idx))
|
||||
ok = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
while(p)
|
||||
{
|
||||
//
|
||||
// We've found the target of the recursion, set the jump target:
|
||||
//
|
||||
static_cast<re_jump*>(state)->alt.p = p;
|
||||
ok = true;
|
||||
//
|
||||
// Now scan the target for nested repeats:
|
||||
//
|
||||
p = p->next.p;
|
||||
int next_rep_id = 0;
|
||||
while(p)
|
||||
if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == idx))
|
||||
{
|
||||
switch(p->type)
|
||||
{
|
||||
case syntax_element_rep:
|
||||
case syntax_element_dot_rep:
|
||||
case syntax_element_char_rep:
|
||||
case syntax_element_short_set_rep:
|
||||
case syntax_element_long_set_rep:
|
||||
next_rep_id = static_cast<re_repeat*>(p)->state_id;
|
||||
break;
|
||||
case syntax_element_endmark:
|
||||
if(static_cast<const re_brace*>(p)->index == idx)
|
||||
next_rep_id = -1;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if(next_rep_id)
|
||||
break;
|
||||
//
|
||||
// We've found the target of the recursion, set the jump target:
|
||||
//
|
||||
static_cast<re_jump*>(state)->alt.p = p;
|
||||
ok = true;
|
||||
//
|
||||
// Now scan the target for nested repeats:
|
||||
//
|
||||
p = p->next.p;
|
||||
}
|
||||
if(next_rep_id > 0)
|
||||
{
|
||||
static_cast<re_recurse*>(state)->state_id = next_rep_id - 1;
|
||||
}
|
||||
int next_rep_id = 0;
|
||||
while(p)
|
||||
{
|
||||
switch(p->type)
|
||||
{
|
||||
case syntax_element_rep:
|
||||
case syntax_element_dot_rep:
|
||||
case syntax_element_char_rep:
|
||||
case syntax_element_short_set_rep:
|
||||
case syntax_element_long_set_rep:
|
||||
next_rep_id = static_cast<re_repeat*>(p)->state_id;
|
||||
break;
|
||||
case syntax_element_endmark:
|
||||
if(static_cast<const re_brace*>(p)->index == idx)
|
||||
next_rep_id = -1;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if(next_rep_id)
|
||||
break;
|
||||
p = p->next.p;
|
||||
}
|
||||
if(next_rep_id > 0)
|
||||
{
|
||||
static_cast<re_recurse*>(state)->state_id = next_rep_id - 1;
|
||||
}
|
||||
|
||||
break;
|
||||
break;
|
||||
}
|
||||
p = p->next.p;
|
||||
}
|
||||
p = p->next.p;
|
||||
}
|
||||
if(!ok)
|
||||
{
|
||||
@@ -934,7 +950,7 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
|
||||
{
|
||||
// Initialize m_recursion_checks if we need it:
|
||||
if(m_has_recursions)
|
||||
m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
|
||||
m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u);
|
||||
|
||||
const std::pair<bool, re_syntax_base*>& p = v.back();
|
||||
m_icase = p.first;
|
||||
@@ -947,7 +963,7 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
|
||||
m_bad_repeats = 0;
|
||||
|
||||
if(m_has_recursions)
|
||||
m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
|
||||
m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u);
|
||||
create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
|
||||
// adjust the type of the state to allow for faster matching:
|
||||
state->type = this->get_repeat_type(state);
|
||||
@@ -1102,11 +1118,9 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
}
|
||||
case syntax_element_recurse:
|
||||
{
|
||||
if(state->type == syntax_element_startmark)
|
||||
recursion_sub = static_cast<re_brace*>(state)->index;
|
||||
else
|
||||
recursion_sub = 0;
|
||||
if(m_recursion_checks[recursion_sub])
|
||||
BOOST_ASSERT(static_cast<const re_jump*>(state)->alt.p->type == syntax_element_startmark);
|
||||
recursion_sub = static_cast<re_brace*>(static_cast<const re_jump*>(state)->alt.p)->index;
|
||||
if(m_recursion_checks[recursion_sub] & 1u)
|
||||
{
|
||||
// Infinite recursion!!
|
||||
if(0 == this->m_pdata->m_status) // update the error code if not already set
|
||||
@@ -1131,10 +1145,10 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
recursion_start = state;
|
||||
recursion_restart = state->next.p;
|
||||
state = static_cast<re_jump*>(state)->alt.p;
|
||||
m_recursion_checks[recursion_sub] = true;
|
||||
m_recursion_checks[recursion_sub] |= 1u;
|
||||
break;
|
||||
}
|
||||
m_recursion_checks[recursion_sub] = true;
|
||||
m_recursion_checks[recursion_sub] |= 1u;
|
||||
// can't handle nested recursion here...
|
||||
BOOST_FALLTHROUGH;
|
||||
}
|
||||
@@ -1328,8 +1342,9 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
}
|
||||
p = p->next.p;
|
||||
}
|
||||
if(ok)
|
||||
if(ok && ((m_recursion_checks[static_cast<re_brace*>(state)->index] & 2u) == 0))
|
||||
{
|
||||
m_recursion_checks[static_cast<re_brace*>(state)->index] |= 2u;
|
||||
create_startmap(p->next.p, l_map, pnull, mask);
|
||||
}
|
||||
}
|
||||
@@ -1419,7 +1434,7 @@ bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt)
|
||||
case syntax_element_long_set_rep:
|
||||
{
|
||||
unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
|
||||
if(state_id > sizeof(m_bad_repeats) * CHAR_BIT)
|
||||
if(state_id >= sizeof(m_bad_repeats) * CHAR_BIT)
|
||||
return true; // run out of bits, assume we can't traverse this one.
|
||||
static const boost::uintmax_t one = 1uL;
|
||||
return m_bad_repeats & (one << state_id);
|
||||
|
||||
@@ -511,7 +511,8 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
||||
this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
|
||||
return false;
|
||||
}
|
||||
BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
|
||||
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
|
||||
return false;
|
||||
#ifndef BOOST_NO_STD_DISTANCE
|
||||
if(markid && (this->flags() & regbase::save_subexpression_location))
|
||||
this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
|
||||
@@ -901,7 +902,7 @@ escape_type_class_jump:
|
||||
}
|
||||
if(negative)
|
||||
i = 1 + m_mark_count - i;
|
||||
if(((i > 0) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
|
||||
if(((i > 0) && (i < std::numeric_limits<unsigned>::digits) && (i - 1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_pdata->get_id(i)-1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
|
||||
{
|
||||
m_position = pc;
|
||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
|
||||
@@ -2596,7 +2597,7 @@ option_group_jump:
|
||||
re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
|
||||
alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
|
||||
}
|
||||
else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
|
||||
else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast<re_alt*>(b)->alt.i + this->getoffset(b))) && (static_cast<re_alt*>(b)->alt.i > 0) && this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
|
||||
{
|
||||
// Can't have seen more than one alternative:
|
||||
// Rewind to start of (? sequence:
|
||||
@@ -2860,6 +2861,10 @@ bool basic_regex_parser<charT, traits>::parse_perl_verb()
|
||||
}
|
||||
break;
|
||||
}
|
||||
// Rewind to start of (* sequence:
|
||||
--m_position;
|
||||
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
||||
fail(regex_constants::error_perl_extension, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -68,7 +68,18 @@ typedef enum _match_flags
|
||||
format_no_copy = format_all << 1, /* don't copy non-matching segments. */
|
||||
format_first_only = format_no_copy << 1, /* Only replace first occurance. */
|
||||
format_is_if = format_first_only << 1, /* internal use only. */
|
||||
format_literal = format_is_if << 1 /* treat string as a literal */
|
||||
format_literal = format_is_if << 1, /* treat string as a literal */
|
||||
|
||||
match_not_any = match_not_bol | match_not_eol | match_not_bob
|
||||
| match_not_eob | match_not_bow | match_not_eow | match_not_dot_newline
|
||||
| match_not_dot_null | match_prev_avail | match_init | match_not_null
|
||||
| match_continuous | match_partial | match_stop | match_not_initial_null
|
||||
| match_stop | match_all | match_perl | match_posix | match_nosubs
|
||||
| match_extra | match_single_line | match_unused1 | match_unused2
|
||||
| match_unused3 | match_max | format_perl | format_default | format_sed
|
||||
| format_all | format_no_copy | format_first_only | format_is_if
|
||||
| format_literal
|
||||
|
||||
|
||||
} match_flags;
|
||||
|
||||
|
||||
@@ -161,9 +161,9 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
|
||||
if(*p == static_cast<charT>(0))
|
||||
{
|
||||
// treat null string as special case:
|
||||
if(traits_inst.translate(*ptr, icase) != *p)
|
||||
if(traits_inst.translate(*ptr, icase))
|
||||
{
|
||||
while(*p == static_cast<charT>(0))++p;
|
||||
++p;
|
||||
continue;
|
||||
}
|
||||
return set_->isnot ? next : (ptr == next) ? ++next : ptr;
|
||||
@@ -348,6 +348,7 @@ struct recursion_info
|
||||
const re_syntax_base* preturn_address;
|
||||
Results results;
|
||||
repeater_count<iterator>* repeater_stack;
|
||||
iterator location_of_start;
|
||||
};
|
||||
|
||||
#ifdef BOOST_MSVC
|
||||
|
||||
@@ -90,7 +90,7 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_r
|
||||
match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline);
|
||||
// Disable match_any if requested in the state machine:
|
||||
if(e.get_data().m_disable_match_any)
|
||||
m_match_flags &= ~regex_constants::match_any;
|
||||
m_match_flags &= regex_constants::match_not_any;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
|
||||
@@ -131,8 +131,7 @@ template <class Results>
|
||||
struct saved_recursion : public saved_state
|
||||
{
|
||||
saved_recursion(int idx, const re_syntax_base* p, Results* pr)
|
||||
: saved_state(14), recursion_id(idx), preturn_address(p), results(*pr)
|
||||
{}
|
||||
: saved_state(14), recursion_id(idx), preturn_address(p), results(*pr) {}
|
||||
int recursion_id;
|
||||
const re_syntax_base* preturn_address;
|
||||
Results results;
|
||||
@@ -405,7 +404,11 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
|
||||
m_independent = true;
|
||||
const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
|
||||
pstate = pstate->next.p->next.p;
|
||||
bool r = match_all_states();
|
||||
bool r = false;
|
||||
#if !defined(BOOST_NO_EXCEPTIONS)
|
||||
try{
|
||||
#endif
|
||||
r = match_all_states();
|
||||
if(!r && !m_independent)
|
||||
{
|
||||
// Must be unwinding from a COMMIT/SKIP/PRUNE and the independent
|
||||
@@ -413,8 +416,20 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
|
||||
while(unwind(false));
|
||||
return false;
|
||||
}
|
||||
#if !defined(BOOST_NO_EXCEPTIONS)
|
||||
}
|
||||
catch(...)
|
||||
{
|
||||
pstate = next_pstate;
|
||||
m_independent = old_independent;
|
||||
// unwind all pushed states, apart from anything else this
|
||||
// ensures that all the states are correctly destructed
|
||||
// not just the memory freed.
|
||||
while(unwind(true)) {}
|
||||
throw;
|
||||
}
|
||||
#endif
|
||||
pstate = next_pstate;
|
||||
m_independent = old_independent;
|
||||
#ifdef BOOST_REGEX_MATCH_EXTRA
|
||||
if(r && (m_match_flags & match_extra))
|
||||
{
|
||||
@@ -428,8 +443,23 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
|
||||
for(i = 0; i < temp_match.size(); ++i)
|
||||
(*m_presult)[i].get_captures().clear();
|
||||
// match everything else:
|
||||
r = match_all_states();
|
||||
// now place the stored captures back:
|
||||
#if !defined(BOOST_NO_EXCEPTIONS)
|
||||
try{
|
||||
#endif
|
||||
r = match_all_states();
|
||||
#if !defined(BOOST_NO_EXCEPTIONS)
|
||||
}
|
||||
catch(...)
|
||||
{
|
||||
pstate = next_pstate;
|
||||
// unwind all pushed states, apart from anything else this
|
||||
// ensures that all the states are correctly destructed
|
||||
// not just the memory freed.
|
||||
while(unwind(true)) {}
|
||||
throw;
|
||||
}
|
||||
#endif
|
||||
// now place the stored captures back:
|
||||
for(i = 0; i < temp_match.size(); ++i)
|
||||
{
|
||||
typedef typename sub_match<BidiIterator>::capture_sequence_type seq;
|
||||
@@ -464,14 +494,29 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
|
||||
BidiIterator saved_position = position;
|
||||
const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
|
||||
pstate = pstate->next.p->next.p;
|
||||
bool r = match_all_states();
|
||||
position = saved_position;
|
||||
if(negated)
|
||||
r = !r;
|
||||
if(r)
|
||||
#if !defined(BOOST_NO_EXCEPTIONS)
|
||||
try{
|
||||
#endif
|
||||
bool r = match_all_states();
|
||||
position = saved_position;
|
||||
if(negated)
|
||||
r = !r;
|
||||
if(r)
|
||||
pstate = next_pstate;
|
||||
else
|
||||
pstate = alt->alt.p;
|
||||
#if !defined(BOOST_NO_EXCEPTIONS)
|
||||
}
|
||||
catch(...)
|
||||
{
|
||||
pstate = next_pstate;
|
||||
else
|
||||
pstate = alt->alt.p;
|
||||
// unwind all pushed states, apart from anything else this
|
||||
// ensures that all the states are correctly destructed
|
||||
// not just the memory freed.
|
||||
while(unwind(true)){}
|
||||
throw;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -953,6 +998,19 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
|
||||
{
|
||||
BOOST_ASSERT(pstate->type == syntax_element_recurse);
|
||||
//
|
||||
// See if we've seen this recursion before at this location, if we have then
|
||||
// we need to prevent infinite recursion:
|
||||
//
|
||||
for(typename std::vector<recursion_info<results_type> >::const_reverse_iterator i = recursion_stack.rbegin(); i != recursion_stack.rend(); ++i)
|
||||
{
|
||||
if(i->idx == static_cast<const re_brace*>(static_cast<const re_jump*>(pstate)->alt.p)->index)
|
||||
{
|
||||
if(i->location_of_start == position)
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
//
|
||||
// Backup call stack:
|
||||
//
|
||||
push_recursion_pop();
|
||||
@@ -968,6 +1026,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
|
||||
recursion_stack.back().results = *m_presult;
|
||||
pstate = static_cast<const re_jump*>(pstate)->alt.p;
|
||||
recursion_stack.back().idx = static_cast<const re_brace*>(pstate)->index;
|
||||
recursion_stack.back().location_of_start = position;
|
||||
//if(static_cast<const re_recurse*>(pstate)->state_id > 0)
|
||||
{
|
||||
push_repeater_count(-(2 + static_cast<const re_brace*>(pstate)->index), &next_count);
|
||||
@@ -1705,6 +1764,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion(bool r)
|
||||
recursion_stack.back().idx = pmp->recursion_id;
|
||||
recursion_stack.back().preturn_address = pmp->preturn_address;
|
||||
recursion_stack.back().results = pmp->results;
|
||||
recursion_stack.back().location_of_start = position;
|
||||
}
|
||||
boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++);
|
||||
m_backup_state = pmp;
|
||||
|
||||
@@ -900,10 +900,27 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
|
||||
{
|
||||
recursion_stack.reserve(50);
|
||||
}
|
||||
//
|
||||
// See if we've seen this recursion before at this location, if we have then
|
||||
// we need to prevent infinite recursion:
|
||||
//
|
||||
for(typename std::vector<recursion_info<results_type> >::const_reverse_iterator i = recursion_stack.rbegin(); i != recursion_stack.rend(); ++i)
|
||||
{
|
||||
if(i->idx == static_cast<const re_brace*>(static_cast<const re_jump*>(pstate)->alt.p)->index)
|
||||
{
|
||||
if(i->location_of_start == position)
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
//
|
||||
// Now get on with it:
|
||||
//
|
||||
recursion_stack.push_back(recursion_info<results_type>());
|
||||
recursion_stack.back().preturn_address = pstate->next.p;
|
||||
recursion_stack.back().results = *m_presult;
|
||||
recursion_stack.back().repeater_stack = next_count;
|
||||
recursion_stack.back().location_of_start = position;
|
||||
pstate = static_cast<const re_jump*>(pstate)->alt.p;
|
||||
recursion_stack.back().idx = static_cast<const re_brace*>(pstate)->index;
|
||||
|
||||
@@ -979,6 +996,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
|
||||
recursion_stack.push_back(recursion_info<results_type>());
|
||||
recursion_stack.back().preturn_address = saved_state;
|
||||
recursion_stack.back().results = *m_presult;
|
||||
recursion_stack.back().location_of_start = position;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
#include <algorithm>
|
||||
#include <iosfwd>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <boost/limits.hpp>
|
||||
#include <boost/assert.hpp>
|
||||
|
||||
@@ -26,10 +26,10 @@ EX_SOURCES =
|
||||
winstances.cpp
|
||||
usinstances.cpp ;
|
||||
|
||||
lib boost_regex_extra : $(EX_SOURCES)
|
||||
lib boost_regex_extra : $(EX_SOURCES) ../../build//icu_options
|
||||
:
|
||||
<define>BOOST_REGEX_MATCH_EXTRA=1
|
||||
<link>shared:<define>BOOST_REGEX_DYN_LINK=1
|
||||
<link>shared:<define>BOOST_REGEX_DYN_LINK=1
|
||||
:
|
||||
;
|
||||
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
# copyright John Maddock 2003
|
||||
# Distributed under the Boost Software License, Version 1.0.
|
||||
# (See accompanying file LICENSE_1_0.txt or copy at
|
||||
# http://www.boost.org/LICENSE_1_0.txt.
|
||||
|
||||
import testing ;
|
||||
|
||||
lib Fuzzer : : <search>. ;
|
||||
|
||||
run narrow.cpp [ glob ../../src/*.cpp ] Fuzzer
|
||||
: # additional args
|
||||
-dict=dictionary.txt -workers=3 corpus -runs=5000
|
||||
: # test-files
|
||||
: # requirements
|
||||
<toolset>clang <cxxflags>-fsanitize-coverage=trace-pc-guard
|
||||
<cxxflags>-fsanitize=address <cxxflags>-fsanitize=undefined
|
||||
<cxxflags>-fno-sanitize-recover=undefined <cxxflags>-fno-optimize-sibling-calls
|
||||
<cxxflags>-fno-omit-frame-pointer
|
||||
<include>../../../..
|
||||
<linkflags>-fsanitize=address <linkflags>-fsanitize=undefined
|
||||
debug
|
||||
;
|
||||
|
||||
run wide.cpp [ glob ../../src/*.cpp ] Fuzzer
|
||||
: # additional args
|
||||
-dict=dictionary.txt -workers=3 corpus -runs=5000
|
||||
: # test-files
|
||||
: # requirements
|
||||
<toolset>clang <cxxflags>-fsanitize-coverage=trace-pc-guard
|
||||
<cxxflags>-fsanitize=address <cxxflags>-fsanitize=undefined
|
||||
<cxxflags>-fno-sanitize-recover=undefined <cxxflags>-fno-optimize-sibling-calls
|
||||
<cxxflags>-fno-omit-frame-pointer
|
||||
<include>../../../..
|
||||
<linkflags>-fsanitize=address <linkflags>-fsanitize=undefined
|
||||
debug
|
||||
;
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1 @@
|
||||
Z(((((((a+)+)+)+)+)+)+)+|Y(((((((a+)+)+)+)+)+)+)+|X(((((((a+)+)+)+)+)+)+)+|W(((((((a+)+)+)+)+)+)+)+|V(((((((a+)+)+)+)+)+)+)+|CZ(((((((a+)+)+)+)+)+)+)+|CY(((((((a+)+)+)+)+)+)+;+|CX(((((((a+)+)+)+)+)+)+)+|CW(((((((a+)+)+)+)+)+)+)+|CV(((((((a+)+)+)+)+)+)+)+|(a+)+b)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1 @@
|
||||
Z(((((((a+)+)+)+)+ßÔÖÔ)+|Y(((((((a+)+)+)+)+)++)+)|X(((((((a+)+)+)+)+)+)+)+|W((Ú×(((a+)+)+)+)+)+)+)+;|V((()++)+)+|CW(((((((a+)+)+)+((((a+)+)+)+)+)+)+)+|CZ(((((((a+)+)+)+)+)+)+)+|CY(((((((a+)+)+)+)+)+)+)+|CX(((((((a+)+)+)+)+)+)+)+|CW(((((((a+)+)+)+)+)+)+)+|CV,((((((a+)+)+)+)+)+)+)+|(a+)+bc
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1 @@
|
||||
8^\l*(?:((.Ô*?)\W*(?1)\§*\2[abc]| |((.)\)W*(?1)\W*\4|\ZZZZZZZZZZZZZZZZZZZZZZ˙˙˙˙˙˙˙˙˙ŔŔŔŔŔŔŔŔŔŔŔŔŔŔŔŔŔŔ˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙\pd\)W*(?1)\W*\4|\ZZZZZZZZZZZZZZZZZZZZZZ˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙\pd˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙\pdquick brown )ox
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user