Merge branch 'de_fuzz' into develop

This commit is contained in:
jzmaddock
2017-02-24 13:09:33 +00:00
3874 changed files with 7288 additions and 113 deletions

View File

@ -28,6 +28,8 @@ following special characters:
[pre .\[{}()\\\*+?|^$] [pre .\[{}()\\\*+?|^$]
Other characters are special only in certain situations - for example `]` is special only after an opening `[`.
[h4 Wildcard] [h4 Wildcard]
The single character '.' when used outside of a character set will match The single character '.' when used outside of a character set will match

View File

@ -152,7 +152,7 @@
# if defined(BOOST_REGEX_NO_W32) || BOOST_PLAT_WINDOWS_RUNTIME # if defined(BOOST_REGEX_NO_W32) || BOOST_PLAT_WINDOWS_RUNTIME
# define BOOST_REGEX_NO_FILEITER # define BOOST_REGEX_NO_FILEITER
# endif # endif
#else // defined(_WIN32) #else /* defined(_WIN32) */
# if !defined(BOOST_HAS_DIRENT_H) # if !defined(BOOST_HAS_DIRENT_H)
# define BOOST_REGEX_NO_FILEITER # define BOOST_REGEX_NO_FILEITER
# endif # endif

View File

@ -410,7 +410,7 @@ void copy_results(MR1& out, MR2 const& in)
if(in[i].captures().size()) if(in[i].captures().size())
{ {
out[i].get_captures().assign(in[i].captures().size(), typename MR1::value_type()); out[i].get_captures().assign(in[i].captures().size(), typename MR1::value_type());
for(int j = 0; j < out[i].captures().size(); ++j) for(int j = 0; j < (int)out[i].captures().size(); ++j)
{ {
out[i].get_captures()[j].first = in[i].captures()[j].first.base(); out[i].get_captures()[j].first = in[i].captures()[j].first.base();
out[i].get_captures()[j].second = in[i].captures()[j].second.base(); out[i].get_captures()[j].second = in[i].captures()[j].second.base();

View File

@ -117,7 +117,10 @@ inline unsigned utf8_trailing_byte_count(boost::uint8_t c)
#pragma warning(push) #pragma warning(push)
#pragma warning(disable:4100) #pragma warning(disable:4100)
#endif #endif
BOOST_NORETURN inline void invalid_utf32_code_point(::boost::uint32_t val) #ifndef BOOST_NO_EXCEPTIONS
BOOST_NORETURN
#endif
inline void invalid_utf32_code_point(::boost::uint32_t val)
{ {
#ifndef BOOST_NO_STD_LOCALE #ifndef BOOST_NO_STD_LOCALE
std::stringstream ss; std::stringstream ss;

View File

@ -77,15 +77,15 @@ public:
void add_single(const digraph_type& s) void add_single(const digraph_type& s)
{ {
m_singles.insert(m_singles.end(), s); m_singles.insert(s);
if(s.second) if(s.second)
m_has_digraphs = true; m_has_digraphs = true;
m_empty = false; m_empty = false;
} }
void add_range(const digraph_type& first, const digraph_type& end) void add_range(const digraph_type& first, const digraph_type& end)
{ {
m_ranges.insert(m_ranges.end(), first); m_ranges.push_back(first);
m_ranges.insert(m_ranges.end(), end); m_ranges.push_back(end);
if(first.second) if(first.second)
{ {
m_has_digraphs = true; m_has_digraphs = true;
@ -110,7 +110,7 @@ public:
} }
void add_equivalent(const digraph_type& s) void add_equivalent(const digraph_type& s)
{ {
m_equivalents.insert(m_equivalents.end(), s); m_equivalents.insert(s);
if(s.second) if(s.second)
{ {
m_has_digraphs = true; m_has_digraphs = true;
@ -136,11 +136,12 @@ public:
return m_negate; return m_negate;
} }
typedef typename std::vector<digraph_type>::const_iterator list_iterator; typedef typename std::vector<digraph_type>::const_iterator list_iterator;
list_iterator singles_begin()const typedef typename std::set<digraph_type>::const_iterator set_iterator;
set_iterator singles_begin()const
{ {
return m_singles.begin(); return m_singles.begin();
} }
list_iterator singles_end()const set_iterator singles_end()const
{ {
return m_singles.end(); return m_singles.end();
} }
@ -152,11 +153,11 @@ public:
{ {
return m_ranges.end(); return m_ranges.end();
} }
list_iterator equivalents_begin()const set_iterator equivalents_begin()const
{ {
return m_equivalents.begin(); return m_equivalents.begin();
} }
list_iterator equivalents_end()const set_iterator equivalents_end()const
{ {
return m_equivalents.end(); return m_equivalents.end();
} }
@ -173,14 +174,14 @@ public:
return m_empty; return m_empty;
} }
private: private:
std::vector<digraph_type> m_singles; // a list of single characters to match std::set<digraph_type> m_singles; // a list of single characters to match
std::vector<digraph_type> m_ranges; // a list of end points of our ranges std::vector<digraph_type> m_ranges; // a list of end points of our ranges
bool m_negate; // true if the set is to be negated bool m_negate; // true if the set is to be negated
bool m_has_digraphs; // true if we have digraphs present bool m_has_digraphs; // true if we have digraphs present
m_type m_classes; // character classes to match m_type m_classes; // character classes to match
m_type m_negated_classes; // negated character classes to match m_type m_negated_classes; // negated character classes to match
bool m_empty; // whether we've added anything yet bool m_empty; // whether we've added anything yet
std::vector<digraph_type> m_equivalents; // a list of equivalence classes std::set<digraph_type> m_equivalents; // a list of equivalence classes
}; };
template <class charT, class traits> template <class charT, class traits>
@ -239,7 +240,7 @@ protected:
unsigned m_backrefs; // bitmask of permitted backrefs unsigned m_backrefs; // bitmask of permitted backrefs
boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for; boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
bool m_has_recursions; // set when we have recursive expressions to fixup bool m_has_recursions; // set when we have recursive expressions to fixup
std::vector<bool> m_recursion_checks; // notes which recursions we've followed while analysing this expression std::vector<unsigned char> m_recursion_checks; // notes which recursions we've followed while analysing this expression
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character
@ -365,6 +366,7 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
{ {
typedef typename traits::string_type string_type; typedef typename traits::string_type string_type;
typedef typename basic_char_set<charT, traits>::list_iterator item_iterator; typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
typedef typename basic_char_set<charT, traits>::set_iterator set_iterator;
typedef typename traits::char_class_type m_type; typedef typename traits::char_class_type m_type;
re_set_long<m_type>* result = static_cast<re_set_long<m_type>*>(append_state(syntax_element_long_set, sizeof(re_set_long<m_type>))); re_set_long<m_type>* result = static_cast<re_set_long<m_type>*>(append_state(syntax_element_long_set, sizeof(re_set_long<m_type>)));
@ -395,20 +397,25 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
// now extend with all the singles: // now extend with all the singles:
// //
item_iterator first, last; item_iterator first, last;
first = char_set.singles_begin(); set_iterator sfirst, slast;
last = char_set.singles_end(); sfirst = char_set.singles_begin();
while(first != last) slast = char_set.singles_end();
while(sfirst != slast)
{ {
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (first->second ? 3 : 2))); charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (sfirst->first == static_cast<charT>(0) ? 1 : sfirst->second ? 3 : 2)));
p[0] = m_traits.translate(first->first, m_icase); p[0] = m_traits.translate(sfirst->first, m_icase);
if(first->second) if(sfirst->first == static_cast<charT>(0))
{ {
p[1] = m_traits.translate(first->second, m_icase); p[0] = 0;
}
else if(sfirst->second)
{
p[1] = m_traits.translate(sfirst->second, m_icase);
p[2] = 0; p[2] = 0;
} }
else else
p[1] = 0; p[1] = 0;
++first; ++sfirst;
} }
// //
// now extend with all the ranges: // now extend with all the ranges:
@ -472,24 +479,24 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
// //
// now process the equivalence classes: // now process the equivalence classes:
// //
first = char_set.equivalents_begin(); sfirst = char_set.equivalents_begin();
last = char_set.equivalents_end(); slast = char_set.equivalents_end();
while(first != last) while(sfirst != slast)
{ {
string_type s; string_type s;
if(first->second) if(sfirst->second)
{ {
charT cs[3] = { first->first, first->second, charT(0), }; charT cs[3] = { sfirst->first, sfirst->second, charT(0), };
s = m_traits.transform_primary(cs, cs+2); s = m_traits.transform_primary(cs, cs+2);
} }
else else
s = m_traits.transform_primary(&first->first, &first->first+1); s = m_traits.transform_primary(&sfirst->first, &sfirst->first+1);
if(s.empty()) if(s.empty())
return 0; // invalid or unsupported equivalence class return 0; // invalid or unsupported equivalence class
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) ); charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
BOOST_REGEX_DETAIL_NS::copy(s.begin(), s.end(), p); BOOST_REGEX_DETAIL_NS::copy(s.begin(), s.end(), p);
p[s.size()] = charT(0); p[s.size()] = charT(0);
++first; ++sfirst;
} }
// //
// finally reset the address of our last state: // finally reset the address of our last state:
@ -518,6 +525,7 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
{ {
typedef typename traits::string_type string_type; typedef typename traits::string_type string_type;
typedef typename basic_char_set<charT, traits>::list_iterator item_iterator; typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
typedef typename basic_char_set<charT, traits>::set_iterator set_iterator;
re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set))); re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set)));
bool negate = char_set.is_negated(); bool negate = char_set.is_negated();
@ -526,17 +534,18 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
// handle singles first: // handle singles first:
// //
item_iterator first, last; item_iterator first, last;
first = char_set.singles_begin(); set_iterator sfirst, slast;
last = char_set.singles_end(); sfirst = char_set.singles_begin();
while(first != last) slast = char_set.singles_end();
while(sfirst != slast)
{ {
for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i) for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i)
{ {
if(this->m_traits.translate(static_cast<charT>(i), this->m_icase) if(this->m_traits.translate(static_cast<charT>(i), this->m_icase)
== this->m_traits.translate(first->first, this->m_icase)) == this->m_traits.translate(sfirst->first, this->m_icase))
result->_map[i] = true; result->_map[i] = true;
} }
++first; ++sfirst;
} }
// //
// OK now handle ranges: // OK now handle ranges:
@ -623,13 +632,13 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
// //
// now process the equivalence classes: // now process the equivalence classes:
// //
first = char_set.equivalents_begin(); sfirst = char_set.equivalents_begin();
last = char_set.equivalents_end(); slast = char_set.equivalents_end();
while(first != last) while(sfirst != slast)
{ {
string_type s; string_type s;
BOOST_ASSERT(static_cast<charT>(0) == first->second); BOOST_ASSERT(static_cast<charT>(0) == sfirst->second);
s = m_traits.transform_primary(&first->first, &first->first+1); s = m_traits.transform_primary(&sfirst->first, &sfirst->first+1);
if(s.empty()) if(s.empty())
return 0; // invalid or unsupported equivalence class return 0; // invalid or unsupported equivalence class
for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
@ -639,7 +648,7 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
if(s == s2) if(s == s2)
result->_map[i] = true; result->_map[i] = true;
} }
++first; ++sfirst;
} }
if(negate) if(negate)
{ {
@ -690,7 +699,7 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
m_bad_repeats = 0; m_bad_repeats = 0;
if(m_has_recursions) if(m_has_recursions)
m_recursion_checks.assign(1 + m_pdata->m_mark_count, false); m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u);
create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all); create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
// get the restart type: // get the restart type:
m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state); m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
@ -792,6 +801,12 @@ void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
// //
idx = m_pdata->get_id(static_cast<int>(idx)); idx = m_pdata->get_id(static_cast<int>(idx));
} }
if(idx < 0)
{
ok = false;
}
else
{
while(p) while(p)
{ {
if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == idx)) if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == idx))
@ -837,6 +852,7 @@ void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
} }
p = p->next.p; p = p->next.p;
} }
}
if(!ok) if(!ok)
{ {
// recursion to sub-expression that doesn't exist: // recursion to sub-expression that doesn't exist:
@ -934,7 +950,7 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
{ {
// Initialize m_recursion_checks if we need it: // Initialize m_recursion_checks if we need it:
if(m_has_recursions) if(m_has_recursions)
m_recursion_checks.assign(1 + m_pdata->m_mark_count, false); m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u);
const std::pair<bool, re_syntax_base*>& p = v.back(); const std::pair<bool, re_syntax_base*>& p = v.back();
m_icase = p.first; m_icase = p.first;
@ -947,7 +963,7 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
m_bad_repeats = 0; m_bad_repeats = 0;
if(m_has_recursions) if(m_has_recursions)
m_recursion_checks.assign(1 + m_pdata->m_mark_count, false); m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u);
create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip); create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
// adjust the type of the state to allow for faster matching: // adjust the type of the state to allow for faster matching:
state->type = this->get_repeat_type(state); state->type = this->get_repeat_type(state);
@ -1102,11 +1118,9 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
} }
case syntax_element_recurse: case syntax_element_recurse:
{ {
if(state->type == syntax_element_startmark) BOOST_ASSERT(static_cast<const re_jump*>(state)->alt.p->type == syntax_element_startmark);
recursion_sub = static_cast<re_brace*>(state)->index; recursion_sub = static_cast<re_brace*>(static_cast<const re_jump*>(state)->alt.p)->index;
else if(m_recursion_checks[recursion_sub] & 1u)
recursion_sub = 0;
if(m_recursion_checks[recursion_sub])
{ {
// Infinite recursion!! // Infinite recursion!!
if(0 == this->m_pdata->m_status) // update the error code if not already set if(0 == this->m_pdata->m_status) // update the error code if not already set
@ -1131,10 +1145,10 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
recursion_start = state; recursion_start = state;
recursion_restart = state->next.p; recursion_restart = state->next.p;
state = static_cast<re_jump*>(state)->alt.p; state = static_cast<re_jump*>(state)->alt.p;
m_recursion_checks[recursion_sub] = true; m_recursion_checks[recursion_sub] |= 1u;
break; break;
} }
m_recursion_checks[recursion_sub] = true; m_recursion_checks[recursion_sub] |= 1u;
// can't handle nested recursion here... // can't handle nested recursion here...
BOOST_FALLTHROUGH; BOOST_FALLTHROUGH;
} }
@ -1328,8 +1342,9 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
} }
p = p->next.p; p = p->next.p;
} }
if(ok) if(ok && ((m_recursion_checks[static_cast<re_brace*>(state)->index] & 2u) == 0))
{ {
m_recursion_checks[static_cast<re_brace*>(state)->index] |= 2u;
create_startmap(p->next.p, l_map, pnull, mask); create_startmap(p->next.p, l_map, pnull, mask);
} }
} }
@ -1419,7 +1434,7 @@ bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt)
case syntax_element_long_set_rep: case syntax_element_long_set_rep:
{ {
unsigned state_id = static_cast<re_repeat*>(pt)->state_id; unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
if(state_id > sizeof(m_bad_repeats) * CHAR_BIT) if(state_id >= sizeof(m_bad_repeats) * CHAR_BIT)
return true; // run out of bits, assume we can't traverse this one. return true; // run out of bits, assume we can't traverse this one.
static const boost::uintmax_t one = 1uL; static const boost::uintmax_t one = 1uL;
return m_bad_repeats & (one << state_id); return m_bad_repeats & (one << state_id);

View File

@ -511,7 +511,8 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end)); this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
return false; return false;
} }
BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
return false;
#ifndef BOOST_NO_STD_DISTANCE #ifndef BOOST_NO_STD_DISTANCE
if(markid && (this->flags() & regbase::save_subexpression_location)) if(markid && (this->flags() & regbase::save_subexpression_location))
this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position); this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
@ -901,7 +902,7 @@ escape_type_class_jump:
} }
if(negative) if(negative)
i = 1 + m_mark_count - i; i = 1 + m_mark_count - i;
if(((i > 0) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1))))) if(((i > 0) && (i < std::numeric_limits<unsigned>::digits) && (i - 1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_pdata->get_id(i)-1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
{ {
m_position = pc; m_position = pc;
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace))); re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
@ -2596,7 +2597,7 @@ option_group_jump:
re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt))); re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt); alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
} }
else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt) else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast<re_alt*>(b)->alt.i + this->getoffset(b))) && (static_cast<re_alt*>(b)->alt.i > 0) && this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
{ {
// Can't have seen more than one alternative: // Can't have seen more than one alternative:
// Rewind to start of (? sequence: // Rewind to start of (? sequence:
@ -2860,6 +2861,10 @@ bool basic_regex_parser<charT, traits>::parse_perl_verb()
} }
break; break;
} }
// Rewind to start of (* sequence:
--m_position;
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
fail(regex_constants::error_perl_extension, m_position - m_base);
return false; return false;
} }

View File

@ -68,7 +68,18 @@ typedef enum _match_flags
format_no_copy = format_all << 1, /* don't copy non-matching segments. */ format_no_copy = format_all << 1, /* don't copy non-matching segments. */
format_first_only = format_no_copy << 1, /* Only replace first occurrence. */ format_first_only = format_no_copy << 1, /* Only replace first occurrence. */
format_is_if = format_first_only << 1, /* internal use only. */ format_is_if = format_first_only << 1, /* internal use only. */
format_literal = format_is_if << 1 /* treat string as a literal */ format_literal = format_is_if << 1, /* treat string as a literal */
match_not_any = match_not_bol | match_not_eol | match_not_bob
| match_not_eob | match_not_bow | match_not_eow | match_not_dot_newline
| match_not_dot_null | match_prev_avail | match_init | match_not_null
| match_continuous | match_partial | match_stop | match_not_initial_null
| match_stop | match_all | match_perl | match_posix | match_nosubs
| match_extra | match_single_line | match_unused1 | match_unused2
| match_unused3 | match_max | format_perl | format_default | format_sed
| format_all | format_no_copy | format_first_only | format_is_if
| format_literal
} match_flags; } match_flags;

View File

@ -161,9 +161,9 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
if(*p == static_cast<charT>(0)) if(*p == static_cast<charT>(0))
{ {
// treat null string as special case: // treat null string as special case:
if(traits_inst.translate(*ptr, icase) != *p) if(traits_inst.translate(*ptr, icase))
{ {
while(*p == static_cast<charT>(0))++p; ++p;
continue; continue;
} }
return set_->isnot ? next : (ptr == next) ? ++next : ptr; return set_->isnot ? next : (ptr == next) ? ++next : ptr;
@ -348,6 +348,7 @@ struct recursion_info
const re_syntax_base* preturn_address; const re_syntax_base* preturn_address;
Results results; Results results;
repeater_count<iterator>* repeater_stack; repeater_count<iterator>* repeater_stack;
iterator location_of_start;
}; };
#ifdef BOOST_MSVC #ifdef BOOST_MSVC

View File

@ -90,7 +90,7 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_r
match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline); match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline);
// Disable match_any if requested in the state machine: // Disable match_any if requested in the state machine:
if(e.get_data().m_disable_match_any) if(e.get_data().m_disable_match_any)
m_match_flags &= ~regex_constants::match_any; m_match_flags &= regex_constants::match_not_any;
} }
template <class BidiIterator, class Allocator, class traits> template <class BidiIterator, class Allocator, class traits>

View File

@ -131,8 +131,7 @@ template <class Results>
struct saved_recursion : public saved_state struct saved_recursion : public saved_state
{ {
saved_recursion(int idx, const re_syntax_base* p, Results* pr) saved_recursion(int idx, const re_syntax_base* p, Results* pr)
: saved_state(14), recursion_id(idx), preturn_address(p), results(*pr) : saved_state(14), recursion_id(idx), preturn_address(p), results(*pr) {}
{}
int recursion_id; int recursion_id;
const re_syntax_base* preturn_address; const re_syntax_base* preturn_address;
Results results; Results results;
@ -405,7 +404,11 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
m_independent = true; m_independent = true;
const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p; const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
pstate = pstate->next.p->next.p; pstate = pstate->next.p->next.p;
bool r = match_all_states(); bool r = false;
#if !defined(BOOST_NO_EXCEPTIONS)
try{
#endif
r = match_all_states();
if(!r && !m_independent) if(!r && !m_independent)
{ {
// Must be unwinding from a COMMIT/SKIP/PRUNE and the independent // Must be unwinding from a COMMIT/SKIP/PRUNE and the independent
@ -413,6 +416,18 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
while(unwind(false)); while(unwind(false));
return false; return false;
} }
#if !defined(BOOST_NO_EXCEPTIONS)
}
catch(...)
{
pstate = next_pstate;
// unwind all pushed states, apart from anything else this
// ensures that all the states are correctly destructed
// not just the memory freed.
while(unwind(true)) {}
throw;
}
#endif
pstate = next_pstate; pstate = next_pstate;
m_independent = old_independent; m_independent = old_independent;
#ifdef BOOST_REGEX_MATCH_EXTRA #ifdef BOOST_REGEX_MATCH_EXTRA
@ -428,7 +443,22 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
for(i = 0; i < temp_match.size(); ++i) for(i = 0; i < temp_match.size(); ++i)
(*m_presult)[i].get_captures().clear(); (*m_presult)[i].get_captures().clear();
// match everything else: // match everything else:
#if !defined(BOOST_NO_EXCEPTIONS)
try{
#endif
r = match_all_states(); r = match_all_states();
#if !defined(BOOST_NO_EXCEPTIONS)
}
catch(...)
{
pstate = next_pstate;
// unwind all pushed states, apart from anything else this
// ensures that all the states are correctly destructed
// not just the memory freed.
while(unwind(true)) {}
throw;
}
#endif
// now place the stored captures back: // now place the stored captures back:
for(i = 0; i < temp_match.size(); ++i) for(i = 0; i < temp_match.size(); ++i)
{ {
@ -464,6 +494,9 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
BidiIterator saved_position = position; BidiIterator saved_position = position;
const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p; const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
pstate = pstate->next.p->next.p; pstate = pstate->next.p->next.p;
#if !defined(BOOST_NO_EXCEPTIONS)
try{
#endif
bool r = match_all_states(); bool r = match_all_states();
position = saved_position; position = saved_position;
if(negated) if(negated)
@ -472,6 +505,18 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
pstate = next_pstate; pstate = next_pstate;
else else
pstate = alt->alt.p; pstate = alt->alt.p;
#if !defined(BOOST_NO_EXCEPTIONS)
}
catch(...)
{
pstate = next_pstate;
// unwind all pushed states, apart from anything else this
// ensures that all the states are correctly destructed
// not just the memory freed.
while(unwind(true)){}
throw;
}
#endif
break; break;
} }
} }
@ -953,6 +998,19 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
{ {
BOOST_ASSERT(pstate->type == syntax_element_recurse); BOOST_ASSERT(pstate->type == syntax_element_recurse);
// //
// See if we've seen this recursion before at this location, if we have then
// we need to prevent infinite recursion:
//
for(typename std::vector<recursion_info<results_type> >::const_reverse_iterator i = recursion_stack.rbegin(); i != recursion_stack.rend(); ++i)
{
if(i->idx == static_cast<const re_brace*>(static_cast<const re_jump*>(pstate)->alt.p)->index)
{
if(i->location_of_start == position)
return false;
break;
}
}
//
// Backup call stack: // Backup call stack:
// //
push_recursion_pop(); push_recursion_pop();
@ -968,6 +1026,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
recursion_stack.back().results = *m_presult; recursion_stack.back().results = *m_presult;
pstate = static_cast<const re_jump*>(pstate)->alt.p; pstate = static_cast<const re_jump*>(pstate)->alt.p;
recursion_stack.back().idx = static_cast<const re_brace*>(pstate)->index; recursion_stack.back().idx = static_cast<const re_brace*>(pstate)->index;
recursion_stack.back().location_of_start = position;
//if(static_cast<const re_recurse*>(pstate)->state_id > 0) //if(static_cast<const re_recurse*>(pstate)->state_id > 0)
{ {
push_repeater_count(-(2 + static_cast<const re_brace*>(pstate)->index), &next_count); push_repeater_count(-(2 + static_cast<const re_brace*>(pstate)->index), &next_count);
@ -1705,6 +1764,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion(bool r)
recursion_stack.back().idx = pmp->recursion_id; recursion_stack.back().idx = pmp->recursion_id;
recursion_stack.back().preturn_address = pmp->preturn_address; recursion_stack.back().preturn_address = pmp->preturn_address;
recursion_stack.back().results = pmp->results; recursion_stack.back().results = pmp->results;
recursion_stack.back().location_of_start = position;
} }
boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++);
m_backup_state = pmp; m_backup_state = pmp;

View File

@ -900,10 +900,27 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
{ {
recursion_stack.reserve(50); recursion_stack.reserve(50);
} }
//
// See if we've seen this recursion before at this location, if we have then
// we need to prevent infinite recursion:
//
for(typename std::vector<recursion_info<results_type> >::const_reverse_iterator i = recursion_stack.rbegin(); i != recursion_stack.rend(); ++i)
{
if(i->idx == static_cast<const re_brace*>(static_cast<const re_jump*>(pstate)->alt.p)->index)
{
if(i->location_of_start == position)
return false;
break;
}
}
//
// Now get on with it:
//
recursion_stack.push_back(recursion_info<results_type>()); recursion_stack.push_back(recursion_info<results_type>());
recursion_stack.back().preturn_address = pstate->next.p; recursion_stack.back().preturn_address = pstate->next.p;
recursion_stack.back().results = *m_presult; recursion_stack.back().results = *m_presult;
recursion_stack.back().repeater_stack = next_count; recursion_stack.back().repeater_stack = next_count;
recursion_stack.back().location_of_start = position;
pstate = static_cast<const re_jump*>(pstate)->alt.p; pstate = static_cast<const re_jump*>(pstate)->alt.p;
recursion_stack.back().idx = static_cast<const re_brace*>(pstate)->index; recursion_stack.back().idx = static_cast<const re_brace*>(pstate)->index;
@ -979,6 +996,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
recursion_stack.push_back(recursion_info<results_type>()); recursion_stack.push_back(recursion_info<results_type>());
recursion_stack.back().preturn_address = saved_state; recursion_stack.back().preturn_address = saved_state;
recursion_stack.back().results = *m_presult; recursion_stack.back().results = *m_presult;
recursion_stack.back().location_of_start = position;
return false; return false;
} }
return true; return true;

View File

@ -33,6 +33,7 @@
#include <algorithm> #include <algorithm>
#include <iosfwd> #include <iosfwd>
#include <vector> #include <vector>
#include <set>
#include <map> #include <map>
#include <boost/limits.hpp> #include <boost/limits.hpp>
#include <boost/assert.hpp> #include <boost/assert.hpp>

View File

@ -26,7 +26,7 @@ EX_SOURCES =
winstances.cpp winstances.cpp
usinstances.cpp ; usinstances.cpp ;
lib boost_regex_extra : $(EX_SOURCES) lib boost_regex_extra : $(EX_SOURCES) ../../build//icu_options
: :
<define>BOOST_REGEX_MATCH_EXTRA=1 <define>BOOST_REGEX_MATCH_EXTRA=1
<link>shared:<define>BOOST_REGEX_DYN_LINK=1 <link>shared:<define>BOOST_REGEX_DYN_LINK=1

37
test/de_fuzz/Jamfile.v2 Normal file
View File

@ -0,0 +1,37 @@
# copyright John Maddock 2003
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
import testing ;
# Library target "Fuzzer": declared with no sources, so it is looked up rather
# than built; <search>. adds this directory to the library search path.
# NOTE(review): presumably the prebuilt libFuzzer driver library - confirm.
lib Fuzzer : : <search>. ;
# Fuzz run for narrow.cpp: compiled together with every .cpp found under
# ../../src and linked against Fuzzer. The "additional args" section is handed
# to the resulting executable when the test is run (presumably libFuzzer
# options: a dictionary file, a worker count, the corpus directory and a run
# limit - confirm against the fuzzer driver in use).
# The requirements restrict the target to the clang toolset, enable
# trace-pc-guard coverage instrumentation plus the address and undefined
# sanitizers at both compile and link time, and select the debug variant.
run narrow.cpp [ glob ../../src/*.cpp ] Fuzzer
: # additional args
-dict=dictionary.txt -workers=3 corpus -runs=5000
: # test-files
: # requirements
<toolset>clang <cxxflags>-fsanitize-coverage=trace-pc-guard
<cxxflags>-fsanitize=address <cxxflags>-fsanitize=undefined
<cxxflags>-fno-sanitize-recover=undefined <cxxflags>-fno-optimize-sibling-calls
<cxxflags>-fno-omit-frame-pointer
<include>../../../..
<linkflags>-fsanitize=address <linkflags>-fsanitize=undefined
debug
;
# Fuzz run for wide.cpp: same sources, arguments and requirements as the
# narrow.cpp run above; only the driver source file differs.
run wide.cpp [ glob ../../src/*.cpp ] Fuzzer
: # additional args
-dict=dictionary.txt -workers=3 corpus -runs=5000
: # test-files
: # requirements
<toolset>clang <cxxflags>-fsanitize-coverage=trace-pc-guard
<cxxflags>-fsanitize=address <cxxflags>-fsanitize=undefined
<cxxflags>-fno-sanitize-recover=undefined <cxxflags>-fno-optimize-sibling-calls
<cxxflags>-fno-omit-frame-pointer
<include>../../../..
<linkflags>-fsanitize=address <linkflags>-fsanitize=undefined
debug
;

View File

@ -0,0 +1 @@
Z(((((((a+)+)+)+)+)+)+)+|Y(((((((a+)+)+)+)+)+)+)+|X(((((((a+)+)+)+)+)+)+)+|W(((((((a+)+)+)+)+)+)+)+|V(((((((a+)+)+)+)+)+)+)+|CZ(((((((a+)+)+)+)+)+)+)+|CY(((((((a+)+)+)+)+)+)+;+|CX(((((((a+)+)+)+)+)+)+)+|CW(((((((a+)+)+)+)+)+)+)+|CV(((((((a+)+)+)+)+)+)+)+|(a+)+b)

View File

@ -0,0 +1 @@
Z(((((((a+)+)+)+)+<2B><><EFBFBD><EFBFBD>)+|Y(((((((a+)+)+)+)+)++)+)|X(((((((a+)+)+)+)+)+)+)+|W((<28><>(((a+)+)+)+)+)+)+)+;|V((()++)+)+|CW(((((((a+)+)+)+((((a+)+)+)+)+)+)+)+|CZ(((((((a+)+)+)+)+)+)+)+|CY(((((((a+)+)+)+)+)+)+)+|CX(((((((a+)+)+)+)+)+)+)+|CW(((((((a+)+)+)+)+)+)+)+|CV,((((((a+)+)+)+)+)+)+)+|(a+)+bc

View File

@ -0,0 +1 @@
8^\l*(?:((.<2E>*?)\W*(?1)\<5C>*\2[abc]| |((.)\)W*(?1)\W*\4|\ZZZZZZZZZZZZZZZZZZZZZZ<1B><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>\pd\)W*(?1)\W*\4|\ZZZZZZZZZZZZZZZZZZZZZZ<1B><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><1B><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>\pd<1B><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><1B><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>\pdquick brown )ox

Some files were not shown because too many files have changed in this diff Show More