From 955d077d2b69d7c1014e36d86c737f7eb4b057b7 Mon Sep 17 00:00:00 2001
From: jzmaddock
Date: Wed, 17 Feb 2016 18:58:05 +0000
Subject: [PATCH 1/6] Allow types wider than int in \x{} expressions (for
char32_t etc). Fixes: https://svn.boost.org/trac/boost/ticket/11988.
---
include/boost/regex/icu.hpp | 2 +-
include/boost/regex/v4/basic_regex_parser.hpp | 47 ++++++++++++-------
include/boost/regex/v4/cpp_regex_traits.hpp | 8 ++--
include/boost/regex/v4/instances.hpp | 2 +-
.../boost/regex/v4/perl_matcher_common.hpp | 8 ++--
include/boost/regex/v4/regex_format.hpp | 4 +-
include/boost/regex/v4/regex_traits.hpp | 2 +-
.../boost/regex/v4/regex_traits_defaults.hpp | 6 +--
include/boost/regex/v4/w32_regex_traits.hpp | 4 +-
test/Jamfile.v2 | 1 +
10 files changed, 50 insertions(+), 34 deletions(-)
diff --git a/include/boost/regex/icu.hpp b/include/boost/regex/icu.hpp
index a70aa0da..719ee220 100644
--- a/include/boost/regex/icu.hpp
+++ b/include/boost/regex/icu.hpp
@@ -152,7 +152,7 @@ public:
char_class_type lookup_classname(const char_type* p1, const char_type* p2) const;
string_type lookup_collatename(const char_type* p1, const char_type* p2) const;
bool isctype(char_type c, char_class_type f) const;
- int toi(const char_type*& p1, const char_type* p2, int radix)const
+ boost::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const
{
return BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
}
diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp
index d097eed9..aefabacb 100644
--- a/include/boost/regex/v4/basic_regex_parser.hpp
+++ b/include/boost/regex/v4/basic_regex_parser.hpp
@@ -38,6 +38,21 @@ namespace BOOST_REGEX_DETAIL_NS{
#pragma warning(disable:4244 4800)
#endif
+inline boost::intmax_t umax(mpl::false_ const&) // selected when intmax_t has fewer digits than size_t
+{
+   // Get out clause here, just in case numeric_limits is unspecialized:
+   return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
+}
+inline boost::intmax_t umax(mpl::true_ const&) // selected when intmax_t can hold every size_t value
+{
+   return (std::numeric_limits<std::size_t>::max)();
+}
+
+inline boost::intmax_t umax() // largest parsed value that can safely be stored in a std::size_t repeat count
+{
+   return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
+}
+
template
class basic_regex_parser : public basic_regex_creator
{
@@ -868,7 +883,7 @@ escape_type_class_jump:
return false;
}
const charT* pc = m_position;
- int i = this->m_traits.toi(pc, m_end, 10);
+ boost::intmax_t i = this->m_traits.toi(pc, m_end, 10);
if((i < 0) && syn_end)
{
// Check for a named capture, get the leftmost one if there is more than one:
@@ -1075,7 +1090,7 @@ bool basic_regex_parser::parse_repeat_range(bool isbasic)
// parse a repeat-range:
//
std::size_t min, max;
- int v;
+ boost::intmax_t v;
// skip whitespace:
while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
++m_position;
@@ -1094,7 +1109,7 @@ bool basic_regex_parser::parse_repeat_range(bool isbasic)
// get min:
v = this->m_traits.toi(m_position, m_end, 10);
// skip whitespace:
- if(v < 0)
+ if((v < 0) || (v > umax()))
{
if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
{
@@ -1120,7 +1135,7 @@ bool basic_regex_parser::parse_repeat_range(bool isbasic)
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
return parse_literal();
}
- min = v;
+ min = static_cast(v);
// see if we have a comma:
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
{
@@ -1143,7 +1158,7 @@ bool basic_regex_parser::parse_repeat_range(bool isbasic)
}
// get the value if any:
v = this->m_traits.toi(m_position, m_end, 10);
- max = (v >= 0) ? (std::size_t)v : (std::numeric_limits::max)();
+ max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits::max)();
}
else
{
@@ -1665,19 +1680,19 @@ digraph basic_regex_parser::get_next_set_literal(basic_cha
// does a value fit in the specified charT type?
//
template
-bool valid_value(charT, int v, const mpl::true_&)
+bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
{
return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
}
template
-bool valid_value(charT, int, const mpl::false_&)
+bool valid_value(charT, boost::intmax_t, const mpl::false_&)
{
return true; // v will alsways fit in a charT
}
template
-bool valid_value(charT c, int v)
+bool valid_value(charT c, boost::intmax_t v)
{
- return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());
+ return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>());
}
template
@@ -1753,10 +1768,10 @@ charT basic_regex_parser::unescape_character()
fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
return result;
}
- int i = this->m_traits.toi(m_position, m_end, 16);
+ boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
if((m_position == m_end)
|| (i < 0)
- || ((std::numeric_limits::is_specialized) && (i > (int)(std::numeric_limits::max)()))
+ || ((std::numeric_limits::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits::max)()))
|| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
{
// Rewind to start of escape:
@@ -1771,7 +1786,7 @@ charT basic_regex_parser::unescape_character()
else
{
std::ptrdiff_t len = (std::min)(static_cast(2), static_cast(m_end - m_position));
- int i = this->m_traits.toi(m_position, m_position + len, 16);
+ boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
if((i < 0)
|| !valid_value(charT(0), i))
{
@@ -1790,7 +1805,7 @@ charT basic_regex_parser::unescape_character()
// followed by up to 3 octal digits:
std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast(4));
const charT* bp = m_position;
- int val = this->m_traits.toi(bp, bp + 1, 8);
+ boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
if(val != 0)
{
// Rewind to start of escape:
@@ -1801,7 +1816,7 @@ charT basic_regex_parser::unescape_character()
return result;
}
val = this->m_traits.toi(m_position, m_position + len, 8);
- if(val < 0)
+ if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits::max)()))
{
// Rewind to start of escape:
--m_position;
@@ -1874,7 +1889,7 @@ bool basic_regex_parser::parse_backref()
{
BOOST_ASSERT(m_position != m_end);
const charT* pc = m_position;
- int i = this->m_traits.toi(pc, pc + 1, 10);
+ boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
{
// not a backref at all but an octal escape sequence:
@@ -1996,7 +2011,7 @@ bool basic_regex_parser::parse_perl_extension()
int max_mark = m_max_mark;
m_mark_reset = -1;
m_max_mark = m_mark_count;
- int v;
+ boost::intmax_t v;
//
// select the actual extension used:
//
diff --git a/include/boost/regex/v4/cpp_regex_traits.hpp b/include/boost/regex/v4/cpp_regex_traits.hpp
index 709663a3..b7b32d8a 100644
--- a/include/boost/regex/v4/cpp_regex_traits.hpp
+++ b/include/boost/regex/v4/cpp_regex_traits.hpp
@@ -1027,11 +1027,11 @@ public:
return m_pimpl->isctype(c, f);
#endif
}
- int toi(const charT*& p1, const charT* p2, int radix)const;
+ boost::intmax_t toi(const charT*& p1, const charT* p2, int radix)const;
int value(charT c, int radix)const
{
const charT* pc = &c;
- return toi(pc, pc + 1, radix);
+ return (int)toi(pc, pc + 1, radix);
}
locale_type imbue(locale_type l)
{
@@ -1069,7 +1069,7 @@ private:
template
-int cpp_regex_traits::toi(const charT*& first, const charT* last, int radix)const
+boost::intmax_t cpp_regex_traits::toi(const charT*& first, const charT* last, int radix)const
{
BOOST_REGEX_DETAIL_NS::parser_buf sbuf; // buffer for parsing numbers.
std::basic_istream is(&sbuf); // stream for parsing numbers.
@@ -1082,7 +1082,7 @@ int cpp_regex_traits::toi(const charT*& first, const charT* last, int rad
if(std::abs(radix) == 16) is >> std::hex;
else if(std::abs(radix) == 8) is >> std::oct;
else is >> std::dec;
- int val;
+ boost::intmax_t val;
if(is >> val)
{
first = first + ((last - first) - sbuf.in_avail());
diff --git a/include/boost/regex/v4/instances.hpp b/include/boost/regex/v4/instances.hpp
index f66b237c..05ac71a6 100644
--- a/include/boost/regex/v4/instances.hpp
+++ b/include/boost/regex/v4/instances.hpp
@@ -157,7 +157,7 @@ bool cpp_regex_traits_implementation::isctype(const BOOST_RE
#endif
} // namespace
template BOOST_REGEX_DECL
-int cpp_regex_traits::toi(const BOOST_REGEX_CHAR_T*& first, const BOOST_REGEX_CHAR_T* last, int radix)const;
+boost::intmax_t cpp_regex_traits::toi(const BOOST_REGEX_CHAR_T*& first, const BOOST_REGEX_CHAR_T* last, int radix)const;
template BOOST_REGEX_DECL
std::string cpp_regex_traits::catalog_name(const std::string& name);
template BOOST_REGEX_DECL
diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp
index f3949ccf..e60581df 100644
--- a/include/boost/regex/v4/perl_matcher_common.hpp
+++ b/include/boost/regex/v4/perl_matcher_common.hpp
@@ -206,7 +206,7 @@ bool perl_matcher::match_imp()
search_base = base;
state_count = 0;
m_match_flags |= regex_constants::match_all;
- m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), search_base, last);
+ m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast(1u + re.mark_count()), search_base, last);
m_presult->set_base(base);
m_presult->set_named_subs(this->re.get_named_subs());
if(m_match_flags & match_posix)
@@ -268,7 +268,7 @@ bool perl_matcher::find_imp()
// reset our state machine:
search_base = position = base;
pstate = re.get_first_state();
- m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), base, last);
+ m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast(1u + re.mark_count()), base, last);
m_presult->set_base(base);
m_presult->set_named_subs(this->re.get_named_subs());
m_match_flags |= regex_constants::match_init;
@@ -287,13 +287,13 @@ bool perl_matcher::find_imp()
++position;
}
// reset $` start:
- m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), search_base, last);
+ m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast(1u + re.mark_count()), search_base, last);
//if((base != search_base) && (base == backstop))
// m_match_flags |= match_prev_avail;
}
if(m_match_flags & match_posix)
{
- m_result.set_size(1 + re.mark_count(), base, last);
+ m_result.set_size(static_cast(1u + re.mark_count()), base, last);
m_result.set_base(base);
}
diff --git a/include/boost/regex/v4/regex_format.hpp b/include/boost/regex/v4/regex_format.hpp
index e9006a7b..f0a0a11e 100644
--- a/include/boost/regex/v4/regex_format.hpp
+++ b/include/boost/regex/v4/regex_format.hpp
@@ -86,7 +86,7 @@ struct trivial_format_traits
}
int toi(const charT*& p1, const charT* p2, int radix)const
{
- return global_toi(p1, p2, radix, *this);
+ return (int)global_toi(p1, p2, radix, *this);
}
};
@@ -165,7 +165,7 @@ private:
std::vector v(i, j);
const char_type* start = &v[0];
const char_type* pos = start;
- int r = m_traits.toi(pos, &v[0] + v.size(), base);
+ int r = (int)m_traits.toi(pos, &v[0] + v.size(), base);
std::advance(i, pos - start);
return r;
}
diff --git a/include/boost/regex/v4/regex_traits.hpp b/include/boost/regex/v4/regex_traits.hpp
index 45a4bdf6..5d427706 100644
--- a/include/boost/regex/v4/regex_traits.hpp
+++ b/include/boost/regex/v4/regex_traits.hpp
@@ -109,7 +109,7 @@ struct default_wrapper : public BaseT
{
return ((c & 0x7f) == c) ? get_default_escape_syntax_type(static_cast(c)) : ::boost::regex_constants::escape_type_identity;
}
- int toi(const char_type*& p1, const char_type* p2, int radix)const
+ boost::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const
{
return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
}
diff --git a/include/boost/regex/v4/regex_traits_defaults.hpp b/include/boost/regex/v4/regex_traits_defaults.hpp
index 18218837..2a2cf21d 100644
--- a/include/boost/regex/v4/regex_traits_defaults.hpp
+++ b/include/boost/regex/v4/regex_traits_defaults.hpp
@@ -304,13 +304,13 @@ int global_value(charT c)
return -1;
}
template
-int global_toi(const charT*& p1, const charT* p2, int radix, const traits& t)
+boost::intmax_t global_toi(const charT*& p1, const charT* p2, int radix, const traits& t)
{
(void)t; // warning suppression
- int next_value = t.value(*p1, radix);
+ boost::intmax_t next_value = t.value(*p1, radix);
if((p1 == p2) || (next_value < 0) || (next_value >= radix))
return -1;
- int result = 0;
+ boost::intmax_t result = 0;
while(p1 != p2)
{
next_value = t.value(*p1, radix);
diff --git a/include/boost/regex/v4/w32_regex_traits.hpp b/include/boost/regex/v4/w32_regex_traits.hpp
index 560cc217..bf996d61 100644
--- a/include/boost/regex/v4/w32_regex_traits.hpp
+++ b/include/boost/regex/v4/w32_regex_traits.hpp
@@ -640,13 +640,13 @@ public:
return true;
return false;
}
- int toi(const charT*& p1, const charT* p2, int radix)const
+ boost::intmax_t toi(const charT*& p1, const charT* p2, int radix)const
{
return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
}
int value(charT c, int radix)const
{
- int result = ::boost::BOOST_REGEX_DETAIL_NS::global_value(c);
+ int result = (int)::boost::BOOST_REGEX_DETAIL_NS::global_value(c);
return result < radix ? result : -1;
}
locale_type imbue(locale_type l)
diff --git a/test/Jamfile.v2 b/test/Jamfile.v2
index 06198b04..6617d188 100644
--- a/test/Jamfile.v2
+++ b/test/Jamfile.v2
@@ -153,6 +153,7 @@ test-suite regex
[ link concepts/concept_check.cpp ../build//boost_regex ]
[ link concepts/icu_concept_check.cpp ../build//boost_regex ]
[ link concepts/range_concept_check.cpp ../build//boost_regex ]
+ [ run concepts/test_bug_11988.cpp ../build//boost_regex ]
[ run
# sources
From 36b2fab2271bc97f8fae86029f587747f9fc1ff4 Mon Sep 17 00:00:00 2001
From: jzmaddock
Date: Wed, 17 Feb 2016 19:24:19 +0000
Subject: [PATCH 2/6] Add file missed in previous commit.
---
test/concepts/test_bug_11988.cpp | 111 +++++++++++++++++++++++++++++++
1 file changed, 111 insertions(+)
create mode 100644 test/concepts/test_bug_11988.cpp
diff --git a/test/concepts/test_bug_11988.cpp b/test/concepts/test_bug_11988.cpp
new file mode 100644
index 00000000..2d921024
--- /dev/null
+++ b/test/concepts/test_bug_11988.cpp
@@ -0,0 +1,111 @@
+/*
+*
+* Copyright (c) 2016
+* John Maddock
+*
+* Use, modification and distribution are subject to the
+* Boost Software License, Version 1.0. (See accompanying file
+* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+*
+*/
+
+#include
+
+#ifndef BOOST_NO_CXX11_CHAR32_T
+
+namespace boost {
+
+ std::size_t hash_value(char32_t c) { return c; }
+
+}
+
+struct char32_traits // minimal regex traits class over char32_t, used to parse hex escapes wider than int
+{
+   typedef char32_t char_type;
+   typedef std::size_t size_type;
+   typedef std::vector<char32_t> string_type;
+   typedef int locale_type; // not used
+   typedef unsigned char_class_type;
+
+   static size_type length(const char32_t* p)
+   {
+      size_type result = 0;
+      while(*p)
+      {
+         ++p;
+         ++result;
+      }
+      return result;
+   }
+   static char_type translate(char_type c) { return c; }
+   static char_type translate_nocase(char_type c) { return c; }
+   static string_type transform(const char32_t* p1, const char32_t* p2)
+   {
+      return string_type(p1, p2);
+   }
+   static string_type transform_primary(const char32_t* p1, const char32_t* p2)
+   {
+      return string_type(p1, p2);
+   }
+   static char_class_type lookup_classname(const char32_t* p1, const char32_t* p2)
+   {
+      // Narrow to char and defer to the C traits (assumes class names are plain ASCII):
+      std::string s(p1, p2);
+      return boost::c_regex_traits<char>::lookup_classname(s.c_str(), s.c_str() + s.length());
+   }
+   static string_type lookup_collatename(const char32_t* p1, const char32_t* p2)
+   {
+      return string_type(p1, p2);
+   }
+   static bool isctype(char_type c, char_class_type t)
+   {
+      if(c < 0xff) // only code points below 0xff are classified; everything else belongs to no class
+         return boost::c_regex_traits<char>::isctype(c, t);
+      return false;
+   }
+   static boost::intmax_t value(char_type c, int radix) // digit value of c in the given radix, or -1 if c is not a digit
+   {
+      switch(radix)
+      {
+      case 8:
+         if((c >= '0') && (c <= '7'))
+            return c - '0';
+         break;
+      case 10:
+         if((c >= '0') && (c <= '9'))
+            return c - '0';
+         break;
+      case 16:
+         if((c >= '0') && (c <= '9'))
+            return c - '0';
+         if((c >= 'a') && (c <= 'f'))
+            return (c - 'a') + 10;
+         if((c >= 'A') && (c <= 'F'))
+            return (c - 'A') + 10;
+         break;
+      }
+      return -1;
+   }
+   static locale_type imbue(locale_type) { return 0; }
+   static locale_type getloc() { return 0; }
+};
+
+
+int main() // returns 0 when the wide hex escapes match their literal code units, 1 otherwise
+{
+   char32_t big_char[] = { 0xF, 0xFF, 0xFFF, 0xFFFF, 0xFFFFF, 0xFFFFFF, 0xFFFFFFF, 0xFFFFFFFF, 0 };
+
+   boost::basic_regex<char32_t, char32_traits> e(U"\\x{F}\\x{FF}\\x{FFF}\\x{FFFF}\\x{FFFFF}\\x{FFFFFF}\\x{FFFFFFF}\\x{FFFFFFFF}");
+
+   if(!regex_match(big_char, e))
+   {
+      return 1;
+   }
+   return 0;
+}
+
+#else
+
+int main() { return 0; }
+
+#endif
\ No newline at end of file
From e217808156e482dc1af6b97799e39a2fdfabe3fd Mon Sep 17 00:00:00 2001
From: jzmaddock
Date: Wed, 17 Feb 2016 19:57:49 +0000
Subject: [PATCH 3/6] Fix declaration order in test case
---
test/concepts/test_bug_11988.cpp | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/test/concepts/test_bug_11988.cpp b/test/concepts/test_bug_11988.cpp
index 2d921024..840ba9af 100644
--- a/test/concepts/test_bug_11988.cpp
+++ b/test/concepts/test_bug_11988.cpp
@@ -9,16 +9,19 @@
*
*/
-#include
#ifndef BOOST_NO_CXX11_CHAR32_T
+#include
+
namespace boost {
- std::size_t hash_value(char32_t c) { return c; }
+ std::size_t hash_value(char32_t const& c) { return c; }
}
+#include
+
struct char32_traits
{
typedef char32_t char_type;
From 3ea03e364cada28d1998a61bfcda86c1e2b29949 Mon Sep 17 00:00:00 2001
From: Marcel Raad
Date: Fri, 11 Mar 2016 17:23:53 +0100
Subject: [PATCH 4/6] Fix test on compilers without char32_t
As the include for BOOST_NO_CXX11_CHAR32_T was missing, the test didn't compile.
---
test/concepts/test_bug_11988.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/test/concepts/test_bug_11988.cpp b/test/concepts/test_bug_11988.cpp
index 840ba9af..7376f39a 100644
--- a/test/concepts/test_bug_11988.cpp
+++ b/test/concepts/test_bug_11988.cpp
@@ -9,6 +9,7 @@
*
*/
+#include
#ifndef BOOST_NO_CXX11_CHAR32_T
@@ -111,4 +112,4 @@ int main()
int main() { return 0; }
-#endif
\ No newline at end of file
+#endif
From d1e65490e0c4b98c8e510d5d93b6ffc265e778a4 Mon Sep 17 00:00:00 2001
From: jzmaddock
Date: Sat, 12 Mar 2016 19:01:17 +0000
Subject: [PATCH 5/6] Update docs in response to
https://svn.boost.org/trac/boost/ticket/11776
---
doc/html/boost_regex/partial_matches.html | 11 +++++++++++
doc/html/index.html | 2 +-
doc/partial_matches.qbk | 3 +++
3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/doc/html/boost_regex/partial_matches.html b/doc/html/boost_regex/partial_matches.html
index 6bc7c1da..c9e55d6b 100644
--- a/doc/html/boost_regex/partial_matches.html
+++ b/doc/html/boost_regex/partial_matches.html
@@ -178,6 +178,17 @@
It's more efficient to work this way, but may not be the behavior you want
in all situations.
+
+ There are situations where full matches are found even though partial matches
+ are also possible: for example if the partial string terminates with "abc"
+ and the regular expression is "\w+", then a full match is found
+ even though there may be more alphabetical characters to come. This particular
+ case can be detected by checking if the match found terminates at the end
+ of the current input string. However, there are situations where that is not
+ possible: for example an expression such as "abc.*123" may always
+ have longer matches available since it could conceivably match the entire
+ input string (no matter how long it may be).
+
The following example tests to see whether the text could be a valid credit
diff --git a/doc/html/index.html b/doc/html/index.html
index 04819f75..81680b0c 100644
--- a/doc/html/index.html
+++ b/doc/html/index.html
@@ -221,7 +221,7 @@
-Last revised: January 10, 2016 at 18:43:41 GMT |
+Last revised: March 12, 2016 at 19:00:00 GMT |
|
diff --git a/doc/partial_matches.qbk b/doc/partial_matches.qbk
index 65801b2c..bc621e19 100644
--- a/doc/partial_matches.qbk
+++ b/doc/partial_matches.qbk
@@ -34,6 +34,9 @@ imperfect behavior:
* There are some expressions, such as ".\*abc" that will always produce a partial match. This problem can be reduced by careful construction of the regular expressions used, or by setting flags like match_not_dot_newline so that expressions like .\* can't match past line boundaries.
* Boost.Regex currently prefers leftmost matches to full matches, so for example matching "abc|b" against "ab" produces a partial match against the "ab" rather than a full match against "b". It's more efficient to work this way, but may not be the behavior you want in all situations.
+* There are situations where full matches are found even though partial matches are also possible: for example if the partial string terminates with "abc" and the regular expression is "\w+", then a full match is found
+even though there may be more alphabetical characters to come. This particular case can be detected by checking if the match found terminates at the end of the current input string. However, there are situations where
+that is not possible: for example an expression such as "abc.*123" may always have longer matches available since it could conceivably match the entire input string (no matter how long it may be).
The following example tests to see whether the text could be a valid
credit card number, as the user presses a key, the character entered
From 9059bfb5c6287b0c579bfa4be5160b44c8cc2957 Mon Sep 17 00:00:00 2001
From: jzmaddock
Date: Mon, 14 Mar 2016 19:22:18 +0000
Subject: [PATCH 6/6] Fix case-sensitivity change behaviour. See
https://svn.boost.org/trac/boost/ticket/11205. Update docs to match.
---
doc/history.qbk | 1 +
.../background_information/history.html | 9 +++-
doc/html/index.html | 2 +-
include/boost/regex/v4/perl_matcher.hpp | 2 +
.../boost/regex/v4/perl_matcher_common.hpp | 9 ----
.../regex/v4/perl_matcher_non_recursive.hpp | 43 +++++++++++++++++++
.../boost/regex/v4/perl_matcher_recursive.hpp | 16 +++++++
test/regress/test_perl_ex.cpp | 4 ++
8 files changed, 74 insertions(+), 12 deletions(-)
diff --git a/doc/history.qbk b/doc/history.qbk
index 17231392..17b43f57 100644
--- a/doc/history.qbk
+++ b/doc/history.qbk
@@ -18,6 +18,7 @@ All issues including closed ones can be viewed [@https://svn.boost.org/trac/boos
[h4 Boost.Regex-5.1.1]
* Change to lockfree implementation of memory cache, see [@https://github.com/boostorg/regex/pull/23 PR#23].
+* Fix bug in case sensitivity change, see [@https://svn.boost.org/trac/boost/ticket/11940 #11940].
[h4 Boost.Regex-5.1.0 (Boost-1.60.0)]
diff --git a/doc/html/boost_regex/background_information/history.html b/doc/html/boost_regex/background_information/history.html
index 21b56489..1cf10e32 100644
--- a/doc/html/boost_regex/background_information/history.html
+++ b/doc/html/boost_regex/background_information/history.html
@@ -39,9 +39,14 @@
Boost.Regex-5.1.1
--
+
+-
Change to lockfree implementation of memory cache, see PR#23.
-
+
+-
+ Fix bug in case sensitivity change, see #11940.
+
+
-Last revised: March 12, 2016 at 19:00:00 GMT |
+Last revised: March 14, 2016 at 19:20:20 GMT |
|
diff --git a/include/boost/regex/v4/perl_matcher.hpp b/include/boost/regex/v4/perl_matcher.hpp
index a7298fbd..96a086b8 100644
--- a/include/boost/regex/v4/perl_matcher.hpp
+++ b/include/boost/regex/v4/perl_matcher.hpp
@@ -537,6 +537,7 @@ private:
bool unwind_recursion_pop(bool);
bool unwind_commit(bool);
bool unwind_then(bool);
+ bool unwind_case(bool);
void destroy_single_repeat();
void push_matched_paren(int index, const sub_match& sub);
void push_recursion_stopper();
@@ -547,6 +548,7 @@ private:
void push_non_greedy_repeat(const re_syntax_base* ps);
void push_recursion(int idx, const re_syntax_base* p, results_type* presults);
void push_recursion_pop();
+ void push_case_change(bool);
// pointer to base of stack:
saved_state* m_stack_base;
diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp
index e60581df..6febff4c 100644
--- a/include/boost/regex/v4/perl_matcher_common.hpp
+++ b/include/boost/regex/v4/perl_matcher_common.hpp
@@ -793,15 +793,6 @@ inline bool perl_matcher::match_assert_backref(
return result;
}
-template
-bool perl_matcher::match_toggle_case()
-{
- // change our case sensitivity:
- this->icase = static_cast(pstate)->icase;
- pstate = pstate->next.p;
- return true;
-}
-
template
bool perl_matcher::match_fail()
{
diff --git a/include/boost/regex/v4/perl_matcher_non_recursive.hpp b/include/boost/regex/v4/perl_matcher_non_recursive.hpp
index bf77eaa5..aa7af3b4 100644
--- a/include/boost/regex/v4/perl_matcher_non_recursive.hpp
+++ b/include/boost/regex/v4/perl_matcher_non_recursive.hpp
@@ -138,6 +138,12 @@ struct saved_recursion : public saved_state
Results results;
};
+struct saved_change_case : public saved_state // backup-stack record holding a case flag to restore on unwind
+{
+ bool icase; // the icase value that was in effect before the toggle (pushed by match_toggle_case)
+ saved_change_case(bool c) : saved_state(18), icase(c) {} // NOTE(review): 18 presumably selects unwind_case in unwind()'s handler table - confirm
+};
+
template
bool perl_matcher::match_all_states()
{
@@ -242,6 +248,22 @@ inline void perl_matcher::push_matched_paren(in
m_backup_state = pmp;
}
+template <class BidiIterator, class Allocator, class traits>
+inline void perl_matcher<BidiIterator, Allocator, traits>::push_case_change(bool c)
+{
+   // Record the case flag c on the backup stack so that unwind_case() can restore it:
+   saved_change_case* pmp = static_cast<saved_change_case*>(m_backup_state);
+   --pmp;
+   if(pmp < m_stack_base) // the new slot would fall below the stack base: grow the stack first
+   {
+      extend_stack();
+      pmp = static_cast<saved_change_case*>(m_backup_state);
+      --pmp;
+   }
+   (void) new (pmp)saved_change_case(c);
+   m_backup_state = pmp;
+}
+
template
inline void perl_matcher::push_recursion_stopper()
{
@@ -347,6 +369,16 @@ inline void perl_matcher::push_recursion(int id
m_backup_state = pmp;
}
+template <class BidiIterator, class Allocator, class traits>
+bool perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case()
+{
+   // change our case sensitivity:
+   push_case_change(this->icase); // save the current flag first so that backtracking can undo the toggle
+   this->icase = static_cast<const re_case*>(pstate)->icase;
+   pstate = pstate->next.p;
+   return true;
+}
+
template
bool perl_matcher::match_startmark()
{
@@ -1142,6 +1174,7 @@ bool perl_matcher::unwind(bool have_match)
&perl_matcher::unwind_recursion_pop,
&perl_matcher::unwind_commit,
&perl_matcher::unwind_then,
+ &perl_matcher::unwind_case,
};
m_recursive_result = have_match;
@@ -1170,6 +1203,16 @@ bool perl_matcher::unwind_end(bool)
return false; // end of stack nothing more to search
}
+template <class BidiIterator, class Allocator, class traits>
+bool perl_matcher<BidiIterator, Allocator, traits>::unwind_case(bool) // restores the saved case flag, pops the record
+{
+   saved_change_case* pmp = static_cast<saved_change_case*>(m_backup_state);
+   icase = pmp->icase;
+   boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++);
+   m_backup_state = pmp;
+   return true; // keep unwinding: restoring the flag is not itself a resumption point
+}
+
template
bool perl_matcher::unwind_paren(bool have_match)
{
diff --git a/include/boost/regex/v4/perl_matcher_recursive.hpp b/include/boost/regex/v4/perl_matcher_recursive.hpp
index 3893837d..33b91581 100644
--- a/include/boost/regex/v4/perl_matcher_recursive.hpp
+++ b/include/boost/regex/v4/perl_matcher_recursive.hpp
@@ -289,11 +289,13 @@ bool perl_matcher::match_alt()
BidiIterator oldposition(position);
const re_syntax_base* old_pstate = jmp->alt.p;
pstate = pstate->next.p;
+ bool oldcase = icase;
m_have_then = false;
if(!match_all_states())
{
pstate = old_pstate;
position = oldposition;
+ icase = oldcase;
if(m_have_then)
{
m_can_backtrack = true;
@@ -1036,6 +1038,20 @@ bool perl_matcher::match_then()
return false;
}
+template <class BidiIterator, class Allocator, class traits>
+bool perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case()
+{
+   // change our case sensitivity:
+   bool oldcase = this->icase;
+   this->icase = static_cast<const re_case*>(pstate)->icase;
+   pstate = pstate->next.p;
+   bool result = match_all_states(); // match the remainder under the new flag
+   this->icase = oldcase; // put the caller's flag back whether or not the remainder matched
+   return result;
+}
+
+
+
template
bool perl_matcher::skip_until_paren(int index, bool have_match)
{
diff --git a/test/regress/test_perl_ex.cpp b/test/regress/test_perl_ex.cpp
index 2f081e2d..73125419 100644
--- a/test/regress/test_perl_ex.cpp
+++ b/test/regress/test_perl_ex.cpp
@@ -245,6 +245,10 @@ void test_options2()
TEST_REGEX_SEARCH("a(?i:b)*c", perl, "aBBc", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("a(?i:b)*c", perl, "aBC", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("a(?i:b)*c", perl, "aBBC", match_default, make_array(-2, -2));
+ TEST_REGEX_SEARCH("(?i:j)|h", perl, "J", match_default, make_array(0, 1, -2, -2));
+ TEST_REGEX_SEARCH("(?i:j)|h", perl, "j", match_default, make_array(0, 1, -2, -2));
+ TEST_REGEX_SEARCH("(?i:j)|h", perl, "h", match_default, make_array(0, 1, -2, -2));
+ TEST_REGEX_SEARCH("(?i:j)|h", perl, "H", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("a(?=b(?i)c)\\w\\wd", perl, "abcd", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("a(?=b(?i)c)\\w\\wd", perl, "abCd", match_default, make_array(0, 4, -2, -2));