Merge minor fixes from Trunk.

[SVN r83498]
This commit is contained in:
John Maddock
2013-03-19 18:48:24 +00:00
parent e70d3b6b4e
commit f6870ad64a
7 changed files with 47 additions and 28 deletions

View File

@ -405,7 +405,8 @@
characters</a> characters</a>
</h6> </h6>
<p> <p>
For example =[abc]=, will match any of the characters 'a', 'b', or 'c'. For example <code class="literal">[abc]</code>, will match any of the characters 'a',
'b', or 'c'.
</p> </p>
<h6> <h6>
<a name="boost_regex.syntax.perl_syntax.h13"></a> <a name="boost_regex.syntax.perl_syntax.h13"></a>
@ -413,10 +414,11 @@
ranges</a> ranges</a>
</h6> </h6>
<p> <p>
For example =[a-c]= will match any single character in the range 'a' to 'c'. For example <code class="literal">[a-c]</code> will match any single character in the
By default, for Perl regular expressions, a character x is within the range range 'a' to 'c'. By default, for Perl regular expressions, a character x
y to z, if the code point of the character lies within the codepoints of is within the range y to z, if the code point of the character lies within
the endpoints of the range. Alternatively, if you set the <a class="link" href="../ref/syntax_option_type/syntax_option_type_perl.html" title="Options for Perl Regular Expressions"><code class="literal">collate</code> the codepoints of the endpoints of the range. Alternatively, if you set the
<a class="link" href="../ref/syntax_option_type/syntax_option_type_perl.html" title="Options for Perl Regular Expressions"><code class="literal">collate</code>
flag</a> when constructing the regular expression, then ranges are locale flag</a> when constructing the regular expression, then ranges are locale
sensitive. sensitive.
</p> </p>
@ -426,7 +428,7 @@
</h6> </h6>
<p> <p>
If the bracket-expression begins with the ^ character, then it matches the If the bracket-expression begins with the ^ character, then it matches the
complement of the characters it contains, for example =<code class="literal">a-c</code>= complement of the characters it contains, for example <code class="literal">[^a-c]</code>
matches any character that is not in the range <code class="literal">a-c</code>. matches any character that is not in the range <code class="literal">a-c</code>.
</p> </p>
<h6> <h6>

View File

@ -198,7 +198,7 @@
</p> </p>
</div> </div>
<table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%"><tr> <table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%"><tr>
<td align="left"><p><small>Last revised: November 29, 2012 at 10:43:51 GMT</small></p></td> <td align="left"><p><small>Last revised: January 31, 2013 at 17:33:20 GMT</small></p></td>
<td align="right"><div class="copyright-footer"></div></td> <td align="right"><div class="copyright-footer"></div></td>
</tr></table> </tr></table>
<hr> <hr>

View File

@ -218,11 +218,11 @@ A bracket expression may contain any combination of the following:
[h5 Single characters] [h5 Single characters]
For example =[abc]=, will match any of the characters 'a', 'b', or 'c'. For example [^\[abc\]], will match any of the characters 'a', 'b', or 'c'.
[h5 Character ranges] [h5 Character ranges]
For example =[a-c]= will match any single character in the range 'a' to 'c'. For example [^\[a-c\]] will match any single character in the range 'a' to 'c'.
By default, for Perl regular expressions, a character x is within the By default, for Perl regular expressions, a character x is within the
range y to z, if the code point of the character lies within the codepoints of range y to z, if the code point of the character lies within the codepoints of
the endpoints of the range. Alternatively, if you set the the endpoints of the range. Alternatively, if you set the
@ -232,7 +232,7 @@ when constructing the regular expression, then ranges are locale sensitive.
[h5 Negation] [h5 Negation]
If the bracket-expression begins with the ^ character, then it matches the If the bracket-expression begins with the ^ character, then it matches the
complement of the characters it contains, for example =[^a-c]= matches complement of the characters it contains, for example [^\[^a-c\]] matches
any character that is not in the range =a-c=. any character that is not in the range =a-c=.
[h5 Character classes] [h5 Character classes]

View File

@ -1107,7 +1107,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
} }
// get the value if any: // get the value if any:
v = this->m_traits.toi(m_position, m_end, 10); v = this->m_traits.toi(m_position, m_end, 10);
max = (v >= 0) ? v : (std::numeric_limits<std::size_t>::max)(); max = (v >= 0) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
} }
else else
{ {

View File

@ -65,6 +65,8 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_r
m_match_flags |= match_perl; m_match_flags |= match_perl;
else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
m_match_flags |= match_perl; m_match_flags |= match_perl;
else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal))
m_match_flags |= match_perl;
else else
m_match_flags |= match_posix; m_match_flags |= match_posix;
} }
@ -326,6 +328,10 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
m_has_found_match = true; m_has_found_match = true;
m_presult->set_second(last, 0, false); m_presult->set_second(last, 0, false);
position = last; position = last;
if((m_match_flags & match_posix) == match_posix)
{
m_result.maybe_assign(*m_presult);
}
} }
#ifdef BOOST_REGEX_MATCH_EXTRA #ifdef BOOST_REGEX_MATCH_EXTRA
if(m_has_found_match && (match_extra & m_match_flags)) if(m_has_found_match && (match_extra & m_match_flags))

View File

@ -3,12 +3,12 @@
* Copyright (c) 2004 * Copyright (c) 2004
* John Maddock * John Maddock
* *
* Use, modification and distribution are subject to the * Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file * Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
* *
*/ */
/* /*
* LOCATION: see http://www.boost.org for most recent version. * LOCATION: see http://www.boost.org for most recent version.
* FILE regex_traits_defaults.hpp * FILE regex_traits_defaults.hpp
@ -85,7 +85,7 @@ inline bool is_combining<unsigned char>(unsigned char)
return false; return false;
} }
#if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives #if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives
#ifdef _MSC_VER #ifdef _MSC_VER
template<> template<>
inline bool is_combining<wchar_t>(wchar_t c) inline bool is_combining<wchar_t>(wchar_t c)
{ {
@ -115,11 +115,11 @@ template <class charT>
inline bool is_separator(charT c) inline bool is_separator(charT c)
{ {
return BOOST_REGEX_MAKE_BOOL( return BOOST_REGEX_MAKE_BOOL(
(c == static_cast<charT>('\n')) (c == static_cast<charT>('\n'))
|| (c == static_cast<charT>('\r')) || (c == static_cast<charT>('\r'))
|| (c == static_cast<charT>('\f')) || (c == static_cast<charT>('\f'))
|| (static_cast<boost::uint16_t>(c) == 0x2028u) || (static_cast<boost::uint16_t>(c) == 0x2028u)
|| (static_cast<boost::uint16_t>(c) == 0x2029u) || (static_cast<boost::uint16_t>(c) == 0x2029u)
|| (static_cast<boost::uint16_t>(c) == 0x85u)); || (static_cast<boost::uint16_t>(c) == 0x85u));
} }
template <> template <>
@ -177,7 +177,7 @@ int get_default_class_id(const charT* p1, const charT* p2)
'x', 'd', 'i', 'g', 'i', 't', 'x', 'd', 'i', 'g', 'i', 't',
}; };
static const character_pointer_range<charT> ranges[21] = static const character_pointer_range<charT> ranges[21] =
{ {
{data+0, data+5,}, // alnum {data+0, data+5,}, // alnum
{data+5, data+10,}, // alpha {data+5, data+10,}, // alpha
@ -203,7 +203,7 @@ int get_default_class_id(const charT* p1, const charT* p2)
}; };
static const character_pointer_range<charT>* ranges_begin = ranges; static const character_pointer_range<charT>* ranges_begin = ranges;
static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0])); static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0]));
character_pointer_range<charT> t = { p1, p2, }; character_pointer_range<charT> t = { p1, p2, };
const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t); const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t);
if((p != ranges_end) && (t == *p)) if((p != ranges_end) && (t == *p))
@ -324,15 +324,15 @@ inline const charT* get_escape_R_string()
# pragma warning(push) # pragma warning(push)
# pragma warning(disable:4309 4245) # pragma warning(disable:4309 4245)
#endif #endif
static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?', static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?',
'|', '[', '\x0A', '\x0B', '\x0C', '\x85', '\\', 'x', '{', '2', '0', '2', '8', '}', '|', '[', '\x0A', '\x0B', '\x0C', static_cast<unsigned char>('\x85'), '\\', 'x', '{', '2', '0', '2', '8', '}',
'\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' }; '\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' };
static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?',
'|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' }; '|', '[', '\x0A', '\x0B', '\x0C', static_cast<unsigned char>('\x85'), ']', ')', '\0' };
charT c = static_cast<charT>(0x2029u); charT c = static_cast<charT>(0x2029u);
bool b = (static_cast<unsigned>(c) == 0x2029u); bool b = (static_cast<unsigned>(c) == 0x2029u);
return (b ? e1 : e2); return (b ? e1 : e2);
#ifdef BOOST_MSVC #ifdef BOOST_MSVC
# pragma warning(pop) # pragma warning(pop)
@ -346,7 +346,7 @@ inline const char* get_escape_R_string<char>()
# pragma warning(push) # pragma warning(push)
# pragma warning(disable:4309) # pragma warning(disable:4309)
#endif #endif
static const char e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', static const char e2[] = { '(', '?', '>', '\x0D', '\x0A', '?',
'|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' }; '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' };
return e2; return e2;
#ifdef BOOST_MSVC #ifdef BOOST_MSVC

View File

@ -134,6 +134,17 @@ void test_partial_match()
TEST_REGEX_SEARCH("a*?<tag>", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2)); TEST_REGEX_SEARCH("a*?<tag>", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("\\w*?<tag>", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2)); TEST_REGEX_SEARCH("\\w*?<tag>", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("(\\w)*?<tag>", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2)); TEST_REGEX_SEARCH("(\\w)*?<tag>", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "xyzaaab", match_default|match_partial, make_array(0, 7, -2, -2));
TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "xyz", match_default|match_partial, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "xy", match_default|match_partial, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "x", match_default|match_partial, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "", match_default|match_partial, make_array(-2, -2));
TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "aaaa", match_default|match_partial, make_array(-2, -2));
TEST_REGEX_SEARCH(".abc", extended, "aaab", match_default|match_partial, make_array(1, 4, -2, -2));
TEST_REGEX_SEARCH("a[_]", extended, "xxa", match_default|match_partial, make_array(2, 3, -2, -2));
TEST_REGEX_SEARCH(".{4,}", extended, "xxa", match_default|match_partial, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH(".{4,}", extended, "xxa", match_default|match_partial|match_not_dot_null, make_array(0, 3, -2, -2));
} }
void test_nosubs() void test_nosubs()