forked from boostorg/regex
Merge minor fixes from Trunk.
[SVN r83498]
This commit is contained in:
@ -405,7 +405,8 @@
|
|||||||
characters</a>
|
characters</a>
|
||||||
</h6>
|
</h6>
|
||||||
<p>
|
<p>
|
||||||
For example =[abc]=, will match any of the characters 'a', 'b', or 'c'.
|
For example <code class="literal">[abc]</code>, will match any of the characters 'a',
|
||||||
|
'b', or 'c'.
|
||||||
</p>
|
</p>
|
||||||
<h6>
|
<h6>
|
||||||
<a name="boost_regex.syntax.perl_syntax.h13"></a>
|
<a name="boost_regex.syntax.perl_syntax.h13"></a>
|
||||||
@ -413,10 +414,11 @@
|
|||||||
ranges</a>
|
ranges</a>
|
||||||
</h6>
|
</h6>
|
||||||
<p>
|
<p>
|
||||||
For example =[a-c]= will match any single character in the range 'a' to 'c'.
|
For example <code class="literal">[a-c]</code> will match any single character in the
|
||||||
By default, for Perl regular expressions, a character x is within the range
|
range 'a' to 'c'. By default, for Perl regular expressions, a character x
|
||||||
y to z, if the code point of the character lies within the codepoints of
|
is within the range y to z, if the code point of the character lies within
|
||||||
the endpoints of the range. Alternatively, if you set the <a class="link" href="../ref/syntax_option_type/syntax_option_type_perl.html" title="Options for Perl Regular Expressions"><code class="literal">collate</code>
|
the codepoints of the endpoints of the range. Alternatively, if you set the
|
||||||
|
<a class="link" href="../ref/syntax_option_type/syntax_option_type_perl.html" title="Options for Perl Regular Expressions"><code class="literal">collate</code>
|
||||||
flag</a> when constructing the regular expression, then ranges are locale
|
flag</a> when constructing the regular expression, then ranges are locale
|
||||||
sensitive.
|
sensitive.
|
||||||
</p>
|
</p>
|
||||||
@ -426,7 +428,7 @@
|
|||||||
</h6>
|
</h6>
|
||||||
<p>
|
<p>
|
||||||
If the bracket-expression begins with the ^ character, then it matches the
|
If the bracket-expression begins with the ^ character, then it matches the
|
||||||
complement of the characters it contains, for example =<code class="literal">a-c</code>=
|
complement of the characters it contains, for example <code class="literal">[^a-c]</code>
|
||||||
matches any character that is not in the range <code class="literal">a-c</code>.
|
matches any character that is not in the range <code class="literal">a-c</code>.
|
||||||
</p>
|
</p>
|
||||||
<h6>
|
<h6>
|
||||||
|
@ -198,7 +198,7 @@
|
|||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
<table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%"><tr>
|
<table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%"><tr>
|
||||||
<td align="left"><p><small>Last revised: November 29, 2012 at 10:43:51 GMT</small></p></td>
|
<td align="left"><p><small>Last revised: January 31, 2013 at 17:33:20 GMT</small></p></td>
|
||||||
<td align="right"><div class="copyright-footer"></div></td>
|
<td align="right"><div class="copyright-footer"></div></td>
|
||||||
</tr></table>
|
</tr></table>
|
||||||
<hr>
|
<hr>
|
||||||
|
@ -218,11 +218,11 @@ A bracket expression may contain any combination of the following:
|
|||||||
|
|
||||||
[h5 Single characters]
|
[h5 Single characters]
|
||||||
|
|
||||||
For example =[abc]=, will match any of the characters 'a', 'b', or 'c'.
|
For example [^\[abc\]], will match any of the characters 'a', 'b', or 'c'.
|
||||||
|
|
||||||
[h5 Character ranges]
|
[h5 Character ranges]
|
||||||
|
|
||||||
For example =[a-c]= will match any single character in the range 'a' to 'c'.
|
For example [^\[a-c\]] will match any single character in the range 'a' to 'c'.
|
||||||
By default, for Perl regular expressions, a character x is within the
|
By default, for Perl regular expressions, a character x is within the
|
||||||
range y to z, if the code point of the character lies within the codepoints of
|
range y to z, if the code point of the character lies within the codepoints of
|
||||||
the endpoints of the range. Alternatively, if you set the
|
the endpoints of the range. Alternatively, if you set the
|
||||||
@ -232,7 +232,7 @@ when constructing the regular expression, then ranges are locale sensitive.
|
|||||||
[h5 Negation]
|
[h5 Negation]
|
||||||
|
|
||||||
If the bracket-expression begins with the ^ character, then it matches the
|
If the bracket-expression begins with the ^ character, then it matches the
|
||||||
complement of the characters it contains, for example =[^a-c]= matches
|
complement of the characters it contains, for example [^\[^a-c\]] matches
|
||||||
any character that is not in the range =a-c=.
|
any character that is not in the range =a-c=.
|
||||||
|
|
||||||
[h5 Character classes]
|
[h5 Character classes]
|
||||||
|
@ -1107,7 +1107,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
|
|||||||
}
|
}
|
||||||
// get the value if any:
|
// get the value if any:
|
||||||
v = this->m_traits.toi(m_position, m_end, 10);
|
v = this->m_traits.toi(m_position, m_end, 10);
|
||||||
max = (v >= 0) ? v : (std::numeric_limits<std::size_t>::max)();
|
max = (v >= 0) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -65,6 +65,8 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_r
|
|||||||
m_match_flags |= match_perl;
|
m_match_flags |= match_perl;
|
||||||
else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
|
else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
|
||||||
m_match_flags |= match_perl;
|
m_match_flags |= match_perl;
|
||||||
|
else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal))
|
||||||
|
m_match_flags |= match_perl;
|
||||||
else
|
else
|
||||||
m_match_flags |= match_posix;
|
m_match_flags |= match_posix;
|
||||||
}
|
}
|
||||||
@ -326,6 +328,10 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
|
|||||||
m_has_found_match = true;
|
m_has_found_match = true;
|
||||||
m_presult->set_second(last, 0, false);
|
m_presult->set_second(last, 0, false);
|
||||||
position = last;
|
position = last;
|
||||||
|
if((m_match_flags & match_posix) == match_posix)
|
||||||
|
{
|
||||||
|
m_result.maybe_assign(*m_presult);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#ifdef BOOST_REGEX_MATCH_EXTRA
|
#ifdef BOOST_REGEX_MATCH_EXTRA
|
||||||
if(m_has_found_match && (match_extra & m_match_flags))
|
if(m_has_found_match && (match_extra & m_match_flags))
|
||||||
|
@ -3,12 +3,12 @@
|
|||||||
* Copyright (c) 2004
|
* Copyright (c) 2004
|
||||||
* John Maddock
|
* John Maddock
|
||||||
*
|
*
|
||||||
* Use, modification and distribution are subject to the
|
* Use, modification and distribution are subject to the
|
||||||
* Boost Software License, Version 1.0. (See accompanying file
|
* Boost Software License, Version 1.0. (See accompanying file
|
||||||
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* LOCATION: see http://www.boost.org for most recent version.
|
* LOCATION: see http://www.boost.org for most recent version.
|
||||||
* FILE regex_traits_defaults.hpp
|
* FILE regex_traits_defaults.hpp
|
||||||
@ -85,7 +85,7 @@ inline bool is_combining<unsigned char>(unsigned char)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
#if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives
|
#if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
template<>
|
template<>
|
||||||
inline bool is_combining<wchar_t>(wchar_t c)
|
inline bool is_combining<wchar_t>(wchar_t c)
|
||||||
{
|
{
|
||||||
@ -115,11 +115,11 @@ template <class charT>
|
|||||||
inline bool is_separator(charT c)
|
inline bool is_separator(charT c)
|
||||||
{
|
{
|
||||||
return BOOST_REGEX_MAKE_BOOL(
|
return BOOST_REGEX_MAKE_BOOL(
|
||||||
(c == static_cast<charT>('\n'))
|
(c == static_cast<charT>('\n'))
|
||||||
|| (c == static_cast<charT>('\r'))
|
|| (c == static_cast<charT>('\r'))
|
||||||
|| (c == static_cast<charT>('\f'))
|
|| (c == static_cast<charT>('\f'))
|
||||||
|| (static_cast<boost::uint16_t>(c) == 0x2028u)
|
|| (static_cast<boost::uint16_t>(c) == 0x2028u)
|
||||||
|| (static_cast<boost::uint16_t>(c) == 0x2029u)
|
|| (static_cast<boost::uint16_t>(c) == 0x2029u)
|
||||||
|| (static_cast<boost::uint16_t>(c) == 0x85u));
|
|| (static_cast<boost::uint16_t>(c) == 0x85u));
|
||||||
}
|
}
|
||||||
template <>
|
template <>
|
||||||
@ -177,7 +177,7 @@ int get_default_class_id(const charT* p1, const charT* p2)
|
|||||||
'x', 'd', 'i', 'g', 'i', 't',
|
'x', 'd', 'i', 'g', 'i', 't',
|
||||||
};
|
};
|
||||||
|
|
||||||
static const character_pointer_range<charT> ranges[21] =
|
static const character_pointer_range<charT> ranges[21] =
|
||||||
{
|
{
|
||||||
{data+0, data+5,}, // alnum
|
{data+0, data+5,}, // alnum
|
||||||
{data+5, data+10,}, // alpha
|
{data+5, data+10,}, // alpha
|
||||||
@ -203,7 +203,7 @@ int get_default_class_id(const charT* p1, const charT* p2)
|
|||||||
};
|
};
|
||||||
static const character_pointer_range<charT>* ranges_begin = ranges;
|
static const character_pointer_range<charT>* ranges_begin = ranges;
|
||||||
static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0]));
|
static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0]));
|
||||||
|
|
||||||
character_pointer_range<charT> t = { p1, p2, };
|
character_pointer_range<charT> t = { p1, p2, };
|
||||||
const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t);
|
const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t);
|
||||||
if((p != ranges_end) && (t == *p))
|
if((p != ranges_end) && (t == *p))
|
||||||
@ -324,15 +324,15 @@ inline const charT* get_escape_R_string()
|
|||||||
# pragma warning(push)
|
# pragma warning(push)
|
||||||
# pragma warning(disable:4309 4245)
|
# pragma warning(disable:4309 4245)
|
||||||
#endif
|
#endif
|
||||||
static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?',
|
static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?',
|
||||||
'|', '[', '\x0A', '\x0B', '\x0C', '\x85', '\\', 'x', '{', '2', '0', '2', '8', '}',
|
'|', '[', '\x0A', '\x0B', '\x0C', static_cast<unsigned char>('\x85'), '\\', 'x', '{', '2', '0', '2', '8', '}',
|
||||||
'\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' };
|
'\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' };
|
||||||
static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?',
|
static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?',
|
||||||
'|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' };
|
'|', '[', '\x0A', '\x0B', '\x0C', static_cast<unsigned char>('\x85'), ']', ')', '\0' };
|
||||||
|
|
||||||
charT c = static_cast<charT>(0x2029u);
|
charT c = static_cast<charT>(0x2029u);
|
||||||
bool b = (static_cast<unsigned>(c) == 0x2029u);
|
bool b = (static_cast<unsigned>(c) == 0x2029u);
|
||||||
|
|
||||||
return (b ? e1 : e2);
|
return (b ? e1 : e2);
|
||||||
#ifdef BOOST_MSVC
|
#ifdef BOOST_MSVC
|
||||||
# pragma warning(pop)
|
# pragma warning(pop)
|
||||||
@ -346,7 +346,7 @@ inline const char* get_escape_R_string<char>()
|
|||||||
# pragma warning(push)
|
# pragma warning(push)
|
||||||
# pragma warning(disable:4309)
|
# pragma warning(disable:4309)
|
||||||
#endif
|
#endif
|
||||||
static const char e2[] = { '(', '?', '>', '\x0D', '\x0A', '?',
|
static const char e2[] = { '(', '?', '>', '\x0D', '\x0A', '?',
|
||||||
'|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' };
|
'|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' };
|
||||||
return e2;
|
return e2;
|
||||||
#ifdef BOOST_MSVC
|
#ifdef BOOST_MSVC
|
||||||
|
@ -134,6 +134,17 @@ void test_partial_match()
|
|||||||
TEST_REGEX_SEARCH("a*?<tag>", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2));
|
TEST_REGEX_SEARCH("a*?<tag>", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2));
|
||||||
TEST_REGEX_SEARCH("\\w*?<tag>", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2));
|
TEST_REGEX_SEARCH("\\w*?<tag>", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2));
|
||||||
TEST_REGEX_SEARCH("(\\w)*?<tag>", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2));
|
TEST_REGEX_SEARCH("(\\w)*?<tag>", perl, "aaa", match_default|match_partial, make_array(0, 3, -2, -2));
|
||||||
|
|
||||||
|
TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "xyzaaab", match_default|match_partial, make_array(0, 7, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "xyz", match_default|match_partial, make_array(0, 3, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "xy", match_default|match_partial, make_array(0, 2, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "x", match_default|match_partial, make_array(0, 1, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "", match_default|match_partial, make_array(-2, -2));
|
||||||
|
TEST_REGEX_SEARCH("(xyz)(.*)abc", extended, "aaaa", match_default|match_partial, make_array(-2, -2));
|
||||||
|
TEST_REGEX_SEARCH(".abc", extended, "aaab", match_default|match_partial, make_array(1, 4, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("a[_]", extended, "xxa", match_default|match_partial, make_array(2, 3, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH(".{4,}", extended, "xxa", match_default|match_partial, make_array(0, 3, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH(".{4,}", extended, "xxa", match_default|match_partial|match_not_dot_null, make_array(0, 3, -2, -2));
|
||||||
}
|
}
|
||||||
|
|
||||||
void test_nosubs()
|
void test_nosubs()
|
||||||
|
Reference in New Issue
Block a user