mirror of
https://github.com/boostorg/regex.git
synced 2025-07-22 16:47:17 +02:00
Added possessive modifiers ++ *+ ?+ {}+.
Added support for \v and \h as character classes as per Perl-5.10. [SVN r52558]
This commit is contained in:
@ -184,7 +184,9 @@ private:
|
||||
offset_underscore = U_CHAR_CATEGORY_COUNT+3,
|
||||
offset_unicode = U_CHAR_CATEGORY_COUNT+4,
|
||||
offset_any = U_CHAR_CATEGORY_COUNT+5,
|
||||
offset_ascii = U_CHAR_CATEGORY_COUNT+6
|
||||
offset_ascii = U_CHAR_CATEGORY_COUNT+6,
|
||||
offset_horizontal = U_CHAR_CATEGORY_COUNT+7,
|
||||
offset_vertical = U_CHAR_CATEGORY_COUNT+8
|
||||
};
|
||||
|
||||
//
|
||||
@ -197,6 +199,8 @@ private:
|
||||
static const char_class_type mask_unicode;
|
||||
static const char_class_type mask_any;
|
||||
static const char_class_type mask_ascii;
|
||||
static const char_class_type mask_horizontal;
|
||||
static const char_class_type mask_vertical;
|
||||
|
||||
static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2);
|
||||
|
||||
|
@ -610,6 +610,7 @@ bool basic_regex_parser<charT, traits>::parse_extended_escape()
|
||||
// fall through:
|
||||
case regex_constants::escape_type_class:
|
||||
{
|
||||
escape_type_class_jump:
|
||||
typedef typename traits::char_class_type mask_type;
|
||||
mask_type m = this->m_traits.lookup_classname(m_position, m_position+1);
|
||||
if(m != 0)
|
||||
@ -720,6 +721,10 @@ bool basic_regex_parser<charT, traits>::parse_extended_escape()
|
||||
}
|
||||
fail(regex_constants::error_ctype, m_position - m_base);
|
||||
}
|
||||
case regex_constants::escape_type_control_v:
|
||||
if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
|
||||
goto escape_type_class_jump;
|
||||
// fallthrough:
|
||||
default:
|
||||
this->append_literal(unescape_character());
|
||||
break;
|
||||
@ -747,6 +752,7 @@ template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
|
||||
{
|
||||
bool greedy = true;
|
||||
bool pocessive = false;
|
||||
std::size_t insert_point;
|
||||
//
|
||||
// when we get to here we may have a non-greedy ? mark still to come:
|
||||
@ -758,12 +764,19 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
|
||||
)
|
||||
)
|
||||
{
|
||||
// OK we have a perl regex, check for a '?':
|
||||
// OK we have a perl or emacs regex, check for a '?':
|
||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
|
||||
{
|
||||
greedy = false;
|
||||
++m_position;
|
||||
}
|
||||
// for perl regexes only, check for possessive ++ repeats.
|
||||
if((0 == (this->flags() & regbase::main_option_type))
|
||||
&& (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
|
||||
{
|
||||
pocessive = true;
|
||||
++m_position;
|
||||
}
|
||||
}
|
||||
if(0 == this->m_last_state)
|
||||
{
|
||||
@ -832,6 +845,20 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
|
||||
// now fill in the alt jump for the repeat:
|
||||
rep = static_cast<re_repeat*>(this->getaddress(rep_off));
|
||||
rep->alt.i = this->m_pdata->m_data.size() - rep_off;
|
||||
//
|
||||
// If the repeat is possessive then bracket the repeat with a (?>...)
|
||||
// independent sub-expression construct:
|
||||
//
|
||||
if(pocessive)
|
||||
{
|
||||
re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
|
||||
pb->index = -3;
|
||||
re_jump* jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
|
||||
this->m_pdata->m_data.align();
|
||||
jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
|
||||
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
|
||||
pb->index = -3;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -394,7 +394,9 @@ enum
|
||||
char_class_graph=char_class_alnum|char_class_punct,
|
||||
char_class_blank=1<<9,
|
||||
char_class_word=1<<10,
|
||||
char_class_unicode=1<<11
|
||||
char_class_unicode=1<<11,
|
||||
char_class_horizontal_space=1<<12,
|
||||
char_class_vertical_space=1<<13
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -413,6 +415,8 @@ public:
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 24);
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 25);
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 26);
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_horizontal = 1u << 27);
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_vertical = 1u << 28);
|
||||
#endif
|
||||
|
||||
typedef std::basic_string<charT> string_type;
|
||||
@ -477,6 +481,10 @@ template <class charT>
|
||||
typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_word;
|
||||
template <class charT>
|
||||
typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_unicode;
|
||||
template <class charT>
|
||||
typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_vertical;
|
||||
template <class charT>
|
||||
typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_horizontal;
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@ -688,18 +696,20 @@ void cpp_regex_traits_implementation<charT>::init()
|
||||
// Custom class names:
|
||||
//
|
||||
#ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
|
||||
static const char_class_type masks[14] =
|
||||
static const char_class_type masks[16] =
|
||||
{
|
||||
std::ctype<charT>::alnum,
|
||||
std::ctype<charT>::alpha,
|
||||
std::ctype<charT>::cntrl,
|
||||
std::ctype<charT>::digit,
|
||||
std::ctype<charT>::graph,
|
||||
cpp_regex_traits_implementation<charT>::mask_horizontal,
|
||||
std::ctype<charT>::lower,
|
||||
std::ctype<charT>::print,
|
||||
std::ctype<charT>::punct,
|
||||
std::ctype<charT>::space,
|
||||
std::ctype<charT>::upper,
|
||||
cpp_regex_traits_implementation<charT>::mask_vertical,
|
||||
std::ctype<charT>::xdigit,
|
||||
cpp_regex_traits_implementation<charT>::mask_blank,
|
||||
cpp_regex_traits_implementation<charT>::mask_word,
|
||||
@ -713,11 +723,13 @@ void cpp_regex_traits_implementation<charT>::init()
|
||||
::boost::re_detail::char_class_cntrl,
|
||||
::boost::re_detail::char_class_digit,
|
||||
::boost::re_detail::char_class_graph,
|
||||
::boost::re_detail::char_class_horizontal_space,
|
||||
::boost::re_detail::char_class_lower,
|
||||
::boost::re_detail::char_class_print,
|
||||
::boost::re_detail::char_class_punct,
|
||||
::boost::re_detail::char_class_space,
|
||||
::boost::re_detail::char_class_upper,
|
||||
::boost::re_detail::char_class_vertical_space,
|
||||
::boost::re_detail::char_class_xdigit,
|
||||
::boost::re_detail::char_class_blank,
|
||||
::boost::re_detail::char_class_word,
|
||||
@ -744,7 +756,7 @@ typename cpp_regex_traits_implementation<charT>::char_class_type
|
||||
cpp_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const
|
||||
{
|
||||
#ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
|
||||
static const char_class_type masks[20] =
|
||||
static const char_class_type masks[22] =
|
||||
{
|
||||
0,
|
||||
std::ctype<char>::alnum,
|
||||
@ -754,6 +766,7 @@ typename cpp_regex_traits_implementation<charT>::char_class_type
|
||||
std::ctype<char>::digit,
|
||||
std::ctype<char>::digit,
|
||||
std::ctype<char>::graph,
|
||||
cpp_regex_traits_implementation<charT>::mask_horizontal,
|
||||
std::ctype<char>::lower,
|
||||
std::ctype<char>::lower,
|
||||
std::ctype<char>::print,
|
||||
@ -763,12 +776,13 @@ typename cpp_regex_traits_implementation<charT>::char_class_type
|
||||
std::ctype<char>::upper,
|
||||
cpp_regex_traits_implementation<charT>::mask_unicode,
|
||||
std::ctype<char>::upper,
|
||||
cpp_regex_traits_implementation<charT>::mask_vertical,
|
||||
std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word,
|
||||
std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word,
|
||||
std::ctype<char>::xdigit,
|
||||
};
|
||||
#else
|
||||
static const char_class_type masks[20] =
|
||||
static const char_class_type masks[22] =
|
||||
{
|
||||
0,
|
||||
::boost::re_detail::char_class_alnum,
|
||||
@ -778,6 +792,7 @@ typename cpp_regex_traits_implementation<charT>::char_class_type
|
||||
::boost::re_detail::char_class_digit,
|
||||
::boost::re_detail::char_class_digit,
|
||||
::boost::re_detail::char_class_graph,
|
||||
::boost::re_detail::char_class_horizontal_space,
|
||||
::boost::re_detail::char_class_lower,
|
||||
::boost::re_detail::char_class_lower,
|
||||
::boost::re_detail::char_class_print,
|
||||
@ -787,6 +802,7 @@ typename cpp_regex_traits_implementation<charT>::char_class_type
|
||||
::boost::re_detail::char_class_upper,
|
||||
::boost::re_detail::char_class_unicode,
|
||||
::boost::re_detail::char_class_upper,
|
||||
::boost::re_detail::char_class_vertical_space,
|
||||
::boost::re_detail::char_class_alnum | ::boost::re_detail::char_class_word,
|
||||
::boost::re_detail::char_class_alnum | ::boost::re_detail::char_class_word,
|
||||
::boost::re_detail::char_class_xdigit,
|
||||
@ -820,7 +836,9 @@ bool cpp_regex_traits_implementation<charT>::isctype(const charT c, char_class_t
|
||||
|| ((mask & ::boost::re_detail::char_class_xdigit) && (m_pctype->is(std::ctype<charT>::xdigit, c)))
|
||||
|| ((mask & ::boost::re_detail::char_class_blank) && (m_pctype->is(std::ctype<charT>::space, c)) && !::boost::re_detail::is_separator(c))
|
||||
|| ((mask & ::boost::re_detail::char_class_word) && (c == '_'))
|
||||
|| ((mask & ::boost::re_detail::char_class_unicode) && ::boost::re_detail::is_extended(c));
|
||||
|| ((mask & ::boost::re_detail::char_class_unicode) && ::boost::re_detail::is_extended(c))
|
||||
|| ((mask & ::boost::re_detail::char_class_vertical) && (is_separator(c) || (c == '\v')))
|
||||
|| ((mask & ::boost::re_detail::char_class_horizontal) && m_pctype->is(std::ctype<charT>::space, c) && !(is_separator(c) || (c == '\v')));
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -930,6 +948,12 @@ public:
|
||||
&& m_pimpl->m_pctype->is(std::ctype<charT>::space, c)
|
||||
&& !re_detail::is_separator(c))
|
||||
return true;
|
||||
else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_vertical)
|
||||
&& (::boost::re_detail::is_separator(c) || (c == '\v')))
|
||||
return true;
|
||||
else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_horizontal)
|
||||
&& this->isctype(c, std::ctype<charT>::space) && !this->isctype(c, re_detail::cpp_regex_traits_implementation<charT>::mask_vertical))
|
||||
return true;
|
||||
return false;
|
||||
#else
|
||||
return m_pimpl->isctype(c, f);
|
||||
|
@ -159,7 +159,7 @@ struct character_pointer_range
|
||||
template <class charT>
|
||||
int get_default_class_id(const charT* p1, const charT* p2)
|
||||
{
|
||||
static const charT data[72] = {
|
||||
static const charT data[73] = {
|
||||
'a', 'l', 'n', 'u', 'm',
|
||||
'a', 'l', 'p', 'h', 'a',
|
||||
'b', 'l', 'a', 'n', 'k',
|
||||
@ -172,11 +172,12 @@ int get_default_class_id(const charT* p1, const charT* p2)
|
||||
's', 'p', 'a', 'c', 'e',
|
||||
'u', 'n', 'i', 'c', 'o', 'd', 'e',
|
||||
'u', 'p', 'p', 'e', 'r',
|
||||
'v',
|
||||
'w', 'o', 'r', 'd',
|
||||
'x', 'd', 'i', 'g', 'i', 't',
|
||||
};
|
||||
|
||||
static const character_pointer_range<charT> ranges[19] =
|
||||
static const character_pointer_range<charT> ranges[21] =
|
||||
{
|
||||
{data+0, data+5,}, // alnum
|
||||
{data+5, data+10,}, // alpha
|
||||
@ -185,6 +186,7 @@ int get_default_class_id(const charT* p1, const charT* p2)
|
||||
{data+20, data+21,}, // d
|
||||
{data+20, data+25,}, // digit
|
||||
{data+25, data+30,}, // graph
|
||||
{data+29, data+30,}, // h
|
||||
{data+30, data+31,}, // l
|
||||
{data+30, data+35,}, // lower
|
||||
{data+35, data+40,}, // print
|
||||
@ -194,9 +196,10 @@ int get_default_class_id(const charT* p1, const charT* p2)
|
||||
{data+57, data+58,}, // u
|
||||
{data+50, data+57,}, // unicode
|
||||
{data+57, data+62,}, // upper
|
||||
{data+62, data+63,}, // w
|
||||
{data+62, data+66,}, // word
|
||||
{data+66, data+72,}, // xdigit
|
||||
{data+62, data+63,}, // v
|
||||
{data+63, data+64,}, // w
|
||||
{data+63, data+67,}, // word
|
||||
{data+67, data+73,}, // xdigit
|
||||
};
|
||||
static const character_pointer_range<charT>* ranges_begin = ranges;
|
||||
static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0]));
|
||||
|
@ -294,6 +294,8 @@ public:
|
||||
typedef typename w32_regex_traits<charT>::char_class_type char_class_type;
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_word = 0x0400); // must be C1_DEFINED << 1
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 0x0800); // must be C1_DEFINED << 2
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_horizontal = 0x1000); // must be C1_DEFINED << 3
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_vertical = 0x2000); // must be C1_DEFINED << 4
|
||||
BOOST_STATIC_CONSTANT(char_class_type, mask_base = 0x3ff); // all the masks used by the CT_CTYPE1 group
|
||||
|
||||
typedef std::basic_string<charT> string_type;
|
||||
@ -510,7 +512,7 @@ template <class charT>
|
||||
typename w32_regex_traits_implementation<charT>::char_class_type
|
||||
w32_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const
|
||||
{
|
||||
static const char_class_type masks[20] =
|
||||
static const char_class_type masks[22] =
|
||||
{
|
||||
0,
|
||||
0x0104u, // C1_ALPHA | C1_DIGIT
|
||||
@ -520,6 +522,7 @@ typename w32_regex_traits_implementation<charT>::char_class_type
|
||||
0x0004u, // C1_DIGIT
|
||||
0x0004u, // C1_DIGIT
|
||||
(~(0x0020u|0x0008u|0x0040) & 0x01ffu) | 0x0400u, // not C1_CNTRL or C1_SPACE or C1_BLANK
|
||||
w32_regex_traits_implementation<charT>::mask_horizontal,
|
||||
0x0002u, // C1_LOWER
|
||||
0x0002u, // C1_LOWER
|
||||
(~0x0020u & 0x01ffu) | 0x0400, // not C1_CNTRL
|
||||
@ -529,6 +532,7 @@ typename w32_regex_traits_implementation<charT>::char_class_type
|
||||
0x0001u, // C1_UPPER
|
||||
w32_regex_traits_implementation<charT>::mask_unicode,
|
||||
0x0001u, // C1_UPPER
|
||||
w32_regex_traits_implementation<charT>::mask_vertical,
|
||||
0x0104u | w32_regex_traits_implementation<charT>::mask_word,
|
||||
0x0104u | w32_regex_traits_implementation<charT>::mask_word,
|
||||
0x0080u, // C1_XDIGIT
|
||||
@ -628,6 +632,12 @@ public:
|
||||
return true;
|
||||
else if((f & re_detail::w32_regex_traits_implementation<charT>::mask_word) && (c == '_'))
|
||||
return true;
|
||||
else if((f & re_detail::w32_regex_traits_implementation<charT>::mask_vertical)
|
||||
&& (::boost::re_detail::is_separator(c) || (c == '\v')))
|
||||
return true;
|
||||
else if((f & re_detail::w32_regex_traits_implementation<charT>::mask_horizontal)
|
||||
&& this->isctype(c, 0x0008u) && !this->isctype(c, re_detail::w32_regex_traits_implementation<charT>::mask_vertical))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
int toi(const charT*& p1, const charT* p2, int radix)const
|
||||
|
Reference in New Issue
Block a user