diff --git a/include/boost/regex/v4/basic_regex_creator.hpp b/include/boost/regex/v4/basic_regex_creator.hpp index 5fdcea55..62bfbed1 100644 --- a/include/boost/regex/v4/basic_regex_creator.hpp +++ b/include/boost/regex/v4/basic_regex_creator.hpp @@ -64,6 +64,7 @@ protected: re_syntax_base* m_last_state;// the last state we added bool m_icase; // true for case insensitive matches typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character + typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character private: basic_regex_creator& operator=(const basic_regex_creator&); basic_regex_creator(const basic_regex_creator&); @@ -80,8 +81,11 @@ basic_regex_creator::basic_regex_creator(regex_datam_data.clear(); static const charT w = 'w'; + static const charT s = 's'; m_word_mask = m_traits.lookup_classname(&w, &w +1); - //BOOST_ASSERT(m_word_mask); // TODO!! + m_mask_space = m_traits.lookup_classname(&s, &s +1); + BOOST_ASSERT(m_word_mask); + BOOST_ASSERT(m_mask_space); } template @@ -308,7 +312,7 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) { if(!m_traits.is_class(static_cast(i), m_word_mask)) - map[i] &= ~mask; + map[i] &= static_cast(~mask); } } return; @@ -323,7 +327,7 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) { if(m_traits.is_class(static_cast(i), m_word_mask)) - map[i] &= ~mask; + map[i] &= static_cast(~mask); } } return; diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index 303cd0a3..65da6e08 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -43,6 +43,7 @@ public: bool parse_extended_escape(); bool parse_match_any(); bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits::max)()); + bool parse_repeat_range(bool isbasic); private: typedef bool (basic_regex_parser::*parser_proc_type)(); @@ -157,6 +158,7 @@ bool basic_regex_parser::parse_basic() template bool basic_regex_parser::parse_extended() { + bool result; switch(this->m_traits.syntax_type(*m_position)) { case regex_constants::syntax_open_mark: @@ -190,10 +192,20 @@ bool basic_regex_parser::parse_extended() fail(REG_BADRPT, 0); ++m_position; return parse_repeat(1); + case regex_constants::syntax_open_brace: + ++m_position; + return parse_repeat_range(false); + case regex_constants::syntax_close_brace: + fail(REG_EBRACE, this->m_position - this->m_end); + // we don't ever get here, because we will have thrown: + BOOST_ASSERT(0); + result = false; + break; default: - return parse_literal(); + result = parse_literal(); + break; } - return true; + return result; } #ifdef BOOST_MSVC #pragma warning(pop) @@ -244,16 +256,41 @@ template bool basic_regex_parser::parse_basic_escape() { ++m_position; + bool result; switch(this->m_traits.escape_syntax_type(*m_position)) { case regex_constants::syntax_open_mark: return parse_open_paren(); case regex_constants::syntax_close_mark: return false; + case regex_constants::syntax_plus: + if(this->m_pdata->m_flags & regex_constants::bk_plus_qm) + { + ++m_position; + return parse_repeat(1); + } + else + return parse_literal(); + case regex_constants::syntax_question: + if(this->m_pdata->m_flags & regex_constants::bk_plus_qm) + { + ++m_position; + return parse_repeat(0, 1); + } + else + return parse_literal(); + case regex_constants::syntax_open_brace: + ++m_position; + return parse_repeat_range(true); + case regex_constants::syntax_close_brace: + fail(REG_EBRACE, this->m_position - this->m_base); + result = false; + break; default: - return parse_literal(); + result = parse_literal(); + break; } - return true; + return result; } template @@ -305,6 +342,10 @@ bool basic_regex_parser::parse_repeat(std::size_t low, std::size_ ++m_position; } } + if(0 == this->m_last_state) + { + fail(REG_BADRPT, std::distance(m_base, m_position)); + } if(this->m_last_state->type == syntax_element_endmark) { // insert a repeat before the '(' matching the last ')': @@ -367,6 +408,80 @@ bool basic_regex_parser::parse_repeat(std::size_t low, std::size_ return true; } +template +bool basic_regex_parser::parse_repeat_range(bool isbasic) +{ + // + // parse a repeat-range: + // + std::size_t min, max; + int v; + // skip whitespace: + while((m_position != m_end) && this->m_traits.is_class(*m_position, this->m_mask_space)) + ++m_position; + // fail if at end: + if(this->m_position == this->m_end) + fail(REG_EBRACE, this->m_position - this->m_base); + // get min: + v = this->m_traits.toi(m_position, m_end, 10); + // skip whitespace: + while((m_position != m_end) && this->m_traits.is_class(*m_position, this->m_mask_space)) + ++m_position; + if(v < 0) + fail(REG_BADBR, this->m_position - this->m_base); + else if(this->m_position == this->m_end) + fail(REG_EBRACE, this->m_position - this->m_base); + min = v; + // see if we have a comma: + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma) + { + // move on and error check: + ++m_position; + // skip whitespace: + while((m_position != m_end) && this->m_traits.is_class(*m_position, this->m_mask_space)) + ++m_position; + if(this->m_position == this->m_end) + fail(REG_EBRACE, this->m_position - this->m_base); + // get the value if any: + v = this->m_traits.toi(m_position, m_end, 10); + max = (v >= 0) ? v : (std::numeric_limits::max)(); + } + else + { + // no comma, max = min: + max = min; + } + // skip whitespace: + while((m_position != m_end) && this->m_traits.is_class(*m_position, this->m_mask_space)) + ++m_position; + // OK now check trailing }: + if(this->m_position == this->m_end) + fail(REG_EBRACE, this->m_position - this->m_base); + if(isbasic) + { + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape) + { + ++m_position; + if(this->m_position == this->m_end) + fail(REG_EBRACE, this->m_position - this->m_base); + } + else + { + fail(REG_BADBR, this->m_position - this->m_base); + } + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace) + ++m_position; + else + fail(REG_BADBR, this->m_position - this->m_base); + // + // finally go and add the repeat, unless error: + // + if(min > max) + fail(REG_ERANGE, this->m_position - this->m_base); + return parse_repeat(min, max); +} + } // namespace re_detail } // namespace boost diff --git a/include/boost/regex/v4/cpp_regex_traits.hpp b/include/boost/regex/v4/cpp_regex_traits.hpp index 1cf4381c..db68b95a 100644 --- a/include/boost/regex/v4/cpp_regex_traits.hpp +++ b/include/boost/regex/v4/cpp_regex_traits.hpp @@ -36,6 +36,100 @@ class cpp_regex_traits; namespace re_detail{ +// +// class parser_buf: +// acts as a stream buffer which wraps around a pair of pointers: +// +template > +class parser_buf : public ::std::basic_streambuf +{ + typedef ::std::basic_streambuf base_type; + typedef typename base_type::int_type int_type; + typedef typename base_type::char_type char_type; + typedef typename base_type::pos_type pos_type; + typedef ::std::streamsize streamsize; + typedef typename base_type::off_type off_type; +public: + parser_buf() : base_type() { setbuf(0, 0); } + const charT* getnext() { return this->gptr(); } +protected: + std::basic_streambuf* setbuf(char_type* s, streamsize n); + typename parser_buf::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which); + typename parser_buf::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which); +private: + parser_buf& operator=(const parser_buf&); + parser_buf(const parser_buf&); +}; + +template +std::basic_streambuf* +parser_buf::setbuf(char_type* s, streamsize n) +{ + this->setg(s, s, s + n); + return this; +} + +template +typename parser_buf::pos_type +parser_buf::seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which) +{ + if(which & ::std::ios_base::out) + return pos_type(off_type(-1)); + std::ptrdiff_t size = this->egptr() - this->eback(); + std::ptrdiff_t pos = this->gptr() - this->eback(); + charT* g = this->eback(); + switch(way) + { + case ::std::ios_base::beg: + if((off < 0) || (off > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + off, g + size); + break; + case ::std::ios_base::end: + if((off < 0) || (off > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + size - off, g + size); + break; + case ::std::ios_base::cur: + { + std::ptrdiff_t newpos = pos + off; + if((newpos < 0) || (newpos > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + newpos, g + size); + break; + } + default: ; + } +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4244) +#endif + return static_cast(this->gptr() - this->eback()); +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +typename parser_buf::pos_type +parser_buf::seekpos(pos_type sp, ::std::ios_base::openmode which) +{ + if(which & ::std::ios_base::out) + return pos_type(off_type(-1)); + off_type size = static_cast(this->egptr() - this->eback()); + charT* g = this->eback(); + if(off_type(sp) <= size) + { + this->setg(g, g + off_type(sp), g + size); + } + return pos_type(off_type(-1)); +} + + // // class cpp_regex_traits_base: // acts as a container for locale and the facets we are using. @@ -210,8 +304,9 @@ template class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer { public: - cpp_regex_traits_implementation(); - cpp_regex_traits_implementation(const std::locale& l) : cpp_regex_traits_char_layer(l){} + typedef std::basic_string string_type; + //cpp_regex_traits_implementation(); + cpp_regex_traits_implementation(const std::locale& l); std::string error_string(regex_constants::error_type n) const { if(!m_error_strings.empty()) @@ -221,12 +316,15 @@ public: } return get_default_error_string(n); } + re_detail::parser_buf m_sbuf; // buffer for parsing numbers. + std::basic_istream m_is; // stream for parsing numbers. private: - std::map m_error_strings; + std::map m_error_strings; // error messages indexed by numberic ID }; template -cpp_regex_traits_implementation::cpp_regex_traits_implementation() +cpp_regex_traits_implementation::cpp_regex_traits_implementation(const std::locale& l) +: cpp_regex_traits_char_layer(l), m_is(&m_sbuf) { #ifndef __IBMCPP__ typename std::messages::catalog cat = static_cast::catalog>(-1); @@ -253,14 +351,26 @@ cpp_regex_traits_implementation::cpp_regex_traits_implementation() { for(int i = 0; i <= boost::regex_constants::error_unknown; ++i) { - std::string s = this->m_pmessages->get(cat, 0, i+200, get_default_error_string(i)); - m_error_strings[i] = s; + const char* p = get_default_error_string(i); + string_type default_message; + while(*p) + { + default_message.append(1, this->m_pctype->widen(*p)); + ++p; + } + string_type s = this->m_pmessages->get(cat, 0, i+200, default_message); + std::string result; + for(std::string::size_type j = 0; j < s.size(); ++j) + { + result.append(1, this->m_pctype->narrow(s[j], 0)); + } + m_error_strings[i] = result; } } } template -boost::shared_ptr > create_cpp_regex_traits(const std::locale& l) +boost::shared_ptr > create_cpp_regex_traits(const std::locale& l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT)) { // TODO: create a cache for previously constructed objects. return boost::shared_ptr >(new cpp_regex_traits_implementation(l)); @@ -278,7 +388,26 @@ public: typedef std::size_t size_type; typedef std::basic_string string_type; typedef std::locale locale_type; - typedef typename ctype_type::mask char_class_type; + typedef boost::uint_least32_t char_class_type; + + BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 16); + BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 17); + BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 18); + BOOST_STATIC_CONSTANT(char_class_type, + mask_base = + std::ctype::alnum + | std::ctype::alpha + | std::ctype::cntrl + | std::ctype::digit + | std::ctype::graph + | std::ctype::lower + | std::ctype::print + | std::ctype::punct + | std::ctype::space + | std::ctype::upper + | std::ctype::xdigit); + + //BOOST_STATIC_ASSERT(0 == (mask_base & (mask_word | mask_unicode))); cpp_regex_traits() : m_pimpl(re_detail::create_cpp_regex_traits(std::locale())) @@ -309,7 +438,33 @@ public: } char_class_type lookup_classname(const charT* p1, const charT* p2) const { - return 0; + static const char_class_type masks[] = + { + 0, + std::ctype::alnum, + std::ctype::alpha, + cpp_regex_traits::mask_blank, + std::ctype::cntrl, + std::ctype::digit, + std::ctype::digit, + std::ctype::graph, + std::ctype::lower, + std::ctype::lower, + std::ctype::print, + std::ctype::punct, + std::ctype::space, + std::ctype::space, + cpp_regex_traits::mask_unicode, + std::ctype::upper, + std::ctype::upper, + std::ctype::alnum | cpp_regex_traits::mask_word, + std::ctype::alnum | cpp_regex_traits::mask_word, + std::ctype::xdigit, + }; + int id = re_detail::get_default_class_id(p1, p2); + assert(id >= -1); + assert(id < sizeof(masks) / sizeof(masks[0])); + return masks[1 + id]; } string_type lookup_collatename(const charT* p1, const charT* p2) const { @@ -317,12 +472,21 @@ public: } bool is_class(charT c, char_class_type f) const { + if((f & cpp_regex_traits::mask_base) + && (m_pimpl->m_pctype->is( + static_cast::mask>(f & cpp_regex_traits::mask_base), c))) + return true; + else if((f & cpp_regex_traits::mask_unicode) && (c >= 256)) + return true; + else if((f & cpp_regex_traits::mask_word) && (c == '_')) + return true; + else if((f & cpp_regex_traits::mask_blank) + && m_pimpl->m_pctype->is(static_cast::mask>(f & cpp_regex_traits::mask_base), c) + && !re_detail::is_separator(c)) + return true; return false; } - int value(charT) const - { - return -1; - } + int toi(const charT*& p1, const charT* p2, int radix)const; locale_type imbue(locale_type l) { std::locale result(getloc()); @@ -356,6 +520,25 @@ private: #endif }; + +template +int cpp_regex_traits::toi(const charT*& first, const charT* last, int radix)const +{ + m_pimpl->m_sbuf.pubsetbuf(const_cast(first), static_cast(last-first)); + m_pimpl->m_is.clear(); + if(std::abs(radix) == 16) m_pimpl->m_is >> std::hex; + else if(std::abs(radix) == 8) m_pimpl->m_is >> std::oct; + else m_pimpl->m_is >> std::dec; + int val; + if(m_pimpl->m_is >> val) + { + first = first + ((last - first) - m_pimpl->m_sbuf.in_avail()); + return val; + } + else + return -1; +} + template std::string cpp_regex_traits::catalog_name(const std::string& name) { diff --git a/include/boost/regex/v4/perl_matcher.hpp b/include/boost/regex/v4/perl_matcher.hpp index 86102b7a..0618aecf 100644 --- a/include/boost/regex/v4/perl_matcher.hpp +++ b/include/boost/regex/v4/perl_matcher.hpp @@ -87,10 +87,10 @@ inline const charT* re_skip_past_null(const charT* p) return ++p; } -template +template iterator BOOST_REGEX_CALL re_is_set_member(iterator next, iterator last, - const re_set_long* set_, + const re_set_long* set_, const basic_regex& e) { const charT* p = reinterpret_cast(set_+1); @@ -233,7 +233,7 @@ public: } std::size_t get_count() { return count; } int get_id() { return id; } - int operator++() { return ++count; } + std::size_t operator++() { return ++count; } bool check_null_repeat(const BidiIterator& pos, std::size_t max) { // this is called when we are about to start a new repeat, diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index f4af5044..d0b23ace 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -586,10 +586,11 @@ bool perl_matcher::match_backref() template bool perl_matcher::match_long_set() { + typedef typename traits::char_class_type char_class_type; // let the traits class do the work: if(position == last) return false; - BidiIterator t = re_is_set_member(position, last, static_cast*>(pstate), re); + BidiIterator t = re_is_set_member(position, last, static_cast*>(pstate), re); if(t != position) { pstate = pstate->next.p; diff --git a/include/boost/regex/v4/perl_matcher_recursive.hpp b/include/boost/regex/v4/perl_matcher_recursive.hpp index fc6bc65f..c96a827f 100644 --- a/include/boost/regex/v4/perl_matcher_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_recursive.hpp @@ -624,8 +624,9 @@ bool perl_matcher::match_long_set_repeat() #ifdef __BORLANDC__ #pragma option push -w-8008 -w-8066 -w-8004 #endif + typedef typename traits::char_class_type char_class_type; const re_repeat* rep = static_cast(pstate); - const re_set_long* set = static_cast*>(pstate->next.p); + const re_set_long* set = static_cast*>(pstate->next.p); unsigned count = 0; // // start by working out how much we can skip: diff --git a/include/boost/regex/v4/regbase.hpp b/include/boost/regex/v4/regbase.hpp index 96e738a4..62435c88 100644 --- a/include/boost/regex/v4/regbase.hpp +++ b/include/boost/regex/v4/regbase.hpp @@ -119,6 +119,7 @@ namespace regex_constants{ collate = ::boost::regbase::collate, nosubs = ::boost::regbase::nosubs, optimize = ::boost::regbase::optimize, + bk_plus_qm = ::boost::regbase::bk_plus_qm, basic = ::boost::regbase::basic, extended = ::boost::regbase::extended, diff --git a/include/boost/regex/v4/regex_format.hpp b/include/boost/regex/v4/regex_format.hpp index 57e77842..8db2fd26 100644 --- a/include/boost/regex/v4/regex_format.hpp +++ b/include/boost/regex/v4/regex_format.hpp @@ -179,7 +179,7 @@ OutputIterator BOOST_REGEX_CALL _reg_format_aux(OutputIterator out, case regex_constants::syntax_digit: { expand_sub: - unsigned int index = parse_value(fmt, fmt_end, traits_inst, 10); + unsigned int index = traits_inst.toi(fmt, fmt_end, 10); if(index < m.size()) oi_assign(&out, re_copy_out(out, Iterator(m[index].first), Iterator(m[index].second))); continue; @@ -259,7 +259,7 @@ expand_sub: ++fmt; continue; } - int val = parse_value(fmt, fmt_end, traits_inst, 16); + int val = traits_inst.toi(fmt, fmt_end, 16); if(val < 0) { fmt -= 2; @@ -284,7 +284,7 @@ expand_sub: } else { - int val = parse_value(fmt, fmt_end, traits_inst, 16); + int val = traits_inst.toi(fmt, fmt_end, 16); if(val < 0) { --fmt; @@ -324,7 +324,7 @@ expand_sub: if(flags & format_sed) goto expand_sub; else - c = static_cast(parse_value(fmt, fmt_end, traits_inst, 8)); + c = static_cast(traits_inst.toi(fmt, fmt_end, 8)); break; default: //c = *fmt; @@ -391,7 +391,7 @@ expand_sub: ++fmt; return out; } - unsigned int id = parse_value(fmt, fmt_end, traits_inst, 10); + unsigned int id = traits_inst.toi(fmt, fmt_end, 10); if(m[id].matched) { oi_assign(&out, _reg_format_aux(out, m, fmt, flags | regex_constants::format_is_if, traits_inst)); diff --git a/include/boost/regex/v4/regex_match.hpp b/include/boost/regex/v4/regex_match.hpp index 288ea256..9e4822b5 100644 --- a/include/boost/regex/v4/regex_match.hpp +++ b/include/boost/regex/v4/regex_match.hpp @@ -139,7 +139,7 @@ inline bool regex_match(const std::string& s, } #if !defined(BOOST_NO_WREGEX) inline bool regex_match(const std::basic_string& s, - match_results::const_iterator, wregex::allocator_type>& m, + match_results::const_iterator>& m, const wregex& e, match_flag_type flags = match_default) { diff --git a/include/boost/regex/v4/regex_search.hpp b/include/boost/regex/v4/regex_search.hpp index 55e076ad..b5ebdc1b 100644 --- a/include/boost/regex/v4/regex_search.hpp +++ b/include/boost/regex/v4/regex_search.hpp @@ -131,6 +131,7 @@ inline bool regex_search(const std::basic_string& s, return regex_search(s.begin(), s.end(), e, flags); } #else // non-template function overloads +#if 0 inline bool regex_search(const char* first, const char* last, const regex& e, match_flag_type flags = match_default) @@ -142,12 +143,13 @@ inline bool regex_search(const char* first, const char* last, #ifndef BOOST_NO_WREGEX inline bool regex_search(const wchar_t* first, const wchar_t* last, const wregex& e, - match_flag_type flags = match_default) + match_flag_type flags/* = match_default*/) { wcmatch m; return regex_search(first, last, m, e, flags); } #endif +#endif inline bool regex_search(const char* str, const regex& e, match_flag_type flags = match_default) diff --git a/include/boost/regex/v4/regex_traits_defaults.hpp b/include/boost/regex/v4/regex_traits_defaults.hpp index 6934c368..0841cdf6 100644 --- a/include/boost/regex/v4/regex_traits_defaults.hpp +++ b/include/boost/regex/v4/regex_traits_defaults.hpp @@ -47,7 +47,13 @@ inline bool is_combining(unsigned char) { return false; } -#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) +#ifdef _MSC_VER +template<> +inline bool is_combining(wchar_t c) +{ + return is_combining_implementation(static_cast(c)); +} +#elif defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) #if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX) template<> inline bool is_combining(wchar_t c) @@ -58,7 +64,7 @@ inline bool is_combining(wchar_t c) template<> inline bool is_combining(wchar_t c) { - return (c > std::numeric_limits::max()) ? false : is_combining_implementation(static_cast(c)); + return (c > (std::numeric_limits::max)()) ? false : is_combining_implementation(static_cast(c)); } #endif #endif @@ -72,13 +78,85 @@ inline bool is_separator(charT c) return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r')); } +// +// get the id of a character clasification, the individual +// traits classes then transform that id into a bitmask: +// +template +struct character_pointer_range +{ + const charT* p1; + const charT* p2; + + bool operator < (const character_pointer_range& r)const + { + return std::lexicographical_compare(p1, p2, r.p1, r.p2); + } + bool operator == (const character_pointer_range& r)const + { + return (std::distance(p1, p2) == std::distance(r.p1, r.p2)) && std::equal(p1, p2, r.p1); + } +}; +template +int get_default_class_id(const charT* p1, const charT* p2) +{ + static const charT data[] = { + 'a', 'l', 'n', 'u', 'm', + 'a', 'l', 'p', 'h', 'a', + 'b', 'l', 'a', 'n', 'k', + 'c', 'n', 't', 'r', 'l', + 'd', 'i', 'g', 'i', 't', + 'g', 'r', 'a', 'p', 'h', + 'l', 'o', 'w', 'e', 'r', + 'p', 'r', 'i', 'n', 't', + 'p', 'u', 'n', 'c', 't', + 's', 'p', 'a', 'c', 'e', + 'u', 'n', 'i', 'c', 'o', 'd', 'e', + 'u', 'p', 'p', 'e', 'r', + 'w', 'o', 'r', 'd', + 'x', 'd', 'i', 'g', 'i', 't', + }; + + static const character_pointer_range ranges[] = + { + {data+0, data+5,}, // alnum + {data+5, data+10,}, // alpha + {data+10, data+15,}, // blank + {data+15, data+20,}, // cntrl + {data+20, data+21,}, // d + {data+20, data+25,}, // digit + {data+25, data+30,}, // graph + {data+30, data+31,}, // l + {data+30, data+35,}, // lower + {data+35, data+40,}, // print + {data+40, data+45,}, // punct + {data+45, data+46,}, // s + {data+45, data+50,}, // space + {data+50, data+57,}, // unicode + {data+57, data+58,}, // u + {data+57, data+62,}, // upper + {data+62, data+63,}, // w + {data+62, data+66,}, // word + {data+66, data+72,}, // xdigit + }; + + character_pointer_range t = { p1, p2, }; + const character_pointer_range* p = std::lower_bound(ranges, ranges + (sizeof(ranges)/sizeof(ranges[0])), t); + if(t == *p) + return static_cast(p - ranges); + return -1; +} + +#if 0 // // parse_value: // covert a character sequence into a value, return -1 if no digits found: // -template -int parse_value(ForwardIterator& p1, ForwardIterator p2, const traits& traits_inst, int radix = 10) +template +int parse_value(const charT*& p1, const charT* p2, const traits& traits_inst, int radix = 10) { + return traits_inst.toi(p1, p2, radix); +#if 0 int value = 0; if(traits_inst.value(*p1) < 0) return -1; @@ -91,8 +169,9 @@ int parse_value(ForwardIterator& p1, ForwardIterator p2, const traits& traits_in ++p1; } return value; +#endif } - +#endif } // re_detail } // boost diff --git a/include/boost/regex/v4/states.hpp b/include/boost/regex/v4/states.hpp index 4bfd4527..31c86cf4 100644 --- a/include/boost/regex/v4/states.hpp +++ b/include/boost/regex/v4/states.hpp @@ -221,10 +221,10 @@ enum re_jump_size_type /*** proc re_is_set_member ********************************************* Forward declaration: we'll need this one later... ***********************************************************************/ -template +template iterator BOOST_REGEX_CALL re_is_set_member(iterator next, iterator last, - const re_set_long* set_, + const re_set_long* set_, const basic_regex& e); } // namespace re_detail diff --git a/src/posix_api.cpp b/src/posix_api.cpp index 244ed4d5..cd8bd330 100644 --- a/src/posix_api.cpp +++ b/src/posix_api.cpp @@ -162,6 +162,10 @@ BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorA(int code, const regex_tA* BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecA(const regex_tA* expression, const char* buf, regsize_t n, regmatch_t* array, int eflags) { +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4267) +#endif bool result = false; match_flag_type flags = match_default | expression->eflags; const char* end; @@ -217,6 +221,9 @@ BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecA(const regex_tA* expression, cons return 0; } return REG_NOMATCH; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif } BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeA(regex_tA* expression) diff --git a/src/regex_traits_defaults.cpp b/src/regex_traits_defaults.cpp index d5fa3953..cd9e63d0 100644 --- a/src/regex_traits_defaults.cpp +++ b/src/regex_traits_defaults.cpp @@ -95,7 +95,7 @@ const char* get_default_error_string(regex_constants::error_type n) "Invalid back reference", /* REG_ESUBREG */ "Unmatched [ or [^", /* REG_EBRACK */ "Unmatched ( or \\(", /* REG_EPAREN */ - "Unmatched \\{", /* REG_EBRACE */ + "Unmatched { or \\{", /* REG_EBRACE */ "Invalid content of \\{\\}", /* REG_BADBR */ "Invalid range end", /* REG_ERANGE */ "Memory exhausted", /* REG_ESPACE */ @@ -112,7 +112,7 @@ const char* get_default_error_string(regex_constants::error_type n) "", }; - return (n <= REG_E_UNKNOWN) ? s_default_error_messages[REG_E_UNKNOWN] : s_default_error_messages[n]; + return (n > REG_E_UNKNOWN) ? s_default_error_messages[REG_E_UNKNOWN] : s_default_error_messages[n]; } bool is_combining_implementation(boost::uint_least16_t c) diff --git a/src/static_mutex.cpp b/src/static_mutex.cpp index 49072dac..d1d683dd 100644 --- a/src/static_mutex.cpp +++ b/src/static_mutex.cpp @@ -23,7 +23,10 @@ #include #if defined(BOOST_HAS_WINTHREADS) +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN #include +#include #endif @@ -64,6 +67,8 @@ void scoped_static_mutex_lock::unlock() #elif defined(BOOST_HAS_WINTHREADS) +BOOST_STATIC_ASSERT(sizeof(LONG) == sizeof(boost::int32_t)); + scoped_static_mutex_lock::scoped_static_mutex_lock(static_mutex& m, bool lk) : m_mutex(m), m_have_lock(false) { @@ -81,7 +86,11 @@ void scoped_static_mutex_lock::lock() { if(0 == m_have_lock) { - while(0 != InterlockedCompareExchange((LONG*)&(m_mutex.m_mutex), 1, 0)) +#if defined(BOOST_MSVC) && (BOOST_MSVC <=1200) + while(0 != InterlockedCompareExchange(reinterpret_cast((boost::uint_least16_t*)&(m_mutex.m_mutex)), (void*)1, 0)) +#else + while(0 != InterlockedCompareExchange(reinterpret_cast(&(m_mutex.m_mutex)), 1, 0)) +#endif { Sleep(0); } @@ -93,7 +102,11 @@ void scoped_static_mutex_lock::unlock() { if(m_have_lock) { +#if defined(BOOST_MSVC) && (BOOST_MSVC <=1200) InterlockedExchange((LONG*)&(m_mutex.m_mutex), 0); +#else + InterlockedExchange(reinterpret_cast(&(m_mutex.m_mutex)), 0); +#endif m_have_lock = false; } } diff --git a/src/wide_posix_api.cpp b/src/wide_posix_api.cpp index a6d15147..700ba217 100644 --- a/src/wide_posix_api.cpp +++ b/src/wide_posix_api.cpp @@ -170,6 +170,10 @@ BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorW(int code, const regex_tW* BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecW(const regex_tW* expression, const wchar_t* buf, regsize_t n, regmatch_t* array, int eflags) { +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4267) +#endif bool result = false; match_flag_type flags = match_default | expression->eflags; const wchar_t* end; @@ -224,6 +228,9 @@ BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecW(const regex_tW* expression, cons return 0; } return REG_NOMATCH; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif } BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeW(regex_tW* expression) diff --git a/test/regress/basic_tests.cpp b/test/regress/basic_tests.cpp index e4b06f6a..b4eab37b 100644 --- a/test/regress/basic_tests.cpp +++ b/test/regress/basic_tests.cpp @@ -80,93 +80,108 @@ void basic_tests() TEST_REGEX_SEARCH("\\*", perl, "*", match_default, make_array(0, 1, -2, -2)); TEST_REGEX_SEARCH("(ab)*", perl, "abab", match_default, make_array(0, 4, 2, 4, -2, -2)); + // now try operator + : + TEST_REGEX_SEARCH("ab+", perl, "a", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("ab+", perl, "ab", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("ab+", perl, "sssabbbbbbsss", match_default, make_array(3, 10, -2, -2)); + TEST_REGEX_SEARCH("ab+c+", perl, "abbb", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("ab+c+", perl, "accc", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("ab+c+", perl, "abbcc", match_default, make_array(0, 5, -2, -2)); + TEST_INVALID_REGEX("+a", perl); + TEST_INVALID_REGEX("\\<+", perl); + TEST_INVALID_REGEX("\\>+", perl); + TEST_REGEX_SEARCH("\n+", perl, "\n\n", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("\\+", perl, "+", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("\\+", perl, "++", match_default, make_array(0, 1, -2, 1, 2, -2, -2)); + TEST_REGEX_SEARCH("\\++", perl, "++", match_default, make_array(0, 2, -2, -2)); + + TEST_REGEX_SEARCH("+", basic|bk_plus_qm, "+", match_default, make_array(0, 1, -2, -2)); + TEST_INVALID_REGEX("\\+", basic|bk_plus_qm); + TEST_REGEX_SEARCH("a\\+", basic|bk_plus_qm, "aa", match_default, make_array(0, 2, -2, -2)); + + // now try operator ? + TEST_REGEX_SEARCH("a?", perl, "b", match_default, make_array(0, 0, -2, -2)); + TEST_REGEX_SEARCH("ab?", perl, "a", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("ab?", perl, "ab", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("ab?", perl, "sssabbbbbbsss", match_default, make_array(3, 5, -2, -2)); + TEST_REGEX_SEARCH("ab?c?", perl, "a", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("ab?c?", perl, "abbb", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("ab?c?", perl, "accc", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("ab?c?", perl, "abcc", match_default, make_array(0, 3, -2, -2)); + TEST_INVALID_REGEX("?a", perl); + TEST_INVALID_REGEX("\\?", perl); + TEST_REGEX_SEARCH("\n?", perl, "\n\n", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("\\?", perl, "?", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("\\?", perl, "?", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("\\??", perl, "??", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("?", basic|bk_plus_qm, "?", match_default, make_array(0, 1, -2, -2)); + TEST_INVALID_REGEX("\\?", basic|bk_plus_qm); + TEST_REGEX_SEARCH("a\\?", basic|bk_plus_qm, "aa", match_default, make_array(0, 1, -2, -2)); + TEST_REGEX_SEARCH("a\\?", basic|bk_plus_qm, "b", match_default, make_array(0, 0, -2, -2)); + + TEST_REGEX_SEARCH("a?", basic, "a?", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a+", basic, "a+", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a\\?", basic, "a?", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a\\+", basic, "a+", match_default, make_array(0, 2, -2, -2)); + + // now try operator {} + TEST_REGEX_SEARCH("a{2}", perl, "a", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("a{2}", perl, "aa", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a{2}", perl, "aaa", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a{2,}", perl, "a", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("a{2,}", perl, "aa", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a{2,}", perl, "aaaaa", match_default, make_array(0, 5, -2, -2)); + TEST_REGEX_SEARCH("a{2,4}", perl, "a", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("a{2,4}", perl, "aa", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a{2,4}", perl, "aaa", match_default, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("a{2,4}", perl, "aaaa", match_default, make_array(0, 4, -2, -2)); + TEST_REGEX_SEARCH("a{2,4}", perl, "aaaaa", match_default, make_array(0, 4, -2, -2)); + TEST_REGEX_SEARCH("a{ 2 , 4 }", perl, "aaaaa", match_default, make_array(0, 4, -2, -2)); + TEST_REGEX_SEARCH("a{ 2 , }", perl, "aaaaa", match_default, make_array(0, 5, -2, -2)); + TEST_REGEX_SEARCH("a{ 2 }", perl, "aaa", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a\\{\\}", perl, "a{}", match_default, make_array(0, 3, -2, -2)); + TEST_INVALID_REGEX("a{}", perl); + TEST_INVALID_REGEX("a{", perl); + TEST_INVALID_REGEX("a{1", perl); + TEST_INVALID_REGEX("a{1,", perl); + TEST_INVALID_REGEX("a{ }", perl); + TEST_INVALID_REGEX("a}", perl); + TEST_INVALID_REGEX("{1}", perl); + TEST_INVALID_REGEX("a{b}", perl); + TEST_INVALID_REGEX("a{1b}", perl); + TEST_INVALID_REGEX("a{1,b}", perl); + TEST_INVALID_REGEX("a{1,2v}", perl); + + // now try operator \\{\\} for POSIX basic regexes + TEST_REGEX_SEARCH("a\\{2\\}", basic, "a", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("a\\{2\\}", basic, "aa", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a\\{2\\}", basic, "aaa", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a\\{2,\\}", basic, "a", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("a\\{2,\\}", basic, "aa", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a\\{2,\\}", basic, "aaaaa", match_default, make_array(0, 5, -2, -2)); + TEST_REGEX_SEARCH("a\\{2,4\\}", basic, "a", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("a\\{2,4\\}", basic, "aa", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a\\{2,4\\}", basic, "aaa", match_default, make_array(0, 3, -2, -2)); + TEST_REGEX_SEARCH("a\\{2,4\\}", basic, "aaaa", match_default, make_array(0, 4, -2, -2)); + TEST_REGEX_SEARCH("a\\{2,4\\}", basic, "aaaaa", match_default, make_array(0, 4, -2, -2)); + TEST_REGEX_SEARCH("a\\{ 2 , 4 \\}", basic, "aaaaa", match_default, make_array(0, 4, -2, -2)); + TEST_REGEX_SEARCH("a\\{ 2 , \\}", basic, "aaaaa", match_default, make_array(0, 5, -2, -2)); + TEST_REGEX_SEARCH("a\\{ 2 \\}", basic, "aaa", match_default, make_array(0, 2, -2, -2)); + TEST_REGEX_SEARCH("a{}", basic, "a{}", match_default, make_array(0, 3, -2, -2)); + TEST_INVALID_REGEX("a\\{\\}", basic); + TEST_INVALID_REGEX("a\\{", basic); + TEST_INVALID_REGEX("a\\{1", basic); + TEST_INVALID_REGEX("a\\{1,", basic); + TEST_INVALID_REGEX("a\\{ \\}", basic); + TEST_INVALID_REGEX("a\\}", basic); + TEST_INVALID_REGEX("\\{1\\}", basic); + TEST_INVALID_REGEX("a\\{b\\}", basic); + TEST_INVALID_REGEX("a\\{1b\\}", basic); + TEST_INVALID_REGEX("a\\{1,b\\}", basic); + TEST_INVALID_REGEX("a\\{1,2v\\}", basic); #if 0 -; now try operator + -ab+ a -1 -1 -ab+ ab 0 2 -ab+ sssabbbbbbsss 3 10 -ab+c+ a -1 -1 -ab+c+ abbb -1 -1 -ab+c+ accc -1 -1 -ab+c+ abbcc 0 5 -+a ! -\<+ ! -\>+ ! -\n+ \n\n 0 2 -\+ + 0 1 -\+ ++ 0 1 -\++ ++ 0 2 -- match_default normal bk_plus_qm REG_EXTENDED REG_NO_POSIX_TEST -+ + 0 1 -\+ ! -a\+ aa 0 2 - -; now try operator ? -- match_default normal REG_EXTENDED -a? b 0 0 -ab? a 0 1 -ab? ab 0 2 -ab? sssabbbbbbsss 3 5 -ab?c? a 0 1 -ab?c? abbb 0 2 -ab?c? accc 0 2 -ab?c? abcc 0 3 -?a ! -\? ! -\n? \n\n 0 1 -\? ? 0 1 -\? ?? 0 1 -\?? ?? 0 1 -- match_default normal bk_plus_qm REG_EXTENDED REG_NO_POSIX_TEST -? ? 0 1 -\? ! -a\? aa 0 1 -a\? b 0 0 - -- match_default normal limited_ops -a? a? 0 2 -a+ a+ 0 2 -a\? a? 0 2 -a\+ a+ 0 2 - -; now try operator {} -- match_default normal REG_EXTENDED -a{2} a -1 -1 -a{2} aa 0 2 -a{2} aaa 0 2 -a{2,} a -1 -1 -a{2,} aa 0 2 -a{2,} aaaaa 0 5 -a{2,4} a -1 -1 -a{2,4} aa 0 2 -a{2,4} aaa 0 3 -a{2,4} aaaa 0 4 -a{2,4} aaaaa 0 4 -; spaces are now allowed inside {} -"a{ 2 , 4 }" aaaaa 0 4 -a{} ! -"a{ }" ! -a{2 ! -a} ! -\{\} {} 0 2 - -- match_default normal bk_braces -a\{2\} a -1 -1 -a\{2\} aa 0 2 -a\{2\} aaa 0 2 -a\{2,\} a -1 -1 -a\{2,\} aa 0 2 -a\{2,\} aaaaa 0 5 -a\{2,4\} a -1 -1 -a\{2,4\} aa 0 2 -a\{2,4\} aaa 0 3 -a\{2,4\} aaaa 0 4 -a\{2,4\} aaaaa 0 4 -"a\{ 2 , 4 \}" aaaaa 0 4 -{} {} 0 2 - ; now test the alternation operator | - match_default normal REG_EXTENDED a|b a 0 1 diff --git a/test/regress/test.hpp b/test/regress/test.hpp index 5f18dad8..5fb2c659 100644 --- a/test/regress/test.hpp +++ b/test/regress/test.hpp @@ -11,7 +11,7 @@ // real test: // template -void test(charT, const tagT& tag) +void test(const charT&, const tagT& tag) { boost::basic_regex e; test(e, tag); diff --git a/test/regress/test_regex_search.hpp b/test/regress/test_regex_search.hpp index 864c265a..7e70f011 100644 --- a/test/regress/test_regex_search.hpp +++ b/test/regress/test_regex_search.hpp @@ -11,6 +11,10 @@ struct test_regex_search_tag{}; template void test_sub_match(const boost::sub_match& sub, BidirectionalIterator base, const int* answer_table, int i) { +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4244) +#endif typedef typename boost::sub_match::value_type charT; if(sub.matched == 0) { @@ -38,6 +42,9 @@ void test_sub_match(const boost::sub_match& sub, Bidirect << ", expected " << answer_table[1 + 2*i] << ".", charT); } } +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif } template @@ -52,10 +59,11 @@ void test_result(const boost::match_results& w template void test_simple_search(boost::basic_regex& r) { + typedef typename std::basic_string::const_iterator const_iterator; const std::basic_string& search_text = test_info::search_text(); boost::regex_constants::match_flag_type opts = test_info::match_options(); const int* answer_table = test_info::answer_table(); - boost::match_results::const_iterator> what; + boost::match_results what; if(boost::regex_search( search_text.begin(), search_text.end(),