diff --git a/build/Jamfile.v2 b/build/Jamfile.v2 index 9f9bb1e0..52b5b454 100644 --- a/build/Jamfile.v2 +++ b/build/Jamfile.v2 @@ -127,7 +127,6 @@ SOURCES = regex.cpp regex_debug.cpp regex_raw_buffer.cpp - regex_traits_defaults.cpp static_mutex.cpp wide_posix_api.cpp ; diff --git a/include/boost/regex/v4/regex_traits_defaults.hpp b/include/boost/regex/v4/regex_traits_defaults.hpp index d08047d9..38f1d198 100644 --- a/include/boost/regex/v4/regex_traits_defaults.hpp +++ b/include/boost/regex/v4/regex_traits_defaults.hpp @@ -33,10 +33,10 @@ #include #ifndef BOOST_REGEX_SYNTAX_TYPE_HPP -#include +#include #endif #ifndef BOOST_REGEX_ERROR_TYPE_HPP -#include +#include #endif #include #include @@ -62,14 +62,548 @@ inline bool is_extended(charT c) inline bool is_extended(char) { return false; } +inline const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n) +{ + // if the user hasn't supplied a message catalog, then this supplies + // default "messages" for us to load in the range 1-100. + const char* messages[] = { + "", + "(", + ")", + "$", + "^", + ".", + "*", + "+", + "?", + "[", + "]", + "|", + "\\", + "#", + "-", + "{", + "}", + "0123456789", + "b", + "B", + "<", + ">", + "", + "", + "A`", + "z'", + "\n", + ",", + "a", + "f", + "n", + "r", + "t", + "v", + "x", + "c", + ":", + "=", + "e", + "", + "", + "", + "", + "", + "", + "", + "", + "E", + "Q", + "X", + "C", + "Z", + "G", + "!", + "p", + "P", + "N", + "gk", + "K", + "R", + }; -BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n); -BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n); -BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c); -BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c); + return ((n >= (sizeof(messages) / sizeof(messages[1]))) ? "" : messages[n]); +} + +inline const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n) +{ + static const char* const s_default_error_messages[] = { + "Success", /* REG_NOERROR 0 error_ok */ + "No match", /* REG_NOMATCH 1 error_no_match */ + "Invalid regular expression.", /* REG_BADPAT 2 error_bad_pattern */ + "Invalid collation character.", /* REG_ECOLLATE 3 error_collate */ + "Invalid character class name, collating name, or character range.", /* REG_ECTYPE 4 error_ctype */ + "Invalid or unterminated escape sequence.", /* REG_EESCAPE 5 error_escape */ + "Invalid back reference: specified capturing group does not exist.", /* REG_ESUBREG 6 error_backref */ + "Unmatched [ or [^ in character class declaration.", /* REG_EBRACK 7 error_brack */ + "Unmatched marking parenthesis ( or \\(.", /* REG_EPAREN 8 error_paren */ + "Unmatched quantified repeat operator { or \\{.", /* REG_EBRACE 9 error_brace */ + "Invalid content of repeat range.", /* REG_BADBR 10 error_badbrace */ + "Invalid range end in character class", /* REG_ERANGE 11 error_range */ + "Out of memory.", /* REG_ESPACE 12 error_space NOT USED */ + "Invalid preceding regular expression prior to repetition operator.", /* REG_BADRPT 13 error_badrepeat */ + "Premature end of regular expression", /* REG_EEND 14 error_end NOT USED */ + "Regular expression is too large.", /* REG_ESIZE 15 error_size NOT USED */ + "Unmatched ) or \\)", /* REG_ERPAREN 16 error_right_paren NOT USED */ + "Empty regular expression.", /* REG_EMPTY 17 error_empty */ + "The complexity of matching the regular expression exceeded predefined bounds. " + "Try refactoring the regular expression to make each choice made by the state machine unambiguous. " + "This exception is thrown to prevent \"eternal\" matches that take an " + "indefinite period time to locate.", /* REG_ECOMPLEXITY 18 error_complexity */ + "Ran out of stack space trying to match the regular expression.", /* REG_ESTACK 19 error_stack */ + "Invalid or unterminated Perl (?...) sequence.", /* REG_E_PERL 20 error_perl */ + "Unknown error.", /* REG_E_UNKNOWN 21 error_unknown */ + }; + + return (n > ::boost::regex_constants::error_unknown) ? s_default_error_messages[::boost::regex_constants::error_unknown] : s_default_error_messages[n]; +} + +inline regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c) +{ + // + // char_syntax determines how the compiler treats a given character + // in a regular expression. + // + static regex_constants::syntax_type char_syntax[] = { + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_newline, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /* */ // 32 + regex_constants::syntax_not, /*!*/ + regex_constants::syntax_char, /*"*/ + regex_constants::syntax_hash, /*#*/ + regex_constants::syntax_dollar, /*$*/ + regex_constants::syntax_char, /*%*/ + regex_constants::syntax_char, /*&*/ + regex_constants::escape_type_end_buffer, /*'*/ + regex_constants::syntax_open_mark, /*(*/ + regex_constants::syntax_close_mark, /*)*/ + regex_constants::syntax_star, /***/ + regex_constants::syntax_plus, /*+*/ + regex_constants::syntax_comma, /*,*/ + regex_constants::syntax_dash, /*-*/ + regex_constants::syntax_dot, /*.*/ + regex_constants::syntax_char, /*/*/ + regex_constants::syntax_digit, /*0*/ + regex_constants::syntax_digit, /*1*/ + regex_constants::syntax_digit, /*2*/ + regex_constants::syntax_digit, /*3*/ + regex_constants::syntax_digit, /*4*/ + regex_constants::syntax_digit, /*5*/ + regex_constants::syntax_digit, /*6*/ + regex_constants::syntax_digit, /*7*/ + regex_constants::syntax_digit, /*8*/ + regex_constants::syntax_digit, /*9*/ + regex_constants::syntax_colon, /*:*/ + regex_constants::syntax_char, /*;*/ + regex_constants::escape_type_left_word, /*<*/ + regex_constants::syntax_equal, /*=*/ + regex_constants::escape_type_right_word, /*>*/ + regex_constants::syntax_question, /*?*/ + regex_constants::syntax_char, /*@*/ + regex_constants::syntax_char, /*A*/ + regex_constants::syntax_char, /*B*/ + regex_constants::syntax_char, /*C*/ + regex_constants::syntax_char, /*D*/ + regex_constants::syntax_char, /*E*/ + regex_constants::syntax_char, /*F*/ + regex_constants::syntax_char, /*G*/ + regex_constants::syntax_char, /*H*/ + regex_constants::syntax_char, /*I*/ + regex_constants::syntax_char, /*J*/ + regex_constants::syntax_char, /*K*/ + regex_constants::syntax_char, /*L*/ + regex_constants::syntax_char, /*M*/ + regex_constants::syntax_char, /*N*/ + regex_constants::syntax_char, /*O*/ + regex_constants::syntax_char, /*P*/ + regex_constants::syntax_char, /*Q*/ + regex_constants::syntax_char, /*R*/ + regex_constants::syntax_char, /*S*/ + regex_constants::syntax_char, /*T*/ + regex_constants::syntax_char, /*U*/ + regex_constants::syntax_char, /*V*/ + regex_constants::syntax_char, /*W*/ + regex_constants::syntax_char, /*X*/ + regex_constants::syntax_char, /*Y*/ + regex_constants::syntax_char, /*Z*/ + regex_constants::syntax_open_set, /*[*/ + regex_constants::syntax_escape, /*\*/ + regex_constants::syntax_close_set, /*]*/ + regex_constants::syntax_caret, /*^*/ + regex_constants::syntax_char, /*_*/ + regex_constants::syntax_char, /*`*/ + regex_constants::syntax_char, /*a*/ + regex_constants::syntax_char, /*b*/ + regex_constants::syntax_char, /*c*/ + regex_constants::syntax_char, /*d*/ + regex_constants::syntax_char, /*e*/ + regex_constants::syntax_char, /*f*/ + regex_constants::syntax_char, /*g*/ + regex_constants::syntax_char, /*h*/ + regex_constants::syntax_char, /*i*/ + regex_constants::syntax_char, /*j*/ + regex_constants::syntax_char, /*k*/ + regex_constants::syntax_char, /*l*/ + regex_constants::syntax_char, /*m*/ + regex_constants::syntax_char, /*n*/ + regex_constants::syntax_char, /*o*/ + regex_constants::syntax_char, /*p*/ + regex_constants::syntax_char, /*q*/ + regex_constants::syntax_char, /*r*/ + regex_constants::syntax_char, /*s*/ + regex_constants::syntax_char, /*t*/ + regex_constants::syntax_char, /*u*/ + regex_constants::syntax_char, /*v*/ + regex_constants::syntax_char, /*w*/ + regex_constants::syntax_char, /*x*/ + regex_constants::syntax_char, /*y*/ + regex_constants::syntax_char, /*z*/ + regex_constants::syntax_open_brace, /*{*/ + regex_constants::syntax_or, /*|*/ + regex_constants::syntax_close_brace, /*}*/ + regex_constants::syntax_char, /*~*/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + }; + + return char_syntax[(unsigned char)c]; +} + +inline regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c) +{ + // + // char_syntax determines how the compiler treats a given character + // in a regular expression. + // + static regex_constants::escape_syntax_type char_syntax[] = { + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /* */ // 32 + regex_constants::escape_type_identity, /*!*/ + regex_constants::escape_type_identity, /*"*/ + regex_constants::escape_type_identity, /*#*/ + regex_constants::escape_type_identity, /*$*/ + regex_constants::escape_type_identity, /*%*/ + regex_constants::escape_type_identity, /*&*/ + regex_constants::escape_type_end_buffer, /*'*/ + regex_constants::syntax_open_mark, /*(*/ + regex_constants::syntax_close_mark, /*)*/ + regex_constants::escape_type_identity, /***/ + regex_constants::syntax_plus, /*+*/ + regex_constants::escape_type_identity, /*,*/ + regex_constants::escape_type_identity, /*-*/ + regex_constants::escape_type_identity, /*.*/ + regex_constants::escape_type_identity, /*/*/ + regex_constants::escape_type_decimal, /*0*/ + regex_constants::escape_type_backref, /*1*/ + regex_constants::escape_type_backref, /*2*/ + regex_constants::escape_type_backref, /*3*/ + regex_constants::escape_type_backref, /*4*/ + regex_constants::escape_type_backref, /*5*/ + regex_constants::escape_type_backref, /*6*/ + regex_constants::escape_type_backref, /*7*/ + regex_constants::escape_type_backref, /*8*/ + regex_constants::escape_type_backref, /*9*/ + regex_constants::escape_type_identity, /*:*/ + regex_constants::escape_type_identity, /*;*/ + regex_constants::escape_type_left_word, /*<*/ + regex_constants::escape_type_identity, /*=*/ + regex_constants::escape_type_right_word, /*>*/ + regex_constants::syntax_question, /*?*/ + regex_constants::escape_type_identity, /*@*/ + regex_constants::escape_type_start_buffer, /*A*/ + regex_constants::escape_type_not_word_assert, /*B*/ + regex_constants::escape_type_C, /*C*/ + regex_constants::escape_type_not_class, /*D*/ + regex_constants::escape_type_E, /*E*/ + regex_constants::escape_type_not_class, /*F*/ + regex_constants::escape_type_G, /*G*/ + regex_constants::escape_type_not_class, /*H*/ + regex_constants::escape_type_not_class, /*I*/ + regex_constants::escape_type_not_class, /*J*/ + regex_constants::escape_type_reset_start_mark, /*K*/ + regex_constants::escape_type_not_class, /*L*/ + regex_constants::escape_type_not_class, /*M*/ + regex_constants::escape_type_named_char, /*N*/ + regex_constants::escape_type_not_class, /*O*/ + regex_constants::escape_type_not_property, /*P*/ + regex_constants::escape_type_Q, /*Q*/ + regex_constants::escape_type_line_ending, /*R*/ + regex_constants::escape_type_not_class, /*S*/ + regex_constants::escape_type_not_class, /*T*/ + regex_constants::escape_type_not_class, /*U*/ + regex_constants::escape_type_not_class, /*V*/ + regex_constants::escape_type_not_class, /*W*/ + regex_constants::escape_type_X, /*X*/ + regex_constants::escape_type_not_class, /*Y*/ + regex_constants::escape_type_Z, /*Z*/ + regex_constants::escape_type_identity, /*[*/ + regex_constants::escape_type_identity, /*\*/ + regex_constants::escape_type_identity, /*]*/ + regex_constants::escape_type_identity, /*^*/ + regex_constants::escape_type_identity, /*_*/ + regex_constants::escape_type_start_buffer, /*`*/ + regex_constants::escape_type_control_a, /*a*/ + regex_constants::escape_type_word_assert, /*b*/ + regex_constants::escape_type_ascii_control, /*c*/ + regex_constants::escape_type_class, /*d*/ + regex_constants::escape_type_e, /*e*/ + regex_constants::escape_type_control_f, /*f*/ + regex_constants::escape_type_extended_backref, /*g*/ + regex_constants::escape_type_class, /*h*/ + regex_constants::escape_type_class, /*i*/ + regex_constants::escape_type_class, /*j*/ + regex_constants::escape_type_extended_backref, /*k*/ + regex_constants::escape_type_class, /*l*/ + regex_constants::escape_type_class, /*m*/ + regex_constants::escape_type_control_n, /*n*/ + regex_constants::escape_type_class, /*o*/ + regex_constants::escape_type_property, /*p*/ + regex_constants::escape_type_class, /*q*/ + regex_constants::escape_type_control_r, /*r*/ + regex_constants::escape_type_class, /*s*/ + regex_constants::escape_type_control_t, /*t*/ + regex_constants::escape_type_class, /*u*/ + regex_constants::escape_type_control_v, /*v*/ + regex_constants::escape_type_class, /*w*/ + regex_constants::escape_type_hex, /*x*/ + regex_constants::escape_type_class, /*y*/ + regex_constants::escape_type_end_buffer, /*z*/ + regex_constants::syntax_open_brace, /*{*/ + regex_constants::syntax_or, /*|*/ + regex_constants::syntax_close_brace, /*}*/ + regex_constants::escape_type_identity, /*~*/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + }; + + return char_syntax[(unsigned char)c]; +} // is charT c a combining character? -BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s); +inline bool BOOST_REGEX_CALL is_combining_implementation(boost::uint_least16_t c) +{ + const boost::uint_least16_t combining_ranges[] = { 0x0300, 0x0361, + 0x0483, 0x0486, + 0x0903, 0x0903, + 0x093E, 0x0940, + 0x0949, 0x094C, + 0x0982, 0x0983, + 0x09BE, 0x09C0, + 0x09C7, 0x09CC, + 0x09D7, 0x09D7, + 0x0A3E, 0x0A40, + 0x0A83, 0x0A83, + 0x0ABE, 0x0AC0, + 0x0AC9, 0x0ACC, + 0x0B02, 0x0B03, + 0x0B3E, 0x0B3E, + 0x0B40, 0x0B40, + 0x0B47, 0x0B4C, + 0x0B57, 0x0B57, + 0x0B83, 0x0B83, + 0x0BBE, 0x0BBF, + 0x0BC1, 0x0BCC, + 0x0BD7, 0x0BD7, + 0x0C01, 0x0C03, + 0x0C41, 0x0C44, + 0x0C82, 0x0C83, + 0x0CBE, 0x0CBE, + 0x0CC0, 0x0CC4, + 0x0CC7, 0x0CCB, + 0x0CD5, 0x0CD6, + 0x0D02, 0x0D03, + 0x0D3E, 0x0D40, + 0x0D46, 0x0D4C, + 0x0D57, 0x0D57, + 0x0F7F, 0x0F7F, + 0x20D0, 0x20E1, + 0x3099, 0x309A, + 0xFE20, 0xFE23, + 0xffff, 0xffff, }; + + const boost::uint_least16_t* p = combining_ranges + 1; + while (*p < c) p += 2; + --p; + if ((c >= *p) && (c <= *(p + 1))) + return true; + return false; +} template inline bool is_combining(charT c) @@ -138,7 +672,77 @@ inline bool is_separator(char c) // // get a default collating element: // -BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name); +inline std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name) +{ + // + // these are the POSIX collating names: + // + static const char* def_coll_names[] = { + "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "alert", "backspace", "tab", "newline", + "vertical-tab", "form-feed", "carriage-return", "SO", "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", + "SYN", "ETB", "CAN", "EM", "SUB", "ESC", "IS4", "IS3", "IS2", "IS1", "space", "exclamation-mark", + "quotation-mark", "number-sign", "dollar-sign", "percent-sign", "ampersand", "apostrophe", + "left-parenthesis", "right-parenthesis", "asterisk", "plus-sign", "comma", "hyphen", + "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", + "colon", "semicolon", "less-than-sign", "equals-sign", "greater-than-sign", + "question-mark", "commercial-at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", + "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "left-square-bracket", "backslash", + "right-square-bracket", "circumflex", "underscore", "grave-accent", "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "left-curly-bracket", + "vertical-line", "right-curly-bracket", "tilde", "DEL", "", + }; + + // these multi-character collating elements + // should keep most Western-European locales + // happy - we should really localise these a + // little more - but this will have to do for + // now: + + static const char* def_multi_coll[] = { + "ae", + "Ae", + "AE", + "ch", + "Ch", + "CH", + "ll", + "Ll", + "LL", + "ss", + "Ss", + "SS", + "nj", + "Nj", + "NJ", + "dz", + "Dz", + "DZ", + "lj", + "Lj", + "LJ", + "", + }; + + unsigned int i = 0; + while (*def_coll_names[i]) + { + if (def_coll_names[i] == name) + { + return std::string(1, char(i)); + } + ++i; + } + i = 0; + while (*def_multi_coll[i]) + { + if (def_multi_coll[i] == name) + { + return def_multi_coll[i]; + } + ++i; + } + return std::string(); +} // // get the state_id of a character classification, the individual @@ -255,15 +859,25 @@ inline charT BOOST_REGEX_CALL global_upper(charT c) return c; } -BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c); -BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c); +inline char BOOST_REGEX_CALL do_global_lower(char c) +{ + return static_cast((std::tolower)((unsigned char)c)); +} + +inline char BOOST_REGEX_CALL do_global_upper(char c) +{ + return static_cast((std::toupper)((unsigned char)c)); +} #ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c); -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c); -#endif -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c); -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c); +inline wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c) +{ + return (std::towlower)(c); +} + +inline wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c) +{ + return (std::towupper)(c); +} #endif // // This sucks: declare template specialisations of global_lower/global_upper @@ -275,15 +889,11 @@ BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short // otherwise the "local template instantiation" compiler option can pick // the wrong instantiation when linking: // -template<> inline char BOOST_REGEX_CALL global_lower(char c){ return do_global_lower(c); } -template<> inline char BOOST_REGEX_CALL global_upper(char c){ return do_global_upper(c); } +template<> inline char BOOST_REGEX_CALL global_lower(char c) { return do_global_lower(c); } +template<> inline char BOOST_REGEX_CALL global_upper(char c) { return do_global_upper(c); } #ifndef BOOST_NO_WREGEX -template<> inline wchar_t BOOST_REGEX_CALL global_lower(wchar_t c){ return do_global_lower(c); } -template<> inline wchar_t BOOST_REGEX_CALL global_upper(wchar_t c){ return do_global_upper(c); } -#endif -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -template<> inline unsigned short BOOST_REGEX_CALL global_lower(unsigned short c){ return do_global_lower(c); } -template<> inline unsigned short BOOST_REGEX_CALL global_upper(unsigned short c){ return do_global_upper(c); } +template<> inline wchar_t BOOST_REGEX_CALL global_lower(wchar_t c) { return do_global_lower(c); } +template<> inline wchar_t BOOST_REGEX_CALL global_upper(wchar_t c) { return do_global_upper(c); } #endif template diff --git a/include/boost/regex/v5/regex_traits_defaults.hpp b/include/boost/regex/v5/regex_traits_defaults.hpp index c924f2de..38f1d198 100644 --- a/include/boost/regex/v5/regex_traits_defaults.hpp +++ b/include/boost/regex/v5/regex_traits_defaults.hpp @@ -62,14 +62,548 @@ inline bool is_extended(charT c) inline bool is_extended(char) { return false; } +inline const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n) +{ + // if the user hasn't supplied a message catalog, then this supplies + // default "messages" for us to load in the range 1-100. + const char* messages[] = { + "", + "(", + ")", + "$", + "^", + ".", + "*", + "+", + "?", + "[", + "]", + "|", + "\\", + "#", + "-", + "{", + "}", + "0123456789", + "b", + "B", + "<", + ">", + "", + "", + "A`", + "z'", + "\n", + ",", + "a", + "f", + "n", + "r", + "t", + "v", + "x", + "c", + ":", + "=", + "e", + "", + "", + "", + "", + "", + "", + "", + "", + "E", + "Q", + "X", + "C", + "Z", + "G", + "!", + "p", + "P", + "N", + "gk", + "K", + "R", + }; -BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n); -BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n); -BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c); -BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c); + return ((n >= (sizeof(messages) / sizeof(messages[1]))) ? "" : messages[n]); +} + +inline const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n) +{ + static const char* const s_default_error_messages[] = { + "Success", /* REG_NOERROR 0 error_ok */ + "No match", /* REG_NOMATCH 1 error_no_match */ + "Invalid regular expression.", /* REG_BADPAT 2 error_bad_pattern */ + "Invalid collation character.", /* REG_ECOLLATE 3 error_collate */ + "Invalid character class name, collating name, or character range.", /* REG_ECTYPE 4 error_ctype */ + "Invalid or unterminated escape sequence.", /* REG_EESCAPE 5 error_escape */ + "Invalid back reference: specified capturing group does not exist.", /* REG_ESUBREG 6 error_backref */ + "Unmatched [ or [^ in character class declaration.", /* REG_EBRACK 7 error_brack */ + "Unmatched marking parenthesis ( or \\(.", /* REG_EPAREN 8 error_paren */ + "Unmatched quantified repeat operator { or \\{.", /* REG_EBRACE 9 error_brace */ + "Invalid content of repeat range.", /* REG_BADBR 10 error_badbrace */ + "Invalid range end in character class", /* REG_ERANGE 11 error_range */ + "Out of memory.", /* REG_ESPACE 12 error_space NOT USED */ + "Invalid preceding regular expression prior to repetition operator.", /* REG_BADRPT 13 error_badrepeat */ + "Premature end of regular expression", /* REG_EEND 14 error_end NOT USED */ + "Regular expression is too large.", /* REG_ESIZE 15 error_size NOT USED */ + "Unmatched ) or \\)", /* REG_ERPAREN 16 error_right_paren NOT USED */ + "Empty regular expression.", /* REG_EMPTY 17 error_empty */ + "The complexity of matching the regular expression exceeded predefined bounds. " + "Try refactoring the regular expression to make each choice made by the state machine unambiguous. " + "This exception is thrown to prevent \"eternal\" matches that take an " + "indefinite period time to locate.", /* REG_ECOMPLEXITY 18 error_complexity */ + "Ran out of stack space trying to match the regular expression.", /* REG_ESTACK 19 error_stack */ + "Invalid or unterminated Perl (?...) sequence.", /* REG_E_PERL 20 error_perl */ + "Unknown error.", /* REG_E_UNKNOWN 21 error_unknown */ + }; + + return (n > ::boost::regex_constants::error_unknown) ? s_default_error_messages[::boost::regex_constants::error_unknown] : s_default_error_messages[n]; +} + +inline regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c) +{ + // + // char_syntax determines how the compiler treats a given character + // in a regular expression. + // + static regex_constants::syntax_type char_syntax[] = { + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_newline, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /* */ // 32 + regex_constants::syntax_not, /*!*/ + regex_constants::syntax_char, /*"*/ + regex_constants::syntax_hash, /*#*/ + regex_constants::syntax_dollar, /*$*/ + regex_constants::syntax_char, /*%*/ + regex_constants::syntax_char, /*&*/ + regex_constants::escape_type_end_buffer, /*'*/ + regex_constants::syntax_open_mark, /*(*/ + regex_constants::syntax_close_mark, /*)*/ + regex_constants::syntax_star, /***/ + regex_constants::syntax_plus, /*+*/ + regex_constants::syntax_comma, /*,*/ + regex_constants::syntax_dash, /*-*/ + regex_constants::syntax_dot, /*.*/ + regex_constants::syntax_char, /*/*/ + regex_constants::syntax_digit, /*0*/ + regex_constants::syntax_digit, /*1*/ + regex_constants::syntax_digit, /*2*/ + regex_constants::syntax_digit, /*3*/ + regex_constants::syntax_digit, /*4*/ + regex_constants::syntax_digit, /*5*/ + regex_constants::syntax_digit, /*6*/ + regex_constants::syntax_digit, /*7*/ + regex_constants::syntax_digit, /*8*/ + regex_constants::syntax_digit, /*9*/ + regex_constants::syntax_colon, /*:*/ + regex_constants::syntax_char, /*;*/ + regex_constants::escape_type_left_word, /*<*/ + regex_constants::syntax_equal, /*=*/ + regex_constants::escape_type_right_word, /*>*/ + regex_constants::syntax_question, /*?*/ + regex_constants::syntax_char, /*@*/ + regex_constants::syntax_char, /*A*/ + regex_constants::syntax_char, /*B*/ + regex_constants::syntax_char, /*C*/ + regex_constants::syntax_char, /*D*/ + regex_constants::syntax_char, /*E*/ + regex_constants::syntax_char, /*F*/ + regex_constants::syntax_char, /*G*/ + regex_constants::syntax_char, /*H*/ + regex_constants::syntax_char, /*I*/ + regex_constants::syntax_char, /*J*/ + regex_constants::syntax_char, /*K*/ + regex_constants::syntax_char, /*L*/ + regex_constants::syntax_char, /*M*/ + regex_constants::syntax_char, /*N*/ + regex_constants::syntax_char, /*O*/ + regex_constants::syntax_char, /*P*/ + regex_constants::syntax_char, /*Q*/ + regex_constants::syntax_char, /*R*/ + regex_constants::syntax_char, /*S*/ + regex_constants::syntax_char, /*T*/ + regex_constants::syntax_char, /*U*/ + regex_constants::syntax_char, /*V*/ + regex_constants::syntax_char, /*W*/ + regex_constants::syntax_char, /*X*/ + regex_constants::syntax_char, /*Y*/ + regex_constants::syntax_char, /*Z*/ + regex_constants::syntax_open_set, /*[*/ + regex_constants::syntax_escape, /*\*/ + regex_constants::syntax_close_set, /*]*/ + regex_constants::syntax_caret, /*^*/ + regex_constants::syntax_char, /*_*/ + regex_constants::syntax_char, /*`*/ + regex_constants::syntax_char, /*a*/ + regex_constants::syntax_char, /*b*/ + regex_constants::syntax_char, /*c*/ + regex_constants::syntax_char, /*d*/ + regex_constants::syntax_char, /*e*/ + regex_constants::syntax_char, /*f*/ + regex_constants::syntax_char, /*g*/ + regex_constants::syntax_char, /*h*/ + regex_constants::syntax_char, /*i*/ + regex_constants::syntax_char, /*j*/ + regex_constants::syntax_char, /*k*/ + regex_constants::syntax_char, /*l*/ + regex_constants::syntax_char, /*m*/ + regex_constants::syntax_char, /*n*/ + regex_constants::syntax_char, /*o*/ + regex_constants::syntax_char, /*p*/ + regex_constants::syntax_char, /*q*/ + regex_constants::syntax_char, /*r*/ + regex_constants::syntax_char, /*s*/ + regex_constants::syntax_char, /*t*/ + regex_constants::syntax_char, /*u*/ + regex_constants::syntax_char, /*v*/ + regex_constants::syntax_char, /*w*/ + regex_constants::syntax_char, /*x*/ + regex_constants::syntax_char, /*y*/ + regex_constants::syntax_char, /*z*/ + regex_constants::syntax_open_brace, /*{*/ + regex_constants::syntax_or, /*|*/ + regex_constants::syntax_close_brace, /*}*/ + regex_constants::syntax_char, /*~*/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + }; + + return char_syntax[(unsigned char)c]; +} + +inline regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c) +{ + // + // char_syntax determines how the compiler treats a given character + // in a regular expression. + // + static regex_constants::escape_syntax_type char_syntax[] = { + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /* */ // 32 + regex_constants::escape_type_identity, /*!*/ + regex_constants::escape_type_identity, /*"*/ + regex_constants::escape_type_identity, /*#*/ + regex_constants::escape_type_identity, /*$*/ + regex_constants::escape_type_identity, /*%*/ + regex_constants::escape_type_identity, /*&*/ + regex_constants::escape_type_end_buffer, /*'*/ + regex_constants::syntax_open_mark, /*(*/ + regex_constants::syntax_close_mark, /*)*/ + regex_constants::escape_type_identity, /***/ + regex_constants::syntax_plus, /*+*/ + regex_constants::escape_type_identity, /*,*/ + regex_constants::escape_type_identity, /*-*/ + regex_constants::escape_type_identity, /*.*/ + regex_constants::escape_type_identity, /*/*/ + regex_constants::escape_type_decimal, /*0*/ + regex_constants::escape_type_backref, /*1*/ + regex_constants::escape_type_backref, /*2*/ + regex_constants::escape_type_backref, /*3*/ + regex_constants::escape_type_backref, /*4*/ + regex_constants::escape_type_backref, /*5*/ + regex_constants::escape_type_backref, /*6*/ + regex_constants::escape_type_backref, /*7*/ + regex_constants::escape_type_backref, /*8*/ + regex_constants::escape_type_backref, /*9*/ + regex_constants::escape_type_identity, /*:*/ + regex_constants::escape_type_identity, /*;*/ + regex_constants::escape_type_left_word, /*<*/ + regex_constants::escape_type_identity, /*=*/ + regex_constants::escape_type_right_word, /*>*/ + regex_constants::syntax_question, /*?*/ + regex_constants::escape_type_identity, /*@*/ + regex_constants::escape_type_start_buffer, /*A*/ + regex_constants::escape_type_not_word_assert, /*B*/ + regex_constants::escape_type_C, /*C*/ + regex_constants::escape_type_not_class, /*D*/ + regex_constants::escape_type_E, /*E*/ + regex_constants::escape_type_not_class, /*F*/ + regex_constants::escape_type_G, /*G*/ + regex_constants::escape_type_not_class, /*H*/ + regex_constants::escape_type_not_class, /*I*/ + regex_constants::escape_type_not_class, /*J*/ + regex_constants::escape_type_reset_start_mark, /*K*/ + regex_constants::escape_type_not_class, /*L*/ + regex_constants::escape_type_not_class, /*M*/ + regex_constants::escape_type_named_char, /*N*/ + regex_constants::escape_type_not_class, /*O*/ + regex_constants::escape_type_not_property, /*P*/ + regex_constants::escape_type_Q, /*Q*/ + regex_constants::escape_type_line_ending, /*R*/ + regex_constants::escape_type_not_class, /*S*/ + regex_constants::escape_type_not_class, /*T*/ + regex_constants::escape_type_not_class, /*U*/ + regex_constants::escape_type_not_class, /*V*/ + regex_constants::escape_type_not_class, /*W*/ + regex_constants::escape_type_X, /*X*/ + regex_constants::escape_type_not_class, /*Y*/ + regex_constants::escape_type_Z, /*Z*/ + regex_constants::escape_type_identity, /*[*/ + regex_constants::escape_type_identity, /*\*/ + regex_constants::escape_type_identity, /*]*/ + regex_constants::escape_type_identity, /*^*/ + regex_constants::escape_type_identity, /*_*/ + regex_constants::escape_type_start_buffer, /*`*/ + regex_constants::escape_type_control_a, /*a*/ + regex_constants::escape_type_word_assert, /*b*/ + regex_constants::escape_type_ascii_control, /*c*/ + regex_constants::escape_type_class, /*d*/ + regex_constants::escape_type_e, /*e*/ + regex_constants::escape_type_control_f, /*f*/ + regex_constants::escape_type_extended_backref, /*g*/ + regex_constants::escape_type_class, /*h*/ + regex_constants::escape_type_class, /*i*/ + regex_constants::escape_type_class, /*j*/ + regex_constants::escape_type_extended_backref, /*k*/ + regex_constants::escape_type_class, /*l*/ + regex_constants::escape_type_class, /*m*/ + regex_constants::escape_type_control_n, /*n*/ + regex_constants::escape_type_class, /*o*/ + regex_constants::escape_type_property, /*p*/ + regex_constants::escape_type_class, /*q*/ + regex_constants::escape_type_control_r, /*r*/ + regex_constants::escape_type_class, /*s*/ + regex_constants::escape_type_control_t, /*t*/ + regex_constants::escape_type_class, /*u*/ + regex_constants::escape_type_control_v, /*v*/ + regex_constants::escape_type_class, /*w*/ + regex_constants::escape_type_hex, /*x*/ + regex_constants::escape_type_class, /*y*/ + regex_constants::escape_type_end_buffer, /*z*/ + regex_constants::syntax_open_brace, /*{*/ + regex_constants::syntax_or, /*|*/ + regex_constants::syntax_close_brace, /*}*/ + regex_constants::escape_type_identity, /*~*/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + }; + + return char_syntax[(unsigned char)c]; +} // is charT c a combining character? -BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s); +inline bool BOOST_REGEX_CALL is_combining_implementation(boost::uint_least16_t c) +{ + const boost::uint_least16_t combining_ranges[] = { 0x0300, 0x0361, + 0x0483, 0x0486, + 0x0903, 0x0903, + 0x093E, 0x0940, + 0x0949, 0x094C, + 0x0982, 0x0983, + 0x09BE, 0x09C0, + 0x09C7, 0x09CC, + 0x09D7, 0x09D7, + 0x0A3E, 0x0A40, + 0x0A83, 0x0A83, + 0x0ABE, 0x0AC0, + 0x0AC9, 0x0ACC, + 0x0B02, 0x0B03, + 0x0B3E, 0x0B3E, + 0x0B40, 0x0B40, + 0x0B47, 0x0B4C, + 0x0B57, 0x0B57, + 0x0B83, 0x0B83, + 0x0BBE, 0x0BBF, + 0x0BC1, 0x0BCC, + 0x0BD7, 0x0BD7, + 0x0C01, 0x0C03, + 0x0C41, 0x0C44, + 0x0C82, 0x0C83, + 0x0CBE, 0x0CBE, + 0x0CC0, 0x0CC4, + 0x0CC7, 0x0CCB, + 0x0CD5, 0x0CD6, + 0x0D02, 0x0D03, + 0x0D3E, 0x0D40, + 0x0D46, 0x0D4C, + 0x0D57, 0x0D57, + 0x0F7F, 0x0F7F, + 0x20D0, 0x20E1, + 0x3099, 0x309A, + 0xFE20, 0xFE23, + 0xffff, 0xffff, }; + + const boost::uint_least16_t* p = combining_ranges + 1; + while (*p < c) p += 2; + --p; + if ((c >= *p) && (c <= *(p + 1))) + return true; + return false; +} template inline bool is_combining(charT c) @@ -138,7 +672,77 @@ inline bool is_separator(char c) // // get a default collating element: // -BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name); +inline std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name) +{ + // + // these are the POSIX collating names: + // + static const char* def_coll_names[] = { + "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "alert", "backspace", "tab", "newline", + "vertical-tab", "form-feed", "carriage-return", "SO", "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", + "SYN", "ETB", "CAN", "EM", "SUB", "ESC", "IS4", "IS3", "IS2", "IS1", "space", "exclamation-mark", + "quotation-mark", "number-sign", "dollar-sign", "percent-sign", "ampersand", "apostrophe", + "left-parenthesis", "right-parenthesis", "asterisk", "plus-sign", "comma", "hyphen", + "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", + "colon", "semicolon", "less-than-sign", "equals-sign", "greater-than-sign", + "question-mark", "commercial-at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", + "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "left-square-bracket", "backslash", + "right-square-bracket", "circumflex", "underscore", "grave-accent", "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "left-curly-bracket", + "vertical-line", "right-curly-bracket", "tilde", "DEL", "", + }; + + // these multi-character collating elements + // should keep most Western-European locales + // happy - we should really localise these a + // little more - but this will have to do for + // now: + + static const char* def_multi_coll[] = { + "ae", + "Ae", + "AE", + "ch", + "Ch", + "CH", + "ll", + "Ll", + "LL", + "ss", + "Ss", + "SS", + "nj", + "Nj", + "NJ", + "dz", + "Dz", + "DZ", + "lj", + "Lj", + "LJ", + "", + }; + + unsigned int i = 0; + while (*def_coll_names[i]) + { + if (def_coll_names[i] == name) + { + return std::string(1, char(i)); + } + ++i; + } + i = 0; + while (*def_multi_coll[i]) + { + if (def_multi_coll[i] == name) + { + return def_multi_coll[i]; + } + ++i; + } + return std::string(); +} // // get the state_id of a character classification, the individual @@ -255,15 +859,25 @@ inline charT BOOST_REGEX_CALL global_upper(charT c) return c; } -BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c); -BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c); +inline char BOOST_REGEX_CALL do_global_lower(char c) +{ + return static_cast((std::tolower)((unsigned char)c)); +} + +inline char BOOST_REGEX_CALL do_global_upper(char c) +{ + return static_cast((std::toupper)((unsigned char)c)); +} #ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c); -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c); -#endif -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c); -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c); +inline wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c) +{ + return (std::towlower)(c); +} + +inline wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c) +{ + return (std::towupper)(c); +} #endif // // This sucks: declare template specialisations of global_lower/global_upper @@ -275,15 +889,11 @@ BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short // otherwise the "local template instantiation" compiler option can pick // the wrong instantiation when linking: // -template<> inline char BOOST_REGEX_CALL global_lower(char c){ return do_global_lower(c); } -template<> inline char BOOST_REGEX_CALL global_upper(char c){ return do_global_upper(c); } +template<> inline char BOOST_REGEX_CALL global_lower(char c) { return do_global_lower(c); } +template<> inline char BOOST_REGEX_CALL global_upper(char c) { return do_global_upper(c); } #ifndef BOOST_NO_WREGEX -template<> inline wchar_t BOOST_REGEX_CALL global_lower(wchar_t c){ return do_global_lower(c); } -template<> inline wchar_t BOOST_REGEX_CALL global_upper(wchar_t c){ return do_global_upper(c); } -#endif -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -template<> inline unsigned short BOOST_REGEX_CALL global_lower(unsigned short c){ return do_global_lower(c); } -template<> inline unsigned short BOOST_REGEX_CALL global_upper(unsigned short c){ return do_global_upper(c); } +template<> inline wchar_t BOOST_REGEX_CALL global_lower(wchar_t c) { return do_global_lower(c); } +template<> inline wchar_t BOOST_REGEX_CALL global_upper(wchar_t c) { return do_global_upper(c); } #endif template diff --git a/src/regex_traits_defaults.cpp b/src/regex_traits_defaults.cpp deleted file mode 100644 index 5ac46d2d..00000000 --- a/src/regex_traits_defaults.cpp +++ /dev/null @@ -1,692 +0,0 @@ -/* - * - * Copyright (c) 2004 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE regex_traits_defaults.cpp - * VERSION see - * DESCRIPTION: Declares API's for access to regex_traits default properties. - */ - -#define BOOST_REGEX_SOURCE -#include - -#include -#ifndef BOOST_NO_WREGEX -#include -#endif - -#if defined(BOOST_NO_STDC_NAMESPACE) -namespace std{ - using ::tolower; - using ::toupper; -#ifndef BOOST_NO_WREGEX - using ::towlower; - using ::towupper; -#endif -} -#endif - - -namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ - -BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n) -{ - // if the user hasn't supplied a message catalog, then this supplies - // default "messages" for us to load in the range 1-100. - const char* messages[] = { - "", - "(", - ")", - "$", - "^", - ".", - "*", - "+", - "?", - "[", - "]", - "|", - "\\", - "#", - "-", - "{", - "}", - "0123456789", - "b", - "B", - "<", - ">", - "", - "", - "A`", - "z'", - "\n", - ",", - "a", - "f", - "n", - "r", - "t", - "v", - "x", - "c", - ":", - "=", - "e", - "", - "", - "", - "", - "", - "", - "", - "", - "E", - "Q", - "X", - "C", - "Z", - "G", - "!", - "p", - "P", - "N", - "gk", - "K", - "R", - }; - - return ((n >= (sizeof(messages) / sizeof(messages[1]))) ? "" : messages[n]); -} - -BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n) -{ - static const char* const s_default_error_messages[] = { - "Success", /* REG_NOERROR 0 error_ok */ - "No match", /* REG_NOMATCH 1 error_no_match */ - "Invalid regular expression.", /* REG_BADPAT 2 error_bad_pattern */ - "Invalid collation character.", /* REG_ECOLLATE 3 error_collate */ - "Invalid character class name, collating name, or character range.", /* REG_ECTYPE 4 error_ctype */ - "Invalid or unterminated escape sequence.", /* REG_EESCAPE 5 error_escape */ - "Invalid back reference: specified capturing group does not exist.", /* REG_ESUBREG 6 error_backref */ - "Unmatched [ or [^ in character class declaration.", /* REG_EBRACK 7 error_brack */ - "Unmatched marking parenthesis ( or \\(.", /* REG_EPAREN 8 error_paren */ - "Unmatched quantified repeat operator { or \\{.", /* REG_EBRACE 9 error_brace */ - "Invalid content of repeat range.", /* REG_BADBR 10 error_badbrace */ - "Invalid range end in character class", /* REG_ERANGE 11 error_range */ - "Out of memory.", /* REG_ESPACE 12 error_space NOT USED */ - "Invalid preceding regular expression prior to repetition operator.", /* REG_BADRPT 13 error_badrepeat */ - "Premature end of regular expression", /* REG_EEND 14 error_end NOT USED */ - "Regular expression is too large.", /* REG_ESIZE 15 error_size NOT USED */ - "Unmatched ) or \\)", /* REG_ERPAREN 16 error_right_paren NOT USED */ - "Empty regular expression.", /* REG_EMPTY 17 error_empty */ - "The complexity of matching the regular expression exceeded predefined bounds. " - "Try refactoring the regular expression to make each choice made by the state machine unambiguous. " - "This exception is thrown to prevent \"eternal\" matches that take an " - "indefinite period time to locate.", /* REG_ECOMPLEXITY 18 error_complexity */ - "Ran out of stack space trying to match the regular expression.", /* REG_ESTACK 19 error_stack */ - "Invalid or unterminated Perl (?...) sequence.", /* REG_E_PERL 20 error_perl */ - "Unknown error.", /* REG_E_UNKNOWN 21 error_unknown */ - }; - - return (n > ::boost::regex_constants::error_unknown) ? s_default_error_messages[ ::boost::regex_constants::error_unknown] : s_default_error_messages[n]; -} - -BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(boost::uint_least16_t c) -{ - const boost::uint_least16_t combining_ranges[] = { 0x0300, 0x0361, - 0x0483, 0x0486, - 0x0903, 0x0903, - 0x093E, 0x0940, - 0x0949, 0x094C, - 0x0982, 0x0983, - 0x09BE, 0x09C0, - 0x09C7, 0x09CC, - 0x09D7, 0x09D7, - 0x0A3E, 0x0A40, - 0x0A83, 0x0A83, - 0x0ABE, 0x0AC0, - 0x0AC9, 0x0ACC, - 0x0B02, 0x0B03, - 0x0B3E, 0x0B3E, - 0x0B40, 0x0B40, - 0x0B47, 0x0B4C, - 0x0B57, 0x0B57, - 0x0B83, 0x0B83, - 0x0BBE, 0x0BBF, - 0x0BC1, 0x0BCC, - 0x0BD7, 0x0BD7, - 0x0C01, 0x0C03, - 0x0C41, 0x0C44, - 0x0C82, 0x0C83, - 0x0CBE, 0x0CBE, - 0x0CC0, 0x0CC4, - 0x0CC7, 0x0CCB, - 0x0CD5, 0x0CD6, - 0x0D02, 0x0D03, - 0x0D3E, 0x0D40, - 0x0D46, 0x0D4C, - 0x0D57, 0x0D57, - 0x0F7F, 0x0F7F, - 0x20D0, 0x20E1, - 0x3099, 0x309A, - 0xFE20, 0xFE23, - 0xffff, 0xffff, }; - - const boost::uint_least16_t* p = combining_ranges + 1; - while(*p < c) p += 2; - --p; - if((c >= *p) && (c <= *(p+1))) - return true; - return false; -} - -// -// these are the POSIX collating names: -// -static const char* def_coll_names[] = { -"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "alert", "backspace", "tab", "newline", -"vertical-tab", "form-feed", "carriage-return", "SO", "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", -"SYN", "ETB", "CAN", "EM", "SUB", "ESC", "IS4", "IS3", "IS2", "IS1", "space", "exclamation-mark", -"quotation-mark", "number-sign", "dollar-sign", "percent-sign", "ampersand", "apostrophe", -"left-parenthesis", "right-parenthesis", "asterisk", "plus-sign", "comma", "hyphen", -"period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", -"colon", "semicolon", "less-than-sign", "equals-sign", "greater-than-sign", -"question-mark", "commercial-at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", -"Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "left-square-bracket", "backslash", -"right-square-bracket", "circumflex", "underscore", "grave-accent", "a", "b", "c", "d", "e", "f", -"g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "left-curly-bracket", -"vertical-line", "right-curly-bracket", "tilde", "DEL", "", -}; - -// these multi-character collating elements -// should keep most Western-European locales -// happy - we should really localise these a -// little more - but this will have to do for -// now: - -static const char* def_multi_coll[] = { - "ae", - "Ae", - "AE", - "ch", - "Ch", - "CH", - "ll", - "Ll", - "LL", - "ss", - "Ss", - "SS", - "nj", - "Nj", - "NJ", - "dz", - "Dz", - "DZ", - "lj", - "Lj", - "LJ", - "", -}; - - - -BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name) -{ - unsigned int i = 0; - while(*def_coll_names[i]) - { - if(def_coll_names[i] == name) - { - return std::string(1, char(i)); - } - ++i; - } - i = 0; - while(*def_multi_coll[i]) - { - if(def_multi_coll[i] == name) - { - return def_multi_coll[i]; - } - ++i; - } - return std::string(); -} - -BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c) -{ - return static_cast((std::tolower)((unsigned char)c)); -} - -BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c) -{ - return static_cast((std::toupper)((unsigned char)c)); -} -#ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c) -{ - return (std::towlower)(c); -} - -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c) -{ - return (std::towupper)(c); -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c) -{ - return (std::towlower)(c); -} - -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c) -{ - return (std::towupper)(c); -} -#endif - -#endif - -BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c) -{ - // - // char_syntax determines how the compiler treats a given character - // in a regular expression. - // - static regex_constants::escape_syntax_type char_syntax[] = { - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /* */ // 32 - regex_constants::escape_type_identity, /*!*/ - regex_constants::escape_type_identity, /*"*/ - regex_constants::escape_type_identity, /*#*/ - regex_constants::escape_type_identity, /*$*/ - regex_constants::escape_type_identity, /*%*/ - regex_constants::escape_type_identity, /*&*/ - regex_constants::escape_type_end_buffer, /*'*/ - regex_constants::syntax_open_mark, /*(*/ - regex_constants::syntax_close_mark, /*)*/ - regex_constants::escape_type_identity, /***/ - regex_constants::syntax_plus, /*+*/ - regex_constants::escape_type_identity, /*,*/ - regex_constants::escape_type_identity, /*-*/ - regex_constants::escape_type_identity, /*.*/ - regex_constants::escape_type_identity, /*/*/ - regex_constants::escape_type_decimal, /*0*/ - regex_constants::escape_type_backref, /*1*/ - regex_constants::escape_type_backref, /*2*/ - regex_constants::escape_type_backref, /*3*/ - regex_constants::escape_type_backref, /*4*/ - regex_constants::escape_type_backref, /*5*/ - regex_constants::escape_type_backref, /*6*/ - regex_constants::escape_type_backref, /*7*/ - regex_constants::escape_type_backref, /*8*/ - regex_constants::escape_type_backref, /*9*/ - regex_constants::escape_type_identity, /*:*/ - regex_constants::escape_type_identity, /*;*/ - regex_constants::escape_type_left_word, /*<*/ - regex_constants::escape_type_identity, /*=*/ - regex_constants::escape_type_right_word, /*>*/ - regex_constants::syntax_question, /*?*/ - regex_constants::escape_type_identity, /*@*/ - regex_constants::escape_type_start_buffer, /*A*/ - regex_constants::escape_type_not_word_assert, /*B*/ - regex_constants::escape_type_C, /*C*/ - regex_constants::escape_type_not_class, /*D*/ - regex_constants::escape_type_E, /*E*/ - regex_constants::escape_type_not_class, /*F*/ - regex_constants::escape_type_G, /*G*/ - regex_constants::escape_type_not_class, /*H*/ - regex_constants::escape_type_not_class, /*I*/ - regex_constants::escape_type_not_class, /*J*/ - regex_constants::escape_type_reset_start_mark, /*K*/ - regex_constants::escape_type_not_class, /*L*/ - regex_constants::escape_type_not_class, /*M*/ - regex_constants::escape_type_named_char, /*N*/ - regex_constants::escape_type_not_class, /*O*/ - regex_constants::escape_type_not_property, /*P*/ - regex_constants::escape_type_Q, /*Q*/ - regex_constants::escape_type_line_ending, /*R*/ - regex_constants::escape_type_not_class, /*S*/ - regex_constants::escape_type_not_class, /*T*/ - regex_constants::escape_type_not_class, /*U*/ - regex_constants::escape_type_not_class, /*V*/ - regex_constants::escape_type_not_class, /*W*/ - regex_constants::escape_type_X, /*X*/ - regex_constants::escape_type_not_class, /*Y*/ - regex_constants::escape_type_Z, /*Z*/ - regex_constants::escape_type_identity, /*[*/ - regex_constants::escape_type_identity, /*\*/ - regex_constants::escape_type_identity, /*]*/ - regex_constants::escape_type_identity, /*^*/ - regex_constants::escape_type_identity, /*_*/ - regex_constants::escape_type_start_buffer, /*`*/ - regex_constants::escape_type_control_a, /*a*/ - regex_constants::escape_type_word_assert, /*b*/ - regex_constants::escape_type_ascii_control, /*c*/ - regex_constants::escape_type_class, /*d*/ - regex_constants::escape_type_e, /*e*/ - regex_constants::escape_type_control_f, /*f*/ - regex_constants::escape_type_extended_backref, /*g*/ - regex_constants::escape_type_class, /*h*/ - regex_constants::escape_type_class, /*i*/ - regex_constants::escape_type_class, /*j*/ - regex_constants::escape_type_extended_backref, /*k*/ - regex_constants::escape_type_class, /*l*/ - regex_constants::escape_type_class, /*m*/ - regex_constants::escape_type_control_n, /*n*/ - regex_constants::escape_type_class, /*o*/ - regex_constants::escape_type_property, /*p*/ - regex_constants::escape_type_class, /*q*/ - regex_constants::escape_type_control_r, /*r*/ - regex_constants::escape_type_class, /*s*/ - regex_constants::escape_type_control_t, /*t*/ - regex_constants::escape_type_class, /*u*/ - regex_constants::escape_type_control_v, /*v*/ - regex_constants::escape_type_class, /*w*/ - regex_constants::escape_type_hex, /*x*/ - regex_constants::escape_type_class, /*y*/ - regex_constants::escape_type_end_buffer, /*z*/ - regex_constants::syntax_open_brace, /*{*/ - regex_constants::syntax_or, /*|*/ - regex_constants::syntax_close_brace, /*}*/ - regex_constants::escape_type_identity, /*~*/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - }; - - return char_syntax[(unsigned char)c]; -} - -BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c) -{ - // - // char_syntax determines how the compiler treats a given character - // in a regular expression. - // - static regex_constants::syntax_type char_syntax[] = { - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_newline, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /* */ // 32 - regex_constants::syntax_not, /*!*/ - regex_constants::syntax_char, /*"*/ - regex_constants::syntax_hash, /*#*/ - regex_constants::syntax_dollar, /*$*/ - regex_constants::syntax_char, /*%*/ - regex_constants::syntax_char, /*&*/ - regex_constants::escape_type_end_buffer, /*'*/ - regex_constants::syntax_open_mark, /*(*/ - regex_constants::syntax_close_mark, /*)*/ - regex_constants::syntax_star, /***/ - regex_constants::syntax_plus, /*+*/ - regex_constants::syntax_comma, /*,*/ - regex_constants::syntax_dash, /*-*/ - regex_constants::syntax_dot, /*.*/ - regex_constants::syntax_char, /*/*/ - regex_constants::syntax_digit, /*0*/ - regex_constants::syntax_digit, /*1*/ - regex_constants::syntax_digit, /*2*/ - regex_constants::syntax_digit, /*3*/ - regex_constants::syntax_digit, /*4*/ - regex_constants::syntax_digit, /*5*/ - regex_constants::syntax_digit, /*6*/ - regex_constants::syntax_digit, /*7*/ - regex_constants::syntax_digit, /*8*/ - regex_constants::syntax_digit, /*9*/ - regex_constants::syntax_colon, /*:*/ - regex_constants::syntax_char, /*;*/ - regex_constants::escape_type_left_word, /*<*/ - regex_constants::syntax_equal, /*=*/ - regex_constants::escape_type_right_word, /*>*/ - regex_constants::syntax_question, /*?*/ - regex_constants::syntax_char, /*@*/ - regex_constants::syntax_char, /*A*/ - regex_constants::syntax_char, /*B*/ - regex_constants::syntax_char, /*C*/ - regex_constants::syntax_char, /*D*/ - regex_constants::syntax_char, /*E*/ - regex_constants::syntax_char, /*F*/ - regex_constants::syntax_char, /*G*/ - regex_constants::syntax_char, /*H*/ - regex_constants::syntax_char, /*I*/ - regex_constants::syntax_char, /*J*/ - regex_constants::syntax_char, /*K*/ - regex_constants::syntax_char, /*L*/ - regex_constants::syntax_char, /*M*/ - regex_constants::syntax_char, /*N*/ - regex_constants::syntax_char, /*O*/ - regex_constants::syntax_char, /*P*/ - regex_constants::syntax_char, /*Q*/ - regex_constants::syntax_char, /*R*/ - regex_constants::syntax_char, /*S*/ - regex_constants::syntax_char, /*T*/ - regex_constants::syntax_char, /*U*/ - regex_constants::syntax_char, /*V*/ - regex_constants::syntax_char, /*W*/ - regex_constants::syntax_char, /*X*/ - regex_constants::syntax_char, /*Y*/ - regex_constants::syntax_char, /*Z*/ - regex_constants::syntax_open_set, /*[*/ - regex_constants::syntax_escape, /*\*/ - regex_constants::syntax_close_set, /*]*/ - regex_constants::syntax_caret, /*^*/ - regex_constants::syntax_char, /*_*/ - regex_constants::syntax_char, /*`*/ - regex_constants::syntax_char, /*a*/ - regex_constants::syntax_char, /*b*/ - regex_constants::syntax_char, /*c*/ - regex_constants::syntax_char, /*d*/ - regex_constants::syntax_char, /*e*/ - regex_constants::syntax_char, /*f*/ - regex_constants::syntax_char, /*g*/ - regex_constants::syntax_char, /*h*/ - regex_constants::syntax_char, /*i*/ - regex_constants::syntax_char, /*j*/ - regex_constants::syntax_char, /*k*/ - regex_constants::syntax_char, /*l*/ - regex_constants::syntax_char, /*m*/ - regex_constants::syntax_char, /*n*/ - regex_constants::syntax_char, /*o*/ - regex_constants::syntax_char, /*p*/ - regex_constants::syntax_char, /*q*/ - regex_constants::syntax_char, /*r*/ - regex_constants::syntax_char, /*s*/ - regex_constants::syntax_char, /*t*/ - regex_constants::syntax_char, /*u*/ - regex_constants::syntax_char, /*v*/ - regex_constants::syntax_char, /*w*/ - regex_constants::syntax_char, /*x*/ - regex_constants::syntax_char, /*y*/ - regex_constants::syntax_char, /*z*/ - regex_constants::syntax_open_brace, /*{*/ - regex_constants::syntax_or, /*|*/ - regex_constants::syntax_close_brace, /*}*/ - regex_constants::syntax_char, /*~*/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - regex_constants::syntax_char, /**/ - }; - - return char_syntax[(unsigned char)c]; -} - - -} // BOOST_REGEX_DETAIL_NS -} // boost diff --git a/src/w32_regex_traits.cpp b/src/w32_regex_traits.cpp deleted file mode 100644 index 5b5236aa..00000000 --- a/src/w32_regex_traits.cpp +++ /dev/null @@ -1,654 +0,0 @@ -/* - * - * Copyright (c) 2004 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE w32_regex_traits.cpp - * VERSION see - * DESCRIPTION: Implements w32_regex_traits (and associated helper classes). - */ - -#define BOOST_REGEX_SOURCE -#include - -#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) && !defined(BOOST_REGEX_NO_WIN32_LOCALE) -#include -#include - -#ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -#endif -#ifndef NOMINMAX -# define NOMINMAX -#endif -#define NOGDI -#include - -#if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(UNDER_CE) -#pragma comment(lib, "user32.lib") -#endif - -#ifdef BOOST_NO_STDC_NAMESPACE -namespace std{ - using ::memset; -} -#endif - -namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ - -#ifdef BOOST_NO_ANSI_APIS -UINT get_code_page_for_locale_id(lcid_type idx) -{ - WCHAR code_page_string[7]; - if (::GetLocaleInfoW(idx, LOCALE_IDEFAULTANSICODEPAGE, code_page_string, 7) == 0) - return 0; - - return static_cast(_wtol(code_page_string)); -} -#endif - - -void w32_regex_traits_char_layer::init() -{ - // we need to start by initialising our syntax map so we know which - // character is used for which purpose: - std::memset(m_char_map, 0, sizeof(m_char_map)); - cat_type cat; - std::string cat_name(w32_regex_traits::get_catalog_name()); - if(cat_name.size()) - { - cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name); - if(!cat) - { - std::string m("Unable to open message catalog: "); - std::runtime_error err(m + cat_name); - ::boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); - } - } - // - // if we have a valid catalog then load our messages: - // - if(cat) - { - for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) - { - string_type mss = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i, get_default_syntax(i)); - for(string_type::size_type j = 0; j < mss.size(); ++j) - { - m_char_map[static_cast(mss[j])] = i; - } - } - } - else - { - for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) - { - const char* ptr = get_default_syntax(i); - while(ptr && *ptr) - { - m_char_map[static_cast(*ptr)] = i; - ++ptr; - } - } - } - // - // finish off by calculating our escape types: - // - unsigned char i = 'A'; - do - { - if(m_char_map[i] == 0) - { - if(::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0002u, (char)i)) - m_char_map[i] = regex_constants::escape_type_class; - else if(::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0001u, (char)i)) - m_char_map[i] = regex_constants::escape_type_not_class; - } - }while(0xFF != i++); - - // - // fill in lower case map: - // - char char_map[1 << CHAR_BIT]; - for(int ii = 0; ii < (1 << CHAR_BIT); ++ii) - char_map[ii] = static_cast(ii); -#ifndef BOOST_NO_ANSI_APIS - int r = ::LCMapStringA(this->m_locale, LCMAP_LOWERCASE, char_map, 1 << CHAR_BIT, this->m_lower_map, 1 << CHAR_BIT); - BOOST_ASSERT(r != 0); -#else - UINT code_page = get_code_page_for_locale_id(this->m_locale); - BOOST_ASSERT(code_page != 0); - - WCHAR wide_char_map[1 << CHAR_BIT]; - int conv_r = ::MultiByteToWideChar(code_page, 0, char_map, 1 << CHAR_BIT, wide_char_map, 1 << CHAR_BIT); - BOOST_ASSERT(conv_r != 0); - - WCHAR wide_lower_map[1 << CHAR_BIT]; - int r = ::LCMapStringW(this->m_locale, LCMAP_LOWERCASE, wide_char_map, 1 << CHAR_BIT, wide_lower_map, 1 << CHAR_BIT); - BOOST_ASSERT(r != 0); - - conv_r = ::WideCharToMultiByte(code_page, 0, wide_lower_map, r, this->m_lower_map, 1 << CHAR_BIT, NULL, NULL); - BOOST_ASSERT(conv_r != 0); -#endif - if(r < (1 << CHAR_BIT)) - { - // if we have multibyte characters then not all may have been given - // a lower case mapping: - for(int jj = r; jj < (1 << CHAR_BIT); ++jj) - this->m_lower_map[jj] = static_cast(jj); - } - -#ifndef BOOST_NO_ANSI_APIS - r = ::GetStringTypeExA(this->m_locale, CT_CTYPE1, char_map, 1 << CHAR_BIT, this->m_type_map); -#else - r = ::GetStringTypeExW(this->m_locale, CT_CTYPE1, wide_char_map, 1 << CHAR_BIT, this->m_type_map); -#endif - BOOST_ASSERT(0 != r); -} - -BOOST_REGEX_DECL lcid_type BOOST_REGEX_CALL w32_get_default_locale() -{ - return ::GetUserDefaultLCID(); -} - -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(char c, lcid_type idx) -{ -#ifndef BOOST_NO_ANSI_APIS - WORD mask; - if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER)) - return true; - return false; -#else - UINT code_page = get_code_page_for_locale_id(idx); - if (code_page == 0) - return false; - - WCHAR wide_c; - if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) - return false; - - WORD mask; - if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_LOWER)) - return true; - return false; -#endif -} - -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(wchar_t c, lcid_type idx) -{ - WORD mask; - if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER)) - return true; - return false; -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(unsigned short ca, lcid_type idx) -{ - WORD mask; - wchar_t c = ca; - if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER)) - return true; - return false; -} -#endif - -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(char c, lcid_type idx) -{ -#ifndef BOOST_NO_ANSI_APIS - WORD mask; - if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER)) - return true; - return false; -#else - UINT code_page = get_code_page_for_locale_id(idx); - if (code_page == 0) - return false; - - WCHAR wide_c; - if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) - return false; - - WORD mask; - if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_UPPER)) - return true; - return false; -#endif -} - -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(wchar_t c, lcid_type idx) -{ - WORD mask; - if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER)) - return true; - return false; -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(unsigned short ca, lcid_type idx) -{ - WORD mask; - wchar_t c = ca; - if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER)) - return true; - return false; -} -#endif - -void free_module(void* mod) -{ - ::FreeLibrary(static_cast(mod)); -} - -BOOST_REGEX_DECL cat_type BOOST_REGEX_CALL w32_cat_open(const std::string& name) -{ -#ifndef BOOST_NO_ANSI_APIS - cat_type result(::LoadLibraryA(name.c_str()), &free_module); - return result; -#else - LPWSTR wide_name = (LPWSTR)_alloca( (name.size() + 1) * sizeof(WCHAR) ); - if (::MultiByteToWideChar(CP_ACP, 0, name.c_str(), name.size(), wide_name, name.size() + 1) == 0) - return cat_type(); - - cat_type result(::LoadLibraryW(wide_name), &free_module); - return result; -#endif -} - -BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::string& def) -{ -#ifndef BOOST_NO_ANSI_APIS - char buf[256]; - if(0 == ::LoadStringA( - static_cast(cat.get()), - i, - buf, - 256 - )) - { - return def; - } -#else - WCHAR wbuf[256]; - int r = ::LoadStringW( - static_cast(cat.get()), - i, - wbuf, - 256 - ); - if (r == 0) - return def; - - - int buf_size = 1 + ::WideCharToMultiByte(CP_ACP, 0, wbuf, r, NULL, 0, NULL, NULL); - LPSTR buf = (LPSTR)_alloca(buf_size); - if (::WideCharToMultiByte(CP_ACP, 0, wbuf, r, buf, buf_size, NULL, NULL) == 0) - return def; // failed conversion. -#endif - return std::string(buf); -} - -#ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::wstring& def) -{ - wchar_t buf[256]; - if(0 == ::LoadStringW( - static_cast(cat.get()), - i, - buf, - 256 - )) - { - return def; - } - return std::wstring(buf); -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL std::basic_string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::basic_string& def) -{ - unsigned short buf[256]; - if(0 == ::LoadStringW( - static_cast(cat.get()), - i, - (LPWSTR)buf, - 256 - )) - { - return def; - } - return std::basic_string(buf); -} -#endif -#endif -BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_transform(lcid_type idx, const char* p1, const char* p2) -{ -#ifndef BOOST_NO_ANSI_APIS - int bytes = ::LCMapStringA( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - p1, // source string - static_cast(p2 - p1), // number of characters in source string - 0, // destination buffer - 0 // size of destination buffer - ); - if(!bytes) - return std::string(p1, p2); - std::string result(++bytes, '\0'); - bytes = ::LCMapStringA( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - p1, // source string - static_cast(p2 - p1), // number of characters in source string - &*result.begin(), // destination buffer - bytes // size of destination buffer - ); -#else - UINT code_page = get_code_page_for_locale_id(idx); - if(code_page == 0) - return std::string(p1, p2); - - int src_len = static_cast(p2 - p1); - LPWSTR wide_p1 = (LPWSTR)_alloca( (src_len + 1) * 2 ); - if(::MultiByteToWideChar(code_page, 0, p1, src_len, wide_p1, src_len + 1) == 0) - return std::string(p1, p2); - - int bytes = ::LCMapStringW( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - wide_p1, // source string - src_len, // number of characters in source string - 0, // destination buffer - 0 // size of destination buffer - ); - if(!bytes) - return std::string(p1, p2); - std::string result(++bytes, '\0'); - bytes = ::LCMapStringW( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - wide_p1, // source string - src_len, // number of characters in source string - (LPWSTR)&*result.begin(), // destination buffer - bytes // size of destination buffer - ); -#endif - if(bytes > static_cast(result.size())) - return std::string(p1, p2); - while(result.size() && result[result.size()-1] == '\0') - { - result.erase(result.size()-1); - } - return result; -} - -#ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_transform(lcid_type idx, const wchar_t* p1, const wchar_t* p2) -{ - int bytes = ::LCMapStringW( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - p1, // source string - static_cast(p2 - p1), // number of characters in source string - 0, // destination buffer - 0 // size of destination buffer - ); - if(!bytes) - return std::wstring(p1, p2); - std::string result(++bytes, '\0'); - bytes = ::LCMapStringW( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - p1, // source string - static_cast(p2 - p1), // number of characters in source string - reinterpret_cast(&*result.begin()), // destination buffer *of bytes* - bytes // size of destination buffer - ); - if(bytes > static_cast(result.size())) - return std::wstring(p1, p2); - while(result.size() && result[result.size()-1] == L'\0') - { - result.erase(result.size()-1); - } - std::wstring r2; - for(std::string::size_type i = 0; i < result.size(); ++i) - r2.append(1, static_cast(static_cast(result[i]))); - return r2; -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL std::basic_string BOOST_REGEX_CALL w32_transform(lcid_type idx, const unsigned short* p1, const unsigned short* p2) -{ - int bytes = ::LCMapStringW( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - (LPCWSTR)p1, // source string - static_cast(p2 - p1), // number of characters in source string - 0, // destination buffer - 0 // size of destination buffer - ); - if(!bytes) - return std::basic_string(p1, p2); - std::string result(++bytes, '\0'); - bytes = ::LCMapStringW( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - (LPCWSTR)p1, // source string - static_cast(p2 - p1), // number of characters in source string - reinterpret_cast(&*result.begin()), // destination buffer *of bytes* - bytes // size of destination buffer - ); - if(bytes > static_cast(result.size())) - return std::basic_string(p1, p2); - while(result.size() && result[result.size()-1] == L'\0') - { - result.erase(result.size()-1); - } - std::basic_string r2; - for(std::string::size_type i = 0; i < result.size(); ++i) - r2.append(1, static_cast(static_cast(result[i]))); - return r2; -} -#endif -#endif -BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_tolower(char c, lcid_type idx) -{ - char result[2]; -#ifndef BOOST_NO_ANSI_APIS - int b = ::LCMapStringA( - idx, // locale identifier - LCMAP_LOWERCASE, // mapping transformation type - &c, // source string - 1, // number of characters in source string - result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; -#else - UINT code_page = get_code_page_for_locale_id(idx); - if (code_page == 0) - return c; - - WCHAR wide_c; - if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) - return c; - - WCHAR wide_result; - int b = ::LCMapStringW( - idx, // locale identifier - LCMAP_LOWERCASE, // mapping transformation type - &wide_c, // source string - 1, // number of characters in source string - &wide_result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; - - if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0) - return c; // No single byte lower case equivalent available -#endif - return result[0]; -} - -#ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_tolower(wchar_t c, lcid_type idx) -{ - wchar_t result[2]; - int b = ::LCMapStringW( - idx, // locale identifier - LCMAP_LOWERCASE, // mapping transformation type - &c, // source string - 1, // number of characters in source string - result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; - return result[0]; -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_tolower(unsigned short c, lcid_type idx) -{ - wchar_t result[2]; - int b = ::LCMapStringW( - idx, // locale identifier - LCMAP_LOWERCASE, // mapping transformation type - (wchar_t const*)&c, // source string - 1, // number of characters in source string - result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; - return result[0]; -} -#endif -#endif -BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_toupper(char c, lcid_type idx) -{ - char result[2]; -#ifndef BOOST_NO_ANSI_APIS - int b = ::LCMapStringA( - idx, // locale identifier - LCMAP_UPPERCASE, // mapping transformation type - &c, // source string - 1, // number of characters in source string - result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; -#else - UINT code_page = get_code_page_for_locale_id(idx); - if(code_page == 0) - return c; - - WCHAR wide_c; - if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) - return c; - - WCHAR wide_result; - int b = ::LCMapStringW( - idx, // locale identifier - LCMAP_UPPERCASE, // mapping transformation type - &wide_c, // source string - 1, // number of characters in source string - &wide_result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; - - if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0) - return c; // No single byte upper case equivalent available. -#endif - return result[0]; -} - -#ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_toupper(wchar_t c, lcid_type idx) -{ - wchar_t result[2]; - int b = ::LCMapStringW( - idx, // locale identifier - LCMAP_UPPERCASE, // mapping transformation type - &c, // source string - 1, // number of characters in source string - result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; - return result[0]; -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_toupper(unsigned short c, lcid_type idx) -{ - wchar_t result[2]; - int b = ::LCMapStringW( - idx, // locale identifier - LCMAP_UPPERCASE, // mapping transformation type - (wchar_t const*)&c, // source string - 1, // number of characters in source string - result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; - return result[0]; -} -#endif -#endif -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, char c) -{ - WORD mask; -#ifndef BOOST_NO_ANSI_APIS - if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) - return true; -#else - UINT code_page = get_code_page_for_locale_id(idx); - if(code_page == 0) - return false; - - WCHAR wide_c; - if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) - return false; - - if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) - return true; -#endif - if((m & w32_regex_traits_implementation::mask_word) && (c == '_')) - return true; - return false; -} - -#ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, wchar_t c) -{ - WORD mask; - if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) - return true; - if((m & w32_regex_traits_implementation::mask_word) && (c == '_')) - return true; - if((m & w32_regex_traits_implementation::mask_unicode) && (c > 0xff)) - return true; - return false; -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, unsigned short c) -{ - WORD mask; - if(::GetStringTypeExW(idx, CT_CTYPE1, (wchar_t const*)&c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) - return true; - if((m & w32_regex_traits_implementation::mask_word) && (c == '_')) - return true; - if((m & w32_regex_traits_implementation::mask_unicode) && (c > 0xff)) - return true; - return false; -} -#endif -#endif - -} // BOOST_REGEX_DETAIL_NS -} // boost - -#endif - diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 4668c98d..c4a25767 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -66,7 +66,6 @@ lib boost_regex_recursive : ../src/regex.cpp ../src/regex_debug.cpp ../src/regex_raw_buffer.cpp - ../src/regex_traits_defaults.cpp ../src/static_mutex.cpp ../src/wide_posix_api.cpp ../build//icu_options diff --git a/test/captures/Jamfile.v2 b/test/captures/Jamfile.v2 index 0f843cff..6fe05dca 100644 --- a/test/captures/Jamfile.v2 +++ b/test/captures/Jamfile.v2 @@ -13,7 +13,6 @@ EX_SOURCES = regex.cpp regex_debug.cpp regex_raw_buffer.cpp - regex_traits_defaults.cpp static_mutex.cpp wide_posix_api.cpp ; diff --git a/test/noeh_test/Jamfile.v2 b/test/noeh_test/Jamfile.v2 index 96561b52..b03bc383 100644 --- a/test/noeh_test/Jamfile.v2 +++ b/test/noeh_test/Jamfile.v2 @@ -29,7 +29,6 @@ lib boost_regex_noeh : ../../src/regex.cpp ../../src/regex_debug.cpp ../../src/regex_raw_buffer.cpp - ../../src/regex_traits_defaults.cpp ../../src/static_mutex.cpp ../../src/wide_posix_api.cpp ../../build//icu_options diff --git a/test/test_consolidated.cpp b/test/test_consolidated.cpp index e30c42fe..baeba3c9 100644 --- a/test/test_consolidated.cpp +++ b/test/test_consolidated.cpp @@ -15,6 +15,5 @@ #include #include #include -#include #include #include