mirror of
https://github.com/boostorg/regex.git
synced 2025-07-29 20:17:24 +02:00
Merge branch 'develop'
This commit is contained in:
@ -122,6 +122,9 @@ test-suite regex
|
||||
[ run unicode/unicode_iterator_test.cpp : : :
|
||||
[ check-target-builds ../build//is_legacy_03 : : <source>../build//boost_regex ]
|
||||
release <define>TEST_UTF16 : unicode_iterator_test_utf16 ]
|
||||
[ run unicode/unicode_casefold_test.cpp
|
||||
../build//boost_regex ../build//icu_options
|
||||
]
|
||||
[ run static_mutex/static_mutex_test.cpp
|
||||
../../thread/build//boost_thread ../build//boost_regex
|
||||
]
|
||||
@ -221,3 +224,10 @@ compile test_warnings.cpp
|
||||
<define>BOOST_REGEX_STANDALONE
|
||||
[ check-target-builds ../build//is_legacy_03 : : <build>no ]
|
||||
: test_warnings_standalone ;
|
||||
|
||||
compile test_windows_defs_1.cpp ;
|
||||
compile test_windows_defs_2.cpp ;
|
||||
compile test_windows_defs_3.cpp ;
|
||||
compile test_windows_defs_4.cpp ;
|
||||
|
||||
run issue153.cpp : : : <toolset>msvc:<linkflags>-STACK:2097152 ;
|
||||
|
@ -10,6 +10,7 @@ add_subdirectory(../.. boostorg/reegx)
|
||||
add_subdirectory(../../../config boostorg/config)
|
||||
add_subdirectory(../../../core boostorg/core)
|
||||
add_subdirectory(../../../assert boostorg/assert)
|
||||
add_subdirectory(../../../static_assert boostorg/static_assert)
|
||||
add_subdirectory(../../../throw_exception boostorg/throw_exception)
|
||||
add_subdirectory(../../../predef boostorg/predef)
|
||||
|
||||
|
22
test/cmake_subdir_test_icu/CMakeLists.txt
Normal file
22
test/cmake_subdir_test_icu/CMakeLists.txt
Normal file
@ -0,0 +1,22 @@
|
||||
# Copyright 2018, 2019 Peter Dimov
|
||||
# Distributed under the Boost Software License, Version 1.0.
|
||||
# See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt
|
||||
|
||||
# Stand-alone project that pulls Boost.Regex in through add_subdirectory and
# builds/runs the ICU quick test against the Boost::regex_icu target.
cmake_minimum_required(VERSION 3.5...3.16)
|
||||
|
||||
project(cmake_subdir_test LANGUAGES CXX)
|
||||
|
||||
# The library under test (two directories up), then the sibling Boost
# libraries added alongside it; each gets its own binary directory name.
add_subdirectory(../.. boostorg/regex)
|
||||
add_subdirectory(../../../config boostorg/config)
|
||||
add_subdirectory(../../../core boostorg/core)
|
||||
add_subdirectory(../../../assert boostorg/assert)
|
||||
add_subdirectory(../../../throw_exception boostorg/throw_exception)
|
||||
add_subdirectory(../../../predef boostorg/predef)
|
||||
|
||||
# Test executable built from test/quick_icu.cpp and linked to the ICU variant.
add_executable(quick_icu ../quick_icu.cpp)
|
||||
target_link_libraries(quick_icu Boost::regex_icu)
|
||||
|
||||
# Register the executable with CTest.
enable_testing()
|
||||
add_test(quick_icu quick_icu)
|
||||
|
||||
# Convenience target: `check` runs ctest for the active configuration and
# prints the output of failing tests.
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C $<CONFIG>)
|
33
test/issue153.cpp
Normal file
33
test/issue153.cpp
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright (c) 2021
|
||||
* John Maddock
|
||||
*
|
||||
* Use, modification and distribution are subject to the
|
||||
* Boost Software License, Version 1.0. (See accompanying file
|
||||
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
*
|
||||
*/
|
||||
|
||||
//#define BOOST_REGEX_MAX_BLOCKS 10
|
||||
// See https://github.com/boostorg/regex/issues/153
|
||||
#include <iostream>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
// Driver for https://github.com/boostorg/regex/issues/153: builds a
// boost::regex from a fixed byte-string pattern and runs regex_match with
// match_default | match_partial against a fixed byte-string subject.
// Prints "OK" if the match succeeds. If boost::regex_error is thrown, the
// error code is asserted to be error_complexity and the message is printed.
// Returns 0 on every path that reaches the end of main.
// NOTE(review): argc/argv are not used, and assert() appears to rely on
// <cassert> arriving indirectly (only <iostream> and <boost/regex.hpp> are
// included above); the assert is also compiled out under NDEBUG - confirm.
int main(int argc, char** argv)
|
||||
{
|
||||
try
|
||||
{
|
||||
boost::regex e("\x28\x28\x1f\x28\x28\x28\x3f\x31\x29\x8\x29\xf3\x29\x21\x3d\x29\x3f\x3f\xe4\x2e\x2b\x1f\x3f\x7c\x28\x3f\x21\x28\x28\x61\x3f\x3f\x28\x2a\x53\x4b\x49\x50\x29\x4\x3f\x2e\x2a\x3f\x28\x3f\x31\x29\x30\x77\x29\x29\x29\x49\x29\x61\x63\xbe\x45\x30\xa1\x5c\xfe\x5\xd2\x26\xc0\xf5\x17\x6c\xd4\xc3\x72\xe9\xb6\x74");
|
||||
if (boost::regex_match("\xb9\x32\x7c\xbc\x2d\xa0\xb\x85\xf3\xcf\x93\xa7\xd0\x44\x7b\x21\x12\x93\x6a\x7b\x72\x6d\x1e\x69\x56\x31\x37\x30\x31\x34\x31\x31\x38\x33\x34\x36\x30\x34\x36\x39\x32\x33\x31\x37\x33\x31\x36\x38\x37\x33\x30\x33\x37\x31\x35\x38\x38\x34\x31\x30\x35\x37\x32\x37\xba\x3e\x4\xc7\x27\xe9\xae\xf2\x01\x84\x47\x1f\xdc\xc9\x4c\xe5\xbc\xcf\x17\x31\x37\x30\x31\x34\x31\x31\x38\x33\x34\x36\x30\x34\x36\x39\x32\x33\x31\x37\x33\x31\x36\x38\x37\x33\x30\x33\x37\x31\x35\x38\x38\x34\x31\x2c\xd6\xf5\x42\xe4\x13\x15\xde\x7e\xa1\x84\x5a\x32\xf5\x67\xd5\x13\x9a\xd1\xa6\x99\x18\x23\xf7\x5c\xf6\x40\x80\x9c\x79\xbe\x4a\xc2\x54\x94\x93\xa3\x50\x27\xaf\xd4\xc4\x3b\xd3\x49\x95\xe7\xa9\xa0\xa5\x14\x81\xd2\x9a\x77\x92\xa8\x81\xb0\xf4\x5b\xa8\x9c\x3e\x17\x3b\xbd\x86\x26\x9a\x57\x56\x12\xce\x8c\x4a\xca\x68\x86\x3d\xf5\xba\x75\xab\xb1\x76\x2d\xd\xf1\xc\x24\x5e\xc5\x6d\xc8\xdf\xa6\x18\x86\x5e\x56", e, boost::regex_constants::match_default | boost::regex_constants::match_partial))
|
||||
{
|
||||
std::cout << "OK" << std::endl;
|
||||
}
|
||||
}
|
||||
// The only failure this driver expects is a complexity error.
catch (const boost::regex_error& e)
|
||||
{
|
||||
assert(e.code() == boost::regex_constants::error_complexity);
|
||||
std::cout << e.what() << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -10,7 +10,7 @@
|
||||
// See library home page at http://www.boost.org/libs/regex
|
||||
|
||||
#include <boost/regex.hpp>
|
||||
#include <boost/core/lightweight_test.hpp>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
|
||||
bool validate_card_format(const std::string& s)
|
||||
@ -37,19 +37,19 @@ int main()
|
||||
{
|
||||
std::string s[ 4 ] = { "0000111122223333", "0000 1111 2222 3333", "0000-1111-2222-3333", "000-1111-2222-3333" };
|
||||
|
||||
BOOST_TEST( !validate_card_format( s[0] ) );
|
||||
BOOST_TEST_EQ( machine_readable_card_number( s[0] ), s[0] );
|
||||
BOOST_TEST_EQ( human_readable_card_number( s[0] ), s[2] );
|
||||
assert(!validate_card_format(s[0]));
|
||||
assert(machine_readable_card_number(s[0]) == s[0]);
|
||||
assert(human_readable_card_number(s[0]) == s[2]);
|
||||
|
||||
BOOST_TEST( validate_card_format( s[1] ) );
|
||||
BOOST_TEST_EQ( machine_readable_card_number( s[1] ), s[0] );
|
||||
BOOST_TEST_EQ( human_readable_card_number( s[1] ), s[2] );
|
||||
assert(validate_card_format(s[1]));
|
||||
assert(machine_readable_card_number(s[1]) == s[0]);
|
||||
assert(human_readable_card_number(s[1]) == s[2]);
|
||||
|
||||
BOOST_TEST( validate_card_format( s[2] ) );
|
||||
BOOST_TEST_EQ( machine_readable_card_number( s[2] ), s[0] );
|
||||
BOOST_TEST_EQ( human_readable_card_number( s[2] ), s[2] );
|
||||
assert(validate_card_format(s[2]));
|
||||
assert(machine_readable_card_number(s[2]) == s[0]);
|
||||
assert(human_readable_card_number(s[2]) == s[2]);
|
||||
|
||||
BOOST_TEST( !validate_card_format( s[3] ) );
|
||||
assert(!validate_card_format(s[3]));
|
||||
|
||||
return boost::report_errors();
|
||||
return 0;
|
||||
}
|
||||
|
55
test/quick_icu.cpp
Normal file
55
test/quick_icu.cpp
Normal file
@ -0,0 +1,55 @@
|
||||
|
||||
// Copyright 1998-2002 John Maddock
|
||||
// Copyright 2017 Peter Dimov
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
//
|
||||
// See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt
|
||||
|
||||
// See library home page at http://www.boost.org/libs/regex
|
||||
|
||||
#include <boost/regex/icu.hpp>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
|
||||
// Returns the result of u32regex_match for `s` against a pattern of three
// "four digits followed by '-' or ' '" groups and a final group of four
// digits. The compiled u32regex is a function-local static, so it is built
// once and reused by later calls.
bool validate_card_format(const std::string& s)
|
||||
{
|
||||
static const boost::u32regex e = boost::make_u32regex("(\\d{4}[- ]){3}\\d{4}");
|
||||
return boost::u32regex_match(s, e);
|
||||
}
|
||||
|
||||
// Shared pattern for the two reformatting helpers below: four captured digit
// groups (the first may be 3 or 4 digits, the rest 4), each optionally
// separated by '-' or ' ', anchored with \A ... \z.
const boost::u32regex card_rx = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
|
||||
// Replacement text: the four captures run together.
const std::string machine_format("\\1\\2\\3\\4");
|
||||
// Replacement text: the four captures joined by '-'.
const std::string human_format("\\1-\\2-\\3-\\4");
|
||||
|
||||
// Returns `s` passed through u32regex_replace with card_rx and
// machine_format, using match_default | format_sed (sed-style \N references
// in the format string).
std::string machine_readable_card_number(const std::string& s)
|
||||
{
|
||||
return boost::u32regex_replace(s, card_rx, machine_format, boost::match_default | boost::format_sed);
|
||||
}
|
||||
|
||||
// Returns `s` passed through u32regex_replace with card_rx and human_format,
// using match_default | format_sed (sed-style \N references in the format
// string).
std::string human_readable_card_number(const std::string& s)
|
||||
{
|
||||
return boost::u32regex_replace(s, card_rx, human_format, boost::match_default | boost::format_sed);
|
||||
}
|
||||
|
||||
// Runs the three helpers over four sample card strings and checks the results
// with assert(). s[0] is the expected machine form and s[2] the expected
// human form for the first three inputs; s[3] has a three-digit first group.
// NOTE(review): the checks use assert(), so they vanish when NDEBUG is
// defined - confirm the test is never built that way.
int main()
|
||||
{
|
||||
std::string s[ 4 ] = { "0000111122223333", "0000 1111 2222 3333", "0000-1111-2222-3333", "000-1111-2222-3333" };
|
||||
|
||||
// No separators: fails validation but still reformats.
assert( !validate_card_format( s[0] ) );
|
||||
assert( machine_readable_card_number( s[0] ) == s[0] );
|
||||
assert( human_readable_card_number( s[0] ) == s[2] );
|
||||
|
||||
// Space-separated.
assert( validate_card_format( s[1] ) );
|
||||
assert( machine_readable_card_number( s[1] ) == s[0] );
|
||||
assert( human_readable_card_number( s[1] ) == s[2] );
|
||||
|
||||
// Dash-separated.
assert( validate_card_format( s[2] ) );
|
||||
assert( machine_readable_card_number( s[2] ) == s[0] );
|
||||
assert( human_readable_card_number( s[2] ) == s[2] );
|
||||
|
||||
// First group is only three digits: must not validate.
assert( !validate_card_format( s[3] ) );
|
||||
|
||||
return 0;
|
||||
}
|
@ -19,13 +19,19 @@ void test_backrefs()
|
||||
{
|
||||
using namespace boost::regex_constants;
|
||||
TEST_INVALID_REGEX("a(b)\\2c", perl);
|
||||
#ifdef BOOST_REGEX_CXX03
|
||||
TEST_INVALID_REGEX("a(b\\1)c", perl);
|
||||
#endif
|
||||
TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbbd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("^(.)\\1", perl, "abc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a([bc])\\1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\([bc]\\)\\1d", basic, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
||||
#ifndef BOOST_REGEX_CXX03
|
||||
TEST_REGEX_SEARCH("(\\2two|(one))+", perl, "oneonetwo", match_default, make_array(0, 9, 3, 9, 0, 3, -2, -2));
|
||||
TEST_INVALID_REGEX("(\\3two|(one))+", perl);
|
||||
#endif
|
||||
// strictly speaking this is at best ambiguous, at worst wrong, this is what most
|
||||
// re implementations will match though.
|
||||
TEST_REGEX_SEARCH("a(([bc])\\2)*d", perl, "abbccd", match_default, make_array(0, 6, 3, 5, 3, 4, -2, -2));
|
||||
@ -59,7 +65,9 @@ void test_backrefs()
|
||||
// Now test the \g version:
|
||||
//
|
||||
TEST_INVALID_REGEX("a(b)\\g2c", perl);
|
||||
#ifdef BOOST_REGEX_CXX03
|
||||
TEST_INVALID_REGEX("a(b\\g1)c", perl);
|
||||
#endif
|
||||
TEST_INVALID_REGEX("a(b\\g0)c", perl);
|
||||
TEST_REGEX_SEARCH("a(b*)c\\g1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*)c\\g1d", perl, "abbcbd", match_default, make_array(-2, -2));
|
||||
@ -67,8 +75,14 @@ void test_backrefs()
|
||||
TEST_REGEX_SEARCH("^(.)\\g1", perl, "abc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a([bc])\\g1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
||||
TEST_INVALID_REGEX("a(b)\\g{2}c", perl);
|
||||
#ifdef BOOST_REGEX_CXX03
|
||||
TEST_INVALID_REGEX("a(b\\g{1})c", perl);
|
||||
#endif
|
||||
TEST_INVALID_REGEX("a(b\\g{0})c", perl);
|
||||
#ifndef BOOST_REGEX_CXX03
|
||||
TEST_REGEX_SEARCH("(\\g{2}two|(one))+", perl, "oneonetwo", match_default, make_array(0, 9, 3, 9, 0, 3, -2, -2));
|
||||
TEST_INVALID_REGEX("(\\g{3}two|(one))+", perl);
|
||||
#endif
|
||||
TEST_REGEX_SEARCH("a(b*)c\\g{1}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*)c\\g{1}d", perl, "abbcbd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*)c\\g{1}d", perl, "abbcbbbd", match_default, make_array(-2, -2));
|
||||
@ -76,7 +90,9 @@ void test_backrefs()
|
||||
TEST_REGEX_SEARCH("a([bc])\\g{1}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
||||
// And again but with negative indexes:
|
||||
TEST_INVALID_REGEX("a(b)\\g-2c", perl);
|
||||
#ifdef BOOST_REGEX_CXX03
|
||||
TEST_INVALID_REGEX("a(b\\g-1)c", perl);
|
||||
#endif
|
||||
TEST_INVALID_REGEX("a(b\\g-0)c", perl);
|
||||
TEST_REGEX_SEARCH("a(b*)c\\g-1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*)c\\g-1d", perl, "abbcbd", match_default, make_array(-2, -2));
|
||||
@ -84,7 +100,9 @@ void test_backrefs()
|
||||
TEST_REGEX_SEARCH("^(.)\\g1", perl, "abc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a([bc])\\g1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
|
||||
TEST_INVALID_REGEX("a(b)\\g{-2}c", perl);
|
||||
#ifdef BOOST_REGEX_CXX03
|
||||
TEST_INVALID_REGEX("a(b\\g{-1})c", perl);
|
||||
#endif
|
||||
TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbbbd", match_default, make_array(-2, -2));
|
||||
|
@ -112,6 +112,7 @@ void test_assertion_escapes()
|
||||
TEST_REGEX_SEARCH("\\By\\b", perl, "xy", match_default, make_array(1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\by\\B", perl, "yz", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\B\\*\\B", perl, " * ", match_default, make_array(1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH(".\\B.", perl, "!?", match_default, make_array(0, 2, -2, -2));
|
||||
// buffer operators:
|
||||
TEST_REGEX_SEARCH("\\`abc", perl, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\`abc", perl, "\nabc", match_default, make_array(-2, -2));
|
||||
|
@ -999,6 +999,9 @@ void test_verbs()
|
||||
TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|C", perl, "AAAC", match_default, make_array(3, 4, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|AC", perl, "AAAC", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("AA+(*SKIP)B|C", perl, "AAAC", match_default, make_array(3, 4, -2, -2));
|
||||
|
||||
// https://github.com/boostorg/regex/issues/152
|
||||
TEST_REGEX_SEARCH("\\A((\x1f((?1) )?+)?+(*SKIP) *?(?2)*?)\\z", perl, "\x20\x1f\x1f\x20", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "aaaxxxxxx", match_default, make_array(0, 9, -2, -2));
|
||||
TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "aaa++++++", match_default, make_array(-2, -2));
|
||||
|
@ -496,5 +496,13 @@ void test_pocessive_repeats()
|
||||
TEST_INVALID_REGEX("(ab + + +)", perl | mod_x);
|
||||
TEST_INVALID_REGEX("(ab + + ?)", perl | mod_x);
|
||||
|
||||
#ifndef BOOST_REGEX_CXX03
|
||||
// Some bug cases from https://github.com/boostorg/regex/issues/151
|
||||
TEST_INVALID_REGEX("a|?+", perl | mod_x);
|
||||
TEST_INVALID_REGEX("(?xi)a|?+", perl | mod_x);
|
||||
TEST_INVALID_REGEX("(?xi)a|#\r*", perl | mod_x);
|
||||
TEST_INVALID_REGEX("(?xi)|#\r*", perl | mod_x);
|
||||
TEST_INVALID_REGEX("(?xi)|?+#\r*", perl | mod_x);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
29
test/test_windows_defs_1.cpp
Normal file
29
test/test_windows_defs_1.cpp
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2021
|
||||
* John Maddock
|
||||
*
|
||||
* Use, modification and distribution are subject to the
|
||||
* Boost Software License, Version 1.0. (See accompanying file
|
||||
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32)
|
||||
//
|
||||
// Make sure our forward declarations match those in windows.h:
|
||||
//
|
||||
|
||||
#define STRICT
|
||||
|
||||
#include <boost/regex.hpp>
|
||||
#include <windows.h>
|
||||
|
||||
// Minimal use of the library so this translation unit actually instantiates
// regex code: builds a boost::regex from an empty string and calls
// regex_match on an empty subject, discarding the result. The function is
// only defined here, not called from this file.
void test_proc()
|
||||
{
|
||||
std::string text, re;
|
||||
boost::regex exp(re);
|
||||
regex_match(text, exp);
|
||||
}
|
||||
|
||||
#endif
|
29
test/test_windows_defs_2.cpp
Normal file
29
test/test_windows_defs_2.cpp
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2021
|
||||
* John Maddock
|
||||
*
|
||||
* Use, modification and distribution are subject to the
|
||||
* Boost Software License, Version 1.0. (See accompanying file
|
||||
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32)
|
||||
//
|
||||
// Make sure our forward declarations match those in windows.h:
|
||||
//
|
||||
|
||||
#define NO_STRICT
|
||||
|
||||
#include <boost/regex.hpp>
|
||||
#include <windows.h>
|
||||
|
||||
// Minimal use of the library so this translation unit actually instantiates
// regex code: builds a boost::regex from an empty string and calls
// regex_match on an empty subject, discarding the result. The function is
// only defined here, not called from this file.
void test_proc()
|
||||
{
|
||||
std::string text, re;
|
||||
boost::regex exp(re);
|
||||
regex_match(text, exp);
|
||||
}
|
||||
|
||||
#endif
|
27
test/test_windows_defs_3.cpp
Normal file
27
test/test_windows_defs_3.cpp
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2021
|
||||
* John Maddock
|
||||
*
|
||||
* Use, modification and distribution are subject to the
|
||||
* Boost Software License, Version 1.0. (See accompanying file
|
||||
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32)
|
||||
//
|
||||
// Make sure our forward declarations match those in windows.h:
|
||||
//
|
||||
|
||||
#include <windows.h>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
// Minimal use of the library so this translation unit actually instantiates
// regex code: builds a boost::regex from an empty string and calls
// regex_match on an empty subject, discarding the result. The function is
// only defined here, not called from this file.
void test_proc()
|
||||
{
|
||||
std::string text, re;
|
||||
boost::regex exp(re);
|
||||
regex_match(text, exp);
|
||||
}
|
||||
|
||||
#endif
|
28
test/test_windows_defs_4.cpp
Normal file
28
test/test_windows_defs_4.cpp
Normal file
@ -0,0 +1,28 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2021
|
||||
* John Maddock
|
||||
*
|
||||
* Use, modification and distribution are subject to the
|
||||
* Boost Software License, Version 1.0. (See accompanying file
|
||||
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32)
|
||||
//
|
||||
// Make sure our forward declarations match those in windows.h:
|
||||
//
|
||||
#define STRICT
|
||||
#define BOOST_NO_ANSI_APIS
|
||||
#include <boost/regex.hpp>
|
||||
#include <windows.h>
|
||||
|
||||
// Minimal use of the library so this translation unit actually instantiates
// regex code: builds a boost::regex from an empty string and calls
// regex_match on an empty subject, discarding the result. The function is
// only defined here, not called from this file.
void test_proc()
|
||||
{
|
||||
std::string text, re;
|
||||
boost::regex exp(re);
|
||||
regex_match(text, exp);
|
||||
}
|
||||
|
||||
#endif
|
204
test/unicode/unicode_casefold_test.cpp
Normal file
204
test/unicode/unicode_casefold_test.cpp
Normal file
@ -0,0 +1,204 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2021 John Maddock
|
||||
* Copyright (c) 2021 Daniel Kruegler
|
||||
*
|
||||
* Use, modification and distribution are subject to the
|
||||
* Boost Software License, Version 1.0. (See accompanying file
|
||||
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* LOCATION: see http://www.boost.org for most recent version.
|
||||
* FILE unicode_casefold_test.cpp
|
||||
* VERSION see <boost/version.hpp>
|
||||
* DESCRIPTION: Simple test suite for Unicode case folding.
|
||||
*/
|
||||
|
||||
#include <boost/regex/config.hpp>
|
||||
#include <boost/detail/lightweight_main.hpp>
|
||||
#include "../test_macros.hpp"
|
||||
|
||||
#if defined(BOOST_HAS_ICU)
|
||||
|
||||
#include <boost/regex/icu.hpp>
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include <unicode/uversion.h>
|
||||
#include <unicode/uchar.h>
|
||||
|
||||
typedef std::pair<int, int> unicode_verinfo;
|
||||
|
||||
// Function to query the effective Unicode major and minor
|
||||
// version, because some spot test cases can only be tested
|
||||
// for specific Unicode versions.
|
||||
// Returns the first two entries of the array filled in by
// u_getUnicodeVersion as a (first, second) pair of ints.
unicode_verinfo get_unicode_version()
|
||||
{
|
||||
// Zero-initialised before being handed to ICU.
UVersionInfo versionArray = {};
|
||||
u_getUnicodeVersion(versionArray);
|
||||
// Elements 0 and 1 are used as the major and minor version numbers.
unicode_verinfo result(versionArray[0] , versionArray[1]);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Walks every code point from U+0000 to U+00FF and checks what
// icu_regex_traits::translate_nocase returns for it: either the code point
// itself, its tolower() value, or (for U+00B5) U+03BC.
void latin_1_checks()
|
||||
{
|
||||
typedef boost::icu_regex_traits traits_type;
|
||||
traits_type traits;
|
||||
|
||||
// Test range [U+0000, U+0041): Identity fold
|
||||
for (traits_type::char_type c = 0x0; c < 0x41; ++c)
|
||||
{
|
||||
traits_type::char_type nc = traits.translate_nocase(c);
|
||||
BOOST_CHECK_EQUAL(nc, c);
|
||||
}
|
||||
|
||||
// Test ASCII upper case letters [A, Z]: Each character folds
|
||||
// to its lowercase variant:
|
||||
for (traits_type::char_type c = 0x41; c <= 0x5A; ++c)
|
||||
{
|
||||
traits_type::char_type nc = traits.translate_nocase(c);
|
||||
// Distance between 'A' (0x41) and 'a' (0x61).
const int shift = 0x61 - 0x41;
|
||||
BOOST_CHECK_EQUAL(nc, c + shift);
|
||||
BOOST_CHECK_EQUAL(nc, traits.tolower(c));
|
||||
}
|
||||
|
||||
// Test range (U+005A, U+00B5): Identity fold
|
||||
for (traits_type::char_type c = 0x5A + 1; c < 0xB5; ++c)
|
||||
{
|
||||
traits_type::char_type nc = traits.translate_nocase(c);
|
||||
BOOST_CHECK_EQUAL(nc, c);
|
||||
}
|
||||
|
||||
// U+00B5 maps to its decomposition GREEK SMALL LETTER MU
|
||||
// (U+03BC):
|
||||
{
|
||||
traits_type::char_type c = 0xB5;
|
||||
traits_type::char_type nc = traits.translate_nocase(c);
|
||||
BOOST_CHECK_EQUAL(nc, 0x03BC);
|
||||
}
|
||||
|
||||
// Test range (U+00B5, U+00BF]: Identity fold
|
||||
for (traits_type::char_type c = 0xB5 + 1; c <= 0xBF; ++c)
|
||||
{
|
||||
traits_type::char_type nc = traits.translate_nocase(c);
|
||||
BOOST_CHECK_EQUAL(nc, c);
|
||||
}
|
||||
|
||||
// Test range [U+00C0, U+00D6]: Each character folds
|
||||
// to its lowercase variant:
|
||||
for (traits_type::char_type c = 0xC0; c <= 0xD6; ++c)
|
||||
{
|
||||
traits_type::char_type nc = traits.translate_nocase(c);
|
||||
traits_type::char_type lc = traits.tolower(c);
|
||||
BOOST_CHECK_EQUAL(nc, lc);
|
||||
BOOST_CHECK_NE(nc, c);
|
||||
}
|
||||
|
||||
// U+00D7: Identity fold
|
||||
{
|
||||
traits_type::char_type c = 0xD7;
|
||||
traits_type::char_type nc = traits.translate_nocase(c);
|
||||
BOOST_CHECK_EQUAL(nc, c);
|
||||
}
|
||||
|
||||
// Test range [U+00D8, U+00DE]: Each character folds
|
||||
// to its lowercase variant:
|
||||
for (traits_type::char_type c = 0xD8; c <= 0xDE; ++c)
|
||||
{
|
||||
traits_type::char_type nc = traits.translate_nocase(c);
|
||||
traits_type::char_type lc = traits.tolower(c);
|
||||
BOOST_CHECK_EQUAL(nc, lc);
|
||||
BOOST_CHECK_NE(nc, c);
|
||||
}
|
||||
|
||||
// Test range [U+00DF, U+00FF]: Identity fold
|
||||
// Note that case folding of U+00DF (LATIN SMALL
|
||||
// LETTER SHARP S) does not fold to U+1E9E (LATIN
|
||||
// CAPITAL LETTER SHARP S) due to case folding
|
||||
// stability contract
|
||||
for (traits_type::char_type c = 0xDF; c <= 0xFF; ++c)
|
||||
{
|
||||
traits_type::char_type nc = traits.translate_nocase(c);
|
||||
BOOST_CHECK_EQUAL(nc, c);
|
||||
}
|
||||
}
|
||||
|
||||
// Checks translate_nocase on a handful of individual code points outside
// Latin-1. Two of the checks only run when get_unicode_version() reports a
// version at least as new as the one named in the guard.
void spot_checks()
|
||||
{
|
||||
// test specific values ripped straight out of the Unicode standard
|
||||
// to verify that our case folding is the same as theirs:
|
||||
typedef boost::icu_regex_traits traits_type;
|
||||
traits_type traits;
|
||||
|
||||
// Compared below with std::pair's operator>= (first element, then second).
const unicode_verinfo unicode_version = get_unicode_version();
|
||||
|
||||
// 'LATIN CAPITAL LETTER SHARP S' folds to
|
||||
// 'LATIN SMALL LETTER SHARP S'
|
||||
if (unicode_version >= unicode_verinfo(5, 1))
|
||||
{
|
||||
traits_type::char_type c = 0x1E9E;
|
||||
traits_type::char_type nc = traits.translate_nocase(c);
|
||||
traits_type::char_type lc = traits.tolower(c);
|
||||
BOOST_CHECK_EQUAL(nc, lc);
|
||||
BOOST_CHECK_EQUAL(nc, 0xDF);
|
||||
}
|
||||
|
||||
// Capital sigma (U+03A3) is the uppercase form of both the regular (U+03C3)
|
||||
// and final (U+03C2) lowercase sigma. All these characters have existed since
|
||||
// Unicode 1.1.0.
|
||||
{
|
||||
traits_type::char_type c = 0x03A3;
|
||||
traits_type::char_type nc = traits.translate_nocase(c);
|
||||
traits_type::char_type lc = traits.tolower(c);
|
||||
BOOST_CHECK_EQUAL(nc, lc);
|
||||
BOOST_CHECK_EQUAL(nc, 0x03C3);
|
||||
// Final sigma is expected to fold to the regular small sigma.
c = 0x03C2;
|
||||
nc = traits.translate_nocase(c);
|
||||
BOOST_CHECK_EQUAL(nc, 0x03C3);
|
||||
// Regular small sigma is expected to fold to itself.
c = 0x03C3;
|
||||
nc = traits.translate_nocase(c);
|
||||
BOOST_CHECK_EQUAL(nc, c);
|
||||
}
|
||||
|
||||
// In Turkic languages the lowercase letter 'i' (U+0069) maps to an
|
||||
// uppercase dotted I (U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE),
|
||||
// while the uppercase letter 'I' (U+0049) maps to the dotless lowercase
|
||||
// i (U+0131). The Unicode simple default mapping folds U+0130 to itself,
|
||||
// but folds U+0049 to U+0069.
|
||||
{
|
||||
traits_type::char_type c = 0x0130;
|
||||
traits_type::char_type nc = traits.translate_nocase(c);
|
||||
BOOST_CHECK_EQUAL(nc, c);
|
||||
c = 0x0049;
|
||||
nc = traits.translate_nocase(c);
|
||||
traits_type::char_type lc = traits.tolower(c);
|
||||
BOOST_CHECK_EQUAL(nc, lc);
|
||||
BOOST_CHECK_EQUAL(nc, 0x0069);
|
||||
}
|
||||
|
||||
// Cherokee small letters were added with Unicode 8.0,
|
||||
// but the upper case letters existed before, therefore
|
||||
// the small letters case fold to upper case letters.
|
||||
if (unicode_version >= unicode_verinfo(8, 0))
|
||||
{
|
||||
traits_type::char_type c = 0x13F8;
|
||||
traits_type::char_type nc = traits.translate_nocase(c);
|
||||
// Compared against toupper() here, unlike the tolower() checks above.
traits_type::char_type uc = traits.toupper(c);
|
||||
BOOST_CHECK_EQUAL(nc, uc);
|
||||
BOOST_CHECK_EQUAL(nc, 0x13F0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Test entry point. Runs both check functions when BOOST_HAS_ICU is defined
// and runs nothing otherwise; in either case the return value comes from
// boost::report_errors().
int cpp_main( int, char* [] )
|
||||
{
|
||||
#if defined(BOOST_HAS_ICU)
|
||||
latin_1_checks();
|
||||
spot_checks();
|
||||
#endif
|
||||
return boost::report_errors();
|
||||
}
|
Reference in New Issue
Block a user