diff --git a/include/boost/detail/utf8_codecvt_facet.hpp b/include/boost/detail/utf8_codecvt_facet.hpp index e2addbf..5b14269 100644 --- a/include/boost/detail/utf8_codecvt_facet.hpp +++ b/include/boost/detail/utf8_codecvt_facet.hpp @@ -105,6 +105,12 @@ BOOST_UTF8_BEGIN_NAMESPACE // See utf8_codecvt_facet.ipp for the implementation. // //----------------------------------------------------------------------------// +#ifndef BOOST_UTF8_DECL +#define BOOST_UTF8_DECL +#endif +#ifndef BOOST_SYMBOL_VISIBLE +#define BOOST_SYMBOL_VISIBLE +#endif struct BOOST_SYMBOL_VISIBLE utf8_codecvt_facet : public std::codecvt diff --git a/test/test_utf8_codecvt.cpp b/test/test_utf8_codecvt.cpp new file mode 100644 index 0000000..4702c25 --- /dev/null +++ b/test/test_utf8_codecvt.cpp @@ -0,0 +1,279 @@ +/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8 +// test_utf8_codecvt.cpp + +// (C) Copyright 2002-4 Robert Ramey - http://www.rrsd.com . +// Use, modification and distribution is subject to the Boost Software +// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#include // std::copy +#include +#include +#include +#include +#include +#include + +#include // size_t +#include +#include + +#define BOOST_UTF8_BEGIN_NAMESPACE namespace boost { namespace detail { +#define BOOST_UTF8_END_NAMESPACE } } +#include +#include + +#if defined(BOOST_NO_STDC_NAMESPACE) +namespace std{ + using ::size_t; + using ::wcslen; +#if !defined(UNDER_CE) && !defined(__PGIC__) + using ::wint_t; // the C type is wint_t; test_main stores wifstream::get() results in std::wint_t +#endif +} // namespace std +#endif + +// Note: copied from boost/iostreams/char_traits.hpp +// +// Dinkumware that comes with QNX Momentics 6.3.0, 4.0.2, incorrectly defines +// the EOF and WEOF macros to not std:: qualify the wint_t type (and so does +// Sun C++ 5.8 + STLport 4). Fix by placing the def in this scope. +// NOTE: Use BOOST_WORKAROUND? 
+#if (defined(__QNX__) && defined(BOOST_DINKUMWARE_STDLIB)) \ + || defined(__SUNPRO_CC) + using ::std::wint_t; +#endif + +#include + +template +struct test_data +{ + static unsigned char utf8_encoding[]; + static wchar_t wchar_encoding[]; +}; + +template<> +unsigned char test_data<2>::utf8_encoding[] = { + 0x01, + 0x7f, + 0xc2, 0x80, + 0xdf, 0xbf, + 0xe0, 0xa0, 0x80, + 0xe7, 0xbf, 0xbf +}; + +template<> +wchar_t test_data<2>::wchar_encoding[] = { + 0x0001, + 0x007f, + 0x0080, + 0x07ff, + 0x0800, + 0x7fff +}; + +template<> +unsigned char test_data<4>::utf8_encoding[] = { + 0x01, + 0x7f, + 0xc2, 0x80, + 0xdf, 0xbf, + 0xe0, 0xa0, 0x80, + 0xef, 0xbf, 0xbf, + 0xf0, 0x90, 0x80, 0x80, + 0xf4, 0x8f, 0xbf, 0xbf, + /* codecvt implementations for clang and gcc don't handle more than 21 bits and + * return eof accordingly. So don't test the whole 32-bit range + */ + /* + 0xf7, 0xbf, 0xbf, 0xbf, + 0xf8, 0x88, 0x80, 0x80, 0x80, + 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, + 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80, + 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf + */ +}; + +template<> +wchar_t test_data<4>::wchar_encoding[] = { + (wchar_t)0x00000001, + (wchar_t)0x0000007f, + (wchar_t)0x00000080, + (wchar_t)0x000007ff, + (wchar_t)0x00000800, + (wchar_t)0x0000ffff, + (wchar_t)0x00010000, + (wchar_t)0x0010ffff, + /* codecvt implementations for clang and gcc don't handle more than 21 bits and + * return eof accordingly. So don't test the whole 32-bit range + */ + /* + (wchar_t)0x001fffff, + (wchar_t)0x00200000, + (wchar_t)0x03ffffff, + (wchar_t)0x04000000, + (wchar_t)0x7fffffff + */ +}; + +int +test_main(int /* argc */, char * /* argv */[]) { + std::locale utf8_locale + = std::locale( + std::locale::classic(), + new boost::detail::utf8_codecvt_facet + ); + + typedef char utf8_t; + // define test data compatible with the wchar_t implementation + // as either ucs-2 or ucs-4 depending on the compiler/library. 
+ typedef test_data td; + + // Send our test UTF-8 data to file + { + std::ofstream ofs; + ofs.open("test.dat"); + std::copy( + td::utf8_encoding, + td::utf8_encoding + sizeof(td::utf8_encoding) / sizeof(unsigned char), + std::ostream_iterator(ofs) + ); + } + + // Read the test data back in, converting to wchar_t (UCS-2 or UCS-4) on the way in + std::vector from_file; + { + std::wifstream ifs; + ifs.imbue(utf8_locale); + ifs.open("test.dat"); + + std::wint_t item = 0; + // note can't use normal vector from iterator constructor because + // dinkumware doesn't have it. + for(;;){ + item = ifs.get(); + if(item == WEOF) + break; + //ifs >> item; + //if(ifs.eof()) + // break; + from_file.push_back(item); + } + } + + BOOST_TEST(from_file.size() == sizeof(td::wchar_encoding) / sizeof(wchar_t) && std::equal(from_file.begin(), from_file.end(), td::wchar_encoding)); // length first: std::equal alone passes on an empty read and overruns the reference array on a long one + + // Send the UCS4_data back out, converting to UTF-8 + { + std::wofstream ofs; + ofs.imbue(utf8_locale); + ofs.open("test2.dat"); + std::copy( + from_file.begin(), + from_file.end(), + std::ostream_iterator(ofs) + ); + } + + // Make sure that both files are the same + { + typedef std::istream_iterator is_iter; + is_iter end_iter; + + std::ifstream ifs1("test.dat"); + is_iter it1(ifs1); + std::vector data1; + std::copy(it1, end_iter, std::back_inserter(data1)); + + std::ifstream ifs2("test2.dat"); + is_iter it2(ifs2); + std::vector data2; + std::copy(it2, end_iter, std::back_inserter(data2)); + + BOOST_TEST(data1 == data2); + } + + // some libraries have trouble that only shows up with longer strings + + const wchar_t * test3_data = L"\ + \ + \ + \ + \ + 1\ + 96953204\ + 177129195\ + 1\ + 5627\ + 23010\ + 7419\ + 

16212

\ + 4086\ + 2749\ + -33\ + 124\ + 28\ + 32225\ + 17543\ + 0.84431422\ + 1.0170664757130923\ + tjbx\ + cuwjentqpkejp\ +
\ +
\ + "; + + // Write the long test string out to test3.dat, converting to UTF-8 + std::size_t l = std::wcslen(test3_data); + { + std::wofstream ofs; + ofs.imbue(utf8_locale); + ofs.open("test3.dat"); + std::copy( + test3_data, + test3_data + l, + std::ostream_iterator(ofs) + ); + } + + // Read test3.dat back through the facet and compare it with test3_data + { + std::wifstream ifs; + ifs.imbue(utf8_locale); + ifs.open("test3.dat"); + ifs >> std::noskipws; // keep whitespace so every character of test3_data is compared + BOOST_TEST( + std::equal( + test3_data, + test3_data + l, + std::istream_iterator(ifs) + ) + ); + } + return EXIT_SUCCESS; +} + +int +main(int argc, char * argv[]){ + + int retval = 1; // stays 1 unless test_main returns normally + BOOST_TRY{ + retval = test_main(argc, argv); + } + #ifndef BOOST_NO_EXCEPTION_STD_NAMESPACE + BOOST_CATCH(const std::exception & e){ + BOOST_ERROR(e.what()); + } + #endif + BOOST_CATCH(...){ + BOOST_ERROR("failed with uncaught exception:"); + } + BOOST_CATCH_END + + int error_count = boost::report_errors(); + if(error_count > 0) + retval = error_count; // a positive error count from report_errors replaces the return value + return retval; +} +