From f0c92f472c89a5878170c3e35d208986c994d5cc Mon Sep 17 00:00:00 2001 From: Mika Fischer Date: Mon, 23 Dec 2019 20:12:48 +0100 Subject: [PATCH] file_win32 supports UTF-8 paths: fix #793, close #1791, close #1793 This brings file_win32 in sync with the documentation. Previously, the path passed to open worked if encoded in the system codepage (which is almost never UTF-8). Now, the path must be encoded as UTF-8, as stated in the documentation. Adapt file tests so that for file_win32 all paths include a unicorn character. --- CHANGELOG.md | 1 + .../beast/core/detail/win32_unicode_path.hpp | 82 +++++++++++++++++++ include/boost/beast/core/impl/file_win32.ipp | 10 ++- test/beast/core/file_test.hpp | 43 ++++++++-- test/beast/core/file_win32.cpp | 2 +- 5 files changed, 125 insertions(+), 13 deletions(-) create mode 100644 include/boost/beast/core/detail/win32_unicode_path.hpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c621099..ea1c4001 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ Version 282: * Use superproject docca * Fix release build of docs +* file_win32 supports UTF-8 paths -------------------------------------------------------------------------------- diff --git a/include/boost/beast/core/detail/win32_unicode_path.hpp b/include/boost/beast/core/detail/win32_unicode_path.hpp new file mode 100644 index 00000000..3f77e651 --- /dev/null +++ b/include/boost/beast/core/detail/win32_unicode_path.hpp @@ -0,0 +1,82 @@ +// +// Copyright (c) 2019 Mika Fischer (mika.fischer@zoopnet.de) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/beast +// + +#ifndef BOOST_BEAST_CORE_DETAIL_WIN32_UNICODE_PATH_HPP +#define BOOST_BEAST_CORE_DETAIL_WIN32_UNICODE_PATH_HPP + +#ifdef _WIN32 +#include +#include +#include +#include +#include +#include +#include + +namespace boost { +namespace beast { +namespace detail { + +class win32_unicode_path +{ + using WCHAR_ = boost::winapi::WCHAR_; + +public: + win32_unicode_path(const char* utf8_path, error_code& ec) { + int ret = mb2wide(utf8_path, static_buf_.data(), + static_buf_.size()); + if (ret == 0) + { + int sz = mb2wide(utf8_path, nullptr, 0); + if (sz == 0) + { + ec.assign(boost::winapi::GetLastError(), + system_category()); + return; + } + dynamic_buf_.resize(sz); + int ret2 = mb2wide(utf8_path, + dynamic_buf_.data(), + dynamic_buf_.size()); + if (ret2 == 0) + { + ec.assign(boost::winapi::GetLastError(), + system_category()); + return; + } + } + } + + WCHAR_ const* c_str() const noexcept + { + return dynamic_buf_.empty() + ? static_buf_.data() + : dynamic_buf_.data(); + } + +private: + int mb2wide(const char* utf8_path, WCHAR_* buf, size_t sz) + { + return boost::winapi::MultiByteToWideChar( + boost::winapi::CP_UTF8_, + boost::winapi::MB_ERR_INVALID_CHARS_, + utf8_path, -1, + buf, static_cast(sz)); + } + + std::array static_buf_; + std::vector dynamic_buf_; +}; + +} // detail +} // beast +} // boost +#endif + +#endif diff --git a/include/boost/beast/core/impl/file_win32.ipp b/include/boost/beast/core/impl/file_win32.ipp index 8944a6bb..f7b76619 100644 --- a/include/boost/beast/core/impl/file_win32.ipp +++ b/include/boost/beast/core/impl/file_win32.ipp @@ -14,10 +14,10 @@ #if BOOST_BEAST_USE_WIN32_FILE +#include #include #include #include -#include #include #include #include @@ -186,8 +186,12 @@ open(char const* path, file_mode mode, error_code& ec) flags_and_attributes = 0x08000000; // FILE_FLAG_SEQUENTIAL_SCAN break; } - h_ = ::CreateFileA( - path, + + detail::win32_unicode_path unicode_path(path, ec); + if (ec) + return; + h_ = ::CreateFileW( + unicode_path.c_str(), desired_access, share_mode, NULL, diff --git a/test/beast/core/file_test.hpp b/test/beast/core/file_test.hpp index fac20d4d..82565e17 100644 --- a/test/beast/core/file_test.hpp +++ b/test/beast/core/file_test.hpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -22,7 +23,7 @@ namespace boost { namespace beast { -template +template void test_file() { @@ -35,16 +36,38 @@ test_file() namespace fs = boost::filesystem; + static constexpr +#ifdef _WIN32 + boost::winapi::WCHAR_ unicode_suffix[] = { 0xd83e, 0xdd84, 0x0000 }; // UTF-16-LE unicorn +#else + char unicode_suffix[] = { 0xf0, 0x9f, 0xa6, 0x84, 0x00 }; // UTF-8 unicorn +#endif + class temp_path { fs::path path_; - std::string str_; + std::vector utf8_str_; public: temp_path() : path_(fs::unique_path()) - , str_(path_.string()) { + if (append_unicode_suffix) + path_ += unicode_suffix; +#ifdef _WIN32 + constexpr auto cp = boost::winapi::CP_UTF8_; + constexpr auto flags = boost::winapi::WC_ERR_INVALID_CHARS_; + auto sz = boost::winapi::WideCharToMultiByte( + cp, flags, path_.c_str(), -1, nullptr, 0, + nullptr, nullptr); + BEAST_EXPECT(sz != 0); + utf8_str_.resize(sz); + auto ret = boost::winapi::WideCharToMultiByte( + cp, flags, path_.c_str(), -1, + utf8_str_.data(), sz, + nullptr, nullptr); + BEAST_EXPECT(ret == sz); +#endif } operator fs::path const&() @@ -54,25 +77,27 @@ test_file() operator char const*() { - return str_.c_str(); +#ifdef _WIN32 + return utf8_str_.data(); +#else + return path_.c_str(); +#endif } }; auto const create = [](fs::path const& path) { - auto const s = - path.string(); BEAST_EXPECT(! fs::exists(path)); - FILE* f = ::fopen(s.c_str(), "w"); - if( BEAST_EXPECT(f != nullptr)) - ::fclose(f); + fs::ofstream out(path); + BEAST_EXPECT(out.is_open()); }; auto const remove = [](fs::path const& path) { fs::remove(path); + BEAST_EXPECT(! fs::exists(path)); }; temp_path path; diff --git a/test/beast/core/file_win32.cpp b/test/beast/core/file_win32.cpp index dc30d4f7..f0f2b4ac 100644 --- a/test/beast/core/file_win32.cpp +++ b/test/beast/core/file_win32.cpp @@ -26,7 +26,7 @@ public: void run() { - test_file(); + test_file(); } };