diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..15f74076 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,311 @@ +# Copyright 2020 Evan Miller +# Copyright 2020 Matt Borland +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt) + +name: CI +on: [ push, pull_request ] +jobs: + ubuntu-focal: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + compiler: [ g++-9, g++-10, clang++-9, clang++-10 ] + standard: [ c++11, c++14, c++17, c++2a ] + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: '0' + - uses: mstachniuk/ci-skip@v1 + with: + commit-filter: '[skip ci];[ci skip];[CI SKIP];[SKIP CI];***CI SKIP***;***SKIP CI***;[windows];[Windows];[WINDOWS];[apple];[Apple];[APPLE]' + commit-filter-separator: ';' + fail-fast: true + - name: Set TOOLSET + run: echo ${{ matrix.compiler }} | awk '/^g/ { print "TOOLSET=gcc" } /^clang/ { print "TOOLSET=clang" }' >> $GITHUB_ENV + - name: Add repository + run: sudo apt-add-repository -y "ppa:ubuntu-toolchain-r/test" + - name: Install packages + run: sudo apt install g++-9 g++-10 clang-9 clang-10 + - name: Checkout main boost + run: git clone -b develop --depth 1 https://github.com/boostorg/boost.git ../boost-root + - name: Update tools/boostdep + run: git submodule update --init tools/boostdep + working-directory: ../boost-root + - name: Copy files + run: cp -r $GITHUB_WORKSPACE/* libs/regex + working-directory: ../boost-root + - name: Install deps + run: python tools/boostdep/depinst/depinst.py -I example -g "--jobs 3" regex + working-directory: ../boost-root + - name: Bootstrap + run: ./bootstrap.sh + working-directory: ../boost-root + - name: Generate headers + run: ./b2 headers + working-directory: ../boost-root + - name: Generate user config + run: 'echo "using $TOOLSET : : ${{ matrix.compiler }} : -std=${{ matrix.standard }} ;" > ~/user-config.jam' + working-directory: ../boost-root + - name: Config info install + run: ../../../b2 config_info_travis_install toolset=$TOOLSET + working-directory: ../boost-root/libs/config/test + - name: Config info + run: ./config_info_travis + working-directory: ../boost-root/libs/config/test + - name: Test + run: ../../../b2 toolset=$TOOLSET + working-directory: ../boost-root/libs/regex/test + ubuntu-bionic: + runs-on: ubuntu-18.04 + strategy: + fail-fast: false + matrix: + compiler: [ g++-7, g++-8, clang++-7, clang++-8 ] + standard: [ c++11, c++14, c++17 ] + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: '0' + - uses: mstachniuk/ci-skip@v1 + with: + commit-filter: '[skip ci];[ci skip];[CI SKIP];[SKIP CI];***CI SKIP***;***SKIP CI***;[windows];[Windows];[WINDOWS];[apple];[Apple];[APPLE]' + commit-filter-separator: ';' + fail-fast: true + - name: Set TOOLSET + run: echo ${{ matrix.compiler }} | awk '/^g/ { print "TOOLSET=gcc" } /^clang/ { print "TOOLSET=clang" }' >> $GITHUB_ENV + - name: Add repository + run: sudo apt-add-repository -y "ppa:ubuntu-toolchain-r/test" + - name: Install packages + run: sudo apt install g++-7 g++-8 clang-7 clang-8 + - name: Checkout main boost + run: git clone -b develop --depth 1 https://github.com/boostorg/boost.git ../boost-root + - name: Update tools/boostdep + run: git submodule update --init tools/boostdep + working-directory: ../boost-root + - name: Copy files + run: cp -r $GITHUB_WORKSPACE/* libs/regex + working-directory: ../boost-root + - name: Install deps + run: python tools/boostdep/depinst/depinst.py -I example -g "--jobs 3" regex + working-directory: ../boost-root + - name: Bootstrap + run: ./bootstrap.sh + working-directory: ../boost-root + - name: Generate headers + run: ./b2 headers + working-directory: ../boost-root + - name: Generate user config + run: 'echo "using $TOOLSET : : ${{ matrix.compiler }} : -std=${{ matrix.standard }} ;" > ~/user-config.jam' + working-directory: ../boost-root + - name: Config info install + run: ../../../b2 config_info_travis_install toolset=$TOOLSET + working-directory: ../boost-root/libs/config/test + - name: Config info + run: ./config_info_travis + working-directory: ../boost-root/libs/config/test + - name: Test + run: ../../../b2 toolset=$TOOLSET define=CI_SUPPRESS_KNOWN_ISSUES define=SLOW_COMPILER + working-directory: ../boost-root/libs/regex/test + ubuntu-xenial: + runs-on: ubuntu-16.04 + strategy: + fail-fast: false + matrix: + compiler: [ g++-5, g++-6, clang++-5.0, clang++-6.0 ] + standard: [ c++03 c++11, c++14, c++1z ] + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: '0' + - uses: mstachniuk/ci-skip@v1 + with: + commit-filter: '[skip ci];[ci skip];[CI SKIP];[SKIP CI];***CI SKIP***;***SKIP CI***;[windows];[Windows];[WINDOWS];[apple];[Apple];[APPLE]' + commit-filter-separator: ';' + fail-fast: true + - name: Set TOOLSET + run: echo ${{ matrix.compiler }} | awk '/^g/ { print "TOOLSET=gcc" } /^clang/ { print "TOOLSET=clang" }' >> $GITHUB_ENV + - name: Add repository + run: sudo apt-add-repository -y "ppa:ubuntu-toolchain-r/test" + - name: Install packages + run: sudo apt install g++-5 g++-6 clang-5.0 clang-6.0 + - name: Checkout main boost + run: git clone -b develop --depth 1 https://github.com/boostorg/boost.git ../boost-root + - name: Update tools/boostdep + run: git submodule update --init tools/boostdep + working-directory: ../boost-root + - name: Copy files + run: cp -r $GITHUB_WORKSPACE/* libs/regex + working-directory: ../boost-root + - name: Install deps + run: python tools/boostdep/depinst/depinst.py -I example -g "--jobs 3" regex + working-directory: ../boost-root + - name: Bootstrap + run: ./bootstrap.sh + working-directory: ../boost-root + - name: Generate headers + run: ./b2 headers + working-directory: ../boost-root + - name: Generate user config + run: 'echo "using $TOOLSET : : ${{ matrix.compiler }} : -std=${{ matrix.standard }} ;" > ~/user-config.jam' + working-directory: ../boost-root + - name: Config info install + run: ../../../b2 config_info_travis_install toolset=$TOOLSET + working-directory: ../boost-root/libs/config/test + - name: Config info + run: ./config_info_travis + working-directory: ../boost-root/libs/config/test + - name: Test + run: ../../../b2 toolset=$TOOLSET + working-directory: ../boost-root/libs/regex/test + macos: + runs-on: macos-latest + strategy: + fail-fast: false + matrix: + toolset: [ clang ] + standard: [ 11, 14, 17, 2a ] + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: '0' + - uses: mstachniuk/ci-skip@v1 + with: + commit-filter: '[skip ci];[ci skip];[CI SKIP];[SKIP CI];***CI SKIP***;***SKIP CI***;[windows];[Windows];[WINDOWS];[linux];[Linux];[LINUX]' + commit-filter-separator: ';' + fail-fast: true + - name: Checkout main boost + run: git clone -b develop --depth 1 https://github.com/boostorg/boost.git ../boost-root + - name: Update tools/boostdep + run: git submodule update --init tools/boostdep + working-directory: ../boost-root + - name: Copy files + run: cp -r $GITHUB_WORKSPACE/* libs/regex + working-directory: ../boost-root + - name: Install deps + run: python tools/boostdep/depinst/depinst.py -I example -g "--jobs 3" regex + working-directory: ../boost-root + - name: Bootstrap + run: ./bootstrap.sh + working-directory: ../boost-root + - name: Generate headers + run: ./b2 headers + working-directory: ../boost-root + - name: Config info install + run: ../../../b2 config_info_travis_install toolset=${{ matrix.toolset }} cxxstd=${{ matrix.standard }} + working-directory: ../boost-root/libs/config/test + - name: Config info + run: ./config_info_travis + working-directory: ../boost-root/libs/config/test + - name: Test + run: ../../../b2 toolset=${{ matrix.toolset }} cxxstd=${{ matrix.standard }} + working-directory: ../boost-root/libs/regex/test + windows: + runs-on: windows-latest + defaults: + run: + shell: cmd + env: + ARGS: toolset=${{ matrix.toolset }} address-model=64 cxxstd=${{ matrix.standard }} + strategy: + fail-fast: false + matrix: + toolset: [ gcc, msvc-14.0, msvc-14.2 ] + standard: [ 11, 14, 17 ] + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: '0' + - uses: mstachniuk/ci-skip@v1 + with: + commit-filter: '[skip ci];[ci skip];[CI SKIP];[SKIP CI];***CI SKIP***;***SKIP CI***;[apple];[Apple];[APPLE];[linux];[Linux];[LINUX]' + commit-filter-separator: ';' + fail-fast: true + - name: Checkout main boost + run: git clone -b develop --depth 1 https://github.com/boostorg/boost.git ../boost-root + - name: Update tools/boostdep + run: git submodule update --init tools/boostdep + working-directory: ../boost-root + - name: Copy files + run: xcopy /s /e /q %GITHUB_WORKSPACE% libs\regex + working-directory: ../boost-root + - name: Install deps + run: python tools/boostdep/depinst/depinst.py -I example -g "--jobs 3" regex + working-directory: ../boost-root + - name: Bootstrap + run: bootstrap + working-directory: ../boost-root + - name: Generate headers + run: b2 headers + working-directory: ../boost-root + - name: Config info install + run: ..\..\..\b2 config_info_travis_install %ARGS% + working-directory: ../boost-root/libs/config/test + - name: Config info + run: config_info_travis + working-directory: ../boost-root/libs/config/test + - name: Test + run: ..\..\..\b2 --hash %ARGS% + working-directory: ../boost-root/libs/regex/test + ubuntu-cmake-install: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: '0' + - name: Checkout main boost + run: git clone -b develop --depth 1 https://github.com/boostorg/boost.git ../boost-root + - name: Update tools/boostdep + run: git submodule update --init tools/boostdep + working-directory: ../boost-root + - name: Copy files + run: cp -r $GITHUB_WORKSPACE/* libs/regex + working-directory: ../boost-root + - name: Install deps + run: python tools/boostdep/depinst/depinst.py -I example -g "--jobs 3" regex + working-directory: ../boost-root + - name: Bootstrap + run: ./bootstrap.sh + working-directory: ../boost-root + - name: CMake Test + working-directory: ../boost-root + run: | + mkdir __build__ && cd __build__ + cmake -DBOOST_ENABLE_CMAKE=1 -DBoost_VERBOSE=1 -DBOOST_INCLUDE_LIBRARIES="regex;core" -DBUILD_TESTING=OFF -DCMAKE_INSTALL_PREFIX=~/.local .. + cmake --build . --target install + cd ../libs/regex/test/cmake_install_test && mkdir __build__ && cd __build__ + cmake -DCMAKE_INSTALL_PREFIX=~/.local .. + cmake --build . + cmake --build . --target check + ubuntu-cmake-check: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: '0' + - name: Checkout main boost + run: git clone -b develop --depth 1 https://github.com/boostorg/boost.git ../boost-root + - name: Update tools/boostdep + run: git submodule update --init tools/boostdep + working-directory: ../boost-root + - name: Copy files + run: cp -r $GITHUB_WORKSPACE/* libs/regex + working-directory: ../boost-root + - name: Install deps + run: python tools/boostdep/depinst/depinst.py -I example -g "--jobs 3" regex + working-directory: ../boost-root + - name: Bootstrap + run: ./bootstrap.sh + working-directory: ../boost-root + - name: CMake Test + working-directory: ../boost-root/libs/regex/test + run: | + cd cmake_subdir_test && mkdir __build__ && cd __build__ + cmake .. + cmake --build . + cmake --build . --target check diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 060a16aa..00000000 --- a/.travis.yml +++ /dev/null @@ -1,285 +0,0 @@ -# Copyright 2016, 2017 Peter Dimov -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt) - -language: cpp - -branches: - only: - - master - - develop - - /feature\/.*/ - -env: - matrix: - - BOGUS_JOB=true - -matrix: - - exclude: - - env: BOGUS_JOB=true - - include: - - os: linux - env: TEST_CMAKE=true # variables unused - just for identification in travis ci gui - script: - - mkdir __build__ && cd __build__ - - cmake .. -DBOOST_ENABLE_CMAKE=ON -DBOOST_INCLUDE_LIBRARIES=regex -DBOOST_REGEX_INCLUDE_EXAMPLES=ON - - cmake --build . - - - os: linux - env: TEST_CMAKE=true BUILD_SHARED_LIBS=On # variables unused - just for identification in travis ci gui - script: - - mkdir __build__ && cd __build__ - - cmake .. -DBUILD_SHARED_LIBS=ON -DBOOST_ENABLE_CMAKE=ON -DBOOST_INCLUDE_LIBRARIES=regex -DBOOST_REGEX_INCLUDE_EXAMPLES=ON - - cmake --build . - - - os: linux - env: TOOLSET=gcc COMPILER=g++ CXXSTD=03 - - - os: linux - env: TOOLSET=gcc COMPILER=g++-4.7 CXXSTD=03,11 - addons: - apt: - packages: - - g++-4.7 - sources: - - ubuntu-toolchain-r-test - - - os: linux - env: TOOLSET=gcc COMPILER=g++-4.8 CXXSTD=03,11 - addons: - apt: - packages: - - g++-4.8 - sources: - - ubuntu-toolchain-r-test - - - os: linux - env: TOOLSET=gcc COMPILER=g++-4.9 CXXSTD=03,11 - addons: - apt: - packages: - - g++-4.9 - sources: - - ubuntu-toolchain-r-test - - - os: linux - env: TOOLSET=gcc COMPILER=g++-5 CXXSTD=03,11,14 - addons: - apt: - packages: - - g++-5 - sources: - - ubuntu-toolchain-r-test - - - os: linux - env: TOOLSET=gcc COMPILER=g++-6 CXXSTD=03,11 - addons: - apt: - packages: - - g++-6 - sources: - - ubuntu-toolchain-r-test - - - os: linux - env: TOOLSET=gcc COMPILER=g++-6 CXXSTD=14,1z - addons: - apt: - packages: - - g++-6 - sources: - - ubuntu-toolchain-r-test - - - os: linux - env: TOOLSET=gcc COMPILER=g++-6 CXXSTD=03,11 CXXSTD_DIALECT=cxxstd-dialect=gnu - addons: - apt: - packages: - - g++-6 - sources: - - ubuntu-toolchain-r-test - - - os: linux - env: TOOLSET=gcc COMPILER=g++-6 CXXSTD=14,1z CXXSTD_DIALECT=cxxstd-dialect=gnu - addons: - apt: - packages: - - g++-6 - sources: - - ubuntu-toolchain-r-test - - - os: linux - env: TOOLSET=gcc COMPILER=g++-7 CXXSTD=03,11 CXXSTD_DIALECT=cxxstd-dialect=gnu - addons: - apt: - packages: - - g++-7 - sources: - - ubuntu-toolchain-r-test - - - os: linux - env: TOOLSET=gcc COMPILER=g++-7 CXXSTD=14,1z CXXSTD_DIALECT=cxxstd-dialect=gnu - addons: - apt: - packages: - - g++-7 - sources: - - ubuntu-toolchain-r-test - - - os: linux - env: TOOLSET=gcc COMPILER=g++-8 CXXSTD=03,11 CXXSTD_DIALECT=cxxstd-dialect=gnu - addons: - apt: - packages: - - g++-8 - sources: - - ubuntu-toolchain-r-test - - - os: linux - env: TOOLSET=gcc COMPILER=g++-8 CXXSTD=14,1z CXXSTD_DIALECT=cxxstd-dialect=gnu - addons: - apt: - packages: - - g++-8 - sources: - - ubuntu-toolchain-r-test - - - os: linux - env: TOOLSET=clang COMPILER=clang++ CXXSTD=03,11 - - - os: linux - env: TOOLSET=clang COMPILER=clang++-3.5 CXXSTD=03,11 - addons: - apt: - packages: - - clang-3.5 - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.5 - - - os: linux - env: TOOLSET=clang COMPILER=clang++-3.6 CXXSTD=03,11 - addons: - apt: - packages: - - clang-3.6 - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.6 - - - os: linux - env: TOOLSET=clang COMPILER=clang++-3.7 CXXSTD=03,11 - addons: - apt: - packages: - - clang-3.7 - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.7 - - - os: linux - env: TOOLSET=clang COMPILER=clang++-3.8 CXXSTD=03,11 - addons: - apt: - packages: - - clang-3.8 - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.8 - - - os: linux - env: TOOLSET=clang COMPILER=clang++-3.8 CXXSTD=14,1z - addons: - apt: - packages: - - clang-3.8 - - libstdc++-4.9-dev - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.8 - - - os: linux - env: TOOLSET=clang COMPILER=clang++-3.9 CXXSTD=03,11 - addons: - apt: - packages: - - clang-3.9 - - libstdc++-4.9-dev - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.9 - - - os: linux - env: TOOLSET=clang COMPILER=clang++-3.9 CXXSTD=14,1z - addons: - apt: - packages: - - clang-3.9 - - libstdc++-4.9-dev - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.9 - - - os: osx - env: TOOLSET=clang COMPILER=clang++ CXXSTD=03,11 - osx_image: xcode8.3 - - - os: osx - env: TOOLSET=clang COMPILER=clang++ CXXSTD=14,1z - osx_image: xcode8.3 - - - os: osx - env: TOOLSET=clang COMPILER=clang++ CXXSTD=11 - osx_image: xcode8.2 - - - os: osx - env: TOOLSET=clang COMPILER=clang++ CXXSTD=11 - osx_image: xcode8.1 - - - os: osx - env: TOOLSET=clang COMPILER=clang++ CXXSTD=11 - osx_image: xcode8.0 - - - os: osx - env: TOOLSET=clang COMPILER=clang++ CXXSTD=11 - osx_image: xcode7.3 - - - os: osx - env: TOOLSET=clang COMPILER=clang++ CXXSTD=11 - osx_image: xcode6.4 - # On this image, git doesn't support --jobs 3 - install: - - BOOST_BRANCH=develop && [ "$TRAVIS_BRANCH" == "master" ] && BOOST_BRANCH=master || true - - cd .. - - git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root - - cd boost-root - - git submodule update --init tools/boostdep - - cp -r $TRAVIS_BUILD_DIR/* libs/regex - - python tools/boostdep/depinst/depinst.py -I example regex - - ./bootstrap.sh - - ./b2 headers - - -install: - - BOOST_BRANCH=develop && [ "$TRAVIS_BRANCH" == "master" ] && BOOST_BRANCH=master || true - - cd .. - - git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root - - cd boost-root - - git submodule update --init tools/boostdep - - cp -r $TRAVIS_BUILD_DIR/* libs/regex - - python tools/boostdep/depinst/depinst.py -I example -g "--jobs 3" regex - - ./bootstrap.sh - - ./b2 headers - -script: - - |- - echo "using $TOOLSET : : $COMPILER ;" > ~/user-config.jam - - IFS=',' - - for CXXLOCAL in $CXXSTD; do (cd libs/config/test && ../../../b2 config_info_travis_install toolset=$TOOLSET cxxstd=$CXXLOCAL $CXXSTD_DIALECT && echo With Standard Version $CXXLOCAL && ./config_info_travis && rm ./config_info_travis) done - - unset IFS - - ./b2 -j3 libs/regex/test toolset=$TOOLSET cxxstd=$CXXSTD $CXXSTD_DIALECT - -notifications: - email: - on_success: always diff --git a/CMakeLists.txt b/CMakeLists.txt index a61ba77c..2fc093ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,103 +1,23 @@ -# Copyright 2018-2019 Mike Dev +# Copyright 2018 Mike Dev +# Copyright 2019 Peter Dimov # Distributed under the Boost Software License, Version 1.0. -# See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt -# -# NOTE: CMake support for Boost.Regex is currently experimental at best -# and the interface is likely to change in the future +# See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt +# We support CMake 3.5, but prefer 3.16 policies and behavior +cmake_minimum_required(VERSION 3.5...3.16) -##### How-To: -# -# If you have a cmake project that wants to use and compile -# boost_regex, as part of a single build system run, do the following: -# 1) clone the boost project and all its sub-projects: -# -# git clone --branch develop --depth 1 --recursive --shallow-submodules https://github.com/boostorg/boost.git boost-root -# -# 2) add to your cmake script: -# -# add_subdirectory( []) -# target_link_libraries( PUBLIC Boost::regex) -# -# 3) run your cmake build as usual -# -# ## Explanation: -# -# Currently this file does not work standalone. It is expected to be -# invoked from a parent script via add_subdirectory. That parent script -# is responsible for providing targets for direct and indirect dependencies, -# such as Boost::assert, Boost::concept_check, e.g. by also adding those -# libraries via add_submodule (order doesn't matter). -# The parent script can be your own cmake script, but it is easier to just -# use add the CMakeLists in the root of the boost super project, which -# will in turn add all boost libraries usable with the add_subdirectory -# Workflow. -# -# Note: You don't need to actually clone all boost libraries. E.g. look -# into the travis ci file to see on which libraries boost_regex actually -# depends or use boostdep https://github.com/boostorg/boostdep +project(boost_regex VERSION "${BOOST_SUPERPROJECT_VERSION}" LANGUAGES CXX) +add_library(boost_regex INTERFACE) +add_library(Boost::regex ALIAS boost_regex) -##### Current Limitations: -# -# - Doesn't compile or run tests -# +target_include_directories(boost_regex INTERFACE include) -cmake_minimum_required( VERSION 3.5...3.16 ) -project( boost_regex VERSION "${BOOST_SUPERPROJECT_VERSION}" LANGUAGES CXX ) - -option( BOOST_REGEX_INCLUDE_EXAMPLES "Also build (some) boost regex examples" OFF ) -option( BOOST_REGEX_USE_ICU "Enable ICU support in boost regex" OFF ) - -file( GLOB BOOST_REGEX_SRC ./src/*.cpp ) - -add_library( boost_regex ${BOOST_REGEX_SRC} ) -add_library( Boost::regex ALIAS boost_regex ) - -target_include_directories( boost_regex PUBLIC include ) - -target_compile_definitions( boost_regex - PUBLIC - # No need for autolink - BOOST_REGEX_NO_LIB - $<$,SHARED_LIBRARY>:BOOST_REGEX_DYN_LINK=1> - $<$,STATIC_LIBRARY>:BOOST_REGEX_STATIC_LINK=1> +target_link_libraries(boost_regex + INTERFACE + Boost::config + Boost::throw_exception + Boost::predef + Boost::assert ) -# Specify dependencies (including header-only libraries) -target_link_libraries( boost_regex - PUBLIC - Boost::assert - Boost::concept_check - Boost::config - Boost::container_hash - Boost::core - Boost::integer - Boost::iterator - Boost::mpl - Boost::predef - Boost::smart_ptr - Boost::static_assert - Boost::throw_exception - Boost::type_traits -) - -if( BOOST_REGEX_USE_ICU ) - # ICU Targets could be provided by parent project, - # if not, look for them ourselves - if( NOT TARGET ICU::dt ) - # components need to be listed explicitly - find_package( ICU COMPONENTS dt in uc REQUIRED ) - endif() - - target_link_libraries( boost_regex - PRIVATE - ICU::dt ICU::in ICU::uc - ) - target_compile_definitions( boost_regex PRIVATE BOOST_HAS_ICU=1 ) -endif() - -if( BOOST_REGEX_INCLUDE_EXAMPLES ) - add_subdirectory( example/snippets ) -endif() - diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 8ab4c68c..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright 2016 Peter Dimov -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt) - -version: 1.0.{build}-{branch} - -shallow_clone: true - -branches: - only: - - master - - develop - -platform: - - x64 - -environment: - matrix: - - ARGS: --toolset=msvc-9.0 address-model=32 - - ARGS: --toolset=msvc-10.0 address-model=32 - - ARGS: --toolset=msvc-11.0 address-model=32 - - ARGS: --toolset=msvc-12.0 address-model=32 - - ARGS: --toolset=msvc-14.0 address-model=32 - - ARGS: --toolset=msvc-12.0 address-model=64 - - ARGS: --toolset=msvc-14.0 address-model=64 - - ARGS: --toolset=msvc-14.0 address-model=64 cxxstd=latest - - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - ARGS: --toolset=msvc-14.1 address-model=64 - - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - ARGS: --toolset=msvc-14.1 address-model=32 - - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - ARGS: --toolset=msvc-14.1 address-model=64 cxxstd=17 - - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - ARGS: --toolset=msvc-14.1 address-model=64 cxxstd=latest cxxflags=-permissive- - - ARGS: --toolset=gcc address-model=64 - PATH: C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH% - - ARGS: --toolset=gcc address-model=64 cxxflags=-std=gnu++1z - PATH: C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH% - - ARGS: --toolset=gcc address-model=32 - PATH: C:\mingw-w64\i686-5.3.0-posix-dwarf-rt_v4-rev0\mingw32\bin;%PATH% - - ARGS: --toolset=gcc address-model=32 linkflags=-Wl,-allow-multiple-definition - PATH: C:\MinGW\bin;%PATH% - - ARGS: --toolset=gcc address-model=32 define=_POSIX_C_SOURCE=200112L threadapi=pthread link=static - PATH: C:\cygwin\bin;%PATH% - - ARGS: --toolset=gcc address-model=64 define=_POSIX_C_SOURCE=200112L define=__USE_ISOC99 threadapi=pthread link=static - PATH: C:\cygwin64\bin;%PATH% - -install: - - cd .. - - git clone -b %APPVEYOR_REPO_BRANCH% --depth 1 https://github.com/boostorg/boost.git boost-root - - cd boost-root - - git submodule update --init tools/build - - git submodule update --init tools/boost_install - - git submodule update --init libs/headers - - git submodule update --init libs/config - - git submodule update --init libs/core - - git submodule update --init libs/container_hash - - git submodule update --init libs/detail - - git submodule update --init libs/smart_ptr - - git submodule update --init libs/predef - - git submodule update --init libs/assert - - git submodule update --init libs/throw_exception - - git submodule update --init libs/mpl - - git submodule update --init libs/type_traits - - git submodule update --init libs/static_assert - - git submodule update --init libs/integer - - git submodule update --init libs/preprocessor - - git submodule update --init libs/functional - - git submodule update --init libs/program_options - - git submodule update --init libs/chrono - - git submodule update --init libs/system - - git submodule update --init libs/thread - - git submodule update --init libs/winapi - - git submodule update --init libs/move - - git submodule update --init libs/date_time - - git submodule update --init libs/ratio - - git submodule update --init libs/iterator - - git submodule update --init libs/range - - git submodule update --init libs/any - - git submodule update --init libs/concept_check - - git submodule update --init libs/array - - git submodule update --init libs/timer - - git submodule update --init libs/bind - - git submodule update --init libs/utility - - git submodule update --init libs/io - - git submodule update --init libs/intrusive - - git submodule update --init libs/container - - git submodule update --init libs/tuple - - git submodule update --init libs/exception - - git submodule update --init libs/function - - git submodule update --init libs/type_index - - git submodule update --init libs/lexical_cast - - git submodule update --init libs/numeric - - git submodule update --init libs/math - - git submodule update --init libs/tokenizer - - git submodule update --init libs/optional - - git submodule update --init libs/atomic - - git submodule update --init libs/rational - - git submodule update --init libs/algorithm - - xcopy /s /e /q %APPVEYOR_BUILD_FOLDER% libs\regex - - bootstrap - - b2 headers - -build: off - -test_script: - - cd libs\config\test - - ..\..\..\b2 config_info_travis_install %ARGS% - - config_info_travis - - cd ..\..\regex\test - - ..\..\..\b2 -j3 %ARGS% define=CI_SUPPRESS_KNOWN_ISSUES diff --git a/build/Jamfile.v2 b/build/Jamfile.v2 index e4de0afd..a74f9ed2 100644 --- a/build/Jamfile.v2 +++ b/build/Jamfile.v2 @@ -119,27 +119,18 @@ if ! $(disable-icu) exe has_icu : has_icu_test.cpp : $(ICU_OPTS) ; explicit has_icu ; +obj is_legacy_03 : is_legacy_03.cpp ; +explicit is_legacy_03 ; + alias icu_options : : : : [ check-target-builds has_icu : $(ICU_OPTS) : ] ; SOURCES = - c_regex_traits.cpp - cpp_regex_traits.cpp - cregex.cpp - fileiter.cpp - icu.cpp - instances.cpp posix_api.cpp regex.cpp regex_debug.cpp - regex_raw_buffer.cpp - regex_traits_defaults.cpp static_mutex.cpp - w32_regex_traits.cpp - wc_regex_traits.cpp wide_posix_api.cpp - winstances.cpp - usinstances.cpp ; - +; lib boost_regex : ../src/$(SOURCES) icu_options : @@ -149,10 +140,3 @@ lib boost_regex : ../src/$(SOURCES) icu_options boost-install boost_regex ; - - - - - - - diff --git a/build/is_legacy_03.cpp b/build/is_legacy_03.cpp new file mode 100644 index 00000000..78c36e19 --- /dev/null +++ b/build/is_legacy_03.cpp @@ -0,0 +1,16 @@ +/* + * + * Copyright (c) 2020 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + +#include + +#ifdef BOOST_REGEX_CXX03 +#error "Legacy mode" +#endif diff --git a/doc/configuration.qbk b/doc/configuration.qbk index 080adf1c..3439eb15 100644 --- a/doc/configuration.qbk +++ b/doc/configuration.qbk @@ -17,6 +17,22 @@ or platform) then [@../../../config/index.html Boost.Config] has a configure scr [endsect] +[section:standalone Use in Standalone Mode (without the rest of Boost)] + +This library may now be used in "standalone" mode without the rest of the Boost C++ libraries, +in order to do this you must either: + +* Have a C++17 compiler that supports `__has_include`, in this case if `` is *not* present +then the library will automoatically enter standalone mode. Or: +* Define BOOST_REGEX_STANDALONE when building. + +The main difference between the 2 modes, is that when Boost.Config is present the library will automatically +configure itself around various compiler defects. In particular in order to use the library with exception support +turned off, you will either need a copy of Boost.Config in your include path, or else manually define BOOST_NO_EXCEPTIONS +when building. + +[endsect] + [section:locale Locale and traits class selection] The following macros (see [@../../../../boost/regex/user.hpp user.hpp]) control how Boost.Regex interacts with @@ -31,43 +47,11 @@ the user's locale: [endsect] - -[section:linkage Linkage Options] - -[table -[[macro][description]] -[[BOOST_REGEX_DYN_LINK][For Microsoft and Borland C++ builds, this tells Boost.Regex that it should link to the dll build of the Boost.Regex. By default boost.regex will link to its static library build, even if the dynamic C runtime library is in use.]] -[[BOOST_REGEX_NO_LIB][For Microsoft and Borland C++ builds, this tells Boost.Regex that it should not automatically select the library to link to.]] -[[BOOST_REGEX_NO_FASTCALL][For Microsoft builds, this tells Boost.Regex to use the `__cdecl` calling convention rather than `__fastcall`. Useful if you want to use the same library from both managed and unmanaged code.]] -] - -[endsect] - -[section:algorithm Algorithm Selection] - -[table -[[macro][description]] -[[BOOST_REGEX_RECURSIVE][Tells Boost.Regex to use a stack-recursive matching algorithm. This is generally the fastest option (although there is very little in it), but can cause stack overflow in extreme cases, on Win32 this can be handled safely, but this is not the case on other platforms.]] -[[BOOST_REGEX_NON_RECURSIVE][Tells Boost.Regex to use a non-stack recursive matching algorithm, this can be slightly slower than the alternative, but is always safe no matter how pathological the regular expression. This is the default on non-Win32 platforms.]] -] - -[endsect] - [section:tuning Algorithm Tuning] -The following option applies only if BOOST_REGEX_RECURSIVE is set. - [table [[macro][description]] -[[BOOST_REGEX_HAS_MS_STACK_GUARD][Tells Boost.Regex that Microsoft style __try - __except blocks are supported, and can be used to safely trap stack overflow.]] -] - - -The following options apply only if BOOST_REGEX_NON_RECURSIVE is set. - -[table -[[macro][description]] -[[BOOST_REGEX_BLOCKSIZE][In non-recursive mode, Boost.Regex uses largish blocks of memory to act as a stack for the state machine, the larger the block size then the fewer allocations that will take place. This defaults to 4096 bytes, which is large enough to match the vast majority of regular expressions without further allocations, however, you can choose smaller or larger values depending upon your platforms characteristics.]] +[[BOOST_REGEX_BLOCKSIZE][Boost.Regex uses largish blocks of memory to act as a stack for the state machine, the larger the block size then the fewer allocations that will take place. This defaults to 4096 bytes, which is large enough to match the vast majority of regular expressions without further allocations, however, you can choose smaller or larger values depending upon your platforms characteristics.]] [[BOOST_REGEX_MAX_BLOCKS][Tells Boost.Regex how many blocks of size BOOST_REGEX_BLOCKSIZE it is permitted to use. If this value is exceeded then Boost.Regex will stop trying to find a match and throw a std::runtime_error. Defaults to 1024, don't forget to tweak this value if you alter BOOST_REGEX_BLOCKSIZE by much.]] [[BOOST_REGEX_MAX_CACHE_BLOCKS][Tells Boost.Regex how many memory blocks to store in it's internal cache - memory blocks are taken from this cache rather than by calling diff --git a/doc/html/boost_regex/configuration.html b/doc/html/boost_regex/configuration.html index 9e2bf10e..bfc70b20 100644 --- a/doc/html/boost_regex/configuration.html +++ b/doc/html/boost_regex/configuration.html @@ -30,8 +30,6 @@
Compiler Setup
Locale and traits class selection
-
Linkage Options
-
Algorithm Selection
Algorithm Tuning
diff --git a/doc/html/boost_regex/configuration/locale.html b/doc/html/boost_regex/configuration/locale.html index d1f74f6a..eb3ba937 100644 --- a/doc/html/boost_regex/configuration/locale.html +++ b/doc/html/boost_regex/configuration/locale.html @@ -7,7 +7,7 @@ - + @@ -20,7 +20,7 @@

-PrevUpHomeNext +PrevUpHomeNext

@@ -103,7 +103,7 @@
-PrevUpHomeNext +PrevUpHomeNext
diff --git a/doc/html/boost_regex/configuration/tuning.html b/doc/html/boost_regex/configuration/tuning.html index 45f13f45..30ddc99f 100644 --- a/doc/html/boost_regex/configuration/tuning.html +++ b/doc/html/boost_regex/configuration/tuning.html @@ -6,7 +6,7 @@ - + @@ -20,49 +20,12 @@
-PrevUpHomeNext +PrevUpHomeNext
-

- The following option applies only if BOOST_REGEX_RECURSIVE is set. -

-
---- - - - - - - - - -
-

- macro -

-
-

- description -

-
-

- BOOST_REGEX_HAS_MS_STACK_GUARD -

-
-

- Tells Boost.Regex that Microsoft style __try - __except blocks - are supported, and can be used to safely trap stack overflow. -

-
-

- The following options apply only if BOOST_REGEX_NON_RECURSIVE is set. -

@@ -89,13 +52,12 @@ @@ -150,7 +112,7 @@

- In non-recursive mode, Boost.Regex uses largish blocks of memory - to act as a stack for the state machine, the larger the block size - then the fewer allocations that will take place. This defaults - to 4096 bytes, which is large enough to match the vast majority - of regular expressions without further allocations, however, you - can choose smaller or larger values depending upon your platforms - characteristics. + Boost.Regex uses largish blocks of memory to act as a stack for + the state machine, the larger the block size then the fewer allocations + that will take place. This defaults to 4096 bytes, which is large + enough to match the vast majority of regular expressions without + further allocations, however, you can choose smaller or larger + values depending upon your platforms characteristics.


-PrevUpHomeNext +PrevUpHomeNext
diff --git a/doc/html/boost_regex/install.html b/doc/html/boost_regex/install.html index eacde153..7070210a 100644 --- a/doc/html/boost_regex/install.html +++ b/doc/html/boost_regex/install.html @@ -43,15 +43,39 @@ The library will encase all code inside namespace boost.

- Unlike some other template libraries, this library consists of a mixture of - template code (in the headers) and static code and data (in cpp files). Consequently - it is necessary to build the library's support code into a library or archive - file before you can use it, instructions for specific platforms are as follows: + This is a header only library provided your compiler supports C++11 or later. + Support for C++03 compilers is still present, but is now deprecated and may + be removed without further notice!

+

+ The only people that still need to build the external libboost_regex library + are those that are either: +

+
    +
  • + Using the library in C++03 mode, or, +
  • +
  • + Using the deprecated POSIX C API's +
  • +
+

+ Further, this library may now be used in "standalone" mode without + the rest of the Boost C++ libraries, in order to do this you must either: +

+
    +
  • + Have a C++17 compiler that supports __has_include, + in this case if <boost/config.hpp> is not present then the library will + automoatically enter standalone mode. Or: +
  • +
  • + Define BOOST_REGEX_STANDALONE when building. +
  • +
- Building - with bjam + C++03 users only (Deprecated) Building with bjam

This is now the preferred method for building and installing this library, diff --git a/doc/html/boost_regex/syntax/basic_extended.html b/doc/html/boost_regex/syntax/basic_extended.html index 08c1b790..60d814f3 100644 --- a/doc/html/boost_regex/syntax/basic_extended.html +++ b/doc/html/boost_regex/syntax/basic_extended.html @@ -231,9 +231,9 @@ cab sets:

- A character set is a bracket-expression starting with [ and ending with ], - it defines a set of characters, and matches any single character that is - a member of that set. + A character set is a bracket-expression starting with [ + and ending with ], it defines a set of characters, and + matches any single character that is a member of that set.

A bracket expression may contain any combination of the following: diff --git a/doc/html/boost_regex/syntax/basic_syntax.html b/doc/html/boost_regex/syntax/basic_syntax.html index 75992b0e..f7067ed5 100644 --- a/doc/html/boost_regex/syntax/basic_syntax.html +++ b/doc/html/boost_regex/syntax/basic_syntax.html @@ -176,9 +176,9 @@ aaaa sets:

- A character set is a bracket-expression starting with [ and ending with ], - it defines a set of characters, and matches any single character that is - a member of that set. + A character set is a bracket-expression starting with [ + and ending with ], it defines a set of characters, and + matches any single character that is a member of that set.

A bracket expression may contain any combination of the following: diff --git a/doc/html/boost_regex/syntax/perl_syntax.html b/doc/html/boost_regex/syntax/perl_syntax.html index b2ea134b..10e30e5f 100644 --- a/doc/html/boost_regex/syntax/perl_syntax.html +++ b/doc/html/boost_regex/syntax/perl_syntax.html @@ -402,9 +402,9 @@ sets

- A character set is a bracket-expression starting with [] and ending - with , it defines a set of characters, and matches - any single character that is a member of that set. + A character set is a bracket-expression starting with [ + and ending with ], it defines a set of characters, and + matches any single character that is a member of that set.

A bracket expression may contain any combination of the following: diff --git a/doc/html/index.html b/doc/html/index.html index 8a2244da..49e7612f 100644 --- a/doc/html/index.html +++ b/doc/html/index.html @@ -44,8 +44,6 @@

Compiler Setup
Locale and traits class selection
-
Linkage Options
-
Algorithm Selection
Algorithm Tuning
Building and Installing the Library
@@ -215,7 +213,7 @@

- +

Last revised: November 23, 2020 at 18:11:11 GMT

Last revised: January 25, 2021 at 11:08:05 GMT


diff --git a/doc/install.qbk b/doc/install.qbk index 45fea722..041439ef 100644 --- a/doc/install.qbk +++ b/doc/install.qbk @@ -20,13 +20,22 @@ process is the same as for all of boost; see the The library will encase all code inside namespace boost. -Unlike some other template libraries, this library consists of a mixture of -template code (in the headers) and static code and data (in cpp files). -Consequently it is necessary to build the library's support code into a -library or archive file before you can use it, instructions for specific -platforms are as follows: +This is a header only library provided your compiler supports C++11 or later. Support for +C++03 compilers is still present, but is now deprecated and may be removed without further notice! -[h4 Building with bjam] +The only people that still need to build the external libboost_regex library are those that are either: + +* Using the library in C++03 mode, or, +* Using the deprecated POSIX C API's + +Further, this library may now be used in "standalone" mode without the rest of the Boost C++ libraries, +in order to do this you must either: + +* Have a C++17 compiler that supports `__has_include`, in this case if `` is not present +then the library will automoatically enter standalone mode. Or: +* Define BOOST_REGEX_STANDALONE when building. + +[h4 [*C++03 users only (Deprecated)] Building with bjam] This is now the preferred method for building and installing this library, please refer to the diff --git a/doc/syntax_basic.qbk b/doc/syntax_basic.qbk index 865ef995..63aa04be 100644 --- a/doc/syntax_basic.qbk +++ b/doc/syntax_basic.qbk @@ -118,7 +118,7 @@ But not the string: [h4 Character sets:] -A character set is a bracket-expression starting with \[ and ending with \], +A character set is a bracket-expression starting with [^\[] and ending with [^\]], it defines a set of characters, and matches any single character that is a member of that set. diff --git a/doc/syntax_extended.qbk b/doc/syntax_extended.qbk index 0146d5be..c1c9e08d 100644 --- a/doc/syntax_extended.qbk +++ b/doc/syntax_extended.qbk @@ -151,7 +151,7 @@ will match either of "abd" or "abef". [h4 Character sets:] -A character set is a bracket-expression starting with \[ and ending with \], +A character set is a bracket-expression starting with [^\[] and ending with [^\]], it defines a set of characters, and matches any single character that is a member of that set. diff --git a/doc/syntax_perl.qbk b/doc/syntax_perl.qbk index 6ee00e4c..233db3cf 100644 --- a/doc/syntax_perl.qbk +++ b/doc/syntax_perl.qbk @@ -217,7 +217,7 @@ if you really want an empty alternative use [^(?:)] as a placeholder, for exampl [h4 Character sets] -A character set is a bracket-expression starting with [^[] and ending with [^]], +A character set is a bracket-expression starting with [^\[] and ending with [^\]], it defines a set of characters, and matches any single character that is a member of that set. diff --git a/example/snippets/regex_token_iterator_eg_1.cpp b/example/snippets/regex_token_iterator_eg_1.cpp index 74fcbf1a..4bf5c62f 100644 --- a/example/snippets/regex_token_iterator_eg_1.cpp +++ b/example/snippets/regex_token_iterator_eg_1.cpp @@ -1,6 +1,6 @@ /* * - * Copyright (c) 12003 + * Copyright (c) 2003 * John Maddock * * Use, modification and distribution are subject to the diff --git a/include/boost/cregex.hpp b/include/boost/cregex.hpp index b7a918eb..78012dd6 100644 --- a/include/boost/cregex.hpp +++ b/include/boost/cregex.hpp @@ -24,7 +24,11 @@ #include #endif +#ifdef BOOST_REGEX_CXX03 #include +#else +#include +#endif #endif /* include guard */ diff --git a/include/boost/regex.hpp b/include/boost/regex.hpp index 6dc3dfbd..b0c8bf13 100644 --- a/include/boost/regex.hpp +++ b/include/boost/regex.hpp @@ -28,7 +28,11 @@ #include #endif +#ifdef BOOST_REGEX_CXX03 #include +#else +#include +#endif #endif // include diff --git a/include/boost/regex/concepts.hpp b/include/boost/regex/concepts.hpp index 276e63f7..fe14f400 100644 --- a/include/boost/regex/concepts.hpp +++ b/include/boost/regex/concepts.hpp @@ -31,6 +31,12 @@ #include #include +#ifdef BOOST_REGEX_CXX03 +#define RW_NS boost +#else +#define RW_NS std +#endif + namespace boost{ // @@ -996,37 +1002,36 @@ struct BoostRegexConcept out = regex_format(out, m_cresults, func2b); out = regex_format(out, m_cresults, func1b, f); out = regex_format(out, m_cresults, func1b); - out = regex_format(out, m_cresults, boost::ref(func3b), f); - out = regex_format(out, m_cresults, boost::ref(func3b)); - out = regex_format(out, m_cresults, boost::ref(func2b), f); - out = regex_format(out, m_cresults, boost::ref(func2b)); - out = regex_format(out, m_cresults, boost::ref(func1b), f); - out = regex_format(out, m_cresults, boost::ref(func1b)); - out = regex_format(out, m_cresults, boost::cref(func3b), f); - out = regex_format(out, m_cresults, boost::cref(func3b)); - out = regex_format(out, m_cresults, boost::cref(func2b), f); - out = regex_format(out, m_cresults, boost::cref(func2b)); - out = regex_format(out, m_cresults, boost::cref(func1b), f); - out = regex_format(out, m_cresults, boost::cref(func1b)); - + out = regex_format(out, m_cresults, RW_NS::ref(func3b), f); + out = regex_format(out, m_cresults, RW_NS::ref(func3b)); + out = regex_format(out, m_cresults, RW_NS::ref(func2b), f); + out = regex_format(out, m_cresults, RW_NS::ref(func2b)); + out = regex_format(out, m_cresults, RW_NS::ref(func1b), f); + out = regex_format(out, m_cresults, RW_NS::ref(func1b)); + out = regex_format(out, m_cresults, RW_NS::cref(func3b), f); + out = regex_format(out, m_cresults, RW_NS::cref(func3b)); + out = regex_format(out, m_cresults, RW_NS::cref(func2b), f); + out = regex_format(out, m_cresults, RW_NS::cref(func2b)); + out = regex_format(out, m_cresults, RW_NS::cref(func1b), f); + out = regex_format(out, m_cresults, RW_NS::cref(func1b)); m_string += regex_format(m_cresults, func3b, f); m_string += regex_format(m_cresults, func3b); m_string += regex_format(m_cresults, func2b, f); m_string += regex_format(m_cresults, func2b); m_string += regex_format(m_cresults, func1b, f); m_string += regex_format(m_cresults, func1b); - m_string += regex_format(m_cresults, boost::ref(func3b), f); - m_string += regex_format(m_cresults, boost::ref(func3b)); - m_string += regex_format(m_cresults, boost::ref(func2b), f); - m_string += regex_format(m_cresults, boost::ref(func2b)); - m_string += regex_format(m_cresults, boost::ref(func1b), f); - m_string += regex_format(m_cresults, boost::ref(func1b)); - m_string += regex_format(m_cresults, boost::cref(func3b), f); - m_string += regex_format(m_cresults, boost::cref(func3b)); - m_string += regex_format(m_cresults, boost::cref(func2b), f); - m_string += regex_format(m_cresults, boost::cref(func2b)); - m_string += regex_format(m_cresults, boost::cref(func1b), f); - m_string += regex_format(m_cresults, boost::cref(func1b)); + m_string += regex_format(m_cresults, RW_NS::ref(func3b), f); + m_string += regex_format(m_cresults, RW_NS::ref(func3b)); + m_string += regex_format(m_cresults, RW_NS::ref(func2b), f); + m_string += regex_format(m_cresults, RW_NS::ref(func2b)); + m_string += regex_format(m_cresults, RW_NS::ref(func1b), f); + m_string += regex_format(m_cresults, RW_NS::ref(func1b)); + m_string += regex_format(m_cresults, RW_NS::cref(func3b), f); + m_string += regex_format(m_cresults, RW_NS::cref(func3b)); + m_string += regex_format(m_cresults, RW_NS::cref(func2b), f); + m_string += regex_format(m_cresults, RW_NS::cref(func2b)); + m_string += regex_format(m_cresults, RW_NS::cref(func1b), f); + m_string += regex_format(m_cresults, RW_NS::cref(func1b)); out = m_cresults.format(out, func3b, f); out = m_cresults.format(out, func3b); @@ -1034,18 +1039,18 @@ struct BoostRegexConcept out = m_cresults.format(out, func2b); out = m_cresults.format(out, func1b, f); out = m_cresults.format(out, func1b); - out = m_cresults.format(out, boost::ref(func3b), f); - out = m_cresults.format(out, boost::ref(func3b)); - out = m_cresults.format(out, boost::ref(func2b), f); - out = m_cresults.format(out, boost::ref(func2b)); - out = m_cresults.format(out, boost::ref(func1b), f); - out = m_cresults.format(out, boost::ref(func1b)); - out = m_cresults.format(out, boost::cref(func3b), f); - out = m_cresults.format(out, boost::cref(func3b)); - out = m_cresults.format(out, boost::cref(func2b), f); - out = m_cresults.format(out, boost::cref(func2b)); - out = m_cresults.format(out, boost::cref(func1b), f); - out = m_cresults.format(out, boost::cref(func1b)); + out = m_cresults.format(out, RW_NS::ref(func3b), f); + out = m_cresults.format(out, RW_NS::ref(func3b)); + out = m_cresults.format(out, RW_NS::ref(func2b), f); + out = m_cresults.format(out, RW_NS::ref(func2b)); + out = m_cresults.format(out, RW_NS::ref(func1b), f); + out = m_cresults.format(out, RW_NS::ref(func1b)); + out = m_cresults.format(out, RW_NS::cref(func3b), f); + out = m_cresults.format(out, RW_NS::cref(func3b)); + out = m_cresults.format(out, RW_NS::cref(func2b), f); + out = m_cresults.format(out, RW_NS::cref(func2b)); + out = m_cresults.format(out, RW_NS::cref(func1b), f); + out = m_cresults.format(out, RW_NS::cref(func1b)); m_string += m_cresults.format(func3b, f); m_string += m_cresults.format(func3b); @@ -1053,18 +1058,18 @@ struct BoostRegexConcept m_string += m_cresults.format(func2b); m_string += m_cresults.format(func1b, f); m_string += m_cresults.format(func1b); - m_string += m_cresults.format(boost::ref(func3b), f); - m_string += m_cresults.format(boost::ref(func3b)); - m_string += m_cresults.format(boost::ref(func2b), f); - m_string += m_cresults.format(boost::ref(func2b)); - m_string += m_cresults.format(boost::ref(func1b), f); - m_string += m_cresults.format(boost::ref(func1b)); - m_string += m_cresults.format(boost::cref(func3b), f); - m_string += m_cresults.format(boost::cref(func3b)); - m_string += m_cresults.format(boost::cref(func2b), f); - m_string += m_cresults.format(boost::cref(func2b)); - m_string += m_cresults.format(boost::cref(func1b), f); - m_string += m_cresults.format(boost::cref(func1b)); + m_string += m_cresults.format(RW_NS::ref(func3b), f); + m_string += m_cresults.format(RW_NS::ref(func3b)); + m_string += m_cresults.format(RW_NS::ref(func2b), f); + m_string += m_cresults.format(RW_NS::ref(func2b)); + m_string += m_cresults.format(RW_NS::ref(func1b), f); + m_string += m_cresults.format(RW_NS::ref(func1b)); + m_string += m_cresults.format(RW_NS::cref(func3b), f); + m_string += m_cresults.format(RW_NS::cref(func3b)); + m_string += m_cresults.format(RW_NS::cref(func2b), f); + m_string += m_cresults.format(RW_NS::cref(func2b)); + m_string += m_cresults.format(RW_NS::cref(func1b), f); + m_string += m_cresults.format(RW_NS::cref(func1b)); out = regex_replace(out, m_in, m_in, ce, func3, f); out = regex_replace(out, m_in, m_in, ce, func3); @@ -1072,18 +1077,18 @@ struct BoostRegexConcept out = regex_replace(out, m_in, m_in, ce, func2); out = regex_replace(out, m_in, m_in, ce, func1, f); out = regex_replace(out, m_in, m_in, ce, func1); - out = regex_replace(out, m_in, m_in, ce, boost::ref(func3), f); - out = regex_replace(out, m_in, m_in, ce, boost::ref(func3)); - out = regex_replace(out, m_in, m_in, ce, boost::ref(func2), f); - out = regex_replace(out, m_in, m_in, ce, boost::ref(func2)); - out = regex_replace(out, m_in, m_in, ce, boost::ref(func1), f); - out = regex_replace(out, m_in, m_in, ce, boost::ref(func1)); - out = regex_replace(out, m_in, m_in, ce, boost::cref(func3), f); - out = regex_replace(out, m_in, m_in, ce, boost::cref(func3)); - out = regex_replace(out, m_in, m_in, ce, boost::cref(func2), f); - out = regex_replace(out, m_in, m_in, ce, boost::cref(func2)); - out = regex_replace(out, m_in, m_in, ce, boost::cref(func1), f); - out = regex_replace(out, m_in, m_in, ce, boost::cref(func1)); + out = regex_replace(out, m_in, m_in, ce, RW_NS::ref(func3), f); + out = regex_replace(out, m_in, m_in, ce, RW_NS::ref(func3)); + out = regex_replace(out, m_in, m_in, ce, RW_NS::ref(func2), f); + out = regex_replace(out, m_in, m_in, ce, RW_NS::ref(func2)); + out = regex_replace(out, m_in, m_in, ce, RW_NS::ref(func1), f); + out = regex_replace(out, m_in, m_in, ce, RW_NS::ref(func1)); + out = regex_replace(out, m_in, m_in, ce, RW_NS::cref(func3), f); + out = regex_replace(out, m_in, m_in, ce, RW_NS::cref(func3)); + out = regex_replace(out, m_in, m_in, ce, RW_NS::cref(func2), f); + out = regex_replace(out, m_in, m_in, ce, RW_NS::cref(func2)); + out = regex_replace(out, m_in, m_in, ce, RW_NS::cref(func1), f); + out = regex_replace(out, m_in, m_in, ce, RW_NS::cref(func1)); functor3 > func3s; functor2 > func2s; @@ -1094,18 +1099,18 @@ struct BoostRegexConcept m_string += regex_replace(m_string, ce, func2s); m_string += regex_replace(m_string, ce, func1s, f); m_string += regex_replace(m_string, ce, func1s); - m_string += regex_replace(m_string, ce, boost::ref(func3s), f); - m_string += regex_replace(m_string, ce, boost::ref(func3s)); - m_string += regex_replace(m_string, ce, boost::ref(func2s), f); - m_string += regex_replace(m_string, ce, boost::ref(func2s)); - m_string += regex_replace(m_string, ce, boost::ref(func1s), f); - m_string += regex_replace(m_string, ce, boost::ref(func1s)); - m_string += regex_replace(m_string, ce, boost::cref(func3s), f); - m_string += regex_replace(m_string, ce, boost::cref(func3s)); - m_string += regex_replace(m_string, ce, boost::cref(func2s), f); - m_string += regex_replace(m_string, ce, boost::cref(func2s)); - m_string += regex_replace(m_string, ce, boost::cref(func1s), f); - m_string += regex_replace(m_string, ce, boost::cref(func1s)); + m_string += regex_replace(m_string, ce, RW_NS::ref(func3s), f); + m_string += regex_replace(m_string, ce, RW_NS::ref(func3s)); + m_string += regex_replace(m_string, ce, RW_NS::ref(func2s), f); + m_string += regex_replace(m_string, ce, RW_NS::ref(func2s)); + m_string += regex_replace(m_string, ce, RW_NS::ref(func1s), f); + m_string += regex_replace(m_string, ce, RW_NS::ref(func1s)); + m_string += regex_replace(m_string, ce, RW_NS::cref(func3s), f); + m_string += regex_replace(m_string, ce, RW_NS::cref(func3s)); + m_string += regex_replace(m_string, ce, RW_NS::cref(func2s), f); + m_string += regex_replace(m_string, ce, RW_NS::cref(func2s)); + m_string += regex_replace(m_string, ce, RW_NS::cref(func1s), f); + m_string += regex_replace(m_string, ce, RW_NS::cref(func1s)); } std::basic_ostream m_stream; diff --git a/include/boost/regex/config.hpp b/include/boost/regex/config.hpp index 9b75d507..bed485fa 100644 --- a/include/boost/regex/config.hpp +++ b/include/boost/regex/config.hpp @@ -18,6 +18,21 @@ #ifndef BOOST_REGEX_CONFIG_HPP #define BOOST_REGEX_CONFIG_HPP + +#if !((__cplusplus >= 201103L) || (defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(BOOST_REGEX_CXX03)) +# define BOOST_REGEX_CXX03 +#endif + +#if defined(BOOST_REGEX_RECURSIVE) && !defined(BOOST_REGEX_CXX03) +# define BOOST_REGEX_CXX03 +#endif + +#if defined(__has_include) +#if !defined(BOOST_REGEX_STANDALONE) && !__has_include() +#define BOOST_REGEX_STANDALONE +#endif +#endif + /* * Borland C++ Fix/error check * this has to go *before* we include any std lib headers: @@ -25,7 +40,23 @@ #if defined(__BORLANDC__) && !defined(__clang__) # include #endif +#ifndef BOOST_REGEX_STANDALONE #include +#endif + +/************************************************************************* +* +* Asserts: +* +*************************************************************************/ + +#ifdef BOOST_REGEX_STANDALONE +#include +# define BOOST_REGEX_ASSERT(x) assert(x) +#else +#include +# define BOOST_REGEX_ASSERT(x) BOOST_ASSERT(x) +#endif /***************************************************************************** * @@ -41,8 +72,10 @@ # include BOOST_REGEX_USER_CONFIG +#ifndef BOOST_REGEX_STANDALONE # include # include +#endif #else /* @@ -57,6 +90,21 @@ # endif #endif + +/**************************************************************************** +* +* Legacy support: +* +*******************************************************************************/ + +#if defined(BOOST_NO_STD_LOCALE) || defined(BOOST_NO_CXX11_HDR_MUTEX) || defined(BOOST_NO_CXX11_HDR_TYPE_TRAITS) \ + || defined(BOOST_NO_CXX11_HDR_ATOMIC) || defined(BOOST_NO_CXX11_ALLOCATOR) || defined(BOOST_NO_CXX11_SMART_PTR) \ + || defined(BOOST_NO_CXX11_STATIC_ASSERT) || defined(BOOST_NO_NOEXCEPT) +#ifndef BOOST_REGEX_CXX03 +# define BOOST_REGEX_CXX03 +#endif +#endif + /***************************************************************************** * * Boilerplate regex config options: @@ -64,19 +112,50 @@ ****************************************************************************/ /* Obsolete macro, use BOOST_VERSION instead: */ -#define BOOST_RE_VERSION 320 +#define BOOST_RE_VERSION 500 /* fix: */ #if defined(_UNICODE) && !defined(UNICODE) #define UNICODE #endif +#define BOOST_REGEX_JOIN(X, Y) BOOST_REGEX_DO_JOIN(X, Y) +#define BOOST_REGEX_DO_JOIN(X, Y) BOOST_REGEX_DO_JOIN2(X,Y) +#define BOOST_REGEX_DO_JOIN2(X, Y) X##Y + +#ifdef BOOST_FALLTHROUGH +# define BOOST_REGEX_FALLTHROUGH BOOST_FALLTHROUGH +#else + +#if defined(__clang__) && (__cplusplus >= 201103L) && defined(__has_warning) +# if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough") +# define BOOST_REGEX_FALLTHROUGH [[clang::fallthrough]] +# endif +#endif +#if !defined(BOOST_REGEX_FALLTHROUGH) && defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1800) && (__cplusplus >= 201703) +# define BOOST_REGEX_FALLTHROUGH [[fallthrough]] +#endif +#if !defined(BOOST_REGEX_FALLTHROUGH) && defined(__GNUC__) && (__GNUC__ >= 7) +# define BOOST_REGEX_FALLTHROUGH __attribute__((fallthrough)) +#endif + +#if !defined(BOOST_REGEX_FALLTHROUGH) +# define BOOST_REGEX_FALLTHROUGH +#endif +#endif + +#ifdef BOOST_NORETURN +# define BOOST_REGEX_NORETURN BOOST_NORETURN +#else +# define BOOST_REGEX_NORETURN +#endif + /* * Define a macro for the namespace that details are placed in, this includes the Boost * version number to avoid mismatched header and library versions: */ -#define BOOST_REGEX_DETAIL_NS BOOST_JOIN(re_detail_, BOOST_VERSION) +#define BOOST_REGEX_DETAIL_NS BOOST_REGEX_JOIN(re_detail_, BOOST_RE_VERSION) /* * Fix for gcc prior to 3.4: std::ctype doesn't allow @@ -84,47 +163,10 @@ * std::use_facet >.is(std::ctype_base::lower|std::ctype_base::upper, L'a'); * returns *false*. */ -#ifdef __GLIBCPP__ +#if defined(__GLIBCPP__) && defined(BOOST_REGEX_CXX03) # define BOOST_REGEX_BUGGY_CTYPE_FACET #endif -/* - * Intel C++ before 8.0 ends up with unresolved externals unless we turn off - * extern template support: - */ -#if defined(BOOST_INTEL) && defined(__cplusplus) && (BOOST_INTEL <= 800) -# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES -#endif -/* - * Visual C++ doesn't support external templates with C++ extensions turned off: - */ -#if defined(_MSC_VER) && !defined(_MSC_EXTENSIONS) -# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES -#endif - /* - * Oracle compiler in C++11 mode doesn't like external templates for some reason: - */ -#ifdef __SUNPRO_CC -# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES -#endif - /* - * Shared regex lib will crash without this, frankly it looks a lot like a gcc bug: - */ -#if defined(__MINGW32__) -# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES -#endif -/* - * Clang fails to export template instances with -fvisibility=hidden, see - * https://github.com/boostorg/regex/issues/49 - */ -#ifdef __clang__ -# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES -#endif -#ifdef __CYGWIN__ -/* We get multiply defined symbols without this: */ -# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES -#endif - /* * If there isn't good enough wide character support then there will * be no wide character regular expressions: @@ -143,7 +185,7 @@ # define _STLP_CWCTYPE # endif -#ifdef __cplusplus +#if defined(__cplusplus) && defined(BOOST_REGEX_CXX03) # include #endif @@ -183,50 +225,12 @@ # define BOOST_REGEX_NO_W32 #endif -/***************************************************************************** - * - * Wide character workarounds: - * - ****************************************************************************/ - -/* - * define BOOST_REGEX_HAS_OTHER_WCHAR_T when wchar_t is a native type, but the users - * code may be built with wchar_t as unsigned short: basically when we're building - * with MSVC and the /Zc:wchar_t option we place some extra unsigned short versions - * of the non-inline functions in the library, so that users can still link to the lib, - * irrespective of whether their own code is built with /Zc:wchar_t. - * Note that this does NOT WORK with VC10 and VC14 when the C++ locale is in effect as - * the locale's facets simply do not compile in that case. - * As we default to the C++ locale when compiling for the windows runtime we - * skip in this case aswell. - */ -#if defined(__cplusplus) && \ - (defined(BOOST_MSVC) || defined(__ICL)) && \ - !defined(BOOST_NO_INTRINSIC_WCHAR_T) && \ - defined(BOOST_WINDOWS) && \ - !defined(__SGI_STL_PORT) && \ - !defined(_STLPORT_VERSION) && \ - !defined(BOOST_RWSTD_VER) && \ - ((_MSC_VER < 1600) || !defined(BOOST_REGEX_USE_CPP_LOCALE)) && \ - !BOOST_PLAT_WINDOWS_RUNTIME -# define BOOST_REGEX_HAS_OTHER_WCHAR_T -# ifdef BOOST_MSVC -# pragma warning(push) -# pragma warning(disable : 4251) -#if BOOST_MSVC < 1700 -# pragma warning(disable : 4231) +#ifdef BOOST_REGEX_STANDALONE +# if defined(_MSC_VER) && !defined(__clang__) && !defined(__GNUC__) +# define BOOST_REGEX_MSVC _MSC_VER #endif -# if BOOST_MSVC < 1600 -# pragma warning(disable : 4660) -# endif -# endif -# if defined(_DLL) && defined(BOOST_MSVC) && (BOOST_MSVC < 1600) -# include - extern template class __declspec(dllimport) std::basic_string; -# endif -# ifdef BOOST_MSVC -# pragma warning(pop) -# endif +#elif defined(BOOST_MSVC) +# define BOOST_REGEX_MSVC BOOST_MSVC #endif @@ -236,15 +240,10 @@ * ****************************************************************************/ -#ifndef BOOST_SYMBOL_EXPORT -# define BOOST_SYMBOL_EXPORT -# define BOOST_SYMBOL_IMPORT -#endif - -#if (defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK)) && !defined(BOOST_REGEX_STATIC_LINK) +#if (defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK)) && !defined(BOOST_REGEX_STATIC_LINK) && defined(BOOST_SYMBOL_IMPORT) # if defined(BOOST_REGEX_SOURCE) -# define BOOST_REGEX_DECL BOOST_SYMBOL_EXPORT # define BOOST_REGEX_BUILD_DLL +# define BOOST_REGEX_DECL BOOST_SYMBOL_EXPORT # else # define BOOST_REGEX_DECL BOOST_SYMBOL_IMPORT # endif @@ -252,6 +251,7 @@ # define BOOST_REGEX_DECL #endif +#ifdef BOOST_REGEX_CXX03 #if !defined(BOOST_REGEX_NO_LIB) && !defined(BOOST_REGEX_SOURCE) && !defined(BOOST_ALL_NO_LIB) && defined(__cplusplus) # define BOOST_LIB_NAME boost_regex # if defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK) @@ -262,6 +262,7 @@ # endif # include #endif +#endif /***************************************************************************** * @@ -333,7 +334,7 @@ #if !defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(BOOST_REGEX_USE_C_LOCALE) && !defined(BOOST_REGEX_USE_CPP_LOCALE) && !defined(BOOST_NO_STD_LOCALE) # define BOOST_REGEX_USE_CPP_LOCALE #endif -/* otherwise use C+ locale: */ +/* otherwise use C locale: */ #if !defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(BOOST_REGEX_USE_C_LOCALE) && !defined(BOOST_REGEX_USE_CPP_LOCALE) # define BOOST_REGEX_USE_C_LOCALE #endif @@ -407,12 +408,13 @@ BOOST_REGEX_DECL void BOOST_REGEX_CALL reset_stack_guard_page(); /***************************************************************************** * - * Algorithm selection and configuration: + * Algorithm selection and configuration. + * These options are now obsolete for C++11 and later (regex v5). * ****************************************************************************/ #if !defined(BOOST_REGEX_RECURSIVE) && !defined(BOOST_REGEX_NON_RECURSIVE) -# if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && !defined(_STLP_DEBUG) && !defined(__STL_DEBUG) && !(defined(_MSC_VER) && (_MSC_VER >= 1400)) +# if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && !defined(_STLP_DEBUG) && !defined(__STL_DEBUG) && !(defined(_MSC_VER) && (_MSC_VER >= 1400)) && defined(BOOST_REGEX_CXX03) # define BOOST_REGEX_RECURSIVE # else # define BOOST_REGEX_NON_RECURSIVE @@ -441,21 +443,6 @@ BOOST_REGEX_DECL void BOOST_REGEX_CALL reset_stack_guard_page(); #endif -/***************************************************************************** - * - * helper memory allocation functions: - * - ****************************************************************************/ - -#if defined(__cplusplus) && defined(BOOST_REGEX_NON_RECURSIVE) -namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ - -BOOST_REGEX_DECL void* BOOST_REGEX_CALL get_mem_block(); -BOOST_REGEX_DECL void BOOST_REGEX_CALL put_mem_block(void*); - -}} /* namespaces */ -#endif - /***************************************************************************** * * Diagnostics: @@ -491,6 +478,3 @@ BOOST_REGEX_DECL void BOOST_REGEX_CALL print_regex_library_info(); #endif - - - diff --git a/include/boost/regex/icu.hpp b/include/boost/regex/icu.hpp index 1bf71cb4..b312612d 100644 --- a/include/boost/regex/icu.hpp +++ b/include/boost/regex/icu.hpp @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2004 + * Copyright (c) 2020 * John Maddock * * Use, modification and distribution are subject to the @@ -19,1051 +19,12 @@ #ifndef BOOST_REGEX_ICU_HPP #define BOOST_REGEX_ICU_HPP -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include -#ifdef BOOST_MSVC -#pragma warning (push) -#pragma warning (disable: 4251) -#endif - -namespace boost{ - -namespace BOOST_REGEX_DETAIL_NS{ - -// -// Implementation details: -// -class BOOST_REGEX_DECL icu_regex_traits_implementation -{ - typedef UChar32 char_type; - typedef std::size_t size_type; - typedef std::vector string_type; - typedef U_NAMESPACE_QUALIFIER Locale locale_type; - typedef boost::uint_least32_t char_class_type; -public: - icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& l) - : m_locale(l) - { - UErrorCode success = U_ZERO_ERROR; - m_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success)); - if(U_SUCCESS(success) == 0) - init_error(); - m_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::IDENTICAL); - success = U_ZERO_ERROR; - m_primary_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success)); - if(U_SUCCESS(success) == 0) - init_error(); - m_primary_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::PRIMARY); - } - U_NAMESPACE_QUALIFIER Locale getloc()const - { - return m_locale; - } - string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const; - string_type transform(const char_type* p1, const char_type* p2) const - { - return do_transform(p1, p2, m_collator.get()); - } - string_type transform_primary(const char_type* p1, const char_type* p2) const - { - return do_transform(p1, p2, m_primary_collator.get()); - } -private: - void init_error() - { - std::runtime_error e("Could not initialize ICU resources"); - boost::throw_exception(e); - } - U_NAMESPACE_QUALIFIER Locale m_locale; // The ICU locale that we're using - boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_collator; // The full collation object - boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_primary_collator; // The primary collation object -}; - -inline boost::shared_ptr get_icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& loc) -{ - return boost::shared_ptr(new icu_regex_traits_implementation(loc)); -} - -} - -class BOOST_REGEX_DECL icu_regex_traits -{ -public: - typedef UChar32 char_type; - typedef std::size_t size_type; - typedef std::vector string_type; - typedef U_NAMESPACE_QUALIFIER Locale locale_type; -#ifdef BOOST_NO_INT64_T - typedef std::bitset<64> char_class_type; +#ifdef BOOST_REGEX_CXX03 +#include #else - typedef boost::uint64_t char_class_type; +#include #endif - struct boost_extensions_tag{}; - - icu_regex_traits() - : m_pimpl(BOOST_REGEX_DETAIL_NS::get_icu_regex_traits_implementation(U_NAMESPACE_QUALIFIER Locale())) - { - } - static size_type length(const char_type* p); - - ::boost::regex_constants::syntax_type syntax_type(char_type c)const - { - return ((c < 0x7f) && (c > 0)) ? BOOST_REGEX_DETAIL_NS::get_default_syntax_type(static_cast(c)) : regex_constants::syntax_char; - } - ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c) const - { - return ((c < 0x7f) && (c > 0)) ? BOOST_REGEX_DETAIL_NS::get_default_escape_syntax_type(static_cast(c)) : regex_constants::syntax_char; - } - char_type translate(char_type c) const - { - return c; - } - char_type translate_nocase(char_type c) const - { - return ::u_tolower(c); - } - char_type translate(char_type c, bool icase) const - { - return icase ? translate_nocase(c) : translate(c); - } - char_type tolower(char_type c) const - { - return ::u_tolower(c); - } - char_type toupper(char_type c) const - { - return ::u_toupper(c); - } - string_type transform(const char_type* p1, const char_type* p2) const - { - return m_pimpl->transform(p1, p2); - } - string_type transform_primary(const char_type* p1, const char_type* p2) const - { - return m_pimpl->transform_primary(p1, p2); - } - char_class_type lookup_classname(const char_type* p1, const char_type* p2) const; - string_type lookup_collatename(const char_type* p1, const char_type* p2) const; - bool isctype(char_type c, char_class_type f) const; - boost::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const - { - return BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); - } - int value(char_type c, int radix)const - { - return u_digit(c, static_cast< ::int8_t>(radix)); - } - locale_type imbue(locale_type l) - { - locale_type result(m_pimpl->getloc()); - m_pimpl = BOOST_REGEX_DETAIL_NS::get_icu_regex_traits_implementation(l); - return result; - } - locale_type getloc()const - { - return locale_type(); - } - std::string error_string(::boost::regex_constants::error_type n) const - { - return BOOST_REGEX_DETAIL_NS::get_default_error_string(n); - } -private: - icu_regex_traits(const icu_regex_traits&); - icu_regex_traits& operator=(const icu_regex_traits&); - - // - // define the bitmasks offsets we need for additional character properties: - // - enum{ - offset_blank = U_CHAR_CATEGORY_COUNT, - offset_space = U_CHAR_CATEGORY_COUNT+1, - offset_xdigit = U_CHAR_CATEGORY_COUNT+2, - offset_underscore = U_CHAR_CATEGORY_COUNT+3, - offset_unicode = U_CHAR_CATEGORY_COUNT+4, - offset_any = U_CHAR_CATEGORY_COUNT+5, - offset_ascii = U_CHAR_CATEGORY_COUNT+6, - offset_horizontal = U_CHAR_CATEGORY_COUNT+7, - offset_vertical = U_CHAR_CATEGORY_COUNT+8 - }; - - // - // and now the masks: - // - static const char_class_type mask_blank; - static const char_class_type mask_space; - static const char_class_type mask_xdigit; - static const char_class_type mask_underscore; - static const char_class_type mask_unicode; - static const char_class_type mask_any; - static const char_class_type mask_ascii; - static const char_class_type mask_horizontal; - static const char_class_type mask_vertical; - - static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2); - - boost::shared_ptr< ::boost::BOOST_REGEX_DETAIL_NS::icu_regex_traits_implementation> m_pimpl; -}; - -} // namespace boost - -// -// template instances: -// -#define BOOST_REGEX_CHAR_T UChar32 -#undef BOOST_REGEX_TRAITS_T -#define BOOST_REGEX_TRAITS_T , icu_regex_traits -#define BOOST_REGEX_ICU_INSTANCES -#ifdef BOOST_REGEX_ICU_INSTANTIATE -# define BOOST_REGEX_INSTANTIATE -#endif -#include -#undef BOOST_REGEX_CHAR_T -#undef BOOST_REGEX_TRAITS_T -#undef BOOST_REGEX_ICU_INSTANCES -#ifdef BOOST_REGEX_INSTANTIATE -# undef BOOST_REGEX_INSTANTIATE -#endif - -namespace boost{ - -// types: -typedef basic_regex< ::UChar32, icu_regex_traits> u32regex; -typedef match_results u32match; -typedef match_results u16match; - -// -// Construction of 32-bit regex types from UTF-8 and UTF-16 primitives: -// -namespace BOOST_REGEX_DETAIL_NS{ - -#if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) -template -inline u32regex do_make_u32regex(InputIterator i, - InputIterator j, - boost::regex_constants::syntax_option_type opt, - const boost::mpl::int_<1>*) -{ - typedef boost::u8_to_u32_iterator conv_type; - return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt); -} - -template -inline u32regex do_make_u32regex(InputIterator i, - InputIterator j, - boost::regex_constants::syntax_option_type opt, - const boost::mpl::int_<2>*) -{ - typedef boost::u16_to_u32_iterator conv_type; - return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt); -} - -template -inline u32regex do_make_u32regex(InputIterator i, - InputIterator j, - boost::regex_constants::syntax_option_type opt, - const boost::mpl::int_<4>*) -{ - return u32regex(i, j, opt); -} -#else -template -inline u32regex do_make_u32regex(InputIterator i, - InputIterator j, - boost::regex_constants::syntax_option_type opt, - const boost::mpl::int_<1>*) -{ - typedef boost::u8_to_u32_iterator conv_type; - typedef std::vector vector_type; - vector_type v; - conv_type a(i, i, j), b(j, i, j); - while(a != b) - { - v.push_back(*a); - ++a; - } - if(v.size()) - return u32regex(&*v.begin(), v.size(), opt); - return u32regex(static_cast(0), static_cast(0), opt); -} - -template -inline u32regex do_make_u32regex(InputIterator i, - InputIterator j, - boost::regex_constants::syntax_option_type opt, - const boost::mpl::int_<2>*) -{ - typedef boost::u16_to_u32_iterator conv_type; - typedef std::vector vector_type; - vector_type v; - conv_type a(i, i, j), b(j, i, j); - while(a != b) - { - v.push_back(*a); - ++a; - } - if(v.size()) - return u32regex(&*v.begin(), v.size(), opt); - return u32regex(static_cast(0), static_cast(0), opt); -} - -template -inline u32regex do_make_u32regex(InputIterator i, - InputIterator j, - boost::regex_constants::syntax_option_type opt, - const boost::mpl::int_<4>*) -{ - typedef std::vector vector_type; - vector_type v; - while(i != j) - { - v.push_back((UChar32)(*i)); - ++i; - } - if(v.size()) - return u32regex(&*v.begin(), v.size(), opt); - return u32regex(static_cast(0), static_cast(0), opt); -} -#endif -} - -// BOOST_REGEX_UCHAR_IS_WCHAR_T -// -// Source inspection of unicode/umachine.h in ICU version 59 indicates that: -// -// On version 59, UChar is always char16_t in C++ mode (and uint16_t in C mode) -// -// On earlier versions, the logic is -// -// #if U_SIZEOF_WCHAR_T==2 -// typedef wchar_t OldUChar; -// #elif defined(__CHAR16_TYPE__) -// typedef __CHAR16_TYPE__ OldUChar; -// #else -// typedef uint16_t OldUChar; -// #endif -// -// That is, UChar is wchar_t only on versions below 59, when U_SIZEOF_WCHAR_T==2 -// -// Hence, - -#define BOOST_REGEX_UCHAR_IS_WCHAR_T (U_ICU_VERSION_MAJOR_NUM < 59 && U_SIZEOF_WCHAR_T == 2) - -#if BOOST_REGEX_UCHAR_IS_WCHAR_T - BOOST_STATIC_ASSERT((boost::is_same::value)); -#else - BOOST_STATIC_ASSERT(!(boost::is_same::value)); -#endif - -// -// Construction from an iterator pair: -// -template -inline u32regex make_u32regex(InputIterator i, - InputIterator j, - boost::regex_constants::syntax_option_type opt) -{ - return BOOST_REGEX_DETAIL_NS::do_make_u32regex(i, j, opt, static_cast const*>(0)); -} -// -// construction from UTF-8 nul-terminated strings: -// -inline u32regex make_u32regex(const char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) -{ - return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::strlen(p), opt, static_cast const*>(0)); -} -inline u32regex make_u32regex(const unsigned char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) -{ - return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::strlen(reinterpret_cast(p)), opt, static_cast const*>(0)); -} -// -// construction from UTF-16 nul-terminated strings: -// -#ifndef BOOST_NO_WREGEX -inline u32regex make_u32regex(const wchar_t* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) -{ - return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::wcslen(p), opt, static_cast const*>(0)); -} -#endif -#if !BOOST_REGEX_UCHAR_IS_WCHAR_T -inline u32regex make_u32regex(const UChar* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) -{ - return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + u_strlen(p), opt, static_cast const*>(0)); -} -#endif -// -// construction from basic_string class-template: -// -template -inline u32regex make_u32regex(const std::basic_string& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) -{ - return BOOST_REGEX_DETAIL_NS::do_make_u32regex(s.begin(), s.end(), opt, static_cast const*>(0)); -} -// -// Construction from ICU string type: -// -inline u32regex make_u32regex(const U_NAMESPACE_QUALIFIER UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) -{ - return BOOST_REGEX_DETAIL_NS::do_make_u32regex(s.getBuffer(), s.getBuffer() + s.length(), opt, static_cast const*>(0)); -} - -// -// regex_match overloads that widen the character type as appropriate: -// -namespace BOOST_REGEX_DETAIL_NS{ -template -void copy_results(MR1& out, MR2 const& in, NSubs named_subs) -{ - // copy results from an adapted MR2 match_results: - out.set_size(in.size(), in.prefix().first.base(), in.suffix().second.base()); - out.set_base(in.base().base()); - out.set_named_subs(named_subs); - for(int i = 0; i < (int)in.size(); ++i) - { - if(in[i].matched || !i) - { - out.set_first(in[i].first.base(), i); - out.set_second(in[i].second.base(), i, in[i].matched); - } - } -#ifdef BOOST_REGEX_MATCH_EXTRA - // Copy full capture info as well: - for(int i = 0; i < (int)in.size(); ++i) - { - if(in[i].captures().size()) - { - out[i].get_captures().assign(in[i].captures().size(), typename MR1::value_type()); - for(int j = 0; j < (int)out[i].captures().size(); ++j) - { - out[i].get_captures()[j].first = in[i].captures()[j].first.base(); - out[i].get_captures()[j].second = in[i].captures()[j].second.base(); - out[i].get_captures()[j].matched = in[i].captures()[j].matched; - } - } - } -#endif -} - -template -inline bool do_regex_match(BidiIterator first, BidiIterator last, - match_results& m, - const u32regex& e, - match_flag_type flags, - boost::mpl::int_<4> const*) -{ - return ::boost::regex_match(first, last, m, e, flags); -} -template -bool do_regex_match(BidiIterator first, BidiIterator last, - match_results& m, - const u32regex& e, - match_flag_type flags, - boost::mpl::int_<2> const*) -{ - typedef u16_to_u32_iterator conv_type; - typedef match_results match_type; - //typedef typename match_type::allocator_type alloc_type; - match_type what; - bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags); - // copy results across to m: - if(result) copy_results(m, what, e.get_named_subs()); - return result; -} -template -bool do_regex_match(BidiIterator first, BidiIterator last, - match_results& m, - const u32regex& e, - match_flag_type flags, - boost::mpl::int_<1> const*) -{ - typedef u8_to_u32_iterator conv_type; - typedef match_results match_type; - //typedef typename match_type::allocator_type alloc_type; - match_type what; - bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags); - // copy results across to m: - if(result) copy_results(m, what, e.get_named_subs()); - return result; -} -} // namespace BOOST_REGEX_DETAIL_NS - -template -inline bool u32regex_match(BidiIterator first, BidiIterator last, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_match(first, last, m, e, flags, static_cast const*>(0)); -} -inline bool u32regex_match(const UChar* p, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast const*>(0)); -} -#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) -inline bool u32regex_match(const wchar_t* p, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast const*>(0)); -} -#endif -inline bool u32regex_match(const char* p, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast const*>(0)); -} -inline bool u32regex_match(const unsigned char* p, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast const*>(0)); -} -inline bool u32regex_match(const std::string& s, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); -} -#ifndef BOOST_NO_STD_WSTRING -inline bool u32regex_match(const std::wstring& s, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); -} -#endif -inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast const*>(0)); -} -// -// regex_match overloads that do not return what matched: -// -template -inline bool u32regex_match(BidiIterator first, BidiIterator last, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_match(first, last, m, e, flags, static_cast const*>(0)); -} -inline bool u32regex_match(const UChar* p, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast const*>(0)); -} -#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) -inline bool u32regex_match(const wchar_t* p, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast const*>(0)); -} -#endif -inline bool u32regex_match(const char* p, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast const*>(0)); -} -inline bool u32regex_match(const unsigned char* p, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast const*>(0)); -} -inline bool u32regex_match(const std::string& s, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); -} -#ifndef BOOST_NO_STD_WSTRING -inline bool u32regex_match(const std::wstring& s, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); -} -#endif -inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast const*>(0)); -} - -// -// regex_search overloads that widen the character type as appropriate: -// -namespace BOOST_REGEX_DETAIL_NS{ -template -inline bool do_regex_search(BidiIterator first, BidiIterator last, - match_results& m, - const u32regex& e, - match_flag_type flags, - BidiIterator base, - boost::mpl::int_<4> const*) -{ - return ::boost::regex_search(first, last, m, e, flags, base); -} -template -bool do_regex_search(BidiIterator first, BidiIterator last, - match_results& m, - const u32regex& e, - match_flag_type flags, - BidiIterator base, - boost::mpl::int_<2> const*) -{ - typedef u16_to_u32_iterator conv_type; - typedef match_results match_type; - //typedef typename match_type::allocator_type alloc_type; - match_type what; - bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base)); - // copy results across to m: - if(result) copy_results(m, what, e.get_named_subs()); - return result; -} -template -bool do_regex_search(BidiIterator first, BidiIterator last, - match_results& m, - const u32regex& e, - match_flag_type flags, - BidiIterator base, - boost::mpl::int_<1> const*) -{ - typedef u8_to_u32_iterator conv_type; - typedef match_results match_type; - //typedef typename match_type::allocator_type alloc_type; - match_type what; - bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base)); - // copy results across to m: - if(result) copy_results(m, what, e.get_named_subs()); - return result; -} -} - -template -inline bool u32regex_search(BidiIterator first, BidiIterator last, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, first, static_cast const*>(0)); -} -template -inline bool u32regex_search(BidiIterator first, BidiIterator last, - match_results& m, - const u32regex& e, - match_flag_type flags, - BidiIterator base) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, base, static_cast const*>(0)); -} -inline bool u32regex_search(const UChar* p, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast const*>(0)); -} -#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) -inline bool u32regex_search(const wchar_t* p, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast const*>(0)); -} -#endif -inline bool u32regex_search(const char* p, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast const*>(0)); -} -inline bool u32regex_search(const unsigned char* p, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast const*>(0)); -} -inline bool u32regex_search(const std::string& s, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); -} -#ifndef BOOST_NO_STD_WSTRING -inline bool u32regex_search(const std::wstring& s, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); -} -#endif -inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s, - match_results& m, - const u32regex& e, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast const*>(0)); -} -template -inline bool u32regex_search(BidiIterator first, BidiIterator last, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, first, static_cast const*>(0)); -} -inline bool u32regex_search(const UChar* p, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast const*>(0)); -} -#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) -inline bool u32regex_search(const wchar_t* p, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast const*>(0)); -} -#endif -inline bool u32regex_search(const char* p, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast const*>(0)); -} -inline bool u32regex_search(const unsigned char* p, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast const*>(0)); -} -inline bool u32regex_search(const std::string& s, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); -} -#ifndef BOOST_NO_STD_WSTRING -inline bool u32regex_search(const std::wstring& s, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); -} -#endif -inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s, - const u32regex& e, - match_flag_type flags = match_default) -{ - match_results m; - return BOOST_REGEX_DETAIL_NS::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast const*>(0)); -} - -// -// overloads for regex_replace with utf-8 and utf-16 data types: -// -namespace BOOST_REGEX_DETAIL_NS{ -template -inline std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator > - make_utf32_seq(I i, I j, mpl::int_<1> const*) -{ - return std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator >(boost::u8_to_u32_iterator(i, i, j), boost::u8_to_u32_iterator(j, i, j)); -} -template -inline std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator > - make_utf32_seq(I i, I j, mpl::int_<2> const*) -{ - return std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator >(boost::u16_to_u32_iterator(i, i, j), boost::u16_to_u32_iterator(j, i, j)); -} -template -inline std::pair< I, I > - make_utf32_seq(I i, I j, mpl::int_<4> const*) -{ - return std::pair< I, I >(i, j); -} -template -inline std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator > - make_utf32_seq(const charT* p, mpl::int_<1> const*) -{ - std::size_t len = std::strlen((const char*)p); - return std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator >(boost::u8_to_u32_iterator(p, p, p+len), boost::u8_to_u32_iterator(p+len, p, p+len)); -} -template -inline std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator > - make_utf32_seq(const charT* p, mpl::int_<2> const*) -{ - std::size_t len = u_strlen((const UChar*)p); - return std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator >(boost::u16_to_u32_iterator(p, p, p + len), boost::u16_to_u32_iterator(p+len, p, p + len)); -} -template -inline std::pair< const charT*, const charT* > - make_utf32_seq(const charT* p, mpl::int_<4> const*) -{ - return std::pair< const charT*, const charT* >(p, p+icu_regex_traits::length((UChar32 const*)p)); -} -template -inline OutputIterator make_utf32_out(OutputIterator o, mpl::int_<4> const*) -{ - return o; -} -template -inline utf16_output_iterator make_utf32_out(OutputIterator o, mpl::int_<2> const*) -{ - return o; -} -template -inline utf8_output_iterator make_utf32_out(OutputIterator o, mpl::int_<1> const*) -{ - return o; -} - -template -OutputIterator do_regex_replace(OutputIterator out, - std::pair const& in, - const u32regex& e, - const std::pair& fmt, - match_flag_type flags - ) -{ - // unfortunately we have to copy the format string in order to pass in onward: - std::vector f; -#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS - f.assign(fmt.first, fmt.second); -#else - f.clear(); - I2 pos = fmt.first; - while(pos != fmt.second) - f.push_back(*pos++); -#endif - - regex_iterator i(in.first, in.second, e, flags); - regex_iterator j; - if(i == j) - { - if(!(flags & regex_constants::format_no_copy)) - out = BOOST_REGEX_DETAIL_NS::copy(in.first, in.second, out); - } - else - { - I1 last_m = in.first; - while(i != j) - { - if(!(flags & regex_constants::format_no_copy)) - out = BOOST_REGEX_DETAIL_NS::copy(i->prefix().first, i->prefix().second, out); - if(f.size()) - out = ::boost::BOOST_REGEX_DETAIL_NS::regex_format_imp(out, *i, &*f.begin(), &*f.begin() + f.size(), flags, e.get_traits()); - else - out = ::boost::BOOST_REGEX_DETAIL_NS::regex_format_imp(out, *i, static_cast(0), static_cast(0), flags, e.get_traits()); - last_m = (*i)[0].second; - if(flags & regex_constants::format_first_only) - break; - ++i; - } - if(!(flags & regex_constants::format_no_copy)) - out = BOOST_REGEX_DETAIL_NS::copy(last_m, in.second, out); - } - return out; -} -template -inline const BaseIterator& extract_output_base(const BaseIterator& b) -{ - return b; -} -template -inline BaseIterator extract_output_base(const utf8_output_iterator& b) -{ - return b.base(); -} -template -inline BaseIterator extract_output_base(const utf16_output_iterator& b) -{ - return b.base(); -} -} // BOOST_REGEX_DETAIL_NS - -template -inline OutputIterator u32regex_replace(OutputIterator out, - BidirectionalIterator first, - BidirectionalIterator last, - const u32regex& e, - const charT* fmt, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::extract_output_base - ( - BOOST_REGEX_DETAIL_NS::do_regex_replace( - BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast const*>(0)), - BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast const*>(0)), - e, - BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt, static_cast const*>(0)), - flags) - ); -} - -template -inline OutputIterator u32regex_replace(OutputIterator out, - Iterator first, - Iterator last, - const u32regex& e, - const std::basic_string& fmt, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::extract_output_base - ( - BOOST_REGEX_DETAIL_NS::do_regex_replace( - BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast const*>(0)), - BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast const*>(0)), - e, - BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.begin(), fmt.end(), static_cast const*>(0)), - flags) - ); -} - -template -inline OutputIterator u32regex_replace(OutputIterator out, - Iterator first, - Iterator last, - const u32regex& e, - const U_NAMESPACE_QUALIFIER UnicodeString& fmt, - match_flag_type flags = match_default) -{ - return BOOST_REGEX_DETAIL_NS::extract_output_base - ( - BOOST_REGEX_DETAIL_NS::do_regex_replace( - BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast const*>(0)), - BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast const*>(0)), - e, - BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast const*>(0)), - flags) - ); -} - -template -std::basic_string u32regex_replace(const std::basic_string& s, - const u32regex& e, - const charT* fmt, - match_flag_type flags = match_default) -{ - std::basic_string result; - BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); - u32regex_replace(i, s.begin(), s.end(), e, fmt, flags); - return result; -} - -template -std::basic_string u32regex_replace(const std::basic_string& s, - const u32regex& e, - const std::basic_string& fmt, - match_flag_type flags = match_default) -{ - std::basic_string result; - BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); - u32regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags); - return result; -} - -namespace BOOST_REGEX_DETAIL_NS{ - -class unicode_string_out_iterator -{ - U_NAMESPACE_QUALIFIER UnicodeString* out; -public: - unicode_string_out_iterator(U_NAMESPACE_QUALIFIER UnicodeString& s) : out(&s) {} - unicode_string_out_iterator& operator++() { return *this; } - unicode_string_out_iterator& operator++(int) { return *this; } - unicode_string_out_iterator& operator*() { return *this; } - unicode_string_out_iterator& operator=(UChar v) - { - *out += v; - return *this; - } - typedef std::ptrdiff_t difference_type; - typedef UChar value_type; - typedef value_type* pointer; - typedef value_type& reference; - typedef std::output_iterator_tag iterator_category; -}; - -} - -inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s, - const u32regex& e, - const UChar* fmt, - match_flag_type flags = match_default) -{ - U_NAMESPACE_QUALIFIER UnicodeString result; - BOOST_REGEX_DETAIL_NS::unicode_string_out_iterator i(result); - u32regex_replace(i, s.getBuffer(), s.getBuffer()+s.length(), e, fmt, flags); - return result; -} - -inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s, - const u32regex& e, - const U_NAMESPACE_QUALIFIER UnicodeString& fmt, - match_flag_type flags = match_default) -{ - U_NAMESPACE_QUALIFIER UnicodeString result; - BOOST_REGEX_DETAIL_NS::unicode_string_out_iterator i(result); - BOOST_REGEX_DETAIL_NS::do_regex_replace( - BOOST_REGEX_DETAIL_NS::make_utf32_out(i, static_cast const*>(0)), - BOOST_REGEX_DETAIL_NS::make_utf32_seq(s.getBuffer(), s.getBuffer()+s.length(), static_cast const*>(0)), - e, - BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast const*>(0)), - flags); - return result; -} - -} // namespace boost. - -#ifdef BOOST_MSVC -#pragma warning (pop) -#endif - -#include -#include - #endif diff --git a/include/boost/regex/pattern_except.hpp b/include/boost/regex/pattern_except.hpp index 7ca409d8..f2af1afe 100644 --- a/include/boost/regex/pattern_except.hpp +++ b/include/boost/regex/pattern_except.hpp @@ -23,81 +23,10 @@ #include #endif -#include -#include -#include - -namespace boost{ - -#ifdef BOOST_MSVC -#pragma warning(push) -#pragma warning(disable: 4103) -#endif -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_PREFIX -#endif -#ifdef BOOST_MSVC -#pragma warning(pop) +#ifdef BOOST_REGEX_CXX03 +#include +#else +#include #endif -#ifdef BOOST_MSVC -#pragma warning(push) -#pragma warning(disable : 4275) -#if BOOST_MSVC >= 1800 -#pragma warning(disable : 26812) #endif -#endif -class BOOST_REGEX_DECL regex_error : public std::runtime_error -{ -public: - explicit regex_error(const std::string& s, regex_constants::error_type err = regex_constants::error_unknown, std::ptrdiff_t pos = 0); - explicit regex_error(regex_constants::error_type err); - ~regex_error() BOOST_NOEXCEPT_OR_NOTHROW; - regex_constants::error_type code()const - { return m_error_code; } - std::ptrdiff_t position()const - { return m_position; } - void raise()const; -private: - regex_constants::error_type m_error_code; - std::ptrdiff_t m_position; -}; - -typedef regex_error bad_pattern; -typedef regex_error bad_expression; - -namespace BOOST_REGEX_DETAIL_NS{ - -BOOST_REGEX_DECL void BOOST_REGEX_CALL raise_runtime_error(const std::runtime_error& ex); - -template -void raise_error(const traits& t, regex_constants::error_type code) -{ - (void)t; // warning suppression - std::runtime_error e(t.error_string(code)); - ::boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(e); -} - -} - -#ifdef BOOST_MSVC -#pragma warning(pop) -#endif - -#ifdef BOOST_MSVC -#pragma warning(push) -#pragma warning(disable: 4103) -#endif -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_SUFFIX -#endif -#ifdef BOOST_MSVC -#pragma warning(pop) -#endif - -} // namespace boost - -#endif - - - diff --git a/include/boost/regex/pending/object_cache.hpp b/include/boost/regex/pending/object_cache.hpp index ea51ba3a..7284a99e 100644 --- a/include/boost/regex/pending/object_cache.hpp +++ b/include/boost/regex/pending/object_cache.hpp @@ -19,14 +19,24 @@ #ifndef BOOST_REGEX_OBJECT_CACHE_HPP #define BOOST_REGEX_OBJECT_CACHE_HPP -#include +#include +#ifdef BOOST_REGEX_CXX03 #include +#define BOOST_REGEX_SHARED_PTR_NS boost +#else +#include +#define BOOST_REGEX_SHARED_PTR_NS std +#endif #include #include #include #include #ifdef BOOST_HAS_THREADS +#ifdef BOOST_REGEX_CXX03 #include +#else +#include +#endif #endif namespace boost{ @@ -35,16 +45,16 @@ template class object_cache { public: - typedef std::pair< ::boost::shared_ptr, Key const*> value_type; + typedef std::pair< BOOST_REGEX_SHARED_PTR_NS::shared_ptr, Key const*> value_type; typedef std::list list_type; typedef typename list_type::iterator list_iterator; typedef std::map map_type; typedef typename map_type::iterator map_iterator; typedef typename list_type::size_type size_type; - static boost::shared_ptr get(const Key& k, size_type l_max_cache_size); + static BOOST_REGEX_SHARED_PTR_NS::shared_ptr get(const Key& k, size_type l_max_cache_size); private: - static boost::shared_ptr do_get(const Key& k, size_type l_max_cache_size); + static BOOST_REGEX_SHARED_PTR_NS::shared_ptr do_get(const Key& k, size_type l_max_cache_size); struct data { @@ -62,13 +72,13 @@ private: #pragma warning(disable: 4702) #endif template -boost::shared_ptr object_cache::get(const Key& k, size_type l_max_cache_size) +BOOST_REGEX_SHARED_PTR_NS::shared_ptr object_cache::get(const Key& k, size_type l_max_cache_size) { #ifdef BOOST_HAS_THREADS +#ifdef BOOST_REGEX_CXX03 static boost::static_mutex mut = BOOST_STATIC_MUTEX_INIT; - boost::static_mutex::scoped_lock l(mut); - if(l) + if (l) { return do_get(k, l_max_cache_size); } @@ -78,7 +88,12 @@ boost::shared_ptr object_cache::get(const Key& k, siz // ::boost::throw_exception(std::runtime_error("Error in thread safety code: could not acquire a lock")); #if defined(BOOST_NO_UNREACHABLE_RETURN_DETECTION) || defined(BOOST_NO_EXCEPTIONS) - return boost::shared_ptr(); + return BOOST_REGEX_SHARED_PTR_NS::shared_ptr(); +#endif +#else + static std::mutex mut; + std::lock_guard l(mut); + return do_get(k, l_max_cache_size); #endif #else return do_get(k, l_max_cache_size); @@ -89,7 +104,7 @@ boost::shared_ptr object_cache::get(const Key& k, siz #endif template -boost::shared_ptr object_cache::do_get(const Key& k, size_type l_max_cache_size) +BOOST_REGEX_SHARED_PTR_NS::shared_ptr object_cache::do_get(const Key& k, size_type l_max_cache_size) { typedef typename object_cache::data object_data; typedef typename map_type::size_type map_size_type; @@ -112,11 +127,11 @@ boost::shared_ptr object_cache::do_get(const Key& k, temp.splice(temp.end(), s_data.cont, mpos->second); // and now place it at the end of the list: s_data.cont.splice(s_data.cont.end(), temp, temp.begin()); - BOOST_ASSERT(*(s_data.cont.back().second) == k); + BOOST_REGEX_ASSERT(*(s_data.cont.back().second) == k); // update index with new position: mpos->second = --(s_data.cont.end()); - BOOST_ASSERT(&(mpos->first) == mpos->second->second); - BOOST_ASSERT(&(mpos->first) == s_data.cont.back().second); + BOOST_REGEX_ASSERT(&(mpos->first) == mpos->second->second); + BOOST_REGEX_ASSERT(&(mpos->first) == s_data.cont.back().second); } return s_data.cont.back().first; } @@ -124,7 +139,7 @@ boost::shared_ptr object_cache::do_get(const Key& k, // if we get here then the item is not in the cache, // so create it: // - boost::shared_ptr result(new Object(k)); + BOOST_REGEX_SHARED_PTR_NS::shared_ptr result(new Object(k)); // // Add it to the list, and index it: // @@ -132,9 +147,9 @@ boost::shared_ptr object_cache::do_get(const Key& k, s_data.index.insert(std::make_pair(k, --(s_data.cont.end()))); s_data.cont.back().second = &(s_data.index.find(k)->first); map_size_type s = s_data.index.size(); - BOOST_ASSERT(s_data.index[k]->first.get() == result.get()); - BOOST_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); - BOOST_ASSERT(s_data.index.find(k)->first == k); + BOOST_REGEX_ASSERT(s_data.index[k]->first.get() == result.get()); + BOOST_REGEX_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); + BOOST_REGEX_ASSERT(s_data.index.find(k)->first == k); if(s > l_max_cache_size) { // @@ -152,7 +167,7 @@ boost::shared_ptr object_cache::do_get(const Key& k, ++pos; // now remove the items from our containers, // then order has to be as follows: - BOOST_ASSERT(s_data.index.find(*(condemmed->second)) != s_data.index.end()); + BOOST_REGEX_ASSERT(s_data.index.find(*(condemmed->second)) != s_data.index.end()); s_data.index.erase(*(condemmed->second)); s_data.cont.erase(condemmed); --s; @@ -160,13 +175,13 @@ boost::shared_ptr object_cache::do_get(const Key& k, else ++pos; } - BOOST_ASSERT(s_data.index[k]->first.get() == result.get()); - BOOST_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); - BOOST_ASSERT(s_data.index.find(k)->first == k); + BOOST_REGEX_ASSERT(s_data.index[k]->first.get() == result.get()); + BOOST_REGEX_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); + BOOST_REGEX_ASSERT(s_data.index.find(k)->first == k); } return result; } -} +#undef BOOST_REGEX_SHARED_PTR_NS #endif diff --git a/include/boost/regex/pending/unicode_iterator.hpp b/include/boost/regex/pending/unicode_iterator.hpp index becb17ad..a565570a 100644 --- a/include/boost/regex/pending/unicode_iterator.hpp +++ b/include/boost/regex/pending/unicode_iterator.hpp @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2004 + * Copyright (c) 2020 * John Maddock * * Use, modification and distribution are subject to the @@ -16,770 +16,17 @@ * DESCRIPTION: Iterator adapters for converting between different Unicode encodings. */ -/**************************************************************************** +#ifndef BOOST_REGEX_PENDING_UNICODE_ITERATOR_HPP +#define BOOST_REGEX_PENDING_UNICODE_ITERATOR_HPP -Contents: -~~~~~~~~~ +#include -1) Read Only, Input Adapters: -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -template -class u32_to_u8_iterator; - -Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-8. - -template -class u8_to_u32_iterator; - -Adapts sequence of UTF-8 code points to "look like" a sequence of UTF-32. - -template -class u32_to_u16_iterator; - -Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-16. - -template -class u16_to_u32_iterator; - -Adapts sequence of UTF-16 code points to "look like" a sequence of UTF-32. - -2) Single pass output iterator adapters: - -template -class utf8_output_iterator; - -Accepts UTF-32 code points and forwards them on as UTF-8 code points. - -template -class utf16_output_iterator; - -Accepts UTF-32 code points and forwards them on as UTF-16 code points. - -****************************************************************************/ - -#ifndef BOOST_REGEX_UNICODE_ITERATOR_HPP -#define BOOST_REGEX_UNICODE_ITERATOR_HPP -#include -#include -#include -#include -#include -#include -#ifndef BOOST_NO_STD_LOCALE -#include -#include -#endif -#include // CHAR_BIT - -namespace boost{ - -namespace detail{ - -static const ::boost::uint16_t high_surrogate_base = 0xD7C0u; -static const ::boost::uint16_t low_surrogate_base = 0xDC00u; -static const ::boost::uint32_t ten_bit_mask = 0x3FFu; - -inline bool is_high_surrogate(::boost::uint16_t v) -{ - return (v & 0xFFFFFC00u) == 0xd800u; -} -inline bool is_low_surrogate(::boost::uint16_t v) -{ - return (v & 0xFFFFFC00u) == 0xdc00u; -} -template -inline bool is_surrogate(T v) -{ - return (v & 0xFFFFF800u) == 0xd800; -} - -inline unsigned utf8_byte_count(boost::uint8_t c) -{ - // if the most significant bit with a zero in it is in position - // 8-N then there are N bytes in this UTF-8 sequence: - boost::uint8_t mask = 0x80u; - unsigned result = 0; - while(c & mask) - { - ++result; - mask >>= 1; - } - return (result == 0) ? 1 : ((result > 4) ? 4 : result); -} - -inline unsigned utf8_trailing_byte_count(boost::uint8_t c) -{ - return utf8_byte_count(c) - 1; -} - -#ifdef BOOST_MSVC -#pragma warning(push) -#pragma warning(disable:4100) -#endif -#ifndef BOOST_NO_EXCEPTIONS -BOOST_NORETURN -#endif -inline void invalid_utf32_code_point(::boost::uint32_t val) -{ -#ifndef BOOST_NO_STD_LOCALE - std::stringstream ss; - ss << "Invalid UTF-32 code point U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-16 sequence"; - std::out_of_range e(ss.str()); +#if defined(BOOST_REGEX_CXX03) +#include #else - std::out_of_range e("Invalid UTF-32 code point encountered while trying to encode UTF-16 sequence"); -#endif - boost::throw_exception(e); -} -#ifdef BOOST_MSVC -#pragma warning(pop) +#include #endif -} // namespace detail - -template -class u32_to_u16_iterator - : public boost::iterator_facade, U16Type, std::bidirectional_iterator_tag, const U16Type> -{ - typedef boost::iterator_facade, U16Type, std::bidirectional_iterator_tag, const U16Type> base_type; - -#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) - typedef typename std::iterator_traits::value_type base_value_type; - - BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32); - BOOST_STATIC_ASSERT(sizeof(U16Type)*CHAR_BIT == 16); -#endif - -public: - typename base_type::reference - dereference()const - { - if(m_current == 2) - extract_current(); - return m_values[m_current]; - } - bool equal(const u32_to_u16_iterator& that)const - { - if(m_position == that.m_position) - { - // Both m_currents must be equal, or both even - // this is the same as saying their sum must be even: - return (m_current + that.m_current) & 1u ? false : true; - } - return false; - } - void increment() - { - // if we have a pending read then read now, so that we know whether - // to skip a position, or move to a low-surrogate: - if(m_current == 2) - { - // pending read: - extract_current(); - } - // move to the next surrogate position: - ++m_current; - // if we've reached the end skip a position: - if(m_values[m_current] == 0) - { - m_current = 2; - ++m_position; - } - } - void decrement() - { - if(m_current != 1) - { - // decrementing an iterator always leads to a valid position: - --m_position; - extract_current(); - m_current = m_values[1] ? 1 : 0; - } - else - { - m_current = 0; - } - } - BaseIterator base()const - { - return m_position; - } - // construct: - u32_to_u16_iterator() : m_position(), m_current(0) - { - m_values[0] = 0; - m_values[1] = 0; - m_values[2] = 0; - } - u32_to_u16_iterator(BaseIterator b) : m_position(b), m_current(2) - { - m_values[0] = 0; - m_values[1] = 0; - m_values[2] = 0; - } -private: - - void extract_current()const - { - // begin by checking for a code point out of range: - ::boost::uint32_t v = *m_position; - if(v >= 0x10000u) - { - if(v > 0x10FFFFu) - detail::invalid_utf32_code_point(*m_position); - // split into two surrogates: - m_values[0] = static_cast(v >> 10) + detail::high_surrogate_base; - m_values[1] = static_cast(v & detail::ten_bit_mask) + detail::low_surrogate_base; - m_current = 0; - BOOST_ASSERT(detail::is_high_surrogate(m_values[0])); - BOOST_ASSERT(detail::is_low_surrogate(m_values[1])); - } - else - { - // 16-bit code point: - m_values[0] = static_cast(*m_position); - m_values[1] = 0; - m_current = 0; - // value must not be a surrogate: - if(detail::is_surrogate(m_values[0])) - detail::invalid_utf32_code_point(*m_position); - } - } - BaseIterator m_position; - mutable U16Type m_values[3]; - mutable unsigned m_current; -}; - -template -class u16_to_u32_iterator - : public boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> -{ - typedef boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type; - // special values for pending iterator reads: - BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu); - -#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) - typedef typename std::iterator_traits::value_type base_value_type; - - BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 16); - BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32); -#endif - -public: - typename base_type::reference - dereference()const - { - if(m_value == pending_read) - extract_current(); - return m_value; - } - bool equal(const u16_to_u32_iterator& that)const - { - return m_position == that.m_position; - } - void increment() - { - // skip high surrogate first if there is one: - if(detail::is_high_surrogate(*m_position)) ++m_position; - ++m_position; - m_value = pending_read; - } - void decrement() - { - --m_position; - // if we have a low surrogate then go back one more: - if(detail::is_low_surrogate(*m_position)) - --m_position; - m_value = pending_read; - } - BaseIterator base()const - { - return m_position; - } - // construct: - u16_to_u32_iterator() : m_position() - { - m_value = pending_read; - } - u16_to_u32_iterator(BaseIterator b) : m_position(b) - { - m_value = pending_read; - } - // - // Range checked version: - // - u16_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b) - { - m_value = pending_read; - // - // The range must not start with a low surrogate, or end in a high surrogate, - // otherwise we run the risk of running outside the underlying input range. - // Likewise b must not be located at a low surrogate. - // - boost::uint16_t val; - if(start != end) - { - if((b != start) && (b != end)) - { - val = *b; - if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u)) - invalid_code_point(val); - } - val = *start; - if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u)) - invalid_code_point(val); - val = *--end; - if(detail::is_high_surrogate(val)) - invalid_code_point(val); - } - } -private: - static void invalid_code_point(::boost::uint16_t val) - { -#ifndef BOOST_NO_STD_LOCALE - std::stringstream ss; - ss << "Misplaced UTF-16 surrogate U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-32 sequence"; - std::out_of_range e(ss.str()); -#else - std::out_of_range e("Misplaced UTF-16 surrogate encountered while trying to encode UTF-32 sequence"); -#endif - boost::throw_exception(e); - } - void extract_current()const - { - m_value = static_cast(static_cast< ::boost::uint16_t>(*m_position)); - // if the last value is a high surrogate then adjust m_position and m_value as needed: - if(detail::is_high_surrogate(*m_position)) - { - // precondition; next value must have be a low-surrogate: - BaseIterator next(m_position); - ::boost::uint16_t t = *++next; - if((t & 0xFC00u) != 0xDC00u) - invalid_code_point(t); - m_value = (m_value - detail::high_surrogate_base) << 10; - m_value |= (static_cast(static_cast< ::boost::uint16_t>(t)) & detail::ten_bit_mask); - } - // postcondition; result must not be a surrogate: - if(detail::is_surrogate(m_value)) - invalid_code_point(static_cast< ::boost::uint16_t>(m_value)); - } - BaseIterator m_position; - mutable U32Type m_value; -}; - -template -class u32_to_u8_iterator - : public boost::iterator_facade, U8Type, std::bidirectional_iterator_tag, const U8Type> -{ - typedef boost::iterator_facade, U8Type, std::bidirectional_iterator_tag, const U8Type> base_type; - -#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) - typedef typename std::iterator_traits::value_type base_value_type; - - BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32); - BOOST_STATIC_ASSERT(sizeof(U8Type)*CHAR_BIT == 8); -#endif - -public: - typename base_type::reference - dereference()const - { - if(m_current == 4) - extract_current(); - return m_values[m_current]; - } - bool equal(const u32_to_u8_iterator& that)const - { - if(m_position == that.m_position) - { - // either the m_current's must be equal, or one must be 0 and - // the other 4: which means neither must have bits 1 or 2 set: - return (m_current == that.m_current) - || (((m_current | that.m_current) & 3) == 0); - } - return false; - } - void increment() - { - // if we have a pending read then read now, so that we know whether - // to skip a position, or move to a low-surrogate: - if(m_current == 4) - { - // pending read: - extract_current(); - } - // move to the next surrogate position: - ++m_current; - // if we've reached the end skip a position: - if(m_values[m_current] == 0) - { - m_current = 4; - ++m_position; - } - } - void decrement() - { - if((m_current & 3) == 0) - { - --m_position; - extract_current(); - m_current = 3; - while(m_current && (m_values[m_current] == 0)) - --m_current; - } - else - --m_current; - } - BaseIterator base()const - { - return m_position; - } - // construct: - u32_to_u8_iterator() : m_position(), m_current(0) - { - m_values[0] = 0; - m_values[1] = 0; - m_values[2] = 0; - m_values[3] = 0; - m_values[4] = 0; - } - u32_to_u8_iterator(BaseIterator b) : m_position(b), m_current(4) - { - m_values[0] = 0; - m_values[1] = 0; - m_values[2] = 0; - m_values[3] = 0; - m_values[4] = 0; - } -private: - - void extract_current()const - { - boost::uint32_t c = *m_position; - if(c > 0x10FFFFu) - detail::invalid_utf32_code_point(c); - if(c < 0x80u) - { - m_values[0] = static_cast(c); - m_values[1] = static_cast(0u); - m_values[2] = static_cast(0u); - m_values[3] = static_cast(0u); - } - else if(c < 0x800u) - { - m_values[0] = static_cast(0xC0u + (c >> 6)); - m_values[1] = static_cast(0x80u + (c & 0x3Fu)); - m_values[2] = static_cast(0u); - m_values[3] = static_cast(0u); - } - else if(c < 0x10000u) - { - m_values[0] = static_cast(0xE0u + (c >> 12)); - m_values[1] = static_cast(0x80u + ((c >> 6) & 0x3Fu)); - m_values[2] = static_cast(0x80u + (c & 0x3Fu)); - m_values[3] = static_cast(0u); - } - else - { - m_values[0] = static_cast(0xF0u + (c >> 18)); - m_values[1] = static_cast(0x80u + ((c >> 12) & 0x3Fu)); - m_values[2] = static_cast(0x80u + ((c >> 6) & 0x3Fu)); - m_values[3] = static_cast(0x80u + (c & 0x3Fu)); - } - m_current= 0; - } - BaseIterator m_position; - mutable U8Type m_values[5]; - mutable unsigned m_current; -}; - -template -class u8_to_u32_iterator - : public boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> -{ - typedef boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type; - // special values for pending iterator reads: - BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu); - -#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) - typedef typename std::iterator_traits::value_type base_value_type; - - BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 8); - BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32); -#endif - -public: - typename base_type::reference - dereference()const - { - if(m_value == pending_read) - extract_current(); - return m_value; - } - bool equal(const u8_to_u32_iterator& that)const - { - return m_position == that.m_position; - } - void increment() - { - // We must not start with a continuation character: - if((static_cast(*m_position) & 0xC0) == 0x80) - invalid_sequence(); - // skip high surrogate first if there is one: - unsigned c = detail::utf8_byte_count(*m_position); - if(m_value == pending_read) - { - // Since we haven't read in a value, we need to validate the code points: - for(unsigned i = 0; i < c; ++i) - { - ++m_position; - // We must have a continuation byte: - if((i != c - 1) && ((static_cast(*m_position) & 0xC0) != 0x80)) - invalid_sequence(); - } - } - else - { - std::advance(m_position, c); - } - m_value = pending_read; - } - void decrement() - { - // Keep backtracking until we don't have a trailing character: - unsigned count = 0; - while((*--m_position & 0xC0u) == 0x80u) ++count; - // now check that the sequence was valid: - if(count != detail::utf8_trailing_byte_count(*m_position)) - invalid_sequence(); - m_value = pending_read; - } - BaseIterator base()const - { - return m_position; - } - // construct: - u8_to_u32_iterator() : m_position() - { - m_value = pending_read; - } - u8_to_u32_iterator(BaseIterator b) : m_position(b) - { - m_value = pending_read; - } - // - // Checked constructor: - // - u8_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b) - { - m_value = pending_read; - // - // We must not start with a continuation character, or end with a - // truncated UTF-8 sequence otherwise we run the risk of going past - // the start/end of the underlying sequence: - // - if(start != end) - { - unsigned char v = *start; - if((v & 0xC0u) == 0x80u) - invalid_sequence(); - if((b != start) && (b != end) && ((*b & 0xC0u) == 0x80u)) - invalid_sequence(); - BaseIterator pos = end; - do - { - v = *--pos; - } - while((start != pos) && ((v & 0xC0u) == 0x80u)); - std::ptrdiff_t extra = detail::utf8_byte_count(v); - if(std::distance(pos, end) < extra) - invalid_sequence(); - } - } -private: - static void invalid_sequence() - { - std::out_of_range e("Invalid UTF-8 sequence encountered while trying to encode UTF-32 character"); - boost::throw_exception(e); - } - void extract_current()const - { - m_value = static_cast(static_cast< ::boost::uint8_t>(*m_position)); - // we must not have a continuation character: - if((m_value & 0xC0u) == 0x80u) - invalid_sequence(); - // see how many extra bytes we have: - unsigned extra = detail::utf8_trailing_byte_count(*m_position); - // extract the extra bits, 6 from each extra byte: - BaseIterator next(m_position); - for(unsigned c = 0; c < extra; ++c) - { - ++next; - m_value <<= 6; - // We must have a continuation byte: - if((static_cast(*next) & 0xC0) != 0x80) - invalid_sequence(); - m_value += static_cast(*next) & 0x3Fu; - } - // we now need to remove a few of the leftmost bits, but how many depends - // upon how many extra bytes we've extracted: - static const boost::uint32_t masks[4] = - { - 0x7Fu, - 0x7FFu, - 0xFFFFu, - 0x1FFFFFu, - }; - m_value &= masks[extra]; - // check the result is in range: - if(m_value > static_cast(0x10FFFFu)) - invalid_sequence(); - // The result must not be a surrogate: - if((m_value >= static_cast(0xD800)) && (m_value <= static_cast(0xDFFF))) - invalid_sequence(); - // We should not have had an invalidly encoded UTF8 sequence: - if((extra > 0) && (m_value <= static_cast(masks[extra - 1]))) - invalid_sequence(); - } - BaseIterator m_position; - mutable U32Type m_value; -}; - -template -class utf16_output_iterator -{ -public: - typedef void difference_type; - typedef void value_type; - typedef boost::uint32_t* pointer; - typedef boost::uint32_t& reference; - typedef std::output_iterator_tag iterator_category; - - utf16_output_iterator(const BaseIterator& b) - : m_position(b){} - utf16_output_iterator(const utf16_output_iterator& that) - : m_position(that.m_position){} - utf16_output_iterator& operator=(const utf16_output_iterator& that) - { - m_position = that.m_position; - return *this; - } - const utf16_output_iterator& operator*()const - { - return *this; - } - void operator=(boost::uint32_t val)const - { - push(val); - } - utf16_output_iterator& operator++() - { - return *this; - } - utf16_output_iterator& operator++(int) - { - return *this; - } - BaseIterator base()const - { - return m_position; - } -private: - void push(boost::uint32_t v)const - { - if(v >= 0x10000u) - { - // begin by checking for a code point out of range: - if(v > 0x10FFFFu) - detail::invalid_utf32_code_point(v); - // split into two surrogates: - *m_position++ = static_cast(v >> 10) + detail::high_surrogate_base; - *m_position++ = static_cast(v & detail::ten_bit_mask) + detail::low_surrogate_base; - } - else - { - // 16-bit code point: - // value must not be a surrogate: - if(detail::is_surrogate(v)) - detail::invalid_utf32_code_point(v); - *m_position++ = static_cast(v); - } - } - mutable BaseIterator m_position; -}; - -template -class utf8_output_iterator -{ -public: - typedef void difference_type; - typedef void value_type; - typedef boost::uint32_t* pointer; - typedef boost::uint32_t& reference; - typedef std::output_iterator_tag iterator_category; - - utf8_output_iterator(const BaseIterator& b) - : m_position(b){} - utf8_output_iterator(const utf8_output_iterator& that) - : m_position(that.m_position){} - utf8_output_iterator& operator=(const utf8_output_iterator& that) - { - m_position = that.m_position; - return *this; - } - const utf8_output_iterator& operator*()const - { - return *this; - } - void operator=(boost::uint32_t val)const - { - push(val); - } - utf8_output_iterator& operator++() - { - return *this; - } - utf8_output_iterator& operator++(int) - { - return *this; - } - BaseIterator base()const - { - return m_position; - } -private: - void push(boost::uint32_t c)const - { - if(c > 0x10FFFFu) - detail::invalid_utf32_code_point(c); - if(c < 0x80u) - { - *m_position++ = static_cast(c); - } - else if(c < 0x800u) - { - *m_position++ = static_cast(0xC0u + (c >> 6)); - *m_position++ = static_cast(0x80u + (c & 0x3Fu)); - } - else if(c < 0x10000u) - { - *m_position++ = static_cast(0xE0u + (c >> 12)); - *m_position++ = static_cast(0x80u + ((c >> 6) & 0x3Fu)); - *m_position++ = static_cast(0x80u + (c & 0x3Fu)); - } - else - { - *m_position++ = static_cast(0xF0u + (c >> 18)); - *m_position++ = static_cast(0x80u + ((c >> 12) & 0x3Fu)); - *m_position++ = static_cast(0x80u + ((c >> 6) & 0x3Fu)); - *m_position++ = static_cast(0x80u + (c & 0x3Fu)); - } - } - mutable BaseIterator m_position; -}; - -} // namespace boost - -#endif // BOOST_REGEX_UNICODE_ITERATOR_HPP +#endif // BOOST_REGEX_PENDING_UNICODE_ITERATOR_HPP diff --git a/include/boost/regex/regex_traits.hpp b/include/boost/regex/regex_traits.hpp index 730ba6e0..2b383160 100644 --- a/include/boost/regex/regex_traits.hpp +++ b/include/boost/regex/regex_traits.hpp @@ -24,7 +24,11 @@ #endif # ifndef BOOST_REGEX_TRAITS_HPP_INCLUDED +#ifdef BOOST_REGEX_CXX03 # include +#else +# include +#endif # endif #endif // include diff --git a/include/boost/regex/user.hpp b/include/boost/regex/user.hpp index 33b10136..4b159bc5 100644 --- a/include/boost/regex/user.hpp +++ b/include/boost/regex/user.hpp @@ -61,10 +61,12 @@ // define this if you want to use the recursive algorithm // even if BOOST_REGEX_HAS_MS_STACK_GUARD is not defined. +// NOTE: OBSOLETE!! // #define BOOST_REGEX_RECURSIVE // define this if you want to use the non-recursive // algorithm, even if the recursive version would be the default. +// NOTE: OBSOLETE!! // #define BOOST_REGEX_NON_RECURSIVE // define this if you want to set the size of the memory blocks diff --git a/include/boost/regex/v4/basic_regex.hpp b/include/boost/regex/v4/basic_regex.hpp index c56dd8ef..0408a754 100644 --- a/include/boost/regex/v4/basic_regex.hpp +++ b/include/boost/regex/v4/basic_regex.hpp @@ -416,7 +416,7 @@ public: { typedef typename traits::string_type seq_type; seq_type a(arg_first, arg_last); - if(a.size()) + if(!a.empty()) assign(static_cast(&*a.begin()), static_cast(&*a.begin() + a.size()), f); else assign(static_cast(0), static_cast(0), f); @@ -617,32 +617,32 @@ public: // const BOOST_REGEX_DETAIL_NS::re_syntax_base* get_first_state()const { - BOOST_ASSERT(0 != m_pimpl.get()); + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); return m_pimpl->get_first_state(); } unsigned get_restart_type()const { - BOOST_ASSERT(0 != m_pimpl.get()); + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); return m_pimpl->get_restart_type(); } const unsigned char* get_map()const { - BOOST_ASSERT(0 != m_pimpl.get()); + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); return m_pimpl->get_map(); } const ::boost::regex_traits_wrapper& get_traits()const { - BOOST_ASSERT(0 != m_pimpl.get()); + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); return m_pimpl->get_traits(); } bool can_be_null()const { - BOOST_ASSERT(0 != m_pimpl.get()); + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); return m_pimpl->can_be_null(); } const BOOST_REGEX_DETAIL_NS::regex_data& get_data()const { - BOOST_ASSERT(0 != m_pimpl.get()); + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); return m_pimpl->get_data(); } boost::shared_ptr get_named_subs()const @@ -657,7 +657,7 @@ private: // // out of line members; // these are the only members that mutate the basic_regex object, -// and are designed to provide the strong exception guarentee +// and are designed to provide the strong exception guarantee // (in the event of a throw, the state of the object remains unchanged). // template @@ -795,4 +795,3 @@ public: #endif #endif - diff --git a/include/boost/regex/v4/basic_regex_creator.hpp b/include/boost/regex/v4/basic_regex_creator.hpp index f4b1660a..b9f02ece 100644 --- a/include/boost/regex/v4/basic_regex_creator.hpp +++ b/include/boost/regex/v4/basic_regex_creator.hpp @@ -52,11 +52,14 @@ struct digraph : public std::pair digraph(charT c1, charT c2) : std::pair(c1, c2) {} digraph(const digraph& d) : std::pair(d.first, d.second){} +#ifndef BOOST_NO_CXX11_DEFAULTED_FUNCTIONS + digraph& operator=(const digraph&) = default; +#endif template digraph(const Seq& s) : std::pair() { - BOOST_ASSERT(s.size() <= 2); - BOOST_ASSERT(s.size()); + BOOST_REGEX_ASSERT(s.size() <= 2); + BOOST_REGEX_ASSERT(s.size()); this->first = s[0]; this->second = (s.size() > 1) ? s[1] : 0; } @@ -243,7 +246,7 @@ protected: bool m_has_backrefs; // true if there are actually any backrefs indexed_bit_flag m_backrefs; // bitmask of permitted backrefs boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for; - bool m_has_recursions; // set when we have recursive expresisons to fixup + bool m_has_recursions; // set when we have recursive expressions to fixup std::vector m_recursion_checks; // notes which recursions we've followed while analysing this expression typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character @@ -285,11 +288,11 @@ basic_regex_creator::basic_regex_creator(regex_datam_word_mask = m_word_mask; - BOOST_ASSERT(m_word_mask != 0); - BOOST_ASSERT(m_mask_space != 0); - BOOST_ASSERT(m_lower_mask != 0); - BOOST_ASSERT(m_upper_mask != 0); - BOOST_ASSERT(m_alpha_mask != 0); + BOOST_REGEX_ASSERT(m_word_mask != 0); + BOOST_REGEX_ASSERT(m_mask_space != 0); + BOOST_REGEX_ASSERT(m_lower_mask != 0); + BOOST_REGEX_ASSERT(m_upper_mask != 0); + BOOST_REGEX_ASSERT(m_alpha_mask != 0); } template @@ -303,7 +306,7 @@ re_syntax_base* basic_regex_creator::append_state(syntax_element_ // set the offset to the next state in our last one: if(m_last_state) m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state); - // now actually extent our data: + // now actually extend our data: m_last_state = static_cast(m_pdata->m_data.extend(s)); // fill in boilerplate options in the new state: m_last_state->next.i = 0; @@ -447,9 +450,9 @@ re_syntax_base* basic_regex_creator::append_set( charT a2[3] = { c2.first, c2.second, charT(0), }; s1 = this->m_traits.transform(a1, (a1[1] ? a1+2 : a1+1)); s2 = this->m_traits.transform(a2, (a2[1] ? a2+2 : a2+1)); - if(s1.size() == 0) + if(s1.empty()) s1 = string_type(1, charT(0)); - if(s2.size() == 0) + if(s2.empty()) s2 = string_type(1, charT(0)); } else @@ -577,7 +580,7 @@ re_syntax_base* basic_regex_creator::append_set( // Oops error: return 0; } - BOOST_ASSERT(c3[1] == charT(0)); + BOOST_REGEX_ASSERT(c3[1] == charT(0)); for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) { c3[0] = static_cast(i); @@ -642,7 +645,7 @@ re_syntax_base* basic_regex_creator::append_set( while(sfirst != slast) { string_type s; - BOOST_ASSERT(static_cast(0) == sfirst->second); + BOOST_REGEX_ASSERT(static_cast(0) == sfirst->second); s = m_traits.transform_primary(&sfirst->first, &sfirst->first+1); if(s.empty()) return 0; // invalid or unsupported equivalence class @@ -951,7 +954,7 @@ void basic_regex_creator::create_startmaps(re_syntax_base* state) } // now work through our list, building all the maps as we go: - while(v.size()) + while(!v.empty()) { // Initialize m_recursion_checks if we need it: if(m_has_recursions) @@ -1035,7 +1038,7 @@ int basic_regex_creator::calculate_backstep(re_syntax_base* state } else if(state->type == syntax_element_long_set_rep) { - BOOST_ASSERT(rep->next.p->type == syntax_element_long_set); + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_long_set); if(static_cast*>(rep->next.p)->singleton == 0) return -1; if(rep->max != rep->min) @@ -1135,7 +1138,7 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, } case syntax_element_recurse: { - BOOST_ASSERT(static_cast(state)->alt.p->type == syntax_element_startmark); + BOOST_REGEX_ASSERT(static_cast(state)->alt.p->type == syntax_element_startmark); recursion_sub = static_cast(static_cast(state)->alt.p)->index; if(m_recursion_checks[recursion_sub] & 1u) { @@ -1516,7 +1519,7 @@ template void basic_regex_creator::probe_leading_repeat(re_syntax_base* state) { // enumerate our states, and see if we have a leading repeat - // for which failed search restarts can be optimised; + // for which failed search restarts can be optimized; do { switch(state->type) @@ -1573,7 +1576,6 @@ void basic_regex_creator::probe_leading_repeat(re_syntax_base* st }while(state); } - } // namespace BOOST_REGEX_DETAIL_NS } // namespace boost @@ -1594,4 +1596,3 @@ void basic_regex_creator::probe_leading_repeat(re_syntax_base* st #endif #endif - diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index 8bdb079f..58096783 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -172,7 +172,7 @@ void basic_regex_parser::parse(const charT* p1, const charT* p2, m_parser_proc = &basic_regex_parser::parse_literal; break; default: - // Ooops, someone has managed to set more than one of the main option flags, + // Oops, someone has managed to set more than one of the main option flags, // so this must be an error: fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used."); return; @@ -981,7 +981,7 @@ template bool basic_regex_parser::parse_repeat(std::size_t low, std::size_t high) { bool greedy = true; - bool pocessive = false; + bool possessive = false; std::size_t insert_point; // // when we get to here we may have a non-greedy ? mark still to come: @@ -1005,12 +1005,12 @@ bool basic_regex_parser::parse_repeat(std::size_t low, std::size_ greedy = false; ++m_position; } - // for perl regexes only check for pocessive ++ repeats. + // for perl regexes only check for possessive ++ repeats. if((m_position != m_end) && (0 == (this->flags() & regbase::main_option_type)) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus)) { - pocessive = true; + possessive = true; ++m_position; } } @@ -1082,10 +1082,10 @@ bool basic_regex_parser::parse_repeat(std::size_t low, std::size_ rep = static_cast(this->getaddress(rep_off)); rep->alt.i = this->m_pdata->m_data.size() - rep_off; // - // If the repeat is pocessive then bracket the repeat with a (?>...) + // If the repeat is possessive then bracket the repeat with a (?>...) // independent sub-expression construct: // - if(pocessive) + if(possessive) { if(m_position != m_end) { @@ -1542,7 +1542,7 @@ bool basic_regex_parser::parse_inner_set(basic_char_set::parse_inner_set(basic_char_setm_traits.lookup_collatename(name_first, name_last); - if((0 == m.size()) || (m.size() > 2)) + if(m.empty() || (m.size() > 2)) { fail(regex_constants::error_collate, name_first - m_base); return false; @@ -1948,7 +1948,7 @@ charT basic_regex_parser::unescape_character() template bool basic_regex_parser::parse_backref() { - BOOST_ASSERT(m_position != m_end); + BOOST_REGEX_ASSERT(m_position != m_end); const charT* pc = m_position; boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10); if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs))) @@ -2609,7 +2609,7 @@ option_group_jump: this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end)); return false; } - BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); + BOOST_REGEX_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); ++m_position; // // restore the flags: @@ -3112,7 +3112,7 @@ bool basic_regex_parser::unwind_alts(std::ptrdiff_t last_paren_st // alternative then that's an error: // if((this->m_alt_insert_point == static_cast(this->m_pdata->m_data.size())) - && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start) + && (!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start) && !( ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) @@ -3127,7 +3127,7 @@ bool basic_regex_parser::unwind_alts(std::ptrdiff_t last_paren_st // // Fix up our alternatives: // - while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)) + while((!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start)) { // // fix up the jump to point to the end of the states @@ -3137,7 +3137,7 @@ bool basic_regex_parser::unwind_alts(std::ptrdiff_t last_paren_st m_alt_jumps.pop_back(); this->m_pdata->m_data.align(); re_jump* jmp = static_cast(this->getaddress(jump_offset)); - BOOST_ASSERT(jmp->type == syntax_element_jump); + BOOST_REGEX_ASSERT(jmp->type == syntax_element_jump); jmp->alt.i = this->m_pdata->m_data.size() - jump_offset; } return true; diff --git a/include/boost/regex/v4/c_regex_traits.hpp b/include/boost/regex/v4/c_regex_traits.hpp index d99b0f34..f966c4d4 100644 --- a/include/boost/regex/v4/c_regex_traits.hpp +++ b/include/boost/regex/v4/c_regex_traits.hpp @@ -36,7 +36,7 @@ namespace std{ #ifdef BOOST_MSVC #pragma warning(push) -#pragma warning(disable: 4103) +#pragma warning(disable: 4103 4244) #endif #ifdef BOOST_HAS_ABI_HEADERS # include BOOST_ABI_PREFIX @@ -47,11 +47,35 @@ namespace std{ namespace boost{ + namespace BOOST_REGEX_DETAIL_NS { + + enum + { + char_class_space = 1 << 0, + char_class_print = 1 << 1, + char_class_cntrl = 1 << 2, + char_class_upper = 1 << 3, + char_class_lower = 1 << 4, + char_class_alpha = 1 << 5, + char_class_digit = 1 << 6, + char_class_punct = 1 << 7, + char_class_xdigit = 1 << 8, + char_class_alnum = char_class_alpha | char_class_digit, + char_class_graph = char_class_alnum | char_class_punct, + char_class_blank = 1 << 9, + char_class_word = 1 << 10, + char_class_unicode = 1 << 11, + char_class_horizontal = 1 << 12, + char_class_vertical = 1 << 13 + }; + + } + template struct c_regex_traits; template<> -struct BOOST_REGEX_DECL c_regex_traits +struct c_regex_traits { c_regex_traits(){} typedef char char_type; @@ -96,7 +120,7 @@ private: #ifndef BOOST_NO_WREGEX template<> -struct BOOST_REGEX_DECL c_regex_traits +struct c_regex_traits { c_regex_traits(){} typedef wchar_t char_type; @@ -139,59 +163,335 @@ private: c_regex_traits& operator=(const c_regex_traits&); }; -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -// -// Provide an unsigned short version as well, so the user can link to this -// no matter whether they build with /Zc:wchar_t or not (MSVC specific). -// -template<> -struct BOOST_REGEX_DECL c_regex_traits +#endif // BOOST_NO_WREGEX + +inline c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform(const char* p1, const char* p2) { - c_regex_traits(){} - typedef unsigned short char_type; - typedef std::size_t size_type; - typedef std::basic_string string_type; - struct locale_type{}; - typedef boost::uint32_t char_class_type; - - static size_type length(const char_type* p) - { - return (std::wcslen)((const wchar_t*)p); + std::string result(10, ' '); + std::size_t s = result.size(); + std::size_t r; + std::string src(p1, p2); + while (s < (r = std::strxfrm(&*result.begin(), src.c_str(), s))) + { +#if defined(_CPPLIB_VER) + // + // A bug in VC11 and 12 causes the program to hang if we pass a null-string + // to std::strxfrm, but only for certain locales :-( + // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware). + // + if (r == INT_MAX) + { + result.erase(); + result.insert(result.begin(), static_cast(0)); + return result; + } +#endif + result.append(r - s + 3, ' '); + s = result.size(); } + result.erase(r); + return result; +} - unsigned short translate(unsigned short c) const - { - return c; +inline c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform_primary(const char* p1, const char* p2) +{ + static char s_delim; + static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast*>(0), &s_delim); + std::string result; + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch (s_collate_type) + { + case ::boost::BOOST_REGEX_DETAIL_NS::sort_C: + case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + for (std::string::size_type i = 0; i < result.size(); ++i) + result[i] = static_cast((std::tolower)(static_cast(result[i]))); + result = transform(&*result.begin(), &*result.begin() + result.size()); + break; } - unsigned short translate_nocase(unsigned short c) const - { - return (std::towlower)((wchar_t)c); + case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed: + { + // get a regular sort key, and then truncate it: + result = transform(p1, p2); + result.erase(s_delim); + break; } + case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result = transform(p1, p2); + if ((!result.empty()) && (result[0] == s_delim)) + break; + std::size_t i; + for (i = 0; i < result.size(); ++i) + { + if (result[i] == s_delim) + break; + } + result.erase(i); + break; + } + if (result.empty()) + result = std::string(1, char(0)); + return result; +} - static string_type BOOST_REGEX_CALL transform(const unsigned short* p1, const unsigned short* p2); - static string_type BOOST_REGEX_CALL transform_primary(const unsigned short* p1, const unsigned short* p2); +inline c_regex_traits::char_class_type BOOST_REGEX_CALL c_regex_traits::lookup_classname(const char* p1, const char* p2) +{ + using namespace BOOST_REGEX_DETAIL_NS; + static const char_class_type masks[] = + { + 0, + char_class_alnum, + char_class_alpha, + char_class_blank, + char_class_cntrl, + char_class_digit, + char_class_digit, + char_class_graph, + char_class_horizontal, + char_class_lower, + char_class_lower, + char_class_print, + char_class_punct, + char_class_space, + char_class_space, + char_class_upper, + char_class_unicode, + char_class_upper, + char_class_vertical, + char_class_alnum | char_class_word, + char_class_alnum | char_class_word, + char_class_xdigit, + }; - static char_class_type BOOST_REGEX_CALL lookup_classname(const unsigned short* p1, const unsigned short* p2); - static string_type BOOST_REGEX_CALL lookup_collatename(const unsigned short* p1, const unsigned short* p2); + int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); + if (idx < 0) + { + std::string s(p1, p2); + for (std::string::size_type i = 0; i < s.size(); ++i) + s[i] = static_cast((std::tolower)(static_cast(s[i]))); + idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); + } + BOOST_REGEX_ASSERT(std::size_t(idx) + 1u < sizeof(masks) / sizeof(masks[0])); + return masks[idx + 1]; +} - static bool BOOST_REGEX_CALL isctype(unsigned short, char_class_type); - static int BOOST_REGEX_CALL value(unsigned short, int); +inline bool BOOST_REGEX_CALL c_regex_traits::isctype(char c, char_class_type mask) +{ + using namespace BOOST_REGEX_DETAIL_NS; + return + ((mask & char_class_space) && (std::isspace)(static_cast(c))) + || ((mask & char_class_print) && (std::isprint)(static_cast(c))) + || ((mask & char_class_cntrl) && (std::iscntrl)(static_cast(c))) + || ((mask & char_class_upper) && (std::isupper)(static_cast(c))) + || ((mask & char_class_lower) && (std::islower)(static_cast(c))) + || ((mask & char_class_alpha) && (std::isalpha)(static_cast(c))) + || ((mask & char_class_digit) && (std::isdigit)(static_cast(c))) + || ((mask & char_class_punct) && (std::ispunct)(static_cast(c))) + || ((mask & char_class_xdigit) && (std::isxdigit)(static_cast(c))) + || ((mask & char_class_blank) && (std::isspace)(static_cast(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c)) + || ((mask & char_class_word) && (c == '_')) + || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v'))) + || ((mask & char_class_horizontal) && (std::isspace)(static_cast(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != '\v')); +} - locale_type imbue(locale_type l) - { return l; } - locale_type getloc()const - { return locale_type(); } +inline c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::lookup_collatename(const char* p1, const char* p2) +{ + std::string s(p1, p2); + s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s); + if (s.empty() && (p2 - p1 == 1)) + s.append(1, *p1); + return s; +} -private: - // this type is not copyable: - c_regex_traits(const c_regex_traits&); - c_regex_traits& operator=(const c_regex_traits&); -}; +inline int BOOST_REGEX_CALL c_regex_traits::value(char c, int radix) +{ + char b[2] = { c, '\0', }; + char* ep; + int result = std::strtol(b, &ep, radix); + if (ep == b) + return -1; + return result; +} + +#ifndef BOOST_NO_WREGEX + +inline c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform(const wchar_t* p1, const wchar_t* p2) +{ + std::size_t r; + std::size_t s = 10; + std::wstring src(p1, p2); + std::wstring result(s, L' '); + while (s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s))) + { +#if defined(_CPPLIB_VER) + // + // A bug in VC11 and 12 causes the program to hang if we pass a null-string + // to std::strxfrm, but only for certain locales :-( + // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware). + // + if (r == INT_MAX) + { + result.erase(); + result.insert(result.begin(), static_cast(0)); + return result; + } +#endif + result.append(r - s + 3, L' '); + s = result.size(); + } + result.erase(r); + return result; +} + +inline c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform_primary(const wchar_t* p1, const wchar_t* p2) +{ + static wchar_t s_delim; + static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast*>(0), &s_delim); + std::wstring result; + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch (s_collate_type) + { + case ::boost::BOOST_REGEX_DETAIL_NS::sort_C: + case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + for (std::wstring::size_type i = 0; i < result.size(); ++i) + result[i] = (std::towlower)(result[i]); + result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed: + { + // get a regular sort key, and then truncate it: + result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); + result.erase(s_delim); + break; + } + case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); + if ((!result.empty()) && (result[0] == s_delim)) + break; + std::size_t i; + for (i = 0; i < result.size(); ++i) + { + if (result[i] == s_delim) + break; + } + result.erase(i); + break; + } + if (result.empty()) + result = std::wstring(1, char(0)); + return result; +} + +inline c_regex_traits::char_class_type BOOST_REGEX_CALL c_regex_traits::lookup_classname(const wchar_t* p1, const wchar_t* p2) +{ + using namespace BOOST_REGEX_DETAIL_NS; + static const char_class_type masks[] = + { + 0, + char_class_alnum, + char_class_alpha, + char_class_blank, + char_class_cntrl, + char_class_digit, + char_class_digit, + char_class_graph, + char_class_horizontal, + char_class_lower, + char_class_lower, + char_class_print, + char_class_punct, + char_class_space, + char_class_space, + char_class_upper, + char_class_unicode, + char_class_upper, + char_class_vertical, + char_class_alnum | char_class_word, + char_class_alnum | char_class_word, + char_class_xdigit, + }; + + int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); + if (idx < 0) + { + std::wstring s(p1, p2); + for (std::wstring::size_type i = 0; i < s.size(); ++i) + s[i] = (std::towlower)(s[i]); + idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); + } + BOOST_REGEX_ASSERT(idx + 1 < static_cast(sizeof(masks) / sizeof(masks[0]))); + return masks[idx + 1]; +} + +inline bool BOOST_REGEX_CALL c_regex_traits::isctype(wchar_t c, char_class_type mask) +{ + using namespace BOOST_REGEX_DETAIL_NS; + return + ((mask & char_class_space) && (std::iswspace)(c)) + || ((mask & char_class_print) && (std::iswprint)(c)) + || ((mask & char_class_cntrl) && (std::iswcntrl)(c)) + || ((mask & char_class_upper) && (std::iswupper)(c)) + || ((mask & char_class_lower) && (std::iswlower)(c)) + || ((mask & char_class_alpha) && (std::iswalpha)(c)) + || ((mask & char_class_digit) && (std::iswdigit)(c)) + || ((mask & char_class_punct) && (std::iswpunct)(c)) + || ((mask & char_class_xdigit) && (std::iswxdigit)(c)) + || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c)) + || ((mask & char_class_word) && (c == '_')) + || ((mask & char_class_unicode) && (c & ~static_cast(0xff))) + || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v'))) + || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v')); +} + +inline c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::lookup_collatename(const wchar_t* p1, const wchar_t* p2) +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4244) +#endif + std::string name(p1, p2); +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + name = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(name); + if (!name.empty()) + return string_type(name.begin(), name.end()); + if (p2 - p1 == 1) + return string_type(1, *p1); + return string_type(); +} + +inline int BOOST_REGEX_CALL c_regex_traits::value(wchar_t c, int radix) +{ +#ifdef BOOST_BORLANDC + // workaround for broken wcstol: + if ((std::iswxdigit)(c) == 0) + return -1; +#endif + wchar_t b[2] = { c, '\0', }; + wchar_t* ep; + int result = std::wcstol(b, &ep, radix); + if (ep == b) + return -1; + return result; +} #endif -#endif // BOOST_NO_WREGEX - } #ifdef BOOST_MSVC diff --git a/include/boost/regex/v4/cpp_regex_traits.hpp b/include/boost/regex/v4/cpp_regex_traits.hpp index 1370b0d5..98b25151 100644 --- a/include/boost/regex/v4/cpp_regex_traits.hpp +++ b/include/boost/regex/v4/cpp_regex_traits.hpp @@ -38,12 +38,12 @@ #include #endif #ifndef BOOST_REGEX_OBJECT_CACHE_HPP -#include +#include #endif -#include -#include #include +#include +#include #ifdef BOOST_MSVC #pragma warning(push) @@ -89,9 +89,9 @@ public: parser_buf() : base_type() { setbuf(0, 0); } const charT* getnext() { return this->gptr(); } protected: - std::basic_streambuf* setbuf(char_type* s, streamsize n); - typename parser_buf::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which); - typename parser_buf::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which); + std::basic_streambuf* setbuf(char_type* s, streamsize n) BOOST_OVERRIDE; + typename parser_buf::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which) BOOST_OVERRIDE; + typename parser_buf::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which) BOOST_OVERRIDE; private: parser_buf& operator=(const parser_buf&); parser_buf(const parser_buf&); @@ -225,7 +225,7 @@ std::locale cpp_regex_traits_base::imbue(const std::locale& l) // // class cpp_regex_traits_char_layer: -// implements methods that require specialisation for narrow characters: +// implements methods that require specialization for narrow characters: // template class cpp_regex_traits_char_layer : public cpp_regex_traits_base @@ -281,7 +281,7 @@ void cpp_regex_traits_char_layer::init() typename std::messages::catalog cat = reinterpret_cast::catalog>(-1); #endif std::string cat_name(cpp_regex_traits::get_catalog_name()); - if(cat_name.size() && (this->m_pmessages != 0)) + if((!cat_name.empty()) && (this->m_pmessages != 0)) { cat = this->m_pmessages->open( cat_name, @@ -352,10 +352,10 @@ typename cpp_regex_traits_char_layer::string_type } // -// specialised version for narrow characters: +// specialized version for narrow characters: // template <> -class BOOST_REGEX_DECL cpp_regex_traits_char_layer : public cpp_regex_traits_base +class cpp_regex_traits_char_layer : public cpp_regex_traits_base { typedef std::string string_type; public: @@ -508,7 +508,7 @@ typename cpp_regex_traits_implementation::string_type // we work around this elsewhere, but just assert here that // we adhere to gcc's (buggy) preconditions... // - BOOST_ASSERT(*p2 == 0); + BOOST_REGEX_ASSERT(*p2 == 0); string_type result; #if defined(_CPPLIB_VER) // @@ -566,7 +566,7 @@ typename cpp_regex_traits_implementation::string_type #ifndef BOOST_NO_EXCEPTIONS }catch(...){} #endif - while(result.size() && (charT(0) == *result.rbegin())) + while((!result.empty()) && (charT(0) == *result.rbegin())) result.erase(result.size() - 1); if(result.empty()) { @@ -588,7 +588,7 @@ typename cpp_regex_traits_implementation::string_type // we work around this elsewhere, but just assert here that // we adhere to gcc's (buggy) preconditions... // - BOOST_ASSERT(*p2 == 0); + BOOST_REGEX_ASSERT(*p2 == 0); // // swallowing all exceptions here is a bad idea // however at least one std lib will always throw @@ -621,7 +621,7 @@ typename cpp_regex_traits_implementation::string_type #else // // some implementations (Dinkumware) append unnecessary trailing \0's: - while(result.size() && (charT(0) == *result.rbegin())) + while((!result.empty()) && (charT(0) == *result.rbegin())) result.erase(result.size() - 1); #endif // @@ -646,7 +646,7 @@ typename cpp_regex_traits_implementation::string_type result2.append(1, static_cast(1 + static_cast(result[i]))).append(1, charT('b') - 1); } } - BOOST_ASSERT(std::find(result2.begin(), result2.end(), charT(0)) == result2.end()); + BOOST_REGEX_ASSERT(std::find(result2.begin(), result2.end(), charT(0)) == result2.end()); #ifndef BOOST_NO_EXCEPTIONS } catch(...) @@ -662,7 +662,7 @@ typename cpp_regex_traits_implementation::string_type cpp_regex_traits_implementation::lookup_collatename(const charT* p1, const charT* p2) const { typedef typename std::map::const_iterator iter_type; - if(m_custom_collate_names.size()) + if(!m_custom_collate_names.empty()) { iter_type pos = m_custom_collate_names.find(string_type(p1, p2)); if(pos != m_custom_collate_names.end()) @@ -680,10 +680,10 @@ typename cpp_regex_traits_implementation::string_type name = lookup_default_collate_name(name); #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ && !BOOST_WORKAROUND(BOOST_BORLANDC, <= 0x0551) - if(name.size()) + if(!name.empty()) return string_type(name.begin(), name.end()); #else - if(name.size()) + if(!name.empty()) { string_type result; typedef std::string::const_iterator iter; @@ -709,7 +709,7 @@ void cpp_regex_traits_implementation::init() typename std::messages::catalog cat = reinterpret_cast::catalog>(-1); #endif std::string cat_name(cpp_regex_traits::get_catalog_name()); - if(cat_name.size() && (this->m_pmessages != 0)) + if((!cat_name.empty()) && (this->m_pmessages != 0)) { cat = this->m_pmessages->open( cat_name, @@ -796,7 +796,7 @@ void cpp_regex_traits_implementation::init() for(unsigned int j = 0; j <= 13; ++j) { string_type s(this->m_pmessages->get(cat, 0, j+300, null_string)); - if(s.size()) + if(!s.empty()) this->m_custom_class_names[s] = masks[j]; } } @@ -864,7 +864,7 @@ typename cpp_regex_traits_implementation::char_class_type ::boost::BOOST_REGEX_DETAIL_NS::char_class_xdigit, }; #endif - if(m_custom_class_names.size()) + if(!m_custom_class_names.empty()) { typedef typename std::map, char_class_type>::const_iterator map_iter; map_iter pos = m_custom_class_names.find(string_type(p1, p2)); @@ -872,7 +872,7 @@ typename cpp_regex_traits_implementation::char_class_type return pos->second; } std::size_t state_id = 1 + BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); - BOOST_ASSERT(state_id < sizeof(masks) / sizeof(masks[0])); + BOOST_REGEX_ASSERT(state_id < sizeof(masks) / sizeof(masks[0])); return masks[state_id]; } @@ -1129,6 +1129,91 @@ static_mutex& cpp_regex_traits::get_mutex_inst() } #endif +namespace BOOST_REGEX_DETAIL_NS { + + inline void cpp_regex_traits_char_layer::init() + { + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: + std::memset(m_char_map, 0, sizeof(m_char_map)); +#ifndef BOOST_NO_STD_MESSAGES +#ifndef __IBMCPP__ + std::messages::catalog cat = static_cast::catalog>(-1); +#else + std::messages::catalog cat = reinterpret_cast::catalog>(-1); +#endif + std::string cat_name(cpp_regex_traits::get_catalog_name()); + if ((!cat_name.empty()) && (m_pmessages != 0)) + { + cat = this->m_pmessages->open( + cat_name, + this->m_locale); + if ((int)cat < 0) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if ((int)cat >= 0) + { +#ifndef BOOST_NO_EXCEPTIONS + try { +#endif + for (regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = this->m_pmessages->get(cat, 0, i, get_default_syntax(i)); + for (string_type::size_type j = 0; j < mss.size(); ++j) + { + m_char_map[static_cast(mss[j])] = i; + } + } + this->m_pmessages->close(cat); +#ifndef BOOST_NO_EXCEPTIONS + } + catch (...) + { + this->m_pmessages->close(cat); + throw; + } +#endif + } + else + { +#endif + for (regex_constants::syntax_type j = 1; j < regex_constants::syntax_max; ++j) + { + const char* ptr = get_default_syntax(j); + while (ptr && *ptr) + { + m_char_map[static_cast(*ptr)] = j; + ++ptr; + } + } +#ifndef BOOST_NO_STD_MESSAGES + } +#endif + // + // finish off by calculating our escape types: + // + unsigned char i = 'A'; + do + { + if (m_char_map[i] == 0) + { + if (this->m_pctype->is(std::ctype_base::lower, i)) + m_char_map[i] = regex_constants::escape_type_class; + else if (this->m_pctype->is(std::ctype_base::upper, i)) + m_char_map[i] = regex_constants::escape_type_not_class; + } + } while (0xFF != i++); + } + +} // namespace detail + } // boost @@ -1150,5 +1235,3 @@ static_mutex& cpp_regex_traits::get_mutex_inst() #endif #endif - - diff --git a/include/boost/regex/v4/cregex.hpp b/include/boost/regex/v4/cregex.hpp index daac7e63..dec49c9b 100644 --- a/include/boost/regex/v4/cregex.hpp +++ b/include/boost/regex/v4/cregex.hpp @@ -200,123 +200,6 @@ BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeW(regex_tW*); } /* namespace */ #endif -#if defined(__cplusplus) -/* - * C++ high level wrapper goes here: - */ -#include -#include -namespace boost{ - -#ifdef BOOST_MSVC -#pragma warning(push) -#pragma warning(disable: 4103) -#endif -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_PREFIX -#endif -#ifdef BOOST_MSVC -#pragma warning(pop) -#endif - -class RegEx; - -namespace BOOST_REGEX_DETAIL_NS{ - -class RegExData; -struct pred1; -struct pred2; -struct pred3; -struct pred4; - -} /* namespace BOOST_REGEX_DETAIL_NS */ - -#if (defined(BOOST_MSVC) || defined(BOOST_BORLANDC)) && !defined(BOOST_DISABLE_WIN32) -typedef bool (__cdecl *GrepCallback)(const RegEx& expression); -typedef bool (__cdecl *GrepFileCallback)(const char* file, const RegEx& expression); -typedef bool (__cdecl *FindFilesCallback)(const char* file); -#else -typedef bool (*GrepCallback)(const RegEx& expression); -typedef bool (*GrepFileCallback)(const char* file, const RegEx& expression); -typedef bool (*FindFilesCallback)(const char* file); -#endif - -class BOOST_REGEX_DECL RegEx -{ -private: - BOOST_REGEX_DETAIL_NS::RegExData* pdata; -public: - RegEx(); - RegEx(const RegEx& o); - ~RegEx(); - explicit RegEx(const char* c, bool icase = false); - explicit RegEx(const std::string& s, bool icase = false); - RegEx& operator=(const RegEx& o); - RegEx& operator=(const char* p); - RegEx& operator=(const std::string& s){ return this->operator=(s.c_str()); } - unsigned int SetExpression(const char* p, bool icase = false); - unsigned int SetExpression(const std::string& s, bool icase = false){ return SetExpression(s.c_str(), icase); } - std::string Expression()const; - unsigned int error_code()const; - /* - * now matching operators: - */ - bool Match(const char* p, match_flag_type flags = match_default); - bool Match(const std::string& s, match_flag_type flags = match_default) { return Match(s.c_str(), flags); } - bool Search(const char* p, match_flag_type flags = match_default); - bool Search(const std::string& s, match_flag_type flags = match_default) { return Search(s.c_str(), flags); } - unsigned int Grep(GrepCallback cb, const char* p, match_flag_type flags = match_default); - unsigned int Grep(GrepCallback cb, const std::string& s, match_flag_type flags = match_default) { return Grep(cb, s.c_str(), flags); } - unsigned int Grep(std::vector& v, const char* p, match_flag_type flags = match_default); - unsigned int Grep(std::vector& v, const std::string& s, match_flag_type flags = match_default) { return Grep(v, s.c_str(), flags); } - unsigned int Grep(std::vector& v, const char* p, match_flag_type flags = match_default); - unsigned int Grep(std::vector& v, const std::string& s, match_flag_type flags = match_default) { return Grep(v, s.c_str(), flags); } -#ifndef BOOST_REGEX_NO_FILEITER - unsigned int GrepFiles(GrepFileCallback cb, const char* files, bool recurse = false, match_flag_type flags = match_default); - unsigned int GrepFiles(GrepFileCallback cb, const std::string& files, bool recurse = false, match_flag_type flags = match_default) { return GrepFiles(cb, files.c_str(), recurse, flags); } - unsigned int FindFiles(FindFilesCallback cb, const char* files, bool recurse = false, match_flag_type flags = match_default); - unsigned int FindFiles(FindFilesCallback cb, const std::string& files, bool recurse = false, match_flag_type flags = match_default) { return FindFiles(cb, files.c_str(), recurse, flags); } -#endif - - std::string Merge(const std::string& in, const std::string& fmt, - bool copy = true, match_flag_type flags = match_default); - std::string Merge(const char* in, const char* fmt, - bool copy = true, match_flag_type flags = match_default); - - std::size_t Split(std::vector& v, std::string& s, match_flag_type flags = match_default, unsigned max_count = ~0); - /* - * now operators for returning what matched in more detail: - */ - std::size_t Position(int i = 0)const; - std::size_t Length(int i = 0)const; - bool Matched(int i = 0)const; - std::size_t Marks()const; - std::string What(int i = 0)const; - std::string operator[](int i)const { return What(i); } - - static const std::size_t npos; - - friend struct BOOST_REGEX_DETAIL_NS::pred1; - friend struct BOOST_REGEX_DETAIL_NS::pred2; - friend struct BOOST_REGEX_DETAIL_NS::pred3; - friend struct BOOST_REGEX_DETAIL_NS::pred4; -}; - -#ifdef BOOST_MSVC -#pragma warning(push) -#pragma warning(disable: 4103) -#endif -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_SUFFIX -#endif -#ifdef BOOST_MSVC -#pragma warning(pop) -#endif - -} /* namespace boost */ - -#endif /* __cplusplus */ - #endif /* include guard */ diff --git a/include/boost/regex/v4/fileiter.hpp b/include/boost/regex/v4/fileiter.hpp deleted file mode 100644 index 79260f26..00000000 --- a/include/boost/regex/v4/fileiter.hpp +++ /dev/null @@ -1,557 +0,0 @@ -/* - * - * Copyright (c) 1998-2002 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE fileiter.hpp - * VERSION see - * DESCRIPTION: Declares various platform independent file and - * directory iterators, plus binary file input in - * the form of class map_file. - */ - -#ifndef BOOST_RE_FILEITER_HPP_INCLUDED -#define BOOST_RE_FILEITER_HPP_INCLUDED - -#ifndef BOOST_REGEX_CONFIG_HPP -#include -#endif -#include - -#ifndef BOOST_REGEX_NO_FILEITER - -#if (defined(__CYGWIN__) || defined(__CYGWIN32__)) && !defined(BOOST_REGEX_NO_W32) -#error "Sorry, can't mix with STL code and gcc compiler: if you ran configure, try again with configure --disable-ms-windows" -#define BOOST_REGEX_FI_WIN32_MAP -#define BOOST_REGEX_FI_POSIX_DIR -#elif (defined(__WIN32__) || defined(_WIN32) || defined(WIN32)) && !defined(BOOST_REGEX_NO_W32) -#define BOOST_REGEX_FI_WIN32_MAP -#define BOOST_REGEX_FI_WIN32_DIR -#else -#define BOOST_REGEX_FI_POSIX_MAP -#define BOOST_REGEX_FI_POSIX_DIR -#endif - -#if defined(BOOST_REGEX_FI_WIN32_MAP)||defined(BOOST_REGEX_FI_WIN32_DIR) -#include -#endif - -#if defined(BOOST_REGEX_FI_WIN32_DIR) - -#include - -namespace boost{ - namespace BOOST_REGEX_DETAIL_NS{ - -#ifndef BOOST_NO_ANSI_APIS -typedef WIN32_FIND_DATAA _fi_find_data; -#else -typedef WIN32_FIND_DATAW _fi_find_data; -#endif -typedef HANDLE _fi_find_handle; - - } // namespace BOOST_REGEX_DETAIL_NS - -} // namespace boost - -#define _fi_invalid_handle INVALID_HANDLE_VALUE -#define _fi_dir FILE_ATTRIBUTE_DIRECTORY - -#elif defined(BOOST_REGEX_FI_POSIX_DIR) - -#include -#include -#include -#include -#include -#include -#include - -#if defined(__SUNPRO_CC) -using std::list; -#endif - -#ifndef MAX_PATH -#define MAX_PATH 256 -#endif - -namespace boost{ - namespace BOOST_REGEX_DETAIL_NS{ - -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_PREFIX -#endif - -struct _fi_find_data -{ - unsigned dwFileAttributes; - char cFileName[MAX_PATH]; -}; - -struct _fi_priv_data; - -typedef _fi_priv_data* _fi_find_handle; -#define _fi_invalid_handle 0 -#define _fi_dir 1 - -_fi_find_handle _fi_FindFirstFile(const char* lpFileName, _fi_find_data* lpFindFileData); -bool _fi_FindNextFile(_fi_find_handle hFindFile, _fi_find_data* lpFindFileData); -bool _fi_FindClose(_fi_find_handle hFindFile); - -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_SUFFIX -#endif - - } // namespace BOOST_REGEX_DETAIL_NS -} // namespace boost - -#ifdef FindFirstFile - #undef FindFirstFile -#endif -#ifdef FindNextFile - #undef FindNextFile -#endif -#ifdef FindClose - #undef FindClose -#endif - -#define FindFirstFileA _fi_FindFirstFile -#define FindNextFileA _fi_FindNextFile -#define FindClose _fi_FindClose - -#endif - -namespace boost{ - namespace BOOST_REGEX_DETAIL_NS{ - -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_PREFIX -#endif - -#ifdef BOOST_REGEX_FI_WIN32_MAP // win32 mapfile - -class BOOST_REGEX_DECL mapfile -{ - HANDLE hfile; - HANDLE hmap; - const char* _first; - const char* _last; -public: - - typedef const char* iterator; - - mapfile(){ hfile = hmap = 0; _first = _last = 0; } - mapfile(const char* file){ hfile = hmap = 0; _first = _last = 0; open(file); } - ~mapfile(){ close(); } - void open(const char* file); - void close(); - const char* begin(){ return _first; } - const char* end(){ return _last; } - size_t size(){ return _last - _first; } - bool valid(){ return (hfile != 0) && (hfile != INVALID_HANDLE_VALUE); } -}; - - -#else - -class BOOST_REGEX_DECL mapfile_iterator; - -class BOOST_REGEX_DECL mapfile -{ - typedef char* pointer; - std::FILE* hfile; - long int _size; - pointer* _first; - pointer* _last; - mutable std::list condemed; - enum sizes - { - buf_size = 4096 - }; - void lock(pointer* node)const; - void unlock(pointer* node)const; -public: - - typedef mapfile_iterator iterator; - - mapfile(){ hfile = 0; _size = 0; _first = _last = 0; } - mapfile(const char* file){ hfile = 0; _size = 0; _first = _last = 0; open(file); } - ~mapfile(){ close(); } - void open(const char* file); - void close(); - iterator begin()const; - iterator end()const; - unsigned long size()const{ return _size; } - bool valid()const{ return hfile != 0; } - friend class mapfile_iterator; -}; - -class BOOST_REGEX_DECL mapfile_iterator -{ - typedef mapfile::pointer internal_pointer; - internal_pointer* node; - const mapfile* file; - unsigned long offset; - long position()const - { - return file ? ((node - file->_first) * mapfile::buf_size + offset) : 0; - } - void position(long pos) - { - if(file) - { - node = file->_first + (pos / mapfile::buf_size); - offset = pos % mapfile::buf_size; - } - } -public: - typedef std::ptrdiff_t difference_type; - typedef char value_type; - typedef const char* pointer; - typedef const char& reference; - typedef std::random_access_iterator_tag iterator_category; - - mapfile_iterator() { node = 0; file = 0; offset = 0; } - mapfile_iterator(const mapfile* f, long arg_position) - { - BOOST_ASSERT(f); - file = f; - node = f->_first + arg_position / mapfile::buf_size; - offset = arg_position % mapfile::buf_size; - file->lock(node); - } - mapfile_iterator(const mapfile_iterator& i) - { - file = i.file; - node = i.node; - offset = i.offset; - if(file) - file->lock(node); - } - ~mapfile_iterator() - { - if(file && node) - file->unlock(node); - } - mapfile_iterator& operator = (const mapfile_iterator& i); - char operator* ()const - { - BOOST_ASSERT(node >= file->_first); - BOOST_ASSERT(node < file->_last); - return file ? *(*node + sizeof(int) + offset) : char(0); - } - char operator[] (long off)const - { - mapfile_iterator tmp(*this); - tmp += off; - return *tmp; - } - mapfile_iterator& operator++ (); - mapfile_iterator operator++ (int); - mapfile_iterator& operator-- (); - mapfile_iterator operator-- (int); - - mapfile_iterator& operator += (long off) - { - position(position() + off); - return *this; - } - mapfile_iterator& operator -= (long off) - { - position(position() - off); - return *this; - } - - #if !defined(BOOST_EMBTC) - - friend inline bool operator==(const mapfile_iterator& i, const mapfile_iterator& j) - { - return (i.file == j.file) && (i.node == j.node) && (i.offset == j.offset); - } - - friend inline bool operator!=(const mapfile_iterator& i, const mapfile_iterator& j) - { - return !(i == j); - } - - friend inline bool operator<(const mapfile_iterator& i, const mapfile_iterator& j) - { - return i.position() < j.position(); - } - friend inline bool operator>(const mapfile_iterator& i, const mapfile_iterator& j) - { - return i.position() > j.position(); - } - friend inline bool operator<=(const mapfile_iterator& i, const mapfile_iterator& j) - { - return i.position() <= j.position(); - } - friend inline bool operator>=(const mapfile_iterator& i, const mapfile_iterator& j) - { - return i.position() >= j.position(); - } - - friend mapfile_iterator operator + (const mapfile_iterator& i, long off); - friend mapfile_iterator operator + (long off, const mapfile_iterator& i) - { - mapfile_iterator tmp(i); - return tmp += off; - } - friend mapfile_iterator operator - (const mapfile_iterator& i, long off); - friend inline long operator - (const mapfile_iterator& i, const mapfile_iterator& j) - { - return i.position() - j.position(); - } - - #else - - friend bool operator==(const mapfile_iterator& i, const mapfile_iterator& j); - friend bool operator!=(const mapfile_iterator& i, const mapfile_iterator& j); - friend bool operator<(const mapfile_iterator& i, const mapfile_iterator& j); - friend bool operator>(const mapfile_iterator& i, const mapfile_iterator& j); - friend bool operator<=(const mapfile_iterator& i, const mapfile_iterator& j); - friend bool operator>=(const mapfile_iterator& i, const mapfile_iterator& j); - friend mapfile_iterator operator + (const mapfile_iterator& i, long off); - friend mapfile_iterator operator + (long off, const mapfile_iterator& i); - friend mapfile_iterator operator - (const mapfile_iterator& i, long off); - friend long operator - (const mapfile_iterator& i, const mapfile_iterator& j); - - #endif - -}; - -#if defined(BOOST_EMBTC) - - inline bool operator==(const mapfile_iterator& i, const mapfile_iterator& j) - { - return (i.file == j.file) && (i.node == j.node) && (i.offset == j.offset); - } - - inline bool operator!=(const mapfile_iterator& i, const mapfile_iterator& j) - { - return !(i == j); - } - - inline bool operator<(const mapfile_iterator& i, const mapfile_iterator& j) - { - return i.position() < j.position(); - } - inline bool operator>(const mapfile_iterator& i, const mapfile_iterator& j) - { - return i.position() > j.position(); - } - inline bool operator<=(const mapfile_iterator& i, const mapfile_iterator& j) - { - return i.position() <= j.position(); - } - inline bool operator>=(const mapfile_iterator& i, const mapfile_iterator& j) - { - return i.position() >= j.position(); - } - mapfile_iterator operator + (long off, const mapfile_iterator& i) - { - mapfile_iterator tmp(i); - return tmp += off; - } - inline long operator - (const mapfile_iterator& i, const mapfile_iterator& j) - { - return i.position() - j.position(); - } - -#endif - -#endif - -// _fi_sep determines the directory separator, either '\\' or '/' -BOOST_REGEX_DECL extern const char* _fi_sep; - -struct file_iterator_ref -{ - _fi_find_handle hf; - _fi_find_data _data; - long count; -}; - - -class BOOST_REGEX_DECL file_iterator -{ - char* _root; - char* _path; - char* ptr; - file_iterator_ref* ref; - -public: - typedef std::ptrdiff_t difference_type; - typedef const char* value_type; - typedef const char** pointer; - typedef const char*& reference; - typedef std::input_iterator_tag iterator_category; - - file_iterator(); - file_iterator(const char* wild); - ~file_iterator(); - file_iterator(const file_iterator&); - file_iterator& operator=(const file_iterator&); - const char* root()const { return _root; } - const char* path()const { return _path; } - const char* name()const { return ptr; } - _fi_find_data* data() { return &(ref->_data); } - void next(); - file_iterator& operator++() { next(); return *this; } - file_iterator operator++(int); - const char* operator*() { return path(); } - - #if !defined(BOOST_EMBTC) - - friend inline bool operator == (const file_iterator& f1, const file_iterator& f2) - { - return ((f1.ref->hf == _fi_invalid_handle) && (f2.ref->hf == _fi_invalid_handle)); - } - - friend inline bool operator != (const file_iterator& f1, const file_iterator& f2) - { - return !(f1 == f2); - } - - #else - - friend bool operator == (const file_iterator& f1, const file_iterator& f2); - friend bool operator != (const file_iterator& f1, const file_iterator& f2); - - #endif - -}; - -#if defined(BOOST_EMBTC) - - inline bool operator == (const file_iterator& f1, const file_iterator& f2) - { - return ((f1.ref->hf == _fi_invalid_handle) && (f2.ref->hf == _fi_invalid_handle)); - } - - inline bool operator != (const file_iterator& f1, const file_iterator& f2) - { - return !(f1 == f2); - } - -#endif - -// dwa 9/13/00 - suppress unused parameter warning -inline bool operator < (const file_iterator&, const file_iterator&) -{ - return false; -} - - -class BOOST_REGEX_DECL directory_iterator -{ - char* _root; - char* _path; - char* ptr; - file_iterator_ref* ref; - -public: - typedef std::ptrdiff_t difference_type; - typedef const char* value_type; - typedef const char** pointer; - typedef const char*& reference; - typedef std::input_iterator_tag iterator_category; - - directory_iterator(); - directory_iterator(const char* wild); - ~directory_iterator(); - directory_iterator(const directory_iterator& other); - directory_iterator& operator=(const directory_iterator& other); - - const char* root()const { return _root; } - const char* path()const { return _path; } - const char* name()const { return ptr; } - _fi_find_data* data() { return &(ref->_data); } - void next(); - directory_iterator& operator++() { next(); return *this; } - directory_iterator operator++(int); - const char* operator*() { return path(); } - - static const char* separator() { return _fi_sep; } - - #if !defined(BOOST_EMBTC) - - friend inline bool operator == (const directory_iterator& f1, const directory_iterator& f2) - { - return ((f1.ref->hf == _fi_invalid_handle) && (f2.ref->hf == _fi_invalid_handle)); - } - - - friend inline bool operator != (const directory_iterator& f1, const directory_iterator& f2) - { - return !(f1 == f2); - } - - #else - - friend bool operator == (const directory_iterator& f1, const directory_iterator& f2); - friend bool operator != (const directory_iterator& f1, const directory_iterator& f2); - - #endif - - }; - -#if defined(BOOST_EMBTC) - - inline bool operator == (const directory_iterator& f1, const directory_iterator& f2) - { - return ((f1.ref->hf == _fi_invalid_handle) && (f2.ref->hf == _fi_invalid_handle)); - } - - - inline bool operator != (const directory_iterator& f1, const directory_iterator& f2) - { - return !(f1 == f2); - } - -#endif - -inline bool operator < (const directory_iterator&, const directory_iterator&) -{ - return false; -} - -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_SUFFIX -#endif - - -} // namespace BOOST_REGEX_DETAIL_NS -using boost::BOOST_REGEX_DETAIL_NS::directory_iterator; -using boost::BOOST_REGEX_DETAIL_NS::file_iterator; -using boost::BOOST_REGEX_DETAIL_NS::mapfile; -} // namespace boost - -#endif // BOOST_REGEX_NO_FILEITER -#endif // BOOST_RE_FILEITER_HPP - - - - - - - - - - - - - - - - - - diff --git a/include/boost/regex/v4/icu.hpp b/include/boost/regex/v4/icu.hpp new file mode 100644 index 00000000..9724b0f8 --- /dev/null +++ b/include/boost/regex/v4/icu.hpp @@ -0,0 +1,1516 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE icu.hpp + * VERSION see + * DESCRIPTION: Unicode regular expressions on top of the ICU Library. + */ + +#ifndef BOOST_REGEX_ICU_V4_HPP +#define BOOST_REGEX_ICU_V4_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef BOOST_MSVC +#pragma warning (push) +#pragma warning (disable: 4251) +#endif + +namespace boost { + + namespace BOOST_REGEX_DETAIL_NS { + + // + // Implementation details: + // + class icu_regex_traits_implementation + { + typedef UChar32 char_type; + typedef std::size_t size_type; + typedef std::vector string_type; + typedef U_NAMESPACE_QUALIFIER Locale locale_type; + typedef boost::uint_least32_t char_class_type; + public: + icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& l) + : m_locale(l) + { + UErrorCode success = U_ZERO_ERROR; + m_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success)); + if (U_SUCCESS(success) == 0) + init_error(); + m_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::IDENTICAL); + success = U_ZERO_ERROR; + m_primary_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success)); + if (U_SUCCESS(success) == 0) + init_error(); + m_primary_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::PRIMARY); + } + U_NAMESPACE_QUALIFIER Locale getloc()const + { + return m_locale; + } + string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const + { + // TODO make thread safe!!!! : + typedef u32_to_u16_iterator itt; + itt i(p1), j(p2); +#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS + std::vector< ::UChar> t(i, j); +#else + std::vector< ::UChar> t; + while (i != j) + t.push_back(*i++); +#endif + ::uint8_t result[100]; + ::int32_t len; + if (!t.empty()) + len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), result, sizeof(result)); + else + len = pcoll->getSortKey(static_cast(0), static_cast< ::int32_t>(0), result, sizeof(result)); + if (std::size_t(len) > sizeof(result)) + { + scoped_array< ::uint8_t> presult(new ::uint8_t[len + 1]); + if (!t.empty()) + len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), presult.get(), len + 1); + else + len = pcoll->getSortKey(static_cast(0), static_cast< ::int32_t>(0), presult.get(), len + 1); + if ((0 == presult[len - 1]) && (len > 1)) + --len; +#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS + return string_type(presult.get(), presult.get() + len); +#else + string_type sresult; + ::uint8_t const* ia = presult.get(); + ::uint8_t const* ib = presult.get() + len; + while (ia != ib) + sresult.push_back(*ia++); + return sresult; +#endif + } + if ((0 == result[len - 1]) && (len > 1)) + --len; +#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS + return string_type(result, result + len); +#else + string_type sresult; + ::uint8_t const* ia = result; + ::uint8_t const* ib = result + len; + while (ia != ib) + sresult.push_back(*ia++); + return sresult; +#endif + } + string_type transform(const char_type* p1, const char_type* p2) const + { + return do_transform(p1, p2, m_collator.get()); + } + string_type transform_primary(const char_type* p1, const char_type* p2) const + { + return do_transform(p1, p2, m_primary_collator.get()); + } + private: + void init_error() + { + std::runtime_error e("Could not initialize ICU resources"); + boost::throw_exception(e); + } + U_NAMESPACE_QUALIFIER Locale m_locale; // The ICU locale that we're using + boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_collator; // The full collation object + boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_primary_collator; // The primary collation object + }; + + inline boost::shared_ptr get_icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& loc) + { + return boost::shared_ptr(new icu_regex_traits_implementation(loc)); + } + + } + + class icu_regex_traits + { + public: + typedef UChar32 char_type; + typedef std::size_t size_type; + typedef std::vector string_type; + typedef U_NAMESPACE_QUALIFIER Locale locale_type; +#ifdef BOOST_NO_INT64_T + typedef std::bitset<64> char_class_type; +#else + typedef boost::uint64_t char_class_type; +#endif + + struct boost_extensions_tag {}; + + icu_regex_traits() + : m_pimpl(BOOST_REGEX_DETAIL_NS::get_icu_regex_traits_implementation(U_NAMESPACE_QUALIFIER Locale())) + { + } + static size_type length(const char_type* p) + { + size_type result = 0; + while (*p) + { + ++p; + ++result; + } + return result; + } + ::boost::regex_constants::syntax_type syntax_type(char_type c)const + { + return ((c < 0x7f) && (c > 0)) ? BOOST_REGEX_DETAIL_NS::get_default_syntax_type(static_cast(c)) : regex_constants::syntax_char; + } + ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c) const + { + return ((c < 0x7f) && (c > 0)) ? BOOST_REGEX_DETAIL_NS::get_default_escape_syntax_type(static_cast(c)) : regex_constants::syntax_char; + } + char_type translate(char_type c) const + { + return c; + } + char_type translate_nocase(char_type c) const + { + return ::u_tolower(c); + } + char_type translate(char_type c, bool icase) const + { + return icase ? translate_nocase(c) : translate(c); + } + char_type tolower(char_type c) const + { + return ::u_tolower(c); + } + char_type toupper(char_type c) const + { + return ::u_toupper(c); + } + string_type transform(const char_type* p1, const char_type* p2) const + { + return m_pimpl->transform(p1, p2); + } + string_type transform_primary(const char_type* p1, const char_type* p2) const + { + return m_pimpl->transform_primary(p1, p2); + } + char_class_type lookup_classname(const char_type* p1, const char_type* p2) const + { + static const char_class_type mask_blank = char_class_type(1) << offset_blank; + static const char_class_type mask_space = char_class_type(1) << offset_space; + static const char_class_type mask_xdigit = char_class_type(1) << offset_xdigit; + static const char_class_type mask_underscore = char_class_type(1) << offset_underscore; + static const char_class_type mask_unicode = char_class_type(1) << offset_unicode; + static const char_class_type mask_any = char_class_type(1) << offset_any; + static const char_class_type mask_ascii = char_class_type(1) << offset_ascii; + static const char_class_type mask_horizontal = char_class_type(1) << offset_horizontal; + static const char_class_type mask_vertical = char_class_type(1) << offset_vertical; + + static const char_class_type masks[] = + { + 0, + U_GC_L_MASK | U_GC_ND_MASK, + U_GC_L_MASK, + mask_blank, + U_GC_CC_MASK | U_GC_CF_MASK | U_GC_ZL_MASK | U_GC_ZP_MASK, + U_GC_ND_MASK, + U_GC_ND_MASK, + (0x3FFFFFFFu) & ~(U_GC_CC_MASK | U_GC_CF_MASK | U_GC_CS_MASK | U_GC_CN_MASK | U_GC_Z_MASK), + mask_horizontal, + U_GC_LL_MASK, + U_GC_LL_MASK, + ~(U_GC_C_MASK), + U_GC_P_MASK, + char_class_type(U_GC_Z_MASK) | mask_space, + char_class_type(U_GC_Z_MASK) | mask_space, + U_GC_LU_MASK, + mask_unicode, + U_GC_LU_MASK, + mask_vertical, + char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, + char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, + char_class_type(U_GC_ND_MASK) | mask_xdigit, + }; + + int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); + if (idx >= 0) + return masks[idx + 1]; + char_class_type result = lookup_icu_mask(p1, p2); + if (result != 0) + return result; + + if (idx < 0) + { + string_type s(p1, p2); + string_type::size_type i = 0; + while (i < s.size()) + { + s[i] = static_cast((::u_tolower)(s[i])); + if (::u_isspace(s[i]) || (s[i] == '-') || (s[i] == '_')) + s.erase(s.begin() + i, s.begin() + i + 1); + else + { + s[i] = static_cast((::u_tolower)(s[i])); + ++i; + } + } + if (!s.empty()) + idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); + if (idx >= 0) + return masks[idx + 1]; + if (!s.empty()) + result = lookup_icu_mask(&*s.begin(), &*s.begin() + s.size()); + if (result != 0) + return result; + } + BOOST_ASSERT(std::size_t(idx + 1) < sizeof(masks) / sizeof(masks[0])); + return masks[idx + 1]; + } + string_type lookup_collatename(const char_type* p1, const char_type* p2) const + { + string_type result; +#ifdef BOOST_NO_CXX98_BINDERS + if (std::find_if(p1, p2, std::bind(std::greater< ::UChar32>(), std::placeholders::_1, 0x7f)) == p2) +#else + if (std::find_if(p1, p2, std::bind2nd(std::greater< ::UChar32>(), 0x7f)) == p2) +#endif + { +#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS + std::string s(p1, p2); +#else + std::string s; + const char_type* p3 = p1; + while (p3 != p2) + s.append(1, *p3++); +#endif + // Try Unicode name: + UErrorCode err = U_ZERO_ERROR; + UChar32 c = ::u_charFromName(U_UNICODE_CHAR_NAME, s.c_str(), &err); + if (U_SUCCESS(err)) + { + result.push_back(c); + return result; + } + // Try Unicode-extended name: + err = U_ZERO_ERROR; + c = ::u_charFromName(U_EXTENDED_CHAR_NAME, s.c_str(), &err); + if (U_SUCCESS(err)) + { + result.push_back(c); + return result; + } + // try POSIX name: + s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s); +#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS + result.assign(s.begin(), s.end()); +#else + result.clear(); + std::string::const_iterator si, sj; + si = s.begin(); + sj = s.end(); + while (si != sj) + result.push_back(*si++); +#endif + } + if (result.empty() && (p2 - p1 == 1)) + result.push_back(*p1); + return result; + } + bool isctype(char_type c, char_class_type f) const + { + static const char_class_type mask_blank = char_class_type(1) << offset_blank; + static const char_class_type mask_space = char_class_type(1) << offset_space; + static const char_class_type mask_xdigit = char_class_type(1) << offset_xdigit; + static const char_class_type mask_underscore = char_class_type(1) << offset_underscore; + static const char_class_type mask_unicode = char_class_type(1) << offset_unicode; + static const char_class_type mask_any = char_class_type(1) << offset_any; + static const char_class_type mask_ascii = char_class_type(1) << offset_ascii; + static const char_class_type mask_horizontal = char_class_type(1) << offset_horizontal; + static const char_class_type mask_vertical = char_class_type(1) << offset_vertical; + + // check for standard catagories first: + char_class_type m = char_class_type(static_cast(1) << u_charType(c)); + if ((m & f) != 0) + return true; + // now check for special cases: + if (((f & mask_blank) != 0) && u_isblank(c)) + return true; + if (((f & mask_space) != 0) && u_isspace(c)) + return true; + if (((f & mask_xdigit) != 0) && (u_digit(c, 16) >= 0)) + return true; + if (((f & mask_unicode) != 0) && (c >= 0x100)) + return true; + if (((f & mask_underscore) != 0) && (c == '_')) + return true; + if (((f & mask_any) != 0) && (c <= 0x10FFFF)) + return true; + if (((f & mask_ascii) != 0) && (c <= 0x7F)) + return true; + if (((f & mask_vertical) != 0) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == static_cast('\v')) || (m == U_GC_ZL_MASK) || (m == U_GC_ZP_MASK))) + return true; + if (((f & mask_horizontal) != 0) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && u_isspace(c) && (c != static_cast('\v'))) + return true; + return false; + } + boost::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const + { + return BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); + } + int value(char_type c, int radix)const + { + return u_digit(c, static_cast< ::int8_t>(radix)); + } + locale_type imbue(locale_type l) + { + locale_type result(m_pimpl->getloc()); + m_pimpl = BOOST_REGEX_DETAIL_NS::get_icu_regex_traits_implementation(l); + return result; + } + locale_type getloc()const + { + return locale_type(); + } + std::string error_string(::boost::regex_constants::error_type n) const + { + return BOOST_REGEX_DETAIL_NS::get_default_error_string(n); + } + private: + icu_regex_traits(const icu_regex_traits&); + icu_regex_traits& operator=(const icu_regex_traits&); + + // + // define the bitmasks offsets we need for additional character properties: + // + enum { + offset_blank = U_CHAR_CATEGORY_COUNT, + offset_space = U_CHAR_CATEGORY_COUNT + 1, + offset_xdigit = U_CHAR_CATEGORY_COUNT + 2, + offset_underscore = U_CHAR_CATEGORY_COUNT + 3, + offset_unicode = U_CHAR_CATEGORY_COUNT + 4, + offset_any = U_CHAR_CATEGORY_COUNT + 5, + offset_ascii = U_CHAR_CATEGORY_COUNT + 6, + offset_horizontal = U_CHAR_CATEGORY_COUNT + 7, + offset_vertical = U_CHAR_CATEGORY_COUNT + 8 + }; + + static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2) + { + static const char_class_type mask_blank = char_class_type(1) << offset_blank; + static const char_class_type mask_space = char_class_type(1) << offset_space; + static const char_class_type mask_xdigit = char_class_type(1) << offset_xdigit; + static const char_class_type mask_underscore = char_class_type(1) << offset_underscore; + static const char_class_type mask_unicode = char_class_type(1) << offset_unicode; + static const char_class_type mask_any = char_class_type(1) << offset_any; + static const char_class_type mask_ascii = char_class_type(1) << offset_ascii; + static const char_class_type mask_horizontal = char_class_type(1) << offset_horizontal; + static const char_class_type mask_vertical = char_class_type(1) << offset_vertical; + + static const ::UChar32 prop_name_table[] = { + /* any */ 'a', 'n', 'y', + /* ascii */ 'a', 's', 'c', 'i', 'i', + /* assigned */ 'a', 's', 's', 'i', 'g', 'n', 'e', 'd', + /* c* */ 'c', '*', + /* cc */ 'c', 'c', + /* cf */ 'c', 'f', + /* closepunctuation */ 'c', 'l', 'o', 's', 'e', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* cn */ 'c', 'n', + /* co */ 'c', 'o', + /* connectorpunctuation */ 'c', 'o', 'n', 'n', 'e', 'c', 't', 'o', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* control */ 'c', 'o', 'n', 't', 'r', 'o', 'l', + /* cs */ 'c', 's', + /* currencysymbol */ 'c', 'u', 'r', 'r', 'e', 'n', 'c', 'y', 's', 'y', 'm', 'b', 'o', 'l', + /* dashpunctuation */ 'd', 'a', 's', 'h', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* decimaldigitnumber */ 'd', 'e', 'c', 'i', 'm', 'a', 'l', 'd', 'i', 'g', 'i', 't', 'n', 'u', 'm', 'b', 'e', 'r', + /* enclosingmark */ 'e', 'n', 'c', 'l', 'o', 's', 'i', 'n', 'g', 'm', 'a', 'r', 'k', + /* finalpunctuation */ 'f', 'i', 'n', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* format */ 'f', 'o', 'r', 'm', 'a', 't', + /* initialpunctuation */ 'i', 'n', 'i', 't', 'i', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* l* */ 'l', '*', + /* letter */ 'l', 'e', 't', 't', 'e', 'r', + /* letternumber */ 'l', 'e', 't', 't', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', + /* lineseparator */ 'l', 'i', 'n', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* ll */ 'l', 'l', + /* lm */ 'l', 'm', + /* lo */ 'l', 'o', + /* lowercaseletter */ 'l', 'o', 'w', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', + /* lt */ 'l', 't', + /* lu */ 'l', 'u', + /* m* */ 'm', '*', + /* mark */ 'm', 'a', 'r', 'k', + /* mathsymbol */ 'm', 'a', 't', 'h', 's', 'y', 'm', 'b', 'o', 'l', + /* mc */ 'm', 'c', + /* me */ 'm', 'e', + /* mn */ 'm', 'n', + /* modifierletter */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', + /* modifiersymbol */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', + /* n* */ 'n', '*', + /* nd */ 'n', 'd', + /* nl */ 'n', 'l', + /* no */ 'n', 'o', + /* nonspacingmark */ 'n', 'o', 'n', 's', 'p', 'a', 'c', 'i', 'n', 'g', 'm', 'a', 'r', 'k', + /* notassigned */ 'n', 'o', 't', 'a', 's', 's', 'i', 'g', 'n', 'e', 'd', + /* number */ 'n', 'u', 'm', 'b', 'e', 'r', + /* openpunctuation */ 'o', 'p', 'e', 'n', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* other */ 'o', 't', 'h', 'e', 'r', + /* otherletter */ 'o', 't', 'h', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', + /* othernumber */ 'o', 't', 'h', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', + /* otherpunctuation */ 'o', 't', 'h', 'e', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* othersymbol */ 'o', 't', 'h', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', + /* p* */ 'p', '*', + /* paragraphseparator */ 'p', 'a', 'r', 'a', 'g', 'r', 'a', 'p', 'h', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* pc */ 'p', 'c', + /* pd */ 'p', 'd', + /* pe */ 'p', 'e', + /* pf */ 'p', 'f', + /* pi */ 'p', 'i', + /* po */ 'p', 'o', + /* privateuse */ 'p', 'r', 'i', 'v', 'a', 't', 'e', 'u', 's', 'e', + /* ps */ 'p', 's', + /* punctuation */ 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* s* */ 's', '*', + /* sc */ 's', 'c', + /* separator */ 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* sk */ 's', 'k', + /* sm */ 's', 'm', + /* so */ 's', 'o', + /* spaceseparator */ 's', 'p', 'a', 'c', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* spacingcombiningmark */ 's', 'p', 'a', 'c', 'i', 'n', 'g', 'c', 'o', 'm', 'b', 'i', 'n', 'i', 'n', 'g', 'm', 'a', 'r', 'k', + /* surrogate */ 's', 'u', 'r', 'r', 'o', 'g', 'a', 't', 'e', + /* symbol */ 's', 'y', 'm', 'b', 'o', 'l', + /* titlecase */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', + /* titlecaseletter */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', + /* uppercaseletter */ 'u', 'p', 'p', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', + /* z* */ 'z', '*', + /* zl */ 'z', 'l', + /* zp */ 'z', 'p', + /* zs */ 'z', 's', + }; + + static const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> range_data[] = { + { prop_name_table + 0, prop_name_table + 3, }, // any + { prop_name_table + 3, prop_name_table + 8, }, // ascii + { prop_name_table + 8, prop_name_table + 16, }, // assigned + { prop_name_table + 16, prop_name_table + 18, }, // c* + { prop_name_table + 18, prop_name_table + 20, }, // cc + { prop_name_table + 20, prop_name_table + 22, }, // cf + { prop_name_table + 22, prop_name_table + 38, }, // closepunctuation + { prop_name_table + 38, prop_name_table + 40, }, // cn + { prop_name_table + 40, prop_name_table + 42, }, // co + { prop_name_table + 42, prop_name_table + 62, }, // connectorpunctuation + { prop_name_table + 62, prop_name_table + 69, }, // control + { prop_name_table + 69, prop_name_table + 71, }, // cs + { prop_name_table + 71, prop_name_table + 85, }, // currencysymbol + { prop_name_table + 85, prop_name_table + 100, }, // dashpunctuation + { prop_name_table + 100, prop_name_table + 118, }, // decimaldigitnumber + { prop_name_table + 118, prop_name_table + 131, }, // enclosingmark + { prop_name_table + 131, prop_name_table + 147, }, // finalpunctuation + { prop_name_table + 147, prop_name_table + 153, }, // format + { prop_name_table + 153, prop_name_table + 171, }, // initialpunctuation + { prop_name_table + 171, prop_name_table + 173, }, // l* + { prop_name_table + 173, prop_name_table + 179, }, // letter + { prop_name_table + 179, prop_name_table + 191, }, // letternumber + { prop_name_table + 191, prop_name_table + 204, }, // lineseparator + { prop_name_table + 204, prop_name_table + 206, }, // ll + { prop_name_table + 206, prop_name_table + 208, }, // lm + { prop_name_table + 208, prop_name_table + 210, }, // lo + { prop_name_table + 210, prop_name_table + 225, }, // lowercaseletter + { prop_name_table + 225, prop_name_table + 227, }, // lt + { prop_name_table + 227, prop_name_table + 229, }, // lu + { prop_name_table + 229, prop_name_table + 231, }, // m* + { prop_name_table + 231, prop_name_table + 235, }, // mark + { prop_name_table + 235, prop_name_table + 245, }, // mathsymbol + { prop_name_table + 245, prop_name_table + 247, }, // mc + { prop_name_table + 247, prop_name_table + 249, }, // me + { prop_name_table + 249, prop_name_table + 251, }, // mn + { prop_name_table + 251, prop_name_table + 265, }, // modifierletter + { prop_name_table + 265, prop_name_table + 279, }, // modifiersymbol + { prop_name_table + 279, prop_name_table + 281, }, // n* + { prop_name_table + 281, prop_name_table + 283, }, // nd + { prop_name_table + 283, prop_name_table + 285, }, // nl + { prop_name_table + 285, prop_name_table + 287, }, // no + { prop_name_table + 287, prop_name_table + 301, }, // nonspacingmark + { prop_name_table + 301, prop_name_table + 312, }, // notassigned + { prop_name_table + 312, prop_name_table + 318, }, // number + { prop_name_table + 318, prop_name_table + 333, }, // openpunctuation + { prop_name_table + 333, prop_name_table + 338, }, // other + { prop_name_table + 338, prop_name_table + 349, }, // otherletter + { prop_name_table + 349, prop_name_table + 360, }, // othernumber + { prop_name_table + 360, prop_name_table + 376, }, // otherpunctuation + { prop_name_table + 376, prop_name_table + 387, }, // othersymbol + { prop_name_table + 387, prop_name_table + 389, }, // p* + { prop_name_table + 389, prop_name_table + 407, }, // paragraphseparator + { prop_name_table + 407, prop_name_table + 409, }, // pc + { prop_name_table + 409, prop_name_table + 411, }, // pd + { prop_name_table + 411, prop_name_table + 413, }, // pe + { prop_name_table + 413, prop_name_table + 415, }, // pf + { prop_name_table + 415, prop_name_table + 417, }, // pi + { prop_name_table + 417, prop_name_table + 419, }, // po + { prop_name_table + 419, prop_name_table + 429, }, // privateuse + { prop_name_table + 429, prop_name_table + 431, }, // ps + { prop_name_table + 431, prop_name_table + 442, }, // punctuation + { prop_name_table + 442, prop_name_table + 444, }, // s* + { prop_name_table + 444, prop_name_table + 446, }, // sc + { prop_name_table + 446, prop_name_table + 455, }, // separator + { prop_name_table + 455, prop_name_table + 457, }, // sk + { prop_name_table + 457, prop_name_table + 459, }, // sm + { prop_name_table + 459, prop_name_table + 461, }, // so + { prop_name_table + 461, prop_name_table + 475, }, // spaceseparator + { prop_name_table + 475, prop_name_table + 495, }, // spacingcombiningmark + { prop_name_table + 495, prop_name_table + 504, }, // surrogate + { prop_name_table + 504, prop_name_table + 510, }, // symbol + { prop_name_table + 510, prop_name_table + 519, }, // titlecase + { prop_name_table + 519, prop_name_table + 534, }, // titlecaseletter + { prop_name_table + 534, prop_name_table + 549, }, // uppercaseletter + { prop_name_table + 549, prop_name_table + 551, }, // z* + { prop_name_table + 551, prop_name_table + 553, }, // zl + { prop_name_table + 553, prop_name_table + 555, }, // zp + { prop_name_table + 555, prop_name_table + 557, }, // zs + }; + + static const icu_regex_traits::char_class_type icu_class_map[] = { + mask_any, // any + mask_ascii, // ascii + (0x3FFFFFFFu) & ~(U_GC_CN_MASK), // assigned + U_GC_C_MASK, // c* + U_GC_CC_MASK, // cc + U_GC_CF_MASK, // cf + U_GC_PE_MASK, // closepunctuation + U_GC_CN_MASK, // cn + U_GC_CO_MASK, // co + U_GC_PC_MASK, // connectorpunctuation + U_GC_CC_MASK, // control + U_GC_CS_MASK, // cs + U_GC_SC_MASK, // currencysymbol + U_GC_PD_MASK, // dashpunctuation + U_GC_ND_MASK, // decimaldigitnumber + U_GC_ME_MASK, // enclosingmark + U_GC_PF_MASK, // finalpunctuation + U_GC_CF_MASK, // format + U_GC_PI_MASK, // initialpunctuation + U_GC_L_MASK, // l* + U_GC_L_MASK, // letter + U_GC_NL_MASK, // letternumber + U_GC_ZL_MASK, // lineseparator + U_GC_LL_MASK, // ll + U_GC_LM_MASK, // lm + U_GC_LO_MASK, // lo + U_GC_LL_MASK, // lowercaseletter + U_GC_LT_MASK, // lt + U_GC_LU_MASK, // lu + U_GC_M_MASK, // m* + U_GC_M_MASK, // mark + U_GC_SM_MASK, // mathsymbol + U_GC_MC_MASK, // mc + U_GC_ME_MASK, // me + U_GC_MN_MASK, // mn + U_GC_LM_MASK, // modifierletter + U_GC_SK_MASK, // modifiersymbol + U_GC_N_MASK, // n* + U_GC_ND_MASK, // nd + U_GC_NL_MASK, // nl + U_GC_NO_MASK, // no + U_GC_MN_MASK, // nonspacingmark + U_GC_CN_MASK, // notassigned + U_GC_N_MASK, // number + U_GC_PS_MASK, // openpunctuation + U_GC_C_MASK, // other + U_GC_LO_MASK, // otherletter + U_GC_NO_MASK, // othernumber + U_GC_PO_MASK, // otherpunctuation + U_GC_SO_MASK, // othersymbol + U_GC_P_MASK, // p* + U_GC_ZP_MASK, // paragraphseparator + U_GC_PC_MASK, // pc + U_GC_PD_MASK, // pd + U_GC_PE_MASK, // pe + U_GC_PF_MASK, // pf + U_GC_PI_MASK, // pi + U_GC_PO_MASK, // po + U_GC_CO_MASK, // privateuse + U_GC_PS_MASK, // ps + U_GC_P_MASK, // punctuation + U_GC_S_MASK, // s* + U_GC_SC_MASK, // sc + U_GC_Z_MASK, // separator + U_GC_SK_MASK, // sk + U_GC_SM_MASK, // sm + U_GC_SO_MASK, // so + U_GC_ZS_MASK, // spaceseparator + U_GC_MC_MASK, // spacingcombiningmark + U_GC_CS_MASK, // surrogate + U_GC_S_MASK, // symbol + U_GC_LT_MASK, // titlecase + U_GC_LT_MASK, // titlecaseletter + U_GC_LU_MASK, // uppercaseletter + U_GC_Z_MASK, // z* + U_GC_ZL_MASK, // zl + U_GC_ZP_MASK, // zp + U_GC_ZS_MASK, // zs + }; + + + const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_begin = range_data; + const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_end = range_data + (sizeof(range_data) / sizeof(range_data[0])); + + BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> t = { p1, p2, }; + const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* p = std::lower_bound(ranges_begin, ranges_end, t); + if ((p != ranges_end) && (t == *p)) + return icu_class_map[p - ranges_begin]; + return 0; + } + + boost::shared_ptr< ::boost::BOOST_REGEX_DETAIL_NS::icu_regex_traits_implementation> m_pimpl; + }; + +} // namespace boost + +namespace boost { + + // types: + typedef basic_regex< ::UChar32, icu_regex_traits> u32regex; + typedef match_results u32match; + typedef match_results u16match; + + // + // Construction of 32-bit regex types from UTF-8 and UTF-16 primitives: + // + namespace BOOST_REGEX_DETAIL_NS { + +#if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) + template + inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const boost::mpl::int_<1>*) + { + typedef boost::u8_to_u32_iterator conv_type; + return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt); + } + + template + inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const boost::mpl::int_<2>*) + { + typedef boost::u16_to_u32_iterator conv_type; + return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt); + } + + template + inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const boost::mpl::int_<4>*) + { + return u32regex(i, j, opt); + } +#else + template + inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const boost::mpl::int_<1>*) + { + typedef boost::u8_to_u32_iterator conv_type; + typedef std::vector vector_type; + vector_type v; + conv_type a(i, i, j), b(j, i, j); + while (a != b) + { + v.push_back(*a); + ++a; + } + if (v.size()) + return u32regex(&*v.begin(), v.size(), opt); + return u32regex(static_cast(0), static_cast(0), opt); + } + + template + inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const boost::mpl::int_<2>*) + { + typedef boost::u16_to_u32_iterator conv_type; + typedef std::vector vector_type; + vector_type v; + conv_type a(i, i, j), b(j, i, j); + while (a != b) + { + v.push_back(*a); + ++a; + } + if (v.size()) + return u32regex(&*v.begin(), v.size(), opt); + return u32regex(static_cast(0), static_cast(0), opt); + } + + template + inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const boost::mpl::int_<4>*) + { + typedef std::vector vector_type; + vector_type v; + while (i != j) + { + v.push_back((UChar32)(*i)); + ++i; + } + if (v.size()) + return u32regex(&*v.begin(), v.size(), opt); + return u32regex(static_cast(0), static_cast(0), opt); + } +#endif + } + + // BOOST_REGEX_UCHAR_IS_WCHAR_T + // + // Source inspection of unicode/umachine.h in ICU version 59 indicates that: + // + // On version 59, UChar is always char16_t in C++ mode (and uint16_t in C mode) + // + // On earlier versions, the logic is + // + // #if U_SIZEOF_WCHAR_T==2 + // typedef wchar_t OldUChar; + // #elif defined(__CHAR16_TYPE__) + // typedef __CHAR16_TYPE__ OldUChar; + // #else + // typedef uint16_t OldUChar; + // #endif + // + // That is, UChar is wchar_t only on versions below 59, when U_SIZEOF_WCHAR_T==2 + // + // Hence, + +#define BOOST_REGEX_UCHAR_IS_WCHAR_T (U_ICU_VERSION_MAJOR_NUM < 59 && U_SIZEOF_WCHAR_T == 2) + +#if BOOST_REGEX_UCHAR_IS_WCHAR_T + BOOST_STATIC_ASSERT((boost::is_same::value)); +#else + BOOST_STATIC_ASSERT(!(boost::is_same::value)); +#endif + + // + // Construction from an iterator pair: + // + template + inline u32regex make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt) + { + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(i, j, opt, static_cast const*>(0)); + } + // + // construction from UTF-8 nul-terminated strings: + // + inline u32regex make_u32regex(const char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) + { + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::strlen(p), opt, static_cast const*>(0)); + } + inline u32regex make_u32regex(const unsigned char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) + { + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::strlen(reinterpret_cast(p)), opt, static_cast const*>(0)); + } + // + // construction from UTF-16 nul-terminated strings: + // +#ifndef BOOST_NO_WREGEX + inline u32regex make_u32regex(const wchar_t* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) + { + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::wcslen(p), opt, static_cast const*>(0)); + } +#endif +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T + inline u32regex make_u32regex(const UChar* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) + { + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + u_strlen(p), opt, static_cast const*>(0)); + } +#endif + // + // construction from basic_string class-template: + // + template + inline u32regex make_u32regex(const std::basic_string& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) + { + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(s.begin(), s.end(), opt, static_cast const*>(0)); + } + // + // Construction from ICU string type: + // + inline u32regex make_u32regex(const U_NAMESPACE_QUALIFIER UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) + { + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(s.getBuffer(), s.getBuffer() + s.length(), opt, static_cast const*>(0)); + } + + // + // regex_match overloads that widen the character type as appropriate: + // + namespace BOOST_REGEX_DETAIL_NS { + template + void copy_results(MR1& out, MR2 const& in, NSubs named_subs) + { + // copy results from an adapted MR2 match_results: + out.set_size(in.size(), in.prefix().first.base(), in.suffix().second.base()); + out.set_base(in.base().base()); + out.set_named_subs(named_subs); + for (int i = 0; i < (int)in.size(); ++i) + { + if (in[i].matched || !i) + { + out.set_first(in[i].first.base(), i); + out.set_second(in[i].second.base(), i, in[i].matched); + } + } +#ifdef BOOST_REGEX_MATCH_EXTRA + // Copy full capture info as well: + for (int i = 0; i < (int)in.size(); ++i) + { + if (in[i].captures().size()) + { + out[i].get_captures().assign(in[i].captures().size(), typename MR1::value_type()); + for (int j = 0; j < (int)out[i].captures().size(); ++j) + { + out[i].get_captures()[j].first = in[i].captures()[j].first.base(); + out[i].get_captures()[j].second = in[i].captures()[j].second.base(); + out[i].get_captures()[j].matched = in[i].captures()[j].matched; + } + } + } +#endif + } + + template + inline bool do_regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + boost::mpl::int_<4> const*) + { + return ::boost::regex_match(first, last, m, e, flags); + } + template + bool do_regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + boost::mpl::int_<2> const*) + { + typedef u16_to_u32_iterator conv_type; + typedef match_results match_type; + //typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags); + // copy results across to m: + if (result) copy_results(m, what, e.get_named_subs()); + return result; + } + template + bool do_regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + boost::mpl::int_<1> const*) + { + typedef u8_to_u32_iterator conv_type; + typedef match_results match_type; + //typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags); + // copy results across to m: + if (result) copy_results(m, what, e.get_named_subs()); + return result; + } + } // namespace BOOST_REGEX_DETAIL_NS + + template + inline bool u32regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_match(first, last, m, e, flags, static_cast const*>(0)); + } + inline bool u32regex_match(const UChar* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + u_strlen(p), m, e, flags, static_cast const*>(0)); + } +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) + inline bool u32regex_match(const wchar_t* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + std::wcslen(p), m, e, flags, static_cast const*>(0)); + } +#endif + inline bool u32regex_match(const char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + std::strlen(p), m, e, flags, static_cast const*>(0)); + } + inline bool u32regex_match(const unsigned char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + std::strlen((const char*)p), m, e, flags, static_cast const*>(0)); + } + inline bool u32regex_match(const std::string& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); + } +#ifndef BOOST_NO_STD_WSTRING + inline bool u32regex_match(const std::wstring& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); + } +#endif + inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast const*>(0)); + } + // + // regex_match overloads that do not return what matched: + // + template + inline bool u32regex_match(BidiIterator first, BidiIterator last, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(first, last, m, e, flags, static_cast const*>(0)); + } + inline bool u32regex_match(const UChar* p, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + u_strlen(p), m, e, flags, static_cast const*>(0)); + } +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) + inline bool u32regex_match(const wchar_t* p, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + std::wcslen(p), m, e, flags, static_cast const*>(0)); + } +#endif + inline bool u32regex_match(const char* p, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + std::strlen(p), m, e, flags, static_cast const*>(0)); + } + inline bool u32regex_match(const unsigned char* p, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + std::strlen((const char*)p), m, e, flags, static_cast const*>(0)); + } + inline bool u32regex_match(const std::string& s, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); + } +#ifndef BOOST_NO_STD_WSTRING + inline bool u32regex_match(const std::wstring& s, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); + } +#endif + inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast const*>(0)); + } + + // + // regex_search overloads that widen the character type as appropriate: + // + namespace BOOST_REGEX_DETAIL_NS { + template + inline bool do_regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + boost::mpl::int_<4> const*) + { + return ::boost::regex_search(first, last, m, e, flags, base); + } + template + bool do_regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + boost::mpl::int_<2> const*) + { + typedef u16_to_u32_iterator conv_type; + typedef match_results match_type; + //typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base)); + // copy results across to m: + if (result) copy_results(m, what, e.get_named_subs()); + return result; + } + template + bool do_regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + boost::mpl::int_<1> const*) + { + typedef u8_to_u32_iterator conv_type; + typedef match_results match_type; + //typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base)); + // copy results across to m: + if (result) copy_results(m, what, e.get_named_subs()); + return result; + } + } + + template + inline bool u32regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, first, static_cast const*>(0)); + } + template + inline bool u32regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base) + { + return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, base, static_cast const*>(0)); + } + inline bool u32regex_search(const UChar* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + u_strlen(p), m, e, flags, p, static_cast const*>(0)); + } +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) + inline bool u32regex_search(const wchar_t* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + std::wcslen(p), m, e, flags, p, static_cast const*>(0)); + } +#endif + inline bool u32regex_search(const char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + std::strlen(p), m, e, flags, p, static_cast const*>(0)); + } + inline bool u32regex_search(const unsigned char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + std::strlen((const char*)p), m, e, flags, p, static_cast const*>(0)); + } + inline bool u32regex_search(const std::string& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); + } +#ifndef BOOST_NO_STD_WSTRING + inline bool u32regex_search(const std::wstring& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); + } +#endif + inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast const*>(0)); + } + template + inline bool u32regex_search(BidiIterator first, BidiIterator last, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, first, static_cast const*>(0)); + } + inline bool u32regex_search(const UChar* p, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + u_strlen(p), m, e, flags, p, static_cast const*>(0)); + } +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) + inline bool u32regex_search(const wchar_t* p, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + std::wcslen(p), m, e, flags, p, static_cast const*>(0)); + } +#endif + inline bool u32regex_search(const char* p, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + std::strlen(p), m, e, flags, p, static_cast const*>(0)); + } + inline bool u32regex_search(const unsigned char* p, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + std::strlen((const char*)p), m, e, flags, p, static_cast const*>(0)); + } + inline bool u32regex_search(const std::string& s, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); + } +#ifndef BOOST_NO_STD_WSTRING + inline bool u32regex_search(const std::wstring& s, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); + } +#endif + inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + match_flag_type flags = match_default) + { + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast const*>(0)); + } + + // + // overloads for regex_replace with utf-8 and utf-16 data types: + // + namespace BOOST_REGEX_DETAIL_NS { + template + inline std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator > + make_utf32_seq(I i, I j, mpl::int_<1> const*) + { + return std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator >(boost::u8_to_u32_iterator(i, i, j), boost::u8_to_u32_iterator(j, i, j)); + } + template + inline std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator > + make_utf32_seq(I i, I j, mpl::int_<2> const*) + { + return std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator >(boost::u16_to_u32_iterator(i, i, j), boost::u16_to_u32_iterator(j, i, j)); + } + template + inline std::pair< I, I > + make_utf32_seq(I i, I j, mpl::int_<4> const*) + { + return std::pair< I, I >(i, j); + } + template + inline std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator > + make_utf32_seq(const charT* p, mpl::int_<1> const*) + { + std::size_t len = std::strlen((const char*)p); + return std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator >(boost::u8_to_u32_iterator(p, p, p + len), boost::u8_to_u32_iterator(p + len, p, p + len)); + } + template + inline std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator > + make_utf32_seq(const charT* p, mpl::int_<2> const*) + { + std::size_t len = u_strlen((const UChar*)p); + return std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator >(boost::u16_to_u32_iterator(p, p, p + len), boost::u16_to_u32_iterator(p + len, p, p + len)); + } + template + inline std::pair< const charT*, const charT* > + make_utf32_seq(const charT* p, mpl::int_<4> const*) + { + return std::pair< const charT*, const charT* >(p, p + icu_regex_traits::length((UChar32 const*)p)); + } + template + inline OutputIterator make_utf32_out(OutputIterator o, mpl::int_<4> const*) + { + return o; + } + template + inline utf16_output_iterator make_utf32_out(OutputIterator o, mpl::int_<2> const*) + { + return o; + } + template + inline utf8_output_iterator make_utf32_out(OutputIterator o, mpl::int_<1> const*) + { + return o; + } + + template + OutputIterator do_regex_replace(OutputIterator out, + std::pair const& in, + const u32regex& e, + const std::pair& fmt, + match_flag_type flags + ) + { + // unfortunately we have to copy the format string in order to pass in onward: + std::vector f; +#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS + f.assign(fmt.first, fmt.second); +#else + f.clear(); + I2 pos = fmt.first; + while (pos != fmt.second) + f.push_back(*pos++); +#endif + + regex_iterator i(in.first, in.second, e, flags); + regex_iterator j; + if (i == j) + { + if (!(flags & regex_constants::format_no_copy)) + out = BOOST_REGEX_DETAIL_NS::copy(in.first, in.second, out); + } + else + { + I1 last_m = in.first; + while (i != j) + { + if (!(flags & regex_constants::format_no_copy)) + out = BOOST_REGEX_DETAIL_NS::copy(i->prefix().first, i->prefix().second, out); + if (!f.empty()) + out = ::boost::BOOST_REGEX_DETAIL_NS::regex_format_imp(out, *i, &*f.begin(), &*f.begin() + f.size(), flags, e.get_traits()); + else + out = ::boost::BOOST_REGEX_DETAIL_NS::regex_format_imp(out, *i, static_cast(0), static_cast(0), flags, e.get_traits()); + last_m = (*i)[0].second; + if (flags & regex_constants::format_first_only) + break; + ++i; + } + if (!(flags & regex_constants::format_no_copy)) + out = BOOST_REGEX_DETAIL_NS::copy(last_m, in.second, out); + } + return out; + } + template + inline const BaseIterator& extract_output_base(const BaseIterator& b) + { + return b; + } + template + inline BaseIterator extract_output_base(const utf8_output_iterator& b) + { + return b.base(); + } + template + inline BaseIterator extract_output_base(const utf16_output_iterator& b) + { + return b.base(); + } + } // BOOST_REGEX_DETAIL_NS + + template + inline OutputIterator u32regex_replace(OutputIterator out, + BidirectionalIterator first, + BidirectionalIterator last, + const u32regex& e, + const charT* fmt, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::extract_output_base + ( + BOOST_REGEX_DETAIL_NS::do_regex_replace( + BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast const*>(0)), + BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast const*>(0)), + e, + BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt, static_cast const*>(0)), + flags) + ); + } + + template + inline OutputIterator u32regex_replace(OutputIterator out, + Iterator first, + Iterator last, + const u32regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::extract_output_base + ( + BOOST_REGEX_DETAIL_NS::do_regex_replace( + BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast const*>(0)), + BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast const*>(0)), + e, + BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.begin(), fmt.end(), static_cast const*>(0)), + flags) + ); + } + + template + inline OutputIterator u32regex_replace(OutputIterator out, + Iterator first, + Iterator last, + const u32regex& e, + const U_NAMESPACE_QUALIFIER UnicodeString& fmt, + match_flag_type flags = match_default) + { + return BOOST_REGEX_DETAIL_NS::extract_output_base + ( + BOOST_REGEX_DETAIL_NS::do_regex_replace( + BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast const*>(0)), + BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast const*>(0)), + e, + BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast const*>(0)), + flags) + ); + } + + template + std::basic_string u32regex_replace(const std::basic_string& s, + const u32regex& e, + const charT* fmt, + match_flag_type flags = match_default) + { + std::basic_string result; + BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); + u32regex_replace(i, s.begin(), s.end(), e, fmt, flags); + return result; + } + + template + std::basic_string u32regex_replace(const std::basic_string& s, + const u32regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) + { + std::basic_string result; + BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); + u32regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags); + return result; + } + + namespace BOOST_REGEX_DETAIL_NS { + + class unicode_string_out_iterator + { + U_NAMESPACE_QUALIFIER UnicodeString* out; + public: + unicode_string_out_iterator(U_NAMESPACE_QUALIFIER UnicodeString& s) : out(&s) {} + unicode_string_out_iterator& operator++() { return *this; } + unicode_string_out_iterator& operator++(int) { return *this; } + unicode_string_out_iterator& operator*() { return *this; } + unicode_string_out_iterator& operator=(UChar v) + { + *out += v; + return *this; + } + typedef std::ptrdiff_t difference_type; + typedef UChar value_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef std::output_iterator_tag iterator_category; + }; + + } + + inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + const UChar* fmt, + match_flag_type flags = match_default) + { + U_NAMESPACE_QUALIFIER UnicodeString result; + BOOST_REGEX_DETAIL_NS::unicode_string_out_iterator i(result); + u32regex_replace(i, s.getBuffer(), s.getBuffer() + s.length(), e, fmt, flags); + return result; + } + + inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + const U_NAMESPACE_QUALIFIER UnicodeString& fmt, + match_flag_type flags = match_default) + { + U_NAMESPACE_QUALIFIER UnicodeString result; + BOOST_REGEX_DETAIL_NS::unicode_string_out_iterator i(result); + BOOST_REGEX_DETAIL_NS::do_regex_replace( + BOOST_REGEX_DETAIL_NS::make_utf32_out(i, static_cast const*>(0)), + BOOST_REGEX_DETAIL_NS::make_utf32_seq(s.getBuffer(), s.getBuffer() + s.length(), static_cast const*>(0)), + e, + BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast const*>(0)), + flags); + return result; + } + +} // namespace boost. + +#ifdef BOOST_MSVC +#pragma warning (pop) +#endif + +#include +#include + +#endif diff --git a/include/boost/regex/v4/instances.hpp b/include/boost/regex/v4/instances.hpp deleted file mode 100644 index 7c18b6e8..00000000 --- a/include/boost/regex/v4/instances.hpp +++ /dev/null @@ -1,236 +0,0 @@ -/* - * - * Copyright (c) 1998-2002 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE instances.cpp - * VERSION see - * DESCRIPTION: Defines those template instances that are placed in the - * library rather than in the users object files. - */ - -// -// note no include guard, we may include this multiple times: -// -#ifndef BOOST_REGEX_NO_EXTERNAL_TEMPLATES - -namespace boost{ - -// -// this header can be included multiple times, each time with -// a different character type, BOOST_REGEX_CHAR_T must be defined -// first: -// -#ifndef BOOST_REGEX_CHAR_T -# error "BOOST_REGEX_CHAR_T not defined" -#endif - -#ifndef BOOST_REGEX_TRAITS_T -# define BOOST_REGEX_TRAITS_T , boost::regex_traits -#endif - -// -// what follows is compiler specific: -// - -#if defined(BOOST_BORLANDC) && (BOOST_BORLANDC < 0x600) - -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_PREFIX -#endif - -# ifndef BOOST_REGEX_INSTANTIATE -# pragma option push -Jgx -# endif - -template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >; -template class BOOST_REGEX_DECL match_results< const BOOST_REGEX_CHAR_T* >; -#ifndef BOOST_NO_STD_ALLOCATOR -template class BOOST_REGEX_DECL ::boost::BOOST_REGEX_DETAIL_NS::perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >; -#endif - -# ifndef BOOST_REGEX_INSTANTIATE -# pragma option pop -# endif - -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_SUFFIX -#endif - -#elif defined(BOOST_MSVC) || defined(__ICL) - -# ifndef BOOST_REGEX_INSTANTIATE -# ifdef __GNUC__ -# define template __extension__ extern template -# else -# if BOOST_MSVC > 1310 -# define BOOST_REGEX_TEMPLATE_DECL -# endif -# define template extern template -# endif -# endif - -#ifndef BOOST_REGEX_TEMPLATE_DECL -# define BOOST_REGEX_TEMPLATE_DECL BOOST_REGEX_DECL -#endif - -# ifdef BOOST_MSVC -# pragma warning(push) -# pragma warning(disable : 4251) -#if BOOST_MSVC < 1700 -# pragma warning(disable : 4231) -#endif -# if BOOST_MSVC < 1600 -# pragma warning(disable : 4660) -# endif -# endif - -template class BOOST_REGEX_TEMPLATE_DECL basic_regex< BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >; - -template class BOOST_REGEX_TEMPLATE_DECL match_results< const BOOST_REGEX_CHAR_T* >; -#ifndef BOOST_NO_STD_ALLOCATOR -template class BOOST_REGEX_TEMPLATE_DECL ::boost::BOOST_REGEX_DETAIL_NS::perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >; -#endif -#if !(defined(BOOST_DINKUMWARE_STDLIB) && (BOOST_DINKUMWARE_STDLIB <= 1))\ - && !(defined(BOOST_INTEL_CXX_VERSION) && (BOOST_INTEL_CXX_VERSION <= 800))\ - && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION))\ - && !defined(BOOST_REGEX_ICU_INSTANCES) -template class BOOST_REGEX_TEMPLATE_DECL match_results< std::basic_string::const_iterator >; -#ifndef BOOST_NO_STD_ALLOCATOR -template class BOOST_REGEX_TEMPLATE_DECL ::boost::BOOST_REGEX_DETAIL_NS::perl_matcher< std::basic_string::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, boost::regex_traits >; -#endif -#endif - - -# ifdef BOOST_MSVC -# pragma warning(pop) -# endif - -# ifdef template -# undef template -# endif - -#undef BOOST_REGEX_TEMPLATE_DECL - -#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || !defined(BOOST_NO_CXX11_EXTERN_TEMPLATE) - -#if defined(__clang__) -# pragma clang diagnostic push -# if defined(__APPLE_CC__) -# if (__clang_major__ > 6) -# pragma clang diagnostic ignored "-Wkeyword-macro" -# endif -# else -# if (__clang_major__ > 3) || ((__clang_major__ == 3) && (__clang_minor__ > 5)) -# pragma clang diagnostic ignored "-Wkeyword-macro" -# endif -# endif -#endif - -# ifndef BOOST_REGEX_INSTANTIATE -# ifdef __GNUC__ -# define template __extension__ extern template -# else -# define template extern template -# endif -# endif - -#if !defined(BOOST_NO_STD_LOCALE) && !defined(BOOST_REGEX_ICU_INSTANCES) -namespace BOOST_REGEX_DETAIL_NS{ -template BOOST_REGEX_DECL -std::locale cpp_regex_traits_base::imbue(const std::locale& l); - -template BOOST_REGEX_DECL -cpp_regex_traits_implementation::string_type - cpp_regex_traits_implementation::transform_primary(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const; -template BOOST_REGEX_DECL -cpp_regex_traits_implementation::string_type - cpp_regex_traits_implementation::transform(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const; -template BOOST_REGEX_DECL -cpp_regex_traits_implementation::string_type - cpp_regex_traits_implementation::lookup_collatename(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const; -template BOOST_REGEX_DECL -void cpp_regex_traits_implementation::init(); -template BOOST_REGEX_DECL -cpp_regex_traits_implementation::char_class_type - cpp_regex_traits_implementation::lookup_classname_imp(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const; -#ifdef BOOST_REGEX_BUGGY_CTYPE_FACET -template BOOST_REGEX_DECL -bool cpp_regex_traits_implementation::isctype(const BOOST_REGEX_CHAR_T c, char_class_type mask) const; -#endif -} // namespace -template BOOST_REGEX_DECL -boost::intmax_t cpp_regex_traits::toi(const BOOST_REGEX_CHAR_T*& first, const BOOST_REGEX_CHAR_T* last, int radix)const; -template BOOST_REGEX_DECL -std::string cpp_regex_traits::catalog_name(const std::string& name); -template BOOST_REGEX_DECL -std::string& cpp_regex_traits::get_catalog_name_inst(); -template BOOST_REGEX_DECL -std::string cpp_regex_traits::get_catalog_name(); -#ifdef BOOST_HAS_THREADS -template BOOST_REGEX_DECL -static_mutex& cpp_regex_traits::get_mutex_inst(); -#endif -#endif - -template BOOST_REGEX_DECL basic_regex& - basic_regex::do_assign( - const BOOST_REGEX_CHAR_T* p1, - const BOOST_REGEX_CHAR_T* p2, - flag_type f); -template BOOST_REGEX_DECL basic_regex::locale_type BOOST_REGEX_CALL - basic_regex::imbue(locale_type l); - -template BOOST_REGEX_DECL void BOOST_REGEX_CALL - match_results::maybe_assign( - const match_results& m); - -namespace BOOST_REGEX_DETAIL_NS{ -template BOOST_REGEX_DECL void perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >::construct_init( - const basic_regex& e, match_flag_type f); -template BOOST_REGEX_DECL bool perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >::match(); -template BOOST_REGEX_DECL bool perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >::find(); -} // namespace - -#if (defined(__GLIBCPP__) || defined(__GLIBCXX__)) \ - && !defined(BOOST_REGEX_ICU_INSTANCES)\ - && !defined(__SGI_STL_PORT)\ - && !defined(_STLPORT_VERSION) -// std:basic_string<>::const_iterator instances as well: -template BOOST_REGEX_DECL void BOOST_REGEX_CALL - match_results::const_iterator>::maybe_assign( - const match_results::const_iterator>& m); - -namespace BOOST_REGEX_DETAIL_NS{ -template BOOST_REGEX_DECL void perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, boost::regex_traits >::construct_init( - const basic_regex& e, match_flag_type f); -template BOOST_REGEX_DECL bool perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, boost::regex_traits >::match(); -template BOOST_REGEX_DECL bool perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, boost::regex_traits >::find(); -} // namespace -#endif - -# ifdef template -# undef template -# endif - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#endif - -} // namespace boost - -#endif // BOOST_REGEX_NO_EXTERNAL_TEMPLATES - - - - - diff --git a/include/boost/regex/v4/match_flags.hpp b/include/boost/regex/v4/match_flags.hpp index 1f7ed255..5dc72655 100644 --- a/include/boost/regex/v4/match_flags.hpp +++ b/include/boost/regex/v4/match_flags.hpp @@ -51,7 +51,7 @@ typedef enum _match_flags match_any = match_init << 1, /* don't care what we match */ match_not_null = match_any << 1, /* string can't be null */ match_continuous = match_not_null << 1, /* each grep match must continue from */ - /* uninterupted from the previous one */ + /* uninterrupted from the previous one */ match_partial = match_continuous << 1, /* find partial matches */ match_stop = match_partial << 1, /* stop after first match (grep) V3 only */ @@ -61,7 +61,7 @@ typedef enum _match_flags match_posix = match_perl << 1, /* Use POSIX matching rules */ match_nosubs = match_posix << 1, /* don't trap marked subs */ match_extra = match_nosubs << 1, /* include full capture information for repeated captures */ - match_single_line = match_extra << 1, /* treat text as single line and ignor any \n's when matching ^ and $. */ + match_single_line = match_extra << 1, /* treat text as single line and ignore any \n's when matching ^ and $. */ match_unused1 = match_single_line << 1, /* unused */ match_unused2 = match_unused1 << 1, /* unused */ match_unused3 = match_unused2 << 1, /* unused */ @@ -70,9 +70,9 @@ typedef enum _match_flags format_perl = 0, /* perl style replacement */ format_default = 0, /* ditto. */ format_sed = match_max << 1, /* sed style replacement. */ - format_all = format_sed << 1, /* enable all extentions to sytax. */ + format_all = format_sed << 1, /* enable all extensions to syntax. */ format_no_copy = format_all << 1, /* don't copy non-matching segments. */ - format_first_only = format_no_copy << 1, /* Only replace first occurance. */ + format_first_only = format_no_copy << 1, /* Only replace first occurrence. */ format_is_if = format_first_only << 1, /* internal use only. */ format_literal = format_is_if << 1, /* treat string as a literal */ @@ -116,7 +116,7 @@ inline match_flags& operator^=(match_flags& m1, match_flags m2) #ifdef __cplusplus } /* namespace regex_constants */ /* - * import names into boost for backwards compatiblity: + * import names into boost for backwards compatibility: */ using regex_constants::match_flag_type; using regex_constants::match_default; diff --git a/include/boost/regex/v4/match_results.hpp b/include/boost/regex/v4/match_results.hpp index 7e509801..c580e4fc 100644 --- a/include/boost/regex/v4/match_results.hpp +++ b/include/boost/regex/v4/match_results.hpp @@ -474,7 +474,7 @@ public: // private access functions: void BOOST_REGEX_CALL set_second(BidiIterator i) { - BOOST_ASSERT(m_subs.size() > 2); + BOOST_REGEX_ASSERT(m_subs.size() > 2); m_subs[2].second = i; m_subs[2].matched = true; m_subs[0].first = i; @@ -490,7 +490,7 @@ public: if(pos) m_last_closed_paren = static_cast(pos); pos += 2; - BOOST_ASSERT(m_subs.size() > pos); + BOOST_REGEX_ASSERT(m_subs.size() > pos); m_subs[pos].second = i; m_subs[pos].matched = m; if((pos == 2) && !escape_k) @@ -531,7 +531,7 @@ public: } void BOOST_REGEX_CALL set_first(BidiIterator i) { - BOOST_ASSERT(m_subs.size() > 2); + BOOST_REGEX_ASSERT(m_subs.size() > 2); // set up prefix: m_subs[1].second = i; m_subs[1].matched = (m_subs[1].first != i); @@ -546,7 +546,7 @@ public: } void BOOST_REGEX_CALL set_first(BidiIterator i, size_type pos, bool escape_k = false) { - BOOST_ASSERT(pos+2 < m_subs.size()); + BOOST_REGEX_ASSERT(pos+2 < m_subs.size()); if(pos || escape_k) { m_subs[pos+2].first = i; @@ -650,15 +650,15 @@ void BOOST_REGEX_CALL match_results::maybe_assign(const } base1 = ::boost::BOOST_REGEX_DETAIL_NS::distance(l_base, p1->first); base2 = ::boost::BOOST_REGEX_DETAIL_NS::distance(l_base, p2->first); - BOOST_ASSERT(base1 >= 0); - BOOST_ASSERT(base2 >= 0); + BOOST_REGEX_ASSERT(base1 >= 0); + BOOST_REGEX_ASSERT(base2 >= 0); if(base1 < base2) return; if(base2 < base1) break; len1 = ::boost::BOOST_REGEX_DETAIL_NS::distance((BidiIterator)p1->first, (BidiIterator)p1->second); len2 = ::boost::BOOST_REGEX_DETAIL_NS::distance((BidiIterator)p2->first, (BidiIterator)p2->second); - BOOST_ASSERT(len1 >= 0); - BOOST_ASSERT(len2 >= 0); + BOOST_REGEX_ASSERT(len1 >= 0); + BOOST_REGEX_ASSERT(len2 >= 0); if((len1 != len2) || ((p1->matched == false) && (p2->matched == true))) break; if((p1->matched == true) && (p2->matched == false)) diff --git a/include/boost/regex/v4/mem_block_cache.hpp b/include/boost/regex/v4/mem_block_cache.hpp index 50af421e..1996a512 100644 --- a/include/boost/regex/v4/mem_block_cache.hpp +++ b/include/boost/regex/v4/mem_block_cache.hpp @@ -69,6 +69,12 @@ struct mem_block_cache } ::operator delete(ptr); } + + static mem_block_cache& instance() + { + static mem_block_cache block_cache = { { {nullptr} } }; + return block_cache; + } }; @@ -129,11 +135,43 @@ struct mem_block_cache ++cached_blocks; } } + static mem_block_cache& instance() + { +#ifdef BOOST_HAS_THREADS + static mem_block_cache block_cache = { 0, 0, BOOST_STATIC_MUTEX_INIT, }; +#else + static mem_block_cache block_cache = { 0, 0, }; +#endif + return block_cache; + } }; #endif -extern mem_block_cache block_cache; +#if BOOST_REGEX_MAX_CACHE_BLOCKS == 0 +inline void* BOOST_REGEX_CALL get_mem_block() +{ + return ::operator new(BOOST_REGEX_BLOCKSIZE); +} + +inline void BOOST_REGEX_CALL put_mem_block(void* p) +{ + ::operator delete(p); +} + +#else + +inline void* BOOST_REGEX_CALL get_mem_block() +{ + return mem_block_cache::instance().get(); +} + +inline void BOOST_REGEX_CALL put_mem_block(void* p) +{ + mem_block_cache::instance().put(p); +} + +#endif } } // namespace boost diff --git a/include/boost/regex/v4/object_cache.hpp b/include/boost/regex/v4/object_cache.hpp new file mode 100644 index 00000000..98ba4c19 --- /dev/null +++ b/include/boost/regex/v4/object_cache.hpp @@ -0,0 +1,171 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE object_cache.hpp + * VERSION see + * DESCRIPTION: Implements a generic object cache. + */ + +#ifndef BOOST_REGEX_OBJECT_CACHE_HPP +#define BOOST_REGEX_OBJECT_CACHE_HPP + +#include +#include +#include +#include +#include +#include +#ifdef BOOST_HAS_THREADS +#include +#endif + +namespace boost{ + +template +class object_cache +{ +public: + typedef std::pair< ::boost::shared_ptr, Key const*> value_type; + typedef std::list list_type; + typedef typename list_type::iterator list_iterator; + typedef std::map map_type; + typedef typename map_type::iterator map_iterator; + typedef typename list_type::size_type size_type; + static boost::shared_ptr get(const Key& k, size_type l_max_cache_size); + +private: + static boost::shared_ptr do_get(const Key& k, size_type l_max_cache_size); + + struct data + { + list_type cont; + map_type index; + }; + + // Needed by compilers not implementing the resolution to DR45. For reference, + // see http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45. + friend struct data; +}; + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4702) +#endif +template +boost::shared_ptr object_cache::get(const Key& k, size_type l_max_cache_size) +{ +#ifdef BOOST_HAS_THREADS + static boost::static_mutex mut = BOOST_STATIC_MUTEX_INIT; + boost::static_mutex::scoped_lock l(mut); + if (l) + { + return do_get(k, l_max_cache_size); + } + // + // what do we do if the lock fails? + // for now just throw, but we should never really get here... + // + ::boost::throw_exception(std::runtime_error("Error in thread safety code: could not acquire a lock")); +#if defined(BOOST_NO_UNREACHABLE_RETURN_DETECTION) || defined(BOOST_NO_EXCEPTIONS) + return boost::shared_ptr(); +#endif +#else + return do_get(k, l_max_cache_size); +#endif +} +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +template +boost::shared_ptr object_cache::do_get(const Key& k, size_type l_max_cache_size) +{ + typedef typename object_cache::data object_data; + typedef typename map_type::size_type map_size_type; + static object_data s_data; + + // + // see if the object is already in the cache: + // + map_iterator mpos = s_data.index.find(k); + if(mpos != s_data.index.end()) + { + // + // Eureka! + // We have a cached item, bump it up the list and return it: + // + if(--(s_data.cont.end()) != mpos->second) + { + // splice out the item we want to move: + list_type temp; + temp.splice(temp.end(), s_data.cont, mpos->second); + // and now place it at the end of the list: + s_data.cont.splice(s_data.cont.end(), temp, temp.begin()); + BOOST_REGEX_ASSERT(*(s_data.cont.back().second) == k); + // update index with new position: + mpos->second = --(s_data.cont.end()); + BOOST_REGEX_ASSERT(&(mpos->first) == mpos->second->second); + BOOST_REGEX_ASSERT(&(mpos->first) == s_data.cont.back().second); + } + return s_data.cont.back().first; + } + // + // if we get here then the item is not in the cache, + // so create it: + // + boost::shared_ptr result(new Object(k)); + // + // Add it to the list, and index it: + // + s_data.cont.push_back(value_type(result, static_cast(0))); + s_data.index.insert(std::make_pair(k, --(s_data.cont.end()))); + s_data.cont.back().second = &(s_data.index.find(k)->first); + map_size_type s = s_data.index.size(); + BOOST_REGEX_ASSERT(s_data.index[k]->first.get() == result.get()); + BOOST_REGEX_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); + BOOST_REGEX_ASSERT(s_data.index.find(k)->first == k); + if(s > l_max_cache_size) + { + // + // We have too many items in the list, so we need to start + // popping them off the back of the list, but only if they're + // being held uniquely by us: + // + list_iterator pos = s_data.cont.begin(); + list_iterator last = s_data.cont.end(); + while((pos != last) && (s > l_max_cache_size)) + { + if(pos->first.unique()) + { + list_iterator condemmed(pos); + ++pos; + // now remove the items from our containers, + // then order has to be as follows: + BOOST_REGEX_ASSERT(s_data.index.find(*(condemmed->second)) != s_data.index.end()); + s_data.index.erase(*(condemmed->second)); + s_data.cont.erase(condemmed); + --s; + } + else + ++pos; + } + BOOST_REGEX_ASSERT(s_data.index[k]->first.get() == result.get()); + BOOST_REGEX_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); + BOOST_REGEX_ASSERT(s_data.index.find(k)->first == k); + } + return result; +} + +} + +#endif diff --git a/include/boost/regex/v4/pattern_except.hpp b/include/boost/regex/v4/pattern_except.hpp new file mode 100644 index 00000000..56e83cc9 --- /dev/null +++ b/include/boost/regex/v4/pattern_except.hpp @@ -0,0 +1,127 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE pattern_except.hpp + * VERSION see + * DESCRIPTION: Declares pattern-matching exception classes. + */ + +#ifndef BOOST_RE_V4_PAT_EXCEPT_HPP +#define BOOST_RE_V4_PAT_EXCEPT_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif + +#include +#include +#include +#include + +namespace boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable : 4275) +#if BOOST_MSVC >= 1800 +#pragma warning(disable : 26812) +#endif +#endif +class regex_error : public std::runtime_error +{ +public: + explicit regex_error(const std::string& s, regex_constants::error_type err = regex_constants::error_unknown, std::ptrdiff_t pos = 0) + : std::runtime_error(s) + , m_error_code(err) + , m_position(pos) + { + } + explicit regex_error(regex_constants::error_type err) + : std::runtime_error(::boost::BOOST_REGEX_DETAIL_NS::get_default_error_string(err)) + , m_error_code(err) + , m_position(0) + { + } + ~regex_error() BOOST_NOEXCEPT_OR_NOTHROW BOOST_OVERRIDE {} + regex_constants::error_type code()const + { return m_error_code; } + std::ptrdiff_t position()const + { return m_position; } + void raise()const + { +#ifndef BOOST_NO_EXCEPTIONS +#ifndef BOOST_REGEX_STANDALONE + ::boost::throw_exception(*this); +#else + throw* this; +#endif +#endif + } +private: + regex_constants::error_type m_error_code; + std::ptrdiff_t m_position; +}; + +typedef regex_error bad_pattern; +typedef regex_error bad_expression; + +namespace BOOST_REGEX_DETAIL_NS{ + +inline void BOOST_REGEX_CALL raise_runtime_error(const std::runtime_error& ex) +{ +#ifndef BOOST_REGEX_STANDALONE + ::boost::throw_exception(ex); +#else + throw ex; +#endif +} + +template +void raise_error(const traits& t, regex_constants::error_type code) +{ + (void)t; // warning suppression + std::runtime_error e(t.error_string(code)); + ::boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(e); +} + +} + +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace boost + +#endif diff --git a/include/boost/regex/v4/perl_matcher.hpp b/include/boost/regex/v4/perl_matcher.hpp index 05886b08..4bd7d114 100644 --- a/include/boost/regex/v4/perl_matcher.hpp +++ b/include/boost/regex/v4/perl_matcher.hpp @@ -45,7 +45,17 @@ namespace BOOST_REGEX_DETAIL_NS{ // // error checking API: // -BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex_constants::syntax_option_type ef, match_flag_type mf); +inline void BOOST_REGEX_CALL verify_options(boost::regex_constants::syntax_option_type, match_flag_type mf) +{ + // + // can't mix match_extra with POSIX matching rules: + // + if ((mf & match_extra) && (mf & match_posix)) + { + std::logic_error msg("Usage Error: Can't mix regular expression captures with POSIX matching rules"); + throw_exception(msg); + } +} // // function can_start: // @@ -88,7 +98,7 @@ inline bool can_start(unsigned int c, const unsigned char* map, unsigned char ma // // Unfortunately Rogue Waves standard library appears to have a bug -// in std::basic_string::compare that results in eroneous answers +// in std::basic_string::compare that results in erroneous answers // in some cases (tested with Borland C++ 5.1, Rogue Wave lib version // 0x020101) the test case was: // {39135,0} < {0xff,0} @@ -375,7 +385,7 @@ class perl_matcher public: typedef typename traits::char_type char_type; typedef perl_matcher self_type; - typedef bool (self_type::*matcher_proc_type)(void); + typedef bool (self_type::*matcher_proc_type)(); typedef std::size_t traits_size_type; typedef typename is_byte::width_type width_type; typedef typename regex_iterator_traits::difference_type difference_type; @@ -633,4 +643,3 @@ private: #include #endif - diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index dd767968..7cf0574b 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -622,7 +622,7 @@ bool perl_matcher::match_backref() if(index >= hash_value_mask) { named_subexpressions::range_type r = re.get_data().equal_range(index); - BOOST_ASSERT(r.first != r.second); + BOOST_REGEX_ASSERT(r.first != r.second); do { index = r.first->index; diff --git a/include/boost/regex/v4/perl_matcher_non_recursive.hpp b/include/boost/regex/v4/perl_matcher_non_recursive.hpp index 4ce93790..9b770f15 100644 --- a/include/boost/regex/v4/perl_matcher_non_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -20,7 +20,7 @@ #ifndef BOOST_REGEX_V4_PERL_MATCHER_NON_RECURSIVE_HPP #define BOOST_REGEX_V4_PERL_MATCHER_NON_RECURSIVE_HPP -#include +#include #ifdef BOOST_MSVC #pragma warning(push) @@ -111,7 +111,7 @@ struct save_state_init *end = reinterpret_cast(reinterpret_cast(*base)+BOOST_REGEX_BLOCKSIZE); --(*end); (void) new (*end)saved_state(0); - BOOST_ASSERT(*end > *base); + BOOST_REGEX_ASSERT(*end > *base); } ~save_state_init() { @@ -217,7 +217,7 @@ bool perl_matcher::match_all_states() bool successful_unwind = unwind(false); if((m_match_flags & match_partial) && (position == last) && (position != search_base)) m_has_partial_match = true; - if(false == successful_unwind) + if(!successful_unwind) return m_recursive_result; } } @@ -248,7 +248,7 @@ void perl_matcher::extend_stack() template inline void perl_matcher::push_matched_paren(int index, const sub_match& sub) { - //BOOST_ASSERT(index); + //BOOST_REGEX_ASSERT(index); saved_matched_paren* pmp = static_cast*>(m_backup_state); --pmp; if(pmp < m_stack_base) @@ -264,7 +264,7 @@ inline void perl_matcher::push_matched_paren(in template inline void perl_matcher::push_case_change(bool c) { - //BOOST_ASSERT(index); + //BOOST_REGEX_ASSERT(index); saved_change_case* pmp = static_cast(m_backup_state); --pmp; if(pmp < m_stack_base) @@ -348,7 +348,7 @@ inline void perl_matcher::push_repeater_count(i pmp = static_cast*>(m_backup_state); --pmp; } - (void) new (pmp)saved_repeater(i, s, position, this->recursion_stack.size() ? this->recursion_stack.back().idx : (INT_MIN + 3)); + (void) new (pmp)saved_repeater(i, s, position, this->recursion_stack.empty() ? (INT_MIN + 3) : this->recursion_stack.back().idx); m_backup_state = pmp; } @@ -492,7 +492,7 @@ bool perl_matcher::match_startmark() { // conditional expression: const re_alt* alt = static_cast(pstate->next.p); - BOOST_ASSERT(alt->type == syntax_element_alt); + BOOST_REGEX_ASSERT(alt->type == syntax_element_alt); pstate = alt->next.p; if(pstate->type == syntax_element_assert_backref) { @@ -503,7 +503,7 @@ bool perl_matcher::match_startmark() else { // zero width assertion, have to match this recursively: - BOOST_ASSERT(pstate->type == syntax_element_startmark); + BOOST_REGEX_ASSERT(pstate->type == syntax_element_startmark); bool negated = static_cast(pstate)->index == -2; BidiIterator saved_position = position; const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; @@ -543,7 +543,7 @@ bool perl_matcher::match_startmark() } default: { - BOOST_ASSERT(index > 0); + BOOST_REGEX_ASSERT(index > 0); if((m_match_flags & match_nosubs) == 0) { push_matched_paren(index, (*m_presult)[index]); @@ -705,7 +705,7 @@ bool perl_matcher::match_dot_repeat_slow() std::size_t count = 0; const re_repeat* rep = static_cast(pstate); re_syntax_base* psingle = rep->next.p; - // match compulsary repeats first: + // match compulsory repeats first: while(count < rep->min) { pstate = psingle; @@ -794,7 +794,7 @@ bool perl_matcher::match_char_repeat() #pragma option push -w-8008 -w-8066 -w-8004 #endif const re_repeat* rep = static_cast(pstate); - BOOST_ASSERT(1 == static_cast(rep->next.p)->length); + BOOST_REGEX_ASSERT(1 == static_cast(rep->next.p)->length); const char_type what = *reinterpret_cast(static_cast(rep->next.p) + 1); std::size_t count = 0; // @@ -1010,7 +1010,7 @@ bool perl_matcher::match_long_set_repeat() template bool perl_matcher::match_recursion() { - BOOST_ASSERT(pstate->type == syntax_element_recurse); + BOOST_REGEX_ASSERT(pstate->type == syntax_element_recurse); // // See if we've seen this recursion before at this location, if we have then // we need to prevent infinite recursion: @@ -1087,7 +1087,7 @@ bool perl_matcher::match_match() { if(!recursion_stack.empty()) { - BOOST_ASSERT(0 == recursion_stack.back().idx); + BOOST_REGEX_ASSERT(0 == recursion_stack.back().idx); pstate = recursion_stack.back().preturn_address; push_recursion(recursion_stack.back().idx, recursion_stack.back().preturn_address, m_presult, &recursion_stack.back().results); *m_presult = recursion_stack.back().results; @@ -1224,7 +1224,7 @@ bool perl_matcher::skip_until_paren(int index, /**************************************************************************** -Unwind and associated proceedures follow, these perform what normal stack +Unwind and associated procedures follow, these perform what normal stack unwinding does in the recursive implementation. ****************************************************************************/ @@ -1296,7 +1296,7 @@ bool perl_matcher::unwind_paren(bool have_match { saved_matched_paren* pmp = static_cast*>(m_backup_state); // restore previous values if no match was found: - if(have_match == false) + if(!have_match) { m_presult->set_first(pmp->sub.first, pmp->index, pmp->index == 0); m_presult->set_second(pmp->sub.second, pmp->index, pmp->sub.matched, pmp->index == 0); @@ -1394,15 +1394,15 @@ bool perl_matcher::unwind_greedy_single_repeat( const re_repeat* rep = pmp->rep; std::size_t count = pmp->count; - BOOST_ASSERT(rep->next.p != 0); - BOOST_ASSERT(rep->alt.p != 0); + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); count -= rep->min; if((m_match_flags & match_partial) && (position == last)) m_has_partial_match = true; - BOOST_ASSERT(count); + BOOST_REGEX_ASSERT(count); position = pmp->last_position; // backtrack till we can skip out: @@ -1443,12 +1443,12 @@ bool perl_matcher::unwind_slow_dot_repeat(bool const re_repeat* rep = pmp->rep; std::size_t count = pmp->count; - BOOST_ASSERT(rep->type == syntax_element_dot_rep); - BOOST_ASSERT(rep->next.p != 0); - BOOST_ASSERT(rep->alt.p != 0); - BOOST_ASSERT(rep->next.p->type == syntax_element_wild); + BOOST_REGEX_ASSERT(rep->type == syntax_element_dot_rep); + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_wild); - BOOST_ASSERT(count < rep->max); + BOOST_REGEX_ASSERT(count < rep->max); pstate = rep->next.p; position = pmp->last_position; @@ -1508,7 +1508,7 @@ bool perl_matcher::unwind_fast_dot_repeat(bool const re_repeat* rep = pmp->rep; std::size_t count = pmp->count; - BOOST_ASSERT(count < rep->max); + BOOST_REGEX_ASSERT(count < rep->max); position = pmp->last_position; if(position != last) { @@ -1568,11 +1568,11 @@ bool perl_matcher::unwind_char_repeat(bool r) const char_type what = *reinterpret_cast(static_cast(pstate) + 1); position = pmp->last_position; - BOOST_ASSERT(rep->type == syntax_element_char_rep); - BOOST_ASSERT(rep->next.p != 0); - BOOST_ASSERT(rep->alt.p != 0); - BOOST_ASSERT(rep->next.p->type == syntax_element_literal); - BOOST_ASSERT(count < rep->max); + BOOST_REGEX_ASSERT(rep->type == syntax_element_char_rep); + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_literal); + BOOST_REGEX_ASSERT(count < rep->max); if(position != last) { @@ -1637,11 +1637,11 @@ bool perl_matcher::unwind_short_set_repeat(bool const unsigned char* map = static_cast(rep->next.p)->_map; position = pmp->last_position; - BOOST_ASSERT(rep->type == syntax_element_short_set_rep); - BOOST_ASSERT(rep->next.p != 0); - BOOST_ASSERT(rep->alt.p != 0); - BOOST_ASSERT(rep->next.p->type == syntax_element_set); - BOOST_ASSERT(count < rep->max); + BOOST_REGEX_ASSERT(rep->type == syntax_element_short_set_rep); + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_set); + BOOST_REGEX_ASSERT(count < rep->max); if(position != last) { @@ -1707,11 +1707,11 @@ bool perl_matcher::unwind_long_set_repeat(bool const re_set_long* set = static_cast*>(pstate); position = pmp->last_position; - BOOST_ASSERT(rep->type == syntax_element_long_set_rep); - BOOST_ASSERT(rep->next.p != 0); - BOOST_ASSERT(rep->alt.p != 0); - BOOST_ASSERT(rep->next.p->type == syntax_element_long_set); - BOOST_ASSERT(count < rep->max); + BOOST_REGEX_ASSERT(rep->type == syntax_element_long_set_rep); + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_long_set); + BOOST_REGEX_ASSERT(count < rep->max); if(position != last) { @@ -1944,5 +1944,3 @@ inline void perl_matcher::push_parenthesis_push #endif #endif - - diff --git a/include/boost/regex/v4/perl_matcher_recursive.hpp b/include/boost/regex/v4/perl_matcher_recursive.hpp index c33475f0..7d262fe2 100644 --- a/include/boost/regex/v4/perl_matcher_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_recursive.hpp @@ -195,7 +195,7 @@ bool perl_matcher::match_startmark() { // conditional expression: const re_alt* alt = static_cast(pstate->next.p); - BOOST_ASSERT(alt->type == syntax_element_alt); + BOOST_REGEX_ASSERT(alt->type == syntax_element_alt); pstate = alt->next.p; if(pstate->type == syntax_element_assert_backref) { @@ -206,7 +206,7 @@ bool perl_matcher::match_startmark() else { // zero width assertion, have to match this recursively: - BOOST_ASSERT(pstate->type == syntax_element_startmark); + BOOST_REGEX_ASSERT(pstate->type == syntax_element_startmark); bool negated = static_cast(pstate)->index == -2; BidiIterator saved_position = position; const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; @@ -235,7 +235,7 @@ bool perl_matcher::match_startmark() } default: { - BOOST_ASSERT(index > 0); + BOOST_REGEX_ASSERT(index > 0); if((m_match_flags & match_nosubs) == 0) { backup_subex sub(*m_presult, index); @@ -552,7 +552,7 @@ bool perl_matcher::match_char_repeat() #pragma option push -w-8008 -w-8066 -w-8004 #endif const re_repeat* rep = static_cast(pstate); - BOOST_ASSERT(1 == static_cast(rep->next.p)->length); + BOOST_REGEX_ASSERT(1 == static_cast(rep->next.p)->length); const char_type what = *reinterpret_cast(static_cast(rep->next.p) + 1); // // start by working out how much we can skip: @@ -892,7 +892,7 @@ bool perl_matcher::backtrack_till_match(std::si template bool perl_matcher::match_recursion() { - BOOST_ASSERT(pstate->type == syntax_element_recurse); + BOOST_REGEX_ASSERT(pstate->type == syntax_element_recurse); // // Set new call stack: // @@ -987,7 +987,7 @@ bool perl_matcher::match_match() { if(!recursion_stack.empty()) { - BOOST_ASSERT(0 == recursion_stack.back().idx); + BOOST_REGEX_ASSERT(0 == recursion_stack.back().idx); const re_syntax_base* saved_state = pstate = recursion_stack.back().preturn_address; *m_presult = recursion_stack.back().results; recursion_stack.pop_back(); diff --git a/include/boost/regex/v4/primary_transform.hpp b/include/boost/regex/v4/primary_transform.hpp index 3e7cac54..47d172aa 100644 --- a/include/boost/regex/v4/primary_transform.hpp +++ b/include/boost/regex/v4/primary_transform.hpp @@ -46,7 +46,7 @@ template unsigned count_chars(const S& s, charT c) { // - // Count how many occurances of character c occur + // Count how many occurrences of character c occur // in string s: if c is a delimeter between collation // fields, then this should be the same value for all // sort keys: diff --git a/include/boost/regex/v4/protected_call.hpp b/include/boost/regex/v4/protected_call.hpp index 9d47514f..451b2e34 100644 --- a/include/boost/regex/v4/protected_call.hpp +++ b/include/boost/regex/v4/protected_call.hpp @@ -20,6 +20,8 @@ #ifndef BOOST_REGEX_V4_PROTECTED_CALL_HPP #define BOOST_REGEX_V4_PROTECTED_CALL_HPP +#include + #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable: 4103) @@ -53,7 +55,7 @@ public: concrete_protected_call(T* o, proc_type p) : obj(o), proc(p) {} private: - virtual bool call()const; + bool call()const BOOST_OVERRIDE; T* obj; proc_type proc; }; diff --git a/include/boost/regex/v4/regex.hpp b/include/boost/regex/v4/regex.hpp index 7cc260a3..e8ce7e5e 100644 --- a/include/boost/regex/v4/regex.hpp +++ b/include/boost/regex/v4/regex.hpp @@ -87,42 +87,6 @@ #ifndef BOOST_REGEX_MATCHER_HPP #include #endif -// -// template instances: -// -#define BOOST_REGEX_CHAR_T char -#ifdef BOOST_REGEX_NARROW_INSTANTIATE -# define BOOST_REGEX_INSTANTIATE -#endif -#include -#undef BOOST_REGEX_CHAR_T -#ifdef BOOST_REGEX_INSTANTIATE -# undef BOOST_REGEX_INSTANTIATE -#endif - -#ifndef BOOST_NO_WREGEX -#define BOOST_REGEX_CHAR_T wchar_t -#ifdef BOOST_REGEX_WIDE_INSTANTIATE -# define BOOST_REGEX_INSTANTIATE -#endif -#include -#undef BOOST_REGEX_CHAR_T -#ifdef BOOST_REGEX_INSTANTIATE -# undef BOOST_REGEX_INSTANTIATE -#endif -#endif - -#if !defined(BOOST_NO_WREGEX) && defined(BOOST_REGEX_HAS_OTHER_WCHAR_T) -#define BOOST_REGEX_CHAR_T unsigned short -#ifdef BOOST_REGEX_US_INSTANTIATE -# define BOOST_REGEX_INSTANTIATE -#endif -#include -#undef BOOST_REGEX_CHAR_T -#ifdef BOOST_REGEX_INSTANTIATE -# undef BOOST_REGEX_INSTANTIATE -#endif -#endif namespace boost{ diff --git a/include/boost/regex/v4/regex_format.hpp b/include/boost/regex/v4/regex_format.hpp index 33c4a5cb..9401fa30 100644 --- a/include/boost/regex/v4/regex_format.hpp +++ b/include/boost/regex/v4/regex_format.hpp @@ -251,7 +251,7 @@ void basic_regex_formatter::format m_have_conditional = have_conditional; if(m_position == m_end) return; - BOOST_ASSERT(*m_position == static_cast(')')); + BOOST_REGEX_ASSERT(*m_position == static_cast(')')); ++m_position; // skip the closing ')' break; } @@ -307,7 +307,7 @@ void basic_regex_formatter::format // On entry *m_position points to a '$' character // output the information that goes with it: // - BOOST_ASSERT(*m_position == '$'); + BOOST_REGEX_ASSERT(*m_position == '$'); // // see if this is a trailing '$': // @@ -656,7 +656,7 @@ void basic_regex_formatter::format len = ::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end); len = (std::min)(static_cast(4), len); v = this->toi(m_position, m_position + len, 8); - BOOST_ASSERT(v >= 0); + BOOST_REGEX_ASSERT(v >= 0); put(static_cast(v)); break; } diff --git a/include/boost/regex/v4/regex_raw_buffer.hpp b/include/boost/regex/v4/regex_raw_buffer.hpp index f1f4f50a..ac27e1a2 100644 --- a/include/boost/regex/v4/regex_raw_buffer.hpp +++ b/include/boost/regex/v4/regex_raw_buffer.hpp @@ -106,7 +106,7 @@ enum{ // this is used by basic_regex for expression storage // -class BOOST_REGEX_DECL raw_storage +class raw_storage { public: typedef std::size_t size_type; @@ -123,8 +123,30 @@ public: ::operator delete(start); } - void BOOST_REGEX_CALL resize(size_type n); - + void BOOST_REGEX_CALL resize(size_type n) + { + size_type newsize = start ? last - start : 1024; + while (newsize < n) + newsize *= 2; + size_type datasize = end - start; + // extend newsize to WORD/DWORD boundary: + newsize = (newsize + padding_mask) & ~(padding_mask); + + // allocate and copy data: + pointer ptr = static_cast(::operator new(newsize)); + BOOST_REGEX_NOEH_ASSERT(ptr) + if (start) + std::memcpy(ptr, start, datasize); + + // get rid of old buffer: + ::operator delete(start); + + // and set up pointers: + start = ptr; + end = ptr + datasize; + last = ptr + newsize; + } + void* BOOST_REGEX_CALL extend(size_type n) { if(size_type(last - end) < n) @@ -134,7 +156,16 @@ public: return result; } - void* BOOST_REGEX_CALL insert(size_type pos, size_type n); + void* BOOST_REGEX_CALL insert(size_type pos, size_type n) + { + BOOST_REGEX_ASSERT(pos <= size_type(end - start)); + if (size_type(last - end) < n) + resize(n + (end - start)); + void* result = start + pos; + std::memmove(start + pos + n, start + pos, (end - start) - pos); + end += n; + return result; + } size_type BOOST_REGEX_CALL size() { diff --git a/include/boost/regex/v4/regex_token_iterator.hpp b/include/boost/regex/v4/regex_token_iterator.hpp index ab15a257..fc54b3c9 100644 --- a/include/boost/regex/v4/regex_token_iterator.hpp +++ b/include/boost/regex/v4/regex_token_iterator.hpp @@ -66,6 +66,8 @@ class regex_token_iterator_implementation std::vector subs; // the sub-expressions to enumerate public: + regex_token_iterator_implementation(const regex_token_iterator_implementation& other) + : what(other.what), base(other.base), end(other.end), re(other.re), flags(other.flags), result(other.result), N(other.N), subs(other.subs) {} regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, int sub, match_flag_type f) : end(last), re(*p), flags(f), N(0){ subs.push_back(sub); } regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const std::vector& v, match_flag_type f) diff --git a/include/boost/regex/v4/regex_traits_defaults.hpp b/include/boost/regex/v4/regex_traits_defaults.hpp index d08047d9..1dbb0bf9 100644 --- a/include/boost/regex/v4/regex_traits_defaults.hpp +++ b/include/boost/regex/v4/regex_traits_defaults.hpp @@ -31,6 +31,11 @@ #endif #include +#include + +#include +#include +#include #ifndef BOOST_REGEX_SYNTAX_TYPE_HPP #include @@ -38,6 +43,7 @@ #ifndef BOOST_REGEX_ERROR_TYPE_HPP #include #endif +#include #include #include @@ -62,14 +68,548 @@ inline bool is_extended(charT c) inline bool is_extended(char) { return false; } +inline const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n) +{ + // if the user hasn't supplied a message catalog, then this supplies + // default "messages" for us to load in the range 1-100. + const char* messages[] = { + "", + "(", + ")", + "$", + "^", + ".", + "*", + "+", + "?", + "[", + "]", + "|", + "\\", + "#", + "-", + "{", + "}", + "0123456789", + "b", + "B", + "<", + ">", + "", + "", + "A`", + "z'", + "\n", + ",", + "a", + "f", + "n", + "r", + "t", + "v", + "x", + "c", + ":", + "=", + "e", + "", + "", + "", + "", + "", + "", + "", + "", + "E", + "Q", + "X", + "C", + "Z", + "G", + "!", + "p", + "P", + "N", + "gk", + "K", + "R", + }; -BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n); -BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n); -BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c); -BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c); + return ((n >= (sizeof(messages) / sizeof(messages[1]))) ? "" : messages[n]); +} + +inline const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n) +{ + static const char* const s_default_error_messages[] = { + "Success", /* REG_NOERROR 0 error_ok */ + "No match", /* REG_NOMATCH 1 error_no_match */ + "Invalid regular expression.", /* REG_BADPAT 2 error_bad_pattern */ + "Invalid collation character.", /* REG_ECOLLATE 3 error_collate */ + "Invalid character class name, collating name, or character range.", /* REG_ECTYPE 4 error_ctype */ + "Invalid or unterminated escape sequence.", /* REG_EESCAPE 5 error_escape */ + "Invalid back reference: specified capturing group does not exist.", /* REG_ESUBREG 6 error_backref */ + "Unmatched [ or [^ in character class declaration.", /* REG_EBRACK 7 error_brack */ + "Unmatched marking parenthesis ( or \\(.", /* REG_EPAREN 8 error_paren */ + "Unmatched quantified repeat operator { or \\{.", /* REG_EBRACE 9 error_brace */ + "Invalid content of repeat range.", /* REG_BADBR 10 error_badbrace */ + "Invalid range end in character class", /* REG_ERANGE 11 error_range */ + "Out of memory.", /* REG_ESPACE 12 error_space NOT USED */ + "Invalid preceding regular expression prior to repetition operator.", /* REG_BADRPT 13 error_badrepeat */ + "Premature end of regular expression", /* REG_EEND 14 error_end NOT USED */ + "Regular expression is too large.", /* REG_ESIZE 15 error_size NOT USED */ + "Unmatched ) or \\)", /* REG_ERPAREN 16 error_right_paren NOT USED */ + "Empty regular expression.", /* REG_EMPTY 17 error_empty */ + "The complexity of matching the regular expression exceeded predefined bounds. " + "Try refactoring the regular expression to make each choice made by the state machine unambiguous. " + "This exception is thrown to prevent \"eternal\" matches that take an " + "indefinite period time to locate.", /* REG_ECOMPLEXITY 18 error_complexity */ + "Ran out of stack space trying to match the regular expression.", /* REG_ESTACK 19 error_stack */ + "Invalid or unterminated Perl (?...) sequence.", /* REG_E_PERL 20 error_perl */ + "Unknown error.", /* REG_E_UNKNOWN 21 error_unknown */ + }; + + return (n > ::boost::regex_constants::error_unknown) ? s_default_error_messages[::boost::regex_constants::error_unknown] : s_default_error_messages[n]; +} + +inline regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c) +{ + // + // char_syntax determines how the compiler treats a given character + // in a regular expression. + // + static regex_constants::syntax_type char_syntax[] = { + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_newline, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /* */ // 32 + regex_constants::syntax_not, /*!*/ + regex_constants::syntax_char, /*"*/ + regex_constants::syntax_hash, /*#*/ + regex_constants::syntax_dollar, /*$*/ + regex_constants::syntax_char, /*%*/ + regex_constants::syntax_char, /*&*/ + regex_constants::escape_type_end_buffer, /*'*/ + regex_constants::syntax_open_mark, /*(*/ + regex_constants::syntax_close_mark, /*)*/ + regex_constants::syntax_star, /***/ + regex_constants::syntax_plus, /*+*/ + regex_constants::syntax_comma, /*,*/ + regex_constants::syntax_dash, /*-*/ + regex_constants::syntax_dot, /*.*/ + regex_constants::syntax_char, /*/*/ + regex_constants::syntax_digit, /*0*/ + regex_constants::syntax_digit, /*1*/ + regex_constants::syntax_digit, /*2*/ + regex_constants::syntax_digit, /*3*/ + regex_constants::syntax_digit, /*4*/ + regex_constants::syntax_digit, /*5*/ + regex_constants::syntax_digit, /*6*/ + regex_constants::syntax_digit, /*7*/ + regex_constants::syntax_digit, /*8*/ + regex_constants::syntax_digit, /*9*/ + regex_constants::syntax_colon, /*:*/ + regex_constants::syntax_char, /*;*/ + regex_constants::escape_type_left_word, /*<*/ + regex_constants::syntax_equal, /*=*/ + regex_constants::escape_type_right_word, /*>*/ + regex_constants::syntax_question, /*?*/ + regex_constants::syntax_char, /*@*/ + regex_constants::syntax_char, /*A*/ + regex_constants::syntax_char, /*B*/ + regex_constants::syntax_char, /*C*/ + regex_constants::syntax_char, /*D*/ + regex_constants::syntax_char, /*E*/ + regex_constants::syntax_char, /*F*/ + regex_constants::syntax_char, /*G*/ + regex_constants::syntax_char, /*H*/ + regex_constants::syntax_char, /*I*/ + regex_constants::syntax_char, /*J*/ + regex_constants::syntax_char, /*K*/ + regex_constants::syntax_char, /*L*/ + regex_constants::syntax_char, /*M*/ + regex_constants::syntax_char, /*N*/ + regex_constants::syntax_char, /*O*/ + regex_constants::syntax_char, /*P*/ + regex_constants::syntax_char, /*Q*/ + regex_constants::syntax_char, /*R*/ + regex_constants::syntax_char, /*S*/ + regex_constants::syntax_char, /*T*/ + regex_constants::syntax_char, /*U*/ + regex_constants::syntax_char, /*V*/ + regex_constants::syntax_char, /*W*/ + regex_constants::syntax_char, /*X*/ + regex_constants::syntax_char, /*Y*/ + regex_constants::syntax_char, /*Z*/ + regex_constants::syntax_open_set, /*[*/ + regex_constants::syntax_escape, /*\*/ + regex_constants::syntax_close_set, /*]*/ + regex_constants::syntax_caret, /*^*/ + regex_constants::syntax_char, /*_*/ + regex_constants::syntax_char, /*`*/ + regex_constants::syntax_char, /*a*/ + regex_constants::syntax_char, /*b*/ + regex_constants::syntax_char, /*c*/ + regex_constants::syntax_char, /*d*/ + regex_constants::syntax_char, /*e*/ + regex_constants::syntax_char, /*f*/ + regex_constants::syntax_char, /*g*/ + regex_constants::syntax_char, /*h*/ + regex_constants::syntax_char, /*i*/ + regex_constants::syntax_char, /*j*/ + regex_constants::syntax_char, /*k*/ + regex_constants::syntax_char, /*l*/ + regex_constants::syntax_char, /*m*/ + regex_constants::syntax_char, /*n*/ + regex_constants::syntax_char, /*o*/ + regex_constants::syntax_char, /*p*/ + regex_constants::syntax_char, /*q*/ + regex_constants::syntax_char, /*r*/ + regex_constants::syntax_char, /*s*/ + regex_constants::syntax_char, /*t*/ + regex_constants::syntax_char, /*u*/ + regex_constants::syntax_char, /*v*/ + regex_constants::syntax_char, /*w*/ + regex_constants::syntax_char, /*x*/ + regex_constants::syntax_char, /*y*/ + regex_constants::syntax_char, /*z*/ + regex_constants::syntax_open_brace, /*{*/ + regex_constants::syntax_or, /*|*/ + regex_constants::syntax_close_brace, /*}*/ + regex_constants::syntax_char, /*~*/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + }; + + return char_syntax[(unsigned char)c]; +} + +inline regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c) +{ + // + // char_syntax determines how the compiler treats a given character + // in a regular expression. + // + static regex_constants::escape_syntax_type char_syntax[] = { + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /* */ // 32 + regex_constants::escape_type_identity, /*!*/ + regex_constants::escape_type_identity, /*"*/ + regex_constants::escape_type_identity, /*#*/ + regex_constants::escape_type_identity, /*$*/ + regex_constants::escape_type_identity, /*%*/ + regex_constants::escape_type_identity, /*&*/ + regex_constants::escape_type_end_buffer, /*'*/ + regex_constants::syntax_open_mark, /*(*/ + regex_constants::syntax_close_mark, /*)*/ + regex_constants::escape_type_identity, /***/ + regex_constants::syntax_plus, /*+*/ + regex_constants::escape_type_identity, /*,*/ + regex_constants::escape_type_identity, /*-*/ + regex_constants::escape_type_identity, /*.*/ + regex_constants::escape_type_identity, /*/*/ + regex_constants::escape_type_decimal, /*0*/ + regex_constants::escape_type_backref, /*1*/ + regex_constants::escape_type_backref, /*2*/ + regex_constants::escape_type_backref, /*3*/ + regex_constants::escape_type_backref, /*4*/ + regex_constants::escape_type_backref, /*5*/ + regex_constants::escape_type_backref, /*6*/ + regex_constants::escape_type_backref, /*7*/ + regex_constants::escape_type_backref, /*8*/ + regex_constants::escape_type_backref, /*9*/ + regex_constants::escape_type_identity, /*:*/ + regex_constants::escape_type_identity, /*;*/ + regex_constants::escape_type_left_word, /*<*/ + regex_constants::escape_type_identity, /*=*/ + regex_constants::escape_type_right_word, /*>*/ + regex_constants::syntax_question, /*?*/ + regex_constants::escape_type_identity, /*@*/ + regex_constants::escape_type_start_buffer, /*A*/ + regex_constants::escape_type_not_word_assert, /*B*/ + regex_constants::escape_type_C, /*C*/ + regex_constants::escape_type_not_class, /*D*/ + regex_constants::escape_type_E, /*E*/ + regex_constants::escape_type_not_class, /*F*/ + regex_constants::escape_type_G, /*G*/ + regex_constants::escape_type_not_class, /*H*/ + regex_constants::escape_type_not_class, /*I*/ + regex_constants::escape_type_not_class, /*J*/ + regex_constants::escape_type_reset_start_mark, /*K*/ + regex_constants::escape_type_not_class, /*L*/ + regex_constants::escape_type_not_class, /*M*/ + regex_constants::escape_type_named_char, /*N*/ + regex_constants::escape_type_not_class, /*O*/ + regex_constants::escape_type_not_property, /*P*/ + regex_constants::escape_type_Q, /*Q*/ + regex_constants::escape_type_line_ending, /*R*/ + regex_constants::escape_type_not_class, /*S*/ + regex_constants::escape_type_not_class, /*T*/ + regex_constants::escape_type_not_class, /*U*/ + regex_constants::escape_type_not_class, /*V*/ + regex_constants::escape_type_not_class, /*W*/ + regex_constants::escape_type_X, /*X*/ + regex_constants::escape_type_not_class, /*Y*/ + regex_constants::escape_type_Z, /*Z*/ + regex_constants::escape_type_identity, /*[*/ + regex_constants::escape_type_identity, /*\*/ + regex_constants::escape_type_identity, /*]*/ + regex_constants::escape_type_identity, /*^*/ + regex_constants::escape_type_identity, /*_*/ + regex_constants::escape_type_start_buffer, /*`*/ + regex_constants::escape_type_control_a, /*a*/ + regex_constants::escape_type_word_assert, /*b*/ + regex_constants::escape_type_ascii_control, /*c*/ + regex_constants::escape_type_class, /*d*/ + regex_constants::escape_type_e, /*e*/ + regex_constants::escape_type_control_f, /*f*/ + regex_constants::escape_type_extended_backref, /*g*/ + regex_constants::escape_type_class, /*h*/ + regex_constants::escape_type_class, /*i*/ + regex_constants::escape_type_class, /*j*/ + regex_constants::escape_type_extended_backref, /*k*/ + regex_constants::escape_type_class, /*l*/ + regex_constants::escape_type_class, /*m*/ + regex_constants::escape_type_control_n, /*n*/ + regex_constants::escape_type_class, /*o*/ + regex_constants::escape_type_property, /*p*/ + regex_constants::escape_type_class, /*q*/ + regex_constants::escape_type_control_r, /*r*/ + regex_constants::escape_type_class, /*s*/ + regex_constants::escape_type_control_t, /*t*/ + regex_constants::escape_type_class, /*u*/ + regex_constants::escape_type_control_v, /*v*/ + regex_constants::escape_type_class, /*w*/ + regex_constants::escape_type_hex, /*x*/ + regex_constants::escape_type_class, /*y*/ + regex_constants::escape_type_end_buffer, /*z*/ + regex_constants::syntax_open_brace, /*{*/ + regex_constants::syntax_or, /*|*/ + regex_constants::syntax_close_brace, /*}*/ + regex_constants::escape_type_identity, /*~*/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + }; + + return char_syntax[(unsigned char)c]; +} // is charT c a combining character? -BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s); +inline bool BOOST_REGEX_CALL is_combining_implementation(boost::uint_least16_t c) +{ + const boost::uint_least16_t combining_ranges[] = { 0x0300, 0x0361, + 0x0483, 0x0486, + 0x0903, 0x0903, + 0x093E, 0x0940, + 0x0949, 0x094C, + 0x0982, 0x0983, + 0x09BE, 0x09C0, + 0x09C7, 0x09CC, + 0x09D7, 0x09D7, + 0x0A3E, 0x0A40, + 0x0A83, 0x0A83, + 0x0ABE, 0x0AC0, + 0x0AC9, 0x0ACC, + 0x0B02, 0x0B03, + 0x0B3E, 0x0B3E, + 0x0B40, 0x0B40, + 0x0B47, 0x0B4C, + 0x0B57, 0x0B57, + 0x0B83, 0x0B83, + 0x0BBE, 0x0BBF, + 0x0BC1, 0x0BCC, + 0x0BD7, 0x0BD7, + 0x0C01, 0x0C03, + 0x0C41, 0x0C44, + 0x0C82, 0x0C83, + 0x0CBE, 0x0CBE, + 0x0CC0, 0x0CC4, + 0x0CC7, 0x0CCB, + 0x0CD5, 0x0CD6, + 0x0D02, 0x0D03, + 0x0D3E, 0x0D40, + 0x0D46, 0x0D4C, + 0x0D57, 0x0D57, + 0x0F7F, 0x0F7F, + 0x20D0, 0x20E1, + 0x3099, 0x309A, + 0xFE20, 0xFE23, + 0xffff, 0xffff, }; + + const boost::uint_least16_t* p = combining_ranges + 1; + while (*p < c) p += 2; + --p; + if ((c >= *p) && (c <= *(p + 1))) + return true; + return false; +} template inline bool is_combining(charT c) @@ -138,7 +678,77 @@ inline bool is_separator(char c) // // get a default collating element: // -BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name); +inline std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name) +{ + // + // these are the POSIX collating names: + // + static const char* def_coll_names[] = { + "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "alert", "backspace", "tab", "newline", + "vertical-tab", "form-feed", "carriage-return", "SO", "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", + "SYN", "ETB", "CAN", "EM", "SUB", "ESC", "IS4", "IS3", "IS2", "IS1", "space", "exclamation-mark", + "quotation-mark", "number-sign", "dollar-sign", "percent-sign", "ampersand", "apostrophe", + "left-parenthesis", "right-parenthesis", "asterisk", "plus-sign", "comma", "hyphen", + "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", + "colon", "semicolon", "less-than-sign", "equals-sign", "greater-than-sign", + "question-mark", "commercial-at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", + "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "left-square-bracket", "backslash", + "right-square-bracket", "circumflex", "underscore", "grave-accent", "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "left-curly-bracket", + "vertical-line", "right-curly-bracket", "tilde", "DEL", "", + }; + + // these multi-character collating elements + // should keep most Western-European locales + // happy - we should really localise these a + // little more - but this will have to do for + // now: + + static const char* def_multi_coll[] = { + "ae", + "Ae", + "AE", + "ch", + "Ch", + "CH", + "ll", + "Ll", + "LL", + "ss", + "Ss", + "SS", + "nj", + "Nj", + "NJ", + "dz", + "Dz", + "DZ", + "lj", + "Lj", + "LJ", + "", + }; + + unsigned int i = 0; + while (*def_coll_names[i]) + { + if (def_coll_names[i] == name) + { + return std::string(1, char(i)); + } + ++i; + } + i = 0; + while (*def_multi_coll[i]) + { + if (def_multi_coll[i] == name) + { + return def_multi_coll[i]; + } + ++i; + } + return std::string(); +} // // get the state_id of a character classification, the individual @@ -255,15 +865,25 @@ inline charT BOOST_REGEX_CALL global_upper(charT c) return c; } -BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c); -BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c); +inline char BOOST_REGEX_CALL do_global_lower(char c) +{ + return static_cast((std::tolower)((unsigned char)c)); +} + +inline char BOOST_REGEX_CALL do_global_upper(char c) +{ + return static_cast((std::toupper)((unsigned char)c)); +} #ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c); -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c); -#endif -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c); -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c); +inline wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c) +{ + return (std::towlower)(c); +} + +inline wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c) +{ + return (std::towupper)(c); +} #endif // // This sucks: declare template specialisations of global_lower/global_upper @@ -275,15 +895,11 @@ BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short // otherwise the "local template instantiation" compiler option can pick // the wrong instantiation when linking: // -template<> inline char BOOST_REGEX_CALL global_lower(char c){ return do_global_lower(c); } -template<> inline char BOOST_REGEX_CALL global_upper(char c){ return do_global_upper(c); } +template<> inline char BOOST_REGEX_CALL global_lower(char c) { return do_global_lower(c); } +template<> inline char BOOST_REGEX_CALL global_upper(char c) { return do_global_upper(c); } #ifndef BOOST_NO_WREGEX -template<> inline wchar_t BOOST_REGEX_CALL global_lower(wchar_t c){ return do_global_lower(c); } -template<> inline wchar_t BOOST_REGEX_CALL global_upper(wchar_t c){ return do_global_upper(c); } -#endif -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -template<> inline unsigned short BOOST_REGEX_CALL global_lower(unsigned short c){ return do_global_lower(c); } -template<> inline unsigned short BOOST_REGEX_CALL global_upper(unsigned short c){ return do_global_upper(c); } +template<> inline wchar_t BOOST_REGEX_CALL global_lower(wchar_t c) { return do_global_lower(c); } +template<> inline wchar_t BOOST_REGEX_CALL global_upper(wchar_t c) { return do_global_upper(c); } #endif template diff --git a/include/boost/regex/v4/states.hpp b/include/boost/regex/v4/states.hpp index b56d0134..86eb02b1 100644 --- a/include/boost/regex/v4/states.hpp +++ b/include/boost/regex/v4/states.hpp @@ -161,7 +161,7 @@ A marked parenthesis. struct re_brace : public re_syntax_base { // The index to match, can be zero (don't mark the sub-expression) - // or negative (for perl style (?...) extentions): + // or negative (for perl style (?...) extensions): int index; bool icase; }; diff --git a/include/boost/regex/v4/unicode_iterator.hpp b/include/boost/regex/v4/unicode_iterator.hpp new file mode 100644 index 00000000..25711fc7 --- /dev/null +++ b/include/boost/regex/v4/unicode_iterator.hpp @@ -0,0 +1,790 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE unicode_iterator.hpp + * VERSION see + * DESCRIPTION: Iterator adapters for converting between different Unicode encodings. + */ + +/**************************************************************************** + +Contents: +~~~~~~~~~ + +1) Read Only, Input Adapters: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +template +class u32_to_u8_iterator; + +Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-8. + +template +class u8_to_u32_iterator; + +Adapts sequence of UTF-8 code points to "look like" a sequence of UTF-32. + +template +class u32_to_u16_iterator; + +Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-16. + +template +class u16_to_u32_iterator; + +Adapts sequence of UTF-16 code points to "look like" a sequence of UTF-32. + +2) Single pass output iterator adapters: + +template +class utf8_output_iterator; + +Accepts UTF-32 code points and forwards them on as UTF-8 code points. + +template +class utf16_output_iterator; + +Accepts UTF-32 code points and forwards them on as UTF-16 code points. + +****************************************************************************/ + +#ifndef BOOST_REGEX_V4_UNICODE_ITERATOR_HPP +#define BOOST_REGEX_V4_UNICODE_ITERATOR_HPP +#include +#include +#include +#include +#include +#include +#ifndef BOOST_NO_STD_LOCALE +#include +#include +#endif +#include // CHAR_BIT + +#ifdef BOOST_REGEX_CXX03 + +#else +#endif + +namespace boost{ + +namespace detail{ + +static const ::boost::uint16_t high_surrogate_base = 0xD7C0u; +static const ::boost::uint16_t low_surrogate_base = 0xDC00u; +static const ::boost::uint32_t ten_bit_mask = 0x3FFu; + +inline bool is_high_surrogate(::boost::uint16_t v) +{ + return (v & 0xFFFFFC00u) == 0xd800u; +} +inline bool is_low_surrogate(::boost::uint16_t v) +{ + return (v & 0xFFFFFC00u) == 0xdc00u; +} +template +inline bool is_surrogate(T v) +{ + return (v & 0xFFFFF800u) == 0xd800; +} + +inline unsigned utf8_byte_count(boost::uint8_t c) +{ + // if the most significant bit with a zero in it is in position + // 8-N then there are N bytes in this UTF-8 sequence: + boost::uint8_t mask = 0x80u; + unsigned result = 0; + while(c & mask) + { + ++result; + mask >>= 1; + } + return (result == 0) ? 1 : ((result > 4) ? 4 : result); +} + +inline unsigned utf8_trailing_byte_count(boost::uint8_t c) +{ + return utf8_byte_count(c) - 1; +} + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4100) +#endif +#ifndef BOOST_NO_EXCEPTIONS +BOOST_NORETURN +#endif +inline void invalid_utf32_code_point(::boost::uint32_t val) +{ +#ifndef BOOST_NO_STD_LOCALE + std::stringstream ss; + ss << "Invalid UTF-32 code point U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-16 sequence"; + std::out_of_range e(ss.str()); +#else + std::out_of_range e("Invalid UTF-32 code point encountered while trying to encode UTF-16 sequence"); +#endif + boost::throw_exception(e); +} +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + + +} // namespace detail + +template +class u32_to_u16_iterator + : public boost::iterator_facade, U16Type, std::bidirectional_iterator_tag, const U16Type> +{ + typedef boost::iterator_facade, U16Type, std::bidirectional_iterator_tag, const U16Type> base_type; + +#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) + typedef typename std::iterator_traits::value_type base_value_type; + + BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32); + BOOST_STATIC_ASSERT(sizeof(U16Type)*CHAR_BIT == 16); +#endif + +public: + typename base_type::reference + dereference()const + { + if(m_current == 2) + extract_current(); + return m_values[m_current]; + } + bool equal(const u32_to_u16_iterator& that)const + { + if(m_position == that.m_position) + { + // Both m_currents must be equal, or both even + // this is the same as saying their sum must be even: + return (m_current + that.m_current) & 1u ? false : true; + } + return false; + } + void increment() + { + // if we have a pending read then read now, so that we know whether + // to skip a position, or move to a low-surrogate: + if(m_current == 2) + { + // pending read: + extract_current(); + } + // move to the next surrogate position: + ++m_current; + // if we've reached the end skip a position: + if(m_values[m_current] == 0) + { + m_current = 2; + ++m_position; + } + } + void decrement() + { + if(m_current != 1) + { + // decrementing an iterator always leads to a valid position: + --m_position; + extract_current(); + m_current = m_values[1] ? 1 : 0; + } + else + { + m_current = 0; + } + } + BaseIterator base()const + { + return m_position; + } + // construct: + u32_to_u16_iterator() : m_position(), m_current(0) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + } + u32_to_u16_iterator(BaseIterator b) : m_position(b), m_current(2) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + } +private: + + void extract_current()const + { + // begin by checking for a code point out of range: + ::boost::uint32_t v = *m_position; + if(v >= 0x10000u) + { + if(v > 0x10FFFFu) + detail::invalid_utf32_code_point(*m_position); + // split into two surrogates: + m_values[0] = static_cast(v >> 10) + detail::high_surrogate_base; + m_values[1] = static_cast(v & detail::ten_bit_mask) + detail::low_surrogate_base; + m_current = 0; + BOOST_REGEX_ASSERT(detail::is_high_surrogate(m_values[0])); + BOOST_REGEX_ASSERT(detail::is_low_surrogate(m_values[1])); + } + else + { + // 16-bit code point: + m_values[0] = static_cast(*m_position); + m_values[1] = 0; + m_current = 0; + // value must not be a surrogate: + if(detail::is_surrogate(m_values[0])) + detail::invalid_utf32_code_point(*m_position); + } + } + BaseIterator m_position; + mutable U16Type m_values[3]; + mutable unsigned m_current; +}; + +template +class u16_to_u32_iterator + : public boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> +{ + typedef boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type; + // special values for pending iterator reads: + BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu); + +#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) + typedef typename std::iterator_traits::value_type base_value_type; + + BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 16); + BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32); +#endif + +public: + typename base_type::reference + dereference()const + { + if(m_value == pending_read) + extract_current(); + return m_value; + } + bool equal(const u16_to_u32_iterator& that)const + { + return m_position == that.m_position; + } + void increment() + { + // skip high surrogate first if there is one: + if(detail::is_high_surrogate(*m_position)) ++m_position; + ++m_position; + m_value = pending_read; + } + void decrement() + { + --m_position; + // if we have a low surrogate then go back one more: + if(detail::is_low_surrogate(*m_position)) + --m_position; + m_value = pending_read; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u16_to_u32_iterator() : m_position() + { + m_value = pending_read; + } + u16_to_u32_iterator(BaseIterator b) : m_position(b) + { + m_value = pending_read; + } + // + // Range checked version: + // + u16_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b) + { + m_value = pending_read; + // + // The range must not start with a low surrogate, or end in a high surrogate, + // otherwise we run the risk of running outside the underlying input range. + // Likewise b must not be located at a low surrogate. + // + boost::uint16_t val; + if(start != end) + { + if((b != start) && (b != end)) + { + val = *b; + if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u)) + invalid_code_point(val); + } + val = *start; + if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u)) + invalid_code_point(val); + val = *--end; + if(detail::is_high_surrogate(val)) + invalid_code_point(val); + } + } +private: + static void invalid_code_point(::boost::uint16_t val) + { +#ifndef BOOST_NO_STD_LOCALE + std::stringstream ss; + ss << "Misplaced UTF-16 surrogate U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-32 sequence"; + std::out_of_range e(ss.str()); +#else + std::out_of_range e("Misplaced UTF-16 surrogate encountered while trying to encode UTF-32 sequence"); +#endif + boost::throw_exception(e); + } + void extract_current()const + { + m_value = static_cast(static_cast< ::boost::uint16_t>(*m_position)); + // if the last value is a high surrogate then adjust m_position and m_value as needed: + if(detail::is_high_surrogate(*m_position)) + { + // precondition; next value must have be a low-surrogate: + BaseIterator next(m_position); + ::boost::uint16_t t = *++next; + if((t & 0xFC00u) != 0xDC00u) + invalid_code_point(t); + m_value = (m_value - detail::high_surrogate_base) << 10; + m_value |= (static_cast(static_cast< ::boost::uint16_t>(t)) & detail::ten_bit_mask); + } + // postcondition; result must not be a surrogate: + if(detail::is_surrogate(m_value)) + invalid_code_point(static_cast< ::boost::uint16_t>(m_value)); + } + BaseIterator m_position; + mutable U32Type m_value; +}; + +template +class u32_to_u8_iterator + : public boost::iterator_facade, U8Type, std::bidirectional_iterator_tag, const U8Type> +{ + typedef boost::iterator_facade, U8Type, std::bidirectional_iterator_tag, const U8Type> base_type; + +#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) + typedef typename std::iterator_traits::value_type base_value_type; + + BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32); + BOOST_STATIC_ASSERT(sizeof(U8Type)*CHAR_BIT == 8); +#endif + +public: + typename base_type::reference + dereference()const + { + if(m_current == 4) + extract_current(); + return m_values[m_current]; + } + bool equal(const u32_to_u8_iterator& that)const + { + if(m_position == that.m_position) + { + // either the m_current's must be equal, or one must be 0 and + // the other 4: which means neither must have bits 1 or 2 set: + return (m_current == that.m_current) + || (((m_current | that.m_current) & 3) == 0); + } + return false; + } + void increment() + { + // if we have a pending read then read now, so that we know whether + // to skip a position, or move to a low-surrogate: + if(m_current == 4) + { + // pending read: + extract_current(); + } + // move to the next surrogate position: + ++m_current; + // if we've reached the end skip a position: + if(m_values[m_current] == 0) + { + m_current = 4; + ++m_position; + } + } + void decrement() + { + if((m_current & 3) == 0) + { + --m_position; + extract_current(); + m_current = 3; + while(m_current && (m_values[m_current] == 0)) + --m_current; + } + else + --m_current; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u32_to_u8_iterator() : m_position(), m_current(0) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + m_values[3] = 0; + m_values[4] = 0; + } + u32_to_u8_iterator(BaseIterator b) : m_position(b), m_current(4) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + m_values[3] = 0; + m_values[4] = 0; + } +private: + + void extract_current()const + { + boost::uint32_t c = *m_position; + if(c > 0x10FFFFu) + detail::invalid_utf32_code_point(c); + if(c < 0x80u) + { + m_values[0] = static_cast(c); + m_values[1] = static_cast(0u); + m_values[2] = static_cast(0u); + m_values[3] = static_cast(0u); + } + else if(c < 0x800u) + { + m_values[0] = static_cast(0xC0u + (c >> 6)); + m_values[1] = static_cast(0x80u + (c & 0x3Fu)); + m_values[2] = static_cast(0u); + m_values[3] = static_cast(0u); + } + else if(c < 0x10000u) + { + m_values[0] = static_cast(0xE0u + (c >> 12)); + m_values[1] = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + m_values[2] = static_cast(0x80u + (c & 0x3Fu)); + m_values[3] = static_cast(0u); + } + else + { + m_values[0] = static_cast(0xF0u + (c >> 18)); + m_values[1] = static_cast(0x80u + ((c >> 12) & 0x3Fu)); + m_values[2] = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + m_values[3] = static_cast(0x80u + (c & 0x3Fu)); + } + m_current= 0; + } + BaseIterator m_position; + mutable U8Type m_values[5]; + mutable unsigned m_current; +}; + +template +class u8_to_u32_iterator + : public boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> +{ + typedef boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type; + // special values for pending iterator reads: + BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu); + +#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) + typedef typename std::iterator_traits::value_type base_value_type; + + BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 8); + BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32); +#endif + +public: + typename base_type::reference + dereference()const + { + if(m_value == pending_read) + extract_current(); + return m_value; + } + bool equal(const u8_to_u32_iterator& that)const + { + return m_position == that.m_position; + } + void increment() + { + // We must not start with a continuation character: + if((static_cast(*m_position) & 0xC0) == 0x80) + invalid_sequence(); + // skip high surrogate first if there is one: + unsigned c = detail::utf8_byte_count(*m_position); + if(m_value == pending_read) + { + // Since we haven't read in a value, we need to validate the code points: + for(unsigned i = 0; i < c; ++i) + { + ++m_position; + // We must have a continuation byte: + if((i != c - 1) && ((static_cast(*m_position) & 0xC0) != 0x80)) + invalid_sequence(); + } + } + else + { + std::advance(m_position, c); + } + m_value = pending_read; + } + void decrement() + { + // Keep backtracking until we don't have a trailing character: + unsigned count = 0; + while((*--m_position & 0xC0u) == 0x80u) ++count; + // now check that the sequence was valid: + if(count != detail::utf8_trailing_byte_count(*m_position)) + invalid_sequence(); + m_value = pending_read; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u8_to_u32_iterator() : m_position() + { + m_value = pending_read; + } + u8_to_u32_iterator(BaseIterator b) : m_position(b) + { + m_value = pending_read; + } + // + // Checked constructor: + // + u8_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b) + { + m_value = pending_read; + // + // We must not start with a continuation character, or end with a + // truncated UTF-8 sequence otherwise we run the risk of going past + // the start/end of the underlying sequence: + // + if(start != end) + { + unsigned char v = *start; + if((v & 0xC0u) == 0x80u) + invalid_sequence(); + if((b != start) && (b != end) && ((*b & 0xC0u) == 0x80u)) + invalid_sequence(); + BaseIterator pos = end; + do + { + v = *--pos; + } + while((start != pos) && ((v & 0xC0u) == 0x80u)); + std::ptrdiff_t extra = detail::utf8_byte_count(v); + if(std::distance(pos, end) < extra) + invalid_sequence(); + } + } +private: + static void invalid_sequence() + { + std::out_of_range e("Invalid UTF-8 sequence encountered while trying to encode UTF-32 character"); + boost::throw_exception(e); + } + void extract_current()const + { + m_value = static_cast(static_cast< ::boost::uint8_t>(*m_position)); + // we must not have a continuation character: + if((m_value & 0xC0u) == 0x80u) + invalid_sequence(); + // see how many extra bytes we have: + unsigned extra = detail::utf8_trailing_byte_count(*m_position); + // extract the extra bits, 6 from each extra byte: + BaseIterator next(m_position); + for(unsigned c = 0; c < extra; ++c) + { + ++next; + m_value <<= 6; + // We must have a continuation byte: + if((static_cast(*next) & 0xC0) != 0x80) + invalid_sequence(); + m_value += static_cast(*next) & 0x3Fu; + } + // we now need to remove a few of the leftmost bits, but how many depends + // upon how many extra bytes we've extracted: + static const boost::uint32_t masks[4] = + { + 0x7Fu, + 0x7FFu, + 0xFFFFu, + 0x1FFFFFu, + }; + m_value &= masks[extra]; + // check the result is in range: + if(m_value > static_cast(0x10FFFFu)) + invalid_sequence(); + // The result must not be a surrogate: + if((m_value >= static_cast(0xD800)) && (m_value <= static_cast(0xDFFF))) + invalid_sequence(); + // We should not have had an invalidly encoded UTF8 sequence: + if((extra > 0) && (m_value <= static_cast(masks[extra - 1]))) + invalid_sequence(); + } + BaseIterator m_position; + mutable U32Type m_value; +}; + +template +class utf16_output_iterator +{ +public: + typedef void difference_type; + typedef void value_type; + typedef boost::uint32_t* pointer; + typedef boost::uint32_t& reference; + typedef std::output_iterator_tag iterator_category; + + utf16_output_iterator(const BaseIterator& b) + : m_position(b){} + utf16_output_iterator(const utf16_output_iterator& that) + : m_position(that.m_position){} + utf16_output_iterator& operator=(const utf16_output_iterator& that) + { + m_position = that.m_position; + return *this; + } + const utf16_output_iterator& operator*()const + { + return *this; + } + void operator=(boost::uint32_t val)const + { + push(val); + } + utf16_output_iterator& operator++() + { + return *this; + } + utf16_output_iterator& operator++(int) + { + return *this; + } + BaseIterator base()const + { + return m_position; + } +private: + void push(boost::uint32_t v)const + { + if(v >= 0x10000u) + { + // begin by checking for a code point out of range: + if(v > 0x10FFFFu) + detail::invalid_utf32_code_point(v); + // split into two surrogates: + *m_position++ = static_cast(v >> 10) + detail::high_surrogate_base; + *m_position++ = static_cast(v & detail::ten_bit_mask) + detail::low_surrogate_base; + } + else + { + // 16-bit code point: + // value must not be a surrogate: + if(detail::is_surrogate(v)) + detail::invalid_utf32_code_point(v); + *m_position++ = static_cast(v); + } + } + mutable BaseIterator m_position; +}; + +template +class utf8_output_iterator +{ +public: + typedef void difference_type; + typedef void value_type; + typedef boost::uint32_t* pointer; + typedef boost::uint32_t& reference; + typedef std::output_iterator_tag iterator_category; + + utf8_output_iterator(const BaseIterator& b) + : m_position(b){} + utf8_output_iterator(const utf8_output_iterator& that) + : m_position(that.m_position){} + utf8_output_iterator& operator=(const utf8_output_iterator& that) + { + m_position = that.m_position; + return *this; + } + const utf8_output_iterator& operator*()const + { + return *this; + } + void operator=(boost::uint32_t val)const + { + push(val); + } + utf8_output_iterator& operator++() + { + return *this; + } + utf8_output_iterator& operator++(int) + { + return *this; + } + BaseIterator base()const + { + return m_position; + } +private: + void push(boost::uint32_t c)const + { + if(c > 0x10FFFFu) + detail::invalid_utf32_code_point(c); + if(c < 0x80u) + { + *m_position++ = static_cast(c); + } + else if(c < 0x800u) + { + *m_position++ = static_cast(0xC0u + (c >> 6)); + *m_position++ = static_cast(0x80u + (c & 0x3Fu)); + } + else if(c < 0x10000u) + { + *m_position++ = static_cast(0xE0u + (c >> 12)); + *m_position++ = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + *m_position++ = static_cast(0x80u + (c & 0x3Fu)); + } + else + { + *m_position++ = static_cast(0xF0u + (c >> 18)); + *m_position++ = static_cast(0x80u + ((c >> 12) & 0x3Fu)); + *m_position++ = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + *m_position++ = static_cast(0x80u + (c & 0x3Fu)); + } + } + mutable BaseIterator m_position; +}; + +} // namespace boost + +#endif // BOOST_REGEX_UNICODE_ITERATOR_HPP + diff --git a/include/boost/regex/v4/w32_regex_traits.hpp b/include/boost/regex/v4/w32_regex_traits.hpp index 1f38bf36..e1ac5d5f 100644 --- a/include/boost/regex/v4/w32_regex_traits.hpp +++ b/include/boost/regex/v4/w32_regex_traits.hpp @@ -34,9 +34,18 @@ #include #endif #ifndef BOOST_REGEX_OBJECT_CACHE_HPP -#include +#include #endif +#define VC_EXTRALEAN +#define WIN32_LEAN_AND_MEAN +#include + +#if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(UNDER_CE) +#pragma comment(lib, "user32.lib") +#endif + + #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable: 4103) @@ -75,53 +84,35 @@ typedef ::boost::shared_ptr cat_type; // placeholder for dll HANDLE. // // then add wrappers around the actual Win32 API's (ie implementation hiding): // -BOOST_REGEX_DECL lcid_type BOOST_REGEX_CALL w32_get_default_locale(); -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(char, lcid_type); +lcid_type BOOST_REGEX_CALL w32_get_default_locale(); +bool BOOST_REGEX_CALL w32_is_lower(char, lcid_type); #ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(wchar_t, lcid_type); -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(unsigned short ca, lcid_type state_id); +bool BOOST_REGEX_CALL w32_is_lower(wchar_t, lcid_type); #endif -#endif -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(char, lcid_type); +bool BOOST_REGEX_CALL w32_is_upper(char, lcid_type); #ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(wchar_t, lcid_type); -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(unsigned short ca, lcid_type state_id); +bool BOOST_REGEX_CALL w32_is_upper(wchar_t, lcid_type); #endif -#endif -BOOST_REGEX_DECL cat_type BOOST_REGEX_CALL w32_cat_open(const std::string& name); -BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::string& def); +cat_type BOOST_REGEX_CALL w32_cat_open(const std::string& name); +std::string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::string& def); #ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::wstring& def); -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL std::basic_string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::basic_string& def); +std::wstring BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::wstring& def); #endif -#endif -BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_transform(lcid_type state_id, const char* p1, const char* p2); +std::string BOOST_REGEX_CALL w32_transform(lcid_type state_id, const char* p1, const char* p2); #ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_transform(lcid_type state_id, const wchar_t* p1, const wchar_t* p2); -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL std::basic_string BOOST_REGEX_CALL w32_transform(lcid_type state_id, const unsigned short* p1, const unsigned short* p2); +std::wstring BOOST_REGEX_CALL w32_transform(lcid_type state_id, const wchar_t* p1, const wchar_t* p2); #endif -#endif -BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_tolower(char c, lcid_type); +char BOOST_REGEX_CALL w32_tolower(char c, lcid_type); #ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_tolower(wchar_t c, lcid_type); -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_tolower(unsigned short c, lcid_type state_id); +wchar_t BOOST_REGEX_CALL w32_tolower(wchar_t c, lcid_type); #endif -#endif -BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_toupper(char c, lcid_type); +char BOOST_REGEX_CALL w32_toupper(char c, lcid_type); #ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_toupper(wchar_t c, lcid_type); +wchar_t BOOST_REGEX_CALL w32_toupper(wchar_t c, lcid_type); #endif -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type, boost::uint32_t mask, char c); +bool BOOST_REGEX_CALL w32_is(lcid_type, boost::uint32_t mask, char c); #ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type, boost::uint32_t mask, wchar_t c); -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type state_id, boost::uint32_t m, unsigned short c); -#endif +bool BOOST_REGEX_CALL w32_is(lcid_type, boost::uint32_t mask, wchar_t c); #endif // // class w32_regex_traits_base: @@ -253,14 +244,14 @@ typename w32_regex_traits_char_layer::string_type // specialised version for narrow characters: // template <> -class BOOST_REGEX_DECL w32_regex_traits_char_layer : public w32_regex_traits_base +class w32_regex_traits_char_layer : public w32_regex_traits_base { typedef std::string string_type; public: w32_regex_traits_char_layer(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l) : w32_regex_traits_base(l) { - init(); + init(); } regex_constants::syntax_type syntax_type(char c)const @@ -284,6 +275,7 @@ private: regex_constants::syntax_type m_char_map[1u << CHAR_BIT]; char m_lower_map[1u << CHAR_BIT]; boost::uint16_t m_type_map[1u << CHAR_BIT]; + template void init(); }; @@ -722,6 +714,498 @@ static_mutex& w32_regex_traits::get_mutex_inst() } #endif +namespace BOOST_REGEX_DETAIL_NS { + +#ifdef BOOST_NO_ANSI_APIS + inline UINT get_code_page_for_locale_id(lcid_type idx) + { + WCHAR code_page_string[7]; + if (::GetLocaleInfoW(idx, LOCALE_IDEFAULTANSICODEPAGE, code_page_string, 7) == 0) + return 0; + + return static_cast(_wtol(code_page_string)); +} +#endif + + template + inline void w32_regex_traits_char_layer::init() + { + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: + std::memset(m_char_map, 0, sizeof(m_char_map)); + cat_type cat; + std::string cat_name(w32_regex_traits::get_catalog_name()); + if (cat_name.size()) + { + cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name); + if (!cat) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + ::boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if (cat) + { + for (regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i, get_default_syntax(i)); + for (string_type::size_type j = 0; j < mss.size(); ++j) + { + m_char_map[static_cast(mss[j])] = i; + } + } + } + else + { + for (regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + const char* ptr = get_default_syntax(i); + while (ptr && *ptr) + { + m_char_map[static_cast(*ptr)] = i; + ++ptr; + } + } + } + // + // finish off by calculating our escape types: + // + unsigned char i = 'A'; + do + { + if (m_char_map[i] == 0) + { + if (::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0002u, (char)i)) + m_char_map[i] = regex_constants::escape_type_class; + else if (::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0001u, (char)i)) + m_char_map[i] = regex_constants::escape_type_not_class; + } + } while (0xFF != i++); + + // + // fill in lower case map: + // + char char_map[1 << CHAR_BIT]; + for (int ii = 0; ii < (1 << CHAR_BIT); ++ii) + char_map[ii] = static_cast(ii); +#ifndef BOOST_NO_ANSI_APIS + int r = ::LCMapStringA(this->m_locale, LCMAP_LOWERCASE, char_map, 1 << CHAR_BIT, this->m_lower_map, 1 << CHAR_BIT); + BOOST_REGEX_ASSERT(r != 0); +#else + UINT code_page = get_code_page_for_locale_id(this->m_locale); + BOOST_REGEX_ASSERT(code_page != 0); + + WCHAR wide_char_map[1 << CHAR_BIT]; + int conv_r = ::MultiByteToWideChar(code_page, 0, char_map, 1 << CHAR_BIT, wide_char_map, 1 << CHAR_BIT); + BOOST_REGEX_ASSERT(conv_r != 0); + + WCHAR wide_lower_map[1 << CHAR_BIT]; + int r = ::LCMapStringW(this->m_locale, LCMAP_LOWERCASE, wide_char_map, 1 << CHAR_BIT, wide_lower_map, 1 << CHAR_BIT); + BOOST_REGEX_ASSERT(r != 0); + + conv_r = ::WideCharToMultiByte(code_page, 0, wide_lower_map, r, this->m_lower_map, 1 << CHAR_BIT, NULL, NULL); + BOOST_REGEX_ASSERT(conv_r != 0); +#endif + if (r < (1 << CHAR_BIT)) + { + // if we have multibyte characters then not all may have been given + // a lower case mapping: + for (int jj = r; jj < (1 << CHAR_BIT); ++jj) + this->m_lower_map[jj] = static_cast(jj); + } + +#ifndef BOOST_NO_ANSI_APIS + r = ::GetStringTypeExA(this->m_locale, CT_CTYPE1, char_map, 1 << CHAR_BIT, this->m_type_map); +#else + r = ::GetStringTypeExW(this->m_locale, CT_CTYPE1, wide_char_map, 1 << CHAR_BIT, this->m_type_map); +#endif + BOOST_REGEX_ASSERT(0 != r); + } + + inline lcid_type BOOST_REGEX_CALL w32_get_default_locale() + { + return ::GetUserDefaultLCID(); + } + + inline bool BOOST_REGEX_CALL w32_is_lower(char c, lcid_type idx) + { +#ifndef BOOST_NO_ANSI_APIS + WORD mask; + if (::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER)) + return true; + return false; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return false; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return false; + + WORD mask; + if (::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_LOWER)) + return true; + return false; +#endif + } + + inline bool BOOST_REGEX_CALL w32_is_lower(wchar_t c, lcid_type idx) + { + WORD mask; + if (::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER)) + return true; + return false; + } + + inline bool BOOST_REGEX_CALL w32_is_upper(char c, lcid_type idx) + { +#ifndef BOOST_NO_ANSI_APIS + WORD mask; + if (::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER)) + return true; + return false; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return false; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return false; + + WORD mask; + if (::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_UPPER)) + return true; + return false; +#endif + } + + inline bool BOOST_REGEX_CALL w32_is_upper(wchar_t c, lcid_type idx) + { + WORD mask; + if (::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER)) + return true; + return false; + } + + inline void free_module(void* mod) + { + ::FreeLibrary(static_cast(mod)); + } + + inline cat_type BOOST_REGEX_CALL w32_cat_open(const std::string& name) + { +#ifndef BOOST_NO_ANSI_APIS + cat_type result(::LoadLibraryA(name.c_str()), &free_module); + return result; +#else + LPWSTR wide_name = (LPWSTR)_alloca((name.size() + 1) * sizeof(WCHAR)); + if (::MultiByteToWideChar(CP_ACP, 0, name.c_str(), name.size(), wide_name, name.size() + 1) == 0) + return cat_type(); + + cat_type result(::LoadLibraryW(wide_name), &free_module); + return result; +#endif + } + + inline std::string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::string& def) + { +#ifndef BOOST_NO_ANSI_APIS + char buf[256]; + if (0 == ::LoadStringA( + static_cast(cat.get()), + i, + buf, + 256 + )) + { + return def; + } +#else + WCHAR wbuf[256]; + int r = ::LoadStringW( + static_cast(cat.get()), + i, + wbuf, + 256 + ); + if (r == 0) + return def; + + + int buf_size = 1 + ::WideCharToMultiByte(CP_ACP, 0, wbuf, r, NULL, 0, NULL, NULL); + LPSTR buf = (LPSTR)_alloca(buf_size); + if (::WideCharToMultiByte(CP_ACP, 0, wbuf, r, buf, buf_size, NULL, NULL) == 0) + return def; // failed conversion. +#endif + return std::string(buf); + } + +#ifndef BOOST_NO_WREGEX + inline std::wstring BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::wstring& def) + { + wchar_t buf[256]; + if (0 == ::LoadStringW( + static_cast(cat.get()), + i, + buf, + 256 + )) + { + return def; + } + return std::wstring(buf); + } +#endif + inline std::string BOOST_REGEX_CALL w32_transform(lcid_type idx, const char* p1, const char* p2) + { +#ifndef BOOST_NO_ANSI_APIS + int bytes = ::LCMapStringA( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + 0, // destination buffer + 0 // size of destination buffer + ); + if (!bytes) + return std::string(p1, p2); + std::string result(++bytes, '\0'); + bytes = ::LCMapStringA( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + &*result.begin(), // destination buffer + bytes // size of destination buffer + ); +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return std::string(p1, p2); + + int src_len = static_cast(p2 - p1); + LPWSTR wide_p1 = (LPWSTR)_alloca((src_len + 1) * 2); + if (::MultiByteToWideChar(code_page, 0, p1, src_len, wide_p1, src_len + 1) == 0) + return std::string(p1, p2); + + int bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + wide_p1, // source string + src_len, // number of characters in source string + 0, // destination buffer + 0 // size of destination buffer + ); + if (!bytes) + return std::string(p1, p2); + std::string result(++bytes, '\0'); + bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + wide_p1, // source string + src_len, // number of characters in source string + (LPWSTR) & *result.begin(), // destination buffer + bytes // size of destination buffer + ); +#endif + if (bytes > static_cast(result.size())) + return std::string(p1, p2); + while (result.size() && result[result.size() - 1] == '\0') + { + result.erase(result.size() - 1); + } + return result; + } + +#ifndef BOOST_NO_WREGEX + inline std::wstring BOOST_REGEX_CALL w32_transform(lcid_type idx, const wchar_t* p1, const wchar_t* p2) + { + int bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + 0, // destination buffer + 0 // size of destination buffer + ); + if (!bytes) + return std::wstring(p1, p2); + std::string result(++bytes, '\0'); + bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + reinterpret_cast(&*result.begin()), // destination buffer *of bytes* + bytes // size of destination buffer + ); + if (bytes > static_cast(result.size())) + return std::wstring(p1, p2); + while (result.size() && result[result.size() - 1] == L'\0') + { + result.erase(result.size() - 1); + } + std::wstring r2; + for (std::string::size_type i = 0; i < result.size(); ++i) + r2.append(1, static_cast(static_cast(result[i]))); + return r2; + } +#endif + inline char BOOST_REGEX_CALL w32_tolower(char c, lcid_type idx) + { + char result[2]; +#ifndef BOOST_NO_ANSI_APIS + int b = ::LCMapStringA( + idx, // locale identifier + LCMAP_LOWERCASE, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return c; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return c; + + WCHAR wide_result; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_LOWERCASE, // mapping transformation type + &wide_c, // source string + 1, // number of characters in source string + &wide_result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; + + if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0) + return c; // No single byte lower case equivalent available +#endif + return result[0]; + } + +#ifndef BOOST_NO_WREGEX + inline wchar_t BOOST_REGEX_CALL w32_tolower(wchar_t c, lcid_type idx) + { + wchar_t result[2]; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_LOWERCASE, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; + return result[0]; + } +#endif + inline char BOOST_REGEX_CALL w32_toupper(char c, lcid_type idx) + { + char result[2]; +#ifndef BOOST_NO_ANSI_APIS + int b = ::LCMapStringA( + idx, // locale identifier + LCMAP_UPPERCASE, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return c; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return c; + + WCHAR wide_result; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_UPPERCASE, // mapping transformation type + &wide_c, // source string + 1, // number of characters in source string + &wide_result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; + + if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0) + return c; // No single byte upper case equivalent available. +#endif + return result[0]; + } + +#ifndef BOOST_NO_WREGEX + inline wchar_t BOOST_REGEX_CALL w32_toupper(wchar_t c, lcid_type idx) + { + wchar_t result[2]; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_UPPERCASE, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; + return result[0]; + } +#endif + inline bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, char c) + { + WORD mask; +#ifndef BOOST_NO_ANSI_APIS + if (::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) + return true; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return false; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return false; + + if (::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) + return true; +#endif + if ((m & w32_regex_traits_implementation::mask_word) && (c == '_')) + return true; + return false; + } + +#ifndef BOOST_NO_WREGEX + inline bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, wchar_t c) + { + WORD mask; + if (::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) + return true; + if ((m & w32_regex_traits_implementation::mask_word) && (c == '_')) + return true; + if ((m & w32_regex_traits_implementation::mask_unicode) && (c > 0xff)) + return true; + return false; + } +#endif + +} // BOOST_REGEX_DETAIL_NS + } // boost diff --git a/include/boost/regex/v5/basic_regex.hpp b/include/boost/regex/v5/basic_regex.hpp new file mode 100644 index 00000000..5c73775f --- /dev/null +++ b/include/boost/regex/v5/basic_regex.hpp @@ -0,0 +1,734 @@ +/* + * + * Copyright (c) 1998-2004 John Maddock + * Copyright 2011 Garmin Ltd. or its subsidiaries + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org/ for most recent version. + * FILE basic_regex.cpp + * VERSION see + * DESCRIPTION: Declares template class basic_regex. + */ + +#ifndef BOOST_REGEX_V5_BASIC_REGEX_HPP +#define BOOST_REGEX_V5_BASIC_REGEX_HPP + +#include + +namespace boost{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable : 4251) +#if BOOST_REGEX_MSVC < 1700 +# pragma warning(disable : 4231) +#endif +#if BOOST_REGEX_MSVC < 1600 +#pragma warning(disable : 4660) +#endif +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +namespace BOOST_REGEX_DETAIL_NS{ + +// +// forward declaration, we will need this one later: +// +template +class basic_regex_parser; + +template +void bubble_down_one(I first, I last) +{ + if(first != last) + { + I next = last - 1; + while((next != first) && (*next < *(next-1))) + { + (next-1)->swap(*next); + --next; + } + } +} + +static const int hash_value_mask = 1 << (std::numeric_limits::digits - 1); + +template +inline int hash_value_from_capture_name(Iterator i, Iterator j) +{ + std::size_t r = 0; + while (i != j) + { + r ^= *i + 0x9e3779b9 + (r << 6) + (r >> 2); + ++i; + } + r %= ((std::numeric_limits::max)()); + return static_cast(r) | hash_value_mask; +} + +class named_subexpressions +{ +public: + struct name + { + template + name(const charT* i, const charT* j, int idx) + : index(idx) + { + hash = hash_value_from_capture_name(i, j); + } + name(int h, int idx) + : index(idx), hash(h) + { + } + int index; + int hash; + bool operator < (const name& other)const + { + return hash < other.hash; + } + bool operator == (const name& other)const + { + return hash == other.hash; + } + void swap(name& other) + { + std::swap(index, other.index); + std::swap(hash, other.hash); + } + }; + + typedef std::vector::const_iterator const_iterator; + typedef std::pair range_type; + + named_subexpressions(){} + + template + void set_name(const charT* i, const charT* j, int index) + { + m_sub_names.push_back(name(i, j, index)); + bubble_down_one(m_sub_names.begin(), m_sub_names.end()); + } + template + int get_id(const charT* i, const charT* j)const + { + name t(i, j, 0); + typename std::vector::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); + if((pos != m_sub_names.end()) && (*pos == t)) + { + return pos->index; + } + return -1; + } + template + range_type equal_range(const charT* i, const charT* j)const + { + name t(i, j, 0); + return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t); + } + int get_id(int h)const + { + name t(h, 0); + std::vector::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); + if((pos != m_sub_names.end()) && (*pos == t)) + { + return pos->index; + } + return -1; + } + range_type equal_range(int h)const + { + name t(h, 0); + return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t); + } +private: + std::vector m_sub_names; +}; + +// +// class regex_data: +// represents the data we wish to expose to the matching algorithms. +// +template +struct regex_data : public named_subexpressions +{ + typedef regex_constants::syntax_option_type flag_type; + typedef std::size_t size_type; + + regex_data(const ::std::shared_ptr< + ::boost::regex_traits_wrapper >& t) + : m_ptraits(t), m_flags(0), m_status(0), m_expression(0), m_expression_len(0), + m_mark_count(0), m_first_state(0), m_restart_type(0), + m_startmap{ 0 }, + m_can_be_null(0), m_word_mask(0), m_has_recursions(false), m_disable_match_any(false) {} + regex_data() + : m_ptraits(new ::boost::regex_traits_wrapper()), m_flags(0), m_status(0), m_expression(0), m_expression_len(0), + m_mark_count(0), m_first_state(0), m_restart_type(0), + m_startmap{ 0 }, + m_can_be_null(0), m_word_mask(0), m_has_recursions(false), m_disable_match_any(false) {} + + ::std::shared_ptr< + ::boost::regex_traits_wrapper + > m_ptraits; // traits class instance + flag_type m_flags; // flags with which we were compiled + int m_status; // error code (0 implies OK). + const charT* m_expression; // the original expression + std::ptrdiff_t m_expression_len; // the length of the original expression + size_type m_mark_count; // the number of marked sub-expressions + BOOST_REGEX_DETAIL_NS::re_syntax_base* m_first_state; // the first state of the machine + unsigned m_restart_type; // search optimisation type + unsigned char m_startmap[1 << CHAR_BIT]; // which characters can start a match + unsigned int m_can_be_null; // whether we can match a null string + BOOST_REGEX_DETAIL_NS::raw_storage m_data; // the buffer in which our states are constructed + typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character + std::vector< + std::pair< + std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*. + bool m_has_recursions; // whether we have recursive expressions; + bool m_disable_match_any; // when set we need to disable the match_any flag as it causes different/buggy behaviour. +}; +// +// class basic_regex_implementation +// pimpl implementation class for basic_regex. +// +template +class basic_regex_implementation + : public regex_data +{ +public: + typedef regex_constants::syntax_option_type flag_type; + typedef std::ptrdiff_t difference_type; + typedef std::size_t size_type; + typedef typename traits::locale_type locale_type; + typedef const charT* const_iterator; + + basic_regex_implementation(){} + basic_regex_implementation(const ::std::shared_ptr< + ::boost::regex_traits_wrapper >& t) + : regex_data(t) {} + void assign(const charT* arg_first, + const charT* arg_last, + flag_type f) + { + regex_data* pdat = this; + basic_regex_parser parser(pdat); + parser.parse(arg_first, arg_last, f); + } + + locale_type imbue(locale_type l) + { + return this->m_ptraits->imbue(l); + } + locale_type getloc()const + { + return this->m_ptraits->getloc(); + } + std::basic_string str()const + { + std::basic_string result; + if(this->m_status == 0) + result = std::basic_string(this->m_expression, this->m_expression_len); + return result; + } + const_iterator expression()const + { + return this->m_expression; + } + std::pair subexpression(std::size_t n)const + { + const std::pair& pi = this->m_subs.at(n); + std::pair p(expression() + pi.first, expression() + pi.second); + return p; + } + // + // begin, end: + const_iterator begin()const + { + return (this->m_status ? 0 : this->m_expression); + } + const_iterator end()const + { + return (this->m_status ? 0 : this->m_expression + this->m_expression_len); + } + flag_type flags()const + { + return this->m_flags; + } + size_type size()const + { + return this->m_expression_len; + } + int status()const + { + return this->m_status; + } + size_type mark_count()const + { + return this->m_mark_count - 1; + } + const BOOST_REGEX_DETAIL_NS::re_syntax_base* get_first_state()const + { + return this->m_first_state; + } + unsigned get_restart_type()const + { + return this->m_restart_type; + } + const unsigned char* get_map()const + { + return this->m_startmap; + } + const ::boost::regex_traits_wrapper& get_traits()const + { + return *(this->m_ptraits); + } + bool can_be_null()const + { + return this->m_can_be_null; + } + const regex_data& get_data()const + { + basic_regex_implementation const* p = this; + return *static_cast*>(p); + } +}; + +} // namespace BOOST_REGEX_DETAIL_NS +// +// class basic_regex: +// represents the compiled +// regular expression: +// + +#ifdef BOOST_REGEX_NO_FWD +template > +#else +template +#endif +class basic_regex : public regbase +{ +public: + // typedefs: + typedef std::size_t traits_size_type; + typedef typename traits::string_type traits_string_type; + typedef charT char_type; + typedef traits traits_type; + + typedef charT value_type; + typedef charT& reference; + typedef const charT& const_reference; + typedef const charT* const_iterator; + typedef const_iterator iterator; + typedef std::ptrdiff_t difference_type; + typedef std::size_t size_type; + typedef regex_constants::syntax_option_type flag_type; + // locale_type + // placeholder for actual locale type used by the + // traits class to localise *this. + typedef typename traits::locale_type locale_type; + +public: + explicit basic_regex(){} + explicit basic_regex(const charT* p, flag_type f = regex_constants::normal) + { + assign(p, f); + } + basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) + { + assign(p1, p2, f); + } + basic_regex(const charT* p, size_type len, flag_type f) + { + assign(p, len, f); + } + basic_regex(const basic_regex& that) + : m_pimpl(that.m_pimpl) {} + ~basic_regex(){} + basic_regex& operator=(const basic_regex& that) + { + return assign(that); + } + basic_regex& operator=(const charT* ptr) + { + return assign(ptr); + } + + // + // assign: + basic_regex& assign(const basic_regex& that) + { + m_pimpl = that.m_pimpl; + return *this; + } + basic_regex& assign(const charT* p, flag_type f = regex_constants::normal) + { + return assign(p, p + traits::length(p), f); + } + basic_regex& assign(const charT* p, size_type len, flag_type f) + { + return assign(p, p + len, f); + } +private: + basic_regex& do_assign(const charT* p1, + const charT* p2, + flag_type f); +public: + basic_regex& assign(const charT* p1, + const charT* p2, + flag_type f = regex_constants::normal) + { + return do_assign(p1, p2, f); + } + + template + unsigned int set_expression(const std::basic_string& p, flag_type f = regex_constants::normal) + { + return set_expression(p.data(), p.data() + p.size(), f); + } + + template + explicit basic_regex(const std::basic_string& p, flag_type f = regex_constants::normal) + { + assign(p, f); + } + + template + basic_regex(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal) + { + typedef typename traits::string_type seq_type; + seq_type a(arg_first, arg_last); + if(!a.empty()) + assign(static_cast(&*a.begin()), static_cast(&*a.begin() + a.size()), f); + else + assign(static_cast(0), static_cast(0), f); + } + + template + basic_regex& operator=(const std::basic_string& p) + { + return assign(p.data(), p.data() + p.size(), regex_constants::normal); + } + + template + basic_regex& assign( + const std::basic_string& s, + flag_type f = regex_constants::normal) + { + return assign(s.data(), s.data() + s.size(), f); + } + + template + basic_regex& assign(InputIterator arg_first, + InputIterator arg_last, + flag_type f = regex_constants::normal) + { + typedef typename traits::string_type seq_type; + seq_type a(arg_first, arg_last); + if(a.size()) + { + const charT* p1 = &*a.begin(); + const charT* p2 = &*a.begin() + a.size(); + return assign(p1, p2, f); + } + return assign(static_cast(0), static_cast(0), f); + } + + // + // locale: + locale_type imbue(locale_type l); + locale_type getloc()const + { + return m_pimpl.get() ? m_pimpl->getloc() : locale_type(); + } + // + // getflags: + // retained for backwards compatibility only, "flags" + // is now the preferred name: + flag_type getflags()const + { + return flags(); + } + flag_type flags()const + { + return m_pimpl.get() ? m_pimpl->flags() : 0; + } + // + // str: + std::basic_string str()const + { + return m_pimpl.get() ? m_pimpl->str() : std::basic_string(); + } + // + // begin, end, subexpression: + std::pair subexpression(std::size_t n)const + { +#ifdef BOOST_REGEX_STANDALONE + if (!m_pimpl.get()) + throw std::logic_error("Can't access subexpressions in an invalid regex."); +#else + if(!m_pimpl.get()) + boost::throw_exception(std::logic_error("Can't access subexpressions in an invalid regex.")); +#endif + return m_pimpl->subexpression(n); + } + const_iterator begin()const + { + return (m_pimpl.get() ? m_pimpl->begin() : 0); + } + const_iterator end()const + { + return (m_pimpl.get() ? m_pimpl->end() : 0); + } + // + // swap: + void swap(basic_regex& that)throw() + { + m_pimpl.swap(that.m_pimpl); + } + // + // size: + size_type size()const + { + return (m_pimpl.get() ? m_pimpl->size() : 0); + } + // + // max_size: + size_type max_size()const + { + return UINT_MAX; + } + // + // empty: + bool empty()const + { + return (m_pimpl.get() ? 0 != m_pimpl->status() : true); + } + + size_type mark_count()const + { + return (m_pimpl.get() ? m_pimpl->mark_count() : 0); + } + + int status()const + { + return (m_pimpl.get() ? m_pimpl->status() : regex_constants::error_empty); + } + + int compare(const basic_regex& that) const + { + if(m_pimpl.get() == that.m_pimpl.get()) + return 0; + if(!m_pimpl.get()) + return -1; + if(!that.m_pimpl.get()) + return 1; + if(status() != that.status()) + return status() - that.status(); + if(flags() != that.flags()) + return flags() - that.flags(); + return str().compare(that.str()); + } + bool operator==(const basic_regex& e)const + { + return compare(e) == 0; + } + bool operator != (const basic_regex& e)const + { + return compare(e) != 0; + } + bool operator<(const basic_regex& e)const + { + return compare(e) < 0; + } + bool operator>(const basic_regex& e)const + { + return compare(e) > 0; + } + bool operator<=(const basic_regex& e)const + { + return compare(e) <= 0; + } + bool operator>=(const basic_regex& e)const + { + return compare(e) >= 0; + } + + // + // The following are deprecated as public interfaces + // but are available for compatibility with earlier versions. + const charT* expression()const + { + return (m_pimpl.get() && !m_pimpl->status() ? m_pimpl->expression() : 0); + } + unsigned int set_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) + { + assign(p1, p2, f | regex_constants::no_except); + return status(); + } + unsigned int set_expression(const charT* p, flag_type f = regex_constants::normal) + { + assign(p, f | regex_constants::no_except); + return status(); + } + unsigned int error_code()const + { + return status(); + } + // + // private access methods: + // + const BOOST_REGEX_DETAIL_NS::re_syntax_base* get_first_state()const + { + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_first_state(); + } + unsigned get_restart_type()const + { + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_restart_type(); + } + const unsigned char* get_map()const + { + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_map(); + } + const ::boost::regex_traits_wrapper& get_traits()const + { + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_traits(); + } + bool can_be_null()const + { + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); + return m_pimpl->can_be_null(); + } + const BOOST_REGEX_DETAIL_NS::regex_data& get_data()const + { + BOOST_REGEX_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_data(); + } + std::shared_ptr get_named_subs()const + { + return m_pimpl; + } + +private: + std::shared_ptr > m_pimpl; +}; + +// +// out of line members; +// these are the only members that mutate the basic_regex object, +// and are designed to provide the strong exception guarantee +// (in the event of a throw, the state of the object remains unchanged). +// +template +basic_regex& basic_regex::do_assign(const charT* p1, + const charT* p2, + flag_type f) +{ + std::shared_ptr > temp; + if(!m_pimpl.get()) + { + temp = std::shared_ptr >(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation()); + } + else + { + temp = std::shared_ptr >(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation(m_pimpl->m_ptraits)); + } + temp->assign(p1, p2, f); + temp.swap(m_pimpl); + return *this; +} + +template +typename basic_regex::locale_type basic_regex::imbue(locale_type l) +{ + std::shared_ptr > temp(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation()); + locale_type result = temp->imbue(l); + temp.swap(m_pimpl); + return result; +} + +// +// non-members: +// +template +void swap(basic_regex& e1, basic_regex& e2) +{ + e1.swap(e2); +} + +template +std::basic_ostream& + operator << (std::basic_ostream& os, + const basic_regex& e) +{ + return (os << e.str()); +} + +// +// class reg_expression: +// this is provided for backwards compatibility only, +// it is deprecated, no not use! +// +#ifdef BOOST_REGEX_NO_FWD +template > +#else +template +#endif +class reg_expression : public basic_regex +{ +public: + typedef typename basic_regex::flag_type flag_type; + typedef typename basic_regex::size_type size_type; + explicit reg_expression(){} + explicit reg_expression(const charT* p, flag_type f = regex_constants::normal) + : basic_regex(p, f){} + reg_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) + : basic_regex(p1, p2, f){} + reg_expression(const charT* p, size_type len, flag_type f) + : basic_regex(p, len, f){} + reg_expression(const reg_expression& that) + : basic_regex(that) {} + ~reg_expression(){} + reg_expression& operator=(const reg_expression& that) + { + return this->assign(that); + } + + template + explicit reg_expression(const std::basic_string& p, flag_type f = regex_constants::normal) + : basic_regex(p, f) + { + } + + template + reg_expression(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal) + : basic_regex(arg_first, arg_last, f) + { + } + + template + reg_expression& operator=(const std::basic_string& p) + { + this->assign(p); + return *this; + } + +}; + +#ifdef BOOST_REGEX_MSVC +#pragma warning (pop) +#endif + +} // namespace boost + +#endif diff --git a/include/boost/regex/v5/basic_regex_creator.hpp b/include/boost/regex/v5/basic_regex_creator.hpp new file mode 100644 index 00000000..ddd2f257 --- /dev/null +++ b/include/boost/regex/v5/basic_regex_creator.hpp @@ -0,0 +1,1575 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE basic_regex_creator.cpp + * VERSION see + * DESCRIPTION: Declares template class basic_regex_creator which fills in + * the data members of a regex_data object. + */ + +#ifndef BOOST_REGEX_V5_BASIC_REGEX_CREATOR_HPP +#define BOOST_REGEX_V5_BASIC_REGEX_CREATOR_HPP + +#include + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#pragma warning(disable:4459) +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +namespace boost{ + +namespace BOOST_REGEX_DETAIL_NS{ + +template +struct digraph : public std::pair +{ + digraph() : std::pair(charT(0), charT(0)){} + digraph(charT c1) : std::pair(c1, charT(0)){} + digraph(charT c1, charT c2) : std::pair(c1, c2) + {} + digraph(const digraph& d) : std::pair(d.first, d.second){} + digraph& operator=(const digraph&) = default; + template + digraph(const Seq& s) : std::pair() + { + BOOST_REGEX_ASSERT(s.size() <= 2); + BOOST_REGEX_ASSERT(s.size()); + this->first = s[0]; + this->second = (s.size() > 1) ? s[1] : 0; + } +}; + +template +class basic_char_set +{ +public: + typedef digraph digraph_type; + typedef typename traits::string_type string_type; + typedef typename traits::char_class_type m_type; + + basic_char_set() + { + m_negate = false; + m_has_digraphs = false; + m_classes = 0; + m_negated_classes = 0; + m_empty = true; + } + + void add_single(const digraph_type& s) + { + m_singles.insert(s); + if(s.second) + m_has_digraphs = true; + m_empty = false; + } + void add_range(const digraph_type& first, const digraph_type& end) + { + m_ranges.push_back(first); + m_ranges.push_back(end); + if(first.second) + { + m_has_digraphs = true; + add_single(first); + } + if(end.second) + { + m_has_digraphs = true; + add_single(end); + } + m_empty = false; + } + void add_class(m_type m) + { + m_classes |= m; + m_empty = false; + } + void add_negated_class(m_type m) + { + m_negated_classes |= m; + m_empty = false; + } + void add_equivalent(const digraph_type& s) + { + m_equivalents.insert(s); + if(s.second) + { + m_has_digraphs = true; + add_single(s); + } + m_empty = false; + } + void negate() + { + m_negate = true; + //m_empty = false; + } + + // + // accessor functions: + // + bool has_digraphs()const + { + return m_has_digraphs; + } + bool is_negated()const + { + return m_negate; + } + typedef typename std::vector::const_iterator list_iterator; + typedef typename std::set::const_iterator set_iterator; + set_iterator singles_begin()const + { + return m_singles.begin(); + } + set_iterator singles_end()const + { + return m_singles.end(); + } + list_iterator ranges_begin()const + { + return m_ranges.begin(); + } + list_iterator ranges_end()const + { + return m_ranges.end(); + } + set_iterator equivalents_begin()const + { + return m_equivalents.begin(); + } + set_iterator equivalents_end()const + { + return m_equivalents.end(); + } + m_type classes()const + { + return m_classes; + } + m_type negated_classes()const + { + return m_negated_classes; + } + bool empty()const + { + return m_empty; + } +private: + std::set m_singles; // a list of single characters to match + std::vector m_ranges; // a list of end points of our ranges + bool m_negate; // true if the set is to be negated + bool m_has_digraphs; // true if we have digraphs present + m_type m_classes; // character classes to match + m_type m_negated_classes; // negated character classes to match + bool m_empty; // whether we've added anything yet + std::set m_equivalents; // a list of equivalence classes +}; + +template +class basic_regex_creator +{ +public: + basic_regex_creator(regex_data* data); + std::ptrdiff_t getoffset(void* addr) + { + return getoffset(addr, m_pdata->m_data.data()); + } + std::ptrdiff_t getoffset(const void* addr, const void* base) + { + return static_cast(addr) - static_cast(base); + } + re_syntax_base* getaddress(std::ptrdiff_t off) + { + return getaddress(off, m_pdata->m_data.data()); + } + re_syntax_base* getaddress(std::ptrdiff_t off, void* base) + { + return static_cast(static_cast(static_cast(base) + off)); + } + void init(unsigned l_flags) + { + m_pdata->m_flags = l_flags; + m_icase = l_flags & regex_constants::icase; + } + regbase::flag_type flags() + { + return m_pdata->m_flags; + } + void flags(regbase::flag_type f) + { + m_pdata->m_flags = f; + if(m_icase != static_cast(f & regbase::icase)) + { + m_icase = static_cast(f & regbase::icase); + } + } + re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base)); + re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base)); + re_literal* append_literal(charT c); + re_syntax_base* append_set(const basic_char_set& char_set); + re_syntax_base* append_set(const basic_char_set& char_set, std::integral_constant*); + re_syntax_base* append_set(const basic_char_set& char_set, std::integral_constant*); + void finalize(const charT* p1, const charT* p2); +protected: + regex_data* m_pdata; // pointer to the basic_regex_data struct we are filling in + const ::boost::regex_traits_wrapper& + m_traits; // convenience reference to traits class + re_syntax_base* m_last_state; // the last state we added + bool m_icase; // true for case insensitive matches + unsigned m_repeater_id; // the state_id of the next repeater + bool m_has_backrefs; // true if there are actually any backrefs + indexed_bit_flag m_backrefs; // bitmask of permitted backrefs + std::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for; + bool m_has_recursions; // set when we have recursive expressions to fixup + std::vector m_recursion_checks; // notes which recursions we've followed while analysing this expression + typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character + typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character + typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character + typename traits::char_class_type m_upper_mask; // mask used to determine if a character is an uppercase character + typename traits::char_class_type m_alpha_mask; // mask used to determine if a character is an alphabetic character +private: + basic_regex_creator& operator=(const basic_regex_creator&); + basic_regex_creator(const basic_regex_creator&); + + void fixup_pointers(re_syntax_base* state); + void fixup_recursions(re_syntax_base* state); + void create_startmaps(re_syntax_base* state); + int calculate_backstep(re_syntax_base* state); + void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask); + unsigned get_restart_type(re_syntax_base* state); + void set_all_masks(unsigned char* bits, unsigned char); + bool is_bad_repeat(re_syntax_base* pt); + void set_bad_repeat(re_syntax_base* pt); + syntax_element_type get_repeat_type(re_syntax_base* state); + void probe_leading_repeat(re_syntax_base* state); +}; + +template +basic_regex_creator::basic_regex_creator(regex_data* data) + : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_icase(false), m_repeater_id(0), + m_has_backrefs(false), m_bad_repeats(0), m_has_recursions(false), m_word_mask(0), m_mask_space(0), m_lower_mask(0), m_upper_mask(0), m_alpha_mask(0) +{ + m_pdata->m_data.clear(); + m_pdata->m_status = ::boost::regex_constants::error_ok; + static const charT w = 'w'; + static const charT s = 's'; + static const charT l[5] = { 'l', 'o', 'w', 'e', 'r', }; + static const charT u[5] = { 'u', 'p', 'p', 'e', 'r', }; + static const charT a[5] = { 'a', 'l', 'p', 'h', 'a', }; + m_word_mask = m_traits.lookup_classname(&w, &w +1); + m_mask_space = m_traits.lookup_classname(&s, &s +1); + m_lower_mask = m_traits.lookup_classname(l, l + 5); + m_upper_mask = m_traits.lookup_classname(u, u + 5); + m_alpha_mask = m_traits.lookup_classname(a, a + 5); + m_pdata->m_word_mask = m_word_mask; + BOOST_REGEX_ASSERT(m_word_mask != 0); + BOOST_REGEX_ASSERT(m_mask_space != 0); + BOOST_REGEX_ASSERT(m_lower_mask != 0); + BOOST_REGEX_ASSERT(m_upper_mask != 0); + BOOST_REGEX_ASSERT(m_alpha_mask != 0); +} + +template +re_syntax_base* basic_regex_creator::append_state(syntax_element_type t, std::size_t s) +{ + // if the state is a backref then make a note of it: + if(t == syntax_element_backref) + this->m_has_backrefs = true; + // append a new state, start by aligning our last one: + m_pdata->m_data.align(); + // set the offset to the next state in our last one: + if(m_last_state) + m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state); + // now actually extend our data: + m_last_state = static_cast(m_pdata->m_data.extend(s)); + // fill in boilerplate options in the new state: + m_last_state->next.i = 0; + m_last_state->type = t; + return m_last_state; +} + +template +re_syntax_base* basic_regex_creator::insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s) +{ + // append a new state, start by aligning our last one: + m_pdata->m_data.align(); + // set the offset to the next state in our last one: + if(m_last_state) + m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state); + // remember the last state position: + std::ptrdiff_t off = getoffset(m_last_state) + s; + // now actually insert our data: + re_syntax_base* new_state = static_cast(m_pdata->m_data.insert(pos, s)); + // fill in boilerplate options in the new state: + new_state->next.i = s; + new_state->type = t; + m_last_state = getaddress(off); + return new_state; +} + +template +re_literal* basic_regex_creator::append_literal(charT c) +{ + re_literal* result; + // start by seeing if we have an existing re_literal we can extend: + if((0 == m_last_state) || (m_last_state->type != syntax_element_literal)) + { + // no existing re_literal, create a new one: + result = static_cast(append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT))); + result->length = 1; + *static_cast(static_cast(result+1)) = m_traits.translate(c, m_icase); + } + else + { + // we have an existing re_literal, extend it: + std::ptrdiff_t off = getoffset(m_last_state); + m_pdata->m_data.extend(sizeof(charT)); + m_last_state = result = static_cast(getaddress(off)); + charT* characters = static_cast(static_cast(result+1)); + characters[result->length] = m_traits.translate(c, m_icase); + result->length += 1; + } + return result; +} + +template +inline re_syntax_base* basic_regex_creator::append_set( + const basic_char_set& char_set) +{ + typedef std::integral_constant truth_type; + return char_set.has_digraphs() + ? append_set(char_set, static_cast*>(0)) + : append_set(char_set, static_cast(0)); +} + +template +re_syntax_base* basic_regex_creator::append_set( + const basic_char_set& char_set, std::integral_constant*) +{ + typedef typename traits::string_type string_type; + typedef typename basic_char_set::list_iterator item_iterator; + typedef typename basic_char_set::set_iterator set_iterator; + typedef typename traits::char_class_type m_type; + + re_set_long* result = static_cast*>(append_state(syntax_element_long_set, sizeof(re_set_long))); + // + // fill in the basics: + // + result->csingles = static_cast(std::distance(char_set.singles_begin(), char_set.singles_end())); + result->cranges = static_cast(std::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2; + result->cequivalents = static_cast(std::distance(char_set.equivalents_begin(), char_set.equivalents_end())); + result->cclasses = char_set.classes(); + result->cnclasses = char_set.negated_classes(); + if(flags() & regbase::icase) + { + // adjust classes as needed: + if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask)) + result->cclasses |= m_alpha_mask; + if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask)) + result->cnclasses |= m_alpha_mask; + } + + result->isnot = char_set.is_negated(); + result->singleton = !char_set.has_digraphs(); + // + // remember where the state is for later: + // + std::ptrdiff_t offset = getoffset(result); + // + // now extend with all the singles: + // + item_iterator first, last; + set_iterator sfirst, slast; + sfirst = char_set.singles_begin(); + slast = char_set.singles_end(); + while(sfirst != slast) + { + charT* p = static_cast(this->m_pdata->m_data.extend(sizeof(charT) * (sfirst->first == static_cast(0) ? 1 : sfirst->second ? 3 : 2))); + p[0] = m_traits.translate(sfirst->first, m_icase); + if(sfirst->first == static_cast(0)) + { + p[0] = 0; + } + else if(sfirst->second) + { + p[1] = m_traits.translate(sfirst->second, m_icase); + p[2] = 0; + } + else + p[1] = 0; + ++sfirst; + } + // + // now extend with all the ranges: + // + first = char_set.ranges_begin(); + last = char_set.ranges_end(); + while(first != last) + { + // first grab the endpoints of the range: + digraph c1 = *first; + c1.first = this->m_traits.translate(c1.first, this->m_icase); + c1.second = this->m_traits.translate(c1.second, this->m_icase); + ++first; + digraph c2 = *first; + c2.first = this->m_traits.translate(c2.first, this->m_icase); + c2.second = this->m_traits.translate(c2.second, this->m_icase); + ++first; + string_type s1, s2; + // different actions now depending upon whether collation is turned on: + if(flags() & regex_constants::collate) + { + // we need to transform our range into sort keys: + charT a1[3] = { c1.first, c1.second, charT(0), }; + charT a2[3] = { c2.first, c2.second, charT(0), }; + s1 = this->m_traits.transform(a1, (a1[1] ? a1+2 : a1+1)); + s2 = this->m_traits.transform(a2, (a2[1] ? a2+2 : a2+1)); + if(s1.empty()) + s1 = string_type(1, charT(0)); + if(s2.empty()) + s2 = string_type(1, charT(0)); + } + else + { + if(c1.second) + { + s1.insert(s1.end(), c1.first); + s1.insert(s1.end(), c1.second); + } + else + s1 = string_type(1, c1.first); + if(c2.second) + { + s2.insert(s2.end(), c2.first); + s2.insert(s2.end(), c2.second); + } + else + s2.insert(s2.end(), c2.first); + } + if(s1 > s2) + { + // Oops error: + return 0; + } + charT* p = static_cast(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) ); + BOOST_REGEX_DETAIL_NS::copy(s1.begin(), s1.end(), p); + p[s1.size()] = charT(0); + p += s1.size() + 1; + BOOST_REGEX_DETAIL_NS::copy(s2.begin(), s2.end(), p); + p[s2.size()] = charT(0); + } + // + // now process the equivalence classes: + // + sfirst = char_set.equivalents_begin(); + slast = char_set.equivalents_end(); + while(sfirst != slast) + { + string_type s; + if(sfirst->second) + { + charT cs[3] = { sfirst->first, sfirst->second, charT(0), }; + s = m_traits.transform_primary(cs, cs+2); + } + else + s = m_traits.transform_primary(&sfirst->first, &sfirst->first+1); + if(s.empty()) + return 0; // invalid or unsupported equivalence class + charT* p = static_cast(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) ); + BOOST_REGEX_DETAIL_NS::copy(s.begin(), s.end(), p); + p[s.size()] = charT(0); + ++sfirst; + } + // + // finally reset the address of our last state: + // + m_last_state = result = static_cast*>(getaddress(offset)); + return result; +} + +template +inline bool char_less(T t1, T t2) +{ + return t1 < t2; +} +inline bool char_less(char t1, char t2) +{ + return static_cast(t1) < static_cast(t2); +} +inline bool char_less(signed char t1, signed char t2) +{ + return static_cast(t1) < static_cast(t2); +} + +template +re_syntax_base* basic_regex_creator::append_set( + const basic_char_set& char_set, std::integral_constant*) +{ + typedef typename traits::string_type string_type; + typedef typename basic_char_set::list_iterator item_iterator; + typedef typename basic_char_set::set_iterator set_iterator; + + re_set* result = static_cast(append_state(syntax_element_set, sizeof(re_set))); + bool negate = char_set.is_negated(); + std::memset(result->_map, 0, sizeof(result->_map)); + // + // handle singles first: + // + item_iterator first, last; + set_iterator sfirst, slast; + sfirst = char_set.singles_begin(); + slast = char_set.singles_end(); + while(sfirst != slast) + { + for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i) + { + if(this->m_traits.translate(static_cast(i), this->m_icase) + == this->m_traits.translate(sfirst->first, this->m_icase)) + result->_map[i] = true; + } + ++sfirst; + } + // + // OK now handle ranges: + // + first = char_set.ranges_begin(); + last = char_set.ranges_end(); + while(first != last) + { + // first grab the endpoints of the range: + charT c1 = this->m_traits.translate(first->first, this->m_icase); + ++first; + charT c2 = this->m_traits.translate(first->first, this->m_icase); + ++first; + // different actions now depending upon whether collation is turned on: + if(flags() & regex_constants::collate) + { + // we need to transform our range into sort keys: + charT c3[2] = { c1, charT(0), }; + string_type s1 = this->m_traits.transform(c3, c3+1); + c3[0] = c2; + string_type s2 = this->m_traits.transform(c3, c3+1); + if(s1 > s2) + { + // Oops error: + return 0; + } + BOOST_REGEX_ASSERT(c3[1] == charT(0)); + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + c3[0] = static_cast(i); + string_type s3 = this->m_traits.transform(c3, c3 +1); + if((s1 <= s3) && (s3 <= s2)) + result->_map[i] = true; + } + } + else + { + if(char_less(c2, c1)) + { + // Oops error: + return 0; + } + // everything in range matches: + std::memset(result->_map + static_cast(c1), true, static_cast(1u) + static_cast(static_cast(c2) - static_cast(c1))); + } + } + // + // and now the classes: + // + typedef typename traits::char_class_type m_type; + m_type m = char_set.classes(); + if(flags() & regbase::icase) + { + // adjust m as needed: + if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask)) + m |= m_alpha_mask; + } + if(m != 0) + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + if(this->m_traits.isctype(static_cast(i), m)) + result->_map[i] = true; + } + } + // + // and now the negated classes: + // + m = char_set.negated_classes(); + if(flags() & regbase::icase) + { + // adjust m as needed: + if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask)) + m |= m_alpha_mask; + } + if(m != 0) + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + if(0 == this->m_traits.isctype(static_cast(i), m)) + result->_map[i] = true; + } + } + // + // now process the equivalence classes: + // + sfirst = char_set.equivalents_begin(); + slast = char_set.equivalents_end(); + while(sfirst != slast) + { + string_type s; + BOOST_REGEX_ASSERT(static_cast(0) == sfirst->second); + s = m_traits.transform_primary(&sfirst->first, &sfirst->first+1); + if(s.empty()) + return 0; // invalid or unsupported equivalence class + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + charT c[2] = { (static_cast(i)), charT(0), }; + string_type s2 = this->m_traits.transform_primary(c, c+1); + if(s == s2) + result->_map[i] = true; + } + ++sfirst; + } + if(negate) + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + result->_map[i] = !(result->_map[i]); + } + } + return result; +} + +template +void basic_regex_creator::finalize(const charT* p1, const charT* p2) +{ + if(this->m_pdata->m_status) + return; + // we've added all the states we need, now finish things off. + // start by adding a terminating state: + append_state(syntax_element_match); + // extend storage to store original expression: + std::ptrdiff_t len = p2 - p1; + m_pdata->m_expression_len = len; + charT* ps = static_cast(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1)))); + m_pdata->m_expression = ps; + BOOST_REGEX_DETAIL_NS::copy(p1, p2, ps); + ps[p2 - p1] = 0; + // fill in our other data... + // successful parsing implies a zero status: + m_pdata->m_status = 0; + // get the first state of the machine: + m_pdata->m_first_state = static_cast(m_pdata->m_data.data()); + // fixup pointers in the machine: + fixup_pointers(m_pdata->m_first_state); + if(m_has_recursions) + { + m_pdata->m_has_recursions = true; + fixup_recursions(m_pdata->m_first_state); + if(this->m_pdata->m_status) + return; + } + else + m_pdata->m_has_recursions = false; + // create nested startmaps: + create_startmaps(m_pdata->m_first_state); + // create main startmap: + std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap)); + m_pdata->m_can_be_null = 0; + + m_bad_repeats = 0; + if(m_has_recursions) + m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u); + create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all); + // get the restart type: + m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state); + // optimise a leading repeat if there is one: + probe_leading_repeat(m_pdata->m_first_state); +} + +template +void basic_regex_creator::fixup_pointers(re_syntax_base* state) +{ + while(state) + { + switch(state->type) + { + case syntax_element_recurse: + m_has_recursions = true; + if(state->next.i) + state->next.p = getaddress(state->next.i, state); + else + state->next.p = 0; + break; + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + // set the state_id of this repeat: + static_cast(state)->state_id = m_repeater_id++; + BOOST_REGEX_FALLTHROUGH; + case syntax_element_alt: + std::memset(static_cast(state)->_map, 0, sizeof(static_cast(state)->_map)); + static_cast(state)->can_be_null = 0; + BOOST_REGEX_FALLTHROUGH; + case syntax_element_jump: + static_cast(state)->alt.p = getaddress(static_cast(state)->alt.i, state); + BOOST_REGEX_FALLTHROUGH; + default: + if(state->next.i) + state->next.p = getaddress(state->next.i, state); + else + state->next.p = 0; + } + state = state->next.p; + } +} + +template +void basic_regex_creator::fixup_recursions(re_syntax_base* state) +{ + re_syntax_base* base = state; + while(state) + { + switch(state->type) + { + case syntax_element_assert_backref: + { + // just check that the index is valid: + int idx = static_cast(state)->index; + if(idx < 0) + { + idx = -idx-1; + if(idx >= hash_value_mask) + { + idx = m_pdata->get_id(idx); + if(idx <= 0) + { + // check of sub-expression that doesn't exist: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered a forward reference to a marked sub-expression that does not exist."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + } + } + } + break; + case syntax_element_recurse: + { + bool ok = false; + re_syntax_base* p = base; + std::ptrdiff_t idx = static_cast(state)->alt.i; + if(idx >= hash_value_mask) + { + // + // There may be more than one capture group with this hash, just do what Perl + // does and recurse to the leftmost: + // + idx = m_pdata->get_id(static_cast(idx)); + } + if(idx < 0) + { + ok = false; + } + else + { + while(p) + { + if((p->type == syntax_element_startmark) && (static_cast(p)->index == idx)) + { + // + // We've found the target of the recursion, set the jump target: + // + static_cast(state)->alt.p = p; + ok = true; + // + // Now scan the target for nested repeats: + // + p = p->next.p; + int next_rep_id = 0; + while(p) + { + switch(p->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + next_rep_id = static_cast(p)->state_id; + break; + case syntax_element_endmark: + if(static_cast(p)->index == idx) + next_rep_id = -1; + break; + default: + break; + } + if(next_rep_id) + break; + p = p->next.p; + } + if(next_rep_id > 0) + { + static_cast(state)->state_id = next_rep_id - 1; + } + + break; + } + p = p->next.p; + } + } + if(!ok) + { + // recursion to sub-expression that doesn't exist: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered a forward reference to a recursive sub-expression that does not exist."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + } + break; + default: + break; + } + state = state->next.p; + } +} + +template +void basic_regex_creator::create_startmaps(re_syntax_base* state) +{ + // non-recursive implementation: + // create the last map in the machine first, so that earlier maps + // can make use of the result... + // + // This was originally a recursive implementation, but that caused stack + // overflows with complex expressions on small stacks (think COM+). + + // start by saving the case setting: + bool l_icase = m_icase; + std::vector > v; + + while(state) + { + switch(state->type) + { + case syntax_element_toggle_case: + // we need to track case changes here: + m_icase = static_cast(state)->icase; + state = state->next.p; + continue; + case syntax_element_alt: + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + // just push the state onto our stack for now: + v.push_back(std::pair(m_icase, state)); + state = state->next.p; + break; + case syntax_element_backstep: + // we need to calculate how big the backstep is: + static_cast(state)->index + = this->calculate_backstep(state->next.p); + if(static_cast(state)->index < 0) + { + // Oops error: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Invalid lookbehind assertion encountered in the regular expression."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + BOOST_REGEX_FALLTHROUGH; + default: + state = state->next.p; + } + } + + // now work through our list, building all the maps as we go: + while(!v.empty()) + { + // Initialize m_recursion_checks if we need it: + if(m_has_recursions) + m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u); + + const std::pair& p = v.back(); + m_icase = p.first; + state = p.second; + v.pop_back(); + + // Build maps: + m_bad_repeats = 0; + create_startmap(state->next.p, static_cast(state)->_map, &static_cast(state)->can_be_null, mask_take); + m_bad_repeats = 0; + + if(m_has_recursions) + m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u); + create_startmap(static_cast(state)->alt.p, static_cast(state)->_map, &static_cast(state)->can_be_null, mask_skip); + // adjust the type of the state to allow for faster matching: + state->type = this->get_repeat_type(state); + } + // restore case sensitivity: + m_icase = l_icase; +} + +template +int basic_regex_creator::calculate_backstep(re_syntax_base* state) +{ + typedef typename traits::char_class_type m_type; + int result = 0; + while(state) + { + switch(state->type) + { + case syntax_element_startmark: + if((static_cast(state)->index == -1) + || (static_cast(state)->index == -2)) + { + state = static_cast(state->next.p)->alt.p->next.p; + continue; + } + else if(static_cast(state)->index == -3) + { + state = state->next.p->next.p; + continue; + } + break; + case syntax_element_endmark: + if((static_cast(state)->index == -1) + || (static_cast(state)->index == -2)) + return result; + break; + case syntax_element_literal: + result += static_cast(state)->length; + break; + case syntax_element_wild: + case syntax_element_set: + result += 1; + break; + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_backref: + case syntax_element_rep: + case syntax_element_combining: + case syntax_element_long_set_rep: + case syntax_element_backstep: + { + re_repeat* rep = static_cast(state); + // adjust the type of the state to allow for faster matching: + state->type = this->get_repeat_type(state); + if((state->type == syntax_element_dot_rep) + || (state->type == syntax_element_char_rep) + || (state->type == syntax_element_short_set_rep)) + { + if(rep->max != rep->min) + return -1; + result += static_cast(rep->min); + state = rep->alt.p; + continue; + } + else if(state->type == syntax_element_long_set_rep) + { + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_long_set); + if(static_cast*>(rep->next.p)->singleton == 0) + return -1; + if(rep->max != rep->min) + return -1; + result += static_cast(rep->min); + state = rep->alt.p; + continue; + } + } + return -1; + case syntax_element_long_set: + if(static_cast*>(state)->singleton == 0) + return -1; + result += 1; + break; + case syntax_element_jump: + state = static_cast(state)->alt.p; + continue; + case syntax_element_alt: + { + int r1 = calculate_backstep(state->next.p); + int r2 = calculate_backstep(static_cast(state)->alt.p); + if((r1 < 0) || (r1 != r2)) + return -1; + return result + r1; + } + default: + break; + } + state = state->next.p; + } + return -1; +} + +struct recursion_saver +{ + std::vector saved_state; + std::vector* state; + recursion_saver(std::vector* p) : saved_state(*p), state(p) {} + ~recursion_saver() + { + state->swap(saved_state); + } +}; + +template +void basic_regex_creator::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask) +{ + recursion_saver saved_recursions(&m_recursion_checks); + int not_last_jump = 1; + re_syntax_base* recursion_start = 0; + int recursion_sub = 0; + re_syntax_base* recursion_restart = 0; + + // track case sensitivity: + bool l_icase = m_icase; + + while(state) + { + switch(state->type) + { + case syntax_element_toggle_case: + l_icase = static_cast(state)->icase; + state = state->next.p; + break; + case syntax_element_literal: + { + // don't set anything in *pnull, set each element in l_map + // that could match the first character in the literal: + if(l_map) + { + l_map[0] |= mask_init; + charT first_char = *static_cast(static_cast(static_cast(state) + 1)); + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(m_traits.translate(static_cast(i), l_icase) == first_char) + l_map[i] |= mask; + } + } + return; + } + case syntax_element_end_line: + { + // next character must be a line separator (if there is one): + if(l_map) + { + l_map[0] |= mask_init; + l_map[static_cast('\n')] |= mask; + l_map[static_cast('\r')] |= mask; + l_map[static_cast('\f')] |= mask; + l_map[0x85] |= mask; + } + // now figure out if we can match a NULL string at this point: + if(pnull) + create_startmap(state->next.p, 0, pnull, mask); + return; + } + case syntax_element_recurse: + { + BOOST_REGEX_ASSERT(static_cast(state)->alt.p->type == syntax_element_startmark); + recursion_sub = static_cast(static_cast(state)->alt.p)->index; + if(m_recursion_checks[recursion_sub] & 1u) + { + // Infinite recursion!! + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered an infinite recursion."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + else if(recursion_start == 0) + { + recursion_start = state; + recursion_restart = state->next.p; + state = static_cast(state)->alt.p; + m_recursion_checks[recursion_sub] |= 1u; + break; + } + m_recursion_checks[recursion_sub] |= 1u; + // can't handle nested recursion here... + BOOST_REGEX_FALLTHROUGH; + } + case syntax_element_backref: + // can be null, and any character can match: + if(pnull) + *pnull |= mask; + BOOST_REGEX_FALLTHROUGH; + case syntax_element_wild: + { + // can't be null, any character can match: + set_all_masks(l_map, mask); + return; + } + case syntax_element_accept: + case syntax_element_match: + { + // must be null, any character can match: + set_all_masks(l_map, mask); + if(pnull) + *pnull |= mask; + return; + } + case syntax_element_word_start: + { + // recurse, then AND with all the word characters: + create_startmap(state->next.p, l_map, pnull, mask); + if(l_map) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(!m_traits.isctype(static_cast(i), m_word_mask)) + l_map[i] &= static_cast(~mask); + } + } + return; + } + case syntax_element_word_end: + { + // recurse, then AND with all the word characters: + create_startmap(state->next.p, l_map, pnull, mask); + if(l_map) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(m_traits.isctype(static_cast(i), m_word_mask)) + l_map[i] &= static_cast(~mask); + } + } + return; + } + case syntax_element_buffer_end: + { + // we *must be null* : + if(pnull) + *pnull |= mask; + return; + } + case syntax_element_long_set: + if(l_map) + { + typedef typename traits::char_class_type m_type; + if(static_cast*>(state)->singleton) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + charT c = static_cast(i); + if(&c != re_is_set_member(&c, &c + 1, static_cast*>(state), *m_pdata, l_icase)) + l_map[i] |= mask; + } + } + else + set_all_masks(l_map, mask); + } + return; + case syntax_element_set: + if(l_map) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(static_cast(state)->_map[ + static_cast(m_traits.translate(static_cast(i), l_icase))]) + l_map[i] |= mask; + } + } + return; + case syntax_element_jump: + // take the jump: + state = static_cast(state)->alt.p; + not_last_jump = -1; + break; + case syntax_element_alt: + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + { + re_alt* rep = static_cast(state); + if(rep->_map[0] & mask_init) + { + if(l_map) + { + // copy previous results: + l_map[0] |= mask_init; + for(unsigned int i = 0; i <= UCHAR_MAX; ++i) + { + if(rep->_map[i] & mask_any) + l_map[i] |= mask; + } + } + if(pnull) + { + if(rep->can_be_null & mask_any) + *pnull |= mask; + } + } + else + { + // we haven't created a startmap for this alternative yet + // so take the union of the two options: + if(is_bad_repeat(state)) + { + set_all_masks(l_map, mask); + if(pnull) + *pnull |= mask; + return; + } + set_bad_repeat(state); + create_startmap(state->next.p, l_map, pnull, mask); + if((state->type == syntax_element_alt) + || (static_cast(state)->min == 0) + || (not_last_jump == 0)) + create_startmap(rep->alt.p, l_map, pnull, mask); + } + } + return; + case syntax_element_soft_buffer_end: + // match newline or null: + if(l_map) + { + l_map[0] |= mask_init; + l_map[static_cast('\n')] |= mask; + l_map[static_cast('\r')] |= mask; + } + if(pnull) + *pnull |= mask; + return; + case syntax_element_endmark: + // need to handle independent subs as a special case: + if(static_cast(state)->index < 0) + { + // can be null, any character can match: + set_all_masks(l_map, mask); + if(pnull) + *pnull |= mask; + return; + } + else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast(state)->index)) + { + // recursion termination: + recursion_start = 0; + state = recursion_restart; + break; + } + + // + // Normally we just go to the next state... but if this sub-expression is + // the target of a recursion, then we might be ending a recursion, in which + // case we should check whatever follows that recursion, as well as whatever + // follows this state: + // + if(m_pdata->m_has_recursions && static_cast(state)->index) + { + bool ok = false; + re_syntax_base* p = m_pdata->m_first_state; + while(p) + { + if(p->type == syntax_element_recurse) + { + re_brace* p2 = static_cast(static_cast(p)->alt.p); + if((p2->type == syntax_element_startmark) && (p2->index == static_cast(state)->index)) + { + ok = true; + break; + } + } + p = p->next.p; + } + if(ok && ((m_recursion_checks[static_cast(state)->index] & 2u) == 0)) + { + m_recursion_checks[static_cast(state)->index] |= 2u; + create_startmap(p->next.p, l_map, pnull, mask); + } + } + state = state->next.p; + break; + + case syntax_element_commit: + set_all_masks(l_map, mask); + // Continue scanning so we can figure out whether we can be null: + state = state->next.p; + break; + case syntax_element_startmark: + // need to handle independent subs as a special case: + if(static_cast(state)->index == -3) + { + state = state->next.p->next.p; + break; + } + BOOST_REGEX_FALLTHROUGH; + default: + state = state->next.p; + } + ++not_last_jump; + } +} + +template +unsigned basic_regex_creator::get_restart_type(re_syntax_base* state) +{ + // + // find out how the machine starts, so we can optimise the search: + // + while(state) + { + switch(state->type) + { + case syntax_element_startmark: + case syntax_element_endmark: + state = state->next.p; + continue; + case syntax_element_start_line: + return regbase::restart_line; + case syntax_element_word_start: + return regbase::restart_word; + case syntax_element_buffer_start: + return regbase::restart_buf; + case syntax_element_restart_continue: + return regbase::restart_continue; + default: + state = 0; + continue; + } + } + return regbase::restart_any; +} + +template +void basic_regex_creator::set_all_masks(unsigned char* bits, unsigned char mask) +{ + // + // set mask in all of bits elements, + // if bits[0] has mask_init not set then we can + // optimise this to a call to memset: + // + if(bits) + { + if(bits[0] == 0) + (std::memset)(bits, mask, 1u << CHAR_BIT); + else + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + bits[i] |= mask; + } + bits[0] |= mask_init; + } +} + +template +bool basic_regex_creator::is_bad_repeat(re_syntax_base* pt) +{ + switch(pt->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + { + unsigned state_id = static_cast(pt)->state_id; + if(state_id >= sizeof(m_bad_repeats) * CHAR_BIT) + return true; // run out of bits, assume we can't traverse this one. + static const std::uintmax_t one = 1uL; + return m_bad_repeats & (one << state_id); + } + default: + return false; + } +} + +template +void basic_regex_creator::set_bad_repeat(re_syntax_base* pt) +{ + switch(pt->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + { + unsigned state_id = static_cast(pt)->state_id; + static const std::uintmax_t one = 1uL; + if(state_id <= sizeof(m_bad_repeats) * CHAR_BIT) + m_bad_repeats |= (one << state_id); + } + break; + default: + break; + } +} + +template +syntax_element_type basic_regex_creator::get_repeat_type(re_syntax_base* state) +{ + typedef typename traits::char_class_type m_type; + if(state->type == syntax_element_rep) + { + // check to see if we are repeating a single state: + if(state->next.p->next.p->next.p == static_cast(state)->alt.p) + { + switch(state->next.p->type) + { + case BOOST_REGEX_DETAIL_NS::syntax_element_wild: + return BOOST_REGEX_DETAIL_NS::syntax_element_dot_rep; + case BOOST_REGEX_DETAIL_NS::syntax_element_literal: + return BOOST_REGEX_DETAIL_NS::syntax_element_char_rep; + case BOOST_REGEX_DETAIL_NS::syntax_element_set: + return BOOST_REGEX_DETAIL_NS::syntax_element_short_set_rep; + case BOOST_REGEX_DETAIL_NS::syntax_element_long_set: + if(static_cast*>(state->next.p)->singleton) + return BOOST_REGEX_DETAIL_NS::syntax_element_long_set_rep; + break; + default: + break; + } + } + } + return state->type; +} + +template +void basic_regex_creator::probe_leading_repeat(re_syntax_base* state) +{ + // enumerate our states, and see if we have a leading repeat + // for which failed search restarts can be optimized; + do + { + switch(state->type) + { + case syntax_element_startmark: + if(static_cast(state)->index >= 0) + { + state = state->next.p; + continue; + } +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#pragma warning(disable:6011) +#endif + if((static_cast(state)->index == -1) + || (static_cast(state)->index == -2)) + { + // skip past the zero width assertion: + state = static_cast(state->next.p)->alt.p->next.p; + continue; + } +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + if(static_cast(state)->index == -3) + { + // Have to skip the leading jump state: + state = state->next.p->next.p; + continue; + } + return; + case syntax_element_endmark: + case syntax_element_start_line: + case syntax_element_end_line: + case syntax_element_word_boundary: + case syntax_element_within_word: + case syntax_element_word_start: + case syntax_element_word_end: + case syntax_element_buffer_start: + case syntax_element_buffer_end: + case syntax_element_restart_continue: + state = state->next.p; + break; + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + if(this->m_has_backrefs == 0) + static_cast(state)->leading = true; + BOOST_REGEX_FALLTHROUGH; + default: + return; + } + }while(state); +} + +} // namespace BOOST_REGEX_DETAIL_NS + +} // namespace boost + +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +#endif diff --git a/include/boost/regex/v5/basic_regex_parser.hpp b/include/boost/regex/v5/basic_regex_parser.hpp new file mode 100644 index 00000000..858eb805 --- /dev/null +++ b/include/boost/regex/v5/basic_regex_parser.hpp @@ -0,0 +1,3117 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE basic_regex_parser.cpp + * VERSION see + * DESCRIPTION: Declares template class basic_regex_parser. + */ + +#ifndef BOOST_REGEX_V5_BASIC_REGEX_PARSER_HPP +#define BOOST_REGEX_V5_BASIC_REGEX_PARSER_HPP + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4244 4459) +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +inline std::intmax_t umax(std::integral_constant const&) +{ + // Get out clause here, just in case numeric_limits is unspecialized: + return std::numeric_limits::is_specialized ? (std::numeric_limits::max)() : INT_MAX; +} +inline std::intmax_t umax(std::integral_constant const&) +{ + return (std::numeric_limits::max)(); +} + +inline std::intmax_t umax() +{ + return umax(std::integral_constant::digits >= std::numeric_limits::digits>()); +} + +template +class basic_regex_parser : public basic_regex_creator +{ +public: + basic_regex_parser(regex_data* data); + void parse(const charT* p1, const charT* p2, unsigned flags); + void fail(regex_constants::error_type error_code, std::ptrdiff_t position); + void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos); + void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message) + { + fail(error_code, position, message, position); + } + + bool parse_all(); + bool parse_basic(); + bool parse_extended(); + bool parse_literal(); + bool parse_open_paren(); + bool parse_basic_escape(); + bool parse_extended_escape(); + bool parse_match_any(); + bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits::max)()); + bool parse_repeat_range(bool isbasic); + bool parse_alt(); + bool parse_set(); + bool parse_backref(); + void parse_set_literal(basic_char_set& char_set); + bool parse_inner_set(basic_char_set& char_set); + bool parse_QE(); + bool parse_perl_extension(); + bool parse_perl_verb(); + bool match_verb(const char*); + bool add_emacs_code(bool negate); + bool unwind_alts(std::ptrdiff_t last_paren_start); + digraph get_next_set_literal(basic_char_set& char_set); + charT unescape_character(); + regex_constants::syntax_option_type parse_options(); + +private: + typedef bool (basic_regex_parser::*parser_proc_type)(); + typedef typename traits::string_type string_type; + typedef typename traits::char_class_type char_class_type; + parser_proc_type m_parser_proc; // the main parser to use + const charT* m_base; // the start of the string being parsed + const charT* m_end; // the end of the string being parsed + const charT* m_position; // our current parser position + unsigned m_mark_count; // how many sub-expressions we have + int m_mark_reset; // used to indicate that we're inside a (?|...) block. + unsigned m_max_mark; // largest mark count seen inside a (?|...) block. + std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted). + std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative + bool m_has_case_change; // true if somewhere in the current block the case has changed + unsigned m_recursion_count; // How many times we've called parse_all. +#if defined(BOOST_REGEX_MSVC) && defined(_M_IX86) + // This is an ugly warning suppression workaround (for warnings *inside* std::vector + // that can not otherwise be suppressed)... + static_assert(sizeof(long) >= sizeof(void*), "Long isn't long enough!"); + std::vector m_alt_jumps; // list of alternative in the current scope. +#else + std::vector m_alt_jumps; // list of alternative in the current scope. +#endif + + basic_regex_parser& operator=(const basic_regex_parser&); + basic_regex_parser(const basic_regex_parser&); +}; + +template +basic_regex_parser::basic_regex_parser(regex_data* data) + : basic_regex_creator(data), m_parser_proc(), m_base(0), m_end(0), m_position(0), + m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0) +{ +} + +template +void basic_regex_parser::parse(const charT* p1, const charT* p2, unsigned l_flags) +{ + // pass l_flags on to base class: + this->init(l_flags); + // set up pointers: + m_position = m_base = p1; + m_end = p2; + // empty strings are errors: + if((p1 == p2) && + ( + ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group) + || (l_flags & regbase::no_empty_expressions) + ) + ) + { + fail(regex_constants::error_empty, 0); + return; + } + // select which parser to use: + switch(l_flags & regbase::main_option_type) + { + case regbase::perl_syntax_group: + { + m_parser_proc = &basic_regex_parser::parse_extended; + // + // Add a leading paren with index zero to give recursions a target: + // + re_brace* br = static_cast(this->append_state(syntax_element_startmark, sizeof(re_brace))); + br->index = 0; + br->icase = this->flags() & regbase::icase; + break; + } + case regbase::basic_syntax_group: + m_parser_proc = &basic_regex_parser::parse_basic; + break; + case regbase::literal: + m_parser_proc = &basic_regex_parser::parse_literal; + break; + default: + // Oops, someone has managed to set more than one of the main option flags, + // so this must be an error: + fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used."); + return; + } + + // parse all our characters: + bool result = parse_all(); + // + // Unwind our alternatives: + // + unwind_alts(-1); + // reset l_flags as a global scope (?imsx) may have altered them: + this->flags(l_flags); + // if we haven't gobbled up all the characters then we must + // have had an unexpected ')' : + if(!result) + { + fail(regex_constants::error_paren, std::distance(m_base, m_position), "Found a closing ) with no corresponding opening parenthesis."); + return; + } + // if an error has been set then give up now: + if(this->m_pdata->m_status) + return; + // fill in our sub-expression count: + this->m_pdata->m_mark_count = 1u + (std::size_t)m_mark_count; + this->finalize(p1, p2); +} + +template +void basic_regex_parser::fail(regex_constants::error_type error_code, std::ptrdiff_t position) +{ + // get the error message: + std::string message = this->m_pdata->m_ptraits->error_string(error_code); + fail(error_code, position, message); +} + +template +void basic_regex_parser::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos) +{ + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = error_code; + m_position = m_end; // don't bother parsing anything else + + // + // Augment error message with the regular expression text: + // + if(start_pos == position) + start_pos = (std::max)(static_cast(0), position - static_cast(10)); + std::ptrdiff_t end_pos = (std::min)(position + static_cast(10), static_cast(m_end - m_base)); + if(error_code != regex_constants::error_empty) + { + if((start_pos != 0) || (end_pos != (m_end - m_base))) + message += " The error occurred while parsing the regular expression fragment: '"; + else + message += " The error occurred while parsing the regular expression: '"; + if(start_pos != end_pos) + { + message += std::string(m_base + start_pos, m_base + position); + message += ">>>HERE>>>"; + message += std::string(m_base + position, m_base + end_pos); + } + message += "'."; + } + +#ifndef BOOST_NO_EXCEPTIONS + if(0 == (this->flags() & regex_constants::no_except)) + { + boost::regex_error e(message, error_code, position); + e.raise(); + } +#else + (void)position; // suppress warnings. +#endif +} + +template +bool basic_regex_parser::parse_all() +{ + if (++m_recursion_count > 400) + { + // exceeded internal limits + fail(boost::regex_constants::error_complexity, m_position - m_base, "Exceeded nested brace limit."); + } + bool result = true; + while(result && (m_position != m_end)) + { + result = (this->*m_parser_proc)(); + } + --m_recursion_count; + return result; +} + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4702) +#endif +template +bool basic_regex_parser::parse_basic() +{ + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_escape: + return parse_basic_escape(); + case regex_constants::syntax_dot: + return parse_match_any(); + case regex_constants::syntax_caret: + ++m_position; + this->append_state(syntax_element_start_line); + break; + case regex_constants::syntax_dollar: + ++m_position; + this->append_state(syntax_element_end_line); + break; + case regex_constants::syntax_star: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(); + } + case regex_constants::syntax_plus: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(1); + } + case regex_constants::syntax_question: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(0, 1); + } + case regex_constants::syntax_open_set: + return parse_set(); + case regex_constants::syntax_newline: + if(this->flags() & regbase::newline_alt) + return parse_alt(); + else + return parse_literal(); + default: + return parse_literal(); + } + return true; +} + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#if BOOST_REGEX_MSVC >= 1800 +#pragma warning(disable:26812) +#endif +#endif +template +bool basic_regex_parser::parse_extended() +{ + bool result = true; + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_open_mark: + return parse_open_paren(); + case regex_constants::syntax_close_mark: + return false; + case regex_constants::syntax_escape: + return parse_extended_escape(); + case regex_constants::syntax_dot: + return parse_match_any(); + case regex_constants::syntax_caret: + ++m_position; + this->append_state( + (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line)); + break; + case regex_constants::syntax_dollar: + ++m_position; + this->append_state( + (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line)); + break; + case regex_constants::syntax_star: + if(m_position == this->m_base) + { + fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression."); + return false; + } + ++m_position; + return parse_repeat(); + case regex_constants::syntax_question: + if(m_position == this->m_base) + { + fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression."); + return false; + } + ++m_position; + return parse_repeat(0,1); + case regex_constants::syntax_plus: + if(m_position == this->m_base) + { + fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression."); + return false; + } + ++m_position; + return parse_repeat(1); + case regex_constants::syntax_open_brace: + ++m_position; + return parse_repeat_range(false); + case regex_constants::syntax_close_brace: + if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {."); + return false; + } + result = parse_literal(); + break; + case regex_constants::syntax_or: + return parse_alt(); + case regex_constants::syntax_open_set: + return parse_set(); + case regex_constants::syntax_newline: + if(this->flags() & regbase::newline_alt) + return parse_alt(); + else + return parse_literal(); + case regex_constants::syntax_hash: + // + // If we have a mod_x flag set, then skip until + // we get to a newline character: + // + if((this->flags() + & (regbase::no_perl_ex|regbase::mod_x)) + == regbase::mod_x) + { + while((m_position != m_end) && !is_separator(*m_position++)){} + return true; + } + BOOST_REGEX_FALLTHROUGH; + default: + result = parse_literal(); + break; + } + return result; +} +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + +template +bool basic_regex_parser::parse_literal() +{ + // append this as a literal provided it's not a space character + // or the perl option regbase::mod_x is not set: + if( + ((this->flags() + & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex)) + != regbase::mod_x) + || !this->m_traits.isctype(*m_position, this->m_mask_space)) + this->append_literal(*m_position); + ++m_position; + return true; +} + +template +bool basic_regex_parser::parse_open_paren() +{ + // + // skip the '(' and error check: + // + if(++m_position == m_end) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + // + // begin by checking for a perl-style (?...) extension: + // + if( + ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0) + || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) + ) + { + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question) + return parse_perl_extension(); + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star) + return parse_perl_verb(); + } + // + // update our mark count, and append the required state: + // + unsigned markid = 0; + if(0 == (this->flags() & regbase::nosubs)) + { + markid = ++m_mark_count; + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair(std::distance(m_base, m_position) - 1, 0)); + } + re_brace* pb = static_cast(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->index = markid; + pb->icase = this->flags() & regbase::icase; + std::ptrdiff_t last_paren_start = this->getoffset(pb); + // back up insertion point for alternations, and set new point: + std::ptrdiff_t last_alt_point = m_alt_insert_point; + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + // + // back up the current flags in case we have a nested (?imsx) group: + // + regex_constants::syntax_option_type opts = this->flags(); + bool old_case_change = m_has_case_change; + m_has_case_change = false; // no changes to this scope as yet... + // + // Back up branch reset data in case we have a nested (?|...) + // + int mark_reset = m_mark_reset; + m_mark_reset = -1; + // + // now recursively add more states, this will terminate when we get to a + // matching ')' : + // + parse_all(); + // + // Unwind pushed alternatives: + // + if(0 == unwind_alts(last_paren_start)) + return false; + // + // restore flags: + // + if(m_has_case_change) + { + // the case has changed in one or more of the alternatives + // within the scoped (...) block: we have to add a state + // to reset the case sensitivity: + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = opts & regbase::icase; + } + this->flags(opts); + m_has_case_change = old_case_change; + // + // restore branch reset: + // + m_mark_reset = mark_reset; + // + // we either have a ')' or we have run out of characters prematurely: + // + if(m_position == m_end) + { + this->fail(regex_constants::error_paren, std::distance(m_base, m_end)); + return false; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + return false; + if(markid && (this->flags() & regbase::save_subexpression_location)) + this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position); + ++m_position; + // + // append closing parenthesis state: + // + pb = static_cast(this->append_state(syntax_element_endmark, sizeof(re_brace))); + pb->index = markid; + pb->icase = this->flags() & regbase::icase; + this->m_paren_start = last_paren_start; + // + // restore the alternate insertion point: + // + this->m_alt_insert_point = last_alt_point; + // + // allow backrefs to this mark: + // + if(markid > 0) + this->m_backrefs.set(markid); + + return true; +} + +template +bool basic_regex_parser::parse_basic_escape() +{ + if(++m_position == m_end) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + bool result = true; + switch(this->m_traits.escape_syntax_type(*m_position)) + { + case regex_constants::syntax_open_mark: + return parse_open_paren(); + case regex_constants::syntax_close_mark: + return false; + case regex_constants::syntax_plus: + if(this->flags() & regex_constants::bk_plus_qm) + { + ++m_position; + return parse_repeat(1); + } + else + return parse_literal(); + case regex_constants::syntax_question: + if(this->flags() & regex_constants::bk_plus_qm) + { + ++m_position; + return parse_repeat(0, 1); + } + else + return parse_literal(); + case regex_constants::syntax_open_brace: + if(this->flags() & regbase::no_intervals) + return parse_literal(); + ++m_position; + return parse_repeat_range(true); + case regex_constants::syntax_close_brace: + if(this->flags() & regbase::no_intervals) + return parse_literal(); + fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {."); + return false; + case regex_constants::syntax_or: + if(this->flags() & regbase::bk_vbar) + return parse_alt(); + else + result = parse_literal(); + break; + case regex_constants::syntax_digit: + return parse_backref(); + case regex_constants::escape_type_start_buffer: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_buffer_start); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_end_buffer: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_buffer_end); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_word_assert: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_boundary); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_not_word_assert: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_within_word); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_left_word: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_start); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_right_word: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_end); + } + else + result = parse_literal(); + break; + default: + if(this->flags() & regbase::emacs_ex) + { + bool negate = true; + switch(*m_position) + { + case 'w': + negate = false; + BOOST_REGEX_FALLTHROUGH; + case 'W': + { + basic_char_set char_set; + if(negate) + char_set.negate(); + char_set.add_class(this->m_word_mask); + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; + return true; + } + case 's': + negate = false; + BOOST_REGEX_FALLTHROUGH; + case 'S': + return add_emacs_code(negate); + case 'c': + case 'C': + // not supported yet: + fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead."); + return false; + default: + break; + } + } + result = parse_literal(); + break; + } + return result; +} + +template +bool basic_regex_parser::parse_extended_escape() +{ + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found."); + return false; + } + bool negate = false; // in case this is a character class escape: \w \d etc + switch(this->m_traits.escape_syntax_type(*m_position)) + { + case regex_constants::escape_type_not_class: + negate = true; + BOOST_REGEX_FALLTHROUGH; + case regex_constants::escape_type_class: + { +escape_type_class_jump: + typedef typename traits::char_class_type m_type; + m_type m = this->m_traits.lookup_classname(m_position, m_position+1); + if(m != 0) + { + basic_char_set char_set; + if(negate) + char_set.negate(); + char_set.add_class(m); + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; + return true; + } + // + // not a class, just a regular unknown escape: + // + this->append_literal(unescape_character()); + break; + } + case regex_constants::syntax_digit: + return parse_backref(); + case regex_constants::escape_type_left_word: + ++m_position; + this->append_state(syntax_element_word_start); + break; + case regex_constants::escape_type_right_word: + ++m_position; + this->append_state(syntax_element_word_end); + break; + case regex_constants::escape_type_start_buffer: + ++m_position; + this->append_state(syntax_element_buffer_start); + break; + case regex_constants::escape_type_end_buffer: + ++m_position; + this->append_state(syntax_element_buffer_end); + break; + case regex_constants::escape_type_word_assert: + ++m_position; + this->append_state(syntax_element_word_boundary); + break; + case regex_constants::escape_type_not_word_assert: + ++m_position; + this->append_state(syntax_element_within_word); + break; + case regex_constants::escape_type_Z: + ++m_position; + this->append_state(syntax_element_soft_buffer_end); + break; + case regex_constants::escape_type_Q: + return parse_QE(); + case regex_constants::escape_type_C: + return parse_match_any(); + case regex_constants::escape_type_X: + ++m_position; + this->append_state(syntax_element_combining); + break; + case regex_constants::escape_type_G: + ++m_position; + this->append_state(syntax_element_restart_continue); + break; + case regex_constants::escape_type_not_property: + negate = true; + BOOST_REGEX_FALLTHROUGH; + case regex_constants::escape_type_property: + { + ++m_position; + char_class_type m; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found."); + return false; + } + // maybe have \p{ddd} + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + const charT* base = m_position; + // skip forward until we find enclosing brace: + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence."); + return false; + } + m = this->m_traits.lookup_classname(++base, m_position++); + } + else + { + m = this->m_traits.lookup_classname(m_position, m_position+1); + ++m_position; + } + if(m != 0) + { + basic_char_set char_set; + if(negate) + char_set.negate(); + char_set.add_class(m); + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + return true; + } + fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name."); + return false; + } + case regex_constants::escape_type_reset_start_mark: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + re_brace* pb = static_cast(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->index = -5; + pb->icase = this->flags() & regbase::icase; + this->m_pdata->m_data.align(); + ++m_position; + return true; + } + goto escape_type_class_jump; + case regex_constants::escape_type_line_ending: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + const charT* e = get_escape_R_string(); + const charT* old_position = m_position; + const charT* old_end = m_end; + const charT* old_base = m_base; + m_position = e; + m_base = e; + m_end = e + traits::length(e); + bool r = parse_all(); + m_position = ++old_position; + m_end = old_end; + m_base = old_base; + return r; + } + goto escape_type_class_jump; + case regex_constants::escape_type_extended_backref: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + bool have_brace = false; + bool negative = false; + static const char incomplete_message[] = "Incomplete \\g escape found."; + if(++m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + // maybe have \g{ddd} + regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position); + regex_constants::syntax_type syn_end = 0; + if((syn == regex_constants::syntax_open_brace) + || (syn == regex_constants::escape_type_left_word) + || (syn == regex_constants::escape_type_end_buffer)) + { + if(++m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + have_brace = true; + switch(syn) + { + case regex_constants::syntax_open_brace: + syn_end = regex_constants::syntax_close_brace; + break; + case regex_constants::escape_type_left_word: + syn_end = regex_constants::escape_type_right_word; + break; + default: + syn_end = regex_constants::escape_type_end_buffer; + break; + } + } + negative = (*m_position == static_cast('-')); + if((negative) && (++m_position == m_end)) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + const charT* pc = m_position; + std::intmax_t i = this->m_traits.toi(pc, m_end, 10); + if((i < 0) && syn_end) + { + // Check for a named capture, get the leftmost one if there is more than one: + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end)) + { + ++m_position; + } + i = hash_value_from_capture_name(base, m_position); + pc = m_position; + } + if(negative) + i = 1 + (static_cast(m_mark_count) - i); + if(((i < hash_value_mask) && (i > 0) && (this->m_backrefs.test((std::size_t)i))) || ((i >= hash_value_mask) && (this->m_pdata->get_id((int)i) > 0) && (this->m_backrefs.test(this->m_pdata->get_id((int)i))))) + { + m_position = pc; + re_brace* pb = static_cast(this->append_state(syntax_element_backref, sizeof(re_brace))); + pb->index = (int)i; + pb->icase = this->flags() & regbase::icase; + } + else + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + m_position = pc; + if(have_brace) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end)) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + ++m_position; + } + return true; + } + goto escape_type_class_jump; + case regex_constants::escape_type_control_v: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + goto escape_type_class_jump; + BOOST_REGEX_FALLTHROUGH; + default: + this->append_literal(unescape_character()); + break; + } + return true; +} + +template +bool basic_regex_parser::parse_match_any() +{ + // + // we have a '.' that can match any character: + // + ++m_position; + static_cast( + this->append_state(syntax_element_wild, sizeof(re_dot)) + )->mask = static_cast(this->flags() & regbase::no_mod_s + ? BOOST_REGEX_DETAIL_NS::force_not_newline + : this->flags() & regbase::mod_s ? + BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care); + return true; +} + +template +bool basic_regex_parser::parse_repeat(std::size_t low, std::size_t high) +{ + bool greedy = true; + bool possessive = false; + std::size_t insert_point; + // + // when we get to here we may have a non-greedy ? mark still to come: + // + if((m_position != m_end) + && ( + (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex))) + ) + ) + { + // OK we have a perl or emacs regex, check for a '?': + if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x) + { + // whitespace skip: + while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + } + if((m_position != m_end) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)) + { + greedy = false; + ++m_position; + } + // for perl regexes only check for possessive ++ repeats. + if((m_position != m_end) + && (0 == (this->flags() & regbase::main_option_type)) + && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus)) + { + possessive = true; + ++m_position; + } + } + if(0 == this->m_last_state) + { + fail(regex_constants::error_badrepeat, std::distance(m_base, m_position), "Nothing to repeat."); + return false; + } + if(this->m_last_state->type == syntax_element_endmark) + { + // insert a repeat before the '(' matching the last ')': + insert_point = this->m_paren_start; + } + else if((this->m_last_state->type == syntax_element_literal) && (static_cast(this->m_last_state)->length > 1)) + { + // the last state was a literal with more than one character, split it in two: + re_literal* lit = static_cast(this->m_last_state); + charT c = (static_cast(static_cast(lit+1)))[lit->length - 1]; + lit->length -= 1; + // now append new state: + lit = static_cast(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT))); + lit->length = 1; + (static_cast(static_cast(lit+1)))[0] = c; + insert_point = this->getoffset(this->m_last_state); + } + else + { + // repeat the last state whatever it was, need to add some error checking here: + switch(this->m_last_state->type) + { + case syntax_element_start_line: + case syntax_element_end_line: + case syntax_element_word_boundary: + case syntax_element_within_word: + case syntax_element_word_start: + case syntax_element_word_end: + case syntax_element_buffer_start: + case syntax_element_buffer_end: + case syntax_element_alt: + case syntax_element_soft_buffer_end: + case syntax_element_restart_continue: + case syntax_element_jump: + case syntax_element_startmark: + case syntax_element_backstep: + // can't legally repeat any of the above: + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + default: + // do nothing... + break; + } + insert_point = this->getoffset(this->m_last_state); + } + // + // OK we now know what to repeat, so insert the repeat around it: + // + re_repeat* rep = static_cast(this->insert_state(insert_point, syntax_element_rep, re_repeater_size)); + rep->min = low; + rep->max = high; + rep->greedy = greedy; + rep->leading = false; + // store our repeater position for later: + std::ptrdiff_t rep_off = this->getoffset(rep); + // and append a back jump to the repeat: + re_jump* jmp = static_cast(this->append_state(syntax_element_jump, sizeof(re_jump))); + jmp->alt.i = rep_off - this->getoffset(jmp); + this->m_pdata->m_data.align(); + // now fill in the alt jump for the repeat: + rep = static_cast(this->getaddress(rep_off)); + rep->alt.i = this->m_pdata->m_data.size() - rep_off; + // + // If the repeat is possessive then bracket the repeat with a (?>...) + // independent sub-expression construct: + // + if(possessive) + { + if(m_position != m_end) + { + // + // Check for illegal following quantifier, we have to do this here, because + // the extra states we insert below circumvents our usual error checking :-( + // + bool contin = false; + do + { + if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x) + { + // whitespace skip: + while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + } + if (m_position != m_end) + { + switch (this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_star: + case regex_constants::syntax_plus: + case regex_constants::syntax_question: + case regex_constants::syntax_open_brace: + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + case regex_constants::syntax_open_mark: + // Do we have a comment? If so we need to skip it here... + if ((m_position + 2 < m_end) && this->m_traits.syntax_type(*(m_position + 1)) == regex_constants::syntax_question + && this->m_traits.syntax_type(*(m_position + 2)) == regex_constants::syntax_hash) + { + while ((m_position != m_end) + && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) { + } + contin = true; + } + else + contin = false; + } + } + else + contin = false; + } while (contin); + } + re_brace* pb = static_cast(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace))); + pb->index = -3; + pb->icase = this->flags() & regbase::icase; + jmp = static_cast(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp); + pb = static_cast(this->append_state(syntax_element_endmark, sizeof(re_brace))); + pb->index = -3; + pb->icase = this->flags() & regbase::icase; + } + return true; +} + +template +bool basic_regex_parser::parse_repeat_range(bool isbasic) +{ + static const char incomplete_message[] = "Missing } in quantified repetition."; + // + // parse a repeat-range: + // + std::size_t min, max; + std::intmax_t v; + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + if(this->m_position == this->m_end) + { + if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + // Treat the opening '{' as a literal character, rewind to start of error: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; + return parse_literal(); + } + // get min: + v = this->m_traits.toi(m_position, m_end, 10); + // skip whitespace: + if((v < 0) || (v > umax())) + { + if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + // Treat the opening '{' as a literal character, rewind to start of error: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; + return parse_literal(); + } + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + if(this->m_position == this->m_end) + { + if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + // Treat the opening '{' as a literal character, rewind to start of error: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; + return parse_literal(); + } + min = static_cast(v); + // see if we have a comma: + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma) + { + // move on and error check: + ++m_position; + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + if(this->m_position == this->m_end) + { + if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + // Treat the opening '{' as a literal character, rewind to start of error: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; + return parse_literal(); + } + // get the value if any: + v = this->m_traits.toi(m_position, m_end, 10); + max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits::max)(); + } + else + { + // no comma, max = min: + max = min; + } + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + // OK now check trailing }: + if(this->m_position == this->m_end) + { + if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + // Treat the opening '{' as a literal character, rewind to start of error: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; + return parse_literal(); + } + if(isbasic) + { + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape) + { + ++m_position; + if(this->m_position == this->m_end) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + } + else + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace) + ++m_position; + else + { + // Treat the opening '{' as a literal character, rewind to start of error: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; + return parse_literal(); + } + // + // finally go and add the repeat, unless error: + // + if(min > max) + { + // Backtrack to error location: + m_position -= 2; + while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position; + ++m_position; + fail(regex_constants::error_badbrace, m_position - m_base); + return false; + } + return parse_repeat(min, max); +} + +template +bool basic_regex_parser::parse_alt() +{ + // + // error check: if there have been no previous states, + // or if the last state was a '(' then error: + // + if( + ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark)) + && + !( + ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) + && + ((this->flags() & regbase::no_empty_expressions) == 0) + ) + ) + { + fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |."); + return false; + } + // + // Reset mark count if required: + // + if(m_max_mark < m_mark_count) + m_max_mark = m_mark_count; + if(m_mark_reset >= 0) + m_mark_count = m_mark_reset; + + ++m_position; + // + // we need to append a trailing jump: + // + re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump)); + std::ptrdiff_t jump_offset = this->getoffset(pj); + // + // now insert the alternative: + // + re_alt* palt = static_cast(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size)); + jump_offset += re_alt_size; + this->m_pdata->m_data.align(); + palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt); + // + // update m_alt_insert_point so that the next alternate gets + // inserted at the start of the second of the two we've just created: + // + this->m_alt_insert_point = this->m_pdata->m_data.size(); + // + // the start of this alternative must have a case changes state + // if the current block has messed around with case changes: + // + if(m_has_case_change) + { + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = this->m_icase; + } + // + // push the alternative onto our stack, a recursive + // implementation here is easier to understand (and faster + // as it happens), but causes all kinds of stack overflow problems + // on programs with small stacks (COM+). + // + m_alt_jumps.push_back(jump_offset); + return true; +} + +template +bool basic_regex_parser::parse_set() +{ + static const char incomplete_message[] = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content."; + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + basic_char_set char_set; + + const charT* base = m_position; // where the '[' was + const charT* item_base = m_position; // where the '[' or '^' was + + while(m_position != m_end) + { + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_caret: + if(m_position == base) + { + char_set.negate(); + ++m_position; + item_base = m_position; + } + else + parse_set_literal(char_set); + break; + case regex_constants::syntax_close_set: + if(m_position == item_base) + { + parse_set_literal(char_set); + break; + } + else + { + ++m_position; + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + } + return true; + case regex_constants::syntax_open_set: + if(parse_inner_set(char_set)) + break; + return true; + case regex_constants::syntax_escape: + { + // + // look ahead and see if this is a character class shortcut + // \d \w \s etc... + // + ++m_position; + if(this->m_traits.escape_syntax_type(*m_position) + == regex_constants::escape_type_class) + { + char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1); + if(m != 0) + { + char_set.add_class(m); + ++m_position; + break; + } + } + else if(this->m_traits.escape_syntax_type(*m_position) + == regex_constants::escape_type_not_class) + { + // negated character class: + char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1); + if(m != 0) + { + char_set.add_negated_class(m); + ++m_position; + break; + } + } + // not a character class, just a regular escape: + --m_position; + parse_set_literal(char_set); + break; + } + default: + parse_set_literal(char_set); + break; + } + } + return m_position != m_end; +} + +template +bool basic_regex_parser::parse_inner_set(basic_char_set& char_set) +{ + static const char incomplete_message[] = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content."; + // + // we have either a character class [:name:] + // a collating element [.name.] + // or an equivalence class [=name=] + // + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_dot: + // + // a collating element is treated as a literal: + // + --m_position; + parse_set_literal(char_set); + return true; + case regex_constants::syntax_colon: + { + // check that character classes are actually enabled: + if((this->flags() & (regbase::main_option_type | regbase::no_char_classes)) + == (regbase::basic_syntax_group | regbase::no_char_classes)) + { + --m_position; + parse_set_literal(char_set); + return true; + } + // skip the ':' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + const charT* name_first = m_position; + // skip at least one character, then find the matching ':]' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon)) + ++m_position; + const charT* name_last = m_position; + if(m_end == m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + if((m_end == ++m_position) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + // + // check for negated class: + // + bool negated = false; + if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret) + { + ++name_first; + negated = true; + } + typedef typename traits::char_class_type m_type; + m_type m = this->m_traits.lookup_classname(name_first, name_last); + if(m == 0) + { + if(char_set.empty() && (name_last - name_first == 1)) + { + // maybe a special case: + ++m_position; + if( (m_position != m_end) + && (this->m_traits.syntax_type(*m_position) + == regex_constants::syntax_close_set)) + { + if(this->m_traits.escape_syntax_type(*name_first) + == regex_constants::escape_type_left_word) + { + ++m_position; + this->append_state(syntax_element_word_start); + return false; + } + if(this->m_traits.escape_syntax_type(*name_first) + == regex_constants::escape_type_right_word) + { + ++m_position; + this->append_state(syntax_element_word_end); + return false; + } + } + } + fail(regex_constants::error_ctype, name_first - m_base); + return false; + } + if(!negated) + char_set.add_class(m); + else + char_set.add_negated_class(m); + ++m_position; + break; + } + case regex_constants::syntax_equal: + { + // skip the '=' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + const charT* name_first = m_position; + // skip at least one character, then find the matching '=]' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)) + ++m_position; + const charT* name_last = m_position; + if(m_end == m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + if((m_end == ++m_position) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + string_type m = this->m_traits.lookup_collatename(name_first, name_last); + if(m.empty() || (m.size() > 2)) + { + fail(regex_constants::error_collate, name_first - m_base); + return false; + } + digraph d; + d.first = m[0]; + if(m.size() > 1) + d.second = m[1]; + else + d.second = 0; + char_set.add_equivalent(d); + ++m_position; + break; + } + default: + --m_position; + parse_set_literal(char_set); + break; + } + return true; +} + +template +void basic_regex_parser::parse_set_literal(basic_char_set& char_set) +{ + digraph start_range(get_next_set_literal(char_set)); + if(m_end == m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return; + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash) + { + // we have a range: + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set) + { + digraph end_range = get_next_set_literal(char_set); + char_set.add_range(start_range, end_range); + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash) + { + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return; + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set) + { + // trailing - : + --m_position; + return; + } + fail(regex_constants::error_range, m_position - m_base); + return; + } + return; + } + --m_position; + } + char_set.add_single(start_range); +} + +template +digraph basic_regex_parser::get_next_set_literal(basic_char_set& char_set) +{ + digraph result; + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_dash: + if(!char_set.empty()) + { + // see if we are at the end of the set: + if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_range, m_position - m_base); + return result; + } + --m_position; + } + result.first = *m_position++; + return result; + case regex_constants::syntax_escape: + // check to see if escapes are supported first: + if(this->flags() & regex_constants::no_escape_in_lists) + { + result = *m_position++; + break; + } + ++m_position; + result = unescape_character(); + break; + case regex_constants::syntax_open_set: + { + if(m_end == ++m_position) + { + fail(regex_constants::error_collate, m_position - m_base); + return result; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot) + { + --m_position; + result.first = *m_position; + ++m_position; + return result; + } + if(m_end == ++m_position) + { + fail(regex_constants::error_collate, m_position - m_base); + return result; + } + const charT* name_first = m_position; + // skip at least one character, then find the matching ':]' + if(m_end == ++m_position) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)) + ++m_position; + const charT* name_last = m_position; + if(m_end == m_position) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + if((m_end == ++m_position) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + ++m_position; + string_type s = this->m_traits.lookup_collatename(name_first, name_last); + if(s.empty() || (s.size() > 2)) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + result.first = s[0]; + if(s.size() > 1) + result.second = s[1]; + else + result.second = 0; + return result; + } + default: + result = *m_position++; + } + return result; +} + +// +// does a value fit in the specified charT type? +// +template +bool valid_value(charT, std::intmax_t v, const std::integral_constant&) +{ + return (v >> (sizeof(charT) * CHAR_BIT)) == 0; +} +template +bool valid_value(charT, std::intmax_t, const std::integral_constant&) +{ + return true; // v will alsways fit in a charT +} +template +bool valid_value(charT c, std::intmax_t v) +{ + return valid_value(c, v, std::integral_constant()); +} + +template +charT basic_regex_parser::unescape_character() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + charT result(0); + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely."); + return false; + } + switch(this->m_traits.escape_syntax_type(*m_position)) + { + case regex_constants::escape_type_control_a: + result = charT('\a'); + break; + case regex_constants::escape_type_e: + result = charT(27); + break; + case regex_constants::escape_type_control_f: + result = charT('\f'); + break; + case regex_constants::escape_type_control_n: + result = charT('\n'); + break; + case regex_constants::escape_type_control_r: + result = charT('\r'); + break; + case regex_constants::escape_type_control_t: + result = charT('\t'); + break; + case regex_constants::escape_type_control_v: + result = charT('\v'); + break; + case regex_constants::escape_type_word_assert: + result = charT('\b'); + break; + case regex_constants::escape_type_ascii_control: + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely."); + return result; + } + result = static_cast(*m_position % 32); + break; + case regex_constants::escape_type_hex: + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely."); + return result; + } + // maybe have \x{ddd} + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence."); + return result; + } + std::intmax_t i = this->m_traits.toi(m_position, m_end, 16); + if((m_position == m_end) + || (i < 0) + || ((std::numeric_limits::is_specialized) && (i > (std::intmax_t)(std::numeric_limits::max)())) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid."); + return result; + } + ++m_position; + result = charT(i); + } + else + { + std::ptrdiff_t len = (std::min)(static_cast(2), static_cast(m_end - m_position)); + std::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16); + if((i < 0) + || !valid_value(charT(0), i)) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character."); + return result; + } + result = charT(i); + } + return result; + case regex_constants::syntax_digit: + { + // an octal escape sequence, the first character must be a zero + // followed by up to 3 octal digits: + std::ptrdiff_t len = (std::min)(std::distance(m_position, m_end), static_cast(4)); + const charT* bp = m_position; + std::intmax_t val = this->m_traits.toi(bp, bp + 1, 8); + if(val != 0) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + // Oops not an octal escape after all: + fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence."); + return result; + } + val = this->m_traits.toi(m_position, m_position + len, 8); + if((val < 0) || (val > (std::intmax_t)(std::numeric_limits::max)())) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid."); + return result; + } + return static_cast(val); + } + case regex_constants::escape_type_named_char: + { + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + // maybe have \N{name} + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + const charT* base = m_position; + // skip forward until we find enclosing brace: + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + string_type s = this->m_traits.lookup_collatename(++base, m_position++); + if(s.empty()) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_collate, m_position - m_base); + return false; + } + if(s.size() == 1) + { + return s[0]; + } + } + // fall through is a failure: + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + default: + result = *m_position; + break; + } + ++m_position; + return result; +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool basic_regex_parser::parse_backref() +{ + BOOST_REGEX_ASSERT(m_position != m_end); + const charT* pc = m_position; + std::intmax_t i = this->m_traits.toi(pc, pc + 1, 10); + if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs))) + { + // not a backref at all but an octal escape sequence: + charT c = unescape_character(); + this->append_literal(c); + } + else if((i > 0) && (this->m_backrefs.test((std::size_t)i))) + { + m_position = pc; + re_brace* pb = static_cast(this->append_state(syntax_element_backref, sizeof(re_brace))); + pb->index = (int)i; + pb->icase = this->flags() & regbase::icase; + } + else + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + return true; +} + +template +bool basic_regex_parser::parse_QE() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + // + // parse a \Q...\E sequence: + // + ++m_position; // skip the Q + const charT* start = m_position; + const charT* end; + do + { + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape)) + ++m_position; + if(m_position == m_end) + { + // a \Q...\E sequence may terminate with the end of the expression: + end = m_position; + break; + } + if(++m_position == m_end) // skip the escape + { + fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence."); + return false; + } + // check to see if it's a \E: + if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E) + { + ++m_position; + end = m_position - 2; + break; + } + // otherwise go round again: + }while(true); + // + // now add all the character between the two escapes as literals: + // + while(start != end) + { + this->append_literal(*start); + ++start; + } + return true; +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool basic_regex_parser::parse_perl_extension() +{ + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + // + // treat comments as a special case, as these + // are the only ones that don't start with a leading + // startmark state: + // + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash) + { + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) + {} + return true; + } + // + // backup some state, and prepare the way: + // + int markid = 0; + std::ptrdiff_t jump_offset = 0; + re_brace* pb = static_cast(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->icase = this->flags() & regbase::icase; + std::ptrdiff_t last_paren_start = this->getoffset(pb); + // back up insertion point for alternations, and set new point: + std::ptrdiff_t last_alt_point = m_alt_insert_point; + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + std::ptrdiff_t expected_alt_point = m_alt_insert_point; + bool restore_flags = true; + regex_constants::syntax_option_type old_flags = this->flags(); + bool old_case_change = m_has_case_change; + m_has_case_change = false; + charT name_delim; + int mark_reset = m_mark_reset; + int max_mark = m_max_mark; + m_mark_reset = -1; + m_max_mark = m_mark_count; + std::intmax_t v; + // + // select the actual extension used: + // + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_or: + m_mark_reset = m_mark_count; + BOOST_REGEX_FALLTHROUGH; + case regex_constants::syntax_colon: + // + // a non-capturing mark: + // + pb->index = markid = 0; + ++m_position; + break; + case regex_constants::syntax_digit: + { + // + // a recursive subexpression: + // + v = this->m_traits.toi(m_position, m_end, 10); + if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated."); + return false; + } +insert_recursion: + pb->index = markid = 0; + re_recurse* pr = static_cast(this->append_state(syntax_element_recurse, sizeof(re_recurse))); + pr->alt.i = (std::ptrdiff_t)v; + pr->state_id = 0; + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = this->flags() & regbase::icase; + break; + } + case regex_constants::syntax_plus: + // + // A forward-relative recursive subexpression: + // + ++m_position; + v = this->m_traits.toi(m_position, m_end, 10); + if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression."); + return false; + } + if ((std::numeric_limits::max)() - m_mark_count < v) + { + fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression."); + return false; + } + v += m_mark_count; + goto insert_recursion; + case regex_constants::syntax_dash: + // + // Possibly a backward-relative recursive subexpression: + // + ++m_position; + v = this->m_traits.toi(m_position, m_end, 10); + if(v <= 0) + { + --m_position; + // Oops not a relative recursion at all, but a (?-imsx) group: + goto option_group_jump; + } + v = static_cast(m_mark_count) + 1 - v; + if(v <= 0) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression."); + return false; + } + goto insert_recursion; + case regex_constants::syntax_equal: + pb->index = markid = -1; + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + case regex_constants::syntax_not: + pb->index = markid = -2; + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + case regex_constants::escape_type_left_word: + { + // a lookbehind assertion: + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position); + if(t == regex_constants::syntax_not) + pb->index = markid = -2; + else if(t == regex_constants::syntax_equal) + pb->index = markid = -1; + else + { + // Probably a named capture which also starts (?< : + name_delim = '>'; + --m_position; + goto named_capture_jump; + } + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->append_state(syntax_element_backstep, sizeof(re_brace)); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + } + case regex_constants::escape_type_right_word: + // + // an independent sub-expression: + // + pb->index = markid = -3; + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + case regex_constants::syntax_open_mark: + { + // a conditional expression: + pb->index = markid = -4; + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = this->m_traits.toi(m_position, m_end, 10); + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(*m_position == charT('R')) + { + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(*m_position == charT('&')) + { + const charT* base = ++m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = -static_cast(hash_value_from_capture_name(base, m_position)); + } + else + { + v = -this->m_traits.toi(m_position, m_end, 10); + } + re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = v < 0 ? (int)(v - 1) : 0; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else if((*m_position == charT('\'')) || (*m_position == charT('<'))) + { + const charT* base = ++m_position; + while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\''))) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = static_cast(hash_value_from_capture_name(base, m_position)); + re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = (int)v; + if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture."); + return false; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else if(*m_position == charT('D')) + { + const char* def = "DEFINE"; + while(*def && (m_position != m_end) && (*m_position == charT(*def))) + ++m_position, ++def; + if((m_position == m_end) || *def) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = 9999; // special magic value! + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else if(v > 0) + { + re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = (int)v; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else + { + // verify that we have a lookahead or lookbehind assert: + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word) + { + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + m_position -= 3; + } + else + { + if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + m_position -= 2; + } + } + break; + } + case regex_constants::syntax_close_mark: + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + case regex_constants::escape_type_end_buffer: + { + name_delim = *m_position; +named_capture_jump: + markid = 0; + if(0 == (this->flags() & regbase::nosubs)) + { + markid = ++m_mark_count; + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair(std::distance(m_base, m_position) - 2, 0)); + } + pb->index = markid; + const charT* base = ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + while((m_position != m_end) && (*m_position != name_delim)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + this->m_pdata->set_name(base, m_position, markid); + ++m_position; + break; + } + default: + if(*m_position == charT('R')) + { + ++m_position; + v = 0; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + goto insert_recursion; + } + if(*m_position == charT('&')) + { + ++m_position; + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = static_cast(hash_value_from_capture_name(base, m_position)); + goto insert_recursion; + } + if(*m_position == charT('P')) + { + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(*m_position == charT('>')) + { + ++m_position; + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = static_cast(hash_value_from_capture_name(base, m_position)); + goto insert_recursion; + } + } + // + // lets assume that we have a (?imsx) group and try and parse it: + // +option_group_jump: + regex_constants::syntax_option_type opts = parse_options(); + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + // make a note of whether we have a case change: + m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase)); + pb->index = markid = 0; + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) + { + // update flags and carry on as normal: + this->flags(opts); + restore_flags = false; + old_case_change |= m_has_case_change; // defer end of scope by one ')' + } + else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon) + { + // update flags and carry on until the matching ')' is found: + this->flags(opts); + ++m_position; + } + else + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + + // finally append a case change state if we need it: + if(m_has_case_change) + { + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = opts & regbase::icase; + } + + } + // + // now recursively add more states, this will terminate when we get to a + // matching ')' : + // + parse_all(); + // + // Unwind alternatives: + // + if(0 == unwind_alts(last_paren_start)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block."); + return false; + } + // + // we either have a ')' or we have run out of characters prematurely: + // + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + this->fail(regex_constants::error_paren, std::distance(m_base, m_end)); + return false; + } + BOOST_REGEX_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); + ++m_position; + // + // restore the flags: + // + if(restore_flags) + { + // append a case change state if we need it: + if(m_has_case_change) + { + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = old_flags & regbase::icase; + } + this->flags(old_flags); + } + // + // set up the jump pointer if we have one: + // + if(jump_offset) + { + this->m_pdata->m_data.align(); + re_jump* jmp = static_cast(this->getaddress(jump_offset)); + jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp); + if((this->m_last_state == jmp) && (markid != -2)) + { + // Oops... we didn't have anything inside the assertion. + // Note we don't get here for negated forward lookahead as (?!) + // does have some uses. + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion."); + return false; + } + } + // + // verify that if this is conditional expression, that we do have + // an alternative, if not add one: + // + if(markid == -4) + { + re_syntax_base* b = this->getaddress(expected_alt_point); + // Make sure we have exactly one alternative following this state: + if(b->type != syntax_element_alt) + { + re_alt* alt = static_cast(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt))); + alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt); + } + else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast(b)->alt.i + this->getoffset(b))) && (static_cast(b)->alt.i > 0) && this->getaddress(static_cast(b)->alt.i, b)->type == syntax_element_alt) + { + // Can't have seen more than one alternative: + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression."); + return false; + } + else + { + // We must *not* have seen an alternative inside a (DEFINE) block: + b = this->getaddress(b->next.i, b); + if((b->type == syntax_element_assert_backref) && (static_cast(b)->index == 9999)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block."); + return false; + } + } + // check for invalid repetition of next state: + b = this->getaddress(expected_alt_point); + b = this->getaddress(static_cast(b)->next.i, b); + if((b->type != syntax_element_assert_backref) + && (b->type != syntax_element_startmark)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion."); + return false; + } + } + // + // append closing parenthesis state: + // + pb = static_cast(this->append_state(syntax_element_endmark, sizeof(re_brace))); + pb->index = markid; + pb->icase = this->flags() & regbase::icase; + this->m_paren_start = last_paren_start; + // + // restore the alternate insertion point: + // + this->m_alt_insert_point = last_alt_point; + // + // and the case change data: + // + m_has_case_change = old_case_change; + // + // And the mark_reset data: + // + if(m_max_mark > m_mark_count) + { + m_mark_count = m_max_mark; + } + m_mark_reset = mark_reset; + m_max_mark = max_mark; + + + if(markid > 0) + { + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.at((std::size_t)markid - 1).second = std::distance(m_base, m_position) - 1; + // + // allow backrefs to this mark: + // + this->m_backrefs.set(markid); + } + return true; +} + +template +bool basic_regex_parser::match_verb(const char* verb) +{ + while(*verb) + { + if(static_cast(*verb) != *m_position) + { + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++verb; + } + return true; +} + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#if BOOST_REGEX_MSVC >= 1800 +#pragma warning(disable:26812) +#endif +#endif +template +bool basic_regex_parser::parse_perl_verb() +{ + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + switch(*m_position) + { + case 'F': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + this->append_state(syntax_element_fail); + return true; + } + break; + case 'A': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(match_verb("CCEPT")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + this->append_state(syntax_element_accept); + return true; + } + break; + case 'C': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(match_verb("OMMIT")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + static_cast(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit; + this->m_pdata->m_disable_match_any = true; + return true; + } + break; + case 'P': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(match_verb("RUNE")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + static_cast(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune; + this->m_pdata->m_disable_match_any = true; + return true; + } + break; + case 'S': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(match_verb("KIP")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + static_cast(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip; + this->m_pdata->m_disable_match_any = true; + return true; + } + break; + case 'T': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(match_verb("HEN")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + this->append_state(syntax_element_then); + this->m_pdata->m_disable_match_any = true; + return true; + } + break; + } + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; +} +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +template +bool basic_regex_parser::add_emacs_code(bool negate) +{ + // + // parses an emacs style \sx or \Sx construct. + // + if(++m_position == m_end) + { + // Rewind to start of sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + basic_char_set char_set; + if(negate) + char_set.negate(); + + static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', }; + + switch(*m_position) + { + case 's': + case ' ': + char_set.add_class(this->m_mask_space); + break; + case 'w': + char_set.add_class(this->m_word_mask); + break; + case '_': + char_set.add_single(digraph(charT('$'))); + char_set.add_single(digraph(charT('&'))); + char_set.add_single(digraph(charT('*'))); + char_set.add_single(digraph(charT('+'))); + char_set.add_single(digraph(charT('-'))); + char_set.add_single(digraph(charT('_'))); + char_set.add_single(digraph(charT('<'))); + char_set.add_single(digraph(charT('>'))); + break; + case '.': + char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5)); + break; + case '(': + char_set.add_single(digraph(charT('('))); + char_set.add_single(digraph(charT('['))); + char_set.add_single(digraph(charT('{'))); + break; + case ')': + char_set.add_single(digraph(charT(')'))); + char_set.add_single(digraph(charT(']'))); + char_set.add_single(digraph(charT('}'))); + break; + case '"': + char_set.add_single(digraph(charT('"'))); + char_set.add_single(digraph(charT('\''))); + char_set.add_single(digraph(charT('`'))); + break; + case '\'': + char_set.add_single(digraph(charT('\''))); + char_set.add_single(digraph(charT(','))); + char_set.add_single(digraph(charT('#'))); + break; + case '<': + char_set.add_single(digraph(charT(';'))); + break; + case '>': + char_set.add_single(digraph(charT('\n'))); + char_set.add_single(digraph(charT('\f'))); + break; + default: + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; + return true; +} + +template +regex_constants::syntax_option_type basic_regex_parser::parse_options() +{ + // we have a (?imsx-imsx) group, convert it into a set of flags: + regex_constants::syntax_option_type f = this->flags(); + bool breakout = false; + do + { + switch(*m_position) + { + case 's': + f |= regex_constants::mod_s; + f &= ~regex_constants::no_mod_s; + break; + case 'm': + f &= ~regex_constants::no_mod_m; + break; + case 'i': + f |= regex_constants::icase; + break; + case 'x': + f |= regex_constants::mod_x; + break; + default: + breakout = true; + continue; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + } + while(!breakout); + + breakout = false; + + if(*m_position == static_cast('-')) + { + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + do + { + switch(*m_position) + { + case 's': + f &= ~regex_constants::mod_s; + f |= regex_constants::no_mod_s; + break; + case 'm': + f |= regex_constants::no_mod_m; + break; + case 'i': + f &= ~regex_constants::icase; + break; + case 'x': + f &= ~regex_constants::mod_x; + break; + default: + breakout = true; + continue; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + } + while(!breakout); + } + return f; +} + +template +bool basic_regex_parser::unwind_alts(std::ptrdiff_t last_paren_start) +{ + // + // If we didn't actually add any states after the last + // alternative then that's an error: + // + if((this->m_alt_insert_point == static_cast(this->m_pdata->m_data.size())) + && (!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start) + && + !( + ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) + && + ((this->flags() & regbase::no_empty_expressions) == 0) + ) + ) + { + fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |."); + return false; + } + // + // Fix up our alternatives: + // + while((!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start)) + { + // + // fix up the jump to point to the end of the states + // that we've just added: + // + std::ptrdiff_t jump_offset = m_alt_jumps.back(); + m_alt_jumps.pop_back(); + this->m_pdata->m_data.align(); + re_jump* jmp = static_cast(this->getaddress(jump_offset)); + BOOST_REGEX_ASSERT(jmp->type == syntax_element_jump); + jmp->alt.i = this->m_pdata->m_data.size() - jump_offset; + } + return true; +} + +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + +} // namespace BOOST_REGEX_DETAIL_NS +} // namespace boost + +#endif diff --git a/include/boost/regex/v5/c_regex_traits.hpp b/include/boost/regex/v5/c_regex_traits.hpp new file mode 100644 index 00000000..9de34632 --- /dev/null +++ b/include/boost/regex/v5/c_regex_traits.hpp @@ -0,0 +1,474 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE c_regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits class that wraps the global C locale. + */ + +#ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_C_REGEX_TRAITS_HPP_INCLUDED + +#include +#include +#include + +namespace boost{ + + namespace BOOST_REGEX_DETAIL_NS { + + enum + { + char_class_space = 1 << 0, + char_class_print = 1 << 1, + char_class_cntrl = 1 << 2, + char_class_upper = 1 << 3, + char_class_lower = 1 << 4, + char_class_alpha = 1 << 5, + char_class_digit = 1 << 6, + char_class_punct = 1 << 7, + char_class_xdigit = 1 << 8, + char_class_alnum = char_class_alpha | char_class_digit, + char_class_graph = char_class_alnum | char_class_punct, + char_class_blank = 1 << 9, + char_class_word = 1 << 10, + char_class_unicode = 1 << 11, + char_class_horizontal = 1 << 12, + char_class_vertical = 1 << 13 + }; + + } + +template +struct c_regex_traits; + +template<> +struct c_regex_traits +{ + c_regex_traits(){} + typedef char char_type; + typedef std::size_t size_type; + typedef std::string string_type; + struct locale_type{}; + typedef std::uint32_t char_class_type; + + static size_type length(const char_type* p) + { + return (std::strlen)(p); + } + + char translate(char c) const + { + return c; + } + char translate_nocase(char c) const + { + return static_cast((std::tolower)(static_cast(c))); + } + + static string_type transform(const char* p1, const char* p2); + static string_type transform_primary(const char* p1, const char* p2); + + static char_class_type lookup_classname(const char* p1, const char* p2); + static string_type lookup_collatename(const char* p1, const char* p2); + + static bool isctype(char, char_class_type); + static int value(char, int); + + locale_type imbue(locale_type l) + { return l; } + locale_type getloc()const + { return locale_type(); } + +private: + // this type is not copyable: + c_regex_traits(const c_regex_traits&); + c_regex_traits& operator=(const c_regex_traits&); +}; + +#ifndef BOOST_NO_WREGEX +template<> +struct c_regex_traits +{ + c_regex_traits(){} + typedef wchar_t char_type; + typedef std::size_t size_type; + typedef std::wstring string_type; + struct locale_type{}; + typedef std::uint32_t char_class_type; + + static size_type length(const char_type* p) + { + return (std::wcslen)(p); + } + + wchar_t translate(wchar_t c) const + { + return c; + } + wchar_t translate_nocase(wchar_t c) const + { + return (std::towlower)(c); + } + + static string_type transform(const wchar_t* p1, const wchar_t* p2); + static string_type transform_primary(const wchar_t* p1, const wchar_t* p2); + + static char_class_type lookup_classname(const wchar_t* p1, const wchar_t* p2); + static string_type lookup_collatename(const wchar_t* p1, const wchar_t* p2); + + static bool isctype(wchar_t, char_class_type); + static int value(wchar_t, int); + + locale_type imbue(locale_type l) + { return l; } + locale_type getloc()const + { return locale_type(); } + +private: + // this type is not copyable: + c_regex_traits(const c_regex_traits&); + c_regex_traits& operator=(const c_regex_traits&); +}; + +#endif // BOOST_NO_WREGEX + +inline c_regex_traits::string_type c_regex_traits::transform(const char* p1, const char* p2) +{ + std::string result(10, ' '); + std::size_t s = result.size(); + std::size_t r; + std::string src(p1, p2); + while (s < (r = std::strxfrm(&*result.begin(), src.c_str(), s))) + { +#if defined(_CPPLIB_VER) + // + // A bug in VC11 and 12 causes the program to hang if we pass a null-string + // to std::strxfrm, but only for certain locales :-( + // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware). + // + if (r == INT_MAX) + { + result.erase(); + result.insert(result.begin(), static_cast(0)); + return result; + } +#endif + result.append(r - s + 3, ' '); + s = result.size(); + } + result.erase(r); + return result; +} + +inline c_regex_traits::string_type c_regex_traits::transform_primary(const char* p1, const char* p2) +{ + static char s_delim; + static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast*>(0), &s_delim); + std::string result; + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch (s_collate_type) + { + case ::boost::BOOST_REGEX_DETAIL_NS::sort_C: + case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + for (std::string::size_type i = 0; i < result.size(); ++i) + result[i] = static_cast((std::tolower)(static_cast(result[i]))); + result = transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed: + { + // get a regular sort key, and then truncate it: + result = transform(p1, p2); + result.erase(s_delim); + break; + } + case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result = transform(p1, p2); + if ((!result.empty()) && (result[0] == s_delim)) + break; + std::size_t i; + for (i = 0; i < result.size(); ++i) + { + if (result[i] == s_delim) + break; + } + result.erase(i); + break; + } + if (result.empty()) + result = std::string(1, char(0)); + return result; +} + +inline c_regex_traits::char_class_type c_regex_traits::lookup_classname(const char* p1, const char* p2) +{ + using namespace BOOST_REGEX_DETAIL_NS; + static const char_class_type masks[] = + { + 0, + char_class_alnum, + char_class_alpha, + char_class_blank, + char_class_cntrl, + char_class_digit, + char_class_digit, + char_class_graph, + char_class_horizontal, + char_class_lower, + char_class_lower, + char_class_print, + char_class_punct, + char_class_space, + char_class_space, + char_class_upper, + char_class_unicode, + char_class_upper, + char_class_vertical, + char_class_alnum | char_class_word, + char_class_alnum | char_class_word, + char_class_xdigit, + }; + + int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); + if (idx < 0) + { + std::string s(p1, p2); + for (std::string::size_type i = 0; i < s.size(); ++i) + s[i] = static_cast((std::tolower)(static_cast(s[i]))); + idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); + } + BOOST_REGEX_ASSERT(std::size_t(idx) + 1u < sizeof(masks) / sizeof(masks[0])); + return masks[idx + 1]; +} + +inline bool c_regex_traits::isctype(char c, char_class_type mask) +{ + using namespace BOOST_REGEX_DETAIL_NS; + return + ((mask & char_class_space) && (std::isspace)(static_cast(c))) + || ((mask & char_class_print) && (std::isprint)(static_cast(c))) + || ((mask & char_class_cntrl) && (std::iscntrl)(static_cast(c))) + || ((mask & char_class_upper) && (std::isupper)(static_cast(c))) + || ((mask & char_class_lower) && (std::islower)(static_cast(c))) + || ((mask & char_class_alpha) && (std::isalpha)(static_cast(c))) + || ((mask & char_class_digit) && (std::isdigit)(static_cast(c))) + || ((mask & char_class_punct) && (std::ispunct)(static_cast(c))) + || ((mask & char_class_xdigit) && (std::isxdigit)(static_cast(c))) + || ((mask & char_class_blank) && (std::isspace)(static_cast(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c)) + || ((mask & char_class_word) && (c == '_')) + || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v'))) + || ((mask & char_class_horizontal) && (std::isspace)(static_cast(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != '\v')); +} + +inline c_regex_traits::string_type c_regex_traits::lookup_collatename(const char* p1, const char* p2) +{ + std::string s(p1, p2); + s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s); + if (s.empty() && (p2 - p1 == 1)) + s.append(1, *p1); + return s; +} + +inline int c_regex_traits::value(char c, int radix) +{ + char b[2] = { c, '\0', }; + char* ep; + int result = std::strtol(b, &ep, radix); + if (ep == b) + return -1; + return result; +} + +#ifndef BOOST_NO_WREGEX + +inline c_regex_traits::string_type c_regex_traits::transform(const wchar_t* p1, const wchar_t* p2) +{ + std::size_t r; + std::size_t s = 10; + std::wstring src(p1, p2); + std::wstring result(s, L' '); + while (s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s))) + { +#if defined(_CPPLIB_VER) + // + // A bug in VC11 and 12 causes the program to hang if we pass a null-string + // to std::strxfrm, but only for certain locales :-( + // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware). + // + if (r == INT_MAX) + { + result.erase(); + result.insert(result.begin(), static_cast(0)); + return result; + } +#endif + result.append(r - s + 3, L' '); + s = result.size(); + } + result.erase(r); + return result; +} + +inline c_regex_traits::string_type c_regex_traits::transform_primary(const wchar_t* p1, const wchar_t* p2) +{ + static wchar_t s_delim; + static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast*>(0), &s_delim); + std::wstring result; + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch (s_collate_type) + { + case ::boost::BOOST_REGEX_DETAIL_NS::sort_C: + case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + for (std::wstring::size_type i = 0; i < result.size(); ++i) + result[i] = (std::towlower)(result[i]); + result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed: + { + // get a regular sort key, and then truncate it: + result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); + result.erase(s_delim); + break; + } + case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); + if ((!result.empty()) && (result[0] == s_delim)) + break; + std::size_t i; + for (i = 0; i < result.size(); ++i) + { + if (result[i] == s_delim) + break; + } + result.erase(i); + break; + } + if (result.empty()) + result = std::wstring(1, char(0)); + return result; +} + +inline c_regex_traits::char_class_type c_regex_traits::lookup_classname(const wchar_t* p1, const wchar_t* p2) +{ + using namespace BOOST_REGEX_DETAIL_NS; + static const char_class_type masks[] = + { + 0, + char_class_alnum, + char_class_alpha, + char_class_blank, + char_class_cntrl, + char_class_digit, + char_class_digit, + char_class_graph, + char_class_horizontal, + char_class_lower, + char_class_lower, + char_class_print, + char_class_punct, + char_class_space, + char_class_space, + char_class_upper, + char_class_unicode, + char_class_upper, + char_class_vertical, + char_class_alnum | char_class_word, + char_class_alnum | char_class_word, + char_class_xdigit, + }; + + int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); + if (idx < 0) + { + std::wstring s(p1, p2); + for (std::wstring::size_type i = 0; i < s.size(); ++i) + s[i] = (std::towlower)(s[i]); + idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); + } + BOOST_REGEX_ASSERT(idx + 1 < static_cast(sizeof(masks) / sizeof(masks[0]))); + return masks[idx + 1]; +} + +inline bool c_regex_traits::isctype(wchar_t c, char_class_type mask) +{ + using namespace BOOST_REGEX_DETAIL_NS; + return + ((mask & char_class_space) && (std::iswspace)(c)) + || ((mask & char_class_print) && (std::iswprint)(c)) + || ((mask & char_class_cntrl) && (std::iswcntrl)(c)) + || ((mask & char_class_upper) && (std::iswupper)(c)) + || ((mask & char_class_lower) && (std::iswlower)(c)) + || ((mask & char_class_alpha) && (std::iswalpha)(c)) + || ((mask & char_class_digit) && (std::iswdigit)(c)) + || ((mask & char_class_punct) && (std::iswpunct)(c)) + || ((mask & char_class_xdigit) && (std::iswxdigit)(c)) + || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c)) + || ((mask & char_class_word) && (c == '_')) + || ((mask & char_class_unicode) && (c & ~static_cast(0xff))) + || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v'))) + || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v')); +} + +inline c_regex_traits::string_type c_regex_traits::lookup_collatename(const wchar_t* p1, const wchar_t* p2) +{ + std::string name; + // Usual msvc warning suppression does not work here with std::string template constructor.... use a workaround instead: + for (const wchar_t* pos = p1; pos != p2; ++pos) + name.push_back((char)*pos); + name = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(name); + if (!name.empty()) + return string_type(name.begin(), name.end()); + if (p2 - p1 == 1) + return string_type(1, *p1); + return string_type(); +} + +inline int c_regex_traits::value(wchar_t c, int radix) +{ +#ifdef BOOST_BORLANDC + // workaround for broken wcstol: + if ((std::iswxdigit)(c) == 0) + return -1; +#endif + wchar_t b[2] = { c, '\0', }; + wchar_t* ep; + int result = std::wcstol(b, &ep, radix); + if (ep == b) + return -1; + return result; +} + +#endif + +} + +#endif + + + diff --git a/include/boost/regex/v5/char_regex_traits.hpp b/include/boost/regex/v5/char_regex_traits.hpp new file mode 100644 index 00000000..aeed86f8 --- /dev/null +++ b/include/boost/regex/v5/char_regex_traits.hpp @@ -0,0 +1,59 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE char_regex_traits.cpp + * VERSION see + * DESCRIPTION: Declares deprecated traits classes char_regex_traits<>. + */ + + +#ifndef BOOST_REGEX_V5_CHAR_REGEX_TRAITS_HPP +#define BOOST_REGEX_V5_CHAR_REGEX_TRAITS_HPP + +namespace boost{ + +namespace deprecated{ +// +// class char_regex_traits_i +// provides case insensitive traits classes (deprecated): +template +class char_regex_traits_i : public regex_traits {}; + +template<> +class char_regex_traits_i : public regex_traits +{ +public: + typedef char char_type; + typedef unsigned char uchar_type; + typedef unsigned int size_type; + typedef regex_traits base_type; + +}; + +#ifndef BOOST_NO_WREGEX +template<> +class char_regex_traits_i : public regex_traits +{ +public: + typedef wchar_t char_type; + typedef unsigned short uchar_type; + typedef unsigned int size_type; + typedef regex_traits base_type; + +}; +#endif +} // namespace deprecated +} // namespace boost + +#endif // include + diff --git a/include/boost/regex/v5/cpp_regex_traits.hpp b/include/boost/regex/v5/cpp_regex_traits.hpp new file mode 100644 index 00000000..26b6f503 --- /dev/null +++ b/include/boost/regex/v5/cpp_regex_traits.hpp @@ -0,0 +1,1040 @@ +/* + * + * Copyright (c) 2004 John Maddock + * Copyright 2011 Garmin Ltd. or its subsidiaries + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE cpp_regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits class cpp_regex_traits. + */ + +#ifndef BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED + +#include +#include +#include +#include + +#include +#include + +#ifdef BOOST_HAS_THREADS +#include +#endif +#include +#include + +#include +#include +#include + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4786 4251) +#endif + +namespace boost{ + +// +// forward declaration is needed by some compilers: +// +template +class cpp_regex_traits; + +namespace BOOST_REGEX_DETAIL_NS{ + +// +// class parser_buf: +// acts as a stream buffer which wraps around a pair of pointers: +// +template > +class parser_buf : public ::std::basic_streambuf +{ + typedef ::std::basic_streambuf base_type; + typedef typename base_type::int_type int_type; + typedef typename base_type::char_type char_type; + typedef typename base_type::pos_type pos_type; + typedef ::std::streamsize streamsize; + typedef typename base_type::off_type off_type; +public: + parser_buf() : base_type() { setbuf(0, 0); } + const charT* getnext() { return this->gptr(); } +protected: + std::basic_streambuf* setbuf(char_type* s, streamsize n) override; + typename parser_buf::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which) override; + typename parser_buf::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which) override; +private: + parser_buf& operator=(const parser_buf&); + parser_buf(const parser_buf&); +}; + +template +std::basic_streambuf* +parser_buf::setbuf(char_type* s, streamsize n) +{ + this->setg(s, s, s + n); + return this; +} + +template +typename parser_buf::pos_type +parser_buf::seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which) +{ + if(which & ::std::ios_base::out) + return pos_type(off_type(-1)); + std::ptrdiff_t size = this->egptr() - this->eback(); + std::ptrdiff_t pos = this->gptr() - this->eback(); + charT* g = this->eback(); + switch(static_cast(way)) + { + case ::std::ios_base::beg: + if((off < 0) || (off > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + off, g + size); + break; + case ::std::ios_base::end: + if((off < 0) || (off > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + size - off, g + size); + break; + case ::std::ios_base::cur: + { + std::ptrdiff_t newpos = static_cast(pos + off); + if((newpos < 0) || (newpos > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + newpos, g + size); + break; + } + default: ; + } +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4244) +#endif + return static_cast(this->gptr() - this->eback()); +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +typename parser_buf::pos_type +parser_buf::seekpos(pos_type sp, ::std::ios_base::openmode which) +{ + if(which & ::std::ios_base::out) + return pos_type(off_type(-1)); + off_type size = static_cast(this->egptr() - this->eback()); + charT* g = this->eback(); + if(off_type(sp) <= size) + { + this->setg(g, g + off_type(sp), g + size); + } + return pos_type(off_type(-1)); +} + +// +// class cpp_regex_traits_base: +// acts as a container for locale and the facets we are using. +// +template +struct cpp_regex_traits_base +{ + cpp_regex_traits_base(const std::locale& l) + { (void)imbue(l); } + std::locale imbue(const std::locale& l); + + std::locale m_locale; + std::ctype const* m_pctype; + std::messages const* m_pmessages; + std::collate const* m_pcollate; + + bool operator<(const cpp_regex_traits_base& b)const + { + if(m_pctype == b.m_pctype) + { + if(m_pmessages == b.m_pmessages) + { + return m_pcollate < b.m_pcollate; + } + return m_pmessages < b.m_pmessages; + } + return m_pctype < b.m_pctype; + } + bool operator==(const cpp_regex_traits_base& b)const + { + return (m_pctype == b.m_pctype) + && (m_pmessages == b.m_pmessages) + && (m_pcollate == b.m_pcollate); + } +}; + +template +std::locale cpp_regex_traits_base::imbue(const std::locale& l) +{ + std::locale result(m_locale); + m_locale = l; + m_pctype = &std::use_facet>(l); + m_pmessages = std::has_facet >(l) ? &std::use_facet >(l) : 0; + m_pcollate = &std::use_facet >(l); + return result; +} + +// +// class cpp_regex_traits_char_layer: +// implements methods that require specialization for narrow characters: +// +template +class cpp_regex_traits_char_layer : public cpp_regex_traits_base +{ + typedef std::basic_string string_type; + typedef std::map map_type; + typedef typename map_type::const_iterator map_iterator_type; +public: + cpp_regex_traits_char_layer(const std::locale& l) + : cpp_regex_traits_base(l) + { + init(); + } + cpp_regex_traits_char_layer(const cpp_regex_traits_base& b) + : cpp_regex_traits_base(b) + { + init(); + } + void init(); + + regex_constants::syntax_type syntax_type(charT c)const + { + map_iterator_type i = m_char_map.find(c); + return ((i == m_char_map.end()) ? 0 : i->second); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + map_iterator_type i = m_char_map.find(c); + if(i == m_char_map.end()) + { + if(this->m_pctype->is(std::ctype_base::lower, c)) return regex_constants::escape_type_class; + if(this->m_pctype->is(std::ctype_base::upper, c)) return regex_constants::escape_type_not_class; + return 0; + } + return i->second; + } + +private: + string_type get_default_message(regex_constants::syntax_type); + // TODO: use a hash table when available! + map_type m_char_map; +}; + +template +void cpp_regex_traits_char_layer::init() +{ + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: +#ifndef __IBMCPP__ + typename std::messages::catalog cat = static_cast::catalog>(-1); +#else + typename std::messages::catalog cat = reinterpret_cast::catalog>(-1); +#endif + std::string cat_name(cpp_regex_traits::get_catalog_name()); + if((!cat_name.empty()) && (this->m_pmessages != 0)) + { + cat = this->m_pmessages->open( + cat_name, + this->m_locale); + if((int)cat < 0) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if((int)cat >= 0) + { +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = this->m_pmessages->get(cat, 0, i, get_default_message(i)); + for(typename string_type::size_type j = 0; j < mss.size(); ++j) + { + m_char_map[mss[j]] = i; + } + } + this->m_pmessages->close(cat); +#ifndef BOOST_NO_EXCEPTIONS + } + catch(...) + { + if(this->m_pmessages) + this->m_pmessages->close(cat); + throw; + } +#endif + } + else + { + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + const char* ptr = get_default_syntax(i); + while(ptr && *ptr) + { + m_char_map[this->m_pctype->widen(*ptr)] = i; + ++ptr; + } + } + } +} + +template +typename cpp_regex_traits_char_layer::string_type + cpp_regex_traits_char_layer::get_default_message(regex_constants::syntax_type i) +{ + const char* ptr = get_default_syntax(i); + string_type result; + while(ptr && *ptr) + { + result.append(1, this->m_pctype->widen(*ptr)); + ++ptr; + } + return result; +} + +// +// specialized version for narrow characters: +// +template <> +class cpp_regex_traits_char_layer : public cpp_regex_traits_base +{ + typedef std::string string_type; +public: + cpp_regex_traits_char_layer(const std::locale& l) + : cpp_regex_traits_base(l) + { + init(); + } + cpp_regex_traits_char_layer(const cpp_regex_traits_base& l) + : cpp_regex_traits_base(l) + { + init(); + } + + regex_constants::syntax_type syntax_type(char c)const + { + return m_char_map[static_cast(c)]; + } + regex_constants::escape_syntax_type escape_syntax_type(char c) const + { + return m_char_map[static_cast(c)]; + } + +private: + regex_constants::syntax_type m_char_map[1u << CHAR_BIT]; + void init(); +}; + +// +// class cpp_regex_traits_implementation: +// provides pimpl implementation for cpp_regex_traits. +// +template +class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer +{ +public: + typedef typename cpp_regex_traits::char_class_type char_class_type; + typedef typename std::ctype::mask native_mask_type; + typedef typename std::make_unsigned::type unsigned_native_mask_type; + static const char_class_type mask_blank = 1u << 24; + static const char_class_type mask_word = 1u << 25; + static const char_class_type mask_unicode = 1u << 26; + static const char_class_type mask_horizontal = 1u << 27; + static const char_class_type mask_vertical = 1u << 28; + + typedef std::basic_string string_type; + typedef charT char_type; + //cpp_regex_traits_implementation(); + cpp_regex_traits_implementation(const std::locale& l) + : cpp_regex_traits_char_layer(l) + { + init(); + } + cpp_regex_traits_implementation(const cpp_regex_traits_base& l) + : cpp_regex_traits_char_layer(l) + { + init(); + } + std::string error_string(regex_constants::error_type n) const + { + if(!m_error_strings.empty()) + { + std::map::const_iterator p = m_error_strings.find(n); + return (p == m_error_strings.end()) ? std::string(get_default_error_string(n)) : p->second; + } + return get_default_error_string(n); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + char_class_type result = lookup_classname_imp(p1, p2); + if(result == 0) + { + string_type temp(p1, p2); + this->m_pctype->tolower(&*temp.begin(), &*temp.begin() + temp.size()); + result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size()); + } + return result; + } + string_type lookup_collatename(const charT* p1, const charT* p2) const; + string_type transform_primary(const charT* p1, const charT* p2) const; + string_type transform(const charT* p1, const charT* p2) const; +private: + std::map m_error_strings; // error messages indexed by numberic ID + std::map m_custom_class_names; // character class names + std::map m_custom_collate_names; // collating element names + unsigned m_collate_type; // the form of the collation string + charT m_collate_delim; // the collation group delimiter + // + // helpers: + // + char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const; + void init(); +}; + +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_blank; +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_word; +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_unicode; +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_vertical; +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_horizontal; + +template +typename cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::transform_primary(const charT* p1, const charT* p2) const +{ + // + // PRECONDITIONS: + // + // A bug in gcc 3.2 (and maybe other versions as well) treats + // p1 as a null terminated string, for efficiency reasons + // we work around this elsewhere, but just assert here that + // we adhere to gcc's (buggy) preconditions... + // + BOOST_REGEX_ASSERT(*p2 == 0); + string_type result; +#if defined(_CPPLIB_VER) + // + // A bug in VC11 and 12 causes the program to hang if we pass a null-string + // to std::collate::transform, but only for certain locales :-( + // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware). + // + if(*p1 == 0) + { + return string_type(1, charT(0)); + } +#endif + // + // swallowing all exceptions here is a bad idea + // however at least one std lib will always throw + // std::bad_alloc for certain arguments... + // +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch(m_collate_type) + { + case sort_C: + case sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + this->m_pctype->tolower(&*result.begin(), &*result.begin() + result.size()); + result = this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case sort_fixed: + { + // get a regular sort key, and then truncate it: + result.assign(this->m_pcollate->transform(p1, p2)); + result.erase(this->m_collate_delim); + break; + } + case sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result.assign(this->m_pcollate->transform(p1, p2)); + std::size_t i; + for(i = 0; i < result.size(); ++i) + { + if(result[i] == m_collate_delim) + break; + } + result.erase(i); + break; + } +#ifndef BOOST_NO_EXCEPTIONS + }catch(...){} +#endif + while((!result.empty()) && (charT(0) == *result.rbegin())) + result.erase(result.size() - 1); + if(result.empty()) + { + // character is ignorable at the primary level: + result = string_type(1, charT(0)); + } + return result; +} + +template +typename cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::transform(const charT* p1, const charT* p2) const +{ + // + // PRECONDITIONS: + // + // A bug in gcc 3.2 (and maybe other versions as well) treats + // p1 as a null terminated string, for efficiency reasons + // we work around this elsewhere, but just assert here that + // we adhere to gcc's (buggy) preconditions... + // + BOOST_REGEX_ASSERT(*p2 == 0); + // + // swallowing all exceptions here is a bad idea + // however at least one std lib will always throw + // std::bad_alloc for certain arguments... + // + string_type result, result2; +#if defined(_CPPLIB_VER) + // + // A bug in VC11 and 12 causes the program to hang if we pass a null-string + // to std::collate::transform, but only for certain locales :-( + // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware). + // + if(*p1 == 0) + { + return result; + } +#endif +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + result = this->m_pcollate->transform(p1, p2); + // + // some implementations (Dinkumware) append unnecessary trailing \0's: + while((!result.empty()) && (charT(0) == *result.rbegin())) + result.erase(result.size() - 1); + // + // We may have NULL's used as separators between sections of the collate string, + // an example would be Boost.Locale. We have no way to detect this case via + // #defines since this can be used with any compiler/platform combination. + // Unfortunately our state machine (which was devised when all implementations + // used underlying C language API's) can't cope with that case. One workaround + // is to replace each character with 2, fortunately this code isn't used that + // much as this is now slower than before :-( + // + typedef typename std::make_unsigned::type uchar_type; + result2.reserve(result.size() * 2 + 2); + for(unsigned i = 0; i < result.size(); ++i) + { + if(static_cast(result[i]) == (std::numeric_limits::max)()) + { + result2.append(1, charT((std::numeric_limits::max)())).append(1, charT('b')); + } + else + { + result2.append(1, static_cast(1 + static_cast(result[i]))).append(1, charT('b') - 1); + } + } + BOOST_REGEX_ASSERT(std::find(result2.begin(), result2.end(), charT(0)) == result2.end()); +#ifndef BOOST_NO_EXCEPTIONS + } + catch(...) + { + } +#endif + return result2; +} + + +template +typename cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::lookup_collatename(const charT* p1, const charT* p2) const +{ + typedef typename std::map::const_iterator iter_type; + if(!m_custom_collate_names.empty()) + { + iter_type pos = m_custom_collate_names.find(string_type(p1, p2)); + if(pos != m_custom_collate_names.end()) + return pos->second; + } + std::string name(p1, p2); + name = lookup_default_collate_name(name); + if(!name.empty()) + return string_type(name.begin(), name.end()); + if(p2 - p1 == 1) + return string_type(1, *p1); + return string_type(); +} + +template +void cpp_regex_traits_implementation::init() +{ +#ifndef __IBMCPP__ + typename std::messages::catalog cat = static_cast::catalog>(-1); +#else + typename std::messages::catalog cat = reinterpret_cast::catalog>(-1); +#endif + std::string cat_name(cpp_regex_traits::get_catalog_name()); + if((!cat_name.empty()) && (this->m_pmessages != 0)) + { + cat = this->m_pmessages->open( + cat_name, + this->m_locale); + if((int)cat < 0) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if((int)cat >= 0) + { + // + // Error messages: + // + for(boost::regex_constants::error_type i = static_cast(0); + i <= boost::regex_constants::error_unknown; + i = static_cast(i + 1)) + { + const char* p = get_default_error_string(i); + string_type default_message; + while(*p) + { + default_message.append(1, this->m_pctype->widen(*p)); + ++p; + } + string_type s = this->m_pmessages->get(cat, 0, i+200, default_message); + std::string result; + for(std::string::size_type j = 0; j < s.size(); ++j) + { + result.append(1, this->m_pctype->narrow(s[j], 0)); + } + m_error_strings[i] = result; + } + // + // Custom class names: + // + static const char_class_type masks[16] = + { + static_cast(std::ctype::alnum), + static_cast(std::ctype::alpha), + static_cast(std::ctype::cntrl), + static_cast(std::ctype::digit), + static_cast(std::ctype::graph), + cpp_regex_traits_implementation::mask_horizontal, + static_cast(std::ctype::lower), + static_cast(std::ctype::print), + static_cast(std::ctype::punct), + static_cast(std::ctype::space), + static_cast(std::ctype::upper), + cpp_regex_traits_implementation::mask_vertical, + static_cast(std::ctype::xdigit), + cpp_regex_traits_implementation::mask_blank, + cpp_regex_traits_implementation::mask_word, + cpp_regex_traits_implementation::mask_unicode, + }; + static const string_type null_string; + for(unsigned int j = 0; j <= 13; ++j) + { + string_type s(this->m_pmessages->get(cat, 0, j+300, null_string)); + if(!s.empty()) + this->m_custom_class_names[s] = masks[j]; + } + } + // + // get the collation format used by m_pcollate: + // + m_collate_type = BOOST_REGEX_DETAIL_NS::find_sort_syntax(this, &m_collate_delim); +} + +template +typename cpp_regex_traits_implementation::char_class_type + cpp_regex_traits_implementation::lookup_classname_imp(const charT* p1, const charT* p2) const +{ + static const char_class_type masks[22] = + { + 0, + static_cast(std::ctype::alnum), + static_cast(std::ctype::alpha), + cpp_regex_traits_implementation::mask_blank, + static_cast(std::ctype::cntrl), + static_cast(std::ctype::digit), + static_cast(std::ctype::digit), + static_cast(std::ctype::graph), + cpp_regex_traits_implementation::mask_horizontal, + static_cast(std::ctype::lower), + static_cast(std::ctype::lower), + static_cast(std::ctype::print), + static_cast(std::ctype::punct), + static_cast(std::ctype::space), + static_cast(std::ctype::space), + static_cast(std::ctype::upper), + cpp_regex_traits_implementation::mask_unicode, + static_cast(std::ctype::upper), + cpp_regex_traits_implementation::mask_vertical, + static_cast(std::ctype::alnum) | cpp_regex_traits_implementation::mask_word, + static_cast(std::ctype::alnum) | cpp_regex_traits_implementation::mask_word, + static_cast(std::ctype::xdigit), + }; + if(!m_custom_class_names.empty()) + { + typedef typename std::map, char_class_type>::const_iterator map_iter; + map_iter pos = m_custom_class_names.find(string_type(p1, p2)); + if(pos != m_custom_class_names.end()) + return pos->second; + } + std::size_t state_id = 1 + BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); + BOOST_REGEX_ASSERT(state_id < sizeof(masks) / sizeof(masks[0])); + return masks[state_id]; +} + +template +inline std::shared_ptr > create_cpp_regex_traits(const std::locale& l) +{ + cpp_regex_traits_base key(l); + return ::boost::object_cache, cpp_regex_traits_implementation >::get(key, 5); +} + +} // BOOST_REGEX_DETAIL_NS + +template +class cpp_regex_traits +{ +private: + typedef std::ctype ctype_type; +public: + typedef charT char_type; + typedef std::size_t size_type; + typedef std::basic_string string_type; + typedef std::locale locale_type; + typedef std::uint_least32_t char_class_type; + + struct boost_extensions_tag{}; + + cpp_regex_traits() + : m_pimpl(BOOST_REGEX_DETAIL_NS::create_cpp_regex_traits(std::locale())) + { } + static size_type length(const char_type* p) + { + return std::char_traits::length(p); + } + regex_constants::syntax_type syntax_type(charT c)const + { + return m_pimpl->syntax_type(c); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + return m_pimpl->escape_syntax_type(c); + } + charT translate(charT c) const + { + return c; + } + charT translate_nocase(charT c) const + { + return m_pimpl->m_pctype->tolower(c); + } + charT translate(charT c, bool icase) const + { + return icase ? m_pimpl->m_pctype->tolower(c) : c; + } + charT tolower(charT c) const + { + return m_pimpl->m_pctype->tolower(c); + } + charT toupper(charT c) const + { + return m_pimpl->m_pctype->toupper(c); + } + string_type transform(const charT* p1, const charT* p2) const + { + return m_pimpl->transform(p1, p2); + } + string_type transform_primary(const charT* p1, const charT* p2) const + { + return m_pimpl->transform_primary(p1, p2); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_classname(p1, p2); + } + string_type lookup_collatename(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_collatename(p1, p2); + } + bool isctype(charT c, char_class_type f) const + { + typedef typename std::ctype::mask ctype_mask; + + static const ctype_mask mask_base = + static_cast( + std::ctype::alnum + | std::ctype::alpha + | std::ctype::cntrl + | std::ctype::digit + | std::ctype::graph + | std::ctype::lower + | std::ctype::print + | std::ctype::punct + | std::ctype::space + | std::ctype::upper + | std::ctype::xdigit); + + if((f & mask_base) + && (m_pimpl->m_pctype->is( + static_cast(f & mask_base), c))) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation::mask_unicode) && BOOST_REGEX_DETAIL_NS::is_extended(c)) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation::mask_word) && (c == '_')) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation::mask_blank) + && m_pimpl->m_pctype->is(std::ctype::space, c) + && !BOOST_REGEX_DETAIL_NS::is_separator(c)) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation::mask_vertical) + && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v'))) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation::mask_horizontal) + && this->isctype(c, std::ctype::space) && !this->isctype(c, BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation::mask_vertical)) + return true; +#ifdef __CYGWIN__ + // + // Cygwin has a buggy ctype facet, see https://www.cygwin.com/ml/cygwin/2012-08/msg00178.html: + // + else if((f & std::ctype::xdigit) == std::ctype::xdigit) + { + if((c >= 'a') && (c <= 'f')) + return true; + if((c >= 'A') && (c <= 'F')) + return true; + } +#endif + return false; + } + std::intmax_t toi(const charT*& p1, const charT* p2, int radix)const; + int value(charT c, int radix)const + { + const charT* pc = &c; + return (int)toi(pc, pc + 1, radix); + } + locale_type imbue(locale_type l) + { + std::locale result(getloc()); + m_pimpl = BOOST_REGEX_DETAIL_NS::create_cpp_regex_traits(l); + return result; + } + locale_type getloc()const + { + return m_pimpl->m_locale; + } + std::string error_string(regex_constants::error_type n) const + { + return m_pimpl->error_string(n); + } + + // + // extension: + // set the name of the message catalog in use (defaults to "boost_regex"). + // + static std::string catalog_name(const std::string& name); + static std::string get_catalog_name(); + +private: + std::shared_ptr > m_pimpl; + // + // catalog name handler: + // + static std::string& get_catalog_name_inst(); + +#ifdef BOOST_HAS_THREADS + static std::mutex& get_mutex_inst(); +#endif +}; + + +template +std::intmax_t cpp_regex_traits::toi(const charT*& first, const charT* last, int radix)const +{ + BOOST_REGEX_DETAIL_NS::parser_buf sbuf; // buffer for parsing numbers. + std::basic_istream is(&sbuf); // stream for parsing numbers. + + // we do NOT want to parse any thousands separators inside the stream: + last = std::find(first, last, std::use_facet>(is.getloc()).thousands_sep()); + + sbuf.pubsetbuf(const_cast(static_cast(first)), static_cast(last-first)); + is.clear(); + if(std::abs(radix) == 16) is >> std::hex; + else if(std::abs(radix) == 8) is >> std::oct; + else is >> std::dec; + std::intmax_t val; + if(is >> val) + { + first = first + ((last - first) - sbuf.in_avail()); + return val; + } + else + return -1; +} + +template +std::string cpp_regex_traits::catalog_name(const std::string& name) +{ +#ifdef BOOST_HAS_THREADS + std::lock_guard lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + get_catalog_name_inst() = name; + return result; +} + +template +std::string& cpp_regex_traits::get_catalog_name_inst() +{ + static std::string s_name; + return s_name; +} + +template +std::string cpp_regex_traits::get_catalog_name() +{ +#ifdef BOOST_HAS_THREADS + std::lock_guard lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + return result; +} + +#ifdef BOOST_HAS_THREADS +template +std::mutex& cpp_regex_traits::get_mutex_inst() +{ + static std::mutex s_mutex; + return s_mutex; +} +#endif + +namespace BOOST_REGEX_DETAIL_NS { + + inline void cpp_regex_traits_char_layer::init() + { + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: + std::memset(m_char_map, 0, sizeof(m_char_map)); +#ifndef __IBMCPP__ + std::messages::catalog cat = static_cast::catalog>(-1); +#else + std::messages::catalog cat = reinterpret_cast::catalog>(-1); +#endif + std::string cat_name(cpp_regex_traits::get_catalog_name()); + if ((!cat_name.empty()) && (m_pmessages != 0)) + { + cat = this->m_pmessages->open( + cat_name, + this->m_locale); + if ((int)cat < 0) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if ((int)cat >= 0) + { +#ifndef BOOST_NO_EXCEPTIONS + try { +#endif + for (regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = this->m_pmessages->get(cat, 0, i, get_default_syntax(i)); + for (string_type::size_type j = 0; j < mss.size(); ++j) + { + m_char_map[static_cast(mss[j])] = i; + } + } + this->m_pmessages->close(cat); +#ifndef BOOST_NO_EXCEPTIONS + } + catch (...) + { + this->m_pmessages->close(cat); + throw; + } +#endif + } + else + { + for (regex_constants::syntax_type j = 1; j < regex_constants::syntax_max; ++j) + { + const char* ptr = get_default_syntax(j); + while (ptr && *ptr) + { + m_char_map[static_cast(*ptr)] = j; + ++ptr; + } + } + } + // + // finish off by calculating our escape types: + // + unsigned char i = 'A'; + do + { + if (m_char_map[i] == 0) + { + if (this->m_pctype->is(std::ctype_base::lower, i)) + m_char_map[i] = regex_constants::escape_type_class; + else if (this->m_pctype->is(std::ctype_base::upper, i)) + m_char_map[i] = regex_constants::escape_type_not_class; + } + } while (0xFF != i++); + } + +} // namespace detail + + +} // boost + +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + + +#endif diff --git a/include/boost/regex/v5/cregex.hpp b/include/boost/regex/v5/cregex.hpp new file mode 100644 index 00000000..a6571052 --- /dev/null +++ b/include/boost/regex/v5/cregex.hpp @@ -0,0 +1,195 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE cregex.cpp + * VERSION see + * DESCRIPTION: Declares POSIX API functions + * + boost::RegEx high level wrapper. + */ + +#ifndef BOOST_RE_CREGEX_HPP_INCLUDED +#define BOOST_RE_CREGEX_HPP_INCLUDED + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif +#include +#include + +#ifndef BOOST_REGEX_STANDALONE +#if !defined(BOOST_REGEX_NO_LIB) && !defined(BOOST_REGEX_SOURCE) && !defined(BOOST_ALL_NO_LIB) && defined(__cplusplus) +# define BOOST_LIB_NAME boost_regex +# if defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK) +# define BOOST_DYN_LINK +# endif +# ifdef BOOST_REGEX_DIAG +# define BOOST_LIB_DIAGNOSTIC +# endif +# include +#endif +#endif + +#ifdef __cplusplus +#include +#else +#include +#endif + +/* include these defs only for POSIX compatablity */ +#ifdef __cplusplus +namespace boost{ +extern "C" { +#endif + +#if defined(__cplusplus) +typedef std::ptrdiff_t regoff_t; +typedef std::size_t regsize_t; +#else +typedef ptrdiff_t regoff_t; +typedef size_t regsize_t; +#endif + +typedef struct +{ + unsigned int re_magic; +#ifdef __cplusplus + std::size_t re_nsub; /* number of parenthesized subexpressions */ +#else + size_t re_nsub; +#endif + const char* re_endp; /* end pointer for REG_PEND */ + void* guts; /* none of your business :-) */ + match_flag_type eflags; /* none of your business :-) */ +} regex_tA; + +#ifndef BOOST_NO_WREGEX +typedef struct +{ + unsigned int re_magic; +#ifdef __cplusplus + std::size_t re_nsub; /* number of parenthesized subexpressions */ +#else + size_t re_nsub; +#endif + const wchar_t* re_endp; /* end pointer for REG_PEND */ + void* guts; /* none of your business :-) */ + match_flag_type eflags; /* none of your business :-) */ +} regex_tW; +#endif + +typedef struct +{ + regoff_t rm_so; /* start of match */ + regoff_t rm_eo; /* end of match */ +} regmatch_t; + +/* regcomp() flags */ +typedef enum{ + REG_BASIC = 0000, + REG_EXTENDED = 0001, + REG_ICASE = 0002, + REG_NOSUB = 0004, + REG_NEWLINE = 0010, + REG_NOSPEC = 0020, + REG_PEND = 0040, + REG_DUMP = 0200, + REG_NOCOLLATE = 0400, + REG_ESCAPE_IN_LISTS = 01000, + REG_NEWLINE_ALT = 02000, + REG_PERLEX = 04000, + + REG_PERL = REG_EXTENDED | REG_NOCOLLATE | REG_ESCAPE_IN_LISTS | REG_PERLEX, + REG_AWK = REG_EXTENDED | REG_ESCAPE_IN_LISTS, + REG_GREP = REG_BASIC | REG_NEWLINE_ALT, + REG_EGREP = REG_EXTENDED | REG_NEWLINE_ALT, + + REG_ASSERT = 15, + REG_INVARG = 16, + REG_ATOI = 255, /* convert name to number (!) */ + REG_ITOA = 0400 /* convert number to name (!) */ +} reg_comp_flags; + +/* regexec() flags */ +typedef enum{ + REG_NOTBOL = 00001, + REG_NOTEOL = 00002, + REG_STARTEND = 00004 +} reg_exec_flags; + +/* + * POSIX error codes: + */ +typedef unsigned reg_error_t; +typedef reg_error_t reg_errcode_t; /* backwards compatibility */ + +static const reg_error_t REG_NOERROR = 0; /* Success. */ +static const reg_error_t REG_NOMATCH = 1; /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ +static const reg_error_t REG_BADPAT = 2; /* Invalid pattern. */ +static const reg_error_t REG_ECOLLATE = 3; /* Undefined collating element. */ +static const reg_error_t REG_ECTYPE = 4; /* Invalid character class name. */ +static const reg_error_t REG_EESCAPE = 5; /* Trailing backslash. */ +static const reg_error_t REG_ESUBREG = 6; /* Invalid back reference. */ +static const reg_error_t REG_EBRACK = 7; /* Unmatched left bracket. */ +static const reg_error_t REG_EPAREN = 8; /* Parenthesis imbalance. */ +static const reg_error_t REG_EBRACE = 9; /* Unmatched \{. */ +static const reg_error_t REG_BADBR = 10; /* Invalid contents of \{\}. */ +static const reg_error_t REG_ERANGE = 11; /* Invalid range end. */ +static const reg_error_t REG_ESPACE = 12; /* Ran out of memory. */ +static const reg_error_t REG_BADRPT = 13; /* No preceding re for repetition op. */ +static const reg_error_t REG_EEND = 14; /* unexpected end of expression */ +static const reg_error_t REG_ESIZE = 15; /* expression too big */ +static const reg_error_t REG_ERPAREN = 8; /* = REG_EPAREN : unmatched right parenthesis */ +static const reg_error_t REG_EMPTY = 17; /* empty expression */ +static const reg_error_t REG_E_MEMORY = 15; /* = REG_ESIZE : out of memory */ +static const reg_error_t REG_ECOMPLEXITY = 18; /* complexity too high */ +static const reg_error_t REG_ESTACK = 19; /* out of stack space */ +static const reg_error_t REG_E_PERL = 20; /* Perl (?...) error */ +static const reg_error_t REG_E_UNKNOWN = 21; /* unknown error */ +static const reg_error_t REG_ENOSYS = 21; /* = REG_E_UNKNOWN : Reserved. */ + +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompA(regex_tA*, const char*, int); +BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorA(int, const regex_tA*, char*, regsize_t); +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecA(const regex_tA*, const char*, regsize_t, regmatch_t*, int); +BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeA(regex_tA*); + +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompW(regex_tW*, const wchar_t*, int); +BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorW(int, const regex_tW*, wchar_t*, regsize_t); +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecW(const regex_tW*, const wchar_t*, regsize_t, regmatch_t*, int); +BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeW(regex_tW*); +#endif + +#ifdef UNICODE +#define regcomp regcompW +#define regerror regerrorW +#define regexec regexecW +#define regfree regfreeW +#define regex_t regex_tW +#else +#define regcomp regcompA +#define regerror regerrorA +#define regexec regexecA +#define regfree regfreeA +#define regex_t regex_tA +#endif + +#ifdef __cplusplus +} /* extern "C" */ +} /* namespace */ +#endif + +#endif /* include guard */ + diff --git a/include/boost/regex/v5/error_type.hpp b/include/boost/regex/v5/error_type.hpp new file mode 100644 index 00000000..afcc71e3 --- /dev/null +++ b/include/boost/regex/v5/error_type.hpp @@ -0,0 +1,59 @@ +/* + * + * Copyright (c) 2003-2005 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE error_type.hpp + * VERSION see + * DESCRIPTION: Declares regular expression error type enumerator. + */ + +#ifndef BOOST_REGEX_ERROR_TYPE_HPP +#define BOOST_REGEX_ERROR_TYPE_HPP + +#ifdef __cplusplus +namespace boost{ +#endif + +#ifdef __cplusplus +namespace regex_constants{ + +enum error_type{ + + error_ok = 0, /* not used */ + error_no_match = 1, /* not used */ + error_bad_pattern = 2, + error_collate = 3, + error_ctype = 4, + error_escape = 5, + error_backref = 6, + error_brack = 7, + error_paren = 8, + error_brace = 9, + error_badbrace = 10, + error_range = 11, + error_space = 12, + error_badrepeat = 13, + error_end = 14, /* not used */ + error_size = 15, + error_right_paren = 16, /* not used */ + error_empty = 17, + error_complexity = 18, + error_stack = 19, + error_perl_extension = 20, + error_unknown = 21 +}; + +} +} +#endif /* __cplusplus */ + +#endif diff --git a/include/boost/regex/v5/icu.hpp b/include/boost/regex/v5/icu.hpp new file mode 100644 index 00000000..a9264965 --- /dev/null +++ b/include/boost/regex/v5/icu.hpp @@ -0,0 +1,1402 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE icu.hpp + * VERSION see + * DESCRIPTION: Unicode regular expressions on top of the ICU Library. + */ + +#ifndef BOOST_REGEX_ICU_V5_HPP +#define BOOST_REGEX_ICU_V5_HPP + +#include +#include +#include +#include +#include +#include +#include + +#ifdef BOOST_REGEX_MSVC +#pragma warning (push) +#pragma warning (disable: 4251) +#endif + +namespace boost{ + +namespace BOOST_REGEX_DETAIL_NS{ + +// +// Implementation details: +// +class icu_regex_traits_implementation +{ + typedef UChar32 char_type; + typedef std::size_t size_type; + typedef std::vector string_type; + typedef U_NAMESPACE_QUALIFIER Locale locale_type; + typedef std::uint_least32_t char_class_type; +public: + icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& l) + : m_locale(l) + { + UErrorCode success = U_ZERO_ERROR; + m_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success)); + if(U_SUCCESS(success) == 0) + init_error(); + m_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::IDENTICAL); + success = U_ZERO_ERROR; + m_primary_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success)); + if(U_SUCCESS(success) == 0) + init_error(); + m_primary_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::PRIMARY); + } + U_NAMESPACE_QUALIFIER Locale getloc()const + { + return m_locale; + } + string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const + { + // TODO make thread safe!!!! : + typedef u32_to_u16_iterator itt; + itt i(p1), j(p2); + std::vector< ::UChar> t(i, j); + std::uint8_t result[100]; + std::int32_t len; + if (!t.empty()) + len = pcoll->getSortKey(&*t.begin(), static_cast(t.size()), result, sizeof(result)); + else + len = pcoll->getSortKey(static_cast(0), static_cast(0), result, sizeof(result)); + if (std::size_t(len) > sizeof(result)) + { + std::unique_ptr< std::uint8_t[]> presult(new ::uint8_t[len + 1]); + if (!t.empty()) + len = pcoll->getSortKey(&*t.begin(), static_cast(t.size()), presult.get(), len + 1); + else + len = pcoll->getSortKey(static_cast(0), static_cast(0), presult.get(), len + 1); + if ((0 == presult[len - 1]) && (len > 1)) + --len; + return string_type(presult.get(), presult.get() + len); + } + if ((0 == result[len - 1]) && (len > 1)) + --len; + return string_type(result, result + len); + } + string_type transform(const char_type* p1, const char_type* p2) const + { + return do_transform(p1, p2, m_collator.get()); + } + string_type transform_primary(const char_type* p1, const char_type* p2) const + { + return do_transform(p1, p2, m_primary_collator.get()); + } +private: + void init_error() + { + std::runtime_error e("Could not initialize ICU resources"); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(e); +#else + throw e; +#endif + } + U_NAMESPACE_QUALIFIER Locale m_locale; // The ICU locale that we're using + std::unique_ptr< U_NAMESPACE_QUALIFIER Collator> m_collator; // The full collation object + std::unique_ptr< U_NAMESPACE_QUALIFIER Collator> m_primary_collator; // The primary collation object +}; +inline std::shared_ptr get_icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& loc) +{ + return std::shared_ptr(new icu_regex_traits_implementation(loc)); +} + +} + +class icu_regex_traits +{ +public: + typedef UChar32 char_type; + typedef std::size_t size_type; + typedef std::vector string_type; + typedef U_NAMESPACE_QUALIFIER Locale locale_type; + typedef std::uint64_t char_class_type; + + struct boost_extensions_tag{}; + + icu_regex_traits() + : m_pimpl(BOOST_REGEX_DETAIL_NS::get_icu_regex_traits_implementation(U_NAMESPACE_QUALIFIER Locale())) + { + } + static size_type length(const char_type* p) + { + size_type result = 0; + while (*p) + { + ++p; + ++result; + } + return result; + } + + ::boost::regex_constants::syntax_type syntax_type(char_type c)const + { + return ((c < 0x7f) && (c > 0)) ? BOOST_REGEX_DETAIL_NS::get_default_syntax_type(static_cast(c)) : regex_constants::syntax_char; + } + ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c) const + { + return ((c < 0x7f) && (c > 0)) ? BOOST_REGEX_DETAIL_NS::get_default_escape_syntax_type(static_cast(c)) : regex_constants::syntax_char; + } + char_type translate(char_type c) const + { + return c; + } + char_type translate_nocase(char_type c) const + { + return ::u_tolower(c); + } + char_type translate(char_type c, bool icase) const + { + return icase ? translate_nocase(c) : translate(c); + } + char_type tolower(char_type c) const + { + return ::u_tolower(c); + } + char_type toupper(char_type c) const + { + return ::u_toupper(c); + } + string_type transform(const char_type* p1, const char_type* p2) const + { + return m_pimpl->transform(p1, p2); + } + string_type transform_primary(const char_type* p1, const char_type* p2) const + { + return m_pimpl->transform_primary(p1, p2); + } + char_class_type lookup_classname(const char_type* p1, const char_type* p2) const + { + constexpr char_class_type mask_blank = char_class_type(1) << offset_blank; + constexpr char_class_type mask_space = char_class_type(1) << offset_space; + constexpr char_class_type mask_xdigit = char_class_type(1) << offset_xdigit; + constexpr char_class_type mask_underscore = char_class_type(1) << offset_underscore; + constexpr char_class_type mask_unicode = char_class_type(1) << offset_unicode; + constexpr char_class_type mask_any = char_class_type(1) << offset_any; + constexpr char_class_type mask_ascii = char_class_type(1) << offset_ascii; + constexpr char_class_type mask_horizontal = char_class_type(1) << offset_horizontal; + constexpr char_class_type mask_vertical = char_class_type(1) << offset_vertical; + + static const char_class_type masks[] = + { + 0, + U_GC_L_MASK | U_GC_ND_MASK, + U_GC_L_MASK, + mask_blank, + U_GC_CC_MASK | U_GC_CF_MASK | U_GC_ZL_MASK | U_GC_ZP_MASK, + U_GC_ND_MASK, + U_GC_ND_MASK, + (0x3FFFFFFFu) & ~(U_GC_CC_MASK | U_GC_CF_MASK | U_GC_CS_MASK | U_GC_CN_MASK | U_GC_Z_MASK), + mask_horizontal, + U_GC_LL_MASK, + U_GC_LL_MASK, + ~(U_GC_C_MASK), + U_GC_P_MASK, + char_class_type(U_GC_Z_MASK) | mask_space, + char_class_type(U_GC_Z_MASK) | mask_space, + U_GC_LU_MASK, + mask_unicode, + U_GC_LU_MASK, + mask_vertical, + char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, + char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, + char_class_type(U_GC_ND_MASK) | mask_xdigit, + }; + + int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); + if (idx >= 0) + return masks[idx + 1]; + char_class_type result = lookup_icu_mask(p1, p2); + if (result != 0) + return result; + + if (idx < 0) + { + string_type s(p1, p2); + string_type::size_type i = 0; + while (i < s.size()) + { + s[i] = static_cast((::u_tolower)(s[i])); + if (::u_isspace(s[i]) || (s[i] == '-') || (s[i] == '_')) + s.erase(s.begin() + i, s.begin() + i + 1); + else + { + s[i] = static_cast((::u_tolower)(s[i])); + ++i; + } + } + if (!s.empty()) + idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); + if (idx >= 0) + return masks[idx + 1]; + if (!s.empty()) + result = lookup_icu_mask(&*s.begin(), &*s.begin() + s.size()); + if (result != 0) + return result; + } + BOOST_REGEX_ASSERT(std::size_t(idx + 1) < sizeof(masks) / sizeof(masks[0])); + return masks[idx + 1]; + } + string_type lookup_collatename(const char_type* p1, const char_type* p2) const + { + string_type result; + if (std::find_if(p1, p2, std::bind(std::greater< ::UChar32>(), std::placeholders::_1, 0x7f)) == p2) + { + std::string s(p1, p2); + // Try Unicode name: + UErrorCode err = U_ZERO_ERROR; + UChar32 c = ::u_charFromName(U_UNICODE_CHAR_NAME, s.c_str(), &err); + if (U_SUCCESS(err)) + { + result.push_back(c); + return result; + } + // Try Unicode-extended name: + err = U_ZERO_ERROR; + c = ::u_charFromName(U_EXTENDED_CHAR_NAME, s.c_str(), &err); + if (U_SUCCESS(err)) + { + result.push_back(c); + return result; + } + // try POSIX name: + s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s); + result.assign(s.begin(), s.end()); + } + if (result.empty() && (p2 - p1 == 1)) + result.push_back(*p1); + return result; + } + bool isctype(char_type c, char_class_type f) const + { + constexpr char_class_type mask_blank = char_class_type(1) << offset_blank; + constexpr char_class_type mask_space = char_class_type(1) << offset_space; + constexpr char_class_type mask_xdigit = char_class_type(1) << offset_xdigit; + constexpr char_class_type mask_underscore = char_class_type(1) << offset_underscore; + constexpr char_class_type mask_unicode = char_class_type(1) << offset_unicode; + constexpr char_class_type mask_any = char_class_type(1) << offset_any; + constexpr char_class_type mask_ascii = char_class_type(1) << offset_ascii; + constexpr char_class_type mask_horizontal = char_class_type(1) << offset_horizontal; + constexpr char_class_type mask_vertical = char_class_type(1) << offset_vertical; + + // check for standard catagories first: + char_class_type m = char_class_type(static_cast(1) << u_charType(c)); + if ((m & f) != 0) + return true; + // now check for special cases: + if (((f & mask_blank) != 0) && u_isblank(c)) + return true; + if (((f & mask_space) != 0) && u_isspace(c)) + return true; + if (((f & mask_xdigit) != 0) && (u_digit(c, 16) >= 0)) + return true; + if (((f & mask_unicode) != 0) && (c >= 0x100)) + return true; + if (((f & mask_underscore) != 0) && (c == '_')) + return true; + if (((f & mask_any) != 0) && (c <= 0x10FFFF)) + return true; + if (((f & mask_ascii) != 0) && (c <= 0x7F)) + return true; + if (((f & mask_vertical) != 0) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == static_cast('\v')) || (m == U_GC_ZL_MASK) || (m == U_GC_ZP_MASK))) + return true; + if (((f & mask_horizontal) != 0) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && u_isspace(c) && (c != static_cast('\v'))) + return true; + return false; + } + std::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const + { + return BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); + } + int value(char_type c, int radix)const + { + return u_digit(c, static_cast< std::int8_t>(radix)); + } + locale_type imbue(locale_type l) + { + locale_type result(m_pimpl->getloc()); + m_pimpl = BOOST_REGEX_DETAIL_NS::get_icu_regex_traits_implementation(l); + return result; + } + locale_type getloc()const + { + return locale_type(); + } + std::string error_string(::boost::regex_constants::error_type n) const + { + return BOOST_REGEX_DETAIL_NS::get_default_error_string(n); + } +private: + icu_regex_traits(const icu_regex_traits&); + icu_regex_traits& operator=(const icu_regex_traits&); + + // + // define the bitmasks offsets we need for additional character properties: + // + enum{ + offset_blank = U_CHAR_CATEGORY_COUNT, + offset_space = U_CHAR_CATEGORY_COUNT+1, + offset_xdigit = U_CHAR_CATEGORY_COUNT+2, + offset_underscore = U_CHAR_CATEGORY_COUNT+3, + offset_unicode = U_CHAR_CATEGORY_COUNT+4, + offset_any = U_CHAR_CATEGORY_COUNT+5, + offset_ascii = U_CHAR_CATEGORY_COUNT+6, + offset_horizontal = U_CHAR_CATEGORY_COUNT+7, + offset_vertical = U_CHAR_CATEGORY_COUNT+8 + }; + + static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2) + { + constexpr char_class_type mask_blank = char_class_type(1) << offset_blank; + constexpr char_class_type mask_space = char_class_type(1) << offset_space; + constexpr char_class_type mask_xdigit = char_class_type(1) << offset_xdigit; + constexpr char_class_type mask_underscore = char_class_type(1) << offset_underscore; + constexpr char_class_type mask_unicode = char_class_type(1) << offset_unicode; + constexpr char_class_type mask_any = char_class_type(1) << offset_any; + constexpr char_class_type mask_ascii = char_class_type(1) << offset_ascii; + constexpr char_class_type mask_horizontal = char_class_type(1) << offset_horizontal; + constexpr char_class_type mask_vertical = char_class_type(1) << offset_vertical; + + static const ::UChar32 prop_name_table[] = { + /* any */ 'a', 'n', 'y', + /* ascii */ 'a', 's', 'c', 'i', 'i', + /* assigned */ 'a', 's', 's', 'i', 'g', 'n', 'e', 'd', + /* c* */ 'c', '*', + /* cc */ 'c', 'c', + /* cf */ 'c', 'f', + /* closepunctuation */ 'c', 'l', 'o', 's', 'e', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* cn */ 'c', 'n', + /* co */ 'c', 'o', + /* connectorpunctuation */ 'c', 'o', 'n', 'n', 'e', 'c', 't', 'o', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* control */ 'c', 'o', 'n', 't', 'r', 'o', 'l', + /* cs */ 'c', 's', + /* currencysymbol */ 'c', 'u', 'r', 'r', 'e', 'n', 'c', 'y', 's', 'y', 'm', 'b', 'o', 'l', + /* dashpunctuation */ 'd', 'a', 's', 'h', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* decimaldigitnumber */ 'd', 'e', 'c', 'i', 'm', 'a', 'l', 'd', 'i', 'g', 'i', 't', 'n', 'u', 'm', 'b', 'e', 'r', + /* enclosingmark */ 'e', 'n', 'c', 'l', 'o', 's', 'i', 'n', 'g', 'm', 'a', 'r', 'k', + /* finalpunctuation */ 'f', 'i', 'n', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* format */ 'f', 'o', 'r', 'm', 'a', 't', + /* initialpunctuation */ 'i', 'n', 'i', 't', 'i', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* l* */ 'l', '*', + /* letter */ 'l', 'e', 't', 't', 'e', 'r', + /* letternumber */ 'l', 'e', 't', 't', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', + /* lineseparator */ 'l', 'i', 'n', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* ll */ 'l', 'l', + /* lm */ 'l', 'm', + /* lo */ 'l', 'o', + /* lowercaseletter */ 'l', 'o', 'w', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', + /* lt */ 'l', 't', + /* lu */ 'l', 'u', + /* m* */ 'm', '*', + /* mark */ 'm', 'a', 'r', 'k', + /* mathsymbol */ 'm', 'a', 't', 'h', 's', 'y', 'm', 'b', 'o', 'l', + /* mc */ 'm', 'c', + /* me */ 'm', 'e', + /* mn */ 'm', 'n', + /* modifierletter */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', + /* modifiersymbol */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', + /* n* */ 'n', '*', + /* nd */ 'n', 'd', + /* nl */ 'n', 'l', + /* no */ 'n', 'o', + /* nonspacingmark */ 'n', 'o', 'n', 's', 'p', 'a', 'c', 'i', 'n', 'g', 'm', 'a', 'r', 'k', + /* notassigned */ 'n', 'o', 't', 'a', 's', 's', 'i', 'g', 'n', 'e', 'd', + /* number */ 'n', 'u', 'm', 'b', 'e', 'r', + /* openpunctuation */ 'o', 'p', 'e', 'n', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* other */ 'o', 't', 'h', 'e', 'r', + /* otherletter */ 'o', 't', 'h', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', + /* othernumber */ 'o', 't', 'h', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', + /* otherpunctuation */ 'o', 't', 'h', 'e', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* othersymbol */ 'o', 't', 'h', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', + /* p* */ 'p', '*', + /* paragraphseparator */ 'p', 'a', 'r', 'a', 'g', 'r', 'a', 'p', 'h', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* pc */ 'p', 'c', + /* pd */ 'p', 'd', + /* pe */ 'p', 'e', + /* pf */ 'p', 'f', + /* pi */ 'p', 'i', + /* po */ 'p', 'o', + /* privateuse */ 'p', 'r', 'i', 'v', 'a', 't', 'e', 'u', 's', 'e', + /* ps */ 'p', 's', + /* punctuation */ 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* s* */ 's', '*', + /* sc */ 's', 'c', + /* separator */ 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* sk */ 's', 'k', + /* sm */ 's', 'm', + /* so */ 's', 'o', + /* spaceseparator */ 's', 'p', 'a', 'c', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* spacingcombiningmark */ 's', 'p', 'a', 'c', 'i', 'n', 'g', 'c', 'o', 'm', 'b', 'i', 'n', 'i', 'n', 'g', 'm', 'a', 'r', 'k', + /* surrogate */ 's', 'u', 'r', 'r', 'o', 'g', 'a', 't', 'e', + /* symbol */ 's', 'y', 'm', 'b', 'o', 'l', + /* titlecase */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', + /* titlecaseletter */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', + /* uppercaseletter */ 'u', 'p', 'p', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', + /* z* */ 'z', '*', + /* zl */ 'z', 'l', + /* zp */ 'z', 'p', + /* zs */ 'z', 's', + }; + + static const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> range_data[] = { + { prop_name_table + 0, prop_name_table + 3, }, // any + { prop_name_table + 3, prop_name_table + 8, }, // ascii + { prop_name_table + 8, prop_name_table + 16, }, // assigned + { prop_name_table + 16, prop_name_table + 18, }, // c* + { prop_name_table + 18, prop_name_table + 20, }, // cc + { prop_name_table + 20, prop_name_table + 22, }, // cf + { prop_name_table + 22, prop_name_table + 38, }, // closepunctuation + { prop_name_table + 38, prop_name_table + 40, }, // cn + { prop_name_table + 40, prop_name_table + 42, }, // co + { prop_name_table + 42, prop_name_table + 62, }, // connectorpunctuation + { prop_name_table + 62, prop_name_table + 69, }, // control + { prop_name_table + 69, prop_name_table + 71, }, // cs + { prop_name_table + 71, prop_name_table + 85, }, // currencysymbol + { prop_name_table + 85, prop_name_table + 100, }, // dashpunctuation + { prop_name_table + 100, prop_name_table + 118, }, // decimaldigitnumber + { prop_name_table + 118, prop_name_table + 131, }, // enclosingmark + { prop_name_table + 131, prop_name_table + 147, }, // finalpunctuation + { prop_name_table + 147, prop_name_table + 153, }, // format + { prop_name_table + 153, prop_name_table + 171, }, // initialpunctuation + { prop_name_table + 171, prop_name_table + 173, }, // l* + { prop_name_table + 173, prop_name_table + 179, }, // letter + { prop_name_table + 179, prop_name_table + 191, }, // letternumber + { prop_name_table + 191, prop_name_table + 204, }, // lineseparator + { prop_name_table + 204, prop_name_table + 206, }, // ll + { prop_name_table + 206, prop_name_table + 208, }, // lm + { prop_name_table + 208, prop_name_table + 210, }, // lo + { prop_name_table + 210, prop_name_table + 225, }, // lowercaseletter + { prop_name_table + 225, prop_name_table + 227, }, // lt + { prop_name_table + 227, prop_name_table + 229, }, // lu + { prop_name_table + 229, prop_name_table + 231, }, // m* + { prop_name_table + 231, prop_name_table + 235, }, // mark + { prop_name_table + 235, prop_name_table + 245, }, // mathsymbol + { prop_name_table + 245, prop_name_table + 247, }, // mc + { prop_name_table + 247, prop_name_table + 249, }, // me + { prop_name_table + 249, prop_name_table + 251, }, // mn + { prop_name_table + 251, prop_name_table + 265, }, // modifierletter + { prop_name_table + 265, prop_name_table + 279, }, // modifiersymbol + { prop_name_table + 279, prop_name_table + 281, }, // n* + { prop_name_table + 281, prop_name_table + 283, }, // nd + { prop_name_table + 283, prop_name_table + 285, }, // nl + { prop_name_table + 285, prop_name_table + 287, }, // no + { prop_name_table + 287, prop_name_table + 301, }, // nonspacingmark + { prop_name_table + 301, prop_name_table + 312, }, // notassigned + { prop_name_table + 312, prop_name_table + 318, }, // number + { prop_name_table + 318, prop_name_table + 333, }, // openpunctuation + { prop_name_table + 333, prop_name_table + 338, }, // other + { prop_name_table + 338, prop_name_table + 349, }, // otherletter + { prop_name_table + 349, prop_name_table + 360, }, // othernumber + { prop_name_table + 360, prop_name_table + 376, }, // otherpunctuation + { prop_name_table + 376, prop_name_table + 387, }, // othersymbol + { prop_name_table + 387, prop_name_table + 389, }, // p* + { prop_name_table + 389, prop_name_table + 407, }, // paragraphseparator + { prop_name_table + 407, prop_name_table + 409, }, // pc + { prop_name_table + 409, prop_name_table + 411, }, // pd + { prop_name_table + 411, prop_name_table + 413, }, // pe + { prop_name_table + 413, prop_name_table + 415, }, // pf + { prop_name_table + 415, prop_name_table + 417, }, // pi + { prop_name_table + 417, prop_name_table + 419, }, // po + { prop_name_table + 419, prop_name_table + 429, }, // privateuse + { prop_name_table + 429, prop_name_table + 431, }, // ps + { prop_name_table + 431, prop_name_table + 442, }, // punctuation + { prop_name_table + 442, prop_name_table + 444, }, // s* + { prop_name_table + 444, prop_name_table + 446, }, // sc + { prop_name_table + 446, prop_name_table + 455, }, // separator + { prop_name_table + 455, prop_name_table + 457, }, // sk + { prop_name_table + 457, prop_name_table + 459, }, // sm + { prop_name_table + 459, prop_name_table + 461, }, // so + { prop_name_table + 461, prop_name_table + 475, }, // spaceseparator + { prop_name_table + 475, prop_name_table + 495, }, // spacingcombiningmark + { prop_name_table + 495, prop_name_table + 504, }, // surrogate + { prop_name_table + 504, prop_name_table + 510, }, // symbol + { prop_name_table + 510, prop_name_table + 519, }, // titlecase + { prop_name_table + 519, prop_name_table + 534, }, // titlecaseletter + { prop_name_table + 534, prop_name_table + 549, }, // uppercaseletter + { prop_name_table + 549, prop_name_table + 551, }, // z* + { prop_name_table + 551, prop_name_table + 553, }, // zl + { prop_name_table + 553, prop_name_table + 555, }, // zp + { prop_name_table + 555, prop_name_table + 557, }, // zs + }; + + static const icu_regex_traits::char_class_type icu_class_map[] = { + mask_any, // any + mask_ascii, // ascii + (0x3FFFFFFFu) & ~(U_GC_CN_MASK), // assigned + U_GC_C_MASK, // c* + U_GC_CC_MASK, // cc + U_GC_CF_MASK, // cf + U_GC_PE_MASK, // closepunctuation + U_GC_CN_MASK, // cn + U_GC_CO_MASK, // co + U_GC_PC_MASK, // connectorpunctuation + U_GC_CC_MASK, // control + U_GC_CS_MASK, // cs + U_GC_SC_MASK, // currencysymbol + U_GC_PD_MASK, // dashpunctuation + U_GC_ND_MASK, // decimaldigitnumber + U_GC_ME_MASK, // enclosingmark + U_GC_PF_MASK, // finalpunctuation + U_GC_CF_MASK, // format + U_GC_PI_MASK, // initialpunctuation + U_GC_L_MASK, // l* + U_GC_L_MASK, // letter + U_GC_NL_MASK, // letternumber + U_GC_ZL_MASK, // lineseparator + U_GC_LL_MASK, // ll + U_GC_LM_MASK, // lm + U_GC_LO_MASK, // lo + U_GC_LL_MASK, // lowercaseletter + U_GC_LT_MASK, // lt + U_GC_LU_MASK, // lu + U_GC_M_MASK, // m* + U_GC_M_MASK, // mark + U_GC_SM_MASK, // mathsymbol + U_GC_MC_MASK, // mc + U_GC_ME_MASK, // me + U_GC_MN_MASK, // mn + U_GC_LM_MASK, // modifierletter + U_GC_SK_MASK, // modifiersymbol + U_GC_N_MASK, // n* + U_GC_ND_MASK, // nd + U_GC_NL_MASK, // nl + U_GC_NO_MASK, // no + U_GC_MN_MASK, // nonspacingmark + U_GC_CN_MASK, // notassigned + U_GC_N_MASK, // number + U_GC_PS_MASK, // openpunctuation + U_GC_C_MASK, // other + U_GC_LO_MASK, // otherletter + U_GC_NO_MASK, // othernumber + U_GC_PO_MASK, // otherpunctuation + U_GC_SO_MASK, // othersymbol + U_GC_P_MASK, // p* + U_GC_ZP_MASK, // paragraphseparator + U_GC_PC_MASK, // pc + U_GC_PD_MASK, // pd + U_GC_PE_MASK, // pe + U_GC_PF_MASK, // pf + U_GC_PI_MASK, // pi + U_GC_PO_MASK, // po + U_GC_CO_MASK, // privateuse + U_GC_PS_MASK, // ps + U_GC_P_MASK, // punctuation + U_GC_S_MASK, // s* + U_GC_SC_MASK, // sc + U_GC_Z_MASK, // separator + U_GC_SK_MASK, // sk + U_GC_SM_MASK, // sm + U_GC_SO_MASK, // so + U_GC_ZS_MASK, // spaceseparator + U_GC_MC_MASK, // spacingcombiningmark + U_GC_CS_MASK, // surrogate + U_GC_S_MASK, // symbol + U_GC_LT_MASK, // titlecase + U_GC_LT_MASK, // titlecaseletter + U_GC_LU_MASK, // uppercaseletter + U_GC_Z_MASK, // z* + U_GC_ZL_MASK, // zl + U_GC_ZP_MASK, // zp + U_GC_ZS_MASK, // zs + }; + + + const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_begin = range_data; + const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_end = range_data + (sizeof(range_data) / sizeof(range_data[0])); + + BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> t = { p1, p2, }; + const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* p = std::lower_bound(ranges_begin, ranges_end, t); + if ((p != ranges_end) && (t == *p)) + return icu_class_map[p - ranges_begin]; + return 0; + } + std::shared_ptr< ::boost::BOOST_REGEX_DETAIL_NS::icu_regex_traits_implementation> m_pimpl; +}; + +} // namespace boost + +namespace boost{ + +// types: +typedef basic_regex< ::UChar32, icu_regex_traits> u32regex; +typedef match_results u32match; +typedef match_results u16match; + +// +// Construction of 32-bit regex types from UTF-8 and UTF-16 primitives: +// +namespace BOOST_REGEX_DETAIL_NS{ + +template +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const std::integral_constant*) +{ + typedef boost::u8_to_u32_iterator conv_type; + return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt); +} + +template +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const std::integral_constant*) +{ + typedef boost::u16_to_u32_iterator conv_type; + return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt); +} + +template +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const std::integral_constant*) +{ + return u32regex(i, j, opt); +} +} + +// BOOST_REGEX_UCHAR_IS_WCHAR_T +// +// Source inspection of unicode/umachine.h in ICU version 59 indicates that: +// +// On version 59, UChar is always char16_t in C++ mode (and uint16_t in C mode) +// +// On earlier versions, the logic is +// +// #if U_SIZEOF_WCHAR_T==2 +// typedef wchar_t OldUChar; +// #elif defined(__CHAR16_TYPE__) +// typedef __CHAR16_TYPE__ OldUChar; +// #else +// typedef uint16_t OldUChar; +// #endif +// +// That is, UChar is wchar_t only on versions below 59, when U_SIZEOF_WCHAR_T==2 +// +// Hence, + +#define BOOST_REGEX_UCHAR_IS_WCHAR_T (U_ICU_VERSION_MAJOR_NUM < 59 && U_SIZEOF_WCHAR_T == 2) + +#if BOOST_REGEX_UCHAR_IS_WCHAR_T + static_assert((std::is_same::value), "Configuration logic has failed!"); +#else + static_assert(!(std::is_same::value), "Configuration logic has failed!"); +#endif + +// +// Construction from an iterator pair: +// +template +inline u32regex make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(i, j, opt, static_cast const*>(0)); +} +// +// construction from UTF-8 nul-terminated strings: +// +inline u32regex make_u32regex(const char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::strlen(p), opt, static_cast const*>(0)); +} +inline u32regex make_u32regex(const unsigned char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::strlen(reinterpret_cast(p)), opt, static_cast const*>(0)); +} +// +// construction from UTF-16 nul-terminated strings: +// +#ifndef BOOST_NO_WREGEX +inline u32regex make_u32regex(const wchar_t* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::wcslen(p), opt, static_cast const*>(0)); +} +#endif +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T +inline u32regex make_u32regex(const UChar* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + u_strlen(p), opt, static_cast const*>(0)); +} +#endif +// +// construction from basic_string class-template: +// +template +inline u32regex make_u32regex(const std::basic_string& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(s.begin(), s.end(), opt, static_cast const*>(0)); +} +// +// Construction from ICU string type: +// +inline u32regex make_u32regex(const U_NAMESPACE_QUALIFIER UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return BOOST_REGEX_DETAIL_NS::do_make_u32regex(s.getBuffer(), s.getBuffer() + s.length(), opt, static_cast const*>(0)); +} + +// +// regex_match overloads that widen the character type as appropriate: +// +namespace BOOST_REGEX_DETAIL_NS{ +template +void copy_results(MR1& out, MR2 const& in, NSubs named_subs) +{ + // copy results from an adapted MR2 match_results: + out.set_size(in.size(), in.prefix().first.base(), in.suffix().second.base()); + out.set_base(in.base().base()); + out.set_named_subs(named_subs); + for(int i = 0; i < (int)in.size(); ++i) + { + if(in[i].matched || !i) + { + out.set_first(in[i].first.base(), i); + out.set_second(in[i].second.base(), i, in[i].matched); + } + } +#ifdef BOOST_REGEX_MATCH_EXTRA + // Copy full capture info as well: + for(int i = 0; i < (int)in.size(); ++i) + { + if(in[i].captures().size()) + { + out[i].get_captures().assign(in[i].captures().size(), typename MR1::value_type()); + for(int j = 0; j < (int)out[i].captures().size(); ++j) + { + out[i].get_captures()[j].first = in[i].captures()[j].first.base(); + out[i].get_captures()[j].second = in[i].captures()[j].second.base(); + out[i].get_captures()[j].matched = in[i].captures()[j].matched; + } + } + } +#endif +} + +template +inline bool do_regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + std::integral_constant const*) +{ + return ::boost::regex_match(first, last, m, e, flags); +} +template +bool do_regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + std::integral_constant const*) +{ + typedef u16_to_u32_iterator conv_type; + typedef match_results match_type; + //typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags); + // copy results across to m: + if(result) copy_results(m, what, e.get_named_subs()); + return result; +} +template +bool do_regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + std::integral_constant const*) +{ + typedef u8_to_u32_iterator conv_type; + typedef match_results match_type; + //typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags); + // copy results across to m: + if(result) copy_results(m, what, e.get_named_subs()); + return result; +} +} // namespace BOOST_REGEX_DETAIL_NS + +template +inline bool u32regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(first, last, m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const UChar* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast const*>(0)); +} +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) +inline bool u32regex_match(const wchar_t* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast const*>(0)); +} +#endif +inline bool u32regex_match(const char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const unsigned char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const std::string& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_match(const std::wstring& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); +} +#endif +inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast const*>(0)); +} +// +// regex_match overloads that do not return what matched: +// +template +inline bool u32regex_match(BidiIterator first, BidiIterator last, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(first, last, m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const UChar* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast const*>(0)); +} +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) +inline bool u32regex_match(const wchar_t* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast const*>(0)); +} +#endif +inline bool u32regex_match(const char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const unsigned char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const std::string& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_match(const std::wstring& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); +} +#endif +inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast const*>(0)); +} + +// +// regex_search overloads that widen the character type as appropriate: +// +namespace BOOST_REGEX_DETAIL_NS{ +template +inline bool do_regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + std::integral_constant const*) +{ + return ::boost::regex_search(first, last, m, e, flags, base); +} +template +bool do_regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + std::integral_constant const*) +{ + typedef u16_to_u32_iterator conv_type; + typedef match_results match_type; + //typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base)); + // copy results across to m: + if(result) copy_results(m, what, e.get_named_subs()); + return result; +} +template +bool do_regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + std::integral_constant const*) +{ + typedef u8_to_u32_iterator conv_type; + typedef match_results match_type; + //typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base)); + // copy results across to m: + if(result) copy_results(m, what, e.get_named_subs()); + return result; +} +} + +template +inline bool u32regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, first, static_cast const*>(0)); +} +template +inline bool u32regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, base, static_cast const*>(0)); +} +inline bool u32regex_search(const UChar* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast const*>(0)); +} +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) +inline bool u32regex_search(const wchar_t* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast const*>(0)); +} +#endif +inline bool u32regex_search(const char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast const*>(0)); +} +inline bool u32regex_search(const unsigned char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast const*>(0)); +} +inline bool u32regex_search(const std::string& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_search(const std::wstring& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); +} +#endif +inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast const*>(0)); +} +template +inline bool u32regex_search(BidiIterator first, BidiIterator last, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, first, static_cast const*>(0)); +} +inline bool u32regex_search(const UChar* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast const*>(0)); +} +#if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX) +inline bool u32regex_search(const wchar_t* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast const*>(0)); +} +#endif +inline bool u32regex_search(const char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast const*>(0)); +} +inline bool u32regex_search(const unsigned char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast const*>(0)); +} +inline bool u32regex_search(const std::string& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_search(const std::wstring& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); +} +#endif +inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return BOOST_REGEX_DETAIL_NS::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast const*>(0)); +} + +// +// overloads for regex_replace with utf-8 and utf-16 data types: +// +namespace BOOST_REGEX_DETAIL_NS{ +template +inline std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator > + make_utf32_seq(I i, I j, std::integral_constant const*) +{ + return std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator >(boost::u8_to_u32_iterator(i, i, j), boost::u8_to_u32_iterator(j, i, j)); +} +template +inline std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator > + make_utf32_seq(I i, I j, std::integral_constant const*) +{ + return std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator >(boost::u16_to_u32_iterator(i, i, j), boost::u16_to_u32_iterator(j, i, j)); +} +template +inline std::pair< I, I > + make_utf32_seq(I i, I j, std::integral_constant const*) +{ + return std::pair< I, I >(i, j); +} +template +inline std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator > + make_utf32_seq(const charT* p, std::integral_constant const*) +{ + std::size_t len = std::strlen((const char*)p); + return std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator >(boost::u8_to_u32_iterator(p, p, p+len), boost::u8_to_u32_iterator(p+len, p, p+len)); +} +template +inline std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator > + make_utf32_seq(const charT* p, std::integral_constant const*) +{ + std::size_t len = u_strlen((const UChar*)p); + return std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator >(boost::u16_to_u32_iterator(p, p, p + len), boost::u16_to_u32_iterator(p+len, p, p + len)); +} +template +inline std::pair< const charT*, const charT* > + make_utf32_seq(const charT* p, std::integral_constant const*) +{ + return std::pair< const charT*, const charT* >(p, p+icu_regex_traits::length((UChar32 const*)p)); +} +template +inline OutputIterator make_utf32_out(OutputIterator o, std::integral_constant const*) +{ + return o; +} +template +inline utf16_output_iterator make_utf32_out(OutputIterator o, std::integral_constant const*) +{ + return o; +} +template +inline utf8_output_iterator make_utf32_out(OutputIterator o, std::integral_constant const*) +{ + return o; +} + +template +OutputIterator do_regex_replace(OutputIterator out, + std::pair const& in, + const u32regex& e, + const std::pair& fmt, + match_flag_type flags + ) +{ + // unfortunately we have to copy the format string in order to pass in onward: + std::vector f; + f.assign(fmt.first, fmt.second); + + regex_iterator i(in.first, in.second, e, flags); + regex_iterator j; + if(i == j) + { + if(!(flags & regex_constants::format_no_copy)) + out = std::copy(in.first, in.second, out); + } + else + { + I1 last_m = in.first; + while(i != j) + { + if(!(flags & regex_constants::format_no_copy)) + out = std::copy(i->prefix().first, i->prefix().second, out); + if(!f.empty()) + out = ::boost::BOOST_REGEX_DETAIL_NS::regex_format_imp(out, *i, &*f.begin(), &*f.begin() + f.size(), flags, e.get_traits()); + else + out = ::boost::BOOST_REGEX_DETAIL_NS::regex_format_imp(out, *i, static_cast(0), static_cast(0), flags, e.get_traits()); + last_m = (*i)[0].second; + if(flags & regex_constants::format_first_only) + break; + ++i; + } + if(!(flags & regex_constants::format_no_copy)) + out = std::copy(last_m, in.second, out); + } + return out; +} +template +inline const BaseIterator& extract_output_base(const BaseIterator& b) +{ + return b; +} +template +inline BaseIterator extract_output_base(const utf8_output_iterator& b) +{ + return b.base(); +} +template +inline BaseIterator extract_output_base(const utf16_output_iterator& b) +{ + return b.base(); +} +} // BOOST_REGEX_DETAIL_NS + +template +inline OutputIterator u32regex_replace(OutputIterator out, + BidirectionalIterator first, + BidirectionalIterator last, + const u32regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::extract_output_base + ( + BOOST_REGEX_DETAIL_NS::do_regex_replace( + BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast const*>(0)), + BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast const*>(0)), + e, + BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt, static_cast const*>(0)), + flags) + ); +} + +template +inline OutputIterator u32regex_replace(OutputIterator out, + Iterator first, + Iterator last, + const u32regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::extract_output_base + ( + BOOST_REGEX_DETAIL_NS::do_regex_replace( + BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast const*>(0)), + BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast const*>(0)), + e, + BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.begin(), fmt.end(), static_cast const*>(0)), + flags) + ); +} + +template +inline OutputIterator u32regex_replace(OutputIterator out, + Iterator first, + Iterator last, + const u32regex& e, + const U_NAMESPACE_QUALIFIER UnicodeString& fmt, + match_flag_type flags = match_default) +{ + return BOOST_REGEX_DETAIL_NS::extract_output_base + ( + BOOST_REGEX_DETAIL_NS::do_regex_replace( + BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast const*>(0)), + BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast const*>(0)), + e, + BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast const*>(0)), + flags) + ); +} + +template +std::basic_string u32regex_replace(const std::basic_string& s, + const u32regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + std::basic_string result; + BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); + u32regex_replace(i, s.begin(), s.end(), e, fmt, flags); + return result; +} + +template +std::basic_string u32regex_replace(const std::basic_string& s, + const u32regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + std::basic_string result; + BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); + u32regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags); + return result; +} + +namespace BOOST_REGEX_DETAIL_NS{ + +class unicode_string_out_iterator +{ + U_NAMESPACE_QUALIFIER UnicodeString* out; +public: + unicode_string_out_iterator(U_NAMESPACE_QUALIFIER UnicodeString& s) : out(&s) {} + unicode_string_out_iterator& operator++() { return *this; } + unicode_string_out_iterator& operator++(int) { return *this; } + unicode_string_out_iterator& operator*() { return *this; } + unicode_string_out_iterator& operator=(UChar v) + { + *out += v; + return *this; + } + typedef std::ptrdiff_t difference_type; + typedef UChar value_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef std::output_iterator_tag iterator_category; +}; + +} + +inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + const UChar* fmt, + match_flag_type flags = match_default) +{ + U_NAMESPACE_QUALIFIER UnicodeString result; + BOOST_REGEX_DETAIL_NS::unicode_string_out_iterator i(result); + u32regex_replace(i, s.getBuffer(), s.getBuffer()+s.length(), e, fmt, flags); + return result; +} + +inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + const U_NAMESPACE_QUALIFIER UnicodeString& fmt, + match_flag_type flags = match_default) +{ + U_NAMESPACE_QUALIFIER UnicodeString result; + BOOST_REGEX_DETAIL_NS::unicode_string_out_iterator i(result); + BOOST_REGEX_DETAIL_NS::do_regex_replace( + BOOST_REGEX_DETAIL_NS::make_utf32_out(i, static_cast const*>(0)), + BOOST_REGEX_DETAIL_NS::make_utf32_seq(s.getBuffer(), s.getBuffer()+s.length(), static_cast const*>(0)), + e, + BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast const*>(0)), + flags); + return result; +} + +} // namespace boost. + +#ifdef BOOST_REGEX_MSVC +#pragma warning (pop) +#endif + +#include +#include + +#endif diff --git a/include/boost/regex/v5/indexed_bit_flag.hpp b/include/boost/regex/v5/indexed_bit_flag.hpp new file mode 100644 index 00000000..b61e5cad --- /dev/null +++ b/include/boost/regex/v5/indexed_bit_flag.hpp @@ -0,0 +1,54 @@ +/* + * + * Copyright (c) 2020 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE basic_regex_parser.cpp + * VERSION see + * DESCRIPTION: Declares template class basic_regex_parser. + */ + +#include +#include + +#ifndef BOOST_REGEX_V5_INDEXED_BIT_FLAG_HPP +#define BOOST_REGEX_V5_INDEXED_BIT_FLAG_HPP + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +class indexed_bit_flag +{ + std::uint64_t low_mask; + std::set mask_set; +public: + indexed_bit_flag() : low_mask(0) {} + void set(std::size_t i) + { + if (i < std::numeric_limits::digits - 1) + low_mask |= static_cast(1u) << i; + else + mask_set.insert(i); + } + bool test(std::size_t i) + { + if (i < std::numeric_limits::digits - 1) + return low_mask & static_cast(1u) << i ? true : false; + else + return mask_set.find(i) != mask_set.end(); + } +}; + +} // namespace BOOST_REGEX_DETAIL_NS +} // namespace boost + + +#endif diff --git a/include/boost/regex/v5/iterator_category.hpp b/include/boost/regex/v5/iterator_category.hpp new file mode 100644 index 00000000..9bd74578 --- /dev/null +++ b/include/boost/regex/v5/iterator_category.hpp @@ -0,0 +1,84 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_match.hpp + * VERSION see + * DESCRIPTION: Iterator traits for selecting an iterator type as + * an integral constant expression. + */ + + +#ifndef BOOST_REGEX_ITERATOR_CATEGORY_HPP +#define BOOST_REGEX_ITERATOR_CATEGORY_HPP + +#include +#include + +namespace boost{ +namespace detail{ + +template +struct is_random_imp +{ +private: + typedef typename std::iterator_traits::iterator_category cat; +public: + static const bool value = (std::is_convertible::value); +}; + +template +struct is_random_pointer_imp +{ + static const bool value = true; +}; + +template +struct is_random_imp_selector +{ + template + struct rebind + { + typedef is_random_imp type; + }; +}; + +template <> +struct is_random_imp_selector +{ + template + struct rebind + { + typedef is_random_pointer_imp type; + }; +}; + +} + +template +struct is_random_access_iterator +{ +private: + typedef detail::is_random_imp_selector< std::is_pointer::value> selector; + typedef typename selector::template rebind bound_type; + typedef typename bound_type::type answer; +public: + static const bool value = answer::value; +}; + +template +const bool is_random_access_iterator::value; + +} + +#endif + diff --git a/include/boost/regex/v5/iterator_traits.hpp b/include/boost/regex/v5/iterator_traits.hpp new file mode 100644 index 00000000..293db6bf --- /dev/null +++ b/include/boost/regex/v5/iterator_traits.hpp @@ -0,0 +1,32 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE iterator_traits.cpp + * VERSION see + * DESCRIPTION: Declares iterator traits workarounds. + */ + +#ifndef BOOST_REGEX_V5_ITERATOR_TRAITS_HPP +#define BOOST_REGEX_V5_ITERATOR_TRAITS_HPP + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +template +struct regex_iterator_traits : public std::iterator_traits {}; + +} // namespace BOOST_REGEX_DETAIL_NS +} // namespace boost + +#endif + diff --git a/include/boost/regex/v5/match_flags.hpp b/include/boost/regex/v5/match_flags.hpp new file mode 100644 index 00000000..6ff236b0 --- /dev/null +++ b/include/boost/regex/v5/match_flags.hpp @@ -0,0 +1,156 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE match_flags.hpp + * VERSION see + * DESCRIPTION: Declares match_flags type. + */ + +#ifndef BOOST_REGEX_V5_MATCH_FLAGS +#define BOOST_REGEX_V5_MATCH_FLAGS + +#ifdef __cplusplus +# include +#endif + +#ifdef __cplusplus +namespace boost{ + namespace regex_constants{ +#endif + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#if BOOST_REGEX_MSVC >= 1800 +#pragma warning(disable : 26812) +#endif +#endif + +typedef enum _match_flags +{ + match_default = 0, + match_not_bol = 1, /* first is not start of line */ + match_not_eol = match_not_bol << 1, /* last is not end of line */ + match_not_bob = match_not_eol << 1, /* first is not start of buffer */ + match_not_eob = match_not_bob << 1, /* last is not end of buffer */ + match_not_bow = match_not_eob << 1, /* first is not start of word */ + match_not_eow = match_not_bow << 1, /* last is not end of word */ + match_not_dot_newline = match_not_eow << 1, /* \n is not matched by '.' */ + match_not_dot_null = match_not_dot_newline << 1, /* '\0' is not matched by '.' */ + match_prev_avail = match_not_dot_null << 1, /* *--first is a valid expression */ + match_init = match_prev_avail << 1, /* internal use */ + match_any = match_init << 1, /* don't care what we match */ + match_not_null = match_any << 1, /* string can't be null */ + match_continuous = match_not_null << 1, /* each grep match must continue from */ + /* uninterrupted from the previous one */ + match_partial = match_continuous << 1, /* find partial matches */ + + match_stop = match_partial << 1, /* stop after first match (grep) V3 only */ + match_not_initial_null = match_stop, /* don't match initial null, V4 only */ + match_all = match_stop << 1, /* must find the whole of input even if match_any is set */ + match_perl = match_all << 1, /* Use perl matching rules */ + match_posix = match_perl << 1, /* Use POSIX matching rules */ + match_nosubs = match_posix << 1, /* don't trap marked subs */ + match_extra = match_nosubs << 1, /* include full capture information for repeated captures */ + match_single_line = match_extra << 1, /* treat text as single line and ignore any \n's when matching ^ and $. */ + match_unused1 = match_single_line << 1, /* unused */ + match_unused2 = match_unused1 << 1, /* unused */ + match_unused3 = match_unused2 << 1, /* unused */ + match_max = match_unused3, + + format_perl = 0, /* perl style replacement */ + format_default = 0, /* ditto. */ + format_sed = match_max << 1, /* sed style replacement. */ + format_all = format_sed << 1, /* enable all extensions to syntax. */ + format_no_copy = format_all << 1, /* don't copy non-matching segments. */ + format_first_only = format_no_copy << 1, /* Only replace first occurrence. */ + format_is_if = format_first_only << 1, /* internal use only. */ + format_literal = format_is_if << 1, /* treat string as a literal */ + + match_not_any = match_not_bol | match_not_eol | match_not_bob + | match_not_eob | match_not_bow | match_not_eow | match_not_dot_newline + | match_not_dot_null | match_prev_avail | match_init | match_not_null + | match_continuous | match_partial | match_stop | match_not_initial_null + | match_stop | match_all | match_perl | match_posix | match_nosubs + | match_extra | match_single_line | match_unused1 | match_unused2 + | match_unused3 | match_max | format_perl | format_default | format_sed + | format_all | format_no_copy | format_first_only | format_is_if + | format_literal + + +} match_flags; + +typedef match_flags match_flag_type; + +#ifdef __cplusplus +inline match_flags operator&(match_flags m1, match_flags m2) +{ return static_cast(static_cast(m1) & static_cast(m2)); } +inline match_flags operator|(match_flags m1, match_flags m2) +{ return static_cast(static_cast(m1) | static_cast(m2)); } +inline match_flags operator^(match_flags m1, match_flags m2) +{ return static_cast(static_cast(m1) ^ static_cast(m2)); } +inline match_flags operator~(match_flags m1) +{ return static_cast(~static_cast(m1)); } +inline match_flags& operator&=(match_flags& m1, match_flags m2) +{ m1 = m1&m2; return m1; } +inline match_flags& operator|=(match_flags& m1, match_flags m2) +{ m1 = m1|m2; return m1; } +inline match_flags& operator^=(match_flags& m1, match_flags m2) +{ m1 = m1^m2; return m1; } +#endif + +#ifdef __cplusplus +} /* namespace regex_constants */ +/* + * import names into boost for backwards compatibility: + */ +using regex_constants::match_flag_type; +using regex_constants::match_default; +using regex_constants::match_not_bol; +using regex_constants::match_not_eol; +using regex_constants::match_not_bob; +using regex_constants::match_not_eob; +using regex_constants::match_not_bow; +using regex_constants::match_not_eow; +using regex_constants::match_not_dot_newline; +using regex_constants::match_not_dot_null; +using regex_constants::match_prev_avail; +/* using regex_constants::match_init; */ +using regex_constants::match_any; +using regex_constants::match_not_null; +using regex_constants::match_continuous; +using regex_constants::match_partial; +/*using regex_constants::match_stop; */ +using regex_constants::match_all; +using regex_constants::match_perl; +using regex_constants::match_posix; +using regex_constants::match_nosubs; +using regex_constants::match_extra; +using regex_constants::match_single_line; +/*using regex_constants::match_max; */ +using regex_constants::format_all; +using regex_constants::format_sed; +using regex_constants::format_perl; +using regex_constants::format_default; +using regex_constants::format_no_copy; +using regex_constants::format_first_only; +/*using regex_constants::format_is_if;*/ + +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + + +} /* namespace boost */ +#endif /* __cplusplus */ +#endif /* include guard */ + diff --git a/include/boost/regex/v5/match_results.hpp b/include/boost/regex/v5/match_results.hpp new file mode 100644 index 00000000..3f9cd012 --- /dev/null +++ b/include/boost/regex/v5/match_results.hpp @@ -0,0 +1,667 @@ +/* + * + * Copyright (c) 1998-2009 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE match_results.cpp + * VERSION see + * DESCRIPTION: Declares template class match_results. + */ + +#ifndef BOOST_REGEX_V5_MATCH_RESULTS_HPP +#define BOOST_REGEX_V5_MATCH_RESULTS_HPP + +namespace boost{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable : 4251 4459) +#if BOOST_REGEX_MSVC < 1700 +# pragma warning(disable : 4231) +#endif +# if BOOST_REGEX_MSVC < 1600 +# pragma warning(disable : 4660) +# endif +#endif + +namespace BOOST_REGEX_DETAIL_NS{ + +class named_subexpressions; + +} + +template +class match_results +{ +private: + typedef std::vector, Allocator> vector_type; +public: + typedef sub_match value_type; + typedef typename std::allocator_traits::value_type const & const_reference; + typedef const_reference reference; + typedef typename vector_type::const_iterator const_iterator; + typedef const_iterator iterator; + typedef typename std::iterator_traits< + BidiIterator>::difference_type difference_type; + typedef typename std::allocator_traits::size_type size_type; + typedef Allocator allocator_type; + typedef typename std::iterator_traits< + BidiIterator>::value_type char_type; + typedef std::basic_string string_type; + typedef BOOST_REGEX_DETAIL_NS::named_subexpressions named_sub_type; + + // construct/copy/destroy: + explicit match_results(const Allocator& a = Allocator()) + : m_subs(a), m_base(), m_null(), m_last_closed_paren(0), m_is_singular(true) {} + // + // IMPORTANT: in the code below, the crazy looking checks around m_is_singular are + // all required because it is illegal to copy a singular iterator. + // See https://svn.boost.org/trac/boost/ticket/3632. + // + match_results(const match_results& m) + : m_subs(m.m_subs), m_base(), m_null(), m_named_subs(m.m_named_subs), m_last_closed_paren(m.m_last_closed_paren), m_is_singular(m.m_is_singular) + { + if(!m_is_singular) + { + m_base = m.m_base; + m_null = m.m_null; + } + } + match_results& operator=(const match_results& m) + { + m_subs = m.m_subs; + m_named_subs = m.m_named_subs; + m_last_closed_paren = m.m_last_closed_paren; + m_is_singular = m.m_is_singular; + if(!m_is_singular) + { + m_base = m.m_base; + m_null = m.m_null; + } + return *this; + } + ~match_results(){} + + // size: + size_type size() const + { return empty() ? 0 : m_subs.size() - 2; } + size_type max_size() const + { return m_subs.max_size(); } + bool empty() const + { return m_subs.size() < 2; } + // element access: + difference_type length(int sub = 0) const + { + if(m_is_singular) + raise_logic_error(); + sub += 2; + if((sub < (int)m_subs.size()) && (sub > 0)) + return m_subs[sub].length(); + return 0; + } + difference_type length(const char_type* sub) const + { + if(m_is_singular) + raise_logic_error(); + const char_type* sub_end = sub; + while(*sub_end) ++sub_end; + return length(named_subexpression_index(sub, sub_end)); + } + template + difference_type length(const charT* sub) const + { + if(m_is_singular) + raise_logic_error(); + const charT* sub_end = sub; + while(*sub_end) ++sub_end; + return length(named_subexpression_index(sub, sub_end)); + } + template + difference_type length(const std::basic_string& sub) const + { + return length(sub.c_str()); + } + difference_type position(size_type sub = 0) const + { + if(m_is_singular) + raise_logic_error(); + sub += 2; + if(sub < m_subs.size()) + { + const sub_match& s = m_subs[sub]; + if(s.matched || (sub == 2)) + { + return std::distance((BidiIterator)(m_base), (BidiIterator)(s.first)); + } + } + return ~static_cast(0); + } + difference_type position(const char_type* sub) const + { + const char_type* sub_end = sub; + while(*sub_end) ++sub_end; + return position(named_subexpression_index(sub, sub_end)); + } + template + difference_type position(const charT* sub) const + { + const charT* sub_end = sub; + while(*sub_end) ++sub_end; + return position(named_subexpression_index(sub, sub_end)); + } + template + difference_type position(const std::basic_string& sub) const + { + return position(sub.c_str()); + } + string_type str(int sub = 0) const + { + if(m_is_singular) + raise_logic_error(); + sub += 2; + string_type result; + if(sub < (int)m_subs.size() && (sub > 0)) + { + const sub_match& s = m_subs[sub]; + if(s.matched) + { + result = s.str(); + } + } + return result; + } + string_type str(const char_type* sub) const + { + return (*this)[sub].str(); + } + template + string_type str(const std::basic_string& sub) const + { + return (*this)[sub].str(); + } + template + string_type str(const charT* sub) const + { + return (*this)[sub].str(); + } + template + string_type str(const std::basic_string& sub) const + { + return (*this)[sub].str(); + } + const_reference operator[](int sub) const + { + if(m_is_singular && m_subs.empty()) + raise_logic_error(); + sub += 2; + if(sub < (int)m_subs.size() && (sub >= 0)) + { + return m_subs[sub]; + } + return m_null; + } + // + // Named sub-expressions: + // + const_reference named_subexpression(const char_type* i, const char_type* j) const + { + // + // Scan for the leftmost *matched* subexpression with the specified named: + // + if(m_is_singular) + raise_logic_error(); + BOOST_REGEX_DETAIL_NS::named_subexpressions::range_type r = m_named_subs->equal_range(i, j); + while((r.first != r.second) && ((*this)[r.first->index].matched == false)) + ++r.first; + return r.first != r.second ? (*this)[r.first->index] : m_null; + } + template + const_reference named_subexpression(const charT* i, const charT* j) const + { + static_assert(sizeof(charT) <= sizeof(char_type), "Failed internal logic"); + if(i == j) + return m_null; + std::vector s; + while(i != j) + s.insert(s.end(), *i++); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } + int named_subexpression_index(const char_type* i, const char_type* j) const + { + // + // Scan for the leftmost *matched* subexpression with the specified named. + // If none found then return the leftmost expression with that name, + // otherwise an invalid index: + // + if(m_is_singular) + raise_logic_error(); + BOOST_REGEX_DETAIL_NS::named_subexpressions::range_type s, r; + s = r = m_named_subs->equal_range(i, j); + while((r.first != r.second) && ((*this)[r.first->index].matched == false)) + ++r.first; + if(r.first == r.second) + r = s; + return r.first != r.second ? r.first->index : -20; + } + template + int named_subexpression_index(const charT* i, const charT* j) const + { + static_assert(sizeof(charT) <= sizeof(char_type), "Failed internal logic"); + if(i == j) + return -20; + std::vector s; + while(i != j) + s.insert(s.end(), *i++); + return named_subexpression_index(&*s.begin(), &*s.begin() + s.size()); + } + template + const_reference operator[](const std::basic_string& s) const + { + return named_subexpression(s.c_str(), s.c_str() + s.size()); + } + const_reference operator[](const char_type* p) const + { + const char_type* e = p; + while(*e) ++e; + return named_subexpression(p, e); + } + + template + const_reference operator[](const charT* p) const + { + static_assert(sizeof(charT) <= sizeof(char_type), "Failed internal logic"); + if(*p == 0) + return m_null; + std::vector s; + while(*p) + s.insert(s.end(), *p++); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } + template + const_reference operator[](const std::basic_string& ns) const + { + static_assert(sizeof(charT) <= sizeof(char_type), "Failed internal logic"); + if(ns.empty()) + return m_null; + std::vector s; + for(unsigned i = 0; i < ns.size(); ++i) + s.insert(s.end(), ns[i]); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } + + const_reference prefix() const + { + if(m_is_singular) + raise_logic_error(); + return (*this)[-1]; + } + + const_reference suffix() const + { + if(m_is_singular) + raise_logic_error(); + return (*this)[-2]; + } + const_iterator begin() const + { + return (m_subs.size() > 2) ? (m_subs.begin() + 2) : m_subs.end(); + } + const_iterator end() const + { + return m_subs.end(); + } + // format: + template + OutputIterator format(OutputIterator out, + Functor fmt, + match_flag_type flags = format_default) const + { + if(m_is_singular) + raise_logic_error(); + typedef typename BOOST_REGEX_DETAIL_NS::compute_functor_type, OutputIterator>::type F; + F func(fmt); + return func(*this, out, flags); + } + template + string_type format(Functor fmt, match_flag_type flags = format_default) const + { + if(m_is_singular) + raise_logic_error(); + std::basic_string result; + BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); + + typedef typename BOOST_REGEX_DETAIL_NS::compute_functor_type, BOOST_REGEX_DETAIL_NS::string_out_iterator > >::type F; + F func(fmt); + + func(*this, i, flags); + return result; + } + // format with locale: + template + OutputIterator format(OutputIterator out, + Functor fmt, + match_flag_type flags, + const RegexT& re) const + { + if(m_is_singular) + raise_logic_error(); + typedef ::boost::regex_traits_wrapper traits_type; + typedef typename BOOST_REGEX_DETAIL_NS::compute_functor_type, OutputIterator, traits_type>::type F; + F func(fmt); + return func(*this, out, flags, re.get_traits()); + } + template + string_type format(Functor fmt, + match_flag_type flags, + const RegexT& re) const + { + if(m_is_singular) + raise_logic_error(); + typedef ::boost::regex_traits_wrapper traits_type; + std::basic_string result; + BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); + + typedef typename BOOST_REGEX_DETAIL_NS::compute_functor_type, BOOST_REGEX_DETAIL_NS::string_out_iterator >, traits_type >::type F; + F func(fmt); + + func(*this, i, flags, re.get_traits()); + return result; + } + + const_reference get_last_closed_paren()const + { + if(m_is_singular) + raise_logic_error(); + return m_last_closed_paren == 0 ? m_null : (*this)[m_last_closed_paren]; + } + + allocator_type get_allocator() const + { + return m_subs.get_allocator(); + } + void swap(match_results& that) + { + std::swap(m_subs, that.m_subs); + std::swap(m_named_subs, that.m_named_subs); + std::swap(m_last_closed_paren, that.m_last_closed_paren); + if(m_is_singular) + { + if(!that.m_is_singular) + { + m_base = that.m_base; + m_null = that.m_null; + } + } + else if(that.m_is_singular) + { + that.m_base = m_base; + that.m_null = m_null; + } + else + { + std::swap(m_base, that.m_base); + std::swap(m_null, that.m_null); + } + std::swap(m_is_singular, that.m_is_singular); + } + bool operator==(const match_results& that)const + { + if(m_is_singular) + { + return that.m_is_singular; + } + else if(that.m_is_singular) + { + return false; + } + return (m_subs == that.m_subs) && (m_base == that.m_base) && (m_last_closed_paren == that.m_last_closed_paren); + } + bool operator!=(const match_results& that)const + { return !(*this == that); } + +#ifdef BOOST_REGEX_MATCH_EXTRA + typedef typename sub_match::capture_sequence_type capture_sequence_type; + + const capture_sequence_type& captures(int i)const + { + if(m_is_singular) + raise_logic_error(); + return (*this)[i].captures(); + } +#endif + + // + // private access functions: + void set_second(BidiIterator i) + { + BOOST_REGEX_ASSERT(m_subs.size() > 2); + m_subs[2].second = i; + m_subs[2].matched = true; + m_subs[0].first = i; + m_subs[0].matched = (m_subs[0].first != m_subs[0].second); + m_null.first = i; + m_null.second = i; + m_null.matched = false; + m_is_singular = false; + } + + void set_second(BidiIterator i, size_type pos, bool m = true, bool escape_k = false) + { + if(pos) + m_last_closed_paren = static_cast(pos); + pos += 2; + BOOST_REGEX_ASSERT(m_subs.size() > pos); + m_subs[pos].second = i; + m_subs[pos].matched = m; + if((pos == 2) && !escape_k) + { + m_subs[0].first = i; + m_subs[0].matched = (m_subs[0].first != m_subs[0].second); + m_null.first = i; + m_null.second = i; + m_null.matched = false; + m_is_singular = false; + } + } + void set_size(size_type n, BidiIterator i, BidiIterator j) + { + value_type v(j); + size_type len = m_subs.size(); + if(len > n + 2) + { + m_subs.erase(m_subs.begin()+n+2, m_subs.end()); + std::fill(m_subs.begin(), m_subs.end(), v); + } + else + { + std::fill(m_subs.begin(), m_subs.end(), v); + if(n+2 != len) + m_subs.insert(m_subs.end(), n+2-len, v); + } + m_subs[1].first = i; + m_last_closed_paren = 0; + } + void set_base(BidiIterator pos) + { + m_base = pos; + } + BidiIterator base()const + { + return m_base; + } + void set_first(BidiIterator i) + { + BOOST_REGEX_ASSERT(m_subs.size() > 2); + // set up prefix: + m_subs[1].second = i; + m_subs[1].matched = (m_subs[1].first != i); + // set up $0: + m_subs[2].first = i; + // zero out everything else: + for(size_type n = 3; n < m_subs.size(); ++n) + { + m_subs[n].first = m_subs[n].second = m_subs[0].second; + m_subs[n].matched = false; + } + } + void set_first(BidiIterator i, size_type pos, bool escape_k = false) + { + BOOST_REGEX_ASSERT(pos+2 < m_subs.size()); + if(pos || escape_k) + { + m_subs[pos+2].first = i; + if(escape_k) + { + m_subs[1].second = i; + m_subs[1].matched = (m_subs[1].first != m_subs[1].second); + } + } + else + set_first(i); + } + void maybe_assign(const match_results& m); + + void set_named_subs(std::shared_ptr subs) + { + m_named_subs = subs; + } + +private: + // + // Error handler called when an uninitialized match_results is accessed: + // + static void raise_logic_error() + { + std::logic_error e("Attempt to access an uninitialized boost::match_results<> class."); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(e); +#else + throw e; +#endif + } + + + vector_type m_subs; // subexpressions + BidiIterator m_base; // where the search started from + sub_match m_null; // a null match + std::shared_ptr m_named_subs; // Shared copy of named subs in the regex object + int m_last_closed_paren; // Last ) to be seen - used for formatting + bool m_is_singular; // True if our stored iterators are singular +}; + +template +void match_results::maybe_assign(const match_results& m) +{ + if(m_is_singular) + { + *this = m; + return; + } + const_iterator p1, p2; + p1 = begin(); + p2 = m.begin(); + // + // Distances are measured from the start of *this* match, unless this isn't + // a valid match in which case we use the start of the whole sequence. Note that + // no subsequent match-candidate can ever be to the left of the first match found. + // This ensures that when we are using bidirectional iterators, that distances + // measured are as short as possible, and therefore as efficient as possible + // to compute. Finally note that we don't use the "matched" data member to test + // whether a sub-expression is a valid match, because partial matches set this + // to false for sub-expression 0. + // + BidiIterator l_end = this->suffix().second; + BidiIterator l_base = (p1->first == l_end) ? this->prefix().first : (*this)[0].first; + difference_type len1 = 0; + difference_type len2 = 0; + difference_type base1 = 0; + difference_type base2 = 0; + std::size_t i; + for(i = 0; i < size(); ++i, ++p1, ++p2) + { + // + // Leftmost takes priority over longest; handle special cases + // where distances need not be computed first (an optimisation + // for bidirectional iterators: ensure that we don't accidently + // compute the length of the whole sequence, as this can be really + // expensive). + // + if(p1->first == l_end) + { + if(p2->first != l_end) + { + // p2 must be better than p1, and no need to calculate + // actual distances: + base1 = 1; + base2 = 0; + break; + } + else + { + // *p1 and *p2 are either unmatched or match end-of sequence, + // either way no need to calculate distances: + if((p1->matched == false) && (p2->matched == true)) + break; + if((p1->matched == true) && (p2->matched == false)) + return; + continue; + } + } + else if(p2->first == l_end) + { + // p1 better than p2, and no need to calculate distances: + return; + } + base1 = std::distance(l_base, p1->first); + base2 = std::distance(l_base, p2->first); + BOOST_REGEX_ASSERT(base1 >= 0); + BOOST_REGEX_ASSERT(base2 >= 0); + if(base1 < base2) return; + if(base2 < base1) break; + + len1 = std::distance((BidiIterator)p1->first, (BidiIterator)p1->second); + len2 = std::distance((BidiIterator)p2->first, (BidiIterator)p2->second); + BOOST_REGEX_ASSERT(len1 >= 0); + BOOST_REGEX_ASSERT(len2 >= 0); + if((len1 != len2) || ((p1->matched == false) && (p2->matched == true))) + break; + if((p1->matched == true) && (p2->matched == false)) + return; + } + if(i == size()) + return; + if(base2 < base1) + *this = m; + else if((len2 > len1) || ((p1->matched == false) && (p2->matched == true)) ) + *this = m; +} + +template +void swap(match_results& a, match_results& b) +{ + a.swap(b); +} + +template +std::basic_ostream& + operator << (std::basic_ostream& os, + const match_results& s) +{ + return (os << s.str()); +} + +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} // namespace boost + +#endif + + diff --git a/include/boost/regex/v5/mem_block_cache.hpp b/include/boost/regex/v5/mem_block_cache.hpp new file mode 100644 index 00000000..0af4eae1 --- /dev/null +++ b/include/boost/regex/v5/mem_block_cache.hpp @@ -0,0 +1,175 @@ + /* + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE mem_block_cache.hpp + * VERSION see + * DESCRIPTION: memory block cache used by the non-recursive matcher. + */ + +#ifndef BOOST_REGEX_V5_MEM_BLOCK_CACHE_HPP +#define BOOST_REGEX_V5_MEM_BLOCK_CACHE_HPP + +#include +#ifdef BOOST_HAS_THREADS +#include +#endif + +#ifndef BOOST_NO_CXX11_HDR_ATOMIC + #include + #if ATOMIC_POINTER_LOCK_FREE == 2 + #define BOOST_REGEX_MEM_BLOCK_CACHE_LOCK_FREE + #define BOOST_REGEX_ATOMIC_POINTER std::atomic + #endif +#endif + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +#ifdef BOOST_REGEX_MEM_BLOCK_CACHE_LOCK_FREE /* lock free implementation */ +struct mem_block_cache +{ + std::atomic cache[BOOST_REGEX_MAX_CACHE_BLOCKS]; + + ~mem_block_cache() + { + for (size_t i = 0;i < BOOST_REGEX_MAX_CACHE_BLOCKS; ++i) { + if (cache[i].load()) ::operator delete(cache[i].load()); + } + } + void* get() + { + for (size_t i = 0;i < BOOST_REGEX_MAX_CACHE_BLOCKS; ++i) { + void* p = cache[i].load(); + if (p != NULL) { + if (cache[i].compare_exchange_strong(p, NULL)) return p; + } + } + return ::operator new(BOOST_REGEX_BLOCKSIZE); + } + void put(void* ptr) + { + for (size_t i = 0;i < BOOST_REGEX_MAX_CACHE_BLOCKS; ++i) { + void* p = cache[i].load(); + if (p == NULL) { + if (cache[i].compare_exchange_strong(p, ptr)) return; + } + } + ::operator delete(ptr); + } + + static mem_block_cache& instance() + { + static mem_block_cache block_cache = { { {nullptr} } }; + return block_cache; + } +}; + + +#else /* lock-based implementation */ + + +struct mem_block_node +{ + mem_block_node* next; +}; + +struct mem_block_cache +{ + // this member has to be statically initialsed: + mem_block_node* next; + unsigned cached_blocks; +#ifdef BOOST_HAS_THREADS + boost::static_mutex mut; +#endif + + ~mem_block_cache() + { + while(next) + { + mem_block_node* old = next; + next = next->next; + ::operator delete(old); + } + } + void* get() + { +#ifdef BOOST_HAS_THREADS + std::lock_guard g(mut); +#endif + if(next) + { + mem_block_node* result = next; + next = next->next; + --cached_blocks; + return result; + } + return ::operator new(BOOST_REGEX_BLOCKSIZE); + } + void put(void* p) + { +#ifdef BOOST_HAS_THREADS + std::lock_guard g(mut); +#endif + if(cached_blocks >= BOOST_REGEX_MAX_CACHE_BLOCKS) + { + ::operator delete(p); + } + else + { + mem_block_node* old = static_cast(p); + old->next = next; + next = old; + ++cached_blocks; + } + } + static mem_block_cache& instance() + { +#ifdef BOOST_HAS_THREADS + static mem_block_cache block_cache = { 0, 0, BOOST_STATIC_MUTEX_INIT, }; +#else + static mem_block_cache block_cache = { 0, 0, }; +#endif + return block_cache; + } +}; +#endif + +#if BOOST_REGEX_MAX_CACHE_BLOCKS == 0 + +inline void* get_mem_block() +{ + return ::operator new(BOOST_REGEX_BLOCKSIZE); +} + +inline void put_mem_block(void* p) +{ + ::operator delete(p); +} + +#else + +inline void* get_mem_block() +{ + return mem_block_cache::instance().get(); +} + +inline void put_mem_block(void* p) +{ + mem_block_cache::instance().put(p); +} + +#endif +} +} // namespace boost + +#endif + diff --git a/include/boost/regex/v5/object_cache.hpp b/include/boost/regex/v5/object_cache.hpp new file mode 100644 index 00000000..6ebef9b3 --- /dev/null +++ b/include/boost/regex/v5/object_cache.hpp @@ -0,0 +1,160 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE object_cache.hpp + * VERSION see + * DESCRIPTION: Implements a generic object cache. + */ + +#ifndef BOOST_REGEX_OBJECT_CACHE_HPP +#define BOOST_REGEX_OBJECT_CACHE_HPP + +#include +#include +#include +#include +#include +#include +#ifdef BOOST_HAS_THREADS +#include +#endif + +namespace boost{ + +template +class object_cache +{ +public: + typedef std::pair< ::std::shared_ptr, Key const*> value_type; + typedef std::list list_type; + typedef typename list_type::iterator list_iterator; + typedef std::map map_type; + typedef typename map_type::iterator map_iterator; + typedef typename list_type::size_type size_type; + static std::shared_ptr get(const Key& k, size_type l_max_cache_size); + +private: + static std::shared_ptr do_get(const Key& k, size_type l_max_cache_size); + + struct data + { + list_type cont; + map_type index; + }; + + // Needed by compilers not implementing the resolution to DR45. For reference, + // see http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45. + friend struct data; +}; + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable: 4702) +#endif +template +std::shared_ptr object_cache::get(const Key& k, size_type l_max_cache_size) +{ +#ifdef BOOST_HAS_THREADS + static std::mutex mut; + std::lock_guard l(mut); + return do_get(k, l_max_cache_size); +#else + return do_get(k, l_max_cache_size); +#endif +} +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + +template +std::shared_ptr object_cache::do_get(const Key& k, size_type l_max_cache_size) +{ + typedef typename object_cache::data object_data; + typedef typename map_type::size_type map_size_type; + static object_data s_data; + + // + // see if the object is already in the cache: + // + map_iterator mpos = s_data.index.find(k); + if(mpos != s_data.index.end()) + { + // + // Eureka! + // We have a cached item, bump it up the list and return it: + // + if(--(s_data.cont.end()) != mpos->second) + { + // splice out the item we want to move: + list_type temp; + temp.splice(temp.end(), s_data.cont, mpos->second); + // and now place it at the end of the list: + s_data.cont.splice(s_data.cont.end(), temp, temp.begin()); + BOOST_REGEX_ASSERT(*(s_data.cont.back().second) == k); + // update index with new position: + mpos->second = --(s_data.cont.end()); + BOOST_REGEX_ASSERT(&(mpos->first) == mpos->second->second); + BOOST_REGEX_ASSERT(&(mpos->first) == s_data.cont.back().second); + } + return s_data.cont.back().first; + } + // + // if we get here then the item is not in the cache, + // so create it: + // + std::shared_ptr result(new Object(k)); + // + // Add it to the list, and index it: + // + s_data.cont.push_back(value_type(result, static_cast(0))); + s_data.index.insert(std::make_pair(k, --(s_data.cont.end()))); + s_data.cont.back().second = &(s_data.index.find(k)->first); + map_size_type s = s_data.index.size(); + BOOST_REGEX_ASSERT(s_data.index[k]->first.get() == result.get()); + BOOST_REGEX_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); + BOOST_REGEX_ASSERT(s_data.index.find(k)->first == k); + if(s > l_max_cache_size) + { + // + // We have too many items in the list, so we need to start + // popping them off the back of the list, but only if they're + // being held uniquely by us: + // + list_iterator pos = s_data.cont.begin(); + list_iterator last = s_data.cont.end(); + while((pos != last) && (s > l_max_cache_size)) + { + if(pos->first.use_count() == 1) + { + list_iterator condemmed(pos); + ++pos; + // now remove the items from our containers, + // then order has to be as follows: + BOOST_REGEX_ASSERT(s_data.index.find(*(condemmed->second)) != s_data.index.end()); + s_data.index.erase(*(condemmed->second)); + s_data.cont.erase(condemmed); + --s; + } + else + ++pos; + } + BOOST_REGEX_ASSERT(s_data.index[k]->first.get() == result.get()); + BOOST_REGEX_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); + BOOST_REGEX_ASSERT(s_data.index.find(k)->first == k); + } + return result; +} + +} + +#endif diff --git a/include/boost/regex/v5/pattern_except.hpp b/include/boost/regex/v5/pattern_except.hpp new file mode 100644 index 00000000..731aefb6 --- /dev/null +++ b/include/boost/regex/v5/pattern_except.hpp @@ -0,0 +1,105 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE pattern_except.hpp + * VERSION see + * DESCRIPTION: Declares pattern-matching exception classes. + */ + +#ifndef BOOST_RE_V5_PAT_EXCEPT_HPP +#define BOOST_RE_V5_PAT_EXCEPT_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif + +#include +#include +#include +#include + +namespace boost{ + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable : 4275) +#if BOOST_REGEX_MSVC >= 1800 +#pragma warning(disable : 26812 4459) +#endif +#endif +class regex_error : public std::runtime_error +{ +public: + explicit regex_error(const std::string& s, regex_constants::error_type err = regex_constants::error_unknown, std::ptrdiff_t pos = 0) + : std::runtime_error(s) + , m_error_code(err) + , m_position(pos) + { + } + explicit regex_error(regex_constants::error_type err) + : std::runtime_error(::boost::BOOST_REGEX_DETAIL_NS::get_default_error_string(err)) + , m_error_code(err) + , m_position(0) + { + } + ~regex_error() noexcept override {} + regex_constants::error_type code()const + { return m_error_code; } + std::ptrdiff_t position()const + { return m_position; } + void raise()const + { +#ifndef BOOST_NO_EXCEPTIONS +#ifndef BOOST_REGEX_STANDALONE + ::boost::throw_exception(*this); +#else + throw* this; +#endif +#endif + } +private: + regex_constants::error_type m_error_code; + std::ptrdiff_t m_position; +}; + +typedef regex_error bad_pattern; +typedef regex_error bad_expression; + +namespace BOOST_REGEX_DETAIL_NS{ + +inline void raise_runtime_error(const std::runtime_error& ex) +{ +#ifndef BOOST_REGEX_STANDALONE + ::boost::throw_exception(ex); +#else + throw ex; +#endif +} + +template +void raise_error(const traits& t, regex_constants::error_type code) +{ + (void)t; // warning suppression + std::runtime_error e(t.error_string(code)); + ::boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(e); +} + +} + +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + +} // namespace boost + +#endif diff --git a/include/boost/regex/v5/perl_matcher.hpp b/include/boost/regex/v5/perl_matcher.hpp new file mode 100644 index 00000000..deeffa58 --- /dev/null +++ b/include/boost/regex/v5/perl_matcher.hpp @@ -0,0 +1,576 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + +#ifndef BOOST_REGEX_MATCHER_HPP +#define BOOST_REGEX_MATCHER_HPP + +#include + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#pragma warning(disable : 4251 4459) +#if BOOST_REGEX_MSVC < 1700 +# pragma warning(disable : 4231) +#endif +# if BOOST_REGEX_MSVC < 1600 +# pragma warning(disable : 4660) +# endif +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +// +// error checking API: +// +inline void verify_options(boost::regex_constants::syntax_option_type, match_flag_type mf) +{ + // + // can't mix match_extra with POSIX matching rules: + // + if ((mf & match_extra) && (mf & match_posix)) + { + std::logic_error msg("Usage Error: Can't mix regular expression captures with POSIX matching rules"); +#ifndef BOOST_REGEX_STANDALONE + throw_exception(msg); +#else + throw msg; +#endif + } +} +// +// function can_start: +// +template +inline bool can_start(charT c, const unsigned char* map, unsigned char mask) +{ + return ((c < static_cast(0)) ? true : ((c >= static_cast(1 << CHAR_BIT)) ? true : map[c] & mask)); +} +inline bool can_start(char c, const unsigned char* map, unsigned char mask) +{ + return map[(unsigned char)c] & mask; +} +inline bool can_start(signed char c, const unsigned char* map, unsigned char mask) +{ + return map[(unsigned char)c] & mask; +} +inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask) +{ + return map[c] & mask; +} +inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask) +{ + return ((c >= (1 << CHAR_BIT)) ? true : map[c] & mask); +} +#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) +inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask) +{ + return ((c >= static_cast(1u << CHAR_BIT)) ? true : map[c] & mask); +} +#endif +#if !defined(BOOST_NO_INTRINSIC_WCHAR_T) +inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask) +{ + return (((c >= static_cast(1u << CHAR_BIT)) ? true : map[c] & mask)); +} +#endif + +template +inline int string_compare(const std::basic_string& s, const C* p) +{ + if(0 == *p) + { + if(s.empty() || ((s.size() == 1) && (s[0] == 0))) + return 0; + } + return s.compare(p); +} +template +inline int string_compare(const Seq& s, const C* p) +{ + std::size_t i = 0; + while((i < s.size()) && (p[i] == s[i])) + { + ++i; + } + return (i == s.size()) ? -(int)p[i] : (int)s[i] - (int)p[i]; +} +# define STR_COMP(s,p) string_compare(s,p) + +template +inline const charT* re_skip_past_null(const charT* p) +{ + while (*p != static_cast(0)) ++p; + return ++p; +} + +template +iterator re_is_set_member(iterator next, + iterator last, + const re_set_long* set_, + const regex_data& e, bool icase) +{ + const charT* p = reinterpret_cast(set_+1); + iterator ptr; + unsigned int i; + //bool icase = e.m_flags & regex_constants::icase; + + if(next == last) return next; + + typedef typename traits_type::string_type traits_string_type; + const ::boost::regex_traits_wrapper& traits_inst = *(e.m_ptraits); + + // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never + // referenced + (void)traits_inst; + + // try and match a single character, could be a multi-character + // collating element... + for(i = 0; i < set_->csingles; ++i) + { + ptr = next; + if(*p == static_cast(0)) + { + // treat null string as special case: + if(traits_inst.translate(*ptr, icase)) + { + ++p; + continue; + } + return set_->isnot ? next : (ptr == next) ? ++next : ptr; + } + else + { + while(*p && (ptr != last)) + { + if(traits_inst.translate(*ptr, icase) != *p) + break; + ++p; + ++ptr; + } + + if(*p == static_cast(0)) // if null we've matched + return set_->isnot ? next : (ptr == next) ? ++next : ptr; + + p = re_skip_past_null(p); // skip null + } + } + + charT col = traits_inst.translate(*next, icase); + + + if(set_->cranges || set_->cequivalents) + { + traits_string_type s1; + // + // try and match a range, NB only a single character can match + if(set_->cranges) + { + if((e.m_flags & regex_constants::collate) == 0) + s1.assign(1, col); + else + { + charT a[2] = { col, charT(0), }; + s1 = traits_inst.transform(a, a + 1); + } + for(i = 0; i < set_->cranges; ++i) + { + if(STR_COMP(s1, p) >= 0) + { + do{ ++p; }while(*p); + ++p; + if(STR_COMP(s1, p) <= 0) + return set_->isnot ? next : ++next; + } + else + { + // skip first string + do{ ++p; }while(*p); + ++p; + } + // skip second string + do{ ++p; }while(*p); + ++p; + } + } + // + // try and match an equivalence class, NB only a single character can match + if(set_->cequivalents) + { + charT a[2] = { col, charT(0), }; + s1 = traits_inst.transform_primary(a, a +1); + for(i = 0; i < set_->cequivalents; ++i) + { + if(STR_COMP(s1, p) == 0) + return set_->isnot ? next : ++next; + // skip string + do{ ++p; }while(*p); + ++p; + } + } + } + if(traits_inst.isctype(col, set_->cclasses) == true) + return set_->isnot ? next : ++next; + if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false)) + return set_->isnot ? next : ++next; + return set_->isnot ? ++next : next; +} + +template +class repeater_count +{ + repeater_count** stack; + repeater_count* next; + int state_id; + std::size_t count; // the number of iterations so far + BidiIterator start_pos; // where the last repeat started + + repeater_count* unwind_until(int n, repeater_count* p, int current_recursion_id) + { + while(p && (p->state_id != n)) + { + if(-2 - current_recursion_id == p->state_id) + return 0; + p = p->next; + if(p && (p->state_id < 0)) + { + p = unwind_until(p->state_id, p, current_recursion_id); + if(!p) + return p; + p = p->next; + } + } + return p; + } +public: + repeater_count(repeater_count** s) : stack(s), next(0), state_id(-1), count(0), start_pos() {} + + repeater_count(int i, repeater_count** s, BidiIterator start, int current_recursion_id) + : start_pos(start) + { + state_id = i; + stack = s; + next = *stack; + *stack = this; + if((state_id > next->state_id) && (next->state_id >= 0)) + count = 0; + else + { + repeater_count* p = next; + p = unwind_until(state_id, p, current_recursion_id); + if(p) + { + count = p->count; + start_pos = p->start_pos; + } + else + count = 0; + } + } + ~repeater_count() + { + if(next) + *stack = next; + } + std::size_t get_count() { return count; } + int get_id() { return state_id; } + std::size_t operator++() { return ++count; } + bool check_null_repeat(const BidiIterator& pos, std::size_t max) + { + // this is called when we are about to start a new repeat, + // if the last one was NULL move our count to max, + // otherwise save the current position. + bool result = (count == 0) ? false : (pos == start_pos); + if(result) + count = max; + else + start_pos = pos; + return result; + } +}; + +struct saved_state; + +enum saved_state_type +{ + saved_type_end = 0, + saved_type_paren = 1, + saved_type_recurse = 2, + saved_type_assertion = 3, + saved_state_alt = 4, + saved_state_repeater_count = 5, + saved_state_extra_block = 6, + saved_state_greedy_single_repeat = 7, + saved_state_rep_slow_dot = 8, + saved_state_rep_fast_dot = 9, + saved_state_rep_char = 10, + saved_state_rep_short_set = 11, + saved_state_rep_long_set = 12, + saved_state_non_greedy_long_repeat = 13, + saved_state_count = 14 +}; + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#if BOOST_REGEX_MSVC >= 1800 +#pragma warning(disable:26495) +#endif +#endif +template +struct recursion_info +{ + typedef typename Results::value_type value_type; + typedef typename value_type::iterator iterator; + int idx; + const re_syntax_base* preturn_address; + Results results; + repeater_count* repeater_stack; + iterator location_of_start; +}; +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +template +class perl_matcher +{ +public: + typedef typename traits::char_type char_type; + typedef perl_matcher self_type; + typedef bool (self_type::*matcher_proc_type)(); + typedef std::size_t traits_size_type; + typedef typename is_byte::width_type width_type; + typedef typename std::iterator_traits::difference_type difference_type; + typedef match_results results_type; + + perl_matcher(BidiIterator first, BidiIterator end, + match_results& what, + const basic_regex& e, + match_flag_type f, + BidiIterator l_base) + : m_result(what), base(first), last(end), + position(first), backstop(l_base), re(e), traits_inst(e.get_traits()), + m_independent(false), next_count(&rep_obj), rep_obj(&next_count) + , m_recursions(0) + { + construct_init(e, f); + } + + bool match(); + bool find(); + + void setf(match_flag_type f) + { m_match_flags |= f; } + void unsetf(match_flag_type f) + { m_match_flags &= ~f; } + +private: + void construct_init(const basic_regex& e, match_flag_type f); + + bool find_imp(); + bool match_imp(); + void estimate_max_state_count(std::random_access_iterator_tag*); + void estimate_max_state_count(void*); + bool match_prefix(); + bool match_all_states(); + + // match procs, stored in s_match_vtable: + bool match_startmark(); + bool match_endmark(); + bool match_literal(); + bool match_start_line(); + bool match_end_line(); + bool match_wild(); + bool match_match(); + bool match_word_boundary(); + bool match_within_word(); + bool match_word_start(); + bool match_word_end(); + bool match_buffer_start(); + bool match_buffer_end(); + bool match_backref(); + bool match_long_set(); + bool match_set(); + bool match_jump(); + bool match_alt(); + bool match_rep(); + bool match_combining(); + bool match_soft_buffer_end(); + bool match_restart_continue(); + bool match_long_set_repeat(); + bool match_set_repeat(); + bool match_char_repeat(); + bool match_dot_repeat_fast(); + bool match_dot_repeat_slow(); + bool match_dot_repeat_dispatch() + { + return ::boost::is_random_access_iterator::value ? match_dot_repeat_fast() : match_dot_repeat_slow(); + } + bool match_backstep(); + bool match_assert_backref(); + bool match_toggle_case(); + bool match_recursion(); + bool match_fail(); + bool match_accept(); + bool match_commit(); + bool match_then(); + bool skip_until_paren(int index, bool match = true); + + // find procs stored in s_find_vtable: + bool find_restart_any(); + bool find_restart_word(); + bool find_restart_line(); + bool find_restart_buf(); + bool find_restart_lit(); + +private: + // final result structure to be filled in: + match_results& m_result; + // temporary result for POSIX matches: + std::unique_ptr > m_temp_match; + // pointer to actual result structure to fill in: + match_results* m_presult; + // start of sequence being searched: + BidiIterator base; + // end of sequence being searched: + BidiIterator last; + // current character being examined: + BidiIterator position; + // where to restart next search after failed match attempt: + BidiIterator restart; + // where the current search started from, acts as base for $` during grep: + BidiIterator search_base; + // how far we can go back when matching lookbehind: + BidiIterator backstop; + // the expression being examined: + const basic_regex& re; + // the expression's traits class: + const ::boost::regex_traits_wrapper& traits_inst; + // the next state in the machine being matched: + const re_syntax_base* pstate; + // matching flags in use: + match_flag_type m_match_flags; + // how many states we have examined so far: + std::ptrdiff_t state_count; + // max number of states to examine before giving up: + std::ptrdiff_t max_state_count; + // whether we should ignore case or not: + bool icase; + // set to true when (position == last), indicates that we may have a partial match: + bool m_has_partial_match; + // set to true whenever we get a match: + bool m_has_found_match; + // set to true whenever we're inside an independent sub-expression: + bool m_independent; + // the current repeat being examined: + repeater_count* next_count; + // the first repeat being examined (top of linked list): + repeater_count rep_obj; + // the mask to pass when matching word boundaries: + typename traits::char_class_type m_word_mask; + // the bitmask to use when determining whether a match_any matches a newline or not: + unsigned char match_any_mask; + // recursion information: + std::vector > recursion_stack; + // + // additional members for non-recursive version: + // + typedef bool (self_type::*unwind_proc_type)(bool); + + void extend_stack(); + bool unwind(bool); + bool unwind_end(bool); + bool unwind_paren(bool); + bool unwind_recursion_stopper(bool); + bool unwind_assertion(bool); + bool unwind_alt(bool); + bool unwind_repeater_counter(bool); + bool unwind_extra_block(bool); + bool unwind_greedy_single_repeat(bool); + bool unwind_slow_dot_repeat(bool); + bool unwind_fast_dot_repeat(bool); + bool unwind_char_repeat(bool); + bool unwind_short_set_repeat(bool); + bool unwind_long_set_repeat(bool); + bool unwind_non_greedy_repeat(bool); + bool unwind_recursion(bool); + bool unwind_recursion_pop(bool); + bool unwind_commit(bool); + bool unwind_then(bool); + bool unwind_case(bool); + void destroy_single_repeat(); + void push_matched_paren(int index, const sub_match& sub); + void push_recursion_stopper(); + void push_assertion(const re_syntax_base* ps, bool positive); + void push_alt(const re_syntax_base* ps); + void push_repeater_count(int i, repeater_count** s); + void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id); + void push_non_greedy_repeat(const re_syntax_base* ps); + void push_recursion(int idx, const re_syntax_base* p, results_type* presults, results_type* presults2); + void push_recursion_pop(); + void push_case_change(bool); + + // pointer to base of stack: + saved_state* m_stack_base; + // pointer to current stack position: + saved_state* m_backup_state; + // how many memory blocks have we used up?: + unsigned used_block_count; + // determines what value to return when unwinding from recursion, + // allows for mixed recursive/non-recursive algorithm: + bool m_recursive_result; + // We have unwound to a lookahead/lookbehind, used by COMMIT/PRUNE/SKIP: + bool m_unwound_lookahead; + // We have unwound to an alternative, used by THEN: + bool m_unwound_alt; + // We are unwinding a commit - used by independent subs to determine whether to stop there or carry on unwinding: + //bool m_unwind_commit; + // Recursion limit: + unsigned m_recursions; + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#if BOOST_REGEX_MSVC >= 1800 +#pragma warning(disable:26495) +#endif +#endif + // these operations aren't allowed, so are declared private, + // bodies are provided to keep explicit-instantiation requests happy: + perl_matcher& operator=(const perl_matcher&) + { + return *this; + } + perl_matcher(const perl_matcher& that) + : m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {} +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif +}; + +} // namespace BOOST_REGEX_DETAIL_NS + +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +} // namespace boost + +// +// include the implementation of perl_matcher: +// +#include +// this one has to be last: +#include + +#endif diff --git a/include/boost/regex/v5/perl_matcher_common.hpp b/include/boost/regex/v5/perl_matcher_common.hpp new file mode 100644 index 00000000..5b29d893 --- /dev/null +++ b/include/boost/regex/v5/perl_matcher_common.hpp @@ -0,0 +1,915 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE perl_matcher_common.cpp + * VERSION see + * DESCRIPTION: Definitions of perl_matcher member functions that are + * common to both the recursive and non-recursive versions. + */ + +#ifndef BOOST_REGEX_V5_PERL_MATCHER_COMMON_HPP +#define BOOST_REGEX_V5_PERL_MATCHER_COMMON_HPP + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#pragma warning(disable:4459) +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#pragma warning(disable:26812) +#endif + template +void perl_matcher::construct_init(const basic_regex& e, match_flag_type f) +{ + typedef typename std::iterator_traits::iterator_category category; + typedef typename basic_regex::flag_type expression_flag_type; + + if(e.empty()) + { + // precondition failure: e is not a valid regex. + std::invalid_argument ex("Invalid regular expression object"); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(ex); +#else + throw e; +#endif + } + pstate = 0; + m_match_flags = f; + estimate_max_state_count(static_cast(0)); + expression_flag_type re_f = re.flags(); + icase = re_f & regex_constants::icase; + if(!(m_match_flags & (match_perl|match_posix))) + { + if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0) + m_match_flags |= match_perl; + else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) + m_match_flags |= match_perl; + else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal)) + m_match_flags |= match_perl; + else + m_match_flags |= match_posix; + } + if(m_match_flags & match_posix) + { + m_temp_match.reset(new match_results()); + m_presult = m_temp_match.get(); + } + else + m_presult = &m_result; + m_stack_base = 0; + m_backup_state = 0; + // find the value to use for matching word boundaries: + m_word_mask = re.get_data().m_word_mask; + // find bitmask to use for matching '.': + match_any_mask = static_cast((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline); + // Disable match_any if requested in the state machine: + if(e.get_data().m_disable_match_any) + m_match_flags &= regex_constants::match_not_any; +} +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +template +void perl_matcher::estimate_max_state_count(std::random_access_iterator_tag*) +{ + // + // How many states should we allow our machine to visit before giving up? + // This is a heuristic: it takes the greater of O(N^2) and O(NS^2) + // where N is the length of the string, and S is the number of states + // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2) + // but these take unreasonably amounts of time to bale out in pathological + // cases. + // + // Calculate NS^2 first: + // + static const std::ptrdiff_t k = 100000; + std::ptrdiff_t dist = std::distance(base, last); + if(dist == 0) + dist = 1; + std::ptrdiff_t states = re.size(); + if(states == 0) + states = 1; + if ((std::numeric_limits::max)() / states < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states *= states; + if((std::numeric_limits::max)() / dist < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states *= dist; + if((std::numeric_limits::max)() - k < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states += k; + + max_state_count = states; + + // + // Now calculate N^2: + // + states = dist; + if((std::numeric_limits::max)() / dist < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states *= dist; + if((std::numeric_limits::max)() - k < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states += k; + // + // N^2 can be a very large number indeed, to prevent things getting out + // of control, cap the max states: + // + if(states > BOOST_REGEX_MAX_STATE_COUNT) + states = BOOST_REGEX_MAX_STATE_COUNT; + // + // If (the possibly capped) N^2 is larger than our first estimate, + // use this instead: + // + if(states > max_state_count) + max_state_count = states; +} + +template +inline void perl_matcher::estimate_max_state_count(void*) +{ + // we don't know how long the sequence is: + max_state_count = BOOST_REGEX_MAX_STATE_COUNT; +} + +template +inline bool perl_matcher::match() +{ + return match_imp(); +} + +template +bool perl_matcher::match_imp() +{ + // initialise our stack if we are non-recursive: + save_state_init init(&m_stack_base, &m_backup_state); + used_block_count = BOOST_REGEX_MAX_BLOCKS; +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif + + // reset our state machine: + position = base; + search_base = base; + state_count = 0; + m_match_flags |= regex_constants::match_all; + m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast(1u + re.mark_count()), search_base, last); + m_presult->set_base(base); + m_presult->set_named_subs(this->re.get_named_subs()); + if(m_match_flags & match_posix) + m_result = *m_presult; + verify_options(re.flags(), m_match_flags); + if(0 == match_prefix()) + return false; + return (m_result[0].second == last) && (m_result[0].first == base); + +#if !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. + while(unwind(true)){} + throw; + } +#endif +} + +template +inline bool perl_matcher::find() +{ + return find_imp(); +} + +template +bool perl_matcher::find_imp() +{ + static matcher_proc_type const s_find_vtable[7] = + { + &perl_matcher::find_restart_any, + &perl_matcher::find_restart_word, + &perl_matcher::find_restart_line, + &perl_matcher::find_restart_buf, + &perl_matcher::match_prefix, + &perl_matcher::find_restart_lit, + &perl_matcher::find_restart_lit, + }; + + // initialise our stack if we are non-recursive: + save_state_init init(&m_stack_base, &m_backup_state); + used_block_count = BOOST_REGEX_MAX_BLOCKS; +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif + + state_count = 0; + if((m_match_flags & regex_constants::match_init) == 0) + { + // reset our state machine: + search_base = position = base; + pstate = re.get_first_state(); + m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast(1u + re.mark_count()), base, last); + m_presult->set_base(base); + m_presult->set_named_subs(this->re.get_named_subs()); + m_match_flags |= regex_constants::match_init; + } + else + { + // start again: + search_base = position = m_result[0].second; + // If last match was null and match_not_null was not set then increment + // our start position, otherwise we go into an infinite loop: + if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0)) + { + if(position == last) + return false; + else + ++position; + } + // reset $` start: + m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast(1u + re.mark_count()), search_base, last); + //if((base != search_base) && (base == backstop)) + // m_match_flags |= match_prev_avail; + } + if(m_match_flags & match_posix) + { + m_result.set_size(static_cast(1u + re.mark_count()), base, last); + m_result.set_base(base); + } + + verify_options(re.flags(), m_match_flags); + // find out what kind of expression we have: + unsigned type = (m_match_flags & match_continuous) ? + static_cast(regbase::restart_continue) + : static_cast(re.get_restart_type()); + + // call the appropriate search routine: + matcher_proc_type proc = s_find_vtable[type]; + return (this->*proc)(); + +#if !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. + while(unwind(true)){} + throw; + } +#endif +} + +template +bool perl_matcher::match_prefix() +{ + m_has_partial_match = false; + m_has_found_match = false; + pstate = re.get_first_state(); + m_presult->set_first(position); + restart = position; + match_all_states(); + if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial)) + { + m_has_found_match = true; + m_presult->set_second(last, 0, false); + position = last; + if((m_match_flags & match_posix) == match_posix) + { + m_result.maybe_assign(*m_presult); + } + } +#ifdef BOOST_REGEX_MATCH_EXTRA + if(m_has_found_match && (match_extra & m_match_flags)) + { + // + // we have a match, reverse the capture information: + // + for(unsigned i = 0; i < m_presult->size(); ++i) + { + typename sub_match::capture_sequence_type & seq = ((*m_presult)[i]).get_captures(); + std::reverse(seq.begin(), seq.end()); + } + } +#endif + if(!m_has_found_match) + position = restart; // reset search postion + return m_has_found_match; +} + +template +bool perl_matcher::match_literal() +{ + unsigned int len = static_cast(pstate)->length; + const char_type* what = reinterpret_cast(static_cast(pstate) + 1); + // + // compare string with what we stored in + // our records: + for(unsigned int i = 0; i < len; ++i, ++position) + { + if((position == last) || (traits_inst.translate(*position, icase) != what[i])) + return false; + } + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_start_line() +{ + if(position == backstop) + { + if((m_match_flags & match_prev_avail) == 0) + { + if((m_match_flags & match_not_bol) == 0) + { + pstate = pstate->next.p; + return true; + } + return false; + } + } + else if(m_match_flags & match_single_line) + return false; + + // check the previous value character: + BidiIterator t(position); + --t; + if(position != last) + { + if(is_separator(*t) && !((*t == static_cast('\r')) && (*position == static_cast('\n'))) ) + { + pstate = pstate->next.p; + return true; + } + } + else if(is_separator(*t)) + { + pstate = pstate->next.p; + return true; + } + return false; +} + +template +bool perl_matcher::match_end_line() +{ + if(position != last) + { + if(m_match_flags & match_single_line) + return false; + // we're not yet at the end so *first is always valid: + if(is_separator(*position)) + { + if((position != backstop) || (m_match_flags & match_prev_avail)) + { + // check that we're not in the middle of \r\n sequence + BidiIterator t(position); + --t; + if((*t == static_cast('\r')) && (*position == static_cast('\n'))) + { + return false; + } + } + pstate = pstate->next.p; + return true; + } + } + else if((m_match_flags & match_not_eol) == 0) + { + pstate = pstate->next.p; + return true; + } + return false; +} + +template +bool perl_matcher::match_wild() +{ + if(position == last) + return false; + if(is_separator(*position) && ((match_any_mask & static_cast(pstate)->mask) == 0)) + return false; + if((*position == char_type(0)) && (m_match_flags & match_not_dot_null)) + return false; + pstate = pstate->next.p; + ++position; + return true; +} + +template +bool perl_matcher::match_word_boundary() +{ + bool b; // indcates whether next character is a word character + if(position != last) + { + // prev and this character must be opposites: + b = traits_inst.isctype(*position, m_word_mask); + } + else + { + if (m_match_flags & match_not_eow) + return false; + b = false; + } + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + { + if(m_match_flags & match_not_bow) + return false; + else + b ^= false; + } + else + { + --position; + b ^= traits_inst.isctype(*position, m_word_mask); + ++position; + } + if(b) + { + pstate = pstate->next.p; + return true; + } + return false; // no match if we get to here... +} + +template +bool perl_matcher::match_within_word() +{ + if(position == last) + return false; + // both prev and this character must be m_word_mask: + bool prev = traits_inst.isctype(*position, m_word_mask); + { + bool b; + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + return false; + else + { + --position; + b = traits_inst.isctype(*position, m_word_mask); + ++position; + } + if(b == prev) + { + pstate = pstate->next.p; + return true; + } + } + return false; +} + +template +bool perl_matcher::match_word_start() +{ + if(position == last) + return false; // can't be starting a word if we're already at the end of input + if(!traits_inst.isctype(*position, m_word_mask)) + return false; // next character isn't a word character + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + { + if(m_match_flags & match_not_bow) + return false; // no previous input + } + else + { + // otherwise inside buffer: + BidiIterator t(position); + --t; + if(traits_inst.isctype(*t, m_word_mask)) + return false; // previous character not non-word + } + // OK we have a match: + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_word_end() +{ + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + return false; // start of buffer can't be end of word + BidiIterator t(position); + --t; + if(traits_inst.isctype(*t, m_word_mask) == false) + return false; // previous character wasn't a word character + + if(position == last) + { + if(m_match_flags & match_not_eow) + return false; // end of buffer but not end of word + } + else + { + // otherwise inside buffer: + if(traits_inst.isctype(*position, m_word_mask)) + return false; // next character is a word character + } + pstate = pstate->next.p; + return true; // if we fall through to here then we've succeeded +} + +template +bool perl_matcher::match_buffer_start() +{ + if((position != backstop) || (m_match_flags & match_not_bob)) + return false; + // OK match: + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_buffer_end() +{ + if((position != last) || (m_match_flags & match_not_eob)) + return false; + // OK match: + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_backref() +{ + // + // Compare with what we previously matched. + // Note that this succeeds if the backref did not partisipate + // in the match, this is in line with ECMAScript, but not Perl + // or PCRE. + // + int index = static_cast(pstate)->index; + if(index >= hash_value_mask) + { + named_subexpressions::range_type r = re.get_data().equal_range(index); + BOOST_REGEX_ASSERT(r.first != r.second); + do + { + index = r.first->index; + ++r.first; + }while((r.first != r.second) && ((*m_presult)[index].matched != true)); + } + + if((m_match_flags & match_perl) && !(*m_presult)[index].matched) + return false; + + BidiIterator i = (*m_presult)[index].first; + BidiIterator j = (*m_presult)[index].second; + while(i != j) + { + if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase))) + return false; + ++i; + ++position; + } + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_long_set() +{ + typedef typename traits::char_class_type char_class_type; + // let the traits class do the work: + if(position == last) + return false; + BidiIterator t = re_is_set_member(position, last, static_cast*>(pstate), re.get_data(), icase); + if(t != position) + { + pstate = pstate->next.p; + position = t; + return true; + } + return false; +} + +template +bool perl_matcher::match_set() +{ + if(position == last) + return false; + if(static_cast(pstate)->_map[static_cast(traits_inst.translate(*position, icase))]) + { + pstate = pstate->next.p; + ++position; + return true; + } + return false; +} + +template +bool perl_matcher::match_jump() +{ + pstate = static_cast(pstate)->alt.p; + return true; +} + +template +bool perl_matcher::match_combining() +{ + if(position == last) + return false; + if(is_combining(traits_inst.translate(*position, icase))) + return false; + ++position; + while((position != last) && is_combining(traits_inst.translate(*position, icase))) + ++position; + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_soft_buffer_end() +{ + if(m_match_flags & match_not_eob) + return false; + BidiIterator p(position); + while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p; + if(p != last) + return false; + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_restart_continue() +{ + if(position == search_base) + { + pstate = pstate->next.p; + return true; + } + return false; +} + +template +bool perl_matcher::match_backstep() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + if( ::boost::is_random_access_iterator::value) + { + std::ptrdiff_t maxlen = std::distance(backstop, position); + if(maxlen < static_cast(pstate)->index) + return false; + std::advance(position, -static_cast(pstate)->index); + } + else + { + int c = static_cast(pstate)->index; + while(c--) + { + if(position == backstop) + return false; + --position; + } + } + pstate = pstate->next.p; + return true; +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +inline bool perl_matcher::match_assert_backref() +{ + // return true if marked sub-expression N has been matched: + int index = static_cast(pstate)->index; + bool result = false; + if(index == 9999) + { + // Magic value for a (DEFINE) block: + return false; + } + else if(index > 0) + { + // Have we matched subexpression "index"? + // Check if index is a hash value: + if(index >= hash_value_mask) + { + named_subexpressions::range_type r = re.get_data().equal_range(index); + while(r.first != r.second) + { + if((*m_presult)[r.first->index].matched) + { + result = true; + break; + } + ++r.first; + } + } + else + { + result = (*m_presult)[index].matched; + } + pstate = pstate->next.p; + } + else + { + // Have we recursed into subexpression "index"? + // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1. + int idx = -(index+1); + if(idx >= hash_value_mask) + { + named_subexpressions::range_type r = re.get_data().equal_range(idx); + int stack_index = recursion_stack.empty() ? -1 : recursion_stack.back().idx; + while(r.first != r.second) + { + result |= (stack_index == r.first->index); + if(result)break; + ++r.first; + } + } + else + { + result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0)); + } + pstate = pstate->next.p; + } + return result; +} + +template +bool perl_matcher::match_fail() +{ + // Just force a backtrack: + return false; +} + +template +bool perl_matcher::match_accept() +{ + if(!recursion_stack.empty()) + { + return skip_until_paren(recursion_stack.back().idx); + } + else + { + return skip_until_paren(INT_MAX); + } +} + +template +bool perl_matcher::find_restart_any() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + const unsigned char* _map = re.get_map(); + while(true) + { + // skip everything we can't match: + while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) ) + ++position; + if(position == last) + { + // run out of characters, try a null match if possible: + if(re.can_be_null()) + return match_prefix(); + break; + } + // now try and obtain a match: + if(match_prefix()) + return true; + if(position == last) + return false; + ++position; + } + return false; +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::find_restart_word() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + // do search optimised for word starts: + const unsigned char* _map = re.get_map(); + if((m_match_flags & match_prev_avail) || (position != base)) + --position; + else if(match_prefix()) + return true; + do + { + while((position != last) && traits_inst.isctype(*position, m_word_mask)) + ++position; + while((position != last) && !traits_inst.isctype(*position, m_word_mask)) + ++position; + if(position == last) + break; + + if(can_start(*position, _map, (unsigned char)mask_any) ) + { + if(match_prefix()) + return true; + } + if(position == last) + break; + } while(true); + return false; +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::find_restart_line() +{ + // do search optimised for line starts: + const unsigned char* _map = re.get_map(); + if(match_prefix()) + return true; + while(position != last) + { + while((position != last) && !is_separator(*position)) + ++position; + if(position == last) + return false; + ++position; + if(position == last) + { + if(re.can_be_null() && match_prefix()) + return true; + return false; + } + + if( can_start(*position, _map, (unsigned char)mask_any) ) + { + if(match_prefix()) + return true; + } + if(position == last) + return false; + //++position; + } + return false; +} + +template +bool perl_matcher::find_restart_buf() +{ + if((position == base) && ((m_match_flags & match_not_bob) == 0)) + return match_prefix(); + return false; +} + +template +bool perl_matcher::find_restart_lit() +{ + return false; +} + +} // namespace BOOST_REGEX_DETAIL_NS + +} // namespace boost + +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +#endif + diff --git a/include/boost/regex/v5/perl_matcher_non_recursive.hpp b/include/boost/regex/v5/perl_matcher_non_recursive.hpp new file mode 100644 index 00000000..1d2f0a53 --- /dev/null +++ b/include/boost/regex/v5/perl_matcher_non_recursive.hpp @@ -0,0 +1,1872 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE perl_matcher_common.cpp + * VERSION see + * DESCRIPTION: Definitions of perl_matcher member functions that are + * specific to the non-recursive implementation. + */ + +#ifndef BOOST_REGEX_V5_PERL_MATCHER_NON_RECURSIVE_HPP +#define BOOST_REGEX_V5_PERL_MATCHER_NON_RECURSIVE_HPP + +#include + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +# pragma warning(disable: 4706 4459) +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +template +inline void inplace_destroy(T* p) +{ + (void)p; // warning suppression + p->~T(); +} + +struct saved_state +{ + union{ + unsigned int state_id; + // this padding ensures correct alignment on 64-bit platforms: + std::size_t padding1; + std::ptrdiff_t padding2; + void* padding3; + }; + saved_state(unsigned i) : state_id(i) {} +}; + +template +struct saved_matched_paren : public saved_state +{ + int index; + sub_match sub; + saved_matched_paren(int i, const sub_match& s) : saved_state(1), index(i), sub(s){} +}; + +template +struct saved_position : public saved_state +{ + const re_syntax_base* pstate; + BidiIterator position; + saved_position(const re_syntax_base* ps, BidiIterator pos, int i) : saved_state(i), pstate(ps), position(pos){} +}; + +template +struct saved_assertion : public saved_position +{ + bool positive; + saved_assertion(bool p, const re_syntax_base* ps, BidiIterator pos) + : saved_position(ps, pos, saved_type_assertion), positive(p){} +}; + +template +struct saved_repeater : public saved_state +{ + repeater_count count; + saved_repeater(int i, repeater_count** s, BidiIterator start, int current_recursion_id) + : saved_state(saved_state_repeater_count), count(i, s, start, current_recursion_id){} +}; + +struct saved_extra_block : public saved_state +{ + saved_state *base, *end; + saved_extra_block(saved_state* b, saved_state* e) + : saved_state(saved_state_extra_block), base(b), end(e) {} +}; + +struct save_state_init +{ + saved_state** stack; + save_state_init(saved_state** base, saved_state** end) + : stack(base) + { + *base = static_cast(get_mem_block()); + *end = reinterpret_cast(reinterpret_cast(*base)+BOOST_REGEX_BLOCKSIZE); + --(*end); + (void) new (*end)saved_state(0); + BOOST_REGEX_ASSERT(*end > *base); + } + ~save_state_init() + { + put_mem_block(*stack); + *stack = 0; + } +}; + +template +struct saved_single_repeat : public saved_state +{ + std::size_t count; + const re_repeat* rep; + BidiIterator last_position; + saved_single_repeat(std::size_t c, const re_repeat* r, BidiIterator lp, int arg_id) + : saved_state(arg_id), count(c), rep(r), last_position(lp){} +}; + +template +struct saved_recursion : public saved_state +{ + saved_recursion(int idx, const re_syntax_base* p, Results* pr, Results* pr2) + : saved_state(14), recursion_id(idx), preturn_address(p), internal_results(*pr), prior_results(*pr2) {} + int recursion_id; + const re_syntax_base* preturn_address; + Results internal_results, prior_results; +}; + +struct saved_change_case : public saved_state +{ + bool icase; + saved_change_case(bool c) : saved_state(18), icase(c) {} +}; + +struct incrementer +{ + incrementer(unsigned* pu) : m_pu(pu) { ++*m_pu; } + ~incrementer() { --*m_pu; } + bool operator > (unsigned i) { return *m_pu > i; } +private: + unsigned* m_pu; +}; + +template +bool perl_matcher::match_all_states() +{ + static matcher_proc_type const s_match_vtable[34] = + { + (&perl_matcher::match_startmark), + &perl_matcher::match_endmark, + &perl_matcher::match_literal, + &perl_matcher::match_start_line, + &perl_matcher::match_end_line, + &perl_matcher::match_wild, + &perl_matcher::match_match, + &perl_matcher::match_word_boundary, + &perl_matcher::match_within_word, + &perl_matcher::match_word_start, + &perl_matcher::match_word_end, + &perl_matcher::match_buffer_start, + &perl_matcher::match_buffer_end, + &perl_matcher::match_backref, + &perl_matcher::match_long_set, + &perl_matcher::match_set, + &perl_matcher::match_jump, + &perl_matcher::match_alt, + &perl_matcher::match_rep, + &perl_matcher::match_combining, + &perl_matcher::match_soft_buffer_end, + &perl_matcher::match_restart_continue, + // Although this next line *should* be evaluated at compile time, in practice + // some compilers (VC++) emit run-time initialisation which breaks thread + // safety, so use a dispatch function instead: + //(::boost::is_random_access_iterator::value ? &perl_matcher::match_dot_repeat_fast : &perl_matcher::match_dot_repeat_slow), + &perl_matcher::match_dot_repeat_dispatch, + &perl_matcher::match_char_repeat, + &perl_matcher::match_set_repeat, + &perl_matcher::match_long_set_repeat, + &perl_matcher::match_backstep, + &perl_matcher::match_assert_backref, + &perl_matcher::match_toggle_case, + &perl_matcher::match_recursion, + &perl_matcher::match_fail, + &perl_matcher::match_accept, + &perl_matcher::match_commit, + &perl_matcher::match_then, + }; + incrementer inc(&m_recursions); + if(inc > 80) + raise_error(traits_inst, regex_constants::error_complexity); + push_recursion_stopper(); + do{ + while(pstate) + { + matcher_proc_type proc = s_match_vtable[pstate->type]; + ++state_count; + if(!(this->*proc)()) + { + if(state_count > max_state_count) + raise_error(traits_inst, regex_constants::error_complexity); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + bool successful_unwind = unwind(false); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(!successful_unwind) + return m_recursive_result; + } + } + }while(unwind(true)); + return m_recursive_result; +} + +template +void perl_matcher::extend_stack() +{ + if(used_block_count) + { + --used_block_count; + saved_state* stack_base; + saved_state* backup_state; + stack_base = static_cast(get_mem_block()); + backup_state = reinterpret_cast(reinterpret_cast(stack_base)+BOOST_REGEX_BLOCKSIZE); + saved_extra_block* block = static_cast(backup_state); + --block; + (void) new (block) saved_extra_block(m_stack_base, m_backup_state); + m_stack_base = stack_base; + m_backup_state = block; + } + else + raise_error(traits_inst, regex_constants::error_stack); +} + +template +inline void perl_matcher::push_matched_paren(int index, const sub_match& sub) +{ + //BOOST_REGEX_ASSERT(index); + saved_matched_paren* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_matched_paren(index, sub); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_case_change(bool c) +{ + //BOOST_REGEX_ASSERT(index); + saved_change_case* pmp = static_cast(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast(m_backup_state); + --pmp; + } + (void) new (pmp)saved_change_case(c); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_recursion_stopper() +{ + saved_state* pmp = m_backup_state; + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = m_backup_state; + --pmp; + } + (void) new (pmp)saved_state(saved_type_recurse); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_assertion(const re_syntax_base* ps, bool positive) +{ + saved_assertion* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_assertion(positive, ps, position); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_alt(const re_syntax_base* ps) +{ + saved_position* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position(ps, position, saved_state_alt); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_non_greedy_repeat(const re_syntax_base* ps) +{ + saved_position* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position(ps, position, saved_state_non_greedy_long_repeat); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_repeater_count(int i, repeater_count** s) +{ + saved_repeater* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_repeater(i, s, position, this->recursion_stack.empty() ? (INT_MIN + 3) : this->recursion_stack.back().idx); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_single_repeat(c, r, last_position, state_id); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_recursion(int idx, const re_syntax_base* p, results_type* presults, results_type* presults2) +{ + saved_recursion* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_recursion(idx, p, presults, presults2); + m_backup_state = pmp; +} + +template +bool perl_matcher::match_toggle_case() +{ + // change our case sensitivity: + push_case_change(this->icase); + this->icase = static_cast(pstate)->icase; + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_startmark() +{ + int index = static_cast(pstate)->index; + icase = static_cast(pstate)->icase; + switch(index) + { + case 0: + pstate = pstate->next.p; + break; + case -1: + case -2: + { + // forward lookahead assert: + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + push_assertion(next_pstate, index == -1); + break; + } + case -3: + { + // independent sub-expression, currently this is always recursive: + bool old_independent = m_independent; + m_independent = true; + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + bool r = false; +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif + r = match_all_states(); + if(!r && !m_independent) + { + // Must be unwinding from a COMMIT/SKIP/PRUNE and the independent + // sub failed, need to unwind everything else: + while(unwind(false)); + return false; + } +#if !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + pstate = next_pstate; + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. + while(unwind(true)) {} + throw; + } +#endif + pstate = next_pstate; + m_independent = old_independent; +#ifdef BOOST_REGEX_MATCH_EXTRA + if(r && (m_match_flags & match_extra)) + { + // + // our captures have been stored in *m_presult + // we need to unpack them, and insert them + // back in the right order when we unwind the stack: + // + match_results temp_match(*m_presult); + unsigned i; + for(i = 0; i < temp_match.size(); ++i) + (*m_presult)[i].get_captures().clear(); + // match everything else: +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif + r = match_all_states(); +#if !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + pstate = next_pstate; + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. + while(unwind(true)) {} + throw; + } +#endif + // now place the stored captures back: + for(i = 0; i < temp_match.size(); ++i) + { + typedef typename sub_match::capture_sequence_type seq; + seq& s1 = (*m_presult)[i].get_captures(); + const seq& s2 = temp_match[i].captures(); + s1.insert( + s1.end(), + s2.begin(), + s2.end()); + } + } +#endif + return r; + } + case -4: + { + // conditional expression: + const re_alt* alt = static_cast(pstate->next.p); + BOOST_REGEX_ASSERT(alt->type == syntax_element_alt); + pstate = alt->next.p; + if(pstate->type == syntax_element_assert_backref) + { + if(!match_assert_backref()) + pstate = alt->alt.p; + break; + } + else + { + // zero width assertion, have to match this recursively: + BOOST_REGEX_ASSERT(pstate->type == syntax_element_startmark); + bool negated = static_cast(pstate)->index == -2; + BidiIterator saved_position = position; + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif + bool r = match_all_states(); + position = saved_position; + if(negated) + r = !r; + if(r) + pstate = next_pstate; + else + pstate = alt->alt.p; +#if !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + pstate = next_pstate; + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. + while(unwind(true)){} + throw; + } +#endif + break; + } + } + case -5: + { + push_matched_paren(0, (*m_presult)[0]); + m_presult->set_first(position, 0, true); + pstate = pstate->next.p; + break; + } + default: + { + BOOST_REGEX_ASSERT(index > 0); + if((m_match_flags & match_nosubs) == 0) + { + push_matched_paren(index, (*m_presult)[index]); + m_presult->set_first(position, index); + } + pstate = pstate->next.p; + break; + } + } + return true; +} + +template +bool perl_matcher::match_alt() +{ + bool take_first, take_second; + const re_alt* jmp = static_cast(pstate); + + // find out which of these two alternatives we need to take: + if(position == last) + { + take_first = jmp->can_be_null & mask_take; + take_second = jmp->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, jmp->_map, (unsigned char)mask_take); + take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip); + } + + if(take_first) + { + // we can take the first alternative, + // see if we need to push next alternative: + if(take_second) + { + push_alt(jmp->alt.p); + } + pstate = pstate->next.p; + return true; + } + if(take_second) + { + pstate = jmp->alt.p; + return true; + } + return false; // neither option is possible +} + +template +bool perl_matcher::match_rep() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127 4244) +#endif +#ifdef BOOST_BORLANDC +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + + // find out which of these two alternatives we need to take: + bool take_first, take_second; + if(position == last) + { + take_first = rep->can_be_null & mask_take; + take_second = rep->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, rep->_map, (unsigned char)mask_take); + take_second = can_start(*position, rep->_map, (unsigned char)mask_skip); + } + + if((m_backup_state->state_id != saved_state_repeater_count) + || (static_cast*>(m_backup_state)->count.get_id() != rep->state_id) + || (next_count->get_id() != rep->state_id)) + { + // we're moving to a different repeat from the last + // one, so set up a counter object: + push_repeater_count(rep->state_id, &next_count); + } + // + // If we've had at least one repeat already, and the last one + // matched the NULL string then set the repeat count to + // maximum: + // + next_count->check_null_repeat(position, rep->max); + + if(next_count->get_count() < rep->min) + { + // we must take the repeat: + if(take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + return false; + } + + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // try and take the repeat if we can: + if((next_count->get_count() < rep->max) && take_first) + { + if(take_second) + { + // store position in case we fail: + push_alt(rep->alt.p); + } + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + else if(take_second) + { + pstate = rep->alt.p; + return true; + } + return false; // can't take anything, fail... + } + else // non-greedy + { + // try and skip the repeat if we can: + if(take_second) + { + if((next_count->get_count() < rep->max) && take_first) + { + // store position in case we fail: + push_non_greedy_repeat(rep->next.p); + } + pstate = rep->alt.p; + return true; + } + if((next_count->get_count() < rep->max) && take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + } + return false; +#ifdef BOOST_BORLANDC +#pragma option pop +#endif +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_dot_repeat_slow() +{ + std::size_t count = 0; + const re_repeat* rep = static_cast(pstate); + re_syntax_base* psingle = rep->next.p; + // match compulsory repeats first: + while(count < rep->min) + { + pstate = psingle; + if(!match_wild()) + return false; + ++count; + } + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // repeat for as long as we can: + while(count < rep->max) + { + pstate = psingle; + if(!match_wild()) + break; + ++count; + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_slow_dot); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +} + +template +bool perl_matcher::match_dot_repeat_fast() +{ + if(m_match_flags & match_not_dot_null) + return match_dot_repeat_slow(); + if((static_cast(pstate->next.p)->mask & match_any_mask) == 0) + return match_dot_repeat_slow(); + + const re_repeat* rep = static_cast(pstate); + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t count = static_cast((std::min)(static_cast(std::distance(position, last)), greedy ? rep->max : rep->min)); + if(rep->min > count) + { + position = last; + return false; // not enough text left to match + } + std::advance(position, count); + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_fast_dot); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +} + +template +bool perl_matcher::match_char_repeat() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef BOOST_BORLANDC +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + BOOST_REGEX_ASSERT(1 == static_cast(rep->next.p)->length); + const char_type what = *reinterpret_cast(static_cast(rep->next.p) + 1); + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::boost::is_random_access_iterator::value) + { + BidiIterator end = position; + // Move end forward by "desired", preferably without using distance or advance if we can + // as these can be slow for some iterator types. + std::size_t len = (desired == (std::numeric_limits::max)()) ? 0u : std::distance(position, last); + if(desired >= len) + end = last; + else + std::advance(end, desired); + BidiIterator origin(position); + while((position != end) && (traits_inst.translate(*position, icase) == what)) + { + ++position; + } + count = (unsigned)std::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what)) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_char); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef BOOST_BORLANDC +#pragma option pop +#endif +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_set_repeat() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef BOOST_BORLANDC +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + const unsigned char* map = static_cast(rep->next.p)->_map; + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::boost::is_random_access_iterator::value) + { + BidiIterator end = position; + // Move end forward by "desired", preferably without using distance or advance if we can + // as these can be slow for some iterator types. + std::size_t len = (desired == (std::numeric_limits::max)()) ? 0u : std::distance(position, last); + if(desired >= len) + end = last; + else + std::advance(end, desired); + BidiIterator origin(position); + while((position != end) && map[static_cast(traits_inst.translate(*position, icase))]) + { + ++position; + } + count = (unsigned)std::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && map[static_cast(traits_inst.translate(*position, icase))]) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_short_set); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef BOOST_BORLANDC +#pragma option pop +#endif +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_long_set_repeat() +{ +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef BOOST_BORLANDC +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + typedef typename traits::char_class_type m_type; + const re_repeat* rep = static_cast(pstate); + const re_set_long* set = static_cast*>(pstate->next.p); + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::boost::is_random_access_iterator::value) + { + BidiIterator end = position; + // Move end forward by "desired", preferably without using distance or advance if we can + // as these can be slow for some iterator types. + std::size_t len = (desired == (std::numeric_limits::max)()) ? 0u : std::distance(position, last); + if(desired >= len) + end = last; + else + std::advance(end, desired); + BidiIterator origin(position); + while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + } + count = (unsigned)std::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_long_set); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef BOOST_BORLANDC +#pragma option pop +#endif +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_recursion() +{ + BOOST_REGEX_ASSERT(pstate->type == syntax_element_recurse); + // + // See if we've seen this recursion before at this location, if we have then + // we need to prevent infinite recursion: + // + for(typename std::vector >::reverse_iterator i = recursion_stack.rbegin(); i != recursion_stack.rend(); ++i) + { + if(i->idx == static_cast(static_cast(pstate)->alt.p)->index) + { + if(i->location_of_start == position) + return false; + break; + } + } + // + // Backup call stack: + // + push_recursion_pop(); + // + // Set new call stack: + // + if(recursion_stack.capacity() == 0) + { + recursion_stack.reserve(50); + } + recursion_stack.push_back(recursion_info()); + recursion_stack.back().preturn_address = pstate->next.p; + recursion_stack.back().results = *m_presult; + pstate = static_cast(pstate)->alt.p; + recursion_stack.back().idx = static_cast(pstate)->index; + recursion_stack.back().location_of_start = position; + //if(static_cast(pstate)->state_id > 0) + { + push_repeater_count(-(2 + static_cast(pstate)->index), &next_count); + } + + return true; +} + +template +bool perl_matcher::match_endmark() +{ + int index = static_cast(pstate)->index; + icase = static_cast(pstate)->icase; + if(index > 0) + { + if((m_match_flags & match_nosubs) == 0) + { + m_presult->set_second(position, index); + } + if(!recursion_stack.empty()) + { + if(index == recursion_stack.back().idx) + { + pstate = recursion_stack.back().preturn_address; + *m_presult = recursion_stack.back().results; + push_recursion(recursion_stack.back().idx, recursion_stack.back().preturn_address, m_presult, &recursion_stack.back().results); + recursion_stack.pop_back(); + push_repeater_count(-(2 + index), &next_count); + } + } + } + else if((index < 0) && (index != -4)) + { + // matched forward lookahead: + pstate = 0; + return true; + } + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_match() +{ + if(!recursion_stack.empty()) + { + BOOST_REGEX_ASSERT(0 == recursion_stack.back().idx); + pstate = recursion_stack.back().preturn_address; + push_recursion(recursion_stack.back().idx, recursion_stack.back().preturn_address, m_presult, &recursion_stack.back().results); + *m_presult = recursion_stack.back().results; + recursion_stack.pop_back(); + return true; + } + if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first)) + return false; + if((m_match_flags & match_all) && (position != last)) + return false; + if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base)) + return false; + m_presult->set_second(position); + pstate = 0; + m_has_found_match = true; + if((m_match_flags & match_posix) == match_posix) + { + m_result.maybe_assign(*m_presult); + if((m_match_flags & match_any) == 0) + return false; + } +#ifdef BOOST_REGEX_MATCH_EXTRA + if(match_extra & m_match_flags) + { + for(unsigned i = 0; i < m_presult->size(); ++i) + if((*m_presult)[i].matched) + ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]); + } +#endif + return true; +} + +template +bool perl_matcher::match_commit() +{ + // Ideally we would just junk all the states that are on the stack, + // however we might not unwind correctly in that case, so for now, + // just mark that we don't backtrack into whatever is left (or rather + // we'll unwind it unconditionally without pausing to try other matches). + + switch(static_cast(pstate)->action) + { + case commit_commit: + restart = last; + break; + case commit_skip: + if(base != position) + { + restart = position; + // Have to decrement restart since it will get incremented again later: + --restart; + } + break; + case commit_prune: + break; + } + + saved_state* pmp = m_backup_state; + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = m_backup_state; + --pmp; + } + (void) new (pmp)saved_state(16); + m_backup_state = pmp; + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_then() +{ + // Just leave a mark that we need to skip to next alternative: + saved_state* pmp = m_backup_state; + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = m_backup_state; + --pmp; + } + (void) new (pmp)saved_state(17); + m_backup_state = pmp; + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::skip_until_paren(int index, bool have_match) +{ + while(pstate) + { + if(pstate->type == syntax_element_endmark) + { + if(static_cast(pstate)->index == index) + { + if(have_match) + return this->match_endmark(); + pstate = pstate->next.p; + return true; + } + else + { + // Unenclosed closing ), occurs when (*ACCEPT) is inside some other + // parenthesis which may or may not have other side effects associated with it. + const re_syntax_base* sp = pstate; + match_endmark(); + if(!pstate) + { + unwind(true); + // unwind may leave pstate NULL if we've unwound a forward lookahead, in which + // case just move to the next state and keep looking... + if (!pstate) + pstate = sp->next.p; + } + } + continue; + } + else if(pstate->type == syntax_element_match) + return true; + else if(pstate->type == syntax_element_startmark) + { + int idx = static_cast(pstate)->index; + pstate = pstate->next.p; + skip_until_paren(idx, false); + continue; + } + pstate = pstate->next.p; + } + return true; +} + +/**************************************************************************** + +Unwind and associated procedures follow, these perform what normal stack +unwinding does in the recursive implementation. + +****************************************************************************/ + +template +bool perl_matcher::unwind(bool have_match) +{ + static unwind_proc_type const s_unwind_table[19] = + { + &perl_matcher::unwind_end, + &perl_matcher::unwind_paren, + &perl_matcher::unwind_recursion_stopper, + &perl_matcher::unwind_assertion, + &perl_matcher::unwind_alt, + &perl_matcher::unwind_repeater_counter, + &perl_matcher::unwind_extra_block, + &perl_matcher::unwind_greedy_single_repeat, + &perl_matcher::unwind_slow_dot_repeat, + &perl_matcher::unwind_fast_dot_repeat, + &perl_matcher::unwind_char_repeat, + &perl_matcher::unwind_short_set_repeat, + &perl_matcher::unwind_long_set_repeat, + &perl_matcher::unwind_non_greedy_repeat, + &perl_matcher::unwind_recursion, + &perl_matcher::unwind_recursion_pop, + &perl_matcher::unwind_commit, + &perl_matcher::unwind_then, + &perl_matcher::unwind_case, + }; + + m_recursive_result = have_match; + m_unwound_lookahead = false; + m_unwound_alt = false; + unwind_proc_type unwinder; + bool cont; + // + // keep unwinding our stack until we have something to do: + // + do + { + unwinder = s_unwind_table[m_backup_state->state_id]; + cont = (this->*unwinder)(m_recursive_result); + }while(cont); + // + // return true if we have more states to try: + // + return pstate ? true : false; +} + +template +bool perl_matcher::unwind_end(bool) +{ + pstate = 0; // nothing left to search + return false; // end of stack nothing more to search +} + +template +bool perl_matcher::unwind_case(bool) +{ + saved_change_case* pmp = static_cast(m_backup_state); + icase = pmp->icase; + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +bool perl_matcher::unwind_paren(bool have_match) +{ + saved_matched_paren* pmp = static_cast*>(m_backup_state); + // restore previous values if no match was found: + if(!have_match) + { + m_presult->set_first(pmp->sub.first, pmp->index, pmp->index == 0); + m_presult->set_second(pmp->sub.second, pmp->index, pmp->sub.matched, pmp->index == 0); + } +#ifdef BOOST_REGEX_MATCH_EXTRA + // + // we have a match, push the capture information onto the stack: + // + else if(pmp->sub.matched && (match_extra & m_match_flags)) + ((*m_presult)[pmp->index]).get_captures().push_back(pmp->sub); +#endif + // unwind stack: + m_backup_state = pmp+1; + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp); + return true; // keep looking +} + +template +bool perl_matcher::unwind_recursion_stopper(bool) +{ + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++); + pstate = 0; // nothing left to search + return false; // end of stack nothing more to search +} + +template +bool perl_matcher::unwind_assertion(bool r) +{ + saved_assertion* pmp = static_cast*>(m_backup_state); + pstate = pmp->pstate; + position = pmp->position; + bool result = (r == pmp->positive); + m_recursive_result = pmp->positive ? r : !r; + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + m_unwound_lookahead = true; + return !result; // return false if the assertion was matched to stop search. +} + +template +bool perl_matcher::unwind_alt(bool r) +{ + saved_position* pmp = static_cast*>(m_backup_state); + if(!r) + { + pstate = pmp->pstate; + position = pmp->position; + } + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + m_unwound_alt = !r; + return r; +} + +template +bool perl_matcher::unwind_repeater_counter(bool) +{ + saved_repeater* pmp = static_cast*>(m_backup_state); + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; // keep looking +} + +template +bool perl_matcher::unwind_extra_block(bool) +{ + saved_extra_block* pmp = static_cast(m_backup_state); + void* condemmed = m_stack_base; + m_stack_base = pmp->base; + m_backup_state = pmp->end; + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp); + put_mem_block(condemmed); + return true; // keep looking +} + +template +inline void perl_matcher::destroy_single_repeat() +{ + saved_single_repeat* p = static_cast*>(m_backup_state); + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(p++); + m_backup_state = p; +} + +template +bool perl_matcher::unwind_greedy_single_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + + count -= rep->min; + + if((m_match_flags & match_partial) && (position == last)) + m_has_partial_match = true; + + BOOST_REGEX_ASSERT(count); + position = pmp->last_position; + + // backtrack till we can skip out: + do + { + --position; + --count; + ++state_count; + }while(count && !can_start(*position, rep->_map, mask_skip)); + + // if we've hit base, destroy this state: + if(count == 0) + { + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count + rep->min; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_slow_dot_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + BOOST_REGEX_ASSERT(rep->type == syntax_element_dot_rep); + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_wild); + + BOOST_REGEX_ASSERT(count < rep->max); + pstate = rep->next.p; + position = pmp->last_position; + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(!match_wild()) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++count; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_fast_dot_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + + BOOST_REGEX_ASSERT(count < rep->max); + position = pmp->last_position; + if(position != last) + { + + // wind forward until we can skip out of the repeat: + do + { + ++position; + ++count; + ++state_count; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_char_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + pstate = rep->next.p; + const char_type what = *reinterpret_cast(static_cast(pstate) + 1); + position = pmp->last_position; + + BOOST_REGEX_ASSERT(rep->type == syntax_element_char_rep); + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_literal); + BOOST_REGEX_ASSERT(count < rep->max); + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(traits_inst.translate(*position, icase) != what) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++count; + ++ position; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_short_set_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + pstate = rep->next.p; + const unsigned char* map = static_cast(rep->next.p)->_map; + position = pmp->last_position; + + BOOST_REGEX_ASSERT(rep->type == syntax_element_short_set_rep); + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_set); + BOOST_REGEX_ASSERT(count < rep->max); + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(!map[static_cast(traits_inst.translate(*position, icase))]) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++count; + ++ position; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_long_set_repeat(bool r) +{ + typedef typename traits::char_class_type m_type; + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + pstate = rep->next.p; + const re_set_long* set = static_cast*>(pstate); + position = pmp->last_position; + + BOOST_REGEX_ASSERT(rep->type == syntax_element_long_set_rep); + BOOST_REGEX_ASSERT(rep->next.p != 0); + BOOST_REGEX_ASSERT(rep->alt.p != 0); + BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_long_set); + BOOST_REGEX_ASSERT(count < rep->max); + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(position == re_is_set_member(position, last, set, re.get_data(), icase)) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++position; + ++count; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_non_greedy_repeat(bool r) +{ + saved_position* pmp = static_cast*>(m_backup_state); + if(!r) + { + position = pmp->position; + pstate = pmp->pstate; + ++(*next_count); + } + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + return r; +} + +template +bool perl_matcher::unwind_recursion(bool r) +{ + // We are backtracking back inside a recursion, need to push the info + // back onto the recursion stack, and do so unconditionally, otherwise + // we can get mismatched pushes and pops... + saved_recursion* pmp = static_cast*>(m_backup_state); + if (!r) + { + recursion_stack.push_back(recursion_info()); + recursion_stack.back().idx = pmp->recursion_id; + recursion_stack.back().preturn_address = pmp->preturn_address; + recursion_stack.back().results = pmp->prior_results; + recursion_stack.back().location_of_start = position; + *m_presult = pmp->internal_results; + } + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +bool perl_matcher::unwind_recursion_pop(bool r) +{ + // Backtracking out of a recursion, we must pop state off the recursion + // stack unconditionally to ensure matched pushes and pops: + saved_state* pmp = static_cast(m_backup_state); + if (!r && !recursion_stack.empty()) + { + *m_presult = recursion_stack.back().results; + position = recursion_stack.back().location_of_start; + recursion_stack.pop_back(); + } + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +void perl_matcher::push_recursion_pop() +{ + saved_state* pmp = static_cast(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast(m_backup_state); + --pmp; + } + (void) new (pmp)saved_state(15); + m_backup_state = pmp; +} + +template +bool perl_matcher::unwind_commit(bool b) +{ + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++); + while(unwind(b) && !m_unwound_lookahead){} + if(m_unwound_lookahead && pstate) + { + // + // If we stop because we just unwound an assertion, put the + // commit state back on the stack again: + // + m_unwound_lookahead = false; + saved_state* pmp = m_backup_state; + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = m_backup_state; + --pmp; + } + (void) new (pmp)saved_state(16); + m_backup_state = pmp; + } + // This prevents us from stopping when we exit from an independent sub-expression: + m_independent = false; + return false; +} + +template +bool perl_matcher::unwind_then(bool b) +{ + // Unwind everything till we hit an alternative: + boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++); + bool result = false; + result = unwind(b); + while(result && !m_unwound_alt) + { + result = unwind(b); + } + // We're now pointing at the next alternative, need one more backtrack + // since *all* the other alternatives must fail once we've reached a THEN clause: + if(result && m_unwound_alt) + unwind(b); + return false; +} + +} // namespace BOOST_REGEX_DETAIL_NS +} // namespace boost + +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +#endif diff --git a/include/boost/regex/v5/primary_transform.hpp b/include/boost/regex/v5/primary_transform.hpp new file mode 100644 index 00000000..ed46f392 --- /dev/null +++ b/include/boost/regex/v5/primary_transform.hpp @@ -0,0 +1,120 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: primary_transform.hpp + * VERSION: see + * DESCRIPTION: Heuristically determines the sort string format in use + * by the current locale. + */ + +#ifndef BOOST_REGEX_PRIMARY_TRANSFORM +#define BOOST_REGEX_PRIMARY_TRANSFORM + +namespace boost{ + namespace BOOST_REGEX_DETAIL_NS{ + + +enum{ + sort_C, + sort_fixed, + sort_delim, + sort_unknown +}; + +template +unsigned count_chars(const S& s, charT c) +{ + // + // Count how many occurrences of character c occur + // in string s: if c is a delimeter between collation + // fields, then this should be the same value for all + // sort keys: + // + unsigned int count = 0; + for(unsigned pos = 0; pos < s.size(); ++pos) + { + if(s[pos] == c) ++count; + } + return count; +} + + +template +unsigned find_sort_syntax(const traits* pt, charT* delim) +{ + // + // compare 'a' with 'A' to see how similar they are, + // should really use a-accute but we can't portably do that, + // + typedef typename traits::string_type string_type; + typedef typename traits::char_type char_type; + + // Suppress incorrect warning for MSVC + (void)pt; + + char_type a[2] = {'a', '\0', }; + string_type sa(pt->transform(a, a+1)); + if(sa == a) + { + *delim = 0; + return sort_C; + } + char_type A[2] = { 'A', '\0', }; + string_type sA(pt->transform(A, A+1)); + char_type c[2] = { ';', '\0', }; + string_type sc(pt->transform(c, c+1)); + + int pos = 0; + while((pos <= static_cast(sa.size())) && (pos <= static_cast(sA.size())) && (sa[pos] == sA[pos])) ++pos; + --pos; + if(pos < 0) + { + *delim = 0; + return sort_unknown; + } + // + // at this point sa[pos] is either the end of a fixed width field + // or the character that acts as a delimiter: + // + charT maybe_delim = sa[pos]; + if((pos != 0) && (count_chars(sa, maybe_delim) == count_chars(sA, maybe_delim)) && (count_chars(sa, maybe_delim) == count_chars(sc, maybe_delim))) + { + *delim = maybe_delim; + return sort_delim; + } + // + // OK doen't look like a delimiter, try for fixed width field: + // + if((sa.size() == sA.size()) && (sa.size() == sc.size())) + { + // note assumes that the fixed width field is less than + // (numeric_limits::max)(), should be true for all types + // I can't imagine 127 character fields... + *delim = static_cast(++pos); + return sort_fixed; + } + // + // don't know what it is: + // + *delim = 0; + return sort_unknown; +} + + + } // namespace BOOST_REGEX_DETAIL_NS +} // namespace boost + +#endif + + + diff --git a/include/boost/regex/v5/regbase.hpp b/include/boost/regex/v5/regbase.hpp new file mode 100644 index 00000000..5cefb36b --- /dev/null +++ b/include/boost/regex/v5/regbase.hpp @@ -0,0 +1,158 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regbase.cpp + * VERSION see + * DESCRIPTION: Declares class regbase. + */ + +#ifndef BOOST_REGEX_V5_REGBASE_HPP +#define BOOST_REGEX_V5_REGBASE_HPP + +namespace boost{ +// +// class regbase +// handles error codes and flags +// +class regbase +{ +public: + enum flag_type_ + { + // + // Divide the flags up into logical groups: + // bits 0-7 indicate main synatx type. + // bits 8-15 indicate syntax subtype. + // bits 16-31 indicate options that are common to all + // regex syntaxes. + // In all cases the default is 0. + // + // Main synatx group: + // + perl_syntax_group = 0, // default + basic_syntax_group = 1, // POSIX basic + literal = 2, // all characters are literals + main_option_type = literal | basic_syntax_group | perl_syntax_group, // everything! + // + // options specific to perl group: + // + no_bk_refs = 1 << 8, // \d not allowed + no_perl_ex = 1 << 9, // disable perl extensions + no_mod_m = 1 << 10, // disable Perl m modifier + mod_x = 1 << 11, // Perl x modifier + mod_s = 1 << 12, // force s modifier on (overrides match_not_dot_newline) + no_mod_s = 1 << 13, // force s modifier off (overrides match_not_dot_newline) + + // + // options specific to basic group: + // + no_char_classes = 1 << 8, // [[:CLASS:]] not allowed + no_intervals = 1 << 9, // {x,y} not allowed + bk_plus_qm = 1 << 10, // uses \+ and \? + bk_vbar = 1 << 11, // use \| for alternatives + emacs_ex = 1 << 12, // enables emacs extensions + + // + // options common to all groups: + // + no_escape_in_lists = 1 << 16, // '\' not special inside [...] + newline_alt = 1 << 17, // \n is the same as | + no_except = 1 << 18, // no exception on error + failbit = 1 << 19, // error flag + icase = 1 << 20, // characters are matched regardless of case + nocollate = 0, // don't use locale specific collation (deprecated) + collate = 1 << 21, // use locale specific collation + nosubs = 1 << 22, // don't mark sub-expressions + save_subexpression_location = 1 << 23, // save subexpression locations + no_empty_expressions = 1 << 24, // no empty expressions allowed + optimize = 0, // not really supported + + + + basic = basic_syntax_group | collate | no_escape_in_lists, + extended = no_bk_refs | collate | no_perl_ex | no_escape_in_lists, + normal = 0, + emacs = basic_syntax_group | collate | emacs_ex | bk_vbar, + awk = no_bk_refs | collate | no_perl_ex, + grep = basic | newline_alt, + egrep = extended | newline_alt, + sed = basic, + perl = normal, + ECMAScript = normal, + JavaScript = normal, + JScript = normal + }; + typedef unsigned int flag_type; + + enum restart_info + { + restart_any = 0, + restart_word = 1, + restart_line = 2, + restart_buf = 3, + restart_continue = 4, + restart_lit = 5, + restart_fixed_lit = 6, + restart_count = 7 + }; +}; + +// +// provide std lib proposal compatible constants: +// +namespace regex_constants{ + + enum flag_type_ + { + + no_except = ::boost::regbase::no_except, + failbit = ::boost::regbase::failbit, + literal = ::boost::regbase::literal, + icase = ::boost::regbase::icase, + nocollate = ::boost::regbase::nocollate, + collate = ::boost::regbase::collate, + nosubs = ::boost::regbase::nosubs, + optimize = ::boost::regbase::optimize, + bk_plus_qm = ::boost::regbase::bk_plus_qm, + bk_vbar = ::boost::regbase::bk_vbar, + no_intervals = ::boost::regbase::no_intervals, + no_char_classes = ::boost::regbase::no_char_classes, + no_escape_in_lists = ::boost::regbase::no_escape_in_lists, + no_mod_m = ::boost::regbase::no_mod_m, + mod_x = ::boost::regbase::mod_x, + mod_s = ::boost::regbase::mod_s, + no_mod_s = ::boost::regbase::no_mod_s, + save_subexpression_location = ::boost::regbase::save_subexpression_location, + no_empty_expressions = ::boost::regbase::no_empty_expressions, + + basic = ::boost::regbase::basic, + extended = ::boost::regbase::extended, + normal = ::boost::regbase::normal, + emacs = ::boost::regbase::emacs, + awk = ::boost::regbase::awk, + grep = ::boost::regbase::grep, + egrep = ::boost::regbase::egrep, + sed = basic, + perl = normal, + ECMAScript = normal, + JavaScript = normal, + JScript = normal + }; + typedef ::boost::regbase::flag_type syntax_option_type; + +} // namespace regex_constants + +} // namespace boost + +#endif + diff --git a/include/boost/regex/v5/regex.hpp b/include/boost/regex/v5/regex.hpp new file mode 100644 index 00000000..eb6dc728 --- /dev/null +++ b/include/boost/regex/v5/regex.hpp @@ -0,0 +1,106 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex.cpp + * VERSION see + * DESCRIPTION: Declares boost::basic_regex<> and associated + * functions and classes. This header is the main + * entry point for the template regex code. + */ + +#ifndef BOOST_RE_REGEX_HPP_INCLUDED +#define BOOST_RE_REGEX_HPP_INCLUDED + +#ifdef __cplusplus + +// what follows is all C++ don't include in C builds!! + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost{ +#ifdef BOOST_REGEX_NO_FWD +typedef basic_regex > regex; +#ifndef BOOST_NO_WREGEX +typedef basic_regex > wregex; +#endif +#endif + +typedef match_results cmatch; +typedef match_results smatch; +#ifndef BOOST_NO_WREGEX +typedef match_results wcmatch; +typedef match_results wsmatch; +#endif + +} // namespace boost + +#include +#include +#include +#include +#include +#include +#include +#include + +#endif // __cplusplus + +#endif // include + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/include/boost/regex/v5/regex_format.hpp b/include/boost/regex/v5/regex_format.hpp new file mode 100644 index 00000000..4c82d185 --- /dev/null +++ b/include/boost/regex/v5/regex_format.hpp @@ -0,0 +1,1124 @@ +/* + * + * Copyright (c) 1998-2009 John Maddock + * Copyright 2008 Eric Niebler. + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_format.hpp + * VERSION see + * DESCRIPTION: Provides formatting output routines for search and replace + * operations. Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_FORMAT_HPP +#define BOOST_REGEX_FORMAT_HPP + +#include +#include + +namespace boost{ + +// +// Forward declaration: +// + template >::allocator_type > +class match_results; + +namespace BOOST_REGEX_DETAIL_NS{ + +// +// struct trivial_format_traits: +// defines minimum localisation support for formatting +// in the case that the actual regex traits is unavailable. +// +template +struct trivial_format_traits +{ + typedef charT char_type; + + static std::ptrdiff_t length(const charT* p) + { + return global_length(p); + } + static charT tolower(charT c) + { + return ::boost::BOOST_REGEX_DETAIL_NS::global_lower(c); + } + static charT toupper(charT c) + { + return ::boost::BOOST_REGEX_DETAIL_NS::global_upper(c); + } + static int value(const charT c, int radix) + { + int result = global_value(c); + return result >= radix ? -1 : result; + } + int toi(const charT*& p1, const charT* p2, int radix)const + { + return (int)global_toi(p1, p2, radix, *this); + } +}; + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#pragma warning(disable:26812) +#endif +template +class basic_regex_formatter +{ +public: + typedef typename traits::char_type char_type; + basic_regex_formatter(OutputIterator o, const Results& r, const traits& t) + : m_traits(t), m_results(r), m_out(o), m_position(), m_end(), m_flags(), m_state(output_copy), m_restore_state(output_copy), m_have_conditional(false) {} + OutputIterator format(ForwardIter p1, ForwardIter p2, match_flag_type f); + OutputIterator format(ForwardIter p1, match_flag_type f) + { + return format(p1, p1 + m_traits.length(p1), f); + } +private: + typedef typename Results::value_type sub_match_type; + enum output_state + { + output_copy, + output_next_lower, + output_next_upper, + output_lower, + output_upper, + output_none + }; + + void put(char_type c); + void put(const sub_match_type& sub); + void format_all(); + void format_perl(); + void format_escape(); + void format_conditional(); + void format_until_scope_end(); + bool handle_perl_verb(bool have_brace); + + inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j, const std::integral_constant&) + { + std::vector v(i, j); + return (i != j) ? this->m_results.named_subexpression(&v[0], &v[0] + v.size()) + : this->m_results.named_subexpression(static_cast(0), static_cast(0)); + } + inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j, const std::integral_constant&) + { + return this->m_results.named_subexpression(i, j); + } + inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j) + { + typedef typename std::is_convertible::type tag_type; + return get_named_sub(i, j, tag_type()); + } + inline int get_named_sub_index(ForwardIter i, ForwardIter j, const std::integral_constant&) + { + std::vector v(i, j); + return (i != j) ? this->m_results.named_subexpression_index(&v[0], &v[0] + v.size()) + : this->m_results.named_subexpression_index(static_cast(0), static_cast(0)); + } + inline int get_named_sub_index(ForwardIter i, ForwardIter j, const std::integral_constant&) + { + return this->m_results.named_subexpression_index(i, j); + } + inline int get_named_sub_index(ForwardIter i, ForwardIter j) + { + typedef typename std::is_convertible::type tag_type; + return get_named_sub_index(i, j, tag_type()); + } +#ifdef BOOST_REGEX_MSVC + // msvc-8.0 issues a spurious warning on the call to std::advance here: +#pragma warning(push) +#pragma warning(disable:4244) +#endif + inline int toi(ForwardIter& i, ForwardIter j, int base, const std::integral_constant&) + { + if(i != j) + { + std::vector v(i, j); + const char_type* start = &v[0]; + const char_type* pos = start; + int r = (int)m_traits.toi(pos, &v[0] + v.size(), base); + std::advance(i, pos - start); + return r; + } + return -1; + } +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + inline int toi(ForwardIter& i, ForwardIter j, int base, const std::integral_constant&) + { + return m_traits.toi(i, j, base); + } + inline int toi(ForwardIter& i, ForwardIter j, int base) + { +#if defined(_MSC_VER) && defined(__INTEL_COMPILER) && ((__INTEL_COMPILER == 9999) || (__INTEL_COMPILER == 1210)) + // Workaround for Intel support issue #656654. + // See also https://svn.boost.org/trac/boost/ticket/6359 + return toi(i, j, base, std::integral_constant()); +#else + typedef typename std::is_convertible::type tag_type; + return toi(i, j, base, tag_type()); +#endif + } + + const traits& m_traits; // the traits class for localised formatting operations + const Results& m_results; // the match_results being used. + OutputIterator m_out; // where to send output. + ForwardIter m_position; // format string, current position + ForwardIter m_end; // format string end + match_flag_type m_flags; // format flags to use + output_state m_state; // what to do with the next character + output_state m_restore_state; // what state to restore to. + bool m_have_conditional; // we are parsing a conditional +private: + basic_regex_formatter(const basic_regex_formatter&); + basic_regex_formatter& operator=(const basic_regex_formatter&); +}; +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +template +OutputIterator basic_regex_formatter::format(ForwardIter p1, ForwardIter p2, match_flag_type f) +{ + m_position = p1; + m_end = p2; + m_flags = f; + format_all(); + return m_out; +} + +template +void basic_regex_formatter::format_all() +{ + // over and over: + while(m_position != m_end) + { + switch(*m_position) + { + case '&': + if(m_flags & ::boost::regex_constants::format_sed) + { + ++m_position; + put(m_results[0]); + break; + } + put(*m_position++); + break; + case '\\': + format_escape(); + break; + case '(': + if(m_flags & boost::regex_constants::format_all) + { + ++m_position; + bool have_conditional = m_have_conditional; + m_have_conditional = false; + format_until_scope_end(); + m_have_conditional = have_conditional; + if(m_position == m_end) + return; + BOOST_REGEX_ASSERT(*m_position == static_cast(')')); + ++m_position; // skip the closing ')' + break; + } + put(*m_position); + ++m_position; + break; + case ')': + if(m_flags & boost::regex_constants::format_all) + { + return; + } + put(*m_position); + ++m_position; + break; + case ':': + if((m_flags & boost::regex_constants::format_all) && m_have_conditional) + { + return; + } + put(*m_position); + ++m_position; + break; + case '?': + if(m_flags & boost::regex_constants::format_all) + { + ++m_position; + format_conditional(); + break; + } + put(*m_position); + ++m_position; + break; + case '$': + if((m_flags & format_sed) == 0) + { + format_perl(); + break; + } + // not a special character: + BOOST_REGEX_FALLTHROUGH; + default: + put(*m_position); + ++m_position; + break; + } + } +} + +template +void basic_regex_formatter::format_perl() +{ + // + // On entry *m_position points to a '$' character + // output the information that goes with it: + // + BOOST_REGEX_ASSERT(*m_position == '$'); + // + // see if this is a trailing '$': + // + if(++m_position == m_end) + { + --m_position; + put(*m_position); + ++m_position; + return; + } + // + // OK find out what kind it is: + // + bool have_brace = false; + ForwardIter save_position = m_position; + switch(*m_position) + { + case '&': + ++m_position; + put(this->m_results[0]); + break; + case '`': + ++m_position; + put(this->m_results.prefix()); + break; + case '\'': + ++m_position; + put(this->m_results.suffix()); + break; + case '$': + put(*m_position++); + break; + case '+': + if((++m_position != m_end) && (*m_position == '{')) + { + ForwardIter base = ++m_position; + while((m_position != m_end) && (*m_position != '}')) ++m_position; + if(m_position != m_end) + { + // Named sub-expression: + put(get_named_sub(base, m_position)); + ++m_position; + break; + } + else + { + m_position = --base; + } + } + put((this->m_results)[this->m_results.size() > 1 ? static_cast(this->m_results.size() - 1) : 1]); + break; + case '{': + have_brace = true; + ++m_position; + BOOST_REGEX_FALLTHROUGH; + default: + // see if we have a number: + { + std::ptrdiff_t len = std::distance(m_position, m_end); + //len = (std::min)(static_cast(2), len); + int v = this->toi(m_position, m_position + len, 10); + if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}')))) + { + // Look for a Perl-5.10 verb: + if(!handle_perl_verb(have_brace)) + { + // leave the $ as is, and carry on: + m_position = --save_position; + put(*m_position); + ++m_position; + } + break; + } + // otherwise output sub v: + put(this->m_results[v]); + if(have_brace) + ++m_position; + } + } +} + +template +bool basic_regex_formatter::handle_perl_verb(bool have_brace) +{ + // + // We may have a capitalised string containing a Perl action: + // + static const char_type MATCH[] = { 'M', 'A', 'T', 'C', 'H' }; + static const char_type PREMATCH[] = { 'P', 'R', 'E', 'M', 'A', 'T', 'C', 'H' }; + static const char_type POSTMATCH[] = { 'P', 'O', 'S', 'T', 'M', 'A', 'T', 'C', 'H' }; + static const char_type LAST_PAREN_MATCH[] = { 'L', 'A', 'S', 'T', '_', 'P', 'A', 'R', 'E', 'N', '_', 'M', 'A', 'T', 'C', 'H' }; + static const char_type LAST_SUBMATCH_RESULT[] = { 'L', 'A', 'S', 'T', '_', 'S', 'U', 'B', 'M', 'A', 'T', 'C', 'H', '_', 'R', 'E', 'S', 'U', 'L', 'T' }; + static const char_type LAST_SUBMATCH_RESULT_ALT[] = { '^', 'N' }; + + if(m_position == m_end) + return false; + if(have_brace && (*m_position == '^')) + ++m_position; + + std::ptrdiff_t max_len = m_end - m_position; + + if((max_len >= 5) && std::equal(m_position, m_position + 5, MATCH)) + { + m_position += 5; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 5; + return false; + } + } + put(this->m_results[0]); + return true; + } + if((max_len >= 8) && std::equal(m_position, m_position + 8, PREMATCH)) + { + m_position += 8; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 8; + return false; + } + } + put(this->m_results.prefix()); + return true; + } + if((max_len >= 9) && std::equal(m_position, m_position + 9, POSTMATCH)) + { + m_position += 9; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 9; + return false; + } + } + put(this->m_results.suffix()); + return true; + } + if((max_len >= 16) && std::equal(m_position, m_position + 16, LAST_PAREN_MATCH)) + { + m_position += 16; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 16; + return false; + } + } + put((this->m_results)[this->m_results.size() > 1 ? static_cast(this->m_results.size() - 1) : 1]); + return true; + } + if((max_len >= 20) && std::equal(m_position, m_position + 20, LAST_SUBMATCH_RESULT)) + { + m_position += 20; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 20; + return false; + } + } + put(this->m_results.get_last_closed_paren()); + return true; + } + if((max_len >= 2) && std::equal(m_position, m_position + 2, LAST_SUBMATCH_RESULT_ALT)) + { + m_position += 2; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 2; + return false; + } + } + put(this->m_results.get_last_closed_paren()); + return true; + } + return false; +} + +template +void basic_regex_formatter::format_escape() +{ + // skip the escape and check for trailing escape: + if(++m_position == m_end) + { + put(static_cast('\\')); + return; + } + // now switch on the escape type: + switch(*m_position) + { + case 'a': + put(static_cast('\a')); + ++m_position; + break; + case 'f': + put(static_cast('\f')); + ++m_position; + break; + case 'n': + put(static_cast('\n')); + ++m_position; + break; + case 'r': + put(static_cast('\r')); + ++m_position; + break; + case 't': + put(static_cast('\t')); + ++m_position; + break; + case 'v': + put(static_cast('\v')); + ++m_position; + break; + case 'x': + if(++m_position == m_end) + { + put(static_cast('x')); + return; + } + // maybe have \x{ddd} + if(*m_position == static_cast('{')) + { + ++m_position; + int val = this->toi(m_position, m_end, 16); + if(val < 0) + { + // invalid value treat everything as literals: + put(static_cast('x')); + put(static_cast('{')); + return; + } + if((m_position == m_end) || (*m_position != static_cast('}'))) + { + --m_position; + while(*m_position != static_cast('\\')) + --m_position; + ++m_position; + put(*m_position++); + return; + } + ++m_position; + put(static_cast(val)); + return; + } + else + { + std::ptrdiff_t len = std::distance(m_position, m_end); + len = (std::min)(static_cast(2), len); + int val = this->toi(m_position, m_position + len, 16); + if(val < 0) + { + --m_position; + put(*m_position++); + return; + } + put(static_cast(val)); + } + break; + case 'c': + if(++m_position == m_end) + { + --m_position; + put(*m_position++); + return; + } + put(static_cast(*m_position++ % 32)); + break; + case 'e': + put(static_cast(27)); + ++m_position; + break; + default: + // see if we have a perl specific escape: + if((m_flags & boost::regex_constants::format_sed) == 0) + { + bool breakout = false; + switch(*m_position) + { + case 'l': + ++m_position; + m_restore_state = m_state; + m_state = output_next_lower; + breakout = true; + break; + case 'L': + ++m_position; + m_state = output_lower; + breakout = true; + break; + case 'u': + ++m_position; + m_restore_state = m_state; + m_state = output_next_upper; + breakout = true; + break; + case 'U': + ++m_position; + m_state = output_upper; + breakout = true; + break; + case 'E': + ++m_position; + m_state = output_copy; + breakout = true; + break; + } + if(breakout) + break; + } + // see if we have a \n sed style backreference: + std::ptrdiff_t len = std::distance(m_position, m_end); + len = (std::min)(static_cast(1), len); + int v = this->toi(m_position, m_position+len, 10); + if((v > 0) || ((v == 0) && (m_flags & ::boost::regex_constants::format_sed))) + { + put(m_results[v]); + break; + } + else if(v == 0) + { + // octal ecape sequence: + --m_position; + len = std::distance(m_position, m_end); + len = (std::min)(static_cast(4), len); + v = this->toi(m_position, m_position + len, 8); + BOOST_REGEX_ASSERT(v >= 0); + put(static_cast(v)); + break; + } + // Otherwise output the character "as is": + put(*m_position++); + break; + } +} + +template +void basic_regex_formatter::format_conditional() +{ + if(m_position == m_end) + { + // oops trailing '?': + put(static_cast('?')); + return; + } + int v; + if(*m_position == '{') + { + ForwardIter base = m_position; + ++m_position; + v = this->toi(m_position, m_end, 10); + if(v < 0) + { + // Try a named subexpression: + while((m_position != m_end) && (*m_position != '}')) + ++m_position; + v = this->get_named_sub_index(base + 1, m_position); + } + if((v < 0) || (*m_position != '}')) + { + m_position = base; + // oops trailing '?': + put(static_cast('?')); + return; + } + // Skip trailing '}': + ++m_position; + } + else + { + std::ptrdiff_t len = std::distance(m_position, m_end); + len = (std::min)(static_cast(2), len); + v = this->toi(m_position, m_position + len, 10); + } + if(v < 0) + { + // oops not a number: + put(static_cast('?')); + return; + } + + // output varies depending upon whether sub-expression v matched or not: + if(m_results[v].matched) + { + m_have_conditional = true; + format_all(); + m_have_conditional = false; + if((m_position != m_end) && (*m_position == static_cast(':'))) + { + // skip the ':': + ++m_position; + // save output state, then turn it off: + output_state saved_state = m_state; + m_state = output_none; + // format the rest of this scope: + format_until_scope_end(); + // restore output state: + m_state = saved_state; + } + } + else + { + // save output state, then turn it off: + output_state saved_state = m_state; + m_state = output_none; + // format until ':' or ')': + m_have_conditional = true; + format_all(); + m_have_conditional = false; + // restore state: + m_state = saved_state; + if((m_position != m_end) && (*m_position == static_cast(':'))) + { + // skip the ':': + ++m_position; + // format the rest of this scope: + format_until_scope_end(); + } + } +} + +template +void basic_regex_formatter::format_until_scope_end() +{ + do + { + format_all(); + if((m_position == m_end) || (*m_position == static_cast(')'))) + return; + put(*m_position++); + }while(m_position != m_end); +} + +template +void basic_regex_formatter::put(char_type c) +{ + // write a single character to output + // according to which case translation mode we are in: + switch(this->m_state) + { + case output_none: + return; + case output_next_lower: + c = m_traits.tolower(c); + this->m_state = m_restore_state; + break; + case output_next_upper: + c = m_traits.toupper(c); + this->m_state = m_restore_state; + break; + case output_lower: + c = m_traits.tolower(c); + break; + case output_upper: + c = m_traits.toupper(c); + break; + default: + break; + } + *m_out = c; + ++m_out; +} + +template +void basic_regex_formatter::put(const sub_match_type& sub) +{ + typedef typename sub_match_type::iterator iterator_type; + iterator_type i = sub.first; + while(i != sub.second) + { + put(*i); + ++i; + } +} + +template +class string_out_iterator +{ + S* out; +public: + string_out_iterator(S& s) : out(&s) {} + string_out_iterator& operator++() { return *this; } + string_out_iterator& operator++(int) { return *this; } + string_out_iterator& operator*() { return *this; } + string_out_iterator& operator=(typename S::value_type v) + { + out->append(1, v); + return *this; + } + + typedef std::ptrdiff_t difference_type; + typedef typename S::value_type value_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef std::output_iterator_tag iterator_category; +}; + +template +OutputIterator regex_format_imp(OutputIterator out, + const match_results& m, + ForwardIter p1, ForwardIter p2, + match_flag_type flags, + const traits& t + ) +{ + if(flags & regex_constants::format_literal) + { + return BOOST_REGEX_DETAIL_NS::copy(p1, p2, out); + } + + BOOST_REGEX_DETAIL_NS::basic_regex_formatter< + OutputIterator, + match_results, + traits, ForwardIter> f(out, m, t); + return f.format(p1, p2, flags); +} + +template +struct has_const_iterator +{ + template + static typename U::const_iterator tester(U*); + static char tester(...); + + static T* get(); + + static const bool value = sizeof(tester(get())) != sizeof(char); +}; + +struct any_type +{ + template + any_type(const T&); + template + any_type(const T&, const U&); + template + any_type(const T&, const U&, const V&); +}; +typedef char no_type; +typedef char (&unary_type)[2]; +typedef char (&binary_type)[3]; +typedef char (&ternary_type)[4]; + +no_type check_is_formatter(unary_type, binary_type, ternary_type); +template +unary_type check_is_formatter(T const &, binary_type, ternary_type); +template +binary_type check_is_formatter(unary_type, T const &, ternary_type); +template +binary_type check_is_formatter(T const &, U const &, ternary_type); +template +ternary_type check_is_formatter(unary_type, binary_type, T const &); +template +ternary_type check_is_formatter(T const &, binary_type, U const &); +template +ternary_type check_is_formatter(unary_type, T const &, U const &); +template +ternary_type check_is_formatter(T const &, U const &, V const &); + +struct unary_binary_ternary +{ + typedef unary_type (*unary_fun)(any_type); + typedef binary_type (*binary_fun)(any_type, any_type); + typedef ternary_type (*ternary_fun)(any_type, any_type, any_type); + operator unary_fun(); + operator binary_fun(); + operator ternary_fun(); +}; + +template::value> +struct formatter_wrapper + : Formatter + , unary_binary_ternary +{ + formatter_wrapper(){} +}; + +template +struct formatter_wrapper + : unary_binary_ternary +{ + operator Formatter *(); +}; + +template +struct formatter_wrapper + : unary_binary_ternary +{ + operator Formatter *(); +}; + +template +struct do_unwrap_reference +{ + typedef T type; +}; +template +struct do_unwrap_reference > +{ + typedef T type; +}; + +template +T& do_unwrap_ref(T& r) { return r; } +template +T& do_unwrap_ref(std::reference_wrapper const& r) { return r.get(); } + +template +struct format_traits_imp +{ +private: + // + // F must be a pointer, a function, or a class with a function call operator: + // + static_assert((::std::is_pointer::value || ::std::is_function::value || ::std::is_class::value), "The functor must be a pointer or a class with a function call operator"); + static formatter_wrapper::type> f; + static M m; + static O out; + static boost::regex_constants::match_flag_type flags; +public: + static const int value = sizeof(check_is_formatter(f(m), f(m, out), f(m, out, flags))); +}; + +template +struct format_traits +{ +public: + // + // Type is std::integral_constant where N is one of: + // + // 0 : F is a pointer to a presumably null-terminated string. + // 1 : F is a character-container such as a std::string. + // 2 : F is a Unary Functor. + // 3 : F is a Binary Functor. + // 4 : F is a Ternary Functor. + // + typedef typename std::conditional< + std::is_pointer::value && !std::is_function::type>::value, + std::integral_constant, + typename std::conditional< + has_const_iterator::value, + std::integral_constant, + std::integral_constant::value> + >::type + >::type type; + // + // This static assertion will fail if the functor passed does not accept + // the same type of arguments passed. + // + static_assert( std::is_class::value && !has_const_iterator::value ? (type::value > 1) : true, "Argument mismatch in Functor type"); +}; + +template +struct format_functor3 +{ + format_functor3(Base b) : func(b) {} + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f) + { + return do_unwrap_ref(func)(m, i, f); + } + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits&) + { + return (*this)(m, i, f); + } +private: + Base func; + format_functor3(const format_functor3&); + format_functor3& operator=(const format_functor3&); +}; + +template +struct format_functor2 +{ + format_functor2(Base b) : func(b) {} + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type /*f*/) + { + return do_unwrap_ref(func)(m, i); + } + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits&) + { + return (*this)(m, i, f); + } +private: + Base func; + format_functor2(const format_functor2&); + format_functor2& operator=(const format_functor2&); +}; + +template +struct format_functor1 +{ + format_functor1(Base b) : func(b) {} + + template + OutputIter do_format_string(const S& s, OutputIter i) + { + return std::copy(s.begin(), s.end(), i); + } + template + inline OutputIter do_format_string(const S* s, OutputIter i) + { + while(s && *s) + { + *i = *s; + ++i; + ++s; + } + return i; + } + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type /*f*/) + { + return do_format_string(do_unwrap_ref(func)(m), i); + } + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits&) + { + return (*this)(m, i, f); + } +private: + Base func; + format_functor1(const format_functor1&); + format_functor1& operator=(const format_functor1&); +}; + +template +struct format_functor_c_string +{ + format_functor_c_string(const charT* ps) : func(ps) {} + + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits& t = Traits()) + { + //typedef typename Match::char_type char_type; + const charT* end = func; + while(*end) ++end; + return regex_format_imp(i, m, func, end, f, t); + } +private: + const charT* func; + format_functor_c_string(const format_functor_c_string&); + format_functor_c_string& operator=(const format_functor_c_string&); +}; + +template +struct format_functor_container +{ + format_functor_container(const Container& c) : func(c) {} + + template + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits& t = Traits()) + { + //typedef typename Match::char_type char_type; + return BOOST_REGEX_DETAIL_NS::regex_format_imp(i, m, func.begin(), func.end(), f, t); + } +private: + const Container& func; + format_functor_container(const format_functor_container&); + format_functor_container& operator=(const format_functor_container&); +}; + +template > +struct compute_functor_type +{ + typedef typename format_traits::type tag; + typedef typename std::remove_cv< typename std::remove_pointer::type>::type maybe_char_type; + + typedef typename std::conditional< + tag::value == 0, format_functor_c_string, + typename std::conditional< + tag::value == 1, format_functor_container, + typename std::conditional< + tag::value == 2, format_functor1, + typename std::conditional< + tag::value == 3, format_functor2, + format_functor3 + >::type + >::type + >::type + >::type type; +}; + +} // namespace BOOST_REGEX_DETAIL_NS + +template +inline OutputIterator regex_format(OutputIterator out, + const match_results& m, + Functor fmt, + match_flag_type flags = format_all + ) +{ + return m.format(out, fmt, flags); +} + +template +inline std::basic_string::char_type> regex_format(const match_results& m, + Functor fmt, + match_flag_type flags = format_all) +{ + return m.format(fmt, flags); +} + +} // namespace boost + +#endif // BOOST_REGEX_FORMAT_HPP + + + + + + diff --git a/include/boost/regex/v5/regex_fwd.hpp b/include/boost/regex/v5/regex_fwd.hpp new file mode 100644 index 00000000..3076b069 --- /dev/null +++ b/include/boost/regex/v5/regex_fwd.hpp @@ -0,0 +1,73 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_fwd.cpp + * VERSION see + * DESCRIPTION: Forward declares boost::basic_regex<> and + * associated typedefs. + */ + +#ifndef BOOST_REGEX_FWD_HPP_INCLUDED +#define BOOST_REGEX_FWD_HPP_INCLUDED + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif + +// +// define BOOST_REGEX_NO_FWD if this +// header doesn't work! +// +#ifdef BOOST_REGEX_NO_FWD +# ifndef BOOST_RE_REGEX_HPP +# include +# endif +#else + +namespace boost{ + +template +class cpp_regex_traits; +template +struct c_regex_traits; +template +class w32_regex_traits; + +#ifdef BOOST_REGEX_USE_WIN32_LOCALE +template > +struct regex_traits; +#elif defined(BOOST_REGEX_USE_CPP_LOCALE) +template > +struct regex_traits; +#else +template > +struct regex_traits; +#endif + +template > +class basic_regex; + +typedef basic_regex > regex; +#ifndef BOOST_NO_WREGEX +typedef basic_regex > wregex; +#endif + +} // namespace boost + +#endif // BOOST_REGEX_NO_FWD + +#endif + + + + diff --git a/include/boost/regex/v5/regex_grep.hpp b/include/boost/regex/v5/regex_grep.hpp new file mode 100644 index 00000000..6cd625d6 --- /dev/null +++ b/include/boost/regex/v5/regex_grep.hpp @@ -0,0 +1,98 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_grep.hpp + * VERSION see + * DESCRIPTION: Provides regex_grep implementation. + */ + +#ifndef BOOST_REGEX_V5_REGEX_GREP_HPP +#define BOOST_REGEX_V5_REGEX_GREP_HPP + + +namespace boost{ + +// +// regex_grep: +// find all non-overlapping matches within the sequence first last: +// +template +inline unsigned int regex_grep(Predicate foo, + BidiIterator first, + BidiIterator last, + const basic_regex& e, + match_flag_type flags = match_default) +{ + if(e.flags() & regex_constants::failbit) + return false; + + typedef typename match_results::allocator_type match_allocator_type; + + match_results m; + BOOST_REGEX_DETAIL_NS::perl_matcher matcher(first, last, m, e, flags, first); + unsigned int count = 0; + while(matcher.find()) + { + ++count; + if(0 == foo(m)) + return count; // caller doesn't want to go on + if(m[0].second == last) + return count; // we've reached the end, don't try and find an extra null match. + if(m.length() == 0) + { + if(m[0].second == last) + return count; + // we found a NULL-match, now try to find + // a non-NULL one at the same position: + match_results m2(m); + matcher.setf(match_not_null | match_continuous); + if(matcher.find()) + { + ++count; + if(0 == foo(m)) + return count; + } + else + { + // reset match back to where it was: + m = m2; + } + matcher.unsetf((match_not_null | match_continuous) & ~flags); + } + } + return count; +} + +// +// regex_grep convenience interfaces: +// +template +inline unsigned int regex_grep(Predicate foo, const charT* str, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, str, str + traits::length(str), e, flags); +} + +template +inline unsigned int regex_grep(Predicate foo, const std::basic_string& s, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, s.begin(), s.end(), e, flags); +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_GREP_HPP + diff --git a/include/boost/regex/v5/regex_iterator.hpp b/include/boost/regex/v5/regex_iterator.hpp new file mode 100644 index 00000000..ba74e6de --- /dev/null +++ b/include/boost/regex/v5/regex_iterator.hpp @@ -0,0 +1,173 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_iterator.hpp + * VERSION see + * DESCRIPTION: Provides regex_iterator implementation. + */ + +#ifndef BOOST_REGEX_V5_REGEX_ITERATOR_HPP +#define BOOST_REGEX_V5_REGEX_ITERATOR_HPP + +#include + +namespace boost{ + +template +class regex_iterator_implementation +{ + typedef basic_regex regex_type; + + match_results what; // current match + BidirectionalIterator base; // start of sequence + BidirectionalIterator end; // end of sequence + const regex_type re; // the expression + match_flag_type flags; // flags for matching + +public: + regex_iterator_implementation(const regex_type* p, BidirectionalIterator last, match_flag_type f) + : base(), end(last), re(*p), flags(f){} + regex_iterator_implementation(const regex_iterator_implementation& other) + :what(other.what), base(other.base), end(other.end), re(other.re), flags(other.flags){} + bool init(BidirectionalIterator first) + { + base = first; + return regex_search(first, end, what, re, flags); + } + bool compare(const regex_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) && (end == that.end) && (flags == that.flags) && (what[0].first == that.what[0].first) && (what[0].second == that.what[0].second); + } + const match_results& get() + { return what; } + bool next() + { + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail; + BidirectionalIterator next_start = what[0].second; + match_flag_type f(flags); + if(!what.length() || (f & regex_constants::match_posix)) + f |= regex_constants::match_not_initial_null; + //if(base != next_start) + // f |= regex_constants::match_not_bob; + bool result = regex_search(next_start, end, what, re, f, base); + if(result) + what.set_base(base); + return result; + } +private: + regex_iterator_implementation& operator=(const regex_iterator_implementation&); +}; + +template ::value_type, + class traits = regex_traits > +class regex_iterator +{ +private: + typedef regex_iterator_implementation impl; + typedef std::shared_ptr pimpl; +public: + typedef basic_regex regex_type; + typedef match_results value_type; + typedef typename std::iterator_traits::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + regex_iterator(){} + regex_iterator(BidirectionalIterator a, BidirectionalIterator b, + const regex_type& re, + match_flag_type m = match_default) + : pdata(new impl(&re, b, m)) + { + if(!pdata->init(a)) + { + pdata.reset(); + } + } + regex_iterator(const regex_iterator& that) + : pdata(that.pdata) {} + regex_iterator& operator=(const regex_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const regex_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const regex_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + regex_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + regex_iterator operator++(int) + { + regex_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && !pdata.unique()) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef regex_iterator cregex_iterator; +typedef regex_iterator sregex_iterator; +#ifndef BOOST_NO_WREGEX +typedef regex_iterator wcregex_iterator; +typedef regex_iterator wsregex_iterator; +#endif + +// make_regex_iterator: +template +inline regex_iterator make_regex_iterator(const charT* p, const basic_regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_iterator(p, p+traits::length(p), e, m); +} +template +inline regex_iterator::const_iterator, charT, traits> make_regex_iterator(const std::basic_string& p, const basic_regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_iterator::const_iterator, charT, traits>(p.begin(), p.end(), e, m); +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_ITERATOR_HPP + diff --git a/include/boost/regex/v5/regex_match.hpp b/include/boost/regex/v5/regex_match.hpp new file mode 100644 index 00000000..21753576 --- /dev/null +++ b/include/boost/regex/v5/regex_match.hpp @@ -0,0 +1,92 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_match.hpp + * VERSION see + * DESCRIPTION: Regular expression matching algorithms. + * Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + + +#ifndef BOOST_REGEX_MATCH_HPP +#define BOOST_REGEX_MATCH_HPP + +namespace boost{ + +// +// proc regex_match +// returns true if the specified regular expression matches +// the whole of the input. Fills in what matched in m. +// +template +bool regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + BOOST_REGEX_DETAIL_NS::perl_matcher matcher(first, last, m, e, flags, first); + return matcher.match(); +} +template +bool regex_match(iterator first, iterator last, + const basic_regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(first, last, m, e, flags | regex_constants::match_any); +} +// +// query_match convenience interfaces: +// +template +inline bool regex_match(const charT* str, + match_results& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + traits::length(str), m, e, flags); +} + +template +inline bool regex_match(const std::basic_string& s, + match_results::const_iterator, Allocator>& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +template +inline bool regex_match(const charT* str, + const basic_regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(str, str + traits::length(str), m, e, flags | regex_constants::match_any); +} + +template +inline bool regex_match(const std::basic_string& s, + const basic_regex& e, + match_flag_type flags = match_default) +{ + typedef typename std::basic_string::const_iterator iterator; + match_results m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} + + +} // namespace boost + +#endif // BOOST_REGEX_MATCH_HPP + diff --git a/include/boost/regex/v5/regex_merge.hpp b/include/boost/regex/v5/regex_merge.hpp new file mode 100644 index 00000000..a5103c53 --- /dev/null +++ b/include/boost/regex/v5/regex_merge.hpp @@ -0,0 +1,71 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_format.hpp + * VERSION see + * DESCRIPTION: Provides formatting output routines for search and replace + * operations. Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_V5_REGEX_MERGE_HPP +#define BOOST_REGEX_V5_REGEX_MERGE_HPP + + +namespace boost{ + +template +inline OutputIterator regex_merge(OutputIterator out, + Iterator first, + Iterator last, + const basic_regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + return regex_replace(out, first, last, e, fmt, flags); +} + +template +inline OutputIterator regex_merge(OutputIterator out, + Iterator first, + Iterator last, + const basic_regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + return regex_merge(out, first, last, e, fmt.c_str(), flags); +} + +template +inline std::basic_string regex_merge(const std::basic_string& s, + const basic_regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + return regex_replace(s, e, fmt, flags); +} + +template +inline std::basic_string regex_merge(const std::basic_string& s, + const basic_regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + return regex_replace(s, e, fmt, flags); +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_MERGE_HPP + + diff --git a/include/boost/regex/v5/regex_raw_buffer.hpp b/include/boost/regex/v5/regex_raw_buffer.hpp new file mode 100644 index 00000000..443c57a2 --- /dev/null +++ b/include/boost/regex/v5/regex_raw_buffer.hpp @@ -0,0 +1,213 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_raw_buffer.hpp + * VERSION see + * DESCRIPTION: Raw character buffer for regex code. + * Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_RAW_BUFFER_HPP +#define BOOST_REGEX_RAW_BUFFER_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif + +#include +#include + +namespace boost{ + namespace BOOST_REGEX_DETAIL_NS{ + +struct empty_padding{}; + +union padding +{ + void* p; + unsigned int i; +}; + +template +struct padding3 +{ + enum{ + padding_size = 8, + padding_mask = 7 + }; +}; + +template<> +struct padding3<2> +{ + enum{ + padding_size = 2, + padding_mask = 1 + }; +}; + +template<> +struct padding3<4> +{ + enum{ + padding_size = 4, + padding_mask = 3 + }; +}; + +template<> +struct padding3<8> +{ + enum{ + padding_size = 8, + padding_mask = 7 + }; +}; + +template<> +struct padding3<16> +{ + enum{ + padding_size = 16, + padding_mask = 15 + }; +}; + +enum{ + padding_size = padding3::padding_size, + padding_mask = padding3::padding_mask +}; + +// +// class raw_storage +// basically this is a simplified vector +// this is used by basic_regex for expression storage +// + +class raw_storage +{ +public: + typedef std::size_t size_type; + typedef unsigned char* pointer; +private: + pointer last, start, end; +public: + + raw_storage(); + raw_storage(size_type n); + + ~raw_storage() + { + ::operator delete(start); + } + + void resize(size_type n) + { + size_type newsize = start ? last - start : 1024; + while (newsize < n) + newsize *= 2; + size_type datasize = end - start; + // extend newsize to WORD/DWORD boundary: + newsize = (newsize + padding_mask) & ~(padding_mask); + + // allocate and copy data: + pointer ptr = static_cast(::operator new(newsize)); + BOOST_REGEX_NOEH_ASSERT(ptr) + if (start) + std::memcpy(ptr, start, datasize); + + // get rid of old buffer: + ::operator delete(start); + + // and set up pointers: + start = ptr; + end = ptr + datasize; + last = ptr + newsize; + } + + void* extend(size_type n) + { + if(size_type(last - end) < n) + resize(n + (end - start)); + pointer result = end; + end += n; + return result; + } + + void* insert(size_type pos, size_type n) + { + BOOST_REGEX_ASSERT(pos <= size_type(end - start)); + if (size_type(last - end) < n) + resize(n + (end - start)); + void* result = start + pos; + std::memmove(start + pos + n, start + pos, (end - start) - pos); + end += n; + return result; + } + + size_type size() + { + return size_type(end - start); + } + + size_type capacity() + { + return size_type(last - start); + } + + void* data()const + { + return start; + } + + size_type index(void* ptr) + { + return size_type(static_cast(ptr) - static_cast(data())); + } + + void clear() + { + end = start; + } + + void align() + { + // move end up to a boundary: + end = start + (((end - start) + padding_mask) & ~padding_mask); + } + void swap(raw_storage& that) + { + std::swap(start, that.start); + std::swap(end, that.end); + std::swap(last, that.last); + } +}; + +inline raw_storage::raw_storage() +{ + last = start = end = 0; +} + +inline raw_storage::raw_storage(size_type n) +{ + start = end = static_cast(::operator new(n)); + BOOST_REGEX_NOEH_ASSERT(start) + last = start + n; +} + +} // namespace BOOST_REGEX_DETAIL_NS +} // namespace boost + +#endif + diff --git a/include/boost/regex/v5/regex_replace.hpp b/include/boost/regex/v5/regex_replace.hpp new file mode 100644 index 00000000..75f8894f --- /dev/null +++ b/include/boost/regex/v5/regex_replace.hpp @@ -0,0 +1,77 @@ +/* + * + * Copyright (c) 1998-2009 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_format.hpp + * VERSION see + * DESCRIPTION: Provides formatting output routines for search and replace + * operations. Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_V5_REGEX_REPLACE_HPP +#define BOOST_REGEX_V5_REGEX_REPLACE_HPP + + +namespace boost{ + +template +OutputIterator regex_replace(OutputIterator out, + BidirectionalIterator first, + BidirectionalIterator last, + const basic_regex& e, + Formatter fmt, + match_flag_type flags = match_default) +{ + regex_iterator i(first, last, e, flags); + regex_iterator j; + if(i == j) + { + if(!(flags & regex_constants::format_no_copy)) + out = BOOST_REGEX_DETAIL_NS::copy(first, last, out); + } + else + { + BidirectionalIterator last_m(first); + while(i != j) + { + if(!(flags & regex_constants::format_no_copy)) + out = BOOST_REGEX_DETAIL_NS::copy(i->prefix().first, i->prefix().second, out); + out = i->format(out, fmt, flags, e); + last_m = (*i)[0].second; + if(flags & regex_constants::format_first_only) + break; + ++i; + } + if(!(flags & regex_constants::format_no_copy)) + out = BOOST_REGEX_DETAIL_NS::copy(last_m, last, out); + } + return out; +} + +template +std::basic_string regex_replace(const std::basic_string& s, + const basic_regex& e, + Formatter fmt, + match_flag_type flags = match_default) +{ + std::basic_string result; + BOOST_REGEX_DETAIL_NS::string_out_iterator > i(result); + regex_replace(i, s.begin(), s.end(), e, fmt, flags); + return result; +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_REPLACE_HPP + + diff --git a/include/boost/regex/v5/regex_search.hpp b/include/boost/regex/v5/regex_search.hpp new file mode 100644 index 00000000..d0addb19 --- /dev/null +++ b/include/boost/regex/v5/regex_search.hpp @@ -0,0 +1,103 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_search.hpp + * VERSION see + * DESCRIPTION: Provides regex_search implementation. + */ + +#ifndef BOOST_REGEX_V5_REGEX_SEARCH_HPP +#define BOOST_REGEX_V5_REGEX_SEARCH_HPP + + +namespace boost{ + +template +bool regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(first, last, m, e, flags, first); +} + +template +bool regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const basic_regex& e, + match_flag_type flags, + BidiIterator base) +{ + if(e.flags() & regex_constants::failbit) + return false; + + BOOST_REGEX_DETAIL_NS::perl_matcher matcher(first, last, m, e, flags, base); + return matcher.find(); +} + +// +// regex_search convenience interfaces: +// +template +inline bool regex_search(const charT* str, + match_results& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(str, str + traits::length(str), m, e, flags); +} + +template +inline bool regex_search(const std::basic_string& s, + match_results::const_iterator, Allocator>& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(s.begin(), s.end(), m, e, flags); +} + +template +bool regex_search(BidiIterator first, BidiIterator last, + const basic_regex& e, + match_flag_type flags = match_default) +{ + if(e.flags() & regex_constants::failbit) + return false; + + match_results m; + typedef typename match_results::allocator_type match_alloc_type; + BOOST_REGEX_DETAIL_NS::perl_matcher matcher(first, last, m, e, flags | regex_constants::match_any, first); + return matcher.find(); +} + +template +inline bool regex_search(const charT* str, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(str, str + traits::length(str), e, flags); +} + +template +inline bool regex_search(const std::basic_string& s, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(s.begin(), s.end(), e, flags); +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_SEARCH_HPP + + diff --git a/include/boost/regex/v5/regex_split.hpp b/include/boost/regex/v5/regex_split.hpp new file mode 100644 index 00000000..8d932896 --- /dev/null +++ b/include/boost/regex/v5/regex_split.hpp @@ -0,0 +1,152 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_split.hpp + * VERSION see + * DESCRIPTION: Implements regex_split and associated functions. + * Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_SPLIT_HPP +#define BOOST_REGEX_SPLIT_HPP + +namespace boost{ + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +namespace BOOST_REGEX_DETAIL_NS{ + +template +const basic_regex& get_default_expression(charT) +{ + static const charT expression_text[4] = { '\\', 's', '+', '\00', }; + static const basic_regex e(expression_text); + return e; +} + +template +class split_pred +{ + typedef std::basic_string string_type; + typedef typename string_type::const_iterator iterator_type; + iterator_type* p_last; + OutputIterator* p_out; + std::size_t* p_max; + std::size_t initial_max; +public: + split_pred(iterator_type* a, OutputIterator* b, std::size_t* c) + : p_last(a), p_out(b), p_max(c), initial_max(*c) {} + + bool operator()(const match_results& what); +}; + +template +bool split_pred::operator() + (const match_results& what) +{ + *p_last = what[0].second; + if(what.size() > 1) + { + // output sub-expressions only: + for(unsigned i = 1; i < what.size(); ++i) + { + *(*p_out) = what.str(i); + ++(*p_out); + if(0 == --*p_max) return false; + } + return *p_max != 0; + } + else + { + // output $` only if it's not-null or not at the start of the input: + const sub_match& sub = what[-1]; + if((sub.first != sub.second) || (*p_max != initial_max)) + { + *(*p_out) = sub.str(); + ++(*p_out); + return --*p_max; + } + } + // + // initial null, do nothing: + return true; +} + +} // namespace BOOST_REGEX_DETAIL_NS + +template +std::size_t regex_split(OutputIterator out, + std::basic_string& s, + const basic_regex& e, + match_flag_type flags, + std::size_t max_split) +{ + typedef typename std::basic_string::const_iterator ci_t; + //typedef typename match_results::allocator_type match_allocator; + ci_t last = s.begin(); + std::size_t init_size = max_split; + BOOST_REGEX_DETAIL_NS::split_pred pred(&last, &out, &max_split); + ci_t i, j; + i = s.begin(); + j = s.end(); + regex_grep(pred, i, j, e, flags); + // + // if there is still input left, do a final push as long as max_split + // is not exhausted, and we're not splitting sub-expressions rather + // than whitespace: + if(max_split && (last != s.end()) && (e.mark_count() == 0)) + { + *out = std::basic_string((ci_t)last, (ci_t)s.end()); + ++out; + last = s.end(); + --max_split; + } + // + // delete from the string everything that has been processed so far: + s.erase(0, last - s.begin()); + // + // return the number of new records pushed: + return init_size - max_split; +} + +template +inline std::size_t regex_split(OutputIterator out, + std::basic_string& s, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_split(out, s, e, flags, UINT_MAX); +} + +template +inline std::size_t regex_split(OutputIterator out, + std::basic_string& s) +{ + return regex_split(out, s, BOOST_REGEX_DETAIL_NS::get_default_expression(charT(0)), match_default, UINT_MAX); +} + +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +} // namespace boost + +#endif + + diff --git a/include/boost/regex/v5/regex_token_iterator.hpp b/include/boost/regex/v5/regex_token_iterator.hpp new file mode 100644 index 00000000..98f7d213 --- /dev/null +++ b/include/boost/regex/v5/regex_token_iterator.hpp @@ -0,0 +1,255 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_token_iterator.hpp + * VERSION see + * DESCRIPTION: Provides regex_token_iterator implementation. + */ + +#ifndef BOOST_REGEX_V5_REGEX_TOKEN_ITERATOR_HPP +#define BOOST_REGEX_V5_REGEX_TOKEN_ITERATOR_HPP + +#include + +namespace boost{ + +template +class regex_token_iterator_implementation +{ + typedef basic_regex regex_type; + typedef sub_match value_type; + + match_results what; // current match + BidirectionalIterator base; // start of search area + BidirectionalIterator end; // end of search area + const regex_type re; // the expression + match_flag_type flags; // match flags + value_type result; // the current string result + int N; // the current sub-expression being enumerated + std::vector subs; // the sub-expressions to enumerate + +public: + regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, int sub, match_flag_type f) + : end(last), re(*p), flags(f), N(0){ subs.push_back(sub); } + regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const std::vector& v, match_flag_type f) + : end(last), re(*p), flags(f), N(0), subs(v){} + template + regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const int (&submatches)[CN], match_flag_type f) + : end(last), re(*p), flags(f), N(0) + { + for(std::size_t i = 0; i < CN; ++i) + { + subs.push_back(submatches[i]); + } + } + regex_token_iterator_implementation(const regex_token_iterator_implementation& other) = default; + bool init(BidirectionalIterator first) + { + N = 0; + base = first; + if(regex_search(first, end, what, re, flags, base) == true) + { + N = 0; + result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]); + return true; + } + else if((subs[N] == -1) && (first != end)) + { + result.first = first; + result.second = end; + result.matched = (first != end); + N = -1; + return true; + } + return false; + } + bool compare(const regex_token_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) + && (end == that.end) + && (flags == that.flags) + && (N == that.N) + && (what[0].first == that.what[0].first) + && (what[0].second == that.what[0].second); + } + const value_type& get() + { return result; } + bool next() + { + if(N == -1) + return false; + if(N+1 < (int)subs.size()) + { + ++N; + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); + return true; + } + //if(what.prefix().first != what[0].second) + // flags |= /*match_prev_avail |*/ regex_constants::match_not_bob; + BidirectionalIterator last_end(what[0].second); + if(regex_search(last_end, end, what, re, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags), base)) + { + N =0; + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); + return true; + } + else if((last_end != end) && (subs[0] == -1)) + { + N =-1; + result.first = last_end; + result.second = end; + result.matched = (last_end != end); + return true; + } + return false; + } +private: + regex_token_iterator_implementation& operator=(const regex_token_iterator_implementation&); +}; + +template ::value_type, + class traits = regex_traits > +class regex_token_iterator +{ +private: + typedef regex_token_iterator_implementation impl; + typedef std::shared_ptr pimpl; +public: + typedef basic_regex regex_type; + typedef sub_match value_type; + typedef typename std::iterator_traits::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + regex_token_iterator(){} + regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + int submatch = 0, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatch, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const std::vector& submatches, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + template + regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const int (&submatches)[N], match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + regex_token_iterator(const regex_token_iterator& that) + : pdata(that.pdata) {} + regex_token_iterator& operator=(const regex_token_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const regex_token_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const regex_token_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + regex_token_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + regex_token_iterator operator++(int) + { + regex_token_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && !pdata.unique()) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef regex_token_iterator cregex_token_iterator; +typedef regex_token_iterator sregex_token_iterator; +#ifndef BOOST_NO_WREGEX +typedef regex_token_iterator wcregex_token_iterator; +typedef regex_token_iterator wsregex_token_iterator; +#endif + +template +inline regex_token_iterator make_regex_token_iterator(const charT* p, const basic_regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator(p, p+traits::length(p), e, submatch, m); +} +template +inline regex_token_iterator::const_iterator, charT, traits> make_regex_token_iterator(const std::basic_string& p, const basic_regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator::const_iterator, charT, traits>(p.begin(), p.end(), e, submatch, m); +} +template +inline regex_token_iterator make_regex_token_iterator(const charT* p, const basic_regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator(p, p+traits::length(p), e, submatch, m); +} +template +inline regex_token_iterator::const_iterator, charT, traits> make_regex_token_iterator(const std::basic_string& p, const basic_regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator::const_iterator, charT, traits>(p.begin(), p.end(), e, submatch, m); +} +template +inline regex_token_iterator make_regex_token_iterator(const charT* p, const basic_regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator(p, p+traits::length(p), e, submatch, m); +} +template +inline regex_token_iterator::const_iterator, charT, traits> make_regex_token_iterator(const std::basic_string& p, const basic_regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator::const_iterator, charT, traits>(p.begin(), p.end(), e, submatch, m); +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_TOKEN_ITERATOR_HPP + + + + diff --git a/include/boost/regex/v5/regex_traits.hpp b/include/boost/regex/v5/regex_traits.hpp new file mode 100644 index 00000000..6c64695b --- /dev/null +++ b/include/boost/regex/v5/regex_traits.hpp @@ -0,0 +1,130 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits classes. + */ + +#ifndef BOOST_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_REGEX_TRAITS_HPP_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) +# include +#endif +#include + +namespace boost{ + +template +struct regex_traits : public implementationT +{ + regex_traits() : implementationT() {} +}; + +// +// class regex_traits_wrapper. +// this is what our implementation will actually store; +// it provides default implementations of the "optional" +// interfaces that we support, in addition to the +// required "standard" ones: +// +namespace BOOST_REGEX_DETAIL_NS{ + + template + struct has_boost_extensions_tag + { + template + static double checker(U*, typename U::boost_extensions_tag* = nullptr); + static char checker(...); + static T* get(); + + static const bool value = sizeof(checker(get())) > 1; + }; + + +template +struct default_wrapper : public BaseT +{ + typedef typename BaseT::char_type char_type; + std::string error_string(::boost::regex_constants::error_type e)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::get_default_error_string(e); + } + ::boost::regex_constants::syntax_type syntax_type(char_type c)const + { + return (char_type(c & 0x7f) == c) ? get_default_syntax_type(static_cast(c)) : ::boost::regex_constants::syntax_char; + } + ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c)const + { + return (char_type(c & 0x7f) == c) ? get_default_escape_syntax_type(static_cast(c)) : ::boost::regex_constants::escape_type_identity; + } + std::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); + } + char_type translate(char_type c, bool icase)const + { + return (icase ? this->translate_nocase(c) : this->translate(c)); + } + char_type translate(char_type c)const + { + return BaseT::translate(c); + } + char_type tolower(char_type c)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::global_lower(c); + } + char_type toupper(char_type c)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::global_upper(c); + } +}; + +template +struct compute_wrapper_base +{ + typedef BaseT type; +}; +template +struct compute_wrapper_base +{ + typedef default_wrapper type; +}; + +} // namespace BOOST_REGEX_DETAIL_NS + +template +struct regex_traits_wrapper + : public ::boost::BOOST_REGEX_DETAIL_NS::compute_wrapper_base< + BaseT, + ::boost::BOOST_REGEX_DETAIL_NS::has_boost_extensions_tag::value + >::type +{ + regex_traits_wrapper(){} +private: + regex_traits_wrapper(const regex_traits_wrapper&); + regex_traits_wrapper& operator=(const regex_traits_wrapper&); +}; + +} // namespace boost + +#endif // include + diff --git a/src/regex_traits_defaults.cpp b/include/boost/regex/v5/regex_traits_defaults.hpp similarity index 67% rename from src/regex_traits_defaults.cpp rename to include/boost/regex/v5/regex_traits_defaults.hpp index 5ac46d2d..f5136b97 100644 --- a/src/regex_traits_defaults.cpp +++ b/include/boost/regex/v5/regex_traits_defaults.hpp @@ -3,42 +3,49 @@ * Copyright (c) 2004 * John Maddock * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) * */ - + /* * LOCATION: see http://www.boost.org for most recent version. - * FILE regex_traits_defaults.cpp + * FILE regex_traits_defaults.hpp * VERSION see * DESCRIPTION: Declares API's for access to regex_traits default properties. */ -#define BOOST_REGEX_SOURCE -#include +#ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED +#define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED +#include + +#include +#include +#include +#include +#include #include -#ifndef BOOST_NO_WREGEX +#include #include -#endif - -#if defined(BOOST_NO_STDC_NAMESPACE) -namespace std{ - using ::tolower; - using ::toupper; -#ifndef BOOST_NO_WREGEX - using ::towlower; - using ::towupper; -#endif -} -#endif - namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ -BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n) + +// +// helpers to suppress warnings: +// +template +inline bool is_extended(charT c) +{ + typedef typename std::make_unsigned::type unsigned_type; + return (sizeof(charT) > 1) && (static_cast(c) >= 256u); +} +inline bool is_extended(char) +{ return false; } + +inline const char* get_default_syntax(regex_constants::syntax_type n) { // if the user hasn't supplied a message catalog, then this supplies // default "messages" for us to load in the range 1-100. @@ -108,7 +115,7 @@ BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants return ((n >= (sizeof(messages) / sizeof(messages[1]))) ? "" : messages[n]); } -BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n) +inline const char* get_default_error_string(regex_constants::error_type n) { static const char* const s_default_error_messages[] = { "Success", /* REG_NOERROR 0 error_ok */ @@ -138,361 +145,10 @@ BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_con "Unknown error.", /* REG_E_UNKNOWN 21 error_unknown */ }; - return (n > ::boost::regex_constants::error_unknown) ? s_default_error_messages[ ::boost::regex_constants::error_unknown] : s_default_error_messages[n]; + return (n > ::boost::regex_constants::error_unknown) ? s_default_error_messages[::boost::regex_constants::error_unknown] : s_default_error_messages[n]; } -BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(boost::uint_least16_t c) -{ - const boost::uint_least16_t combining_ranges[] = { 0x0300, 0x0361, - 0x0483, 0x0486, - 0x0903, 0x0903, - 0x093E, 0x0940, - 0x0949, 0x094C, - 0x0982, 0x0983, - 0x09BE, 0x09C0, - 0x09C7, 0x09CC, - 0x09D7, 0x09D7, - 0x0A3E, 0x0A40, - 0x0A83, 0x0A83, - 0x0ABE, 0x0AC0, - 0x0AC9, 0x0ACC, - 0x0B02, 0x0B03, - 0x0B3E, 0x0B3E, - 0x0B40, 0x0B40, - 0x0B47, 0x0B4C, - 0x0B57, 0x0B57, - 0x0B83, 0x0B83, - 0x0BBE, 0x0BBF, - 0x0BC1, 0x0BCC, - 0x0BD7, 0x0BD7, - 0x0C01, 0x0C03, - 0x0C41, 0x0C44, - 0x0C82, 0x0C83, - 0x0CBE, 0x0CBE, - 0x0CC0, 0x0CC4, - 0x0CC7, 0x0CCB, - 0x0CD5, 0x0CD6, - 0x0D02, 0x0D03, - 0x0D3E, 0x0D40, - 0x0D46, 0x0D4C, - 0x0D57, 0x0D57, - 0x0F7F, 0x0F7F, - 0x20D0, 0x20E1, - 0x3099, 0x309A, - 0xFE20, 0xFE23, - 0xffff, 0xffff, }; - - const boost::uint_least16_t* p = combining_ranges + 1; - while(*p < c) p += 2; - --p; - if((c >= *p) && (c <= *(p+1))) - return true; - return false; -} - -// -// these are the POSIX collating names: -// -static const char* def_coll_names[] = { -"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "alert", "backspace", "tab", "newline", -"vertical-tab", "form-feed", "carriage-return", "SO", "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", -"SYN", "ETB", "CAN", "EM", "SUB", "ESC", "IS4", "IS3", "IS2", "IS1", "space", "exclamation-mark", -"quotation-mark", "number-sign", "dollar-sign", "percent-sign", "ampersand", "apostrophe", -"left-parenthesis", "right-parenthesis", "asterisk", "plus-sign", "comma", "hyphen", -"period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", -"colon", "semicolon", "less-than-sign", "equals-sign", "greater-than-sign", -"question-mark", "commercial-at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", -"Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "left-square-bracket", "backslash", -"right-square-bracket", "circumflex", "underscore", "grave-accent", "a", "b", "c", "d", "e", "f", -"g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "left-curly-bracket", -"vertical-line", "right-curly-bracket", "tilde", "DEL", "", -}; - -// these multi-character collating elements -// should keep most Western-European locales -// happy - we should really localise these a -// little more - but this will have to do for -// now: - -static const char* def_multi_coll[] = { - "ae", - "Ae", - "AE", - "ch", - "Ch", - "CH", - "ll", - "Ll", - "LL", - "ss", - "Ss", - "SS", - "nj", - "Nj", - "NJ", - "dz", - "Dz", - "DZ", - "lj", - "Lj", - "LJ", - "", -}; - - - -BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name) -{ - unsigned int i = 0; - while(*def_coll_names[i]) - { - if(def_coll_names[i] == name) - { - return std::string(1, char(i)); - } - ++i; - } - i = 0; - while(*def_multi_coll[i]) - { - if(def_multi_coll[i] == name) - { - return def_multi_coll[i]; - } - ++i; - } - return std::string(); -} - -BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c) -{ - return static_cast((std::tolower)((unsigned char)c)); -} - -BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c) -{ - return static_cast((std::toupper)((unsigned char)c)); -} -#ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c) -{ - return (std::towlower)(c); -} - -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c) -{ - return (std::towupper)(c); -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c) -{ - return (std::towlower)(c); -} - -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c) -{ - return (std::towupper)(c); -} -#endif - -#endif - -BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c) -{ - // - // char_syntax determines how the compiler treats a given character - // in a regular expression. - // - static regex_constants::escape_syntax_type char_syntax[] = { - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /* */ // 32 - regex_constants::escape_type_identity, /*!*/ - regex_constants::escape_type_identity, /*"*/ - regex_constants::escape_type_identity, /*#*/ - regex_constants::escape_type_identity, /*$*/ - regex_constants::escape_type_identity, /*%*/ - regex_constants::escape_type_identity, /*&*/ - regex_constants::escape_type_end_buffer, /*'*/ - regex_constants::syntax_open_mark, /*(*/ - regex_constants::syntax_close_mark, /*)*/ - regex_constants::escape_type_identity, /***/ - regex_constants::syntax_plus, /*+*/ - regex_constants::escape_type_identity, /*,*/ - regex_constants::escape_type_identity, /*-*/ - regex_constants::escape_type_identity, /*.*/ - regex_constants::escape_type_identity, /*/*/ - regex_constants::escape_type_decimal, /*0*/ - regex_constants::escape_type_backref, /*1*/ - regex_constants::escape_type_backref, /*2*/ - regex_constants::escape_type_backref, /*3*/ - regex_constants::escape_type_backref, /*4*/ - regex_constants::escape_type_backref, /*5*/ - regex_constants::escape_type_backref, /*6*/ - regex_constants::escape_type_backref, /*7*/ - regex_constants::escape_type_backref, /*8*/ - regex_constants::escape_type_backref, /*9*/ - regex_constants::escape_type_identity, /*:*/ - regex_constants::escape_type_identity, /*;*/ - regex_constants::escape_type_left_word, /*<*/ - regex_constants::escape_type_identity, /*=*/ - regex_constants::escape_type_right_word, /*>*/ - regex_constants::syntax_question, /*?*/ - regex_constants::escape_type_identity, /*@*/ - regex_constants::escape_type_start_buffer, /*A*/ - regex_constants::escape_type_not_word_assert, /*B*/ - regex_constants::escape_type_C, /*C*/ - regex_constants::escape_type_not_class, /*D*/ - regex_constants::escape_type_E, /*E*/ - regex_constants::escape_type_not_class, /*F*/ - regex_constants::escape_type_G, /*G*/ - regex_constants::escape_type_not_class, /*H*/ - regex_constants::escape_type_not_class, /*I*/ - regex_constants::escape_type_not_class, /*J*/ - regex_constants::escape_type_reset_start_mark, /*K*/ - regex_constants::escape_type_not_class, /*L*/ - regex_constants::escape_type_not_class, /*M*/ - regex_constants::escape_type_named_char, /*N*/ - regex_constants::escape_type_not_class, /*O*/ - regex_constants::escape_type_not_property, /*P*/ - regex_constants::escape_type_Q, /*Q*/ - regex_constants::escape_type_line_ending, /*R*/ - regex_constants::escape_type_not_class, /*S*/ - regex_constants::escape_type_not_class, /*T*/ - regex_constants::escape_type_not_class, /*U*/ - regex_constants::escape_type_not_class, /*V*/ - regex_constants::escape_type_not_class, /*W*/ - regex_constants::escape_type_X, /*X*/ - regex_constants::escape_type_not_class, /*Y*/ - regex_constants::escape_type_Z, /*Z*/ - regex_constants::escape_type_identity, /*[*/ - regex_constants::escape_type_identity, /*\*/ - regex_constants::escape_type_identity, /*]*/ - regex_constants::escape_type_identity, /*^*/ - regex_constants::escape_type_identity, /*_*/ - regex_constants::escape_type_start_buffer, /*`*/ - regex_constants::escape_type_control_a, /*a*/ - regex_constants::escape_type_word_assert, /*b*/ - regex_constants::escape_type_ascii_control, /*c*/ - regex_constants::escape_type_class, /*d*/ - regex_constants::escape_type_e, /*e*/ - regex_constants::escape_type_control_f, /*f*/ - regex_constants::escape_type_extended_backref, /*g*/ - regex_constants::escape_type_class, /*h*/ - regex_constants::escape_type_class, /*i*/ - regex_constants::escape_type_class, /*j*/ - regex_constants::escape_type_extended_backref, /*k*/ - regex_constants::escape_type_class, /*l*/ - regex_constants::escape_type_class, /*m*/ - regex_constants::escape_type_control_n, /*n*/ - regex_constants::escape_type_class, /*o*/ - regex_constants::escape_type_property, /*p*/ - regex_constants::escape_type_class, /*q*/ - regex_constants::escape_type_control_r, /*r*/ - regex_constants::escape_type_class, /*s*/ - regex_constants::escape_type_control_t, /*t*/ - regex_constants::escape_type_class, /*u*/ - regex_constants::escape_type_control_v, /*v*/ - regex_constants::escape_type_class, /*w*/ - regex_constants::escape_type_hex, /*x*/ - regex_constants::escape_type_class, /*y*/ - regex_constants::escape_type_end_buffer, /*z*/ - regex_constants::syntax_open_brace, /*{*/ - regex_constants::syntax_or, /*|*/ - regex_constants::syntax_close_brace, /*}*/ - regex_constants::escape_type_identity, /*~*/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - regex_constants::escape_type_identity, /**/ - }; - - return char_syntax[(unsigned char)c]; -} - -BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c) +inline regex_constants::syntax_type get_default_syntax_type(char c) { // // char_syntax determines how the compiler treats a given character @@ -687,6 +343,653 @@ BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_synta return char_syntax[(unsigned char)c]; } +inline regex_constants::escape_syntax_type get_default_escape_syntax_type(char c) +{ + // + // char_syntax determines how the compiler treats a given character + // in a regular expression. + // + static regex_constants::escape_syntax_type char_syntax[] = { + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /* */ // 32 + regex_constants::escape_type_identity, /*!*/ + regex_constants::escape_type_identity, /*"*/ + regex_constants::escape_type_identity, /*#*/ + regex_constants::escape_type_identity, /*$*/ + regex_constants::escape_type_identity, /*%*/ + regex_constants::escape_type_identity, /*&*/ + regex_constants::escape_type_end_buffer, /*'*/ + regex_constants::syntax_open_mark, /*(*/ + regex_constants::syntax_close_mark, /*)*/ + regex_constants::escape_type_identity, /***/ + regex_constants::syntax_plus, /*+*/ + regex_constants::escape_type_identity, /*,*/ + regex_constants::escape_type_identity, /*-*/ + regex_constants::escape_type_identity, /*.*/ + regex_constants::escape_type_identity, /*/*/ + regex_constants::escape_type_decimal, /*0*/ + regex_constants::escape_type_backref, /*1*/ + regex_constants::escape_type_backref, /*2*/ + regex_constants::escape_type_backref, /*3*/ + regex_constants::escape_type_backref, /*4*/ + regex_constants::escape_type_backref, /*5*/ + regex_constants::escape_type_backref, /*6*/ + regex_constants::escape_type_backref, /*7*/ + regex_constants::escape_type_backref, /*8*/ + regex_constants::escape_type_backref, /*9*/ + regex_constants::escape_type_identity, /*:*/ + regex_constants::escape_type_identity, /*;*/ + regex_constants::escape_type_left_word, /*<*/ + regex_constants::escape_type_identity, /*=*/ + regex_constants::escape_type_right_word, /*>*/ + regex_constants::syntax_question, /*?*/ + regex_constants::escape_type_identity, /*@*/ + regex_constants::escape_type_start_buffer, /*A*/ + regex_constants::escape_type_not_word_assert, /*B*/ + regex_constants::escape_type_C, /*C*/ + regex_constants::escape_type_not_class, /*D*/ + regex_constants::escape_type_E, /*E*/ + regex_constants::escape_type_not_class, /*F*/ + regex_constants::escape_type_G, /*G*/ + regex_constants::escape_type_not_class, /*H*/ + regex_constants::escape_type_not_class, /*I*/ + regex_constants::escape_type_not_class, /*J*/ + regex_constants::escape_type_reset_start_mark, /*K*/ + regex_constants::escape_type_not_class, /*L*/ + regex_constants::escape_type_not_class, /*M*/ + regex_constants::escape_type_named_char, /*N*/ + regex_constants::escape_type_not_class, /*O*/ + regex_constants::escape_type_not_property, /*P*/ + regex_constants::escape_type_Q, /*Q*/ + regex_constants::escape_type_line_ending, /*R*/ + regex_constants::escape_type_not_class, /*S*/ + regex_constants::escape_type_not_class, /*T*/ + regex_constants::escape_type_not_class, /*U*/ + regex_constants::escape_type_not_class, /*V*/ + regex_constants::escape_type_not_class, /*W*/ + regex_constants::escape_type_X, /*X*/ + regex_constants::escape_type_not_class, /*Y*/ + regex_constants::escape_type_Z, /*Z*/ + regex_constants::escape_type_identity, /*[*/ + regex_constants::escape_type_identity, /*\*/ + regex_constants::escape_type_identity, /*]*/ + regex_constants::escape_type_identity, /*^*/ + regex_constants::escape_type_identity, /*_*/ + regex_constants::escape_type_start_buffer, /*`*/ + regex_constants::escape_type_control_a, /*a*/ + regex_constants::escape_type_word_assert, /*b*/ + regex_constants::escape_type_ascii_control, /*c*/ + regex_constants::escape_type_class, /*d*/ + regex_constants::escape_type_e, /*e*/ + regex_constants::escape_type_control_f, /*f*/ + regex_constants::escape_type_extended_backref, /*g*/ + regex_constants::escape_type_class, /*h*/ + regex_constants::escape_type_class, /*i*/ + regex_constants::escape_type_class, /*j*/ + regex_constants::escape_type_extended_backref, /*k*/ + regex_constants::escape_type_class, /*l*/ + regex_constants::escape_type_class, /*m*/ + regex_constants::escape_type_control_n, /*n*/ + regex_constants::escape_type_class, /*o*/ + regex_constants::escape_type_property, /*p*/ + regex_constants::escape_type_class, /*q*/ + regex_constants::escape_type_control_r, /*r*/ + regex_constants::escape_type_class, /*s*/ + regex_constants::escape_type_control_t, /*t*/ + regex_constants::escape_type_class, /*u*/ + regex_constants::escape_type_control_v, /*v*/ + regex_constants::escape_type_class, /*w*/ + regex_constants::escape_type_hex, /*x*/ + regex_constants::escape_type_class, /*y*/ + regex_constants::escape_type_end_buffer, /*z*/ + regex_constants::syntax_open_brace, /*{*/ + regex_constants::syntax_or, /*|*/ + regex_constants::syntax_close_brace, /*}*/ + regex_constants::escape_type_identity, /*~*/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + }; + + return char_syntax[(unsigned char)c]; +} + +// is charT c a combining character? +inline bool is_combining_implementation(std::uint_least16_t c) +{ + const std::uint_least16_t combining_ranges[] = { 0x0300, 0x0361, + 0x0483, 0x0486, + 0x0903, 0x0903, + 0x093E, 0x0940, + 0x0949, 0x094C, + 0x0982, 0x0983, + 0x09BE, 0x09C0, + 0x09C7, 0x09CC, + 0x09D7, 0x09D7, + 0x0A3E, 0x0A40, + 0x0A83, 0x0A83, + 0x0ABE, 0x0AC0, + 0x0AC9, 0x0ACC, + 0x0B02, 0x0B03, + 0x0B3E, 0x0B3E, + 0x0B40, 0x0B40, + 0x0B47, 0x0B4C, + 0x0B57, 0x0B57, + 0x0B83, 0x0B83, + 0x0BBE, 0x0BBF, + 0x0BC1, 0x0BCC, + 0x0BD7, 0x0BD7, + 0x0C01, 0x0C03, + 0x0C41, 0x0C44, + 0x0C82, 0x0C83, + 0x0CBE, 0x0CBE, + 0x0CC0, 0x0CC4, + 0x0CC7, 0x0CCB, + 0x0CD5, 0x0CD6, + 0x0D02, 0x0D03, + 0x0D3E, 0x0D40, + 0x0D46, 0x0D4C, + 0x0D57, 0x0D57, + 0x0F7F, 0x0F7F, + 0x20D0, 0x20E1, + 0x3099, 0x309A, + 0xFE20, 0xFE23, + 0xffff, 0xffff, }; + + const std::uint_least16_t* p = combining_ranges + 1; + while (*p < c) p += 2; + --p; + if ((c >= *p) && (c <= *(p + 1))) + return true; + return false; +} + +template +inline bool is_combining(charT c) +{ + return (c <= static_cast(0)) ? false : ((c >= static_cast((std::numeric_limits::max)())) ? false : is_combining_implementation(static_cast(c))); +} +template <> +inline bool is_combining(char) +{ + return false; +} +template <> +inline bool is_combining(signed char) +{ + return false; +} +template <> +inline bool is_combining(unsigned char) +{ + return false; +} +#ifdef _MSC_VER +template<> +inline bool is_combining(wchar_t c) +{ + return is_combining_implementation(static_cast(c)); +} +#elif !defined(__DECCXX) && !defined(__osf__) && !defined(__OSF__) && defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) +#if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX) +template<> +inline bool is_combining(wchar_t c) +{ + return is_combining_implementation(static_cast(c)); +} +#else +template<> +inline bool is_combining(wchar_t c) +{ + return (c >= (std::numeric_limits::max)()) ? false : is_combining_implementation(static_cast(c)); +} +#endif +#endif + +// +// is a charT c a line separator? +// +template +inline bool is_separator(charT c) +{ + return BOOST_REGEX_MAKE_BOOL( + (c == static_cast('\n')) + || (c == static_cast('\r')) + || (c == static_cast('\f')) + || (static_cast(c) == 0x2028u) + || (static_cast(c) == 0x2029u) + || (static_cast(c) == 0x85u)); +} +template <> +inline bool is_separator(char c) +{ + return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f')); +} + +// +// get a default collating element: +// +inline std::string lookup_default_collate_name(const std::string& name) +{ + // + // these are the POSIX collating names: + // + static const char* def_coll_names[] = { + "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "alert", "backspace", "tab", "newline", + "vertical-tab", "form-feed", "carriage-return", "SO", "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", + "SYN", "ETB", "CAN", "EM", "SUB", "ESC", "IS4", "IS3", "IS2", "IS1", "space", "exclamation-mark", + "quotation-mark", "number-sign", "dollar-sign", "percent-sign", "ampersand", "apostrophe", + "left-parenthesis", "right-parenthesis", "asterisk", "plus-sign", "comma", "hyphen", + "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", + "colon", "semicolon", "less-than-sign", "equals-sign", "greater-than-sign", + "question-mark", "commercial-at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", + "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "left-square-bracket", "backslash", + "right-square-bracket", "circumflex", "underscore", "grave-accent", "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "left-curly-bracket", + "vertical-line", "right-curly-bracket", "tilde", "DEL", "", + }; + + // these multi-character collating elements + // should keep most Western-European locales + // happy - we should really localise these a + // little more - but this will have to do for + // now: + + static const char* def_multi_coll[] = { + "ae", + "Ae", + "AE", + "ch", + "Ch", + "CH", + "ll", + "Ll", + "LL", + "ss", + "Ss", + "SS", + "nj", + "Nj", + "NJ", + "dz", + "Dz", + "DZ", + "lj", + "Lj", + "LJ", + "", + }; + + unsigned int i = 0; + while (*def_coll_names[i]) + { + if (def_coll_names[i] == name) + { + return std::string(1, char(i)); + } + ++i; + } + i = 0; + while (*def_multi_coll[i]) + { + if (def_multi_coll[i] == name) + { + return def_multi_coll[i]; + } + ++i; + } + return std::string(); +} + +// +// get the state_id of a character classification, the individual +// traits classes then transform that state_id into a bitmask: +// +template +struct character_pointer_range +{ + const charT* p1; + const charT* p2; + + bool operator < (const character_pointer_range& r)const + { + return std::lexicographical_compare(p1, p2, r.p1, r.p2); + } + bool operator == (const character_pointer_range& r)const + { + // Not only do we check that the ranges are of equal size before + // calling std::equal, but there is no other algorithm available: + // not even a non-standard MS one. So forward to unchecked_equal + // in the MS case. +#ifdef __cpp_lib_robust_nonmodifying_seq_ops + return std::equal(p1, p2, r.p1, r.p2); +#elif defined(BOOST_REGEX_MSVC) + if (((p2 - p1) != (r.p2 - r.p1))) + return false; + const charT* with = r.p1; + const charT* pos = p1; + while (pos != p2) + if (*pos++ != *with++) return false; + return true; + +#else + return ((p2 - p1) == (r.p2 - r.p1)) && std::equal(p1, p2, r.p1); +#endif + } +}; +template +int get_default_class_id(const charT* p1, const charT* p2) +{ + static const charT data[73] = { + 'a', 'l', 'n', 'u', 'm', + 'a', 'l', 'p', 'h', 'a', + 'b', 'l', 'a', 'n', 'k', + 'c', 'n', 't', 'r', 'l', + 'd', 'i', 'g', 'i', 't', + 'g', 'r', 'a', 'p', 'h', + 'l', 'o', 'w', 'e', 'r', + 'p', 'r', 'i', 'n', 't', + 'p', 'u', 'n', 'c', 't', + 's', 'p', 'a', 'c', 'e', + 'u', 'n', 'i', 'c', 'o', 'd', 'e', + 'u', 'p', 'p', 'e', 'r', + 'v', + 'w', 'o', 'r', 'd', + 'x', 'd', 'i', 'g', 'i', 't', + }; + + static const character_pointer_range ranges[21] = + { + {data+0, data+5,}, // alnum + {data+5, data+10,}, // alpha + {data+10, data+15,}, // blank + {data+15, data+20,}, // cntrl + {data+20, data+21,}, // d + {data+20, data+25,}, // digit + {data+25, data+30,}, // graph + {data+29, data+30,}, // h + {data+30, data+31,}, // l + {data+30, data+35,}, // lower + {data+35, data+40,}, // print + {data+40, data+45,}, // punct + {data+45, data+46,}, // s + {data+45, data+50,}, // space + {data+57, data+58,}, // u + {data+50, data+57,}, // unicode + {data+57, data+62,}, // upper + {data+62, data+63,}, // v + {data+63, data+64,}, // w + {data+63, data+67,}, // word + {data+67, data+73,}, // xdigit + }; + const character_pointer_range* ranges_begin = ranges; + const character_pointer_range* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0])); + + character_pointer_range t = { p1, p2, }; + const character_pointer_range* p = std::lower_bound(ranges_begin, ranges_end, t); + if((p != ranges_end) && (t == *p)) + return static_cast(p - ranges); + return -1; +} + +// +// helper functions: +// +template +std::ptrdiff_t global_length(const charT* p) +{ + std::ptrdiff_t n = 0; + while(*p) + { + ++p; + ++n; + } + return n; +} +template<> +inline std::ptrdiff_t global_length(const char* p) +{ + return (std::strlen)(p); +} +#ifndef BOOST_NO_WREGEX +template<> +inline std::ptrdiff_t global_length(const wchar_t* p) +{ + return (std::ptrdiff_t)(std::wcslen)(p); +} +#endif +template +inline charT global_lower(charT c) +{ + return c; +} +template +inline charT global_upper(charT c) +{ + return c; +} + +inline char do_global_lower(char c) +{ + return static_cast((std::tolower)((unsigned char)c)); +} + +inline char do_global_upper(char c) +{ + return static_cast((std::toupper)((unsigned char)c)); +} +#ifndef BOOST_NO_WREGEX +inline wchar_t do_global_lower(wchar_t c) +{ + return (std::towlower)(c); +} + +inline wchar_t do_global_upper(wchar_t c) +{ + return (std::towupper)(c); +} +#endif +// +// This sucks: declare template specialisations of global_lower/global_upper +// that just forward to the non-template implementation functions. We do +// this because there is one compiler (Compaq Tru64 C++) that doesn't seem +// to differentiate between templates and non-template overloads.... +// what's more, the primary template, plus all overloads have to be +// defined in the same translation unit (if one is inline they all must be) +// otherwise the "local template instantiation" compiler option can pick +// the wrong instantiation when linking: +// +template<> inline char global_lower(char c) { return do_global_lower(c); } +template<> inline char global_upper(char c) { return do_global_upper(c); } +#ifndef BOOST_NO_WREGEX +template<> inline wchar_t global_lower(wchar_t c) { return do_global_lower(c); } +template<> inline wchar_t global_upper(wchar_t c) { return do_global_upper(c); } +#endif + +template +int global_value(charT c) +{ + static const charT zero = '0'; + static const charT nine = '9'; + static const charT a = 'a'; + static const charT f = 'f'; + static const charT A = 'A'; + static const charT F = 'F'; + + if(c > f) return -1; + if(c >= a) return 10 + (c - a); + if(c > F) return -1; + if(c >= A) return 10 + (c - A); + if(c > nine) return -1; + if(c >= zero) return c - zero; + return -1; +} +template +std::intmax_t global_toi(const charT*& p1, const charT* p2, int radix, const traits& t) +{ + (void)t; // warning suppression + std::intmax_t limit = (std::numeric_limits::max)() / radix; + std::intmax_t next_value = t.value(*p1, radix); + if((p1 == p2) || (next_value < 0) || (next_value >= radix)) + return -1; + std::intmax_t result = 0; + while(p1 != p2) + { + next_value = t.value(*p1, radix); + if((next_value < 0) || (next_value >= radix)) + break; + result *= radix; + result += next_value; + ++p1; + if (result > limit) + return -1; + } + return result; +} + +template +inline typename std::enable_if<(sizeof(charT) > 1), const charT*>::type get_escape_R_string() +{ +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +# pragma warning(disable:4309 4245) +#endif + static const charT e1[] = { '(', '?', '-', 'x', ':', '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', static_cast(0x85), static_cast(0x2028), + static_cast(0x2029), ']', ')', ')', '\0' }; + static const charT e2[] = { '(', '?', '-', 'x', ':', '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', static_cast(0x85), ']', ')', ')', '\0' }; + + charT c = static_cast(0x2029u); + bool b = (static_cast(c) == 0x2029u); + + return (b ? e1 : e2); +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif +} + +template +inline typename std::enable_if<(sizeof(charT) == 1), const charT*>::type get_escape_R_string() +{ +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +# pragma warning(disable:4309 4245) +#endif + static const charT e2[] = { + static_cast('('), + static_cast('?'), + static_cast('-'), + static_cast('x'), + static_cast(':'), + static_cast('('), + static_cast('?'), + static_cast('>'), + static_cast('\x0D'), + static_cast('\x0A'), + static_cast('?'), + static_cast('|'), + static_cast('['), + static_cast('\x0A'), + static_cast('\x0B'), + static_cast('\x0C'), + static_cast('\x85'), + static_cast(']'), + static_cast(')'), + static_cast(')'), + static_cast('\0') + }; + return e2; +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif +} } // BOOST_REGEX_DETAIL_NS } // boost + +#endif diff --git a/include/boost/regex/v5/regex_workaround.hpp b/include/boost/regex/v5/regex_workaround.hpp new file mode 100644 index 00000000..e9ca79d4 --- /dev/null +++ b/include/boost/regex/v5/regex_workaround.hpp @@ -0,0 +1,159 @@ +/* + * + * Copyright (c) 1998-2005 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_workarounds.cpp + * VERSION see + * DESCRIPTION: Declares Misc workarounds. + */ + +#ifndef BOOST_REGEX_WORKAROUND_HPP +#define BOOST_REGEX_WORKAROUND_HPP + +#include +#include +#include +#include + +#ifndef BOOST_REGEX_STANDALONE +#include +#include +#endif + +#ifdef BOOST_REGEX_NO_BOOL +# define BOOST_REGEX_MAKE_BOOL(x) static_cast((x) ? true : false) +#else +# define BOOST_REGEX_MAKE_BOOL(x) static_cast(x) +#endif + +/***************************************************************************** + * + * helper functions pointer_construct/pointer_destroy: + * + ****************************************************************************/ + +#ifdef __cplusplus +namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ + +#ifdef BOOST_REGEX_MSVC +#pragma warning (push) +#pragma warning (disable : 4100) +#endif + +template +inline void pointer_destroy(T* p) +{ p->~T(); (void)p; } + +#ifdef BOOST_REGEX_MSVC +#pragma warning (pop) +#endif + +template +inline void pointer_construct(T* p, const T& t) +{ new (p) T(t); } + +}} // namespaces +#endif + +/***************************************************************************** + * + * helper function copy: + * + ****************************************************************************/ + +#if defined(BOOST_WORKAROUND) +#if BOOST_WORKAROUND(BOOST_REGEX_MSVC, >= 1400) && defined(__STDC_WANT_SECURE_LIB__) && __STDC_WANT_SECURE_LIB__ +#define BOOST_REGEX_HAS_STRCPY_S +#endif +#endif + +#ifdef __cplusplus +namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ + +#if defined(BOOST_REGEX_MSVC) && (BOOST_REGEX_MSVC < 1910) + // + // MSVC 10 will either emit warnings or else refuse to compile + // code that makes perfectly legitimate use of std::copy, when + // the OutputIterator type is a user-defined class (apparently all user + // defined iterators are "unsafe"). What's more Microsoft have removed their + // non-standard "unchecked" versions, even though they are still in the MS + // documentation!! Work around this as best we can: + // + template + inline OutputIterator copy( + InputIterator first, + InputIterator last, + OutputIterator dest + ) + { + while (first != last) + *dest++ = *first++; + return dest; + } +#else + using std::copy; +#endif + + +#if defined(BOOST_REGEX_HAS_STRCPY_S) + + // use safe versions of strcpy etc: + using ::strcpy_s; + using ::strcat_s; +#else + inline std::size_t strcpy_s( + char *strDestination, + std::size_t sizeInBytes, + const char *strSource + ) + { + std::size_t lenSourceWithNull = std::strlen(strSource) + 1; + if (lenSourceWithNull > sizeInBytes) + return 1; + std::memcpy(strDestination, strSource, lenSourceWithNull); + return 0; + } + inline std::size_t strcat_s( + char *strDestination, + std::size_t sizeInBytes, + const char *strSource + ) + { + std::size_t lenSourceWithNull = std::strlen(strSource) + 1; + std::size_t lenDestination = std::strlen(strDestination); + if (lenSourceWithNull + lenDestination > sizeInBytes) + return 1; + std::memcpy(strDestination + lenDestination, strSource, lenSourceWithNull); + return 0; + } + +#endif + + inline void overflow_error_if_not_zero(std::size_t i) + { + if(i) + { + std::overflow_error e("String buffer too small"); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(e); +#else + throw e; +#endif + } + } + +}} // namespaces + +#endif // __cplusplus + +#endif // include guard + diff --git a/include/boost/regex/v5/states.hpp b/include/boost/regex/v5/states.hpp new file mode 100644 index 00000000..60fc99d0 --- /dev/null +++ b/include/boost/regex/v5/states.hpp @@ -0,0 +1,299 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE states.cpp + * VERSION see + * DESCRIPTION: Declares internal state machine structures. + */ + +#ifndef BOOST_REGEX_V5_STATES_HPP +#define BOOST_REGEX_V5_STATES_HPP + +namespace boost{ +namespace BOOST_REGEX_DETAIL_NS{ + +/*** mask_type ******************************************************* +Whenever we have a choice of two alternatives, we use an array of bytes +to indicate which of the two alternatives it is possible to take for any +given input character. If mask_take is set, then we can take the next +state, and if mask_skip is set then we can take the alternative. +***********************************************************************/ +enum mask_type +{ + mask_take = 1, + mask_skip = 2, + mask_init = 4, + mask_any = mask_skip | mask_take, + mask_all = mask_any +}; + +/*** helpers ********************************************************** +These helpers let us use function overload resolution to detect whether +we have narrow or wide character strings: +***********************************************************************/ +struct _narrow_type{}; +struct _wide_type{}; +template struct is_byte; +template<> struct is_byte { typedef _narrow_type width_type; }; +template<> struct is_byte{ typedef _narrow_type width_type; }; +template<> struct is_byte { typedef _narrow_type width_type; }; +template struct is_byte { typedef _wide_type width_type; }; + +/*** enum syntax_element_type ****************************************** +Every record in the state machine falls into one of the following types: +***********************************************************************/ +enum syntax_element_type +{ + // start of a marked sub-expression, or perl-style (?...) extension + syntax_element_startmark = 0, + // end of a marked sub-expression, or perl-style (?...) extension + syntax_element_endmark = syntax_element_startmark + 1, + // any sequence of literal characters + syntax_element_literal = syntax_element_endmark + 1, + // start of line assertion: ^ + syntax_element_start_line = syntax_element_literal + 1, + // end of line assertion $ + syntax_element_end_line = syntax_element_start_line + 1, + // match any character: . + syntax_element_wild = syntax_element_end_line + 1, + // end of expression: we have a match when we get here + syntax_element_match = syntax_element_wild + 1, + // perl style word boundary: \b + syntax_element_word_boundary = syntax_element_match + 1, + // perl style within word boundary: \B + syntax_element_within_word = syntax_element_word_boundary + 1, + // start of word assertion: \< + syntax_element_word_start = syntax_element_within_word + 1, + // end of word assertion: \> + syntax_element_word_end = syntax_element_word_start + 1, + // start of buffer assertion: \` + syntax_element_buffer_start = syntax_element_word_end + 1, + // end of buffer assertion: \' + syntax_element_buffer_end = syntax_element_buffer_start + 1, + // backreference to previously matched sub-expression + syntax_element_backref = syntax_element_buffer_end + 1, + // either a wide character set [..] or one with multicharacter collating elements: + syntax_element_long_set = syntax_element_backref + 1, + // narrow character set: [...] + syntax_element_set = syntax_element_long_set + 1, + // jump to a new state in the machine: + syntax_element_jump = syntax_element_set + 1, + // choose between two production states: + syntax_element_alt = syntax_element_jump + 1, + // a repeat + syntax_element_rep = syntax_element_alt + 1, + // match a combining character sequence + syntax_element_combining = syntax_element_rep + 1, + // perl style soft buffer end: \z + syntax_element_soft_buffer_end = syntax_element_combining + 1, + // perl style continuation: \G + syntax_element_restart_continue = syntax_element_soft_buffer_end + 1, + // single character repeats: + syntax_element_dot_rep = syntax_element_restart_continue + 1, + syntax_element_char_rep = syntax_element_dot_rep + 1, + syntax_element_short_set_rep = syntax_element_char_rep + 1, + syntax_element_long_set_rep = syntax_element_short_set_rep + 1, + // a backstep for lookbehind repeats: + syntax_element_backstep = syntax_element_long_set_rep + 1, + // an assertion that a mark was matched: + syntax_element_assert_backref = syntax_element_backstep + 1, + syntax_element_toggle_case = syntax_element_assert_backref + 1, + // a recursive expression: + syntax_element_recurse = syntax_element_toggle_case + 1, + // Verbs: + syntax_element_fail = syntax_element_recurse + 1, + syntax_element_accept = syntax_element_fail + 1, + syntax_element_commit = syntax_element_accept + 1, + syntax_element_then = syntax_element_commit + 1 +}; + +#ifdef BOOST_REGEX_DEBUG +// dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion +std::ostream& operator<<(std::ostream&, syntax_element_type); +#endif + +struct re_syntax_base; + +/*** union offset_type ************************************************ +Points to another state in the machine. During machine construction +we use integral offsets, but these are converted to pointers before +execution of the machine. +***********************************************************************/ +union offset_type +{ + re_syntax_base* p; + std::ptrdiff_t i; +}; + +/*** struct re_syntax_base ******************************************** +Base class for all states in the machine. +***********************************************************************/ +struct re_syntax_base +{ + syntax_element_type type; // what kind of state this is + offset_type next; // next state in the machine +}; + +/*** struct re_brace ************************************************** +A marked parenthesis. +***********************************************************************/ +struct re_brace : public re_syntax_base +{ + // The index to match, can be zero (don't mark the sub-expression) + // or negative (for perl style (?...) extensions): + int index; + bool icase; +}; + +/*** struct re_dot ************************************************** +Match anything. +***********************************************************************/ +enum +{ + dont_care = 1, + force_not_newline = 0, + force_newline = 2, + + test_not_newline = 2, + test_newline = 3 +}; +struct re_dot : public re_syntax_base +{ + unsigned char mask; +}; + +/*** struct re_literal ************************************************ +A string of literals, following this structure will be an +array of characters: charT[length] +***********************************************************************/ +struct re_literal : public re_syntax_base +{ + unsigned int length; +}; + +/*** struct re_case ************************************************ +Indicates whether we are moving to a case insensive block or not +***********************************************************************/ +struct re_case : public re_syntax_base +{ + bool icase; +}; + +/*** struct re_set_long *********************************************** +A wide character set of characters, following this structure will be +an array of type charT: +First csingles null-terminated strings +Then 2 * cranges NULL terminated strings +Then cequivalents NULL terminated strings +***********************************************************************/ +template +struct re_set_long : public re_syntax_base +{ + unsigned int csingles, cranges, cequivalents; + mask_type cclasses; + mask_type cnclasses; + bool isnot; + bool singleton; +}; + +/*** struct re_set **************************************************** +A set of narrow-characters, matches any of _map which is none-zero +***********************************************************************/ +struct re_set : public re_syntax_base +{ + unsigned char _map[1 << CHAR_BIT]; +}; + +/*** struct re_jump *************************************************** +Jump to a new location in the machine (not next). +***********************************************************************/ +struct re_jump : public re_syntax_base +{ + offset_type alt; // location to jump to +}; + +/*** struct re_alt *************************************************** +Jump to a new location in the machine (possibly next). +***********************************************************************/ +struct re_alt : public re_jump +{ + unsigned char _map[1 << CHAR_BIT]; // which characters can take the jump + unsigned int can_be_null; // true if we match a NULL string +}; + +/*** struct re_repeat ************************************************* +Repeat a section of the machine +***********************************************************************/ +struct re_repeat : public re_alt +{ + std::size_t min, max; // min and max allowable repeats + int state_id; // Unique identifier for this repeat + bool leading; // True if this repeat is at the start of the machine (lets us optimize some searches) + bool greedy; // True if this is a greedy repeat +}; + +/*** struct re_recurse ************************************************ +Recurse to a particular subexpression. +**********************************************************************/ +struct re_recurse : public re_jump +{ + int state_id; // identifier of first nested repeat within the recursion. +}; + +/*** struct re_commit ************************************************* +Used for the PRUNE, SKIP and COMMIT verbs which basically differ only in what happens +if no match is found and we start searching forward. +**********************************************************************/ +enum commit_type +{ + commit_prune, + commit_skip, + commit_commit +}; +struct re_commit : public re_syntax_base +{ + commit_type action; +}; + +/*** enum re_jump_size_type ******************************************* +Provides compiled size of re_jump structure (allowing for trailing alignment). +We provide this so we know how manybytes to insert when constructing the machine +(The value of padding_mask is defined in regex_raw_buffer.hpp). +***********************************************************************/ +enum re_jump_size_type +{ + re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask), + re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask), + re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask) +}; + +/*** proc re_is_set_member ********************************************* +Forward declaration: we'll need this one later... +***********************************************************************/ + +template +struct regex_data; + +template +iterator re_is_set_member(iterator next, + iterator last, + const re_set_long* set_, + const regex_data& e, bool icase); + +} // namespace BOOST_REGEX_DETAIL_NS + +} // namespace boost + +#endif + + diff --git a/include/boost/regex/v5/sub_match.hpp b/include/boost/regex/v5/sub_match.hpp new file mode 100644 index 00000000..aa61b560 --- /dev/null +++ b/include/boost/regex/v5/sub_match.hpp @@ -0,0 +1,382 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE sub_match.cpp + * VERSION see + * DESCRIPTION: Declares template class sub_match. + */ + +#ifndef BOOST_REGEX_V5_SUB_MATCH_HPP +#define BOOST_REGEX_V5_SUB_MATCH_HPP + +namespace boost{ + +template +struct sub_match : public std::pair +{ + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::difference_type difference_type; + typedef BidiIterator iterator_type; + typedef BidiIterator iterator; + typedef BidiIterator const_iterator; + + bool matched; + + sub_match() : std::pair(), matched(false) {} + sub_match(BidiIterator i) : std::pair(i, i), matched(false) {} + template + operator std::basic_string ()const + { + return matched ? std::basic_string(this->first, this->second) : std::basic_string(); + } + difference_type length()const + { + difference_type n = matched ? std::distance((BidiIterator)this->first, (BidiIterator)this->second) : 0; + return n; + } + std::basic_string str()const + { + std::basic_string result; + if(matched) + { + std::size_t len = std::distance((BidiIterator)this->first, (BidiIterator)this->second); + result.reserve(len); + BidiIterator i = this->first; + while(i != this->second) + { + result.append(1, *i); + ++i; + } + } + return result; + } + int compare(const sub_match& s)const + { + if(matched != s.matched) + return static_cast(matched) - static_cast(s.matched); + return str().compare(s.str()); + } + int compare(const std::basic_string& s)const + { + return str().compare(s); + } + int compare(const value_type* p)const + { + return str().compare(p); + } + + bool operator==(const sub_match& that)const + { return compare(that) == 0; } + bool operator !=(const sub_match& that)const + { return compare(that) != 0; } + bool operator<(const sub_match& that)const + { return compare(that) < 0; } + bool operator>(const sub_match& that)const + { return compare(that) > 0; } + bool operator<=(const sub_match& that)const + { return compare(that) <= 0; } + bool operator>=(const sub_match& that)const + { return compare(that) >= 0; } + +#ifdef BOOST_REGEX_MATCH_EXTRA + typedef std::vector > capture_sequence_type; + + const capture_sequence_type& captures()const + { + if(!m_captures) + m_captures.reset(new capture_sequence_type()); + return *m_captures; + } + // + // Private implementation API: DO NOT USE! + // + capture_sequence_type& get_captures()const + { + if(!m_captures) + m_captures.reset(new capture_sequence_type()); + return *m_captures; + } + +private: + mutable std::unique_ptr m_captures; +public: + +#endif + sub_match(const sub_match& that, bool +#ifdef BOOST_REGEX_MATCH_EXTRA + deep_copy +#endif + = true + ) + : std::pair(that), + matched(that.matched) + { +#ifdef BOOST_REGEX_MATCH_EXTRA + if(that.m_captures) + if(deep_copy) + m_captures.reset(new capture_sequence_type(*(that.m_captures))); +#endif + } + sub_match& operator=(const sub_match& that) + { + this->first = that.first; + this->second = that.second; + matched = that.matched; +#ifdef BOOST_REGEX_MATCH_EXTRA + if(that.m_captures) + get_captures() = *(that.m_captures); +#endif + return *this; + } + // + // Make this type a range, for both Boost.Range, and C++11: + // + BidiIterator begin()const { return this->first; } + BidiIterator end()const { return this->second; } +}; + +typedef sub_match csub_match; +typedef sub_match ssub_match; +#ifndef BOOST_NO_WREGEX +typedef sub_match wcsub_match; +typedef sub_match wssub_match; +#endif + +// comparison to std::basic_string<> part 1: +template +inline bool operator == (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) == 0; } +template +inline bool operator != (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) != 0; } +template +inline bool operator < (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) < 0; } +template +inline bool operator <= (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) <= 0; } +template +inline bool operator >= (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) >= 0; } +template +inline bool operator > (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) > 0; } +// comparison to std::basic_string<> part 2: +template +inline bool operator == (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) == 0; } +template +inline bool operator != (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) != 0; } +template +inline bool operator < (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) < 0; } +template +inline bool operator > (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) > 0; } +template +inline bool operator <= (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) <= 0; } +template +inline bool operator >= (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) >= 0; } +// comparison to const charT* part 1: +template +inline bool operator == (const sub_match& m, + typename std::iterator_traits::value_type const* s) +{ return m.str().compare(s) == 0; } +template +inline bool operator != (const sub_match& m, + typename std::iterator_traits::value_type const* s) +{ return m.str().compare(s) != 0; } +template +inline bool operator > (const sub_match& m, + typename std::iterator_traits::value_type const* s) +{ return m.str().compare(s) > 0; } +template +inline bool operator < (const sub_match& m, + typename std::iterator_traits::value_type const* s) +{ return m.str().compare(s) < 0; } +template +inline bool operator >= (const sub_match& m, + typename std::iterator_traits::value_type const* s) +{ return m.str().compare(s) >= 0; } +template +inline bool operator <= (const sub_match& m, + typename std::iterator_traits::value_type const* s) +{ return m.str().compare(s) <= 0; } +// comparison to const charT* part 2: +template +inline bool operator == (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) == 0; } +template +inline bool operator != (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) != 0; } +template +inline bool operator < (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) > 0; } +template +inline bool operator > (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) < 0; } +template +inline bool operator <= (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) >= 0; } +template +inline bool operator >= (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) <= 0; } + +// comparison to const charT& part 1: +template +inline bool operator == (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) == 0; } +template +inline bool operator != (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) != 0; } +template +inline bool operator > (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) > 0; } +template +inline bool operator < (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) < 0; } +template +inline bool operator >= (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) >= 0; } +template +inline bool operator <= (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) <= 0; } +// comparison to const charT* part 2: +template +inline bool operator == (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) == 0; } +template +inline bool operator != (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) != 0; } +template +inline bool operator < (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) > 0; } +template +inline bool operator > (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) < 0; } +template +inline bool operator <= (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) >= 0; } +template +inline bool operator >= (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) <= 0; } + +// addition operators: +template +inline std::basic_string::value_type, traits, Allocator> +operator + (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ + std::basic_string::value_type, traits, Allocator> result; + result.reserve(s.size() + m.length() + 1); + return result.append(s).append(m.first, m.second); +} +template +inline std::basic_string::value_type, traits, Allocator> +operator + (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ + std::basic_string::value_type, traits, Allocator> result; + result.reserve(s.size() + m.length() + 1); + return result.append(m.first, m.second).append(s); +} +template +inline std::basic_string::value_type> +operator + (typename std::iterator_traits::value_type const* s, + const sub_match& m) +{ + std::basic_string::value_type> result; + result.reserve(std::char_traits::value_type>::length(s) + m.length() + 1); + return result.append(s).append(m.first, m.second); +} +template +inline std::basic_string::value_type> +operator + (const sub_match& m, + typename std::iterator_traits::value_type const * s) +{ + std::basic_string::value_type> result; + result.reserve(std::char_traits::value_type>::length(s) + m.length() + 1); + return result.append(m.first, m.second).append(s); +} +template +inline std::basic_string::value_type> +operator + (typename std::iterator_traits::value_type const& s, + const sub_match& m) +{ + std::basic_string::value_type> result; + result.reserve(m.length() + 2); + return result.append(1, s).append(m.first, m.second); +} +template +inline std::basic_string::value_type> +operator + (const sub_match& m, + typename std::iterator_traits::value_type const& s) +{ + std::basic_string::value_type> result; + result.reserve(m.length() + 2); + return result.append(m.first, m.second).append(1, s); +} +template +inline std::basic_string::value_type> +operator + (const sub_match& m1, + const sub_match& m2) +{ + std::basic_string::value_type> result; + result.reserve(m1.length() + m2.length() + 1); + return result.append(m1.first, m1.second).append(m2.first, m2.second); +} +template +std::basic_ostream& + operator << (std::basic_ostream& os, + const sub_match& s) +{ + return (os << s.str()); +} + +} // namespace boost + +#endif + diff --git a/include/boost/regex/v5/syntax_type.hpp b/include/boost/regex/v5/syntax_type.hpp new file mode 100644 index 00000000..3efdf0b0 --- /dev/null +++ b/include/boost/regex/v5/syntax_type.hpp @@ -0,0 +1,105 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE syntax_type.hpp + * VERSION see + * DESCRIPTION: Declares regular expression synatx type enumerator. + */ + +#ifndef BOOST_REGEX_SYNTAX_TYPE_HPP +#define BOOST_REGEX_SYNTAX_TYPE_HPP + +namespace boost{ +namespace regex_constants{ + +typedef unsigned char syntax_type; + +// +// values chosen are binary compatible with previous version: +// +static const syntax_type syntax_char = 0; +static const syntax_type syntax_open_mark = 1; +static const syntax_type syntax_close_mark = 2; +static const syntax_type syntax_dollar = 3; +static const syntax_type syntax_caret = 4; +static const syntax_type syntax_dot = 5; +static const syntax_type syntax_star = 6; +static const syntax_type syntax_plus = 7; +static const syntax_type syntax_question = 8; +static const syntax_type syntax_open_set = 9; +static const syntax_type syntax_close_set = 10; +static const syntax_type syntax_or = 11; +static const syntax_type syntax_escape = 12; +static const syntax_type syntax_dash = 14; +static const syntax_type syntax_open_brace = 15; +static const syntax_type syntax_close_brace = 16; +static const syntax_type syntax_digit = 17; +static const syntax_type syntax_comma = 27; +static const syntax_type syntax_equal = 37; +static const syntax_type syntax_colon = 36; +static const syntax_type syntax_not = 53; + +// extensions: + +static const syntax_type syntax_hash = 13; +static const syntax_type syntax_newline = 26; + +// escapes: + +typedef syntax_type escape_syntax_type; + +static const escape_syntax_type escape_type_word_assert = 18; +static const escape_syntax_type escape_type_not_word_assert = 19; +static const escape_syntax_type escape_type_control_f = 29; +static const escape_syntax_type escape_type_control_n = 30; +static const escape_syntax_type escape_type_control_r = 31; +static const escape_syntax_type escape_type_control_t = 32; +static const escape_syntax_type escape_type_control_v = 33; +static const escape_syntax_type escape_type_ascii_control = 35; +static const escape_syntax_type escape_type_hex = 34; +static const escape_syntax_type escape_type_unicode = 0; // not used +static const escape_syntax_type escape_type_identity = 0; // not used +static const escape_syntax_type escape_type_backref = syntax_digit; +static const escape_syntax_type escape_type_decimal = syntax_digit; // not used +static const escape_syntax_type escape_type_class = 22; +static const escape_syntax_type escape_type_not_class = 23; + +// extensions: + +static const escape_syntax_type escape_type_left_word = 20; +static const escape_syntax_type escape_type_right_word = 21; +static const escape_syntax_type escape_type_start_buffer = 24; // for \` +static const escape_syntax_type escape_type_end_buffer = 25; // for \' +static const escape_syntax_type escape_type_control_a = 28; // for \a +static const escape_syntax_type escape_type_e = 38; // for \e +static const escape_syntax_type escape_type_E = 47; // for \Q\E +static const escape_syntax_type escape_type_Q = 48; // for \Q\E +static const escape_syntax_type escape_type_X = 49; // for \X +static const escape_syntax_type escape_type_C = 50; // for \C +static const escape_syntax_type escape_type_Z = 51; // for \Z +static const escape_syntax_type escape_type_G = 52; // for \G + +static const escape_syntax_type escape_type_property = 54; // for \p +static const escape_syntax_type escape_type_not_property = 55; // for \P +static const escape_syntax_type escape_type_named_char = 56; // for \N +static const escape_syntax_type escape_type_extended_backref = 57; // for \g +static const escape_syntax_type escape_type_reset_start_mark = 58; // for \K +static const escape_syntax_type escape_type_line_ending = 59; // for \R + +static const escape_syntax_type syntax_max = 60; + +} +} + + +#endif diff --git a/include/boost/regex/v5/u32regex_iterator.hpp b/include/boost/regex/v5/u32regex_iterator.hpp new file mode 100644 index 00000000..97d8e799 --- /dev/null +++ b/include/boost/regex/v5/u32regex_iterator.hpp @@ -0,0 +1,177 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE u32regex_iterator.hpp + * VERSION see + * DESCRIPTION: Provides u32regex_iterator implementation. + */ + +#ifndef BOOST_REGEX_V5_U32REGEX_ITERATOR_HPP +#define BOOST_REGEX_V5_U32REGEX_ITERATOR_HPP + +namespace boost{ + +template +class u32regex_iterator_implementation +{ + typedef u32regex regex_type; + + match_results what; // current match + BidirectionalIterator base; // start of sequence + BidirectionalIterator end; // end of sequence + const regex_type re; // the expression + match_flag_type flags; // flags for matching + +public: + u32regex_iterator_implementation(const regex_type* p, BidirectionalIterator last, match_flag_type f) + : base(), end(last), re(*p), flags(f){} + bool init(BidirectionalIterator first) + { + base = first; + return u32regex_search(first, end, what, re, flags, base); + } + bool compare(const u32regex_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) && (end == that.end) && (flags == that.flags) && (what[0].first == that.what[0].first) && (what[0].second == that.what[0].second); + } + const match_results& get() + { return what; } + bool next() + { + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail; + BidirectionalIterator next_start = what[0].second; + match_flag_type f(flags); + if(!what.length()) + f |= regex_constants::match_not_initial_null; + //if(base != next_start) + // f |= regex_constants::match_not_bob; + bool result = u32regex_search(next_start, end, what, re, f, base); + if(result) + what.set_base(base); + return result; + } +private: + u32regex_iterator_implementation& operator=(const u32regex_iterator_implementation&); +}; + +template +class u32regex_iterator +{ +private: + typedef u32regex_iterator_implementation impl; + typedef std::shared_ptr pimpl; +public: + typedef u32regex regex_type; + typedef match_results value_type; + typedef typename std::iterator_traits::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + u32regex_iterator(){} + u32regex_iterator(BidirectionalIterator a, BidirectionalIterator b, + const regex_type& re, + match_flag_type m = match_default) + : pdata(new impl(&re, b, m)) + { + if(!pdata->init(a)) + { + pdata.reset(); + } + } + u32regex_iterator(const u32regex_iterator& that) + : pdata(that.pdata) {} + u32regex_iterator& operator=(const u32regex_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const u32regex_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const u32regex_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + u32regex_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + u32regex_iterator operator++(int) + { + u32regex_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && !pdata.unique()) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef u32regex_iterator utf8regex_iterator; +typedef u32regex_iterator utf16regex_iterator; +typedef u32regex_iterator utf32regex_iterator; + +inline u32regex_iterator make_u32regex_iterator(const char* p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator(p, p+std::strlen(p), e, m); +} +#ifndef BOOST_NO_WREGEX +inline u32regex_iterator make_u32regex_iterator(const wchar_t* p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator(p, p+std::wcslen(p), e, m); +} +#endif +#if !defined(BOOST_REGEX_UCHAR_IS_WCHAR_T) +inline u32regex_iterator make_u32regex_iterator(const UChar* p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator(p, p+u_strlen(p), e, m); +} +#endif +template +inline u32regex_iterator::const_iterator> make_u32regex_iterator(const std::basic_string& p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string::const_iterator iter_type; + return u32regex_iterator(p.begin(), p.end(), e, m); +} +inline u32regex_iterator make_u32regex_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator(s.getBuffer(), s.getBuffer() + s.length(), e, m); +} + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_ITERATOR_HPP + diff --git a/include/boost/regex/v5/u32regex_token_iterator.hpp b/include/boost/regex/v5/u32regex_token_iterator.hpp new file mode 100644 index 00000000..0f3cad5b --- /dev/null +++ b/include/boost/regex/v5/u32regex_token_iterator.hpp @@ -0,0 +1,312 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE u32regex_token_iterator.hpp + * VERSION see + * DESCRIPTION: Provides u32regex_token_iterator implementation. + */ + +#ifndef BOOST_REGEX_V5_U32REGEX_TOKEN_ITERATOR_HPP +#define BOOST_REGEX_V5_U32REGEX_TOKEN_ITERATOR_HPP + +namespace boost{ + +#ifdef BOOST_REGEX_MSVC +# pragma warning(push) +# pragma warning(disable:4700) +#endif + +template +class u32regex_token_iterator_implementation +{ + typedef u32regex regex_type; + typedef sub_match value_type; + + match_results what; // current match + BidirectionalIterator end; // end of search area + BidirectionalIterator base; // start of search area + const regex_type re; // the expression + match_flag_type flags; // match flags + value_type result; // the current string result + int N; // the current sub-expression being enumerated + std::vector subs; // the sub-expressions to enumerate + +public: + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, int sub, match_flag_type f) + : end(last), re(*p), flags(f){ subs.push_back(sub); } + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const std::vector& v, match_flag_type f) + : end(last), re(*p), flags(f), subs(v){} + template + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const int (&submatches)[CN], match_flag_type f) + : end(last), re(*p), flags(f) + { + for(std::size_t i = 0; i < CN; ++i) + { + subs.push_back(submatches[i]); + } + } + + bool init(BidirectionalIterator first) + { + base = first; + N = 0; + if(u32regex_search(first, end, what, re, flags, base) == true) + { + N = 0; + result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]); + return true; + } + else if((subs[N] == -1) && (first != end)) + { + result.first = first; + result.second = end; + result.matched = (first != end); + N = -1; + return true; + } + return false; + } + bool compare(const u32regex_token_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) + && (end == that.end) + && (flags == that.flags) + && (N == that.N) + && (what[0].first == that.what[0].first) + && (what[0].second == that.what[0].second); + } + const value_type& get() + { return result; } + bool next() + { + if(N == -1) + return false; + if(N+1 < (int)subs.size()) + { + ++N; + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); + return true; + } + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail | regex_constants::match_not_bob; + BidirectionalIterator last_end(what[0].second); + if(u32regex_search(last_end, end, what, re, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags), base)) + { + N =0; + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); + return true; + } + else if((last_end != end) && (subs[0] == -1)) + { + N =-1; + result.first = last_end; + result.second = end; + result.matched = (last_end != end); + return true; + } + return false; + } +private: + u32regex_token_iterator_implementation& operator=(const u32regex_token_iterator_implementation&); +}; + +template +class u32regex_token_iterator +{ +private: + typedef u32regex_token_iterator_implementation impl; + typedef std::shared_ptr pimpl; +public: + typedef u32regex regex_type; + typedef sub_match value_type; + typedef typename std::iterator_traits::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + u32regex_token_iterator(){} + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + int submatch = 0, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatch, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const std::vector& submatches, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + template + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const int (&submatches)[N], match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + u32regex_token_iterator(const u32regex_token_iterator& that) + : pdata(that.pdata) {} + u32regex_token_iterator& operator=(const u32regex_token_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const u32regex_token_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const u32regex_token_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + u32regex_token_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + u32regex_token_iterator operator++(int) + { + u32regex_token_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && !pdata.unique()) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef u32regex_token_iterator utf8regex_token_iterator; +typedef u32regex_token_iterator utf16regex_token_iterator; +typedef u32regex_token_iterator utf32regex_token_iterator; + +// construction from an integral sub_match state_id: +inline u32regex_token_iterator make_u32regex_token_iterator(const char* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::strlen(p), e, submatch, m); +} +#ifndef BOOST_NO_WREGEX +inline u32regex_token_iterator make_u32regex_token_iterator(const wchar_t* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::wcslen(p), e, submatch, m); +} +#endif +#if !defined(BOOST_REGEX_UCHAR_IS_WCHAR_T) +inline u32regex_token_iterator make_u32regex_token_iterator(const UChar* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+u_strlen(p), e, submatch, m); +} +#endif +template +inline u32regex_token_iterator::const_iterator> make_u32regex_token_iterator(const std::basic_string& p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string::const_iterator iter_type; + return u32regex_token_iterator(p.begin(), p.end(), e, submatch, m); +} +inline u32regex_token_iterator make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); +} + +// construction from a reference to an array: +template +inline u32regex_token_iterator make_u32regex_token_iterator(const char* p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::strlen(p), e, submatch, m); +} +#ifndef BOOST_NO_WREGEX +template +inline u32regex_token_iterator make_u32regex_token_iterator(const wchar_t* p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::wcslen(p), e, submatch, m); +} +#endif +#if !defined(BOOST_REGEX_UCHAR_IS_WCHAR_T) +template +inline u32regex_token_iterator make_u32regex_token_iterator(const UChar* p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+u_strlen(p), e, submatch, m); +} +#endif +template +inline u32regex_token_iterator::const_iterator> make_u32regex_token_iterator(const std::basic_string& p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string::const_iterator iter_type; + return u32regex_token_iterator(p.begin(), p.end(), e, submatch, m); +} +template +inline u32regex_token_iterator make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); +} + +// construction from a vector of sub_match state_id's: +inline u32regex_token_iterator make_u32regex_token_iterator(const char* p, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::strlen(p), e, submatch, m); +} +#ifndef BOOST_NO_WREGEX +inline u32regex_token_iterator make_u32regex_token_iterator(const wchar_t* p, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::wcslen(p), e, submatch, m); +} +#endif +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) +inline u32regex_token_iterator make_u32regex_token_iterator(const UChar* p, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+u_strlen(p), e, submatch, m); +} +#endif +template +inline u32regex_token_iterator::const_iterator> make_u32regex_token_iterator(const std::basic_string& p, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string::const_iterator iter_type; + return u32regex_token_iterator(p.begin(), p.end(), e, submatch, m); +} +inline u32regex_token_iterator make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); +} + +#ifdef BOOST_REGEX_MSVC +# pragma warning(pop) +#endif + +} // namespace boost + +#endif // BOOST_REGEX_V5_REGEX_TOKEN_ITERATOR_HPP + + + + diff --git a/include/boost/regex/v5/unicode_iterator.hpp b/include/boost/regex/v5/unicode_iterator.hpp new file mode 100644 index 00000000..07443609 --- /dev/null +++ b/include/boost/regex/v5/unicode_iterator.hpp @@ -0,0 +1,864 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE unicode_iterator.hpp + * VERSION see + * DESCRIPTION: Iterator adapters for converting between different Unicode encodings. + */ + +/**************************************************************************** + +Contents: +~~~~~~~~~ + +1) Read Only, Input Adapters: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +template +class u32_to_u8_iterator; + +Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-8. + +template +class u8_to_u32_iterator; + +Adapts sequence of UTF-8 code points to "look like" a sequence of UTF-32. + +template +class u32_to_u16_iterator; + +Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-16. + +template +class u16_to_u32_iterator; + +Adapts sequence of UTF-16 code points to "look like" a sequence of UTF-32. + +2) Single pass output iterator adapters: + +template +class utf8_output_iterator; + +Accepts UTF-32 code points and forwards them on as UTF-8 code points. + +template +class utf16_output_iterator; + +Accepts UTF-32 code points and forwards them on as UTF-16 code points. + +****************************************************************************/ + +#ifndef BOOST_REGEX_UNICODE_ITERATOR_HPP +#define BOOST_REGEX_UNICODE_ITERATOR_HPP +#include +#include +#include +#include +#include +#include // CHAR_BIT + +#include + +#ifndef BOOST_REGEX_STANDALONE +#include +#endif + +namespace boost{ + +namespace detail{ + +static const std::uint16_t high_surrogate_base = 0xD7C0u; +static const std::uint16_t low_surrogate_base = 0xDC00u; +static const std::uint32_t ten_bit_mask = 0x3FFu; + +inline bool is_high_surrogate(std::uint16_t v) +{ + return (v & 0xFFFFFC00u) == 0xd800u; +} +inline bool is_low_surrogate(std::uint16_t v) +{ + return (v & 0xFFFFFC00u) == 0xdc00u; +} +template +inline bool is_surrogate(T v) +{ + return (v & 0xFFFFF800u) == 0xd800; +} + +inline unsigned utf8_byte_count(std::uint8_t c) +{ + // if the most significant bit with a zero in it is in position + // 8-N then there are N bytes in this UTF-8 sequence: + std::uint8_t mask = 0x80u; + unsigned result = 0; + while(c & mask) + { + ++result; + mask >>= 1; + } + return (result == 0) ? 1 : ((result > 4) ? 4 : result); +} + +inline unsigned utf8_trailing_byte_count(std::uint8_t c) +{ + return utf8_byte_count(c) - 1; +} + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4100) +#endif +#ifndef BOOST_NO_EXCEPTIONS +BOOST_REGEX_NORETURN +#endif +inline void invalid_utf32_code_point(std::uint32_t val) +{ + std::stringstream ss; + ss << "Invalid UTF-32 code point U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-16 sequence"; + std::out_of_range e(ss.str()); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(e); +#else + throw e; +#endif +} +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + + +} // namespace detail + +template +class u32_to_u16_iterator +{ + typedef typename std::iterator_traits::value_type base_value_type; + + static_assert(sizeof(base_value_type)*CHAR_BIT == 32, "Incorrectly sized template argument"); + static_assert(sizeof(U16Type)*CHAR_BIT == 16, "Incorrectly sized template argument"); + +public: + typedef std::ptrdiff_t difference_type; + typedef U16Type value_type; + typedef value_type const* pointer; + typedef value_type const reference; + typedef std::bidirectional_iterator_tag iterator_category; + + reference operator*()const + { + if(m_current == 2) + extract_current(); + return m_values[m_current]; + } + bool operator==(const u32_to_u16_iterator& that)const + { + if(m_position == that.m_position) + { + // Both m_currents must be equal, or both even + // this is the same as saying their sum must be even: + return (m_current + that.m_current) & 1u ? false : true; + } + return false; + } + bool operator!=(const u32_to_u16_iterator& that)const + { + return !(*this == that); + } + u32_to_u16_iterator& operator++() + { + // if we have a pending read then read now, so that we know whether + // to skip a position, or move to a low-surrogate: + if(m_current == 2) + { + // pending read: + extract_current(); + } + // move to the next surrogate position: + ++m_current; + // if we've reached the end skip a position: + if(m_values[m_current] == 0) + { + m_current = 2; + ++m_position; + } + return *this; + } + u32_to_u16_iterator operator++(int) + { + u32_to_u16_iterator r(*this); + ++(*this); + return r; + } + u32_to_u16_iterator& operator--() + { + if(m_current != 1) + { + // decrementing an iterator always leads to a valid position: + --m_position; + extract_current(); + m_current = m_values[1] ? 1 : 0; + } + else + { + m_current = 0; + } + return *this; + } + u32_to_u16_iterator operator--(int) + { + u32_to_u16_iterator r(*this); + --(*this); + return r; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u32_to_u16_iterator() : m_position(), m_current(0) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + } + u32_to_u16_iterator(BaseIterator b) : m_position(b), m_current(2) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + } +private: + + void extract_current()const + { + // begin by checking for a code point out of range: + std::uint32_t v = *m_position; + if(v >= 0x10000u) + { + if(v > 0x10FFFFu) + detail::invalid_utf32_code_point(*m_position); + // split into two surrogates: + m_values[0] = static_cast(v >> 10) + detail::high_surrogate_base; + m_values[1] = static_cast(v & detail::ten_bit_mask) + detail::low_surrogate_base; + m_current = 0; + BOOST_REGEX_ASSERT(detail::is_high_surrogate(m_values[0])); + BOOST_REGEX_ASSERT(detail::is_low_surrogate(m_values[1])); + } + else + { + // 16-bit code point: + m_values[0] = static_cast(*m_position); + m_values[1] = 0; + m_current = 0; + // value must not be a surrogate: + if(detail::is_surrogate(m_values[0])) + detail::invalid_utf32_code_point(*m_position); + } + } + BaseIterator m_position; + mutable U16Type m_values[3]; + mutable unsigned m_current; +}; + +template +class u16_to_u32_iterator +{ + // special values for pending iterator reads: + static const U32Type pending_read = 0xffffffffu; + + typedef typename std::iterator_traits::value_type base_value_type; + + static_assert(sizeof(base_value_type)*CHAR_BIT == 16, "Incorrectly sized template argument"); + static_assert(sizeof(U32Type)*CHAR_BIT == 32, "Incorrectly sized template argument"); + +public: + typedef std::ptrdiff_t difference_type; + typedef U32Type value_type; + typedef value_type const* pointer; + typedef value_type const reference; + typedef std::bidirectional_iterator_tag iterator_category; + + reference operator*()const + { + if(m_value == pending_read) + extract_current(); + return m_value; + } + bool operator==(const u16_to_u32_iterator& that)const + { + return m_position == that.m_position; + } + bool operator!=(const u16_to_u32_iterator& that)const + { + return !(*this == that); + } + u16_to_u32_iterator& operator++() + { + // skip high surrogate first if there is one: + if(detail::is_high_surrogate(*m_position)) ++m_position; + ++m_position; + m_value = pending_read; + return *this; + } + u16_to_u32_iterator operator++(int) + { + u16_to_u32_iterator r(*this); + ++(*this); + return r; + } + u16_to_u32_iterator& operator--() + { + --m_position; + // if we have a low surrogate then go back one more: + if(detail::is_low_surrogate(*m_position)) + --m_position; + m_value = pending_read; + return *this; + } + u16_to_u32_iterator operator--(int) + { + u16_to_u32_iterator r(*this); + --(*this); + return r; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u16_to_u32_iterator() : m_position() + { + m_value = pending_read; + } + u16_to_u32_iterator(BaseIterator b) : m_position(b) + { + m_value = pending_read; + } + // + // Range checked version: + // + u16_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b) + { + m_value = pending_read; + // + // The range must not start with a low surrogate, or end in a high surrogate, + // otherwise we run the risk of running outside the underlying input range. + // Likewise b must not be located at a low surrogate. + // + std::uint16_t val; + if(start != end) + { + if((b != start) && (b != end)) + { + val = *b; + if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u)) + invalid_code_point(val); + } + val = *start; + if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u)) + invalid_code_point(val); + val = *--end; + if(detail::is_high_surrogate(val)) + invalid_code_point(val); + } + } +private: + static void invalid_code_point(std::uint16_t val) + { + std::stringstream ss; + ss << "Misplaced UTF-16 surrogate U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-32 sequence"; + std::out_of_range e(ss.str()); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(e); +#else + throw e; +#endif + } + void extract_current()const + { + m_value = static_cast(static_cast< std::uint16_t>(*m_position)); + // if the last value is a high surrogate then adjust m_position and m_value as needed: + if(detail::is_high_surrogate(*m_position)) + { + // precondition; next value must have be a low-surrogate: + BaseIterator next(m_position); + std::uint16_t t = *++next; + if((t & 0xFC00u) != 0xDC00u) + invalid_code_point(t); + m_value = (m_value - detail::high_surrogate_base) << 10; + m_value |= (static_cast(static_cast< std::uint16_t>(t)) & detail::ten_bit_mask); + } + // postcondition; result must not be a surrogate: + if(detail::is_surrogate(m_value)) + invalid_code_point(static_cast< std::uint16_t>(m_value)); + } + BaseIterator m_position; + mutable U32Type m_value; +}; + +template +class u32_to_u8_iterator +{ + typedef typename std::iterator_traits::value_type base_value_type; + + static_assert(sizeof(base_value_type)*CHAR_BIT == 32, "Incorrectly sized template argument"); + static_assert(sizeof(U8Type)*CHAR_BIT == 8, "Incorrectly sized template argument"); + +public: + typedef std::ptrdiff_t difference_type; + typedef U8Type value_type; + typedef value_type const* pointer; + typedef value_type const reference; + typedef std::bidirectional_iterator_tag iterator_category; + + reference operator*()const + { + if(m_current == 4) + extract_current(); + return m_values[m_current]; + } + bool operator==(const u32_to_u8_iterator& that)const + { + if(m_position == that.m_position) + { + // either the m_current's must be equal, or one must be 0 and + // the other 4: which means neither must have bits 1 or 2 set: + return (m_current == that.m_current) + || (((m_current | that.m_current) & 3) == 0); + } + return false; + } + bool operator!=(const u32_to_u8_iterator& that)const + { + return !(*this == that); + } + u32_to_u8_iterator& operator++() + { + // if we have a pending read then read now, so that we know whether + // to skip a position, or move to a low-surrogate: + if(m_current == 4) + { + // pending read: + extract_current(); + } + // move to the next surrogate position: + ++m_current; + // if we've reached the end skip a position: + if(m_values[m_current] == 0) + { + m_current = 4; + ++m_position; + } + return *this; + } + u32_to_u8_iterator operator++(int) + { + u32_to_u8_iterator r(*this); + ++(*this); + return r; + } + u32_to_u8_iterator& operator--() + { + if((m_current & 3) == 0) + { + --m_position; + extract_current(); + m_current = 3; + while(m_current && (m_values[m_current] == 0)) + --m_current; + } + else + --m_current; + return *this; + } + u32_to_u8_iterator operator--(int) + { + u32_to_u8_iterator r(*this); + --(*this); + return r; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u32_to_u8_iterator() : m_position(), m_current(0) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + m_values[3] = 0; + m_values[4] = 0; + } + u32_to_u8_iterator(BaseIterator b) : m_position(b), m_current(4) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + m_values[3] = 0; + m_values[4] = 0; + } +private: + + void extract_current()const + { + std::uint32_t c = *m_position; + if(c > 0x10FFFFu) + detail::invalid_utf32_code_point(c); + if(c < 0x80u) + { + m_values[0] = static_cast(c); + m_values[1] = static_cast(0u); + m_values[2] = static_cast(0u); + m_values[3] = static_cast(0u); + } + else if(c < 0x800u) + { + m_values[0] = static_cast(0xC0u + (c >> 6)); + m_values[1] = static_cast(0x80u + (c & 0x3Fu)); + m_values[2] = static_cast(0u); + m_values[3] = static_cast(0u); + } + else if(c < 0x10000u) + { + m_values[0] = static_cast(0xE0u + (c >> 12)); + m_values[1] = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + m_values[2] = static_cast(0x80u + (c & 0x3Fu)); + m_values[3] = static_cast(0u); + } + else + { + m_values[0] = static_cast(0xF0u + (c >> 18)); + m_values[1] = static_cast(0x80u + ((c >> 12) & 0x3Fu)); + m_values[2] = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + m_values[3] = static_cast(0x80u + (c & 0x3Fu)); + } + m_current= 0; + } + BaseIterator m_position; + mutable U8Type m_values[5]; + mutable unsigned m_current; +}; + +template +class u8_to_u32_iterator +{ + // special values for pending iterator reads: + static const U32Type pending_read = 0xffffffffu; + + typedef typename std::iterator_traits::value_type base_value_type; + + static_assert(sizeof(base_value_type)*CHAR_BIT == 8, "Incorrectly sized template argument"); + static_assert(sizeof(U32Type)*CHAR_BIT == 32, "Incorrectly sized template argument"); + +public: + typedef std::ptrdiff_t difference_type; + typedef U32Type value_type; + typedef value_type const* pointer; + typedef value_type const reference; + typedef std::bidirectional_iterator_tag iterator_category; + + reference operator*()const + { + if(m_value == pending_read) + extract_current(); + return m_value; + } + bool operator==(const u8_to_u32_iterator& that)const + { + return m_position == that.m_position; + } + bool operator!=(const u8_to_u32_iterator& that)const + { + return !(*this == that); + } + u8_to_u32_iterator& operator++() + { + // We must not start with a continuation character: + if((static_cast(*m_position) & 0xC0) == 0x80) + invalid_sequence(); + // skip high surrogate first if there is one: + unsigned c = detail::utf8_byte_count(*m_position); + if(m_value == pending_read) + { + // Since we haven't read in a value, we need to validate the code points: + for(unsigned i = 0; i < c; ++i) + { + ++m_position; + // We must have a continuation byte: + if((i != c - 1) && ((static_cast(*m_position) & 0xC0) != 0x80)) + invalid_sequence(); + } + } + else + { + std::advance(m_position, c); + } + m_value = pending_read; + return *this; + } + u8_to_u32_iterator operator++(int) + { + u8_to_u32_iterator r(*this); + ++(*this); + return r; + } + u8_to_u32_iterator& operator--() + { + // Keep backtracking until we don't have a trailing character: + unsigned count = 0; + while((*--m_position & 0xC0u) == 0x80u) ++count; + // now check that the sequence was valid: + if(count != detail::utf8_trailing_byte_count(*m_position)) + invalid_sequence(); + m_value = pending_read; + return *this; + } + u8_to_u32_iterator operator--(int) + { + u8_to_u32_iterator r(*this); + --(*this); + return r; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u8_to_u32_iterator() : m_position() + { + m_value = pending_read; + } + u8_to_u32_iterator(BaseIterator b) : m_position(b) + { + m_value = pending_read; + } + // + // Checked constructor: + // + u8_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b) + { + m_value = pending_read; + // + // We must not start with a continuation character, or end with a + // truncated UTF-8 sequence otherwise we run the risk of going past + // the start/end of the underlying sequence: + // + if(start != end) + { + unsigned char v = *start; + if((v & 0xC0u) == 0x80u) + invalid_sequence(); + if((b != start) && (b != end) && ((*b & 0xC0u) == 0x80u)) + invalid_sequence(); + BaseIterator pos = end; + do + { + v = *--pos; + } + while((start != pos) && ((v & 0xC0u) == 0x80u)); + std::ptrdiff_t extra = detail::utf8_byte_count(v); + if(std::distance(pos, end) < extra) + invalid_sequence(); + } + } +private: + static void invalid_sequence() + { + std::out_of_range e("Invalid UTF-8 sequence encountered while trying to encode UTF-32 character"); +#ifndef BOOST_REGEX_STANDALONE + boost::throw_exception(e); +#else + throw e; +#endif + } + void extract_current()const + { + m_value = static_cast(static_cast< std::uint8_t>(*m_position)); + // we must not have a continuation character: + if((m_value & 0xC0u) == 0x80u) + invalid_sequence(); + // see how many extra bytes we have: + unsigned extra = detail::utf8_trailing_byte_count(*m_position); + // extract the extra bits, 6 from each extra byte: + BaseIterator next(m_position); + for(unsigned c = 0; c < extra; ++c) + { + ++next; + m_value <<= 6; + // We must have a continuation byte: + if((static_cast(*next) & 0xC0) != 0x80) + invalid_sequence(); + m_value += static_cast(*next) & 0x3Fu; + } + // we now need to remove a few of the leftmost bits, but how many depends + // upon how many extra bytes we've extracted: + static const std::uint32_t masks[4] = + { + 0x7Fu, + 0x7FFu, + 0xFFFFu, + 0x1FFFFFu, + }; + m_value &= masks[extra]; + // check the result is in range: + if(m_value > static_cast(0x10FFFFu)) + invalid_sequence(); + // The result must not be a surrogate: + if((m_value >= static_cast(0xD800)) && (m_value <= static_cast(0xDFFF))) + invalid_sequence(); + // We should not have had an invalidly encoded UTF8 sequence: + if((extra > 0) && (m_value <= static_cast(masks[extra - 1]))) + invalid_sequence(); + } + BaseIterator m_position; + mutable U32Type m_value; +}; + +template +class utf16_output_iterator +{ +public: + typedef void difference_type; + typedef void value_type; + typedef std::uint32_t* pointer; + typedef std::uint32_t& reference; + typedef std::output_iterator_tag iterator_category; + + utf16_output_iterator(const BaseIterator& b) + : m_position(b){} + utf16_output_iterator(const utf16_output_iterator& that) + : m_position(that.m_position){} + utf16_output_iterator& operator=(const utf16_output_iterator& that) + { + m_position = that.m_position; + return *this; + } + const utf16_output_iterator& operator*()const + { + return *this; + } + void operator=(std::uint32_t val)const + { + push(val); + } + utf16_output_iterator& operator++() + { + return *this; + } + utf16_output_iterator& operator++(int) + { + return *this; + } + BaseIterator base()const + { + return m_position; + } +private: + void push(std::uint32_t v)const + { + if(v >= 0x10000u) + { + // begin by checking for a code point out of range: + if(v > 0x10FFFFu) + detail::invalid_utf32_code_point(v); + // split into two surrogates: + *m_position++ = static_cast(v >> 10) + detail::high_surrogate_base; + *m_position++ = static_cast(v & detail::ten_bit_mask) + detail::low_surrogate_base; + } + else + { + // 16-bit code point: + // value must not be a surrogate: + if(detail::is_surrogate(v)) + detail::invalid_utf32_code_point(v); + *m_position++ = static_cast(v); + } + } + mutable BaseIterator m_position; +}; + +template +class utf8_output_iterator +{ +public: + typedef void difference_type; + typedef void value_type; + typedef std::uint32_t* pointer; + typedef std::uint32_t& reference; + typedef std::output_iterator_tag iterator_category; + + utf8_output_iterator(const BaseIterator& b) + : m_position(b){} + utf8_output_iterator(const utf8_output_iterator& that) + : m_position(that.m_position){} + utf8_output_iterator& operator=(const utf8_output_iterator& that) + { + m_position = that.m_position; + return *this; + } + const utf8_output_iterator& operator*()const + { + return *this; + } + void operator=(std::uint32_t val)const + { + push(val); + } + utf8_output_iterator& operator++() + { + return *this; + } + utf8_output_iterator& operator++(int) + { + return *this; + } + BaseIterator base()const + { + return m_position; + } +private: + void push(std::uint32_t c)const + { + if(c > 0x10FFFFu) + detail::invalid_utf32_code_point(c); + if(c < 0x80u) + { + *m_position++ = static_cast(c); + } + else if(c < 0x800u) + { + *m_position++ = static_cast(0xC0u + (c >> 6)); + *m_position++ = static_cast(0x80u + (c & 0x3Fu)); + } + else if(c < 0x10000u) + { + *m_position++ = static_cast(0xE0u + (c >> 12)); + *m_position++ = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + *m_position++ = static_cast(0x80u + (c & 0x3Fu)); + } + else + { + *m_position++ = static_cast(0xF0u + (c >> 18)); + *m_position++ = static_cast(0x80u + ((c >> 12) & 0x3Fu)); + *m_position++ = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + *m_position++ = static_cast(0x80u + (c & 0x3Fu)); + } + } + mutable BaseIterator m_position; +}; + +} // namespace boost + +#endif // BOOST_REGEX_UNICODE_ITERATOR_HPP + diff --git a/include/boost/regex/v5/w32_regex_traits.hpp b/include/boost/regex/v5/w32_regex_traits.hpp new file mode 100644 index 00000000..7e90e4ba --- /dev/null +++ b/include/boost/regex/v5/w32_regex_traits.hpp @@ -0,0 +1,1176 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE w32_regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits class w32_regex_traits. + */ + +#ifndef BOOST_W32_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_W32_REGEX_TRAITS_HPP_INCLUDED + +#ifndef BOOST_REGEX_NO_WIN32_LOCALE + +#include +#include +#ifdef BOOST_HAS_THREADS +#include +#endif +#include +#include + +#define VC_EXTRALEAN +#define WIN32_LEAN_AND_MEAN +#include + +#if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(UNDER_CE) +#pragma comment(lib, "user32.lib") +#endif + +#ifdef BOOST_REGEX_MSVC +#pragma warning(push) +#pragma warning(disable:4786) +#if BOOST_REGEX_MSVC < 1910 +#pragma warning(disable:4800) +#endif +#endif + +namespace boost{ + +// +// forward declaration is needed by some compilers: +// +template +class w32_regex_traits; + +namespace BOOST_REGEX_DETAIL_NS{ + +// +// start by typedeffing the types we'll need: +// +typedef std::uint32_t lcid_type; // placeholder for LCID. +typedef std::shared_ptr cat_type; // placeholder for dll HANDLE. + +// +// then add wrappers around the actual Win32 API's (ie implementation hiding): +// +lcid_type w32_get_default_locale(); +bool w32_is_lower(char, lcid_type); +#ifndef BOOST_NO_WREGEX +bool w32_is_lower(wchar_t, lcid_type); +#endif +bool w32_is_upper(char, lcid_type); +#ifndef BOOST_NO_WREGEX +bool w32_is_upper(wchar_t, lcid_type); +#endif +cat_type w32_cat_open(const std::string& name); +std::string w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::string& def); +#ifndef BOOST_NO_WREGEX +std::wstring w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::wstring& def); +#endif +std::string w32_transform(lcid_type state_id, const char* p1, const char* p2); +#ifndef BOOST_NO_WREGEX +std::wstring w32_transform(lcid_type state_id, const wchar_t* p1, const wchar_t* p2); +#endif +char w32_tolower(char c, lcid_type); +#ifndef BOOST_NO_WREGEX +wchar_t w32_tolower(wchar_t c, lcid_type); +#endif +char w32_toupper(char c, lcid_type); +#ifndef BOOST_NO_WREGEX +wchar_t w32_toupper(wchar_t c, lcid_type); +#endif +bool w32_is(lcid_type, std::uint32_t mask, char c); +#ifndef BOOST_NO_WREGEX +bool w32_is(lcid_type, std::uint32_t mask, wchar_t c); +#endif +// +// class w32_regex_traits_base: +// acts as a container for locale and the facets we are using. +// +template +struct w32_regex_traits_base +{ + w32_regex_traits_base(lcid_type l) + { imbue(l); } + lcid_type imbue(lcid_type l); + + lcid_type m_locale; +}; + +template +inline lcid_type w32_regex_traits_base::imbue(lcid_type l) +{ + lcid_type result(m_locale); + m_locale = l; + return result; +} + +// +// class w32_regex_traits_char_layer: +// implements methods that require specialisation for narrow characters: +// +template +class w32_regex_traits_char_layer : public w32_regex_traits_base +{ + typedef std::basic_string string_type; + typedef std::map map_type; + typedef typename map_type::const_iterator map_iterator_type; +public: + w32_regex_traits_char_layer(const lcid_type l); + + regex_constants::syntax_type syntax_type(charT c)const + { + map_iterator_type i = m_char_map.find(c); + return ((i == m_char_map.end()) ? 0 : i->second); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + map_iterator_type i = m_char_map.find(c); + if(i == m_char_map.end()) + { + if(::boost::BOOST_REGEX_DETAIL_NS::w32_is_lower(c, this->m_locale)) return regex_constants::escape_type_class; + if(::boost::BOOST_REGEX_DETAIL_NS::w32_is_upper(c, this->m_locale)) return regex_constants::escape_type_not_class; + return 0; + } + return i->second; + } + charT tolower(charT c)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::w32_tolower(c, this->m_locale); + } + bool isctype(std::uint32_t mask, charT c)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, mask, c); + } + +private: + string_type get_default_message(regex_constants::syntax_type); + // TODO: use a hash table when available! + map_type m_char_map; +}; + +template +w32_regex_traits_char_layer::w32_regex_traits_char_layer(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l) + : w32_regex_traits_base(l) +{ + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: + cat_type cat; + std::string cat_name(w32_regex_traits::get_catalog_name()); + if(cat_name.size()) + { + cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name); + if(!cat) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if(cat) + { + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i, get_default_message(i)); + for(typename string_type::size_type j = 0; j < mss.size(); ++j) + { + this->m_char_map[mss[j]] = i; + } + } + } + else + { + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + const char* ptr = get_default_syntax(i); + while(ptr && *ptr) + { + this->m_char_map[static_cast(*ptr)] = i; + ++ptr; + } + } + } +} + +template +typename w32_regex_traits_char_layer::string_type + w32_regex_traits_char_layer::get_default_message(regex_constants::syntax_type i) +{ + const char* ptr = get_default_syntax(i); + string_type result; + while(ptr && *ptr) + { + result.append(1, static_cast(*ptr)); + ++ptr; + } + return result; +} + +// +// specialised version for narrow characters: +// +template <> +class w32_regex_traits_char_layer : public w32_regex_traits_base +{ + typedef std::string string_type; +public: + w32_regex_traits_char_layer(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l) + : w32_regex_traits_base(l) + { + init(); + } + + regex_constants::syntax_type syntax_type(char c)const + { + return m_char_map[static_cast(c)]; + } + regex_constants::escape_syntax_type escape_syntax_type(char c) const + { + return m_char_map[static_cast(c)]; + } + char tolower(char c)const + { + return m_lower_map[static_cast(c)]; + } + bool isctype(std::uint32_t mask, char c)const + { + return m_type_map[static_cast(c)] & mask; + } + +private: + regex_constants::syntax_type m_char_map[1u << CHAR_BIT]; + char m_lower_map[1u << CHAR_BIT]; + std::uint16_t m_type_map[1u << CHAR_BIT]; + template + void init(); +}; + +// +// class w32_regex_traits_implementation: +// provides pimpl implementation for w32_regex_traits. +// +template +class w32_regex_traits_implementation : public w32_regex_traits_char_layer +{ +public: + typedef typename w32_regex_traits::char_class_type char_class_type; + static const char_class_type mask_word = 0x0400; // must be C1_DEFINED << 1 + static const char_class_type mask_unicode = 0x0800; // must be C1_DEFINED << 2 + static const char_class_type mask_horizontal = 0x1000; // must be C1_DEFINED << 3 + static const char_class_type mask_vertical = 0x2000; // must be C1_DEFINED << 4 + static const char_class_type mask_base = 0x3ff; // all the masks used by the CT_CTYPE1 group + + typedef std::basic_string string_type; + typedef charT char_type; + w32_regex_traits_implementation(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l); + std::string error_string(regex_constants::error_type n) const + { + if(!m_error_strings.empty()) + { + std::map::const_iterator p = m_error_strings.find(n); + return (p == m_error_strings.end()) ? std::string(get_default_error_string(n)) : p->second; + } + return get_default_error_string(n); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + char_class_type result = lookup_classname_imp(p1, p2); + if(result == 0) + { + typedef typename string_type::size_type size_type; + string_type temp(p1, p2); + for(size_type i = 0; i < temp.size(); ++i) + temp[i] = this->tolower(temp[i]); + result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size()); + } + return result; + } + string_type lookup_collatename(const charT* p1, const charT* p2) const; + string_type transform_primary(const charT* p1, const charT* p2) const; + string_type transform(const charT* p1, const charT* p2) const + { + return ::boost::BOOST_REGEX_DETAIL_NS::w32_transform(this->m_locale, p1, p2); + } +private: + std::map m_error_strings; // error messages indexed by numberic ID + std::map m_custom_class_names; // character class names + std::map m_custom_collate_names; // collating element names + unsigned m_collate_type; // the form of the collation string + charT m_collate_delim; // the collation group delimiter + // + // helpers: + // + char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const; +}; + +template +typename w32_regex_traits_implementation::string_type + w32_regex_traits_implementation::transform_primary(const charT* p1, const charT* p2) const +{ + string_type result; + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch(m_collate_type) + { + case sort_C: + case sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + typedef typename string_type::size_type size_type; + for(size_type i = 0; i < result.size(); ++i) + result[i] = this->tolower(result[i]); + result = this->transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case sort_fixed: + { + // get a regular sort key, and then truncate it: + result.assign(this->transform(p1, p2)); + result.erase(this->m_collate_delim); + break; + } + case sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result.assign(this->transform(p1, p2)); + std::size_t i; + for(i = 0; i < result.size(); ++i) + { + if(result[i] == m_collate_delim) + break; + } + result.erase(i); + break; + } + if(result.empty()) + result = string_type(1, charT(0)); + return result; +} + +template +typename w32_regex_traits_implementation::string_type + w32_regex_traits_implementation::lookup_collatename(const charT* p1, const charT* p2) const +{ + typedef typename std::map::const_iterator iter_type; + if(m_custom_collate_names.size()) + { + iter_type pos = m_custom_collate_names.find(string_type(p1, p2)); + if(pos != m_custom_collate_names.end()) + return pos->second; + } + std::string name(p1, p2); + name = lookup_default_collate_name(name); + if(name.size()) + return string_type(name.begin(), name.end()); + if(p2 - p1 == 1) + return string_type(1, *p1); + return string_type(); +} + +template +w32_regex_traits_implementation::w32_regex_traits_implementation(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l) +: w32_regex_traits_char_layer(l) +{ + cat_type cat; + std::string cat_name(w32_regex_traits::get_catalog_name()); + if(cat_name.size()) + { + cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name); + if(!cat) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if(cat) + { + // + // Error messages: + // + for(boost::regex_constants::error_type i = static_cast(0); + i <= boost::regex_constants::error_unknown; + i = static_cast(i + 1)) + { + const char* p = get_default_error_string(i); + string_type default_message; + while(*p) + { + default_message.append(1, static_cast(*p)); + ++p; + } + string_type s = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i+200, default_message); + std::string result; + for(std::string::size_type j = 0; j < s.size(); ++j) + { + result.append(1, static_cast(s[j])); + } + m_error_strings[i] = result; + } + // + // Custom class names: + // + static const char_class_type masks[14] = + { + 0x0104u, // C1_ALPHA | C1_DIGIT + 0x0100u, // C1_ALPHA + 0x0020u, // C1_CNTRL + 0x0004u, // C1_DIGIT + (~(0x0020u|0x0008u) & 0x01ffu) | 0x0400u, // not C1_CNTRL or C1_SPACE + 0x0002u, // C1_LOWER + (~0x0020u & 0x01ffu) | 0x0400, // not C1_CNTRL + 0x0010u, // C1_PUNCT + 0x0008u, // C1_SPACE + 0x0001u, // C1_UPPER + 0x0080u, // C1_XDIGIT + 0x0040u, // C1_BLANK + w32_regex_traits_implementation::mask_word, + w32_regex_traits_implementation::mask_unicode, + }; + static const string_type null_string; + for(unsigned int j = 0; j <= 13; ++j) + { + string_type s(::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, j+300, null_string)); + if(s.size()) + this->m_custom_class_names[s] = masks[j]; + } + } + // + // get the collation format used by m_pcollate: + // + m_collate_type = BOOST_REGEX_DETAIL_NS::find_sort_syntax(this, &m_collate_delim); +} + +template +typename w32_regex_traits_implementation::char_class_type + w32_regex_traits_implementation::lookup_classname_imp(const charT* p1, const charT* p2) const +{ + static const char_class_type masks[22] = + { + 0, + 0x0104u, // C1_ALPHA | C1_DIGIT + 0x0100u, // C1_ALPHA + 0x0040u, // C1_BLANK + 0x0020u, // C1_CNTRL + 0x0004u, // C1_DIGIT + 0x0004u, // C1_DIGIT + (~(0x0020u|0x0008u|0x0040) & 0x01ffu) | 0x0400u, // not C1_CNTRL or C1_SPACE or C1_BLANK + w32_regex_traits_implementation::mask_horizontal, + 0x0002u, // C1_LOWER + 0x0002u, // C1_LOWER + (~0x0020u & 0x01ffu) | 0x0400, // not C1_CNTRL + 0x0010u, // C1_PUNCT + 0x0008u, // C1_SPACE + 0x0008u, // C1_SPACE + 0x0001u, // C1_UPPER + w32_regex_traits_implementation::mask_unicode, + 0x0001u, // C1_UPPER + w32_regex_traits_implementation::mask_vertical, + 0x0104u | w32_regex_traits_implementation::mask_word, + 0x0104u | w32_regex_traits_implementation::mask_word, + 0x0080u, // C1_XDIGIT + }; + if(m_custom_class_names.size()) + { + typedef typename std::map, char_class_type>::const_iterator map_iter; + map_iter pos = m_custom_class_names.find(string_type(p1, p2)); + if(pos != m_custom_class_names.end()) + return pos->second; + } + std::size_t state_id = 1u + (std::size_t)BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); + if(state_id < sizeof(masks) / sizeof(masks[0])) + return masks[state_id]; + return masks[0]; +} + + +template +std::shared_ptr > create_w32_regex_traits(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l) +{ + // TODO: create a cache for previously constructed objects. + return boost::object_cache< ::boost::BOOST_REGEX_DETAIL_NS::lcid_type, w32_regex_traits_implementation >::get(l, 5); +} + +} // BOOST_REGEX_DETAIL_NS + +template +class w32_regex_traits +{ +public: + typedef charT char_type; + typedef std::size_t size_type; + typedef std::basic_string string_type; + typedef ::boost::BOOST_REGEX_DETAIL_NS::lcid_type locale_type; + typedef std::uint_least32_t char_class_type; + + struct boost_extensions_tag{}; + + w32_regex_traits() + : m_pimpl(BOOST_REGEX_DETAIL_NS::create_w32_regex_traits(::boost::BOOST_REGEX_DETAIL_NS::w32_get_default_locale())) + { } + static size_type length(const char_type* p) + { + return std::char_traits::length(p); + } + regex_constants::syntax_type syntax_type(charT c)const + { + return m_pimpl->syntax_type(c); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + return m_pimpl->escape_syntax_type(c); + } + charT translate(charT c) const + { + return c; + } + charT translate_nocase(charT c) const + { + return this->m_pimpl->tolower(c); + } + charT translate(charT c, bool icase) const + { + return icase ? this->m_pimpl->tolower(c) : c; + } + charT tolower(charT c) const + { + return this->m_pimpl->tolower(c); + } + charT toupper(charT c) const + { + return ::boost::BOOST_REGEX_DETAIL_NS::w32_toupper(c, this->m_pimpl->m_locale); + } + string_type transform(const charT* p1, const charT* p2) const + { + return ::boost::BOOST_REGEX_DETAIL_NS::w32_transform(this->m_pimpl->m_locale, p1, p2); + } + string_type transform_primary(const charT* p1, const charT* p2) const + { + return m_pimpl->transform_primary(p1, p2); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_classname(p1, p2); + } + string_type lookup_collatename(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_collatename(p1, p2); + } + bool isctype(charT c, char_class_type f) const + { + if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_base) + && (this->m_pimpl->isctype(f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_base, c))) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_unicode) && BOOST_REGEX_DETAIL_NS::is_extended(c)) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_word) && (c == '_')) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_vertical) + && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v'))) + return true; + else if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_horizontal) + && this->isctype(c, 0x0008u) && !this->isctype(c, BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation::mask_vertical)) + return true; + return false; + } + std::intmax_t toi(const charT*& p1, const charT* p2, int radix)const + { + return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); + } + int value(charT c, int radix)const + { + int result = (int)::boost::BOOST_REGEX_DETAIL_NS::global_value(c); + return result < radix ? result : -1; + } + locale_type imbue(locale_type l) + { + ::boost::BOOST_REGEX_DETAIL_NS::lcid_type result(getloc()); + m_pimpl = BOOST_REGEX_DETAIL_NS::create_w32_regex_traits(l); + return result; + } + locale_type getloc()const + { + return m_pimpl->m_locale; + } + std::string error_string(regex_constants::error_type n) const + { + return m_pimpl->error_string(n); + } + + // + // extension: + // set the name of the message catalog in use (defaults to "boost_regex"). + // + static std::string catalog_name(const std::string& name); + static std::string get_catalog_name(); + +private: + std::shared_ptr > m_pimpl; + // + // catalog name handler: + // + static std::string& get_catalog_name_inst(); + +#ifdef BOOST_HAS_THREADS + static std::mutex& get_mutex_inst(); +#endif +}; + +template +std::string w32_regex_traits::catalog_name(const std::string& name) +{ +#ifdef BOOST_HAS_THREADS + std::lock_guard lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + get_catalog_name_inst() = name; + return result; +} + +template +std::string& w32_regex_traits::get_catalog_name_inst() +{ + static std::string s_name; + return s_name; +} + +template +std::string w32_regex_traits::get_catalog_name() +{ +#ifdef BOOST_HAS_THREADS + std::lock_guard lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + return result; +} + +#ifdef BOOST_HAS_THREADS +template +std::mutex& w32_regex_traits::get_mutex_inst() +{ + static std::mutex s_mutex; + return s_mutex; +} +#endif + +namespace BOOST_REGEX_DETAIL_NS { + +#ifdef BOOST_NO_ANSI_APIS + inline UINT get_code_page_for_locale_id(lcid_type idx) + { + WCHAR code_page_string[7]; + if (::GetLocaleInfoW(idx, LOCALE_IDEFAULTANSICODEPAGE, code_page_string, 7) == 0) + return 0; + + return static_cast(_wtol(code_page_string)); +} +#endif + + template + inline void w32_regex_traits_char_layer::init() + { + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: + std::memset(m_char_map, 0, sizeof(m_char_map)); + cat_type cat; + std::string cat_name(w32_regex_traits::get_catalog_name()); + if (cat_name.size()) + { + cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name); + if (!cat) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + ::boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if (cat) + { + for (regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i, get_default_syntax(i)); + for (string_type::size_type j = 0; j < mss.size(); ++j) + { + m_char_map[static_cast(mss[j])] = i; + } + } + } + else + { + for (regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + const char* ptr = get_default_syntax(i); + while (ptr && *ptr) + { + m_char_map[static_cast(*ptr)] = i; + ++ptr; + } + } + } + // + // finish off by calculating our escape types: + // + unsigned char i = 'A'; + do + { + if (m_char_map[i] == 0) + { + if (::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0002u, (char)i)) + m_char_map[i] = regex_constants::escape_type_class; + else if (::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0001u, (char)i)) + m_char_map[i] = regex_constants::escape_type_not_class; + } + } while (0xFF != i++); + + // + // fill in lower case map: + // + char char_map[1 << CHAR_BIT]; + for (int ii = 0; ii < (1 << CHAR_BIT); ++ii) + char_map[ii] = static_cast(ii); +#ifndef BOOST_NO_ANSI_APIS + int r = ::LCMapStringA(this->m_locale, LCMAP_LOWERCASE, char_map, 1 << CHAR_BIT, this->m_lower_map, 1 << CHAR_BIT); + BOOST_REGEX_ASSERT(r != 0); +#else + UINT code_page = get_code_page_for_locale_id(this->m_locale); + BOOST_REGEX_ASSERT(code_page != 0); + + WCHAR wide_char_map[1 << CHAR_BIT]; + int conv_r = ::MultiByteToWideChar(code_page, 0, char_map, 1 << CHAR_BIT, wide_char_map, 1 << CHAR_BIT); + BOOST_REGEX_ASSERT(conv_r != 0); + + WCHAR wide_lower_map[1 << CHAR_BIT]; + int r = ::LCMapStringW(this->m_locale, LCMAP_LOWERCASE, wide_char_map, 1 << CHAR_BIT, wide_lower_map, 1 << CHAR_BIT); + BOOST_REGEX_ASSERT(r != 0); + + conv_r = ::WideCharToMultiByte(code_page, 0, wide_lower_map, r, this->m_lower_map, 1 << CHAR_BIT, NULL, NULL); + BOOST_REGEX_ASSERT(conv_r != 0); +#endif + if (r < (1 << CHAR_BIT)) + { + // if we have multibyte characters then not all may have been given + // a lower case mapping: + for (int jj = r; jj < (1 << CHAR_BIT); ++jj) + this->m_lower_map[jj] = static_cast(jj); + } + +#ifndef BOOST_NO_ANSI_APIS + r = ::GetStringTypeExA(this->m_locale, CT_CTYPE1, char_map, 1 << CHAR_BIT, this->m_type_map); +#else + r = ::GetStringTypeExW(this->m_locale, CT_CTYPE1, wide_char_map, 1 << CHAR_BIT, this->m_type_map); +#endif + BOOST_REGEX_ASSERT(0 != r); + } + + inline lcid_type w32_get_default_locale() + { + return ::GetUserDefaultLCID(); + } + + inline bool w32_is_lower(char c, lcid_type idx) + { +#ifndef BOOST_NO_ANSI_APIS + WORD mask; + if (::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER)) + return true; + return false; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return false; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return false; + + WORD mask; + if (::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_LOWER)) + return true; + return false; +#endif + } + + inline bool w32_is_lower(wchar_t c, lcid_type idx) + { + WORD mask; + if (::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER)) + return true; + return false; + } + + inline bool w32_is_upper(char c, lcid_type idx) + { +#ifndef BOOST_NO_ANSI_APIS + WORD mask; + if (::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER)) + return true; + return false; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return false; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return false; + + WORD mask; + if (::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_UPPER)) + return true; + return false; +#endif + } + + inline bool w32_is_upper(wchar_t c, lcid_type idx) + { + WORD mask; + if (::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER)) + return true; + return false; + } + + inline void free_module(void* mod) + { + ::FreeLibrary(static_cast(mod)); + } + + inline cat_type w32_cat_open(const std::string& name) + { +#ifndef BOOST_NO_ANSI_APIS + cat_type result(::LoadLibraryA(name.c_str()), &free_module); + return result; +#else + LPWSTR wide_name = (LPWSTR)_alloca((name.size() + 1) * sizeof(WCHAR)); + if (::MultiByteToWideChar(CP_ACP, 0, name.c_str(), name.size(), wide_name, name.size() + 1) == 0) + return cat_type(); + + cat_type result(::LoadLibraryW(wide_name), &free_module); + return result; +#endif + } + + inline std::string w32_cat_get(const cat_type& cat, lcid_type, int i, const std::string& def) + { +#ifndef BOOST_NO_ANSI_APIS + char buf[256]; + if (0 == ::LoadStringA( + static_cast(cat.get()), + i, + buf, + 256 + )) + { + return def; + } +#else + WCHAR wbuf[256]; + int r = ::LoadStringW( + static_cast(cat.get()), + i, + wbuf, + 256 + ); + if (r == 0) + return def; + + + int buf_size = 1 + ::WideCharToMultiByte(CP_ACP, 0, wbuf, r, NULL, 0, NULL, NULL); + LPSTR buf = (LPSTR)_alloca(buf_size); + if (::WideCharToMultiByte(CP_ACP, 0, wbuf, r, buf, buf_size, NULL, NULL) == 0) + return def; // failed conversion. +#endif + return std::string(buf); + } + +#ifndef BOOST_NO_WREGEX + inline std::wstring w32_cat_get(const cat_type& cat, lcid_type, int i, const std::wstring& def) + { + wchar_t buf[256]; + if (0 == ::LoadStringW( + static_cast(cat.get()), + i, + buf, + 256 + )) + { + return def; + } + return std::wstring(buf); + } +#endif + inline std::string w32_transform(lcid_type idx, const char* p1, const char* p2) + { +#ifndef BOOST_NO_ANSI_APIS + int bytes = ::LCMapStringA( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + 0, // destination buffer + 0 // size of destination buffer + ); + if (!bytes) + return std::string(p1, p2); + std::string result(++bytes, '\0'); + bytes = ::LCMapStringA( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + &*result.begin(), // destination buffer + bytes // size of destination buffer + ); +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return std::string(p1, p2); + + int src_len = static_cast(p2 - p1); + LPWSTR wide_p1 = (LPWSTR)_alloca((src_len + 1) * 2); + if (::MultiByteToWideChar(code_page, 0, p1, src_len, wide_p1, src_len + 1) == 0) + return std::string(p1, p2); + + int bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + wide_p1, // source string + src_len, // number of characters in source string + 0, // destination buffer + 0 // size of destination buffer + ); + if (!bytes) + return std::string(p1, p2); + std::string result(++bytes, '\0'); + bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + wide_p1, // source string + src_len, // number of characters in source string + (LPWSTR) & *result.begin(), // destination buffer + bytes // size of destination buffer + ); +#endif + if (bytes > static_cast(result.size())) + return std::string(p1, p2); + while (result.size() && result[result.size() - 1] == '\0') + { + result.erase(result.size() - 1); + } + return result; + } + +#ifndef BOOST_NO_WREGEX + inline std::wstring w32_transform(lcid_type idx, const wchar_t* p1, const wchar_t* p2) + { + int bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + 0, // destination buffer + 0 // size of destination buffer + ); + if (!bytes) + return std::wstring(p1, p2); + std::string result(++bytes, '\0'); + bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + reinterpret_cast(&*result.begin()), // destination buffer *of bytes* + bytes // size of destination buffer + ); + if (bytes > static_cast(result.size())) + return std::wstring(p1, p2); + while (result.size() && result[result.size() - 1] == L'\0') + { + result.erase(result.size() - 1); + } + std::wstring r2; + for (std::string::size_type i = 0; i < result.size(); ++i) + r2.append(1, static_cast(static_cast(result[i]))); + return r2; + } +#endif + inline char w32_tolower(char c, lcid_type idx) + { + char result[2]; +#ifndef BOOST_NO_ANSI_APIS + int b = ::LCMapStringA( + idx, // locale identifier + LCMAP_LOWERCASE, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return c; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return c; + + WCHAR wide_result; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_LOWERCASE, // mapping transformation type + &wide_c, // source string + 1, // number of characters in source string + &wide_result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; + + if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0) + return c; // No single byte lower case equivalent available +#endif + return result[0]; + } + +#ifndef BOOST_NO_WREGEX + inline wchar_t w32_tolower(wchar_t c, lcid_type idx) + { + wchar_t result[2]; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_LOWERCASE, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; + return result[0]; + } +#endif + inline char w32_toupper(char c, lcid_type idx) + { + char result[2]; +#ifndef BOOST_NO_ANSI_APIS + int b = ::LCMapStringA( + idx, // locale identifier + LCMAP_UPPERCASE, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return c; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return c; + + WCHAR wide_result; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_UPPERCASE, // mapping transformation type + &wide_c, // source string + 1, // number of characters in source string + &wide_result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; + + if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0) + return c; // No single byte upper case equivalent available. +#endif + return result[0]; + } + +#ifndef BOOST_NO_WREGEX + inline wchar_t w32_toupper(wchar_t c, lcid_type idx) + { + wchar_t result[2]; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_UPPERCASE, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if (b == 0) + return c; + return result[0]; + } +#endif + inline bool w32_is(lcid_type idx, std::uint32_t m, char c) + { + WORD mask; +#ifndef BOOST_NO_ANSI_APIS + if (::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) + return true; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return false; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return false; + + if (::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) + return true; +#endif + if ((m & w32_regex_traits_implementation::mask_word) && (c == '_')) + return true; + return false; + } + +#ifndef BOOST_NO_WREGEX + inline bool w32_is(lcid_type idx, std::uint32_t m, wchar_t c) + { + WORD mask; + if (::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) + return true; + if ((m & w32_regex_traits_implementation::mask_word) && (c == '_')) + return true; + if ((m & w32_regex_traits_implementation::mask_unicode) && (c > 0xff)) + return true; + return false; + } +#endif + +} // BOOST_REGEX_DETAIL_NS + + +} // boost + +#ifdef BOOST_REGEX_MSVC +#pragma warning(pop) +#endif + +#endif // BOOST_REGEX_NO_WIN32_LOCALE + +#endif diff --git a/include/boost/regex_fwd.hpp b/include/boost/regex_fwd.hpp index 2ee4a249..99371b8c 100644 --- a/include/boost/regex_fwd.hpp +++ b/include/boost/regex_fwd.hpp @@ -24,7 +24,11 @@ #include #endif +#ifdef BOOST_REGEX_CXX03 #include +#else +#include +#endif #endif diff --git a/meta/libraries.json b/meta/libraries.json index a2aadf92..026de86c 100644 --- a/meta/libraries.json +++ b/meta/libraries.json @@ -13,5 +13,6 @@ ], "maintainers": [ "John Maddock " - ] + ], + "cxxstd": "11" } diff --git a/performance/performance.cpp b/performance/performance.cpp index 798aa330..3fb22535 100644 --- a/performance/performance.cpp +++ b/performance/performance.cpp @@ -148,7 +148,7 @@ void test_search(const char* expression, const char* text, bool isperl = false, { double time = exec_timed_test([&]() { return (*i)->find_all(text); }); report_execution_time(time, table, row, heading); - std::cout << "Search with library: " << heading << " found " << last_value_returned << " occurances.\n"; + std::cout << "Search with library: " << heading << " found " << last_value_returned << " occurrences.\n"; } } } diff --git a/src/c_regex_traits.cpp b/src/c_regex_traits.cpp deleted file mode 100644 index dd356561..00000000 --- a/src/c_regex_traits.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* - * - * Copyright (c) 2004 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE: c_regex_traits.cpp - * VERSION: see - * DESCRIPTION: Implements out of line c_regex_traits members - */ - - -#define BOOST_REGEX_SOURCE - -#include -#include -#include "internals.hpp" - -#if !BOOST_WORKAROUND(BOOST_BORLANDC, < 0x560) - -#include -#include -#include - -#ifdef BOOST_NO_STDC_NAMESPACE -namespace std{ - using ::strxfrm; using ::isspace; - using ::ispunct; using ::isalpha; - using ::isalnum; using ::iscntrl; - using ::isprint; using ::isupper; - using ::islower; using ::isdigit; - using ::isxdigit; using ::strtol; -} -#endif - -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_PREFIX -#endif - -namespace boost{ - -c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform(const char* p1, const char* p2) -{ - std::string result(10, ' '); - std::size_t s = result.size(); - std::size_t r; - std::string src(p1, p2); - while(s < (r = std::strxfrm(&*result.begin(), src.c_str(), s))) - { -#if defined(_CPPLIB_VER) - // - // A bug in VC11 and 12 causes the program to hang if we pass a null-string - // to std::strxfrm, but only for certain locales :-( - // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware). - // - if(r == INT_MAX) - { - result.erase(); - result.insert(result.begin(), static_cast(0)); - return result; - } -#endif - result.append(r - s + 3, ' '); - s = result.size(); - } - result.erase(r); - return result; -} - -c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform_primary(const char* p1, const char* p2) -{ - static char s_delim; - static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast*>(0), &s_delim); - std::string result; - // - // What we do here depends upon the format of the sort key returned by - // sort key returned by this->transform: - // - switch(s_collate_type) - { - case ::boost::BOOST_REGEX_DETAIL_NS::sort_C: - case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown: - // the best we can do is translate to lower case, then get a regular sort key: - { - result.assign(p1, p2); - for(std::string::size_type i = 0; i < result.size(); ++i) - result[i] = static_cast((std::tolower)(static_cast(result[i]))); - result = transform(&*result.begin(), &*result.begin() + result.size()); - break; - } - case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed: - { - // get a regular sort key, and then truncate it: - result = transform(p1, p2); - result.erase(s_delim); - break; - } - case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim: - // get a regular sort key, and then truncate everything after the delim: - result = transform(p1, p2); - if(result.size() && (result[0] == s_delim)) - break; - std::size_t i; - for(i = 0; i < result.size(); ++i) - { - if(result[i] == s_delim) - break; - } - result.erase(i); - break; - } - if(result.empty()) - result = std::string(1, char(0)); - return result; -} - -c_regex_traits::char_class_type BOOST_REGEX_CALL c_regex_traits::lookup_classname(const char* p1, const char* p2) -{ - static const char_class_type masks[] = - { - 0, - char_class_alnum, - char_class_alpha, - char_class_blank, - char_class_cntrl, - char_class_digit, - char_class_digit, - char_class_graph, - char_class_horizontal, - char_class_lower, - char_class_lower, - char_class_print, - char_class_punct, - char_class_space, - char_class_space, - char_class_upper, - char_class_unicode, - char_class_upper, - char_class_vertical, - char_class_alnum | char_class_word, - char_class_alnum | char_class_word, - char_class_xdigit, - }; - - int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); - if(idx < 0) - { - std::string s(p1, p2); - for(std::string::size_type i = 0; i < s.size(); ++i) - s[i] = static_cast((std::tolower)(static_cast(s[i]))); - idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); - } - BOOST_ASSERT(std::size_t(idx) + 1u < sizeof(masks) / sizeof(masks[0])); - return masks[idx+1]; -} - -bool BOOST_REGEX_CALL c_regex_traits::isctype(char c, char_class_type mask) -{ - return - ((mask & char_class_space) && (std::isspace)(static_cast(c))) - || ((mask & char_class_print) && (std::isprint)(static_cast(c))) - || ((mask & char_class_cntrl) && (std::iscntrl)(static_cast(c))) - || ((mask & char_class_upper) && (std::isupper)(static_cast(c))) - || ((mask & char_class_lower) && (std::islower)(static_cast(c))) - || ((mask & char_class_alpha) && (std::isalpha)(static_cast(c))) - || ((mask & char_class_digit) && (std::isdigit)(static_cast(c))) - || ((mask & char_class_punct) && (std::ispunct)(static_cast(c))) - || ((mask & char_class_xdigit) && (std::isxdigit)(static_cast(c))) - || ((mask & char_class_blank) && (std::isspace)(static_cast(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c)) - || ((mask & char_class_word) && (c == '_')) - || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v'))) - || ((mask & char_class_horizontal) && (std::isspace)(static_cast(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != '\v')); -} - -c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::lookup_collatename(const char* p1, const char* p2) -{ - std::string s(p1, p2); - s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s); - if(s.empty() && (p2-p1 == 1)) - s.append(1, *p1); - return s; -} - -int BOOST_REGEX_CALL c_regex_traits::value(char c, int radix) -{ - char b[2] = { c, '\0', }; - char* ep; - int result = std::strtol(b, &ep, radix); - if(ep == b) - return -1; - return result; -} - -} -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_SUFFIX -#endif - -#endif diff --git a/src/cpp_regex_traits.cpp b/src/cpp_regex_traits.cpp deleted file mode 100644 index 05bbc50d..00000000 --- a/src/cpp_regex_traits.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - * - * Copyright (c) 2004 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE cpp_regex_traits.cpp - * VERSION see - * DESCRIPTION: Implements cpp_regex_traits (and associated helper classes). - */ - -#define BOOST_REGEX_SOURCE -#include -#ifndef BOOST_NO_STD_LOCALE -#include -#include - -#ifdef BOOST_NO_STDC_NAMESPACE -namespace std{ - using ::memset; -} -#endif - -namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ - -void cpp_regex_traits_char_layer::init() -{ - // we need to start by initialising our syntax map so we know which - // character is used for which purpose: - std::memset(m_char_map, 0, sizeof(m_char_map)); -#ifndef BOOST_NO_STD_MESSAGES -#ifndef __IBMCPP__ - std::messages::catalog cat = static_cast::catalog>(-1); -#else - std::messages::catalog cat = reinterpret_cast::catalog>(-1); -#endif - std::string cat_name(cpp_regex_traits::get_catalog_name()); - if(cat_name.size() && (m_pmessages != 0)) - { - cat = this->m_pmessages->open( - cat_name, - this->m_locale); - if((int)cat < 0) - { - std::string m("Unable to open message catalog: "); - std::runtime_error err(m + cat_name); - boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); - } - } - // - // if we have a valid catalog then load our messages: - // - if((int)cat >= 0) - { -#ifndef BOOST_NO_EXCEPTIONS - try{ -#endif - for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) - { - string_type mss = this->m_pmessages->get(cat, 0, i, get_default_syntax(i)); - for(string_type::size_type j = 0; j < mss.size(); ++j) - { - m_char_map[static_cast(mss[j])] = i; - } - } - this->m_pmessages->close(cat); -#ifndef BOOST_NO_EXCEPTIONS - } - catch(...) - { - this->m_pmessages->close(cat); - throw; - } -#endif - } - else - { -#endif - for(regex_constants::syntax_type j = 1; j < regex_constants::syntax_max; ++j) - { - const char* ptr = get_default_syntax(j); - while(ptr && *ptr) - { - m_char_map[static_cast(*ptr)] = j; - ++ptr; - } - } -#ifndef BOOST_NO_STD_MESSAGES - } -#endif - // - // finish off by calculating our escape types: - // - unsigned char i = 'A'; - do - { - if(m_char_map[i] == 0) - { - if(this->m_pctype->is(std::ctype_base::lower, i)) - m_char_map[i] = regex_constants::escape_type_class; - else if(this->m_pctype->is(std::ctype_base::upper, i)) - m_char_map[i] = regex_constants::escape_type_not_class; - } - }while(0xFF != i++); -} - -} // BOOST_REGEX_DETAIL_NS -} // boost -#endif - diff --git a/src/cregex.cpp b/src/cregex.cpp deleted file mode 100644 index 118c590a..00000000 --- a/src/cregex.cpp +++ /dev/null @@ -1,667 +0,0 @@ -/* - * - * Copyright (c) 1998-2002 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE: cregex.cpp - * VERSION: see - * DESCRIPTION: Implements high level class boost::RexEx - */ - - -#define BOOST_REGEX_SOURCE - -#include -#include -#if !defined(BOOST_NO_STD_STRING) -#include -#include -#include -typedef boost::match_flag_type match_flag_type; -#include - -#ifdef BOOST_MSVC -#pragma warning(disable:4309) -#endif -#ifdef BOOST_INTEL -#pragma warning(disable:981 383) -#endif - -namespace boost{ - -#ifdef BOOST_BORLANDC -#if BOOST_BORLANDC < 0x530 -// -// we need to instantiate the vector classes we use -// since declaring a reference to type doesn't seem to -// do the job... -std::vector inst1; -std::vector inst2; -#endif -#endif - -namespace{ - -template -std::string to_string(iterator i, iterator j) -{ - std::string s; - while(i != j) - { - s.append(1, *i); - ++i; - } - return s; -} - -inline std::string to_string(const char* i, const char* j) -{ - return std::string(i, j); -} - -} -namespace BOOST_REGEX_DETAIL_NS{ - -#ifdef BOOST_MSVC -# pragma warning(push) -#pragma warning(disable:26812) -#endif -class RegExData -{ -public: - enum type - { - type_pc, - type_pf, - type_copy - }; - regex e; - cmatch m; -#ifndef BOOST_REGEX_NO_FILEITER - match_results fm; -#endif - type t; - const char* pbase; -#ifndef BOOST_REGEX_NO_FILEITER - mapfile::iterator fbase; -#endif - std::map > strings; - std::map > positions; - void update(); - void clean(); - RegExData() : e(), m(), -#ifndef BOOST_REGEX_NO_FILEITER - fm(), -#endif - t(type_copy), pbase(0), -#ifndef BOOST_REGEX_NO_FILEITER - fbase(), -#endif - strings(), positions() {} -}; -#ifdef BOOST_MSVC -# pragma warning(pop) -#endif - -void RegExData::update() -{ - strings.erase(strings.begin(), strings.end()); - positions.erase(positions.begin(), positions.end()); - if(t == type_pc) - { - for(unsigned int i = 0; i < m.size(); ++i) - { - if(m[i].matched) strings[i] = std::string(m[i].first, m[i].second); - positions[i] = m[i].matched ? m[i].first - pbase : -1; - } - } -#ifndef BOOST_REGEX_NO_FILEITER - else - { - for(unsigned int i = 0; i < fm.size(); ++i) - { - if(fm[i].matched) strings[i] = to_string(fm[i].first, fm[i].second); - positions[i] = fm[i].matched ? fm[i].first - fbase : -1; - } - } -#endif - t = type_copy; -} - -void RegExData::clean() -{ -#ifndef BOOST_REGEX_NO_FILEITER - fbase = mapfile::iterator(); - fm = match_results(); -#endif -} - -} // namespace - -RegEx::RegEx() -{ - pdata = new BOOST_REGEX_DETAIL_NS::RegExData(); -} - -RegEx::RegEx(const RegEx& o) -{ - pdata = new BOOST_REGEX_DETAIL_NS::RegExData(*(o.pdata)); -} - -RegEx::~RegEx() -{ - delete pdata; -} - -RegEx::RegEx(const char* c, bool icase) -{ - pdata = new BOOST_REGEX_DETAIL_NS::RegExData(); - SetExpression(c, icase); -} - -RegEx::RegEx(const std::string& s, bool icase) -{ - pdata = new BOOST_REGEX_DETAIL_NS::RegExData(); - SetExpression(s.c_str(), icase); -} - -RegEx& RegEx::operator=(const RegEx& o) -{ - *pdata = *(o.pdata); - return *this; -} - -RegEx& RegEx::operator=(const char* p) -{ - SetExpression(p, false); - return *this; -} - -unsigned int RegEx::SetExpression(const char* p, bool icase) -{ - boost::uint_fast32_t f = icase ? regex::normal | regex::icase : regex::normal; - return pdata->e.set_expression(p, f); -} - -unsigned int RegEx::error_code()const -{ - return pdata->e.error_code(); -} - - -std::string RegEx::Expression()const -{ - return pdata->e.expression(); -} - -// -// now matching operators: -// -bool RegEx::Match(const char* p, match_flag_type flags) -{ - pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc; - pdata->pbase = p; - const char* end = p; - while(*end)++end; - - if(regex_match(p, end, pdata->m, pdata->e, flags)) - { - pdata->update(); - return true; - } - return false; -} - -bool RegEx::Search(const char* p, match_flag_type flags) -{ - pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc; - pdata->pbase = p; - const char* end = p; - while(*end)++end; - - if(regex_search(p, end, pdata->m, pdata->e, flags)) - { - pdata->update(); - return true; - } - return false; -} -namespace BOOST_REGEX_DETAIL_NS{ -struct pred1 -{ - GrepCallback cb; - RegEx* pe; - pred1(GrepCallback c, RegEx* i) : cb(c), pe(i) {} - bool operator()(const cmatch& m) - { - pe->pdata->m = m; - return cb(*pe); - } -}; -} -unsigned int RegEx::Grep(GrepCallback cb, const char* p, match_flag_type flags) -{ - pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc; - pdata->pbase = p; - const char* end = p; - while(*end)++end; - - unsigned int result = regex_grep(BOOST_REGEX_DETAIL_NS::pred1(cb, this), p, end, pdata->e, flags); - if(result) - pdata->update(); - return result; -} -namespace BOOST_REGEX_DETAIL_NS{ -struct pred2 -{ - std::vector& v; - RegEx* pe; - pred2(std::vector& o, RegEx* e) : v(o), pe(e) {} - bool operator()(const cmatch& m) - { - pe->pdata->m = m; - v.push_back(std::string(m[0].first, m[0].second)); - return true; - } -private: - pred2& operator=(const pred2&); -}; -} - -unsigned int RegEx::Grep(std::vector& v, const char* p, match_flag_type flags) -{ - pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc; - pdata->pbase = p; - const char* end = p; - while(*end)++end; - - unsigned int result = regex_grep(BOOST_REGEX_DETAIL_NS::pred2(v, this), p, end, pdata->e, flags); - if(result) - pdata->update(); - return result; -} -namespace BOOST_REGEX_DETAIL_NS{ -struct pred3 -{ - std::vector& v; - const char* base; - RegEx* pe; - pred3(std::vector& o, const char* pb, RegEx* p) : v(o), base(pb), pe(p) {} - bool operator()(const cmatch& m) - { - pe->pdata->m = m; - v.push_back(static_cast(m[0].first - base)); - return true; - } -private: - pred3& operator=(const pred3&); -}; -} -unsigned int RegEx::Grep(std::vector& v, const char* p, match_flag_type flags) -{ - pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc; - pdata->pbase = p; - const char* end = p; - while(*end)++end; - - unsigned int result = regex_grep(BOOST_REGEX_DETAIL_NS::pred3(v, p, this), p, end, pdata->e, flags); - if(result) - pdata->update(); - return result; -} -#ifndef BOOST_REGEX_NO_FILEITER -namespace BOOST_REGEX_DETAIL_NS{ -struct pred4 -{ - GrepFileCallback cb; - RegEx* pe; - const char* file; - bool ok; - pred4(GrepFileCallback c, RegEx* i, const char* f) : cb(c), pe(i), file(f), ok(true) {} - bool operator()(const match_results& m) - { - pe->pdata->t = RegExData::type_pf; - pe->pdata->fm = m; - pe->pdata->update(); - ok = cb(file, *pe); - return ok; - } -}; -} -namespace{ -void BuildFileList(std::list* pl, const char* files, bool recurse) -{ - file_iterator start(files); - file_iterator end; - if(recurse) - { - // go through sub directories: - char buf[MAX_PATH]; - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(buf, MAX_PATH, start.root())); - if(*buf == 0) - { - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(buf, MAX_PATH, ".")); - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(buf, MAX_PATH, directory_iterator::separator())); - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(buf, MAX_PATH, "*")); - } - else - { - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(buf, MAX_PATH, directory_iterator::separator())); - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(buf, MAX_PATH, "*")); - } - directory_iterator dstart(buf); - directory_iterator dend; - - // now get the file mask bit of "files": - const char* ptr = files; - while(*ptr) ++ptr; - while((ptr != files) && (*ptr != *directory_iterator::separator()) && (*ptr != '/'))--ptr; - if(ptr != files) ++ptr; - - while(dstart != dend) - { - // Verify that sprintf will not overflow: - if(std::strlen(dstart.path()) + std::strlen(directory_iterator::separator()) + std::strlen(ptr) >= MAX_PATH) - { - // Oops overflow, skip this item: - ++dstart; - continue; - } -#if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) && !defined(_WIN32_WCE) && !defined(UNDER_CE) - int r = (::sprintf_s)(buf, sizeof(buf), "%s%s%s", dstart.path(), directory_iterator::separator(), ptr); -#else - int r = (std::sprintf)(buf, "%s%s%s", dstart.path(), directory_iterator::separator(), ptr); -#endif - if(r < 0) - { - // sprintf failed, skip this item: - ++dstart; - continue; - } - BuildFileList(pl, buf, recurse); - ++dstart; - } - } - while(start != end) - { - pl->push_back(*start); - ++start; - } -} -} - -unsigned int RegEx::GrepFiles(GrepFileCallback cb, const char* files, bool recurse, match_flag_type flags) -{ - unsigned int result = 0; - std::list file_list; - BuildFileList(&file_list, files, recurse); - std::list::iterator start, end; - start = file_list.begin(); - end = file_list.end(); - - while(start != end) - { - mapfile map((*start).c_str()); - pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pf; - pdata->fbase = map.begin(); - BOOST_REGEX_DETAIL_NS::pred4 pred(cb, this, (*start).c_str()); - int r = regex_grep(pred, map.begin(), map.end(), pdata->e, flags); - result += r; - ++start; - pdata->clean(); - if(pred.ok == false) - return result; - } - - return result; -} - - -unsigned int RegEx::FindFiles(FindFilesCallback cb, const char* files, bool recurse, match_flag_type flags) -{ - unsigned int result = 0; - std::list file_list; - BuildFileList(&file_list, files, recurse); - std::list::iterator start, end; - start = file_list.begin(); - end = file_list.end(); - - while(start != end) - { - mapfile map((*start).c_str()); - pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pf; - pdata->fbase = map.begin(); - - if(regex_search(map.begin(), map.end(), pdata->fm, pdata->e, flags)) - { - ++result; - if(false == cb((*start).c_str())) - return result; - } - //pdata->update(); - ++start; - //pdata->clean(); - } - - return result; -} -#endif - -#ifdef BOOST_REGEX_V3 -#define regex_replace regex_merge -#endif - -std::string RegEx::Merge(const std::string& in, const std::string& fmt, - bool copy, match_flag_type flags) -{ - std::string result; - BOOST_REGEX_DETAIL_NS::string_out_iterator i(result); - if(!copy) flags |= format_no_copy; - regex_replace(i, in.begin(), in.end(), pdata->e, fmt.c_str(), flags); - return result; -} - -std::string RegEx::Merge(const char* in, const char* fmt, - bool copy, match_flag_type flags) -{ - std::string result; - if(!copy) flags |= format_no_copy; - BOOST_REGEX_DETAIL_NS::string_out_iterator i(result); - regex_replace(i, in, in + std::strlen(in), pdata->e, fmt, flags); - return result; -} - -std::size_t RegEx::Split(std::vector& v, - std::string& s, - match_flag_type flags, - unsigned max_count) -{ - return regex_split(std::back_inserter(v), s, pdata->e, flags, max_count); -} - - - -// -// now operators for returning what matched in more detail: -// -std::size_t RegEx::Position(int i)const -{ - switch(pdata->t) - { - case BOOST_REGEX_DETAIL_NS::RegExData::type_pc: - return pdata->m[i].matched ? pdata->m[i].first - pdata->pbase : RegEx::npos; - case BOOST_REGEX_DETAIL_NS::RegExData::type_pf: -#ifndef BOOST_REGEX_NO_FILEITER - return pdata->fm[i].matched ? pdata->fm[i].first - pdata->fbase : RegEx::npos; -#endif - case BOOST_REGEX_DETAIL_NS::RegExData::type_copy: - { - std::map >::iterator pos = pdata->positions.find(i); - if(pos == pdata->positions.end()) - return RegEx::npos; - return (*pos).second; - } - } - return RegEx::npos; -} - -std::size_t RegEx::Marks()const -{ - return pdata->e.mark_count(); -} - - -std::size_t RegEx::Length(int i)const -{ - switch(pdata->t) - { - case BOOST_REGEX_DETAIL_NS::RegExData::type_pc: - return pdata->m[i].matched ? pdata->m[i].second - pdata->m[i].first : RegEx::npos; - case BOOST_REGEX_DETAIL_NS::RegExData::type_pf: -#ifndef BOOST_REGEX_NO_FILEITER - return pdata->fm[i].matched ? pdata->fm[i].second - pdata->fm[i].first : RegEx::npos; -#endif - case BOOST_REGEX_DETAIL_NS::RegExData::type_copy: - { - std::map >::iterator pos = pdata->strings.find(i); - if(pos == pdata->strings.end()) - return RegEx::npos; - return (*pos).second.size(); - } - } - return RegEx::npos; -} - -bool RegEx::Matched(int i)const -{ - switch(pdata->t) - { - case BOOST_REGEX_DETAIL_NS::RegExData::type_pc: - return pdata->m[i].matched; - case BOOST_REGEX_DETAIL_NS::RegExData::type_pf: -#ifndef BOOST_REGEX_NO_FILEITER - return pdata->fm[i].matched; -#endif - case BOOST_REGEX_DETAIL_NS::RegExData::type_copy: - { - std::map >::iterator pos = pdata->strings.find(i); - if(pos == pdata->strings.end()) - return false; - return true; - } - } - return false; -} - - -std::string RegEx::What(int i)const -{ - std::string result; - switch(pdata->t) - { - case BOOST_REGEX_DETAIL_NS::RegExData::type_pc: - if(pdata->m[i].matched) - result.assign(pdata->m[i].first, pdata->m[i].second); - break; - case BOOST_REGEX_DETAIL_NS::RegExData::type_pf: - if(pdata->m[i].matched) - result.assign(to_string(pdata->m[i].first, pdata->m[i].second)); - break; - case BOOST_REGEX_DETAIL_NS::RegExData::type_copy: - { - std::map >::iterator pos = pdata->strings.find(i); - if(pos != pdata->strings.end()) - result = (*pos).second; - break; - } - } - return result; -} - -const std::size_t RegEx::npos = ~static_cast(0); - -} // namespace boost - -#if defined(BOOST_BORLANDC) && (BOOST_BORLANDC >= 0x550) && (BOOST_BORLANDC <= 0x551) && !defined(_RWSTD_COMPILE_INSTANTIATE) -// -// this is an ugly hack to work around an ugly problem: -// by default this file will produce unresolved externals during -// linking unless _RWSTD_COMPILE_INSTANTIATE is defined (Borland bug). -// However if _RWSTD_COMPILE_INSTANTIATE is defined then we get separate -// copies of basic_string's static data in the RTL and this DLL, this messes -// with basic_string's memory management and results in run-time crashes, -// Oh sweet joy of Catch 22.... -// -namespace std{ -template<> template<> -basic_string& BOOST_REGEX_DECL -basic_string::replace(char* f1, char* f2, const char* i1, const char* i2) -{ - unsigned insert_pos = f1 - begin(); - unsigned remove_len = f2 - f1; - unsigned insert_len = i2 - i1; - unsigned org_size = size(); - if(insert_len > remove_len) - { - append(insert_len-remove_len, ' '); - std::copy_backward(begin() + insert_pos + remove_len, begin() + org_size, end()); - std::copy(i1, i2, begin() + insert_pos); - } - else - { - std::copy(begin() + insert_pos + remove_len, begin() + org_size, begin() + insert_pos + insert_len); - std::copy(i1, i2, begin() + insert_pos); - erase(size() + insert_len - remove_len); - } - return *this; -} -template<> template<> -basic_string& BOOST_REGEX_DECL -basic_string::replace(wchar_t* f1, wchar_t* f2, const wchar_t* i1, const wchar_t* i2) -{ - unsigned insert_pos = f1 - begin(); - unsigned remove_len = f2 - f1; - unsigned insert_len = i2 - i1; - unsigned org_size = size(); - if(insert_len > remove_len) - { - append(insert_len-remove_len, ' '); - std::copy_backward(begin() + insert_pos + remove_len, begin() + org_size, end()); - std::copy(i1, i2, begin() + insert_pos); - } - else - { - std::copy(begin() + insert_pos + remove_len, begin() + org_size, begin() + insert_pos + insert_len); - std::copy(i1, i2, begin() + insert_pos); - erase(size() + insert_len - remove_len); - } - return *this; -} -} // namespace std -#endif - -#endif - - - - - - - - - - - - - - - - diff --git a/src/fileiter.cpp b/src/fileiter.cpp deleted file mode 100644 index 0c027ec1..00000000 --- a/src/fileiter.cpp +++ /dev/null @@ -1,933 +0,0 @@ -/* - * - * Copyright (c) 1998-2002 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE: fileiter.cpp - * VERSION: see - * DESCRIPTION: Implements file io primitives + directory searching for class boost::RegEx. - */ - - -#define BOOST_REGEX_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#if defined(BOOST_NO_STDC_NAMESPACE) -namespace std{ - using ::sprintf; - using ::fseek; - using ::fread; - using ::ftell; - using ::fopen; - using ::fclose; - using ::FILE; - using ::strcpy; - using ::strcpy; - using ::strcat; - using ::strcmp; - using ::strlen; -} -#endif - - -#ifndef BOOST_REGEX_NO_FILEITER - -#if defined(__CYGWIN__) || defined(__CYGWIN32__) -#include -#endif - -#ifdef BOOST_MSVC -# pragma warning(disable: 4800) -#endif - -namespace boost{ - namespace BOOST_REGEX_DETAIL_NS{ -// start with the operating system specific stuff: - -#if (defined(BOOST_BORLANDC) || defined(BOOST_REGEX_FI_WIN32_DIR) || defined(BOOST_MSVC)) && !defined(BOOST_RE_NO_WIN32) - -// platform is DOS or Windows -// directories are separated with '\\' -// and names are insensitive of case - -BOOST_REGEX_DECL const char* _fi_sep = "\\"; -const char* _fi_sep_alt = "/"; -#define BOOST_REGEX_FI_TRANSLATE(c) std::tolower(c) - -#else - -// platform is not DOS or Windows -// directories are separated with '/' -// and names are sensitive of case - -BOOST_REGEX_DECL const char* _fi_sep = "/"; -const char* _fi_sep_alt = _fi_sep; -#define BOOST_REGEX_FI_TRANSLATE(c) c - -#endif - -#ifdef BOOST_REGEX_FI_WIN32_MAP - -void mapfile::open(const char* file) -{ -#if defined(BOOST_NO_ANSI_APIS) - int filename_size = strlen(file); - LPWSTR wide_file = (LPWSTR)_alloca( (filename_size + 1) * sizeof(WCHAR) ); - if(::MultiByteToWideChar(CP_ACP, 0, file, filename_size, wide_file, filename_size + 1) == 0) - hfile = INVALID_HANDLE_VALUE; - else - hfile = CreateFileW(wide_file, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); -#elif defined(__CYGWIN__)||defined(__CYGWIN32__) - char win32file[ MAX_PATH ]; - cygwin_conv_to_win32_path( file, win32file ); - hfile = CreateFileA(win32file, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); -#else - hfile = CreateFileA(file, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); -#endif - if(hfile != INVALID_HANDLE_VALUE) - { - hmap = CreateFileMapping(hfile, 0, PAGE_READONLY, 0, 0, 0); - if((hmap == INVALID_HANDLE_VALUE) || (hmap == NULL)) - { - CloseHandle(hfile); - hmap = 0; - hfile = 0; - std::runtime_error err("Unable to create file mapping."); - boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); - } - else - { - _first = static_cast(MapViewOfFile(hmap, FILE_MAP_READ, 0, 0, 0)); - if (_first == 0) - { - CloseHandle(hmap); - CloseHandle(hfile); - hmap = 0; - hfile = 0; - std::runtime_error err("Unable to create file mapping."); - boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); - } - else - _last = _first + GetFileSize(hfile, 0); - } - } - else - { - hfile = 0; -#ifndef BOOST_NO_EXCEPTIONS - throw std::runtime_error("Unable to open file."); -#else - BOOST_REGEX_NOEH_ASSERT(hfile != INVALID_HANDLE_VALUE); -#endif - } -} - -void mapfile::close() -{ - if(hfile != INVALID_HANDLE_VALUE) - { - UnmapViewOfFile((void*)_first); - CloseHandle(hmap); - CloseHandle(hfile); - hmap = hfile = 0; - _first = _last = 0; - } -} - -#elif !defined(BOOST_RE_NO_STL) - -mapfile_iterator& mapfile_iterator::operator = (const mapfile_iterator& i) -{ - if(file && node) - file->unlock(node); - file = i.file; - node = i.node; - offset = i.offset; - if(file) - file->lock(node); - return *this; -} - -mapfile_iterator& mapfile_iterator::operator++ () -{ - if((++offset == mapfile::buf_size) && file) - { - ++node; - offset = 0; - file->lock(node); - file->unlock(node-1); - } - return *this; -} - -mapfile_iterator mapfile_iterator::operator++ (int) -{ - mapfile_iterator temp(*this); - if((++offset == mapfile::buf_size) && file) - { - ++node; - offset = 0; - file->lock(node); - file->unlock(node-1); - } - return temp; -} - -mapfile_iterator& mapfile_iterator::operator-- () -{ - if((offset == 0) && file) - { - --node; - offset = mapfile::buf_size - 1; - file->lock(node); - file->unlock(node + 1); - } - else - --offset; - return *this; -} - -mapfile_iterator mapfile_iterator::operator-- (int) -{ - mapfile_iterator temp(*this); - if((offset == 0) && file) - { - --node; - offset = mapfile::buf_size - 1; - file->lock(node); - file->unlock(node + 1); - } - else - --offset; - return temp; -} - -mapfile_iterator operator + (const mapfile_iterator& i, long off) -{ - mapfile_iterator temp(i); - temp += off; - return temp; -} - -mapfile_iterator operator - (const mapfile_iterator& i, long off) -{ - mapfile_iterator temp(i); - temp -= off; - return temp; -} - -mapfile::iterator mapfile::begin()const -{ - return mapfile_iterator(this, 0); -} - -mapfile::iterator mapfile::end()const -{ - return mapfile_iterator(this, _size); -} - -void mapfile::lock(pointer* node)const -{ - BOOST_ASSERT(node >= _first); - BOOST_ASSERT(node <= _last); - if(node < _last) - { - if(*node == 0) - { - if(condemed.empty()) - { - *node = new char[sizeof(int) + buf_size]; - *(reinterpret_cast(*node)) = 1; - } - else - { - pointer* p = condemed.front(); - condemed.pop_front(); - *node = *p; - *p = 0; - *(reinterpret_cast(*node)) = 1; - } - - std::size_t read_size = 0; - int read_pos = std::fseek(hfile, (node - _first) * buf_size, SEEK_SET); - - if(0 == read_pos && node == _last - 1) - read_size = std::fread(*node + sizeof(int), _size % buf_size, 1, hfile); - else - read_size = std::fread(*node + sizeof(int), buf_size, 1, hfile); - if((read_size == 0) || (std::ferror(hfile))) - { -#ifndef BOOST_NO_EXCEPTIONS - unlock(node); - throw std::runtime_error("Unable to read file."); -#else - BOOST_REGEX_NOEH_ASSERT((0 == std::ferror(hfile)) && (read_size != 0)); -#endif - } - } - else - { - if(*reinterpret_cast(*node) == 0) - { - *reinterpret_cast(*node) = 1; - condemed.remove(node); - } - else - ++(*reinterpret_cast(*node)); - } - } -} - -void mapfile::unlock(pointer* node)const -{ - BOOST_ASSERT(node >= _first); - BOOST_ASSERT(node <= _last); - if(node < _last) - { - if(--(*reinterpret_cast(*node)) == 0) - { - condemed.push_back(node); - } - } -} - -long int get_file_length(std::FILE* hfile) -{ - long int result; - std::fseek(hfile, 0, SEEK_END); - result = std::ftell(hfile); - std::fseek(hfile, 0, SEEK_SET); - return result; -} - - -void mapfile::open(const char* file) -{ - hfile = std::fopen(file, "rb"); -#ifndef BOOST_NO_EXCEPTIONS - try{ -#endif - if(hfile != 0) - { - _size = get_file_length(hfile); - long cnodes = (_size + buf_size - 1) / buf_size; - - // check that number of nodes is not too high: - if(cnodes > (long)((INT_MAX) / sizeof(pointer*))) - { - std::fclose(hfile); - hfile = 0; - _size = 0; - return; - } - - _first = new pointer[(int)cnodes]; - _last = _first + cnodes; - std::memset(_first, 0, cnodes*sizeof(pointer)); - } - else - { - std::runtime_error err("Unable to open file."); - } -#ifndef BOOST_NO_EXCEPTIONS - }catch(...) - { close(); throw; } -#endif -} - -void mapfile::close() -{ - if(hfile != 0) - { - pointer* p = _first; - while(p != _last) - { - if(*p) - delete[] *p; - ++p; - } - delete[] _first; - _size = 0; - _first = _last = 0; - std::fclose(hfile); - hfile = 0; - condemed.erase(condemed.begin(), condemed.end()); - } -} - - -#endif - -inline _fi_find_handle find_first_file(const char* wild, _fi_find_data& data) -{ -#ifdef BOOST_NO_ANSI_APIS - std::size_t wild_size = std::strlen(wild); - LPWSTR wide_wild = (LPWSTR)_alloca( (wild_size + 1) * sizeof(WCHAR) ); - if (::MultiByteToWideChar(CP_ACP, 0, wild, wild_size, wide_wild, wild_size + 1) == 0) - return _fi_invalid_handle; - - return FindFirstFileW(wide_wild, &data); -#else - return FindFirstFileA(wild, &data); -#endif -} - -inline bool find_next_file(_fi_find_handle hf, _fi_find_data& data) -{ -#ifdef BOOST_NO_ANSI_APIS - return FindNextFileW(hf, &data); -#else - return FindNextFileA(hf, &data); -#endif -} - -inline void copy_find_file_result_with_overflow_check(const _fi_find_data& data, char* path, size_t max_size) -{ -#ifdef BOOST_NO_ANSI_APIS - if (::WideCharToMultiByte(CP_ACP, 0, data.cFileName, -1, path, max_size, NULL, NULL) == 0) - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(1); -#else - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(path, max_size, data.cFileName)); -#endif -} - -inline bool is_not_current_or_parent_path_string(const _fi_find_data& data) -{ -#ifdef BOOST_NO_ANSI_APIS - return (std::wcscmp(data.cFileName, L".") && std::wcscmp(data.cFileName, L"..")); -#else - return (std::strcmp(data.cFileName, ".") && std::strcmp(data.cFileName, "..")); -#endif -} - - -file_iterator::file_iterator() -{ - _root = _path = 0; - ref = 0; -#ifndef BOOST_NO_EXCEPTIONS - try{ -#endif - _root = new char[MAX_PATH]; - BOOST_REGEX_NOEH_ASSERT(_root) - _path = new char[MAX_PATH]; - BOOST_REGEX_NOEH_ASSERT(_path) - ptr = _path; - *_path = 0; - *_root = 0; - ref = new file_iterator_ref(); - BOOST_REGEX_NOEH_ASSERT(ref) - ref->hf = _fi_invalid_handle; - ref->count = 1; -#ifndef BOOST_NO_EXCEPTIONS - } - catch(...) - { - delete[] _root; - delete[] _path; - delete ref; - throw; - } -#endif -} - -file_iterator::file_iterator(const char* wild) -{ - _root = _path = 0; - ref = 0; -#ifndef BOOST_NO_EXCEPTIONS - try{ -#endif - _root = new char[MAX_PATH]; - BOOST_REGEX_NOEH_ASSERT(_root) - _path = new char[MAX_PATH]; - BOOST_REGEX_NOEH_ASSERT(_path) - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_root, MAX_PATH, wild)); - ptr = _root; - while(*ptr)++ptr; - while((ptr > _root) && (*ptr != *_fi_sep) && (*ptr != *_fi_sep_alt))--ptr; - if((ptr == _root) && ( (*ptr== *_fi_sep) || (*ptr==*_fi_sep_alt) ) ) - { - _root[1]='\0'; - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_path, MAX_PATH, _root)); - } - else - { - *ptr = 0; - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_path, MAX_PATH, _root)); - if(*_path == 0) - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_path, MAX_PATH, ".")); - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(_path, MAX_PATH, _fi_sep)); - } - ptr = _path + std::strlen(_path); - - ref = new file_iterator_ref(); - BOOST_REGEX_NOEH_ASSERT(ref) - ref->hf = find_first_file(wild, ref->_data); - ref->count = 1; - - if(ref->hf == _fi_invalid_handle) - { - *_path = 0; - ptr = _path; - } - else - { - copy_find_file_result_with_overflow_check(ref->_data, ptr, (MAX_PATH - (ptr - _path))); - if(ref->_data.dwFileAttributes & _fi_dir) - next(); - } -#ifndef BOOST_NO_EXCEPTIONS - } - catch(...) - { - delete[] _root; - delete[] _path; - delete ref; - throw; - } -#endif -} - -file_iterator::file_iterator(const file_iterator& other) -{ - _root = _path = 0; - ref = 0; -#ifndef BOOST_NO_EXCEPTIONS - try{ -#endif - _root = new char[MAX_PATH]; - BOOST_REGEX_NOEH_ASSERT(_root) - _path = new char[MAX_PATH]; - BOOST_REGEX_NOEH_ASSERT(_path) - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_root, MAX_PATH, other._root)); - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_path, MAX_PATH, other._path)); - ptr = _path + (other.ptr - other._path); - ref = other.ref; -#ifndef BOOST_NO_EXCEPTIONS - } - catch(...) - { - delete[] _root; - delete[] _path; - throw; - } -#endif - ++(ref->count); -} - -file_iterator& file_iterator::operator=(const file_iterator& other) -{ - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_root, MAX_PATH, other._root)); - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_path, MAX_PATH, other._path)); - ptr = _path + (other.ptr - other._path); - if(--(ref->count) == 0) - { - if(ref->hf != _fi_invalid_handle) - FindClose(ref->hf); - delete ref; - } - ref = other.ref; - ++(ref->count); - return *this; -} - - -file_iterator::~file_iterator() -{ - delete[] _root; - delete[] _path; - if(--(ref->count) == 0) - { - if(ref->hf != _fi_invalid_handle) - FindClose(ref->hf); - delete ref; - } -} - -file_iterator file_iterator::operator++(int) -{ - file_iterator temp(*this); - next(); - return temp; -} - - -void file_iterator::next() -{ - if(ref->hf != _fi_invalid_handle) - { - bool cont = true; - while(cont) - { - cont = find_next_file(ref->hf, ref->_data); - if(cont && ((ref->_data.dwFileAttributes & _fi_dir) == 0)) - break; - } - if(!cont) - { - // end of sequence - FindClose(ref->hf); - ref->hf = _fi_invalid_handle; - *_path = 0; - ptr = _path; - } - else - copy_find_file_result_with_overflow_check(ref->_data, ptr, MAX_PATH - (ptr - _path)); - } -} - - - -directory_iterator::directory_iterator() -{ - _root = _path = 0; - ref = 0; -#ifndef BOOST_NO_EXCEPTIONS - try{ -#endif - _root = new char[MAX_PATH]; - BOOST_REGEX_NOEH_ASSERT(_root) - _path = new char[MAX_PATH]; - BOOST_REGEX_NOEH_ASSERT(_path) - ptr = _path; - *_path = 0; - *_root = 0; - ref = new file_iterator_ref(); - BOOST_REGEX_NOEH_ASSERT(ref) - ref->hf = _fi_invalid_handle; - ref->count = 1; -#ifndef BOOST_NO_EXCEPTIONS - } - catch(...) - { - delete[] _root; - delete[] _path; - delete ref; - throw; - } -#endif -} - -directory_iterator::directory_iterator(const char* wild) -{ - _root = _path = 0; - ref = 0; -#ifndef BOOST_NO_EXCEPTIONS - try{ -#endif - _root = new char[MAX_PATH]; - BOOST_REGEX_NOEH_ASSERT(_root) - _path = new char[MAX_PATH]; - BOOST_REGEX_NOEH_ASSERT(_path) - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_root, MAX_PATH, wild)); - ptr = _root; - while(*ptr)++ptr; - while((ptr > _root) && (*ptr != *_fi_sep) && (*ptr != *_fi_sep_alt))--ptr; - - if((ptr == _root) && ( (*ptr== *_fi_sep) || (*ptr==*_fi_sep_alt) ) ) - { - _root[1]='\0'; - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_path, MAX_PATH, _root)); - } - else - { - *ptr = 0; - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_path, MAX_PATH, _root)); - if(*_path == 0) - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_path, MAX_PATH, ".")); - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(_path, MAX_PATH, _fi_sep)); - } - ptr = _path + std::strlen(_path); - - ref = new file_iterator_ref(); - BOOST_REGEX_NOEH_ASSERT(ref) - ref->count = 1; - ref->hf = find_first_file(wild, ref->_data); - if(ref->hf == _fi_invalid_handle) - { - *_path = 0; - ptr = _path; - } - else - { - copy_find_file_result_with_overflow_check(ref->_data, ptr, MAX_PATH - (ptr - _path)); - if(((ref->_data.dwFileAttributes & _fi_dir) == 0) || (std::strcmp(ptr, ".") == 0) || (std::strcmp(ptr, "..") == 0)) - next(); - } -#ifndef BOOST_NO_EXCEPTIONS - } - catch(...) - { - delete[] _root; - delete[] _path; - delete ref; - throw; - } -#endif -} - -directory_iterator::~directory_iterator() -{ - delete[] _root; - delete[] _path; - if(--(ref->count) == 0) - { - if(ref->hf != _fi_invalid_handle) - FindClose(ref->hf); - delete ref; - } -} - -directory_iterator::directory_iterator(const directory_iterator& other) -{ - _root = _path = 0; - ref = 0; -#ifndef BOOST_NO_EXCEPTIONS - try{ -#endif - _root = new char[MAX_PATH]; - BOOST_REGEX_NOEH_ASSERT(_root) - _path = new char[MAX_PATH]; - BOOST_REGEX_NOEH_ASSERT(_path) - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_root, MAX_PATH, other._root)); - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_path, MAX_PATH, other._path)); - ptr = _path + (other.ptr - other._path); - ref = other.ref; -#ifndef BOOST_NO_EXCEPTIONS - } - catch(...) - { - delete[] _root; - delete[] _path; - throw; - } -#endif - ++(ref->count); -} - -directory_iterator& directory_iterator::operator=(const directory_iterator& other) -{ - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_root, MAX_PATH, other._root)); - BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(_path, MAX_PATH, other._path)); - ptr = _path + (other.ptr - other._path); - if(--(ref->count) == 0) - { - if(ref->hf != _fi_invalid_handle) - FindClose(ref->hf); - delete ref; - } - ref = other.ref; - ++(ref->count); - return *this; -} - -directory_iterator directory_iterator::operator++(int) -{ - directory_iterator temp(*this); - next(); - return temp; -} - -void directory_iterator::next() -{ - if(ref->hf != _fi_invalid_handle) - { - bool cont = true; - while(cont) - { - cont = find_next_file(ref->hf, ref->_data); - if(cont && (ref->_data.dwFileAttributes & _fi_dir)) - { - if(is_not_current_or_parent_path_string(ref->_data)) - break; - } - } - if(!cont) - { - // end of sequence - FindClose(ref->hf); - ref->hf = _fi_invalid_handle; - *_path = 0; - ptr = _path; - } - else - copy_find_file_result_with_overflow_check(ref->_data, ptr, MAX_PATH - (ptr - _path)); - } -} - - -#ifdef BOOST_REGEX_FI_POSIX_DIR - -struct _fi_priv_data -{ - char root[MAX_PATH]; - char* mask; - DIR* d; - _fi_priv_data(const char* p); -}; - -_fi_priv_data::_fi_priv_data(const char* p) -{ - std::strcpy(root, p); - mask = root; - while(*mask) ++mask; - while((mask > root) && (*mask != *_fi_sep) && (*mask != *_fi_sep_alt)) --mask; - if(mask == root && ((*mask== *_fi_sep) || (*mask == *_fi_sep_alt)) ) - { - root[1] = '\0'; - std::strcpy(root+2, p+1); - mask = root+2; - } - else if(mask == root) - { - root[0] = '.'; - root[1] = '\0'; - std::strcpy(root+2, p); - mask = root+2; - } - else - { - *mask = 0; - ++mask; - } -} - -bool iswild(const char* mask, const char* name) -{ - while(*mask && *name) - { - switch(*mask) - { - case '?': - ++name; - ++mask; - continue; - case '*': - ++mask; - if(*mask == 0) - return true; - while(*name) - { - if(iswild(mask, name)) - return true; - ++name; - } - return false; - case '.': - if(0 == *name) - { - ++mask; - continue; - } - // fall through - default: - if(BOOST_REGEX_FI_TRANSLATE(*mask) != BOOST_REGEX_FI_TRANSLATE(*name)) - return false; - ++mask; - ++name; - continue; - } - } - if(*mask != *name) - return false; - return true; -} - -unsigned _fi_attributes(const char* root, const char* name) -{ - char buf[MAX_PATH]; - // verify that we can not overflow: - if(std::strlen(root) + std::strlen(_fi_sep) + std::strlen(name) >= MAX_PATH) - return 0; - int r; - if( ( (root[0] == *_fi_sep) || (root[0] == *_fi_sep_alt) ) && (root[1] == '\0') ) - r = (std::sprintf)(buf, "%s%s", root, name); - else - r = (std::sprintf)(buf, "%s%s%s", root, _fi_sep, name); - if(r < 0) - return 0; // sprintf failed - DIR* d = opendir(buf); - if(d) - { - closedir(d); - return _fi_dir; - } - return 0; -} - -_fi_find_handle _fi_FindFirstFile(const char* lpFileName, _fi_find_data* lpFindFileData) -{ - _fi_find_handle dat = new _fi_priv_data(lpFileName); - - DIR* h = opendir(dat->root); - dat->d = h; - if(h != 0) - { - if(_fi_FindNextFile(dat, lpFindFileData)) - return dat; - closedir(h); - } - delete dat; - return 0; -} - -bool _fi_FindNextFile(_fi_find_handle dat, _fi_find_data* lpFindFileData) -{ - dirent* d; - do - { - d = readdir(dat->d); - } while(d && !iswild(dat->mask, d->d_name)); - - if(d) - { - std::strcpy(lpFindFileData->cFileName, d->d_name); - lpFindFileData->dwFileAttributes = _fi_attributes(dat->root, d->d_name); - return true; - } - return false; -} - -bool _fi_FindClose(_fi_find_handle dat) -{ - closedir(dat->d); - delete dat; - return true; -} - -#endif - -} // namespace BOOST_REGEX_DETAIL_NS -} // namspace boost - -#endif // BOOST_REGEX_NO_FILEITER - - - - - - - - - - - - diff --git a/src/icu.cpp b/src/icu.cpp deleted file mode 100644 index cb11f323..00000000 --- a/src/icu.cpp +++ /dev/null @@ -1,511 +0,0 @@ -/* - * - * Copyright (c) 2004 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE icu.cpp - * VERSION see - * DESCRIPTION: Unicode regular expressions on top of the ICU Library. - */ -#define BOOST_REGEX_SOURCE - -#include -#ifdef BOOST_HAS_ICU -#define BOOST_REGEX_ICU_INSTANTIATE -#include - -#ifdef BOOST_INTEL -#pragma warning(disable:981 2259 383) -#endif - -namespace boost{ - -namespace BOOST_REGEX_DETAIL_NS{ - -icu_regex_traits_implementation::string_type icu_regex_traits_implementation::do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const -{ - // TODO make thread safe!!!! : - typedef u32_to_u16_iterator itt; - itt i(p1), j(p2); -#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS - std::vector< ::UChar> t(i, j); -#else - std::vector< ::UChar> t; - while(i != j) - t.push_back(*i++); -#endif - ::uint8_t result[100]; - ::int32_t len; - if(t.size()) - len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), result, sizeof(result)); - else - len = pcoll->getSortKey(static_cast(0), static_cast< ::int32_t>(0), result, sizeof(result)); - if(std::size_t(len) > sizeof(result)) - { - scoped_array< ::uint8_t> presult(new ::uint8_t[len+1]); - if(t.size()) - len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), presult.get(), len+1); - else - len = pcoll->getSortKey(static_cast(0), static_cast< ::int32_t>(0), presult.get(), len+1); - if((0 == presult[len-1]) && (len > 1)) - --len; -#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS - return string_type(presult.get(), presult.get()+len); -#else - string_type sresult; - ::uint8_t const* ia = presult.get(); - ::uint8_t const* ib = presult.get()+len; - while(ia != ib) - sresult.push_back(*ia++); - return sresult; -#endif - } - if((0 == result[len-1]) && (len > 1)) - --len; -#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS - return string_type(result, result+len); -#else - string_type sresult; - ::uint8_t const* ia = result; - ::uint8_t const* ib = result+len; - while(ia != ib) - sresult.push_back(*ia++); - return sresult; -#endif -} - -} - -icu_regex_traits::size_type icu_regex_traits::length(const char_type* p) -{ - size_type result = 0; - while(*p) - { - ++p; - ++result; - } - return result; -} - -// -// define our bitmasks: -// -const icu_regex_traits::char_class_type icu_regex_traits::mask_blank = icu_regex_traits::char_class_type(1) << offset_blank; -const icu_regex_traits::char_class_type icu_regex_traits::mask_space = icu_regex_traits::char_class_type(1) << offset_space; -const icu_regex_traits::char_class_type icu_regex_traits::mask_xdigit = icu_regex_traits::char_class_type(1) << offset_xdigit; -const icu_regex_traits::char_class_type icu_regex_traits::mask_underscore = icu_regex_traits::char_class_type(1) << offset_underscore; -const icu_regex_traits::char_class_type icu_regex_traits::mask_unicode = icu_regex_traits::char_class_type(1) << offset_unicode; -const icu_regex_traits::char_class_type icu_regex_traits::mask_any = icu_regex_traits::char_class_type(1) << offset_any; -const icu_regex_traits::char_class_type icu_regex_traits::mask_ascii = icu_regex_traits::char_class_type(1) << offset_ascii; -const icu_regex_traits::char_class_type icu_regex_traits::mask_horizontal = icu_regex_traits::char_class_type(1) << offset_horizontal; -const icu_regex_traits::char_class_type icu_regex_traits::mask_vertical = icu_regex_traits::char_class_type(1) << offset_vertical; - -icu_regex_traits::char_class_type icu_regex_traits::lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2) -{ - static const ::UChar32 prop_name_table[] = { - /* any */ 'a', 'n', 'y', - /* ascii */ 'a', 's', 'c', 'i', 'i', - /* assigned */ 'a', 's', 's', 'i', 'g', 'n', 'e', 'd', - /* c* */ 'c', '*', - /* cc */ 'c', 'c', - /* cf */ 'c', 'f', - /* closepunctuation */ 'c', 'l', 'o', 's', 'e', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', - /* cn */ 'c', 'n', - /* co */ 'c', 'o', - /* connectorpunctuation */ 'c', 'o', 'n', 'n', 'e', 'c', 't', 'o', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', - /* control */ 'c', 'o', 'n', 't', 'r', 'o', 'l', - /* cs */ 'c', 's', - /* currencysymbol */ 'c', 'u', 'r', 'r', 'e', 'n', 'c', 'y', 's', 'y', 'm', 'b', 'o', 'l', - /* dashpunctuation */ 'd', 'a', 's', 'h', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', - /* decimaldigitnumber */ 'd', 'e', 'c', 'i', 'm', 'a', 'l', 'd', 'i', 'g', 'i', 't', 'n', 'u', 'm', 'b', 'e', 'r', - /* enclosingmark */ 'e', 'n', 'c', 'l', 'o', 's', 'i', 'n', 'g', 'm', 'a', 'r', 'k', - /* finalpunctuation */ 'f', 'i', 'n', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', - /* format */ 'f', 'o', 'r', 'm', 'a', 't', - /* initialpunctuation */ 'i', 'n', 'i', 't', 'i', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', - /* l* */ 'l', '*', - /* letter */ 'l', 'e', 't', 't', 'e', 'r', - /* letternumber */ 'l', 'e', 't', 't', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', - /* lineseparator */ 'l', 'i', 'n', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', - /* ll */ 'l', 'l', - /* lm */ 'l', 'm', - /* lo */ 'l', 'o', - /* lowercaseletter */ 'l', 'o', 'w', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', - /* lt */ 'l', 't', - /* lu */ 'l', 'u', - /* m* */ 'm', '*', - /* mark */ 'm', 'a', 'r', 'k', - /* mathsymbol */ 'm', 'a', 't', 'h', 's', 'y', 'm', 'b', 'o', 'l', - /* mc */ 'm', 'c', - /* me */ 'm', 'e', - /* mn */ 'm', 'n', - /* modifierletter */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', - /* modifiersymbol */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', - /* n* */ 'n', '*', - /* nd */ 'n', 'd', - /* nl */ 'n', 'l', - /* no */ 'n', 'o', - /* nonspacingmark */ 'n', 'o', 'n', 's', 'p', 'a', 'c', 'i', 'n', 'g', 'm', 'a', 'r', 'k', - /* notassigned */ 'n', 'o', 't', 'a', 's', 's', 'i', 'g', 'n', 'e', 'd', - /* number */ 'n', 'u', 'm', 'b', 'e', 'r', - /* openpunctuation */ 'o', 'p', 'e', 'n', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', - /* other */ 'o', 't', 'h', 'e', 'r', - /* otherletter */ 'o', 't', 'h', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', - /* othernumber */ 'o', 't', 'h', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', - /* otherpunctuation */ 'o', 't', 'h', 'e', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', - /* othersymbol */ 'o', 't', 'h', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', - /* p* */ 'p', '*', - /* paragraphseparator */ 'p', 'a', 'r', 'a', 'g', 'r', 'a', 'p', 'h', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', - /* pc */ 'p', 'c', - /* pd */ 'p', 'd', - /* pe */ 'p', 'e', - /* pf */ 'p', 'f', - /* pi */ 'p', 'i', - /* po */ 'p', 'o', - /* privateuse */ 'p', 'r', 'i', 'v', 'a', 't', 'e', 'u', 's', 'e', - /* ps */ 'p', 's', - /* punctuation */ 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', - /* s* */ 's', '*', - /* sc */ 's', 'c', - /* separator */ 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', - /* sk */ 's', 'k', - /* sm */ 's', 'm', - /* so */ 's', 'o', - /* spaceseparator */ 's', 'p', 'a', 'c', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', - /* spacingcombiningmark */ 's', 'p', 'a', 'c', 'i', 'n', 'g', 'c', 'o', 'm', 'b', 'i', 'n', 'i', 'n', 'g', 'm', 'a', 'r', 'k', - /* surrogate */ 's', 'u', 'r', 'r', 'o', 'g', 'a', 't', 'e', - /* symbol */ 's', 'y', 'm', 'b', 'o', 'l', - /* titlecase */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', - /* titlecaseletter */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', - /* uppercaseletter */ 'u', 'p', 'p', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', - /* z* */ 'z', '*', - /* zl */ 'z', 'l', - /* zp */ 'z', 'p', - /* zs */ 'z', 's', - }; - - static const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> range_data[] = { - { prop_name_table+0, prop_name_table+3, }, // any - { prop_name_table+3, prop_name_table+8, }, // ascii - { prop_name_table+8, prop_name_table+16, }, // assigned - { prop_name_table+16, prop_name_table+18, }, // c* - { prop_name_table+18, prop_name_table+20, }, // cc - { prop_name_table+20, prop_name_table+22, }, // cf - { prop_name_table+22, prop_name_table+38, }, // closepunctuation - { prop_name_table+38, prop_name_table+40, }, // cn - { prop_name_table+40, prop_name_table+42, }, // co - { prop_name_table+42, prop_name_table+62, }, // connectorpunctuation - { prop_name_table+62, prop_name_table+69, }, // control - { prop_name_table+69, prop_name_table+71, }, // cs - { prop_name_table+71, prop_name_table+85, }, // currencysymbol - { prop_name_table+85, prop_name_table+100, }, // dashpunctuation - { prop_name_table+100, prop_name_table+118, }, // decimaldigitnumber - { prop_name_table+118, prop_name_table+131, }, // enclosingmark - { prop_name_table+131, prop_name_table+147, }, // finalpunctuation - { prop_name_table+147, prop_name_table+153, }, // format - { prop_name_table+153, prop_name_table+171, }, // initialpunctuation - { prop_name_table+171, prop_name_table+173, }, // l* - { prop_name_table+173, prop_name_table+179, }, // letter - { prop_name_table+179, prop_name_table+191, }, // letternumber - { prop_name_table+191, prop_name_table+204, }, // lineseparator - { prop_name_table+204, prop_name_table+206, }, // ll - { prop_name_table+206, prop_name_table+208, }, // lm - { prop_name_table+208, prop_name_table+210, }, // lo - { prop_name_table+210, prop_name_table+225, }, // lowercaseletter - { prop_name_table+225, prop_name_table+227, }, // lt - { prop_name_table+227, prop_name_table+229, }, // lu - { prop_name_table+229, prop_name_table+231, }, // m* - { prop_name_table+231, prop_name_table+235, }, // mark - { prop_name_table+235, prop_name_table+245, }, // mathsymbol - { prop_name_table+245, prop_name_table+247, }, // mc - { prop_name_table+247, prop_name_table+249, }, // me - { prop_name_table+249, prop_name_table+251, }, // mn - { prop_name_table+251, prop_name_table+265, }, // modifierletter - { prop_name_table+265, prop_name_table+279, }, // modifiersymbol - { prop_name_table+279, prop_name_table+281, }, // n* - { prop_name_table+281, prop_name_table+283, }, // nd - { prop_name_table+283, prop_name_table+285, }, // nl - { prop_name_table+285, prop_name_table+287, }, // no - { prop_name_table+287, prop_name_table+301, }, // nonspacingmark - { prop_name_table+301, prop_name_table+312, }, // notassigned - { prop_name_table+312, prop_name_table+318, }, // number - { prop_name_table+318, prop_name_table+333, }, // openpunctuation - { prop_name_table+333, prop_name_table+338, }, // other - { prop_name_table+338, prop_name_table+349, }, // otherletter - { prop_name_table+349, prop_name_table+360, }, // othernumber - { prop_name_table+360, prop_name_table+376, }, // otherpunctuation - { prop_name_table+376, prop_name_table+387, }, // othersymbol - { prop_name_table+387, prop_name_table+389, }, // p* - { prop_name_table+389, prop_name_table+407, }, // paragraphseparator - { prop_name_table+407, prop_name_table+409, }, // pc - { prop_name_table+409, prop_name_table+411, }, // pd - { prop_name_table+411, prop_name_table+413, }, // pe - { prop_name_table+413, prop_name_table+415, }, // pf - { prop_name_table+415, prop_name_table+417, }, // pi - { prop_name_table+417, prop_name_table+419, }, // po - { prop_name_table+419, prop_name_table+429, }, // privateuse - { prop_name_table+429, prop_name_table+431, }, // ps - { prop_name_table+431, prop_name_table+442, }, // punctuation - { prop_name_table+442, prop_name_table+444, }, // s* - { prop_name_table+444, prop_name_table+446, }, // sc - { prop_name_table+446, prop_name_table+455, }, // separator - { prop_name_table+455, prop_name_table+457, }, // sk - { prop_name_table+457, prop_name_table+459, }, // sm - { prop_name_table+459, prop_name_table+461, }, // so - { prop_name_table+461, prop_name_table+475, }, // spaceseparator - { prop_name_table+475, prop_name_table+495, }, // spacingcombiningmark - { prop_name_table+495, prop_name_table+504, }, // surrogate - { prop_name_table+504, prop_name_table+510, }, // symbol - { prop_name_table+510, prop_name_table+519, }, // titlecase - { prop_name_table+519, prop_name_table+534, }, // titlecaseletter - { prop_name_table+534, prop_name_table+549, }, // uppercaseletter - { prop_name_table+549, prop_name_table+551, }, // z* - { prop_name_table+551, prop_name_table+553, }, // zl - { prop_name_table+553, prop_name_table+555, }, // zp - { prop_name_table+555, prop_name_table+557, }, // zs - }; - - static const icu_regex_traits::char_class_type icu_class_map[] = { - icu_regex_traits::mask_any, // any - icu_regex_traits::mask_ascii, // ascii - (0x3FFFFFFFu) & ~(U_GC_CN_MASK), // assigned - U_GC_C_MASK, // c* - U_GC_CC_MASK, // cc - U_GC_CF_MASK, // cf - U_GC_PE_MASK, // closepunctuation - U_GC_CN_MASK, // cn - U_GC_CO_MASK, // co - U_GC_PC_MASK, // connectorpunctuation - U_GC_CC_MASK, // control - U_GC_CS_MASK, // cs - U_GC_SC_MASK, // currencysymbol - U_GC_PD_MASK, // dashpunctuation - U_GC_ND_MASK, // decimaldigitnumber - U_GC_ME_MASK, // enclosingmark - U_GC_PF_MASK, // finalpunctuation - U_GC_CF_MASK, // format - U_GC_PI_MASK, // initialpunctuation - U_GC_L_MASK, // l* - U_GC_L_MASK, // letter - U_GC_NL_MASK, // letternumber - U_GC_ZL_MASK, // lineseparator - U_GC_LL_MASK, // ll - U_GC_LM_MASK, // lm - U_GC_LO_MASK, // lo - U_GC_LL_MASK, // lowercaseletter - U_GC_LT_MASK, // lt - U_GC_LU_MASK, // lu - U_GC_M_MASK, // m* - U_GC_M_MASK, // mark - U_GC_SM_MASK, // mathsymbol - U_GC_MC_MASK, // mc - U_GC_ME_MASK, // me - U_GC_MN_MASK, // mn - U_GC_LM_MASK, // modifierletter - U_GC_SK_MASK, // modifiersymbol - U_GC_N_MASK, // n* - U_GC_ND_MASK, // nd - U_GC_NL_MASK, // nl - U_GC_NO_MASK, // no - U_GC_MN_MASK, // nonspacingmark - U_GC_CN_MASK, // notassigned - U_GC_N_MASK, // number - U_GC_PS_MASK, // openpunctuation - U_GC_C_MASK, // other - U_GC_LO_MASK, // otherletter - U_GC_NO_MASK, // othernumber - U_GC_PO_MASK, // otherpunctuation - U_GC_SO_MASK, // othersymbol - U_GC_P_MASK, // p* - U_GC_ZP_MASK, // paragraphseparator - U_GC_PC_MASK, // pc - U_GC_PD_MASK, // pd - U_GC_PE_MASK, // pe - U_GC_PF_MASK, // pf - U_GC_PI_MASK, // pi - U_GC_PO_MASK, // po - U_GC_CO_MASK, // privateuse - U_GC_PS_MASK, // ps - U_GC_P_MASK, // punctuation - U_GC_S_MASK, // s* - U_GC_SC_MASK, // sc - U_GC_Z_MASK, // separator - U_GC_SK_MASK, // sk - U_GC_SM_MASK, // sm - U_GC_SO_MASK, // so - U_GC_ZS_MASK, // spaceseparator - U_GC_MC_MASK, // spacingcombiningmark - U_GC_CS_MASK, // surrogate - U_GC_S_MASK, // symbol - U_GC_LT_MASK, // titlecase - U_GC_LT_MASK, // titlecaseletter - U_GC_LU_MASK, // uppercaseletter - U_GC_Z_MASK, // z* - U_GC_ZL_MASK, // zl - U_GC_ZP_MASK, // zp - U_GC_ZS_MASK, // zs - }; - - - const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_begin = range_data; - const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_end = range_data + (sizeof(range_data)/sizeof(range_data[0])); - - BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> t = { p1, p2, }; - const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* p = std::lower_bound(ranges_begin, ranges_end, t); - if((p != ranges_end) && (t == *p)) - return icu_class_map[p - ranges_begin]; - return 0; -} - -icu_regex_traits::char_class_type icu_regex_traits::lookup_classname(const char_type* p1, const char_type* p2) const -{ - static const char_class_type masks[] = - { - 0, - U_GC_L_MASK | U_GC_ND_MASK, - U_GC_L_MASK, - mask_blank, - U_GC_CC_MASK | U_GC_CF_MASK | U_GC_ZL_MASK | U_GC_ZP_MASK, - U_GC_ND_MASK, - U_GC_ND_MASK, - (0x3FFFFFFFu) & ~(U_GC_CC_MASK | U_GC_CF_MASK | U_GC_CS_MASK | U_GC_CN_MASK | U_GC_Z_MASK), - mask_horizontal, - U_GC_LL_MASK, - U_GC_LL_MASK, - ~(U_GC_C_MASK), - U_GC_P_MASK, - char_class_type(U_GC_Z_MASK) | mask_space, - char_class_type(U_GC_Z_MASK) | mask_space, - U_GC_LU_MASK, - mask_unicode, - U_GC_LU_MASK, - mask_vertical, - char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, - char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, - char_class_type(U_GC_ND_MASK) | mask_xdigit, - }; - - int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); - if(idx >= 0) - return masks[idx+1]; - char_class_type result = lookup_icu_mask(p1, p2); - if(result != 0) - return result; - - if(idx < 0) - { - string_type s(p1, p2); - string_type::size_type i = 0; - while(i < s.size()) - { - s[i] = static_cast((::u_tolower)(s[i])); - if(::u_isspace(s[i]) || (s[i] == '-') || (s[i] == '_')) - s.erase(s.begin()+i, s.begin()+i+1); - else - { - s[i] = static_cast((::u_tolower)(s[i])); - ++i; - } - } - if(s.size()) - idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); - if(idx >= 0) - return masks[idx+1]; - if(s.size()) - result = lookup_icu_mask(&*s.begin(), &*s.begin() + s.size()); - if(result != 0) - return result; - } - BOOST_ASSERT(std::size_t(idx+1) < sizeof(masks) / sizeof(masks[0])); - return masks[idx+1]; -} - -icu_regex_traits::string_type icu_regex_traits::lookup_collatename(const char_type* p1, const char_type* p2) const -{ - string_type result; -#ifdef BOOST_NO_CXX98_BINDERS - if(std::find_if(p1, p2, std::bind(std::greater< ::UChar32>(), std::placeholders::_1, 0x7f)) == p2) -#else - if(std::find_if(p1, p2, std::bind2nd(std::greater< ::UChar32>(), 0x7f)) == p2) -#endif - { -#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS - std::string s(p1, p2); -#else - std::string s; - const char_type* p3 = p1; - while(p3 != p2) - s.append(1, *p3++); -#endif - // Try Unicode name: - UErrorCode err = U_ZERO_ERROR; - UChar32 c = ::u_charFromName(U_UNICODE_CHAR_NAME, s.c_str(), &err); - if(U_SUCCESS(err)) - { - result.push_back(c); - return result; - } - // Try Unicode-extended name: - err = U_ZERO_ERROR; - c = ::u_charFromName(U_EXTENDED_CHAR_NAME, s.c_str(), &err); - if(U_SUCCESS(err)) - { - result.push_back(c); - return result; - } - // try POSIX name: - s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s); -#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS - result.assign(s.begin(), s.end()); -#else - result.clear(); - std::string::const_iterator si, sj; - si = s.begin(); - sj = s.end(); - while(si != sj) - result.push_back(*si++); -#endif - } - if(result.empty() && (p2-p1 == 1)) - result.push_back(*p1); - return result; -} - -bool icu_regex_traits::isctype(char_type c, char_class_type f) const -{ - // check for standard catagories first: - char_class_type m = char_class_type(static_cast(1) << u_charType(c)); - if((m & f) != 0) - return true; - // now check for special cases: - if(((f & mask_blank) != 0) && u_isblank(c)) - return true; - if(((f & mask_space) != 0) && u_isspace(c)) - return true; - if(((f & mask_xdigit) != 0) && (u_digit(c, 16) >= 0)) - return true; - if(((f & mask_unicode) != 0) && (c >= 0x100)) - return true; - if(((f & mask_underscore) != 0) && (c == '_')) - return true; - if(((f & mask_any) != 0) && (c <= 0x10FFFF)) - return true; - if(((f & mask_ascii) != 0) && (c <= 0x7F)) - return true; - if(((f & mask_vertical) != 0) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == static_cast('\v')) || (m == U_GC_ZL_MASK) || (m == U_GC_ZP_MASK))) - return true; - if(((f & mask_horizontal) != 0) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && u_isspace(c) && (c != static_cast('\v'))) - return true; - return false; -} - -} - -#endif // BOOST_HAS_ICU diff --git a/src/instances.cpp b/src/instances.cpp deleted file mode 100644 index 88502b19..00000000 --- a/src/instances.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/* - * - * Copyright (c) 1998-2002 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE: instances.cpp - * VERSION: see - * DESCRIPTION: regex narrow character template instances. - */ - -#define BOOST_REGEX_SOURCE - -#include - -#if !defined(BOOST_REGEX_NO_EXTERNAL_TEMPLATES) -#define BOOST_REGEX_NARROW_INSTANTIATE - -#ifdef BOOST_BORLANDC -#pragma hrdstop -#endif - -#include - -#endif diff --git a/src/posix_api.cpp b/src/posix_api.cpp index 47133fa0..95978e43 100644 --- a/src/posix_api.cpp +++ b/src/posix_api.cpp @@ -19,9 +19,9 @@ #define BOOST_REGEX_SOURCE #include -#include #include #include +#include #if defined(BOOST_NO_STDC_NAMESPACE) namespace std{ @@ -269,8 +269,8 @@ BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecA(const regex_tA* expression, cons std::size_t i; for(i = 0; (i < n) && (i < expression->re_nsub + 1); ++i) { - array[i].rm_so = (m[i].matched == false) ? -1 : (m[i].first - buf); - array[i].rm_eo = (m[i].matched == false) ? -1 : (m[i].second - buf); + array[i].rm_so = m[i].matched ? (m[i].first - buf) : -1; + array[i].rm_eo = m[i].matched ? (m[i].second - buf) : -1; } // and set anything else to -1: for(i = expression->re_nsub + 1; i < n; ++i) @@ -296,7 +296,3 @@ BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeA(regex_tA* expression) } } // namespace boost - - - - diff --git a/src/regex.cpp b/src/regex.cpp index ee0204cc..2d923955 100644 --- a/src/regex.cpp +++ b/src/regex.cpp @@ -19,15 +19,12 @@ #define BOOST_REGEX_SOURCE -#include -#include -#include -#include +#include -#if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && defined(_MSC_VER) && (_MSC_VER >= 1300) -# include -#endif #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD + +#include + #ifndef WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN #endif @@ -37,78 +34,12 @@ #define NOGDI #define NOUSER #include -#endif +#include +#include +#include -#if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_REGEX_V3) -#if BOOST_REGEX_MAX_CACHE_BLOCKS == 0 -#include -#else -#include -#endif -#endif - -#ifdef BOOST_INTEL -#pragma warning(disable:383) -#endif - -namespace boost{ - -// -// fix: these are declared out of line here to ensure -// that dll builds contain the Virtual table for these -// types - this ensures that exceptions can be thrown -// from the dll and caught in an exe. -regex_error::regex_error(const std::string& s, regex_constants::error_type err, std::ptrdiff_t pos) - : std::runtime_error(s) - , m_error_code(err) - , m_position(pos) -{ -} - -regex_error::regex_error(regex_constants::error_type err) - : std::runtime_error(::boost::BOOST_REGEX_DETAIL_NS::get_default_error_string(err)) - , m_error_code(err) - , m_position(0) -{ -} - -regex_error::~regex_error() BOOST_NOEXCEPT_OR_NOTHROW -{ -} - -void regex_error::raise()const -{ -#ifndef BOOST_NO_EXCEPTIONS - ::boost::throw_exception(*this); -#endif -} - - - -namespace BOOST_REGEX_DETAIL_NS{ - -BOOST_REGEX_DECL void BOOST_REGEX_CALL raise_runtime_error(const std::runtime_error& ex) -{ - ::boost::throw_exception(ex); -} -// -// error checking API: -// -BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex::flag_type /*ef*/, match_flag_type mf) -{ -#ifndef BOOST_REGEX_V3 - // - // can't mix match_extra with POSIX matching rules: - // - if((mf & match_extra) && (mf & match_posix)) - { - std::logic_error msg("Usage Error: Can't mix regular expression captures with POSIX matching rules"); - throw_exception(msg); - } -#endif -} - -#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD +namespace boost { +namespace BOOST_REGEX_DETAIL_NS { static void execute_eror() { @@ -175,52 +106,10 @@ BOOST_REGEX_DECL void BOOST_REGEX_CALL reset_stack_guard_page() } #endif } -#endif - -#if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_REGEX_V3) - -#if BOOST_REGEX_MAX_CACHE_BLOCKS == 0 - -BOOST_REGEX_DECL void* BOOST_REGEX_CALL get_mem_block() -{ - return ::operator new(BOOST_REGEX_BLOCKSIZE); } - -BOOST_REGEX_DECL void BOOST_REGEX_CALL put_mem_block(void* p) -{ - ::operator delete(p); -} - -#else - -#if defined(BOOST_REGEX_MEM_BLOCK_CACHE_LOCK_FREE) -mem_block_cache block_cache = { { {nullptr} } } ; -#elif defined(BOOST_HAS_THREADS) -mem_block_cache block_cache = { 0, 0, BOOST_STATIC_MUTEX_INIT, }; -#else -mem_block_cache block_cache = { 0, 0, }; +} // namspaces #endif -BOOST_REGEX_DECL void* BOOST_REGEX_CALL get_mem_block() -{ - return block_cache.get(); -} - -BOOST_REGEX_DECL void BOOST_REGEX_CALL put_mem_block(void* p) -{ - block_cache.put(p); -} - -#endif - -#endif - -} // namespace BOOST_REGEX_DETAIL_NS - - - -} // namespace boost - #if defined(BOOST_RE_USE_VCL) && defined(BOOST_REGEX_DYN_LINK) int WINAPI DllEntryPoint(HINSTANCE , unsigned long , void*) diff --git a/src/regex_raw_buffer.cpp b/src/regex_raw_buffer.cpp deleted file mode 100644 index bb492297..00000000 --- a/src/regex_raw_buffer.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/* - * - * Copyright (c) 2004 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE regex_raw_buffer.cpp - * VERSION see - * DESCRIPTION: Member functions for class raw_storage. - */ - - -#define BOOST_REGEX_SOURCE -#include -#include -#include -#include -#include - -#if defined(BOOST_NO_STDC_NAMESPACE) -namespace std{ - using ::memcpy; - using ::memmove; -} -#endif - - -namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ - -void BOOST_REGEX_CALL raw_storage::resize(size_type n) -{ - size_type newsize = start ? last - start : 1024; - while(newsize < n) - newsize *= 2; - size_type datasize = end - start; - // extend newsize to WORD/DWORD boundary: - newsize = (newsize + padding_mask) & ~(padding_mask); - - // allocate and copy data: - pointer ptr = static_cast(::operator new(newsize)); - BOOST_REGEX_NOEH_ASSERT(ptr) - if(start) - std::memcpy(ptr, start, datasize); - - // get rid of old buffer: - ::operator delete(start); - - // and set up pointers: - start = ptr; - end = ptr + datasize; - last = ptr + newsize; -} - -void* BOOST_REGEX_CALL raw_storage::insert(size_type pos, size_type n) -{ - BOOST_ASSERT(pos <= size_type(end - start)); - if(size_type(last - end) < n) - resize(n + (end - start)); - void* result = start + pos; - std::memmove(start + pos + n, start + pos, (end - start) - pos); - end += n; - return result; -} - -}} // namespaces diff --git a/src/static_mutex.cpp b/src/static_mutex.cpp index 35dd0c79..b238b44a 100644 --- a/src/static_mutex.cpp +++ b/src/static_mutex.cpp @@ -17,7 +17,10 @@ */ #define BOOST_REGEX_SOURCE -#include +#include + +#if defined(BOOST_REGEX_CXX03) + #include #ifdef BOOST_HAS_THREADS @@ -183,3 +186,4 @@ void scoped_static_mutex_lock::unlock() } #endif // BOOST_HAS_THREADS +#endif diff --git a/src/usinstances.cpp b/src/usinstances.cpp deleted file mode 100644 index 44ebd28d..00000000 --- a/src/usinstances.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * - * Copyright (c) 1998-2002 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE: winstances.cpp - * VERSION: see - * DESCRIPTION: regex unsigned short template instances (MSVC only). - */ - -#define BOOST_REGEX_SOURCE -#ifdef _MSC_VER -#pragma warning(disable:4506) // 'no definition for inline function' -#endif - -#include -#include -#include - -#if defined(_DLL_CPPLIB) && !defined(_M_CEE_PURE) && defined(_NATIVE_WCHAR_T_DEFINED) \ - && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION) || defined(__STD_RWCOMPILER_H__) || defined(_RWSTD_VER))\ - && BOOST_WORKAROUND(BOOST_MSVC, <1600) -// -// This is a horrible workaround, but without declaring these symbols extern we get -// duplicate symbol errors when linking if the application is built without -// /Zc:wchar_t -// -#ifdef _CRTIMP2_PURE -# define BOOST_REGEX_STDLIB_DECL _CRTIMP2_PURE -#else -# define BOOST_REGEX_STDLIB_DECL _CRTIMP2 -#endif - -namespace std{ - -#if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) -template class BOOST_REGEX_STDLIB_DECL allocator; -template class BOOST_REGEX_STDLIB_DECL _String_val >; -template class BOOST_REGEX_STDLIB_DECL basic_string, allocator >; -#endif - -#if BOOST_WORKAROUND(BOOST_MSVC, > 1300) && BOOST_WORKAROUND(BOOST_MSVC, BOOST_TESTED_AT(1400)) -template<> BOOST_REGEX_STDLIB_DECL std::size_t __cdecl char_traits::length(unsigned short const*); -#endif - -template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( - const basic_string, allocator >&, - const basic_string, allocator >&); -template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( - const unsigned short *, - const basic_string, allocator >&); -template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( - const basic_string, allocator >&, - const unsigned short *); -template BOOST_REGEX_STDLIB_DECL bool __cdecl operator<( - const basic_string, allocator >&, - const basic_string, allocator >&); -template BOOST_REGEX_STDLIB_DECL bool __cdecl operator>( - const basic_string, allocator >&, - const basic_string, allocator >&); -} -#endif - -#include - -#if !defined(BOOST_NO_WREGEX) && defined(BOOST_REGEX_HAS_OTHER_WCHAR_T) && !defined(BOOST_REGEX_NO_EXTERNAL_TEMPLATES) -#define BOOST_REGEX_US_INSTANTIATE - -#include - -#endif - - diff --git a/src/w32_regex_traits.cpp b/src/w32_regex_traits.cpp deleted file mode 100644 index 5b5236aa..00000000 --- a/src/w32_regex_traits.cpp +++ /dev/null @@ -1,654 +0,0 @@ -/* - * - * Copyright (c) 2004 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE w32_regex_traits.cpp - * VERSION see - * DESCRIPTION: Implements w32_regex_traits (and associated helper classes). - */ - -#define BOOST_REGEX_SOURCE -#include - -#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) && !defined(BOOST_REGEX_NO_WIN32_LOCALE) -#include -#include - -#ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -#endif -#ifndef NOMINMAX -# define NOMINMAX -#endif -#define NOGDI -#include - -#if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(UNDER_CE) -#pragma comment(lib, "user32.lib") -#endif - -#ifdef BOOST_NO_STDC_NAMESPACE -namespace std{ - using ::memset; -} -#endif - -namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ - -#ifdef BOOST_NO_ANSI_APIS -UINT get_code_page_for_locale_id(lcid_type idx) -{ - WCHAR code_page_string[7]; - if (::GetLocaleInfoW(idx, LOCALE_IDEFAULTANSICODEPAGE, code_page_string, 7) == 0) - return 0; - - return static_cast(_wtol(code_page_string)); -} -#endif - - -void w32_regex_traits_char_layer::init() -{ - // we need to start by initialising our syntax map so we know which - // character is used for which purpose: - std::memset(m_char_map, 0, sizeof(m_char_map)); - cat_type cat; - std::string cat_name(w32_regex_traits::get_catalog_name()); - if(cat_name.size()) - { - cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name); - if(!cat) - { - std::string m("Unable to open message catalog: "); - std::runtime_error err(m + cat_name); - ::boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err); - } - } - // - // if we have a valid catalog then load our messages: - // - if(cat) - { - for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) - { - string_type mss = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i, get_default_syntax(i)); - for(string_type::size_type j = 0; j < mss.size(); ++j) - { - m_char_map[static_cast(mss[j])] = i; - } - } - } - else - { - for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) - { - const char* ptr = get_default_syntax(i); - while(ptr && *ptr) - { - m_char_map[static_cast(*ptr)] = i; - ++ptr; - } - } - } - // - // finish off by calculating our escape types: - // - unsigned char i = 'A'; - do - { - if(m_char_map[i] == 0) - { - if(::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0002u, (char)i)) - m_char_map[i] = regex_constants::escape_type_class; - else if(::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0001u, (char)i)) - m_char_map[i] = regex_constants::escape_type_not_class; - } - }while(0xFF != i++); - - // - // fill in lower case map: - // - char char_map[1 << CHAR_BIT]; - for(int ii = 0; ii < (1 << CHAR_BIT); ++ii) - char_map[ii] = static_cast(ii); -#ifndef BOOST_NO_ANSI_APIS - int r = ::LCMapStringA(this->m_locale, LCMAP_LOWERCASE, char_map, 1 << CHAR_BIT, this->m_lower_map, 1 << CHAR_BIT); - BOOST_ASSERT(r != 0); -#else - UINT code_page = get_code_page_for_locale_id(this->m_locale); - BOOST_ASSERT(code_page != 0); - - WCHAR wide_char_map[1 << CHAR_BIT]; - int conv_r = ::MultiByteToWideChar(code_page, 0, char_map, 1 << CHAR_BIT, wide_char_map, 1 << CHAR_BIT); - BOOST_ASSERT(conv_r != 0); - - WCHAR wide_lower_map[1 << CHAR_BIT]; - int r = ::LCMapStringW(this->m_locale, LCMAP_LOWERCASE, wide_char_map, 1 << CHAR_BIT, wide_lower_map, 1 << CHAR_BIT); - BOOST_ASSERT(r != 0); - - conv_r = ::WideCharToMultiByte(code_page, 0, wide_lower_map, r, this->m_lower_map, 1 << CHAR_BIT, NULL, NULL); - BOOST_ASSERT(conv_r != 0); -#endif - if(r < (1 << CHAR_BIT)) - { - // if we have multibyte characters then not all may have been given - // a lower case mapping: - for(int jj = r; jj < (1 << CHAR_BIT); ++jj) - this->m_lower_map[jj] = static_cast(jj); - } - -#ifndef BOOST_NO_ANSI_APIS - r = ::GetStringTypeExA(this->m_locale, CT_CTYPE1, char_map, 1 << CHAR_BIT, this->m_type_map); -#else - r = ::GetStringTypeExW(this->m_locale, CT_CTYPE1, wide_char_map, 1 << CHAR_BIT, this->m_type_map); -#endif - BOOST_ASSERT(0 != r); -} - -BOOST_REGEX_DECL lcid_type BOOST_REGEX_CALL w32_get_default_locale() -{ - return ::GetUserDefaultLCID(); -} - -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(char c, lcid_type idx) -{ -#ifndef BOOST_NO_ANSI_APIS - WORD mask; - if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER)) - return true; - return false; -#else - UINT code_page = get_code_page_for_locale_id(idx); - if (code_page == 0) - return false; - - WCHAR wide_c; - if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) - return false; - - WORD mask; - if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_LOWER)) - return true; - return false; -#endif -} - -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(wchar_t c, lcid_type idx) -{ - WORD mask; - if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER)) - return true; - return false; -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(unsigned short ca, lcid_type idx) -{ - WORD mask; - wchar_t c = ca; - if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER)) - return true; - return false; -} -#endif - -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(char c, lcid_type idx) -{ -#ifndef BOOST_NO_ANSI_APIS - WORD mask; - if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER)) - return true; - return false; -#else - UINT code_page = get_code_page_for_locale_id(idx); - if (code_page == 0) - return false; - - WCHAR wide_c; - if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) - return false; - - WORD mask; - if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_UPPER)) - return true; - return false; -#endif -} - -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(wchar_t c, lcid_type idx) -{ - WORD mask; - if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER)) - return true; - return false; -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(unsigned short ca, lcid_type idx) -{ - WORD mask; - wchar_t c = ca; - if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER)) - return true; - return false; -} -#endif - -void free_module(void* mod) -{ - ::FreeLibrary(static_cast(mod)); -} - -BOOST_REGEX_DECL cat_type BOOST_REGEX_CALL w32_cat_open(const std::string& name) -{ -#ifndef BOOST_NO_ANSI_APIS - cat_type result(::LoadLibraryA(name.c_str()), &free_module); - return result; -#else - LPWSTR wide_name = (LPWSTR)_alloca( (name.size() + 1) * sizeof(WCHAR) ); - if (::MultiByteToWideChar(CP_ACP, 0, name.c_str(), name.size(), wide_name, name.size() + 1) == 0) - return cat_type(); - - cat_type result(::LoadLibraryW(wide_name), &free_module); - return result; -#endif -} - -BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::string& def) -{ -#ifndef BOOST_NO_ANSI_APIS - char buf[256]; - if(0 == ::LoadStringA( - static_cast(cat.get()), - i, - buf, - 256 - )) - { - return def; - } -#else - WCHAR wbuf[256]; - int r = ::LoadStringW( - static_cast(cat.get()), - i, - wbuf, - 256 - ); - if (r == 0) - return def; - - - int buf_size = 1 + ::WideCharToMultiByte(CP_ACP, 0, wbuf, r, NULL, 0, NULL, NULL); - LPSTR buf = (LPSTR)_alloca(buf_size); - if (::WideCharToMultiByte(CP_ACP, 0, wbuf, r, buf, buf_size, NULL, NULL) == 0) - return def; // failed conversion. -#endif - return std::string(buf); -} - -#ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::wstring& def) -{ - wchar_t buf[256]; - if(0 == ::LoadStringW( - static_cast(cat.get()), - i, - buf, - 256 - )) - { - return def; - } - return std::wstring(buf); -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL std::basic_string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::basic_string& def) -{ - unsigned short buf[256]; - if(0 == ::LoadStringW( - static_cast(cat.get()), - i, - (LPWSTR)buf, - 256 - )) - { - return def; - } - return std::basic_string(buf); -} -#endif -#endif -BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_transform(lcid_type idx, const char* p1, const char* p2) -{ -#ifndef BOOST_NO_ANSI_APIS - int bytes = ::LCMapStringA( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - p1, // source string - static_cast(p2 - p1), // number of characters in source string - 0, // destination buffer - 0 // size of destination buffer - ); - if(!bytes) - return std::string(p1, p2); - std::string result(++bytes, '\0'); - bytes = ::LCMapStringA( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - p1, // source string - static_cast(p2 - p1), // number of characters in source string - &*result.begin(), // destination buffer - bytes // size of destination buffer - ); -#else - UINT code_page = get_code_page_for_locale_id(idx); - if(code_page == 0) - return std::string(p1, p2); - - int src_len = static_cast(p2 - p1); - LPWSTR wide_p1 = (LPWSTR)_alloca( (src_len + 1) * 2 ); - if(::MultiByteToWideChar(code_page, 0, p1, src_len, wide_p1, src_len + 1) == 0) - return std::string(p1, p2); - - int bytes = ::LCMapStringW( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - wide_p1, // source string - src_len, // number of characters in source string - 0, // destination buffer - 0 // size of destination buffer - ); - if(!bytes) - return std::string(p1, p2); - std::string result(++bytes, '\0'); - bytes = ::LCMapStringW( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - wide_p1, // source string - src_len, // number of characters in source string - (LPWSTR)&*result.begin(), // destination buffer - bytes // size of destination buffer - ); -#endif - if(bytes > static_cast(result.size())) - return std::string(p1, p2); - while(result.size() && result[result.size()-1] == '\0') - { - result.erase(result.size()-1); - } - return result; -} - -#ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_transform(lcid_type idx, const wchar_t* p1, const wchar_t* p2) -{ - int bytes = ::LCMapStringW( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - p1, // source string - static_cast(p2 - p1), // number of characters in source string - 0, // destination buffer - 0 // size of destination buffer - ); - if(!bytes) - return std::wstring(p1, p2); - std::string result(++bytes, '\0'); - bytes = ::LCMapStringW( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - p1, // source string - static_cast(p2 - p1), // number of characters in source string - reinterpret_cast(&*result.begin()), // destination buffer *of bytes* - bytes // size of destination buffer - ); - if(bytes > static_cast(result.size())) - return std::wstring(p1, p2); - while(result.size() && result[result.size()-1] == L'\0') - { - result.erase(result.size()-1); - } - std::wstring r2; - for(std::string::size_type i = 0; i < result.size(); ++i) - r2.append(1, static_cast(static_cast(result[i]))); - return r2; -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL std::basic_string BOOST_REGEX_CALL w32_transform(lcid_type idx, const unsigned short* p1, const unsigned short* p2) -{ - int bytes = ::LCMapStringW( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - (LPCWSTR)p1, // source string - static_cast(p2 - p1), // number of characters in source string - 0, // destination buffer - 0 // size of destination buffer - ); - if(!bytes) - return std::basic_string(p1, p2); - std::string result(++bytes, '\0'); - bytes = ::LCMapStringW( - idx, // locale identifier - LCMAP_SORTKEY, // mapping transformation type - (LPCWSTR)p1, // source string - static_cast(p2 - p1), // number of characters in source string - reinterpret_cast(&*result.begin()), // destination buffer *of bytes* - bytes // size of destination buffer - ); - if(bytes > static_cast(result.size())) - return std::basic_string(p1, p2); - while(result.size() && result[result.size()-1] == L'\0') - { - result.erase(result.size()-1); - } - std::basic_string r2; - for(std::string::size_type i = 0; i < result.size(); ++i) - r2.append(1, static_cast(static_cast(result[i]))); - return r2; -} -#endif -#endif -BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_tolower(char c, lcid_type idx) -{ - char result[2]; -#ifndef BOOST_NO_ANSI_APIS - int b = ::LCMapStringA( - idx, // locale identifier - LCMAP_LOWERCASE, // mapping transformation type - &c, // source string - 1, // number of characters in source string - result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; -#else - UINT code_page = get_code_page_for_locale_id(idx); - if (code_page == 0) - return c; - - WCHAR wide_c; - if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) - return c; - - WCHAR wide_result; - int b = ::LCMapStringW( - idx, // locale identifier - LCMAP_LOWERCASE, // mapping transformation type - &wide_c, // source string - 1, // number of characters in source string - &wide_result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; - - if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0) - return c; // No single byte lower case equivalent available -#endif - return result[0]; -} - -#ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_tolower(wchar_t c, lcid_type idx) -{ - wchar_t result[2]; - int b = ::LCMapStringW( - idx, // locale identifier - LCMAP_LOWERCASE, // mapping transformation type - &c, // source string - 1, // number of characters in source string - result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; - return result[0]; -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_tolower(unsigned short c, lcid_type idx) -{ - wchar_t result[2]; - int b = ::LCMapStringW( - idx, // locale identifier - LCMAP_LOWERCASE, // mapping transformation type - (wchar_t const*)&c, // source string - 1, // number of characters in source string - result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; - return result[0]; -} -#endif -#endif -BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_toupper(char c, lcid_type idx) -{ - char result[2]; -#ifndef BOOST_NO_ANSI_APIS - int b = ::LCMapStringA( - idx, // locale identifier - LCMAP_UPPERCASE, // mapping transformation type - &c, // source string - 1, // number of characters in source string - result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; -#else - UINT code_page = get_code_page_for_locale_id(idx); - if(code_page == 0) - return c; - - WCHAR wide_c; - if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) - return c; - - WCHAR wide_result; - int b = ::LCMapStringW( - idx, // locale identifier - LCMAP_UPPERCASE, // mapping transformation type - &wide_c, // source string - 1, // number of characters in source string - &wide_result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; - - if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0) - return c; // No single byte upper case equivalent available. -#endif - return result[0]; -} - -#ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_toupper(wchar_t c, lcid_type idx) -{ - wchar_t result[2]; - int b = ::LCMapStringW( - idx, // locale identifier - LCMAP_UPPERCASE, // mapping transformation type - &c, // source string - 1, // number of characters in source string - result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; - return result[0]; -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_toupper(unsigned short c, lcid_type idx) -{ - wchar_t result[2]; - int b = ::LCMapStringW( - idx, // locale identifier - LCMAP_UPPERCASE, // mapping transformation type - (wchar_t const*)&c, // source string - 1, // number of characters in source string - result, // destination buffer - 1); // size of destination buffer - if(b == 0) - return c; - return result[0]; -} -#endif -#endif -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, char c) -{ - WORD mask; -#ifndef BOOST_NO_ANSI_APIS - if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) - return true; -#else - UINT code_page = get_code_page_for_locale_id(idx); - if(code_page == 0) - return false; - - WCHAR wide_c; - if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) - return false; - - if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) - return true; -#endif - if((m & w32_regex_traits_implementation::mask_word) && (c == '_')) - return true; - return false; -} - -#ifndef BOOST_NO_WREGEX -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, wchar_t c) -{ - WORD mask; - if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) - return true; - if((m & w32_regex_traits_implementation::mask_word) && (c == '_')) - return true; - if((m & w32_regex_traits_implementation::mask_unicode) && (c > 0xff)) - return true; - return false; -} -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, unsigned short c) -{ - WORD mask; - if(::GetStringTypeExW(idx, CT_CTYPE1, (wchar_t const*)&c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) - return true; - if((m & w32_regex_traits_implementation::mask_word) && (c == '_')) - return true; - if((m & w32_regex_traits_implementation::mask_unicode) && (c > 0xff)) - return true; - return false; -} -#endif -#endif - -} // BOOST_REGEX_DETAIL_NS -} // boost - -#endif - diff --git a/src/wc_regex_traits.cpp b/src/wc_regex_traits.cpp deleted file mode 100644 index aa5b028a..00000000 --- a/src/wc_regex_traits.cpp +++ /dev/null @@ -1,314 +0,0 @@ -/* - * - * Copyright (c) 2004 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE: wc_regex_traits.cpp - * VERSION: see - * DESCRIPTION: Implements out of line members for c_regex_traits - */ - - -#define BOOST_REGEX_SOURCE - -#include -#include -#include -#include "internals.hpp" - -#if defined(_DLL_CPPLIB) && !defined(_M_CEE_PURE) && defined(_NATIVE_WCHAR_T_DEFINED) \ - && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION) || defined(__STD_RWCOMPILER_H__) || defined(_RWSTD_VER))\ - && BOOST_WORKAROUND(BOOST_MSVC, <1600) -// -// This is a horrible workaround, but without declaring these symbols extern we get -// duplicate symbol errors when linking if the application is built without -// /Zc:wchar_t -// -#ifdef _CRTIMP2_PURE -# define BOOST_REGEX_STDLIB_DECL _CRTIMP2_PURE -#else -# define BOOST_REGEX_STDLIB_DECL _CRTIMP2 -#endif - -namespace std{ - -#if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) -template class BOOST_REGEX_STDLIB_DECL allocator; -template class BOOST_REGEX_STDLIB_DECL _String_val >; -template class BOOST_REGEX_STDLIB_DECL basic_string, allocator >; -#endif - -#if BOOST_WORKAROUND(BOOST_MSVC, > 1300) && BOOST_WORKAROUND(BOOST_MSVC, BOOST_TESTED_AT(1400)) -template<> BOOST_REGEX_STDLIB_DECL std::size_t __cdecl char_traits::length(unsigned short const*); -#endif - -template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( - const basic_string, allocator >&, - const basic_string, allocator >&); -template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( - const unsigned short *, - const basic_string, allocator >&); -template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( - const basic_string, allocator >&, - const unsigned short *); -template BOOST_REGEX_STDLIB_DECL bool __cdecl operator<( - const basic_string, allocator >&, - const basic_string, allocator >&); -template BOOST_REGEX_STDLIB_DECL bool __cdecl operator>( - const basic_string, allocator >&, - const basic_string, allocator >&); -} -#endif - -#include -#include - -#if !BOOST_WORKAROUND(BOOST_BORLANDC, < 0x560) - -#include -#ifndef BOOST_NO_WREGEX -#include -#include - -#if defined(BOOST_NO_STDC_NAMESPACE) -namespace std{ - using ::wcstol; -} -#endif - -namespace boost{ - -c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform(const wchar_t* p1, const wchar_t* p2) -{ - std::size_t r; - std::size_t s = 10; - std::wstring src(p1, p2); - std::wstring result(s, L' '); - while(s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s))) - { -#if defined(_CPPLIB_VER) - // - // A bug in VC11 and 12 causes the program to hang if we pass a null-string - // to std::strxfrm, but only for certain locales :-( - // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware). - // - if(r == INT_MAX) - { - result.erase(); - result.insert(result.begin(), static_cast(0)); - return result; - } -#endif - result.append(r - s + 3, L' '); - s = result.size(); - } - result.erase(r); - return result; -} - -c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform_primary(const wchar_t* p1, const wchar_t* p2) -{ - static wchar_t s_delim; - static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast*>(0), &s_delim); - std::wstring result; - // - // What we do here depends upon the format of the sort key returned by - // sort key returned by this->transform: - // - switch(s_collate_type) - { - case ::boost::BOOST_REGEX_DETAIL_NS::sort_C: - case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown: - // the best we can do is translate to lower case, then get a regular sort key: - { - result.assign(p1, p2); - for(std::wstring::size_type i = 0; i < result.size(); ++i) - result[i] = (std::towlower)(result[i]); - result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); - break; - } - case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed: - { - // get a regular sort key, and then truncate it: - result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); - result.erase(s_delim); - break; - } - case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim: - // get a regular sort key, and then truncate everything after the delim: - result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); - if(result.size() && (result[0] == s_delim)) - break; - std::size_t i; - for(i = 0; i < result.size(); ++i) - { - if(result[i] == s_delim) - break; - } - result.erase(i); - break; - } - if(result.empty()) - result = std::wstring(1, char(0)); - return result; -} - -c_regex_traits::char_class_type BOOST_REGEX_CALL c_regex_traits::lookup_classname(const wchar_t* p1, const wchar_t* p2) -{ - static const char_class_type masks[] = - { - 0, - char_class_alnum, - char_class_alpha, - char_class_blank, - char_class_cntrl, - char_class_digit, - char_class_digit, - char_class_graph, - char_class_horizontal, - char_class_lower, - char_class_lower, - char_class_print, - char_class_punct, - char_class_space, - char_class_space, - char_class_upper, - char_class_unicode, - char_class_upper, - char_class_vertical, - char_class_alnum | char_class_word, - char_class_alnum | char_class_word, - char_class_xdigit, - }; - - int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); - if(idx < 0) - { - std::wstring s(p1, p2); - for(std::wstring::size_type i = 0; i < s.size(); ++i) - s[i] = (std::towlower)(s[i]); - idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); - } - BOOST_ASSERT(idx+1 < static_cast(sizeof(masks) / sizeof(masks[0]))); - return masks[idx+1]; -} - -bool BOOST_REGEX_CALL c_regex_traits::isctype(wchar_t c, char_class_type mask) -{ - return - ((mask & char_class_space) && (std::iswspace)(c)) - || ((mask & char_class_print) && (std::iswprint)(c)) - || ((mask & char_class_cntrl) && (std::iswcntrl)(c)) - || ((mask & char_class_upper) && (std::iswupper)(c)) - || ((mask & char_class_lower) && (std::iswlower)(c)) - || ((mask & char_class_alpha) && (std::iswalpha)(c)) - || ((mask & char_class_digit) && (std::iswdigit)(c)) - || ((mask & char_class_punct) && (std::iswpunct)(c)) - || ((mask & char_class_xdigit) && (std::iswxdigit)(c)) - || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c)) - || ((mask & char_class_word) && (c == '_')) - || ((mask & char_class_unicode) && (c & ~static_cast(0xff))) - || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v'))) - || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v')); -} - -c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::lookup_collatename(const wchar_t* p1, const wchar_t* p2) -{ -#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ - && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ - && !BOOST_WORKAROUND(BOOST_BORLANDC, <= 0x0551) - std::string name(p1, p2); -#else - std::string name; - const wchar_t* p0 = p1; - while(p0 != p2) - name.append(1, char(*p0++)); -#endif - name = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(name); -#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ - && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ - && !BOOST_WORKAROUND(BOOST_BORLANDC, <= 0x0551) - if(name.size()) - return string_type(name.begin(), name.end()); -#else - if(name.size()) - { - string_type result; - typedef std::string::const_iterator iter; - iter b = name.begin(); - iter e = name.end(); - while(b != e) - result.append(1, wchar_t(*b++)); - return result; - } -#endif - if(p2 - p1 == 1) - return string_type(1, *p1); - return string_type(); -} - -int BOOST_REGEX_CALL c_regex_traits::value(wchar_t c, int radix) -{ -#ifdef BOOST_BORLANDC - // workaround for broken wcstol: - if((std::iswxdigit)(c) == 0) - return -1; -#endif - wchar_t b[2] = { c, '\0', }; - wchar_t* ep; - int result = std::wcstol(b, &ep, radix); - if(ep == b) - return -1; - return result; -} - -#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T -c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform(const unsigned short* p1, const unsigned short* p2) -{ - std::wstring result = c_regex_traits::transform((const wchar_t*)p1, (const wchar_t*)p2); - return string_type(result.begin(), result.end()); -} - -c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform_primary(const unsigned short* p1, const unsigned short* p2) -{ - std::wstring result = c_regex_traits::transform_primary((const wchar_t*)p1, (const wchar_t*)p2); - return string_type(result.begin(), result.end()); -} - -c_regex_traits::char_class_type BOOST_REGEX_CALL c_regex_traits::lookup_classname(const unsigned short* p1, const unsigned short* p2) -{ - return c_regex_traits::lookup_classname((const wchar_t*)p1, (const wchar_t*)p2); -} - -c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::lookup_collatename(const unsigned short* p1, const unsigned short* p2) -{ - std::wstring result = c_regex_traits::lookup_collatename((const wchar_t*)p1, (const wchar_t*)p2); - return string_type(result.begin(), result.end()); -} - -bool BOOST_REGEX_CALL c_regex_traits::isctype(unsigned short c, char_class_type m) -{ - return c_regex_traits::isctype(c, m); -} - -int BOOST_REGEX_CALL c_regex_traits::value(unsigned short c, int radix) -{ - return c_regex_traits::value(c, radix); -} - -#endif - -} - -#endif // BOOST_NO_WREGEX - -#endif // BOOST_BORLANDC - diff --git a/src/wide_posix_api.cpp b/src/wide_posix_api.cpp index c675104b..abd68c59 100644 --- a/src/wide_posix_api.cpp +++ b/src/wide_posix_api.cpp @@ -25,9 +25,9 @@ #include #include -#include -#include #include +#include +#include #ifdef BOOST_INTEL #pragma warning(disable:981) @@ -284,8 +284,8 @@ BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecW(const regex_tW* expression, cons std::size_t i; for(i = 0; (i < n) && (i < expression->re_nsub + 1); ++i) { - array[i].rm_so = (m[i].matched == false) ? -1 : (m[i].first - buf); - array[i].rm_eo = (m[i].matched == false) ? -1 : (m[i].second - buf); + array[i].rm_so = m[i].matched ? (m[i].first - buf) : -1; + array[i].rm_eo = m[i].matched ? (m[i].second - buf) : -1; } // and set anything else to -1: for(i = expression->re_nsub + 1; i < n; ++i) @@ -313,7 +313,3 @@ BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeW(regex_tW* expression) } // namespace boost; #endif - - - - diff --git a/src/winstances.cpp b/src/winstances.cpp deleted file mode 100644 index beaf0ea5..00000000 --- a/src/winstances.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * - * Copyright (c) 1998-2002 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE: winstances.cpp - * VERSION: see - * DESCRIPTION: regex wide character template instances. - */ - -#define BOOST_REGEX_SOURCE - -#include - -#if !defined(BOOST_NO_WREGEX) && !defined(BOOST_REGEX_NO_EXTERNAL_TEMPLATES) -#define BOOST_REGEX_WIDE_INSTANTIATE - -#ifdef BOOST_BORLANDC -#pragma hrdstop -#endif - -#include - -#endif - - - diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 49ed730b..9cef46b7 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -38,6 +38,7 @@ rule regex-test ( name : sources + : requirements * : input-files * ) R_SOURCE = basic_tests.cpp main.cpp +wmain.cpp test_alt.cpp test_anchors.cpp test_asserts.cpp @@ -61,30 +62,20 @@ test_operators.cpp ; lib boost_regex_recursive : - ../src/c_regex_traits.cpp - ../src/cpp_regex_traits.cpp - ../src/cregex.cpp - ../src/fileiter.cpp - ../src/icu.cpp - ../src/instances.cpp ../src/posix_api.cpp ../src/regex.cpp ../src/regex_debug.cpp - ../src/regex_raw_buffer.cpp - ../src/regex_traits_defaults.cpp ../src/static_mutex.cpp - ../src/w32_regex_traits.cpp - ../src/wc_regex_traits.cpp ../src/wide_posix_api.cpp - ../src/winstances.cpp - ../src/usinstances.cpp ../build//icu_options : BOOST_REGEX_RECURSIVE=1 + BOOST_REGEX_CXX03=1 shared:BOOST_REGEX_DYN_LINK=1 : ; + local regress-sources = regress/$(R_SOURCE) ; test-suite regex @@ -113,24 +104,29 @@ test-suite regex [ regex-test wide_posix_api_check_cpp : c_compiler_checks/wide_posix_api_check.cpp ] - [ run pathology/bad_expression_test.cpp - ../build//boost_regex + [ run pathology/bad_expression_test.cpp : : : + [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] ] - [ run pathology/recursion_test.cpp - ../build//boost_regex + [ run pathology/recursion_test.cpp : : : + [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] ] - [ run named_subexpressions/named_subexpressions_test.cpp - ../build//boost_regex + [ run named_subexpressions/named_subexpressions_test.cpp : : : + [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] ] - [ run unicode/unicode_iterator_test.cpp ../build//boost_regex : : : release TEST_UTF8 : unicode_iterator_test_utf8 ] - [ run unicode/unicode_iterator_test.cpp ../build//boost_regex : : : release TEST_UTF16 : unicode_iterator_test_utf16 ] + [ run unicode/unicode_iterator_test.cpp : : : + [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] + release TEST_UTF8 : unicode_iterator_test_utf8 ] + [ run unicode/unicode_iterator_test.cpp : : : + [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] + release TEST_UTF16 : unicode_iterator_test_utf16 ] [ run static_mutex/static_mutex_test.cpp ../../thread/build//boost_thread ../build//boost_regex ] - [ run object_cache/object_cache_test.cpp ../build//boost_regex + [ run object_cache/object_cache_test.cpp : : : + [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] ] [ run config_info/regex_config_info.cpp @@ -150,31 +146,47 @@ test-suite regex : : : always_show_run_output : test_collate_info ] - [ link concepts/concept_check.cpp ../build//boost_regex ] - [ link concepts/icu_concept_check.cpp ../build//boost_regex ] - [ link concepts/range_concept_check.cpp ../build//boost_regex ] - [ run concepts/test_bug_11988.cpp ../build//boost_regex ] + [ link concepts/concept_check.cpp : + [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] + ] + [ link concepts/concept_check.cpp : + BOOST_REGEX_STANDALONE [ check-target-builds ../build//is_legacy_03 : : no ] : standalone_concept_check + ] + [ link concepts/icu_concept_check.cpp : + BOOST_REGEX_STANDALONE [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] + ] + [ link concepts/icu_concept_check.cpp : + [ check-target-builds ../build//is_legacy_03 : : no ] : standalone_icu_concept_check + ] + [ link concepts/range_concept_check.cpp : + [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] + ] + [ run concepts/test_bug_11988.cpp : : : + [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] + ] [ run # sources captures/captures_test.cpp - captures//boost_regex_extra ../build//icu_options + ../build//icu_options : # additional args : # test-files : # requirements multi BOOST_REGEX_MATCH_EXTRA=1 BOOST_REGEX_NO_LIB=1 + [ check-target-builds ../build//is_legacy_03 : : ../build//boost_regex ] : # test name captures_test ] -[ run regress/$(R_SOURCE) .//boost_regex_recursive + [ run regress/$(R_SOURCE) .//boost_regex_recursive ../build//icu_options : # command line : # input files : # requirements BOOST_REGEX_RECURSIVE=1 + BOOST_REGEX_CXX03=1 : regex_regress_recursive ] [ run regress/$(R_SOURCE) ./noeh_test//boost_regex_noeh @@ -201,3 +213,11 @@ compile test_warnings.cpp : msvc:all msvc:on gcc:all gcc:on clang:all clang:on ; + +compile test_warnings.cpp + : msvc:all msvc:on + gcc:all gcc:on + clang:all clang:on + BOOST_REGEX_STANDALONE + [ check-target-builds ../build//is_legacy_03 : : no ] + : test_warnings_standalone ; diff --git a/test/captures/Jamfile.v2 b/test/captures/Jamfile.v2 index f3ee8905..3b0ed7ed 100644 --- a/test/captures/Jamfile.v2 +++ b/test/captures/Jamfile.v2 @@ -8,23 +8,11 @@ project ; EX_SOURCES = - c_regex_traits.cpp - cpp_regex_traits.cpp - cregex.cpp - fileiter.cpp - icu.cpp - instances.cpp posix_api.cpp regex.cpp regex_debug.cpp - regex_raw_buffer.cpp - regex_traits_defaults.cpp static_mutex.cpp - w32_regex_traits.cpp - wc_regex_traits.cpp - wide_posix_api.cpp - winstances.cpp - usinstances.cpp ; + wide_posix_api.cpp ; lib boost_regex_extra : $(EX_SOURCES) ../../build//icu_options : diff --git a/test/cmake_install_test/CMakeLists.txt b/test/cmake_install_test/CMakeLists.txt new file mode 100644 index 00000000..f0fde175 --- /dev/null +++ b/test/cmake_install_test/CMakeLists.txt @@ -0,0 +1,18 @@ +# Copyright 2018, 2019 Peter Dimov +# Distributed under the Boost Software License, Version 1.0. +# See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt + +cmake_minimum_required(VERSION 3.5...3.16) + +project(cmake_install_test LANGUAGES CXX) + +find_package(boost_regex REQUIRED) +find_package(boost_core REQUIRED) + +add_executable(quick ../quick.cpp) +target_link_libraries(quick Boost::regex Boost::core) + +enable_testing() +add_test(quick quick) + +add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C $) diff --git a/test/cmake_subdir_test/CMakeLists.txt b/test/cmake_subdir_test/CMakeLists.txt new file mode 100644 index 00000000..ddb7b024 --- /dev/null +++ b/test/cmake_subdir_test/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright 2018, 2019 Peter Dimov +# Distributed under the Boost Software License, Version 1.0. +# See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt + +cmake_minimum_required(VERSION 3.5...3.16) + +project(cmake_subdir_test LANGUAGES CXX) + +add_subdirectory(../.. boostorg/reegx) +add_subdirectory(../../../config boostorg/config) +add_subdirectory(../../../core boostorg/core) +add_subdirectory(../../../assert boostorg/assert) +add_subdirectory(../../../throw_exception boostorg/throw_exception) +add_subdirectory(../../../predef boostorg/predef) + +add_executable(quick ../quick.cpp) +target_link_libraries(quick Boost::regex Boost::core) + +enable_testing() +add_test(quick quick) + +add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C $) diff --git a/test/concepts/concept_check.cpp b/test/concepts/concept_check.cpp index ed06feff..5d2792a8 100644 --- a/test/concepts/concept_check.cpp +++ b/test/concepts/concept_check.cpp @@ -9,6 +9,7 @@ * */ +#include #include #if defined(BOOST_MSVC) @@ -22,18 +23,11 @@ #pragma warning(disable:1418 981 983 595 383) #endif -#include -#include -#if !BOOST_WORKAROUND(_MSC_VER, < 1310) && !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) && !BOOST_WORKAROUND(__GNUC__, < 3) #include -#endif int main() { - // VC6 and VC7 can't cope with the iterator architypes, - // don't bother testing as it doesn't work: -#if !BOOST_WORKAROUND(_MSC_VER, < 1310) && !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) && !BOOST_WORKAROUND(__GNUC__, < 3) boost::function_requires< boost::RegexTraitsConcept< boost::regex_traits @@ -53,7 +47,6 @@ int main() >(); #endif #endif -#if !BOOST_WORKAROUND(BOOST_BORLANDC, < 0x560) boost::function_requires< boost::BoostRegexConcept< boost::basic_regex > @@ -66,7 +59,6 @@ int main() > >(); #endif -#endif #if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) boost::function_requires< boost::BoostRegexConcept< @@ -90,15 +82,12 @@ int main() regex_traits_tester_type1 > >(); -#if !defined(__MWERKS__) && !defined(__SUNPRO_CC) // MWCW tries to instantiate std::basic_string, not sure whose bug this is.... typedef boost::basic_regex > regex_traits_tester_type2; boost::function_requires< boost::BaseRegexConcept< regex_traits_tester_type2 > >(); -#endif // __MWERKS__ -#endif return 0; } diff --git a/test/noeh_test/Jamfile.v2 b/test/noeh_test/Jamfile.v2 index dcaa84b9..49e57d59 100644 --- a/test/noeh_test/Jamfile.v2 +++ b/test/noeh_test/Jamfile.v2 @@ -24,23 +24,11 @@ project lib boost_regex_noeh : - ../../src/c_regex_traits.cpp - ../../src/cpp_regex_traits.cpp - ../../src/cregex.cpp - ../../src/fileiter.cpp - ../../src/icu.cpp - ../../src/instances.cpp ../../src/posix_api.cpp ../../src/regex.cpp ../../src/regex_debug.cpp - ../../src/regex_raw_buffer.cpp - ../../src/regex_traits_defaults.cpp ../../src/static_mutex.cpp - ../../src/w32_regex_traits.cpp - ../../src/wc_regex_traits.cpp ../../src/wide_posix_api.cpp - ../../src/winstances.cpp - ../../src/usinstances.cpp ../../build//icu_options : static diff --git a/test/object_cache/object_cache_test.cpp b/test/object_cache/object_cache_test.cpp index 50d883be..6575ac99 100644 --- a/test/object_cache/object_cache_test.cpp +++ b/test/object_cache/object_cache_test.cpp @@ -15,7 +15,14 @@ * VERSION see * DESCRIPTION: Test code for a generic object cache. */ -#include +#include +#ifdef BOOST_REGEX_CXX03 +#include +#define SP_NS boost +#else +#include +#define SP_NS std +#endif #include #include "../test_macros.hpp" @@ -49,7 +56,7 @@ int cpp_main(int /*argc*/, char * /*argv*/[]) int i; for(i = 0; i < 20; ++i) { - boost::shared_ptr p = boost::object_cache::get(i, max_cache_size); + SP_NS::shared_ptr p = boost::object_cache::get(i, max_cache_size); BOOST_CHECK(p->value() == i); p = boost::object_cache::get(i, max_cache_size); BOOST_CHECK(p->value() == i); @@ -64,7 +71,7 @@ int cpp_main(int /*argc*/, char * /*argv*/[]) { for(i = 20 - max_cache_size; i < 20; ++i) { - boost::shared_ptr p = boost::object_cache::get(i, max_cache_size); + SP_NS::shared_ptr p = boost::object_cache::get(i, max_cache_size); BOOST_CHECK(p->value() == i); p = boost::object_cache::get(i, max_cache_size); BOOST_CHECK(p->value() == i); diff --git a/test/regress/main.cpp b/test/regress/main.cpp index 87ad9ff4..8942f717 100644 --- a/test/regress/main.cpp +++ b/test/regress/main.cpp @@ -201,21 +201,6 @@ void test(const char& c, const test_invalid_regex_tag& tag) do_test(c, tag); } -#ifndef BOOST_NO_WREGEX -void test(const wchar_t& c, const test_regex_replace_tag& tag) -{ - do_test(c, tag); -} -void test(const wchar_t& c, const test_regex_search_tag& tag) -{ - do_test(c, tag); -} -void test(const wchar_t& c, const test_invalid_regex_tag& tag) -{ - do_test(c, tag); -} -#endif - #ifdef BOOST_NO_EXCEPTIONS namespace boost{ diff --git a/test/regress/test_deprecated.cpp b/test/regress/test_deprecated.cpp index 493c02b2..6a04a3ec 100644 --- a/test/regress/test_deprecated.cpp +++ b/test/regress/test_deprecated.cpp @@ -99,7 +99,7 @@ void test_deprecated(const char&, const test_regex_search_tag&) BOOST_REGEX_TEST_ERROR("Expression : \"" << expression.c_str() << "\" did not compile with the POSIX C API.", char); return; } - // try and find the first occurance: + // try and find the first occurrence: static const unsigned max_subs = 100; boost::regmatch_t matches[max_subs]; if(boost::regexecA(&re, search_text.c_str(), max_subs, matches, posix_match_options) == 0) @@ -131,76 +131,11 @@ void test_deprecated(const char&, const test_regex_search_tag&) // clean up whatever: boost::regfreeA(&re); - // - // now try the RegEx class: - // - if(test_info::syntax_options() & ~boost::regex::icase) - return; -#ifndef BOOST_NO_EXCEPTIONS - try -#endif - { - boost::RegEx e(expression, (test_info::syntax_options() & boost::regex::icase) != 0); - if(e.error_code()) - { - BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done, error code = " << e.error_code(), char); - } - if(e.Search(search_text, test_info::match_options())) - { - int i = 0; - while(results[i*2] != -2) - { - if(e.Matched(i)) - { - if(results[2*i] != static_cast(e.Position(i))) - { - BOOST_REGEX_TEST_ERROR("Mismatch in start of subexpression " << i << " found with the RegEx class (found " << e.Position(i) << " expected " << results[2*i] << ").", char); - } - if(results[2*i+1] != static_cast(e.Position(i) + e.Length(i))) - { - BOOST_REGEX_TEST_ERROR("Mismatch in end of subexpression " << i << " found with the RegEx class (found " << e.Position(i) + e.Length(i) << " expected " << results[2*i+1] << ").", char); - } - } - else - { - if(results[2*i] >= 0) - { - BOOST_REGEX_TEST_ERROR("Mismatch in start of subexpression " << i << " found with the RegEx class (found " << e.Position(i) << " expected " << results[2*i] << ").", char); - } - if(results[2*i+1] >= 0) - { - BOOST_REGEX_TEST_ERROR("Mismatch in end of subexpression " << i << " found with the RegEx class (found " << e.Position(i) + e.Length(i) << " expected " << results[2*i+1] << ").", char); - } - } - ++i; - } - } - else - { - if(results[0] >= 0) - { - BOOST_REGEX_TEST_ERROR("Expression : \"" << expression.c_str() << "\" was not found with class RegEx.", char); - } - } - } -#ifndef BOOST_NO_EXCEPTIONS - catch(const boost::bad_expression& r) - { - BOOST_REGEX_TEST_ERROR("Expression did not compile with RegEx class: " << r.what(), char); - } - catch(const std::runtime_error& r) - { - BOOST_REGEX_TEST_ERROR("Unexpected std::runtime_error : " << r.what(), char); - } - catch(const std::exception& r) - { - BOOST_REGEX_TEST_ERROR("Unexpected std::exception: " << r.what(), char); - } - catch(...) - { - BOOST_REGEX_TEST_ERROR("Unexpected exception of unknown type", char); - } -#endif +} + +std::string to_narrow_string(std::wstring const& w) +{ + return std::string(w.begin(), w.end()); } void test_deprecated(const wchar_t&, const test_regex_search_tag&) @@ -224,10 +159,10 @@ void test_deprecated(const wchar_t&, const test_regex_search_tag&) boost::regex_tW re; if(boost::regcompW(&re, expression.c_str(), posix_options) != 0) { - BOOST_REGEX_TEST_ERROR("Expression : \"" << expression.c_str() << "\" did not compile with the POSIX C API.", wchar_t); + BOOST_REGEX_TEST_ERROR("Expression : \"" << to_narrow_string(expression.c_str()) << "\" did not compile with the POSIX C API.", wchar_t); return; } - // try and find the first occurance: + // try and find the first occurrence: static const unsigned max_subs = 100; boost::regmatch_t matches[max_subs]; if(boost::regexecW(&re, search_text.c_str(), max_subs, matches, posix_match_options) == 0) @@ -253,7 +188,7 @@ void test_deprecated(const wchar_t&, const test_regex_search_tag&) { if(results[0] >= 0) { - BOOST_REGEX_TEST_ERROR("Expression : \"" << expression.c_str() << "\" was not found with the POSIX C API.", wchar_t); + BOOST_REGEX_TEST_ERROR("Expression : \"" << to_narrow_string(expression.c_str()) << "\" was not found with the POSIX C API.", wchar_t); } } // clean up whatever: @@ -300,46 +235,6 @@ void test_deprecated(const char&, const test_invalid_regex_tag&) } } } - // - // now try the RegEx class: - // - if(test_info::syntax_options() & ~boost::regex::icase) - return; - bool have_catch = false; -#ifndef BOOST_NO_EXCEPTIONS - try -#endif - { - boost::RegEx e(expression, (test_info::syntax_options() & boost::regex::icase) != 0); - if(e.error_code()) - have_catch = true; - } -#ifndef BOOST_NO_EXCEPTIONS - catch(const boost::bad_expression&) - { - have_catch = true; - } - catch(const std::runtime_error& r) - { - have_catch = true; - BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but a std::runtime_error instead: " << r.what(), char); - } - catch(const std::exception& r) - { - have_catch = true; - BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but a std::exception instead: " << r.what(), char); - } - catch(...) - { - have_catch = true; - BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but got an exception of unknown type instead", char); - } -#endif - if(!have_catch) - { - // oops expected exception was not thrown: - BOOST_REGEX_TEST_ERROR("Expected an exception, but didn't find one.", char); - } } void test_deprecated(const wchar_t&, const test_invalid_regex_tag&) @@ -358,7 +253,7 @@ void test_deprecated(const wchar_t&, const test_invalid_regex_tag&) if(code == 0) { boost::regfreeW(&re); - BOOST_REGEX_TEST_ERROR("Expression : \"" << expression.c_str() << "\" unexpectedly compiled with the POSIX C API.", wchar_t); + BOOST_REGEX_TEST_ERROR("Expression : \"" << to_narrow_string(expression.c_str()) << "\" unexpectedly compiled with the POSIX C API.", wchar_t); } else { diff --git a/test/regress/test_icu.cpp b/test/regress/test_icu.cpp index 1ef9fa96..39eda8a2 100644 --- a/test/regress/test_icu.cpp +++ b/test/regress/test_icu.cpp @@ -23,6 +23,7 @@ #if defined(BOOST_HAS_ICU) && !defined(BOOST_NO_STD_WSTRING) #include +#include #include "test.hpp" namespace unnecessary_fix{ @@ -86,7 +87,7 @@ void compare_result(const MR1& w1, const MR2& w2, boost::mpl::int_<2> const*) { BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); } - if((w1.position(i) != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2.prefix().first), iterator_type(w2[i].first))) || (w1.length(i) != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2[i].first), iterator_type(w2[i].second)))) + if((w1.position(i) != std::distance(iterator_type(w2.prefix().first), iterator_type(w2[i].first))) || (w1.length(i) != std::distance(iterator_type(w2[i].first), iterator_type(w2[i].second)))) { BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32); } @@ -106,7 +107,7 @@ void compare_result(const MR1& w1, const MR2& w2, boost::mpl::int_<2> const*) { BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); } - if ((w1.position("abc") != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2.prefix().first), iterator_type(w2["abc"].first))) || (w1.length("abc") != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2["abc"].first), iterator_type(w2["abc"].second)))) + if ((w1.position("abc") != std::distance(iterator_type(w2.prefix().first), iterator_type(w2["abc"].first))) || (w1.length("abc") != std::distance(iterator_type(w2["abc"].first), iterator_type(w2["abc"].second)))) { BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32); } @@ -121,7 +122,7 @@ void compare_result(const MR1& w1, const MR2& w2, boost::mpl::int_<2> const*) { BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); } - if ((w1.position("N") != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2.prefix().first), iterator_type(w2["N"].first))) || (w1.length("N") != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2["N"].first), iterator_type(w2["N"].second)))) + if ((w1.position("N") != std::distance(iterator_type(w2.prefix().first), iterator_type(w2["N"].first))) || (w1.length("N") != std::distance(iterator_type(w2["N"].first), iterator_type(w2["N"].second)))) { BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32); } @@ -150,7 +151,7 @@ void compare_result(const MR1& w1, const MR2& w2, boost::mpl::int_<1> const*) { BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); } - if((w1.position(i) != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2.prefix().first), iterator_type(w2[i].first))) || (w1.length(i) != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2[i].first), iterator_type(w2[i].second)))) + if((w1.position(i) != std::distance(iterator_type(w2.prefix().first), iterator_type(w2[i].first))) || (w1.length(i) != std::distance(iterator_type(w2[i].first), iterator_type(w2[i].second)))) { BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32); } diff --git a/test/regress/test_mfc.cpp b/test/regress/test_mfc.cpp index 0d855cef..1dad08cf 100644 --- a/test/regress/test_mfc.cpp +++ b/test/regress/test_mfc.cpp @@ -170,21 +170,21 @@ void test_mfc(const char&, const test_regex_search_tag&) #pragma warning(push) #pragma warning(disable:4244) #endif - if(boost::BOOST_REGEX_DETAIL_NS::distance(s.GetString(), tstart2->first) != last_end2) + if(std::distance(s.GetString(), tstart2->first) != last_end2) { BOOST_REGEX_TEST_ERROR( "Error in location of start of field split, found: " - << boost::BOOST_REGEX_DETAIL_NS::distance(s.GetString(), tstart2->first) + << std::distance(s.GetString(), tstart2->first) << ", expected: " << last_end2 << ".", char); } int expected_end = static_cast(answer_table[0] < 0 ? s.GetLength() : answer_table[0]); - if(boost::BOOST_REGEX_DETAIL_NS::distance(s.GetString(), tstart2->second) != expected_end) + if(std::distance(s.GetString(), tstart2->second) != expected_end) { BOOST_REGEX_TEST_ERROR( "Error in location of end2 of field split, found: " - << boost::BOOST_REGEX_DETAIL_NS::distance(s.GetString(), tstart2->second) + << std::distance(s.GetString(), tstart2->second) << ", expected: " << expected_end << ".", char); @@ -348,21 +348,21 @@ void test_mfc(const wchar_t&, const test_regex_search_tag&) #pragma warning(push) #pragma warning(disable:4244) #endif - if(boost::BOOST_REGEX_DETAIL_NS::distance(s.GetString(), tstart2->first) != last_end2) + if(std::distance(s.GetString(), tstart2->first) != last_end2) { BOOST_REGEX_TEST_ERROR( "Error in location of start of field split, found: " - << boost::BOOST_REGEX_DETAIL_NS::distance(s.GetString(), tstart2->first) + << std::distance(s.GetString(), tstart2->first) << ", expected: " << last_end2 << ".", wchar_t); } int expected_end = static_cast(answer_table[0] < 0 ? s.GetLength() : answer_table[0]); - if(boost::BOOST_REGEX_DETAIL_NS::distance(s.GetString(), tstart2->second) != expected_end) + if(std::distance(s.GetString(), tstart2->second) != expected_end) { BOOST_REGEX_TEST_ERROR( "Error in location of end2 of field split, found: " - << boost::BOOST_REGEX_DETAIL_NS::distance(s.GetString(), tstart2->second) + << std::distance(s.GetString(), tstart2->second) << ", expected: " << expected_end << ".", wchar_t); diff --git a/test/regress/test_partial_match.hpp b/test/regress/test_partial_match.hpp index 19e45866..dddf9e23 100644 --- a/test/regress/test_partial_match.hpp +++ b/test/regress/test_partial_match.hpp @@ -47,18 +47,18 @@ void test_sub_match(const boost::sub_match& sub, Bidirect } else { - if(boost::BOOST_REGEX_DETAIL_NS::distance(base, sub.first) != answer_table[2*i]) + if(std::distance(base, sub.first) != answer_table[2*i]) { BOOST_REGEX_TEST_ERROR( "Error in start location of sub-expression " - << i << ", found " << boost::BOOST_REGEX_DETAIL_NS::distance(base, sub.first) + << i << ", found " << std::distance(base, sub.first) << ", expected " << answer_table[2*i] << ".", charT); } - if(boost::BOOST_REGEX_DETAIL_NS::distance(base, sub.second) != answer_table[1+ 2*i]) + if(std::distance(base, sub.second) != answer_table[1+ 2*i]) { BOOST_REGEX_TEST_ERROR( "Error in end location of sub-expression " - << i << ", found " << boost::BOOST_REGEX_DETAIL_NS::distance(base, sub.second) + << i << ", found " << std::distance(base, sub.second) << ", expected " << answer_table[1 + 2*i] << ".", charT); } } @@ -238,21 +238,21 @@ void test_regex_token_iterator(boost::basic_regex& r) #pragma warning(push) #pragma warning(disable:4244) #endif - if(boost::BOOST_REGEX_DETAIL_NS::distance(search_text.begin(), start2->first) != last_end2) + if(std::distance(search_text.begin(), start2->first) != last_end2) { BOOST_REGEX_TEST_ERROR( "Error in location of start of field split, found: " - << boost::BOOST_REGEX_DETAIL_NS::distance(search_text.begin(), start2->first) + << std::distance(search_text.begin(), start2->first) << ", expected: " << last_end2 << ".", charT); } int expected_end = static_cast(answer_table[0] < 0 ? search_text.size() : answer_table[0]); - if(boost::BOOST_REGEX_DETAIL_NS::distance(search_text.begin(), start2->second) != expected_end) + if(std::distance(search_text.begin(), start2->second) != expected_end) { BOOST_REGEX_TEST_ERROR( "Error in location of end2 of field split, found: " - << boost::BOOST_REGEX_DETAIL_NS::distance(search_text.begin(), start2->second) + << std::distance(search_text.begin(), start2->second) << ", expected: " << expected_end << ".", charT); diff --git a/test/regress/test_regex_search.hpp b/test/regress/test_regex_search.hpp index 16b1bf91..f0124727 100644 --- a/test/regress/test_regex_search.hpp +++ b/test/regress/test_regex_search.hpp @@ -55,18 +55,18 @@ void test_sub_match(const boost::sub_match& sub, Bidirect } else { - if(boost::BOOST_REGEX_DETAIL_NS::distance(base, sub.first) != answer_table[2*i]) + if(std::distance(base, sub.first) != answer_table[2*i]) { BOOST_REGEX_TEST_ERROR( "Error in start location of sub-expression " - << i << ", found " << boost::BOOST_REGEX_DETAIL_NS::distance(base, sub.first) + << i << ", found " << std::distance(base, sub.first) << ", expected " << answer_table[2*i] << ".", charT); } - if(boost::BOOST_REGEX_DETAIL_NS::distance(base, sub.second) != answer_table[1+ 2*i]) + if(std::distance(base, sub.second) != answer_table[1+ 2*i]) { BOOST_REGEX_TEST_ERROR( "Error in end location of sub-expression " - << i << ", found " << boost::BOOST_REGEX_DETAIL_NS::distance(base, sub.second) + << i << ", found " << std::distance(base, sub.second) << ", expected " << answer_table[1 + 2*i] << ".", charT); } } @@ -291,21 +291,21 @@ void test_regex_token_iterator(boost::basic_regex& r) #pragma warning(push) #pragma warning(disable:4244) #endif - if(boost::BOOST_REGEX_DETAIL_NS::distance(search_text.begin(), start2->first) != last_end2) + if(std::distance(search_text.begin(), start2->first) != last_end2) { BOOST_REGEX_TEST_ERROR( "Error in location of start of field split, found: " - << boost::BOOST_REGEX_DETAIL_NS::distance(search_text.begin(), start2->first) + << std::distance(search_text.begin(), start2->first) << ", expected: " << last_end2 << ".", charT); } int expected_end = static_cast(answer_table[0] < 0 ? search_text.size() : answer_table[0]); - if(boost::BOOST_REGEX_DETAIL_NS::distance(search_text.begin(), start2->second) != expected_end) + if(std::distance(search_text.begin(), start2->second) != expected_end) { BOOST_REGEX_TEST_ERROR( "Error in location of end2 of field split, found: " - << boost::BOOST_REGEX_DETAIL_NS::distance(search_text.begin(), start2->second) + << std::distance(search_text.begin(), start2->second) << ", expected: " << expected_end << ".", charT); @@ -350,21 +350,21 @@ void test_regex_token_iterator(boost::basic_regex& r) #pragma warning(push) #pragma warning(disable:4244) #endif - if(boost::BOOST_REGEX_DETAIL_NS::distance(search_text.begin(), start2->first) != last_end2) + if(std::distance(search_text.begin(), start2->first) != last_end2) { BOOST_REGEX_TEST_ERROR( "Error in location of start of field split, found: " - << boost::BOOST_REGEX_DETAIL_NS::distance(search_text.begin(), start2->first) + << std::distance(search_text.begin(), start2->first) << ", expected: " << last_end2 << ".", charT); } int expected_end = static_cast(answer_table[0] < 0 ? search_text.size() : answer_table[0]); - if(boost::BOOST_REGEX_DETAIL_NS::distance(search_text.begin(), start2->second) != expected_end) + if(std::distance(search_text.begin(), start2->second) != expected_end) { BOOST_REGEX_TEST_ERROR( "Error in location of end2 of field split, found: " - << boost::BOOST_REGEX_DETAIL_NS::distance(search_text.begin(), start2->second) + << std::distance(search_text.begin(), start2->second) << ", expected: " << expected_end << ".", charT); diff --git a/test/regress/test_replace.cpp b/test/regress/test_replace.cpp index 635e6e30..cd434e18 100644 --- a/test/regress/test_replace.cpp +++ b/test/regress/test_replace.cpp @@ -98,7 +98,7 @@ void test_replace() TEST_REGEX_REPLACE("(?a+)|(?b+)", perl, "...aaabb,,,ab*abbb?", match_default|format_all, "(?{one}A:B)C", "...ACBC,,,ACBC*ACBC?"); TEST_REGEX_REPLACE("(?a+)|(?b+)", perl, "...aaabb,,,ab*abbb?", match_default|format_all, "?{one}:B", "...B,,,B*B?"); - // move to copying unmatched data, but replace first occurance only: + // move to copying unmatched data, but replace first occurrence only: TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_all|format_first_only, "bbb", "...bbb,,,"); TEST_REGEX_REPLACE("a+(b+)", perl, "...aaabb,,,", match_default|format_all|format_first_only, "$1", "...bb,,,"); TEST_REGEX_REPLACE("a+(b+)", perl, "...aaabb,,,ab*abbb?", match_default|format_all|format_first_only, "$1", "...bb,,,ab*abbb?"); diff --git a/test/regress/wmain.cpp b/test/regress/wmain.cpp new file mode 100644 index 00000000..a8019ea7 --- /dev/null +++ b/test/regress/wmain.cpp @@ -0,0 +1,54 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE main.cpp + * VERSION see + * DESCRIPTION: entry point for test program. + */ + +#include "test.hpp" +#include "test_locale.hpp" +#include +#include +#include + +#ifdef BOOST_HAS_ICU +#include +#endif + +#ifdef TEST_THREADS +#include +#include +#include +#include +#include + +int* get_array_data(); + +#endif + +#ifndef BOOST_NO_WREGEX +void test(const wchar_t& c, const test_regex_replace_tag& tag) +{ + do_test(c, tag); +} +void test(const wchar_t& c, const test_regex_search_tag& tag) +{ + do_test(c, tag); +} +void test(const wchar_t& c, const test_invalid_regex_tag& tag) +{ + do_test(c, tag); +} +#endif + diff --git a/test/static_mutex/static_mutex_test.cpp b/test/static_mutex/static_mutex_test.cpp index f029a4be..be7500bd 100644 --- a/test/static_mutex/static_mutex_test.cpp +++ b/test/static_mutex/static_mutex_test.cpp @@ -22,6 +22,7 @@ #include #include +#ifdef BOOST_REGEX_CXX03 // // we cannot use the regular Boost.Test in here: it is not thread safe // and calls to BOOST_CHECK will eventually crash on some compilers @@ -205,3 +206,6 @@ int main() return total_failures; } +#else +int main() {} +#endif diff --git a/test/test_consolidated.cpp b/test/test_consolidated.cpp index 188033f7..d5415f2b 100644 --- a/test/test_consolidated.cpp +++ b/test/test_consolidated.cpp @@ -10,20 +10,8 @@ */ -#include -#include -#include -#include -#include -#include #include #include #include -#include -#include #include -#include -#include -#include #include -#include diff --git a/test/unicode/unicode_iterator_test.cpp b/test/unicode/unicode_iterator_test.cpp index 2e6bcec1..6ead714d 100644 --- a/test/unicode/unicode_iterator_test.cpp +++ b/test/unicode/unicode_iterator_test.cpp @@ -16,6 +16,7 @@ * DESCRIPTION: Simple test suite for Unicode interconversions. */ +#include #include #include #include "../test_macros.hpp" @@ -317,6 +318,6 @@ int cpp_main( int, char* [] ) for(unsigned i = 0xDFFF + 1; i < 0x10FFFF; ++i) v.push_back(i); test(v); - return 0; + return boost::report_errors(); }