diff --git a/.drone.jsonnet b/.drone.jsonnet index 51340c4c..7e3e7115 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -11,6 +11,7 @@ local triggers = local ubsan = { UBSAN: '1', UBSAN_OPTIONS: 'print_stacktrace=1' }; local asan = { ASAN: '1' }; +local tsan = { TSAN: '1' }; local linux_pipeline(name, image, environment, packages = "", sources = [], arch = "amd64") = { @@ -157,16 +158,29 @@ local windows_pipeline(name, image, environment, arch = "amd64") = ), linux_pipeline( - "Linux 18.04 GCC 8 32/64", + "Linux 18.04 GCC 8 32/64 (03,11)", "cppalliance/droneubuntu1804:1", - { TOOLSET: 'gcc', COMPILER: 'g++-8', CXXSTD: '03,11,14,17', ADDRMD: '32,64' }, + { TOOLSET: 'gcc', COMPILER: 'g++-8', CXXSTD: '03,11', ADDRMD: '32,64' }, "g++-8-multilib", ), linux_pipeline( - "Linux 20.04 GCC 9* 32/64", + "Linux 18.04 GCC 8 32/64 (14,17)", + "cppalliance/droneubuntu1804:1", + { TOOLSET: 'gcc', COMPILER: 'g++-8', CXXSTD: '14,17', ADDRMD: '32,64' }, + "g++-8-multilib", + ), + + linux_pipeline( + "Linux 20.04 GCC 9* 32/64 (03,11,14)", "cppalliance/droneubuntu2004:1", - { TOOLSET: 'gcc', COMPILER: 'g++', CXXSTD: '03,11,14,17,2a', ADDRMD: '32,64' }, + { TOOLSET: 'gcc', COMPILER: 'g++', CXXSTD: '03,11,14', ADDRMD: '32,64' }, + ), + + linux_pipeline( + "Linux 20.04 GCC 9* 32/64 (17,2a)", + "cppalliance/droneubuntu2004:1", + { TOOLSET: 'gcc', COMPILER: 'g++', CXXSTD: '17,2a', ADDRMD: '32,64' }, ), linux_pipeline( @@ -177,36 +191,77 @@ local windows_pipeline(name, image, environment, arch = "amd64") = ), linux_pipeline( - "Linux 20.04 GCC 9* S390x", + "Linux 20.04 GCC 9* S390x (03,11,14)", "cppalliance/droneubuntu2004:multiarch", - { TOOLSET: 'gcc', COMPILER: 'g++', CXXSTD: '03,11,14,17,2a' }, + { TOOLSET: 'gcc', COMPILER: 'g++', CXXSTD: '03,11,14' }, arch="s390x", ), linux_pipeline( - "Linux 20.04 GCC 10 32/64", + "Linux 20.04 GCC 9* S390x (17,2a)", + "cppalliance/droneubuntu2004:multiarch", + { TOOLSET: 'gcc', COMPILER: 'g++', CXXSTD: '17,2a' }, + arch="s390x", + ), + + linux_pipeline( + "Linux 20.04 GCC 10 32/64 (03,11,14)", "cppalliance/droneubuntu2004:1", - { TOOLSET: 'gcc', COMPILER: 'g++-10', CXXSTD: '03,11,14,17,20', ADDRMD: '32,64' }, + { TOOLSET: 'gcc', COMPILER: 'g++-10', CXXSTD: '03,11,14', ADDRMD: '32,64' }, "g++-10-multilib", ), linux_pipeline( - "Linux 22.04 GCC 11* 32/64", + "Linux 20.04 GCC 10 32/64 (17,20)", + "cppalliance/droneubuntu2004:1", + { TOOLSET: 'gcc', COMPILER: 'g++-10', CXXSTD: '17,20', ADDRMD: '32,64' }, + "g++-10-multilib", + ), + + linux_pipeline( + "Linux 22.04 GCC 11* 32/64 (03,11,14)", "cppalliance/droneubuntu2204:1", - { TOOLSET: 'gcc', COMPILER: 'g++', CXXSTD: '03,11,14,17,2a', ADDRMD: '32,64' }, + { TOOLSET: 'gcc', COMPILER: 'g++', CXXSTD: '03,11,14', ADDRMD: '32,64' }, + ), + + linux_pipeline( + "Linux 22.04 GCC 11* 32/64 (17,2a)", + "cppalliance/droneubuntu2204:1", + { TOOLSET: 'gcc', COMPILER: 'g++', CXXSTD: '17,2a', ADDRMD: '32,64' }, ), linux_pipeline( "Linux 22.04 GCC 12 32 ASAN (03,11,14)", "cppalliance/droneubuntu2204:1", - { TOOLSET: 'gcc', COMPILER: 'g++-12', CXXSTD: '03,11,14', ADDRMD: '32' } + asan, + { TOOLSET: 'gcc', COMPILER: 'g++-12', CXXSTD: '03,11', ADDRMD: '32' } + asan, "g++-12-multilib", ), linux_pipeline( - "Linux 22.04 GCC 12 32 ASAN (17,20,2b)", + "Linux 22.04 GCC 12 32 ASAN (14)", "cppalliance/droneubuntu2204:1", - { TOOLSET: 'gcc', COMPILER: 'g++-12', CXXSTD: '17,20,2b', ADDRMD: '32' } + asan, + { TOOLSET: 'gcc', COMPILER: 'g++-12', CXXSTD: '14', ADDRMD: '32' } + asan, + "g++-12-multilib", + ), + + linux_pipeline( + "Linux 22.04 GCC 12 32 ASAN 
(17)", + "cppalliance/droneubuntu2204:1", + { TOOLSET: 'gcc', COMPILER: 'g++-12', CXXSTD: '17', ADDRMD: '32' } + asan, + "g++-12-multilib", + ), + + linux_pipeline( + "Linux 22.04 GCC 12 32 ASAN (20)", + "cppalliance/droneubuntu2204:1", + { TOOLSET: 'gcc', COMPILER: 'g++-12', CXXSTD: '20', ADDRMD: '32' } + asan, + "g++-12-multilib", + ), + + linux_pipeline( + "Linux 22.04 GCC 12 32 ASAN (2b)", + "cppalliance/droneubuntu2204:1", + { TOOLSET: 'gcc', COMPILER: 'g++-12', CXXSTD: '2b', ADDRMD: '32' } + asan, "g++-12-multilib", ), @@ -218,12 +273,47 @@ local windows_pipeline(name, image, environment, arch = "amd64") = ), linux_pipeline( - "Linux 22.04 GCC 12 64 ASAN (17,20,2b)", + "Linux 22.04 GCC 12 64 ASAN (17)", "cppalliance/droneubuntu2204:1", - { TOOLSET: 'gcc', COMPILER: 'g++-12', CXXSTD: '17,20,2b', ADDRMD: '64' } + asan, + { TOOLSET: 'gcc', COMPILER: 'g++-12', CXXSTD: '17', ADDRMD: '64' } + asan, "g++-12-multilib", ), + linux_pipeline( + "Linux 22.04 GCC 12 64 ASAN (20)", + "cppalliance/droneubuntu2204:1", + { TOOLSET: 'gcc', COMPILER: 'g++-12', CXXSTD: '20', ADDRMD: '64' } + asan, + "g++-12-multilib", + ), + + linux_pipeline( + "Linux 22.04 GCC 12 64 ASAN (2b)", + "cppalliance/droneubuntu2204:1", + { TOOLSET: 'gcc', COMPILER: 'g++-12', CXXSTD: '2b', ADDRMD: '64' } + asan, + "g++-12-multilib", + ), + + linux_pipeline( + "Linux 22.04 GCC 12 64 TSAN (11,14,17,20,2b)", + "cppalliance/droneubuntu2204:1", + { TOOLSET: 'gcc', COMPILER: 'g++-12', CXXSTD: '11,14,17,20,2b', ADDRMD: '64', TARGET: 'libs/unordered/test//cfoa_tests' } + tsan, + "g++-12-multilib", + ), + + linux_pipeline( + "Linux 23.04 GCC 13 32/64 (03,11,14)", + "cppalliance/droneubuntu2304:1", + { TOOLSET: 'gcc', COMPILER: 'g++-13', CXXSTD: '03,11,14', ADDRMD: '32,64' }, + "g++-13 g++-13-multilib", + ), + + linux_pipeline( + "Linux 23.04 GCC 13 32/64 (17,20,2b)", + "cppalliance/droneubuntu2304:1", + { TOOLSET: 'gcc', COMPILER: 'g++-13', CXXSTD: '17,20,2b', ADDRMD: '32,64' }, + "g++-13 g++-13-multilib", + ), + linux_pipeline( "Linux 16.04 Clang 3.5", "cppalliance/droneubuntu1604:1", @@ -330,19 +420,40 @@ local windows_pipeline(name, image, environment, arch = "amd64") = ), linux_pipeline( - "Linux 22.04 Clang 14 UBSAN", + "Linux 22.04 Clang 14 UBSAN (03,11,14)", "cppalliance/droneubuntu2204:1", - { TOOLSET: 'clang', COMPILER: 'clang++-14', CXXSTD: '03,11,14,17,20' } + ubsan, + { TOOLSET: 'clang', COMPILER: 'clang++-14', CXXSTD: '03,11,14' } + ubsan, "clang-14", ), linux_pipeline( - "Linux 22.04 Clang 14 ASAN", + "Linux 22.04 Clang 14 UBSAN (17,20)", "cppalliance/droneubuntu2204:1", - { TOOLSET: 'clang', COMPILER: 'clang++-14', CXXSTD: '03,11,14,17,20' } + asan, + { TOOLSET: 'clang', COMPILER: 'clang++-14', CXXSTD: '17,20' } + ubsan, "clang-14", ), + linux_pipeline( + "Linux 22.04 Clang 14 ASAN (03,11,14)", + "cppalliance/droneubuntu2204:1", + { TOOLSET: 'clang', COMPILER: 'clang++-14', CXXSTD: '03,11,14' } + asan, + "clang-14", + ), + + linux_pipeline( + "Linux 22.04 Clang 14 ASAN (17,20)", + "cppalliance/droneubuntu2204:1", + { TOOLSET: 'clang', COMPILER: 'clang++-14', CXXSTD: '17,20' } + asan, + "clang-14", + ), + + linux_pipeline( + "Linux 22.04 Clang 14 libc++ 64 TSAN", + "cppalliance/droneubuntu2204:1", + { TOOLSET: 'clang', COMPILER: 'clang++-14', ADDRMD: '64', TARGET: 'libs/unordered/test//cfoa_tests', CXXSTD: '11,14,17,20', STDLIB: 'libc++' } + tsan, + "clang-14 libc++-14-dev libc++abi-14-dev", + ), + linux_pipeline( "Linux 22.04 Clang 15", "cppalliance/droneubuntu2204:1", @@ -352,8 +463,18 @@ local windows_pipeline(name, 
image, environment, arch = "amd64") = ), macos_pipeline( - "MacOS 10.15 Xcode 12.2 UBSAN", - { TOOLSET: 'clang', COMPILER: 'clang++', CXXSTD: '03,11,14,1z' } + ubsan, + "MacOS 10.15 Xcode 12.2 UBSAN (03,11)", + { TOOLSET: 'clang', COMPILER: 'clang++', CXXSTD: '03,11' } + ubsan, + ), + + macos_pipeline( + "MacOS 10.15 Xcode 12.2 UBSAN (14)", + { TOOLSET: 'clang', COMPILER: 'clang++', CXXSTD: '14' } + ubsan, + ), + + macos_pipeline( + "MacOS 10.15 Xcode 12.2 UBSAN (1z)", + { TOOLSET: 'clang', COMPILER: 'clang++', CXXSTD: '1z' } + ubsan, ), macos_pipeline( @@ -362,6 +483,12 @@ local windows_pipeline(name, image, environment, arch = "amd64") = xcode_version = "13.4.1", osx_version = "monterey", arch = "arm64", ), + macos_pipeline( + "MacOS 12.4 Xcode 13.4.1 TSAN", + { TOOLSET: 'clang', COMPILER: 'clang++', CXXSTD: '11,14,1z', TARGET: 'libs/unordered/test//cfoa_tests' } + tsan, + xcode_version = "13.4.1", osx_version = "monterey", arch = "arm64", + ), + windows_pipeline( "Windows VS2015 msvc-14.0", "cppalliance/dronevs2015", diff --git a/.drone/drone.sh b/.drone/drone.sh index 2f2125df..60f06760 100755 --- a/.drone/drone.sh +++ b/.drone/drone.sh @@ -7,6 +7,8 @@ set -ex export PATH=~/.local/bin:/usr/local/bin:$PATH +: ${TARGET:="libs/$LIBRARY/test"} + DRONE_BUILD_DIR=$(pwd) BOOST_BRANCH=develop @@ -22,4 +24,4 @@ python tools/boostdep/depinst/depinst.py $LIBRARY ./b2 -d0 headers echo "using $TOOLSET : : $COMPILER ;" > ~/user-config.jam -./b2 -j3 libs/$LIBRARY/test toolset=$TOOLSET cxxstd=$CXXSTD variant=debug,release ${ADDRMD:+address-model=$ADDRMD} ${UBSAN:+undefined-sanitizer=norecover debug-symbols=on} ${ASAN:+address-sanitizer=norecover debug-symbols=on} ${LINKFLAGS:+linkflags=$LINKFLAGS} +./b2 -j3 $TARGET toolset=$TOOLSET cxxstd=$CXXSTD variant=debug,release ${ADDRMD:+address-model=$ADDRMD} ${STDLIB:+stdlib=$STDLIB} ${UBSAN:+undefined-sanitizer=norecover debug-symbols=on} ${ASAN:+address-sanitizer=norecover debug-symbols=on} ${TSAN:+thread-sanitizer=norecover debug-symbols=on} ${LINKFLAGS:+linkflags=$LINKFLAGS} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index da3f5b2c..74d14def 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,10 @@ jobs: - { name: "gcc-12 w/ sanitizers (17,20,2b)", sanitize: yes, compiler: gcc-12, cxxstd: '17,20,2b', os: ubuntu-22.04, ccache_key: "san2" } - { name: Collect coverage, coverage: yes, - compiler: gcc-8, cxxstd: '03,11', os: ubuntu-20.04, install: 'g++-8-multilib', address-model: '32,64', ccache_key: "cov" } + compiler: gcc-12, cxxstd: '03,20', os: ubuntu-22.04, install: 'g++-12-multilib', address-model: '32,64', ccache_key: "cov" } + + - { name: "cfoa tsan (gcc)", cxxstd: '11,14,17,20,2b', os: ubuntu-22.04, compiler: gcc-12, + targets: 'libs/unordered/test//cfoa_tests', thread-sanitize: yes } # Linux, clang, libc++ - { compiler: clang-7, cxxstd: '03,11,14,17', os: ubuntu-20.04, stdlib: libc++, install: 'clang-7 libc++-7-dev libc++abi-7-dev' } @@ -65,15 +68,21 @@ jobs: compiler: clang-12, cxxstd: '17,20,2b', os: ubuntu-20.04, stdlib: libc++, install: 'clang-12 libc++-12-dev libc++abi-12-dev', ccache_key: "san2" } - { compiler: clang-13, cxxstd: '03,11,14,17,20,2b', os: ubuntu-22.04, stdlib: libc++, install: 'clang-13 libc++-13-dev libc++abi-13-dev' } - { compiler: clang-14, cxxstd: '03,11,14,17,20,2b', os: ubuntu-22.04, stdlib: libc++, install: 'clang-14 libc++-14-dev libc++abi-14-dev' } + # not using libc++ because of https://github.com/llvm/llvm-project/issues/52771 - { name: "clang-14 w/ sanitizers 
(03,11,14)", sanitize: yes, compiler: clang-14, cxxstd: '03,11,14', os: ubuntu-22.04, ccache_key: "san1" } - { name: "clang-14 w/ sanitizers (17,20,2b)", sanitize: yes, compiler: clang-14, cxxstd: '17,20,2b', os: ubuntu-22.04, ccache_key: "san2" } + - { name: "cfoa tsan (clang)", cxxstd: '11,14,17,20,2b', os: ubuntu-22.04, compiler: clang-14, + targets: 'libs/unordered/test//cfoa_tests', thread-sanitize: yes, + stdlib: libc++, install: 'clang-14 libc++-14-dev libc++abi-14-dev' } + # OSX, clang - { compiler: clang, cxxstd: '03,11,14,17,2a', os: macos-11, } - { compiler: clang, cxxstd: '03,11,14,17,2a', os: macos-12, sanitize: yes } + - { compiler: clang, cxxstd: '11,14,17,2a', os: macos-12, thread-sanitize: yes, targets: 'libs/unordered/test//cfoa_tests' } timeout-minutes: 180 runs-on: ${{matrix.os}} @@ -184,6 +193,8 @@ jobs: B2_COMPILER: ${{matrix.compiler}} B2_CXXSTD: ${{matrix.cxxstd}} B2_SANITIZE: ${{matrix.sanitize}} + B2_TSAN: ${{matrix.thread-sanitize}} + B2_TARGETS: ${{matrix.targets}} B2_STDLIB: ${{matrix.stdlib}} # More entries can be added in the same way, see the B2_ARGS assignment in ci/enforce.sh for the possible keys. # B2_DEFINES: ${{matrix.defines}} @@ -197,7 +208,7 @@ jobs: - name: Run tests if: '!matrix.coverity' - run: ci/build.sh + run: B2_TARGETS=${{matrix.targets}} ci/build.sh - name: Upload coverage if: matrix.coverage diff --git a/CMakeLists.txt b/CMakeLists.txt index f1477e65..e7a1d024 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,7 @@ target_link_libraries(boost_unordered Boost::mp11 Boost::predef Boost::preprocessor + Boost::static_assert Boost::throw_exception Boost::tuple Boost::type_traits diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.500k, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.500k, 0.01.png new file mode 100644 index 00000000..6f72c31e Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.500k, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.500k, 0.5.png b/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.500k, 0.5.png new file mode 100644 index 00000000..d362439d Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.500k, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.500k, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.500k, 0.99.png new file mode 100644 index 00000000..f6caa877 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.500k, 0.99.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.5M, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.5M, 0.01.png new file mode 100644 index 00000000..3f73b219 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.5M, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.5M, 0.5.png b/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.5M, 0.5.png new file mode 100644 index 00000000..70ed4dcc Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.5M, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel 
workload.xlsx.5M, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.5M, 0.99.png new file mode 100644 index 00000000..c1f01abf Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-arm64/Parallel workload.xlsx.5M, 0.99.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.500k, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.500k, 0.01.png new file mode 100644 index 00000000..ce92f83d Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.500k, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.500k, 0.5.png b/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.500k, 0.5.png new file mode 100644 index 00000000..1a27f473 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.500k, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.500k, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.500k, 0.99.png new file mode 100644 index 00000000..02da7cac Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.500k, 0.99.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.5M, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.5M, 0.01.png new file mode 100644 index 00000000..201fa6c2 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.5M, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.5M, 0.5.png b/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.5M, 0.5.png new file mode 100644 index 00000000..d56b844f Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.5M, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.5M, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.5M, 0.99.png new file mode 100644 index 00000000..807bef8a Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-x64/Parallel workload.xlsx.5M, 0.99.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.500k, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.500k, 0.01.png new file mode 100644 index 00000000..da169448 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.500k, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.500k, 0.5.png b/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.500k, 0.5.png new file mode 100644 index 00000000..ed90bc24 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.500k, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.500k, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.500k, 0.99.png new file mode 100644 index 00000000..7a631ac4 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.500k, 0.99.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel 
workload.xlsx.5M, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.5M, 0.01.png new file mode 100644 index 00000000..b5b32507 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.5M, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.5M, 0.5.png b/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.5M, 0.5.png new file mode 100644 index 00000000..7db7c300 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.5M, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.5M, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.5M, 0.99.png new file mode 100644 index 00000000..35a57e29 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/clang-x86/Parallel workload.xlsx.5M, 0.99.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.500k, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.500k, 0.01.png new file mode 100644 index 00000000..3a135c46 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.500k, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.500k, 0.5.png b/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.500k, 0.5.png new file mode 100644 index 00000000..6e553843 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.500k, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.500k, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.500k, 0.99.png new file mode 100644 index 00000000..50c8dc99 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.500k, 0.99.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.5M, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.5M, 0.01.png new file mode 100644 index 00000000..8b26b7a8 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.5M, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.5M, 0.5.png b/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.5M, 0.5.png new file mode 100644 index 00000000..a1a3b95f Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.5M, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.5M, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.5M, 0.99.png new file mode 100644 index 00000000..f54bfe4c Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/gcc-x64/Parallel workload.xlsx.5M, 0.99.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.500k, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.500k, 0.01.png new file mode 100644 index 00000000..7c88bc73 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.500k, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.500k, 0.5.png 
b/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.500k, 0.5.png new file mode 100644 index 00000000..f981750f Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.500k, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.500k, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.500k, 0.99.png new file mode 100644 index 00000000..5b02569e Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.500k, 0.99.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.5M, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.5M, 0.01.png new file mode 100644 index 00000000..d3c89222 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.5M, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.5M, 0.5.png b/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.5M, 0.5.png new file mode 100644 index 00000000..7c9ada47 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.5M, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.5M, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.5M, 0.99.png new file mode 100644 index 00000000..f6e79ad8 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/gcc-x86/Parallel workload.xlsx.5M, 0.99.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.500k, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.500k, 0.01.png new file mode 100644 index 00000000..61190a07 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.500k, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.500k, 0.5.png b/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.500k, 0.5.png new file mode 100644 index 00000000..ec157ee7 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.500k, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.500k, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.500k, 0.99.png new file mode 100644 index 00000000..9404453f Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.500k, 0.99.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.5M, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.5M, 0.01.png new file mode 100644 index 00000000..80552317 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.5M, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.5M, 0.5.png b/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.5M, 0.5.png new file mode 100644 index 00000000..1e221405 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.5M, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.5M, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.5M, 0.99.png new file 
mode 100644 index 00000000..498bed6c Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/vs-x64/Parallel workload.xlsx.5M, 0.99.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.500k, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.500k, 0.01.png new file mode 100644 index 00000000..c0842d76 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.500k, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.500k, 0.5.png b/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.500k, 0.5.png new file mode 100644 index 00000000..cf11d505 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.500k, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.500k, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.500k, 0.99.png new file mode 100644 index 00000000..55f9c8fc Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.500k, 0.99.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.5M, 0.01.png b/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.5M, 0.01.png new file mode 100644 index 00000000..3f297b5c Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.5M, 0.01.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.5M, 0.5.png b/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.5M, 0.5.png new file mode 100644 index 00000000..08ba90a5 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.5M, 0.5.png differ diff --git a/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.5M, 0.99.png b/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.5M, 0.99.png new file mode 100644 index 00000000..25b9d9d9 Binary files /dev/null and b/doc/diagrams/benchmarks-concurrent_map/vs-x86/Parallel workload.xlsx.5M, 0.99.png differ diff --git a/doc/diagrams/cfoa.png b/doc/diagrams/cfoa.png new file mode 100644 index 00000000..a72e3d53 Binary files /dev/null and b/doc/diagrams/cfoa.png differ diff --git a/doc/unordered.adoc b/doc/unordered.adoc index 07e09dbb..5da1b442 100644 --- a/doc/unordered.adoc +++ b/doc/unordered.adoc @@ -13,8 +13,10 @@ include::unordered/intro.adoc[] include::unordered/buckets.adoc[] include::unordered/hash_equality.adoc[] -include::unordered/comparison.adoc[] +include::unordered/regular.adoc[] +include::unordered/concurrent.adoc[] include::unordered/compliance.adoc[] +include::unordered/structures.adoc[] include::unordered/benchmarks.adoc[] include::unordered/rationale.adoc[] include::unordered/ref.adoc[] diff --git a/doc/unordered/benchmarks.adoc b/doc/unordered/benchmarks.adoc index 91f1d06d..853ad44a 100644 --- a/doc/unordered/benchmarks.adoc +++ b/doc/unordered/benchmarks.adoc @@ -431,3 +431,263 @@ h|unsuccessful lookup |=== +== boost::concurrent_flat_map + +All benchmarks were created using: + +* `https://spec.oneapi.io/versions/latest/elements/oneTBB/source/containers/concurrent_hash_map_cls.html[oneapi::tbb::concurrent_hash_map^]` +* `https://github.com/greg7mdp/gtl/blob/main/docs/phmap.md[gtl::parallel_flat_hash_map^]` with 64 submaps +* `boost::concurrent_flat_map` + +The source code can be 
https://github.com/boostorg/boost_unordered_benchmarks/tree/boost_concurrent_flat_map[found here^]. + +The benchmarks exercise a number of threads _T_ (between 1 and 16) concurrently performing operations +randomly chosen among **update**, **successful lookup** and **unsuccessful lookup**. The keys used in the +operations follow a https://en.wikipedia.org/wiki/Zipf%27s_law#Formal_definition[Zipf distribution^] +with different _skew_ parameters: the higher the skew, the more concentrated are the keys in the lower values +of the covered range. + +=== GCC 12, x64 + + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + +|image::benchmarks-concurrent_map/gcc-x64/Parallel%20workload.xlsx.500k%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/gcc-x64/Parallel%20workload.xlsx.500k%2C%200.01.png] +|image::benchmarks-concurrent_map/gcc-x64/Parallel%20workload.xlsx.500k%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/gcc-x64/Parallel%20workload.xlsx.500k%2C%200.5.png] +|image::benchmarks-concurrent_map/gcc-x64/Parallel%20workload.xlsx.500k%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/gcc-x64/Parallel%20workload.xlsx.500k%2C%200.99.png] + +h|500k updates, 4.5M lookups + +skew=0.01 +h|500k updates, 4.5M lookups + +skew=0.5 +h|500k updates, 4.5M lookups + +skew=0.99 +|=== + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + +|image::benchmarks-concurrent_map/gcc-x64/Parallel%20workload.xlsx.5M%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/gcc-x64/Parallel%20workload.xlsx.5M%2C%200.01.png] +|image::benchmarks-concurrent_map/gcc-x64/Parallel%20workload.xlsx.5M%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/gcc-x64/Parallel%20workload.xlsx.5M%2C%200.5.png] +|image::benchmarks-concurrent_map/gcc-x64/Parallel%20workload.xlsx.5M%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/gcc-x64/Parallel%20workload.xlsx.5M%2C%200.99.png] + +h|5M updates, 45M lookups + +skew=0.01 +h|5M updates, 45M lookups + +skew=0.5 +h|5M updates, 45M lookups + +skew=0.99 +|=== + +=== Clang 15, x64 + + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + +|image::benchmarks-concurrent_map/clang-x64/Parallel%20workload.xlsx.500k%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-x64/Parallel%20workload.xlsx.500k%2C%200.01.png] +|image::benchmarks-concurrent_map/clang-x64/Parallel%20workload.xlsx.500k%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-x64/Parallel%20workload.xlsx.500k%2C%200.5.png] +|image::benchmarks-concurrent_map/clang-x64/Parallel%20workload.xlsx.500k%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-x64/Parallel%20workload.xlsx.500k%2C%200.99.png] + +h|500k updates, 4.5M lookups + +skew=0.01 +h|500k updates, 4.5M lookups + +skew=0.5 +h|500k updates, 4.5M lookups + +skew=0.99 +|=== + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + +|image::benchmarks-concurrent_map/clang-x64/Parallel%20workload.xlsx.5M%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-x64/Parallel%20workload.xlsx.5M%2C%200.01.png] +|image::benchmarks-concurrent_map/clang-x64/Parallel%20workload.xlsx.5M%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-x64/Parallel%20workload.xlsx.5M%2C%200.5.png] 
+|image::benchmarks-concurrent_map/clang-x64/Parallel%20workload.xlsx.5M%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-x64/Parallel%20workload.xlsx.5M%2C%200.99.png] + +h|5M updates, 45M lookups + +skew=0.01 +h|5M updates, 45M lookups + +skew=0.5 +h|5M updates, 45M lookups + +skew=0.99 +|=== + +=== Visual Studio 2022, x64 + + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + +|image::benchmarks-concurrent_map/vs-x64/Parallel%20workload.xlsx.500k%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/vs-x64/Parallel%20workload.xlsx.500k%2C%200.01.png] +|image::benchmarks-concurrent_map/vs-x64/Parallel%20workload.xlsx.500k%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/vs-x64/Parallel%20workload.xlsx.500k%2C%200.5.png] +|image::benchmarks-concurrent_map/vs-x64/Parallel%20workload.xlsx.500k%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/vs-x64/Parallel%20workload.xlsx.500k%2C%200.99.png] + +h|500k updates, 4.5M lookups + +skew=0.01 +h|500k updates, 4.5M lookups + +skew=0.5 +h|500k updates, 4.5M lookups + +skew=0.99 +|=== + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + +|image::benchmarks-concurrent_map/vs-x64/Parallel%20workload.xlsx.5M%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/vs-x64/Parallel%20workload.xlsx.5M%2C%200.01.png] +|image::benchmarks-concurrent_map/vs-x64/Parallel%20workload.xlsx.5M%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/vs-x64/Parallel%20workload.xlsx.5M%2C%200.5.png] +|image::benchmarks-concurrent_map/vs-x64/Parallel%20workload.xlsx.5M%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/vs-x64/Parallel%20workload.xlsx.5M%2C%200.99.png] + +h|5M updates, 45M lookups + +skew=0.01 +h|5M updates, 45M lookups + +skew=0.5 +h|5M updates, 45M lookups + +skew=0.99 +|=== + +=== Clang 12, ARM64 + + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + +|image::benchmarks-concurrent_map/clang-arm64/Parallel%20workload.xlsx.500k%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-arm64/Parallel%20workload.xlsx.500k%2C%200.01.png] +|image::benchmarks-concurrent_map/clang-arm64/Parallel%20workload.xlsx.500k%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-arm64/Parallel%20workload.xlsx.500k%2C%200.5.png] +|image::benchmarks-concurrent_map/clang-arm64/Parallel%20workload.xlsx.500k%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-arm64/Parallel%20workload.xlsx.500k%2C%200.99.png] + +h|500k updates, 4.5M lookups + +skew=0.01 +h|500k updates, 4.5M lookups + +skew=0.5 +h|500k updates, 4.5M lookups + +skew=0.99 +|=== + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + +|image::benchmarks-concurrent_map/clang-arm64/Parallel%20workload.xlsx.5M%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-arm64/Parallel%20workload.xlsx.5M%2C%200.01.png] +|image::benchmarks-concurrent_map/clang-arm64/Parallel%20workload.xlsx.5M%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-arm64/Parallel%20workload.xlsx.5M%2C%200.5.png] +|image::benchmarks-concurrent_map/clang-arm64/Parallel%20workload.xlsx.5M%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-arm64/Parallel%20workload.xlsx.5M%2C%200.99.png] + +h|5M updates, 45M lookups + 
+skew=0.01 +h|5M updates, 45M lookups + +skew=0.5 +h|5M updates, 45M lookups + +skew=0.99 +|=== + +=== GCC 12, x86 + + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + +|image::benchmarks-concurrent_map/gcc-x86/Parallel%20workload.xlsx.500k%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/gcc-x86/Parallel%20workload.xlsx.500k%2C%200.01.png] +|image::benchmarks-concurrent_map/gcc-x86/Parallel%20workload.xlsx.500k%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/gcc-x86/Parallel%20workload.xlsx.500k%2C%200.5.png] +|image::benchmarks-concurrent_map/gcc-x86/Parallel%20workload.xlsx.500k%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/gcc-x86/Parallel%20workload.xlsx.500k%2C%200.99.png] + +h|500k updates, 4.5M lookups + +skew=0.01 +h|500k updates, 4.5M lookups + +skew=0.5 +h|500k updates, 4.5M lookups + +skew=0.99 +|=== + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + +|image::benchmarks-concurrent_map/gcc-x86/Parallel%20workload.xlsx.5M%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/gcc-x86/Parallel%20workload.xlsx.5M%2C%200.01.png] +|image::benchmarks-concurrent_map/gcc-x86/Parallel%20workload.xlsx.5M%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/gcc-x86/Parallel%20workload.xlsx.5M%2C%200.5.png] +|image::benchmarks-concurrent_map/gcc-x86/Parallel%20workload.xlsx.5M%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/gcc-x86/Parallel%20workload.xlsx.5M%2C%200.99.png] + +h|5M updates, 45M lookups + +skew=0.01 +h|5M updates, 45M lookups + +skew=0.5 +h|5M updates, 45M lookups + +skew=0.99 +|=== + +=== Clang 15, x86 + + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + +|image::benchmarks-concurrent_map/clang-x86/Parallel%20workload.xlsx.500k%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-x86/Parallel%20workload.xlsx.500k%2C%200.01.png] +|image::benchmarks-concurrent_map/clang-x86/Parallel%20workload.xlsx.500k%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-x86/Parallel%20workload.xlsx.500k%2C%200.5.png] +|image::benchmarks-concurrent_map/clang-x86/Parallel%20workload.xlsx.500k%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-x86/Parallel%20workload.xlsx.500k%2C%200.99.png] + +h|500k updates, 4.5M lookups + +skew=0.01 +h|500k updates, 4.5M lookups + +skew=0.5 +h|500k updates, 4.5M lookups + +skew=0.99 +|=== + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + +|image::benchmarks-concurrent_map/clang-x86/Parallel%20workload.xlsx.5M%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-x86/Parallel%20workload.xlsx.5M%2C%200.01.png] +|image::benchmarks-concurrent_map/clang-x86/Parallel%20workload.xlsx.5M%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-x86/Parallel%20workload.xlsx.5M%2C%200.5.png] +|image::benchmarks-concurrent_map/clang-x86/Parallel%20workload.xlsx.5M%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/clang-x86/Parallel%20workload.xlsx.5M%2C%200.99.png] + +h|5M updates, 45M lookups + +skew=0.01 +h|5M updates, 45M lookups + +skew=0.5 +h|5M updates, 45M lookups + +skew=0.99 +|=== + +=== Visual Studio 2022, x86 + + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + 
+|image::benchmarks-concurrent_map/vs-x86/Parallel%20workload.xlsx.500k%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/vs-x86/Parallel%20workload.xlsx.500k%2C%200.01.png] +|image::benchmarks-concurrent_map/vs-x86/Parallel%20workload.xlsx.500k%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/vs-x86/Parallel%20workload.xlsx.500k%2C%200.5.png] +|image::benchmarks-concurrent_map/vs-x86/Parallel%20workload.xlsx.500k%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/vs-x86/Parallel%20workload.xlsx.500k%2C%200.99.png] + +h|500k updates, 4.5M lookups + +skew=0.01 +h|500k updates, 4.5M lookups + +skew=0.5 +h|500k updates, 4.5M lookups + +skew=0.99 +|=== + +[caption=] +[cols="3*^.^a", frame=all, grid=all] +|=== + +|image::benchmarks-concurrent_map/vs-x86/Parallel%20workload.xlsx.5M%2C%200.01.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/vs-x86/Parallel%20workload.xlsx.5M%2C%200.01.png] +|image::benchmarks-concurrent_map/vs-x86/Parallel%20workload.xlsx.5M%2C%200.5.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/vs-x86/Parallel%20workload.xlsx.5M%2C%200.5.png] +|image::benchmarks-concurrent_map/vs-x86/Parallel%20workload.xlsx.5M%2C%200.99.png[width=250,window=_blank,link=../diagrams/benchmarks-concurrent_map/vs-x86/Parallel%20workload.xlsx.5M%2C%200.99.png] + +h|5M updates, 45M lookups + +skew=0.01 +h|5M updates, 45M lookups + +skew=0.5 +h|5M updates, 45M lookups + +skew=0.99 +|=== diff --git a/doc/unordered/buckets.adoc b/doc/unordered/buckets.adoc index cf563f8c..e8b5a62f 100644 --- a/doc/unordered/buckets.adoc +++ b/doc/unordered/buckets.adoc @@ -2,9 +2,9 @@ :idprefix: buckets_ :imagesdir: ../diagrams -= The Data Structure += Basics of Hash Tables -The containers are made up of a number of 'buckets', each of which can contain +The containers are made up of a number of _buckets_, each of which can contain any number of elements. For example, the following diagram shows a <> with 7 buckets containing 5 elements, `A`, `B`, `C`, `D` and `E` (this is just for illustration, containers will typically have more buckets). @@ -12,8 +12,7 @@ have more buckets). image::buckets.png[] In order to decide which bucket to place an element in, the container applies -the hash function, `Hash`, to the element's key (for `unordered_set` and -`unordered_multiset` the key is the whole element, but is referred to as the key +the hash function, `Hash`, to the element's key (for sets the key is the whole element, but is referred to as the key so that the same terminology can be used for sets and maps). This returns a value of type `std::size_t`. `std::size_t` has a much greater range of values then the number of buckets, so the container applies another transformation to @@ -53,8 +52,7 @@ h|*Method* h|*Description* |`size_type bucket_count() const` |The number of buckets. -2+^h| *Closed-addressing containers only* + -`boost::unordered_[multi]set`, `boost::unordered_[multi]map` +2+^h| *Closed-addressing containers only* h|*Method* h|*Description* |`size_type max_bucket_count() const` @@ -80,7 +78,7 @@ h|*Method* h|*Description* |=== -== Controlling the number of buckets +== Controlling the Number of Buckets As more elements are added to an unordered associative container, the number of collisions will increase causing performance to degrade. @@ -90,8 +88,8 @@ calling `rehash`. 
The standard leaves a lot of freedom to the implementer to decide how the number of buckets is chosen, but it does make some requirements based on the -container's 'load factor', the number of elements divided by the number of buckets. -Containers also have a 'maximum load factor' which they should try to keep the +container's _load factor_, the number of elements divided by the number of buckets. +Containers also have a _maximum load factor_ which they should try to keep the load factor below. You can't control the bucket count directly but there are two ways to @@ -133,9 +131,7 @@ h|*Method* h|*Description* |`void rehash(size_type n)` |Changes the number of buckets so that there at least `n` buckets, and so that the load factor is less than the maximum load factor. -2+^h| *Open-addressing containers only* + -`boost::unordered_flat_set`, `boost::unordered_flat_map` + -`boost::unordered_node_set`, `boost::unordered_node_map` + +2+^h| *Open-addressing and concurrent containers only* h|*Method* h|*Description* |`size_type max_load() const` @@ -143,7 +139,7 @@ h|*Method* h|*Description* |=== -A note on `max_load` for open-addressing containers: the maximum load will be +A note on `max_load` for open-addressing and concurrent containers: the maximum load will be (`max_load_factor() * bucket_count()`) right after `rehash` or on container creation, but may slightly decrease when erasing elements in high-load situations. For instance, if we have a <> with `size()` almost @@ -151,165 +147,4 @@ at `max_load()` level and then erase 1,000 elements, `max_load()` may decrease b few dozen elements. This is done internally by Boost.Unordered in order to keep its performance stable, and must be taken into account when planning for rehash-free insertions. -== Iterator Invalidation -It is not specified how member functions other than `rehash` and `reserve` affect -the bucket count, although `insert` can only invalidate iterators -when the insertion causes the container's load to be greater than the maximum allowed. -For most implementations this means that `insert` will only -change the number of buckets when this happens. Iterators can be -invalidated by calls to `insert`, `rehash` and `reserve`. - -As for pointers and references, -they are never invalidated for node-based containers -(`boost::unordered_[multi]set`, `boost::unordered_[multi]map`, `boost::unordered_node_set`, `boost::unordered_node_map`), -but they will when rehashing occurs for -`boost::unordered_flat_set` and `boost::unordered_flat_map`: this is because -these containers store elements directly into their holding buckets, so -when allocating a new bucket array the elements must be transferred by means of move construction. - -In a similar manner to using `reserve` for ``vector``s, it can be a good idea -to call `reserve` before inserting a large number of elements. This will get -the expensive rehashing out of the way and let you store iterators, safe in -the knowledge that they won't be invalidated. If you are inserting `n` -elements into container `x`, you could first call: - -``` -x.reserve(n); -``` - -Note:: `reserve(n)` reserves space for at least `n` elements, allocating enough buckets -so as to not exceed the maximum load factor. -+ -Because the maximum load factor is defined as the number of elements divided by the total -number of available buckets, this function is logically equivalent to: -+ -``` -x.rehash(std::ceil(n / x.max_load_factor())) -``` -+ -See the <> on the `rehash` function. 
- -== Fast Closed Addressing Implementation - -++++ - -++++ - -Boost.Unordered sports one of the fastest implementations of closed addressing, also commonly known as https://en.wikipedia.org/wiki/Hash_table#Separate_chaining[separate chaining]. An example figure representing the data structure is below: - -[#img-bucket-groups,.text-center] -.A simple bucket group approach -image::bucket-groups.png[align=center] - -An array of "buckets" is allocated and each bucket in turn points to its own individual linked list. This makes meeting the standard requirements of bucket iteration straight-forward. Unfortunately, iteration of the entire container is often times slow using this layout as each bucket must be examined for occupancy, yielding a time complexity of `O(bucket_count() + size())` when the standard requires complexity to be `O(size())`. - -Canonical standard implementations will wind up looking like the diagram below: - -[.text-center] -.The canonical standard approach -image::singly-linked.png[align=center,link=../diagrams/singly-linked.png,window=_blank] - -It's worth noting that this approach is only used by pass:[libc++] and pass:[libstdc++]; the MSVC Dinkumware implementation uses a different one. A more detailed analysis of the standard containers can be found http://bannalia.blogspot.com/2013/10/implementation-of-c-unordered.html[here]. - -This unusually laid out data structure is chosen to make iteration of the entire container efficient by inter-connecting all of the nodes into a singly-linked list. One might also notice that buckets point to the node _before_ the start of the bucket's elements. This is done so that removing elements from the list can be done efficiently without introducing the need for a doubly-linked list. Unfortunately, this data structure introduces a guaranteed extra indirection. For example, to access the first element of a bucket, something like this must be done: - -```c++ -auto const idx = get_bucket_idx(hash_function(key)); -node* p = buckets[idx]; // first load -node* n = p->next; // second load -if (n && is_in_bucket(n, idx)) { - value_type const& v = *n; // third load - // ... -} -``` - -With a simple bucket group layout, this is all that must be done: -```c++ -auto const idx = get_bucket_idx(hash_function(key)); -node* n = buckets[idx]; // first load -if (n) { - value_type const& v = *n; // second load - // ... -} -``` - -In practice, the extra indirection can have a dramatic performance impact to common operations such as `insert`, `find` and `erase`. But to keep iteration of the container fast, Boost.Unordered introduces a novel data structure, a "bucket group". A bucket group is a fixed-width view of a subsection of the buckets array. It contains a bitmask (a `std::size_t`) which it uses to track occupancy of buckets and contains two pointers so that it can form a doubly-linked list with non-empty groups. An example diagram is below: - -[#img-fca-layout] -.The new layout used by Boost -image::fca.png[align=center] - -Thus container-wide iteration is turned into traversing the non-empty bucket groups (an operation with constant time complexity) which reduces the time complexity back to `O(size())`. In total, a bucket group is only 4 words in size and it views `sizeof(std::size_t) * CHAR_BIT` buckets meaning that for all common implementations, there's only 4 bits of space overhead per bucket introduced by the bucket groups. 
- -A more detailed description of Boost.Unordered's closed-addressing implementation is -given in an -https://bannalia.blogspot.com/2022/06/advancing-state-of-art-for.html[external article]. -For more information on implementation rationale, read the -xref:#rationale_boostunordered_multiset_and_boostunordered_multimap[corresponding section]. - -== Open Addressing Implementation - -The diagram shows the basic internal layout of `boost::unordered_flat_map`/`unordered_node_map` and -`boost:unordered_flat_set`/`unordered_node_set`. - - -[#img-foa-layout] -.Open-addressing layout used by Boost.Unordered. -image::foa.png[align=center] - -As with all open-addressing containers, elements (or pointers to the element nodes in the case of -`boost::unordered_node_map` and `boost::unordered_node_set`) are stored directly in the bucket array. -This array is logically divided into 2^_n_^ _groups_ of 15 elements each. -In addition to the bucket array, there is an associated _metadata array_ with 2^_n_^ -16-byte words. - -[#img-foa-metadata] -.Breakdown of a metadata word. -image::foa-metadata.png[align=center] - -A metadata word is divided into 15 _h_~_i_~ bytes (one for each associated -bucket), and an _overflow byte_ (_ofw_ in the diagram). The value of _h_~_i_~ is: - - - 0 if the corresponding bucket is empty. - - 1 to encode a special empty bucket called a _sentinel_, which is used internally to - stop iteration when the container has been fully traversed. - - If the bucket is occupied, a _reduced hash value_ obtained from the hash value of - the element. - -When looking for an element with hash value _h_, SIMD technologies such as -https://en.wikipedia.org/wiki/SSE2[SSE2] and -https://en.wikipedia.org/wiki/ARM_architecture_family#Advanced_SIMD_(Neon)[Neon] allow us -to very quickly inspect the full metadata word and look for the reduced value of _h_ among all the -15 buckets with just a handful of CPU instructions: non-matching buckets can be -readily discarded, and those whose reduced hash value matches need be inspected via full -comparison with the corresponding element. If the looked-for element is not present, -the overflow byte is inspected: - -- If the bit in the position _h_ mod 8 is zero, lookup terminates (and the -element is not present). -- If the bit is set to 1 (the group has been _overflowed_), further groups are -checked using https://en.wikipedia.org/wiki/Quadratic_probing[_quadratic probing_], and -the process is repeated. - -Insertion is algorithmically similar: empty buckets are located using SIMD, -and when going past a full group its corresponding overflow bit is set to 1. - -In architectures without SIMD support, the logical layout stays the same, but the metadata -word is codified using a technique we call _bit interleaving_: this layout allows us -to emulate SIMD with reasonably good performance using only standard arithmetic and -logical operations. - -[#img-foa-metadata-interleaving] -.Bit-interleaved metadata word. -image::foa-metadata-interleaving.png[align=center] - -A more detailed description of Boost.Unordered's open-addressing implementation is -given in an -https://bannalia.blogspot.com/2022/11/inside-boostunorderedflatmap.html[external article]. -For more information on implementation rationale, read the -xref:#rationale_boostunordered_flat_set_and_boostunordered_flat_map[corresponding section]. 
diff --git a/doc/unordered/changes.adoc b/doc/unordered/changes.adoc index dfd12081..04f0bc44 100644 --- a/doc/unordered/changes.adoc +++ b/doc/unordered/changes.adoc @@ -6,8 +6,9 @@ :github-pr-url: https://github.com/boostorg/unordered/pull :cpp: C++ -== Release 1.83.0 +== Release 1.83.0 - Major update +* Added `boost::concurrent_flat_map`, a fast, thread-safe hashmap based on open addressing. * Sped up iteration of open-addressing containers. == Release 1.82.0 - Major update diff --git a/doc/unordered/compliance.adoc b/doc/unordered/compliance.adoc index bc53e36e..d5d84e9c 100644 --- a/doc/unordered/compliance.adoc +++ b/doc/unordered/compliance.adoc @@ -5,7 +5,7 @@ :cpp: C++ -== Closed-addressing containers +== Closed-addressing Containers `unordered_[multi]set` and `unordered_[multi]map` are intended to provide a conformant implementation of the {cpp}20 standard that will work with {cpp}98 upwards. @@ -13,7 +13,7 @@ This wide compatibility does mean some compromises have to be made. With a compiler and library that fully support {cpp}11, the differences should be minor. -=== Move emulation +=== Move Emulation Support for move semantics is implemented using Boost.Move. If rvalue references are available it will use them, but if not it uses a close, @@ -25,7 +25,7 @@ but imperfect emulation. On such compilers: * The containers themselves are not movable. * Argument forwarding is not perfect. -=== Use of allocators +=== Use of Allocators {cpp}11 introduced a new allocator system. It's backwards compatible due to the lax requirements for allocators in the old standard, but might need @@ -58,7 +58,7 @@ Due to imperfect move emulation, some assignments might check `propagate_on_container_copy_assignment` on some compilers and `propagate_on_container_move_assignment` on others. -=== Construction/Destruction using allocators +=== Construction/Destruction Using Allocators The following support is required for full use of {cpp}11 style construction/destruction: @@ -117,7 +117,7 @@ Variadic constructor arguments for `emplace` are only used when both rvalue references and variadic template parameters are available. Otherwise `emplace` can only take up to 10 constructors arguments. -== Open-addressing containers +== Open-addressing Containers The C++ standard does not currently provide any open-addressing container specification to adhere to, so `boost::unordered_flat_set`/`unordered_node_set` and @@ -144,4 +144,61 @@ The main differences with C++ unordered associative containers are: ** Pointer stability is not kept under rehashing. ** There is no API for node extraction/insertion. +== Concurrent Containers + +There is currently no specification in the C++ standard for this or any other concurrent +data structure. `boost::concurrent_flat_map` takes the same template parameters as `std::unordered_map` +and all the maps provided by Boost.Unordered, and its API is modelled after that of +`boost::unordered_flat_map` with the crucial difference that iterators are not provided +due to their inherent problems in concurrent scenarios (high contention, prone to deadlocking): +so, `boost::concurrent_flat_map` is technically not a +https://en.cppreference.com/w/cpp/named_req/Container[Container^], although +it meets all the requirements of https://en.cppreference.com/w/cpp/named_req/AllocatorAwareContainer[AllocatorAware^] +containers except those implying iterators. + +In a non-concurrent unordered container, iterators serve two main purposes: + +* Access to an element previously located via lookup. 
+* Container traversal.
+
+In place of iterators, `boost::concurrent_flat_map` uses _internal visitation_
+facilities as a thread-safe substitute. Classical operations returning an iterator to an
+element already existing in the container, like for instance:
+
+[source,c++]
+----
+iterator find(const key_type& k);
+std::pair<iterator, bool> insert(const value_type& obj);
+----
+
+are transformed to accept a _visitation function_ that is passed such an element:
+
+[source,c++]
+----
+template<class F> size_t visit(const key_type& k, F f);
+template<class F> bool insert_or_visit(const value_type& obj, F f);
+----
+
+(In the second case `f` is only invoked if there's an equivalent element
+to `obj` in the table, not if insertion is successful.) Container traversal
+is served by:
+
+[source,c++]
+----
+template<class F> size_t visit_all(F f);
+----
+
+of which there are parallelized versions in C++17 compilers with parallel
+algorithm support. In general, the interface of `boost::concurrent_flat_map`
+is derived from that of `boost::unordered_flat_map` by a fairly straightforward
+process of replacing iterators with visitation where applicable. Where
+`iterator` and `const_iterator` provide mutable and const access to elements,
+respectively, visitation is granted mutable or const access depending on
+the constness of the member function used (there are also `*cvisit` overloads for
+explicit const visitation).
+
+The one notable operation not provided is `operator[]`/`at`, which can be
+replaced, if in a more convoluted manner, by
+xref:#concurrent_flat_map_try_emplace_or_cvisit[`try_emplace_or_visit`].
+
 //-
diff --git a/doc/unordered/concurrent.adoc b/doc/unordered/concurrent.adoc
new file mode 100644
index 00000000..3410570a
--- /dev/null
+++ b/doc/unordered/concurrent.adoc
@@ -0,0 +1,182 @@
+[#concurrent]
+= Concurrent Containers
+
+:idprefix: concurrent_
+
+Boost.Unordered currently provides just one concurrent container named `boost::concurrent_flat_map`.
+`boost::concurrent_flat_map` is a hash table that allows concurrent write/read access from
+different threads without having to implement any synchronization mechanism on the user's side.
+
+[source,c++]
+----
+std::vector<int> input;
+boost::concurrent_flat_map<int, int> m;
+
+...
+
+// process input in parallel
+const int num_threads = 8;
+std::vector<std::thread> threads;
+std::size_t chunk = input.size() / num_threads; // how many elements per thread
+
+for (int i = 0; i < num_threads; ++i) {
+  threads.emplace_back([&,i] {
+    // calculate the portion of input this thread takes care of
+    std::size_t start = i * chunk;
+    std::size_t end = (i == num_threads - 1) ? input.size() : (i + 1) * chunk;
+
+    for (std::size_t n = start; n < end; ++n) {
+      m.emplace(input[n], calculation(input[n]));
+    }
+  });
+}
+----
+
+In the example above, threads access `m` without synchronization, just as we'd do in a
+single-threaded scenario. In an ideal setting, if a given workload is distributed among
+_N_ threads, execution is _N_ times faster than with one thread — this limit is
+never attained in practice due to synchronization overheads and _contention_ (one thread
+waiting for another to leave a locked portion of the map), but `boost::concurrent_flat_map`
+is designed to perform with very little overhead and typically achieves _linear scaling_
+(that is, performance is proportional to the number of threads up to the number of
+logical cores in the CPU).
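+
+For the sketch above to be complete, the worker threads must be joined before the results
+in `m` are consumed; a minimal continuation, using only standard `std::thread` facilities:
+
+[source,c++]
+----
+for (auto& t: threads) t.join();
+
+// all updates are now visible; m can be read without further synchronization
+assert(m.size() <= input.size());
+----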
+
+== Visitation-based API
+
+The first thing a new user of `boost::concurrent_flat_map` will notice is that this
+class _does not provide iterators_ (which makes it technically
+not a https://en.cppreference.com/w/cpp/named_req/Container[Container^]
+in the C++ standard sense). The reason for this is that iterators are inherently
+thread-unsafe. Consider this hypothetical code:
+
+[source,c++]
+----
+auto it = m.find(k); // A: get an iterator pointing to the element with key k
+if (it != m.end()) {
+  some_function(*it); // B: use the value of the element
+}
+----
+
+In a multithreaded scenario, the iterator `it` may be invalid at point B if some other
+thread issues an `m.erase(k)` operation between A and B. There are designs that
+can remedy this by making iterators lock the element they point to, but this
+approach lends itself to high contention and can easily produce deadlocks in a program.
+`operator[]` has similar concurrency issues, and is not provided by
+`boost::concurrent_flat_map` either. Instead, element access is done through
+so-called _visitation functions_:
+
+[source,c++]
+----
+m.visit(k, [](const auto& x) { // x is the element with key k (if it exists)
+  some_function(x);            // use it
+});
+----
+
+The visitation function passed by the user (in this case, a lambda function)
+is executed internally by `boost::concurrent_flat_map` in
+a thread-safe manner, so it can access the element without worrying about other
+threads interfering in the process.
+
+On the other hand, a visitation function can _not_ access the container itself:
+
+[source,c++]
+----
+m.visit(k, [&](const auto& x) {
+  some_function(x, m.size()); // forbidden: m can't be accessed inside visitation
+});
+----
+
+Access to a different container is allowed, though:
+
+[source,c++]
+----
+m.visit(k, [&](const auto& x) {
+  if (some_function(x)) {
+    m2.insert(x); // OK, m2 is a different boost::concurrent_flat_map
+  }
+});
+----
+
+But, in general, visitation functions should be as lightweight as possible to
+reduce contention and increase parallelization. In some cases, moving heavy work
+outside of visitation may be beneficial:
+
+[source,c++]
+----
+std::optional<value_type> o;
+bool found = m.visit(k, [&](const auto& x) {
+  o = x;
+});
+if (found) {
+  some_heavy_duty_function(*o);
+}
+----
+
+Visitation is prominent in the API provided by `boost::concurrent_flat_map`, and
+many classical operations have visitation-enabled variations:
+
+[source,c++]
+----
+m.insert_or_visit(x, [](auto& y) {
+  // if insertion failed because of an equivalent element y,
+  // do something with it, for instance:
+  ++y.second; // increment the mapped part of the element
+});
+----
+
+Note that in this last example the visitation function could actually _modify_
+the element: as a general rule, operations on a `boost::concurrent_flat_map` `m`
+will grant visitation functions const/non-const access to the element depending on whether
+`m` is const/non-const. Const access can always be explicitly requested
+by using `cvisit` overloads (for instance, `insert_or_cvisit`) and may result
+in higher parallelization. Consult the xref:#concurrent_flat_map[reference]
+for a complete list of available operations.
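+As an illustration of this rule (the snippet is ours, not part of the original
+text; `m` is the non-const map from the previous examples, and `read_only_function`
+is a placeholder):
+
+[source,c++]
+----
+m.visit(k, [](auto& x) {        // m is non-const: mutable access granted
+  x.second += 1;                // allowed
+});
+m.cvisit(k, [](const auto& x) { // const access explicitly requested
+  read_only_function(x.second); // read-only use; may allow more parallelism
+});
+----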
+
+== Whole-Table Visitation
+
+In the absence of iterators, `boost::concurrent_flat_map` provides `visit_all`
+as an alternative way to process all the elements in the map:
+
+[source,c++]
+----
+m.visit_all([](auto& x) {
+  x.second = 0; // reset the mapped part of the element
+});
+----
+
+In C++17 compilers implementing standard parallel algorithms, whole-table
+visitation can be parallelized:
+
+[source,c++]
+----
+m.visit_all(std::execution::par, [](auto& x) { // run in parallel
+  x.second = 0; // reset the mapped part of the element
+});
+----
+
+There is another whole-table visitation operation, `erase_if`:
+
+[source,c++]
+----
+m.erase_if([](auto& x) {
+  return x.second == 0; // erase the elements whose mapped value is zero
+});
+----
+
+`erase_if` can also be parallelized. Note that, in order to increase efficiency,
+these operations do not block the table during execution: this implies that elements
+may be inserted, modified or erased by other threads during visitation. It is
+advisable not to assume too much about the exact global state of a `boost::concurrent_flat_map`
+at any point in your program.
+
+== Blocking Operations
+
+``boost::concurrent_flat_map``s can be copied, assigned, cleared and merged just like any
+Boost.Unordered container. Unlike most other operations, these are _blocking_,
+that is, all other threads are prevented from accessing the tables involved while a copy, assignment,
+clear or merge operation is in progress. Blocking is taken care of automatically by the library
+and the user need not take any special precautions, but overall performance may be affected.
+
+Another blocking operation is _rehashing_, which happens explicitly via `rehash`/`reserve`
+or during insertion when the table's load hits `max_load()`. As with non-concurrent containers,
+reserving space in advance of bulk insertions will generally speed up the process.
diff --git a/doc/unordered/concurrent_flat_map.adoc b/doc/unordered/concurrent_flat_map.adoc
new file mode 100644
index 00000000..fcdc8662
--- /dev/null
+++ b/doc/unordered/concurrent_flat_map.adoc
@@ -0,0 +1,1438 @@
+[#concurrent_flat_map]
+== Class Template concurrent_flat_map
+
+:idprefix: concurrent_flat_map_
+
+`boost::concurrent_flat_map` — A hash table that associates unique keys with another value and
+allows for concurrent element insertion, erasure, lookup and access
+without external synchronization mechanisms.
+
+Even though it acts as a container, `boost::concurrent_flat_map`
+does not model the standard C++ https://en.cppreference.com/w/cpp/named_req/Container[Container^] concept.
+In particular, iterators and associated operations (`begin`, `end`, etc.) are not provided.
+Element access and modification are done through user-provided _visitation functions_ that are passed
+to `concurrent_flat_map` operations, where they are executed internally in a controlled fashion.
+Such a visitation-based API allows for low-contention concurrent usage scenarios.
+
+The internal data structure of `boost::concurrent_flat_map` is similar to that of
+`boost::unordered_flat_map`. As a result of its using open-addressing techniques,
+`value_type` must be move-constructible and pointer stability is not kept under rehashing.
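+As a brief illustration of the last point (this sketch is ours, not part of the
+formal reference; the element count is arbitrary): since rehashing is blocking
+and moves elements, bulk insertion benefits from reserving capacity up front.
+
+```c++
+boost::concurrent_flat_map<int, int> m;
+m.reserve(1000000); // single blocking rehash up front
+
+// concurrent insertions up to max_load() now proceed
+// without further (blocking, element-moving) rehashes
+for (int i = 0; i < 1000000; ++i) m.emplace(i, 2 * i);
+```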
+
+=== Synopsis
+
+[listing,subs="+macros,+quotes"]
+-----
+// #include <boost/unordered/concurrent_flat_map.hpp>
+
+namespace boost {
+  template <class Key, class T, class Hash = boost::hash<Key>,
+            class Pred = std::equal_to<Key>,
+            class Allocator = std::allocator<std::pair<const Key, T>>>
+  class concurrent_flat_map {
+    public:
+      // types
+      using key_type = Key;
+      using mapped_type = T;
+      using value_type = std::pair<const Key, T>;
+      using init_type = std::pair<
+        typename std::remove_const<Key>::type,
+        typename std::remove_const<T>::type
+      >;
+      using hasher = Hash;
+      using key_equal = Pred;
+      using allocator_type = Allocator;
+      using pointer = typename std::allocator_traits<Allocator>::pointer;
+      using const_pointer = typename std::allocator_traits<Allocator>::const_pointer;
+      using reference = value_type&;
+      using const_reference = const value_type&;
+      using size_type = std::size_t;
+      using difference_type = std::ptrdiff_t;
+
+      // construct/copy/destroy
+      xref:#concurrent_flat_map_default_constructor[concurrent_flat_map]();
+      explicit xref:#concurrent_flat_map_bucket_count_constructor[concurrent_flat_map](size_type n,
+                                    const hasher& hf = hasher(),
+                                    const key_equal& eql = key_equal(),
+                                    const allocator_type& a = allocator_type());
+      template<class InputIterator>
+        xref:#concurrent_flat_map_iterator_range_constructor[concurrent_flat_map](InputIterator f, InputIterator l,
+                            size_type n = _implementation-defined_,
+                            const hasher& hf = hasher(),
+                            const key_equal& eql = key_equal(),
+                            const allocator_type& a = allocator_type());
+      xref:#concurrent_flat_map_copy_constructor[concurrent_flat_map](const concurrent_flat_map& other);
+      xref:#concurrent_flat_map_move_constructor[concurrent_flat_map](concurrent_flat_map&& other);
+      template<class InputIterator>
+        xref:#concurrent_flat_map_iterator_range_constructor_with_allocator[concurrent_flat_map](InputIterator f, InputIterator l, const allocator_type& a);
+      explicit xref:#concurrent_flat_map_allocator_constructor[concurrent_flat_map](const Allocator& a);
+      xref:#concurrent_flat_map_copy_constructor_with_allocator[concurrent_flat_map](const concurrent_flat_map& other, const Allocator& a);
+      xref:#concurrent_flat_map_move_constructor_with_allocator[concurrent_flat_map](concurrent_flat_map&& other, const Allocator& a);
+      xref:#concurrent_flat_map_initializer_list_constructor[concurrent_flat_map](std::initializer_list<value_type> il,
+                          size_type n = _implementation-defined_,
+                          const hasher& hf = hasher(),
+                          const key_equal& eql = key_equal(),
+                          const allocator_type& a = allocator_type());
+      xref:#concurrent_flat_map_bucket_count_constructor_with_allocator[concurrent_flat_map](size_type n, const allocator_type& a);
+      xref:#concurrent_flat_map_bucket_count_constructor_with_hasher_and_allocator[concurrent_flat_map](size_type n, const hasher& hf, const allocator_type& a);
+      template<class InputIterator>
+        xref:#concurrent_flat_map_iterator_range_constructor_with_bucket_count_and_allocator[concurrent_flat_map](InputIterator f, InputIterator l, size_type n,
+                            const allocator_type& a);
+      template<class InputIterator>
+        xref:#concurrent_flat_map_iterator_range_constructor_with_bucket_count_and_hasher[concurrent_flat_map](InputIterator f, InputIterator l, size_type n, const hasher& hf,
+                            const allocator_type& a);
+      xref:#concurrent_flat_map_initializer_list_constructor_with_allocator[concurrent_flat_map](std::initializer_list<value_type> il, const allocator_type& a);
+      xref:#concurrent_flat_map_initializer_list_constructor_with_bucket_count_and_allocator[concurrent_flat_map](std::initializer_list<value_type> il, size_type n,
+                          const allocator_type& a);
+      xref:#concurrent_flat_map_initializer_list_constructor_with_bucket_count_and_hasher_and_allocator[concurrent_flat_map](std::initializer_list<value_type> il, size_type n, const hasher& hf,
+                          const allocator_type& a);
+      xref:#concurrent_flat_map_destructor[~concurrent_flat_map]();
+      concurrent_flat_map& xref:#concurrent_flat_map_copy_assignment[operator++=++](const concurrent_flat_map& other);
+      concurrent_flat_map& xref:#concurrent_flat_map_move_assignment[operator++=++](concurrent_flat_map&& other)
+        noexcept(boost::allocator_traits<Allocator>::is_always_equal::value ||
+                 boost::allocator_traits<Allocator>::propagate_on_container_move_assignment::value);
+      concurrent_flat_map& xref:#concurrent_flat_map_initializer_list_assignment[operator++=++](std::initializer_list<value_type>);
+      allocator_type xref:#concurrent_flat_map_get_allocator[get_allocator]() const noexcept;
+
+      // visitation
+      template<class F> size_t xref:#concurrent_flat_map_cvisit[visit](const key_type& k, F f);
+      template<class F> size_t xref:#concurrent_flat_map_cvisit[visit](const key_type& k, F f) const;
+      template<class F> size_t xref:#concurrent_flat_map_cvisit[cvisit](const key_type& k, F f) const;
+      template<class K, class F> size_t xref:#concurrent_flat_map_cvisit[visit](const K& k, F f);
+      template<class K, class F> size_t xref:#concurrent_flat_map_cvisit[visit](const K& k, F f) const;
+      template<class K, class F> size_t xref:#concurrent_flat_map_cvisit[cvisit](const K& k, F f) const;
+
+      template<class F> size_t xref:#concurrent_flat_map_cvisit_all[visit_all](F f);
+      template<class F> size_t xref:#concurrent_flat_map_cvisit_all[visit_all](F f) const;
+      template<class F> size_t xref:#concurrent_flat_map_cvisit_all[cvisit_all](F f) const;
+      template<class ExecutionPolicy, class F>
+        void xref:#concurrent_flat_map_parallel_cvisit_all[visit_all](ExecutionPolicy&& policy, F f);
+      template<class ExecutionPolicy, class F>
+        void xref:#concurrent_flat_map_parallel_cvisit_all[visit_all](ExecutionPolicy&& policy, F f) const;
+      template<class ExecutionPolicy, class F>
+        void xref:#concurrent_flat_map_parallel_cvisit_all[cvisit_all](ExecutionPolicy&& policy, F f) const;
+
+      // capacity
+      ++[[nodiscard]]++ bool xref:#concurrent_flat_map_empty[empty]() const noexcept;
+      size_type xref:#concurrent_flat_map_size[size]() const noexcept;
+      size_type xref:#concurrent_flat_map_max_size[max_size]() const noexcept;
+
+      // modifiers
+      template<class... Args> bool xref:#concurrent_flat_map_emplace[emplace](Args&&... args);
+      bool xref:#concurrent_flat_map_copy_insert[insert](const value_type& obj);
+      bool xref:#concurrent_flat_map_copy_insert[insert](const init_type& obj);
+      bool xref:#concurrent_flat_map_move_insert[insert](value_type&& obj);
+      bool xref:#concurrent_flat_map_move_insert[insert](init_type&& obj);
+      template<class InputIterator> size_type xref:#concurrent_flat_map_insert_iterator_range[insert](InputIterator first, InputIterator last);
+      size_type xref:#concurrent_flat_map_insert_initializer_list[insert](std::initializer_list<value_type> il);
+
+      template<class... Args, class F> bool xref:#concurrent_flat_map_emplace_or_cvisit[emplace_or_visit](Args&&... args, F&& f);
+      template<class... Args, class F> bool xref:#concurrent_flat_map_emplace_or_cvisit[emplace_or_cvisit](Args&&... args, F&& f);
+      template<class F> bool xref:#concurrent_flat_map_copy_insert_or_cvisit[insert_or_visit](const value_type& obj, F f);
+      template<class F> bool xref:#concurrent_flat_map_copy_insert_or_cvisit[insert_or_cvisit](const value_type& obj, F f);
+      template<class F> bool xref:#concurrent_flat_map_copy_insert_or_cvisit[insert_or_visit](const init_type& obj, F f);
+      template<class F> bool xref:#concurrent_flat_map_copy_insert_or_cvisit[insert_or_cvisit](const init_type& obj, F f);
+      template<class F> bool xref:#concurrent_flat_map_move_insert_or_cvisit[insert_or_visit](value_type&& obj, F f);
+      template<class F> bool xref:#concurrent_flat_map_move_insert_or_cvisit[insert_or_cvisit](value_type&& obj, F f);
+      template<class F> bool xref:#concurrent_flat_map_move_insert_or_cvisit[insert_or_visit](init_type&& obj, F f);
+      template<class F> bool xref:#concurrent_flat_map_move_insert_or_cvisit[insert_or_cvisit](init_type&& obj, F f);
+      template<class InputIterator, class F>
+        size_type xref:#concurrent_flat_map_insert_iterator_range_or_visit[insert_or_visit](InputIterator first, InputIterator last, F f);
+      template<class InputIterator, class F>
+        size_type xref:#concurrent_flat_map_insert_iterator_range_or_visit[insert_or_cvisit](InputIterator first, InputIterator last, F f);
+      template<class F> size_type xref:#concurrent_flat_map_insert_initializer_list_or_visit[insert_or_visit](std::initializer_list<value_type> il, F f);
+      template<class F> size_type xref:#concurrent_flat_map_insert_initializer_list_or_visit[insert_or_cvisit](std::initializer_list<value_type> il, F f);
+
+      template<class... Args> bool xref:#concurrent_flat_map_try_emplace[try_emplace](const key_type& k, Args&&... args);
+      template<class... Args> bool xref:#concurrent_flat_map_try_emplace[try_emplace](key_type&& k, Args&&... args);
+      template<class K, class... Args> bool xref:#concurrent_flat_map_try_emplace[try_emplace](K&& k, Args&&... args);
+
+      template<class... Args, class F>
+        bool xref:#concurrent_flat_map_try_emplace_or_cvisit[try_emplace_or_visit](const key_type& k, Args&&... args, F&& f);
+      template<class... Args, class F>
+        bool xref:#concurrent_flat_map_try_emplace_or_cvisit[try_emplace_or_cvisit](const key_type& k, Args&&... args, F&& f);
+      template<class... Args, class F>
+        bool xref:#concurrent_flat_map_try_emplace_or_cvisit[try_emplace_or_visit](key_type&& k, Args&&... args, F&& f);
+      template<class... Args, class F>
+        bool xref:#concurrent_flat_map_try_emplace_or_cvisit[try_emplace_or_cvisit](key_type&& k, Args&&... args, F&& f);
+      template<class K, class... Args, class F>
+        bool xref:#concurrent_flat_map_try_emplace_or_cvisit[try_emplace_or_visit](K&& k, Args&&... args, F&& f);
+      template<class K, class... Args, class F>
+        bool xref:#concurrent_flat_map_try_emplace_or_cvisit[try_emplace_or_cvisit](K&& k, Args&&... args, F&& f);
+
+      template<class M> bool xref:#concurrent_flat_map_insert_or_assign[insert_or_assign](const key_type& k, M&& obj);
+      template<class M> bool xref:#concurrent_flat_map_insert_or_assign[insert_or_assign](key_type&& k, M&& obj);
+      template<class K, class M> bool xref:#concurrent_flat_map_insert_or_assign[insert_or_assign](K&& k, M&& obj);
+
+      size_type xref:#concurrent_flat_map_erase[erase](const key_type& k);
+      template<class K> size_type xref:#concurrent_flat_map_erase[erase](const K& k);
+
+      template<class F> size_type xref:#concurrent_flat_map_erase_if_by_key[erase_if](const key_type& k, F f);
+      template<class K, class F> size_type xref:#concurrent_flat_map_erase_if_by_key[erase_if](const K& k, F f);
+      template<class F> size_type xref:#concurrent_flat_map_erase_if[erase_if](F f);
+      template<class ExecutionPolicy, class F> void xref:#concurrent_flat_map_parallel_erase_if[erase_if](ExecutionPolicy&& policy, F f);
+
+      void xref:#concurrent_flat_map_swap[swap](concurrent_flat_map& other)
+        noexcept(boost::allocator_traits<Allocator>::is_always_equal::value ||
+                 boost::allocator_traits<Allocator>::propagate_on_container_swap::value);
+      void xref:#concurrent_flat_map_clear[clear]() noexcept;
+
+      template<class H2, class P2>
+        size_type xref:#concurrent_flat_map_merge[merge](concurrent_flat_map<Key, T, H2, P2, Allocator>& source);
+      template<class H2, class P2>
+        size_type xref:#concurrent_flat_map_merge[merge](concurrent_flat_map<Key, T, H2, P2, Allocator>&& source);
+
+      // observers
+      hasher xref:#concurrent_flat_map_hash_function[hash_function]() const;
+      key_equal xref:#concurrent_flat_map_key_eq[key_eq]() const;
+
+      // map operations
+      size_type xref:#concurrent_flat_map_count[count](const key_type& k) const;
+      template<class K>
+        size_type xref:#concurrent_flat_map_count[count](const K& k) const;
+      bool xref:#concurrent_flat_map_contains[contains](const key_type& k) const;
+      template<class K>
+        bool xref:#concurrent_flat_map_contains[contains](const K& k) const;
+
+      // bucket interface
+      size_type xref:#concurrent_flat_map_bucket_count[bucket_count]() const noexcept;
+
+      // hash policy
+      float xref:#concurrent_flat_map_load_factor[load_factor]() const noexcept;
+      float xref:#concurrent_flat_map_max_load_factor[max_load_factor]() const noexcept;
+      void xref:#concurrent_flat_map_set_max_load_factor[max_load_factor](float z);
+      size_type xref:#concurrent_flat_map_max_load[max_load]() const noexcept;
+      void xref:#concurrent_flat_map_rehash[rehash](size_type n);
+      void xref:#concurrent_flat_map_reserve[reserve](size_type n);
+  };
+
+  // Deduction Guides
+  template<class InputIterator,
+           class Hash = boost::hash<xref:#concurrent_flat_map_iter_key_type[__iter-key-type__]<InputIterator>>,
+           class Pred = std::equal_to<xref:#concurrent_flat_map_iter_key_type[__iter-key-type__]<InputIterator>>,
+           class Allocator = std::allocator<xref:#concurrent_flat_map_iter_to_alloc_type[__iter-to-alloc-type__]<InputIterator>>>
+  concurrent_flat_map(InputIterator, InputIterator, typename xref:#concurrent_flat_map_deduction_guides[__see below__]::size_type = xref:#concurrent_flat_map_deduction_guides[__see below__],
+                      Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+    -> concurrent_flat_map<xref:#concurrent_flat_map_iter_key_type[__iter-key-type__]<InputIterator>, xref:#concurrent_flat_map_iter_mapped_type[__iter-mapped-type__]<InputIterator>, Hash,
+                           Pred, Allocator>;
+
+  template<class Key, class T, class Hash = boost::hash<Key>,
+           class Pred = std::equal_to<Key>,
+           class Allocator = std::allocator<std::pair<const Key, T>>>
+  concurrent_flat_map(std::initializer_list<std::pair<Key, T>>,
+                      typename xref:#concurrent_flat_map_deduction_guides[__see below__]::size_type = xref:#concurrent_flat_map_deduction_guides[__see below__], Hash = Hash(),
+                      Pred = Pred(), Allocator = Allocator())
+    -> concurrent_flat_map<Key, T, Hash, Pred, Allocator>;
+
+  template<class InputIterator, class Allocator>
+  concurrent_flat_map(InputIterator, InputIterator, typename xref:#concurrent_flat_map_deduction_guides[__see below__]::size_type, Allocator)
+    -> concurrent_flat_map<xref:#concurrent_flat_map_iter_key_type[__iter-key-type__]<InputIterator>, xref:#concurrent_flat_map_iter_mapped_type[__iter-mapped-type__]<InputIterator>,
+                           boost::hash<xref:#concurrent_flat_map_iter_key_type[__iter-key-type__]<InputIterator>>,
+                           std::equal_to<xref:#concurrent_flat_map_iter_key_type[__iter-key-type__]<InputIterator>>, Allocator>;
+
+  template<class InputIterator, class Allocator>
+  concurrent_flat_map(InputIterator, InputIterator, Allocator)
+    -> concurrent_flat_map<xref:#concurrent_flat_map_iter_key_type[__iter-key-type__]<InputIterator>, xref:#concurrent_flat_map_iter_mapped_type[__iter-mapped-type__]<InputIterator>,
+                           boost::hash<xref:#concurrent_flat_map_iter_key_type[__iter-key-type__]<InputIterator>>,
+                           std::equal_to<xref:#concurrent_flat_map_iter_key_type[__iter-key-type__]<InputIterator>>, Allocator>;
+
+  template<class InputIterator, class Hash, class Allocator>
+  concurrent_flat_map(InputIterator, InputIterator, typename xref:#concurrent_flat_map_deduction_guides[__see below__]::size_type, Hash,
+                      Allocator)
+    -> concurrent_flat_map<xref:#concurrent_flat_map_iter_key_type[__iter-key-type__]<InputIterator>, xref:#concurrent_flat_map_iter_mapped_type[__iter-mapped-type__]<InputIterator>, Hash,
+                           std::equal_to<xref:#concurrent_flat_map_iter_key_type[__iter-key-type__]<InputIterator>>, Allocator>;
+
+  template<class Key, class T, class Allocator>
+  concurrent_flat_map(std::initializer_list<std::pair<Key, T>>, typename xref:#concurrent_flat_map_deduction_guides[__see below__]::size_type,
+                      Allocator)
+    -> concurrent_flat_map<Key, T, boost::hash<Key>, std::equal_to<Key>, Allocator>;
+
+  template<class Key, class T, class Allocator>
+  concurrent_flat_map(std::initializer_list<std::pair<Key, T>>, Allocator)
+    -> concurrent_flat_map<Key, T, boost::hash<Key>, std::equal_to<Key>, Allocator>;
+
+  template<class Key, class T, class Hash, class Allocator>
+  concurrent_flat_map(std::initializer_list<std::pair<Key, T>>, typename xref:#concurrent_flat_map_deduction_guides[__see below__]::size_type,
+                      Hash, Allocator)
+    -> concurrent_flat_map<Key, T, Hash, std::equal_to<Key>, Allocator>;
+
+  // Equality Comparisons
+  template<class Key, class T, class Hash, class Pred, class Allocator>
+    bool xref:#concurrent_flat_map_operator[operator==](const concurrent_flat_map<Key, T, Hash, Pred, Allocator>& x,
+                                                        const concurrent_flat_map<Key, T, Hash, Pred, Allocator>& y);
+
+  template<class Key, class T, class Hash, class Pred, class Allocator>
+    bool xref:#concurrent_flat_map_operator_2[operator!=](const concurrent_flat_map<Key, T, Hash, Pred, Allocator>& x,
+                                                          const concurrent_flat_map<Key, T, Hash, Pred, Allocator>& y);
+
+  // swap
+  template<class Key, class T, class Hash, class Pred, class Allocator>
+    void xref:#concurrent_flat_map_swap_2[swap](concurrent_flat_map<Key, T, Hash, Pred, Allocator>& x,
+                                                concurrent_flat_map<Key, T, Hash, Pred, Allocator>& y)
+      noexcept(noexcept(x.swap(y)));
+
+  // Erasure
+  template<class Key, class T, class Hash, class Pred, class Allocator, class Predicate>
+    typename concurrent_flat_map<Key, T, Hash, Pred, Allocator>::size_type
+      xref:#concurrent_flat_map_erase_if_2[erase_if](concurrent_flat_map<Key, T, Hash, Pred, Allocator>& c, Predicate pred);
+}
+-----
+
+---
+
+=== Description
+
+*Template Parameters*
+
+[cols="1,1"]
+|===
+
+|_Key_
+.2+|`Key` and `T` must be https://en.cppreference.com/w/cpp/named_req/MoveConstructible[MoveConstructible^].
+`std::pair<const Key, T>` must be https://en.cppreference.com/w/cpp/named_req/EmplaceConstructible[EmplaceConstructible^]
+into the table from any `std::pair` object convertible to it, and it also must be
+https://en.cppreference.com/w/cpp/named_req/Erasable[Erasable^] from the table.
+
+|_T_
+
+|_Hash_
+|A unary function object type that acts as a hash function for a `Key`. It takes a single argument of type `Key` and returns a value of type `std::size_t`.
+
+|_Pred_
+|A binary function object that induces an equivalence relation on values of type `Key`. It takes two arguments of type `Key` and returns a value of type `bool`.
+
+|_Allocator_
+|An allocator whose value type is the same as the table's value type.
+`std::allocator_traits<Allocator>::pointer` and `std::allocator_traits<Allocator>::const_pointer`
+must be convertible to/from `value_type*` and `const value_type*`, respectively.
+
+|===
+
+The elements of the table are held in an internal _bucket array_. An element is inserted into a bucket determined by its
+hash code, but if the bucket is already occupied (a _collision_), an available one in the vicinity of the
+original position is used.
+
+The size of the bucket array can be automatically increased by a call to `insert`/`emplace`, or as a result of calling
+`rehash`/`reserve`. The _load factor_ of the table (number of elements divided by number of buckets) is never
+greater than `max_load_factor()`, except possibly for small sizes where the implementation may decide to
+allow for higher loads.
+
+If `xref:hash_traits_hash_is_avalanching[hash_is_avalanching]<Hash>::value` is `true`, the hash function
+is used as-is; otherwise, a bit-mixing post-processing stage is added to increase the quality of hashing
+at the expense of extra computational cost.
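+For illustration, a hash function advertises itself as avalanching by providing
+an `is_avalanching` typedef, as described for the `hash_is_avalanching` trait above.
+The functor below is a toy sketch of ours, not part of the library:
+
+```c++
+struct toy_string_hash {               // hypothetical user-provided hasher
+  using is_avalanching = void;         // skip the bit-mixing post-processing
+  std::size_t operator()(const std::string& s) const noexcept {
+    std::size_t h = 14695981039346656037ull;            // 64-bit FNV-1a basis
+    for (unsigned char c: s) { h ^= c; h *= 1099511628211ull; } // FNV-1a prime
+    return h;                          // assumed of good quality for this use
+  }
+};
+
+boost::concurrent_flat_map<std::string, int, toy_string_hash> m;
+```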
+
+---
+
+=== Concurrency Requirements and Guarantees
+
+Concurrent invocations of `operator()` on the same const instance of `Hash` or `Pred` are required
+to not introduce data races. For `Alloc` being either `Allocator` or any allocator type rebound
+from `Allocator`, concurrent invocations of the following operations on the same instance `al` of `Alloc`
+are required to not introduce data races:
+
+* Copy construction from `al` of an allocator rebound from `Alloc`
+* `std::allocator_traits<Alloc>::allocate`
+* `std::allocator_traits<Alloc>::deallocate`
+* `std::allocator_traits<Alloc>::construct`
+* `std::allocator_traits<Alloc>::destroy`
+
+In general, these requirements on `Hash`, `Pred` and `Allocator` are met if these types
+are not stateful or if the operations only involve constant access to internal data members.
+
+With the exception of destruction, concurrent invocations of any operation on the same instance of a
+`concurrent_flat_map` do not introduce data races — that is, they are thread-safe.
+
+If an operation *op* is explicitly designated as _blocking on_ `x`, where `x` is an instance of a `boost::concurrent_flat_map`,
+prior blocking operations on `x` synchronize with *op*. So, blocking operations on the same
+`concurrent_flat_map` execute sequentially in a multithreaded scenario.
+
+An operation is said to be _blocking on rehashing of_ ``__x__`` if it blocks on `x`
+only when an internal rehashing is issued.
+
+Access or modification of an element of a `boost::concurrent_flat_map` passed by reference to a
+user-provided visitation function does not introduce data races when the visitation function
+is executed internally by the `boost::concurrent_flat_map`.
+
+Any `boost::concurrent_flat_map` operation that inserts or modifies an element `e`
+synchronizes with the internal invocation of a visitation function on `e`.
+
+Visitation functions executed by a `boost::concurrent_flat_map` `x` are not allowed to invoke any operation
+on `x`; invoking operations on a different `boost::concurrent_flat_map` instance `y` is allowed only
+if concurrent outstanding operations on `y` do not access `x` directly or indirectly.
+
+---
+
+=== Constructors
+
+==== Default Constructor
+```c++
+concurrent_flat_map();
+```
+
+Constructs an empty table using `hasher()` as the hash function,
+`key_equal()` as the key equality predicate and `allocator_type()` as the allocator.
+
+[horizontal]
+Postconditions:;; `size() == 0`
+Requires:;; If the defaults are used, `hasher`, `key_equal` and `allocator_type` need to be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+
+---
+
+==== Bucket Count Constructor
+```c++
+explicit concurrent_flat_map(size_type n,
+                             const hasher& hf = hasher(),
+                             const key_equal& eql = key_equal(),
+                             const allocator_type& a = allocator_type());
+```
+
+Constructs an empty table with at least `n` buckets, using `hf` as the hash
+function, `eql` as the key equality predicate, and `a` as the allocator.
+
+[horizontal]
+Postconditions:;; `size() == 0`
+Requires:;; If the defaults are used, `hasher`, `key_equal` and `allocator_type` need to be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+
+---
+
+==== Iterator Range Constructor
+[source,c++,subs="+quotes"]
+----
+template<class InputIterator>
+  concurrent_flat_map(InputIterator f, InputIterator l,
+                      size_type n = _implementation-defined_,
+                      const hasher& hf = hasher(),
+                      const key_equal& eql = key_equal(),
+                      const allocator_type& a = allocator_type());
+----
+
+Constructs an empty table with at least `n` buckets, using `hf` as the hash function, `eql` as the key equality predicate and `a` as the allocator, and inserts the elements from `[f, l)` into it.
+
+[horizontal]
+Requires:;; If the defaults are used, `hasher`, `key_equal` and `allocator_type` need to be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+
+---
+
+==== Copy Constructor
+```c++
+concurrent_flat_map(concurrent_flat_map const& other);
+```
+
+The copy constructor. Copies the contained elements, hash function, predicate and allocator.
+
+If `Allocator::select_on_container_copy_construction` exists and has the right signature, the allocator will be constructed from its result.
+
+[horizontal]
+Requires:;; `value_type` is copy constructible.
+Concurrency:;; Blocking on `other`.
+
+---
+
+==== Move Constructor
+```c++
+concurrent_flat_map(concurrent_flat_map&& other);
+```
+
+The move constructor. The internal bucket array of `other` is transferred directly to the new table.
+The hash function, predicate and allocator are move-constructed from `other`.
+
+[horizontal]
+Concurrency:;; Blocking on `other`.
+
+---
+
+==== Iterator Range Constructor with Allocator
+```c++
+template<class InputIterator>
+  concurrent_flat_map(InputIterator f, InputIterator l, const allocator_type& a);
+```
+
+Constructs an empty table using `a` as the allocator, with the default hash function and key equality predicate, and inserts the elements from `[f, l)` into it.
+
+[horizontal]
+Requires:;; `hasher` and `key_equal` need to be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+
+---
+
+==== Allocator Constructor
+```c++
+explicit concurrent_flat_map(Allocator const& a);
+```
+
+Constructs an empty table, using allocator `a`.
+
+---
+
+==== Copy Constructor with Allocator
+```c++
+concurrent_flat_map(concurrent_flat_map const& other, Allocator const& a);
+```
+
+Constructs a table, copying ``other``'s contained elements, hash function, and predicate, but using allocator `a`.
+
+[horizontal]
+Concurrency:;; Blocking on `other`.
+
+---
+
+==== Move Constructor with Allocator
+```c++
+concurrent_flat_map(concurrent_flat_map&& other, Allocator const& a);
+```
+
+If `a == other.get_allocator()`, the elements of `other` are transferred directly to the new table;
+otherwise, elements are move-constructed from those of `other`. The hash function and predicate are move-constructed
+from `other`, and the allocator is copy-constructed from `a`.
+
+[horizontal]
+Concurrency:;; Blocking on `other`.
+
+---
+
+==== Initializer List Constructor
+[source,c++,subs="+quotes"]
+----
+concurrent_flat_map(std::initializer_list<value_type> il,
+                    size_type n = _implementation-defined_,
+                    const hasher& hf = hasher(),
+                    const key_equal& eql = key_equal(),
+                    const allocator_type& a = allocator_type());
+----
+
+Constructs an empty table with at least `n` buckets, using `hf` as the hash function, `eql` as the key equality predicate and `a` as the allocator, and inserts the elements from `il` into it.
+
+[horizontal]
+Requires:;; If the defaults are used, `hasher`, `key_equal` and `allocator_type` need to be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+
+---
+
+==== Bucket Count Constructor with Allocator
+```c++
+concurrent_flat_map(size_type n, allocator_type const& a);
+```
+
+Constructs an empty table with at least `n` buckets, using the default hash function and key equality predicate and `a` as the allocator.
+
+[horizontal]
+Postconditions:;; `size() == 0`
+Requires:;; `hasher` and `key_equal` need to be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+
+---
+
+==== Bucket Count Constructor with Hasher and Allocator
+```c++
+concurrent_flat_map(size_type n, hasher const& hf, allocator_type const& a);
+```
+
+Constructs an empty table with at least `n` buckets, using `hf` as the hash function, the default key equality predicate and `a` as the allocator.
+
+[horizontal]
+Postconditions:;; `size() == 0`
+Requires:;; `key_equal` needs to be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+
+---
+
+==== Iterator Range Constructor with Bucket Count and Allocator
+[source,c++,subs="+quotes"]
+----
+template<class InputIterator>
+  concurrent_flat_map(InputIterator f, InputIterator l, size_type n, const allocator_type& a);
+----
+
+Constructs an empty table with at least `n` buckets, using `a` as the allocator and the default hash function and key equality predicate, and inserts the elements from `[f, l)` into it.
+
+[horizontal]
+Requires:;; `hasher` and `key_equal` need to be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+
+---
+
+==== Iterator Range Constructor with Bucket Count and Hasher
+[source,c++,subs="+quotes"]
+----
+template<class InputIterator>
+  concurrent_flat_map(InputIterator f, InputIterator l, size_type n, const hasher& hf,
+                      const allocator_type& a);
+----
+
+Constructs an empty table with at least `n` buckets, using `hf` as the hash function, `a` as the allocator and the default key equality predicate, and inserts the elements from `[f, l)` into it.
+
+[horizontal]
+Requires:;; `key_equal` needs to be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+
+---
+
+==== initializer_list Constructor with Allocator
+
+```c++
+concurrent_flat_map(std::initializer_list<value_type> il, const allocator_type& a);
+```
+
+Constructs an empty table using `a` as the allocator and the default hash function and key equality predicate, and inserts the elements from `il` into it.
+
+[horizontal]
+Requires:;; `hasher` and `key_equal` need to be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+
+---
+
+==== initializer_list Constructor with Bucket Count and Allocator
+
+```c++
+concurrent_flat_map(std::initializer_list<value_type> il, size_type n, const allocator_type& a);
+```
+
+Constructs an empty table with at least `n` buckets, using `a` as the allocator and the default hash function and key equality predicate, and inserts the elements from `il` into it.
+
+[horizontal]
+Requires:;; `hasher` and `key_equal` need to be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+
+---
+
+==== initializer_list Constructor with Bucket Count and Hasher and Allocator
+
+```c++
+concurrent_flat_map(std::initializer_list<value_type> il, size_type n, const hasher& hf,
+                    const allocator_type& a);
+```
+
+Constructs an empty table with at least `n` buckets, using `hf` as the hash function, `a` as the allocator and the default key equality predicate, and inserts the elements from `il` into it.
+
+[horizontal]
+Requires:;; `key_equal` needs to be https://en.cppreference.com/w/cpp/named_req/DefaultConstructible[DefaultConstructible^].
+
+---
+
+=== Destructor
+
+```c++
+~concurrent_flat_map();
+```
+
+[horizontal]
+Note:;; The destructor is applied to every element, and all memory is deallocated.
+
+---
+
+=== Assignment
+
+==== Copy Assignment
+
+```c++
+concurrent_flat_map& operator=(concurrent_flat_map const& other);
+```
+
+The assignment operator. Destroys previously existing elements, copy-assigns the hash function and predicate from `other`,
+copy-assigns the allocator from `other` if `Alloc::propagate_on_container_copy_assignment` exists and `Alloc::propagate_on_container_copy_assignment::value` is `true`,
+and finally inserts copies of the elements of `other`.
+
+[horizontal]
+Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/CopyInsertable[CopyInsertable^].
+Concurrency:;; Blocking on `*this` and `other`.
+
+---
+
+==== Move Assignment
+```c++
+concurrent_flat_map& operator=(concurrent_flat_map&& other)
+  noexcept(boost::allocator_traits<Allocator>::is_always_equal::value ||
+           boost::allocator_traits<Allocator>::propagate_on_container_move_assignment::value);
+```
+The move assignment operator. Destroys previously existing elements, swaps the hash function and predicate from `other`,
+and move-assigns the allocator from `other` if `Alloc::propagate_on_container_move_assignment` exists and `Alloc::propagate_on_container_move_assignment::value` is `true`.
+If at this point the allocator is equal to `other.get_allocator()`, the internal bucket array of `other` is transferred directly to `*this`;
+otherwise, inserts move-constructed copies of the elements of `other`.
+
+[horizontal]
+Concurrency:;; Blocking on `*this` and `other`.
+
+---
+
+==== Initializer List Assignment
+```c++
+concurrent_flat_map& operator=(std::initializer_list<value_type> il);
+```
+
+Assigns from the values in the initializer list. All previously existing elements are destroyed.
+
+[horizontal]
+Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/CopyInsertable[CopyInsertable^].
+Concurrency:;; Blocking on `*this`.
+
+---
+
+=== Visitation
+
+==== [c]visit
+
+```c++
+template<class F> size_t visit(const key_type& k, F f);
+template<class F> size_t visit(const key_type& k, F f) const;
+template<class F> size_t cvisit(const key_type& k, F f) const;
+template<class K, class F> size_t visit(const K& k, F f);
+template<class K, class F> size_t visit(const K& k, F f) const;
+template<class K, class F> size_t cvisit(const K& k, F f) const;
+```
+
+If an element `x` exists with key equivalent to `k`, invokes `f` with a reference to `x`.
+Such reference is const iff `*this` is const.
+
+[horizontal]
+Returns:;; The number of elements visited (0 or 1).
+Notes:;; The `template<class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup, which avoids the cost of instantiating an instance of the `Key` type.
+
+---
+
+==== [c]visit_all
+
+```c++
+template<class F> size_t visit_all(F f);
+template<class F> size_t visit_all(F f) const;
+template<class F> size_t cvisit_all(F f) const;
+```
+
+Successively invokes `f` with references to each of the elements in the table.
+Such references are const iff `*this` is const.
+
+[horizontal]
+Returns:;; The number of elements visited.
+
+---
+
+==== Parallel [c]visit_all
+
+```c++
+template<class ExecutionPolicy, class F> void visit_all(ExecutionPolicy&& policy, F f);
+template<class ExecutionPolicy, class F> void visit_all(ExecutionPolicy&& policy, F f) const;
+template<class ExecutionPolicy, class F> void cvisit_all(ExecutionPolicy&& policy, F f) const;
+```
+
+Invokes `f` with references to each of the elements in the table. Such references are const iff `*this` is const.
+Execution is parallelized according to the semantics of the execution policy specified.
+
+[horizontal]
+Throws:;; Depending on the exception handling mechanism of the execution policy used, may call `std::terminate` if an exception is thrown within `f`.
+Notes:;; Only available in compilers supporting C++17 parallel algorithms.
+
++
+These overloads only participate in overload resolution if `std::is_execution_policy_v<std::decay_t<ExecutionPolicy>>` is `true`.
+
++
+Unsequenced execution policies are not allowed.
+
+---
+
+=== Size and Capacity
+
+==== empty
+
+```c++
+[[nodiscard]] bool empty() const noexcept;
+```
+
+[horizontal]
+Returns:;; `size() == 0`
+
+---
+
+==== size
+
+```c++
+size_type size() const noexcept;
+```
+
+[horizontal]
+Returns:;; The number of elements in the table.
+Notes:;; In the presence of concurrent insertion operations, the value returned may not accurately reflect
+the true size of the table right after execution.
+
+---
+
+==== max_size
+
+```c++
+size_type max_size() const noexcept;
+```
+
+[horizontal]
+Returns:;; `size()` of the largest possible table.
+
+---
+
+=== Modifiers
+
+==== emplace
+```c++
+template<class... Args> bool emplace(Args&&... args);
+```
+
+Inserts an object, constructed with the arguments `args`, in the table if and only if there is no element in the table with an equivalent key.
+
+[horizontal]
+Requires:;; `value_type` is constructible from `args`.
+Returns:;; `true` if an insert took place.
+Concurrency:;; Blocking on rehashing of `*this`.
+Notes:;; Invalidates pointers and references to elements if a rehashing is issued.
+
+---
+
+==== Copy Insert
+```c++
+bool insert(const value_type& obj);
+bool insert(const init_type& obj);
+```
+
+Inserts `obj` in the table if and only if there is no element in the table with an equivalent key.
+
+[horizontal]
+Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/CopyInsertable[CopyInsertable^].
+Returns:;; `true` if an insert took place.
+Concurrency:;; Blocking on rehashing of `*this`.
+Notes:;; Invalidates pointers and references to elements if a rehashing is issued.
+
++
+A call of the form `insert(x)`, where `x` is equally convertible to both `const value_type&` and `const init_type&`, is not ambiguous and selects the `init_type` overload.
+
+---
+
+==== Move Insert
+```c++
+bool insert(value_type&& obj);
+bool insert(init_type&& obj);
+```
+
+Inserts `obj` in the table if and only if there is no element in the table with an equivalent key.
+
+[horizontal]
+Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/MoveInsertable[MoveInsertable^].
+Returns:;; `true` if an insert took place.
+Concurrency:;; Blocking on rehashing of `*this`.
+Notes:;; Invalidates pointers and references to elements if a rehashing is issued.
+
++
+A call of the form `insert(x)`, where `x` is equally convertible to both `value_type&&` and `init_type&&`, is not ambiguous and selects the `init_type` overload.
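+For illustration (our example, not part of the original reference): with
+`boost::concurrent_flat_map<int, int>`, whose `value_type` is `std::pair<const int, int>`
+and whose `init_type` is `std::pair<int, int>`, the following call is unambiguous
+per the note above:
+
+```c++
+boost::concurrent_flat_map<int, int> m;
+m.insert({1, 100}); // selects the init_type overload, whose non-const
+                    // first member allows moving rather than copying
+```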
+
+---
+
+==== Insert Iterator Range
+```c++
+template<class InputIterator> size_type insert(InputIterator first, InputIterator last);
+```
+
+Equivalent to
+[listing,subs="+macros,+quotes"]
+-----
+  while(first != last) this->xref:#concurrent_flat_map_emplace[emplace](*first++);
+-----
+
+[horizontal]
+Returns:;; The number of elements inserted.
+
+---
+
+==== Insert Initializer List
+```c++
+size_type insert(std::initializer_list<value_type> il);
+```
+
+Equivalent to
+[listing,subs="+macros,+quotes"]
+-----
+  this->xref:#concurrent_flat_map_insert_iterator_range[insert](il.begin(), il.end());
+-----
+
+[horizontal]
+Returns:;; The number of elements inserted.
+
+---
+
+==== emplace_or_[c]visit
+```c++
+template<class... Args, class F> bool emplace_or_visit(Args&&... args, F&& f);
+template<class... Args, class F> bool emplace_or_cvisit(Args&&... args, F&& f);
+```
+
+Inserts an object, constructed with the arguments `args`, in the table if there is no element in the table with an equivalent key.
+Otherwise, invokes `f` with a reference to the equivalent element; such reference is const iff `emplace_or_cvisit` is used.
+
+[horizontal]
+Requires:;; `value_type` is constructible from `args`.
+Returns:;; `true` if an insert took place.
+Concurrency:;; Blocking on rehashing of `*this`.
+Notes:;; Invalidates pointers and references to elements if a rehashing is issued.
+
++
+The interface is exposition only, as C++ does not allow declaring a parameter `f` after a variadic parameter pack.
+
+---
+
+==== Copy insert_or_[c]visit
+```c++
+template<class F> bool insert_or_visit(const value_type& obj, F f);
+template<class F> bool insert_or_cvisit(const value_type& obj, F f);
+template<class F> bool insert_or_visit(const init_type& obj, F f);
+template<class F> bool insert_or_cvisit(const init_type& obj, F f);
+```
+
+Inserts `obj` in the table if and only if there is no element in the table with an equivalent key.
+Otherwise, invokes `f` with a reference to the equivalent element; such reference is const iff a `*_cvisit` overload is used.
+
+[horizontal]
+Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/CopyInsertable[CopyInsertable^].
+Returns:;; `true` if an insert took place.
+Concurrency:;; Blocking on rehashing of `*this`.
+Notes:;; Invalidates pointers and references to elements if a rehashing is issued.
+
++
+In a call of the form `insert_or_[c]visit(obj, f)`, the overloads accepting a `const value_type&` argument participate in overload resolution
+only if `std::remove_cv<std::remove_reference<decltype(obj)>::type>::type` is `value_type`.
+
+---
+
+==== Move insert_or_[c]visit
+```c++
+template<class F> bool insert_or_visit(value_type&& obj, F f);
+template<class F> bool insert_or_cvisit(value_type&& obj, F f);
+template<class F> bool insert_or_visit(init_type&& obj, F f);
+template<class F> bool insert_or_cvisit(init_type&& obj, F f);
+```
+
+Inserts `obj` in the table if and only if there is no element in the table with an equivalent key.
+Otherwise, invokes `f` with a reference to the equivalent element; such reference is const iff a `*_cvisit` overload is used.
+
+[horizontal]
+Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/MoveInsertable[MoveInsertable^].
+Returns:;; `true` if an insert took place.
+Concurrency:;; Blocking on rehashing of `*this`.
+Notes:;; Invalidates pointers and references to elements if a rehashing is issued.
+
++
+In a call of the form `insert_or_[c]visit(obj, f)`, the overloads accepting a `value_type&&` argument participate in overload resolution
+only if `std::remove_reference<decltype(obj)>::type` is `value_type`.
+
+---
+
+==== Insert Iterator Range or Visit
+```c++
+template<class InputIterator, class F>
+  size_type insert_or_visit(InputIterator first, InputIterator last, F f);
+template<class InputIterator, class F>
+  size_type insert_or_cvisit(InputIterator first, InputIterator last, F f);
+```
+
+Equivalent to
+[listing,subs="+macros,+quotes"]
+-----
+  while(first != last) this->xref:#concurrent_flat_map_emplace_or_cvisit[emplace_or_[c\]visit](*first++, f);
+-----
+
+[horizontal]
+Returns:;; The number of elements inserted.
+
+---
+
+==== Insert Initializer List or Visit
+```c++
+template<class F> size_type insert_or_visit(std::initializer_list<value_type> il, F f);
+template<class F> size_type insert_or_cvisit(std::initializer_list<value_type> il, F f);
+```
+
+Equivalent to
+[listing,subs="+macros,+quotes"]
+-----
+  this->xref:#concurrent_flat_map_insert_iterator_range_or_visit[insert_or_[c\]visit](il.begin(), il.end(), f);
+-----
+
+[horizontal]
+Returns:;; The number of elements inserted.
+
+---
+
+==== try_emplace
+```c++
+template<class... Args> bool try_emplace(const key_type& k, Args&&... args);
+template<class... Args> bool try_emplace(key_type&& k, Args&&... args);
+template<class K, class... Args> bool try_emplace(K&& k, Args&&... args);
+```
+
+Inserts an element constructed from `k` and `args` into the table if there is no existing element with key `k` contained within it.
+
+[horizontal]
+Returns:;; `true` if an insert took place.
+Concurrency:;; Blocking on rehashing of `*this`.
+Notes:;; This function is similar to xref:#concurrent_flat_map_emplace[emplace], with the difference that no `value_type` is constructed
+if there is an element with an equivalent key; otherwise, the construction is of the form:
+
++
+--
+```c++
+// first two overloads
+value_type(std::piecewise_construct,
+           std::forward_as_tuple(boost::forward<decltype(k)>(k)),
+           std::forward_as_tuple(boost::forward<Args>(args)...))
+
+// third overload
+value_type(std::piecewise_construct,
+           std::forward_as_tuple(boost::forward<K>(k)),
+           std::forward_as_tuple(boost::forward<Args>(args)...))
+```
+
+unlike xref:#concurrent_flat_map_emplace[emplace], which simply forwards all arguments to ``value_type``'s constructor.
+
+Invalidates pointers and references to elements if a rehashing is issued.
+
+The `template<class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup, which avoids the cost of instantiating an instance of the `Key` type.
+
+--
+
+---
+
+==== try_emplace_or_[c]visit
+```c++
+template<class... Args, class F>
+  bool try_emplace_or_visit(const key_type& k, Args&&... args, F&& f);
+template<class... Args, class F>
+  bool try_emplace_or_cvisit(const key_type& k, Args&&... args, F&& f);
+template<class... Args, class F>
+  bool try_emplace_or_visit(key_type&& k, Args&&... args, F&& f);
+template<class... Args, class F>
+  bool try_emplace_or_cvisit(key_type&& k, Args&&... args, F&& f);
+template<class K, class... Args, class F>
+  bool try_emplace_or_visit(K&& k, Args&&... args, F&& f);
+template<class K, class... Args, class F>
+  bool try_emplace_or_cvisit(K&& k, Args&&... args, F&& f);
+```
+
+Inserts an element constructed from `k` and `args` into the table if there is no existing element with key `k` contained within it.
+Otherwise, invokes `f` with a reference to the equivalent element; such reference is const iff a `*_cvisit` overload is used.
+
+[horizontal]
+Returns:;; `true` if an insert took place.
+Concurrency:;; Blocking on rehashing of `*this`.
+Notes:;; No `value_type` is constructed
+if there is an element with an equivalent key; otherwise, the construction is of the form:
+
++
+--
+```c++
+// first four overloads
+value_type(std::piecewise_construct,
+           std::forward_as_tuple(boost::forward<decltype(k)>(k)),
+           std::forward_as_tuple(boost::forward<Args>(args)...))
+
+// last two overloads
+value_type(std::piecewise_construct,
+           std::forward_as_tuple(boost::forward<K>(k)),
+           std::forward_as_tuple(boost::forward<Args>(args)...))
+```
+
+Invalidates pointers and references to elements if a rehashing is issued.
+
+The interface is exposition only, as C++ does not allow declaring a parameter `f` after a variadic parameter pack.
+
+The `template<class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup, which avoids the cost of instantiating an instance of the `Key` type.
+
+--
+
+---
+
+==== insert_or_assign
+```c++
+template<class M> bool insert_or_assign(const key_type& k, M&& obj);
+template<class M> bool insert_or_assign(key_type&& k, M&& obj);
+template<class K, class M> bool insert_or_assign(K&& k, M&& obj);
+```
+
+Inserts a new element into the table or updates an existing one by assigning to the contained value.
+
+If there is an element with key `k`, then it is updated by assigning `boost::forward<M>(obj)`.
+
+If there is no such element, it is added to the table as:
+```c++
+// first two overloads
+value_type(std::piecewise_construct,
+           std::forward_as_tuple(boost::forward<decltype(k)>(k)),
+           std::forward_as_tuple(boost::forward<M>(obj)))
+
+// third overload
+value_type(std::piecewise_construct,
+           std::forward_as_tuple(boost::forward<K>(k)),
+           std::forward_as_tuple(boost::forward<M>(obj)))
+```
+
+[horizontal]
+Returns:;; `true` if an insert took place.
+Concurrency:;; Blocking on rehashing of `*this`.
+Notes:;; Invalidates pointers and references to elements if a rehashing is issued.
+
++
+The `template<class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup, which avoids the cost of instantiating an instance of the `Key` type.
+
+---
+
+==== erase
+```c++
+size_type erase(const key_type& k);
+template<class K> size_type erase(const K& k);
+```
+
+Erases the element with key equivalent to `k` if it exists.
+
+[horizontal]
+Returns:;; The number of elements erased (0 or 1).
+Throws:;; Only throws an exception if it is thrown by `hasher` or `key_equal`.
+Notes:;; The `template<class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup, which avoids the cost of instantiating an instance of the `Key` type.
+
+---
+
+==== erase_if by Key
+```c++
+template<class F> size_type erase_if(const key_type& k, F f);
+template<class K, class F> size_type erase_if(const K& k, F f);
+```
+
+Erases the element `x` with key equivalent to `k` if it exists and `f(x)` is `true`.
+
+[horizontal]
+Returns:;; The number of elements erased (0 or 1).
+Throws:;; Only throws an exception if it is thrown by `hasher`, `key_equal` or `f`.
+Notes:;; The `template<class K, class F>` overload only participates in overload resolution if `std::is_execution_policy_v<std::decay_t<K>>` is `false`.
+
++
+The `template<class K, class F>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup, which avoids the cost of instantiating an instance of the `Key` type.
+
+---
+
+==== erase_if
+```c++
+template<class F> size_type erase_if(F f);
+```
+
+Successively invokes `f` with references to each of the elements in the table, and erases those for which `f` returns `true`.
+
+[horizontal]
+Returns:;; The number of elements erased.
+Throws:;; Only throws an exception if it is thrown by `f`.
+
+---
+
+==== Parallel erase_if
+```c++
+template<class ExecutionPolicy, class F> void erase_if(ExecutionPolicy&& policy, F f);
+```
+
+Invokes `f` with references to each of the elements in the table, and erases those for which `f` returns `true`.
+Execution is parallelized according to the semantics of the execution policy specified.
+
+[horizontal]
+Throws:;; Depending on the exception handling mechanism of the execution policy used, may call `std::terminate` if an exception is thrown within `f`.
+Notes:;; Only available in compilers supporting C++17 parallel algorithms.
+
++
+This overload only participates in overload resolution if `std::is_execution_policy_v<std::decay_t<ExecutionPolicy>>` is `true`.
+
++
+Unsequenced execution policies are not allowed.
+
+---
+
+==== swap
+```c++
+void swap(concurrent_flat_map& other)
+  noexcept(boost::allocator_traits<Allocator>::is_always_equal::value ||
+           boost::allocator_traits<Allocator>::propagate_on_container_swap::value);
+```
+
+Swaps the contents of the table with the parameter.
+
+If `Allocator::propagate_on_container_swap` is declared and `Allocator::propagate_on_container_swap::value` is `true` then the tables' allocators are swapped. Otherwise, swapping with unequal allocators results in undefined behavior.
+
+[horizontal]
+Throws:;; Nothing unless `key_equal` or `hasher` throw on swapping.
+Concurrency:;; Blocking on `*this` and `other`.
+
+---
+
+==== clear
+```c++
+void clear() noexcept;
+```
+
+Erases all elements in the table.
+
+[horizontal]
+Postconditions:;; `size() == 0`, `max_load() >= max_load_factor() * bucket_count()`
+Concurrency:;; Blocking on `*this`.
+
+---
+
+==== merge
+```c++
+template<class H2, class P2>
+  size_type merge(concurrent_flat_map<Key, T, H2, P2, Allocator>& source);
+template<class H2, class P2>
+  size_type merge(concurrent_flat_map<Key, T, H2, P2, Allocator>&& source);
+```
+
+Move-inserts all the elements from `source` whose key is not already present in `*this`, and erases them from `source`.
+
+[horizontal]
+Returns:;; The number of elements inserted.
+Concurrency:;; Blocking on `*this` and `source`.
+
+---
+
+=== Observers
+
+==== get_allocator
+```c++
+allocator_type get_allocator() const noexcept;
+```
+
+[horizontal]
+Returns:;; The table's allocator.
+
+---
+
+==== hash_function
+```c++
+hasher hash_function() const;
+```
+
+[horizontal]
+Returns:;; The table's hash function.
+
+---
+
+==== key_eq
+```c++
+key_equal key_eq() const;
+```
+
+[horizontal]
+Returns:;; The table's key equality predicate.
+
+---
+
+=== Map Operations
+
+==== count
+```c++
+size_type count(const key_type& k) const;
+template<class K>
+  size_type count(const K& k) const;
+```
+
+[horizontal]
+Returns:;; The number of elements with key equivalent to `k` (0 or 1).
+Notes:;; The `template<class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent.
+This enables heterogeneous lookup, which avoids the cost of instantiating an instance of the `Key` type.
+
++
+In the presence of concurrent insertion operations, the value returned may not accurately reflect
+the true state of the table right after execution.
+
+---
+
+==== contains
+```c++
+bool contains(const key_type& k) const;
+template<class K>
+  bool contains(const K& k) const;
+```
+
+[horizontal]
+Returns:;; A boolean indicating whether or not there is an element with key equal to `k` in the table.
+Notes:;; The `template<class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup, which avoids the cost of instantiating an instance of the `Key` type.
+
++
+In the presence of concurrent insertion operations, the value returned may not accurately reflect
+the true state of the table right after execution.
+
+---
+
+=== Bucket Interface
+
+==== bucket_count
+```c++
+size_type bucket_count() const noexcept;
+```
+
+[horizontal]
+Returns:;; The size of the bucket array.
+
+---
+
+=== Hash Policy
+
+==== load_factor
+```c++
+float load_factor() const noexcept;
+```
+
+[horizontal]
+Returns:;; `static_cast<float>(size())/static_cast<float>(bucket_count())`, or `0` if `bucket_count() == 0`.
+
+---
+
+==== max_load_factor
+
+```c++
+float max_load_factor() const noexcept;
+```
+
+[horizontal]
+Returns:;; The table's maximum load factor.
+
+---
+
+==== Set max_load_factor
+```c++
+void max_load_factor(float z);
+```
+
+[horizontal]
+Effects:;; Does nothing, as the user is not allowed to change this parameter. Kept for compatibility with `boost::unordered_map`.
+
+---
+
+==== max_load
+
+```c++
+size_type max_load() const noexcept;
+```
+
+[horizontal]
+Returns:;; The maximum number of elements the table can hold without rehashing, assuming that no further elements will be erased.
+Note:;; After construction, rehash or clearance, the table's maximum load is at least `max_load_factor() * bucket_count()`.
+This number may decrease on erasure under high-load conditions.
+
++
+In the presence of concurrent insertion operations, the value returned may not accurately reflect
+the true state of the table right after execution.
+
+---
+
+==== rehash
+```c++
+void rehash(size_type n);
+```
+
+Changes, if necessary, the size of the bucket array so that there are at least `n` buckets and the load factor is less than or equal to the maximum load factor. When applicable, this will either grow or shrink the `bucket_count()` associated with the table.
+
+When `size() == 0`, `rehash(0)` will deallocate the underlying bucket array.
+
+Invalidates pointers and references to elements, and changes the order of elements.
+
+[horizontal]
+Throws:;; The function has no effect if an exception is thrown, unless it is thrown by the table's hash function or comparison function.
+Concurrency:;; Blocking on `*this`.
+
+---
+
+==== reserve
+```c++
+void reserve(size_type n);
+```
+
+Equivalent to `a.rehash(ceil(n / a.max_load_factor()))`, where `a` is the table.
+
+Similar to `rehash`, this function can be used to grow or shrink the number of buckets in the table.
+
+Invalidates pointers and references to elements, and changes the order of elements.
+
+[horizontal]
+Throws:;; The function has no effect if an exception is thrown, unless it is thrown by the table's hash function or comparison function.
+Concurrency:;; Blocking on `*this`.
+
+---
+
+=== Deduction Guides
+
+A deduction guide will not participate in overload resolution if any of the following are true:
+
+  - It has an `InputIterator` template parameter and a type that does not qualify as an input iterator is deduced for that parameter.
+  - It has an `Allocator` template parameter and a type that does not qualify as an allocator is deduced for that parameter.
+  - It has a `Hash` template parameter and an integral type or a type that qualifies as an allocator is deduced for that parameter.
+  - It has a `Pred` template parameter and a type that qualifies as an allocator is deduced for that parameter.
+
+A `size_type` parameter type in a deduction guide refers to the `size_type` member type of the
+table type deduced by the deduction guide. Its default value coincides with the default value
+of the constructor selected.
+
+==== __iter-value-type__
+[listing,subs="+macros,+quotes"]
+-----
+template<class InputIterator>
+  using __iter-value-type__ =
+    typename std::iterator_traits<InputIterator>::value_type; // exposition only
+-----
+
+==== __iter-key-type__
+[listing,subs="+macros,+quotes"]
+-----
+template<class InputIterator>
+  using __iter-key-type__ = std::remove_const_t<
+    std::tuple_element_t<0, xref:#concurrent_flat_map_iter_value_type[__iter-value-type__]<InputIterator>>>; // exposition only
+-----
+
+==== __iter-mapped-type__
+[listing,subs="+macros,+quotes"]
+-----
+template<class InputIterator>
+  using __iter-mapped-type__ =
+    std::tuple_element_t<1, xref:#concurrent_flat_map_iter_value_type[__iter-value-type__]<InputIterator>>; // exposition only
+-----
+
+==== __iter-to-alloc-type__
+[listing,subs="+macros,+quotes"]
+-----
+template<class InputIterator>
+  using __iter-to-alloc-type__ = std::pair<
+    std::add_const_t<std::tuple_element_t<0, xref:#concurrent_flat_map_iter_value_type[__iter-value-type__]<InputIterator>>>,
+    std::tuple_element_t<1, xref:#concurrent_flat_map_iter_value_type[__iter-value-type__]<InputIterator>>>; // exposition only
+-----
+
+=== Equality Comparisons
+
+==== operator==
+```c++
+template<class Key, class T, class Hash, class Pred, class Allocator>
+  bool operator==(const concurrent_flat_map<Key, T, Hash, Pred, Allocator>& x,
+                  const concurrent_flat_map<Key, T, Hash, Pred, Allocator>& y);
+```
+
+Returns `true` if `x.size() == y.size()` and for every element in `x`, there is an element in `y` with the same key, with an equal value (using `operator==` to compare the value types).
+
+[horizontal]
+Concurrency:;; Blocking on `x` and `y`.
+Notes:;; Behavior is undefined if the two tables don't have equivalent equality predicates.
+
+---
+
+==== operator!=
+```c++
+template<class Key, class T, class Hash, class Pred, class Allocator>
+  bool operator!=(const concurrent_flat_map<Key, T, Hash, Pred, Allocator>& x,
+                  const concurrent_flat_map<Key, T, Hash, Pred, Allocator>& y);
+```
+
+Returns `false` if `x.size() == y.size()` and for every element in `x`, there is an element in `y` with the same key, with an equal value (using `operator==` to compare the value types); returns `true` otherwise.
+
+[horizontal]
+Concurrency:;; Blocking on `x` and `y`.
+Notes:;; Behavior is undefined if the two tables don't have equivalent equality predicates.
+
+---
+
+=== Swap
+```c++
+template<class Key, class T, class Hash, class Pred, class Alloc>
+  void swap(concurrent_flat_map<Key, T, Hash, Pred, Alloc>& x,
+            concurrent_flat_map<Key, T, Hash, Pred, Alloc>& y)
+    noexcept(noexcept(x.swap(y)));
+```
+
+Equivalent to
+[listing,subs="+macros,+quotes"]
+-----
+x.xref:#concurrent_flat_map_swap[swap](y);
+-----
+
+---
+
+=== erase_if
+```c++
+template<class K, class T, class H, class P, class A, class Predicate>
+  typename concurrent_flat_map<K, T, H, P, A>::size_type
+  erase_if(concurrent_flat_map<K, T, H, P, A>& c, Predicate pred);
+```
+
+Equivalent to
+[listing,subs="+macros,+quotes"]
+-----
+c.xref:#concurrent_flat_map_erase_if[erase_if](pred);
+-----
diff --git a/doc/unordered/hash_equality.adoc b/doc/unordered/hash_equality.adoc
index dd6b2844..583e1173 100644
--- a/doc/unordered/hash_equality.adoc
+++ b/doc/unordered/hash_equality.adoc
@@ -20,14 +20,14 @@ class unordered_map;
 
 The hash function comes first as you might want to change the hash function
 but not the equality predicate. For example, if you wanted to use the
-http://www.isthe.com/chongo/tech/comp/fnv/[FNV-1 hash^] you could write:
+https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash[FNV-1a hash^] you could write:
 
 ```
-boost::unordered_map<std::string, int, hash::fnv_1>
+boost::unordered_map<std::string, int, hash::fnv_1a>
     dictionary;
 ```
 
-There is an link:../../examples/fnv1.hpp[implementation of FNV-1^] in the examples directory.
+There is an link:../../examples/fnv1.hpp[implementation of FNV-1a^] in the examples directory.
 
 If you wish to use a different equality function, you will also need to use a matching hash function. For example, to implement a case insensitive dictionary you need to define a case insensitive equality predicate and hash function:
diff --git a/doc/unordered/intro.adoc b/doc/unordered/intro.adoc
index a809958f..46bc899c 100644
--- a/doc/unordered/intro.adoc
+++ b/doc/unordered/intro.adoc
@@ -4,26 +4,65 @@
 :idprefix: intro_
 :cpp: C++
 
-For accessing data based on key lookup, the {cpp} standard library offers `std::set`,
-`std::map`, `std::multiset` and `std::multimap`. These are generally
-implemented using balanced binary trees so that lookup time has
-logarithmic complexity. That is generally okay, but in many cases a
-link:https://en.wikipedia.org/wiki/Hash_table[hash table^] can perform better, as accessing data has constant complexity,
-on average. The worst case complexity is linear, but that occurs rarely and
-with some care, can be avoided.
+link:https://en.wikipedia.org/wiki/Hash_table[Hash tables^] are extremely popular
+computer data structures and can be found in one form or another in virtually any programming
+language. Whereas other associative structures such as rb-trees (used in {cpp} by `std::set` and `std::map`)
+have logarithmic-time complexity for insertion and lookup, hash tables, if configured properly,
+perform these operations in constant time on average, and are generally much faster.
 
-Also, the existing containers require a 'less than' comparison object
-to order their elements. For some data types this is impossible to implement
-or isn't practical. In contrast, a hash table only needs an equality function
-and a hash function for the key.
+{cpp} introduced __unordered associative containers__ `std::unordered_set`, `std::unordered_map`,
+`std::unordered_multiset` and `std::unordered_multimap` in {cpp}11, but research on hash tables
+hasn't stopped since: advances in CPU architectures such as
+more powerful caches, link:https://en.wikipedia.org/wiki/Single_instruction,_multiple_data[SIMD] operations
+and increasingly available link:https://en.wikipedia.org/wiki/Multi-core_processor[multicore processors]
+open up possibilities for improved hash-based data structures and new use cases that
+are simply beyond the reach of unordered associative containers as specified in 2011.
 
-With this in mind, unordered associative containers were added to the {cpp}
-standard. Boost.Unordered provides an implementation of the containers described in {cpp}11,
-with some <> in
-order to work with non-{cpp}11 compilers and libraries.
+Boost.Unordered offers a catalog of hash containers with different levels of standard compliance,
+performance and intended usage scenarios:
+
+[caption=, title='Table {counter:table-counter}. Boost.Unordered containers']
+[cols="1,1,.^1", frame=all, grid=all]
+|===
+^h|
+^h|*Node-based*
+^h|*Flat*
+
+^.^h|*Closed addressing*
+^m|
+boost::unordered_set +
+boost::unordered_map +
+boost::unordered_multiset +
+boost::unordered_multimap
+^|
+
+^.^h|*Open addressing*
+^m| boost::unordered_node_set +
+boost::unordered_node_map
+^m| boost::unordered_flat_set +
+boost::unordered_flat_map
+
+^.^h|*Concurrent*
+^|
+^| `boost::concurrent_flat_map`
+
+|===
+
+* **Closed-addressing containers** are fully compliant with the C++ specification
+for unordered associative containers and feature one of the fastest implementations
+on the market within the technical constraints imposed by the required standard interface.
+* **Open-addressing containers** rely on much faster data structures and algorithms
+(more than 2 times faster in typical scenarios) while slightly diverging from the standard
+interface to accommodate the implementation.
+There are two variants: **flat** (the fastest) and **node-based**, which
+provide pointer stability under rehashing at the expense of being slower;
+see the sketch after this list for an illustration of the difference.
+* Finally, `boost::concurrent_flat_map` (the only **concurrent container** provided
+at present) is a hashmap designed and implemented to be used in high-performance
+multithreaded scenarios. Its interface is radically different from that of regular C++ containers.
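[Editor's note: the flat/node distinction above can be made concrete with a small sketch; it is illustrative only, and the 1000 insertions are merely a convenient way to force rehashing.]

```c++
#include <boost/unordered/unordered_flat_map.hpp>
#include <boost/unordered/unordered_node_map.hpp>
#include <cassert>

int main() {
  boost::unordered_node_map<int, int> nm;
  nm.emplace(1, 100);
  auto* p = &*nm.find(1);                          // address of the element
  for (int i = 2; i < 1000; ++i) nm.emplace(i, i); // forces rehashing
  assert(p->second == 100); // node-based: pointers/references survive rehashing

  boost::unordered_flat_map<int, int> fm;
  fm.emplace(1, 100);
  // No such guarantee here: rehashing moves flat-stored elements to a new
  // bucket array, so a pointer taken before these insertions may dangle.
  for (int i = 2; i < 1000; ++i) fm.emplace(i, i);
}
```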
+
+All sets and maps in Boost.Unordered are instantiated similarly to
+`std::unordered_set` and `std::unordered_map`, respectively:
 
-`unordered_set` and `unordered_multiset` are defined in the header
-`<boost/unordered_set.hpp>`
 [source,c++]
 ----
 namespace boost {
@@ -32,178 +71,21 @@ namespace boost {
     class Hash = boost::hash<Key>,
     class Pred = std::equal_to<Key>,
     class Alloc = std::allocator<Key> >
-  class unordered_set;
+  class unordered_set;
+  // same for unordered_multiset, unordered_flat_set, unordered_node_set
 
-  template<
-    class Key,
-    class Hash = boost::hash<Key>,
-    class Pred = std::equal_to<Key>,
-    class Alloc = std::allocator<Key> >
-  class unordered_multiset;
-}
-----
-
-`unordered_map` and `unordered_multimap` are defined in the header
-`<boost/unordered_map.hpp>`
-
-[source,c++]
-----
-namespace boost {
   template <
     class Key, class Mapped,
     class Hash = boost::hash<Key>,
     class Pred = std::equal_to<Key>,
     class Alloc = std::allocator<std::pair<const Key, Mapped> > >
   class unordered_map;
-
-  template<
-    class Key, class Mapped,
-    class Hash = boost::hash<Key>,
-    class Pred = std::equal_to<Key>,
-    class Alloc = std::allocator<std::pair<const Key, Mapped> > >
-  class unordered_multimap;
+  // same for unordered_multimap, unordered_flat_map, unordered_node_map
+  // and concurrent_flat_map
 }
 ----
 
-These containers, and all other implementations of standard unordered associative
-containers, use an approach to their internal data structure design called
-*closed addressing*. Starting in Boost 1.81, Boost.Unordered also provides containers
-`boost::unordered_flat_set` and `boost::unordered_flat_map`, which use a
-different data structure strategy commonly known as *open addressing* and depart in
-a small number of ways from the standard so as to offer much better performance
-in exchange (more than 2 times faster in typical scenarios):
-
-
-[source,c++]
-----
-// #include <boost/unordered/unordered_flat_set.hpp>
-//
-// Note: no multiset version
-
-namespace boost {
-  template <
-    class Key,
-    class Hash = boost::hash<Key>,
-    class Pred = std::equal_to<Key>,
-    class Alloc = std::allocator<Key> >
-  class unordered_flat_set;
-}
-----
-
-[source,c++]
-----
-// #include <boost/unordered/unordered_flat_map.hpp>
-//
-// Note: no multimap version
-
-namespace boost {
-  template <
-    class Key, class Mapped,
-    class Hash = boost::hash<Key>,
-    class Pred = std::equal_to<Key>,
-    class Alloc = std::allocator<std::pair<const Key, Mapped> > >
-  class unordered_flat_map;
-}
-----
-
-Starting in Boost 1.82, the containers `boost::unordered_node_set` and `boost::unordered_node_map`
-are introduced: they use open addressing like `boost::unordered_flat_set` and `boost::unordered_flat_map`,
-but internally store element _nodes_, like `boost::unordered_set` and `boost::unordered_map`,
-which provide stability of pointers and references to the elements:
-
-[source,c++]
-----
-// #include <boost/unordered/unordered_node_set.hpp>
-//
-// Note: no multiset version
-
-namespace boost {
-  template <
-    class Key,
-    class Hash = boost::hash<Key>,
-    class Pred = std::equal_to<Key>,
-    class Alloc = std::allocator<Key> >
-  class unordered_node_set;
-}
-----
-
-[source,c++]
-----
-// #include <boost/unordered/unordered_node_map.hpp>
-//
-// Note: no multimap version
-
-namespace boost {
-  template <
-    class Key, class Mapped,
-    class Hash = boost::hash<Key>,
-    class Pred = std::equal_to<Key>,
-    class Alloc = std::allocator<std::pair<const Key, Mapped> > >
-  class unordered_node_map;
-}
-----
-
-These are all the containers provided by Boost.Unordered:
-
-[caption=, title='Table {counter:table-counter}. Boost.Unordered containers']
-[cols="1,1,.^1", frame=all, grid=rows]
-|===
-^h|
-^h|*Node-based*
-^h|*Flat*
-
-^.^h|*Closed addressing*
-^| `boost::unordered_set` +
-`boost::unordered_map` +
-`boost::unordered_multiset` +
-`boost::unordered_multimap`
-^|
-
-^.^h|*Open addressing*
-^| `boost::unordered_node_set` +
-`boost::unordered_node_map`
-^| `boost::unordered_flat_set` +
-`boost::unordered_flat_map`
-
-|===
-
-Closed-addressing containers are pass:[C++]98-compatible. Open-addressing containers require a
-reasonably compliant pass:[C++]11 compiler.
-
-Boost.Unordered containers are used in a similar manner to the normal associative
-containers:
-
-[source,cpp]
-----
-typedef boost::unordered_map<std::string, int> map;
-map x;
-x["one"] = 1;
-x["two"] = 2;
-x["three"] = 3;
-
-assert(x.at("one") == 1);
-assert(x.find("missing") == x.end());
-----
-
-But since the elements aren't ordered, the output of:
-
-[source,c++]
-----
-for(const map::value_type& i: x) {
-    std::cout<<i.first<<","<<i.second<<"\n";
-}
-----
-
-can be in any order; see the <> section for more details.
-
-There are other differences, which are listed in the
-<> section.
diff --git a/doc/unordered/rationale.adoc b/doc/unordered/rationale.adoc
index 50758164..7bbf2260 100644
--- a/doc/unordered/rationale.adoc
+++ b/doc/unordered/rationale.adoc
@@ -4,7 +4,7 @@
 
 = Implementation Rationale
 
-== Closed-addressing containers
+== Closed-addressing Containers
 
 `boost::unordered_[multi]set` and `boost::unordered_[multi]map`
 adhere to the standard requirements for unordered associative
@@ -74,7 +74,7 @@ Since release 1.80.0, prime numbers are chosen for the number of buckets in
 tandem with sophisticated modulo arithmetic. This removes the need for "mixing"
 the result of the user's hash function as was used for release 1.79.0.
 
-== Open-addresing containers
+== Open-addressing Containers
 
 The C++ standard specification of unordered associative containers imposes
 severe limitations on permissible implementations, the most important being
@@ -86,7 +86,7 @@ The design of `boost::unordered_flat_set`/`unordered_node_set` and `boost::unord
 guided by Peter Dimov's
 https://pdimov.github.io/articles/unordered_dev_plan.html[Development Plan for Boost.Unordered^].
 We discuss here the most relevant principles.
 
-=== Hash function
+=== Hash Function
 
 Given its rich functionality and cross-platform interoperability,
 `boost::hash` remains the default hash function of open-addressing containers.
@@ -105,10 +105,10 @@ whereas in 32 bits _C_ = 0xE817FB2Du has been obtained from https://arxiv.org/ab
 When using a hash function directly suitable for open addressing, post-mixing can be opted out
 via a dedicated <> trait. `boost::hash` specializations for string types are marked as avalanching.
 
-=== Platform interoperability
+=== Platform Interoperability
 
 The observable behavior of `boost::unordered_flat_set`/`unordered_node_set` and `boost::unordered_flat_map`/`unordered_node_map` is deterministically
-identical across different compilers as long as their ``std::size_type``s are the same size and the user-provided
+identical across different compilers as long as their ``std::size_t``s are the same size and the user-provided
 hash function and equality predicate are also interoperable —this includes
 elements being ordered in exactly the same way for the same sequence of
 operations.
 
@@ -117,3 +117,25 @@ Although the implementation internally uses SIMD technologies, such as https://e
 and https://en.wikipedia.org/wiki/ARM_architecture_family#Advanced_SIMD_(NEON)[Neon^],
 when available, this does not affect interoperability.
 For instance, the behavior is the same for Visual Studio on an x64-mode Intel CPU with SSE2 and
 for GCC on an IBM s390x without any supported SIMD technology.
+
+== Concurrent Containers
+
+The same data structure used by Boost.Unordered open-addressing containers has also been chosen
+as the foundation of `boost::concurrent_flat_map`:
+
+* Open-addressing is faster than closed-addressing alternatives, both in non-concurrent and
+concurrent scenarios.
+* Open-addressing layouts are eminently suitable for concurrent access and modification
+with minimal locking. In particular, the metadata array can be used for implementations of
+lookup that are lock-free up to the last step of actual element comparison.
+* Layout compatibility with Boost.Unordered flat containers allows for fast transfer
+of all elements between `boost::concurrent_flat_map` and `boost::unordered_flat_map`.
+(This feature has not been implemented yet.)
+
+=== Hash Function and Platform Interoperability
+
+`boost::concurrent_flat_map` makes the same decisions and provides the same guarantees
+as Boost.Unordered open-addressing containers with regard to
+xref:#rationale_hash_function[hash function defaults] and
+xref:#rationale_platform_interoperability[platform interoperability].
+
diff --git a/doc/unordered/ref.adoc b/doc/unordered/ref.adoc
index 62a84b0f..6a9673da 100644
--- a/doc/unordered/ref.adoc
+++ b/doc/unordered/ref.adoc
@@ -10,3 +10,4 @@ include::unordered_flat_map.adoc[]
 include::unordered_flat_set.adoc[]
 include::unordered_node_map.adoc[]
 include::unordered_node_set.adoc[]
+include::concurrent_flat_map.adoc[]
diff --git a/doc/unordered/comparison.adoc b/doc/unordered/regular.adoc
similarity index 56%
rename from doc/unordered/comparison.adoc
rename to doc/unordered/regular.adoc
index 1d5dd97b..9ad36258 100644
--- a/doc/unordered/comparison.adoc
+++ b/doc/unordered/regular.adoc
@@ -1,8 +1,99 @@
+[#regular]
+= Regular Containers
+
+:idprefix: regular_
+
+Boost.Unordered closed-addressing containers (`boost::unordered_set`, `boost::unordered_map`,
+`boost::unordered_multiset` and `boost::unordered_multimap`) are fully conformant with the
+C++ specification for unordered associative containers, so for those who know how to use
+`std::unordered_set`, `std::unordered_map`, etc., their homonyms in Boost.Unordered are
+drop-in replacements. The interface of open-addressing containers (`boost::unordered_node_set`,
+`boost::unordered_node_map`, `boost::unordered_flat_set` and `boost::unordered_flat_map`)
+is very similar, but they present some minor differences listed in the dedicated
+xref:#compliance_open_addressing_containers[standard compliance section].
+
+
+For readers without previous experience with hash containers but familiar
+with normal associative containers (`std::set`, `std::map`,
+`std::multiset` and `std::multimap`), Boost.Unordered containers are used in a similar manner:
+
+[source,cpp]
+----
+typedef boost::unordered_map<std::string, int> map;
+map x;
+x["one"] = 1;
+x["two"] = 2;
+x["three"] = 3;
+
+assert(x.at("one") == 1);
+assert(x.find("missing") == x.end());
+----
+
+But since the elements aren't ordered, the output of:
+
+[source,c++]
+----
+for(const map::value_type& i: x) {
+    std::cout<<i.first<<","<<i.second<<"\n";
+}
+----
+
+can be in any order. There are other differences, which are listed in the
+<> section.
+
+== Iterator Invalidation
+
+It is not specified how member functions other than `rehash` and `reserve` affect
+the bucket count, although `insert` can only invalidate iterators
+when the insertion causes the container's load to be greater than the maximum allowed.
+For most implementations this means that `insert` will only
+change the number of buckets when this happens. Iterators can be
+invalidated by calls to `insert`, `rehash` and `reserve`.
+
+As for pointers and references,
+they are never invalidated for node-based containers
+(`boost::unordered_[multi]set`, `boost::unordered_[multi]map`, `boost::unordered_node_set`, `boost::unordered_node_map`),
+but they will be when rehashing occurs for
+`boost::unordered_flat_set` and `boost::unordered_flat_map`: this is because
+these containers store elements directly into their holding buckets, so
+when allocating a new bucket array the elements must be transferred by means of move construction.
+
+In a similar manner to using `reserve` for ``vector``s, it can be a good idea
+to call `reserve` before inserting a large number of elements. This will get
+the expensive rehashing out of the way and let you store iterators, safe in
+the knowledge that they won't be invalidated. If you are inserting `n`
+elements into container `x`, you could first call:
+
+```
+x.reserve(n);
+```
+
+Note:: `reserve(n)` reserves space for at least `n` elements, allocating enough buckets
+so as to not exceed the maximum load factor.
++
+Because the maximum load factor is defined as the number of elements divided by the total
+number of available buckets, this function is logically equivalent to:
++
+```
+x.rehash(std::ceil(n / x.max_load_factor()))
+```
++
+See the <> on the `rehash` function.
 
 [#comparison]
 :idprefix: comparison_
 
-= Comparison with Associative Containers
+== Comparison with Associative Containers
 
 [caption=, title='Table {counter:table-counter} Interface differences']
 [cols="1,1", frame=all, grid=rows]
@@ -32,7 +123,7 @@
 |`iterator`, `const_iterator` are of at least the forward category.
 |Iterators, pointers and references to the container's elements are never invalidated.
-|<>. +
+|<>. +
 **Node-based containers:** Pointers and references to the container's elements are never invalidated. +
 **Flat containers:** Pointers and references to the container's elements are invalidated when rehashing occurs.
diff --git a/doc/unordered/structures.adoc b/doc/unordered/structures.adoc
new file mode 100644
index 00000000..9859c39e
--- /dev/null
+++ b/doc/unordered/structures.adoc
@@ -0,0 +1,179 @@
+[#structures]
+= Data Structures
+
+:idprefix: structures_
+
+== Closed-addressing Containers
+
+++++
+
+++++
+
+Boost.Unordered sports one of the fastest implementations of closed addressing, also commonly known as https://en.wikipedia.org/wiki/Hash_table#Separate_chaining[separate chaining]. An example figure representing the data structure is below:
+
+[#img-bucket-groups,.text-center]
+.A simple bucket group approach
+image::bucket-groups.png[align=center]
+
+An array of "buckets" is allocated and each bucket in turn points to its own individual linked list. This makes meeting the standard requirements of bucket iteration straightforward. Unfortunately, iteration of the entire container is often slow using this layout as each bucket must be examined for occupancy, yielding a time complexity of `O(bucket_count() + size())` when the standard requires complexity to be `O(size())`.
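[Editor's note: to see where the `O(bucket_count() + size())` term comes from, consider this toy traversal over a naive bucket array; the `node` type and bucket sizes are hypothetical, for illustration only.]

```c++
#include <cstddef>
#include <iostream>
#include <vector>

// Toy separate-chaining node; real implementations differ.
struct node { int value; node* next; };

int main() {
  std::vector<node*> buckets(1024, nullptr); // mostly empty
  node n2{2, nullptr}, n1{1, &n2};
  buckets[17] = &n1;   // bucket holding a two-element chain
  node n3{3, nullptr};
  buckets[900] = &n3;  // singleton bucket

  // Container-wide iteration must probe every bucket for occupancy, so the
  // traversal costs O(bucket_count() + size()) rather than O(size()).
  for (std::size_t b = 0; b < buckets.size(); ++b)
    for (node* p = buckets[b]; p != nullptr; p = p->next)
      std::cout << p->value << '\n';
}
```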
+
+Canonical standard implementations will wind up looking like the diagram below:
+
+[.text-center]
+.The canonical standard approach
+image::singly-linked.png[align=center,link=../diagrams/singly-linked.png,window=_blank]
+
+It's worth noting that this approach is only used by pass:[libc++] and pass:[libstdc++]; the MSVC Dinkumware implementation uses a different one. A more detailed analysis of the standard containers can be found http://bannalia.blogspot.com/2013/10/implementation-of-c-unordered.html[here].
+
+This unusually laid out data structure is chosen to make iteration of the entire container efficient by interconnecting all of the nodes into a singly-linked list. One might also notice that buckets point to the node _before_ the start of the bucket's elements. This is done so that removing elements from the list can be done efficiently without introducing the need for a doubly-linked list. Unfortunately, this data structure introduces a guaranteed extra indirection. For example, to access the first element of a bucket, something like this must be done:
+
+```c++
+auto const idx = get_bucket_idx(hash_function(key));
+node* p = buckets[idx]; // first load
+node* n = p->next; // second load
+if (n && is_in_bucket(n, idx)) {
+  value_type const& v = *n; // third load
+  // ...
+}
+```
+
+With a simple bucket group layout, this is all that must be done:
+```c++
+auto const idx = get_bucket_idx(hash_function(key));
+node* n = buckets[idx]; // first load
+if (n) {
+  value_type const& v = *n; // second load
+  // ...
+}
+```
+
+In practice, the extra indirection can have a dramatic performance impact on common operations such as `insert`, `find` and `erase`. But to keep iteration of the container fast, Boost.Unordered introduces a novel data structure, a "bucket group". A bucket group is a fixed-width view of a subsection of the buckets array. It contains a bitmask (a `std::size_t`) which it uses to track occupancy of buckets and contains two pointers so that it can form a doubly-linked list with non-empty groups. An example diagram is below:
+
+[#img-fca-layout]
+.The new layout used by Boost
+image::fca.png[align=center]
+
+Thus container-wide iteration is turned into traversing the non-empty bucket groups (an operation with constant time complexity), which reduces the time complexity back to `O(size())`. In total, a bucket group is only 4 words in size and it views `sizeof(std::size_t) * CHAR_BIT` buckets, meaning that for all common implementations there are only 4 bits of space overhead per bucket introduced by the bucket groups.
+
+A more detailed description of Boost.Unordered's closed-addressing implementation is
+given in an
+https://bannalia.blogspot.com/2022/06/advancing-state-of-art-for.html[external article].
+For more information on implementation rationale, read the
+xref:#rationale_closed_addressing_containers[corresponding section].
+
+== Open-addressing Containers
+
+The diagram shows the basic internal layout of `boost::unordered_flat_map`/`unordered_node_map` and
+`boost::unordered_flat_set`/`unordered_node_set`.
+
+
+[#img-foa-layout]
+.Open-addressing layout used by Boost.Unordered.
+image::foa.png[align=center]
+
+As with all open-addressing containers, elements (or pointers to the element nodes in the case of
+`boost::unordered_node_map` and `boost::unordered_node_set`) are stored directly in the bucket array.
+This array is logically divided into 2^_n_^ _groups_ of 15 elements each.
+In addition to the bucket array, there is an associated _metadata array_ with 2^_n_^
+16-byte words.
+
+[#img-foa-metadata]
+.Breakdown of a metadata word.
+image::foa-metadata.png[align=center]
+
+A metadata word is divided into 15 _h_~_i_~ bytes (one for each associated
+bucket), and an _overflow byte_ (_ofw_ in the diagram). The value of _h_~_i_~ is:
+
+  - 0 if the corresponding bucket is empty.
+  - 1 to encode a special empty bucket called a _sentinel_, which is used internally to
+  stop iteration when the container has been fully traversed.
+  - If the bucket is occupied, a _reduced hash value_ obtained from the hash value of
+  the element.
+
+When looking for an element with hash value _h_, SIMD technologies such as
+https://en.wikipedia.org/wiki/SSE2[SSE2] and
+https://en.wikipedia.org/wiki/ARM_architecture_family#Advanced_SIMD_(Neon)[Neon] allow us
+to very quickly inspect the full metadata word and look for the reduced value of _h_ among all the
+15 buckets with just a handful of CPU instructions: non-matching buckets can be
+readily discarded, and those whose reduced hash value matches need to be inspected via full
+comparison with the corresponding element. If the looked-for element is not present,
+the overflow byte is inspected:
+
+- If the bit in the position _h_ mod 8 is zero, lookup terminates (and the
+element is not present).
+- If the bit is set to 1 (the group has been _overflowed_), further groups are
+checked using https://en.wikipedia.org/wiki/Quadratic_probing[_quadratic probing_], and
+the process is repeated.
+
+Insertion is algorithmically similar: empty buckets are located using SIMD,
+and when going past a full group its corresponding overflow bit is set to 1.
+
+In architectures without SIMD support, the logical layout stays the same, but the metadata
+word is encoded using a technique we call _bit interleaving_: this layout allows us
+to emulate SIMD with reasonably good performance using only standard arithmetic and
+logical operations.
+
+[#img-foa-metadata-interleaving]
+.Bit-interleaved metadata word.
+image::foa-metadata-interleaving.png[align=center]
+
+A more detailed description of Boost.Unordered's open-addressing implementation is
+given in an
+https://bannalia.blogspot.com/2022/11/inside-boostunorderedflatmap.html[external article].
+For more information on implementation rationale, read the
+xref:#rationale_open_addressing_containers[corresponding section].
+
+== Concurrent Containers
+
+`boost::concurrent_flat_map` uses the basic
+xref:#structures_open_addressing_containers[open-addressing layout] described above
+augmented with synchronization mechanisms.
+
+
+[#img-cfoa-layout]
+.Concurrent open-addressing layout used by Boost.Unordered.
+image::cfoa.png[align=center]
+
+Two levels of synchronization are used:
+
+* Container level: A read-write mutex is used to control access from any operation
+to the container. Typically, such access is in read mode (that is, concurrent) even
+for modifying operations, so for most practical purposes there is no thread
+contention at this level. Access is only in write mode (blocking) when rehashing or
+performing container-wide operations such as swapping or assignment.
+* Group level: Each 15-slot group is equipped with an 8-byte word containing:
+  ** A read-write spinlock for synchronized access to any element in the group.
+  ** An atomic _insertion counter_ used for optimistic insertion as described
+  below.
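[Editor's note: before the algorithmic details that follow, here is a user-level sketch of what these two synchronization levels buy; thread and element counts are arbitrary, chosen only for illustration.]

```c++
#include <boost/unordered/concurrent_flat_map.hpp>
#include <cassert>
#include <thread>
#include <vector>

int main() {
  boost::concurrent_flat_map<int, int> m;

  // Four writers insert disjoint key ranges with no external locking:
  // group-level spinlocks plus the insertion counter keep this safe.
  std::vector<std::thread> workers;
  for (int t = 0; t < 4; ++t)
    workers.emplace_back([&m, t] {
      for (int i = 0; i < 10000; ++i) m.insert({t * 10000 + i, i});
    });
  for (auto& w : workers) w.join();

  assert(m.size() == 40000);
}
```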
+
+By using atomic operations to access the group metadata, lookup is (group-level)
+lock-free up to the point where an actual comparison needs to be done with an element
+that has been previously SIMD-matched: only then is the group's spinlock used.
+
+Insertion uses the following _optimistic algorithm_:
+
+* The value of the insertion counter for the initial group in the probe
+sequence is locally recorded (let's call this value `c0`).
+* Lookup is as described above. If lookup finds no equivalent element,
+the search for an available slot for insertion successively locks/unlocks
+each group in the probing sequence.
+* When an available slot is located, it is preemptively occupied (its
+reduced hash value is set) and the insertion counter is atomically
+incremented: if no other thread has incremented the counter during the
+whole operation (which is checked by comparing with `c0`), then we're
+good to go and complete the insertion, otherwise we roll back and start
+over.
+
+This algorithm has very low contention in both the lookup and actual
+insertion phases in exchange for the possibility that computations have
+to be started over if some other thread interferes in the process by
+performing a successful insertion beginning at the same group. In
+practice, the start-over frequency is extremely small, measured in the range
+of parts per million for some of our benchmarks.
+
+For more information on implementation rationale, read the
+xref:#rationale_concurrent_containers[corresponding section].
diff --git a/doc/unordered/unordered_flat_map.adoc b/doc/unordered/unordered_flat_map.adoc
index e1b27499..ba95eb73 100644
--- a/doc/unordered/unordered_flat_map.adoc
+++ b/doc/unordered/unordered_flat_map.adoc
@@ -1,5 +1,5 @@
 [#unordered_flat_map]
-== Class template unordered_flat_map
+== Class Template unordered_flat_map
 
 :idprefix: unordered_flat_map_
 
@@ -280,6 +281,7 @@ namespace boost {
       unordered_flat_map<Key, T, Hash, Pred, Alloc>& y)
     noexcept(noexcept(x.swap(y)));
 
+  // Erasure
   template<class K, class T, class H, class P, class A, class Predicate>
     typename unordered_flat_map<K, T, H, P, A>::size_type
     xref:#unordered_flat_map_erase_if[erase_if](unordered_flat_map<K, T, H, P, A>& c, Predicate pred);
 
@@ -859,7 +860,7 @@ void insert(std::initializer_list<value_type>);
 
 Inserts a range of elements into the container. Elements are inserted if and only if there is no element in the container with an equivalent key.
 
 [horizontal]
-Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/EmplaceConstructible[EmplaceConstructible^] into the container from `*first`.
+Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/CopyInsertable[CopyInsertable^] into the container.
 Throws:;; When inserting a single element, if an exception is thrown by an operation other than a call to `hasher` the function has no effect.
 Notes:;; Can invalidate iterators, pointers and references, but only if the insert causes the load to be greater than the maximum load.
 
@@ -875,7 +876,7 @@ template<class K, class... Args>
   std::pair<iterator, bool> try_emplace(K&& k, Args&&... args);
 ```
 
-Inserts a new node into the container if there is no existing element with key `k` contained within it.
+Inserts a new element into the container if there is no existing element with key `k` contained within it.
 
 If there is an existing element with key `k` this function does nothing.
 
@@ -904,7 +905,7 @@ unlike xref:#unordered_flat_map_emplace[emplace], which simply forwards all argu
 
 Can invalidate iterators, pointers and references, but only if the insert causes the load to be greater than the maximum load.
-The `template <class K, class... Args>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs and neither `iterator` nor `const_iterator` are implicitly convertible from `K`. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+The `template<class K, class... Args>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs and neither `iterator` nor `const_iterator` are implicitly convertible from `K`. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 --
 
@@ -920,7 +921,7 @@ template<class K, class... Args>
   iterator try_emplace(const_iterator hint, K&& k, Args&&... args);
 ```
 
-Inserts a new node into the container if there is no existing element with key `k` contained within it.
+Inserts a new element into the container if there is no existing element with key `k` contained within it.
 
 If there is an existing element with key `k` this function does nothing.
 
@@ -949,7 +950,7 @@ unlike xref:#unordered_flat_map_emplace_hint[emplace_hint], which simply forward
 
 Can invalidate iterators, pointers and references, but only if the insert causes the load to be greater than the maximum load.
 
-The `template <class K, class... Args>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs and neither `iterator` nor `const_iterator` are implicitly convertible from `K`. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+The `template<class K, class... Args>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs and neither `iterator` nor `const_iterator` are implicitly convertible from `K`. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 --
 
@@ -1160,7 +1161,7 @@ template<class K>
 [horizontal]
 Returns:;; An iterator pointing to an element with key equivalent to `k`, or `end()` if no such element exists.
-Notes:;; The `template <class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
 
@@ -1173,7 +1174,7 @@ template<class K>
 [horizontal]
 Returns:;; The number of elements with key equivalent to `k`.
The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type. +Notes:;; The `template` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type. --- @@ -1186,7 +1187,7 @@ template [horizontal] Returns:;; A boolean indicating whether or not there is an element with key equal to `key` in the container -Notes:;; The `template ` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type. +Notes:;; The `template` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type. --- @@ -1202,7 +1203,7 @@ template [horizontal] Returns:;; A range containing all elements with key equivalent to `k`. If the container doesn't contain any such elements, returns `std::make_pair(b.end(), b.end())`. -Notes:;; The `template ` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type. +Notes:;; The `template` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type. --- diff --git a/doc/unordered/unordered_flat_set.adoc b/doc/unordered/unordered_flat_set.adoc index 7d3d69f8..770ce797 100644 --- a/doc/unordered/unordered_flat_set.adoc +++ b/doc/unordered/unordered_flat_set.adoc @@ -1,5 +1,5 @@ [#unordered_flat_set] -== Class template unordered_flat_set +== Class Template unordered_flat_set :idprefix: unordered_flat_set_ @@ -234,6 +234,7 @@ namespace boost { unordered_flat_set& y) noexcept(noexcept(x.swap(y))); + // Erasure template typename unordered_flat_set::size_type xref:#unordered_flat_set_erase_if[erase_if](unordered_flat_set& c, Predicate pred); @@ -837,7 +838,7 @@ void insert(std::initializer_list); Inserts a range of elements into the container. Elements are inserted if and only if there is no element in the container with an equivalent key. [horizontal] -Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/EmplaceConstructible[EmplaceConstructible^] into the container from `*first`. +Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/CopyInsertable[CopyInsertable^] into the container. 
 Throws:;; When inserting a single element, if an exception is thrown by an operation other than a call to `hasher` the function has no effect.
 Notes:;; Can invalidate iterators, pointers and references, but only if the insert causes the load to be greater than the maximum load.
 
@@ -971,7 +972,7 @@ template<class K>
 [horizontal]
 Returns:;; An iterator pointing to an element with key equivalent to `k`, or `end()` if no such element exists.
-Notes:;; The `template <class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
 
@@ -984,7 +985,7 @@ template<class K>
 [horizontal]
 Returns:;; The number of elements with key equivalent to `k`.
-Notes:;; The `template <class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
 
@@ -997,7 +998,7 @@ template<class K>
 [horizontal]
 Returns:;; A boolean indicating whether or not there is an element with key equal to `key` in the container.
-Notes:;; The `template <class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
 
@@ -1013,7 +1014,7 @@ template<class K>
 [horizontal]
 Returns:;; A range containing all elements with key equivalent to `k`. If the container doesn't contain any such elements, returns `std::make_pair(b.end(), b.end())`.
-Notes:;; The `template <class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
diff --git a/doc/unordered/unordered_map.adoc b/doc/unordered/unordered_map.adoc
index 2cc08369..02d95ac2 100644
--- a/doc/unordered/unordered_map.adoc
+++ b/doc/unordered/unordered_map.adoc
@@ -1,5 +1,5 @@
 [#unordered_map]
-== Class template unordered_map
+== Class Template unordered_map
 
 :idprefix: unordered_map_
 
@@ -286,6 +286,7 @@ namespace boost {
       unordered_map<Key, T, Hash, Pred, Alloc>& y)
     noexcept(noexcept(x.swap(y)));
 
+  // Erasure
   template<class K, class T, class H, class P, class A, class Predicate>
     typename unordered_map<K, T, H, P, A>::size_type
     xref:#unordered_map_erase_if[erase_if](unordered_map<K, T, H, P, A>& c, Predicate pred);
 
@@ -995,7 +996,7 @@ void insert(std::initializer_list<value_type>);
 
 Inserts a range of elements into the container. Elements are inserted if and only if there is no element in the container with an equivalent key.
 
 [horizontal]
-Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/EmplaceConstructible[EmplaceConstructible^] into `X` from `*first`.
+Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/CopyInsertable[CopyInsertable^] into the container.
 Throws:;; When inserting a single element, if an exception is thrown by an operation other than a call to `hasher` the function has no effect.
 Notes:;; Can invalidate iterators, but only if the insert causes the load factor to be greater than or equal to the maximum load factor. +
 +
@@ -1009,11 +1010,11 @@ template<class... Args> std::pair<iterator, bool> try_emplace(const key_type& k, Args&&... args);
 template<class... Args> std::pair<iterator, bool> try_emplace(key_type&& k, Args&&... args);
-template <class K, class... Args>
+template<class K, class... Args>
   std::pair<iterator, bool> try_emplace(K&& k, Args&&... args)
 ```
 
-Inserts a new node into the container if there is no existing element with key `k` contained within it.
+Inserts a new element into the container if there is no existing element with key `k` contained within it.
 
 If there is an existing element with key `k` this function does nothing.
 
@@ -1043,7 +1044,7 @@ Can invalidate iterators, but only if the insert causes the load factor to be gr
 
 Pointers and references to elements are never invalidated.
 
-The `template <class K, class... Args>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs and neither `iterator` nor `const_iterator` are implicitly convertible from `K`. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+The `template<class K, class... Args>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs and neither `iterator` nor `const_iterator` are implicitly convertible from `K`. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 If the compiler doesn't support variadic template arguments or rvalue references, this is emulated for up to `10` arguments, with no support for rvalue references or move semantics.
 
@@ -1062,7 +1063,7 @@ template<class K, class... Args>
   iterator try_emplace(const_iterator hint, K&& k, Args&&... args);
 ```
 
-Inserts a new node into the container if there is no existing element with key `k` contained within it.
+Inserts a new element into the container if there is no existing element with key `k` contained within it.
 
 If there is an existing element with key `k` this function does nothing.
 
@@ -1094,7 +1095,7 @@ Can invalidate iterators, but only if the insert causes the load factor to be gr
 
 Pointers and references to elements are never invalidated.
 
-The `template <class K, class... Args>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs and neither `iterator` nor `const_iterator` are implicitly convertible from `K`. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+The `template<class K, class... Args>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs and neither `iterator` nor `const_iterator` are implicitly convertible from `K`. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 If the compiler doesn't support variadic template arguments or rvalue references, this is emulated for up to `10` arguments, with no support for rvalue references or move semantics.
 
@@ -1466,7 +1467,7 @@ template<class K>
-The `template <class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+The `template<class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
 
@@ -1479,7 +1480,7 @@ template<class K>
 [horizontal]
 Returns:;; The number of elements with key equivalent to `k`.
-Notes:;; The `template <class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
 
@@ -1492,7 +1493,7 @@ template<class K>
 [horizontal]
 Returns:;; A boolean indicating whether or not there is an element with key equal to `key` in the container.
-Notes:;; The `template <class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
 
@@ -1508,7 +1509,7 @@ template<class K>
 [horizontal]
 Returns:;; A range containing all elements with key equivalent to `k`. If the container doesn't contain any such elements, returns `std::make_pair(b.end(), b.end())`.
-Notes:;; The `template <class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
diff --git a/doc/unordered/unordered_multimap.adoc b/doc/unordered/unordered_multimap.adoc
index ab5ccbbb..7a6a09d7 100644
--- a/doc/unordered/unordered_multimap.adoc
+++ b/doc/unordered/unordered_multimap.adoc
@@ -1,5 +1,5 @@
 [#unordered_multimap]
-== Class template unordered_multimap
+== Class Template unordered_multimap
 
 :idprefix: unordered_multimap_
 
@@ -253,6 +253,7 @@ namespace boost {
       unordered_multimap<Key, T, Hash, Pred, Alloc>& y)
     noexcept(noexcept(x.swap(y)));
 
+  // Erasure
   template<class K, class T, class H, class P, class A, class Predicate>
     typename unordered_multimap<K, T, H, P, A>::size_type
     xref:#unordered_multimap_erase_if[erase_if](unordered_multimap<K, T, H, P, A>& c, Predicate pred);
 
@@ -941,7 +942,7 @@ void insert(std::initializer_list<value_type> il);
 
 Inserts a range of elements into the container.
 
 [horizontal]
-Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/EmplaceConstructible[EmplaceConstructible^] into `X` from `*first`.
+Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/CopyInsertable[CopyInsertable^] into the container.
 Throws:;; When inserting a single element, if an exception is thrown by an operation other than a call to `hasher` the function has no effect.
 Notes:;; Can invalidate iterators, but only if the insert causes the load factor to be greater than or equal to the maximum load factor. +
 +
@@ -1223,7 +1224,7 @@ template<class K>
-The `template <class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+The `template<class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
 
@@ -1236,7 +1237,7 @@ template<class K>
 [horizontal]
 Returns:;; The number of elements with key equivalent to `k`.
-Notes:;; The `template <class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
 
@@ -1249,7 +1250,7 @@ template<class K>
 [horizontal]
 Returns:;; A boolean indicating whether or not there is an element with key equal to `key` in the container.
-Notes:;; The `template <class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
 
@@ -1265,7 +1266,7 @@ template<class K>
 [horizontal]
 Returns:;; A range containing all elements with key equivalent to `k`. If the container doesn't contain any such elements, returns `std::make_pair(b.end(), b.end())`.
-Notes:;; The `template <class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
diff --git a/doc/unordered/unordered_multiset.adoc b/doc/unordered/unordered_multiset.adoc
index 2495e570..017ef5c2 100644
--- a/doc/unordered/unordered_multiset.adoc
+++ b/doc/unordered/unordered_multiset.adoc
@@ -1,5 +1,5 @@
 [#unordered_multiset]
-== Class template unordered_multiset
+== Class Template unordered_multiset
 
 :idprefix: unordered_multiset_
 
@@ -244,6 +244,7 @@ namespace boost {
       unordered_multiset<Key, Hash, Pred, Alloc>& y)
     noexcept(noexcept(x.swap(y)));
 
+  // Erasure
   template<class K, class H, class P, class A, class Predicate>
     typename unordered_multiset<K, H, P, A>::size_type
     xref:#unordered_multiset_erase_if[erase_if](unordered_multiset<K, H, P, A>& c, Predicate pred);
 
@@ -899,7 +900,7 @@ void insert(std::initializer_list<value_type> il);
 
 Inserts a range of elements into the container.
 
 [horizontal]
-Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/EmplaceConstructible[EmplaceConstructible^] into `X` from `*first`.
+Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/CopyInsertable[CopyInsertable^] into the container.
 Throws:;; When inserting a single element, if an exception is thrown by an operation other than a call to `hasher` the function has no effect.
 Notes:;; Can invalidate iterators, but only if the insert causes the load factor to be greater than or equal to the maximum load factor. +
 +
@@ -1181,7 +1182,7 @@ template<class K>
-The `template <class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+The `template<class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
 
@@ -1194,7 +1195,7 @@ template<class K>
 [horizontal]
 Returns:;; The number of elements with key equivalent to `k`.
-Notes:;; The `template <class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
 
@@ -1207,7 +1208,7 @@ template<class K>
 [horizontal]
 Returns:;; A boolean indicating whether or not there is an element with key equal to `key` in the container.
-Notes:;; The `template <class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<class K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
 
 ---
 
@@ -1223,7 +1224,7 @@ template<class K>
 [horizontal]
 Returns:;; A range containing all elements with key equivalent to `k`. If the container doesn't contain any such elements, returns `std::make_pair(b.end(), b.end())`.
-Notes:;; The `template <class K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
@@ -1223,7 +1224,7 @@ template <typename K>
 [horizontal]
 Returns:;; A range containing all elements with key equivalent to `k`. If the container doesn't contain any such elements, returns `std::make_pair(b.end(), b.end())`.

-Notes:;; The `template <typename K> ` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<typename K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 ---

diff --git a/doc/unordered/unordered_node_map.adoc b/doc/unordered/unordered_node_map.adoc
index 6a43bb92..4821414a 100644
--- a/doc/unordered/unordered_node_map.adoc
+++ b/doc/unordered/unordered_node_map.adoc
@@ -1,5 +1,5 @@
 [#unordered_node_map]
-== Class template unordered_node_map
+== Class Template unordered_node_map

 :idprefix: unordered_node_map_

@@ -284,6 +284,7 @@ namespace boost {
       unordered_node_map& y)
         noexcept(noexcept(x.swap(y)));

+  // Erasure
   template <class Predicate>
     typename unordered_node_map::size_type
       xref:#unordered_node_map_erase_if[erase_if](unordered_node_map& c, Predicate pred);
@@ -893,7 +894,7 @@ void insert(std::initializer_list<value_type>);
 Inserts a range of elements into the container. Elements are inserted if and only if there is no element in the container with an equivalent key.

 [horizontal]
-Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/EmplaceConstructible[EmplaceConstructible^] into the container from `*first`.
+Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/CopyInsertable[CopyInsertable^] into the container.
 Throws:;; When inserting a single element, if an exception is thrown by an operation other than a call to `hasher` the function has no effect.
 Notes:;; Can invalidate iterators, but only if the insert causes the load to be greater than the maximum load.

@@ -945,7 +946,7 @@ template <class K, class... Args>
   std::pair<iterator, bool> try_emplace(K&& k, Args&&... args);
 ```

-Inserts a new node into the container if there is no existing element with key `k` contained within it.
+Inserts a new element into the container if there is no existing element with key `k` contained within it.

 If there is an existing element with key `k` this function does nothing.

@@ -974,7 +975,7 @@ unlike xref:#unordered_node_map_emplace[emplace], which simply forwards all argu

 Can invalidate iterators, but only if the insert causes the load to be greater than the maximum load.

-The `template <typename K> ` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs and neither `iterator` nor `const_iterator` are implicitly convertible from `K`. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+The `template<typename K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs and neither `iterator` nor `const_iterator` are implicitly convertible from `K`. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 --

@@ -990,7 +991,7 @@ template <class K, class... Args>
   iterator try_emplace(const_iterator hint, K&& k, Args&&... args);
 ```

-Inserts a new node into the container if there is no existing element with key `k` contained within it.
+Inserts a new element into the container if there is no existing element with key `k` contained within it.

 If there is an existing element with key `k` this function does nothing.
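The "node"-to-"element" wording fix above doesn't change behaviour: `try_emplace` only uses its arguments when an insertion actually takes place, unlike `emplace`, which forwards them unconditionally. A small illustrative sketch, not part of the patch:

```cpp
#include <boost/unordered/unordered_node_map.hpp>
#include <cassert>
#include <string>

int main() {
  boost::unordered_node_map<int, std::string> m;
  m.try_emplace(1, "one");          // key absent: element is created
  auto r = m.try_emplace(1, "uno"); // key present: nothing is constructed
  assert(!r.second);                // no insertion took place
  assert(r.first->second == "one"); // the mapped value is untouched
}
```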
@@ -1019,7 +1020,7 @@ unlike xref:#unordered_node_map_emplace_hint[emplace_hint], which simply forward

 Can invalidate iterators, but only if the insert causes the load to be greater than the maximum load.

-The `template <typename K> ` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs and neither `iterator` nor `const_iterator` are implicitly convertible from `K`. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+The `template<typename K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs and neither `iterator` nor `const_iterator` are implicitly convertible from `K`. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 --

@@ -1258,7 +1259,7 @@ template <typename K>
 [horizontal]
 Returns:;; An iterator pointing to an element with key equivalent to `k`, or `end()` if no such element exists.

-Notes:;; The `template <typename K> ` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<typename K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 ---

@@ -1271,7 +1272,7 @@ template <typename K>
 [horizontal]
 Returns:;; The number of elements with key equivalent to `k`.

-Notes:;; The `template <typename K> ` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<typename K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 ---

@@ -1284,7 +1285,7 @@ template <typename K>
 [horizontal]
 Returns:;; A boolean indicating whether or not there is an element with key equal to `key` in the container

-Notes:;; The `template <typename K> ` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<typename K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 ---

@@ -1300,7 +1301,7 @@ template <typename K>
 [horizontal]
 Returns:;; A range containing all elements with key equivalent to `k`. If the container doesn't contain any such elements, returns `std::make_pair(b.end(), b.end())`.

-Notes:;; The `template <typename K> ` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<typename K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 ---

diff --git a/doc/unordered/unordered_node_set.adoc b/doc/unordered/unordered_node_set.adoc
index a15de04f..b5a5ff97 100644
--- a/doc/unordered/unordered_node_set.adoc
+++ b/doc/unordered/unordered_node_set.adoc
@@ -1,5 +1,5 @@
 [#unordered_node_set]
-== Class template unordered_node_set
+== Class Template unordered_node_set

 :idprefix: unordered_node_set_

@@ -238,6 +238,7 @@ namespace boost {
       unordered_node_set& y)
         noexcept(noexcept(x.swap(y)));

+  // Erasure
   template <class Predicate>
     typename unordered_node_set::size_type
       xref:#unordered_node_set_erase_if[erase_if](unordered_node_set& c, Predicate pred);
@@ -874,7 +875,7 @@ void insert(std::initializer_list<value_type>);
 Inserts a range of elements into the container. Elements are inserted if and only if there is no element in the container with an equivalent key.

 [horizontal]
-Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/EmplaceConstructible[EmplaceConstructible^] into the container from `*first`.
+Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/CopyInsertable[CopyInsertable^] into the container.
 Throws:;; When inserting a single element, if an exception is thrown by an operation other than a call to `hasher` the function has no effect.
 Notes:;; Can invalidate iterators, but only if the insert causes the load to be greater than the maximum load.

@@ -1072,7 +1073,7 @@ template <typename K>
 [horizontal]
 Returns:;; An iterator pointing to an element with key equivalent to `k`, or `end()` if no such element exists.

-Notes:;; The `template <typename K> ` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<typename K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 ---

@@ -1085,7 +1086,7 @@ template <typename K>
 [horizontal]
 Returns:;; The number of elements with key equivalent to `k`.
-Notes:;; The `template <typename K> ` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<typename K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 ---

@@ -1098,7 +1099,7 @@ template <typename K>
 [horizontal]
 Returns:;; A boolean indicating whether or not there is an element with key equal to `key` in the container

-Notes:;; The `template <typename K> ` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<typename K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 ---

@@ -1114,7 +1115,7 @@ template <typename K>
 [horizontal]
 Returns:;; A range containing all elements with key equivalent to `k`. If the container doesn't contain any such elements, returns `std::make_pair(b.end(), b.end())`.

-Notes:;; The `template <typename K> ` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<typename K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 ---
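As the `equal_range` notes above spell out, a missing key yields an empty range whose iterators both equal `end()`. An illustrative sketch, not part of the patch:

```cpp
#include <boost/unordered/unordered_node_set.hpp>
#include <cassert>
#include <iterator>

int main() {
  boost::unordered_node_set<int> s{1, 2, 3};
  auto present = s.equal_range(2); // unique keys: at most one element
  assert(std::distance(present.first, present.second) == 1);
  auto absent = s.equal_range(42); // no match: both iterators are end()
  assert(absent.first == s.end() && absent.second == s.end());
}
```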
diff --git a/doc/unordered/unordered_set.adoc b/doc/unordered/unordered_set.adoc
index 8530b4d4..57b6e6f4 100644
--- a/doc/unordered/unordered_set.adoc
+++ b/doc/unordered/unordered_set.adoc
@@ -1,5 +1,5 @@
 [#unordered_set]
-== Class template unordered_set
+== Class Template unordered_set

 :idprefix: unordered_set_

@@ -245,6 +245,7 @@ namespace boost {
       unordered_set& y)
         noexcept(noexcept(x.swap(y)));

+  // Erasure
   template <class Predicate>
     typename unordered_set::size_type
       xref:#unordered_set_erase_if[erase_if](unordered_set& c, Predicate pred);
@@ -959,7 +960,7 @@ void insert(std::initializer_list<value_type>);
 Inserts a range of elements into the container. Elements are inserted if and only if there is no element in the container with an equivalent key.

 [horizontal]
-Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/EmplaceConstructible[EmplaceConstructible^] into `X` from `*first`.
+Requires:;; `value_type` is https://en.cppreference.com/w/cpp/named_req/CopyInsertable[CopyInsertable^] into the container.
 Throws:;; When inserting a single element, if an exception is thrown by an operation other than a call to `hasher` the function has no effect.
 Notes:;; Can invalidate iterators, but only if the insert causes the load factor to be greater than or equal to the maximum load factor.
 +
 +
@@ -1248,7 +1249,7 @@ template <typename K>

-The `template <typename K> ` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+The `template<typename K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 ---

@@ -1261,7 +1262,7 @@ template <typename K>
 [horizontal]
 Returns:;; The number of elements with key equivalent to `k`.

-Notes:;; The `template <typename K> ` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<typename K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 ---

@@ -1274,7 +1275,7 @@ template <typename K>
 [horizontal]
 Returns:;; A boolean indicating whether or not there is an element with key equal to `key` in the container

-Notes:;; The `template <typename K> ` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<typename K>` overload only participates in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.

 ---

@@ -1290,7 +1291,7 @@ template <typename K>
 [horizontal]
 Returns:;; A range containing all elements with key equivalent to `k`. If the container doesn't contain any such elements, returns `std::make_pair(b.end(), b.end())`.

-Notes:;; The `template <typename K> ` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
+Notes:;; The `template<typename K>` overloads only participate in overload resolution if `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs. The library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent. This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
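The rest of the patch introduces `boost::concurrent_flat_map`, whose header follows. Its interface is visitation-based rather than iterator-based, as the design notes further down explain. A minimal usage sketch, not part of the patch:

```cpp
#include <boost/unordered/concurrent_flat_map.hpp>
#include <cassert>
#include <string>

int main() {
  boost::concurrent_flat_map<std::string, int> m;
  m.emplace("apples", 3);
  m.insert_or_assign("apples", 5);
  // elements are reached through visitation; visit returns the number visited
  std::size_t n = m.visit("apples", [](auto& x) { x.second += 1; });
  assert(n == 1);
  int total = 0;
  m.cvisit_all([&](auto const& x) { total += x.second; });
  assert(total == 6);
}
```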
--- diff --git a/include/boost/unordered/concurrent_flat_map.hpp b/include/boost/unordered/concurrent_flat_map.hpp new file mode 100644 index 00000000..2207b9a0 --- /dev/null +++ b/include/boost/unordered/concurrent_flat_map.hpp @@ -0,0 +1,818 @@ +/* Fast open-addressing concurrent hash table. + * + * Copyright 2023 Christian Mazakas. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * See https://www.boost.org/libs/unordered for library home page. + */ + +#ifndef BOOST_UNORDERED_CONCURRENT_FLAT_MAP_HPP +#define BOOST_UNORDERED_CONCURRENT_FLAT_MAP_HPP + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define BOOST_UNORDERED_STATIC_ASSERT_INVOCABLE(F) \ + static_assert(boost::unordered::detail::is_invocable::value, \ + "The provided Callable must be invocable with value_type&"); + +#define BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F) \ + static_assert( \ + boost::unordered::detail::is_invocable::value, \ + "The provided Callable must be invocable with value_type const&"); + +#if BOOST_CXX_VERSION >= 202002L + +#define BOOST_UNORDERED_STATIC_ASSERT_EXEC_POLICY(P) \ + static_assert(!std::is_base_of::value, \ + "ExecPolicy must be sequenced."); \ + static_assert( \ + !std::is_base_of::value, \ + "ExecPolicy must be sequenced."); + +#else + +#define BOOST_UNORDERED_STATIC_ASSERT_EXEC_POLICY(P) \ + static_assert(!std::is_base_of::value, \ + "ExecPolicy must be sequenced."); +#endif + +#define BOOST_UNORDERED_COMMA , + +#define BOOST_UNORDERED_LAST_ARG(Arg, Args) \ + mp11::mp_back > + +#define BOOST_UNORDERED_STATIC_ASSERT_LAST_ARG_INVOCABLE(Arg, Args) \ + BOOST_UNORDERED_STATIC_ASSERT_INVOCABLE(BOOST_UNORDERED_LAST_ARG(Arg, Args)) + +#define BOOST_UNORDERED_STATIC_ASSERT_LAST_ARG_CONST_INVOCABLE(Arg, Args) \ + BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE( \ + BOOST_UNORDERED_LAST_ARG(Arg, Args)) + +namespace boost { + namespace unordered { + namespace detail { + template + struct is_invocable + : std::is_constructible, + std::reference_wrapper::type> > + { + }; + + } // namespace detail + + template + class concurrent_flat_map + { + private: + template + friend class concurrent_flat_map; + + using type_policy = detail::foa::flat_map_types; + + detail::foa::concurrent_table table_; + + template + bool friend operator==(concurrent_flat_map const& lhs, + concurrent_flat_map const& rhs); + + template + friend typename concurrent_flat_map::size_type erase_if( + concurrent_flat_map& set, Predicate pred); + + public: + using key_type = Key; + using mapped_type = T; + using value_type = typename type_policy::value_type; + using init_type = typename type_policy::init_type; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using hasher = typename boost::type_identity::type; + using key_equal = typename boost::type_identity::type; + using allocator_type = typename boost::type_identity::type; + using reference = value_type&; + using const_reference = value_type const&; + using pointer = typename boost::allocator_pointer::type; + using const_pointer = + typename boost::allocator_const_pointer::type; + + concurrent_flat_map() + : concurrent_flat_map(detail::foa::default_bucket_count) + { + } + + explicit concurrent_flat_map(size_type n, const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& a = allocator_type()) + : table_(n, hf, eql, a) + { + } + + 
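/* Note: the angle-bracketed template arguments in this header were stripped
 * when the diff was captured. Judging from the surviving tokens
 * (std::is_constructible, std::reference_wrapper, ::type), the detection
 * trait above reads essentially as follows -- a reconstruction, not a
 * verbatim quote:
 *
 *   template <class F, class... Args>
 *   struct is_invocable
 *       : std::is_constructible<std::function<void(Args...)>,
 *             std::reference_wrapper<typename std::remove_reference<F>::type> >
 *   {
 *   };
 *
 * i.e. a pre-C++17 stand-in for std::is_invocable: a std::function<void(Args...)>
 * is constructible from a reference_wrapper<F> exactly when f(args...) is a
 * well-formed call.
 */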
template + concurrent_flat_map(InputIterator f, InputIterator l, + size_type n = detail::foa::default_bucket_count, + const hasher& hf = hasher(), const key_equal& eql = key_equal(), + const allocator_type& a = allocator_type()) + : table_(n, hf, eql, a) + { + this->insert(f, l); + } + + concurrent_flat_map(concurrent_flat_map const& rhs) + : table_(rhs.table_, + boost::allocator_select_on_container_copy_construction( + rhs.get_allocator())) + { + } + + concurrent_flat_map(concurrent_flat_map&& rhs) + : table_(std::move(rhs.table_)) + { + } + + template + concurrent_flat_map( + InputIterator f, InputIterator l, allocator_type const& a) + : concurrent_flat_map(f, l, 0, hasher(), key_equal(), a) + { + } + + explicit concurrent_flat_map(allocator_type const& a) + : table_(detail::foa::default_bucket_count, hasher(), key_equal(), a) + { + } + + concurrent_flat_map( + concurrent_flat_map const& rhs, allocator_type const& a) + : table_(rhs.table_, a) + { + } + + concurrent_flat_map(concurrent_flat_map&& rhs, allocator_type const& a) + : table_(std::move(rhs.table_), a) + { + } + + concurrent_flat_map(std::initializer_list il, + size_type n = detail::foa::default_bucket_count, + const hasher& hf = hasher(), const key_equal& eql = key_equal(), + const allocator_type& a = allocator_type()) + : concurrent_flat_map(n, hf, eql, a) + { + this->insert(il.begin(), il.end()); + } + + concurrent_flat_map(size_type n, const allocator_type& a) + : concurrent_flat_map(n, hasher(), key_equal(), a) + { + } + + concurrent_flat_map( + size_type n, const hasher& hf, const allocator_type& a) + : concurrent_flat_map(n, hf, key_equal(), a) + { + } + + template + concurrent_flat_map( + InputIterator f, InputIterator l, size_type n, const allocator_type& a) + : concurrent_flat_map(f, l, n, hasher(), key_equal(), a) + { + } + + template + concurrent_flat_map(InputIterator f, InputIterator l, size_type n, + const hasher& hf, const allocator_type& a) + : concurrent_flat_map(f, l, n, hf, key_equal(), a) + { + } + + concurrent_flat_map( + std::initializer_list il, const allocator_type& a) + : concurrent_flat_map( + il, detail::foa::default_bucket_count, hasher(), key_equal(), a) + { + } + + concurrent_flat_map(std::initializer_list il, size_type n, + const allocator_type& a) + : concurrent_flat_map(il, n, hasher(), key_equal(), a) + { + } + + concurrent_flat_map(std::initializer_list il, size_type n, + const hasher& hf, const allocator_type& a) + : concurrent_flat_map(il, n, hf, key_equal(), a) + { + } + + ~concurrent_flat_map() = default; + + concurrent_flat_map& operator=(concurrent_flat_map const& rhs) + { + table_ = rhs.table_; + return *this; + } + + concurrent_flat_map& operator=(concurrent_flat_map&& rhs) + noexcept(boost::allocator_is_always_equal::type::value || + boost::allocator_propagate_on_container_move_assignment< + Allocator>::type::value) + { + table_ = std::move(rhs.table_); + return *this; + } + + concurrent_flat_map& operator=(std::initializer_list ilist) + { + table_ = ilist; + return *this; + } + + /// Capacity + /// + + size_type size() const noexcept { return table_.size(); } + size_type max_size() const noexcept { return table_.max_size(); } + + BOOST_ATTRIBUTE_NODISCARD bool empty() const noexcept + { + return size() == 0; + } + + template + BOOST_FORCEINLINE size_type visit(key_type const& k, F f) + { + BOOST_UNORDERED_STATIC_ASSERT_INVOCABLE(F) + return table_.visit(k, f); + } + + template + BOOST_FORCEINLINE size_type visit(key_type const& k, F f) const + { + 
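/* const overload: the callable must accept value_type const&, which the
   static assertion below enforces with a readable error message */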
BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F) + return table_.visit(k, f); + } + + template + BOOST_FORCEINLINE size_type cvisit(key_type const& k, F f) const + { + BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F) + return table_.visit(k, f); + } + + template + BOOST_FORCEINLINE typename std::enable_if< + detail::are_transparent::value, size_type>::type + visit(K&& k, F f) + { + BOOST_UNORDERED_STATIC_ASSERT_INVOCABLE(F) + return table_.visit(std::forward(k), f); + } + + template + BOOST_FORCEINLINE typename std::enable_if< + detail::are_transparent::value, size_type>::type + visit(K&& k, F f) const + { + BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F) + return table_.visit(std::forward(k), f); + } + + template + BOOST_FORCEINLINE typename std::enable_if< + detail::are_transparent::value, size_type>::type + cvisit(K&& k, F f) const + { + BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F) + return table_.visit(std::forward(k), f); + } + + template size_type visit_all(F f) + { + BOOST_UNORDERED_STATIC_ASSERT_INVOCABLE(F) + return table_.visit_all(f); + } + + template size_type visit_all(F f) const + { + BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F) + return table_.visit_all(f); + } + + template size_type cvisit_all(F f) const + { + BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F) + return table_.cvisit_all(f); + } + +#if defined(BOOST_UNORDERED_PARALLEL_ALGORITHMS) + template + typename std::enable_if::value, + void>::type + visit_all(ExecPolicy&& p, F f) + { + BOOST_UNORDERED_STATIC_ASSERT_INVOCABLE(F) + BOOST_UNORDERED_STATIC_ASSERT_EXEC_POLICY(ExecPolicy) + table_.visit_all(p, f); + } + + template + typename std::enable_if::value, + void>::type + visit_all(ExecPolicy&& p, F f) const + { + BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F) + BOOST_UNORDERED_STATIC_ASSERT_EXEC_POLICY(ExecPolicy) + table_.visit_all(p, f); + } + + template + typename std::enable_if::value, + void>::type + cvisit_all(ExecPolicy&& p, F f) const + { + BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F) + BOOST_UNORDERED_STATIC_ASSERT_EXEC_POLICY(ExecPolicy) + table_.cvisit_all(p, f); + } +#endif + + /// Modifiers + /// + + template + BOOST_FORCEINLINE auto insert(Ty&& value) + -> decltype(table_.insert(std::forward(value))) + { + return table_.insert(std::forward(value)); + } + + BOOST_FORCEINLINE bool insert(init_type&& obj) + { + return table_.insert(std::move(obj)); + } + + template + void insert(InputIterator begin, InputIterator end) + { + for (auto pos = begin; pos != end; ++pos) { + table_.emplace(*pos); + } + } + + void insert(std::initializer_list ilist) + { + this->insert(ilist.begin(), ilist.end()); + } + + template + BOOST_FORCEINLINE bool insert_or_assign(key_type const& k, M&& obj) + { + return table_.try_emplace_or_visit(k, std::forward(obj), + [&](value_type& m) { m.second = std::forward(obj); }); + } + + template + BOOST_FORCEINLINE bool insert_or_assign(key_type&& k, M&& obj) + { + return table_.try_emplace_or_visit(std::move(k), std::forward(obj), + [&](value_type& m) { m.second = std::forward(obj); }); + } + + template + BOOST_FORCEINLINE typename std::enable_if< + detail::are_transparent::value, bool>::type + insert_or_assign(K&& k, M&& obj) + { + return table_.try_emplace_or_visit(std::forward(k), + std::forward(obj), + [&](value_type& m) { m.second = std::forward(obj); }); + } + + template + BOOST_FORCEINLINE auto insert_or_visit(Ty&& value, F f) + -> decltype(table_.insert_or_visit(std::forward(value), f)) + { + return table_.insert_or_visit(std::forward(value), f); + } + + template + 
BOOST_FORCEINLINE bool insert_or_visit(init_type&& obj, F f) + { + BOOST_UNORDERED_STATIC_ASSERT_INVOCABLE(F) + return table_.insert_or_visit(std::move(obj), f); + } + + template + void insert_or_visit(InputIterator first, InputIterator last, F f) + { + BOOST_UNORDERED_STATIC_ASSERT_INVOCABLE(F) + for (; first != last; ++first) { + table_.emplace_or_visit(*first, f); + } + } + + template + void insert_or_visit(std::initializer_list ilist, F f) + { + BOOST_UNORDERED_STATIC_ASSERT_INVOCABLE(F) + this->insert_or_visit(ilist.begin(), ilist.end(), f); + } + + template + BOOST_FORCEINLINE auto insert_or_cvisit(Ty&& value, F f) + -> decltype(table_.insert_or_cvisit(std::forward(value), f)) + { + BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F) + return table_.insert_or_cvisit(std::forward(value), f); + } + + template + BOOST_FORCEINLINE bool insert_or_cvisit(init_type&& obj, F f) + { + BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F) + return table_.insert_or_cvisit(std::move(obj), f); + } + + template + void insert_or_cvisit(InputIterator first, InputIterator last, F f) + { + BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F) + for (; first != last; ++first) { + table_.emplace_or_cvisit(*first, f); + } + } + + template + void insert_or_cvisit(std::initializer_list ilist, F f) + { + BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F) + this->insert_or_visit(ilist.begin(), ilist.end(), f); + } + + template BOOST_FORCEINLINE bool emplace(Args&&... args) + { + return table_.emplace(std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool emplace_or_visit(Arg&& arg, Args&&... args) + { + BOOST_UNORDERED_STATIC_ASSERT_LAST_ARG_INVOCABLE(Arg, Args...) + return table_.emplace_or_visit( + std::forward(arg), std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool emplace_or_cvisit(Arg&& arg, Args&&... args) + { + BOOST_UNORDERED_STATIC_ASSERT_LAST_ARG_CONST_INVOCABLE(Arg, Args...) + return table_.emplace_or_cvisit( + std::forward(arg), std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool try_emplace(key_type const& k, Args&&... args) + { + return table_.try_emplace(k, std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool try_emplace(key_type&& k, Args&&... args) + { + return table_.try_emplace(std::move(k), std::forward(args)...); + } + + template + BOOST_FORCEINLINE typename std::enable_if< + detail::are_transparent::value, bool>::type + try_emplace(K&& k, Args&&... args) + { + return table_.try_emplace( + std::forward(k), std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool try_emplace_or_visit( + key_type const& k, Arg&& arg, Args&&... args) + { + BOOST_UNORDERED_STATIC_ASSERT_LAST_ARG_INVOCABLE(Arg, Args...) + return table_.try_emplace_or_visit( + k, std::forward(arg), std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool try_emplace_or_cvisit( + key_type const& k, Arg&& arg, Args&&... args) + { + BOOST_UNORDERED_STATIC_ASSERT_LAST_ARG_CONST_INVOCABLE(Arg, Args...) + return table_.try_emplace_or_cvisit( + k, std::forward(arg), std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool try_emplace_or_visit( + key_type&& k, Arg&& arg, Args&&... args) + { + BOOST_UNORDERED_STATIC_ASSERT_LAST_ARG_INVOCABLE(Arg, Args...) + return table_.try_emplace_or_visit( + std::move(k), std::forward(arg), std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool try_emplace_or_cvisit( + key_type&& k, Arg&& arg, Args&&... args) + { + BOOST_UNORDERED_STATIC_ASSERT_LAST_ARG_CONST_INVOCABLE(Arg, Args...) 
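/* everything before the trailing callable is forwarded to construct the
   element if k is absent; otherwise the callable runs on the element that
   prevented insertion */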
+ return table_.try_emplace_or_cvisit( + std::move(k), std::forward(arg), std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool try_emplace_or_visit( + K&& k, Arg&& arg, Args&&... args) + { + BOOST_UNORDERED_STATIC_ASSERT_LAST_ARG_INVOCABLE(Arg, Args...) + return table_.try_emplace_or_visit(std::forward(k), + std::forward(arg), std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool try_emplace_or_cvisit( + K&& k, Arg&& arg, Args&&... args) + { + BOOST_UNORDERED_STATIC_ASSERT_LAST_ARG_CONST_INVOCABLE(Arg, Args...) + return table_.try_emplace_or_cvisit(std::forward(k), + std::forward(arg), std::forward(args)...); + } + + BOOST_FORCEINLINE size_type erase(key_type const& k) + { + return table_.erase(k); + } + + template + BOOST_FORCEINLINE typename std::enable_if< + detail::are_transparent::value, size_type>::type + erase(K&& k) + { + return table_.erase(std::forward(k)); + } + + template + BOOST_FORCEINLINE size_type erase_if(key_type const& k, F f) + { + return table_.erase_if(k, f); + } + + template + BOOST_FORCEINLINE typename std::enable_if< + detail::are_transparent::value && + !detail::is_execution_policy::value, + size_type>::type + erase_if(K&& k, F f) + { + return table_.erase_if(std::forward(k), f); + } + +#if defined(BOOST_UNORDERED_PARALLEL_ALGORITHMS) + template + typename std::enable_if::value, + void>::type + erase_if(ExecPolicy&& p, F f) + { + BOOST_UNORDERED_STATIC_ASSERT_EXEC_POLICY(ExecPolicy) + table_.erase_if(p, f); + } +#endif + + template size_type erase_if(F f) { return table_.erase_if(f); } + + void swap(concurrent_flat_map& other) noexcept( + boost::allocator_is_always_equal::type::value || + boost::allocator_propagate_on_container_swap::type::value) + { + return table_.swap(other.table_); + } + + void clear() noexcept { table_.clear(); } + + template + size_type merge(concurrent_flat_map& x) + { + BOOST_ASSERT(get_allocator() == x.get_allocator()); + return table_.merge(x.table_); + } + + template + size_type merge(concurrent_flat_map&& x) + { + return merge(x); + } + + BOOST_FORCEINLINE size_type count(key_type const& k) const + { + return table_.count(k); + } + + template + BOOST_FORCEINLINE typename std::enable_if< + detail::are_transparent::value, size_type>::type + count(K const& k) + { + return table_.count(k); + } + + BOOST_FORCEINLINE bool contains(key_type const& k) const + { + return table_.contains(k); + } + + template + BOOST_FORCEINLINE typename std::enable_if< + detail::are_transparent::value, bool>::type + contains(K const& k) const + { + return table_.contains(k); + } + + /// Hash Policy + /// + size_type bucket_count() const noexcept { return table_.capacity(); } + + float load_factor() const noexcept { return table_.load_factor(); } + float max_load_factor() const noexcept + { + return table_.max_load_factor(); + }; + void max_load_factor(float) {} + size_type max_load() const noexcept { return table_.max_load(); } + + void rehash(size_type n) { table_.rehash(n); } + void reserve(size_type n) { table_.reserve(n); } + + /// Observers + /// + allocator_type get_allocator() const noexcept + { + return table_.get_allocator(); + } + + hasher hash_function() const { return table_.hash_function(); } + key_equal key_eq() const { return table_.key_eq(); } + }; + + template + bool operator==( + concurrent_flat_map const& lhs, + concurrent_flat_map const& rhs) + { + return lhs.table_ == rhs.table_; + } + + template + bool operator!=( + concurrent_flat_map const& lhs, + concurrent_flat_map const& rhs) + { + return !(lhs == rhs); + } + + 
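/* Container-wide operations such as the copy assignment and the equality
 * comparison above take exclusive (write) locks on both tables involved.
 * An illustrative usage sketch, not part of the patch:
 *
 *   boost::concurrent_flat_map<int, int> a, b;
 *   a.emplace(1, 10);
 *   b = a;           // locks both containers exclusively
 *   assert(a == b);  // so does operator==
 *   swap(a, b);      // ADL swap, declared just below
 */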
template + void swap(concurrent_flat_map& x, + concurrent_flat_map& y) + noexcept(noexcept(x.swap(y))) + { + x.swap(y); + } + + template + typename concurrent_flat_map::size_type erase_if( + concurrent_flat_map& c, Predicate pred) + { + return c.table_.erase_if(pred); + } + +#if BOOST_UNORDERED_TEMPLATE_DEDUCTION_GUIDES + + template >, + class Pred = + std::equal_to >, + class Allocator = std::allocator< + boost::unordered::detail::iter_to_alloc_t >, + class = boost::enable_if_t >, + class = boost::enable_if_t >, + class = boost::enable_if_t >, + class = boost::enable_if_t > > + concurrent_flat_map(InputIterator, InputIterator, + std::size_t = boost::unordered::detail::foa::default_bucket_count, + Hash = Hash(), Pred = Pred(), Allocator = Allocator()) + -> concurrent_flat_map< + boost::unordered::detail::iter_key_t, + boost::unordered::detail::iter_val_t, Hash, Pred, + Allocator>; + + template >, + class Pred = std::equal_to >, + class Allocator = std::allocator >, + class = boost::enable_if_t >, + class = boost::enable_if_t >, + class = boost::enable_if_t > > + concurrent_flat_map(std::initializer_list >, + std::size_t = boost::unordered::detail::foa::default_bucket_count, + Hash = Hash(), Pred = Pred(), Allocator = Allocator()) + -> concurrent_flat_map, T, Hash, Pred, + Allocator>; + + template >, + class = boost::enable_if_t > > + concurrent_flat_map(InputIterator, InputIterator, std::size_t, Allocator) + -> concurrent_flat_map< + boost::unordered::detail::iter_key_t, + boost::unordered::detail::iter_val_t, + boost::hash >, + std::equal_to >, + Allocator>; + + template >, + class = boost::enable_if_t > > + concurrent_flat_map(InputIterator, InputIterator, Allocator) + -> concurrent_flat_map< + boost::unordered::detail::iter_key_t, + boost::unordered::detail::iter_val_t, + boost::hash >, + std::equal_to >, + Allocator>; + + template >, + class = boost::enable_if_t >, + class = boost::enable_if_t > > + concurrent_flat_map( + InputIterator, InputIterator, std::size_t, Hash, Allocator) + -> concurrent_flat_map< + boost::unordered::detail::iter_key_t, + boost::unordered::detail::iter_val_t, Hash, + std::equal_to >, + Allocator>; + + template > > + concurrent_flat_map(std::initializer_list >, std::size_t, + Allocator) -> concurrent_flat_map, T, + boost::hash >, + std::equal_to >, Allocator>; + + template > > + concurrent_flat_map(std::initializer_list >, Allocator) + -> concurrent_flat_map, T, + boost::hash >, + std::equal_to >, Allocator>; + + template >, + class = boost::enable_if_t > > + concurrent_flat_map(std::initializer_list >, std::size_t, + Hash, Allocator) -> concurrent_flat_map, T, + Hash, std::equal_to >, Allocator>; + +#endif + + } // namespace unordered + + using unordered::concurrent_flat_map; +} // namespace boost + +#undef BOOST_UNORDERED_STATIC_ASSERT_INVOCABLE +#undef BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE +#undef BOOST_UNORDERED_STATIC_ASSERT_EXEC_POLICY +#undef BOOST_UNORDERED_COMMA +#undef BOOST_UNORDERED_LAST_ARG +#undef BOOST_UNORDERED_STATIC_ASSERT_LAST_ARG_INVOCABLE +#undef BOOST_UNORDERED_STATIC_ASSERT_LAST_ARG_CONST_INVOCABLE + +#endif // BOOST_UNORDERED_CONCURRENT_FLAT_MAP_HPP diff --git a/include/boost/unordered/concurrent_flat_map_fwd.hpp b/include/boost/unordered/concurrent_flat_map_fwd.hpp new file mode 100644 index 00000000..3a39c7a7 --- /dev/null +++ b/include/boost/unordered/concurrent_flat_map_fwd.hpp @@ -0,0 +1,54 @@ +/* Fast open-addressing concurrent hash table. + * + * Copyright 2023 Christian Mazakas. 
+ * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * See https://www.boost.org/libs/unordered for library home page. + */ + +#ifndef BOOST_UNORDERED_CONCURRENT_FLAT_MAP_FWD_HPP +#define BOOST_UNORDERED_CONCURRENT_FLAT_MAP_FWD_HPP + +#include + +#include +#include + +namespace boost { + namespace unordered { + + template , + class Pred = std::equal_to, + class Allocator = std::allocator > > + class concurrent_flat_map; + + template + bool operator==( + concurrent_flat_map const& lhs, + concurrent_flat_map const& rhs); + + template + bool operator!=( + concurrent_flat_map const& lhs, + concurrent_flat_map const& rhs); + + template + void swap(concurrent_flat_map& x, + concurrent_flat_map& y) + noexcept(noexcept(x.swap(y))); + + template + typename concurrent_flat_map::size_type erase_if( + concurrent_flat_map& c, Predicate pred); + + } // namespace unordered + + using boost::unordered::concurrent_flat_map; + using boost::unordered::swap; + using boost::unordered::operator==; + using boost::unordered::operator!=; +} // namespace boost + +#endif // BOOST_UNORDERED_CONCURRENT_FLAT_MAP_HPP diff --git a/include/boost/unordered/detail/foa/concurrent_table.hpp b/include/boost/unordered/detail/foa/concurrent_table.hpp new file mode 100644 index 00000000..0f1f1145 --- /dev/null +++ b/include/boost/unordered/detail/foa/concurrent_table.hpp @@ -0,0 +1,1324 @@ +/* Fast open-addressing concurrent hash table. + * + * Copyright 2023 Joaquin M Lopez Munoz. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * See https://www.boost.org/libs/unordered for library home page. 
+ */ + +#ifndef BOOST_UNORDERED_DETAIL_FOA_CONCURRENT_TABLE_HPP +#define BOOST_UNORDERED_DETAIL_FOA_CONCURRENT_TABLE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if !defined(BOOST_UNORDERED_DISABLE_PARALLEL_ALGORITHMS) +#if defined(BOOST_UNORDERED_ENABLE_PARALLEL_ALGORITHMS)|| \ + !defined(BOOST_NO_CXX17_HDR_EXECUTION) +#define BOOST_UNORDERED_PARALLEL_ALGORITHMS +#endif +#endif + +#if defined(BOOST_UNORDERED_PARALLEL_ALGORITHMS) +#include +#include +#endif + +namespace boost{ +namespace unordered{ +namespace detail{ + +#if defined(BOOST_UNORDERED_PARALLEL_ALGORITHMS) + +template +using is_execution_policy=std::is_execution_policy< + typename std::remove_cv< + typename std::remove_reference::type + >::type +>; + +#else + +template +using is_execution_policy=std::false_type; + +#endif + +namespace foa{ + +static constexpr std::size_t cacheline_size=64; + +template +class cache_aligned_array +{ +public: + cache_aligned_array(){for(std::size_t n=0;n0;)data(n--)->~T();} + cache_aligned_array(const cache_aligned_array&)=delete; + cache_aligned_array& operator=(const cache_aligned_array&)=delete; + + T& operator[](std::size_t pos)noexcept{return *data(pos);} + +private: + static constexpr std::size_t element_offset= + (sizeof(T)+cacheline_size-1)/cacheline_size*cacheline_size; + + BOOST_STATIC_ASSERT(alignof(T)<=cacheline_size); + + T* data(std::size_t pos)noexcept + { + return reinterpret_cast( + (reinterpret_cast(&buf)+cacheline_size-1)/ + cacheline_size*cacheline_size + +pos*element_offset); + } + + unsigned char buf[element_offset*N+cacheline_size-1]; +}; + +template +class multimutex +{ +public: + constexpr std::size_t size()const noexcept{return N;} + + Mutex& operator[](std::size_t pos)noexcept + { + BOOST_ASSERT(pos0;)mutexes[--n].unlock();} + +private: + cache_aligned_array mutexes; +}; + +/* std::shared_lock is C++14 */ + +template +class shared_lock +{ +public: + shared_lock(Mutex& m_)noexcept:m{m_}{m.lock_shared();} + ~shared_lock()noexcept{if(owns)m.unlock_shared();} + + /* not used but VS in pre-C++17 mode needs to see it for RVO */ + shared_lock(const shared_lock&); + + void lock(){BOOST_ASSERT(!owns);m.lock_shared();owns=true;} + void unlock(){BOOST_ASSERT(owns);m.unlock_shared();owns=false;} + +private: + Mutex &m; + bool owns=true; +}; + +/* VS in pre-C++17 mode can't implement RVO for std::lock_guard due to + * its copy constructor being deleted. 
+ */ + +template +class lock_guard +{ +public: + lock_guard(Mutex& m_)noexcept:m{m_}{m.lock();} + ~lock_guard()noexcept{m.unlock();} + + /* not used but VS in pre-C++17 mode needs to see it for RVO */ + lock_guard(const lock_guard&); + +private: + Mutex &m; +}; + +/* inspired by boost/multi_index/detail/scoped_bilock.hpp */ + +template +class scoped_bilock +{ +public: + scoped_bilock(Mutex& m1,Mutex& m2)noexcept + { + bool mutex_lt=std::less{}(&m1,&m2); + + pm1=mutex_lt?&m1:&m2; + pm1->lock(); + if(&m1==&m2){ + pm2=nullptr; + } + else{ + pm2=mutex_lt?&m2:&m1; + pm2->lock(); + } + } + + /* not used but VS in pre-C++17 mode needs to see it for RVO */ + scoped_bilock(const scoped_bilock&); + + ~scoped_bilock()noexcept + { + if(pm2)pm2->unlock(); + pm1->unlock(); + } + +private: + Mutex *pm1,*pm2; +}; + +/* use atomics for group metadata storage */ + +template +struct atomic_integral +{ + operator Integral()const{return n.load(std::memory_order_relaxed);} + void operator=(Integral m){n.store(m,std::memory_order_relaxed);} + void operator|=(Integral m){n.fetch_or(m,std::memory_order_relaxed);} + void operator&=(Integral m){n.fetch_and(m,std::memory_order_relaxed);} + + atomic_integral& operator=(atomic_integral const& rhs) { + n.store(rhs.n.load(std::memory_order_relaxed),std::memory_order_relaxed); + return *this; + } + + std::atomic n; +}; + +/* Group-level concurrency protection. It provides a rw mutex plus an + * atomic insertion counter for optimistic insertion (see + * unprotected_norehash_emplace_or_visit). + */ + +struct group_access +{ + using mutex_type=rw_spinlock; + using shared_lock_guard=shared_lock; + using exclusive_lock_guard=lock_guard; + using insert_counter_type=std::atomic; + + shared_lock_guard shared_access(){return shared_lock_guard{m};} + exclusive_lock_guard exclusive_access(){return exclusive_lock_guard{m};} + insert_counter_type& insert_counter(){return cnt;} + +private: + mutex_type m; + insert_counter_type cnt{0}; +}; + +template +group_access* dummy_group_accesses() +{ + /* Default group_access array to provide to empty containers without + * incurring dynamic allocation. Mutexes won't actually ever be used, + * (no successful reduced hash match) and insertion counters won't ever + * be incremented (insertions won't succeed as capacity()==0). 
+ */ + + static group_access accesses[Size]; + + return accesses; +} + +/* subclasses table_arrays to add an additional group_access array */ + +template +struct concurrent_table_arrays:table_arrays +{ + using super=table_arrays; + + concurrent_table_arrays(const super& arrays,group_access *pga): + super{arrays},group_accesses{pga}{} + + template + static concurrent_table_arrays new_(Allocator& al,std::size_t n) + { + concurrent_table_arrays arrays{super::new_(al,n),nullptr}; + if(!arrays.elements){ + arrays.group_accesses=dummy_group_accesses(); + } + else{ + using access_alloc= + typename boost::allocator_rebind::type; + using access_traits=boost::allocator_traits; + + BOOST_TRY{ + auto aal=access_alloc(al); + arrays.group_accesses=boost::to_address( + access_traits::allocate(aal,arrays.groups_size_mask+1)); + + for(std::size_t i=0;i + static void delete_(Allocator& al,concurrent_table_arrays& arrays)noexcept + { + if(arrays.elements){ + using access_alloc= + typename boost::allocator_rebind::type; + using access_traits=boost::allocator_traits; + using pointer=typename access_traits::pointer; + using pointer_traits=boost::pointer_traits; + + auto aal=access_alloc(al); + access_traits::deallocate( + aal,pointer_traits::pointer_to(*arrays.group_accesses), + arrays.groups_size_mask+1); + } + super::delete_(al,arrays); + } + + group_access *group_accesses; +}; + +struct atomic_size_control +{ + static constexpr auto atomic_size_t_size=sizeof(std::atomic); + BOOST_STATIC_ASSERT(atomic_size_t_size ml; + unsigned char pad1_[cacheline_size-atomic_size_t_size]; + std::atomic size; +}; + +/* std::swap can't be used on non-assignable atomics */ + +inline void +swap_atomic_size_t(std::atomic& x,std::atomic& y) +{ + std::size_t tmp=x; + x=static_cast(y); + y=tmp; +} + +inline void swap(atomic_size_control& x,atomic_size_control& y) +{ + swap_atomic_size_t(x.ml,y.ml); + swap_atomic_size_t(x.size,y.size); +} + +/* foa::concurrent_table serves as the foundation for end-user concurrent + * hash containers. The TypePolicy parameter can specify flat/node-based + * map-like and set-like containers, though currently we're only providing + * boost::concurrent_flat_map. + * + * The exposed interface (completed by the wrapping containers) is not that + * of a regular container (in fact, it does not model Container as understood + * by the C++ standard): + * + * - Iterators are not provided as they are not suitable for concurrent + * scenarios. + * - As a consequence, composite operations with regular containers + * (like, for instance, looking up an element and modifying it), must + * be provided natively without any intervening iterator/accesor. + * Visitation is a core concept in this design, either on its own (eg. + * visit(k) locates the element with key k *and* accesses it) or as part + * of a native composite operation (eg. try_emplace_or_visit). Visitation + * is constant or mutating depending on whether the used table function is + * const or not. + * - The API provides member functions for all the meaningful composite + * operations of the form "X (and|or) Y", where X, Y are one of the + * primitives FIND, ACCESS, INSERT or ERASE. + * - Parallel versions of [c]visit_all(f) and erase_if(f) are provided based + * on C++17 stdlib parallel algorithms. + * + * Consult boost::unordered_flat_map docs for the full API reference. + * Heterogeneous lookup is suported by default, that is, without checking for + * any ::is_transparent typedefs --this checking is done by the wrapping + * containers. 
+ * + * Thread-safe concurrency is implemented using a two-level lock system: + * + * - A first container-level lock is implemented with an array of + * rw spinlocks acting as a single rw mutex with very little + * cache-coherence traffic on read (each thread is assigned a different + * spinlock in the array). Container-level write locking is only used for + * rehashing and other container-wide operations (assignment, swap, etc.) + * - Each group of slots has an associated rw spinlock. A thread holds + * at most one group lock at any given time. Lookup is implemented in + * a (groupwise) lock-free manner until a reduced hash match is found, in + * which case the relevant group is locked and the slot is double-checked + * for occupancy and compared with the key. + * - Each group has also an associated so-called insertion counter used for + * the following optimistic insertion algorithm: + * - The value of the insertion counter for the initial group in the probe + * sequence is locally recorded (let's call this value c0). + * - Lookup is as described above. If lookup finds no equivalent element, + * search for an available slot for insertion successively locks/unlocks + * each group in the probing sequence. + * - When an available slot is located, it is preemptively occupied (its + * reduced hash value is set) and the insertion counter is atomically + * incremented: if no other thread has incremented the counter during the + * whole operation (which is checked by comparing with c0), then we're + * good to go and complete the insertion, otherwise we roll back and start + * over. + */ + +template +using concurrent_table_core_impl=table_core< + TypePolicy,group15,concurrent_table_arrays, + atomic_size_control,Hash,Pred,Allocator>; + +#include + +#if defined(BOOST_MSVC) +#pragma warning(push) +#pragma warning(disable:4714) /* marked as __forceinline not inlined */ +#endif + +template +class concurrent_table: + concurrent_table_core_impl +{ + using super=concurrent_table_core_impl; + using type_policy=typename super::type_policy; + using group_type=typename super::group_type; + using super::N; + using prober=typename super::prober; + + template< + typename TypePolicy2,typename Hash2,typename Pred2,typename Allocator2> + friend class concurrent_table; + +public: + using key_type=typename super::key_type; + using init_type=typename super::init_type; + using value_type=typename super::value_type; + using element_type=typename super::element_type; + using hasher=typename super::hasher; + using key_equal=typename super::key_equal; + using allocator_type=typename super::allocator_type; + using size_type=typename super::size_type; + +private: + template + using enable_if_is_value_type=typename std::enable_if< + !std::is_same::value&& + std::is_same::value, + T + >::type; + +public: + concurrent_table( + std::size_t n=default_bucket_count,const Hash& h_=Hash(), + const Pred& pred_=Pred(),const Allocator& al_=Allocator()): + super{n,h_,pred_,al_} + {} + + concurrent_table(const concurrent_table& x): + concurrent_table(x,x.exclusive_access()){} + concurrent_table(concurrent_table&& x): + concurrent_table(std::move(x),x.exclusive_access()){} + concurrent_table(const concurrent_table& x,const Allocator& al_): + concurrent_table(x,al_,x.exclusive_access()){} + concurrent_table(concurrent_table&& x,const Allocator& al_): + concurrent_table(std::move(x),al_,x.exclusive_access()){} + ~concurrent_table()=default; + + concurrent_table& operator=(const concurrent_table& x) + { + auto lck=exclusive_access(*this,x); 
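/* exclusive_access(*this,x) is a scoped_bilock over both tables' mutex
   arrays; it acquires the two locks in address order (std::less), so two
   threads assigning in opposite directions cannot deadlock */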
+ super::operator=(x); + return *this; + } + + concurrent_table& operator=(concurrent_table&& x) + { + auto lck=exclusive_access(*this,x); + super::operator=(std::move(x)); + return *this; + } + + concurrent_table& operator=(std::initializer_list il) { + auto lck=exclusive_access(); + super::clear(); + super::noshrink_reserve(il.size()); + for (auto const& v : il) { + this->unprotected_emplace(v); + } + return *this; + } + + allocator_type get_allocator()const noexcept + { + auto lck=shared_access(); + return super::get_allocator(); + } + + template + BOOST_FORCEINLINE std::size_t visit(const Key& x,F&& f) + { + return visit_impl(group_exclusive{},x,std::forward(f)); + } + + template + BOOST_FORCEINLINE std::size_t visit(const Key& x,F&& f)const + { + return visit_impl(group_shared{},x,std::forward(f)); + } + + template + BOOST_FORCEINLINE std::size_t cvisit(const Key& x,F&& f)const + { + return visit(x,std::forward(f)); + } + + template std::size_t visit_all(F&& f) + { + return visit_all_impl(group_exclusive{},std::forward(f)); + } + + template std::size_t visit_all(F&& f)const + { + return visit_all_impl(group_shared{},std::forward(f)); + } + + template std::size_t cvisit_all(F&& f)const + { + return visit_all(std::forward(f)); + } + +#if defined(BOOST_UNORDERED_PARALLEL_ALGORITHMS) + template + void visit_all(ExecutionPolicy&& policy,F&& f) + { + visit_all_impl( + group_exclusive{}, + std::forward(policy),std::forward(f)); + } + + template + void visit_all(ExecutionPolicy&& policy,F&& f)const + { + visit_all_impl( + group_shared{}, + std::forward(policy),std::forward(f)); + } + + template + void cvisit_all(ExecutionPolicy&& policy,F&& f)const + { + visit_all(std::forward(policy),std::forward(f)); + } +#endif + + bool empty()const noexcept{return size()==0;} + + std::size_t size()const noexcept + { + auto lck=shared_access(); + return unprotected_size(); + } + + using super::max_size; + + template + BOOST_FORCEINLINE bool emplace(Args&&... args) + { + return construct_and_emplace(std::forward(args)...); + } + + BOOST_FORCEINLINE bool + insert(const init_type& x){return emplace_impl(x);} + + BOOST_FORCEINLINE bool + insert(init_type&& x){return emplace_impl(std::move(x));} + + /* template tilts call ambiguities in favor of init_type */ + + template + BOOST_FORCEINLINE bool + insert(const value_type& x){return emplace_impl(x);} + + template + BOOST_FORCEINLINE bool + insert(value_type&& x){return emplace_impl(std::move(x));} + + template + BOOST_FORCEINLINE bool try_emplace(Key&& x,Args&&... args) + { + return emplace_impl( + try_emplace_args_t{},std::forward(x),std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool try_emplace_or_visit(Key&& x,Args&&... args) + { + return emplace_or_visit_flast( + group_exclusive{}, + try_emplace_args_t{},std::forward(x),std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool try_emplace_or_cvisit(Key&& x,Args&&... args) + { + return emplace_or_visit_flast( + group_shared{}, + try_emplace_args_t{},std::forward(x),std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool emplace_or_visit(Args&&... args) + { + return construct_and_emplace_or_visit_flast( + group_exclusive{},std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool emplace_or_cvisit(Args&&... 
args) + { + return construct_and_emplace_or_visit_flast( + group_shared{},std::forward(args)...); + } + + template + BOOST_FORCEINLINE bool insert_or_visit(const init_type& x,F&& f) + { + return emplace_or_visit_impl(group_exclusive{},std::forward(f),x); + } + + template + BOOST_FORCEINLINE bool insert_or_cvisit(const init_type& x,F&& f) + { + return emplace_or_visit_impl(group_shared{},std::forward(f),x); + } + + template + BOOST_FORCEINLINE bool insert_or_visit(init_type&& x,F&& f) + { + return emplace_or_visit_impl( + group_exclusive{},std::forward(f),std::move(x)); + } + + template + BOOST_FORCEINLINE bool insert_or_cvisit(init_type&& x,F&& f) + { + return emplace_or_visit_impl( + group_shared{},std::forward(f),std::move(x)); + } + + /* SFINAE tilts call ambiguities in favor of init_type */ + + template + BOOST_FORCEINLINE auto insert_or_visit(const Value& x,F&& f) + ->enable_if_is_value_type + { + return emplace_or_visit_impl(group_exclusive{},std::forward(f),x); + } + + template + BOOST_FORCEINLINE auto insert_or_cvisit(const Value& x,F&& f) + ->enable_if_is_value_type + { + return emplace_or_visit_impl(group_shared{},std::forward(f),x); + } + + template + BOOST_FORCEINLINE auto insert_or_visit(Value&& x,F&& f) + ->enable_if_is_value_type + { + return emplace_or_visit_impl( + group_exclusive{},std::forward(f),std::move(x)); + } + + template + BOOST_FORCEINLINE auto insert_or_cvisit(Value&& x,F&& f) + ->enable_if_is_value_type + { + return emplace_or_visit_impl( + group_shared{},std::forward(f),std::move(x)); + } + + template + BOOST_FORCEINLINE std::size_t erase(const Key& x) + { + return erase_if(x,[](const value_type&){return true;}); + } + + template + BOOST_FORCEINLINE auto erase_if(const Key& x,F&& f)->typename std::enable_if< + !is_execution_policy::value,std::size_t>::type + { + auto lck=shared_access(); + auto hash=this->hash_for(x); + std::size_t res=0; + unprotected_internal_visit( + group_exclusive{},x,this->position_for(hash),hash, + [&,this](group_type* pg,unsigned int n,element_type* p) + { + if(f(cast_for(group_exclusive{},type_policy::value_from(*p)))){ + super::erase(pg,n,p); + res=1; + } + }); + return res; + } + + template + std::size_t erase_if(F&& f) + { + auto lck=shared_access(); + std::size_t res=0; + for_all_elements( + group_exclusive{}, + [&,this](group_type* pg,unsigned int n,element_type* p){ + if(f(cast_for(group_exclusive{},type_policy::value_from(*p)))){ + super::erase(pg,n,p); + ++res; + } + }); + return res; + } + +#if defined(BOOST_UNORDERED_PARALLEL_ALGORITHMS) + template + auto erase_if(ExecutionPolicy&& policy,F&& f)->typename std::enable_if< + is_execution_policy::value,void>::type + { + auto lck=shared_access(); + for_all_elements( + group_exclusive{},std::forward(policy), + [&,this](group_type* pg,unsigned int n,element_type* p){ + if(f(cast_for(group_exclusive{},type_policy::value_from(*p)))){ + super::erase(pg,n,p); + } + }); + } +#endif + + void swap(concurrent_table& x) + noexcept(noexcept(std::declval().swap(std::declval()))) + { + auto lck=exclusive_access(*this,x); + super::swap(x); + } + + void clear()noexcept + { + auto lck=exclusive_access(); + super::clear(); + } + + // TODO: should we accept different allocator too? 
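/* merge visits every element of x and moves it into *this; erase_on_exit
   removes each element from x on scope exit unless the insertion failed
   (key already present here), in which case rollback() keeps it in x */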
+  template<typename Hash2,typename Pred2>
+  size_type merge(concurrent_table<TypePolicy,Hash2,Pred2,Allocator>& x)
+  {
+    using merge_table_type=concurrent_table<TypePolicy,Hash2,Pred2,Allocator>;
+    using super2=typename merge_table_type::super;
+
+    // for clang
+    boost::ignore_unused<super2>();
+
+    auto lck=exclusive_access(*this,x);
+    size_type s=super::size();
+    x.super2::for_all_elements( /* super2::for_all_elements -> unprotected */
+      [&,this](group_type* pg,unsigned int n,element_type* p){
+        typename merge_table_type::erase_on_exit e{x,pg,n,p};
+        if(!unprotected_emplace(type_policy::move(*p)))e.rollback();
+      });
+    return size_type{super::size()-s};
+  }
+
+  template<typename Hash2,typename Pred2>
+  void merge(concurrent_table<TypePolicy,Hash2,Pred2,Allocator>&& x){merge(x);}
+
+  hasher hash_function()const
+  {
+    auto lck=shared_access();
+    return super::hash_function();
+  }
+
+  key_equal key_eq()const
+  {
+    auto lck=shared_access();
+    return super::key_eq();
+  }
+
+  template<typename Key>
+  BOOST_FORCEINLINE std::size_t count(Key&& x)const
+  {
+    return (std::size_t)contains(std::forward<Key>(x));
+  }
+
+  template<typename Key>
+  BOOST_FORCEINLINE bool contains(Key&& x)const
+  {
+    return visit(std::forward<Key>(x),[](const value_type&){})!=0;
+  }
+
+  std::size_t capacity()const noexcept
+  {
+    auto lck=shared_access();
+    return super::capacity();
+  }
+
+  float load_factor()const noexcept
+  {
+    auto lck=shared_access();
+    if(super::capacity()==0)return 0;
+    else return float(unprotected_size())/
+                float(super::capacity());
+  }
+
+  using super::max_load_factor;
+
+  std::size_t max_load()const noexcept
+  {
+    auto lck=shared_access();
+    return super::max_load();
+  }
+
+  void rehash(std::size_t n)
+  {
+    auto lck=exclusive_access();
+    super::rehash(n);
+  }
+
+  void reserve(std::size_t n)
+  {
+    auto lck=exclusive_access();
+    super::reserve(n);
+  }
+
+  template<typename Predicate>
+  friend std::size_t erase_if(concurrent_table& x,Predicate&& pr)
+  {
+    return x.erase_if(std::forward<Predicate>(pr));
+  }
+
+  friend bool operator==(const concurrent_table& x,const concurrent_table& y)
+  {
+    auto lck=exclusive_access(x,y);
+    return static_cast<const super&>(x)==static_cast<const super&>(y);
+  }
+
+  friend bool operator!=(const concurrent_table& x,const concurrent_table& y)
+  {
+    return !(x==y);
+  }
+
+private:
+  using mutex_type=rw_spinlock;
+  using multimutex_type=multimutex<mutex_type,128>; // TODO: adapt 128 to the machine
+  using shared_lock_guard=shared_lock<mutex_type>;
+  using exclusive_lock_guard=lock_guard<multimutex_type>;
+  using exclusive_bilock_guard=scoped_bilock<multimutex_type>;
+  using group_shared_lock_guard=typename group_access::shared_lock_guard;
+  using group_exclusive_lock_guard=typename group_access::exclusive_lock_guard;
+  using group_insert_counter_type=typename group_access::insert_counter_type;
+
+  concurrent_table(const concurrent_table& x,exclusive_lock_guard):
+    super{x}{}
+  concurrent_table(concurrent_table&& x,exclusive_lock_guard):
+    super{std::move(x)}{}
+  concurrent_table(
+    const concurrent_table& x,const Allocator& al_,exclusive_lock_guard):
+    super{x,al_}{}
+  concurrent_table(
+    concurrent_table&& x,const Allocator& al_,exclusive_lock_guard):
+    super{std::move(x),al_}{}
+
+  inline shared_lock_guard shared_access()const
+  {
+    thread_local auto id=(++thread_counter)%mutexes.size();
+
+    return shared_lock_guard{mutexes[id]};
+  }
+
+  inline exclusive_lock_guard exclusive_access()const
+  {
+    return exclusive_lock_guard{mutexes};
+  }
+
+  static inline exclusive_bilock_guard exclusive_access(
+    const concurrent_table& x,const concurrent_table& y)
+  {
+    return {x.mutexes,y.mutexes};
+  }
+
+  template<typename Hash2,typename Pred2>
+  static inline exclusive_bilock_guard exclusive_access(
+    const concurrent_table& x,
+    const concurrent_table<TypePolicy,Hash2,Pred2,Allocator>& y)
+  {
+    return {x.mutexes,y.mutexes};
+  }
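shared_access() above hands each thread a quasi-fixed shard of the multimutex through a thread_local counter, so concurrent readers rarely contend on the same cache line, while exclusive_access() must take every shard. A minimal self-contained sketch of that idea (shard count and all names here are illustrative, not the library's types):

  #include <array>
  #include <atomic>
  #include <shared_mutex>

  class sharded_rwlock
  {
    static constexpr std::size_t num_shards=128;
    static std::atomic<unsigned> counter;
    mutable std::array<std::shared_mutex,num_shards> shards;

  public:
    std::shared_lock<std::shared_mutex> lock_shared()const
    {
      // each thread keeps reusing the shard picked on its first call
      thread_local const std::size_t id=counter++%num_shards;
      return std::shared_lock<std::shared_mutex>{shards[id]};
    }

    void lock_exclusive()const
    {
      for(auto& s:shards)s.lock();   // acquire all shards, in a fixed order
    }

    void unlock_exclusive()const
    {
      for(auto& s:shards)s.unlock();
    }
  };

  std::atomic<unsigned> sharded_rwlock::counter{};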
+
+  /* Tag-dispatched shared/exclusive group access */
+
+  using group_shared=std::false_type;
+  using group_exclusive=std::true_type;
+
+  inline group_shared_lock_guard access(group_shared,std::size_t pos)const
+  {
+    return this->arrays.group_accesses[pos].shared_access();
+  }
+
+  inline group_exclusive_lock_guard access(
+    group_exclusive,std::size_t pos)const
+  {
+    return this->arrays.group_accesses[pos].exclusive_access();
+  }
+
+  inline group_insert_counter_type& insert_counter(std::size_t pos)const
+  {
+    return this->arrays.group_accesses[pos].insert_counter();
+  }
+
+  /* Const casts value_type& according to the level of group access for
+   * safe passing to visitation functions. When type_policy is set-like,
+   * access is always const regardless of group access.
+   */
+
+  static inline const value_type&
+  cast_for(group_shared,value_type& x){return x;}
+
+  static inline typename std::conditional<
+    std::is_same<key_type,value_type>::value,
+    const value_type&,
+    value_type&
+  >::type
+  cast_for(group_exclusive,value_type& x){return x;}
+
+  struct erase_on_exit
+  {
+    erase_on_exit(
+      concurrent_table& x_,
+      group_type* pg_,unsigned int pos_,element_type* p_):
+      x{x_},pg{pg_},pos{pos_},p{p_}{}
+    ~erase_on_exit(){if(!rollback_)x.super::erase(pg,pos,p);}
+
+    void rollback(){rollback_=true;}
+
+    concurrent_table &x;
+    group_type       *pg;
+    unsigned int      pos;
+    element_type     *p;
+    bool              rollback_=false;
+  };
+
+  template<typename GroupAccessMode,typename Key,typename F>
+  BOOST_FORCEINLINE std::size_t visit_impl(
+    GroupAccessMode access_mode,const Key& x,F&& f)const
+  {
+    auto lck=shared_access();
+    auto hash=this->hash_for(x);
+    return unprotected_visit(
+      access_mode,x,this->position_for(hash),hash,std::forward<F>(f));
+  }
+
+  template<typename GroupAccessMode,typename F>
+  std::size_t visit_all_impl(GroupAccessMode access_mode,F&& f)const
+  {
+    auto lck=shared_access();
+    std::size_t res=0;
+    for_all_elements(access_mode,[&](element_type* p){
+      f(cast_for(access_mode,type_policy::value_from(*p)));
+      ++res;
+    });
+    return res;
+  }
+
+#if defined(BOOST_UNORDERED_PARALLEL_ALGORITHMS)
+  template<typename GroupAccessMode,typename ExecutionPolicy,typename F>
+  void visit_all_impl(
+    GroupAccessMode access_mode,ExecutionPolicy&& policy,F&& f)const
+  {
+    auto lck=shared_access();
+    for_all_elements(
+      access_mode,std::forward<ExecutionPolicy>(policy),
+      [&](element_type* p){
+        f(cast_for(access_mode,type_policy::value_from(*p)));
+      });
+  }
+#endif
+
+  template<typename GroupAccessMode,typename Key,typename F>
+  BOOST_FORCEINLINE std::size_t unprotected_visit(
+    GroupAccessMode access_mode,
+    const Key& x,std::size_t pos0,std::size_t hash,F&& f)const
+  {
+    return unprotected_internal_visit(
+      access_mode,x,pos0,hash,
+      [&](group_type*,unsigned int,element_type* p)
+        {f(cast_for(access_mode,type_policy::value_from(*p)));});
+  }
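unprotected_visit and its relatives walk a group's 15-bit match mask; the mask&=mask-1 idiom used throughout this file extracts each set bit in ascending order. A standalone illustration, with C++20 std::countr_zero standing in for the library's unchecked_countr_zero:

  #include <bit>
  #include <cstdio>

  int main()
  {
    // pretend match() returned this mask: slots 0, 3 and 7 matched
    unsigned mask=0b10001001;
    while(mask){
      int n=std::countr_zero(mask); // index of the lowest set bit
      std::printf("probing slot %d\n",n);
      mask&=mask-1;                 // clear the lowest set bit
    }
  }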
+
+#if defined(BOOST_MSVC)
+/* warning: forcing value to bool 'true' or 'false' in bool(pred()...) */
+#pragma warning(push)
+#pragma warning(disable:4800)
+#endif
+
+  template<typename GroupAccessMode,typename Key,typename F>
+  BOOST_FORCEINLINE std::size_t unprotected_internal_visit(
+    GroupAccessMode access_mode,
+    const Key& x,std::size_t pos0,std::size_t hash,F&& f)const
+  {
+    prober pb(pos0);
+    do{
+      auto pos=pb.get();
+      auto pg=this->arrays.groups+pos;
+      auto mask=pg->match(hash);
+      if(mask){
+        auto p=this->arrays.elements+pos*N;
+        BOOST_UNORDERED_PREFETCH_ELEMENTS(p,N);
+        auto lck=access(access_mode,pos);
+        do{
+          auto n=unchecked_countr_zero(mask);
+          if(BOOST_LIKELY(
+            pg->is_occupied(n)&&bool(this->pred()(x,this->key_from(p[n]))))){
+            f(pg,n,p+n);
+            return 1;
+          }
+          mask&=mask-1;
+        }while(mask);
+      }
+      if(BOOST_LIKELY(pg->is_not_overflowed(hash))){
+        return 0;
+      }
+    }
+    while(BOOST_LIKELY(pb.next(this->arrays.groups_size_mask)));
+    return 0;
+  }
+
+#if defined(BOOST_MSVC)
+#pragma warning(pop) /* C4800 */
+#endif
+
+  std::size_t unprotected_size()const
+  {
+    std::size_t m=this->size_ctrl.ml;
+    std::size_t s=this->size_ctrl.size;
+    return s<=m?s:m;
+  }
+
+  template<typename... Args>
+  BOOST_FORCEINLINE bool construct_and_emplace(Args&&... args)
+  {
+    return construct_and_emplace_or_visit(
+      group_shared{},[](const value_type&){},std::forward<Args>(args)...);
+  }
+
+  struct call_construct_and_emplace_or_visit
+  {
+    template<typename... Args>
+    BOOST_FORCEINLINE bool operator()(
+      concurrent_table* this_,Args&&... args)const
+    {
+      return this_->construct_and_emplace_or_visit(
+        std::forward<Args>(args)...);
+    }
+  };
+
+  template<typename GroupAccessMode,typename... Args>
+  BOOST_FORCEINLINE bool construct_and_emplace_or_visit_flast(
+    GroupAccessMode access_mode,Args&&... args)
+  {
+    return mp11::tuple_apply(
+      call_construct_and_emplace_or_visit{},
+      std::tuple_cat(
+        std::make_tuple(this,access_mode),
+        tuple_rotate_right(std::forward_as_tuple(std::forward<Args>(args)...))
+      )
+    );
+  }
+
+  template<typename GroupAccessMode,typename F,typename... Args>
+  BOOST_FORCEINLINE bool construct_and_emplace_or_visit(
+    GroupAccessMode access_mode,F&& f,Args&&... args)
+  {
+    auto lck=shared_access();
+
+    auto x=alloc_make_insert_type<type_policy>(
+      this->al(),std::forward<Args>(args)...);
+    int res=unprotected_norehash_emplace_or_visit(
+      access_mode,std::forward<F>(f),type_policy::move(x.value()));
+    if(BOOST_LIKELY(res>=0))return res!=0;
+
+    lck.unlock();
+
+    rehash_if_full();
+    return noinline_emplace_or_visit(
+      access_mode,std::forward<F>(f),type_policy::move(x.value()));
+  }
+
+  template<typename... Args>
+  BOOST_FORCEINLINE bool emplace_impl(Args&&... args)
+  {
+    return emplace_or_visit_impl(
+      group_shared{},[](const value_type&){},std::forward<Args>(args)...);
+  }
+
+  template<typename GroupAccessMode,typename F,typename... Args>
+  BOOST_NOINLINE bool noinline_emplace_or_visit(
+    GroupAccessMode access_mode,F&& f,Args&&... args)
+  {
+    return emplace_or_visit_impl(
+      access_mode,std::forward<F>(f),std::forward<Args>(args)...);
+  }
+
+  struct call_emplace_or_visit_impl
+  {
+    template<typename... Args>
+    BOOST_FORCEINLINE bool operator()(
+      concurrent_table* this_,Args&&... args)const
+    {
+      return this_->emplace_or_visit_impl(std::forward<Args>(args)...);
+    }
+  };
+
+  template<typename GroupAccessMode,typename... Args>
+  BOOST_FORCEINLINE bool emplace_or_visit_flast(
+    GroupAccessMode access_mode,Args&&... args)
+  {
+    return mp11::tuple_apply(
+      call_emplace_or_visit_impl{},
+      std::tuple_cat(
+        std::make_tuple(this,access_mode),
+        tuple_rotate_right(std::forward_as_tuple(std::forward<Args>(args)...))
+      )
+    );
+  }
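The *_flast helpers exist because the public emplace_or_visit(args...,f) takes the visitation function last, while the internal impls want it first; tuple_rotate_right moves the trailing argument to the front before mp11::tuple_apply re-invokes. A rough standalone equivalent using std::apply (the real code uses Boost.Mp11; rotate_right here is a hypothetical stand-in):

  #include <cstddef>
  #include <iostream>
  #include <tuple>
  #include <utility>

  // move the last tuple element to the front: (a,b,c) -> (c,a,b)
  template<typename Tuple,std::size_t... I>
  auto rotate_right_impl(Tuple&& t,std::index_sequence<I...>)
  {
    constexpr std::size_t N=sizeof...(I)+1;
    return std::forward_as_tuple(
      std::get<N-1>(std::forward<Tuple>(t)),
      std::get<I>(std::forward<Tuple>(t))...);
  }

  template<typename... Ts>
  auto rotate_right(std::tuple<Ts...>&& t)
  {
    return rotate_right_impl(
      std::move(t),std::make_index_sequence<sizeof...(Ts)-1>{});
  }

  int main()
  {
    auto visit_first=[](const char* f,int a,int b){
      std::cout<<f<<" "<<a<<" "<<b<<"\n";
    };
    // the caller passed the "function" last; rotate before applying
    std::apply(visit_first,rotate_right(std::make_tuple(1,2,"visitor")));
  }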
+
+  template<typename GroupAccessMode,typename F,typename... Args>
+  BOOST_FORCEINLINE bool emplace_or_visit_impl(
+    GroupAccessMode access_mode,F&& f,Args&&... args)
+  {
+    for(;;){
+      {
+        auto lck=shared_access();
+        int res=unprotected_norehash_emplace_or_visit(
+          access_mode,std::forward<F>(f),std::forward<Args>(args)...);
+        if(BOOST_LIKELY(res>=0))return res!=0;
+      }
+      rehash_if_full();
+    }
+  }
+
+  template<typename... Args>
+  BOOST_FORCEINLINE bool unprotected_emplace(Args&&... args)
+  {
+    const auto &k=this->key_from(std::forward<Args>(args)...);
+    auto hash=this->hash_for(k);
+    auto pos0=this->position_for(hash);
+
+    if(this->find(k,pos0,hash))return false;
+
+    if(BOOST_LIKELY(this->size_ctrl.size<this->size_ctrl.ml)){
+      this->unchecked_emplace_at(pos0,hash,std::forward<Args>(args)...);
+    }
+    else{
+      this->unchecked_emplace_with_rehash(hash,std::forward<Args>(args)...);
+    }
+    return true;
+  }
+
+  struct reserve_size
+  {
+    reserve_size(concurrent_table& x_):x{x_}
+    {
+      size_=++x.size_ctrl.size;
+    }
+
+    ~reserve_size()
+    {
+      if(!commit_)--x.size_ctrl.size;
+    }
+
+    bool succeeded()const{return size_<=x.size_ctrl.ml;}
+
+    void commit(){commit_=true;}
+
+    concurrent_table &x;
+    std::size_t       size_;
+    bool              commit_=false;
+  };
+
+  struct reserve_slot
+  {
+    reserve_slot(group_type* pg_,std::size_t pos_,std::size_t hash):
+      pg{pg_},pos{pos_}
+    {
+      pg->set(pos,hash);
+    }
+
+    ~reserve_slot()
+    {
+      if(!commit_)pg->reset(pos);
+    }
+
+    void commit(){commit_=true;}
+
+    group_type  *pg;
+    std::size_t  pos;
+    bool         commit_=false;
+  };
+
+  template<typename GroupAccessMode,typename F,typename... Args>
+  BOOST_FORCEINLINE int
+  unprotected_norehash_emplace_or_visit(
+    GroupAccessMode access_mode,F&& f,Args&&... args)
+  {
+    const auto &k=this->key_from(std::forward<Args>(args)...);
+    auto        hash=this->hash_for(k);
+    auto        pos0=this->position_for(hash);
+
+    for(;;){
+    startover:
+      boost::uint32_t counter=insert_counter(pos0);
+      if(unprotected_visit(
+        access_mode,k,pos0,hash,std::forward<F>(f)))return 0;
+
+      reserve_size rsize(*this);
+      if(BOOST_LIKELY(rsize.succeeded())){
+        for(prober pb(pos0);;pb.next(this->arrays.groups_size_mask)){
+          auto pos=pb.get();
+          auto pg=this->arrays.groups+pos;
+          auto lck=access(group_exclusive{},pos);
+          auto mask=pg->match_available();
+          if(BOOST_LIKELY(mask!=0)){
+            auto n=unchecked_countr_zero(mask);
+            reserve_slot rslot{pg,n,hash};
+            if(BOOST_UNLIKELY(insert_counter(pos0)++!=counter)){
+              /* other thread inserted from pos0, need to start over */
+              goto startover;
+            }
+            auto p=this->arrays.elements+pos*N+n;
+            this->construct_element(p,std::forward<Args>(args)...);
+            rslot.commit();
+            rsize.commit();
+            return 1;
+          }
+          pg->mark_overflow(hash);
+        }
+      }
+      else return -1;
+    }
+  }
+
+  void rehash_if_full()
+  {
+    auto lck=exclusive_access();
+    if(this->size_ctrl.size==this->size_ctrl.ml){
+      this->unchecked_rehash_for_growth();
+    }
+  }
+
+  template<typename GroupAccessMode,typename F>
+  auto for_all_elements(GroupAccessMode access_mode,F f)const
+    ->decltype(f(nullptr),void())
+  {
+    for_all_elements(
+      access_mode,[&](group_type*,unsigned int,element_type* p){f(p);});
+  }
+
+  template<typename GroupAccessMode,typename F>
+  auto for_all_elements(GroupAccessMode access_mode,F f)const
+    ->decltype(f(nullptr,0,nullptr),void())
+  {
+    auto p=this->arrays.elements;
+    if(!p)return;
+    for(auto pg=this->arrays.groups,last=pg+this->arrays.groups_size_mask+1;
+        pg!=last;++pg,p+=N){
+      auto lck=access(access_mode,(std::size_t)(pg-this->arrays.groups));
+      auto mask=this->match_really_occupied(pg,last);
+      while(mask){
+        auto n=unchecked_countr_zero(mask);
+        f(pg,n,p+n);
+        mask&=mask-1;
+      }
+    }
+  }
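unprotected_norehash_emplace_or_visit above is the optimistic-concurrency core of insertion: read the probe-start group's insert counter, look up, reserve size and slot, then re-check the counter under the group's exclusive lock; any interleaved insertion through the same start position forces a restart. A stripped-down, single-slot model of that protocol (illustrative names, not library code):

  #include <atomic>
  #include <cstdint>
  #include <mutex>
  #include <optional>

  struct slot_model
  {
    std::atomic<std::uint32_t> insert_counter{0};
    std::mutex                 group_lock;
    std::optional<int>         value;

    // returns false if the key was found (visited), true if inserted
    bool emplace_or_visit(int key)
    {
      for(;;){
        std::uint32_t counter=insert_counter.load();
        if(value&&*value==key)return false;  // lookup phase (simplified)

        std::lock_guard<std::mutex> lck(group_lock);
        if(insert_counter.load()!=counter)continue; // raced: start over
        value=key;                                  // safe to materialize
        ++insert_counter;                           // publish the insertion
        return true;
      }
    }
  };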
+
+#if defined(BOOST_UNORDERED_PARALLEL_ALGORITHMS)
+  template<typename GroupAccessMode,typename ExecutionPolicy,typename F>
+  auto for_all_elements(
+    GroupAccessMode access_mode,ExecutionPolicy&& policy,F f)const
+    ->decltype(f(nullptr),void())
+  {
+    for_all_elements(
+      access_mode,std::forward<ExecutionPolicy>(policy),
+      [&](group_type*,unsigned int,element_type* p){f(p);});
+  }
+
+  template<typename GroupAccessMode,typename ExecutionPolicy,typename F>
+  auto for_all_elements(
+    GroupAccessMode access_mode,ExecutionPolicy&& policy,F f)const
+    ->decltype(f(nullptr,0,nullptr),void())
+  {
+    if(!this->arrays.elements)return;
+    auto first=this->arrays.groups,
+         last=first+this->arrays.groups_size_mask+1;
+    std::for_each(std::forward<ExecutionPolicy>(policy),first,last,
+      [&,this](group_type& g){
+        std::size_t pos=static_cast<std::size_t>(&g-first);
+        auto p=this->arrays.elements+pos*N;
+        auto lck=access(access_mode,pos);
+        auto mask=this->match_really_occupied(&g,last);
+        while(mask){
+          auto n=unchecked_countr_zero(mask);
+          f(&g,n,p+n);
+          mask&=mask-1;
+        }
+      }
+    );
+  }
+#endif
+
+  static std::atomic<unsigned int> thread_counter;
+  mutable multimutex_type          mutexes;
+};
+
+template<typename TypePolicy,typename Hash,typename Pred,typename Allocator>
+std::atomic<unsigned int>
+concurrent_table<TypePolicy,Hash,Pred,Allocator>::thread_counter={};
+
+#if defined(BOOST_MSVC)
+#pragma warning(pop) /* C4714 */
+#endif
+
+#include <boost/unordered/detail/foa/restore_wshadow.hpp>
+
+} /* namespace foa */
+} /* namespace detail */
+} /* namespace unordered */
+} /* namespace boost */
+
+#endif
diff --git a/include/boost/unordered/detail/foa.hpp b/include/boost/unordered/detail/foa/core.hpp
similarity index 71%
rename from include/boost/unordered/detail/foa.hpp
rename to include/boost/unordered/detail/foa/core.hpp
index 30a27272..fbe3aa59 100644
--- a/include/boost/unordered/detail/foa.hpp
+++ b/include/boost/unordered/detail/foa/core.hpp
@@ -1,4 +1,4 @@
-/* Fast open-addressing hash table.
+/* Common base for Boost.Unordered open-addressing tables.
 *
 * Copyright 2022-2023 Joaquin M Lopez Munoz.
 * Copyright 2023 Christian Mazakas.
@@ -9,8 +9,8 @@
 * See https://www.boost.org/libs/unordered for library home page.
 */

-#ifndef BOOST_UNORDERED_DETAIL_FOA_HPP
-#define BOOST_UNORDERED_DETAIL_FOA_HPP
+#ifndef BOOST_UNORDERED_DETAIL_FOA_CORE_HPP
+#define BOOST_UNORDERED_DETAIL_FOA_CORE_HPP

 #include
 #include
@@ -22,19 +22,21 @@
 #include
 #include
 #include
+#include
+#include
 #include
+#include
 #include
 #include
-#include
 #include
 #include
 #include
 #include
 #include
 #include
-#include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -84,6 +86,53 @@
 }while(0)
 #endif

+/* We use BOOST_UNORDERED_PREFETCH[_ELEMENTS] macros rather than proper
+ * functions because of https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109985
+ */
+
+#if defined(BOOST_GCC)||defined(BOOST_CLANG)
+#define BOOST_UNORDERED_PREFETCH(p) __builtin_prefetch((const char*)(p))
+#elif defined(BOOST_UNORDERED_SSE2)
+#define BOOST_UNORDERED_PREFETCH(p) _mm_prefetch((const char*)(p),_MM_HINT_T0)
+#else
+#define BOOST_UNORDERED_PREFETCH(p) ((void)0)
+#endif
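For context, the GCC/Clang branch of the macro boils down to __builtin_prefetch, typically used to hide a cache miss one step ahead of the work, as in this illustrative pointer-chasing loop (not library code; the hint is advisory only):

  struct node { int v; node* next; };

  long sum_list(const node* p)
  {
    long s=0;
    while(p){
  #if defined(__GNUC__)
      if(p->next)__builtin_prefetch((const char*)p->next); // warm next node
  #endif
      s+=p->v;
      p=p->next;
    }
    return s;
  }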
+
+/* We have experimentally confirmed that ARM architectures get a higher
+ * speedup when around the first half of the element slots in a group are
+ * prefetched, whereas for Intel just the first cache line is best.
+ * Please report back if you find better tunings for some particular
+ * architectures.
+ */
+
+#if BOOST_ARCH_ARM
+/* Cache line size can't be known at compile time, so we settle on
+ * the very frequent value of 64B.
+ */
+
+#define BOOST_UNORDERED_PREFETCH_ELEMENTS(p,N)                          \
+  do{                                                                   \
+    auto           BOOST_UNORDERED_P=(p);                               \
+    constexpr int  cache_line=64;                                       \
+    const char    *p0=reinterpret_cast<const char*>(BOOST_UNORDERED_P), \
+                  *p1=p0+sizeof(*BOOST_UNORDERED_P)*(N)/2;              \
+    for(;p0<p1;p0+=cache_line)BOOST_UNORDERED_PREFETCH(p0);             \
+  }while(0)
+#else
+#define BOOST_UNORDERED_PREFETCH_ELEMENTS(p,N) BOOST_UNORDERED_PREFETCH(p)
+#endif

   static_assert(boost::is_nothrow_swappable<Hash>::value,                \
     "Template parameter Hash is required to be nothrow Swappable.");     \
@@ -95,10 +144,12 @@
 namespace unordered{
 namespace detail{
 namespace foa{

-static const std::size_t default_bucket_count = 0;
+static constexpr std::size_t default_bucket_count=0;

-/* foa::table is an open-addressing hash table serving as the foundational core
- * of boost::unordered_flat_[map|set]. Its main internal design aspects are:
+/* foa::table_core is the common base of foa::table and foa::concurrent_table,
+ * which in their turn serve as the foundational core of
+ * boost::unordered_(flat|node)_(map|set) and boost::concurrent_flat_map,
+ * respectively. Its main internal design aspects are:
 *
 * - Element slots are logically split into groups of size N=15. The number
 *   of groups is always a power of two, so the number of allocated slots
@@ -151,24 +202,32 @@
 *   "logical" 128-bit word, and so forth. With this layout, match can be
 *   implemented with 4 ANDs, 3 shifts, 2 XORs, 1 OR and 1 NOT.
 *
- * group15 has no user-defined ctor so that it's a trivial type and can be
- * initialized via memset etc. Where needed, group15::initialize sets the
- * metadata to all zeros.
+ * IntegralWrapper<Integral> is used to implement group15's underlying
+ * metadata: it behaves as a plain integral for foa::table or introduces
+ * atomic ops for foa::concurrent_table. If IntegralWrapper<...> is trivially
+ * constructible, so is group15, in which case it can be initialized via memset
+ * etc. Where needed, group15::initialize resets the metadata to the all
+ * zeros (default state).
 */

 #if defined(BOOST_UNORDERED_SSE2)

+template<template<typename> class IntegralWrapper>
 struct group15
 {
-  static constexpr int  N=15;
-  static constexpr bool regular_layout=true;
+  static constexpr std::size_t N=15;
+  static constexpr bool        regular_layout=true;

   struct dummy_group_type
   {
     alignas(16) unsigned char storage[N+1]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0};
   };

-  inline void initialize(){m=_mm_setzero_si128();}
+  inline void initialize()
+  {
+    _mm_store_si128(
+      reinterpret_cast<__m128i*>(m),_mm_setzero_si128());
+  }

   inline void set(std::size_t pos,std::size_t hash)
   {
@@ -200,13 +259,13 @@ struct group15

   static inline void reset(unsigned char* pc)
   {
-    *pc=available_;
+    *reinterpret_cast<slot_type*>(pc)=available_;
   }

   inline int match(std::size_t hash)const
   {
     return _mm_movemask_epi8(
-      _mm_cmpeq_epi8(m,_mm_set1_epi32(match_word(hash))))&0x7FFF;
+      _mm_cmpeq_epi8(load_metadata(),_mm_set1_epi32(match_word(hash))))&0x7FFF;
   }

   inline bool is_not_overflowed(std::size_t hash)const
@@ -218,11 +277,7 @@ struct group15

   inline void mark_overflow(std::size_t hash)
   {
-#if BOOST_WORKAROUND(BOOST_GCC, >= 50000 && BOOST_GCC < 60000)
-    overflow() = static_cast<unsigned char>(
-      overflow() | static_cast<unsigned char>(1<<(hash%8)) );
-#else
     overflow()|=static_cast<unsigned char>(1<<(hash%8));
-#endif
   }

   static inline bool maybe_caused_overflow(unsigned char* pc)
@@ -235,12 +290,18 @@ struct group15
   inline int match_available()const
   {
     return _mm_movemask_epi8(
-      _mm_cmpeq_epi8(m,_mm_setzero_si128()))&0x7FFF;
+      _mm_cmpeq_epi8(load_metadata(),_mm_setzero_si128()))&0x7FFF;
+  }
+
+  inline bool is_occupied(std::size_t pos)const
+  {
+    BOOST_ASSERT(pos<N);
+    return at(pos)!=available_;
   }

   static inline bool is_occupied(unsigned char* pc)noexcept
   {
-    return *pc!=available_;
+    return *reinterpret_cast<slot_type*>(pc)!=available_;
   }

   inline int match_occupied()const
@@ -248,15 +309,30 @@ struct group15
     return (~match_available())&0x7FFF;
   }

-  inline int match_really_occupied()const /* excluding sentinel */
-  {
-    return at(N-1)==sentinel_?match_occupied()&0x3FFF:match_occupied();
-  }
-
 private:
+  using slot_type=IntegralWrapper<unsigned char>;
+  BOOST_STATIC_ASSERT(sizeof(slot_type)==1);
+
   static constexpr unsigned char available_=0,
                                  sentinel_=1;

+  inline __m128i load_metadata()const
+  {
+#if defined(BOOST_UNORDERED_THREAD_SANITIZER)
+    /* ThreadSanitizer complains on 1-byte atomic writes combined with
+     * 16-byte atomic reads.
+     */
+
+    return _mm_set_epi8(
+      (char)m[15],(char)m[14],(char)m[13],(char)m[12],
+      (char)m[11],(char)m[10],(char)m[ 9],(char)m[ 8],
+      (char)m[ 7],(char)m[ 6],(char)m[ 5],(char)m[ 4],
+      (char)m[ 3],(char)m[ 2],(char)m[ 1],(char)m[ 0]);
+#else
+    return _mm_load_si128(reinterpret_cast<const __m128i*>(m));
+#endif
+  }
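Why the byte-by-byte path: the concurrent table instantiates group15 with an atomic IntegralWrapper, and a single 16-byte vector load would read those bytes non-atomically, which ThreadSanitizer reports when mixed with 1-byte atomic writes. A minimal model of the trade-off (the __SANITIZE_THREAD__ check is a simplification of this diff's BOOST_UNORDERED_THREAD_SANITIZER; Clang would detect TSAN via __has_feature instead):

  #include <atomic>
  #include <cstring>

  struct metadata
  {
    std::atomic<unsigned char> m[16];

    void load16(unsigned char (&out)[16])const
    {
  #if defined(__SANITIZE_THREAD__)
      // TSAN-clean: 16 relaxed single-byte atomic loads
      for(int i=0;i<16;++i)out[i]=m[i].load(std::memory_order_relaxed);
  #else
      // fast path: one wide, technically non-atomic copy of the bytes,
      // which is exactly what a race detector would flag
      std::memcpy(out,m,sizeof out);
  #endif
    }
  };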

   inline static int match_word(std::size_t hash)
   {
     static constexpr boost::uint32_t word[]=
@@ -303,42 +379,46 @@ private:
     return narrow_cast<unsigned char>(match_word(hash));
   }

-  inline unsigned char& at(std::size_t pos)
+  inline slot_type& at(std::size_t pos)
   {
-    return reinterpret_cast<unsigned char*>(&m)[pos];
+    return m[pos];
   }

-  inline unsigned char at(std::size_t pos)const
+  inline const slot_type& at(std::size_t pos)const
   {
-    return reinterpret_cast<const unsigned char*>(&m)[pos];
+    return m[pos];
   }

-  inline unsigned char& overflow()
+  inline slot_type& overflow()
   {
     return at(N);
   }

-  inline unsigned char overflow()const
+  inline const slot_type& overflow()const
   {
     return at(N);
   }

-  alignas(16) __m128i m;
+  alignas(16) slot_type m[16];
 };

 #elif defined(BOOST_UNORDERED_LITTLE_ENDIAN_NEON)

+template<template<typename> class IntegralWrapper>
 struct group15
 {
-  static constexpr int  N=15;
-  static constexpr bool regular_layout=true;
+  static constexpr std::size_t N=15;
+  static constexpr bool        regular_layout=true;

   struct dummy_group_type
   {
     alignas(16) unsigned char storage[N+1]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0};
   };

-  inline void initialize(){m=vdupq_n_s8(0);}
+  inline void initialize()
+  {
+    vst1q_u8(reinterpret_cast<uint8_t*>(m),vdupq_n_u8(0));
+  }

   inline void set(std::size_t pos,std::size_t hash)
   {
@@ -359,7 +439,7 @@ struct group15

   static inline bool is_sentinel(unsigned char* pc)noexcept
   {
-    return *pc==sentinel_;
+    return *reinterpret_cast<slot_type*>(pc)==sentinel_;
   }

   inline void reset(std::size_t pos)
@@ -370,13 +450,13 @@ struct group15

   static inline void reset(unsigned char* pc)
   {
-    *pc=available_;
+    *reinterpret_cast<slot_type*>(pc)=available_;
   }

   inline int match(std::size_t hash)const
   {
-    return simde_mm_movemask_epi8(vceqq_s8(
-      m,vdupq_n_s8(static_cast<int8_t>(reduced_hash(hash)))))&0x7FFF;
+    return simde_mm_movemask_epi8(vceqq_u8(
+      load_metadata(),vdupq_n_u8(reduced_hash(hash))))&0x7FFF;
   }

   inline bool is_not_overflowed(std::size_t hash)const
@@ -400,29 +480,50 @@ struct group15

   inline int match_available()const
   {
-    return simde_mm_movemask_epi8(vceqq_s8(m,vdupq_n_s8(0)))&0x7FFF;
+    return simde_mm_movemask_epi8(vceqq_u8(
+      load_metadata(),vdupq_n_u8(0)))&0x7FFF;
+  }
+
+  inline bool is_occupied(std::size_t pos)const
+  {
+    BOOST_ASSERT(pos<N);
+    return at(pos)!=available_;
   }

   static inline bool is_occupied(unsigned char* pc)noexcept
   {
-    return *pc!=available_;
+    return *reinterpret_cast<slot_type*>(pc)!=available_;
   }

   inline int match_occupied()const
   {
-    return simde_mm_movemask_epi8(
-      vcgtq_u8(vreinterpretq_u8_s8(m),vdupq_n_u8(0)))&0x7FFF;
-  }
-
-  inline int match_really_occupied()const /* excluding sentinel */
-  {
-    return at(N-1)==sentinel_?match_occupied()&0x3FFF:match_occupied();
+    return simde_mm_movemask_epi8(vcgtq_u8(
+      load_metadata(),vdupq_n_u8(0)))&0x7FFF;
   }

 private:
+  using slot_type=IntegralWrapper<unsigned char>;
+  BOOST_STATIC_ASSERT(sizeof(slot_type)==1);
+
   static constexpr unsigned char available_=0,
                                  sentinel_=1;

+  inline uint8x16_t load_metadata()const
+  {
+#if defined(BOOST_UNORDERED_THREAD_SANITIZER)
+    /* ThreadSanitizer complains on 1-byte atomic writes combined with
+     * 16-byte atomic reads.
+     */
+
+    alignas(16) uint8_t data[16]={
+      m[ 0],m[ 1],m[ 2],m[ 3],m[ 4],m[ 5],m[ 6],m[ 7],
+      m[ 8],m[ 9],m[10],m[11],m[12],m[13],m[14],m[15]};
+    return vld1q_u8(data);
+#else
+    return vld1q_u8(reinterpret_cast<const uint8_t*>(m));
+#endif
+  }
+
   inline static unsigned char reduced_hash(std::size_t hash)
   {
     static constexpr unsigned char table[]={
@@ -472,35 +573,36 @@ private:
 #endif
   }

-  inline unsigned char& at(std::size_t pos)
+  inline slot_type& at(std::size_t pos)
   {
-    return reinterpret_cast<unsigned char*>(&m)[pos];
+    return m[pos];
   }

-  inline unsigned char at(std::size_t pos)const
+  inline const slot_type& at(std::size_t pos)const
   {
-    return reinterpret_cast<const unsigned char*>(&m)[pos];
+    return m[pos];
   }

-  inline unsigned char& overflow()
+  inline slot_type& overflow()
   {
     return at(N);
   }

-  inline unsigned char overflow()const
+  inline const slot_type& overflow()const
   {
     return at(N);
   }

-  alignas(16) int8x16_t m;
+  alignas(16) slot_type m[16];
 };

 #else /* non-SIMD */

+template<template<typename> class IntegralWrapper>
 struct group15
 {
-  static constexpr int  N=15;
-  static constexpr bool regular_layout=false;
+  static constexpr std::size_t N=15;
+  static constexpr bool        regular_layout=false;

   struct dummy_group_type
   {
@@ -575,6 +677,13 @@ struct group15
     return y&0x7FFF;
   }

+  inline bool is_occupied(std::size_t pos)const
+  {
+    BOOST_ASSERT(pos<N);
+    return (match_occupied()>>pos)&1;
+  }
+
 private:
+  using word_type=IntegralWrapper<boost::uint64_t>;
+  BOOST_STATIC_ASSERT(sizeof(word_type)==8);
+
   static constexpr unsigned char available_=0,
                                  sentinel_=1;
@@ -623,7 +730,7 @@ private:
     set_impl(m[1],pos,n>>4);
   }

-  static inline void set_impl(boost::uint64_t& x,std::size_t pos,std::size_t n)
+  static inline void set_impl(word_type& x,std::size_t pos,std::size_t n)
   {
     static constexpr boost::uint64_t mask[]=
     {
@@ -669,14 +776,14 @@ private:
     return y&0x7FFF;
   }

-  alignas(16) boost::uint64_t m[2];
+  alignas(16) word_type m[2];
 };

 #endif

-/* foa::table uses a size policy to obtain the permissible sizes of the group
- * array (and, by implication, the element array) and to do the hash->group
- * mapping.
+/* foa::table_core uses a size policy to obtain the permissible sizes of the
+ * group array (and, by implication, the element array) and to do the
+ * hash->group mapping.
 *
 * - size_index(n) returns an unspecified "index" number used in other policy
 *   operations.
@@ -762,12 +869,12 @@ private:
   std::size_t pos,step=0;
 };

-/* Mixing policies: no_mix is the identity function, xmx_mix uses the
- * xmx function defined in <boost/unordered/detail/xmx.hpp>, and mulx_mix
- * uses the mulx function from <boost/unordered/detail/mulx.hpp>.
+/* Mixing policies: no_mix is the identity function, and mulx_mix
+ * uses the mulx function from <boost/unordered/detail/mulx.hpp>.
 *
- * foa::table mixes hash results with mulx_mix unless the hash is marked as
- * avalanching, i.e. of good quality (see <boost/unordered/hash_traits.hpp>).
+ * foa::table_core mixes hash results with mulx_mix unless the hash is marked
+ * as avalanching, i.e. of good quality
+ * (see <boost/unordered/hash_traits.hpp>).
 */

 struct no_mix
@@ -779,15 +886,6 @@ struct no_mix
   }
 };

-struct xmx_mix
-{
-  template<typename Hash,typename T>
-  static inline std::size_t mix(const Hash& h,const T& x)
-  {
-    return xmx(h(x));
-  }
-};
-
 struct mulx_mix
 {
   template<typename Hash,typename T>
@@ -813,151 +911,6 @@ inline unsigned int unchecked_countr_zero(int x)
 #endif
 }

-template<typename TypePolicy,typename Hash,typename Pred,typename Allocator>
-class table;
-
-/* table_iterator keeps two pointers:
- *
- * - A pointer p to the element slot.
- * - A pointer pc to the n-th byte of the associated group metadata, where n
- *   is the position of the element in the group.
- *
- * A simpler solution would have been to keep a pointer p to the element, a
- * pointer pg to the group, and the position n, but that would increase
- * sizeof(table_iterator) by 4/8 bytes.
- * In order to make this compact
- * representation feasible, it is required that group objects are aligned
- * to their size, so that we can recover pg and n as
- *
- *   - n = pc%sizeof(group)
- *   - pg = pc-n
- *
- * (for explanatory purposes pg and pc are treated above as if they were memory
- * addresses rather than pointers).
- *
- * p = nullptr is conventionally used to mark end() iterators.
- */
-
-/* internal conversion from const_iterator to iterator */
-class const_iterator_cast_tag {};
-
-template<typename TypePolicy,typename Group,bool Const>
-class table_iterator
-{
-  using type_policy=TypePolicy;
-  using table_element_type=typename type_policy::element_type;
-  using group_type=Group;
-  static constexpr auto N=group_type::N;
-  static constexpr auto regular_layout=group_type::regular_layout;
-
-public:
-  using difference_type=std::ptrdiff_t;
-  using value_type=typename type_policy::value_type;
-  using pointer=
-    typename std::conditional<Const,const value_type*,value_type*>::type;
-  using reference=
-    typename std::conditional<Const,const value_type&,value_type&>::type;
-  using iterator_category=std::forward_iterator_tag;
-  using element_type=
-    typename std::conditional<Const,const value_type,value_type>::type;
-
-  table_iterator()=default;
-  template<bool Const2,typename std::enable_if<!Const2>::type* =nullptr>
-  table_iterator(const table_iterator<TypePolicy,Group,Const2>& x):
-    pc{x.pc},p{x.p}{}
-  table_iterator(
-    const_iterator_cast_tag, const table_iterator<TypePolicy,Group,true>& x):
-    pc{x.pc},p{x.p}{}
-
-  inline reference operator*()const noexcept{return type_policy::value_from(*p);}
-  inline pointer operator->()const noexcept
-    {return std::addressof(type_policy::value_from(*p));}
-  inline table_iterator& operator++()noexcept{increment();return *this;}
-  inline table_iterator operator++(int)noexcept
-    {auto x=*this;increment();return x;}
-  friend inline bool operator==(
-    const table_iterator& x,const table_iterator& y)
-    {return x.p==y.p;}
-  friend inline bool operator!=(
-    const table_iterator& x,const table_iterator& y)
-    {return !(x==y);}
-
-private:
-  template<typename,typename,bool> friend class table_iterator;
-  template<typename,typename,typename,typename> friend class table;
-
-  table_iterator(Group* pg,std::size_t n,const table_element_type* p_):
-    pc{reinterpret_cast<unsigned char*>(const_cast<group_type*>(pg))+n},
-    p{const_cast<table_element_type*>(p_)}
-  {}
-
-  inline void increment()noexcept
-  {
-    BOOST_ASSERT(p!=nullptr);
-    increment(std::integral_constant<bool,regular_layout>{});
-  }
-
-  inline void increment(std::true_type /* regular layout */)noexcept
-  {
-    for(;;){
-      ++p;
-      if(reinterpret_cast<uintptr_t>(pc)%sizeof(group_type)==N-1){
-        pc+=sizeof(group_type)-(N-1);
-        break;
-      }
-      ++pc;
-      if(!group_type::is_occupied(pc))continue;
-      if(BOOST_UNLIKELY(group_type::is_sentinel(pc)))p=nullptr;
-      return;
-    }
-
-    for(;;){
-      int mask=reinterpret_cast<group_type*>(pc)->match_occupied();
-      if(mask!=0){
-        auto n=unchecked_countr_zero(mask);
-        if(BOOST_UNLIKELY(reinterpret_cast<group_type*>(pc)->is_sentinel(n))){
-          p=nullptr;
-        }
-        else{
-          pc+=n;
-          p+=n;
-        }
-        return;
-      }
-      pc+=sizeof(group_type);
-      p+=N;
-    }
-  }
-
-  inline void increment(std::false_type /* interleaved */)noexcept
-  {
-    std::size_t n0=reinterpret_cast<uintptr_t>(pc)%sizeof(group_type);
-    pc-=n0;
-
-    int mask=(
-      reinterpret_cast<group_type*>(pc)->match_occupied()>>(n0+1))<<(n0+1);
-    if(!mask){
-      do{
-        pc+=sizeof(group_type);
-        p+=N;
-      }
-      while((mask=reinterpret_cast<group_type*>(pc)->match_occupied())==0);
-    }
-
-    auto n=unchecked_countr_zero(mask);
-    if(BOOST_UNLIKELY(reinterpret_cast<group_type*>(pc)->is_sentinel(n))){
-      p=nullptr;
-    }
-    else{
-      pc+=n;
-      p-=n0;
-      p+=n;
-    }
-  }
-
-  unsigned char      *pc=nullptr;
-  table_element_type *p=nullptr;
-};
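The alignment trick described in the removed comment (the iterator now lives in a separate header) can be checked concretely: with groups aligned to their 16-byte size, the slot index and the group base both fall out of plain pointer arithmetic. A small standalone verification:

  #include <cassert>
  #include <cstddef>
  #include <cstdint>

  struct alignas(16) group { unsigned char meta[16]; };

  int main()
  {
    group g[4]={};
    // pc points at byte n of some group's metadata
    unsigned char* pc=reinterpret_cast<unsigned char*>(&g[2])+5;

    std::size_t n=reinterpret_cast<std::uintptr_t>(pc)%sizeof(group);
    group* pg=reinterpret_cast<group*>(pc-n);

    assert(n==5);        // position within the group
    assert(pg==&g[2]);   // group recovered without storing it
  }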
Only one chunk of memory is * allocated to place both arrays: this is not done for efficiency reasons, @@ -993,6 +946,9 @@ struct table_arrays static constexpr auto N=group_type::N; using size_policy=SizePolicy; + table_arrays(std::size_t gsi,std::size_t gsm,group_type *pg,value_type *pe): + groups_size_index{gsi},groups_size_mask{gsm},groups{pg},elements{pe}{} + template static table_arrays new_(Allocator& al,std::size_t n) { @@ -1021,11 +977,17 @@ struct table_arrays reinterpret_cast(p))%sizeof(group_type); arrays.groups=reinterpret_cast(p); - /* memset is faster/not slower than initializing groups individually. - * This assumes all zeros is group_type's default layout. - */ - - std::memset(arrays.groups,0,sizeof(group_type)*groups_size); + initialize_groups( + arrays.groups,groups_size, + std::integral_constant< + bool, +#if BOOST_WORKAROUND(BOOST_LIBSTDCXX_VERSION,<50000) + /* std::is_trivially_constructible not provided */ + boost::has_trivial_constructor::value +#else + std::is_trivially_constructible::value +#endif + >{}); arrays.groups[groups_size-1].set_sentinel(); } return arrays; @@ -1061,6 +1023,25 @@ struct table_arrays return (buffer_bytes+sizeof(value_type)-1)/sizeof(value_type); } + static void initialize_groups( + group_type* groups_,std::size_t size,std::true_type /* memset */) + { + /* memset faster/not slower than manual, assumes all zeros is group_type's + * default layout. + * reinterpret_cast: GCC may complain about group_type not being trivially + * copy-assignable when we're relying on trivial copy constructibility. + */ + + std::memset( + reinterpret_cast(groups_),0,sizeof(group_type)*size); + } + + static void initialize_groups( + group_type* groups_,std::size_t size,std::false_type /* manual */) + { + while(size--!=0)::new (groups_++) group_type(); + } + std::size_t groups_size_index; std::size_t groups_size_mask; group_type *groups; @@ -1093,18 +1074,6 @@ void swap_if(T& x,T& y){using std::swap; swap(x,y);} template::type* =nullptr> void swap_if(T&,T&){} -inline void prefetch(const void* p) -{ - (void) p; -#if defined(BOOST_GCC)||defined(BOOST_CLANG) - __builtin_prefetch((const char*)p); -#elif defined(BOOST_UNORDERED_SSE2) - _mm_prefetch((const char*)p,_MM_HINT_T0); -#endif -} - -struct try_emplace_args_t{}; - template struct is_std_allocator:std::false_type{}; @@ -1146,78 +1115,69 @@ _STL_RESTORE_DEPRECATED_WARNING #pragma warning(pop) #endif -#if defined(BOOST_GCC) -/* GCC's -Wshadow triggers at scenarios like this: - * - * struct foo{}; - * template - * struct derived:Base - * { - * void f(){int foo;} - * }; - * - * derivedx; - * x.f(); // declaration of "foo" in derived::f shadows base type "foo" - * - * This makes shadowing warnings unavoidable in general when a class template - * derives from user-provided classes, as is the case with table and - * empty_value's below. - */ - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wshadow" -#endif - -#if defined(BOOST_MSVC) -#pragma warning(push) -#pragma warning(disable:4714) /* marked as __forceinline not inlined */ -#endif - -#if BOOST_WORKAROUND(BOOST_MSVC,<=1900) -/* VS2015 marks as unreachable generic catch clauses around non-throwing - * code. - */ -#pragma warning(push) -#pragma warning(disable:4702) -#endif - /* We expose the hard-coded max load factor so that tests can use it without * needing to pull it from an instantiated class template such as the table - * class + * class. 
-constexpr static float const mlf = 0.875f;
+static constexpr float mlf=0.875f;
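For concreteness, what the 0.875 factor yields at a typical size level (capacity is slots minus the sentinel; the small-table exception allowing 100% occupancy up to 2*N-1 slots lives in initial_max_load). A quick standalone check:

  #include <cstddef>
  #include <cstdio>

  int main()
  {
    constexpr float       mlf=0.875f;
    constexpr std::size_t N=15;                      // slots per group
    constexpr std::size_t groups=std::size_t(1)<<12; // e.g. 4096 groups
    constexpr std::size_t capacity=groups*N-1;       // 61439 usable slots
    std::size_t max_load=std::size_t(mlf*float(capacity)); // 53759 elements
    std::printf("capacity=%zu max_load=%zu\n",capacity,max_load);
  }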

-template<typename T>
-union uninitialized_storage
+template<typename Group,typename Element>
+struct table_locator
 {
-  T t_;
-  uninitialized_storage(){}
-  ~uninitialized_storage(){}
+  table_locator()=default;
+  table_locator(Group* pg_,unsigned int n_,Element* p_):pg{pg_},n{n_},p{p_}{}
+
+  explicit operator bool()const noexcept{return p!=nullptr;}
+
+  Group        *pg=nullptr;
+  unsigned int  n=0;
+  Element      *p=nullptr;
 };

-/* foa::table interface departs in a number of ways from that of C++ unordered
- * associative containers because it's not for end-user consumption
- * (boost::unordered_[flat|node]_[map|set]) wrappers complete it as
- * appropriate).
- *
- * The table supports two main modes of operation: node-based and flat. In the
- * node-based case, buckets store pointers to individually heap-allocated
- * elements. For flat, buckets directly store elements.
- *
- * For both tables:
- *
- * - begin() is not O(1).
- * - No bucket API.
- * - Load factor is fixed and can't be set by the user.
- *
- * For the inline table:
- *
- * - value_type must be moveable.
- * - Pointer stability is not kept under rehashing.
- * - No extract API.
- *
- * The TypePolicy template parameter is used to generate instantiations
- * suitable for either maps or sets, and introduces non-standard init_type:
+struct try_emplace_args_t{};
+
+template<typename TypePolicy,typename Allocator,typename... Args>
+class alloc_cted_insert_type
+{
+  using emplace_type=typename std::conditional<
+    std::is_constructible<typename TypePolicy::init_type,Args...>::value,
+    typename TypePolicy::init_type,
+    typename TypePolicy::value_type
+  >::type;
+
+  using insert_type=typename std::conditional<
+    std::is_constructible<typename TypePolicy::value_type,emplace_type>::value,
+    emplace_type,typename TypePolicy::element_type
+  >::type;
+
+  alignas(insert_type) unsigned char storage[sizeof(insert_type)];
+  Allocator                          al;
+
+public:
+  alloc_cted_insert_type(const Allocator& al_,Args&&... args):al{al_}
+  {
+    TypePolicy::construct(al,data(),std::forward<Args>(args)...);
+  }
+
+  ~alloc_cted_insert_type()
+  {
+    TypePolicy::destroy(al,data());
+  }
+
+  insert_type* data(){return reinterpret_cast<insert_type*>(&storage);}
+  insert_type& value(){return *data();}
+};
+
+template<typename TypePolicy,typename Allocator,typename... Args>
+alloc_cted_insert_type<TypePolicy,Allocator,Args...>
+alloc_make_insert_type(const Allocator& al,Args&&... args)
+{
+  return {al,std::forward<Args>(args)...};
+}
+
+/* table_core. The TypePolicy template parameter is used to generate
+ * instantiations suitable for either maps or sets, and introduces non-standard
+ * init_type and element_type:
 *
 * - TypePolicy::key_type and TypePolicy::value_type have the obvious
 *   meaning.
@@ -1246,34 +1206,47 @@
 *   decltype(TypePolicy::move(...)).
 *
 * - TypePolicy::element_type is the type that table_arrays uses when
-*   allocating buckets. For flat containers, this is value_type. For node
-*   containers, this is a strong typedef to value_type*.
+*   allocating buckets, which allows us to have flat and node container.
+*   For flat containers, element_type is value_type. For node
+*   containers, it is a strong typedef to value_type*.
 *
 * - TypePolicy::value_from returns a mutable reference to value_type from
 *   a given element_type. This is used when elements of the table themselves
 *   need to be moved, such as during move construction/assignment when
 *   allocators are unequal and there is no propagation. For all other cases,
 *   the element_type itself is moved.
- *
- * try_emplace, erase and find support heterogenous lookup by default, that is,
- * without checking for any ::is_transparent typedefs --the checking is done by
- * boost::unordered_[flat|node]_[map|set].
 */

+#include <boost/unordered/detail/foa/ignore_wshadow.hpp>
+
+#if defined(BOOST_MSVC)
+#pragma warning(push)
+#pragma warning(disable:4714) /* marked as __forceinline not inlined */
+#endif
+
+#if BOOST_WORKAROUND(BOOST_MSVC,<=1900)
+/* VS2015 marks as unreachable generic catch clauses around non-throwing
+ * code.
+ */
+#pragma warning(push)
+#pragma warning(disable:4702)
+#endif
+
-template<typename TypePolicy,typename Hash,typename Pred,typename Allocator>
+template<
+  typename TypePolicy,typename Group,template<typename...> class Arrays,
+  typename SizeControl,typename Hash,typename Pred,typename Allocator
+>
 class

 #if defined(_MSC_VER)&&_MSC_FULL_VER>=190023918
 __declspec(empty_bases) /* activate EBO with multiple inheritance */
 #endif

-table:empty_value<Hash,0>,empty_value<Pred,1>,empty_value<Allocator,2>
+table_core:empty_value<Hash,0>,empty_value<Pred,1>,empty_value<Allocator,2>
 {
-  using hash_base=empty_value<Hash,0>;
-  using pred_base=empty_value<Pred,1>;
-  using allocator_base=empty_value<Allocator,2>;
+public:
   using type_policy=TypePolicy;
-  using group_type=group15;
+  using group_type=Group;
   static constexpr auto N=group_type::N;
   using size_policy=pow2_size_policy;
   using prober=pow2_quadratic_prober;
@@ -1283,18 +1256,13 @@
     mulx_mix
   >::type;
   using alloc_traits=boost::allocator_traits<Allocator>;
+  using element_type=typename type_policy::element_type;
+  using arrays_type=Arrays<value_type,group_type,size_policy>;
+  using size_ctrl_type=SizeControl;

-public:
   using key_type=typename type_policy::key_type;
   using init_type=typename type_policy::init_type;
   using value_type=typename type_policy::value_type;
-  using element_type=typename type_policy::element_type;
-
-private:
-  static constexpr bool has_mutable_iterator=
-    !std::is_same<key_type,value_type>::value;
-
-public:
   using hasher=Hash;
   using key_equal=Pred;
   using allocator_type=Allocator;
@@ -1304,24 +1272,20 @@
   using const_reference=const value_type&;
   using size_type=std::size_t;
   using difference_type=std::ptrdiff_t;
-  using const_iterator=table_iterator<type_policy,group_type,true>;
-  using iterator=typename std::conditional<
-    has_mutable_iterator,
-    table_iterator<type_policy,group_type,false>,
-    const_iterator>::type;
+  using locator=table_locator<group_type,element_type>;

-  table(
-    std::size_t n=0,const Hash& h_=Hash(),const Pred& pred_=Pred(),
-    const Allocator& al_=Allocator()):
+  table_core(
+    std::size_t n=default_bucket_count,const Hash& h_=Hash(),
+    const Pred& pred_=Pred(),const Allocator& al_=Allocator()):
     hash_base{empty_init,h_},pred_base{empty_init,pred_},
-    allocator_base{empty_init,al_},size_{0},arrays(new_arrays(n)),
-    ml{initial_max_load()}
+    allocator_base{empty_init,al_},arrays(new_arrays(n)),
+    size_ctrl{initial_max_load(),0}
   {}

-  table(const table& x):
-    table{x,alloc_traits::select_on_container_copy_construction(x.al())}{}
+  table_core(const table_core& x):
+    table_core{x,alloc_traits::select_on_container_copy_construction(x.al())}{}

-  table(table&& x)
+  table_core(table_core&& x)
   noexcept(
     std::is_nothrow_move_constructible<Hash>::value&&
     std::is_nothrow_move_constructible<Pred>::value&&
     std::is_nothrow_move_constructible<Allocator>::value):
     hash_base{empty_init,std::move(x.h())},
     pred_base{empty_init,std::move(x.pred())},
     allocator_base{empty_init,std::move(x.al())},
-    size_{x.size_},arrays(x.arrays),ml{x.ml}
+    arrays(x.arrays),size_ctrl(x.size_ctrl)
   {
-    x.size_=0;
     x.arrays=x.new_arrays(0);
-    x.ml=x.initial_max_load();
+    x.size_ctrl.ml=x.initial_max_load();
+    x.size_ctrl.size=0;
   }

-  table(const table& x,const Allocator& al_):
-    table{std::size_t(std::ceil(float(x.size())/mlf)),x.h(),x.pred(),al_}
+  table_core(const table_core& x,const Allocator& al_):
+    table_core{std::size_t(std::ceil(float(x.size())/mlf)),x.h(),x.pred(),al_}
   {
     copy_elements_from(x);
   }

-  table(table&& x,const Allocator& al_):
-    table{0,std::move(x.h()),std::move(x.pred()),al_}
+  table_core(table_core&& x,const Allocator& al_):
+    table_core{std::move(x.h()),std::move(x.pred()),al_}
   {
     if(al()==x.al()){
-      std::swap(size_,x.size_);
-      std::swap(arrays,x.arrays);
-      std::swap(ml,x.ml);
+      using std::swap;
+      swap(arrays,x.arrays);
+      swap(size_ctrl,x.size_ctrl);
     }
     else{
       reserve(x.size());
@@ -1364,7 +1328,7 @@
     }
   }

-  ~table()noexcept
+  ~table_core()noexcept
   {
     for_all_elements([this](element_type* p){
       destroy_element(p);
@@ -1372,7 +1336,7 @@
     delete_arrays(arrays);
   }

-  table& operator=(const table& x)
+  table_core& operator=(const table_core& x)
   {
     BOOST_UNORDERED_STATIC_ASSERT_HASH_PRED(Hash, Pred)

@@ -1380,18 +1344,21 @@
     static constexpr auto pocca=
       alloc_traits::propagate_on_container_copy_assignment::value;

     if(this!=std::addressof(x)){
-      // if copy construction here winds up throwing, the container is still
-      // left intact so we perform these operations first
+      /* If copy construction here winds up throwing, the container is still
+       * left intact so we perform these operations first.
+       */
       hasher    tmp_h=x.h();
       key_equal tmp_p=x.pred();

-      // already noexcept, clear() before we swap the Hash, Pred just in case
-      // the clear() impl relies on them at some point in the future
-      clear();
+      /* already noexcept, clear() before we swap the Hash, Pred just in case
+       * the clear() impl relies on them at some point in the future.
+       */
+      clear();

-      // because we've asserted at compile-time that Hash and Pred are nothrow
-      // swappable, we can safely mutate our source container and maintain
-      // consistency between the Hash, Pred compatibility
+      /* Because we've asserted at compile-time that Hash and Pred are nothrow
+       * swappable, we can safely mutate our source container and maintain
+       * consistency between the Hash, Pred compatibility.
+       */
       using std::swap;
       swap(h(),tmp_h);
       swap(pred(),tmp_p);
@@ -1412,7 +1379,7 @@
 #pragma warning(disable:4127) /* conditional expression is constant */
 #endif

-  table& operator=(table&& x)
+  table_core& operator=(table_core&& x)
   noexcept(
     alloc_traits::propagate_on_container_move_assignment::value||
     alloc_traits::is_always_equal::value)
@@ -1442,9 +1409,8 @@
     if(pocma||al()==x.al()){
       reserve(0);
       move_assign_if(al(),x.al());
-      swap(size_,x.size_);
       swap(arrays,x.arrays);
-      swap(ml,x.ml);
+      swap(size_ctrl,x.size_ctrl);
     }
     else{
       /* noshrink: favor memory reuse over tightness */
@@ -1469,109 +1435,71 @@

   allocator_type get_allocator()const noexcept{return al();}

-  iterator begin()noexcept
-  {
-    iterator it{arrays.groups,0,arrays.elements};
-    if(arrays.elements&&!(arrays.groups[0].match_occupied()&0x1))++it;
-    return it;
-  }
-
-  const_iterator begin()const noexcept
-    {return const_cast<table*>(this)->begin();}
-  iterator end()noexcept{return {};}
-  const_iterator end()const noexcept{return const_cast<table*>(this)->end();}
-  const_iterator cbegin()const noexcept{return begin();}
-  const_iterator cend()const noexcept{return end();}
-
   bool empty()const noexcept{return size()==0;}
-  std::size_t size()const noexcept{return size_;}
+  std::size_t size()const noexcept{return size_ctrl.size;}
   std::size_t max_size()const noexcept{return SIZE_MAX;}

-  template<typename... Args>
-  BOOST_FORCEINLINE std::pair<iterator,bool> emplace(Args&&... args)
+  BOOST_FORCEINLINE
+  void erase(group_type* pg,unsigned int pos,element_type* p)noexcept
   {
-    using emplace_type=typename std::conditional<
-      std::is_constructible<init_type,Args...>::value,
-      init_type,
-      value_type
-    >::type;
-
-    using insert_type=typename std::conditional<
-      std::is_constructible<
-        value_type,emplace_type>::value,
-      emplace_type,element_type
-    >::type;
-
-    uninitialized_storage<insert_type> s;
-    auto *p=std::addressof(s.t_);
-
-    type_policy::construct(al(),p,std::forward<Args>(args)...);
-
-    destroy_on_exit<insert_type> guard{al(),p};
-    return emplace_impl(type_policy::move(*p));
+    destroy_element(p);
+    recover_slot(pg,pos);
   }

-  template<typename Key,typename... Args>
-  BOOST_FORCEINLINE std::pair<iterator,bool> try_emplace(
-    Key&& x,Args&&... args)
-  {
-    return emplace_impl(
-      try_emplace_args_t{},std::forward<Key>(x),std::forward<Args>(args)...);
-  }
-
-  BOOST_FORCEINLINE std::pair<iterator,bool>
-  insert(const init_type& x){return emplace_impl(x);}
-
-  BOOST_FORCEINLINE std::pair<iterator,bool>
-  insert(init_type&& x){return emplace_impl(std::move(x));}
-
-  /* template<typename=void> tilts call ambiguities in favor of init_type */
-
-  template<typename=void>
-  BOOST_FORCEINLINE std::pair<iterator,bool>
-  insert(const value_type& x){return emplace_impl(x);}
-
-  template<typename=void>
-  BOOST_FORCEINLINE std::pair<iterator,bool>
-  insert(value_type&& x){return emplace_impl(std::move(x));}
-
-  template<typename T=element_type> BOOST_FORCEINLINE
-  typename std::enable_if<
-    !std::is_same<T,value_type>::value,
-    std::pair<iterator,bool>
-  >::type
-  insert(element_type&& x){return emplace_impl(std::move(x));}
-
-  template<
-    bool dependent_value=false,
-    typename std::enable_if<
-      has_mutable_iterator||dependent_value>::type* =nullptr
-  >
-  void erase(iterator pos)noexcept{return erase(const_iterator(pos));}
-
   BOOST_FORCEINLINE
-  void erase(const_iterator pos)noexcept
+  void erase(unsigned char* pc,element_type* p)noexcept
   {
-    destroy_element(pos.p);
-    recover_slot(pos.pc);
+    destroy_element(p);
+    recover_slot(pc);
   }

   template<typename Key>
-  BOOST_FORCEINLINE
-  auto erase(Key&& x) -> typename std::enable_if<
-    !std::is_convertible<Key,iterator>::value&&
-    !std::is_convertible<Key,const_iterator>::value, std::size_t>::type
+  BOOST_FORCEINLINE locator find(const Key& x)const
   {
-    auto it=find(x);
-    if(it!=end()){
-      erase(it);
-      return 1;
-    }
-    else return 0;
+    auto hash=hash_for(x);
+    return find(x,position_for(hash),hash);
   }
+
+#if defined(BOOST_MSVC)
+/* warning: forcing value to bool 'true' or 'false' in bool(pred()...) */
+#pragma warning(push)
+#pragma warning(disable:4800)
+#endif
+
+  template<typename Key>
+  BOOST_FORCEINLINE locator find(
+    const Key& x,std::size_t pos0,std::size_t hash)const
+  {
+    prober pb(pos0);
+    do{
+      auto pos=pb.get();
+      auto pg=arrays.groups+pos;
+      auto mask=pg->match(hash);
+      if(mask){
+        BOOST_UNORDERED_ASSUME(arrays.elements!=nullptr);
+        auto p=arrays.elements+pos*N;
+        BOOST_UNORDERED_PREFETCH_ELEMENTS(p,N);
+        do{
+          auto n=unchecked_countr_zero(mask);
+          if(BOOST_LIKELY(bool(pred()(x,key_from(p[n]))))){
+            return {pg,n,p+n};
+          }
+          mask&=mask-1;
+        }while(mask);
+      }
+      if(BOOST_LIKELY(pg->is_not_overflowed(hash))){
+        return {};
+      }
+    }
+    while(BOOST_LIKELY(pb.next(arrays.groups_size_mask)));
+    return {};
+  }
+
+#if defined(BOOST_MSVC)
+#pragma warning(pop) /* C4800 */
+#endif
+
-  void swap(table& x)
+  void swap(table_core& x)
   noexcept(
     alloc_traits::propagate_on_container_swap::value||
     alloc_traits::is_always_equal::value)
@@ -1592,9 +1520,8 @@

     swap(h(),x.h());
     swap(pred(),x.pred());
-    swap(size_,x.size_);
     swap(arrays,x.arrays);
-    swap(ml,x.ml);
+    swap(size_ctrl,x.size_ctrl);
   }

   void clear()noexcept
@@ -1603,7 +1530,7 @@
     if(p){
       for(auto pg=arrays.groups,last=pg+arrays.groups_size_mask+1;
           pg!=last;++pg,p+=N){
-        auto mask=pg->match_really_occupied();
+        auto mask=match_really_occupied(pg,last);
         while(mask){
           destroy_element(p+unchecked_countr_zero(mask));
           mask&=mask-1;
@@ -1612,48 +1539,14 @@
         pg->initialize();
       }
       arrays.groups[arrays.groups_size_mask].set_sentinel();
-      size_=0;
-      ml=initial_max_load();
+      size_ctrl.ml=initial_max_load();
+      size_ctrl.size=0;
     }
   }

-  element_type extract(const_iterator pos)
-  {
-    BOOST_ASSERT(pos!=end());
-    erase_on_exit e{*this,pos};
-    (void)e;
-    return std::move(*pos.p);
-  }
-
-  // TODO: should we accept different allocator too?
-  template<typename Hash2,typename Pred2>
-  void merge(table<TypePolicy,Hash2,Pred2,Allocator>& x)
-  {
-    x.for_all_elements([&,this](group_type* pg,unsigned int n,element_type* p){
-      erase_on_exit e{x,{pg,n,p}};
-      if(!emplace_impl(type_policy::move(*p)).second)e.rollback();
-    });
-  }
-
-  template<typename Hash2,typename Pred2>
-  void merge(table<TypePolicy,Hash2,Pred2,Allocator>&& x){merge(x);}
-
   hasher hash_function()const{return h();}
   key_equal key_eq()const{return pred();}

-  template<typename Key>
-  BOOST_FORCEINLINE iterator find(const Key& x)
-  {
-    auto hash=hash_for(x);
-    return find_impl(x,position_for(hash),hash);
-  }
-
-  template<typename Key>
-  BOOST_FORCEINLINE const_iterator find(const Key& x)const
-  {
-    return const_cast<table*>(this)->find(x);
-  }
-
   std::size_t capacity()const noexcept
   {
     return arrays.elements?(arrays.groups_size_mask+1)*N-1:0;
@@ -1661,13 +1554,13 @@

   float load_factor()const noexcept
   {
-    if (capacity() == 0) { return 0; }
-    return float(size())/float(capacity());
+    if(capacity()==0)return 0;
+    else return float(size())/float(capacity());
   }

   float max_load_factor()const noexcept{return mlf;}

-  std::size_t max_load()const noexcept{return ml;}
+  std::size_t max_load()const noexcept{return size_ctrl.ml;}

   void rehash(std::size_t n)
   {
@@ -1683,40 +1576,27 @@
     rehash(std::size_t(std::ceil(float(n)/mlf)));
   }

-  template<typename Predicate>
-  friend std::size_t erase_if(table& x,Predicate pr)
+  friend bool operator==(const table_core& x,const table_core& y)
   {
-    return x.erase_if_impl(pr);
+    return
+      x.size()==y.size()&&
+      x.for_all_elements_while([&](element_type* p){
+        auto loc=y.find(key_from(*p));
+        return loc&&
+          const_cast<const value_type&>(type_policy::value_from(*p))==
+          const_cast<const value_type&>(type_policy::value_from(*loc.p));
+      });
   }

-private:
-  template<typename,typename,typename,typename> friend class table;
-  using arrays_type=table_arrays<value_type,group_type,size_policy>;
+  friend bool operator!=(const table_core& x,const table_core& y)
+  {
+    return !(x==y);
+  }

   struct clear_on_exit
   {
     ~clear_on_exit(){x.clear();}
-    table& x;
+    table_core& x;
   };

-  struct erase_on_exit
-  {
-    erase_on_exit(table& x_,const_iterator it_):x{x_},it{it_}{}
-    ~erase_on_exit(){if(!rollback_)x.erase(it);}
-
-    void rollback(){rollback_=true;}
-
-    table&         x;
-    const_iterator it;
-    bool           rollback_=false;
-  };
-
-  template<typename T>
-  struct destroy_on_exit
-  {
-    Allocator &a;
-    T         *p;
-    ~destroy_on_exit(){type_policy::destroy(a,p);};
-  };
-
   Hash& h(){return hash_base::get();}
   const Hash& h()const{return hash_base::get();}
   Pred& pred(){return pred_base::get();}
   const Pred& pred()const{return pred_base::get();}
   Allocator& al(){return allocator_base::get();}
   const Allocator& al()const{return allocator_base::get();}

-  arrays_type new_arrays(std::size_t n)
-  {
-    return arrays_type::new_(al(),n);
-  }
-
-  void delete_arrays(arrays_type& arrays_)noexcept
-  {
-    arrays_type::delete_(al(),arrays_);
-  }
-
   template<typename... Args>
   void construct_element(element_type* p,Args&&... args)
   {
@@ -1726,16 +1606,6 @@
       std::forward<Args>(args)...);
   }

-  template<typename Key,typename... Args>
-  void construct_element_from_try_emplace_args(
-    element_type* p,std::false_type,Key&& x,Args&&... args)
-  {
-    type_policy::construct(
-      al(),p,
-      std::piecewise_construct,
-      std::forward_as_tuple(std::forward<Key>(x)),
-      std::forward_as_tuple(std::forward<Args>(args)...));
-  }
-
-  /* This overload allows boost::unordered_flat_set to internally use
-   * try_emplace to implement heterogeneous insert (P2363).
-   */
-
-  template<typename Key>
-  void construct_element_from_try_emplace_args(
-    element_type* p,std::true_type,Key&& x)
-  {
-    type_policy::construct(al(),p,std::forward<Key>(x));
-  }
-
   void destroy_element(element_type* p)noexcept
   {
     type_policy::destroy(al(),p);
@@ -1781,115 +1629,10 @@
   struct destroy_element_on_exit
   {
     ~destroy_element_on_exit(){this_->destroy_element(p);}
-    table        *this_;
+    table_core   *this_;
     element_type *p;
   };

-  void copy_elements_from(const table& x)
-  {
-    BOOST_ASSERT(empty());
-    BOOST_ASSERT(this!=std::addressof(x));
-    if(arrays.groups_size_mask==x.arrays.groups_size_mask){
-      fast_copy_elements_from(x);
-    }
-    else{
-      x.for_all_elements([this](const element_type* p){
-        unchecked_insert(*p);
-      });
-    }
-  }
-
-  void fast_copy_elements_from(const table& x)
-  {
-    if(arrays.elements){
-      copy_elements_array_from(x);
-      std::memcpy(
-        arrays.groups,x.arrays.groups,
-        (arrays.groups_size_mask+1)*sizeof(group_type));
-      size_=x.size();
-    }
-  }
-
-  void copy_elements_array_from(const table& x)
-  {
-    copy_elements_array_from(
-      x,
-      std::integral_constant<
-        bool,
-#if BOOST_WORKAROUND(BOOST_LIBSTDCXX_VERSION,<50000)
-        /* std::is_trivially_copy_constructible not provided */
-        boost::has_trivial_copy<element_type>::value
-#else
-        std::is_trivially_copy_constructible<element_type>::value
-#endif
-        &&(
-          is_std_allocator<Allocator>::value||
-          !alloc_has_construct<Allocator,value_type*,const value_type&>::value)
-      >{}
-    );
-  }
-
-  void copy_elements_array_from(const table& x,std::true_type /* -> memcpy */)
-  {
-    /* reinterpret_cast: GCC may complain about value_type not being trivially
-     * copy-assignable when we're relying on trivial copy constructibility.
-     */
-    std::memcpy(
-      reinterpret_cast<unsigned char*>(arrays.elements),
-      reinterpret_cast<unsigned char*>(x.arrays.elements),
-      x.capacity()*sizeof(value_type));
-  }
-
-  void copy_elements_array_from(const table& x,std::false_type /* -> manual */)
-  {
-    std::size_t num_constructed=0;
-    BOOST_TRY{
-      x.for_all_elements([&,this](const element_type* p){
-        construct_element(arrays.elements+(p-x.arrays.elements),*p);
-        ++num_constructed;
-      });
-    }
-    BOOST_CATCH(...){
-      if(num_constructed){
-        x.for_all_elements_while([&,this](const element_type* p){
-          destroy_element(arrays.elements+(p-x.arrays.elements));
-          return --num_constructed!=0;
-        });
-      }
-      BOOST_RETHROW
-    }
-    BOOST_CATCH_END
-  }
-
-  void recover_slot(unsigned char* pc)
-  {
-    /* If this slot potentially caused overflow, we decrease the maximum load
-     * so that average probe length won't increase unboundedly in repeated
-     * insert/erase cycles (drift).
-     */
-    ml-=group_type::maybe_caused_overflow(pc);
-    group_type::reset(pc);
-    --size_;
-  }
-
-  void recover_slot(group_type* pg,std::size_t pos)
-  {
-    recover_slot(reinterpret_cast<unsigned char*>(pg)+pos);
-  }
-
-  std::size_t initial_max_load()const
-  {
-    static constexpr std::size_t small_capacity=2*N-1;
-
-    auto capacity_=capacity();
-    if(capacity_<=small_capacity){
-      return capacity_; /* we allow 100% usage */
-    }
-    else{
-      return (std::size_t)(mlf*(float)(capacity_));
-    }
-  }
-
   template<typename T>
   static inline auto key_from(const T& x)
     ->decltype(type_policy::extract(x))
@@ -1921,114 +1664,34 @@
     return size_policy::position(hash,arrays_.groups_size_index);
   }

-  static inline void prefetch_elements(const element_type* p)
-  {
-    /* We have experimentally confirmed that ARM architectures get a higher
-     * speedup when around the first half of the element slots in a group are
-     * prefetched, whereas for Intel just the first cache line is best.
-     * Please report back if you find better tunings for some particular
-     * architectures.
-     */
-
-#if BOOST_ARCH_ARM
-    /* Cache line size can't be known at compile time, so we settle on
-     * the very frequent value of 64B.
-     */
-    constexpr int cache_line=64;
-    const char *p0=reinterpret_cast<const char*>(p),
-               *p1=p0+sizeof(value_type)*N/2;
-    for(;p0<p1;p0+=cache_line)prefetch(p0);
-#else
-    prefetch(p);
-#endif
-  }
-
-  template<typename Key>
-  BOOST_FORCEINLINE iterator find_impl(
-    const Key& x,std::size_t pos0,std::size_t hash)const
-  {
-    prober pb(pos0);
-    do{
-      auto pos=pb.get();
-      auto pg=arrays.groups+pos;
-      auto mask=pg->match(hash);
-      if(mask){
-        BOOST_UNORDERED_ASSUME(arrays.elements != nullptr);
-        auto p=arrays.elements+pos*N;
-        prefetch_elements(p);
-        do{
-          auto n=unchecked_countr_zero(mask);
-          if(BOOST_LIKELY(bool(pred()(x,key_from(p[n]))))){
-            return {pg,n,p+n};
-          }
-          mask&=mask-1;
-        }while(mask);
-      }
-      if(BOOST_LIKELY(pg->is_not_overflowed(hash))){
-        return {}; /* end() */
-      }
-    }
-    while(BOOST_LIKELY(pb.next(arrays.groups_size_mask)));
-    return {}; /* end() */
-  }
-
-#if defined(BOOST_MSVC)
-#pragma warning(pop) /* C4800 */
-#endif
-
-  template<typename... Args>
-  BOOST_FORCEINLINE std::pair<iterator,bool> emplace_impl(Args&&... args)
-  {
-    const auto &k=key_from(std::forward<Args>(args)...);
-    auto        hash=hash_for(k);
-    auto        pos0=position_for(hash);
-    auto        it=find_impl(k,pos0,hash);
-
-    if(it!=end()){
-      return {it,false};
-    }
-    if(BOOST_LIKELY(size_<ml)){
-      return {
-        unchecked_emplace_at(pos0,hash,std::forward<Args>(args)...),
-        true
-      };
-    }
-    else{
-      return {
-        unchecked_emplace_with_rehash(hash,std::forward<Args>(args)...),
-        true
-      };
-    }
-  }
-
-  static std::size_t capacity_for(std::size_t n)
-  {
-    return size_policy::size(size_index_for(n))*N-1;
+  static inline int match_really_occupied(group_type* pg,group_type* last)
+  {
+    /* excluding the sentinel */
+    return pg->match_occupied()&~(int(pg==last-1)<<(N-1));
   }

   template<typename... Args>
-  BOOST_NOINLINE iterator
+  locator unchecked_emplace_at(
+    std::size_t pos0,std::size_t hash,Args&&... args)
+  {
+    auto res=nosize_unchecked_emplace_at(
+      arrays,pos0,hash,std::forward<Args>(args)...);
+    ++size_ctrl.size;
+    return res;
+  }
+
+  BOOST_NOINLINE void unchecked_rehash_for_growth()
+  {
+    auto new_arrays_=new_arrays_for_growth();
+    unchecked_rehash(new_arrays_);
+  }
+
+  template<typename... Args>
+  BOOST_NOINLINE locator
   unchecked_emplace_with_rehash(std::size_t hash,Args&&... args)
   {
-    /* Due to the anti-drift mechanism (see recover_slot), new_arrays_ may be
-     * of the same size as the old arrays; in the limit, erasing one element at
-     * full load and then inserting could bring us back to the same capacity
-     * after a costly rehash. To avoid this, we jump to the next capacity level
-     * when the number of erased elements is <= 10% of total elements at full
-     * load, which is implemented by requesting additional F*size elements,
-     * with F = P * 10% / (1 - P * 10%), where P is the probability of an
-     * element having caused overflow; P has been measured as ~0.162 under
-     * ideal conditions, yielding F ~ 0.0165 ~ 1/61.
- */ - auto new_arrays_=new_arrays(std::size_t( - std::ceil(static_cast(size_+size_/61+1)/mlf))); - iterator it; + auto new_arrays_=new_arrays_for_growth(); + locator it; BOOST_TRY{ /* strong exception guarantee -> try insertion before rehash */ it=nosize_unchecked_emplace_at( @@ -2043,10 +1706,292 @@ private: /* new_arrays_ lifetime taken care of by unchecked_rehash */ unchecked_rehash(new_arrays_); - ++size_; + ++size_ctrl.size; return it; } + void noshrink_reserve(std::size_t n) + { + /* used only on assignment after element clearance */ + BOOST_ASSERT(empty()); + + if(n){ + n=std::size_t(std::ceil(float(n)/mlf)); /* elements -> slots */ + n=capacity_for(n); /* exact resulting capacity */ + + if(n>capacity()){ + auto new_arrays_=new_arrays(n); + delete_arrays(arrays); + arrays=new_arrays_; + size_ctrl.ml=initial_max_load(); + } + } + } + + template + void for_all_elements(F f)const + { + for_all_elements(arrays,f); + } + + template + static auto for_all_elements(const arrays_type& arrays_,F f) + ->decltype(f(nullptr),void()) + { + for_all_elements_while(arrays_,[&](element_type* p){f(p);return true;}); + } + + template + static auto for_all_elements(const arrays_type& arrays_,F f) + ->decltype(f(nullptr,0,nullptr),void()) + { + for_all_elements_while( + arrays_,[&](group_type* pg,unsigned int n,element_type* p) + {f(pg,n,p);return true;}); + } + + template + bool for_all_elements_while(F f)const + { + return for_all_elements_while(arrays,f); + } + + template + static auto for_all_elements_while(const arrays_type& arrays_,F f) + ->decltype(f(nullptr),bool()) + { + return for_all_elements_while( + arrays_,[&](group_type*,unsigned int,element_type* p){return f(p);}); + } + + template + static auto for_all_elements_while(const arrays_type& arrays_,F f) + ->decltype(f(nullptr,0,nullptr),bool()) + { + auto p=arrays_.elements; + if(p){ + for(auto pg=arrays_.groups,last=pg+arrays_.groups_size_mask+1; + pg!=last;++pg,p+=N){ + auto mask=match_really_occupied(pg,last); + while(mask){ + auto n=unchecked_countr_zero(mask); + if(!f(pg,n,p+n))return false; + mask&=mask-1; + } + } + } + return true; + } + + arrays_type arrays; + size_ctrl_type size_ctrl; + +private: + template< + typename,typename,template class, + typename,typename,typename,typename + > + friend class table_core; + + using hash_base=empty_value; + using pred_base=empty_value; + using allocator_base=empty_value; + + /* used by allocator-extended move ctor */ + + table_core(Hash&& h_,Pred&& pred_,const Allocator& al_): + hash_base{empty_init,std::move(h_)}, + pred_base{empty_init,std::move(pred_)}, + allocator_base{empty_init,al_},arrays(new_arrays(0)), + size_ctrl{initial_max_load(),0} + {} + + arrays_type new_arrays(std::size_t n) + { + return arrays_type::new_(al(),n); + } + + arrays_type new_arrays_for_growth() + { + /* Due to the anti-drift mechanism (see recover_slot), the new arrays may + * be of the same size as the old arrays; in the limit, erasing one + * element at full load and then inserting could bring us back to the same + * capacity after a costly rehash. To avoid this, we jump to the next + * capacity level when the number of erased elements is <= 10% of total + * elements at full load, which is implemented by requesting additional + * F*size elements, with F = P * 10% / (1 - P * 10%), where P is the + * probability of an element having caused overflow; P has been measured as + * ~0.162 under ideal conditions, yielding F ~ 0.0165 ~ 1/61. 
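Working the numbers from the comment above: with P = 0.162, F = P * 10% / (1 - P * 10%) = 0.0162/0.9838, roughly 0.0165, and 1/61 is roughly 0.0164, which is where the size()/61 term in the request below comes from. A throwaway check of the arithmetic (mlf assumed here to be the library's fixed 0.875 maximum load factor):

#include <cassert>
#include <cmath>
#include <cstddef>

std::size_t growth_request(std::size_t size, float mlf)
{
  /* same shape as the new_arrays request below: the extra size/61
   * elements guarantee the next capacity level despite anti-drift */
  return std::size_t(std::ceil(static_cast<float>(size + size / 61 + 1) / mlf));
}

int main()
{
  float P = 0.162f, F = P * 0.10f / (1.0f - P * 0.10f);
  assert(F > 1.0f / 62 && F < 1.0f / 60);   /* F ~ 0.0165 ~ 1/61 */
  assert(growth_request(61, 0.875f) == 72); /* ceil(63/0.875) */
}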
+ */ + return new_arrays(std::size_t( + std::ceil(static_cast(size()+size()/61+1)/mlf))); + } + + void delete_arrays(arrays_type& arrays_)noexcept + { + arrays_type::delete_(al(),arrays_); + } + + template + void construct_element_from_try_emplace_args( + element_type* p,std::false_type,Key&& x,Args&&... args) + { + type_policy::construct( + this->al(),p, + std::piecewise_construct, + std::forward_as_tuple(std::forward(x)), + std::forward_as_tuple(std::forward(args)...)); + } + + /* This overload allows boost::unordered_flat_set to internally use + * try_emplace to implement heterogeneous insert (P2363). + */ + + template + void construct_element_from_try_emplace_args( + element_type* p,std::true_type,Key&& x) + { + type_policy::construct(this->al(),p,std::forward(x)); + } + + void copy_elements_from(const table_core& x) + { + BOOST_ASSERT(empty()); + BOOST_ASSERT(this!=std::addressof(x)); + if(arrays.groups_size_mask==x.arrays.groups_size_mask){ + fast_copy_elements_from(x); + } + else{ + x.for_all_elements([this](const element_type* p){ + unchecked_insert(*p); + }); + } + } + + void fast_copy_elements_from(const table_core& x) + { + if(arrays.elements){ + copy_elements_array_from(x); + copy_groups_array_from(x); + size_ctrl.size=std::size_t(x.size_ctrl.size); + } + } + + void copy_elements_array_from(const table_core& x) + { + copy_elements_array_from( + x, + std::integral_constant< + bool, +#if BOOST_WORKAROUND(BOOST_LIBSTDCXX_VERSION,<50000) + /* std::is_trivially_copy_constructible not provided */ + boost::has_trivial_copy::value +#else + std::is_trivially_copy_constructible::value +#endif + &&( + is_std_allocator::value|| + !alloc_has_construct::value) + >{} + ); + } + + void copy_elements_array_from( + const table_core& x,std::true_type /* -> memcpy */) + { + /* reinterpret_cast: GCC may complain about value_type not being trivially + * copy-assignable when we're relying on trivial copy constructibility. 
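copy_elements_array_from above picks between its two overloads with a tag dispatch: std::memcpy when elements are trivially copy constructible (and the allocator has no custom construct), element by element otherwise. A reduced standalone version of the pattern, with illustrative names; the real code additionally rolls back already-constructed elements on exception:

#include <cstddef>
#include <cstring>
#include <new>
#include <type_traits>

template<typename T>
void copy_array(T* dst, const T* src, std::size_t n, std::true_type)
{
  std::memcpy(dst, src, n * sizeof(T)); /* one bulk copy */
}

template<typename T>
void copy_array(T* dst, const T* src, std::size_t n, std::false_type)
{
  for (std::size_t i = 0; i < n; ++i) ::new (dst + i) T(src[i]);
}

template<typename T>
void copy_array(T* dst, const T* src, std::size_t n)
{
  copy_array(dst, src, n, std::is_trivially_copy_constructible<T>{});
}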
+ */ + std::memcpy( + reinterpret_cast(arrays.elements), + reinterpret_cast(x.arrays.elements), + x.capacity()*sizeof(value_type)); + } + + void copy_elements_array_from( + const table_core& x,std::false_type /* -> manual */) + { + std::size_t num_constructed=0; + BOOST_TRY{ + x.for_all_elements([&,this](const element_type* p){ + construct_element(arrays.elements+(p-x.arrays.elements),*p); + ++num_constructed; + }); + } + BOOST_CATCH(...){ + if(num_constructed){ + x.for_all_elements_while([&,this](const element_type* p){ + destroy_element(arrays.elements+(p-x.arrays.elements)); + return --num_constructed!=0; + }); + } + BOOST_RETHROW + } + BOOST_CATCH_END + } + + void copy_groups_array_from(const table_core& x) { + copy_groups_array_from(x, std::integral_constant::value +#else + std::is_trivially_copy_assignable::value +#endif + >{} + ); + } + + void copy_groups_array_from( + const table_core& x, std::true_type /* -> memcpy */) + { + std::memcpy( + arrays.groups,x.arrays.groups, + (arrays.groups_size_mask+1)*sizeof(group_type)); + } + + void copy_groups_array_from( + const table_core& x, std::false_type /* -> manual */) + { + for(std::size_t i=0;i(pg)+pos); + } + + std::size_t initial_max_load()const + { + static constexpr std::size_t small_capacity=2*N-1; + + auto capacity_=capacity(); + if(capacity_<=small_capacity){ + return capacity_; /* we allow 100% usage */ + } + else{ + return (std::size_t)(mlf*(float)(capacity_)); + } + } + + static std::size_t capacity_for(std::size_t n) + { + return size_policy::size(size_index_for(n))*N-1; + } + BOOST_NOINLINE void unchecked_rehash(std::size_t n) { auto new_arrays_=new_arrays(n); @@ -2087,25 +2032,7 @@ private: } delete_arrays(arrays); arrays=new_arrays_; - ml=initial_max_load(); - } - - void noshrink_reserve(std::size_t n) - { - /* used only on assignment after element clearance */ - BOOST_ASSERT(empty()); - - if(n){ - n=std::size_t(std::ceil(float(n)/mlf)); /* elements -> slots */ - n=capacity_for(n); /* exact resulting capacity */ - - if(n>capacity()){ - auto new_arrays_=new_arrays(n); - delete_arrays(arrays); - arrays=new_arrays_; - ml=initial_max_load(); - } - } + size_ctrl.ml=initial_max_load(); } template @@ -2151,17 +2078,7 @@ private: } template - iterator unchecked_emplace_at( - std::size_t pos0,std::size_t hash,Args&&... args) - { - auto res=nosize_unchecked_emplace_at( - arrays,pos0,hash,std::forward(args)...); - ++size_; - return res; - } - - template - iterator nosize_unchecked_emplace_at( + locator nosize_unchecked_emplace_at( const arrays_type& arrays_,std::size_t pos0,std::size_t hash, Args&&... 
args) { @@ -2179,73 +2096,6 @@ private: else pg->mark_overflow(hash); } } - - template - std::size_t erase_if_impl(Predicate pr) - { - std::size_t s=size(); - for_all_elements([&,this](group_type* pg,unsigned int n,element_type* p){ - if(pr(type_policy::value_from(*p))) erase(iterator{pg,n,p}); - }); - return std::size_t(s-size()); - } - - template - void for_all_elements(F f)const - { - for_all_elements(arrays,f); - } - - template - static auto for_all_elements(const arrays_type& arrays_,F f) - ->decltype(f(nullptr),void()) - { - for_all_elements_while(arrays_,[&](element_type* p){f(p);return true;}); - } - - template - static auto for_all_elements(const arrays_type& arrays_,F f) - ->decltype(f(nullptr,0,nullptr),void()) - { - for_all_elements_while( - arrays_,[&](group_type* pg,unsigned int n,element_type* p) - {f(pg,n,p);return true;}); - } - - template - void for_all_elements_while(F f)const - { - for_all_elements_while(arrays,f); - } - - template - static auto for_all_elements_while(const arrays_type& arrays_,F f) - ->decltype(f(nullptr),void()) - { - for_all_elements_while( - arrays_,[&](group_type*,unsigned int,element_type* p){return f(p);}); - } - - template - static auto for_all_elements_while(const arrays_type& arrays_,F f) - ->decltype(f(nullptr,0,nullptr),void()) - { - auto p=arrays_.elements; - if(!p){return;} - for(auto pg=arrays_.groups,last=pg+arrays_.groups_size_mask+1; - pg!=last;++pg,p+=N){ - auto mask=pg->match_really_occupied(); - while(mask){ - auto n=unchecked_countr_zero(mask); - if(!f(pg,n,p+n))return; - mask&=mask-1; - } - } - } - - std::size_t size_; - arrays_type arrays; - std::size_t ml; }; #if BOOST_WORKAROUND(BOOST_MSVC,<=1900) @@ -2256,9 +2106,7 @@ private: #pragma warning(pop) /* C4714 */ #endif -#if defined(BOOST_GCC) -#pragma GCC diagnostic pop /* ignored "-Wshadow" */ -#endif +#include } /* namespace foa */ } /* namespace detail */ @@ -2266,6 +2114,6 @@ private: } /* namespace boost */ #undef BOOST_UNORDERED_STATIC_ASSERT_HASH_PRED -#undef BOOST_UNORDERED_ASSUME +#undef BOOST_UNORDERED_HAS_FEATURE #undef BOOST_UNORDERED_HAS_BUILTIN #endif diff --git a/include/boost/unordered/detail/foa/flat_map_types.hpp b/include/boost/unordered/detail/foa/flat_map_types.hpp new file mode 100644 index 00000000..8a4d25f2 --- /dev/null +++ b/include/boost/unordered/detail/foa/flat_map_types.hpp @@ -0,0 +1,73 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_UNORDERED_DETAIL_FOA_FLAT_MAP_TYPES_HPP +#define BOOST_UNORDERED_DETAIL_FOA_FLAT_MAP_TYPES_HPP + +#include + +namespace boost { + namespace unordered { + namespace detail { + namespace foa { + template struct flat_map_types + { + using key_type = Key; + using raw_key_type = typename std::remove_const::type; + using raw_mapped_type = typename std::remove_const::type; + + using init_type = std::pair; + using moved_type = std::pair; + using value_type = std::pair; + + using element_type = value_type; + + static value_type& value_from(element_type& x) { return x; } + + template + static raw_key_type const& extract(std::pair const& kv) + { + return kv.first; + } + + static moved_type move(init_type& x) + { + return {std::move(x.first), std::move(x.second)}; + } + + static moved_type move(element_type& x) + { + // TODO: we probably need to launder here + return {std::move(const_cast(x.first)), + std::move(const_cast(x.second))}; + } + + template + static void construct(A& al, init_type* p, Args&&... args) + { + boost::allocator_construct(al, p, std::forward(args)...); + } + + template + static void construct(A& al, value_type* p, Args&&... args) + { + boost::allocator_construct(al, p, std::forward(args)...); + } + + template static void destroy(A& al, init_type* p) noexcept + { + boost::allocator_destroy(al, p); + } + + template static void destroy(A& al, value_type* p) noexcept + { + boost::allocator_destroy(al, p); + } + }; + } // namespace foa + } // namespace detail + } // namespace unordered +} // namespace boost + +#endif // BOOST_UNORDERED_DETAIL_FOA_FLAT_MAP_TYPES_HPP diff --git a/include/boost/unordered/detail/foa/flat_set_types.hpp b/include/boost/unordered/detail/foa/flat_set_types.hpp new file mode 100644 index 00000000..493cb4fe --- /dev/null +++ b/include/boost/unordered/detail/foa/flat_set_types.hpp @@ -0,0 +1,44 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_UNORDERED_DETAIL_FOA_FLAT_SET_TYPES_HPP +#define BOOST_UNORDERED_DETAIL_FOA_FLAT_SET_TYPES_HPP + +#include + +namespace boost { + namespace unordered { + namespace detail { + namespace foa { + template struct flat_set_types + { + using key_type = Key; + using init_type = Key; + using value_type = Key; + + static Key const& extract(value_type const& key) { return key; } + + using element_type = value_type; + + static Key& value_from(element_type& x) { return x; } + + static element_type&& move(element_type& x) { return std::move(x); } + + template + static void construct(A& al, value_type* p, Args&&... args) + { + boost::allocator_construct(al, p, std::forward(args)...); + } + + template static void destroy(A& al, value_type* p) noexcept + { + boost::allocator_destroy(al, p); + } + }; + } // namespace foa + } // namespace detail + } // namespace unordered +} // namespace boost + +#endif // BOOST_UNORDERED_DETAIL_FOA_FLAT_SET_TYPES_HPP diff --git a/include/boost/unordered/detail/foa/ignore_wshadow.hpp b/include/boost/unordered/detail/foa/ignore_wshadow.hpp new file mode 100644 index 00000000..f84262bc --- /dev/null +++ b/include/boost/unordered/detail/foa/ignore_wshadow.hpp @@ -0,0 +1,35 @@ +/* Copyright 2023 Joaquin M Lopez Munoz. + * Distributed under the Boost Software License, Version 1.0. 
+ * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * See https://www.boost.org/libs/unordered for library home page. + */ + +#include + +#if defined(BOOST_GCC) +#if !defined(BOOST_UNORDERED_DETAIL_RESTORE_WSHADOW) + /* GCC's -Wshadow triggers at scenarios like this: + * + * struct foo{}; + * template + * struct derived:Base + * { + * void f(){int foo;} + * }; + * + * derivedx; + * x.f(); // declaration of "foo" in derived::f shadows base type "foo" + * + * This makes shadowing warnings unavoidable in general when a class template + * derives from user-provided classes, as is the case with foa::table_core + * deriving from empty_value. + */ + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#else +#pragma GCC diagnostic pop +#endif +#endif diff --git a/include/boost/unordered/detail/foa/node_map_types.hpp b/include/boost/unordered/detail/foa/node_map_types.hpp new file mode 100644 index 00000000..0853dfe9 --- /dev/null +++ b/include/boost/unordered/detail/foa/node_map_types.hpp @@ -0,0 +1,131 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_UNORDERED_DETAIL_FOA_NODE_MAP_TYPES_HPP +#define BOOST_UNORDERED_DETAIL_FOA_NODE_MAP_TYPES_HPP + +#include +#include + +namespace boost { + namespace unordered { + namespace detail { + namespace foa { + template struct node_map_types + { + using key_type = Key; + using mapped_type = T; + using raw_key_type = typename std::remove_const::type; + using raw_mapped_type = typename std::remove_const::type; + + using init_type = std::pair; + using value_type = std::pair; + using moved_type = std::pair; + + using element_type = foa::element_type; + + static value_type& value_from(element_type const& x) + { + return *(x.p); + } + + template + static raw_key_type const& extract(std::pair const& kv) + { + return kv.first; + } + + static raw_key_type const& extract(element_type const& kv) + { + return kv.p->first; + } + + static element_type&& move(element_type& x) { return std::move(x); } + static moved_type move(init_type& x) + { + return {std::move(x.first), std::move(x.second)}; + } + + static moved_type move(value_type& x) + { + return {std::move(const_cast(x.first)), + std::move(const_cast(x.second))}; + } + + template + static void construct(A&, element_type* p, element_type&& x) noexcept + { + p->p = x.p; + x.p = nullptr; + } + + template + static void construct( + A& al, element_type* p, element_type const& copy) + { + construct(al, p, *copy.p); + } + + template + static void construct(A& al, init_type* p, Args&&... args) + { + boost::allocator_construct(al, p, std::forward(args)...); + } + + template + static void construct(A& al, value_type* p, Args&&... args) + { + boost::allocator_construct(al, p, std::forward(args)...); + } + + template + static void construct(A& al, element_type* p, Args&&... args) + { + p->p = boost::to_address(boost::allocator_allocate(al, 1)); + BOOST_TRY + { + boost::allocator_construct(al, p->p, std::forward(args)...); + } + BOOST_CATCH(...) 
+ { + using pointer_type = typename boost::allocator_pointer::type; + using pointer_traits = boost::pointer_traits; + + boost::allocator_deallocate( + al, pointer_traits::pointer_to(*(p->p)), 1); + BOOST_RETHROW + } + BOOST_CATCH_END + } + + template static void destroy(A& al, value_type* p) noexcept + { + boost::allocator_destroy(al, p); + } + + template static void destroy(A& al, init_type* p) noexcept + { + boost::allocator_destroy(al, p); + } + + template + static void destroy(A& al, element_type* p) noexcept + { + if (p->p) { + using pointer_type = typename boost::allocator_pointer::type; + using pointer_traits = boost::pointer_traits; + + destroy(al, p->p); + boost::allocator_deallocate( + al, pointer_traits::pointer_to(*(p->p)), 1); + } + } + }; + + } // namespace foa + } // namespace detail + } // namespace unordered +} // namespace boost + +#endif // BOOST_UNORDERED_DETAIL_FOA_NODE_MAP_TYPES_HPP diff --git a/include/boost/unordered/detail/foa/node_set_types.hpp b/include/boost/unordered/detail/foa/node_set_types.hpp new file mode 100644 index 00000000..68c20986 --- /dev/null +++ b/include/boost/unordered/detail/foa/node_set_types.hpp @@ -0,0 +1,94 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_UNORDERED_DETAIL_FOA_NODE_SET_TYPES_HPP +#define BOOST_UNORDERED_DETAIL_FOA_NODE_SET_TYPES_HPP + +#include +#include + +namespace boost { + namespace unordered { + namespace detail { + namespace foa { + + template struct node_set_types + { + using key_type = Key; + using init_type = Key; + using value_type = Key; + + static Key const& extract(value_type const& key) { return key; } + + using element_type = foa::element_type; + + static value_type& value_from(element_type const& x) { return *x.p; } + static Key const& extract(element_type const& k) { return *k.p; } + static element_type&& move(element_type& x) { return std::move(x); } + static value_type&& move(value_type& x) { return std::move(x); } + + template + static void construct( + A& al, element_type* p, element_type const& copy) + { + construct(al, p, *copy.p); + } + + template + static void construct( + Allocator&, element_type* p, element_type&& x) noexcept + { + p->p = x.p; + x.p = nullptr; + } + + template + static void construct(A& al, value_type* p, Args&&... args) + { + boost::allocator_construct(al, p, std::forward(args)...); + } + + template + static void construct(A& al, element_type* p, Args&&... args) + { + p->p = boost::to_address(boost::allocator_allocate(al, 1)); + BOOST_TRY + { + boost::allocator_construct(al, p->p, std::forward(args)...); + } + BOOST_CATCH(...) 
+ { + boost::allocator_deallocate(al, + boost::pointer_traits::type>::pointer_to(*p->p), + 1); + BOOST_RETHROW + } + BOOST_CATCH_END + } + + template static void destroy(A& al, value_type* p) noexcept + { + boost::allocator_destroy(al, p); + } + + template + static void destroy(A& al, element_type* p) noexcept + { + if (p->p) { + destroy(al, p->p); + boost::allocator_deallocate(al, + boost::pointer_traits::type>::pointer_to(*(p->p)), + 1); + } + } + }; + + } // namespace foa + } // namespace detail + } // namespace unordered +} // namespace boost + +#endif // BOOST_UNORDERED_DETAIL_FOA_NODE_SET_TYPES_HPP diff --git a/include/boost/unordered/detail/foa/restore_wshadow.hpp b/include/boost/unordered/detail/foa/restore_wshadow.hpp new file mode 100644 index 00000000..89c32c23 --- /dev/null +++ b/include/boost/unordered/detail/foa/restore_wshadow.hpp @@ -0,0 +1,11 @@ +/* Copyright 2023 Joaquin M Lopez Munoz. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * See https://www.boost.org/libs/unordered for library home page. + */ + +#define BOOST_UNORDERED_DETAIL_RESTORE_WSHADOW +#include +#undef BOOST_UNORDERED_DETAIL_RESTORE_WSHADOW diff --git a/include/boost/unordered/detail/foa/rw_spinlock.hpp b/include/boost/unordered/detail/foa/rw_spinlock.hpp new file mode 100644 index 00000000..83e00255 --- /dev/null +++ b/include/boost/unordered/detail/foa/rw_spinlock.hpp @@ -0,0 +1,187 @@ +#ifndef BOOST_UNORDERED_DETAIL_FOA_RW_SPINLOCK_HPP_INCLUDED +#define BOOST_UNORDERED_DETAIL_FOA_RW_SPINLOCK_HPP_INCLUDED + +// Copyright 2023 Peter Dimov +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include +#include +#include + +namespace boost{ +namespace unordered{ +namespace detail{ +namespace foa{ + +class rw_spinlock +{ +private: + + // bit 31: locked exclusive + // bit 30: writer pending + // bit 29..0: reader lock count + + static constexpr std::uint32_t locked_exclusive_mask = 1u << 31; // 0x8000'0000 + static constexpr std::uint32_t writer_pending_mask = 1u << 30; // 0x4000'0000 + static constexpr std::uint32_t reader_lock_count_mask = writer_pending_mask - 1; // 0x3FFF'FFFF + + std::atomic state_ = {}; + +private: + + // number of times to spin before sleeping + static constexpr int spin_count = 24576; + +public: + + bool try_lock_shared() noexcept + { + std::uint32_t st = state_.load( std::memory_order_relaxed ); + + if( st >= reader_lock_count_mask ) + { + // either bit 31 set, bit 30 set, or reader count is max + return false; + } + + std::uint32_t newst = st + 1; + return state_.compare_exchange_strong( st, newst, std::memory_order_acquire, std::memory_order_relaxed ); + } + + void lock_shared() noexcept + { + for( ;; ) + { + for( int k = 0; k < spin_count; ++k ) + { + std::uint32_t st = state_.load( std::memory_order_relaxed ); + + if( st < reader_lock_count_mask ) + { + std::uint32_t newst = st + 1; + if( state_.compare_exchange_weak( st, newst, std::memory_order_acquire, std::memory_order_relaxed ) ) return; + } + + boost::core::sp_thread_pause(); + } + + boost::core::sp_thread_sleep(); + } + } + + void unlock_shared() noexcept + { + // pre: locked shared, not locked exclusive + + state_.fetch_sub( 1, std::memory_order_release ); + + // if the writer pending bit is set, there's a writer waiting + // let it acquire the lock; it will clear the bit on unlock + } + + bool try_lock() noexcept + { + std::uint32_t st = state_.load( 
std::memory_order_relaxed ); + + if( st & locked_exclusive_mask ) + { + // locked exclusive + return false; + } + + if( st & reader_lock_count_mask ) + { + // locked shared + return false; + } + + std::uint32_t newst = locked_exclusive_mask; + return state_.compare_exchange_strong( st, newst, std::memory_order_acquire, std::memory_order_relaxed ); + } + + void lock() noexcept + { + for( ;; ) + { + for( int k = 0; k < spin_count; ++k ) + { + std::uint32_t st = state_.load( std::memory_order_relaxed ); + + if( st & locked_exclusive_mask ) + { + // locked exclusive, spin + } + else if( ( st & reader_lock_count_mask ) == 0 ) + { + // not locked exclusive, not locked shared, try to lock + + std::uint32_t newst = locked_exclusive_mask; + if( state_.compare_exchange_weak( st, newst, std::memory_order_acquire, std::memory_order_relaxed ) ) return; + } + else if( st & writer_pending_mask ) + { + // writer pending bit already set, nothing to do + } + else + { + // locked shared, set writer pending bit + + std::uint32_t newst = st | writer_pending_mask; + state_.compare_exchange_weak( st, newst, std::memory_order_relaxed, std::memory_order_relaxed ); + } + + boost::core::sp_thread_pause(); + } + + // clear writer pending bit before going to sleep + + { + std::uint32_t st = state_.load( std::memory_order_relaxed ); + + for( ;; ) + { + if( st & locked_exclusive_mask ) + { + // locked exclusive, nothing to do + break; + } + else if( ( st & reader_lock_count_mask ) == 0 ) + { + // lock free, try to take it + + std::uint32_t newst = locked_exclusive_mask; + if( state_.compare_exchange_weak( st, newst, std::memory_order_acquire, std::memory_order_relaxed ) ) return; + } + else if( ( st & writer_pending_mask ) == 0 ) + { + // writer pending bit already clear, nothing to do + break; + } + else + { + // clear writer pending bit + + std::uint32_t newst = st & ~writer_pending_mask; + if( state_.compare_exchange_weak( st, newst, std::memory_order_relaxed, std::memory_order_relaxed ) ) break; + } + } + } + + boost::core::sp_thread_sleep(); + } + } + + void unlock() noexcept + { + // pre: locked exclusive, not locked shared + state_.store( 0, std::memory_order_release ); + } +}; + +} /* namespace foa */ +} /* namespace detail */ +} /* namespace unordered */ +} /* namespace boost */ + +#endif // BOOST_UNORDERED_DETAIL_FOA_RW_SPINLOCK_HPP_INCLUDED diff --git a/include/boost/unordered/detail/foa/table.hpp b/include/boost/unordered/detail/foa/table.hpp new file mode 100644 index 00000000..a2fa96e6 --- /dev/null +++ b/include/boost/unordered/detail/foa/table.hpp @@ -0,0 +1,513 @@ +/* Fast open-addressing hash table. + * + * Copyright 2022-2023 Joaquin M Lopez Munoz. + * Copyright 2023 Christian Mazakas. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * See https://www.boost.org/libs/unordered for library home page. 
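rw_spinlock above models the standard Lockable and SharedLockable requirements (lock/try_lock/unlock plus the _shared triple), so the usual standard adapters work with it unchanged. A usage sketch; note this is a detail-namespace type, shown only to illustrate the interface:

#include <mutex>        /* std::unique_lock */
#include <shared_mutex> /* std::shared_lock, C++14 */

using boost::unordered::detail::foa::rw_spinlock;

rw_spinlock m;
int value = 0;

int reader()
{
  std::shared_lock<rw_spinlock> lk(m); /* lock_shared()/unlock_shared() */
  return value;                        /* many readers may hold it at once */
}

void writer(int x)
{
  std::unique_lock<rw_spinlock> lk(m); /* lock()/unlock(), exclusive */
  value = x;
}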
+ */ + +#ifndef BOOST_UNORDERED_DETAIL_FOA_TABLE_HPP +#define BOOST_UNORDERED_DETAIL_FOA_TABLE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost{ +namespace unordered{ +namespace detail{ +namespace foa{ + +/* use plain integrals for group metadata storage */ + +template +struct plain_integral +{ + operator Integral()const{return n;} + void operator=(Integral m){n=m;} + +#if BOOST_WORKAROUND(BOOST_GCC,>=50000 && BOOST_GCC<60000) + void operator|=(Integral m){n=static_cast(n|m);} + void operator&=(Integral m){n=static_cast(n&m);} +#else + void operator|=(Integral m){n|=m;} + void operator&=(Integral m){n&=m;} +#endif + + Integral n; +}; + +struct plain_size_control +{ + std::size_t ml; + std::size_t size; +}; + +template +class table; + +/* table_iterator keeps two pointers: + * + * - A pointer p to the element slot. + * - A pointer pc to the n-th byte of the associated group metadata, where n + * is the position of the element in the group. + * + * A simpler solution would have been to keep a pointer p to the element, a + * pointer pg to the group, and the position n, but that would increase + * sizeof(table_iterator) by 4/8 bytes. In order to make this compact + * representation feasible, it is required that group objects are aligned + * to their size, so that we can recover pg and n as + * + * - n = pc%sizeof(group) + * - pg = pc-n + * + * (for explanatory purposes pg and pc are treated above as if they were memory + * addresses rather than pointers). + * + * p = nullptr is conventionally used to mark end() iterators. + */ + +/* internal conversion from const_iterator to iterator */ +struct const_iterator_cast_tag{}; + +template +class table_iterator +{ + using type_policy=TypePolicy; + using table_element_type=typename type_policy::element_type; + using group_type=Group; + static constexpr auto N=group_type::N; + static constexpr auto regular_layout=group_type::regular_layout; + +public: + using difference_type=std::ptrdiff_t; + using value_type=typename type_policy::value_type; + using pointer= + typename std::conditional::type; + using reference= + typename std::conditional::type; + using iterator_category=std::forward_iterator_tag; + using element_type= + typename std::conditional::type; + + table_iterator()=default; + template::type* =nullptr> + table_iterator(const table_iterator& x): + pc{x.pc},p{x.p}{} + table_iterator( + const_iterator_cast_tag, const table_iterator& x): + pc{x.pc},p{x.p}{} + + inline reference operator*()const noexcept + {return type_policy::value_from(*p);} + inline pointer operator->()const noexcept + {return std::addressof(type_policy::value_from(*p));} + inline table_iterator& operator++()noexcept{increment();return *this;} + inline table_iterator operator++(int)noexcept + {auto x=*this;increment();return x;} + friend inline bool operator==( + const table_iterator& x,const table_iterator& y) + {return x.p==y.p;} + friend inline bool operator!=( + const table_iterator& x,const table_iterator& y) + {return !(x==y);} + +private: + template friend class table_iterator; + template friend class table; + + table_iterator(Group* pg,std::size_t n,const table_element_type* p_): + pc{reinterpret_cast(const_cast(pg))+n}, + p{const_cast(p_)} + {} + + inline void increment()noexcept + { + BOOST_ASSERT(p!=nullptr); + increment(std::integral_constant{}); + } + + inline void increment(std::true_type /* regular layout */)noexcept + { + for(;;){ + ++p; + if(reinterpret_cast(pc)%sizeof(group_type)==N-1){ + 
pc+=sizeof(group_type)-(N-1); + break; + } + ++pc; + if(!group_type::is_occupied(pc))continue; + if(BOOST_UNLIKELY(group_type::is_sentinel(pc)))p=nullptr; + return; + } + + for(;;){ + int mask=reinterpret_cast(pc)->match_occupied(); + if(mask!=0){ + auto n=unchecked_countr_zero(mask); + if(BOOST_UNLIKELY(reinterpret_cast(pc)->is_sentinel(n))){ + p=nullptr; + } + else{ + pc+=n; + p+=n; + } + return; + } + pc+=sizeof(group_type); + p+=N; + } + } + + inline void increment(std::false_type /* interleaved */)noexcept + { + std::size_t n0=reinterpret_cast(pc)%sizeof(group_type); + pc-=n0; + + int mask=( + reinterpret_cast(pc)->match_occupied()>>(n0+1))<<(n0+1); + if(!mask){ + do{ + pc+=sizeof(group_type); + p+=N; + } + while((mask=reinterpret_cast(pc)->match_occupied())==0); + } + + auto n=unchecked_countr_zero(mask); + if(BOOST_UNLIKELY(reinterpret_cast(pc)->is_sentinel(n))){ + p=nullptr; + } + else{ + pc+=n; + p-=n0; + p+=n; + } + } + + unsigned char *pc=nullptr; + table_element_type *p=nullptr; +}; + +/* foa::table interface departs in a number of ways from that of C++ unordered + * associative containers because it's not for end-user consumption + * (boost::unordered_(flat|node)_(map|set) wrappers complete it as + * appropriate). + * + * The table supports two main modes of operation: flat and node-based. In the + * flat case, buckets directly store elements. For node-based, buckets store + * pointers to individually heap-allocated elements. + * + * For both flat and node-based: + * + * - begin() is not O(1). + * - No bucket API. + * - Load factor is fixed and can't be set by the user. + * + * For flat only: + * + * - value_type must be moveable. + * - Pointer stability is not kept under rehashing. + * - No extract API. + * + * try_emplace, erase and find support heterogeneous lookup by default, + * that is, without checking for any ::is_transparent typedefs --the + * checking is done by boost::unordered_(flat|node)_(map|set). 
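Tying the last point of the comment above to the public wrappers: the heterogeneous overloads only participate when both the hasher and the equality predicate export is_transparent. A sketch (C++17 for std::string_view):

#include <boost/unordered/unordered_flat_map.hpp>
#include <functional>
#include <string>
#include <string_view>

struct string_hash
{
  using is_transparent = void; /* opt in to heterogeneous lookup */
  std::size_t operator()(std::string_view sv) const
  {
    return std::hash<std::string_view>()(sv);
  }
};

boost::unordered_flat_map<std::string, int, string_hash, std::equal_to<>> m;

/* find() takes the view directly; no temporary std::string is built:
 *   m.try_emplace("key", 1);
 *   auto it = m.find(std::string_view("key"));
 */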
+ */ + +template +using table_core_impl= + table_core,table_arrays, + plain_size_control,Hash,Pred,Allocator>; + +#include + +#if defined(BOOST_MSVC) +#pragma warning(push) +#pragma warning(disable:4714) /* marked as __forceinline not inlined */ +#endif + +template +class table:table_core_impl +{ + using super=table_core_impl; + using type_policy=typename super::type_policy; + using group_type=typename super::group_type; + using super::N; + using prober=typename super::prober; + using locator=typename super::locator; + +public: + using key_type=typename super::key_type; + using init_type=typename super::init_type; + using value_type=typename super::value_type; + using element_type=typename super::element_type; + +private: + static constexpr bool has_mutable_iterator= + !std::is_same::value; + +public: + using hasher=typename super::hasher; + using key_equal=typename super::key_equal; + using allocator_type=typename super::allocator_type; + using pointer=typename super::pointer; + using const_pointer=typename super::const_pointer; + using reference=typename super::reference; + using const_reference=typename super::const_reference; + using size_type=typename super::size_type; + using difference_type=typename super::difference_type; + using const_iterator=table_iterator; + using iterator=typename std::conditional< + has_mutable_iterator, + table_iterator, + const_iterator>::type; + + table( + std::size_t n=default_bucket_count,const Hash& h_=Hash(), + const Pred& pred_=Pred(),const Allocator& al_=Allocator()): + super{n,h_,pred_,al_} + {} + + table(const table& x)=default; + table(table&& x)=default; + table(const table& x,const Allocator& al_):super{x,al_}{} + table(table&& x,const Allocator& al_):super{std::move(x),al_}{} + ~table()=default; + + table& operator=(const table& x)=default; + table& operator=(table&& x)=default; + + using super::get_allocator; + + iterator begin()noexcept + { + iterator it{this->arrays.groups,0,this->arrays.elements}; + if(this->arrays.elements&& + !(this->arrays.groups[0].match_occupied()&0x1))++it; + return it; + } + + const_iterator begin()const noexcept + {return const_cast(this)->begin();} + iterator end()noexcept{return {};} + const_iterator end()const noexcept{return const_cast(this)->end();} + const_iterator cbegin()const noexcept{return begin();} + const_iterator cend()const noexcept{return end();} + + using super::empty; + using super::size; + using super::max_size; + + template + BOOST_FORCEINLINE std::pair emplace(Args&&... args) + { + auto x=alloc_make_insert_type( + this->al(),std::forward(args)...); + return emplace_impl(type_policy::move(x.value())); + } + + template + BOOST_FORCEINLINE std::pair try_emplace( + Key&& x,Args&&... 
args) + { + return emplace_impl( + try_emplace_args_t{},std::forward(x),std::forward(args)...); + } + + BOOST_FORCEINLINE std::pair + insert(const init_type& x){return emplace_impl(x);} + + BOOST_FORCEINLINE std::pair + insert(init_type&& x){return emplace_impl(std::move(x));} + + /* template tilts call ambiguities in favor of init_type */ + + template + BOOST_FORCEINLINE std::pair + insert(const value_type& x){return emplace_impl(x);} + + template + BOOST_FORCEINLINE std::pair + insert(value_type&& x){return emplace_impl(std::move(x));} + + template + BOOST_FORCEINLINE + typename std::enable_if< + !std::is_same::value, + std::pair + >::type + insert(element_type&& x){return emplace_impl(std::move(x));} + + template< + bool dependent_value=false, + typename std::enable_if< + has_mutable_iterator||dependent_value>::type* =nullptr + > + void erase(iterator pos)noexcept{return erase(const_iterator(pos));} + + BOOST_FORCEINLINE + void erase(const_iterator pos)noexcept + { + super::erase(pos.pc,pos.p); + } + + template + BOOST_FORCEINLINE + auto erase(Key&& x) -> typename std::enable_if< + !std::is_convertible::value&& + !std::is_convertible::value, std::size_t>::type + { + auto it=find(x); + if(it!=end()){ + erase(it); + return 1; + } + else return 0; + } + + void swap(table& x) + noexcept(noexcept(std::declval().swap(std::declval()))) + { + super::swap(x); + } + + using super::clear; + + element_type extract(const_iterator pos) + { + BOOST_ASSERT(pos!=end()); + erase_on_exit e{*this,pos}; + (void)e; + return std::move(*pos.p); + } + + // TODO: should we accept different allocator too? + template + void merge(table& x) + { + x.for_all_elements([&,this](group_type* pg,unsigned int n,element_type* p){ + erase_on_exit e{x,{pg,n,p}}; + if(!emplace_impl(type_policy::move(*p)).second)e.rollback(); + }); + } + + template + void merge(table&& x){merge(x);} + + using super::hash_function; + using super::key_eq; + + template + BOOST_FORCEINLINE iterator find(const Key& x) + { + return make_iterator(super::find(x)); + } + + template + BOOST_FORCEINLINE const_iterator find(const Key& x)const + { + return const_cast(this)->find(x); + } + + using super::capacity; + using super::load_factor; + using super::max_load_factor; + using super::max_load; + using super::rehash; + using super::reserve; + + template + friend std::size_t erase_if(table& x,Predicate& pr) + { + using value_reference=typename std::conditional< + std::is_same::value, + const_reference, + reference + >::type; + + std::size_t s=x.size(); + x.for_all_elements( + [&](group_type* pg,unsigned int n,element_type* p){ + if(pr(const_cast(type_policy::value_from(*p)))){ + x.super::erase(pg,n,p); + } + }); + return std::size_t(s-x.size()); + } + + friend bool operator==(const table& x,const table& y) + { + return static_cast(x)==static_cast(y); + } + + friend bool operator!=(const table& x,const table& y){return !(x==y);} + +private: + struct erase_on_exit + { + erase_on_exit(table& x_,const_iterator it_):x{x_},it{it_}{} + ~erase_on_exit(){if(!rollback_)x.erase(it);} + + void rollback(){rollback_=true;} + + table& x; + const_iterator it; + bool rollback_=false; + }; + + static inline iterator make_iterator(const locator& l)noexcept + { + return {l.pg,l.n,l.p}; + } + + template + BOOST_FORCEINLINE std::pair emplace_impl(Args&&... 
args) + { + const auto &k=this->key_from(std::forward(args)...); + auto hash=this->hash_for(k); + auto pos0=this->position_for(hash); + auto loc=super::find(k,pos0,hash); + + if(loc){ + return {make_iterator(loc),false}; + } + if(BOOST_LIKELY(this->size_ctrl.sizesize_ctrl.ml)){ + return { + make_iterator( + this->unchecked_emplace_at(pos0,hash,std::forward(args)...)), + true + }; + } + else{ + return { + make_iterator( + this->unchecked_emplace_with_rehash( + hash,std::forward(args)...)), + true + }; + } + } +}; + +#if defined(BOOST_MSVC) +#pragma warning(pop) /* C4714 */ +#endif + +#include + +} /* namespace foa */ +} /* namespace detail */ +} /* namespace unordered */ +} /* namespace boost */ + +#endif diff --git a/include/boost/unordered/detail/foa/tuple_rotate_right.hpp b/include/boost/unordered/detail/foa/tuple_rotate_right.hpp new file mode 100644 index 00000000..c95077b2 --- /dev/null +++ b/include/boost/unordered/detail/foa/tuple_rotate_right.hpp @@ -0,0 +1,52 @@ +/* Copyright 2023 Joaquin M Lopez Munoz. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * See https://www.boost.org/libs/unordered for library home page. + */ + +#ifndef BOOST_UNORDERED_DETAIL_FOA_TUPLE_ROTATE_RIGHT_HPP +#define BOOST_UNORDERED_DETAIL_FOA_TUPLE_ROTATE_RIGHT_HPP + +#include +#include +#include +#include + +namespace boost{ +namespace unordered{ +namespace detail{ +namespace foa{ + +template +using tuple_rotate_right_return_type=mp11::mp_rotate_right_c< + typename std::remove_cv::type>::type, + 1 +>; + +template +tuple_rotate_right_return_type +tuple_rotate_right_aux(mp11::index_sequence,Tuple&& x) +{ + return tuple_rotate_right_return_type{ + std::get<(Is+sizeof...(Is)-1)%sizeof...(Is)>(std::forward(x))...}; +} + +template +tuple_rotate_right_return_type tuple_rotate_right(Tuple&& x) +{ + using RawTuple=typename std::remove_cv< + typename std::remove_reference::type>::type; + + return tuple_rotate_right_aux( + mp11::make_index_sequence::value>{}, + std::forward(x)); +} + +} /* namespace foa */ +} /* namespace detail */ +} /* namespace unordered */ +} /* namespace boost */ + +#endif diff --git a/include/boost/unordered/detail/fwd.hpp b/include/boost/unordered/detail/fwd.hpp index acaa8f11..7fcb770e 100644 --- a/include/boost/unordered/detail/fwd.hpp +++ b/include/boost/unordered/detail/fwd.hpp @@ -61,4 +61,90 @@ namespace boost { } } +// BOOST_UNORDERED_EMPLACE_LIMIT = The maximum number of parameters in +// emplace (not including things like hints). Don't set it to a lower value, as +// that might break something. + +#if !defined BOOST_UNORDERED_EMPLACE_LIMIT +#define BOOST_UNORDERED_EMPLACE_LIMIT 10 +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Configuration +// +// Unless documented elsewhere these configuration macros should be considered +// an implementation detail, I'll try not to break them, but you never know. + +// Use Sun C++ workarounds +// I'm not sure which versions of the compiler require these workarounds, so +// I'm just using them of everything older than the current test compilers +// (as of May 2017). 
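The index arithmetic in tuple_rotate_right_aux above is easier to see in a self-contained reduction that returns std::make_tuple instead of the mp11-computed type (C++14):

#include <cstddef>
#include <tuple>
#include <utility>

template<typename Tuple, std::size_t... Is>
auto rotate_right_impl(Tuple&& t, std::index_sequence<Is...>)
{
  constexpr std::size_t N = sizeof...(Is);
  /* position Is receives element (Is+N-1)%N, so the last element
   * wraps around to the front */
  return std::make_tuple(std::get<(Is + N - 1) % N>(std::forward<Tuple>(t))...);
}

template<typename... Ts>
auto rotate_right(std::tuple<Ts...> t)
{
  return rotate_right_impl(std::move(t), std::index_sequence_for<Ts...>{});
}

/* rotate_right(std::make_tuple('a', 'b', 'c')) yields ('c', 'a', 'b'):
 * index 0 picks element 2, index 1 picks element 0, index 2 picks 1. */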
+
+#if !defined(BOOST_UNORDERED_SUN_WORKAROUNDS1)
+#if BOOST_COMP_SUNPRO && BOOST_COMP_SUNPRO < BOOST_VERSION_NUMBER(5, 20, 0)
+#define BOOST_UNORDERED_SUN_WORKAROUNDS1 1
+#else
+#define BOOST_UNORDERED_SUN_WORKAROUNDS1 0
+#endif
+#endif
+
+// BOOST_UNORDERED_TUPLE_ARGS
+//
+// Maximum number of std::tuple members to support, or 0 if std::tuple
+// isn't available. More are supported when full C++11 is used.
+
+// Already defined, so do nothing
+#if defined(BOOST_UNORDERED_TUPLE_ARGS)
+
+// Assume if we have C++11 tuple it's properly variadic,
+// and just use a max number of 10 arguments.
+#elif !defined(BOOST_NO_CXX11_HDR_TUPLE)
+#define BOOST_UNORDERED_TUPLE_ARGS 10
+
+// Visual C++ has a decent enough tuple for piecewise construction,
+// so use that if available, using _VARIADIC_MAX for the maximum
+// number of parameters. Note that this comes after the check
+// for a full C++11 tuple.
+#elif defined(BOOST_MSVC)
+#if !BOOST_UNORDERED_HAVE_PIECEWISE_CONSTRUCT
+#define BOOST_UNORDERED_TUPLE_ARGS 0
+#elif defined(_VARIADIC_MAX)
+#define BOOST_UNORDERED_TUPLE_ARGS _VARIADIC_MAX
+#else
+#define BOOST_UNORDERED_TUPLE_ARGS 5
+#endif
+
+// Assume that we don't have std::tuple
+#else
+#define BOOST_UNORDERED_TUPLE_ARGS 0
+#endif
+
+#if BOOST_UNORDERED_TUPLE_ARGS
+#include
+#endif
+
+// BOOST_UNORDERED_CXX11_CONSTRUCTION
+//
+// Use C++11 construction, requires variadic arguments, good construct support
+// in allocator_traits and piecewise construction of std::pair
+// Otherwise allocators aren't used for construction/destruction
+
+#if BOOST_UNORDERED_HAVE_PIECEWISE_CONSTRUCT && \
+  !defined(BOOST_NO_CXX11_VARIADIC_TEMPLATES) && BOOST_UNORDERED_TUPLE_ARGS
+#if BOOST_COMP_SUNPRO && BOOST_LIB_STD_GNU
+// Sun C++ std::pair piecewise construction doesn't seem to be exception safe.
+// (At least for Sun C++ 12.5 using libstdc++).
+#define BOOST_UNORDERED_CXX11_CONSTRUCTION 0
+#elif BOOST_COMP_GNUC && BOOST_COMP_GNUC < BOOST_VERSION_NUMBER(4, 7, 0)
+// Piecewise construction in GCC 4.6 doesn't work for uncopyable types.
+#define BOOST_UNORDERED_CXX11_CONSTRUCTION 0
+#elif !defined(BOOST_NO_CXX11_ALLOCATOR)
+#define BOOST_UNORDERED_CXX11_CONSTRUCTION 1
+#endif
+#endif
+
+#if !defined(BOOST_UNORDERED_CXX11_CONSTRUCTION)
+#define BOOST_UNORDERED_CXX11_CONSTRUCTION 0
+#endif
+
 #endif
diff --git a/include/boost/unordered/detail/implementation.hpp b/include/boost/unordered/detail/implementation.hpp
index 13351171..68f312a5 100644
--- a/include/boost/unordered/detail/implementation.hpp
+++ b/include/boost/unordered/detail/implementation.hpp
@@ -60,92 +60,6 @@
 #include
 #endif
 
-////////////////////////////////////////////////////////////////////////////////
-// Configuration
-//
-// Unless documented elsewhere these configuration macros should be considered
-// an implementation detail, I'll try not to break them, but you never know.
-
-// Use Sun C++ workarounds
-// I'm not sure which versions of the compiler require these workarounds, so
-// I'm just using them of everything older than the current test compilers
-// (as of May 2017).
-
-#if !defined(BOOST_UNORDERED_SUN_WORKAROUNDS1)
-#if BOOST_COMP_SUNPRO && BOOST_COMP_SUNPRO < BOOST_VERSION_NUMBER(5, 20, 0)
-#define BOOST_UNORDERED_SUN_WORKAROUNDS1 1
-#else
-#define BOOST_UNORDERED_SUN_WORKAROUNDS1 0
-#endif
-#endif
-
-// BOOST_UNORDERED_EMPLACE_LIMIT = The maximum number of parameters in
-// emplace (not including things like hints). Don't set it to a lower value, as
-// that might break something.
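"Piecewise construction of std::pair", which BOOST_UNORDERED_CXX11_CONSTRUCTION gates above, means handing each pair member its own argument tuple so both halves are built in place; this is what lets map emplace construct a const key directly instead of copying a ready-made pair:

#include <string>
#include <tuple>
#include <utility>

/* each member is constructed from its own tuple; no temporary
 * std::string and no pair copy/move is involved */
std::pair<const std::string, int> p(
  std::piecewise_construct,
  std::forward_as_tuple(5, 'x'), /* string(5, 'x') == "xxxxx" */
  std::forward_as_tuple(42));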
- -#if !defined BOOST_UNORDERED_EMPLACE_LIMIT -#define BOOST_UNORDERED_EMPLACE_LIMIT 10 -#endif - -// BOOST_UNORDERED_TUPLE_ARGS -// -// Maximum number of std::tuple members to support, or 0 if std::tuple -// isn't avaiable. More are supported when full C++11 is used. - -// Already defined, so do nothing -#if defined(BOOST_UNORDERED_TUPLE_ARGS) - -// Assume if we have C++11 tuple it's properly variadic, -// and just use a max number of 10 arguments. -#elif !defined(BOOST_NO_CXX11_HDR_TUPLE) -#define BOOST_UNORDERED_TUPLE_ARGS 10 - -// Visual C++ has a decent enough tuple for piecewise construction, -// so use that if available, using _VARIADIC_MAX for the maximum -// number of parameters. Note that this comes after the check -// for a full C++11 tuple. -#elif defined(BOOST_MSVC) -#if !BOOST_UNORDERED_HAVE_PIECEWISE_CONSTRUCT -#define BOOST_UNORDERED_TUPLE_ARGS 0 -#elif defined(_VARIADIC_MAX) -#define BOOST_UNORDERED_TUPLE_ARGS _VARIADIC_MAX -#else -#define BOOST_UNORDERED_TUPLE_ARGS 5 -#endif - -// Assume that we don't have std::tuple -#else -#define BOOST_UNORDERED_TUPLE_ARGS 0 -#endif - -#if BOOST_UNORDERED_TUPLE_ARGS -#include -#endif - -// BOOST_UNORDERED_CXX11_CONSTRUCTION -// -// Use C++11 construction, requires variadic arguments, good construct support -// in allocator_traits and piecewise construction of std::pair -// Otherwise allocators aren't used for construction/destruction - -#if BOOST_UNORDERED_HAVE_PIECEWISE_CONSTRUCT && \ - !defined(BOOST_NO_CXX11_VARIADIC_TEMPLATES) && BOOST_UNORDERED_TUPLE_ARGS -#if BOOST_COMP_SUNPRO && BOOST_LIB_STD_GNU -// Sun C++ std::pair piecewise construction doesn't seem to be exception safe. -// (At least for Sun C++ 12.5 using libstdc++). -#define BOOST_UNORDERED_CXX11_CONSTRUCTION 0 -#elif BOOST_COMP_GNUC && BOOST_COMP_GNUC < BOOST_VERSION_NUMBER(4, 7, 0) -// Piecewise construction in GCC 4.6 doesn't work for uncopyable types. -#define BOOST_UNORDERED_CXX11_CONSTRUCTION 0 -#elif !defined(BOOST_NO_CXX11_ALLOCATOR) -#define BOOST_UNORDERED_CXX11_CONSTRUCTION 1 -#endif -#endif - -#if !defined(BOOST_UNORDERED_CXX11_CONSTRUCTION) -#define BOOST_UNORDERED_CXX11_CONSTRUCTION 0 -#endif - #if BOOST_UNORDERED_CXX11_CONSTRUCTION #include #include diff --git a/include/boost/unordered/detail/type_traits.hpp b/include/boost/unordered/detail/type_traits.hpp index fd37a8e4..838611ce 100644 --- a/include/boost/unordered/detail/type_traits.hpp +++ b/include/boost/unordered/detail/type_traits.hpp @@ -20,6 +20,9 @@ #include #include #include + +#include +#include #endif // BOOST_UNORDERED_TEMPLATE_DEDUCTION_GUIDES @@ -101,6 +104,16 @@ namespace boost { !boost::is_integral::value && !is_allocator_v; template constexpr bool const is_pred_v = !is_allocator_v
<P>
; + + template + using iter_key_t = + typename std::iterator_traits::value_type::first_type; + template + using iter_val_t = + typename std::iterator_traits::value_type::second_type; + template + using iter_to_alloc_t = + typename std::pair const, iter_val_t >; #endif } // namespace detail } // namespace unordered diff --git a/include/boost/unordered/unordered_flat_map.hpp b/include/boost/unordered/unordered_flat_map.hpp index 7fb736f6..1012798f 100644 --- a/include/boost/unordered/unordered_flat_map.hpp +++ b/include/boost/unordered/unordered_flat_map.hpp @@ -10,7 +10,8 @@ #pragma once #endif -#include +#include +#include #include #include @@ -32,67 +33,10 @@ namespace boost { #pragma warning(disable : 4714) /* marked as __forceinline not inlined */ #endif - namespace detail { - template struct flat_map_types - { - using key_type = Key; - using raw_key_type = typename std::remove_const::type; - using raw_mapped_type = typename std::remove_const::type; - - using init_type = std::pair; - using moved_type = std::pair; - using value_type = std::pair; - - using element_type = value_type; - - static value_type& value_from(element_type& x) { return x; } - - template - static raw_key_type const& extract(std::pair const& kv) - { - return kv.first; - } - - static moved_type move(init_type& x) - { - return {std::move(x.first), std::move(x.second)}; - } - - static moved_type move(element_type& x) - { - // TODO: we probably need to launder here - return {std::move(const_cast(x.first)), - std::move(const_cast(x.second))}; - } - - template - static void construct(A& al, init_type* p, Args&&... args) - { - boost::allocator_construct(al, p, std::forward(args)...); - } - - template - static void construct(A& al, value_type* p, Args&&... args) - { - boost::allocator_construct(al, p, std::forward(args)...); - } - - template static void destroy(A& al, init_type* p) noexcept - { - boost::allocator_destroy(al, p); - } - - template static void destroy(A& al, value_type* p) noexcept - { - boost::allocator_destroy(al, p); - } - }; - } // namespace detail - template class unordered_flat_map { - using map_types = detail::flat_map_types; + using map_types = detail::foa::flat_map_types; using table_type = detail::foa::table + bool friend operator==(unordered_flat_map const& lhs, + unordered_flat_map const& rhs); + template typename unordered_flat_map::size_type friend erase_if( unordered_flat_map& set, Pred pred); @@ -702,19 +650,7 @@ namespace boost { unordered_flat_map const& lhs, unordered_flat_map const& rhs) { - if (&lhs == &rhs) { - return true; - } - - return (lhs.size() == rhs.size()) && ([&] { - for (auto const& kvp : lhs) { - auto pos = rhs.find(kvp.first); - if ((pos == rhs.end()) || (*pos != kvp)) { - return false; - } - } - return true; - })(); + return lhs.table_ == rhs.table_; } template @@ -748,18 +684,6 @@ namespace boost { #if BOOST_UNORDERED_TEMPLATE_DEDUCTION_GUIDES - namespace detail { - template - using iter_key_t = - typename std::iterator_traits::value_type::first_type; - template - using iter_val_t = - typename std::iterator_traits::value_type::second_type; - template - using iter_to_alloc_t = - typename std::pair const, iter_val_t >; - } // namespace detail - template >, diff --git a/include/boost/unordered/unordered_flat_map_fwd.hpp b/include/boost/unordered/unordered_flat_map_fwd.hpp index 2c34d0fb..275a4327 100644 --- a/include/boost/unordered/unordered_flat_map_fwd.hpp +++ b/include/boost/unordered/unordered_flat_map_fwd.hpp @@ -12,7 +12,6 @@ #endif #include -#include #include #include 
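The iter_key_t/iter_val_t/iter_to_alloc_t aliases relocated into detail above feed the class template deduction guides; assuming those guides mirror the standard containers' (as the surrounding code indicates), a C++17 compiler can deduce the map parameters from an iterator range:

#include <boost/unordered/unordered_flat_map.hpp>
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<std::string, int>> v{{"a", 1}, {"b", 2}};

/* iter_key_t/iter_val_t extract std::string and int from the range's
 * value_type, so this deduces boost::unordered_flat_map<std::string, int> */
boost::unordered_flat_map m(v.begin(), v.end());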
diff --git a/include/boost/unordered/unordered_flat_set.hpp b/include/boost/unordered/unordered_flat_set.hpp index 2138a9f3..fc3b5f1b 100644 --- a/include/boost/unordered/unordered_flat_set.hpp +++ b/include/boost/unordered/unordered_flat_set.hpp @@ -10,7 +10,8 @@ #pragma once #endif -#include +#include +#include #include #include @@ -30,38 +31,10 @@ namespace boost { #pragma warning(disable : 4714) /* marked as __forceinline not inlined */ #endif - namespace detail { - template struct flat_set_types - { - using key_type = Key; - using init_type = Key; - using value_type = Key; - - static Key const& extract(value_type const& key) { return key; } - - using element_type = value_type; - - static Key& value_from(element_type& x) { return x; } - - static element_type&& move(element_type& x) { return std::move(x); } - - template - static void construct(A& al, value_type* p, Args&&... args) - { - boost::allocator_construct(al, p, std::forward(args)...); - } - - template static void destroy(A& al, value_type* p) noexcept - { - boost::allocator_destroy(al, p); - } - }; - } // namespace detail - template class unordered_flat_set { - using set_types = detail::flat_set_types; + using set_types = detail::foa::flat_set_types; using table_type = detail::foa::table + bool friend operator==(unordered_flat_set const& lhs, + unordered_flat_set const& rhs); + template typename unordered_flat_set::size_type friend erase_if( unordered_flat_set& set, Pred pred); @@ -499,19 +476,7 @@ namespace boost { unordered_flat_set const& lhs, unordered_flat_set const& rhs) { - if (&lhs == &rhs) { - return true; - } - - return (lhs.size() == rhs.size()) && ([&] { - for (auto const& key : lhs) { - auto pos = rhs.find(key); - if ((pos == rhs.end()) || (key != *pos)) { - return false; - } - } - return true; - })(); + return lhs.table_ == rhs.table_; } template diff --git a/include/boost/unordered/unordered_flat_set_fwd.hpp b/include/boost/unordered/unordered_flat_set_fwd.hpp index 51f534ef..9e670b22 100644 --- a/include/boost/unordered/unordered_flat_set_fwd.hpp +++ b/include/boost/unordered/unordered_flat_set_fwd.hpp @@ -12,7 +12,6 @@ #endif #include -#include #include #include diff --git a/include/boost/unordered/unordered_map.hpp b/include/boost/unordered/unordered_map.hpp index 8a8bc062..35eb2f2e 100644 --- a/include/boost/unordered/unordered_map.hpp +++ b/include/boost/unordered/unordered_map.hpp @@ -1061,18 +1061,6 @@ namespace boost { #if BOOST_UNORDERED_TEMPLATE_DEDUCTION_GUIDES - namespace detail { - template - using iter_key_t = - typename std::iterator_traits::value_type::first_type; - template - using iter_val_t = - typename std::iterator_traits::value_type::second_type; - template - using iter_to_alloc_t = - typename std::pair const, iter_val_t >; - } // namespace detail - template >, diff --git a/include/boost/unordered/unordered_map_fwd.hpp b/include/boost/unordered/unordered_map_fwd.hpp index d713cc49..46be934a 100644 --- a/include/boost/unordered/unordered_map_fwd.hpp +++ b/include/boost/unordered/unordered_map_fwd.hpp @@ -13,7 +13,6 @@ #endif #include -#include #include #include diff --git a/include/boost/unordered/unordered_node_map.hpp b/include/boost/unordered/unordered_node_map.hpp index 450e7809..f0ce0974 100644 --- a/include/boost/unordered/unordered_node_map.hpp +++ b/include/boost/unordered/unordered_node_map.hpp @@ -10,9 +10,10 @@ #pragma once #endif -#include #include #include +#include +#include #include #include @@ -35,112 +36,6 @@ namespace boost { #endif namespace detail { - template struct 
node_map_types - { - using key_type = Key; - using mapped_type = T; - using raw_key_type = typename std::remove_const::type; - using raw_mapped_type = typename std::remove_const::type; - - using init_type = std::pair; - using value_type = std::pair; - using moved_type = std::pair; - - using element_type=foa::element_type; - - static value_type& value_from(element_type const& x) { return *(x.p); } - - template - static raw_key_type const& extract(std::pair const& kv) - { - return kv.first; - } - - static raw_key_type const& extract(element_type const& kv) - { - return kv.p->first; - } - - static element_type&& move(element_type& x) { return std::move(x); } - static moved_type move(init_type& x) - { - return {std::move(x.first), std::move(x.second)}; - } - - static moved_type move(value_type& x) - { - return {std::move(const_cast(x.first)), - std::move(const_cast(x.second))}; - } - - template - static void construct(A&, element_type* p, element_type&& x) noexcept - { - p->p = x.p; - x.p = nullptr; - } - - template - static void construct(A& al, element_type* p, element_type const& copy) - { - construct(al, p, *copy.p); - } - - template - static void construct(A& al, init_type* p, Args&&... args) - { - boost::allocator_construct(al, p, std::forward(args)...); - } - - template - static void construct(A& al, value_type* p, Args&&... args) - { - boost::allocator_construct(al, p, std::forward(args)...); - } - - template - static void construct(A& al, element_type* p, Args&&... args) - { - p->p = boost::to_address(boost::allocator_allocate(al, 1)); - BOOST_TRY - { - boost::allocator_construct(al, p->p, std::forward(args)...); - } - BOOST_CATCH(...) - { - using pointer_type = typename boost::allocator_pointer::type; - using pointer_traits = boost::pointer_traits; - - boost::allocator_deallocate( - al, pointer_traits::pointer_to(*(p->p)), 1); - BOOST_RETHROW - } - BOOST_CATCH_END - } - - template static void destroy(A& al, value_type* p) noexcept - { - boost::allocator_destroy(al, p); - } - - template static void destroy(A& al, init_type* p) noexcept - { - boost::allocator_destroy(al, p); - } - - template static void destroy(A& al, element_type* p) noexcept - { - if (p->p) { - using pointer_type = typename boost::allocator_pointer::type; - using pointer_traits = boost::pointer_traits; - - destroy(al, p->p); - boost::allocator_deallocate( - al, pointer_traits::pointer_to(*(p->p)), 1); - } - } - }; - template struct node_map_handle : public detail::foa::node_handle_base @@ -179,7 +74,7 @@ namespace boost { template class unordered_node_map { - using map_types = detail::node_map_types; + using map_types = detail::foa::node_map_types; using table_type = detail::foa::table + bool friend operator==(unordered_node_map const& lhs, + unordered_node_map const& rhs); + template typename unordered_node_map::size_type friend erase_if( unordered_node_map& set, Pred pred); @@ -854,19 +753,7 @@ namespace boost { unordered_node_map const& lhs, unordered_node_map const& rhs) { - if (&lhs == &rhs) { - return true; - } - - return (lhs.size() == rhs.size()) && ([&] { - for (auto const& kvp : lhs) { - auto pos = rhs.find(kvp.first); - if ((pos == rhs.end()) || (*pos != kvp)) { - return false; - } - } - return true; - })(); + return lhs.table_ == rhs.table_; } template @@ -900,18 +787,6 @@ namespace boost { #if BOOST_UNORDERED_TEMPLATE_DEDUCTION_GUIDES - namespace detail { - template - using iter_key_t = - typename std::iterator_traits::value_type::first_type; - template - using iter_val_t = - typename 
std::iterator_traits::value_type::second_type; - template - using iter_to_alloc_t = - typename std::pair const, iter_val_t >; - } // namespace detail - template >, diff --git a/include/boost/unordered/unordered_node_map_fwd.hpp b/include/boost/unordered/unordered_node_map_fwd.hpp index 0e08a905..35da84ba 100644 --- a/include/boost/unordered/unordered_node_map_fwd.hpp +++ b/include/boost/unordered/unordered_node_map_fwd.hpp @@ -12,7 +12,6 @@ #endif #include -#include #include #include diff --git a/include/boost/unordered/unordered_node_set.hpp b/include/boost/unordered/unordered_node_set.hpp index 30a63502..68be4a6e 100644 --- a/include/boost/unordered/unordered_node_set.hpp +++ b/include/boost/unordered/unordered_node_set.hpp @@ -10,9 +10,10 @@ #pragma once #endif -#include #include #include +#include +#include #include #include @@ -34,77 +35,6 @@ namespace boost { #endif namespace detail { - template struct node_set_types - { - using key_type = Key; - using init_type = Key; - using value_type = Key; - - static Key const& extract(value_type const& key) { return key; } - - using element_type=foa::element_type; - - static value_type& value_from(element_type const& x) { return *x.p; } - static Key const& extract(element_type const& k) { return *k.p; } - static element_type&& move(element_type& x) { return std::move(x); } - static value_type&& move(value_type& x) { return std::move(x); } - - template - static void construct(A& al, element_type* p, element_type const& copy) - { - construct(al, p, *copy.p); - } - - template - static void construct( - Allocator&, element_type* p, element_type&& x) noexcept - { - p->p = x.p; - x.p = nullptr; - } - - template - static void construct(A& al, value_type* p, Args&&... args) - { - boost::allocator_construct(al, p, std::forward(args)...); - } - - template - static void construct(A& al, element_type* p, Args&&... args) - { - p->p = boost::to_address(boost::allocator_allocate(al, 1)); - BOOST_TRY - { - boost::allocator_construct(al, p->p, std::forward(args)...); - } - BOOST_CATCH(...) 
- { - boost::allocator_deallocate(al, - boost::pointer_traits< - typename boost::allocator_pointer::type>::pointer_to(*p->p), - 1); - BOOST_RETHROW - } - BOOST_CATCH_END - } - - template static void destroy(A& al, value_type* p) noexcept - { - boost::allocator_destroy(al, p); - } - - template static void destroy(A& al, element_type* p) noexcept - { - if (p->p) { - destroy(al, p->p); - boost::allocator_deallocate(al, - boost::pointer_traits::type>::pointer_to(*(p->p)), - 1); - } - } - }; - template struct node_set_handle : public detail::foa::node_handle_base @@ -135,7 +65,7 @@ namespace boost { template class unordered_node_set { - using set_types = detail::node_set_types; + using set_types = detail::foa::node_set_types; using table_type = detail::foa::table + bool friend operator==(unordered_node_set const& lhs, + unordered_node_set const& rhs); + template typename unordered_node_set::size_type friend erase_if( unordered_node_set& set, Pred pred); @@ -638,19 +572,7 @@ namespace boost { unordered_node_set const& lhs, unordered_node_set const& rhs) { - if (&lhs == &rhs) { - return true; - } - - return (lhs.size() == rhs.size()) && ([&] { - for (auto const& key : lhs) { - auto pos = rhs.find(key); - if ((pos == rhs.end()) || (key != *pos)) { - return false; - } - } - return true; - })(); + return lhs.table_ == rhs.table_; } template diff --git a/include/boost/unordered/unordered_node_set_fwd.hpp b/include/boost/unordered/unordered_node_set_fwd.hpp index bdd7fd0b..9f87badc 100644 --- a/include/boost/unordered/unordered_node_set_fwd.hpp +++ b/include/boost/unordered/unordered_node_set_fwd.hpp @@ -12,7 +12,6 @@ #endif #include -#include #include #include diff --git a/include/boost/unordered/unordered_set_fwd.hpp b/include/boost/unordered/unordered_set_fwd.hpp index 3cd34bc9..3722d25a 100644 --- a/include/boost/unordered/unordered_set_fwd.hpp +++ b/include/boost/unordered/unordered_set_fwd.hpp @@ -13,7 +13,6 @@ #endif #include -#include #include #include diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 51758339..f9dd0e45 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -6,12 +6,158 @@ include(BoostTestJamfile OPTIONAL RESULT_VARIABLE HAVE_BOOST_TEST) if(HAVE_BOOST_TEST) -boost_test_jamfile( - FILE Jamfile.v2 - LINK_LIBRARIES - Boost::unordered - Boost::core - Boost::concept_check -) +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + +set(BOOST_TEST_LINK_LIBRARIES Boost::unordered Boost::core Boost::concept_check) + +function(fca_tests) + boost_test(PREFIX boost_unordered ${ARGN}) +endfunction() + +function(foa_tests) + boost_test(PREFIX boost_unordered_foa COMPILE_DEFINITIONS BOOST_UNORDERED_FOA_TESTS ${ARGN}) +endfunction() + +function(cfoa_tests) + boost_test(PREFIX boost_unordered_cfoa LINK_LIBRARIES Threads::Threads ${ARGN}) +endfunction() + +# FCA tests + +fca_tests(SOURCES unordered/prime_fmod_tests.cpp) +fca_tests(SOURCES unordered/fwd_set_test.cpp) +fca_tests(SOURCES unordered/fwd_map_test.cpp) +fca_tests(SOURCES unordered/allocator_traits.cpp) +fca_tests(SOURCES unordered/minimal_allocator.cpp) +fca_tests(SOURCES unordered/compile_set.cpp) +fca_tests(SOURCES unordered/compile_map.cpp) +fca_tests(SOURCES unordered/noexcept_tests.cpp) +fca_tests(SOURCES unordered/link_test_1.cpp unordered/link_test_2.cpp) +fca_tests(SOURCES unordered/incomplete_test.cpp) +fca_tests(SOURCES unordered/simple_tests.cpp) +fca_tests(SOURCES unordered/equivalent_keys_tests.cpp) +fca_tests(SOURCES unordered/constructor_tests.cpp) +fca_tests(SOURCES 
unordered/copy_tests.cpp) +fca_tests(SOURCES unordered/move_tests.cpp) +fca_tests(SOURCES unordered/post_move_tests.cpp) +fca_tests(SOURCES unordered/assign_tests.cpp) +fca_tests(SOURCES unordered/insert_tests.cpp) +fca_tests(SOURCES unordered/insert_stable_tests.cpp) +fca_tests(SOURCES unordered/insert_hint_tests.cpp) +fca_tests(SOURCES unordered/emplace_tests.cpp) +fca_tests(SOURCES unordered/unnecessary_copy_tests.cpp) +fca_tests(SOURCES unordered/erase_tests.cpp COMPILE_DEFINITIONS BOOST_UNORDERED_SUPPRESS_DEPRECATED) +fca_tests(SOURCES unordered/erase_equiv_tests.cpp) +fca_tests(SOURCES unordered/extract_tests.cpp) +fca_tests(SOURCES unordered/node_handle_tests.cpp) +fca_tests(SOURCES unordered/merge_tests.cpp) +fca_tests(SOURCES unordered/find_tests.cpp) +fca_tests(SOURCES unordered/at_tests.cpp) +fca_tests(SOURCES unordered/bucket_tests.cpp) +fca_tests(SOURCES unordered/load_factor_tests.cpp) +fca_tests(SOURCES unordered/rehash_tests.cpp) +fca_tests(SOURCES unordered/equality_tests.cpp) +fca_tests(SOURCES unordered/swap_tests.cpp) +fca_tests(SOURCES unordered/deduction_tests.cpp) +fca_tests(SOURCES unordered/scoped_allocator.cpp) +fca_tests(SOURCES unordered/transparent_tests.cpp) +fca_tests(SOURCES unordered/reserve_tests.cpp) +fca_tests(SOURCES unordered/contains_tests.cpp) +fca_tests(SOURCES unordered/erase_if.cpp) +fca_tests(SOURCES unordered/scary_tests.cpp) +fca_tests(SOURCES exception/constructor_exception_tests.cpp) +fca_tests(SOURCES exception/copy_exception_tests.cpp) +fca_tests(SOURCES exception/assign_exception_tests.cpp) +fca_tests(SOURCES exception/move_assign_exception_tests.cpp) +fca_tests(SOURCES exception/insert_exception_tests.cpp) +fca_tests(SOURCES exception/erase_exception_tests.cpp) +fca_tests(SOURCES exception/rehash_exception_tests.cpp) +fca_tests(SOURCES exception/swap_exception_tests.cpp COMPILE_DEFINITIONS BOOST_UNORDERED_SWAP_METHOD=2) +fca_tests(SOURCES exception/merge_exception_tests.cpp) +fca_tests(SOURCES exception/less_tests.cpp) +fca_tests(SOURCES unordered/narrow_cast_tests.cpp) +fca_tests(SOURCES unordered/compile_set.cpp COMPILE_DEFINITIONS BOOST_UNORDERED_USE_MOVE NAME bmove_compile_set) +fca_tests(SOURCES unordered/compile_map.cpp COMPILE_DEFINITIONS BOOST_UNORDERED_USE_MOVE NAME bmove_compile_map) +fca_tests(SOURCES unordered/copy_tests.cpp COMPILE_DEFINITIONS BOOST_UNORDERED_USE_MOVE NAME bmove_copy) +fca_tests(SOURCES unordered/move_tests.cpp COMPILE_DEFINITIONS BOOST_UNORDERED_USE_MOVE NAME bmove_move) +fca_tests(SOURCES unordered/assign_tests.cpp COMPILE_DEFINITIONS BOOST_UNORDERED_USE_MOVE NAME bmove_assign) +fca_tests(SOURCES quick.cpp) + +fca_tests(TYPE compile-fail NAME insert_node_type_fail_map COMPILE_DEFINITIONS UNORDERED_TEST_MAP SOURCES unordered/insert_node_type_fail.cpp) +fca_tests(TYPE compile-fail NAME insert_node_type_fail_multimap COMPILE_DEFINITIONS UNORDERED_TEST_MULTIMAP SOURCES unordered/insert_node_type_fail.cpp) +fca_tests(TYPE compile-fail NAME insert_node_type_fail_set COMPILE_DEFINITIONS UNORDERED_TEST_SET SOURCES unordered/insert_node_type_fail.cpp) +fca_tests(TYPE compile-fail NAME insert_node_type_fail_multiset COMPILE_DEFINITIONS UNORDERED_TEST_MULTISET SOURCES unordered/insert_node_type_fail.cpp) + +# FOA tests + +foa_tests(SOURCES unordered/fwd_set_test.cpp) +foa_tests(SOURCES unordered/fwd_map_test.cpp) +foa_tests(SOURCES unordered/compile_set.cpp) +foa_tests(SOURCES unordered/compile_map.cpp) +foa_tests(SOURCES unordered/noexcept_tests.cpp) +foa_tests(SOURCES unordered/incomplete_test.cpp) 
+foa_tests(SOURCES unordered/simple_tests.cpp) +foa_tests(SOURCES unordered/equivalent_keys_tests.cpp) +foa_tests(SOURCES unordered/constructor_tests.cpp) +foa_tests(SOURCES unordered/copy_tests.cpp) +foa_tests(SOURCES unordered/move_tests.cpp) +foa_tests(SOURCES unordered/post_move_tests.cpp) +foa_tests(SOURCES unordered/assign_tests.cpp) +foa_tests(SOURCES unordered/insert_tests.cpp) +foa_tests(SOURCES unordered/insert_hint_tests.cpp) +foa_tests(SOURCES unordered/emplace_tests.cpp) +foa_tests(SOURCES unordered/erase_tests.cpp) +foa_tests(SOURCES unordered/merge_tests.cpp) +foa_tests(SOURCES unordered/find_tests.cpp) +foa_tests(SOURCES unordered/at_tests.cpp) +foa_tests(SOURCES unordered/load_factor_tests.cpp) +foa_tests(SOURCES unordered/rehash_tests.cpp) +foa_tests(SOURCES unordered/equality_tests.cpp) +foa_tests(SOURCES unordered/swap_tests.cpp) +foa_tests(SOURCES unordered/transparent_tests.cpp) +foa_tests(SOURCES unordered/reserve_tests.cpp) +foa_tests(SOURCES unordered/contains_tests.cpp) +foa_tests(SOURCES unordered/erase_if.cpp) +foa_tests(SOURCES unordered/scary_tests.cpp) +foa_tests(SOURCES unordered/init_type_insert_tests.cpp) +foa_tests(SOURCES unordered/max_load_tests.cpp) +foa_tests(SOURCES unordered/extract_tests.cpp) +foa_tests(SOURCES unordered/node_handle_tests.cpp) +foa_tests(SOURCES unordered/uses_allocator.cpp) +foa_tests(SOURCES unordered/link_test_1.cpp unordered/link_test_2.cpp ) +foa_tests(SOURCES unordered/scoped_allocator.cpp) +foa_tests(SOURCES unordered/hash_is_avalanching_test.cpp) +foa_tests(SOURCES exception/constructor_exception_tests.cpp) +foa_tests(SOURCES exception/copy_exception_tests.cpp) +foa_tests(SOURCES exception/assign_exception_tests.cpp) +foa_tests(SOURCES exception/move_assign_exception_tests.cpp) +foa_tests(SOURCES exception/insert_exception_tests.cpp) +foa_tests(SOURCES exception/erase_exception_tests.cpp) +foa_tests(SOURCES exception/rehash_exception_tests.cpp) +foa_tests(SOURCES exception/swap_exception_tests.cpp) +foa_tests(SOURCES exception/merge_exception_tests.cpp) + +# CFOA tests + +cfoa_tests(SOURCES cfoa/latch_tests.cpp) +cfoa_tests(SOURCES cfoa/insert_tests.cpp) +cfoa_tests(SOURCES cfoa/erase_tests.cpp) +cfoa_tests(SOURCES cfoa/try_emplace_tests.cpp) +cfoa_tests(SOURCES cfoa/emplace_tests.cpp) +cfoa_tests(SOURCES cfoa/visit_tests.cpp) +cfoa_tests(SOURCES cfoa/constructor_tests.cpp) +cfoa_tests(SOURCES cfoa/assign_tests.cpp) +cfoa_tests(SOURCES cfoa/clear_tests.cpp) +cfoa_tests(SOURCES cfoa/swap_tests.cpp) +cfoa_tests(SOURCES cfoa/merge_tests.cpp) +cfoa_tests(SOURCES cfoa/rehash_tests.cpp) +cfoa_tests(SOURCES cfoa/equality_tests.cpp) +cfoa_tests(SOURCES cfoa/fwd_tests.cpp) +cfoa_tests(SOURCES cfoa/exception_insert_tests.cpp) +cfoa_tests(SOURCES cfoa/exception_erase_tests.cpp) +cfoa_tests(SOURCES cfoa/exception_constructor_tests.cpp) +cfoa_tests(SOURCES cfoa/exception_assign_tests.cpp) +cfoa_tests(SOURCES cfoa/exception_merge_tests.cpp) endif() diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 31b17efd..4e256e2a 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -104,50 +104,51 @@ import ../../config/checks/config : requires ; CPP11 = [ requires cxx11_constexpr cxx11_noexcept cxx11_decltype cxx11_alignas ] ; -rule build_foa ( name ) +local FOA_TESTS = + fwd_set_test + fwd_map_test + compile_set + compile_map + noexcept_tests + incomplete_test + simple_tests + equivalent_keys_tests + constructor_tests + copy_tests + move_tests + post_move_tests + assign_tests + insert_tests + insert_hint_tests + emplace_tests + erase_tests + 
merge_tests + find_tests + at_tests + load_factor_tests + rehash_tests + equality_tests + swap_tests + transparent_tests + reserve_tests + contains_tests + erase_if + scary_tests + init_type_insert_tests + max_load_tests + extract_tests + node_handle_tests + uses_allocator +; + +for local test in $(FOA_TESTS) { - run unordered/$(name).cpp : : : $(CPP11) BOOST_UNORDERED_FOA_TESTS : foa_$(name) ; + run unordered/$(test).cpp : : : $(CPP11) BOOST_UNORDERED_FOA_TESTS : foa_$(test) ; } -build_foa fwd_set_test ; -build_foa fwd_map_test ; -build_foa compile_set ; -build_foa compile_map ; -build_foa noexcept_tests ; run unordered/link_test_1.cpp unordered/link_test_2.cpp : : : $(CPP11) BOOST_UNORDERED_FOA_TESTS : foa_link_test ; -build_foa incomplete_test ; -build_foa simple_tests ; -build_foa equivalent_keys_tests ; -build_foa constructor_tests ; -build_foa copy_tests ; -build_foa move_tests ; -build_foa post_move_tests ; -build_foa assign_tests ; -build_foa insert_tests ; -build_foa insert_hint_tests ; -build_foa emplace_tests ; -build_foa erase_tests ; -build_foa merge_tests ; -build_foa find_tests ; -build_foa at_tests ; -build_foa load_factor_tests ; -build_foa rehash_tests ; -build_foa equality_tests ; -build_foa swap_tests ; run unordered/scoped_allocator.cpp : : : $(CPP11) msvc-14.0:no BOOST_UNORDERED_FOA_TESTS : foa_scoped_allocator ; -build_foa transparent_tests ; -build_foa reserve_tests ; -build_foa contains_tests ; -build_foa erase_if ; -build_foa scary_tests ; -build_foa init_type_insert_tests ; -build_foa max_load_tests ; -build_foa extract_tests ; -build_foa node_handle_tests ; -build_foa uses_allocator ; - run unordered/hash_is_avalanching_test.cpp ; - run exception/constructor_exception_tests.cpp : : : $(CPP11) BOOST_UNORDERED_FOA_TESTS : foa_constructor_exception_tests ; run exception/copy_exception_tests.cpp : : : $(CPP11) BOOST_UNORDERED_FOA_TESTS : foa_copy_exception_tests ; run exception/assign_exception_tests.cpp : : : $(CPP11) BOOST_UNORDERED_FOA_TESTS : foa_assign_exception_tests ; @@ -157,3 +158,51 @@ run exception/erase_exception_tests.cpp : : : $(CPP11) BOOST_UNORD run exception/rehash_exception_tests.cpp : : : $(CPP11) BOOST_UNORDERED_FOA_TESTS : foa_rehash_exception_tests ; run exception/swap_exception_tests.cpp : : : $(CPP11) BOOST_UNORDERED_FOA_TESTS : foa_swap_exception_tests ; run exception/merge_exception_tests.cpp : : : $(CPP11) BOOST_UNORDERED_FOA_TESTS : foa_merge_exception_tests ; + +alias foa_tests : + foa_$(FOA_TESTS) + foa_link_test + foa_scoped_allocator + hash_is_avalanching_test + foa_constructor_exception_tests + foa_copy_exception_tests + foa_assign_exception_tests + foa_move_assign_exception_tests + foa_insert_exception_tests + foa_erase_exception_tests + foa_rehash_exception_tests + foa_swap_exception_tests + foa_merge_exception_tests +; + +local CFOA_TESTS = + latch_tests + insert_tests + erase_tests + try_emplace_tests + emplace_tests + visit_tests + constructor_tests + assign_tests + clear_tests + swap_tests + merge_tests + rehash_tests + equality_tests + fwd_tests + exception_insert_tests + exception_erase_tests + exception_constructor_tests + exception_assign_tests + exception_merge_tests +; + +for local test in $(CFOA_TESTS) +{ + run cfoa/$(test).cpp + : requirements $(CPP11) multi + : target-name cfoa_$(test) + ; +} + +alias cfoa_tests : cfoa_$(CFOA_TESTS) ; diff --git a/test/cfoa/assign_tests.cpp b/test/cfoa/assign_tests.cpp new file mode 100644 index 00000000..aa17e0ac --- /dev/null +++ b/test/cfoa/assign_tests.cpp @@ -0,0 +1,865 @@ +// 
Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "helpers.hpp" + +#include + +#if defined(__clang__) && defined(__has_warning) + +#if __has_warning("-Wself-assign-overloaded") +#pragma clang diagnostic ignored "-Wself-assign-overloaded" +#endif + +#if __has_warning("-Wself-move") +#pragma clang diagnostic ignored "-Wself-move" +#endif + +#endif /* defined(__clang__) && defined(__has_warning) */ + +#if defined(BOOST_GCC) && BOOST_GCC >= 130000 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wself-move" +#endif + +test::seed_t initialize_seed{2762556623}; + +using test::default_generator; +using test::limited_range; +using test::sequential; + +using hasher = stateful_hash; +using key_equal = stateful_key_equal; +using allocator_type = stateful_allocator >; + +using map_type = boost::unordered::concurrent_flat_map; + +using map_value_type = typename map_type::value_type; + +template struct pocca_allocator +{ + using propagate_on_container_copy_assignment = std::true_type; + + int x_ = -1; + + using value_type = T; + + pocca_allocator() = default; + pocca_allocator(pocca_allocator const&) = default; + pocca_allocator(pocca_allocator&&) = default; + + pocca_allocator(int const x) : x_{x} {} + + pocca_allocator& operator=(pocca_allocator const& rhs) + { + if (this != &rhs) { + x_ = rhs.x_; + } + return *this; + } + + template pocca_allocator(pocca_allocator const& rhs) : x_{rhs.x_} + { + } + + T* allocate(std::size_t n) + { + return static_cast(::operator new(n * sizeof(T))); + } + + void deallocate(T* p, std::size_t) { ::operator delete(p); } + + bool operator==(pocca_allocator const& rhs) const { return x_ == rhs.x_; } + bool operator!=(pocca_allocator const& rhs) const { return x_ != rhs.x_; } +}; + +template struct pocma_allocator +{ + using propagate_on_container_move_assignment = std::true_type; + + int x_ = -1; + + using value_type = T; + + pocma_allocator() = default; + pocma_allocator(pocma_allocator const&) = default; + pocma_allocator(pocma_allocator&&) = default; + + pocma_allocator(int const x) : x_{x} {} + + pocma_allocator& operator=(pocma_allocator const& rhs) + { + if (this != &rhs) { + x_ = rhs.x_; + } + return *this; + } + + template pocma_allocator(pocma_allocator const& rhs) : x_{rhs.x_} + { + } + + T* allocate(std::size_t n) + { + return static_cast(::operator new(n * sizeof(T))); + } + + void deallocate(T* p, std::size_t) { ::operator delete(p); } + + bool operator==(pocma_allocator const& rhs) const { return x_ == rhs.x_; } + bool operator!=(pocma_allocator const& rhs) const { return x_ != rhs.x_; } +}; + +namespace { + template void copy_assign(G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + + // lhs empty, rhs empty + { + raii::reset_counts(); + + map_type x(0, hasher(1), key_equal(2), allocator_type(3)); + + thread_runner(values, [&x](boost::span s) { + (void)s; + + map_type y; + + BOOST_TEST(x.empty()); + BOOST_TEST(y.empty()); + + y = x; + + BOOST_TEST_EQ(x.hash_function(), y.hash_function()); + BOOST_TEST_EQ(x.key_eq(), y.key_eq()); + BOOST_TEST(x.get_allocator() != y.get_allocator()); + }); + + BOOST_TEST_EQ(raii::destructor, 0u); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + 
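+      // each map element holds two raii objects -- the key and the mapped
+      // value -- which is why the element counts in this file carry a factor
+      // of 2; with both sides empty there is nothing to copy, so every
+      // counter is expected to stay at zero.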
BOOST_TEST_EQ(raii::copy_constructor, 0u); + } + + // lhs non-empty, rhs empty + { + raii::reset_counts(); + + map_type x(0, hasher(1), key_equal(2), allocator_type(3)); + + auto const old_size = reference_map.size(); + + thread_runner(values, [&x, &values](boost::span s) { + (void)s; + + map_type y(values.size()); + for (auto const& v : values) { + y.insert(v); + } + + BOOST_TEST(x.empty()); + BOOST_TEST(!y.empty()); + + y = x; + + BOOST_TEST_EQ(x.hash_function(), y.hash_function()); + BOOST_TEST_EQ(x.key_eq(), y.key_eq()); + BOOST_TEST(x.get_allocator() != y.get_allocator()); + + BOOST_TEST(y.empty()); + }); + + BOOST_TEST_EQ(raii::destructor, num_threads * (2 * old_size)); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ( + raii::copy_constructor, num_threads * 2 * reference_map.size()); + } + check_raii_counts(); + + // lhs empty, rhs non-empty + { + raii::reset_counts(); + + map_type x(values.size(), hasher(1), key_equal(2), allocator_type(3)); + for (auto const& v : values) { + x.insert(v); + } + + auto const old_cc = +raii::copy_constructor; + + thread_runner( + values, [&x, &reference_map](boost::span s) { + (void)s; + + map_type y; + + BOOST_TEST(!x.empty()); + BOOST_TEST(y.empty()); + + y = x; + + BOOST_TEST_EQ(x.hash_function(), y.hash_function()); + BOOST_TEST_EQ(x.key_eq(), y.key_eq()); + BOOST_TEST(x.get_allocator() != y.get_allocator()); + + test_matches_reference(y, reference_map); + }); + + BOOST_TEST_EQ(raii::destructor, num_threads * 2 * x.size()); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ( + raii::copy_constructor, old_cc + (num_threads * 2 * x.size())); + } + check_raii_counts(); + + // lhs non-empty, rhs non-empty + { + raii::reset_counts(); + + map_type x(values.size(), hasher(1), key_equal(2), allocator_type(3)); + for (auto const& v : values) { + x.insert(v); + } + + auto const old_size = x.size(); + auto const old_cc = +raii::copy_constructor; + + thread_runner(values, [&x, &values](boost::span s) { + (void)s; + + map_type y(values.size()); + for (auto const& v : values) { + y.insert(v); + } + + BOOST_TEST(!x.empty()); + BOOST_TEST(!y.empty()); + + y = x; + + BOOST_TEST_EQ(x.hash_function(), y.hash_function()); + BOOST_TEST_EQ(x.key_eq(), y.key_eq()); + BOOST_TEST(x.get_allocator() != y.get_allocator()); + }); + + BOOST_TEST_EQ(raii::destructor, 2 * num_threads * 2 * old_size); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ( + raii::copy_constructor, old_cc + (2 * num_threads * 2 * x.size())); + } + check_raii_counts(); + + // self-assign + { + raii::reset_counts(); + + map_type x(values.size(), hasher(1), key_equal(2), allocator_type(3)); + for (auto const& v : values) { + x.insert(v); + } + + auto const old_cc = +raii::copy_constructor; + + thread_runner( + values, [&x, &reference_map](boost::span s) { + (void)s; + + BOOST_TEST(!x.empty()); + + x = x; + + BOOST_TEST_EQ(x.hash_function(), hasher(1)); + BOOST_TEST_EQ(x.key_eq(), key_equal(2)); + BOOST_TEST(x.get_allocator() == allocator_type(3)); + + test_matches_reference(x, reference_map); + }); + + BOOST_TEST_EQ(raii::destructor, 0u); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + } + check_raii_counts(); + + // propagation + { + using pocca_allocator_type = + pocca_allocator >; + + using pocca_map_type = boost::unordered::concurrent_flat_map; + + 
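+      // POCCA: propagate_on_container_copy_assignment is true_type for this
+      // allocator, so copy assignment is expected to copy the allocator from
+      // the source; the runner below asserts that the two allocators compare
+      // equal after y = x, unlike the non-propagating stateful_allocator
+      // cases above.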
raii::reset_counts(); + + pocca_map_type x( + values.size(), hasher(1), key_equal(2), pocca_allocator_type(3)); + for (auto const& v : values) { + x.insert(v); + } + + auto const old_size = x.size(); + auto const old_cc = +raii::copy_constructor; + + thread_runner(values, [&x, &values](boost::span s) { + (void)s; + + pocca_map_type y(values.size()); + for (auto const& v : values) { + y.insert(v); + } + + BOOST_TEST(!x.empty()); + BOOST_TEST(!y.empty()); + + BOOST_TEST(x.get_allocator() != y.get_allocator()); + + y = x; + + BOOST_TEST_EQ(x.hash_function(), y.hash_function()); + BOOST_TEST_EQ(x.key_eq(), y.key_eq()); + BOOST_TEST(x.get_allocator() == y.get_allocator()); + }); + + BOOST_TEST_EQ(raii::destructor, 2 * num_threads * 2 * old_size); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ( + raii::copy_constructor, old_cc + (2 * num_threads * 2 * x.size())); + } + check_raii_counts(); + } + + template void move_assign(G gen, test::random_generator rg) + { + using pocma_allocator_type = pocma_allocator >; + + using pocma_map_type = boost::unordered::concurrent_flat_map; + + BOOST_STATIC_ASSERT( + std::is_nothrow_move_assignable, std::equal_to, + std::allocator > > >::value); + + BOOST_STATIC_ASSERT( + std::is_nothrow_move_assignable, std::equal_to, + pocma_allocator > > >::value); + + BOOST_STATIC_ASSERT( + !std::is_nothrow_move_assignable, std::equal_to, + stateful_allocator > > >::value); + + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + + // move assignment has more complex requirements than copying + // equal allocators: + // lhs empty, rhs non-empty + // lhs non-empty, rhs empty + // lhs non-empty, rhs non-empty + // + // unequal allocators: + // lhs non-empty, rhs non-empty + // + // pocma + // self move-assign + + // lhs empty, rhs empty + { + raii::reset_counts(); + + map_type x(0, hasher(1), key_equal(2), allocator_type(3)); + + std::atomic num_transfers{0}; + + thread_runner( + values, [&x, &num_transfers](boost::span s) { + (void)s; + + map_type y(0, hasher(2), key_equal(1), allocator_type(3)); + + BOOST_TEST(x.empty()); + BOOST_TEST(y.empty()); + BOOST_TEST(x.get_allocator() == y.get_allocator()); + + y = std::move(x); + if (y.hash_function() == hasher(1)) { + ++num_transfers; + BOOST_TEST_EQ(y.key_eq(), key_equal(2)); + } else { + BOOST_TEST_EQ(y.hash_function(), hasher(2)); + BOOST_TEST_EQ(y.key_eq(), key_equal(1)); + } + + BOOST_TEST_EQ(x.hash_function(), hasher(2)); + BOOST_TEST_EQ(x.key_eq(), key_equal(1)); + BOOST_TEST(x.get_allocator() == y.get_allocator()); + }); + + BOOST_TEST_EQ(num_transfers, 1u); + + BOOST_TEST_EQ(raii::destructor, 0u); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ(raii::copy_constructor, 0u); + } + + // lhs non-empty, rhs empty + { + raii::reset_counts(); + + map_type x(0, hasher(1), key_equal(2), allocator_type(3)); + + std::atomic num_transfers{0}; + + thread_runner( + values, [&x, &values, &num_transfers](boost::span s) { + (void)s; + + map_type y(values.size(), hasher(2), key_equal(1), allocator_type(3)); + for (auto const& v : values) { + y.insert(v); + } + + BOOST_TEST(x.empty()); + BOOST_TEST(!y.empty()); + BOOST_TEST(x.get_allocator() == y.get_allocator()); + + y = std::move(x); + if (y.hash_function() == hasher(1)) { + ++num_transfers; + BOOST_TEST_EQ(y.key_eq(), key_equal(2)); + } else { + BOOST_TEST_EQ(y.hash_function(), 
hasher(2)); + BOOST_TEST_EQ(y.key_eq(), key_equal(1)); + } + + BOOST_TEST_EQ(x.hash_function(), hasher(2)); + BOOST_TEST_EQ(x.key_eq(), key_equal(1)); + BOOST_TEST(x.get_allocator() == y.get_allocator()); + + BOOST_TEST(y.empty()); + }); + + BOOST_TEST_EQ(num_transfers, 1u); + + BOOST_TEST_EQ(raii::destructor, num_threads * 2 * reference_map.size()); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ( + raii::copy_constructor, num_threads * 2 * reference_map.size()); + } + check_raii_counts(); + + // lhs empty, rhs non-empty + { + raii::reset_counts(); + + map_type x(values.size(), hasher(1), key_equal(2), allocator_type(3)); + for (auto const& v : values) { + x.insert(v); + } + + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + std::atomic num_transfers{0}; + + thread_runner(values, + [&x, &reference_map, &num_transfers](boost::span s) { + (void)s; + + map_type y(allocator_type(3)); + + BOOST_TEST(y.empty()); + BOOST_TEST(x.get_allocator() == y.get_allocator()); + + y = std::move(x); + if (!y.empty()) { + ++num_transfers; + test_matches_reference(y, reference_map); + + BOOST_TEST_EQ(y.hash_function(), hasher(1)); + BOOST_TEST_EQ(y.key_eq(), key_equal(2)); + } else { + BOOST_TEST_EQ(y.hash_function(), hasher()); + BOOST_TEST_EQ(y.key_eq(), key_equal()); + } + + BOOST_TEST(x.empty()); + + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + BOOST_TEST(x.get_allocator() == y.get_allocator()); + }); + + BOOST_TEST_EQ(num_transfers, 1u); + + BOOST_TEST_EQ(raii::destructor, 2 * reference_map.size()); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + } + check_raii_counts(); + + // lhs non-empty, rhs non-empty + { + raii::reset_counts(); + + map_type x(values.size(), hasher(1), key_equal(2), allocator_type(3)); + for (auto const& v : values) { + x.insert(v); + } + + auto const old_size = x.size(); + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + std::atomic num_transfers{0}; + + thread_runner(values, [&x, &values, &num_transfers, &reference_map]( + boost::span s) { + (void)s; + + map_type y(values.size(), hasher(2), key_equal(1), allocator_type(3)); + for (auto const& v : values) { + y.insert(v); + } + + BOOST_TEST(!y.empty()); + BOOST_TEST(x.get_allocator() == y.get_allocator()); + + y = std::move(x); + if (y.hash_function() == hasher(1)) { + ++num_transfers; + test_matches_reference(y, reference_map); + + BOOST_TEST_EQ(y.key_eq(), key_equal(2)); + } else { + BOOST_TEST_EQ(y.hash_function(), hasher(2)); + BOOST_TEST_EQ(y.key_eq(), key_equal(1)); + } + + BOOST_TEST(x.empty()); + + BOOST_TEST_EQ(x.hash_function(), hasher(2)); + BOOST_TEST_EQ(x.key_eq(), key_equal(1)); + BOOST_TEST(x.get_allocator() == y.get_allocator()); + }); + + BOOST_TEST_EQ(num_transfers, 1u); + + BOOST_TEST_EQ( + raii::destructor, 2 * old_size + num_threads * 2 * old_size); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + BOOST_TEST_EQ(raii::copy_constructor, + old_cc + (num_threads * 2 * reference_map.size())); + } + check_raii_counts(); + + // lhs non-empty, rhs non-empty, unequal allocators, no propagation + { + raii::reset_counts(); + + map_type x(values.size(), hasher(1), key_equal(2), allocator_type(3)); + for (auto 
const& v : values) { + x.insert(v); + } + + auto const old_size = x.size(); + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + std::atomic num_transfers{0}; + + thread_runner(values, [&x, &values, &num_transfers, &reference_map]( + boost::span s) { + (void)s; + + map_type y(values.size(), hasher(2), key_equal(1), allocator_type(13)); + for (auto const& v : values) { + y.insert(v); + } + + BOOST_TEST( + !boost::allocator_is_always_equal::type::value); + + BOOST_TEST(!boost::allocator_propagate_on_container_move_assignment< + allocator_type>::type::value); + + BOOST_TEST(!y.empty()); + BOOST_TEST(x.get_allocator() != y.get_allocator()); + + y = std::move(x); + if (y.hash_function() == hasher(1)) { + ++num_transfers; + test_matches_reference(y, reference_map); + + BOOST_TEST_EQ(y.key_eq(), key_equal(2)); + } else { + BOOST_TEST_EQ(y.hash_function(), hasher(2)); + BOOST_TEST_EQ(y.key_eq(), key_equal(1)); + } + + BOOST_TEST(x.empty()); + + BOOST_TEST_EQ(x.hash_function(), hasher(2)); + BOOST_TEST_EQ(x.key_eq(), key_equal(1)); + BOOST_TEST(x.get_allocator() != y.get_allocator()); + }); + + BOOST_TEST_EQ(num_transfers, 1u); + + BOOST_TEST_EQ( + raii::destructor, 2 * 2 * old_size + num_threads * 2 * old_size); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ(raii::move_constructor, old_mc + 2 * old_size); + BOOST_TEST_EQ(raii::copy_constructor, + old_cc + (num_threads * 2 * reference_map.size())); + } + check_raii_counts(); + + // lhs non-empty, rhs non-empty, pocma + { + raii::reset_counts(); + + pocma_map_type x( + values.size(), hasher(1), key_equal(2), pocma_allocator_type(3)); + for (auto const& v : values) { + x.insert(v); + } + + auto const old_size = x.size(); + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + std::atomic num_transfers{0}; + + thread_runner(values, [&x, &values, &num_transfers, &reference_map]( + boost::span s) { + (void)s; + + pocma_map_type y( + values.size(), hasher(2), key_equal(1), pocma_allocator_type(13)); + for (auto const& v : values) { + y.insert(v); + } + + BOOST_TEST(!y.empty()); + BOOST_TEST(x.get_allocator() != y.get_allocator()); + + y = std::move(x); + if (y.hash_function() == hasher(1)) { + ++num_transfers; + test_matches_reference(y, reference_map); + + BOOST_TEST_EQ(y.key_eq(), key_equal(2)); + } else { + BOOST_TEST_EQ(y.hash_function(), hasher(2)); + BOOST_TEST_EQ(y.key_eq(), key_equal(1)); + } + + BOOST_TEST(x.empty()); + + BOOST_TEST_EQ(x.hash_function(), hasher(2)); + BOOST_TEST_EQ(x.key_eq(), key_equal(1)); + BOOST_TEST(x.get_allocator() == y.get_allocator()); + }); + + BOOST_TEST_EQ(num_transfers, 1u); + + BOOST_TEST_EQ( + raii::destructor, 2 * old_size + num_threads * 2 * old_size); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + BOOST_TEST_EQ(raii::copy_constructor, + old_cc + (num_threads * 2 * reference_map.size())); + } + check_raii_counts(); + + // self-assign + { + raii::reset_counts(); + + map_type x(values.size(), hasher(1), key_equal(2), allocator_type(3)); + for (auto const& v : values) { + x.insert(v); + } + + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + thread_runner( + values, [&x, &reference_map](boost::span s) { + (void)s; + + x = std::move(x); + + BOOST_TEST(!x.empty()); + + BOOST_TEST_EQ(x.hash_function(), hasher(1)); + BOOST_TEST_EQ(x.key_eq(), 
key_equal(2)); + BOOST_TEST(x.get_allocator() == allocator_type(3)); + + test_matches_reference(x, reference_map); + }); + + BOOST_TEST_EQ(raii::destructor, 0u); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + } + check_raii_counts(); + } + + UNORDERED_AUTO_TEST (initializer_list_assignment) { + std::initializer_list values{ + map_value_type{raii{0}, raii{0}}, + map_value_type{raii{1}, raii{1}}, + map_value_type{raii{2}, raii{2}}, + map_value_type{raii{3}, raii{3}}, + map_value_type{raii{4}, raii{4}}, + map_value_type{raii{5}, raii{5}}, + map_value_type{raii{6}, raii{6}}, + map_value_type{raii{6}, raii{6}}, + map_value_type{raii{7}, raii{7}}, + map_value_type{raii{8}, raii{8}}, + map_value_type{raii{9}, raii{9}}, + map_value_type{raii{10}, raii{10}}, + map_value_type{raii{9}, raii{9}}, + map_value_type{raii{8}, raii{8}}, + map_value_type{raii{7}, raii{7}}, + map_value_type{raii{6}, raii{6}}, + map_value_type{raii{5}, raii{5}}, + map_value_type{raii{4}, raii{4}}, + map_value_type{raii{3}, raii{3}}, + map_value_type{raii{2}, raii{2}}, + map_value_type{raii{1}, raii{1}}, + map_value_type{raii{0}, raii{0}}, + }; + + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + auto v = std::vector(values.begin(), values.end()); + + { + raii::reset_counts(); + map_type x(0, hasher(1), key_equal(2), allocator_type(3)); + + thread_runner(v, [&x, &values](boost::span s) { + (void)s; + x = values; + }); + + test_matches_reference(x, reference_map); + BOOST_TEST_EQ(x.hash_function(), hasher(1)); + BOOST_TEST_EQ(x.key_eq(), key_equal(2)); + BOOST_TEST(x.get_allocator() == allocator_type(3)); + + BOOST_TEST_EQ(raii::copy_constructor, num_threads * 2 * x.size()); + BOOST_TEST_EQ(raii::destructor, (num_threads - 1) * 2 * x.size()); + BOOST_TEST_EQ(raii::move_constructor, 0u); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + check_raii_counts(); + } + + template void insert_and_assign(G gen, test::random_generator rg) + { + + std::thread t1, t2, t3; + + boost::latch start_latch(2), end_latch(2); + + auto v1 = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto v2 = v1; + shuffle_values(v2); + + auto reference_map = + boost::unordered_flat_map(v1.begin(), v1.end()); + + raii::reset_counts(); + { + map_type map1(v1.size(), hasher(1), key_equal(2), allocator_type(3)); + map_type map2(v2.size(), hasher(1), key_equal(2), allocator_type(3)); + + t1 = std::thread([&v1, &map1, &start_latch, &end_latch] { + start_latch.arrive_and_wait(); + for (auto const& v : v1) { + map1.insert(v); + } + end_latch.arrive_and_wait(); + }); + + t2 = std::thread([&v2, &map2, &end_latch, &start_latch] { + start_latch.arrive_and_wait(); + for (auto const& v : v2) { + map2.insert(v); + } + end_latch.arrive_and_wait(); + }); + + std::atomic num_assignments{0}; + t3 = std::thread([&map1, &map2, &end_latch, &num_assignments] { + while (map1.empty() && map2.empty()) { + std::this_thread::sleep_for(std::chrono::microseconds(10)); + } + + do { + map1 = map2; + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + map2 = map1; + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + ++num_assignments; + } while (!end_latch.try_wait()); + }); + + t1.join(); + t2.join(); + t3.join(); + + BOOST_TEST_GT(num_assignments, 0u); + + test_fuzzy_matches_reference(map1, reference_map, rg); + 
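+      // both maps raced concurrent inserts against assignments in both
+      // directions, so each is only expected to fuzzily match the reference:
+      // an assignment may overwrite inserts that were still in flight.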
test_fuzzy_matches_reference(map2, reference_map, rg); + } + check_raii_counts(); + } +} // namespace + +// clang-format off +UNORDERED_TEST( + copy_assign, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + move_assign, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + insert_and_assign, + ((init_type_generator)) + ((default_generator)(sequential)(limited_range))) +// clang-format on + +RUN_TESTS() diff --git a/test/cfoa/clear_tests.cpp b/test/cfoa/clear_tests.cpp new file mode 100644 index 00000000..79240db3 --- /dev/null +++ b/test/cfoa/clear_tests.cpp @@ -0,0 +1,126 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "helpers.hpp" + +#include + +test::seed_t initialize_seed{674140082}; + +using test::default_generator; +using test::limited_range; +using test::sequential; + +using hasher = stateful_hash; +using key_equal = stateful_key_equal; +using allocator_type = stateful_allocator >; + +using map_type = boost::unordered::concurrent_flat_map; + +using map_value_type = typename map_type::value_type; + +namespace { + template void clear_tests(G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + + raii::reset_counts(); + + map_type x(values.begin(), values.end(), values.size(), hasher(1), + key_equal(2), allocator_type(3)); + + auto const old_size = x.size(); + auto const old_d = +raii::destructor; + + thread_runner(values, [&x](boost::span s) { + (void)s; + x.clear(); + }); + + BOOST_TEST(x.empty()); + BOOST_TEST_EQ(raii::destructor, old_d + 2 * old_size); + + check_raii_counts(); + } + + template void insert_and_clear(G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + + raii::reset_counts(); + + std::thread t1, t2; + + { + map_type x(0, hasher(1), key_equal(2), allocator_type(3)); + + std::mutex m; + std::condition_variable cv; + std::atomic done{false}; + std::atomic num_clears{0}; + + bool ready = false; + + t1 = std::thread([&x, &values, &cv, &done, &m, &ready] { + for (auto i = 0u; i < values.size(); ++i) { + x.insert(values[i]); + if (i % (values.size() / 128) == 0) { + { + std::unique_lock lk(m); + ready = true; + } + cv.notify_all(); + } + } + + done = true; + { + std::unique_lock lk(m); + ready = true; + } + cv.notify_all(); + }); + + t2 = std::thread([&x, &m, &cv, &done, &ready, &num_clears] { + do { + { + std::unique_lock lk(m); + cv.wait(lk, [&ready] { return ready; }); + ready = false; + } + x.clear(); + ++num_clears; + } while (!done); + }); + + t1.join(); + t2.join(); + + BOOST_TEST_GE(num_clears, 1u); + + if (!x.empty()) { + test_fuzzy_matches_reference(x, reference_map, rg); + } + } + + check_raii_counts(); + } + +} // namespace + +// clang-format off +UNORDERED_TEST( + clear_tests, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST(insert_and_clear, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) +// clang-format on + +RUN_TESTS() diff --git a/test/cfoa/constructor_tests.cpp b/test/cfoa/constructor_tests.cpp new file mode 100644 index 00000000..f6e0e069 --- /dev/null +++ b/test/cfoa/constructor_tests.cpp @@ -0,0 +1,823 @@ +// Copyright (C) 
2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "helpers.hpp" + +#include + +test::seed_t initialize_seed(4122023); + +using test::default_generator; +using test::limited_range; +using test::sequential; + +template struct soccc_allocator +{ + int x_ = -1; + + using value_type = T; + + soccc_allocator() = default; + soccc_allocator(soccc_allocator const&) = default; + soccc_allocator(soccc_allocator&&) = default; + + soccc_allocator(int const x) : x_{x} {} + + template soccc_allocator(soccc_allocator const& rhs) : x_{rhs.x_} + { + } + + T* allocate(std::size_t n) + { + return static_cast(::operator new(n * sizeof(T))); + } + + void deallocate(T* p, std::size_t) { ::operator delete(p); } + + soccc_allocator select_on_container_copy_construction() const + { + return {x_ + 1}; + } + + bool operator==(soccc_allocator const& rhs) const { return x_ == rhs.x_; } + bool operator!=(soccc_allocator const& rhs) const { return x_ != rhs.x_; } +}; + +using hasher = stateful_hash; +using key_equal = stateful_key_equal; +using allocator_type = stateful_allocator >; + +using map_type = boost::unordered::concurrent_flat_map; + +using map_value_type = typename map_type::value_type; + +UNORDERED_AUTO_TEST (default_constructor) { + boost::unordered::concurrent_flat_map x; + BOOST_TEST(x.empty()); + BOOST_TEST_EQ(x.size(), 0u); +} + +UNORDERED_AUTO_TEST (bucket_count_with_hasher_key_equal_and_allocator) { + raii::reset_counts(); + { + map_type x(0); + + BOOST_TEST(x.empty()); + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + } + + { + map_type x(0, hasher(1)); + + BOOST_TEST(x.empty()); + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST_EQ(x.hash_function(), hasher(1)); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + } + + { + map_type x(0, hasher(1), key_equal(2)); + + BOOST_TEST(x.empty()); + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST_EQ(x.hash_function(), hasher(1)); + BOOST_TEST_EQ(x.key_eq(), key_equal(2)); + } + + { + map_type x(0, hasher(1), key_equal(2), allocator_type{}); + + BOOST_TEST(x.empty()); + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST_EQ(x.hash_function(), hasher(1)); + BOOST_TEST_EQ(x.key_eq(), key_equal(2)); + BOOST_TEST(x.get_allocator() == allocator_type{}); + } +} + +UNORDERED_AUTO_TEST (soccc) { + raii::reset_counts(); + + boost::unordered::concurrent_flat_map > > + x; + + boost::unordered::concurrent_flat_map > > + y(x); + + BOOST_TEST_EQ(y.hash_function(), x.hash_function()); + BOOST_TEST_EQ(y.key_eq(), x.key_eq()); + BOOST_TEST(y.get_allocator() != x.get_allocator()); +} + +namespace { + template void from_iterator_range(G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + raii::reset_counts(); + + { + map_type x(values.begin(), values.end()); + + test_matches_reference(x, reference_map); + BOOST_TEST_GT(x.size(), 0u); + BOOST_TEST_LE(x.size(), values.size()); + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + BOOST_TEST(x.get_allocator() == allocator_type{}); + if (rg == sequential) { + BOOST_TEST_EQ(x.size(), values.size()); + } + } + + { + map_type x(values.begin(), values.end(), 0); + + test_matches_reference(x, reference_map); + BOOST_TEST_GT(x.size(), 0u); + BOOST_TEST_LE(x.size(), values.size()); + 
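+      // the random generators may produce duplicate keys, so size() can come
+      // out smaller than the input range; only the sequential generator
+      // guarantees unique keys, which is why the exact-size check is gated
+      // on rg == sequential.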
BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + BOOST_TEST(x.get_allocator() == allocator_type{}); + if (rg == sequential) { + BOOST_TEST_EQ(x.size(), values.size()); + } + } + + { + map_type x(values.begin(), values.end(), 0, hasher(1)); + + test_matches_reference(x, reference_map); + BOOST_TEST_GT(x.size(), 0u); + BOOST_TEST_LE(x.size(), values.size()); + BOOST_TEST_EQ(x.hash_function(), hasher(1)); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + BOOST_TEST(x.get_allocator() == allocator_type{}); + if (rg == sequential) { + BOOST_TEST_EQ(x.size(), values.size()); + } + } + + { + map_type x(values.begin(), values.end(), 0, hasher(1), key_equal(2)); + + test_matches_reference(x, reference_map); + BOOST_TEST_GT(x.size(), 0u); + BOOST_TEST_LE(x.size(), values.size()); + BOOST_TEST_EQ(x.hash_function(), hasher(1)); + BOOST_TEST_EQ(x.key_eq(), key_equal(2)); + BOOST_TEST(x.get_allocator() == allocator_type{}); + if (rg == sequential) { + BOOST_TEST_EQ(x.size(), values.size()); + } + } + + { + map_type x(values.begin(), values.end(), 0, hasher(1), key_equal(2), + allocator_type{}); + + test_matches_reference(x, reference_map); + BOOST_TEST_GT(x.size(), 0u); + BOOST_TEST_LE(x.size(), values.size()); + BOOST_TEST_EQ(x.hash_function(), hasher(1)); + BOOST_TEST_EQ(x.key_eq(), key_equal(2)); + BOOST_TEST(x.get_allocator() == allocator_type{}); + if (rg == sequential) { + BOOST_TEST_EQ(x.size(), values.size()); + } + } + + check_raii_counts(); + } + + template void copy_constructor(G gen, test::random_generator rg) + { + { + map_type x(0, hasher(1), key_equal(2), allocator_type{}); + map_type y(x); + + BOOST_TEST_EQ(y.size(), x.size()); + BOOST_TEST_EQ(y.hash_function(), x.hash_function()); + BOOST_TEST_EQ(y.key_eq(), x.key_eq()); + BOOST_TEST(y.get_allocator() == x.get_allocator()); + } + + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + + raii::reset_counts(); + + { + map_type x(values.begin(), values.end(), 0, hasher(1), key_equal(2), + allocator_type{}); + + thread_runner( + values, [&x, &reference_map]( + boost::span > s) { + (void)s; + map_type y(x); + + test_matches_reference(x, reference_map); + test_matches_reference(y, reference_map); + BOOST_TEST_EQ(y.size(), x.size()); + BOOST_TEST_EQ(y.hash_function(), x.hash_function()); + BOOST_TEST_EQ(y.key_eq(), x.key_eq()); + BOOST_TEST(y.get_allocator() == x.get_allocator()); + }); + } + + check_raii_counts(); + + raii::reset_counts(); + + { + allocator_type a; + + map_type x(values.begin(), values.end(), 0, hasher(1), key_equal(2), a); + + thread_runner( + values, [&x, &reference_map, a]( + boost::span > s) { + (void)s; + map_type y(x, a); + + test_matches_reference(x, reference_map); + test_matches_reference(y, reference_map); + BOOST_TEST_EQ(y.size(), x.size()); + BOOST_TEST_EQ(y.hash_function(), x.hash_function()); + BOOST_TEST_EQ(y.key_eq(), x.key_eq()); + BOOST_TEST(y.get_allocator() == x.get_allocator()); + }); + } + + check_raii_counts(); + } + + template + void copy_constructor_with_insertion(G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + raii::reset_counts(); + + std::mutex m; + std::condition_variable cv; + bool ready = false; + + { + map_type x(0, hasher(1), key_equal(2), allocator_type{}); + + auto f = [&x, &values, &m, &cv, &ready] { + { + 
std::lock_guard guard(m); + ready = true; + } + cv.notify_all(); + + for (auto const& val : values) { + x.insert(val); + } + }; + + std::thread t1(f); + std::thread t2(f); + + thread_runner( + values, [&x, &reference_map, &values, rg, &m, &cv, &ready]( + boost::span > s) { + (void)s; + + { + std::unique_lock lk(m); + cv.wait(lk, [&] { return ready; }); + } + + map_type y(x); + + BOOST_TEST_LE(y.size(), values.size()); + BOOST_TEST_EQ(y.hash_function(), x.hash_function()); + BOOST_TEST_EQ(y.key_eq(), x.key_eq()); + BOOST_TEST(y.get_allocator() == x.get_allocator()); + + x.visit_all([&reference_map, rg]( + typename map_type::value_type const& val) { + BOOST_TEST(reference_map.contains(val.first)); + if (rg == sequential) { + BOOST_TEST_EQ(val.second, reference_map.find(val.first)->second); + } + }); + }); + + t1.join(); + t2.join(); + } + + check_raii_counts(); + } + + template void move_constructor(G gen, test::random_generator rg) + { + { + map_type x(0, hasher(1), key_equal(2), allocator_type{}); + auto const old_size = x.size(); + + map_type y(std::move(x)); + + BOOST_TEST_EQ(y.size(), old_size); + BOOST_TEST_EQ(y.hash_function(), hasher(1)); + BOOST_TEST_EQ(y.key_eq(), key_equal(2)); + + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + + BOOST_TEST(y.get_allocator() == x.get_allocator()); + } + + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + + raii::reset_counts(); + + { + map_type x(values.begin(), values.end(), 0, hasher(1), key_equal(2), + allocator_type{}); + + std::atomic_uint num_transfers{0}; + + auto const old_mc = +raii::move_constructor; + + thread_runner( + values, [&x, &reference_map, &num_transfers]( + boost::span > s) { + (void)s; + + auto const old_size = x.size(); + map_type y(std::move(x)); + + if (!y.empty()) { + ++num_transfers; + + test_matches_reference(y, reference_map); + BOOST_TEST_EQ(y.size(), old_size); + BOOST_TEST_EQ(y.hash_function(), hasher(1)); + BOOST_TEST_EQ(y.key_eq(), key_equal(2)); + } else { + BOOST_TEST_EQ(y.size(), 0u); + BOOST_TEST_EQ(y.hash_function(), hasher()); + BOOST_TEST_EQ(y.key_eq(), key_equal()); + } + + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + + BOOST_TEST(y.get_allocator() == x.get_allocator()); + }); + + BOOST_TEST_EQ(num_transfers, 1u); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + } + + check_raii_counts(); + + // allocator-aware move constructor, unequal allocators + raii::reset_counts(); + + { + map_type x(values.begin(), values.end(), 0, hasher(1), key_equal(2), + allocator_type{1}); + + std::atomic_uint num_transfers{0}; + + auto const old_mc = +raii::move_constructor; + auto const old_size = x.size(); + + thread_runner( + values, [&x, &reference_map, &num_transfers, old_size]( + boost::span > s) { + (void)s; + + auto a = allocator_type{2}; + BOOST_TEST(a != x.get_allocator()); + + map_type y(std::move(x), a); + + if (!y.empty()) { + ++num_transfers; + + test_matches_reference(y, reference_map); + BOOST_TEST_EQ(y.size(), old_size); + BOOST_TEST_EQ(y.hash_function(), hasher(1)); + BOOST_TEST_EQ(y.key_eq(), key_equal(2)); + } else { + BOOST_TEST_EQ(y.size(), 0u); + BOOST_TEST_EQ(y.hash_function(), hasher()); + BOOST_TEST_EQ(y.key_eq(), key_equal()); + } + + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + + 
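+          // unequal allocators: the target cannot adopt the source's
+          // storage, so every element is moved individually -- see the
+          // move_constructor == old_mc + (2 * old_size) check after this
+          // runner; the equal-allocator variant below leaves the count
+          // at old_mc.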
BOOST_TEST(y.get_allocator() != x.get_allocator()); + BOOST_TEST(y.get_allocator() == a); + }); + + BOOST_TEST_EQ(num_transfers, 1u); + BOOST_TEST_EQ(raii::move_constructor, old_mc + (2 * old_size)); + } + + check_raii_counts(); + + // allocator-aware move constructor, equal allocators + raii::reset_counts(); + + { + map_type x(values.begin(), values.end(), 0, hasher(1), key_equal(2), + allocator_type{1}); + + std::atomic_uint num_transfers{0}; + + auto const old_mc = +raii::move_constructor; + auto const old_size = x.size(); + + thread_runner( + values, [&x, &reference_map, &num_transfers, old_size]( + boost::span > s) { + (void)s; + + auto a = allocator_type{1}; + BOOST_TEST(a == x.get_allocator()); + + map_type y(std::move(x), a); + + if (!y.empty()) { + ++num_transfers; + + test_matches_reference(y, reference_map); + BOOST_TEST_EQ(y.size(), old_size); + BOOST_TEST_EQ(y.hash_function(), hasher(1)); + BOOST_TEST_EQ(y.key_eq(), key_equal(2)); + } else { + BOOST_TEST_EQ(y.size(), 0u); + BOOST_TEST_EQ(y.hash_function(), hasher()); + BOOST_TEST_EQ(y.key_eq(), key_equal()); + } + + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + + BOOST_TEST(y.get_allocator() == x.get_allocator()); + BOOST_TEST(y.get_allocator() == a); + }); + + BOOST_TEST_EQ(num_transfers, 1u); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + } + + check_raii_counts(); + } + + template + void move_constructor_with_insertion(G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + + raii::reset_counts(); + + std::mutex m; + std::condition_variable cv; + bool ready = false; + + { + map_type x(0, hasher(1), key_equal(2), allocator_type{}); + + std::atomic_uint num_transfers{0}; + + std::thread t1([&x, &values] { + for (auto const& val : values) { + x.insert(val); + } + }); + + std::thread t2([&x, &m, &cv, &ready] { + while (x.empty()) { + std::this_thread::yield(); + } + + { + std::lock_guard guard(m); + ready = true; + } + cv.notify_all(); + }); + + thread_runner( + values, [&x, &reference_map, &num_transfers, rg, &m, &ready, &cv]( + boost::span > s) { + (void)s; + + { + std::unique_lock lk(m); + cv.wait(lk, [&] { return ready; }); + } + + map_type y(std::move(x)); + + if (!y.empty()) { + ++num_transfers; + y.cvisit_all([&reference_map, rg](map_value_type const& val) { + BOOST_TEST(reference_map.contains(val.first)); + if (rg == sequential) { + BOOST_TEST_EQ( + val.second, reference_map.find(val.first)->second); + } + }); + } + }); + + t1.join(); + t2.join(); + + BOOST_TEST_GE(num_transfers, 1u); + } + + check_raii_counts(); + } + + template + void iterator_range_with_allocator(G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + + raii::reset_counts(); + + { + allocator_type a; + map_type x(values.begin(), values.end(), a); + + BOOST_TEST_GT(x.size(), 0u); + BOOST_TEST_LE(x.size(), values.size()); + if (rg == sequential) { + BOOST_TEST_EQ(x.size(), values.size()); + } + + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + + BOOST_TEST(x.get_allocator() == a); + + test_fuzzy_matches_reference(x, reference_map, rg); + } + + check_raii_counts(); + } + + UNORDERED_AUTO_TEST (explicit_allocator) { + raii::reset_counts(); + + { + allocator_type a; + 
map_type x(a); + + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + + BOOST_TEST(x.get_allocator() == a); + } + } + + UNORDERED_AUTO_TEST (initializer_list_with_all_params) { + // hard-code 11 unique values + std::initializer_list ilist{ + map_value_type{raii{0}, raii{0}}, + map_value_type{raii{1}, raii{1}}, + map_value_type{raii{2}, raii{2}}, + map_value_type{raii{3}, raii{3}}, + map_value_type{raii{4}, raii{4}}, + map_value_type{raii{5}, raii{5}}, + map_value_type{raii{6}, raii{6}}, + map_value_type{raii{6}, raii{6}}, + map_value_type{raii{7}, raii{7}}, + map_value_type{raii{8}, raii{8}}, + map_value_type{raii{9}, raii{9}}, + map_value_type{raii{10}, raii{10}}, + map_value_type{raii{9}, raii{9}}, + map_value_type{raii{8}, raii{8}}, + map_value_type{raii{7}, raii{7}}, + map_value_type{raii{6}, raii{6}}, + map_value_type{raii{5}, raii{5}}, + map_value_type{raii{4}, raii{4}}, + map_value_type{raii{3}, raii{3}}, + map_value_type{raii{2}, raii{2}}, + map_value_type{raii{1}, raii{1}}, + map_value_type{raii{0}, raii{0}}, + }; + + { + raii::reset_counts(); + + map_type x(ilist, 0, hasher(1), key_equal(2), allocator_type(3)); + + BOOST_TEST_EQ(x.size(), 11u); + BOOST_TEST_EQ(x.hash_function(), hasher(1)); + BOOST_TEST_EQ(x.key_eq(), key_equal(2)); + BOOST_TEST(x.get_allocator() == allocator_type(3)); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_EQ(raii::copy_constructor, 2 * ilist.size()); + BOOST_TEST_EQ(raii::move_constructor, 2 * 11u); + } + check_raii_counts(); + + { + raii::reset_counts(); + + map_type x(ilist, allocator_type(3)); + + BOOST_TEST_EQ(x.size(), 11u); + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + BOOST_TEST(x.get_allocator() == allocator_type(3)); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_EQ(raii::copy_constructor, 2 * ilist.size()); + BOOST_TEST_EQ(raii::move_constructor, 2 * 11u); + } + check_raii_counts(); + + { + raii::reset_counts(); + + map_type x(ilist, 0, allocator_type(3)); + + BOOST_TEST_EQ(x.size(), 11u); + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + BOOST_TEST(x.get_allocator() == allocator_type(3)); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_EQ(raii::copy_constructor, 2 * ilist.size()); + BOOST_TEST_EQ(raii::move_constructor, 2 * 11u); + } + check_raii_counts(); + + { + raii::reset_counts(); + + map_type x(ilist, 0, hasher(1), allocator_type(3)); + + BOOST_TEST_EQ(x.size(), 11u); + BOOST_TEST_EQ(x.hash_function(), hasher(1)); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + BOOST_TEST(x.get_allocator() == allocator_type(3)); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_EQ(raii::copy_constructor, 2 * ilist.size()); + BOOST_TEST_EQ(raii::move_constructor, 2 * 11u); + } + check_raii_counts(); + } + + UNORDERED_AUTO_TEST (bucket_count_and_allocator) { + raii::reset_counts(); + + { + map_type x(0, allocator_type(3)); + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + BOOST_TEST(x.get_allocator() == allocator_type(3)); + } + + { + map_type x(4096, allocator_type(3)); + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + BOOST_TEST(x.get_allocator() == allocator_type(3)); + } + } + + UNORDERED_AUTO_TEST (bucket_count_with_hasher_and_allocator) { + raii::reset_counts(); + + { + map_type x(0, hasher(1), 
allocator_type(3)); + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST_EQ(x.hash_function(), hasher(1)); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + BOOST_TEST(x.get_allocator() == allocator_type(3)); + } + } + + template + void iterator_range_with_bucket_count_and_allocator( + G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + + raii::reset_counts(); + + { + allocator_type a(3); + map_type x(values.begin(), values.end(), 0, a); + test_fuzzy_matches_reference(x, reference_map, rg); + + BOOST_TEST_EQ(x.hash_function(), hasher()); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + BOOST_TEST(x.get_allocator() == a); + } + + check_raii_counts(); + } + + template + void iterator_range_with_bucket_count_hasher_and_allocator( + G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + + raii::reset_counts(); + + { + allocator_type a(3); + hasher hf(1); + map_type x(values.begin(), values.end(), 0, hf, a); + test_fuzzy_matches_reference(x, reference_map, rg); + + BOOST_TEST_EQ(x.hash_function(), hf); + BOOST_TEST_EQ(x.key_eq(), key_equal()); + BOOST_TEST(x.get_allocator() == a); + } + + check_raii_counts(); + } + +} // namespace + +// clang-format off +UNORDERED_TEST( + from_iterator_range, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + copy_constructor, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + copy_constructor_with_insertion, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + move_constructor, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + move_constructor_with_insertion, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + iterator_range_with_allocator, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + iterator_range_with_bucket_count_and_allocator, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + iterator_range_with_bucket_count_hasher_and_allocator, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +// clang-format on + +RUN_TESTS() diff --git a/test/cfoa/emplace_tests.cpp b/test/cfoa/emplace_tests.cpp new file mode 100644 index 00000000..ba8ac70d --- /dev/null +++ b/test/cfoa/emplace_tests.cpp @@ -0,0 +1,167 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "helpers.hpp" + +#include + +#include + +namespace { + test::seed_t initialize_seed(335740237); + + struct lvalue_emplacer_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_inserts{0}; + thread_runner(values, [&x, &num_inserts](boost::span s) { + for (auto const& r : s) { + bool b = x.emplace(r.first.x_, r.second.x_); + if (b) { + ++num_inserts; + } + } + }); + BOOST_TEST_EQ(num_inserts, x.size()); + BOOST_TEST_EQ(raii::default_constructor, 2 * values.size()); + + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_GE(raii::move_constructor, 2 * x.size()); + + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } lvalue_emplacer; + + struct norehash_lvalue_emplacer_type : public lvalue_emplacer_type + { + template void operator()(std::vector& values, X& x) + { + x.reserve(values.size()); + lvalue_emplacer_type::operator()(values, x); + BOOST_TEST_EQ(raii::move_constructor, 2 * x.size()); + } + } norehash_lvalue_emplacer; + + struct lvalue_emplace_or_cvisit_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_inserts{0}; + std::atomic num_invokes{0}; + thread_runner(values, [&x, &num_inserts, &num_invokes](boost::span s) { + for (auto& r : s) { + bool b = x.emplace_or_cvisit( + r.first.x_, r.second.x_, + [&num_invokes](typename X::value_type const& v) { + (void)v; + ++num_invokes; + }); + + if (b) { + ++num_inserts; + } + } + }); + + BOOST_TEST_EQ(num_inserts, x.size()); + BOOST_TEST_EQ(num_invokes, values.size() - x.size()); + + BOOST_TEST_EQ(raii::default_constructor, 2 * values.size()); + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_GE(raii::move_constructor, 2 * x.size()); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + } + } lvalue_emplace_or_cvisit; + + struct lvalue_emplace_or_visit_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_inserts{0}; + std::atomic num_invokes{0}; + thread_runner(values, [&x, &num_inserts, &num_invokes](boost::span s) { + for (auto& r : s) { + bool b = x.emplace_or_visit( + r.first.x_, r.second.x_, + [&num_invokes](typename X::value_type& v) { + (void)v; + ++num_invokes; + }); + + if (b) { + ++num_inserts; + } + } + }); + + BOOST_TEST_EQ(num_inserts, x.size()); + BOOST_TEST_EQ(num_invokes, values.size() - x.size()); + + BOOST_TEST_EQ(raii::default_constructor, 2 * values.size()); + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_GE(raii::move_constructor, 2 * x.size()); + BOOST_TEST_EQ(raii::move_assignment, 0u); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + } + } lvalue_emplace_or_visit; + + template + void emplace(X*, G gen, F emplacer, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + raii::reset_counts(); + + { + X x; + + emplacer(values, x); + + BOOST_TEST_EQ(x.size(), reference_map.size()); + + using value_type = typename X::value_type; + BOOST_TEST_EQ(x.size(), x.visit_all([&](value_type const& kv) { + BOOST_TEST(reference_map.contains(kv.first)); + if (rg == test::sequential) { + BOOST_TEST_EQ(kv.second, reference_map[kv.first]); + } + })); + } + + BOOST_TEST_GE(raii::default_constructor, 0u); + BOOST_TEST_GE(raii::copy_constructor, 0u); + 
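// editor's note (annotation, not part of the patch): x has gone out of scope
// by this point, so the raii bookkeeping must balance exactly; the final
// BOOST_TEST_EQ below asserts
//   default_constructor + copy_constructor + move_constructor == destructor,
// i.e. every raii object ever created was destroyed exactly once.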
BOOST_TEST_GE(raii::move_constructor, 0u); + BOOST_TEST_GT(raii::destructor, 0u); + + BOOST_TEST_EQ(raii::default_constructor + raii::copy_constructor + + raii::move_constructor, + raii::destructor); + } + + boost::unordered::concurrent_flat_map* map; + +} // namespace + +using test::default_generator; +using test::limited_range; +using test::sequential; + +// clang-format off + +UNORDERED_TEST( + emplace, + ((map)) + ((value_type_generator)(init_type_generator)) + ((lvalue_emplacer)(norehash_lvalue_emplacer) + (lvalue_emplace_or_cvisit)(lvalue_emplace_or_visit)) + ((default_generator)(sequential)(limited_range))) + +// clang-format on + +RUN_TESTS() diff --git a/test/cfoa/equality_tests.cpp b/test/cfoa/equality_tests.cpp new file mode 100644 index 00000000..8ab2fbb6 --- /dev/null +++ b/test/cfoa/equality_tests.cpp @@ -0,0 +1,142 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "helpers.hpp" + +#include + +test::seed_t initialize_seed{1634048962}; + +using test::default_generator; +using test::limited_range; +using test::sequential; + +using hasher = stateful_hash; +using key_equal = stateful_key_equal; +using allocator_type = stateful_allocator >; + +using map_type = boost::unordered::concurrent_flat_map; + +using map_value_type = typename map_type::value_type; + +namespace { + + UNORDERED_AUTO_TEST (simple_equality) { + { + map_type x1( + {{1, 11}, {2, 22}}, 0, hasher(1), key_equal(2), allocator_type(3)); + + map_type x2( + {{1, 11}, {2, 22}}, 0, hasher(2), key_equal(2), allocator_type(3)); + + map_type x3( + {{1, 11}, {2, 23}}, 0, hasher(2), key_equal(2), allocator_type(3)); + + map_type x4({{1, 11}}, 0, hasher(2), key_equal(2), allocator_type(3)); + + BOOST_TEST_EQ(x1.size(), x2.size()); + BOOST_TEST(x1 == x2); + BOOST_TEST(!(x1 != x2)); + + BOOST_TEST_EQ(x1.size(), x3.size()); + BOOST_TEST(!(x1 == x3)); + BOOST_TEST(x1 != x3); + + BOOST_TEST(x1.size() != x4.size()); + BOOST_TEST(!(x1 == x4)); + BOOST_TEST(x1 != x4); + } + } + + template void insert_and_compare(G gen, test::random_generator rg) + { + auto vals1 = make_random_values(1024 * 8, [&] { return gen(rg); }); + boost::unordered_flat_map reference_map( + vals1.begin(), vals1.end()); + + { + raii::reset_counts(); + + map_type x1(vals1.size(), hasher(1), key_equal(2), allocator_type(3)); + map_type x2(vals1.begin(), vals1.end(), vals1.size(), hasher(2), + key_equal(2), allocator_type(3)); + + std::thread t1, t2; + + std::mutex m; + std::condition_variable cv; + std::atomic_bool done{false}; + std::atomic num_compares{0}; + bool ready = false; + + BOOST_TEST(x1.empty()); + + t1 = std::thread([&x1, &m, &cv, &vals1, &done, &ready] { + for (std::size_t idx = 0; idx < vals1.size(); ++idx) { + auto const& v = vals1[idx]; + x1.insert(v); + + if (idx % (vals1.size() / 128) == 0) { + { + std::unique_lock lk(m); + ready = true; + } + cv.notify_all(); + } + std::this_thread::yield(); + } + + done = true; + { + std::unique_lock lk(m); + ready = true; + } + cv.notify_all(); + }); + + t2 = std::thread([&x1, &x2, &m, &cv, &done, &num_compares, &ready] { + do { + { + std::unique_lock lk(m); + cv.wait(lk, [&ready] { return ready; }); + ready = false; + } + + volatile bool b = false; + + b = x1 == x2; + b = x1 != x2; + + b; + + ++num_compares; + std::this_thread::yield(); + } while (!done); + + BOOST_TEST(done); + }); + + t1.join(); + t2.join(); + + BOOST_TEST_GE(num_compares, 1u); + + 
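// editor's note (annotation, not part of the patch): insert_and_compare
// overlaps concurrent insertion into x1 with repeated x1 == x2 / x1 != x2
// evaluations, using a mutex/condition_variable handshake so the comparing
// thread only wakes while inserts are known to be in flight. A minimal,
// self-contained sketch of that handshake pattern (names are hypothetical,
// and the real test uses an atomic done flag and a do/while loop instead):

#include <condition_variable>
#include <mutex>
#include <thread>

int main()
{
  std::mutex m;
  std::condition_variable cv;
  bool ready = false; // producer has made progress worth inspecting
  bool done = false;  // producer has finished

  std::thread producer([&] {
    for (int i = 0; i < 1000; ++i) {
      // ... one unit of work (e.g. one insert) ...
      if (i % 100 == 0) { // wake the inspector periodically
        { std::lock_guard<std::mutex> g(m); ready = true; }
        cv.notify_all();
      }
    }
    { std::lock_guard<std::mutex> g(m); ready = true; done = true; }
    cv.notify_all();
  });

  std::thread inspector([&] {
    for (;;) {
      std::unique_lock<std::mutex> lk(m);
      cv.wait(lk, [&] { return ready; });
      ready = false;
      bool finished = done;
      lk.unlock();
      // ... inspect shared state here (e.g. evaluate x1 == x2) ...
      if (finished) break;
    }
  });

  producer.join();
  inspector.join();
}

// comparisons being safe mid-insertion is the property under test; once both
// threads have joined, x1 == x2 must hold exactly, as checked next.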
BOOST_TEST(x1 == x2); + BOOST_TEST(!(x1 != x2)); + + test_matches_reference(x1, reference_map); + } + check_raii_counts(); + } +} // namespace + +// clang-format off +UNORDERED_TEST( + insert_and_compare, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) +// clang-format on + +RUN_TESTS() diff --git a/test/cfoa/erase_tests.cpp b/test/cfoa/erase_tests.cpp new file mode 100644 index 00000000..0bf4041b --- /dev/null +++ b/test/cfoa/erase_tests.cpp @@ -0,0 +1,408 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "helpers.hpp" + +#include + +#include + +namespace { + test::seed_t initialize_seed(3292023); + + struct lvalue_eraser_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_erased{0}; + auto const old_size = x.size(); + + auto const old_dc = +raii::default_constructor; + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + auto const old_d = +raii::destructor; + + BOOST_TEST_EQ(raii::default_constructor + raii::copy_constructor + + raii::move_constructor, + raii::destructor + 2 * x.size()); + + thread_runner(values, [&values, &num_erased, &x](boost::span) { + for (auto const& k : values) { + auto count = x.erase(k.first); + num_erased += count; + BOOST_TEST_LE(count, 1u); + BOOST_TEST_GE(count, 0u); + } + }); + + BOOST_TEST_EQ(raii::default_constructor, old_dc); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + + BOOST_TEST_EQ(raii::destructor, old_d + 2 * old_size); + + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST(x.empty()); + BOOST_TEST_EQ(num_erased, old_size); + } + } lvalue_eraser; + + struct transp_lvalue_eraser_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_erased{0}; + auto const old_size = x.size(); + + auto const old_dc = +raii::default_constructor; + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + auto const old_d = +raii::destructor; + + BOOST_TEST_EQ(raii::default_constructor + raii::copy_constructor + + raii::move_constructor, + raii::destructor + 2 * x.size()); + + thread_runner(values, [&num_erased, &x](boost::span s) { + for (auto const& k : s) { + auto count = x.erase(k.first.x_); + num_erased += count; + BOOST_TEST_LE(count, 1u); + BOOST_TEST_GE(count, 0u); + } + }); + + BOOST_TEST_EQ(raii::default_constructor, old_dc); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + + BOOST_TEST_EQ(raii::destructor, old_d + 2 * num_erased); + + BOOST_TEST_EQ(x.size(), 0u); + BOOST_TEST(x.empty()); + BOOST_TEST_EQ(num_erased, old_size); + } + } transp_lvalue_eraser; + + struct lvalue_eraser_if_type + { + template void operator()(std::vector& values, X& x) + { + using value_type = typename X::value_type; + + std::atomic num_erased{0}; + + auto const old_size = x.size(); + + auto const old_dc = +raii::default_constructor; + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + auto const old_d = +raii::destructor; + + auto max = 0; + x.visit_all([&max](value_type const& v) { + if (v.second.x_ > max) { + max = v.second.x_; + } + }); + + auto threshold = max / 2; + + auto expected_erasures = 0u; + x.visit_all([&expected_erasures, threshold](value_type const& v) { + if (v.second.x_ > threshold) { + 
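// editor's note (annotation, not part of the patch): this single-threaded
// visit_all pass precounts the values above the threshold; the concurrent
// erase_if pass that follows must remove exactly that many, which
// BOOST_TEST_EQ(num_erased, expected_erasures) verifies afterwards.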
++expected_erasures; + } + }); + + thread_runner(values, [&num_erased, &x, threshold](boost::span s) { + for (auto const& k : s) { + auto count = x.erase_if(k.first, + [threshold](value_type& v) { return v.second.x_ > threshold; }); + num_erased += count; + BOOST_TEST_LE(count, 1u); + BOOST_TEST_GE(count, 0u); + } + }); + + BOOST_TEST_EQ(num_erased, expected_erasures); + BOOST_TEST_EQ(x.size(), old_size - num_erased); + + BOOST_TEST_EQ(raii::default_constructor, old_dc); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + + BOOST_TEST_EQ(raii::destructor, old_d + 2 * num_erased); + } + } lvalue_eraser_if; + + struct transp_lvalue_eraser_if_type + { + template void operator()(std::vector& values, X& x) + { + using value_type = typename X::value_type; + + std::atomic num_erased{0}; + + auto const old_size = x.size(); + + auto const old_dc = +raii::default_constructor; + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + auto const old_d = +raii::destructor; + + auto max = 0; + x.visit_all([&max](value_type const& v) { + if (v.second.x_ > max) { + max = v.second.x_; + } + }); + + auto threshold = max / 2; + + auto expected_erasures = 0u; + x.visit_all([&expected_erasures, threshold](value_type const& v) { + if (v.second.x_ > threshold) { + ++expected_erasures; + } + }); + + thread_runner(values, [&num_erased, &x, threshold](boost::span s) { + for (auto const& k : s) { + auto count = x.erase_if(k.first.x_, + [threshold](value_type& v) { return v.second.x_ > threshold; }); + num_erased += count; + BOOST_TEST_LE(count, 1u); + BOOST_TEST_GE(count, 0u); + } + }); + + BOOST_TEST_EQ(num_erased, expected_erasures); + BOOST_TEST_EQ(x.size(), old_size - num_erased); + + BOOST_TEST_EQ(raii::default_constructor, old_dc); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + + BOOST_TEST_EQ(raii::destructor, old_d + 2 * num_erased); + } + } transp_lvalue_eraser_if; + + struct erase_if_type + { + template void operator()(std::vector& values, X& x) + { + using value_type = typename X::value_type; + + std::atomic num_erased{0}; + + auto const old_size = x.size(); + + auto const old_dc = +raii::default_constructor; + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + auto const old_d = +raii::destructor; + + auto max = 0; + x.visit_all([&max](value_type const& v) { + if (v.second.x_ > max) { + max = v.second.x_; + } + }); + + auto threshold = max / 2; + + auto expected_erasures = 0u; + x.visit_all([&expected_erasures, threshold](value_type const& v) { + if (v.second.x_ > threshold) { + ++expected_erasures; + } + }); + + thread_runner( + values, [&num_erased, &x, threshold](boost::span /* s */) { + for (std::size_t i = 0; i < 128; ++i) { + auto count = x.erase_if( + [threshold](value_type& v) { return v.second.x_ > threshold; }); + num_erased += count; + } + }); + + BOOST_TEST_EQ(num_erased, expected_erasures); + BOOST_TEST_EQ(x.size(), old_size - num_erased); + + BOOST_TEST_EQ(raii::default_constructor, old_dc); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + + BOOST_TEST_EQ(raii::destructor, old_d + 2 * num_erased); + } + } erase_if; + + struct free_fn_erase_if_type + { + template void operator()(std::vector& values, X& x) + { + using value_type = typename X::value_type; + + std::atomic num_erased{0}; + + auto const old_size = x.size(); + + auto const old_dc = 
+raii::default_constructor; + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + auto const old_d = +raii::destructor; + + auto max = 0; + x.visit_all([&max](value_type const& v) { + if (v.second.x_ > max) { + max = v.second.x_; + } + }); + + auto threshold = max / 2; + + auto expected_erasures = 0u; + x.visit_all([&expected_erasures, threshold](value_type const& v) { + if (v.second.x_ > threshold) { + ++expected_erasures; + } + }); + + thread_runner( + values, [&num_erased, &x, threshold](boost::span /* s */) { + for (std::size_t i = 0; i < 128; ++i) { + auto count = boost::unordered::erase_if(x, + [threshold](value_type& v) { return v.second.x_ > threshold; }); + num_erased += count; + } + }); + + BOOST_TEST_EQ(num_erased, expected_erasures); + BOOST_TEST_EQ(x.size(), old_size - num_erased); + + BOOST_TEST_EQ(raii::default_constructor, old_dc); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + + BOOST_TEST_EQ(raii::destructor, old_d + 2 * num_erased); + } + } free_fn_erase_if; + + struct erase_if_exec_policy_type + { + template void operator()(std::vector& values, X& x) + { +#if defined(BOOST_UNORDERED_PARALLEL_ALGORITHMS) + using value_type = typename X::value_type; + + std::atomic num_invokes{0}; + + auto const old_size = x.size(); + + auto const old_dc = +raii::default_constructor; + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + auto const old_d = +raii::destructor; + + auto max = 0; + x.visit_all([&max](value_type const& v) { + if (v.second.x_ > max) { + max = v.second.x_; + } + }); + + auto threshold = max / 2; + + auto expected_erasures = 0u; + x.visit_all([&expected_erasures, threshold](value_type const& v) { + if (v.second.x_ > threshold) { + ++expected_erasures; + } + }); + + thread_runner(values, [&num_invokes, &x, threshold](boost::span s) { + (void)s; + x.erase_if( + std::execution::par, [&num_invokes, threshold](value_type& v) { + ++num_invokes; + return v.second.x_ > threshold; + }); + }); + + BOOST_TEST_GE(+num_invokes, old_size); + BOOST_TEST_LE(+num_invokes, old_size * num_threads); + + BOOST_TEST_EQ(raii::default_constructor, old_dc); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + + BOOST_TEST_EQ(raii::destructor, old_d + 2 * expected_erasures); +#else + (void)values; + (void)x; +#endif + } + } erase_if_exec_policy; + + template + void erase(X*, G gen, F eraser, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + raii::reset_counts(); + + { + X x; + + x.insert(values.begin(), values.end()); + + BOOST_TEST_EQ(x.size(), reference_map.size()); + + test_fuzzy_matches_reference(x, reference_map, rg); + + eraser(values, x); + test_fuzzy_matches_reference(x, reference_map, rg); + } + + check_raii_counts(); + } + + boost::unordered::concurrent_flat_map* map; + boost::unordered::concurrent_flat_map* transparent_map; + +} // namespace + +using test::default_generator; +using test::limited_range; +using test::sequential; + +// clang-format off +UNORDERED_TEST( + erase, + ((map)) + ((value_type_generator)(init_type_generator)) + ((lvalue_eraser)(lvalue_eraser_if)(erase_if)(free_fn_erase_if)(erase_if_exec_policy)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + erase, + ((transparent_map)) + 
((value_type_generator)(init_type_generator)) + ((transp_lvalue_eraser)(transp_lvalue_eraser_if)(erase_if_exec_policy)) + ((default_generator)(sequential)(limited_range))) + +// clang-format on + +RUN_TESTS() diff --git a/test/cfoa/exception_assign_tests.cpp b/test/cfoa/exception_assign_tests.cpp new file mode 100644 index 00000000..36e94367 --- /dev/null +++ b/test/cfoa/exception_assign_tests.cpp @@ -0,0 +1,172 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "exception_helpers.hpp" + +#include + +using allocator_type = stateful_allocator >; + +using hasher = stateful_hash; +using key_equal = stateful_key_equal; + +using map_type = boost::unordered::concurrent_flat_map; + +namespace { + test::seed_t initialize_seed(1794114520); + + template void copy_assign(G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + + { + raii::reset_counts(); + + unsigned num_throws = 0; + + auto begin = values.begin(); + auto mid = + values.begin() + static_cast(values.size() / 2); + auto end = values.end(); + + auto reference_map = boost::unordered_flat_map(begin, mid); + + map_type x( + begin, mid, values.size(), hasher(1), key_equal(2), allocator_type(3)); + + map_type y( + mid, end, values.size(), hasher(2), key_equal(1), allocator_type(4)); + + BOOST_TEST(!y.empty()); + + enable_exceptions(); + for (std::size_t i = 0; i < 2 * alloc_throw_threshold; ++i) { + try { + y = x; + } catch (...) { + ++num_throws; + } + } + + disable_exceptions(); + + BOOST_TEST_GT(num_throws, 0u); + test_fuzzy_matches_reference(y, reference_map, rg); + } + check_raii_counts(); + } + + template void move_assign(G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + + { + raii::reset_counts(); + + unsigned num_throws = 0; + + auto begin = values.begin(); + auto mid = + values.begin() + static_cast(values.size() / 2); + auto end = values.end(); + + auto reference_map = boost::unordered_flat_map(begin, mid); + + BOOST_TEST( + !boost::allocator_is_always_equal::type::value); + + BOOST_TEST(!boost::allocator_propagate_on_container_move_assignment< + allocator_type>::type::value); + + for (std::size_t i = 0; i < 2 * alloc_throw_threshold; ++i) { + disable_exceptions(); + + map_type x(begin, mid, values.size(), hasher(1), key_equal(2), + allocator_type(3)); + + map_type y( + mid, end, values.size(), hasher(2), key_equal(1), allocator_type(4)); + + enable_exceptions(); + try { + y = std::move(x); + } catch (...) 
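// editor's note (annotation, not part of the patch): the BOOST_TEST checks
// above establish that the allocators are stateful, unequal, and do not
// propagate on move assignment, so y = std::move(x) cannot simply steal x's
// storage; it has to move elements individually, and those moves can hit an
// injected throw. Each iteration re-enables injection only around the
// assignment, and BOOST_TEST_GT(num_throws, 0u) later confirms that at least
// one failure occurred.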
{ + ++num_throws; + } + disable_exceptions(); + test_fuzzy_matches_reference(y, reference_map, rg); + } + + BOOST_TEST_GT(num_throws, 0u); + } + check_raii_counts(); + } + + UNORDERED_AUTO_TEST (initializer_list_assign) { + using value_type = typename map_type::value_type; + + std::initializer_list values{ + value_type{raii{0}, raii{0}}, + value_type{raii{1}, raii{1}}, + value_type{raii{2}, raii{2}}, + value_type{raii{3}, raii{3}}, + value_type{raii{4}, raii{4}}, + value_type{raii{5}, raii{5}}, + value_type{raii{6}, raii{6}}, + value_type{raii{6}, raii{6}}, + value_type{raii{7}, raii{7}}, + value_type{raii{8}, raii{8}}, + value_type{raii{9}, raii{9}}, + value_type{raii{10}, raii{10}}, + value_type{raii{9}, raii{9}}, + value_type{raii{8}, raii{8}}, + value_type{raii{7}, raii{7}}, + value_type{raii{6}, raii{6}}, + value_type{raii{5}, raii{5}}, + value_type{raii{4}, raii{4}}, + value_type{raii{3}, raii{3}}, + value_type{raii{2}, raii{2}}, + value_type{raii{1}, raii{1}}, + value_type{raii{0}, raii{0}}, + }; + + { + raii::reset_counts(); + unsigned num_throws = 0; + + for (std::size_t i = 0; i < throw_threshold; ++i) { + map_type x(0, hasher(1), key_equal(2), allocator_type(3)); + enable_exceptions(); + try { + x = values; + } catch (...) { + ++num_throws; + } + disable_exceptions(); + } + + BOOST_TEST_GT(num_throws, 0u); + check_raii_counts(); + } + } +} // namespace + +using test::default_generator; +using test::limited_range; +using test::sequential; + +// clang-format off +UNORDERED_TEST( + copy_assign, + ((exception_value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + move_assign, + ((exception_value_type_generator)) + ((default_generator)(sequential))) +// clang-format on + +RUN_TESTS() diff --git a/test/cfoa/exception_constructor_tests.cpp b/test/cfoa/exception_constructor_tests.cpp new file mode 100644 index 00000000..998a6411 --- /dev/null +++ b/test/cfoa/exception_constructor_tests.cpp @@ -0,0 +1,304 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "exception_helpers.hpp" + +#include + +using allocator_type = stateful_allocator >; + +using hasher = stateful_hash; +using key_equal = stateful_key_equal; + +using map_type = boost::unordered::concurrent_flat_map; + +namespace { + test::seed_t initialize_seed(795610904); + + UNORDERED_AUTO_TEST (bucket_constructor) { + raii::reset_counts(); + + bool was_thrown = false; + + enable_exceptions(); + for (std::size_t i = 0; i < alloc_throw_threshold; ++i) { + try { + map_type m(128); + } catch (...) { + was_thrown = true; + } + } + disable_exceptions(); + + BOOST_TEST(was_thrown); + } + + template void iterator_range(G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + + { + raii::reset_counts(); + + bool was_thrown = false; + + enable_exceptions(); + try { + map_type x(values.begin(), values.end(), 0, hasher(1), key_equal(2), + allocator_type(3)); + } catch (...) { + was_thrown = true; + } + disable_exceptions(); + + BOOST_TEST(was_thrown); + check_raii_counts(); + } + + { + raii::reset_counts(); + + bool was_thrown = false; + + enable_exceptions(); + try { + map_type x(values.begin(), values.end(), allocator_type(3)); + } catch (...) 
{ + was_thrown = true; + } + disable_exceptions(); + + BOOST_TEST(was_thrown); + check_raii_counts(); + } + + { + raii::reset_counts(); + + bool was_thrown = false; + + enable_exceptions(); + try { + map_type x( + values.begin(), values.end(), values.size(), allocator_type(3)); + } catch (...) { + was_thrown = true; + } + disable_exceptions(); + + BOOST_TEST(was_thrown); + check_raii_counts(); + } + + { + raii::reset_counts(); + + bool was_thrown = false; + + enable_exceptions(); + try { + map_type x(values.begin(), values.end(), values.size(), hasher(1), + allocator_type(3)); + } catch (...) { + was_thrown = true; + } + disable_exceptions(); + + BOOST_TEST(was_thrown); + check_raii_counts(); + } + } + + template void copy_constructor(G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + + { + raii::reset_counts(); + + bool was_thrown = false; + + try { + map_type x(values.begin(), values.end(), 0); + + enable_exceptions(); + map_type y(x); + } catch (...) { + was_thrown = true; + } + disable_exceptions(); + + BOOST_TEST(was_thrown); + check_raii_counts(); + } + + { + raii::reset_counts(); + + bool was_thrown = false; + + try { + map_type x(values.begin(), values.end(), 0); + + enable_exceptions(); + map_type y(x, allocator_type(4)); + } catch (...) { + was_thrown = true; + } + disable_exceptions(); + + BOOST_TEST(was_thrown); + check_raii_counts(); + } + } + + template void move_constructor(G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + + { + raii::reset_counts(); + + bool was_thrown = false; + + try { + map_type x(values.begin(), values.end(), 0); + + enable_exceptions(); + map_type y(std::move(x), allocator_type(4)); + } catch (...) { + was_thrown = true; + } + disable_exceptions(); + + BOOST_TEST(was_thrown); + check_raii_counts(); + } + } + + UNORDERED_AUTO_TEST (initializer_list_bucket_count) { + using value_type = typename map_type::value_type; + + std::initializer_list values{ + value_type{raii{0}, raii{0}}, + value_type{raii{1}, raii{1}}, + value_type{raii{2}, raii{2}}, + value_type{raii{3}, raii{3}}, + value_type{raii{4}, raii{4}}, + value_type{raii{5}, raii{5}}, + value_type{raii{6}, raii{6}}, + value_type{raii{6}, raii{6}}, + value_type{raii{7}, raii{7}}, + value_type{raii{8}, raii{8}}, + value_type{raii{9}, raii{9}}, + value_type{raii{10}, raii{10}}, + value_type{raii{9}, raii{9}}, + value_type{raii{8}, raii{8}}, + value_type{raii{7}, raii{7}}, + value_type{raii{6}, raii{6}}, + value_type{raii{5}, raii{5}}, + value_type{raii{4}, raii{4}}, + value_type{raii{3}, raii{3}}, + value_type{raii{2}, raii{2}}, + value_type{raii{1}, raii{1}}, + value_type{raii{0}, raii{0}}, + }; + + { + raii::reset_counts(); + unsigned num_throws = 0; + + enable_exceptions(); + for (std::size_t i = 0; i < throw_threshold; ++i) { + try { + map_type x(values, 0, hasher(1), key_equal(2), allocator_type(3)); + } catch (...) { + ++num_throws; + } + } + disable_exceptions(); + + BOOST_TEST_GT(num_throws, 0u); + check_raii_counts(); + } + + { + raii::reset_counts(); + unsigned num_throws = 0; + + enable_exceptions(); + for (std::size_t i = 0; i < alloc_throw_threshold * 2; ++i) { + try { + map_type x(values, allocator_type(3)); + } catch (...) 
{ + ++num_throws; + } + } + disable_exceptions(); + + BOOST_TEST_GT(num_throws, 0u); + check_raii_counts(); + } + + { + raii::reset_counts(); + unsigned num_throws = 0; + + enable_exceptions(); + for (std::size_t i = 0; i < alloc_throw_threshold * 2; ++i) { + try { + map_type x(values, values.size() * 2, allocator_type(3)); + } catch (...) { + ++num_throws; + } + } + disable_exceptions(); + + BOOST_TEST_GT(num_throws, 0u); + check_raii_counts(); + } + + { + raii::reset_counts(); + unsigned num_throws = 0; + + enable_exceptions(); + for (std::size_t i = 0; i < throw_threshold; ++i) { + try { + map_type x(values, values.size() * 2, hasher(1), allocator_type(3)); + } catch (...) { + ++num_throws; + } + } + disable_exceptions(); + + BOOST_TEST_GT(num_throws, 0u); + check_raii_counts(); + } + } +} // namespace + +using test::default_generator; +using test::limited_range; +using test::sequential; + +// clang-format off +UNORDERED_TEST( + iterator_range, + ((exception_value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + copy_constructor, + ((exception_value_type_generator)) + ((default_generator)(sequential))) + +UNORDERED_TEST( + move_constructor, + ((exception_value_type_generator)) + ((default_generator)(sequential))) +// clang-format on + +RUN_TESTS() diff --git a/test/cfoa/exception_erase_tests.cpp b/test/cfoa/exception_erase_tests.cpp new file mode 100644 index 00000000..11d167e7 --- /dev/null +++ b/test/cfoa/exception_erase_tests.cpp @@ -0,0 +1,265 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "exception_helpers.hpp" + +#include + +#include + +namespace { + test::seed_t initialize_seed(3202923); + + struct lvalue_eraser_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_erased{0}; + + auto const old_size = x.size(); + + auto const old_dc = +raii::default_constructor; + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + auto const old_d = +raii::destructor; + + enable_exceptions(); + thread_runner(values, [&values, &num_erased, &x](boost::span) { + for (auto const& k : values) { + try { + auto count = x.erase(k.first); + BOOST_TEST_LE(count, 1u); + BOOST_TEST_GE(count, 0u); + + num_erased += count; + } catch (...) 
{ + } + } + }); + disable_exceptions(); + + BOOST_TEST_EQ(x.size(), old_size - num_erased); + + BOOST_TEST_EQ(raii::default_constructor, old_dc); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + + BOOST_TEST_EQ(raii::destructor, old_d + 2 * num_erased); + } + } lvalue_eraser; + + struct lvalue_eraser_if_type + { + template void operator()(std::vector& values, X& x) + { + using value_type = typename X::value_type; + + std::atomic num_erased{0}; + + auto const old_size = x.size(); + + auto const old_dc = +raii::default_constructor; + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + auto const old_d = +raii::destructor; + + auto max = 0; + x.visit_all([&max](value_type const& v) { + if (v.second.x_ > max) { + max = v.second.x_; + } + }); + + auto threshold = max / 2; + + auto expected_erasures = 0u; + x.visit_all([&expected_erasures, threshold](value_type const& v) { + if (v.second.x_ > threshold) { + ++expected_erasures; + } + }); + + enable_exceptions(); + thread_runner(values, [&num_erased, &x, threshold](boost::span s) { + for (auto const& k : s) { + try { + auto count = x.erase_if(k.first, + [threshold](value_type& v) { return v.second.x_ > threshold; }); + num_erased += count; + BOOST_TEST_LE(count, 1u); + BOOST_TEST_GE(count, 0u); + } catch (...) { + } + } + }); + disable_exceptions(); + + BOOST_TEST_LE(num_erased, expected_erasures); + BOOST_TEST_EQ(x.size(), old_size - num_erased); + + BOOST_TEST_EQ(raii::default_constructor, old_dc); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + + BOOST_TEST_EQ(raii::destructor, old_d + 2 * num_erased); + } + } lvalue_eraser_if; + + struct erase_if_type + { + template void operator()(std::vector& values, X& x) + { + using value_type = typename X::value_type; + + auto const old_size = x.size(); + + auto const old_dc = +raii::default_constructor; + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + auto const old_d = +raii::destructor; + + auto max = 0; + x.visit_all([&max](value_type const& v) { + if (v.second.x_ > max) { + max = v.second.x_; + } + }); + + auto threshold = max / 2; + + auto expected_erasures = 0u; + x.visit_all([&expected_erasures, threshold](value_type const& v) { + if (v.second.x_ > threshold) { + ++expected_erasures; + } + }); + + enable_exceptions(); + thread_runner(values, [&x, threshold](boost::span /* s */) { + for (std::size_t i = 0; i < 256; ++i) { + try { + x.erase_if([threshold](value_type& v) { + static std::atomic c{0}; + auto t = ++c; + if (should_throw && (t % throw_threshold == 0)) { + throw exception_tag{}; + } + + return v.second.x_ > threshold; + }); + } catch (...) 
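// editor's note (annotation, not part of the patch): here the throw is
// injected from inside the erase_if predicate itself (the static atomic
// counter above fires on every throw_threshold-th invocation); the handler
// below is deliberately empty, so a failed pass is simply abandoned while
// the other threads carry on. The raii destructor counters checked
// afterwards confirm nothing leaked or was double-destroyed during
// unwinding.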
{ + } + } + }); + disable_exceptions(); + + BOOST_TEST_EQ(raii::default_constructor, old_dc); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + + BOOST_TEST_EQ(raii::destructor, old_d + 2 * (old_size - x.size())); + } + } erase_if; + + struct free_fn_erase_if_type + { + template void operator()(std::vector& values, X& x) + { + using value_type = typename X::value_type; + + auto const old_size = x.size(); + + auto const old_dc = +raii::default_constructor; + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + auto const old_d = +raii::destructor; + + auto max = 0; + x.visit_all([&max](value_type const& v) { + if (v.second.x_ > max) { + max = v.second.x_; + } + }); + + auto threshold = max / 2; + + enable_exceptions(); + thread_runner(values, [&x, threshold](boost::span /* s */) { + for (std::size_t i = 0; i < 256; ++i) { + try { + boost::unordered::erase_if(x, [threshold](value_type& v) { + static std::atomic c{0}; + auto t = ++c; + if (should_throw && (t % throw_threshold == 0)) { + throw exception_tag{}; + } + + return v.second.x_ > threshold; + }); + + } catch (...) { + } + } + }); + disable_exceptions(); + + BOOST_TEST_EQ(raii::default_constructor, old_dc); + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + + BOOST_TEST_EQ(raii::destructor, old_d + 2 * (old_size - x.size())); + } + } free_fn_erase_if; + + template + void erase(X*, G gen, F eraser, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + + raii::reset_counts(); + + { + X x(values.size()); + for (auto const& v : values) { + x.insert(v); + } + + BOOST_TEST_EQ(x.size(), reference_map.size()); + BOOST_TEST_EQ(raii::destructor, 0u); + + test_fuzzy_matches_reference(x, reference_map, rg); + + eraser(values, x); + test_fuzzy_matches_reference(x, reference_map, rg); + } + + check_raii_counts(); + } + + boost::unordered::concurrent_flat_map > >* map; + +} // namespace + +using test::default_generator; +using test::limited_range; +using test::sequential; + +// clang-format off +UNORDERED_TEST( + erase, + ((map)) + ((exception_value_type_generator)(exception_init_type_generator)) + ((lvalue_eraser)(lvalue_eraser_if)(erase_if)(free_fn_erase_if)) + ((default_generator)(sequential)(limited_range))) + +// clang-format on + +RUN_TESTS() diff --git a/test/cfoa/exception_helpers.hpp b/test/cfoa/exception_helpers.hpp new file mode 100644 index 00000000..691936f6 --- /dev/null +++ b/test/cfoa/exception_helpers.hpp @@ -0,0 +1,445 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "latch.hpp" + +#include "../helpers/generators.hpp" +#include "../helpers/test.hpp" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static std::size_t const num_threads = + std::max(2u, std::thread::hardware_concurrency()); + +std::atomic_bool should_throw{false}; + +constexpr std::uint32_t throw_threshold = 2500; +constexpr std::uint32_t alloc_throw_threshold = 10; + +void enable_exceptions() { should_throw = true; } +void disable_exceptions() { should_throw = false; } + +struct exception_tag +{ +}; + +struct stateful_hash +{ + int x_ = -1; + + static std::atomic c; + + void throw_helper() const + { + auto n = ++c; + if (should_throw && ((n + 1) % throw_threshold == 0)) { + throw exception_tag{}; + } + } + + stateful_hash() {} + stateful_hash(stateful_hash const& rhs) : x_(rhs.x_) {} + stateful_hash(stateful_hash&& rhs) noexcept + { + auto tmp = x_; + x_ = rhs.x_; + rhs.x_ = tmp; + } + + stateful_hash(int const x) : x_{x} {} + + template std::size_t operator()(T const& t) const + { + throw_helper(); + std::size_t h = static_cast(x_); + boost::hash_combine(h, t); + return h; + } + + bool operator==(stateful_hash const& rhs) const { return x_ == rhs.x_; } + + friend std::ostream& operator<<(std::ostream& os, stateful_hash const& rhs) + { + os << "{ x_: " << rhs.x_ << " }"; + return os; + } + + friend void swap(stateful_hash& lhs, stateful_hash& rhs) noexcept + { + if (&lhs != &rhs) { + std::swap(lhs.x_, rhs.x_); + } + } +}; + +std::atomic stateful_hash::c{0}; + +struct stateful_key_equal +{ + int x_ = -1; + static std::atomic c; + + void throw_helper() const + { + auto n = ++c; + if (should_throw && ((n + 1) % throw_threshold == 0)) { + throw exception_tag{}; + } + } + + stateful_key_equal() = default; + stateful_key_equal(stateful_key_equal const&) = default; + stateful_key_equal(stateful_key_equal&& rhs) noexcept + { + auto tmp = x_; + x_ = rhs.x_; + rhs.x_ = tmp; + } + + stateful_key_equal(int const x) : x_{x} {} + + template bool operator()(T const& t, U const& u) const + { + throw_helper(); + return t == u; + } + + bool operator==(stateful_key_equal const& rhs) const { return x_ == rhs.x_; } + + friend std::ostream& operator<<( + std::ostream& os, stateful_key_equal const& rhs) + { + os << "{ x_: " << rhs.x_ << " }"; + return os; + } + + friend void swap(stateful_key_equal& lhs, stateful_key_equal& rhs) noexcept + { + if (&lhs != &rhs) { + std::swap(lhs.x_, rhs.x_); + } + } +}; +std::atomic stateful_key_equal::c{0}; + +static std::atomic allocator_c = {}; + +template struct stateful_allocator +{ + int x_ = -1; + + void throw_helper() const + { + auto n = ++allocator_c; + if (should_throw && ((n + 1) % alloc_throw_threshold == 0)) { + throw exception_tag{}; + } + } + + using value_type = T; + + stateful_allocator() = default; + stateful_allocator(stateful_allocator const&) = default; + stateful_allocator(stateful_allocator&&) = default; + + stateful_allocator(int const x) : x_{x} {} + + template + stateful_allocator(stateful_allocator const& rhs) : x_{rhs.x_} + { + } + + T* allocate(std::size_t n) + { + throw_helper(); + return static_cast(::operator new(n * sizeof(T))); + } + + void deallocate(T* p, std::size_t) { ::operator delete(p); } + + bool operator==(stateful_allocator const& rhs) const { return x_ == rhs.x_; } + bool operator!=(stateful_allocator const& rhs) const { return x_ != 
rhs.x_; } +}; + +struct raii +{ + static std::atomic default_constructor; + static std::atomic copy_constructor; + static std::atomic move_constructor; + static std::atomic destructor; + + static std::atomic copy_assignment; + static std::atomic move_assignment; + + static std::atomic c; + void throw_helper() const + { + auto n = ++c; + if (should_throw && ((n + 1) % throw_threshold == 0)) { + throw exception_tag{}; + } + } + + int x_ = -1; + + raii() + { + throw_helper(); + ++default_constructor; + } + + raii(int const x) : x_{x} + { + throw_helper(); + ++default_constructor; + } + + raii(raii const& rhs) : x_{rhs.x_} + { + throw_helper(); + ++copy_constructor; + } + raii(raii&& rhs) noexcept : x_{rhs.x_} + { + rhs.x_ = -1; + ++move_constructor; + } + ~raii() { ++destructor; } + + raii& operator=(raii const& rhs) + { + throw_helper(); + ++copy_assignment; + if (this != &rhs) { + x_ = rhs.x_; + } + return *this; + } + + raii& operator=(raii&& rhs) noexcept + { + ++move_assignment; + if (this != &rhs) { + x_ = rhs.x_; + rhs.x_ = -1; + } + return *this; + } + + friend bool operator==(raii const& lhs, raii const& rhs) + { + return lhs.x_ == rhs.x_; + } + + friend bool operator!=(raii const& lhs, raii const& rhs) + { + return !(lhs == rhs); + } + + friend bool operator==(raii const& lhs, int const x) { return lhs.x_ == x; } + friend bool operator!=(raii const& lhs, int const x) + { + return !(lhs.x_ == x); + } + + friend bool operator==(int const x, raii const& rhs) { return rhs.x_ == x; } + + friend bool operator!=(int const x, raii const& rhs) + { + return !(rhs.x_ == x); + } + + friend std::ostream& operator<<(std::ostream& os, raii const& rhs) + { + os << "{ x_: " << rhs.x_ << " }"; + return os; + } + + friend std::ostream& operator<<( + std::ostream& os, std::pair const& rhs) + { + os << "pair<" << rhs.first << ", " << rhs.second << ">"; + return os; + } + + static void reset_counts() + { + default_constructor = 0; + copy_constructor = 0; + move_constructor = 0; + destructor = 0; + copy_assignment = 0; + move_assignment = 0; + c = 0; + + stateful_hash::c = 0; + stateful_key_equal::c = 0; + allocator_c = 0; + } + + friend void swap(raii& lhs, raii& rhs) { std::swap(lhs.x_, rhs.x_); } +}; + +std::atomic raii::default_constructor{0}; +std::atomic raii::copy_constructor{0}; +std::atomic raii::move_constructor{0}; +std::atomic raii::destructor{0}; +std::atomic raii::copy_assignment{0}; +std::atomic raii::move_assignment{0}; +std::atomic raii::c{0}; + +std::size_t hash_value(raii const& r) noexcept +{ + boost::hash hasher; + return hasher(r.x_); +} + +struct exception_value_type_generator_type +{ + std::pair operator()(test::random_generator rg) + { + int* p = nullptr; + int a = generate(p, rg); + int b = generate(p, rg); + return std::make_pair(raii{a}, raii{b}); + } +} exception_value_type_generator; + +struct exception_init_type_generator_type +{ + std::pair operator()(test::random_generator rg) + { + int* p = nullptr; + int a = generate(p, rg); + int b = generate(p, rg); + return std::make_pair(raii{a}, raii{b}); + } +} exception_init_type_generator; + +template +std::vector > split( + boost::span s, std::size_t const nt /* num threads*/) +{ + std::vector > subslices; + subslices.reserve(nt); + + auto a = s.size() / nt; + auto b = a; + if (s.size() % nt != 0) { + ++b; + } + + auto num_a = nt; + auto num_b = std::size_t{0}; + + if (nt * b > s.size()) { + num_a = nt * b - s.size(); + num_b = nt - num_a; + } + + auto sub_b = s.subspan(0, num_b * b); + auto sub_a = s.subspan(num_b * b); + + for 
(std::size_t i = 0; i < num_b; ++i) { + subslices.push_back(sub_b.subspan(i * b, b)); + } + + for (std::size_t i = 0; i < num_a; ++i) { + auto const is_last = i == (num_a - 1); + subslices.push_back( + sub_a.subspan(i * a, is_last ? boost::dynamic_extent : a)); + } + + return subslices; +} + +template void thread_runner(std::vector& values, F f) +{ + boost::latch latch(static_cast(num_threads)); + + std::vector threads; + auto subslices = split(values, num_threads); + + for (std::size_t i = 0; i < num_threads; ++i) { + threads.emplace_back([&f, &subslices, i, &latch] { + latch.arrive_and_wait(); + + auto s = subslices[i]; + f(s); + }); + } + + for (auto& t : threads) { + t.join(); + } +} + +template +void test_matches_reference(X const& x, Y const& reference_map) +{ + using value_type = typename X::value_type; + BOOST_TEST_EQ(x.size(), x.visit_all([&](value_type const& kv) { + BOOST_TEST(reference_map.contains(kv.first)); + BOOST_TEST_EQ(kv.second, reference_map.find(kv.first)->second); + })); +} + +template +void test_fuzzy_matches_reference( + X const& x, Y const& reference_map, test::random_generator rg) +{ + using value_type = typename X::value_type; + BOOST_TEST_EQ(x.size(), x.visit_all([&](value_type const& kv) { + BOOST_TEST(reference_map.contains(kv.first)); + if (rg == test::sequential) { + BOOST_TEST_EQ(kv.second, reference_map.find(kv.first)->second); + } + })); +} + +template using span_value_type = typename T::value_type; + +void check_raii_counts() +{ + BOOST_TEST_GT(raii::destructor, 0u); + + BOOST_TEST_EQ( + raii::default_constructor + raii::copy_constructor + raii::move_constructor, + raii::destructor); +} + +template void shuffle_values(std::vector& v) +{ + std::random_device rd; + std::mt19937 g(rd()); + + std::shuffle(v.begin(), v.end(), g); +} + +template +auto make_random_values(std::size_t count, F f) -> std::vector +{ + using vector_type = std::vector; + + vector_type v; + v.reserve(count); + for (std::size_t i = 0; i < count; ++i) { + v.emplace_back(f()); + } + return v; +} diff --git a/test/cfoa/exception_insert_tests.cpp b/test/cfoa/exception_insert_tests.cpp new file mode 100644 index 00000000..4804caa6 --- /dev/null +++ b/test/cfoa/exception_insert_tests.cpp @@ -0,0 +1,431 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "exception_helpers.hpp" + +#include + +#include + +namespace { + test::seed_t initialize_seed(73987); + + struct lvalue_inserter_type + { + template void operator()(std::vector& values, X& x) + { + enable_exceptions(); + + std::atomic num_inserts{0}; + thread_runner(values, [&x, &num_inserts](boost::span s) { + for (auto const& r : s) { + try { + bool b = x.insert(r); + if (b) { + ++num_inserts; + } + } catch (...) 
{ + } + } + }); + + disable_exceptions(); + + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } lvalue_inserter; + + struct norehash_lvalue_inserter_type : public lvalue_inserter_type + { + template void operator()(std::vector& values, X& x) + { + x.reserve(values.size()); + lvalue_inserter_type::operator()(values, x); + BOOST_TEST_GT(raii::copy_constructor, 0u); + BOOST_TEST_EQ(raii::move_constructor, 0u); + } + } norehash_lvalue_inserter; + + struct rvalue_inserter_type + { + template void operator()(std::vector& values, X& x) + { + BOOST_TEST_EQ(raii::copy_constructor, 0u); + + enable_exceptions(); + + std::atomic num_inserts{0}; + thread_runner(values, [&x, &num_inserts](boost::span s) { + for (auto& r : s) { + try { + bool b = x.insert(std::move(r)); + if (b) { + ++num_inserts; + } + } catch (...) { + } + } + }); + + disable_exceptions(); + + if (!std::is_same::value) { + BOOST_TEST_EQ(raii::copy_constructor, 0u); + } + + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } rvalue_inserter; + + struct norehash_rvalue_inserter_type : public rvalue_inserter_type + { + template void operator()(std::vector& values, X& x) + { + x.reserve(values.size()); + + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_EQ(raii::move_constructor, 0u); + + rvalue_inserter_type::operator()(values, x); + + if (std::is_same::value) { + BOOST_TEST_EQ(raii::copy_constructor, x.size()); + BOOST_TEST_EQ(raii::move_constructor, x.size()); + } else { + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_EQ(raii::move_constructor, 2 * x.size()); + } + } + } norehash_rvalue_inserter; + + struct iterator_range_inserter_type + { + template void operator()(std::vector& values, X& x) + { + for (std::size_t i = 0; i < 10; ++i) { + x.insert(values[i]); + } + + enable_exceptions(); + thread_runner(values, [&x](boost::span s) { + try { + x.insert(s.begin(), s.end()); + } catch (...) { + } + }); + disable_exceptions(); + + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } iterator_range_inserter; + + struct lvalue_insert_or_assign_copy_assign_type + { + template void operator()(std::vector& values, X& x) + { + enable_exceptions(); + thread_runner(values, [&x](boost::span s) { + for (auto& r : s) { + try { + x.insert_or_assign(r.first, r.second); + } catch (...) { + } + } + }); + disable_exceptions(); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_GT(raii::copy_constructor, 0u); + BOOST_TEST_GT(raii::move_constructor, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } lvalue_insert_or_assign_copy_assign; + + struct lvalue_insert_or_assign_move_assign_type + { + template void operator()(std::vector& values, X& x) + { + enable_exceptions(); + thread_runner(values, [&x](boost::span s) { + for (auto& r : s) { + try { + + x.insert_or_assign(r.first, std::move(r.second)); + } catch (...) { + } + } + }); + disable_exceptions(); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_GT(raii::copy_constructor, 0u); + BOOST_TEST_GT(raii::move_constructor, 0u); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + } + } lvalue_insert_or_assign_move_assign; + + struct rvalue_insert_or_assign_copy_assign_type + { + template void operator()(std::vector& values, X& x) + { + enable_exceptions(); + thread_runner(values, [&x](boost::span s) { + for (auto& r : s) { + try { + x.insert_or_assign(std::move(r.first), r.second); + } catch (...) 
{ + } + } + }); + disable_exceptions(); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_GT(raii::copy_constructor, 0u); + BOOST_TEST_GT(raii::move_constructor, x.size()); // rehashing + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } rvalue_insert_or_assign_copy_assign; + + struct rvalue_insert_or_assign_move_assign_type + { + template void operator()(std::vector& values, X& x) + { + enable_exceptions(); + thread_runner(values, [&x](boost::span s) { + for (auto& r : s) { + try { + x.insert_or_assign(std::move(r.first), std::move(r.second)); + } catch (...) { + } + } + }); + disable_exceptions(); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_GT(raii::move_constructor, 0u); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + } + } rvalue_insert_or_assign_move_assign; + + struct lvalue_insert_or_cvisit_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_inserts{0}; + + enable_exceptions(); + thread_runner(values, [&x, &num_inserts](boost::span s) { + for (auto& r : s) { + try { + bool b = x.insert_or_cvisit( + r, [](typename X::value_type const& v) { (void)v; }); + + if (b) { + ++num_inserts; + } + } catch (...) { + } + } + }); + disable_exceptions(); + + BOOST_TEST_GT(num_inserts, 0u); + BOOST_TEST_EQ(raii::default_constructor, 0u); + // don't check move construction count here because of rehashing + BOOST_TEST_GT(raii::move_constructor, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } lvalue_insert_or_cvisit; + + struct lvalue_insert_or_visit_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_inserts{0}; + + enable_exceptions(); + thread_runner(values, [&x, &num_inserts](boost::span s) { + for (auto& r : s) { + try { + bool b = + x.insert_or_visit(r, [](typename X::value_type& v) { (void)v; }); + + if (b) { + ++num_inserts; + } + } catch (...) { + } + } + }); + disable_exceptions(); + + BOOST_TEST_GT(num_inserts, 0u); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + + // don't check move construction count here because of rehashing + BOOST_TEST_GT(raii::move_constructor, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } lvalue_insert_or_visit; + + struct rvalue_insert_or_cvisit_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_inserts{0}; + + enable_exceptions(); + thread_runner(values, [&x, &num_inserts](boost::span s) { + for (auto& r : s) { + try { + bool b = x.insert_or_cvisit( + std::move(r), [](typename X::value_type const& v) { (void)v; }); + + if (b) { + ++num_inserts; + } + } catch (...) { + } + } + }); + disable_exceptions(); + + BOOST_TEST_GT(num_inserts, 0u); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + } + } rvalue_insert_or_cvisit; + + struct rvalue_insert_or_visit_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_inserts{0}; + + enable_exceptions(); + thread_runner(values, [&x, &num_inserts](boost::span s) { + for (auto& r : s) { + try { + bool b = x.insert_or_visit( + std::move(r), [](typename X::value_type& v) { (void)v; }); + + if (b) { + ++num_inserts; + } + } catch (...) 
{ + } + } + }); + disable_exceptions(); + + BOOST_TEST_GT(num_inserts, 0u); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + if (!std::is_same::value) { + BOOST_TEST_EQ(raii::copy_constructor, 0u); + } + } + } rvalue_insert_or_visit; + + struct iterator_range_insert_or_cvisit_type + { + template void operator()(std::vector& values, X& x) + { + for (std::size_t i = 0; i < 10; ++i) { + x.insert(values[i]); + } + + enable_exceptions(); + thread_runner(values, [&x](boost::span s) { + try { + x.insert_or_cvisit(s.begin(), s.end(), + [](typename X::value_type const& v) { (void)v; }); + } catch (...) { + } + }); + disable_exceptions(); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + } + } iterator_range_insert_or_cvisit; + + struct iterator_range_insert_or_visit_type + { + template void operator()(std::vector& values, X& x) + { + for (std::size_t i = 0; i < 10; ++i) { + x.insert(values[i]); + } + + enable_exceptions(); + thread_runner(values, [&x](boost::span s) { + try { + x.insert_or_visit(s.begin(), s.end(), + [](typename X::value_type const& v) { (void)v; }); + } catch (...) { + } + }); + disable_exceptions(); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + } + } iterator_range_insert_or_visit; + + template + void insert(X*, G gen, F inserter, test::random_generator rg) + { + disable_exceptions(); + + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + + raii::reset_counts(); + { + X x; + + inserter(values, x); + + test_fuzzy_matches_reference(x, reference_map, rg); + } + check_raii_counts(); + } + + boost::unordered::concurrent_flat_map > >* map; + +} // namespace + +using test::default_generator; +using test::limited_range; +using test::sequential; + +// clang-format off +UNORDERED_TEST( + insert, + ((map)) + ((exception_value_type_generator)(exception_init_type_generator)) + ((lvalue_inserter)(rvalue_inserter)(iterator_range_inserter) + (norehash_lvalue_inserter)(norehash_rvalue_inserter) + (lvalue_insert_or_cvisit)(lvalue_insert_or_visit) + (rvalue_insert_or_cvisit)(rvalue_insert_or_visit) + (iterator_range_insert_or_cvisit)(iterator_range_insert_or_visit)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + insert, + ((map)) + ((exception_init_type_generator)) + ((lvalue_insert_or_assign_copy_assign)(lvalue_insert_or_assign_move_assign) + (rvalue_insert_or_assign_copy_assign)(rvalue_insert_or_assign_move_assign)) + ((default_generator)(sequential)(limited_range))) + +// clang-format on + +RUN_TESTS() diff --git a/test/cfoa/exception_merge_tests.cpp b/test/cfoa/exception_merge_tests.cpp new file mode 100644 index 00000000..0f54eb27 --- /dev/null +++ b/test/cfoa/exception_merge_tests.cpp @@ -0,0 +1,78 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include "exception_helpers.hpp"
+
+#include <boost/unordered/concurrent_flat_map.hpp>
+
+#include <boost/core/ignore_unused.hpp>
+
+using allocator_type = stateful_allocator<std::pair<raii const, raii> >;
+
+using hasher = stateful_hash;
+using key_equal = stateful_key_equal;
+
+using map_type = boost::unordered::concurrent_flat_map<raii, raii, hasher,
+  key_equal, allocator_type>;
+
+namespace {
+  test::seed_t initialize_seed(223333016);
+
+  template <class G> void merge(G gen, test::random_generator rg)
+  {
+    auto values = make_random_values(1024 * 16, [&] { return gen(rg); });
+    auto reference_map =
+      boost::unordered_flat_map<raii, raii>(values.begin(), values.end());
+
+    raii::reset_counts();
+
+    auto begin = values.begin();
+    auto mid = begin + static_cast<std::ptrdiff_t>(values.size() / 2);
+    auto end = values.end();
+
+    {
+      unsigned num_throws = 0;
+
+      for (unsigned i = 0; i < 5 * alloc_throw_threshold; ++i) {
+        disable_exceptions();
+
+        map_type x1(0, hasher(1), key_equal(2), allocator_type(3));
+        x1.insert(begin, mid);
+
+        map_type x2(0, hasher(2), key_equal(1), allocator_type(3));
+        x2.insert(mid, end);
+
+        enable_exceptions();
+        try {
+          x1.merge(x2);
+        } catch (...) {
+          ++num_throws;
+        }
+
+        disable_exceptions();
+        test_fuzzy_matches_reference(x1, reference_map, rg);
+        test_fuzzy_matches_reference(x2, reference_map, rg);
+      }
+
+      BOOST_TEST_GT(num_throws, 0u);
+    }
+
+    check_raii_counts();
+  }
+
+} // namespace
+
+using test::default_generator;
+using test::limited_range;
+using test::sequential;
+
+// clang-format off
+UNORDERED_TEST(
+  merge,
+  ((exception_value_type_generator))
+  ((default_generator)(sequential)(limited_range)))
+
+// clang-format on
+
+RUN_TESTS()
diff --git a/test/cfoa/fwd_tests.cpp b/test/cfoa/fwd_tests.cpp
new file mode 100644
index 00000000..dced611e
--- /dev/null
+++ b/test/cfoa/fwd_tests.cpp
@@ -0,0 +1,70 @@
+// Copyright (C) 2023 Christian Mazakas
+// Distributed under the Boost Software License, Version 1.0.
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "helpers.hpp" +#include +#include +#include + +test::seed_t initialize_seed{32304628}; + +using test::default_generator; +using test::limited_range; +using test::sequential; + +template +void swap_call(boost::unordered::concurrent_flat_map& x1, + boost::unordered::concurrent_flat_map& x2) +{ + swap(x1, x2); +} + +template +bool equal_call(boost::unordered::concurrent_flat_map& x1, + boost::unordered::concurrent_flat_map& x2) +{ + return x1 == x2; +} + +template +bool unequal_call(boost::unordered::concurrent_flat_map& x1, + boost::unordered::concurrent_flat_map& x2) +{ + return x1 != x2; +} + +#include + +using map_type = boost::unordered::concurrent_flat_map; + +#if !defined(BOOST_CLANG_VERSION) || \ + BOOST_WORKAROUND(BOOST_CLANG_VERSION, < 30700) || \ + BOOST_WORKAROUND(BOOST_CLANG_VERSION, >= 30800) +// clang-3.7 seems to have a codegen bug here so we workaround it +UNORDERED_AUTO_TEST (fwd_swap_call) { + map_type x1, x2; + swap_call(x1, x2); +} + +#endif + +UNORDERED_AUTO_TEST (fwd_equal_call) { + map_type x1, x2; + BOOST_TEST(equal_call(x1, x2)); +} + +UNORDERED_AUTO_TEST (fwd_unequal_call) { + map_type x1, x2; + BOOST_TEST_NOT(unequal_call(x1, x2)); +} + +// this isn't the best place for this test but it's better than introducing a +// new file +UNORDERED_AUTO_TEST (max_size) { + map_type x1; + BOOST_TEST_EQ( + x1.max_size(), std::numeric_limits::max()); +} + +RUN_TESTS() diff --git a/test/cfoa/helpers.hpp b/test/cfoa/helpers.hpp new file mode 100644 index 00000000..517326bf --- /dev/null +++ b/test/cfoa/helpers.hpp @@ -0,0 +1,594 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_UNORDERED_TEST_CFOA_HELPERS_HPP +#define BOOST_UNORDERED_TEST_CFOA_HELPERS_HPP + +#include "latch.hpp" + +#include "../helpers/generators.hpp" +#include "../helpers/test.hpp" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static std::size_t const num_threads = + std::max(2u, std::thread::hardware_concurrency()); + +struct transp_hash +{ + using is_transparent = void; + + template std::size_t operator()(T const& t) const noexcept + { + return boost::hash()(t); + } +}; + +struct transp_key_equal +{ + using is_transparent = void; + + template bool operator()(T const& lhs, U const& rhs) const + { + return lhs == rhs; + } +}; + +struct stateful_hash +{ + int x_ = -1; + + stateful_hash() = default; + stateful_hash(stateful_hash const&) = default; + stateful_hash(stateful_hash&& rhs) noexcept + { + auto tmp = x_; + x_ = rhs.x_; + rhs.x_ = tmp; + } + + stateful_hash(int const x) : x_{x} {} + + template std::size_t operator()(T const& t) const noexcept + { + std::size_t h = static_cast(x_); + boost::hash_combine(h, t); + return h; + } + + bool operator==(stateful_hash const& rhs) const { return x_ == rhs.x_; } + + friend std::ostream& operator<<(std::ostream& os, stateful_hash const& rhs) + { + os << "{ x_: " << rhs.x_ << " }"; + return os; + } + + friend void swap(stateful_hash& lhs, stateful_hash& rhs) noexcept + { + if (&lhs != &rhs) { + std::swap(lhs.x_, rhs.x_); + } + } +}; + +struct stateful_key_equal +{ + int x_ = -1; + + stateful_key_equal() = default; + stateful_key_equal(stateful_key_equal const&) = default; + 
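+
+  // Reviewer note: like stateful_hash above, the move constructor below
+  // exchanges x_ with the new object's default sentinel (-1), so a
+  // moved-from function object is observable in the tests. A minimal
+  // sketch (not part of this diff):
+  //
+  //   stateful_key_equal a(2), b(std::move(a));
+  //   BOOST_TEST_EQ(b, stateful_key_equal(2));  // b took a's id
+  //   BOOST_TEST_EQ(a, stateful_key_equal(-1)); // a holds the sentinel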
stateful_key_equal(stateful_key_equal&& rhs) noexcept + { + auto tmp = x_; + x_ = rhs.x_; + rhs.x_ = tmp; + } + + stateful_key_equal(int const x) : x_{x} {} + + template bool operator()(T const& t, U const& u) const + { + return t == u; + } + + bool operator==(stateful_key_equal const& rhs) const { return x_ == rhs.x_; } + + friend std::ostream& operator<<( + std::ostream& os, stateful_key_equal const& rhs) + { + os << "{ x_: " << rhs.x_ << " }"; + return os; + } + + friend void swap(stateful_key_equal& lhs, stateful_key_equal& rhs) noexcept + { + if (&lhs != &rhs) { + std::swap(lhs.x_, rhs.x_); + } + } +}; + +template struct stateful_allocator +{ + int x_ = -1; + + using value_type = T; + + stateful_allocator() = default; + stateful_allocator(stateful_allocator const&) = default; + stateful_allocator(stateful_allocator&&) = default; + + stateful_allocator(int const x) : x_{x} {} + + template + stateful_allocator(stateful_allocator const& rhs) : x_{rhs.x_} + { + } + + T* allocate(std::size_t n) + { + return static_cast(::operator new(n * sizeof(T))); + } + + void deallocate(T* p, std::size_t) { ::operator delete(p); } + + bool operator==(stateful_allocator const& rhs) const { return x_ == rhs.x_; } + bool operator!=(stateful_allocator const& rhs) const { return x_ != rhs.x_; } +}; + +struct raii +{ + static std::atomic default_constructor; + static std::atomic copy_constructor; + static std::atomic move_constructor; + static std::atomic destructor; + + static std::atomic copy_assignment; + static std::atomic move_assignment; + + int x_ = -1; + + raii() { ++default_constructor; } + raii(int const x) : x_{x} { ++default_constructor; } + raii(raii const& rhs) : x_{rhs.x_} { ++copy_constructor; } + raii(raii&& rhs) noexcept : x_{rhs.x_} + { + rhs.x_ = -1; + ++move_constructor; + } + ~raii() { ++destructor; } + + raii& operator=(raii const& rhs) + { + ++copy_assignment; + if (this != &rhs) { + x_ = rhs.x_; + } + return *this; + } + + raii& operator=(raii&& rhs) noexcept + { + ++move_assignment; + if (this != &rhs) { + x_ = rhs.x_; + rhs.x_ = -1; + } + return *this; + } + + friend bool operator==(raii const& lhs, raii const& rhs) + { + return lhs.x_ == rhs.x_; + } + + friend bool operator!=(raii const& lhs, raii const& rhs) + { + return !(lhs == rhs); + } + + friend bool operator==(raii const& lhs, int const x) { return lhs.x_ == x; } + friend bool operator!=(raii const& lhs, int const x) + { + return !(lhs.x_ == x); + } + + friend bool operator==(int const x, raii const& rhs) { return rhs.x_ == x; } + + friend bool operator!=(int const x, raii const& rhs) + { + return !(rhs.x_ == x); + } + + friend std::ostream& operator<<(std::ostream& os, raii const& rhs) + { + os << "{ x_: " << rhs.x_ << " }"; + return os; + } + + friend std::ostream& operator<<( + std::ostream& os, std::pair const& rhs) + { + os << "pair<" << rhs.first << ", " << rhs.second << ">"; + return os; + } + + static void reset_counts() + { + default_constructor = 0; + copy_constructor = 0; + move_constructor = 0; + destructor = 0; + copy_assignment = 0; + move_assignment = 0; + } + + friend void swap(raii& lhs, raii& rhs) { std::swap(lhs.x_, rhs.x_); } +}; + +std::atomic raii::default_constructor{0}; +std::atomic raii::copy_constructor{0}; +std::atomic raii::move_constructor{0}; +std::atomic raii::destructor{0}; +std::atomic raii::copy_assignment{0}; +std::atomic raii::move_assignment{0}; + +std::size_t hash_value(raii const& r) noexcept +{ + boost::hash hasher; + return hasher(r.x_); +} + +namespace std { + template <> struct 
hash + { + std::size_t operator()(raii const& r) const noexcept + { + return hash_value(r); + } + }; +} // namespace std + +template +auto make_random_values(std::size_t count, F f) -> std::vector +{ + using vector_type = std::vector; + + vector_type v; + v.reserve(count); + for (std::size_t i = 0; i < count; ++i) { + v.emplace_back(f()); + } + return v; +} + +struct value_type_generator_type +{ + std::pair operator()(test::random_generator rg) + { + int* p = nullptr; + int a = generate(p, rg); + int b = generate(p, rg); + return std::make_pair(raii{a}, raii{b}); + } +} value_type_generator; + +struct init_type_generator_type +{ + std::pair operator()(test::random_generator rg) + { + int* p = nullptr; + int a = generate(p, rg); + int b = generate(p, rg); + return std::make_pair(raii{a}, raii{b}); + } +} init_type_generator; + +template +std::vector > split( + boost::span s, std::size_t const nt /* num threads*/) +{ + std::vector > subslices; + subslices.reserve(nt); + + auto a = s.size() / nt; + auto b = a; + if (s.size() % nt != 0) { + ++b; + } + + auto num_a = nt; + auto num_b = std::size_t{0}; + + if (nt * b > s.size()) { + num_a = nt * b - s.size(); + num_b = nt - num_a; + } + + auto sub_b = s.subspan(0, num_b * b); + auto sub_a = s.subspan(num_b * b); + + for (std::size_t i = 0; i < num_b; ++i) { + subslices.push_back(sub_b.subspan(i * b, b)); + } + + for (std::size_t i = 0; i < num_a; ++i) { + auto const is_last = i == (num_a - 1); + subslices.push_back( + sub_a.subspan(i * a, is_last ? boost::dynamic_extent : a)); + } + + return subslices; +} + +template void thread_runner(std::vector& values, F f) +{ + boost::latch latch(static_cast(num_threads)); + + std::vector threads; + auto subslices = split(values, num_threads); + + for (std::size_t i = 0; i < num_threads; ++i) { + threads.emplace_back([&f, &subslices, i, &latch] { + latch.arrive_and_wait(); + + auto s = subslices[i]; + f(s); + }); + } + + for (auto& t : threads) { + t.join(); + } +} + +template +void test_matches_reference(X const& x, Y const& reference_map) +{ + using value_type = typename X::value_type; + BOOST_TEST_EQ(x.size(), x.visit_all([&](value_type const& kv) { + BOOST_TEST(reference_map.contains(kv.first)); + BOOST_TEST_EQ(kv.second, reference_map.find(kv.first)->second); + })); +} + +template +void test_fuzzy_matches_reference( + X const& x, Y const& reference_map, test::random_generator rg) +{ + using value_type = typename X::value_type; + BOOST_TEST_EQ(x.size(), x.visit_all([&](value_type const& kv) { + BOOST_TEST(reference_map.contains(kv.first)); + if (rg == test::sequential) { + BOOST_TEST_EQ(kv.second, reference_map.find(kv.first)->second); + } + })); +} + +template using span_value_type = typename T::value_type; + +void check_raii_counts() +{ + BOOST_TEST_GT(raii::destructor, 0u); + + BOOST_TEST_EQ( + raii::default_constructor + raii::copy_constructor + raii::move_constructor, + raii::destructor); +} + +template void shuffle_values(std::vector& v) +{ + std::random_device rd; + std::mt19937 g(rd()); + + std::shuffle(v.begin(), v.end(), g); +} + +template class ptr; +template class const_ptr; +template class fancy_allocator; + +struct void_ptr +{ + template friend class ptr; + +private: + void* ptr_; + +public: + void_ptr() : ptr_(0) {} + + template explicit void_ptr(ptr const& x) : ptr_(x.ptr_) {} + + // I'm not using the safe bool idiom because the containers should be + // able to cope with bool conversions. 
+ operator bool() const { return !!ptr_; } + + bool operator==(void_ptr const& x) const { return ptr_ == x.ptr_; } + bool operator!=(void_ptr const& x) const { return ptr_ != x.ptr_; } +}; + +class void_const_ptr +{ + template friend class const_ptr; + +private: + void* ptr_; + +public: + void_const_ptr() : ptr_(0) {} + + template + explicit void_const_ptr(const_ptr const& x) : ptr_(x.ptr_) + { + } + + // I'm not using the safe bool idiom because the containers should be + // able to cope with bool conversions. + operator bool() const { return !!ptr_; } + + bool operator==(void_const_ptr const& x) const { return ptr_ == x.ptr_; } + bool operator!=(void_const_ptr const& x) const { return ptr_ != x.ptr_; } +}; + +template class ptr +{ + friend class fancy_allocator; + friend class const_ptr; + friend struct void_ptr; + + T* ptr_; + + ptr(T* x) : ptr_(x) {} + +public: + ptr() : ptr_(0) {} + explicit ptr(void_ptr const& x) : ptr_((T*)x.ptr_) {} + + T& operator*() const { return *ptr_; } + T* operator->() const { return ptr_; } + ptr& operator++() + { + ++ptr_; + return *this; + } + ptr operator++(int) + { + ptr tmp(*this); + ++ptr_; + return tmp; + } + ptr operator+(std::ptrdiff_t s) const { return ptr(ptr_ + s); } + friend ptr operator+(std::ptrdiff_t s, ptr p) { return ptr(s + p.ptr_); } + + std::ptrdiff_t operator-(ptr p) const { return ptr_ - p.ptr_; } + ptr operator-(std::ptrdiff_t s) const { return ptr(ptr_ - s); } + T& operator[](std::ptrdiff_t s) const { return ptr_[s]; } + bool operator!() const { return !ptr_; } + + static ptr pointer_to(T& p) { return ptr(boost::addressof(p)); } + + // I'm not using the safe bool idiom because the containers should be + // able to cope with bool conversions. + operator bool() const { return !!ptr_; } + + bool operator==(ptr const& x) const { return ptr_ == x.ptr_; } + bool operator!=(ptr const& x) const { return ptr_ != x.ptr_; } + bool operator<(ptr const& x) const { return ptr_ < x.ptr_; } + bool operator>(ptr const& x) const { return ptr_ > x.ptr_; } + bool operator<=(ptr const& x) const { return ptr_ <= x.ptr_; } + bool operator>=(ptr const& x) const { return ptr_ >= x.ptr_; } +}; + +template class const_ptr +{ + friend class fancy_allocator; + friend struct const_void_ptr; + + T const* ptr_; + + const_ptr(T const* ptr) : ptr_(ptr) {} + +public: + const_ptr() : ptr_(0) {} + const_ptr(ptr const& x) : ptr_(x.ptr_) {} + explicit const_ptr(void_const_ptr const& x) : ptr_((T const*)x.ptr_) {} + + T const& operator*() const { return *ptr_; } + T const* operator->() const { return ptr_; } + const_ptr& operator++() + { + ++ptr_; + return *this; + } + const_ptr operator++(int) + { + const_ptr tmp(*this); + ++ptr_; + return tmp; + } + const_ptr operator+(std::ptrdiff_t s) const { return const_ptr(ptr_ + s); } + friend const_ptr operator+(std::ptrdiff_t s, const_ptr p) + { + return ptr(s + p.ptr_); + } + T const& operator[](int s) const { return ptr_[s]; } + bool operator!() const { return !ptr_; } + operator bool() const { return !!ptr_; } + + bool operator==(const_ptr const& x) const { return ptr_ == x.ptr_; } + bool operator!=(const_ptr const& x) const { return ptr_ != x.ptr_; } + bool operator<(const_ptr const& x) const { return ptr_ < x.ptr_; } + bool operator>(const_ptr const& x) const { return ptr_ > x.ptr_; } + bool operator<=(const_ptr const& x) const { return ptr_ <= x.ptr_; } + bool operator>=(const_ptr const& x) const { return ptr_ >= x.ptr_; } +}; + +template class fancy_allocator +{ +public: + typedef std::size_t size_type; + typedef 
std::ptrdiff_t difference_type; + typedef void_ptr void_pointer; + typedef void_const_ptr const_void_pointer; + typedef ptr pointer; + typedef const_ptr const_pointer; + typedef T& reference; + typedef T const& const_reference; + typedef T value_type; + + template struct rebind + { + typedef fancy_allocator other; + }; + + fancy_allocator() {} + template fancy_allocator(fancy_allocator const&) {} + fancy_allocator(fancy_allocator const&) {} + ~fancy_allocator() {} + + pointer address(reference r) { return pointer(&r); } + const_pointer address(const_reference r) { return const_pointer(&r); } + + pointer allocate(size_type n) + { + return pointer(static_cast(::operator new(n * sizeof(T)))); + } + + template pointer allocate(size_type n, const_ptr) + { + return pointer(static_cast(::operator new(n * sizeof(T)))); + } + + void deallocate(pointer p, size_type) { ::operator delete((void*)p.ptr_); } + + template void construct(U* p, Args&&... args) + { + new ((void*)p) U(std::forward(args)...); + } + + template void destroy(U* p) { p->~U(); } + + size_type max_size() const { return 1000; } + +public: + fancy_allocator& operator=(fancy_allocator const&) { return *this; } +}; + +#endif // BOOST_UNORDERED_TEST_CFOA_HELPERS_HPP \ No newline at end of file diff --git a/test/cfoa/insert_tests.cpp b/test/cfoa/insert_tests.cpp new file mode 100644 index 00000000..b3bd4d51 --- /dev/null +++ b/test/cfoa/insert_tests.cpp @@ -0,0 +1,647 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "helpers.hpp" + +#include + +#include + +struct raii_convertible +{ + int x, y; + raii_convertible(int x_, int y_) : x{x_}, y{y_} {} + + operator std::pair() { return {x, y}; } +}; + +namespace { + test::seed_t initialize_seed(78937); + + struct lvalue_inserter_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_inserts{0}; + thread_runner(values, [&x, &num_inserts](boost::span s) { + for (auto const& r : s) { + bool b = x.insert(r); + if (b) { + ++num_inserts; + } + } + }); + BOOST_TEST_EQ(num_inserts, x.size()); + BOOST_TEST_EQ(raii::copy_constructor, 2 * x.size()); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } lvalue_inserter; + + struct norehash_lvalue_inserter_type : public lvalue_inserter_type + { + template void operator()(std::vector& values, X& x) + { + x.reserve(values.size()); + lvalue_inserter_type::operator()(values, x); + BOOST_TEST_EQ(raii::copy_constructor, 2 * x.size()); + BOOST_TEST_EQ(raii::move_constructor, 0u); + } + } norehash_lvalue_inserter; + + struct rvalue_inserter_type + { + template void operator()(std::vector& values, X& x) + { + BOOST_TEST_EQ(raii::copy_constructor, 0u); + + std::atomic num_inserts{0}; + thread_runner(values, [&x, &num_inserts](boost::span s) { + for (auto& r : s) { + bool b = x.insert(std::move(r)); + if (b) { + ++num_inserts; + } + } + }); + BOOST_TEST_EQ(num_inserts, x.size()); + + if (std::is_same::value) { + BOOST_TEST_EQ(raii::copy_constructor, x.size()); + } else { + BOOST_TEST_EQ(raii::copy_constructor, 0u); + } + + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } rvalue_inserter; + + struct norehash_rvalue_inserter_type : public rvalue_inserter_type + { + template void operator()(std::vector& values, X& x) + { + x.reserve(values.size()); + + BOOST_TEST_EQ(raii::copy_constructor, 0u); + 
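+      // Reviewer note: x.reserve(values.size()) above sizes the table for
+      // every element up front, so no rehash can occur while inserting.
+      // Without rehashing, the constructor counts checked here are exact
+      // equalities rather than the lower bounds used by the plain
+      // rvalue_inserter: each successfully inserted element is transferred
+      // into its slot exactly once.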
BOOST_TEST_EQ(raii::move_constructor, 0u); + + rvalue_inserter_type::operator()(values, x); + + if (std::is_same::value) { + BOOST_TEST_EQ(raii::copy_constructor, x.size()); + BOOST_TEST_EQ(raii::move_constructor, x.size()); + } else { + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_EQ(raii::move_constructor, 2 * x.size()); + } + } + } norehash_rvalue_inserter; + + struct iterator_range_inserter_type + { + template void operator()(std::vector& values, X& x) + { + std::vector values2; + values2.reserve(values.size()); + for (auto const& p : values) { + values2.push_back(raii_convertible(p.first.x_, p.second.x_)); + } + + thread_runner(values2, [&x](boost::span s) { + x.insert(s.begin(), s.end()); + }); + + BOOST_TEST_EQ(raii::default_constructor, 2 * values2.size()); +#if BOOST_WORKAROUND(BOOST_GCC_VERSION, >= 50300) && \ + BOOST_WORKAROUND(BOOST_GCC_VERSION, < 50500) + // some versions of old gcc have trouble eliding copies here + // https://godbolt.org/z/Ebo6TbvaG +#else + BOOST_TEST_EQ(raii::copy_constructor, 0u); +#endif + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } iterator_range_inserter; + + struct lvalue_insert_or_assign_copy_assign_type + { + template void operator()(std::vector& values, X& x) + { + thread_runner(values, [&x](boost::span s) { + for (auto& r : s) { + x.insert_or_assign(r.first, r.second); + } + }); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_EQ(raii::copy_constructor, 2 * x.size()); + // don't check move construction count here because of rehashing + BOOST_TEST_GT(raii::move_constructor, 0u); + BOOST_TEST_EQ(raii::copy_assignment, values.size() - x.size()); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } lvalue_insert_or_assign_copy_assign; + + struct lvalue_insert_or_assign_move_assign_type + { + template void operator()(std::vector& values, X& x) + { + thread_runner(values, [&x](boost::span s) { + for (auto& r : s) { + x.insert_or_assign(r.first, std::move(r.second)); + } + }); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_EQ(raii::copy_constructor, x.size()); + BOOST_TEST_GT(raii::move_constructor, x.size()); // rehashing + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, values.size() - x.size()); + } + } lvalue_insert_or_assign_move_assign; + + struct rvalue_insert_or_assign_copy_assign_type + { + template void operator()(std::vector& values, X& x) + { + thread_runner(values, [&x](boost::span s) { + for (auto& r : s) { + x.insert_or_assign(std::move(r.first), r.second); + } + }); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_EQ(raii::copy_constructor, x.size()); + BOOST_TEST_GT(raii::move_constructor, x.size()); // rehashing + BOOST_TEST_EQ(raii::copy_assignment, values.size() - x.size()); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } rvalue_insert_or_assign_copy_assign; + + struct rvalue_insert_or_assign_move_assign_type + { + template void operator()(std::vector& values, X& x) + { + thread_runner(values, [&x](boost::span s) { + for (auto& r : s) { + x.insert_or_assign(std::move(r.first), std::move(r.second)); + } + }); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_GE(raii::move_constructor, 2 * x.size()); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, values.size() - x.size()); + } + } rvalue_insert_or_assign_move_assign; + + struct trans_insert_or_assign_copy_assign_type + { + template void 
operator()(std::vector& values, X& x) + { + using is_transparent = + typename boost::make_void::type; + + boost::ignore_unused(); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + + thread_runner(values, [&x](boost::span s) { + for (auto& r : s) { + x.insert_or_assign(r.first.x_, r.second); + } + }); + + BOOST_TEST_EQ(raii::default_constructor, x.size()); + BOOST_TEST_EQ(raii::copy_constructor, x.size()); + BOOST_TEST_GT(raii::move_constructor, x.size()); // rehashing + BOOST_TEST_EQ(raii::copy_assignment, values.size() - x.size()); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } trans_insert_or_assign_copy_assign; + + struct trans_insert_or_assign_move_assign_type + { + template void operator()(std::vector& values, X& x) + { + using is_transparent = + typename boost::make_void::type; + + boost::ignore_unused(); + + thread_runner(values, [&x](boost::span s) { + for (auto& r : s) { + x.insert_or_assign(r.first.x_, std::move(r.second)); + } + }); + + BOOST_TEST_EQ(raii::default_constructor, x.size()); + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_GT(raii::move_constructor, 2 * x.size()); // rehashing + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, values.size() - x.size()); + } + } trans_insert_or_assign_move_assign; + + struct lvalue_insert_or_cvisit_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_inserts{0}; + std::atomic num_invokes{0}; + thread_runner(values, [&x, &num_inserts, &num_invokes](boost::span s) { + for (auto& r : s) { + bool b = x.insert_or_cvisit( + r, [&num_invokes](typename X::value_type const& v) { + (void)v; + ++num_invokes; + }); + + if (b) { + ++num_inserts; + } + } + }); + + BOOST_TEST_EQ(num_inserts, x.size()); + BOOST_TEST_EQ(num_invokes, values.size() - x.size()); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_EQ(raii::copy_constructor, 2 * x.size()); + // don't check move construction count here because of rehashing + BOOST_TEST_GT(raii::move_constructor, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } lvalue_insert_or_cvisit; + + struct lvalue_insert_or_visit_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_inserts{0}; + std::atomic num_invokes{0}; + thread_runner(values, [&x, &num_inserts, &num_invokes](boost::span s) { + for (auto& r : s) { + bool b = + x.insert_or_visit(r, [&num_invokes](typename X::value_type& v) { + (void)v; + ++num_invokes; + }); + + if (b) { + ++num_inserts; + } + } + }); + + BOOST_TEST_EQ(num_inserts, x.size()); + BOOST_TEST_EQ(num_invokes, values.size() - x.size()); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + BOOST_TEST_EQ(raii::copy_constructor, 2 * x.size()); + // don't check move construction count here because of rehashing + BOOST_TEST_GT(raii::move_constructor, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } lvalue_insert_or_visit; + + struct rvalue_insert_or_cvisit_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_inserts{0}; + std::atomic num_invokes{0}; + thread_runner(values, [&x, &num_inserts, &num_invokes](boost::span s) { + for (auto& r : s) { + bool b = x.insert_or_cvisit( + std::move(r), [&num_invokes](typename X::value_type const& v) { + (void)v; + ++num_invokes; + }); + + if (b) { + ++num_inserts; + } + } + }); + + BOOST_TEST_EQ(num_inserts, x.size()); + BOOST_TEST_EQ(num_invokes, values.size() - x.size()); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + + if (std::is_same::value) { + 
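+        // Reviewer note: this branch is taken when T is the map's
+        // value_type, i.e. std::pair<raii const, raii>. Moving such a pair
+        // still copy-constructs the const key (a const raii cannot be
+        // moved from), hence one copy per inserted element here, while the
+        // init_type branch below moves both halves of the pair.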
BOOST_TEST_EQ(raii::copy_constructor, x.size()); + BOOST_TEST_GE(raii::move_constructor, x.size()); + } else { + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_GE(raii::move_constructor, 2 * x.size()); + } + } + } rvalue_insert_or_cvisit; + + struct rvalue_insert_or_visit_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_inserts{0}; + std::atomic num_invokes{0}; + thread_runner(values, [&x, &num_inserts, &num_invokes](boost::span s) { + for (auto& r : s) { + bool b = x.insert_or_visit( + std::move(r), [&num_invokes](typename X::value_type& v) { + (void)v; + ++num_invokes; + }); + + if (b) { + ++num_inserts; + } + } + }); + + BOOST_TEST_EQ(num_inserts, x.size()); + BOOST_TEST_EQ(num_invokes, values.size() - x.size()); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + if (std::is_same::value) { + BOOST_TEST_EQ(raii::copy_constructor, x.size()); + BOOST_TEST_GE(raii::move_constructor, x.size()); + } else { + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_GE(raii::move_constructor, 2 * x.size()); + } + } + } rvalue_insert_or_visit; + + struct iterator_range_insert_or_cvisit_type + { + template void operator()(std::vector& values, X& x) + { + std::vector values2; + values2.reserve(values.size()); + for (auto const& p : values) { + values2.push_back(raii_convertible(p.first.x_, p.second.x_)); + } + + std::atomic num_invokes{0}; + thread_runner( + values2, [&x, &num_invokes](boost::span s) { + x.insert_or_cvisit(s.begin(), s.end(), + [&num_invokes](typename X::value_type const& v) { + (void)v; + ++num_invokes; + }); + }); + + BOOST_TEST_EQ(num_invokes, values.size() - x.size()); + + BOOST_TEST_EQ(raii::default_constructor, 2 * values2.size()); +#if BOOST_WORKAROUND(BOOST_GCC_VERSION, >= 50300) && \ + BOOST_WORKAROUND(BOOST_GCC_VERSION, < 50500) + // skip test +#else + BOOST_TEST_EQ(raii::copy_constructor, 0u); +#endif + BOOST_TEST_GT(raii::move_constructor, 0u); + } + } iterator_range_insert_or_cvisit; + + struct iterator_range_insert_or_visit_type + { + template void operator()(std::vector& values, X& x) + { + std::vector values2; + values2.reserve(values.size()); + for (auto const& p : values) { + values2.push_back(raii_convertible(p.first.x_, p.second.x_)); + } + + std::atomic num_invokes{0}; + thread_runner( + values2, [&x, &num_invokes](boost::span s) { + x.insert_or_visit(s.begin(), s.end(), + [&num_invokes](typename X::value_type const& v) { + (void)v; + ++num_invokes; + }); + }); + + BOOST_TEST_EQ(num_invokes, values.size() - x.size()); + + BOOST_TEST_EQ(raii::default_constructor, 2 * values2.size()); +#if BOOST_WORKAROUND(BOOST_GCC_VERSION, >= 50300) && \ + BOOST_WORKAROUND(BOOST_GCC_VERSION, < 50500) + // skip test +#else + BOOST_TEST_EQ(raii::copy_constructor, 0u); +#endif + BOOST_TEST_GT(raii::move_constructor, 0u); + } + } iterator_range_insert_or_visit; + + template + void insert(X*, G gen, F inserter, test::random_generator rg) + { + auto values = make_random_values(1024 * 16, [&] { return gen(rg); }); + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + raii::reset_counts(); + + { + X x; + + inserter(values, x); + + BOOST_TEST_EQ(x.size(), reference_map.size()); + + using value_type = typename X::value_type; + BOOST_TEST_EQ(x.size(), x.visit_all([&](value_type const& kv) { + BOOST_TEST(reference_map.contains(kv.first)); + if (rg == test::sequential) { + BOOST_TEST_EQ(kv.second, reference_map[kv.first]); + } + })); + } + + BOOST_TEST_GE(raii::default_constructor, 0u); + 
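+    // Reviewer note: once the map goes out of scope above, every raii
+    // object ever created must have been destroyed exactly once. The
+    // balance checked below is
+    //
+    //   default_constructor + copy_constructor + move_constructor
+    //     == destructor
+    //
+    // which catches leaks and double-destructions that only manifest
+    // under concurrent insertion.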
BOOST_TEST_GE(raii::copy_constructor, 0u); + BOOST_TEST_GE(raii::move_constructor, 0u); + BOOST_TEST_GT(raii::destructor, 0u); + + BOOST_TEST_EQ(raii::default_constructor + raii::copy_constructor + + raii::move_constructor, + raii::destructor); + } + + template void insert_initializer_list(X*) + { + using value_type = typename X::value_type; + + std::initializer_list values{ + value_type{raii{0}, raii{0}}, + value_type{raii{1}, raii{1}}, + value_type{raii{2}, raii{2}}, + value_type{raii{3}, raii{3}}, + value_type{raii{4}, raii{4}}, + value_type{raii{5}, raii{5}}, + value_type{raii{6}, raii{6}}, + value_type{raii{6}, raii{6}}, + value_type{raii{7}, raii{7}}, + value_type{raii{8}, raii{8}}, + value_type{raii{9}, raii{9}}, + value_type{raii{10}, raii{10}}, + value_type{raii{9}, raii{9}}, + value_type{raii{8}, raii{8}}, + value_type{raii{7}, raii{7}}, + value_type{raii{6}, raii{6}}, + value_type{raii{5}, raii{5}}, + value_type{raii{4}, raii{4}}, + value_type{raii{3}, raii{3}}, + value_type{raii{2}, raii{2}}, + value_type{raii{1}, raii{1}}, + value_type{raii{0}, raii{0}}, + }; + + std::vector dummy; + + auto reference_map = + boost::unordered_flat_map(values.begin(), values.end()); + raii::reset_counts(); + + { + { + X x; + + thread_runner( + dummy, [&x, &values](boost::span) { x.insert(values); }); + + BOOST_TEST_EQ(x.size(), reference_map.size()); + + BOOST_TEST_EQ(x.size(), x.visit_all([&](value_type const& kv) { + BOOST_TEST(reference_map.contains(kv.first)); + BOOST_TEST_EQ(kv.second, reference_map[kv.first]); + })); + } + + BOOST_TEST_GE(raii::default_constructor, 0u); + BOOST_TEST_GE(raii::copy_constructor, 0u); + BOOST_TEST_GE(raii::move_constructor, 0u); + BOOST_TEST_GT(raii::destructor, 0u); + + BOOST_TEST_EQ(raii::default_constructor + raii::copy_constructor + + raii::move_constructor, + raii::destructor); + + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + + { + { + std::atomic num_invokes{0}; + + X x; + + thread_runner(dummy, [&x, &values, &num_invokes](boost::span) { + x.insert_or_visit(values, [&num_invokes](typename X::value_type& v) { + (void)v; + ++num_invokes; + }); + + x.insert_or_cvisit( + values, [&num_invokes](typename X::value_type const& v) { + (void)v; + ++num_invokes; + }); + }); + + BOOST_TEST_EQ(num_invokes, (values.size() - x.size()) + + (num_threads - 1) * values.size() + + num_threads * values.size()); + BOOST_TEST_EQ(x.size(), reference_map.size()); + + BOOST_TEST_EQ(x.size(), x.visit_all([&](value_type const& kv) { + BOOST_TEST(reference_map.contains(kv.first)); + BOOST_TEST_EQ(kv.second, reference_map[kv.first]); + })); + } + + BOOST_TEST_GE(raii::default_constructor, 0u); + BOOST_TEST_GE(raii::copy_constructor, 0u); + BOOST_TEST_GE(raii::move_constructor, 0u); + BOOST_TEST_GT(raii::destructor, 0u); + + BOOST_TEST_EQ(raii::default_constructor + raii::copy_constructor + + raii::move_constructor, + raii::destructor); + + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } + + UNORDERED_AUTO_TEST (insert_sfinae_test) { + // mostly a compile-time tests to ensure that there's no ambiguity when a + // user does this + using value_type = + typename boost::unordered::concurrent_flat_map::value_type; + boost::unordered::concurrent_flat_map x; + x.insert({1, 2}); + + x.insert_or_visit({2, 3}, [](value_type&) {}); + x.insert_or_cvisit({3, 4}, [](value_type const&) {}); + } + + boost::unordered::concurrent_flat_map* map; + boost::unordered::concurrent_flat_map* trans_map; + 
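+  // Reviewer note: these namespace-scope pointers are never dereferenced;
+  // UNORDERED_TEST only uses their pointee type to instantiate the test
+  // templates (note the unnamed, unused X* first parameter of insert()).
+  // fancy_map, declared next, runs the same insert matrix through
+  // fancy_allocator to verify the container copes with non-raw allocator
+  // pointer types (void_ptr / ptr<T> from helpers.hpp).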
boost::unordered::concurrent_flat_map<raii, raii, boost::hash<raii>,
+    std::equal_to<raii>, fancy_allocator<std::pair<raii const, raii> > >*
+    fancy_map;
+
+} // namespace
+
+using test::default_generator;
+using test::limited_range;
+using test::sequential;
+
+// clang-format off
+UNORDERED_TEST(
+  insert_initializer_list,
+  ((map)))
+
+UNORDERED_TEST(
+  insert,
+  ((map)(fancy_map))
+  ((value_type_generator)(init_type_generator))
+  ((lvalue_inserter)(rvalue_inserter)(iterator_range_inserter)
+   (norehash_lvalue_inserter)(norehash_rvalue_inserter)
+   (lvalue_insert_or_cvisit)(lvalue_insert_or_visit)
+   (rvalue_insert_or_cvisit)(rvalue_insert_or_visit)
+   (iterator_range_insert_or_cvisit)(iterator_range_insert_or_visit))
+  ((default_generator)(sequential)(limited_range)))
+
+UNORDERED_TEST(
+  insert,
+  ((map))
+  ((init_type_generator))
+  ((lvalue_insert_or_assign_copy_assign)(lvalue_insert_or_assign_move_assign)
+   (rvalue_insert_or_assign_copy_assign)(rvalue_insert_or_assign_move_assign))
+  ((default_generator)(sequential)(limited_range)))
+
+UNORDERED_TEST(
+  insert,
+  ((trans_map))
+  ((init_type_generator))
+  ((trans_insert_or_assign_copy_assign)(trans_insert_or_assign_move_assign))
+  ((default_generator)(sequential)(limited_range)))
+// clang-format on
+
+RUN_TESTS()
diff --git a/test/cfoa/latch.hpp b/test/cfoa/latch.hpp
new file mode 100644
index 00000000..bee42119
--- /dev/null
+++ b/test/cfoa/latch.hpp
@@ -0,0 +1,87 @@
+// Copyright (C) 2023 Christian Mazakas
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_UNORDERED_TEST_CFOA_LATCH_HPP
+#define BOOST_UNORDERED_TEST_CFOA_LATCH_HPP
+
+#include <boost/assert.hpp>
+
+#include <climits>
+#include <condition_variable>
+#include <cstddef>
+#include <mutex>
+
+namespace boost {
+  class latch
+  {
+  private:
+    std::ptrdiff_t n_;
+    mutable std::mutex m_;
+    mutable std::condition_variable cv_;
+
+  public:
+    explicit latch(std::ptrdiff_t expected) : n_{expected}, m_{}, cv_{}
+    {
+      BOOST_ASSERT(n_ >= 0);
+      BOOST_ASSERT(n_ <= max());
+    }
+
+    latch(latch const&) = delete;
+    latch& operator=(latch const&) = delete;
+
+    ~latch() = default;
+
+    void count_down(std::ptrdiff_t n = 1)
+    {
+      std::unique_lock<std::mutex> lk(m_);
+      count_down_and_notify(lk, n);
+    }
+
+    bool try_wait() const noexcept
+    {
+      std::unique_lock<std::mutex> lk(m_);
+      return is_ready();
+    }
+
+    void wait() const
+    {
+      std::unique_lock<std::mutex> lk(m_);
+      wait_impl(lk);
+    }
+
+    void arrive_and_wait(std::ptrdiff_t n = 1)
+    {
+      std::unique_lock<std::mutex> lk(m_);
+      bool should_wait = count_down_and_notify(lk, n);
+      if (should_wait) {
+        wait_impl(lk);
+      }
+    }
+
+    static constexpr std::ptrdiff_t max() noexcept { return INT_MAX; }
+
+  private:
+    bool is_ready() const { return n_ == 0; }
+
+    bool count_down_and_notify(
+      std::unique_lock<std::mutex>& lk, std::ptrdiff_t n)
+    {
+      n_ -= n;
+      if (n_ == 0) {
+        lk.unlock();
+        cv_.notify_all();
+        return false;
+      }
+
+      return true;
+    }
+
+    void wait_impl(std::unique_lock<std::mutex>& lk) const
+    {
+      cv_.wait(lk, [this] { return this->is_ready(); });
+    }
+  };
+} // namespace boost
+
+#endif // BOOST_UNORDERED_TEST_CFOA_LATCH_HPP
diff --git a/test/cfoa/latch_tests.cpp b/test/cfoa/latch_tests.cpp
new file mode 100644
index 00000000..0c7e9ece
--- /dev/null
+++ b/test/cfoa/latch_tests.cpp
@@ -0,0 +1,155 @@
+// Copyright (C) 2023 Christian Mazakas
+// Distributed under the Boost Software License, Version 1.0.
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_ENABLE_ASSERT_HANDLER + +#include "latch.hpp" + +#include + +#include +#include + +struct exception +{ +}; + +namespace boost { + void assertion_failed( + char const* expr, char const* function, char const* file, long line) + { + (void)expr; + (void)function; + (void)file; + (void)line; + throw exception{}; + } +} // namespace boost + +namespace { + void test_max() { BOOST_TEST_EQ(boost::latch::max(), INT_MAX); } + + void test_constructor() + { + { + auto const f = [] { + boost::latch l(-1); + (void)l; + }; + BOOST_TEST_THROWS(f(), exception); + } + + { + std::ptrdiff_t n = 0; + + boost::latch l(n); + BOOST_TEST(l.try_wait()); + } + + { + std::ptrdiff_t n = 16; + + boost::latch l(n); + BOOST_TEST_NOT(l.try_wait()); + + l.count_down(16); + BOOST_TEST(l.try_wait()); + } + +#if PTRDIFF_MAX > INT_MAX + { + auto const f = [] { + std::ptrdiff_t n = INT_MAX; + n += 10; + boost::latch l(n); + (void)l; + }; + BOOST_TEST_THROWS(f(), exception); + } +#endif + } + + void test_count_down_and_wait() + { + constexpr std::ptrdiff_t n = 1024; + + boost::latch l(2 * n); + + bool bs[] = {false, false}; + + std::thread t1([&] { + l.wait(); + BOOST_TEST(bs[0]); + BOOST_TEST(bs[1]); + }); + + std::thread t2([&] { + for (int i = 0; i < n; ++i) { + if (i == (n - 1)) { + bs[0] = true; + } else { + BOOST_TEST_NOT(l.try_wait()); + } + + l.count_down(1); + } + }); + + for (int i = 0; i < n; ++i) { + if (i == (n - 1)) { + bs[1] = true; + } else { + BOOST_TEST_NOT(l.try_wait()); + } + + l.count_down(1); + } + + t1.join(); + t2.join(); + + BOOST_TEST(l.try_wait()); + } + + void test_arrive_and_wait() + { + std::ptrdiff_t const n = 16; + + boost::latch l(2 * n); + + int xs[n] = {0}; + + std::vector threads; + for (int i = 0; i < n; ++i) { + threads.emplace_back([&l, &xs, i, n] { + (void)n; + for (int j = 0; j < n; ++j) { + BOOST_TEST_EQ(xs[j], 0); + } + + l.arrive_and_wait(2); + + xs[i] = 1; + }); + } + + for (auto& t : threads) { + t.join(); + } + + for (int i = 0; i < n; ++i) { + BOOST_TEST_EQ(xs[i], 1); + } + } +} // namespace + +int main() +{ + test_max(); + test_constructor(); + test_count_down_and_wait(); + test_arrive_and_wait(); + + return boost::report_errors(); +} diff --git a/test/cfoa/merge_tests.cpp b/test/cfoa/merge_tests.cpp new file mode 100644 index 00000000..92770ecb --- /dev/null +++ b/test/cfoa/merge_tests.cpp @@ -0,0 +1,219 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "helpers.hpp" + +#include + +test::seed_t initialize_seed{402031699}; + +using test::default_generator; +using test::limited_range; +using test::sequential; + +using hasher = stateful_hash; +using key_equal = stateful_key_equal; +using allocator_type = stateful_allocator >; + +using map_type = boost::unordered::concurrent_flat_map; + +using map_value_type = typename map_type::value_type; + +struct +{ + template + std::size_t operator()(X1& x1, X2& x2) const noexcept + { + return x1.merge(x2); + } +} lvalue_merge; + +struct +{ + template + std::size_t operator()(X1& x1, X2& x2) const noexcept + { + return x1.merge(std::move(x2)); + } +} rvalue_merge; + +namespace { + template + void merge_tests(F merger, G gen, test::random_generator rg) + { + auto values = make_random_values(1024 * 8, [&] { return gen(rg); }); + + auto ref_map = + boost::unordered_flat_map(values.begin(), values.end()); + + { + raii::reset_counts(); + + map_type x(values.size(), hasher(1), key_equal(2), allocator_type(3)); + + auto const old_cc = +raii::copy_constructor; + + std::atomic expected_copies{0}; + std::atomic num_merged{0}; + + thread_runner(values, [&x, &expected_copies, &num_merged, merger]( + boost::span s) { + using map2_type = boost::unordered::concurrent_flat_map, std::equal_to, allocator_type>; + + map2_type y(s.size(), allocator_type(3)); + for (auto const& v : s) { + y.insert(v); + } + expected_copies += 2 * y.size(); + + BOOST_TEST(x.get_allocator() == y.get_allocator()); + num_merged += merger(x, y); + }); + + BOOST_TEST_EQ(raii::copy_constructor, old_cc + expected_copies); + BOOST_TEST_EQ(raii::move_constructor, 2 * ref_map.size()); + BOOST_TEST_EQ(+num_merged, ref_map.size()); + + test_fuzzy_matches_reference(x, ref_map, rg); + } + check_raii_counts(); + } + + template + void insert_and_merge_tests(G gen, test::random_generator rg) + { + using map2_type = boost::unordered::concurrent_flat_map, std::equal_to, allocator_type>; + + auto vals1 = make_random_values(1024 * 8, [&] { return gen(rg); }); + auto vals2 = make_random_values(1024 * 4, [&] { return gen(rg); }); + + auto ref_map = boost::unordered_flat_map(); + ref_map.insert(vals1.begin(), vals1.end()); + ref_map.insert(vals2.begin(), vals2.end()); + + { + raii::reset_counts(); + + map_type x1(2 * vals1.size(), hasher(1), key_equal(2), allocator_type(3)); + + map2_type x2(2 * vals1.size(), allocator_type(3)); + + std::thread t1, t2, t3; + boost::latch l(2); + + std::mutex m; + std::condition_variable cv; + std::atomic_bool done1{false}, done2{false}; + std::atomic num_merges{0}; + std::atomic call_count{0}; + bool ready = false; + + auto const old_mc = +raii::move_constructor; + BOOST_TEST_EQ(old_mc, 0u); + + t1 = std::thread([&x1, &vals1, &l, &done1, &cv, &ready, &m] { + l.arrive_and_wait(); + + for (std::size_t idx = 0; idx < vals1.size(); ++idx) { + auto const& val = vals1[idx]; + x1.insert(val); + + if (idx % (vals1.size() / 128) == 0) { + { + std::unique_lock lk(m); + ready = true; + } + cv.notify_all(); + std::this_thread::yield(); + } + } + + done1 = true; + { + std::unique_lock lk(m); + ready = true; + } + cv.notify_all(); + }); + + t2 = std::thread([&x2, &vals2, &l, &done2, &cv, &m, &ready] { + l.arrive_and_wait(); + + for (std::size_t idx = 0; idx < vals2.size(); ++idx) { + auto const& val = vals2[idx]; + x2.insert(val); + if (idx % 100 == 0) { + std::this_thread::yield(); + } + } + + done2 = true; + { + std::unique_lock lk(m); + 
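+        // Reviewer note (same handshake as in t1): 'ready' is written
+        // under the mutex and cv is then notified, so the merging thread
+        // t3 below wakes, performs x1.merge(x2) / x2.merge(x1) while the
+        // producer threads may still be inserting, and re-arms itself by
+        // clearing 'ready'.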
ready = true; + } + cv.notify_all(); + }); + + t3 = std::thread( + [&x1, &x2, &m, &cv, &done1, &done2, &num_merges, &call_count, &ready] { + while (x1.empty() && x2.empty()) { + } + + do { + { + std::unique_lock lk(m); + cv.wait(lk, [&ready] { return ready; }); + ready = false; + } + + num_merges += x1.merge(x2); + std::this_thread::yield(); + num_merges += x2.merge(x1); + + call_count += 1; + + } while (!done1 || !done2); + + BOOST_TEST(done1); + BOOST_TEST(done2); + }); + + t1.join(); + t2.join(); + t3.join(); + + if (num_merges > 0) { + // num merges is 0 most commonly in the cast of the limited_range + // generator as both maps will contains keys from 0 to 99 + BOOST_TEST_EQ(+raii::move_constructor, 2 * num_merges); + BOOST_TEST_GE(call_count, 1u); + } + + x1.merge(x2); + test_fuzzy_matches_reference(x1, ref_map, rg); + } + + check_raii_counts(); + } + +} // namespace + +// clang-format off +UNORDERED_TEST( + merge_tests, + ((lvalue_merge)(rvalue_merge)) + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST( + insert_and_merge_tests, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) +// clang-format on + +RUN_TESTS() diff --git a/test/cfoa/rehash_tests.cpp b/test/cfoa/rehash_tests.cpp new file mode 100644 index 00000000..1a3092aa --- /dev/null +++ b/test/cfoa/rehash_tests.cpp @@ -0,0 +1,178 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "helpers.hpp" + +#include + +using test::default_generator; +using test::limited_range; +using test::sequential; + +using hasher = stateful_hash; +using key_equal = stateful_key_equal; +using allocator_type = stateful_allocator >; + +using map_type = boost::unordered::concurrent_flat_map; + +using map_value_type = typename map_type::value_type; + +namespace { + test::seed_t initialize_seed{748775921}; + + UNORDERED_AUTO_TEST (rehash_no_insert) { + map_type x(0, hasher(1), key_equal(2), allocator_type(3)); + BOOST_TEST_EQ(x.bucket_count(), 0u); + + x.rehash(1024); + BOOST_TEST_GE(x.bucket_count(), 1024u); + + x.rehash(512); + BOOST_TEST_GE(x.bucket_count(), 512u); + BOOST_TEST_LT(x.bucket_count(), 1024u); + + x.rehash(0); + BOOST_TEST_EQ(x.bucket_count(), 0u); + } + + UNORDERED_AUTO_TEST (reserve_no_insert) { + using size_type = map_type::size_type; + + map_type x(0, hasher(1), key_equal(2), allocator_type(3)); + + auto f = [&x](double c) { + return static_cast(std::ceil(c / x.max_load_factor())); + }; + + BOOST_TEST_EQ(x.bucket_count(), f(0.0)); + + x.reserve(1024); + BOOST_TEST_GE(x.bucket_count(), f(1024.0)); + + x.reserve(512); + BOOST_TEST_GE(x.bucket_count(), f(512.0)); + BOOST_TEST_LT(x.bucket_count(), f(1024.0)); + + x.reserve(0); + BOOST_TEST_EQ(x.bucket_count(), f(0.0)); + } + + template + void insert_and_erase_with_rehash(G gen, test::random_generator rg) + { + auto vals1 = make_random_values(1024 * 8, [&] { return gen(rg); }); + + auto erase_indices = std::vector(vals1.size()); + for (std::size_t idx = 0; idx < erase_indices.size(); ++idx) { + erase_indices[idx] = idx; + } + shuffle_values(erase_indices); + + auto ref_map = boost::unordered_flat_map(); + ref_map.insert(vals1.begin(), vals1.end()); + + { + raii::reset_counts(); + + map_type x(0, hasher(1), key_equal(2), allocator_type(3)); + + std::thread t1, t2, t3; + boost::latch l(2); + + std::mutex m; + std::condition_variable cv; + std::atomic_bool 
done1{false}, done2{false}; + std::atomic call_count{0}; + bool ready = false; + + auto const old_mc = +raii::move_constructor; + BOOST_TEST_EQ(old_mc, 0u); + + t1 = std::thread([&x, &vals1, &l, &done1, &cv, &ready, &m] { + l.arrive_and_wait(); + + for (std::size_t idx = 0; idx < vals1.size(); ++idx) { + auto const& val = vals1[idx]; + x.insert(val); + + if (idx % (vals1.size() / 128) == 0) { + { + std::unique_lock lk(m); + ready = true; + } + cv.notify_all(); + std::this_thread::yield(); + } + } + + done1 = true; + { + std::unique_lock lk(m); + ready = true; + } + cv.notify_all(); + }); + + t2 = + std::thread([&x, &vals1, &erase_indices, &l, &done2, &cv, &m, &ready] { + l.arrive_and_wait(); + + for (std::size_t idx = 0; idx < erase_indices.size(); ++idx) { + auto const& val = vals1[erase_indices[idx]]; + x.erase(val.first); + if (idx % 100 == 0) { + std::this_thread::yield(); + } + } + + done2 = true; + { + std::unique_lock lk(m); + ready = true; + } + cv.notify_all(); + }); + + t3 = + std::thread([&x, &vals1, &m, &cv, &done1, &done2, &call_count, &ready] { + do { + { + std::unique_lock lk(m); + cv.wait(lk, [&ready] { return ready; }); + ready = false; + } + + auto const bc = static_cast(rand()) % vals1.size(); + x.rehash(bc); + call_count += 1; + + std::this_thread::yield(); + } while (!done1 || !done2); + + BOOST_TEST(done1); + BOOST_TEST(done2); + }); + + t1.join(); + t2.join(); + t3.join(); + + BOOST_TEST_GE(call_count, 1u); + + test_fuzzy_matches_reference(x, ref_map, rg); + } + + check_raii_counts(); + } +} // namespace + +// clang-format off +UNORDERED_TEST( + insert_and_erase_with_rehash, + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) +// clang-format on + +RUN_TESTS() diff --git a/test/cfoa/swap_tests.cpp b/test/cfoa/swap_tests.cpp new file mode 100644 index 00000000..3ab29791 --- /dev/null +++ b/test/cfoa/swap_tests.cpp @@ -0,0 +1,303 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "helpers.hpp" + +#include + +test::seed_t initialize_seed{996130204}; + +using test::default_generator; +using test::limited_range; +using test::sequential; + +template struct pocs_allocator +{ + using propagate_on_container_swap = std::true_type; + + int x_ = -1; + + using value_type = T; + + pocs_allocator() = default; + pocs_allocator(pocs_allocator const&) = default; + pocs_allocator(pocs_allocator&&) = default; + + pocs_allocator(int const x) : x_{x} {} + + pocs_allocator& operator=(pocs_allocator const& rhs) + { + if (this != &rhs) { + x_ = rhs.x_; + } + return *this; + } + + template pocs_allocator(pocs_allocator const& rhs) : x_{rhs.x_} + { + } + + T* allocate(std::size_t n) + { + return static_cast(::operator new(n * sizeof(T))); + } + + void deallocate(T* p, std::size_t) { ::operator delete(p); } + + bool operator==(pocs_allocator const& rhs) const { return x_ == rhs.x_; } + bool operator!=(pocs_allocator const& rhs) const { return x_ != rhs.x_; } + + friend void swap(pocs_allocator& lhs, pocs_allocator& rhs) noexcept + { + std::swap(lhs.x_, rhs.x_); + } +}; + +using hasher = stateful_hash; +using key_equal = stateful_key_equal; +using allocator_type = stateful_allocator >; + +using map_type = boost::unordered::concurrent_flat_map; + +using map_value_type = typename map_type::value_type; + +using pocs_allocator_type = pocs_allocator >; + +using pocs_map_type = boost::unordered::concurrent_flat_map; + +template struct is_nothrow_member_swappable +{ + static bool const value = + noexcept(std::declval().swap(std::declval())); +}; + +BOOST_STATIC_ASSERT(is_nothrow_member_swappable< + boost::unordered::concurrent_flat_map, + std::equal_to, std::allocator > > >::value); + +BOOST_STATIC_ASSERT(is_nothrow_member_swappable::value); + +BOOST_STATIC_ASSERT(!is_nothrow_member_swappable::value); + +namespace { + struct + { + template void operator()(T& x1, T& x2) const { x1.swap(x2); } + } member_fn_swap; + + struct + { + template void operator()(T& x1, T& x2) const + { + using boost::unordered::swap; + swap(x1, x2); + } + } free_fn_swap; + + template + void swap_tests(X*, F swapper, G gen, test::random_generator rg) + { + using allocator = typename X::allocator_type; + + bool const pocs = + boost::allocator_propagate_on_container_swap::type::value; + + auto vals1 = make_random_values(1024 * 8, [&] { return gen(rg); }); + auto vals2 = make_random_values(1024 * 4, [&] { return gen(rg); }); + + auto ref_map1 = + boost::unordered_flat_map(vals1.begin(), vals1.end()); + + auto ref_map2 = + boost::unordered_flat_map(vals2.begin(), vals2.end()); + + { + raii::reset_counts(); + + X x1(vals1.begin(), vals1.end(), vals1.size(), hasher(1), key_equal(2), + allocator(3)); + + X x2(vals2.begin(), vals2.end(), vals2.size(), hasher(2), key_equal(1), + pocs ? 
allocator(4) : allocator(3)); + + if (pocs) { + BOOST_TEST(x1.get_allocator() != x2.get_allocator()); + } else { + BOOST_TEST(x1.get_allocator() == x2.get_allocator()); + } + + auto const old_cc = +raii::copy_constructor; + auto const old_mc = +raii::move_constructor; + + thread_runner(vals1, [&x1, &x2, swapper](boost::span s) { + (void)s; + + swapper(x1, x2); + swapper(x2, x1); + }); + + BOOST_TEST_EQ(raii::copy_constructor, old_cc); + BOOST_TEST_EQ(raii::move_constructor, old_mc); + + if (pocs) { + if (x1.get_allocator() == allocator(3)) { + BOOST_TEST(x2.get_allocator() == allocator(4)); + } else { + BOOST_TEST(x1.get_allocator() == allocator(4)); + BOOST_TEST(x2.get_allocator() == allocator(3)); + } + } else { + BOOST_TEST(x1.get_allocator() == allocator(3)); + BOOST_TEST(x1.get_allocator() == x2.get_allocator()); + } + + if (x1.size() == ref_map1.size()) { + test_matches_reference(x1, ref_map1); + test_matches_reference(x2, ref_map2); + + BOOST_TEST_EQ(x1.hash_function(), hasher(1)); + BOOST_TEST_EQ(x1.key_eq(), key_equal(2)); + + BOOST_TEST_EQ(x2.hash_function(), hasher(2)); + BOOST_TEST_EQ(x2.key_eq(), key_equal(1)); + } else { + test_matches_reference(x2, ref_map1); + test_matches_reference(x1, ref_map2); + + BOOST_TEST_EQ(x1.hash_function(), hasher(2)); + BOOST_TEST_EQ(x1.key_eq(), key_equal(1)); + + BOOST_TEST_EQ(x2.hash_function(), hasher(1)); + BOOST_TEST_EQ(x2.key_eq(), key_equal(2)); + } + } + check_raii_counts(); + } + + template + void insert_and_swap(F swapper, G gen, test::random_generator rg) + { + auto vals1 = make_random_values(1024 * 8, [&] { return gen(rg); }); + auto vals2 = make_random_values(1024 * 4, [&] { return gen(rg); }); + + { + raii::reset_counts(); + + map_type x1(vals1.size(), hasher(1), key_equal(2), allocator_type(3)); + map_type x2(vals2.size(), hasher(2), key_equal(1), allocator_type(3)); + + std::thread t1, t2, t3; + boost::latch l(2); + + std::mutex m; + std::condition_variable cv; + std::atomic_bool done1{false}, done2{false}; + std::atomic num_swaps{0}; + bool ready = false; + + t1 = std::thread([&x1, &vals1, &l, &done1, &cv, &ready, &m] { + l.arrive_and_wait(); + + for (std::size_t idx = 0; idx < vals1.size(); ++idx) { + auto const& val = vals1[idx]; + x1.insert(val); + if (idx % (vals1.size() / 128) == 0) { + { + std::unique_lock lk(m); + ready = true; + } + cv.notify_all(); + } + std::this_thread::yield(); + } + + done1 = true; + { + std::unique_lock lk(m); + ready = true; + } + cv.notify_all(); + }); + + t2 = std::thread([&x2, &vals2, &l, &done2, &ready, &cv, &m] { + l.arrive_and_wait(); + + for (std::size_t idx = 0; idx < vals2.size(); ++idx) { + auto const& val = vals2[idx]; + x2.insert(val); + if (idx % 100 == 0) { + std::this_thread::yield(); + } + } + + done2 = true; + { + std::unique_lock lk(m); + ready = true; + } + cv.notify_all(); + }); + + t3 = std::thread( + [&x1, &x2, &m, &cv, &done1, &done2, &num_swaps, swapper, &ready] { + do { + { + std::unique_lock lk(m); + cv.wait(lk, [&ready] { return ready; }); + ready = false; + } + swapper(x1, x2); + ++num_swaps; + std::this_thread::yield(); + } while (!done1 || !done2); + + BOOST_TEST(done1); + BOOST_TEST(done2); + }); + + t1.join(); + t2.join(); + t3.join(); + + BOOST_TEST_GT(num_swaps, 0u); + + if (x1.hash_function() == hasher(1)) { + BOOST_TEST_EQ(x1.key_eq(), key_equal(2)); + + BOOST_TEST_EQ(x2.hash_function(), hasher(2)); + BOOST_TEST_EQ(x2.key_eq(), key_equal(1)); + } else { + BOOST_TEST_EQ(x1.hash_function(), hasher(2)); + BOOST_TEST_EQ(x1.key_eq(), key_equal(1)); + + 
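+        // Reviewer note: an odd total number of swaps leaves the function
+        // objects exchanged, an even total leaves them in place; either
+        // way each (hasher, key_equal) pair travels intact, so hasher(2)
+        // on x1 must be accompanied by key_equal(1) as checked above, and
+        // x2 must hold the other pair: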
BOOST_TEST_EQ(x2.hash_function(), hasher(1)); + BOOST_TEST_EQ(x2.key_eq(), key_equal(2)); + } + } + + check_raii_counts(); + } + + map_type* map; + pocs_map_type* pocs_map; + +} // namespace + +// clang-format off +UNORDERED_TEST( + swap_tests, + ((map)(pocs_map)) + ((member_fn_swap)(free_fn_swap)) + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) + +UNORDERED_TEST(insert_and_swap, + ((member_fn_swap)(free_fn_swap)) + ((value_type_generator)) + ((default_generator)(sequential)(limited_range))) +// clang-format on + +RUN_TESTS() diff --git a/test/cfoa/try_emplace_tests.cpp b/test/cfoa/try_emplace_tests.cpp new file mode 100644 index 00000000..374c5f02 --- /dev/null +++ b/test/cfoa/try_emplace_tests.cpp @@ -0,0 +1,396 @@ +// Copyright (C) 2023 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "helpers.hpp" + +#include + +#include + +namespace { + test::seed_t initialize_seed(511933564); + + struct lvalue_try_emplacer_type + { + template void operator()(std::vector& values, X& x) + { + std::atomic num_inserts{0}; + thread_runner(values, [&x, &num_inserts](boost::span s) { + for (auto const& r : s) { + bool b = x.try_emplace(r.first, r.second.x_); + if (b) { + ++num_inserts; + } + } + }); + BOOST_TEST_EQ(num_inserts, x.size()); + BOOST_TEST_EQ(raii::copy_constructor, x.size()); + BOOST_TEST_EQ(raii::default_constructor, x.size()); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } lvalue_try_emplacer; + + struct norehash_lvalue_try_emplacer_type : public lvalue_try_emplacer_type + { + template void operator()(std::vector& values, X& x) + { + x.reserve(values.size()); + lvalue_try_emplacer_type::operator()(values, x); + BOOST_TEST_EQ(raii::move_constructor, 0u); + } + } norehash_lvalue_try_emplacer; + + struct rvalue_try_emplacer_type + { + template void operator()(std::vector& values, X& x) + { + BOOST_TEST_EQ(raii::copy_constructor, 0u); + + std::atomic num_inserts{0}; + thread_runner(values, [&x, &num_inserts](boost::span s) { + for (auto& r : s) { + bool b = x.try_emplace(std::move(r.first), r.second.x_); + if (b) { + ++num_inserts; + } + } + }); + + BOOST_TEST_EQ(num_inserts, x.size()); + + if (std::is_same::value) { + BOOST_TEST_EQ(raii::copy_constructor, x.size()); + } else { + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_GE(raii::move_constructor, x.size()); + } + + BOOST_TEST_EQ(raii::default_constructor, x.size()); + BOOST_TEST_EQ(raii::copy_assignment, 0u); + BOOST_TEST_EQ(raii::move_assignment, 0u); + } + } rvalue_try_emplacer; + + struct norehash_rvalue_try_emplacer_type : public rvalue_try_emplacer_type + { + template void operator()(std::vector& values, X& x) + { + x.reserve(values.size()); + + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_EQ(raii::move_constructor, 0u); + + rvalue_try_emplacer_type::operator()(values, x); + + if (std::is_same::value) { + BOOST_TEST_EQ(raii::copy_constructor, x.size()); + BOOST_TEST_EQ(raii::move_constructor, 0u); + } else { + BOOST_TEST_EQ(raii::copy_constructor, 0u); + BOOST_TEST_EQ(raii::move_constructor, x.size()); + } + } + } norehash_rvalue_try_emplacer; + + struct transp_try_emplace_type + { + template void operator()(std::vector& values, X& x) + { + using is_transparent = + typename boost::make_void::type; + + boost::ignore_unused(); + + BOOST_TEST_EQ(raii::default_constructor, 0u); + + std::atomic 
+  struct transp_try_emplace_type
+  {
+    template <class T, class X> void operator()(std::vector<T>& values, X& x)
+    {
+      using is_transparent =
+        typename boost::make_void<typename X::hasher::is_transparent,
+          typename X::key_equal::is_transparent>::type;
+
+      boost::ignore_unused<is_transparent>();
+
+      BOOST_TEST_EQ(raii::default_constructor, 0u);
+
+      std::atomic<std::uint64_t> num_inserts{0};
+
+      thread_runner(values, [&x, &num_inserts](boost::span<T> s) {
+        for (auto& r : s) {
+          bool b = x.try_emplace(r.first.x_, r.second.x_);
+          if (b) {
+            ++num_inserts;
+          }
+        }
+      });
+
+      BOOST_TEST_EQ(num_inserts, x.size());
+      BOOST_TEST_EQ(raii::default_constructor, 2 * x.size());
+      BOOST_TEST_EQ(raii::copy_constructor, 0u);
+      BOOST_TEST_EQ(raii::copy_assignment, 0u);
+      BOOST_TEST_EQ(raii::move_assignment, 0u);
+    }
+  } transp_try_emplace;
+
+  struct norehash_transp_try_emplace_type : public transp_try_emplace_type
+  {
+    template <class T, class X> void operator()(std::vector<T>& values, X& x)
+    {
+      x.reserve(values.size());
+      transp_try_emplace_type::operator()(values, x);
+      BOOST_TEST_EQ(raii::move_constructor, 0u);
+    }
+  } norehash_transp_try_emplace;
+
+  struct lvalue_try_emplace_or_cvisit_type
+  {
+    template <class T, class X> void operator()(std::vector<T>& values, X& x)
+    {
+      std::atomic<std::uint64_t> num_inserts{0};
+      std::atomic<std::uint64_t> num_invokes{0};
+      thread_runner(
+        values, [&x, &num_inserts, &num_invokes](boost::span<T> s) {
+          for (auto& r : s) {
+            bool b = x.try_emplace_or_cvisit(
+              r.first, r.second.x_,
+              [&num_invokes](typename X::value_type const& v) {
+                (void)v;
+                ++num_invokes;
+              });
+
+            if (b) {
+              ++num_inserts;
+            }
+          }
+        });
+
+      BOOST_TEST_EQ(num_inserts, x.size());
+      BOOST_TEST_EQ(num_invokes, values.size() - x.size());
+
+      BOOST_TEST_EQ(raii::default_constructor, x.size());
+      BOOST_TEST_EQ(raii::copy_constructor, x.size());
+      // don't check move construction count here because of rehashing
+      BOOST_TEST_GT(raii::move_constructor, 0u);
+      BOOST_TEST_EQ(raii::move_assignment, 0u);
+      BOOST_TEST_EQ(raii::copy_assignment, 0u);
+    }
+  } lvalue_try_emplace_or_cvisit;
+
+  struct lvalue_try_emplace_or_visit_type
+  {
+    template <class T, class X> void operator()(std::vector<T>& values, X& x)
+    {
+      std::atomic<std::uint64_t> num_inserts{0};
+      std::atomic<std::uint64_t> num_invokes{0};
+      thread_runner(
+        values, [&x, &num_inserts, &num_invokes](boost::span<T> s) {
+          for (auto& r : s) {
+            bool b = x.try_emplace_or_visit(
+              r.first, r.second.x_,
+              [&num_invokes](typename X::value_type& v) {
+                (void)v;
+                ++num_invokes;
+              });
+
+            if (b) {
+              ++num_inserts;
+            }
+          }
+        });
+
+      BOOST_TEST_EQ(num_inserts, x.size());
+      BOOST_TEST_EQ(num_invokes, values.size() - x.size());
+
+      BOOST_TEST_EQ(raii::default_constructor, x.size());
+      BOOST_TEST_EQ(raii::copy_constructor, x.size());
+      // don't check move construction count here because of rehashing
+      BOOST_TEST_GT(raii::move_constructor, 0u);
+      BOOST_TEST_EQ(raii::move_assignment, 0u);
+      BOOST_TEST_EQ(raii::copy_assignment, 0u);
+    }
+  } lvalue_try_emplace_or_visit;
+
+  struct rvalue_try_emplace_or_cvisit_type
+  {
+    template <class T, class X> void operator()(std::vector<T>& values, X& x)
+    {
+      std::atomic<std::uint64_t> num_inserts{0};
+      std::atomic<std::uint64_t> num_invokes{0};
+      thread_runner(
+        values, [&x, &num_inserts, &num_invokes](boost::span<T> s) {
+          for (auto& r : s) {
+            bool b = x.try_emplace_or_cvisit(
+              std::move(r.first), r.second.x_,
+              [&num_invokes](typename X::value_type const& v) {
+                (void)v;
+                ++num_invokes;
+              });
+
+            if (b) {
+              ++num_inserts;
+            }
+          }
+        });
+
+      BOOST_TEST_EQ(num_inserts, x.size());
+      BOOST_TEST_EQ(num_invokes, values.size() - x.size());
+
+      BOOST_TEST_EQ(raii::default_constructor, x.size());
+
+      if (std::is_same<T, typename X::value_type>::value) {
+        BOOST_TEST_EQ(raii::copy_constructor, x.size());
+        BOOST_TEST_GE(raii::move_constructor, x.size());
+      } else {
+        BOOST_TEST_EQ(raii::copy_constructor, 0u);
+        BOOST_TEST_GE(raii::move_constructor, x.size());
+      }
+    }
+  } rvalue_try_emplace_or_cvisit;
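+
+  // as with the functors above, a try_emplace_or_(c)visit call that fails to
+  // insert must invoke the visitor on the element that blocked it, so
+  // num_inserts + num_invokes always adds up to values.size()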
+  struct rvalue_try_emplace_or_visit_type
+  {
+    template <class T, class X> void operator()(std::vector<T>& values, X& x)
+    {
+      std::atomic<std::uint64_t> num_inserts{0};
+      std::atomic<std::uint64_t> num_invokes{0};
+      thread_runner(
+        values, [&x, &num_inserts, &num_invokes](boost::span<T> s) {
+          for (auto& r : s) {
+            bool b = x.try_emplace_or_visit(
+              std::move(r.first), r.second.x_,
+              [&num_invokes](typename X::value_type& v) {
+                (void)v;
+                ++num_invokes;
+              });
+
+            if (b) {
+              ++num_inserts;
+            }
+          }
+        });
+
+      BOOST_TEST_EQ(num_inserts, x.size());
+      BOOST_TEST_EQ(num_invokes, values.size() - x.size());
+
+      BOOST_TEST_EQ(raii::default_constructor, x.size());
+      if (std::is_same<T, typename X::value_type>::value) {
+        BOOST_TEST_EQ(raii::copy_constructor, x.size());
+        BOOST_TEST_GE(raii::move_constructor, x.size());
+      } else {
+        BOOST_TEST_EQ(raii::copy_constructor, 0u);
+        BOOST_TEST_GE(raii::move_constructor, x.size());
+      }
+    }
+  } rvalue_try_emplace_or_visit;
+
+  struct transp_try_emplace_or_cvisit_type
+  {
+    template <class T, class X> void operator()(std::vector<T>& values, X& x)
+    {
+      std::atomic<std::uint64_t> num_inserts{0};
+      std::atomic<std::uint64_t> num_invokes{0};
+      thread_runner(
+        values, [&x, &num_inserts, &num_invokes](boost::span<T> s) {
+          for (auto& r : s) {
+            bool b = x.try_emplace_or_cvisit(
+              r.first.x_, r.second.x_,
+              [&num_invokes](typename X::value_type const& v) {
+                (void)v;
+                ++num_invokes;
+              });
+
+            if (b) {
+              ++num_inserts;
+            }
+          }
+        });
+
+      BOOST_TEST_EQ(num_inserts, x.size());
+      BOOST_TEST_EQ(num_invokes, values.size() - x.size());
+      BOOST_TEST_EQ(raii::default_constructor, 2 * x.size());
+      BOOST_TEST_EQ(raii::copy_constructor, 0u);
+    }
+  } transp_try_emplace_or_cvisit;
+
+  struct transp_try_emplace_or_visit_type
+  {
+    template <class T, class X> void operator()(std::vector<T>& values, X& x)
+    {
+      std::atomic<std::uint64_t> num_inserts{0};
+      std::atomic<std::uint64_t> num_invokes{0};
+      thread_runner(
+        values, [&x, &num_inserts, &num_invokes](boost::span<T> s) {
+          for (auto& r : s) {
+            bool b = x.try_emplace_or_visit(
+              r.first.x_, r.second.x_,
+              [&num_invokes](typename X::value_type& v) {
+                (void)v;
+                ++num_invokes;
+              });
+
+            if (b) {
+              ++num_inserts;
+            }
+          }
+        });
+
+      BOOST_TEST_EQ(num_inserts, x.size());
+      BOOST_TEST_EQ(num_invokes, values.size() - x.size());
+
+      BOOST_TEST_EQ(raii::default_constructor, 2 * x.size());
+      BOOST_TEST_EQ(raii::copy_constructor, 0u);
+    }
+  } transp_try_emplace_or_visit;
+
+  template <class X, class G, class F>
+  void try_emplace(X*, G gen, F try_emplacer, test::random_generator rg)
+  {
+    auto values = make_random_values(1024 * 16, [&] { return gen(rg); });
+    auto reference_map =
+      boost::unordered_flat_map<raii, raii>(values.begin(), values.end());
+    raii::reset_counts();
+
+    {
+      X x;
+
+      try_emplacer(values, x);
+
+      BOOST_TEST_EQ(x.size(), reference_map.size());
+
+      using value_type = typename X::value_type;
+      BOOST_TEST_EQ(x.size(), x.visit_all([&](value_type const& kv) {
+        BOOST_TEST(reference_map.contains(kv.first));
+        if (rg == test::sequential) {
+          BOOST_TEST_EQ(kv.second, reference_map[kv.first]);
+        }
+      }));
+    }
+
+    BOOST_TEST_GE(raii::default_constructor, 0u);
+    BOOST_TEST_GE(raii::copy_constructor, 0u);
+    BOOST_TEST_GE(raii::move_constructor, 0u);
+    BOOST_TEST_GT(raii::destructor, 0u);
+
+    BOOST_TEST_EQ(raii::default_constructor + raii::copy_constructor +
+                    raii::move_constructor,
+      raii::destructor);
+  }
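+
+  // these pointers are never dereferenced: UNORDERED_TEST only uses them to
+  // carry the concrete map types into the test function templates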
+  boost::unordered::concurrent_flat_map<raii, raii>* map;
+  boost::unordered::concurrent_flat_map<raii, raii, transp_hash,
+    transp_key_equal>* transp_map;
+
+} // namespace
+
+using test::default_generator;
+using test::limited_range;
+using test::sequential;
+
+// clang-format off
+UNORDERED_TEST(
+  try_emplace,
+  ((map))
+  ((value_type_generator)(init_type_generator))
+  ((lvalue_try_emplacer)(norehash_lvalue_try_emplacer)
+   (rvalue_try_emplacer)(norehash_rvalue_try_emplacer)
+   (lvalue_try_emplace_or_cvisit)(lvalue_try_emplace_or_visit)
+   (rvalue_try_emplace_or_cvisit)(rvalue_try_emplace_or_visit))
+  ((default_generator)(sequential)(limited_range)))
+
+UNORDERED_TEST(
+  try_emplace,
+  ((transp_map))
+  ((init_type_generator))
+  ((transp_try_emplace)(norehash_transp_try_emplace)
+   (transp_try_emplace_or_cvisit)(transp_try_emplace_or_visit))
+  ((default_generator)(sequential)(limited_range)))
+// clang-format on
+
+RUN_TESTS()
diff --git a/test/cfoa/visit_tests.cpp b/test/cfoa/visit_tests.cpp
new file mode 100644
index 00000000..fae4deae
--- /dev/null
+++ b/test/cfoa/visit_tests.cpp
@@ -0,0 +1,599 @@
+// Copyright (C) 2023 Christian Mazakas
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include "helpers.hpp"
+
+#include <boost/unordered/concurrent_flat_map.hpp>
+
+#include
+
+#include <string>
+#include <vector>
+
+namespace {
+  test::seed_t initialize_seed(335740237);
+
+  struct lvalue_visitor_type
+  {
+    template <class T, class X, class M>
+    void operator()(std::vector<T>& values, X& x, M const& reference_map)
+    {
+      using value_type = typename X::value_type;
+
+      std::atomic<std::uint64_t> num_visits{0};
+      std::atomic<std::uint64_t> total_count{0};
+
+      auto mut_visitor = [&num_visits, &reference_map](value_type& v) {
+        BOOST_TEST(reference_map.contains(v.first));
+        BOOST_TEST_EQ(v.second, reference_map.find(v.first)->second);
+        ++num_visits;
+      };
+
+      auto const_visitor = [&num_visits, &reference_map](value_type const& v) {
+        BOOST_TEST(reference_map.contains(v.first));
+        BOOST_TEST_EQ(v.second, reference_map.find(v.first)->second);
+        ++num_visits;
+      };
+
+      {
+        thread_runner(
+          values, [&x, &mut_visitor, &total_count](boost::span<T> s) {
+            for (auto const& val : s) {
+              auto r = val.first.x_;
+              BOOST_TEST(r >= 0);
+
+              auto count = x.visit(val.first, mut_visitor);
+              BOOST_TEST_EQ(count, 1u);
+              total_count += count;
+
+              count = x.visit(val.second, mut_visitor);
+              BOOST_TEST_EQ(count, 0u);
+            }
+          });
+
+        BOOST_TEST_EQ(num_visits, values.size());
+        BOOST_TEST_EQ(total_count, values.size());
+
+        num_visits = 0;
+        total_count = 0;
+      }
+
+      {
+        thread_runner(
+          values, [&x, &const_visitor, &total_count](boost::span<T> s) {
+            for (auto const& val : s) {
+              auto r = val.first.x_;
+              BOOST_TEST(r >= 0);
+
+              auto const& y = x;
+              auto count = y.visit(val.first, const_visitor);
+
+              BOOST_TEST_EQ(count, 1u);
+              total_count += count;
+
+              count = y.visit(val.second, const_visitor);
+              BOOST_TEST_EQ(count, 0u);
+            }
+          });
+
+        BOOST_TEST_EQ(num_visits, values.size());
+        BOOST_TEST_EQ(total_count, values.size());
+
+        num_visits = 0;
+        total_count = 0;
+      }
+
+      {
+        thread_runner(
+          values, [&x, &const_visitor, &total_count](boost::span<T> s) {
+            for (auto const& val : s) {
+              auto r = val.first.x_;
+              BOOST_TEST(r >= 0);
+
+              auto count = x.cvisit(val.first, const_visitor);
+
+              BOOST_TEST_EQ(count, 1u);
+              total_count += count;
+
+              count = x.cvisit(val.second, const_visitor);
+              BOOST_TEST_EQ(count, 0u);
+            }
+          });
+
+        BOOST_TEST_EQ(num_visits, values.size());
+        BOOST_TEST_EQ(total_count, values.size());
+
+        num_visits = 0;
+        total_count = 0;
+      }
+
+      {
+        thread_runner(values, [&x, &total_count](boost::span<T> s) {
+          for (auto const& val : s) {
+            auto r = val.first.x_;
+            BOOST_TEST(r >= 0);
+
+            auto count = x.count(val.first);
+            BOOST_TEST_EQ(count, 1u);
+            total_count += count;
+
+            count = x.count(val.second);
+            BOOST_TEST_EQ(count, 0u);
+          }
+        });
+
+        BOOST_TEST_EQ(total_count, values.size());
+
+        num_visits = 0;
+        total_count = 0;
+      }
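+
+      // contains() mirrors the count() checks above: each present key is
+      // found, while the (negated) mapped value never is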
+      {
+        thread_runner(values, [&x](boost::span<T> s) {
+          for (auto const& val : s) {
+            auto r = val.first.x_;
+            BOOST_TEST(r >= 0);
+
+            auto contains = x.contains(val.first);
+            BOOST_TEST(contains);
+
+            contains = x.contains(val.second);
+            BOOST_TEST(!contains);
+          }
+        });
+
+        num_visits = 0;
+        total_count = 0;
+      }
+    }
+  } lvalue_visitor;
+
+  struct transp_visitor_type
+  {
+    template <class T, class X, class M>
+    void operator()(std::vector<T>& values, X& x, M const& reference_map)
+    {
+      using value_type = typename X::value_type;
+
+      std::atomic<std::uint64_t> num_visits{0};
+      std::atomic<std::uint64_t> total_count{0};
+
+      auto mut_visitor = [&num_visits, &reference_map](value_type& v) {
+        BOOST_TEST(reference_map.contains(v.first));
+        BOOST_TEST_EQ(v.second, reference_map.find(v.first)->second);
+        ++num_visits;
+      };
+
+      auto const_visitor = [&num_visits, &reference_map](value_type const& v) {
+        BOOST_TEST(reference_map.contains(v.first));
+        BOOST_TEST_EQ(v.second, reference_map.find(v.first)->second);
+        ++num_visits;
+      };
+
+      {
+        thread_runner(
+          values, [&x, &mut_visitor, &total_count](boost::span<T> s) {
+            for (auto const& val : s) {
+              auto r = val.first.x_;
+              BOOST_TEST(r >= 0);
+
+              auto count = x.visit(val.first.x_, mut_visitor);
+
+              BOOST_TEST_EQ(count, 1u);
+              total_count += count;
+
+              count = x.visit(val.second.x_, mut_visitor);
+              BOOST_TEST_EQ(count, 0u);
+            }
+          });
+
+        BOOST_TEST_EQ(num_visits, values.size());
+        BOOST_TEST_EQ(total_count, values.size());
+
+        num_visits = 0;
+        total_count = 0;
+      }
+
+      {
+        thread_runner(
+          values, [&x, &const_visitor, &total_count](boost::span<T> s) {
+            for (auto const& val : s) {
+              auto r = val.first.x_;
+              BOOST_TEST(r >= 0);
+
+              auto const& y = x;
+              auto count = y.visit(val.first.x_, const_visitor);
+
+              BOOST_TEST_EQ(count, 1u);
+              total_count += count;
+
+              count = y.visit(val.second.x_, const_visitor);
+              BOOST_TEST_EQ(count, 0u);
+            }
+          });
+
+        BOOST_TEST_EQ(num_visits, values.size());
+        BOOST_TEST_EQ(total_count, values.size());
+
+        num_visits = 0;
+        total_count = 0;
+      }
+
+      {
+        thread_runner(
+          values, [&x, &const_visitor, &total_count](boost::span<T> s) {
+            for (auto const& val : s) {
+              auto r = val.first.x_;
+              BOOST_TEST(r >= 0);
+
+              auto count = x.cvisit(val.first.x_, const_visitor);
+
+              BOOST_TEST_EQ(count, 1u);
+              total_count += count;
+
+              count = x.cvisit(val.second.x_, const_visitor);
+              BOOST_TEST_EQ(count, 0u);
+            }
+          });
+
+        BOOST_TEST_EQ(num_visits, values.size());
+        BOOST_TEST_EQ(total_count, values.size());
+
+        num_visits = 0;
+        total_count = 0;
+      }
+
+      {
+        thread_runner(values, [&x, &total_count](boost::span<T> s) {
+          for (auto const& val : s) {
+            auto r = val.first.x_;
+            BOOST_TEST(r >= 0);
+
+            auto count = x.count(val.first.x_);
+            BOOST_TEST_EQ(count, 1u);
+            total_count += count;
+
+            count = x.count(val.second.x_);
+            BOOST_TEST_EQ(count, 0u);
+          }
+        });
+
+        BOOST_TEST_EQ(total_count, values.size());
+
+        num_visits = 0;
+        total_count = 0;
+      }
+
+      {
+        thread_runner(values, [&x](boost::span<T> s) {
+          for (auto const& val : s) {
+            auto r = val.first.x_;
+            BOOST_TEST(r >= 0);
+
+            auto contains = x.contains(val.first.x_);
+            BOOST_TEST(contains);
+
+            contains = x.contains(val.second.x_);
+            BOOST_TEST(!contains);
+          }
+        });
+
+        num_visits = 0;
+        total_count = 0;
+      }
+    }
+  } transp_visitor;
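+
+  // each worker thread performs its own full pass over the container, so a
+  // complete visit_all sweep must report num_threads * x.size() visits in
+  // total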
+  struct visit_all_type
+  {
+    template <class T, class X, class M>
+    void operator()(std::vector<T>& values, X& x, M const& reference_map)
+    {
+      using value_type = typename X::value_type;
+
+      std::atomic<std::uint64_t> total_count{0};
+
+      auto mut_visitor =
+        [&reference_map](std::atomic<std::uint64_t>& num_visits) {
+          return [&reference_map, &num_visits](value_type& kv) {
+            BOOST_TEST(reference_map.contains(kv.first));
+            BOOST_TEST_EQ(kv.second, reference_map.find(kv.first)->second);
+            ++num_visits;
+          };
+        };
+
+      auto const_visitor =
+        [&reference_map](std::atomic<std::uint64_t>& num_visits) {
+          return [&reference_map, &num_visits](value_type const& kv) {
+            BOOST_TEST(reference_map.contains(kv.first));
+            BOOST_TEST_EQ(kv.second, reference_map.find(kv.first)->second);
+            ++num_visits;
+          };
+        };
+
+      {
+        thread_runner(
+          values, [&x, &total_count, &mut_visitor](boost::span<T>) {
+            std::atomic<std::uint64_t> num_visits{0};
+            total_count += x.visit_all(mut_visitor(num_visits));
+            BOOST_TEST_EQ(x.size(), num_visits);
+          });
+
+        BOOST_TEST_EQ(total_count, num_threads * x.size());
+        total_count = 0;
+      }
+
+      {
+        thread_runner(
+          values, [&x, &total_count, &const_visitor](boost::span<T>) {
+            std::atomic<std::uint64_t> num_visits{0};
+            auto const& y = x;
+            total_count += y.visit_all(const_visitor(num_visits));
+            BOOST_TEST_EQ(x.size(), num_visits);
+          });
+
+        BOOST_TEST_EQ(total_count, num_threads * x.size());
+        total_count = 0;
+      }
+
+      {
+        thread_runner(
+          values, [&x, &total_count, &const_visitor](boost::span<T>) {
+            std::atomic<std::uint64_t> num_visits{0};
+            total_count += x.cvisit_all(const_visitor(num_visits));
+            BOOST_TEST_EQ(x.size(), num_visits);
+          });
+
+        BOOST_TEST_EQ(total_count, num_threads * x.size());
+        total_count = 0;
+      }
+    }
+
+  } visit_all;
+
+  struct exec_policy_visit_all_type
+  {
+    template <class T, class X, class M>
+    void operator()(std::vector<T>& values, X& x, M const& reference_map)
+    {
+#if defined(BOOST_UNORDERED_PARALLEL_ALGORITHMS)
+      using value_type = typename X::value_type;
+
+      auto mut_visitor =
+        [&reference_map](std::atomic<std::uint64_t>& num_visits) {
+          return [&reference_map, &num_visits](value_type& kv) {
+            BOOST_TEST(reference_map.contains(kv.first));
+            BOOST_TEST_EQ(kv.second, reference_map.find(kv.first)->second);
+            ++num_visits;
+          };
+        };
+
+      auto const_visitor =
+        [&reference_map](std::atomic<std::uint64_t>& num_visits) {
+          return [&reference_map, &num_visits](value_type const& kv) {
+            BOOST_TEST(reference_map.contains(kv.first));
+            BOOST_TEST_EQ(kv.second, reference_map.find(kv.first)->second);
+            ++num_visits;
+          };
+        };
+
+      {
+        thread_runner(values, [&x, &mut_visitor](boost::span<T>) {
+          std::atomic<std::uint64_t> num_visits{0};
+
+          x.visit_all(std::execution::par, mut_visitor(num_visits));
+          BOOST_TEST_EQ(x.size(), num_visits);
+        });
+      }
+
+      {
+        thread_runner(values, [&x, &const_visitor](boost::span<T>) {
+          std::atomic<std::uint64_t> num_visits{0};
+          auto const& y = x;
+
+          y.visit_all(std::execution::par, const_visitor(num_visits));
+          BOOST_TEST_EQ(x.size(), num_visits);
+        });
+      }
+
+      {
+        thread_runner(values, [&x, &const_visitor](boost::span<T>) {
+          std::atomic<std::uint64_t> num_visits{0};
+          x.cvisit_all(std::execution::par, const_visitor(num_visits));
+          BOOST_TEST_EQ(x.size(), num_visits);
+        });
+      }
+#else
+      (void)values;
+      (void)x;
+      (void)reference_map;
+#endif
+    }
+  } exec_policy_visit_all;
+
+  template <class X, class G, class F>
+  void visit(X*, G gen, F visitor, test::random_generator rg)
+  {
+    auto values = make_random_values(1024 * 16, [&] { return gen(rg); });
+    // make every mapped value negative so it can never collide with one of
+    // the (non-negative) keys
+    for (auto& val : values) {
+      if (val.second.x_ == 0) {
+        val.second.x_ = 1;
+      }
+      val.second.x_ *= -1;
+    }
+
+    auto reference_map =
+      boost::unordered_flat_map<raii, raii>(values.begin(), values.end());
+
+    raii::reset_counts();
+
+    {
+      X x;
+      for (auto const& v : values) {
+        x.insert(v);
+      }
+      BOOST_TEST_EQ(x.size(), reference_map.size());
+
+      std::uint64_t old_default_constructor = raii::default_constructor;
+      std::uint64_t old_copy_constructor = raii::copy_constructor;
+      std::uint64_t old_move_constructor = raii::move_constructor;
+      std::uint64_t old_copy_assignment = raii::copy_assignment;
+      std::uint64_t old_move_assignment = raii::move_assignment;
+
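+      // visitation must not create, destroy, copy or move any element; the
+      // snapshots above are compared against the live counters once the
+      // visitor returns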
+      visitor(values, x, reference_map);
+
+      BOOST_TEST_EQ(old_default_constructor, raii::default_constructor);
+      BOOST_TEST_EQ(old_copy_constructor, raii::copy_constructor);
+      BOOST_TEST_EQ(old_move_constructor, raii::move_constructor);
+      BOOST_TEST_EQ(old_copy_assignment, raii::copy_assignment);
+      BOOST_TEST_EQ(old_move_assignment, raii::move_assignment);
+    }
+
+    BOOST_TEST_GE(raii::default_constructor, 0u);
+    BOOST_TEST_GE(raii::copy_constructor, 0u);
+    BOOST_TEST_GE(raii::move_constructor, 0u);
+    BOOST_TEST_GT(raii::destructor, 0u);
+
+    BOOST_TEST_EQ(raii::default_constructor + raii::copy_constructor +
+                    raii::move_constructor,
+      raii::destructor);
+  }
+
+  template <class X, class G>
+  void empty_visit(X*, G gen, test::random_generator rg)
+  {
+    auto values = make_random_values(1024 * 16, [&] { return gen(rg); });
+    using values_type = decltype(values);
+    using span_value_type = typename values_type::value_type;
+
+    raii::reset_counts();
+
+    {
+      X x;
+
+      std::uint64_t old_default_constructor = raii::default_constructor;
+      std::uint64_t old_copy_constructor = raii::copy_constructor;
+      std::uint64_t old_move_constructor = raii::move_constructor;
+      std::uint64_t old_copy_assignment = raii::copy_assignment;
+      std::uint64_t old_move_assignment = raii::move_assignment;
+
+      {
+        thread_runner(values, [&x](boost::span<span_value_type> s) {
+          std::atomic<std::uint64_t> num_visits{0};
+
+          x.visit_all(
+            [&num_visits](typename X::value_type const&) { ++num_visits; });
+          BOOST_TEST_EQ(num_visits, 0u);
+
+          for (auto const& val : s) {
+            auto count = x.visit(val.first,
+              [&num_visits](typename X::value_type const&) { ++num_visits; });
+            BOOST_TEST_EQ(count, 0u);
+          }
+        });
+      }
+
+      BOOST_TEST_EQ(old_default_constructor, raii::default_constructor);
+      BOOST_TEST_EQ(old_copy_constructor, raii::copy_constructor);
+      BOOST_TEST_EQ(old_move_constructor, raii::move_constructor);
+      BOOST_TEST_EQ(old_copy_assignment, raii::copy_assignment);
+      BOOST_TEST_EQ(old_move_assignment, raii::move_assignment);
+    }
+
+    BOOST_TEST_EQ(raii::default_constructor, 0u);
+    BOOST_TEST_EQ(raii::copy_constructor, 0u);
+    BOOST_TEST_EQ(raii::move_constructor, 0u);
+    BOOST_TEST_EQ(raii::destructor, 0u);
+  }
+
+  template <class X, class G>
+  void insert_and_visit(X*, G gen, test::random_generator rg)
+  {
+    // here we attempt to ensure happens-before and synchronizes-with
+    // relationships: the visitation thread essentially chases the insertion
+    // thread, and we double-check unrelated loads/stores to make sure that a
+    // store performed before an insert is visible once the visitation thread
+    // observes the inserted element
+
+    BOOST_TEST(rg == test::sequential);
+
+    auto const values = make_random_values(1024 * 16, [&] { return gen(rg); });
+
+    {
+      raii::reset_counts();
+
+      X x;
+
+      std::thread t1, t2;
+      boost::latch l(2);
+      std::vector<std::string> strs(values.size());
+
+      t1 = std::thread([&l, &values, &x, &strs] {
+        l.arrive_and_wait();
+        for (std::size_t idx = 0; idx < values.size(); ++idx) {
+          strs[idx] = "rawr";
+          auto const& val = values[idx];
+          x.insert(val);
+        }
+      });
+
+      t2 = std::thread([&l, &values, &x, &strs] {
+        l.arrive_and_wait();
+
+        for (std::size_t idx = 0; idx < values.size(); ++idx) {
+          std::atomic_bool b{false};
+          // spin until the element appears; once cvisit succeeds, the write
+          // to strs[idx] made before the insert must be visible too
+          while (!b) {
+            x.cvisit(values[idx].first,
+              [&b, &strs, idx, &values](typename X::value_type const& v) {
+                BOOST_TEST_EQ(v.second, values[idx].second);
+                BOOST_TEST_EQ(strs[idx], "rawr");
+                b = true;
+              });
+          }
+        }
+      });
+
+      t1.join();
+      t2.join();
+    }
+    check_raii_counts();
+  }
+
+  boost::unordered::concurrent_flat_map<raii, raii>* map;
+  boost::unordered::concurrent_flat_map<raii, raii, transp_hash,
+    transp_key_equal>* transp_map;
+
+} // namespace
+
+using test::default_generator;
+using test::limited_range;
+using test::sequential;
+
+// clang-format off
+
+UNORDERED_TEST(
+  visit,
+  ((map))
+  ((value_type_generator)(init_type_generator))
+  ((lvalue_visitor)(visit_all)(exec_policy_visit_all))
+  ((default_generator)(sequential)(limited_range)))
+
+UNORDERED_TEST(
+  visit,
+  ((transp_map))
+  ((value_type_generator)(init_type_generator))
+  ((transp_visitor))
+  ((default_generator)(sequential)(limited_range)))
+
+UNORDERED_TEST(
+  empty_visit,
+  ((map)(transp_map))
+  ((value_type_generator)(init_type_generator))
+  ((default_generator)(sequential)(limited_range))
+)
+
+UNORDERED_TEST(
+  insert_and_visit,
+  ((map))
+  ((value_type_generator))
+  ((sequential))
+)
+
+// clang-format on
+
+RUN_TESTS()
diff --git a/test/helpers/test.hpp b/test/helpers/test.hpp
index dabd7c24..3417178d 100644
--- a/test/helpers/test.hpp
+++ b/test/helpers/test.hpp
@@ -6,6 +6,7 @@
 #if !defined(BOOST_UNORDERED_TEST_TEST_HEADER)
 #define BOOST_UNORDERED_TEST_TEST_HEADER
 
+#include
 #include
 #include
 #include
diff --git a/test/helpers/unordered.hpp b/test/helpers/unordered.hpp
index d3cbf17f..2f84dea9 100644
--- a/test/helpers/unordered.hpp
+++ b/test/helpers/unordered.hpp
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include <boost/unordered/concurrent_flat_map.hpp>
 #else
 #include
 #include
diff --git a/test/unordered/deduction_tests.cpp b/test/unordered/deduction_tests.cpp
index 7c942186..571aa7fa 100644
--- a/test/unordered/deduction_tests.cpp
+++ b/test/unordered/deduction_tests.cpp
@@ -13,6 +13,7 @@
 #include
 #include
+#include <boost/unordered/concurrent_flat_map.hpp>
 
 struct hash_equals
 {
@@ -432,6 +433,7 @@ int main()
   map_tests();
   map_tests();
   map_tests();
+  map_tests<boost::concurrent_flat_map>();
   set_tests();
   set_tests();
   set_tests();
diff --git a/test/unordered/link_test_1.cpp b/test/unordered/link_test_1.cpp
index 3419d9da..7b03c973 100644
--- a/test/unordered/link_test_1.cpp
+++ b/test/unordered/link_test_1.cpp
@@ -7,8 +7,12 @@
 #include "../helpers/unordered.hpp"
 
 #ifdef BOOST_UNORDERED_FOA_TESTS
+
+#include <boost/unordered/concurrent_flat_map.hpp>
+
 void foo(boost::unordered_flat_set<int>&, boost::unordered_flat_map<int, int>&,
-  boost::unordered_node_set<int>&, boost::unordered_node_map<int, int>&);
+  boost::unordered_node_set<int>&, boost::unordered_node_map<int, int>&,
+  boost::concurrent_flat_map<int, int>&);
 
 int main()
 {
@@ -16,8 +20,9 @@ int main()
   boost::unordered_flat_set<int> x1;
   boost::unordered_flat_map<int, int> x2;
   boost::unordered_node_set<int> x3;
   boost::unordered_node_map<int, int> x4;
+  boost::concurrent_flat_map<int, int> x5;
 
-  foo(x1, x2, x3, x4);
+  foo(x1, x2, x3, x4, x5);
 
   return 0;
 }
diff --git a/test/unordered/link_test_2.cpp b/test/unordered/link_test_2.cpp
index aea7a728..917544dd 100644
--- a/test/unordered/link_test_2.cpp
+++ b/test/unordered/link_test_2.cpp
@@ -7,9 +7,13 @@
 #include "../helpers/unordered.hpp"
 
 #ifdef BOOST_UNORDERED_FOA_TESTS
+
+#include <boost/unordered/concurrent_flat_map.hpp>
+
 void foo(boost::unordered_flat_set<int>& x1,
   boost::unordered_flat_map<int, int>& x2,
   boost::unordered_node_set<int>& x3,
-  boost::unordered_node_map<int, int>& x4)
+  boost::unordered_node_map<int, int>& x4,
+  boost::concurrent_flat_map<int, int>& x5)
 {
 #if BOOST_WORKAROUND(BOOST_CODEGEARC, BOOST_TESTED_AT(0x0613))
   struct dummy
@@ -23,6 +27,7 @@ void foo(boost::unordered_flat_set<int>& x1,
   x2[2] = 2;
   x3.insert(3);
   x4.insert(std::make_pair(4, 5));
+  x5.insert(std::make_pair(5, 6));
 }
 #else
 void foo(boost::unordered_set<int>& x1, boost::unordered_map<int, int>& x2,
diff --git a/test/unordered/scoped_allocator.cpp b/test/unordered/scoped_allocator.cpp
index 01aa1202..86e901ba 100644
--- a/test/unordered/scoped_allocator.cpp
+++ b/test/unordered/scoped_allocator.cpp
@@ -7,7 +7,9 @@
 #include
 #include
 
-#if BOOST_CXX_VERSION <= 199711L || BOOST_WORKAROUND(BOOST_GCC_VERSION, < 40800)
+#if BOOST_CXX_VERSION <= 199711L || \
+  BOOST_WORKAROUND(BOOST_GCC_VERSION, < 40800) || \
+  BOOST_WORKAROUND(BOOST_MSVC, == 1900)
 
 BOOST_PRAGMA_MESSAGE(
   "scoped allocator adaptor tests only work under C++11 and above")