diff --git a/doc/unordered/concurrent.adoc b/doc/unordered/concurrent.adoc
index 9026b31e..3410570a 100644
--- a/doc/unordered/concurrent.adoc
+++ b/doc/unordered/concurrent.adoc
@@ -132,7 +132,7 @@
 by using `cvisit` overloads (for instance, `insert_or_cvisit`) and may result
 in higher parallelization. Consult the xref:#concurrent_flat_map[reference]
 for a complete list of available operations.
 
-== Whole-table Visitation
+== Whole-Table Visitation
 
 In the absence of iterators, `boost::concurrent_flat_map` provides `visit_all`
 as an alternative way to process all the elements in the map:
diff --git a/doc/unordered/intro.adoc b/doc/unordered/intro.adoc
index 0a8a14a9..46bc899c 100644
--- a/doc/unordered/intro.adoc
+++ b/doc/unordered/intro.adoc
@@ -34,7 +34,7 @@
 boost::unordered_set +
 boost::unordered_map +
 boost::unordered_multiset +
 boost::unordered_multimap
-^| N/A
+^|
 
 ^.^h|*Open addressing*
 ^m|
 boost::unordered_node_set +
@@ -85,7 +85,7 @@ namespace boost {
 }
 ----
 
-To store an object in an unordered associative container requires both a
+Storing an object in an unordered associative container requires both a
 key equality function and a hash function. The default function objects in
 the standard containers support a few basic types including integer types,
 floating point types, pointer types, and the standard strings. Since
diff --git a/include/boost/unordered/detail/foa/concurrent_table.hpp b/include/boost/unordered/detail/foa/concurrent_table.hpp
index 2bc55744..0f1f1145 100644
--- a/include/boost/unordered/detail/foa/concurrent_table.hpp
+++ b/include/boost/unordered/detail/foa/concurrent_table.hpp
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -64,6 +65,8 @@ using is_execution_policy=std::false_type;
 
 namespace foa{
 
+static constexpr std::size_t cacheline_size=64;
+
 template<typename T,std::size_t N>
 class cache_aligned_array
 {
@@ -76,20 +79,20 @@ public:
 
   T& operator[](std::size_t pos)noexcept{return *data(pos);}
 
 private:
-  static constexpr std::size_t cacheline=64;
   static constexpr std::size_t element_offset=
-    (sizeof(T)+cacheline-1)/cacheline*cacheline;
+    (sizeof(T)+cacheline_size-1)/cacheline_size*cacheline_size;
 
-  BOOST_STATIC_ASSERT(alignof(T)<=cacheline);
+  BOOST_STATIC_ASSERT(alignof(T)<=cacheline_size);
 
   T* data(std::size_t pos)noexcept
   {
     return reinterpret_cast<T*>(
-      (reinterpret_cast<std::uintptr_t>(&buf)+cacheline-1)/cacheline*cacheline+
-      pos*element_offset);
+      (reinterpret_cast<std::uintptr_t>(&buf)+cacheline_size-1)/
+        cacheline_size*cacheline_size
+      +pos*element_offset);
   }
 
-  unsigned char buf[element_offset*N+cacheline-1];
+  unsigned char buf[element_offset*N+cacheline_size-1];
 };
 
 template
@@ -297,6 +300,40 @@ struct concurrent_table_arrays:table_arrays
   group_access *group_accesses;
 };
 
+struct atomic_size_control
+{
+  static constexpr auto atomic_size_t_size=sizeof(std::atomic<std::size_t>);
+  BOOST_STATIC_ASSERT(atomic_size_t_size<cacheline_size);
+
+  std::atomic<std::size_t> ml;
+  unsigned char pad1_[cacheline_size-atomic_size_t_size];
+  std::atomic<std::size_t> size;
+};
+
+/* std::swap can't be used on non-assignable atomics */
+
+inline void
+swap_atomic_size_t(std::atomic<std::size_t>& x,std::atomic<std::size_t>& y)
+{
+  std::size_t tmp=x;
+  x=static_cast<std::size_t>(y);
+  y=tmp;
+}
+
+inline void swap(atomic_size_control& x,atomic_size_control& y)
+{
+  swap_atomic_size_t(x.ml,y.ml);
+  swap_atomic_size_t(x.size,y.size);
+}
+
 /* foa::concurrent_table serves as the foundation for end-user concurrent
  * hash containers. The TypePolicy parameter can specify flat/node-based
  * map-like and set-like containers, though currently we're only providing
@@ -357,7 +394,7 @@ struct concurrent_table_arrays:table_arrays
 
 template<typename TypePolicy,typename Hash,typename Pred,typename Allocator>
 using concurrent_table_core_impl=table_core<
   TypePolicy,group15,concurrent_table_arrays,
-  std::atomic<std::size_t>,Hash,Pred,Allocator>;
+  atomic_size_control,Hash,Pred,Allocator>;
 
 #include
@@ -988,8 +1025,8 @@ private:
   std::size_t unprotected_size()const
   {
-    std::size_t m=this->ml;
-    std::size_t s=this->size_;
+    std::size_t m=this->size_ctrl.ml;
+    std::size_t s=this->size_ctrl.size;
     return s<=m?s:m;
   }
 
@@ -1105,7 +1142,7 @@
     if(this->find(k,pos0,hash))return false;
 
-    if(BOOST_LIKELY(this->size_<this->ml)){
+    if(BOOST_LIKELY(this->size_ctrl.size<this->size_ctrl.ml)){
       this->unchecked_emplace_at(pos0,hash,std::forward<Args>(args)...);
     }
     else{
@@ -1118,15 +1155,15 @@
   {
     reserve_size(concurrent_table& x_):x{x_}
     {
-      size_=++x.size_;
+      size_=++x.size_ctrl.size;
     }
 
     ~reserve_size()
     {
-      if(!commit_)--x.size_;
+      if(!commit_)--x.size_ctrl.size;
     }
 
-    bool succeeded()const{return size_<=x.ml;}
+    bool succeeded()const{return size_<=x.size_ctrl.ml;}
 
     void commit(){commit_=true;}
 
@@ -1200,7 +1237,9 @@
   void rehash_if_full()
   {
     auto lck=exclusive_access();
-    if(this->size_==this->ml)this->unchecked_rehash_for_growth();
+    if(this->size_ctrl.size==this->size_ctrl.ml){
+      this->unchecked_rehash_for_growth();
+    }
   }
 
   template
diff --git a/include/boost/unordered/detail/foa/core.hpp b/include/boost/unordered/detail/foa/core.hpp
index 805f866f..0b985265 100644
--- a/include/boost/unordered/detail/foa/core.hpp
+++ b/include/boost/unordered/detail/foa/core.hpp
@@ -1234,7 +1234,7 @@ alloc_make_insert_type(const Allocator& al,Args&&... args)
 
 template<
   typename TypePolicy,typename Group,template class Arrays,
-  typename SizeImpl,typename Hash,typename Pred,typename Allocator
+  typename SizeControl,typename Hash,typename Pred,typename Allocator
 >
 
 class
@@ -1258,7 +1258,7 @@ public:
   using alloc_traits=boost::allocator_traits;
   using element_type=typename type_policy::element_type;
   using arrays_type=Arrays;
-  using size_impl_type=SizeImpl;
+  using size_ctrl_type=SizeControl;
 
   using key_type=typename type_policy::key_type;
   using init_type=typename type_policy::init_type;
@@ -1279,7 +1279,7 @@ public:
     const Pred& pred_=Pred(),const Allocator& al_=Allocator()):
     hash_base{empty_init,h_},pred_base{empty_init,pred_},
     allocator_base{empty_init,al_},arrays(new_arrays(n)),
-    ml{initial_max_load()},size_{0}
+    size_ctrl{initial_max_load(),0}
   {}
 
   table_core(const table_core& x):
@@ -1293,11 +1293,11 @@ public:
     hash_base{empty_init,std::move(x.h())},
     pred_base{empty_init,std::move(x.pred())},
     allocator_base{empty_init,std::move(x.al())},
-    arrays(x.arrays),ml{std::size_t(x.ml)},size_{std::size_t(x.size_)}
+    arrays(x.arrays),size_ctrl{x.size_ctrl}
   {
     x.arrays=x.new_arrays(0);
-    x.ml=x.initial_max_load();
-    x.size_=0;
+    x.size_ctrl.ml=x.initial_max_load();
+    x.size_ctrl.size=0;
   }
 
   table_core(const table_core& x,const Allocator& al_):
@@ -1310,9 +1310,9 @@ public:
     table_core{std::move(x.h()),std::move(x.pred()),al_}
   {
     if(al()==x.al()){
-      std::swap(arrays,x.arrays);
-      swap_size_impl(size_,x.size_);
-      swap_size_impl(ml,x.ml);
+      using std::swap;
+      swap(arrays,x.arrays);
+      swap(size_ctrl,x.size_ctrl);
     }
     else{
      reserve(x.size());
@@ -1410,8 +1410,7 @@ public:
       reserve(0);
       move_assign_if(al(),x.al());
       swap(arrays,x.arrays);
-      swap_size_impl(ml,x.ml);
-      swap_size_impl(size_,x.size_);
+      swap(size_ctrl,x.size_ctrl);
     }
     else{
       /* noshrink: favor memory reuse over tightness
       */
@@ -1437,7 +1436,7 @@ public:
   allocator_type get_allocator()const noexcept{return al();}
 
   bool empty()const noexcept{return size()==0;}
-  std::size_t size()const noexcept{return size_;}
+  std::size_t size()const noexcept{return size_ctrl.size;}
   std::size_t max_size()const noexcept{return SIZE_MAX;}
 
   BOOST_FORCEINLINE
@@ -1522,8 +1521,7 @@ public:
     swap(h(),x.h());
     swap(pred(),x.pred());
     swap(arrays,x.arrays);
-    swap_size_impl(ml,x.ml);
-    swap_size_impl(size_,x.size_);
+    swap(size_ctrl,x.size_ctrl);
   }
 
   void clear()noexcept
@@ -1541,8 +1539,8 @@ public:
         pg->initialize();
       }
       arrays.groups[arrays.groups_size_mask].set_sentinel();
-      ml=initial_max_load();
-      size_=0;
+      size_ctrl.ml=initial_max_load();
+      size_ctrl.size=0;
     }
   }
 
@@ -1562,7 +1560,7 @@ public:
   float max_load_factor()const noexcept{return mlf;}
 
-  std::size_t max_load()const noexcept{return ml;}
+  std::size_t max_load()const noexcept{return size_ctrl.ml;}
 
   void rehash(std::size_t n)
   {
@@ -1678,7 +1676,7 @@
   {
     auto res=nosize_unchecked_emplace_at(
      arrays,pos0,hash,std::forward<Args>(args)...);
-    ++size_;
+    ++size_ctrl.size;
     return res;
   }
 
@@ -1708,7 +1706,7 @@
 
     /* new_arrays_ lifetime taken care of by unchecked_rehash */
     unchecked_rehash(new_arrays_);
-    ++size_;
+    ++size_ctrl.size;
     return it;
   }
 
@@ -1725,7 +1723,7 @@
        auto new_arrays_=new_arrays(n);
        delete_arrays(arrays);
        arrays=new_arrays_;
-        ml=initial_max_load();
+        size_ctrl.ml=initial_max_load();
       }
     }
   }
@@ -1786,8 +1784,7 @@ public:
   }
 
   arrays_type arrays;
-  size_impl_type ml;
-  size_impl_type size_;
+  size_ctrl_type size_ctrl;
 
 private:
   template<
@@ -1806,7 +1803,7 @@
     hash_base{empty_init,std::move(h_)},
     pred_base{empty_init,std::move(pred_)},
     allocator_base{empty_init,al_},arrays(new_arrays(0)),
-    ml{initial_max_load()},size_{0}
+    size_ctrl{initial_max_load(),0}
   {}
 
   arrays_type new_arrays(std::size_t n)
@@ -1876,7 +1873,7 @@
     if(arrays.elements){
       copy_elements_array_from(x);
       copy_groups_array_from(x);
-      size_=std::size_t(x.size_);
+      size_ctrl.size=std::size_t(x.size_ctrl.size);
     }
   }
 
@@ -1961,23 +1958,15 @@
     }
   }
 
-  static inline void swap_size_impl(size_impl_type& x,size_impl_type& y)
-  {
-    /* std::swap can't be used on non-assignable atomics */
-    std::size_t tmp=x;
-    x=static_cast<std::size_t>(y);
-    y=tmp;
-  }
-
   void recover_slot(unsigned char* pc)
   {
     /* If this slot potentially caused overflow, we decrease the maximum load so
      * that average probe length won't increase unboundedly in repeated
      * insert/erase cycles (drift).
      */
-    ml-=group_type::maybe_caused_overflow(pc);
+    size_ctrl.ml-=group_type::maybe_caused_overflow(pc);
     group_type::reset(pc);
-    --size_;
+    --size_ctrl.size;
   }
 
   void recover_slot(group_type* pg,std::size_t pos)
@@ -2043,7 +2032,7 @@
     }
     delete_arrays(arrays);
     arrays=new_arrays_;
-    ml=initial_max_load();
+    size_ctrl.ml=initial_max_load();
   }
 
   template
diff --git a/include/boost/unordered/detail/foa/table.hpp b/include/boost/unordered/detail/foa/table.hpp
index 4bb66fd5..a2fa96e6 100644
--- a/include/boost/unordered/detail/foa/table.hpp
+++ b/include/boost/unordered/detail/foa/table.hpp
@@ -46,6 +46,12 @@ struct plain_integral
   Integral n;
 };
 
+struct plain_size_control
+{
+  std::size_t ml;
+  std::size_t size;
+};
+
 template
 class table;
 
@@ -221,7 +227,7 @@ private:
 template<typename TypePolicy,typename Hash,typename Pred,typename Allocator>
 using table_core_impl=
   table_core,table_arrays,
-  std::size_t,Hash,Pred,Allocator>;
+  plain_size_control,Hash,Pred,Allocator>;
 
 #include
@@ -475,7 +481,7 @@ private:
     if(loc){
       return {make_iterator(loc),false};
     }
-    if(BOOST_LIKELY(this->size_<this->ml)){
+    if(BOOST_LIKELY(this->size_ctrl.size<this->size_ctrl.ml)){
       return {
         make_iterator(
           this->unchecked_emplace_at(pos0,hash,std::forward<Args>(args)...)),
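
For reviewers who want the layout and swap mechanics in isolation, below is a minimal standalone sketch of the two ideas the patch combines in the new size_ctrl member: keeping the ml (maximum load) and size counters a cache line apart so that concurrent updates to one do not invalidate the cache line holding the other, and swapping non-assignable atomics through plain loads and stores. The names padded_size_control and swap_counter are hypothetical, invented for this example; this is not the library's actual code, which is the atomic_size_control/plain_size_control shown in the diff above.

    #include <atomic>
    #include <cstddef>
    #include <cstdio>

    // Assumed cache line size, mirroring the cacheline_size constant in the patch.
    constexpr std::size_t cacheline_size=64;

    // Hypothetical illustration of the layout idea behind atomic_size_control:
    // ml and size are padded apart so they land on different cache lines.
    struct padded_size_control
    {
      std::atomic<std::size_t> ml{0};
      unsigned char            pad_[cacheline_size-sizeof(std::atomic<std::size_t>)];
      std::atomic<std::size_t> size{0};
    };

    // std::swap can't be used on non-assignable atomics, so the swap goes
    // through plain loads and stores, analogous to swap_atomic_size_t above.
    inline void swap_counter(std::atomic<std::size_t>& x,std::atomic<std::size_t>& y)
    {
      std::size_t tmp=x.load();
      x.store(y.load());
      y.store(tmp);
    }

    inline void swap(padded_size_control& x,padded_size_control& y)
    {
      swap_counter(x.ml,y.ml);
      swap_counter(x.size,y.size);
    }

    int main()
    {
      padded_size_control a,b;
      a.ml=100;a.size=3;
      b.ml=200;b.size=7;
      swap(a,b);

      // size sits one full cache line after ml, so the two never share a line.
      auto base=reinterpret_cast<const char*>(&a);
      std::printf(
        "ml offset=%td, size offset=%td\n",
        reinterpret_cast<const char*>(&a.ml)-base,
        reinterpret_cast<const char*>(&a.size)-base);
      std::printf(
        "a={ml=%zu,size=%zu} b={ml=%zu,size=%zu}\n",
        a.ml.load(),a.size.load(),b.ml.load(),b.size.load());
    }

The same round-up arithmetic, (n+cacheline_size-1)/cacheline_size*cacheline_size, is what cache_aligned_array in the patch uses to compute its per-element stride, so each element starts on its own cache line.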