From e712c37e9f437fd9667f8a25baaa52d830476980 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Sun, 28 Apr 2024 18:44:23 +0200 Subject: [PATCH 01/39] added stats to foa::table --- .../unordered/detail/cumulative_stats.hpp | 155 ++++++++++++++++++ include/boost/unordered/detail/foa/core.hpp | 83 ++++++++++ include/boost/unordered/detail/foa/table.hpp | 9 + 3 files changed, 247 insertions(+) create mode 100644 include/boost/unordered/detail/cumulative_stats.hpp diff --git a/include/boost/unordered/detail/cumulative_stats.hpp b/include/boost/unordered/detail/cumulative_stats.hpp new file mode 100644 index 00000000..18f96d39 --- /dev/null +++ b/include/boost/unordered/detail/cumulative_stats.hpp @@ -0,0 +1,155 @@ +/* Copyright 2024 Joaquin M Lopez Munoz. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + * See https://www.boost.org/libs/unordered for library home page. + */ + +#ifndef BOOST_UNORDERED_DETAIL_CUMULATIVE_STATS_HPP +#define BOOST_UNORDERED_DETAIL_CUMULATIVE_STATS_HPP + +#include +#include +#include +#include +#include + +#if defined(BOOST_HAS_THREADS) +#include +#endif + +namespace boost{ +namespace unordered{ +namespace detail{ + +/* Cumulative one-pass calculation of the average, variance and deviation of + * running sequences. 
+ */ + +struct cumulative_stats_summary +{ + double average; + double variance; + double deviation; +}; + +struct cumulative_stats_data +{ + double m=0.0; + double m_prior=0.0; + double s=0.0; +}; + +struct welfords_algorithm /* 0-based */ +{ + template + int operator()(T&& x,cumulative_stats_data& d)const + { + d.m_prior=d.m; + d.m+=(static_cast(x)-d.m)/static_cast(n); + d.s+=(n!=1)* + (static_cast(x)-d.m_prior)*(static_cast(x)-d.m); + + return 0; /* mp11::tuple_transform requires that return type not be void */ + } + + std::size_t n; +}; + +/* Stats calculated jointly for N same-sized sequences to save the space + * for n. + */ + +template +class cumulative_stats +{ +public: + void reset()noexcept{*this=cumulative_stats();} + + template + void add(Ts&&... xs) + { + static_assert( + sizeof...(Ts)==N,"A sample must be provided for each sequence."); + + mp11::tuple_transform( + welfords_algorithm{++n}, + std::forward_as_tuple(std::forward(xs)...), + data); + } + + template + cumulative_stats_summary get_summary()const noexcept + { + double average=data[I].m, + variance=n!=0?data[I].s/static_cast(n):0.0, /* biased */ + deviation=std::sqrt(variance); + return {average,variance,deviation}; + } + +private: + std::size_t n=0; + std::array data; +}; + +#if defined(BOOST_HAS_THREADS) + +template +class concurrent_cumulative_stats:cumulative_stats +{ + using super=cumulative_stats; + using lock_guard=std::lock_guard; + +public: + concurrent_cumulative_stats()noexcept:super{}{} + concurrent_cumulative_stats(const concurrent_cumulative_stats& x)noexcept: + concurrent_cumulative_stats{x,lock_guard{x.mut}}{} + + concurrent_cumulative_stats& + operator=(const concurrent_cumulative_stats& x)noexcept + { + auto x1=x; + lock_guard lck{mut}; + static_cast(*this)=x1; + return *this; + } + + void reset()noexcept + { + lock_guard lck{mut}; + super::reset(); + } + + template + void add(Ts&&... 
xs) + { + lock_guard lck{mut}; + super::add(std::forward(xs)...); + } + + template + cumulative_stats_summary get_summary()const noexcept + { + lock_guard lck{mut}; + return super::template get_summary(); + } + +private: + concurrent_cumulative_stats(const super& x,lock_guard&&):super{x}{} + + mutable std::mutex mut; +}; + +#else + +template +using concurrent_cumulative_stats=cumulative_stats; + +#endif + +} /* namespace detail */ +} /* namespace unordered */ +} /* namespace boost */ + +#endif diff --git a/include/boost/unordered/detail/foa/core.hpp b/include/boost/unordered/detail/foa/core.hpp index 9955ca1d..6fcc746a 100644 --- a/include/boost/unordered/detail/foa/core.hpp +++ b/include/boost/unordered/detail/foa/core.hpp @@ -40,6 +40,10 @@ #include #include +#if defined(BOOST_UNORDERED_ENABLE_STATS) +#include +#endif + #if !defined(BOOST_UNORDERED_DISABLE_SSE2) #if defined(BOOST_UNORDERED_ENABLE_SSE2)|| \ defined(__SSE2__)|| \ @@ -864,6 +868,7 @@ struct pow2_quadratic_prober pow2_quadratic_prober(std::size_t pos_):pos{pos_}{} inline std::size_t get()const{return pos;} + inline std::size_t length()const{return step+1;} /* next returns false when the whole array has been traversed, which ends * probing (in practice, full-table probing will only happen with very small @@ -1125,6 +1130,39 @@ struct table_arrays value_type_pointer elements_; }; +#if defined(BOOST_UNORDERED_ENABLE_STATS) +/* stats support */ + +struct table_core_insertion_stats +{ + cumulative_stats_summary probe_length; +}; + +struct table_core_lookup_stats +{ + cumulative_stats_summary probe_length; + cumulative_stats_summary num_comparisons; +}; + +struct table_core_stats +{ + table_core_insertion_stats insertion; + table_core_lookup_stats successful_lookup, + unsuccessful_lookup; +}; + +#define BOOST_UNORDERED_ADD_STATS(stats,args) stats.add args +#define BOOST_UNORDERED_STATS_COUNTER(name) std::size_t name=0 +#define BOOST_UNORDERED_INCREMENT_STATS_COUNTER(name) ++name + +#else + +#define 
BOOST_UNORDERED_ADD_STATS(stats,args) +#define BOOST_UNORDERED_STATS_COUNTER(name) +#define BOOST_UNORDERED_INCREMENT_STATS_COUNTER(name) + +#endif + struct if_constexpr_void_else{void operator()()const{}}; template @@ -1395,6 +1433,10 @@ public: using locator=table_locator; using arrays_holder_type=arrays_holder; +#if defined(BOOST_UNORDERED_ENABLE_STATS) + using stats=table_core_stats; +#endif + table_core( std::size_t n=default_bucket_count,const Hash& h_=Hash(), const Pred& pred_=Pred(),const Allocator& al_=Allocator()): @@ -1639,6 +1681,7 @@ public: BOOST_FORCEINLINE locator find( const Key& x,std::size_t pos0,std::size_t hash)const { + BOOST_UNORDERED_STATS_COUNTER(num_cmps); prober pb(pos0); do{ auto pos=pb.get(); @@ -1650,18 +1693,25 @@ public: auto p=elements+pos*N; BOOST_UNORDERED_PREFETCH_ELEMENTS(p,N); do{ + BOOST_UNORDERED_INCREMENT_STATS_COUNTER(num_cmps); auto n=unchecked_countr_zero(mask); if(BOOST_LIKELY(bool(pred()(x,key_from(p[n]))))){ + BOOST_UNORDERED_ADD_STATS( + successful_lookup_stats,(pb.length(),num_cmps)); return {pg,n,p+n}; } mask&=mask-1; }while(mask); } if(BOOST_LIKELY(pg->is_not_overflowed(hash))){ + BOOST_UNORDERED_ADD_STATS( + unsuccessful_lookup_stats,(pb.length(),num_cmps)); return {}; } } while(BOOST_LIKELY(pb.next(arrays.groups_size_mask))); + BOOST_UNORDERED_ADD_STATS( + unsuccessful_lookup_stats,(pb.length(),num_cmps)); return {}; } @@ -1746,6 +1796,32 @@ public: rehash(std::size_t(std::ceil(float(n)/mlf))); } +#if defined(BOOST_UNORDERED_ENABLE_STATS) + stats get_stats()const + { + return { + { + insertion_stats.get_summary<0>() + }, + { + successful_lookup_stats.get_summary<0>(), + successful_lookup_stats.get_summary<1>() + }, + { + unsuccessful_lookup_stats.get_summary<0>(), + unsuccessful_lookup_stats.get_summary<1>() + } + }; + } + + void reset_stats() + { + insertion_stats.reset(); + successful_lookup_stats.reset(); + unsuccessful_lookup_stats.reset(); + } +#endif + friend bool operator==(const table_core& x,const 
table_core& y) { return @@ -1956,6 +2032,12 @@ public: arrays_type arrays; size_ctrl_type size_ctrl; +#if defined(BOOST_UNORDERED_ENABLE_STATS) + concurrent_cumulative_stats<1> insertion_stats; + mutable concurrent_cumulative_stats<2> successful_lookup_stats, + unsuccessful_lookup_stats; +#endif + private: template< typename,typename,template class, @@ -2243,6 +2325,7 @@ private: auto p=arrays_.elements()+pos*N+n; construct_element(p,std::forward(args)...); pg->set(n,hash); + BOOST_UNORDERED_ADD_STATS(insertion_stats,(pb.length())); return {pg,n,p}; } else pg->mark_overflow(hash); diff --git a/include/boost/unordered/detail/foa/table.hpp b/include/boost/unordered/detail/foa/table.hpp index 87aa0495..2f02a7a4 100644 --- a/include/boost/unordered/detail/foa/table.hpp +++ b/include/boost/unordered/detail/foa/table.hpp @@ -361,6 +361,10 @@ public: const_iterator>::type; using erase_return_type=table_erase_return_type; +#if defined(BOOST_UNORDERED_ENABLE_STATS) + using stats=typename super::stats; +#endif + table( std::size_t n=default_bucket_count,const Hash& h_=Hash(), const Pred& pred_=Pred(),const Allocator& al_=Allocator()): @@ -542,6 +546,11 @@ public: using super::rehash; using super::reserve; +#if defined(BOOST_UNORDERED_ENABLE_STATS) + using super::get_stats; + using super::reset_stats; +#endif + template friend std::size_t erase_if(table& x,Predicate& pr) { From 05b66e1034b7f20794c065d4a149d23b766dd07c Mon Sep 17 00:00:00 2001 From: joaquintides Date: Sun, 28 Apr 2024 18:44:40 +0200 Subject: [PATCH 02/39] added stats to boost::unordered_flat_map --- include/boost/unordered/unordered_flat_map.hpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/boost/unordered/unordered_flat_map.hpp b/include/boost/unordered/unordered_flat_map.hpp index 41ce7081..dad235ad 100644 --- a/include/boost/unordered/unordered_flat_map.hpp +++ b/include/boost/unordered/unordered_flat_map.hpp @@ -76,6 +76,10 @@ namespace boost { using iterator = typename 
table_type::iterator; using const_iterator = typename table_type::const_iterator; +#if defined(BOOST_UNORDERED_ENABLE_STATS) + using stats = typename table_type::stats; +#endif + unordered_flat_map() : unordered_flat_map(0) {} explicit unordered_flat_map(size_type n, hasher const& h = hasher(), @@ -654,6 +658,15 @@ namespace boost { void reserve(size_type n) { table_.reserve(n); } +#if defined(BOOST_UNORDERED_ENABLE_STATS) + /// Stats + /// + + stats get_stats() const { return table_.get_stats(); } + + void reset_stats() { table_.reset_stats(); } +#endif + /// Observers /// From b07cee08c4280d85803275628d84f4bb629dd06f Mon Sep 17 00:00:00 2001 From: joaquintides Date: Mon, 29 Apr 2024 11:23:15 +0200 Subject: [PATCH 03/39] added stats to the rest of open-addressing containers --- include/boost/unordered/unordered_flat_set.hpp | 13 +++++++++++++ include/boost/unordered/unordered_node_map.hpp | 13 +++++++++++++ include/boost/unordered/unordered_node_set.hpp | 13 +++++++++++++ 3 files changed, 39 insertions(+) diff --git a/include/boost/unordered/unordered_flat_set.hpp b/include/boost/unordered/unordered_flat_set.hpp index a6aba397..4f595ef2 100644 --- a/include/boost/unordered/unordered_flat_set.hpp +++ b/include/boost/unordered/unordered_flat_set.hpp @@ -72,6 +72,10 @@ namespace boost { using iterator = typename table_type::iterator; using const_iterator = typename table_type::const_iterator; +#if defined(BOOST_UNORDERED_ENABLE_STATS) + using stats = typename table_type::stats; +#endif + unordered_flat_set() : unordered_flat_set(0) {} explicit unordered_flat_set(size_type n, hasher const& h = hasher(), @@ -474,6 +478,15 @@ namespace boost { void reserve(size_type n) { table_.reserve(n); } +#if defined(BOOST_UNORDERED_ENABLE_STATS) + /// Stats + /// + + stats get_stats() const { return table_.get_stats(); } + + void reset_stats() { table_.reset_stats(); } +#endif + /// Observers /// diff --git a/include/boost/unordered/unordered_node_map.hpp 
b/include/boost/unordered/unordered_node_map.hpp index b7a6a409..9a0afe7b 100644 --- a/include/boost/unordered/unordered_node_map.hpp +++ b/include/boost/unordered/unordered_node_map.hpp @@ -115,6 +115,10 @@ namespace boost { using insert_return_type = detail::foa::insert_return_type; +#if defined(BOOST_UNORDERED_ENABLE_STATS) + using stats = typename table_type::stats; +#endif + unordered_node_map() : unordered_node_map(0) {} explicit unordered_node_map(size_type n, hasher const& h = hasher(), @@ -749,6 +753,15 @@ namespace boost { void reserve(size_type n) { table_.reserve(n); } +#if defined(BOOST_UNORDERED_ENABLE_STATS) + /// Stats + /// + + stats get_stats() const { return table_.get_stats(); } + + void reset_stats() { table_.reset_stats(); } +#endif + /// Observers /// diff --git a/include/boost/unordered/unordered_node_set.hpp b/include/boost/unordered/unordered_node_set.hpp index 14b44bed..9da992ec 100644 --- a/include/boost/unordered/unordered_node_set.hpp +++ b/include/boost/unordered/unordered_node_set.hpp @@ -105,6 +105,10 @@ namespace boost { using insert_return_type = detail::foa::insert_return_type; +#if defined(BOOST_UNORDERED_ENABLE_STATS) + using stats = typename table_type::stats; +#endif + unordered_node_set() : unordered_node_set(0) {} explicit unordered_node_set(size_type n, hasher const& h = hasher(), @@ -563,6 +567,15 @@ namespace boost { void reserve(size_type n) { table_.reserve(n); } +#if defined(BOOST_UNORDERED_ENABLE_STATS) + /// Stats + /// + + stats get_stats() const { return table_.get_stats(); } + + void reset_stats() { table_.reset_stats(); } +#endif + /// Observers /// From 443113840da3dc00e784169bdec653cae56c9f27 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Mon, 29 Apr 2024 12:06:28 +0200 Subject: [PATCH 04/39] added stats to foa::concurrent_table --- .../unordered/detail/foa/concurrent_table.hpp | 33 ++++++++++++++++--- include/boost/unordered/detail/foa/core.hpp | 26 +++++++++++++-- 2 files changed, 52 insertions(+), 7 
deletions(-) diff --git a/include/boost/unordered/detail/foa/concurrent_table.hpp b/include/boost/unordered/detail/foa/concurrent_table.hpp index 218a78a4..e1d625dd 100644 --- a/include/boost/unordered/detail/foa/concurrent_table.hpp +++ b/include/boost/unordered/detail/foa/concurrent_table.hpp @@ -469,6 +469,10 @@ public: using size_type=typename super::size_type; static constexpr std::size_t bulk_visit_size=16; +#if defined(BOOST_UNORDERED_ENABLE_STATS) + using stats=typename super::stats; +#endif + private: template using enable_if_is_value_type=typename std::enable_if< @@ -965,6 +969,13 @@ public: super::reserve(n); } +#if defined(BOOST_UNORDERED_ENABLE_STATS) + /* already thread safe */ + + using super::get_stats; + using super::reset_stats; +#endif + template friend std::size_t erase_if(concurrent_table& x,Predicate&& pr) { @@ -1186,6 +1197,7 @@ private: GroupAccessMode access_mode, const Key& x,std::size_t pos0,std::size_t hash,F&& f)const { + BOOST_UNORDERED_STATS_COUNTER(num_cmps); prober pb(pos0); do{ auto pos=pb.get(); @@ -1197,19 +1209,30 @@ private: auto lck=access(access_mode,pos); do{ auto n=unchecked_countr_zero(mask); - if(BOOST_LIKELY( - pg->is_occupied(n)&&bool(this->pred()(x,this->key_from(p[n]))))){ - f(pg,n,p+n); - return 1; + if(BOOST_LIKELY(pg->is_occupied(n))){ + BOOST_UNORDERED_INCREMENT_STATS_COUNTER(num_cmps); + if(BOOST_LIKELY(bool(this->pred()(x,this->key_from(p[n]))))){ + f(pg,n,p+n); + BOOST_UNORDERED_ADD_STATS( + this->successful_lookup_cumulative_stats(), + (pb.length(),num_cmps)); + return 1; + } } mask&=mask-1; }while(mask); } if(BOOST_LIKELY(pg->is_not_overflowed(hash))){ + BOOST_UNORDERED_ADD_STATS( + this->unsuccessful_lookup_cumulative_stats(), + (pb.length(),num_cmps)); return 0; } } while(BOOST_LIKELY(pb.next(this->arrays.groups_size_mask))); + BOOST_UNORDERED_ADD_STATS( + this->unsuccessful_lookup_cumulative_stats(), + (pb.length(),num_cmps)); return 0; } @@ -1490,6 +1513,8 @@ private: 
this->construct_element(p,std::forward(args)...); rslot.commit(); rsize.commit(); + BOOST_UNORDERED_ADD_STATS( + this->insertion_cumulative_stats(),(pb.length())); return 1; } pg->mark_overflow(hash); diff --git a/include/boost/unordered/detail/foa/core.hpp b/include/boost/unordered/detail/foa/core.hpp index 6fcc746a..927d8204 100644 --- a/include/boost/unordered/detail/foa/core.hpp +++ b/include/boost/unordered/detail/foa/core.hpp @@ -1435,6 +1435,9 @@ public: #if defined(BOOST_UNORDERED_ENABLE_STATS) using stats=table_core_stats; + using cumulative_insertion_stats=concurrent_cumulative_stats<1>; + using cumulative_successful_lookup_stats=concurrent_cumulative_stats<2>; + using cumulative_unsuccessful_lookup_stats=concurrent_cumulative_stats<2>; #endif table_core( @@ -1814,6 +1817,23 @@ public: }; } + cumulative_insertion_stats& insertion_cumulative_stats()noexcept + { + return insertion_stats; + } + + cumulative_successful_lookup_stats& + successful_lookup_cumulative_stats()const noexcept + { + return successful_lookup_stats; + } + + cumulative_unsuccessful_lookup_stats& + unsuccessful_lookup_cumulative_stats()const noexcept + { + return unsuccessful_lookup_stats; + } + void reset_stats() { insertion_stats.reset(); @@ -2033,9 +2053,9 @@ public: size_ctrl_type size_ctrl; #if defined(BOOST_UNORDERED_ENABLE_STATS) - concurrent_cumulative_stats<1> insertion_stats; - mutable concurrent_cumulative_stats<2> successful_lookup_stats, - unsuccessful_lookup_stats; + cumulative_insertion_stats insertion_stats; + mutable cumulative_successful_lookup_stats successful_lookup_stats; + mutable cumulative_unsuccessful_lookup_stats unsuccessful_lookup_stats; #endif private: From 1ecb92deb566122fb4b3200d23259997f44d3db6 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Mon, 29 Apr 2024 12:06:50 +0200 Subject: [PATCH 05/39] added stats to boost::concurrent_flat_map --- include/boost/unordered/concurrent_flat_map.hpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) 
diff --git a/include/boost/unordered/concurrent_flat_map.hpp b/include/boost/unordered/concurrent_flat_map.hpp index 15f45de2..72101d7b 100644 --- a/include/boost/unordered/concurrent_flat_map.hpp +++ b/include/boost/unordered/concurrent_flat_map.hpp @@ -1,7 +1,7 @@ /* Fast open-addressing concurrent hashmap. * * Copyright 2023 Christian Mazakas. - * Copyright 2023 Joaquin M Lopez Munoz. + * Copyright 2023-2024 Joaquin M Lopez Munoz. * Distributed under the Boost Software License, Version 1.0. * (See accompanying file LICENSE_1_0.txt or copy at * http://www.boost.org/LICENSE_1_0.txt) @@ -75,6 +75,10 @@ namespace boost { typename boost::allocator_const_pointer::type; static constexpr size_type bulk_visit_size = table_type::bulk_visit_size; +#if defined(BOOST_UNORDERED_ENABLE_STATS) + using stats = typename table_type::stats; +#endif + concurrent_flat_map() : concurrent_flat_map(detail::foa::default_bucket_count) { @@ -714,6 +718,15 @@ namespace boost { void rehash(size_type n) { table_.rehash(n); } void reserve(size_type n) { table_.reserve(n); } +#if defined(BOOST_UNORDERED_ENABLE_STATS) + /// Stats + /// + + stats get_stats() const { return table_.get_stats(); } + + void reset_stats() { table_.reset_stats(); } +#endif + /// Observers /// allocator_type get_allocator() const noexcept From 18797a3f329fa9bc1d47c4f9a39deeef05b20960 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Mon, 29 Apr 2024 19:28:35 +0200 Subject: [PATCH 06/39] added noexcept guarantees to cumulative stats calculation --- include/boost/unordered/detail/cumulative_stats.hpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/boost/unordered/detail/cumulative_stats.hpp b/include/boost/unordered/detail/cumulative_stats.hpp index 18f96d39..8be500c1 100644 --- a/include/boost/unordered/detail/cumulative_stats.hpp +++ b/include/boost/unordered/detail/cumulative_stats.hpp @@ -44,8 +44,12 @@ struct cumulative_stats_data struct welfords_algorithm /* 0-based */ { template 
- int operator()(T&& x,cumulative_stats_data& d)const + int operator()(T&& x,cumulative_stats_data& d)const noexcept { + static_assert( + noexcept(static_cast(x)), + "Argument conversion to double must not throw."); + d.m_prior=d.m; d.m+=(static_cast(x)-d.m)/static_cast(n); d.s+=(n!=1)* @@ -68,7 +72,7 @@ public: void reset()noexcept{*this=cumulative_stats();} template - void add(Ts&&... xs) + void add(Ts&&... xs)noexcept { static_assert( sizeof...(Ts)==N,"A sample must be provided for each sequence."); @@ -122,7 +126,7 @@ public: } template - void add(Ts&&... xs) + void add(Ts&&... xs)noexcept { lock_guard lck{mut}; super::add(std::forward(xs)...); From 76c460a703e7bf74c49e6bc489f2ff3ea1fcf563 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Mon, 29 Apr 2024 20:30:16 +0200 Subject: [PATCH 07/39] grouped foa::table_core cumulative stats in one member --- .../unordered/detail/foa/concurrent_table.hpp | 9 ++- include/boost/unordered/detail/foa/core.hpp | 61 ++++++++----------- 2 files changed, 30 insertions(+), 40 deletions(-) diff --git a/include/boost/unordered/detail/foa/concurrent_table.hpp b/include/boost/unordered/detail/foa/concurrent_table.hpp index e1d625dd..57a309d3 100644 --- a/include/boost/unordered/detail/foa/concurrent_table.hpp +++ b/include/boost/unordered/detail/foa/concurrent_table.hpp @@ -1214,7 +1214,7 @@ private: if(BOOST_LIKELY(bool(this->pred()(x,this->key_from(p[n]))))){ f(pg,n,p+n); BOOST_UNORDERED_ADD_STATS( - this->successful_lookup_cumulative_stats(), + this->get_cumulative_stats().successful_lookup, (pb.length(),num_cmps)); return 1; } @@ -1224,15 +1224,14 @@ private: } if(BOOST_LIKELY(pg->is_not_overflowed(hash))){ BOOST_UNORDERED_ADD_STATS( - this->unsuccessful_lookup_cumulative_stats(), + this->get_cumulative_stats().unsuccessful_lookup, (pb.length(),num_cmps)); return 0; } } while(BOOST_LIKELY(pb.next(this->arrays.groups_size_mask))); BOOST_UNORDERED_ADD_STATS( - this->unsuccessful_lookup_cumulative_stats(), - 
(pb.length(),num_cmps)); + this->get_cumulative_stats().unsuccessful_lookup,(pb.length(),num_cmps)); return 0; } @@ -1514,7 +1513,7 @@ private: rslot.commit(); rsize.commit(); BOOST_UNORDERED_ADD_STATS( - this->insertion_cumulative_stats(),(pb.length())); + this->get_cumulative_stats().insertion,(pb.length())); return 1; } pg->mark_overflow(hash); diff --git a/include/boost/unordered/detail/foa/core.hpp b/include/boost/unordered/detail/foa/core.hpp index 927d8204..fa66961f 100644 --- a/include/boost/unordered/detail/foa/core.hpp +++ b/include/boost/unordered/detail/foa/core.hpp @@ -1133,6 +1133,13 @@ struct table_arrays #if defined(BOOST_UNORDERED_ENABLE_STATS) /* stats support */ +struct table_core_cumulative_stats +{ + cumulative_stats<1> insertion; + cumulative_stats<2> successful_lookup, + unsuccessful_lookup; +}; + struct table_core_insertion_stats { cumulative_stats_summary probe_length; @@ -1434,10 +1441,8 @@ public: using arrays_holder_type=arrays_holder; #if defined(BOOST_UNORDERED_ENABLE_STATS) + using cumulative_stats=table_core_cumulative_stats; using stats=table_core_stats; - using cumulative_insertion_stats=concurrent_cumulative_stats<1>; - using cumulative_successful_lookup_stats=concurrent_cumulative_stats<2>; - using cumulative_unsuccessful_lookup_stats=concurrent_cumulative_stats<2>; #endif table_core( @@ -1700,7 +1705,7 @@ public: auto n=unchecked_countr_zero(mask); if(BOOST_LIKELY(bool(pred()(x,key_from(p[n]))))){ BOOST_UNORDERED_ADD_STATS( - successful_lookup_stats,(pb.length(),num_cmps)); + get_cumulative_stats().successful_lookup,(pb.length(),num_cmps)); return {pg,n,p+n}; } mask&=mask-1; @@ -1708,13 +1713,13 @@ public: } if(BOOST_LIKELY(pg->is_not_overflowed(hash))){ BOOST_UNORDERED_ADD_STATS( - unsuccessful_lookup_stats,(pb.length(),num_cmps)); + get_cumulative_stats().unsuccessful_lookup,(pb.length(),num_cmps)); return {}; } } while(BOOST_LIKELY(pb.next(arrays.groups_size_mask))); BOOST_UNORDERED_ADD_STATS( - 
unsuccessful_lookup_stats,(pb.length(),num_cmps)); + get_cumulative_stats().unsuccessful_lookup,(pb.length(),num_cmps)); return {}; } @@ -1804,41 +1809,28 @@ public: { return { { - insertion_stats.get_summary<0>() + cstats.insertion.get_summary<0>() }, { - successful_lookup_stats.get_summary<0>(), - successful_lookup_stats.get_summary<1>() + cstats.successful_lookup.get_summary<0>(), + cstats.successful_lookup.get_summary<1>() }, { - unsuccessful_lookup_stats.get_summary<0>(), - unsuccessful_lookup_stats.get_summary<1>() + cstats.unsuccessful_lookup.get_summary<0>(), + cstats.unsuccessful_lookup.get_summary<1>() } }; } - cumulative_insertion_stats& insertion_cumulative_stats()noexcept + cumulative_stats& get_cumulative_stats()const noexcept { - return insertion_stats; + return cstats; } - - cumulative_successful_lookup_stats& - successful_lookup_cumulative_stats()const noexcept - { - return successful_lookup_stats; - } - - cumulative_unsuccessful_lookup_stats& - unsuccessful_lookup_cumulative_stats()const noexcept - { - return unsuccessful_lookup_stats; - } - void reset_stats() { - insertion_stats.reset(); - successful_lookup_stats.reset(); - unsuccessful_lookup_stats.reset(); + cstats.insertion.reset(); + cstats.successful_lookup.reset(); + cstats.unsuccessful_lookup.reset(); } #endif @@ -2049,13 +2041,11 @@ public: return true; } - arrays_type arrays; - size_ctrl_type size_ctrl; + arrays_type arrays; + size_ctrl_type size_ctrl; #if defined(BOOST_UNORDERED_ENABLE_STATS) - cumulative_insertion_stats insertion_stats; - mutable cumulative_successful_lookup_stats successful_lookup_stats; - mutable cumulative_unsuccessful_lookup_stats unsuccessful_lookup_stats; + mutable cumulative_stats cstats; #endif private: @@ -2345,7 +2335,8 @@ private: auto p=arrays_.elements()+pos*N+n; construct_element(p,std::forward(args)...); pg->set(n,hash); - BOOST_UNORDERED_ADD_STATS(insertion_stats,(pb.length())); + BOOST_UNORDERED_ADD_STATS( + 
get_cumulative_stats().insertion,(pb.length())); return {pg,n,p}; } else pg->mark_overflow(hash); From 9806e75cc912e504f95b4cee82d628d6211df20c Mon Sep 17 00:00:00 2001 From: joaquintides Date: Mon, 29 Apr 2024 20:32:27 +0200 Subject: [PATCH 08/39] added stats to boost::concurrent_flat_set --- include/boost/unordered/concurrent_flat_set.hpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/include/boost/unordered/concurrent_flat_set.hpp b/include/boost/unordered/concurrent_flat_set.hpp index 57f20cdd..473d993f 100644 --- a/include/boost/unordered/concurrent_flat_set.hpp +++ b/include/boost/unordered/concurrent_flat_set.hpp @@ -1,7 +1,7 @@ /* Fast open-addressing concurrent hashset. * * Copyright 2023 Christian Mazakas. - * Copyright 2023 Joaquin M Lopez Munoz. + * Copyright 2023-2024 Joaquin M Lopez Munoz. * Distributed under the Boost Software License, Version 1.0. * (See accompanying file LICENSE_1_0.txt or copy at * http://www.boost.org/LICENSE_1_0.txt) @@ -72,6 +72,10 @@ namespace boost { typename boost::allocator_const_pointer::type; static constexpr size_type bulk_visit_size = table_type::bulk_visit_size; +#if defined(BOOST_UNORDERED_ENABLE_STATS) + using stats = typename table_type::stats; +#endif + concurrent_flat_set() : concurrent_flat_set(detail::foa::default_bucket_count) { @@ -582,6 +586,15 @@ namespace boost { void rehash(size_type n) { table_.rehash(n); } void reserve(size_type n) { table_.reserve(n); } +#if defined(BOOST_UNORDERED_ENABLE_STATS) + /// Stats + /// + + stats get_stats() const { return table_.get_stats(); } + + void reset_stats() { table_.reset_stats(); } +#endif + /// Observers /// allocator_type get_allocator() const noexcept From 2d8fd43cc4ec85122d73574517a21d1175c38b49 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Tue, 30 Apr 2024 17:25:10 +0200 Subject: [PATCH 09/39] editorial --- include/boost/unordered/concurrent_flat_map.hpp | 3 +-- include/boost/unordered/concurrent_flat_set.hpp | 3 +-- 
include/boost/unordered/unordered_flat_map.hpp | 3 +-- include/boost/unordered/unordered_flat_set.hpp | 3 +-- include/boost/unordered/unordered_node_map.hpp | 3 +-- include/boost/unordered/unordered_node_set.hpp | 3 +-- 6 files changed, 6 insertions(+), 12 deletions(-) diff --git a/include/boost/unordered/concurrent_flat_map.hpp b/include/boost/unordered/concurrent_flat_map.hpp index 72101d7b..82689497 100644 --- a/include/boost/unordered/concurrent_flat_map.hpp +++ b/include/boost/unordered/concurrent_flat_map.hpp @@ -720,8 +720,7 @@ namespace boost { #if defined(BOOST_UNORDERED_ENABLE_STATS) /// Stats - /// - + /// stats get_stats() const { return table_.get_stats(); } void reset_stats() { table_.reset_stats(); } diff --git a/include/boost/unordered/concurrent_flat_set.hpp b/include/boost/unordered/concurrent_flat_set.hpp index 473d993f..c4e8a969 100644 --- a/include/boost/unordered/concurrent_flat_set.hpp +++ b/include/boost/unordered/concurrent_flat_set.hpp @@ -588,8 +588,7 @@ namespace boost { #if defined(BOOST_UNORDERED_ENABLE_STATS) /// Stats - /// - + /// stats get_stats() const { return table_.get_stats(); } void reset_stats() { table_.reset_stats(); } diff --git a/include/boost/unordered/unordered_flat_map.hpp b/include/boost/unordered/unordered_flat_map.hpp index dad235ad..20e60d96 100644 --- a/include/boost/unordered/unordered_flat_map.hpp +++ b/include/boost/unordered/unordered_flat_map.hpp @@ -660,8 +660,7 @@ namespace boost { #if defined(BOOST_UNORDERED_ENABLE_STATS) /// Stats - /// - + /// stats get_stats() const { return table_.get_stats(); } void reset_stats() { table_.reset_stats(); } diff --git a/include/boost/unordered/unordered_flat_set.hpp b/include/boost/unordered/unordered_flat_set.hpp index 4f595ef2..f1c09c31 100644 --- a/include/boost/unordered/unordered_flat_set.hpp +++ b/include/boost/unordered/unordered_flat_set.hpp @@ -480,8 +480,7 @@ namespace boost { #if defined(BOOST_UNORDERED_ENABLE_STATS) /// Stats - /// - + /// stats get_stats() 
const { return table_.get_stats(); } void reset_stats() { table_.reset_stats(); } diff --git a/include/boost/unordered/unordered_node_map.hpp b/include/boost/unordered/unordered_node_map.hpp index 9a0afe7b..33b91316 100644 --- a/include/boost/unordered/unordered_node_map.hpp +++ b/include/boost/unordered/unordered_node_map.hpp @@ -755,8 +755,7 @@ namespace boost { #if defined(BOOST_UNORDERED_ENABLE_STATS) /// Stats - /// - + /// stats get_stats() const { return table_.get_stats(); } void reset_stats() { table_.reset_stats(); } diff --git a/include/boost/unordered/unordered_node_set.hpp b/include/boost/unordered/unordered_node_set.hpp index 9da992ec..b9ee0057 100644 --- a/include/boost/unordered/unordered_node_set.hpp +++ b/include/boost/unordered/unordered_node_set.hpp @@ -569,8 +569,7 @@ namespace boost { #if defined(BOOST_UNORDERED_ENABLE_STATS) /// Stats - /// - + /// stats get_stats() const { return table_.get_stats(); } void reset_stats() { table_.reset_stats(); } From 648f6fd23e236d3726f5976339d150f6108a9ab0 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Tue, 30 Apr 2024 17:32:53 +0200 Subject: [PATCH 10/39] protected cumulative_stats against count wraparound --- include/boost/unordered/detail/cumulative_stats.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/boost/unordered/detail/cumulative_stats.hpp b/include/boost/unordered/detail/cumulative_stats.hpp index 8be500c1..288f752c 100644 --- a/include/boost/unordered/detail/cumulative_stats.hpp +++ b/include/boost/unordered/detail/cumulative_stats.hpp @@ -77,8 +77,12 @@ public: static_assert( sizeof...(Ts)==N,"A sample must be provided for each sequence."); + if(BOOST_UNLIKELY(++n==0)){ /* wraparound */ + reset(); + n=1; + } mp11::tuple_transform( - welfords_algorithm{++n}, + welfords_algorithm{n}, std::forward_as_tuple(std::forward(xs)...), data); } From 83abd9cc4ed7ee37a1089cb8883e087c7873c9f5 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Tue, 30 Apr 2024 17:49:52 
+0200 Subject: [PATCH 11/39] avoided -Wextra-semi-stmt warning --- include/boost/unordered/detail/foa/core.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/boost/unordered/detail/foa/core.hpp b/include/boost/unordered/detail/foa/core.hpp index fa66961f..693bcc3f 100644 --- a/include/boost/unordered/detail/foa/core.hpp +++ b/include/boost/unordered/detail/foa/core.hpp @@ -1164,9 +1164,9 @@ struct table_core_stats #else -#define BOOST_UNORDERED_ADD_STATS(stats,args) -#define BOOST_UNORDERED_STATS_COUNTER(name) -#define BOOST_UNORDERED_INCREMENT_STATS_COUNTER(name) +#define BOOST_UNORDERED_ADD_STATS(stats,args) ((void)0) +#define BOOST_UNORDERED_STATS_COUNTER(name) ((void)0) +#define BOOST_UNORDERED_INCREMENT_STATS_COUNTER(name) ((void)0) #endif From 5feb7459ee0a6c2b511bbe7f981a8a5ea904311e Mon Sep 17 00:00:00 2001 From: joaquintides Date: Tue, 30 Apr 2024 19:18:15 +0200 Subject: [PATCH 12/39] added stat counts --- include/boost/unordered/detail/cumulative_stats.hpp | 2 ++ include/boost/unordered/detail/foa/core.hpp | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/include/boost/unordered/detail/cumulative_stats.hpp b/include/boost/unordered/detail/cumulative_stats.hpp index 288f752c..21edba9f 100644 --- a/include/boost/unordered/detail/cumulative_stats.hpp +++ b/include/boost/unordered/detail/cumulative_stats.hpp @@ -87,6 +87,8 @@ public: data); } + std::size_t count()const noexcept{return n;} + template cumulative_stats_summary get_summary()const noexcept { diff --git a/include/boost/unordered/detail/foa/core.hpp b/include/boost/unordered/detail/foa/core.hpp index 693bcc3f..b3feb877 100644 --- a/include/boost/unordered/detail/foa/core.hpp +++ b/include/boost/unordered/detail/foa/core.hpp @@ -1142,11 +1142,13 @@ struct table_core_cumulative_stats struct table_core_insertion_stats { + std::size_t count; cumulative_stats_summary probe_length; }; struct table_core_lookup_stats { + std::size_t count; cumulative_stats_summary 
probe_length; cumulative_stats_summary num_comparisons; }; @@ -1809,13 +1811,16 @@ public: { return { { + cstats.insertion.count(), cstats.insertion.get_summary<0>() }, { + cstats.successful_lookup.count(), cstats.successful_lookup.get_summary<0>(), cstats.successful_lookup.get_summary<1>() }, { + cstats.unsuccessful_lookup.count(), cstats.unsuccessful_lookup.get_summary<0>(), cstats.unsuccessful_lookup.get_summary<1>() } @@ -1826,6 +1831,7 @@ public: { return cstats; } + void reset_stats() { cstats.insertion.reset(); From d01ae76074c40ec3a50d7ca45d768ac1ee3a5775 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Wed, 1 May 2024 16:34:53 +0200 Subject: [PATCH 13/39] grouped cumulative_stats summarization in one operation --- .../unordered/detail/cumulative_stats.hpp | 55 +++++++++++-------- include/boost/unordered/detail/foa/core.hpp | 39 +++++++------ 2 files changed, 53 insertions(+), 41 deletions(-) diff --git a/include/boost/unordered/detail/cumulative_stats.hpp b/include/boost/unordered/detail/cumulative_stats.hpp index 21edba9f..51a1b5ed 100644 --- a/include/boost/unordered/detail/cumulative_stats.hpp +++ b/include/boost/unordered/detail/cumulative_stats.hpp @@ -27,14 +27,7 @@ namespace detail{ * running sequences. */ -struct cumulative_stats_summary -{ - double average; - double variance; - double deviation; -}; - -struct cumulative_stats_data +struct sequence_stats_data { double m=0.0; double m_prior=0.0; @@ -44,7 +37,7 @@ struct cumulative_stats_data struct welfords_algorithm /* 0-based */ { template - int operator()(T&& x,cumulative_stats_data& d)const noexcept + int operator()(T&& x,sequence_stats_data& d)const noexcept { static_assert( noexcept(static_cast(x)), @@ -61,14 +54,27 @@ struct welfords_algorithm /* 0-based */ std::size_t n; }; +struct sequence_stats_summary +{ + double average; + double variance; + double deviation; +}; + /* Stats calculated jointly for N same-sized sequences to save the space - * for n. + * for count. 
*/ template class cumulative_stats { public: + struct summary + { + std::size_t count; + std::array sequence_summary; + }; + void reset()noexcept{*this=cumulative_stats();} template @@ -87,20 +93,22 @@ public: data); } - std::size_t count()const noexcept{return n;} - - template - cumulative_stats_summary get_summary()const noexcept + summary get_summary()const noexcept { - double average=data[I].m, - variance=n!=0?data[I].s/static_cast(n):0.0, /* biased */ - deviation=std::sqrt(variance); - return {average,variance,deviation}; + summary res; + res.count=n; + for(std::size_t i=0;i(n):0.0, /* biased */ + deviation=std::sqrt(variance); + res.sequence_summary[i]={average,variance,deviation}; + } + return res; } private: - std::size_t n=0; - std::array data; + std::size_t n=0; + std::array data; }; #if defined(BOOST_HAS_THREADS) @@ -112,6 +120,8 @@ class concurrent_cumulative_stats:cumulative_stats using lock_guard=std::lock_guard; public: + using summary=super::summary; + concurrent_cumulative_stats()noexcept:super{}{} concurrent_cumulative_stats(const concurrent_cumulative_stats& x)noexcept: concurrent_cumulative_stats{x,lock_guard{x.mut}}{} @@ -138,11 +148,10 @@ public: super::add(std::forward(xs)...); } - template - cumulative_stats_summary get_summary()const noexcept + summary get_summary()const noexcept { lock_guard lck{mut}; - return super::template get_summary(); + return super::get_summary(); } private: diff --git a/include/boost/unordered/detail/foa/core.hpp b/include/boost/unordered/detail/foa/core.hpp index b3feb877..49ca91e9 100644 --- a/include/boost/unordered/detail/foa/core.hpp +++ b/include/boost/unordered/detail/foa/core.hpp @@ -1135,22 +1135,22 @@ struct table_arrays struct table_core_cumulative_stats { - cumulative_stats<1> insertion; - cumulative_stats<2> successful_lookup, - unsuccessful_lookup; + concurrent_cumulative_stats<1> insertion; + concurrent_cumulative_stats<2> successful_lookup, + unsuccessful_lookup; }; struct table_core_insertion_stats 
{ - std::size_t count; - cumulative_stats_summary probe_length; + std::size_t count; + sequence_stats_summary probe_length; }; struct table_core_lookup_stats { - std::size_t count; - cumulative_stats_summary probe_length; - cumulative_stats_summary num_comparisons; + std::size_t count; + sequence_stats_summary probe_length; + sequence_stats_summary num_comparisons; }; struct table_core_stats @@ -1809,21 +1809,24 @@ public: #if defined(BOOST_UNORDERED_ENABLE_STATS) stats get_stats()const { - return { + auto insertion=cstats.insertion.get_summary(); + auto successful_lookup=cstats.successful_lookup.get_summary(); + auto unsuccessful_lookup=cstats.unsuccessful_lookup.get_summary(); + return{ { - cstats.insertion.count(), - cstats.insertion.get_summary<0>() + insertion.count, + insertion.sequence_summary[0] }, { - cstats.successful_lookup.count(), - cstats.successful_lookup.get_summary<0>(), - cstats.successful_lookup.get_summary<1>() + successful_lookup.count, + successful_lookup.sequence_summary[0], + successful_lookup.sequence_summary[1] }, { - cstats.unsuccessful_lookup.count(), - cstats.unsuccessful_lookup.get_summary<0>(), - cstats.unsuccessful_lookup.get_summary<1>() - } + unsuccessful_lookup.count, + unsuccessful_lookup.sequence_summary[0], + unsuccessful_lookup.sequence_summary[1] + }, }; } From e588e04a1bc85425a1bb6abc61bcd2f092a9c1ea Mon Sep 17 00:00:00 2001 From: joaquintides Date: Wed, 1 May 2024 18:11:01 +0200 Subject: [PATCH 14/39] swapped stats on move construction/assignment --- include/boost/unordered/detail/foa/core.hpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/boost/unordered/detail/foa/core.hpp b/include/boost/unordered/detail/foa/core.hpp index 49ca91e9..cfba7dc3 100644 --- a/include/boost/unordered/detail/foa/core.hpp +++ b/include/boost/unordered/detail/foa/core.hpp @@ -1161,12 +1161,16 @@ struct table_core_stats }; #define BOOST_UNORDERED_ADD_STATS(stats,args) stats.add args +#define 
BOOST_UNORDERED_SWAP_STATS(stats1,stats2) std::swap(stats1,stats2) +#define BOOST_UNORDERED_RESET_STATS_OF(x) x.reset_stats() #define BOOST_UNORDERED_STATS_COUNTER(name) std::size_t name=0 #define BOOST_UNORDERED_INCREMENT_STATS_COUNTER(name) ++name #else #define BOOST_UNORDERED_ADD_STATS(stats,args) ((void)0) +#define BOOST_UNORDERED_SWAP_STATS(stats1,stats2) ((void)0) +#define BOOST_UNORDERED_RESET_STATS_OF(x) ((void)0) #define BOOST_UNORDERED_STATS_COUNTER(name) ((void)0) #define BOOST_UNORDERED_INCREMENT_STATS_COUNTER(name) ((void)0) @@ -1481,6 +1485,7 @@ public: x.arrays=ah.release(); x.size_ctrl.ml=x.initial_max_load(); x.size_ctrl.size=0; + BOOST_UNORDERED_SWAP_STATS(cstats,x.cstats); } table_core(table_core&& x) @@ -1506,11 +1511,13 @@ public: using std::swap; swap(arrays,x.arrays); swap(size_ctrl,x.size_ctrl); + BOOST_UNORDERED_SWAP_STATS(cstats,x.cstats); } else{ reserve(x.size()); clear_on_exit c{x}; (void)c; /* unused var warning */ + BOOST_UNORDERED_RESET_STATS_OF(x); /* This works because subsequent x.clear() does not depend on the * elements' values. 
From 6215406c02fcb0acc36eb38ffeb9ecbc8715c17b Mon Sep 17 00:00:00 2001 From: joaquintides Date: Wed, 1 May 2024 18:15:14 +0200 Subject: [PATCH 15/39] added tests for stats feature --- test/Jamfile.v2 | 2 + test/cfoa/stats_tests.cpp | 6 + test/unordered/stats_test.cpp | 255 ++++++++++++++++++++++++++++++++++ 3 files changed, 263 insertions(+) create mode 100644 test/cfoa/stats_tests.cpp create mode 100644 test/unordered/stats_test.cpp diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index f487247a..dc2cccca 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -228,6 +228,7 @@ local FOA_TESTS = hash_is_avalanching_test fancy_pointer_noleak pmr_allocator_tests + stats_tests ; for local test in $(FOA_TESTS) @@ -333,6 +334,7 @@ local CFOA_TESTS = reentrancy_check_test explicit_alloc_ctor_tests pmr_allocator_tests + stats_tests ; for local test in $(CFOA_TESTS) diff --git a/test/cfoa/stats_tests.cpp b/test/cfoa/stats_tests.cpp new file mode 100644 index 00000000..043f7570 --- /dev/null +++ b/test/cfoa/stats_tests.cpp @@ -0,0 +1,6 @@ +// Copyright 2024 Joaquin M Lopez Muñoz. +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_UNORDERED_CFOA_TESTS +#include "../unordered/stats_tests.cpp" diff --git a/test/unordered/stats_test.cpp b/test/unordered/stats_test.cpp new file mode 100644 index 00000000..500104cc --- /dev/null +++ b/test/unordered/stats_test.cpp @@ -0,0 +1,255 @@ +// Copyright 2024 Joaquin M Lopez Muñoz. +// Distributed under the Boost Software License, Version 1.0.
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_UNORDERED_ENABLE_STATS + +#ifdef BOOST_UNORDERED_CFOA_TESTS +#include +#include +#include "../cfoa/helpers.hpp" +#else +#include "../helpers/unordered.hpp" +#endif + +#include "../helpers/helpers.hpp" +#include "../helpers/random_values.hpp" +#include "../helpers/test.hpp" + +template struct unequal_allocator +{ + typedef T value_type; + + unequal_allocator(int n_ = 0): n{n_} {} + unequal_allocator(unequal_allocator const&) = default; + unequal_allocator(unequal_allocator&&) = default; + + template + unequal_allocator(unequal_allocator const& x): n{x.n} {} + + BOOST_ATTRIBUTE_NODISCARD T* allocate(std::size_t n) + { + return static_cast(::operator new(n * sizeof(T))); + } + + void deallocate(T* p, std::size_t) noexcept { ::operator delete(p); } + + bool operator==(unequal_allocator const& x) const { return n == x.n; } + bool operator!=(unequal_allocator const& x) const { return n != x.n; } + + int n; +}; + +template void check_stat(const Stats& s, bool full) +{ + if (full) { + BOOST_TEST_NE(s.average, 0.0); + if(s.variance) { + BOOST_TEST_NE(s.deviation, 0.0); + } + } + else { + BOOST_TEST_EQ(s.average, 0.0); + BOOST_TEST_EQ(s.variance, 0.0); + BOOST_TEST_EQ(s.deviation, 0.0); + } +} + +template void check_stat(const Stats& s1, const Stats& s2) +{ + BOOST_TEST_EQ(s1.average, s2.average); + BOOST_TEST_EQ(s1.variance, s2.variance); + BOOST_TEST_EQ(s1.deviation, s2.deviation); +} + +template void check_insertion_stats(const Stats& s, bool full) +{ + if (full) { + BOOST_TEST_NE(s.count, 0); + } + else { + BOOST_TEST_EQ(s.count, 0); + } + check_stat(s.probe_length, full); +} + +template +void check_insertion_stats(const Stats& s1, const Stats& s2) +{ + BOOST_TEST_EQ(s1.count, s2.count); + check_stat(s1.probe_length, s2.probe_length); +} + +template void check_lookup_stats(const Stats& s, bool full) +{ + if (full) { + BOOST_TEST_NE(s.count, 0); + } + else { + 
BOOST_TEST_EQ(s.count, 0); + } + check_stat(s.probe_length, full); + check_stat(s.num_comparisons, full); +} + +template +void check_lookup_stats(const Stats& s1, const Stats& s2) +{ + BOOST_TEST_EQ(s1.count, s2.count); + check_stat(s1.probe_length, s2.probe_length); + check_stat(s1.num_comparisons, s2.num_comparisons); +} + +template void check_container_stats(const Stats& s, bool full) +{ + check_insertion_stats(s.insertion, full); + check_lookup_stats(s.successful_lookup, full); + check_lookup_stats(s.unsuccessful_lookup, full); +} + +template +void check_container_stats(const Stats& s1, const Stats& s2) +{ + check_insertion_stats(s1.insertion, s2.insertion); + check_lookup_stats(s1.successful_lookup, s2.successful_lookup); + check_lookup_stats(s1.unsuccessful_lookup, s2.unsuccessful_lookup); +} + +template void test_stats() +{ + using value_type = Container::value_type; + using allocator_type = Container::allocator_type; + using stats = Container::stats; + const bool full = true, empty = false; + + Container c; + const Container& cc = c; + + stats s = cc.get_stats(); + check_container_stats(s, empty); + + test::reset_sequence(); + +#if defined(BOOST_UNORDERED_CFOA_TESTS) + + test::random_values l(10000, test::sequential); + std::vector v(l.begin(), l.end()); + thread_runner(v, [&c](boost::span s) { + for (auto const& x : s) { + c.insert(x); + } + }); + +#else + + test::random_values v(10000, test::sequential); + c.insert(v.begin(),v.end()); + +#endif + + s = cc.get_stats(); + check_insertion_stats(s.insertion, full); + check_lookup_stats(s.successful_lookup, empty); + check_lookup_stats(s.unsuccessful_lookup, full); + +#if !defined(BOOST_UNORDERED_CFOA_TESTS) + // Due to rehashing, may not hold in concurrent containers + // because of insertion retries + BOOST_TEST_GT( + s.insertion.count, s.unsuccessful_lookup.count); +#endif + + c.reset_stats(); + s = cc.get_stats(); + check_container_stats(s, empty); + + test::reset_sequence(); + +#if 
defined(BOOST_UNORDERED_CFOA_TESTS) + + test::random_values l2(15000, test::sequential); + std::vector v2(l2.begin(), l2.end()); + std::atomic found = 0, not_found = 0; + thread_runner(v2, [&cc, &found, &not_found](boost::span s) { + for (auto const& x : s) { + if(cc.contains(test::get_key(x))) ++found; + else ++not_found; + } + }); + +#else + + test::random_values v2(15000, test::sequential); + int found = 0, not_found = 0; + for (const auto& x: v2) { + if (cc.contains(test::get_key(x))) ++found; + else ++not_found; + } + +#endif + + s=cc.get_stats(); + check_lookup_stats(s.successful_lookup, full); + check_lookup_stats(s.unsuccessful_lookup, full); + BOOST_TEST_EQ(s.successful_lookup.count, found); + BOOST_TEST_EQ(s.unsuccessful_lookup.count, not_found); + + c.reset_stats(); + s = cc.get_stats(); + check_container_stats(s, empty); + + test::reset_sequence(); + test::random_values v3(1000, test::sequential); + c.clear(); + c.insert(v.begin(),v.end()); + c.insert(v.begin(),v.end()); // produces successful lookups + + s = cc.get_stats(); + Container c2 = std::move(c); + check_container_stats(c.get_stats(), empty); + check_container_stats(c2.get_stats(), s); + + Container c3(std::move(c2), allocator_type()); + check_container_stats(c2.get_stats(), empty); + check_container_stats(c3.get_stats(), s); + + Container c4(std::move(c3), allocator_type(1)); + check_container_stats(c3.get_stats(), empty); + check_insertion_stats(c4.get_stats().insertion, full); + check_lookup_stats(c4.get_stats().successful_lookup, empty); + check_lookup_stats(c4.get_stats().unsuccessful_lookup, empty); + + // TODO: move assignment + // TODO: concurrent<->unordered interop +} + +UNORDERED_AUTO_TEST (stats) { +#if defined(BOOST_UNORDERED_CFOA_TESTS) + test_stats< + boost::concurrent_flat_map< + int, int, boost::hash, std::equal_to, + unequal_allocator< std::pair< const int, int> >>>(); + test_stats< + boost::concurrent_flat_set< + int, boost::hash, std::equal_to, unequal_allocator>>(); +#elif
defined(BOOST_UNORDERED_FOA_TESTS) + test_stats< + boost::unordered_flat_map< + int, int, boost::hash, std::equal_to, + unequal_allocator< std::pair< const int, int> >>>(); + test_stats< + boost::unordered_flat_set< + int, boost::hash, std::equal_to, unequal_allocator>>(); + test_stats< + boost::unordered_node_map< + int, int, boost::hash, std::equal_to, + unequal_allocator< std::pair< const int, int> >>>(); + test_stats< + boost::unordered_node_set< + int, boost::hash, std::equal_to, unequal_allocator>>(); +#else + // Closed-addressing containers do not provide stats +#endif +} + +RUN_TESTS() From b0c480839baa307d154670d83ef89051a134c5bc Mon Sep 17 00:00:00 2001 From: joaquintides Date: Wed, 1 May 2024 18:27:45 +0200 Subject: [PATCH 16/39] renamed test file --- test/unordered/{stats_test.cpp => stats_tests.cpp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/unordered/{stats_test.cpp => stats_tests.cpp} (100%) diff --git a/test/unordered/stats_test.cpp b/test/unordered/stats_tests.cpp similarity index 100% rename from test/unordered/stats_test.cpp rename to test/unordered/stats_tests.cpp From 3b2eae07d4338fc5e2363ebb14b4f7caefaad1d1 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Wed, 1 May 2024 20:28:12 +0200 Subject: [PATCH 17/39] avoided name hiding warning --- test/unordered/stats_tests.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index 500104cc..e1041cd2 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -20,12 +20,12 @@ template struct unequal_allocator { typedef T value_type; - unequal_allocator(int n_ = 0): n{n_} {} + unequal_allocator(int n = 0): n_{n} {} unequal_allocator(unequal_allocator const&) = default; unequal_allocator(unequal_allocator&&) = default; template - unequal_allocator(unequal_allocator const& x): n{x.n} {} + unequal_allocator(unequal_allocator const& x): n_{x.n_} {} 
BOOST_ATTRIBUTE_NODISCARD T* allocate(std::size_t n) { @@ -34,10 +34,10 @@ template struct unequal_allocator void deallocate(T* p, std::size_t) noexcept { ::operator delete(p); } - bool operator==(unequal_allocator const& x) const { return n == x.n; } - bool operator!=(unequal_allocator const& x) const { return n != x.n; } + bool operator==(unequal_allocator const& x) const { return n_ == x.n_; } + bool operator!=(unequal_allocator const& x) const { return n_ != x.n_; } - int n; + int n_; }; template void check_stat(const Stats& s, bool full) From ee77a65fae51762e038380943c7e2404534dc2bd Mon Sep 17 00:00:00 2001 From: joaquintides Date: Wed, 1 May 2024 20:32:12 +0200 Subject: [PATCH 18/39] added missing typename --- include/boost/unordered/detail/cumulative_stats.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/unordered/detail/cumulative_stats.hpp b/include/boost/unordered/detail/cumulative_stats.hpp index 51a1b5ed..1378c0db 100644 --- a/include/boost/unordered/detail/cumulative_stats.hpp +++ b/include/boost/unordered/detail/cumulative_stats.hpp @@ -120,7 +120,7 @@ class concurrent_cumulative_stats:cumulative_stats using lock_guard=std::lock_guard; public: - using summary=super::summary; + using summary=typename super::summary; concurrent_cumulative_stats()noexcept:super{}{} concurrent_cumulative_stats(const concurrent_cumulative_stats& x)noexcept: From 0c1b7199457cf1aef11f522eb5bf2755bfee6ce9 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Wed, 1 May 2024 20:52:36 +0200 Subject: [PATCH 19/39] added missing typedefs --- test/unordered/stats_tests.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index e1041cd2..9c7ee544 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -117,9 +117,9 @@ void check_container_stats(const Stats& s1, const Stats& s2) template void test_stats() { - using value_type = 
Container::value_type; - using allocator_type = Container::allocator_type; - using stats = Container::stats; + using value_type = typename Container::value_type; + using allocator_type = typename Container::allocator_type; + using stats = typename Container::stats; const bool full = true, empty = false; Container c; From 7b0e4da61e4109137cfc865fadb73525b138dc6d Mon Sep 17 00:00:00 2001 From: joaquintides Date: Wed, 1 May 2024 20:56:05 +0200 Subject: [PATCH 20/39] editorial --- test/unordered/stats_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index 9c7ee544..b7b8e159 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -153,8 +153,8 @@ template void test_stats() check_lookup_stats(s.unsuccessful_lookup, full); #if !defined(BOOST_UNORDERED_CFOA_TESTS) - // Due to rehashing, may not hold in concurrent containers - // because of insertion retries + // Due to rehashing. + // May not hold in concurrent containers because of insertion retries BOOST_TEST_GT( s.insertion.count, s.unsuccessful_lookup.count); #endif From 04006547a2d4b6804b8c8ece958f3e717e0efd3d Mon Sep 17 00:00:00 2001 From: joaquintides Date: Wed, 1 May 2024 20:57:02 +0200 Subject: [PATCH 21/39] editorial --- test/unordered/stats_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index b7b8e159..250b78eb 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -153,8 +153,8 @@ template void test_stats() check_lookup_stats(s.unsuccessful_lookup, full); #if !defined(BOOST_UNORDERED_CFOA_TESTS) - // Due to rehashing. - // May not hold in concurrent containers because of insertion retries + // Inequality due to rehashing. + // May not hold in concurrent containers because of insertion retries. 
BOOST_TEST_GT( s.insertion.count, s.unsuccessful_lookup.count); #endif From 83ab4b4b9fd548450682ea729811bc6081acd696 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Wed, 1 May 2024 21:32:49 +0200 Subject: [PATCH 22/39] avoided unused-local-type warning --- test/unordered/stats_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index 250b78eb..98ca10c6 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -117,7 +117,6 @@ void check_container_stats(const Stats& s1, const Stats& s2) template void test_stats() { - using value_type = typename Container::value_type; using allocator_type = typename Container::allocator_type; using stats = typename Container::stats; const bool full = true, empty = false; @@ -131,6 +130,7 @@ template void test_stats() test::reset_sequence(); #if defined(BOOST_UNORDERED_CFOA_TESTS) + using value_type = typename Container::value_type; test::random_values l(10000, test::sequential); std::vector v(l.begin(), l.end()); From 90ca4fb07edcf65355342738cd70e57230889fed Mon Sep 17 00:00:00 2001 From: joaquintides Date: Thu, 2 May 2024 09:49:11 +0200 Subject: [PATCH 23/39] avoided float-equal warnings --- test/unordered/stats_tests.cpp | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index 98ca10c6..651ac1eb 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -15,6 +15,7 @@ #include "../helpers/helpers.hpp" #include "../helpers/random_values.hpp" #include "../helpers/test.hpp" +#include template struct unequal_allocator { @@ -40,26 +41,39 @@ template struct unequal_allocator int n_; }; +bool exact_same(double x, double y) +{ + return std::memcmp( + reinterpret_cast(&x), reinterpret_cast(&y), + sizeof(double))==0; +} + +bool not_exact_same(double x, double y) +{ + return !exact_same(x, y); 
+} + template void check_stat(const Stats& s, bool full) { if (full) { - BOOST_TEST_NE(s.average, 0.0); - if(s.variance) { - BOOST_TEST_NE(s.deviation, 0.0); + BOOST_TEST_GT(s.average, 0.0); + if(not_exact_same(s.variance, 0.0)) { + BOOST_TEST_GT(s.variance, 0.0); + BOOST_TEST_GT(s.deviation, 0.0); } } else { - BOOST_TEST_EQ(s.average, 0.0); - BOOST_TEST_EQ(s.variance, 0.0); - BOOST_TEST_EQ(s.deviation, 0.0); + BOOST_TEST(exact_same(s.average, 0.0)); + BOOST_TEST(exact_same(s.variance, 0.0)); + BOOST_TEST(exact_same(s.deviation, 0.0)); } } template void check_stat(const Stats& s1, const Stats& s2) { - BOOST_TEST_EQ(s1.average, s2.average); - BOOST_TEST_EQ(s1.variance, s2.variance); - BOOST_TEST_EQ(s1.deviation, s2.deviation); + BOOST_TEST(exact_same(s1.average, s2.average)); + BOOST_TEST(exact_same(s1.variance, s2.variance)); + BOOST_TEST(exact_same(s1.deviation, s2.deviation)); } template void check_insertion_stats(const Stats& s, bool full) From 45c51840f7cc4b295494da0de549fd9ec4219490 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Thu, 2 May 2024 09:52:58 +0200 Subject: [PATCH 24/39] avoided shadowed declaration warning --- test/unordered/stats_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index 651ac1eb..45afe5e2 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -132,13 +132,13 @@ void check_container_stats(const Stats& s1, const Stats& s2) template void test_stats() { using allocator_type = typename Container::allocator_type; - using stats = typename Container::stats; + using stats_type = typename Container::stats; const bool full = true, empty = false; Container c; const Container& cc = c; - stats s = cc.get_stats(); + stats_type s = cc.get_stats(); check_container_stats(s, empty); test::reset_sequence(); From d5605671bd26b25b32eb73c9fb61f94d3407b181 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Thu, 2 May 2024 10:22:14 
+0200 Subject: [PATCH 25/39] avoided shadowed local variable warnings --- test/unordered/stats_tests.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index 45afe5e2..09d4bdbe 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -148,8 +148,8 @@ template void test_stats() test::random_values l(10000, test::sequential); std::vector v(l.begin(), l.end()); - thread_runner(v, [&c](boost::span s) { - for (auto const& x : s) { + thread_runner(v, [&c](boost::span sp) { + for (auto const& x : sp) { c.insert(x); } }); @@ -184,8 +184,8 @@ template void test_stats() test::random_values l2(15000, test::sequential); std::vector v2(l2.begin(), l2.end()); std::atomic found = 0, not_found = 0; - thread_runner(v2, [&cc, &found, &not_found](boost::span s) { - for (auto const& x : s) { + thread_runner(v2, [&cc, &found, &not_found](boost::span sp) { + for (auto const& x : sp) { if(cc.contains(test::get_key(x))) ++found; else ++not_found; } From 6212f2642c474d117054bff3fb4a7e9f1939264f Mon Sep 17 00:00:00 2001 From: joaquintides Date: Thu, 2 May 2024 11:15:29 +0200 Subject: [PATCH 26/39] avoided spurious check of deleted copy ctor --- test/unordered/stats_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index 09d4bdbe..6f0dec20 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -183,7 +183,7 @@ template void test_stats() test::random_values l2(15000, test::sequential); std::vector v2(l2.begin(), l2.end()); - std::atomic found = 0, not_found = 0; + std::atomic found{0}, not_found{0}; thread_runner(v2, [&cc, &found, &not_found](boost::span sp) { for (auto const& x : sp) { if(cc.contains(test::get_key(x))) ++found; From d121c91c8840ef2bae1b138cf700602a976d04a9 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Thu, 2 May 2024 12:48:48 +0200 Subject: [PATCH
27/39] avoided shadowed declaration warning --- test/unordered/stats_tests.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index 6f0dec20..e4bc08d4 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -132,13 +132,13 @@ void check_container_stats(const Stats& s1, const Stats& s2) template void test_stats() { using allocator_type = typename Container::allocator_type; - using stats_type = typename Container::stats; + using stats = typename Container::stats; const bool full = true, empty = false; Container c; const Container& cc = c; - stats_type s = cc.get_stats(); + stats s = cc.get_stats(); check_container_stats(s, empty); test::reset_sequence(); @@ -237,7 +237,7 @@ template void test_stats() // TODO: concurrent<->unordered interop } -UNORDERED_AUTO_TEST (stats) { +UNORDERED_AUTO_TEST (stats_) { #if defined(BOOST_UNORDERED_CFOA_TESTS) test_stats< boost::concurrent_flat_map< From 8452b30608907f2de32eeb50a303c973fbc532fe Mon Sep 17 00:00:00 2001 From: joaquintides Date: Thu, 2 May 2024 20:41:27 +0200 Subject: [PATCH 28/39] implemented proper stats handling on move assignment --- include/boost/unordered/detail/foa/core.hpp | 5 + test/unordered/stats_tests.cpp | 110 ++++++++++++++------ 2 files changed, 81 insertions(+), 34 deletions(-) diff --git a/include/boost/unordered/detail/foa/core.hpp b/include/boost/unordered/detail/foa/core.hpp index cfba7dc3..ca69f7b3 100644 --- a/include/boost/unordered/detail/foa/core.hpp +++ b/include/boost/unordered/detail/foa/core.hpp @@ -1162,6 +1162,7 @@ struct table_core_stats #define BOOST_UNORDERED_ADD_STATS(stats,args) stats.add args #define BOOST_UNORDERED_SWAP_STATS(stats1,stats2) std::swap(stats1,stats2) +#define BOOST_UNORDERED_COPY_STATS(stats1,stats2) stats1=stats2 #define BOOST_UNORDERED_RESET_STATS_OF(x) x.reset_stats() #define BOOST_UNORDERED_STATS_COUNTER(name) std::size_t name=0 #define 
BOOST_UNORDERED_INCREMENT_STATS_COUNTER(name) ++name @@ -1170,6 +1171,7 @@ struct table_core_stats #define BOOST_UNORDERED_ADD_STATS(stats,args) ((void)0) #define BOOST_UNORDERED_SWAP_STATS(stats1,stats2) ((void)0) +#define BOOST_UNORDERED_COPY_STATS(stats1,stats2) ((void)0) #define BOOST_UNORDERED_RESET_STATS_OF(x) ((void)0) #define BOOST_UNORDERED_STATS_COUNTER(name) ((void)0) #define BOOST_UNORDERED_INCREMENT_STATS_COUNTER(name) ((void)0) @@ -1633,9 +1635,11 @@ public: arrays=x.arrays; size_ctrl.ml=std::size_t(x.size_ctrl.ml); size_ctrl.size=std::size_t(x.size_ctrl.size); + BOOST_UNORDERED_COPY_STATS(cstats,x.cstats); x.arrays=ah.release(); x.size_ctrl.ml=x.initial_max_load(); x.size_ctrl.size=0; + BOOST_UNORDERED_RESET_STATS_OF(x); } else{ swap(h(),x.h()); @@ -1645,6 +1649,7 @@ public: noshrink_reserve(x.size()); clear_on_exit c{x}; (void)c; /* unused var warning */ + BOOST_UNORDERED_RESET_STATS_OF(x); /* This works because subsequent x.clear() does not depend on the * elements' values. 
diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index e4bc08d4..12ce1da5 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -129,6 +129,26 @@ void check_container_stats(const Stats& s1, const Stats& s2) check_lookup_stats(s1.unsuccessful_lookup, s2.unsuccessful_lookup); } +template void insert_n(Container& c, std::size_t n) +{ +#if defined(BOOST_UNORDERED_CFOA_TESTS) + using value_type = typename Container::value_type; + + test::reset_sequence(); + test::random_values l(n, test::sequential); + std::vector v(l.begin(), l.end()); + thread_runner(v, [&c](boost::span sp) { + for (auto const& x : sp) { + c.insert(x); + } + }); +#else + test::reset_sequence(); + test::random_values l(n, test::sequential); + c.insert(l.begin(), l.end()); +#endif +} + template void test_stats() { using allocator_type = typename Container::allocator_type; @@ -138,49 +158,36 @@ template void test_stats() Container c; const Container& cc = c; - stats s = cc.get_stats(); + // Stats initially empty + stats s = cc.get_stats(); // using cc -> get_stats() is const check_container_stats(s, empty); - test::reset_sequence(); - -#if defined(BOOST_UNORDERED_CFOA_TESTS) - using value_type = typename Container::value_type; - - test::random_values l(10000, test::sequential); - std::vector v(l.begin(), l.end()); - thread_runner(v, [&c](boost::span sp) { - for (auto const& x : sp) { - c.insert(x); - } - }); - -#else - - test::random_values v(10000, test::sequential); - c.insert(v.begin(),v.end()); - -#endif - + // Stats after insertion + insert_n(c, 10000); s = cc.get_stats(); - check_insertion_stats(s.insertion, full); - check_lookup_stats(s.successful_lookup, empty); - check_lookup_stats(s.unsuccessful_lookup, full); + check_insertion_stats(s.insertion, full); // insertions happened + check_lookup_stats(s.successful_lookup, empty); // no duplicate values + check_lookup_stats(s.unsuccessful_lookup, full); // from insertion #if 
!defined(BOOST_UNORDERED_CFOA_TESTS) - // Inequality due to rehashing. - // May not hold in concurrent containers because of insertion retries. + // Inequality due to rehashing + // May not hold in concurrent containers because of insertion retries BOOST_TEST_GT( s.insertion.count, s.unsuccessful_lookup.count); #endif + // resets_stats() actually clears stats c.reset_stats(); - s = cc.get_stats(); - check_container_stats(s, empty); + check_container_stats(cc.get_stats(), empty); + + // Stats after lookup test::reset_sequence(); #if defined(BOOST_UNORDERED_CFOA_TESTS) + using value_type = typename Container::value_type; + test::random_values l2(15000, test::sequential); std::vector v2(l2.begin(), l2.end()); std::atomic found{0}, not_found{0}; @@ -202,6 +209,7 @@ template void test_stats() #endif + // As many [un]successful lookups as recorded externally s=cc.get_stats(); check_lookup_stats(s.successful_lookup, full); check_lookup_stats(s.unsuccessful_lookup, full); @@ -212,28 +220,62 @@ template void test_stats() s = cc.get_stats(); check_container_stats(s, empty); - test::reset_sequence(); - test::random_values v3(1000, test::sequential); - c.clear(); - c.insert(v.begin(),v.end()); - c.insert(v.begin(),v.end()); // produces successful lookups + // Move constructor tests + c.clear(); + insert_n(c, 1000); + insert_n(c, 1000); // produces successful lookups + + // Move constructor + // Stats transferred to target and reset in source s = cc.get_stats(); Container c2 = std::move(c); check_container_stats(c.get_stats(), empty); check_container_stats(c2.get_stats(), s); + // Move constructor with equal allocator + // Stats transferred to target and reset in source Container c3(std::move(c2), allocator_type()); check_container_stats(c2.get_stats(), empty); check_container_stats(c3.get_stats(), s); + // Move constructor with unequal allocator + // Target only has insertions, stats reset in source Container c4(std::move(c3), allocator_type(1));
check_container_stats(c3.get_stats(), empty); check_insertion_stats(c4.get_stats().insertion, full); check_lookup_stats(c4.get_stats().successful_lookup, empty); check_lookup_stats(c4.get_stats().unsuccessful_lookup, empty); - // TODO: move assignment + // Move assignment tests + + // Move assignment with equal allocator + // Stats transferred to target and reset in source + Container c5, c6; + insert_n(c5,1000); + insert_n(c5,1000); // produces successful lookups + insert_n(c6,500); + insert_n(c6,500); // produces successful lookups + s = c5.get_stats(); + check_container_stats(s, full); + check_container_stats(c6.get_stats(), full); + c6 = std::move(c5); + check_container_stats(c5.get_stats(), empty); + check_container_stats(c6.get_stats(), s); + + // Move assignment with unequal allocator + // Target only has insertions (if reset previously), stats reset in source + Container c7(allocator_type(1)); + insert_n(c7,250); + insert_n(c7,250); // produces successful lookups + check_container_stats(c7.get_stats(), full); + c7.reset_stats(); + c7 = std::move(c6); + check_container_stats(c6.get_stats(), empty); + check_insertion_stats(c7.get_stats().insertion, full); + check_lookup_stats(c7.get_stats().successful_lookup, empty); + check_lookup_stats(c7.get_stats().unsuccessful_lookup, empty); + // TODO: concurrent<->unordered interop } From 37451ecc94b5f542f11b4a3232bf5a7ac4e0e289 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Fri, 3 May 2024 13:07:09 +0200 Subject: [PATCH 29/39] relaxed checks for non-empty unsuccessful lookup stats --- test/unordered/stats_tests.cpp | 125 ++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 49 deletions(-) diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index 12ce1da5..2d0f9cfe 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -15,6 +15,7 @@ #include "../helpers/helpers.hpp" #include "../helpers/random_values.hpp" #include "../helpers/test.hpp" +#include 
#include template struct unequal_allocator @@ -53,19 +54,38 @@ bool not_exact_same(double x, double y) return !exact_same(x, y); } -template void check_stat(const Stats& s, bool full) +enum check_stats_contition { - if (full) { + stats_empty=0, + stats_full, + stats_mostly_full // unsuccessful lookups may result in num_comparisons == 0 +}; + +template +void check_stat(const Stats& s, check_stats_contition cond) +{ + switch (cond) { + case stats_empty: + BOOST_TEST(exact_same(s.average, 0.0)); + BOOST_TEST(exact_same(s.variance, 0.0)); + BOOST_TEST(exact_same(s.deviation, 0.0)); + break; + case stats_full: BOOST_TEST_GT(s.average, 0.0); if(not_exact_same(s.variance, 0.0)) { BOOST_TEST_GT(s.variance, 0.0); BOOST_TEST_GT(s.deviation, 0.0); } - } - else { - BOOST_TEST(exact_same(s.average, 0.0)); - BOOST_TEST(exact_same(s.variance, 0.0)); - BOOST_TEST(exact_same(s.deviation, 0.0)); + break; + case stats_mostly_full: + if(not_exact_same(s.variance, 0.0)) { + BOOST_TEST_GT(s.average, 0.0); + BOOST_TEST_GT(s.variance, 0.0); + BOOST_TEST_GT(s.deviation, 0.0); + } + break; + default: + break; } } @@ -76,15 +96,21 @@ template void check_stat(const Stats& s1, const Stats& s2) BOOST_TEST(exact_same(s1.deviation, s2.deviation)); } -template void check_insertion_stats(const Stats& s, bool full) +template +void check_insertion_stats(const Stats& s, check_stats_contition cond) { - if (full) { - BOOST_TEST_NE(s.count, 0); - } - else { + switch (cond) { + case stats_empty: BOOST_TEST_EQ(s.count, 0); + check_stat(s.probe_length, stats_empty); + break; + case stats_full: + BOOST_TEST_NE(s.count, 0); + check_stat(s.probe_length, stats_full); + break; + default: + BOOST_ASSERT(false); // insertion can't be mostly full } - check_stat(s.probe_length, full); } template @@ -94,16 +120,11 @@ void check_insertion_stats(const Stats& s1, const Stats& s2) check_stat(s1.probe_length, s2.probe_length); } -template void check_lookup_stats(const Stats& s, bool full) +template +void
check_lookup_stats(const Stats& s, check_stats_contition cond) { - if (full) { - BOOST_TEST_NE(s.count, 0); - } - else { - BOOST_TEST_EQ(s.count, 0); - } - check_stat(s.probe_length, full); - check_stat(s.num_comparisons, full); + check_stat(s.probe_length, cond == stats_empty? stats_empty : stats_full); + check_stat(s.num_comparisons, cond); } template @@ -114,11 +135,17 @@ void check_lookup_stats(const Stats& s1, const Stats& s2) check_stat(s1.num_comparisons, s2.num_comparisons); } -template void check_container_stats(const Stats& s, bool full) +template +void check_container_stats(const Stats& s, check_stats_contition cond) { - check_insertion_stats(s.insertion, full); - check_lookup_stats(s.successful_lookup, full); - check_lookup_stats(s.unsuccessful_lookup, full); + if(cond == stats_mostly_full) { + BOOST_ASSERT(false); // mostly full only applies to unsuccessful lookup + } + check_insertion_stats(s.insertion, cond); + check_lookup_stats(s.successful_lookup, cond); + check_lookup_stats( + s.unsuccessful_lookup, + cond == stats_empty? 
stats_empty : stats_mostly_full); } template @@ -153,21 +180,21 @@ template void test_stats() { using allocator_type = typename Container::allocator_type; using stats = typename Container::stats; - const bool full = true, empty = false; Container c; const Container& cc = c; // Stats initially empty stats s = cc.get_stats(); // using cc -> get_stats() is const - check_container_stats(s, empty); + check_container_stats(s, stats_empty); // Stats after insertion insert_n(c, 10000); s = cc.get_stats(); - check_insertion_stats(s.insertion, full); // insertions happened - check_lookup_stats(s.successful_lookup, empty); // no duplicate values - check_lookup_stats(s.unsuccessful_lookup, full); // from insertion + check_insertion_stats(s.insertion, stats_full); // insertions happened + check_lookup_stats(s.successful_lookup, stats_empty); // no duplicate values + check_lookup_stats( + s.unsuccessful_lookup, stats_mostly_full); // from insertion #if !defined(BOOST_UNORDERED_CFOA_TESTS) // Inequality due to rehashing @@ -178,7 +205,7 @@ template void test_stats() // resets_stats() actually clears stats c.reset_stats(); - check_container_stats(cc.get_stats(), empty); + check_container_stats(cc.get_stats(), stats_empty); // Stats after lookup @@ -211,14 +238,14 @@ template void test_stats() // As many [un]successful lookups as recorded externally s=cc.get_stats(); - check_lookup_stats(s.successful_lookup, full); - check_lookup_stats(s.unsuccessful_lookup, full); + check_lookup_stats(s.successful_lookup, stats_full); + check_lookup_stats(s.unsuccessful_lookup, stats_mostly_full); BOOST_TEST_EQ(s.successful_lookup.count, found); BOOST_TEST_EQ(s.unsuccessful_lookup.count, not_found); c.reset_stats(); s = cc.get_stats(); - check_container_stats(s, empty); + check_container_stats(s, stats_empty); // Move constructor tests @@ -230,22 +257,22 @@ template void test_stats() // Stats transferred to target and reset in source s = cc.get_stats(); Container c2 = std::move(c); - 
check_container_stats(c.get_stats(), empty); + check_container_stats(c.get_stats(), stats_empty); check_container_stats(c2.get_stats(), s); // Move constructor with equal allocator // Stats transferred to target and reset in source Container c3(std::move(c2), allocator_type()); - check_container_stats(c2.get_stats(), empty); + check_container_stats(c2.get_stats(), stats_empty); check_container_stats(c3.get_stats(), s); // Move constructor with unequal allocator // Target only has insertions, stats reset in source Container c4(std::move(c3), allocator_type(1)); - check_container_stats(c3.get_stats(), empty); - check_insertion_stats(c4.get_stats().insertion, full); - check_lookup_stats(c4.get_stats().successful_lookup, empty); - check_lookup_stats(c4.get_stats().unsuccessful_lookup, empty); + check_container_stats(c3.get_stats(), stats_empty); + check_insertion_stats(c4.get_stats().insertion, stats_full); + check_lookup_stats(c4.get_stats().successful_lookup, stats_empty); + check_lookup_stats(c4.get_stats().unsuccessful_lookup, stats_empty); // Move assignment tests @@ -257,10 +284,10 @@ template void test_stats() insert_n(c6,500); insert_n(c6,500); // produces successful lookups s = c5.get_stats(); - check_container_stats(s, full); - check_container_stats(c6.get_stats(), full); + check_container_stats(s, stats_full); + check_container_stats(c6.get_stats(), stats_full); c6 = std::move(c5); - check_container_stats(c5.get_stats(), empty); + check_container_stats(c5.get_stats(), stats_empty); check_container_stats(c6.get_stats(), s); // Move assignment with unequal allocator @@ -268,13 +295,13 @@ template void test_stats() Container c7(allocator_type(1)); insert_n(c7,250); insert_n(c7,250); // produces successful lookups - check_container_stats(c7.get_stats(), full); + check_container_stats(c7.get_stats(), stats_full); c7.reset_stats(); c7 = std::move(c6); - check_container_stats(c6.get_stats(), empty); - check_insertion_stats(c7.get_stats().insertion, full); - 
check_lookup_stats(c7.get_stats().successful_lookup, empty); - check_lookup_stats(c7.get_stats().unsuccessful_lookup, empty); + check_container_stats(c6.get_stats(), stats_empty); + check_insertion_stats(c7.get_stats().insertion, stats_full); + check_lookup_stats(c7.get_stats().successful_lookup, stats_empty); + check_lookup_stats(c7.get_stats().unsuccessful_lookup, stats_empty); // TODO: concurrent<->unordered interop } From 223f64752ded9d9217da3f2c4531b23d437dccde Mon Sep 17 00:00:00 2001 From: joaquintides Date: Fri, 3 May 2024 18:04:21 +0200 Subject: [PATCH 30/39] implemented proper stats handling on concurrent<->unordered move construction --- .../unordered/detail/foa/concurrent_table.hpp | 2 ++ include/boost/unordered/detail/foa/table.hpp | 4 ++- test/unordered/stats_tests.cpp | 28 +++++++++++++++++-- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/include/boost/unordered/detail/foa/concurrent_table.hpp b/include/boost/unordered/detail/foa/concurrent_table.hpp index 57a309d3..cae13539 100644 --- a/include/boost/unordered/detail/foa/concurrent_table.hpp +++ b/include/boost/unordered/detail/foa/concurrent_table.hpp @@ -514,6 +514,8 @@ public: x.arrays=ah.release(); x.size_ctrl.ml=x.initial_max_load(); x.size_ctrl.size=0; + BOOST_UNORDERED_SWAP_STATS( + this->get_cumulative_stats(),x.get_cumulative_stats()); } concurrent_table(compatible_nonconcurrent_table&& x): diff --git a/include/boost/unordered/detail/foa/table.hpp b/include/boost/unordered/detail/foa/table.hpp index 2f02a7a4..cf96e622 100644 --- a/include/boost/unordered/detail/foa/table.hpp +++ b/include/boost/unordered/detail/foa/table.hpp @@ -1,6 +1,6 @@ /* Fast open-addressing hash table. * - * Copyright 2022-2023 Joaquin M Lopez Munoz. + * Copyright 2022-2024 Joaquin M Lopez Munoz. * Copyright 2023 Christian Mazakas. * Copyright 2024 Braden Ganetsky. * Distributed under the Boost Software License, Version 1.0. 
@@ -593,6 +593,8 @@ private: x.arrays=ah.release(); x.size_ctrl.ml=x.initial_max_load(); x.size_ctrl.size=0; + BOOST_UNORDERED_SWAP_STATS( + this->get_cumulative_stats(),x.get_cumulative_stats()); } template diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index 2d0f9cfe..64492edf 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -7,6 +7,8 @@ #ifdef BOOST_UNORDERED_CFOA_TESTS #include #include +#include +#include #include "../cfoa/helpers.hpp" #else #include "../helpers/unordered.hpp" @@ -302,10 +304,26 @@ template void test_stats() check_insertion_stats(c7.get_stats().insertion, stats_full); check_lookup_stats(c7.get_stats().successful_lookup, stats_empty); check_lookup_stats(c7.get_stats().unsuccessful_lookup, stats_empty); - - // TODO: concurrent<->unordered interop } +#if defined(BOOST_UNORDERED_CFOA_TESTS) +template +void test_stats_concurrent_unordered_interop() +{ + ConcurrentContainer cc1; + insert_n(cc1,5000); + insert_n(cc1,5000); // produces successful lookups + auto s=cc1.get_stats(); + Container c1(std::move(cc1)); + check_container_stats(cc1.get_stats(),stats_empty); + check_container_stats(c1.get_stats(),s); + + ConcurrentContainer cc2(std::move(c1)); + check_container_stats(c1.get_stats(),stats_empty); + check_container_stats(cc2.get_stats(),s); +} +#endif + UNORDERED_AUTO_TEST (stats_) { #if defined(BOOST_UNORDERED_CFOA_TESTS) test_stats< @@ -315,6 +333,12 @@ UNORDERED_AUTO_TEST (stats_) { test_stats< boost::concurrent_flat_set< int, boost::hash, std::equal_to, unequal_allocator>>(); + test_stats_concurrent_unordered_interop< + boost::unordered_flat_map, + boost::concurrent_flat_map>(); + test_stats_concurrent_unordered_interop< + boost::unordered_flat_set, + boost::concurrent_flat_set>(); #elif defined(BOOST_UNORDERED_FOA_TESTS) test_stats< boost::unordered_flat_map< From c1317cb5bee0057712c31f52667b6c0852745c16 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Fri, 3 May 2024 18:11:13 
+0200 Subject: [PATCH 31/39] replaced get_cumulative_stats() with direct access to cstats --- .../unordered/detail/foa/concurrent_table.hpp | 14 +++++--------- include/boost/unordered/detail/foa/core.hpp | 14 ++++---------- include/boost/unordered/detail/foa/table.hpp | 3 +-- 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/include/boost/unordered/detail/foa/concurrent_table.hpp b/include/boost/unordered/detail/foa/concurrent_table.hpp index cae13539..8531fac9 100644 --- a/include/boost/unordered/detail/foa/concurrent_table.hpp +++ b/include/boost/unordered/detail/foa/concurrent_table.hpp @@ -514,8 +514,7 @@ public: x.arrays=ah.release(); x.size_ctrl.ml=x.initial_max_load(); x.size_ctrl.size=0; - BOOST_UNORDERED_SWAP_STATS( - this->get_cumulative_stats(),x.get_cumulative_stats()); + BOOST_UNORDERED_SWAP_STATS(this->cstats,x.cstats); } concurrent_table(compatible_nonconcurrent_table&& x): @@ -1216,8 +1215,7 @@ private: if(BOOST_LIKELY(bool(this->pred()(x,this->key_from(p[n]))))){ f(pg,n,p+n); BOOST_UNORDERED_ADD_STATS( - this->get_cumulative_stats().successful_lookup, - (pb.length(),num_cmps)); + this->cstats.successful_lookup,(pb.length(),num_cmps)); return 1; } } @@ -1226,14 +1224,13 @@ private: } if(BOOST_LIKELY(pg->is_not_overflowed(hash))){ BOOST_UNORDERED_ADD_STATS( - this->get_cumulative_stats().unsuccessful_lookup, - (pb.length(),num_cmps)); + this->cstats.unsuccessful_lookup,(pb.length(),num_cmps)); return 0; } } while(BOOST_LIKELY(pb.next(this->arrays.groups_size_mask))); BOOST_UNORDERED_ADD_STATS( - this->get_cumulative_stats().unsuccessful_lookup,(pb.length(),num_cmps)); + this->cstats.unsuccessful_lookup,(pb.length(),num_cmps)); return 0; } @@ -1514,8 +1511,7 @@ private: this->construct_element(p,std::forward(args)...); rslot.commit(); rsize.commit(); - BOOST_UNORDERED_ADD_STATS( - this->get_cumulative_stats().insertion,(pb.length())); + BOOST_UNORDERED_ADD_STATS(this->cstats.insertion,(pb.length())); return 1; } pg->mark_overflow(hash); 
diff --git a/include/boost/unordered/detail/foa/core.hpp b/include/boost/unordered/detail/foa/core.hpp index ca69f7b3..ab2fb86c 100644 --- a/include/boost/unordered/detail/foa/core.hpp +++ b/include/boost/unordered/detail/foa/core.hpp @@ -1719,7 +1719,7 @@ public: auto n=unchecked_countr_zero(mask); if(BOOST_LIKELY(bool(pred()(x,key_from(p[n]))))){ BOOST_UNORDERED_ADD_STATS( - get_cumulative_stats().successful_lookup,(pb.length(),num_cmps)); + cstats.successful_lookup,(pb.length(),num_cmps)); return {pg,n,p+n}; } mask&=mask-1; @@ -1727,13 +1727,13 @@ public: } if(BOOST_LIKELY(pg->is_not_overflowed(hash))){ BOOST_UNORDERED_ADD_STATS( - get_cumulative_stats().unsuccessful_lookup,(pb.length(),num_cmps)); + cstats.unsuccessful_lookup,(pb.length(),num_cmps)); return {}; } } while(BOOST_LIKELY(pb.next(arrays.groups_size_mask))); BOOST_UNORDERED_ADD_STATS( - get_cumulative_stats().unsuccessful_lookup,(pb.length(),num_cmps)); + cstats.unsuccessful_lookup,(pb.length(),num_cmps)); return {}; } @@ -1842,11 +1842,6 @@ public: }; } - cumulative_stats& get_cumulative_stats()const noexcept - { - return cstats; - } - void reset_stats() { cstats.insertion.reset(); @@ -2356,8 +2351,7 @@ private: auto p=arrays_.elements()+pos*N+n; construct_element(p,std::forward(args)...); pg->set(n,hash); - BOOST_UNORDERED_ADD_STATS( - get_cumulative_stats().insertion,(pb.length())); + BOOST_UNORDERED_ADD_STATS(cstats.insertion,(pb.length())); return {pg,n,p}; } else pg->mark_overflow(hash); diff --git a/include/boost/unordered/detail/foa/table.hpp b/include/boost/unordered/detail/foa/table.hpp index cf96e622..529a514c 100644 --- a/include/boost/unordered/detail/foa/table.hpp +++ b/include/boost/unordered/detail/foa/table.hpp @@ -593,8 +593,7 @@ private: x.arrays=ah.release(); x.size_ctrl.ml=x.initial_max_load(); x.size_ctrl.size=0; - BOOST_UNORDERED_SWAP_STATS( - this->get_cumulative_stats(),x.get_cumulative_stats()); + BOOST_UNORDERED_SWAP_STATS(this->cstats,x.cstats); } template From 
22d8cca03bd5b293e190b6205f1c7abae543dd85 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Sun, 5 May 2024 17:56:03 +0200 Subject: [PATCH 32/39] added stats to bulk visitation --- .../unordered/detail/foa/concurrent_table.hpp | 18 ++++++++++++------ test/unordered/stats_tests.cpp | 16 +++++++++++++++- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/include/boost/unordered/detail/foa/concurrent_table.hpp b/include/boost/unordered/detail/foa/concurrent_table.hpp index 8531fac9..66c25791 100644 --- a/include/boost/unordered/detail/foa/concurrent_table.hpp +++ b/include/boost/unordered/detail/foa/concurrent_table.hpp @@ -1265,6 +1265,7 @@ private: it=first; for(auto i=m;i--;++it){ + BOOST_UNORDERED_STATS_COUNTER(num_cmps); auto pos=positions[i]; prober pb(pos); auto pg=this->arrays.groups()+pos; @@ -1277,12 +1278,15 @@ private: auto lck=access(access_mode,pos); do{ auto n=unchecked_countr_zero(mask); - if(BOOST_LIKELY( - pg->is_occupied(n)&& - bool(this->pred()(*it,this->key_from(p[n]))))){ - f(cast_for(access_mode,type_policy::value_from(p[n]))); - ++res; - goto next_key; + if(BOOST_LIKELY(pg->is_occupied(n))){ + BOOST_UNORDERED_INCREMENT_STATS_COUNTER(num_cmps); + if(bool(this->pred()(*it,this->key_from(p[n])))){ + f(cast_for(access_mode,type_policy::value_from(p[n]))); + ++res; + BOOST_UNORDERED_ADD_STATS( + this->cstats.successful_lookup,(pb.length(),num_cmps)); + goto next_key; + } } mask&=mask-1; }while(mask); @@ -1291,6 +1295,8 @@ private: do{ if(BOOST_LIKELY(pg->is_not_overflowed(hashes[i]))|| BOOST_UNLIKELY(!pb.next(this->arrays.groups_size_mask))){ + BOOST_UNORDERED_ADD_STATS( + this->cstats.unsuccessful_lookup,(pb.length(),num_cmps)); goto next_key; } pos=pb.get(); diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index 64492edf..a9315cd4 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -18,6 +18,7 @@ #include "../helpers/random_values.hpp" #include "../helpers/test.hpp" #include 
+#include #include template struct unequal_allocator @@ -215,16 +216,29 @@ template void test_stats() #if defined(BOOST_UNORDERED_CFOA_TESTS) + using key_type = typename Container::key_type; using value_type = typename Container::value_type; test::random_values l2(15000, test::sequential); std::vector v2(l2.begin(), l2.end()); std::atomic found{0}, not_found{0}; thread_runner(v2, [&cc, &found, ¬_found](boost::span sp) { - for (auto const& x : sp) { + // Half the span looked up elementwise + auto sp1 = boost::make_span(sp.begin(), sp.size()/2); + for (auto const& x : sp1) { if(cc.contains(test::get_key(x))) ++found; else ++not_found; } + + // Second half looked up in bulk + std::vector ksp2; + for (auto const& x : boost::make_span(sp1.end(), sp.end())) { + ksp2.push_back(test::get_key(x)); + } + auto visited = cc.visit( + ksp2.begin(), ksp2.end(), [](const value_type&) {}); + found += visited; + not_found += ksp2.size() - visited; }); #else From 28fc6890119ab2528b242693b87a3e47c32c9de7 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Sun, 5 May 2024 18:30:42 +0200 Subject: [PATCH 33/39] avoided shorten-64-to-32 and sign-conversion warnings --- test/unordered/stats_tests.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/unordered/stats_tests.cpp b/test/unordered/stats_tests.cpp index a9315cd4..351f07d6 100644 --- a/test/unordered/stats_tests.cpp +++ b/test/unordered/stats_tests.cpp @@ -220,8 +220,8 @@ template void test_stats() using value_type = typename Container::value_type; test::random_values l2(15000, test::sequential); - std::vector v2(l2.begin(), l2.end()); - std::atomic found{0}, not_found{0}; + std::vector v2(l2.begin(), l2.end()); + std::atomic found{0}, not_found{0}; thread_runner(v2, [&cc, &found, ¬_found](boost::span sp) { // Half the span looked up elementwise auto sp1 = boost::make_span(sp.begin(), sp.size()/2); @@ -232,7 +232,8 @@ template void test_stats() // Second half looked up in bulk std::vector ksp2; - for 
(auto const& x : boost::make_span(sp1.end(), sp.end())) { + for (auto const& x : boost::make_span( + sp1.end(), static_cast(sp.end() - sp1.end()))) { ksp2.push_back(test::get_key(x)); } auto visited = cc.visit( @@ -244,7 +245,7 @@ template void test_stats() #else test::random_values v2(15000, test::sequential); - int found = 0, not_found = 0; + std::size_t found = 0, not_found = 0; for (const auto& x: v2) { if (cc.contains(test::get_key(x))) ++found; else ++not_found; From 4c0aea983ebec4a9491381e7e4f3a9da844d19ba Mon Sep 17 00:00:00 2001 From: joaquintides Date: Mon, 6 May 2024 13:26:30 +0200 Subject: [PATCH 34/39] made reset_stats noexcept --- include/boost/unordered/concurrent_flat_map.hpp | 2 +- include/boost/unordered/concurrent_flat_set.hpp | 2 +- include/boost/unordered/detail/foa/core.hpp | 2 +- include/boost/unordered/unordered_flat_map.hpp | 2 +- include/boost/unordered/unordered_flat_set.hpp | 2 +- include/boost/unordered/unordered_node_map.hpp | 2 +- include/boost/unordered/unordered_node_set.hpp | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/boost/unordered/concurrent_flat_map.hpp b/include/boost/unordered/concurrent_flat_map.hpp index 82689497..b67f2ad3 100644 --- a/include/boost/unordered/concurrent_flat_map.hpp +++ b/include/boost/unordered/concurrent_flat_map.hpp @@ -723,7 +723,7 @@ namespace boost { /// stats get_stats() const { return table_.get_stats(); } - void reset_stats() { table_.reset_stats(); } + void reset_stats() noexcept { table_.reset_stats(); } #endif /// Observers diff --git a/include/boost/unordered/concurrent_flat_set.hpp b/include/boost/unordered/concurrent_flat_set.hpp index c4e8a969..d0665f71 100644 --- a/include/boost/unordered/concurrent_flat_set.hpp +++ b/include/boost/unordered/concurrent_flat_set.hpp @@ -591,7 +591,7 @@ namespace boost { /// stats get_stats() const { return table_.get_stats(); } - void reset_stats() { table_.reset_stats(); } + void reset_stats() noexcept { table_.reset_stats(); 
} #endif /// Observers diff --git a/include/boost/unordered/detail/foa/core.hpp b/include/boost/unordered/detail/foa/core.hpp index ab2fb86c..db73a866 100644 --- a/include/boost/unordered/detail/foa/core.hpp +++ b/include/boost/unordered/detail/foa/core.hpp @@ -1842,7 +1842,7 @@ public: }; } - void reset_stats() + void reset_stats()noexcept { cstats.insertion.reset(); cstats.successful_lookup.reset(); diff --git a/include/boost/unordered/unordered_flat_map.hpp b/include/boost/unordered/unordered_flat_map.hpp index 20e60d96..55024102 100644 --- a/include/boost/unordered/unordered_flat_map.hpp +++ b/include/boost/unordered/unordered_flat_map.hpp @@ -663,7 +663,7 @@ namespace boost { /// stats get_stats() const { return table_.get_stats(); } - void reset_stats() { table_.reset_stats(); } + void reset_stats() noexcept { table_.reset_stats(); } #endif /// Observers diff --git a/include/boost/unordered/unordered_flat_set.hpp b/include/boost/unordered/unordered_flat_set.hpp index f1c09c31..a72d3567 100644 --- a/include/boost/unordered/unordered_flat_set.hpp +++ b/include/boost/unordered/unordered_flat_set.hpp @@ -483,7 +483,7 @@ namespace boost { /// stats get_stats() const { return table_.get_stats(); } - void reset_stats() { table_.reset_stats(); } + void reset_stats() noexcept { table_.reset_stats(); } #endif /// Observers diff --git a/include/boost/unordered/unordered_node_map.hpp b/include/boost/unordered/unordered_node_map.hpp index 33b91316..def251fa 100644 --- a/include/boost/unordered/unordered_node_map.hpp +++ b/include/boost/unordered/unordered_node_map.hpp @@ -758,7 +758,7 @@ namespace boost { /// stats get_stats() const { return table_.get_stats(); } - void reset_stats() { table_.reset_stats(); } + void reset_stats() noexcept { table_.reset_stats(); } #endif /// Observers diff --git a/include/boost/unordered/unordered_node_set.hpp b/include/boost/unordered/unordered_node_set.hpp index b9ee0057..e5e14115 100644 --- 
a/include/boost/unordered/unordered_node_set.hpp +++ b/include/boost/unordered/unordered_node_set.hpp @@ -572,7 +572,7 @@ namespace boost { /// stats get_stats() const { return table_.get_stats(); } - void reset_stats() { table_.reset_stats(); } + void reset_stats() noexcept { table_.reset_stats(); } #endif /// Observers From 86f622240dcad38e235eba5a284863a63f508057 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Mon, 6 May 2024 18:48:40 +0200 Subject: [PATCH 35/39] documented stats --- doc/unordered/changes.adoc | 1 + doc/unordered/concurrent_flat_map.adoc | 49 ++++++++++++++++++ doc/unordered/concurrent_flat_set.adoc | 49 ++++++++++++++++++ doc/unordered/ref.adoc | 1 + doc/unordered/stats.adoc | 71 ++++++++++++++++++++++++++ doc/unordered/unordered_flat_map.adoc | 51 ++++++++++++++++++ doc/unordered/unordered_flat_set.adoc | 51 ++++++++++++++++++ doc/unordered/unordered_node_map.adoc | 49 ++++++++++++++++++ doc/unordered/unordered_node_set.adoc | 49 ++++++++++++++++++ 9 files changed, 371 insertions(+) create mode 100644 doc/unordered/stats.adoc diff --git a/doc/unordered/changes.adoc b/doc/unordered/changes.adoc index 8ab7b639..b59bb90b 100644 --- a/doc/unordered/changes.adoc +++ b/doc/unordered/changes.adoc @@ -9,6 +9,7 @@ == Release 1.86.0 * Added container `pmr` aliases when header `` is available. The alias `boost::unordered::pmr::[container]` refers to `boost::unordered::[container]` with a `std::pmr::polymorphic_allocator` allocator type. +* Equipped open-addressing and concurrent containers to internally calculate and provide statistical metrics affected by the quality of the hash function. This functionality is enabled by the global macro `BOOST_UNORDERED_ENABLE_STATS`. 
== Release 1.85.0 diff --git a/doc/unordered/concurrent_flat_map.adoc b/doc/unordered/concurrent_flat_map.adoc index 6f87390c..50c840a9 100644 --- a/doc/unordered/concurrent_flat_map.adoc +++ b/doc/unordered/concurrent_flat_map.adoc @@ -50,6 +50,8 @@ namespace boost { using size_type = std::size_t; using difference_type = std::ptrdiff_t; + using stats = xref:stats_stats_type[__stats-type__]; // if statistics are xref:concurrent_flat_map_boost_unordered_enable_stats[enabled] + // constants static constexpr size_type xref:#concurrent_flat_map_constants[bulk_visit_size] = _implementation-defined_; @@ -228,6 +230,10 @@ namespace boost { size_type xref:#concurrent_flat_map_max_load[max_load]() const noexcept; void xref:#concurrent_flat_map_rehash[rehash](size_type n); void xref:#concurrent_flat_map_reserve[reserve](size_type n); + + // statistics (if xref:concurrent_flat_map_boost_unordered_enable_stats[enabled]) + stats xref:#concurrent_flat_map_get_stats[get_stats]() const; + void xref:#concurrent_flat_map_reset_stats[reset_stats]() noexcept; }; // Deduction Guides @@ -407,6 +413,15 @@ a function visiting elements of `m`) are detected and signalled through `BOOST_A When run-time speed is a concern, the feature can be disabled by globally defining this macro. +--- + +==== `BOOST_UNORDERED_ENABLE_STATS` + +Globally define this macro to enable xref:#stats[statistics calculation] for the table. Note +that this option decreases the overall performance of many operations. + +--- + === Constants ```cpp @@ -488,6 +503,8 @@ concurrent_flat_map(concurrent_flat_map&& other); The move constructor. The internal bucket array of `other` is transferred directly to the new table. The hash function, predicate and allocator are moved-constructed from `other`. +If statistics are xref:concurrent_flat_map_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` and calls `other.reset_stats()`. [horizontal] Concurrency:;; Blocking on `other`. 
@@ -536,6 +553,9 @@ concurrent_flat_map(concurrent_flat_map&& other, Allocator const& a); If `a == other.get_allocator()`, the elements of `other` are transferred directly to the new table; otherwise, elements are moved-constructed from those of `other`. The hash function and predicate are moved-constructed from `other`, and the allocator is copy-constructed from `a`. +If statistics are xref:concurrent_flat_map_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` iff `a == other.get_allocator()`, +and always calls `other.reset_stats()`. [horizontal] Concurrency:;; Blocking on `other`. @@ -551,6 +571,8 @@ concurrent_flat_map(unordered_flat_map&& other); Move construction from a xref:#unordered_flat_map[`unordered_flat_map`]. The internal bucket array of `other` is transferred directly to the new container. The hash function, predicate and allocator are moved-constructed from `other`. +If statistics are xref:concurrent_flat_map_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` and calls `other.reset_stats()`. [horizontal] Complexity:;; O(`bucket_count()`) @@ -709,6 +731,9 @@ The move assignment operator. Destroys previously existing elements, swaps the h and move-assigns the allocator from `other` if `Alloc::propagate_on_container_move_assignment` exists and `Alloc::propagate_on_container_move_assignment::value` is `true`. If at this point the allocator is equal to `other.get_allocator()`, the internal bucket array of `other` is transferred directly to `*this`; otherwise, inserts move-constructed copies of the elements of `other`. +If statistics are xref:concurrent_flat_map_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` iff the final allocator is equal to `other.get_allocator()`, +and always calls `other.reset_stats()`. [horizontal] Concurrency:;; Blocking on `*this` and `other`. 
@@ -1480,6 +1505,30 @@ Concurrency:;; Blocking on `*this`. --- +=== Statistics + +==== get_stats +```c++ +stats get_stats() const; +``` + +[horizontal] +Returns:;; A statistical description of the insertion and lookup operations performed by the table so far. +Notes:;; Only available if xref:stats[statistics calculation] is xref:concurrent_flat_map_boost_unordered_enable_stats[enabled]. + +--- + +==== reset_stats +```c++ +void reset_stats() noexcept; +``` + +[horizontal] +Effects:;; Sets to zero the internal statistics kept by the table. +Notes:;; Only available if xref:stats[statistics calculation] is xref:concurrent_flat_map_boost_unordered_enable_stats[enabled]. + +--- + === Deduction Guides A deduction guide will not participate in overload resolution if any of the following are true: diff --git a/doc/unordered/concurrent_flat_set.adoc b/doc/unordered/concurrent_flat_set.adoc index e7713352..c52725cc 100644 --- a/doc/unordered/concurrent_flat_set.adoc +++ b/doc/unordered/concurrent_flat_set.adoc @@ -45,6 +45,8 @@ namespace boost { using size_type = std::size_t; using difference_type = std::ptrdiff_t; + using stats = xref:stats_stats_type[__stats-type__]; // if statistics are xref:concurrent_flat_set_boost_unordered_enable_stats[enabled] + // constants static constexpr size_type xref:#concurrent_flat_set_constants[bulk_visit_size] = _implementation-defined_; @@ -188,6 +190,10 @@ namespace boost { size_type xref:#concurrent_flat_set_max_load[max_load]() const noexcept; void xref:#concurrent_flat_set_rehash[rehash](size_type n); void xref:#concurrent_flat_set_reserve[reserve](size_type n); + + // statistics (if xref:concurrent_flat_set_boost_unordered_enable_stats[enabled]) + stats xref:#concurrent_flat_set_get_stats[get_stats]() const; + void xref:#concurrent_flat_set_reset_stats[reset_stats]() noexcept; }; // Deduction Guides @@ -358,6 +364,15 @@ a function visiting elements of `m`) are detected and signalled through `BOOST_A When run-time speed is a concern, 
the feature can be disabled by globally defining this macro. +--- + +==== `BOOST_UNORDERED_ENABLE_STATS` + +Globally define this macro to enable xref:#stats[statistics calculation] for the table. Note +that this option decreases the overall performance of many operations. + +--- + === Constants ```cpp @@ -439,6 +454,8 @@ concurrent_flat_set(concurrent_flat_set&& other); The move constructor. The internal bucket array of `other` is transferred directly to the new table. The hash function, predicate and allocator are moved-constructed from `other`. +If statistics are xref:concurrent_flat_set_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` and calls `other.reset_stats()`. [horizontal] Concurrency:;; Blocking on `other`. @@ -487,6 +504,9 @@ concurrent_flat_set(concurrent_flat_set&& other, Allocator const& a); If `a == other.get_allocator()`, the elements of `other` are transferred directly to the new table; otherwise, elements are moved-constructed from those of `other`. The hash function and predicate are moved-constructed from `other`, and the allocator is copy-constructed from `a`. +If statistics are xref:concurrent_flat_set_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` iff `a == other.get_allocator()`, +and always calls `other.reset_stats()`. [horizontal] Concurrency:;; Blocking on `other`. @@ -502,6 +522,8 @@ concurrent_flat_set(unordered_flat_set&& other); Move construction from a xref:#unordered_flat_set[`unordered_flat_set`]. The internal bucket array of `other` is transferred directly to the new container. The hash function, predicate and allocator are moved-constructed from `other`. +If statistics are xref:concurrent_flat_set_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` and calls `other.reset_stats()`. [horizontal] Complexity:;; O(`bucket_count()`) @@ -659,6 +681,9 @@ The move assignment operator. 
Destroys previously existing elements, swaps the h and move-assigns the allocator from `other` if `Alloc::propagate_on_container_move_assignment` exists and `Alloc::propagate_on_container_move_assignment::value` is `true`. If at this point the allocator is equal to `other.get_allocator()`, the internal bucket array of `other` is transferred directly to `*this`; otherwise, inserts move-constructed copies of the elements of `other`. +If statistics are xref:concurrent_flat_set_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` iff the final allocator is equal to `other.get_allocator()`, +and always calls `other.reset_stats()`. [horizontal] Concurrency:;; Blocking on `*this` and `other`. @@ -1316,6 +1341,30 @@ Concurrency:;; Blocking on `*this`. --- +=== Statistics + +==== get_stats +```c++ +stats get_stats() const; +``` + +[horizontal] +Returns:;; A statistical description of the insertion and lookup operations performed by the table so far. +Notes:;; Only available if xref:stats[statistics calculation] is xref:concurrent_flat_set_boost_unordered_enable_stats[enabled]. + +--- + +==== reset_stats +```c++ +void reset_stats() noexcept; +``` + +[horizontal] +Effects:;; Sets to zero the internal statistics kept by the table. +Notes:;; Only available if xref:stats[statistics calculation] is xref:concurrent_flat_set_boost_unordered_enable_stats[enabled]. 
+ +--- + === Deduction Guides A deduction guide will not participate in overload resolution if any of the following are true: diff --git a/doc/unordered/ref.adoc b/doc/unordered/ref.adoc index 08743fa6..6a0d22c4 100644 --- a/doc/unordered/ref.adoc +++ b/doc/unordered/ref.adoc @@ -6,6 +6,7 @@ include::unordered_multimap.adoc[] include::unordered_set.adoc[] include::unordered_multiset.adoc[] include::hash_traits.adoc[] +include::stats.adoc[] include::unordered_flat_map.adoc[] include::unordered_flat_set.adoc[] include::unordered_node_map.adoc[] diff --git a/doc/unordered/stats.adoc b/doc/unordered/stats.adoc new file mode 100644 index 00000000..944b6069 --- /dev/null +++ b/doc/unordered/stats.adoc @@ -0,0 +1,71 @@ +[#stats] +== Statistics + +:idprefix: stats_ + +Open-addressing and concurrent containers can be configured to keep running statistics +of some internal operations affected by the quality of the supplied hash function. + +=== Synopsis + +[listing,subs="+macros,+quotes"] +----- +struct xref:#stats_stats_summary_type[__stats-summary-type__] +{ + double average; + double variance; + double deviation; +}; + +struct xref:#stats_insertion_stats_type[__insertion-stats-type__] +{ + std::size_t count; + xref:#stats_stats_summary_type[__stats-summary-type__] probe_length; +}; + +struct xref:stats_lookup_stats_type[__lookup-stats-type__] +{ + std::size_t count; + xref:#stats_stats_summary_type[__stats-summary-type__] probe_length; + xref:#stats_stats_summary_type[__stats-summary-type__] num_comparisons; +}; + +struct xref:stats_stats_type[__stats-type__] +{ + xref:#stats_insertion_stats_type[__insertion-stats-type__] insertion; + xref:stats_lookup_stats_type[__lookup-stats-type__] successful_lookup, + unsuccessful_lookup; +}; +----- + +==== __stats-summary-type__ + +Provides the average value, variance and standard deviation of a sequence of numerical values. 
+ +==== __insertion-stats-type__ + +Provides the number of insertion operations performed by a container and +statistics on the associated __probe length__ (number of +xref:#structures_open_addressing_containers[bucket groups] accessed per operation). + +==== __lookup-stats-type__ + +For successful (element found) or unsuccessful (not found) lookup, +provides the number of operations performed by a container and +statistics on the associated __probe length__ (number of +xref:#structures_open_addressing_containers[bucket groups] accessed) +and number of element comparisons per operation. + +==== __stats-type__ + +Provides statistics on insertion, successful and unsuccessful lookups performed by a container. +If the supplied hash function has good quality, then: + +* Average probe lengths should be close to 1.0. +* For successful lookups, the average number of element comparisons should be close to 1.0. +* For unsuccessful lookups, the average number of element comparisons should be close to 0.0. + +These statistics can be used to determine if a given hash function +can be marked as xref:hash_traits_hash_is_avalanching[__avalanching__]. 
+ +--- diff --git a/doc/unordered/unordered_flat_map.adoc b/doc/unordered/unordered_flat_map.adoc index 07f90d0e..13733f50 100644 --- a/doc/unordered/unordered_flat_map.adoc +++ b/doc/unordered/unordered_flat_map.adoc @@ -58,6 +58,8 @@ namespace boost { using iterator = _implementation-defined_; using const_iterator = _implementation-defined_; + using stats = xref:stats_stats_type[__stats-type__]; // if statistics are xref:unordered_flat_map_boost_unordered_enable_stats[enabled] + // construct/copy/destroy xref:#unordered_flat_map_default_constructor[unordered_flat_map](); explicit xref:#unordered_flat_map_bucket_count_constructor[unordered_flat_map](size_type n, @@ -214,6 +216,10 @@ namespace boost { size_type xref:#unordered_flat_map_max_load[max_load]() const noexcept; void xref:#unordered_flat_map_rehash[rehash](size_type n); void xref:#unordered_flat_map_reserve[reserve](size_type n); + + // statistics (if xref:unordered_flat_map_boost_unordered_enable_stats[enabled]) + stats xref:#unordered_flat_map_get_stats[get_stats]() const; + void xref:#unordered_flat_map_reset_stats[reset_stats]() noexcept; }; // Deduction Guides @@ -343,6 +349,15 @@ at the expense of extra computational cost. --- +=== Configuration Macros + +==== `BOOST_UNORDERED_ENABLE_STATS` + +Globally define this macro to enable xref:#stats[statistics calculation] for the container. Note +that this option decreases the overall performance of many operations. + +--- + === Typedefs [source,c++,subs=+quotes] @@ -439,6 +454,8 @@ unordered_flat_map(unordered_flat_map&& other); The move constructor. The internal bucket array of `other` is transferred directly to the new container. The hash function, predicate and allocator are moved-constructed from `other`. +If statistics are xref:unordered_flat_map_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` and calls `other.reset_stats()`. 
--- @@ -481,6 +498,9 @@ unordered_flat_map(unordered_flat_map&& other, Allocator const& a); If `a == other.get_allocator()`, the elements of `other` are transferred directly to the new container; otherwise, elements are moved-constructed from those of `other`. The hash function and predicate are moved-constructed from `other`, and the allocator is copy-constructed from `a`. +If statistics are xref:unordered_flat_map_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` iff `a == other.get_allocator()`, +and always calls `other.reset_stats()`. --- @@ -493,6 +513,8 @@ unordered_flat_map(concurrent_flat_map&& other); Move construction from a xref:#concurrent_flat_map[`concurrent_flat_map`]. The internal bucket array of `other` is transferred directly to the new container. The hash function, predicate and allocator are moved-constructed from `other`. +If statistics are xref:unordered_flat_map_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` and calls `other.reset_stats()`. [horizontal] Complexity:;; Constant time. @@ -651,6 +673,9 @@ The move assignment operator. Destroys previously existing elements, swaps the h and move-assigns the allocator from `other` if `Alloc::propagate_on_container_move_assignment` exists and `Alloc::propagate_on_container_move_assignment::value` is `true`. If at this point the allocator is equal to `other.get_allocator()`, the internal bucket array of `other` is transferred directly to the new container; otherwise, inserts move-constructed copies of the elements of `other`. +If statistics are xref:unordered_flat_map_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` iff the final allocator is equal to `other.get_allocator()`, +and always calls `other.reset_stats()`. 
--- @@ -1364,6 +1389,32 @@ Invalidates iterators, pointers and references, and changes the order of element [horizontal] Throws:;; The function has no effect if an exception is thrown, unless it is thrown by the container's hash function or comparison function. +--- + +=== Statistics + +==== get_stats +```c++ +stats get_stats() const; +``` + +[horizontal] +Returns:;; A statistical description of the insertion and lookup operations performed by the container so far. +Notes:;; Only available if xref:stats[statistics calculation] is xref:unordered_flat_map_boost_unordered_enable_stats[enabled]. + +--- + +==== reset_stats +```c++ +void reset_stats() noexcept; +``` + +[horizontal] +Effects:;; Sets to zero the internal statistics kept by the container. +Notes:;; Only available if xref:stats[statistics calculation] is xref:unordered_flat_map_boost_unordered_enable_stats[enabled]. + +--- + === Deduction Guides A deduction guide will not participate in overload resolution if any of the following are true: diff --git a/doc/unordered/unordered_flat_set.adoc b/doc/unordered/unordered_flat_set.adoc index 48ca0837..caa58586 100644 --- a/doc/unordered/unordered_flat_set.adoc +++ b/doc/unordered/unordered_flat_set.adoc @@ -53,6 +53,8 @@ namespace boost { using iterator = _implementation-defined_; using const_iterator = _implementation-defined_; + using stats = xref:stats_stats_type[__stats-type__]; // if statistics are xref:unordered_flat_set_boost_unordered_enable_stats[enabled] + // construct/copy/destroy xref:#unordered_flat_set_default_constructor[unordered_flat_set](); explicit xref:#unordered_flat_set_bucket_count_constructor[unordered_flat_set](size_type n, @@ -172,6 +174,10 @@ namespace boost { size_type xref:#unordered_flat_set_max_load[max_load]() const noexcept; void xref:#unordered_flat_set_rehash[rehash](size_type n); void xref:#unordered_flat_set_reserve[reserve](size_type n); + + // statistics (if xref:unordered_flat_set_boost_unordered_enable_stats[enabled]) + 
stats xref:#unordered_flat_set_get_stats[get_stats]() const; + void xref:#unordered_flat_set_reset_stats[reset_stats]() noexcept; }; // Deduction Guides @@ -291,6 +297,15 @@ at the expense of extra computational cost. --- +=== Configuration Macros + +==== `BOOST_UNORDERED_ENABLE_STATS` + +Globally define this macro to enable xref:#stats[statistics calculation] for the container. Note +that this option decreases the overall performance of many operations. + +--- + === Typedefs [source,c++,subs=+quotes] @@ -387,6 +402,8 @@ unordered_flat_set(unordered_flat_set&& other); The move constructor. The internal bucket array of `other` is transferred directly to the new container. The hash function, predicate and allocator are moved-constructed from `other`. +If statistics are xref:unordered_flat_set_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` and calls `other.reset_stats()`. --- @@ -429,6 +446,9 @@ unordered_flat_set(unordered_flat_set&& other, Allocator const& a); If `a == other.get_allocator()`, the elements of `other` are transferred directly to the new container; otherwise, elements are moved-constructed from those of `other`. The hash function and predicate are moved-constructed from `other`, and the allocator is copy-constructed from `a`. +If statistics are xref:unordered_flat_set_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` iff `a == other.get_allocator()`, +and always calls `other.reset_stats()`. --- @@ -441,6 +461,8 @@ unordered_flat_set(concurrent_flat_set&& other); Move construction from a xref:#concurrent_flat_set[`concurrent_flat_set`]. The internal bucket array of `other` is transferred directly to the new container. The hash function, predicate and allocator are moved-constructed from `other`. 
+If statistics are xref:unordered_flat_set_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` and calls `other.reset_stats()`. [horizontal] Complexity:;; Constant time. @@ -599,6 +621,9 @@ The move assignment operator. Destroys previously existing elements, swaps the h and move-assigns the allocator from `other` if `Alloc::propagate_on_container_move_assignment` exists and `Alloc::propagate_on_container_move_assignment::value` is `true`. If at this point the allocator is equal to `other.get_allocator()`, the internal bucket array of `other` is transferred directly to the new container; otherwise, inserts move-constructed copies of the elements of `other`. +If statistics are xref:unordered_flat_set_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` iff the final allocator is equal to `other.get_allocator()`, +and always calls `other.reset_stats()`. --- @@ -1137,6 +1162,32 @@ Invalidates iterators, pointers and references, and changes the order of element [horizontal] Throws:;; The function has no effect if an exception is thrown, unless it is thrown by the container's hash function or comparison function. +--- + +=== Statistics + +==== get_stats +```c++ +stats get_stats() const; +``` + +[horizontal] +Returns:;; A statistical description of the insertion and lookup operations performed by the container so far. +Notes:;; Only available if xref:stats[statistics calculation] is xref:unordered_flat_set_boost_unordered_enable_stats[enabled]. + +--- + +==== reset_stats +```c++ +void reset_stats() noexcept; +``` + +[horizontal] +Effects:;; Sets to zero the internal statistics kept by the container. +Notes:;; Only available if xref:stats[statistics calculation] is xref:unordered_flat_set_boost_unordered_enable_stats[enabled]. 
+ +--- + === Deduction Guides A deduction guide will not participate in overload resolution if any of the following are true: diff --git a/doc/unordered/unordered_node_map.adoc b/doc/unordered/unordered_node_map.adoc index 8056bb64..f5f7d77f 100644 --- a/doc/unordered/unordered_node_map.adoc +++ b/doc/unordered/unordered_node_map.adoc @@ -57,6 +57,8 @@ namespace boost { using node_type = _implementation-defined_; using insert_return_type = _implementation-defined_; + using stats = xref:stats_stats_type[__stats-type__]; // if statistics are xref:unordered_node_map_boost_unordered_enable_stats[enabled] + // construct/copy/destroy xref:#unordered_node_map_default_constructor[unordered_node_map](); explicit xref:#unordered_node_map_bucket_count_constructor[unordered_node_map](size_type n, @@ -217,6 +219,10 @@ namespace boost { size_type xref:#unordered_node_map_max_load[max_load]() const noexcept; void xref:#unordered_node_map_rehash[rehash](size_type n); void xref:#unordered_node_map_reserve[reserve](size_type n); + + // statistics (if xref:unordered_node_map_boost_unordered_enable_stats[enabled]) + stats xref:#unordered_node_map_get_stats[get_stats]() const; + void xref:#unordered_node_map_reset_stats[reset_stats]() noexcept; }; // Deduction Guides @@ -345,6 +351,15 @@ at the expense of extra computational cost. --- +=== Configuration Macros + +==== `BOOST_UNORDERED_ENABLE_STATS` + +Globally define this macro to enable xref:#stats[statistics calculation] for the container. Note +that this option decreases the overall performance of many operations. + +--- + === Typedefs [source,c++,subs=+quotes] @@ -472,6 +487,8 @@ unordered_node_map(unordered_node_map&& other); The move constructor. The internal bucket array of `other` is transferred directly to the new container. The hash function, predicate and allocator are moved-constructed from `other`. 
+If statistics are xref:unordered_node_map_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` and calls `other.reset_stats()`. --- @@ -514,6 +531,9 @@ unordered_node_map(unordered_node_map&& other, Allocator const& a); If `a == other.get_allocator()`, the element nodes of `other` are transferred directly to the new container; otherwise, elements are moved-constructed from those of `other`. The hash function and predicate are moved-constructed from `other`, and the allocator is copy-constructed from `a`. +If statistics are xref:unordered_node_map_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` iff `a == other.get_allocator()`, +and always calls `other.reset_stats()`. --- @@ -668,6 +688,9 @@ The move assignment operator. Destroys previously existing elements, swaps the h and move-assigns the allocator from `other` if `Alloc::propagate_on_container_move_assignment` exists and `Alloc::propagate_on_container_move_assignment::value` is `true`. If at this point the allocator is equal to `other.get_allocator()`, the internal bucket array of `other` is transferred directly to the new container; otherwise, inserts move-constructed copies of the elements of `other`. +If statistics are xref:unordered_node_map_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` iff the final allocator is equal to `other.get_allocator()`, +and always calls `other.reset_stats()`. --- @@ -1451,6 +1474,32 @@ Invalidates iterators and changes the order of elements. [horizontal] Throws:;; The function has no effect if an exception is thrown, unless it is thrown by the container's hash function or comparison function. +--- + +=== Statistics + +==== get_stats +```c++ +stats get_stats() const; +``` + +[horizontal] +Returns:;; A statistical description of the insertion and lookup operations performed by the container so far. 
+Notes:;; Only available if xref:stats[statistics calculation] is xref:unordered_node_map_boost_unordered_enable_stats[enabled]. + +--- + +==== reset_stats +```c++ +void reset_stats() noexcept; +``` + +[horizontal] +Effects:;; Sets to zero the internal statistics kept by the container. +Notes:;; Only available if xref:stats[statistics calculation] is xref:unordered_node_map_boost_unordered_enable_stats[enabled]. + +--- + === Deduction Guides A deduction guide will not participate in overload resolution if any of the following are true: diff --git a/doc/unordered/unordered_node_set.adoc b/doc/unordered/unordered_node_set.adoc index f5b5fdf2..af1255d5 100644 --- a/doc/unordered/unordered_node_set.adoc +++ b/doc/unordered/unordered_node_set.adoc @@ -52,6 +52,8 @@ namespace boost { using node_type = _implementation-defined_; using insert_return_type = _implementation-defined_; + using stats = xref:stats_stats_type[__stats-type__]; // if statistics are xref:unordered_node_set_boost_unordered_enable_stats[enabled] + // construct/copy/destroy xref:#unordered_node_set_default_constructor[unordered_node_set](); explicit xref:#unordered_node_set_bucket_count_constructor[unordered_node_set](size_type n, @@ -176,6 +178,10 @@ namespace boost { size_type xref:#unordered_node_set_max_load[max_load]() const noexcept; void xref:#unordered_node_set_rehash[rehash](size_type n); void xref:#unordered_node_set_reserve[reserve](size_type n); + + // statistics (if xref:unordered_node_set_boost_unordered_enable_stats[enabled]) + stats xref:#unordered_node_set_get_stats[get_stats]() const; + void xref:#unordered_node_set_reset_stats[reset_stats]() noexcept; }; // Deduction Guides @@ -294,6 +300,15 @@ at the expense of extra computational cost. --- +=== Configuration Macros + +==== `BOOST_UNORDERED_ENABLE_STATS` + +Globally define this macro to enable xref:#stats[statistics calculation] for the container. Note +that this option decreases the overall performance of many operations. 
+ +--- + === Typedefs [source,c++,subs=+quotes] @@ -424,6 +439,8 @@ unordered_node_set(unordered_node_set&& other); The move constructor. The internal bucket array of `other` is transferred directly to the new container. The hash function, predicate and allocator are moved-constructed from `other`. +If statistics are xref:unordered_node_set_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` and calls `other.reset_stats()`. --- @@ -466,6 +483,9 @@ unordered_node_set(unordered_node_set&& other, Allocator const& a); If `a == other.get_allocator()`, the element nodes of `other` are transferred directly to the new container; otherwise, elements are moved-constructed from those of `other`. The hash function and predicate are moved-constructed from `other`, and the allocator is copy-constructed from `a`. +If statistics are xref:unordered_node_set_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` iff `a == other.get_allocator()`, +and always calls `other.reset_stats()`. --- @@ -620,6 +640,9 @@ The move assignment operator. Destroys previously existing elements, swaps the h and move-assigns the allocator from `other` if `Alloc::propagate_on_container_move_assignment` exists and `Alloc::propagate_on_container_move_assignment::value` is `true`. If at this point the allocator is equal to `other.get_allocator()`, the internal bucket array of `other` is transferred directly to the new container; otherwise, inserts move-constructed copies of the elements of `other`. +If statistics are xref:unordered_node_set_boost_unordered_enable_stats[enabled], +transfers the internal statistical information from `other` iff the final allocator is equal to `other.get_allocator()`, +and always calls `other.reset_stats()`. --- @@ -1228,6 +1251,32 @@ Invalidates iterators and changes the order of elements. 
[horizontal] Throws:;; The function has no effect if an exception is thrown, unless it is thrown by the container's hash function or comparison function. +--- + +=== Statistics + +==== get_stats +```c++ +stats get_stats() const; +``` + +[horizontal] +Returns:;; A statistical description of the insertion and lookup operations performed by the container so far. +Notes:;; Only available if xref:stats[statistics calculation] is xref:unordered_node_set_boost_unordered_enable_stats[enabled]. + +--- + +==== reset_stats +```c++ +void reset_stats() noexcept; +``` + +[horizontal] +Effects:;; Sets to zero the internal statistics kept by the container. +Notes:;; Only available if xref:stats[statistics calculation] is xref:unordered_node_set_boost_unordered_enable_stats[enabled]. + +--- + === Deduction Guides A deduction guide will not participate in overload resolution if any of the following are true: From a527745ff8a9898bd9c7bd8b73999f6dcb3b3741 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Tue, 7 May 2024 20:13:20 +0200 Subject: [PATCH 36/39] added example of stats for well- and ill-behaved hash functions --- benchmark/string_stats.cpp | 342 +++++++++++++++++++++++++++++++++++++ 1 file changed, 342 insertions(+) create mode 100644 benchmark/string_stats.cpp diff --git a/benchmark/string_stats.cpp b/benchmark/string_stats.cpp new file mode 100644 index 00000000..bc7fa909 --- /dev/null +++ b/benchmark/string_stats.cpp @@ -0,0 +1,342 @@ +// Copyright 2021 Peter Dimov. +// Copyright 2023-2024 Joaquin M Lopez Munoz. +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#define _SILENCE_CXX17_OLD_ALLOCATOR_MEMBERS_DEPRECATION_WARNING +#define _SILENCE_CXX20_CISO646_REMOVED_WARNING + +#define BOOST_UNORDERED_ENABLE_STATS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std::chrono_literals; + +static void print_time( std::chrono::steady_clock::time_point & t1, char const* label, std::uint32_t s, std::size_t size ) +{ + auto t2 = std::chrono::steady_clock::now(); + + std::cout << label << ": " << ( t2 - t1 ) / 1ms << " ms (s=" << s << ", size=" << size << ")\n"; + + t1 = t2; +} + +constexpr unsigned N = 50'000; +constexpr int K = 10; + +static std::vector indices1, indices2; + +static std::string make_index( unsigned x ) +{ + char buffer[ 64 ]; + std::snprintf( buffer, sizeof(buffer), "pfx_%u_sfx", x ); + + return buffer; +} + +static std::string make_random_index( unsigned x ) +{ + char buffer[ 64 ]; + std::snprintf( buffer, sizeof(buffer), "pfx_%0*d_%u_sfx", x % 8 + 1, 0, x ); + + return buffer; +} + +static void init_indices() +{ + indices1.reserve( N*2+1 ); + indices1.push_back( make_index( 0 ) ); + + for( unsigned i = 1; i <= N*2; ++i ) + { + indices1.push_back( make_index( i ) ); + } + + indices2.reserve( N*2+1 ); + indices2.push_back( make_index( 0 ) ); + + { + boost::detail::splitmix64 rng; + + for( unsigned i = 1; i <= N*2; ++i ) + { + indices2.push_back( make_random_index( static_cast( rng() ) ) ); + } + } +} + +template BOOST_NOINLINE void test_insert( Map& map, std::chrono::steady_clock::time_point & t1 ) +{ + for( unsigned i = 1; i <= N; ++i ) + { + map.insert( { indices1[ i ], i } ); + } + + print_time( t1, "Consecutive insert", 0, map.size() ); + + for( unsigned i = 1; i <= N; ++i ) + { + map.insert( { indices2[ i ], i } ); + } + + print_time( t1, "Random insert", 0, map.size() ); + + std::cout << std::endl; +} + +template BOOST_NOINLINE void test_lookup( Map& map, std::chrono::steady_clock::time_point 
& t1 ) +{ + std::uint32_t s; + + s = 0; + + for( int j = 0; j < K; ++j ) + { + for( unsigned i = 1; i <= N * 2; ++i ) + { + auto it = map.find( indices1[ i ] ); + if( it != map.end() ) s += it->second; + } + } + + print_time( t1, "Consecutive lookup", s, map.size() ); + + s = 0; + + for( int j = 0; j < K; ++j ) + { + for( unsigned i = 1; i <= N * 2; ++i ) + { + auto it = map.find( indices2[ i ] ); + if( it != map.end() ) s += it->second; + } + } + + print_time( t1, "Random lookup", s, map.size() ); + + std::cout << std::endl; +} + +template BOOST_NOINLINE void test_iteration( Map& map, std::chrono::steady_clock::time_point & t1 ) +{ + auto it = map.begin(); + + while( it != map.end() ) + { + if( it->second & 1 ) + { + if constexpr( std::is_void_v< decltype( map.erase( it ) ) > ) + { + map.erase( it++ ); + } + else + { + it = map.erase( it ); + } + } + else + { + ++it; + } + } + + print_time( t1, "Iterate and erase odd elements", 0, map.size() ); + + std::cout << std::endl; +} + +template BOOST_NOINLINE void test_erase( Map& map, std::chrono::steady_clock::time_point & t1 ) +{ + for( unsigned i = 1; i <= N; ++i ) + { + map.erase( indices1[ i ] ); + } + + print_time( t1, "Consecutive erase", 0, map.size() ); + + for( unsigned i = 1; i <= N; ++i ) + { + map.erase( indices2[ i ] ); + } + + print_time( t1, "Random erase", 0, map.size() ); + + std::cout << std::endl; +} + +// + +// All Unordered container use the same struct +using stats = boost::unordered_flat_map::stats; + +struct record +{ + std::string label_; + long long time_; + stats stats_; +}; + +static std::vector records; + +template class Map> BOOST_NOINLINE void test( char const* label ) +{ + std::cout << label << ":\n\n"; + + Map map; + + auto t0 = std::chrono::steady_clock::now(); + auto t1 = t0; + + test_insert( map, t1 ); + + record rec = { label, 0 }; + + test_lookup( map, t1 ); + test_iteration( map, t1 ); + test_lookup( map, t1 ); + test_erase( map, t1 ); + + auto tN = 
std::chrono::steady_clock::now(); + std::cout << "Total: " << ( tN - t0 ) / 1ms << " ms\n\n"; + + rec.time_ = ( tN - t0 ) / 1ms; + rec.stats_ = map.get_stats(); + records.push_back( rec ); +} + +// + +template using boost_unordered_flat_map = + boost::unordered_flat_map, std::equal_to>; + +// fnv1a_hash + +template struct fnv1a_hash_impl; + +template<> struct fnv1a_hash_impl<32> +{ + std::size_t operator()( std::string const& s ) const + { + std::size_t h = 0x811C9DC5u; + + char const * first = s.data(); + char const * last = first + s.size(); + + for( ; first != last; ++first ) + { + h ^= static_cast( *first ); + h *= 0x01000193ul; + } + + return h; + } +}; + +template<> struct fnv1a_hash_impl<64> +{ + std::size_t operator()( std::string const& s ) const + { + std::size_t h = 0xCBF29CE484222325ull; + + char const * first = s.data(); + char const * last = first + s.size(); + + for( ; first != last; ++first ) + { + h ^= static_cast( *first ); + h *= 0x00000100000001B3ull; + } + + return h; + } +}; + +struct fnv1a_hash: fnv1a_hash_impl< std::numeric_limits::digits > +{ + using is_avalanching = void; +}; + +template using boost_unordered_flat_map_fnv1a = + boost::unordered_flat_map>; + +// slightly bad hash + +struct slightly_bad_hash +{ + using is_avalanching = void; + + std::size_t operator()( std::string const& s ) const + { + std::size_t h = s.size(); + + for( auto ch: s ) + { + h *= 0x811C9DC4u; // multiplicative factor is even! 
+ h += static_cast( ch ); + } + + return h; + } +}; + +template using boost_unordered_flat_map_slightly_bad_hash = + boost::unordered_flat_map>; + +// bad hash + +struct bad_hash +{ + using is_avalanching = void; + + std::size_t operator()( std::string const& s ) const + { + std::size_t h = s.size(); + + for( auto ch: s ) + { + h *= 31; + h += static_cast( ch ); + } + + return h; + } +}; + +template using boost_unordered_flat_map_bad_hash = + boost::unordered_flat_map>; + +// + +int main() +{ + init_indices(); + + test( "boost::unordered_flat_map" ); + test( "boost::unordered_flat_map, FNV-1a" ); + test( "boost::unordered_flat_map, slightly_bad_hash" ); + test( "boost::unordered_flat_map, bad_hash" ); + + std::cout << "---\n\n"; + + for( auto const& x: records ) + { + std::cout << std::setw( 46 ) << ( x.label_ + ": " ) << std::setw( 5 ) << x.time_ << " ms\n" + << std::setw( 46 ) << "insertion: " + << "probe length " << x.stats_.insertion.probe_length.average << "\n" + << std::setw( 46 ) << "successful lookup: " + << "probe length " << x.stats_.successful_lookup.probe_length.average + << ", num comparisons " << x.stats_.successful_lookup.num_comparisons.average << "\n" + << std::setw( 46 ) << "unsuccessful lookup: " + << "probe length " << x.stats_.unsuccessful_lookup.probe_length.average + << ", num comparisons " << x.stats_.unsuccessful_lookup.num_comparisons.average << "\n"; + } +} From d46e83296ca27cf6fc2f8132d36c1efd0dbd822e Mon Sep 17 00:00:00 2001 From: joaquintides Date: Tue, 7 May 2024 20:13:43 +0200 Subject: [PATCH 37/39] added section on hash quality, avalanching and stats --- doc/unordered.adoc | 1 + doc/unordered/hash_quality.adoc | 156 ++++++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 doc/unordered/hash_quality.adoc diff --git a/doc/unordered.adoc b/doc/unordered.adoc index 5da1b442..1febb283 100644 --- a/doc/unordered.adoc +++ b/doc/unordered.adoc @@ -15,6 +15,7 @@ include::unordered/buckets.adoc[] 
include::unordered/hash_equality.adoc[] include::unordered/regular.adoc[] include::unordered/concurrent.adoc[] +include::unordered/hash_quality.adoc[] include::unordered/compliance.adoc[] include::unordered/structures.adoc[] include::unordered/benchmarks.adoc[] diff --git a/doc/unordered/hash_quality.adoc b/doc/unordered/hash_quality.adoc new file mode 100644 index 00000000..ed819bf3 --- /dev/null +++ b/doc/unordered/hash_quality.adoc @@ -0,0 +1,156 @@ +[#hash_quality] += Hash Quality + +:idprefix: hash_quality_ + +In order to work properly, hash tables require that the supplied hash function +be of __good quality__, roughly meaning that it uses its `std::size_t` output +space as uniformly as possible, much like a random number generator would do +—except, of course, that the value of a hash function is not random but strictly determined +by its input argument. + +Closed-addressing containers in Boost.Unordered are fairly robust against +hash functions with less-than-ideal quality, but open-addressing and concurrent +containers are much more sensitive to this factor, and their performance can +degrade dramatically if the hash function is not appropriate. In general, if +you're using functions provided by or generated with link:../../../container_hash/index.html[Boost.Hash^], +the quality will be adequate, but you have to be careful when using alternative +hash algorithms. + +The rest of this section applies only to open-addressing and concurrent containers. + +== Hash Post-mixing and the Avalanching Property + +Even if your supplied hash function is of bad quality, chances are that +the performance of Boost.Unordered containers will be acceptable, because the library +executes an internal __post-mixing__ step that improves the statistical +properties of the calculated hash values. 
This comes with an extra computational +cost: if you'd like to opt out of post-mixing, annotate your hash function as +follows: + +[source,c++] +---- +struct my_string_hash_function +{ + using is_avalanching = void; // instruct Boost.Unordered to not use post-mixing + + std::size_t operator()(const std::string& x) const + { + ... + } +}; +---- + +By setting the +xref:#hash_traits_hash_is_avalanching[hash_is_avalanching] trait, we inform Boost.Unordered +that `my_string_hash_function` is of sufficient quality to be used directly without +any post-mixing safety net. This comes at the risk of degraded performance in the +cases where the hash function is not as well-behaved as we've declared. + +== Container Statistics + +If we globally define the macro `BOOST_UNORDERED_ENABLE_STATS`, open-addressing and +concurrent containers will calculate some internal statistics directly correlated to the +quality of the hash function: + +[source,c++] +---- +#define BOOST_UNORDERED_ENABLE_STATS +#include + +... + +int main() +{ + boost::unordered_flat_map m; + ... // use m + + auto stats = m.get_stats(); + ... 
// inspect stats +} +---- + +The `stats` object provide the following information: + +[%noheader, cols="1,1,1,1,~", frame=all, grid=rows] +|=== +|`stats`|||| + +||`.insertion`|||**Insertion operations** + +|||`.count`||Number of operations + +|||`.probe_length`||Probe length per operation + +||||`.average` + +`.variance` + +`.deviation`| + +||`.successful_lookup`|||**Lookup operations (element found)** + +|||`.count`||Number of operations + +|||`.probe_length`||Probe length per operation + +||||`.average` + +`.variance` + +`.deviation`| + +|||`.num_comparisons`||Elements compared to the key per operation + +||||`.average` + +`.variance` + +`.deviation`| + +||`.unsuccessful_lookup`|||**Lookup operations (element not found)** + +|||`.count`||Number of operations + +|||`.probe_length`||Probe length per operation + +||||`.average` + +`.variance` + +`.deviation`| + +|||`.num_comparisons`||Elements compared to the key per operation + +||||`.average` + +`.variance` + +`.deviation`| +|=== + +Statistics for three internal operations are maintained: insertions (without considering +the previous lookup to determine that the key is not present yet), successful lookups +and unsuccessful lookus. _Probe length_ is the number of +xref:#structures_open_addressing_containers[bucket groups] accessed per operation. +If the hash function has good quality: + +* Average probe lengths should be close to 1.0. +* The average number of comparisons per successful lookup should be close to 1.0 (that is, +just the element found is checked). +* The average number of comparisons per unsuccessful lookup should be close to 0.0. 
+ +An link:../../benchmark/string_stats.cpp[example^] is provided that displays container +statistics for `boost::hash`, an implementation of the +https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash[FNV-1a hash^] +and two ill-behaved custom hash functions that have been incorrectly marked as avalanching: + +[listing] +---- + boost::unordered_flat_map: 319 ms + insertion: probe length 1.08771 + successful lookup: probe length 1.06206, num comparisons 1.02121 + unsuccessful lookup: probe length 1.12301, num comparisons 0.0388251 + boost::unordered_flat_map, FNV-1a: 301 ms + insertion: probe length 1.09567 + successful lookup: probe length 1.06202, num comparisons 1.0227 + unsuccessful lookup: probe length 1.12195, num comparisons 0.040527 +boost::unordered_flat_map, slightly_bad_hash: 654 ms + insertion: probe length 1.03443 + successful lookup: probe length 1.04137, num comparisons 6.22152 + unsuccessful lookup: probe length 1.29334, num comparisons 11.0335 + boost::unordered_flat_map, bad_hash: 12216 ms + insertion: probe length 699.218 + successful lookup: probe length 590.183, num comparisons 43.4886 + unsuccessful lookup: probe length 1361.65, num comparisons 75.238 +---- From 93f33c336bb28b262838f435893c75a2c31647e2 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Wed, 8 May 2024 11:11:09 +0200 Subject: [PATCH 38/39] typos/editorial --- benchmark/string_stats.cpp | 2 +- doc/unordered/hash_quality.adoc | 91 +++++++++++++++------------------ doc/unordered/rationale.adoc | 2 +- 3 files changed, 42 insertions(+), 53 deletions(-) diff --git a/benchmark/string_stats.cpp b/benchmark/string_stats.cpp index bc7fa909..b3bd3ba6 100644 --- a/benchmark/string_stats.cpp +++ b/benchmark/string_stats.cpp @@ -337,6 +337,6 @@ int main() << ", num comparisons " << x.stats_.successful_lookup.num_comparisons.average << "\n" << std::setw( 46 ) << "unsuccessful lookup: " << "probe length " << x.stats_.unsuccessful_lookup.probe_length.average - << ", 
num comparisons " << x.stats_.unsuccessful_lookup.num_comparisons.average << "\n"; + << ", num comparisons " << x.stats_.unsuccessful_lookup.num_comparisons.average << "\n\n"; } } diff --git a/doc/unordered/hash_quality.adoc b/doc/unordered/hash_quality.adoc index ed819bf3..28ec19aa 100644 --- a/doc/unordered/hash_quality.adoc +++ b/doc/unordered/hash_quality.adoc @@ -21,11 +21,12 @@ The rest of this section applies only to open-addressing and concurrent containe == Hash Post-mixing and the Avalanching Property -Even if your supplied hash function is of bad quality, chances are that +Even if your supplied hash function does not conform to the uniform behavior +required by open addressing, chances are that the performance of Boost.Unordered containers will be acceptable, because the library executes an internal __post-mixing__ step that improves the statistical properties of the calculated hash values. This comes with an extra computational -cost: if you'd like to opt out of post-mixing, annotate your hash function as +cost; if you'd like to opt out of post-mixing, annotate your hash function as follows: [source,c++] @@ -72,58 +73,43 @@ int main() The `stats` object provide the following information: -[%noheader, cols="1,1,1,1,~", frame=all, grid=rows] -|=== -|`stats`|||| - -||`.insertion`|||**Insertion operations** - -|||`.count`||Number of operations - -|||`.probe_length`||Probe length per operation - -||||`.average` + -`.variance` + -`.deviation`| - -||`.successful_lookup`|||**Lookup operations (element found)** - -|||`.count`||Number of operations - -|||`.probe_length`||Probe length per operation - -||||`.average` + -`.variance` + -`.deviation`| - -|||`.num_comparisons`||Elements compared to the key per operation - -||||`.average` + -`.variance` + -`.deviation`| - -||`.unsuccessful_lookup`|||**Lookup operations (element not found)** - -|||`.count`||Number of operations - -|||`.probe_length`||Probe length per operation - -||||`.average` + -`.variance` + 
-`.deviation`| - -|||`.num_comparisons`||Elements compared to the key per operation - -||||`.average` + -`.variance` + -`.deviation`| -|=== +[source,subs=+quotes] +---- +stats + .insertion // *Insertion operations* + .count // Number of operations + .probe_length // Probe length per operation + .average + .variance + .deviation + .successful_lookup // *Lookup operations (element found)* + .count // Number of operations + .probe_length // Probe length per operation + .average + .variance + .deviation + .num_comparisons // Elements compared per operation + .average + .variance + .deviation + .unsuccessful_lookup // *Lookup operations (element not found)* + .count // Number of operations + .probe_length // Probe length per operation + .average + .variance + .deviation + .num_comparisons // Elements compared per operation + .average + .variance + .deviation +---- Statistics for three internal operations are maintained: insertions (without considering -the previous lookup to determine that the key is not present yet), successful lookups -and unsuccessful lookus. _Probe length_ is the number of +the previous lookup to determine that the key is not present yet), successful lookups, +and unsuccessful lookups (including those issued internally when inserting elements). +_Probe length_ is the number of xref:#structures_open_addressing_containers[bucket groups] accessed per operation. -If the hash function has good quality: +If the hash function behaves properly: * Average probe lengths should be close to 1.0. 
* The average number of comparisons per successful lookup should be close to 1.0 (that is, @@ -141,14 +127,17 @@ and two ill-behaved custom hash functions that have been incorrectly marked as a insertion: probe length 1.08771 successful lookup: probe length 1.06206, num comparisons 1.02121 unsuccessful lookup: probe length 1.12301, num comparisons 0.0388251 + boost::unordered_flat_map, FNV-1a: 301 ms insertion: probe length 1.09567 successful lookup: probe length 1.06202, num comparisons 1.0227 unsuccessful lookup: probe length 1.12195, num comparisons 0.040527 + boost::unordered_flat_map, slightly_bad_hash: 654 ms insertion: probe length 1.03443 successful lookup: probe length 1.04137, num comparisons 6.22152 unsuccessful lookup: probe length 1.29334, num comparisons 11.0335 + boost::unordered_flat_map, bad_hash: 12216 ms insertion: probe length 699.218 successful lookup: probe length 590.183, num comparisons 43.4886 diff --git a/doc/unordered/rationale.adoc b/doc/unordered/rationale.adoc index 256800ab..a531875f 100644 --- a/doc/unordered/rationale.adoc +++ b/doc/unordered/rationale.adoc @@ -102,7 +102,7 @@ and *high* and *low* are the upper and lower halves of an extended word, respect In 64-bit architectures, _C_ is the integer part of 2^64^∕https://en.wikipedia.org/wiki/Golden_ratio[_φ_], whereas in 32 bits _C_ = 0xE817FB2Du has been obtained from https://arxiv.org/abs/2001.05304[Steele and Vigna (2021)^]. -When using a hash function directly suitable for open addressing, post-mixing can be opted out by via a dedicated <>trait. +When using a hash function directly suitable for open addressing, post-mixing can be opted out of via a dedicated <>trait. `boost::hash` specializations for string types are marked as avalanching. 
=== Platform Interoperability From 475acdc351133a1e47364f43401f2fdb1cf25706 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Wed, 8 May 2024 11:43:11 +0200 Subject: [PATCH 39/39] typo --- doc/unordered/hash_quality.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/unordered/hash_quality.adoc b/doc/unordered/hash_quality.adoc index 28ec19aa..5024f3f1 100644 --- a/doc/unordered/hash_quality.adoc +++ b/doc/unordered/hash_quality.adoc @@ -71,7 +71,7 @@ int main() } ---- -The `stats` object provide the following information: +The `stats` object provides the following information: [source,subs=+quotes] ----