From 75a37c2616595d7ef49f83a22cfd1c6988cd9879 Mon Sep 17 00:00:00 2001 From: Peter Dimov Date: Mon, 19 Sep 2022 11:56:03 +0300 Subject: [PATCH] Update Reference section --- doc/hash/reference.adoc | 76 +++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/doc/hash/reference.adoc b/doc/hash/reference.adoc index 6f6cac9..4a6a0c5 100644 --- a/doc/hash/reference.adoc +++ b/doc/hash/reference.adoc @@ -142,9 +142,9 @@ template<class T> struct hash std::size_t operator()( T const& v ) const; ---- -Returns: :: `hash_value( v )`. +Returns: :: `hash_value(v)`. -Throws: :: Only throws if `hash_value( v )` throws. +Throws: :: Only throws if `hash_value(v)` throws. Remarks: :: The call to `hash_value` is unqualified, so that user-supplied overloads will be found via argument dependent lookup. @@ -159,44 +159,59 @@ template<class T> void hash_combine( std::size_t& seed, T const& v ); Called repeatedly to incrementally create a hash value from several variables. Effects: :: Updates `seed` with a new hash value generated by - deterministically combining it with the result of `boost::hash<T>()( v )`. + deterministically combining it with the result of `boost::hash<T>()(v)`. -Throws: :: Only throws if `boost::hash<T>()( v )` throws. On exception, +Throws: :: Only throws if `boost::hash<T>()(v)` throws. On exception, `seed` is not updated. Remarks: :: + -- -Equivalent to `seed = combine( seed, boost::hash<T>()( v ) )`, +Equivalent to `seed = combine(seed, boost::hash<T>()(v))`, where `combine(s, v)` is a mixing function that takes two arguments of type `std::size_t` and returns `std::size_t`, with the following desirable properties: -* For a constant `s`, when `v` takes all possible `size_t` values, +. For a constant `s`, when `v` takes all possible `size_t` values, `combine(s, v)` should also take all possible `size_t` values, producing a sequence that is close to random; that is, it should be a random permutation. 
+ This guarantees that for a given `seed`, `combine` does not introduce -hash collisions when none were produced by `boost::hash<T>( v )`. It -also implies that `combine(s, v)`, as a function of `v`, has good avalanche -properties; that is, small (e.g. single bit) perturbations in the input `v` -lead to large perturbations in the return value (half of the output bits -changing, on average). +hash collisions when none were produced by `boost::hash<T>(v)`; that is, +it does not lose information from the input. It also implies that +`combine(s, v)`, as a function of `v`, has good avalanche properties; +that is, small (e.g. single bit) perturbations in the input `v` lead to +large perturbations in the return value (half of the output bits changing, +on average). -* For two different seeds `s1` and `s2`, `combine(s1, v)` and +. For two different seeds `s1` and `s2`, `combine(s1, v)` and `combine(s2, v)`, treated as functions of `v`, should produce two different random permutations. -* `combine(0, 0)` should not be 0. Since a common initial value of `seed` +. `combine(0, 0)` should not be 0. Since a common initial value of `seed` is zero, `combine(0, 0) == 0` would imply that applying `hash_combine` on any sequence of zeroes, regardless of length, will produce zero. This is undesirable, as it would lead to e.g. `std::vector<int>()` and `std::vector<int>(4)` to have the same hash value. The current implementation uses the function `mix(s + 0x9e3779b9 + v)` as -`combine(s, v)`, where `mix` is a high quality random permutation over the -`std::size_t` values (with the property that `mix(0)` is 0). +`combine(s, v)`, where `mix(x)` is a high quality mixing function that is a +bijection over the `std::size_t` values, of the form + +[source] +---- +x ^= x >> k1; +x *= m1; +x ^= x >> k2; +x *= m2; +x ^= x >> k3; +---- + +where the constants `k1`, `k2`, `k3`, `m1`, `m2` are suitably chosen. + +Note that `mix(0)` is 0. 
This is why we add the arbitrary constant +`0x9e3779b9` to meet the third requirement above. -- === hash_range @@ -208,14 +223,23 @@ template<class It> void hash_range( std::size_t& seed, It first, It last ); Effects: :: + +-- +When `typename std::iterator_traits<It>::value_type` is not `char`, `signed char`, +`unsigned char`, + [source] ---- for( ; first != last; ++first ) { - hash_combine<typename std::iterator_traits<It>::value_type>( seed, *first ); + boost::hash_combine<typename std::iterator_traits<It>::value_type>( seed, *first ); } ---- +Otherwise, bytes from `[first, last)` are coalesced in an unspecified manner +and then passed to `hash_combine`, more than one at a time. This is done in +order to improve performance when hashing strings. +-- + [source] ---- template<class It> std::size_t hash_range( It first, It last ); @@ -226,7 +250,7 @@ Effects: :: [source] ---- size_t seed = 0; -hash_range( seed, first, last ); +boost::hash_range( seed, first, last ); return seed; ---- @@ -238,7 +262,7 @@ template<class It> void hash_unordered_range( std::size_t& seed, It first, It la ---- Effects: :: Updates `seed` with the values of - `boost::hash<typename std::iterator_traits<It>::value_type>()( *i )` + `boost::hash<typename std::iterator_traits<It>::value_type>()(*i)` for each `i` in `[first, last)`, such that the order of elements does not affect the final result. @@ -252,7 +276,7 @@ Effects: :: [source] ---- size_t seed = 0; -hash_unordered_range( seed, first, last ); +boost::hash_unordered_range( seed, first, last ); return seed; ---- @@ -282,7 +306,7 @@ Returns: :: `static_cast<std::size_t>(v)`. Remarks: :: -`hash_value( std::to_underlying(v) )` would be better, but {cpp}03 +`hash_value(std::to_underlying(v))` would be better, but {cpp}03 compatibility mandates the current implementation. [source] @@ -315,7 +339,7 @@ template ---- Returns: :: -`hash_range( v, v + N )`. +`boost::hash_range( v, v + N )`. [source] ---- @@ -384,7 +408,7 @@ template ---- Returns: :: -`hash_range( v.begin(), v.end() )`. +`boost::hash_range( v.begin(), v.end() )`. 
Remarks: :: This overload is only enabled when @@ -402,7 +426,7 @@ template ---- Returns: :: -`hash_range( v.data(), v.data() + v.size() )`. +`boost::hash_range( v.data(), v.data() + v.size() )`. Remarks: :: This overload handles all standard contiguous containers, such as @@ -416,7 +440,7 @@ template ---- Returns: :: -`hash_unordered_range( v.begin(), v.end() )`. +`boost::hash_unordered_range( v.begin(), v.end() )`. Remarks: :: This overload handles the standard unordered containers, such as @@ -432,7 +456,7 @@ template ---- Returns: :: -`hash<T*>( v.get() )`. +`boost::hash<T*>( v.get() )`. [source] ---- @@ -468,7 +492,7 @@ template Returns: :: For a disengaged `v`, an unspecified constant value; otherwise, -`hash<T>()( *v )`. +`boost::hash<T>()( *v )`. [source] ----