From bea92e8842f65de6243ec4041d7337f7b339694e Mon Sep 17 00:00:00 2001 From: Daniel James Date: Sat, 16 Nov 2013 20:36:27 +0000 Subject: [PATCH] Merge unordered and hash from trunk. - Only use Visual C++ pragma with appropriate compilers. - Working link for Thomas Wang's hash function. - Updated unordered rationale. - Fix `unnecessary_copy_tests` for Visual C++ 12. - Some extra insert tests. [SVN r86728] --- doc/rationale.qbk | 21 +++++++----- test/unordered/insert_tests.cpp | 39 +++++++++++++++++++++++ test/unordered/unnecessary_copy_tests.cpp | 2 +- 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/doc/rationale.qbk b/doc/rationale.qbk index 90d982e4..788cabc8 100644 --- a/doc/rationale.qbk +++ b/doc/rationale.qbk @@ -3,7 +3,7 @@ / file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) ] [def __wang__ - [@http://www.concentric.net/~Ttwang/tech/inthash.htm + [@http://web.archive.org/web/20121102023700/http://www.concentric.net/~Ttwang/tech/inthash.htm Thomas Wang's article on integer hash functions]] [section:rationale Implementation Rationale] @@ -85,7 +85,8 @@ of 2. Using a prime number of buckets, and choosing a bucket by using the modulus of the hash function's result will usually give a good result. The downside -is that the required modulus operation is fairly expensive. +is that the required modulus operation is fairly expensive. This is what the +containers do in most cases. Using a power of 2 allows for much quicker selection of the bucket to use, but at the expense of loosing the upper bits of the hash value. @@ -95,12 +96,16 @@ functions this can't be relied on. To avoid this a transformation could be applied to the hash function, for an example see __wang__. Unfortunately, a transformation like Wang's requires -knowledge of the number of bits in the hash value, so it isn't portable enough. -This leaves more expensive methods, such as Knuth's Multiplicative Method -(mentioned in Wang's article). 
These don't tend to work as well as taking the -modulus of a prime, and the extra computation required might negate -efficiency advantage of power of 2 hash tables. +knowledge of the number of bits in the hash value, so it isn't portable enough +to use as a default. It can be applicable in certain cases so the containers +have a policy based implementation that can use this alternative technique. -So, this implementation uses a prime number for the hash table size. +Currently this is only done on 64 bit architectures, where prime number +modulus can be expensive. Although this varies depending on the architecture, +so I probably should revisit it. + +I'm also thinking of introducing a mechanism whereby a hash function can +indicate that it's safe to be used directly with power of 2 buckets, in +which case a faster plain power of 2 implementation can be used. [endsect] diff --git a/test/unordered/insert_tests.cpp b/test/unordered/insert_tests.cpp index 66469ba3..59911e12 100644 --- a/test/unordered/insert_tests.cpp +++ b/test/unordered/insert_tests.cpp @@ -576,6 +576,21 @@ UNORDERED_TEST(map_insert_range_test2, #if !defined(BOOST_NO_CXX11_HDR_INITIALIZER_LIST) +struct initialize_from_two_ints +{ + int a, b; + + friend std::size_t hash_value(initialize_from_two_ints const& x) + { + return x.a + x.b; + } + + bool operator==(initialize_from_two_ints const& x) const + { + return a == x.a && b == x.b; + } +}; + UNORDERED_AUTO_TEST(insert_initializer_list_set) { boost::unordered_set<int> set; @@ -583,6 +598,30 @@ UNORDERED_AUTO_TEST(insert_initializer_list_set) BOOST_TEST_EQ(set.size(), 3u); BOOST_TEST(set.find(1) != set.end()); BOOST_TEST(set.find(4) == set.end()); + + boost::unordered_set<initialize_from_two_ints> set2; + + set2.insert({1, 2}); + BOOST_TEST(set2.size() == 1); + BOOST_TEST(set2.find({1,2}) != set2.end()); + BOOST_TEST(set2.find({2,1}) == set2.end()); + + set2.insert({{3,4},{5,6},{7,8}}); + BOOST_TEST(set2.size() == 4); + BOOST_TEST(set2.find({1,2}) != set2.end()); +
BOOST_TEST(set2.find({3,4}) != set2.end()); + BOOST_TEST(set2.find({5,6}) != set2.end()); + BOOST_TEST(set2.find({7,8}) != set2.end()); + BOOST_TEST(set2.find({8,7}) == set2.end()); + + set2.insert({{2, 1}, {3,4}}); + BOOST_TEST(set2.size() == 5); + BOOST_TEST(set2.find({1,2}) != set2.end()); + BOOST_TEST(set2.find({2,1}) != set2.end()); + BOOST_TEST(set2.find({3,4}) != set2.end()); + BOOST_TEST(set2.find({5,6}) != set2.end()); + BOOST_TEST(set2.find({7,8}) != set2.end()); + BOOST_TEST(set2.find({8,7}) == set2.end()); } UNORDERED_AUTO_TEST(insert_initializer_list_multiset) diff --git a/test/unordered/unnecessary_copy_tests.cpp b/test/unordered/unnecessary_copy_tests.cpp index e33f999c..2c64bd7c 100644 --- a/test/unordered/unnecessary_copy_tests.cpp +++ b/test/unordered/unnecessary_copy_tests.cpp @@ -374,7 +374,7 @@ namespace unnecessary_copy_tests // COPY_COUNT(1) would be okay here. reset(); x.emplace(); -# if BOOST_WORKAROUND(BOOST_MSVC, >= 1700) +# if BOOST_WORKAROUND(BOOST_MSVC, == 1700) // This is a little odd, Visual C++ 11 seems to move the pair, which // results in one copy (for the const key) and one move (for the // non-const mapped value). Since 'emplace(boost::move(a))' (see below)