From baa5f0bbe70c42b3895211ef2230a265f24cfdaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Sat, 19 Mar 2016 12:20:15 +0100 Subject: [PATCH 01/16] Added adaptive sort test and refactored utilities between adaptive benches and the new test --- proj/vc7ide/Move.sln | 8 ++ proj/vc7ide/adaptive_sort_test.vcproj | 134 +++++++++++++++++++++ test/adaptive_sort_test.cpp | 91 ++++++++++++++ test/bench_merge.cpp | 163 ++++++++----------------- test/bench_sort.cpp | 164 ++++++++------------------ test/order_type.hpp | 82 +++++++++++++ 6 files changed, 412 insertions(+), 230 deletions(-) create mode 100644 proj/vc7ide/adaptive_sort_test.vcproj create mode 100644 test/adaptive_sort_test.cpp create mode 100644 test/order_type.hpp diff --git a/proj/vc7ide/Move.sln b/proj/vc7ide/Move.sln index 361ebae..905c497 100644 --- a/proj/vc7ide/Move.sln +++ b/proj/vc7ide/Move.sln @@ -119,6 +119,10 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bench_merge", "bench_merge. ProjectSection(ProjectDependencies) = postProject EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "adaptive_sort_test", "adaptive_sort_test.vcproj", "{CD617A28-6217-B79E-4CE2-6BA035379A6A}" + ProjectSection(ProjectDependencies) = postProject + EndProjectSection +EndProject Global GlobalSection(SolutionConfiguration) = preSolution Debug = Debug @@ -247,6 +251,10 @@ Global {CD2617A8-6217-9EB7-24CE-6C9AA035376A}.Debug.Build.0 = Debug|Win32 {CD2617A8-6217-9EB7-24CE-6C9AA035376A}.Release.ActiveCfg = Release|Win32 {CD2617A8-6217-9EB7-24CE-6C9AA035376A}.Release.Build.0 = Release|Win32 + {CD617A28-6217-B79E-4CE2-6BA035379A6A}.Debug.ActiveCfg = Debug|Win32 + {CD617A28-6217-B79E-4CE2-6BA035379A6A}.Debug.Build.0 = Debug|Win32 + {CD617A28-6217-B79E-4CE2-6BA035379A6A}.Release.ActiveCfg = Release|Win32 + {CD617A28-6217-B79E-4CE2-6BA035379A6A}.Release.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionItems) = postSolution 
..\..\..\..\boost\move\algo\adaptive_merge.hpp = ..\..\..\..\boost\move\algo\adaptive_merge.hpp diff --git a/proj/vc7ide/adaptive_sort_test.vcproj b/proj/vc7ide/adaptive_sort_test.vcproj new file mode 100644 index 0000000..7600c88 --- /dev/null +++ b/proj/vc7ide/adaptive_sort_test.vcproj @@ -0,0 +1,134 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/adaptive_sort_test.cpp b/test/adaptive_sort_test.cpp new file mode 100644 index 0000000..4f44eeb --- /dev/null +++ b/test/adaptive_sort_test.cpp @@ -0,0 +1,91 @@ +////////////////////////////////////////////////////////////////////////////// +// +// (C) Copyright Ion Gaztanaga 2015-2016. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// See http://www.boost.org/libs/move for documentation. +// +////////////////////////////////////////////////////////////////////////////// + +#include //std::srand +#include //std::next_permutation +#include //std::cout + +#include + +#include +#include +#include + +using boost::timer::cpu_timer; +using boost::timer::cpu_times; +using boost::timer::nanosecond_type; + +#include "order_type.hpp" + +#include +#include + + +template +void adaptive_sort_buffered(T *elements, std::size_t element_count, Compare comp, std::size_t BufLen) +{ + boost::movelib::unique_ptr mem(new char[sizeof(T)*BufLen]); + boost::movelib::adaptive_sort(elements, elements + element_count, comp, reinterpret_cast(mem.get()), BufLen); +} + +template +bool test_all_permutations(std::size_t const element_count, std::size_t const num_keys, std::size_t const num_iter) +{ + boost::movelib::unique_ptr elements(new T[element_count]); + boost::movelib::unique_ptr key_reps(new std::size_t[num_keys ? 
num_keys : element_count]); + std::cout << "- - N: " << element_count << ", Keys: " << num_keys << ", It: " << num_iter << " \n"; + + //Initialize keys + for(std::size_t i=0; i < element_count; ++i){ + std::size_t key = num_keys ? (i % num_keys) : i; + elements[i].key=key; + } + + std::srand(255); + + for (std::size_t i = 0; i != num_iter; ++i) + { + std::random_shuffle(elements.get(), elements.get() + element_count); + for(std::size_t i = 0; i < (num_keys ? num_keys : element_count); ++i){ + key_reps[i]=0; + } + for(std::size_t i = 0; i < element_count; ++i){ + elements[i].val = key_reps[elements[i].key]++; + } + + boost::container::vector tmp(elements.get(), elements.get()+element_count); + + boost::movelib::adaptive_sort(tmp.data(), tmp.data()+element_count, order_type_less()); + + if (!is_order_type_ordered(tmp.data(), element_count)) + { + std::cout << "\n ERROR\n"; + throw int(0); + } + } + return true; +} + +int main() +{ + #ifdef NDEBUG + const std::size_t NIter = 100; + #else + const std::size_t NIter = 10; + #endif + test_all_permutations(10001, 65, NIter); + test_all_permutations(10001, 101, NIter); + test_all_permutations(10001, 1023, NIter); + test_all_permutations(10001, 4095, NIter); + test_all_permutations(10001, 0, NIter); + + return 0; +} diff --git a/test/bench_merge.cpp b/test/bench_merge.cpp index 2ee980b..d9d3272 100644 --- a/test/bench_merge.cpp +++ b/test/bench_merge.cpp @@ -18,62 +18,18 @@ #include #include +#include "order_type.hpp" + using boost::timer::cpu_timer; using boost::timer::cpu_times; using boost::timer::nanosecond_type; - -boost::ulong_long_type num_copy; -boost::ulong_long_type num_elements; - -struct merged_type -{ - public: - std::size_t key; - std::size_t val; - - merged_type() - { - ++num_elements; - } - - merged_type(const merged_type& other) - : key(other.key), val(other.val) - { - ++num_elements; - ++num_copy; - } - - merged_type & operator=(const merged_type& other) - { - ++num_copy; - key = other.key; - val = 
other.val; - return *this; - } - - ~merged_type () - { - --num_elements; - } -}; - -boost::ulong_long_type num_compare; - //#define BOOST_MOVE_ADAPTIVE_SORT_STATS void print_stats(const char *str, boost::ulong_long_type element_count) { - std::printf("%sCmp:%8.04f Cpy:%9.04f\n", str, double(num_compare)/element_count, double(num_copy)/element_count ); + std::printf("%sCmp:%8.04f Cpy:%9.04f\n", str, double(order_type::num_compare)/element_count, double(order_type::num_copy)/element_count ); } - -template -struct counted_less -{ - bool operator()(const T &a,T const &b) const - { ++num_compare; return a.key < b.key; } -}; - #include #include #include @@ -101,21 +57,7 @@ std::size_t generate_elements(T elements[], std::size_t element_count, std::size return split_count; } -template -bool test_order(T *elements, std::size_t element_count, bool stable = true) -{ - for(std::size_t i = 1; i < element_count; ++i){ - if(counted_less()(elements[i], elements[i-1])){ - std::printf("\n Ord KO !!!!"); - return false; - } - if( stable && !(counted_less()(elements[i-1], elements[i])) && (elements[i-1].val > elements[i].val) ){ - std::printf("\n Stb KO !!!! 
"); - return false; - } - } - return true; -} + template void adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen) @@ -150,53 +92,53 @@ BOOST_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxMerge); template bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count, std::size_t key_len, unsigned alg, nanosecond_type &prev_clock) { - std::size_t const split_pos = generate_elements(elements, element_count, key_reps, key_len, counted_less()); + std::size_t const split_pos = generate_elements(elements, element_count, key_reps, key_len, order_type_less()); std::printf("%s ", AlgoNames[alg]); - num_compare=0; - num_copy=0; - num_elements = element_count; + order_type::num_compare=0; + order_type::num_copy=0; + order_type::num_elements = element_count; cpu_timer timer; timer.resume(); switch(alg) { case InplaceMerge: - std::inplace_merge(elements, elements+split_pos, elements+element_count, counted_less()); + std::inplace_merge(elements, elements+split_pos, elements+element_count, order_type_less()); break; case AdaptiveMerge: - boost::movelib::adaptive_merge(elements, elements+split_pos, elements+element_count, counted_less()); + boost::movelib::adaptive_merge(elements, elements+split_pos, elements+element_count, order_type_less()); break; case SqrtHAdaptiveMerge: - adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, counted_less() + adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1); break; case SqrtAdaptiveMerge: - adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, counted_less() + adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); break; case Sqrt2AdaptiveMerge: - adaptive_merge_buffered( elements, 
elements+split_pos, elements+element_count, counted_less() + adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); break; case QuartAdaptiveMerge: - adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, counted_less() + adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() , (element_count-1)/4+1); break; case BuflessMerge: - boost::movelib::merge_bufferless(elements, elements+split_pos, elements+element_count, counted_less()); + boost::movelib::merge_bufferless(elements, elements+split_pos, elements+element_count, order_type_less()); break; } timer.stop(); - if(num_elements == element_count){ + if(order_type::num_elements == element_count){ std::printf(" Tmp Ok "); } else{ std::printf(" Tmp KO "); } nanosecond_type new_clock = timer.elapsed().wall; - //std::cout << "Cmp:" << num_compare << " Cpy:" << num_copy; //for old compilers without ll size argument - std::printf("Cmp:%8.04f Cpy:%9.04f", double(num_compare)/element_count, double(num_copy)/element_count ); + //std::cout << "Cmp:" << order_type::num_compare << " Cpy:" << order_type::num_copy; //for old compilers without ll size argument + std::printf("Cmp:%8.04f Cpy:%9.04f", double(order_type::num_compare)/element_count, double(order_type::num_copy)/element_count ); double time = double(new_clock); @@ -219,7 +161,7 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count , units , prev_clock ? 
double(new_clock)/double(prev_clock): 1.0); prev_clock = new_clock; - bool res = test_order(elements, element_count, true); + bool res = is_order_type_ordered(elements, element_count, true); return res; } @@ -261,62 +203,55 @@ bool measure_all(std::size_t L, std::size_t NK) return res; } -struct less -{ - template - bool operator()(const T &t, const U &u) - { return t < u; } -}; - //Undef it to run the long test #define BENCH_MERGE_SHORT int main() { try{ - measure_all(101,1); - measure_all(101,7); - measure_all(101,31); - measure_all(101,0); + measure_all(101,1); + measure_all(101,7); + measure_all(101,31); + measure_all(101,0); // - measure_all(1101,1); - measure_all(1001,7); - measure_all(1001,31); - measure_all(1001,127); - measure_all(1001,511); - measure_all(1001,0); + measure_all(1101,1); + measure_all(1001,7); + measure_all(1001,31); + measure_all(1001,127); + measure_all(1001,511); + measure_all(1001,0); // #ifndef BENCH_MERGE_SHORT - measure_all(10001,65); - measure_all(10001,255); - measure_all(10001,1023); - measure_all(10001,4095); - measure_all(10001,0); + measure_all(10001,65); + measure_all(10001,255); + measure_all(10001,1023); + measure_all(10001,4095); + measure_all(10001,0); // - measure_all(100001,511); - measure_all(100001,2047); - measure_all(100001,8191); - measure_all(100001,32767); - measure_all(100001,0); + measure_all(100001,511); + measure_all(100001,2047); + measure_all(100001,8191); + measure_all(100001,32767); + measure_all(100001,0); // #ifdef NDEBUG - measure_all(1000001,1); - measure_all(1000001,1024); - measure_all(1000001,32768); - measure_all(1000001,524287); - measure_all(1000001,0); - measure_all(1500001,0); - //measure_all(10000001,0); - //measure_all(15000001,0); - //measure_all(100000001,0); + measure_all(1000001,1); + measure_all(1000001,1024); + measure_all(1000001,32768); + measure_all(1000001,524287); + measure_all(1000001,0); + measure_all(1500001,0); + //measure_all(10000001,0); + //measure_all(15000001,0); + 
//measure_all(100000001,0); #endif //NDEBUG #endif //#ifndef BENCH_MERGE_SHORT - //measure_all(100000001,0); + //measure_all(100000001,0); } catch(...) { diff --git a/test/bench_sort.cpp b/test/bench_sort.cpp index 6fe30e8..4945efa 100644 --- a/test/bench_sort.cpp +++ b/test/bench_sort.cpp @@ -23,58 +23,15 @@ using boost::timer::cpu_timer; using boost::timer::cpu_times; using boost::timer::nanosecond_type; - -boost::ulong_long_type num_copy; -boost::ulong_long_type num_elements; - -struct sorted_type -{ - public: - std::size_t key; - std::size_t val; - - sorted_type() - { - ++num_elements; - } - - sorted_type(const sorted_type& other) - : key(other.key), val(other.val) - { - ++num_elements; - ++num_copy; - } - - sorted_type & operator=(const sorted_type& other) - { - ++num_copy; - key = other.key; - val = other.val; - return *this; - } - - ~sorted_type () - { - --num_elements; - } -}; - -boost::ulong_long_type num_compare; +#include "order_type.hpp" //#define BOOST_MOVE_ADAPTIVE_SORT_STATS void print_stats(const char *str, boost::ulong_long_type element_count) { - std::printf("%sCmp:%7.03f Cpy:%8.03f\n", str, double(num_compare)/element_count, double(num_copy)/element_count ); + std::printf("%sCmp:%7.03f Cpy:%8.03f\n", str, double(order_type::num_compare)/element_count, double(order_type::num_copy)/element_count ); } -template -struct counted_less -{ - bool operator()(const T &a,T const &b) const - { ++num_compare; return a.key < b.key; } -}; - #include #include #include @@ -99,22 +56,6 @@ void generate_elements(T elements[], std::size_t element_count, std::size_t key_ } } -template -bool test_order(T *elements, std::size_t element_count, bool stable = true) -{ - for(std::size_t i = 1; i < element_count; ++i){ - if(counted_less()(elements[i], elements[i-1])){ - std::printf("\n Ord KO !!!!"); - return false; - } - if( stable && !(counted_less()(elements[i-1], elements[i])) && (elements[i-1].val > elements[i].val) ){ - std::printf("\n Stb KO !!!! 
"); - return false; - } - } - return true; -} - template void adaptive_sort_buffered(T *elements, std::size_t element_count, Compare comp, std::size_t BufLen) { @@ -164,60 +105,60 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count generate_elements(elements, element_count, key_reps, key_len); std::printf("%s ", AlgoNames[alg]); - num_compare=0; - num_copy=0; - num_elements = element_count; + order_type::num_compare=0; + order_type::num_copy=0; + order_type::num_elements = element_count; cpu_timer timer; timer.resume(); switch(alg) { case MergeSort: - merge_sort_buffered(elements, element_count, counted_less()); + merge_sort_buffered(elements, element_count, order_type_less()); break; case StableSort: - std::stable_sort(elements,elements+element_count,counted_less()); + std::stable_sort(elements,elements+element_count,order_type_less()); break; case AdaptiveSort: - boost::movelib::adaptive_sort(elements, elements+element_count, counted_less()); + boost::movelib::adaptive_sort(elements, elements+element_count, order_type_less()); break; case SqrtHAdaptiveSort: - adaptive_sort_buffered( elements, element_count, counted_less() + adaptive_sort_buffered( elements, element_count, order_type_less() , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1); break; case SqrtAdaptiveSort: - adaptive_sort_buffered( elements, element_count, counted_less() + adaptive_sort_buffered( elements, element_count, order_type_less() , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); break; case Sqrt2AdaptiveSort: - adaptive_sort_buffered( elements, element_count, counted_less() + adaptive_sort_buffered( elements, element_count, order_type_less() , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); break; case QuartAdaptiveSort: - adaptive_sort_buffered( elements, element_count, counted_less() + adaptive_sort_buffered( elements, element_count, order_type_less() , (element_count-1)/4+1); break; case 
NoBufMergeSort: - boost::movelib::bufferless_merge_sort(elements, elements+element_count, counted_less()); + boost::movelib::bufferless_merge_sort(elements, elements+element_count, order_type_less()); break; case SlowStableSort: - boost::movelib::detail_adaptive::slow_stable_sort(elements, elements+element_count, counted_less()); + boost::movelib::detail_adaptive::slow_stable_sort(elements, elements+element_count, order_type_less()); break; case HeapSort: - std::make_heap(elements, elements+element_count, counted_less()); - std::sort_heap(elements, elements+element_count, counted_less()); + std::make_heap(elements, elements+element_count, order_type_less()); + std::sort_heap(elements, elements+element_count, order_type_less()); break; } timer.stop(); - if(num_elements == element_count){ + if(order_type::num_elements == element_count){ std::printf(" Tmp Ok "); } else{ std::printf(" Tmp KO "); } nanosecond_type new_clock = timer.elapsed().wall; - //std::cout << "Cmp:" << num_compare << " Cpy:" << num_copy; //for old compilers without ll size argument - std::printf("Cmp:%7.03f Cpy:%8.03f", double(num_compare)/element_count, double(num_copy)/element_count ); + //std::cout << "Cmp:" << order_type::num_compare << " Cpy:" << order_type::num_copy; //for old compilers without ll size argument + std::printf("Cmp:%7.03f Cpy:%8.03f", double(order_type::num_compare)/element_count, double(order_type::num_copy)/element_count ); double time = double(new_clock); @@ -240,7 +181,7 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count , units , prev_clock ? 
double(new_clock)/double(prev_clock): 1.0); prev_clock = new_clock; - bool res = test_order(elements, element_count, alg != HeapSort && alg != NoBufMergeSort); + bool res = is_order_type_ordered(elements, element_count, alg != HeapSort && alg != NoBufMergeSort); return res; } @@ -294,58 +235,49 @@ bool measure_all(std::size_t L, std::size_t NK) //Undef it to run the long test #define BENCH_SORT_SHORT -struct less -{ - template - bool operator()(const T &t, const U &u) - { return t < u; } -}; - - int main() { - measure_all(101,1); - measure_all(101,7); - measure_all(101,31); - measure_all(101,0); + measure_all(101,1); + measure_all(101,7); + measure_all(101,31); + measure_all(101,0); // - measure_all(1101,1); - measure_all(1001,7); - measure_all(1001,31); - measure_all(1001,127); - measure_all(1001,511); - measure_all(1001,0); + measure_all(1101,1); + measure_all(1001,7); + measure_all(1001,31); + measure_all(1001,127); + measure_all(1001,511); + measure_all(1001,0); // #ifndef BENCH_SORT_SHORT - measure_all(10001,65); - measure_all(10001,255); - measure_all(10001,1023); - measure_all(10001,4095); - measure_all(10001,0); + measure_all(10001,65); + measure_all(10001,255); + measure_all(10001,1023); + measure_all(10001,4095); + measure_all(10001,0); // - measure_all(100001,511); - measure_all(100001,2047); - measure_all(100001,8191); - measure_all(100001,32767); - measure_all(100001,0); + measure_all(100001,511); + measure_all(100001,2047); + measure_all(100001,8191); + measure_all(100001,32767); + measure_all(100001,0); // #ifdef NDEBUG - measure_all(1000001,1); - measure_all(1000001,1024); - measure_all(1000001,32768); - measure_all(1000001,524287); - measure_all(1000001,0); - measure_all(1500001,0); - //measure_all(10000001,0); + measure_all(1000001,1); + measure_all(1000001,1024); + measure_all(1000001,32768); + measure_all(1000001,524287); + measure_all(1000001,0); + measure_all(1500001,0); + //measure_all(10000001,0); #endif //NDEBUG #endif //#ifndef 
BENCH_SORT_SHORT - //measure_all(100000001,0); + //measure_all(100000001,0); return 0; } - diff --git a/test/order_type.hpp b/test/order_type.hpp new file mode 100644 index 0000000..5953cb4 --- /dev/null +++ b/test/order_type.hpp @@ -0,0 +1,82 @@ +////////////////////////////////////////////////////////////////////////////// +// +// (C) Copyright Ion Gaztanaga 2015-2016. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// See http://www.boost.org/libs/move for documentation. +// +////////////////////////////////////////////////////////////////////////////// + +#ifndef BOOST_MOVE_TEST_ORDER_TYPE_HPP +#define BOOST_MOVE_TEST_ORDER_TYPE_HPP + +#include +#include +#include + +struct order_type +{ + public: + std::size_t key; + std::size_t val; + + order_type() + { + ++num_elements; + } + + order_type(const order_type& other) + : key(other.key), val(other.val) + { + ++num_elements; + ++num_copy; + } + + order_type & operator=(const order_type& other) + { + ++num_copy; + key = other.key; + val = other.val; + return *this; + } + + ~order_type () + { + --num_elements; + } + + static boost::ulong_long_type num_compare; + static boost::ulong_long_type num_copy; + static boost::ulong_long_type num_elements; +}; + +boost::ulong_long_type order_type::num_compare = 0; +boost::ulong_long_type order_type::num_copy = 0; +boost::ulong_long_type order_type::num_elements = 0; + +template +struct order_type_less +{ + bool operator()(const T &a,T const &b) const + { ++order_type::num_compare; return a.key < b.key; } +}; + +template +inline bool is_order_type_ordered(T *elements, std::size_t element_count, bool stable = true) +{ + for(std::size_t i = 1; i < element_count; ++i){ + if(order_type_less()(elements[i], elements[i-1])){ + std::printf("\n Ord KO !!!!"); + return false; + } + if( stable && !(order_type_less()(elements[i-1], elements[i])) && (elements[i-1].val > 
elements[i].val) ){ + std::printf("\n Stb KO !!!! "); + return false; + } + } + return true; +} + +#endif //BOOST_MOVE_TEST_ORDER_TYPE_HPP From f86a3a40bb72685e241b101b9db47f1634a39869 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Wed, 23 Mar 2016 22:10:27 +0100 Subject: [PATCH 02/16] Added adaptive merge test --- proj/vc7ide/Move.sln | 8 ++ proj/vc7ide/adaptive_merge_test.vcproj | 134 +++++++++++++++++++++ test/adaptive_merge_test.cpp | 87 ++++++++++++++++ 3 files changed, 229 insertions(+) create mode 100644 proj/vc7ide/adaptive_merge_test.vcproj create mode 100644 test/adaptive_merge_test.cpp diff --git a/proj/vc7ide/Move.sln b/proj/vc7ide/Move.sln index 905c497..6a1d7a4 100644 --- a/proj/vc7ide/Move.sln +++ b/proj/vc7ide/Move.sln @@ -123,6 +123,10 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "adaptive_sort_test", "adapt ProjectSection(ProjectDependencies) = postProject EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "adaptive_merge_test", "adaptive_merge_test.vcproj", "{DE728B39-7328-C80F-5DF3-7CB146480B7B}" + ProjectSection(ProjectDependencies) = postProject + EndProjectSection +EndProject Global GlobalSection(SolutionConfiguration) = preSolution Debug = Debug @@ -255,6 +259,10 @@ Global {CD617A28-6217-B79E-4CE2-6BA035379A6A}.Debug.Build.0 = Debug|Win32 {CD617A28-6217-B79E-4CE2-6BA035379A6A}.Release.ActiveCfg = Release|Win32 {CD617A28-6217-B79E-4CE2-6BA035379A6A}.Release.Build.0 = Release|Win32 + {DE728B39-7328-C80F-5DF3-7CB146480B7B}.Debug.ActiveCfg = Debug|Win32 + {DE728B39-7328-C80F-5DF3-7CB146480B7B}.Debug.Build.0 = Debug|Win32 + {DE728B39-7328-C80F-5DF3-7CB146480B7B}.Release.ActiveCfg = Release|Win32 + {DE728B39-7328-C80F-5DF3-7CB146480B7B}.Release.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionItems) = postSolution ..\..\..\..\boost\move\algo\adaptive_merge.hpp = ..\..\..\..\boost\move\algo\adaptive_merge.hpp diff --git a/proj/vc7ide/adaptive_merge_test.vcproj
b/proj/vc7ide/adaptive_merge_test.vcproj new file mode 100644 index 0000000..516f1b1 --- /dev/null +++ b/proj/vc7ide/adaptive_merge_test.vcproj @@ -0,0 +1,134 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/adaptive_merge_test.cpp b/test/adaptive_merge_test.cpp new file mode 100644 index 0000000..5365e78 --- /dev/null +++ b/test/adaptive_merge_test.cpp @@ -0,0 +1,87 @@ +////////////////////////////////////////////////////////////////////////////// +// +// (C) Copyright Ion Gaztanaga 2015-2016. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// See http://www.boost.org/libs/move for documentation. +// +////////////////////////////////////////////////////////////////////////////// + +#include //std::srand +#include //std::next_permutation +#include //std::cout + +#include + +#include +#include +#include + +using boost::timer::cpu_timer; +using boost::timer::cpu_times; +using boost::timer::nanosecond_type; + +#include "order_type.hpp" + +#include +#include + + +template +bool test_random_shuffled(std::size_t const element_count, std::size_t const num_keys, std::size_t const num_iter) +{ + boost::movelib::unique_ptr elements(new T[element_count]); + boost::movelib::unique_ptr key_reps(new std::size_t[num_keys ? num_keys : element_count]); + std::cout << "- - N: " << element_count << ", Keys: " << num_keys << ", It: " << num_iter << " \n"; + + //Initialize keys + for(std::size_t i=0; i < element_count; ++i){ + std::size_t key = num_keys ? (i % num_keys) : i; + elements[i].key=key; + } + + std::srand(0); + + for (std::size_t i = 0; i != num_iter; ++i) + { + std::random_shuffle(elements.get(), elements.get() + element_count); + for(std::size_t i = 0; i < (num_keys ? 
num_keys : element_count); ++i){ + key_reps[i]=0; + } + for(std::size_t i = 0; i < element_count; ++i){ + elements[i].val = key_reps[elements[i].key]++; + } + + boost::container::vector tmp(elements.get(), elements.get()+element_count); + std::size_t const split = std::size_t(std::rand()) % element_count; + std::stable_sort(tmp.data(), tmp.data()+split, order_type_less()); + std::stable_sort(tmp.data()+split, tmp.data()+element_count, order_type_less()); + + boost::movelib::adaptive_merge(tmp.data(), tmp.data()+split, tmp.data()+element_count, order_type_less()); + + if (!is_order_type_ordered(tmp.data(), element_count)) + { + std::cout << "\n ERROR\n"; + throw int(0); + } + } + return true; +} + +int main() +{ + #ifdef NDEBUG + const std::size_t NIter = 100; + #else + const std::size_t NIter = 10; + #endif + test_random_shuffled(10001, 65, NIter); + test_random_shuffled(10001, 101, NIter); + test_random_shuffled(10001, 1023, NIter); + test_random_shuffled(10001, 4095, NIter); + test_random_shuffled(10001, 0, NIter); + + return 0; +} From cae8d2dda36c4b1f0287fd05818e85bde03ef293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Wed, 23 Mar 2016 22:11:06 +0100 Subject: [PATCH 03/16] Refactored and documented the sort and merge algorithm --- .../move/algo/detail/adaptive_sort_merge.hpp | 478 ++++++++++++------ 1 file changed, 322 insertions(+), 156 deletions(-) diff --git a/include/boost/move/algo/detail/adaptive_sort_merge.hpp b/include/boost/move/algo/detail/adaptive_sort_merge.hpp index 46dba18..87828e8 100644 --- a/include/boost/move/algo/detail/adaptive_sort_merge.hpp +++ b/include/boost/move/algo/detail/adaptive_sort_merge.hpp @@ -37,8 +37,8 @@ // elements twice. 
// // The adaptive_merge algorithm was developed by Ion Gaztanaga reusing some parts -// from the sorting algorithm and implementing a block merge algorithm -// without moving elements left or right, which is used when external memory +// from the sorting algorithm and implementing an additional block merge algorithm +// without moving elements to left or right, which is used when external memory // is available. ////////////////////////////////////////////////////////////////////////////// #ifndef BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP @@ -371,7 +371,7 @@ RandIt op_partial_merge_with_buf_impl //Now merge from buffer if(first2 != last2) while(1){ - if(comp(*first2, *buf_first1)) { + if(comp(*first2, *buf_first1)) { op(first2++, first1++); if(first2 == last2) break; @@ -450,7 +450,7 @@ void op_merge_blocks_with_buf skip_first_it = false; bool const last_it = key_first == key_end; //If the trailing block is empty, we'll make it equal to the previous if empty - bool const is_range2_A = last_it ? (!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey); + bool const is_range2_A = last_it ? 
(!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey); if(is_range1_A == is_range2_A){ if(buffer != buffer_end){ @@ -565,8 +565,8 @@ RandIt op_partial_merge_left_impl while(first1 != last1){ if(first2 == last2){ return first1; - } - if(comp(*first2, *first1)) { + } + if(comp(*first2, *first1)) { op(first2, buf_first); ++first2; } @@ -670,11 +670,11 @@ RandIt op_partial_merge_left_smart_impl BOOST_ASSERT(0 != (last1-first1)); if(first2 != last2) while(1){ - if(comp(*first2, *first1)) { + if(comp(*first2, *first1)) { op(first2++, dest++); if(first2 == last2){ return first1; - } + } } else{ op(first1++, dest++); @@ -719,7 +719,7 @@ void op_merge_blocks_left { if(n_bef_irreg2 == 0){ RandIt const last_reg(first+l_irreg1+n_aft_irreg2*l_block); - op_merge_left(first-l_block, first, last_reg, last_reg+l_irreg2, comp, op); + op_merge_left(first-l_block, first, last_reg, last_reg+l_irreg2, comp, op); } else { RandIt buffer = first - l_block; @@ -736,8 +736,8 @@ void op_merge_blocks_left skip_first_it = false; bool const last_it = key_first == key_end; //If the trailing block is empty, we'll make it equal to the previous if empty - bool const is_range2_A = last_it ? (!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey); - bool const is_buffer_middle = last1 == buffer; + bool const is_range2_A = last_it ? 
(!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey); + bool const is_buffer_middle = last1 == buffer; if(is_range1_A == is_range2_A){ //If range1 is buffered, write it to its final position @@ -824,7 +824,7 @@ RandIt op_partial_merge_right_impl { RandIt const first2 = last1; while(first2 != last2){ - if(last1 == first1){ + if(last1 == first1){ return last2; } --last2; @@ -881,7 +881,7 @@ void op_merge_blocks_right for(bool is_range2_A = false; key_first != key_end; last1 = first1, first1 -= l_block){ --key_end; bool const is_range1_A = key_comp(*key_end, midkey); - bool const is_buffer_middle = first2 == buffer_end; + bool const is_buffer_middle = first2 == buffer_end; if(is_range1_A == is_range2_A){ if(!is_buffer_middle){ @@ -937,17 +937,17 @@ RandIt partial_merge_bufferless_impl return first1; } bool const is_range1_A = *pis_range1_A; - if(first1 != last1 && comp(*last1, last1[-1])){ + if(first1 != last1 && comp(*last1, last1[-1])){ do{ RandIt const old_last1 = last1; - last1 = lower_bound(last1, last2, *first1, comp); + last1 = lower_bound(last1, last2, *first1, comp); first1 = rotate_gcd(first1, old_last1, last1);//old_last1 == last1 supported if(last1 == last2){ return first1; } do{ ++first1; - } while(last1 != first1 && !comp(*last1, *first1) ); + } while(last1 != first1 && !comp(*last1, *first1) ); } while(first1 != last1); } *pis_range1_A = !is_range1_A; @@ -993,7 +993,7 @@ void merge_blocks_bufferless bool is_range1_A = l_irreg1 ? 
true : key_comp(*key_first++, midkey); for( ; key_first != key_end; ++key_first){ - bool is_range2_A = key_comp(*key_first, midkey); + bool is_range2_A = key_comp(*key_first, midkey); if(is_range1_A == is_range2_A){ first1 = last1; } @@ -1077,9 +1077,9 @@ typename iterator_traits::size_type if(xbuf.capacity() >= max_collected){ value_type *const ph0 = xbuf.add(first); while(u != last && h < max_collected){ - value_type * const r = lower_bound(ph0, xbuf.end(), *u, comp); + value_type * const r = lower_bound(ph0, xbuf.end(), *u, comp); //If key not found add it to [h, h+h0) - if(r == xbuf.end() || comp(*u, *r) ){ + if(r == xbuf.end() || comp(*u, *r) ){ RandIt const new_h0 = boost::move(search_end, u, h0); search_end = u; ++search_end; @@ -1094,9 +1094,9 @@ typename iterator_traits::size_type } else{ while(u != last && h < max_collected){ - RandIt const r = lower_bound(h0, search_end, *u, comp); + RandIt const r = lower_bound(h0, search_end, *u, comp); //If key not found add it to [h, h+h0) - if(r == search_end || comp(*u, *r) ){ + if(r == search_end || comp(*u, *r) ){ RandIt const new_h0 = rotate_gcd(h0, search_end, u); search_end = u; ++search_end; @@ -1222,11 +1222,11 @@ void slow_stable_sort if(do_merge){ size_type const h_2 = 2*h; while((L-p0) > h_2){ - merge_bufferless(first+p0, first+p0+h, first+p0+h_2, comp); + merge_bufferless(first+p0, first+p0+h, first+p0+h_2, comp); p0 += h_2; } } - if((L-p0) > h){ + if((L-p0) > h){ merge_bufferless(first+p0, first+p0+h, last, comp); } } @@ -1401,7 +1401,7 @@ void combine_params size_type const irreg_off = is_merge_left ? 
0u: l_irreg2-1; RandIt prev_block_first = first + l_combined - l_irreg2; const value_type &incomplete_block_first = prev_block_first[irreg_off]; - while(n_aft_irreg2 != n_reg_block && + while(n_aft_irreg2 != n_reg_block && comp(incomplete_block_first, (prev_block_first-= l_block)[reg_off]) ){ ++n_aft_irreg2; } @@ -1709,12 +1709,12 @@ void op_merge_right_step if(restk <= l_build_buf){ op(backward_t(),first_block+p, first_block+p+restk, first_block+p+restk+l_build_buf); } - else{ + else{ op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+restk, first_block+p+restk+l_build_buf, comp, op); } while(p>0){ p -= 2*l_build_buf; - op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+2*l_build_buf, first_block+p+3*l_build_buf, comp, op); + op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+2*l_build_buf, first_block+p+3*l_build_buf, comp, op); } } @@ -1954,13 +1954,13 @@ void stable_merge template -void final_merge( bool buffer_right - , RandIt const first - , typename iterator_traits::size_type const l_intbuf - , typename iterator_traits::size_type const n_keys - , typename iterator_traits::size_type const len - , adaptive_xbuf::value_type> & xbuf - , Compare comp) +void adaptive_sort_final_merge( bool buffer_right + , RandIt const first + , typename iterator_traits::size_type const l_intbuf + , typename iterator_traits::size_type const n_keys + , typename iterator_traits::size_type const len + , adaptive_xbuf::value_type> & xbuf + , Compare comp) { BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf); xbuf.clear(); @@ -2009,7 +2009,7 @@ bool build_params //segments of size l_build_buf*2, maximizing the classic merge phase. 
l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base)); - //This is the minimum number of case to implement the ideal algorithm + //This is the minimum number of keys to implement the ideal algorithm // //l_intbuf is used as buffer plus the key count size_type n_min_ideal_keys = l_intbuf-1u; @@ -2030,10 +2030,10 @@ bool build_params // //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed, //(to be used for keys in combine_all_blocks) as the whole l_build_buf - //we'll be backuped in the buffer during build_blocks. + //will be backed up in the buffer during build_blocks. bool const non_unique_buf = xbuf.capacity() >= 2*l_intbuf; size_type const to_collect = non_unique_buf ? l_intbuf : l_intbuf*2; - size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf); + size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf); //If available memory is 2*sqrt(l), then for "build_params" //the situation is the same as if 2*l_intbuf were collected. @@ -2044,7 +2044,7 @@ bool build_params //is possible (due to very low unique keys), then go to a slow sort based on rotations. 
if(collected < (n_min_ideal_keys+l_intbuf)){ if(collected < 4){ //No combination possible with less that 4 keys - return false; + return false; } n_keys = l_intbuf; while(n_keys&(n_keys-1)){ @@ -2053,6 +2053,7 @@ bool build_params while(n_keys > collected){ n_keys/=2; } + //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two l_base = min_value(n_keys, AdaptiveSortInsertionSortThreshold); l_intbuf = 0; l_build_buf = n_keys; @@ -2072,6 +2073,218 @@ bool build_params return true; } + +#define BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF + +template +inline void adaptive_merge_combine_blocks( RandIt first + , typename iterator_traits::size_type len1 + , typename iterator_traits::size_type len2 + , typename iterator_traits::size_type collected + , typename iterator_traits::size_type n_keys + , typename iterator_traits::size_type l_block + , bool use_internal_buf + , bool xbuf_used + , Compare comp + , adaptive_xbuf::value_type> & xbuf + ) +{ + typedef typename iterator_traits::size_type size_type; + size_type const len = len1+len2; + size_type const l_combine = len-collected; + size_type const l_combine1 = len1-collected; + size_type n_bef_irreg2, n_aft_irreg2, l_irreg1, l_irreg2, midkey_idx; + if(n_keys){ + RandIt const first_data = first+collected; + RandIt const keys = first; + combine_params( keys, comp, first_data, l_combine + , l_combine1, l_block, xbuf, comp + , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len); + if(xbuf_used){ + merge_blocks_with_buf + (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, xbuf_used); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg xbf: ", len); + } + else if(use_internal_buf){ + + #ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF + range_xbuf rbuf(first_data-l_block, first_data); + merge_blocks_with_buf + (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, 
n_aft_irreg2, l_irreg2, comp, rbuf, xbuf_used); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: ", len); + #else + merge_blocks_left + (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg lft: ", len); + #endif + } + else{ + merge_blocks_bufferless + (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg bfl: ", len); + } + } + else{ + xbuf.clear(); + size_type *const uint_keys = xbuf.template aligned_trailing(l_block); + combine_params( uint_keys, less(), first, l_combine + , l_combine1, l_block, xbuf, comp + , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len); + merge_blocks_with_buf + (uint_keys, uint_keys[midkey_idx], less(), first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, true); + xbuf.clear(); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg lft: ", len); + } + +} + +template +inline void adaptive_merge_final_merge( RandIt first + , typename iterator_traits::size_type len1 + , typename iterator_traits::size_type len2 + , typename iterator_traits::size_type collected + , typename iterator_traits::size_type l_intbuf + , typename iterator_traits::size_type l_block + , bool use_internal_buf + , bool xbuf_used + , Compare comp + , adaptive_xbuf::value_type> & xbuf + ) +{ + typedef typename iterator_traits::size_type size_type; + (void)l_block; + size_type n_keys = collected-l_intbuf; + size_type len = len1+len2; + if(use_internal_buf){ + if(xbuf_used){ + xbuf.clear(); + //Nothing to do + if(n_keys){ + stable_sort(first, first+n_keys, comp, xbuf); + stable_merge(first, first+n_keys, first+len, comp, xbuf); + } + } + else{ + #ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF + xbuf.clear(); + stable_sort(first, first+collected, comp, xbuf); + stable_merge(first, first+collected, 
first+len, comp, xbuf); + #else + xbuf.clear(); + stable_sort(first+len-l_block, first+len, comp, xbuf); + RandIt const pos1 = lower_bound(first+n_keys, first+len-l_block, first[len-1], comp); + RandIt const pos2 = rotate_gcd(pos1, first+len-l_block, first+len); + stable_merge(first+n_keys, pos1, pos2, antistable(comp), xbuf); + if(n_keys){ + stable_sort(first, first+n_keys, comp, xbuf); + stable_merge(first, first+n_keys, first+len, comp, xbuf); + } + #endif + } + + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A buf mrg: ", len); + } + else{ + stable_sort(first, first+collected, comp, xbuf); + xbuf.clear(); + if(xbuf.capacity() >= collected){ + buffered_merge(first, first+collected, first+len1+len2, comp, xbuf); + } + else{ + merge_bufferless(first, first+collected, first+len1+len2, comp); + } + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: ", len); +} + +template +inline SizeType adaptive_merge_n_keys_intbuf(SizeType l_block, SizeType len, Xbuf & xbuf, SizeType &l_intbuf_inout) +{ + typedef SizeType size_type; + size_type l_intbuf = xbuf.capacity() >= l_block ? 
0u : l_block; + + //This is the minimum number of keys to implement the ideal algorithm + //ceil(len/l_block) - 1 (as the first block is used as buffer) + size_type n_keys = l_block; + while(n_keys >= (len-l_intbuf-n_keys)/l_block){ + --n_keys; + } + ++n_keys; + BOOST_ASSERT(n_keys < l_block); + + if(xbuf.template supports_aligned_trailing(l_block, n_keys)){ + n_keys = 0u; + } + l_intbuf_inout = l_intbuf; + return n_keys; +} + +/////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////// + +// Main explanation of the sort algorithm. +// +// csqrtlen = ceil(sqrt(len)); +// +// * First, 2*csqrtlen unique elements elements are extracted from elements to be +// sorted and placed in the beginning of the range. +// +// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements +// will be used as auxiliary memory, so trailing len-2*csqrtlen elements are +// are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step +// 2*csqrtlen unique elements are again the leading elements of the whole range. +// +// * Step "combine_blocks": pairs of previously formed blocks are merged with a different +// ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the +// "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen +// elements, etc) of until all trailing (len-2*csqrtlen) elements are merged. 
+// +// In "combine_blocks", len/csqrtlen elements are used as "keys" (markers) to +// know if elements belong to the first or second block to be merged and another +// leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step: +// +// Iteratively until all trailing (len-2*csqrtlen) elements are merged: +// Iteratively for each pair of previously merged blocks: +// * Blocks are divided in groups of csqrtlen elements and +// 2*merged_block/csqrtlen keys are sorted to be used as markers +// * Groups are selection-sorted by first or last element (depending on whether they are +// merged to left or right) and keys are reordered accordingly as an imitation-buffer. +// * Elements of each block pair are merged using the csqrtlen buffer taking into account +// if they belong to the first half or second half (marked by the key). +// +// * In the final merge step leading elements (2*csqrtlen) are sorted and merged with +// rotations with the rest of sorted elements in the "combine_blocks" step. +// +// Corner cases: +// +// * If no 2*csqrtlen elements can be extracted: +// +// * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used +// as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This +// means that an additional "combine_blocks" step will be needed to merge all elements. +// +// * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum, +// then the number of elements used as buffer and keys in the "build_blocks" +// and "combine_blocks" steps is reduced. If "combine_blocks" does not have enough keys due to this reduction +// then a rotation-based smart merge is used. +// +// * If the minimum number of keys can't be extracted, a rotation-based sorting is performed. +// +// * If auxiliary memory is greater than or equal to ceil(len/2), half-copying mergesort is used. 
+// +// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), +// then only csqrtlen elements need to be extracted and "combine_blocks" will use integral +// keys to combine blocks. +// +// * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks +// using classic merge. template void adaptive_sort_impl ( RandIt first @@ -2093,7 +2306,7 @@ void adaptive_sort_impl return; } - //Make sure it is at least two + //Make sure it is at least four BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4); size_type l_base = 0; @@ -2101,12 +2314,14 @@ void adaptive_sort_impl size_type n_keys = 0; size_type l_build_buf = 0; + //Calculate and extract needed unique elements. If a minimum is not achieved + //fallback to rotation-based merge if(!build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){ stable_sort(first, first+len, comp, xbuf); return; } - //Otherwise, continue in adaptive_sort + //Otherwise, continue the adaptive_sort BOOST_MOVE_ADAPTIVE_SORT_PRINT("\n After collect_unique: ", len); size_type const n_key_plus_buf = l_intbuf+n_keys; //l_build_buf is always power of two if l_intbuf is zero @@ -2122,9 +2337,51 @@ void adaptive_sort_impl (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp); //Sort keys and buffer and merge the whole sequence - final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp); + adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp); } +// Main explanation of the merge algorithm. +// +// csqrtlen = ceil(sqrt(len)); +// +// * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect +// unique elements are extracted from elements to be sorted and placed in the beginning of the range. 
+// +// * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements +// are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements. +// +// Explanation of the "combine_blocks" step: +// +// * Trailing [first+to_collect, first+len1) elements are divided in groups of csqrtlen elements. +// Remaining elements that can't form a group are grouped in the front of those elements. +// * Trailing [first+len1, first+len1+len2) elements are divided in groups of csqrtlen elements. +// Remaining elements that can't form a group are grouped in the back of those elements. +// * Groups are selection-sorted by first or last element (depending on whether they are +// merged to left or right) and keys are reordered accordingly as an imitation-buffer. +// * Elements of each block pair are merged using the csqrtlen buffer taking into account +// if they belong to the first half or second half (marked by the key). +// +// * In the final merge step leading "to_collect" elements are merged with rotations +// with the rest of merged elements in the "combine_blocks" step. +// +// Corner cases: +// +// * If no "to_collect" elements can be extracted: +// +// * If more than a minimum number of elements is extracted +// then the number of elements used as buffer and keys in the +// "combine_blocks" step is reduced. If "combine_blocks" does not have enough keys due to this reduction +// then a rotation-based smart merge is used. +// +// * If the minimum number of keys can't be extracted, a rotation-based merge is performed. +// +// * If auxiliary memory is greater than or equal to min(len1, len2), a buffered merge is performed. +// +// * If len1 or len2 is less than 2*csqrtlen then a rotation-based merge is performed. +// +// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), +// then no csqrtlen elements need to be extracted and "combine_blocks" will use integral +// keys to combine blocks. 
template void adaptive_merge_impl ( RandIt first @@ -2144,134 +2401,43 @@ void adaptive_merge_impl //Calculate ideal parameters and try to collect needed unique keys size_type l_block = size_type(ceil_sqrt(len)); + //One range is not big enough to extract keys and the internal buffer so a + //rotation-based based merge will do just fine if(len1 <= l_block*2 || len2 <= l_block*2){ merge_bufferless(first, first+len1, first+len1+len2, comp); return; } - size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block; - - //This is the minimum number of case to implement the ideal algorithm - //ceil(len/l_block) - 1 (as the first block is used as buffer) - size_type n_keys = l_block; - while(n_keys >= (len-l_intbuf-n_keys)/l_block){ - --n_keys; - } - ++n_keys; - BOOST_ASSERT(n_keys < l_block); - - if(xbuf.template supports_aligned_trailing(l_block, n_keys)){ - n_keys = 0u; - } - + //Detail the number of keys and internal buffer. If xbuf has enough memory, no + //internal buffer is needed so l_intbuf will remain 0. + size_type l_intbuf = 0; + size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len, xbuf, l_intbuf); size_type const to_collect = l_intbuf+n_keys; - size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf); - + //Try to extract needed unique values from the first range + size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf); BOOST_MOVE_ADAPTIVE_SORT_PRINT("\n A collect: ", len); + + //Not the minimum number of keys is not available on the first range, so fallback to rotations if(collected != to_collect && collected < 4){ merge_bufferless(first, first+len1, first+len1+len2, comp); + return; } - else{ - bool use_internal_buf = true; - if (collected != to_collect){ - l_intbuf = 0u; - n_keys = collected; - use_internal_buf = false; - l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf); - l_intbuf = use_internal_buf ? 
l_block : 0u; - } - bool xbuf_used = collected == to_collect && xbuf.capacity() >= l_block; - size_type const l_combine = len-collected; - size_type const l_combine1 = len1-collected; - - size_type n_bef_irreg2, n_aft_irreg2, l_irreg1, l_irreg2, midkey_idx; - if(n_keys){ - RandIt const first_data = first+collected; - RandIt const keys = first; - combine_params( keys, comp, first_data, l_combine - , l_combine1, l_block, xbuf, comp - , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len); - if(xbuf_used){ - merge_blocks_with_buf - (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, xbuf_used); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg xbf: ", len); - } - else if(use_internal_buf){ - #define BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF - #ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF - range_xbuf rbuf(first_data-l_block, first_data); - merge_blocks_with_buf - (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, rbuf, xbuf_used); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: ", len); - #else - merge_blocks_left - (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg lft: ", len); - #endif - } - else{ - merge_blocks_bufferless - (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg bfl: ", len); - } - } - else{ - xbuf.clear(); - size_type *const uint_keys = xbuf.template aligned_trailing(l_block); - combine_params( uint_keys, less(), first, l_combine - , l_combine1, l_block, xbuf, comp - , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len); - merge_blocks_with_buf - (uint_keys, uint_keys[midkey_idx], less(), first, l_block, 
l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, true); - xbuf.clear(); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg lft: ", len); - } - - n_keys = collected-l_intbuf; - if(use_internal_buf){ - if(xbuf_used){ - xbuf.clear(); - //Nothing to do - if(n_keys){ - stable_sort(first, first+n_keys, comp, xbuf); - stable_merge(first, first+n_keys, first+len, comp, xbuf); - } - } - else{ - #ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF - xbuf.clear(); - stable_sort(first, first+collected, comp, xbuf); - stable_merge(first, first+collected, first+len, comp, xbuf); - #else - xbuf.clear(); - stable_sort(first+len-l_block, first+len, comp, xbuf); - RandIt const pos1 = lower_bound(first+n_keys, first+len-l_block, first[len-1], comp); - RandIt const pos2 = rotate_gcd(pos1, first+len-l_block, first+len); - stable_merge(first+n_keys, pos1, pos2, antistable(comp), xbuf); - if(n_keys){ - stable_sort(first, first+n_keys, comp, xbuf); - stable_merge(first, first+n_keys, first+len, comp, xbuf); - } - #endif - } - - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A buf mrg: ", len); - } - else{ - stable_sort(first, first+collected, comp, xbuf); - xbuf.clear(); - if(xbuf.capacity() >= collected){ - buffered_merge(first, first+collected, first+len1+len2, comp, xbuf); - } - else{ - merge_bufferless(first, first+collected, first+len1+len2, comp); - } - } - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: ", len); + //If not enough keys but more than minimum, adjust the internal buffer and key count + bool use_internal_buf = collected == to_collect; + if (!use_internal_buf){ + l_intbuf = 0u; + n_keys = collected; + l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf); + //If use_internal_buf is false, then then internal buffer will be zero and rotation-based combination will be used + l_intbuf = use_internal_buf ? 
l_block : 0u; } + + bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block; + //Merge trailing elements using smart merges + adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf); + //Merge buffer and keys with the rest of the values + adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf); } } From 122916d8203e2261a4c4454b0efdc2dd36795400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Wed, 23 Mar 2016 22:11:25 +0100 Subject: [PATCH 04/16] Removed tabs. --- include/boost/move/adl_move_swap.hpp | 4 ++-- include/boost/move/algo/adaptive_merge.hpp | 2 +- include/boost/move/algo/detail/merge.hpp | 12 ++++++------ include/boost/move/algo/move.hpp | 4 ++-- include/boost/move/detail/type_traits.hpp | 4 ++-- test/adaptive_sort_test.cpp | 22 +++++++--------------- test/move_if_noexcept.cpp | 18 +++++++++--------- test/unique_ptr_functions.cpp | 10 +++++----- 8 files changed, 34 insertions(+), 42 deletions(-) diff --git a/include/boost/move/adl_move_swap.hpp b/include/boost/move/adl_move_swap.hpp index 9303201..d6906a4 100644 --- a/include/boost/move/adl_move_swap.hpp +++ b/include/boost/move/adl_move_swap.hpp @@ -231,8 +231,8 @@ BOOST_MOVE_FORCEINLINE void adl_move_swap(T& x, T& y) //! using boost::adl_move_swap. //! //! Parameters: -//! first1, last1 - the first range of elements to swap -//! first2 - beginning of the second range of elements to swap +//! first1, last1 - the first range of elements to swap +//! first2 - beginning of the second range of elements to swap //! //! Type requirements: //! - ForwardIt1, ForwardIt2 must meet the requirements of ForwardIterator. 
diff --git a/include/boost/move/algo/adaptive_merge.hpp b/include/boost/move/algo/adaptive_merge.hpp index ef20651..0233b23 100644 --- a/include/boost/move/algo/adaptive_merge.hpp +++ b/include/boost/move/algo/adaptive_merge.hpp @@ -56,7 +56,7 @@ void adaptive_merge( RandIt first, RandIt middle, RandIt last, Compare comp typedef typename iterator_traits::value_type value_type; ::boost::movelib::detail_adaptive::adaptive_xbuf xbuf(uninitialized, uninitialized_len); - ::boost::movelib::detail_adaptive::adaptive_merge_impl(first, size_type(middle - first), size_type(last - middle), comp, xbuf); + ::boost::movelib::detail_adaptive::adaptive_merge_impl(first, size_type(middle - first), size_type(last - middle), comp, xbuf); } } //namespace movelib { diff --git a/include/boost/move/algo/detail/merge.hpp b/include/boost/move/algo/detail/merge.hpp index a0a5afa..59a04df 100644 --- a/include/boost/move/algo/detail/merge.hpp +++ b/include/boost/move/algo/detail/merge.hpp @@ -167,7 +167,7 @@ void op_merge_left( RandIt buf_first op(forward_t(), first2, last2, buf_first); return; } - else if(comp(*first2, *first1)){ + else if(comp(*first2, *first1)){ op(first2, buf_first); ++first2; } @@ -214,7 +214,7 @@ void op_merge_right { RandIt const first2 = last1; while(first1 != last1){ - if(last2 == first2){ + if(last2 == first2){ op(backward_t(), first1, last1, buf_last); return; } @@ -230,7 +230,7 @@ void op_merge_right ++last1; } } - if(last2 != buf_last){ //In case all remaining elements are in the same place + if(last2 != buf_last){ //In case all remaining elements are in the same place //(e.g. 
buffer is exactly the size of the first half //and all elements from the second half are less) op(backward_t(), first2, last2, buf_last); @@ -257,7 +257,7 @@ void swap_merge_right op_merge_right(first1, last1, last2, buf_last, comp, swap_op()); } -// cost: min(L1,L2)^2+max(L1,L2) +//Complexity: min(len1,len2)^2 + max(len1,len2) template void merge_bufferless(RandIt first, RandIt middle, RandIt last, Compare comp) { @@ -271,12 +271,12 @@ void merge_bufferless(RandIt first, RandIt middle, RandIt last, Compare comp) } do{ ++first; - } while(first != middle && !comp(*middle, *first)); + } while(first != middle && !comp(*middle, *first)); } } else{ while(middle != last){ - RandIt p = upper_bound(first, middle, last[-1], comp); + RandIt p = upper_bound(first, middle, last[-1], comp); last = rotate_gcd(p, middle, last); middle = p; if(middle == first){ diff --git a/include/boost/move/algo/move.hpp b/include/boost/move/algo/move.hpp index 943f286..d35f04a 100644 --- a/include/boost/move/algo/move.hpp +++ b/include/boost/move/algo/move.hpp @@ -125,10 +125,10 @@ F uninitialized_move(I f, I l, F r } } BOOST_CATCH(...){ - for (; back != r; ++back){ + for (; back != r; ++back){ back->~input_value_type(); } - BOOST_RETHROW; + BOOST_RETHROW; } BOOST_CATCH_END return r; diff --git a/include/boost/move/detail/type_traits.hpp b/include/boost/move/detail/type_traits.hpp index 816fdca..e9c804d 100644 --- a/include/boost/move/detail/type_traits.hpp +++ b/include/boost/move/detail/type_traits.hpp @@ -1005,7 +1005,7 @@ BOOST_MOVE_ALIGNED_STORAGE_WITH_BOOST_ALIGNMENT(0x1000) template union aligned_union -{ +{ T aligner; char dummy[Len]; }; @@ -1023,7 +1023,7 @@ struct aligned_next //End of search defaults to max_align_t template struct aligned_next -{ typedef aligned_union type; }; +{ typedef aligned_union type; }; //Now define a search list through types #define BOOST_MOVE_ALIGNED_NEXT_STEP(TYPE, NEXT_TYPE)\ diff --git a/test/adaptive_sort_test.cpp b/test/adaptive_sort_test.cpp index 
4f44eeb..4c24a32 100644 --- a/test/adaptive_sort_test.cpp +++ b/test/adaptive_sort_test.cpp @@ -28,16 +28,8 @@ using boost::timer::nanosecond_type; #include #include - -template -void adaptive_sort_buffered(T *elements, std::size_t element_count, Compare comp, std::size_t BufLen) -{ - boost::movelib::unique_ptr mem(new char[sizeof(T)*BufLen]); - boost::movelib::adaptive_sort(elements, elements + element_count, comp, reinterpret_cast(mem.get()), BufLen); -} - template -bool test_all_permutations(std::size_t const element_count, std::size_t const num_keys, std::size_t const num_iter) +bool test_random_shuffled(std::size_t const element_count, std::size_t const num_keys, std::size_t const num_iter) { boost::movelib::unique_ptr elements(new T[element_count]); boost::movelib::unique_ptr key_reps(new std::size_t[num_keys ? num_keys : element_count]); @@ -49,7 +41,7 @@ bool test_all_permutations(std::size_t const element_count, std::size_t const nu elements[i].key=key; } - std::srand(255); + std::srand(0); for (std::size_t i = 0; i != num_iter; ++i) { @@ -81,11 +73,11 @@ int main() #else const std::size_t NIter = 10; #endif - test_all_permutations(10001, 65, NIter); - test_all_permutations(10001, 101, NIter); - test_all_permutations(10001, 1023, NIter); - test_all_permutations(10001, 4095, NIter); - test_all_permutations(10001, 0, NIter); + test_random_shuffled(10001, 65, NIter); + test_random_shuffled(10001, 101, NIter); + test_random_shuffled(10001, 1023, NIter); + test_random_shuffled(10001, 4095, NIter); + test_random_shuffled(10001, 0, NIter); return 0; } diff --git a/test/move_if_noexcept.cpp b/test/move_if_noexcept.cpp index 2c5afd3..a03a821 100644 --- a/test/move_if_noexcept.cpp +++ b/test/move_if_noexcept.cpp @@ -139,7 +139,7 @@ int main() movable m4(function(boost::move_if_noexcept(m3))); BOOST_CHECK(m3.moved()); BOOST_CHECK(!m4.moved()); - } + } { movable m; movable m2(boost::move_if_noexcept(m)); @@ -149,7 +149,7 @@ int main() movable 
m4(functionr(boost::move_if_noexcept(m3))); BOOST_CHECK(m3.moved()); BOOST_CHECK(!m4.moved()); - } + } { movable m; movable m2(boost::move_if_noexcept(m)); @@ -159,7 +159,7 @@ int main() movable m4(function2(boost::move_if_noexcept(m3))); BOOST_CHECK(m3.moved()); BOOST_CHECK(!m4.moved()); - } + } { movable m; movable m2(boost::move_if_noexcept(m)); @@ -169,7 +169,7 @@ int main() movable m4(function2r(boost::move_if_noexcept(m3))); BOOST_CHECK(m3.moved()); BOOST_CHECK(!m4.moved()); - } + } { movable m; movable m2(boost::move_if_noexcept(m)); @@ -177,7 +177,7 @@ int main() BOOST_CHECK(!m2.moved()); movable m3(move_return_function()); BOOST_CHECK(!m3.moved()); - } + } { movable m; movable m2(boost::move_if_noexcept(m)); @@ -185,7 +185,7 @@ int main() BOOST_CHECK(!m2.moved()); movable m3(move_return_function2()); BOOST_CHECK(!m3.moved()); - } + } // copy_movable may throw during move, so it must be copied { @@ -197,7 +197,7 @@ int main() copy_movable m4(function(boost::move_if_noexcept(m3))); BOOST_CHECK(!m3.moved()); BOOST_CHECK(!m4.moved()); - } + } // copy_movable_noexcept can not throw during move @@ -210,7 +210,7 @@ int main() copy_movable_noexcept m4(function(boost::move_if_noexcept(m3))); BOOST_CHECK(m3.moved()); BOOST_CHECK(!m4.moved()); - } + } // movable_throwable can not throw during move but it has no copy constructor { @@ -222,7 +222,7 @@ int main() movable_throwable m4(function(boost::move_if_noexcept(m3))); BOOST_CHECK(m3.moved()); BOOST_CHECK(!m4.moved()); - } + } return boost::report_errors(); } diff --git a/test/unique_ptr_functions.cpp b/test/unique_ptr_functions.cpp index 26f6b5b..d3b4d6d 100644 --- a/test/unique_ptr_functions.cpp +++ b/test/unique_ptr_functions.cpp @@ -63,11 +63,11 @@ int volatile_memcmp(const volatile void *p1, const volatile void *p2, std::size_ unsigned char u1, u2; for ( ; len-- ; s1++, s2++) { - u1 = *s1; - u2 = *s2; - if (u1 != u2) { - return (u1-u2); - } + u1 = *s1; + u2 = *s2; + if (u1 != u2) { + return (u1-u2); + } } 
return 0; } From 38d556a5bd657d7d20b26dba8562d64aa197a54b Mon Sep 17 00:00:00 2001 From: Flamefire Date: Thu, 12 May 2016 13:30:08 +0200 Subject: [PATCH 05/16] Fix assignment of move-and-copy emulated classes --- include/boost/move/core.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/move/core.hpp b/include/boost/move/core.hpp index c9cbec2..96e15c0 100644 --- a/include/boost/move/core.hpp +++ b/include/boost/move/core.hpp @@ -261,7 +261,7 @@ #define BOOST_COPYABLE_AND_MOVABLE(TYPE)\ public:\ TYPE& operator=(TYPE &t)\ - { this->operator=(const_cast(t)); return *this;}\ + { this->operator=(static_cast&>(t)); }\ public:\ BOOST_MOVE_FORCEINLINE operator ::boost::rv&() \ { return *BOOST_MOVE_TO_RV_CAST(::boost::rv*, this); }\ From 94627830e69f13b1305d4014136b8338bdc94a1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Fri, 13 May 2016 00:19:37 +0200 Subject: [PATCH 06/16] Add reverse iterator to be used in sort algorithms --- .../boost/move/detail/reverse_iterator.hpp | 171 ++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 include/boost/move/detail/reverse_iterator.hpp diff --git a/include/boost/move/detail/reverse_iterator.hpp b/include/boost/move/detail/reverse_iterator.hpp new file mode 100644 index 0000000..73f59ce --- /dev/null +++ b/include/boost/move/detail/reverse_iterator.hpp @@ -0,0 +1,171 @@ +///////////////////////////////////////////////////////////////////////////// +// +// (C) Copyright Ion Gaztanaga 2014-2014 +// +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// See http://www.boost.org/libs/move for documentation. 
+// +///////////////////////////////////////////////////////////////////////////// + +#ifndef BOOST_MOVE_DETAIL_REVERSE_ITERATOR_HPP +#define BOOST_MOVE_DETAIL_REVERSE_ITERATOR_HPP + +#ifndef BOOST_CONFIG_HPP +# include +#endif + +#if defined(BOOST_HAS_PRAGMA_ONCE) +# pragma once +#endif + +#include +#include +#include + +namespace boost { +namespace movelib { + +template +class reverse_iterator +{ + public: + typedef typename boost::movelib::iterator_traits::pointer pointer; + typedef typename boost::movelib::iterator_traits::reference reference; + typedef typename boost::movelib::iterator_traits::difference_type difference_type; + typedef typename boost::movelib::iterator_traits::iterator_category iterator_category; + typedef typename boost::movelib::iterator_traits::value_type value_type; + + + typedef It iterator_type; + + reverse_iterator() + : m_current() //Value initialization to achieve "null iterators" (N3644) + {} + + explicit reverse_iterator(It r) + : m_current(r) + {} + + reverse_iterator(const reverse_iterator& r) + : m_current(r.base()) + {} + + template + reverse_iterator( const reverse_iterator& r + , typename boost::move_detail::enable_if_convertible::type* =0 + ) + : m_current(r.base()) + {} + + reverse_iterator & operator=( const reverse_iterator& r) + { m_current = r.base(); return *this; } + + template + typename boost::move_detail::enable_if_convertible::type + operator=( const reverse_iterator& r) + { m_current = r.base(); return *this; } + + It base() const + { return m_current; } + + reference operator*() const + { + It temp(m_current); + --temp; + reference r = *temp; + return r; + } + + pointer operator->() const + { + It temp(m_current); + --temp; + return iterator_arrow_result(temp); + } + + reference operator[](difference_type off) const + { + return this->m_current[-off - 1]; + } + + reverse_iterator& operator++() + { + --m_current; + return *this; + } + + reverse_iterator operator++(int) + { + reverse_iterator temp((*this)); + 
--m_current; + return temp; + } + + reverse_iterator& operator--() + { + ++m_current; + return *this; + } + + reverse_iterator operator--(int) + { + reverse_iterator temp((*this)); + ++m_current; + return temp; + } + + friend bool operator==(const reverse_iterator& l, const reverse_iterator& r) + { return l.m_current == r.m_current; } + + friend bool operator!=(const reverse_iterator& l, const reverse_iterator& r) + { return l.m_current != r.m_current; } + + friend bool operator<(const reverse_iterator& l, const reverse_iterator& r) + { return l.m_current > r.m_current; } + + friend bool operator<=(const reverse_iterator& l, const reverse_iterator& r) + { return l.m_current >= r.m_current; } + + friend bool operator>(const reverse_iterator& l, const reverse_iterator& r) + { return l.m_current < r.m_current; } + + friend bool operator>=(const reverse_iterator& l, const reverse_iterator& r) + { return l.m_current <= r.m_current; } + + reverse_iterator& operator+=(difference_type off) + { m_current -= off; return *this; } + + reverse_iterator& operator-=(difference_type off) + { m_current += off; return *this; } + + friend reverse_iterator operator+(reverse_iterator l, difference_type off) + { return (l += off); } + + friend reverse_iterator operator+(difference_type off, reverse_iterator r) + { return (r += off); } + + friend reverse_iterator operator-(reverse_iterator l, difference_type off) + { return (l-= off); } + + friend difference_type operator-(const reverse_iterator& l, const reverse_iterator& r) + { return r.m_current - l.m_current; } + + private: + It m_current; // the wrapped iterator +}; + +template< class Iterator > +reverse_iterator make_reverse_iterator( Iterator i ) +{ + return reverse_iterator(i); +} + +} //namespace movelib { +} //namespace boost { + +#include + +#endif //BOOST_MOVE_DETAIL_REVERSE_ITERATOR_HPP From 6e07bb846c3697b1543913ed8d8feab38585e0ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Fri, 13 May 2016 
00:22:20 +0200 Subject: [PATCH 07/16] Add three-way operations to move_op & swap_op --- include/boost/move/algo/detail/basic_op.hpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/boost/move/algo/detail/basic_op.hpp b/include/boost/move/algo/detail/basic_op.hpp index 936f7a2..ea0ce1b 100644 --- a/include/boost/move/algo/detail/basic_op.hpp +++ b/include/boost/move/algo/detail/basic_op.hpp @@ -21,12 +21,14 @@ #include #include +#include namespace boost { namespace movelib { struct forward_t{}; struct backward_t{}; +struct three_way_t{}; struct move_op { @@ -41,6 +43,13 @@ struct move_op template DestinationIt operator()(backward_t, SourceIt first, SourceIt last, DestinationIt dest_last) { return ::boost::move_backward(first, last, dest_last); } + + template + void operator()(three_way_t, SourceIt srcit, DestinationIt1 dest1it, DestinationIt2 dest2it) + { + *dest2it = boost::move(*dest1it); + *dest1it = boost::move(*srcit); + } }; struct swap_op @@ -56,6 +65,15 @@ struct swap_op template DestinationIt operator()(backward_t, SourceIt first, SourceIt last, DestinationIt dest_begin) { return boost::adl_move_swap_ranges_backward(first, last, dest_begin); } + + template + void operator()(three_way_t, SourceIt srcit, DestinationIt1 dest1it, DestinationIt2 dest2it) + { + typename ::boost::movelib::iterator_traits::value_type tmp(boost::move(*dest2it)); + *dest2it = boost::move(*dest1it); + *dest1it = boost::move(*srcit); + *srcit = boost::move(tmp); + } }; }} //namespace boost::movelib From 75983a43cef9bca4cdd5490b10075de468c0ce63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Fri, 13 May 2016 00:26:54 +0200 Subject: [PATCH 08/16] Added "negate" functor and merge_bufferless_ONlogN implementation --- include/boost/move/algo/detail/merge.hpp | 153 ++++++++++++++++++++++- 1 file changed, 151 insertions(+), 2 deletions(-) diff --git a/include/boost/move/algo/detail/merge.hpp b/include/boost/move/algo/detail/merge.hpp index 
59a04df..11d5740 100644 --- a/include/boost/move/algo/detail/merge.hpp +++ b/include/boost/move/algo/detail/merge.hpp @@ -257,9 +257,104 @@ void swap_merge_right op_merge_right(first1, last1, last2, buf_last, comp, swap_op()); } +template +void merge_bufferless_ONlogN_recursive + (BidirIt first, BidirIt middle, BidirIt last, Distance len1, Distance len2, Compare comp) +{ + typedef typename iterator_traits::size_type size_type; + while(1) { + //#define MERGE_BUFFERLESS_RECURSIVE_OPT + #ifndef MERGE_BUFFERLESS_RECURSIVE_OPT + if (len2 == 0) { + return; + } + + if (!len1) { + return; + } + + if ((len1 | len2) == 1) { + if (comp(*middle, *first)) + adl_move_swap(*first, *middle); + return; + } + #else + if (len2 == 0) { + return; + } + + if (!len1) { + return; + } + BidirIt middle_prev = middle; --middle_prev; + if(!comp(*middle, *middle_prev)) + return; + + while(true) { + if (comp(*middle, *first)) + break; + ++first; + if(--len1 == 1) + break; + } + + if (len1 == 1 && len2 == 1) { + //comp(*middle, *first) == true already tested in the loop + adl_move_swap(*first, *middle); + return; + } + #endif + + BidirIt first_cut = first; + BidirIt second_cut = middle; + Distance len11 = 0; + Distance len22 = 0; + if (len1 > len2) { + len11 = len1 / 2; + first_cut += len11; + second_cut = lower_bound(middle, last, *first_cut, comp); + len22 = size_type(second_cut - middle); + } + else { + len22 = len2 / 2; + second_cut += len22; + first_cut = upper_bound(first, middle, *second_cut, comp); + len11 = size_type(first_cut - first); + } + BidirIt new_middle = rotate_gcd(first_cut, middle, second_cut); + + //Avoid one recursive call doing a manual tail call elimination on the biggest range + const Distance len_internal = len11+len22; + if( len_internal < (len1 + len2 - len_internal) ) { + merge_bufferless_ONlogN_recursive(first, first_cut, new_middle, len11, len22, comp); + //merge_bufferless_recursive(new_middle, second_cut, last, len1 - len11, len2 - len22, comp); + first = 
new_middle; + middle = second_cut; + len1 -= len11; + len2 -= len22; + } + else { + //merge_bufferless_recursive(first, first_cut, new_middle, len11, len22, comp); + merge_bufferless_ONlogN_recursive(new_middle, second_cut, last, len1 - len11, len2 - len22, comp); + middle = first_cut; + last = new_middle; + len1 = len11; + len2 = len22; + } + } +} + +//Complexity: NlogN +template +void merge_bufferless_ONlogN(BidirIt first, BidirIt middle, BidirIt last, Compare comp) +{ + merge_bufferless_ONlogN_recursive + (first, middle, last, middle - first, last - middle, comp); +} + //Complexity: min(len1,len2)^2 + max(len1,len2) template -void merge_bufferless(RandIt first, RandIt middle, RandIt last, Compare comp) +void merge_bufferless_ON2(RandIt first, RandIt middle, RandIt last, Compare comp) { if((middle - first) < (last - middle)){ while(first != middle){ @@ -290,10 +385,21 @@ void merge_bufferless(RandIt first, RandIt middle, RandIt last, Compare comp) } } +template +void merge_bufferless(RandIt first, RandIt middle, RandIt last, Compare comp) +{ + //#define BOOST_ADAPTIVE_MERGE_NLOGN_MERGE + #ifdef BOOST_ADAPTIVE_MERGE_NLOGN_MERGE + merge_bufferless_ONlogN(first, middle, last, comp); + #else + merge_bufferless_ON2(first, middle, last, comp); + #endif //BOOST_ADAPTIVE_MERGE_NLOGN_MERGE +} + template struct antistable { - antistable(Comp &comp) + explicit antistable(Comp &comp) : m_comp(comp) {} @@ -306,6 +412,49 @@ struct antistable Comp &m_comp; }; +template +class negate +{ + public: + negate() + {} + + explicit negate(Comp comp) + : m_comp(comp) + {} + + template + bool operator()(const T1& l, const T2& r) + { + return !m_comp(l, r); + } + + private: + Comp m_comp; +}; + + +template +class inverse +{ + public: + inverse() + {} + + explicit inverse(Comp comp) + : m_comp(comp) + {} + + template + bool operator()(const T1& l, const T2& r) + { + return m_comp(r, l); + } + + private: + Comp m_comp; +}; + // [r_first, r_last) are already in the right part of the 
destination range. template void op_merge_with_right_placed From 5a118de8034b83d7843baa3524a53fa6af7b7abf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Fri, 13 May 2016 00:28:29 +0200 Subject: [PATCH 09/16] Added recursive inplace_stable_sort implementation --- include/boost/move/algo/detail/merge_sort.hpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/boost/move/algo/detail/merge_sort.hpp b/include/boost/move/algo/detail/merge_sort.hpp index 8101fce..892639b 100644 --- a/include/boost/move/algo/detail/merge_sort.hpp +++ b/include/boost/move/algo/detail/merge_sort.hpp @@ -41,6 +41,21 @@ namespace movelib { static const unsigned MergeSortInsertionSortThreshold = 16; +template +void inplace_stable_sort(RandIt first, RandIt last, Compare comp) +{ + typedef typename iterator_traits::size_type size_type; + if (size_type(last - first) <= size_type(MergeSortInsertionSortThreshold)) { + insertion_sort(first, last, comp); + return; + } + RandIt middle = first + (last - first) / 2; + inplace_stable_sort(first, middle, comp); + inplace_stable_sort(middle, last, comp); + merge_bufferless_ONlogN_recursive + (first, middle, last, size_type(middle - first), size_type(last - middle), comp); +} + // @endcond template From 833e507326e1bd5b8c9b0c263844a8342158abff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Fri, 13 May 2016 00:34:40 +0200 Subject: [PATCH 10/16] Added cache usage to selection sort when external buffer is available. Refactored merge to right using reverse iterators. 
--- .../move/algo/detail/adaptive_sort_merge.hpp | 888 ++++++++---------- 1 file changed, 369 insertions(+), 519 deletions(-) diff --git a/include/boost/move/algo/detail/adaptive_sort_merge.hpp b/include/boost/move/algo/detail/adaptive_sort_merge.hpp index 87828e8..3d97212 100644 --- a/include/boost/move/algo/detail/adaptive_sort_merge.hpp +++ b/include/boost/move/algo/detail/adaptive_sort_merge.hpp @@ -45,6 +45,7 @@ #define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP #include +#include #include #include #include @@ -161,6 +162,29 @@ class adaptive_xbuf m_size = size; } + void shrink_to_fit(std::size_t const size) + { + if(m_size > size){ + for(std::size_t szt_i = size; szt_i != m_size; ++szt_i){ + m_ptr[szt_i].~T(); + } + m_size = size; + } + } + + void initialize_until(std::size_t const size, T &t) + { + BOOST_ASSERT(m_size < m_capacity); + if(m_size < size){ + ::new((void*)&m_ptr[m_size]) T(::boost::move(t)); + ++m_size; + for(; m_size != size; ++m_size){ + ::new((void*)&m_ptr[m_size]) T(::boost::move(m_ptr[m_size-1])); + } + t = ::boost::move(m_ptr[m_size-1]); + } + } + template bool supports_aligned_trailing(std::size_t size, std::size_t trail_count) const { @@ -210,11 +234,7 @@ class adaptive_xbuf void clear() { - std::size_t size = m_size; - while(size--){ - m_ptr[size].~T(); - } - m_size = 0u; + this->shrink_to_fit(0u); } private: @@ -288,41 +308,75 @@ class range_xbuf Iterator const m_cap; }; -template -bool three_way_init( RandIt first1, RandIt last1, Buf &buf - , typename Buf::iterator &buf_first, typename Buf::iterator &buf_last, move_op) + +template +RandIt skip_until_merge + ( RandIt first1, RandIt const last1 + , const typename iterator_traits::value_type &next_key, Compare comp) { - buf.move_assign(first1, last1-first1); - buf_first = buf.data(); - buf_last = buf.end(); - return true; + while(first1 != last1 && !comp(next_key, *first1)){ + ++first1; + } + return first1; } -template -bool three_way_init( RandIt first, RandIt last, Buf &buf - , typename 
Buf::iterator &buf_first, typename Buf::iterator &buf_last, swap_op) +template +OutputIt op_partial_merge + (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, OutputIt d_first, Compare comp, Op op) { - typedef typename iterator_traits::size_type size_type; - buf.clear(); - buf_first = buf.data(); - buf_last = buf_first + size_type(last-first); - return false; + InputIt1 first1(r_first1); + InputIt2 first2(r_first2); + if(first2 != last2 && last1 != first1) + while(1){ + if(comp(*first2, *first1)) { + op(first2++, d_first++); + if(first2 == last2){ + break; + } + } + else{ + op(first1++, d_first++); + if(first1 == last1){ + break; + } + } + } + r_first1 = first1; + r_first2 = first2; + return d_first; } -template -void three_way_move(T &a, T &b, Buf &buf, move_op) +template +RandItB op_buffered_partial_merge_to_left_placed + ( RandIt1 first1, RandIt1 const last1 + , RandIt2 &rfirst2, RandIt2 const last2 + , RandItB &rfirstb, Compare comp, Op op ) { - buf.add(&b); - b = boost::move(a); -} + RandItB firstb = rfirstb; + RandItB lastb = firstb; + RandIt2 first2 = rfirst2; -template -void three_way_move(T &a, T &b, Buf &buf, swap_op) -{ - T tmp(boost::move(*buf.end())); - buf.add(&b); - b = boost::move(a); - a = boost::move(tmp); + //Move to buffer while merging + //Three way moves need less moves when op is swap_op so use it + //when merging elements from range2 to the destination occupied by range1 + if(first1 != last1 && first2 != last2){ + op(three_way_t(), first2++, first1++, lastb++); + + while(true){ + if(first1 == last1){ + break; + } + if(first2 == last2){ + lastb = op(forward_t(), first1, last1, firstb); + break; + } + op(three_way_t(), comp(*first2, *firstb) ? 
first2++ : firstb++, first1++, lastb++); + } + } + + rfirst2 = first2; + rfirstb = firstb; + return lastb; } /////////////////////////////////////////////////////////////////////////////// @@ -331,7 +385,6 @@ void three_way_move(T &a, T &b, Buf &buf, swap_op) // /////////////////////////////////////////////////////////////////////////////// - template RandIt op_partial_merge_with_buf_impl ( RandIt first1, RandIt const last1, RandIt first2, RandIt last2 @@ -340,7 +393,6 @@ RandIt op_partial_merge_with_buf_impl ) { typedef typename Buf::iterator buf_iterator; - typedef typename iterator_traits::value_type value_type; BOOST_ASSERT(first1 != last1); BOOST_ASSERT(first2 != last2); @@ -349,40 +401,21 @@ RandIt op_partial_merge_with_buf_impl if(buf_first1 == buf_last1){ //Skip any element that does not need to be moved - while(!comp(*last1, *first1)){ - ++first1; - if(first1 == last1){ - return first1; - } - } - - //If initialization is successful, move to buffer while merging - //Three way moves need less moves when op is swap_op so use it - //when merging elements from range2 to the destination occupied by range1 - if(!three_way_init(first1, last1, buf, buf_first1, buf_last1, op)){ - three_way_move(*first2, *first1, buf, op); - for(++first1, ++first2; first1 != last1; ++first1){ - value_type &v = comp(*first2, *buf_first1) ? 
*first2++ : *buf_first1++; - three_way_move(v, *first1, buf, op); - } + first1 = skip_until_merge(first1, last1, *last1, comp); + if(first1 == last1){ + return first1; } + buf_first1 = buf.data(); + buf_last1 = op_buffered_partial_merge_to_left_placed(first1, last1, first2, last2, buf_first1, comp, op); + BOOST_ASSERT(buf_last1 == (buf.data() + (last1-first1))); + first1 = last1; + } + else{ + BOOST_ASSERT((last1-first1) == (buf_last1 - buf_first1)); } //Now merge from buffer - if(first2 != last2) - while(1){ - if(comp(*first2, *buf_first1)) { - op(first2++, first1++); - if(first2 == last2) - break; - } - else{ - op(buf_first1++, first1++); - if(buf_first1 == buf_last1) - break; - } - } - + first1 = op_partial_merge(buf_first1, buf_last1, first2, last2, first1, comp, op); buf_first1_in_out = buf_first1; buf_last1_in_out = buf_last1; return first1; @@ -429,72 +462,53 @@ void op_merge_blocks_with_buf , Op op , Buf & xbuf) { - if(n_bef_irreg2 == 0){ - RandIt const last_reg(first+l_irreg1+n_aft_irreg2*l_block); - op_buffered_merge(first, last_reg, last_reg+l_irreg2, comp, op, xbuf); - } - else { - typedef typename Buf::iterator buf_iterator; - buf_iterator buffer = xbuf.data(); - buf_iterator buffer_end = buffer; - RandIt first1 = first; - RandIt last1 = l_irreg1 ? first1 + l_irreg1 : first1 + l_block; - RandIt first2 = last1; - RandItKeys const key_end (key_first+n_bef_irreg2); + typedef typename Buf::iterator buf_iterator; + buf_iterator buffer = xbuf.data(); + buf_iterator buffer_end = buffer; + RandIt first1 = first; + RandIt last1 = first1 + l_irreg1; + RandItKeys const key_end (key_first+n_bef_irreg2); - for( bool is_range1_A = l_irreg1 ? true : key_comp(*key_first, midkey), skip_first_it = l_irreg1 != 0 - ; key_first != key_end; ){ - if(!skip_first_it){ - ++key_first; - } - skip_first_it = false; - bool const last_it = key_first == key_end; - //If the trailing block is empty, we'll make it equal to the previous if empty - bool const is_range2_A = last_it ? 
(!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey); + bool is_range1_A = true; //first l_irreg1 elements are always from range A - if(is_range1_A == is_range2_A){ - if(buffer != buffer_end){ - first1 = op(forward_t(), buffer, buffer_end, first1); - BOOST_ASSERT(first1 == first2); - buffer_end = buffer; - } - first1 = first2; - if(last_it){ - xbuf.clear(); - last1 = first2+l_block*n_aft_irreg2; - op_buffered_merge(first1, last1, last1+l_irreg2, comp, op, xbuf); - break; - } - else{ - last1 = first2 + l_block; - } - first2 += l_block; - } - else { - BOOST_ASSERT(!last_it || (l_irreg2 || n_aft_irreg2)); - if(last_it){ - RandIt res = op(forward_t(), buffer, buffer_end, first1); - BOOST_ASSERT(buffer == buffer_end || res == last1); (void)res; - last1 += l_block*n_aft_irreg2; - xbuf.clear(); - op_buffered_merge(first1, last1, last1+l_irreg2, comp, op, xbuf); - break; - } - else{ - RandIt const last2 = first2 + l_block; - first1 = op_partial_merge_with_buf(first1, last1, first2, last2, xbuf, buffer, buffer_end, comp, op, is_range1_A); - if(buffer == buffer_end){ - is_range1_A = is_range2_A; - } - last1 = last2; - first2 = last1; - BOOST_ASSERT((buffer == buffer_end) || (buffer_end-buffer) == (last1-first1)); - } - } + for( ; key_first != key_end; ++key_first, last1 += l_block){ + //If the trailing block is empty, we'll make it equal to the previous if empty + bool const is_range2_A = key_comp(*key_first, midkey); + + if(is_range1_A == is_range2_A){ + //If buffered, put those elements in place + RandIt res = op(forward_t(), buffer, buffer_end, first1); + BOOST_ASSERT(buffer == buffer_end || res == last1); (void)res; + buffer_end = buffer; + first1 = last1; + } + else { + first1 = op_partial_merge_with_buf(first1, last1, last1, last1 + l_block, xbuf, buffer, buffer_end, comp, op, is_range1_A); + BOOST_ASSERT(buffer == buffer_end || (buffer_end-buffer) == (last1+l_block-first1)); + is_range1_A ^= buffer == buffer_end; } } + + //Now the trailing irregular block, first 
put buffered elements in place + RandIt res = op(forward_t(), buffer, buffer_end, first1); + BOOST_ASSERT(buffer == buffer_end || res == last1); (void)res; + + BOOST_ASSERT(l_irreg2 || n_aft_irreg2); + if(l_irreg2){ + bool const is_range2_A = false; //last l_irreg2 elements always from range B + if(is_range1_A == is_range2_A){ + first1 = last1; + last1 = last1+l_block*n_aft_irreg2; + } + else { + last1 += l_block*n_aft_irreg2; + } + xbuf.clear(); + op_buffered_merge(first1, last1, last1+l_irreg2, comp, op, xbuf); + } } + template void merge_blocks_with_buf ( RandItKeys key_first @@ -532,9 +546,8 @@ RandIt op_partial_merge_left_middle_buffer_impl , const typename iterator_traits::value_type &next_key, Compare comp , Op op) { - while(first1 != last1 && !comp(next_key, *first1)){ - ++first1; - } + first1 = skip_until_merge(first1, last1, next_key, comp); + //Even if we copy backward, no overlapping occurs so use forward copy //that can be faster specially with trivial types RandIt const new_first1 = first2 - (last1 - first1); @@ -552,72 +565,6 @@ RandIt op_partial_merge_left_middle_buffer : op_partial_merge_left_middle_buffer_impl(first1, last1, first2, next_key, antistable(comp), op); } -// Partially merges two ordered ranges. Partially means that elements are merged -// until one of two ranges is exhausted (M elements from ranges 1 y 2). -// [buf_first, ...) -> buffer that can be overwritten -// [first1, last1) merge [last1,last2) -> [buf_first, buf_first+M) -// Note: distance(buf_first, first1) >= distance(last1, last2), so no overlapping occurs. 
-template -RandIt op_partial_merge_left_impl - ( RandIt buf_first, RandIt first1, RandIt const last1, RandIt const last2, Compare comp, Op op) -{ - RandIt first2 = last1; - while(first1 != last1){ - if(first2 == last2){ - return first1; - } - if(comp(*first2, *first1)) { - op(first2, buf_first); - ++first2; - } - else{ - op(first1, buf_first); - ++first1; - } - ++buf_first; - } - return first2; -} - - -template -RandIt op_partial_merge_left - ( RandIt buf_first, RandIt first1, RandIt const last1, RandIt const last2, Compare comp, Op op, bool is_stable) -{ - return is_stable ? op_partial_merge_left_impl(buf_first, first1, last1, last2, comp, op) - : op_partial_merge_left_impl(buf_first, first1, last1, last2, antistable(comp), op); -} - -template -bool three_way_side_init( RandIt first1, RandIt last1, RandIt buf_first1, move_op) -{ - boost::move(first1, last1, buf_first1); - return true; -} - -template -bool three_way_side_init( RandIt, RandIt, RandIt, swap_op) -{ - return false; -} - -template -void three_way_side_move(T &a, T &b, T&c, move_op) -{ - c = boost::move(b); - b = boost::move(a); -} - -template -void three_way_side_move(T &a, T &b, T &c, swap_op) -{ - T tmp(boost::move(c)); - c = boost::move(b); - b = boost::move(a); - a = boost::move(tmp); -} - - // Partially merges two ordered ranges. Partially means that elements are merged // until one of two ranges is exhausted (M elements from ranges 1 y 2). // [buf_first, ...) 
-> buffer that can be overwritten @@ -627,39 +574,17 @@ template RandIt op_partial_merge_left_smart_impl ( RandIt first1, RandIt last1, RandIt first2, RandIt const last2, Compare comp, Op op) { - typedef typename iterator_traits::value_type value_type; - RandIt dest; if(last1 != first2){ + BOOST_ASSERT(0 != (last1-first1)); BOOST_ASSERT((first2-last1)==(last2-first2)); //Skip any element that does not need to be moved - while(!comp(*first2, *first1)){ - ++first1; - if(first1 == last1) - return first2; - } - + first1 = skip_until_merge(first1, last1, *first2, comp); + if(first1 == last1) + return first2; RandIt buf_first1 = first2 - (last1-first1); - - //If initialization is successful, move to buffer while merging - //Three way moves need less moves when op is swap_op so use it - //when merging elements from range2 to the destination occupied by range1 - if(!three_way_side_init(first1, last1, buf_first1, op)){ - RandIt buf_last1 = buf_first1; - three_way_side_move(*first2, *first1, *buf_last1++, op); - - RandIt const orig_first2 = first2;(void)(orig_first2); - for(++first1, ++first2; first1 != last1; ++first1, ++buf_last1){ - value_type &v = comp(*first2, *buf_first1) ? 
*first2++ : *buf_first1++; - three_way_side_move(v, *first1, *buf_last1, op); - } - BOOST_ASSERT(buf_last1 == orig_first2); - last1 = buf_last1; - } - else{ - last1 = first2; - } - dest = first1; + dest = last1; + last1 = op_buffered_partial_merge_to_left_placed(first1, last1, first2, last2, buf_first1, comp, op); first1 = buf_first1; BOOST_ASSERT((first1-dest) == (last2-first2)); } @@ -667,27 +592,10 @@ RandIt op_partial_merge_left_smart_impl dest = first1-(last2-first2); } - BOOST_ASSERT(0 != (last1-first1)); - if(first2 != last2) - while(1){ - if(comp(*first2, *first1)) { - op(first2++, dest++); - if(first2 == last2){ - return first1; - } - } - else{ - op(first1++, dest++); - if(first1 == last1){ - return first2; - } - } - } - return first1; + op_partial_merge(first1, last1, first2, last2, dest, comp, op); + return first1 == last1 ? first2 : first1; } - - template RandIt op_partial_merge_left_smart (RandIt first1, RandIt const last1, RandIt first2, RandIt const last2, Compare comp, Op op, bool is_stable) @@ -696,7 +604,6 @@ RandIt op_partial_merge_left_smart : op_partial_merge_left_smart_impl(first1, last1, first2, last2, antistable(comp), op); } - // first - first element to merge. // first[-l_block, 0) - buffer // l_block - length of regular blocks. Blocks are stable sorted by 1st elements and key-coded @@ -717,209 +624,58 @@ void op_merge_blocks_left , typename iterator_traits::size_type const l_irreg2 , Compare comp, Op op) { - if(n_bef_irreg2 == 0){ - RandIt const last_reg(first+l_irreg1+n_aft_irreg2*l_block); - op_merge_left(first-l_block, first, last_reg, last_reg+l_irreg2, comp, op); - } - else { - RandIt buffer = first - l_block; - RandIt first1 = first; - RandIt last1 = l_irreg1 ? first1 + l_irreg1 : first1 + l_block; - RandIt first2 = last1; - RandItKeys const key_end (key_first+n_bef_irreg2); - bool skip_first_it = l_irreg1 != 0; - for( bool is_range1_A = l_irreg1 ? 
true : key_comp(*key_first, midkey) - ; key_first != key_end; first2 += l_block){ - if(!skip_first_it){ - ++key_first; - } - skip_first_it = false; - bool const last_it = key_first == key_end; - //If the trailing block is empty, we'll make it equal to the previous if empty - bool const is_range2_A = last_it ? (!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey); - bool const is_buffer_middle = last1 == buffer; - - if(is_range1_A == is_range2_A){ - //If range1 is buffered, write it to its final position - if(!is_buffer_middle){ - buffer = op(forward_t(), first1, last1, buffer); - } - - first1 = first2; - if(last_it){ - last1 = first2+l_block*n_aft_irreg2; - op_merge_left(buffer, first1, last1, last1+l_irreg2, comp, op); - break; - } - else{ - last1 = first2 + l_block; - } - } - else { - BOOST_ASSERT(!last_it || (l_irreg2 || n_aft_irreg2)); - if(last_it){ - if(is_buffer_middle){ - //See comment below marked with (*) - first1 = op_partial_merge_left_middle_buffer(first1, last1, first2, first2[l_block*n_aft_irreg2], comp, op, is_range1_A); - last1 = first2; - buffer = first1 - l_block; - } - last1 += l_block*n_aft_irreg2; - op_merge_left(buffer, first1, last1, last1+l_irreg2, comp, op); - break; - } - else{ - RandIt const last2 = first2 + l_block; - first1 = op_partial_merge_left_smart(first1, last1, first2, last2, comp, op, is_range1_A); - - if(first1 < first2){ //is_buffer_middle == true for the next iteration - last1 = first2; - buffer = last1; - } - else{ //is_buffer_middle == false for the next iteration - is_range1_A = is_range2_A; - buffer = first1 - l_block; - last1 = last2; - } - } - } - } - } -} - -/////////////////////////////////////////////////////////////////////////////// -// -// PARTIAL MERGE RIGHT -// -/////////////////////////////////////////////////////////////////////////////// - - -template -RandIt op_partial_merge_right_middle_buffer_impl - ( RandIt const last1, RandIt const first2, RandIt last2, Compare comp, Op op) -{ - while(first2 != 
last2 && !comp(last2[-1], last1[-1])){ - --last2; - } - return op(forward_t(), first2, last2, last1); -} - -template -RandIt op_partial_merge_right_middle_buffer - ( RandIt const last1, RandIt first2, RandIt const last2 - , Compare comp, Op op, bool is_stable) -{ - return is_stable ? op_partial_merge_right_middle_buffer_impl(last1, first2, last2, comp, op) - : op_partial_merge_right_middle_buffer_impl(last1, first2, last2, antistable(comp), op); -} - -// Partially merges two ordered ranges. Partially means that elements are merged -// until one of two ranges is exhausted (M elements from ranges 1 y 2). -// [last2, buf_last) -> buffer that can be overwritten -// [first1, last1) merge [last1,last2) -> [buf_last - M, buf_last) -// Note: distance(last2, buf_last) >= distance(first1, last1), so no overlapping occurs. -template -RandIt op_partial_merge_right_impl - ( RandIt const first1, RandIt last1, RandIt last2, RandIt buf_last, Compare comp, Op op) -{ - RandIt const first2 = last1; - while(first2 != last2){ - if(last1 == first1){ - return last2; - } - --last2; - --last1; - --buf_last; - if(comp(*last2, *last1)){ - op(last1, buf_last); - ++last2; - } - else{ - op(last2, buf_last); - ++last1; - } - } - return last1; -} - -template -RandIt op_partial_merge_right - ( RandIt first1, RandIt const last1, RandIt const last2, RandIt buf_last, Compare comp, Op op, bool is_stable) -{ - return is_stable ? op_partial_merge_right_impl(first1, last1, last2, buf_last, comp, op) - : op_partial_merge_right_impl(first1, last1, last2, buf_last, antistable(comp), op); -} - - -// first - first element to merge. -// last iterator is (first+l_block*(n_bef_irreg2+n_aft_irreg2)+l_irreg2) -// [last, last+l_block) - buffer -// l_block - length of regular blocks. Blocks are stable sorted by 1st elements and key-coded -// key_first - sequence of keys, in same order as blocks. 
key -void op_merge_blocks_right - ( RandItKeys const key_first - , const typename iterator_traits::value_type &midkey - , KeyCompare key_comp - , RandIt const first - , typename iterator_traits::size_type const l_block - , typename iterator_traits::size_type const n_bef_irreg2 - , typename iterator_traits::size_type const n_aft_irreg2 - , typename iterator_traits::size_type const l_irreg2 - , Compare comp, Op op) -{ - RandIt last1 = first + (n_bef_irreg2+n_aft_irreg2)*l_block; - RandIt first1 = last1-l_block; + RandIt buffer = first - l_block; + RandIt first1 = first; + RandIt last1 = first1 + l_irreg1; RandIt first2 = last1; - RandIt last2 = first2 + l_irreg2; - RandIt buffer_end = last2 + l_block; - RandItKeys key_end (key_first+(n_bef_irreg2+n_aft_irreg2)); - - for(bool is_range2_A = false; key_first != key_end; last1 = first1, first1 -= l_block){ - --key_end; - bool const is_range1_A = key_comp(*key_end, midkey); - bool const is_buffer_middle = first2 == buffer_end; + RandItKeys const key_end (key_first+n_bef_irreg2); + bool is_range1_A = true; + for( ; key_first != key_end; first2 += l_block, ++key_first){ + //If the trailing block is empty, we'll make it equal to the previous if empty + bool const is_range2_A = key_comp(*key_first, midkey); if(is_range1_A == is_range2_A){ - if(!is_buffer_middle){ - buffer_end = op(backward_t(), first2, last2, buffer_end); + if(last1 != buffer){ //equiv. to if(!is_buffer_middle) + buffer = op(forward_t(), first1, last1, buffer); } - //else //op forward already done on previous op_partial_merge_right - - first2 = first1; - last2 = last1; + first1 = first2; + last1 = first2 + l_block; } else { - if(is_buffer_middle){ - //A previous op_partial_merge_right has right range2 elements after the buffer. - //In order to merge it with the next block, move them to the start of the buffer so that - //buffer is placed to the right. Move only the minimum elements as some range1 elements - //won't be moved in the merge. 
- last2 = op_partial_merge_right_middle_buffer(last1, first2, last2, comp, op, !is_range2_A); - first2 = last1; - buffer_end = last2 + l_block; - } + RandIt const last2 = first2 + l_block; + first1 = op_partial_merge_left_smart(first1, last1, first2, last2, comp, op, is_range1_A); - //op_partial_merge_right merges two ranges, but stops moving elements - //once one range is emptied to avoid moving data twice in the next iteration - last2 = op_partial_merge_right(first1, last1, last2, buffer_end, comp, op, !is_range2_A); - if(last2 > first2){ //is_buffer_middle == true for the next iteration - buffer_end = first2; + if(first1 < first2){ //is_buffer_middle for the next iteration + last1 = first2; + buffer = last1; } - else{ //is_buffer_middle == false for the next iteration - is_range2_A = is_range1_A; - buffer_end = last2 + l_block; - first2 = first1; + else{ //!is_buffer_middle for the next iteration + is_range1_A = is_range2_A; + buffer = first1 - l_block; + last1 = last2; } } } - if(first2 != buffer_end){ - op(backward_t(), first2, last2, buffer_end); + //Now the trailing irregular block + bool const is_range2_A = false; //Trailing l_irreg2 is always from Range B + bool const is_buffer_middle = last1 == buffer; + + if(!l_irreg2 || is_range1_A == is_range2_A){ //trailing is always B type + //If range1 is buffered, write it to its final position + if(!is_buffer_middle){ + buffer = op(forward_t(), first1, last1, buffer); + } + first1 = first2; } + else { + if(is_buffer_middle){ + first1 = op_partial_merge_left_middle_buffer(first1, last1, first2, first2[l_block*n_aft_irreg2], comp, op, is_range1_A); + buffer = first1 - l_block; + } + } + last1 = first2 + l_block*n_aft_irreg2; + op_merge_left(buffer, first1, last1, last1+l_irreg2, comp, op); } /////////////////////////////////////////////////////////////////////////////// @@ -1200,6 +956,18 @@ struct less // /////////////////////////////////////////////////////////////////////////////// +//#define 
ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN + +#if defined ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN +template +void slow_stable_sort + ( RandIt const first, RandIt const last, Compare comp) +{ + boost::movelib::inplace_stable_sort(first, last, comp); +} + +#else //ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN + template void slow_stable_sort ( RandIt const first, RandIt const last, Compare comp) @@ -1232,6 +1000,8 @@ void slow_stable_sort } } +#endif //ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN + //Returns new l_block and updates use_buf template Unsigned lblock_for_combine @@ -1271,7 +1041,7 @@ Unsigned lblock_for_combine //Although "cycle" sort is known to have the minimum number of writes to target //selection sort is more appropriate here as we want to minimize swaps. -template +template void selection_sort_blocks ( RandItKeys keys , typename iterator_traits::size_type &midkey_idx //inout @@ -1280,7 +1050,9 @@ void selection_sort_blocks , typename iterator_traits::size_type const l_block , typename iterator_traits::size_type const n_blocks , Compare comp - , bool use_first_element) + , bool use_first_element + , XBuf & xbuf +) { typedef typename iterator_traits::size_type size_type ; size_type const back_midkey_idx = midkey_idx; @@ -1294,6 +1066,10 @@ void selection_sort_blocks //One-past the position of the first untouched element of the second half size_type high_watermark = back_midkey_idx+1; BOOST_ASSERT(high_watermark <= n_blocks); + const bool b_cache_on = xbuf.capacity() >= l_block; + //const bool b_cache_on = false; + const size_type cached_none = size_type(-1); + size_type cached_block = cached_none; //Sort by first element if left merging, last element otherwise size_type const reg_off = use_first_element ? 
0u: l_block-1; @@ -1303,35 +1079,89 @@ void selection_sort_blocks //Since we are searching for the minimum value in two sorted halves: //Optimization 1: If block belongs to first half, don't waste time comparing elements of the first half. //Optimization 2: It is enough to compare until the first untouched element of the second half. + //Optimization 3: If cache memory is available, instead of swapping blocks (3 writes per element), + // play with the cache to aproximate it to 2 writes per element. high_watermark = size_type(max_value(block+2, high_watermark)); BOOST_ASSERT(high_watermark <= n_blocks); for(size_type next_block = size_type(max_value(block+1, back_midkey_idx)); next_block < high_watermark; ++next_block){ - const value_type &v = first_block[next_block*l_block+reg_off]; - const value_type &min = first_block[min_block*l_block+reg_off]; - if( comp(v, min) || (!comp(min, v) && key_comp(keys[next_block], keys[min_block])) ){ - min_block=next_block; + const value_type &min_v = (b_cache_on && (cached_block == min_block) ? xbuf.data()[reg_off] : first_block[min_block*l_block+reg_off]); + const value_type &v = (b_cache_on && (cached_block == next_block) ? 
xbuf.data()[reg_off] : first_block[next_block*l_block+reg_off]); + + if( comp(v, min_v) || (!comp(min_v, v) && key_comp(keys[next_block], keys[min_block])) ){ + min_block = next_block; } } if(min_block != block){ BOOST_ASSERT(block >= back_midkey_idx || min_block >= back_midkey_idx); BOOST_ASSERT(min_block < high_watermark); - //Update high watermark if n_blocks is not surpassed + //Increase high watermark if not the maximum and min_block is just before the high watermark high_watermark += size_type((min_block + 1) != n_blocks && (min_block + 1) == high_watermark); BOOST_ASSERT(high_watermark <= n_blocks); - boost::adl_move_swap_ranges(first_block+block*l_block, first_block+(block+1)*l_block, first_block+min_block*l_block); + if(!b_cache_on){ + boost::adl_move_swap_ranges(first_block+block*l_block, first_block+(block+1)*l_block, first_block+min_block*l_block); + } + else if(cached_block == cached_none){ + //Cache the biggest block and put the minimum into its final position + xbuf.move_assign(first_block+block*l_block, l_block); + boost::move(first_block+min_block*l_block, first_block+(min_block+1)*l_block, first_block+block*l_block); + cached_block = min_block; + } + else if(cached_block == block){ + //Since block is cached and is not the minimum, just put the minimum directly into its final position and update the cache index + boost::move(first_block+min_block*l_block, first_block+(min_block+1)*l_block, first_block+block*l_block); + cached_block = min_block; + } + else if(cached_block == min_block){ + //Since the minimum is cached, move the block to the back position and flush the cache to its final position + boost::move(first_block+block*l_block, first_block+(block+1)*l_block, first_block+min_block*l_block); + boost::move(xbuf.data(), xbuf.end(), first_block+block*l_block); + cached_block = cached_none; + } + else{ + //Cached block is not any of two blocks to be exchanged, a smarter operation must be performed + BOOST_ASSERT(cached_block != min_block); + 
BOOST_ASSERT(cached_block != block); + BOOST_ASSERT(cached_block > block); + BOOST_ASSERT(cached_block < high_watermark); + //Instead of moving block to the slot of the minimum (which is typical selection sort), before copying + //data from the minimum slot to its final position: + // -> move it to free slot pointed by cached index, and + // -> move cached index into slot of the minimum. + //Since both cached_block and min_block belong to the still unordered range of blocks, the change + //does not break selection sort and saves one copy. + boost::move(first_block+block*l_block, first_block+(block+1)*l_block, first_block+cached_block*l_block); + boost::move(first_block+min_block*l_block, first_block+(min_block+1)*l_block, first_block+block*l_block); + //Note that this trick requires an additionl fix for keys and midkey index + boost::adl_move_swap(keys[cached_block], keys[min_block]); + if(midkey_idx == cached_block) + midkey_idx = min_block; + else if(midkey_idx == min_block) + midkey_idx = cached_block; + boost::adl_move_swap(cached_block, min_block); + } + //Once min_block and block are exchanged, fix the movement imitation key buffer and midkey index. 
boost::adl_move_swap(keys[block], keys[min_block]); if(midkey_idx == block) midkey_idx = min_block; else if(midkey_idx == min_block) midkey_idx = block; } + else if(b_cache_on && cached_block == block){ + //The selected block was the minimum, but since it was cached, move it to its final position + boost::move(xbuf.data(), xbuf.end(), first_block+block*l_block); + cached_block = cached_none; + } + } //main for loop + + if(b_cache_on && cached_block != cached_none){ + //The sort has ended with cached data, move it to its final position + boost::move(xbuf.data(), xbuf.end(), first_block+cached_block*l_block); } } -template -void stable_sort( RandIt first, RandIt last, Compare comp - , adaptive_xbuf::value_type> & xbuf) +template +void stable_sort( RandIt first, RandIt last, Compare comp, XBuf & xbuf) { typedef typename iterator_traits::size_type size_type; size_type const len = size_type(last - first); @@ -1344,10 +1174,10 @@ void stable_sort( RandIt first, RandIt last, Compare comp } } -template +template void initialize_keys( RandIt first, RandIt last , Comp comp - , adaptive_xbuf::value_type> & xbuf) + , XBuf & xbuf) { stable_sort(first, last, comp, xbuf); } @@ -1365,7 +1195,7 @@ void initialize_keys( RandIt first, RandIt last } } -template +template void combine_params ( RandItKeys const keys , KeyCompare key_comp @@ -1373,7 +1203,7 @@ void combine_params , typename iterator_traits::size_type l_combined , typename iterator_traits::size_type const l_prev_merged , typename iterator_traits::size_type const l_block - , adaptive_xbuf::value_type> & xbuf + , XBuf & xbuf , Compare comp //Output , typename iterator_traits::size_type &midkey_idx @@ -1381,24 +1211,36 @@ void combine_params , typename iterator_traits::size_type &n_bef_irreg2 , typename iterator_traits::size_type &n_aft_irreg2 , typename iterator_traits::size_type &l_irreg2 - , bool is_merge_left) + //Options + , bool is_merge_left_or_bufferless + , bool do_initialize_keys = true) { typedef typename 
iterator_traits::size_type size_type; typedef typename iterator_traits::value_type value_type; + //Initial parameters for selection sort blocks l_irreg1 = l_prev_merged%l_block; l_irreg2 = (l_combined-l_irreg1)%l_block; BOOST_ASSERT(((l_combined-l_irreg1-l_irreg2)%l_block) == 0); size_type const n_reg_block = (l_combined-l_irreg1-l_irreg2)/l_block; midkey_idx = l_prev_merged/l_block; BOOST_ASSERT(n_reg_block>=midkey_idx); - initialize_keys(keys, keys+n_reg_block+(midkey_idx==n_reg_block), key_comp, xbuf); - selection_sort_blocks(keys, midkey_idx, key_comp, first+l_irreg1, l_block, n_reg_block, comp, is_merge_left); + //Key initialization + if (do_initialize_keys) { + initialize_keys(keys, keys+n_reg_block+(midkey_idx==n_reg_block), key_comp, xbuf); + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A initkey: ", l_combined + l_block); + + //Selection sort blocks + selection_sort_blocks(keys, midkey_idx, key_comp, first+l_irreg1, l_block, n_reg_block, comp, is_merge_left_or_bufferless, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A selsort: ", l_combined + l_block); + + //Special case for the last elements n_aft_irreg2 = 0; - if(l_irreg2!=0){ - size_type const reg_off = is_merge_left ? 0u: l_block-1; - size_type const irreg_off = is_merge_left ? 0u: l_irreg2-1; + if(l_irreg2 != 0){ + size_type const reg_off = is_merge_left_or_bufferless ? 0u: l_block-1; + size_type const irreg_off = is_merge_left_or_bufferless ? 
0u: l_irreg2-1; RandIt prev_block_first = first + l_combined - l_irreg2; const value_type &incomplete_block_first = prev_block_first[irreg_off]; while(n_aft_irreg2 != n_reg_block && @@ -1461,14 +1303,17 @@ void merge_blocks_right , Compare comp , bool const xbuf_used) { - if(xbuf_used){ - op_merge_blocks_right - (key_first, midkey, key_comp, first, l_block, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, move_op()); - } - else{ - op_merge_blocks_right - (key_first, midkey, key_comp, first, l_block, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, swap_op()); - } + merge_blocks_left + ( make_reverse_iterator(key_first+n_aft_irreg2 + n_bef_irreg2) + , midkey + , negate(key_comp) + , make_reverse_iterator(first+(n_bef_irreg2+n_aft_irreg2)*l_block+l_irreg2) + , l_block + , l_irreg2 + , n_aft_irreg2 + n_bef_irreg2 + , 0 + , 0 + , inverse(comp), xbuf_used); } @@ -1530,8 +1375,8 @@ Unsigned calculate_total_combined(Unsigned const len, Unsigned const l_prev_merg // Blocks of length l_prev_merged combined. We'll combine them in pairs // l_prev_merged and n_keys are powers of 2. 
(2*l_prev_merged/l_block) keys are guaranteed // Returns the number of combined elements (some trailing elements might be left uncombined) -template -void combine_blocks +template +void adaptive_sort_combine_blocks ( RandItKeys const keys , KeyCompare key_comp , RandIt const first @@ -1540,16 +1385,17 @@ void combine_blocks , typename iterator_traits::size_type const l_block , bool const use_buf , bool const xbuf_used - , adaptive_xbuf::value_type> & xbuf + , XBuf & xbuf , Compare comp , bool merge_left) { + (void)xbuf; typedef typename iterator_traits::size_type size_type; - size_type const l_combined = 2*l_prev_merged; + size_type const l_reg_combined = 2*l_prev_merged; size_type l_irreg_combined = 0; size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined); - size_type const n_reg_combined = len/l_combined; + size_type const n_reg_combined = len/l_reg_combined; RandIt combined_first = first; (void)l_total_combined; @@ -1558,32 +1404,37 @@ void combine_blocks size_type n_bef_irreg2, n_aft_irreg2, midkey_idx, l_irreg1, l_irreg2; size_type const max_i = n_reg_combined + (l_irreg_combined != 0); - if(merge_left || !use_buf) - for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_combined) { + if(merge_left || !use_buf) { + for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_reg_combined) { bool const is_last = combined_i==n_reg_combined; - combine_params( keys, key_comp, combined_first, is_last ? l_irreg_combined : l_combined - , l_prev_merged, l_block, xbuf, comp + size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; + + range_xbuf rbuf( (use_buf && xbuf_used) ? 
(combined_first-l_block) : combined_first, combined_first); + combine_params( keys, key_comp, combined_first, l_cur_combined + , l_prev_merged, l_block, rbuf, comp , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs - //BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After combine_params: ", len + l_block); - BOOST_ASSERT(!l_irreg1); - if(use_buf){ - merge_blocks_left - (keys, keys[midkey_idx], key_comp, combined_first, l_block, 0u, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used); - } - else{ + //Now merge blocks + if(!use_buf){ merge_blocks_bufferless (keys, keys[midkey_idx], key_comp, combined_first, l_block, 0u, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp); } + else{ + merge_blocks_left + (keys, keys[midkey_idx], key_comp, combined_first, l_block, 0u, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used); + } //BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After merge_blocks_l: ", len + l_block); } + } else{ - combined_first += l_combined*(max_i-1); - for( size_type combined_i = max_i; combined_i--; combined_first -= l_combined) { + combined_first += l_reg_combined*(max_i-1); + for( size_type combined_i = max_i; combined_i--; combined_first -= l_reg_combined) { bool const is_last = combined_i==n_reg_combined; - combine_params( keys, key_comp, combined_first, is_last ? l_irreg_combined : l_combined - , l_prev_merged, l_block, xbuf, comp - , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, false); //Outputs - BOOST_ASSERT(!l_irreg1); + size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; + RandIt const combined_last(combined_first+l_cur_combined); + range_xbuf rbuf(combined_last, xbuf_used ? 
(combined_last+l_block) : combined_last); + combine_params( keys, key_comp, combined_first, l_cur_combined + , l_prev_merged, l_block, rbuf, comp + , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, false); //Outputs //BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After combine_params: ", len + l_block); merge_blocks_right (keys, keys[midkey_idx], key_comp, combined_first, l_block, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used); @@ -1741,7 +1592,7 @@ void op_merge_right_step // until all is merged or auxiliary memory is not large enough. template typename iterator_traits::size_type - build_blocks + adaptive_sort_build_blocks ( RandIt const first , typename iterator_traits::size_type const len , typename iterator_traits::size_type const l_base @@ -1830,7 +1681,7 @@ typename iterator_traits::size_type //[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is //[buffer,buffer+l_intbuf) template -bool combine_all_blocks +bool adaptive_sort_combine_all_blocks ( RandIt keys , typename iterator_traits::size_type &n_keys , RandIt const buffer @@ -1847,14 +1698,7 @@ bool combine_all_blocks //Backup data to external buffer once if possible bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity(); if(common_xbuf){ - if(n_keys){ - xbuf.move_assign(buffer, l_intbuf); - } - else{ - xbuf.clear(); - merge_sort_uninitialized_copy(buffer, first, xbuf.data(), comp); - xbuf.set_size(l_intbuf); - } + xbuf.move_assign(buffer, l_intbuf); } bool prev_merge_left = true; @@ -1877,8 +1721,7 @@ bool combine_all_blocks bool const is_merge_left = (n&1) == 0; size_type const l_total_combined = calculate_total_combined(l_data, l_merged); - - if(prev_use_internal_buf && prev_merge_left){ + if(n && prev_use_internal_buf && prev_merge_left){ if(is_merge_left || !use_internal_buf){ move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf); } @@ -1900,13 +1743,13 @@ bool combine_all_blocks //Combine to form l_merged*2 segments if(n_keys){ - 
combine_blocks + adaptive_sort_combine_blocks ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); } else{ size_type *const uint_keys = xbuf.template aligned_trailing(); - combine_blocks + adaptive_sort_combine_blocks ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); } @@ -1918,6 +1761,7 @@ bool combine_all_blocks } BOOST_ASSERT(l_prev_total_combined == l_data); bool const buffer_right = prev_use_internal_buf && prev_merge_left; + l_intbuf = prev_use_internal_buf ? l_prev_block : 0u; n_keys = l_unique - l_intbuf; //Restore data from to external common buffer if used @@ -1962,7 +1806,7 @@ void adaptive_sort_final_merge( bool buffer_right , adaptive_xbuf::value_type> & xbuf , Compare comp) { - BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf); + //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf); xbuf.clear(); typedef typename iterator_traits::size_type size_type; @@ -1990,7 +1834,7 @@ void adaptive_sort_final_merge( bool buffer_right } template -bool build_params +bool adaptive_sort_build_params (RandIt first, Unsigned const len, Compare comp , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf , adaptive_xbuf & xbuf @@ -2094,20 +1938,21 @@ inline void adaptive_merge_combine_blocks( RandIt first size_type const l_combine = len-collected; size_type const l_combine1 = len1-collected; size_type n_bef_irreg2, n_aft_irreg2, l_irreg1, l_irreg2, midkey_idx; + if(n_keys){ RandIt const first_data = first+collected; RandIt const keys = first; combine_params( keys, comp, first_data, l_combine , l_combine1, l_block, xbuf, comp - , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs + , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true, false); //Outputs BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A 
combine: ", len); if(xbuf_used){ + BOOST_ASSERT(xbuf.size() >= l_block); merge_blocks_with_buf (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, xbuf_used); BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg xbf: ", len); } else if(use_internal_buf){ - #ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF range_xbuf rbuf(first_data-l_block, first_data); merge_blocks_with_buf @@ -2122,20 +1967,24 @@ inline void adaptive_merge_combine_blocks( RandIt first else{ merge_blocks_bufferless (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg bfl: ", len); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg xbf: ", len); } } else{ - xbuf.clear(); + xbuf.shrink_to_fit(l_block); + if(xbuf.size() < l_block){ + xbuf.initialize_until(l_block, *first); + } size_type *const uint_keys = xbuf.template aligned_trailing(l_block); combine_params( uint_keys, less(), first, l_combine , l_combine1, l_block, xbuf, comp - , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs + , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true, true); //Outputs BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len); + BOOST_ASSERT(xbuf.size() >= l_block); merge_blocks_with_buf (uint_keys, uint_keys[midkey_idx], less(), first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, true); xbuf.clear(); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg lft: ", len); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: ", len); } } @@ -2164,39 +2013,40 @@ inline void adaptive_merge_final_merge( RandIt first if(n_keys){ stable_sort(first, first+n_keys, comp, xbuf); stable_merge(first, first+n_keys, first+len, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: ", len); } } else{ #ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF xbuf.clear(); stable_sort(first, first+collected, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b srt: ", len); 
stable_merge(first, first+collected, first+len, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b mrg: ", len); #else xbuf.clear(); stable_sort(first+len-l_block, first+len, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A buf srt: ", len); RandIt const pos1 = lower_bound(first+n_keys, first+len-l_block, first[len-1], comp); RandIt const pos2 = rotate_gcd(pos1, first+len-l_block, first+len); stable_merge(first+n_keys, pos1, pos2, antistable(comp), xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A buf mrg: ", len); if(n_keys){ stable_sort(first, first+n_keys, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key srt: ", len); stable_merge(first, first+n_keys, first+len, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: ", len); } #endif } - - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A buf mrg: ", len); } else{ - stable_sort(first, first+collected, comp, xbuf); xbuf.clear(); - if(xbuf.capacity() >= collected){ - buffered_merge(first, first+collected, first+len1+len2, comp, xbuf); - } - else{ - merge_bufferless(first, first+collected, first+len1+len2, comp); - } + stable_sort(first, first+collected, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b srt: ", len); + stable_merge(first, first+collected, first+len1+len2, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b mrg: ", len); } - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: ", len); } template @@ -2207,12 +2057,12 @@ inline SizeType adaptive_merge_n_keys_intbuf(SizeType l_block, SizeType len, Xbu //This is the minimum number of keys to implement the ideal algorithm //ceil(len/l_block) - 1 (as the first block is used as buffer) - size_type n_keys = l_block; + size_type n_keys = len/l_block+1; while(n_keys >= (len-l_intbuf-n_keys)/l_block){ --n_keys; } ++n_keys; - BOOST_ASSERT(n_keys < l_block); + //BOOST_ASSERT(n_keys < l_block); if(xbuf.template supports_aligned_trailing(l_block, n_keys)){ n_keys = 0u; @@ -2316,7 +2166,7 @@ void adaptive_sort_impl //Calculate and extract needed unique elements. 
If a minimum is not achieved //fallback to rotation-based merge - if(!build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){ + if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){ stable_sort(first, first+len, comp, xbuf); return; } @@ -2328,12 +2178,12 @@ void adaptive_sort_impl BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1)))); //Classic merge sort until internal buffer and xbuf are exhausted - size_type const l_merged = build_blocks + size_type const l_merged = adaptive_sort_build_blocks (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp); BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After build_blocks: ", len); //Non-trivial merge - bool const buffer_right = combine_all_blocks + bool const buffer_right = adaptive_sort_combine_all_blocks (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp); //Sort keys and buffer and merge the whole sequence From 664d99bdfd7e0cce9daefbb27127476ea121f08b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Fri, 13 May 2016 00:35:44 +0200 Subject: [PATCH 11/16] Added BENCH_SORT|MERGE_UNIQUE_VALUES to speed up regression tests --- proj/vc7ide/Move.sln | 4 ++-- test/bench_merge.cpp | 31 +++++++++++++++++++++---------- test/bench_sort.cpp | 25 ++++++++++++++++++++++--- 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/proj/vc7ide/Move.sln b/proj/vc7ide/Move.sln index 6a1d7a4..cde7565 100644 --- a/proj/vc7ide/Move.sln +++ b/proj/vc7ide/Move.sln @@ -283,8 +283,8 @@ Global ..\..\..\..\boost\move\detail\iterator_traits.hpp = ..\..\..\..\boost\move\detail\iterator_traits.hpp ..\..\doc\Jamfile.v2 = ..\..\doc\Jamfile.v2 ..\..\..\..\boost\move\make_unique.hpp = ..\..\..\..\boost\move\make_unique.hpp - ..\..\..\..\boost\move\algo\merge.hpp = ..\..\..\..\boost\move\algo\merge.hpp - ..\..\..\..\boost\move\algo\merge_sort.hpp = ..\..\..\..\boost\move\algo\merge_sort.hpp + 
..\..\..\..\boost\move\algo\detail\merge.hpp = ..\..\..\..\boost\move\algo\detail\merge.hpp + ..\..\..\..\boost\move\algo\detail\merge_sort.hpp = ..\..\..\..\boost\move\algo\detail\merge_sort.hpp ..\..\..\..\boost\move\detail\meta_utils.hpp = ..\..\..\..\boost\move\detail\meta_utils.hpp ..\..\..\..\boost\move\detail\meta_utils_core.hpp = ..\..\..\..\boost\move\detail\meta_utils_core.hpp ..\..\..\..\boost\move\move.hpp = ..\..\..\..\boost\move\move.hpp diff --git a/test/bench_merge.cpp b/test/bench_merge.cpp index d9d3272..1ef92cd 100644 --- a/test/bench_merge.cpp +++ b/test/bench_merge.cpp @@ -68,23 +68,23 @@ void adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::si enum AlgoType { - InplaceMerge, + StdMerge, AdaptiveMerge, SqrtHAdaptiveMerge, SqrtAdaptiveMerge, Sqrt2AdaptiveMerge, QuartAdaptiveMerge, - BuflessMerge, + StdInplaceMerge, MaxMerge }; -const char *AlgoNames [] = { "InplaceMerge " +const char *AlgoNames [] = { "StdMerge " , "AdaptMerge " , "SqrtHAdaptMerge " , "SqrtAdaptMerge " , "Sqrt2AdaptMerge " , "QuartAdaptMerge " - , "BuflessMerge " + , "StdInplaceMerge " }; BOOST_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxMerge); @@ -102,7 +102,7 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count timer.resume(); switch(alg) { - case InplaceMerge: + case StdMerge: std::inplace_merge(elements, elements+split_pos, elements+element_count, order_type_less()); break; case AdaptiveMerge: @@ -124,8 +124,8 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() , (element_count-1)/4+1); break; - case BuflessMerge: - boost::movelib::merge_bufferless(elements, elements+split_pos, elements+element_count, order_type_less()); + case StdInplaceMerge: + boost::movelib::merge_bufferless_ONlogN(elements, elements+split_pos, elements+element_count, order_type_less()); break; } 
timer.stop(); @@ -177,7 +177,7 @@ bool measure_all(std::size_t L, std::size_t NK) nanosecond_type prev_clock = 0; nanosecond_type back_clock; bool res = true; - res = res && measure_algo(A,Keys,L,NK,InplaceMerge, prev_clock); + res = res && measure_algo(A,Keys,L,NK,StdMerge, prev_clock); back_clock = prev_clock;/* // prev_clock = back_clock; @@ -195,8 +195,8 @@ bool measure_all(std::size_t L, std::size_t NK) prev_clock = back_clock; res = res && measure_algo(A,Keys,L,NK,AdaptiveMerge, prev_clock); // - //prev_clock = back_clock; - //res = res && measure_algo(A,Keys,L,NK,BuflessMerge, prev_clock); + prev_clock = back_clock; + res = res && measure_algo(A,Keys,L,NK,StdInplaceMerge, prev_clock); // if(!res) throw int(0); @@ -205,43 +205,54 @@ bool measure_all(std::size_t L, std::size_t NK) //Undef it to run the long test #define BENCH_MERGE_SHORT +#define BENCH_SORT_UNIQUE_VALUES int main() { try{ + #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(101,1); measure_all(101,7); measure_all(101,31); + #endif measure_all(101,0); // + #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(1101,1); measure_all(1001,7); measure_all(1001,31); measure_all(1001,127); measure_all(1001,511); + #endif measure_all(1001,0); // #ifndef BENCH_MERGE_SHORT + #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(10001,65); measure_all(10001,255); measure_all(10001,1023); measure_all(10001,4095); + #endif measure_all(10001,0); // + #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(100001,511); measure_all(100001,2047); measure_all(100001,8191); measure_all(100001,32767); + #endif measure_all(100001,0); // #ifdef NDEBUG + #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(1000001,1); measure_all(1000001,1024); measure_all(1000001,32768); measure_all(1000001,524287); + #endif measure_all(1000001,0); measure_all(1500001,0); //measure_all(10000001,0); diff --git a/test/bench_sort.cpp b/test/bench_sort.cpp index 4945efa..630da4f 100644 --- a/test/bench_sort.cpp +++ b/test/bench_sort.cpp @@ -80,6 +80,7 @@ enum AlgoType 
Sqrt2AdaptiveSort, QuartAdaptiveSort, NoBufMergeSort, + InplaceStableSort, SlowStableSort, HeapSort, MaxSort @@ -93,6 +94,7 @@ const char *AlgoNames [] = { "MergeSort " , "Sqrt2AdaptSort " , "QuartAdaptSort " , "NoBufMergeSort " + , "InplStableSort " , "SlowSort " , "HeapSort " }; @@ -140,6 +142,9 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count case NoBufMergeSort: boost::movelib::bufferless_merge_sort(elements, elements+element_count, order_type_less()); break; + case InplaceStableSort: + boost::movelib::inplace_stable_sort(elements, elements+element_count, order_type_less()); + break; case SlowStableSort: boost::movelib::detail_adaptive::slow_stable_sort(elements, elements+element_count, order_type_less()); break; @@ -222,6 +227,9 @@ bool measure_all(std::size_t L, std::size_t NK) res = res && measure_algo(A,Keys,L,NK,AdaptiveSort, prev_clock); // prev_clock = back_clock; + res = res && measure_algo(A,Keys,L,NK,InplaceStableSort, prev_clock); + // + prev_clock = back_clock; res = res && measure_algo(A,Keys,L,NK,NoBufMergeSort, prev_clock); // //prev_clock = back_clock; @@ -234,46 +242,57 @@ bool measure_all(std::size_t L, std::size_t NK) //Undef it to run the long test #define BENCH_SORT_SHORT +#define BENCH_SORT_UNIQUE_VALUES int main() { - measure_all(101,1); + #ifndef BENCH_SORT_UNIQUE_VALUES + //measure_all(101,1); measure_all(101,7); measure_all(101,31); + #endif measure_all(101,0); // + #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(1101,1); measure_all(1001,7); measure_all(1001,31); measure_all(1001,127); measure_all(1001,511); + #endif measure_all(1001,0); // #ifndef BENCH_SORT_SHORT + #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(10001,65); measure_all(10001,255); measure_all(10001,1023); measure_all(10001,4095); measure_all(10001,0); + #endif // + #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(100001,511); measure_all(100001,2047); measure_all(100001,8191); measure_all(100001,32767); + #endif measure_all(100001,0); // - 
#ifdef NDEBUG + //#ifdef NDEBUG + #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(1000001,1); measure_all(1000001,1024); measure_all(1000001,32768); measure_all(1000001,524287); + #endif measure_all(1000001,0); measure_all(1500001,0); //measure_all(10000001,0); - #endif //NDEBUG + //#endif //NDEBUG #endif //#ifndef BENCH_SORT_SHORT From fec7e30d36785b86dd434993599863e71a1b28a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Thu, 30 Jun 2016 12:22:41 +0200 Subject: [PATCH 12/16] Use BOOST_MOVE_TO_RV_CAST in the newly fixed assignment operator. Update changelog --- doc/move.qbk | 7 +++++++ include/boost/move/core.hpp | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/move.qbk b/doc/move.qbk index a47c27f..50be531 100644 --- a/doc/move.qbk +++ b/doc/move.qbk @@ -762,6 +762,13 @@ Many thanks to all boosters that have tested, reviewed and improved the library. [section:release_notes Release Notes] +[section:release_notes_boost_1_62 Boost 1.62 Release] + +* Fixed bugs: + * [@https://github.com/boostorg/move/pull/9 Git Pull #9: ['"Fix assignment of move-and-copy emulated classes"]], + +[endsect] + [section:release_notes_boost_1_61 Boost 1.61 Release] * Experimental: asymptotically optimal bufferless merge and sort algorithms: [funcref boost::movelib::adaptive_merge adaptive_merge] diff --git a/include/boost/move/core.hpp b/include/boost/move/core.hpp index 96e15c0..37bca0d 100644 --- a/include/boost/move/core.hpp +++ b/include/boost/move/core.hpp @@ -261,7 +261,7 @@ #define BOOST_COPYABLE_AND_MOVABLE(TYPE)\ public:\ TYPE& operator=(TYPE &t)\ - { this->operator=(static_cast&>(t)); }\ + { this->operator=(*BOOST_MOVE_TO_RV_CAST(const ::boost::rv*, &t)); return *this;}\ public:\ BOOST_MOVE_FORCEINLINE operator ::boost::rv&() \ { return *BOOST_MOVE_TO_RV_CAST(::boost::rv*, this); }\ From daabab5261f27b760c02eb5aa8ea667afd5c7917 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Thu, 30 Jun 2016 16:22:27 +0200 Subject: 
[PATCH 13/16] Add missing BOOST_MOVE_FORCEINLINE --- include/boost/move/core.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/move/core.hpp b/include/boost/move/core.hpp index 37bca0d..c32c194 100644 --- a/include/boost/move/core.hpp +++ b/include/boost/move/core.hpp @@ -260,7 +260,7 @@ #define BOOST_COPYABLE_AND_MOVABLE(TYPE)\ public:\ - TYPE& operator=(TYPE &t)\ + BOOST_MOVE_FORCEINLINE TYPE& operator=(TYPE &t)\ { this->operator=(*BOOST_MOVE_TO_RV_CAST(const ::boost::rv*, &t)); return *this;}\ public:\ BOOST_MOVE_FORCEINLINE operator ::boost::rv&() \ From e7d24400cb986efaefa0acbff5de6e74eae876d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Thu, 30 Jun 2016 16:23:05 +0200 Subject: [PATCH 14/16] Refactor trivial copy/assign traits. Assume intrinsics don't guarantee the copy constructor/assignment is callable. --- include/boost/move/detail/type_traits.hpp | 30 +++++++++-------------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/include/boost/move/detail/type_traits.hpp b/include/boost/move/detail/type_traits.hpp index e9c804d..1b5d838 100644 --- a/include/boost/move/detail/type_traits.hpp +++ b/include/boost/move/detail/type_traits.hpp @@ -55,8 +55,10 @@ // BOOST_MOVE_IS_POD(T) should evaluate to true if T is a POD type // BOOST_MOVE_HAS_TRIVIAL_CONSTRUCTOR(T) should evaluate to true if "T x;" has no effect // BOOST_MOVE_HAS_TRIVIAL_COPY(T) should evaluate to true if T(t) <==> memcpy +// (Note: this trait does not guarantee T is copy constructible, the copy constructor could be deleted but still be trivial) // BOOST_MOVE_HAS_TRIVIAL_MOVE_CONSTRUCTOR(T) should evaluate to true if T(boost::move(t)) <==> memcpy // BOOST_MOVE_HAS_TRIVIAL_ASSIGN(T) should evaluate to true if t = u <==> memcpy +// (Note: this trait does not guarantee T is assignable , the copy assignmen could be deleted but still be trivial) // BOOST_MOVE_HAS_TRIVIAL_MOVE_ASSIGN(T) should evaluate to true if t = boost::move(u) 
<==> memcpy // BOOST_MOVE_HAS_TRIVIAL_DESTRUCTOR(T) should evaluate to true if ~T() has no effect // BOOST_MOVE_HAS_NOTHROW_CONSTRUCTOR(T) should evaluate to true if "T x;" can not throw @@ -117,9 +119,7 @@ # define BOOST_MOVE_HAS_TRIVIAL_CONSTRUCTOR(T) __has_trivial_constructor(T) # endif # if __has_feature(has_trivial_copy) -# //There are problems with deleted copy constructors detected as trivially copyable. -# //http://stackoverflow.com/questions/12754886/has-trivial-copy-behaves-differently-in-clang-and-gcc-whos-right -# define BOOST_MOVE_HAS_TRIVIAL_COPY(T) (__has_trivial_copy(T) && ::boost::move_detail::is_copy_constructible::value) +# define BOOST_MOVE_HAS_TRIVIAL_COPY(T) __has_trivial_copy(T) # endif # if __has_feature(has_trivial_assign) # define BOOST_MOVE_HAS_TRIVIAL_ASSIGN(T) (__has_trivial_assign(T) ) @@ -235,7 +235,9 @@ #endif #ifdef BOOST_MOVE_HAS_TRIVIAL_COPY - #define BOOST_MOVE_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) BOOST_MOVE_HAS_TRIVIAL_COPY(T) + #define BOOST_MOVE_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) ::boost::move_detail::is_pod::value ||\ + (::boost::move_detail::is_copy_constructible::value &&\ + BOOST_MOVE_HAS_TRIVIAL_COPY(T)) #else #define BOOST_MOVE_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) ::boost::move_detail::is_pod::value #endif @@ -246,12 +248,6 @@ #define BOOST_MOVE_IS_TRIVIALLY_DEFAULT_CONSTRUCTIBLE(T) ::boost::move_detail::is_pod::value #endif -#ifdef BOOST_MOVE_HAS_TRIVIAL_COPY - #define BOOST_MOVE_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) BOOST_MOVE_HAS_TRIVIAL_COPY(T) -#else - #define BOOST_MOVE_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) ::boost::move_detail::is_pod::value -#endif - #ifdef BOOST_MOVE_HAS_TRIVIAL_MOVE_CONSTRUCTOR #define BOOST_MOVE_IS_TRIVIALLY_MOVE_CONSTRUCTIBLE(T) BOOST_MOVE_HAS_TRIVIAL_MOVE_CONSTRUCTOR(T) #else @@ -259,7 +255,9 @@ #endif #ifdef BOOST_MOVE_HAS_TRIVIAL_ASSIGN - #define BOOST_MOVE_IS_TRIVIALLY_COPY_ASSIGNABLE(T) BOOST_MOVE_HAS_TRIVIAL_ASSIGN(T) + #define BOOST_MOVE_IS_TRIVIALLY_COPY_ASSIGNABLE(T) 
::boost::move_detail::is_pod::value ||\ + ( ::boost::move_detail::is_copy_assignable::value &&\ + BOOST_MOVE_HAS_TRIVIAL_ASSIGN(T)) #else #define BOOST_MOVE_IS_TRIVIALLY_COPY_ASSIGNABLE(T) ::boost::move_detail::is_pod::value #endif @@ -821,9 +819,7 @@ struct is_trivially_copy_constructible { //In several compilers BOOST_MOVE_IS_TRIVIALLY_COPY_CONSTRUCTIBLE return true even with //deleted copy constructors so make sure the type is copy constructible. - static const bool value = ::boost::move_detail::is_pod::value || - ( ::boost::move_detail::is_copy_constructible::value && - BOOST_MOVE_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) ); + static const bool value = BOOST_MOVE_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T); }; ////////////////////////////////////// @@ -831,7 +827,7 @@ struct is_trivially_copy_constructible ////////////////////////////////////// template struct is_trivially_move_constructible -{ static const bool value = BOOST_MOVE_IS_TRIVIALLY_MOVE_CONSTRUCTIBLE(T); }; +{ static const bool value = BOOST_MOVE_IS_TRIVIALLY_MOVE_CONSTRUCTIBLE(T); }; ////////////////////////////////////// // is_trivially_copy_assignable @@ -841,9 +837,7 @@ struct is_trivially_copy_assignable { //In several compilers BOOST_MOVE_IS_TRIVIALLY_COPY_CONSTRUCTIBLE return true even with //deleted copy constructors so make sure the type is copy constructible. - static const bool value = ::boost::move_detail::is_pod::value || - ( ::boost::move_detail::is_copy_assignable::value && - BOOST_MOVE_IS_TRIVIALLY_COPY_ASSIGNABLE(T) ); + static const bool value = BOOST_MOVE_IS_TRIVIALLY_COPY_ASSIGNABLE(T); }; ////////////////////////////////////// From 1194a39ab3195a17c849e1d11f4305ff6727df8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Tue, 5 Jul 2016 23:18:21 +0200 Subject: [PATCH 15/16] Undo commit b474e8c28a96d87763cccb9a33b5ff4b169482d3 as it breaks some Boost libraries. Emulation limitations will be documented. 
--- doc/move.qbk | 7 ------- include/boost/move/core.hpp | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/doc/move.qbk b/doc/move.qbk index 50be531..a47c27f 100644 --- a/doc/move.qbk +++ b/doc/move.qbk @@ -762,13 +762,6 @@ Many thanks to all boosters that have tested, reviewed and improved the library. [section:release_notes Release Notes] -[section:release_notes_boost_1_62 Boost 1.62 Release] - -* Fixed bugs: - * [@https://github.com/boostorg/move/pull/9 Git Pull #9: ['"Fix assignment of move-and-copy emulated classes"]], - -[endsect] - [section:release_notes_boost_1_61 Boost 1.61 Release] * Experimental: asymptotically optimal bufferless merge and sort algorithms: [funcref boost::movelib::adaptive_merge adaptive_merge] diff --git a/include/boost/move/core.hpp b/include/boost/move/core.hpp index c32c194..1dd8a8c 100644 --- a/include/boost/move/core.hpp +++ b/include/boost/move/core.hpp @@ -261,7 +261,7 @@ #define BOOST_COPYABLE_AND_MOVABLE(TYPE)\ public:\ BOOST_MOVE_FORCEINLINE TYPE& operator=(TYPE &t)\ - { this->operator=(*BOOST_MOVE_TO_RV_CAST(const ::boost::rv*, &t)); return *this;}\ + { this->operator=(const_cast(t)); return *this;}\ public:\ BOOST_MOVE_FORCEINLINE operator ::boost::rv&() \ { return *BOOST_MOVE_TO_RV_CAST(::boost::rv*, this); }\ From cfd6be4ab46223917cb79e7dd856f582df587d7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Fri, 29 Jul 2016 11:55:10 +0200 Subject: [PATCH 16/16] Documented limitations reported in Trac #12194 and Trac #12307 --- doc/move.qbk | 34 +++++ example/doc_template_assign.cpp | 98 +++++++++++++ include/boost/move/detail/meta_utils_core.hpp | 12 ++ proj/vc7ide/Move.sln | 8 ++ proj/vc7ide/doc_template_assign.vcproj | 134 ++++++++++++++++++ 5 files changed, 286 insertions(+) create mode 100644 example/doc_template_assign.cpp create mode 100644 proj/vc7ide/doc_template_assign.vcproj diff --git a/doc/move.qbk b/doc/move.qbk index a47c27f..ea62f8b 100644 --- a/doc/move.qbk +++ 
b/doc/move.qbk @@ -661,6 +661,32 @@ An alternative is to implement a single `operator =()` for copyable and movable However, "pass by value" is not optimal for classes (like containers, strings, etc.) that reuse resources (like previously allocated memory) when x is assigned from a lvalue. +[endsect] + +[section:templated_assignment_operator Templated assignment operator in copyable and movable types] + + +[import ../example/doc_template_assign.cpp] + +Given a movable and copyable class, if a templated assignment operator (*) is added: + +[template_assign_example_foo_bar] + +C++98 and C++11 compilers will behave different when assigning from a `[const] Foo` lvalue: + +[template_assign_example_main] + +This different behaviour is a side-effect of the move emulation that can't be easily avoided by +[*Boost.Move]. One workaround is to SFINAE-out the templated assignment operator with `disable_if`: + +[c++] + + template // Modified templated assignment + typename boost::disable_if, Foo&>::type + operator=(const U& rhs) + { i = -rhs.i; return *this; } //(2) + + [endsect] [endsect] @@ -762,6 +788,14 @@ Many thanks to all boosters that have tested, reviewed and improved the library. [section:release_notes Release Notes] +[section:release_notes_boost_1_62 Boost 1.62 Release] + +* Documented new limitations reported in Trac tickets + [@https://svn.boost.org/trac/boost/ticket/12194 #12194 ['"Copy assignment on moveable and copyable classes uses wrong type"]] and + [@https://svn.boost.org/trac/boost/ticket/12307 #12307 ['"Copy assignment on moveable and copyable classes uses wrong type"]]. 
+ +[endsect] + [section:release_notes_boost_1_61 Boost 1.61 Release] * Experimental: asymptotically optimal bufferless merge and sort algorithms: [funcref boost::movelib::adaptive_merge adaptive_merge] diff --git a/example/doc_template_assign.cpp b/example/doc_template_assign.cpp new file mode 100644 index 0000000..e1959a9 --- /dev/null +++ b/example/doc_template_assign.cpp @@ -0,0 +1,98 @@ +////////////////////////////////////////////////////////////////////////////// +// +// (C) Copyright Ion Gaztanaga 2014-2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// See http://www.boost.org/libs/move for documentation. +// +////////////////////////////////////////////////////////////////////////////// + +#include +#include + +#include + +//[template_assign_example_foo_bar + +class Foo +{ + BOOST_COPYABLE_AND_MOVABLE(Foo) + + public: + int i; + explicit Foo(int val) : i(val) {} + + Foo(BOOST_RV_REF(Foo) obj) : i(obj.i) {} + + Foo& operator=(BOOST_RV_REF(Foo) rhs) + { i = rhs.i; rhs.i = 0; return *this; } + + Foo& operator=(BOOST_COPY_ASSIGN_REF(Foo) rhs) + { i = rhs.i; return *this; } //(1) + + template //(*) TEMPLATED ASSIGNMENT, potential problem + //<- + #if 1 + typename ::boost::move_detail::disable_if_same::type + operator=(const U& rhs) + #else + //-> + Foo& operator=(const U& rhs) + //<- + #endif + //-> + { i = -rhs.i; return *this; } //(2) +}; +//] + +struct Bar +{ + int i; + explicit Bar(int val) : i(val) {} +}; + + +//<- +#ifdef NDEBUG +#undef NDEBUG +#endif +//-> +#include + +int main() +{ +//[template_assign_example_main + Foo foo1(1); + //<- + assert(foo1.i == 1); + //-> + Foo foo2(2); + //<- + assert(foo2.i == 2); + Bar bar(3); + assert(bar.i == 3); + //-> + foo2 = foo1; // Calls (1) in C++11 but (2) in C++98 + //<- + assert(foo2.i == 1); + assert(foo1.i == 1); //Fails in C++98 unless workaround is applied + foo1 = bar; + assert(foo1.i == 
-3); + foo2 = boost::move(foo1); + assert(foo1.i == 0); + assert(foo2.i == -3); + //-> + const Foo foo5(5); + foo2 = foo5; // Calls (1) in C++11 but (2) in C++98 + //<- + assert(foo2.i == 5); //Fails in C++98 unless workaround is applied + assert(foo5.i == 5); + //-> +//] + return 0; +} + + +#include diff --git a/include/boost/move/detail/meta_utils_core.hpp b/include/boost/move/detail/meta_utils_core.hpp index 4d715a0..40dbb6e 100644 --- a/include/boost/move/detail/meta_utils_core.hpp +++ b/include/boost/move/detail/meta_utils_core.hpp @@ -114,6 +114,18 @@ struct is_same static const bool value = true; }; +////////////////////////////////////// +// enable_if_same +////////////////////////////////////// +template +struct enable_if_same : enable_if, R> {}; + +////////////////////////////////////// +// disable_if_same +////////////////////////////////////// +template +struct disable_if_same : disable_if, R> {}; + } //namespace move_detail { } //namespace boost { diff --git a/proj/vc7ide/Move.sln b/proj/vc7ide/Move.sln index cde7565..e8fabd2 100644 --- a/proj/vc7ide/Move.sln +++ b/proj/vc7ide/Move.sln @@ -127,6 +127,10 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "adaptive_merge_test", "adap ProjectSection(ProjectDependencies) = postProject EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "doc_template_assign", "doc_template_assign.vcproj", "{7460CA18-D532-E4F8-F1F2-3A796D2A91E2}" + ProjectSection(ProjectDependencies) = postProject + EndProjectSection +EndProject Global GlobalSection(SolutionConfiguration) = preSolution Debug = Debug @@ -263,6 +267,10 @@ Global {CD617A28-6217-B79E-4CE2-6BA035379A6A}.Debug.Build.0 = Debug|Win32 {CD617A28-6217-B79E-4CE2-6BA035379A6A}.Release.ActiveCfg = Release|Win32 {CD617A28-6217-B79E-4CE2-6BA035379A6A}.Release.Build.0 = Release|Win32 + {7460CA18-D532-E4F8-F1F2-3A796D2A91E2}.Debug.ActiveCfg = Debug|Win32 + {7460CA18-D532-E4F8-F1F2-3A796D2A91E2}.Debug.Build.0 = Debug|Win32 + 
{7460CA18-D532-E4F8-F1F2-3A796D2A91E2}.Release.ActiveCfg = Release|Win32 + {7460CA18-D532-E4F8-F1F2-3A796D2A91E2}.Release.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionItems) = postSolution ..\..\..\..\boost\move\algo\adaptive_merge.hpp = ..\..\..\..\boost\move\algo\adaptive_merge.hpp diff --git a/proj/vc7ide/doc_template_assign.vcproj b/proj/vc7ide/doc_template_assign.vcproj new file mode 100644 index 0000000..c37cb13 --- /dev/null +++ b/proj/vc7ide/doc_template_assign.vcproj @@ -0,0 +1,134 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +