From 10074dc83578fecdd2b186f8df1a46bc0ea3d7ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Mon, 31 Dec 2018 01:02:40 +0100 Subject: [PATCH] Optimize sequence generation and add STL-like adaptive algorithms to the benchmark. --- include/boost/move/algo/adaptive_sort.hpp | 2 +- test/bench_merge.cpp | 181 +++++++++++++++------- test/bench_sort.cpp | 140 ++++++++++++----- 3 files changed, 224 insertions(+), 99 deletions(-) diff --git a/include/boost/move/algo/adaptive_sort.hpp b/include/boost/move/algo/adaptive_sort.hpp index 4f50bf1..b32cdc0 100644 --- a/include/boost/move/algo/adaptive_sort.hpp +++ b/include/boost/move/algo/adaptive_sort.hpp @@ -612,7 +612,7 @@ void adaptive_sort( RandIt first, RandIt last, Compare comp typedef typename iterator_traits::size_type size_type; typedef typename iterator_traits::value_type value_type; - ::boost::movelib::detail_adaptive::adaptive_xbuf xbuf(uninitialized, uninitialized_len); + ::boost::movelib::adaptive_xbuf xbuf(uninitialized, uninitialized_len); ::boost::movelib::detail_adaptive::adaptive_sort_impl(first, size_type(last - first), comp, xbuf); } diff --git a/test/bench_merge.cpp b/test/bench_merge.cpp index 5f69018..2ef3440 100644 --- a/test/bench_merge.cpp +++ b/test/bench_merge.cpp @@ -9,9 +9,13 @@ // ////////////////////////////////////////////////////////////////////////////// +//#define BOOST_MOVE_ADAPTIVE_SORT_STATS +//#define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 2 + #include //std::inplace_merge #include //std::printf #include //std::cout +#include //boost::container::vector #include @@ -25,8 +29,6 @@ using boost::timer::cpu_timer; using boost::timer::cpu_times; using boost::timer::nanosecond_type; -//#define BOOST_MOVE_ADAPTIVE_SORT_STATS -//#define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 2 void print_stats(const char *str, boost::ulong_long_type element_count) { std::printf("%sCmp:%8.04f Cpy:%9.04f\n", str, double(order_perf_type::num_compare)/element_count, 
double(order_perf_type::num_copy)/element_count ); @@ -37,30 +39,31 @@ void print_stats(const char *str, boost::ulong_long_type element_count) #include template -std::size_t generate_elements(T elements[], std::size_t element_count, std::size_t key_reps[], std::size_t key_len, Compare comp) +std::size_t generate_elements(boost::container::vector &elements, std::size_t L, std::size_t NK, Compare comp) { + elements.resize(L); + boost::movelib::unique_ptr key_reps(new std::size_t[NK ? NK : L]); + std::srand(0); - for(std::size_t i = 0; i < (key_len ? key_len : element_count); ++i){ - key_reps[i]=0; + for (std::size_t i = 0; i < (NK ? NK : L); ++i) { + key_reps[i] = 0; } - for(std::size_t i=0; i < element_count; ++i){ - std::size_t key = key_len ? (i % key_len) : i; - elements[i].key=key; + for (std::size_t i = 0; i < L; ++i) { + std::size_t key = NK ? (i % NK) : i; + elements[i].key = key; } - ::random_shuffle(elements, elements + element_count); - ::random_shuffle(elements, elements + element_count); - ::random_shuffle(elements, elements + element_count); - for(std::size_t i = 0; i < element_count; ++i){ + ::random_shuffle(elements.data(), elements.data() + L); + ::random_shuffle(elements.data(), elements.data() + L); + + for (std::size_t i = 0; i < L; ++i) { elements[i].val = key_reps[elements[i].key]++; } - std::size_t split_count = element_count/2; - std::stable_sort(elements, elements+split_count, comp); - std::stable_sort(elements+split_count, elements+element_count, comp); + std::size_t split_count = L / 2; + std::stable_sort(elements.data(), elements.data() + split_count, comp); + std::stable_sort(elements.data() + split_count, elements.data() + L, comp); return split_count; } - - template void adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen) { @@ -68,34 +71,47 @@ void adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::si boost::movelib::adaptive_merge(elements, mid, last, comp, 
reinterpret_cast(mem.get()), BufLen); } +template +void std_like_adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen) +{ + boost::movelib::unique_ptr mem(new char[sizeof(T)*BufLen]); + boost::movelib::merge_adaptive_ONlogN(elements, mid, last, comp, reinterpret_cast(mem.get()), BufLen); +} + enum AlgoType { StdMerge, - AdaptiveMerge, - SqrtHAdaptiveMerge, - SqrtAdaptiveMerge, - Sqrt2AdaptiveMerge, - QuartAdaptiveMerge, + AdaptMerge, + SqrtHAdaptMerge, + SqrtAdaptMerge, + Sqrt2AdaptMerge, + QuartAdaptMerge, StdInplaceMerge, + StdSqrtHAdaptMerge, + StdSqrtAdaptMerge, + StdSqrt2AdaptMerge, + StdQuartAdaptMerge, MaxMerge }; -const char *AlgoNames [] = { "StdMerge " - , "AdaptMerge " - , "SqrtHAdaptMerge " - , "SqrtAdaptMerge " - , "Sqrt2AdaptMerge " - , "QuartAdaptMerge " - , "StdInplaceMerge " +const char *AlgoNames [] = { "StdMerge " + , "AdaptMerge " + , "SqrtHAdaptMerge " + , "SqrtAdaptMerge " + , "Sqrt2AdaptMerge " + , "QuartAdaptMerge " + , "StdInplaceMerge " + , "StdSqrtHAdaptMerge " + , "StdSqrtAdaptMerge " + , "StdSqrt2AdaptMerge " + , "StdQuartAdaptMerge " }; BOOST_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxMerge); template -bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count, std::size_t key_len, unsigned alg, nanosecond_type &prev_clock) +bool measure_algo(T *elements, std::size_t element_count, std::size_t split_pos, std::size_t alg, nanosecond_type &prev_clock) { - std::size_t const split_pos = generate_elements(elements, element_count, key_reps, key_len, order_type_less()); - std::printf("%s ", AlgoNames[alg]); order_perf_type::num_compare=0; order_perf_type::num_copy=0; @@ -107,28 +123,44 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count case StdMerge: std::inplace_merge(elements, elements+split_pos, elements+element_count, order_type_less()); break; - case AdaptiveMerge: + case AdaptMerge: boost::movelib::adaptive_merge(elements, 
elements+split_pos, elements+element_count, order_type_less()); break; - case SqrtHAdaptiveMerge: + case SqrtHAdaptMerge: adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1); break; - case SqrtAdaptiveMerge: + case SqrtAdaptMerge: adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); break; - case Sqrt2AdaptiveMerge: + case Sqrt2AdaptMerge: adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); break; - case QuartAdaptiveMerge: + case QuartAdaptMerge: adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() - , (element_count-1)/4+1); + , (element_count)/4+1); break; case StdInplaceMerge: boost::movelib::merge_bufferless_ONlogN(elements, elements+split_pos, elements+element_count, order_type_less()); break; + case StdSqrtHAdaptMerge: + std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() + , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1); + break; + case StdSqrtAdaptMerge: + std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() + , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); + break; + case StdSqrt2AdaptMerge: + std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() + , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); + break; + case StdQuartAdaptMerge: + std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() + , (element_count)/4+1); + break; } timer.stop(); @@ -170,36 +202,60 @@ bool measure_algo(T *elements, 
std::size_t key_reps[], std::size_t element_count template bool measure_all(std::size_t L, std::size_t NK) { - boost::movelib::unique_ptr pdata(new T[L]); - boost::movelib::unique_ptr pkeys(new std::size_t[NK ? NK : L]); - T *A = pdata.get(); - std::size_t *Keys = pkeys.get(); + boost::container::vector original_elements, elements; + std::size_t split_pos = generate_elements(original_elements, L, NK, order_type_less()); std::printf("\n - - N: %u, NK: %u - -\n", (unsigned)L, (unsigned)NK); nanosecond_type prev_clock = 0; nanosecond_type back_clock; bool res = true; - res = res && measure_algo(A,Keys,L,NK,StdMerge, prev_clock); + + elements = original_elements; + res = res && measure_algo(elements.data(), L, split_pos, StdMerge, prev_clock); back_clock = prev_clock; // + prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,QuartAdaptiveMerge, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L, split_pos, QuartAdaptMerge, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,Sqrt2AdaptiveMerge, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L, split_pos, StdQuartAdaptMerge, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,SqrtAdaptiveMerge, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L, split_pos, Sqrt2AdaptMerge, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,SqrtHAdaptiveMerge, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L, split_pos, StdSqrt2AdaptMerge, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,AdaptiveMerge, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L, split_pos, SqrtAdaptMerge, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,StdInplaceMerge, prev_clock); + elements = 
original_elements; + res = res && measure_algo(elements.data(), L, split_pos, StdSqrtAdaptMerge, prev_clock); // + prev_clock = back_clock; + elements = original_elements; + res = res && measure_algo(elements.data(), L, split_pos, SqrtHAdaptMerge, prev_clock); + // + prev_clock = back_clock; + elements = original_elements; + res = res && measure_algo(elements.data(), L, split_pos, StdSqrtHAdaptMerge, prev_clock); + // + prev_clock = back_clock; + elements = original_elements; + res = res && measure_algo(elements.data(), L, split_pos, AdaptMerge, prev_clock); + // + prev_clock = back_clock; + elements = original_elements; + res = res && measure_algo(elements.data(), L, split_pos,StdInplaceMerge, prev_clock); + // + if(!res) throw int(0); return res; @@ -214,6 +270,7 @@ int main() try{ #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(101,1); + measure_all(101,5); measure_all(101,7); measure_all(101,31); #endif @@ -228,8 +285,8 @@ int main() measure_all(1001,511); #endif measure_all(1001,0); + // - #ifndef BENCH_MERGE_SHORT #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(10001,65); measure_all(10001,255); @@ -239,6 +296,7 @@ int main() measure_all(10001,0); // + #if defined(NDEBUG) #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(100001,511); measure_all(100001,2047); @@ -248,21 +306,24 @@ int main() measure_all(100001,0); // - #ifdef NDEBUG + #if !defined(BENCH_MERGE_SHORT) #ifndef BENCH_SORT_UNIQUE_VALUES - measure_all(1000001,1); - measure_all(1000001,1024); - measure_all(1000001,32768); - measure_all(1000001,524287); + measure_all(1000001, 8192); + measure_all(1000001, 32768); + measure_all(1000001, 131072); + measure_all(1000001, 524288); #endif measure_all(1000001,0); - measure_all(3000001,0); - measure_all(5000001,0); - #endif //NDEBUG + #ifndef BENCH_SORT_UNIQUE_VALUES + measure_all(10000001, 65536); + measure_all(10000001, 262144); + measure_all(10000001, 1048576); + measure_all(10000001, 4194304); + #endif + measure_all(10000001,0); #endif //#ifndef BENCH_MERGE_SHORT - 
- //measure_all(100000001,0); + #endif //#ifdef NDEBUG } catch(...) { diff --git a/test/bench_sort.cpp b/test/bench_sort.cpp index 6544976..11aba35 100644 --- a/test/bench_sort.cpp +++ b/test/bench_sort.cpp @@ -13,9 +13,9 @@ #include //std::stable_sort, std::make|sort_heap, std::random_shuffle #include //std::printf #include //std::cout +#include //boost::container::vector #include - #include #include @@ -41,20 +41,23 @@ void print_stats(const char *str, boost::ulong_long_type element_count) #include template -void generate_elements(T elements[], std::size_t element_count, std::size_t key_reps[], std::size_t key_len) +void generate_elements(boost::container::vector &elements, std::size_t L, std::size_t NK) { + elements.resize(L); + boost::movelib::unique_ptr key_reps(new std::size_t[NK ? NK : L]); + std::srand(0); - for(std::size_t i = 0; i < (key_len ? key_len : element_count); ++i){ - key_reps[i]=0; + for (std::size_t i = 0; i < (NK ? NK : L); ++i) { + key_reps[i] = 0; } - for(std::size_t i=0; i < element_count; ++i){ - std::size_t key = key_len ? (i % key_len) : i; - elements[i].key=key; + for (std::size_t i = 0; i < L; ++i) { + std::size_t key = NK ? 
(i % NK) : i; + elements[i].key = key; } - ::random_shuffle(elements, elements + element_count); - ::random_shuffle(elements, elements + element_count); - ::random_shuffle(elements, elements + element_count); - for(std::size_t i = 0; i < element_count; ++i){ + ::random_shuffle(elements.data(), elements.data() + L); + ::random_shuffle(elements.data(), elements.data() + L); + + for (std::size_t i = 0; i < L; ++i) { elements[i].val = key_reps[elements[i].key]++; } } @@ -66,6 +69,13 @@ void adaptive_sort_buffered(T *elements, std::size_t element_count, Compare comp boost::movelib::adaptive_sort(elements, elements + element_count, comp, reinterpret_cast(mem.get()), BufLen); } +template +void std_like_adaptive_stable_sort_buffered(T *elements, std::size_t element_count, Compare comp, std::size_t BufLen) +{ + boost::movelib::unique_ptr mem(new char[sizeof(T)*BufLen]); + boost::movelib::stable_sort_adaptive_ONlogN2(elements, elements + element_count, comp, reinterpret_cast(mem.get()), BufLen); +} + template void merge_sort_buffered(T *elements, std::size_t element_count, Compare comp) { @@ -85,6 +95,10 @@ enum AlgoType Sqrt2AdaptiveSort, QuartAdaptiveSort, InplaceStableSort, + StdSqrtHAdpSort, + StdSqrtAdpSort, + StdSqrt2AdpSort, + StdQuartAdpSort, SlowStableSort, HeapSort, MaxSort @@ -100,6 +114,10 @@ const char *AlgoNames [] = { "MergeSort " , "Sqrt2AdaptSort " , "QuartAdaptSort " , "InplStableSort " + , "StdSqrtHAdpSort" + , "StdSqrtAdpSort " + , "StdSqrt2AdpSort" + , "StdQuartAdpSort" , "SlowSort " , "HeapSort " }; @@ -107,10 +125,8 @@ const char *AlgoNames [] = { "MergeSort " BOOST_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxSort); template -bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count, std::size_t key_len, unsigned alg, nanosecond_type &prev_clock) +bool measure_algo(T *elements, std::size_t element_count, std::size_t alg, nanosecond_type &prev_clock) { - generate_elements(elements, element_count, key_reps, key_len); 
- std::printf("%s ", AlgoNames[alg]); order_perf_type::num_compare=0; order_perf_type::num_copy=0; @@ -153,6 +169,22 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count case InplaceStableSort: boost::movelib::inplace_stable_sort(elements, elements+element_count, order_type_less()); break; + case StdSqrtHAdpSort: + std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less() + , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1); + break; + case StdSqrtAdpSort: + std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less() + , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); + break; + case StdSqrt2AdpSort: + std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less() + , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); + break; + case StdQuartAdpSort: + std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less() + , (element_count-1)/4+1); + break; case SlowStableSort: boost::movelib::detail_adaptive::slow_stable_sort(elements, elements+element_count, order_type_less()); break; @@ -202,50 +234,76 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count template bool measure_all(std::size_t L, std::size_t NK) { - boost::movelib::unique_ptr pdata(new T[L]); - boost::movelib::unique_ptr pkeys(new std::size_t[NK ? 
NK : L]); - T *A = pdata.get(); - std::size_t *Keys = pkeys.get(); + boost::container::vector original_elements, elements; + generate_elements(original_elements, L, NK); std::printf("\n - - N: %u, NK: %u - -\n", (unsigned)L, (unsigned)NK); nanosecond_type prev_clock = 0; nanosecond_type back_clock; bool res = true; - res = res && measure_algo(A,Keys,L,NK,MergeSort, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L,MergeSort, prev_clock); back_clock = prev_clock; // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,StableSort, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L,StableSort, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,PdQsort, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L,PdQsort, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,StdSort, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L,StdSort, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,HeapSort, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L,HeapSort, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,QuartAdaptiveSort, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L,QuartAdaptiveSort, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,Sqrt2AdaptiveSort, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L, StdQuartAdpSort, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,SqrtAdaptiveSort, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L,Sqrt2AdaptiveSort, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,SqrtHAdaptiveSort, 
prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L, StdSqrt2AdpSort, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,AdaptiveSort, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L,SqrtAdaptiveSort, prev_clock); // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,InplaceStableSort, prev_clock); + elements = original_elements; + res = res && measure_algo(elements.data(), L, StdSqrtAdpSort, prev_clock); + // + prev_clock = back_clock; + elements = original_elements; + res = res && measure_algo(elements.data(), L,SqrtHAdaptiveSort, prev_clock); + // + prev_clock = back_clock; + elements = original_elements; + res = res && measure_algo(elements.data(), L, StdSqrtHAdpSort, prev_clock); + // + prev_clock = back_clock; + elements = original_elements; + res = res && measure_algo(elements.data(), L,AdaptiveSort, prev_clock); + // + prev_clock = back_clock; + elements = original_elements; + res = res && measure_algo(elements.data(), L,InplaceStableSort, prev_clock); // //prev_clock = back_clock; - //res = res && measure_algo(A,Keys,L,NK,SlowStableSort, prev_clock); + //elements = original_elements; + //res = res && measure_algo(elements.data(), L,SlowStableSort, prev_clock); // if(!res) throw int(0); @@ -275,7 +333,6 @@ int main() #endif measure_all(1001,0); // - #ifndef BENCH_SORT_SHORT #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(10001,65); measure_all(10001,255); @@ -285,6 +342,7 @@ int main() measure_all(10001,0); // + #ifdef NDEBUG #ifndef BENCH_SORT_UNIQUE_VALUES measure_all(100001,511); measure_all(100001,2047); @@ -294,18 +352,24 @@ int main() measure_all(100001,0); // - #ifdef NDEBUG + #ifndef BENCH_SORT_SHORT #ifndef BENCH_SORT_UNIQUE_VALUES - measure_all(1000001,1); - measure_all(1000001,1024); - measure_all(1000001,32768); - measure_all(1000001,524287); + measure_all(1000001, 8192); + measure_all(1000001, 32768); + 
measure_all(1000001, 131072); + measure_all(1000001, 524288); #endif measure_all(1000001,0); - measure_all(1500001,0); - #endif //NDEBUG + #ifndef BENCH_SORT_UNIQUE_VALUES + measure_all(10000001, 65536); + measure_all(10000001, 262144); + measure_all(10000001, 1048576); + measure_all(10000001, 4194304); + #endif + measure_all(10000001,0); #endif //#ifndef BENCH_SORT_SHORT + #endif //NDEBUG //measure_all(100000001,0);