From de55af3cbb935660a68940fc968d2509bc9f0c92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Sat, 12 Nov 2016 18:58:16 +0100 Subject: [PATCH] Fixed adaptive_sort/merge bugs for stability. --- .../move/algo/detail/adaptive_sort_merge.hpp | 1793 +++++++++-------- include/boost/move/algo/detail/basic_op.hpp | 40 + .../algo/detail/bufferless_merge_sort.hpp | 120 -- include/boost/move/algo/detail/merge.hpp | 48 +- include/boost/move/detail/move_helpers.hpp | 50 +- include/boost/move/detail/workaround.hpp | 5 +- proj/vc7ide/Move.sln | 8 + proj/vc7ide/inplace_merge_test.vcproj | 134 ++ test/adaptive_merge_test.cpp | 40 +- test/adaptive_sort_test.cpp | 28 +- test/bench_merge.cpp | 93 +- test/bench_sort.cpp | 107 +- test/inplace_merge_test.cpp | 283 +++ test/order_type.hpp | 113 +- test/random_shuffle.hpp | 23 + 15 files changed, 1746 insertions(+), 1139 deletions(-) delete mode 100644 include/boost/move/algo/detail/bufferless_merge_sort.hpp create mode 100644 proj/vc7ide/inplace_merge_test.vcproj create mode 100644 test/inplace_merge_test.cpp create mode 100644 test/random_shuffle.hpp diff --git a/include/boost/move/algo/detail/adaptive_sort_merge.hpp b/include/boost/move/algo/detail/adaptive_sort_merge.hpp index 3d97212..63b3f45 100644 --- a/include/boost/move/algo/detail/adaptive_sort_merge.hpp +++ b/include/boost/move/algo/detail/adaptive_sort_merge.hpp @@ -29,8 +29,7 @@ // phase can form up to sqrt(len)*4 segments if enough keys are found. // - The merge-sort phase can take advantage of external memory to // save some additional combination steps. -// - Optimized comparisons when selection-sorting blocks as A and B blocks -// are already sorted. +// - Combination phase: Blocks are selection sorted and merged in parallel. // - The combination phase is performed alternating merge to left and merge // to right phases minimizing swaps due to internal buffer repositioning. // - When merging blocks special optimizations are made to avoid moving some @@ -38,8 +37,7 @@ // // The adaptive_merge algorithm was developed by Ion Gaztanaga reusing some parts // from the sorting algorithm and implementing an additional block merge algorithm -// without moving elements to left or right, which is used when external memory -// is available. +// without moving elements to left or right. ////////////////////////////////////////////////////////////////////////////// #ifndef BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP #define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP @@ -63,6 +61,14 @@ #define BOOST_MOVE_ADAPTIVE_SORT_PRINT(STR, L) #endif +#ifdef BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS + #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT BOOST_ASSERT +#else + #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(L) +#endif + + + namespace boost { namespace movelib { @@ -90,6 +96,64 @@ const T &max_value(const T &a, const T &b) return a > b ? 
a : b; } +template +bool is_sorted(ForwardIt const first, ForwardIt last, Pred pred) +{ + if (first != last) { + ForwardIt next = first, cur(first); + while (++next != last) { + if (pred(*next, *cur)) + return false; + cur = next; + } + } + return true; +} + +#if defined(BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS) + +bool is_sorted(::order_perf_type *first, ::order_perf_type *last, ::order_type_less) +{ + if (first != last) { + const order_perf_type *next = first, *cur(first); + while (++next != last) { + if (!(cur->key < next->key || (cur->key == next->key && cur->val < next->val))) + return false; + cur = next; + } + } + return true; +} + +#endif //BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS + +template +bool is_sorted_and_unique(ForwardIt first, ForwardIt last, Pred pred) +{ + if (first != last) { + ForwardIt next = first; + while (++next != last) { + if (!pred(*first, *next)) + return false; + first = next; + } + } + return true; +} + +template +typename iterator_traits::size_type + count_if_with(ForwardIt first, ForwardIt last, Pred pred, const V &v) +{ + typedef typename iterator_traits::size_type size_type; + size_type count = 0; + while(first != last) { + count += static_cast(0 != pred(*first, v)); + ++first; + } + return count; +} + template class adaptive_xbuf { @@ -320,40 +384,15 @@ RandIt skip_until_merge return first1; } -template -OutputIt op_partial_merge - (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, OutputIt d_first, Compare comp, Op op) -{ - InputIt1 first1(r_first1); - InputIt2 first2(r_first2); - if(first2 != last2 && last1 != first1) - while(1){ - if(comp(*first2, *first1)) { - op(first2++, d_first++); - if(first2 == last2){ - break; - } - } - else{ - op(first1++, d_first++); - if(first1 == last1){ - break; - } - } - } - r_first1 = first1; - r_first2 = first2; - return d_first; -} template -RandItB op_buffered_partial_merge_to_left_placed +RandItB op_buffered_partial_merge_to_range1_and_buffer ( RandIt1 first1, RandIt1 const last1 , RandIt2 &rfirst2, RandIt2 const last2 , RandItB &rfirstb, Compare comp, Op op ) { RandItB firstb = rfirstb; - RandItB lastb = firstb; + RandItB lastb = firstb; RandIt2 first2 = rfirst2; //Move to buffer while merging @@ -372,315 +411,38 @@ RandItB op_buffered_partial_merge_to_left_placed } op(three_way_t(), comp(*first2, *firstb) ? 
first2++ : firstb++, first1++, lastb++); } + rfirst2 = first2; + rfirstb = firstb; } - rfirst2 = first2; - rfirstb = firstb; return lastb; } -/////////////////////////////////////////////////////////////////////////////// -// -// PARTIAL MERGE BUF -// -/////////////////////////////////////////////////////////////////////////////// - -template -RandIt op_partial_merge_with_buf_impl - ( RandIt first1, RandIt const last1, RandIt first2, RandIt last2 - , Buf &buf, typename Buf::iterator &buf_first1_in_out, typename Buf::iterator &buf_last1_in_out - , Compare comp, Op op - ) +template +void swap_and_update_key + ( bool is_next_far_away + , RandItKeys const key_next + , RandItKeys const key_range2 + , RandItKeys &key_mid + , RandIt const begin + , RandIt const end + , RandIt const with) { - typedef typename Buf::iterator buf_iterator; - - BOOST_ASSERT(first1 != last1); - BOOST_ASSERT(first2 != last2); - buf_iterator buf_first1 = buf_first1_in_out; - buf_iterator buf_last1 = buf_last1_in_out; - - if(buf_first1 == buf_last1){ - //Skip any element that does not need to be moved - first1 = skip_until_merge(first1, last1, *last1, comp); - if(first1 == last1){ - return first1; + if(is_next_far_away){ + ::boost::adl_move_swap_ranges(begin, end, with); + ::boost::adl_move_swap(*key_next, *key_range2); + if(key_next == key_mid){ + key_mid = key_range2; } - buf_first1 = buf.data(); - buf_last1 = op_buffered_partial_merge_to_left_placed(first1, last1, first2, last2, buf_first1, comp, op); - BOOST_ASSERT(buf_last1 == (buf.data() + (last1-first1))); - first1 = last1; - } - else{ - BOOST_ASSERT((last1-first1) == (buf_last1 - buf_first1)); - } - - //Now merge from buffer - first1 = op_partial_merge(buf_first1, buf_last1, first2, last2, first1, comp, op); - buf_first1_in_out = buf_first1; - buf_last1_in_out = buf_last1; - return first1; -} - -template -RandIt op_partial_merge_with_buf - ( RandIt first1, RandIt const last1, RandIt first2, RandIt last2 - , Buf &buf - , typename Buf::iterator &buf_first1_in_out - , typename Buf::iterator &buf_last1_in_out - , Compare comp - , Op op - , bool is_stable) -{ - return is_stable - ? op_partial_merge_with_buf_impl - (first1, last1, first2, last2, buf, buf_first1_in_out, buf_last1_in_out, comp, op) - : op_partial_merge_with_buf_impl - (first1, last1, first2, last2, buf, buf_first1_in_out, buf_last1_in_out, antistable(comp), op) - ; -} - -// key_first - sequence of keys, in same order as blocks. key_comp(key, midkey) means stream A -// first - first element to merge. -// first[-l_block, 0) - buffer -// l_block - length of regular blocks. Blocks are stable sorted by 1st elements and key-coded -// l_irreg1 is the irregular block to be merged before n_bef_irreg2 blocks (can be 0) -// n_bef_irreg2/n_aft_irreg2 are regular blocks -// l_irreg2 is a irregular block, that is to be merged after n_bef_irreg2 blocks and before n_aft_irreg2 blocks -// If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks). 
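// ---------------------------------------------------------------------------
// Hedged illustration (not part of the patch): a minimal stand-alone version of
// the "partial merge" idea used by the primitives above -- merge two sorted
// ranges into a destination only until one of the inputs is exhausted, and
// report back how far each input advanced.  The names partial_merge_demo and
// the demo data below are invented for this sketch; only the tie-breaking rule
// (equal elements taken from range 1 first, for stability) mirrors the code.
#include <cassert>
#include <functional>
#include <vector>

template<class It1, class It2, class Out, class Comp>
Out partial_merge_demo(It1 &first1, It1 last1, It2 &first2, It2 last2, Out d_first, Comp comp)
{
   while(first1 != last1 && first2 != last2){
      if(comp(*first2, *first1))    //*first2 strictly smaller: take it
         *d_first++ = *first2++;
      else                          //ties favour range 1, which keeps the merge stable
         *d_first++ = *first1++;
   }
   return d_first;   //one input is exhausted; the caller sees the unconsumed tail via first1/first2
}

int main()
{
   std::vector<int> a = {1, 4}, b = {2, 3, 9}, out(4);
   std::vector<int>::iterator ia = a.begin(), ib = b.begin();
   partial_merge_demo(ia, a.end(), ib, b.end(), out.begin(), std::less<int>());
   assert((out == std::vector<int>{1, 2, 3, 4}));   //stops once range 1 is exhausted
   assert(ia == a.end() && *ib == 9);               //range 2 keeps its unmerged tail
   return 0;
}
// ---------------------------------------------------------------------------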
-template -void op_merge_blocks_with_buf - ( RandItKeys key_first - , const typename iterator_traits::value_type &midkey - , KeyCompare key_comp - , RandIt const first - , typename iterator_traits::size_type const l_block - , typename iterator_traits::size_type const l_irreg1 - , typename iterator_traits::size_type const n_bef_irreg2 - , typename iterator_traits::size_type const n_aft_irreg2 - , typename iterator_traits::size_type const l_irreg2 - , Compare comp - , Op op - , Buf & xbuf) -{ - typedef typename Buf::iterator buf_iterator; - buf_iterator buffer = xbuf.data(); - buf_iterator buffer_end = buffer; - RandIt first1 = first; - RandIt last1 = first1 + l_irreg1; - RandItKeys const key_end (key_first+n_bef_irreg2); - - bool is_range1_A = true; //first l_irreg1 elements are always from range A - - for( ; key_first != key_end; ++key_first, last1 += l_block){ - //If the trailing block is empty, we'll make it equal to the previous if empty - bool const is_range2_A = key_comp(*key_first, midkey); - - if(is_range1_A == is_range2_A){ - //If buffered, put those elements in place - RandIt res = op(forward_t(), buffer, buffer_end, first1); - BOOST_ASSERT(buffer == buffer_end || res == last1); (void)res; - buffer_end = buffer; - first1 = last1; + else if(key_mid == key_range2){ + key_mid = key_next; } - else { - first1 = op_partial_merge_with_buf(first1, last1, last1, last1 + l_block, xbuf, buffer, buffer_end, comp, op, is_range1_A); - BOOST_ASSERT(buffer == buffer_end || (buffer_end-buffer) == (last1+l_block-first1)); - is_range1_A ^= buffer == buffer_end; - } - } - - //Now the trailing irregular block, first put buffered elements in place - RandIt res = op(forward_t(), buffer, buffer_end, first1); - BOOST_ASSERT(buffer == buffer_end || res == last1); (void)res; - - BOOST_ASSERT(l_irreg2 || n_aft_irreg2); - if(l_irreg2){ - bool const is_range2_A = false; //last l_irreg2 elements always from range B - if(is_range1_A == is_range2_A){ - first1 = last1; - last1 = last1+l_block*n_aft_irreg2; - } - else { - last1 += l_block*n_aft_irreg2; - } - xbuf.clear(); - op_buffered_merge(first1, last1, last1+l_irreg2, comp, op, xbuf); - } -} - - -template -void merge_blocks_with_buf - ( RandItKeys key_first - , const typename iterator_traits::value_type &midkey - , KeyCompare key_comp - , RandIt const first - , typename iterator_traits::size_type const l_block - , typename iterator_traits::size_type const l_irreg1 - , typename iterator_traits::size_type const n_bef_irreg2 - , typename iterator_traits::size_type const n_aft_irreg2 - , typename iterator_traits::size_type const l_irreg2 - , Compare comp - , Buf & xbuf - , bool const xbuf_used) -{ - if(xbuf_used){ - op_merge_blocks_with_buf - (key_first, midkey, key_comp, first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, move_op(), xbuf); - } - else{ - op_merge_blocks_with_buf - (key_first, midkey, key_comp, first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, swap_op(), xbuf); } } /////////////////////////////////////////////////////////////////////////////// // -// PARTIAL MERGE LEFT -// -/////////////////////////////////////////////////////////////////////////////// - -template -RandIt op_partial_merge_left_middle_buffer_impl - (RandIt first1, RandIt const last1, RandIt const first2 - , const typename iterator_traits::value_type &next_key, Compare comp - , Op op) -{ - first1 = skip_until_merge(first1, last1, next_key, comp); - - //Even if we copy backward, no overlapping occurs so use forward copy - //that can be faster 
specially with trivial types - RandIt const new_first1 = first2 - (last1 - first1); - BOOST_ASSERT(last1 <= new_first1); - op(forward_t(), first1, last1, new_first1); - return new_first1; -} - -template -RandIt op_partial_merge_left_middle_buffer - ( RandIt first1, RandIt const last1, RandIt const first2 - , const typename iterator_traits::value_type &next_key, Compare comp, Op op, bool is_stable) -{ - return is_stable ? op_partial_merge_left_middle_buffer_impl(first1, last1, first2, next_key, comp, op) - : op_partial_merge_left_middle_buffer_impl(first1, last1, first2, next_key, antistable(comp), op); -} - -// Partially merges two ordered ranges. Partially means that elements are merged -// until one of two ranges is exhausted (M elements from ranges 1 y 2). -// [buf_first, ...) -> buffer that can be overwritten -// [first1, last1) merge [last1,last2) -> [buf_first, buf_first+M) -// Note: distance(buf_first, first1) >= distance(last1, last2), so no overlapping occurs. -template -RandIt op_partial_merge_left_smart_impl - ( RandIt first1, RandIt last1, RandIt first2, RandIt const last2, Compare comp, Op op) -{ - RandIt dest; - if(last1 != first2){ - BOOST_ASSERT(0 != (last1-first1)); - BOOST_ASSERT((first2-last1)==(last2-first2)); - //Skip any element that does not need to be moved - first1 = skip_until_merge(first1, last1, *first2, comp); - if(first1 == last1) - return first2; - RandIt buf_first1 = first2 - (last1-first1); - dest = last1; - last1 = op_buffered_partial_merge_to_left_placed(first1, last1, first2, last2, buf_first1, comp, op); - first1 = buf_first1; - BOOST_ASSERT((first1-dest) == (last2-first2)); - } - else{ - dest = first1-(last2-first2); - } - - op_partial_merge(first1, last1, first2, last2, dest, comp, op); - return first1 == last1 ? first2 : first1; -} - -template -RandIt op_partial_merge_left_smart - (RandIt first1, RandIt const last1, RandIt first2, RandIt const last2, Compare comp, Op op, bool is_stable) -{ - return is_stable ? op_partial_merge_left_smart_impl(first1, last1, first2, last2, comp, op) - : op_partial_merge_left_smart_impl(first1, last1, first2, last2, antistable(comp), op); -} - -// first - first element to merge. -// first[-l_block, 0) - buffer -// l_block - length of regular blocks. Blocks are stable sorted by 1st elements and key-coded -// key_first - sequence of keys, in same order as blocks. key -void op_merge_blocks_left - ( RandItKeys key_first - , const typename iterator_traits::value_type &midkey - , KeyCompare key_comp - , RandIt const first - , typename iterator_traits::size_type const l_block - , typename iterator_traits::size_type const l_irreg1 - , typename iterator_traits::size_type const n_bef_irreg2 - , typename iterator_traits::size_type const n_aft_irreg2 - , typename iterator_traits::size_type const l_irreg2 - , Compare comp, Op op) -{ - RandIt buffer = first - l_block; - RandIt first1 = first; - RandIt last1 = first1 + l_irreg1; - RandIt first2 = last1; - RandItKeys const key_end (key_first+n_bef_irreg2); - bool is_range1_A = true; - for( ; key_first != key_end; first2 += l_block, ++key_first){ - //If the trailing block is empty, we'll make it equal to the previous if empty - bool const is_range2_A = key_comp(*key_first, midkey); - - if(is_range1_A == is_range2_A){ - if(last1 != buffer){ //equiv. 
to if(!is_buffer_middle) - buffer = op(forward_t(), first1, last1, buffer); - } - first1 = first2; - last1 = first2 + l_block; - } - else { - RandIt const last2 = first2 + l_block; - first1 = op_partial_merge_left_smart(first1, last1, first2, last2, comp, op, is_range1_A); - - if(first1 < first2){ //is_buffer_middle for the next iteration - last1 = first2; - buffer = last1; - } - else{ //!is_buffer_middle for the next iteration - is_range1_A = is_range2_A; - buffer = first1 - l_block; - last1 = last2; - } - } - } - - //Now the trailing irregular block - bool const is_range2_A = false; //Trailing l_irreg2 is always from Range B - bool const is_buffer_middle = last1 == buffer; - - if(!l_irreg2 || is_range1_A == is_range2_A){ //trailing is always B type - //If range1 is buffered, write it to its final position - if(!is_buffer_middle){ - buffer = op(forward_t(), first1, last1, buffer); - } - first1 = first2; - } - else { - if(is_buffer_middle){ - first1 = op_partial_merge_left_middle_buffer(first1, last1, first2, first2[l_block*n_aft_irreg2], comp, op, is_range1_A); - buffer = first1 - l_block; - } - } - last1 = first2 + l_block*n_aft_irreg2; - op_merge_left(buffer, first1, last1, last1+l_irreg2, comp, op); -} - -/////////////////////////////////////////////////////////////////////////////// -// -// PARTIAL MERGE BUFFERLESS +// MERGE BUFFERLESS // /////////////////////////////////////////////////////////////////////////////// @@ -719,54 +481,111 @@ RandIt partial_merge_bufferless : partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, antistable(comp)); } +template +static SizeType needed_keys_count(SizeType n_block_a, SizeType n_block_b) +{ + return n_block_a + n_block_b; +} +template +typename iterator_traits::size_type + find_next_block + ( RandItKeys key_first + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits::size_type const l_block + , typename iterator_traits::size_type const ix_first_block + , typename iterator_traits::size_type const ix_last_block + , Compare comp) +{ + typedef typename iterator_traits::size_type size_type; + typedef typename iterator_traits::value_type value_type; + typedef typename iterator_traits::value_type key_type; + BOOST_ASSERT(ix_first_block <= ix_last_block); + size_type ix_min_block = 0u; + for (size_type szt_i = ix_first_block; szt_i < ix_last_block; ++szt_i) { + const value_type &min_val = first[ix_min_block*l_block]; + const value_type &cur_val = first[szt_i*l_block]; + const key_type &min_key = key_first[ix_min_block]; + const key_type &cur_key = key_first[szt_i]; + + bool const less_than_minimum = comp(cur_val, min_val) || + (!comp(min_val, cur_val) && key_comp(cur_key, min_key)); + + if (less_than_minimum) { + ix_min_block = szt_i; + } + } + return ix_min_block; +} -// l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded -// keys - sequence of keys, in same order as blocks. key0, n_aft_irreg2=0 is possible. 
template void merge_blocks_bufferless ( RandItKeys key_first - , const typename iterator_traits::value_type &midkey , KeyCompare key_comp - , RandIt first + , RandIt const first , typename iterator_traits::size_type const l_block , typename iterator_traits::size_type const l_irreg1 - , typename iterator_traits::size_type const n_bef_irreg2 - , typename iterator_traits::size_type const n_aft_irreg2 + , typename iterator_traits::size_type const n_block_a + , typename iterator_traits::size_type const n_block_b , typename iterator_traits::size_type const l_irreg2 , Compare comp) { - if(n_bef_irreg2 == 0){ - RandIt const last_reg(first+l_irreg1+n_aft_irreg2*l_block); - merge_bufferless(first, last_reg, last_reg+l_irreg2, comp); - } - else{ - RandIt first1 = first; - RandIt last1 = l_irreg1 ? first + l_irreg1: first + l_block; - RandItKeys const key_end (key_first+n_bef_irreg2); - bool is_range1_A = l_irreg1 ? true : key_comp(*key_first++, midkey); + typedef typename iterator_traits::size_type size_type; + size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count; + //BOOST_ASSERT(n_block_a || n_block_b); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a])); - for( ; key_first != key_end; ++key_first){ - bool is_range2_A = key_comp(*key_first, midkey); - if(is_range1_A == is_range2_A){ - first1 = last1; - } - else{ - first1 = partial_merge_bufferless(first1, last1, last1 + l_block, &is_range1_A, comp); - } - last1 += l_block; - } + size_type n_bef_irreg2 = 0; + bool l_irreg_pos_count = true; + RandItKeys key_mid(key_first + n_block_a); + RandIt const first_irr2 = first + l_irreg1 + (n_block_a+n_block_b)*l_block; + RandIt const last_irr2 = first_irr2 + l_irreg2; - if(l_irreg2){ - if(!is_range1_A){ - first1 = last1; + { //Selection sort blocks + size_type n_block_left = n_block_b + n_block_a; + RandItKeys key_range2(key_first); + + size_type min_check = n_block_a == n_block_left ? 0u : n_block_a; + size_type max_check = min_value(min_check+1, n_block_left); + for (RandIt f = first+l_irreg1; n_block_left; --n_block_left, ++key_range2, f += l_block, min_check -= min_check != 0, max_check -= max_check != 0) { + size_type const next_key_idx = find_next_block(key_range2, key_comp, f, l_block, min_check, max_check, comp); + RandItKeys const key_next(key_range2 + next_key_idx); + max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left); + + RandIt const first_min = f + next_key_idx*l_block; + + //Check if irregular b block should go here. 
+ //If so, break to the special code handling the irregular block + if (l_irreg_pos_count && l_irreg2 && comp(*first_irr2, *first_min)){ + l_irreg_pos_count = false; } - last1 += l_block*n_aft_irreg2; - merge_bufferless(first1, last1, last1+l_irreg2, comp); + n_bef_irreg2 += l_irreg_pos_count; + + swap_and_update_key(next_key_idx != 0, key_next, key_range2, key_mid, f, f + l_block, first_min); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(f, f+l_block, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, first_min + l_block, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((f == (first+l_irreg1)) || !comp(*f, *(f-l_block))); } } + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first+l_irreg1+n_bef_irreg2*l_block, first_irr2, comp)); + + RandIt first1 = first; + RandIt last1 = first+l_irreg1; + RandItKeys const key_end (key_first+n_bef_irreg2); + bool is_range1_A = true; + + for( ; key_first != key_end; ++key_first){ + bool is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_first, *key_mid); + first1 = is_range1_A == is_range2_A + ? last1 : partial_merge_bufferless(first1, last1, last1 + l_block, &is_range1_A, comp); + last1 += l_block; + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, first1, comp)); + } + + merge_bufferless(is_range1_A ? first1 : last1, first_irr2, last_irr2, comp); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, last_irr2, comp)); } /////////////////////////////////////////////////////////////////////////////// @@ -929,20 +748,6 @@ Unsigned ceil_sqrt_multiple(Unsigned const n, Unsigned *pbase = 0) return res; } -template -Unsigned ceil_sqrt_pow2(Unsigned const n) -{ - Unsigned r=1; - Unsigned exp = 0; - Unsigned pow = 1u; - while(pow != 0 && pow < n){ - r*=2; - ++exp; - pow = r << exp; - } - return r; -} - struct less { template @@ -1038,128 +843,6 @@ Unsigned lblock_for_combine } } - -//Although "cycle" sort is known to have the minimum number of writes to target -//selection sort is more appropriate here as we want to minimize swaps. -template -void selection_sort_blocks - ( RandItKeys keys - , typename iterator_traits::size_type &midkey_idx //inout - , KeyCompare key_comp - , RandIt const first_block - , typename iterator_traits::size_type const l_block - , typename iterator_traits::size_type const n_blocks - , Compare comp - , bool use_first_element - , XBuf & xbuf -) -{ - typedef typename iterator_traits::size_type size_type ; - size_type const back_midkey_idx = midkey_idx; - typedef typename iterator_traits::size_type size_type; - typedef typename iterator_traits::value_type value_type; - - //Nothing to sort if 0 or 1 blocks or all belong to the first ordered half - if(n_blocks < 2 || back_midkey_idx >= n_blocks){ - return; - } - //One-past the position of the first untouched element of the second half - size_type high_watermark = back_midkey_idx+1; - BOOST_ASSERT(high_watermark <= n_blocks); - const bool b_cache_on = xbuf.capacity() >= l_block; - //const bool b_cache_on = false; - const size_type cached_none = size_type(-1); - size_type cached_block = cached_none; - - //Sort by first element if left merging, last element otherwise - size_type const reg_off = use_first_element ? 0u: l_block-1; - - for(size_type block=0; block < n_blocks-1; ++block){ - size_type min_block = block; - //Since we are searching for the minimum value in two sorted halves: - //Optimization 1: If block belongs to first half, don't waste time comparing elements of the first half. 
- //Optimization 2: It is enough to compare until the first untouched element of the second half. - //Optimization 3: If cache memory is available, instead of swapping blocks (3 writes per element), - // play with the cache to aproximate it to 2 writes per element. - high_watermark = size_type(max_value(block+2, high_watermark)); - BOOST_ASSERT(high_watermark <= n_blocks); - for(size_type next_block = size_type(max_value(block+1, back_midkey_idx)); next_block < high_watermark; ++next_block){ - const value_type &min_v = (b_cache_on && (cached_block == min_block) ? xbuf.data()[reg_off] : first_block[min_block*l_block+reg_off]); - const value_type &v = (b_cache_on && (cached_block == next_block) ? xbuf.data()[reg_off] : first_block[next_block*l_block+reg_off]); - - if( comp(v, min_v) || (!comp(min_v, v) && key_comp(keys[next_block], keys[min_block])) ){ - min_block = next_block; - } - } - - if(min_block != block){ - BOOST_ASSERT(block >= back_midkey_idx || min_block >= back_midkey_idx); - BOOST_ASSERT(min_block < high_watermark); - //Increase high watermark if not the maximum and min_block is just before the high watermark - high_watermark += size_type((min_block + 1) != n_blocks && (min_block + 1) == high_watermark); - BOOST_ASSERT(high_watermark <= n_blocks); - if(!b_cache_on){ - boost::adl_move_swap_ranges(first_block+block*l_block, first_block+(block+1)*l_block, first_block+min_block*l_block); - } - else if(cached_block == cached_none){ - //Cache the biggest block and put the minimum into its final position - xbuf.move_assign(first_block+block*l_block, l_block); - boost::move(first_block+min_block*l_block, first_block+(min_block+1)*l_block, first_block+block*l_block); - cached_block = min_block; - } - else if(cached_block == block){ - //Since block is cached and is not the minimum, just put the minimum directly into its final position and update the cache index - boost::move(first_block+min_block*l_block, first_block+(min_block+1)*l_block, first_block+block*l_block); - cached_block = min_block; - } - else if(cached_block == min_block){ - //Since the minimum is cached, move the block to the back position and flush the cache to its final position - boost::move(first_block+block*l_block, first_block+(block+1)*l_block, first_block+min_block*l_block); - boost::move(xbuf.data(), xbuf.end(), first_block+block*l_block); - cached_block = cached_none; - } - else{ - //Cached block is not any of two blocks to be exchanged, a smarter operation must be performed - BOOST_ASSERT(cached_block != min_block); - BOOST_ASSERT(cached_block != block); - BOOST_ASSERT(cached_block > block); - BOOST_ASSERT(cached_block < high_watermark); - //Instead of moving block to the slot of the minimum (which is typical selection sort), before copying - //data from the minimum slot to its final position: - // -> move it to free slot pointed by cached index, and - // -> move cached index into slot of the minimum. - //Since both cached_block and min_block belong to the still unordered range of blocks, the change - //does not break selection sort and saves one copy. 
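// ---------------------------------------------------------------------------
// Hedged illustration (not part of the patch): the cache trick described above
// is an optimisation of a plain selection sort over fixed-size blocks.  This
// stand-alone sketch shows only the basic form -- pick the block with the
// smallest first element (keys break ties), swap whole blocks, and mirror every
// swap in the key array so each block's origin can still be recovered.  The
// name selection_sort_blocks_demo is invented; the watermark and cache
// optimisations of the real code are deliberately omitted.
#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

template<class It, class Key, class Comp>
void selection_sort_blocks_demo
   (It first, std::size_t n_blocks, std::size_t l_block, std::vector<Key> &keys, Comp comp)
{
   for(std::size_t block = 0; block + 1 < n_blocks; ++block){
      std::size_t min_block = block;
      for(std::size_t next = block + 1; next < n_blocks; ++next){
         //Compare blocks by their first element; keys break ties to keep the sort stable
         const bool less = comp(first[next*l_block], first[min_block*l_block]) ||
                           (  !comp(first[min_block*l_block], first[next*l_block])
                           && keys[next] < keys[min_block]);
         if(less) min_block = next;
      }
      if(min_block != block){
         std::swap_ranges( first + block*l_block, first + (block+1)*l_block
                         , first + min_block*l_block);
         std::swap(keys[block], keys[min_block]);   //keep the movement-imitation keys in sync
      }
   }
}
// ---------------------------------------------------------------------------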
- boost::move(first_block+block*l_block, first_block+(block+1)*l_block, first_block+cached_block*l_block); - boost::move(first_block+min_block*l_block, first_block+(min_block+1)*l_block, first_block+block*l_block); - //Note that this trick requires an additionl fix for keys and midkey index - boost::adl_move_swap(keys[cached_block], keys[min_block]); - if(midkey_idx == cached_block) - midkey_idx = min_block; - else if(midkey_idx == min_block) - midkey_idx = cached_block; - boost::adl_move_swap(cached_block, min_block); - } - //Once min_block and block are exchanged, fix the movement imitation key buffer and midkey index. - boost::adl_move_swap(keys[block], keys[min_block]); - if(midkey_idx == block) - midkey_idx = min_block; - else if(midkey_idx == min_block) - midkey_idx = block; - } - else if(b_cache_on && cached_block == block){ - //The selected block was the minimum, but since it was cached, move it to its final position - boost::move(xbuf.data(), xbuf.end(), first_block+block*l_block); - cached_block = cached_none; - } - } //main for loop - - if(b_cache_on && cached_block != cached_none){ - //The sort has ended with cached data, move it to its final position - boost::move(xbuf.data(), xbuf.end(), first_block+cached_block*l_block); - } -} - template void stable_sort( RandIt first, RandIt last, Compare comp, XBuf & xbuf) { @@ -1195,128 +878,6 @@ void initialize_keys( RandIt first, RandIt last } } -template -void combine_params - ( RandItKeys const keys - , KeyCompare key_comp - , RandIt const first - , typename iterator_traits::size_type l_combined - , typename iterator_traits::size_type const l_prev_merged - , typename iterator_traits::size_type const l_block - , XBuf & xbuf - , Compare comp - //Output - , typename iterator_traits::size_type &midkey_idx - , typename iterator_traits::size_type &l_irreg1 - , typename iterator_traits::size_type &n_bef_irreg2 - , typename iterator_traits::size_type &n_aft_irreg2 - , typename iterator_traits::size_type &l_irreg2 - //Options - , bool is_merge_left_or_bufferless - , bool do_initialize_keys = true) -{ - typedef typename iterator_traits::size_type size_type; - typedef typename iterator_traits::value_type value_type; - - //Initial parameters for selection sort blocks - l_irreg1 = l_prev_merged%l_block; - l_irreg2 = (l_combined-l_irreg1)%l_block; - BOOST_ASSERT(((l_combined-l_irreg1-l_irreg2)%l_block) == 0); - size_type const n_reg_block = (l_combined-l_irreg1-l_irreg2)/l_block; - midkey_idx = l_prev_merged/l_block; - BOOST_ASSERT(n_reg_block>=midkey_idx); - - //Key initialization - if (do_initialize_keys) { - initialize_keys(keys, keys+n_reg_block+(midkey_idx==n_reg_block), key_comp, xbuf); - } - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A initkey: ", l_combined + l_block); - - //Selection sort blocks - selection_sort_blocks(keys, midkey_idx, key_comp, first+l_irreg1, l_block, n_reg_block, comp, is_merge_left_or_bufferless, xbuf); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A selsort: ", l_combined + l_block); - - //Special case for the last elements - n_aft_irreg2 = 0; - if(l_irreg2 != 0){ - size_type const reg_off = is_merge_left_or_bufferless ? 0u: l_block-1; - size_type const irreg_off = is_merge_left_or_bufferless ? 
0u: l_irreg2-1; - RandIt prev_block_first = first + l_combined - l_irreg2; - const value_type &incomplete_block_first = prev_block_first[irreg_off]; - while(n_aft_irreg2 != n_reg_block && - comp(incomplete_block_first, (prev_block_first-= l_block)[reg_off]) ){ - ++n_aft_irreg2; - } - } - n_bef_irreg2 = n_reg_block-n_aft_irreg2; -} - -// first - first element to merge. -// first[-l_block, 0) - buffer (if use_buf == true) -// l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded -// keys - sequence of keys, in same order as blocks. key -void merge_blocks_left - ( RandItKeys const key_first - , const typename iterator_traits::value_type &midkey - , KeyCompare key_comp - , RandIt const first - , typename iterator_traits::size_type const l_block - , typename iterator_traits::size_type const l_irreg1 - , typename iterator_traits::size_type const n_bef_irreg2 - , typename iterator_traits::size_type const n_aft_irreg2 - , typename iterator_traits::size_type const l_irreg2 - , Compare comp - , bool const xbuf_used) -{ - if(xbuf_used){ - op_merge_blocks_left - (key_first, midkey, key_comp, first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, move_op()); - } - else{ - op_merge_blocks_left - (key_first, midkey, key_comp, first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, swap_op()); - } -} - - -// first - first element to merge. -// [first+l_block*(n_bef_irreg2+n_aft_irreg2)+l_irreg2, first+l_block*(n_bef_irreg2+n_aft_irreg2+1)+l_irreg2) - buffer -// l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded -// keys - sequence of keys, in same order as blocks. key -void merge_blocks_right - ( RandItKeys const key_first - , const typename iterator_traits::value_type &midkey - , KeyCompare key_comp - , RandIt const first - , typename iterator_traits::size_type const l_block - , typename iterator_traits::size_type const n_bef_irreg2 - , typename iterator_traits::size_type const n_aft_irreg2 - , typename iterator_traits::size_type const l_irreg2 - , Compare comp - , bool const xbuf_used) -{ - merge_blocks_left - ( make_reverse_iterator(key_first+n_aft_irreg2 + n_bef_irreg2) - , midkey - , negate(key_comp) - , make_reverse_iterator(first+(n_bef_irreg2+n_aft_irreg2)*l_block+l_irreg2) - , l_block - , l_irreg2 - , n_aft_irreg2 + n_bef_irreg2 - , 0 - , 0 - , inverse(comp), xbuf_used); -} - - template void move_data_backward( RandIt cur_pos , typename iterator_traits::size_type const l_data @@ -1368,108 +929,620 @@ Unsigned calculate_total_combined(Unsigned const len, Unsigned const l_prev_merg return l_total_combined; } -// keys are on the left of first: -// If use_buf: [first - l_block - n_keys, first - l_block). -// Otherwise: [first - n_keys, first). -// Buffer (if use_buf) is also on the left of first [first - l_block, first). -// Blocks of length l_prev_merged combined. We'll combine them in pairs -// l_prev_merged and n_keys are powers of 2. 
(2*l_prev_merged/l_block) keys are guaranteed -// Returns the number of combined elements (some trailing elements might be left uncombined) -template -void adaptive_sort_combine_blocks +template +void combine_params ( RandItKeys const keys , KeyCompare key_comp - , RandIt const first - , typename iterator_traits::size_type const len - , typename iterator_traits::size_type const l_prev_merged - , typename iterator_traits::size_type const l_block - , bool const use_buf - , bool const xbuf_used + , SizeType l_combined + , SizeType const l_prev_merged + , SizeType const l_block , XBuf & xbuf - , Compare comp - , bool merge_left) + //Output + , SizeType &n_block_a + , SizeType &n_block_b + , SizeType &l_irreg1 + , SizeType &l_irreg2 + //Options + , bool do_initialize_keys = true) { - (void)xbuf; - typedef typename iterator_traits::size_type size_type; + typedef SizeType size_type; - size_type const l_reg_combined = 2*l_prev_merged; - size_type l_irreg_combined = 0; - size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined); - size_type const n_reg_combined = len/l_reg_combined; - RandIt combined_first = first; + //Initial parameters for selection sort blocks + l_irreg1 = l_prev_merged%l_block; + l_irreg2 = (l_combined-l_irreg1)%l_block; + BOOST_ASSERT(((l_combined-l_irreg1-l_irreg2)%l_block) == 0); + size_type const n_reg_block = (l_combined-l_irreg1-l_irreg2)/l_block; + n_block_a = l_prev_merged/l_block; + n_block_b = n_reg_block - n_block_a; + BOOST_ASSERT(n_reg_block>=n_block_a); - (void)l_total_combined; - BOOST_ASSERT(l_total_combined <= len); + //Key initialization + if (do_initialize_keys) { + initialize_keys(keys, keys + needed_keys_count(n_block_a, n_block_b), key_comp, xbuf); + } +} - size_type n_bef_irreg2, n_aft_irreg2, midkey_idx, l_irreg1, l_irreg2; - size_type const max_i = n_reg_combined + (l_irreg_combined != 0); +template +RandItB op_buffered_partial_merge_and_swap_to_range1_and_buffer + ( RandIt1 first1, RandIt1 const last1 + , RandIt2 &rfirst2, RandIt2 const last2, RandIt2 &rfirst_min + , RandItB &rfirstb, Compare comp, Op op ) +{ + RandItB firstb = rfirstb; + RandItB lastb = firstb; + RandIt2 first2 = rfirst2; - if(merge_left || !use_buf) { - for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_reg_combined) { - bool const is_last = combined_i==n_reg_combined; - size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; + //Move to buffer while merging + //Three way moves need less moves when op is swap_op so use it + //when merging elements from range2 to the destination occupied by range1 + if(first1 != last1 && first2 != last2){ + RandIt2 first_min = rfirst_min; + op(four_way_t(), first2++, first_min++, first1++, lastb++); - range_xbuf rbuf( (use_buf && xbuf_used) ? 
(combined_first-l_block) : combined_first, combined_first); - combine_params( keys, key_comp, combined_first, l_cur_combined - , l_prev_merged, l_block, rbuf, comp - , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs - //Now merge blocks - if(!use_buf){ - merge_blocks_bufferless - (keys, keys[midkey_idx], key_comp, combined_first, l_block, 0u, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp); + while(first1 != last1){ + if(first2 == last2){ + lastb = op(forward_t(), first1, last1, firstb); + break; + } + bool const min_less = comp(*first_min, *firstb); + + if(min_less){ + op( four_way_t(), first2++, first_min++, first1++, lastb++); } else{ - merge_blocks_left - (keys, keys[midkey_idx], key_comp, combined_first, l_block, 0u, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used); + op(three_way_t(), firstb++, first1++, lastb++); + } + } + rfirst2 = first2; + rfirstb = firstb; + rfirst_min = first_min; + } + + return lastb; +} + +////////////////////////////////// +// +// partial_merge +// +////////////////////////////////// +template +OutputIt op_partial_merge_impl + (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, OutputIt d_first, Compare comp, Op op) +{ + InputIt1 first1(r_first1); + InputIt2 first2(r_first2); + if(first2 != last2 && last1 != first1) + while(1){ + if(comp(*first2, *first1)) { + op(first2++, d_first++); + if(first2 == last2){ + break; + } + } + else{ + op(first1++, d_first++); + if(first1 == last1){ + break; } - //BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After merge_blocks_l: ", len + l_block); } } + r_first1 = first1; + r_first2 = first2; + return d_first; +} + +template +OutputIt op_partial_merge + (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, OutputIt d_first, Compare comp, Op op, bool is_stable) +{ + return is_stable ? op_partial_merge_impl(r_first1, last1, r_first2, last2, d_first, comp, op) + : op_partial_merge_impl(r_first1, last1, r_first2, last2, d_first, antistable(comp), op); +} + +////////////////////////////////// +// +// partial_merge_and_swap +// +////////////////////////////////// +template +OutputIt op_partial_merge_and_swap_impl + (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, InputIt2 &r_first_min, OutputIt d_first, Compare comp, Op op) +{ + InputIt1 first1(r_first1); + InputIt2 first2(r_first2); + + if(first2 != last2 && last1 != first1) { + InputIt2 first_min(r_first_min); + bool non_empty_ranges = true; + do{ + if(comp(*first_min, *first1)) { + op(three_way_t(), first2++, first_min++, d_first++); + non_empty_ranges = first2 != last2; + } + else{ + op(first1++, d_first++); + non_empty_ranges = first1 != last1; + } + } while(non_empty_ranges); + r_first_min = first_min; + r_first1 = first1; + r_first2 = first2; + } + return d_first; +} + +template +RandIt op_partial_merge_and_swap + (RandIt &r_first1, RandIt const last1, RandIt &r_first2, RandIt const last2, InputIt2 &r_first_min, OutputIt d_first, Compare comp, Op op, bool is_stable) +{ + return is_stable ? 
op_partial_merge_and_swap_impl(r_first1, last1, r_first2, last2, r_first_min, d_first, comp, op) + : op_partial_merge_and_swap_impl(r_first1, last1, r_first2, last2, r_first_min, d_first, antistable(comp), op); +} + +template +RandIt op_partial_merge_and_save_impl + ( RandIt first1, RandIt const last1, RandIt &rfirst2, RandIt last2, RandIt first_min + , RandItBuf &buf_first1_in_out, RandItBuf &buf_last1_in_out + , Compare comp, Op op + ) +{ + RandItBuf buf_first1 = buf_first1_in_out; + RandItBuf buf_last1 = buf_last1_in_out; + RandIt first2(rfirst2); + + bool const do_swap = first2 != first_min; + if(buf_first1 == buf_last1){ + //Skip any element that does not need to be moved + RandIt new_first1 = skip_until_merge(first1, last1, *first_min, comp); + buf_first1 += (new_first1-first1); + first1 = new_first1; + buf_last1 = do_swap ? op_buffered_partial_merge_and_swap_to_range1_and_buffer(first1, last1, first2, last2, first_min, buf_first1, comp, op) + : op_buffered_partial_merge_to_range1_and_buffer (first1, last1, first2, last2, buf_first1, comp, op); + first1 = last1; + } else{ - combined_first += l_reg_combined*(max_i-1); - for( size_type combined_i = max_i; combined_i--; combined_first -= l_reg_combined) { - bool const is_last = combined_i==n_reg_combined; - size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; - RandIt const combined_last(combined_first+l_cur_combined); - range_xbuf rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last); - combine_params( keys, key_comp, combined_first, l_cur_combined - , l_prev_merged, l_block, rbuf, comp - , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, false); //Outputs - //BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After combine_params: ", len + l_block); - merge_blocks_right - (keys, keys[midkey_idx], key_comp, combined_first, l_block, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used); - //BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After merge_blocks_r: ", len + l_block); + BOOST_ASSERT((last1-first1) == (buf_last1 - buf_first1)); + } + + //Now merge from buffer + first1 = do_swap ? op_partial_merge_and_swap_impl(buf_first1, buf_last1, first2, last2, first_min, first1, comp, op) + : op_partial_merge_impl (buf_first1, buf_last1, first2, last2, first1, comp, op); + buf_first1_in_out = buf_first1; + buf_last1_in_out = buf_last1; + rfirst2 = first2; + return first1; +} + +template +RandIt op_partial_merge_and_save + ( RandIt first1, RandIt const last1, RandIt &rfirst2, RandIt last2, RandIt first_min + , RandItBuf &buf_first1_in_out + , RandItBuf &buf_last1_in_out + , Compare comp + , Op op + , bool is_stable) +{ + return is_stable + ? 
op_partial_merge_and_save_impl + (first1, last1, rfirst2, last2, first_min, buf_first1_in_out, buf_last1_in_out, comp, op) + : op_partial_merge_and_save_impl + (first1, last1, rfirst2, last2, first_min, buf_first1_in_out, buf_last1_in_out, antistable(comp), op) + ; +} + + + +template +OutputIt op_merge_blocks_with_irreg + ( RandItKeys key_first + , RandItKeys key_mid + , KeyCompare key_comp + , RandIt first_reg + , RandIt2 &first_irr + , RandIt2 const last_irr + , OutputIt dest + , typename iterator_traits::size_type const l_block + , typename iterator_traits::size_type n_block_left + , typename iterator_traits::size_type min_check + , typename iterator_traits::size_type max_check + , Compare comp, bool const is_stable, Op op) +{ + typedef typename iterator_traits::size_type size_type; + + for(; n_block_left; --n_block_left, ++key_first, min_check -= min_check != 0, max_check -= max_check != 0){ + size_type next_key_idx = find_next_block(key_first, key_comp, first_reg, l_block, min_check, max_check, comp); + max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left); + RandIt const last_reg = first_reg + l_block; + RandIt first_min = first_reg + next_key_idx*l_block; + RandIt const last_min = first_min + l_block; (void)last_min; + + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_reg, last_reg, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!next_key_idx || is_sorted(first_min, last_min, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((!next_key_idx || !comp(*first_reg, *first_min ))); + + OutputIt orig_dest = dest; (void)orig_dest; + dest = next_key_idx ? op_partial_merge_and_swap(first_irr, last_irr, first_reg, last_reg, first_min, dest, comp, op, is_stable) + : op_partial_merge (first_irr, last_irr, first_reg, last_reg, dest, comp, op, is_stable); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(orig_dest, dest, comp)); + + if(first_reg == dest){ + dest = next_key_idx ? ::boost::adl_move_swap_ranges(first_min, last_min, first_reg) + : last_reg; } + else{ + dest = next_key_idx ? 
op(three_way_forward_t(), first_reg, last_reg, first_min, dest) + : op(forward_t(), first_reg, last_reg, dest); + } + + RandItKeys const key_next(key_first + next_key_idx); + swap_and_update_key(next_key_idx != 0, key_next, key_first, key_mid, last_reg, last_reg, first_min); + + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(orig_dest, dest, comp)); + first_reg = last_reg; + } + return dest; +} + +template +void op_merge_blocks_left + ( RandItKeys const key_first + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits::size_type const l_block + , typename iterator_traits::size_type const l_irreg1 + , typename iterator_traits::size_type const n_block_a + , typename iterator_traits::size_type const n_block_b + , typename iterator_traits::size_type const l_irreg2 + , Compare comp, Op op) +{ + typedef typename iterator_traits::size_type size_type; + size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count; +// BOOST_ASSERT(n_block_a || n_block_b); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a])); + + size_type n_block_b_left = n_block_b; + size_type n_block_a_left = n_block_a; + size_type n_block_left = n_block_b + n_block_a; + RandItKeys key_mid(key_first + n_block_a); + + RandIt buffer = first - l_block; + RandIt first1 = first; + RandIt last1 = first1 + l_irreg1; + RandIt first2 = last1; + RandIt const irreg2 = first2 + n_block_left*l_block; + bool is_range1_A = true; + + RandItKeys key_range2(key_first); + + //////////////////////////////////////////////////////////////////////////// + //Process all regular blocks before the irregular B block + //////////////////////////////////////////////////////////////////////////// + size_type min_check = n_block_a == n_block_left ? 0u : n_block_a; + size_type max_check = min_value(min_check+1, n_block_left); + for (; n_block_left; --n_block_left, ++key_range2, min_check -= min_check != 0, max_check -= max_check != 0) { + size_type const next_key_idx = find_next_block(key_range2, key_comp, first2, l_block, min_check, max_check, comp); + max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left); + RandIt const first_min = first2 + next_key_idx*l_block; + RandIt const last_min = first_min + l_block; (void)last_min; + RandIt const last2 = first2 + l_block; + + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first1, last1, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_left || is_sorted(first_min, last_min, comp)); + + //Check if irregular b block should go here. 
+ //If so, break to the special code handling the irregular block + if (!n_block_b_left && + ( (l_irreg2 && comp(*irreg2, *first_min)) || (!l_irreg2 && is_range1_A)) ){ + break; + } + + RandItKeys const key_next(key_range2 + next_key_idx); + bool const is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_next, *key_mid); + + bool const is_buffer_middle = last1 == buffer; + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT( ( is_buffer_middle && size_type(first2-buffer) == l_block && buffer == last1) || + (!is_buffer_middle && size_type(first1-buffer) == l_block && first2 == last1)); + + if(is_range1_A == is_range2_A){ + BOOST_ASSERT((first1 == last1) || !comp(*first_min, last1[-1])); + if(!is_buffer_middle){ + buffer = op(forward_t(), first1, last1, buffer); + } + swap_and_update_key(next_key_idx != 0, key_next, key_range2, key_mid, first2, last2, first_min); + first1 = first2; + last1 = last2; + } + else { + RandIt unmerged; + RandIt buf_beg; + RandIt buf_end; + if(is_buffer_middle){ + buf_end = buf_beg = first2 - (last1-first1); + unmerged = op_partial_merge_and_save( first1, last1, first2, last2, first_min + , buf_beg, buf_end, comp, op, is_range1_A); + } + else{ + buf_beg = first1; + buf_end = last1; + unmerged = op_partial_merge_and_save + (buffer, buffer+(last1-first1), first2, last2, first_min, buf_beg, buf_end, comp, op, is_range1_A); + } + (void)unmerged; + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, unmerged, comp)); + + swap_and_update_key( next_key_idx != 0, key_next, key_range2, key_mid, first2, last2 + , last_min - size_type(last2 - first2)); + + if(buf_beg != buf_end){ //range2 exhausted: is_buffer_middle for the next iteration + first1 = buf_beg; + last1 = buf_end; + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(buf_end == (last2-l_block)); + buffer = last1; + } + else{ //range1 exhausted: !is_buffer_middle for the next iteration + first1 = first2; + last1 = last2; + buffer = first2 - l_block; + is_range1_A = is_range2_A; + } + } + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT( (is_range2_A && n_block_a_left) || (!is_range2_A && n_block_b_left)); + is_range2_A ? 
--n_block_a_left : --n_block_b_left; + first2 = last2; + } + + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_range2 + n_block_left, key_comp, *key_mid)); + BOOST_ASSERT(!n_block_b_left); + + //////////////////////////////////////////////////////////////////////////// + //Process remaining range 1 left before the irregular B block + //////////////////////////////////////////////////////////////////////////// + bool const is_buffer_middle = last1 == buffer; + RandIt first_irr2 = irreg2; + RandIt const last_irr2 = first_irr2 + l_irreg2; + if(l_irreg2 && is_range1_A){ + if(is_buffer_middle){ + first1 = skip_until_merge(first1, last1, *first_irr2, comp); + //Even if we copy backward, no overlapping occurs so use forward copy + //that can be faster specially with trivial types + RandIt const new_first1 = first2 - (last1 - first1); + op(forward_t(), first1, last1, new_first1); + first1 = new_first1; + last1 = first2; + buffer = first1 - l_block; + } + buffer = op_partial_merge_impl(first1, last1, first_irr2, last_irr2, buffer, comp, op); + buffer = op(forward_t(), first1, last1, buffer); + } + else if(!is_buffer_middle){ + buffer = op(forward_t(), first1, last1, buffer); + } + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, buffer, comp)); + + //////////////////////////////////////////////////////////////////////////// + //Process irregular B block and remaining A blocks + //////////////////////////////////////////////////////////////////////////// + buffer = op_merge_blocks_with_irreg + ( key_range2, key_mid, key_comp, first2, first_irr2, last_irr2 + , buffer, l_block, n_block_left, min_check, max_check, comp, false, op); + buffer = op(forward_t(), first_irr2, last_irr2, buffer);(void)buffer; + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, buffer, comp)); +} + +// first - first element to merge. +// first[-l_block, 0) - buffer (if use_buf == true) +// l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded +// keys - sequence of keys, in same order as blocks. key +void merge_blocks_left + ( RandItKeys const key_first + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits::size_type const l_block + , typename iterator_traits::size_type const l_irreg1 + , typename iterator_traits::size_type const n_block_a + , typename iterator_traits::size_type const n_block_b + , typename iterator_traits::size_type const l_irreg2 + , Compare comp + , bool const xbuf_used) +{ + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + needed_keys_count(n_block_a, n_block_b), key_comp, key_first[n_block_a])); + if(xbuf_used){ + op_merge_blocks_left + (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op()); + } + else{ + op_merge_blocks_left + (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op()); } } -template -typename iterator_traits::size_type - buffered_merge_blocks - ( RandIt const first, RandIt const last - , typename iterator_traits::size_type const input_combined_size - , Compare comp - , adaptive_xbuf::value_type> &xbuf) +// first - first element to merge. +// [first+l_block*(n_bef_irreg2+n_aft_irreg2)+l_irreg2, first+l_block*(n_bef_irreg2+n_aft_irreg2+1)+l_irreg2) - buffer +// l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded +// keys - sequence of keys, in same order as blocks. 
key +void merge_blocks_right + ( RandItKeys const key_first + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits::size_type const l_block + , typename iterator_traits::size_type const n_block_a + , typename iterator_traits::size_type const n_block_b + , typename iterator_traits::size_type const l_irreg2 + , Compare comp + , bool const xbuf_used) { - typedef typename iterator_traits::size_type size_type; - size_type combined_size = input_combined_size; + merge_blocks_left + ( make_reverse_iterator(key_first + needed_keys_count(n_block_a, n_block_b)) + , inverse(key_comp) + , make_reverse_iterator(first + ((n_block_a+n_block_b)*l_block+l_irreg2)) + , l_block + , l_irreg2 + , n_block_b + , n_block_a + , 0 + , inverse(comp), xbuf_used); +} - for( size_type const elements_in_blocks = size_type(last - first) - ; elements_in_blocks > combined_size && size_type(xbuf.capacity()) >= combined_size - ; combined_size *=2){ - RandIt merge_point = first; - while(size_type(last - merge_point) > 2*combined_size) { - RandIt const second_half = merge_point+combined_size; - RandIt const next_merge_point = second_half+combined_size; - buffered_merge(merge_point, second_half, next_merge_point, comp, xbuf); - merge_point = next_merge_point; +template +void op_merge_blocks_with_buf + ( RandItKeys key_first + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits::size_type const l_block + , typename iterator_traits::size_type const l_irreg1 + , typename iterator_traits::size_type const n_block_a + , typename iterator_traits::size_type const n_block_b + , typename iterator_traits::size_type const l_irreg2 + , Compare comp + , Op op + , RandItBuf const buf_first) +{ + typedef typename iterator_traits::size_type size_type; + size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count; + //BOOST_ASSERT(n_block_a || n_block_b); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a])); + + size_type n_block_b_left = n_block_b; + size_type n_block_a_left = n_block_a; + size_type n_block_left = n_block_b + n_block_a; + RandItKeys key_mid(key_first + n_block_a); + + RandItBuf buffer = buf_first; + RandItBuf buffer_end = buffer; + RandIt first1 = first; + RandIt last1 = first1 + l_irreg1; + RandIt first2 = last1; + RandIt const first_irr2 = first2 + n_block_left*l_block; + bool is_range1_A = true; + + RandItKeys key_range2(key_first); + + //////////////////////////////////////////////////////////////////////////// + //Process all regular blocks before the irregular B block + //////////////////////////////////////////////////////////////////////////// + size_type min_check = n_block_a == n_block_left ? 0u : n_block_a; + size_type max_check = min_value(min_check+1, n_block_left); + for (; n_block_left; --n_block_left, ++key_range2, min_check -= min_check != 0, max_check -= max_check != 0) { + size_type const next_key_idx = find_next_block(key_range2, key_comp, first2, l_block, min_check, max_check, comp); + max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left); + RandIt first_min = first2 + next_key_idx*l_block; + RandIt const last_min = first_min + l_block; (void)last_min; + RandIt const last2 = first2 + l_block; + + bool const buffer_empty = buffer == buffer_end; (void)buffer_empty; + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(buffer_empty ? 
is_sorted(first1, last1, comp) : is_sorted(buffer, buffer_end, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_left || is_sorted(first_min, last_min, comp)); + + //Check if irregular b block should go here. + //If so, break to the special code handling the irregular block + if (!n_block_b_left && + ( (l_irreg2 && comp(*first_irr2, *first_min)) || (!l_irreg2 && is_range1_A)) ){ + break; } - if(size_type(last-merge_point) > combined_size){ - buffered_merge(merge_point, merge_point+combined_size, last, comp, xbuf); + + RandItKeys const key_next(key_range2 + next_key_idx); + bool const is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_next, *key_mid); + + if(is_range1_A == is_range2_A){ + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((first1 == last1) || (buffer_empty ? !comp(*first_min, last1[-1]) : !comp(*first_min, buffer_end[-1]))); + //If buffered, put those elements in place + RandIt res = op(forward_t(), buffer, buffer_end, first1); + buffer = buffer_end = buf_first; + BOOST_ASSERT(buffer_empty || res == last1); (void)res; + swap_and_update_key(next_key_idx != 0, key_next, key_range2, key_mid, first2, last2, first_min); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, last_min, comp)); + first1 = first2; + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, first1, comp)); } + else { + RandIt const unmerged = op_partial_merge_and_save(first1, last1, first2, last2, first_min, buffer, buffer_end, comp, op, is_range1_A); + bool const is_range_1_empty = buffer == buffer_end; + BOOST_ASSERT(is_range_1_empty || (buffer_end-buffer) == (last1+l_block-unmerged)); + if(is_range_1_empty){ + buffer = buffer_end = buf_first; + first_min = last_min - (last2 - first2); + } + else{ + first_min = last_min; + } + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!is_range_1_empty || (last_min-first_min) == (last2-unmerged)); + swap_and_update_key(next_key_idx != 0, key_next, key_range2, key_mid, first2, last2, first_min); + + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, last_min, comp)); + is_range1_A ^= is_range_1_empty; + first1 = unmerged; + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, unmerged, comp)); + } + BOOST_ASSERT( (is_range2_A && n_block_a_left) || (!is_range2_A && n_block_b_left)); + is_range2_A ? 
--n_block_a_left : --n_block_b_left; + last1 += l_block; + first2 = last2; + } + + RandIt res = op(forward_t(), buffer, buffer_end, first1); (void)res; + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, res, comp)); + + //////////////////////////////////////////////////////////////////////////// + //Process irregular B block and remaining A blocks + //////////////////////////////////////////////////////////////////////////// + RandIt const last_irr2 = first_irr2 + l_irreg2; + op(forward_t(), first_irr2, first_irr2+l_irreg2, buf_first); + buffer = buf_first; + buffer_end = buffer+l_irreg2; + + reverse_iterator rbuf_beg(buffer_end); + RandIt dest = op_merge_blocks_with_irreg + ( make_reverse_iterator(key_first + n_block_b + n_block_a), make_reverse_iterator(key_mid), inverse(key_comp) + , make_reverse_iterator(first_irr2), rbuf_beg + , make_reverse_iterator(buffer), make_reverse_iterator(last_irr2) + , l_block, n_block_left, 0, n_block_left + , inverse(comp), true, op).base(); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(dest, last_irr2, comp)); + + buffer_end = rbuf_beg.base(); + BOOST_ASSERT((dest-last1) == (buffer_end-buffer)); + op_merge_with_left_placed(is_range1_A ? first1 : last1, last1, dest, buffer, buffer_end, comp, op); + + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, last_irr2, comp)); +} + +template +void merge_blocks_with_buf + ( RandItKeys key_first + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits::size_type const l_block + , typename iterator_traits::size_type const l_irreg1 + , typename iterator_traits::size_type const n_block_a + , typename iterator_traits::size_type const n_block_b + , typename iterator_traits::size_type const l_irreg2 + , Compare comp + , RandItBuf const buf_first + , bool const xbuf_used) +{ + if(xbuf_used){ + op_merge_blocks_with_buf + (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), buf_first); + } + else{ + op_merge_blocks_with_buf + (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), buf_first); } - return combined_size; } template @@ -1514,7 +1587,7 @@ typename iterator_traits::size_type template typename iterator_traits::size_type - op_merge_left_step + op_merge_left_step_multiple ( RandIt first_block , typename iterator_traits::size_type const elements_in_blocks , typename iterator_traits::size_type l_merged @@ -1529,14 +1602,17 @@ typename iterator_traits::size_type RandIt pos = first_block; while((elements_in_blocks - p0) > 2*l_merged) { op_merge_left(pos-l_merged, pos, pos+l_merged, pos+2*l_merged, comp, op); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, pos+l_merged, comp)); p0 += 2*l_merged; pos = first_block+p0; } if((elements_in_blocks-p0) > l_merged) { op_merge_left(pos-l_merged, pos, pos+l_merged, first_block+elements_in_blocks, comp, op); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, pos-l_merged+(first_block+elements_in_blocks-pos), comp)); } else { op(forward_t(), pos, first_block+elements_in_blocks, pos-l_merged); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, first_block+elements_in_blocks-l_merged, comp)); } first_block -= l_merged; l_left_space -= l_merged; @@ -1545,7 +1621,7 @@ typename iterator_traits::size_type } template -void op_merge_right_step +void op_merge_right_step_once ( RandIt first_block , typename iterator_traits::size_type const elements_in_blocks , typename iterator_traits::size_type const l_build_buf @@ -1613,67 +1689,131 @@ typename 
iterator_traits::size_type ////////////////////////////////// size_type l_merged = 0u; -// if(xbuf.capacity()>=2*l_build_buf){ - if(!l_build_buf){ - l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp); - //2*l_build_buf already merged, now try to merge further - //using classic in-place mergesort if enough auxiliary memory is available - return buffered_merge_blocks - (first_block, first_block + elements_in_blocks, l_merged, comp, xbuf); + BOOST_ASSERT(l_build_buf); + //If there is no enough buffer for the insertion sort step, just avoid the external buffer + size_type kbuf = min_value(l_build_buf, size_type(xbuf.capacity())); + kbuf = kbuf < l_base ? 0 : kbuf; + + if(kbuf){ + //Backup internal buffer values in external buffer so they can be overwritten + xbuf.move_assign(first+l_build_buf-kbuf, kbuf); + l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op()); + + //Now combine them using the buffer. Elements from buffer can be + //overwritten since they've been saved to xbuf + l_merged = op_merge_left_step_multiple + ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op()); + + //Restore internal buffer from external buffer unless kbuf was l_build_buf, + //in that case restoration will happen later + if(kbuf != l_build_buf){ + boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks); + } } else{ - //If there is no enough buffer for the insertion sort step, just avoid the external buffer - size_type kbuf = min_value(l_build_buf, size_type(xbuf.capacity())); - kbuf = kbuf < l_base ? 0 : kbuf; + l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp); + rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks); + } - if(kbuf){ - //Backup internal buffer values in external buffer so they can be overwritten - xbuf.move_assign(first+l_build_buf-kbuf, kbuf); - l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op()); + //Now combine elements using the buffer. Elements from buffer can't be + //overwritten since xbuf was not big enough, so merge swapping elements. + l_merged = op_merge_left_step_multiple + (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op()); - //Now combine them using the buffer. Elements from buffer can be - //overwritten since they've been saved to xbuf - l_merged = op_merge_left_step - ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op()); + BOOST_ASSERT(l_merged == l_build_buf); - //Restore internal buffer from external buffer unless kbuf was l_build_buf, - //in that case restoration will happen later - if(kbuf != l_build_buf){ - boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks); + ////////////////////////////////// + // Start of merge to right step + ////////////////////////////////// + + //If kbuf is l_build_buf then we can merge right without swapping + //Saved data is still in xbuf + if(kbuf && kbuf == l_build_buf){ + op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op()); + //Restore internal buffer from external buffer if kbuf was l_build_buf. + //as this operation was previously delayed. 
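+      //kbuf equals l_build_buf in this branch, so the whole internal buffer was saved into xbuf and can simply be moved back over [first, first + kbuf)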
+ boost::move(xbuf.data(), xbuf.data() + kbuf, first); + } + else{ + op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op()); + } + xbuf.clear(); + //2*l_build_buf or total already merged + return min_value(elements_in_blocks, 2*l_build_buf); +} + +template +void adaptive_sort_combine_blocks + ( RandItKeys const keys + , KeyCompare key_comp + , RandIt const first + , typename iterator_traits::size_type const len + , typename iterator_traits::size_type const l_prev_merged + , typename iterator_traits::size_type const l_block + , bool const use_buf + , bool const xbuf_used + , XBuf & xbuf + , Compare comp + , bool merge_left) +{ + (void)xbuf; + typedef typename iterator_traits::size_type size_type; + + size_type const l_reg_combined = 2*l_prev_merged; + size_type l_irreg_combined = 0; + size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined); + size_type const n_reg_combined = len/l_reg_combined; + RandIt combined_first = first; + + (void)l_total_combined; + BOOST_ASSERT(l_total_combined <= len); + + size_type const max_i = n_reg_combined + (l_irreg_combined != 0); + + if(merge_left || !use_buf) { + for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_reg_combined) { + //Now merge blocks + bool const is_last = combined_i==n_reg_combined; + size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; + + range_xbuf rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first); + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( keys, key_comp, l_cur_combined + , l_prev_merged, l_block, rbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combpar: ", len + l_block); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp)); + if(!use_buf){ + merge_blocks_bufferless + (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp); } + else{ + merge_blocks_left + (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used); + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After merge_blocks_l: ", len + l_block); } - else{ - l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp); - rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks); + } + else{ + combined_first += l_reg_combined*(max_i-1); + for( size_type combined_i = max_i; combined_i--; combined_first -= l_reg_combined) { + bool const is_last = combined_i==n_reg_combined; + size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; + + RandIt const combined_last(combined_first+l_cur_combined); + range_xbuf rbuf(combined_last, xbuf_used ? 
(combined_last+l_block) : combined_last); + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( keys, key_comp, l_cur_combined + , l_prev_merged, l_block, rbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combpar: ", len + l_block); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp)); + BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp)); + merge_blocks_right + (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After merge_blocks_r: ", len + l_block); } - - //Now combine elements using the buffer. Elements from buffer can't be - //overwritten since xbuf was not big enough, so merge swapping elements. - l_merged = op_merge_left_step - (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op()); - - BOOST_ASSERT(l_merged == l_build_buf); - - ////////////////////////////////// - // Start of merge to right step - ////////////////////////////////// - - //If kbuf is l_build_buf then we can merge right without swapping - //Saved data is still in xbuf - if(kbuf && kbuf == l_build_buf){ - op_merge_right_step(first, elements_in_blocks, l_build_buf, comp, move_op()); - //Restore internal buffer from external buffer if kbuf was l_build_buf. - //as this operation was previously delayed. - boost::move(xbuf.data(), xbuf.data() + kbuf, first); - } - else{ - op_merge_right_step(first, elements_in_blocks, l_build_buf, comp, swap_op()); - } - xbuf.clear(); - //2*l_build_buf already merged, now try to merge further - //using classic in-place mergesort if enough auxiliary memory is available - return buffered_merge_blocks - (first_block, first_block + elements_in_blocks, l_build_buf*2, comp, xbuf); } } @@ -1702,7 +1842,7 @@ bool adaptive_sort_combine_all_blocks } bool prev_merge_left = true; - size_type l_prev_total_combined = 0u, l_prev_block = 0; + size_type l_prev_total_combined = l_merged, l_prev_block = 0; bool prev_use_internal_buf = true; for( size_type n = 0; l_data > l_merged @@ -1738,8 +1878,8 @@ bool adaptive_sort_combine_all_blocks move_data_forward(buf_end, l_diff, buf_beg, common_xbuf); } } + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After move_data : ", l_data + l_intbuf); } - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After move_data : ", l_data + l_intbuf); //Combine to form l_merged*2 segments if(n_keys){ @@ -1853,17 +1993,22 @@ bool adaptive_sort_build_params //segments of size l_build_buf*2, maximizing the classic merge phase. 
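+   //l_intbuf starts at roughly the square root of len (ceil_sqrt_multiple also outputs l_base, the small block length used by the insertion sort step)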
l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base)); + //The internal buffer can be expanded if there is enough external memory + while(xbuf.capacity() >= l_intbuf*2){ + l_intbuf *= 2; + } + //This is the minimum number of keys to implement the ideal algorithm // //l_intbuf is used as buffer plus the key count - size_type n_min_ideal_keys = l_intbuf-1u; + size_type n_min_ideal_keys = l_intbuf-1; while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){ --n_min_ideal_keys; } - ++n_min_ideal_keys; - BOOST_ASSERT(n_min_ideal_keys < l_intbuf); + n_min_ideal_keys += 1; + BOOST_ASSERT(n_min_ideal_keys <= l_intbuf); - if(xbuf.template supports_aligned_trailing(l_intbuf, n_min_ideal_keys)){ + if(xbuf.template supports_aligned_trailing(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){ n_keys = 0u; l_build_buf = l_intbuf; } @@ -1875,18 +2020,29 @@ bool adaptive_sort_build_params //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed, //(to be used for keys in combine_all_blocks) as the whole l_build_buf //will be backuped in the buffer during build_blocks. - bool const non_unique_buf = xbuf.capacity() >= 2*l_intbuf; - size_type const to_collect = non_unique_buf ? l_intbuf : l_intbuf*2; + bool const non_unique_buf = xbuf.capacity() >= l_intbuf; + size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2; size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf); //If available memory is 2*sqrt(l), then for "build_params" //the situation is the same as if 2*l_intbuf were collected. - if(non_unique_buf && (collected >= n_min_ideal_keys)) - collected += l_intbuf; - + if(non_unique_buf && collected == n_min_ideal_keys){ + l_build_buf = l_intbuf; + n_keys = n_min_ideal_keys; + } + else if(collected == 2*l_intbuf){ + //l_intbuf*2 elements found. Use all of them in the build phase + l_build_buf = l_intbuf*2; + n_keys = l_intbuf; + } + else if(collected == (n_min_ideal_keys+l_intbuf)){ + l_build_buf = l_intbuf; + n_keys = n_min_ideal_keys; + } //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix //is possible (due to very low unique keys), then go to a slow sort based on rotations. - if(collected < (n_min_ideal_keys+l_intbuf)){ + else{ + BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf)); if(collected < 4){ //No combination possible with less that 4 keys return false; } @@ -1902,24 +2058,12 @@ bool adaptive_sort_build_params l_intbuf = 0; l_build_buf = n_keys; } - else if((collected - l_intbuf) >= l_intbuf){ - //l_intbuf*2 elements found. 
Use all of them in the build phase - l_build_buf = l_intbuf*2; - n_keys = l_intbuf; - } - else{ - l_build_buf = l_intbuf; - n_keys = n_min_ideal_keys; - } BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf); } return true; } - -#define BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF - template inline void adaptive_merge_combine_blocks( RandIt first , typename iterator_traits::size_type len1 @@ -1937,37 +2081,39 @@ inline void adaptive_merge_combine_blocks( RandIt first size_type const len = len1+len2; size_type const l_combine = len-collected; size_type const l_combine1 = len1-collected; - size_type n_bef_irreg2, n_aft_irreg2, l_irreg1, l_irreg2, midkey_idx; - if(n_keys){ + if(n_keys){ RandIt const first_data = first+collected; RandIt const keys = first; - combine_params( keys, comp, first_data, l_combine - , l_combine1, l_block, xbuf, comp - , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true, false); //Outputs BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len); if(xbuf_used){ + if(xbuf.size() < l_block){ + xbuf.initialize_until(l_block, *first); + } BOOST_ASSERT(xbuf.size() >= l_block); + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( keys, comp, l_combine + , l_combine1, l_block, xbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs merge_blocks_with_buf - (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, xbuf_used); + (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), xbuf_used); BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg xbf: ", len); } - else if(use_internal_buf){ - #ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF - range_xbuf rbuf(first_data-l_block, first_data); - merge_blocks_with_buf - (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, rbuf, xbuf_used); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: ", len); - #else - merge_blocks_left - (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg lft: ", len); - #endif - } else{ - merge_blocks_bufferless - (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg xbf: ", len); + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( keys, comp, l_combine + , l_combine1, l_block, xbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs + if(use_internal_buf){ + merge_blocks_with_buf + (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, first_data-l_block, xbuf_used); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: ", len); + } + else{ + merge_blocks_bufferless + (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg nbf: ", len); + } } } else{ @@ -1976,17 +2122,17 @@ inline void adaptive_merge_combine_blocks( RandIt first xbuf.initialize_until(l_block, *first); } size_type *const uint_keys = xbuf.template aligned_trailing(l_block); - combine_params( uint_keys, less(), first, l_combine - , l_combine1, l_block, xbuf, comp - , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true, true); //Outputs + size_type n_block_a, n_block_b, l_irreg1, l_irreg2; + combine_params( uint_keys, less(), l_combine + , l_combine1, l_block, xbuf + , n_block_a, n_block_b, l_irreg1, l_irreg2, true); //Outputs BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len); 
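+      //No element keys are used in this path: integer keys kept in the trailing storage of xbuf act as the imitation buffer, while its first l_block elements (initialized above if needed) serve as the merge buffer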
BOOST_ASSERT(xbuf.size() >= l_block); merge_blocks_with_buf - (uint_keys, uint_keys[midkey_idx], less(), first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, true); + (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), true); xbuf.clear(); BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: ", len); } - } template @@ -2017,27 +2163,11 @@ inline void adaptive_merge_final_merge( RandIt first } } else{ - #ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF xbuf.clear(); stable_sort(first, first+collected, comp, xbuf); BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b srt: ", len); stable_merge(first, first+collected, first+len, comp, xbuf); BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b mrg: ", len); - #else - xbuf.clear(); - stable_sort(first+len-l_block, first+len, comp, xbuf); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A buf srt: ", len); - RandIt const pos1 = lower_bound(first+n_keys, first+len-l_block, first[len-1], comp); - RandIt const pos2 = rotate_gcd(pos1, first+len-l_block, first+len); - stable_merge(first+n_keys, pos1, pos2, antistable(comp), xbuf); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A buf mrg: ", len); - if(n_keys){ - stable_sort(first, first+n_keys, comp, xbuf); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key srt: ", len); - stable_merge(first, first+n_keys, first+len, comp, xbuf); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: ", len); - } - #endif } } else{ @@ -2050,24 +2180,29 @@ inline void adaptive_merge_final_merge( RandIt first } template -inline SizeType adaptive_merge_n_keys_intbuf(SizeType l_block, SizeType len, Xbuf & xbuf, SizeType &l_intbuf_inout) +inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout) { typedef SizeType size_type; + size_type l_block = rl_block; size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block; + while(xbuf.capacity() >= l_block*2){ + l_block *= 2; + } + //This is the minimum number of keys to implement the ideal algorithm - //ceil(len/l_block) - 1 (as the first block is used as buffer) - size_type n_keys = len/l_block+1; - while(n_keys >= (len-l_intbuf-n_keys)/l_block){ + size_type n_keys = len1/l_block+len2/l_block; + while(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)){ --n_keys; } ++n_keys; - //BOOST_ASSERT(n_keys < l_block); + BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)); if(xbuf.template supports_aligned_trailing(l_block, n_keys)){ n_keys = 0u; } l_intbuf_inout = l_intbuf; + rl_block = l_block; return n_keys; } @@ -2106,7 +2241,7 @@ inline SizeType adaptive_merge_n_keys_intbuf(SizeType l_block, SizeType len, Xbu // 2*merged_block/csqrtlen keys are sorted to be used as markers // * Groups are selection-sorted by first or last element (depending wheter they // merged to left or right) and keys are reordered accordingly as an imitation-buffer. -// * Elements of each block pair is merged using the csqrtlen buffer taking into account +// * Elements of each block pair are merged using the csqrtlen buffer taking into account // if they belong to the first half or second half (marked by the key). 
// // * In the final merge step leading elements (2*csqrtlen) are sorted and merged with @@ -2170,7 +2305,7 @@ void adaptive_sort_impl stable_sort(first, first+len, comp, xbuf); return; } - + BOOST_ASSERT(l_build_buf); //Otherwise, continue the adaptive_sort BOOST_MOVE_ADAPTIVE_SORT_PRINT("\n After collect_unique: ", len); size_type const n_key_plus_buf = l_intbuf+n_keys; @@ -2203,13 +2338,14 @@ // Explanation of the "combine_blocks" step: // // * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements. -// Remaining elements that can't form a group are grouped in the front of those elements. +// Remaining elements that can't form a group are grouped in front of those elements. // * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements. // Remaining elements that can't form a group are grouped in the back of those elements. -// * Groups are selection-sorted by first or last element (depending wheter they -// merged to left or right) and keys are reordered accordingly as an imitation-buffer. -// * Elements of each block pair is merged using the csqrtlen buffer taking into account -// if they belong to the first half or second half (marked by the key). +// * In parallel the following two steps are performed: +// * Groups are selection-sorted by first or last element (depending on whether they +// are merged to left or right) and keys are reordered accordingly as an imitation-buffer. +// * Elements of each block pair are merged using the csqrtlen buffer taking into account +// if they belong to the first half or second half (marked by the key). // // * In the final merge step leading "to_collect" elements are merged with rotations // with the rest of merged elements in the "combine_blocks" step. @@ -2261,7 +2397,7 @@ void adaptive_merge_impl //Detail the number of keys and internal buffer. If xbuf has enough memory, no //internal buffer is needed so l_intbuf will remain 0.
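+   //Roughly one key per block of each range is needed; adaptive_merge_n_keys_intbuf above computes this and can also enlarge l_block when the external buffer is large enough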
size_type l_intbuf = 0; - size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len, xbuf, l_intbuf); + size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf); size_type const to_collect = l_intbuf+n_keys; //Try to extract needed unique values from the first range size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf); @@ -2291,6 +2427,7 @@ void adaptive_merge_impl } } + } //namespace detail_adaptive { } //namespace movelib { } //namespace boost { diff --git a/include/boost/move/algo/detail/basic_op.hpp b/include/boost/move/algo/detail/basic_op.hpp index ea0ce1b..a9369e0 100644 --- a/include/boost/move/algo/detail/basic_op.hpp +++ b/include/boost/move/algo/detail/basic_op.hpp @@ -29,6 +29,8 @@ namespace movelib { struct forward_t{}; struct backward_t{}; struct three_way_t{}; +struct three_way_forward_t{}; +struct four_way_t{}; struct move_op { @@ -50,6 +52,24 @@ struct move_op *dest2it = boost::move(*dest1it); *dest1it = boost::move(*srcit); } + + template + DestinationIt2 operator()(three_way_forward_t, SourceIt srcit, SourceIt srcitend, DestinationIt1 dest1it, DestinationIt2 dest2it) + { + //Destination2 range can overlap SourceIt range so avoid boost::move + while(srcit != srcitend){ + this->operator()(three_way_t(), srcit++, dest1it++, dest2it++); + } + return dest2it; + } + + template + void operator()(four_way_t, SourceIt srcit, DestinationIt1 dest1it, DestinationIt2 dest2it, DestinationIt3 dest3it) + { + *dest3it = boost::move(*dest2it); + *dest2it = boost::move(*dest1it); + *dest1it = boost::move(*srcit); + } }; struct swap_op @@ -74,8 +94,28 @@ struct swap_op *dest1it = boost::move(*srcit); *srcit = boost::move(tmp); } + + template + DestinationIt2 operator()(three_way_forward_t, SourceIt srcit, SourceIt srcitend, DestinationIt1 dest1it, DestinationIt2 dest2it) + { + while(srcit != srcitend){ + this->operator()(three_way_t(), srcit++, dest1it++, dest2it++); + } + return dest2it; + } + + template + void operator()(four_way_t, SourceIt srcit, DestinationIt1 dest1it, DestinationIt2 dest2it, DestinationIt3 dest3it) + { + typename ::boost::movelib::iterator_traits::value_type tmp(boost::move(*dest3it)); + *dest3it = boost::move(*dest2it); + *dest2it = boost::move(*dest1it); + *dest1it = boost::move(*srcit); + *srcit = boost::move(tmp); + } }; + }} //namespace boost::movelib #endif //BOOST_MOVE_ALGO_BASIC_OP diff --git a/include/boost/move/algo/detail/bufferless_merge_sort.hpp b/include/boost/move/algo/detail/bufferless_merge_sort.hpp deleted file mode 100644 index a8e2763..0000000 --- a/include/boost/move/algo/detail/bufferless_merge_sort.hpp +++ /dev/null @@ -1,120 +0,0 @@ -////////////////////////////////////////////////////////////////////////////// -// -// (C) Copyright Ion Gaztanaga 2015-2016. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) -// -// See http://www.boost.org/libs/move for documentation. -// -////////////////////////////////////////////////////////////////////////////// - -//! 
\file - -#ifndef BOOST_MOVE_ALGO_BUFFERLESS_MERGE_SORT_HPP -#define BOOST_MOVE_ALGO_BUFFERLESS_MERGE_SORT_HPP - -#ifndef BOOST_CONFIG_HPP -# include -#endif -# -#if defined(BOOST_HAS_PRAGMA_ONCE) -# pragma once -#endif - -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include - -namespace boost { -namespace movelib { -// @cond -namespace detail_bufferless_mergesort { - -static const std::size_t UnbufferedMergeSortInsertionSortThreshold = 16; - -//A in-placed version based on: -//Jyrki Katajainen, Tomi Pasanen, Jukka Teuhola. -//``Practical in-place mergesort''. Nordic Journal of Computing, 1996. - -template -void bufferless_merge_sort(RandIt first, RandIt last, Compare comp); - -template -void swap_sort(RandIt const first, RandIt const last, RandIt const buffer_first, RandIt const buffer_last, Compare comp, bool buffer_at_right) -{ - typedef typename iterator_traits::size_type size_type; - if (size_type(last - first) > UnbufferedMergeSortInsertionSortThreshold) { - RandIt m = first + (last - first) / 2; - bufferless_merge_sort(first, m, comp); - bufferless_merge_sort(m, last, comp); - if(buffer_at_right){ - //Use antistable to minimize movements (if equal, move first half elements - //to maximize the chance last half elements are already in place. - boost::movelib::swap_merge_right(first, m, last, buffer_last, boost::movelib::antistable(comp)); - } - else{ - boost::movelib::swap_merge_left(buffer_first, first, m, last, comp); - } - } - else - boost::movelib::insertion_sort_swap(first, last, buffer_first, comp); -} - -template -void bufferless_merge_sort(RandIt const first, RandIt const last, Compare comp) -{ - typedef typename iterator_traits::size_type size_type; - size_type len = size_type(last - first); - if (len > size_type(UnbufferedMergeSortInsertionSortThreshold)) { - len /= 2; - RandIt h = last - len; //ceil(half) - RandIt f = h - len; //ceil(first) - swap_sort(f, h, h, last, comp, true); //[h, last) contains sorted elements - - //Divide unsorted first half in two - len = size_type(h - first); - while (len > size_type(UnbufferedMergeSortInsertionSortThreshold)) { - len /= 2; - RandIt n = h; //new end - h = n - len; //ceil(half') - f = h - len; //ceil(first') - swap_sort(h, n, f, h, comp, false); // the first half of the previous working area [f, h) - //contains sorted elements: working area in the middle [h, n) - //Now merge small (left) sorted with big (right) sorted (buffer is between them) - swap_merge_with_right_placed(f, h, h, n, last, comp); - } - - boost::movelib::insertion_sort(first, h, comp); - boost::movelib::merge_bufferless(first, h, last, comp); - } - else{ - boost::movelib::insertion_sort(first, last, comp); - } -} - -} //namespace detail_bufferless_mergesort { - -// @endcond - -//Unstable bufferless merge sort -template -void bufferless_merge_sort(RandIt first, RandIt last, Compare comp) -{ - detail_bufferless_mergesort::bufferless_merge_sort(first, last, comp); -} - -}} //namespace boost::movelib - -#include - -#endif //#ifndef BOOST_MOVE_ALGO_BUFFERLESS_MERGE_SORT_HPP diff --git a/include/boost/move/algo/detail/merge.hpp b/include/boost/move/algo/detail/merge.hpp index 11d5740..988ffd3 100644 --- a/include/boost/move/algo/detail/merge.hpp +++ b/include/boost/move/algo/detail/merge.hpp @@ -492,7 +492,7 @@ void swap_merge_with_right_placed op_merge_with_right_placed(first, last, dest_first, r_first, r_last, comp, swap_op()); } -// [r_first, r_last) are already in the right part of the destination range. 
+// [first, last) are already in the right part of the destination range. template void op_merge_with_left_placed ( BidirOutIterator const first, BidirOutIterator last, BidirOutIterator dest_last @@ -525,7 +525,7 @@ void op_merge_with_left_placed // @endcond -// [r_first, r_last) are already in the right part of the destination range. +// [first, last) are already in the right part of the destination range. template void merge_with_left_placed ( BidirOutIterator const first, BidirOutIterator last, BidirOutIterator dest_last @@ -587,6 +587,50 @@ void uninitialized_merge_with_right_placed merge_with_right_placed(first, last, original_r_first, r_first, r_last, comp); } +/* +// [r_first, r_last) are already in the right part of the destination range. +// [dest_first, r_first) is uninitialized memory +template +void uninitialized_merge_with_left_placed + ( BidirOutIterator dest_first, BidirOutIterator r_first, BidirOutIterator r_last + , BidirIterator first, BidirIterator last + , Compare comp) +{ + BOOST_ASSERT((last - first) == (r_last - r_first)); + typedef typename iterator_traits::value_type value_type; + BidirOutIterator const original_r_last = r_last; + + destruct_n d(&*dest_last); + + while ( first != last && dest_first != original_r_first ) { + if (r_first == r_last) { + for(; dest_first != original_r_first; ++dest_first, ++first){ + ::new(&*dest_first) value_type(::boost::move(*first)); + d.incr(); + } + d.release(); + BidirOutIterator end = ::boost::move(first, last, original_r_first); + BOOST_ASSERT(end == r_last); + (void)end; + return; + } + else if (comp(*r_first, *first)) { + ::new(&*dest_first) value_type(::boost::move(*r_first)); + d.incr(); + ++r_first; + } + else { + ::new(&*dest_first) value_type(::boost::move(*first)); + d.incr(); + ++first; + } + ++dest_first; + } + d.release(); + merge_with_right_placed(first, last, original_r_first, r_first, r_last, comp); +} +*/ + } //namespace movelib { } //namespace boost { diff --git a/include/boost/move/detail/move_helpers.hpp b/include/boost/move/detail/move_helpers.hpp index a2502bf..1713844 100644 --- a/include/boost/move/detail/move_helpers.hpp +++ b/include/boost/move/detail/move_helpers.hpp @@ -69,13 +69,13 @@ {}; #define BOOST_MOVE_CONVERSION_AWARE_CATCH_COMMON(PUB_FUNCTION, TYPE, RETURN_VALUE, FWD_FUNCTION)\ - RETURN_VALUE PUB_FUNCTION(BOOST_MOVE_CATCH_CONST(TYPE) x)\ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(BOOST_MOVE_CATCH_CONST(TYPE) x)\ { return FWD_FUNCTION(static_cast(x)); }\ \ - RETURN_VALUE PUB_FUNCTION(BOOST_MOVE_CATCH_RVALUE(TYPE) x) \ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(BOOST_MOVE_CATCH_RVALUE(TYPE) x) \ { return FWD_FUNCTION(::boost::move(x)); }\ \ - RETURN_VALUE PUB_FUNCTION(TYPE &x)\ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(TYPE &x)\ { return FWD_FUNCTION(const_cast(x)); }\ // #if defined(BOOST_MOVE_HELPERS_RETURN_SFINAE_BROKEN) @@ -83,12 +83,12 @@ BOOST_MOVE_CONVERSION_AWARE_CATCH_COMMON(PUB_FUNCTION, TYPE, RETURN_VALUE, FWD_FUNCTION)\ \ template\ - RETURN_VALUE PUB_FUNCTION(const BOOST_MOVE_TEMPL_PARAM &u,\ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(const BOOST_MOVE_TEMPL_PARAM &u,\ typename boost_move_conversion_aware_catch_1< ::boost::move_detail::nat, BOOST_MOVE_TEMPL_PARAM, TYPE>::type* = 0)\ { return FWD_FUNCTION(u); }\ \ template\ - RETURN_VALUE PUB_FUNCTION(const BOOST_MOVE_TEMPL_PARAM &u,\ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(const BOOST_MOVE_TEMPL_PARAM &u,\ typename boost_move_conversion_aware_catch_2< ::boost::move_detail::nat,
BOOST_MOVE_TEMPL_PARAM, TYPE>::type* = 0)\ {\ TYPE t((u));\ @@ -100,12 +100,12 @@ BOOST_MOVE_CONVERSION_AWARE_CATCH_COMMON(PUB_FUNCTION, TYPE, RETURN_VALUE, FWD_FUNCTION)\ \ template\ - typename boost_move_conversion_aware_catch_1::type\ + BOOST_MOVE_FORCEINLINE typename boost_move_conversion_aware_catch_1::type\ PUB_FUNCTION(const BOOST_MOVE_TEMPL_PARAM &u)\ { return FWD_FUNCTION(u); }\ \ template\ - typename boost_move_conversion_aware_catch_2::type\ + BOOST_MOVE_FORCEINLINE typename boost_move_conversion_aware_catch_2::type\ PUB_FUNCTION(const BOOST_MOVE_TEMPL_PARAM &u)\ {\ TYPE t((u));\ @@ -116,14 +116,14 @@ #elif (defined(_MSC_VER) && (_MSC_VER == 1600)) #define BOOST_MOVE_CONVERSION_AWARE_CATCH(PUB_FUNCTION, TYPE, RETURN_VALUE, FWD_FUNCTION)\ - RETURN_VALUE PUB_FUNCTION(BOOST_MOVE_CATCH_CONST(TYPE) x)\ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(BOOST_MOVE_CATCH_CONST(TYPE) x)\ { return FWD_FUNCTION(static_cast(x)); }\ \ - RETURN_VALUE PUB_FUNCTION(BOOST_MOVE_CATCH_RVALUE(TYPE) x) \ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(BOOST_MOVE_CATCH_RVALUE(TYPE) x) \ { return FWD_FUNCTION(::boost::move(x)); }\ \ template\ - typename ::boost::move_detail::enable_if_c\ + BOOST_MOVE_FORCEINLINE typename ::boost::move_detail::enable_if_c\ < !::boost::move_detail::is_same::value\ , RETURN_VALUE >::type\ PUB_FUNCTION(const BOOST_MOVE_TEMPL_PARAM &u)\ @@ -136,10 +136,10 @@ #else //BOOST_NO_CXX11_RVALUE_REFERENCES #define BOOST_MOVE_CONVERSION_AWARE_CATCH(PUB_FUNCTION, TYPE, RETURN_VALUE, FWD_FUNCTION)\ - RETURN_VALUE PUB_FUNCTION(BOOST_MOVE_CATCH_CONST(TYPE) x)\ - { return FWD_FUNCTION(static_cast(x)); }\ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(BOOST_MOVE_CATCH_CONST(TYPE) x)\ + { return FWD_FUNCTION(x); }\ \ - RETURN_VALUE PUB_FUNCTION(BOOST_MOVE_CATCH_RVALUE(TYPE) x) \ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(BOOST_MOVE_CATCH_RVALUE(TYPE) x) \ { return FWD_FUNCTION(::boost::move(x)); }\ // @@ -174,13 +174,13 @@ {}; #define BOOST_MOVE_CONVERSION_AWARE_CATCH_1ARG_COMMON(PUB_FUNCTION, TYPE, RETURN_VALUE, FWD_FUNCTION, ARG1, UNLESS_CONVERTIBLE_TO)\ - RETURN_VALUE PUB_FUNCTION(ARG1 arg1, BOOST_MOVE_CATCH_CONST(TYPE) x)\ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(ARG1 arg1, BOOST_MOVE_CATCH_CONST(TYPE) x)\ { return FWD_FUNCTION(arg1, static_cast(x)); }\ \ - RETURN_VALUE PUB_FUNCTION(ARG1 arg1, BOOST_MOVE_CATCH_RVALUE(TYPE) x) \ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(ARG1 arg1, BOOST_MOVE_CATCH_RVALUE(TYPE) x) \ { return FWD_FUNCTION(arg1, ::boost::move(x)); }\ \ - RETURN_VALUE PUB_FUNCTION(ARG1 arg1, TYPE &x)\ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(ARG1 arg1, TYPE &x)\ { return FWD_FUNCTION(arg1, const_cast(x)); }\ // #if defined(BOOST_MOVE_HELPERS_RETURN_SFINAE_BROKEN) @@ -188,12 +188,12 @@ BOOST_MOVE_CONVERSION_AWARE_CATCH_1ARG_COMMON(PUB_FUNCTION, TYPE, RETURN_VALUE, FWD_FUNCTION, ARG1, UNLESS_CONVERTIBLE_TO)\ \ template\ - RETURN_VALUE PUB_FUNCTION(ARG1 arg1, const BOOST_MOVE_TEMPL_PARAM &u,\ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(ARG1 arg1, const BOOST_MOVE_TEMPL_PARAM &u,\ typename boost_move_conversion_aware_catch_1arg_1::type* = 0)\ { return FWD_FUNCTION(arg1, u); }\ \ template\ - RETURN_VALUE PUB_FUNCTION(ARG1 arg1, const BOOST_MOVE_TEMPL_PARAM &u,\ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(ARG1 arg1, const BOOST_MOVE_TEMPL_PARAM &u,\ typename boost_move_conversion_aware_catch_1arg_2::type* = 0)\ {\ TYPE t((u));\ @@ -205,12 +205,12 @@ BOOST_MOVE_CONVERSION_AWARE_CATCH_1ARG_COMMON(PUB_FUNCTION, TYPE, 
RETURN_VALUE, FWD_FUNCTION, ARG1, UNLESS_CONVERTIBLE_TO)\ \ template\ - typename boost_move_conversion_aware_catch_1arg_1::type\ + BOOST_MOVE_FORCEINLINE typename boost_move_conversion_aware_catch_1arg_1::type\ PUB_FUNCTION(ARG1 arg1, const BOOST_MOVE_TEMPL_PARAM &u)\ { return FWD_FUNCTION(arg1, u); }\ \ template\ - typename boost_move_conversion_aware_catch_1arg_2::type\ + BOOST_MOVE_FORCEINLINE typename boost_move_conversion_aware_catch_1arg_2::type\ PUB_FUNCTION(ARG1 arg1, const BOOST_MOVE_TEMPL_PARAM &u)\ {\ TYPE t((u));\ @@ -222,14 +222,14 @@ #elif (defined(_MSC_VER) && (_MSC_VER == 1600)) #define BOOST_MOVE_CONVERSION_AWARE_CATCH_1ARG(PUB_FUNCTION, TYPE, RETURN_VALUE, FWD_FUNCTION, ARG1, UNLESS_CONVERTIBLE_TO)\ - RETURN_VALUE PUB_FUNCTION(ARG1 arg1, BOOST_MOVE_CATCH_CONST(TYPE) x)\ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(ARG1 arg1, BOOST_MOVE_CATCH_CONST(TYPE) x)\ { return FWD_FUNCTION(arg1, static_cast(x)); }\ \ - RETURN_VALUE PUB_FUNCTION(ARG1 arg1, BOOST_MOVE_CATCH_RVALUE(TYPE) x) \ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(ARG1 arg1, BOOST_MOVE_CATCH_RVALUE(TYPE) x) \ { return FWD_FUNCTION(arg1, ::boost::move(x)); }\ \ template\ - typename ::boost::move_detail::disable_if_or\ + BOOST_MOVE_FORCEINLINE typename ::boost::move_detail::disable_if_or\ < RETURN_VALUE \ , ::boost::move_detail::is_same \ , ::boost::move_detail::is_same_or_convertible \ @@ -244,10 +244,10 @@ #else #define BOOST_MOVE_CONVERSION_AWARE_CATCH_1ARG(PUB_FUNCTION, TYPE, RETURN_VALUE, FWD_FUNCTION, ARG1, UNLESS_CONVERTIBLE_TO)\ - RETURN_VALUE PUB_FUNCTION(ARG1 arg1, BOOST_MOVE_CATCH_CONST(TYPE) x)\ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(ARG1 arg1, BOOST_MOVE_CATCH_CONST(TYPE) x)\ { return FWD_FUNCTION(arg1, static_cast(x)); }\ \ - RETURN_VALUE PUB_FUNCTION(ARG1 arg1, BOOST_MOVE_CATCH_RVALUE(TYPE) x) \ + BOOST_MOVE_FORCEINLINE RETURN_VALUE PUB_FUNCTION(ARG1 arg1, BOOST_MOVE_CATCH_RVALUE(TYPE) x) \ { return FWD_FUNCTION(arg1, ::boost::move(x)); }\ // diff --git a/include/boost/move/detail/workaround.hpp b/include/boost/move/detail/workaround.hpp index befe141..1d16f24 100644 --- a/include/boost/move/detail/workaround.hpp +++ b/include/boost/move/detail/workaround.hpp @@ -52,8 +52,6 @@ #define BOOST_MOVE_MSVC_AUTO_MOVE_RETURN_BUG #endif -#define BOOST_MOVE_DISABLE_FORCEINLINE - #if defined(BOOST_MOVE_DISABLE_FORCEINLINE) #define BOOST_MOVE_FORCEINLINE inline #elif defined(BOOST_MOVE_FORCEINLINE_IS_BOOST_FORCELINE) @@ -61,6 +59,9 @@ #elif defined(BOOST_MSVC) && defined(_DEBUG) //"__forceinline" and MSVC seems to have some bugs in debug mode #define BOOST_MOVE_FORCEINLINE inline +#elif defined(__GNUC__) && ((__GNUC__ < 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ < 5))) + //Older GCCs have problems with forceinline + #define BOOST_MOVE_FORCEINLINE inline #else #define BOOST_MOVE_FORCEINLINE BOOST_FORCEINLINE #endif diff --git a/proj/vc7ide/Move.sln b/proj/vc7ide/Move.sln index e8fabd2..742e96f 100644 --- a/proj/vc7ide/Move.sln +++ b/proj/vc7ide/Move.sln @@ -131,6 +131,10 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "doc_template_assign", "doc_ ProjectSection(ProjectDependencies) = postProject EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "inplace_merge_test", "inplace_merge_test.vcproj", "{CD617C28-62B7-CE9E-0000-000000000000}" + ProjectSection(ProjectDependencies) = postProject + EndProjectSection +EndProject Global GlobalSection(SolutionConfiguration) = preSolution Debug = Debug @@ -271,6 +275,10 @@ Global 
{7460CA18-D532-E4F8-F1F2-3A796D2A91E2}.Debug.Build.0 = Debug|Win32 {7460CA18-D532-E4F8-F1F2-3A796D2A91E2}.Release.ActiveCfg = Release|Win32 {7460CA18-D532-E4F8-F1F2-3A796D2A91E2}.Release.Build.0 = Release|Win32 + {CD617C28-62B7-CE9E-0000-000000000000}.Debug.ActiveCfg = Debug|Win32 + {CD617C28-62B7-CE9E-0000-000000000000}.Debug.Build.0 = Debug|Win32 + {CD617C28-62B7-CE9E-0000-000000000000}.Release.ActiveCfg = Release|Win32 + {CD617C28-62B7-CE9E-0000-000000000000}.Release.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionItems) = postSolution ..\..\..\..\boost\move\algo\adaptive_merge.hpp = ..\..\..\..\boost\move\algo\adaptive_merge.hpp diff --git a/proj/vc7ide/inplace_merge_test.vcproj b/proj/vc7ide/inplace_merge_test.vcproj new file mode 100644 index 0000000..682b772 --- /dev/null +++ b/proj/vc7ide/inplace_merge_test.vcproj @@ -0,0 +1,134 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/adaptive_merge_test.cpp b/test/adaptive_merge_test.cpp index 5365e78..6f75dec 100644 --- a/test/adaptive_merge_test.cpp +++ b/test/adaptive_merge_test.cpp @@ -10,20 +10,15 @@ ////////////////////////////////////////////////////////////////////////////// #include //std::srand -#include //std::next_permutation #include //std::cout #include #include -#include -#include - -using boost::timer::cpu_timer; -using boost::timer::cpu_times; -using boost::timer::nanosecond_type; +#include #include "order_type.hpp" +#include "random_shuffle.hpp" #include #include @@ -46,7 +41,7 @@ bool test_random_shuffled(std::size_t const element_count, std::size_t const num for (std::size_t i = 0; i != num_iter; ++i) { - std::random_shuffle(elements.get(), elements.get() + element_count); + ::random_shuffle(elements.get(), elements.get() + element_count); for(std::size_t i = 0; i < (num_keys ? 
num_keys : element_count); ++i){ key_reps[i]=0; } @@ -54,14 +49,15 @@ bool test_random_shuffled(std::size_t const element_count, std::size_t const num elements[i].val = key_reps[elements[i].key]++; } - boost::container::vector tmp(elements.get(), elements.get()+element_count); - std::size_t const split = std::size_t(std::rand()) % element_count; - std::stable_sort(tmp.data(), tmp.data()+split, order_type_less()); - std::stable_sort(tmp.data()+split, tmp.data()+element_count, order_type_less()); - - boost::movelib::adaptive_merge(tmp.data(), tmp.data()+split, tmp.data()+element_count, order_type_less()); + boost::movelib::unique_ptr buf(new char [sizeof(T)*(element_count-element_count/2)]); - if (!is_order_type_ordered(tmp.data(), element_count)) + std::size_t const split = std::size_t(std::rand()) % element_count; + boost::movelib::merge_sort(elements.get(), elements.get()+split, order_type_less(), (T*)buf.get()); + boost::movelib::merge_sort(elements.get()+split, elements.get()+element_count, order_type_less(), (T*)buf.get()); + + boost::movelib::adaptive_merge(elements.get(), elements.get()+split, elements.get()+element_count, order_type_less()); + + if (!is_order_type_ordered(elements.get(), element_count)) { std::cout << "\n ERROR\n"; throw int(0); @@ -72,16 +68,12 @@ bool test_random_shuffled(std::size_t const element_count, std::size_t const num int main() { - #ifdef NDEBUG const std::size_t NIter = 100; - #else - const std::size_t NIter = 10; - #endif - test_random_shuffled(10001, 65, NIter); - test_random_shuffled(10001, 101, NIter); - test_random_shuffled(10001, 1023, NIter); - test_random_shuffled(10001, 4095, NIter); - test_random_shuffled(10001, 0, NIter); + test_random_shuffled(10001, 65, NIter); + test_random_shuffled(10001, 101, NIter); + test_random_shuffled(10001, 1023, NIter); + test_random_shuffled(10001, 4095, NIter); + test_random_shuffled(10001, 0, NIter); return 0; } diff --git a/test/adaptive_sort_test.cpp b/test/adaptive_sort_test.cpp index 4c24a32..5f2c70a 100644 --- a/test/adaptive_sort_test.cpp +++ b/test/adaptive_sort_test.cpp @@ -10,7 +10,6 @@ ////////////////////////////////////////////////////////////////////////////// #include //std::srand -#include //std::next_permutation #include //std::cout #include @@ -19,11 +18,8 @@ #include #include -using boost::timer::cpu_timer; -using boost::timer::cpu_times; -using boost::timer::nanosecond_type; - #include "order_type.hpp" +#include "random_shuffle.hpp" #include #include @@ -45,7 +41,7 @@ bool test_random_shuffled(std::size_t const element_count, std::size_t const num for (std::size_t i = 0; i != num_iter; ++i) { - std::random_shuffle(elements.get(), elements.get() + element_count); + ::random_shuffle(elements.get(), elements.get() + element_count); for(std::size_t i = 0; i < (num_keys ? 
num_keys : element_count); ++i){ key_reps[i]=0; } @@ -53,11 +49,9 @@ bool test_random_shuffled(std::size_t const element_count, std::size_t const num elements[i].val = key_reps[elements[i].key]++; } - boost::container::vector tmp(elements.get(), elements.get()+element_count); + boost::movelib::adaptive_sort(elements.get(), elements.get()+element_count, order_type_less()); - boost::movelib::adaptive_sort(tmp.data(), tmp.data()+element_count, order_type_less()); - - if (!is_order_type_ordered(tmp.data(), element_count)) + if (!is_order_type_ordered(elements.get(), element_count)) { std::cout << "\n ERROR\n"; throw int(0); @@ -68,16 +62,12 @@ bool test_random_shuffled(std::size_t const element_count, std::size_t const num int main() { - #ifdef NDEBUG const std::size_t NIter = 100; - #else - const std::size_t NIter = 10; - #endif - test_random_shuffled(10001, 65, NIter); - test_random_shuffled(10001, 101, NIter); - test_random_shuffled(10001, 1023, NIter); - test_random_shuffled(10001, 4095, NIter); - test_random_shuffled(10001, 0, NIter); + test_random_shuffled(10001, 65, NIter); + test_random_shuffled(10001, 101, NIter); + test_random_shuffled(10001, 1023, NIter); + test_random_shuffled(10001, 4095, NIter); + test_random_shuffled(10001, 0, NIter); return 0; } diff --git a/test/bench_merge.cpp b/test/bench_merge.cpp index 1ef92cd..26a84f0 100644 --- a/test/bench_merge.cpp +++ b/test/bench_merge.cpp @@ -27,7 +27,7 @@ using boost::timer::nanosecond_type; //#define BOOST_MOVE_ADAPTIVE_SORT_STATS void print_stats(const char *str, boost::ulong_long_type element_count) { - std::printf("%sCmp:%8.04f Cpy:%9.04f\n", str, double(order_type::num_compare)/element_count, double(order_type::num_copy)/element_count ); + std::printf("%sCmp:%8.04f Cpy:%9.04f\n", str, double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count ); } #include @@ -83,7 +83,7 @@ const char *AlgoNames [] = { "StdMerge " , "SqrtHAdaptMerge " , "SqrtAdaptMerge " , "Sqrt2AdaptMerge " - , "QuartAdaptMerge " + , "QHalfAdaptMerge " , "StdInplaceMerge " }; @@ -92,53 +92,53 @@ BOOST_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxMerge); template bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count, std::size_t key_len, unsigned alg, nanosecond_type &prev_clock) { - std::size_t const split_pos = generate_elements(elements, element_count, key_reps, key_len, order_type_less()); + std::size_t const split_pos = generate_elements(elements, element_count, key_reps, key_len, order_type_less()); std::printf("%s ", AlgoNames[alg]); - order_type::num_compare=0; - order_type::num_copy=0; - order_type::num_elements = element_count; + order_perf_type::num_compare=0; + order_perf_type::num_copy=0; + order_perf_type::num_elements = element_count; cpu_timer timer; timer.resume(); switch(alg) { case StdMerge: - std::inplace_merge(elements, elements+split_pos, elements+element_count, order_type_less()); + std::inplace_merge(elements, elements+split_pos, elements+element_count, order_type_less()); break; case AdaptiveMerge: - boost::movelib::adaptive_merge(elements, elements+split_pos, elements+element_count, order_type_less()); + boost::movelib::adaptive_merge(elements, elements+split_pos, elements+element_count, order_type_less()); break; case SqrtHAdaptiveMerge: - adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() + adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() , 
boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1); break; case SqrtAdaptiveMerge: - adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() + adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); break; case Sqrt2AdaptiveMerge: - adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() + adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); break; case QuartAdaptiveMerge: - adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() + adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() , (element_count-1)/4+1); break; case StdInplaceMerge: - boost::movelib::merge_bufferless_ONlogN(elements, elements+split_pos, elements+element_count, order_type_less()); + boost::movelib::merge_bufferless_ONlogN(elements, elements+split_pos, elements+element_count, order_type_less()); break; } timer.stop(); - if(order_type::num_elements == element_count){ + if(order_perf_type::num_elements == element_count){ std::printf(" Tmp Ok "); } else{ std::printf(" Tmp KO "); } nanosecond_type new_clock = timer.elapsed().wall; - //std::cout << "Cmp:" << order_type::num_compare << " Cpy:" << order_type::num_copy; //for old compilers without ll size argument - std::printf("Cmp:%8.04f Cpy:%9.04f", double(order_type::num_compare)/element_count, double(order_type::num_copy)/element_count ); + //std::cout << "Cmp:" << order_perf_type::num_compare << " Cpy:" << order_perf_type::num_copy; //for old compilers without ll size argument + std::printf("Cmp:%8.04f Cpy:%9.04f", double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count ); double time = double(new_clock); @@ -178,10 +178,10 @@ bool measure_all(std::size_t L, std::size_t NK) nanosecond_type back_clock; bool res = true; res = res && measure_algo(A,Keys,L,NK,StdMerge, prev_clock); - back_clock = prev_clock;/* + back_clock = prev_clock; // prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,QuartAdaptiveMerge, prev_clock);*/ + res = res && measure_algo(A,Keys,L,NK,QuartAdaptiveMerge, prev_clock); // prev_clock = back_clock; res = res && measure_algo(A,Keys,L,NK,Sqrt2AdaptiveMerge, prev_clock); @@ -211,58 +211,55 @@ int main() { try{ #ifndef BENCH_SORT_UNIQUE_VALUES - measure_all(101,1); - measure_all(101,7); - measure_all(101,31); + measure_all(101,1); + measure_all(101,7); + measure_all(101,31); #endif - measure_all(101,0); + measure_all(101,0); // #ifndef BENCH_SORT_UNIQUE_VALUES - measure_all(1101,1); - measure_all(1001,7); - measure_all(1001,31); - measure_all(1001,127); - measure_all(1001,511); + measure_all(1101,1); + measure_all(1001,7); + measure_all(1001,31); + measure_all(1001,127); + measure_all(1001,511); #endif - measure_all(1001,0); + measure_all(1001,0); // #ifndef BENCH_MERGE_SHORT #ifndef BENCH_SORT_UNIQUE_VALUES - measure_all(10001,65); - measure_all(10001,255); - measure_all(10001,1023); - measure_all(10001,4095); + measure_all(10001,65); + measure_all(10001,255); + measure_all(10001,1023); + measure_all(10001,4095); #endif - measure_all(10001,0); + measure_all(10001,0); // #ifndef BENCH_SORT_UNIQUE_VALUES - measure_all(100001,511); - measure_all(100001,2047); - measure_all(100001,8191); - 
measure_all(100001,32767); + measure_all(100001,511); + measure_all(100001,2047); + measure_all(100001,8191); + measure_all(100001,32767); #endif - measure_all(100001,0); + measure_all(100001,0); // #ifdef NDEBUG #ifndef BENCH_SORT_UNIQUE_VALUES - measure_all(1000001,1); - measure_all(1000001,1024); - measure_all(1000001,32768); - measure_all(1000001,524287); + measure_all(1000001,1); + measure_all(1000001,1024); + measure_all(1000001,32768); + measure_all(1000001,524287); #endif - measure_all(1000001,0); - measure_all(1500001,0); - //measure_all(10000001,0); - //measure_all(15000001,0); - //measure_all(100000001,0); + measure_all(1000001,0); + measure_all(3000001,0); #endif //NDEBUG #endif //#ifndef BENCH_MERGE_SHORT - //measure_all(100000001,0); + //measure_all(100000001,0); } catch(...) { diff --git a/test/bench_sort.cpp b/test/bench_sort.cpp index 630da4f..b67522a 100644 --- a/test/bench_sort.cpp +++ b/test/bench_sort.cpp @@ -26,15 +26,15 @@ using boost::timer::nanosecond_type; #include "order_type.hpp" //#define BOOST_MOVE_ADAPTIVE_SORT_STATS +//#define BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS void print_stats(const char *str, boost::ulong_long_type element_count) { - std::printf("%sCmp:%7.03f Cpy:%8.03f\n", str, double(order_type::num_compare)/element_count, double(order_type::num_copy)/element_count ); + std::printf("%sCmp:%7.03f Cpy:%8.03f\n", str, double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count ); } #include #include -#include #include template @@ -79,7 +79,6 @@ enum AlgoType SqrtAdaptiveSort, Sqrt2AdaptiveSort, QuartAdaptiveSort, - NoBufMergeSort, InplaceStableSort, SlowStableSort, HeapSort, @@ -93,7 +92,6 @@ const char *AlgoNames [] = { "MergeSort " , "SqrtAdaptSort " , "Sqrt2AdaptSort " , "QuartAdaptSort " - , "NoBufMergeSort " , "InplStableSort " , "SlowSort " , "HeapSort " @@ -107,63 +105,60 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count generate_elements(elements, element_count, key_reps, key_len); std::printf("%s ", AlgoNames[alg]); - order_type::num_compare=0; - order_type::num_copy=0; - order_type::num_elements = element_count; + order_perf_type::num_compare=0; + order_perf_type::num_copy=0; + order_perf_type::num_elements = element_count; cpu_timer timer; timer.resume(); switch(alg) { case MergeSort: - merge_sort_buffered(elements, element_count, order_type_less()); + merge_sort_buffered(elements, element_count, order_type_less()); break; case StableSort: - std::stable_sort(elements,elements+element_count,order_type_less()); + std::stable_sort(elements,elements+element_count,order_type_less()); break; case AdaptiveSort: - boost::movelib::adaptive_sort(elements, elements+element_count, order_type_less()); + boost::movelib::adaptive_sort(elements, elements+element_count, order_type_less()); break; case SqrtHAdaptiveSort: - adaptive_sort_buffered( elements, element_count, order_type_less() + adaptive_sort_buffered( elements, element_count, order_type_less() , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1); break; case SqrtAdaptiveSort: - adaptive_sort_buffered( elements, element_count, order_type_less() + adaptive_sort_buffered( elements, element_count, order_type_less() , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); break; case Sqrt2AdaptiveSort: - adaptive_sort_buffered( elements, element_count, order_type_less() + adaptive_sort_buffered( elements, element_count, order_type_less() , 
2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); break; case QuartAdaptiveSort: - adaptive_sort_buffered( elements, element_count, order_type_less() + adaptive_sort_buffered( elements, element_count, order_type_less() , (element_count-1)/4+1); break; - case NoBufMergeSort: - boost::movelib::bufferless_merge_sort(elements, elements+element_count, order_type_less()); - break; case InplaceStableSort: - boost::movelib::inplace_stable_sort(elements, elements+element_count, order_type_less()); + boost::movelib::inplace_stable_sort(elements, elements+element_count, order_type_less()); break; case SlowStableSort: - boost::movelib::detail_adaptive::slow_stable_sort(elements, elements+element_count, order_type_less()); + boost::movelib::detail_adaptive::slow_stable_sort(elements, elements+element_count, order_type_less()); break; case HeapSort: - std::make_heap(elements, elements+element_count, order_type_less()); - std::sort_heap(elements, elements+element_count, order_type_less()); + std::make_heap(elements, elements+element_count, order_type_less()); + std::sort_heap(elements, elements+element_count, order_type_less()); break; } timer.stop(); - if(order_type::num_elements == element_count){ + if(order_perf_type::num_elements == element_count){ std::printf(" Tmp Ok "); } else{ std::printf(" Tmp KO "); } nanosecond_type new_clock = timer.elapsed().wall; - //std::cout << "Cmp:" << order_type::num_compare << " Cpy:" << order_type::num_copy; //for old compilers without ll size argument - std::printf("Cmp:%7.03f Cpy:%8.03f", double(order_type::num_compare)/element_count, double(order_type::num_copy)/element_count ); + //std::cout << "Cmp:" << order_perf_type::num_compare << " Cpy:" << order_perf_type::num_copy; //for old compilers without ll size argument + std::printf("Cmp:%7.03f Cpy:%8.03f", double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count ); double time = double(new_clock); @@ -186,7 +181,7 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count , units , prev_clock ? 
double(new_clock)/double(prev_clock): 1.0); prev_clock = new_clock; - bool res = is_order_type_ordered(elements, element_count, alg != HeapSort && alg != NoBufMergeSort); + bool res = is_order_type_ordered(elements, element_count, alg != HeapSort); return res; } @@ -229,9 +224,6 @@ bool measure_all(std::size_t L, std::size_t NK) prev_clock = back_clock; res = res && measure_algo(A,Keys,L,NK,InplaceStableSort, prev_clock); // - prev_clock = back_clock; - res = res && measure_algo(A,Keys,L,NK,NoBufMergeSort, prev_clock); - // //prev_clock = back_clock; //res = res && measure_algo(A,Keys,L,NK,SlowStableSort, prev_clock); // @@ -247,56 +239,55 @@ bool measure_all(std::size_t L, std::size_t NK) int main() { #ifndef BENCH_SORT_UNIQUE_VALUES - //measure_all(101,1); - measure_all(101,7); - measure_all(101,31); + measure_all(101,1); + measure_all(101,7); + measure_all(101,31); #endif - measure_all(101,0); + measure_all(101,0); // #ifndef BENCH_SORT_UNIQUE_VALUES - measure_all(1101,1); - measure_all(1001,7); - measure_all(1001,31); - measure_all(1001,127); - measure_all(1001,511); + measure_all(1101,1); + measure_all(1001,7); + measure_all(1001,31); + measure_all(1001,127); + measure_all(1001,511); #endif - measure_all(1001,0); + measure_all(1001,0); // #ifndef BENCH_SORT_SHORT #ifndef BENCH_SORT_UNIQUE_VALUES - measure_all(10001,65); - measure_all(10001,255); - measure_all(10001,1023); - measure_all(10001,4095); - measure_all(10001,0); + measure_all(10001,65); + measure_all(10001,255); + measure_all(10001,1023); + measure_all(10001,4095); #endif + measure_all(10001,0); // #ifndef BENCH_SORT_UNIQUE_VALUES - measure_all(100001,511); - measure_all(100001,2047); - measure_all(100001,8191); - measure_all(100001,32767); + measure_all(100001,511); + measure_all(100001,2047); + measure_all(100001,8191); + measure_all(100001,32767); #endif - measure_all(100001,0); + measure_all(100001,0); // - //#ifdef NDEBUG + #ifdef NDEBUG #ifndef BENCH_SORT_UNIQUE_VALUES - measure_all(1000001,1); - measure_all(1000001,1024); - measure_all(1000001,32768); - measure_all(1000001,524287); + measure_all(1000001,1); + measure_all(1000001,1024); + measure_all(1000001,32768); + measure_all(1000001,524287); #endif - measure_all(1000001,0); - measure_all(1500001,0); - //measure_all(10000001,0); - //#endif //NDEBUG + measure_all(1000001,0); + measure_all(1500001,0); + #endif //NDEBUG #endif //#ifndef BENCH_SORT_SHORT - //measure_all(100000001,0); + //measure_all(100000001,0); return 0; } diff --git a/test/inplace_merge_test.cpp b/test/inplace_merge_test.cpp new file mode 100644 index 0000000..b69bce0 --- /dev/null +++ b/test/inplace_merge_test.cpp @@ -0,0 +1,283 @@ +////////////////////////////////////////////////////////////////////////////// +// +// (C) Copyright Ion Gaztanaga 2016-2016. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// See http://www.boost.org/libs/move for documentation. 
+//
+//////////////////////////////////////////////////////////////////////////////
+
+//#define BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS
+#define BOOST_MOVE_ADAPTIVE_SORT_STATS
+
+#include "order_type.hpp"
+
+#include <iostream> //std::cout
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+const std::size_t BlockSize = 7u;
+
+#if defined(BOOST_MSVC)
+#pragma warning (disable : 4267)
+#endif
+
+
+const std::size_t left_merge  = 0;
+const std::size_t buf_merge   = 1;
+const std::size_t unbuf_merge = 2;
+const std::size_t max_merge   = 3;
+
+template<class Op>
+void alternating_test(
+   const std::size_t NumBlocksA,
+   const std::size_t NumBlocksB,
+   const std::size_t ExtraA,
+   const std::size_t ExtraB,
+   Op op)
+{
+   using namespace boost::movelib::detail_adaptive;
+
+
+   const std::size_t DataSize  = ExtraA + NumBlocksA*BlockSize + NumBlocksB*BlockSize + ExtraB;
+   const std::size_t KeySize   = NumBlocksA + NumBlocksB + 1;
+   const std::size_t HdrSize   = BlockSize + KeySize;
+   const std::size_t ArraySize = HdrSize + DataSize;
+
+   boost::movelib::unique_ptr testarray(boost::movelib::make_unique(ArraySize));
+
+
+   for(std::size_t szt_merge = 0; szt_merge != max_merge; ++szt_merge){
+      //Order keys
+      for (std::size_t szt_i = 0u; szt_i != KeySize; ++szt_i) {
+         testarray[szt_i].key = szt_i;
+         testarray[szt_i].val = std::size_t(-1);
+      }
+
+      //Order buffer
+      for (std::size_t szt_i = 0u; szt_i != BlockSize; ++szt_i) {
+         testarray[KeySize+szt_i].key = std::size_t(-1);
+         testarray[KeySize+szt_i].val = szt_i;
+      }
+
+      //Block A
+      std::size_t szt_k = 0;
+      for (std::size_t szt_i = 0u; szt_i != ExtraA; ++szt_i) {
+         testarray[HdrSize+szt_k].key = (szt_k/2)*2;
+         testarray[HdrSize+szt_k].val = szt_k & 1;
+         ++szt_k;
+      }
+
+      for (std::size_t szt_b = 0u; szt_b != NumBlocksA; ++szt_b)
+      for (std::size_t szt_i = 0u; szt_i != BlockSize; ++szt_i) {
+         testarray[HdrSize+szt_k].key = (szt_k/2)*2;
+         testarray[HdrSize+szt_k].val = szt_k & 1;
+         ++szt_k;
+      }
+
+      //Block B
+      std::size_t szt_l = 0;
+      for (std::size_t szt_b = 0u, szt_t = 0; szt_b != NumBlocksB; ++szt_b)
+      for (std::size_t szt_i = 0u; szt_i != BlockSize; ++szt_i, ++szt_t) {
+         testarray[HdrSize+szt_k].key = (szt_l/2)*2+1;
+         testarray[HdrSize+szt_k].val = szt_l & 1;
+         ++szt_k;
+         ++szt_l;
+      }
+
+      for (std::size_t szt_i = 0u; szt_i != ExtraB; ++szt_i) {
+         testarray[HdrSize+szt_k].key = (szt_l/2)*2+1;
+         testarray[HdrSize+szt_k].val = szt_l & 1;
+         ++szt_k;
+         ++szt_l;
+      }
+
+      if(szt_merge == left_merge){
+         //Merge Left
+         op_merge_blocks_left
+            ( testarray.get(), order_type_less()
+            , testarray.get()+HdrSize, BlockSize, ExtraA, NumBlocksA, NumBlocksB, ExtraB
+            , order_type_less(), op );
+         BOOST_TEST( is_order_type_ordered(testarray.get()+KeySize, DataSize) );
+         BOOST_TEST( is_key(testarray.get(), KeySize) );
+         BOOST_TEST(( !boost::move_detail::is_same::value
+                     || is_buffer(testarray.get()+ KeySize+DataSize, BlockSize) ));
+      }
+      else if(szt_merge == buf_merge){
+         //Merge with buf
+         op_merge_blocks_with_buf
+            ( testarray.get(), order_type_less()
+            , testarray.get()+HdrSize, BlockSize, ExtraA, NumBlocksA, NumBlocksB, ExtraB
+            , order_type_less(), op, testarray.get()+KeySize );
+         BOOST_TEST( is_order_type_ordered(testarray.get()+HdrSize, DataSize) );
+         BOOST_TEST( is_key(testarray.get(), KeySize) );
+         BOOST_TEST(( !boost::move_detail::is_same::value
+                     || is_buffer(testarray.get()+ KeySize, BlockSize) ));
+      }
+      else if(szt_merge == unbuf_merge){
+         //Merge bufferless
+         merge_blocks_bufferless
+            ( testarray.get(), order_type_less()
+            , testarray.get()+HdrSize, BlockSize, ExtraA, 
NumBlocksA, NumBlocksB, ExtraB + , order_type_less()); + BOOST_TEST( is_order_type_ordered(testarray.get()+HdrSize, DataSize) ); + BOOST_TEST( is_key(testarray.get(), KeySize) ); + BOOST_TEST(( !boost::move_detail::is_same::value + || is_buffer(testarray.get()+ KeySize, BlockSize) )); + } + } +} + +int main() +{ + { + const std::size_t NumBlocksA = 3u; + const std::size_t NumBlocksB = 3u; + const std::size_t ExtraA = BlockSize/2; + const std::size_t ExtraB = ExtraA; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 3u; + const std::size_t NumBlocksB = 3u; + const std::size_t ExtraA = 0u; + const std::size_t ExtraB = BlockSize/2; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 3u; + const std::size_t NumBlocksB = 3u; + const std::size_t ExtraA = BlockSize/2; + const std::size_t ExtraB = 0; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 3u; + const std::size_t NumBlocksB = 3u; + const std::size_t ExtraA = 0; + const std::size_t ExtraB = 0; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 6u; + const std::size_t NumBlocksB = 3u; + const std::size_t ExtraA = BlockSize/2; + const std::size_t ExtraB = ExtraA; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 6u; + const std::size_t NumBlocksB = 3u; + const std::size_t ExtraA = BlockSize/2; + const std::size_t ExtraB = 0; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 3u; + const std::size_t NumBlocksB = 5u; + const std::size_t ExtraA = BlockSize/2; + const std::size_t ExtraB = ExtraA; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 3u; + const std::size_t NumBlocksB = 5u; + const std::size_t ExtraA = BlockSize/2; + const std::size_t ExtraB = 0; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 0u; + const std::size_t NumBlocksB = 0u; + const std::size_t ExtraA = 0; + const std::size_t ExtraB = 0; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 0u; + const std::size_t NumBlocksB = 0u; + const std::size_t ExtraA = BlockSize/2; + const std::size_t ExtraB = 0; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, 
NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 0u; + const std::size_t NumBlocksB = 0u; + const std::size_t ExtraA = 0; + const std::size_t ExtraB = BlockSize/2; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + // + { + const std::size_t NumBlocksA = 0u; + const std::size_t NumBlocksB = 1u; + const std::size_t ExtraA = 0; + const std::size_t ExtraB = 0; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 1u; + const std::size_t NumBlocksB = 0u; + const std::size_t ExtraA = 0; + const std::size_t ExtraB = 0; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 1u; + const std::size_t NumBlocksB = 0u; + const std::size_t ExtraA = BlockSize/2; + const std::size_t ExtraB = 0; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 0u; + const std::size_t NumBlocksB = 1u; + const std::size_t ExtraA = BlockSize/2; + const std::size_t ExtraB = 0; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 1u; + const std::size_t NumBlocksB = 0u; + const std::size_t ExtraA = 0; + const std::size_t ExtraB = BlockSize/2; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + { + const std::size_t NumBlocksA = 0u; + const std::size_t NumBlocksB = 1u; + const std::size_t ExtraA = 0; + const std::size_t ExtraB = BlockSize/2; + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::move_op()); + alternating_test(NumBlocksA, NumBlocksB, ExtraA, ExtraB, boost::movelib::swap_op()); + } + + return ::boost::report_errors(); +} diff --git a/test/order_type.hpp b/test/order_type.hpp index 5953cb4..350312f 100644 --- a/test/order_type.hpp +++ b/test/order_type.hpp @@ -13,28 +13,29 @@ #define BOOST_MOVE_TEST_ORDER_TYPE_HPP #include +#include #include #include -struct order_type +struct order_perf_type { public: std::size_t key; std::size_t val; - order_type() + order_perf_type() { ++num_elements; } - order_type(const order_type& other) + order_perf_type(const order_perf_type& other) : key(other.key), val(other.val) { ++num_elements; ++num_copy; } - order_type & operator=(const order_type& other) + order_perf_type & operator=(const order_perf_type& other) { ++num_copy; key = other.key; @@ -42,36 +43,81 @@ struct order_type return *this; } - ~order_type () + ~order_perf_type () { --num_elements; } + static void reset_stats() + { + num_compare=0; + num_copy=0; + } + + friend bool operator< (const order_perf_type& left, const order_perf_type& right) + { ++num_compare; return left.key < right.key; } + static boost::ulong_long_type num_compare; static boost::ulong_long_type num_copy; static boost::ulong_long_type num_elements; }; -boost::ulong_long_type 
order_type::num_compare = 0;
-boost::ulong_long_type order_type::num_copy = 0;
-boost::ulong_long_type order_type::num_elements = 0;
+boost::ulong_long_type order_perf_type::num_compare = 0;
+boost::ulong_long_type order_perf_type::num_copy = 0;
+boost::ulong_long_type order_perf_type::num_elements = 0;
+
+
+struct order_move_type
+{
+   BOOST_MOVABLE_BUT_NOT_COPYABLE(order_move_type)
+
+   public:
+   std::size_t key;
+   std::size_t val;
+
+   order_move_type()
+      : key(0u), val(0u)
+   {}
+
+   order_move_type(BOOST_RV_REF(order_move_type) other)
+      : key(other.key), val(other.val)
+   {
+      other.key = other.val = std::size_t(-1);
+   }
+
+   order_move_type & operator=(BOOST_RV_REF(order_move_type) other)
+   {
+      key = other.key;
+      val = other.val;
+      other.key = other.val = std::size_t(-2);
+      return *this;
+   }
+
+   friend bool operator< (const order_move_type& left, const order_move_type& right)
+   {  return left.key < right.key;  }
+
+   ~order_move_type ()
+   {
+      key = val = std::size_t(-3);
+   }
+};
 
-template<class T>
 struct order_type_less
 {
-   bool operator()(const T &a,T const &b) const
-   {  ++order_type::num_compare; return a.key < b.key; }
+   template<class T>
+   bool operator()(const T &a, T const &b) const
+   {  return a < b;  }
 };
 
 template<class T>
 inline bool is_order_type_ordered(T *elements, std::size_t element_count, bool stable = true)
 {
    for(std::size_t i = 1; i < element_count; ++i){
-      if(order_type_less<T>()(elements[i], elements[i-1])){
+      if(order_type_less()(elements[i], elements[i-1])){
          std::printf("\n Ord KO !!!!");
          return false;
       }
-      if( stable && !(order_type_less<T>()(elements[i-1], elements[i])) && (elements[i-1].val > elements[i].val) ){
+      if( stable && !(order_type_less()(elements[i-1], elements[i])) && (elements[i-1].val > elements[i].val) ){
          std::printf("\n Stb KO !!!! ");
          return false;
       }
@@ -79,4 +125,45 @@ inline bool is_order_type_ordered(T *elements, std::size_t element_count, bool s
    return true;
 }
 
+namespace boost {
+namespace movelib {
+namespace detail_adaptive {
+
+
+
+}}}
+
+template<class T>
+inline bool is_key(T *elements, std::size_t element_count)
+{
+   for(std::size_t i = 1; i < element_count; ++i){
+      if(elements[i].key >= element_count){
+         std::printf("\n Key.key KO !!!!");
+         return false;
+      }
+      if(elements[i].val != std::size_t(-1)){
+         std::printf("\n Key.val KO !!!!");
+         return false;
+      }
+   }
+   return true;
+}
+
+template<class T>
+inline bool is_buffer(T *elements, std::size_t element_count)
+{
+   for(std::size_t i = 1; i < element_count; ++i){
+      if(elements[i].key != std::size_t(-1)){
+         std::printf("\n Buf.key KO !!!!");
+         return false;
+      }
+      if(elements[i].val >= element_count){
+         std::printf("\n Buf.val KO !!!!");
+         return false;
+      }
+   }
+   return true;
+}
+
+
 #endif //BOOST_MOVE_TEST_ORDER_TYPE_HPP
diff --git a/test/random_shuffle.hpp b/test/random_shuffle.hpp
new file mode 100644
index 0000000..5041d96
--- /dev/null
+++ b/test/random_shuffle.hpp
@@ -0,0 +1,23 @@
+#ifndef BOOST_MOVE_TEST_RANDOM_SHUFFLE_HPP
+#define BOOST_MOVE_TEST_RANDOM_SHUFFLE_HPP
+
+
+#include
+#include
+#include
+
+template< class RandomIt >
+void random_shuffle( RandomIt first, RandomIt last )
+{
+   typedef typename boost::movelib::iterator_traits<RandomIt>::difference_type difference_type;
+   difference_type n = last - first;
+   for (difference_type i = n-1; i > 0; --i) {
+      difference_type j = std::rand() % (i+1);
+      if(j != i) {
+         boost::adl_move_swap(first[i], first[j]);
+      }
+   }
+}
+
+
+#endif// BOOST_MOVE_TEST_RANDOM_SHUFFLE_HPP
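
Usage note: the standalone sketch below (illustration only, not part of the patch or its test suite) shows how the test helpers introduced above are meant to compose: order_perf_type counts copies and comparisons, random_shuffle permutes the input, order_type_less drives boost::movelib::adaptive_sort, and is_order_type_ordered verifies both ordering and, through the val position stamp, stability. The element count, the key distribution and the exact set of Boost.Move includes are illustrative assumptions.

// Usage sketch (illustration only): exercises the helpers defined in
// test/order_type.hpp and test/random_shuffle.hpp with adaptive_sort.
#include <boost/move/algo/adaptive_sort.hpp>
#include <boost/move/unique_ptr.hpp>
#include <boost/move/make_unique.hpp>
#include <cstddef>
#include "order_type.hpp"
#include "random_shuffle.hpp"

int main()
{
   const std::size_t element_count = 10001;   //arbitrary size for the example
   boost::movelib::unique_ptr<order_perf_type[]> a
      (boost::movelib::make_unique<order_perf_type[]>(element_count));

   //Duplicate keys so that stability is actually exercised
   for (std::size_t i = 0; i != element_count; ++i)
      a[i].key = i % 512u;

   //Shuffle the keys, then stamp each element with its position: a stable
   //sort must keep 'val' increasing inside every run of equal keys.
   ::random_shuffle(a.get(), a.get() + element_count);
   for (std::size_t i = 0; i != element_count; ++i)
      a[i].val = i;

   order_perf_type::reset_stats();
   boost::movelib::adaptive_sort(a.get(), a.get() + element_count, order_type_less());

   //Final argument 'true' also verifies the stability invariant via 'val'
   return is_order_type_ordered(a.get(), element_count, true) ? 0 : 1;
}

Stamping val after the shuffle is what gives the stability check teeth: the input order of equal keys is then recorded in val, so a stable sort must leave val increasing inside every run of equal keys, which is exactly the predicate is_order_type_ordered tests.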