From 833e507326e1bd5b8c9b0c263844a8342158abff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Fri, 13 May 2016 00:34:40 +0200 Subject: [PATCH] Added cache usage to selection sort when external buffer is available. Refactored merge to right using reverse iterators. --- .../move/algo/detail/adaptive_sort_merge.hpp | 888 ++++++++---------- 1 file changed, 369 insertions(+), 519 deletions(-) diff --git a/include/boost/move/algo/detail/adaptive_sort_merge.hpp b/include/boost/move/algo/detail/adaptive_sort_merge.hpp index 87828e8..3d97212 100644 --- a/include/boost/move/algo/detail/adaptive_sort_merge.hpp +++ b/include/boost/move/algo/detail/adaptive_sort_merge.hpp @@ -45,6 +45,7 @@ #define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP #include +#include #include #include #include @@ -161,6 +162,29 @@ class adaptive_xbuf m_size = size; } + void shrink_to_fit(std::size_t const size) + { + if(m_size > size){ + for(std::size_t szt_i = size; szt_i != m_size; ++szt_i){ + m_ptr[szt_i].~T(); + } + m_size = size; + } + } + + void initialize_until(std::size_t const size, T &t) + { + BOOST_ASSERT(m_size < m_capacity); + if(m_size < size){ + ::new((void*)&m_ptr[m_size]) T(::boost::move(t)); + ++m_size; + for(; m_size != size; ++m_size){ + ::new((void*)&m_ptr[m_size]) T(::boost::move(m_ptr[m_size-1])); + } + t = ::boost::move(m_ptr[m_size-1]); + } + } + template bool supports_aligned_trailing(std::size_t size, std::size_t trail_count) const { @@ -210,11 +234,7 @@ class adaptive_xbuf void clear() { - std::size_t size = m_size; - while(size--){ - m_ptr[size].~T(); - } - m_size = 0u; + this->shrink_to_fit(0u); } private: @@ -288,41 +308,75 @@ class range_xbuf Iterator const m_cap; }; -template -bool three_way_init( RandIt first1, RandIt last1, Buf &buf - , typename Buf::iterator &buf_first, typename Buf::iterator &buf_last, move_op) + +template +RandIt skip_until_merge + ( RandIt first1, RandIt const last1 + , const typename iterator_traits::value_type &next_key, Compare comp) { - buf.move_assign(first1, last1-first1); - buf_first = buf.data(); - buf_last = buf.end(); - return true; + while(first1 != last1 && !comp(next_key, *first1)){ + ++first1; + } + return first1; } -template -bool three_way_init( RandIt first, RandIt last, Buf &buf - , typename Buf::iterator &buf_first, typename Buf::iterator &buf_last, swap_op) +template +OutputIt op_partial_merge + (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, OutputIt d_first, Compare comp, Op op) { - typedef typename iterator_traits::size_type size_type; - buf.clear(); - buf_first = buf.data(); - buf_last = buf_first + size_type(last-first); - return false; + InputIt1 first1(r_first1); + InputIt2 first2(r_first2); + if(first2 != last2 && last1 != first1) + while(1){ + if(comp(*first2, *first1)) { + op(first2++, d_first++); + if(first2 == last2){ + break; + } + } + else{ + op(first1++, d_first++); + if(first1 == last1){ + break; + } + } + } + r_first1 = first1; + r_first2 = first2; + return d_first; } -template -void three_way_move(T &a, T &b, Buf &buf, move_op) +template +RandItB op_buffered_partial_merge_to_left_placed + ( RandIt1 first1, RandIt1 const last1 + , RandIt2 &rfirst2, RandIt2 const last2 + , RandItB &rfirstb, Compare comp, Op op ) { - buf.add(&b); - b = boost::move(a); -} + RandItB firstb = rfirstb; + RandItB lastb = firstb; + RandIt2 first2 = rfirst2; -template -void three_way_move(T &a, T &b, Buf &buf, swap_op) -{ - T tmp(boost::move(*buf.end())); - buf.add(&b); - b = boost::move(a); - a = boost::move(tmp); + //Move to buffer while merging + //Three way moves need less moves when op is swap_op so use it + //when merging elements from range2 to the destination occupied by range1 + if(first1 != last1 && first2 != last2){ + op(three_way_t(), first2++, first1++, lastb++); + + while(true){ + if(first1 == last1){ + break; + } + if(first2 == last2){ + lastb = op(forward_t(), first1, last1, firstb); + break; + } + op(three_way_t(), comp(*first2, *firstb) ? first2++ : firstb++, first1++, lastb++); + } + } + + rfirst2 = first2; + rfirstb = firstb; + return lastb; } /////////////////////////////////////////////////////////////////////////////// @@ -331,7 +385,6 @@ void three_way_move(T &a, T &b, Buf &buf, swap_op) // /////////////////////////////////////////////////////////////////////////////// - template RandIt op_partial_merge_with_buf_impl ( RandIt first1, RandIt const last1, RandIt first2, RandIt last2 @@ -340,7 +393,6 @@ RandIt op_partial_merge_with_buf_impl ) { typedef typename Buf::iterator buf_iterator; - typedef typename iterator_traits::value_type value_type; BOOST_ASSERT(first1 != last1); BOOST_ASSERT(first2 != last2); @@ -349,40 +401,21 @@ RandIt op_partial_merge_with_buf_impl if(buf_first1 == buf_last1){ //Skip any element that does not need to be moved - while(!comp(*last1, *first1)){ - ++first1; - if(first1 == last1){ - return first1; - } - } - - //If initialization is successful, move to buffer while merging - //Three way moves need less moves when op is swap_op so use it - //when merging elements from range2 to the destination occupied by range1 - if(!three_way_init(first1, last1, buf, buf_first1, buf_last1, op)){ - three_way_move(*first2, *first1, buf, op); - for(++first1, ++first2; first1 != last1; ++first1){ - value_type &v = comp(*first2, *buf_first1) ? *first2++ : *buf_first1++; - three_way_move(v, *first1, buf, op); - } + first1 = skip_until_merge(first1, last1, *last1, comp); + if(first1 == last1){ + return first1; } + buf_first1 = buf.data(); + buf_last1 = op_buffered_partial_merge_to_left_placed(first1, last1, first2, last2, buf_first1, comp, op); + BOOST_ASSERT(buf_last1 == (buf.data() + (last1-first1))); + first1 = last1; + } + else{ + BOOST_ASSERT((last1-first1) == (buf_last1 - buf_first1)); } //Now merge from buffer - if(first2 != last2) - while(1){ - if(comp(*first2, *buf_first1)) { - op(first2++, first1++); - if(first2 == last2) - break; - } - else{ - op(buf_first1++, first1++); - if(buf_first1 == buf_last1) - break; - } - } - + first1 = op_partial_merge(buf_first1, buf_last1, first2, last2, first1, comp, op); buf_first1_in_out = buf_first1; buf_last1_in_out = buf_last1; return first1; @@ -429,72 +462,53 @@ void op_merge_blocks_with_buf , Op op , Buf & xbuf) { - if(n_bef_irreg2 == 0){ - RandIt const last_reg(first+l_irreg1+n_aft_irreg2*l_block); - op_buffered_merge(first, last_reg, last_reg+l_irreg2, comp, op, xbuf); - } - else { - typedef typename Buf::iterator buf_iterator; - buf_iterator buffer = xbuf.data(); - buf_iterator buffer_end = buffer; - RandIt first1 = first; - RandIt last1 = l_irreg1 ? first1 + l_irreg1 : first1 + l_block; - RandIt first2 = last1; - RandItKeys const key_end (key_first+n_bef_irreg2); + typedef typename Buf::iterator buf_iterator; + buf_iterator buffer = xbuf.data(); + buf_iterator buffer_end = buffer; + RandIt first1 = first; + RandIt last1 = first1 + l_irreg1; + RandItKeys const key_end (key_first+n_bef_irreg2); - for( bool is_range1_A = l_irreg1 ? true : key_comp(*key_first, midkey), skip_first_it = l_irreg1 != 0 - ; key_first != key_end; ){ - if(!skip_first_it){ - ++key_first; - } - skip_first_it = false; - bool const last_it = key_first == key_end; - //If the trailing block is empty, we'll make it equal to the previous if empty - bool const is_range2_A = last_it ? (!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey); + bool is_range1_A = true; //first l_irreg1 elements are always from range A - if(is_range1_A == is_range2_A){ - if(buffer != buffer_end){ - first1 = op(forward_t(), buffer, buffer_end, first1); - BOOST_ASSERT(first1 == first2); - buffer_end = buffer; - } - first1 = first2; - if(last_it){ - xbuf.clear(); - last1 = first2+l_block*n_aft_irreg2; - op_buffered_merge(first1, last1, last1+l_irreg2, comp, op, xbuf); - break; - } - else{ - last1 = first2 + l_block; - } - first2 += l_block; - } - else { - BOOST_ASSERT(!last_it || (l_irreg2 || n_aft_irreg2)); - if(last_it){ - RandIt res = op(forward_t(), buffer, buffer_end, first1); - BOOST_ASSERT(buffer == buffer_end || res == last1); (void)res; - last1 += l_block*n_aft_irreg2; - xbuf.clear(); - op_buffered_merge(first1, last1, last1+l_irreg2, comp, op, xbuf); - break; - } - else{ - RandIt const last2 = first2 + l_block; - first1 = op_partial_merge_with_buf(first1, last1, first2, last2, xbuf, buffer, buffer_end, comp, op, is_range1_A); - if(buffer == buffer_end){ - is_range1_A = is_range2_A; - } - last1 = last2; - first2 = last1; - BOOST_ASSERT((buffer == buffer_end) || (buffer_end-buffer) == (last1-first1)); - } - } + for( ; key_first != key_end; ++key_first, last1 += l_block){ + //If the trailing block is empty, we'll make it equal to the previous if empty + bool const is_range2_A = key_comp(*key_first, midkey); + + if(is_range1_A == is_range2_A){ + //If buffered, put those elements in place + RandIt res = op(forward_t(), buffer, buffer_end, first1); + BOOST_ASSERT(buffer == buffer_end || res == last1); (void)res; + buffer_end = buffer; + first1 = last1; + } + else { + first1 = op_partial_merge_with_buf(first1, last1, last1, last1 + l_block, xbuf, buffer, buffer_end, comp, op, is_range1_A); + BOOST_ASSERT(buffer == buffer_end || (buffer_end-buffer) == (last1+l_block-first1)); + is_range1_A ^= buffer == buffer_end; } } + + //Now the trailing irregular block, first put buffered elements in place + RandIt res = op(forward_t(), buffer, buffer_end, first1); + BOOST_ASSERT(buffer == buffer_end || res == last1); (void)res; + + BOOST_ASSERT(l_irreg2 || n_aft_irreg2); + if(l_irreg2){ + bool const is_range2_A = false; //last l_irreg2 elements always from range B + if(is_range1_A == is_range2_A){ + first1 = last1; + last1 = last1+l_block*n_aft_irreg2; + } + else { + last1 += l_block*n_aft_irreg2; + } + xbuf.clear(); + op_buffered_merge(first1, last1, last1+l_irreg2, comp, op, xbuf); + } } + template void merge_blocks_with_buf ( RandItKeys key_first @@ -532,9 +546,8 @@ RandIt op_partial_merge_left_middle_buffer_impl , const typename iterator_traits::value_type &next_key, Compare comp , Op op) { - while(first1 != last1 && !comp(next_key, *first1)){ - ++first1; - } + first1 = skip_until_merge(first1, last1, next_key, comp); + //Even if we copy backward, no overlapping occurs so use forward copy //that can be faster specially with trivial types RandIt const new_first1 = first2 - (last1 - first1); @@ -552,72 +565,6 @@ RandIt op_partial_merge_left_middle_buffer : op_partial_merge_left_middle_buffer_impl(first1, last1, first2, next_key, antistable(comp), op); } -// Partially merges two ordered ranges. Partially means that elements are merged -// until one of two ranges is exhausted (M elements from ranges 1 y 2). -// [buf_first, ...) -> buffer that can be overwritten -// [first1, last1) merge [last1,last2) -> [buf_first, buf_first+M) -// Note: distance(buf_first, first1) >= distance(last1, last2), so no overlapping occurs. -template -RandIt op_partial_merge_left_impl - ( RandIt buf_first, RandIt first1, RandIt const last1, RandIt const last2, Compare comp, Op op) -{ - RandIt first2 = last1; - while(first1 != last1){ - if(first2 == last2){ - return first1; - } - if(comp(*first2, *first1)) { - op(first2, buf_first); - ++first2; - } - else{ - op(first1, buf_first); - ++first1; - } - ++buf_first; - } - return first2; -} - - -template -RandIt op_partial_merge_left - ( RandIt buf_first, RandIt first1, RandIt const last1, RandIt const last2, Compare comp, Op op, bool is_stable) -{ - return is_stable ? op_partial_merge_left_impl(buf_first, first1, last1, last2, comp, op) - : op_partial_merge_left_impl(buf_first, first1, last1, last2, antistable(comp), op); -} - -template -bool three_way_side_init( RandIt first1, RandIt last1, RandIt buf_first1, move_op) -{ - boost::move(first1, last1, buf_first1); - return true; -} - -template -bool three_way_side_init( RandIt, RandIt, RandIt, swap_op) -{ - return false; -} - -template -void three_way_side_move(T &a, T &b, T&c, move_op) -{ - c = boost::move(b); - b = boost::move(a); -} - -template -void three_way_side_move(T &a, T &b, T &c, swap_op) -{ - T tmp(boost::move(c)); - c = boost::move(b); - b = boost::move(a); - a = boost::move(tmp); -} - - // Partially merges two ordered ranges. Partially means that elements are merged // until one of two ranges is exhausted (M elements from ranges 1 y 2). // [buf_first, ...) -> buffer that can be overwritten @@ -627,39 +574,17 @@ template RandIt op_partial_merge_left_smart_impl ( RandIt first1, RandIt last1, RandIt first2, RandIt const last2, Compare comp, Op op) { - typedef typename iterator_traits::value_type value_type; - RandIt dest; if(last1 != first2){ + BOOST_ASSERT(0 != (last1-first1)); BOOST_ASSERT((first2-last1)==(last2-first2)); //Skip any element that does not need to be moved - while(!comp(*first2, *first1)){ - ++first1; - if(first1 == last1) - return first2; - } - + first1 = skip_until_merge(first1, last1, *first2, comp); + if(first1 == last1) + return first2; RandIt buf_first1 = first2 - (last1-first1); - - //If initialization is successful, move to buffer while merging - //Three way moves need less moves when op is swap_op so use it - //when merging elements from range2 to the destination occupied by range1 - if(!three_way_side_init(first1, last1, buf_first1, op)){ - RandIt buf_last1 = buf_first1; - three_way_side_move(*first2, *first1, *buf_last1++, op); - - RandIt const orig_first2 = first2;(void)(orig_first2); - for(++first1, ++first2; first1 != last1; ++first1, ++buf_last1){ - value_type &v = comp(*first2, *buf_first1) ? *first2++ : *buf_first1++; - three_way_side_move(v, *first1, *buf_last1, op); - } - BOOST_ASSERT(buf_last1 == orig_first2); - last1 = buf_last1; - } - else{ - last1 = first2; - } - dest = first1; + dest = last1; + last1 = op_buffered_partial_merge_to_left_placed(first1, last1, first2, last2, buf_first1, comp, op); first1 = buf_first1; BOOST_ASSERT((first1-dest) == (last2-first2)); } @@ -667,27 +592,10 @@ RandIt op_partial_merge_left_smart_impl dest = first1-(last2-first2); } - BOOST_ASSERT(0 != (last1-first1)); - if(first2 != last2) - while(1){ - if(comp(*first2, *first1)) { - op(first2++, dest++); - if(first2 == last2){ - return first1; - } - } - else{ - op(first1++, dest++); - if(first1 == last1){ - return first2; - } - } - } - return first1; + op_partial_merge(first1, last1, first2, last2, dest, comp, op); + return first1 == last1 ? first2 : first1; } - - template RandIt op_partial_merge_left_smart (RandIt first1, RandIt const last1, RandIt first2, RandIt const last2, Compare comp, Op op, bool is_stable) @@ -696,7 +604,6 @@ RandIt op_partial_merge_left_smart : op_partial_merge_left_smart_impl(first1, last1, first2, last2, antistable(comp), op); } - // first - first element to merge. // first[-l_block, 0) - buffer // l_block - length of regular blocks. Blocks are stable sorted by 1st elements and key-coded @@ -717,209 +624,58 @@ void op_merge_blocks_left , typename iterator_traits::size_type const l_irreg2 , Compare comp, Op op) { - if(n_bef_irreg2 == 0){ - RandIt const last_reg(first+l_irreg1+n_aft_irreg2*l_block); - op_merge_left(first-l_block, first, last_reg, last_reg+l_irreg2, comp, op); - } - else { - RandIt buffer = first - l_block; - RandIt first1 = first; - RandIt last1 = l_irreg1 ? first1 + l_irreg1 : first1 + l_block; - RandIt first2 = last1; - RandItKeys const key_end (key_first+n_bef_irreg2); - bool skip_first_it = l_irreg1 != 0; - for( bool is_range1_A = l_irreg1 ? true : key_comp(*key_first, midkey) - ; key_first != key_end; first2 += l_block){ - if(!skip_first_it){ - ++key_first; - } - skip_first_it = false; - bool const last_it = key_first == key_end; - //If the trailing block is empty, we'll make it equal to the previous if empty - bool const is_range2_A = last_it ? (!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey); - bool const is_buffer_middle = last1 == buffer; - - if(is_range1_A == is_range2_A){ - //If range1 is buffered, write it to its final position - if(!is_buffer_middle){ - buffer = op(forward_t(), first1, last1, buffer); - } - - first1 = first2; - if(last_it){ - last1 = first2+l_block*n_aft_irreg2; - op_merge_left(buffer, first1, last1, last1+l_irreg2, comp, op); - break; - } - else{ - last1 = first2 + l_block; - } - } - else { - BOOST_ASSERT(!last_it || (l_irreg2 || n_aft_irreg2)); - if(last_it){ - if(is_buffer_middle){ - //See comment below marked with (*) - first1 = op_partial_merge_left_middle_buffer(first1, last1, first2, first2[l_block*n_aft_irreg2], comp, op, is_range1_A); - last1 = first2; - buffer = first1 - l_block; - } - last1 += l_block*n_aft_irreg2; - op_merge_left(buffer, first1, last1, last1+l_irreg2, comp, op); - break; - } - else{ - RandIt const last2 = first2 + l_block; - first1 = op_partial_merge_left_smart(first1, last1, first2, last2, comp, op, is_range1_A); - - if(first1 < first2){ //is_buffer_middle == true for the next iteration - last1 = first2; - buffer = last1; - } - else{ //is_buffer_middle == false for the next iteration - is_range1_A = is_range2_A; - buffer = first1 - l_block; - last1 = last2; - } - } - } - } - } -} - -/////////////////////////////////////////////////////////////////////////////// -// -// PARTIAL MERGE RIGHT -// -/////////////////////////////////////////////////////////////////////////////// - - -template -RandIt op_partial_merge_right_middle_buffer_impl - ( RandIt const last1, RandIt const first2, RandIt last2, Compare comp, Op op) -{ - while(first2 != last2 && !comp(last2[-1], last1[-1])){ - --last2; - } - return op(forward_t(), first2, last2, last1); -} - -template -RandIt op_partial_merge_right_middle_buffer - ( RandIt const last1, RandIt first2, RandIt const last2 - , Compare comp, Op op, bool is_stable) -{ - return is_stable ? op_partial_merge_right_middle_buffer_impl(last1, first2, last2, comp, op) - : op_partial_merge_right_middle_buffer_impl(last1, first2, last2, antistable(comp), op); -} - -// Partially merges two ordered ranges. Partially means that elements are merged -// until one of two ranges is exhausted (M elements from ranges 1 y 2). -// [last2, buf_last) -> buffer that can be overwritten -// [first1, last1) merge [last1,last2) -> [buf_last - M, buf_last) -// Note: distance(last2, buf_last) >= distance(first1, last1), so no overlapping occurs. -template -RandIt op_partial_merge_right_impl - ( RandIt const first1, RandIt last1, RandIt last2, RandIt buf_last, Compare comp, Op op) -{ - RandIt const first2 = last1; - while(first2 != last2){ - if(last1 == first1){ - return last2; - } - --last2; - --last1; - --buf_last; - if(comp(*last2, *last1)){ - op(last1, buf_last); - ++last2; - } - else{ - op(last2, buf_last); - ++last1; - } - } - return last1; -} - -template -RandIt op_partial_merge_right - ( RandIt first1, RandIt const last1, RandIt const last2, RandIt buf_last, Compare comp, Op op, bool is_stable) -{ - return is_stable ? op_partial_merge_right_impl(first1, last1, last2, buf_last, comp, op) - : op_partial_merge_right_impl(first1, last1, last2, buf_last, antistable(comp), op); -} - - -// first - first element to merge. -// last iterator is (first+l_block*(n_bef_irreg2+n_aft_irreg2)+l_irreg2) -// [last, last+l_block) - buffer -// l_block - length of regular blocks. Blocks are stable sorted by 1st elements and key-coded -// key_first - sequence of keys, in same order as blocks. key -void op_merge_blocks_right - ( RandItKeys const key_first - , const typename iterator_traits::value_type &midkey - , KeyCompare key_comp - , RandIt const first - , typename iterator_traits::size_type const l_block - , typename iterator_traits::size_type const n_bef_irreg2 - , typename iterator_traits::size_type const n_aft_irreg2 - , typename iterator_traits::size_type const l_irreg2 - , Compare comp, Op op) -{ - RandIt last1 = first + (n_bef_irreg2+n_aft_irreg2)*l_block; - RandIt first1 = last1-l_block; + RandIt buffer = first - l_block; + RandIt first1 = first; + RandIt last1 = first1 + l_irreg1; RandIt first2 = last1; - RandIt last2 = first2 + l_irreg2; - RandIt buffer_end = last2 + l_block; - RandItKeys key_end (key_first+(n_bef_irreg2+n_aft_irreg2)); - - for(bool is_range2_A = false; key_first != key_end; last1 = first1, first1 -= l_block){ - --key_end; - bool const is_range1_A = key_comp(*key_end, midkey); - bool const is_buffer_middle = first2 == buffer_end; + RandItKeys const key_end (key_first+n_bef_irreg2); + bool is_range1_A = true; + for( ; key_first != key_end; first2 += l_block, ++key_first){ + //If the trailing block is empty, we'll make it equal to the previous if empty + bool const is_range2_A = key_comp(*key_first, midkey); if(is_range1_A == is_range2_A){ - if(!is_buffer_middle){ - buffer_end = op(backward_t(), first2, last2, buffer_end); + if(last1 != buffer){ //equiv. to if(!is_buffer_middle) + buffer = op(forward_t(), first1, last1, buffer); } - //else //op forward already done on previous op_partial_merge_right - - first2 = first1; - last2 = last1; + first1 = first2; + last1 = first2 + l_block; } else { - if(is_buffer_middle){ - //A previous op_partial_merge_right has right range2 elements after the buffer. - //In order to merge it with the next block, move them to the start of the buffer so that - //buffer is placed to the right. Move only the minimum elements as some range1 elements - //won't be moved in the merge. - last2 = op_partial_merge_right_middle_buffer(last1, first2, last2, comp, op, !is_range2_A); - first2 = last1; - buffer_end = last2 + l_block; - } + RandIt const last2 = first2 + l_block; + first1 = op_partial_merge_left_smart(first1, last1, first2, last2, comp, op, is_range1_A); - //op_partial_merge_right merges two ranges, but stops moving elements - //once one range is emptied to avoid moving data twice in the next iteration - last2 = op_partial_merge_right(first1, last1, last2, buffer_end, comp, op, !is_range2_A); - if(last2 > first2){ //is_buffer_middle == true for the next iteration - buffer_end = first2; + if(first1 < first2){ //is_buffer_middle for the next iteration + last1 = first2; + buffer = last1; } - else{ //is_buffer_middle == false for the next iteration - is_range2_A = is_range1_A; - buffer_end = last2 + l_block; - first2 = first1; + else{ //!is_buffer_middle for the next iteration + is_range1_A = is_range2_A; + buffer = first1 - l_block; + last1 = last2; } } } - if(first2 != buffer_end){ - op(backward_t(), first2, last2, buffer_end); + //Now the trailing irregular block + bool const is_range2_A = false; //Trailing l_irreg2 is always from Range B + bool const is_buffer_middle = last1 == buffer; + + if(!l_irreg2 || is_range1_A == is_range2_A){ //trailing is always B type + //If range1 is buffered, write it to its final position + if(!is_buffer_middle){ + buffer = op(forward_t(), first1, last1, buffer); + } + first1 = first2; } + else { + if(is_buffer_middle){ + first1 = op_partial_merge_left_middle_buffer(first1, last1, first2, first2[l_block*n_aft_irreg2], comp, op, is_range1_A); + buffer = first1 - l_block; + } + } + last1 = first2 + l_block*n_aft_irreg2; + op_merge_left(buffer, first1, last1, last1+l_irreg2, comp, op); } /////////////////////////////////////////////////////////////////////////////// @@ -1200,6 +956,18 @@ struct less // /////////////////////////////////////////////////////////////////////////////// +//#define ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN + +#if defined ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN +template +void slow_stable_sort + ( RandIt const first, RandIt const last, Compare comp) +{ + boost::movelib::inplace_stable_sort(first, last, comp); +} + +#else //ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN + template void slow_stable_sort ( RandIt const first, RandIt const last, Compare comp) @@ -1232,6 +1000,8 @@ void slow_stable_sort } } +#endif //ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN + //Returns new l_block and updates use_buf template Unsigned lblock_for_combine @@ -1271,7 +1041,7 @@ Unsigned lblock_for_combine //Although "cycle" sort is known to have the minimum number of writes to target //selection sort is more appropriate here as we want to minimize swaps. -template +template void selection_sort_blocks ( RandItKeys keys , typename iterator_traits::size_type &midkey_idx //inout @@ -1280,7 +1050,9 @@ void selection_sort_blocks , typename iterator_traits::size_type const l_block , typename iterator_traits::size_type const n_blocks , Compare comp - , bool use_first_element) + , bool use_first_element + , XBuf & xbuf +) { typedef typename iterator_traits::size_type size_type ; size_type const back_midkey_idx = midkey_idx; @@ -1294,6 +1066,10 @@ void selection_sort_blocks //One-past the position of the first untouched element of the second half size_type high_watermark = back_midkey_idx+1; BOOST_ASSERT(high_watermark <= n_blocks); + const bool b_cache_on = xbuf.capacity() >= l_block; + //const bool b_cache_on = false; + const size_type cached_none = size_type(-1); + size_type cached_block = cached_none; //Sort by first element if left merging, last element otherwise size_type const reg_off = use_first_element ? 0u: l_block-1; @@ -1303,35 +1079,89 @@ void selection_sort_blocks //Since we are searching for the minimum value in two sorted halves: //Optimization 1: If block belongs to first half, don't waste time comparing elements of the first half. //Optimization 2: It is enough to compare until the first untouched element of the second half. + //Optimization 3: If cache memory is available, instead of swapping blocks (3 writes per element), + // play with the cache to aproximate it to 2 writes per element. high_watermark = size_type(max_value(block+2, high_watermark)); BOOST_ASSERT(high_watermark <= n_blocks); for(size_type next_block = size_type(max_value(block+1, back_midkey_idx)); next_block < high_watermark; ++next_block){ - const value_type &v = first_block[next_block*l_block+reg_off]; - const value_type &min = first_block[min_block*l_block+reg_off]; - if( comp(v, min) || (!comp(min, v) && key_comp(keys[next_block], keys[min_block])) ){ - min_block=next_block; + const value_type &min_v = (b_cache_on && (cached_block == min_block) ? xbuf.data()[reg_off] : first_block[min_block*l_block+reg_off]); + const value_type &v = (b_cache_on && (cached_block == next_block) ? xbuf.data()[reg_off] : first_block[next_block*l_block+reg_off]); + + if( comp(v, min_v) || (!comp(min_v, v) && key_comp(keys[next_block], keys[min_block])) ){ + min_block = next_block; } } if(min_block != block){ BOOST_ASSERT(block >= back_midkey_idx || min_block >= back_midkey_idx); BOOST_ASSERT(min_block < high_watermark); - //Update high watermark if n_blocks is not surpassed + //Increase high watermark if not the maximum and min_block is just before the high watermark high_watermark += size_type((min_block + 1) != n_blocks && (min_block + 1) == high_watermark); BOOST_ASSERT(high_watermark <= n_blocks); - boost::adl_move_swap_ranges(first_block+block*l_block, first_block+(block+1)*l_block, first_block+min_block*l_block); + if(!b_cache_on){ + boost::adl_move_swap_ranges(first_block+block*l_block, first_block+(block+1)*l_block, first_block+min_block*l_block); + } + else if(cached_block == cached_none){ + //Cache the biggest block and put the minimum into its final position + xbuf.move_assign(first_block+block*l_block, l_block); + boost::move(first_block+min_block*l_block, first_block+(min_block+1)*l_block, first_block+block*l_block); + cached_block = min_block; + } + else if(cached_block == block){ + //Since block is cached and is not the minimum, just put the minimum directly into its final position and update the cache index + boost::move(first_block+min_block*l_block, first_block+(min_block+1)*l_block, first_block+block*l_block); + cached_block = min_block; + } + else if(cached_block == min_block){ + //Since the minimum is cached, move the block to the back position and flush the cache to its final position + boost::move(first_block+block*l_block, first_block+(block+1)*l_block, first_block+min_block*l_block); + boost::move(xbuf.data(), xbuf.end(), first_block+block*l_block); + cached_block = cached_none; + } + else{ + //Cached block is not any of two blocks to be exchanged, a smarter operation must be performed + BOOST_ASSERT(cached_block != min_block); + BOOST_ASSERT(cached_block != block); + BOOST_ASSERT(cached_block > block); + BOOST_ASSERT(cached_block < high_watermark); + //Instead of moving block to the slot of the minimum (which is typical selection sort), before copying + //data from the minimum slot to its final position: + // -> move it to free slot pointed by cached index, and + // -> move cached index into slot of the minimum. + //Since both cached_block and min_block belong to the still unordered range of blocks, the change + //does not break selection sort and saves one copy. + boost::move(first_block+block*l_block, first_block+(block+1)*l_block, first_block+cached_block*l_block); + boost::move(first_block+min_block*l_block, first_block+(min_block+1)*l_block, first_block+block*l_block); + //Note that this trick requires an additionl fix for keys and midkey index + boost::adl_move_swap(keys[cached_block], keys[min_block]); + if(midkey_idx == cached_block) + midkey_idx = min_block; + else if(midkey_idx == min_block) + midkey_idx = cached_block; + boost::adl_move_swap(cached_block, min_block); + } + //Once min_block and block are exchanged, fix the movement imitation key buffer and midkey index. boost::adl_move_swap(keys[block], keys[min_block]); if(midkey_idx == block) midkey_idx = min_block; else if(midkey_idx == min_block) midkey_idx = block; } + else if(b_cache_on && cached_block == block){ + //The selected block was the minimum, but since it was cached, move it to its final position + boost::move(xbuf.data(), xbuf.end(), first_block+block*l_block); + cached_block = cached_none; + } + } //main for loop + + if(b_cache_on && cached_block != cached_none){ + //The sort has ended with cached data, move it to its final position + boost::move(xbuf.data(), xbuf.end(), first_block+cached_block*l_block); } } -template -void stable_sort( RandIt first, RandIt last, Compare comp - , adaptive_xbuf::value_type> & xbuf) +template +void stable_sort( RandIt first, RandIt last, Compare comp, XBuf & xbuf) { typedef typename iterator_traits::size_type size_type; size_type const len = size_type(last - first); @@ -1344,10 +1174,10 @@ void stable_sort( RandIt first, RandIt last, Compare comp } } -template +template void initialize_keys( RandIt first, RandIt last , Comp comp - , adaptive_xbuf::value_type> & xbuf) + , XBuf & xbuf) { stable_sort(first, last, comp, xbuf); } @@ -1365,7 +1195,7 @@ void initialize_keys( RandIt first, RandIt last } } -template +template void combine_params ( RandItKeys const keys , KeyCompare key_comp @@ -1373,7 +1203,7 @@ void combine_params , typename iterator_traits::size_type l_combined , typename iterator_traits::size_type const l_prev_merged , typename iterator_traits::size_type const l_block - , adaptive_xbuf::value_type> & xbuf + , XBuf & xbuf , Compare comp //Output , typename iterator_traits::size_type &midkey_idx @@ -1381,24 +1211,36 @@ void combine_params , typename iterator_traits::size_type &n_bef_irreg2 , typename iterator_traits::size_type &n_aft_irreg2 , typename iterator_traits::size_type &l_irreg2 - , bool is_merge_left) + //Options + , bool is_merge_left_or_bufferless + , bool do_initialize_keys = true) { typedef typename iterator_traits::size_type size_type; typedef typename iterator_traits::value_type value_type; + //Initial parameters for selection sort blocks l_irreg1 = l_prev_merged%l_block; l_irreg2 = (l_combined-l_irreg1)%l_block; BOOST_ASSERT(((l_combined-l_irreg1-l_irreg2)%l_block) == 0); size_type const n_reg_block = (l_combined-l_irreg1-l_irreg2)/l_block; midkey_idx = l_prev_merged/l_block; BOOST_ASSERT(n_reg_block>=midkey_idx); - initialize_keys(keys, keys+n_reg_block+(midkey_idx==n_reg_block), key_comp, xbuf); - selection_sort_blocks(keys, midkey_idx, key_comp, first+l_irreg1, l_block, n_reg_block, comp, is_merge_left); + //Key initialization + if (do_initialize_keys) { + initialize_keys(keys, keys+n_reg_block+(midkey_idx==n_reg_block), key_comp, xbuf); + } + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A initkey: ", l_combined + l_block); + + //Selection sort blocks + selection_sort_blocks(keys, midkey_idx, key_comp, first+l_irreg1, l_block, n_reg_block, comp, is_merge_left_or_bufferless, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A selsort: ", l_combined + l_block); + + //Special case for the last elements n_aft_irreg2 = 0; - if(l_irreg2!=0){ - size_type const reg_off = is_merge_left ? 0u: l_block-1; - size_type const irreg_off = is_merge_left ? 0u: l_irreg2-1; + if(l_irreg2 != 0){ + size_type const reg_off = is_merge_left_or_bufferless ? 0u: l_block-1; + size_type const irreg_off = is_merge_left_or_bufferless ? 0u: l_irreg2-1; RandIt prev_block_first = first + l_combined - l_irreg2; const value_type &incomplete_block_first = prev_block_first[irreg_off]; while(n_aft_irreg2 != n_reg_block && @@ -1461,14 +1303,17 @@ void merge_blocks_right , Compare comp , bool const xbuf_used) { - if(xbuf_used){ - op_merge_blocks_right - (key_first, midkey, key_comp, first, l_block, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, move_op()); - } - else{ - op_merge_blocks_right - (key_first, midkey, key_comp, first, l_block, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, swap_op()); - } + merge_blocks_left + ( make_reverse_iterator(key_first+n_aft_irreg2 + n_bef_irreg2) + , midkey + , negate(key_comp) + , make_reverse_iterator(first+(n_bef_irreg2+n_aft_irreg2)*l_block+l_irreg2) + , l_block + , l_irreg2 + , n_aft_irreg2 + n_bef_irreg2 + , 0 + , 0 + , inverse(comp), xbuf_used); } @@ -1530,8 +1375,8 @@ Unsigned calculate_total_combined(Unsigned const len, Unsigned const l_prev_merg // Blocks of length l_prev_merged combined. We'll combine them in pairs // l_prev_merged and n_keys are powers of 2. (2*l_prev_merged/l_block) keys are guaranteed // Returns the number of combined elements (some trailing elements might be left uncombined) -template -void combine_blocks +template +void adaptive_sort_combine_blocks ( RandItKeys const keys , KeyCompare key_comp , RandIt const first @@ -1540,16 +1385,17 @@ void combine_blocks , typename iterator_traits::size_type const l_block , bool const use_buf , bool const xbuf_used - , adaptive_xbuf::value_type> & xbuf + , XBuf & xbuf , Compare comp , bool merge_left) { + (void)xbuf; typedef typename iterator_traits::size_type size_type; - size_type const l_combined = 2*l_prev_merged; + size_type const l_reg_combined = 2*l_prev_merged; size_type l_irreg_combined = 0; size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined); - size_type const n_reg_combined = len/l_combined; + size_type const n_reg_combined = len/l_reg_combined; RandIt combined_first = first; (void)l_total_combined; @@ -1558,32 +1404,37 @@ void combine_blocks size_type n_bef_irreg2, n_aft_irreg2, midkey_idx, l_irreg1, l_irreg2; size_type const max_i = n_reg_combined + (l_irreg_combined != 0); - if(merge_left || !use_buf) - for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_combined) { + if(merge_left || !use_buf) { + for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_reg_combined) { bool const is_last = combined_i==n_reg_combined; - combine_params( keys, key_comp, combined_first, is_last ? l_irreg_combined : l_combined - , l_prev_merged, l_block, xbuf, comp + size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; + + range_xbuf rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first); + combine_params( keys, key_comp, combined_first, l_cur_combined + , l_prev_merged, l_block, rbuf, comp , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs - //BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After combine_params: ", len + l_block); - BOOST_ASSERT(!l_irreg1); - if(use_buf){ - merge_blocks_left - (keys, keys[midkey_idx], key_comp, combined_first, l_block, 0u, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used); - } - else{ + //Now merge blocks + if(!use_buf){ merge_blocks_bufferless (keys, keys[midkey_idx], key_comp, combined_first, l_block, 0u, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp); } + else{ + merge_blocks_left + (keys, keys[midkey_idx], key_comp, combined_first, l_block, 0u, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used); + } //BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After merge_blocks_l: ", len + l_block); } + } else{ - combined_first += l_combined*(max_i-1); - for( size_type combined_i = max_i; combined_i--; combined_first -= l_combined) { + combined_first += l_reg_combined*(max_i-1); + for( size_type combined_i = max_i; combined_i--; combined_first -= l_reg_combined) { bool const is_last = combined_i==n_reg_combined; - combine_params( keys, key_comp, combined_first, is_last ? l_irreg_combined : l_combined - , l_prev_merged, l_block, xbuf, comp - , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, false); //Outputs - BOOST_ASSERT(!l_irreg1); + size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; + RandIt const combined_last(combined_first+l_cur_combined); + range_xbuf rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last); + combine_params( keys, key_comp, combined_first, l_cur_combined + , l_prev_merged, l_block, rbuf, comp + , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, false); //Outputs //BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After combine_params: ", len + l_block); merge_blocks_right (keys, keys[midkey_idx], key_comp, combined_first, l_block, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used); @@ -1741,7 +1592,7 @@ void op_merge_right_step // until all is merged or auxiliary memory is not large enough. template typename iterator_traits::size_type - build_blocks + adaptive_sort_build_blocks ( RandIt const first , typename iterator_traits::size_type const len , typename iterator_traits::size_type const l_base @@ -1830,7 +1681,7 @@ typename iterator_traits::size_type //[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is //[buffer,buffer+l_intbuf) template -bool combine_all_blocks +bool adaptive_sort_combine_all_blocks ( RandIt keys , typename iterator_traits::size_type &n_keys , RandIt const buffer @@ -1847,14 +1698,7 @@ bool combine_all_blocks //Backup data to external buffer once if possible bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity(); if(common_xbuf){ - if(n_keys){ - xbuf.move_assign(buffer, l_intbuf); - } - else{ - xbuf.clear(); - merge_sort_uninitialized_copy(buffer, first, xbuf.data(), comp); - xbuf.set_size(l_intbuf); - } + xbuf.move_assign(buffer, l_intbuf); } bool prev_merge_left = true; @@ -1877,8 +1721,7 @@ bool combine_all_blocks bool const is_merge_left = (n&1) == 0; size_type const l_total_combined = calculate_total_combined(l_data, l_merged); - - if(prev_use_internal_buf && prev_merge_left){ + if(n && prev_use_internal_buf && prev_merge_left){ if(is_merge_left || !use_internal_buf){ move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf); } @@ -1900,13 +1743,13 @@ bool combine_all_blocks //Combine to form l_merged*2 segments if(n_keys){ - combine_blocks + adaptive_sort_combine_blocks ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); } else{ size_type *const uint_keys = xbuf.template aligned_trailing(); - combine_blocks + adaptive_sort_combine_blocks ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); } @@ -1918,6 +1761,7 @@ bool combine_all_blocks } BOOST_ASSERT(l_prev_total_combined == l_data); bool const buffer_right = prev_use_internal_buf && prev_merge_left; + l_intbuf = prev_use_internal_buf ? l_prev_block : 0u; n_keys = l_unique - l_intbuf; //Restore data from to external common buffer if used @@ -1962,7 +1806,7 @@ void adaptive_sort_final_merge( bool buffer_right , adaptive_xbuf::value_type> & xbuf , Compare comp) { - BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf); + //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf); xbuf.clear(); typedef typename iterator_traits::size_type size_type; @@ -1990,7 +1834,7 @@ void adaptive_sort_final_merge( bool buffer_right } template -bool build_params +bool adaptive_sort_build_params (RandIt first, Unsigned const len, Compare comp , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf , adaptive_xbuf & xbuf @@ -2094,20 +1938,21 @@ inline void adaptive_merge_combine_blocks( RandIt first size_type const l_combine = len-collected; size_type const l_combine1 = len1-collected; size_type n_bef_irreg2, n_aft_irreg2, l_irreg1, l_irreg2, midkey_idx; + if(n_keys){ RandIt const first_data = first+collected; RandIt const keys = first; combine_params( keys, comp, first_data, l_combine , l_combine1, l_block, xbuf, comp - , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs + , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true, false); //Outputs BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len); if(xbuf_used){ + BOOST_ASSERT(xbuf.size() >= l_block); merge_blocks_with_buf (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, xbuf_used); BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg xbf: ", len); } else if(use_internal_buf){ - #ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF range_xbuf rbuf(first_data-l_block, first_data); merge_blocks_with_buf @@ -2122,20 +1967,24 @@ inline void adaptive_merge_combine_blocks( RandIt first else{ merge_blocks_bufferless (keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg bfl: ", len); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg xbf: ", len); } } else{ - xbuf.clear(); + xbuf.shrink_to_fit(l_block); + if(xbuf.size() < l_block){ + xbuf.initialize_until(l_block, *first); + } size_type *const uint_keys = xbuf.template aligned_trailing(l_block); combine_params( uint_keys, less(), first, l_combine , l_combine1, l_block, xbuf, comp - , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs + , midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true, true); //Outputs BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len); + BOOST_ASSERT(xbuf.size() >= l_block); merge_blocks_with_buf (uint_keys, uint_keys[midkey_idx], less(), first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, true); xbuf.clear(); - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg lft: ", len); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: ", len); } } @@ -2164,39 +2013,40 @@ inline void adaptive_merge_final_merge( RandIt first if(n_keys){ stable_sort(first, first+n_keys, comp, xbuf); stable_merge(first, first+n_keys, first+len, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: ", len); } } else{ #ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF xbuf.clear(); stable_sort(first, first+collected, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b srt: ", len); stable_merge(first, first+collected, first+len, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b mrg: ", len); #else xbuf.clear(); stable_sort(first+len-l_block, first+len, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A buf srt: ", len); RandIt const pos1 = lower_bound(first+n_keys, first+len-l_block, first[len-1], comp); RandIt const pos2 = rotate_gcd(pos1, first+len-l_block, first+len); stable_merge(first+n_keys, pos1, pos2, antistable(comp), xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A buf mrg: ", len); if(n_keys){ stable_sort(first, first+n_keys, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key srt: ", len); stable_merge(first, first+n_keys, first+len, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: ", len); } #endif } - - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A buf mrg: ", len); } else{ - stable_sort(first, first+collected, comp, xbuf); xbuf.clear(); - if(xbuf.capacity() >= collected){ - buffered_merge(first, first+collected, first+len1+len2, comp, xbuf); - } - else{ - merge_bufferless(first, first+collected, first+len1+len2, comp); - } + stable_sort(first, first+collected, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b srt: ", len); + stable_merge(first, first+collected, first+len1+len2, comp, xbuf); + BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b mrg: ", len); } - BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: ", len); } template @@ -2207,12 +2057,12 @@ inline SizeType adaptive_merge_n_keys_intbuf(SizeType l_block, SizeType len, Xbu //This is the minimum number of keys to implement the ideal algorithm //ceil(len/l_block) - 1 (as the first block is used as buffer) - size_type n_keys = l_block; + size_type n_keys = len/l_block+1; while(n_keys >= (len-l_intbuf-n_keys)/l_block){ --n_keys; } ++n_keys; - BOOST_ASSERT(n_keys < l_block); + //BOOST_ASSERT(n_keys < l_block); if(xbuf.template supports_aligned_trailing(l_block, n_keys)){ n_keys = 0u; @@ -2316,7 +2166,7 @@ void adaptive_sort_impl //Calculate and extract needed unique elements. If a minimum is not achieved //fallback to rotation-based merge - if(!build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){ + if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){ stable_sort(first, first+len, comp, xbuf); return; } @@ -2328,12 +2178,12 @@ void adaptive_sort_impl BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1)))); //Classic merge sort until internal buffer and xbuf are exhausted - size_type const l_merged = build_blocks + size_type const l_merged = adaptive_sort_build_blocks (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp); BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After build_blocks: ", len); //Non-trivial merge - bool const buffer_right = combine_all_blocks + bool const buffer_right = adaptive_sort_combine_all_blocks (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp); //Sort keys and buffer and merge the whole sequence