mirror of
https://github.com/boostorg/move.git
synced 2025-07-31 12:57:14 +02:00
Refactored and documented the sort and merge algorithm
This commit is contained in:
@ -37,8 +37,8 @@
|
||||
// elements twice.
|
||||
//
|
||||
// The adaptive_merge algorithm was developed by Ion Gaztanaga reusing some parts
|
||||
// from the sorting algorithm and implementing a block merge algorithm
|
||||
// without moving elements left or right, which is used when external memory
|
||||
// from the sorting algorithm and implementing an additional block merge algorithm
|
||||
// without moving elements to left or right, which is used when external memory
|
||||
// is available.
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
#ifndef BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP
|
||||
@ -371,7 +371,7 @@ RandIt op_partial_merge_with_buf_impl
|
||||
//Now merge from buffer
|
||||
if(first2 != last2)
|
||||
while(1){
|
||||
if(comp(*first2, *buf_first1)) {
|
||||
if(comp(*first2, *buf_first1)) {
|
||||
op(first2++, first1++);
|
||||
if(first2 == last2)
|
||||
break;
|
||||
@ -450,7 +450,7 @@ void op_merge_blocks_with_buf
|
||||
skip_first_it = false;
|
||||
bool const last_it = key_first == key_end;
|
||||
//On the last iteration, an empty trailing block (l_irreg2 == 0) is classified like the previous range
|
||||
bool const is_range2_A = last_it ? (!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey);
|
||||
bool const is_range2_A = last_it ? (!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey);
|
||||
|
||||
if(is_range1_A == is_range2_A){
|
||||
if(buffer != buffer_end){
|
||||
@ -565,8 +565,8 @@ RandIt op_partial_merge_left_impl
|
||||
while(first1 != last1){
|
||||
if(first2 == last2){
|
||||
return first1;
|
||||
}
|
||||
if(comp(*first2, *first1)) {
|
||||
}
|
||||
if(comp(*first2, *first1)) {
|
||||
op(first2, buf_first);
|
||||
++first2;
|
||||
}
|
||||
@ -670,11 +670,11 @@ RandIt op_partial_merge_left_smart_impl
|
||||
BOOST_ASSERT(0 != (last1-first1));
|
||||
if(first2 != last2)
|
||||
while(1){
|
||||
if(comp(*first2, *first1)) {
|
||||
if(comp(*first2, *first1)) {
|
||||
op(first2++, dest++);
|
||||
if(first2 == last2){
|
||||
return first1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else{
|
||||
op(first1++, dest++);
|
||||
@ -719,7 +719,7 @@ void op_merge_blocks_left
|
||||
{
|
||||
if(n_bef_irreg2 == 0){
|
||||
RandIt const last_reg(first+l_irreg1+n_aft_irreg2*l_block);
|
||||
op_merge_left(first-l_block, first, last_reg, last_reg+l_irreg2, comp, op);
|
||||
op_merge_left(first-l_block, first, last_reg, last_reg+l_irreg2, comp, op);
|
||||
}
|
||||
else {
|
||||
RandIt buffer = first - l_block;
|
||||
@ -736,8 +736,8 @@ void op_merge_blocks_left
|
||||
skip_first_it = false;
|
||||
bool const last_it = key_first == key_end;
|
||||
//On the last iteration, an empty trailing block (l_irreg2 == 0) is classified like the previous range
|
||||
bool const is_range2_A = last_it ? (!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey);
|
||||
bool const is_buffer_middle = last1 == buffer;
|
||||
bool const is_range2_A = last_it ? (!l_irreg2 && is_range1_A) : key_comp(*key_first, midkey);
|
||||
bool const is_buffer_middle = last1 == buffer;
|
||||
|
||||
if(is_range1_A == is_range2_A){
|
||||
//If range1 is buffered, write it to its final position
|
||||
@ -824,7 +824,7 @@ RandIt op_partial_merge_right_impl
|
||||
{
|
||||
RandIt const first2 = last1;
|
||||
while(first2 != last2){
|
||||
if(last1 == first1){
|
||||
if(last1 == first1){
|
||||
return last2;
|
||||
}
|
||||
--last2;
|
||||
@ -881,7 +881,7 @@ void op_merge_blocks_right
|
||||
for(bool is_range2_A = false; key_first != key_end; last1 = first1, first1 -= l_block){
|
||||
--key_end;
|
||||
bool const is_range1_A = key_comp(*key_end, midkey);
|
||||
bool const is_buffer_middle = first2 == buffer_end;
|
||||
bool const is_buffer_middle = first2 == buffer_end;
|
||||
|
||||
if(is_range1_A == is_range2_A){
|
||||
if(!is_buffer_middle){
|
||||
@ -937,17 +937,17 @@ RandIt partial_merge_bufferless_impl
|
||||
return first1;
|
||||
}
|
||||
bool const is_range1_A = *pis_range1_A;
|
||||
if(first1 != last1 && comp(*last1, last1[-1])){
|
||||
if(first1 != last1 && comp(*last1, last1[-1])){
|
||||
do{
|
||||
RandIt const old_last1 = last1;
|
||||
last1 = lower_bound(last1, last2, *first1, comp);
|
||||
last1 = lower_bound(last1, last2, *first1, comp);
|
||||
first1 = rotate_gcd(first1, old_last1, last1);//old_last1 == last1 supported
|
||||
if(last1 == last2){
|
||||
return first1;
|
||||
}
|
||||
do{
|
||||
++first1;
|
||||
} while(last1 != first1 && !comp(*last1, *first1) );
|
||||
} while(last1 != first1 && !comp(*last1, *first1) );
|
||||
} while(first1 != last1);
|
||||
}
|
||||
*pis_range1_A = !is_range1_A;
|
||||
@ -993,7 +993,7 @@ void merge_blocks_bufferless
|
||||
bool is_range1_A = l_irreg1 ? true : key_comp(*key_first++, midkey);
|
||||
|
||||
for( ; key_first != key_end; ++key_first){
|
||||
bool is_range2_A = key_comp(*key_first, midkey);
|
||||
bool is_range2_A = key_comp(*key_first, midkey);
|
||||
if(is_range1_A == is_range2_A){
|
||||
first1 = last1;
|
||||
}
|
||||
@ -1077,9 +1077,9 @@ typename iterator_traits<RandIt>::size_type
|
||||
if(xbuf.capacity() >= max_collected){
|
||||
value_type *const ph0 = xbuf.add(first);
|
||||
while(u != last && h < max_collected){
|
||||
value_type * const r = lower_bound(ph0, xbuf.end(), *u, comp);
|
||||
value_type * const r = lower_bound(ph0, xbuf.end(), *u, comp);
|
||||
//If key not found add it to [h, h+h0)
|
||||
if(r == xbuf.end() || comp(*u, *r) ){
|
||||
if(r == xbuf.end() || comp(*u, *r) ){
|
||||
RandIt const new_h0 = boost::move(search_end, u, h0);
|
||||
search_end = u;
|
||||
++search_end;
|
||||
@ -1094,9 +1094,9 @@ typename iterator_traits<RandIt>::size_type
|
||||
}
|
||||
else{
|
||||
while(u != last && h < max_collected){
|
||||
RandIt const r = lower_bound(h0, search_end, *u, comp);
|
||||
RandIt const r = lower_bound(h0, search_end, *u, comp);
|
||||
//If key not found add it to [h, h+h0)
|
||||
if(r == search_end || comp(*u, *r) ){
|
||||
if(r == search_end || comp(*u, *r) ){
|
||||
RandIt const new_h0 = rotate_gcd(h0, search_end, u);
|
||||
search_end = u;
|
||||
++search_end;
|
||||
@ -1222,11 +1222,11 @@ void slow_stable_sort
|
||||
if(do_merge){
|
||||
size_type const h_2 = 2*h;
|
||||
while((L-p0) > h_2){
|
||||
merge_bufferless(first+p0, first+p0+h, first+p0+h_2, comp);
|
||||
merge_bufferless(first+p0, first+p0+h, first+p0+h_2, comp);
|
||||
p0 += h_2;
|
||||
}
|
||||
}
|
||||
if((L-p0) > h){
|
||||
if((L-p0) > h){
|
||||
merge_bufferless(first+p0, first+p0+h, last, comp);
|
||||
}
|
||||
}
|
||||
@ -1401,7 +1401,7 @@ void combine_params
|
||||
size_type const irreg_off = is_merge_left ? 0u: l_irreg2-1;
|
||||
RandIt prev_block_first = first + l_combined - l_irreg2;
|
||||
const value_type &incomplete_block_first = prev_block_first[irreg_off];
|
||||
while(n_aft_irreg2 != n_reg_block &&
|
||||
while(n_aft_irreg2 != n_reg_block &&
|
||||
comp(incomplete_block_first, (prev_block_first-= l_block)[reg_off]) ){
|
||||
++n_aft_irreg2;
|
||||
}
|
||||
@ -1709,12 +1709,12 @@ void op_merge_right_step
|
||||
if(restk <= l_build_buf){
|
||||
op(backward_t(),first_block+p, first_block+p+restk, first_block+p+restk+l_build_buf);
|
||||
}
|
||||
else{
|
||||
else{
|
||||
op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+restk, first_block+p+restk+l_build_buf, comp, op);
|
||||
}
|
||||
while(p>0){
|
||||
p -= 2*l_build_buf;
|
||||
op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+2*l_build_buf, first_block+p+3*l_build_buf, comp, op);
|
||||
op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+2*l_build_buf, first_block+p+3*l_build_buf, comp, op);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1954,13 +1954,13 @@ void stable_merge
|
||||
|
||||
|
||||
template<class RandIt, class Compare>
|
||||
void final_merge( bool buffer_right
|
||||
, RandIt const first
|
||||
, typename iterator_traits<RandIt>::size_type const l_intbuf
|
||||
, typename iterator_traits<RandIt>::size_type const n_keys
|
||||
, typename iterator_traits<RandIt>::size_type const len
|
||||
, adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf
|
||||
, Compare comp)
|
||||
void adaptive_sort_final_merge( bool buffer_right
|
||||
, RandIt const first
|
||||
, typename iterator_traits<RandIt>::size_type const l_intbuf
|
||||
, typename iterator_traits<RandIt>::size_type const n_keys
|
||||
, typename iterator_traits<RandIt>::size_type const len
|
||||
, adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf
|
||||
, Compare comp)
|
||||
{
|
||||
BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf);
|
||||
xbuf.clear();
|
||||
@ -2009,7 +2009,7 @@ bool build_params
|
||||
//segments of size l_build_buf*2, maximizing the classic merge phase.
|
||||
l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));
|
||||
|
||||
//This is the minimum number of case to implement the ideal algorithm
|
||||
//This is the minimum number of keys to implement the ideal algorithm
|
||||
//
|
||||
//l_intbuf is used as buffer plus the key count
|
||||
size_type n_min_ideal_keys = l_intbuf-1u;
|
||||
@ -2030,10 +2030,10 @@ bool build_params
|
||||
//
|
||||
//If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed,
|
||||
//(to be used for keys in combine_all_blocks) as the whole l_build_buf
|
||||
//we'll be backuped in the buffer during build_blocks.
|
||||
//will be backuped in the buffer during build_blocks.
|
||||
bool const non_unique_buf = xbuf.capacity() >= 2*l_intbuf;
|
||||
size_type const to_collect = non_unique_buf ? l_intbuf : l_intbuf*2;
|
||||
size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);
|
||||
size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);
|
||||
|
||||
//If available memory is 2*sqrt(l), then for "build_params"
|
||||
//the situation is the same as if 2*l_intbuf were collected.
|
||||
@ -2044,7 +2044,7 @@ bool build_params
|
||||
//is possible (due to very low unique keys), then go to a slow sort based on rotations.
|
||||
if(collected < (n_min_ideal_keys+l_intbuf)){
|
||||
if(collected < 4){ //No combination possible with less that 4 keys
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
n_keys = l_intbuf;
|
||||
while(n_keys&(n_keys-1)){
|
||||
@ -2053,6 +2053,7 @@ bool build_params
|
||||
while(n_keys > collected){
|
||||
n_keys/=2;
|
||||
}
|
||||
//AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two
|
||||
l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
|
||||
l_intbuf = 0;
|
||||
l_build_buf = n_keys;
|
||||
@ -2072,6 +2073,218 @@ bool build_params
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
#define BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF
|
||||
|
||||
template<class RandIt, class Compare>
|
||||
inline void adaptive_merge_combine_blocks( RandIt first
|
||||
, typename iterator_traits<RandIt>::size_type len1
|
||||
, typename iterator_traits<RandIt>::size_type len2
|
||||
, typename iterator_traits<RandIt>::size_type collected
|
||||
, typename iterator_traits<RandIt>::size_type n_keys
|
||||
, typename iterator_traits<RandIt>::size_type l_block
|
||||
, bool use_internal_buf
|
||||
, bool xbuf_used
|
||||
, Compare comp
|
||||
, adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf
|
||||
)
|
||||
{
|
||||
typedef typename iterator_traits<RandIt>::size_type size_type;
|
||||
size_type const len = len1+len2;
|
||||
size_type const l_combine = len-collected;
|
||||
size_type const l_combine1 = len1-collected;
|
||||
size_type n_bef_irreg2, n_aft_irreg2, l_irreg1, l_irreg2, midkey_idx;
|
||||
if(n_keys){
|
||||
RandIt const first_data = first+collected;
|
||||
RandIt const keys = first;
|
||||
combine_params( keys, comp, first_data, l_combine
|
||||
, l_combine1, l_block, xbuf, comp
|
||||
, midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len);
|
||||
if(xbuf_used){
|
||||
merge_blocks_with_buf
|
||||
(keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, xbuf_used);
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg xbf: ", len);
|
||||
}
|
||||
else if(use_internal_buf){
|
||||
|
||||
#ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF
|
||||
range_xbuf<RandIt, swap_op> rbuf(first_data-l_block, first_data);
|
||||
merge_blocks_with_buf
|
||||
(keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, rbuf, xbuf_used);
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: ", len);
|
||||
#else
|
||||
merge_blocks_left
|
||||
(keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used);
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg lft: ", len);
|
||||
#endif
|
||||
}
|
||||
else{
|
||||
merge_blocks_bufferless
|
||||
(keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp);
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg bfl: ", len);
|
||||
}
|
||||
}
|
||||
else{
|
||||
xbuf.clear();
|
||||
size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);
|
||||
combine_params( uint_keys, less(), first, l_combine
|
||||
, l_combine1, l_block, xbuf, comp
|
||||
, midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len);
|
||||
merge_blocks_with_buf
|
||||
(uint_keys, uint_keys[midkey_idx], less(), first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, true);
|
||||
xbuf.clear();
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg lft: ", len);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template<class RandIt, class Compare>
|
||||
inline void adaptive_merge_final_merge( RandIt first
|
||||
, typename iterator_traits<RandIt>::size_type len1
|
||||
, typename iterator_traits<RandIt>::size_type len2
|
||||
, typename iterator_traits<RandIt>::size_type collected
|
||||
, typename iterator_traits<RandIt>::size_type l_intbuf
|
||||
, typename iterator_traits<RandIt>::size_type l_block
|
||||
, bool use_internal_buf
|
||||
, bool xbuf_used
|
||||
, Compare comp
|
||||
, adaptive_xbuf<typename iterator_traits<RandIt>::value_type> & xbuf
|
||||
)
|
||||
{
|
||||
typedef typename iterator_traits<RandIt>::size_type size_type;
|
||||
(void)l_block;
|
||||
size_type n_keys = collected-l_intbuf;
|
||||
size_type len = len1+len2;
|
||||
if(use_internal_buf){
|
||||
if(xbuf_used){
|
||||
xbuf.clear();
|
||||
//Nothing to do
|
||||
if(n_keys){
|
||||
stable_sort(first, first+n_keys, comp, xbuf);
|
||||
stable_merge(first, first+n_keys, first+len, comp, xbuf);
|
||||
}
|
||||
}
|
||||
else{
|
||||
#ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF
|
||||
xbuf.clear();
|
||||
stable_sort(first, first+collected, comp, xbuf);
|
||||
stable_merge(first, first+collected, first+len, comp, xbuf);
|
||||
#else
|
||||
xbuf.clear();
|
||||
stable_sort(first+len-l_block, first+len, comp, xbuf);
|
||||
RandIt const pos1 = lower_bound(first+n_keys, first+len-l_block, first[len-1], comp);
|
||||
RandIt const pos2 = rotate_gcd(pos1, first+len-l_block, first+len);
|
||||
stable_merge(first+n_keys, pos1, pos2, antistable<Compare>(comp), xbuf);
|
||||
if(n_keys){
|
||||
stable_sort(first, first+n_keys, comp, xbuf);
|
||||
stable_merge(first, first+n_keys, first+len, comp, xbuf);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A buf mrg: ", len);
|
||||
}
|
||||
else{
|
||||
stable_sort(first, first+collected, comp, xbuf);
|
||||
xbuf.clear();
|
||||
if(xbuf.capacity() >= collected){
|
||||
buffered_merge(first, first+collected, first+len1+len2, comp, xbuf);
|
||||
}
|
||||
else{
|
||||
merge_bufferless(first, first+collected, first+len1+len2, comp);
|
||||
}
|
||||
}
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: ", len);
|
||||
}
|
||||
|
||||
//Calculates the internal buffer length and the number of keys needed by adaptive_merge.
//
// l_block:        block length
// len:            total number of elements to merge
// xbuf:           external buffer; must provide capacity() and
//                 supports_aligned_trailing<size_type>(l_block, n_keys)
// l_intbuf_inout: receives the internal buffer length: 0 if xbuf.capacity() >= l_block,
//                 l_block otherwise
//
// Returns the number of keys, or 0 if xbuf reports that it supports
// aligned trailing storage for them.
template<class SizeType, class Xbuf>
inline SizeType adaptive_merge_n_keys_intbuf(SizeType l_block, SizeType len, Xbuf & xbuf, SizeType &l_intbuf_inout)
{
   typedef SizeType size_type;

   //No internal buffer is needed if the external one can hold a whole block
   size_type l_intbuf = l_block;
   if(xbuf.capacity() >= l_block){
      l_intbuf = 0u;
   }

   //This is the minimum number of keys to implement the ideal algorithm
   //ceil(len/l_block) - 1 (as the first block is used as buffer)
   size_type n_keys = l_block;
   for( ; !(n_keys < (len-l_intbuf-n_keys)/l_block); --n_keys){
      //Decrease until the keys are fewer than the blocks left after removing buffer and keys
   }
   ++n_keys;
   BOOST_ASSERT(n_keys < l_block);

   bool const keys_fit_in_xbuf = xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys);
   l_intbuf_inout = l_intbuf;
   if(keys_fit_in_xbuf){
      return size_type(0u);
   }
   return n_keys;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Main explanation of the sort algorithm.
|
||||
//
|
||||
// csqrtlen = ceil(sqrt(len));
|
||||
//
|
||||
// * First, 2*csqrtlen unique elements are extracted from elements to be
|
||||
// sorted and placed in the beginning of the range.
|
||||
//
|
||||
// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements
|
||||
// will be used as auxiliary memory, so trailing len-2*csqrtlen elements are
|
||||
// grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
|
||||
// 2*csqrtlen unique elements are again the leading elements of the whole range.
|
||||
//
|
||||
// * Step "combine_blocks": pairs of previously formed blocks are merged with a different
|
||||
// ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the
|
||||
// "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen
|
||||
// elements, etc.) until all trailing (len-2*csqrtlen) elements are merged.
|
||||
//
|
||||
// In "combine_blocks" len/csqrtlen elements are used as "keys" (markers) to
|
||||
// know if elements belong to the first or second block to be merged and another
|
||||
// leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step:
|
||||
//
|
||||
// Iteratively until all trailing (len-2*csqrtlen) elements are merged:
|
||||
// Iteratively for each pair of previously merged block:
|
||||
// * Blocks are divided into groups of csqrtlen elements and
|
||||
// 2*merged_block/csqrtlen keys are sorted to be used as markers
|
||||
// * Groups are selection-sorted by first or last element (depending on whether they are
|
||||
// merged to left or right) and keys are reordered accordingly as an imitation-buffer.
|
||||
// * Elements of each block pair are merged using the csqrtlen buffer taking into account
|
||||
// if they belong to the first half or second half (marked by the key).
|
||||
//
|
||||
// * In the final merge step leading elements (2*csqrtlen) are sorted and merged with
|
||||
// rotations with the rest of sorted elements in the "combine_blocks" step.
|
||||
//
|
||||
// Corner cases:
|
||||
//
|
||||
// * If no 2*csqrtlen elements can be extracted:
|
||||
//
|
||||
// * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used
|
||||
// as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This
|
||||
// means that an additional "combine_blocks" step will be needed to merge all elements.
|
||||
//
|
||||
// * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum,
|
||||
// then reduces the number of elements used as buffer and keys in the "build_blocks"
|
||||
// and "combine_blocks" steps. If "combine_blocks" does not have enough keys due to this reduction
|
||||
// then uses a rotation based smart merge.
|
||||
//
|
||||
// * If the minimum number of keys can't be extracted, a rotation-based sorting is performed.
|
||||
//
|
||||
// * If auxiliary memory is greater than or equal to ceil(len/2), half-copying mergesort is used.
|
||||
//
|
||||
// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
|
||||
// then only csqrtlen elements need to be extracted and "combine_blocks" will use integral
|
||||
// keys to combine blocks.
|
||||
//
|
||||
// * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks
|
||||
// using classic merge.
|
||||
template<class RandIt, class Compare>
|
||||
void adaptive_sort_impl
|
||||
( RandIt first
|
||||
@ -2093,7 +2306,7 @@ void adaptive_sort_impl
|
||||
return;
|
||||
}
|
||||
|
||||
//Make sure it is at least two
|
||||
//Make sure it is at least four
|
||||
BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);
|
||||
|
||||
size_type l_base = 0;
|
||||
@ -2101,12 +2314,14 @@ void adaptive_sort_impl
|
||||
size_type n_keys = 0;
|
||||
size_type l_build_buf = 0;
|
||||
|
||||
//Calculate and extract needed unique elements. If a minimum is not achieved
|
||||
//fallback to rotation-based merge
|
||||
if(!build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
|
||||
stable_sort(first, first+len, comp, xbuf);
|
||||
return;
|
||||
}
|
||||
|
||||
//Otherwise, continue in adaptive_sort
|
||||
//Otherwise, continue the adaptive_sort
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT("\n After collect_unique: ", len);
|
||||
size_type const n_key_plus_buf = l_intbuf+n_keys;
|
||||
//l_build_buf is always power of two if l_intbuf is zero
|
||||
@ -2122,9 +2337,51 @@ void adaptive_sort_impl
|
||||
(first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp);
|
||||
|
||||
//Sort keys and buffer and merge the whole sequence
|
||||
final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
|
||||
adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
|
||||
}
|
||||
|
||||
// Main explanation of the merge algorithm.
|
||||
//
|
||||
// csqrtlen = ceil(sqrt(len));
|
||||
//
|
||||
// * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect
|
||||
// unique elements are extracted from elements to be sorted and placed in the beginning of the range.
|
||||
//
|
||||
// * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements
|
||||
// are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements.
|
||||
//
|
||||
// Explanation of the "combine_blocks" step:
|
||||
//
|
||||
// * Trailing [first+to_collect, first+len1) elements are divided in groups of csqrtlen elements.
|
||||
// Remaining elements that can't form a group are grouped in the front of those elements.
|
||||
// * Trailing [first+len1, first+len1+len2) elements are divided in groups of csqrtlen elements.
|
||||
// Remaining elements that can't form a group are grouped in the back of those elements.
|
||||
// * Groups are selection-sorted by first or last element (depending on whether they are
|
||||
// merged to left or right) and keys are reordered accordingly as an imitation-buffer.
|
||||
// * Elements of each block pair are merged using the csqrtlen buffer taking into account
|
||||
// if they belong to the first half or second half (marked by the key).
|
||||
//
|
||||
// * In the final merge step leading "to_collect" elements are merged with rotations
|
||||
// with the rest of merged elements in the "combine_blocks" step.
|
||||
//
|
||||
// Corner cases:
|
||||
//
|
||||
// * If no "to_collect" elements can be extracted:
|
||||
//
|
||||
// * If more than a minimum number of elements is extracted
|
||||
// then reduces the number of elements used as buffer and keys in the
|
||||
// "combine_blocks" step. If "combine_blocks" does not have enough keys due to this reduction
|
||||
// then uses a rotation based smart merge.
|
||||
//
|
||||
// * If the minimum number of keys can't be extracted, a rotation-based merge is performed.
|
||||
//
|
||||
// * If auxiliary memory is greater than or equal to min(len1, len2), a buffered merge is performed.
|
||||
//
|
||||
// * If len1 or len2 is less than or equal to 2*csqrtlen, a rotation-based merge is performed.
|
||||
//
|
||||
// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
|
||||
// then no elements need to be extracted and "combine_blocks" will use integral
|
||||
// keys to combine blocks.
|
||||
template<class RandIt, class Compare>
|
||||
void adaptive_merge_impl
|
||||
( RandIt first
|
||||
@ -2144,134 +2401,43 @@ void adaptive_merge_impl
|
||||
//Calculate ideal parameters and try to collect needed unique keys
|
||||
size_type l_block = size_type(ceil_sqrt(len));
|
||||
|
||||
//One range is not big enough to extract keys and the internal buffer so a
|
||||
//rotation-based merge will do just fine
|
||||
if(len1 <= l_block*2 || len2 <= l_block*2){
|
||||
merge_bufferless(first, first+len1, first+len1+len2, comp);
|
||||
return;
|
||||
}
|
||||
|
||||
size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block;
|
||||
|
||||
//This is the minimum number of case to implement the ideal algorithm
|
||||
//ceil(len/l_block) - 1 (as the first block is used as buffer)
|
||||
size_type n_keys = l_block;
|
||||
while(n_keys >= (len-l_intbuf-n_keys)/l_block){
|
||||
--n_keys;
|
||||
}
|
||||
++n_keys;
|
||||
BOOST_ASSERT(n_keys < l_block);
|
||||
|
||||
if(xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys)){
|
||||
n_keys = 0u;
|
||||
}
|
||||
|
||||
//Detail the number of keys and internal buffer. If xbuf has enough memory, no
|
||||
//internal buffer is needed so l_intbuf will remain 0.
|
||||
size_type l_intbuf = 0;
|
||||
size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len, xbuf, l_intbuf);
|
||||
size_type const to_collect = l_intbuf+n_keys;
|
||||
size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf);
|
||||
|
||||
//Try to extract needed unique values from the first range
|
||||
size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf);
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT("\n A collect: ", len);
|
||||
|
||||
//The minimum number of keys is not available in the first range, so fall back to rotations
|
||||
if(collected != to_collect && collected < 4){
|
||||
merge_bufferless(first, first+len1, first+len1+len2, comp);
|
||||
return;
|
||||
}
|
||||
else{
|
||||
bool use_internal_buf = true;
|
||||
if (collected != to_collect){
|
||||
l_intbuf = 0u;
|
||||
n_keys = collected;
|
||||
use_internal_buf = false;
|
||||
l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf);
|
||||
l_intbuf = use_internal_buf ? l_block : 0u;
|
||||
}
|
||||
|
||||
bool xbuf_used = collected == to_collect && xbuf.capacity() >= l_block;
|
||||
size_type const l_combine = len-collected;
|
||||
size_type const l_combine1 = len1-collected;
|
||||
|
||||
size_type n_bef_irreg2, n_aft_irreg2, l_irreg1, l_irreg2, midkey_idx;
|
||||
if(n_keys){
|
||||
RandIt const first_data = first+collected;
|
||||
RandIt const keys = first;
|
||||
combine_params( keys, comp, first_data, l_combine
|
||||
, l_combine1, l_block, xbuf, comp
|
||||
, midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len);
|
||||
if(xbuf_used){
|
||||
merge_blocks_with_buf
|
||||
(keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, xbuf_used);
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg xbf: ", len);
|
||||
}
|
||||
else if(use_internal_buf){
|
||||
#define BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF
|
||||
#ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF
|
||||
range_xbuf<RandIt, swap_op> rbuf(first_data-l_block, first_data);
|
||||
merge_blocks_with_buf
|
||||
(keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, rbuf, xbuf_used);
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: ", len);
|
||||
#else
|
||||
merge_blocks_left
|
||||
(keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf_used);
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg lft: ", len);
|
||||
#endif
|
||||
}
|
||||
else{
|
||||
merge_blocks_bufferless
|
||||
(keys, keys[midkey_idx], comp, first_data, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp);
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg bfl: ", len);
|
||||
}
|
||||
}
|
||||
else{
|
||||
xbuf.clear();
|
||||
size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);
|
||||
combine_params( uint_keys, less(), first, l_combine
|
||||
, l_combine1, l_block, xbuf, comp
|
||||
, midkey_idx, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, true); //Outputs
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len);
|
||||
merge_blocks_with_buf
|
||||
(uint_keys, uint_keys[midkey_idx], less(), first, l_block, l_irreg1, n_bef_irreg2, n_aft_irreg2, l_irreg2, comp, xbuf, true);
|
||||
xbuf.clear();
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg lft: ", len);
|
||||
}
|
||||
|
||||
n_keys = collected-l_intbuf;
|
||||
if(use_internal_buf){
|
||||
if(xbuf_used){
|
||||
xbuf.clear();
|
||||
//Nothing to do
|
||||
if(n_keys){
|
||||
stable_sort(first, first+n_keys, comp, xbuf);
|
||||
stable_merge(first, first+n_keys, first+len, comp, xbuf);
|
||||
}
|
||||
}
|
||||
else{
|
||||
#ifdef BOOST_MOVE_ADAPTIVE_MERGE_WITH_BUF
|
||||
xbuf.clear();
|
||||
stable_sort(first, first+collected, comp, xbuf);
|
||||
stable_merge(first, first+collected, first+len, comp, xbuf);
|
||||
#else
|
||||
xbuf.clear();
|
||||
stable_sort(first+len-l_block, first+len, comp, xbuf);
|
||||
RandIt const pos1 = lower_bound(first+n_keys, first+len-l_block, first[len-1], comp);
|
||||
RandIt const pos2 = rotate_gcd(pos1, first+len-l_block, first+len);
|
||||
stable_merge(first+n_keys, pos1, pos2, antistable<Compare>(comp), xbuf);
|
||||
if(n_keys){
|
||||
stable_sort(first, first+n_keys, comp, xbuf);
|
||||
stable_merge(first, first+n_keys, first+len, comp, xbuf);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A buf mrg: ", len);
|
||||
}
|
||||
else{
|
||||
stable_sort(first, first+collected, comp, xbuf);
|
||||
xbuf.clear();
|
||||
if(xbuf.capacity() >= collected){
|
||||
buffered_merge(first, first+collected, first+len1+len2, comp, xbuf);
|
||||
}
|
||||
else{
|
||||
merge_bufferless(first, first+collected, first+len1+len2, comp);
|
||||
}
|
||||
}
|
||||
BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: ", len);
|
||||
//If not enough keys but more than minimum, adjust the internal buffer and key count
|
||||
bool use_internal_buf = collected == to_collect;
|
||||
if (!use_internal_buf){
|
||||
l_intbuf = 0u;
|
||||
n_keys = collected;
|
||||
l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf);
|
||||
//If use_internal_buf is false, then the internal buffer will be zero and rotation-based combination will be used
|
||||
l_intbuf = use_internal_buf ? l_block : 0u;
|
||||
}
|
||||
|
||||
bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block;
|
||||
//Merge trailing elements using smart merges
|
||||
adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf);
|
||||
//Merge buffer and keys with the rest of the values
|
||||
adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf);
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user