forked from boostorg/move
- Add heap_sort and pdqsort to the benchmark.
- Refactor adaptive_sort and adaptive_merge.
@@ -261,6 +261,12 @@ BidirIt2 adl_move_swap_ranges_backward(BidirIt1 first1, BidirIt1 last1, BidirIt2
   return last2;
}

template<class ForwardIt1, class ForwardIt2>
void adl_move_iter_swap(ForwardIt1 a, ForwardIt2 b)
{
   boost::adl_move_swap(*a, *b);
}

} //namespace boost{

#endif //#ifndef BOOST_MOVE_ADL_MOVE_SWAP_HPP
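Illustrative only, not part of the commit: a minimal sketch of how the new adl_move_iter_swap dispatches to a user-defined swap through argument-dependent lookup. The widget type and its swap function are hypothetical.

#include <boost/move/adl_move_swap.hpp>
#include <cassert>

namespace user {
   struct widget { int value; };
   //found via ADL when adl_move_swap(*a, *b) is evaluated
   inline void swap(widget &l, widget &r)
   {  int t = l.value; l.value = r.value; r.value = t;  }
}

int main()
{
   user::widget w[2] = { {1}, {2} };
   boost::adl_move_iter_swap(w, w + 1);   //swaps *w and *(w+1) via user::swap
   assert(w[0].value == 2 && w[1].value == 1);
   return 0;
}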
@@ -18,6 +18,259 @@
namespace boost {
namespace movelib {

///@cond
namespace detail_adaptive {

template<class RandIt, class Compare, class XBuf>
inline void adaptive_merge_combine_blocks( RandIt first
                      , typename iterator_traits<RandIt>::size_type len1
                      , typename iterator_traits<RandIt>::size_type len2
                      , typename iterator_traits<RandIt>::size_type collected
                      , typename iterator_traits<RandIt>::size_type n_keys
                      , typename iterator_traits<RandIt>::size_type l_block
                      , bool use_internal_buf
                      , bool xbuf_used
                      , Compare comp
                      , XBuf & xbuf
                      )
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   size_type const len = len1+len2;
   size_type const l_combine  = len-collected;
   size_type const l_combine1 = len1-collected;

   if(n_keys){
      RandIt const first_data = first+collected;
      RandIt const keys = first;
      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len);
      if(xbuf_used){
         if(xbuf.size() < l_block){
            xbuf.initialize_until(l_block, *first);
         }
         BOOST_ASSERT(xbuf.size() >= l_block);
         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
         combine_params( keys, comp, l_combine
                       , l_combine1, l_block, xbuf
                       , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
         op_merge_blocks_with_buf
            (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg xbf: ", len);
      }
      else{
         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
         combine_params( keys, comp, l_combine
                       , l_combine1, l_block, xbuf
                       , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
         if(use_internal_buf){
            op_merge_blocks_with_buf
               (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), first_data-l_block);
            BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A mrg buf: ", len);
         }
         else{
            merge_blocks_bufferless
               (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp);
            BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg nbf: ", len);
         }
      }
   }
   else{
      xbuf.shrink_to_fit(l_block);
      if(xbuf.size() < l_block){
         xbuf.initialize_until(l_block, *first);
      }
      size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);
      size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
      combine_params( uint_keys, less(), l_combine
                    , l_combine1, l_block, xbuf
                    , n_block_a, n_block_b, l_irreg1, l_irreg2, true);   //Outputs
      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len);
      BOOST_ASSERT(xbuf.size() >= l_block);
      op_merge_blocks_with_buf
         (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
      xbuf.clear();
      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg buf: ", len);
   }
}

template<class RandIt, class Compare, class XBuf>
inline void adaptive_merge_final_merge( RandIt first
                      , typename iterator_traits<RandIt>::size_type len1
                      , typename iterator_traits<RandIt>::size_type len2
                      , typename iterator_traits<RandIt>::size_type collected
                      , typename iterator_traits<RandIt>::size_type l_intbuf
                      , typename iterator_traits<RandIt>::size_type l_block
                      , bool use_internal_buf
                      , bool xbuf_used
                      , Compare comp
                      , XBuf & xbuf
                      )
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   (void)l_block;
   size_type n_keys = collected-l_intbuf;
   size_type len = len1+len2;
   if(use_internal_buf){
      if(xbuf_used){
         xbuf.clear();
         //Nothing to do
         if(n_keys){
            unstable_sort(first, first+n_keys, comp, xbuf);
            stable_merge(first, first+n_keys, first+len, comp, xbuf);
            BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A key mrg: ", len);
         }
      }
      else{
         xbuf.clear();
         unstable_sort(first, first+collected, comp, xbuf);
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len);
         stable_merge(first, first+collected, first+len, comp, xbuf);
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len);
      }
   }
   else{
      xbuf.clear();
      unstable_sort(first, first+collected, comp, xbuf);
      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len);
      stable_merge(first, first+collected, first+len1+len2, comp, xbuf);
      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len);
   }
   BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A fin mrg: ", len);
}

template<class SizeType, class Xbuf>
inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout)
{
   typedef SizeType size_type;
   size_type l_block = rl_block;
   size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block;

   while(xbuf.capacity() >= l_block*2){
      l_block *= 2;
   }

   //This is the minimum number of keys to implement the ideal algorithm
   size_type n_keys = len1/l_block+len2/l_block;
   while(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)){
      --n_keys;
   }
   ++n_keys;
   BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block));

   if(xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys)){
      n_keys = 0u;
   }
   l_intbuf_inout = l_intbuf;
   rl_block = l_block;
   return n_keys;
}
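Illustrative only, not part of the commit: the key-count balancing loop above, restated as a self-contained function with hypothetical lengths, showing how n_keys settles at the smallest count that still covers all blocks of both ranges.

#include <cstddef>
#include <cassert>

std::size_t min_keys(std::size_t len1, std::size_t len2,
                     std::size_t l_block, std::size_t l_intbuf)
{
   //start from one key per block, then shrink while the invariant still holds
   std::size_t n_keys = len1/l_block + len2/l_block;
   while (n_keys >= ((len1 - l_intbuf - n_keys)/l_block + len2/l_block))
      --n_keys;
   ++n_keys;
   return n_keys;
}

int main()
{
   //with len1 = len2 = 1024 and l_block = l_intbuf = 32, this settles at 61 keys
   assert(min_keys(1024u, 1024u, 32u, 32u) == 61u);
   return 0;
}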

// Main explanation of the merge algorithm.
//
// csqrtlen = ceil(sqrt(len));
//
// * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect
//   unique elements are extracted from the elements to be sorted and placed at the beginning of the range.
//
// * Step "combine_blocks": the leading (len1-to_collect) elements plus the trailing len2 elements
//   are merged with a non-trivial ("smart") algorithm to form an ordered range of the trailing "len-to_collect" elements.
//
//   Explanation of the "combine_blocks" step:
//
//   * Trailing [first+to_collect, first+len1) elements are divided in groups of csqrtlen elements.
//     Remaining elements that can't form a group are grouped in front of those elements.
//   * Trailing [first+len1, first+len1+len2) elements are divided in groups of csqrtlen elements.
//     Remaining elements that can't form a group are grouped at the back of those elements.
//   * In parallel the following two steps are performed:
//     * Groups are selection-sorted by their first or last element (depending on whether they are going
//       to be merged to the left or right) and keys are reordered accordingly as an imitation buffer.
//     * Elements of each block pair are merged using the csqrtlen buffer, taking into account
//       whether they belong to the first half or second half (marked by the key).
//
// * In the final merge step the leading "to_collect" elements are merged with rotations
//   with the rest of the elements merged in the "combine_blocks" step.
//
// Corner cases:
//
// * If no "to_collect" elements can be extracted:
//
//    * If more than a minimum number of elements is extracted,
//      then the number of elements used as buffer and keys in the
//      "combine_blocks" step is reduced. If "combine_blocks" does not have enough keys due to this
//      reduction, a rotation-based smart merge is used.
//
//    * If the minimum number of keys can't be extracted, a rotation-based merge is performed.
//
// * If auxiliary memory is greater than or equal to min(len1, len2), a buffered merge is performed.
//
// * If len1 or len2 is less than 2*csqrtlen, a rotation-based merge is performed.
//
// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
//   then no csqrtlen elements need to be extracted and "combine_blocks" will use integral
//   keys to combine blocks.
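Illustrative only, not part of the commit: the detail routine below backs the public boost::movelib::adaptive_merge declared later in this header; a minimal call without any extra storage looks like this.

#include <boost/move/algo/adaptive_merge.hpp>
#include <functional>
#include <cassert>

int main()
{
   int a[8] = {1, 3, 5, 7, 2, 4, 6, 8};
   //[a, a+4) and [a+4, a+8) are each sorted; merge them stably in place
   boost::movelib::adaptive_merge(a, a + 4, a + 8, std::less<int>());
   for (int i = 0; i != 8; ++i)
      assert(a[i] == i + 1);
   return 0;
}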
template<class RandIt, class Compare, class XBuf>
void adaptive_merge_impl
   ( RandIt first
   , typename iterator_traits<RandIt>::size_type len1
   , typename iterator_traits<RandIt>::size_type len2
   , Compare comp
   , XBuf & xbuf
   )
{
   typedef typename iterator_traits<RandIt>::size_type size_type;

   if(xbuf.capacity() >= min_value<size_type>(len1, len2)){
      buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf);
   }
   else{
      const size_type len = len1+len2;
      //Calculate ideal parameters and try to collect needed unique keys
      size_type l_block = size_type(ceil_sqrt(len));

      //One range is not big enough to extract keys and the internal buffer so a
      //rotation-based merge will do just fine
      if(len1 <= l_block*2 || len2 <= l_block*2){
         merge_bufferless(first, first+len1, first+len1+len2, comp);
         return;
      }

      //Determine the number of keys and the internal buffer. If xbuf has enough memory, no
      //internal buffer is needed so l_intbuf will remain 0.
      size_type l_intbuf = 0;
      size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf);
      size_type const to_collect = l_intbuf+n_keys;
      //Try to extract needed unique values from the first range
      size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf);
      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n A collect: ", len);

      //The minimum number of keys is not available in the first range, so fall back to rotations
      if(collected != to_collect && collected < 4){
         merge_bufferless(first, first+collected, first+len1, comp);
         merge_bufferless(first, first + len1, first + len1 + len2, comp);
         return;
      }

      //If not enough keys but more than the minimum, adjust the internal buffer and key count
      bool use_internal_buf = collected == to_collect;
      if (!use_internal_buf){
         l_intbuf = 0u;
         n_keys = collected;
         l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf);
         //If use_internal_buf is false, then the internal buffer will be zero and rotation-based combination will be used
         l_intbuf = use_internal_buf ? l_block : 0u;
      }

      bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block;
      //Merge trailing elements using smart merges
      adaptive_merge_combine_blocks(first, len1, len2, collected,   n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf);
      //Merge buffer and keys with the rest of the values
      adaptive_merge_final_merge   (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf);
   }
}

} //namespace detail_adaptive {

///@endcond

//! <b>Effects</b>: Merges two consecutive sorted ranges [first, middle) and [middle, last)
//!   into one sorted range [first, last) according to the given comparison function comp.
//!   The algorithm is stable (if there are equivalent elements in the original two ranges,
@@ -18,6 +18,558 @@
namespace boost {
namespace movelib {

///@cond
namespace detail_adaptive {

template<class RandIt>
void move_data_backward( RandIt cur_pos
                       , typename iterator_traits<RandIt>::size_type const l_data
                       , RandIt new_pos
                       , bool const xbuf_used)
{
   //Move buffer to the total combination right
   if(xbuf_used){
      boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
   }
   else{
      boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
      //Rotate does fewer moves but it seems slower due to cache issues
      //rotate_gcd(first-l_block, first+len-l_block, first+len);
   }
}

template<class RandIt>
void move_data_forward( RandIt cur_pos
                      , typename iterator_traits<RandIt>::size_type const l_data
                      , RandIt new_pos
                      , bool const xbuf_used)
{
   //Move buffer to the total combination right
   if(xbuf_used){
      boost::move(cur_pos, cur_pos+l_data, new_pos);
   }
   else{
      boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos);
      //Rotate does fewer moves but it seems slower due to cache issues
      //rotate_gcd(first-l_block, first+len-l_block, first+len);
   }
}
// Build blocks of length 2*l_build_buf. l_build_buf is a power of two.
// input:  [0, l_build_buf) elements are the buffer, the rest are unsorted elements
// output: [0, l_build_buf) elements are the buffer, blocks of 2*l_build_buf and the last subblock are sorted
//
// First, elements are merged from right to left until elements start
// at first. All old elements [first, first + l_build_buf) are placed at the end
// [first+len-l_build_buf, first+len). To achieve this:
// - If we have external memory to merge, we save elements from the buffer
//   so that a non-swapping merge is used. Buffer elements are restored
//   at the end of the buffer from the external memory.
//
// - When the external memory is not available or it is insufficient
//   for a merge operation, left swap merging is used.
//
// Once elements are merged left to right in blocks of l_build_buf, a single left
// to right merge step is performed to achieve merged blocks of size 2*l_build_buf.
// If external memory is available, the usual merge is used, swap merging otherwise.
//
// As a last step, if auxiliary memory is available, in-place merge is performed
// until all is merged or the auxiliary memory is not large enough.
template<class RandIt, class Compare, class XBuf>
typename iterator_traits<RandIt>::size_type
   adaptive_sort_build_blocks
      ( RandIt const first
      , typename iterator_traits<RandIt>::size_type const len
      , typename iterator_traits<RandIt>::size_type const l_base
      , typename iterator_traits<RandIt>::size_type const l_build_buf
      , XBuf & xbuf
      , Compare comp)
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   BOOST_ASSERT(l_build_buf <= len);
   BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1)));

   //Place the start pointer after the buffer
   RandIt first_block = first + l_build_buf;
   size_type const elements_in_blocks = len - l_build_buf;

   //////////////////////////////////
   // Start of merge to left step
   //////////////////////////////////
   size_type l_merged = 0u;

   BOOST_ASSERT(l_build_buf);
   //If there is not enough buffer for the insertion sort step, just avoid the external buffer
   size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity()));
   kbuf = kbuf < l_base ? 0 : kbuf;

   if(kbuf){
      //Backup internal buffer values in the external buffer so they can be overwritten
      xbuf.move_assign(first+l_build_buf-kbuf, kbuf);
      l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op());

      //Now combine them using the buffer. Elements from the buffer can be
      //overwritten since they've been saved to xbuf
      l_merged = op_merge_left_step_multiple
         ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op());

      //Restore the internal buffer from the external buffer unless kbuf was l_build_buf;
      //in that case restoration will happen later
      if(kbuf != l_build_buf){
         boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks);
      }
   }
   else{
      l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp);
      rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks);
   }

   //Now combine elements using the buffer. Elements from the buffer can't be
   //overwritten since xbuf was not big enough, so merge swapping elements.
   l_merged = op_merge_left_step_multiple
      (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op());

   BOOST_ASSERT(l_merged == l_build_buf);

   //////////////////////////////////
   // Start of merge to right step
   //////////////////////////////////

   //If kbuf is l_build_buf then we can merge right without swapping
   //Saved data is still in xbuf
   if(kbuf && kbuf == l_build_buf){
      op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op());
      //Restore the internal buffer from the external buffer if kbuf was l_build_buf,
      //as this operation was previously delayed.
      boost::move(xbuf.data(), xbuf.data() + kbuf, first);
   }
   else{
      op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op());
   }
   xbuf.clear();
   //2*l_build_buf or total already merged
   return min_value(elements_in_blocks, 2*l_build_buf);
}
template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf>
void adaptive_sort_combine_blocks
   ( RandItKeys const keys
   , KeyCompare key_comp
   , RandIt const first
   , typename iterator_traits<RandIt>::size_type const len
   , typename iterator_traits<RandIt>::size_type const l_prev_merged
   , typename iterator_traits<RandIt>::size_type const l_block
   , bool const use_buf
   , bool const xbuf_used
   , XBuf & xbuf
   , Compare comp
   , bool merge_left)
{
   (void)xbuf;
   typedef typename iterator_traits<RandIt>::size_type size_type;

   size_type const l_reg_combined = 2*l_prev_merged;
   size_type l_irreg_combined = 0;
   size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined);
   size_type const n_reg_combined = len/l_reg_combined;
   RandIt combined_first = first;

   (void)l_total_combined;
   BOOST_ASSERT(l_total_combined <= len);

   size_type const max_i = n_reg_combined + (l_irreg_combined != 0);

   if(merge_left || !use_buf) {
      for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_reg_combined) {
         //Now merge blocks
         bool const is_last = combined_i==n_reg_combined;
         size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;

         range_xbuf<RandIt, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first);
         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
         combine_params( keys, key_comp, l_cur_combined
                       , l_prev_merged, l_block, rbuf
                       , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
         if(!use_buf){
            merge_blocks_bufferless
               (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp);
         }
         else{
            merge_blocks_left
               (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
         }
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_L: ", len + l_block);
      }
   }
   else{
      combined_first += l_reg_combined*(max_i-1);
      for( size_type combined_i = max_i; combined_i--; combined_first -= l_reg_combined) {
         bool const is_last = combined_i==n_reg_combined;
         size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;

         RandIt const combined_last(combined_first+l_cur_combined);
         range_xbuf<RandIt, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last);
         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
         combine_params( keys, key_comp, l_cur_combined
                       , l_prev_merged, l_block, rbuf
                       , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
         merge_blocks_right
            (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_R: ", len + l_block);
      }
   }
}
//Returns true if the buffer is placed in
//[buffer+len-l_intbuf, buffer+len). Otherwise, the buffer is
//[buffer, buffer+l_intbuf)
template<class RandIt, class Compare, class XBuf>
bool adaptive_sort_combine_all_blocks
   ( RandIt keys
   , typename iterator_traits<RandIt>::size_type &n_keys
   , RandIt const buffer
   , typename iterator_traits<RandIt>::size_type const l_buf_plus_data
   , typename iterator_traits<RandIt>::size_type l_merged
   , typename iterator_traits<RandIt>::size_type &l_intbuf
   , XBuf & xbuf
   , Compare comp)
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   RandIt const first = buffer + l_intbuf;
   size_type const l_data = l_buf_plus_data - l_intbuf;
   size_type const l_unique = l_intbuf+n_keys;
   //Backup data to the external buffer once if possible
   bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity();
   if(common_xbuf){
      xbuf.move_assign(buffer, l_intbuf);
   }

   bool prev_merge_left = true;
   size_type l_prev_total_combined = l_merged, l_prev_block = 0;
   bool prev_use_internal_buf = true;

   for( size_type n = 0; l_data > l_merged
      ; l_merged*=2
      , ++n){
      //If l_intbuf is non-zero, use that internal buffer.
      //   Implies l_block == l_intbuf && use_internal_buf == true
      //If l_intbuf is zero, see if half the keys can be reused as a reduced emergency buffer.
      //   Implies l_block == n_keys/2 && use_internal_buf == true
      //Otherwise, just give up and use all keys to merge using rotations (use_internal_buf = false)
      bool use_internal_buf = false;
      size_type const l_block = lblock_for_combine(l_intbuf, n_keys, 2*l_merged, use_internal_buf);
      BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf));
      BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) );
      BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) );

      bool const is_merge_left = (n&1) == 0;
      size_type const l_total_combined = calculate_total_combined(l_data, l_merged);
      if(n && prev_use_internal_buf && prev_merge_left){
         if(is_merge_left || !use_internal_buf){
            move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf);
         }
         else{
            //Put the buffer just after l_total_combined
            RandIt const buf_end = first+l_prev_total_combined;
            RandIt const buf_beg = buf_end-l_block;
            if(l_prev_total_combined > l_total_combined){
               size_type const l_diff = l_prev_total_combined - l_total_combined;
               move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf);
            }
            else if(l_prev_total_combined < l_total_combined){
               size_type const l_diff = l_total_combined - l_prev_total_combined;
               move_data_forward(buf_end, l_diff, buf_beg, common_xbuf);
            }
         }
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After move_data : ", l_data + l_intbuf);
      }

      //Combine to form l_merged*2 segments
      if(n_keys){
         adaptive_sort_combine_blocks
            ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block
            , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
      }
      else{
         size_type *const uint_keys = xbuf.template aligned_trailing<size_type>();
         adaptive_sort_combine_blocks
            ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
            , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
      }

      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? " After comb blocks L: " : " After comb blocks R: ", l_data + l_intbuf);
      prev_merge_left = is_merge_left;
      l_prev_total_combined = l_total_combined;
      l_prev_block = l_block;
      prev_use_internal_buf = use_internal_buf;
   }
   BOOST_ASSERT(l_prev_total_combined == l_data);
   bool const buffer_right = prev_use_internal_buf && prev_merge_left;

   l_intbuf = prev_use_internal_buf ? l_prev_block : 0u;
   n_keys = l_unique - l_intbuf;
   //Restore data from the external common buffer if used
   if(common_xbuf){
      if(buffer_right){
         boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data);
      }
      else{
         boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer);
      }
   }
   return buffer_right;
}
template<class RandIt, class Compare, class XBuf>
void adaptive_sort_final_merge( bool buffer_right
                              , RandIt const first
                              , typename iterator_traits<RandIt>::size_type const l_intbuf
                              , typename iterator_traits<RandIt>::size_type const n_keys
                              , typename iterator_traits<RandIt>::size_type const len
                              , XBuf & xbuf
                              , Compare comp)
{
   //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf);
   xbuf.clear();

   typedef typename iterator_traits<RandIt>::size_type size_type;
   size_type const n_key_plus_buf = l_intbuf+n_keys;
   if(buffer_right){
      //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
      stable_sort(first+len-l_intbuf, first+len, comp, xbuf);
      stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf);
      unstable_sort(first, first+n_keys, comp, xbuf);
      stable_merge(first, first+n_keys, first+len, comp, xbuf);
   }
   else{
      //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
      stable_sort(first, first+n_key_plus_buf, comp, xbuf);
      if(xbuf.capacity() >= n_key_plus_buf){
         buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
      }
      else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){
         stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf);
         stable_merge(first, first+n_keys, first+len, comp, xbuf);
      }
      else{
         stable_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
      }
   }
   BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After final_merge : ", len);
}
template<class RandIt, class Compare, class Unsigned, class XBuf>
bool adaptive_sort_build_params
   (RandIt first, Unsigned const len, Compare comp
   , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf
   , XBuf & xbuf
   )
{
   typedef Unsigned size_type;

   //Calculate ideal parameters and try to collect needed unique keys
   l_base = 0u;

   //Try to find a value near sqrt(len) that is 2^N*l_base where
   //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
   //as build_blocks merges to the left iteratively duplicating the
   //merged size and all the buffer must be used just before the final
   //merge to right step. This guarantees "build_blocks" produces
   //segments of size l_build_buf*2, maximizing the classic merge phase.
   l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));

   //The internal buffer can be expanded if there is enough external memory
   while(xbuf.capacity() >= l_intbuf*2){
      l_intbuf *= 2;
   }

   //This is the minimum number of keys to implement the ideal algorithm
   //
   //l_intbuf is used as buffer plus the key count
   size_type n_min_ideal_keys = l_intbuf-1;
   while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){
      --n_min_ideal_keys;
   }
   n_min_ideal_keys += 1;
   BOOST_ASSERT(n_min_ideal_keys <= l_intbuf);

   if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){
      n_keys = 0u;
      l_build_buf = l_intbuf;
   }
   else{
      //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that
      //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
      //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys, but only by a small margin.
      //
      //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed
      //(to be used for keys in combine_all_blocks), as the whole l_build_buf
      //will be backed up in the buffer during build_blocks.
      bool const non_unique_buf = xbuf.capacity() >= l_intbuf;
      size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2;
      size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);

      //If available memory is 2*sqrt(l), then for "build_params"
      //the situation is the same as if 2*l_intbuf were collected.
      if(non_unique_buf && collected == n_min_ideal_keys){
         l_build_buf = l_intbuf;
         n_keys = n_min_ideal_keys;
      }
      else if(collected == 2*l_intbuf){
         //l_intbuf*2 elements found. Use all of them in the build phase
         l_build_buf = l_intbuf*2;
         n_keys = l_intbuf;
      }
      else if(collected == (n_min_ideal_keys+l_intbuf)){
         l_build_buf = l_intbuf;
         n_keys = n_min_ideal_keys;
      }
      //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
      //is possible (due to very few unique keys), then fall back to a slow sort based on rotations.
      else{
         BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf));
         if(collected < 4){   //No combination possible with fewer than 4 keys
            return false;
         }
         n_keys = l_intbuf;
         while(n_keys&(n_keys-1)){
            n_keys &= n_keys-1;   //make it a power of 2 by clearing the lowest set bit
         }
         while(n_keys > collected){
            n_keys/=2;
         }
         //AdaptiveSortInsertionSortThreshold is always a power of two, so the minimum is a power of two
         l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
         l_intbuf = 0;
         l_build_buf = n_keys;
      }
      BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf);
   }

   return true;
}
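Illustrative only, not part of the commit: the bit trick used above ("make it a power of 2"), extracted into a standalone helper to show that it rounds down to the nearest power of two.

#include <cassert>

unsigned floor_pow2(unsigned n)
{
   while (n & (n - 1u))   //while more than one bit is set
      n &= n - 1u;        //clear the lowest set bit
   return n;
}

int main()
{
   assert(floor_pow2(24u) == 16u);
   assert(floor_pow2(16u) == 16u);
   return 0;
}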
// Main explanation of the sort algorithm.
//
// csqrtlen = ceil(sqrt(len));
//
// * First, 2*csqrtlen unique elements are extracted from the elements to be
//   sorted and placed at the beginning of the range.
//
// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements
//   will be used as auxiliary memory, so the trailing len-2*csqrtlen elements are
//   grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
//   the 2*csqrtlen unique elements are again the leading elements of the whole range.
//
// * Step "combine_blocks": pairs of previously formed blocks are merged with a different
//   ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the
//   "build_blocks" step and is repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen
//   elements, etc.) until all trailing (len-2*csqrtlen) elements are merged.
//
//   In "combine_blocks" len/csqrtlen elements are used as "keys" (markers) to
//   know if elements belong to the first or second block to be merged and another
//   leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step:
//
//   Iteratively until all trailing (len-2*csqrtlen) elements are merged:
//      Iteratively for each pair of previously merged blocks:
//         * Blocks are divided in groups of csqrtlen elements and
//           2*merged_block/csqrtlen keys are sorted to be used as markers
//         * Groups are selection-sorted by their first or last element (depending on whether they are going
//           to be merged to the left or right) and keys are reordered accordingly as an imitation buffer.
//         * Elements of each block pair are merged using the csqrtlen buffer, taking into account
//           whether they belong to the first half or second half (marked by the key).
//
// * In the final merge step the leading elements (2*csqrtlen) are sorted and merged with
//   rotations with the rest of the elements sorted in the "combine_blocks" step.
//
// Corner cases:
//
// * If no 2*csqrtlen elements can be extracted:
//
//    * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used
//      as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This
//      means that an additional "combine_blocks" step will be needed to merge all elements.
//
//    * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum,
//      then the number of elements used as buffer and keys in the "build_blocks"
//      and "combine_blocks" steps is reduced. If "combine_blocks" does not have enough keys due to this
//      reduction, a rotation-based smart merge is used.
//
//    * If the minimum number of keys can't be extracted, a rotation-based sort is performed.
//
// * If auxiliary memory is greater than or equal to ceil(len/2), a half-copying mergesort is used.
//
// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
//   then only csqrtlen elements need to be extracted and "combine_blocks" will use integral
//   keys to combine blocks.
//
// * If auxiliary memory is available, "build_blocks" will be extended to build bigger blocks
//   using a classic merge and "combine_blocks" will use bigger blocks when merging.
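Illustrative only, not part of the commit: the detail routine below backs the public boost::movelib::adaptive_sort declared later in this header; a minimal call without any extra storage looks like this.

#include <boost/move/algo/adaptive_sort.hpp>
#include <functional>
#include <cassert>

int main()
{
   int a[8] = {4, 1, 8, 5, 2, 7, 3, 6};
   boost::movelib::adaptive_sort(a, a + 8, std::less<int>());   //stable, in place
   for (int i = 0; i != 8; ++i)
      assert(a[i] == i + 1);
   return 0;
}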
template<class RandIt, class Compare, class XBuf>
void adaptive_sort_impl
   ( RandIt first
   , typename iterator_traits<RandIt>::size_type const len
   , Compare comp
   , XBuf & xbuf
   )
{
   typedef typename iterator_traits<RandIt>::size_type size_type;

   //Small sorts go directly to insertion sort
   if(len <= size_type(AdaptiveSortInsertionSortThreshold)){
      insertion_sort(first, first + len, comp);
   }
   else if((len-len/2) <= xbuf.capacity()){
      merge_sort(first, first+len, comp, xbuf.data());
   }
   else{
      //Make sure it is at least four
      BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);

      size_type l_base = 0;
      size_type l_intbuf = 0;
      size_type n_keys = 0;
      size_type l_build_buf = 0;

      //Calculate and extract needed unique elements. If a minimum is not achieved
      //fallback to a slow stable sort
      if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
         stable_sort(first, first+len, comp, xbuf);
      }
      else{
         BOOST_ASSERT(l_build_buf);
         //Otherwise, continue the adaptive_sort
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n After collect_unique: ", len);
         size_type const n_key_plus_buf = l_intbuf+n_keys;
         //l_build_buf is always power of two if l_intbuf is zero
         BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1))));

         //Classic merge sort until internal buffer and xbuf are exhausted
         size_type const l_merged = adaptive_sort_build_blocks
            (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp);
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After build_blocks: ", len);

         //Non-trivial merge
         bool const buffer_right = adaptive_sort_combine_all_blocks
            (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp);

         //Sort keys and buffer and merge the whole sequence
         adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
      }
   }
}

} //namespace detail_adaptive {

///@endcond

//! <b>Effects</b>: Sorts the elements in the range [first, last) in ascending order according
//!   to comparison functor "comp". The sort is stable (order of equal elements
//!   is guaranteed to be preserved). Performance is improved if additional raw storage is
File diff suppressed because it is too large.

include/boost/move/algo/detail/heap_sort.hpp (new file, 111 lines)
@@ -0,0 +1,111 @@
//////////////////////////////////////////////////////////////////////////////
//
// (C) Copyright Ion Gaztanaga 2017-2018.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
// See http://www.boost.org/libs/move for documentation.
//
//////////////////////////////////////////////////////////////////////////////

//! \file

#ifndef BOOST_MOVE_DETAIL_HEAP_SORT_HPP
#define BOOST_MOVE_DETAIL_HEAP_SORT_HPP

#ifndef BOOST_CONFIG_HPP
#  include <boost/config.hpp>
#endif
#
#if defined(BOOST_HAS_PRAGMA_ONCE)
#  pragma once
#endif

#include <boost/move/detail/config_begin.hpp>
#include <boost/move/detail/workaround.hpp>
#include <boost/move/detail/iterator_traits.hpp>
#include <boost/move/algo/detail/is_sorted.hpp>
#include <boost/move/utility_core.hpp>

namespace boost { namespace movelib{

template <class RandomAccessIterator, class Compare>
class heap_sort_helper
{
   typedef typename boost::movelib::iterator_traits<RandomAccessIterator>::size_type  size_type;
   typedef typename boost::movelib::iterator_traits<RandomAccessIterator>::value_type value_type;

   //Classic two-phase heap adjustment: sift the hole at hole_index down to a
   //leaf, then sift "value" back up with a push_heap-like pass
   static void adjust_heap(RandomAccessIterator first, size_type hole_index, size_type const len, value_type &value, Compare comp)
   {
      size_type const top_index = hole_index;
      size_type second_child = 2 * (hole_index + 1);

      while (second_child < len) {
         if (comp(*(first + second_child), *(first + (second_child - 1))))
            second_child--;
         *(first + hole_index) = boost::move(*(first + second_child));
         hole_index = second_child;
         second_child = 2 * (second_child + 1);
      }
      if (second_child == len) {
         *(first + hole_index) = boost::move(*(first + (second_child - 1)));
         hole_index = second_child - 1;
      }

      {  //push_heap-like ending
         size_type parent = (hole_index - 1) / 2;
         while (hole_index > top_index && comp(*(first + parent), value)) {
            *(first + hole_index) = boost::move(*(first + parent));
            hole_index = parent;
            parent = (hole_index - 1) / 2;
         }
         *(first + hole_index) = boost::move(value);
      }
   }

   static void make_heap(RandomAccessIterator first, RandomAccessIterator last, Compare comp)
   {
      size_type const len = size_type(last - first);
      if (len > 1) {
         size_type parent = len/2u - 1u;

         do {
            value_type v(boost::move(*(first + parent)));
            adjust_heap(first, parent, len, v, comp);
         }while (parent--);
      }
   }

   static void sort_heap(RandomAccessIterator first, RandomAccessIterator last, Compare comp)
   {
      size_type len = size_type(last - first);
      while (len > 1) {
         //move biggest to the safe zone
         --last;
         value_type v(boost::move(*last));
         *last = boost::move(*first);
         adjust_heap(first, size_type(0), --len, v, comp);
      }
   }

   public:
   static void sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp)
   {
      make_heap(first, last, comp);
      sort_heap(first, last, comp);
      BOOST_ASSERT(boost::movelib::is_sorted(first, last, comp));
   }
};

template <class RandomAccessIterator, class Compare>
BOOST_MOVE_FORCEINLINE void heap_sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp)
{
   heap_sort_helper<RandomAccessIterator, Compare>::sort(first, last, comp);
}

}} //namespace boost { namespace movelib{

#include <boost/move/detail/config_end.hpp>

#endif //#ifndef BOOST_MOVE_DETAIL_HEAP_SORT_HPP
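Illustrative only, not part of the commit: heap_sort is a detail header (it serves as pdqsort's O(N log N) fallback below), but it can be exercised directly.

#include <boost/move/algo/detail/heap_sort.hpp>
#include <functional>
#include <cassert>

int main()
{
   int a[6] = {3, 6, 1, 5, 2, 4};
   boost::movelib::heap_sort(a, a + 6, std::less<int>());
   for (int i = 0; i != 6; ++i)
      assert(a[i] == i + 1);
   return 0;
}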
include/boost/move/algo/detail/is_sorted.hpp (new file, 55 lines)
@@ -0,0 +1,55 @@
#ifndef BOOST_MOVE_DETAIL_IS_SORTED_HPP
#define BOOST_MOVE_DETAIL_IS_SORTED_HPP
///////////////////////////////////////////////////////////////////////////////
//
// (C) Copyright Ion Gaztanaga 2017-2018. Distributed under the Boost
// Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// See http://www.boost.org/libs/container for documentation.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef BOOST_CONFIG_HPP
#  include <boost/config.hpp>
#endif

#if defined(BOOST_HAS_PRAGMA_ONCE)
#  pragma once
#endif

namespace boost {
namespace movelib {

template<class ForwardIt, class Pred>
bool is_sorted(ForwardIt const first, ForwardIt last, Pred pred)
{
   if (first != last) {
      ForwardIt next = first, cur(first);
      while (++next != last) {
         if (pred(*next, *cur))
            return false;
         cur = next;
      }
   }
   return true;
}

template<class ForwardIt, class Pred>
bool is_sorted_and_unique(ForwardIt first, ForwardIt last, Pred pred)
{
   if (first != last) {
      ForwardIt next = first;
      while (++next != last) {
         if (!pred(*first, *next))
            return false;
         first = next;
      }
   }
   return true;
}

} //namespace movelib {
} //namespace boost {

#endif //BOOST_MOVE_DETAIL_IS_SORTED_HPP
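Illustrative only, not part of the commit: the two predicates above, exercised on a range containing a duplicate.

#include <boost/move/algo/detail/is_sorted.hpp>
#include <functional>
#include <cassert>

int main()
{
   int a[5] = {1, 2, 2, 3, 4};
   assert( boost::movelib::is_sorted(a, a + 5, std::less<int>()));
   //the duplicated 2 breaks strict ordering, so not "sorted and unique"
   assert(!boost::movelib::is_sorted_and_unique(a, a + 5, std::less<int>()));
   return 0;
}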
@@ -256,101 +256,6 @@ void swap_merge_right
   op_merge_right(first1, last1, last2, buf_last, comp, swap_op());
}

template <class BidirIt, class Distance, class Compare>
void merge_bufferless_ONlogN_recursive
   (BidirIt first, BidirIt middle, BidirIt last, Distance len1, Distance len2, Compare comp)
{
   typedef typename iterator_traits<BidirIt>::size_type size_type;
   while(1) {
      //#define MERGE_BUFFERLESS_RECURSIVE_OPT
      #ifndef MERGE_BUFFERLESS_RECURSIVE_OPT
      if (len2 == 0) {
         return;
      }

      if (!len1) {
         return;
      }

      if ((len1 | len2) == 1) {
         if (comp(*middle, *first))
            adl_move_swap(*first, *middle);
         return;
      }
      #else
      if (len2 == 0) {
         return;
      }

      if (!len1) {
         return;
      }
      BidirIt middle_prev = middle; --middle_prev;
      if(!comp(*middle, *middle_prev))
         return;

      while(true) {
         if (comp(*middle, *first))
            break;
         ++first;
         if(--len1 == 1)
            break;
      }

      if (len1 == 1 && len2 == 1) {
         //comp(*middle, *first) == true already tested in the loop
         adl_move_swap(*first, *middle);
         return;
      }
      #endif

      BidirIt first_cut = first;
      BidirIt second_cut = middle;
      Distance len11 = 0;
      Distance len22 = 0;
      if (len1 > len2) {
         len11 = len1 / 2;
         first_cut += len11;
         second_cut = boost::movelib::lower_bound(middle, last, *first_cut, comp);
         len22 = size_type(second_cut - middle);
      }
      else {
         len22 = len2 / 2;
         second_cut += len22;
         first_cut = boost::movelib::upper_bound(first, middle, *second_cut, comp);
         len11 = size_type(first_cut - first);
      }
      BidirIt new_middle = rotate_gcd(first_cut, middle, second_cut);

      //Avoid one recursive call doing a manual tail call elimination on the biggest range
      const Distance len_internal = len11+len22;
      if( len_internal < (len1 + len2 - len_internal) ) {
         merge_bufferless_ONlogN_recursive(first, first_cut, new_middle, len11, len22, comp);
         //merge_bufferless_recursive(new_middle, second_cut, last, len1 - len11, len2 - len22, comp);
         first = new_middle;
         middle = second_cut;
         len1 -= len11;
         len2 -= len22;
      }
      else {
         //merge_bufferless_recursive(first, first_cut, new_middle, len11, len22, comp);
         merge_bufferless_ONlogN_recursive(new_middle, second_cut, last, len1 - len11, len2 - len22, comp);
         middle = first_cut;
         last = new_middle;
         len1 = len11;
         len2 = len22;
      }
   }
}

//Complexity: NlogN
template<class BidirIt, class Compare>
void merge_bufferless_ONlogN(BidirIt first, BidirIt middle, BidirIt last, Compare comp)
{
   merge_bufferless_ONlogN_recursive
      (first, middle, last, middle - first, last - middle, comp);
}

//Complexity: min(len1,len2)^2 + max(len1,len2)
template<class RandIt, class Compare>
void merge_bufferless_ON2(RandIt first, RandIt middle, RandIt last, Compare comp)
@@ -384,10 +289,81 @@ void merge_bufferless_ON2(RandIt first, RandIt middle, RandIt last, Compare comp
   }
}

static const std::size_t MergeBufferlessONLogNRotationThreshold = 32;

template <class RandIt, class Distance, class Compare>
void merge_bufferless_ONlogN_recursive
   (RandIt first, RandIt middle, RandIt last, Distance len1, Distance len2, Compare comp)
{
   typedef typename iterator_traits<RandIt>::size_type size_type;

   while(1) {
      //trivial cases
      if (!len2) {
         return;
      }
      else if (!len1) {
         return;
      }
      else if (size_type(len1 | len2) == 1u) {
         if (comp(*middle, *first))
            adl_move_swap(*first, *middle);
         return;
      }
      else if(size_type(len1+len2) < MergeBufferlessONLogNRotationThreshold){
         merge_bufferless_ON2(first, middle, last, comp);
         return;
      }

      RandIt first_cut = first;
      RandIt second_cut = middle;
      Distance len11 = 0;
      Distance len22 = 0;
      if (len1 > len2) {
         len11 = len1 / 2;
         first_cut += len11;
         second_cut = boost::movelib::lower_bound(middle, last, *first_cut, comp);
         len22 = size_type(second_cut - middle);
      }
      else {
         len22 = len2 / 2;
         second_cut += len22;
         first_cut = boost::movelib::upper_bound(first, middle, *second_cut, comp);
         len11 = size_type(first_cut - first);
      }
      RandIt new_middle = rotate_gcd(first_cut, middle, second_cut);

      //Avoid one recursive call doing a manual tail call elimination on the biggest range
      const Distance len_internal = len11+len22;
      if( len_internal < (len1 + len2 - len_internal) ) {
         merge_bufferless_ONlogN_recursive(first, first_cut, new_middle, len11, len22, comp);
         first = new_middle;
         middle = second_cut;
         len1 -= len11;
         len2 -= len22;
      }
      else {
         merge_bufferless_ONlogN_recursive(new_middle, second_cut, last, len1 - len11, len2 - len22, comp);
         middle = first_cut;
         last = new_middle;
         len1 = len11;
         len2 = len22;
      }
   }
}

//Complexity: NlogN
template<class RandIt, class Compare>
void merge_bufferless_ONlogN(RandIt first, RandIt middle, RandIt last, Compare comp)
{
   merge_bufferless_ONlogN_recursive
      (first, middle, last, middle - first, last - middle, comp);
}

template<class RandIt, class Compare>
void merge_bufferless(RandIt first, RandIt middle, RandIt last, Compare comp)
{
   //#define BOOST_ADAPTIVE_MERGE_NLOGN_MERGE
   #define BOOST_ADAPTIVE_MERGE_NLOGN_MERGE
   #ifdef BOOST_ADAPTIVE_MERGE_NLOGN_MERGE
   merge_bufferless_ONlogN(first, middle, last, comp);
   #else
include/boost/move/algo/detail/pdqsort.hpp (new file, 334 lines)
@@ -0,0 +1,334 @@
//////////////////////////////////////////////////////////////////////////////
//
// (C) Copyright Orson Peters 2017.
// (C) Copyright Ion Gaztanaga 2017-2018.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
// See http://www.boost.org/libs/move for documentation.
//
//////////////////////////////////////////////////////////////////////////////
//
// This implementation of Pattern-defeating quicksort (pdqsort) was written
// by Orson Peters, and discussed in the Boost mailing list:
// http://boost.2283326.n4.nabble.com/sort-pdqsort-td4691031.html
//
// This implementation is the adaptation by Ion Gaztanaga of code originally on GitHub
// with permission from the author to relicense it under the Boost Software License
// (see the Boost mailing list for details).
//
// The original copyright statement is pasted here for completeness:
//
//  pdqsort.h - Pattern-defeating quicksort.
//  Copyright (c) 2015 Orson Peters
//  This software is provided 'as-is', without any express or implied warranty. In no event will the
//  authors be held liable for any damages arising from the use of this software.
//  Permission is granted to anyone to use this software for any purpose, including commercial
//  applications, and to alter it and redistribute it freely, subject to the following restrictions:
//  1. The origin of this software must not be misrepresented; you must not claim that you wrote the
//     original software. If you use this software in a product, an acknowledgment in the product
//     documentation would be appreciated but is not required.
//  2. Altered source versions must be plainly marked as such, and must not be misrepresented as
//     being the original software.
//  3. This notice may not be removed or altered from any source distribution.
//
//////////////////////////////////////////////////////////////////////////////

#ifndef BOOST_MOVE_ALGO_PDQSORT_HPP
#define BOOST_MOVE_ALGO_PDQSORT_HPP

#ifndef BOOST_CONFIG_HPP
#  include <boost/config.hpp>
#endif
#
#if defined(BOOST_HAS_PRAGMA_ONCE)
#  pragma once
#endif

#include <boost/move/detail/config_begin.hpp>
#include <boost/move/detail/workaround.hpp>
#include <boost/move/utility_core.hpp>
#include <boost/move/algo/detail/insertion_sort.hpp>
#include <boost/move/algo/detail/heap_sort.hpp>
#include <boost/move/detail/iterator_traits.hpp>

#include <boost/move/adl_move_swap.hpp>
#include <cstddef>

namespace boost {
namespace movelib {

namespace pdqsort_detail {

   //A simple pair implementation to avoid including <utility>
   template<class T1, class T2>
   struct pair
   {
      pair()
      {}

      pair(const T1 &t1, const T2 &t2)
         : first(t1), second(t2)
      {}

      T1 first;
      T2 second;
   };

   enum {
      // Partitions below this size are sorted using insertion sort.
      insertion_sort_threshold = 24,

      // Partitions above this size use Tukey's ninther to select the pivot.
      ninther_threshold = 128,

      // When we detect an already sorted partition, attempt an insertion sort that allows this
      // amount of element moves before giving up.
      partial_insertion_sort_limit = 8,

      // Must be a multiple of 8 due to loop unrolling, and < 256 to fit in unsigned char.
      block_size = 64,

      // Cacheline size, assumes power of two.
      cacheline_size = 64
   };

   // Returns floor(log2(n)), assumes n > 0.
   template<class Unsigned>
   Unsigned log2(Unsigned n) {
      Unsigned log = 0;
      while (n >>= 1) ++log;
      return log;
   }
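Illustrative only, not part of the commit: floor(log2(n)) restated standalone. pdqsort typically uses this value as its recursion budget (the bad_allowed parameter of pdqsort_loop below), so after roughly log2(n) badly balanced partitions it falls back to heap_sort.

#include <cassert>

//floor(log2(n)) for n > 0, mirroring pdqsort_detail::log2 above
unsigned floor_log2(unsigned n)
{
   unsigned log = 0;
   while (n >>= 1) ++log;
   return log;
}

int main()
{
   assert(floor_log2(1u) == 0u);
   assert(floor_log2(8u) == 3u);
   assert(floor_log2(9u) == 3u);   //floor, not ceiling
   return 0;
}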
// Attempts to use insertion sort on [begin, end). Will return false if more than
|
||||
// partial_insertion_sort_limit elements were moved, and abort sorting. Otherwise it will
|
||||
// successfully sort and return true.
|
||||
template<class Iter, class Compare>
|
||||
inline bool partial_insertion_sort(Iter begin, Iter end, Compare comp) {
|
||||
typedef typename boost::movelib::iterator_traits<Iter>::value_type T;
|
||||
typedef typename boost::movelib::iterator_traits<Iter>::size_type size_type;
|
||||
if (begin == end) return true;
|
||||
|
||||
size_type limit = 0;
|
||||
for (Iter cur = begin + 1; cur != end; ++cur) {
|
||||
if (limit > partial_insertion_sort_limit) return false;
|
||||
|
||||
Iter sift = cur;
|
||||
Iter sift_1 = cur - 1;
|
||||
|
||||
// Compare first so we can avoid 2 moves for an element already positioned correctly.
|
||||
if (comp(*sift, *sift_1)) {
|
||||
T tmp = boost::move(*sift);
|
||||
|
||||
do { *sift-- = boost::move(*sift_1); }
|
||||
while (sift != begin && comp(tmp, *--sift_1));
|
||||
|
||||
*sift = boost::move(tmp);
|
||||
limit += size_type(cur - sift);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class Iter, class Compare>
|
||||
inline void sort2(Iter a, Iter b, Compare comp) {
|
||||
if (comp(*b, *a)) boost::adl_move_iter_swap(a, b);
|
||||
}
|
||||
|
||||
// Sorts the elements *a, *b and *c using comparison function comp.
|
||||
template<class Iter, class Compare>
|
||||
inline void sort3(Iter a, Iter b, Iter c, Compare comp) {
|
||||
sort2(a, b, comp);
|
||||
sort2(b, c, comp);
|
||||
sort2(a, b, comp);
|
||||
}

// Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
// to the pivot are put in the right-hand partition. Returns the position of the pivot after
// partitioning and whether the passed sequence already was correctly partitioned. Assumes the
// pivot is a median of at least 3 elements and that [begin, end) is at least
// insertion_sort_threshold long.
template<class Iter, class Compare>
pdqsort_detail::pair<Iter, bool> partition_right(Iter begin, Iter end, Compare comp) {
    typedef typename boost::movelib::iterator_traits<Iter>::value_type T;

    // Move pivot into local for speed.
    T pivot(boost::move(*begin));

    Iter first = begin;
    Iter last  = end;

    // Find the first element greater than or equal to the pivot (the median of 3 guarantees
    // this exists).
    while (comp(*++first, pivot));

    // Find the first element strictly smaller than the pivot. We have to guard this search if
    // there was no element before *first.
    if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
    else                    while (                !comp(*--last, pivot));

    // If the first pair of elements that should be swapped to partition are the same element,
    // the passed-in sequence already was correctly partitioned.
    bool already_partitioned = first >= last;

    // Keep swapping pairs of elements that are on the wrong side of the pivot. Previously
    // swapped pairs guard the searches, which is why the first iteration is special-cased
    // above.
    while (first < last) {
        boost::adl_move_iter_swap(first, last);
        while (comp(*++first, pivot));
        while (!comp(*--last, pivot));
    }

    // Put the pivot in the right place.
    Iter pivot_pos = first - 1;
    *begin = boost::move(*pivot_pos);
    *pivot_pos = boost::move(pivot);

    return pdqsort_detail::pair<Iter, bool>(pivot_pos, already_partitioned);
}
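
// Worked example: partition_right on { 3, 1, 4, 3, 5 } with pivot 3 yields
// { 1, 3, 4, 3, 5 }, returning pivot_pos at index 1 and
// already_partitioned == true, because the two scans cross before any swap
// is needed; the equal element 3 stays in the right-hand partition as
// documented above.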

// Similar function to the one above, except elements equal to the pivot are put to the left of
// the pivot and it doesn't check or return if the passed sequence already was partitioned.
// Since this is rarely used (the many equal case), and in that case pdqsort already has O(n)
// performance, no block quicksort is applied here for simplicity.
template<class Iter, class Compare>
inline Iter partition_left(Iter begin, Iter end, Compare comp) {
    typedef typename boost::movelib::iterator_traits<Iter>::value_type T;

    T pivot(boost::move(*begin));
    Iter first = begin;
    Iter last  = end;

    while (comp(pivot, *--last));

    if (last + 1 == end) while (first < last && !comp(pivot, *++first));
    else                 while (                !comp(pivot, *++first));

    while (first < last) {
        boost::adl_move_iter_swap(first, last);
        while (comp(pivot, *--last));
        while (!comp(pivot, *++first));
    }

    Iter pivot_pos = last;
    *begin = boost::move(*pivot_pos);
    *pivot_pos = boost::move(pivot);

    return pivot_pos;
}


template<class Iter, class Compare>
void pdqsort_loop( Iter begin, Iter end, Compare comp
                 , typename boost::movelib::iterator_traits<Iter>::size_type bad_allowed
                 , bool leftmost = true)
{
    typedef typename boost::movelib::iterator_traits<Iter>::size_type size_type;

    // Use a while loop for tail recursion elimination.
    while (true) {
        size_type size = size_type(end - begin);

        // Insertion sort is faster for small arrays.
        if (size < insertion_sort_threshold) {
            insertion_sort(begin, end, comp);
            return;
        }

        // Choose pivot as median of 3 or pseudomedian of 9.
        size_type s2 = size / 2;
        if (size > ninther_threshold) {
            sort3(begin, begin + s2, end - 1, comp);
            sort3(begin + 1, begin + (s2 - 1), end - 2, comp);
            sort3(begin + 2, begin + (s2 + 1), end - 3, comp);
            sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp);
            boost::adl_move_iter_swap(begin, begin + s2);
        } else sort3(begin + s2, begin, end - 1, comp);
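
        // The pseudomedian of 9 above works as follows: the first three sort3
        // calls leave the medians of three disjoint triples in their middle
        // positions, the fourth sort3 moves the median of those medians to
        // begin + s2, and the final swap places this pseudomedian at *begin,
        // where partition_right expects the pivot.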

        // If *(begin - 1) is the end of the right partition of a previous partition operation
        // there is no element in [begin, end) that is smaller than *(begin - 1). Then if our
        // pivot compares equal to *(begin - 1) we change strategy, putting equal elements in
        // the left partition, greater elements in the right partition. We do not have to
        // recurse on the left partition, since it's sorted (all equal).
        if (!leftmost && !comp(*(begin - 1), *begin)) {
            begin = partition_left(begin, end, comp) + 1;
            continue;
        }

        // Partition and get results.
        pdqsort_detail::pair<Iter, bool> part_result = partition_right(begin, end, comp);
        Iter pivot_pos = part_result.first;
        bool already_partitioned = part_result.second;

        // Check for a highly unbalanced partition.
        size_type l_size = size_type(pivot_pos - begin);
        size_type r_size = size_type(end - (pivot_pos + 1));
        bool highly_unbalanced = l_size < size / 8 || r_size < size / 8;
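        // Example: for size == 80 the cutoff is size / 8 == 10, so a
        // 7 / 72 split around the pivot counts as highly unbalanced while a
        // 12 / 67 split does not.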

        // If we got a highly unbalanced partition we shuffle elements to break many patterns.
        if (highly_unbalanced) {
            // If we had too many bad partitions, switch to heapsort to guarantee O(n log n).
            if (--bad_allowed == 0) {
                boost::movelib::heap_sort(begin, end, comp);
                return;
            }

            if (l_size >= insertion_sort_threshold) {
                boost::adl_move_iter_swap(begin, begin + l_size / 4);
                boost::adl_move_iter_swap(pivot_pos - 1, pivot_pos - l_size / 4);

                if (l_size > ninther_threshold) {
                    boost::adl_move_iter_swap(begin + 1, begin + (l_size / 4 + 1));
                    boost::adl_move_iter_swap(begin + 2, begin + (l_size / 4 + 2));
                    boost::adl_move_iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1));
                    boost::adl_move_iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2));
                }
            }

            if (r_size >= insertion_sort_threshold) {
                boost::adl_move_iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4));
                boost::adl_move_iter_swap(end - 1, end - r_size / 4);

                if (r_size > ninther_threshold) {
                    boost::adl_move_iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4));
                    boost::adl_move_iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4));
                    boost::adl_move_iter_swap(end - 2, end - (1 + r_size / 4));
                    boost::adl_move_iter_swap(end - 3, end - (2 + r_size / 4));
                }
            }
        } else {
            // If we were decently balanced and we tried to sort an already partitioned
            // sequence, try to use insertion sort.
            if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp)
                                    && partial_insertion_sort(pivot_pos + 1, end, comp)) return;
        }

        // Sort the left partition first using recursion and do tail recursion elimination for
        // the right-hand partition.
        pdqsort_loop<Iter, Compare>(begin, pivot_pos, comp, bad_allowed, leftmost);
        begin = pivot_pos + 1;
        leftmost = false;
    }
}

} //namespace pdqsort_detail {

template<class Iter, class Compare>
void pdqsort(Iter begin, Iter end, Compare comp)
{
    if (begin == end) return;
    typedef typename boost::movelib::iterator_traits<Iter>::size_type size_type;
    pdqsort_detail::pdqsort_loop<Iter, Compare>(begin, end, comp, pdqsort_detail::log2(size_type(end - begin)));
}

} //namespace movelib {
} //namespace boost {

#include <boost/move/detail/config_end.hpp>

#endif //BOOST_MOVE_ALGO_PDQSORT_HPP
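
For context, a minimal caller of the new header might look like the sketch
below. It is illustrative only and not part of the commit: int_less is a
hypothetical comparator, and the snippet merely assumes the Boost.Move headers
added above are on the include path.

#include <boost/move/algo/detail/pdqsort.hpp>
#include <cstddef>
#include <cstdio>

// Hypothetical comparator; any strict weak ordering works.
struct int_less
{
    bool operator()(int a, int b) const { return a < b; }
};

int main()
{
    int v[] = { 5, 3, 9, 1, 4, 8, 2, 7, 6, 0 };
    const std::size_t n = sizeof(v) / sizeof(v[0]);
    // Unstable sort with O(n log n) worst case: pattern-defeating quicksort
    // that falls back to heap_sort after log2(n) bad partitions.
    boost::movelib::pdqsort(v, v + n, int_less());
    for (std::size_t i = 0; i != n; ++i)
        std::printf("%d ", v[i]);
    std::printf("\n");
    return 0;
}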

@ -216,6 +216,9 @@
<File
    RelativePath="..\..\..\..\boost\move\algo\detail\basic_op.hpp">
</File>
<File
    RelativePath="..\..\..\..\boost\move\algo\detail\heap_sort.hpp">
</File>
<File
    RelativePath="..\..\..\..\boost\move\algo\detail\insertion_sort.hpp">
</File>
@ -225,6 +228,9 @@
<File
    RelativePath="..\..\..\..\boost\move\algo\detail\merge_sort.hpp">
</File>
<File
    RelativePath="..\..\..\..\boost\move\algo\detail\pdqsort.hpp">
</File>
<File
    RelativePath="..\..\..\..\boost\move\algo\detail\set_difference.hpp">
</File>

@ -26,6 +26,7 @@ using boost::timer::cpu_times;
using boost::timer::nanosecond_type;

//#define BOOST_MOVE_ADAPTIVE_SORT_STATS
//#define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 2
void print_stats(const char *str, boost::ulong_long_type element_count)
{
   std::printf("%sCmp:%8.04f Cpy:%9.04f\n", str, double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count );
@ -84,7 +85,7 @@ const char *AlgoNames [] = { "StdMerge "
                           , "SqrtHAdaptMerge "
                           , "SqrtAdaptMerge "
                           , "Sqrt2AdaptMerge "
                           , "QHalfAdaptMerge "
                           , "QuartAdaptMerge "
                           , "StdInplaceMerge "
                           };

@ -256,6 +257,7 @@ int main()
   #endif
   measure_all<order_perf_type>(1000001,0);
   measure_all<order_perf_type>(3000001,0);
   measure_all<order_perf_type>(5000001,0);
   #endif //NDEBUG

#endif //#ifndef BENCH_MERGE_SHORT
@ -36,6 +36,8 @@ void print_stats(const char *str, boost::ulong_long_type element_count)

#include <boost/move/algo/adaptive_sort.hpp>
#include <boost/move/algo/detail/merge_sort.hpp>
#include <boost/move/algo/detail/pdqsort.hpp>
#include <boost/move/algo/detail/heap_sort.hpp>
#include <boost/move/core.hpp>

template<class T>
@ -75,6 +77,7 @@ enum AlgoType
{
   MergeSort,
   StableSort,
   PdQsort,
   AdaptiveSort,
   SqrtHAdaptiveSort,
   SqrtAdaptiveSort,
@ -88,6 +91,7 @@ enum AlgoType

const char *AlgoNames [] = { "MergeSort "
                           , "StableSort "
                           , "PdQsort "
                           , "AdaptSort "
                           , "SqrtHAdaptSort "
                           , "SqrtAdaptSort "
@ -119,6 +123,9 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count
   case StableSort:
      std::stable_sort(elements,elements+element_count,order_type_less());
      break;
   case PdQsort:
      boost::movelib::pdqsort(elements,elements+element_count,order_type_less());
      break;
   case AdaptiveSort:
      boost::movelib::adaptive_sort(elements, elements+element_count, order_type_less());
      break;
@ -145,8 +152,9 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count
      boost::movelib::detail_adaptive::slow_stable_sort(elements, elements+element_count, order_type_less());
      break;
   case HeapSort:
-     std::make_heap(elements, elements+element_count, order_type_less());
-     std::sort_heap(elements, elements+element_count, order_type_less());
+     boost::movelib::heap_sort(elements, elements+element_count, order_type_less());
+     boost::movelib::heap_sort((order_move_type*)0, (order_move_type*)0, order_type_less());

      break;
   }
   timer.stop();
@ -182,7 +190,7 @@ bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count
      , units
      , prev_clock ? double(new_clock)/double(prev_clock): 1.0);
   prev_clock = new_clock;
-  bool res = is_order_type_ordered(elements, element_count, alg != HeapSort);
+  bool res = is_order_type_ordered(elements, element_count, alg != HeapSort && alg != PdQsort);
   return res;
}

@ -205,6 +213,9 @@ bool measure_all(std::size_t L, std::size_t NK)
   res = res && measure_algo(A,Keys,L,NK,StableSort, prev_clock);
   //
   prev_clock = back_clock;
+  res = res && measure_algo(A,Keys,L,NK,PdQsort, prev_clock);
+  //
+  prev_clock = back_clock;
   res = res && measure_algo(A,Keys,L,NK,HeapSort, prev_clock);
   //
   prev_clock = back_clock;