From 814f8a5c058507ea2c61f61e6544d840d5091c07 Mon Sep 17 00:00:00 2001 From: Marshall Clow Date: Sun, 18 Jun 2023 11:31:32 -0700 Subject: [PATCH] Update docs based on feedback, add 'stable_sort', 'partial_sort' and 'nth_element' --- doc/indirect_sort.qbk | 46 ++++++-- include/boost/algorithm/indirect_sort.hpp | 127 ++++++++++++++++++++-- test/indirect_sort_test.cpp | 10 +- 3 files changed, 161 insertions(+), 22 deletions(-) diff --git a/doc/indirect_sort.qbk b/doc/indirect_sort.qbk index ac4e5a2..3c78936 100644 --- a/doc/indirect_sort.qbk +++ b/doc/indirect_sort.qbk @@ -9,36 +9,66 @@ Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) ] -There are times that you want a sorted version of a sequence, but for some reason or another, you don't really want to sort them. Maybe the elements in the sequence are non-copyable (or non-movable), or the sequence is const, or they're just really expensive to move around. An example of this might be a sequence of records from a database. +There are times that you want a sorted version of a sequence, but for some reason you don't want to modify it. Maybe the elements in the sequence can't be moved/copied, e.g. the sequence is const, or they're just really expensive to move around. An example of this might be a sequence of records from a database. -Nevertheless, you might want to sort them. That's where indirect sorting comes in. In a "normal" sort, the elements of the sequence to be sorted are shuffled in place. In indirect sorting, the elements are unchanged, but the sort algorithm returns to you a "permutation" of the elements that, when applied, will leave the elements in the sequence in a sorted order. +That's where indirect sorting comes in. In a "normal" sort, the elements of the sequence to be sorted are shuffled in place. 
In indirect sorting, the elements are unchanged, but the sort algorithm returns a "permutation" of the elements that, when applied, will put the elements in the sequence in a sorted order. -Say you have a sequence `[first, last)` of 1000 items that are expensive to swap: +Assume you have a sequence `[first, last)` of 1000 items that are expensive to swap: ``` std::sort(first, last); // ['O(N ln N)] comparisons and ['O(N ln N)] swaps (of the element type). ``` On the other hand, using indirect sorting: ``` - auto permutation = boost::algorithm::indirect_sort(first, last); // ['O(N lg N)] comparisons and ['O(N lg N)] swaps (of size_t). - boost::algorithm::apply_permutation(first, last, perm.begin(), perm.end()); // ['O(N)] swaps (of the element type) + auto perm = indirect_sort(first, last); // ['O(N lg N)] comparisons and ['O(N lg N)] swaps (of size_t). + apply_permutation(first, last, perm.begin(), perm.end()); // ['O(N)] swaps (of the element type) ``` If the element type is sufficiently expensive to swap, then 10,000 swaps of size_t + 1000 swaps of the element_type could be cheaper than 10,000 swaps of the element_type. Or maybe you don't need the elements to actually be sorted - you just want to traverse them in a sorted order: ``` - auto permutation = boost::algorithm::indirect_sort(first, last); + auto permutation = indirect_sort(first, last); for (size_t idx: permutation) std::cout << first[idx] << std::endl; ``` -More to come here .... +Assume that instead of an "array of structures", you have a "struct of arrays" +``` +struct AType { + Type0 key; + Type1 value1; + Type1 value2; + }; + +std::array arrayOfStruct; +``` + +versus: + +``` +template +struct AType { + std::array key; + std::array value1; + std::array value2; + }; + +AType<1000> structOfArrays; +``` + +Sorting the first one is easy, because each set of fields (`key`, `value1`, `value2`) is part of the same struct.
But with indirect sorting, the second one is easy to sort as well - just sort the keys, then apply the permutation to the keys and the values: +``` + auto perm = indirect_sort(std::begin(structOfArrays.key), std::end(structOfArrays.key)); + apply_permutation(structOfArrays.key.begin(), structOfArrays.key.end(), perm.begin(), perm.end()); + apply_permutation(structOfArrays.value1.begin(), structOfArrays.value1.end(), perm.begin(), perm.end()); + apply_permutation(structOfArrays.value2.begin(), structOfArrays.value2.end(), perm.begin(), perm.end()); +``` [heading interface] -The function `indirect_sort` a `vector` containing the permutation necessary to put the input sequence into a sorted order. One version uses `std::less` to do the comparisons; the other lets the caller pass predicate to do the comparisons. +The function `indirect_sort` returns a `vector` containing the permutation necessary to put the input sequence into a sorted order. One version uses `std::less` to do the comparisons; the other lets the caller pass a predicate to do the comparisons.
``` template diff --git a/include/boost/algorithm/indirect_sort.hpp b/include/boost/algorithm/indirect_sort.hpp index a551670..f0a9862 100644 --- a/include/boost/algorithm/indirect_sort.hpp +++ b/include/boost/algorithm/indirect_sort.hpp @@ -11,12 +11,12 @@ /// \author Marshall Clow /// -#ifndef BOOST_ALGORITHM_IS_INDIRECT_SORT -#define BOOST_ALGORITHM_IS_INDIRECT_SORT +#ifndef BOOST_ALGORITHM_INDIRECT_SORT +#define BOOST_ALGORITHM_INDIRECT_SORT #include // for std::sort (and others) #include // for std::less -#include // for std:;vector +#include // for std::vector #include @@ -53,7 +53,7 @@ typedef std::vector Permutation; /// \param pred The predicate to compare elements with /// template -std::vector indirect_sort (RAIterator first, RAIterator last, Pred pred) { +Permutation indirect_sort (RAIterator first, RAIterator last, Pred pred) { Permutation ret(std::distance(first, last)); boost::algorithm::iota(ret.begin(), ret.end(), size_t(0)); std::sort(ret.begin(), ret.end(), @@ -61,23 +61,132 @@ std::vector indirect_sort (RAIterator first, RAIterator last, Pred pred) return ret; } -/// \fn indirect_sort (RAIterator first, RAIterator las ) +/// \fn indirect_sort (RAIterator first, RAIterator last) +/// \returns a permutation of the elements in the range [first, last) +/// such that when the permutation is applied to the sequence, +/// the result is sorted in non-descending order. +/// +/// \param first The start of the input sequence +/// \param last The end of the input sequence +/// +template +Permutation indirect_sort (RAIterator first, RAIterator last) { + return indirect_sort(first, last, + std::less::value_type>()); +} + + // ===== stable_sort ===== + +/// \fn indirect_stable_sort (RAIterator first, RAIterator last, Predicate p) /// \returns a permutation of the elements in the range [first, last) /// such that when the permutation is applied to the sequence, /// the result is sorted according to the predicate pred. 
/// /// \param first The start of the input sequence /// \param last The end of the input sequence +/// \param pred The predicate to compare elements with +/// +template +Permutation indirect_stable_sort (RAIterator first, RAIterator last, Pred pred) { + Permutation ret(std::distance(first, last)); + boost::algorithm::iota(ret.begin(), ret.end(), size_t(0)); + std::stable_sort(ret.begin(), ret.end(), + detail::indirect_predicate(pred, first)); + return ret; +} + +/// \fn indirect_stable_sort (RAIterator first, RAIterator last) +/// \returns a permutation of the elements in the range [first, last) +/// such that when the permutation is applied to the sequence, +/// the result is sorted in non-descending order. +/// +/// \param first The start of the input sequence +/// \param last The end of the input sequence /// template -std::vector indirect_sort (RAIterator first, RAIterator last) { - return indirect_sort(first, last, +Permutation indirect_stable_sort (RAIterator first, RAIterator last) { + return indirect_stable_sort(first, last, std::less::value_type>()); } - // ===== stable_sort ===== // ===== partial_sort ===== + +/// \fn indirect_partial_sort (RAIterator first, RAIterator middle, RAIterator last, Predicate p) +/// \returns a permutation of the elements in the range [first, last) +/// such that when the permutation is applied to the sequence, +/// the resulting range [first, middle) is sorted and the range [middle,last) +/// consists of elements that are "larger" than the ones in [first, middle), +/// according to the predicate pred.
+/// +/// \param first The start of the input sequence +/// \param middle The end of the range to be sorted +/// \param last The end of the input sequence +/// \param pred The predicate to compare elements with +/// +template +Permutation indirect_partial_sort (RAIterator first, RAIterator middle, + RAIterator last, Pred pred) { + Permutation ret(std::distance(first, last)); + + boost::algorithm::iota(ret.begin(), ret.end(), size_t(0)); + std::partial_sort(ret.begin(), ret.begin() + std::distance(first, middle), ret.end(), + detail::indirect_predicate(pred, first)); + return ret; +} + +/// \fn indirect_partial_sort (RAIterator first, RAIterator middle, RAIterator last) +/// \returns a permutation of the elements in the range [first, last) +/// such that when the permutation is applied to the sequence, +/// the resulting range [first, middle) is sorted in non-descending order, +/// and the range [middle,last) consists of elements that are larger than +/// the ones in [first, middle). +/// +/// \param first The start of the input sequence +/// \param last The end of the input sequence +/// +template +Permutation indirect_partial_sort (RAIterator first, RAIterator middle, RAIterator last) { + return indirect_partial_sort(first, middle, last, + std::less::value_type>()); +} + // ===== nth_element ===== + +/// \fn indirect_nth_element (RAIterator first, RAIterator nth, RAIterator last, Predicate p) +/// \returns a permutation of the elements in the range [first, last) +/// such that when the permutation is applied to the sequence, +/// the element at position nth is the one that would be in that position if the sequence were sorted according to the predicate pred.
+/// +/// \param first The start of the input sequence +/// \param nth The sort partition point in the input sequence +/// \param last The end of the input sequence +/// \param pred The predicate to compare elements with +/// +template +Permutation indirect_nth_element (RAIterator first, RAIterator nth, + RAIterator last, Pred pred) { + Permutation ret(std::distance(first, last)); + boost::algorithm::iota(ret.begin(), ret.end(), size_t(0)); + std::nth_element(ret.begin(), ret.begin() + std::distance(first, nth), ret.end(), + detail::indirect_predicate(pred, first)); + return ret; +} + +/// \fn indirect_nth_element (RAIterator first, RAIterator nth, RAIterator last) +/// \returns a permutation of the elements in the range [first, last) +/// such that when the permutation is applied to the sequence, +/// the element at position nth is the one that would be in that position if the sequence were sorted in non-descending order. +/// +/// \param first The start of the input sequence +/// \param nth The sort partition point in the input sequence +/// \param last The end of the input sequence +/// +template +Permutation indirect_nth_element (RAIterator first, RAIterator nth, RAIterator last) { + return indirect_nth_element(first, nth, last, + std::less::value_type>()); +} + }} -#endif // BOOST_ALGORITHM_IS_INDIRECT_SORT +#endif // BOOST_ALGORITHM_INDIRECT_SORT diff --git a/test/indirect_sort_test.cpp b/test/indirect_sort_test.cpp index 54bf196..21bd3bd 100644 --- a/test/indirect_sort_test.cpp +++ b/test/indirect_sort_test.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) Marshall Clow 2011-2012. + Copyright (c) Marshall Clow 2023. Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -24,7 +24,7 @@ typedef std::vector Permutation; // A permutation of size N is a sequence of values in the range [0..N) // such that no value appears more than once in the permutation.
-bool isa_permutation(Permutation p, size_t N) { +bool is_a_permutation(Permutation p, size_t N) { if (p.size() != N) return false; // Sort the permutation, and ensure that each value appears exactly once. @@ -49,7 +49,7 @@ struct indirect_comp { template void test_one_sort(Iter first, Iter last) { Permutation perm = boost::algorithm::indirect_sort(first, last); - BOOST_CHECK (isa_permutation(perm, std::distance(first, last))); + BOOST_CHECK (is_a_permutation(perm, std::distance(first, last))); BOOST_CHECK (boost::algorithm::is_sorted(perm.begin(), perm.end(), indirect_comp(first))); // Make a copy of the data, apply the permutation, and ensure that it is sorted. @@ -61,7 +61,7 @@ void test_one_sort(Iter first, Iter last) { template void test_one_sort(Iter first, Iter last, Comp comp) { Permutation perm = boost::algorithm::indirect_sort(first, last, comp); - BOOST_CHECK (isa_permutation(perm, std::distance(first, last))); + BOOST_CHECK (is_a_permutation(perm, std::distance(first, last))); BOOST_CHECK (boost::algorithm::is_sorted(perm.begin(), perm.end(), indirect_comp(first, comp))); @@ -73,7 +73,7 @@ void test_one_sort(Iter first, Iter last, Comp comp) { void test_sort () { - BOOST_CXX14_CONSTEXPR int num[] = { 1,3,5,7,9, 2, 4, 6, 8, 10 }; + int num[] = { 1,3,5,7,9, 2, 4, 6, 8, 10 }; const int sz = sizeof (num)/sizeof(num[0]); int *first = &num[0]; int const *cFirst = &num[0];