From 17c47e8061d61237a32b6f7903becbb2328ac298 Mon Sep 17 00:00:00 2001 From: Marshall Clow Date: Wed, 14 Jun 2023 18:30:08 -0700 Subject: [PATCH] Add 'indirect_sort' --- doc/algorithm.qbk | 2 + doc/indirect_sort.qbk | 71 +++++++++++++++ include/boost/algorithm/indirect_sort.hpp | 83 ++++++++++++++++++ test/Jamfile.v2 | 4 + test/indirect_sort_test.cpp | 100 ++++++++++++++++++++++ 5 files changed, 260 insertions(+) create mode 100644 doc/indirect_sort.qbk create mode 100644 include/boost/algorithm/indirect_sort.hpp create mode 100644 test/indirect_sort_test.cpp diff --git a/doc/algorithm.qbk b/doc/algorithm.qbk index 02a1562..be2cd8d 100644 --- a/doc/algorithm.qbk +++ b/doc/algorithm.qbk @@ -233,6 +233,8 @@ Convert a sequence of hexadecimal characters into a sequence of integers or char Convert a sequence of integral types into a lower case hexadecimal sequence of characters [endsect:hex_lower] +[include indirect_sort.qbk] + [include is_palindrome.qbk] [include is_partitioned_until.qbk] diff --git a/doc/indirect_sort.qbk b/doc/indirect_sort.qbk new file mode 100644 index 0000000..ac4e5a2 --- /dev/null +++ b/doc/indirect_sort.qbk @@ -0,0 +1,71 @@ +[/ File indirect_sort.qbk] + +[section:indirect_sort indirect_sort ] + +[/license +Copyright (c) 2023 Marshall Clow + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +There are times that you want a sorted version of a sequence, but for some reason or another, you don't really want to sort them. Maybe the elements in the sequence are non-copyable (or non-movable), or the sequence is const, or they're just really expensive to move around. An example of this might be a sequence of records from a database. + +Nevertheless, you might want to sort them. That's where indirect sorting comes in. In a "normal" sort, the elements of the sequence to be sorted are shuffled in place. 
In indirect sorting, the elements are unchanged, but the sort algorithm returns to you a "permutation" of the elements that, when applied, will leave the elements in the sequence in a sorted order. + +Say you have a sequence `[first, last)` of 1000 items that are expensive to swap: +``` + std::sort(first, last); // ['O(N lg N)] comparisons and ['O(N lg N)] swaps (of the element type). +``` + +On the other hand, using indirect sorting: +``` + auto permutation = boost::algorithm::indirect_sort(first, last); // ['O(N lg N)] comparisons and ['O(N lg N)] swaps (of size_t). + boost::algorithm::apply_permutation(first, last, permutation.begin(), permutation.end()); // ['O(N)] swaps (of the element type) +``` + +If the element type is sufficiently expensive to swap, then 10,000 swaps of size_t + 1000 swaps of the element_type could be cheaper than 10,000 swaps of the element_type. + +Or maybe you don't need the elements to actually be sorted - you just want to traverse them in a sorted order: +``` + auto permutation = boost::algorithm::indirect_sort(first, last); + for (size_t idx: permutation) + std::cout << first[idx] << std::endl; +``` + + +More to come here .... + +[heading Interface] + +The function `indirect_sort` returns a `vector<size_t>` containing the permutation necessary to put the input sequence into a sorted order. One version uses `std::less` to do the comparisons; the other lets the caller pass a predicate to do the comparisons. + +``` +template <typename RAIterator> +std::vector<size_t> indirect_sort (RAIterator first, RAIterator last); + +template <typename RAIterator, typename BinaryPredicate> +std::vector<size_t> indirect_sort (RAIterator first, RAIterator last, BinaryPredicate pred); +``` + +[heading Examples] + +[heading Iterator Requirements] + +`indirect_sort` requires random-access iterators. + +[heading Complexity] + +Both of the variants of `indirect_sort` run in ['O(N lg N)] time; they are not more (or less) efficient than `std::sort`. 
There is an extra layer of indirection on each comparison, but all of the swaps are done on values of type `size_t`. + +[heading Exception Safety] + +[heading Notes] + +[endsect] + +[/ File indirect_sort.qbk +Copyright 2023 Marshall Clow +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt). +] diff --git a/include/boost/algorithm/indirect_sort.hpp b/include/boost/algorithm/indirect_sort.hpp new file mode 100644 index 0000000..a551670 --- /dev/null +++ b/include/boost/algorithm/indirect_sort.hpp @@ -0,0 +1,83 @@ +/* + Copyright (c) Marshall Clow 2023. + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +*/ + +/// \file indirect_sort.hpp +/// \brief indirect sorting algorithms +/// \author Marshall Clow +/// + +#ifndef BOOST_ALGORITHM_IS_INDIRECT_SORT +#define BOOST_ALGORITHM_IS_INDIRECT_SORT + +#include // for std::sort (and others) +#include // for std::less +#include // for std::vector + +#include + +namespace boost { namespace algorithm { + +namespace detail { + + template + struct indirect_predicate { + indirect_predicate (Predicate pred, Iter iter) + : pred_(pred), iter_(iter) {} + + bool operator ()(size_t a, size_t b) const { + return pred_(iter_[a], iter_[b]); + } + + Predicate pred_; + Iter iter_; + }; + +} + +typedef std::vector Permutation; + + // ===== sort ===== + +/// \fn indirect_sort (RAIterator first, RAIterator last, Pred pred) +/// \returns a permutation of the elements in the range [first, last) +/// such that when the permutation is applied to the sequence, +/// the result is sorted according to the predicate pred. 
+/// +/// \param first The start of the input sequence +/// \param last The end of the input sequence +/// \param pred The predicate to compare elements with +/// +template +std::vector indirect_sort (RAIterator first, RAIterator last, Pred pred) { + Permutation ret(std::distance(first, last)); + boost::algorithm::iota(ret.begin(), ret.end(), size_t(0)); + std::sort(ret.begin(), ret.end(), + detail::indirect_predicate(pred, first)); + return ret; +} + +/// \fn indirect_sort (RAIterator first, RAIterator last) +/// \returns a permutation of the elements in the range [first, last) +/// such that when the permutation is applied to the sequence, +/// the result is sorted according to std::less. +/// +/// \param first The start of the input sequence +/// \param last The end of the input sequence +/// +template +std::vector indirect_sort (RAIterator first, RAIterator last) { + return indirect_sort(first, last, + std::less::value_type>()); +} + + // ===== stable_sort ===== + // ===== partial_sort ===== + // ===== nth_element ===== +}} + +#endif // BOOST_ALGORITHM_IS_INDIRECT_SORT diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index aef6bdb..3390234 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -88,6 +88,10 @@ alias unit_test_framework # Apply_permutation tests [ run apply_permutation_test.cpp unit_test_framework : : : : apply_permutation_test ] + +# Indirect_sort tests + [ run indirect_sort_test.cpp unit_test_framework : : : : indirect_sort_test ] + # Find tests [ run find_not_test.cpp unit_test_framework : : : : find_not_test ] [ run find_backward_test.cpp unit_test_framework : : : : find_backward_test ] diff --git a/test/indirect_sort_test.cpp b/test/indirect_sort_test.cpp new file mode 100644 index 0000000..54bf196 --- /dev/null +++ b/test/indirect_sort_test.cpp @@ -0,0 +1,100 @@ +/* + Copyright (c) Marshall Clow 2011-2012. + + Distributed under the Boost Software License, Version 1.0. 
(See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + For more information, see http://www.boost.org +*/ + +#include +#include +#include +#include + +#define BOOST_TEST_MAIN +#include + +#include +#include +#include +#include + +typedef std::vector Permutation; + +// A permutation of size N is a sequence of values in the range [0..N) +// such that no value appears more than once in the permutation. +bool isa_permutation(Permutation p, size_t N) { + if (p.size() != N) return false; + +// Sort the permutation, and ensure that each value appears exactly once. + std::sort(p.begin(), p.end()); + for (size_t i = 0; i < N; ++i) + if (p[i] != i) return false; + return true; +} + +template ::value_type> > +struct indirect_comp { + indirect_comp (Iter it, Comp c = Comp()) + : iter_(it), comp_(c) {} + + bool operator ()(size_t a, size_t b) const { return comp_(iter_[a], iter_[b]);} + + Iter iter_; + Comp comp_; +}; + +template +void test_one_sort(Iter first, Iter last) { + Permutation perm = boost::algorithm::indirect_sort(first, last); + BOOST_CHECK (isa_permutation(perm, std::distance(first, last))); + BOOST_CHECK (boost::algorithm::is_sorted(perm.begin(), perm.end(), indirect_comp(first))); + +// Make a copy of the data, apply the permutation, and ensure that it is sorted. + std::vector::value_type> v(first, last); + boost::algorithm::apply_permutation(v.begin(), v.end(), perm.begin(), perm.end()); + BOOST_CHECK (boost::algorithm::is_sorted(v.begin(), v.end())); +} + +template +void test_one_sort(Iter first, Iter last, Comp comp) { + Permutation perm = boost::algorithm::indirect_sort(first, last, comp); + BOOST_CHECK (isa_permutation(perm, std::distance(first, last))); + BOOST_CHECK (boost::algorithm::is_sorted(perm.begin(), perm.end(), + indirect_comp(first, comp))); + +// Make a copy of the data, apply the permutation, and ensure that it is sorted. 
+ std::vector::value_type> v(first, last); + boost::algorithm::apply_permutation(v.begin(), v.end(), perm.begin(), perm.end()); + BOOST_CHECK (boost::algorithm::is_sorted(v.begin(), v.end(), comp)); +} + + +void test_sort () { + BOOST_CXX14_CONSTEXPR int num[] = { 1,3,5,7,9, 2, 4, 6, 8, 10 }; + const int sz = sizeof (num)/sizeof(num[0]); + int *first = &num[0]; + int const *cFirst = &num[0]; + +// Test subsets + for (size_t i = 0; i <= sz; ++i) { + test_one_sort(first, first + i); + test_one_sort(first, first + i, std::greater()); + + // test with constant inputs + test_one_sort(cFirst, cFirst + i); + test_one_sort(cFirst, cFirst + i, std::greater()); + } + +// make sure we work with iterators as well as pointers + std::vector v(first, first + sz); + test_one_sort(v.begin(), v.end()); + test_one_sort(v.begin(), v.end(), std::greater()); + } + +BOOST_AUTO_TEST_CASE( test_main ) +{ + test_sort (); +}