forked from boostorg/fusion
sequence performance tests
[SVN r36086]
This commit is contained in:
@ -13,3 +13,6 @@ exe accumulate : accumulate.cpp ;
|
||||
exe inner_product : inner_product.cpp ;
|
||||
|
||||
exe inner_product2 : inner_product2.cpp ;
|
||||
|
||||
exe sequence_efficiency : sequence_efficiency.cpp ;
|
||||
|
||||
|
81
example/performance/measure.hpp
Normal file
81
example/performance/measure.hpp
Normal file
@ -0,0 +1,81 @@
|
||||
// Copyright David Abrahams, Matthias Troyer, Michael Gauckler
|
||||
// 2005. Distributed under the Boost Software License, Version
|
||||
// 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#include <cstddef>

#include <boost/timer.hpp>
|
||||
|
||||
namespace test
|
||||
{
|
||||
// This value is required to ensure that a smart compiler's dead
// code elimination doesn't optimize away anything we're testing.
// We'll use it to compute the return code of the executable to make
// sure it's needed.
//
// hammer() resets it to zero and then adds every accumulator's sum
// to it, so after a call it holds the total for that call only.
//
// NOTE(review): this is a non-inline definition at namespace scope in
// a header; including this header from more than one translation unit
// of the same program would define it more than once -- confirm the
// header is only ever included from a single .cpp per executable.
int live_code;
|
||||
|
||||
// Call objects of the given Accumulator type repeatedly with x as
|
||||
// an argument.
|
||||
template <class Accumulator, class Arg>
|
||||
void hammer(Arg const& x, long const repeats)
|
||||
{
|
||||
// Strategy: because the sum in an accumulator after each call
|
||||
// depends on the previous value of the sum, the CPU's pipeline
|
||||
// might be stalled while waiting for the previous addition to
|
||||
// complete. Therefore, we allocate an array of accumulators,
|
||||
// and update them in sequence, so that there's no dependency
|
||||
// between adjacent addition operations.
|
||||
//
|
||||
// Additionally, if there were only one accumulator, the
|
||||
// compiler or CPU might decide to update the value in a
|
||||
// register rather that writing it back to memory. we want each
|
||||
// operation to at least update the L1 cache. *** Note: This
|
||||
// concern is specific to the particular application at which
|
||||
// we're targeting the test. ***
|
||||
|
||||
// This has to be at least as large as the number of
|
||||
// simultaneous accumulations that can be executing in the
|
||||
// compiler pipeline. A safe number here is larger than the
|
||||
// machine's maximum pipeline depth. If you want to test the L2
|
||||
// or L3 cache, or main memory, you can increase the size of
|
||||
// this array. 1024 is an upper limit on the pipeline depth of
|
||||
// current vector machines.
|
||||
const std::size_t number_of_accumulators = 1024;
|
||||
live_code = 0; // reset to zero
|
||||
|
||||
Accumulator a[number_of_accumulators];
|
||||
|
||||
for (long iteration = 0; iteration < repeats; ++iteration)
|
||||
{
|
||||
for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
|
||||
{
|
||||
(*ap)(x);
|
||||
}
|
||||
}
|
||||
|
||||
// Accumulate all the partial sums to avoid dead code
|
||||
// elimination.
|
||||
for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
|
||||
{
|
||||
live_code += ap->sum;
|
||||
}
|
||||
}
|
||||
|
||||
// Measure the time required to hammer accumulators of the given
|
||||
// type with the argument x.
|
||||
template <class Accumulator, class T>
|
||||
double measure(T const& x, long const repeats)
|
||||
{
|
||||
// Hammer accumulators a couple of times to ensure the
|
||||
// instruction cache is full of our test code, and that we don't
|
||||
// measure the cost of a page fault for accessing the data page
|
||||
// containing the memory where the accumulators will be
|
||||
// allocated
|
||||
hammer<Accumulator>(x, repeats);
|
||||
hammer<Accumulator>(x, repeats);
|
||||
|
||||
// Now start a timer
|
||||
boost::timer time;
|
||||
hammer<Accumulator>(x, repeats); // This time, we'll measure
|
||||
return time.elapsed() / repeats; // return the time of one iteration
|
||||
}
|
||||
}
|
179
example/performance/sequence_efficiency.cpp
Normal file
179
example/performance/sequence_efficiency.cpp
Normal file
@ -0,0 +1,179 @@
|
||||
/*=============================================================================
|
||||
Copyright (c) 2001-2006 Joel de Guzman
|
||||
|
||||
Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)
|
||||
==============================================================================*/
|
||||
#include "measure.hpp"
|
||||
|
||||
#define FUSION_MAX_LIST_SIZE 30
|
||||
#define FUSION_MAX_VECTOR_SIZE 30
|
||||
|
||||
#include <boost/fusion/algorithm/iteration/accumulate.hpp>
|
||||
#include <boost/fusion/sequence/container/vector.hpp>
|
||||
#include <boost/fusion/sequence/container/list.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
// inline aggressively
|
||||
# pragma inline_recursion(on) // turn on inline recursion
|
||||
# pragma inline_depth(255) // max inline depth
|
||||
#endif
|
||||
|
||||
namespace
|
||||
{
|
||||
// Polymorphic binary "plus" function object, used below as the
// folding operation passed to boost::fusion::accumulate.
struct poly_add
{
    // Result-type metafunction: the result always has the type of
    // the left-hand operand.
    template<typename Lhs, typename Rhs>
    struct result
    {
        typedef Lhs type;
    };

    // Returns lhs + rhs, converted to the type of lhs.
    template<typename Lhs, typename Rhs>
    Lhs operator()(const Lhs& lhs, const Rhs& rhs) const
    {
        return lhs + rhs;
    }
};
|
||||
|
||||
// Our Accumulator function
|
||||
template <typename T>
|
||||
struct accumulator
|
||||
{
|
||||
accumulator()
|
||||
: sum()
|
||||
{}
|
||||
|
||||
template <typename Sequence>
|
||||
void operator()(Sequence const& seq)
|
||||
{
|
||||
this->sum += boost::fusion::accumulate(seq, 0, poly_add());
|
||||
}
|
||||
|
||||
T sum;
|
||||
};
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
using namespace test;
|
||||
using namespace boost::fusion;
|
||||
|
||||
vector<
|
||||
int, int, int
|
||||
>
|
||||
vsmall(BOOST_PP_ENUM_PARAMS(3,));
|
||||
|
||||
list<
|
||||
int, int, int
|
||||
>
|
||||
lsmall(BOOST_PP_ENUM_PARAMS(3,));
|
||||
|
||||
vector<
|
||||
int, int, int, int, int, int, int, int, int, int
|
||||
>
|
||||
vmid(BOOST_PP_ENUM_PARAMS(10,));
|
||||
|
||||
list<
|
||||
int, int, int, int, int, int, int, int, int, int
|
||||
>
|
||||
lmid(BOOST_PP_ENUM_PARAMS(10,));
|
||||
|
||||
vector<
|
||||
int, int, int, int, int, int, int, int, int, int
|
||||
, int, int, int, int, int, int, int, int, int, int
|
||||
, int, int, int, int, int, int, int, int, int, int
|
||||
>
|
||||
vbig(BOOST_PP_ENUM_PARAMS(30,));
|
||||
|
||||
list<
|
||||
int, int, int, int, int, int, int, int, int, int
|
||||
, int, int, int, int, int, int, int, int, int, int
|
||||
, int, int, int, int, int, int, int, int, int, int
|
||||
>
|
||||
lbig(BOOST_PP_ENUM_PARAMS(30,));
|
||||
|
||||
// first decide how many repetitions to measure
|
||||
long repeats = 100;
|
||||
double measured = 0;
|
||||
while (measured < 1.0 && repeats <= 10000000)
|
||||
{
|
||||
repeats *= 10;
|
||||
|
||||
boost::timer time;
|
||||
|
||||
hammer<accumulator<int> >(vsmall, repeats);
|
||||
hammer<accumulator<int> >(lsmall, repeats);
|
||||
hammer<accumulator<int> >(vmid, repeats);
|
||||
hammer<accumulator<int> >(lmid, repeats);
|
||||
hammer<accumulator<int> >(vbig, repeats);
|
||||
hammer<accumulator<int> >(lbig, repeats);
|
||||
|
||||
measured = time.elapsed();
|
||||
}
|
||||
|
||||
measure<accumulator<int> >(vsmall, 1);
|
||||
std::cout
|
||||
<< "small vector accumulated result: "
|
||||
<< live_code << std::endl;
|
||||
measure<accumulator<int> >(lsmall, 1);
|
||||
std::cout
|
||||
<< "small list accumulated result: "
|
||||
<< live_code << std::endl;
|
||||
measure<accumulator<int> >(vmid, 1);
|
||||
std::cout
|
||||
<< "medium vector accumulated result: "
|
||||
<< live_code << std::endl;
|
||||
measure<accumulator<int> >(lmid, 1);
|
||||
std::cout
|
||||
<< "medium list accumulated result: "
|
||||
<< live_code << std::endl;
|
||||
measure<accumulator<int> >(vbig, 1);
|
||||
std::cout
|
||||
<< "big vector accumulated result: "
|
||||
<< live_code << std::endl;
|
||||
measure<accumulator<int> >(lbig, 1);
|
||||
std::cout
|
||||
<< "big list accumulated result: "
|
||||
<< live_code << std::endl;
|
||||
|
||||
std::cout.setf(std::ios::scientific);
|
||||
|
||||
std::cout
|
||||
<< "small vector time: "
|
||||
<< measure<accumulator<int> >(vsmall, repeats)
|
||||
<< std::endl;
|
||||
std::cout
|
||||
<< "small list time: "
|
||||
<< measure<accumulator<int> >(lsmall, repeats)
|
||||
<< std::endl;
|
||||
std::cout
|
||||
<< "medium vector time: "
|
||||
<< measure<accumulator<int> >(vmid, repeats)
|
||||
<< std::endl;
|
||||
std::cout
|
||||
<< "medium list time: "
|
||||
<< measure<accumulator<int> >(lmid, repeats)
|
||||
<< std::endl;
|
||||
std::cout
|
||||
<< "big vector time: "
|
||||
<< measure<accumulator<int> >(vbig, repeats)
|
||||
<< std::endl;
|
||||
std::cout
|
||||
<< "big list time: "
|
||||
<< measure<accumulator<int> >(lbig, repeats)
|
||||
<< std::endl;
|
||||
|
||||
// This is ultimately responsible for preventing all the test code
|
||||
// from being optimized away. Change this to return 0 and you
|
||||
// unplug the whole test's life support system.
|
||||
return live_code != 0;
|
||||
}
|
49
example/performance/timings.txt
Normal file
49
example/performance/timings.txt
Normal file
@ -0,0 +1,49 @@
|
||||
Timing results for sequence_efficiency.cpp comparing the speed of various
|
||||
fusion sequences. The test involves accumulating the elements of the
|
||||
sequence which is primed to have values 0..N-1 (N=size of sequence). Small,
|
||||
medium and big sequences are tested where:
|
||||
|
||||
small = 3 elements
|
||||
medium = 10 elements
|
||||
big = 30 elements
|
||||
|
||||
Tester: Joel de Guzman. WinXP, P4-3.0GHZ, 2GB RAM
|
||||
|
||||
VC7.1 (flags = /MD /O2 /EHsc /GS)
|
||||
|
||||
small vector time: 1.880000e-006
|
||||
small list time: 2.040000e-006
|
||||
medium vector time: 2.030000e-006
|
||||
medium list time: 3.590000e-006
|
||||
big vector time: 1.880000e-006
|
||||
big list time: 9.070000e-006
|
||||
|
||||
VC8.0 (flags = /MD /O2 /EHsc /GS)
|
||||
|
||||
small vector time: 1.880000e-006
|
||||
small list time: 2.030000e-006
|
||||
medium vector time: 2.030000e-006
|
||||
medium list time: 3.750000e-006
|
||||
big vector time: 1.880000e-006
|
||||
big list time: 9.380000e-006
|
||||
|
||||
G++ 3.4 (flags = -ftemplate-depth-128 -funroll-loops -O3 -finline-functions -Wno-inline -Wall)
|
||||
|
||||
small vector time: 2.500000e-05
|
||||
small list time: 2.500000e-05
|
||||
medium vector time: 7.970000e-05
|
||||
medium list time: 7.970000e-05
|
||||
big vector time: 2.516000e-04
|
||||
big list time: 2.485000e-04
|
||||
|
||||
Intel 9.1 (flags = /MD /O2 /EHsc /GS)
|
||||
|
||||
small vector time: 1.141000e-006
|
||||
small list time: 1.156000e-006
|
||||
medium vector time: 1.156000e-006
|
||||
medium list time: 1.156000e-006
|
||||
big vector time: 1.171000e-006
|
||||
big list time: 1.156000e-006
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user