Full merge from trunk at revision 41356 of entire boost-root tree.

[SVN r41370]
This commit is contained in:
Beman Dawes
2007-11-25 18:38:02 +00:00
parent ed9cb87ac3
commit d57e8cfe9e
455 changed files with 64511 additions and 0 deletions

View File

@ -0,0 +1,20 @@
#==============================================================================
# Copyright (c) 2003-2006 Joel de Guzman
# Copyright (c) 2006 Dan Marsden
#
# Use, modification and distribution is subject to the Boost Software
# License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
# Build rules for the fusion-performance project: each `exe` rule below
# builds one stand-alone executable from the source file of the same name.
project fusion-performance ;
exe accumulate : accumulate.cpp ;
exe inner_product : inner_product.cpp ;
exe inner_product2 : inner_product2.cpp ;
exe sequence_efficiency : sequence_efficiency.cpp ;
exe functional : functional.cpp ;

View File

@ -0,0 +1,357 @@
/*=============================================================================
Copyright (c) 2001-2006 Joel de Guzman
Copyright (c) 2005-2006 Dan Marsden
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#include <boost/array.hpp>
#include <boost/timer.hpp>
#include <boost/fusion/algorithm/iteration/accumulate.hpp>
#include <boost/fusion/algorithm/transformation/transform.hpp>
#include <boost/fusion/container/vector.hpp>
#include <boost/fusion/algorithm/transformation/zip.hpp>
#include <boost/fusion/sequence/intrinsic/at.hpp>
#include <boost/fusion/adapted/array.hpp>
#include <boost/type_traits/remove_reference.hpp>
#include <algorithm>
#include <numeric>
#include <functional>
#include <iostream>
#include <cmath>
#include <limits>
#ifdef _MSC_VER
// inline aggressively
# pragma inline_recursion(on) // turn on inline recursion
# pragma inline_depth(255) // max inline depth
#endif
// Number of timed repetitions each benchmark performs; the smallest of the
// measured times is the one reported.
int const REPEAT_COUNT = 10;
// Run time a calibration pass has to reach before the iteration count stops
// doubling. It is compared against boost::timer::elapsed().
double const duration = 0.5;
namespace
{
template<int N>
double time_for_std_accumulate(int& j)
{
boost::timer tim;
int i = 0;
long long iter = 65536;
long long counter, repeats;
double result = (std::numeric_limits<double>::max)();
double runtime = 0;
double run;
boost::array<int, N> arr;
std::generate(arr.begin(), arr.end(), rand);
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = std::accumulate(arr.begin(), arr.end(), 0);
static_cast<void>(i);
}
runtime = tim.elapsed();
iter *= 2;
} while(runtime < duration);
iter /= 2;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = std::accumulate(arr.begin(), arr.end(), 0);
j += i;
}
run = tim.elapsed();
result = (std::min)(run, result);
}
std::cout << i << std::endl;
return result / iter;
}
// Binary addition function object usable with operands of any types that
// support operator+.
struct poly_add
{
    // Result-type lookup keyed on a call signature; only the two-argument
    // form below is defined.
    template<typename Signature>
    struct result;

    // For poly_add(A, B) the nested `type` is A with any reference removed.
    template<typename A, typename B>
    struct result<poly_add(A, B)>
        : boost::remove_reference<A>
    {};

    // Returns a + b, converted to the type of the left operand.
    template<typename A, typename B>
    A operator()(A const& a, B const& b) const
    {
        return a + b;
    }
};
// Binary multiplication function object usable with operands of any types
// that support operator*.
struct poly_mult
{
    // Result-type lookup keyed on a call signature; only the two-argument
    // form below is defined.
    template<typename Signature>
    struct result;

    // For poly_mult(A, B) the nested `type` is A with any reference removed.
    template<typename A, typename B>
    struct result<poly_mult(A, B)>
        : boost::remove_reference<A>
    {};

    // Returns a * b, converted to the type of the left operand.
    template<typename A, typename B>
    A operator()(A const& a, B const& b) const
    {
        return a * b;
    }
};
template<int N>
double time_for_fusion_accumulate(int& j)
{
boost::timer tim;
int i = 0;
long long iter = 65536;
long long counter, repeats;
double result = (std::numeric_limits<double>::max)();
double runtime = 0;
double run;
boost::array<int, N> arr;
std::generate(arr.begin(), arr.end(), rand);
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = boost::fusion::accumulate(arr, 0, poly_add());
static_cast<void>(i);
}
runtime = tim.elapsed();
iter *= 2;
} while(runtime < duration);
iter /= 2;
std::cout << iter << " iterations" << std::endl;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = boost::fusion::accumulate(arr, 0, poly_add());
j += i;
}
run = tim.elapsed();
result = (std::min)(run, result);
std::cout << ".";
std::cout.flush();
}
std::cout << i << std::endl;
return result / iter;
}
// Everything up to the matching #endif is compiled out. It holds
// inner_product benchmarks that follow the same calibrate-then-measure
// scheme as the accumulate benchmarks above; main() keeps the matching
// calls under its own `#if 0`.
#if 0
// Disabled: times std::inner_product over two boost::array<int, N> filled by
// rand. Adds each measured result to j and returns the smallest elapsed time
// divided by the iteration count.
template<int N>
double time_for_std_inner_product(int& j)
{
boost::timer tim;
int i = 0;
long long iter = 65536;
long long counter, repeats;
double result = (std::numeric_limits<double>::max)();
double runtime = 0;
double run;
boost::array<int, N> arr1;
boost::array<int, N> arr2;
std::generate(arr1.begin(), arr1.end(), rand);
std::generate(arr2.begin(), arr2.end(), rand);
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0);
static_cast<void>(i);
}
runtime = tim.elapsed();
iter *= 2;
} while(runtime < duration);
iter /= 2;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0);
j += i;
}
run = tim.elapsed();
result = (std::min)(run, result);
}
std::cout << i << std::endl;
return result / iter;
}
// Disabled: same measurement, with the inner product expressed as
// fusion::accumulate over fusion::transform(arr1, arr2, poly_mult()).
template<int N>
double time_for_fusion_inner_product(int& j)
{
boost::timer tim;
int i = 0;
long long iter = 65536;
long long counter, repeats;
double result = (std::numeric_limits<double>::max)();
double runtime = 0;
double run;
boost::array<int, N> arr1;
boost::array<int, N> arr2;
std::generate(arr1.begin(), arr1.end(), rand);
std::generate(arr2.begin(), arr2.end(), rand);
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = boost::fusion::accumulate(
boost::fusion::transform(arr1, arr2, poly_mult()), 0, poly_add());
static_cast<void>(i);
}
runtime = tim.elapsed();
iter *= 2;
} while(runtime < duration);
iter /= 2;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = boost::fusion::accumulate(
boost::fusion::transform(arr1, arr2, poly_mult()), 0, poly_add());
j += i;
}
run = tim.elapsed();
result = (std::min)(run, result);
}
std::cout << i << std::endl;
return result / iter;
}
// Disabled: fold step that adds the product of elements 0 and 1 of lhs to
// rhs. Note that its `result` template takes two type parameters, unlike the
// signature-keyed `result<Sig>` used by poly_add and poly_mult above.
struct poly_combine
{
template<typename Lhs, typename Rhs>
struct result
{
typedef Rhs type;
};
template<typename Lhs, typename Rhs>
typename result<Lhs,Rhs>::type
operator()(const Lhs& lhs, const Rhs& rhs) const
{
return rhs + boost::fusion::at_c<0>(lhs) * boost::fusion::at_c<1>(lhs);
}
};
// Disabled: same measurement, with the inner product expressed as
// fusion::accumulate over fusion::zip(arr1, arr2) using poly_combine.
template<int N>
double time_for_fusion_inner_product2(int& j)
{
boost::timer tim;
int i = 0;
long long iter = 65536;
long long counter, repeats;
double result = (std::numeric_limits<double>::max)();
double runtime = 0;
double run;
boost::array<int, N> arr1;
boost::array<int, N> arr2;
std::generate(arr1.begin(), arr1.end(), rand);
std::generate(arr2.begin(), arr2.end(), rand);
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = boost::fusion::accumulate(
boost::fusion::zip(arr1, arr2), 0, poly_combine());
static_cast<void>(i);
}
runtime = tim.elapsed();
iter *= 2;
} while(runtime < duration);
iter /= 2;
std::cout << iter << " iterations" << std::endl;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = boost::fusion::accumulate(
boost::fusion::zip(arr1, arr2), 0, poly_combine());
j += i;
}
run = tim.elapsed();
result = (std::min)(run, result);
}
std::cout << i << std::endl;
return result / iter;
}
#endif
}
// Runs the std::accumulate and fusion::accumulate benchmarks for arrays of
// 8, 64 and 128 ints and prints the value each benchmark returns (its best
// elapsed time divided by the iteration count).
//
// The sums produced by the benchmarks are folded into the process return
// value so that the measured work stays observable.
int main()
{
    int total = 0;
    // Must start from a defined value: every benchmark performs `j += i` on
    // this variable, and reading an uninitialized int is undefined behavior.
    int res = 0;
    std::cout << "short accumulate std test " << time_for_std_accumulate<8>(res) << std::endl;
    total += res;
    std::cout << "short accumulate fusion test " << time_for_fusion_accumulate<8>(res) << std::endl;
    total += res;
    std::cout << "medium accumulate std test " << time_for_std_accumulate<64>(res) << std::endl;
    total += res;
    std::cout << "medium accumulate fusion test " << time_for_fusion_accumulate<64>(res) << std::endl;
    total += res;
    std::cout << "long accumulate std test " << time_for_std_accumulate<128>(res) << std::endl;
    total += res;
    std::cout << "long accumulate fusion test " << time_for_fusion_accumulate<128>(res) << std::endl;
    total += res;
    // The inner_product benchmarks are compiled out together with their
    // definitions.
#if 0
    std::cout << "short inner_product std test " << time_for_std_inner_product<8>(res) << std::endl;
    total += res;
    std::cout << "short inner_product fusion test " << time_for_fusion_inner_product<8>(res) << std::endl;
    total += res;
    std::cout << "short inner_product fusion 2 test " << time_for_fusion_inner_product2<8>(res) << std::endl;
    total += res;
    std::cout << "medium inner_product std test " << time_for_std_inner_product<64>(res) << std::endl;
    total += res;
    std::cout << "medium inner_product fusion test " << time_for_fusion_inner_product<64>(res) << std::endl;
    total += res;
    std::cout << "medium inner_product fusion 2 test " << time_for_fusion_inner_product2<64>(res) << std::endl;
    total += res;
    std::cout << "long inner_product std test " << time_for_std_inner_product<128>(res) << std::endl;
    total += res;
    std::cout << "long inner_product fusion test " << time_for_fusion_inner_product<128>(res) << std::endl;
    total += res;
    std::cout << "long inner_product fusion 2 test " << time_for_fusion_inner_product2<128>(res) << std::endl;
    total += res;
#endif
    return total;
}

View File

@ -0,0 +1,305 @@
/*=============================================================================
Copyright (c) 2001-2006 Joel de Guzman
Copyright (c) 2006-2007 Tobias Schwinger
Use modification and distribution are subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt).
==============================================================================*/
#include <boost/fusion/container/list.hpp>
#include <boost/fusion/container/vector.hpp>
#include <boost/fusion/algorithm/iteration/fold.hpp>
#include <boost/fusion/functional/adapter/unfused_generic.hpp>
#include <boost/fusion/functional/adapter/unfused_rvalue_args.hpp>
#include <boost/fusion/functional/adapter/fused_function_object.hpp>
#include <boost/utility/result_of.hpp>
#include <boost/config.hpp>
#include <boost/timer.hpp>
#include <algorithm>
#include <iostream>
#include <limits>
#ifdef _MSC_VER
// inline aggressively
# pragma inline_recursion(on) // turn on inline recursion
# pragma inline_depth(255) // max inline depth
#endif
// Number of timed repetitions each call_* benchmark performs; the smallest
// of the measured times is the one reported.
int const REPEAT_COUNT = 3;
// Run time a calibration pass has to reach before the iteration count stops
// doubling. It is compared against boost::timer::elapsed().
double const duration = 0.125;
namespace
{
// Function object that takes a whole Fusion sequence as its single argument
// and reduces it to an int with boost::fusion::fold, starting from 0.
struct fused_sum
{
    typedef int result_type;

    template <typename Sequence>
    int operator()(Sequence const & sequence) const
    {
        int initial = 0;
        return boost::fusion::fold(sequence, initial, step());
    }

private:
    // Fold step. Two overloads exist, distinguished by the constness of the
    // element reference they accept.
    struct step
    {
        typedef int result_type;

        // Const element: adds sizeof(T) * item to the running value.
        template <typename T>
        int operator()(T const & item, int running) const
        {
            return running + sizeof(T) * item;
        }

        // Mutable element: bumps the element itself by sizeof(T) and leaves
        // the running value untouched.
        template <typename T>
        int operator()(T & item, int running) const
        {
            item += sizeof(T);
            return running;
        }
    };
};
// Function object that sums zero to four separately passed arguments and
// returns the result as int.
//
// All arguments are taken by reference-to-const. The three-argument overload
// used to take its last argument by value, which was inconsistent with the
// other overloads and forced a copy of that argument.
// The overloads are defined in-class and are therefore implicitly inline.
struct unfused_sum
{
    typedef int result_type;

    // No arguments: the empty sum.
    int operator()() const
    {
        return 0;
    }

    template<typename T0>
    int operator()(T0 const & a0) const
    {
        return a0;
    }

    template<typename T0, typename T1>
    int operator()(T0 const & a0, T1 const & a1) const
    {
        return a0 + a1;
    }

    template<typename T0, typename T1, typename T2>
    int operator()(T0 const & a0, T1 const & a1, T2 const & a2) const
    {
        return a0 + a1 + a2;
    }

    template<typename T0, typename T1, typename T2, typename T3>
    int operator()(T0 const & a0, T1 const & a1, T2 const & a2, T3 const & a3) const
    {
        return a0 + a1 + a2 + a3;
    }
};
template<typename F>
double call_unfused(F const & func, int & j)
{
boost::timer tim;
int i = 0;
long long iter = 65536;
long long counter, repeats;
double result = (std::numeric_limits<double>::max)();
double runtime = 0;
double run;
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i += func();
i += func(0);
i += func(0,1);
i += func(0,1,2);
i += func(0,1,2,3);
}
runtime = tim.elapsed();
iter *= 2;
} while(runtime < duration);
iter /= 2;
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = func(); j += i;
i = func(0); j += i;
i = func(0,1); j += i;
i = func(0,1,2); j += i;
i = func(0,1,2,3); j += i;
}
run = tim.elapsed();
result = (std::min)(run, result);
}
return result / iter;
}
template<typename F>
double call_fused_ra(F const & func, int & j)
{
boost::timer tim;
int i = 0;
long long iter = 65536;
long long counter, repeats;
double result = (std::numeric_limits<double>::max)();
double runtime = 0;
double run;
do
{
boost::fusion::vector<> v0;
boost::fusion::vector<int> v1(0);
boost::fusion::vector<int,int> v2(0,1);
boost::fusion::vector<int,int,int> v3(0,1,2);
boost::fusion::vector<int,int,int,int> v4(0,1,2,3);
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i += func(v0);
i += func(v1);
i += func(v2);
i += func(v3);
i += func(v4);
}
runtime = tim.elapsed();
iter *= 2;
} while(runtime < duration);
iter /= 2;
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
boost::fusion::vector<> v0;
boost::fusion::vector<int> v1(0);
boost::fusion::vector<int,int> v2(0,1);
boost::fusion::vector<int,int,int> v3(0,1,2);
boost::fusion::vector<int,int,int,int> v4(0,1,2,3);
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = func(v0); j += i;
i = func(v1); j += i;
i = func(v2); j += i;
i = func(v3); j += i;
i = func(v4); j += i;
}
run = tim.elapsed();
result = (std::min)(run, result);
}
return result / iter;
}
template<typename F>
double call_fused(F const & func, int & j)
{
boost::timer tim;
int i = 0;
long long iter = 65536;
long long counter, repeats;
double result = (std::numeric_limits<double>::max)();
double runtime = 0;
double run;
do
{
boost::fusion::list<> l0;
boost::fusion::list<int> l1(0);
boost::fusion::list<int,int> l2(0,1);
boost::fusion::list<int,int,int> l3(0,1,2);
boost::fusion::list<int,int,int,int> l4(0,1,2,3);
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i += func(l0);
i += func(l1);
i += func(l2);
i += func(l3);
i += func(l4);
}
runtime = tim.elapsed();
iter *= 2;
} while(runtime < duration);
iter /= 2;
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
boost::fusion::list<> l0;
boost::fusion::list<int> l1(0);
boost::fusion::list<int,int> l2(0,1);
boost::fusion::list<int,int,int> l3(0,1,2);
boost::fusion::list<int,int,int,int> l4(0,1,2,3);
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = func(l0); j += i;
i = func(l1); j += i;
i = func(l2); j += i;
i = func(l3); j += i;
i = func(l4); j += i;
}
run = tim.elapsed();
result = (std::min)(run, result);
}
return result / iter;
}
}
// Benchmarks the Fusion functional adapters: a fused function object (F) and
// an unfused one (U) are timed directly and wrapped in the unfused_* /
// fused_function_object adapters, and the value each call_* benchmark
// returns is printed next to a label.
//
// The call results accumulated by the benchmarks are folded into the process
// return value so that the measured work stays observable.
int main()
{
    int total = 0;
    // Must start from a defined value: every call_* benchmark performs
    // `j += i` on this variable, and reading an uninitialized int is
    // undefined behavior.
    int res = 0;
    typedef fused_sum F;
    typedef unfused_sum U;
    std::cout << "Compiler: " << BOOST_COMPILER << std::endl;
    std::cout << std::endl << "Unfused adapters:" << std::endl;
    {
        F f;
        std::cout << "F /* a fused function object */ " << call_fused_ra(f,res) << std::endl;
        total += res;
    }
    {
        F f;
        std::cout << "without random access " << call_fused(f,res) << std::endl;
        total += res;
    }
    {
        boost::fusion::unfused_rvalue_args<F> f;
        std::cout << "unfused_rvalue_args<F> " << call_unfused(f,res) << std::endl;
        total += res;
    }
    {
        boost::fusion::unfused_generic<F> f;
        std::cout << "unfused_generic<F> " << call_unfused(f,res) << std::endl;
        total += res;
    }
    std::cout << std::endl << "Fused adapters:" << std::endl;
    {
        unfused_sum f;
        std::cout << "U /* an unfused function object */ " << call_unfused(f,res) << std::endl;
        total += res;
    }
    {
        boost::fusion::fused_function_object<U> f;
        std::cout << "fused_function_object<U> " << call_fused_ra(f,res) << std::endl;
        total += res;
    }
    {
        boost::fusion::fused_function_object<U> f;
        std::cout << "without random access " << call_fused(f,res) << std::endl;
        total += res;
    }
    {
        boost::fusion::unfused_rvalue_args< boost::fusion::fused_function_object<U> > f;
        std::cout << "unfused_rvalue_args<fused_function_object<U> > " << call_unfused(f,res) << std::endl;
        total += res;
    }
    {
        boost::fusion::unfused_generic< boost::fusion::fused_function_object<U> > f;
        std::cout << "unfused_generic<fused_function_object<U> > " << call_unfused(f,res) << std::endl;
        total += res;
    }
    return total;
}

View File

@ -0,0 +1,184 @@
/*=============================================================================
Copyright (c) 2001-2006 Joel de Guzman
Copyright (c) 2005-2006 Dan Marsden
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#include <boost/array.hpp>
#include <boost/timer.hpp>
#include <boost/fusion/algorithm/iteration/accumulate.hpp>
#include <boost/fusion/algorithm/transformation/transform.hpp>
#include <boost/fusion/container/vector.hpp>
#include <boost/fusion/algorithm/transformation/zip.hpp>
#include <boost/fusion/sequence/intrinsic/at.hpp>
#include <boost/fusion/adapted/array.hpp>
#include <boost/fusion/sequence/intrinsic/at.hpp>
#include <boost/type_traits/remove_reference.hpp>
#include <algorithm>
#include <numeric>
#include <functional>
#include <iostream>
#include <cmath>
#include <limits>
#ifdef _MSC_VER
// inline aggressively
# pragma inline_recursion(on) // turn on inline recursion
# pragma inline_depth(255) // max inline depth
#endif
// Number of timed repetitions each benchmark performs; the smallest of the
// measured times is the one reported.
int const REPEAT_COUNT = 10;
// Run time a calibration pass has to reach before the iteration count stops
// doubling. It is compared against boost::timer::elapsed().
double const duration = 0.5;
namespace
{
// Addition function object for operands of arbitrary types supporting
// operator+; passed to fusion::accumulate in this file.
struct poly_add
{
    // Result-type lookup keyed on a call signature; only the two-argument
    // form below is defined.
    template<typename Signature>
    struct result;

    // For poly_add(Left, Right) the nested `type` is Left with any
    // reference removed.
    template<typename Left, typename Right>
    struct result<poly_add(Left, Right)>
        : boost::remove_reference<Left>
    {};

    // Returns x + y, converted to the type of the left operand.
    template<typename Left, typename Right>
    Left operator()(Left const& x, Right const& y) const
    {
        return x + y;
    }
};
// Multiplication function object for operands of arbitrary types supporting
// operator*; passed to fusion::transform in this file.
struct poly_mult
{
    // Result-type lookup keyed on a call signature; only the two-argument
    // form below is defined.
    template<typename Signature>
    struct result;

    // For poly_mult(Left, Right) the nested `type` is Left with any
    // reference removed.
    template<typename Left, typename Right>
    struct result<poly_mult(Left, Right)>
        : boost::remove_reference<Left>
    {};

    // Returns x * y, converted to the type of the left operand.
    template<typename Left, typename Right>
    Left operator()(Left const& x, Right const& y) const
    {
        return x * y;
    }
};
template<int N>
double time_for_std_inner_product(int& j)
{
boost::timer tim;
int i = 0;
long long iter = 65536;
long long counter, repeats;
double result = (std::numeric_limits<double>::max)();
double runtime = 0;
double run;
boost::array<int, N> arr1;
boost::array<int, N> arr2;
std::generate(arr1.begin(), arr1.end(), rand);
std::generate(arr2.begin(), arr2.end(), rand);
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0);
static_cast<void>(i);
}
runtime = tim.elapsed();
iter *= 2;
} while(runtime < duration);
iter /= 2;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0);
j += i;
}
run = tim.elapsed();
result = (std::min)(run, result);
}
std::cout << i << std::endl;
return result / iter;
}
template<int N>
double time_for_fusion_inner_product(int& j)
{
boost::timer tim;
int i = 0;
long long iter = 65536;
long long counter, repeats;
double result = (std::numeric_limits<double>::max)();
double runtime = 0;
double run;
boost::array<int, N> arr1;
boost::array<int, N> arr2;
std::generate(arr1.begin(), arr1.end(), rand);
std::generate(arr2.begin(), arr2.end(), rand);
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = boost::fusion::accumulate(
boost::fusion::transform(arr1, arr2, poly_mult()), 0, poly_add());
static_cast<void>(i);
}
runtime = tim.elapsed();
iter *= 2;
} while(runtime < duration);
iter /= 2;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = boost::fusion::accumulate(
boost::fusion::transform(arr1, arr2, poly_mult()), 0, poly_add());
j += i;
}
run = tim.elapsed();
result = (std::min)(run, result);
}
std::cout << i << std::endl;
return result / iter;
}
}
// Runs the std::inner_product and fusion transform+accumulate benchmarks for
// arrays of 8, 64 and 128 ints and prints the value each benchmark returns
// (its best elapsed time divided by the iteration count).
//
// The products computed by the benchmarks are folded into the process return
// value so that the measured work stays observable.
int main()
{
    int total = 0;
    // Must start from a defined value: every benchmark performs `j += i` on
    // this variable, and reading an uninitialized int is undefined behavior.
    int res = 0;
    std::cout << "short inner_product std test " << time_for_std_inner_product<8>(res) << std::endl;
    total += res;
    std::cout << "short inner_product fusion test " << time_for_fusion_inner_product<8>(res) << std::endl;
    total += res;
    std::cout << "medium inner_product std test " << time_for_std_inner_product<64>(res) << std::endl;
    total += res;
    std::cout << "medium inner_product fusion test " << time_for_fusion_inner_product<64>(res) << std::endl;
    total += res;
    std::cout << "long inner_product std test " << time_for_std_inner_product<128>(res) << std::endl;
    total += res;
    std::cout << "long inner_product fusion test " << time_for_fusion_inner_product<128>(res) << std::endl;
    total += res;
    return total;
}

View File

@ -0,0 +1,206 @@
/*=============================================================================
Copyright (c) 2001-2006 Joel de Guzman
Copyright (c) 2005-2006 Dan Marsden
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#include <boost/array.hpp>
#include <boost/timer.hpp>
#include <boost/fusion/algorithm/iteration/accumulate.hpp>
#include <boost/fusion/algorithm/transformation/transform.hpp>
#include <boost/fusion/container/vector.hpp>
#include <boost/fusion/algorithm/transformation/zip.hpp>
#include <boost/fusion/sequence/intrinsic/at.hpp>
#include <boost/fusion/adapted/array.hpp>
#include <boost/fusion/sequence/intrinsic/at.hpp>
#include <boost/type_traits/remove_reference.hpp>
#include <algorithm>
#include <numeric>
#include <functional>
#include <iostream>
#include <cmath>
#include <limits>
#ifdef _MSC_VER
// inline aggressively
# pragma inline_recursion(on) // turn on inline recursion
# pragma inline_depth(255) // max inline depth
#endif
// Number of timed repetitions each benchmark performs; the smallest of the
// measured times is the one reported.
int const REPEAT_COUNT = 10;
// Run time a calibration pass has to reach before the iteration count stops
// doubling. It is compared against boost::timer::elapsed().
double const duration = 0.5;
namespace
{
// Addition function object for operands of arbitrary types supporting
// operator+.
struct poly_add
{
    // Result-type lookup keyed on a call signature; only the two-argument
    // form below is defined.
    template<typename Signature>
    struct result;

    // For poly_add(First, Second) the nested `type` is First with any
    // reference removed.
    template<typename First, typename Second>
    struct result<poly_add(First, Second)>
        : boost::remove_reference<First>
    {};

    // Returns first + second, converted to the type of the left operand.
    template<typename First, typename Second>
    First operator()(First const& first, Second const& second) const
    {
        return first + second;
    }
};
// Multiplication function object for operands of arbitrary types supporting
// operator*.
struct poly_mult
{
    // Result-type lookup keyed on a call signature; only the two-argument
    // form below is defined.
    template<typename Signature>
    struct result;

    // For poly_mult(First, Second) the nested `type` is First with any
    // reference removed.
    template<typename First, typename Second>
    struct result<poly_mult(First, Second)>
        : boost::remove_reference<First>
    {};

    // Returns first * second, converted to the type of the left operand.
    template<typename First, typename Second>
    First operator()(First const& first, Second const& second) const
    {
        return first * second;
    }
};
template<int N>
double time_for_std_inner_product(int& j)
{
boost::timer tim;
int i = 0;
long long iter = 65536;
long long counter, repeats;
double result = (std::numeric_limits<double>::max)();
double runtime = 0;
double run;
boost::array<int, N> arr1;
boost::array<int, N> arr2;
std::generate(arr1.begin(), arr1.end(), rand);
std::generate(arr2.begin(), arr2.end(), rand);
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0);
static_cast<void>(i);
}
runtime = tim.elapsed();
iter *= 2;
} while(runtime < duration);
iter /= 2;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0);
j += i;
}
run = tim.elapsed();
result = (std::min)(run, result);
}
std::cout << i << std::endl;
return result / iter;
}
// Fold step for an inner product over zipped sequences: adds the product of
// elements 0 and 1 of the first argument to the second argument.
struct poly_combine
{
    // Result-type lookup keyed on a call signature; only the two-argument
    // form below is defined.
    template<typename Signature>
    struct result;

    // For poly_combine(Pair, Acc) the nested `type` is Acc with any
    // reference removed.
    template<typename Pair, typename Acc>
    struct result<poly_combine(Pair, Acc)>
        : boost::remove_reference<Acc>
    {};

    // Returns acc + at_c<0>(pair) * at_c<1>(pair).
    template<typename Pair, typename Acc>
    typename result<poly_combine(Pair,Acc)>::type
    operator()(Pair const& pair, Acc const& acc) const
    {
        return acc + boost::fusion::at_c<0>(pair) * boost::fusion::at_c<1>(pair);
    }
};
template<int N>
double time_for_fusion_inner_product2(int& j)
{
boost::timer tim;
int i = 0;
long long iter = 65536;
long long counter, repeats;
double result = (std::numeric_limits<double>::max)();
double runtime = 0;
double run;
boost::array<int, N> arr1;
boost::array<int, N> arr2;
std::generate(arr1.begin(), arr1.end(), rand);
std::generate(arr2.begin(), arr2.end(), rand);
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = boost::fusion::accumulate(
boost::fusion::zip(arr1, arr2), 0, poly_combine());
static_cast<void>(i);
}
runtime = tim.elapsed();
iter *= 2;
} while(runtime < duration);
iter /= 2;
std::cout << iter << " iterations" << std::endl;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
i = boost::fusion::accumulate(
boost::fusion::zip(arr1, arr2), 0, poly_combine());
j += i;
}
run = tim.elapsed();
result = (std::min)(run, result);
}
std::cout << i << std::endl;
return result / iter;
}
}
// Runs the std::inner_product and fusion zip+accumulate benchmarks for
// arrays of 8 and 64 ints and prints the value each benchmark returns (its
// best elapsed time divided by the iteration count). The 128-element runs
// are compiled out.
//
// The products computed by the benchmarks are folded into the process return
// value so that the measured work stays observable.
int main()
{
    int total = 0;
    // Must start from a defined value: every benchmark performs `j += i` on
    // this variable, and reading an uninitialized int is undefined behavior.
    int res = 0;
    std::cout << "short inner_product std test " << time_for_std_inner_product<8>(res) << std::endl;
    total += res;
    std::cout << "short inner_product fusion 2 test " << time_for_fusion_inner_product2<8>(res) << std::endl;
    total += res;
    std::cout << "medium inner_product std test " << time_for_std_inner_product<64>(res) << std::endl;
    total += res;
    std::cout << "medium inner_product fusion 2 test " << time_for_fusion_inner_product2<64>(res) << std::endl;
    total += res;
#if 0 // Leads to ICE with MSVC 8.0
    std::cout << "long inner_product std test " << time_for_std_inner_product<128>(res) << std::endl;
    total += res;
    std::cout << "long inner_product fusion 2 test " << time_for_fusion_inner_product2<128>(res) << std::endl;
    total += res;
#endif
    return total;
}

View File

@ -0,0 +1,85 @@
// Copyright David Abrahams, Matthias Troyer, Michael Gauckler
// 2005. Distributed under the Boost Software License, Version
// 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#if !defined(LIVE_CODE_TYPE)
# define LIVE_CODE_TYPE int
#endif
#include <boost/timer.hpp>
namespace test
{
// This value is required to ensure that a smart compiler's dead
// code elimination doesn't optimize away anything we're testing.
// We'll use it to compute the return code of the executable to make
// sure it's needed.
// Its type is LIVE_CODE_TYPE, which defaults to int when the including file
// has not defined it. hammer() resets it to zero and then adds every
// accumulator's sum to it.
LIVE_CODE_TYPE live_code;
// Repeatedly applies objects of type Accumulator to the argument x.
//
// An array of default-constructed accumulators is created, each of them is
// called with x once per repetition, and finally every accumulator's `sum`
// member is added to live_code (which is reset to zero first).
//
// x:       the argument handed to every accumulator call.
// repeats: number of sweeps over the whole accumulator array.
template <class Accumulator, class Arg>
void hammer(Arg const& x, long const repeats)
{
    // Why an array instead of a single accumulator:
    //
    // * Each update of an accumulator's sum depends on its previous value,
    //   so back-to-back updates of one accumulator might stall the CPU's
    //   pipeline while it waits for the previous addition. Updating many
    //   accumulators in sequence removes the dependency between adjacent
    //   additions.
    //
    // * With a single accumulator the compiler or CPU might keep the value
    //   in a register rather than writing it back to memory, whereas each
    //   operation is meant to at least update the L1 cache. *** Note: this
    //   concern is specific to the particular application the test is
    //   aimed at. ***
    //
    // The array has to be at least as large as the number of accumulations
    // that can be in flight in the pipeline at once; a safe number is
    // larger than the machine's maximum pipeline depth. To test the L2 or
    // L3 cache, or main memory, increase the size of this array. 1024 is
    // an upper limit on the pipeline depth of current vector machines.
    const std::size_t number_of_accumulators = 1024;

    live_code = 0; // every run starts from zero

    Accumulator bank[number_of_accumulators];
    Accumulator* const bank_end = bank + number_of_accumulators;

    for (long sweep = 0; sweep < repeats; ++sweep)
    {
        for (Accumulator* acc = bank; acc < bank_end; ++acc)
        {
            (*acc)(x);
        }
    }

    // Fold all the partial sums into live_code so that the work above
    // cannot be removed as dead code.
    for (Accumulator* acc = bank; acc < bank_end; ++acc)
    {
        live_code += acc->sum;
    }
}
// Measure the time required to hammer accumulators of the given
// type with the argument x.
template <class Accumulator, class T>
double measure(T const& x, long const repeats)
{
// Hammer accumulators a couple of times to ensure the
// instruction cache is full of our test code, and that we don't
// measure the cost of a page fault for accessing the data page
// containing the memory where the accumulators will be
// allocated
hammer<Accumulator>(x, repeats);
hammer<Accumulator>(x, repeats);
// Now start a timer
boost::timer time;
hammer<Accumulator>(x, repeats); // This time, we'll measure
return time.elapsed() / repeats; // return the time of one iteration
}
}

View File

@ -0,0 +1,248 @@
/*=============================================================================
Copyright (c) 2001-2006 Joel de Guzman
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#include "measure.hpp"
#define FUSION_MAX_LIST_SIZE 30
#define FUSION_MAX_VECTOR_SIZE 30
#include <boost/fusion/algorithm/iteration/accumulate.hpp>
#include <boost/fusion/container/vector.hpp>
#include <boost/fusion/container/list.hpp>
#include <boost/type_traits/remove_reference.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/preprocessor/stringize.hpp>
#include <boost/preprocessor/enum.hpp>
#include <iostream>
#ifdef _MSC_VER
// inline aggressively
# pragma inline_recursion(on) // turn on inline recursion
# pragma inline_depth(255) // max inline depth
#endif
// About the tests:
//
// The tests below compare various fusion sequences to see how abstraction
// affects performance.
//
// We have 3 sequence sizes for each fusion sequence we're going to test.
//
// small = 3 elements
// medium = 10 elements
// big = 30 elements
//
// The sequences are initialized with values 0..N-1 from numeric strings
// parsed by boost::lexical_cast to make sure that the compiler is not
// optimizing by replacing the computation with constant results computed
// at compile time.
//
// These sequences will be subjected to our accumulator which calls
// fusion::accumulate:
//
// this->sum += boost::fusion::accumulate(seq, 0, poly_add());
//
// where poly_add simply sums the current value with the content of
// the sequence element. This accumulator will be called many times
// through the "hammer" test (see measure.hpp).
//
// The tests are compared against a base using a plain_accumulator
// which does a simple addition:
//
// this->sum += x;
namespace
{
// Addition function object handed to fusion::accumulate: adds the current
// state and a sequence element of arbitrary types supporting operator+.
struct poly_add
{
    // Result-type lookup keyed on a call signature; only the two-argument
    // form below is defined.
    template<typename Signature>
    struct result;

    // For poly_add(State, Elem) the nested `type` is State with any
    // reference removed.
    template<typename State, typename Elem>
    struct result<poly_add(State, Elem)>
        : boost::remove_reference<State>
    {};

    // Returns state + elem, converted to the type of the left operand.
    template<typename State, typename Elem>
    State operator()(State const& state, Elem const& elem) const
    {
        return state + elem;
    }
};
// Accumulator under test: each call folds a whole Fusion sequence with
// fusion::accumulate (initial value 0, poly_add as the operation) and adds
// the outcome to `sum`.
template <typename T>
struct accumulator
{
    // Running total; value-initialized by the constructor.
    T sum;

    accumulator()
        : sum()
    {}

    template <typename Sequence>
    void operator()(Sequence const& seq)
    {
        sum += boost::fusion::accumulate(seq, 0, poly_add());
    }
};
// Baseline accumulator: each call simply adds its argument to `sum`.
template <typename T>
struct plain_accumulator
{
    // Running total; value-initialized by the constructor.
    T sum;

    plain_accumulator()
        : sum()
    {}

    template <typename Value>
    void operator()(Value const& value)
    {
        sum += value;
    }
};
template <typename T>
void check(T const& seq, char const* info)
{
test::measure<accumulator<int> >(seq, 1);
std::cout << info << test::live_code << std::endl;
}
// Times accumulator<int> on seq via test::measure and prints one report
// line: `info`, the measured time, and that time as a whole-number
// percentage of `base`.
//
// seq:     the sequence handed to the accumulator.
// info:    label printed in front of the numbers.
// repeats: repetition count forwarded to test::measure.
// base:    reference time the result is compared against.
template <typename T>
void measure(T const& seq, char const* info, long const repeats, double base)
{
    double const elapsed = test::measure<accumulator<int> >(seq, repeats);
    int const percent_of_base = static_cast<int>((elapsed / base) * 100);

    std::cout << info << elapsed;
    std::cout << " (" << percent_of_base << "%)";
    std::cout << std::endl;
}
// Performs one fusion::accumulate over seq (initial value 0, poly_add as the
// operation) and stores the result in test::live_code.
// NOTE(review): no call to this helper is visible in this part of the file;
// the name suggests it exists so the generated assembly for a single
// accumulate can be inspected -- confirm.
template <typename T>
void test_assembler(T const& seq)
{
test::live_code = boost::fusion::accumulate(seq, 0, poly_add());
}
}
// We'll initialize the sequences from numeric strings that
// pass through boost::lexical_cast to make sure that the
// compiler is not optimizing by replacing the computation
// with constant results computed at compile time.
//
// INIT is used as the macro argument of BOOST_PP_ENUM below. Only `n` is
// used: it is stringized and converted back to int with boost::lexical_cast.
// The `z` and `text` parameters are ignored.
#define INIT(z, n, text) boost::lexical_cast<int>(BOOST_PP_STRINGIZE(n))
// Benchmark driver. Builds fusion vectors and lists of 3, 10 and 30 ints,
// picks a repetition count, prints the plain_accumulator baseline, then
// prints the accumulated result and the timing (with percentage of the
// baseline) for every sequence. Returns non-zero when test::live_code is
// non-zero.
int main()
{
using namespace boost::fusion;
std::cout.setf(std::ios::scientific);
// Every sequence below is constructed from BOOST_PP_ENUM(n, INIT, _),
// i.e. n constructor arguments each produced by the INIT macro.
vector<
int, int, int
>
vsmall(BOOST_PP_ENUM(3, INIT, _));
list<
int, int, int
>
lsmall(BOOST_PP_ENUM(3, INIT, _));
vector<
int, int, int, int, int, int, int, int, int, int
>
vmedium(BOOST_PP_ENUM(10, INIT, _));
list<
int, int, int, int, int, int, int, int, int, int
>
lmedium(BOOST_PP_ENUM(10, INIT, _));
vector<
int, int, int, int, int, int, int, int, int, int
, int, int, int, int, int, int, int, int, int, int
, int, int, int, int, int, int, int, int, int, int
>
vbig(BOOST_PP_ENUM(30, INIT, _));
list<
int, int, int, int, int, int, int, int, int, int
, int, int, int, int, int, int, int, int, int, int
, int, int, int, int, int, int, int, int, int, int
>
lbig(BOOST_PP_ENUM(30, INIT, _));
// first decide how many repetitions to measure
// repeats is multiplied by 10 before each pass, so the first pass runs with
// 1000. The loop stops once a pass over all seven workloads reports at least
// 2.0 from time.elapsed() (presumably seconds -- confirm against boost::timer)
// or once repeats has grown past 10000000; because the bound is checked
// before the multiplication, repeats can end up as large as 100000000.
long repeats = 100;
double measured = 0;
while (measured < 2.0 && repeats <= 10000000)
{
repeats *= 10;
boost::timer time;
test::hammer<plain_accumulator<int> >(0, repeats);
test::hammer<accumulator<int> >(vsmall, repeats);
test::hammer<accumulator<int> >(lsmall, repeats);
test::hammer<accumulator<int> >(vmedium, repeats);
test::hammer<accumulator<int> >(lmedium, repeats);
test::hammer<accumulator<int> >(vbig, repeats);
test::hammer<accumulator<int> >(lbig, repeats);
measured = time.elapsed();
}
// Baseline: plain_accumulator<int> fed the int 1, first with a single
// repetition to print test::live_code, then with the chosen repeats to
// obtain base_time.
test::measure<plain_accumulator<int> >(1, 1);
std::cout
<< "base accumulated result: "
<< test::live_code
<< std::endl;
double base_time = test::measure<plain_accumulator<int> >(1, repeats);
std::cout
<< "base time: "
<< base_time;
std::cout
<< std::endl
<< "-------------------------------------------------------------------"
<< std::endl;
// Print the accumulated result of every sequence (single repetition each).
check(vsmall, "small vector accumulated result: ");
check(lsmall, "small list accumulated result: ");
check(vmedium, "medium vector accumulated result: ");
check(lmedium, "medium list accumulated result: ");
check(vbig, "big vector accumulated result: ");
check(lbig, "big list accumulated result: ");
std::cout
<< "-------------------------------------------------------------------"
<< std::endl;
// Print the timing of every sequence together with its percentage of
// base_time.
measure(vsmall, "small vector time: ", repeats, base_time);
measure(lsmall, "small list time: ", repeats, base_time);
measure(vmedium, "medium vector time: ", repeats, base_time);
measure(lmedium, "medium list time: ", repeats, base_time);
measure(vbig, "big vector time: ", repeats, base_time);
measure(lbig, "big list time: ", repeats, base_time);
std::cout
<< "-------------------------------------------------------------------"
<< std::endl;
// Let's see how this looks in assembler
test_assembler(vmedium);
// This is ultimately responsible for preventing all the test code
// from being optimized away. Change this to return 0 and you
// unplug the whole test's life support system.
return test::live_code != 0;
}

View File

@ -0,0 +1,57 @@
===============================================================================
Copyright (C) 2001-2007 Joel de Guzman, Dan Marsden, Tobias Schwinger
Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
===============================================================================
Timing results for sequence_efficiency.cpp comparing the speed of various
fusion sequences. The test involves accumulating the elements of the
sequence which is primed to have values 0..N-1 (N=size of sequence). Small,
medium and big sequences are tested where:
small = 3 elements
medium = 10 elements
big = 30 elements
Tester: Joel de Guzman. WinXP, P4-3.0GHZ, 2GB RAM
VC7.1 (flags = /MD /O2 /EHsc /GS)
small vector time: 1.870000e-006
small list time: 1.870000e-006
medium vector time: 1.880000e-006
medium list time: 3.600000e-006
big vector time: 2.030000e-006
big list time: 8.910000e-006
VC8.0 (flags = /MD /O2 /EHsc /GS)
small vector time: 2.500000e-05
small list time: 2.500000e-05
medium vector time: 7.810000e-05
medium list time: 7.810000e-05
big vector time: 2.469000e-04
big list time: 2.453000e-04
G++ 3.4 (flags = -ftemplate-depth-128 -funroll-loops -O3 -finline-functions -Wno-inline -Wall)
small vector time: 2.500000e-05
small list time: 2.500000e-05
medium vector time: 7.970000e-05
medium list time: 7.970000e-05
big vector time: 2.516000e-04
big list time: 2.485000e-04
Intel 9.1 (flags = /MD /O2 /EHsc /GS)
small vector time: 1.125000e-006
small list time: 1.125000e-006
medium vector time: 1.125000e-006
medium list time: 1.141000e-006
big vector time: 1.140000e-006
big list time: 1.141000e-006

View File

@ -0,0 +1,155 @@
/*=============================================================================
Copyright (c) 2001-2006 Joel de Guzman
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#include "measure.hpp"
//~ #define FUSION_MAX_VECTOR_SIZE 30
#include <boost/fusion/algorithm/iteration/accumulate.hpp>
#include <boost/fusion/algorithm/transformation/zip.hpp>
#include <boost/fusion/container/vector.hpp>
#include <boost/fusion/sequence/intrinsic/value_at.hpp>
#include <boost/fusion/sequence/intrinsic/at.hpp>
#include <boost/type_traits/remove_reference.hpp>
#include <iostream>
#ifdef _MSC_VER
// inline aggressively
# pragma inline_recursion(on) // turn on inline recursion
# pragma inline_depth(255) // max inline depth
#endif
namespace
{
struct zip_add
{
template<typename Lhs, typename Rhs>
struct result
{
typedef typename
boost::remove_reference<
typename boost::fusion::result_of::value_at_c<Lhs, 0>::type
>::type
type;
};
template<typename Lhs, typename Rhs>
typename result<Lhs, Rhs>::type
operator()(const Lhs& lhs, const Rhs& rhs) const
{
return boost::fusion::at_c<0>(lhs) + boost::fusion::at_c<1>(lhs) + rhs;
}
};
// Functor under test: each call folds one fusion sequence with zip_add
// (starting from 0) and adds the outcome to the running total in `sum`.
template <typename T>
struct zip_accumulator
{
    T sum;

    zip_accumulator()
        : sum()     // value-initialize the running total
    {}

    template <typename Seq>
    void operator()(Seq const& elements)
    {
        sum += boost::fusion::accumulate(elements, 0, zip_add());
    }
};
// Runs zip_accumulator<int> over seq through test::measure with a single
// repetition, then prints info followed by the current test::live_code.
// (Presumably test::measure leaves the accumulated sum in test::live_code;
// see measure.hpp to confirm.)
template <typename Sequence>
void check(Sequence const& seq, char const* info)
{
    test::measure<zip_accumulator<int> >(seq, 1);   // one repetition only

    std::cout << info;
    std::cout << test::live_code << std::endl;
}
// Prints info followed by the value returned by
// test::measure<zip_accumulator<int> >(seq, repeats) and std::endl.
//
//  seq     - sequence passed to test::measure
//  info    - label printed in front of the measured value
//  repeats - repetition count forwarded to test::measure
//
// The measurement call is part of the output expression itself, so it is
// deliberately left as a single statement.
template <typename T>
void measure(T const& seq, char const* info, long const repeats)
{
std::cout
<< info
<< test::measure<zip_accumulator<int> >(seq, repeats)
<< std::endl;
}
}
// Benchmark driver. Builds two small (3 int) and two medium (10 int) fusion
// vectors, picks a repetition count, then prints the accumulated result and
// the timing for the zip of each pair. The "big" (30 int) case is commented
// out throughout. Returns non-zero when test::live_code is non-zero.
int main()
{
using namespace boost::fusion;
std::cout.setf(std::ios::scientific);
// NOTE(review): BOOST_PP_ENUM_PARAMS(n,) is invoked with an empty second
// argument, presumably expanding to the literals 0 .. n-1. Unlike
// sequence_efficiency.cpp these values do not pass through
// boost::lexical_cast, so the compiler may be able to treat them as
// compile-time constants -- confirm whether that is intended.
vector<
int, int, int
>
vsmall_1(BOOST_PP_ENUM_PARAMS(3,));
vector<
int, int, int
>
vsmall_2(BOOST_PP_ENUM_PARAMS(3,));
vector<
int, int, int, int, int, int, int, int, int, int
>
vmedium_1(BOOST_PP_ENUM_PARAMS(10,));
vector<
int, int, int, int, int, int, int, int, int, int
>
vmedium_2(BOOST_PP_ENUM_PARAMS(10,));
//~ vector<
//~ int, int, int, int, int, int, int, int, int, int
//~ , int, int, int, int, int, int, int, int, int, int
//~ , int, int, int, int, int, int, int, int, int, int
//~ >
//~ vbig_1(BOOST_PP_ENUM_PARAMS(30,));
//~ vector<
//~ int, int, int, int, int, int, int, int, int, int
//~ , int, int, int, int, int, int, int, int, int, int
//~ , int, int, int, int, int, int, int, int, int, int
//~ >
//~ vbig_2(BOOST_PP_ENUM_PARAMS(30,));
// first decide how many repetitions to measure
// repeats is multiplied by 10 before each pass, so the first pass runs with
// 1000. The loop stops once a pass over both workloads reports at least 2.0
// from time.elapsed() (presumably seconds -- confirm against boost::timer)
// or once repeats has grown past 10000000; because the bound is checked
// before the multiplication, repeats can end up as large as 100000000.
long repeats = 100;
double measured = 0;
while (measured < 2.0 && repeats <= 10000000)
{
repeats *= 10;
boost::timer time;
test::hammer<zip_accumulator<int> >(zip(vsmall_1, vsmall_2), repeats);
test::hammer<zip_accumulator<int> >(zip(vmedium_1, vmedium_2), repeats);
//~ test::hammer<zip_accumulator<int> >(zip(vbig_1, vbig_2), repeats);
measured = time.elapsed();
}
// Print the accumulated result of every zipped pair (single repetition each).
check(zip(vsmall_1, vsmall_2),
"small zip accumulated result: ");
check(zip(vmedium_1, vmedium_2),
"medium zip accumulated result: ");
//~ check(zip(vbig_1, vbig_2),
//~ "big zip accumulated result: ");
// Print the timing of every zipped pair using the chosen repeats.
measure(zip(vsmall_1, vsmall_2),
"small zip time: ", repeats);
measure(zip(vmedium_1, vmedium_2),
"medium zip time: ", repeats);
//~ measure(zip(vbig_1, vbig_2),
//~ "big zip time: ", repeats);
// This is ultimately responsible for preventing all the test code
// from being optimized away. Change this to return 0 and you
// unplug the whole test's life support system.
return test::live_code != 0;
}