From a55a44c67be0e54667d0de446931581542eaecd6 Mon Sep 17 00:00:00 2001 From: Beman Date: Tue, 28 May 2013 07:37:52 -0400 Subject: [PATCH] Loop test time work in progress. --- build/Jamfile.v2 | 4 +- doc/index.html | 29 +++- test/loop_time_test.cpp | 153 +++++++++++------- .../loop_time_test/loop_time_test.vcxproj | 2 +- 4 files changed, 121 insertions(+), 67 deletions(-) diff --git a/build/Jamfile.v2 b/build/Jamfile.v2 index 4bf7726..9fc614c 100644 --- a/build/Jamfile.v2 +++ b/build/Jamfile.v2 @@ -12,9 +12,11 @@ project SOURCES = speed_test speed_test_functions ; exe "speed_test" - : $(SOURCES).cpp ../../timer/build//boost_timer + : $(SOURCES).cpp ../../timer/build//boost_timer + : gcc:-march=native ; exe "loop_time_test" : loop_time_test.cpp ../../timer/build//boost_timer + : gcc:-march=native ; diff --git a/doc/index.html b/doc/index.html index 00eb839..3f5ce30 100644 --- a/doc/index.html +++ b/doc/index.html @@ -224,15 +224,16 @@ big_endian(x); -

There will be no performance difference between the two approaches. Optimizing compilers will likely +

There will be no performance difference between the two approaches, +regardless of the native endianness of the machine. Optimizing compilers will likely generate exactly the same code for both.

Now consider a slightly different problem: 

- + @@ -246,7 +247,7 @@ big_int32_t x; ... read into x from a file ... for (int32_t i = 0; i < 1000000; ++i) - x += f(i); + x += i; ... write x to a file ... @@ -260,7 +261,7 @@ int32_t x; big_endian(x); for (int32_t i = 0; i < 1000000; ++i) - x += f(i); + x += i; big_endian(x); @@ -270,10 +271,22 @@ big_endian(x);
Add a million values to a big endian value in a file, then write the - result to a file Add a million values to a big endian value in a file, then write the + result to a file
Endian type approach
-

There will be no performance difference. Optimizing compilers will likely -generate exactly the same code for both approaches. 

+

There may or may not be a considerable performance difference, depending +on the endianness of the machine. If machine endianness differs from the +desired endianness, the Endian type approach must do the byte reversal a million +times while the Endian conversion approach only does the reversal once. But if +the endianness is the same, there is no conversion with either approach and no +conversion code is generated for typical release builds.

-

Timing tests

+

If compiler byte swap intrinsics are not available, any timing differences +will be magnified. Byte swap intrinsics are not available on some older +compilers and on some machine architectures, such as pre-486 X86 CPUs.

+ +

Unaligned types are much slower than aligned types, regardless of +endianness considerations. Instead of single instruction register loads and +stores, multiple instructions are required.

+ +

Timing tests

These tests were run against release builds on a circa 2012 4-core little endian X64 Intel Core i5-3570K CPU @ 3.40GHz under Windows 7.

diff --git a/test/loop_time_test.cpp b/test/loop_time_test.cpp index a38db3f..7a3e32c 100644 --- a/test/loop_time_test.cpp +++ b/test/loop_time_test.cpp @@ -9,7 +9,7 @@ #define _SCL_SECURE_NO_WARNINGS -//#define BOOST_ENDIAN_NO_INTRINSICS +#define BOOST_ENDIAN_NO_INTRINSICS //#define BOOST_ENDIAN_LOG #include @@ -84,8 +84,9 @@ namespace template void time() { + T total = 0; { - cout << "*************Endian integer approach...\n"; + // cout << "*************Endian integer approach...\n"; EndianT x(0); boost::timer::cpu_timer t; for (uint64_t i = 0; i < n; ++i) @@ -93,11 +94,11 @@ namespace x += static_cast(i); } t.stop(); - cout << "x: " << x << endl; + total += x; cout << "" << t.format(places, "%t") << " s"; } { - cout << "***************Endian conversion approach...\n"; +// cout << "***************Endian conversion approach...\n"; T x(0); boost::timer::cpu_timer t; big_endian(x); @@ -108,31 +109,41 @@ namespace big_endian(x); t.stop(); big_endian(x); - cout << "x: " << x << endl; + if (x != total) + throw std::logic_error("integer approach total != conversion approach total"); cout << "" << t.format(places, "%t") << " s"; } } - //void test_big_int16() - //{ - // cout << "16-bit aligned big endian"; - // time(user::return_x_big_int16); - // time(user::return_x_value_big_int16); - // time(user::return_x_in_place_big_int16); - // time(user::return_x_big_int16); - // cout << "\n"; - //} + + void test_big_int16() + { + cout << "16-bit aligned big endian"; + time(); + cout << "\n"; + } - //void test_little_int16() - //{ - // cout << "16-bit aligned little endian"; - // time(user::return_x_little_int16); - // time(user::return_x_value_little_int16); - // time(user::return_x_in_place_little_int16); - // time(user::return_x_little_int16); - // cout << "\n"; - //} + void test_little_int16() + { + cout << "16-bit aligned little endian"; + time(); + cout << "\n"; + } + void test_big_int16un() + { + cout << "16-bit unaligned big endian"; + time(); + cout << "\n"; + } + + void 
test_little_int16un() + { + cout << "16-bit unaligned little endian"; + time(); + cout << "\n"; + } + void test_big_int32() { cout << "32-bit aligned big endian"; @@ -140,32 +151,54 @@ namespace cout << "\n"; } - //void test_little_int32() - //{ - // cout << "32-bit aligned little endian"; - // time(); - // cout << "\n"; - //} + void test_little_int32() + { + cout << "32-bit aligned little endian"; + time(); + cout << "\n"; + } - //void test_big_int64() - //{ - // cout << "64-bit aligned big endian"; - // time(user::return_x_big_int64); - // time(user::return_x_value_big_int64); - // time(user::return_x_in_place_big_int64); - // time(user::return_x_big_int64); - // cout << "\n"; - //} + void test_big_int32un() + { + cout << "32-bit unaligned big endian"; + time(); + cout << "\n"; + } - //void test_little_int64() - //{ - // cout << "64-bit aligned little endian"; - // time(user::return_x_little_int64); - // time(user::return_x_value_little_int64); - // time(user::return_x_in_place_little_int64); - // time(user::return_x_little_int64); - // cout << "\n"; - //} + void test_little_int32un() + { + cout << "32-bit unaligned little endian"; + time(); + cout << "\n"; + } + + void test_big_int64() + { + cout << "64-bit aligned big endian"; + time(); + cout << "\n"; + } + + void test_little_int64() + { + cout << "64-bit aligned little endian"; + time(); + cout << "\n"; + } + + void test_big_int64un() + { + cout << "64-bit unaligned big endian"; + time(); + cout << "\n"; + } + + void test_little_int64un() + { + cout << "64-bit unaligned little endian"; + time(); + cout << "\n"; + } } // unnamed namespace @@ -176,7 +209,7 @@ int cpp_main(int argc, char* argv[]) process_command_line(argc, argv); cout - << "\n\nEndian Speed Test\n\n\n" + << "\n\nEndian Loop Time Test\n\n\n" << "\n" << "\n" << "\n" - "\n" - "\n" - "\n" - "\n" + "\n" + "\n" "\n" ; - //test_big_int16(); - //test_little_int16(); + test_big_int16(); + test_little_int16(); + test_big_int16un(); + 
test_little_int16un(); + test_big_int32(); - //test_little_int32(); - //test_big_int64(); - //test_little_int64(); + test_little_int32(); + test_big_int32un(); + test_little_int32un(); + + test_big_int64(); + test_little_int64(); + test_big_int64un(); + test_little_int64un(); cout << "\n
" @@ -186,19 +219,25 @@ int cpp_main(int argc, char* argv[]) << ", Intrinsics: " BOOST_ENDIAN_INTRINSIC_MSG << "
Test Caseint
arg
int
value(arg)
int
in place(arg)
Endian
arg
Endian
type
Endian
conversion
function
\n\n\n"; diff --git a/test/msvc2012/loop_time_test/loop_time_test.vcxproj b/test/msvc2012/loop_time_test/loop_time_test.vcxproj index 32047e6..b417ca6 100644 --- a/test/msvc2012/loop_time_test/loop_time_test.vcxproj +++ b/test/msvc2012/loop_time_test/loop_time_test.vcxproj @@ -60,7 +60,7 @@ true - "$(TargetDir)\$(TargetName).exe" 1000 + "$(TargetDir)\$(TargetName).exe" 1