From a55a44c67be0e54667d0de446931581542eaecd6 Mon Sep 17 00:00:00 2001
From: Beman <bdawes@acm.org>
Date: Tue, 28 May 2013 07:37:52 -0400
Subject: [PATCH] Loop test time work in progress.

---
 build/Jamfile.v2                              |   4 +-
 doc/index.html                                |  29 +++-
 test/loop_time_test.cpp                       | 153 +++++++++++-------
 .../loop_time_test/loop_time_test.vcxproj     |   2 +-
 4 files changed, 121 insertions(+), 67 deletions(-)
diff --git a/build/Jamfile.v2 b/build/Jamfile.v2
index 4bf7726..9fc614c 100644
--- a/build/Jamfile.v2
+++ b/build/Jamfile.v2
@@ -12,9 +12,11 @@ project
 SOURCES = speed_test speed_test_functions ;
     
 exe "speed_test"
-       : $(SOURCES).cpp  ../../timer/build//boost_timer 
+       : $(SOURCES).cpp  ../../timer/build//boost_timer
+       : <toolset>gcc:<cxxflags>-march=native 
        ;
 
 exe "loop_time_test"
        : loop_time_test.cpp  ../../timer/build//boost_timer 
+       : <toolset>gcc:<cxxflags>-march=native 
        ;
diff --git a/doc/index.html b/doc/index.html
index 00eb839..3f5ce30 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -224,15 +224,16 @@ big_endian(x);
   </tr>
 </table>
 
-<p>There will be no performance difference between the two approaches. Optimizing compilers will likely 
+<p><b>There will be no performance difference between the two approaches, 
+regardless of the native endianness of the machine.</b> Optimizing compilers will likely 
 generate exactly the same code for both.</p>
 
 <p>Now consider a slightly different problem:&nbsp; </p>
 
 <table border="1" cellpadding="5" cellspacing="0" style="border-collapse: collapse" bordercolor="#111111">
   <tr>
-    <td colspan="2">Add a million values to a big endian value in a file, then write the 
-    result to a file </td>
+    <td colspan="2"><i><b>Add a million values to a big endian value in a file, then write the 
+    result to a file </b></i> </td>
   </tr>
   <tr>
     <td><i><b>Endian type approach</b></i></td>
@@ -246,7 +247,7 @@ big_int32_t x;
 ... read into x from a file ...
 
 for (int32_t i = 0; i &lt; 1000000; ++i)
-  x += f(i);
+  x += i;
 
 ... write x to a file ...
 </pre>
@@ -260,7 +261,7 @@ int32_t x;
 big_endian(x);
 
 for (int32_t i = 0; i &lt; 1000000; ++i)
-  x += f(i);
+  x += i;
 
 big_endian(x);
 
@@ -270,10 +271,22 @@ big_endian(x);
   </tr>
 </table>
 
-<p>There will be no performance difference. Optimizing compilers will likely 
-generate exactly the same code for both approaches.&nbsp; </p>
+<p><b>There may or may not be a considerable performance difference, depending 
+on the endianness of the machine. </b>If machine endianness differs from the 
+desired endianness, the Endian type approach must do the byte reversal a million 
+times while the Endian conversion approach only does the reversal once. But if 
+the endianness is the same, there is no conversion with either approach and no 
+conversion code is generated for typical release builds.</p>
 
-<h2>Timing tests</h2>
+<p><b>If compiler byte swap intrinsics are not available, any timing differences 
+will be magnified.</b> Byte swap intrinsics are not available on some older 
+compilers and on some machine architectures, such as pre-486 X86 CPUs.</p>
+
+<p><b>Unaligned types are much slower than aligned types, regardless of 
+endianness considerations.</b> Instead of single instruction register loads and 
+stores, multiple instructions are required.</p>
+
+<h3>Timing tests</h3>
 <p>These tests were run against release builds on a circa 2012 4-core little endian X64 Intel Core i5-3570K 
 CPU @ 3.40GHz under Windows 7.</p>
 
diff --git a/test/loop_time_test.cpp b/test/loop_time_test.cpp
index a38db3f..7a3e32c 100644
--- a/test/loop_time_test.cpp
+++ b/test/loop_time_test.cpp
@@ -9,7 +9,7 @@
 
 #define _SCL_SECURE_NO_WARNINGS
 
-//#define BOOST_ENDIAN_NO_INTRINSICS
+#define BOOST_ENDIAN_NO_INTRINSICS
 //#define BOOST_ENDIAN_LOG
 
 #include <boost/endian/detail/disable_warnings.hpp>
@@ -84,8 +84,9 @@ namespace
   template <class T, class EndianT>
   void time()
   {
+    T total = 0;
     { 
-      cout << "*************Endian integer approach...\n";
+ //     cout << "*************Endian integer approach...\n";
       EndianT x(0);
       boost::timer::cpu_timer t;
       for (uint64_t i = 0; i < n; ++i)
@@ -93,11 +94,11 @@ namespace
         x += static_cast<T>(i);
       }
       t.stop();
-      cout << "x: " << x << endl;
+      total += x;
       cout << "<td align=\"right\">" << t.format(places, "%t") << " s</td>";
     }
     { 
-      cout << "***************Endian conversion approach...\n";
+//      cout << "***************Endian conversion approach...\n";
       T x(0);
       boost::timer::cpu_timer t;
       big_endian(x);
@@ -108,31 +109,41 @@ namespace
       big_endian(x);
       t.stop();
       big_endian(x);
-      cout << "x: " << x << endl;
+      if (x != total)
+        throw std::logic_error("integer approach total != conversion approach total");
       cout << "<td align=\"right\">" << t.format(places, "%t") << " s</td>";
     }
   }
 
-  //void test_big_int16()
-  //{
-  //  cout << "<tr><td>16-bit aligned big endian</td>";
-  //  time<int16_t, big_int16_t>(user::return_x_big_int16);
-  //  time<int16_t, big_int16_t>(user::return_x_value_big_int16);
-  //  time<int16_t, big_int16_t>(user::return_x_in_place_big_int16);
-  //  time<int16_t, big_int16_t>(user::return_x_big_int16);
-  //  cout << "</tr>\n";
-  //}
+ 
+  void test_big_int16()
+  {
+    cout << "<tr><td>16-bit aligned big endian</td>";
+    time<int16_t, big_int16_t>();
+    cout << "</tr>\n";
+  }
 
-  //void test_little_int16()
-  //{
-  //  cout << "<tr><td>16-bit aligned little endian</td>";
-  //  time<int16_t, little_int16_t>(user::return_x_little_int16);
-  //  time<int16_t, little_int16_t>(user::return_x_value_little_int16);
-  //  time<int16_t, little_int16_t>(user::return_x_in_place_little_int16);
-  //  time<int16_t, little_int16_t>(user::return_x_little_int16);
-  //  cout << "</tr>\n";
-  //}
+  void test_little_int16()
+  {
+    cout << "<tr><td>16-bit aligned little endian</td>";
+    time<int16_t, little_int16_t>();
+    cout << "</tr>\n";
+  }
 
+  void test_big_int16un()
+  {
+    cout << "<tr><td>16-bit unaligned big endian</td>";
+    time<int16_t, big_int16un_t>();
+    cout << "</tr>\n";
+  }
+
+  void test_little_int16un()
+  {
+    cout << "<tr><td>16-bit unaligned little endian</td>";
+    time<int16_t, little_int16un_t>();
+    cout << "</tr>\n";
+  }
+ 
   void test_big_int32()
   {
     cout << "<tr><td>32-bit aligned big endian</td>";
@@ -140,32 +151,54 @@ namespace
     cout << "</tr>\n";
   }
 
-  //void test_little_int32()
-  //{
-  //  cout << "<tr><td>32-bit aligned little endian</td>";
-  //  time<int32_t, little_int32_t>();
-  //  cout << "</tr>\n";
-  //}
+  void test_little_int32()
+  {
+    cout << "<tr><td>32-bit aligned little endian</td>";
+    time<int32_t, little_int32_t>();
+    cout << "</tr>\n";
+  }
 
-  //void test_big_int64()
-  //{
-  //  cout << "<tr><td>64-bit aligned big endian</td>";
-  //  time<int64_t, big_int64_t>(user::return_x_big_int64);
-  //  time<int64_t, big_int64_t>(user::return_x_value_big_int64);
-  //  time<int64_t, big_int64_t>(user::return_x_in_place_big_int64);
-  //  time<int64_t, big_int64_t>(user::return_x_big_int64);
-  //  cout << "</tr>\n";
-  //}
+  void test_big_int32un()
+  {
+    cout << "<tr><td>32-bit unaligned big endian</td>";
+    time<int32_t, big_int32un_t>();
+    cout << "</tr>\n";
+  }
 
-  //void test_little_int64()
-  //{
-  //  cout << "<tr><td>64-bit aligned little endian</td>";
-  //  time<int64_t, little_int64_t>(user::return_x_little_int64);
-  //  time<int64_t, little_int64_t>(user::return_x_value_little_int64);
-  //  time<int64_t, little_int64_t>(user::return_x_in_place_little_int64);
-  //  time<int64_t, little_int64_t>(user::return_x_little_int64);
-  //  cout << "</tr>\n";
-  //}
+  void test_little_int32un()
+  {
+    cout << "<tr><td>32-bit unaligned little endian</td>";
+    time<int32_t, little_int32un_t>();
+    cout << "</tr>\n";
+  }
+ 
+  void test_big_int64()
+  {
+    cout << "<tr><td>64-bit aligned big endian</td>";
+    time<int64_t, big_int64_t>();
+    cout << "</tr>\n";
+  }
+
+  void test_little_int64()
+  {
+    cout << "<tr><td>64-bit aligned little endian</td>";
+    time<int64_t, little_int64_t>();
+    cout << "</tr>\n";
+  }
+
+  void test_big_int64un()
+  {
+    cout << "<tr><td>64-bit unaligned big endian</td>";
+    time<int64_t, big_int64un_t>();
+    cout << "</tr>\n";
+  }
+
+  void test_little_int64un()
+  {
+    cout << "<tr><td>64-bit unaligned little endian</td>";
+    time<int64_t, little_int64un_t>();
+    cout << "</tr>\n";
+  }
 
 }  // unnamed namespace
 
@@ -176,7 +209,7 @@ int cpp_main(int argc, char* argv[])
   process_command_line(argc, argv);
   
   cout
-    << "<html>\n<head>\n<title>Endian Speed Test</title>\n</head>\n<body>\n"
+    << "<html>\n<head>\n<title>Endian Loop Time Test</title>\n</head>\n<body>\n"
     << "<table border=\"1\" cellpadding=\"5\" cellspacing=\"0\""
     << "style=\"border-collapse: collapse\" bordercolor=\"#111111\">\n"
     << "<tr><td colspan=\"6\" align=\"center\"><b>"
@@ -186,19 +219,25 @@ int cpp_main(int argc, char* argv[])
     << ", Intrinsics: " BOOST_ENDIAN_INTRINSIC_MSG
     << "</b></td></tr>\n"
     << "<tr><td><b>Test Case</b></td>\n"
-       "<td align=\"center\"><b>int<br>arg</b></td>\n"
-       "<td align=\"center\"><b>int<br>value(arg)</b></td>\n"
-       "<td align=\"center\"><b>int<br>in place(arg)</b></td>\n"
-       "<td align=\"center\"><b>Endian<br>arg</b></td>\n"
+       "<td align=\"center\"><b>Endian<br>type</b></td>\n"
+       "<td align=\"center\"><b>Endian<br>conversion<br>function</b></td>\n"
        "</tr>\n"
     ;
 
-  //test_big_int16();
-  //test_little_int16();
+  test_big_int16();
+  test_little_int16();
+  test_big_int16un();
+  test_little_int16un();
+
   test_big_int32();
-  //test_little_int32();
-  //test_big_int64();
-  //test_little_int64();
+  test_little_int32();
+  test_big_int32un();
+  test_little_int32un();
+
+  test_big_int64();
+  test_little_int64();
+  test_big_int64un();
+  test_little_int64un();
 
   cout << "\n</table>\n</body>\n</html>\n";
 
diff --git a/test/msvc2012/loop_time_test/loop_time_test.vcxproj b/test/msvc2012/loop_time_test/loop_time_test.vcxproj
index 32047e6..b417ca6 100644
--- a/test/msvc2012/loop_time_test/loop_time_test.vcxproj
+++ b/test/msvc2012/loop_time_test/loop_time_test.vcxproj
@@ -60,7 +60,7 @@
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
     <PostBuildEvent>
-      <Command>"$(TargetDir)\$(TargetName).exe" 1000</Command>
+      <Command>"$(TargetDir)\$(TargetName).exe" 1</Command>
     </PostBuildEvent>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">

Add a million values to a big endian value in a file, then write the - result to a file		Add a million values to a big endian value in a file, then write the + result to a file
Endian type approach
*" @@ -186,19 +219,25 @@ int cpp_main(int argc, char argv[]) << ", Intrinsics: " BOOST_ENDIAN_INTRINSIC_MSG << "**
Test Case	int arg	int value(arg)	int in place(arg)	Endian arg	Endian type	Endian conversion function