Loop test time work in progress.

2025-09-29 01:50:54 +02:00 · 2013-05-28 07:37:52 -04:00
parent 50b5488997
commit a55a44c67b
4 changed files with 121 additions and 67 deletions
--- a/build/Jamfile.v2
+++ b/build/Jamfile.v2
@@ -12,9 +12,11 @@ project
 SOURCES = speed_test speed_test_functions ;
    
 exe "speed_test"
-       : $(SOURCES).cpp  ../../timer/build//boost_timer 
+       : $(SOURCES).cpp  ../../timer/build//boost_timer
+       : <toolset>gcc:<cxxflags>-march=native 
       ;

 exe "loop_time_test"
       : loop_time_test.cpp  ../../timer/build//boost_timer 
+       : <toolset>gcc:<cxxflags>-march=native 
       ;
--- a/doc/index.html
+++ b/doc/index.html
@@ -224,15 +224,16 @@ big_endian(x);
  </tr>
 </table>

-<p>There will be no performance difference between the two approaches. Optimizing compilers will likely 
+<p><b>There will be no performance difference between the two approaches, 
+regardless of the native endianness of the machine.</b> Optimizing compilers will likely 
 generate exactly the same code for both.</p>

 <p>Now consider a slightly different problem:&nbsp; </p>

 <table border="1" cellpadding="5" cellspacing="0" style="border-collapse: collapse" bordercolor="#111111">
  <tr>
-    <td colspan="2">Add a million values to a big endian value in a file, then write the 
-    result to a file </td>
+    <td colspan="2"><i><b>Add a million values to a big endian value in a file, then write the 
+    result to a file </b></i> </td>
  </tr>
  <tr>
    <td><i><b>Endian type approach</b></i></td>
@@ -246,7 +247,7 @@ big_int32_t x;
 ... read into x from a file ...

 for (int32_t i = 0; i &lt; 1000000; ++i)
-  x += f(i);
+  x += i;

 ... write x to a file ...
 </pre>
@@ -260,7 +261,7 @@ int32_t x;
 big_endian(x);

 for (int32_t i = 0; i &lt; 1000000; ++i)
-  x += f(i);
+  x += i;

 big_endian(x);

@@ -270,10 +271,22 @@ big_endian(x);
  </tr>
 </table>

-<p>There will be no performance difference. Optimizing compilers will likely 
-generate exactly the same code for both approaches.&nbsp; </p>
+<p><b>There may or may not be a considerable performance difference, depending 
+on the endianness of the machine. </b>If machine endianness differs from the 
+desired endianness, the Endian type approach must do the byte reversal a million 
+times while the Endian conversion approach only does the reversal once. But if 
+the endianness is the same, there is no conversion with either approach and no 
+conversion code is generated for typical release builds.</p>

-<h2>Timing tests</h2>
+<p><b>If compiler byte swap intrinsics are not available, any timing differences 
+will be magnified.</b> Byte swap intrinsics are not available on some older 
+compilers and on some machine architectures, such as pre-486 X86 CPUs.</p>
+
+<p><b>Unaligned types are much slower than aligned types, regardless of 
+endianness considerations.</b> Instead of single instruction register loads and 
+stores, multiple instructions are required.</p>
+
+<h3>Timing tests</h3>
 <p>These tests were run against release builds on a circa 2012 4-core little endian X64 Intel Core i5-3570K 
 CPU @ 3.40GHz under Windows 7.</p>

--- a/test/loop_time_test.cpp
+++ b/test/loop_time_test.cpp
@@ -9,7 +9,7 @@

 #define _SCL_SECURE_NO_WARNINGS

-//#define BOOST_ENDIAN_NO_INTRINSICS
+#define BOOST_ENDIAN_NO_INTRINSICS
 //#define BOOST_ENDIAN_LOG

 #include <boost/endian/detail/disable_warnings.hpp>
@@ -84,8 +84,9 @@ namespace
  template <class T, class EndianT>
  void time()
  {
+    T total = 0;
    { 
-      cout << "*************Endian integer approach...\n";
+ //     cout << "*************Endian integer approach...\n";
      EndianT x(0);
      boost::timer::cpu_timer t;
      for (uint64_t i = 0; i < n; ++i)
@@ -93,11 +94,11 @@ namespace
        x += static_cast<T>(i);
      }
      t.stop();
-      cout << "x: " << x << endl;
+      total += x;
      cout << "<td align=\"right\">" << t.format(places, "%t") << " s</td>";
    }
    { 
-      cout << "***************Endian conversion approach...\n";
+//      cout << "***************Endian conversion approach...\n";
      T x(0);
      boost::timer::cpu_timer t;
      big_endian(x);
@@ -108,31 +109,41 @@ namespace
      big_endian(x);
      t.stop();
      big_endian(x);
-      cout << "x: " << x << endl;
+      if (x != total)
+        throw std::logic_error("integer approach total != conversion approach total");
      cout << "<td align=\"right\">" << t.format(places, "%t") << " s</td>";
    }
  }

-  //void test_big_int16()
-  //{
-  //  cout << "<tr><td>16-bit aligned big endian</td>";
-  //  time<int16_t, big_int16_t>(user::return_x_big_int16);
-  //  time<int16_t, big_int16_t>(user::return_x_value_big_int16);
-  //  time<int16_t, big_int16_t>(user::return_x_in_place_big_int16);
-  //  time<int16_t, big_int16_t>(user::return_x_big_int16);
-  //  cout << "</tr>\n";
-  //}
+ 
+  void test_big_int16()
+  {
+    cout << "<tr><td>16-bit aligned big endian</td>";
+    time<int16_t, big_int16_t>();
+    cout << "</tr>\n";
+  }

-  //void test_little_int16()
-  //{
-  //  cout << "<tr><td>16-bit aligned little endian</td>";
-  //  time<int16_t, little_int16_t>(user::return_x_little_int16);
-  //  time<int16_t, little_int16_t>(user::return_x_value_little_int16);
-  //  time<int16_t, little_int16_t>(user::return_x_in_place_little_int16);
-  //  time<int16_t, little_int16_t>(user::return_x_little_int16);
-  //  cout << "</tr>\n";
-  //}
+  void test_little_int16()
+  {
+    cout << "<tr><td>16-bit aligned little endian</td>";
+    time<int16_t, little_int16_t>();
+    cout << "</tr>\n";
+  }

+  void test_big_int16un()
+  {
+    cout << "<tr><td>16-bit unaligned big endian</td>";
+    time<int16_t, big_int16un_t>();
+    cout << "</tr>\n";
+  }
+
+  void test_little_int16un()
+  {
+    cout << "<tr><td>16-bit unaligned little endian</td>";
+    time<int16_t, little_int16un_t>();
+    cout << "</tr>\n";
+  }
+ 
  void test_big_int32()
  {
    cout << "<tr><td>32-bit aligned big endian</td>";
@@ -140,32 +151,54 @@ namespace
    cout << "</tr>\n";
  }

-  //void test_little_int32()
-  //{
-  //  cout << "<tr><td>32-bit aligned little endian</td>";
-  //  time<int32_t, little_int32_t>();
-  //  cout << "</tr>\n";
-  //}
+  void test_little_int32()
+  {
+    cout << "<tr><td>32-bit aligned little endian</td>";
+    time<int32_t, little_int32_t>();
+    cout << "</tr>\n";
+  }

-  //void test_big_int64()
-  //{
-  //  cout << "<tr><td>64-bit aligned big endian</td>";
-  //  time<int64_t, big_int64_t>(user::return_x_big_int64);
-  //  time<int64_t, big_int64_t>(user::return_x_value_big_int64);
-  //  time<int64_t, big_int64_t>(user::return_x_in_place_big_int64);
-  //  time<int64_t, big_int64_t>(user::return_x_big_int64);
-  //  cout << "</tr>\n";
-  //}
+  void test_big_int32un()
+  {
+    cout << "<tr><td>32-bit unaligned big endian</td>";
+    time<int32_t, big_int32un_t>();
+    cout << "</tr>\n";
+  }

-  //void test_little_int64()
-  //{
-  //  cout << "<tr><td>64-bit aligned little endian</td>";
-  //  time<int64_t, little_int64_t>(user::return_x_little_int64);
-  //  time<int64_t, little_int64_t>(user::return_x_value_little_int64);
-  //  time<int64_t, little_int64_t>(user::return_x_in_place_little_int64);
-  //  time<int64_t, little_int64_t>(user::return_x_little_int64);
-  //  cout << "</tr>\n";
-  //}
+  void test_little_int32un()
+  {
+    cout << "<tr><td>32-bit unaligned little endian</td>";
+    time<int32_t, little_int32un_t>();
+    cout << "</tr>\n";
+  }
+ 
+  void test_big_int64()
+  {
+    cout << "<tr><td>64-bit aligned big endian</td>";
+    time<int64_t, big_int64_t>();
+    cout << "</tr>\n";
+  }
+
+  void test_little_int64()
+  {
+    cout << "<tr><td>64-bit aligned little endian</td>";
+    time<int64_t, little_int64_t>();
+    cout << "</tr>\n";
+  }
+
+  void test_big_int64un()
+  {
+    cout << "<tr><td>64-bit unaligned big endian</td>";
+    time<int64_t, big_int64un_t>();
+    cout << "</tr>\n";
+  }
+
+  void test_little_int64un()
+  {
+    cout << "<tr><td>64-bit unaligned little endian</td>";
+    time<int64_t, little_int64un_t>();
+    cout << "</tr>\n";
+  }

 }  // unnamed namespace

@@ -176,7 +209,7 @@ int cpp_main(int argc, char* argv[])
  process_command_line(argc, argv);
  
  cout
-    << "<html>\n<head>\n<title>Endian Speed Test</title>\n</head>\n<body>\n"
+    << "<html>\n<head>\n<title>Endian Loop Time Test</title>\n</head>\n<body>\n"
    << "<table border=\"1\" cellpadding=\"5\" cellspacing=\"0\""
    << "style=\"border-collapse: collapse\" bordercolor=\"#111111\">\n"
    << "<tr><td colspan=\"6\" align=\"center\"><b>"
@@ -186,19 +219,25 @@ int cpp_main(int argc, char* argv[])
    << ", Intrinsics: " BOOST_ENDIAN_INTRINSIC_MSG
    << "</b></td></tr>\n"
    << "<tr><td><b>Test Case</b></td>\n"
-       "<td align=\"center\"><b>int<br>arg</b></td>\n"
-       "<td align=\"center\"><b>int<br>value(arg)</b></td>\n"
-       "<td align=\"center\"><b>int<br>in place(arg)</b></td>\n"
-       "<td align=\"center\"><b>Endian<br>arg</b></td>\n"
+       "<td align=\"center\"><b>Endian<br>type</b></td>\n"
+       "<td align=\"center\"><b>Endian<br>conversion<br>function</b></td>\n"
       "</tr>\n"
    ;

-  //test_big_int16();
-  //test_little_int16();
+  test_big_int16();
+  test_little_int16();
+  test_big_int16un();
+  test_little_int16un();
+
  test_big_int32();
-  //test_little_int32();
-  //test_big_int64();
-  //test_little_int64();
+  test_little_int32();
+  test_big_int32un();
+  test_little_int32un();
+
+  test_big_int64();
+  test_little_int64();
+  test_big_int64un();
+  test_little_int64un();

  cout << "\n</table>\n</body>\n</html>\n";

--- a/test/msvc2012/loop_time_test/loop_time_test.vcxproj
+++ b/test/msvc2012/loop_time_test/loop_time_test.vcxproj
@@ -60,7 +60,7 @@
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
    <PostBuildEvent>
-      <Command>"$(TargetDir)\$(TargetName).exe" 1000</Command>
+      <Command>"$(TargetDir)\$(TargetName).exe" 1</Command>
    </PostBuildEvent>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">