Add align-functions=64 and align-loops=32 for GCC to make benchmarks more independent of code alignment.

2026-07-05 15:30:46 +02:00 · 2026-05-06 13:00:12 +02:00
parent 2085b112e2
commit 7dfd0cafb9
1 changed file with 14 additions and 1 deletion
@@ -8,6 +8,19 @@
 //
 //////////////////////////////////////////////////////////////////////////////

+// Force aggressive function/loop alignment to eliminate instruction cache-line
+// alignment noise from benchmark measurements. Without this, identical code can
+// show up to 1.8x performance variation depending on where the linker happens
+// to place each template instantiation relative to 64-byte cache-line boundaries.
+#if defined(__GNUC__) && !defined(__clang__)
+   #pragma GCC optimize("align-functions=64", "align-loops=32")
+#elif defined(__clang__)
+   // Clang has no file-wide pragma for alignment. Use command-line flags:
+   //   -falign-functions=64 -falign-loops=32
+#elif defined(_MSC_VER)
+   // MSVC has no pragma or attribute for function/loop alignment control.
+#endif
+
 #include <algorithm>
 #include <boost/container/vector.hpp>
 #include <iostream>
@@ -2492,7 +2505,7 @@ void run_benchmarks()

   {
      std::cout << "--- bc::deque<" << typeid(T).name() << "> ---\n";
-      bc::deque<T, void, bc::deque_options_t<bc::block_size<128> > > dq;
+      bc::deque<T, void, bc::deque_options_t<bc::block_size<1024> > > dq;
      fill_test_data(dq, N);
      run_all(dq, iter, "deque");
         std::cout << "\n";