Add align-functions=64 and align-loops=32 for GCC to make benchmarks more independent of code alignment.

This commit is contained in:
Ion Gaztañaga
2026-05-06 13:00:12 +02:00
parent 2085b112e2
commit 7dfd0cafb9
+14 -1
View File
@@ -8,6 +8,19 @@
//
//////////////////////////////////////////////////////////////////////////////
// Force aggressive function/loop alignment to eliminate instruction cache-line
// alignment noise from benchmark measurements. Without this, identical code can
// show up to 1.8x performance variation depending on where the linker happens
// to place each template instantiation relative to 64-byte cache-line boundaries.
//
// This block sits before every #include of the file: the GCC pragma only
// affects functions defined after it, so it has to come first for code pulled
// in from the headers below to be covered as well.
#if defined(__GNUC__) && !defined(__clang__)
// Real GCC only; Clang also defines __GNUC__, hence the explicit exclusion.
// Each string is treated like the matching -f option, i.e. this requests
// -falign-functions=64 -falign-loops=32 for the remainder of the file.
#pragma GCC optimize("align-functions=64", "align-loops=32")
#elif defined(__clang__)
// Clang has no file-wide pragma for alignment. Use command-line flags:
// -falign-functions=64 -falign-loops=32
#elif defined(_MSC_VER)
// MSVC has no pragma or attribute for function/loop alignment control.
#endif
#include <algorithm>
#include <boost/container/vector.hpp>
#include <iostream>
@@ -2492,7 +2505,7 @@ void run_benchmarks()
{
std::cout << "--- bc::deque<" << typeid(T).name() << "> ---\n";
bc::deque<T, void, bc::deque_options_t<bc::block_size<128> > > dq;
bc::deque<T, void, bc::deque_options_t<bc::block_size<1024> > > dq;
fill_test_data(dq, N);
run_all(dq, iter, "deque");
std::cout << "\n";