From 0b70dfc27f98eb701383a75b32d9dda03fe75462 Mon Sep 17 00:00:00 2001
From: Angus Gratton <angus@espressif.com>
Date: Thu, 16 May 2019 11:19:32 +0800
Subject: [PATCH] Add floating point performance test

---
 components/esp32/test/test_fp.c               | 69 +++++++++++++++++++
 components/idf_test/include/idf_performance.h |  3 +
 2 files changed, 72 insertions(+)

diff --git a/components/esp32/test/test_fp.c b/components/esp32/test/test_fp.c
index 8f08ed9c46..c9454107a6 100644
--- a/components/esp32/test/test_fp.c
+++ b/components/esp32/test/test_fp.c
@@ -1,8 +1,10 @@
 #include <math.h>
 #include <stdio.h>
+#include "soc/cpu.h"
 #include "freertos/FreeRTOS.h"
 #include "freertos/task.h"
 #include "unity.h"
+#include "test_utils.h"
 
 /* Note: these functions are included here for unit test purposes. They are not needed for writing
  * normal code. If writing standard C floating point code, libgcc should correctly include implementations
@@ -195,3 +197,70 @@ TEST_CASE("context switch saves FP registers", "[fp]")
     }
     TEST_ASSERT(state.fail == 0);
 }
+
+/* Benchmark helper: divides a float by 1.000432f `counts` times, writes the average CCOUNT delta per iteration to *cycles and returns the final value. Note: not static, to avoid optimisation of const result */
+float IRAM_ATTR test_fp_benchmark_fp_divide(int counts, unsigned *cycles)
+{
+    float f = MAXFLOAT;
+    uint32_t before, after;
+    RSR(CCOUNT, before); // sample CCOUNT before the loop (callers report the delta as cycles)
+
+    for (int i = 0; i < counts; i++) {
+        f /= 1.000432f; // each division consumes the previous result
+    }
+
+    RSR(CCOUNT, after); // sample CCOUNT again; the delta covers the whole loop, not only the divisions
+    *cycles = (after - before) / counts; // integer average per iteration; counts must be non-zero
+
+    return f; // hand the computed value back to the caller
+}
+
+TEST_CASE("floating point division performance", "[fp]")
+{
+    const unsigned COUNTS = 1000; // number of timed divisions
+    unsigned cycles = 0; // average per division, filled in by the benchmark helper
+
+    // initialize fpu: run one FP operation before timing (presumably so first-use FPU setup is not measured - TODO confirm)
+    volatile __attribute__((unused)) float dummy = sqrtf(rand());
+
+    float f = test_fp_benchmark_fp_divide(COUNTS, &cycles);
+
+    printf("%u divisions from %f = %f\n", COUNTS, MAXFLOAT, f); // COUNTS is unsigned, hence %u
+    printf("Per division = %u cycles\n", cycles); // cycles is unsigned, hence %u
+
+    TEST_PERFORMANCE_LESS_THAN(ESP32_CYCLES_PER_DIV, "%d cycles", cycles); // format left as-is: it is consumed by the macro, defined outside this patch
+}
+
+/* Benchmark helper: applies sqrtf() to a float `counts` times, writes the average CCOUNT delta per iteration to *cycles and returns the final value. Note: not static, to avoid optimisation of const result */
+float IRAM_ATTR test_fp_benchmark_fp_sqrt(int counts, unsigned *cycles)
+{
+    float f = MAXFLOAT;
+    uint32_t before, after;
+    RSR(CCOUNT, before); // sample CCOUNT before the loop (callers report the delta as cycles)
+
+    for (int i = 0; i < counts; i++) {
+        f = sqrtf(f); // each square root consumes the previous result
+    }
+
+    RSR(CCOUNT, after); // sample CCOUNT again; the delta covers the whole loop, not only the sqrtf calls
+    *cycles = (after - before) / counts; // integer average per iteration; counts must be non-zero
+
+    return f; // hand the computed value back to the caller
+}
+
+TEST_CASE("floating point square root performance", "[fp]")
+{
+    const unsigned COUNTS = 200; // number of timed sqrtf() calls
+    unsigned cycles = 0; // average per sqrtf(), filled in by the benchmark helper
+
+    // initialize fpu: run one FP operation before timing (presumably so first-use FPU setup is not measured - TODO confirm)
+    volatile __attribute__((unused)) float dummy = sqrtf(rand());
+
+    float f = test_fp_benchmark_fp_sqrt(COUNTS, &cycles);
+
+    printf("%u square roots from %f = %f\n", COUNTS, MAXFLOAT, f); // COUNTS is unsigned, hence %u
+    printf("Per sqrt = %u cycles\n", cycles); // cycles is unsigned, hence %u
+
+    TEST_PERFORMANCE_LESS_THAN(ESP32_CYCLES_PER_SQRT, "%d cycles", cycles); // format left as-is: it is consumed by the macro, defined outside this patch
+}
+
diff --git a/components/idf_test/include/idf_performance.h b/components/idf_test/include/idf_performance.h
index 55809e90ae..6cdb288236 100644
--- a/components/idf_test/include/idf_performance.h
+++ b/components/idf_test/include/idf_performance.h
@@ -27,4 +27,7 @@
 #define IDF_PERFORMANCE_MAX_ESP32_TIME_SHA512_32KB                              4500
 // AES-CBC hardware throughput (accounts for worst-case performance with PSRAM workaround)
 #define IDF_PERFORMANCE_MIN_AES_CBC_THROUGHPUT_MBSEC                            8.5
+// maximum CPU cycles per floating point divide and per sqrt (configured for worst-case with PSRAM workaround)
+#define IDF_PERFORMANCE_MAX_ESP32_CYCLES_PER_DIV 70
+#define IDF_PERFORMANCE_MAX_ESP32_CYCLES_PER_SQRT 140