diff --git a/components/freertos/test/CMakeLists.txt b/components/freertos/test/CMakeLists.txt index 8f40b2df02..896bb5795c 100644 --- a/components/freertos/test/CMakeLists.txt +++ b/components/freertos/test/CMakeLists.txt @@ -5,7 +5,7 @@ if(IDF_TARGET STREQUAL "esp32") endif() -idf_component_register(SRC_DIRS ${src_dirs} +idf_component_register(SRC_DIRS ${srcdirs} INCLUDE_DIRS . REQUIRES unity test_utils ) \ No newline at end of file diff --git a/components/freertos/test/test_isr_latency.c b/components/freertos/test/test_isr_latency.c new file mode 100644 index 0000000000..10c5d3069e --- /dev/null +++ b/components/freertos/test/test_isr_latency.c @@ -0,0 +1,73 @@ +#include +#include + +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/semphr.h" +#include "freertos/queue.h" +#include "freertos/xtensa_api.h" +#include "esp_intr_alloc.h" +#include "xtensa/hal.h" +#include "unity.h" +#include "soc/cpu.h" +#include "test_utils.h" + +#define SW_ISR_LEVEL_1 7 + +static SemaphoreHandle_t sync; +static SemaphoreHandle_t end_sema; +static uint32_t cycle_before_trigger; +static uint32_t cycle_before_exit; +static uint32_t delta_enter_cycles = 0; +static uint32_t delta_exit_cycles = 0; + +static void software_isr(void *arg) { /* ISR for the software interrupt: adds the trigger-to-entry cycle count to delta_enter_cycles, clears the interrupt, gives sync and records the cycle count just before returning. */ + (void)arg; + BaseType_t yield = pdFALSE; /* xSemaphoreGiveFromISR() only sets this to pdTRUE when a task was woken, so it has to start as pdFALSE */ + delta_enter_cycles += portGET_RUN_TIME_COUNTER_VALUE() - cycle_before_trigger; + + xt_set_intclear(1 << SW_ISR_LEVEL_1); + + xSemaphoreGiveFromISR(sync, &yield); + if(yield) { + portYIELD_FROM_ISR(); + } + + cycle_before_exit = portGET_RUN_TIME_COUNTER_VALUE(); +} + +static void test_task(void *arg) { /* Test task: installs software_isr, triggers the software interrupt 10000 times while accumulating the ISR-exit-to-task cycle count, averages both counters, then frees the interrupt, gives end_sema and deletes itself. */ + (void)arg; + + intr_handle_t handle; + + esp_err_t err = esp_intr_alloc(ETS_INTERNAL_SW0_INTR_SOURCE, ESP_INTR_FLAG_LEVEL1, &software_isr, NULL, &handle); + TEST_ASSERT_EQUAL_HEX32(ESP_OK, err); + + for(int i = 0;i < 10000; i++) { + cycle_before_trigger = portGET_RUN_TIME_COUNTER_VALUE(); + xt_set_intset(1 << SW_ISR_LEVEL_1); + xSemaphoreTake(sync, portMAX_DELAY); + delta_exit_cycles += 
portGET_RUN_TIME_COUNTER_VALUE() - cycle_before_exit; + } + + delta_enter_cycles /= 10000; + delta_exit_cycles /= 10000; + + esp_intr_free(handle); + xSemaphoreGive(end_sema); + vTaskDelete(NULL); +} + +TEST_CASE("isr latency test", "[freertos] [ignore]") +{ /* Creates both semaphores, runs test_task pinned to core 0 at priority configMAX_PRIORITIES - 1, waits for it to finish and checks the averaged cycle counts against the performance limits. */ + sync = xSemaphoreCreateBinary(); + TEST_ASSERT(sync != NULL); + end_sema = xSemaphoreCreateBinary(); + TEST_ASSERT(end_sema != NULL); + xTaskCreatePinnedToCore(test_task, "tst" , 4096, NULL, configMAX_PRIORITIES - 1, NULL, 0); + BaseType_t result = xSemaphoreTake(end_sema, portMAX_DELAY); + TEST_ASSERT_EQUAL_HEX32(pdTRUE, result); + TEST_PERFORMANCE_LESS_THAN(ISR_ENTER_CYCLES, "%d cycles" ,delta_enter_cycles); + TEST_PERFORMANCE_LESS_THAN(ISR_EXIT_CYCLES, "%d cycles" ,delta_exit_cycles); +} \ No newline at end of file diff --git a/components/freertos/xt_asm_utils.h b/components/freertos/xt_asm_utils.h new file mode 100644 index 0000000000..e16d4b32c0 --- /dev/null +++ b/components/freertos/xt_asm_utils.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* Copyright 2015-2018 Espressif Systems (Shanghai) PTE LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* File adapted for use in the IDF FreeRTOS component, originally + * extracted from the Zephyr RTOS code base: + * https://github.com/zephyrproject-rtos/zephyr/blob/dafd348/arch/xtensa/include/xtensa-asm2-s.h + */ + +#ifndef __XT_ASM_UTILS_H +#define __XT_ASM_UTILS_H + +/* + * SPILL_ALL_WINDOWS + * + * Spills all windowed registers (i.e. registers not visible as + * A0-A15) to their ABI-defined spill regions on the stack. + * + * Unlike the Xtensa HAL implementation, this code requires that window + * exceptions be enabled in PS (WOE set and EXCM clear), and relies on repeated hardware + * exception handling to do the register spills. The trick is to do a + * noop write to the high registers, which the hardware will trap + * (into an overflow exception) in the case where those registers are + * already used by an existing call frame. Then it rotates the window + * and repeats until all but the A0-A3 registers of the original frame + * are guaranteed to be spilled, eventually rotating back around into + * the original frame. Advantages: + * + * - Vastly smaller code size + * + * - More easily maintained if changes are needed to window over/underflow + * exception handling. + * + * - Requires no scratch registers to do its work, so can be used safely in any + * context. + * + * - If the WOE bit is not enabled (for example, in code written for + * the CALL0 ABI), this becomes a silent noop and operates compatibly. + * + * - Hilariously it's ACTUALLY FASTER than the HAL routine. And not + * just a little bit, it's MUCH faster. With a mostly full register + * file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill + * registers with this vs. 279 (!) to do it with + * xthal_spill_windows(). 
+ */ + +.macro SPILL_ALL_WINDOWS +#if XCHAL_NUM_AREGS == 64 + and a12, a12, a12 /* no-op write to a high register: traps into a window overflow exception if that register belongs to a live call frame */ + rotw 3 /* rotate the window and repeat */ + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 4 /* last rotation wraps around to the original frame */ +#elif XCHAL_NUM_AREGS == 32 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a4, a4, a4 + rotw 2 /* last rotation wraps around to the original frame */ +#else +#error Unrecognized XCHAL_NUM_AREGS +#endif +.endm + +#endif \ No newline at end of file diff --git a/components/freertos/xtensa_context.S b/components/freertos/xtensa_context.S index a8a19be7e4..4db99114be 100644 --- a/components/freertos/xtensa_context.S +++ b/components/freertos/xtensa_context.S @@ -51,6 +51,7 @@ NOERROR: .error "C preprocessor needed for this file: make sure its filename\ #include "xtensa_rtos.h" #include "xtensa_context.h" +#include "xt_asm_utils.h" #ifdef XT_USE_OVLY #include @@ -58,8 +59,6 @@ NOERROR: .error "C preprocessor needed for this file: make sure its filename\ .text - - /******************************************************************************* _xt_context_save @@ -97,8 +96,8 @@ Exit conditions: .align 4 .literal_position .align 4 -_xt_context_save: +_xt_context_save: s32i a2, sp, XT_STK_A2 s32i a3, sp, XT_STK_A3 s32i a4, sp, XT_STK_A4 @@ -143,49 +142,15 @@ _xt_context_save: mov a9, a0 /* preserve ret addr */ #endif - #ifndef __XTENSA_CALL0_ABI__ - /* - To spill the reg windows, temp. need pre-interrupt stack ptr and a4-15. - Need to save a9,12,13 temporarily (in frame temps) and recover originals. - Interrupts need to be disabled below XCHAL_EXCM_LEVEL and window overflow - and underflow exceptions disabled (assured by PS.EXCM == 1). - */ s32i a12, sp, XT_STK_TMP0 /* temp. save stuff in stack frame */ s32i a13, sp, XT_STK_TMP1 s32i a9, sp, XT_STK_TMP2 - /* - Save the overlay state if we are supporting overlays. Since we just saved - three registers, we can conveniently use them here. Note that as of now, - overlays only work for windowed calling ABI. 
- */ - #ifdef XT_USE_OVLY - l32i a9, sp, XT_STK_PC /* recover saved PC */ - _xt_overlay_get_state a9, a12, a13 - s32i a9, sp, XT_STK_OVLY /* save overlay state */ - #endif - l32i a12, sp, XT_STK_A12 /* recover original a9,12,13 */ l32i a13, sp, XT_STK_A13 l32i a9, sp, XT_STK_A9 - addi sp, sp, XT_STK_FRMSZ /* restore the interruptee's SP */ - call0 xthal_window_spill_nw /* preserves only a4,5,8,9,12,13 */ - addi sp, sp, -XT_STK_FRMSZ - l32i a12, sp, XT_STK_TMP0 /* recover stuff from stack frame */ - l32i a13, sp, XT_STK_TMP1 - l32i a9, sp, XT_STK_TMP2 - #endif #if XCHAL_EXTRA_SA_SIZE > 0 - /* - NOTE: Normally the xthal_save_extra_nw macro only affects address - registers a2-a5. It is theoretically possible for Xtensa processor - designers to write TIE that causes more address registers to be - affected, but it is generally unlikely. If that ever happens, - more registers need to be saved/restored around this macro invocation. - Here we assume a9,12,13 are preserved. - Future Xtensa tools releases might limit the regs that can be affected. 
- */ addi a2, sp, XT_STK_EXTRA /* where to save it */ # if XCHAL_EXTRA_SA_ALIGN > 16 movi a3, -XCHAL_EXTRA_SA_ALIGN @@ -194,6 +159,38 @@ _xt_context_save: call0 xthal_save_extra_nw /* destroys a0,2,3,4,5 */ #endif + #ifndef __XTENSA_CALL0_ABI__ + #ifdef XT_USE_OVLY + l32i a9, sp, XT_STK_PC /* recover saved PC */ + _xt_overlay_get_state a9, a12, a13 + s32i a9, sp, XT_STK_OVLY /* save overlay state */ + #endif + + rsr a2, PS /* Window exceptions must be enabled before */ + movi a3, PS_INTLEVEL_MASK /* the register windows can be spilled */ + and a2, a2, a3 + bnez a2, _not_l1 + rsr a2, PS + movi a3, PS_INTLEVEL(1) /* For some reason level 1 interrupts do not */ + or a2, a2, a3 /* set INTLEVEL in PS correctly, so we need to */ + wsr a2, PS /* set it manually */ +_not_l1: + rsr a2, PS /* finally unmask the window exceptions by clearing PS.EXCM */ + movi a3, ~(PS_EXCM_MASK) + and a2, a2, a3 + wsr a2, PS + rsync + + addi sp, sp, XT_STK_FRMSZ /* go back to spill register region */ + SPILL_ALL_WINDOWS /* place the live register windows there */ + addi sp, sp, -XT_STK_FRMSZ /* restore the current stack pointer and proceed with the context save */ + + #endif + + l32i a12, sp, XT_STK_TMP0 /* restore the temp saved registers */ + l32i a13, sp, XT_STK_TMP1 + l32i a9, sp, XT_STK_TMP2 /* a9 holds our return address */ + #if XCHAL_EXTRA_SA_SIZE > 0 || !defined(__XTENSA_CALL0_ABI__) mov a0, a9 /* retrieve ret addr */ #endif diff --git a/components/idf_test/include/idf_performance.h b/components/idf_test/include/idf_performance.h index ff8654c0b5..6a67e4b5ea 100644 --- a/components/idf_test/include/idf_performance.h +++ b/components/idf_test/include/idf_performance.h @@ -32,6 +32,10 @@ #define IDF_PERFORMANCE_MAX_ESP32_CYCLES_PER_SQRT 140 // SHA256 hardware throughput at 240MHz, threshold set lower than worst case #define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 9.0 +#define IDF_PERFORMANCE_MAX_SPILL_REG_CYCLES 150 +#define IDF_PERFORMANCE_MAX_ISR_ENTER_CYCLES 290 +#define IDF_PERFORMANCE_MAX_ISR_EXIT_CYCLES 
565 + #define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP 19000 #define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP 180000 diff --git a/components/unity/include/unity_config.h b/components/unity/include/unity_config.h index b23a5fe129..686fb2507c 100644 --- a/components/unity/include/unity_config.h +++ b/components/unity/include/unity_config.h @@ -27,6 +27,14 @@ #define UNITY_EXCLUDE_TIME_H +/** + * @note For some reason setjmp does not work with + * unity. Since it is only used on test entry and + * exit, it should not impact the rest of the test + * framework, so we disable it here. + */ +#define UNITY_EXCLUDE_SETJMP_H + void unity_flush(void); void unity_putc(int c); void unity_gets(char* dst, size_t len); diff --git a/tools/ci/config/target-test.yml b/tools/ci/config/target-test.yml index 565872872a..47044a1c54 100644 --- a/tools/ci/config/target-test.yml +++ b/tools/ci/config/target-test.yml @@ -307,7 +307,7 @@ example_test_012: UT_001: extends: .unit_test_template - parallel: 28 + parallel: 30 tags: - ESP32_IDF - UT_T1_1 @@ -316,7 +316,7 @@ UT_001: UT_002: extends: .unit_test_template - parallel: 9 + parallel: 11 tags: - ESP32_IDF - UT_T1_1 @@ -466,7 +466,7 @@ UT_034: UT_035: extends: .unit_test_template - parallel: 17 + parallel: 20 tags: - ESP32S2BETA_IDF - UT_T1_1