From 271c611485cb0863cee0900ba1c420f1f8570d66 Mon Sep 17 00:00:00 2001 From: Konstantin Kondrashov Date: Thu, 25 Apr 2024 19:13:46 +0300 Subject: [PATCH] feat(esp_system): Print backtrace for both CPUs when cache error does not determine CPU --- .../esp_system/port/arch/xtensa/panic_arch.c | 1 + components/esp_system/port/panic_handler.c | 37 +++++++++++----- .../system/panic/main/include/test_panic.h | 4 ++ .../system/panic/main/test_app_main.c | 2 + .../test_apps/system/panic/main/test_panic.c | 44 +++++++++++++++++++ tools/test_apps/system/panic/pytest_panic.py | 36 +++++++++++++++ 6 files changed, 113 insertions(+), 11 deletions(-) diff --git a/components/esp_system/port/arch/xtensa/panic_arch.c b/components/esp_system/port/arch/xtensa/panic_arch.c index 223915e602..619f121dfa 100644 --- a/components/esp_system/port/arch/xtensa/panic_arch.c +++ b/components/esp_system/port/arch/xtensa/panic_arch.c @@ -341,6 +341,7 @@ static inline void print_cache_err_details(const void *f) break; case EXTMEM_DCACHE_WRITE_FLASH_ST: panic_print_str("Write back error occurred while dcache tries to write back to flash\r\n"); + panic_print_str("The following backtrace may not indicate the code that caused Cache invalid access\r\n"); break; case EXTMEM_MMU_ENTRY_FAULT_ST: vaddr = REG_READ(EXTMEM_CACHE_MMU_FAULT_VADDR_REG); diff --git a/components/esp_system/port/panic_handler.c b/components/esp_system/port/panic_handler.c index a936a01b4e..464df0fe83 100644 --- a/components/esp_system/port/panic_handler.c +++ b/components/esp_system/port/panic_handler.c @@ -116,6 +116,14 @@ static void frame_to_panic_info(void *frame, panic_info_t *info, bool pseudo_exc info->frame = frame; } +#if !CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE +FORCE_INLINE_ATTR __attribute__((__noreturn__)) +void busy_wait(void) +{ + while (1) {;} // infinite loop +} +#endif // !CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE + static void panic_handler(void *frame, bool pseudo_excause) { panic_info_t info = { 0 }; @@ -133,17 +141,24 @@ static void panic_handler(void *frame, bool pseudo_excause) // These are cases where both CPUs both go into panic handler. The following code ensures // only one core proceeds to the system panic handler. if (pseudo_excause) { -#define BUSY_WAIT_IF_TRUE(b) { if (b) while(1); } // For WDT expiry, pause the non-offending core - offending core handles panic - BUSY_WAIT_IF_TRUE(panic_get_cause(frame) == PANIC_RSN_INTWDT_CPU0 && core_id == 1); - BUSY_WAIT_IF_TRUE(panic_get_cause(frame) == PANIC_RSN_INTWDT_CPU1 && core_id == 0); - - // For cache error, pause the non-offending core - offending core handles panic - if (panic_get_cause(frame) == PANIC_RSN_CACHEERR && core_id != esp_cache_err_get_cpuid()) { - // Only print the backtrace for the offending core in case of the cache error - g_exc_frames[core_id] = NULL; - while (1) { - ; + if (panic_get_cause(frame) == PANIC_RSN_INTWDT_CPU0 && core_id == 1) { + busy_wait(); + } else if (panic_get_cause(frame) == PANIC_RSN_INTWDT_CPU1 && core_id == 0) { + busy_wait(); + } else if (panic_get_cause(frame) == PANIC_RSN_CACHEERR) { + // The invalid cache access interrupt calls to the panic handler. + // When the cache interrupt happens, we can not determine the CPU where the + // invalid cache access has occurred. + if (esp_cache_err_get_cpuid() == -1) { + // We can not determine the CPU where the invalid cache access has occurred. + // Print backtraces for both CPUs. + if (core_id != 0) { + busy_wait(); + } + } else if (core_id != esp_cache_err_get_cpuid()) { + g_exc_frames[core_id] = NULL; // Only print the backtrace for the offending core + busy_wait(); } } } @@ -166,7 +181,7 @@ static void panic_handler(void *frame, bool pseudo_excause) #if __XTENSA__ if (!(esp_ptr_executable(esp_cpu_pc_to_addr(panic_get_address(frame))) && (panic_get_address(frame) & 0xC0000000U))) { /* Xtensa ABI sets the 2 MSBs of the PC according to the windowed call size - * Incase the PC is invalid, GDB will fail to translate addresses to function names + * In case the PC is invalid, GDB will fail to translate addresses to function names * Hence replacing the PC to a placeholder address in case of invalid PC */ panic_set_address(frame, (uint32_t)&_invalid_pc_placeholder); diff --git a/tools/test_apps/system/panic/main/include/test_panic.h b/tools/test_apps/system/panic/main/include/test_panic.h index 1121a62648..0bbe656f29 100644 --- a/tools/test_apps/system/panic/main/include/test_panic.h +++ b/tools/test_apps/system/panic/main/include/test_panic.h @@ -53,6 +53,10 @@ void test_assert(void); void test_assert_cache_disabled(void); +void test_assert_cache_write_back_error_can_print_backtrace(void); + +void test_assert_cache_write_back_error_can_print_backtrace2(void); + #ifdef __cplusplus } #endif diff --git a/tools/test_apps/system/panic/main/test_app_main.c b/tools/test_apps/system/panic/main/test_app_main.c index a68db57728..ccb12d130d 100644 --- a/tools/test_apps/system/panic/main/test_app_main.c +++ b/tools/test_apps/system/panic/main/test_app_main.c @@ -100,6 +100,8 @@ void app_main(void) HANDLE_TEST(test_name, test_ub); HANDLE_TEST(test_name, test_assert); HANDLE_TEST(test_name, test_assert_cache_disabled); + HANDLE_TEST(test_name, test_assert_cache_write_back_error_can_print_backtrace); + HANDLE_TEST(test_name, test_assert_cache_write_back_error_can_print_backtrace2); #if CONFIG_TEST_MEMPROT diff --git a/tools/test_apps/system/panic/main/test_panic.c b/tools/test_apps/system/panic/main/test_panic.c index 9c33b74cca..3f6f6faa15 100644 --- a/tools/test_apps/system/panic/main/test_panic.c +++ b/tools/test_apps/system/panic/main/test_panic.c @@ -7,10 +7,12 @@ #include #include #include +#include #include "esp_partition.h" #include "esp_flash.h" #include "esp_system.h" +#include "spi_flash_mmap.h" #include "esp_private/cache_utils.h" #include "esp_memory_utils.h" @@ -19,6 +21,8 @@ #include "freertos/FreeRTOS.h" #include "freertos/task.h" +#include "rom/cache.h" + /* Test utility function */ extern void esp_restart_noos(void) __attribute__ ((noreturn)); @@ -155,6 +159,46 @@ void IRAM_ATTR test_assert_cache_disabled(void) assert(0); } +const char TEST_STR[] = "my_tag"; +void test_assert_cache_write_back_error_can_print_backtrace(void) +{ + printf("1) %p\n", TEST_STR); + *(uint32_t*)TEST_STR = 3; // We changed the rodata string. + // All chips except ESP32S3 stop execution here and raise a LoadStore error on the line above. +#if CONFIG_IDF_TARGET_ESP32S3 + // On the ESP32S3, the error occurs later when the cache writeback is triggered + // (in this test, a direct call to Cache_WriteBack_All). + Cache_WriteBack_All(); // Cache writeback triggers the invalid cache access interrupt. +#endif + // We are testing that the backtrace is printed instead of TG1WDT. + printf("2) %p\n", TEST_STR); // never get to this place. +} + +void test_assert_cache_write_back_error_can_print_backtrace2(void) +{ + printf("1) %p\n", TEST_STR); + *(uint32_t*)TEST_STR = 3; // We changed the rodata string. + // All chips except ESP32S3 stop execution here and raise a LoadStore error on the line above. + // On the ESP32S3, the error occurs later when the cache writeback is triggered + // (in this test, a large range of DRAM is mapped and read, causing an error). + uint8_t temp = 0; + size_t map_size = SPI_FLASH_SEC_SIZE * 512; + const void *map; + spi_flash_mmap_handle_t out_handle; + esp_err_t err = spi_flash_mmap(0, map_size, SPI_FLASH_MMAP_DATA, &map, &out_handle); + if (err != ESP_OK) { + printf("spi_flash_mmap failed %x\n", err); + return; + } + const uint8_t *rodata = map; + for (size_t i = 0; i < map_size; i++) { + temp = rodata[i]; + } + // Cache writeback triggers the invalid cache access interrupt. + // We are testing that the backtrace is printed instead of TG1WDT. + printf("2) %p 0x%" PRIx8 " \n", TEST_STR, temp); // never get to this place. +} + /** * This function overwrites the stack beginning from the valid area continuously towards and beyond * the end of the stack (stack base) of the current task. diff --git a/tools/test_apps/system/panic/pytest_panic.py b/tools/test_apps/system/panic/pytest_panic.py index 2fb7a16ebe..e95c808444 100644 --- a/tools/test_apps/system/panic/pytest_panic.py +++ b/tools/test_apps/system/panic/pytest_panic.py @@ -544,6 +544,42 @@ def test_assert_cache_disabled( ) +def cache_error_log_check(dut: PanicTestDut) -> None: + if dut.is_xtensa: + if dut.target == 'esp32s3': + dut.expect_exact("Guru Meditation Error: Core / panic'ed (Cache disabled but cached memory region accessed)") + dut.expect_exact('Write back error occurred while dcache tries to write back to flash') + dut.expect_exact('The following backtrace may not indicate the code that caused Cache invalid access') + else: + dut.expect_exact("Guru Meditation Error: Core 0 panic'ed (LoadStoreError)") + else: + dut.expect_exact("Guru Meditation Error: Core 0 panic'ed (Store access fault)") + dut.expect_reg_dump(0) + if dut.target == 'esp32s3': + dut.expect_reg_dump(1) + dut.expect_cpu_reset() + + +@pytest.mark.generic +@pytest.mark.supported_targets +@pytest.mark.parametrize('config', ['panic'], indirect=True) +def test_assert_cache_write_back_error_can_print_backtrace( + dut: PanicTestDut, config: str, test_func_name: str +) -> None: + dut.run_test_func(test_func_name) + cache_error_log_check(dut) + + +@pytest.mark.generic +@pytest.mark.supported_targets +@pytest.mark.parametrize('config', ['panic'], indirect=True) +def test_assert_cache_write_back_error_can_print_backtrace2( + dut: PanicTestDut, config: str, test_func_name: str +) -> None: + dut.run_test_func(test_func_name) + cache_error_log_check(dut) + + @pytest.mark.esp32 @pytest.mark.parametrize('config', ['panic_delay'], indirect=True) def test_panic_delay(dut: PanicTestDut) -> None: