feat(esp_system): Print backtrace for both CPUs when cache error does not determine CPU

This commit is contained in:
Konstantin Kondrashov
2024-04-25 19:13:46 +03:00
committed by BOT
parent e488d1945c
commit 271c611485
6 changed files with 113 additions and 11 deletions

View File

@@ -341,6 +341,7 @@ static inline void print_cache_err_details(const void *f)
break;
case EXTMEM_DCACHE_WRITE_FLASH_ST:
panic_print_str("Write back error occurred while dcache tries to write back to flash\r\n");
panic_print_str("The following backtrace may not indicate the code that caused Cache invalid access\r\n");
break;
case EXTMEM_MMU_ENTRY_FAULT_ST:
vaddr = REG_READ(EXTMEM_CACHE_MMU_FAULT_VADDR_REG);

View File

@@ -116,6 +116,14 @@ static void frame_to_panic_info(void *frame, panic_info_t *info, bool pseudo_exc
info->frame = frame;
}
#if !CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE
FORCE_INLINE_ATTR __attribute__((__noreturn__))
void busy_wait(void)
{
while (1) {;} // infinite loop
}
#endif // !CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE
static void panic_handler(void *frame, bool pseudo_excause)
{
panic_info_t info = { 0 };
@@ -133,17 +141,24 @@ static void panic_handler(void *frame, bool pseudo_excause)
// These are cases where both CPUs both go into panic handler. The following code ensures
// only one core proceeds to the system panic handler.
if (pseudo_excause) {
#define BUSY_WAIT_IF_TRUE(b) { if (b) while(1); }
// For WDT expiry, pause the non-offending core - offending core handles panic
BUSY_WAIT_IF_TRUE(panic_get_cause(frame) == PANIC_RSN_INTWDT_CPU0 && core_id == 1);
BUSY_WAIT_IF_TRUE(panic_get_cause(frame) == PANIC_RSN_INTWDT_CPU1 && core_id == 0);
// For cache error, pause the non-offending core - offending core handles panic
if (panic_get_cause(frame) == PANIC_RSN_CACHEERR && core_id != esp_cache_err_get_cpuid()) {
// Only print the backtrace for the offending core in case of the cache error
g_exc_frames[core_id] = NULL;
while (1) {
;
if (panic_get_cause(frame) == PANIC_RSN_INTWDT_CPU0 && core_id == 1) {
busy_wait();
} else if (panic_get_cause(frame) == PANIC_RSN_INTWDT_CPU1 && core_id == 0) {
busy_wait();
} else if (panic_get_cause(frame) == PANIC_RSN_CACHEERR) {
// The invalid cache access interrupt calls to the panic handler.
// When the cache interrupt happens, we can not determine the CPU where the
// invalid cache access has occurred.
if (esp_cache_err_get_cpuid() == -1) {
// We can not determine the CPU where the invalid cache access has occurred.
// Print backtraces for both CPUs.
if (core_id != 0) {
busy_wait();
}
} else if (core_id != esp_cache_err_get_cpuid()) {
g_exc_frames[core_id] = NULL; // Only print the backtrace for the offending core
busy_wait();
}
}
}
@@ -166,7 +181,7 @@ static void panic_handler(void *frame, bool pseudo_excause)
#if __XTENSA__
if (!(esp_ptr_executable(esp_cpu_pc_to_addr(panic_get_address(frame))) && (panic_get_address(frame) & 0xC0000000U))) {
/* Xtensa ABI sets the 2 MSBs of the PC according to the windowed call size
* Incase the PC is invalid, GDB will fail to translate addresses to function names
* In case the PC is invalid, GDB will fail to translate addresses to function names
* Hence replacing the PC to a placeholder address in case of invalid PC
*/
panic_set_address(frame, (uint32_t)&_invalid_pc_placeholder);

View File

@@ -53,6 +53,10 @@ void test_assert(void);
void test_assert_cache_disabled(void);
void test_assert_cache_write_back_error_can_print_backtrace(void);
void test_assert_cache_write_back_error_can_print_backtrace2(void);
#ifdef __cplusplus
}
#endif

View File

@@ -100,6 +100,8 @@ void app_main(void)
HANDLE_TEST(test_name, test_ub);
HANDLE_TEST(test_name, test_assert);
HANDLE_TEST(test_name, test_assert_cache_disabled);
HANDLE_TEST(test_name, test_assert_cache_write_back_error_can_print_backtrace);
HANDLE_TEST(test_name, test_assert_cache_write_back_error_can_print_backtrace2);
#if CONFIG_TEST_MEMPROT

View File

@@ -7,10 +7,12 @@
#include <unistd.h>
#include <assert.h>
#include <string.h>
#include <inttypes.h>
#include "esp_partition.h"
#include "esp_flash.h"
#include "esp_system.h"
#include "spi_flash_mmap.h"
#include "esp_private/cache_utils.h"
#include "esp_memory_utils.h"
@@ -19,6 +21,8 @@
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "rom/cache.h"
/* Test utility function */
extern void esp_restart_noos(void) __attribute__ ((noreturn));
@@ -155,6 +159,46 @@ void IRAM_ATTR test_assert_cache_disabled(void)
assert(0);
}
const char TEST_STR[] = "my_tag";
void test_assert_cache_write_back_error_can_print_backtrace(void)
{
printf("1) %p\n", TEST_STR);
*(uint32_t*)TEST_STR = 3; // We changed the rodata string.
// All chips except ESP32S3 stop execution here and raise a LoadStore error on the line above.
#if CONFIG_IDF_TARGET_ESP32S3
// On the ESP32S3, the error occurs later when the cache writeback is triggered
// (in this test, a direct call to Cache_WriteBack_All).
Cache_WriteBack_All(); // Cache writeback triggers the invalid cache access interrupt.
#endif
// We are testing that the backtrace is printed instead of TG1WDT.
printf("2) %p\n", TEST_STR); // never get to this place.
}
void test_assert_cache_write_back_error_can_print_backtrace2(void)
{
printf("1) %p\n", TEST_STR);
*(uint32_t*)TEST_STR = 3; // We changed the rodata string.
// All chips except ESP32S3 stop execution here and raise a LoadStore error on the line above.
// On the ESP32S3, the error occurs later when the cache writeback is triggered
// (in this test, a large range of DRAM is mapped and read, causing an error).
uint8_t temp = 0;
size_t map_size = SPI_FLASH_SEC_SIZE * 512;
const void *map;
spi_flash_mmap_handle_t out_handle;
esp_err_t err = spi_flash_mmap(0, map_size, SPI_FLASH_MMAP_DATA, &map, &out_handle);
if (err != ESP_OK) {
printf("spi_flash_mmap failed %x\n", err);
return;
}
const uint8_t *rodata = map;
for (size_t i = 0; i < map_size; i++) {
temp = rodata[i];
}
// Cache writeback triggers the invalid cache access interrupt.
// We are testing that the backtrace is printed instead of TG1WDT.
printf("2) %p 0x%" PRIx8 " \n", TEST_STR, temp); // never get to this place.
}
/**
* This function overwrites the stack beginning from the valid area continuously towards and beyond
* the end of the stack (stack base) of the current task.

View File

@@ -544,6 +544,42 @@ def test_assert_cache_disabled(
)
def cache_error_log_check(dut: PanicTestDut) -> None:
if dut.is_xtensa:
if dut.target == 'esp32s3':
dut.expect_exact("Guru Meditation Error: Core / panic'ed (Cache disabled but cached memory region accessed)")
dut.expect_exact('Write back error occurred while dcache tries to write back to flash')
dut.expect_exact('The following backtrace may not indicate the code that caused Cache invalid access')
else:
dut.expect_exact("Guru Meditation Error: Core 0 panic'ed (LoadStoreError)")
else:
dut.expect_exact("Guru Meditation Error: Core 0 panic'ed (Store access fault)")
dut.expect_reg_dump(0)
if dut.target == 'esp32s3':
dut.expect_reg_dump(1)
dut.expect_cpu_reset()
@pytest.mark.generic
@pytest.mark.supported_targets
@pytest.mark.parametrize('config', ['panic'], indirect=True)
def test_assert_cache_write_back_error_can_print_backtrace(
dut: PanicTestDut, config: str, test_func_name: str
) -> None:
dut.run_test_func(test_func_name)
cache_error_log_check(dut)
@pytest.mark.generic
@pytest.mark.supported_targets
@pytest.mark.parametrize('config', ['panic'], indirect=True)
def test_assert_cache_write_back_error_can_print_backtrace2(
dut: PanicTestDut, config: str, test_func_name: str
) -> None:
dut.run_test_func(test_func_name)
cache_error_log_check(dut)
@pytest.mark.esp32
@pytest.mark.parametrize('config', ['panic_delay'], indirect=True)
def test_panic_delay(dut: PanicTestDut) -> None: