From 096cb409d9779012458305d997d473c1c7d7236b Mon Sep 17 00:00:00 2001 From: Marius Vikhammer Date: Mon, 14 Oct 2024 14:29:46 +0200 Subject: [PATCH] feat(panic): panic immediatly if trying to write to flash through cache on ESP32-S3 Updated S3 to use PMS protection for writing to flash through cache. This means we get a panic quicker for this illegal behavior than we did before, making the source of the error easier to track down. --- .../esp_system/port/arch/xtensa/panic_arch.c | 2 +- .../private/esp_private/cache_err_int.h | 12 ++ components/esp_system/port/panic_handler.c | 6 + .../port/soc/esp32s3/cache_err_int.c | 82 +++++++++-- components/hal/esp32s3/include/hal/cache_ll.h | 17 +++ components/hal/esp32s3/include/hal/mspi_ll.h | 127 ++++++++++++++++++ .../esp32s3/include/soc/Kconfig.soc_caps.in | 4 + components/soc/esp32s3/include/soc/soc_caps.h | 1 + .../system/panic/main/include/test_panic.h | 2 - .../system/panic/main/test_app_main.c | 1 - .../test_apps/system/panic/main/test_panic.c | 30 +---- tools/test_apps/system/panic/pytest_panic.py | 17 +-- 12 files changed, 244 insertions(+), 57 deletions(-) create mode 100644 components/hal/esp32s3/include/hal/mspi_ll.h diff --git a/components/esp_system/port/arch/xtensa/panic_arch.c b/components/esp_system/port/arch/xtensa/panic_arch.c index 610a4a0468..60518cc565 100644 --- a/components/esp_system/port/arch/xtensa/panic_arch.c +++ b/components/esp_system/port/arch/xtensa/panic_arch.c @@ -213,7 +213,7 @@ static inline void print_cache_err_details(const void *f) } if (err.size) { panic_print_str(", error size: 0x"); - panic_print_hex(err.vaddr); + panic_print_hex(err.size); } } else { // Default to cache disabled message if no specific error is found diff --git a/components/esp_system/port/include/private/esp_private/cache_err_int.h b/components/esp_system/port/include/private/esp_private/cache_err_int.h index 369257358c..c359ea7d5a 100644 --- a/components/esp_system/port/include/private/esp_private/cache_err_int.h +++ b/components/esp_system/port/include/private/esp_private/cache_err_int.h @@ -58,6 +58,18 @@ void esp_cache_err_get_panic_info(esp_cache_err_info_t *err_info); */ bool esp_cache_err_has_active_err(void); +#if SOC_CACHE_ACS_INVALID_STATE_ON_PANIC +/** + * @brief Saves and clears active access errors + * + * @note Some access errors needs to be cleared to allow the cache to operate again, + * otherwise accessing e.g rodata from flash might give invalid data + * + */ +void esp_cache_err_acs_save_and_clr(void); + +#endif //SOC_CACHE_ACS_INVALID_STATE_ON_PANIC + #ifdef __cplusplus } #endif diff --git a/components/esp_system/port/panic_handler.c b/components/esp_system/port/panic_handler.c index 14d0f39711..6ecae0335e 100644 --- a/components/esp_system/port/panic_handler.c +++ b/components/esp_system/port/panic_handler.c @@ -232,6 +232,12 @@ static void IRAM_ATTR panic_enable_cache(void) esp_ipc_isr_stall_abort(); spi_flash_enable_cache(core_id); } + +#if SOC_CACHE_ACS_INVALID_STATE_ON_PANIC + // Some errors need to be cleared here to allow cache to operate normally again + // for certain circumstances. + esp_cache_err_acs_save_and_clr(); +#endif //SOC_CACHE_ACS_INVALID_STATE_ON_PANIC } #endif diff --git a/components/esp_system/port/soc/esp32s3/cache_err_int.c b/components/esp_system/port/soc/esp32s3/cache_err_int.c index 9e6499b86a..93f9c816b9 100644 --- a/components/esp_system/port/soc/esp32s3/cache_err_int.c +++ b/components/esp_system/port/soc/esp32s3/cache_err_int.c @@ -23,7 +23,9 @@ #include "soc/periph_defs.h" #include "esp_rom_sys.h" #include "hal/cache_ll.h" +#include "hal/mspi_ll.h" #include "soc/syscon_struct.h" +#include "soc/extmem_reg.h" #include "esp_private/cache_err_int.h" static const char *TAG = "CACHE_ERR"; @@ -42,6 +44,10 @@ typedef struct { const uint32_t fault_size_reg; } register_bit_t; +static uint32_t access_error_intr_status; +static uint32_t acs_fault_addr; +static uint32_t cache_err_cpu_id; + /* Define the array that contains the status (bits) to test on the register * EXTMEM_CACHE_ILG_INT_ST_REG. each bit is accompanied by a small * message. @@ -87,6 +93,15 @@ const register_bit_t ilg_int_st_bits[] = { }, }; +const register_bit_t acs_int_st_bits[] = { + { + .bit = CACHE_LL_L1_ACCESS_EVENT_DBUS_REJECT, + .msg = "Dbus write to cache rejected", + .fault_addr_reg = (uint32_t) &acs_fault_addr, + .fault_size_reg = 0, + }, +}; + /** * Function to check each bits defined in the array reg_bits in the given * status register. The first bit from the array to be set in the status @@ -124,6 +139,33 @@ void esp_cache_err_get_panic_info(esp_cache_err_info_t *err_info) get_cache_error(illegal_intr_status, ilg_int_st_bits, DIM(ilg_int_st_bits), err_info); + // If no error reported above we check if the error came from ACS + if (err_info->err_str == NULL) { + uint32_t st = access_error_intr_status; + get_cache_error(st, acs_int_st_bits, DIM(acs_int_st_bits), err_info); + } +} + +void esp_cache_err_acs_save_and_clr(void) +{ + if (cache_ll_l1_get_access_error_intr_status(0, CACHE_LL_L1_ACCESS_EVENT_MASK)) { + cache_err_cpu_id = PRO_CPU_NUM; + } else if (cache_ll_l1_get_access_error_intr_status(1, CACHE_LL_L1_ACCESS_EVENT_MASK)) { + cache_err_cpu_id = APP_CPU_NUM; + } else { + cache_err_cpu_id = -1; + return; + } + + // Certain errors needs to be cleared if the cache is to continue functioning properly. + // E.g. for CACHE_LL_L1_ACCESS_EVENT_DBUS_REJECT errors the cache will sometimes end up in an invalid state + // where the panic handler will then be unable to access rodata from flash + // Store the error information before clearing, as it will be used later when reporting + access_error_intr_status = cache_ll_l1_get_access_error_intr_status(cache_err_cpu_id, CACHE_LL_L1_ACCESS_EVENT_MASK); + if (access_error_intr_status & CACHE_LL_L1_ACCESS_EVENT_DBUS_REJECT) { + acs_fault_addr = cache_ll_get_acs_dbus_reject_vaddr(cache_err_cpu_id); + cache_ll_l1_clear_access_error_intr(cache_err_cpu_id, CACHE_LL_L1_ACCESS_EVENT_DBUS_REJECT); + } } void esp_cache_err_int_init(void) @@ -167,18 +209,40 @@ void esp_cache_err_int_init(void) cache_ll_l1_enable_access_error_intr(1, CACHE_LL_L1_ACCESS_EVENT_MASK); } + if (core_id == 0) { + // Set up flash access permissions to disallow write from cache. + // Max flash size supported on ESP32-S3 is 1Gb. + // flash_aceN_size is in units of 64kB pages. + // This configuration covers the entire flash region. + uint32_t max_flash_size = 1024 * 1024 * 1024 / (64 * 1024); + + mspi_ll_flash_ace_ctrl_t ctrl = { + .sec_x = 1, + .sec_r = 1, + .sec_w = 0, + .nsec_x = 1, + .nsec_r = 1, + .nsec_w = 0, + .spi1_r = 1, + .spi1_w = 1, + }; + + mspi_ll_set_flash_protection_addr(0, 0x0); + mspi_ll_set_flash_protection_size(0, max_flash_size); + + // Set other flash_aceN_size to 0 to disable them. + mspi_ll_set_flash_protection_size(1, 0); + mspi_ll_set_flash_protection_size(2, 0); + mspi_ll_set_flash_protection_size(3, 0); + + mspi_ll_set_flash_protection_access(0, ctrl); + + } + ESP_INTR_ENABLE(ETS_CACHEERR_INUM); } int esp_cache_err_get_cpuid(void) { - if (cache_ll_l1_get_access_error_intr_status(0, CACHE_LL_L1_ACCESS_EVENT_MASK)) { - return PRO_CPU_NUM; - } - - if (cache_ll_l1_get_access_error_intr_status(1, CACHE_LL_L1_ACCESS_EVENT_MASK)) { - return APP_CPU_NUM; - } - - return -1; + return cache_err_cpu_id; } diff --git a/components/hal/esp32s3/include/hal/cache_ll.h b/components/hal/esp32s3/include/hal/cache_ll.h index 9ab5b7f5ff..463387390d 100644 --- a/components/hal/esp32s3/include/hal/cache_ll.h +++ b/components/hal/esp32s3/include/hal/cache_ll.h @@ -752,6 +752,23 @@ static inline uint32_t cache_ll_l1_get_illegal_error_intr_status(uint32_t cache_ return GET_PERI_REG_MASK(EXTMEM_CACHE_ILG_INT_ST_REG, mask); } +/** + * @brief Read vaddr that caused acs dbus reject error + * + * @param cache_id cache id to get vaddr from + * + * @return vaddr that cause the acs dbus reject error + */ +__attribute__((always_inline)) +static inline uint32_t cache_ll_get_acs_dbus_reject_vaddr(uint32_t cache_id) +{ + if (cache_id == 0) { + return REG_READ(EXTMEM_CORE0_DBUS_REJECT_VADDR_REG); + } else { + return REG_READ(EXTMEM_CORE1_DBUS_REJECT_VADDR_REG); + } +} + #ifdef __cplusplus } #endif diff --git a/components/hal/esp32s3/include/hal/mspi_ll.h b/components/hal/esp32s3/include/hal/mspi_ll.h new file mode 100644 index 0000000000..8bb8fce0b0 --- /dev/null +++ b/components/hal/esp32s3/include/hal/mspi_ll.h @@ -0,0 +1,127 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/******************************************************************************* + * NOTICE + * The ll is not public api, don't use in application code. + * See readme.md in hal/include/hal/readme.md + ******************************************************************************/ + +#pragma once + +#include +#include +#include "soc/syscon_struct.h" +#include "hal/assert.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * flash access control config struct + */ +typedef union { + uint32_t val; + struct { + uint32_t sec_x: 1; // Secure world cache execute access + uint32_t sec_r: 1; // Secure world cache read access + uint32_t sec_w: 1; // Secure world cache write access + uint32_t nsec_x: 1; // Non-secure world cache execute access + uint32_t nsec_r: 1; // Non-secure world cache execute read + uint32_t nsec_w: 1; // Non-secure world cache execute write + uint32_t spi1_r: 1; // SPI1 read access + uint32_t spi1_w: 1; // SPI1 write access + }; +} mspi_ll_flash_ace_ctrl_t; + +/** + * @brief Set PMS flash protection start address + * + * @param region PMS region id (0~3) + * @param address Starting address + * + */ +__attribute__((always_inline)) +static inline void mspi_ll_set_flash_protection_addr(uint32_t region, uint32_t address) +{ + switch(region){ + case 0: + SYSCON.flash_ace0_addr = address; + break; + case 1: + SYSCON.flash_ace1_addr = address; + break; + case 2: + SYSCON.flash_ace2_addr = address; + break; + case 3: + SYSCON.flash_ace3_addr = address; + break; + default: + HAL_ASSERT(false); + } +} + +/** + * @brief Set PMS flash protection size + * + * @param region PMS region id (0~3) + * @param size Size, in number of 64kB pages + * + */ +__attribute__((always_inline)) +static inline void mspi_ll_set_flash_protection_size(uint32_t region, uint32_t size) +{ + switch(region){ + case 0: + SYSCON.flash_ace0_size.flash_ace0_size = size; + break; + case 1: + SYSCON.flash_ace1_size.flash_ace1_size = size; + break; + case 2: + SYSCON.flash_ace2_size.flash_ace2_size = size; + break; + case 3: + SYSCON.flash_ace3_size.flash_ace3_size = size; + break; + default: + HAL_ASSERT(false); + } +} + +/** + * @brief Set PMS flash protection access ctrl bits + * + * @param region PMS region id (0~3) + * @param address ctrl bits, RWX per mode + * + */ +__attribute__((always_inline)) +static inline void mspi_ll_set_flash_protection_access(uint32_t region, mspi_ll_flash_ace_ctrl_t ctrl) +{ + switch(region){ + case 0: + SYSCON.flash_ace0_attr.flash_ace0_attr = ctrl.val; + break; + case 1: + SYSCON.flash_ace1_attr.flash_ace1_attr = ctrl.val; + break; + case 2: + SYSCON.flash_ace2_attr.flash_ace2_attr = ctrl.val; + break; + case 3: + SYSCON.flash_ace3_attr.flash_ace3_attr = ctrl.val; + break; + default: + HAL_ASSERT(false); + } +} + +#ifdef __cplusplus +} +#endif diff --git a/components/soc/esp32s3/include/soc/Kconfig.soc_caps.in b/components/soc/esp32s3/include/soc/Kconfig.soc_caps.in index 45e03084ba..8dcae00335 100644 --- a/components/soc/esp32s3/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32s3/include/soc/Kconfig.soc_caps.in @@ -383,6 +383,10 @@ config SOC_CACHE_FREEZE_SUPPORTED bool default y +config SOC_CACHE_ACS_INVALID_STATE_ON_PANIC + bool + default y + config SOC_CPU_CORES_NUM int default 2 diff --git a/components/soc/esp32s3/include/soc/soc_caps.h b/components/soc/esp32s3/include/soc/soc_caps.h index fc00755fa0..62cea93f24 100644 --- a/components/soc/esp32s3/include/soc/soc_caps.h +++ b/components/soc/esp32s3/include/soc/soc_caps.h @@ -136,6 +136,7 @@ /*-------------------------- CACHE CAPS --------------------------------------*/ #define SOC_CACHE_WRITEBACK_SUPPORTED 1 #define SOC_CACHE_FREEZE_SUPPORTED 1 +#define SOC_CACHE_ACS_INVALID_STATE_ON_PANIC 1 /*!< Certain ACS errors may cause the cache to be in a bad state until cleared */ /*-------------------------- CPU CAPS ----------------------------------------*/ #define SOC_CPU_CORES_NUM 2 diff --git a/tools/test_apps/system/panic/main/include/test_panic.h b/tools/test_apps/system/panic/main/include/test_panic.h index 32d0512655..e4cb6de8a0 100644 --- a/tools/test_apps/system/panic/main/include/test_panic.h +++ b/tools/test_apps/system/panic/main/include/test_panic.h @@ -64,8 +64,6 @@ void test_assert_cache_disabled(void); void test_assert_cache_write_back_error_can_print_backtrace(void); -void test_assert_cache_write_back_error_can_print_backtrace2(void); - void test_illegal_access(void); void test_capture_dram(void); diff --git a/tools/test_apps/system/panic/main/test_app_main.c b/tools/test_apps/system/panic/main/test_app_main.c index a824e25e5b..a51d22f360 100644 --- a/tools/test_apps/system/panic/main/test_app_main.c +++ b/tools/test_apps/system/panic/main/test_app_main.c @@ -118,7 +118,6 @@ void app_main(void) HANDLE_TEST(test_name, test_assert); HANDLE_TEST(test_name, test_assert_cache_disabled); HANDLE_TEST(test_name, test_assert_cache_write_back_error_can_print_backtrace); - HANDLE_TEST(test_name, test_assert_cache_write_back_error_can_print_backtrace2); HANDLE_TEST(test_name, test_tcb_corrupted); #if CONFIG_ESP_COREDUMP_ENABLE_TO_FLASH && CONFIG_ESP_COREDUMP_DATA_FORMAT_ELF HANDLE_TEST(test_name, test_setup_coredump_summary); diff --git a/tools/test_apps/system/panic/main/test_panic.c b/tools/test_apps/system/panic/main/test_panic.c index eafc9daddf..9564be2aa4 100644 --- a/tools/test_apps/system/panic/main/test_panic.c +++ b/tools/test_apps/system/panic/main/test_panic.c @@ -196,39 +196,11 @@ void test_assert_cache_write_back_error_can_print_backtrace(void) printf("1) %p\n", TEST_STR); *(uint32_t*)TEST_STR = 3; // We changed the rodata string. // All chips except ESP32S3 stop execution here and raise a LoadStore error on the line above. -#if CONFIG_IDF_TARGET_ESP32S3 - // On the ESP32S3, the error occurs later when the cache writeback is triggered - // (in this test, a direct call to Cache_WriteBack_All). - Cache_WriteBack_All(); // Cache writeback triggers the invalid cache access interrupt. -#endif + // We are testing that the backtrace is printed instead of TG1WDT. printf("2) %p\n", TEST_STR); // never get to this place. } -void test_assert_cache_write_back_error_can_print_backtrace2(void) -{ - printf("1) %p\n", TEST_STR); - *(uint32_t*)TEST_STR = 3; // We changed the rodata string. - // All chips except ESP32S3 stop execution here and raise a LoadStore error on the line above. - // On the ESP32S3, the error occurs later when the cache writeback is triggered - // (in this test, a large range of DRAM is mapped and read, causing an error). - uint8_t temp = 0; - size_t map_size = SPI_FLASH_SEC_SIZE * 512; - const void *map; - spi_flash_mmap_handle_t out_handle; - esp_err_t err = spi_flash_mmap(0, map_size, SPI_FLASH_MMAP_DATA, &map, &out_handle); - if (err != ESP_OK) { - printf("spi_flash_mmap failed %x\n", err); - return; - } - const uint8_t *rodata = map; - for (size_t i = 0; i < map_size; i++) { - temp = rodata[i]; - } - // Cache writeback triggers the invalid cache access interrupt. - // We are testing that the backtrace is printed instead of TG1WDT. - printf("2) %p 0x%" PRIx8 " \n", TEST_STR, temp); // never get to this place. -} /** * This function overwrites the stack beginning from the valid area continuously towards and beyond diff --git a/tools/test_apps/system/panic/pytest_panic.py b/tools/test_apps/system/panic/pytest_panic.py index 4f522488e0..09ec8337b5 100644 --- a/tools/test_apps/system/panic/pytest_panic.py +++ b/tools/test_apps/system/panic/pytest_panic.py @@ -573,16 +573,13 @@ def test_assert_cache_disabled( def cache_error_log_check(dut: PanicTestDut) -> None: if dut.is_xtensa: if dut.target == 'esp32s3': - dut.expect_exact("Guru Meditation Error: Core / panic'ed (Cache error)") - dut.expect_exact('Write back error occurred while dcache tries to write back to flash') - dut.expect_exact('The following backtrace may not indicate the code that caused Cache invalid access') + dut.expect_exact("Guru Meditation Error: Core 0 panic'ed (Cache error)") + dut.expect_exact('Dbus write to cache rejected, error address') else: dut.expect_exact("Guru Meditation Error: Core 0 panic'ed (LoadStoreError)") else: dut.expect_exact("Guru Meditation Error: Core 0 panic'ed (Store access fault)") dut.expect_reg_dump(0) - if dut.target == 'esp32s3': - dut.expect_reg_dump(1) dut.expect_cpu_reset() @@ -596,16 +593,6 @@ def test_assert_cache_write_back_error_can_print_backtrace( cache_error_log_check(dut) -@pytest.mark.generic -@pytest.mark.supported_targets -@pytest.mark.parametrize('config', ['panic'], indirect=True) -def test_assert_cache_write_back_error_can_print_backtrace2( - dut: PanicTestDut, config: str, test_func_name: str -) -> None: - dut.run_test_func(test_func_name) - cache_error_log_check(dut) - - @pytest.mark.esp32 @pytest.mark.generic @pytest.mark.parametrize('config', ['panic_delay'], indirect=True)