feat(panic): panic immediatly if trying to write to flash through cache on ESP32-S3

Updated S3 to use PMS protection for writing to flash through cache. This means we get
a panic quicker for this illegal behavior than we did before, making the source of the error
easier to track down.
This commit is contained in:
Marius Vikhammer
2024-10-14 14:29:46 +02:00
parent f8ddcee8cd
commit 096cb409d9
12 changed files with 244 additions and 57 deletions

View File

@@ -213,7 +213,7 @@ static inline void print_cache_err_details(const void *f)
}
if (err.size) {
panic_print_str(", error size: 0x");
panic_print_hex(err.vaddr);
panic_print_hex(err.size);
}
} else {
// Default to cache disabled message if no specific error is found

View File

@@ -58,6 +58,18 @@ void esp_cache_err_get_panic_info(esp_cache_err_info_t *err_info);
*/
bool esp_cache_err_has_active_err(void);
#if SOC_CACHE_ACS_INVALID_STATE_ON_PANIC
/**
* @brief Saves and clears active access errors
*
* @note Some access errors needs to be cleared to allow the cache to operate again,
* otherwise accessing e.g rodata from flash might give invalid data
*
*/
void esp_cache_err_acs_save_and_clr(void);
#endif //SOC_CACHE_ACS_INVALID_STATE_ON_PANIC
#ifdef __cplusplus
}
#endif

View File

@@ -232,6 +232,12 @@ static void IRAM_ATTR panic_enable_cache(void)
esp_ipc_isr_stall_abort();
spi_flash_enable_cache(core_id);
}
#if SOC_CACHE_ACS_INVALID_STATE_ON_PANIC
// Some errors need to be cleared here to allow cache to operate normally again
// for certain circumstances.
esp_cache_err_acs_save_and_clr();
#endif //SOC_CACHE_ACS_INVALID_STATE_ON_PANIC
}
#endif

View File

@@ -23,7 +23,9 @@
#include "soc/periph_defs.h"
#include "esp_rom_sys.h"
#include "hal/cache_ll.h"
#include "hal/mspi_ll.h"
#include "soc/syscon_struct.h"
#include "soc/extmem_reg.h"
#include "esp_private/cache_err_int.h"
static const char *TAG = "CACHE_ERR";
@@ -42,6 +44,10 @@ typedef struct {
const uint32_t fault_size_reg;
} register_bit_t;
static uint32_t access_error_intr_status;
static uint32_t acs_fault_addr;
static uint32_t cache_err_cpu_id;
/* Define the array that contains the status (bits) to test on the register
* EXTMEM_CACHE_ILG_INT_ST_REG. each bit is accompanied by a small
* message.
@@ -87,6 +93,15 @@ const register_bit_t ilg_int_st_bits[] = {
},
};
const register_bit_t acs_int_st_bits[] = {
{
.bit = CACHE_LL_L1_ACCESS_EVENT_DBUS_REJECT,
.msg = "Dbus write to cache rejected",
.fault_addr_reg = (uint32_t) &acs_fault_addr,
.fault_size_reg = 0,
},
};
/**
* Function to check each bits defined in the array reg_bits in the given
* status register. The first bit from the array to be set in the status
@@ -124,6 +139,33 @@ void esp_cache_err_get_panic_info(esp_cache_err_info_t *err_info)
get_cache_error(illegal_intr_status, ilg_int_st_bits, DIM(ilg_int_st_bits), err_info);
// If no error reported above we check if the error came from ACS
if (err_info->err_str == NULL) {
uint32_t st = access_error_intr_status;
get_cache_error(st, acs_int_st_bits, DIM(acs_int_st_bits), err_info);
}
}
void esp_cache_err_acs_save_and_clr(void)
{
if (cache_ll_l1_get_access_error_intr_status(0, CACHE_LL_L1_ACCESS_EVENT_MASK)) {
cache_err_cpu_id = PRO_CPU_NUM;
} else if (cache_ll_l1_get_access_error_intr_status(1, CACHE_LL_L1_ACCESS_EVENT_MASK)) {
cache_err_cpu_id = APP_CPU_NUM;
} else {
cache_err_cpu_id = -1;
return;
}
// Certain errors needs to be cleared if the cache is to continue functioning properly.
// E.g. for CACHE_LL_L1_ACCESS_EVENT_DBUS_REJECT errors the cache will sometimes end up in an invalid state
// where the panic handler will then be unable to access rodata from flash
// Store the error information before clearing, as it will be used later when reporting
access_error_intr_status = cache_ll_l1_get_access_error_intr_status(cache_err_cpu_id, CACHE_LL_L1_ACCESS_EVENT_MASK);
if (access_error_intr_status & CACHE_LL_L1_ACCESS_EVENT_DBUS_REJECT) {
acs_fault_addr = cache_ll_get_acs_dbus_reject_vaddr(cache_err_cpu_id);
cache_ll_l1_clear_access_error_intr(cache_err_cpu_id, CACHE_LL_L1_ACCESS_EVENT_DBUS_REJECT);
}
}
void esp_cache_err_int_init(void)
@@ -167,18 +209,40 @@ void esp_cache_err_int_init(void)
cache_ll_l1_enable_access_error_intr(1, CACHE_LL_L1_ACCESS_EVENT_MASK);
}
if (core_id == 0) {
// Set up flash access permissions to disallow write from cache.
// Max flash size supported on ESP32-S3 is 1Gb.
// flash_aceN_size is in units of 64kB pages.
// This configuration covers the entire flash region.
uint32_t max_flash_size = 1024 * 1024 * 1024 / (64 * 1024);
mspi_ll_flash_ace_ctrl_t ctrl = {
.sec_x = 1,
.sec_r = 1,
.sec_w = 0,
.nsec_x = 1,
.nsec_r = 1,
.nsec_w = 0,
.spi1_r = 1,
.spi1_w = 1,
};
mspi_ll_set_flash_protection_addr(0, 0x0);
mspi_ll_set_flash_protection_size(0, max_flash_size);
// Set other flash_aceN_size to 0 to disable them.
mspi_ll_set_flash_protection_size(1, 0);
mspi_ll_set_flash_protection_size(2, 0);
mspi_ll_set_flash_protection_size(3, 0);
mspi_ll_set_flash_protection_access(0, ctrl);
}
ESP_INTR_ENABLE(ETS_CACHEERR_INUM);
}
int esp_cache_err_get_cpuid(void)
{
if (cache_ll_l1_get_access_error_intr_status(0, CACHE_LL_L1_ACCESS_EVENT_MASK)) {
return PRO_CPU_NUM;
}
if (cache_ll_l1_get_access_error_intr_status(1, CACHE_LL_L1_ACCESS_EVENT_MASK)) {
return APP_CPU_NUM;
}
return -1;
return cache_err_cpu_id;
}

View File

@@ -752,6 +752,23 @@ static inline uint32_t cache_ll_l1_get_illegal_error_intr_status(uint32_t cache_
return GET_PERI_REG_MASK(EXTMEM_CACHE_ILG_INT_ST_REG, mask);
}
/**
* @brief Read vaddr that caused acs dbus reject error
*
* @param cache_id cache id to get vaddr from
*
* @return vaddr that cause the acs dbus reject error
*/
__attribute__((always_inline))
static inline uint32_t cache_ll_get_acs_dbus_reject_vaddr(uint32_t cache_id)
{
if (cache_id == 0) {
return REG_READ(EXTMEM_CORE0_DBUS_REJECT_VADDR_REG);
} else {
return REG_READ(EXTMEM_CORE1_DBUS_REJECT_VADDR_REG);
}
}
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,127 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
/*******************************************************************************
* NOTICE
* The ll is not public api, don't use in application code.
* See readme.md in hal/include/hal/readme.md
******************************************************************************/
#pragma once
#include <stdint.h>
#include <stdbool.h>
#include "soc/syscon_struct.h"
#include "hal/assert.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* flash access control config struct
*/
typedef union {
uint32_t val;
struct {
uint32_t sec_x: 1; // Secure world cache execute access
uint32_t sec_r: 1; // Secure world cache read access
uint32_t sec_w: 1; // Secure world cache write access
uint32_t nsec_x: 1; // Non-secure world cache execute access
uint32_t nsec_r: 1; // Non-secure world cache execute read
uint32_t nsec_w: 1; // Non-secure world cache execute write
uint32_t spi1_r: 1; // SPI1 read access
uint32_t spi1_w: 1; // SPI1 write access
};
} mspi_ll_flash_ace_ctrl_t;
/**
* @brief Set PMS flash protection start address
*
* @param region PMS region id (0~3)
* @param address Starting address
*
*/
__attribute__((always_inline))
static inline void mspi_ll_set_flash_protection_addr(uint32_t region, uint32_t address)
{
switch(region){
case 0:
SYSCON.flash_ace0_addr = address;
break;
case 1:
SYSCON.flash_ace1_addr = address;
break;
case 2:
SYSCON.flash_ace2_addr = address;
break;
case 3:
SYSCON.flash_ace3_addr = address;
break;
default:
HAL_ASSERT(false);
}
}
/**
* @brief Set PMS flash protection size
*
* @param region PMS region id (0~3)
* @param size Size, in number of 64kB pages
*
*/
__attribute__((always_inline))
static inline void mspi_ll_set_flash_protection_size(uint32_t region, uint32_t size)
{
switch(region){
case 0:
SYSCON.flash_ace0_size.flash_ace0_size = size;
break;
case 1:
SYSCON.flash_ace1_size.flash_ace1_size = size;
break;
case 2:
SYSCON.flash_ace2_size.flash_ace2_size = size;
break;
case 3:
SYSCON.flash_ace3_size.flash_ace3_size = size;
break;
default:
HAL_ASSERT(false);
}
}
/**
* @brief Set PMS flash protection access ctrl bits
*
* @param region PMS region id (0~3)
* @param address ctrl bits, RWX per mode
*
*/
__attribute__((always_inline))
static inline void mspi_ll_set_flash_protection_access(uint32_t region, mspi_ll_flash_ace_ctrl_t ctrl)
{
switch(region){
case 0:
SYSCON.flash_ace0_attr.flash_ace0_attr = ctrl.val;
break;
case 1:
SYSCON.flash_ace1_attr.flash_ace1_attr = ctrl.val;
break;
case 2:
SYSCON.flash_ace2_attr.flash_ace2_attr = ctrl.val;
break;
case 3:
SYSCON.flash_ace3_attr.flash_ace3_attr = ctrl.val;
break;
default:
HAL_ASSERT(false);
}
}
#ifdef __cplusplus
}
#endif

View File

@@ -383,6 +383,10 @@ config SOC_CACHE_FREEZE_SUPPORTED
bool
default y
config SOC_CACHE_ACS_INVALID_STATE_ON_PANIC
bool
default y
config SOC_CPU_CORES_NUM
int
default 2

View File

@@ -136,6 +136,7 @@
/*-------------------------- CACHE CAPS --------------------------------------*/
#define SOC_CACHE_WRITEBACK_SUPPORTED 1
#define SOC_CACHE_FREEZE_SUPPORTED 1
#define SOC_CACHE_ACS_INVALID_STATE_ON_PANIC 1 /*!< Certain ACS errors may cause the cache to be in a bad state until cleared */
/*-------------------------- CPU CAPS ----------------------------------------*/
#define SOC_CPU_CORES_NUM 2

View File

@@ -64,8 +64,6 @@ void test_assert_cache_disabled(void);
void test_assert_cache_write_back_error_can_print_backtrace(void);
void test_assert_cache_write_back_error_can_print_backtrace2(void);
void test_illegal_access(void);
void test_capture_dram(void);

View File

@@ -118,7 +118,6 @@ void app_main(void)
HANDLE_TEST(test_name, test_assert);
HANDLE_TEST(test_name, test_assert_cache_disabled);
HANDLE_TEST(test_name, test_assert_cache_write_back_error_can_print_backtrace);
HANDLE_TEST(test_name, test_assert_cache_write_back_error_can_print_backtrace2);
HANDLE_TEST(test_name, test_tcb_corrupted);
#if CONFIG_ESP_COREDUMP_ENABLE_TO_FLASH && CONFIG_ESP_COREDUMP_DATA_FORMAT_ELF
HANDLE_TEST(test_name, test_setup_coredump_summary);

View File

@@ -196,39 +196,11 @@ void test_assert_cache_write_back_error_can_print_backtrace(void)
printf("1) %p\n", TEST_STR);
*(uint32_t*)TEST_STR = 3; // We changed the rodata string.
// All chips except ESP32S3 stop execution here and raise a LoadStore error on the line above.
#if CONFIG_IDF_TARGET_ESP32S3
// On the ESP32S3, the error occurs later when the cache writeback is triggered
// (in this test, a direct call to Cache_WriteBack_All).
Cache_WriteBack_All(); // Cache writeback triggers the invalid cache access interrupt.
#endif
// We are testing that the backtrace is printed instead of TG1WDT.
printf("2) %p\n", TEST_STR); // never get to this place.
}
void test_assert_cache_write_back_error_can_print_backtrace2(void)
{
printf("1) %p\n", TEST_STR);
*(uint32_t*)TEST_STR = 3; // We changed the rodata string.
// All chips except ESP32S3 stop execution here and raise a LoadStore error on the line above.
// On the ESP32S3, the error occurs later when the cache writeback is triggered
// (in this test, a large range of DRAM is mapped and read, causing an error).
uint8_t temp = 0;
size_t map_size = SPI_FLASH_SEC_SIZE * 512;
const void *map;
spi_flash_mmap_handle_t out_handle;
esp_err_t err = spi_flash_mmap(0, map_size, SPI_FLASH_MMAP_DATA, &map, &out_handle);
if (err != ESP_OK) {
printf("spi_flash_mmap failed %x\n", err);
return;
}
const uint8_t *rodata = map;
for (size_t i = 0; i < map_size; i++) {
temp = rodata[i];
}
// Cache writeback triggers the invalid cache access interrupt.
// We are testing that the backtrace is printed instead of TG1WDT.
printf("2) %p 0x%" PRIx8 " \n", TEST_STR, temp); // never get to this place.
}
/**
* This function overwrites the stack beginning from the valid area continuously towards and beyond

View File

@@ -573,16 +573,13 @@ def test_assert_cache_disabled(
def cache_error_log_check(dut: PanicTestDut) -> None:
if dut.is_xtensa:
if dut.target == 'esp32s3':
dut.expect_exact("Guru Meditation Error: Core / panic'ed (Cache error)")
dut.expect_exact('Write back error occurred while dcache tries to write back to flash')
dut.expect_exact('The following backtrace may not indicate the code that caused Cache invalid access')
dut.expect_exact("Guru Meditation Error: Core 0 panic'ed (Cache error)")
dut.expect_exact('Dbus write to cache rejected, error address')
else:
dut.expect_exact("Guru Meditation Error: Core 0 panic'ed (LoadStoreError)")
else:
dut.expect_exact("Guru Meditation Error: Core 0 panic'ed (Store access fault)")
dut.expect_reg_dump(0)
if dut.target == 'esp32s3':
dut.expect_reg_dump(1)
dut.expect_cpu_reset()
@@ -596,16 +593,6 @@ def test_assert_cache_write_back_error_can_print_backtrace(
cache_error_log_check(dut)
@pytest.mark.generic
@pytest.mark.supported_targets
@pytest.mark.parametrize('config', ['panic'], indirect=True)
def test_assert_cache_write_back_error_can_print_backtrace2(
dut: PanicTestDut, config: str, test_func_name: str
) -> None:
dut.run_test_func(test_func_name)
cache_error_log_check(dut)
@pytest.mark.esp32
@pytest.mark.generic
@pytest.mark.parametrize('config', ['panic_delay'], indirect=True)