Merge branch 'fix/multi_core_race_cond_in_panic_handler_v5.4' into 'release/v5.4'

fix(panic_handler): Prevent race condition in panic handler (v5.4)

See merge request espressif/esp-idf!38664
This commit is contained in:
Marius Vikhammer
2025-04-30 11:02:07 +08:00
5 changed files with 45 additions and 42 deletions

View File

@@ -203,6 +203,7 @@ void esp_panic_handler_disable_timg_wdts(void)
/* This function enables the RTC WDT with the given timeout in milliseconds */
void esp_panic_handler_enable_rtc_wdt(uint32_t timeout_ms)
{
wdt_hal_context_t rtc_wdt_ctx = RWDT_HAL_CONTEXT_DEFAULT(); // Use a local context variable to avoid race conditions when both cores enter the panic handler
wdt_hal_init(&rtc_wdt_ctx, WDT_RWDT, 0, false);
uint32_t stage_timeout_ticks = (uint32_t)(timeout_ms * rtc_clk_slow_freq_get_hz() / 1000ULL);
wdt_hal_write_protect_disable(&rtc_wdt_ctx);

View File

@@ -137,45 +137,23 @@ void busy_wait(void)
static void panic_handler(void *frame, bool pseudo_excause)
{
/* If watchdogs are enabled, the panic handler runs the risk of getting aborted pre-emptively because
* an overzealous watchdog decides to reset it. On the other hand, if we disable all watchdogs, we run
* the risk of somehow halting in the panic handler and not resetting.
* an overzealous watchdog decides to reset it. Hence, we feed the WDTs here.
*
* We have to do this before we do anything that might cause issues in the WDT interrupt handlers,
* for example stalling the other core on ESP32 may cause the ESP32_ECO3_CACHE_LOCK_FIX
* handler to get stuck.
* However, we do not feed the WDTs in multi-core mode because we do not have a reliable way to handle
* concurrency issues when both cores enter the panic handler at the same time. Hence, we avoid performing
* any WDT configurations until one of the cores is put in to a busy_wait() state below. As a side note,
* it may so happen that neither of the cores end up in a busy_wait() state and still try to work with the
* WDTs simultaneously but chances of that happening are low. (TODO: IDF-12900)
*
* We do this before we increment the panic handler entry count to ensure that the WDTs are fed.
*/
#if CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE
esp_panic_handler_feed_wdts();
#endif // CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE
/* Increment the panic handler entry count */
esp_panic_handler_increment_entry_count();
/* Configuring the RTC WDT as early as possible in the panic handler
* is critical for system safety.
*
* The RTC WDT is relied upon for a complete system reset, as it is the only
* watchdog timer capable of resetting both the main system and the RTC subsystem.
* In contrast, the Timer Group Watchdog Timers can only reset the main system
* but not the RTC module.
*
* The timeout value for the RTC WDT is set to 10 seconds. The primary reason for
* choosing a 10 second timeout is to allow the panic handler to run to completion
* which may include core dump collection and apptrace flushing.
*
* Explanation for why the core dump takes time:
* 64KB of core dump data (stacks of about 30 tasks) will produce ~85KB base64 data.
* @ 115200 UART speed it will take more than 6 sec to print them out.
*
* TODO: Make the timeout configurable or more intelligent based on the panic reason and the
* config options.
*/
#if CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS
esp_panic_handler_enable_rtc_wdt((CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS + 10) * 1000);
#else
esp_panic_handler_enable_rtc_wdt(10000);
#endif /* CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS */
panic_info_t info = { 0 };
/*
@@ -221,6 +199,35 @@ static void panic_handler(void *frame, bool pseudo_excause)
}
#endif // CONFIG_ESP_SYSTEM_HW_STACK_GUARD
}
#endif // !CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE
/* Configuring the RTC WDT is critical for system safety.
*
* The RTC WDT is relied upon for a complete system reset, as it is the only
* watchdog timer capable of resetting both the main system and the RTC subsystem.
* In contrast, the Timer Group Watchdog Timers can only reset the main system
* but not the RTC module.
*
* We have to do this before we do anything that might cause issues in the WDT interrupt handlers,
* for example stalling the other core on ESP32 may cause the ESP32_ECO3_CACHE_LOCK_FIX
* handler to get stuck.
*
* The timeout value for the RTC WDT is set to 10 seconds. The primary reason for
* choosing a 10 second timeout is to allow the panic handler to run to completion
* which may include core dump collection and apptrace flushing.
*
* Explanation for why the core dump takes time:
* 64KB of core dump data (stacks of about 30 tasks) will produce ~85KB base64 data.
* @ 115200 UART speed it will take more than 6 sec to print them out.
*
* TODO: Make the timeout configurable or more intelligent based on the panic reason and the
* config options.
*/
#if CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS
esp_panic_handler_enable_rtc_wdt((CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS + 10) * 1000);
#else
esp_panic_handler_enable_rtc_wdt(10000);
#endif /* CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS */
/* Before we stall the other CPU, we need to disable all WDTs except the RTC WDT.
* This is because the TIMG WDTs cannot reset the RTC subsystem, which stores the CPU stalling
@@ -229,6 +236,7 @@ static void panic_handler(void *frame, bool pseudo_excause)
*/
esp_panic_handler_disable_timg_wdts();
#if !CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE
esp_rom_delay_us(1);
// Stall all other cores
for (uint32_t i = 0; i < SOC_CPU_CORES_NUM; i++) {
@@ -236,12 +244,6 @@ static void panic_handler(void *frame, bool pseudo_excause)
esp_cpu_stall(i);
}
}
#else
/* In single core mode, we don't need to disable the TIMG WDTs,
* but we do it anyway to keep the code consistent and to avoid
* managing the state of multiple WDTs.
*/
esp_panic_handler_disable_timg_wdts();
#endif // !CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE
esp_ipc_isr_stall_abort();

View File

@@ -1,2 +1,2 @@
| Supported Targets | ESP32 | ESP32-C2 | ESP32-C3 | ESP32-C5 | ESP32-C6 | ESP32-C61 | ESP32-H2 | ESP32-P4 | ESP32-S2 | ESP32-S3 |
| ----------------- | ----- | -------- | -------- | -------- | -------- | --------- | -------- | -------- | -------- | -------- |
| Supported Targets | ESP32 | ESP32-C2 | ESP32-C3 | ESP32-C6 | ESP32-C61 | ESP32-H2 | ESP32-P4 | ESP32-S2 | ESP32-S3 |
| ----------------- | ----- | -------- | -------- | -------- | --------- | -------- | -------- | -------- | -------- |

View File

@@ -1,2 +1,2 @@
| Supported Targets | ESP32 | ESP32-C2 | ESP32-C3 | ESP32-C5 | ESP32-C6 | ESP32-C61 | ESP32-H2 | ESP32-P4 | ESP32-S2 | ESP32-S3 |
| ----------------- | ----- | -------- | -------- | -------- | -------- | --------- | -------- | -------- | -------- | -------- |
| Supported Targets | ESP32 | ESP32-C2 | ESP32-C3 | ESP32-C6 | ESP32-C61 | ESP32-H2 | ESP32-P4 | ESP32-S2 | ESP32-S3 |
| ----------------- | ----- | -------- | -------- | -------- | --------- | -------- | -------- | -------- | -------- |

View File

@@ -1,2 +1,2 @@
| Supported Targets | ESP32 | ESP32-C2 | ESP32-C3 | ESP32-C5 | ESP32-C6 | ESP32-C61 | ESP32-H2 | ESP32-P4 | ESP32-S2 | ESP32-S3 |
| ----------------- | ----- | -------- | -------- | -------- | -------- | --------- | -------- | -------- | -------- | -------- |
| Supported Targets | ESP32 | ESP32-C2 | ESP32-C3 | ESP32-C6 | ESP32-C61 | ESP32-H2 | ESP32-P4 | ESP32-S2 | ESP32-S3 |
| ----------------- | ----- | -------- | -------- | -------- | --------- | -------- | -------- | -------- | -------- |