diff --git a/components/esp_system/panic.c b/components/esp_system/panic.c index cde3339560..bc75239f08 100644 --- a/components/esp_system/panic.c +++ b/components/esp_system/panic.c @@ -191,6 +191,7 @@ void esp_panic_handler_disable_timg_wdts(void) /* This function enables the RTC WDT with the given timeout in milliseconds */ void esp_panic_handler_enable_rtc_wdt(uint32_t timeout_ms) { + wdt_hal_context_t rtc_wdt_ctx = {.inst = WDT_RWDT, .rwdt_dev = &RTCCNTL}; // Use a local context variable to avoid race conditions when both cores enter the panic handler wdt_hal_init(&rtc_wdt_ctx, WDT_RWDT, 0, false); uint32_t stage_timeout_ticks = (uint32_t)(timeout_ms * rtc_clk_slow_freq_get_hz() / 1000ULL); wdt_hal_write_protect_disable(&rtc_wdt_ctx); diff --git a/components/esp_system/port/panic_handler.c b/components/esp_system/port/panic_handler.c index 11f44c7e48..db34b3730e 100644 --- a/components/esp_system/port/panic_handler.c +++ b/components/esp_system/port/panic_handler.c @@ -125,41 +125,23 @@ static void frame_to_panic_info(void *frame, panic_info_t *info, bool pseudo_exc static void panic_handler(void *frame, bool pseudo_excause) { /* If watchdogs are enabled, the panic handler runs the risk of getting aborted pre-emptively because - * an overzealous watchdog decides to reset it. On the other hand, if we disable all watchdogs, we run - * the risk of somehow halting in the panic handler and not resetting. + * an overzealous watchdog decides to reset it. Hence, we feed the WDTs here. * - * We have to do this before we do anything that might cause issues in the WDT interrupt handlers, - * for example stalling the other core on ESP32 may cause the ESP32_ECO3_CACHE_LOCK_FIX - * handler to get stuck. + * However, we do not feed the WDTs in multi-core mode because we do not have a reliable way to handle + * concurrency issues when both cores enter the panic handler at the same time. Hence, we avoid performing + * any WDT configurations until one of the cores is put in to a busy_wait() state below. As a side note, + * it may so happen that neither of the cores end up in a busy_wait() state and still try to work with the + * WDTs simultaneously but chances of that happening are low. (TODO: IDF-12900) * * We do this before we increment the panic handler entry count to ensure that the WDTs are fed. */ +#if CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE esp_panic_handler_feed_wdts(); +#endif // CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE /* Increment the panic handler entry count */ esp_panic_handler_increment_entry_count(); - /* Configuring the RTC WDT as early as possible in the panic handler - * is critical for system safety. - * - * The RTC WDT is relied upon for a complete system reset, as it is the only - * watchdog timer capable of resetting both the main system and the RTC subsystem. - * In contrast, the Timer Group Watchdog Timers can only reset the main system - * but not the RTC module. - * - * The timeout value for the RTC WDT is set to 10 seconds. The primary reason for - * choosing a 10 second timeout is to allow the panic handler to run to completion - * which may include core dump collection and apptrace flushing. - * - * Explanation for why the core dump takes time: - * 64KB of core dump data (stacks of about 30 tasks) will produce ~85KB base64 data. - * @ 115200 UART speed it will take more than 6 sec to print them out. - * - * TODO: Make the timeout configurable or more intelligent based on the panic reason and the - * config options. - */ - esp_panic_handler_enable_rtc_wdt(10000); - panic_info_t info = { 0 }; /* @@ -189,6 +171,31 @@ static void panic_handler(void *frame, bool pseudo_excause) } } } +#endif // !CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE + + /* Configuring the RTC WDT is critical for system safety. + * + * The RTC WDT is relied upon for a complete system reset, as it is the only + * watchdog timer capable of resetting both the main system and the RTC subsystem. + * In contrast, the Timer Group Watchdog Timers can only reset the main system + * but not the RTC module. + * + * We have to do this before we do anything that might cause issues in the WDT interrupt handlers, + * for example stalling the other core on ESP32 may cause the ESP32_ECO3_CACHE_LOCK_FIX + * handler to get stuck. + * + * The timeout value for the RTC WDT is set to 10 seconds. The primary reason for + * choosing a 10 second timeout is to allow the panic handler to run to completion + * which may include core dump collection and apptrace flushing. + * + * Explanation for why the core dump takes time: + * 64KB of core dump data (stacks of about 30 tasks) will produce ~85KB base64 data. + * @ 115200 UART speed it will take more than 6 sec to print them out. + * + * TODO: Make the timeout configurable or more intelligent based on the panic reason and the + * config options. + */ + esp_panic_handler_enable_rtc_wdt(10000); /* Before we stall the other CPU, we need to disable all WDTs except the RTC WDT. * This is because the TIMG WDTs cannot reset the RTC subsystem, which stores the CPU stalling @@ -197,6 +204,7 @@ static void panic_handler(void *frame, bool pseudo_excause) */ esp_panic_handler_disable_timg_wdts(); +#if !CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE esp_rom_delay_us(1); // Stall all other cores for (uint32_t i = 0; i < SOC_CPU_CORES_NUM; i++) { @@ -204,12 +212,6 @@ static void panic_handler(void *frame, bool pseudo_excause) esp_cpu_stall(i); } } -#else - /* In single core mode, we don't need to disable the TIMG WDTs, - * but we do it anyway to keep the code consistent and to avoid - * managing the state of multiple WDTs. - */ - esp_panic_handler_disable_timg_wdts(); #endif // !CONFIG_ESP_SYSTEM_SINGLE_CORE_MODE esp_ipc_isr_stall_abort();