Merge branch 'feature/p4_multi_core_wdt' into 'master'

WDT: Multi-core support for RISC-V WDT

Closes IDF-6516 and IDF-8136

See merge request espressif/esp-idf!25689
This commit is contained in:
Marius Vikhammer
2023-10-24 09:39:58 +08:00
8 changed files with 126 additions and 98 deletions

View File

@@ -30,12 +30,10 @@
#define REASON_YIELD BIT(0) #define REASON_YIELD BIT(0)
#define REASON_FREQ_SWITCH BIT(1) #define REASON_FREQ_SWITCH BIT(1)
#define REASON_GDB_CALL BIT(3)
#if CONFIG_IDF_TARGET_ARCH_XTENSA
#define REASON_PRINT_BACKTRACE BIT(2) #define REASON_PRINT_BACKTRACE BIT(2)
#define REASON_GDB_CALL BIT(3)
#define REASON_TWDT_ABORT BIT(4) #define REASON_TWDT_ABORT BIT(4)
#endif
static portMUX_TYPE reason_spinlock = portMUX_INITIALIZER_UNLOCKED; static portMUX_TYPE reason_spinlock = portMUX_INITIALIZER_UNLOCKED;
static volatile uint32_t reason[portNUM_PROCESSORS]; static volatile uint32_t reason[portNUM_PROCESSORS];
@@ -100,20 +98,22 @@ static void IRAM_ATTR esp_crosscore_isr(void *arg) {
update_breakpoints(); update_breakpoints();
} }
#endif // !CONFIG_ESP_SYSTEM_GDBSTUB_RUNTIME #endif // !CONFIG_ESP_SYSTEM_GDBSTUB_RUNTIME
#if CONFIG_IDF_TARGET_ARCH_XTENSA // IDF-2986
if (my_reason_val & REASON_PRINT_BACKTRACE) { if (my_reason_val & REASON_PRINT_BACKTRACE) {
esp_backtrace_print(100); esp_backtrace_print(100);
} }
#if CONFIG_ESP_TASK_WDT_EN #if CONFIG_ESP_TASK_WDT_EN
if (my_reason_val & REASON_TWDT_ABORT) { if (my_reason_val & REASON_TWDT_ABORT) {
extern void task_wdt_timeout_abort_xtensa(bool); extern void task_wdt_timeout_abort(bool);
/* Called from a crosscore interrupt, thus, we are not the core that received /* Called from a crosscore interrupt, thus, we are not the core that received
* the TWDT interrupt, call the function with `false` as a parameter. */ * the TWDT interrupt, call the function with `false` as a parameter. */
task_wdt_timeout_abort_xtensa(false); task_wdt_timeout_abort(false);
} }
#endif // CONFIG_ESP_TASK_WDT_EN #endif // CONFIG_ESP_TASK_WDT_EN
#endif // CONFIG_IDF_TARGET_ARCH_XTENSA
} }
//Initialize the crosscore interrupt on this core. Call this once //Initialize the crosscore interrupt on this core. Call this once
@@ -182,7 +182,6 @@ void IRAM_ATTR esp_crosscore_int_send_gdb_call(int core_id)
esp_crosscore_int_send(core_id, REASON_GDB_CALL); esp_crosscore_int_send(core_id, REASON_GDB_CALL);
} }
#if CONFIG_IDF_TARGET_ARCH_XTENSA
void IRAM_ATTR esp_crosscore_int_send_print_backtrace(int core_id) void IRAM_ATTR esp_crosscore_int_send_print_backtrace(int core_id)
{ {
esp_crosscore_int_send(core_id, REASON_PRINT_BACKTRACE); esp_crosscore_int_send(core_id, REASON_PRINT_BACKTRACE);
@@ -193,4 +192,3 @@ void IRAM_ATTR esp_crosscore_int_send_twdt_abort(int core_id) {
esp_crosscore_int_send(core_id, REASON_TWDT_ABORT); esp_crosscore_int_send(core_id, REASON_TWDT_ABORT);
} }
#endif // CONFIG_ESP_TASK_WDT_EN #endif // CONFIG_ESP_TASK_WDT_EN
#endif

View File

@@ -103,6 +103,11 @@ esp_err_t esp_backtrace_print_from_frame(int depth, const esp_backtrace_frame_t*
* *
* @param depth The maximum number of stack frames to print (should be > 0) * @param depth The maximum number of stack frames to print (should be > 0)
* *
* @note On RISC-V targets printing backtrace at run-time is only available if
* CONFIG_ESP_SYSTEM_USE_EH_FRAME is selected. Otherwise we simply print
* a register dump. Function assumes it is called in a context where the
* calling task will not migrate to another core, e.g. interrupts disabled/panic handler.
*
* @return * @return
* - ESP_OK Backtrace successfully printed to completion or to depth limit * - ESP_OK Backtrace successfully printed to completion or to depth limit
* - ESP_FAIL Backtrace is corrupted * - ESP_FAIL Backtrace is corrupted

View File

@@ -41,6 +41,7 @@ if(CONFIG_IDF_TARGET_ARCH_XTENSA)
elseif(CONFIG_IDF_TARGET_ARCH_RISCV) elseif(CONFIG_IDF_TARGET_ARCH_RISCV)
list(APPEND srcs "arch/riscv/expression_with_stack.c" list(APPEND srcs "arch/riscv/expression_with_stack.c"
"arch/riscv/panic_arch.c" "arch/riscv/panic_arch.c"
"arch/riscv/debug_helpers.c"
"arch/riscv/debug_stubs.c") "arch/riscv/debug_stubs.c")
endif() endif()

View File

@@ -0,0 +1,60 @@
/*
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "esp_debug_helpers.h"
#include "sdkconfig.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_private/freertos_debug.h"
#include "esp_err.h"
#include "esp_attr.h"
#include "riscv/rvruntime-frames.h"
#if CONFIG_ESP_SYSTEM_USE_EH_FRAME
#include "esp_private/eh_frame_parser.h"
#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
#if !CONFIG_ESP_SYSTEM_USE_EH_FRAME
/* Function used to print all the registers pointed to by the given frame. */
extern void panic_print_registers(const void *frame, int core);
#endif // !CONFIG_ESP_SYSTEM_USE_EH_FRAME
/* Targets based on a RISC-V CPU cannot perform backtracing that easily.
* We have two options here:
* - Perform backtracing at runtime.
* - Let IDF monitor do the backtracing for us. Used during panic already.
* This could be configurable, choosing one or the other depending on
* CONFIG_ESP_SYSTEM_USE_EH_FRAME configuration option.
*
* In both cases, this takes time. Since we might be in an ISR, we must
* exit this handler as fast as possible, so we simply print
* the interruptee's registers.
*/
/**
 * Print diagnostic information for the task currently running on the calling core.
 *
 * A snapshot of the current task on this core is taken and the top of its
 * stack is used as the frame to print. When CONFIG_ESP_SYSTEM_USE_EH_FRAME is
 * enabled the frame is handed to esp_eh_frame_print_backtrace(); otherwise the
 * registers stored in the frame are dumped with panic_print_registers().
 *
 * @param depth Accepted for interface compatibility; not used by this implementation.
 *
 * @return
 *      - ESP_OK            Output was printed
 *      - ESP_ERR_NOT_FOUND vTaskGetSnapshot() did not return pdTRUE
 */
esp_err_t IRAM_ATTR esp_backtrace_print(int depth)
{
    /* The depth limit is not applied here; silence the unused-parameter warning. */
    (void)depth;

    const int core_id = xPortGetCoreID();
    TaskSnapshot_t task_snapshot = { 0 };

    if (vTaskGetSnapshot(xTaskGetCurrentTaskHandleForCPU(core_id), &task_snapshot) != pdTRUE) {
        return ESP_ERR_NOT_FOUND;
    }

    /* The top of the snapshotted task's stack is what gets printed. */
    void *saved_frame = task_snapshot.pxTopOfStack;

#if !CONFIG_ESP_SYSTEM_USE_EH_FRAME
    esp_rom_printf("Print CPU %d (current core) registers\n", core_id);
    panic_print_registers(saved_frame, core_id);
    esp_rom_printf("\r\n");
#else // !CONFIG_ESP_SYSTEM_USE_EH_FRAME
    esp_rom_printf("Print CPU %d (current core) backtrace\n", core_id);
    esp_eh_frame_print_backtrace(saved_frame);
#endif // !CONFIG_ESP_SYSTEM_USE_EH_FRAME

    return ESP_OK;
}

View File

@@ -13,6 +13,7 @@
#else #else
#include "soc/extmem_reg.h" #include "soc/extmem_reg.h"
#endif #endif
#include "soc/soc_caps.h"
#include "esp_private/panic_internal.h" #include "esp_private/panic_internal.h"
#include "esp_private/panic_reason.h" #include "esp_private/panic_reason.h"
#include "riscv/rvruntime-frames.h" #include "riscv/rvruntime-frames.h"
@@ -276,8 +277,6 @@ static void panic_print_register_array(const char* names[], const uint32_t* regs
void panic_print_registers(const void *f, int core) void panic_print_registers(const void *f, int core)
{ {
const RvExcFrame *frame = (RvExcFrame *)f;
/** /**
* General Purpose context, only print ABI name * General Purpose context, only print ABI name
*/ */
@@ -290,7 +289,7 @@ void panic_print_registers(const void *f, int core)
}; };
panic_print_str("Core "); panic_print_str("Core ");
panic_print_dec(frame->mhartid); panic_print_dec(core);
panic_print_str(" register dump:"); panic_print_str(" register dump:");
panic_print_register_array(desc, f, DIM(desc)); panic_print_register_array(desc, f, DIM(desc));
} }
@@ -319,23 +318,21 @@ void panic_soc_fill_info(void *f, panic_info_t *info)
info->reason = "Cache error"; info->reason = "Cache error";
info->details = print_cache_err_details; info->details = print_cache_err_details;
} else if (frame->mcause == ETS_INT_WDT_INUM) { } else if (frame->mcause == PANIC_RSN_INTWDT_CPU0) {
/* Watchdog interrupt occured, get the core on which it happened const int core = 0;
* and update the reason/message accordingly. */
const int core = esp_cache_err_get_cpuid();
info->core = core; info->core = core;
info->exception = PANIC_EXCEPTION_IWDT; info->exception = PANIC_EXCEPTION_IWDT;
#if SOC_CPU_NUM > 1
#error "TODO: define PANIC_RSN_INTWDT_CPU1 in panic_reason.h"
_Static_assert(PANIC_RSN_INTWDT_CPU0 + 1 == PANIC_RSN_INTWDT_CPU1,
"PANIC_RSN_INTWDT_CPU1 must be equal to PANIC_RSN_INTWDT_CPU0 + 1");
info->reason = core == 0 ? "Interrupt wdt timeout on CPU0" : "Interrupt wdt timeout on CPU1";
#else
info->reason = "Interrupt wdt timeout on CPU0"; info->reason = "Interrupt wdt timeout on CPU0";
#endif
} }
#if SOC_CPU_CORES_NUM > 1
else if (frame->mcause == PANIC_RSN_INTWDT_CPU1) {
const int core = 1;
info->core = core;
info->exception = PANIC_EXCEPTION_IWDT;
info->reason = "Interrupt wdt timeout on CPU1";
}
#endif
#if CONFIG_ESP_SYSTEM_HW_STACK_GUARD #if CONFIG_ESP_SYSTEM_HW_STACK_GUARD
else if (frame->mcause == ETS_ASSIST_DEBUG_INUM) { else if (frame->mcause == ETS_ASSIST_DEBUG_INUM) {
info->core = esp_cache_err_get_cpuid(); info->core = esp_cache_err_get_cpuid();

View File

@@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2015-2023 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@@ -24,6 +24,12 @@
#include "esp_private/esp_task_wdt.h" #include "esp_private/esp_task_wdt.h"
#include "esp_private/esp_task_wdt_impl.h" #include "esp_private/esp_task_wdt_impl.h"
#if CONFIG_IDF_TARGET_ARCH_RISCV
#include "riscv/rvruntime-frames.h"
#endif //CONFIG_IDF_TARGET_ARCH_RISCV
#if CONFIG_ESP_SYSTEM_USE_EH_FRAME #if CONFIG_ESP_SYSTEM_USE_EH_FRAME
#include "esp_private/eh_frame_parser.h" #include "esp_private/eh_frame_parser.h"
#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME #endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
@@ -331,65 +337,6 @@ static void subscribe_idle(uint32_t core_mask)
* *
*/ */
#if CONFIG_IDF_TARGET_ARCH_RISCV
/**
 * Handle a task watchdog timeout (RISC-V variant).
 *
 * A snapshot of the current task is taken first. If p_twdt_obj->panic is set,
 * the abort/TWDT global flags are raised and xt_unhandled_exception() is
 * called with the interrupted task's saved frame. Otherwise, if the snapshot
 * succeeded, either a backtrace (CONFIG_ESP_SYSTEM_USE_EH_FRAME enabled) or a
 * register dump of that frame is printed.
 *
 * @param cores_fail Bitmask of the cores that failed; asserted to be exactly BIT(0).
 * @param panic      Not read by this implementation; p_twdt_obj->panic is consulted instead.
 */
static void task_wdt_timeout_handling(int cores_fail, bool panic)
{
/* For RISC-V, make sure the set of failing cores consists of core 0 only. */
assert(cores_fail == BIT(0));
const int current_core = 0;
TaskSnapshot_t snapshot = { 0 };
BaseType_t ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandle(), &snapshot);
if (p_twdt_obj->panic) {
/* The panic path needs the interrupted task's frame, so the snapshot must have succeeded. */
assert(ret == pdTRUE);
ESP_EARLY_LOGE(TAG, "Aborting.");
esp_reset_reason_set_hint(ESP_RST_TASK_WDT);
/**
* We cannot simply use `abort` here because the `panic` handler would
* interpret it as if the task watchdog ISR aborted and so, print this
* current ISR backtrace/context. We want to trick the `panic` handler
* to think the task itself is aborting.
* To do so, we need to get the interruptee's top of the stack. It contains
* its own context, saved when the interrupt occurred.
* We must also set the global flag that states that an abort occurred
* (and not a panic)
**/
g_panic_abort = true;
g_twdt_isr = true;
void *frame = (void *) snapshot.pxTopOfStack;
#if CONFIG_ESP_SYSTEM_USE_EH_FRAME
ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", current_core);
#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
xt_unhandled_exception(frame);
} else {
/* Targets based on a RISC-V CPU cannot perform backtracing that easily.
* We have two options here:
* - Perform backtracing at runtime.
* - Let IDF monitor do the backtracing for us. Used during panic already.
* This could be configurable, choosing one or the other depending on
* CONFIG_ESP_SYSTEM_USE_EH_FRAME configuration option.
*
* In both cases, this takes time. Since we are in an ISR, we must
* exit this handler as fast as possible, so we simply print
* the interruptee's registers.
*/
/* Nothing is printed when the snapshot could not be taken. */
if (ret == pdTRUE) {
void *frame = (void *) snapshot.pxTopOfStack;
#if CONFIG_ESP_SYSTEM_USE_EH_FRAME
ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", current_core);
esp_eh_frame_print_backtrace(frame);
#else // CONFIG_ESP_SYSTEM_USE_EH_FRAME
ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) registers", current_core);
panic_print_registers(frame, current_core);
esp_rom_printf("\r\n");
#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
}
}
}
#else // CONFIG_IDF_TARGET_ARCH_RISCV
/** /**
* Function simulating an abort coming from the interrupted task of the current * Function simulating an abort coming from the interrupted task of the current
@@ -398,7 +345,7 @@ static void task_wdt_timeout_handling(int cores_fail, bool panic)
* in the case where the other core (than the main one) has to abort because one * in the case where the other core (than the main one) has to abort because one
* of his tasks didn't reset the TWDT on time. * of his tasks didn't reset the TWDT on time.
*/ */
void task_wdt_timeout_abort_xtensa(bool current_core) void task_wdt_timeout_abort(bool current_core)
{ {
TaskSnapshot_t snapshot = { 0 }; TaskSnapshot_t snapshot = { 0 };
BaseType_t ret = pdTRUE; BaseType_t ret = pdTRUE;
@@ -408,26 +355,32 @@ void task_wdt_timeout_abort_xtensa(bool current_core)
ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandle(), &snapshot); ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandle(), &snapshot);
assert(ret == pdTRUE); assert(ret == pdTRUE);
g_panic_abort = true; g_panic_abort = true;
/* For Xtensa, we should set this flag as late as possible, as this function may /* We should set this flag as late as possible, as this function may
* be called after a crosscore interrupt. Indeed, a higher interrupt may occur * be called after a crosscore interrupt. Indeed, a higher interrupt may occur
* after calling the crosscore interrupt, if its handler fails, this flag * after calling the crosscore interrupt, if its handler fails, this flag
* shall not be set. * shall not be set.
* This flag will tell the coredump component (if activated) that yes, we are in * This flag will tell the coredump component (if activated) that yes, we are in
* an ISR context, but it is intended, it is not because an ISR encountered an * an ISR context, but it is intended, it is not because an ISR encountered an
* exception. If we don't set such flag, later tested by coredump, the later would * exception. If we don't set such flag, later tested by coredump, the latter would
* switch the execution frame/context we are giving it to the interrupt stack. * switch the execution frame/context we are giving it to the interrupt stack.
* For details about this behavior in the TODO task: IDF-5694 * For details about this behavior in the TODO task: IDF-5694
*/ */
g_twdt_isr = true; g_twdt_isr = true;
void *frame = (void *) snapshot.pxTopOfStack; void *frame = (void *) snapshot.pxTopOfStack;
#if CONFIG_ESP_SYSTEM_USE_EH_FRAME | CONFIG_IDF_TARGET_ARCH_XTENSA
if (current_core) { if (current_core) {
ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", xPortGetCoreID()); ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", xPortGetCoreID());
} else { } else {
ESP_EARLY_LOGE(TAG, "Print CPU %d backtrace", xPortGetCoreID()); ESP_EARLY_LOGE(TAG, "Print CPU %d backtrace", xPortGetCoreID());
} }
#endif
xt_unhandled_exception(frame); xt_unhandled_exception(frame);
} }
static void task_wdt_timeout_handling(int cores_fail, bool panic) static void task_wdt_timeout_handling(int cores_fail, bool panic)
{ {
const int current_core = xPortGetCoreID(); const int current_core = xPortGetCoreID();
@@ -453,7 +406,7 @@ static void task_wdt_timeout_handling(int cores_fail, bool panic)
} }
#endif // !CONFIG_FREERTOS_UNICORE #endif // !CONFIG_FREERTOS_UNICORE
/* Current core is failing, abort right now */ /* Current core is failing, abort right now */
task_wdt_timeout_abort_xtensa(true); task_wdt_timeout_abort(true);
} else { } else {
/* Print backtrace of the core that failed to reset the watchdog */ /* Print backtrace of the core that failed to reset the watchdog */
if (cores_fail & BIT(current_core)) { if (cores_fail & BIT(current_core)) {
@@ -470,8 +423,6 @@ static void task_wdt_timeout_handling(int cores_fail, bool panic)
} }
} }
#endif // CONFIG_IDF_TARGET_ARCH_RISCV
// ---------------------- Callbacks ------------------------ // ---------------------- Callbacks ------------------------

View File

@@ -5,14 +5,16 @@
*/ */
#pragma once #pragma once
/* Since riscv does not replace mcause with "pseudo_reason" as it xtensa does #include "soc/soc_caps.h"
* PANIC_RSN_* defined with original interrupt numbers to make it work in
* common code /* Need a way to signal which core caused the INT WDT like we do with EXCAUSE on xtensa.
Choosing a large number that is unlikely to conflict with any actual riscv mcauses
bit 12 and above are always zero on the CPU used by P4
*/ */
#define PANIC_RSN_INTWDT_CPU0 ETS_INT_WDT_INUM #define PANIC_RSN_INTWDT_CPU0 ETS_INT_WDT_INUM
//TODO: IDF-7511
#if SOC_CPU_CORES_NUM > 1 #if SOC_CPU_CORES_NUM > 1
#define PANIC_RSN_INTWDT_CPU1 ETS_INT_WDT_INUM #define PANIC_RSN_INTWDT_CPU1_FLAG (1 << 12)
#define PANIC_RSN_INTWDT_CPU1 (PANIC_RSN_INTWDT_CPU1_FLAG | ETS_INT_WDT_INUM)
#endif #endif
#define PANIC_RSN_CACHEERR 3 #define PANIC_RSN_CACHEERR 3

View File

@@ -10,6 +10,7 @@
#include "soc/soc_caps.h" #include "soc/soc_caps.h"
#include "sdkconfig.h" #include "sdkconfig.h"
#include "esp_private/vectors_const.h" #include "esp_private/vectors_const.h"
#include "esp_private/panic_reason.h"
.equ SAVE_REGS, 32 .equ SAVE_REGS, 32
@@ -224,6 +225,19 @@ _call_panic_handler:
/* When CLIC is supported, external interrupts are shifted by 16, deduct this difference from mcause */ /* When CLIC is supported, external interrupts are shifted by 16, deduct this difference from mcause */
add a1, a1, -16 add a1, a1, -16
#endif // CONFIG_SOC_INT_CLIC_SUPPORTED #endif // CONFIG_SOC_INT_CLIC_SUPPORTED
#if CONFIG_ESP_INT_WDT_CHECK_CPU1
/* Check if this was an INT WDT */
li t0, PANIC_RSN_INTWDT_CPU0
bne a1, t0, _store_mcause
/* Check if the cause is the app cpu failing to tick, if so then update mcause to reflect this*/
lw t0, int_wdt_cpu1_ticked
bnez t0, _store_mcause
li t0, PANIC_RSN_INTWDT_CPU1_FLAG
add a1, a1, t0
#endif
_store_mcause:
sw a1, RV_STK_MCAUSE(sp) sw a1, RV_STK_MCAUSE(sp)
call panic_from_isr call panic_from_isr
/* We arrive here if the exception handler has returned. This means that /* We arrive here if the exception handler has returned. This means that