diff --git a/components/freertos/FreeRTOS-Kernel/portable/riscv/port.c b/components/freertos/FreeRTOS-Kernel/portable/riscv/port.c index 18aa79db67..068668e965 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/riscv/port.c +++ b/components/freertos/FreeRTOS-Kernel/portable/riscv/port.c @@ -60,6 +60,11 @@ #include "soc/hp_system_reg.h" #endif +#if SOC_CPU_HAS_HWLOOP +#include "riscv/csr.h" +#include "riscv/csr_hwlp.h" +#endif + #if ( SOC_CPU_COPROC_NUM > 0 ) #include "esp_private/panic_internal.h" @@ -125,9 +130,23 @@ StackType_t *xIsrStackBottom[portNUM_PROCESSORS] = {0}; BaseType_t xPortStartScheduler(void) { #if ( SOC_CPU_COPROC_NUM > 0 ) + +#if SOC_CPU_HAS_FPU /* Disable FPU so that the first task to use it will trigger an exception */ rv_utils_disable_fpu(); -#endif +#endif /* SOC_CPU_HAS_FPU */ + +#if SOC_CPU_HAS_PIE + /* Similarly, disable PIE */ + rv_utils_disable_pie(); +#endif /* SOC_CPU_HAS_PIE */ + +#if SOC_CPU_HAS_HWLOOP + /* Initialize the Hardware loop feature */ + RV_WRITE_CSR(CSR_HWLP_STATE_REG, HWLP_INITIAL_STATE); +#endif /* SOC_CPU_HAS_HWLOOP */ +#endif /* ( SOC_CPU_COPROC_NUM > 0 ) */ + /* Initialize all kernel state tracking variables */ BaseType_t coreID = xPortGetCoreID(); port_uxInterruptNesting[coreID] = 0; @@ -230,7 +249,7 @@ FORCE_INLINE_ATTR UBaseType_t uxInitialiseStackTLS(UBaseType_t uxStackPointer, u #if CONFIG_FREERTOS_TASK_FUNCTION_WRAPPER static void vPortTaskWrapper(TaskFunction_t pxCode, void *pvParameters) { - __asm__ volatile(".cfi_undefined ra"); // tell to debugger that it's outermost (inital) frame + __asm__ volatile(".cfi_undefined ra"); // tell to debugger that it's outermost (initial) frame extern void __attribute__((noreturn)) panic_abort(const char *details); static char DRAM_ATTR msg[80] = "FreeRTOS: FreeRTOS Task \"\0"; pxCode(pvParameters); @@ -356,7 +375,7 @@ StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxC HIGH ADDRESS |---------------------------| <- pxTopOfStack on entry |
TLS Variables | - | ------------------------- | <- Start of useable stack + | ------------------------- | <- Start of usable stack | Starting stack frame | | ------------------------- | <- pxTopOfStack on return (which is the tasks current SP) | | | @@ -374,7 +393,7 @@ StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxC | Coproc. Save Area | <- RvCoprocSaveArea | ------------------------- | | TLS Variables | - | ------------------------- | <- Start of useable stack + | ------------------------- | <- Start of usable stack | Starting stack frame | | ------------------------- | <- pxTopOfStack on return (which is the tasks current SP) | | | @@ -430,7 +449,7 @@ BaseType_t xPortInIsrContext(void) /* Disable interrupts to fetch the coreID atomically */ irqStatus = portSET_INTERRUPT_MASK_FROM_ISR(); - /* Return the interrupt nexting counter for this core */ + /* Return the interrupt nesting counter for this core */ ret = port_uxInterruptNesting[xPortGetCoreID()]; /* Restore interrupts */ @@ -445,7 +464,7 @@ BaseType_t xPortInIsrContext(void) BaseType_t IRAM_ATTR xPortInterruptedFromISRContext(void) { - /* Return the interrupt nexting counter for this core */ + /* Return the interrupt nesting counter for this core */ return port_uxInterruptNesting[xPortGetCoreID()]; } @@ -536,7 +555,7 @@ BaseType_t __attribute__((optimize("-O3"))) xPortEnterCriticalTimeout(portMUX_TY void __attribute__((optimize("-O3"))) vPortExitCriticalMultiCore(portMUX_TYPE *mux) { /* This function may be called in a nested manner. Therefore, we only need - * to reenable interrupts if this is the last call to exit the critical. We + * to re-enable interrupts if this is the last call to exit the critical. We * can use the nesting count to determine whether this is the last exit call. 
*/ spinlock_release(mux); @@ -787,9 +806,14 @@ RvCoprocSaveArea* pxPortGetCoprocArea(StaticTask_t* task, bool allocate, int cop /* Check if coprocessor area is allocated */ if (allocate && sa->sa_coprocs[coproc] == NULL) { const uint32_t coproc_sa_sizes[] = { - RV_COPROC0_SIZE, RV_COPROC1_SIZE + RV_COPROC0_SIZE, RV_COPROC1_SIZE, RV_COPROC2_SIZE }; - /* The allocator points to a usable part of the stack, use it for the coprocessor */ + const uint32_t coproc_sa_align[] = { + RV_COPROC0_ALIGN, RV_COPROC1_ALIGN, RV_COPROC2_ALIGN + }; + /* The allocator points to a usable part of the stack, use it for the coprocessor. + * Align it up to the coprocessor save area requirement */ + sa->sa_allocator = (sa->sa_allocator + coproc_sa_align[coproc] - 1) & ~(coproc_sa_align[coproc] - 1); sa->sa_coprocs[coproc] = (void*) (sa->sa_allocator); sa->sa_allocator += coproc_sa_sizes[coproc]; /* Update the lowest address of the stack to prevent FreeRTOS performing overflow/watermark checks on the coprocessors contexts */ @@ -800,9 +824,9 @@ RvCoprocSaveArea* pxPortGetCoprocArea(StaticTask_t* task, bool allocate, int cop if (task_sp <= task->pxDummy6) { /* In theory we need to call vApplicationStackOverflowHook to trigger the stack overflow callback, * but in practice, since we are already in an exception handler, this won't work, so let's manually - * trigger an exception with the previous FPU owner's TCB */ + * trigger an exception with the previous coprocessor owner's TCB */ g_panic_abort = true; - g_panic_abort_details = (char *) "ERROR: Stack overflow while saving FPU context!\n"; + g_panic_abort_details = (char *) "ERROR: Stack overflow while saving coprocessor context!\n"; xt_unhandled_exception(task_sp); } } @@ -821,7 +845,8 @@ RvCoprocSaveArea* pxPortGetCoprocArea(StaticTask_t* task, bool allocate, int cop * @param coreid Current core * @param coproc Coprocessor to save context of * - * @returns Coprocessor former owner's save area + * @returns Coprocessor former owner's save 
area, can be NULL if there was no owner yet, can be -1 if + * the former owner is the same as the new owner. */ RvCoprocSaveArea* pxPortUpdateCoprocOwner(int coreid, int coproc, StaticTask_t* owner) { @@ -830,8 +855,11 @@ RvCoprocSaveArea* pxPortUpdateCoprocOwner(int coreid, int coproc, StaticTask_t* StaticTask_t** owner_addr = &port_uxCoprocOwner[ coreid ][ coproc ]; /* Atomically exchange former owner with the new one */ StaticTask_t* former = Atomic_SwapPointers_p32((void**) owner_addr, owner); - /* Get the save area of former owner */ - if (former != NULL) { + /* Get the save area of former owner. small optimization here, if the former owner is the new owner, + * return -1. This will simplify the assembly code while making it faster. */ + if (former == owner) { + sa = (void*) -1; + } else if (former != NULL) { /* Allocate coprocessor memory if not available yet */ sa = pxPortGetCoprocArea(former, true, coproc); } diff --git a/components/freertos/FreeRTOS-Kernel/portable/riscv/portasm.S b/components/freertos/FreeRTOS-Kernel/portable/riscv/portasm.S index 6a99e7ebb9..b737e58d7c 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/riscv/portasm.S +++ b/components/freertos/FreeRTOS-Kernel/portable/riscv/portasm.S @@ -8,6 +8,8 @@ #include "freertos/FreeRTOSConfig.h" #include "soc/soc_caps.h" #include "riscv/rvruntime-frames.h" +#include "riscv/csr_hwlp.h" +#include "riscv/csr_pie.h" .extern pxCurrentTCBs @@ -33,6 +35,283 @@ #if SOC_CPU_COPROC_NUM > 0 +/** + * @brief Macro to generate a routine that saves a coprocessor's registers in the previous owner's TCB dedicated save area. + * This routine aborts if the coprocessor is used from an ISR, since this is not allowed in ESP-IDF. + * However it is allowed to use these coprocessors in the init process, so no error will be triggered if the + * current TCB is NULL. 
+ * + * @param name The name of the coprocessor, this will be used to generate the label, so it must not contain special characters + * @param coproc_idx Index of the coprocessor in the coprocessor save area, this value can be found in rvruntime definition + * @param enable_coproc Macro that takes a scratch register as a parameter and enables the coprocessor. + * @param save_coproc_regs Macro that takes a frame as a parameter and saves all the coprocessors' registers in that frame. + * @param restore_coproc_regs Macro that takes a frame as a parameter and restores all the coprocessors' registers from that. + * + * Note: macros given as parameters can freely use temporary registers + */ +.macro generate_coprocessor_routine name, coproc_idx, enable_coproc, save_coproc_regs, restore_coproc_regs + + .global rtos_save_\name\()_coproc + .type rtos_save_\name\()_coproc, @function +rtos_save_\name\()_coproc: + /* If we are in an interrupt context, we have to abort. We don't allow using the coprocessors from ISR */ +#if ( configNUM_CORES > 1 ) + csrr a2, mhartid /* a2 = coreID */ + slli a2, a2, 2 /* a2 = coreID * 4 */ + la a1, port_uxInterruptNesting /* a1 = &port_uxInterruptNesting */ + add a1, a1, a2 /* a1 = &port_uxInterruptNesting[coreID] */ + lw a1, 0(a1) /* a1 = port_uxInterruptNesting[coreID] */ +#else /* ( configNUM_CORES <= 1 ) */ + lw a1, (port_uxInterruptNesting) /* a1 = port_uxInterruptNesting */ +#endif /* ( configNUM_CORES > 1 ) */ + /* SP still contains the RvExcFrame address */ + mv a0, sp + bnez a1, vPortCoprocUsedInISR + /* Enable the coprocessor needed by the current task */ + \enable_coproc a1 + mv s0, ra + call rtos_current_tcb + /* If the current TCB is NULL, the coprocessor is used during initialization, even before + * the scheduler started. 
Consider this a valid usage, it will be disabled as soon as the + * scheduler is started anyway */ + beqz a0, rtos_save_\name\()_coproc_norestore + mv s1, a0 /* s1 = pxCurrentTCBs */ + /* Prepare parameters of pxPortUpdateCoprocOwner */ + mv a2, a0 + li a1, \coproc_idx + csrr a0, mhartid + call pxPortUpdateCoprocOwner + /* If the save area is NULL, no need to save context */ + beqz a0, rtos_save_\name\()_coproc_nosave + /* If the former owner is the current task (new owner), the return value is -1, we can skip restoring the + * coprocessor context and return directly */ + li a1, -1 + beq a0, a1, rtos_save_\name\()_coproc_norestore + /* Save the coprocessor context in the structure */ + lw a0, RV_COPROC_SA+\coproc_idx*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[coproc_idx] */ + \save_coproc_regs a0 +rtos_save_\name\()_coproc_nosave: +#if ( configNUM_CORES > 1 ) + /* Pin current task to current core, s1 has pxCurrentTCBs */ + mv a0, s1 + csrr a1, mhartid + call vPortTaskPinToCore +#endif /* configNUM_CORES > 1 */ + /* Check if we have to restore a previous context from the current TCB */ + mv a0, s1 + /* Do not allocate memory for the coprocessor yet, delay this until another task wants to use it. + * This guarantees that if a stack overflow occurs when allocating the coprocessor context on the stack, + * the current task context is flushed and updated in the TCB, generating a correct backtrace + * from the panic handler. 
*/ + li a1, 0 + li a2, \coproc_idx + call pxPortGetCoprocArea + /* Get the enable flags from the coprocessor save area */ + lw a1, RV_COPROC_ENABLE(a0) + /* To avoid having branches below, set the coprocessor enable flag now */ + ori a2, a1, 1 << \coproc_idx + sw a2, RV_COPROC_ENABLE(a0) + /* Check if the former coprocessor enable bit was set */ + andi a2, a1, 1 << \coproc_idx + beqz a2, rtos_save_\name\()_coproc_norestore + /* Enable bit was set, restore the coprocessor context */ + lw a0, RV_COPROC_SA+\coproc_idx*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[\coproc_idx] */ + \restore_coproc_regs a0 +rtos_save_\name\()_coproc_norestore: + /* Return from routine via s0, instead of ra */ + jr s0 + .size rtos_save_\name\()_coproc, .-rtos_save_\name\()_coproc + +.endm + + + +#if SOC_CPU_HAS_HWLOOP + +/** + * @brief Macros to enable and disable the hardware loop feature on the current core + */ +.macro hwlp_enable scratch_reg=a0 + li \scratch_reg, 1 + csrw CSR_HWLP_STATE_REG, \scratch_reg +.endm + +/** + * @brief Disable HW Loop CPU feature while returning the former status in the given register + */ +.macro hwlp_disable reg + csrrw \reg, CSR_HWLP_STATE_REG, zero + /* Only keep the lowest two bits */ + andi \reg, \reg, 0b11 + /* If register is 0, HWLP was off */ + beqz \reg, 1f + /* It was ON, return the enable bit in \reg */ + li \reg, 1 << HWLP_COPROC_IDX +1: +.endm + +/** + * @brief Macros to save and restore the hardware loop registers to and from the given frame + */ +.macro hwlp_save_regs frame=sp + csrr a1, CSR_LOOP0_START_ADDR + sw a1, RV_HWLOOP_START0(\frame) + csrr a1, CSR_LOOP0_END_ADDR + sw a1, RV_HWLOOP_END0(\frame) + csrr a1, CSR_LOOP0_COUNT + sw a1, RV_HWLOOP_COUNT0(\frame) + csrr a1, CSR_LOOP1_START_ADDR + sw a1, RV_HWLOOP_START1(\frame) + csrr a1, CSR_LOOP1_END_ADDR + sw a1, RV_HWLOOP_END1(\frame) + csrr a1, CSR_LOOP1_COUNT + sw a1, RV_HWLOOP_COUNT1(\frame) +.endm + +.macro hwlp_restore_regs frame=sp + lw a1, RV_HWLOOP_START0(\frame) + csrw 
CSR_LOOP0_START_ADDR, a1 + lw a1, RV_HWLOOP_END0(\frame) + csrw CSR_LOOP0_END_ADDR, a1 + lw a1, RV_HWLOOP_COUNT0(\frame) + csrw CSR_LOOP0_COUNT, a1 + lw a1, RV_HWLOOP_START1(\frame) + csrw CSR_LOOP1_START_ADDR, a1 + lw a1, RV_HWLOOP_END1(\frame) + csrw CSR_LOOP1_END_ADDR, a1 + lw a1, RV_HWLOOP_COUNT1(\frame) + csrw CSR_LOOP1_COUNT, a1 +.endm + + + /** + * @brief Restore the HWLP registers contained in the dedicated save area if the given task ever used it. + * This routine sets the HWLP context to clean in any case. + * + * @param a0 StaticTask address for the newly scheduled task + */ +hwlp_restore_if_used: + addi sp, sp, -16 + sw ra, (sp) + /* Check if the HWLP was in use beforehand */ + li a1, 0 + li a2, HWLP_COPROC_IDX + call pxPortGetCoprocArea + /* Get the enable flags from the coprocessor save area */ + lw a1, RV_COPROC_ENABLE(a0) + /* Check whether the HWLP enable flag was set for this task */ + andi a2, a1, 1 << HWLP_COPROC_IDX + beqz a2, _hwlp_restore_never_used + /* Enable bit was set, restore the coprocessor context */ + lw a0, RV_COPROC_SA+HWLP_COPROC_IDX*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[HWLP_COPROC_IDX] */ + hwlp_restore_regs a0 +_hwlp_restore_never_used: + /* Clear the context */ + csrwi CSR_HWLP_STATE_REG, HWLP_CLEAN_STATE + lw ra, (sp) + addi sp, sp, 16 + ret + +#endif /* SOC_CPU_HAS_HWLOOP */ + + +#if SOC_CPU_HAS_PIE + +/** + * @brief Macros to enable and disable the PIE coprocessor on the current core + */ +.macro pie_enable scratch_reg=a0 + li \scratch_reg, 1 + csrw CSR_PIE_STATE_REG, \scratch_reg +.endm + +/** + * @brief Disable the PIE coprocessor while returning the former status in the given register + */ +.macro pie_disable reg + csrrw \reg, CSR_PIE_STATE_REG, zero + /* Only keep the lowest two bits, if register is 0, PIE was off */ + andi \reg, \reg, 0b11 + beqz \reg, 1f + /* It was ON, return the enable bit in \reg */ + li \reg, 1 << PIE_COPROC_IDX +1: +.endm + +/** + * @brief Macros to save and restore the PIE 
coprocessor registers to and from the given frame + */ +.macro pie_save_regs frame=a0 + /* Save the 128-bit Q registers to the frame memory and then frame += 16 */ + esp.vst.128.ip q0, \frame, 16 + esp.vst.128.ip q1, \frame, 16 + esp.vst.128.ip q2, \frame, 16 + esp.vst.128.ip q4, \frame, 16 + esp.vst.128.ip q5, \frame, 16 + esp.vst.128.ip q6, \frame, 16 + esp.vst.128.ip q7, \frame, 16 + /* Save the QACC_H and QACC_L registers, each being 256 bits big */ + esp.st.qacc.l.l.128.ip \frame, 16 + esp.st.qacc.l.h.128.ip \frame, 16 + esp.st.qacc.h.l.128.ip \frame, 16 + esp.st.qacc.h.h.128.ip \frame, 16 + /* UA_STATE register (128 bits) */ + esp.st.ua.state.ip \frame, 16 + /* XACC register (40 bits) */ + esp.st.u.xacc.ip \frame, 8 + /* The following registers will be stored in the same word */ + /* SAR register (6 bits) */ + esp.movx.r.sar a1 + slli a2, a1, 8 + /* SAR_BYTES register (4 bits) */ + esp.movx.r.sar.bytes a1 + slli a1, a1, 4 + or a2, a2, a1 + /* FFT_BIT_WIDTH register (4 bits) */ + esp.movx.r.fft.bit.width a1 + or a2, a2, a1 + sw a2, (\frame) +.endm + + +.macro pie_restore_regs frame=a0 + /* Restore the 128-bit Q registers from the frame memory and then frame += 16 */ + esp.vld.128.ip q0, \frame, 16 + esp.vld.128.ip q1, \frame, 16 + esp.vld.128.ip q2, \frame, 16 + esp.vld.128.ip q4, \frame, 16 + esp.vld.128.ip q5, \frame, 16 + esp.vld.128.ip q6, \frame, 16 + esp.vld.128.ip q7, \frame, 16 + /* Restore the QACC_H and QACC_L registers, each being 256 bits big */ + esp.ld.qacc.l.l.128.ip \frame, 16 + esp.ld.qacc.l.h.128.ip \frame, 16 + esp.ld.qacc.h.l.128.ip \frame, 16 + esp.ld.qacc.h.h.128.ip \frame, 16 + /* UA_STATE register (128 bits) */ + esp.ld.ua.state.ip \frame, 16 + /* XACC register (40 bits) */ + esp.ld.xacc.ip \frame, 8 + /* The following registers are stored in the same word */ + lw a2, (\frame) + /* FFT_BIT_WIDTH register (4 bits). NOTE(review): bits [3:0] hold FFT_BIT_WIDTH per pie_save_regs, yet they are written with esp.movx.w.sar below (and bits [13:8], SAR, with esp.movx.w.fft.bit.width) — looks swapped, verify against the save macro */ + andi a1, a2, 0xf + esp.movx.w.sar a1 + /* SAR_BYTES register (4 bits) */ + srli a2, a2, 4 + andi a1, a2, 0xf + 
esp.movx.w.sar.bytes a1 + /* SAR register (6 bits) */ + srli a2, a2, 4 + andi a1, a2, 0x3f + esp.movx.w.fft.bit.width a1 +.endm + +generate_coprocessor_routine pie, PIE_COPROC_IDX, pie_enable, pie_save_regs, pie_restore_regs + +#endif /* SOC_CPU_HAS_PIE */ + + #if SOC_CPU_HAS_FPU /* Bit to set in mstatus to enable the FPU */ @@ -40,7 +319,7 @@ /* Bit to clear in mstatus to disable the FPU */ #define CSR_MSTATUS_FPU_DISABLE (3 << 13) -.macro save_fpu_regs frame=sp +.macro fpu_save_regs frame=sp fsw ft0, RV_FPU_FT0(\frame) fsw ft1, RV_FPU_FT1(\frame) fsw ft2, RV_FPU_FT2(\frame) @@ -73,9 +352,11 @@ fsw ft9, RV_FPU_FT9 (\frame) fsw ft10, RV_FPU_FT10(\frame) fsw ft11, RV_FPU_FT11(\frame) + csrr a1, fcsr + sw a1, RV_FPU_FCSR(\frame) .endm -.macro restore_fpu_regs frame=sp +.macro fpu_restore_regs frame=sp flw ft0, RV_FPU_FT0(\frame) flw ft1, RV_FPU_FT1(\frame) flw ft2, RV_FPU_FT2(\frame) @@ -108,6 +389,8 @@ flw ft9, RV_FPU_FT9(\frame) flw ft10, RV_FPU_FT10(\frame) flw ft11, RV_FPU_FT11(\frame) + lw a1, RV_FPU_FCSR(\frame) + csrw fcsr, a1 .endm @@ -125,98 +408,17 @@ .macro fpu_enable reg - li \reg, CSR_MSTATUS_FPU_ENABLE + li \reg, CSR_MSTATUS_FPU_ENABLE csrs mstatus, \reg .endm .macro fpu_disable reg - li \reg, CSR_MSTATUS_FPU_DISABLE + li \reg, CSR_MSTATUS_FPU_DISABLE csrc mstatus, \reg .endm - .global vPortTaskPinToCore - .global vPortCoprocUsedInISR - .global pxPortUpdateCoprocOwner - -/** - * @brief Save the current FPU context in the FPU owner's save area - * - * @param sp Interuptee's RvExcFrame address - * - * Note: Since this routine is ONLY meant to be called from _panic_handler routine, - * it is possible to alter `s0-s11` registers - */ - .global rtos_save_fpu_coproc - .type rtos_save_fpu_coproc, @function -rtos_save_fpu_coproc: - /* If we are in an interrupt context, we have to abort. 
We don't allow using the FPU from ISR */ -#if ( configNUM_CORES > 1 ) - csrr a2, mhartid /* a2 = coreID */ - slli a2, a2, 2 /* a2 = coreID * 4 */ - la a1, port_uxInterruptNesting /* a1 = &port_uxInterruptNesting */ - add a1, a1, a2 /* a1 = &port_uxInterruptNesting[coreID] */ - lw a1, 0(a1) /* a1 = port_uxInterruptNesting[coreID] */ -#else /* ( configNUM_CORES <= 1 ) */ - lw a1, (port_uxInterruptNesting) /* a1 = port_uxInterruptNesting */ -#endif /* ( configNUM_CORES > 1 ) */ - /* SP still contains the RvExcFrame address */ - mv a0, sp - bnez a1, vPortCoprocUsedInISR - /* Enable the FPU needed by the current task */ - fpu_enable a1 - mv s0, ra - call rtos_current_tcb - /* If the current TCB is NULL, the FPU is used during initialization, even before - * the scheduler started. Consider this a valid usage, the FPU will be disabled - * as soon as the scheduler is started anyway*/ - beqz a0, rtos_save_fpu_coproc_norestore - mv s1, a0 /* s1 = pxCurrentTCBs */ - /* Prepare parameters of pxPortUpdateCoprocOwner */ - mv a2, a0 - li a1, FPU_COPROC_IDX - csrr a0, mhartid - call pxPortUpdateCoprocOwner - /* If the save area is NULL, no need to save context */ - beqz a0, rtos_save_fpu_coproc_nosave - /* Save the FPU context in the structure */ - lw a0, RV_COPROC_SA+FPU_COPROC_IDX*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[FPU_COPROC_IDX] */ - save_fpu_regs a0 - csrr a1, fcsr - sw a1, RV_FPU_FCSR(a0) -rtos_save_fpu_coproc_nosave: -#if ( configNUM_CORES > 1 ) - /* Pin current task to current core */ - mv a0, s1 - csrr a1, mhartid - call vPortTaskPinToCore -#endif /* configNUM_CORES > 1 */ - /* Check if we have to restore a previous FPU context from the current TCB */ - mv a0, s1 - /* Do not allocate memory for the FPU yet, delay this until another task wants to use it. 
- * This guarantees that if a stack overflow occurs when allocating FPU context on the stack, - * the current task context is flushed and updated in the TCB, generating a correct backtrace - * from the panic handler. */ - li a1, 0 - li a2, FPU_COPROC_IDX - call pxPortGetCoprocArea - /* Get the enable flags from the coprocessor save area */ - lw a1, RV_COPROC_ENABLE(a0) - /* To avoid having branches below, set the FPU enable flag now */ - ori a2, a1, 1 << FPU_COPROC_IDX - sw a2, RV_COPROC_ENABLE(a0) - /* Check if the former FPU enable bit was set */ - andi a2, a1, 1 << FPU_COPROC_IDX - beqz a2, rtos_save_fpu_coproc_norestore - /* FPU enable bit was set, restore the FPU context */ - lw a0, RV_COPROC_SA+FPU_COPROC_IDX*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[FPU_COPROC_IDX] */ - restore_fpu_regs a0 - lw a1, RV_FPU_FCSR(a0) - csrw fcsr, a1 -rtos_save_fpu_coproc_norestore: - /* Return from routine via s0, instead of ra */ - jr s0 - .size rtos_save_fpu_coproc, .-rtos_save_fpu_coproc +generate_coprocessor_routine fpu, FPU_COPROC_IDX, fpu_enable, fpu_save_regs, fpu_restore_regs #endif /* SOC_CPU_HAS_FPU */ @@ -249,37 +451,50 @@ rtos_current_tcb: * TODO: ISR nesting code improvements ? * In the routines below, let's use a0-a5 registers to let the compiler generate * 16-bit instructions. + * @returns Context that should be given to `rtos_int_exit`. On targets that have coprocessors, + * this value is a bitmap where bit i is 1 if coprocessor i is enable, 0 if it is disabled. 
+ * This routine can use the s registers too since they are not used by the caller (yet) */ .global rtos_int_enter .type rtos_int_enter, @function rtos_int_enter: #if ( configNUM_CORES > 1 ) - csrr a5, mhartid /* a5 = coreID */ - slli a5, a5, 2 /* a5 = coreID * 4 */ + csrr s0, mhartid /* s0 = coreID */ + slli s0, s0, 2 /* s0 = coreID * 4 */ la a0, port_xSchedulerRunning /* a0 = &port_xSchedulerRunning */ - add a0, a0, a5 /* a0 = &port_xSchedulerRunning[coreID] */ + add a0, a0, s0 /* a0 = &port_xSchedulerRunning[coreID] */ lw a0, (a0) /* a0 = port_xSchedulerRunning[coreID] */ #else lw a0, port_xSchedulerRunning /* a0 = port_xSchedulerRunning */ #endif /* ( configNUM_CORES > 1 ) */ + /* In case we jump, return value (a0) is correct */ beqz a0, rtos_int_enter_end /* if (port_xSchedulerRunning[coreID] == 0) jump to rtos_int_enter_end */ /* Increment the ISR nesting count */ la a0, port_uxInterruptNesting /* a0 = &port_uxInterruptNesting */ #if ( configNUM_CORES > 1 ) - add a0, a0, a5 /* a0 = &port_uxInterruptNesting[coreID] // a5 already contains coreID * 4 */ + add a0, a0, s0 /* a0 = &port_uxInterruptNesting[coreID] // s0 contains coreID * 4 */ #endif /* ( configNUM_CORES > 1 ) */ lw a1, 0(a0) /* a1 = port_uxInterruptNesting[coreID] */ addi a2, a1, 1 /* a2 = a1 + 1 */ sw a2, 0(a0) /* port_uxInterruptNesting[coreID] = a2 */ /* If we reached here from another low-priority ISR, i.e, port_uxInterruptNesting[coreID] > 0, then skip stack pushing to TCB */ + li a0, 0 /* return 0 in case we are going to branch */ bnez a1, rtos_int_enter_end /* if (port_uxInterruptNesting[coreID] > 0) jump to rtos_int_enter_end */ + li s2, 0 #if SOC_CPU_COPROC_NUM > 0 - /* Disable the FPU to forbid the ISR from using it. We don't need to re-enable it manually since the caller - * will restore `mstatus` before returning from interrupt. 
*/ + /* Disable the coprocessors to forbid the ISR from using it */ +#if SOC_CPU_HAS_PIE + /* The current PIE coprocessor status will be returned in a0 */ + pie_disable a0 + or s2, s2, a0 +#endif /* SOC_CPU_HAS_PIE */ + +#if SOC_CPU_HAS_FPU fpu_disable a0 +#endif /* SOC_CPU_HAS_FPU */ #endif /* SOC_CPU_COPROC_NUM > 0 */ @@ -291,24 +506,48 @@ rtos_int_enter: /* Save the current sp in pxCurrentTCBs[coreID] and load the ISR stack on to sp */ #if ( configNUM_CORES > 1 ) la a0, pxCurrentTCBs /* a0 = &pxCurrentTCBs */ - add a0, a0, a5 /* a0 = &pxCurrentTCBs[coreID] // a5 already contains coreID * 4 */ + add a0, a0, s0 /* a0 = &pxCurrentTCBs[coreID] // s0 already contains coreID * 4 */ lw a0, (a0) /* a0 = pxCurrentTCBs[coreID] */ sw sp, 0(a0) /* pxCurrentTCBs[coreID] = sp */ - la a0, xIsrStackTop /* a0 = &xIsrStackTop */ - add a0, a0, a5 /* a0 = &xIsrStackTop[coreID] // a5 already contains coreID * 4 */ - lw sp, (a0) /* sp = xIsrStackTop[coreID] */ + /* We may need a0 below to call pxPortGetCoprocArea */ + la a1, xIsrStackTop /* a1 = &xIsrStackTop */ + add a1, a1, s0 /* a1 = &xIsrStackTop[coreID] // s0 already contains coreID * 4 */ + lw sp, (a1) /* sp = xIsrStackTop[coreID] */ #else lw a0, pxCurrentTCBs /* a0 = pxCurrentTCBs */ sw sp, 0(a0) /* pxCurrentTCBs[0] = sp */ lw sp, xIsrStackTop /* sp = xIsrStackTop */ #endif /* ( configNUM_CORES > 1 ) */ +#if SOC_CPU_HAS_HWLOOP + /* Check if the current task used the Hardware loop feature, by reading the state */ + csrr a1, CSR_HWLP_STATE_REG + addi a1, a1, -HWLP_DIRTY_STATE + bnez a1, 1f + /* State is dirty! 
The hardware loop feature was used, save the registers */ + li a1, 1 /* Allocate the save area if not already allocated */ + li a2, HWLP_COPROC_IDX + mv s1, ra + call pxPortGetCoprocArea + mv ra, s1 + /* Set the enable flags from the coprocessor save area */ + lw a1, RV_COPROC_ENABLE(a0) + ori a1, a1, 1 << HWLP_COPROC_IDX + sw a1, RV_COPROC_ENABLE(a0) + /* Get the area where we need to save the HWLP registers */ + lw a0, RV_COPROC_SA+HWLP_COPROC_IDX*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[\coproc_idx] */ + hwlp_save_regs a0 + /* Disable the HWLP feature so that ISR cannot use them */ + csrwi CSR_HWLP_STATE_REG, HWLP_CLEAN_STATE +1: +#endif + #if CONFIG_ESP_SYSTEM_HW_STACK_GUARD /* Prepare the parameters for esp_hw_stack_guard_set_bounds(xIsrStackBottom, xIsrStackTop); */ #if ( configNUM_CORES > 1 ) /* Load the xIsrStack for the current core and set the new bounds */ la a0, xIsrStackBottom - add a0, a0, a5 /* a0 = &xIsrStackBottom[coreID] */ + add a0, a0, s0 /* a0 = &xIsrStackBottom[coreID] */ lw a0, (a0) /* a0 = xIsrStackBottom[coreID] */ #else lw a0, xIsrStackBottom @@ -320,6 +559,8 @@ rtos_int_enter: ESP_HW_STACK_GUARD_MONITOR_START_CUR_CORE a0 a1 #endif /* CONFIG_ESP_SYSTEM_HW_STACK_GUARD */ + /* Return the coprocessor context from s2 */ + mv a0, s2 rtos_int_enter_end: ret @@ -327,6 +568,8 @@ rtos_int_enter_end: * @brief Restore the stack pointer of the next task to run. * * @param a0 Former mstatus + * @param a1 Context returned by `rtos_int_enter`. On targets that have coprocessors, this value is a bitmap + * where bit i is 1 if coprocessor i was enable, 0 if it was disabled. * * @returns New mstatus (potentially with coprocessors disabled) */ @@ -334,9 +577,14 @@ rtos_int_enter_end: .type rtos_int_exit, @function rtos_int_exit: /* To speed up this routine and because this current routine is only meant to be called from the interrupt - * handler, let's use callee-saved registers instead of stack space. 
Registers `s3-s11` are not used by + * handler, let's use callee-saved registers instead of stack space. Registers `s5-s11` are not used by * the caller */ mv s11, a0 +#if SOC_CPU_COPROC_NUM > 0 + /* Save a1 as it contains the bitmap with the enabled coprocessors */ + mv s8, a1 +#endif + #if ( configNUM_CORES > 1 ) csrr a1, mhartid /* a1 = coreID */ slli a1, a1, 2 /* a1 = a1 * 4 */ @@ -366,12 +614,12 @@ isr_skip_decrement: /* If the CPU reached this label, a2 (uxInterruptNesting) is 0 for sure */ /* Schedule the next task if a yield is pending */ - la a0, xPortSwitchFlag /* a0 = &xPortSwitchFlag */ + la s7, xPortSwitchFlag /* s7 = &xPortSwitchFlag */ #if ( configNUM_CORES > 1 ) - add a0, a0, a1 /* a0 = &xPortSwitchFlag[coreID] // a1 already contains coreID * 4 */ + add s7, s7, a1 /* s7 = &xPortSwitchFlag[coreID] // a1 already contains coreID * 4 */ #endif /* ( configNUM_CORES > 1 ) */ - lw a2, 0(a0) /* a2 = xPortSwitchFlag[coreID] */ - beqz a2, no_switch /* if (xPortSwitchFlag[coreID] == 0) jump to no_switch */ + lw a0, 0(s7) /* a0 = xPortSwitchFlag[coreID] */ + beqz a0, no_switch_restore_coproc /* if (xPortSwitchFlag[coreID] == 0) jump to no_switch_restore_coproc */ /* Preserve return address and schedule next task. To speed up the process, and because this current routine * is only meant to be called from the interrupt handle, let's save some speed and space by using callee-saved @@ -379,33 +627,68 @@ isr_skip_decrement: mv s10, ra #if ( SOC_CPU_COPROC_NUM > 0 ) /* In the cases where the newly scheduled task is different from the previously running one, - * we have to disable the coprocessor(s) to let them trigger an exception on first use. - * Else, if the same task is scheduled, do not change the coprocessor(s) state. */ + * we have to disable the coprocessors to let them trigger an exception on first use. 
+ * Else, if the same task is scheduled, restore the former coprocessors state (before the interrupt) */ call rtos_current_tcb + /* Keep former TCB in s9 */ mv s9, a0 +#endif call vTaskSwitchContext +#if ( SOC_CPU_COPROC_NUM == 0 ) + mv ra, s10 /* Restore original return address */ +#endif + /* Clears the switch pending flag (stored in s7) */ + sw zero, 0(s7) /* xPortSwitchFlag[coreID] = 0; */ + +#if ( SOC_CPU_COPROC_NUM > 0 ) + /* If the Task to schedule is NOT the same as the former one (s9), keep the coprocessors disabled */ call rtos_current_tcb - beq a0, s9, rtos_int_exit_no_change - /* Disable the coprocessors in s11 register (former mstatus) */ - li a0, ~CSR_MSTATUS_FPU_DISABLE - and s11, s11, a0 -rtos_int_exit_no_change: -#else /* ( SOC_CPU_COPROC_NUM == 0 ) */ - call vTaskSwitchContext + mv ra, s10 /* Restore original return address */ + beq a0, s9, no_switch_restore_coproc + +#if SOC_CPU_HAS_HWLOOP + /* We have to restore the context of the HWLP if the newly scheduled task used it before. 
In all cases, this + * routine will also clean the state and set it to clean */ + mv s7, ra + /* a0 contains the current TCB address */ + call hwlp_restore_if_used + mv ra, s7 +#endif /* SOC_CPU_HAS_HWLOOP */ + +#if SOC_CPU_HAS_FPU + /* Disable the FPU in the `mstatus` value to return */ + li a1, ~CSR_MSTATUS_FPU_DISABLE + and s11, s11, a1 +#endif /* SOC_CPU_HAS_FPU */ + j no_switch_restored + #endif /* ( SOC_CPU_COPROC_NUM > 0 ) */ - mv ra, s10 - /* Clears the switch pending flag */ - la a0, xPortSwitchFlag /* a0 = &xPortSwitchFlag */ -#if ( configNUM_CORES > 1 ) - /* C routine vTaskSwitchContext may change the temp registers, so we read again */ - csrr a1, mhartid /* a1 = coreID */ - slli a1, a1, 2 /* a1 = a1 * 4 */ - add a0, a0, a1 /* a0 = &xPortSwitchFlag[coreID]; */ -#endif /* ( configNUM_CORES > 1 ) */ - sw zero, 0(a0) /* xPortSwitchFlag[coreID] = 0; */ +no_switch_restore_coproc: + /* We reach here either because there is no switch scheduled or because the TCB that is going to be scheduled + * is the same as the one that has been interrupted. In both cases, we need to restore the coprocessors status */ +#if SOC_CPU_HAS_HWLOOP + /* Check if the ISR altered the state of the HWLP */ + csrr a1, CSR_HWLP_STATE_REG + addi a1, a1, -HWLP_DIRTY_STATE + bnez a1, 1f + /* ISR used the HWLP, restore the HWLP context! 
*/ + mv s7, ra + /* a0 contains the current TCB address */ + call hwlp_restore_if_used + mv ra, s7 +1: + /* Else, the ISR hasn't touched HWLP registers, we don't need to restore the HWLP registers */ +#endif /* SOC_CPU_HAS_HWLOOP */ -no_switch: +#if SOC_CPU_HAS_PIE + andi a0, s8, 1 << PIE_COPROC_IDX + beqz a0, 2f + pie_enable a0 +2: +#endif /* SOC_CPU_HAS_PIE */ + +no_switch_restored: #if CONFIG_ESP_SYSTEM_HW_STACK_GUARD /* esp_hw_stack_guard_monitor_stop(); pass the scratch registers */ diff --git a/components/freertos/app_startup.c b/components/freertos/app_startup.c index 7e4439d0b8..c7f8116769 100644 --- a/components/freertos/app_startup.c +++ b/components/freertos/app_startup.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ diff --git a/components/freertos/test_apps/freertos/port/test_fpu_in_task.c b/components/freertos/test_apps/freertos/port/test_fpu_in_task.c index 6c52f314d9..fe59eb4bc2 100644 --- a/components/freertos/test_apps/freertos/port/test_fpu_in_task.c +++ b/components/freertos/test_apps/freertos/port/test_fpu_in_task.c @@ -165,7 +165,7 @@ static void unpinned_task(void *arg) TEST_ASSERT_EQUAL(cur_core_num, xTaskGetCoreID(NULL)); #endif #endif // !CONFIG_FREERTOS_UNICORE - // Reenable scheduling/preemption + // Re-enable scheduling/preemption #if ( ( CONFIG_FREERTOS_SMP ) && ( !CONFIG_FREERTOS_UNICORE ) ) vTaskPreemptionEnable(NULL); #else @@ -192,7 +192,7 @@ TEST_CASE("FPU: Usage in unpinned task", "[freertos]") typedef struct { bool negative; TaskHandle_t main; -} ParamsFPU; +} fpu_params_t; /** * @brief Function performing some simple calculation using several FPU registers. 
@@ -200,7 +200,7 @@ typedef struct { */ void fpu_calculation(void* arg) { - ParamsFPU* p = (ParamsFPU*) arg; + fpu_params_t* p = (fpu_params_t*) arg; const bool negative = p->negative; const float init = negative ? -1.f : 1.f; float f = init; @@ -236,13 +236,13 @@ TEST_CASE("FPU: Unsolicited context switch between tasks using FPU", "[freertos] /* Create two tasks that are on the same core and use the same FPU */ TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle(); TaskHandle_t tasks[2]; - ParamsFPU params[2] = { + fpu_params_t params[2] = { { .negative = false, .main = unity_task_handle }, { .negative = true, .main = unity_task_handle }, }; xTaskCreatePinnedToCore(fpu_calculation, "Task1", 2048, params + 0, UNITY_FREERTOS_PRIORITY + 1, &tasks[0], 1); - xTaskCreatePinnedToCore(fpu_calculation, "Task2", 2048, params + 1, UNITY_FREERTOS_PRIORITY + 2, &tasks[2], 1); + xTaskCreatePinnedToCore(fpu_calculation, "Task2", 2048, params + 1, UNITY_FREERTOS_PRIORITY + 1, &tasks[1], 1); ulTaskNotifyTake(pdTRUE, portMAX_DELAY); ulTaskNotifyTake(pdTRUE, portMAX_DELAY); diff --git a/components/freertos/test_apps/freertos/port/test_hwlp.c b/components/freertos/test_apps/freertos/port/test_hwlp.c new file mode 100644 index 0000000000..49347318f5 --- /dev/null +++ b/components/freertos/test_apps/freertos/port/test_hwlp.c @@ -0,0 +1,94 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "sdkconfig.h" +#include +#include "soc/soc_caps.h" +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "unity.h" + +/** + * On RISC-V targets that have coprocessors, the contexts are saved at the lowest address of the stack, + * which can lead to wrong stack watermark calculation in FreeRTOS in theory. + * As such, the port layer of FreeRTOS will adjust the lowest address of the stack when a coprocessor + * context is saved. 
+ */ +#if SOC_CPU_HAS_HWLOOP + +uint32_t use_hwlp(uint32_t count); + +TEST_CASE("HWLP: Context save does not affect stack watermark", "[freertos]") +{ + /* Force the FreeRTOS port layer to store a HWLP context in the current task. + * So let's use it and make sure another task, on the SAME CORE, also uses it */ + const TaskHandle_t current_handle = xTaskGetCurrentTaskHandle(); + + /* Get the current stack watermark */ + const UBaseType_t before_watermark = uxTaskGetStackHighWaterMark(current_handle); + + /* Use the HWLP unit, the context will NOT be flushed until a context switch is done */ + use_hwlp(20); + + /* Make sure FreeRTOS switches to another task, even Idle task, so that the current Task saves + * the current HWLP context */ + vTaskDelay(10); + + const UBaseType_t after_watermark = uxTaskGetStackHighWaterMark(current_handle); + + TEST_ASSERT_TRUE(after_watermark > before_watermark / 2); +} + +#if CONFIG_FREERTOS_NUMBER_OF_CORES > 1 + +typedef struct { + uint32_t count; + TaskHandle_t main; +} hwlp_params_t; + +static void calculation(void* arg) +{ + hwlp_params_t* p = (hwlp_params_t*) arg; + const uint32_t count = p->count; + uint32_t result = 0; + int i = 0; + + for (i = 0; i < 50000; i++) { + uint32_t current = use_hwlp(count); + result += current; + + /* Using TEST_ASSERT_TRUE triggers a stack overflow, make sure the count is still correct. 
+ * The function `use_hwlp` should return (count * 16) */ + assert(count * 16 == current); + } + + /* Make sure the result is correct */ + assert(count * 16 * i == result); + + xTaskNotifyGive(p->main); + vTaskDelete(NULL); +} + +TEST_CASE("HWLP: Unsolicited context switch between tasks using HWLP", "[freertos]") +{ + /* Create two tasks that are on the same core and use the same FPU */ + TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle(); + TaskHandle_t tasks[2]; + hwlp_params_t params[2] = { + { .count = 1024, .main = unity_task_handle }, + { .count = 2048, .main = unity_task_handle }, + }; + + xTaskCreatePinnedToCore(calculation, "Task1", 2048, params + 0, CONFIG_UNITY_FREERTOS_PRIORITY + 1, &tasks[0], 1); + xTaskCreatePinnedToCore(calculation, "Task2", 2048, params + 1, CONFIG_UNITY_FREERTOS_PRIORITY + 1, &tasks[1], 1); + + ulTaskNotifyTake(pdTRUE, portMAX_DELAY); + ulTaskNotifyTake(pdTRUE, portMAX_DELAY); +} + +#endif /* CONFIG_FREERTOS_NUMBER_OF_CORES > 1 */ + +#endif // SOC_CPU_HAS_HWLOOP diff --git a/components/freertos/test_apps/freertos/port/test_hwlp_routines.S b/components/freertos/test_apps/freertos/port/test_hwlp_routines.S new file mode 100644 index 0000000000..80bd955776 --- /dev/null +++ b/components/freertos/test_apps/freertos/port/test_hwlp_routines.S @@ -0,0 +1,52 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "sdkconfig.h" +#include "soc/soc_caps.h" + +#if SOC_CPU_HAS_HWLOOP + + .text + .align 4 + +/** + * @brief Perform a hardware loop with a given number of iterations + * + * @param a0 Number of iterations + */ + .global use_hwlp + .type use_hwlp, @function +use_hwlp: + /* The toolchain doesn't support HWLP instructions yet, manually set it up */ + la a2, start + csrw 0x7c6, a2 + la a2, end + csrw 0x7c7, a2 + csrw 0x7c8, a0 + li a1, 0 + /* Hardware loops must have at least 8 32-bit instructions or 16 16-bit instructions */ +start: + addi a1, a1, 
1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 +end: + addi a1, a1, 1 + mv a0, a1 + ret + .size use_hwlp, .-use_hwlp + +#endif /* SOC_CPU_HAS_HWLOOP */ diff --git a/components/freertos/test_apps/freertos/port/test_pie_in_task.c b/components/freertos/test_apps/freertos/port/test_pie_in_task.c new file mode 100644 index 0000000000..aa9bcf658e --- /dev/null +++ b/components/freertos/test_apps/freertos/port/test_pie_in_task.c @@ -0,0 +1,258 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "sdkconfig.h" +#include +#include "soc/soc_caps.h" +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/semphr.h" +#include "unity.h" +#include "test_utils.h" + +/* PIE instructions set is currently only supported in GCC compiler */ +#if SOC_CPU_HAS_PIE + +/** + * @brief Performs the signed sum of two 4-word vectors using the PIE. 
+ * + * @param a First vector + * @param b Second vector + * @param dst Destination to store the sum + */ +void pie_vector_signed_add(const int32_t a[4], const int32_t b[4], int32_t dst[4]); + +/* ------------------------------------------------------------------------------------------------------------------ */ + +typedef struct { + int32_t cst; + TaskHandle_t main; + SemaphoreHandle_t sem; +} pie_params_t; + +/* +Test PIE usage from a task context + +Purpose: + - Test that the PIE can be used from a task context + - Test that PIE context is properly saved and restored + - Test that PIE context is cleaned up on task deletion by running multiple iterations +Procedure: + - Create TEST_PINNED_NUM_TASKS tasks pinned to each core + - Start each task + - Each task performs a PIE vector addition and then blocks (to allow other tasks to run thus forcing a PIE context + save and restore). + - Delete each task + - Repeat test for TEST_PINNED_NUM_ITERS iterations +Expected: + - Correct vector sum calculated by each task + - Each task cleans up its PIE context on deletion +*/ + +#define TEST_PINNED_NUM_TASKS 3 +#define TEST_PINNED_NUM_ITERS 5 + +static void pinned_task(void *arg) +{ + pie_params_t *param = (pie_params_t*) arg; + ulTaskNotifyTake(pdTRUE, portMAX_DELAY); + + int32_t constant = 42 * param->cst; + int32_t a[4] = { constant, constant, constant, constant }; + int32_t b[4] = { 10, 20, 30, 40 }; + int32_t dst[4] = { 0 }; + + pie_vector_signed_add(a, b, dst); + + for (int i = 0; i < sizeof(a) / sizeof(uint32_t); i++) { + TEST_ASSERT_EQUAL(dst[i], a[i] + b[i]); + } + + // Indicate done and wait to be deleted + xSemaphoreGive((SemaphoreHandle_t)param->sem); + vTaskSuspend(NULL); +} + +TEST_CASE("PIE: Usage in task", "[freertos]") +{ + SemaphoreHandle_t done_sem = xSemaphoreCreateCounting(CONFIG_FREERTOS_NUMBER_OF_CORES * TEST_PINNED_NUM_TASKS, 0); + TEST_ASSERT_NOT_EQUAL(NULL, done_sem); + + for (int iter = 0; iter < TEST_PINNED_NUM_ITERS; iter++) { + TaskHandle_t 
task_handles[CONFIG_FREERTOS_NUMBER_OF_CORES][TEST_PINNED_NUM_TASKS]; + pie_params_t params[CONFIG_FREERTOS_NUMBER_OF_CORES][TEST_PINNED_NUM_TASKS]; + + // Create test tasks for each core + for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES; i++) { + for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) { + params[i][j] = (pie_params_t) { + .cst = i + j + 1, + .sem = done_sem, + }; + TEST_ASSERT_EQUAL(pdTRUE, xTaskCreatePinnedToCore(pinned_task, "task", 4096, (void *) ¶ms[i][j], UNITY_FREERTOS_PRIORITY + 1, &task_handles[i][j], i)); + } + } + + // Start the created tasks + for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES; i++) { + for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) { + xTaskNotifyGive(task_handles[i][j]); + } + } + + // Wait for the tasks to complete + for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES * TEST_PINNED_NUM_TASKS; i++) { + xSemaphoreTake(done_sem, portMAX_DELAY); + } + + // Delete the tasks + for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES; i++) { + for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) { + vTaskDelete(task_handles[i][j]); + } + } + + vTaskDelay(10); // Short delay to allow idle task to be free task memory and FPU contexts + } + + vSemaphoreDelete(done_sem); +} + +/* ------------------------------------------------------------------------------------------------------------------ */ + +/* +Test PIE usage will pin an unpinned task + +Purpose: + - Test that unpinned tasks are automatically pinned to the current core on the task's first use of the PIE + - Test that PIE context is cleaned up on task deletion by running multiple iterations +Procedure: + - Create an unpinned task + - Task disables scheduling/preemption to ensure that it does not switch cores + - Task uses the PIE + - Task checks its core affinity after PIE usage + - Task deletes itself + - Repeat test for TEST_UNPINNED_NUM_ITERS iterations +Expected: + - Task remains unpinned until its first usage of the PIE + - The task becomes pinned to the current core after 
first use of the PIE + - Each task cleans up its PIE context on deletion +*/ + +#if CONFIG_FREERTOS_NUMBER_OF_CORES > 1 + +#define TEST_UNPINNED_NUM_ITERS 5 + +static void unpinned_task(void *arg) +{ + // Disable scheduling/preemption to make sure current core ID doesn't change +#if ( ( CONFIG_FREERTOS_SMP ) && ( !CONFIG_FREERTOS_UNICORE ) ) + vTaskPreemptionDisable(NULL); +#else + vTaskSuspendAll(); +#endif + BaseType_t cur_core_num = xPortGetCoreID(); + // Check that the task is unpinned +#if !CONFIG_FREERTOS_UNICORE +#if CONFIG_FREERTOS_SMP + TEST_ASSERT_EQUAL(tskNO_AFFINITY, vTaskCoreAffinityGet(NULL)); +#else + TEST_ASSERT_EQUAL(tskNO_AFFINITY, xTaskGetCoreID(NULL)); +#endif +#endif // !CONFIG_FREERTOS_UNICORE + + int32_t a[4] = { 0, 1, 2, 3}; + int32_t b[4] = { 111, 222, 333, 444 }; + int32_t dst[4] = { 0 }; + + pie_vector_signed_add(a, b, dst); + + for (int i = 0; i < sizeof(a) / sizeof(uint32_t); i++) { + TEST_ASSERT_EQUAL(dst[i], a[i] + b[i]); + } + +#if !CONFIG_FREERTOS_UNICORE +#if CONFIG_FREERTOS_SMP + TEST_ASSERT_EQUAL(1 << cur_core_num, vTaskCoreAffinityGet(NULL)); +#else + TEST_ASSERT_EQUAL(cur_core_num, xTaskGetCoreID(NULL)); +#endif +#endif // !CONFIG_FREERTOS_UNICORE + // Re-enable scheduling/preemption +#if ( ( CONFIG_FREERTOS_SMP ) && ( !CONFIG_FREERTOS_UNICORE ) ) + vTaskPreemptionEnable(NULL); +#else + xTaskResumeAll(); +#endif + + // Indicate done and self delete + xTaskNotifyGive((TaskHandle_t)arg); + vTaskDelete(NULL); +} + +TEST_CASE("PIE: Usage in unpinned task", "[freertos]") +{ + TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle(); + for (int iter = 0; iter < TEST_UNPINNED_NUM_ITERS; iter++) { + // Create unpinned task + xTaskCreate(unpinned_task, "unpin", 4096, (void *)unity_task_handle, UNITY_FREERTOS_PRIORITY + 1, NULL); + // Wait for task to complete + ulTaskNotifyTake(pdTRUE, portMAX_DELAY); + vTaskDelay(10); // Short delay to allow task memory to be freed + } +} + +/** + * @brief Function performing some simple 
calculation using the PIE coprocessor. + * The goal is to be preempted by a task that also uses the PIE on the same core. + */ +static void pie_calculation(void* arg) +{ + pie_params_t* p = (pie_params_t*) arg; + const int32_t cst = p->cst; + int32_t a[4] = { cst, cst, cst, cst }; + int32_t dst[4] = { 0 }; + + for (int i = 0; i < 10; i++) { + pie_vector_signed_add(a, dst, dst); + + /* Give some time to the other to interrupt us before checking `f` value */ + esp_rom_delay_us(1000); + + /* Using TEST_ASSERT_TRUE triggers a stack overflow, make sure the sign is still correct */ + assert((dst[0] < 0 && cst < 0) || (dst[0] > 0 && cst > 0)); + + /* Give the hand back to FreeRTOS to avoid any watchdog error */ + vTaskDelay(2); + } + + /* Make sure the result is correct */ + assert((dst[0] * cst == 10)); + + xTaskNotifyGive(p->main); + vTaskDelete(NULL); +} + +TEST_CASE("PIE: Unsolicited context switch between tasks using the PIE", "[freertos]") +{ + /* Create two tasks that are on the same core and use the same FPU */ + TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle(); + TaskHandle_t tasks[2]; + pie_params_t params[2] = { + { .cst = 1, .main = unity_task_handle }, + { .cst = -1, .main = unity_task_handle }, + }; + + xTaskCreatePinnedToCore(pie_calculation, "Task1", 2048, params + 0, UNITY_FREERTOS_PRIORITY + 1, &tasks[0], 1); + xTaskCreatePinnedToCore(pie_calculation, "Task2", 2048, params + 1, UNITY_FREERTOS_PRIORITY + 1, &tasks[1], 1); + + ulTaskNotifyTake(pdTRUE, portMAX_DELAY); + ulTaskNotifyTake(pdTRUE, portMAX_DELAY); +} + +#endif /* CONFIG_FREERTOS_NUMBER_OF_CORES > 1 */ + +#endif /* SOC_CPU_HAS_PIE */ diff --git a/components/freertos/test_apps/freertos/port/test_pie_routines.S b/components/freertos/test_apps/freertos/port/test_pie_routines.S new file mode 100644 index 0000000000..7de3a4da28 --- /dev/null +++ b/components/freertos/test_apps/freertos/port/test_pie_routines.S @@ -0,0 +1,50 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems 
(Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "sdkconfig.h" +#include "soc/soc_caps.h" + +/* PIE instructions set is currently only supported in GCC compiler */ +#if SOC_CPU_HAS_PIE + + .text + .align 4 + +/** + * @brief Performs the unsigned sum of two 4-word vectors using the PIE. + * + * @param a0 First vector + * @param a1 Second vector + * @param a2 Destination to store the sum + */ + .type pie_vector_unsigned_add, @function + .global pie_vector_unsigned_add +pie_vector_unsigned_add: + esp.vld.128.ip q0, a0, 0 + esp.vld.128.ip q1, a1, 0 + esp.vadd.u32 q2, q0, q1 + esp.vst.128.ip q2, a2, 0 + ret + .size pie_vector_unsigned_add, .-pie_vector_unsigned_add + + +/** + * @brief Performs the signed sum of two 4-word vectors using the PIE. + * + * @param a0 First vector + * @param a1 Second vector + * @param a2 Destination to store the sum + */ + .type pie_vector_signed_add, @function + .global pie_vector_signed_add +pie_vector_signed_add: + esp.vld.128.ip q0, a0, 0 + esp.vld.128.ip q1, a1, 0 + esp.vadd.s32 q2, q0, q1 + esp.vst.128.ip q2, a2, 0 + ret + .size pie_vector_signed_add, .-pie_vector_signed_add + +#endif /* SOC_CPU_HAS_PIE */ diff --git a/components/freertos/test_apps/freertos/port/test_pie_watermark.c b/components/freertos/test_apps/freertos/port/test_pie_watermark.c new file mode 100644 index 0000000000..075b40b34f --- /dev/null +++ b/components/freertos/test_apps/freertos/port/test_pie_watermark.c @@ -0,0 +1,88 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "sdkconfig.h" +#include +#include "soc/soc_caps.h" +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "unity.h" + +#define TASKS_STATUS_ARRAY_LEN 16 + +/** + * On RISC-V targets that have coprocessors, the contexts are saved at the lowest address of the stack, + * which can lead to wrong stack watermark calculation in FreeRTOS in theory. 
+ * As such, the port layer of FreeRTOS will adjust the lowest address of the stack when a coprocessor + * context is saved. + */ +#if SOC_CPU_HAS_PIE + +/** + * @brief Performs the unsigned sum of two 4-word vectors using the PIE. + * + * @param a First vector + * @param b Second vector + * @param dst Destination to store the sum + */ +void pie_vector_unsigned_add(const uint32_t a[4], const uint32_t b[4], uint32_t dst[4]); + +static void other_task(void* arg) +{ + uint32_t a[4] = { 1, 2, 3, 4}; + uint32_t b[4] = { 42, 43, 44, 45}; + uint32_t dst[4] = { 0 }; + const TaskHandle_t main_task = (TaskHandle_t) arg; + + /* This task must also use the PIE coprocessor to force a PIE context flush on the main task */ + pie_vector_unsigned_add(a, b, dst); + + xTaskNotifyGive(main_task); + vTaskDelete(NULL); +} + +TEST_CASE("PIE: Context save does not affect stack watermark", "[freertos]") +{ + /* Setup some random values */ + uint32_t a[4] = { 0x3f00ffff, 0xffe10045, 0xffe10096, 0x42434546}; + uint32_t b[4] = { 0x42, 0xbb43, 0x6644, 0x845}; + uint32_t dst[4] = { 0 }; + + TaskHandle_t pvCreatedTask; + /* Force the FreeRTOS port layer to store a PIE context in the current task. 
+ * So let's use the PIE and make sure another task, on the SAME CORE, also uses it */ + const int core_id = xPortGetCoreID(); + const TaskHandle_t current_handle = xTaskGetCurrentTaskHandle(); + + /* Get the current stack watermark */ + const UBaseType_t before_watermark = uxTaskGetStackHighWaterMark(current_handle); + + /* Use the PIE unit, the context will NOT be flushed until another task starts using it */ + pie_vector_unsigned_add(a, b, dst); + + xTaskCreatePinnedToCore(other_task, + "OtherTask", + 2048, + (void*) current_handle, + CONFIG_UNITY_FREERTOS_PRIORITY - 1, + &pvCreatedTask, + core_id); + + vTaskDelay(10); + + /* Wait for other task to complete */ + ulTaskNotifyTake(pdTRUE, portMAX_DELAY); + + const UBaseType_t after_watermark = uxTaskGetStackHighWaterMark(current_handle); + + /* The current task has seen a PIE registers context save, so we have at least 8 16-byte registers saved on the + * stack, which represents 128 bytes. In practice, it may be very different, for example a call to printf would + * result is more than 1KB of additional stack space used. So let's just make sure that the watermark is bigger + * than 50% of the former watermark. 
*/ + TEST_ASSERT_TRUE(after_watermark > before_watermark / 2); +} + +#endif // SOC_CPU_HAS_PIE diff --git a/components/riscv/include/riscv/csr_hwlp.h b/components/riscv/include/riscv/csr_hwlp.h new file mode 100644 index 0000000000..83a44ea441 --- /dev/null +++ b/components/riscv/include/riscv/csr_hwlp.h @@ -0,0 +1,33 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "soc/soc_caps.h" + +#if SOC_CPU_HAS_HWLOOP + +/* CSR 0x7F1 lowest 2 bits describe the following states: + * 00: OFF + * 01: Initial + * 10: Clean + * 11: Dirty + */ +#define CSR_HWLP_STATE_REG 0x7F1 + +#define HWLP_OFF_STATE 0 +#define HWLP_INITIAL_STATE 1 +#define HWLP_CLEAN_STATE 2 +#define HWLP_DIRTY_STATE 3 + +#define CSR_LOOP0_START_ADDR 0x7C6 +#define CSR_LOOP0_END_ADDR 0x7C7 +#define CSR_LOOP0_COUNT 0x7C8 +#define CSR_LOOP1_START_ADDR 0x7C9 +#define CSR_LOOP1_END_ADDR 0x7CA +#define CSR_LOOP1_COUNT 0x7CB + +#endif /* SOC_CPU_HAS_HWLOOP */ diff --git a/components/riscv/include/riscv/csr_pie.h b/components/riscv/include/riscv/csr_pie.h new file mode 100644 index 0000000000..59b5c8f25d --- /dev/null +++ b/components/riscv/include/riscv/csr_pie.h @@ -0,0 +1,21 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "soc/soc_caps.h" + +#if SOC_CPU_HAS_PIE + +/* CSR lowest 2 bits describe the following states: + * 00: OFF + * 01: Initial + * 10: Clean + * 11: Dirty + */ +#define CSR_PIE_STATE_REG 0x7F2 + +#endif /* SOC_CPU_HAS_PIE */ diff --git a/components/riscv/include/riscv/rv_utils.h b/components/riscv/include/riscv/rv_utils.h index 716fee6e80..c0d3a64b2f 100644 --- a/components/riscv/include/riscv/rv_utils.h +++ b/components/riscv/include/riscv/rv_utils.h @@ -14,6 +14,7 @@ #include "esp_attr.h" #include "riscv/csr.h" #include "riscv/interrupt.h" +#include "riscv/csr_pie.h" #ifdef __cplusplus extern 
"C" { @@ -168,6 +169,27 @@ FORCE_INLINE_ATTR void rv_utils_disable_fpu(void) #endif /* SOC_CPU_HAS_FPU */ +/* ------------------------------------------------- PIE Related ---------------------------------------------------- + * + * ------------------------------------------------------------------------------------------------------------------ */ + +#if SOC_CPU_HAS_PIE + +FORCE_INLINE_ATTR void rv_utils_enable_pie(void) +{ + RV_WRITE_CSR(CSR_PIE_STATE_REG, 1); +} + + +FORCE_INLINE_ATTR void rv_utils_disable_pie(void) +{ + RV_WRITE_CSR(CSR_PIE_STATE_REG, 0); +} + +#endif /* SOC_CPU_HAS_PIE */ + + + /* -------------------------------------------------- Memory Ports ----------------------------------------------------- * * ------------------------------------------------------------------------------------------------------------------ */ diff --git a/components/riscv/include/riscv/rvruntime-frames.h b/components/riscv/include/riscv/rvruntime-frames.h index e218d6a5ad..45f013b0e5 100644 --- a/components/riscv/include/riscv/rvruntime-frames.h +++ b/components/riscv/include/riscv/rvruntime-frames.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2015-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2015-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -86,7 +86,21 @@ STRUCT_END(RvExcFrame) #if SOC_CPU_COPROC_NUM > 0 +/* Define the default size of each coprocessor save area */ +#define RV_COPROC0_SIZE 0 +#define RV_COPROC1_SIZE 0 +#define RV_COPROC2_SIZE 0 +/* And the alignment for each of them */ +#define RV_COPROC0_ALIGN 4 +#define RV_COPROC1_ALIGN 4 +#define RV_COPROC2_ALIGN 4 + + #if SOC_CPU_HAS_FPU + +/* Floating-Point Unit coprocessor is now considered coprocessor 0 */ +#define FPU_COPROC_IDX 0 + /** * @brief Floating-Point Unit save area */ @@ -126,29 +140,97 @@ STRUCT_FIELD (long, 4, RV_FPU_FT11, ft11) STRUCT_FIELD (long, 4, RV_FPU_FCSR, fcsr) /* fcsr special register */ STRUCT_END(RvFPUSaveArea) -/* 
Floating-Point Unit coprocessor is now considered coprocessor 0 */ -#define FPU_COPROC_IDX 0 -/* PIE/AIA coprocessor is coprocessor 1 */ -#define PIE_COPROC_IDX 1 - -/* Define the size of each coprocessor save area */ +/* Redefine the coprocessor area size previously defined to 0 */ +#undef RV_COPROC0_SIZE #if defined(_ASMLANGUAGE) || defined(__ASSEMBLER__) -#define RV_COPROC0_SIZE RvFPUSaveAreaSize -#define RV_COPROC1_SIZE 0 // PIE/AIA coprocessor area + #define RV_COPROC0_SIZE RvFPUSaveAreaSize #else -#define RV_COPROC0_SIZE sizeof(RvFPUSaveArea) -#define RV_COPROC1_SIZE 0 // PIE/AIA coprocessor area + #define RV_COPROC0_SIZE sizeof(RvFPUSaveArea) #endif /* defined(_ASMLANGUAGE) || defined(__ASSEMBLER__) */ #endif /* SOC_CPU_HAS_FPU */ + +#if SOC_CPU_HAS_HWLOOP + +/* Hardware Loop extension is "coprocessor" 1 */ +#define HWLP_COPROC_IDX 1 + +/** + * @brief Hardware loop save area + */ +STRUCT_BEGIN +STRUCT_FIELD (long, 4, RV_HWLOOP_START0, start0) +STRUCT_FIELD (long, 4, RV_HWLOOP_END0, end0) +STRUCT_FIELD (long, 4, RV_HWLOOP_COUNT0, count0) +STRUCT_FIELD (long, 4, RV_HWLOOP_START1, start1) +STRUCT_FIELD (long, 4, RV_HWLOOP_END1, end1) +STRUCT_FIELD (long, 4, RV_HWLOOP_COUNT1, count1) +STRUCT_END(RvHWLPSaveArea) + +/* Redefine the coprocessor area size previously defined to 0 */ +#undef RV_COPROC1_SIZE +#if defined(_ASMLANGUAGE) || defined(__ASSEMBLER__) + #define RV_COPROC1_SIZE RvHWLPSaveAreaSize +#else + #define RV_COPROC1_SIZE sizeof(RvHWLPSaveArea) +#endif /* defined(_ASMLANGUAGE) || defined(__ASSEMBLER__) */ + +#endif /* SOC_CPU_HAS_HWLOOP */ + + + +#if SOC_CPU_HAS_PIE + +/* PIE/AIA coprocessor is now considered coprocessor 2 */ +#define PIE_COPROC_IDX 2 + +/** + * @brief PIE save area + */ +STRUCT_BEGIN +STRUCT_AFIELD (long, 4, RV_PIE_Q0, q0, 4) +STRUCT_AFIELD (long, 4, RV_PIE_Q1, q1, 4) +STRUCT_AFIELD (long, 4, RV_PIE_Q2, q2, 4) +STRUCT_AFIELD (long, 4, RV_PIE_Q3, q3, 4) +STRUCT_AFIELD (long, 4, RV_PIE_Q4, q4, 4) +STRUCT_AFIELD (long, 4, RV_PIE_Q5, q5, 4) 
+STRUCT_AFIELD (long, 4, RV_PIE_Q6, q6, 4) +STRUCT_AFIELD (long, 4, RV_PIE_Q7, q7, 4) +STRUCT_AFIELD (long, 4, RV_PIE_QACC_L_L, qacc_l_l, 4) +STRUCT_AFIELD (long, 4, RV_PIE_QACC_L_H, qacc_l_h, 4) +STRUCT_AFIELD (long, 4, RV_PIE_QACC_H_L, qacc_h_l, 4) +STRUCT_AFIELD (long, 4, RV_PIE_QACC_H_H, qacc_h_h, 4) +STRUCT_AFIELD (long, 4, RV_PIE_UA_STATE, ua_state, 4) +STRUCT_FIELD (long, 4, RV_PIE_XACC, xacc) +/* This register contains SAR, SAR_BYTES and FFT_BIT_WIDTH in this order (from top to low) */ +STRUCT_FIELD (long, 4, RV_PIE_MISC, misc) +STRUCT_END(RvPIESaveArea) + +/* Redefine the coprocessor area size previously defined to 0 */ +#undef RV_COPROC2_SIZE + +#if defined(_ASMLANGUAGE) || defined(__ASSEMBLER__) + #define RV_COPROC2_SIZE RvPIESaveAreaSize +#else + #define RV_COPROC2_SIZE sizeof(RvPIESaveArea) +#endif /* defined(_ASMLANGUAGE) || defined(__ASSEMBLER__) */ + +/* The PIE save area structure must be aligned on 16 bytes */ +#undef RV_COPROC2_ALIGN +#define RV_COPROC2_ALIGN 16 + +#endif /* SOC_CPU_HAS_PIE */ + + + /** * @brief Coprocessors save area, containing each coprocessor save area */ STRUCT_BEGIN /* Enable bitmap: BIT(i) represents coprocessor i, 1 is used, 0 else */ STRUCT_FIELD (long, 4, RV_COPROC_ENABLE, sa_enable) -/* Address of the original lowest stack address, convenient when the stack needs to re-initialized */ +/* Address of the original lowest stack address, convenient when the stack needs to be re-initialized */ STRUCT_FIELD (void*, 4, RV_COPROC_TCB_STACK, sa_tcbstack) /* Address of the pool of memory used to allocate coprocessors save areas */ STRUCT_FIELD (long, 4, RV_COPROC_ALLOCATOR, sa_allocator) diff --git a/components/riscv/vectors.S b/components/riscv/vectors.S index 0b46681416..058877f8ed 100644 --- a/components/riscv/vectors.S +++ b/components/riscv/vectors.S @@ -25,11 +25,11 @@ /* EXT_ILL CSR reasons are stored as follows: * - Bit 0: FPU core instruction (Load/Store instructions NOT concerned) - * - Bit 1: Low-power core + * - Bit 
1: Hardware Loop instructions * - Bit 2: PIE core */ - .equ EXT_ILL_RSN_FPU, 1 - .equ EXT_ILL_RSN_LP, 2 - .equ EXT_ILL_RSN_PIE, 4 + .equ EXT_ILL_RSN_FPU, 1 + .equ EXT_ILL_RSN_HWLP, 2 + .equ EXT_ILL_RSN_PIE, 4 #endif /* SOC_CPU_COPROC_NUM > 0 */ /* Macro which first allocates space on the stack to save general @@ -166,12 +166,19 @@ _panic_handler: /* In case this is due to a coprocessor, set ra right now to simplify the logic below */ la ra, _return_from_exception /* EXT_ILL CSR should contain the reason for the Illegal Instruction */ - csrr a0, EXT_ILL_CSR - mv a2, a0 + csrrw a0, EXT_ILL_CSR, zero + + /* Hardware loop cannot be treated lazily, so we should never end here if a HWLP instruction is used */ +#if SOC_CPU_HAS_PIE + /* Check if the PIE bit is set. */ + andi a1, a0, EXT_ILL_RSN_PIE + bnez a1, rtos_save_pie_coproc +#endif /* SOC_CPU_HAS_PIE */ + +#if SOC_CPU_HAS_FPU /* Check if the FPU bit is set. When targets have the FPU reason bug (SOC_CPU_HAS_FPU_EXT_ILL_BUG), * it is possible that another bit is set even if the reason is an FPU instruction. * For example, bit 1 can be set and bit 0 won't, even if the reason is an FPU instruction. 
*/ -#if SOC_CPU_HAS_FPU andi a1, a0, EXT_ILL_RSN_FPU bnez a1, rtos_save_fpu_coproc #if SOC_CPU_HAS_FPU_EXT_ILL_BUG @@ -202,8 +209,6 @@ _panic_handler_not_fpu: #endif /* SOC_CPU_HAS_FPU_EXT_ILL_BUG */ #endif /* SOC_CPU_HAS_FPU */ - /* Need to check the other coprocessors reason now, instruction is in register a2 */ - /* Ignore LP and PIE for now, continue the exception */ _panic_handler_not_coproc: #endif /* ( SOC_CPU_COPROC_NUM > 0 ) */ @@ -298,9 +303,11 @@ _interrupt_handler: /* Save SP former value */ sw a0, RV_STK_SP(sp) - /* Notify the RTOS that an interrupt ocurred, it will save the current stack pointer - * in the running TCB, no need to pass it as a parameter */ + /* Notify the RTOS that an interrupt occurred, it will save the current stack pointer + * in the running TCB, no need to pass it as a parameter + * Returns an abstract context in a0, needs to be passed to `rtos_int_exit` */ call rtos_int_enter + mv s4, a0 /* If this is a non-nested interrupt, SP now points to the interrupt stack */ /* Before dispatch c handler, restore interrupt to enable nested intr */ @@ -366,6 +373,7 @@ _interrupt_handler: /* The RTOS will restore the current TCB stack pointer. This routine will preserve s1 and s2. * Returns the new `mstatus` value. */ mv a0, s2 /* a0 = mstatus */ + mv a1, s4 /* a1 = abstract context returned by `rtos_int_enter` */ call rtos_int_exit /* Restore the rest of the registers. 
diff --git a/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in b/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in index f4e56069f5..6f6566e6c4 100644 --- a/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in @@ -411,6 +411,10 @@ config SOC_BRANCH_PREDICTOR_SUPPORTED bool default y +config SOC_CPU_COPROC_NUM + int + default 3 + config SOC_CPU_HAS_FPU bool default y @@ -419,9 +423,13 @@ config SOC_CPU_HAS_FPU_EXT_ILL_BUG bool default y -config SOC_CPU_COPROC_NUM - int - default 2 +config SOC_CPU_HAS_HWLOOP + bool + default y + +config SOC_CPU_HAS_PIE + bool + default y config SOC_HP_CPU_HAS_MULTIPLE_CORES bool diff --git a/components/soc/esp32p4/include/soc/soc_caps.h b/components/soc/esp32p4/include/soc/soc_caps.h index b19770cd1b..d2a55eea8b 100644 --- a/components/soc/esp32p4/include/soc/soc_caps.h +++ b/components/soc/esp32p4/include/soc/soc_caps.h @@ -157,9 +157,15 @@ #define SOC_INT_CLIC_SUPPORTED 1 #define SOC_INT_HW_NESTED_SUPPORTED 1 // Support for hardware interrupts nesting #define SOC_BRANCH_PREDICTOR_SUPPORTED 1 +#define SOC_CPU_COPROC_NUM 3 #define SOC_CPU_HAS_FPU 1 #define SOC_CPU_HAS_FPU_EXT_ILL_BUG 1 // EXT_ILL CSR doesn't support FLW/FSW -#define SOC_CPU_COPROC_NUM 2 +#define SOC_CPU_HAS_HWLOOP 1 +/* PIE coprocessor assembly is only supported with GCC compiler */ +#ifndef __clang__ +#define SOC_CPU_HAS_PIE 1 +#endif + #define SOC_HP_CPU_HAS_MULTIPLE_CORES 1 // Convenience boolean macro used to determine if a target has multiple cores. #define SOC_CPU_BREAKPOINTS_NUM 3