From 0403d43b194f5c7c77dd246aece7d936d6985fce Mon Sep 17 00:00:00 2001 From: Jeroen Domburg Date: Mon, 17 Oct 2016 18:09:15 +0800 Subject: [PATCH 1/5] Optimize xPortGetCoreID to 2-instruction inline assembly. --- components/freertos/include/freertos/portable.h | 3 +-- .../freertos/include/freertos/xtensa_context.h | 7 +------ components/freertos/panic.c | 1 - components/freertos/portasm.S | 12 ------------ 4 files changed, 2 insertions(+), 21 deletions(-) diff --git a/components/freertos/include/freertos/portable.h b/components/freertos/include/freertos/portable.h index 4030ca0c03..58e6903664 100644 --- a/components/freertos/include/freertos/portable.h +++ b/components/freertos/include/freertos/portable.h @@ -192,8 +192,7 @@ void vPortEndScheduler( void ) PRIVILEGED_FUNCTION; #endif /* Multi-core: get current core ID */ -int xPortGetCoreID( void ); - +#define xPortGetCoreID() __extension__({int id; asm volatile("rsr.prid %0; extui %0,%0,13,1":"=r"(id)); id;}) #ifdef __cplusplus } diff --git a/components/freertos/include/freertos/xtensa_context.h b/components/freertos/include/freertos/xtensa_context.h index 3167c84726..b748a0d1a7 100644 --- a/components/freertos/include/freertos/xtensa_context.h +++ b/components/freertos/include/freertos/xtensa_context.h @@ -322,12 +322,7 @@ STRUCT_END(XtSolFrame) #ifdef __ASSEMBLER__ .macro getcoreid reg rsr.prid \reg - bbci \reg,1,1f - movi \reg,1 - j 2f -1: - movi \reg,0 -2: + extui \reg,\reg,13,1 .endm #endif diff --git a/components/freertos/panic.c b/components/freertos/panic.c index 9400867356..6a87679d13 100644 --- a/components/freertos/panic.c +++ b/components/freertos/panic.c @@ -76,7 +76,6 @@ inline static void panicPutHex(int a) { } inline static void panicPutDec(int a) { } #endif -int xPortGetCoreID(); void __attribute__((weak)) vApplicationStackOverflowHook( TaskHandle_t xTask, signed char *pcTaskName ) { panicPutStr("***ERROR*** A stack overflow in task "); diff --git a/components/freertos/portasm.S b/components/freertos/portasm.S index 65406b0b06..3a45b19a32 100644 --- a/components/freertos/portasm.S +++ b/components/freertos/portasm.S @@ -51,18 +51,6 @@ port_switch_flag: .text - - -/* C function to get proc ID.*/ - .global xPortGetCoreID - .type xPortGetCoreID,@function - .align 4 -xPortGetCoreID: - ENTRY(16) - getcoreid a2 - RET(16) - - /* ******************************************************************************* * _frxt_setup_switch From c03549e1170f76871a5f66cbc709371e38dbf9d2 Mon Sep 17 00:00:00 2001 From: Jeroen Domburg Date: Mon, 17 Oct 2016 18:30:13 +0800 Subject: [PATCH 2/5] Make uxPortCompareSet into a macro. 25uS -> 24uS --- components/freertos/port.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/components/freertos/port.c b/components/freertos/port.c index 6117a2804e..f42cba936c 100644 --- a/components/freertos/port.c +++ b/components/freertos/port.c @@ -263,17 +263,14 @@ void vPortAssertIfInISR() * *bitwise inverse* of the old mem if the mem wasn't written. This doesn't seem to happen on the * ESP32, though. (Would show up directly if it did because the magic wouldn't match.) */ -uint32_t uxPortCompareSet(volatile uint32_t *mux, uint32_t compare, uint32_t set) -{ - __asm__ __volatile__ ( - "WSR %2,SCOMPARE1 \n" //initialize SCOMPARE1 - "ISYNC \n" //wait sync - "S32C1I %0, %1, 0 \n" //store id into the lock, if the lock is the same as comparel. Otherwise, no write-access - :"=r"(set) \ - :"r"(mux), "r"(compare), "0"(set) \ - ); - return set; -} +#define uxPortCompareSet(mux, compare, set) \ + __asm__ __volatile__( \ + "WSR %2,SCOMPARE1 \n" \ + "ISYNC \n" \ + "S32C1I %0, %1, 0 \n" \ + :"=r"(*set) \ + :"r"(mux), "r"(compare), "0"(*set) \ + ); \ /* * For kernel use: Initialize a per-CPU mux. Mux will be initialized unlocked. @@ -310,7 +307,8 @@ void vPortCPUAcquireMutex(portMUX_TYPE *mux) { irqStatus=portENTER_CRITICAL_NESTED(); do { //Lock mux if it's currently unlocked - res=uxPortCompareSet(&mux->mux, portMUX_FREE_VAL, (xPortGetCoreID()<mux, portMUX_FREE_VAL, &res); //If it wasn't free and we're the owner of the lock, we are locking recursively. if ( (res != portMUX_FREE_VAL) && (((res&portMUX_VAL_MASK)>>portMUX_VAL_SHIFT) == xPortGetCoreID()) ) { //Mux was already locked by us. Just bump the recurse count by one. @@ -362,7 +360,8 @@ portBASE_TYPE vPortCPUReleaseMutex(portMUX_TYPE *mux) { if ( (mux->mux & portMUX_MAGIC_MASK) != portMUX_MAGIC_VAL ) ets_printf("ERROR: vPortCPUReleaseMutex: mux %p is uninitialized (0x%X)!\n", mux, mux->mux); #endif //Unlock mux if it's currently locked with a recurse count of 0 - res=uxPortCompareSet(&mux->mux, (xPortGetCoreID()<mux, (xPortGetCoreID()< Date: Mon, 17 Oct 2016 18:49:19 +0800 Subject: [PATCH 3/5] Detect success before errors in vPortCPUReleaseMutex. Shaves off another half uS. --- components/freertos/port.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/components/freertos/port.c b/components/freertos/port.c index f42cba936c..ef72b1cb5e 100644 --- a/components/freertos/port.c +++ b/components/freertos/port.c @@ -363,27 +363,30 @@ portBASE_TYPE vPortCPUReleaseMutex(portMUX_TYPE *mux) { res=portMUX_FREE_VAL; uxPortCompareSet(&mux->mux, (xPortGetCoreID()<>portMUX_VAL_SHIFT) == xPortGetCoreID() ) { + //Lock is valid, we can return safely. Just need to check if it's a recursive lock; if so we need to decrease the refcount. + if ( ((res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT)!=0) { + //We locked this, but the reccount isn't zero. Decrease refcount and continue. + recCnt=(res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT; + recCnt--; +#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG_RECURSIVE + ets_printf("Recursive unlock: recCnt=%d last locked %s line %d, curr %s line %d\n", recCnt, lastLockedFn, lastLockedLine, fnName, line); +#endif + mux->mux=portMUX_MAGIC_VAL|(recCnt<>portMUX_VAL_SHIFT) != xPortGetCoreID() ) { + } else { #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG ets_printf("ERROR: vPortCPUReleaseMutex: mux %p wasn't locked by this core (%d) but by core %d (ret=%x, mux=%x).\n", mux, xPortGetCoreID(), ((res&portMUX_VAL_MASK)>>portMUX_VAL_SHIFT), res, mux->mux); ets_printf("Last non-recursive lock %s line %d\n", lastLockedFn, lastLockedLine); ets_printf("Called by %s line %d\n", fnName, line); #endif ret=pdFALSE; - } else if ( ((res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT)!=0) { - //We locked this, but the reccount isn't zero. Decrease refcount and continue. - recCnt=(res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT; - recCnt--; -#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG_RECURSIVE - ets_printf("Recursive unlock: recCnt=%d last locked %s line %d, curr %s line %d\n", recCnt, lastLockedFn, lastLockedLine, fnName, line); -#endif - mux->mux=portMUX_MAGIC_VAL|(recCnt< Date: Tue, 18 Oct 2016 10:51:08 +0800 Subject: [PATCH 4/5] Some more optimizations, mostly in involuntary task switches. Doesn not really help here, but might in other cases. --- components/freertos/portasm.S | 48 +++++++++++----------------- components/freertos/xtensa_vectors.S | 7 ++-- 2 files changed, 21 insertions(+), 34 deletions(-) diff --git a/components/freertos/portasm.S b/components/freertos/portasm.S index 3a45b19a32..ad65a103ed 100644 --- a/components/freertos/portasm.S +++ b/components/freertos/portasm.S @@ -69,9 +69,8 @@ _frxt_setup_switch: ENTRY(16) getcoreid a3 - slli a3, a3, 2 movi a2, port_switch_flag - add a2, a2, a3 + addx4 a2, a3, a2 movi a3, 1 s32i a3, a2, 0 @@ -116,12 +115,11 @@ _frxt_int_enter: Manage nesting directly rather than call the generic IntEnter() (in windowed ABI we can't call a C function here anyway because PS.EXCM is still set). */ - getcoreid a3 - slli a4, a3, 2 /* a4 = cpuid * 4 */ + getcoreid a4 movi a2, port_xSchedulerRunning - add a2, a2, a4 + addx4 a2, a4, a2 movi a3, port_interruptNesting - add a3, a3, a4 + addx4 a3, a4, a3 l32i a2, a2, 0 /* a2 = port_xSchedulerRunning */ beqz a2, 1f /* scheduler not running, no tasks */ l32i a2, a3, 0 /* a2 = port_interruptNesting */ @@ -130,14 +128,13 @@ _frxt_int_enter: bnei a2, 1, .Lnested /* !=0 before incr, so nested */ movi a2, pxCurrentTCB - add a2, a2, a4 + addx4 a2, a4, a2 l32i a2, a2, 0 /* a2 = current TCB */ beqz a2, 1f s32i a1, a2, TOPOFSTACK_OFFS /* pxCurrentTCB->pxTopOfStack = SP */ movi a1, port_IntStackTop /* a1 = top of intr stack */ movi a2, configISR_STACK_SIZE - getcoreid a3 - mull a2, a3, a2 + mull a2, a4, a2 add a1, a1, a2 /* for current proc */ .Lnested: @@ -165,12 +162,11 @@ _frxt_int_enter: .align 4 _frxt_int_exit: - getcoreid a3 - slli a4, a3, 2 /* a4 is core * 4 */ + getcoreid a4 movi a2, port_xSchedulerRunning - add a2, a2, a4 + addx4 a2, a4, a2 movi a3, port_interruptNesting - add a3, a3, a4 + addx4 a3, a4, a3 rsil a0, XCHAL_EXCM_LEVEL /* lock out interrupts */ l32i a2, a2, 0 /* a2 = port_xSchedulerRunning */ beqz a2, .Lnoswitch /* scheduler not running, no tasks */ @@ -180,13 +176,13 @@ _frxt_int_exit: bnez a2, .Lnesting /* !=0 after decr so still nested */ movi a2, pxCurrentTCB - add a2, a2, a4 + addx4 a2, a4, a2 l32i a2, a2, 0 /* a2 = current TCB */ beqz a2, 1f /* no task ? go to dispatcher */ l32i a1, a2, TOPOFSTACK_OFFS /* SP = pxCurrentTCB->pxTopOfStack */ movi a2, port_switch_flag /* address of switch flag */ - add a2, a2, a4 /* point to flag for this cpu */ + addx4 a2, a4, a2 /* point to flag for this cpu */ l32i a3, a2, 0 /* a3 = port_switch_flag */ beqz a3, .Lnoswitch /* flag = 0 means no switch reqd */ movi a3, 0 @@ -392,14 +388,12 @@ _frxt_dispatch: call0 vTaskSwitchContext // Get next TCB to resume movi a2, pxCurrentTCB getcoreid a3 - slli a3, a3, 2 - add a2, a2, a3 + addx4 a2, a3, a2 #else call4 vTaskSwitchContext // Get next TCB to resume movi a2, pxCurrentTCB getcoreid a3 - slli a3, a3, 2 - add a2, a2, a3 + addx4 a2, a3, a2 #endif l32i a3, a2, 0 l32i sp, a3, TOPOFSTACK_OFFS /* SP = next_TCB->pxTopOfStack; */ @@ -439,8 +433,7 @@ _frxt_dispatch: /* Restore CPENABLE from task's co-processor save area. */ movi a3, pxCurrentTCB /* cp_state = */ getcoreid a2 - slli a2, a2, 2 - add a3, a2, a3 + addx4 a3, a2, a3 l32i a3, a3, 0 l32i a2, a3, CP_TOPOFSTACK_OFFS /* StackType_t *pxStack; */ l16ui a3, a2, XT_CPENABLE /* CPENABLE = cp_state->cpenable; */ @@ -529,8 +522,7 @@ vPortYield: movi a2, pxCurrentTCB getcoreid a3 - slli a3, a3, 2 - add a2, a2, a3 + addx4 a2, a3, a2 l32i a2, a2, 0 /* a2 = pxCurrentTCB */ movi a3, 0 s32i a3, sp, XT_SOL_EXIT /* 0 to flag as solicited frame */ @@ -581,8 +573,7 @@ vPortYieldFromInt: /* Save CPENABLE in task's co-processor save area, and clear CPENABLE. */ movi a3, pxCurrentTCB /* cp_state = */ getcoreid a2 - slli a2, a2, 2 - add a3, a2, a3 + addx4 a3, a2, a3 l32i a3, a3, 0 l32i a2, a3, CP_TOPOFSTACK_OFFS @@ -625,18 +616,17 @@ _frxt_task_coproc_state: /* We can use a3 as a scratchpad, the instances of code calling XT_RTOS_CP_STATE don't seem to need it saved. */ getcoreid a3 - slli a3, a3, 2 /* a3=coreid*4 */ movi a15, port_xSchedulerRunning /* if (port_xSchedulerRunning */ - add a15, a15, a3 + addx4 a15, a3,a15 l32i a15, a15, 0 beqz a15, 1f movi a15, port_interruptNesting /* && port_interruptNesting == 0 */ - add a15, a15, a3 + addx4 a15, a3, a15 l32i a15, a15, 0 bnez a15, 1f movi a15, pxCurrentTCB - add a15, a3, a15 + addx4 a15, a3, a15 l32i a15, a15, 0 /* && pxCurrentTCB != 0) { */ diff --git a/components/freertos/xtensa_vectors.S b/components/freertos/xtensa_vectors.S index f0d874a59c..7c2fc29607 100644 --- a/components/freertos/xtensa_vectors.S +++ b/components/freertos/xtensa_vectors.S @@ -904,16 +904,13 @@ _xt_coproc_exc: core we're running on now. */ movi a2, pxCurrentTCB getcoreid a3 - slli a3, a3, 2 - add a2, a2, a3 + addx4 a2, a3, a2 l32i a2, a2, 0 /* a2 = start of pxCurrentTCB[cpuid] */ addi a2, a2, TASKTCB_XCOREID_OFFSET /* offset to xCoreID in tcb struct */ - getcoreid a3 s32i a3, a2, 0 /* store current cpuid */ /* Grab correct xt_coproc_owner_sa for this core */ - getcoreid a2 - movi a3, XCHAL_CP_MAX << 2 + movi a2, XCHAL_CP_MAX << 2 mull a2, a2, a3 movi a3, _xt_coproc_owner_sa /* a3 = base of owner array */ add a3, a3, a2 From 2d393f05300b6050788561ba1dada185c27917f8 Mon Sep 17 00:00:00 2001 From: Jeroen Domburg Date: Thu, 20 Oct 2016 11:23:59 +0800 Subject: [PATCH 5/5] Change inline assembly bits from macros to inline functions --- .../freertos/include/freertos/portable.h | 9 ++++++++- .../freertos/include/freertos/portmacro.h | 20 +++++++++++++++++++ components/freertos/port.c | 19 ------------------ 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/components/freertos/include/freertos/portable.h b/components/freertos/include/freertos/portable.h index 58e6903664..a3d39bd5a2 100644 --- a/components/freertos/include/freertos/portable.h +++ b/components/freertos/include/freertos/portable.h @@ -192,7 +192,14 @@ void vPortEndScheduler( void ) PRIVILEGED_FUNCTION; #endif /* Multi-core: get current core ID */ -#define xPortGetCoreID() __extension__({int id; asm volatile("rsr.prid %0; extui %0,%0,13,1":"=r"(id)); id;}) +inline uint32_t xPortGetCoreID() { + int id; + asm volatile( + "rsr.prid %0\n" + " extui %0,%0,13,1" + :"=r"(id)); + return id; +} #ifdef __cplusplus } diff --git a/components/freertos/include/freertos/portmacro.h b/components/freertos/include/freertos/portmacro.h index ab83b0e05a..5e2386d721 100644 --- a/components/freertos/include/freertos/portmacro.h +++ b/components/freertos/include/freertos/portmacro.h @@ -225,6 +225,26 @@ static inline unsigned portENTER_CRITICAL_NESTED() { unsigned state = XTOS_SET_I #define portCLEAR_INTERRUPT_MASK_FROM_ISR(state) portEXIT_CRITICAL_NESTED(state) +/* + * Wrapper for the Xtensa compare-and-set instruction. This subroutine will atomically compare + * *mux to compare, and if it's the same, will set *mux to set. It will return the old value + * of *addr in *set. + * + * Warning: From the ISA docs: in some (unspecified) cases, the s32c1i instruction may return the + * *bitwise inverse* of the old mem if the mem wasn't written. This doesn't seem to happen on the + * ESP32, though. (Would show up directly if it did because the magic wouldn't match.) + */ +inline void uxPortCompareSet(volatile uint32_t *addr, uint32_t compare, uint32_t *set) { + __asm__ __volatile__( + "WSR %2,SCOMPARE1 \n" + "ISYNC \n" + "S32C1I %0, %1, 0 \n" + :"=r"(*set) + :"r"(addr), "r"(compare), "0"(*set) + ); +} + + /*-----------------------------------------------------------*/ /* Architecture specifics. */ diff --git a/components/freertos/port.c b/components/freertos/port.c index ef72b1cb5e..a982db7d42 100644 --- a/components/freertos/port.c +++ b/components/freertos/port.c @@ -253,25 +253,6 @@ void vPortAssertIfInISR() configASSERT(port_interruptNesting[xPortGetCoreID()]==0) } - -/* - * Wrapper for the Xtensa compare-and-set instruction. This subroutine will atomically compare - * *mux to compare, and if it's the same, will set *mux to set. It will return the old value - * of *addr. - * - * Warning: From the ISA docs: in some (unspecified) cases, the s32c1i instruction may return the - * *bitwise inverse* of the old mem if the mem wasn't written. This doesn't seem to happen on the - * ESP32, though. (Would show up directly if it did because the magic wouldn't match.) - */ -#define uxPortCompareSet(mux, compare, set) \ - __asm__ __volatile__( \ - "WSR %2,SCOMPARE1 \n" \ - "ISYNC \n" \ - "S32C1I %0, %1, 0 \n" \ - :"=r"(*set) \ - :"r"(mux), "r"(compare), "0"(*set) \ - ); \ - /* * For kernel use: Initialize a per-CPU mux. Mux will be initialized unlocked. */