fix(freertos): fix xesppie registers save/restore

This commit is contained in:
Alexey Lapshin
2025-03-04 10:09:21 +07:00
parent 1f8096359d
commit b25cb2906c
2 changed files with 75 additions and 26 deletions

View File

@@ -264,6 +264,7 @@ _hwlp_restore_end:
esp.vst.128.ip q0, \frame, 16 esp.vst.128.ip q0, \frame, 16
esp.vst.128.ip q1, \frame, 16 esp.vst.128.ip q1, \frame, 16
esp.vst.128.ip q2, \frame, 16 esp.vst.128.ip q2, \frame, 16
esp.vst.128.ip q3, \frame, 16
esp.vst.128.ip q4, \frame, 16 esp.vst.128.ip q4, \frame, 16
esp.vst.128.ip q5, \frame, 16 esp.vst.128.ip q5, \frame, 16
esp.vst.128.ip q6, \frame, 16 esp.vst.128.ip q6, \frame, 16
@@ -275,20 +276,57 @@ _hwlp_restore_end:
esp.st.qacc.h.h.128.ip \frame, 16 esp.st.qacc.h.h.128.ip \frame, 16
/* UA_STATE register (128 bits) */ /* UA_STATE register (128 bits) */
esp.st.ua.state.ip \frame, 16 esp.st.ua.state.ip \frame, 16
/* XACC register (40 bits) */ /* XACC register (40 bits)
esp.st.u.xacc.ip \frame, 8 *
/* The following registers will be stored in the same word */ * Bit layout across two 32-bit words (total 64 bits, but only 40 bits are used):
*
* Word 1 (Upper 32 bits, at frame + 4; XACC occupies only the lower 8 bits):
* +------+------+------+------+------+------+------+------+ 32
* | res | res | res | res | res | res | res | res |
* +------+------+------+------+------+------+------+------+ 24
* | res | res | sar | sar | sar | sar | sar | sar |
* +------+------+------+------+------+------+------+------+ 16
* | sarB | sarB | sarB | sarB | fftW | fftW | fftW | fftW |
* +------+------+------+------+------+------+------+------+ 8
* | xacc | xacc | xacc | xacc | xacc | xacc | xacc | xacc |
* +------+------+------+------+------+------+------+------+ 0
*
* Word 0 (Lower 32 bits, at frame + 0; holds XACC bits 31..0):
* +------+------+------+------+------+------+------+------+ 32
* | xacc | xacc | xacc | xacc | xacc | xacc | xacc | xacc |
* +------+------+------+------+------+------+------+------+ 24
* | xacc | xacc | xacc | xacc | xacc | xacc | xacc | xacc |
* +------+------+------+------+------+------+------+------+ 16
* | xacc | xacc | xacc | xacc | xacc | xacc | xacc | xacc |
* +------+------+------+------+------+------+------+------+ 8
* | xacc | xacc | xacc | xacc | xacc | xacc | xacc | xacc |
* +------+------+------+------+------+------+------+------+ 0
*
* Legend:
* - `xacc` = xacc bits
* - `sar` = sar bits
* - `sarB` = sar_bytes bits
* - `fftW` = FFT bit width bits
* - `res` = Reserved bits
*
*/
/* Pointer not increased to write data as tightly as possible (minimum increment is 8). */
esp.st.u.xacc.ip \frame, 0
/* SAR_BYTES and FFT_BIT_WIDTH registers
* Prepare the 8-bit value: (SAR_BYTES << 4) | FFT_BIT_WIDTH */
esp.movx.r.sar.bytes a1 /* Load SAR_BYTES register (4 bits) */
slli a2, a1, 4 /* a2 = (SAR_BYTES << 4) */
esp.movx.r.fft.bit.width a1 /* Load FFT_BIT_WIDTH register (4 bits) */
or a2, a2, a1 /* a2 |= FFT_BIT_WIDTH */
sb a2, 5(\frame) /* Store byte after the XACC data */
/* SAR register (6 bits) */ /* SAR register (6 bits) */
esp.movx.r.sar a1 esp.movx.r.sar a1
slli a2, a1, 8 sb a1, 6(\frame) /* Store byte after the SAR_BYTES and FFT_BIT_WIDTH */
/* SAR_BYTES register (4 bits) */
esp.movx.r.sar.bytes a1
slli a1, a1, 4
or a2, a2, a1
/* FFT_BIT_WIDTH register (4 bits) */
esp.movx.r.fft.bit.width a1
or a2, a2, a1
sw a2, (\frame)
.endm .endm
@@ -297,6 +335,7 @@ _hwlp_restore_end:
esp.vld.128.ip q0, \frame, 16 esp.vld.128.ip q0, \frame, 16
esp.vld.128.ip q1, \frame, 16 esp.vld.128.ip q1, \frame, 16
esp.vld.128.ip q2, \frame, 16 esp.vld.128.ip q2, \frame, 16
esp.vld.128.ip q3, \frame, 16
esp.vld.128.ip q4, \frame, 16 esp.vld.128.ip q4, \frame, 16
esp.vld.128.ip q5, \frame, 16 esp.vld.128.ip q5, \frame, 16
esp.vld.128.ip q6, \frame, 16 esp.vld.128.ip q6, \frame, 16
@@ -308,21 +347,31 @@ _hwlp_restore_end:
esp.ld.qacc.h.h.128.ip \frame, 16 esp.ld.qacc.h.h.128.ip \frame, 16
/* UA_STATE register (128 bits) */ /* UA_STATE register (128 bits) */
esp.ld.ua.state.ip \frame, 16 esp.ld.ua.state.ip \frame, 16
/* XACC register (40 bits) */ /* XACC register (40 bits)
esp.ld.xacc.ip \frame, 8 * Pointer not increased because the minimum step is 8, preventing data loss */
/* The following registers are stored in the same word */ esp.ld.xacc.ip \frame, 0
lw a2, (\frame)
/* The following registers are packed in the same word (addr: frame + 4):
* - XACC (upper byte) [7..0]
* - FFT_BIT_WIDTH [11..8]
* - SAR_BYTES [15..12]
* - SAR [21..16]
*
* See pie_save_regs macro for more details.
*/
lbu a1, 5(\frame) /* Load packed FFT_BIT_WIDTH and SAR_BYTES from offset 5
* (frame points to the start of the 5-byte XACC) */
/* FFT_BIT_WIDTH register (4 bits) */ /* FFT_BIT_WIDTH register (4 bits) */
andi a1, a2, 0xf esp.movx.w.fft.bit.width a1 /* FFT_BIT_WIDTH[3:0] = rs1[3:0] */
esp.movx.w.sar a1
/* SAR_BYTES register (4 bits) */ /* SAR_BYTES register (4 bits) */
srli a2, a2, 4 srli a1, a1, 4
andi a1, a2, 0xf esp.movx.w.sar.bytes a1 /* SAR_BYTE[3:0] = rs1[3:0] */
esp.movx.w.sar.bytes a1
/* SAR register (6 bits) */ /* SAR register (6 bits) */
srli a2, a2, 4 lbu a1, 6(\frame)
andi a1, a2, 0x3f esp.movx.w.sar a1 /* SAR[5:0] = rs1[5:0] */
esp.movx.w.fft.bit.width a1
.endm .endm
generate_coprocessor_routine pie, PIE_COPROC_IDX, pie_enable, pie_save_regs, pie_restore_regs generate_coprocessor_routine pie, PIE_COPROC_IDX, pie_enable, pie_save_regs, pie_restore_regs

View File

@@ -233,8 +233,8 @@ STRUCT_AFIELD (long, 4, RV_PIE_QACC_H_L, qacc_h_l, 4)
STRUCT_AFIELD (long, 4, RV_PIE_QACC_H_H, qacc_h_h, 4) STRUCT_AFIELD (long, 4, RV_PIE_QACC_H_H, qacc_h_h, 4)
STRUCT_AFIELD (long, 4, RV_PIE_UA_STATE, ua_state, 4) STRUCT_AFIELD (long, 4, RV_PIE_UA_STATE, ua_state, 4)
STRUCT_FIELD (long, 4, RV_PIE_XACC, xacc) STRUCT_FIELD (long, 4, RV_PIE_XACC, xacc)
/* This register contains SAR, SAR_BYTES and FFT_BIT_WIDTH in this order (from top to low) */ /* misc field contains registers: XACC (upper byte) [7:0], FFT_BIT_WIDTH [11:8], SAR_BYTES [15:12], and SAR [21:16] */
STRUCT_FIELD (long, 4, RV_PIE_MISC, misc) STRUCT_FIELD (long, 4, RV_PIE_MISC, misc)
STRUCT_END(RvPIESaveArea) STRUCT_END(RvPIESaveArea)
/* Redefine the coprocessor area size previously defined to 0 */ /* Redefine the coprocessor area size previously defined to 0 */