From ad85f49926777bbdae682affabdfc0aca8ffc4fe Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Thu, 18 Jun 2026 20:34:11 +1000 Subject: [PATCH] Thumb2 X25519: Fix to do full reduction This fix went into ARM32 assembly; it is now being added to Thumb2 assembly. Full reduction to ensure the number is in range at end of work. --- wolfcrypt/src/port/arm/thumb2-curve25519.S | 41 +++++++++++++++++--- wolfcrypt/src/port/arm/thumb2-curve25519_c.c | 37 ++++++++++++++++-- 2 files changed, 70 insertions(+), 8 deletions(-) diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519.S b/wolfcrypt/src/port/arm/thumb2-curve25519.S index 27acee8b1c..dbc3f327e9 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519.S +++ b/wolfcrypt/src/port/arm/thumb2-curve25519.S @@ -2933,10 +2933,33 @@ L_curve25519_inv_8: LDR r1, [sp, #160] LDR r0, [sp, #160] BL fe_mul_op + /* Ensure result is less than modulus */ + LDR r0, [sp, #160] + LDM r0, {r4, r5, r6, r7, r8, r9, r10, r11} + ADDS r2, r4, #0x13 + ADCS r2, r5, #0x0 + ADCS r2, r6, #0x0 + ADCS r2, r7, #0x0 + ADCS r2, r8, #0x0 + ADCS r2, r9, #0x0 + ADCS r2, r10, #0x0 + ADC r2, r11, #0x0 + ASR r2, r2, #31 + AND r2, r2, #0x13 + ADDS r4, r4, r2 + ADCS r5, r5, #0x0 + ADCS r6, r6, #0x0 + ADCS r7, r7, #0x0 + ADCS r8, r8, #0x0 + ADCS r9, r9, #0x0 + ADCS r10, r10, #0x0 + ADC r11, r11, #0x0 + BFC r11, #31, #1 + STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} MOV r0, #0x0 ADD sp, sp, #0xbc POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - /* Cycle Count = 682 */ + /* Cycle Count = 721 */ .size curve25519,.-curve25519 #else .text @@ -3253,22 +3276,30 @@ L_curve25519_inv_8: /* Ensure result is less than modulus */ LDR r0, [sp, #176] LDM r0, {r4, r5, r6, r7, r8, r9, r10, r11} - MOV r2, #0x13 - AND r2, r2, r11, ASR #31 + ADDS r2, r4, #0x13 + ADCS r2, r5, #0x0 + ADCS r2, r6, #0x0 + ADCS r2, r7, #0x0 + ADCS r2, r8, #0x0 + ADCS r2, r9, #0x0 + ADCS r2, r10, #0x0 + ADC r2, r11, #0x0 + ASR r2, r2, #31 + AND r2, r2, #0x13 ADDS r4, r4, r2 ADCS r5, r5, #0x0 ADCS r6, r6, #0x0 ADCS 
r7, r7, #0x0 ADCS r8, r8, #0x0 ADCS r9, r9, #0x0 - BFC r11, #31, #1 ADCS r10, r10, #0x0 ADC r11, r11, #0x0 + BFC r11, #31, #1 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} MOV r0, #0x0 ADD sp, sp, #0xc0 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - /* Cycle Count = 589 */ + /* Cycle Count = 597 */ .size curve25519,.-curve25519 #endif /* WC_NO_CACHE_RESISTANT */ #endif /* HAVE_CURVE25519 */ diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c index e351b4349e..3738854f55 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c +++ b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c @@ -3372,6 +3372,29 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "LDR r1, [sp, #160]\n\t" "LDR r0, [sp, #160]\n\t" "BL fe_mul_op\n\t" + /* Ensure result is less than modulus */ + "LDR %[r], [sp, #160]\n\t" + "LDM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ADDS %[a], r4, #0x13\n\t" + "ADCS %[a], r5, #0x0\n\t" + "ADCS %[a], r6, #0x0\n\t" + "ADCS %[a], r7, #0x0\n\t" + "ADCS %[a], r8, #0x0\n\t" + "ADCS %[a], r9, #0x0\n\t" + "ADCS %[a], r10, #0x0\n\t" + "ADC %[a], r11, #0x0\n\t" + "ASR %[a], %[a], #31\n\t" + "AND %[a], %[a], #0x13\n\t" + "ADDS r4, r4, %[a]\n\t" + "ADCS r5, r5, #0x0\n\t" + "ADCS r6, r6, #0x0\n\t" + "ADCS r7, r7, #0x0\n\t" + "ADCS r8, r8, #0x0\n\t" + "ADCS r9, r9, #0x0\n\t" + "ADCS r10, r10, #0x0\n\t" + "ADC r11, r11, #0x0\n\t" + "BFC r11, #31, #1\n\t" + "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "MOV r0, #0x0\n\t" "ADD sp, sp, #0xbc\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -3773,17 +3796,25 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) /* Ensure result is less than modulus */ "LDR %[r], [sp, #176]\n\t" "LDM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - "MOV %[a], #0x13\n\t" - "AND %[a], %[a], r11, ASR #31\n\t" + "ADDS %[a], r4, #0x13\n\t" + "ADCS %[a], r5, #0x0\n\t" + "ADCS %[a], r6, #0x0\n\t" + "ADCS %[a], r7, #0x0\n\t" + "ADCS %[a], r8, #0x0\n\t" + 
"ADCS %[a], r9, #0x0\n\t" + "ADCS %[a], r10, #0x0\n\t" + "ADC %[a], r11, #0x0\n\t" + "ASR %[a], %[a], #31\n\t" + "AND %[a], %[a], #0x13\n\t" "ADDS r4, r4, %[a]\n\t" "ADCS r5, r5, #0x0\n\t" "ADCS r6, r6, #0x0\n\t" "ADCS r7, r7, #0x0\n\t" "ADCS r8, r8, #0x0\n\t" "ADCS r9, r9, #0x0\n\t" - "BFC r11, #31, #1\n\t" "ADCS r10, r10, #0x0\n\t" "ADC r11, r11, #0x0\n\t" + "BFC r11, #31, #1\n\t" "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "MOV r0, #0x0\n\t" "ADD sp, sp, #0xc0\n\t"