diff --git a/configure.ac b/configure.ac index 97d19e07a..65e2af871 100644 --- a/configure.ac +++ b/configure.ac @@ -1289,6 +1289,10 @@ do LMS_VERIFY_ONLY=yes AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_LMS_VERIFY_ONLY" ;; + small) + ENABLED_WC_LMS=yes + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_WC_LMS_SMALL" + ;; wolfssl) ENABLED_WC_LMS=yes AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_WC_LMS" diff --git a/src/include.am b/src/include.am index b9a8b3316..27330ac55 100644 --- a/src/include.am +++ b/src/include.am @@ -710,6 +710,7 @@ endif if BUILD_WC_LMS src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_lms.c +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_lms_impl.c endif if BUILD_WC_XMSS diff --git a/wolfcrypt/benchmark/benchmark.c b/wolfcrypt/benchmark/benchmark.c index e8ebccfe9..a658c0b47 100644 --- a/wolfcrypt/benchmark/benchmark.c +++ b/wolfcrypt/benchmark/benchmark.c @@ -178,6 +178,8 @@ #include #ifdef HAVE_LIBLMS #include + #else + #include #endif #endif #if defined(WOLFSSL_HAVE_XMSS) && !defined(WOLFSSL_XMSS_VERIFY_ONLY) @@ -9430,7 +9432,7 @@ static const byte lms_pub_L4_H5_W8[60] = 0x74,0x24,0x12,0xC8 }; -static int lms_write_key_mem(const byte * priv, word32 privSz, void *context) +static int lms_write_key_mem(const byte* priv, word32 privSz, void* context) { /* WARNING: THIS IS AN INSECURE WRITE CALLBACK THAT SHOULD ONLY * BE USED FOR TESTING PURPOSES! Production applications should @@ -9439,15 +9441,128 @@ static int lms_write_key_mem(const byte * priv, word32 privSz, void *context) return WC_LMS_RC_SAVED_TO_NV_MEMORY; } -static int lms_read_key_mem(byte * priv, word32 privSz, void *context) +static int lms_read_key_mem(byte* priv, word32 privSz, void* context) { /* WARNING: THIS IS AN INSECURE READ CALLBACK THAT SHOULD ONLY * BE USED FOR TESTING PURPOSES! */ XMEMCPY(priv, context, privSz); return WC_LMS_RC_READ_TO_MEMORY; } +static byte lms_priv[HSS_MAX_PRIVATE_KEY_LEN]; -static void bench_lms_sign_verify(enum wc_LmsParm parm) +static void bench_lms_keygen(int parm, byte* pub) +{ + WC_RNG rng; + LmsKey key; + int ret; + word32 pubLen = HSS_MAX_PUBLIC_KEY_LEN; + int times = 0; + int count = 0; + double start = 0.0F; + int levels; + int height; + int winternitz; + const char* str = wc_LmsKey_ParmToStr(parm); + DECLARE_MULTI_VALUE_STATS_VARS() + +#ifndef HAVE_FIPS + ret = wc_InitRng_ex(&rng, HEAP_HINT, INVALID_DEVID); +#else + ret = wc_InitRng(&rng); +#endif + if (ret != 0) { + fprintf(stderr, "error: wc_InitRng failed: %d\n", ret); + return; + } + + ret = wc_LmsKey_Init(&key, NULL, INVALID_DEVID); + if (ret) { + printf("wc_LmsKey_Init failed: %d\n", ret); + wc_FreeRng(&rng); + return; + } + + count = 0; + bench_stats_start(&count, &start); + + do { + /* LMS is stateful. Async queuing not practical. */ + for (times = 0; times < 1; ++times) { + + wc_LmsKey_Free(&key); + + ret = wc_LmsKey_Init(&key, NULL, INVALID_DEVID); + if (ret) { + printf("wc_LmsKey_Init failed: %d\n", ret); + goto exit_lms_keygen; + } + + ret = wc_LmsKey_SetLmsParm(&key, parm); + if (ret) { + printf("wc_LmsKey_SetLmsParm failed: %d\n", ret); + goto exit_lms_keygen; + } + + ret = wc_LmsKey_GetParameters(&key, &levels, &height, &winternitz); + if (ret) { + fprintf(stderr, "error: wc_LmsKey_GetParameters failed: %d\n", + ret); + goto exit_lms_keygen; + } + + ret = wc_LmsKey_SetWriteCb(&key, lms_write_key_mem); + if (ret) { + fprintf(stderr, "error: wc_LmsKey_SetWriteCb failed: %d\n", + ret); + goto exit_lms_keygen; + } + + ret = wc_LmsKey_SetReadCb(&key, lms_read_key_mem); + if (ret) { + fprintf(stderr, "error: wc_LmsKey_SetReadCb failed: %d\n", ret); + goto exit_lms_keygen; + } + + ret = wc_LmsKey_SetContext(&key, (void*)lms_priv); + if (ret) { + fprintf(stderr, "error: wc_LmsKey_SetContext failed: %d\n", + ret); + goto exit_lms_keygen; + } + + ret = wc_LmsKey_MakeKey(&key, &rng); + if (ret) { + printf("wc_LmsKey_MakeKey failed: %d\n", ret); + goto exit_lms_keygen; + } + + RECORD_MULTI_VALUE_STATS(); + } + + count += times; + } while (bench_stats_check(start) +#ifdef MULTI_VALUE_STATISTICS + || runs < minimum_runs +#endif + ); + + bench_stats_asym_finish(str, levels * height, "keygen", 0, + count, start, ret); +#ifdef MULTI_VALUE_STATISTICS + bench_multi_value_stats(max, min, sum, squareSum, runs); +#endif + + ret = wc_LmsKey_ExportPubRaw(&key, pub, &pubLen); + if (ret) { + fprintf(stderr, "error: wc_LmsKey_ExportPubRaw failed: %d\n", ret); + } + +exit_lms_keygen: + wc_LmsKey_Free(&key); + wc_FreeRng(&rng); +} + +static void bench_lms_sign_verify(int parm, byte* pub) { LmsKey key; int ret = 0; @@ -9460,8 +9575,8 @@ static void bench_lms_sign_verify(enum wc_LmsParm parm) int times = 0; int count = 0; double start = 0.0F; - byte priv[HSS_MAX_PRIVATE_KEY_LEN]; const char * str = wc_LmsKey_ParmToStr(parm); + DECLARE_MULTI_VALUE_STATS_VARS() ret = wc_LmsKey_Init(&key, NULL, INVALID_DEVID); if (ret) { @@ -9477,33 +9592,33 @@ static void bench_lms_sign_verify(enum wc_LmsParm parm) switch (parm) { case WC_LMS_PARM_L2_H10_W2: - XMEMCPY(priv, lms_priv_L2_H10_W2, sizeof(lms_priv_L2_H10_W2)); - XMEMCPY(key.pub, lms_pub_L2_H10_W2, sizeof(lms_pub_L2_H10_W2)); + XMEMCPY(lms_priv, lms_priv_L2_H10_W2, sizeof(lms_priv_L2_H10_W2)); + XMEMCPY(key.pub, lms_pub_L2_H10_W2, HSS_MAX_PUBLIC_KEY_LEN); break; case WC_LMS_PARM_L2_H10_W4: - XMEMCPY(priv, lms_priv_L2_H10_W4, sizeof(lms_priv_L2_H10_W4)); - XMEMCPY(key.pub, lms_pub_L2_H10_W4, sizeof(lms_pub_L2_H10_W4)); + XMEMCPY(lms_priv, lms_priv_L2_H10_W4, sizeof(lms_priv_L2_H10_W4)); + XMEMCPY(key.pub, lms_pub_L2_H10_W4, HSS_MAX_PUBLIC_KEY_LEN); break; case WC_LMS_PARM_L3_H5_W4: - XMEMCPY(priv, lms_priv_L3_H5_W4, sizeof(lms_priv_L3_H5_W4)); - XMEMCPY(key.pub, lms_pub_L3_H5_W4, sizeof(lms_pub_L3_H5_W4)); + XMEMCPY(lms_priv, lms_priv_L3_H5_W4, sizeof(lms_priv_L3_H5_W4)); + XMEMCPY(key.pub, lms_pub_L3_H5_W4, HSS_MAX_PUBLIC_KEY_LEN); break; case WC_LMS_PARM_L3_H5_W8: - XMEMCPY(priv, lms_priv_L3_H5_W8, sizeof(lms_priv_L3_H5_W8)); - XMEMCPY(key.pub, lms_pub_L3_H5_W8, sizeof(lms_pub_L3_H5_W8)); + XMEMCPY(lms_priv, lms_priv_L3_H5_W8, sizeof(lms_priv_L3_H5_W8)); + XMEMCPY(key.pub, lms_pub_L3_H5_W8, HSS_MAX_PUBLIC_KEY_LEN); break; case WC_LMS_PARM_L3_H10_W4: - XMEMCPY(priv, lms_priv_L3_H10_W4, sizeof(lms_priv_L3_H10_W4)); - XMEMCPY(key.pub, lms_pub_L3_H10_W4, sizeof(lms_pub_L3_H10_W4)); + XMEMCPY(lms_priv, lms_priv_L3_H10_W4, sizeof(lms_priv_L3_H10_W4)); + XMEMCPY(key.pub, lms_pub_L3_H10_W4, HSS_MAX_PUBLIC_KEY_LEN); break; case WC_LMS_PARM_L4_H5_W8: - XMEMCPY(priv, lms_priv_L4_H5_W8, sizeof(lms_priv_L4_H5_W8)); - XMEMCPY(key.pub, lms_pub_L4_H5_W8, sizeof(lms_pub_L4_H5_W8)); + XMEMCPY(lms_priv, lms_priv_L4_H5_W8, sizeof(lms_priv_L4_H5_W8)); + XMEMCPY(key.pub, lms_pub_L4_H5_W8, HSS_MAX_PUBLIC_KEY_LEN); break; case WC_LMS_PARM_NONE: @@ -9511,9 +9626,9 @@ static void bench_lms_sign_verify(enum wc_LmsParm parm) case WC_LMS_PARM_L1_H15_W4: case WC_LMS_PARM_L2_H10_W8: case WC_LMS_PARM_L3_H5_W2: - printf("bench_lms_sign_verify: unsupported benchmark option: %d\n", - parm); - goto exit_lms_sign_verify; + default: + XMEMCPY(key.pub, pub, HSS_MAX_PUBLIC_KEY_LEN); + break; } ret = wc_LmsKey_SetWriteCb(&key, lms_write_key_mem); @@ -9528,7 +9643,7 @@ static void bench_lms_sign_verify(enum wc_LmsParm parm) goto exit_lms_sign_verify; } - ret = wc_LmsKey_SetContext(&key, (void *) priv); + ret = wc_LmsKey_SetContext(&key, (void*)lms_priv); if (ret) { fprintf(stderr, "error: wc_LmsKey_SetContext failed: %d\n", ret); goto exit_lms_sign_verify; @@ -9537,35 +9652,68 @@ static void bench_lms_sign_verify(enum wc_LmsParm parm) /* Even with saved priv/pub keys, we must still reload the private * key before using it. Reloading the private key is the bottleneck * for larger heights. Only print load time in debug builds. */ -#if defined(DEBUG_WOLFSSL) + count = 0; bench_stats_start(&count, &start); -#endif /* if defined DEBUG_WOLFSSL*/ +#ifndef WOLFSSL_WC_LMS_SMALL + do { + #ifdef WOLFSSL_WC_LMS + key.priv.inited = 0; + key.state = WC_LMS_STATE_PARMSET; + #endif + ret = wc_LmsKey_Reload(&key); + if (ret) { + printf("wc_LmsKey_Reload failed: %d\n", ret); + goto exit_lms_sign_verify; + } + RECORD_MULTI_VALUE_STATS(); + + count++; + + ret = wc_LmsKey_GetSigLen(&key, &sigSz); + if (ret) { + printf("wc_LmsKey_GetSigLen failed: %d\n", ret); + goto exit_lms_sign_verify; + } + + ret = wc_LmsKey_GetPrivLen(&key, &privLen); + if (ret) { + printf("wc_LmsKey_GetPrivLen failed: %d\n", ret); + goto exit_lms_sign_verify; + } + #ifdef HAVE_LIBLMS + break; + #endif + } while (bench_stats_check(start) +#ifdef MULTI_VALUE_STATISTICS + || runs < minimum_runs +#endif + ); + + bench_stats_asym_finish(str, (int)privLen, "load", 0, + count, start, ret); +#ifdef MULTI_VALUE_STATISTICS + bench_multi_value_stats(max, min, sum, squareSum, runs); +#endif + + RESET_MULTI_VALUE_STATS_VARS(); +#else ret = wc_LmsKey_Reload(&key); if (ret) { printf("wc_LmsKey_Reload failed: %d\n", ret); goto exit_lms_sign_verify; } - - count +=1; - ret = wc_LmsKey_GetSigLen(&key, &sigSz); if (ret) { printf("wc_LmsKey_GetSigLen failed: %d\n", ret); goto exit_lms_sign_verify; } - ret = wc_LmsKey_GetPrivLen(&key, &privLen); if (ret) { printf("wc_LmsKey_GetPrivLen failed: %d\n", ret); goto exit_lms_sign_verify; } - -#if defined(DEBUG_WOLFSSL) - bench_stats_check(start); - bench_stats_asym_finish(str, (int)privLen, "load", 0, - count, start, ret); -#endif /* if defined DEBUG_WOLFSSL*/ +#endif loaded = 1; @@ -9580,22 +9728,29 @@ static void bench_lms_sign_verify(enum wc_LmsParm parm) do { /* LMS is stateful. Async queuing not practical. */ - for (times = 0; times < ntimes; ++times) { - +#ifndef WOLFSSL_WC_LMS_SMALL + for (times = 0; times < ntimes; ++times) +#else + for (times = 0; times < 1; ++times) +#endif + { ret = wc_LmsKey_Sign(&key, sig, &sigSz, (byte *) msg, msgSz); if (ret) { printf("wc_LmsKey_Sign failed: %d\n", ret); goto exit_lms_sign_verify; } RECORD_MULTI_VALUE_STATS(); + if (!wc_LmsKey_SigsLeft(&key)) { + break; + } } count += times; - } while (bench_stats_check(start) + } while (wc_LmsKey_SigsLeft(&key) && (bench_stats_check(start) #ifdef MULTI_VALUE_STATISTICS || runs < minimum_runs #endif - ); + )); bench_stats_asym_finish(str, (int)sigSz, "sign", 0, count, start, ret); @@ -9635,25 +9790,62 @@ exit_lms_sign_verify: if (loaded) { wc_LmsKey_Free(&key); - loaded = 0; - } - - if (sig != NULL) { - XFREE(sig, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - sig = NULL; } + XFREE(sig, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); return; } void bench_lms(void) { - bench_lms_sign_verify(WC_LMS_PARM_L2_H10_W2); - bench_lms_sign_verify(WC_LMS_PARM_L2_H10_W4); - bench_lms_sign_verify(WC_LMS_PARM_L3_H5_W4); - bench_lms_sign_verify(WC_LMS_PARM_L3_H5_W8); - bench_lms_sign_verify(WC_LMS_PARM_L3_H10_W4); - bench_lms_sign_verify(WC_LMS_PARM_L4_H5_W8); + byte pub[HSS_MAX_PUBLIC_KEY_LEN]; + +#ifdef BENCH_LMS_SLOW_KEYGEN +#if !defined(WOLFSSL_WC_LMS) || (LMS_MAX_HEIGHT >= 15) + bench_lms_keygen(WC_LMS_PARM_L1_H15_W2, pub); + bench_lms_sign_verify(WC_LMS_PARM_L1_H15_W2, pub); + bench_lms_keygen(WC_LMS_PARM_L1_H15_W4, pub); + bench_lms_sign_verify(WC_LMS_PARM_L1_H15_W4, pub); + #undef LMS_PARAMS_BENCHED + #define LMS_PARAMS_BENCHED +#endif +#endif +#if !defined(WOLFSSL_WC_LMS) || ((LMS_MAX_LEVELS >= 2) && \ + (LMS_MAX_HEIGHT >= 10)) + bench_lms_keygen(WC_LMS_PARM_L2_H10_W2, pub); + bench_lms_sign_verify(WC_LMS_PARM_L2_H10_W2, pub); + bench_lms_keygen(WC_LMS_PARM_L2_H10_W4, pub); + bench_lms_sign_verify(WC_LMS_PARM_L2_H10_W4, pub); + #undef LMS_PARAMS_BENCHED + #define LMS_PARAMS_BENCHED +#ifdef BENCH_LMS_SLOW_KEYGEN + bench_lms_keygen(WC_LMS_PARM_L2_H10_W8, pub); + bench_lms_sign_verify(WC_LMS_PARM_L2_H10_W8, pub); +#endif +#endif +#if !defined(WOLFSSL_WC_LMS) || (LMS_MAX_LEVELS >= 3) + bench_lms_keygen(WC_LMS_PARM_L3_H5_W4, pub); + bench_lms_sign_verify(WC_LMS_PARM_L3_H5_W4, pub); + bench_lms_keygen(WC_LMS_PARM_L3_H5_W8, pub); + bench_lms_sign_verify(WC_LMS_PARM_L3_H5_W8, pub); + #undef LMS_PARAMS_BENCHED + #define LMS_PARAMS_BENCHED +#endif +#if !defined(WOLFSSL_WC_LMS) || ((LMS_MAX_LEVELS >= 3) && \ + (LMS_MAX_HEIGHT >= 10)) + bench_lms_keygen(WC_LMS_PARM_L3_H10_W4, pub); + bench_lms_sign_verify(WC_LMS_PARM_L3_H10_W4, pub); +#endif +#if !defined(WOLFSSL_WC_LMS) || (LMS_MAX_LEVELS >= 4) + bench_lms_keygen(WC_LMS_PARM_L4_H5_W8, pub); + bench_lms_sign_verify(WC_LMS_PARM_L4_H5_W8, pub); +#endif + +#if defined(WOLFSSL_WC_LMS) && !defined(LMS_PARAMS_BENCHED) + bench_lms_keygen(0x100, pub); + bench_lms_sign_verify(0x100, pub); +#endif + return; } diff --git a/wolfcrypt/src/misc.c b/wolfcrypt/src/misc.c index 6be10f665..af5f09abb 100644 --- a/wolfcrypt/src/misc.c +++ b/wolfcrypt/src/misc.c @@ -460,10 +460,16 @@ WC_MISC_STATIC WC_INLINE void c16toa(word16 wc_u16, byte* c) /* convert 32 bit integer to opaque */ WC_MISC_STATIC WC_INLINE void c32toa(word32 wc_u32, byte* c) { +#ifdef WOLFSSL_USE_ALIGN c[0] = (byte)((wc_u32 >> 24) & 0xff); c[1] = (byte)((wc_u32 >> 16) & 0xff); c[2] = (byte)((wc_u32 >> 8) & 0xff); c[3] = (byte)(wc_u32 & 0xff); +#elif defined(LITTLE_ENDIAN_ORDER) + *(word32*)c = ByteReverseWord32(wc_u32); +#else + *(word32*)c = wc_u32; +#endif } #endif @@ -492,10 +498,16 @@ WC_MISC_STATIC WC_INLINE void ato16(const byte* c, word16* wc_u16) /* convert opaque to 32 bit integer */ WC_MISC_STATIC WC_INLINE void ato32(const byte* c, word32* wc_u32) { +#ifdef WOLFSSL_USE_ALIGN *wc_u32 = ((word32)c[0] << 24) | ((word32)c[1] << 16) | ((word32)c[2] << 8) | (word32)c[3]; +#elif defined(LITTLE_ENDIAN_ORDER) + *wc_u32 = ByteReverseWord32(*(word32*)c); +#else + *wc_u32 = *(word32*)c; +#endif } /* convert opaque to 32 bit integer. Interpret as little endian. */ diff --git a/wolfcrypt/src/port/arm/armv8-sha256.c b/wolfcrypt/src/port/arm/armv8-sha256.c index e65e2104e..1f740dd83 100644 --- a/wolfcrypt/src/port/arm/armv8-sha256.c +++ b/wolfcrypt/src/port/arm/armv8-sha256.c @@ -130,8 +130,8 @@ static WC_INLINE void Sha256Transform(wc_Sha256* sha256, const byte* data, word32* k = (word32*)K; __asm__ volatile ( - "#load leftover data\n" - "LD1 {v0.2d-v3.2d}, %[buffer] \n" + "# load first block of data\n" + "LD1 {v0.16b-v3.16b}, [%[dataIn]], #64 \n" "#load current digest\n" "LD1 {v12.2d-v13.2d}, %[digest] \n" @@ -293,10 +293,9 @@ static WC_INLINE void Sha256Transform(wc_Sha256* sha256, const byte* data, "2:\n" "ST1 {v12.2d-v13.2d}, %[out] \n" - : [out] "=m" (sha256->digest), "=m" (sha256->buffer), "=r" (numBlocks), - "=r" (data), "=r" (k) - : [k] "4" (k), [digest] "m" (sha256->digest), [buffer] "m" (sha256->buffer), - [blocks] "2" (numBlocks), [dataIn] "3" (data) + : [out] "=m" (sha256->digest), "=r" (numBlocks), "=r" (data), "=r" (k) + : [k] "3" (k), [digest] "m" (sha256->digest), [blocks] "1" (numBlocks), + [dataIn] "2" (data) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", @@ -306,7 +305,8 @@ static WC_INLINE void Sha256Transform(wc_Sha256* sha256, const byte* data, } /* ARMv8 hardware acceleration */ -static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) +static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, + word32 len) { word32 add; word32 numBlocks; @@ -315,26 +315,32 @@ static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 le if (len > 0) { AddLength(sha256, len); - /* fill leftover buffer with data */ - add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen); - XMEMCPY((byte*)(sha256->buffer) + sha256->buffLen, data, add); - sha256->buffLen += add; - data += add; - len -= add; + if (sha256->buffLen > 0) { + /* fill leftover buffer with data */ + add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen); + XMEMCPY((byte*)(sha256->buffer) + sha256->buffLen, data, add); + sha256->buffLen += add; + data += add; + len -= add; + if (sha256->buffLen == WC_SHA256_BLOCK_SIZE) { + Sha256Transform(sha256, (byte*)sha256->buffer, 1); + sha256->buffLen = 0; + } + } /* number of blocks in a row to complete */ numBlocks = (len + sha256->buffLen)/WC_SHA256_BLOCK_SIZE; if (numBlocks > 0) { - /* get leftover amount after blocks */ - add = (len + sha256->buffLen) - numBlocks * WC_SHA256_BLOCK_SIZE; - Sha256Transform(sha256, data, numBlocks); - data += numBlocks * WC_SHA256_BLOCK_SIZE - sha256->buffLen; + data += numBlocks * WC_SHA256_BLOCK_SIZE; + len -= numBlocks * WC_SHA256_BLOCK_SIZE; + } + if (len > 0) { /* copy over any remaining data leftover */ - XMEMCPY(sha256->buffer, data, add); - sha256->buffLen = add; + XMEMCPY(sha256->buffer, data, len); + sha256->buffLen = len; } } @@ -702,8 +708,9 @@ static WC_INLINE void Sha256Transform(wc_Sha256* sha256, const byte* data, word32* digPt = sha256->digest; __asm__ volatile ( - "#load leftover data\n" - "VLDM %[buffer]!, {q0-q3} \n" + "# load first block of data\n" + "VLD1.8 {d0-d3}, [%[dataIn]]! \n" + "VLD1.8 {d4-d7}, [%[dataIn]]! \n" "#load current digest\n" "VLDM %[digest], {q12-q13} \n" @@ -863,10 +870,8 @@ static WC_INLINE void Sha256Transform(wc_Sha256* sha256, const byte* data, "BEQ 2f \n" "#load in message and schedule updates \n" - "VLD1.32 {q0}, [%[dataIn]]! \n" - "VLD1.32 {q1}, [%[dataIn]]! \n" - "VLD1.32 {q2}, [%[dataIn]]! \n" - "VLD1.32 {q3}, [%[dataIn]]! \n" + "VLD1.8 {d0-d3}, [%[dataIn]]! \n" + "VLD1.8 {d4-d7}, [%[dataIn]]! \n" /* reset K pointer */ "SUB %[k], %[k], #160 \n" @@ -892,7 +897,8 @@ static WC_INLINE void Sha256Transform(wc_Sha256* sha256, const byte* data, } /* ARMv8 hardware acceleration Aarch32 */ -static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) +static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, + word32 len) { word32 add; word32 numBlocks; @@ -901,26 +907,32 @@ static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 le if (len > 0) { AddLength(sha256, len); - /* fill leftover buffer with data */ - add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen); - XMEMCPY((byte*)(sha256->buffer) + sha256->buffLen, data, add); - sha256->buffLen += add; - data += add; - len -= add; + if (sha256->buffLen > 0) { + /* fill leftover buffer with data */ + add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen); + XMEMCPY((byte*)(sha256->buffer) + sha256->buffLen, data, add); + sha256->buffLen += add; + data += add; + len -= add; + if (sha256->buffLen == WC_SHA256_BLOCK_SIZE) { + Sha256Transform(sha256, (byte*)sha256->buffer, 1); + sha256->buffLen = 0; + } + } /* number of blocks in a row to complete */ numBlocks = (len + sha256->buffLen)/WC_SHA256_BLOCK_SIZE; if (numBlocks > 0) { - /* get leftover amount after blocks */ - add = (len + sha256->buffLen) - numBlocks * WC_SHA256_BLOCK_SIZE; - Sha256Transform(sha256, data, numBlocks); - data += numBlocks * WC_SHA256_BLOCK_SIZE - sha256->buffLen; + data += numBlocks * WC_SHA256_BLOCK_SIZE; + len -= numBlocks * WC_SHA256_BLOCK_SIZE; + } + if (len > 0) { /* copy over any remaining data leftover */ - XMEMCPY(sha256->buffer, data, add); - sha256->buffLen = add; + XMEMCPY(sha256->buffer, data, len); + sha256->buffLen = len; } } @@ -1619,6 +1631,78 @@ int wc_Sha256Transform(wc_Sha256* sha256, const unsigned char* data) } #endif +#if defined(WOLFSSL_HAVE_LMS) && !defined(WOLFSSL_LMS_FULL_HASH) +/* One block will be used from data. + * hash must be big enough to hold all of digest output. + */ +int wc_Sha256HashBlock(wc_Sha256* sha256, const unsigned char* data, + unsigned char* hash) +{ + int ret = 0; + + if ((sha256 == NULL) || (data == NULL)) { + return BAD_FUNC_ARG; + } + +#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO + Sha256Transform(sha256, data, 1); +#else + Transform_Sha256_Len(sha256, data, WC_SHA256_BLOCK_SIZE); +#endif + + if (hash != NULL) { +#ifdef LITTLE_ENDIAN_ORDER + #ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO + #ifdef __aarch64__ + __asm__ __volatile__ ( + "LD1 {v0.2d-v1.2d}, [%[digest]] \n" + "REV32 v0.16b, v0.16b \n" + "REV32 v1.16b, v1.16b \n" + "ST1 {v0.16b-v1.16b}, [%[hash]] \n" + : + : [digest] "r" (sha256->digest), [hash] "r" (hash) + : "memory", "v0", "v1" + ); + #else + __asm__ __volatile__ ( + "VLDM %[digest], {q0-q1} \n" + "VREV32.8 q0, q0 \n" + "VREV32.8 q1, q1 \n" + "VST1.8 {d0-d3}, [%[hash]] \n" + : + : [digest] "r" (sha256->digest), [hash] "r" (hash) + : "memory", "q0", "q1" + ); + #endif + #else + word32* hash32 = (word32*)hash; + word32* digest = (word32*)sha256->digest; + hash32[0] = ByteReverseWord32(digest[0]); + hash32[1] = ByteReverseWord32(digest[1]); + hash32[2] = ByteReverseWord32(digest[2]); + hash32[3] = ByteReverseWord32(digest[3]); + hash32[4] = ByteReverseWord32(digest[4]); + hash32[5] = ByteReverseWord32(digest[5]); + hash32[6] = ByteReverseWord32(digest[6]); + hash32[7] = ByteReverseWord32(digest[7]); + #endif /* !WOLFSSL_ARMASM_NO_HW_CRYPTO */ +#else + XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE); +#endif + sha256->digest[0] = 0x6A09E667L; + sha256->digest[1] = 0xBB67AE85L; + sha256->digest[2] = 0x3C6EF372L; + sha256->digest[3] = 0xA54FF53AL; + sha256->digest[4] = 0x510E527FL; + sha256->digest[5] = 0x9B05688CL; + sha256->digest[6] = 0x1F83D9ABL; + sha256->digest[7] = 0x5BE0CD19L; + } + + return ret; +} +#endif /* WOLFSSL_HAVE_LMS && !WOLFSSL_LMS_FULL_HASH */ + #endif /* !NO_SHA256 */ diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index e4e1ddf93..5b58cbcda 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -169,6 +169,38 @@ on the specific device platform. #endif +#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + #if ( defined(CONFIG_IDF_TARGET_ESP32C2) || \ + defined(CONFIG_IDF_TARGET_ESP8684) || \ + defined(CONFIG_IDF_TARGET_ESP32C3) || \ + defined(CONFIG_IDF_TARGET_ESP32C6) \ + ) && \ + defined(WOLFSSL_ESP32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32_CRYPT_HASH) && \ + !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA256) + /* For Espressif RISC-V Targets, we *may* need to reverse bytes + * depending on if HW is active or not. */ + #define SHA256_REV_BYTES(ctx) \ + (esp_sha_need_byte_reversal(ctx)) + #endif +#endif +#ifndef SHA256_REV_BYTES + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + #define SHA256_REV_BYTES(ctx) 1 + #else + #define SHA256_REV_BYTES(ctx) 0 + #endif +#endif +#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) && \ + defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ + (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) + #define SHA256_UPDATE_REV_BYTES(ctx) \ + (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) +#else + #define SHA256_UPDATE_REV_BYTES(ctx) SHA256_REV_BYTES(ctx) +#endif + + #if !defined(WOLFSSL_PIC32MZ_HASH) && !defined(STM32_HASH_SHA2) && \ (!defined(WOLFSSL_IMX6_CAAM) || defined(NO_IMX6_CAAM_HASH) || \ defined(WOLFSSL_QNX_CAAM)) && \ @@ -188,11 +220,6 @@ on the specific device platform. static int InitSha256(wc_Sha256* sha256) { - int ret = 0; - - if (sha256 == NULL) - return BAD_FUNC_ARG; - XMEMSET(sha256->digest, 0, sizeof(sha256->digest)); sha256->digest[0] = 0x6A09E667L; sha256->digest[1] = 0xBB67AE85L; @@ -227,7 +254,7 @@ static int InitSha256(wc_Sha256* sha256) sha256->hSession = NULL; #endif - return ret; + return 0; } #endif @@ -590,7 +617,7 @@ static int InitSha256(wc_Sha256* sha256) { int ret = 0; - if (sha256 == NULL || (data == NULL && len > 0)) { + if (sha224 == NULL || (data == NULL && len > 0)) { return BAD_FUNC_ARG; } @@ -736,10 +763,6 @@ static int InitSha256(wc_Sha256* sha256) { int ret = 0; /* zero = success */ - if (sha256 == NULL) { - return BAD_FUNC_ARG; - } - /* We may or may not need initial digest for HW. * Always needed for SW-only. */ sha256->digest[0] = 0x6A09E667L; @@ -1049,21 +1072,13 @@ static int InitSha256(wc_Sha256* sha256) } /* do block size increments/updates */ - static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) + static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, + word32 len) { int ret = 0; word32 blocksLen; byte* local; - if (sha256 == NULL || (data == NULL && len > 0)) { - return BAD_FUNC_ARG; - } - - if (data == NULL && len == 0) { - /* valid, but do nothing */ - return 0; - } - /* check that internal buffLen is valid */ if (sha256->buffLen >= WC_SHA256_BLOCK_SIZE) { return BUFFER_E; @@ -1092,34 +1107,13 @@ static int InitSha256(wc_Sha256* sha256) } #endif - - #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - #if defined(WOLFSSL_X86_64_BUILD) && \ - defined(USE_INTEL_SPEEDUP) && \ - (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) - if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) - #endif - #if ( defined(CONFIG_IDF_TARGET_ESP32C2) || \ - defined(CONFIG_IDF_TARGET_ESP8684) || \ - defined(CONFIG_IDF_TARGET_ESP32C3) || \ - defined(CONFIG_IDF_TARGET_ESP32C6) \ - ) && \ - defined(WOLFSSL_ESP32_CRYPT) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_HASH) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA256) - /* For Espressif RISC-V Targets, we *may* need to reverse bytes - * depending on if HW is active or not. */ - if (esp_sha_need_byte_reversal(&sha256->ctx)) - #endif - { - ByteReverseWords(sha256->buffer, sha256->buffer, - WC_SHA256_BLOCK_SIZE); - } - #endif + if (SHA256_UPDATE_REV_BYTES(&sha256->ctx)) { + ByteReverseWords(sha256->buffer, sha256->buffer, + WC_SHA256_BLOCK_SIZE); + } #if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) && \ !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA256) - if (sha256->ctx.mode == ESP32_SHA_SW) { #if defined(WOLFSSL_DEBUG_MUTEX) { @@ -1146,7 +1140,6 @@ static int InitSha256(wc_Sha256* sha256) /* Always SW */ ret = XTRANSFORM(sha256, (const byte*)local); #endif - if (ret == 0) sha256->buffLen = 0; else @@ -1161,12 +1154,13 @@ static int InitSha256(wc_Sha256* sha256) if (Transform_Sha256_Len_p != NULL) #endif { - /* get number of blocks */ - /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */ - /* len (masked by 0xFFFFFFC0) returns block aligned length */ - blocksLen = len & ~((word32)WC_SHA256_BLOCK_SIZE-1); - if (blocksLen > 0) { - /* Byte reversal and alignment handled in function if required */ + if (len >= WC_SHA256_BLOCK_SIZE) { + /* get number of blocks */ + /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */ + /* len (masked by 0xFFFFFFC0) returns block aligned length */ + blocksLen = len & ~((word32)WC_SHA256_BLOCK_SIZE-1); + /* Byte reversal and alignment handled in function if required + */ XTRANSFORM_LEN(sha256, data, blocksLen); data += blocksLen; len -= blocksLen; @@ -1209,28 +1203,9 @@ static int InitSha256(wc_Sha256* sha256) } #endif - #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - #if ( defined(CONFIG_IDF_TARGET_ESP32C2) || \ - defined(CONFIG_IDF_TARGET_ESP8684) || \ - defined(CONFIG_IDF_TARGET_ESP32C3) || \ - defined(CONFIG_IDF_TARGET_ESP32C6) \ - ) && \ - defined(WOLFSSL_ESP32_CRYPT) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_HASH) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA256) - /* For Espressif RISC-V Targets, we *may* need to reverse bytes - * depending on if HW is active or not. */ - if (esp_sha_need_byte_reversal(&sha256->ctx)) - #endif - #if defined(WOLFSSL_X86_64_BUILD) && \ - defined(USE_INTEL_SPEEDUP) && \ - (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) - if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) - #endif - { - ByteReverseWords(local32, local32, WC_SHA256_BLOCK_SIZE); - } - #endif + if (SHA256_UPDATE_REV_BYTES(&sha256->ctx)) { + ByteReverseWords(local32, local32, WC_SHA256_BLOCK_SIZE); + } #if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) && \ !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA256) @@ -1267,14 +1242,16 @@ static int InitSha256(wc_Sha256* sha256) #else int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) { - if (sha256 == NULL || (data == NULL && len > 0)) { + if (sha256 == NULL) { return BAD_FUNC_ARG; } - if (data == NULL && len == 0) { /* valid, but do nothing */ return 0; } + if (data == NULL) { + return BAD_FUNC_ARG; + } #ifdef WOLF_CRYPTO_CB #ifndef WOLF_CRYPTO_CB_FIND @@ -1301,14 +1278,9 @@ static int InitSha256(wc_Sha256* sha256) static WC_INLINE int Sha256Final(wc_Sha256* sha256) { - int ret; byte* local; - if (sha256 == NULL) { - return BAD_FUNC_ARG; - } - /* we'll add a 0x80 byte at the end, ** so make sure we have appropriate buffer length. */ if (sha256->buffLen > WC_SHA256_BLOCK_SIZE - 1) { @@ -1326,8 +1298,6 @@ static int InitSha256(wc_Sha256* sha256) WC_SHA256_BLOCK_SIZE - sha256->buffLen); } - sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen; - #if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) && \ !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA256) if (sha256->ctx.mode == ESP32_SHA_INIT) { @@ -1335,28 +1305,10 @@ static int InitSha256(wc_Sha256* sha256) } #endif - #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - #if ( defined(CONFIG_IDF_TARGET_ESP32C2) || \ - defined(CONFIG_IDF_TARGET_ESP8684) || \ - defined(CONFIG_IDF_TARGET_ESP32C3) || \ - defined(CONFIG_IDF_TARGET_ESP32C6) \ - ) && \ - defined(WOLFSSL_ESP32_CRYPT) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_HASH) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA256) - /* For Espressif RISC-V Targets, we *may* need to reverse bytes - * depending on if HW is active or not. */ - if (esp_sha_need_byte_reversal(&sha256->ctx)) - #endif - #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ - (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) - if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) - #endif - { - ByteReverseWords(sha256->buffer, sha256->buffer, - WC_SHA256_BLOCK_SIZE); - } - #endif + if (SHA256_UPDATE_REV_BYTES(&sha256->ctx)) { + ByteReverseWords(sha256->buffer, sha256->buffer, + WC_SHA256_BLOCK_SIZE); + } #if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) && \ !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA256) @@ -1393,28 +1345,10 @@ static int InitSha256(wc_Sha256* sha256) #endif /* store lengths */ - #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - #if ( defined(CONFIG_IDF_TARGET_ESP32C2) || \ - defined(CONFIG_IDF_TARGET_ESP8684) || \ - defined(CONFIG_IDF_TARGET_ESP32C3) || \ - defined(CONFIG_IDF_TARGET_ESP32C6) \ - ) && \ - defined(WOLFSSL_ESP32_CRYPT) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_HASH) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA256) - /* For Espressif RISC-V Targets, we *may* need to reverse bytes - * depending on if HW is active or not. */ - if (esp_sha_need_byte_reversal(&sha256->ctx)) - #endif - #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ - (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) - if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) - #endif - { + if (SHA256_UPDATE_REV_BYTES(&sha256->ctx)) { ByteReverseWords(sha256->buffer, sha256->buffer, - WC_SHA256_BLOCK_SIZE); + WC_SHA256_PAD_SIZE); } - #endif /* ! 64-bit length ordering dependent on digest endian type ! */ XMEMCPY(&local[WC_SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32)); XMEMCPY(&local[WC_SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen, @@ -1496,23 +1430,10 @@ static int InitSha256(wc_Sha256* sha256) } #ifdef LITTLE_ENDIAN_ORDER - #if ( defined(CONFIG_IDF_TARGET_ESP32C2) || \ - defined(CONFIG_IDF_TARGET_ESP8684) || \ - defined(CONFIG_IDF_TARGET_ESP32C3) || \ - defined(CONFIG_IDF_TARGET_ESP32C6) \ - ) && \ - defined(WOLFSSL_ESP32_CRYPT) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_HASH) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA256) - /* For Espressif RISC-V Targets, we *may* need to reverse bytes - * depending on if HW is active or not. */ - if (esp_sha_need_byte_reversal(&sha256->ctx)) - #endif - { - ByteReverseWords((word32*)digest, - (word32*)sha256->digest, - WC_SHA256_DIGEST_SIZE); - } + if (SHA256_REV_BYTES(&sha256->ctx)) { + ByteReverseWords((word32*)digest, (word32*)sha256->digest, + WC_SHA256_DIGEST_SIZE); + } XMEMCPY(hash, digest, WC_SHA256_DIGEST_SIZE); #else XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE); @@ -1556,22 +1477,10 @@ static int InitSha256(wc_Sha256* sha256) } #if defined(LITTLE_ENDIAN_ORDER) - #if ( defined(CONFIG_IDF_TARGET_ESP32C2) || \ - defined(CONFIG_IDF_TARGET_ESP8684) || \ - defined(CONFIG_IDF_TARGET_ESP32C3) || \ - defined(CONFIG_IDF_TARGET_ESP32C6) \ - ) && \ - defined(WOLFSSL_ESP32_CRYPT) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_HASH) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA256) - /* For Espressif RISC-V Targets, we *may* need to reverse bytes - * depending on if HW is active or not. */ - if (esp_sha_need_byte_reversal(&sha256->ctx)) - #endif - { - ByteReverseWords(sha256->digest, sha256->digest, - WC_SHA256_DIGEST_SIZE); - } + if (SHA256_REV_BYTES(&sha256->ctx)) { + ByteReverseWords(sha256->digest, sha256->digest, + WC_SHA256_DIGEST_SIZE); + } #endif XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE); @@ -1583,18 +1492,115 @@ static int InitSha256(wc_Sha256* sha256) /* @param sha a pointer to wc_Sha256 structure */ /* @param data data to be applied SHA256 transformation */ /* @return 0 on successful, otherwise non-zero on failure */ - int wc_Sha256Transform(wc_Sha256* sha, const unsigned char* data) + int wc_Sha256Transform(wc_Sha256* sha256, const unsigned char* data) { - if (sha == NULL || data == NULL) { + if (sha256 == NULL || data == NULL) { return BAD_FUNC_ARG; } - return (Transform_Sha256(sha, data)); + return Transform_Sha256(sha256, data); } - #endif -#endif /* OPENSSL_EXTRA */ +#endif /* OPENSSL_EXTRA || HAVE_CURL */ +#if defined(WOLFSSL_HAVE_LMS) && !defined(WOLFSSL_LMS_FULL_HASH) + /* One block will be used from data. + * hash must be big enough to hold all of digest output. + */ + int wc_Sha256HashBlock(wc_Sha256* sha256, const unsigned char* data, + unsigned char* hash) + { + int ret; + + if ((sha256 == NULL) || (data == NULL)) { + return BAD_FUNC_ARG; + } + + if (SHA256_UPDATE_REV_BYTES(&sha256->ctx)) { + ByteReverseWords(sha256->buffer, (word32*)data, + WC_SHA256_BLOCK_SIZE); + data = (unsigned char*)sha256->buffer; + } + ret = XTRANSFORM(sha256, data); + + if ((ret == 0) && (hash != NULL)) { + if (!SHA256_REV_BYTES(&sha256->ctx)) { + XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE); + } + else { + #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) + __asm__ __volatile__ ( + "mov 0x00(%[d]), %%esi\n\t" + "movbe %%esi, 0x00(%[h])\n\t" + "mov 0x04(%[d]), %%esi\n\t" + "movbe %%esi, 0x04(%[h])\n\t" + "mov 0x08(%[d]), %%esi\n\t" + "movbe %%esi, 0x08(%[h])\n\t" + "mov 0x0c(%[d]), %%esi\n\t" + "movbe %%esi, 0x0c(%[h])\n\t" + "mov 0x10(%[d]), %%esi\n\t" + "movbe %%esi, 0x10(%[h])\n\t" + "mov 0x14(%[d]), %%esi\n\t" + "movbe %%esi, 0x14(%[h])\n\t" + "mov 0x18(%[d]), %%esi\n\t" + "movbe %%esi, 0x18(%[h])\n\t" + "mov 0x1c(%[d]), %%esi\n\t" + "movbe %%esi, 0x1c(%[h])\n\t" + : + : [d] "r" (sha256->digest), [h] "r" (hash) + : "memory", "esi" + ); + #else + word32* hash32 = (word32*)hash; + word32* digest = (word32*)sha256->digest; + #if WOLFSSL_GENERAL_ALIGNMENT < 4 + ALIGN16 word32 buf[WC_SHA256_DIGEST_SIZE / sizeof(word32)]; + + if (((size_t)digest & 0x3) != 0) { + if (((size_t)hash32 & 0x3) != 0) { + XMEMCPY(buf, digest, WC_SHA256_DIGEST_SIZE); + hash32 = buf; + digest = buf; + } + else { + XMEMCPY(hash, digest, WC_SHA256_DIGEST_SIZE); + digest = hash32; + } + } + else if (((size_t)hash32 & 0x3) != 0) { + hash32 = digest; + } + #endif + hash32[0] = ByteReverseWord32(digest[0]); + hash32[1] = ByteReverseWord32(digest[1]); + hash32[2] = ByteReverseWord32(digest[2]); + hash32[3] = ByteReverseWord32(digest[3]); + hash32[4] = ByteReverseWord32(digest[4]); + hash32[5] = ByteReverseWord32(digest[5]); + hash32[6] = ByteReverseWord32(digest[6]); + hash32[7] = ByteReverseWord32(digest[7]); + #if WOLFSSL_GENERAL_ALIGNMENT < 4 + if (hash != (byte*)hash32) { + XMEMCPY(hash, hash32, WC_SHA256_DIGEST_SIZE); + } + #endif + #endif /* WOLFSSL_X86_64_BUILD && USE_INTEL_SPEEDUP */ + } + sha256->digest[0] = 0x6A09E667L; + sha256->digest[1] = 0xBB67AE85L; + sha256->digest[2] = 0x3C6EF372L; + sha256->digest[3] = 0xA54FF53AL; + sha256->digest[4] = 0x510E527FL; + sha256->digest[5] = 0x9B05688CL; + sha256->digest[6] = 0x1F83D9ABL; + sha256->digest[7] = 0x5BE0CD19L; + } + + return ret; + } +#endif /* WOLFSSL_HAVE_LMS && !WOLFSSL_LMS_FULL_HASH */ #endif /* !WOLFSSL_KCAPI_HASH */ +#endif /* XTRANSFORM */ + #ifdef WOLFSSL_SHA224 @@ -1713,10 +1719,6 @@ static int InitSha256(wc_Sha256* sha256) { int ret = 0; - if (sha224 == NULL) { - return BAD_FUNC_ARG; - } - sha224->digest[0] = 0xc1059ed8; sha224->digest[1] = 0x367cd507; sha224->digest[2] = 0x3070dd17; @@ -1817,7 +1819,14 @@ static int InitSha256(wc_Sha256* sha256) { int ret; - if (sha224 == NULL || (data == NULL && len > 0)) { + if (sha224 == NULL) { + return BAD_FUNC_ARG; + } + if (data == NULL && len == 0) { + /* valid, but do nothing */ + return 0; + } + if (data == NULL) { return BAD_FUNC_ARG; } @@ -1869,18 +1878,7 @@ static int InitSha256(wc_Sha256* sha256) return ret; #if defined(LITTLE_ENDIAN_ORDER) - #if ( defined(CONFIG_IDF_TARGET_ESP32C2) || \ - defined(CONFIG_IDF_TARGET_ESP8684) || \ - defined(CONFIG_IDF_TARGET_ESP32C3) || \ - defined(CONFIG_IDF_TARGET_ESP32C6) \ - ) && \ - defined(WOLFSSL_ESP32_CRYPT) && \ - (!defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA256) || \ - !defined(NO_WOLFSSL_ESP32_CRYPT_HASH_SHA224) \ - ) - if (esp_sha_need_byte_reversal(&sha224->ctx)) - #endif - { + if (SHA256_REV_BYTES(&sha224->ctx)) { ByteReverseWords(sha224->digest, sha224->digest, WC_SHA224_DIGEST_SIZE); diff --git a/wolfcrypt/src/sha256_asm.S b/wolfcrypt/src/sha256_asm.S index 67145b9cc..3f7f6cf24 100644 --- a/wolfcrypt/src/sha256_asm.S +++ b/wolfcrypt/src/sha256_asm.S @@ -92,7 +92,6 @@ Transform_Sha256_SSE2_Sha: .p2align 4 _Transform_Sha256_SSE2_Sha: #endif /* __APPLE__ */ - leaq 32(%rdi), %rdx movdqa L_sse2_sha256_shuf_mask(%rip), %xmm10 movq (%rdi), %xmm1 movq 8(%rdi), %xmm2 @@ -100,10 +99,10 @@ _Transform_Sha256_SSE2_Sha: movhpd 24(%rdi), %xmm2 pshufd $27, %xmm1, %xmm1 pshufd $27, %xmm2, %xmm2 - movdqu (%rdx), %xmm3 - movdqu 16(%rdx), %xmm4 - movdqu 32(%rdx), %xmm5 - movdqu 48(%rdx), %xmm6 + movdqu (%rsi), %xmm3 + movdqu 16(%rsi), %xmm4 + movdqu 32(%rsi), %xmm5 + movdqu 48(%rsi), %xmm6 pshufb %xmm10, %xmm3 movdqa %xmm1, %xmm8 movdqa %xmm2, %xmm9 @@ -557,7 +556,6 @@ _Transform_Sha256_AVX1: pushq %r14 pushq %r15 subq $0x40, %rsp - leaq 32(%rdi), %rax vmovdqa L_avx1_sha256_flip_mask(%rip), %xmm13 vmovdqa L_avx1_sha256_shuf_00BA(%rip), %xmm11 vmovdqa L_avx1_sha256_shuf_DC00(%rip), %xmm12 @@ -570,12 +568,12 @@ _Transform_Sha256_AVX1: movl 24(%rdi), %r14d movl 28(%rdi), %r15d # X0, X1, X2, X3 = W[0..15] - vmovdqu (%rax), %xmm0 - vmovdqu 16(%rax), %xmm1 + vmovdqu (%rsi), %xmm0 + vmovdqu 16(%rsi), %xmm1 vpshufb %xmm13, %xmm0, %xmm0 vpshufb %xmm13, %xmm1, %xmm1 - vmovdqu 32(%rax), %xmm2 - vmovdqu 48(%rax), %xmm3 + vmovdqu 32(%rsi), %xmm2 + vmovdqu 48(%rsi), %xmm3 vpshufb %xmm13, %xmm2, %xmm2 vpshufb %xmm13, %xmm3, %xmm3 movl %r9d, %ebx @@ -2947,8 +2945,7 @@ _Transform_Sha256_AVX1_Len: pushq %r14 pushq %r15 pushq %rbp - movq %rsi, %rbp - movq %rdx, %rsi + movq %rdx, %rbp subq $0x40, %rsp vmovdqa L_avx1_sha256_flip_mask(%rip), %xmm13 vmovdqa L_avx1_sha256_shuf_00BA(%rip), %xmm11 @@ -2964,12 +2961,12 @@ _Transform_Sha256_AVX1_Len: # Start of loop processing a block L_sha256_len_avx1_start: # X0, X1, X2, X3 = W[0..15] - vmovdqu (%rbp), %xmm0 - vmovdqu 16(%rbp), %xmm1 + vmovdqu (%rsi), %xmm0 + vmovdqu 16(%rsi), %xmm1 vpshufb %xmm13, %xmm0, %xmm0 vpshufb %xmm13, %xmm1, %xmm1 - vmovdqu 32(%rbp), %xmm2 - vmovdqu 48(%rbp), %xmm3 + vmovdqu 32(%rsi), %xmm2 + vmovdqu 48(%rsi), %xmm3 vpshufb %xmm13, %xmm2, %xmm2 vpshufb %xmm13, %xmm3, %xmm3 movl %r9d, %ebx @@ -5311,8 +5308,8 @@ L_sha256_len_avx1_start: addl 20(%rdi), %r13d addl 24(%rdi), %r14d addl 28(%rdi), %r15d - addq $0x40, %rbp - subl $0x40, %esi + addq $0x40, %rsi + subl $0x40, %ebp movl %r8d, (%rdi) movl %r9d, 4(%rdi) movl %r10d, 8(%rdi) @@ -5414,14 +5411,13 @@ _Transform_Sha256_AVX1_RORX: vmovdqa L_avx1_rorx_sha256_flip_mask(%rip), %xmm13 vmovdqa L_avx1_rorx_sha256_shuf_00BA(%rip), %xmm11 vmovdqa L_avx1_rorx_sha256_shuf_DC00(%rip), %xmm12 - leaq 32(%rdi), %rax # X0, X1, X2, X3 = W[0..15] - vmovdqu (%rax), %xmm0 - vmovdqu 16(%rax), %xmm1 + vmovdqu (%rsi), %xmm0 + vmovdqu 16(%rsi), %xmm1 vpshufb %xmm13, %xmm0, %xmm0 vpshufb %xmm13, %xmm1, %xmm1 - vmovdqu 32(%rax), %xmm2 - vmovdqu 48(%rax), %xmm3 + vmovdqu 32(%rsi), %xmm2 + vmovdqu 48(%rsi), %xmm3 vpshufb %xmm13, %xmm2, %xmm2 vpshufb %xmm13, %xmm3, %xmm3 movl (%rdi), %r8d @@ -7759,8 +7755,7 @@ _Transform_Sha256_AVX1_RORX_Len: pushq %r14 pushq %r15 pushq %rbp - movq %rsi, %rbp - movq %rdx, %rsi + movq %rdx, %rbp subq $0x40, %rsp vmovdqa L_avx1_rorx_sha256_flip_mask(%rip), %xmm13 vmovdqa L_avx1_rorx_sha256_shuf_00BA(%rip), %xmm11 @@ -7776,12 +7771,12 @@ _Transform_Sha256_AVX1_RORX_Len: # Start of loop processing a block L_sha256_len_avx1_len_rorx_start: # X0, X1, X2, X3 = W[0..15] - vmovdqu (%rbp), %xmm0 - vmovdqu 16(%rbp), %xmm1 + vmovdqu (%rsi), %xmm0 + vmovdqu 16(%rsi), %xmm1 vpshufb %xmm13, %xmm0, %xmm0 vpshufb %xmm13, %xmm1, %xmm1 - vmovdqu 32(%rbp), %xmm2 - vmovdqu 48(%rbp), %xmm3 + vmovdqu 32(%rsi), %xmm2 + vmovdqu 48(%rsi), %xmm3 vpshufb %xmm13, %xmm2, %xmm2 vpshufb %xmm13, %xmm3, %xmm3 # set_w_k_xfer_4: 0 @@ -10082,8 +10077,8 @@ L_sha256_len_avx1_len_rorx_start: addl 20(%rdi), %r13d addl 24(%rdi), %r14d addl 28(%rdi), %r15d - addq $0x40, %rbp - subl $0x40, %esi + addq $0x40, %rsi + subl $0x40, %ebp movl %r8d, (%rdi) movl %r9d, 4(%rdi) movl %r10d, 8(%rdi) @@ -10152,7 +10147,6 @@ Transform_Sha256_AVX1_Sha: .p2align 4 _Transform_Sha256_AVX1_Sha: #endif /* __APPLE__ */ - leaq 32(%rdi), %rdx vmovdqa L_avx1_sha256_shuf_mask(%rip), %xmm10 vmovq (%rdi), %xmm1 vmovq 8(%rdi), %xmm2 @@ -10160,10 +10154,10 @@ _Transform_Sha256_AVX1_Sha: vmovhpd 24(%rdi), %xmm2, %xmm2 vpshufd $27, %xmm1, %xmm1 vpshufd $27, %xmm2, %xmm2 - vmovdqu (%rdx), %xmm3 - vmovdqu 16(%rdx), %xmm4 - vmovdqu 32(%rdx), %xmm5 - vmovdqu 48(%rdx), %xmm6 + vmovdqu (%rsi), %xmm3 + vmovdqu 16(%rsi), %xmm4 + vmovdqu 32(%rsi), %xmm5 + vmovdqu 48(%rsi), %xmm6 vpshufb %xmm10, %xmm3, %xmm3 vmovdqa %xmm1, %xmm8 vmovdqa %xmm2, %xmm9 @@ -10581,7 +10575,6 @@ _Transform_Sha256_AVX2: pushq %r14 pushq %r15 subq $0x200, %rsp - leaq 32(%rdi), %rax vmovdqa L_avx2_sha256_flip_mask(%rip), %xmm13 vmovdqa L_avx2_sha256_shuf_00BA(%rip), %ymm11 vmovdqa L_avx2_sha256_shuf_DC00(%rip), %ymm12 @@ -10594,12 +10587,12 @@ _Transform_Sha256_AVX2: movl 24(%rdi), %r14d movl 28(%rdi), %r15d # X0, X1, X2, X3 = W[0..15] - vmovdqu (%rax), %xmm0 - vmovdqu 16(%rax), %xmm1 + vmovdqu (%rsi), %xmm0 + vmovdqu 16(%rsi), %xmm1 vpshufb %xmm13, %xmm0, %xmm0 vpshufb %xmm13, %xmm1, %xmm1 - vmovdqu 32(%rax), %xmm2 - vmovdqu 48(%rax), %xmm3 + vmovdqu 32(%rsi), %xmm2 + vmovdqu 48(%rsi), %xmm3 vpshufb %xmm13, %xmm2, %xmm2 vpshufb %xmm13, %xmm3, %xmm3 movl %r9d, %ebx @@ -12971,13 +12964,12 @@ _Transform_Sha256_AVX2_Len: pushq %r14 pushq %r15 pushq %rbp - movq %rsi, %rbp - movq %rdx, %rsi + movq %rdx, %rbp subq $0x200, %rsp - testb $0x40, %sil + testb $0x40, %bpl je L_sha256_len_avx2_block - vmovdqu (%rbp), %ymm0 - vmovdqu 32(%rbp), %ymm1 + vmovdqu (%rsi), %ymm0 + vmovdqu 32(%rsi), %ymm1 vmovups %ymm0, 32(%rdi) vmovups %ymm1, 64(%rdi) #ifndef __APPLE__ @@ -12985,8 +12977,8 @@ _Transform_Sha256_AVX2_Len: #else call _Transform_Sha256_AVX2 #endif /* __APPLE__ */ - addq $0x40, %rbp - subl $0x40, %esi + addq $0x40, %rsi + subl $0x40, %ebp jz L_sha256_len_avx2_done L_sha256_len_avx2_block: vmovdqa L_avx2_sha256_flip_mask(%rip), %ymm13 @@ -13003,18 +12995,18 @@ L_sha256_len_avx2_block: # Start of loop processing two blocks L_sha256_len_avx2_start: # X0, X1, X2, X3 = W[0..15] - vmovdqu (%rbp), %xmm0 - vmovdqu 16(%rbp), %xmm1 - vmovdqu 64(%rbp), %xmm4 - vmovdqu 80(%rbp), %xmm5 + vmovdqu (%rsi), %xmm0 + vmovdqu 16(%rsi), %xmm1 + vmovdqu 64(%rsi), %xmm4 + vmovdqu 80(%rsi), %xmm5 vinserti128 $0x01, %xmm4, %ymm0, %ymm0 vinserti128 $0x01, %xmm5, %ymm1, %ymm1 vpshufb %ymm13, %ymm0, %ymm0 vpshufb %ymm13, %ymm1, %ymm1 - vmovdqu 32(%rbp), %xmm2 - vmovdqu 48(%rbp), %xmm3 - vmovdqu 96(%rbp), %xmm6 - vmovdqu 112(%rbp), %xmm7 + vmovdqu 32(%rsi), %xmm2 + vmovdqu 48(%rsi), %xmm3 + vmovdqu 96(%rsi), %xmm6 + vmovdqu 112(%rsi), %xmm7 vinserti128 $0x01, %xmm6, %ymm2, %ymm2 vinserti128 $0x01, %xmm7, %ymm3, %ymm3 vpshufb %ymm13, %ymm2, %ymm2 @@ -17057,8 +17049,8 @@ L_sha256_len_avx2_start: addl 20(%rdi), %r13d addl 24(%rdi), %r14d addl 28(%rdi), %r15d - addq $0x80, %rbp - subl $0x80, %esi + addq $0x80, %rsi + subl $0x80, %ebp movl %r8d, (%rdi) movl %r9d, 4(%rdi) movl %r10d, 8(%rdi) @@ -17177,21 +17169,20 @@ _Transform_Sha256_AVX2_RORX: pushq %r14 pushq %r15 subq $0x200, %rsp - leaq 32(%rdi), %rax vmovdqa L_avx2_rorx_sha256_flip_mask(%rip), %xmm13 vmovdqa L_avx2_rorx_sha256_shuf_00BA(%rip), %ymm11 vmovdqa L_avx2_rorx_sha256_shuf_DC00(%rip), %ymm12 # X0, X1, X2, X3 = W[0..15] - vmovdqu (%rax), %xmm0 - vmovdqu 16(%rax), %xmm1 + vmovdqu (%rsi), %xmm0 + vmovdqu 16(%rsi), %xmm1 vpshufb %xmm13, %xmm0, %xmm0 vpshufb %xmm13, %xmm1, %xmm1 vpaddd 0+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 vpaddd 32+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm5 vmovdqu %ymm4, (%rsp) vmovdqu %ymm5, 32(%rsp) - vmovdqu 32(%rax), %xmm2 - vmovdqu 48(%rax), %xmm3 + vmovdqu 32(%rsi), %xmm2 + vmovdqu 48(%rsi), %xmm3 vpshufb %xmm13, %xmm2, %xmm2 vpshufb %xmm13, %xmm3, %xmm3 vpaddd 64+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 @@ -19542,13 +19533,12 @@ _Transform_Sha256_AVX2_RORX_Len: pushq %r14 pushq %r15 pushq %rbp - movq %rsi, %rbp - movq %rdx, %rsi + movq %rdx, %rbp subq $0x200, %rsp - testb $0x40, %sil + testb $0x40, %bpl je L_sha256_len_avx2_rorx_block - vmovdqu (%rbp), %ymm0 - vmovdqu 32(%rbp), %ymm1 + vmovdqu (%rsi), %ymm0 + vmovdqu 32(%rsi), %ymm1 vmovups %ymm0, 32(%rdi) vmovups %ymm1, 64(%rdi) #ifndef __APPLE__ @@ -19556,8 +19546,8 @@ _Transform_Sha256_AVX2_RORX_Len: #else call _Transform_Sha256_AVX2_RORX #endif /* __APPLE__ */ - addq $0x40, %rbp - subl $0x40, %esi + addq $0x40, %rsi + subl $0x40, %ebp jz L_sha256_len_avx2_rorx_done L_sha256_len_avx2_rorx_block: vmovdqa L_avx2_rorx_sha256_flip_mask(%rip), %ymm13 @@ -19574,20 +19564,20 @@ L_sha256_len_avx2_rorx_block: # Start of loop processing two blocks L_sha256_len_avx2_rorx_start: # X0, X1, X2, X3 = W[0..15] - vmovdqu (%rbp), %xmm0 - vmovdqu 16(%rbp), %xmm1 - vinserti128 $0x01, 64(%rbp), %ymm0, %ymm0 - vinserti128 $0x01, 80(%rbp), %ymm1, %ymm1 + vmovdqu (%rsi), %xmm0 + vmovdqu 16(%rsi), %xmm1 + vinserti128 $0x01, 64(%rsi), %ymm0, %ymm0 + vinserti128 $0x01, 80(%rsi), %ymm1, %ymm1 vpshufb %ymm13, %ymm0, %ymm0 vpshufb %ymm13, %ymm1, %ymm1 vpaddd 0+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4 vpaddd 32+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm5 vmovdqu %ymm4, (%rsp) vmovdqu %ymm5, 32(%rsp) - vmovdqu 32(%rbp), %xmm2 - vmovdqu 48(%rbp), %xmm3 - vinserti128 $0x01, 96(%rbp), %ymm2, %ymm2 - vinserti128 $0x01, 112(%rbp), %ymm3, %ymm3 + vmovdqu 32(%rsi), %xmm2 + vmovdqu 48(%rsi), %xmm3 + vinserti128 $0x01, 96(%rsi), %ymm2, %ymm2 + vinserti128 $0x01, 112(%rsi), %ymm3, %ymm3 vpshufb %ymm13, %ymm2, %ymm2 vpshufb %ymm13, %ymm3, %ymm3 vpaddd 64+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4 @@ -23449,7 +23439,7 @@ L_sha256_len_avx2_rorx_start: addl %edx, %r8d xorl %r10d, %eax addl %eax, %r8d - addq $0x80, %rbp + addq $0x80, %rsi addl (%rdi), %r8d addl 4(%rdi), %r9d addl 8(%rdi), %r10d @@ -23458,7 +23448,7 @@ L_sha256_len_avx2_rorx_start: addl 20(%rdi), %r13d addl 24(%rdi), %r14d addl 28(%rdi), %r15d - subl $0x80, %esi + subl $0x80, %ebp movl %r8d, (%rdi) movl %r9d, 4(%rdi) movl %r10d, 8(%rdi) diff --git a/wolfcrypt/src/wc_lms.c b/wolfcrypt/src/wc_lms.c index 60e8519d9..cdc732f6e 100644 --- a/wolfcrypt/src/wc_lms.c +++ b/wolfcrypt/src/wc_lms.c @@ -1,6 +1,6 @@ /* wc_lms.c * - * Copyright (C) 2006-2023 wolfSSL Inc. + * Copyright (C) 2006-2024 wolfSSL Inc. * * This file is part of wolfSSL. * diff --git a/wolfcrypt/src/wc_lms_impl.c b/wolfcrypt/src/wc_lms_impl.c new file mode 100644 index 000000000..dbd5ed68b --- /dev/null +++ b/wolfcrypt/src/wc_lms_impl.c @@ -0,0 +1,26 @@ +/* wc_lms_impl.c + * + * Copyright (C) 2006-2024 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#include + +#ifdef WOLFSSL_HAVE_LMS + #error "Contact wolfSSL to get the implementation of this file" +#endif diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index 0b1a93c59..d1cecbd3a 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -324,6 +324,8 @@ const byte const_byte_array[] = "A+Gd\0\0\0"; #include #ifdef HAVE_LIBLMS #include +#else + #include #endif #endif #ifdef WOLFCRYPT_HAVE_ECCSI @@ -624,12 +626,14 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t scrypt_test(void); #endif #endif #if defined(WOLFSSL_HAVE_LMS) + #if !defined(WOLFSSL_SMALL_STACK) + #if defined(WOLFSSL_WC_LMS) && (LMS_MAX_HEIGHT >= 10) + WOLFSSL_TEST_SUBROUTINE wc_test_ret_t lms_test_verify_only(void); + #endif + #endif #if !defined(WOLFSSL_LMS_VERIFY_ONLY) WOLFSSL_TEST_SUBROUTINE wc_test_ret_t lms_test(void); #endif - #if defined(WOLFSSL_LMS_VERIFY_ONLY) && !defined(WOLFSSL_SMALL_STACK) - WOLFSSL_TEST_SUBROUTINE wc_test_ret_t lms_test_verify_only(void); - #endif #endif #ifdef WOLFCRYPT_HAVE_ECCSI WOLFSSL_TEST_SUBROUTINE wc_test_ret_t eccsi_test(void); @@ -1738,15 +1742,17 @@ options: [-s max_relative_stack_bytes] [-m max_relative_heap_memory_bytes]\n\ #endif /* if defined(WOLFSSL_HAVE_XMSS) */ #if defined(WOLFSSL_HAVE_LMS) - #if !defined(WOLFSSL_LMS_VERIFY_ONLY) - if ( (ret = lms_test()) != 0) - TEST_FAIL("LMS test failed!\n", ret); + #if !defined(WOLFSSL_SMALL_STACK) + #if defined(WOLFSSL_WC_LMS) && (LMS_MAX_HEIGHT >= 10) + if ( (ret = lms_test_verify_only()) != 0) + TEST_FAIL("LMS Vfy test failed!\n", ret); else - TEST_PASS("LMS test passed!\n"); + TEST_PASS("LMS Vfy test passed!\n"); + #endif #endif - #if defined(WOLFSSL_LMS_VERIFY_ONLY) && !defined(WOLFSSL_SMALL_STACK) - if ( (ret = lms_test_verify_only()) != 0) + #if !defined(WOLFSSL_LMS_VERIFY_ONLY) + if ( (ret = lms_test()) != 0) TEST_FAIL("LMS test failed!\n", ret); else TEST_PASS("LMS test passed!\n"); @@ -3239,6 +3245,35 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t sha256_test(void) } /* END LARGE HASH TEST */ #endif /* NO_LARGE_HASH_TEST */ +#if defined(WOLFSSL_HAVE_LMS) && !defined(WOLFSSL_LMS_FULL_HASH) + unsigned char data_hb[WC_SHA256_BLOCK_SIZE] = { + 0x61, 0x62, 0x63, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18 + }; + + ret = wc_Sha256HashBlock(&sha, data_hb, hash); + if (ret != 0) { + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), exit); + } + if (XMEMCMP(hash, b.output, WC_SHA256_DIGEST_SIZE) != 0) { +{ + for (int ii = 0; ii < WC_SHA256_DIGEST_SIZE; ii++) + fprintf(stderr, " %02x", hash[ii]); + fprintf(stderr, "\n"); + for (int ii = 0; ii < WC_SHA256_DIGEST_SIZE; ii++) + fprintf(stderr, " %02x", b.output[ii]); + fprintf(stderr, "\n"); +} + ERROR_OUT(WC_TEST_RET_ENC_NC, exit); + } +#endif + exit: wc_Sha256Free(&sha); @@ -37393,7 +37428,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t lms_test(void) if (ret != 0) { return WC_TEST_RET_ENC_EC(ret); } if (sigSz != WC_TEST_LMS_SIG_LEN) { - printf("error: got %d, expected %d\n", sigSz, WC_TEST_LMS_SIG_LEN); + printf("error: got %u, expected %d\n", sigSz, WC_TEST_LMS_SIG_LEN); return WC_TEST_RET_ENC_EC(sigSz); } @@ -37427,7 +37462,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t lms_test(void) ret2 = wc_LmsKey_Verify(&verifyKey, sig, sigSz, (byte *) msg, msgSz); - if (ret2 != -1) { + if ((ret2 != -1) && (ret2 != SIG_VERIFY_E)) { /* Verify passed when it should have failed. */ return WC_TEST_RET_ENC_I(j); } @@ -37448,13 +37483,17 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t lms_test(void) wc_FreeRng(&rng); +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + XFREE(sig, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; } #endif /* if defined(WOLFSSL_HAVE_LMS) && !defined(WOLFSSL_LMS_VERIFY_ONLY) */ -#if defined(WOLFSSL_HAVE_LMS) && defined(WOLFSSL_LMS_VERIFY_ONLY) && \ - !defined(WOLFSSL_SMALL_STACK) +#if defined(WOLFSSL_HAVE_LMS) && !defined(WOLFSSL_SMALL_STACK) +#if defined(WOLFSSL_WC_LMS) && (LMS_MAX_HEIGHT >= 10) /* A simple LMS verify only test. * @@ -37468,7 +37507,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t lms_test(void) * */ /* "wolfSSL LMS example message!" without null terminator. */ -static const byte lms_msg[28] = +static byte lms_msg[28] = { 0x77,0x6F,0x6C,0x66,0x53,0x53,0x4C,0x20, 0x4C,0x4D,0x53,0x20,0x65,0x78,0x61,0x6D, @@ -37490,7 +37529,7 @@ static const byte lms_L1H10W8_pub[HSS_MAX_PUBLIC_KEY_LEN] = #define LMS_L1H10W8_SIGLEN (1456) -static const byte lms_L1H10W8_sig[LMS_L1H10W8_SIGLEN] = +static byte lms_L1H10W8_sig[LMS_L1H10W8_SIGLEN] = { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01, 0x00,0x00,0x00,0x04,0x18,0x70,0x09,0x2E, @@ -37713,7 +37752,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t lms_test_verify_only(void) if (ret != 0) { return WC_TEST_RET_ENC_EC(ret); } if (pubLen != HSS_MAX_PUBLIC_KEY_LEN) { - printf("error: got %d, expected %d\n", pubLen, HSS_MAX_PUBLIC_KEY_LEN); + printf("error: got %u, expected %d\n", pubLen, HSS_MAX_PUBLIC_KEY_LEN); return WC_TEST_RET_ENC_EC(pubLen); } @@ -37721,7 +37760,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t lms_test_verify_only(void) if (ret != 0) { return WC_TEST_RET_ENC_EC(ret); } if (sigSz != LMS_L1H10W8_SIGLEN) { - printf("error: got %d, expected %d\n", sigSz, LMS_L1H10W8_SIGLEN); + printf("error: got %u, expected %d\n", sigSz, LMS_L1H10W8_SIGLEN); return WC_TEST_RET_ENC_EC(sigSz); } @@ -37736,7 +37775,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t lms_test_verify_only(void) lms_msg[msgSz / 2] ^= 1; ret2 = wc_LmsKey_Verify(&verifyKey, lms_L1H10W8_sig, LMS_L1H10W8_SIGLEN, (byte *) lms_msg, msgSz); - if (ret2 != -1) { + if ((ret2 != -1) && (ret2 != SIG_VERIFY_E)) { printf("error: wc_LmsKey_Verify returned %d, expected -1\n", ret2); return WC_TEST_RET_ENC_EC(ret); } @@ -37758,7 +37797,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t lms_test_verify_only(void) ret2 = wc_LmsKey_Verify(&verifyKey, lms_L1H10W8_sig, LMS_L1H10W8_SIGLEN, (byte *) lms_msg, msgSz); - if (ret2 != -1) { + if ((ret2 != -1) && (ret2 != SIG_VERIFY_E)) { /* Verify passed when it should have failed. */ return WC_TEST_RET_ENC_I(j); } @@ -37771,8 +37810,8 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t lms_test_verify_only(void) return ret; } -#endif /* if defined(WOLFSSL_HAVE_LMS) && defined(WOLFSSL_LMS_VERIFY_ONLY) && - * !defined(WOLFSSL_SMALL_STACK) */ +#endif +#endif /* if defined(WOLFSSL_HAVE_LMS) && !defined(WOLFSSL_SMALL_STACK) */ static const int fiducial3 = WC_TEST_RET_LN; /* source code reference point -- * see print_fiducials() below. diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h index 38f6ba290..64043e3ba 100644 --- a/wolfssl/wolfcrypt/settings.h +++ b/wolfssl/wolfcrypt/settings.h @@ -3283,8 +3283,9 @@ extern void uITRON4_free(void *p) ; #define NO_SESSION_CACHE_REF #endif -/* (D)TLS v1.3 requires 64-bit number wrappers */ -#if defined(WOLFSSL_TLS13) || defined(WOLFSSL_DTLS_DROP_STATS) +/* (D)TLS v1.3 requires 64-bit number wrappers as does XMSS and LMS. */ +#if defined(WOLFSSL_TLS13) || defined(WOLFSSL_DTLS_DROP_STATS) || \ + defined(WOLFSSL_WC_XMSS) || defined(WOLFSSL_WC_LMS) #undef WOLFSSL_W64_WRAPPER #define WOLFSSL_W64_WRAPPER #endif diff --git a/wolfssl/wolfcrypt/sha256.h b/wolfssl/wolfcrypt/sha256.h index 3651dd31f..323c53adf 100644 --- a/wolfssl/wolfcrypt/sha256.h +++ b/wolfssl/wolfcrypt/sha256.h @@ -249,6 +249,10 @@ WOLFSSL_API void wc_Sha256Free(wc_Sha256* sha256); #if defined(OPENSSL_EXTRA) || defined(HAVE_CURL) WOLFSSL_API int wc_Sha256Transform(wc_Sha256* sha, const unsigned char* data); #endif +#if defined(WOLFSSL_HAVE_LMS) && !defined(WOLFSSL_LMS_FULL_HASH) +WOLFSSL_API int wc_Sha256HashBlock(wc_Sha256* sha, const unsigned char* data, + unsigned char* hash); +#endif #if defined(WOLFSSL_HASH_KEEP) WOLFSSL_API int wc_Sha256_Grow(wc_Sha256* sha256, const byte* in, int inSz); #endif diff --git a/wolfssl/wolfcrypt/types.h b/wolfssl/wolfcrypt/types.h index 692f11337..380fda6fa 100644 --- a/wolfssl/wolfcrypt/types.h +++ b/wolfssl/wolfcrypt/types.h @@ -1040,6 +1040,7 @@ typedef struct w64wrapper { DYNAMIC_TYPE_SPHINCS = 98, DYNAMIC_TYPE_SM4_BUFFER = 99, DYNAMIC_TYPE_DEBUG_TAG = 100, + DYNAMIC_TYPE_LMS = 101, DYNAMIC_TYPE_SNIFFER_SERVER = 1000, DYNAMIC_TYPE_SNIFFER_SESSION = 1001, DYNAMIC_TYPE_SNIFFER_PB = 1002, diff --git a/wolfssl/wolfcrypt/wc_lms.h b/wolfssl/wolfcrypt/wc_lms.h index a0e06e413..f51dad7bf 100644 --- a/wolfssl/wolfcrypt/wc_lms.h +++ b/wolfssl/wolfcrypt/wc_lms.h @@ -1,6 +1,6 @@ /* wc_lms.h * - * Copyright (C) 2006-2023 wolfSSL Inc. + * Copyright (C) 2006-2024 wolfSSL Inc. * * This file is part of wolfSSL. *