diff --git a/.wolfssl_known_macro_extras b/.wolfssl_known_macro_extras index 44d95f26c..e8bcd38bc 100644 --- a/.wolfssl_known_macro_extras +++ b/.wolfssl_known_macro_extras @@ -255,7 +255,7 @@ INTIMEVER IOTSAFE_NO_GETDATA IOTSAFE_SIG_8BIT_LENGTH KCAPI_USE_XMALLOC -KYBER_NONDETERMINISTIC +MLKEM_NONDETERMINISTIC K_SERIES LIBWOLFSSL_VERSION_GIT_BRANCH LIBWOLFSSL_VERSION_GIT_HASH @@ -663,10 +663,9 @@ WOLFSSL_IMXRT_DCP WOLFSSL_ISOTP WOLFSSL_KEIL WOLFSSL_KEIL_NET -WOLFSSL_KYBER_INVNTT_UNROLL -WOLFSSL_KYBER_NO_LARGE_CODE -WOLFSSL_KYBER_NO_MALLOC -WOLFSSL_KYBER_NTT_UNROLL +WOLFSSL_KYBER_NO_DECAPSULATE +WOLFSSL_KYBER_NO_ENCAPSULATE +WOLFSSL_KYBER_NO_MAKE_KEY WOLFSSL_LIB WOLFSSL_LMS_CACHE_BITS WOLFSSL_LMS_FULL_HASH @@ -681,7 +680,11 @@ WOLFSSL_MAKE_SYSTEM_NAME_WSL WOLFSSL_MDK5 WOLFSSL_MEM_FAIL_COUNT WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM +WOLFSSL_MLKEM_INVNTT_UNROLL WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM +WOLFSSL_MLKEM_NO_LARGE_CODE +WOLFSSL_MLKEM_NO_MALLOC +WOLFSSL_MLKEM_NTT_UNROLL WOLFSSL_MONT_RED_CT WOLFSSL_MP_COND_COPY WOLFSSL_MP_INVMOD_CONSTANT_TIME diff --git a/CMakeLists.txt b/CMakeLists.txt index 563548390..98a7dadc1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -569,9 +569,9 @@ add_option(WOLFSSL_OQS "Enable integration with the OQS (Open Quantum Safe) liboqs library (default: disabled)" "no" "yes;no") -# Kyber -add_option(WOLFSSL_KYBER - "Enable the wolfSSL PQ Kyber library (default: disabled)" +# ML-KEM/Kyber +add_option(WOLFSSL_MLKEM + "Enable the wolfSSL PQ ML-KEM library (default: disabled)" "no" "yes;no") # Experimental features @@ -620,8 +620,8 @@ if (WOLFSSL_EXPERIMENTAL) set(WOLFSSL_FOUND_EXPERIMENTAL_FEATURE 1) message(STATUS "Automatically set related requirements for Kyber:") - set_wolfssl_definitions("WOLFSSL_HAVE_KYBER" RESUlT) - set_wolfssl_definitions("WOLFSSL_WC_KYBER" RESUlT) + set_wolfssl_definitions("WOLFSSL_HAVE_MLKEM" RESUlT) + set_wolfssl_definitions("WOLFSSL_WC_MLKEM" RESUlT) set_wolfssl_definitions("WOLFSSL_SHA3" RESUlT) 
set_wolfssl_definitions("WOLFSSL_SHAKE128" RESUlT) set_wolfssl_definitions("WOLFSSL_SHAKE256" RESUlT) diff --git a/IDE/Espressif/ESP-IDF/examples/template/components/wolfssl/component.mk b/IDE/Espressif/ESP-IDF/examples/template/components/wolfssl/component.mk index 290563e69..f94bd617d 100644 --- a/IDE/Espressif/ESP-IDF/examples/template/components/wolfssl/component.mk +++ b/IDE/Espressif/ESP-IDF/examples/template/components/wolfssl/component.mk @@ -203,7 +203,7 @@ COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ed25519.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ed448.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/error.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/evp.o -# COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_kyber.o +# COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_mlkem.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_lms.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_xmss.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/falcon.o @@ -266,8 +266,8 @@ COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/srp.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/tfm.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_dsp.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_encrypt.o -COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_kyber.o -COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_kyber_poly.o +COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_mlkem.o +COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_mlkem_poly.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_lms.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_pkcs11.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_port.o diff --git a/IDE/Espressif/ESP-IDF/examples/template/components/wolfssl/include/user_settings.h b/IDE/Espressif/ESP-IDF/examples/template/components/wolfssl/include/user_settings.h index 7349338aa..4644c6217 100644 --- a/IDE/Espressif/ESP-IDF/examples/template/components/wolfssl/include/user_settings.h +++ 
b/IDE/Espressif/ESP-IDF/examples/template/components/wolfssl/include/user_settings.h @@ -213,8 +213,8 @@ #ifdef CONFIG_WOLFSSL_ENABLE_KYBER /* Kyber typically needs a minimum 10K stack */ #define WOLFSSL_EXPERIMENTAL_SETTINGS - #define WOLFSSL_HAVE_KYBER - #define WOLFSSL_WC_KYBER + #define WOLFSSL_HAVE_MLKEM + #define WOLFSSL_WC_MLKEM #define WOLFSSL_SHA3 #if defined(CONFIG_IDF_TARGET_ESP8266) /* With limited RAM, we'll disable some of the Kyber sizes: */ diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_benchmark/components/wolfssl/component.mk b/IDE/Espressif/ESP-IDF/examples/wolfssl_benchmark/components/wolfssl/component.mk index a7b5f3706..83f414e50 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_benchmark/components/wolfssl/component.mk +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_benchmark/components/wolfssl/component.mk @@ -203,7 +203,7 @@ COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ed25519.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ed448.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/error.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/evp.o -# COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_kyber.o +# COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_mlkem.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_lms.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_xmss.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/falcon.o @@ -266,8 +266,8 @@ COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/srp.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/tfm.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_dsp.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_encrypt.o -COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_kyber.o -COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_kyber_poly.o +COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_mlkem.o +COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_mlkem_poly.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_lms.o COMPONENT_OBJS += 
$(WOLFSSL_ROOT)/wolfcrypt/src/wc_pkcs11.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_port.o diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_benchmark/components/wolfssl/include/user_settings.h b/IDE/Espressif/ESP-IDF/examples/wolfssl_benchmark/components/wolfssl/include/user_settings.h index 7349338aa..4644c6217 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_benchmark/components/wolfssl/include/user_settings.h +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_benchmark/components/wolfssl/include/user_settings.h @@ -213,8 +213,8 @@ #ifdef CONFIG_WOLFSSL_ENABLE_KYBER /* Kyber typically needs a minimum 10K stack */ #define WOLFSSL_EXPERIMENTAL_SETTINGS - #define WOLFSSL_HAVE_KYBER - #define WOLFSSL_WC_KYBER + #define WOLFSSL_HAVE_MLKEM + #define WOLFSSL_WC_MLKEM #define WOLFSSL_SHA3 #if defined(CONFIG_IDF_TARGET_ESP8266) /* With limited RAM, we'll disable some of the Kyber sizes: */ diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_client/components/wolfssl/component.mk b/IDE/Espressif/ESP-IDF/examples/wolfssl_client/components/wolfssl/component.mk index 290563e69..f94bd617d 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_client/components/wolfssl/component.mk +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_client/components/wolfssl/component.mk @@ -203,7 +203,7 @@ COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ed25519.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ed448.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/error.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/evp.o -# COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_kyber.o +# COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_mlkem.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_lms.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_xmss.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/falcon.o @@ -266,8 +266,8 @@ COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/srp.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/tfm.o COMPONENT_OBJS += 
$(WOLFSSL_ROOT)/wolfcrypt/src/wc_dsp.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_encrypt.o -COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_kyber.o -COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_kyber_poly.o +COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_mlkem.o +COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_mlkem_poly.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_lms.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_pkcs11.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_port.o diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_client/components/wolfssl/include/user_settings.h b/IDE/Espressif/ESP-IDF/examples/wolfssl_client/components/wolfssl/include/user_settings.h index 7349338aa..4644c6217 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_client/components/wolfssl/include/user_settings.h +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_client/components/wolfssl/include/user_settings.h @@ -213,8 +213,8 @@ #ifdef CONFIG_WOLFSSL_ENABLE_KYBER /* Kyber typically needs a minimum 10K stack */ #define WOLFSSL_EXPERIMENTAL_SETTINGS - #define WOLFSSL_HAVE_KYBER - #define WOLFSSL_WC_KYBER + #define WOLFSSL_HAVE_MLKEM + #define WOLFSSL_WC_MLKEM #define WOLFSSL_SHA3 #if defined(CONFIG_IDF_TARGET_ESP8266) /* With limited RAM, we'll disable some of the Kyber sizes: */ diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_client/main/client-tls.c b/IDE/Espressif/ESP-IDF/examples/wolfssl_client/main/client-tls.c index ea6972d7b..9a0bdc2ec 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_client/main/client-tls.c +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_client/main/client-tls.c @@ -41,9 +41,9 @@ #undef USE_WOLFSSL_ESP_SDK_WIFI #include -#if defined(WOLFSSL_WC_KYBER) - #include - #include +#if defined(WOLFSSL_WC_MLKEM) + #include + #include #endif #if defined(USE_CERT_BUFFERS_2048) || defined(USE_CERT_BUFFERS_1024) #include @@ -397,22 +397,22 @@ WOLFSSL_ESP_TASK tls_smp_client_task(void* args) ESP_LOGI(TAG, "tls_smp_client_task heap @ 
%p = %d", &this_heap, this_heap); #endif -#if defined(WOLFSSL_HAVE_KYBER) +#if defined(WOLFSSL_HAVE_MLKEM) #if defined(WOLFSSL_KYBER1024) - ESP_LOGI(TAG, "WOLFSSL_HAVE_KYBER is enabled, setting key share: " + ESP_LOGI(TAG, "WOLFSSL_HAVE_MLKEM is enabled, setting key share: " "WOLFSSL_P256_KYBER_LEVEL5"); ret_i = wolfSSL_UseKeyShare(ssl, WOLFSSL_P521_KYBER_LEVEL5); #elif defined(WOLFSSL_KYBER768) - ESP_LOGI(TAG, "WOLFSSL_HAVE_KYBER is enabled, setting key share: " + ESP_LOGI(TAG, "WOLFSSL_HAVE_MLKEM is enabled, setting key share: " "WOLFSSL_P256_KYBER_LEVEL3"); ret_i = wolfSSL_UseKeyShare(ssl, WOLFSSL_P256_KYBER_LEVEL3); #elif defined(WOLFSSL_KYBER512) /* This will typically be a low memory situation, such as ESP8266 */ - ESP_LOGI(TAG, "WOLFSSL_HAVE_KYBER is enabled, setting key share: " + ESP_LOGI(TAG, "WOLFSSL_HAVE_MLKEM is enabled, setting key share: " "WOLFSSL_P256_KYBER_LEVEL1"); ret_i = wolfSSL_UseKeyShare(ssl, WOLFSSL_P256_KYBER_LEVEL1); #else - ESP_LOGW(TAG, "WOLFSSL_HAVE_KYBER enabled but no key size available."); + ESP_LOGW(TAG, "WOLFSSL_HAVE_MLKEM enabled but no key size available."); ret_i = ESP_FAIL; #endif if (ret_i == WOLFSSL_SUCCESS) { @@ -422,7 +422,7 @@ WOLFSSL_ESP_TASK tls_smp_client_task(void* args) ESP_LOGE(TAG, "UseKeyShare Kyber failed"); } #else - ESP_LOGI(TAG, "WOLFSSL_HAVE_KYBER is not enabled"); + ESP_LOGI(TAG, "WOLFSSL_HAVE_MLKEM is not enabled"); #endif } diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_client/main/include/client-tls.h b/IDE/Espressif/ESP-IDF/examples/wolfssl_client/main/include/client-tls.h index 88266142c..5df9f1474 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_client/main/include/client-tls.h +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_client/main/include/client-tls.h @@ -44,7 +44,7 @@ /* Reminder: Vanilla FreeRTOS is words, Espressif is bytes. */ #if defined(WOLFSSL_ESP8266) - #if defined(WOLFSSL_HAVE_KYBER) + #if defined(WOLFSSL_HAVE_MLKEM) /* Minimum ESP8266 stack size = 10K with Kyber. 
* Note there's a maximum not far away as Kyber needs heap * and the total DRAM is typically only 80KB total. */ @@ -54,7 +54,7 @@ #define TLS_SMP_CLIENT_TASK_BYTES (6 * 1024) #endif #else - #if defined(WOLFSSL_HAVE_KYBER) + #if defined(WOLFSSL_HAVE_MLKEM) /* Minimum ESP32 stack size = 12K with Kyber enabled. */ #define TLS_SMP_CLIENT_TASK_BYTES (12 * 1024) #else diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_server/components/wolfssl/component.mk b/IDE/Espressif/ESP-IDF/examples/wolfssl_server/components/wolfssl/component.mk index 290563e69..f94bd617d 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_server/components/wolfssl/component.mk +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_server/components/wolfssl/component.mk @@ -203,7 +203,7 @@ COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ed25519.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ed448.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/error.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/evp.o -# COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_kyber.o +# COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_mlkem.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_lms.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_xmss.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/falcon.o @@ -266,8 +266,8 @@ COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/srp.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/tfm.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_dsp.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_encrypt.o -COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_kyber.o -COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_kyber_poly.o +COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_mlkem.o +COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_mlkem_poly.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_lms.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_pkcs11.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_port.o diff --git 
a/IDE/Espressif/ESP-IDF/examples/wolfssl_server/components/wolfssl/include/user_settings.h b/IDE/Espressif/ESP-IDF/examples/wolfssl_server/components/wolfssl/include/user_settings.h index 7349338aa..4644c6217 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_server/components/wolfssl/include/user_settings.h +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_server/components/wolfssl/include/user_settings.h @@ -213,8 +213,8 @@ #ifdef CONFIG_WOLFSSL_ENABLE_KYBER /* Kyber typically needs a minimum 10K stack */ #define WOLFSSL_EXPERIMENTAL_SETTINGS - #define WOLFSSL_HAVE_KYBER - #define WOLFSSL_WC_KYBER + #define WOLFSSL_HAVE_MLKEM + #define WOLFSSL_WC_MLKEM #define WOLFSSL_SHA3 #if defined(CONFIG_IDF_TARGET_ESP8266) /* With limited RAM, we'll disable some of the Kyber sizes: */ diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_server/main/server-tls.c b/IDE/Espressif/ESP-IDF/examples/wolfssl_server/main/server-tls.c index 8520249e6..da8f933c1 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_server/main/server-tls.c +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_server/main/server-tls.c @@ -54,9 +54,9 @@ #error "Missing WOLFSSL_USER_SETTINGS in CMakeLists or Makefile:\ CFLAGS +=-DWOLFSSL_USER_SETTINGS" #endif -#if defined(WOLFSSL_WC_KYBER) - #include - #include +#if defined(WOLFSSL_WC_MLKEM) + #include + #include #endif #if defined(USE_CERT_BUFFERS_2048) || defined(USE_CERT_BUFFERS_1024) #include @@ -329,7 +329,7 @@ WOLFSSL_ESP_TASK tls_smp_server_task(void *args) if ((ssl = wolfSSL_new(ctx)) == NULL) { ESP_LOGE(TAG, "ERROR: failed to create WOLFSSL object"); } -#if defined(WOLFSSL_HAVE_KYBER) +#if defined(WOLFSSL_HAVE_MLKEM) else { /* If success creating CTX and Kyber enabled, set key share: */ ret = wolfSSL_UseKeyShare(ssl, WOLFSSL_P521_KYBER_LEVEL5); @@ -341,7 +341,7 @@ WOLFSSL_ESP_TASK tls_smp_server_task(void *args) } } #else - ESP_LOGI(TAG, "WOLFSSL_HAVE_KYBER is not enabled, not using PQ."); + ESP_LOGI(TAG, "WOLFSSL_HAVE_MLKEM is not enabled, not using PQ."); 
#endif /* show what cipher connected for this WOLFSSL* object */ ShowCiphers(ssl); diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_test/components/wolfssl/component.mk b/IDE/Espressif/ESP-IDF/examples/wolfssl_test/components/wolfssl/component.mk index 8865ec880..aacd62566 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_test/components/wolfssl/component.mk +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_test/components/wolfssl/component.mk @@ -203,7 +203,7 @@ COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ed25519.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ed448.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/error.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/evp.o -# COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_kyber.o +# COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_mlkem.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_lms.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/ext_xmss.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/falcon.o @@ -266,8 +266,8 @@ COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/srp.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/tfm.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_dsp.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_encrypt.o -COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_kyber.o -COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_kyber_poly.o +COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_mlkem.o +COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_mlkem_poly.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_lms.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_pkcs11.o COMPONENT_OBJS += $(WOLFSSL_ROOT)/wolfcrypt/src/wc_port.o diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_test/components/wolfssl/include/user_settings.h b/IDE/Espressif/ESP-IDF/examples/wolfssl_test/components/wolfssl/include/user_settings.h index 7349338aa..4644c6217 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_test/components/wolfssl/include/user_settings.h +++ 
b/IDE/Espressif/ESP-IDF/examples/wolfssl_test/components/wolfssl/include/user_settings.h @@ -213,8 +213,8 @@ #ifdef CONFIG_WOLFSSL_ENABLE_KYBER /* Kyber typically needs a minimum 10K stack */ #define WOLFSSL_EXPERIMENTAL_SETTINGS - #define WOLFSSL_HAVE_KYBER - #define WOLFSSL_WC_KYBER + #define WOLFSSL_HAVE_MLKEM + #define WOLFSSL_WC_MLKEM #define WOLFSSL_SHA3 #if defined(CONFIG_IDF_TARGET_ESP8266) /* With limited RAM, we'll disable some of the Kyber sizes: */ diff --git a/IDE/MCUEXPRESSO/RT1170/wolfssl_cm7/.cproject b/IDE/MCUEXPRESSO/RT1170/wolfssl_cm7/.cproject index 0a346c114..ab827e198 100644 --- a/IDE/MCUEXPRESSO/RT1170/wolfssl_cm7/.cproject +++ b/IDE/MCUEXPRESSO/RT1170/wolfssl_cm7/.cproject @@ -241,7 +241,7 @@ - + @@ -489,7 +489,7 @@ - + @@ -565,4 +565,4 @@ - \ No newline at end of file + diff --git a/IDE/STM32Cube/default_conf.ftl b/IDE/STM32Cube/default_conf.ftl index c5215604f..988fe05fc 100644 --- a/IDE/STM32Cube/default_conf.ftl +++ b/IDE/STM32Cube/default_conf.ftl @@ -588,11 +588,11 @@ extern ${variable.value} ${variable.name}; #undef WOLFSSL_EXPERIMENTAL_SETTINGS #define WOLFSSL_EXPERIMENTAL_SETTINGS - #undef WOLFSSL_HAVE_KYBER - #define WOLFSSL_HAVE_KYBER + #undef WOLFSSL_HAVE_MLKEM + #define WOLFSSL_HAVE_MLKEM - #undef WOLFSSL_WC_KYBER - #define WOLFSSL_WC_KYBER + #undef WOLFSSL_WC_MLKEM + #define WOLFSSL_WC_MLKEM #undef WOLFSSL_NO_SHAKE128 #undef WOLFSSL_SHAKE128 diff --git a/IDE/STM32Cube/wolfssl_example.c b/IDE/STM32Cube/wolfssl_example.c index ae696c4c3..9d6ec5b92 100644 --- a/IDE/STM32Cube/wolfssl_example.c +++ b/IDE/STM32Cube/wolfssl_example.c @@ -1750,7 +1750,7 @@ static int tls13_uart_client(void) wolfSSL_SetIOReadCtx(ssl, tbuf); -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM #ifndef WOLFSSL_NO_ML_KEM if (wolfSSL_UseKeyShare(ssl, WOLFSSL_ML_KEM_512) != WOLFSSL_SUCCESS) { printf("wolfSSL_UseKeyShare Error!!"); diff --git a/IDE/XCODE/wolfssl-FIPS.xcodeproj/project.pbxproj b/IDE/XCODE/wolfssl-FIPS.xcodeproj/project.pbxproj index 
63c889fe5..c96018c5a 100644 --- a/IDE/XCODE/wolfssl-FIPS.xcodeproj/project.pbxproj +++ b/IDE/XCODE/wolfssl-FIPS.xcodeproj/project.pbxproj @@ -125,7 +125,7 @@ 700F0CF52A2FC11300755BA7 /* eccsi.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CDB2A2FC0D500755BA7 /* eccsi.h */; }; 700F0CF62A2FC11300755BA7 /* ed448.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CD22A2FC0D500755BA7 /* ed448.h */; }; 700F0CF72A2FC11300755BA7 /* ed25519.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CE12A2FC0D500755BA7 /* ed25519.h */; }; - 700F0CF82A2FC11300755BA7 /* ext_kyber.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CD52A2FC0D500755BA7 /* ext_kyber.h */; }; + 700F0CF82A2FC11300755BA7 /* ext_mlkem.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CD52A2FC0D500755BA7 /* ext_mlkem.h */; }; 700F0CF92A2FC11300755BA7 /* falcon.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CDD2A2FC0D500755BA7 /* falcon.h */; }; 700F0CFA2A2FC11300755BA7 /* fe_448.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CDE2A2FC0D500755BA7 /* fe_448.h */; }; 700F0CFB2A2FC11300755BA7 /* fe_operations.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CC72A2FC0D400755BA7 /* fe_operations.h */; }; @@ -133,7 +133,7 @@ 700F0CFD2A2FC11300755BA7 /* ge_448.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CE22A2FC0D500755BA7 /* ge_448.h */; }; 700F0CFE2A2FC11300755BA7 /* ge_operations.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CCA2A2FC0D500755BA7 /* ge_operations.h */; }; 700F0CFF2A2FC11300755BA7 /* hpke.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CCD2A2FC0D500755BA7 /* hpke.h */; }; - 700F0D002A2FC11300755BA7 /* kyber.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CCC2A2FC0D500755BA7 /* kyber.h */; }; + 700F0D002A2FC11300755BA7 /* mlkem.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CCC2A2FC0D500755BA7 /* mlkem.h */; }; 700F0D012A2FC11300755BA7 /* mem_track.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 
700F0CDA2A2FC0D500755BA7 /* mem_track.h */; }; 700F0D022A2FC11300755BA7 /* pkcs11.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CD92A2FC0D500755BA7 /* pkcs11.h */; }; 700F0D032A2FC11300755BA7 /* pkcs12.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CE42A2FC0D500755BA7 /* pkcs12.h */; }; @@ -147,7 +147,7 @@ 700F0D0B2A2FC11300755BA7 /* sp.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CD12A2FC0D500755BA7 /* sp.h */; }; 700F0D0C2A2FC11300755BA7 /* sphincs.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CEC2A2FC0D500755BA7 /* sphincs.h */; }; 700F0D0D2A2FC11300755BA7 /* srp.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CDC2A2FC0D500755BA7 /* srp.h */; }; - 700F0D0E2A2FC11300755BA7 /* wc_kyber.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CD82A2FC0D500755BA7 /* wc_kyber.h */; }; + 700F0D0E2A2FC11300755BA7 /* wc_mlkem.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CD82A2FC0D500755BA7 /* wc_mlkem.h */; }; 700F0D0F2A2FC11300755BA7 /* wc_pkcs11.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CE82A2FC0D500755BA7 /* wc_pkcs11.h */; }; 700F0D102A2FC11300755BA7 /* wolfevent.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CD62A2FC0D500755BA7 /* wolfevent.h */; }; 700F0D112A2FC11300755BA7 /* wolfmath.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0CEB2A2FC0D500755BA7 /* wolfmath.h */; }; @@ -285,7 +285,7 @@ 700F0CF52A2FC11300755BA7 /* eccsi.h in CopyFiles */, 700F0CF62A2FC11300755BA7 /* ed448.h in CopyFiles */, 700F0CF72A2FC11300755BA7 /* ed25519.h in CopyFiles */, - 700F0CF82A2FC11300755BA7 /* ext_kyber.h in CopyFiles */, + 700F0CF82A2FC11300755BA7 /* ext_mlkem.h in CopyFiles */, 700F0CF92A2FC11300755BA7 /* falcon.h in CopyFiles */, 700F0CFA2A2FC11300755BA7 /* fe_448.h in CopyFiles */, 700F0CFB2A2FC11300755BA7 /* fe_operations.h in CopyFiles */, @@ -293,7 +293,7 @@ 700F0CFD2A2FC11300755BA7 /* ge_448.h in CopyFiles */, 700F0CFE2A2FC11300755BA7 /* ge_operations.h in CopyFiles */, 
700F0CFF2A2FC11300755BA7 /* hpke.h in CopyFiles */, - 700F0D002A2FC11300755BA7 /* kyber.h in CopyFiles */, + 700F0D002A2FC11300755BA7 /* mlkem.h in CopyFiles */, 700F0D012A2FC11300755BA7 /* mem_track.h in CopyFiles */, 700F0D022A2FC11300755BA7 /* pkcs11.h in CopyFiles */, 700F0D032A2FC11300755BA7 /* pkcs12.h in CopyFiles */, @@ -307,7 +307,7 @@ 700F0D0B2A2FC11300755BA7 /* sp.h in CopyFiles */, 700F0D0C2A2FC11300755BA7 /* sphincs.h in CopyFiles */, 700F0D0D2A2FC11300755BA7 /* srp.h in CopyFiles */, - 700F0D0E2A2FC11300755BA7 /* wc_kyber.h in CopyFiles */, + 700F0D0E2A2FC11300755BA7 /* wc_mlkem.h in CopyFiles */, 700F0D0F2A2FC11300755BA7 /* wc_pkcs11.h in CopyFiles */, 700F0D102A2FC11300755BA7 /* wolfevent.h in CopyFiles */, 700F0D112A2FC11300755BA7 /* wolfmath.h in CopyFiles */, @@ -563,7 +563,7 @@ 700F0CC92A2FC0D500755BA7 /* selftest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = selftest.h; path = ../../wolfssl/wolfcrypt/selftest.h; sourceTree = ""; }; 700F0CCA2A2FC0D500755BA7 /* ge_operations.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ge_operations.h; path = ../../wolfssl/wolfcrypt/ge_operations.h; sourceTree = ""; }; 700F0CCB2A2FC0D500755BA7 /* async.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = async.h; path = ../../wolfssl/wolfcrypt/async.h; sourceTree = ""; }; - 700F0CCC2A2FC0D500755BA7 /* kyber.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = kyber.h; path = ../../wolfssl/wolfcrypt/kyber.h; sourceTree = ""; }; + 700F0CCC2A2FC0D500755BA7 /* mlkem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mlkem.h; path = ../../wolfssl/wolfcrypt/mlkem.h; sourceTree = ""; }; 700F0CCD2A2FC0D500755BA7 /* hpke.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = hpke.h; path = ../../wolfssl/wolfcrypt/hpke.h; 
sourceTree = ""; }; 700F0CCE2A2FC0D500755BA7 /* cpuid.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = cpuid.h; path = ../../wolfssl/wolfcrypt/cpuid.h; sourceTree = ""; }; 700F0CCF2A2FC0D500755BA7 /* fips.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = fips.h; path = ../../wolfssl/wolfcrypt/fips.h; sourceTree = ""; }; @@ -572,10 +572,10 @@ 700F0CD22A2FC0D500755BA7 /* ed448.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ed448.h; path = ../../wolfssl/wolfcrypt/ed448.h; sourceTree = ""; }; 700F0CD32A2FC0D500755BA7 /* curve448.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = curve448.h; path = ../../wolfssl/wolfcrypt/curve448.h; sourceTree = ""; }; 700F0CD42A2FC0D500755BA7 /* siphash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = siphash.h; path = ../../wolfssl/wolfcrypt/siphash.h; sourceTree = ""; }; - 700F0CD52A2FC0D500755BA7 /* ext_kyber.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ext_kyber.h; path = ../../wolfssl/wolfcrypt/ext_kyber.h; sourceTree = ""; }; + 700F0CD52A2FC0D500755BA7 /* ext_mlkem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ext_mlkem.h; path = ../../wolfssl/wolfcrypt/ext_mlkem.h; sourceTree = ""; }; 700F0CD62A2FC0D500755BA7 /* wolfevent.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = wolfevent.h; path = ../../wolfssl/wolfcrypt/wolfevent.h; sourceTree = ""; }; 700F0CD72A2FC0D500755BA7 /* cmac.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = cmac.h; path = ../../wolfssl/wolfcrypt/cmac.h; sourceTree = ""; }; - 700F0CD82A2FC0D500755BA7 /* wc_kyber.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = wc_kyber.h; 
path = ../../wolfssl/wolfcrypt/wc_kyber.h; sourceTree = ""; }; + 700F0CD82A2FC0D500755BA7 /* wc_mlkem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = wc_mlkem.h; path = ../../wolfssl/wolfcrypt/wc_mlkem.h; sourceTree = ""; }; 700F0CD92A2FC0D500755BA7 /* pkcs11.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = pkcs11.h; path = ../../wolfssl/wolfcrypt/pkcs11.h; sourceTree = ""; }; 700F0CDA2A2FC0D500755BA7 /* mem_track.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mem_track.h; path = ../../wolfssl/wolfcrypt/mem_track.h; sourceTree = ""; }; 700F0CDB2A2FC0D500755BA7 /* eccsi.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = eccsi.h; path = ../../wolfssl/wolfcrypt/eccsi.h; sourceTree = ""; }; @@ -643,7 +643,7 @@ 700F0CDB2A2FC0D500755BA7 /* eccsi.h */, 700F0CD22A2FC0D500755BA7 /* ed448.h */, 700F0CE12A2FC0D500755BA7 /* ed25519.h */, - 700F0CD52A2FC0D500755BA7 /* ext_kyber.h */, + 700F0CD52A2FC0D500755BA7 /* ext_mlkem.h */, 700F0CDD2A2FC0D500755BA7 /* falcon.h */, 700F0CDE2A2FC0D500755BA7 /* fe_448.h */, 700F0CC72A2FC0D400755BA7 /* fe_operations.h */, @@ -651,7 +651,7 @@ 700F0CE22A2FC0D500755BA7 /* ge_448.h */, 700F0CCA2A2FC0D500755BA7 /* ge_operations.h */, 700F0CCD2A2FC0D500755BA7 /* hpke.h */, - 700F0CCC2A2FC0D500755BA7 /* kyber.h */, + 700F0CCC2A2FC0D500755BA7 /* mlkem.h */, 700F0CDA2A2FC0D500755BA7 /* mem_track.h */, 700F0CD92A2FC0D500755BA7 /* pkcs11.h */, 700F0CE42A2FC0D500755BA7 /* pkcs12.h */, @@ -665,7 +665,7 @@ 700F0CD12A2FC0D500755BA7 /* sp.h */, 700F0CEC2A2FC0D500755BA7 /* sphincs.h */, 700F0CDC2A2FC0D500755BA7 /* srp.h */, - 700F0CD82A2FC0D500755BA7 /* wc_kyber.h */, + 700F0CD82A2FC0D500755BA7 /* wc_mlkem.h */, 700F0CE82A2FC0D500755BA7 /* wc_pkcs11.h */, 700F0CD62A2FC0D500755BA7 /* wolfevent.h */, 700F0CEB2A2FC0D500755BA7 /* wolfmath.h */, diff --git 
a/IDE/XCODE/wolfssl.xcodeproj/project.pbxproj b/IDE/XCODE/wolfssl.xcodeproj/project.pbxproj index 33c55dcc4..1b5787ddc 100644 --- a/IDE/XCODE/wolfssl.xcodeproj/project.pbxproj +++ b/IDE/XCODE/wolfssl.xcodeproj/project.pbxproj @@ -256,7 +256,7 @@ 700F0C0D2A2FBC5100755BA7 /* eccsi.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BF72A2FBC1600755BA7 /* eccsi.h */; }; 700F0C0E2A2FBC5100755BA7 /* ed448.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BF82A2FBC1600755BA7 /* ed448.h */; }; 700F0C0F2A2FBC5100755BA7 /* ed25519.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BF42A2FBC1600755BA7 /* ed25519.h */; }; - 700F0C102A2FBC5100755BA7 /* ext_kyber.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BF92A2FBC1600755BA7 /* ext_kyber.h */; }; + 700F0C102A2FBC5100755BA7 /* ext_mlkem.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BF92A2FBC1600755BA7 /* ext_mlkem.h */; }; 700F0C112A2FBC5100755BA7 /* falcon.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0C022A2FBC1600755BA7 /* falcon.h */; }; 700F0C122A2FBC5100755BA7 /* fe_448.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BEB2A2FBC1500755BA7 /* fe_448.h */; }; 700F0C132A2FBC5100755BA7 /* fe_operations.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BF62A2FBC1600755BA7 /* fe_operations.h */; }; @@ -264,7 +264,7 @@ 700F0C152A2FBC5100755BA7 /* ge_448.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BE72A2FBC1500755BA7 /* ge_448.h */; }; 700F0C162A2FBC5100755BA7 /* ge_operations.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0C012A2FBC1600755BA7 /* ge_operations.h */; }; 700F0C172A2FBC5100755BA7 /* hpke.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BE12A2FBC1500755BA7 /* hpke.h */; }; - 700F0C182A2FBC5100755BA7 /* kyber.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BEA2A2FBC1500755BA7 /* kyber.h */; }; + 700F0C182A2FBC5100755BA7 /* mlkem.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BEA2A2FBC1500755BA7 /* mlkem.h */; }; 
700F0C192A2FBC5100755BA7 /* pkcs11.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BFD2A2FBC1600755BA7 /* pkcs11.h */; }; 700F0C1A2A2FBC5100755BA7 /* pkcs12.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BEC2A2FBC1500755BA7 /* pkcs12.h */; }; 700F0C1B2A2FBC5100755BA7 /* rc2.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BE02A2FBC1500755BA7 /* rc2.h */; }; @@ -277,7 +277,7 @@ 700F0C222A2FBC5100755BA7 /* sp.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BE92A2FBC1500755BA7 /* sp.h */; }; 700F0C232A2FBC5100755BA7 /* sphincs.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BE22A2FBC1500755BA7 /* sphincs.h */; }; 700F0C242A2FBC5100755BA7 /* srp.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BF32A2FBC1600755BA7 /* srp.h */; }; - 700F0C252A2FBC5100755BA7 /* wc_kyber.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BFF2A2FBC1600755BA7 /* wc_kyber.h */; }; + 700F0C252A2FBC5100755BA7 /* wc_mlkem.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BFF2A2FBC1600755BA7 /* wc_mlkem.h */; }; 700F0C262A2FBC5100755BA7 /* wc_pkcs11.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BF52A2FBC1600755BA7 /* wc_pkcs11.h */; }; 700F0C272A2FBC5100755BA7 /* wolfevent.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 700F0BE62A2FBC1500755BA7 /* wolfevent.h */; }; 700F0C282A2FBC5100755BA7 /* kdf.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 6AC8513B272CB04F00F2B32A /* kdf.h */; }; @@ -625,7 +625,7 @@ 700F0C0D2A2FBC5100755BA7 /* eccsi.h in CopyFiles */, 700F0C0E2A2FBC5100755BA7 /* ed448.h in CopyFiles */, 700F0C0F2A2FBC5100755BA7 /* ed25519.h in CopyFiles */, - 700F0C102A2FBC5100755BA7 /* ext_kyber.h in CopyFiles */, + 700F0C102A2FBC5100755BA7 /* ext_mlkem.h in CopyFiles */, 700F0C112A2FBC5100755BA7 /* falcon.h in CopyFiles */, 700F0C122A2FBC5100755BA7 /* fe_448.h in CopyFiles */, 700F0C132A2FBC5100755BA7 /* fe_operations.h in CopyFiles */, @@ -633,7 +633,7 @@ 700F0C152A2FBC5100755BA7 /* ge_448.h in CopyFiles */, 
700F0C162A2FBC5100755BA7 /* ge_operations.h in CopyFiles */, 700F0C172A2FBC5100755BA7 /* hpke.h in CopyFiles */, - 700F0C182A2FBC5100755BA7 /* kyber.h in CopyFiles */, + 700F0C182A2FBC5100755BA7 /* mlkem.h in CopyFiles */, 700F0C192A2FBC5100755BA7 /* pkcs11.h in CopyFiles */, 700F0C1A2A2FBC5100755BA7 /* pkcs12.h in CopyFiles */, 700F0C1B2A2FBC5100755BA7 /* rc2.h in CopyFiles */, @@ -646,7 +646,7 @@ 700F0C222A2FBC5100755BA7 /* sp.h in CopyFiles */, 700F0C232A2FBC5100755BA7 /* sphincs.h in CopyFiles */, 700F0C242A2FBC5100755BA7 /* srp.h in CopyFiles */, - 700F0C252A2FBC5100755BA7 /* wc_kyber.h in CopyFiles */, + 700F0C252A2FBC5100755BA7 /* wc_mlkem.h in CopyFiles */, 700F0C262A2FBC5100755BA7 /* wc_pkcs11.h in CopyFiles */, 700F0C272A2FBC5100755BA7 /* wolfevent.h in CopyFiles */, 700F0C282A2FBC5100755BA7 /* kdf.h in CopyFiles */, @@ -985,7 +985,7 @@ 700F0BE72A2FBC1500755BA7 /* ge_448.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ge_448.h; path = ../../wolfssl/wolfcrypt/ge_448.h; sourceTree = ""; }; 700F0BE82A2FBC1500755BA7 /* sp_int.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sp_int.h; path = ../../wolfssl/wolfcrypt/sp_int.h; sourceTree = ""; }; 700F0BE92A2FBC1500755BA7 /* sp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sp.h; path = ../../wolfssl/wolfcrypt/sp.h; sourceTree = ""; }; - 700F0BEA2A2FBC1500755BA7 /* kyber.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = kyber.h; path = ../../wolfssl/wolfcrypt/kyber.h; sourceTree = ""; }; + 700F0BEA2A2FBC1500755BA7 /* mlkem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mlkem.h; path = ../../wolfssl/wolfcrypt/mlkem.h; sourceTree = ""; }; 700F0BEB2A2FBC1500755BA7 /* fe_448.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = fe_448.h; path = 
../../wolfssl/wolfcrypt/fe_448.h; sourceTree = ""; }; 700F0BEC2A2FBC1500755BA7 /* pkcs12.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = pkcs12.h; path = ../../wolfssl/wolfcrypt/pkcs12.h; sourceTree = ""; }; 700F0BED2A2FBC1500755BA7 /* chacha20_poly1305.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chacha20_poly1305.h; path = ../../wolfssl/wolfcrypt/chacha20_poly1305.h; sourceTree = ""; }; @@ -1000,13 +1000,13 @@ 700F0BF62A2FBC1600755BA7 /* fe_operations.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = fe_operations.h; path = ../../wolfssl/wolfcrypt/fe_operations.h; sourceTree = ""; }; 700F0BF72A2FBC1600755BA7 /* eccsi.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = eccsi.h; path = ../../wolfssl/wolfcrypt/eccsi.h; sourceTree = ""; }; 700F0BF82A2FBC1600755BA7 /* ed448.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ed448.h; path = ../../wolfssl/wolfcrypt/ed448.h; sourceTree = ""; }; - 700F0BF92A2FBC1600755BA7 /* ext_kyber.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ext_kyber.h; path = ../../wolfssl/wolfcrypt/ext_kyber.h; sourceTree = ""; }; + 700F0BF92A2FBC1600755BA7 /* ext_mlkem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ext_mlkem.h; path = ../../wolfssl/wolfcrypt/ext_mlkem.h; sourceTree = ""; }; 700F0BFA2A2FBC1600755BA7 /* sha3.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sha3.h; path = ../../wolfssl/wolfcrypt/sha3.h; sourceTree = ""; }; 700F0BFB2A2FBC1600755BA7 /* signature.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = signature.h; path = ../../wolfssl/wolfcrypt/signature.h; sourceTree = ""; }; 700F0BFC2A2FBC1600755BA7 /* cmac.h */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = cmac.h; path = ../../wolfssl/wolfcrypt/cmac.h; sourceTree = ""; }; 700F0BFD2A2FBC1600755BA7 /* pkcs11.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = pkcs11.h; path = ../../wolfssl/wolfcrypt/pkcs11.h; sourceTree = ""; }; 700F0BFE2A2FBC1600755BA7 /* siphash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = siphash.h; path = ../../wolfssl/wolfcrypt/siphash.h; sourceTree = ""; }; - 700F0BFF2A2FBC1600755BA7 /* wc_kyber.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = wc_kyber.h; path = ../../wolfssl/wolfcrypt/wc_kyber.h; sourceTree = ""; }; + 700F0BFF2A2FBC1600755BA7 /* wc_mlkem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = wc_mlkem.h; path = ../../wolfssl/wolfcrypt/wc_mlkem.h; sourceTree = ""; }; 700F0C002A2FBC1600755BA7 /* fips.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = fips.h; path = ../../wolfssl/wolfcrypt/fips.h; sourceTree = ""; }; 700F0C012A2FBC1600755BA7 /* ge_operations.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ge_operations.h; path = ../../wolfssl/wolfcrypt/ge_operations.h; sourceTree = ""; }; 700F0C022A2FBC1600755BA7 /* falcon.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = falcon.h; path = ../../wolfssl/wolfcrypt/falcon.h; sourceTree = ""; }; @@ -1157,7 +1157,7 @@ 700F0BF72A2FBC1600755BA7 /* eccsi.h */, 700F0BF82A2FBC1600755BA7 /* ed448.h */, 700F0BF42A2FBC1600755BA7 /* ed25519.h */, - 700F0BF92A2FBC1600755BA7 /* ext_kyber.h */, + 700F0BF92A2FBC1600755BA7 /* ext_mlkem.h */, 700F0C022A2FBC1600755BA7 /* falcon.h */, 700F0BEB2A2FBC1500755BA7 /* fe_448.h */, 700F0BF62A2FBC1600755BA7 /* fe_operations.h */, @@ -1165,7 +1165,7 @@ 700F0BE72A2FBC1500755BA7 /* ge_448.h 
*/, 700F0C012A2FBC1600755BA7 /* ge_operations.h */, 700F0BE12A2FBC1500755BA7 /* hpke.h */, - 700F0BEA2A2FBC1500755BA7 /* kyber.h */, + 700F0BEA2A2FBC1500755BA7 /* mlkem.h */, 700F0BFD2A2FBC1600755BA7 /* pkcs11.h */, 700F0BEC2A2FBC1500755BA7 /* pkcs12.h */, 700F0BE02A2FBC1500755BA7 /* rc2.h */, @@ -1178,7 +1178,7 @@ 700F0BE92A2FBC1500755BA7 /* sp.h */, 700F0BE22A2FBC1500755BA7 /* sphincs.h */, 700F0BF32A2FBC1600755BA7 /* srp.h */, - 700F0BFF2A2FBC1600755BA7 /* wc_kyber.h */, + 700F0BFF2A2FBC1600755BA7 /* wc_mlkem.h */, 700F0BF52A2FBC1600755BA7 /* wc_pkcs11.h */, 700F0BE62A2FBC1500755BA7 /* wolfevent.h */, 5216465E1A8993770062516A /* aes.h */, diff --git a/cmake/functions.cmake b/cmake/functions.cmake index c36219400..f43ebf09b 100644 --- a/cmake/functions.cmake +++ b/cmake/functions.cmake @@ -812,16 +812,16 @@ function(generate_lib_src_list LIB_SOURCES) endif() if(BUILD_WC_KYBER) - list(APPEND LIB_SOURCES wolfcrypt/src/wc_kyber.c) - list(APPEND LIB_SOURCES wolfcrypt/src/wc_kyber_poly.c) + list(APPEND LIB_SOURCES wolfcrypt/src/wc_mlkem.c) + list(APPEND LIB_SOURCES wolfcrypt/src/wc_mlkem_poly.c) if(BUILD_INTELASM) - list(APPEND LIB_SOURCES wolfcrypt/src/wc_kyber_asm.S) + list(APPEND LIB_SOURCES wolfcrypt/src/wc_mlkem_asm.S) endif() endif() if(BUILD_EXT_KYBER) - list(APPEND LIB_SOURCES wolfcrypt/src/ext_kyber.c) + list(APPEND LIB_SOURCES wolfcrypt/src/ext_mlkem.c) endif() if(BUILD_WC_LMS) diff --git a/cmake/options.h.in b/cmake/options.h.in index 13e56625c..fb7570a83 100644 --- a/cmake/options.h.in +++ b/cmake/options.h.in @@ -366,10 +366,10 @@ extern "C" { #cmakedefine NO_DES3_TLS_SUITES #undef WOLFSSL_EXPERIMENTAL_SETTINGS #cmakedefine WOLFSSL_EXPERIMENTAL_SETTINGS -#undef WOLFSSL_HAVE_KYBER -#cmakedefine WOLFSSL_HAVE_KYBER -#undef WOLFSSL_WC_KYBER -#cmakedefine WOLFSSL_WC_KYBER +#undef WOLFSSL_HAVE_MLKEM +#cmakedefine WOLFSSL_HAVE_MLKEM +#undef WOLFSSL_WC_MLKEM +#cmakedefine WOLFSSL_WC_MLKEM #undef NO_WOLFSSL_STUB #cmakedefine NO_WOLFSSL_STUB #undef HAVE_ECC_SECPR2 
diff --git a/configure.ac b/configure.ac index d89927f5d..91051973c 100644 --- a/configure.ac +++ b/configure.ac @@ -1394,93 +1394,98 @@ AC_ARG_WITH([liboqs], ) -# KYBER +# MLKEM # Used: # - SHA3, Shake128 and Shake256 AC_ARG_ENABLE([kyber], - [AS_HELP_STRING([--enable-kyber],[Enable KYBER (default: disabled)])], - [ ENABLED_KYBER=$enableval ], - [ ENABLED_KYBER=no ] + [AS_HELP_STRING([--enable-kyber],[Enable MLKEM (default: disabled)])], + [ ENABLED_MLKEM=$enableval ], + [ ENABLED_MLKEM=no ] + ) +AC_ARG_ENABLE([mlkem], + [AS_HELP_STRING([--enable-kyber],[Enable MLKEM (default: disabled)])], + [ ENABLED_MLKEM=$enableval ], + [ ENABLED_MLKEM=no ] ) -ENABLED_WC_KYBER=no +ENABLED_WC_MLKEM=no ENABLED_ML_KEM=unset -ENABLED_KYBER_MAKE_KEY=no -ENABLED_KYBER_ENCAPSULATE=no -ENABLED_KYBER_DECAPSULATE=no -for v in `echo $ENABLED_KYBER | tr "," " "` +ENABLED_MLKEM_MAKE_KEY=no +ENABLED_MLKEM_ENCAPSULATE=no +ENABLED_MLKEM_DECAPSULATE=no +for v in `echo $ENABLED_MLKEM | tr "," " "` do case $v in yes) - ENABLED_KYBER512=yes - ENABLED_KYBER768=yes - ENABLED_KYBER1024=yes - ENABLED_KYBER_MAKE_KEY=yes - ENABLED_KYBER_ENCAPSULATE=yes - ENABLED_KYBER_DECAPSULATE=yes + ENABLED_MLKEM512=yes + ENABLED_MLKEM768=yes + ENABLED_MLKEM1024=yes + ENABLED_MLKEM_MAKE_KEY=yes + ENABLED_MLKEM_ENCAPSULATE=yes + ENABLED_MLKEM_DECAPSULATE=yes ;; no) ;; small) - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_KYBER_SMALL" + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_MLKEM_SMALL" ;; cache-a) AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_MLKEM_CACHE_A" ;; 512) - ENABLED_KYBER512=yes + ENABLED_MLKEM512=yes ;; 768) - ENABLED_KYBER768=yes + ENABLED_MLKEM768=yes ;; 1024) - ENABLED_KYBER1024=yes + ENABLED_MLKEM1024=yes ;; make) - ENABLED_KYBER_MAKE_KEY=yes + ENABLED_MLKEM_MAKE_KEY=yes ;; encapsulate|enc) - ENABLED_KYBER_ENCAPSULATE=yes + ENABLED_MLKEM_ENCAPSULATE=yes ;; decapsulate|dec) - ENABLED_KYBER_DECAPSULATE=yes + ENABLED_MLKEM_DECAPSULATE=yes ;; all) - ENABLED_KYBER_MAKE_KEY=yes - ENABLED_KYBER_ENCAPSULATE=yes - ENABLED_KYBER_DECAPSULATE=yes 
+ ENABLED_MLKEM_MAKE_KEY=yes + ENABLED_MLKEM_ENCAPSULATE=yes + ENABLED_MLKEM_DECAPSULATE=yes ;; - original) + original|kyber) ENABLED_ORIGINAL=yes ;; ml-kem) ENABLED_ML_KEM=yes ;; *) - AC_MSG_ERROR([Invalid choice for KYBER []: $ENABLED_KYBER.]) + AC_MSG_ERROR([Invalid choice for MLKEM []: $ENABLED_MLKEM.]) break;; esac done -if test "$ENABLED_KYBER" != "no" +if test "$ENABLED_MLKEM" != "no" then - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_HAVE_KYBER" + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_HAVE_MLKEM" # Use liboqs if specified. if test "$ENABLED_LIBOQS" = "no"; then - ENABLED_WC_KYBER=yes - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_WC_KYBER" - AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_WC_KYBER" + ENABLED_WC_MLKEM=yes + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_WC_MLKEM" + AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_WC_MLKEM" fi if test "$ENABLED_ORIGINAL" = "yes"; then - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_KYBER_ORIGINAL" - if test "$ENABLED_KYBER512" = ""; then + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_MLKEM_KYBER" + if test "$ENABLED_MLKEM512" = ""; then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_NO_KYBER512" fi - if test "$ENABLED_KYBER768" = ""; then + if test "$ENABLED_MLKEM768" = ""; then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_NO_KYBER768" fi - if test "$ENABLED_KYBER1024" = ""; then + if test "$ENABLED_MLKEM1024" = ""; then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_NO_KYBER1024" fi if test "$ENABLED_ML_KEM" = "unset"; then @@ -1491,29 +1496,29 @@ then ENABLED_ML_KEM=yes fi if test "$ENABLED_ML_KEM" = "yes"; then - if test "$ENABLED_KYBER512" = ""; then + if test "$ENABLED_MLKEM512" = ""; then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_NO_ML_KEM_512" fi - if test "$ENABLED_KYBER768" = ""; then + if test "$ENABLED_MLKEM768" = ""; then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_NO_ML_KEM_768" fi - if test "$ENABLED_KYBER1024" = ""; then + if test "$ENABLED_MLKEM1024" = ""; then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_NO_ML_KEM_1024" fi else AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_NO_ML_KEM" fi - if test "$ENABLED_KYBER_MAKE_KEY" = "no"; then - AM_CFLAGS="$AM_CFLAGS 
-DWOLFSSL_KYBER_NO_MAKE_KEY" + if test "$ENABLED_MLKEM_MAKE_KEY" = "no"; then + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_MLKEM_NO_MAKE_KEY" fi - if test "$ENABLED_KYBER_ENCAPSULATE" = "no"; then - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_KYBER_NO_ENCAPSULATE" + if test "$ENABLED_MLKEM_ENCAPSULATE" = "no"; then + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_MLKEM_NO_ENCAPSULATE" fi - if test "$ENABLED_KYBER_DECAPSULATE" = "no"; then - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_KYBER_NO_DECAPSULATE" + if test "$ENABLED_MLKEM_DECAPSULATE" = "no"; then + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_MLKEM_NO_DECAPSULATE" fi - if test "$ENABLED_WC_KYBER" = "yes" + if test "$ENABLED_WC_MLKEM" = "yes" then test "$enable_sha3" = "" && enable_sha3=yes test "$enable_shake128" = "" && enable_shake128=yes @@ -10125,7 +10130,7 @@ AM_CONDITIONAL([BUILD_CURVE448],[test "x$ENABLED_CURVE448" = "xyes" || test "x$E AM_CONDITIONAL([BUILD_CURVE448_SMALL],[test "x$ENABLED_CURVE448_SMALL" = "xyes" || test "x$ENABLED_USERSETTINGS" = "xyes"]) AM_CONDITIONAL([BUILD_WC_LMS],[test "x$ENABLED_WC_LMS" != "xno" || test "x$ENABLED_USERSETTINGS" = "xyes"]) AM_CONDITIONAL([BUILD_WC_XMSS],[test "x$ENABLED_WC_XMSS" != "xno" || test "x$ENABLED_USERSETTINGS" = "xyes"]) -AM_CONDITIONAL([BUILD_WC_KYBER],[test "x$ENABLED_WC_KYBER" != "xno" || test "x$ENABLED_USERSETTINGS" = "xyes"]) +AM_CONDITIONAL([BUILD_WC_MLKEM],[test "x$ENABLED_WC_MLKEM" != "xno" || test "x$ENABLED_USERSETTINGS" = "xyes"]) AM_CONDITIONAL([BUILD_DILITHIUM],[test "x$ENABLED_DILITHIUM" != "xno" || test "x$ENABLED_USERSETTINGS" = "xyes"]) AM_CONDITIONAL([BUILD_ECCSI],[test "x$ENABLED_ECCSI" = "xyes" || test "x$ENABLED_USERSETTINGS" = "xyes"]) AM_CONDITIONAL([BUILD_SAKKE],[test "x$ENABLED_SAKKE" = "xyes" || test "x$ENABLED_USERSETTINGS" = "xyes"]) @@ -10631,8 +10636,8 @@ echo " * XMSS wolfSSL impl: $ENABLED_WC_XMSS" if test "$ENABLED_LIBXMSS" = "yes"; then echo " * XMSS_ROOT: $XMSS_ROOT" fi -echo " * KYBER: $ENABLED_KYBER" -echo " * KYBER wolfSSL impl: $ENABLED_WC_KYBER" +echo " * MLKEM: 
$ENABLED_MLKEM" +echo " * MLKEM wolfSSL impl: $ENABLED_WC_MLKEM" echo " * DILITHIUM: $ENABLED_DILITHIUM" echo " * ECCSI $ENABLED_ECCSI" echo " * SAKKE $ENABLED_SAKKE" diff --git a/examples/benchmark/tls_bench.c b/examples/benchmark/tls_bench.c index e44f164c2..9983d3eaf 100644 --- a/examples/benchmark/tls_bench.c +++ b/examples/benchmark/tls_bench.c @@ -305,7 +305,7 @@ static struct group_info groups[] = { { WOLFSSL_X448_ML_KEM_768, "X448_ML_KEM_768" }, { WOLFSSL_X25519_ML_KEM_768, "X25519_ML_KEM_768" }, #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER { WOLFSSL_KYBER_LEVEL1, "KYBER_LEVEL1" }, { WOLFSSL_KYBER_LEVEL3, "KYBER_LEVEL3" }, { WOLFSSL_KYBER_LEVEL5, "KYBER_LEVEL5" }, diff --git a/examples/client/client.c b/examples/client/client.c index dca255c19..d53068a09 100644 --- a/examples/client/client.c +++ b/examples/client/client.c @@ -464,7 +464,7 @@ static void SetKeyShare(WOLFSSL* ssl, int onlyKeyShare, int useX25519, else #endif #endif /* WOLFSSL_NO_ML_KEM */ - #ifdef WOLFSSL_KYBER_ORIGINAL + #ifdef WOLFSSL_MLKEM_KYBER #ifndef WOLFSSL_NO_KYBER512 if (XSTRCMP(pqcAlg, "KYBER_LEVEL1") == 0) { group = WOLFSSL_KYBER_LEVEL1; @@ -522,7 +522,7 @@ static void SetKeyShare(WOLFSSL* ssl, int onlyKeyShare, int useX25519, } else #endif - #endif /* WOLFSSL_KYBER_ORIGINAL */ + #endif /* WOLFSSL_MLKEM_KYBER */ { err_sys("invalid post-quantum KEM specified"); } @@ -1428,7 +1428,7 @@ static const char* client_usage_msg[][78] = { "X25519_ML_KEM_768,\n" " X448_ML_KEM_768\n" #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER " KYBER_LEVEL1, KYBER_LEVEL3, KYBER_LEVEL5, " "P256_KYBER_LEVEL1,\n" " P384_KYBER_LEVEL3, P256_KYBER_LEVEL3, " @@ -1679,7 +1679,7 @@ static const char* client_usage_msg[][78] = { "\n" " P384_ML_KEM_768, P521_ML_KEM_1024\n" #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER " KYBER_LEVEL1, KYBER_LEVEL3, KYBER_LEVEL5, " "P256_KYBER_LEVEL1,\n" " P384_KYBER_LEVEL3, P521_KYBER_LEVEL5\n" diff --git 
a/examples/configs/user_settings_espressif.h b/examples/configs/user_settings_espressif.h index c626982cf..8f598aff5 100644 --- a/examples/configs/user_settings_espressif.h +++ b/examples/configs/user_settings_espressif.h @@ -213,8 +213,8 @@ #ifdef CONFIG_WOLFSSL_ENABLE_KYBER /* Kyber typically needs a minimum 10K stack */ #define WOLFSSL_EXPERIMENTAL_SETTINGS - #define WOLFSSL_HAVE_KYBER - #define WOLFSSL_WC_KYBER + #define WOLFSSL_HAVE_MLKEM + #define WOLFSSL_WC_MLKEM #define WOLFSSL_SHA3 #if defined(CONFIG_IDF_TARGET_ESP8266) /* With limited RAM, we'll disable some of the Kyber sizes: */ diff --git a/examples/configs/user_settings_platformio.h b/examples/configs/user_settings_platformio.h index 5b6bbbc98..4fd753bce 100644 --- a/examples/configs/user_settings_platformio.h +++ b/examples/configs/user_settings_platformio.h @@ -51,8 +51,8 @@ #if 0 /* Kyber typically needs a minimum 10K stack */ #define WOLFSSL_EXPERIMENTAL_SETTINGS - #define WOLFSSL_HAVE_KYBER - #define WOLFSSL_WC_KYBER + #define WOLFSSL_HAVE_MLKEM + #define WOLFSSL_WC_MLKEM #define WOLFSSL_SHA3 #endif diff --git a/examples/configs/user_settings_stm32.h b/examples/configs/user_settings_stm32.h index 381b9785b..c069cfe85 100644 --- a/examples/configs/user_settings_stm32.h +++ b/examples/configs/user_settings_stm32.h @@ -668,11 +668,11 @@ extern "C" { #undef WOLFSSL_EXPERIMENTAL_SETTINGS #define WOLFSSL_EXPERIMENTAL_SETTINGS - #undef WOLFSSL_HAVE_KYBER - #define WOLFSSL_HAVE_KYBER + #undef WOLFSSL_HAVE_MLKEM + #define WOLFSSL_HAVE_MLKEM - #undef WOLFSSL_WC_KYBER - #define WOLFSSL_WC_KYBER + #undef WOLFSSL_WC_MLKEM + #define WOLFSSL_WC_MLKEM #undef WOLFSSL_NO_SHAKE128 #undef WOLFSSL_SHAKE128 diff --git a/examples/server/server.c b/examples/server/server.c index 2f093d161..5b028ac15 100644 --- a/examples/server/server.c +++ b/examples/server/server.c @@ -777,7 +777,7 @@ static void SetKeyShare(WOLFSSL* ssl, int onlyKeyShare, int useX25519, else #endif #endif /* WOLFSSL_NO_ML_KEM */ - #ifdef 
WOLFSSL_KYBER_ORIGINAL + #ifdef WOLFSSL_MLKEM_KYBER #ifndef WOLFSSL_NO_KYBER512 if (XSTRCMP(pqcAlg, "KYBER_LEVEL1") == 0) { groups[count] = WOLFSSL_KYBER_LEVEL1; @@ -1077,7 +1077,7 @@ static const char* server_usage_msg[][66] = { "X25519_ML_KEM_768,\n" " X448_ML_KEM_768\n" #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER " KYBER_LEVEL1, KYBER_LEVEL3, KYBER_LEVEL5, " "P256_KYBER_LEVEL1,\n" " P384_KYBER_LEVEL3, P256_KYBER_LEVEL3, " @@ -1286,7 +1286,7 @@ static const char* server_usage_msg[][66] = { "\n" " P384_ML_KEM_768, P521_ML_KEM_1024\n" #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER " KYBER_LEVEL1, KYBER_LEVEL3, KYBER_LEVEL5, " "P256_KYBER_LEVEL1,\n" " P384_KYBER_LEVEL3, P521_KYBER_LEVEL5\n" diff --git a/linuxkm/Kbuild b/linuxkm/Kbuild index 0198d04da..04266cfc4 100644 --- a/linuxkm/Kbuild +++ b/linuxkm/Kbuild @@ -136,7 +136,7 @@ $(obj)/wolfcrypt/src/chacha_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_ $(obj)/wolfcrypt/src/chacha_asm.o: OBJECT_FILES_NON_STANDARD := y $(obj)/wolfcrypt/src/poly1305_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE) $(obj)/wolfcrypt/src/poly1305_asm.o: OBJECT_FILES_NON_STANDARD := y -$(obj)/wolfcrypt/src/wc_kyber_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE) +$(obj)/wolfcrypt/src/wc_mlkem_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE) ifndef READELF READELF := readelf diff --git a/linuxkm/module_exports.c.template b/linuxkm/module_exports.c.template index 699f83c45..8d98cd0b5 100644 --- a/linuxkm/module_exports.c.template +++ b/linuxkm/module_exports.c.template @@ -149,10 +149,10 @@ #include #endif -#ifdef WOLFSSL_HAVE_KYBER - #include -#ifdef WOLFSSL_WC_KYBER - #include +#ifdef WOLFSSL_HAVE_MLKEM + #include +#ifdef WOLFSSL_WC_MLKEM + #include #endif #endif #if defined(WOLFSSL_HAVE_XMSS) diff --git a/src/include.am b/src/include.am index 0a34c41f9..174974372 100644 --- a/src/include.am +++ b/src/include.am @@ 
-1193,34 +1193,34 @@ if BUILD_SAKKE src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sakke.c endif -if BUILD_WC_KYBER -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber.c -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber_poly.c +if BUILD_WC_MLKEM +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_mlkem.c +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_mlkem_poly.c if BUILD_ARMASM if BUILD_ARM_THUMB if BUILD_ARMASM_INLINE -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-kyber-asm_c.c +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c else -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-kyber-asm.S +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-mlkem-asm.S endif !BUILD_ARMASM_INLINE else if BUILD_ARMASM_INLINE -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c else -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-kyber-asm.S +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S endif !BUILD_ARMASM_INLINE endif !BUILD_ARM_THUMB endif BUILD_ARMASM if !BUILD_X86_ASM if BUILD_INTELASM -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber_asm.S +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_mlkem_asm.S endif endif if BUILD_ARMASM_NEON if BUILD_ARMASM_INLINE -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-kyber-asm_c.c +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c else -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-kyber-asm.S +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-mlkem-asm.S endif !BUILD_ARMASM_INLINE endif BUILD_ARMASM_NEON endif @@ -1381,7 +1381,7 @@ if BUILD_LIBOQS src_libwolfssl@LIBSUFFIX@_la_SOURCES += 
wolfcrypt/src/falcon.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/dilithium.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sphincs.c -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/ext_kyber.c +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/ext_mlkem.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/liboqs/liboqs.c endif diff --git a/src/internal.c b/src/internal.c index 652adc5c0..ce00a4624 100644 --- a/src/internal.c +++ b/src/internal.c @@ -35145,7 +35145,7 @@ static int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx, } #endif /* HAVE_ECC */ -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM /* Returns 1 when the given group is a PQC group, 0 otherwise. */ int NamedGroupIsPqc(int group) { @@ -35155,7 +35155,7 @@ static int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx, case WOLFSSL_ML_KEM_768: case WOLFSSL_ML_KEM_1024: #endif - #ifdef WOLFSSL_KYBER_ORIGINAL + #ifdef WOLFSSL_MLKEM_KYBER case WOLFSSL_KYBER_LEVEL1: case WOLFSSL_KYBER_LEVEL3: case WOLFSSL_KYBER_LEVEL5: @@ -35180,7 +35180,7 @@ static int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx, case WOLFSSL_X25519_ML_KEM_512: case WOLFSSL_X448_ML_KEM_768: #endif - #ifdef WOLFSSL_KYBER_ORIGINAL + #ifdef WOLFSSL_MLKEM_KYBER case WOLFSSL_P256_KYBER_LEVEL3: case WOLFSSL_X25519_KYBER_LEVEL3: case WOLFSSL_P256_KYBER_LEVEL1: @@ -35194,7 +35194,7 @@ static int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx, return 0; } } -#endif /* WOLFSSL_HAVE_KYBER */ +#endif /* WOLFSSL_HAVE_MLKEM */ int TranslateErrorToAlert(int err) { diff --git a/src/ssl.c b/src/ssl.c index 139067aba..96478068d 100644 --- a/src/ssl.c +++ b/src/ssl.c @@ -3592,12 +3592,12 @@ static int isValidCurveGroup(word16 name) case WOLFSSL_FFDHE_6144: case WOLFSSL_FFDHE_8192: -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM #ifndef WOLFSSL_NO_ML_KEM case WOLFSSL_ML_KEM_512: case WOLFSSL_ML_KEM_768: case WOLFSSL_ML_KEM_1024: - #if 
defined(WOLFSSL_WC_KYBER) || defined(HAVE_LIBOQS) + #if defined(WOLFSSL_WC_MLKEM) || defined(HAVE_LIBOQS) case WOLFSSL_P256_ML_KEM_512: case WOLFSSL_P384_ML_KEM_768: case WOLFSSL_P521_ML_KEM_1024: @@ -3608,11 +3608,11 @@ static int isValidCurveGroup(word16 name) case WOLFSSL_P256_ML_KEM_768: #endif #endif /* !WOLFSSL_NO_ML_KEM */ -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER case WOLFSSL_KYBER_LEVEL1: case WOLFSSL_KYBER_LEVEL3: case WOLFSSL_KYBER_LEVEL5: - #if defined(WOLFSSL_WC_KYBER) || defined(HAVE_LIBOQS) + #if defined(WOLFSSL_WC_MLKEM) || defined(HAVE_LIBOQS) case WOLFSSL_P256_KYBER_LEVEL1: case WOLFSSL_P384_KYBER_LEVEL3: case WOLFSSL_P521_KYBER_LEVEL5: @@ -3621,7 +3621,7 @@ static int isValidCurveGroup(word16 name) case WOLFSSL_X25519_KYBER_LEVEL3: case WOLFSSL_P256_KYBER_LEVEL3: #endif -#endif /* WOLFSSL_KYBER_ORIGINAL */ +#endif /* WOLFSSL_MLKEM_KYBER */ #endif return 1; @@ -15389,13 +15389,13 @@ const char* wolfSSL_get_curve_name(WOLFSSL* ssl) if (ssl == NULL) return NULL; -#if defined(WOLFSSL_TLS13) && defined(WOLFSSL_HAVE_KYBER) +#if defined(WOLFSSL_TLS13) && defined(WOLFSSL_HAVE_MLKEM) /* Check for post-quantum groups. Return now because we do not want the ECC * check to override this result in the case of a hybrid. 
*/ if (IsAtLeastTLSv1_3(ssl->version)) { switch (ssl->namedGroup) { #ifndef WOLFSSL_NO_ML_KEM -#if defined(WOLFSSL_WC_KYBER) +#if defined(WOLFSSL_WC_MLKEM) #ifndef WOLFSSL_NO_ML_KEM_512 case WOLFSSL_ML_KEM_512: return "ML_KEM_512"; @@ -15457,10 +15457,10 @@ const char* wolfSSL_get_curve_name(WOLFSSL* ssl) case WOLFSSL_X448_ML_KEM_768: return "X448_ML_KEM_768"; #endif -#endif /* WOLFSSL_WC_KYBER */ +#endif /* WOLFSSL_WC_MLKEM */ #endif /* WOLFSSL_NO_ML_KEM */ -#ifdef WOLFSSL_KYBER_ORIGINAL -#if defined(WOLFSSL_WC_KYBER) +#ifdef WOLFSSL_MLKEM_KYBER +#if defined(WOLFSSL_WC_MLKEM) #ifndef WOLFSSL_NO_KYBER512 case WOLFSSL_KYBER_LEVEL1: return "KYBER_LEVEL1"; @@ -15518,11 +15518,11 @@ const char* wolfSSL_get_curve_name(WOLFSSL* ssl) case WOLFSSL_X448_KYBER_LEVEL3: return "X448_KYBER_LEVEL3"; #endif -#endif /* WOLFSSL_WC_KYBER */ -#endif /* WOLFSSL_KYBER_ORIGINAL */ +#endif /* WOLFSSL_WC_MLKEM */ +#endif /* WOLFSSL_MLKEM_KYBER */ } } -#endif /* WOLFSSL_TLS13 && WOLFSSL_HAVE_KYBER */ +#endif /* WOLFSSL_TLS13 && WOLFSSL_HAVE_MLKEM */ #ifdef HAVE_FFDHE if (ssl->namedGroup != 0) { @@ -22999,12 +22999,12 @@ const WOLF_EC_NIST_NAME kNistCurves[] = { #ifdef HAVE_CURVE448 {CURVE_NAME("X448"), WC_NID_X448, WOLFSSL_ECC_X448}, #endif -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM #ifndef WOLFSSL_NO_ML_KEM {CURVE_NAME("ML_KEM_512"), WOLFSSL_ML_KEM_512, WOLFSSL_ML_KEM_512}, {CURVE_NAME("ML_KEM_768"), WOLFSSL_ML_KEM_768, WOLFSSL_ML_KEM_768}, {CURVE_NAME("ML_KEM_1024"), WOLFSSL_ML_KEM_1024, WOLFSSL_ML_KEM_1024}, -#if (defined(WOLFSSL_WC_KYBER) || defined(HAVE_LIBOQS)) && defined(HAVE_ECC) +#if (defined(WOLFSSL_WC_MLKEM) || defined(HAVE_LIBOQS)) && defined(HAVE_ECC) {CURVE_NAME("P256_ML_KEM_512"), WOLFSSL_P256_ML_KEM_512, WOLFSSL_P256_ML_KEM_512}, {CURVE_NAME("P384_ML_KEM_768"), WOLFSSL_P384_ML_KEM_768, @@ -23023,11 +23023,11 @@ const WOLF_EC_NIST_NAME kNistCurves[] = { WOLFSSL_X25519_ML_KEM_768}, #endif #endif /* !WOLFSSL_NO_ML_KEM */ -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef 
WOLFSSL_MLKEM_KYBER {CURVE_NAME("KYBER_LEVEL1"), WOLFSSL_KYBER_LEVEL1, WOLFSSL_KYBER_LEVEL1}, {CURVE_NAME("KYBER_LEVEL3"), WOLFSSL_KYBER_LEVEL3, WOLFSSL_KYBER_LEVEL3}, {CURVE_NAME("KYBER_LEVEL5"), WOLFSSL_KYBER_LEVEL5, WOLFSSL_KYBER_LEVEL5}, -#if (defined(WOLFSSL_WC_KYBER) || defined(HAVE_LIBOQS)) && defined(HAVE_ECC) +#if (defined(WOLFSSL_WC_MLKEM) || defined(HAVE_LIBOQS)) && defined(HAVE_ECC) {CURVE_NAME("P256_KYBER_LEVEL1"), WOLFSSL_P256_KYBER_LEVEL1, WOLFSSL_P256_KYBER_LEVEL1}, {CURVE_NAME("P384_KYBER_LEVEL3"), WOLFSSL_P384_KYBER_LEVEL3, @@ -23043,8 +23043,8 @@ const WOLF_EC_NIST_NAME kNistCurves[] = { {CURVE_NAME("X25519_KYBER_LEVEL3"), WOLFSSL_X25519_KYBER_LEVEL3, WOLFSSL_X25519_KYBER_LEVEL3}, #endif -#endif /* WOLFSSL_KYBER_ORIGINAL */ -#endif /* WOLFSSL_HAVE_KYBER */ +#endif /* WOLFSSL_MLKEM_KYBER */ +#endif /* WOLFSSL_HAVE_MLKEM */ #ifdef WOLFSSL_SM2 {CURVE_NAME("SM2"), WC_NID_sm2, WOLFSSL_ECC_SM2P256V1}, #endif diff --git a/src/tls.c b/src/tls.c index 1dd6436d4..9c4b5cf50 100644 --- a/src/tls.c +++ b/src/tls.c @@ -48,12 +48,12 @@ #ifdef HAVE_CURVE448 #include #endif -#ifdef WOLFSSL_HAVE_KYBER - #include -#ifdef WOLFSSL_WC_KYBER - #include +#ifdef WOLFSSL_HAVE_MLKEM + #include +#ifdef WOLFSSL_WC_MLKEM + #include #elif defined(HAVE_LIBOQS) - #include + #include #endif #endif @@ -4462,7 +4462,7 @@ int TLSX_UseCertificateStatusRequestV2(TLSX** extensions, byte status_type, #ifdef HAVE_SUPPORTED_CURVES #if !defined(HAVE_ECC) && !defined(HAVE_CURVE25519) && !defined(HAVE_CURVE448) \ - && !defined(HAVE_FFDHE) && !defined(WOLFSSL_HAVE_KYBER) + && !defined(HAVE_FFDHE) && !defined(WOLFSSL_HAVE_MLKEM) #error Elliptic Curves Extension requires Elliptic Curve Cryptography or liboqs groups. \ Use --enable-ecc and/or --enable-liboqs in the configure script or \ define HAVE_ECC. Alternatively use FFDHE for DH cipher suites. 
@@ -8092,8 +8092,24 @@ static int TLSX_KeyShare_GenEccKey(WOLFSSL *ssl, KeyShareEntry* kse) return ret; } -#ifdef WOLFSSL_HAVE_KYBER -static int kyber_id2type(int id, int *type) +#ifdef WOLFSSL_HAVE_MLKEM +#if defined(WOLFSSL_MLKEM_CACHE_A) && \ + !defined(WOLFSSL_TLSX_PQC_MLKEM_STORE_PRIV_KEY) + /* Store KyberKey object rather than private key bytes in key share entry. + * Improves performance at cost of more dynamic memory being used. */ + #define WOLFSSL_TLSX_PQC_MLKEM_STORE_OBJ +#endif +#if defined(WOLFSSL_TLSX_PQC_MLKEM_STORE_PRIV_KEY) && \ + defined(WOLFSSL_TLSX_PQC_MLKEM_STORE_OBJ) + #error "Choose WOLFSSL_TLSX_PQC_MLKEM_STORE_PRIV_KEY or " + "WOLFSSL_TLSX_PQC_MLKEM_STORE_OBJ" +#endif + +#if !defined(WOLFSSL_MLKEM_NO_MAKE_KEY) || \ + !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) || \ + (!defined(WOLFSSL_MLKEM_NO_DECAPSULATE) && \ + !defined(WOLFSSL_TLSX_PQC_MLKEM_STORE_OBJ)) +static int mlkem_id2type(int id, int *type) { int ret = 0; @@ -8115,7 +8131,7 @@ static int kyber_id2type(int id, int *type) break; #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 case WOLFSSL_KYBER_LEVEL1: *type = KYBER512; @@ -8139,20 +8155,89 @@ static int kyber_id2type(int id, int *type) return ret; } - -#if defined(WOLFSSL_MLKEM_CACHE_A) && \ - !defined(WOLFSSL_TLSX_PQC_MLKEM_STORE_PRIV_KEY) - /* Store KyberKey object rather than private key bytes in key share entry. - * Improves performance at cost of more dynamic memory being used. */ - #define WOLFSSL_TLSX_PQC_MLKEM_STORE_OBJ -#endif -#if defined(WOLFSSL_TLSX_PQC_MLKEM_STORE_PRIV_KEY) && \ - defined(WOLFSSL_TLSX_PQC_MLKEM_STORE_OBJ) - #error "Choose WOLFSSL_TLSX_PQC_MLKEM_STORE_PRIV_KEY or " - "WOLFSSL_TLSX_PQC_MLKEM_STORE_OBJ" #endif -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY +/* Structures and objects needed for hybrid key exchanges using both classic + * ECDHE and PQC KEM key material. 
*/ +typedef struct PqcHybridMapping { + int hybrid; + int ecc; + int pqc; + int pqc_first; +} PqcHybridMapping; + +static const PqcHybridMapping pqc_hybrid_mapping[] = { +#ifndef WOLFSSL_NO_ML_KEM + {.hybrid = WOLFSSL_P256_ML_KEM_512, .ecc = WOLFSSL_ECC_SECP256R1, + .pqc = WOLFSSL_ML_KEM_512, .pqc_first = 0}, + {.hybrid = WOLFSSL_P384_ML_KEM_768, .ecc = WOLFSSL_ECC_SECP384R1, + .pqc = WOLFSSL_ML_KEM_768, .pqc_first = 0}, + {.hybrid = WOLFSSL_P256_ML_KEM_768, .ecc = WOLFSSL_ECC_SECP256R1, + .pqc = WOLFSSL_ML_KEM_768, .pqc_first = 0}, + {.hybrid = WOLFSSL_P521_ML_KEM_1024, .ecc = WOLFSSL_ECC_SECP521R1, + .pqc = WOLFSSL_ML_KEM_1024, .pqc_first = 0}, + {.hybrid = WOLFSSL_P384_ML_KEM_1024, .ecc = WOLFSSL_ECC_SECP384R1, + .pqc = WOLFSSL_ML_KEM_1024, .pqc_first = 0}, +#ifdef HAVE_CURVE25519 + {.hybrid = WOLFSSL_X25519_ML_KEM_512, .ecc = WOLFSSL_ECC_X25519, + .pqc = WOLFSSL_ML_KEM_512, .pqc_first = 1}, + {.hybrid = WOLFSSL_X25519_ML_KEM_768, .ecc = WOLFSSL_ECC_X25519, + .pqc = WOLFSSL_ML_KEM_768, .pqc_first = 1}, +#endif +#ifdef HAVE_CURVE448 + {.hybrid = WOLFSSL_X448_ML_KEM_768, .ecc = WOLFSSL_ECC_X448, + .pqc = WOLFSSL_ML_KEM_768, .pqc_first = 1}, +#endif +#endif /* WOLFSSL_NO_ML_KEM */ +#ifdef WOLFSSL_MLKEM_KYBER + {.hybrid = WOLFSSL_P256_KYBER_LEVEL1, .ecc = WOLFSSL_ECC_SECP256R1, + .pqc = WOLFSSL_KYBER_LEVEL1, .pqc_first = 0}, + {.hybrid = WOLFSSL_P384_KYBER_LEVEL3, .ecc = WOLFSSL_ECC_SECP384R1, + .pqc = WOLFSSL_KYBER_LEVEL3, .pqc_first = 0}, + {.hybrid = WOLFSSL_P256_KYBER_LEVEL3, .ecc = WOLFSSL_ECC_SECP256R1, + .pqc = WOLFSSL_KYBER_LEVEL3, .pqc_first = 0}, + {.hybrid = WOLFSSL_P521_KYBER_LEVEL5, .ecc = WOLFSSL_ECC_SECP521R1, + .pqc = WOLFSSL_KYBER_LEVEL5, .pqc_first = 0}, +#ifdef HAVE_CURVE25519 + {.hybrid = WOLFSSL_X25519_KYBER_LEVEL1, .ecc = WOLFSSL_ECC_X25519, + .pqc = WOLFSSL_KYBER_LEVEL1, .pqc_first = 0}, + {.hybrid = WOLFSSL_X25519_KYBER_LEVEL3, .ecc = WOLFSSL_ECC_X25519, + .pqc = WOLFSSL_KYBER_LEVEL3, .pqc_first = 0}, +#endif +#ifdef HAVE_CURVE448 + {.hybrid 
= WOLFSSL_X448_KYBER_LEVEL3, .ecc = WOLFSSL_ECC_X448, + .pqc = WOLFSSL_KYBER_LEVEL3, .pqc_first = 0}, +#endif +#endif /* WOLFSSL_MLKEM_KYBER */ + {.hybrid = 0, .ecc = 0, .pqc = 0, .pqc_first = 0} +}; + +/* Map an ecc-pqc hybrid group into its ecc group and pqc kem group. */ +static void findEccPqc(int *ecc, int *pqc, int *pqc_first, int group) +{ + int i; + + if (pqc != NULL) + *pqc = 0; + if (ecc != NULL) + *ecc = 0; + if (pqc_first != NULL) + *pqc_first = 0; + + for (i = 0; pqc_hybrid_mapping[i].hybrid != 0; i++) { + if (pqc_hybrid_mapping[i].hybrid == group) { + if (pqc != NULL) + *pqc = pqc_hybrid_mapping[i].pqc; + if (ecc != NULL) + *ecc = pqc_hybrid_mapping[i].ecc; + if (pqc_first != NULL) + *pqc_first = pqc_hybrid_mapping[i].pqc_first; + break; + } + } +} + +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY /* Create a key share entry using pqc parameters group on the client side. * Generates a key pair. * @@ -8180,7 +8265,7 @@ static int TLSX_KeyShare_GenPqcKeyClient(WOLFSSL *ssl, KeyShareEntry* kse) } /* Get the type of key we need from the key share group. */ - ret = kyber_id2type(kse->group, &type); + ret = mlkem_id2type(kse->group, &type); if (ret == WC_NO_ERR_TRACE(NOT_COMPILED_IN)) { WOLFSSL_MSG("Invalid Kyber algorithm specified."); ret = BAD_FUNC_ARG; @@ -8285,86 +8370,6 @@ static int TLSX_KeyShare_GenPqcKeyClient(WOLFSSL *ssl, KeyShareEntry* kse) return ret; } -/* Structures and objects needed for hybrid key exchanges using both classic - * ECDHE and PQC KEM key material. 
*/ -typedef struct PqcHybridMapping { - int hybrid; - int ecc; - int pqc; - int pqc_first; -} PqcHybridMapping; - -static const PqcHybridMapping pqc_hybrid_mapping[] = { -#ifndef WOLFSSL_NO_ML_KEM - {.hybrid = WOLFSSL_P256_ML_KEM_512, .ecc = WOLFSSL_ECC_SECP256R1, - .pqc = WOLFSSL_ML_KEM_512, .pqc_first = 0}, - {.hybrid = WOLFSSL_P384_ML_KEM_768, .ecc = WOLFSSL_ECC_SECP384R1, - .pqc = WOLFSSL_ML_KEM_768, .pqc_first = 0}, - {.hybrid = WOLFSSL_P256_ML_KEM_768, .ecc = WOLFSSL_ECC_SECP256R1, - .pqc = WOLFSSL_ML_KEM_768, .pqc_first = 0}, - {.hybrid = WOLFSSL_P521_ML_KEM_1024, .ecc = WOLFSSL_ECC_SECP521R1, - .pqc = WOLFSSL_ML_KEM_1024, .pqc_first = 0}, - {.hybrid = WOLFSSL_P384_ML_KEM_1024, .ecc = WOLFSSL_ECC_SECP384R1, - .pqc = WOLFSSL_ML_KEM_1024, .pqc_first = 0}, -#ifdef HAVE_CURVE25519 - {.hybrid = WOLFSSL_X25519_ML_KEM_512, .ecc = WOLFSSL_ECC_X25519, - .pqc = WOLFSSL_ML_KEM_512, .pqc_first = 1}, - {.hybrid = WOLFSSL_X25519_ML_KEM_768, .ecc = WOLFSSL_ECC_X25519, - .pqc = WOLFSSL_ML_KEM_768, .pqc_first = 1}, -#endif -#ifdef HAVE_CURVE448 - {.hybrid = WOLFSSL_X448_ML_KEM_768, .ecc = WOLFSSL_ECC_X448, - .pqc = WOLFSSL_ML_KEM_768, .pqc_first = 1}, -#endif -#endif /* WOLFSSL_NO_ML_KEM */ -#ifdef WOLFSSL_KYBER_ORIGINAL - {.hybrid = WOLFSSL_P256_KYBER_LEVEL1, .ecc = WOLFSSL_ECC_SECP256R1, - .pqc = WOLFSSL_KYBER_LEVEL1, .pqc_first = 0}, - {.hybrid = WOLFSSL_P384_KYBER_LEVEL3, .ecc = WOLFSSL_ECC_SECP384R1, - .pqc = WOLFSSL_KYBER_LEVEL3, .pqc_first = 0}, - {.hybrid = WOLFSSL_P256_KYBER_LEVEL3, .ecc = WOLFSSL_ECC_SECP256R1, - .pqc = WOLFSSL_KYBER_LEVEL3, .pqc_first = 0}, - {.hybrid = WOLFSSL_P521_KYBER_LEVEL5, .ecc = WOLFSSL_ECC_SECP521R1, - .pqc = WOLFSSL_KYBER_LEVEL5, .pqc_first = 0}, -#ifdef HAVE_CURVE25519 - {.hybrid = WOLFSSL_X25519_KYBER_LEVEL1, .ecc = WOLFSSL_ECC_X25519, - .pqc = WOLFSSL_KYBER_LEVEL1, .pqc_first = 0}, - {.hybrid = WOLFSSL_X25519_KYBER_LEVEL3, .ecc = WOLFSSL_ECC_X25519, - .pqc = WOLFSSL_KYBER_LEVEL3, .pqc_first = 0}, -#endif -#ifdef HAVE_CURVE448 - 
{.hybrid = WOLFSSL_X448_KYBER_LEVEL3, .ecc = WOLFSSL_ECC_X448, - .pqc = WOLFSSL_KYBER_LEVEL3, .pqc_first = 0}, -#endif -#endif /* WOLFSSL_KYBER_ORIGINAL */ - {.hybrid = 0, .ecc = 0, .pqc = 0, .pqc_first = 0} -}; - -/* Map an ecc-pqc hybrid group into its ecc group and pqc kem group. */ -static void findEccPqc(int *ecc, int *pqc, int *pqc_first, int group) -{ - int i; - - if (pqc != NULL) - *pqc = 0; - if (ecc != NULL) - *ecc = 0; - if (pqc_first != NULL) - *pqc_first = 0; - - for (i = 0; pqc_hybrid_mapping[i].hybrid != 0; i++) { - if (pqc_hybrid_mapping[i].hybrid == group) { - if (pqc != NULL) - *pqc = pqc_hybrid_mapping[i].pqc; - if (ecc != NULL) - *ecc = pqc_hybrid_mapping[i].ecc; - if (pqc_first != NULL) - *pqc_first = pqc_hybrid_mapping[i].pqc_first; - break; - } - } -} - /* Create a key share entry using both ecdhe and pqc parameters groups. * Generates two key pairs on the client side. * @@ -8504,8 +8509,8 @@ static int TLSX_KeyShare_GenPqcHybridKeyClient(WOLFSSL *ssl, KeyShareEntry* kse) return ret; } -#endif /* !WOLFSSL_KYBER_NO_MAKE_KEY */ -#endif /* WOLFSSL_HAVE_KYBER */ +#endif /* !WOLFSSL_MLKEM_NO_MAKE_KEY */ +#endif /* WOLFSSL_HAVE_MLKEM */ /* Generate a secret/key using the key share entry. 
* @@ -8522,7 +8527,7 @@ int TLSX_KeyShare_GenKey(WOLFSSL *ssl, KeyShareEntry *kse) ret = TLSX_KeyShare_GenX25519Key(ssl, kse); else if (kse->group == WOLFSSL_ECC_X448) ret = TLSX_KeyShare_GenX448Key(ssl, kse); -#if defined(WOLFSSL_HAVE_KYBER) && !defined(WOLFSSL_KYBER_NO_MAKE_KEY) +#if defined(WOLFSSL_HAVE_MLKEM) && !defined(WOLFSSL_MLKEM_NO_MAKE_KEY) else if (WOLFSSL_NAMED_GROUP_IS_PQC(kse->group)) ret = TLSX_KeyShare_GenPqcKeyClient(ssl, kse); else if (WOLFSSL_NAMED_GROUP_IS_PQC_HYBRID(kse->group)) @@ -8562,7 +8567,7 @@ static void TLSX_KeyShare_FreeAll(KeyShareEntry* list, void* heap) wc_curve448_free((curve448_key*)current->key); #endif } -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM else if (WOLFSSL_NAMED_GROUP_IS_PQC(current->group)) { wc_KyberKey_Free((KyberKey*)current->key); #ifndef WOLFSSL_TLSX_PQC_MLKEM_STORE_OBJ @@ -8608,7 +8613,7 @@ static void TLSX_KeyShare_FreeAll(KeyShareEntry* list, void* heap) #endif } XFREE(current->key, heap, DYNAMIC_TYPE_PRIVATE_KEY); - #if !defined(NO_DH) || defined(WOLFSSL_HAVE_KYBER) + #if !defined(NO_DH) || defined(WOLFSSL_HAVE_MLKEM) XFREE(current->privKey, heap, DYNAMIC_TYPE_PRIVATE_KEY); #endif XFREE(current->pubKey, heap, DYNAMIC_TYPE_PUBLIC_KEY); @@ -9194,7 +9199,7 @@ static int TLSX_KeyShare_ProcessEcc(WOLFSSL* ssl, KeyShareEntry* keyShareEntry) ssl->arrays->preMasterSecret, &ssl->arrays->preMasterSz); } -#if defined(WOLFSSL_HAVE_KYBER) && !defined(WOLFSSL_KYBER_NO_DECAPSULATE) +#if defined(WOLFSSL_HAVE_MLKEM) && !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) /* Process the Kyber key share extension on the client side. * * ssl The SSL/TLS object. 
@@ -9242,7 +9247,7 @@ static int TLSX_KeyShare_ProcessPqcClient_ex(WOLFSSL* ssl, ret = MEMORY_E; } if (ret == 0) { - ret = kyber_id2type(keyShareEntry->group, &type); + ret = mlkem_id2type(keyShareEntry->group, &type); } if (ret != 0) { WOLFSSL_MSG("Invalid PQC algorithm specified."); @@ -9392,7 +9397,7 @@ static int TLSX_KeyShare_ProcessPqcHybridClient(WOLFSSL* ssl, pqc_kse->privKey = keyShareEntry->privKey; - ret = kyber_id2type(pqc_group, &type); + ret = mlkem_id2type(pqc_group, &type); if (ret != 0) { WOLFSSL_MSG("Invalid Kyber algorithm specified."); ret = BAD_FUNC_ARG; @@ -9539,7 +9544,7 @@ static int TLSX_KeyShare_ProcessPqcHybridClient(WOLFSSL* ssl, return ret; } -#endif /* WOLFSSL_HAVE_KYBER && !WOLFSSL_KYBER_NO_DECAPSULATE */ +#endif /* WOLFSSL_HAVE_MLKEM && !WOLFSSL_MLKEM_NO_DECAPSULATE */ /* Process the key share extension on the client side. * @@ -9565,7 +9570,7 @@ static int TLSX_KeyShare_Process(WOLFSSL* ssl, KeyShareEntry* keyShareEntry) ret = TLSX_KeyShare_ProcessX25519(ssl, keyShareEntry); else if (keyShareEntry->group == WOLFSSL_ECC_X448) ret = TLSX_KeyShare_ProcessX448(ssl, keyShareEntry); -#if defined(WOLFSSL_HAVE_KYBER) && !defined(WOLFSSL_KYBER_NO_DECAPSULATE) +#if defined(WOLFSSL_HAVE_MLKEM) && !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) else if (WOLFSSL_NAMED_GROUP_IS_PQC(keyShareEntry->group)) ret = TLSX_KeyShare_ProcessPqcClient(ssl, keyShareEntry); else if (WOLFSSL_NAMED_GROUP_IS_PQC_HYBRID(keyShareEntry->group)) @@ -9618,7 +9623,7 @@ static int TLSX_KeyShareEntry_Parse(const WOLFSSL* ssl, const byte* input, if (keLen > length - offset) return BUFFER_ERROR; -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM if ((WOLFSSL_NAMED_GROUP_IS_PQC(group) || WOLFSSL_NAMED_GROUP_IS_PQC_HYBRID(group)) && ssl->options.side == WOLFSSL_SERVER_END) { @@ -9804,7 +9809,7 @@ int TLSX_KeyShare_Parse(WOLFSSL* ssl, const byte* input, word16 length, /* Not in list sent if there isn't a private key. 
*/ if (keyShareEntry == NULL || (keyShareEntry->key == NULL - #if !defined(NO_DH) || defined(WOLFSSL_HAVE_KYBER) + #if !defined(NO_DH) || defined(WOLFSSL_HAVE_MLKEM) && keyShareEntry->privKey == NULL #endif )) { @@ -9898,7 +9903,7 @@ static int TLSX_KeyShare_New(KeyShareEntry** list, int group, void *heap, return 0; } -#if defined(WOLFSSL_HAVE_KYBER) && !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) +#if defined(WOLFSSL_HAVE_MLKEM) && !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) /* Process the Kyber key share extension on the server side. * * ssl The SSL/TLS object. @@ -9940,7 +9945,7 @@ static int TLSX_KeyShare_HandlePqcKeyServer(WOLFSSL* ssl, ret = MEMORY_E; } if (ret == 0) { - ret = kyber_id2type(keyShareEntry->group, &type); + ret = mlkem_id2type(keyShareEntry->group, &type); } if (ret != 0) { WOLFSSL_MSG("Invalid PQC algorithm specified."); @@ -10073,7 +10078,7 @@ static int TLSX_KeyShare_HandlePqcHybridKeyServer(WOLFSSL* ssl, ret = MEMORY_E; } if (ret == 0) { - ret = kyber_id2type(pqc_kse->group, &type); + ret = mlkem_id2type(pqc_kse->group, &type); } if (ret != 0) { WOLFSSL_MSG("Invalid PQC algorithm specified."); @@ -10246,7 +10251,7 @@ static int TLSX_KeyShare_HandlePqcHybridKeyServer(WOLFSSL* ssl, XFREE(ciphertext, ssl->heap, DYNAMIC_TYPE_TLSX); return ret; } -#endif /* WOLFSSL_HAVE_KYBER && !WOLFSSL_KYBER_NO_ENCAPSULATE */ +#endif /* WOLFSSL_HAVE_MLKEM && !WOLFSSL_MLKEM_NO_ENCAPSULATE */ /* Use the data to create a new key share object in the extensions. 
* @@ -10295,7 +10300,7 @@ int TLSX_KeyShare_Use(const WOLFSSL* ssl, word16 group, word16 len, byte* data, } -#if defined(WOLFSSL_HAVE_KYBER) && !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) +#if defined(WOLFSSL_HAVE_MLKEM) && !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) if (ssl->options.side == WOLFSSL_SERVER_END && WOLFSSL_NAMED_GROUP_IS_PQC(group)) { ret = TLSX_KeyShare_HandlePqcKeyServer((WOLFSSL*)ssl, @@ -10471,9 +10476,9 @@ static int TLSX_KeyShare_IsSupported(int namedGroup) break; #endif #endif -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM #ifndef WOLFSSL_NO_ML_KEM - #ifdef WOLFSSL_WC_KYBER + #ifdef WOLFSSL_WC_MLKEM #ifndef WOLFSSL_NO_ML_KEM_512 case WOLFSSL_ML_KEM_512: case WOLFSSL_P256_ML_KEM_512: @@ -10505,12 +10510,12 @@ static int TLSX_KeyShare_IsSupported(int namedGroup) { int ret; int id; - ret = kyber_id2type(namedGroup, &id); + ret = mlkem_id2type(namedGroup, &id); if (ret == WC_NO_ERR_TRACE(NOT_COMPILED_IN)) { return 0; } - if (! ext_kyber_enabled(id)) { + if (! ext_mlkem_enabled(id)) { return 0; } break; @@ -10527,20 +10532,20 @@ static int TLSX_KeyShare_IsSupported(int namedGroup) int ret; int id; findEccPqc(NULL, &namedGroup, NULL, namedGroup); - ret = kyber_id2type(namedGroup, &id); + ret = mlkem_id2type(namedGroup, &id); if (ret == WC_NO_ERR_TRACE(NOT_COMPILED_IN)) { return 0; } - if (! ext_kyber_enabled(id)) { + if (! ext_mlkem_enabled(id)) { return 0; } break; } #endif #endif /* WOLFSSL_NO_ML_KEM */ -#ifdef WOLFSSL_KYBER_ORIGINAL - #ifdef WOLFSSL_WC_KYBER +#ifdef WOLFSSL_MLKEM_KYBER + #ifdef WOLFSSL_WC_MLKEM #ifdef WOLFSSL_KYBER512 case WOLFSSL_KYBER_LEVEL1: case WOLFSSL_P256_KYBER_LEVEL1: @@ -10571,12 +10576,12 @@ static int TLSX_KeyShare_IsSupported(int namedGroup) { int ret; int id; - ret = kyber_id2type(namedGroup, &id); + ret = mlkem_id2type(namedGroup, &id); if (ret == WC_NO_ERR_TRACE(NOT_COMPILED_IN)) { return 0; } - if (! ext_kyber_enabled(id)) { + if (! 
ext_mlkem_enabled(id)) { return 0; } break; @@ -10592,19 +10597,19 @@ static int TLSX_KeyShare_IsSupported(int namedGroup) int ret; int id; findEccPqc(NULL, &namedGroup, NULL, namedGroup); - ret = kyber_id2type(namedGroup, &id); + ret = mlkem_id2type(namedGroup, &id); if (ret == WC_NO_ERR_TRACE(NOT_COMPILED_IN)) { return 0; } - if (! ext_kyber_enabled(id)) { + if (! ext_mlkem_enabled(id)) { return 0; } break; } #endif #endif -#endif /* WOLFSSL_HAVE_KYBER */ +#endif /* WOLFSSL_HAVE_MLKEM */ default: return 0; } @@ -10651,7 +10656,7 @@ static const word16 preferredGroup[] = { WOLFSSL_FFDHE_8192, #endif #ifndef WOLFSSL_NO_ML_KEM -#ifdef WOLFSSL_WC_KYBER +#ifdef WOLFSSL_WC_MLKEM #ifndef WOLFSSL_NO_ML_KEM_512 WOLFSSL_ML_KEM_512, WOLFSSL_P256_ML_KEM_512, @@ -10694,8 +10699,8 @@ static const word16 preferredGroup[] = { #endif #endif #endif /* !WOLFSSL_NO_ML_KEM */ -#ifdef WOLFSSL_KYBER_ORIGINAL -#ifdef WOLFSSL_WC_KYBER +#ifdef WOLFSSL_MLKEM_KYBER +#ifdef WOLFSSL_WC_MLKEM #ifdef WOLFSSL_KYBER512 WOLFSSL_KYBER_LEVEL1, WOLFSSL_P256_KYBER_LEVEL1, @@ -10735,7 +10740,7 @@ static const word16 preferredGroup[] = { WOLFSSL_X448_KYBER_LEVEL3, #endif #endif -#endif /* WOLFSSL_KYBER_ORIGINAL */ +#endif /* WOLFSSL_MLKEM_KYBER */ WOLFSSL_NAMED_GROUP_INVALID }; @@ -11052,7 +11057,7 @@ int TLSX_KeyShare_Choose(const WOLFSSL *ssl, TLSX* extensions, if (!WOLFSSL_NAMED_GROUP_IS_FFDHE(clientKSE->group)) { /* Check max value supported. */ if (clientKSE->group > WOLFSSL_ECC_MAX) { -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM if (!WOLFSSL_NAMED_GROUP_IS_PQC(clientKSE->group) && !WOLFSSL_NAMED_GROUP_IS_PQC_HYBRID(clientKSE->group)) #endif @@ -11118,7 +11123,7 @@ int TLSX_KeyShare_Setup(WOLFSSL *ssl, KeyShareEntry* clientKSE) return ret; if (clientKSE->key == NULL) { -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM if (WOLFSSL_NAMED_GROUP_IS_PQC(clientKSE->group) || WOLFSSL_NAMED_GROUP_IS_PQC_HYBRID(clientKSE->group)) { /* Going to need the public key (AKA ciphertext). 
*/ @@ -14358,9 +14363,9 @@ static int TLSX_PopulateSupportedGroups(WOLFSSL* ssl, TLSX** extensions) #endif #endif -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM #ifndef WOLFSSL_NO_ML_KEM -#ifdef WOLFSSL_WC_KYBER +#ifdef WOLFSSL_WC_MLKEM #ifndef WOLFSSL_NO_ML_KEM_512 if (ret == WOLFSSL_SUCCESS) ret = TLSX_UseSupportedCurve(extensions, WOLFSSL_ML_KEM_512, @@ -14444,8 +14449,8 @@ static int TLSX_PopulateSupportedGroups(WOLFSSL* ssl, TLSX** extensions) #endif #endif /* HAVE_LIBOQS */ #endif /* !WOLFSSL_NO_ML_KEM */ -#ifdef WOLFSSL_KYBER_ORIGINAL -#ifdef WOLFSSL_WC_KYBER +#ifdef WOLFSSL_MLKEM_KYBER +#ifdef WOLFSSL_WC_MLKEM #ifdef WOLFSSL_KYBER512 if (ret == WOLFSSL_SUCCESS) ret = TLSX_UseSupportedCurve(extensions, WOLFSSL_KYBER_LEVEL1, @@ -14522,8 +14527,8 @@ static int TLSX_PopulateSupportedGroups(WOLFSSL* ssl, TLSX** extensions) ssl->heap); #endif #endif /* HAVE_LIBOQS */ -#endif /* WOLFSSL_KYBER_ORIGINAL */ -#endif /* WOLFSSL_HAVE_KYBER */ +#endif /* WOLFSSL_MLKEM_KYBER */ +#endif /* WOLFSSL_HAVE_MLKEM */ (void)ssl; (void)extensions; diff --git a/src/tls13.c b/src/tls13.c index 61d97e1e8..3ec4b7ee7 100644 --- a/src/tls13.c +++ b/src/tls13.c @@ -13594,7 +13594,7 @@ int wolfSSL_UseKeyShare(WOLFSSL* ssl, word16 group) } #endif -#if defined(WOLFSSL_HAVE_KYBER) +#if defined(WOLFSSL_HAVE_MLKEM) if (WOLFSSL_NAMED_GROUP_IS_PQC(group) || WOLFSSL_NAMED_GROUP_IS_PQC_HYBRID(group)) { diff --git a/tests/api.c b/tests/api.c index 236082658..7a817b183 100644 --- a/tests/api.c +++ b/tests/api.c @@ -175,10 +175,10 @@ #include #endif -#ifdef WOLFSSL_HAVE_KYBER - #include -#ifdef WOLFSSL_WC_KYBER - #include +#ifdef WOLFSSL_HAVE_MLKEM + #include +#ifdef WOLFSSL_WC_MLKEM + #include #endif #endif #ifdef HAVE_DILITHIUM @@ -69974,8 +69974,8 @@ static int test_tls13_apis(void) #endif #if defined(HAVE_ECC) && defined(HAVE_SUPPORTED_CURVES) int groups[2] = { WOLFSSL_ECC_SECP256R1, -#ifdef WOLFSSL_HAVE_KYBER -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_HAVE_MLKEM +#ifdef 
WOLFSSL_MLKEM_KYBER #ifndef WOLFSSL_NO_KYBER512 WOLFSSL_KYBER_LEVEL1 #elif !defined(WOLFSSL_NO_KYBER768) @@ -70018,11 +70018,11 @@ static int test_tls13_apis(void) #endif #if (!defined(NO_ECC256) || defined(HAVE_ALL_CURVES)) && ECC_MIN_KEY_SZ <= 256 "P-256:secp256r1" -#if defined(WOLFSSL_HAVE_KYBER) && !defined(WOLFSSL_KYBER_NO_MALLOC) && \ - !defined(WOLFSSL_KYBER_NO_MAKE_KEY) && \ - !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) && \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) -#ifdef WOLFSSL_KYBER_ORIGINAL +#if defined(WOLFSSL_HAVE_MLKEM) && !defined(WOLFSSL_MLKEM_NO_MALLOC) && \ + !defined(WOLFSSL_MLKEM_NO_MAKE_KEY) && \ + !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) && \ + !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) +#ifdef WOLFSSL_MLKEM_KYBER #ifndef WOLFSSL_NO_KYBER512 ":P256_KYBER_LEVEL1" #elif !defined(WOLFSSL_NO_KYBER768) @@ -70042,11 +70042,11 @@ static int test_tls13_apis(void) #endif #endif #endif /* !defined(NO_ECC_SECP) */ -#if defined(WOLFSSL_HAVE_KYBER) && !defined(WOLFSSL_KYBER_NO_MALLOC) && \ - !defined(WOLFSSL_KYBER_NO_MAKE_KEY) && \ - !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) && \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) -#ifdef WOLFSSL_KYBER_ORIGINAL +#if defined(WOLFSSL_HAVE_MLKEM) && !defined(WOLFSSL_MLKEM_NO_MALLOC) && \ + !defined(WOLFSSL_MLKEM_NO_MAKE_KEY) && \ + !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) && \ + !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) +#ifdef WOLFSSL_MLKEM_KYBER #ifndef WOLFSSL_NO_KYBER512 ":KYBER_LEVEL1" #elif !defined(WOLFSSL_NO_KYBER768) @@ -70066,11 +70066,11 @@ static int test_tls13_apis(void) #endif ""; #endif /* defined(OPENSSL_EXTRA) && defined(HAVE_ECC) */ -#if defined(WOLFSSL_HAVE_KYBER) && !defined(WOLFSSL_KYBER_NO_MALLOC) && \ - !defined(WOLFSSL_KYBER_NO_MAKE_KEY) && \ - !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) && \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) - int kyberLevel; +#if defined(WOLFSSL_HAVE_MLKEM) && !defined(WOLFSSL_MLKEM_NO_MALLOC) && \ + !defined(WOLFSSL_MLKEM_NO_MAKE_KEY) && \ + !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) && \ + 
!defined(WOLFSSL_MLKEM_NO_DECAPSULATE) + int mlkemLevel; #endif #ifndef WOLFSSL_NO_TLS12 @@ -70225,38 +70225,39 @@ static int test_tls13_apis(void) #endif #endif -#if defined(WOLFSSL_HAVE_KYBER) && !defined(WOLFSSL_KYBER_NO_MALLOC) && \ - !defined(WOLFSSL_KYBER_NO_MAKE_KEY) && \ - !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) && \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) +#if defined(WOLFSSL_HAVE_MLKEM) && !defined(WOLFSSL_MLKEM_NO_MALLOC) && \ + !defined(WOLFSSL_MLKEM_NO_MAKE_KEY) && \ + !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) && \ + !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) #ifndef WOLFSSL_NO_ML_KEM #ifndef WOLFSSL_NO_ML_KEM_768 - kyberLevel = WOLFSSL_ML_KEM_768; + mlkemLevel = WOLFSSL_ML_KEM_768; #elif !defined(WOLFSSL_NO_ML_KEM_1024) - kyberLevel = WOLFSSL_ML_KEM_1024; + mlkemLevel = WOLFSSL_ML_KEM_1024; #else - kyberLevel = WOLFSSL_ML_KEM_512; + mlkemLevel = WOLFSSL_ML_KEM_512; #endif #else #ifndef WOLFSSL_NO_KYBER768 - kyberLevel = WOLFSSL_KYBER_LEVEL3; + mlkemLevel = WOLFSSL_KYBER_LEVEL3; #elif !defined(WOLFSSL_NO_KYBER1024) - kyberLevel = WOLFSSL_KYBER_LEVEL5; + mlkemLevel = WOLFSSL_KYBER_LEVEL5; #else - kyberLevel = WOLFSSL_KYBER_LEVEL1; + mlkemLevel = WOLFSSL_KYBER_LEVEL1; #endif #endif - ExpectIntEQ(wolfSSL_UseKeyShare(NULL, kyberLevel), WC_NO_ERR_TRACE(BAD_FUNC_ARG)); + ExpectIntEQ(wolfSSL_UseKeyShare(NULL, mlkemLevel), + WC_NO_ERR_TRACE(BAD_FUNC_ARG)); #ifndef NO_WOLFSSL_SERVER - ExpectIntEQ(wolfSSL_UseKeyShare(serverSsl, kyberLevel), + ExpectIntEQ(wolfSSL_UseKeyShare(serverSsl, mlkemLevel), WOLFSSL_SUCCESS); #endif #ifndef NO_WOLFSSL_CLIENT #ifndef WOLFSSL_NO_TLS12 - ExpectIntEQ(wolfSSL_UseKeyShare(clientTls12Ssl, kyberLevel), + ExpectIntEQ(wolfSSL_UseKeyShare(clientTls12Ssl, mlkemLevel), WC_NO_ERR_TRACE(BAD_FUNC_ARG)); #endif - ExpectIntEQ(wolfSSL_UseKeyShare(clientSsl, kyberLevel), + ExpectIntEQ(wolfSSL_UseKeyShare(clientSsl, mlkemLevel), WOLFSSL_SUCCESS); #endif #endif @@ -87237,7 +87238,7 @@ static int test_dtls13_frag_ch_pq(void) const char *test_str = 
"test"; int test_str_size; byte buf[255]; -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER int group = WOLFSSL_KYBER_LEVEL5; #else int group = WOLFSSL_ML_KEM_1024; @@ -87251,7 +87252,7 @@ static int test_dtls13_frag_ch_pq(void) ExpectIntEQ(wolfSSL_UseKeyShare(ssl_c, group), WOLFSSL_SUCCESS); ExpectIntEQ(wolfSSL_dtls13_allow_ch_frag(ssl_s, 1), WOLFSSL_SUCCESS); ExpectIntEQ(test_memio_do_handshake(ssl_c, ssl_s, 10, NULL), 0); -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER ExpectStrEQ(wolfSSL_get_curve_name(ssl_c), "KYBER_LEVEL5"); ExpectStrEQ(wolfSSL_get_curve_name(ssl_s), "KYBER_LEVEL5"); #else @@ -87775,7 +87776,7 @@ static int test_dtls13_missing_finished_server(void) defined(HAVE_LIBOQS) static void test_tls13_pq_groups_ctx_ready(WOLFSSL_CTX* ctx) { -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER int group = WOLFSSL_KYBER_LEVEL5; #else int group = WOLFSSL_ML_KEM_1024; @@ -87785,7 +87786,7 @@ static void test_tls13_pq_groups_ctx_ready(WOLFSSL_CTX* ctx) static void test_tls13_pq_groups_on_result(WOLFSSL* ssl) { -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER AssertStrEQ(wolfSSL_get_curve_name(ssl), "KYBER_LEVEL5"); #else AssertStrEQ(wolfSSL_get_curve_name(ssl), "ML_KEM_1024"); diff --git a/tests/api/test_mlkem.c b/tests/api/test_mlkem.c index 1b5514407..a4c2fcd15 100644 --- a/tests/api/test_mlkem.c +++ b/tests/api/test_mlkem.c @@ -35,10 +35,10 @@ #include #endif -#ifdef WOLFSSL_HAVE_KYBER - #include -#ifdef WOLFSSL_WC_KYBER - #include +#ifdef WOLFSSL_HAVE_MLKEM + #include +#ifdef WOLFSSL_WC_MLKEM + #include #endif #endif #include @@ -49,9 +49,9 @@ int test_wc_mlkem_make_key_kats(void) { EXPECT_DECLS; -#if defined(WOLFSSL_HAVE_KYBER) && defined(WOLFSSL_WC_KYBER) && \ - !defined(WOLFSSL_NO_ML_KEM) && !defined(WOLFSSL_KYBER_NO_MAKE_KEY) - KyberKey* key; +#if defined(WOLFSSL_HAVE_MLKEM) && defined(WOLFSSL_WC_MLKEM) && \ + !defined(WOLFSSL_NO_ML_KEM) && !defined(WOLFSSL_MLKEM_NO_MAKE_KEY) + MlKemKey* key; #ifndef 
WOLFSSL_NO_ML_KEM_512 static const byte seed_512[WC_ML_KEM_MAKEKEY_RAND_SZ] = { /* d */ @@ -1453,47 +1453,47 @@ int test_wc_mlkem_make_key_kats(void) static byte pubKey[WC_ML_KEM_MAX_PUBLIC_KEY_SIZE]; static byte privKey[WC_ML_KEM_MAX_PRIVATE_KEY_SIZE]; - key = (KyberKey*)XMALLOC(sizeof(KyberKey), NULL, DYNAMIC_TYPE_TMP_BUFFER); + key = (MlKemKey*)XMALLOC(sizeof(MlKemKey), NULL, DYNAMIC_TYPE_TMP_BUFFER); ExpectNotNull(key); if (key != NULL) { - XMEMSET(key, 0, sizeof(KyberKey)); + XMEMSET(key, 0, sizeof(MlKemKey)); } #ifndef WOLFSSL_NO_ML_KEM_512 - ExpectIntEQ(wc_KyberKey_Init(WC_ML_KEM_512, key, NULL, INVALID_DEVID), 0); - ExpectIntEQ(wc_KyberKey_MakeKeyWithRandom(key, seed_512, sizeof(seed_512)), + ExpectIntEQ(wc_MlKemKey_Init(key, WC_ML_KEM_512, NULL, INVALID_DEVID), 0); + ExpectIntEQ(wc_MlKemKey_MakeKeyWithRandom(key, seed_512, sizeof(seed_512)), 0); - ExpectIntEQ(wc_KyberKey_EncodePublicKey(key, pubKey, + ExpectIntEQ(wc_MlKemKey_EncodePublicKey(key, pubKey, WC_ML_KEM_512_PUBLIC_KEY_SIZE), 0); - ExpectIntEQ(wc_KyberKey_EncodePrivateKey(key, privKey, + ExpectIntEQ(wc_MlKemKey_EncodePrivateKey(key, privKey, WC_ML_KEM_512_PRIVATE_KEY_SIZE), 0); ExpectIntEQ(XMEMCMP(pubKey, ek_512, WC_ML_KEM_512_PUBLIC_KEY_SIZE), 0); ExpectIntEQ(XMEMCMP(privKey, dk_512, WC_ML_KEM_512_PRIVATE_KEY_SIZE), 0); - wc_KyberKey_Free(key); + wc_MlKemKey_Free(key); #endif #ifndef WOLFSSL_NO_ML_KEM_768 - ExpectIntEQ(wc_KyberKey_Init(WC_ML_KEM_768, key, NULL, INVALID_DEVID), 0); - ExpectIntEQ(wc_KyberKey_MakeKeyWithRandom(key, seed_768, sizeof(seed_768)), + ExpectIntEQ(wc_MlKemKey_Init(key, WC_ML_KEM_768, NULL, INVALID_DEVID), 0); + ExpectIntEQ(wc_MlKemKey_MakeKeyWithRandom(key, seed_768, sizeof(seed_768)), 0); - ExpectIntEQ(wc_KyberKey_EncodePublicKey(key, pubKey, + ExpectIntEQ(wc_MlKemKey_EncodePublicKey(key, pubKey, WC_ML_KEM_768_PUBLIC_KEY_SIZE), 0); - ExpectIntEQ(wc_KyberKey_EncodePrivateKey(key, privKey, + ExpectIntEQ(wc_MlKemKey_EncodePrivateKey(key, privKey, 
WC_ML_KEM_768_PRIVATE_KEY_SIZE), 0); ExpectIntEQ(XMEMCMP(pubKey, ek_768, WC_ML_KEM_768_PUBLIC_KEY_SIZE), 0); ExpectIntEQ(XMEMCMP(privKey, dk_768, WC_ML_KEM_768_PRIVATE_KEY_SIZE), 0); - wc_KyberKey_Free(key); + wc_MlKemKey_Free(key); #endif #ifndef WOLFSSL_NO_ML_KEM_1024 - ExpectIntEQ(wc_KyberKey_Init(WC_ML_KEM_1024, key, NULL, INVALID_DEVID), 0); - ExpectIntEQ(wc_KyberKey_MakeKeyWithRandom(key, seed_1024, + ExpectIntEQ(wc_MlKemKey_Init(key, WC_ML_KEM_1024, NULL, INVALID_DEVID), 0); + ExpectIntEQ(wc_MlKemKey_MakeKeyWithRandom(key, seed_1024, sizeof(seed_1024)), 0); - ExpectIntEQ(wc_KyberKey_EncodePublicKey(key, pubKey, + ExpectIntEQ(wc_MlKemKey_EncodePublicKey(key, pubKey, WC_ML_KEM_1024_PUBLIC_KEY_SIZE), 0); - ExpectIntEQ(wc_KyberKey_EncodePrivateKey(key, privKey, + ExpectIntEQ(wc_MlKemKey_EncodePrivateKey(key, privKey, WC_ML_KEM_1024_PRIVATE_KEY_SIZE), 0); ExpectIntEQ(XMEMCMP(pubKey, ek_1024, WC_ML_KEM_1024_PUBLIC_KEY_SIZE), 0); ExpectIntEQ(XMEMCMP(privKey, dk_1024, WC_ML_KEM_1024_PRIVATE_KEY_SIZE), 0); - wc_KyberKey_Free(key); + wc_MlKemKey_Free(key); #endif XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -1504,9 +1504,9 @@ int test_wc_mlkem_make_key_kats(void) int test_wc_mlkem_encapsulate_kats(void) { EXPECT_DECLS; -#if defined(WOLFSSL_HAVE_KYBER) && defined(WOLFSSL_WC_KYBER) && \ - !defined(WOLFSSL_NO_ML_KEM) && !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) - KyberKey* key; +#if defined(WOLFSSL_HAVE_MLKEM) && defined(WOLFSSL_WC_MLKEM) && \ + !defined(WOLFSSL_NO_ML_KEM) && !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) + MlKemKey* key; #ifndef WOLFSSL_NO_ML_KEM_512 static const byte ek_512[WC_ML_KEM_512_PUBLIC_KEY_SIZE] = { 0xDD, 0x19, 0x24, 0x93, 0x5A, 0xA8, 0xE6, 0x17, @@ -2436,38 +2436,38 @@ int test_wc_mlkem_encapsulate_kats(void) static byte ct[WC_ML_KEM_MAX_CIPHER_TEXT_SIZE]; static byte ss[WC_ML_KEM_SS_SZ]; - key = (KyberKey*)XMALLOC(sizeof(KyberKey), NULL, DYNAMIC_TYPE_TMP_BUFFER); + key = (MlKemKey*)XMALLOC(sizeof(MlKemKey), NULL, DYNAMIC_TYPE_TMP_BUFFER); 
ExpectNotNull(key); if (key != NULL) { - XMEMSET(key, 0, sizeof(KyberKey)); + XMEMSET(key, 0, sizeof(MlKemKey)); } #ifndef WOLFSSL_NO_ML_KEM_512 - ExpectIntEQ(wc_KyberKey_Init(WC_ML_KEM_512, key, NULL, INVALID_DEVID), 0); - ExpectIntEQ(wc_KyberKey_DecodePublicKey(key, ek_512, sizeof(ek_512)), 0); - ExpectIntEQ(wc_KyberKey_EncapsulateWithRandom(key, ct, ss, seed_512, + ExpectIntEQ(wc_MlKemKey_Init(key, WC_ML_KEM_512, NULL, INVALID_DEVID), 0); + ExpectIntEQ(wc_MlKemKey_DecodePublicKey(key, ek_512, sizeof(ek_512)), 0); + ExpectIntEQ(wc_MlKemKey_EncapsulateWithRandom(key, ct, ss, seed_512, sizeof(seed_512)), 0); ExpectIntEQ(XMEMCMP(ct, c_512, WC_ML_KEM_512_CIPHER_TEXT_SIZE), 0); ExpectIntEQ(XMEMCMP(ss, k_512, WC_ML_KEM_SS_SZ), 0); - wc_KyberKey_Free(key); + wc_MlKemKey_Free(key); #endif #ifndef WOLFSSL_NO_ML_KEM_768 - ExpectIntEQ(wc_KyberKey_Init(WC_ML_KEM_768, key, NULL, INVALID_DEVID), 0); - ExpectIntEQ(wc_KyberKey_DecodePublicKey(key, ek_768, sizeof(ek_768)), 0); - ExpectIntEQ(wc_KyberKey_EncapsulateWithRandom(key, ct, ss, seed_768, + ExpectIntEQ(wc_MlKemKey_Init(key, WC_ML_KEM_768, NULL, INVALID_DEVID), 0); + ExpectIntEQ(wc_MlKemKey_DecodePublicKey(key, ek_768, sizeof(ek_768)), 0); + ExpectIntEQ(wc_MlKemKey_EncapsulateWithRandom(key, ct, ss, seed_768, sizeof(seed_768)), 0); ExpectIntEQ(XMEMCMP(ct, c_768, WC_ML_KEM_768_CIPHER_TEXT_SIZE), 0); ExpectIntEQ(XMEMCMP(ss, k_768, WC_ML_KEM_SS_SZ), 0); - wc_KyberKey_Free(key); + wc_MlKemKey_Free(key); #endif #ifndef WOLFSSL_NO_ML_KEM_1024 - ExpectIntEQ(wc_KyberKey_Init(WC_ML_KEM_1024, key, NULL, INVALID_DEVID), 0); - ExpectIntEQ(wc_KyberKey_DecodePublicKey(key, ek_1024, sizeof(ek_1024)), 0); - ExpectIntEQ(wc_KyberKey_EncapsulateWithRandom(key, ct, ss, seed_1024, + ExpectIntEQ(wc_MlKemKey_Init(key, WC_ML_KEM_1024, NULL, INVALID_DEVID), 0); + ExpectIntEQ(wc_MlKemKey_DecodePublicKey(key, ek_1024, sizeof(ek_1024)), 0); + ExpectIntEQ(wc_MlKemKey_EncapsulateWithRandom(key, ct, ss, seed_1024, sizeof(seed_1024)), 0); 
ExpectIntEQ(XMEMCMP(ct, c_1024, WC_ML_KEM_1024_CIPHER_TEXT_SIZE), 0); ExpectIntEQ(XMEMCMP(ss, k_1024, WC_ML_KEM_SS_SZ), 0); - wc_KyberKey_Free(key); + wc_MlKemKey_Free(key); #endif XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -2478,9 +2478,9 @@ int test_wc_mlkem_encapsulate_kats(void) int test_wc_mlkem_decapsulate_kats(void) { EXPECT_DECLS; -#if defined(WOLFSSL_HAVE_KYBER) && defined(WOLFSSL_WC_KYBER) && \ - !defined(WOLFSSL_NO_ML_KEM) && !defined(WOLFSSL_KYBER_NO_DECAPSULATE) - KyberKey* key; +#if defined(WOLFSSL_HAVE_MLKEM) && defined(WOLFSSL_WC_MLKEM) && \ + !defined(WOLFSSL_NO_ML_KEM) && !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) + MlKemKey* key; #ifndef WOLFSSL_NO_ML_KEM_512 static const byte dk_512[WC_ML_KEM_512_PRIVATE_KEY_SIZE] = { 0x69, 0xF9, 0xCB, 0xFD, 0x12, 0x37, 0xBA, 0x16, @@ -3847,32 +3847,32 @@ int test_wc_mlkem_decapsulate_kats(void) #endif static byte ss[WC_ML_KEM_SS_SZ]; - key = (KyberKey*)XMALLOC(sizeof(KyberKey), NULL, DYNAMIC_TYPE_TMP_BUFFER); + key = (MlKemKey*)XMALLOC(sizeof(MlKemKey), NULL, DYNAMIC_TYPE_TMP_BUFFER); ExpectNotNull(key); if (key != NULL) { - XMEMSET(key, 0, sizeof(KyberKey)); + XMEMSET(key, 0, sizeof(MlKemKey)); } #ifndef WOLFSSL_NO_ML_KEM_512 - ExpectIntEQ(wc_KyberKey_Init(WC_ML_KEM_512, key, NULL, INVALID_DEVID), 0); - ExpectIntEQ(wc_KyberKey_DecodePrivateKey(key, dk_512, sizeof(dk_512)), 0); - ExpectIntEQ(wc_KyberKey_Decapsulate(key, ss, c_512, sizeof(c_512)), 0); + ExpectIntEQ(wc_MlKemKey_Init(key, WC_ML_KEM_512, NULL, INVALID_DEVID), 0); + ExpectIntEQ(wc_MlKemKey_DecodePrivateKey(key, dk_512, sizeof(dk_512)), 0); + ExpectIntEQ(wc_MlKemKey_Decapsulate(key, ss, c_512, sizeof(c_512)), 0); ExpectIntEQ(XMEMCMP(ss, kprime_512, WC_ML_KEM_SS_SZ), 0); - wc_KyberKey_Free(key); + wc_MlKemKey_Free(key); #endif #ifndef WOLFSSL_NO_ML_KEM_768 - ExpectIntEQ(wc_KyberKey_Init(WC_ML_KEM_768, key, NULL, INVALID_DEVID), 0); - ExpectIntEQ(wc_KyberKey_DecodePrivateKey(key, dk_768, sizeof(dk_768)), 0); - ExpectIntEQ(wc_KyberKey_Decapsulate(key, 
ss, c_768, sizeof(c_768)), 0); + ExpectIntEQ(wc_MlKemKey_Init(key, WC_ML_KEM_768, NULL, INVALID_DEVID), 0); + ExpectIntEQ(wc_MlKemKey_DecodePrivateKey(key, dk_768, sizeof(dk_768)), 0); + ExpectIntEQ(wc_MlKemKey_Decapsulate(key, ss, c_768, sizeof(c_768)), 0); ExpectIntEQ(XMEMCMP(ss, kprime_768, WC_ML_KEM_SS_SZ), 0); - wc_KyberKey_Free(key); + wc_MlKemKey_Free(key); #endif #ifndef WOLFSSL_NO_ML_KEM_1024 - ExpectIntEQ(wc_KyberKey_Init(WC_ML_KEM_1024, key, NULL, INVALID_DEVID), 0); - ExpectIntEQ(wc_KyberKey_DecodePrivateKey(key, dk_1024, sizeof(dk_1024)), 0); - ExpectIntEQ(wc_KyberKey_Decapsulate(key, ss, c_1024, sizeof(c_1024)), 0); + ExpectIntEQ(wc_MlKemKey_Init(key, WC_ML_KEM_1024, NULL, INVALID_DEVID), 0); + ExpectIntEQ(wc_MlKemKey_DecodePrivateKey(key, dk_1024, sizeof(dk_1024)), 0); + ExpectIntEQ(wc_MlKemKey_Decapsulate(key, ss, c_1024, sizeof(c_1024)), 0); ExpectIntEQ(XMEMCMP(ss, kprime_1024, WC_ML_KEM_SS_SZ), 0); - wc_KyberKey_Free(key); + wc_MlKemKey_Free(key); #endif XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER); diff --git a/tests/suites.c b/tests/suites.c index 503dec1de..60b89e335 100644 --- a/tests/suites.c +++ b/tests/suites.c @@ -168,7 +168,7 @@ static int IsValidCipherSuite(const char* line, char *suite, size_t suite_spc) return valid; } -#if defined(WOLFSSL_HAVE_KYBER) +#if defined(WOLFSSL_HAVE_MLKEM) static int IsKyberLevelAvailable(const char* line) { int available = 0; @@ -201,7 +201,7 @@ static int IsKyberLevelAvailable(const char* line) } #endif #endif - #ifdef WOLFSSL_KYBER_ORIGINAL + #ifdef WOLFSSL_MLKEM_KYBER if ((size_t)end - (size_t)begin == 12) { #ifndef WOLFSSL_NO_KYBER512 if (XSTRNCMP(begin, "KYBER_LEVEL1", 12) == 0) { @@ -222,9 +222,9 @@ static int IsKyberLevelAvailable(const char* line) #endif } -#if defined(WOLFSSL_KYBER_NO_MAKE_KEY) || \ - defined(WOLFSSL_KYBER_NO_ENCAPSULATE) || \ - defined(WOLFSSL_KYBER_NO_DECAPSULATE) +#if defined(WOLFSSL_MLKEM_NO_MAKE_KEY) || \ + defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) || \ + 
defined(WOLFSSL_MLKEM_NO_DECAPSULATE) (void)available; return begin == NULL; #else @@ -414,7 +414,7 @@ static int execute_test_case(int svr_argc, char** svr_argv, #endif return NOT_BUILT_IN; } -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM if (!IsKyberLevelAvailable(commandLine)) { #ifdef DEBUG_SUITE_TESTS printf("Kyber level not supported in build: %s\n", commandLine); diff --git a/wolfcrypt/benchmark/benchmark.c b/wolfcrypt/benchmark/benchmark.c index fa308dfca..d2f93c66d 100644 --- a/wolfcrypt/benchmark/benchmark.c +++ b/wolfcrypt/benchmark/benchmark.c @@ -167,13 +167,13 @@ #ifdef HAVE_ED448 #include #endif -#ifdef WOLFSSL_HAVE_KYBER - #include - #ifdef WOLFSSL_WC_KYBER - #include +#ifdef WOLFSSL_HAVE_MLKEM + #include + #ifdef WOLFSSL_WC_MLKEM + #include #endif #if defined(HAVE_LIBOQS) - #include + #include #endif #endif #if defined(WOLFSSL_HAVE_LMS) && !defined(WOLFSSL_LMS_VERIFY_ONLY) @@ -1123,7 +1123,7 @@ static const bench_pq_hash_sig_alg bench_pq_hash_sig_opt[] = { }; #endif /* BENCH_PQ_STATEFUL_HBS */ -#if defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_FALCON) || \ +#if defined(WOLFSSL_HAVE_MLKEM) || defined(HAVE_FALCON) || \ defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS) /* The post-quantum-specific mapping of command line option to bit values and * OQS name. */ @@ -1138,7 +1138,7 @@ typedef struct bench_pq_alg { * options. 
*/ static const bench_pq_alg bench_pq_asym_opt[] = { { "-pq", 0xffffffff }, -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM { "-kyber", BENCH_KYBER }, { "-kyber512", BENCH_KYBER512 }, { "-kyber768", BENCH_KYBER768 }, @@ -1290,7 +1290,7 @@ static const char* bench_result_words1[][4] = { defined(HAVE_CURVE25519) || defined(HAVE_CURVE25519_SHARED_SECRET) || \ defined(HAVE_ED25519) || defined(HAVE_CURVE448) || \ defined(HAVE_CURVE448_SHARED_SECRET) || defined(HAVE_ED448) || \ - defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_DILITHIUM) + defined(WOLFSSL_HAVE_MLKEM) || defined(HAVE_DILITHIUM) static const char* bench_desc_words[][15] = { /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 */ @@ -1753,7 +1753,7 @@ static const char* bench_result_words3[][5] = { || !defined(NO_DH) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC) \ || defined(HAVE_CURVE25519) || defined(HAVE_ED25519) \ || defined(HAVE_CURVE448) || defined(HAVE_ED448) \ - || defined(WOLFSSL_HAVE_KYBER)) + || defined(WOLFSSL_HAVE_MLKEM)) #define HAVE_LOCAL_RNG static THREAD_LS_T WC_RNG gRng; #define GLOBAL_RNG &gRng @@ -1765,7 +1765,7 @@ static const char* bench_result_words3[][5] = { defined(HAVE_CURVE448) || defined(HAVE_ED448) || \ defined(HAVE_ECC) || !defined(NO_DH) || \ !defined(NO_RSA) || defined(HAVE_SCRYPT) || \ - defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_DILITHIUM) || \ + defined(WOLFSSL_HAVE_MLKEM) || defined(HAVE_DILITHIUM) || \ defined(WOLFSSL_HAVE_LMS) #define BENCH_ASYM #endif @@ -1774,7 +1774,7 @@ static const char* bench_result_words3[][5] = { #if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DH) || \ defined(HAVE_CURVE25519) || defined(HAVE_ED25519) || \ defined(HAVE_CURVE448) || defined(HAVE_ED448) || \ - defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_DILITHIUM) || \ + defined(WOLFSSL_HAVE_MLKEM) || defined(HAVE_DILITHIUM) || \ defined(WOLFSSL_HAVE_LMS) static const char* bench_result_words2[][5] = { #ifdef BENCH_MICROSECOND @@ -2744,7 +2744,7 @@ static void bench_stats_sym_finish(const char* 
desc, int useDeviceID, #if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DH) || \ defined(HAVE_CURVE25519) || defined(HAVE_ED25519) || \ defined(HAVE_CURVE448) || defined(HAVE_ED448) || \ - defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_DILITHIUM) || \ + defined(WOLFSSL_HAVE_MLKEM) || defined(HAVE_DILITHIUM) || \ defined(WOLFSSL_HAVE_LMS) static void bench_stats_asym_finish_ex(const char* algo, int strength, const char* desc, const char* desc_extra, int useDeviceID, int count, @@ -3690,39 +3690,39 @@ static void* benchmarks_do(void* args) } #endif -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM if (bench_all || (bench_pq_asym_algs & BENCH_KYBER)) { #ifndef WOLFSSL_NO_ML_KEM #ifdef WOLFSSL_WC_ML_KEM_512 if (bench_all || (bench_pq_asym_algs & BENCH_KYBER512)) { - bench_kyber(WC_ML_KEM_512); + bench_mlkem(WC_ML_KEM_512); } #endif #ifdef WOLFSSL_WC_ML_KEM_768 if (bench_all || (bench_pq_asym_algs & BENCH_KYBER768)) { - bench_kyber(WC_ML_KEM_768); + bench_mlkem(WC_ML_KEM_768); } #endif #ifdef WOLFSSL_WC_ML_KEM_1024 if (bench_all || (bench_pq_asym_algs & BENCH_KYBER1024)) { - bench_kyber(WC_ML_KEM_1024); + bench_mlkem(WC_ML_KEM_1024); } #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 if (bench_all || (bench_pq_asym_algs & BENCH_KYBER512)) { - bench_kyber(KYBER512); + bench_mlkem(KYBER512); } #endif #ifdef WOLFSSL_KYBER768 if (bench_all || (bench_pq_asym_algs & BENCH_KYBER768)) { - bench_kyber(KYBER768); + bench_mlkem(KYBER768); } #endif #ifdef WOLFSSL_KYBER1024 if (bench_all || (bench_pq_asym_algs & BENCH_KYBER1024)) { - bench_kyber(KYBER1024); + bench_mlkem(KYBER1024); } #endif #endif @@ -9652,17 +9652,17 @@ exit: } #endif /* !NO_DH */ -#ifdef WOLFSSL_HAVE_KYBER -static void bench_kyber_keygen(int type, const char* name, int keySize, +#ifdef WOLFSSL_HAVE_MLKEM +static void bench_mlkem_keygen(int type, const char* name, int keySize, KyberKey* key) { -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY +#ifndef 
WOLFSSL_MLKEM_NO_MAKE_KEY int ret = 0, times, count, pending = 0; double start; const char**desc = bench_desc_words[lng_index]; DECLARE_MULTI_VALUE_STATS_VARS() - /* KYBER Make Key */ + /* MLKEM Make Key */ bench_stats_start(&count, &start); do { /* while free pending slots in queue, submit ops */ @@ -9672,10 +9672,10 @@ static void bench_kyber_keygen(int type, const char* name, int keySize, if (ret != 0) goto exit; -#ifdef KYBER_NONDETERMINISTIC +#ifdef MLKEM_NONDETERMINISTIC ret = wc_KyberKey_MakeKey(key, &gRng); #else - unsigned char rand[KYBER_MAKEKEY_RAND_SZ] = {0,}; + unsigned char rand[WC_ML_KEM_MAKEKEY_RAND_SZ] = {0,}; ret = wc_KyberKey_MakeKeyWithRandom(key, rand, sizeof(rand)); #endif if (ret != 0) @@ -9699,20 +9699,20 @@ exit: (void)name; (void)keySize; (void)key; -#endif /* !WOLFSSL_KYBER_NO_MAKE_KEY */ +#endif /* !WOLFSSL_MLKEM_NO_MAKE_KEY */ } -#if !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) || \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) -static void bench_kyber_encap(int type, const char* name, int keySize, +#if !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) || \ + !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) +static void bench_mlkem_encap(int type, const char* name, int keySize, KyberKey* key1, KyberKey* key2) { int ret = 0, times, count, pending = 0; double start; const char**desc = bench_desc_words[lng_index]; - byte ct[KYBER_MAX_CIPHER_TEXT_SIZE]; - byte ss[KYBER_SS_SZ]; - byte pub[KYBER_MAX_PUBLIC_KEY_SIZE]; + byte ct[WC_ML_KEM_MAX_CIPHER_TEXT_SIZE]; + byte ss[WC_ML_KEM_SS_SZ]; + byte pub[WC_ML_KEM_MAX_PUBLIC_KEY_SIZE]; word32 pubLen; word32 ctSz; DECLARE_MULTI_VALUE_STATS_VARS() @@ -9739,16 +9739,16 @@ static void bench_kyber_encap(int type, const char* name, int keySize, return; } -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE - /* KYBER Encapsulate */ +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE + /* MLKEM Encapsulate */ bench_stats_start(&count, &start); do { /* while free pending slots in queue, submit ops */ for (times = 0; times < agreeTimes || pending > 0; times++) { 
-#ifdef KYBER_NONDETERMINISTIC +#ifdef MLKEM_NONDETERMINISTIC ret = wc_KyberKey_Encapsulate(key2, ct, ss, &gRng); #else - unsigned char rand[KYBER_ENC_RAND_SZ] = {0,}; + unsigned char rand[WC_ML_KEM_ENC_RAND_SZ] = {0,}; ret = wc_KyberKey_EncapsulateWithRandom(key2, ct, ss, rand, sizeof(rand)); #endif @@ -9770,10 +9770,10 @@ exit_encap: #endif #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE RESET_MULTI_VALUE_STATS_VARS(); - /* KYBER Decapsulate */ + /* MLKEM Decapsulate */ bench_stats_start(&count, &start); do { /* while free pending slots in queue, submit ops */ @@ -9799,7 +9799,7 @@ exit_decap: } #endif -void bench_kyber(int type) +void bench_mlkem(int type) { KyberKey key1; KyberKey key2; @@ -9827,7 +9827,7 @@ void bench_kyber(int type) break; #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 case KYBER512: name = "KYBER512 "; @@ -9847,12 +9847,14 @@ void bench_kyber(int type) break; #endif #endif + default: + return; } - bench_kyber_keygen(type, name, keySize, &key1); -#if !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) || \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) - bench_kyber_encap(type, name, keySize, &key1, &key2); + bench_mlkem_keygen(type, name, keySize, &key1); +#if !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) || \ + !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) + bench_mlkem_encap(type, name, keySize, &key1, &key2); #endif wc_KyberKey_Free(&key2); @@ -15165,7 +15167,7 @@ static void Usage(void) print_alg(bench_asym_opt[i].str, &line); for (i=0; bench_other_opt[i].str != NULL; i++) print_alg(bench_other_opt[i].str, &line); -#if defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_FALCON) || \ +#if defined(WOLFSSL_HAVE_MLKEM) || defined(HAVE_FALCON) || \ defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS) for (i=0; bench_pq_asym_opt[i].str != NULL; i++) print_alg(bench_pq_asym_opt[i].str, &line); @@ -15448,7 +15450,7 @@ int wolfcrypt_benchmark_main(int argc, char** argv) optMatched = 1; } } - #if 
defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_FALCON) || \ + #if defined(WOLFSSL_HAVE_MLKEM) || defined(HAVE_FALCON) || \ defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS) /* Known asymmetric post-quantum algorithms */ for (i=0; !optMatched && bench_pq_asym_opt[i].str != NULL; i++) { diff --git a/wolfcrypt/benchmark/benchmark.h b/wolfcrypt/benchmark/benchmark.h index ac4c16864..8103fd506 100644 --- a/wolfcrypt/benchmark/benchmark.h +++ b/wolfcrypt/benchmark/benchmark.h @@ -102,7 +102,7 @@ void bench_rsaKeyGen_size(int useDeviceID, word32 keySz); void bench_rsa(int useDeviceID); void bench_rsa_key(int useDeviceID, word32 keySz); void bench_dh(int useDeviceID); -void bench_kyber(int type); +void bench_mlkem(int type); void bench_lms(void); void bench_xmss(int hash); void bench_ecc_curve(int curveId); diff --git a/wolfcrypt/src/cryptocb.c b/wolfcrypt/src/cryptocb.c index daf9cb99b..f775070dd 100644 --- a/wolfcrypt/src/cryptocb.c +++ b/wolfcrypt/src/cryptocb.c @@ -865,7 +865,7 @@ int wc_CryptoCb_Ed25519Verify(const byte* sig, word32 sigLen, } #endif /* HAVE_ED25519 */ -#if defined(WOLFSSL_HAVE_KYBER) +#if defined(WOLFSSL_HAVE_MLKEM) int wc_CryptoCb_PqcKemGetDevId(int type, void* key) { int devId = INVALID_DEVID; @@ -984,7 +984,7 @@ int wc_CryptoCb_PqcDecapsulate(const byte* ciphertext, word32 ciphertextLen, return wc_CryptoCb_TranslateErrorCode(ret); } -#endif /* WOLFSSL_HAVE_KYBER */ +#endif /* WOLFSSL_HAVE_MLKEM */ #if defined(HAVE_FALCON) || defined(HAVE_DILITHIUM) int wc_CryptoCb_PqcSigGetDevId(int type, void* key) diff --git a/wolfcrypt/src/ext_kyber.c b/wolfcrypt/src/ext_mlkem.c similarity index 91% rename from wolfcrypt/src/ext_kyber.c rename to wolfcrypt/src/ext_mlkem.c index 50196d444..7b5a209d7 100644 --- a/wolfcrypt/src/ext_kyber.c +++ b/wolfcrypt/src/ext_mlkem.c @@ -1,4 +1,4 @@ -/* ext_kyber.c +/* ext_mlkem.c * * Copyright (C) 2006-2025 wolfSSL Inc. 
* @@ -27,8 +27,8 @@ #include #include -#if defined(WOLFSSL_HAVE_KYBER) && !defined(WOLFSSL_WC_KYBER) -#include +#if defined(WOLFSSL_HAVE_MLKEM) && !defined(WOLFSSL_WC_MLKEM) +#include #ifdef NO_INLINE #include @@ -48,7 +48,7 @@ static const char* OQS_ID2name(int id) { case WC_ML_KEM_768: return OQS_KEM_alg_ml_kem_768; case WC_ML_KEM_1024: return OQS_KEM_alg_ml_kem_1024; #endif - #ifdef WOLFSSL_KYBER_ORIGINAL + #ifdef WOLFSSL_MLKEM_KYBER case KYBER_LEVEL1: return OQS_KEM_alg_kyber_512; case KYBER_LEVEL3: return OQS_KEM_alg_kyber_768; case KYBER_LEVEL5: return OQS_KEM_alg_kyber_1024; @@ -58,7 +58,7 @@ static const char* OQS_ID2name(int id) { return NULL; } -int ext_kyber_enabled(int id) +int ext_mlkem_enabled(int id) { const char * name = OQS_ID2name(id); return OQS_KEM_alg_is_enabled(name); @@ -71,15 +71,15 @@ int ext_kyber_enabled(int id) /** * Initialize the Kyber key. * - * @param [in] type Type of key: KYBER512, KYBER768, KYBER1024. * @param [out] key Kyber key object to initialize. + * @param [in] type Type of key: KYBER512, KYBER768, KYBER1024. * @param [in] heap Dynamic memory hint. * @param [in] devId Device Id. * @return 0 on success. * @return BAD_FUNC_ARG when key is NULL or type is unrecognized. * @return NOT_COMPILED_IN when key type is not supported. */ -int wc_KyberKey_Init(int type, KyberKey* key, void* heap, int devId) +int wc_MlKemKey_Init(MlKemKey* key, int type, void* heap, int devId) { int ret = 0; @@ -97,7 +97,7 @@ int wc_KyberKey_Init(int type, KyberKey* key, void* heap, int devId) case WC_ML_KEM_1024: #endif /* HAVE_LIBOQS */ #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER case KYBER_LEVEL1: #ifdef HAVE_LIBOQS case KYBER_LEVEL3: @@ -135,12 +135,14 @@ int wc_KyberKey_Init(int type, KyberKey* key, void* heap, int devId) * * @param [in, out] key Kyber key object to dispose of. */ -void wc_KyberKey_Free(KyberKey* key) +int wc_MlKemKey_Free(MlKemKey* key) { if (key != NULL) { /* Ensure all private data is zeroed. 
*/ ForceZero(key, sizeof(*key)); } + + return 0; } /******************************************************************************/ @@ -155,7 +157,7 @@ void wc_KyberKey_Free(KyberKey* key) * @return BAD_FUNC_ARG when key or len is NULL. * @return NOT_COMPILED_IN when key type is not supported. */ -int wc_KyberKey_PrivateKeySize(KyberKey* key, word32* len) +int wc_MlKemKey_PrivateKeySize(MlKemKey* key, word32* len) { int ret = 0; @@ -179,7 +181,7 @@ int wc_KyberKey_PrivateKeySize(KyberKey* key, word32* len) *len = OQS_KEM_ml_kem_1024_length_secret_key; break; #endif - #ifdef WOLFSSL_KYBER_ORIGINAL + #ifdef WOLFSSL_MLKEM_KYBER case KYBER_LEVEL1: *len = OQS_KEM_kyber_512_length_secret_key; break; @@ -210,7 +212,7 @@ int wc_KyberKey_PrivateKeySize(KyberKey* key, word32* len) * @return BAD_FUNC_ARG when key or len is NULL. * @return NOT_COMPILED_IN when key type is not supported. */ -int wc_KyberKey_PublicKeySize(KyberKey* key, word32* len) +int wc_MlKemKey_PublicKeySize(MlKemKey* key, word32* len) { int ret = 0; @@ -234,7 +236,7 @@ int wc_KyberKey_PublicKeySize(KyberKey* key, word32* len) *len = OQS_KEM_ml_kem_1024_length_public_key; break; #endif - #ifdef WOLFSSL_KYBER_ORIGINAL + #ifdef WOLFSSL_MLKEM_KYBER case KYBER_LEVEL1: *len = OQS_KEM_kyber_512_length_public_key; break; @@ -265,7 +267,7 @@ int wc_KyberKey_PublicKeySize(KyberKey* key, word32* len) * @return BAD_FUNC_ARG when key or len is NULL. * @return NOT_COMPILED_IN when key type is not supported. */ -int wc_KyberKey_CipherTextSize(KyberKey* key, word32* len) +int wc_MlKemKey_CipherTextSize(MlKemKey* key, word32* len) { int ret = 0; @@ -289,7 +291,7 @@ int wc_KyberKey_CipherTextSize(KyberKey* key, word32* len) *len = OQS_KEM_ml_kem_1024_length_ciphertext; break; #endif - #ifdef WOLFSSL_KYBER_ORIGINAL + #ifdef WOLFSSL_MLKEM_KYBER case KYBER_LEVEL1: *len = OQS_KEM_kyber_512_length_ciphertext; break; @@ -319,7 +321,7 @@ int wc_KyberKey_CipherTextSize(KyberKey* key, word32* len) * @return 0 on success. 
* @return 0 to indicate success. */ -int wc_KyberKey_SharedSecretSize(KyberKey* key, word32* len) +int wc_MlKemKey_SharedSecretSize(MlKemKey* key, word32* len) { (void)key; /* Validate parameters. */ @@ -346,7 +348,7 @@ int wc_KyberKey_SharedSecretSize(KyberKey* key, word32* len) * @return BAD_FUNC_ARG when key or rng is NULL. * @return MEMORY_E when dynamic memory allocation failed. */ -int wc_KyberKey_MakeKey(KyberKey* key, WC_RNG* rng) +int wc_MlKemKey_MakeKey(MlKemKey* key, WC_RNG* rng) { int ret = 0; #ifdef HAVE_LIBOQS @@ -418,13 +420,13 @@ int wc_KyberKey_MakeKey(KyberKey* key, WC_RNG* rng) * @return NOT_COMPILED_IN when key type is not supported. * @return MEMORY_E when dynamic memory allocation failed. */ -int wc_KyberKey_MakeKeyWithRandom(KyberKey* key, const unsigned char* rand, +int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, const unsigned char* rand, int len) { (void)rand; (void)len; /* OQS doesn't support external randomness. */ - return wc_KyberKey_MakeKey(key, NULL); + return wc_MlKemKey_MakeKey(key, NULL); } /** @@ -439,7 +441,7 @@ int wc_KyberKey_MakeKeyWithRandom(KyberKey* key, const unsigned char* rand, * @return NOT_COMPILED_IN when key type is not supported. * @return MEMORY_E when dynamic memory allocation failed. */ -int wc_KyberKey_Encapsulate(KyberKey* key, unsigned char* ct, unsigned char* ss, +int wc_MlKemKey_Encapsulate(MlKemKey* key, unsigned char* ct, unsigned char* ss, WC_RNG* rng) { int ret = 0; @@ -460,7 +462,7 @@ int wc_KyberKey_Encapsulate(KyberKey* key, unsigned char* ct, unsigned char* ss, #ifdef WOLF_CRYPTO_CB if (ret == 0) { - ret = wc_KyberKey_CipherTextSize(key, &ctlen); + ret = wc_MlKemKey_CipherTextSize(key, &ctlen); } if ((ret == 0) #ifndef WOLF_CRYPTO_CB_FIND @@ -517,13 +519,13 @@ int wc_KyberKey_Encapsulate(KyberKey* key, unsigned char* ct, unsigned char* ss, * @return NOT_COMPILED_IN when key type is not supported. * @return MEMORY_E when dynamic memory allocation failed. 
*/ -int wc_KyberKey_EncapsulateWithRandom(KyberKey* key, unsigned char* ct, +int wc_MlKemKey_EncapsulateWithRandom(MlKemKey* key, unsigned char* ct, unsigned char* ss, const unsigned char* rand, int len) { (void)rand; (void)len; /* OQS doesn't support external randomness. */ - return wc_KyberKey_Encapsulate(key, ct, ss, NULL); + return wc_MlKemKey_Encapsulate(key, ct, ss, NULL); } /** @@ -541,7 +543,7 @@ int wc_KyberKey_EncapsulateWithRandom(KyberKey* key, unsigned char* ct, * @return BUFFER_E when len is not the length of cipher text for the key type. * @return MEMORY_E when dynamic memory allocation failed. */ -int wc_KyberKey_Decapsulate(KyberKey* key, unsigned char* ss, +int wc_MlKemKey_Decapsulate(MlKemKey* key, unsigned char* ss, const unsigned char* ct, word32 len) { int ret = 0; @@ -556,7 +558,7 @@ int wc_KyberKey_Decapsulate(KyberKey* key, unsigned char* ss, ret = BAD_FUNC_ARG; } if (ret == 0) { - ret = wc_KyberKey_CipherTextSize(key, &ctlen); + ret = wc_MlKemKey_CipherTextSize(key, &ctlen); } if ((ret == 0) && (len != ctlen)) { ret = BUFFER_E; @@ -621,7 +623,7 @@ int wc_KyberKey_Decapsulate(KyberKey* key, unsigned char* ss, * @return NOT_COMPILED_IN when key type is not supported. * @return BUFFER_E when len is not the correct size. */ -int wc_KyberKey_DecodePrivateKey(KyberKey* key, const unsigned char* in, +int wc_MlKemKey_DecodePrivateKey(MlKemKey* key, const unsigned char* in, word32 len) { int ret = 0; @@ -633,7 +635,7 @@ int wc_KyberKey_DecodePrivateKey(KyberKey* key, const unsigned char* in, } if (ret == 0) { - ret = wc_KyberKey_PrivateKeySize(key, &privLen); + ret = wc_MlKemKey_PrivateKeySize(key, &privLen); } /* Ensure the data is the correct length for the key type. */ @@ -661,7 +663,7 @@ int wc_KyberKey_DecodePrivateKey(KyberKey* key, const unsigned char* in, * @return NOT_COMPILED_IN when key type is not supported. * @return BUFFER_E when len is not the correct size. 
*/ -int wc_KyberKey_DecodePublicKey(KyberKey* key, const unsigned char* in, +int wc_MlKemKey_DecodePublicKey(MlKemKey* key, const unsigned char* in, word32 len) { int ret = 0; @@ -673,7 +675,7 @@ int wc_KyberKey_DecodePublicKey(KyberKey* key, const unsigned char* in, } if (ret == 0) { - ret = wc_KyberKey_PublicKeySize(key, &pubLen); + ret = wc_MlKemKey_PublicKeySize(key, &pubLen); } /* Ensure the data is the correct length for the key type. */ @@ -701,7 +703,7 @@ int wc_KyberKey_DecodePublicKey(KyberKey* key, const unsigned char* in, * available. * @return NOT_COMPILED_IN when key type is not supported. */ -int wc_KyberKey_EncodePrivateKey(KyberKey* key, unsigned char* out, word32 len) +int wc_MlKemKey_EncodePrivateKey(MlKemKey* key, unsigned char* out, word32 len) { int ret = 0; unsigned int privLen = 0; @@ -711,7 +713,7 @@ int wc_KyberKey_EncodePrivateKey(KyberKey* key, unsigned char* out, word32 len) } if (ret == 0) { - ret = wc_KyberKey_PrivateKeySize(key, &privLen); + ret = wc_MlKemKey_PrivateKeySize(key, &privLen); } /* Check buffer is big enough for encoding. */ @@ -738,7 +740,7 @@ int wc_KyberKey_EncodePrivateKey(KyberKey* key, unsigned char* out, word32 len) * @return BAD_FUNC_ARG when key or out is NULL or public key not available. * @return NOT_COMPILED_IN when key type is not supported. */ -int wc_KyberKey_EncodePublicKey(KyberKey* key, unsigned char* out, word32 len) +int wc_MlKemKey_EncodePublicKey(MlKemKey* key, unsigned char* out, word32 len) { int ret = 0; unsigned int pubLen = 0; @@ -748,7 +750,7 @@ int wc_KyberKey_EncodePublicKey(KyberKey* key, unsigned char* out, word32 len) } if (ret == 0) { - ret = wc_KyberKey_PublicKeySize(key, &pubLen); + ret = wc_MlKemKey_PublicKeySize(key, &pubLen); } /* Check buffer is big enough for encoding. 
*/ @@ -763,4 +765,4 @@ int wc_KyberKey_EncodePublicKey(KyberKey* key, unsigned char* out, word32 len) return ret; } -#endif /* WOLFSSL_HAVE_KYBER && !WOLFSSL_WC_KYBER */ +#endif /* WOLFSSL_HAVE_MLKEM && !WOLFSSL_WC_MLKEM */ diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S index 5b0da2fe3..d442c9c86 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S @@ -297,7 +297,9 @@ L_AES_ARM32_td_data: .word 0x74486c5c .word 0x42d0b857 #endif /* HAVE_AES_DECRYPT */ -#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ + defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) .text .type L_AES_ARM32_te_data, %object .size L_AES_ARM32_te_data, 1024 @@ -559,7 +561,8 @@ L_AES_ARM32_te_data: .word 0xfca85454 .word 0xd66dbbbb .word 0x3a2c1616 -#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || + * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT .text .type L_AES_ARM32_td, %object @@ -568,14 +571,17 @@ L_AES_ARM32_te_data: L_AES_ARM32_td: .word L_AES_ARM32_td_data #endif /* HAVE_AES_DECRYPT */ -#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ + defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) .text .type L_AES_ARM32_te, %object .size L_AES_ARM32_te, 12 .align 4 L_AES_ARM32_te: .word L_AES_ARM32_te_data -#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || 
HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || + * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT .text .align 4 @@ -1935,7 +1941,8 @@ L_AES_encrypt_block_nr: eor r7, r7, r11 pop {pc} .size AES_encrypt_block,.-AES_encrypt_block -#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) .text .type L_AES_ARM32_te_ecb, %object .size L_AES_ARM32_te_ecb, 12 @@ -2175,7 +2182,8 @@ L_AES_ECB_encrypt_end: pop {r3} pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size AES_ECB_encrypt,.-AES_ECB_encrypt -#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || + * WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_CBC .text .type L_AES_ARM32_te_cbc, %object @@ -2691,7 +2699,8 @@ L_AES_CTR_encrypt_end: .size AES_CTR_encrypt,.-AES_CTR_encrypt #endif /* WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT -#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) + #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ + defined(HAVE_AES_CBC) .text .align 4 .globl AES_decrypt_block diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c index 21dcb0d23..22938bbd5 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c @@ -122,8 +122,8 @@ static const word32 L_AES_ARM32_td_data[] = { #endif /* HAVE_AES_DECRYPT */ #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ - defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ - defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || 
defined(WOLFSSL_AES_COUNTER) static const word32 L_AES_ARM32_te_data[] = { 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, @@ -197,8 +197,8 @@ static const word32 L_AES_ARM32_te_data[] = { static const word32* L_AES_ARM32_td = L_AES_ARM32_td_data; #endif /* HAVE_AES_DECRYPT */ #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ - defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ - defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) static const word32* L_AES_ARM32_te = L_AES_ARM32_te_data; #endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ @@ -1591,7 +1591,7 @@ void AES_encrypt_block(const word32* te_p, int nr_p, int len_p, } #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ - defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) static const word32* L_AES_ARM32_te_ecb = L_AES_ARM32_te_data; void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p); @@ -2395,7 +2395,7 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, #endif /* WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT -#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ + #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ defined(HAVE_AES_CBC) void AES_decrypt_block(const word32* td_p, int nr_p, const byte* td4_p); void AES_decrypt_block(const word32* td_p, int nr_p, const byte* td4_p) diff --git a/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S similarity index 98% rename from wolfcrypt/src/port/arm/armv8-32-kyber-asm.S rename to wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S index 2f29f52b8..04d479a06 100644 --- 
a/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S @@ -1,4 +1,4 @@ -/* armv8-32-kyber-asm +/* armv8-32-mlkem-asm * * Copyright (C) 2006-2025 wolfSSL Inc. * @@ -22,7 +22,7 @@ /* Generated using (from wolfssl): * cd ../scripts * ruby ./kyber/kyber.rb arm32 \ - * ../wolfssl/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S + * ../wolfssl/wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S */ #ifdef HAVE_CONFIG_H @@ -33,12 +33,12 @@ #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2) #ifndef WOLFSSL_ARMASM_INLINE -#ifdef WOLFSSL_WC_KYBER +#ifdef WOLFSSL_WC_MLKEM .text - .type L_kyber_arm32_ntt_zetas, %object - .size L_kyber_arm32_ntt_zetas, 256 + .type L_mlkem_arm32_ntt_zetas, %object + .size L_mlkem_arm32_ntt_zetas, 256 .align 4 -L_kyber_arm32_ntt_zetas: +L_mlkem_arm32_ntt_zetas: .short 0x8ed .short 0xa0b .short 0xb9a @@ -169,12 +169,12 @@ L_kyber_arm32_ntt_zetas: .short 0x65c .text .align 4 - .globl kyber_arm32_ntt - .type kyber_arm32_ntt, %function -kyber_arm32_ntt: + .globl mlkem_arm32_ntt + .type mlkem_arm32_ntt, %function +mlkem_arm32_ntt: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} sub sp, sp, #8 - adr r1, L_kyber_arm32_ntt_zetas + adr r1, L_mlkem_arm32_ntt_zetas #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r10, #0x1 @@ -190,7 +190,7 @@ kyber_arm32_ntt: #endif #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ mov r2, #16 -L_kyber_arm32_ntt_loop_123: +L_mlkem_arm32_ntt_loop_123: str r2, [sp] ldrh r11, [r1, #2] ldr r2, [r0] @@ -1303,15 +1303,15 @@ L_kyber_arm32_ntt_loop_123: ldr r2, [sp] subs r2, r2, #1 add r0, r0, #4 - bne L_kyber_arm32_ntt_loop_123 + bne L_mlkem_arm32_ntt_loop_123 sub r0, r0, #0x40 mov r3, #0 -L_kyber_arm32_ntt_loop_4_j: +L_mlkem_arm32_ntt_loop_4_j: str r3, [sp, #4] add r11, r1, r3, lsr #4 mov r2, #4 ldr r11, [r11, #16] -L_kyber_arm32_ntt_loop_4_i: +L_mlkem_arm32_ntt_loop_4_i: str r2, [sp] ldr r2, [r0] ldr 
r3, [r0, #16] @@ -1698,14 +1698,14 @@ L_kyber_arm32_ntt_loop_4_i: #endif subs r2, r2, #1 add r0, r0, #4 - bne L_kyber_arm32_ntt_loop_4_i + bne L_mlkem_arm32_ntt_loop_4_i add r3, r3, #0x40 rsbs r12, r3, #0x100 add r0, r0, #0x70 - bne L_kyber_arm32_ntt_loop_4_j + bne L_mlkem_arm32_ntt_loop_4_j sub r0, r0, #0x200 mov r3, #0 -L_kyber_arm32_ntt_loop_567: +L_mlkem_arm32_ntt_loop_567: add r11, r1, r3, lsr #3 str r3, [sp, #4] ldrh r11, [r11, #32] @@ -3154,15 +3154,15 @@ L_kyber_arm32_ntt_loop_567: add r3, r3, #16 rsbs r12, r3, #0x100 add r0, r0, #32 - bne L_kyber_arm32_ntt_loop_567 + bne L_mlkem_arm32_ntt_loop_567 add sp, sp, #8 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - .size kyber_arm32_ntt,.-kyber_arm32_ntt + .size mlkem_arm32_ntt,.-mlkem_arm32_ntt .text - .type L_kyber_arm32_invntt_zetas_inv, %object - .size L_kyber_arm32_invntt_zetas_inv, 256 + .type L_mlkem_invntt_zetas_inv, %object + .size L_mlkem_invntt_zetas_inv, 256 .align 4 -L_kyber_arm32_invntt_zetas_inv: +L_mlkem_invntt_zetas_inv: .short 0x6a5 .short 0x70f .short 0x5b4 @@ -3293,12 +3293,12 @@ L_kyber_arm32_invntt_zetas_inv: .short 0x5a1 .text .align 4 - .globl kyber_arm32_invntt - .type kyber_arm32_invntt, %function -kyber_arm32_invntt: + .globl mlkem_arm32_invntt + .type mlkem_arm32_invntt, %function +mlkem_arm32_invntt: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} sub sp, sp, #8 - adr r1, L_kyber_arm32_invntt_zetas_inv + adr r1, L_mlkem_invntt_zetas_inv #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r10, #0x1 @@ -3314,7 +3314,7 @@ kyber_arm32_invntt: #endif #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ mov r3, #0 -L_kyber_arm32_invntt_loop_765: +L_mlkem_invntt_loop_765: add r11, r1, r3, lsr #1 str r3, [sp, #4] ldr r2, [r0] @@ -4911,15 +4911,15 @@ L_kyber_arm32_invntt_loop_765: add r3, r3, #16 rsbs r12, r3, #0x100 add r0, r0, #32 - bne L_kyber_arm32_invntt_loop_765 + bne L_mlkem_invntt_loop_765 sub r0, r0, #0x200 mov r3, #0 
-L_kyber_arm32_invntt_loop_4_j: +L_mlkem_invntt_loop_4_j: str r3, [sp, #4] add r11, r1, r3, lsr #4 mov r2, #4 ldr r11, [r11, #224] -L_kyber_arm32_invntt_loop_4_i: +L_mlkem_invntt_loop_4_i: str r2, [sp] ldr r2, [r0] ldr r3, [r0, #16] @@ -5410,14 +5410,14 @@ L_kyber_arm32_invntt_loop_4_i: #endif subs r2, r2, #1 add r0, r0, #4 - bne L_kyber_arm32_invntt_loop_4_i + bne L_mlkem_invntt_loop_4_i add r3, r3, #0x40 rsbs r12, r3, #0x100 add r0, r0, #0x70 - bne L_kyber_arm32_invntt_loop_4_j + bne L_mlkem_invntt_loop_4_j sub r0, r0, #0x200 mov r2, #16 -L_kyber_arm32_invntt_loop_321: +L_mlkem_invntt_loop_321: str r2, [sp] ldrh r11, [r1, #2] ldr r2, [r0] @@ -7672,15 +7672,15 @@ L_kyber_arm32_invntt_loop_321: ldr r2, [sp] subs r2, r2, #1 add r0, r0, #4 - bne L_kyber_arm32_invntt_loop_321 + bne L_mlkem_invntt_loop_321 add sp, sp, #8 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - .size kyber_arm32_invntt,.-kyber_arm32_invntt + .size mlkem_arm32_invntt,.-mlkem_arm32_invntt .text - .type L_kyber_arm32_basemul_mont_zetas, %object - .size L_kyber_arm32_basemul_mont_zetas, 256 + .type L_mlkem_basemul_mont_zetas, %object + .size L_mlkem_basemul_mont_zetas, 256 .align 4 -L_kyber_arm32_basemul_mont_zetas: +L_mlkem_basemul_mont_zetas: .short 0x8ed .short 0xa0b .short 0xb9a @@ -7811,11 +7811,11 @@ L_kyber_arm32_basemul_mont_zetas: .short 0x65c .text .align 4 - .globl kyber_arm32_basemul_mont - .type kyber_arm32_basemul_mont, %function -kyber_arm32_basemul_mont: + .globl mlkem_arm32_basemul_mont + .type mlkem_arm32_basemul_mont, %function +mlkem_arm32_basemul_mont: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} - adr r3, L_kyber_arm32_basemul_mont_zetas + adr r3, L_mlkem_basemul_mont_zetas add r3, r3, #0x80 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) @@ -7832,7 +7832,7 @@ kyber_arm32_basemul_mont: #endif #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ mov r8, #0 -L_kyber_arm32_basemul_mont_loop: +L_mlkem_basemul_mont_loop: 
ldm r1!, {r4, r5} ldm r2!, {r6, r7} ldr lr, [r3, r8] @@ -8084,16 +8084,16 @@ L_kyber_arm32_basemul_mont_loop: #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ stm r0!, {r4, r5} pop {r8} - bne L_kyber_arm32_basemul_mont_loop + bne L_mlkem_basemul_mont_loop pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - .size kyber_arm32_basemul_mont,.-kyber_arm32_basemul_mont + .size mlkem_arm32_basemul_mont,.-mlkem_arm32_basemul_mont .text .align 4 - .globl kyber_arm32_basemul_mont_add - .type kyber_arm32_basemul_mont_add, %function -kyber_arm32_basemul_mont_add: + .globl mlkem_arm32_basemul_mont_add + .type mlkem_arm32_basemul_mont_add, %function +mlkem_arm32_basemul_mont_add: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} - adr r3, L_kyber_arm32_basemul_mont_zetas + adr r3, L_mlkem_basemul_mont_zetas add r3, r3, #0x80 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) @@ -8110,7 +8110,7 @@ kyber_arm32_basemul_mont_add: #endif #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ mov r8, #0 -L_kyber_arm32_basemul_mont_add_loop: +L_mlkem_arm32_basemul_mont_add_loop: ldm r1!, {r4, r5} ldm r2!, {r6, r7} ldr lr, [r3, r8] @@ -8396,14 +8396,14 @@ L_kyber_arm32_basemul_mont_add_loop: #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ stm r0!, {r4, r5} pop {r8} - bne L_kyber_arm32_basemul_mont_add_loop + bne L_mlkem_arm32_basemul_mont_add_loop pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - .size kyber_arm32_basemul_mont_add,.-kyber_arm32_basemul_mont_add + .size mlkem_arm32_basemul_mont_add,.-mlkem_arm32_basemul_mont_add .text .align 4 - .globl kyber_arm32_csubq - .type kyber_arm32_csubq, %function -kyber_arm32_csubq: + .globl mlkem_arm32_csubq + .type mlkem_arm32_csubq, %function +mlkem_arm32_csubq: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r12, #0x1 @@ -8432,7 +8432,7 @@ kyber_arm32_csubq: movt r11, #0x8000 #endif mov r1, #0x100 -L_kyber_arm32_csubq_loop: 
+L_mlkem_arm32_csubq_loop: ldm r0, {r2, r3, r4, r5} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r2, r2, lr @@ -8567,14 +8567,14 @@ L_kyber_arm32_csubq_loop: #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ stm r0!, {r2, r3, r4, r5} subs r1, r1, #8 - bne L_kyber_arm32_csubq_loop + bne L_mlkem_arm32_csubq_loop pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - .size kyber_arm32_csubq,.-kyber_arm32_csubq + .size mlkem_arm32_csubq,.-mlkem_arm32_csubq .text .align 4 - .globl kyber_arm32_rej_uniform - .type kyber_arm32_rej_uniform, %function -kyber_arm32_rej_uniform: + .globl mlkem_arm32_rej_uniform + .type mlkem_arm32_rej_uniform, %function +mlkem_arm32_rej_uniform: push {r4, r5, r6, r7, r8, lr} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r8, #0x1 @@ -8583,9 +8583,9 @@ kyber_arm32_rej_uniform: mov r8, #0xd01 #endif mov r12, #0 -L_kyber_arm32_rej_uniform_loop_no_fail: +L_mlkem_arm32_rej_uniform_loop_no_fail: cmp r1, #8 - blt L_kyber_arm32_rej_uniform_done_no_fail + blt L_mlkem_arm32_rej_uniform_done_no_fail ldm r2!, {r4, r5, r6} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsl r7, r4, #20 @@ -8689,12 +8689,12 @@ L_kyber_arm32_rej_uniform_loop_no_fail: sub r1, r1, lr add r12, r12, lr, lsl #1 subs r3, r3, #12 - bne L_kyber_arm32_rej_uniform_loop_no_fail - b L_kyber_arm32_rej_uniform_done -L_kyber_arm32_rej_uniform_done_no_fail: + bne L_mlkem_arm32_rej_uniform_loop_no_fail + b L_mlkem_arm32_rej_uniform_done +L_mlkem_arm32_rej_uniform_done_no_fail: cmp r1, #0 - beq L_kyber_arm32_rej_uniform_done -L_kyber_arm32_rej_uniform_loop: + beq L_mlkem_arm32_rej_uniform_done +L_mlkem_arm32_rej_uniform_loop: ldm r2!, {r4, r5, r6} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsl r7, r4, #20 @@ -8703,12 +8703,12 @@ L_kyber_arm32_rej_uniform_loop: ubfx r7, r4, #0, #12 #endif cmp r7, r8 - bge L_kyber_arm32_rej_uniform_fail_0 + bge L_mlkem_arm32_rej_uniform_fail_0 strh r7, [r0, r12] subs r1, r1, #1 add r12, r12, #2 - beq 
L_kyber_arm32_rej_uniform_done -L_kyber_arm32_rej_uniform_fail_0: + beq L_mlkem_arm32_rej_uniform_done +L_mlkem_arm32_rej_uniform_fail_0: #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsl r7, r4, #8 lsr r7, r7, #20 @@ -8716,12 +8716,12 @@ L_kyber_arm32_rej_uniform_fail_0: ubfx r7, r4, #12, #12 #endif cmp r7, r8 - bge L_kyber_arm32_rej_uniform_fail_1 + bge L_mlkem_arm32_rej_uniform_fail_1 strh r7, [r0, r12] subs r1, r1, #1 add r12, r12, #2 - beq L_kyber_arm32_rej_uniform_done -L_kyber_arm32_rej_uniform_fail_1: + beq L_mlkem_arm32_rej_uniform_done +L_mlkem_arm32_rej_uniform_fail_1: #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsr r7, r4, #24 #else @@ -8736,12 +8736,12 @@ L_kyber_arm32_rej_uniform_fail_1: bfi r7, r5, #8, #4 #endif cmp r7, r8 - bge L_kyber_arm32_rej_uniform_fail_2 + bge L_mlkem_arm32_rej_uniform_fail_2 strh r7, [r0, r12] subs r1, r1, #1 add r12, r12, #2 - beq L_kyber_arm32_rej_uniform_done -L_kyber_arm32_rej_uniform_fail_2: + beq L_mlkem_arm32_rej_uniform_done +L_mlkem_arm32_rej_uniform_fail_2: #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsl r7, r5, #16 lsr r7, r7, #20 @@ -8749,12 +8749,12 @@ L_kyber_arm32_rej_uniform_fail_2: ubfx r7, r5, #4, #12 #endif cmp r7, r8 - bge L_kyber_arm32_rej_uniform_fail_3 + bge L_mlkem_arm32_rej_uniform_fail_3 strh r7, [r0, r12] subs r1, r1, #1 add r12, r12, #2 - beq L_kyber_arm32_rej_uniform_done -L_kyber_arm32_rej_uniform_fail_3: + beq L_mlkem_arm32_rej_uniform_done +L_mlkem_arm32_rej_uniform_fail_3: #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsl r7, r5, #4 lsr r7, r7, #20 @@ -8762,12 +8762,12 @@ L_kyber_arm32_rej_uniform_fail_3: ubfx r7, r5, #16, #12 #endif cmp r7, r8 - bge L_kyber_arm32_rej_uniform_fail_4 + bge L_mlkem_arm32_rej_uniform_fail_4 strh r7, [r0, r12] subs r1, r1, #1 add r12, r12, #2 - beq L_kyber_arm32_rej_uniform_done -L_kyber_arm32_rej_uniform_fail_4: + beq L_mlkem_arm32_rej_uniform_done +L_mlkem_arm32_rej_uniform_fail_4: #if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) lsr r7, r5, #28 #else @@ -8782,12 +8782,12 @@ L_kyber_arm32_rej_uniform_fail_4: bfi r7, r6, #4, #8 #endif cmp r7, r8 - bge L_kyber_arm32_rej_uniform_fail_5 + bge L_mlkem_arm32_rej_uniform_fail_5 strh r7, [r0, r12] subs r1, r1, #1 add r12, r12, #2 - beq L_kyber_arm32_rej_uniform_done -L_kyber_arm32_rej_uniform_fail_5: + beq L_mlkem_arm32_rej_uniform_done +L_mlkem_arm32_rej_uniform_fail_5: #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsl r7, r6, #12 lsr r7, r7, #20 @@ -8795,31 +8795,31 @@ L_kyber_arm32_rej_uniform_fail_5: ubfx r7, r6, #8, #12 #endif cmp r7, r8 - bge L_kyber_arm32_rej_uniform_fail_6 + bge L_mlkem_arm32_rej_uniform_fail_6 strh r7, [r0, r12] subs r1, r1, #1 add r12, r12, #2 - beq L_kyber_arm32_rej_uniform_done -L_kyber_arm32_rej_uniform_fail_6: + beq L_mlkem_arm32_rej_uniform_done +L_mlkem_arm32_rej_uniform_fail_6: #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsr r7, r6, #20 #else ubfx r7, r6, #20, #12 #endif cmp r7, r8 - bge L_kyber_arm32_rej_uniform_fail_7 + bge L_mlkem_arm32_rej_uniform_fail_7 strh r7, [r0, r12] subs r1, r1, #1 add r12, r12, #2 - beq L_kyber_arm32_rej_uniform_done -L_kyber_arm32_rej_uniform_fail_7: + beq L_mlkem_arm32_rej_uniform_done +L_mlkem_arm32_rej_uniform_fail_7: subs r3, r3, #12 - bgt L_kyber_arm32_rej_uniform_loop -L_kyber_arm32_rej_uniform_done: + bgt L_mlkem_arm32_rej_uniform_loop +L_mlkem_arm32_rej_uniform_done: lsr r0, r12, #1 pop {r4, r5, r6, r7, r8, pc} - .size kyber_arm32_rej_uniform,.-kyber_arm32_rej_uniform -#endif /* WOLFSSL_WC_KYBER */ + .size mlkem_arm32_rej_uniform,.-mlkem_arm32_rej_uniform +#endif /* WOLFSSL_WC_MLKEM */ #endif /* !__aarch64__ && !WOLFSSL_ARMASM_THUMB2 */ #endif /* WOLFSSL_ARMASM */ diff --git a/wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c similarity index 98% rename from wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c rename to wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c index e514604f8..b6aa589cc 
100644 --- a/wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c @@ -1,4 +1,4 @@ -/* armv8-32-kyber-asm +/* armv8-32-mlkem-asm * * Copyright (C) 2006-2025 wolfSSL Inc. * @@ -22,7 +22,7 @@ /* Generated using (from wolfssl): * cd ../scripts * ruby ./kyber/kyber.rb arm32 \ - * ../wolfssl/wolfcrypt/src/port/arm/armv8-32-kyber-asm.c + * ../wolfssl/wolfcrypt/src/port/arm/armv8-32-mlkem-asm.c */ #ifdef HAVE_CONFIG_H @@ -49,10 +49,10 @@ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ -#include +#include -#ifdef WOLFSSL_WC_KYBER -static const word16 L_kyber_arm32_ntt_zetas[] = { +#ifdef WOLFSSL_WC_MLKEM +static const word16 L_mlkem_arm32_ntt_zetas[] = { 0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca, 0x0c56, 0x026e, 0x0629, 0x00b6, @@ -87,11 +87,11 @@ static const word16 L_kyber_arm32_ntt_zetas[] = { 0x03be, 0x074d, 0x05f2, 0x065c, }; -void kyber_arm32_ntt(sword16* r_p) +void mlkem_arm32_ntt(sword16* r_p) { register sword16* r asm ("r0") = (sword16*)r_p; - register word16* L_kyber_arm32_ntt_zetas_c asm ("r1") = - (word16*)&L_kyber_arm32_ntt_zetas; + register word16* L_mlkem_arm32_ntt_zetas_c asm ("r1") = + (word16*)&L_mlkem_arm32_ntt_zetas; __asm__ __volatile__ ( "sub sp, sp, #8\n\t" @@ -111,7 +111,7 @@ void kyber_arm32_ntt(sword16* r_p) #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "mov r2, #16\n\t" "\n" - "L_kyber_arm32_ntt_loop_123_%=: \n\t" + "L_mlkem_arm32_ntt_loop_123_%=: \n\t" "str r2, [sp]\n\t" "ldrh r11, [r1, #2]\n\t" "ldr r2, [%[r]]\n\t" @@ -1224,17 +1224,17 @@ void kyber_arm32_ntt(sword16* r_p) "ldr r2, [sp]\n\t" "subs r2, r2, #1\n\t" "add %[r], %[r], #4\n\t" - "bne L_kyber_arm32_ntt_loop_123_%=\n\t" + "bne L_mlkem_arm32_ntt_loop_123_%=\n\t" "sub %[r], %[r], #0x40\n\t" "mov r3, #0\n\t" "\n" - "L_kyber_arm32_ntt_loop_4_j_%=: \n\t" + "L_mlkem_arm32_ntt_loop_4_j_%=: \n\t" "str r3, [sp, #4]\n\t" "add r11, r1, r3, lsr #4\n\t" "mov r2, #4\n\t" "ldr r11, [r11, #16]\n\t" "\n" 
- "L_kyber_arm32_ntt_loop_4_i_%=: \n\t" + "L_mlkem_arm32_ntt_loop_4_i_%=: \n\t" "str r2, [sp]\n\t" "ldr r2, [%[r]]\n\t" "ldr r3, [%[r], #16]\n\t" @@ -1621,15 +1621,15 @@ void kyber_arm32_ntt(sword16* r_p) #endif "subs r2, r2, #1\n\t" "add %[r], %[r], #4\n\t" - "bne L_kyber_arm32_ntt_loop_4_i_%=\n\t" + "bne L_mlkem_arm32_ntt_loop_4_i_%=\n\t" "add r3, r3, #0x40\n\t" "rsbs r12, r3, #0x100\n\t" "add %[r], %[r], #0x70\n\t" - "bne L_kyber_arm32_ntt_loop_4_j_%=\n\t" + "bne L_mlkem_arm32_ntt_loop_4_j_%=\n\t" "sub %[r], %[r], #0x200\n\t" "mov r3, #0\n\t" "\n" - "L_kyber_arm32_ntt_loop_567_%=: \n\t" + "L_mlkem_arm32_ntt_loop_567_%=: \n\t" "add r11, r1, r3, lsr #3\n\t" "str r3, [sp, #4]\n\t" "ldrh r11, [r11, #32]\n\t" @@ -3078,17 +3078,17 @@ void kyber_arm32_ntt(sword16* r_p) "add r3, r3, #16\n\t" "rsbs r12, r3, #0x100\n\t" "add %[r], %[r], #32\n\t" - "bne L_kyber_arm32_ntt_loop_567_%=\n\t" + "bne L_mlkem_arm32_ntt_loop_567_%=\n\t" "add sp, sp, #8\n\t" : [r] "+r" (r), - [L_kyber_arm32_ntt_zetas] "+r" (L_kyber_arm32_ntt_zetas_c) + [L_mlkem_arm32_ntt_zetas] "+r" (L_mlkem_arm32_ntt_zetas_c) : : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } -static const word16 L_kyber_arm32_invntt_zetas_inv[] = { +static const word16 L_mlkem_invntt_zetas_inv[] = { 0x06a5, 0x070f, 0x05b4, 0x0943, 0x0922, 0x091d, 0x0134, 0x006c, 0x0b23, 0x0366, 0x0356, 0x05e6, @@ -3123,11 +3123,11 @@ static const word16 L_kyber_arm32_invntt_zetas_inv[] = { 0x05ed, 0x0167, 0x02f6, 0x05a1, }; -void kyber_arm32_invntt(sword16* r_p) +void mlkem_arm32_invntt(sword16* r_p) { register sword16* r asm ("r0") = (sword16*)r_p; - register word16* L_kyber_arm32_invntt_zetas_inv_c asm ("r1") = - (word16*)&L_kyber_arm32_invntt_zetas_inv; + register word16* L_mlkem_invntt_zetas_inv_c asm ("r1") = + (word16*)&L_mlkem_invntt_zetas_inv; __asm__ __volatile__ ( "sub sp, sp, #8\n\t" @@ -3147,7 +3147,7 @@ void kyber_arm32_invntt(sword16* r_p) #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH 
>= 6 */ "mov r3, #0\n\t" "\n" - "L_kyber_arm32_invntt_loop_765_%=: \n\t" + "L_mlkem_invntt_loop_765_%=: \n\t" "add r11, r1, r3, lsr #1\n\t" "str r3, [sp, #4]\n\t" "ldr r2, [%[r]]\n\t" @@ -4744,17 +4744,17 @@ void kyber_arm32_invntt(sword16* r_p) "add r3, r3, #16\n\t" "rsbs r12, r3, #0x100\n\t" "add %[r], %[r], #32\n\t" - "bne L_kyber_arm32_invntt_loop_765_%=\n\t" + "bne L_mlkem_invntt_loop_765_%=\n\t" "sub %[r], %[r], #0x200\n\t" "mov r3, #0\n\t" "\n" - "L_kyber_arm32_invntt_loop_4_j_%=: \n\t" + "L_mlkem_invntt_loop_4_j_%=: \n\t" "str r3, [sp, #4]\n\t" "add r11, r1, r3, lsr #4\n\t" "mov r2, #4\n\t" "ldr r11, [r11, #224]\n\t" "\n" - "L_kyber_arm32_invntt_loop_4_i_%=: \n\t" + "L_mlkem_invntt_loop_4_i_%=: \n\t" "str r2, [sp]\n\t" "ldr r2, [%[r]]\n\t" "ldr r3, [%[r], #16]\n\t" @@ -5245,15 +5245,15 @@ void kyber_arm32_invntt(sword16* r_p) #endif "subs r2, r2, #1\n\t" "add %[r], %[r], #4\n\t" - "bne L_kyber_arm32_invntt_loop_4_i_%=\n\t" + "bne L_mlkem_invntt_loop_4_i_%=\n\t" "add r3, r3, #0x40\n\t" "rsbs r12, r3, #0x100\n\t" "add %[r], %[r], #0x70\n\t" - "bne L_kyber_arm32_invntt_loop_4_j_%=\n\t" + "bne L_mlkem_invntt_loop_4_j_%=\n\t" "sub %[r], %[r], #0x200\n\t" "mov r2, #16\n\t" "\n" - "L_kyber_arm32_invntt_loop_321_%=: \n\t" + "L_mlkem_invntt_loop_321_%=: \n\t" "str r2, [sp]\n\t" "ldrh r11, [r1, #2]\n\t" "ldr r2, [%[r]]\n\t" @@ -7508,17 +7508,17 @@ void kyber_arm32_invntt(sword16* r_p) "ldr r2, [sp]\n\t" "subs r2, r2, #1\n\t" "add %[r], %[r], #4\n\t" - "bne L_kyber_arm32_invntt_loop_321_%=\n\t" + "bne L_mlkem_invntt_loop_321_%=\n\t" "add sp, sp, #8\n\t" : [r] "+r" (r), - [L_kyber_arm32_invntt_zetas_inv] "+r" (L_kyber_arm32_invntt_zetas_inv_c) + [L_mlkem_invntt_zetas_inv] "+r" (L_mlkem_invntt_zetas_inv_c) : : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } -static const word16 L_kyber_arm32_basemul_mont_zetas[] = { +static const word16 L_mlkem_basemul_mont_zetas[] = { 0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 
0x00ca, 0x0c56, 0x026e, 0x0629, 0x00b6, @@ -7553,14 +7553,14 @@ static const word16 L_kyber_arm32_basemul_mont_zetas[] = { 0x03be, 0x074d, 0x05f2, 0x065c, }; -void kyber_arm32_basemul_mont(sword16* r_p, const sword16* a_p, +void mlkem_arm32_basemul_mont(sword16* r_p, const sword16* a_p, const sword16* b_p) { register sword16* r asm ("r0") = (sword16*)r_p; register const sword16* a asm ("r1") = (const sword16*)a_p; register const sword16* b asm ("r2") = (const sword16*)b_p; - register word16* L_kyber_arm32_basemul_mont_zetas_c asm ("r3") = - (word16*)&L_kyber_arm32_basemul_mont_zetas; + register word16* L_mlkem_basemul_mont_zetas_c asm ("r3") = + (word16*)&L_mlkem_basemul_mont_zetas; __asm__ __volatile__ ( "add r3, r3, #0x80\n\t" @@ -7580,7 +7580,7 @@ void kyber_arm32_basemul_mont(sword16* r_p, const sword16* a_p, #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "mov r8, #0\n\t" "\n" - "L_kyber_arm32_basemul_mont_loop_%=: \n\t" + "L_mlkem_basemul_mont_loop_%=: \n\t" "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" "ldr lr, [r3, r8]\n\t" @@ -7832,23 +7832,23 @@ void kyber_arm32_basemul_mont(sword16* r_p, const sword16* a_p, #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "stm %[r]!, {r4, r5}\n\t" "pop {r8}\n\t" - "bne L_kyber_arm32_basemul_mont_loop_%=\n\t" + "bne L_mlkem_basemul_mont_loop_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), - [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) + [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c) : : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } -void kyber_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, +void mlkem_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, const sword16* b_p) { register sword16* r asm ("r0") = (sword16*)r_p; register const sword16* a asm ("r1") = (const sword16*)a_p; register const sword16* b asm ("r2") = (const sword16*)b_p; - register word16* L_kyber_arm32_basemul_mont_zetas_c asm 
("r3") = - (word16*)&L_kyber_arm32_basemul_mont_zetas; + register word16* L_mlkem_basemul_mont_zetas_c asm ("r3") = + (word16*)&L_mlkem_basemul_mont_zetas; __asm__ __volatile__ ( "add r3, r3, #0x80\n\t" @@ -7868,7 +7868,7 @@ void kyber_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "mov r8, #0\n\t" "\n" - "L_kyber_arm32_basemul_mont_add_loop_%=: \n\t" + "L_mlkem_arm32_basemul_mont_add_loop_%=: \n\t" "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" "ldr lr, [r3, r8]\n\t" @@ -8154,20 +8154,20 @@ void kyber_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "stm %[r]!, {r4, r5}\n\t" "pop {r8}\n\t" - "bne L_kyber_arm32_basemul_mont_add_loop_%=\n\t" + "bne L_mlkem_arm32_basemul_mont_add_loop_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), - [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) + [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c) : : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } -void kyber_arm32_csubq(sword16* p_p) +void mlkem_arm32_csubq(sword16* p_p) { register sword16* p asm ("r0") = (sword16*)p_p; - register word16* L_kyber_arm32_basemul_mont_zetas_c asm ("r1") = - (word16*)&L_kyber_arm32_basemul_mont_zetas; + register word16* L_mlkem_basemul_mont_zetas_c asm ("r1") = + (word16*)&L_mlkem_basemul_mont_zetas; __asm__ __volatile__ ( #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) @@ -8198,7 +8198,7 @@ void kyber_arm32_csubq(sword16* p_p) #endif "mov r1, #0x100\n\t" "\n" - "L_kyber_arm32_csubq_loop_%=: \n\t" + "L_mlkem_arm32_csubq_loop_%=: \n\t" "ldm %[p], {r2, r3, r4, r5}\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r2, r2, lr\n\t" @@ -8333,24 +8333,24 @@ void kyber_arm32_csubq(sword16* p_p) #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "stm %[p]!, {r2, r3, r4, r5}\n\t" "subs r1, r1, #8\n\t" - "bne 
L_kyber_arm32_csubq_loop_%=\n\t" + "bne L_mlkem_arm32_csubq_loop_%=\n\t" : [p] "+r" (p), - [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) + [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c) : : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } -unsigned int kyber_arm32_rej_uniform(sword16* p_p, unsigned int len_p, +unsigned int mlkem_arm32_rej_uniform(sword16* p_p, unsigned int len_p, const byte* r_p, unsigned int rLen_p) { register sword16* p asm ("r0") = (sword16*)p_p; register unsigned int len asm ("r1") = (unsigned int)len_p; register const byte* r asm ("r2") = (const byte*)r_p; register unsigned int rLen asm ("r3") = (unsigned int)rLen_p; - register word16* L_kyber_arm32_basemul_mont_zetas_c asm ("r4") = - (word16*)&L_kyber_arm32_basemul_mont_zetas; + register word16* L_mlkem_basemul_mont_zetas_c asm ("r4") = + (word16*)&L_mlkem_basemul_mont_zetas; __asm__ __volatile__ ( #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) @@ -8361,9 +8361,9 @@ unsigned int kyber_arm32_rej_uniform(sword16* p_p, unsigned int len_p, #endif "mov r12, #0\n\t" "\n" - "L_kyber_arm32_rej_uniform_loop_no_fail_%=: \n\t" + "L_mlkem_arm32_rej_uniform_loop_no_fail_%=: \n\t" "cmp %[len], #8\n\t" - "blt L_kyber_arm32_rej_uniform_done_no_fail_%=\n\t" + "blt L_mlkem_arm32_rej_uniform_done_no_fail_%=\n\t" "ldm %[r]!, {r4, r5, r6}\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r7, r4, #20\n\t" @@ -8467,14 +8467,14 @@ unsigned int kyber_arm32_rej_uniform(sword16* p_p, unsigned int len_p, "sub %[len], %[len], lr\n\t" "add r12, r12, lr, lsl #1\n\t" "subs %[rLen], %[rLen], #12\n\t" - "bne L_kyber_arm32_rej_uniform_loop_no_fail_%=\n\t" - "b L_kyber_arm32_rej_uniform_done_%=\n\t" + "bne L_mlkem_arm32_rej_uniform_loop_no_fail_%=\n\t" + "b L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_kyber_arm32_rej_uniform_done_no_fail_%=: \n\t" + "L_mlkem_arm32_rej_uniform_done_no_fail_%=: \n\t" "cmp 
%[len], #0\n\t" - "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_kyber_arm32_rej_uniform_loop_%=: \n\t" + "L_mlkem_arm32_rej_uniform_loop_%=: \n\t" "ldm %[r]!, {r4, r5, r6}\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r7, r4, #20\n\t" @@ -8483,13 +8483,13 @@ unsigned int kyber_arm32_rej_uniform(sword16* p_p, unsigned int len_p, "ubfx r7, r4, #0, #12\n\t" #endif "cmp r7, r8\n\t" - "bge L_kyber_arm32_rej_uniform_fail_0_%=\n\t" + "bge L_mlkem_arm32_rej_uniform_fail_0_%=\n\t" "strh r7, [%[p], r12]\n\t" "subs %[len], %[len], #1\n\t" "add r12, r12, #2\n\t" - "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_kyber_arm32_rej_uniform_fail_0_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_0_%=: \n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r7, r4, #8\n\t" "lsr r7, r7, #20\n\t" @@ -8497,13 +8497,13 @@ unsigned int kyber_arm32_rej_uniform(sword16* p_p, unsigned int len_p, "ubfx r7, r4, #12, #12\n\t" #endif "cmp r7, r8\n\t" - "bge L_kyber_arm32_rej_uniform_fail_1_%=\n\t" + "bge L_mlkem_arm32_rej_uniform_fail_1_%=\n\t" "strh r7, [%[p], r12]\n\t" "subs %[len], %[len], #1\n\t" "add r12, r12, #2\n\t" - "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_kyber_arm32_rej_uniform_fail_1_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_1_%=: \n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsr r7, r4, #24\n\t" #else @@ -8518,13 +8518,13 @@ unsigned int kyber_arm32_rej_uniform(sword16* p_p, unsigned int len_p, "bfi r7, r5, #8, #4\n\t" #endif "cmp r7, r8\n\t" - "bge L_kyber_arm32_rej_uniform_fail_2_%=\n\t" + "bge L_mlkem_arm32_rej_uniform_fail_2_%=\n\t" "strh r7, [%[p], r12]\n\t" "subs %[len], %[len], #1\n\t" "add r12, r12, #2\n\t" - "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_kyber_arm32_rej_uniform_fail_2_%=: \n\t" + 
"L_mlkem_arm32_rej_uniform_fail_2_%=: \n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r7, r5, #16\n\t" "lsr r7, r7, #20\n\t" @@ -8532,13 +8532,13 @@ unsigned int kyber_arm32_rej_uniform(sword16* p_p, unsigned int len_p, "ubfx r7, r5, #4, #12\n\t" #endif "cmp r7, r8\n\t" - "bge L_kyber_arm32_rej_uniform_fail_3_%=\n\t" + "bge L_mlkem_arm32_rej_uniform_fail_3_%=\n\t" "strh r7, [%[p], r12]\n\t" "subs %[len], %[len], #1\n\t" "add r12, r12, #2\n\t" - "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_kyber_arm32_rej_uniform_fail_3_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_3_%=: \n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r7, r5, #4\n\t" "lsr r7, r7, #20\n\t" @@ -8546,13 +8546,13 @@ unsigned int kyber_arm32_rej_uniform(sword16* p_p, unsigned int len_p, "ubfx r7, r5, #16, #12\n\t" #endif "cmp r7, r8\n\t" - "bge L_kyber_arm32_rej_uniform_fail_4_%=\n\t" + "bge L_mlkem_arm32_rej_uniform_fail_4_%=\n\t" "strh r7, [%[p], r12]\n\t" "subs %[len], %[len], #1\n\t" "add r12, r12, #2\n\t" - "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_kyber_arm32_rej_uniform_fail_4_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_4_%=: \n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsr r7, r5, #28\n\t" #else @@ -8567,13 +8567,13 @@ unsigned int kyber_arm32_rej_uniform(sword16* p_p, unsigned int len_p, "bfi r7, r6, #4, #8\n\t" #endif "cmp r7, r8\n\t" - "bge L_kyber_arm32_rej_uniform_fail_5_%=\n\t" + "bge L_mlkem_arm32_rej_uniform_fail_5_%=\n\t" "strh r7, [%[p], r12]\n\t" "subs %[len], %[len], #1\n\t" "add r12, r12, #2\n\t" - "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_kyber_arm32_rej_uniform_fail_5_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_5_%=: \n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r7, r6, #12\n\t" "lsr r7, r7, #20\n\t" @@ -8581,40 +8581,40 @@ unsigned int 
kyber_arm32_rej_uniform(sword16* p_p, unsigned int len_p, "ubfx r7, r6, #8, #12\n\t" #endif "cmp r7, r8\n\t" - "bge L_kyber_arm32_rej_uniform_fail_6_%=\n\t" + "bge L_mlkem_arm32_rej_uniform_fail_6_%=\n\t" "strh r7, [%[p], r12]\n\t" "subs %[len], %[len], #1\n\t" "add r12, r12, #2\n\t" - "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_kyber_arm32_rej_uniform_fail_6_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_6_%=: \n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsr r7, r6, #20\n\t" #else "ubfx r7, r6, #20, #12\n\t" #endif "cmp r7, r8\n\t" - "bge L_kyber_arm32_rej_uniform_fail_7_%=\n\t" + "bge L_mlkem_arm32_rej_uniform_fail_7_%=\n\t" "strh r7, [%[p], r12]\n\t" "subs %[len], %[len], #1\n\t" "add r12, r12, #2\n\t" - "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_kyber_arm32_rej_uniform_fail_7_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_7_%=: \n\t" "subs %[rLen], %[rLen], #12\n\t" - "bgt L_kyber_arm32_rej_uniform_loop_%=\n\t" + "bgt L_mlkem_arm32_rej_uniform_loop_%=\n\t" "\n" - "L_kyber_arm32_rej_uniform_done_%=: \n\t" + "L_mlkem_arm32_rej_uniform_done_%=: \n\t" "lsr r0, r12, #1\n\t" : [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen), - [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) + [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c) : : "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8" ); return (word32)(size_t)p; } -#endif /* WOLFSSL_WC_KYBER */ +#endif /* WOLFSSL_WC_MLKEM */ #endif /* !__aarch64__ && !WOLFSSL_ARMASM_THUMB2 */ #endif /* WOLFSSL_ARMASM */ diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c index bb7f1f86d..1ec907402 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c @@ -72,7 +72,8 @@ static const word32 L_SHA256_transform_len_k[] = { 0x90befffa, 0xa4506ceb, 
0xbef9a3f7, 0xc67178f2, }; -void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p); +void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, + word32 len_p); void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) { register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p; @@ -1757,7 +1758,8 @@ static const word32 L_SHA256_transform_neon_len_k[] = { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, }; -void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p); +void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, + word32 len_p); void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) { register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p; diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c index 007c81ff7..ab02ae1fd 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c @@ -96,7 +96,8 @@ static const word64 L_SHA512_transform_len_k[] = { 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL, }; -void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p); +void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, + word32 len_p); void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) { register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p; @@ -7573,7 +7574,8 @@ static const word64 L_SHA512_transform_neon_len_k[] = { 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL, }; -void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p); +void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, + word32 len_p); void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) { register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p; diff --git a/wolfcrypt/src/port/arm/armv8-aes.c b/wolfcrypt/src/port/arm/armv8-aes.c index 
a36af7ad8..ad4785933 100644 --- a/wolfcrypt/src/port/arm/armv8-aes.c +++ b/wolfcrypt/src/port/arm/armv8-aes.c @@ -24018,7 +24018,7 @@ static void AesGcmInit_C(Aes* aes, const byte* iv, word32 ivSz) /* Counter is IV with bottom 4 bytes set to: 0x00,0x00,0x00,0x01. */ XMEMCPY(counter, iv, ivSz); XMEMSET(counter + GCM_NONCE_MID_SZ, 0, - WC_AES_BLOCK_SIZE - GCM_NONCE_MID_SZ - 1); + WC_AES_BLOCK_SIZE - GCM_NONCE_MID_SZ - 1); counter[WC_AES_BLOCK_SIZE - 1] = 1; } else { @@ -25001,8 +25001,8 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) * in input plain text buffer to encrypt * sz size of both out and in buffers * i value to use for tweak - * iSz size of i buffer, should always be WC_AES_BLOCK_SIZE but having this input - * adds a sanity check on how the user calls the function. + * iSz size of i buffer, should always be WC_AES_BLOCK_SIZE but having this + * input adds a sanity check on how the user calls the function. * * returns 0 on success */ @@ -25321,8 +25321,8 @@ void AES_XTS_encrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, * in input cipher text buffer to decrypt * sz size of both out and in buffers * i value to use for tweak - * iSz size of i buffer, should always be WC_AES_BLOCK_SIZE but having this input - * adds a sanity check on how the user calls the function. + * iSz size of i buffer, should always be WC_AES_BLOCK_SIZE but having this + * input adds a sanity check on how the user calls the function. * * returns 0 on success */ @@ -25842,8 +25842,8 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, * in input plain text buffer to encrypt * sz size of both out and in buffers * i value to use for tweak - * iSz size of i buffer, should always be WC_AES_BLOCK_SIZE but having this input - * adds a sanity check on how the user calls the function. + * iSz size of i buffer, should always be WC_AES_BLOCK_SIZE but having this + * input adds a sanity check on how the user calls the function. 
* * returns 0 on success */ @@ -25976,8 +25976,8 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, * in input cipher text buffer to decrypt * sz size of both out and in buffers * i value to use for tweak - * iSz size of i buffer, should always be WC_AES_BLOCK_SIZE but having this input - * adds a sanity check on how the user calls the function. + * iSz size of i buffer, should always be WC_AES_BLOCK_SIZE but having this + * input adds a sanity check on how the user calls the function. * * returns 0 on success */ @@ -26152,7 +26152,8 @@ extern void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, extern void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); #if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT) -/* in pre-C2x C, constness conflicts for dimensioned arrays can't be resolved. */ +/* in pre-C2x C, constness conflicts for dimensioned arrays can't be resolved. + */ extern void GCM_gmult_len(byte* x, /* const */ byte m[32][WC_AES_BLOCK_SIZE], const unsigned char* data, unsigned long len); #endif @@ -26165,7 +26166,7 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, { #if defined(AES_MAX_KEY_SIZE) const word32 max_key_len = (AES_MAX_KEY_SIZE / 8); - word32 userKey_aligned[AES_MAX_KEY_SIZE / WOLFSSL_BIT_SIZE / sizeof(word32)]; + word32 userKey_aligned[AES_MAX_KEY_SIZE/WOLFSSL_BIT_SIZE/sizeof(word32)]; #endif if (((keylen != 16) && (keylen != 24) && (keylen != 32)) || @@ -26206,7 +26207,8 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, #if defined(AES_MAX_KEY_SIZE) if ((unsigned long)userKey & (sizeof(aes->key[0]) - 1U)) { XMEMCPY(userKey_aligned, userKey, keylen); - AES_set_encrypt_key((byte *)userKey_aligned, keylen * 8, (byte*)aes->key); + AES_set_encrypt_key((byte *)userKey_aligned, keylen * 8, + (byte*)aes->key); } else #endif @@ -26462,8 +26464,8 @@ int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* 
in, word32 sz) byte zeros[WC_AES_BLOCK_SIZE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - AES_CTR_encrypt(zeros, (byte*)aes->tmp, WC_AES_BLOCK_SIZE, (byte*)aes->key, - aes->rounds, (byte*)aes->reg); + AES_CTR_encrypt(zeros, (byte*)aes->tmp, WC_AES_BLOCK_SIZE, + (byte*)aes->key, aes->rounds, (byte*)aes->reg); aes->left = WC_AES_BLOCK_SIZE; tmp = (byte*)aes->tmp; @@ -27027,7 +27029,8 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, initialCounter[WC_AES_BLOCK_SIZE - 1] = 1; } else { - gcm_ghash_arm32(aes, NULL, 0, iv, ivSz, initialCounter, WC_AES_BLOCK_SIZE); + gcm_ghash_arm32(aes, NULL, 0, iv, ivSz, initialCounter, + WC_AES_BLOCK_SIZE); } XMEMCPY(counter, initialCounter, WC_AES_BLOCK_SIZE); @@ -27129,7 +27132,8 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, initialCounter[WC_AES_BLOCK_SIZE - 1] = 1; } else { - gcm_ghash_arm32(aes, NULL, 0, iv, ivSz, initialCounter, WC_AES_BLOCK_SIZE); + gcm_ghash_arm32(aes, NULL, 0, iv, ivSz, initialCounter, + WC_AES_BLOCK_SIZE); } XMEMCPY(counter, initialCounter, WC_AES_BLOCK_SIZE); diff --git a/wolfcrypt/src/port/arm/armv8-chacha.c b/wolfcrypt/src/port/arm/armv8-chacha.c index afb62c4fb..9836572cc 100644 --- a/wolfcrypt/src/port/arm/armv8-chacha.c +++ b/wolfcrypt/src/port/arm/armv8-chacha.c @@ -19,7 +19,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ -/* The paper NEON crypto by Daniel J. Bernstein and Peter Schwabe was used to optimize for ARM +/* The paper NEON crypto by Daniel J. 
Bernstein and Peter Schwabe was used to + * optimize for ARM * https://cryptojedi.org/papers/neoncrypto-20120320.pdf */ @@ -121,7 +122,7 @@ int wc_Chacha_SetIV(ChaCha* ctx, const byte* iv, word32 counter) ctx->left = 0; ctx->X[CHACHA_IV_BYTES+0] = counter; /* block counter */ - ctx->X[CHACHA_IV_BYTES+1] = LITTLE32(temp[0]); /* fixed variable from nonce */ + ctx->X[CHACHA_IV_BYTES+1] = LITTLE32(temp[0]); /* fixed var from nonce */ ctx->X[CHACHA_IV_BYTES+2] = LITTLE32(temp[1]); /* counter from nonce */ ctx->X[CHACHA_IV_BYTES+3] = LITTLE32(temp[2]); /* counter from nonce */ @@ -246,7 +247,7 @@ static const word32 L_chacha20_neon_inc_first_word[] = { #ifdef __aarch64__ -static const word32 L_chacha20_neon_add_all_counters[] = { +static const word32 L_chacha20_neon_add_all_cntrs[] = { 0x0, 0x1, 0x2, @@ -260,7 +261,8 @@ static const word32 L_chacha20_neon_rol8[] = { 0xe0d0c0f, }; -static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m, byte* c, word32 bytes) +static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m, + byte* c, word32 bytes) { #ifdef CHACHA_TEST printf("Entering wc_Chacha_encrypt_320 with %d bytes\n", bytes); @@ -270,22 +272,26 @@ static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m, /* * The layout of used registers is: * ARM - * w4-w19: these registers hold the fifth Chacha block for calculation in regular ARM + * w4-w19: these registers hold the fifth Chacha block for calculation + * in regular ARM * w20: loop counter for how many even-odd rounds need to be executed * w21: the counter offset for the block in ARM registers - * NEON - * v0-v15: the vi'th register holds the i'th word of four blocks during the quarter rounds. - * these registers are later transposed make ADDing the input and XORing the message easier. 
- * v16-v19: these are helper registers that are used as temporary location to store data + * NEON + * v0-v15: the vi'th register holds the i'th word of four blocks during + * the quarter rounds. these registers are later transposed make + * ADDing the input and XORing the message easier. + * v16-v19: these are helper registers that are used as temporary + * location to store data * v20-v23: load the next message block * v24-v27: the 64 byte initial Chacha block * v28: vector to increment the counter words of each block - * v29: vector of 5's to increment counters between L_chacha20_arm64_outer_%= loops + * v29: vector of 5's to increment counters between + * L_chacha20_arm64_outer_%= loops * v30: table lookup indices to rotate values by 8 */ /* Load counter-add values for each block */ - "LD1 {v28.4s}, [%[L_chacha20_neon_add_all_counters]] \n\t" + "LD1 {v28.4s}, [%[L_chacha20_neon_add_all_cntrs]] \n\t" /* Load index look-up for rotating left 8 bits */ "LD1 {v30.16b}, [%[L_chacha20_neon_rol8]] \n\t" /* For adding 5 to each counter-add for next 320-byte chunk */ @@ -669,7 +675,7 @@ static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m, "BNE L_chacha20_arm64_outer_%= \n\t" : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes64) - : [L_chacha20_neon_add_all_counters] "r" (L_chacha20_neon_add_all_counters), + : [L_chacha20_neon_add_all_cntrs] "r" (L_chacha20_neon_add_all_cntrs), [L_chacha20_neon_rol8] "r" (L_chacha20_neon_rol8) : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", @@ -684,7 +690,8 @@ static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m, /** * Converts word into bytes with rotations having been done. 
*/ -static WC_INLINE int wc_Chacha_encrypt_256(const word32 input[CHACHA_CHUNK_WORDS], const byte* m, byte* c) +static WC_INLINE int wc_Chacha_encrypt_256( + const word32 input[CHACHA_CHUNK_WORDS], const byte* m, byte* c) { #ifdef CHACHA_TEST printf("Entering wc_Chacha_encrypt_256\n"); @@ -1035,7 +1042,8 @@ static WC_INLINE int wc_Chacha_encrypt_256(const word32 input[CHACHA_CHUNK_WORDS ); #else __asm__ __volatile__ ( - // The paper NEON crypto by Daniel J. Bernstein and Peter Schwabe was used to optimize for ARM + // The paper NEON crypto by Daniel J. Bernstein and Peter Schwabe was + // used to optimize for ARM // https://cryptojedi.org/papers/neoncrypto-20120320.pdf ".align 2 \n\t" @@ -1087,7 +1095,8 @@ static WC_INLINE int wc_Chacha_encrypt_256(const word32 input[CHACHA_CHUNK_WORDS "VMOV d14, r10, r11 \n\t" "ADD r10, r10, #1 \n\t" "VMOV d22, r10, r11 \n\t" - "ADD r10, r10, #1 \n\t" // ARM calculates the fourth block (two was already added earlier) + "ADD r10, r10, #1 \n\t" // ARM calculates the fourth block (two was + // already added earlier) "\n" "L_chacha20_arm32_256_loop_%=: \n\t" "SUBS r14, r14, #1 \n\t" @@ -1109,7 +1118,8 @@ static WC_INLINE int wc_Chacha_encrypt_256(const word32 input[CHACHA_CHUNK_WORDS "ROR r11, r11, #16 \n\t" // 13 13 "VEOR q14, q11, q8 \n\t" "ADD r8, r8, r10 \n\t" // 8 8 12 - // rotation by 16 bits may be done by reversing the 16 bit elements in 32 bit words + // rotation by 16 bits may be done by reversing the 16 bit elements in + // 32 bit words "VREV32.16 q3, q12 \n\t" "ADD r9, r9, r11 \n\t" // 9 9 13 "VREV32.16 q7, q13 \n\t" @@ -1129,7 +1139,8 @@ static WC_INLINE int wc_Chacha_encrypt_256(const word32 input[CHACHA_CHUNK_WORDS "EOR r10, r10, r0 \n\t" // 12 12 0 "VEOR q14, q9, q10 \n\t" "EOR r11, r11, r1 \n\t" // 13 13 1 - // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + // SIMD instructions don't support rotation so we have to cheat using + // shifts and a help register "VSHL.I32 
q1, q12, #12 \n\t" "ROR r10, r10, #24 \n\t" // 12 12 "VSHL.I32 q5, q13, #12 \n\t" @@ -1155,7 +1166,8 @@ static WC_INLINE int wc_Chacha_encrypt_256(const word32 input[CHACHA_CHUNK_WORDS "STR r9, [sp, #4*9] \n\t" "VEOR q14, q11, q8 \n\t" "LDR r9, [sp, #4*11] \n\t" - // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + // SIMD instructions don't support rotation so we have to cheat using + // shifts and a help register "VSHL.I32 q3, q12, #8 \n\t" "ROR r4, r4, #25 \n\t" // 4 4 "VSHL.I32 q7, q13, #8 \n\t" @@ -1188,7 +1200,8 @@ static WC_INLINE int wc_Chacha_encrypt_256(const word32 input[CHACHA_CHUNK_WORDS "EOR r6, r6, r8 \n\t" // 6 6 10 "VEOR q14, q9, q10 \n\t" "EOR r7, r7, r9 \n\t" // 7 7 11 - // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + // SIMD instructions don't support rotation so we have to cheat using + // shifts and a help register "VSHL.I32 q1, q12, #7 \n\t" "ROR r6, r6, #20 \n\t" // 6 6 "VSHL.I32 q5, q13, #7 \n\t" @@ -1240,7 +1253,8 @@ static WC_INLINE int wc_Chacha_encrypt_256(const word32 input[CHACHA_CHUNK_WORDS "ROR r10, r10, #16 \n\t" // 12 12 "VEOR q14, q11, q8 \n\t" "ADD r8, r8, r11 \n\t" // 10 10 15 - // rotation by 16 bits may be done by reversing the 16 bit elements in 32 bit words + // rotation by 16 bits may be done by reversing the 16 bit elements in + // 32 bit words "VREV32.16 q3, q12 \n\t" "ADD r9, r9, r10 \n\t" // 11 11 12 "VREV32.16 q7, q13 \n\t" @@ -1260,7 +1274,8 @@ static WC_INLINE int wc_Chacha_encrypt_256(const word32 input[CHACHA_CHUNK_WORDS "EOR r11, r11, r0 \n\t" // 15 15 0 "VEOR q14, q9, q10 \n\t" "EOR r10, r10, r1 \n\t" // 12 12 1 - // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + // SIMD instructions don't support rotation so we have to cheat using + // shifts and a help register "VSHL.I32 q1, q12, #12 \n\t" "ROR r11, r11, #24 \n\t" // 15 15 "VSHL.I32 q5, q13, #12 \n\t" @@ -1286,7 
+1301,8 @@ static WC_INLINE int wc_Chacha_encrypt_256(const word32 input[CHACHA_CHUNK_WORDS "STR r9, [sp, #4*11] \n\t" "VEOR q14, q11, q8 \n\t" "LDR r9, [sp, #4*9] \n\t" - // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + // SIMD instructions don't support rotation so we have to cheat using + // shifts and a help register "VSHL.I32 q3, q12, #8 \n\t" "ROR r5, r5, #25 \n\t" // 5 5 "VSHL.I32 q7, q13, #8 \n\t" @@ -1319,7 +1335,8 @@ static WC_INLINE int wc_Chacha_encrypt_256(const word32 input[CHACHA_CHUNK_WORDS "EOR r7, r7, r8 \n\t" // 7 7 8 "VEOR q14, q9, q10 \n\t" "EOR r4, r4, r9 \n\t" // 4 4 9 - // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + // SIMD instructions don't support rotation so we have to cheat using + // shifts and a help register "VSHL.I32 q1, q12, #7 \n\t" "ROR r7, r7, #20 \n\t" // 7 7 "VSHL.I32 q5, q13, #7 \n\t" @@ -1461,7 +1478,8 @@ static WC_INLINE int wc_Chacha_encrypt_256(const word32 input[CHACHA_CHUNK_WORDS } -static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS], const byte* m, byte* c) +static WC_INLINE int wc_Chacha_encrypt_128( + const word32 input[CHACHA_CHUNK_WORDS], const byte* m, byte* c) { #ifdef CHACHA_TEST printf("Entering wc_Chacha_encrypt_128\n"); @@ -1626,7 +1644,8 @@ static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS "VADD.I32 q4, q4, q5 \n\t" "VEOR q8, q3, q0 \n\t" "VEOR q9, q7, q4 \n\t" - // rotation by 16 bits may be done by reversing the 16 bit elements in 32 bit words + // rotation by 16 bits may be done by reversing the 16 bit elements in + // 32 bit words "VREV32.16 q3, q8 \n\t" "VREV32.16 q7, q9 \n\t" @@ -1634,7 +1653,8 @@ static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS "VADD.I32 q6, q6, q7 \n\t" "VEOR q8, q1, q2 \n\t" "VEOR q9, q5, q6 \n\t" - // SIMD instructions don't support rotation so we have to cheat using shifts and a help 
register + // SIMD instructions don't support rotation so we have to cheat using + // shifts and a help register "VSHL.I32 q1, q8, #12 \n\t" "VSHL.I32 q5, q9, #12 \n\t" "VSRI.I32 q1, q8, #20 \n\t" @@ -1644,7 +1664,8 @@ static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS "VADD.I32 q4, q4, q5 \n\t" "VEOR q8, q3, q0 \n\t" "VEOR q9, q7, q4 \n\t" - // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + // SIMD instructions don't support rotation so we have to cheat using + // shifts and a help register "VSHL.I32 q3, q8, #8 \n\t" "VSHL.I32 q7, q9, #8 \n\t" "VSRI.I32 q3, q8, #24 \n\t" @@ -1654,7 +1675,8 @@ static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS "VADD.I32 q6, q6, q7 \n\t" "VEOR q8, q1, q2 \n\t" "VEOR q9, q5, q6 \n\t" - // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + // SIMD instructions don't support rotation so we have to cheat using + // shifts and a help register "VSHL.I32 q1, q8, #7 \n\t" "VSHL.I32 q5, q9, #7 \n\t" "VSRI.I32 q1, q8, #25 \n\t" @@ -1674,7 +1696,8 @@ static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS "VADD.I32 q4, q4, q5 \n\t" "VEOR q8, q3, q0 \n\t" "VEOR q9, q7, q4 \n\t" - // rotation by 16 bits may be done by reversing the 16 bit elements in 32 bit words + // rotation by 16 bits may be done by reversing the 16 bit elements in + // 32 bit words "VREV32.16 q3, q8 \n\t" "VREV32.16 q7, q9 \n\t" @@ -1682,7 +1705,8 @@ static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS "VADD.I32 q6, q6, q7 \n\t" "VEOR q8, q1, q2 \n\t" "VEOR q9, q5, q6 \n\t" - // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + // SIMD instructions don't support rotation so we have to cheat using + // shifts and a help register "VSHL.I32 q1, q8, #12 \n\t" "VSHL.I32 q5, q9, #12 \n\t" "VSRI.I32 q1, q8, #20 \n\t" @@ -1692,7 
+1716,8 @@ static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS "VADD.I32 q4, q4, q5 \n\t" "VEOR q8, q3, q0 \n\t" "VEOR q9, q7, q4 \n\t" - // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + // SIMD instructions don't support rotation so we have to cheat using + // shifts and a help register "VSHL.I32 q3, q8, #8 \n\t" "VSHL.I32 q7, q9, #8 \n\t" "VSRI.I32 q3, q8, #24 \n\t" @@ -1702,7 +1727,8 @@ static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS "VADD.I32 q6, q6, q7 \n\t" "VEOR q8, q1, q2 \n\t" "VEOR q9, q5, q6 \n\t" - // SIMD instructions don't support rotation so we have to cheat using shifts and a help register + // SIMD instructions don't support rotation so we have to cheat using + // shifts and a help register "VSHL.I32 q1, q8, #7 \n\t" "VSHL.I32 q5, q9, #7 \n\t" "VSRI.I32 q1, q8, #25 \n\t" @@ -2288,7 +2314,8 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "BGT L_chacha20_arm64_64_loop_lt_8_%= \n\t" "\n" "L_chacha20_arm64_64_done_%=: \n\t" - : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes64) + : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), + [bytes] "+r" (bytes64) : [L_chacha20_neon_rol8] "r" (L_chacha20_neon_rol8), [L_chacha20_neon_inc_first_word] "r" (L_chacha20_neon_inc_first_word), [over] "r" (over) @@ -2875,7 +2902,8 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, : [L_chacha20_neon_inc_first_word] "r" (L_chacha20_neon_inc_first_word), [over] "r" (over) : "memory", "cc", - "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q14", "r12", "r14" + "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", + "q11", "q14", "r12", "r14" ); #endif /* __aarch64__ */ } @@ -2897,7 +2925,8 @@ static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c, bytes -= processed; c += processed; m += processed; - 
ctx->X[CHACHA_IV_BYTES] = PLUS(ctx->X[CHACHA_IV_BYTES], processed / CHACHA_CHUNK_BYTES); + ctx->X[CHACHA_IV_BYTES] = PLUS(ctx->X[CHACHA_IV_BYTES], + processed / CHACHA_CHUNK_BYTES); } if (bytes >= CHACHA_CHUNK_BYTES * 4) { #else @@ -2908,7 +2937,8 @@ static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c, bytes -= processed; c += processed; m += processed; - ctx->X[CHACHA_IV_BYTES] = PLUS(ctx->X[CHACHA_IV_BYTES], processed / CHACHA_CHUNK_BYTES); + ctx->X[CHACHA_IV_BYTES] = PLUS(ctx->X[CHACHA_IV_BYTES], + processed / CHACHA_CHUNK_BYTES); } if (bytes >= CHACHA_CHUNK_BYTES * 2) { processed = wc_Chacha_encrypt_128(ctx->X, m, c); @@ -2916,7 +2946,8 @@ static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c, bytes -= processed; c += processed; m += processed; - ctx->X[CHACHA_IV_BYTES] = PLUS(ctx->X[CHACHA_IV_BYTES], processed / CHACHA_CHUNK_BYTES); + ctx->X[CHACHA_IV_BYTES] = PLUS(ctx->X[CHACHA_IV_BYTES], + processed / CHACHA_CHUNK_BYTES); } if (bytes > 0) { wc_Chacha_encrypt_64(ctx->X, m, c, bytes, (byte*)ctx->over); diff --git a/wolfcrypt/src/port/arm/armv8-curve25519.S b/wolfcrypt/src/port/arm/armv8-curve25519.S index fdfcbec32..876bb28b2 100644 --- a/wolfcrypt/src/port/arm/armv8-curve25519.S +++ b/wolfcrypt/src/port/arm/armv8-curve25519.S @@ -26,7 +26,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./x25519/x25519.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.S + * ruby ./x25519/x25519.rb arm64 \ + * ../wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.S */ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ diff --git a/wolfcrypt/src/port/arm/armv8-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-curve25519_c.c index 55023b26b..459352ea2 100644 --- a/wolfcrypt/src/port/arm/armv8-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-curve25519_c.c @@ -27,7 +27,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./x25519/x25519.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.c + * ruby 
./x25519/x25519.rb arm64 \ + * ../wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.c */ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ @@ -61,7 +62,7 @@ void fe_frombytes(fe out, const unsigned char* in) "stp x4, x5, [%x[out], #16]\n\t" : [out] "+r" (out), [in] "+r" (in) : - : "memory", "x2", "x3", "x4", "x5", "x6", "cc" + : "memory", "cc", "x2", "x3", "x4", "x5", "x6" ); } @@ -85,7 +86,7 @@ void fe_tobytes(unsigned char* out, const fe n) "stp x4, x5, [%x[out], #16]\n\t" : [out] "+r" (out), [n] "+r" (n) : - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "cc" + : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7" ); } @@ -98,7 +99,7 @@ void fe_1(fe n) "stp xzr, xzr, [%x[n], #16]\n\t" : [n] "+r" (n) : - : "memory", "x1", "cc" + : "memory", "cc", "x1" ); } @@ -124,7 +125,7 @@ void fe_copy(fe r, const fe a) "stp x4, x5, [%x[r], #16]\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "x2", "x3", "x4", "x5", "cc" + : "memory", "cc", "x2", "x3", "x4", "x5" ); } @@ -155,7 +156,8 @@ void fe_sub(fe r, const fe a, const fe b) "stp x5, x6, [%x[r], #16]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "cc" + : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13" ); } @@ -186,7 +188,8 @@ void fe_add(fe r, const fe a, const fe b) "stp x5, x6, [%x[r], #16]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "cc" + : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13" ); } @@ -207,7 +210,7 @@ void fe_neg(fe r, const fe a) "stp x8, x9, [%x[r], #16]\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" + : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" ); } @@ -232,7 +235,7 @@ int fe_isnonzero(const fe a) "orr %x[a], %x[a], x3\n\t" : [a] "+r" (a) : - : "memory", "x1", "x2", "x3", "x4", "x5", 
"x6", "cc" + : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6" ); return (word32)(size_t)a; } @@ -251,7 +254,7 @@ int fe_isnegative(const fe a) "eor %x[a], %x[a], x5, lsr 63\n\t" : [a] "+r" (a) : - : "memory", "x1", "x2", "x3", "x4", "x5", "x6", "cc" + : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6" ); return (word32)(size_t)a; } @@ -466,7 +469,9 @@ void fe_cmov_table(fe* r, fe* base, signed char b) "ldp x29, x30, [sp], #32\n\t" : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "cc" + : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); } @@ -602,7 +607,9 @@ void fe_mul(fe r, const fe a, const fe b) "stp x8, x9, [%x[r], #16]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "cc" + : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22" ); } @@ -704,7 +711,8 @@ void fe_sq(fe r, const fe a) "stp x7, x8, [%x[r], #16]\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "cc" + : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16" ); } @@ -1581,7 +1589,8 @@ void fe_invert(fe r, const fe a) "ldp x29, x30, [sp], #0xa0\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "x2", "x20", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "cc" + : "memory", "cc", "x2", "x20", "x3", "x4", "x5", "x6", "x7", "x8", "x9", + "x10", 
"x11", "x12", "x13", "x14", "x15", "x16", "x17" ); } @@ -3681,7 +3690,9 @@ int curve25519(byte* r, const byte* n, const byte* a) "ldp x29, x30, [sp], #0xc0\n\t" : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "cc" + : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); return (word32)(size_t)r; } @@ -4491,7 +4502,8 @@ void fe_pow22523(fe r, const fe a) "ldp x29, x30, [sp], #0x80\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "x2", "x23", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "cc" + : "memory", "cc", "x2", "x23", "x3", "x4", "x5", "x6", "x7", "x8", "x9", + "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17" ); } @@ -4861,7 +4873,9 @@ void ge_p1p1_to_p2(ge_p2* r, const ge_p1p1* p) "ldp x29, x30, [sp], #32\n\t" : [r] "+r" (r), [p] "+r" (p) : - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "cc" + : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22" ); } @@ -5347,7 +5361,9 @@ void ge_p1p1_to_p3(ge_p3* r, const ge_p1p1* p) "ldp x29, x30, [sp], #32\n\t" : [r] "+r" (r), [p] "+r" (p) : - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "cc" + : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26" ); } @@ -5808,7 +5824,9 @@ void ge_p2_dbl(ge_p1p1* r, const 
ge_p2* p) "ldp x29, x30, [sp], #32\n\t" : [r] "+r" (r), [p] "+r" (p) : - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "cc" + : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); } @@ -6304,7 +6322,9 @@ void ge_madd(ge_p1p1* r, const ge_p3* p, const ge_precomp* q) "ldp x29, x30, [sp], #48\n\t" : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "cc" + : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); } @@ -6800,7 +6820,9 @@ void ge_msub(ge_p1p1* r, const ge_p3* p, const ge_precomp* q) "ldp x29, x30, [sp], #48\n\t" : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "cc" + : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); } @@ -7422,7 +7444,9 @@ void ge_add(ge_p1p1* r, const ge_p3* p, const ge_cached* q) "ldp x29, x30, [sp], #48\n\t" : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "cc" + : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", 
"x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); } @@ -8059,7 +8083,9 @@ void ge_sub(ge_p1p1* r, const ge_p3* p, const ge_cached* q) "ldp x29, x30, [sp], #48\n\t" : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "cc" + : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); } @@ -8242,7 +8268,9 @@ void sc_reduce(byte* s) "stp x4, x5, [%x[s], #16]\n\t" : [s] "+r" (s) : - : "memory", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "cc" + : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", + "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", + "x20", "x21", "x22", "x23" ); } @@ -8521,7 +8549,9 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "stp x6, x7, [%x[s], #16]\n\t" : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c) : - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "cc" + : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", + "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", + "x22", "x23", "x24", "x25", "x26" ); } diff --git a/wolfcrypt/src/port/arm/armv8-kyber-asm.S b/wolfcrypt/src/port/arm/armv8-mlkem-asm.S similarity index 96% rename from wolfcrypt/src/port/arm/armv8-kyber-asm.S rename to wolfcrypt/src/port/arm/armv8-mlkem-asm.S index dee9a168a..b67aca38e 100644 --- a/wolfcrypt/src/port/arm/armv8-kyber-asm.S +++ b/wolfcrypt/src/port/arm/armv8-mlkem-asm.S @@ -1,4 +1,4 @@ -/* armv8-kyber-asm +/* armv8-mlkem-asm * * 
Copyright (C) 2006-2025 wolfSSL Inc. * @@ -26,16 +26,17 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./kyber/kyber.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-kyber-asm.S + * ruby ./kyber/kyber.rb arm64 \ + * ../wolfssl/wolfcrypt/src/port/arm/armv8-mlkem-asm.S */ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ #ifndef WOLFSSL_ARMASM_INLINE #ifndef __APPLE__ .text - .type L_kyber_aarch64_q, %object + .type L_mlkem_aarch64_q, %object .section .rodata - .size L_kyber_aarch64_q, 16 + .size L_mlkem_aarch64_q, 16 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -44,13 +45,13 @@ #else .p2align 2 #endif /* __APPLE__ */ -L_kyber_aarch64_q: +L_mlkem_aarch64_q: .short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 #ifndef __APPLE__ .text - .type L_kyber_aarch64_consts, %object + .type L_mlkem_aarch64_consts, %object .section .rodata - .size L_kyber_aarch64_consts, 16 + .size L_mlkem_aarch64_consts, 16 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -59,7 +60,7 @@ L_kyber_aarch64_q: #else .p2align 2 #endif /* __APPLE__ */ -L_kyber_aarch64_consts: +L_mlkem_aarch64_consts: .short 0x0d01,0xf301,0x4ebf,0x0549,0x5049,0x0000,0x0000,0x0000 #ifndef __APPLE__ .text @@ -99,12 +100,12 @@ L_sha3_aarch64_r: .xword 0x8000000000008080 .xword 0x0000000080000001 .xword 0x8000000080008008 -#ifdef WOLFSSL_WC_KYBER +#ifdef WOLFSSL_WC_MLKEM #ifndef __APPLE__ .text - .type L_kyber_aarch64_zetas, %object + .type L_mlkem_aarch64_zetas, %object .section .rodata - .size L_kyber_aarch64_zetas, 576 + .size L_mlkem_aarch64_zetas, 576 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -113,7 +114,7 @@ L_sha3_aarch64_r: #else .p2align 2 #endif /* __APPLE__ */ -L_kyber_aarch64_zetas: +L_mlkem_aarch64_zetas: .short 0x08ed,0x0a0b,0x0b9a,0x0714,0x05d5,0x058e,0x011f,0x00ca .short 0x0c56,0x026e,0x0629,0x00b6,0x03c2,0x084f,0x073f,0x05bc .short 0x023d,0x07d4,0x0108,0x017f,0x09c4,0x05b2,0x06bf,0x0c7f @@ -152,9 +153,9 @@ L_kyber_aarch64_zetas: .short 
0x03be,0x03be,0x074d,0x074d,0x05f2,0x05f2,0x065c,0x065c #ifndef __APPLE__ .text - .type L_kyber_aarch64_zetas_qinv, %object + .type L_mlkem_aarch64_zetas_qinv, %object .section .rodata - .size L_kyber_aarch64_zetas_qinv, 576 + .size L_mlkem_aarch64_zetas_qinv, 576 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -163,7 +164,7 @@ L_kyber_aarch64_zetas: #else .p2align 2 #endif /* __APPLE__ */ -L_kyber_aarch64_zetas_qinv: +L_mlkem_aarch64_zetas_qinv: .short 0xffed,0x7b0b,0x399a,0x0314,0x34d5,0xcf8e,0x6e1f,0xbeca .short 0xae56,0x6c6e,0xf129,0xc2b6,0x29c2,0x054f,0xd43f,0x79bc .short 0xe93d,0x43d4,0x9908,0x8e7f,0x15c4,0xfbb2,0x53bf,0x997f @@ -202,15 +203,15 @@ L_kyber_aarch64_zetas_qinv: .short 0x5dbe,0x5dbe,0x1e4d,0x1e4d,0xbbf2,0xbbf2,0x5a5c,0x5a5c #ifndef __APPLE__ .text -.globl kyber_ntt -.type kyber_ntt,@function +.globl mlkem_ntt +.type mlkem_ntt,@function .align 2 -kyber_ntt: +mlkem_ntt: #else .section __TEXT,__text -.globl _kyber_ntt +.globl _mlkem_ntt .p2align 2 -_kyber_ntt: +_mlkem_ntt: #endif /* __APPLE__ */ stp x29, x30, [sp, #-80]! 
add x29, sp, #0 @@ -219,25 +220,25 @@ _kyber_ntt: stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] #ifndef __APPLE__ - adrp x2, L_kyber_aarch64_zetas - add x2, x2, :lo12:L_kyber_aarch64_zetas + adrp x2, L_mlkem_aarch64_zetas + add x2, x2, :lo12:L_mlkem_aarch64_zetas #else - adrp x2, L_kyber_aarch64_zetas@PAGE - add x2, x2, :lo12:L_kyber_aarch64_zetas@PAGEOFF + adrp x2, L_mlkem_aarch64_zetas@PAGE + add x2, x2, :lo12:L_mlkem_aarch64_zetas@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x3, L_kyber_aarch64_zetas_qinv - add x3, x3, :lo12:L_kyber_aarch64_zetas_qinv + adrp x3, L_mlkem_aarch64_zetas_qinv + add x3, x3, :lo12:L_mlkem_aarch64_zetas_qinv #else - adrp x3, L_kyber_aarch64_zetas_qinv@PAGE - add x3, x3, :lo12:L_kyber_aarch64_zetas_qinv@PAGEOFF + adrp x3, L_mlkem_aarch64_zetas_qinv@PAGE + add x3, x3, :lo12:L_mlkem_aarch64_zetas_qinv@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x4, L_kyber_aarch64_consts - add x4, x4, :lo12:L_kyber_aarch64_consts + adrp x4, L_mlkem_aarch64_consts + add x4, x4, :lo12:L_mlkem_aarch64_consts #else - adrp x4, L_kyber_aarch64_consts@PAGE - add x4, x4, :lo12:L_kyber_aarch64_consts@PAGEOFF + adrp x4, L_mlkem_aarch64_consts@PAGE + add x4, x4, :lo12:L_mlkem_aarch64_consts@PAGEOFF #endif /* __APPLE__ */ add x1, x0, #0x100 ldr q4, [x4] @@ -1492,13 +1493,13 @@ _kyber_ntt: ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ - .size kyber_ntt,.-kyber_ntt + .size mlkem_ntt,.-mlkem_ntt #endif /* __APPLE__ */ #ifndef __APPLE__ .text - .type L_kyber_aarch64_zetas_inv, %object + .type L_mlkem_aarch64_zetas_inv, %object .section .rodata - .size L_kyber_aarch64_zetas_inv, 576 + .size L_mlkem_aarch64_zetas_inv, 576 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -1507,7 +1508,7 @@ _kyber_ntt: #else .p2align 2 #endif /* __APPLE__ */ -L_kyber_aarch64_zetas_inv: +L_mlkem_aarch64_zetas_inv: .short 0x06a5,0x06a5,0x070f,0x070f,0x05b4,0x05b4,0x0943,0x0943 .short 0x0922,0x0922,0x091d,0x091d,0x0134,0x0134,0x006c,0x006c .short 
0x0b23,0x0b23,0x0366,0x0366,0x0356,0x0356,0x05e6,0x05e6 @@ -1546,9 +1547,9 @@ L_kyber_aarch64_zetas_inv: .short 0x0c37,0x0be2,0x0773,0x072c,0x05ed,0x0167,0x02f6,0x05a1 #ifndef __APPLE__ .text - .type L_kyber_aarch64_zetas_inv_qinv, %object + .type L_mlkem_aarch64_zetas_inv_qinv, %object .section .rodata - .size L_kyber_aarch64_zetas_inv_qinv, 576 + .size L_mlkem_aarch64_zetas_inv_qinv, 576 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -1557,7 +1558,7 @@ L_kyber_aarch64_zetas_inv: #else .p2align 2 #endif /* __APPLE__ */ -L_kyber_aarch64_zetas_inv_qinv: +L_mlkem_aarch64_zetas_inv_qinv: .short 0xa5a5,0xa5a5,0x440f,0x440f,0xe1b4,0xe1b4,0xa243,0xa243 .short 0x4f22,0x4f22,0x901d,0x901d,0x5d34,0x5d34,0x846c,0x846c .short 0x4423,0x4423,0xd566,0xd566,0xa556,0xa556,0x57e6,0x57e6 @@ -1596,15 +1597,15 @@ L_kyber_aarch64_zetas_inv_qinv: .short 0x4137,0x91e2,0x3073,0xcb2c,0xfced,0xc667,0x84f6,0xd8a1 #ifndef __APPLE__ .text -.globl kyber_invntt -.type kyber_invntt,@function +.globl mlkem_invntt +.type mlkem_invntt,@function .align 2 -kyber_invntt: +mlkem_invntt: #else .section __TEXT,__text -.globl _kyber_invntt +.globl _mlkem_invntt .p2align 2 -_kyber_invntt: +_mlkem_invntt: #endif /* __APPLE__ */ stp x29, x30, [sp, #-80]! 
add x29, sp, #0 @@ -1613,25 +1614,25 @@ _kyber_invntt: stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] #ifndef __APPLE__ - adrp x2, L_kyber_aarch64_zetas_inv - add x2, x2, :lo12:L_kyber_aarch64_zetas_inv + adrp x2, L_mlkem_aarch64_zetas_inv + add x2, x2, :lo12:L_mlkem_aarch64_zetas_inv #else - adrp x2, L_kyber_aarch64_zetas_inv@PAGE - add x2, x2, :lo12:L_kyber_aarch64_zetas_inv@PAGEOFF + adrp x2, L_mlkem_aarch64_zetas_inv@PAGE + add x2, x2, :lo12:L_mlkem_aarch64_zetas_inv@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x3, L_kyber_aarch64_zetas_inv_qinv - add x3, x3, :lo12:L_kyber_aarch64_zetas_inv_qinv + adrp x3, L_mlkem_aarch64_zetas_inv_qinv + add x3, x3, :lo12:L_mlkem_aarch64_zetas_inv_qinv #else - adrp x3, L_kyber_aarch64_zetas_inv_qinv@PAGE - add x3, x3, :lo12:L_kyber_aarch64_zetas_inv_qinv@PAGEOFF + adrp x3, L_mlkem_aarch64_zetas_inv_qinv@PAGE + add x3, x3, :lo12:L_mlkem_aarch64_zetas_inv_qinv@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x4, L_kyber_aarch64_consts - add x4, x4, :lo12:L_kyber_aarch64_consts + adrp x4, L_mlkem_aarch64_consts + add x4, x4, :lo12:L_mlkem_aarch64_consts #else - adrp x4, L_kyber_aarch64_consts@PAGE - add x4, x4, :lo12:L_kyber_aarch64_consts@PAGEOFF + adrp x4, L_mlkem_aarch64_consts@PAGE + add x4, x4, :lo12:L_mlkem_aarch64_consts@PAGEOFF #endif /* __APPLE__ */ add x1, x0, #0x100 ldr q8, [x4] @@ -3042,20 +3043,20 @@ _kyber_invntt: ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ - .size kyber_invntt,.-kyber_invntt + .size mlkem_invntt,.-mlkem_invntt #endif /* __APPLE__ */ #ifndef WOLFSSL_AARCH64_NO_SQRDMLSH #ifndef __APPLE__ .text -.globl kyber_ntt_sqrdmlsh -.type kyber_ntt_sqrdmlsh,@function +.globl mlkem_ntt_sqrdmlsh +.type mlkem_ntt_sqrdmlsh,@function .align 2 -kyber_ntt_sqrdmlsh: +mlkem_ntt_sqrdmlsh: #else .section __TEXT,__text -.globl _kyber_ntt_sqrdmlsh +.globl _mlkem_ntt_sqrdmlsh .p2align 2 -_kyber_ntt_sqrdmlsh: +_mlkem_ntt_sqrdmlsh: #endif /* __APPLE__ */ stp x29, x30, [sp, #-80]! 
add x29, sp, #0 @@ -3064,25 +3065,25 @@ _kyber_ntt_sqrdmlsh: stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] #ifndef __APPLE__ - adrp x2, L_kyber_aarch64_zetas - add x2, x2, :lo12:L_kyber_aarch64_zetas + adrp x2, L_mlkem_aarch64_zetas + add x2, x2, :lo12:L_mlkem_aarch64_zetas #else - adrp x2, L_kyber_aarch64_zetas@PAGE - add x2, x2, :lo12:L_kyber_aarch64_zetas@PAGEOFF + adrp x2, L_mlkem_aarch64_zetas@PAGE + add x2, x2, :lo12:L_mlkem_aarch64_zetas@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x3, L_kyber_aarch64_zetas_qinv - add x3, x3, :lo12:L_kyber_aarch64_zetas_qinv + adrp x3, L_mlkem_aarch64_zetas_qinv + add x3, x3, :lo12:L_mlkem_aarch64_zetas_qinv #else - adrp x3, L_kyber_aarch64_zetas_qinv@PAGE - add x3, x3, :lo12:L_kyber_aarch64_zetas_qinv@PAGEOFF + adrp x3, L_mlkem_aarch64_zetas_qinv@PAGE + add x3, x3, :lo12:L_mlkem_aarch64_zetas_qinv@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x4, L_kyber_aarch64_consts - add x4, x4, :lo12:L_kyber_aarch64_consts + adrp x4, L_mlkem_aarch64_consts + add x4, x4, :lo12:L_mlkem_aarch64_consts #else - adrp x4, L_kyber_aarch64_consts@PAGE - add x4, x4, :lo12:L_kyber_aarch64_consts@PAGEOFF + adrp x4, L_mlkem_aarch64_consts@PAGE + add x4, x4, :lo12:L_mlkem_aarch64_consts@PAGEOFF #endif /* __APPLE__ */ add x1, x0, #0x100 ldr q4, [x4] @@ -4225,19 +4226,19 @@ _kyber_ntt_sqrdmlsh: ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ - .size kyber_ntt_sqrdmlsh,.-kyber_ntt_sqrdmlsh + .size mlkem_ntt_sqrdmlsh,.-mlkem_ntt_sqrdmlsh #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_invntt_sqrdmlsh -.type kyber_invntt_sqrdmlsh,@function +.globl mlkem_invntt_sqrdmlsh +.type mlkem_invntt_sqrdmlsh,@function .align 2 -kyber_invntt_sqrdmlsh: +mlkem_invntt_sqrdmlsh: #else .section __TEXT,__text -.globl _kyber_invntt_sqrdmlsh +.globl _mlkem_invntt_sqrdmlsh .p2align 2 -_kyber_invntt_sqrdmlsh: +_mlkem_invntt_sqrdmlsh: #endif /* __APPLE__ */ stp x29, x30, [sp, #-80]! 
add x29, sp, #0 @@ -4246,25 +4247,25 @@ _kyber_invntt_sqrdmlsh: stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] #ifndef __APPLE__ - adrp x2, L_kyber_aarch64_zetas_inv - add x2, x2, :lo12:L_kyber_aarch64_zetas_inv + adrp x2, L_mlkem_aarch64_zetas_inv + add x2, x2, :lo12:L_mlkem_aarch64_zetas_inv #else - adrp x2, L_kyber_aarch64_zetas_inv@PAGE - add x2, x2, :lo12:L_kyber_aarch64_zetas_inv@PAGEOFF + adrp x2, L_mlkem_aarch64_zetas_inv@PAGE + add x2, x2, :lo12:L_mlkem_aarch64_zetas_inv@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x3, L_kyber_aarch64_zetas_inv_qinv - add x3, x3, :lo12:L_kyber_aarch64_zetas_inv_qinv + adrp x3, L_mlkem_aarch64_zetas_inv_qinv + add x3, x3, :lo12:L_mlkem_aarch64_zetas_inv_qinv #else - adrp x3, L_kyber_aarch64_zetas_inv_qinv@PAGE - add x3, x3, :lo12:L_kyber_aarch64_zetas_inv_qinv@PAGEOFF + adrp x3, L_mlkem_aarch64_zetas_inv_qinv@PAGE + add x3, x3, :lo12:L_mlkem_aarch64_zetas_inv_qinv@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x4, L_kyber_aarch64_consts - add x4, x4, :lo12:L_kyber_aarch64_consts + adrp x4, L_mlkem_aarch64_consts + add x4, x4, :lo12:L_mlkem_aarch64_consts #else - adrp x4, L_kyber_aarch64_consts@PAGE - add x4, x4, :lo12:L_kyber_aarch64_consts@PAGEOFF + adrp x4, L_mlkem_aarch64_consts@PAGE + add x4, x4, :lo12:L_mlkem_aarch64_consts@PAGEOFF #endif /* __APPLE__ */ add x1, x0, #0x100 ldr q8, [x4] @@ -5531,14 +5532,14 @@ _kyber_invntt_sqrdmlsh: ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ - .size kyber_invntt_sqrdmlsh,.-kyber_invntt_sqrdmlsh + .size mlkem_invntt_sqrdmlsh,.-mlkem_invntt_sqrdmlsh #endif /* __APPLE__ */ #endif /* WOLFSSL_AARCH64_NO_SQRDMLSH */ #ifndef __APPLE__ .text - .type L_kyber_aarch64_zetas_mul, %object + .type L_mlkem_aarch64_zetas_mul, %object .section .rodata - .size L_kyber_aarch64_zetas_mul, 256 + .size L_mlkem_aarch64_zetas_mul, 256 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -5547,7 +5548,7 @@ _kyber_invntt_sqrdmlsh: #else .p2align 2 #endif /* __APPLE__ */ 
-L_kyber_aarch64_zetas_mul: +L_mlkem_aarch64_zetas_mul: .short 0x08b2,0xf74e,0x01ae,0xfe52,0x022b,0xfdd5,0x034b,0xfcb5 .short 0x081e,0xf7e2,0x0367,0xfc99,0x060e,0xf9f2,0x0069,0xff97 .short 0x01a6,0xfe5a,0x024b,0xfdb5,0x00b1,0xff4f,0x0c16,0xf3ea @@ -5566,15 +5567,15 @@ L_kyber_aarch64_zetas_mul: .short 0x03be,0xfc42,0x074d,0xf8b3,0x05f2,0xfa0e,0x065c,0xf9a4 #ifndef __APPLE__ .text -.globl kyber_basemul_mont -.type kyber_basemul_mont,@function +.globl mlkem_basemul_mont +.type mlkem_basemul_mont,@function .align 2 -kyber_basemul_mont: +mlkem_basemul_mont: #else .section __TEXT,__text -.globl _kyber_basemul_mont +.globl _mlkem_basemul_mont .p2align 2 -_kyber_basemul_mont: +_mlkem_basemul_mont: #endif /* __APPLE__ */ stp x29, x30, [sp, #-80]! add x29, sp, #0 @@ -5583,18 +5584,18 @@ _kyber_basemul_mont: stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] #ifndef __APPLE__ - adrp x3, L_kyber_aarch64_zetas_mul - add x3, x3, :lo12:L_kyber_aarch64_zetas_mul + adrp x3, L_mlkem_aarch64_zetas_mul + add x3, x3, :lo12:L_mlkem_aarch64_zetas_mul #else - adrp x3, L_kyber_aarch64_zetas_mul@PAGE - add x3, x3, :lo12:L_kyber_aarch64_zetas_mul@PAGEOFF + adrp x3, L_mlkem_aarch64_zetas_mul@PAGE + add x3, x3, :lo12:L_mlkem_aarch64_zetas_mul@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x4, L_kyber_aarch64_consts - add x4, x4, :lo12:L_kyber_aarch64_consts + adrp x4, L_mlkem_aarch64_consts + add x4, x4, :lo12:L_mlkem_aarch64_consts #else - adrp x4, L_kyber_aarch64_consts@PAGE - add x4, x4, :lo12:L_kyber_aarch64_consts@PAGEOFF + adrp x4, L_mlkem_aarch64_consts@PAGE + add x4, x4, :lo12:L_mlkem_aarch64_consts@PAGEOFF #endif /* __APPLE__ */ ldr q1, [x4] ldp q2, q3, [x1] @@ -6260,19 +6261,19 @@ _kyber_basemul_mont: ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ - .size kyber_basemul_mont,.-kyber_basemul_mont + .size mlkem_basemul_mont,.-mlkem_basemul_mont #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_basemul_mont_add -.type kyber_basemul_mont_add,@function +.globl 
mlkem_basemul_mont_add +.type mlkem_basemul_mont_add,@function .align 2 -kyber_basemul_mont_add: +mlkem_basemul_mont_add: #else .section __TEXT,__text -.globl _kyber_basemul_mont_add +.globl _mlkem_basemul_mont_add .p2align 2 -_kyber_basemul_mont_add: +_mlkem_basemul_mont_add: #endif /* __APPLE__ */ stp x29, x30, [sp, #-80]! add x29, sp, #0 @@ -6281,18 +6282,18 @@ _kyber_basemul_mont_add: stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] #ifndef __APPLE__ - adrp x3, L_kyber_aarch64_zetas_mul - add x3, x3, :lo12:L_kyber_aarch64_zetas_mul + adrp x3, L_mlkem_aarch64_zetas_mul + add x3, x3, :lo12:L_mlkem_aarch64_zetas_mul #else - adrp x3, L_kyber_aarch64_zetas_mul@PAGE - add x3, x3, :lo12:L_kyber_aarch64_zetas_mul@PAGEOFF + adrp x3, L_mlkem_aarch64_zetas_mul@PAGE + add x3, x3, :lo12:L_mlkem_aarch64_zetas_mul@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x4, L_kyber_aarch64_consts - add x4, x4, :lo12:L_kyber_aarch64_consts + adrp x4, L_mlkem_aarch64_consts + add x4, x4, :lo12:L_mlkem_aarch64_consts #else - adrp x4, L_kyber_aarch64_consts@PAGE - add x4, x4, :lo12:L_kyber_aarch64_consts@PAGEOFF + adrp x4, L_mlkem_aarch64_consts@PAGE + add x4, x4, :lo12:L_mlkem_aarch64_consts@PAGEOFF #endif /* __APPLE__ */ ldr q1, [x4] ldp q2, q3, [x1] @@ -7006,19 +7007,19 @@ _kyber_basemul_mont_add: ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ - .size kyber_basemul_mont_add,.-kyber_basemul_mont_add + .size mlkem_basemul_mont_add,.-mlkem_basemul_mont_add #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_csubq_neon -.type kyber_csubq_neon,@function +.globl mlkem_csubq_neon +.type mlkem_csubq_neon,@function .align 2 -kyber_csubq_neon: +mlkem_csubq_neon: #else .section __TEXT,__text -.globl _kyber_csubq_neon +.globl _mlkem_csubq_neon .p2align 2 -_kyber_csubq_neon: +_mlkem_csubq_neon: #endif /* __APPLE__ */ stp x29, x30, [sp, #-80]! 
add x29, sp, #0 @@ -7027,11 +7028,11 @@ _kyber_csubq_neon: stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] #ifndef __APPLE__ - adrp x1, L_kyber_aarch64_q - add x1, x1, :lo12:L_kyber_aarch64_q + adrp x1, L_mlkem_aarch64_q + add x1, x1, :lo12:L_mlkem_aarch64_q #else - adrp x1, L_kyber_aarch64_q@PAGE - add x1, x1, :lo12:L_kyber_aarch64_q@PAGEOFF + adrp x1, L_mlkem_aarch64_q@PAGE + add x1, x1, :lo12:L_mlkem_aarch64_q@PAGEOFF #endif /* __APPLE__ */ ldr q20, [x1] ld4 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #0x40 @@ -7187,19 +7188,19 @@ _kyber_csubq_neon: ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ - .size kyber_csubq_neon,.-kyber_csubq_neon + .size mlkem_csubq_neon,.-mlkem_csubq_neon #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_add_reduce -.type kyber_add_reduce,@function +.globl mlkem_add_reduce +.type mlkem_add_reduce,@function .align 2 -kyber_add_reduce: +mlkem_add_reduce: #else .section __TEXT,__text -.globl _kyber_add_reduce +.globl _mlkem_add_reduce .p2align 2 -_kyber_add_reduce: +_mlkem_add_reduce: #endif /* __APPLE__ */ stp x29, x30, [sp, #-80]! 
add x29, sp, #0 @@ -7208,11 +7209,11 @@ _kyber_add_reduce: stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] #ifndef __APPLE__ - adrp x2, L_kyber_aarch64_consts - add x2, x2, :lo12:L_kyber_aarch64_consts + adrp x2, L_mlkem_aarch64_consts + add x2, x2, :lo12:L_mlkem_aarch64_consts #else - adrp x2, L_kyber_aarch64_consts@PAGE - add x2, x2, :lo12:L_kyber_aarch64_consts@PAGEOFF + adrp x2, L_mlkem_aarch64_consts@PAGE + add x2, x2, :lo12:L_mlkem_aarch64_consts@PAGEOFF #endif /* __APPLE__ */ ldr q0, [x2] ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40 @@ -7378,19 +7379,19 @@ _kyber_add_reduce: ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ - .size kyber_add_reduce,.-kyber_add_reduce + .size mlkem_add_reduce,.-mlkem_add_reduce #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_add3_reduce -.type kyber_add3_reduce,@function +.globl mlkem_add3_reduce +.type mlkem_add3_reduce,@function .align 2 -kyber_add3_reduce: +mlkem_add3_reduce: #else .section __TEXT,__text -.globl _kyber_add3_reduce +.globl _mlkem_add3_reduce .p2align 2 -_kyber_add3_reduce: +_mlkem_add3_reduce: #endif /* __APPLE__ */ stp x29, x30, [sp, #-80]! 
add x29, sp, #0 @@ -7399,11 +7400,11 @@ _kyber_add3_reduce: stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] #ifndef __APPLE__ - adrp x3, L_kyber_aarch64_consts - add x3, x3, :lo12:L_kyber_aarch64_consts + adrp x3, L_mlkem_aarch64_consts + add x3, x3, :lo12:L_mlkem_aarch64_consts #else - adrp x3, L_kyber_aarch64_consts@PAGE - add x3, x3, :lo12:L_kyber_aarch64_consts@PAGEOFF + adrp x3, L_mlkem_aarch64_consts@PAGE + add x3, x3, :lo12:L_mlkem_aarch64_consts@PAGEOFF #endif /* __APPLE__ */ ldr q0, [x3] ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40 @@ -7609,19 +7610,19 @@ _kyber_add3_reduce: ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ - .size kyber_add3_reduce,.-kyber_add3_reduce + .size mlkem_add3_reduce,.-mlkem_add3_reduce #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_rsub_reduce -.type kyber_rsub_reduce,@function +.globl mlkem_rsub_reduce +.type mlkem_rsub_reduce,@function .align 2 -kyber_rsub_reduce: +mlkem_rsub_reduce: #else .section __TEXT,__text -.globl _kyber_rsub_reduce +.globl _mlkem_rsub_reduce .p2align 2 -_kyber_rsub_reduce: +_mlkem_rsub_reduce: #endif /* __APPLE__ */ stp x29, x30, [sp, #-80]! 
add x29, sp, #0 @@ -7630,11 +7631,11 @@ _kyber_rsub_reduce: stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] #ifndef __APPLE__ - adrp x2, L_kyber_aarch64_consts - add x2, x2, :lo12:L_kyber_aarch64_consts + adrp x2, L_mlkem_aarch64_consts + add x2, x2, :lo12:L_mlkem_aarch64_consts #else - adrp x2, L_kyber_aarch64_consts@PAGE - add x2, x2, :lo12:L_kyber_aarch64_consts@PAGEOFF + adrp x2, L_mlkem_aarch64_consts@PAGE + add x2, x2, :lo12:L_mlkem_aarch64_consts@PAGEOFF #endif /* __APPLE__ */ ldr q0, [x2] ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40 @@ -7800,19 +7801,19 @@ _kyber_rsub_reduce: ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ - .size kyber_rsub_reduce,.-kyber_rsub_reduce + .size mlkem_rsub_reduce,.-mlkem_rsub_reduce #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_to_mont -.type kyber_to_mont,@function +.globl mlkem_to_mont +.type mlkem_to_mont,@function .align 2 -kyber_to_mont: +mlkem_to_mont: #else .section __TEXT,__text -.globl _kyber_to_mont +.globl _mlkem_to_mont .p2align 2 -_kyber_to_mont: +_mlkem_to_mont: #endif /* __APPLE__ */ stp x29, x30, [sp, #-80]! 
add x29, sp, #0 @@ -7821,11 +7822,11 @@ _kyber_to_mont: stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] #ifndef __APPLE__ - adrp x1, L_kyber_aarch64_consts - add x1, x1, :lo12:L_kyber_aarch64_consts + adrp x1, L_mlkem_aarch64_consts + add x1, x1, :lo12:L_mlkem_aarch64_consts #else - adrp x1, L_kyber_aarch64_consts@PAGE - add x1, x1, :lo12:L_kyber_aarch64_consts@PAGEOFF + adrp x1, L_mlkem_aarch64_consts@PAGE + add x1, x1, :lo12:L_mlkem_aarch64_consts@PAGEOFF #endif /* __APPLE__ */ ldr q0, [x1] ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40 @@ -8013,20 +8014,20 @@ _kyber_to_mont: ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ - .size kyber_to_mont,.-kyber_to_mont + .size mlkem_to_mont,.-mlkem_to_mont #endif /* __APPLE__ */ #ifndef WOLFSSL_AARCH64_NO_SQRDMLSH #ifndef __APPLE__ .text -.globl kyber_to_mont_sqrdmlsh -.type kyber_to_mont_sqrdmlsh,@function +.globl mlkem_to_mont_sqrdmlsh +.type mlkem_to_mont_sqrdmlsh,@function .align 2 -kyber_to_mont_sqrdmlsh: +mlkem_to_mont_sqrdmlsh: #else .section __TEXT,__text -.globl _kyber_to_mont_sqrdmlsh +.globl _mlkem_to_mont_sqrdmlsh .p2align 2 -_kyber_to_mont_sqrdmlsh: +_mlkem_to_mont_sqrdmlsh: #endif /* __APPLE__ */ stp x29, x30, [sp, #-80]! 
add x29, sp, #0 @@ -8035,11 +8036,11 @@ _kyber_to_mont_sqrdmlsh: stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] #ifndef __APPLE__ - adrp x1, L_kyber_aarch64_consts - add x1, x1, :lo12:L_kyber_aarch64_consts + adrp x1, L_mlkem_aarch64_consts + add x1, x1, :lo12:L_mlkem_aarch64_consts #else - adrp x1, L_kyber_aarch64_consts@PAGE - add x1, x1, :lo12:L_kyber_aarch64_consts@PAGEOFF + adrp x1, L_mlkem_aarch64_consts@PAGE + add x1, x1, :lo12:L_mlkem_aarch64_consts@PAGEOFF #endif /* __APPLE__ */ ldr q0, [x1] ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x0], #0x40 @@ -8195,14 +8196,14 @@ _kyber_to_mont_sqrdmlsh: ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ - .size kyber_to_mont_sqrdmlsh,.-kyber_to_mont_sqrdmlsh + .size mlkem_to_mont_sqrdmlsh,.-mlkem_to_mont_sqrdmlsh #endif /* __APPLE__ */ #endif /* WOLFSSL_AARCH64_NO_SQRDMLSH */ #ifndef __APPLE__ .text - .type L_kyber_aarch64_to_msg_neon_low, %object + .type L_mlkem_to_msg_low, %object .section .rodata - .size L_kyber_aarch64_to_msg_neon_low, 16 + .size L_mlkem_to_msg_low, 16 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -8211,13 +8212,13 @@ _kyber_to_mont_sqrdmlsh: #else .p2align 2 #endif /* __APPLE__ */ -L_kyber_aarch64_to_msg_neon_low: +L_mlkem_to_msg_low: .short 0x0373,0x0373,0x0373,0x0373,0x0373,0x0373,0x0373,0x0373 #ifndef __APPLE__ .text - .type L_kyber_aarch64_to_msg_neon_high, %object + .type L_mlkem_to_msg_high, %object .section .rodata - .size L_kyber_aarch64_to_msg_neon_high, 16 + .size L_mlkem_to_msg_high, 16 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -8226,13 +8227,13 @@ L_kyber_aarch64_to_msg_neon_low: #else .p2align 2 #endif /* __APPLE__ */ -L_kyber_aarch64_to_msg_neon_high: +L_mlkem_to_msg_high: .short 0x09c0,0x09c0,0x09c0,0x09c0,0x09c0,0x09c0,0x09c0,0x09c0 #ifndef __APPLE__ .text - .type L_kyber_aarch64_to_msg_neon_bits, %object + .type L_mlkem_to_msg_bits, %object .section .rodata - .size L_kyber_aarch64_to_msg_neon_bits, 16 + .size L_mlkem_to_msg_bits, 16 #else .section __DATA,__data 
#endif /* __APPLE__ */ @@ -8241,19 +8242,19 @@ L_kyber_aarch64_to_msg_neon_high: #else .p2align 2 #endif /* __APPLE__ */ -L_kyber_aarch64_to_msg_neon_bits: +L_mlkem_to_msg_bits: .short 0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080 #ifndef __APPLE__ .text -.globl kyber_to_msg_neon -.type kyber_to_msg_neon,@function +.globl mlkem_to_msg_neon +.type mlkem_to_msg_neon,@function .align 2 -kyber_to_msg_neon: +mlkem_to_msg_neon: #else .section __TEXT,__text -.globl _kyber_to_msg_neon +.globl _mlkem_to_msg_neon .p2align 2 -_kyber_to_msg_neon: +_mlkem_to_msg_neon: #endif /* __APPLE__ */ stp x29, x30, [sp, #-80]! add x29, sp, #0 @@ -8262,25 +8263,25 @@ _kyber_to_msg_neon: stp d12, d13, [x29, #48] stp d14, d15, [x29, #64] #ifndef __APPLE__ - adrp x2, L_kyber_aarch64_to_msg_neon_low - add x2, x2, :lo12:L_kyber_aarch64_to_msg_neon_low + adrp x2, L_mlkem_to_msg_low + add x2, x2, :lo12:L_mlkem_to_msg_low #else - adrp x2, L_kyber_aarch64_to_msg_neon_low@PAGE - add x2, x2, :lo12:L_kyber_aarch64_to_msg_neon_low@PAGEOFF + adrp x2, L_mlkem_to_msg_low@PAGE + add x2, x2, :lo12:L_mlkem_to_msg_low@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x3, L_kyber_aarch64_to_msg_neon_high - add x3, x3, :lo12:L_kyber_aarch64_to_msg_neon_high + adrp x3, L_mlkem_to_msg_high + add x3, x3, :lo12:L_mlkem_to_msg_high #else - adrp x3, L_kyber_aarch64_to_msg_neon_high@PAGE - add x3, x3, :lo12:L_kyber_aarch64_to_msg_neon_high@PAGEOFF + adrp x3, L_mlkem_to_msg_high@PAGE + add x3, x3, :lo12:L_mlkem_to_msg_high@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x4, L_kyber_aarch64_to_msg_neon_bits - add x4, x4, :lo12:L_kyber_aarch64_to_msg_neon_bits + adrp x4, L_mlkem_to_msg_bits + add x4, x4, :lo12:L_mlkem_to_msg_bits #else - adrp x4, L_kyber_aarch64_to_msg_neon_bits@PAGE - add x4, x4, :lo12:L_kyber_aarch64_to_msg_neon_bits@PAGEOFF + adrp x4, L_mlkem_to_msg_bits@PAGE + add x4, x4, :lo12:L_mlkem_to_msg_bits@PAGEOFF #endif /* __APPLE__ */ ldr q0, [x2] ldr q1, [x3] @@ -8492,13 +8493,13 @@ 
_kyber_to_msg_neon: ldp x29, x30, [sp], #0x50 ret #ifndef __APPLE__ - .size kyber_to_msg_neon,.-kyber_to_msg_neon + .size mlkem_to_msg_neon,.-mlkem_to_msg_neon #endif /* __APPLE__ */ #ifndef __APPLE__ .text - .type L_kyber_aarch64_from_msg_neon_q1half, %object + .type L_mlkem_from_msg_q1half, %object .section .rodata - .size L_kyber_aarch64_from_msg_neon_q1half, 16 + .size L_mlkem_from_msg_q1half, 16 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -8507,13 +8508,13 @@ _kyber_to_msg_neon: #else .p2align 2 #endif /* __APPLE__ */ -L_kyber_aarch64_from_msg_neon_q1half: +L_mlkem_from_msg_q1half: .short 0x0681,0x0681,0x0681,0x0681,0x0681,0x0681,0x0681,0x0681 #ifndef __APPLE__ .text - .type L_kyber_aarch64_from_msg_neon_bits, %object + .type L_mlkem_from_msg_bits, %object .section .rodata - .size L_kyber_aarch64_from_msg_neon_bits, 16 + .size L_mlkem_from_msg_bits, 16 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -8522,38 +8523,38 @@ L_kyber_aarch64_from_msg_neon_q1half: #else .p2align 1 #endif /* __APPLE__ */ -L_kyber_aarch64_from_msg_neon_bits: +L_mlkem_from_msg_bits: .byte 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80 .byte 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80 #ifndef __APPLE__ .text -.globl kyber_from_msg_neon -.type kyber_from_msg_neon,@function +.globl mlkem_from_msg_neon +.type mlkem_from_msg_neon,@function .align 2 -kyber_from_msg_neon: +mlkem_from_msg_neon: #else .section __TEXT,__text -.globl _kyber_from_msg_neon +.globl _mlkem_from_msg_neon .p2align 2 -_kyber_from_msg_neon: +_mlkem_from_msg_neon: #endif /* __APPLE__ */ stp x29, x30, [sp, #-48]! 
add x29, sp, #0 stp d8, d9, [x29, #16] stp d10, d11, [x29, #32] #ifndef __APPLE__ - adrp x2, L_kyber_aarch64_from_msg_neon_q1half - add x2, x2, :lo12:L_kyber_aarch64_from_msg_neon_q1half + adrp x2, L_mlkem_from_msg_q1half + add x2, x2, :lo12:L_mlkem_from_msg_q1half #else - adrp x2, L_kyber_aarch64_from_msg_neon_q1half@PAGE - add x2, x2, :lo12:L_kyber_aarch64_from_msg_neon_q1half@PAGEOFF + adrp x2, L_mlkem_from_msg_q1half@PAGE + add x2, x2, :lo12:L_mlkem_from_msg_q1half@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x3, L_kyber_aarch64_from_msg_neon_bits - add x3, x3, :lo12:L_kyber_aarch64_from_msg_neon_bits + adrp x3, L_mlkem_from_msg_bits + add x3, x3, :lo12:L_mlkem_from_msg_bits #else - adrp x3, L_kyber_aarch64_from_msg_neon_bits@PAGE - add x3, x3, :lo12:L_kyber_aarch64_from_msg_neon_bits@PAGEOFF + adrp x3, L_mlkem_from_msg_bits@PAGE + add x3, x3, :lo12:L_mlkem_from_msg_bits@PAGEOFF #endif /* __APPLE__ */ ld1 {v2.16b, v3.16b}, [x1] ldr q1, [x2] @@ -8699,19 +8700,19 @@ _kyber_from_msg_neon: ldp x29, x30, [sp], #48 ret #ifndef __APPLE__ - .size kyber_from_msg_neon,.-kyber_from_msg_neon + .size mlkem_from_msg_neon,.-mlkem_from_msg_neon #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_cmp_neon -.type kyber_cmp_neon,@function +.globl mlkem_cmp_neon +.type mlkem_cmp_neon,@function .align 2 -kyber_cmp_neon: +mlkem_cmp_neon: #else .section __TEXT,__text -.globl _kyber_cmp_neon +.globl _mlkem_cmp_neon .p2align 2 -_kyber_cmp_neon: +_mlkem_cmp_neon: #endif /* __APPLE__ */ stp x29, x30, [sp, #-48]! 
add x29, sp, #0 @@ -8834,7 +8835,7 @@ _kyber_cmp_neon: orr v10.16b, v10.16b, v2.16b orr v11.16b, v11.16b, v3.16b subs w2, w2, #0x300 - beq L_kyber_aarch64_cmp_neon_done + beq L_mlkem_aarch64_cmp_neon_done ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40 eor v0.16b, v0.16b, v4.16b @@ -8886,7 +8887,7 @@ _kyber_cmp_neon: orr v10.16b, v10.16b, v2.16b orr v11.16b, v11.16b, v3.16b subs w2, w2, #0x140 - beq L_kyber_aarch64_cmp_neon_done + beq L_mlkem_aarch64_cmp_neon_done ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #0x40 ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #0x40 eor v0.16b, v0.16b, v4.16b @@ -8963,7 +8964,7 @@ _kyber_cmp_neon: eor v1.16b, v1.16b, v5.16b orr v8.16b, v8.16b, v0.16b orr v9.16b, v9.16b, v1.16b -L_kyber_aarch64_cmp_neon_done: +L_mlkem_aarch64_cmp_neon_done: orr v8.16b, v8.16b, v9.16b orr v10.16b, v10.16b, v11.16b orr v8.16b, v8.16b, v10.16b @@ -8977,13 +8978,13 @@ L_kyber_aarch64_cmp_neon_done: ldp x29, x30, [sp], #48 ret #ifndef __APPLE__ - .size kyber_cmp_neon,.-kyber_cmp_neon + .size mlkem_cmp_neon,.-mlkem_cmp_neon #endif /* __APPLE__ */ #ifndef __APPLE__ .text - .type L_kyber_aarch64_rej_uniform_neon_mask, %object + .type L_mlkem_rej_uniform_mask, %object .section .rodata - .size L_kyber_aarch64_rej_uniform_neon_mask, 16 + .size L_mlkem_rej_uniform_mask, 16 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -8992,13 +8993,13 @@ L_kyber_aarch64_cmp_neon_done: #else .p2align 2 #endif /* __APPLE__ */ -L_kyber_aarch64_rej_uniform_neon_mask: +L_mlkem_rej_uniform_mask: .short 0x0fff,0x0fff,0x0fff,0x0fff,0x0fff,0x0fff,0x0fff,0x0fff #ifndef __APPLE__ .text - .type L_kyber_aarch64_rej_uniform_neon_bits, %object + .type L_mlkem_rej_uniform_bits, %object .section .rodata - .size L_kyber_aarch64_rej_uniform_neon_bits, 16 + .size L_mlkem_rej_uniform_bits, 16 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -9007,13 +9008,13 @@ L_kyber_aarch64_rej_uniform_neon_mask: #else .p2align 2 #endif /* __APPLE__ 
*/ -L_kyber_aarch64_rej_uniform_neon_bits: +L_mlkem_rej_uniform_bits: .short 0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080 #ifndef __APPLE__ .text - .type L_kyber_aarch64_rej_uniform_neon_indices, %object + .type L_mlkem_rej_uniform_indices, %object .section .rodata - .size L_kyber_aarch64_rej_uniform_neon_indices, 4096 + .size L_mlkem_rej_uniform_indices, 4096 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -9022,7 +9023,7 @@ L_kyber_aarch64_rej_uniform_neon_bits: #else .p2align 1 #endif /* __APPLE__ */ -L_kyber_aarch64_rej_uniform_neon_indices: +L_mlkem_rej_uniform_indices: .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff .byte 0x00,0x01,0xff,0xff,0xff,0xff,0xff,0xff @@ -9537,15 +9538,15 @@ L_kyber_aarch64_rej_uniform_neon_indices: .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f #ifndef __APPLE__ .text -.globl kyber_rej_uniform_neon -.type kyber_rej_uniform_neon,@function +.globl mlkem_rej_uniform_neon +.type mlkem_rej_uniform_neon,@function .align 2 -kyber_rej_uniform_neon: +mlkem_rej_uniform_neon: #else .section __TEXT,__text -.globl _kyber_rej_uniform_neon +.globl _mlkem_rej_uniform_neon .p2align 2 -_kyber_rej_uniform_neon: +_mlkem_rej_uniform_neon: #endif /* __APPLE__ */ stp x29, x30, [sp, #-64]! 
add x29, sp, #0 @@ -9553,32 +9554,32 @@ _kyber_rej_uniform_neon: stp d10, d11, [x29, #32] stp d12, d13, [x29, #48] #ifndef __APPLE__ - adrp x4, L_kyber_aarch64_rej_uniform_neon_mask - add x4, x4, :lo12:L_kyber_aarch64_rej_uniform_neon_mask + adrp x4, L_mlkem_rej_uniform_mask + add x4, x4, :lo12:L_mlkem_rej_uniform_mask #else - adrp x4, L_kyber_aarch64_rej_uniform_neon_mask@PAGE - add x4, x4, :lo12:L_kyber_aarch64_rej_uniform_neon_mask@PAGEOFF + adrp x4, L_mlkem_rej_uniform_mask@PAGE + add x4, x4, :lo12:L_mlkem_rej_uniform_mask@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x5, L_kyber_aarch64_q - add x5, x5, :lo12:L_kyber_aarch64_q + adrp x5, L_mlkem_aarch64_q + add x5, x5, :lo12:L_mlkem_aarch64_q #else - adrp x5, L_kyber_aarch64_q@PAGE - add x5, x5, :lo12:L_kyber_aarch64_q@PAGEOFF + adrp x5, L_mlkem_aarch64_q@PAGE + add x5, x5, :lo12:L_mlkem_aarch64_q@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x6, L_kyber_aarch64_rej_uniform_neon_bits - add x6, x6, :lo12:L_kyber_aarch64_rej_uniform_neon_bits + adrp x6, L_mlkem_rej_uniform_bits + add x6, x6, :lo12:L_mlkem_rej_uniform_bits #else - adrp x6, L_kyber_aarch64_rej_uniform_neon_bits@PAGE - add x6, x6, :lo12:L_kyber_aarch64_rej_uniform_neon_bits@PAGEOFF + adrp x6, L_mlkem_rej_uniform_bits@PAGE + add x6, x6, :lo12:L_mlkem_rej_uniform_bits@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x7, L_kyber_aarch64_rej_uniform_neon_indices - add x7, x7, :lo12:L_kyber_aarch64_rej_uniform_neon_indices + adrp x7, L_mlkem_rej_uniform_indices + add x7, x7, :lo12:L_mlkem_rej_uniform_indices #else - adrp x7, L_kyber_aarch64_rej_uniform_neon_indices@PAGE - add x7, x7, :lo12:L_kyber_aarch64_rej_uniform_neon_indices@PAGEOFF + adrp x7, L_mlkem_rej_uniform_indices@PAGE + add x7, x7, :lo12:L_mlkem_rej_uniform_indices@PAGEOFF #endif /* __APPLE__ */ eor v1.16b, v1.16b, v1.16b eor v12.16b, v12.16b, v12.16b @@ -9591,10 +9592,10 @@ _kyber_rej_uniform_neon: ldr q3, [x5] ldr q2, [x6] subs wzr, w1, #0 - beq 
L_kyber_aarch64_rej_uniform_neon_done + beq L_mlkem_rej_uniform_done subs wzr, w1, #16 - blt L_kyber_aarch64_rej_uniform_neon_loop_4 -L_kyber_aarch64_rej_uniform_neon_loop_16: + blt L_mlkem_rej_uniform_loop_4 +L_mlkem_rej_uniform_loop_16: ld3 {v4.8b, v5.8b, v6.8b}, [x2], #24 zip1 v4.16b, v4.16b, v1.16b zip1 v5.16b, v5.16b, v1.16b @@ -9635,16 +9636,16 @@ L_kyber_aarch64_rej_uniform_neon_loop_16: add x0, x0, x11, lsl 1 add x12, x12, x11 subs w3, w3, #24 - beq L_kyber_aarch64_rej_uniform_neon_done + beq L_mlkem_rej_uniform_done sub w10, w1, w12 subs x10, x10, #16 - blt L_kyber_aarch64_rej_uniform_neon_loop_4 - b L_kyber_aarch64_rej_uniform_neon_loop_16 -L_kyber_aarch64_rej_uniform_neon_loop_4: + blt L_mlkem_rej_uniform_loop_4 + b L_mlkem_rej_uniform_loop_16 +L_mlkem_rej_uniform_loop_4: subs w10, w1, w12 - beq L_kyber_aarch64_rej_uniform_neon_done + beq L_mlkem_rej_uniform_done subs x10, x10, #4 - blt L_kyber_aarch64_rej_uniform_neon_loop_lt_4 + blt L_mlkem_rej_uniform_loop_lt_4 ldr x4, [x2], #6 lsr x5, x4, #12 lsr x6, x4, #24 @@ -9674,9 +9675,9 @@ L_kyber_aarch64_rej_uniform_neon_loop_4: cinc x0, x0, lt cinc x12, x12, lt subs w3, w3, #6 - beq L_kyber_aarch64_rej_uniform_neon_done - b L_kyber_aarch64_rej_uniform_neon_loop_4 -L_kyber_aarch64_rej_uniform_neon_loop_lt_4: + beq L_mlkem_rej_uniform_done + b L_mlkem_rej_uniform_loop_4 +L_mlkem_rej_uniform_loop_lt_4: ldr x4, [x2], #6 lsr x5, x4, #12 lsr x6, x4, #24 @@ -9691,32 +9692,32 @@ L_kyber_aarch64_rej_uniform_neon_loop_lt_4: cinc x0, x0, lt cinc x12, x12, lt subs wzr, w1, w12 - beq L_kyber_aarch64_rej_uniform_neon_done + beq L_mlkem_rej_uniform_done strh w5, [x0] subs xzr, x5, x13 cinc x0, x0, lt cinc x0, x0, lt cinc x12, x12, lt subs wzr, w1, w12 - beq L_kyber_aarch64_rej_uniform_neon_done + beq L_mlkem_rej_uniform_done strh w6, [x0] subs xzr, x6, x13 cinc x0, x0, lt cinc x0, x0, lt cinc x12, x12, lt subs wzr, w1, w12 - beq L_kyber_aarch64_rej_uniform_neon_done + beq L_mlkem_rej_uniform_done strh w7, [x0] subs xzr, 
x7, x13 cinc x0, x0, lt cinc x0, x0, lt cinc x12, x12, lt subs wzr, w1, w12 - beq L_kyber_aarch64_rej_uniform_neon_done + beq L_mlkem_rej_uniform_done subs w3, w3, #6 - beq L_kyber_aarch64_rej_uniform_neon_done - b L_kyber_aarch64_rej_uniform_neon_loop_lt_4 -L_kyber_aarch64_rej_uniform_neon_done: + beq L_mlkem_rej_uniform_done + b L_mlkem_rej_uniform_loop_lt_4 +L_mlkem_rej_uniform_done: mov x0, x12 ldp d8, d9, [x29, #16] ldp d10, d11, [x29, #32] @@ -9724,20 +9725,20 @@ L_kyber_aarch64_rej_uniform_neon_done: ldp x29, x30, [sp], #0x40 ret #ifndef __APPLE__ - .size kyber_rej_uniform_neon,.-kyber_rej_uniform_neon + .size mlkem_rej_uniform_neon,.-mlkem_rej_uniform_neon #endif /* __APPLE__ */ #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 #ifndef __APPLE__ .text -.globl kyber_sha3_blocksx3_neon -.type kyber_sha3_blocksx3_neon,@function +.globl mlkem_sha3_blocksx3_neon +.type mlkem_sha3_blocksx3_neon,@function .align 2 -kyber_sha3_blocksx3_neon: +mlkem_sha3_blocksx3_neon: #else .section __TEXT,__text -.globl _kyber_sha3_blocksx3_neon +.globl _mlkem_sha3_blocksx3_neon .p2align 2 -_kyber_sha3_blocksx3_neon: +_mlkem_sha3_blocksx3_neon: #endif /* __APPLE__ */ stp x29, x30, [sp, #-224]! add x29, sp, #0 @@ -10050,19 +10051,19 @@ L_SHA3_transform_blocksx3_neon_begin: ldp x29, x30, [sp], #0xe0 ret #ifndef __APPLE__ - .size kyber_sha3_blocksx3_neon,.-kyber_sha3_blocksx3_neon + .size mlkem_sha3_blocksx3_neon,.-mlkem_sha3_blocksx3_neon #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_shake128_blocksx3_seed_neon -.type kyber_shake128_blocksx3_seed_neon,@function +.globl mlkem_shake128_blocksx3_seed_neon +.type mlkem_shake128_blocksx3_seed_neon,@function .align 2 -kyber_shake128_blocksx3_seed_neon: +mlkem_shake128_blocksx3_seed_neon: #else .section __TEXT,__text -.globl _kyber_shake128_blocksx3_seed_neon +.globl _mlkem_shake128_blocksx3_seed_neon .p2align 2 -_kyber_shake128_blocksx3_seed_neon: +_mlkem_shake128_blocksx3_seed_neon: #endif /* __APPLE__ */ stp x29, x30, [sp, #-224]! 
add x29, sp, #0 @@ -10397,19 +10398,19 @@ L_SHA3_shake128_blocksx3_seed_neon_begin: ldp x29, x30, [sp], #0xe0 ret #ifndef __APPLE__ - .size kyber_shake128_blocksx3_seed_neon,.-kyber_shake128_blocksx3_seed_neon + .size mlkem_shake128_blocksx3_seed_neon,.-mlkem_shake128_blocksx3_seed_neon #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_shake256_blocksx3_seed_neon -.type kyber_shake256_blocksx3_seed_neon,@function +.globl mlkem_shake256_blocksx3_seed_neon +.type mlkem_shake256_blocksx3_seed_neon,@function .align 2 -kyber_shake256_blocksx3_seed_neon: +mlkem_shake256_blocksx3_seed_neon: #else .section __TEXT,__text -.globl _kyber_shake256_blocksx3_seed_neon +.globl _mlkem_shake256_blocksx3_seed_neon .p2align 2 -_kyber_shake256_blocksx3_seed_neon: +_mlkem_shake256_blocksx3_seed_neon: #endif /* __APPLE__ */ stp x29, x30, [sp, #-224]! add x29, sp, #0 @@ -10744,20 +10745,20 @@ L_SHA3_shake256_blocksx3_seed_neon_begin: ldp x29, x30, [sp], #0xe0 ret #ifndef __APPLE__ - .size kyber_shake256_blocksx3_seed_neon,.-kyber_shake256_blocksx3_seed_neon + .size mlkem_shake256_blocksx3_seed_neon,.-mlkem_shake256_blocksx3_seed_neon #endif /* __APPLE__ */ #else #ifndef __APPLE__ .text -.globl kyber_sha3_blocksx3_neon -.type kyber_sha3_blocksx3_neon,@function +.globl mlkem_sha3_blocksx3_neon +.type mlkem_sha3_blocksx3_neon,@function .align 2 -kyber_sha3_blocksx3_neon: +mlkem_sha3_blocksx3_neon: #else .section __TEXT,__text -.globl _kyber_sha3_blocksx3_neon +.globl _mlkem_sha3_blocksx3_neon .p2align 2 -_kyber_sha3_blocksx3_neon: +_mlkem_sha3_blocksx3_neon: #endif /* __APPLE__ */ stp x29, x30, [sp, #-224]! 
add x29, sp, #0 @@ -11155,19 +11156,19 @@ L_SHA3_transform_blocksx3_neon_begin: ldp x29, x30, [sp], #0xe0 ret #ifndef __APPLE__ - .size kyber_sha3_blocksx3_neon,.-kyber_sha3_blocksx3_neon + .size mlkem_sha3_blocksx3_neon,.-mlkem_sha3_blocksx3_neon #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_shake128_blocksx3_seed_neon -.type kyber_shake128_blocksx3_seed_neon,@function +.globl mlkem_shake128_blocksx3_seed_neon +.type mlkem_shake128_blocksx3_seed_neon,@function .align 2 -kyber_shake128_blocksx3_seed_neon: +mlkem_shake128_blocksx3_seed_neon: #else .section __TEXT,__text -.globl _kyber_shake128_blocksx3_seed_neon +.globl _mlkem_shake128_blocksx3_seed_neon .p2align 2 -_kyber_shake128_blocksx3_seed_neon: +_mlkem_shake128_blocksx3_seed_neon: #endif /* __APPLE__ */ stp x29, x30, [sp, #-224]! add x29, sp, #0 @@ -11587,19 +11588,19 @@ L_SHA3_shake128_blocksx3_seed_neon_begin: ldp x29, x30, [sp], #0xe0 ret #ifndef __APPLE__ - .size kyber_shake128_blocksx3_seed_neon,.-kyber_shake128_blocksx3_seed_neon + .size mlkem_shake128_blocksx3_seed_neon,.-mlkem_shake128_blocksx3_seed_neon #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_shake256_blocksx3_seed_neon -.type kyber_shake256_blocksx3_seed_neon,@function +.globl mlkem_shake256_blocksx3_seed_neon +.type mlkem_shake256_blocksx3_seed_neon,@function .align 2 -kyber_shake256_blocksx3_seed_neon: +mlkem_shake256_blocksx3_seed_neon: #else .section __TEXT,__text -.globl _kyber_shake256_blocksx3_seed_neon +.globl _mlkem_shake256_blocksx3_seed_neon .p2align 2 -_kyber_shake256_blocksx3_seed_neon: +_mlkem_shake256_blocksx3_seed_neon: #endif /* __APPLE__ */ stp x29, x30, [sp, #-224]! 
add x29, sp, #0 @@ -12019,10 +12020,10 @@ L_SHA3_shake256_blocksx3_seed_neon_begin: ldp x29, x30, [sp], #0xe0 ret #ifndef __APPLE__ - .size kyber_shake256_blocksx3_seed_neon,.-kyber_shake256_blocksx3_seed_neon + .size mlkem_shake256_blocksx3_seed_neon,.-mlkem_shake256_blocksx3_seed_neon #endif /* __APPLE__ */ #endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ -#endif /* WOLFSSL_WC_KYBER */ +#endif /* WOLFSSL_WC_MLKEM */ #endif /* __aarch64__ */ #endif /* WOLFSSL_ARMASM */ diff --git a/wolfcrypt/src/port/arm/armv8-kyber-asm_c.c b/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c similarity index 93% rename from wolfcrypt/src/port/arm/armv8-kyber-asm_c.c rename to wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c index bc184370c..b48f366ea 100644 --- a/wolfcrypt/src/port/arm/armv8-kyber-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c @@ -1,4 +1,4 @@ -/* armv8-kyber-asm +/* armv8-mlkem-asm * * Copyright (C) 2006-2025 wolfSSL Inc. * @@ -27,16 +27,17 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./kyber/kyber.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-kyber-asm.c + * ruby ./kyber/kyber.rb arm64 \ + * ../wolfssl/wolfcrypt/src/port/arm/armv8-mlkem-asm.c */ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ #ifdef WOLFSSL_ARMASM_INLINE -static const word16 L_kyber_aarch64_q[] = { +static const word16 L_mlkem_aarch64_q[] = { 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, }; -static const word16 L_kyber_aarch64_consts[] = { +static const word16 L_mlkem_aarch64_consts[] = { 0x0d01, 0xf301, 0x4ebf, 0x0549, 0x5049, 0x0000, 0x0000, 0x0000, }; @@ -55,10 +56,10 @@ static const word64 L_sha3_aarch64_r[] = { 0x0000000080000001, 0x8000000080008008, }; -#include +#include -#ifdef WOLFSSL_WC_KYBER -static const word16 L_kyber_aarch64_zetas[] = { +#ifdef WOLFSSL_WC_MLKEM +static const word16 L_mlkem_aarch64_zetas[] = { 0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca, 0x0c56, 0x026e, 0x0629, 0x00b6, 0x03c2, 0x084f, 0x073f, 0x05bc, 0x023d, 0x07d4, 0x0108, 
0x017f, 0x09c4, 0x05b2, 0x06bf, 0x0c7f, @@ -97,7 +98,7 @@ static const word16 L_kyber_aarch64_zetas[] = { 0x03be, 0x03be, 0x074d, 0x074d, 0x05f2, 0x05f2, 0x065c, 0x065c, }; -static const word16 L_kyber_aarch64_zetas_qinv[] = { +static const word16 L_mlkem_aarch64_zetas_qinv[] = { 0xffed, 0x7b0b, 0x399a, 0x0314, 0x34d5, 0xcf8e, 0x6e1f, 0xbeca, 0xae56, 0x6c6e, 0xf129, 0xc2b6, 0x29c2, 0x054f, 0xd43f, 0x79bc, 0xe93d, 0x43d4, 0x9908, 0x8e7f, 0x15c4, 0xfbb2, 0x53bf, 0x997f, @@ -136,29 +137,29 @@ static const word16 L_kyber_aarch64_zetas_qinv[] = { 0x5dbe, 0x5dbe, 0x1e4d, 0x1e4d, 0xbbf2, 0xbbf2, 0x5a5c, 0x5a5c, }; -void kyber_ntt(sword16* r) +void mlkem_ntt(sword16* r) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x2, %[L_kyber_aarch64_zetas]\n\t" - "add x2, x2, :lo12:%[L_kyber_aarch64_zetas]\n\t" + "adrp x2, %[L_mlkem_aarch64_zetas]\n\t" + "add x2, x2, :lo12:%[L_mlkem_aarch64_zetas]\n\t" #else - "adrp x2, %[L_kyber_aarch64_zetas]@PAGE\n\t" - "add x2, x2, %[L_kyber_aarch64_zetas]@PAGEOFF\n\t" + "adrp x2, %[L_mlkem_aarch64_zetas]@PAGE\n\t" + "add x2, x2, %[L_mlkem_aarch64_zetas]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x3, %[L_kyber_aarch64_zetas_qinv]\n\t" - "add x3, x3, :lo12:%[L_kyber_aarch64_zetas_qinv]\n\t" + "adrp x3, %[L_mlkem_aarch64_zetas_qinv]\n\t" + "add x3, x3, :lo12:%[L_mlkem_aarch64_zetas_qinv]\n\t" #else - "adrp x3, %[L_kyber_aarch64_zetas_qinv]@PAGE\n\t" - "add x3, x3, %[L_kyber_aarch64_zetas_qinv]@PAGEOFF\n\t" + "adrp x3, %[L_mlkem_aarch64_zetas_qinv]@PAGE\n\t" + "add x3, x3, %[L_mlkem_aarch64_zetas_qinv]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x4, %[L_kyber_aarch64_consts]\n\t" - "add x4, x4, :lo12:%[L_kyber_aarch64_consts]\n\t" + "adrp x4, %[L_mlkem_aarch64_consts]\n\t" + "add x4, x4, :lo12:%[L_mlkem_aarch64_consts]\n\t" #else - "adrp x4, %[L_kyber_aarch64_consts]@PAGE\n\t" - "add x4, x4, %[L_kyber_aarch64_consts]@PAGEOFF\n\t" + "adrp x4, %[L_mlkem_aarch64_consts]@PAGE\n\t" + "add x4, x4, 
%[L_mlkem_aarch64_consts]@PAGEOFF\n\t" #endif /* __APPLE__ */ "add x1, %x[r], #0x100\n\t" "ldr q4, [x4]\n\t" @@ -1407,12 +1408,19 @@ void kyber_ntt(sword16* r) "stp q17, q18, [x1, #192]\n\t" "stp q19, q20, [x1, #224]\n\t" : [r] "+r" (r) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv) - : "memory", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv) + : "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", + "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", + "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -static const word16 L_kyber_aarch64_zetas_inv[] = { +static const word16 L_mlkem_aarch64_zetas_inv[] = { 0x06a5, 0x06a5, 0x070f, 0x070f, 0x05b4, 0x05b4, 0x0943, 0x0943, 0x0922, 0x0922, 0x091d, 0x091d, 0x0134, 0x0134, 0x006c, 0x006c, 0x0b23, 0x0b23, 0x0366, 0x0366, 0x0356, 0x0356, 0x05e6, 0x05e6, @@ -1451,7 +1459,7 @@ static const word16 L_kyber_aarch64_zetas_inv[] = { 0x0c37, 0x0be2, 0x0773, 0x072c, 0x05ed, 0x0167, 0x02f6, 0x05a1, }; -static const word16 L_kyber_aarch64_zetas_inv_qinv[] = { +static const word16 L_mlkem_aarch64_zetas_inv_qinv[] = { 0xa5a5, 0xa5a5, 0x440f, 0x440f, 0xe1b4, 0xe1b4, 0xa243, 0xa243, 0x4f22, 0x4f22, 0x901d, 0x901d, 0x5d34, 0x5d34, 0x846c, 0x846c, 0x4423, 0x4423, 0xd566, 0xd566, 
0xa556, 0xa556, 0x57e6, 0x57e6, @@ -1490,29 +1498,29 @@ static const word16 L_kyber_aarch64_zetas_inv_qinv[] = { 0x4137, 0x91e2, 0x3073, 0xcb2c, 0xfced, 0xc667, 0x84f6, 0xd8a1, }; -void kyber_invntt(sword16* r) +void mlkem_invntt(sword16* r) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x2, %[L_kyber_aarch64_zetas_inv]\n\t" - "add x2, x2, :lo12:%[L_kyber_aarch64_zetas_inv]\n\t" + "adrp x2, %[L_mlkem_aarch64_zetas_inv]\n\t" + "add x2, x2, :lo12:%[L_mlkem_aarch64_zetas_inv]\n\t" #else - "adrp x2, %[L_kyber_aarch64_zetas_inv]@PAGE\n\t" - "add x2, x2, %[L_kyber_aarch64_zetas_inv]@PAGEOFF\n\t" + "adrp x2, %[L_mlkem_aarch64_zetas_inv]@PAGE\n\t" + "add x2, x2, %[L_mlkem_aarch64_zetas_inv]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x3, %[L_kyber_aarch64_zetas_inv_qinv]\n\t" - "add x3, x3, :lo12:%[L_kyber_aarch64_zetas_inv_qinv]\n\t" + "adrp x3, %[L_mlkem_aarch64_zetas_inv_qinv]\n\t" + "add x3, x3, :lo12:%[L_mlkem_aarch64_zetas_inv_qinv]\n\t" #else - "adrp x3, %[L_kyber_aarch64_zetas_inv_qinv]@PAGE\n\t" - "add x3, x3, %[L_kyber_aarch64_zetas_inv_qinv]@PAGEOFF\n\t" + "adrp x3, %[L_mlkem_aarch64_zetas_inv_qinv]@PAGE\n\t" + "add x3, x3, %[L_mlkem_aarch64_zetas_inv_qinv]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x4, %[L_kyber_aarch64_consts]\n\t" - "add x4, x4, :lo12:%[L_kyber_aarch64_consts]\n\t" + "adrp x4, %[L_mlkem_aarch64_consts]\n\t" + "add x4, x4, :lo12:%[L_mlkem_aarch64_consts]\n\t" #else - "adrp x4, %[L_kyber_aarch64_consts]@PAGE\n\t" - "add x4, x4, %[L_kyber_aarch64_consts]@PAGEOFF\n\t" + "adrp x4, %[L_mlkem_aarch64_consts]@PAGE\n\t" + "add x4, x4, %[L_mlkem_aarch64_consts]@PAGEOFF\n\t" #endif /* __APPLE__ */ "add x1, %x[r], #0x100\n\t" "ldr q8, [x4]\n\t" @@ -2917,35 +2925,44 @@ void kyber_invntt(sword16* r) "str q23, [x1, #208]\n\t" "str q24, [x1, #240]\n\t" : [r] "+r" (r) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), 
[L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv) - : "memory", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv) + : "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", + "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", + "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28" ); } #ifndef WOLFSSL_AARCH64_NO_SQRDMLSH -void kyber_ntt_sqrdmlsh(sword16* r) +void mlkem_ntt_sqrdmlsh(sword16* r) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x2, %[L_kyber_aarch64_zetas]\n\t" - "add x2, x2, :lo12:%[L_kyber_aarch64_zetas]\n\t" + "adrp x2, %[L_mlkem_aarch64_zetas]\n\t" + "add x2, x2, :lo12:%[L_mlkem_aarch64_zetas]\n\t" #else - "adrp x2, %[L_kyber_aarch64_zetas]@PAGE\n\t" - "add x2, x2, %[L_kyber_aarch64_zetas]@PAGEOFF\n\t" + "adrp x2, %[L_mlkem_aarch64_zetas]@PAGE\n\t" + "add x2, x2, %[L_mlkem_aarch64_zetas]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x3, %[L_kyber_aarch64_zetas_qinv]\n\t" - "add x3, x3, :lo12:%[L_kyber_aarch64_zetas_qinv]\n\t" + "adrp x3, %[L_mlkem_aarch64_zetas_qinv]\n\t" + "add x3, x3, :lo12:%[L_mlkem_aarch64_zetas_qinv]\n\t" #else - "adrp x3, %[L_kyber_aarch64_zetas_qinv]@PAGE\n\t" - "add x3, 
x3, %[L_kyber_aarch64_zetas_qinv]@PAGEOFF\n\t" + "adrp x3, %[L_mlkem_aarch64_zetas_qinv]@PAGE\n\t" + "add x3, x3, %[L_mlkem_aarch64_zetas_qinv]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x4, %[L_kyber_aarch64_consts]\n\t" - "add x4, x4, :lo12:%[L_kyber_aarch64_consts]\n\t" + "adrp x4, %[L_mlkem_aarch64_consts]\n\t" + "add x4, x4, :lo12:%[L_mlkem_aarch64_consts]\n\t" #else - "adrp x4, %[L_kyber_aarch64_consts]@PAGE\n\t" - "add x4, x4, %[L_kyber_aarch64_consts]@PAGEOFF\n\t" + "adrp x4, %[L_mlkem_aarch64_consts]@PAGE\n\t" + "add x4, x4, %[L_mlkem_aarch64_consts]@PAGEOFF\n\t" #endif /* __APPLE__ */ "add x1, %x[r], #0x100\n\t" "ldr q4, [x4]\n\t" @@ -4082,34 +4099,43 @@ void kyber_ntt_sqrdmlsh(sword16* r) "stp q17, q18, [x1, #192]\n\t" "stp q19, q20, [x1, #224]\n\t" : [r] "+r" (r) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv) - : "memory", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv) + : "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", + "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", + 
"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -void kyber_invntt_sqrdmlsh(sword16* r) +void mlkem_invntt_sqrdmlsh(sword16* r) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x2, %[L_kyber_aarch64_zetas_inv]\n\t" - "add x2, x2, :lo12:%[L_kyber_aarch64_zetas_inv]\n\t" + "adrp x2, %[L_mlkem_aarch64_zetas_inv]\n\t" + "add x2, x2, :lo12:%[L_mlkem_aarch64_zetas_inv]\n\t" #else - "adrp x2, %[L_kyber_aarch64_zetas_inv]@PAGE\n\t" - "add x2, x2, %[L_kyber_aarch64_zetas_inv]@PAGEOFF\n\t" + "adrp x2, %[L_mlkem_aarch64_zetas_inv]@PAGE\n\t" + "add x2, x2, %[L_mlkem_aarch64_zetas_inv]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x3, %[L_kyber_aarch64_zetas_inv_qinv]\n\t" - "add x3, x3, :lo12:%[L_kyber_aarch64_zetas_inv_qinv]\n\t" + "adrp x3, %[L_mlkem_aarch64_zetas_inv_qinv]\n\t" + "add x3, x3, :lo12:%[L_mlkem_aarch64_zetas_inv_qinv]\n\t" #else - "adrp x3, %[L_kyber_aarch64_zetas_inv_qinv]@PAGE\n\t" - "add x3, x3, %[L_kyber_aarch64_zetas_inv_qinv]@PAGEOFF\n\t" + "adrp x3, %[L_mlkem_aarch64_zetas_inv_qinv]@PAGE\n\t" + "add x3, x3, %[L_mlkem_aarch64_zetas_inv_qinv]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x4, %[L_kyber_aarch64_consts]\n\t" - "add x4, x4, :lo12:%[L_kyber_aarch64_consts]\n\t" + "adrp x4, %[L_mlkem_aarch64_consts]\n\t" + "add x4, x4, :lo12:%[L_mlkem_aarch64_consts]\n\t" #else - "adrp x4, %[L_kyber_aarch64_consts]@PAGE\n\t" - "add x4, x4, %[L_kyber_aarch64_consts]@PAGEOFF\n\t" + "adrp x4, %[L_mlkem_aarch64_consts]@PAGE\n\t" + "add x4, x4, %[L_mlkem_aarch64_consts]@PAGEOFF\n\t" #endif /* __APPLE__ */ "add x1, %x[r], #0x100\n\t" "ldr q8, [x4]\n\t" @@ -5370,13 +5396,22 @@ void kyber_invntt_sqrdmlsh(sword16* r) "str q23, [x1, #208]\n\t" "str q24, [x1, #240]\n\t" : [r] "+r" (r) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" 
(L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv) - : "memory", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv) + : "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", + "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", + "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28" ); } #endif /* WOLFSSL_AARCH64_NO_SQRDMLSH */ -static const word16 L_kyber_aarch64_zetas_mul[] = { +static const word16 L_mlkem_aarch64_zetas_mul[] = { 0x08b2, 0xf74e, 0x01ae, 0xfe52, 0x022b, 0xfdd5, 0x034b, 0xfcb5, 0x081e, 0xf7e2, 0x0367, 0xfc99, 0x060e, 0xf9f2, 0x0069, 0xff97, 0x01a6, 0xfe5a, 0x024b, 0xfdb5, 0x00b1, 0xff4f, 0x0c16, 0xf3ea, @@ -5395,22 +5430,22 @@ static const word16 L_kyber_aarch64_zetas_mul[] = { 0x03be, 0xfc42, 0x074d, 0xf8b3, 0x05f2, 0xfa0e, 0x065c, 0xf9a4, }; -void kyber_basemul_mont(sword16* r, const sword16* a, const sword16* b) +void mlkem_basemul_mont(sword16* r, const sword16* a, const sword16* b) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x3, %[L_kyber_aarch64_zetas_mul]\n\t" - "add x3, x3, :lo12:%[L_kyber_aarch64_zetas_mul]\n\t" + "adrp x3, %[L_mlkem_aarch64_zetas_mul]\n\t" + "add x3, x3, :lo12:%[L_mlkem_aarch64_zetas_mul]\n\t" #else - "adrp x3, 
%[L_kyber_aarch64_zetas_mul]@PAGE\n\t" - "add x3, x3, %[L_kyber_aarch64_zetas_mul]@PAGEOFF\n\t" + "adrp x3, %[L_mlkem_aarch64_zetas_mul]@PAGE\n\t" + "add x3, x3, %[L_mlkem_aarch64_zetas_mul]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x4, %[L_kyber_aarch64_consts]\n\t" - "add x4, x4, :lo12:%[L_kyber_aarch64_consts]\n\t" + "adrp x4, %[L_mlkem_aarch64_consts]\n\t" + "add x4, x4, :lo12:%[L_mlkem_aarch64_consts]\n\t" #else - "adrp x4, %[L_kyber_aarch64_consts]@PAGE\n\t" - "add x4, x4, %[L_kyber_aarch64_consts]@PAGEOFF\n\t" + "adrp x4, %[L_mlkem_aarch64_consts]@PAGE\n\t" + "add x4, x4, %[L_mlkem_aarch64_consts]@PAGEOFF\n\t" #endif /* __APPLE__ */ "ldr q1, [x4]\n\t" "ldp q2, q3, [%x[a]]\n\t" @@ -6070,27 +6105,37 @@ void kyber_basemul_mont(sword16* r, const sword16* a, const sword16* b) "zip2 v25.8h, v22.8h, v23.8h\n\t" "stp q24, q25, [%x[r], #480]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul) - : "memory", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), 
+ [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", + "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", + "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27" ); } -void kyber_basemul_mont_add(sword16* r, const sword16* a, const sword16* b) +void mlkem_basemul_mont_add(sword16* r, const sword16* a, const sword16* b) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x3, %[L_kyber_aarch64_zetas_mul]\n\t" - "add x3, x3, :lo12:%[L_kyber_aarch64_zetas_mul]\n\t" + "adrp x3, %[L_mlkem_aarch64_zetas_mul]\n\t" + "add x3, x3, :lo12:%[L_mlkem_aarch64_zetas_mul]\n\t" #else - "adrp x3, %[L_kyber_aarch64_zetas_mul]@PAGE\n\t" - "add x3, x3, %[L_kyber_aarch64_zetas_mul]@PAGEOFF\n\t" + "adrp x3, %[L_mlkem_aarch64_zetas_mul]@PAGE\n\t" + "add x3, x3, %[L_mlkem_aarch64_zetas_mul]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x4, %[L_kyber_aarch64_consts]\n\t" - "add x4, x4, :lo12:%[L_kyber_aarch64_consts]\n\t" + "adrp x4, %[L_mlkem_aarch64_consts]\n\t" + "add x4, x4, :lo12:%[L_mlkem_aarch64_consts]\n\t" #else - "adrp x4, %[L_kyber_aarch64_consts]@PAGE\n\t" - "add x4, x4, %[L_kyber_aarch64_consts]@PAGEOFF\n\t" + "adrp x4, %[L_mlkem_aarch64_consts]@PAGE\n\t" + "add x4, x4, %[L_mlkem_aarch64_consts]@PAGEOFF\n\t" #endif /* __APPLE__ */ "ldr q1, [x4]\n\t" "ldp q2, q3, [%x[a]]\n\t" @@ -6798,20 +6843,30 @@ void kyber_basemul_mont_add(sword16* r, const sword16* a, const sword16* b) "add v29.8h, v29.8h, v25.8h\n\t" "stp q28, q29, [%x[r], #480]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" 
(L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul) - : "memory", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", + "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", + "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29" ); } -void kyber_csubq_neon(sword16* p) +void mlkem_csubq_neon(sword16* p) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x1, %[L_kyber_aarch64_q]\n\t" - "add x1, x1, :lo12:%[L_kyber_aarch64_q]\n\t" + "adrp x1, %[L_mlkem_aarch64_q]\n\t" + "add x1, x1, :lo12:%[L_mlkem_aarch64_q]\n\t" #else - "adrp x1, %[L_kyber_aarch64_q]@PAGE\n\t" - "add x1, x1, %[L_kyber_aarch64_q]@PAGEOFF\n\t" + "adrp x1, %[L_mlkem_aarch64_q]@PAGE\n\t" + "add x1, x1, %[L_mlkem_aarch64_q]@PAGEOFF\n\t" #endif /* __APPLE__ */ "ldr q20, [x1]\n\t" "ld4 {v0.8h, v1.8h, v2.8h, v3.8h}, [%x[p]], #0x40\n\t" @@ -6961,20 +7016,29 @@ void kyber_csubq_neon(sword16* p) "st4 {v8.8h, v9.8h, v10.8h, v11.8h}, [%x[p]], #0x40\n\t" "st4 {v12.8h, v13.8h, v14.8h, v15.8h}, [%x[p]], #0x40\n\t" : [p] "+r" (p) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), 
[L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul) - : "memory", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20" ); } -void kyber_add_reduce(sword16* r, const sword16* a) +void mlkem_add_reduce(sword16* r, const sword16* a) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x2, %[L_kyber_aarch64_consts]\n\t" - "add x2, x2, :lo12:%[L_kyber_aarch64_consts]\n\t" + "adrp x2, %[L_mlkem_aarch64_consts]\n\t" + "add x2, x2, :lo12:%[L_mlkem_aarch64_consts]\n\t" #else - "adrp x2, %[L_kyber_aarch64_consts]@PAGE\n\t" - "add x2, x2, %[L_kyber_aarch64_consts]@PAGEOFF\n\t" + "adrp x2, %[L_mlkem_aarch64_consts]@PAGE\n\t" + "add x2, x2, %[L_mlkem_aarch64_consts]@PAGEOFF\n\t" #endif /* __APPLE__ */ "ldr q0, [x2]\n\t" "ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t" @@ -7134,20 +7198,29 @@ void kyber_add_reduce(sword16* r, const sword16* a) "st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t" "st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t" : [r] "+r" (r), [a] "+r" (a) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" 
(L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul) - : "memory", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : "memory", "cc", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18" ); } -void kyber_add3_reduce(sword16* r, const sword16* a, const sword16* b) +void mlkem_add3_reduce(sword16* r, const sword16* a, const sword16* b) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x3, %[L_kyber_aarch64_consts]\n\t" - "add x3, x3, :lo12:%[L_kyber_aarch64_consts]\n\t" + "adrp x3, %[L_mlkem_aarch64_consts]\n\t" + "add x3, x3, :lo12:%[L_mlkem_aarch64_consts]\n\t" #else - "adrp x3, %[L_kyber_aarch64_consts]@PAGE\n\t" - "add x3, x3, %[L_kyber_aarch64_consts]@PAGEOFF\n\t" + "adrp x3, %[L_mlkem_aarch64_consts]@PAGE\n\t" + "add x3, x3, %[L_mlkem_aarch64_consts]@PAGEOFF\n\t" #endif /* __APPLE__ */ "ldr q0, [x3]\n\t" "ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t" @@ -7347,20 +7420,29 @@ void kyber_add3_reduce(sword16* r, const sword16* a, const sword16* b) "st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t" "st4 {v5.8h, 
v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul) - : "memory", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : "memory", "cc", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26" ); } -void kyber_rsub_reduce(sword16* r, const sword16* a) +void mlkem_rsub_reduce(sword16* r, const sword16* a) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x2, %[L_kyber_aarch64_consts]\n\t" - "add x2, x2, :lo12:%[L_kyber_aarch64_consts]\n\t" + "adrp x2, %[L_mlkem_aarch64_consts]\n\t" + "add x2, x2, :lo12:%[L_mlkem_aarch64_consts]\n\t" #else - "adrp x2, %[L_kyber_aarch64_consts]@PAGE\n\t" - "add x2, x2, %[L_kyber_aarch64_consts]@PAGEOFF\n\t" + "adrp x2, %[L_mlkem_aarch64_consts]@PAGE\n\t" + "add x2, x2, %[L_mlkem_aarch64_consts]@PAGEOFF\n\t" #endif /* __APPLE__ */ 
"ldr q0, [x2]\n\t" "ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t" @@ -7520,20 +7602,29 @@ void kyber_rsub_reduce(sword16* r, const sword16* a) "st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t" "st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t" : [r] "+r" (r), [a] "+r" (a) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul) - : "memory", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : "memory", "cc", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18" ); } -void kyber_to_mont(sword16* p) +void mlkem_to_mont(sword16* p) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x1, %[L_kyber_aarch64_consts]\n\t" - "add x1, x1, :lo12:%[L_kyber_aarch64_consts]\n\t" + "adrp x1, %[L_mlkem_aarch64_consts]\n\t" + "add x1, x1, :lo12:%[L_mlkem_aarch64_consts]\n\t" #else - "adrp x1, %[L_kyber_aarch64_consts]@PAGE\n\t" - "add x1, x1, %[L_kyber_aarch64_consts]@PAGEOFF\n\t" + "adrp x1, %[L_mlkem_aarch64_consts]@PAGE\n\t" + "add x1, x1, 
%[L_mlkem_aarch64_consts]@PAGEOFF\n\t" #endif /* __APPLE__ */ "ldr q0, [x1]\n\t" "ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[p]], #0x40\n\t" @@ -7715,21 +7806,30 @@ void kyber_to_mont(sword16* p) "st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t" "st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t" : [p] "+r" (p) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul) - : "memory", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18" ); } #ifndef WOLFSSL_AARCH64_NO_SQRDMLSH -void kyber_to_mont_sqrdmlsh(sword16* p) +void mlkem_to_mont_sqrdmlsh(sword16* p) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x1, %[L_kyber_aarch64_consts]\n\t" - "add x1, x1, :lo12:%[L_kyber_aarch64_consts]\n\t" + "adrp x1, %[L_mlkem_aarch64_consts]\n\t" + "add x1, x1, :lo12:%[L_mlkem_aarch64_consts]\n\t" #else - "adrp x1, %[L_kyber_aarch64_consts]@PAGE\n\t" - "add x1, x1, 
%[L_kyber_aarch64_consts]@PAGEOFF\n\t" + "adrp x1, %[L_mlkem_aarch64_consts]@PAGE\n\t" + "add x1, x1, %[L_mlkem_aarch64_consts]@PAGEOFF\n\t" #endif /* __APPLE__ */ "ldr q0, [x1]\n\t" "ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[p]], #0x40\n\t" @@ -7879,47 +7979,56 @@ void kyber_to_mont_sqrdmlsh(sword16* p) "st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t" "st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t" : [p] "+r" (p) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul) - : "memory", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18" ); } #endif /* WOLFSSL_AARCH64_NO_SQRDMLSH */ -static const word16 L_kyber_aarch64_to_msg_neon_low[] = { +static const word16 L_mlkem_to_msg_low[] = { 0x0373, 0x0373, 0x0373, 0x0373, 0x0373, 0x0373, 0x0373, 0x0373, }; -static const word16 L_kyber_aarch64_to_msg_neon_high[] = { +static const word16 L_mlkem_to_msg_high[] = { 0x09c0, 0x09c0, 
0x09c0, 0x09c0, 0x09c0, 0x09c0, 0x09c0, 0x09c0, }; -static const word16 L_kyber_aarch64_to_msg_neon_bits[] = { +static const word16 L_mlkem_to_msg_bits[] = { 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, }; -void kyber_to_msg_neon(byte* msg, sword16* p) +void mlkem_to_msg_neon(byte* msg, sword16* p) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x2, %[L_kyber_aarch64_to_msg_neon_low]\n\t" - "add x2, x2, :lo12:%[L_kyber_aarch64_to_msg_neon_low]\n\t" + "adrp x2, %[L_mlkem_to_msg_low]\n\t" + "add x2, x2, :lo12:%[L_mlkem_to_msg_low]\n\t" #else - "adrp x2, %[L_kyber_aarch64_to_msg_neon_low]@PAGE\n\t" - "add x2, x2, %[L_kyber_aarch64_to_msg_neon_low]@PAGEOFF\n\t" + "adrp x2, %[L_mlkem_to_msg_low]@PAGE\n\t" + "add x2, x2, %[L_mlkem_to_msg_low]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x3, %[L_kyber_aarch64_to_msg_neon_high]\n\t" - "add x3, x3, :lo12:%[L_kyber_aarch64_to_msg_neon_high]\n\t" + "adrp x3, %[L_mlkem_to_msg_high]\n\t" + "add x3, x3, :lo12:%[L_mlkem_to_msg_high]\n\t" #else - "adrp x3, %[L_kyber_aarch64_to_msg_neon_high]@PAGE\n\t" - "add x3, x3, %[L_kyber_aarch64_to_msg_neon_high]@PAGEOFF\n\t" + "adrp x3, %[L_mlkem_to_msg_high]@PAGE\n\t" + "add x3, x3, %[L_mlkem_to_msg_high]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x4, %[L_kyber_aarch64_to_msg_neon_bits]\n\t" - "add x4, x4, :lo12:%[L_kyber_aarch64_to_msg_neon_bits]\n\t" + "adrp x4, %[L_mlkem_to_msg_bits]\n\t" + "add x4, x4, :lo12:%[L_mlkem_to_msg_bits]\n\t" #else - "adrp x4, %[L_kyber_aarch64_to_msg_neon_bits]@PAGE\n\t" - "add x4, x4, %[L_kyber_aarch64_to_msg_neon_bits]@PAGEOFF\n\t" + "adrp x4, %[L_mlkem_to_msg_bits]@PAGE\n\t" + "add x4, x4, %[L_mlkem_to_msg_bits]@PAGEOFF\n\t" #endif /* __APPLE__ */ "ldr q0, [x2]\n\t" "ldr q1, [x3]\n\t" @@ -8125,36 +8234,49 @@ void kyber_to_msg_neon(byte* msg, sword16* p) "ins v18.b[7], v25.b[0]\n\t" "st1 {v18.8b}, [%x[msg]], #8\n\t" : [msg] "+r" (msg), [p] "+r" (p) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), 
[L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul), [L_kyber_aarch64_to_msg_neon_low] "S" (L_kyber_aarch64_to_msg_neon_low), [L_kyber_aarch64_to_msg_neon_high] "S" (L_kyber_aarch64_to_msg_neon_high), [L_kyber_aarch64_to_msg_neon_bits] "S" (L_kyber_aarch64_to_msg_neon_bits) - : "memory", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), + [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), + [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), + [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits) + : "memory", "cc", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", + "v25", "v26" ); } -static const word16 L_kyber_aarch64_from_msg_neon_q1half[] = { +static const word16 L_mlkem_from_msg_q1half[] = { 0x0681, 0x0681, 0x0681, 0x0681, 0x0681, 0x0681, 0x0681, 0x0681, }; -static const word8 L_kyber_aarch64_from_msg_neon_bits[] = { +static const word8 L_mlkem_from_msg_bits[] = { 0x01, 0x02, 
0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, }; -void kyber_from_msg_neon(sword16* p, const byte* msg) +void mlkem_from_msg_neon(sword16* p, const byte* msg) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x2, %[L_kyber_aarch64_from_msg_neon_q1half]\n\t" - "add x2, x2, :lo12:%[L_kyber_aarch64_from_msg_neon_q1half]\n\t" + "adrp x2, %[L_mlkem_from_msg_q1half]\n\t" + "add x2, x2, :lo12:%[L_mlkem_from_msg_q1half]\n\t" #else - "adrp x2, %[L_kyber_aarch64_from_msg_neon_q1half]@PAGE\n\t" - "add x2, x2, %[L_kyber_aarch64_from_msg_neon_q1half]@PAGEOFF\n\t" + "adrp x2, %[L_mlkem_from_msg_q1half]@PAGE\n\t" + "add x2, x2, %[L_mlkem_from_msg_q1half]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x3, %[L_kyber_aarch64_from_msg_neon_bits]\n\t" - "add x3, x3, :lo12:%[L_kyber_aarch64_from_msg_neon_bits]\n\t" + "adrp x3, %[L_mlkem_from_msg_bits]\n\t" + "add x3, x3, :lo12:%[L_mlkem_from_msg_bits]\n\t" #else - "adrp x3, %[L_kyber_aarch64_from_msg_neon_bits]@PAGE\n\t" - "add x3, x3, %[L_kyber_aarch64_from_msg_neon_bits]@PAGEOFF\n\t" + "adrp x3, %[L_mlkem_from_msg_bits]@PAGE\n\t" + "add x3, x3, %[L_mlkem_from_msg_bits]@PAGEOFF\n\t" #endif /* __APPLE__ */ "ld1 {v2.16b, v3.16b}, [%x[msg]]\n\t" "ldr q1, [x2]\n\t" @@ -8296,12 +8418,25 @@ void kyber_from_msg_neon(sword16* p, const byte* msg) "and v7.16b, v7.16b, v1.16b\n\t" "st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t" : [p] "+r" (p), [msg] "+r" (msg) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul), [L_kyber_aarch64_to_msg_neon_low] "S" (L_kyber_aarch64_to_msg_neon_low), 
[L_kyber_aarch64_to_msg_neon_high] "S" (L_kyber_aarch64_to_msg_neon_high), [L_kyber_aarch64_to_msg_neon_bits] "S" (L_kyber_aarch64_to_msg_neon_bits), [L_kyber_aarch64_from_msg_neon_q1half] "S" (L_kyber_aarch64_from_msg_neon_q1half), [L_kyber_aarch64_from_msg_neon_bits] "S" (L_kyber_aarch64_from_msg_neon_bits) - : "memory", "x2", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), + [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), + [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), + [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), + [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), + [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits) + : "memory", "cc", "x2", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", + "v7", "v8", "v9", "v10", "v11" ); } -int kyber_cmp_neon(const byte* a, const byte* b, int sz) +int mlkem_cmp_neon(const byte* a, const byte* b, int sz) { __asm__ __volatile__ ( "ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t" @@ -8421,7 +8556,7 @@ int kyber_cmp_neon(const byte* a, const byte* b, int sz) "orr v10.16b, v10.16b, v2.16b\n\t" "orr v11.16b, v11.16b, v3.16b\n\t" "subs %w[sz], %w[sz], #0x300\n\t" - "beq L_kyber_aarch64_cmp_neon_done_%=\n\t" + "beq L_mlkem_aarch64_cmp_neon_done_%=\n\t" "ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t" "ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" @@ -8473,7 +8608,7 @@ int kyber_cmp_neon(const byte* a, const byte* b, int sz) "orr v10.16b, v10.16b, v2.16b\n\t" 
"orr v11.16b, v11.16b, v3.16b\n\t" "subs %w[sz], %w[sz], #0x140\n\t" - "beq L_kyber_aarch64_cmp_neon_done_%=\n\t" + "beq L_mlkem_aarch64_cmp_neon_done_%=\n\t" "ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[a]], #0x40\n\t" "ld4 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[b]], #0x40\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" @@ -8551,7 +8686,7 @@ int kyber_cmp_neon(const byte* a, const byte* b, int sz) "orr v8.16b, v8.16b, v0.16b\n\t" "orr v9.16b, v9.16b, v1.16b\n\t" "\n" - "L_kyber_aarch64_cmp_neon_done_%=: \n\t" + "L_mlkem_aarch64_cmp_neon_done_%=: \n\t" "orr v8.16b, v8.16b, v9.16b\n\t" "orr v10.16b, v10.16b, v11.16b\n\t" "orr v8.16b, v8.16b, v10.16b\n\t" @@ -8561,21 +8696,34 @@ int kyber_cmp_neon(const byte* a, const byte* b, int sz) "subs x0, x0, xzr\n\t" "csetm w0, ne\n\t" : [a] "+r" (a), [b] "+r" (b), [sz] "+r" (sz) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul), [L_kyber_aarch64_to_msg_neon_low] "S" (L_kyber_aarch64_to_msg_neon_low), [L_kyber_aarch64_to_msg_neon_high] "S" (L_kyber_aarch64_to_msg_neon_high), [L_kyber_aarch64_to_msg_neon_bits] "S" (L_kyber_aarch64_to_msg_neon_bits), [L_kyber_aarch64_from_msg_neon_q1half] "S" (L_kyber_aarch64_from_msg_neon_q1half), [L_kyber_aarch64_from_msg_neon_bits] "S" (L_kyber_aarch64_from_msg_neon_bits) - : "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" 
(L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), + [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), + [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), + [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), + [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), + [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits) + : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", + "v9", "v10", "v11" ); return (word32)(size_t)a; } -static const word16 L_kyber_aarch64_rej_uniform_neon_mask[] = { +static const word16 L_mlkem_rej_uniform_mask[] = { 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, }; -static const word16 L_kyber_aarch64_rej_uniform_neon_bits[] = { +static const word16 L_mlkem_rej_uniform_bits[] = { 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, }; -static const word8 L_kyber_aarch64_rej_uniform_neon_indices[] = { +static const word8 L_mlkem_rej_uniform_indices[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, @@ -9090,36 +9238,37 @@ static const word8 L_kyber_aarch64_rej_uniform_neon_indices[] = { 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, }; -unsigned int kyber_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, unsigned int rLen) +unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, + unsigned int rLen) { __asm__ __volatile__ ( #ifndef __APPLE__ - "adrp x4, %[L_kyber_aarch64_rej_uniform_neon_mask]\n\t" - "add x4, x4, :lo12:%[L_kyber_aarch64_rej_uniform_neon_mask]\n\t" + "adrp x4, %[L_mlkem_rej_uniform_mask]\n\t" + "add x4, x4, :lo12:%[L_mlkem_rej_uniform_mask]\n\t" #else - "adrp x4, %[L_kyber_aarch64_rej_uniform_neon_mask]@PAGE\n\t" - "add x4, x4, 
%[L_kyber_aarch64_rej_uniform_neon_mask]@PAGEOFF\n\t" + "adrp x4, %[L_mlkem_rej_uniform_mask]@PAGE\n\t" + "add x4, x4, %[L_mlkem_rej_uniform_mask]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x5, %[L_kyber_aarch64_q]\n\t" - "add x5, x5, :lo12:%[L_kyber_aarch64_q]\n\t" + "adrp x5, %[L_mlkem_aarch64_q]\n\t" + "add x5, x5, :lo12:%[L_mlkem_aarch64_q]\n\t" #else - "adrp x5, %[L_kyber_aarch64_q]@PAGE\n\t" - "add x5, x5, %[L_kyber_aarch64_q]@PAGEOFF\n\t" + "adrp x5, %[L_mlkem_aarch64_q]@PAGE\n\t" + "add x5, x5, %[L_mlkem_aarch64_q]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x6, %[L_kyber_aarch64_rej_uniform_neon_bits]\n\t" - "add x6, x6, :lo12:%[L_kyber_aarch64_rej_uniform_neon_bits]\n\t" + "adrp x6, %[L_mlkem_rej_uniform_bits]\n\t" + "add x6, x6, :lo12:%[L_mlkem_rej_uniform_bits]\n\t" #else - "adrp x6, %[L_kyber_aarch64_rej_uniform_neon_bits]@PAGE\n\t" - "add x6, x6, %[L_kyber_aarch64_rej_uniform_neon_bits]@PAGEOFF\n\t" + "adrp x6, %[L_mlkem_rej_uniform_bits]@PAGE\n\t" + "add x6, x6, %[L_mlkem_rej_uniform_bits]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x7, %[L_kyber_aarch64_rej_uniform_neon_indices]\n\t" - "add x7, x7, :lo12:%[L_kyber_aarch64_rej_uniform_neon_indices]\n\t" + "adrp x7, %[L_mlkem_rej_uniform_indices]\n\t" + "add x7, x7, :lo12:%[L_mlkem_rej_uniform_indices]\n\t" #else - "adrp x7, %[L_kyber_aarch64_rej_uniform_neon_indices]@PAGE\n\t" - "add x7, x7, %[L_kyber_aarch64_rej_uniform_neon_indices]@PAGEOFF\n\t" + "adrp x7, %[L_mlkem_rej_uniform_indices]@PAGE\n\t" + "add x7, x7, %[L_mlkem_rej_uniform_indices]@PAGEOFF\n\t" #endif /* __APPLE__ */ "eor v1.16b, v1.16b, v1.16b\n\t" "eor v12.16b, v12.16b, v12.16b\n\t" @@ -9132,11 +9281,11 @@ unsigned int kyber_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, "ldr q3, [x5]\n\t" "ldr q2, [x6]\n\t" "subs wzr, %w[len], #0\n\t" - "beq L_kyber_aarch64_rej_uniform_neon_done_%=\n\t" + "beq L_mlkem_rej_uniform_done_%=\n\t" "subs wzr, %w[len], #16\n\t" - "blt 
L_kyber_aarch64_rej_uniform_neon_loop_4_%=\n\t" + "blt L_mlkem_rej_uniform_loop_4_%=\n\t" "\n" - "L_kyber_aarch64_rej_uniform_neon_loop_16_%=: \n\t" + "L_mlkem_rej_uniform_loop_16_%=: \n\t" "ld3 {v4.8b, v5.8b, v6.8b}, [%x[r]], #24\n\t" "zip1 v4.16b, v4.16b, v1.16b\n\t" "zip1 v5.16b, v5.16b, v1.16b\n\t" @@ -9177,17 +9326,17 @@ unsigned int kyber_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, "add %x[p], %x[p], x11, lsl 1\n\t" "add x12, x12, x11\n\t" "subs %w[rLen], %w[rLen], #24\n\t" - "beq L_kyber_aarch64_rej_uniform_neon_done_%=\n\t" + "beq L_mlkem_rej_uniform_done_%=\n\t" "sub w10, %w[len], w12\n\t" "subs x10, x10, #16\n\t" - "blt L_kyber_aarch64_rej_uniform_neon_loop_4_%=\n\t" - "b L_kyber_aarch64_rej_uniform_neon_loop_16_%=\n\t" + "blt L_mlkem_rej_uniform_loop_4_%=\n\t" + "b L_mlkem_rej_uniform_loop_16_%=\n\t" "\n" - "L_kyber_aarch64_rej_uniform_neon_loop_4_%=: \n\t" + "L_mlkem_rej_uniform_loop_4_%=: \n\t" "subs w10, %w[len], w12\n\t" - "beq L_kyber_aarch64_rej_uniform_neon_done_%=\n\t" + "beq L_mlkem_rej_uniform_done_%=\n\t" "subs x10, x10, #4\n\t" - "blt L_kyber_aarch64_rej_uniform_neon_loop_lt_4_%=\n\t" + "blt L_mlkem_rej_uniform_loop_lt_4_%=\n\t" "ldr x4, [%x[r]], #6\n\t" "lsr x5, x4, #12\n\t" "lsr x6, x4, #24\n\t" @@ -9217,10 +9366,10 @@ unsigned int kyber_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, "cinc %x[p], %x[p], lt\n\t" "cinc x12, x12, lt\n\t" "subs %w[rLen], %w[rLen], #6\n\t" - "beq L_kyber_aarch64_rej_uniform_neon_done_%=\n\t" - "b L_kyber_aarch64_rej_uniform_neon_loop_4_%=\n\t" + "beq L_mlkem_rej_uniform_done_%=\n\t" + "b L_mlkem_rej_uniform_loop_4_%=\n\t" "\n" - "L_kyber_aarch64_rej_uniform_neon_loop_lt_4_%=: \n\t" + "L_mlkem_rej_uniform_loop_lt_4_%=: \n\t" "ldr x4, [%x[r]], #6\n\t" "lsr x5, x4, #12\n\t" "lsr x6, x4, #24\n\t" @@ -9235,43 +9384,60 @@ unsigned int kyber_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, "cinc %x[p], %x[p], lt\n\t" "cinc x12, x12, lt\n\t" "subs wzr, %w[len], w12\n\t" - 
"beq L_kyber_aarch64_rej_uniform_neon_done_%=\n\t" + "beq L_mlkem_rej_uniform_done_%=\n\t" "strh w5, [%x[p]]\n\t" "subs xzr, x5, x13\n\t" "cinc %x[p], %x[p], lt\n\t" "cinc %x[p], %x[p], lt\n\t" "cinc x12, x12, lt\n\t" "subs wzr, %w[len], w12\n\t" - "beq L_kyber_aarch64_rej_uniform_neon_done_%=\n\t" + "beq L_mlkem_rej_uniform_done_%=\n\t" "strh w6, [%x[p]]\n\t" "subs xzr, x6, x13\n\t" "cinc %x[p], %x[p], lt\n\t" "cinc %x[p], %x[p], lt\n\t" "cinc x12, x12, lt\n\t" "subs wzr, %w[len], w12\n\t" - "beq L_kyber_aarch64_rej_uniform_neon_done_%=\n\t" + "beq L_mlkem_rej_uniform_done_%=\n\t" "strh w7, [%x[p]]\n\t" "subs xzr, x7, x13\n\t" "cinc %x[p], %x[p], lt\n\t" "cinc %x[p], %x[p], lt\n\t" "cinc x12, x12, lt\n\t" "subs wzr, %w[len], w12\n\t" - "beq L_kyber_aarch64_rej_uniform_neon_done_%=\n\t" + "beq L_mlkem_rej_uniform_done_%=\n\t" "subs %w[rLen], %w[rLen], #6\n\t" - "beq L_kyber_aarch64_rej_uniform_neon_done_%=\n\t" - "b L_kyber_aarch64_rej_uniform_neon_loop_lt_4_%=\n\t" + "beq L_mlkem_rej_uniform_done_%=\n\t" + "b L_mlkem_rej_uniform_loop_lt_4_%=\n\t" "\n" - "L_kyber_aarch64_rej_uniform_neon_done_%=: \n\t" + "L_mlkem_rej_uniform_done_%=: \n\t" "mov x0, x12\n\t" : [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul), [L_kyber_aarch64_to_msg_neon_low] "S" (L_kyber_aarch64_to_msg_neon_low), [L_kyber_aarch64_to_msg_neon_high] "S" (L_kyber_aarch64_to_msg_neon_high), [L_kyber_aarch64_to_msg_neon_bits] "S" (L_kyber_aarch64_to_msg_neon_bits), [L_kyber_aarch64_from_msg_neon_q1half] "S" 
(L_kyber_aarch64_from_msg_neon_q1half), [L_kyber_aarch64_from_msg_neon_bits] "S" (L_kyber_aarch64_from_msg_neon_bits), [L_kyber_aarch64_rej_uniform_neon_mask] "S" (L_kyber_aarch64_rej_uniform_neon_mask), [L_kyber_aarch64_rej_uniform_neon_bits] "S" (L_kyber_aarch64_rej_uniform_neon_bits), [L_kyber_aarch64_rej_uniform_neon_indices] "S" (L_kyber_aarch64_rej_uniform_neon_indices) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), + [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), + [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), + [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), + [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), + [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), + [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), + [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), + [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", + "x12", "x13", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", + "v9", "v10", "v11", "v12", "v13" ); return (word32)(size_t)p; } #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 -void kyber_sha3_blocksx3_neon(word64* state) +void mlkem_sha3_blocksx3_neon(word64* state) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-64]!\n\t" @@ -9565,12 +9731,33 @@ void kyber_sha3_blocksx3_neon(word64* state) "str x26, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : 
[state] "+r" (state) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul), [L_kyber_aarch64_to_msg_neon_low] "S" (L_kyber_aarch64_to_msg_neon_low), [L_kyber_aarch64_to_msg_neon_high] "S" (L_kyber_aarch64_to_msg_neon_high), [L_kyber_aarch64_to_msg_neon_bits] "S" (L_kyber_aarch64_to_msg_neon_bits), [L_kyber_aarch64_from_msg_neon_q1half] "S" (L_kyber_aarch64_from_msg_neon_q1half), [L_kyber_aarch64_from_msg_neon_bits] "S" (L_kyber_aarch64_from_msg_neon_bits), [L_kyber_aarch64_rej_uniform_neon_mask] "S" (L_kyber_aarch64_rej_uniform_neon_mask), [L_kyber_aarch64_rej_uniform_neon_bits] "S" (L_kyber_aarch64_rej_uniform_neon_bits), [L_kyber_aarch64_rej_uniform_neon_indices] "S" (L_kyber_aarch64_rej_uniform_neon_indices) - : "memory", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" 
(L_mlkem_aarch64_zetas_mul), + [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), + [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), + [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), + [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), + [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), + [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), + [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), + [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", + "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", + "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", + "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", + "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", + "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", + "v30", "v31" ); } -void kyber_shake128_blocksx3_seed_neon(word64* state, byte* seed) +void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-64]!\n\t" @@ -9886,12 +10073,33 @@ void kyber_shake128_blocksx3_seed_neon(word64* state, byte* seed) "str x27, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state), [seed] "+r" (seed) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul), [L_kyber_aarch64_to_msg_neon_low] "S" (L_kyber_aarch64_to_msg_neon_low), [L_kyber_aarch64_to_msg_neon_high] "S" (L_kyber_aarch64_to_msg_neon_high), [L_kyber_aarch64_to_msg_neon_bits] "S" (L_kyber_aarch64_to_msg_neon_bits), 
[L_kyber_aarch64_from_msg_neon_q1half] "S" (L_kyber_aarch64_from_msg_neon_q1half), [L_kyber_aarch64_from_msg_neon_bits] "S" (L_kyber_aarch64_from_msg_neon_bits), [L_kyber_aarch64_rej_uniform_neon_mask] "S" (L_kyber_aarch64_rej_uniform_neon_mask), [L_kyber_aarch64_rej_uniform_neon_bits] "S" (L_kyber_aarch64_rej_uniform_neon_bits), [L_kyber_aarch64_rej_uniform_neon_indices] "S" (L_kyber_aarch64_rej_uniform_neon_indices) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), + [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), + [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), + [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), + [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), + [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), + [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), + [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), + [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", + "v2", "v3", "v4", "v5", "v6", "v7", 
"v8", "v9", "v10", "v11", "v12", + "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", + "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", + "v31" ); } -void kyber_shake256_blocksx3_seed_neon(word64* state, byte* seed) +void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-64]!\n\t" @@ -10207,13 +10415,34 @@ void kyber_shake256_blocksx3_seed_neon(word64* state, byte* seed) "str x27, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state), [seed] "+r" (seed) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul), [L_kyber_aarch64_to_msg_neon_low] "S" (L_kyber_aarch64_to_msg_neon_low), [L_kyber_aarch64_to_msg_neon_high] "S" (L_kyber_aarch64_to_msg_neon_high), [L_kyber_aarch64_to_msg_neon_bits] "S" (L_kyber_aarch64_to_msg_neon_bits), [L_kyber_aarch64_from_msg_neon_q1half] "S" (L_kyber_aarch64_from_msg_neon_q1half), [L_kyber_aarch64_from_msg_neon_bits] "S" (L_kyber_aarch64_from_msg_neon_bits), [L_kyber_aarch64_rej_uniform_neon_mask] "S" (L_kyber_aarch64_rej_uniform_neon_mask), [L_kyber_aarch64_rej_uniform_neon_bits] "S" (L_kyber_aarch64_rej_uniform_neon_bits), [L_kyber_aarch64_rej_uniform_neon_indices] "S" (L_kyber_aarch64_rej_uniform_neon_indices) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", 
"v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), + [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), + [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), + [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), + [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), + [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), + [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), + [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), + [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", + "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", + "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", + "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", + "v31" ); } #else -void kyber_sha3_blocksx3_neon(word64* state) +void mlkem_sha3_blocksx3_neon(word64* state) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-64]!\n\t" @@ -10592,12 +10821,33 @@ void kyber_sha3_blocksx3_neon(word64* state) "str x26, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), 
[L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul), [L_kyber_aarch64_to_msg_neon_low] "S" (L_kyber_aarch64_to_msg_neon_low), [L_kyber_aarch64_to_msg_neon_high] "S" (L_kyber_aarch64_to_msg_neon_high), [L_kyber_aarch64_to_msg_neon_bits] "S" (L_kyber_aarch64_to_msg_neon_bits), [L_kyber_aarch64_from_msg_neon_q1half] "S" (L_kyber_aarch64_from_msg_neon_q1half), [L_kyber_aarch64_from_msg_neon_bits] "S" (L_kyber_aarch64_from_msg_neon_bits), [L_kyber_aarch64_rej_uniform_neon_mask] "S" (L_kyber_aarch64_rej_uniform_neon_mask), [L_kyber_aarch64_rej_uniform_neon_bits] "S" (L_kyber_aarch64_rej_uniform_neon_bits), [L_kyber_aarch64_rej_uniform_neon_indices] "S" (L_kyber_aarch64_rej_uniform_neon_indices) - : "memory", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), + [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), + [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), + [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), + [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), + [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), + 
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), + [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), + [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", + "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", + "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", + "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", + "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", + "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", + "v30", "v31" ); } -void kyber_shake128_blocksx3_seed_neon(word64* state, byte* seed) +void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-64]!\n\t" @@ -10998,12 +11248,33 @@ void kyber_shake128_blocksx3_seed_neon(word64* state, byte* seed) "str x27, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state), [seed] "+r" (seed) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul), [L_kyber_aarch64_to_msg_neon_low] "S" (L_kyber_aarch64_to_msg_neon_low), [L_kyber_aarch64_to_msg_neon_high] "S" (L_kyber_aarch64_to_msg_neon_high), [L_kyber_aarch64_to_msg_neon_bits] "S" (L_kyber_aarch64_to_msg_neon_bits), [L_kyber_aarch64_from_msg_neon_q1half] "S" (L_kyber_aarch64_from_msg_neon_q1half), [L_kyber_aarch64_from_msg_neon_bits] "S" (L_kyber_aarch64_from_msg_neon_bits), [L_kyber_aarch64_rej_uniform_neon_mask] "S" (L_kyber_aarch64_rej_uniform_neon_mask), [L_kyber_aarch64_rej_uniform_neon_bits] "S" 
(L_kyber_aarch64_rej_uniform_neon_bits), [L_kyber_aarch64_rej_uniform_neon_indices] "S" (L_kyber_aarch64_rej_uniform_neon_indices) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), + [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), + [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), + [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), + [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), + [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), + [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), + [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), + [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", + "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", + "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", + "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", + "v31" ); } -void kyber_shake256_blocksx3_seed_neon(word64* state, byte* seed) +void mlkem_shake256_blocksx3_seed_neon(word64* 
state, byte* seed) { __asm__ __volatile__ ( "stp x29, x30, [sp, #-64]!\n\t" @@ -11404,13 +11675,34 @@ void kyber_shake256_blocksx3_seed_neon(word64* state, byte* seed) "str x27, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state), [seed] "+r" (seed) - : [L_kyber_aarch64_q] "S" (L_kyber_aarch64_q), [L_kyber_aarch64_consts] "S" (L_kyber_aarch64_consts), [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), [L_kyber_aarch64_zetas] "S" (L_kyber_aarch64_zetas), [L_kyber_aarch64_zetas_qinv] "S" (L_kyber_aarch64_zetas_qinv), [L_kyber_aarch64_zetas_inv] "S" (L_kyber_aarch64_zetas_inv), [L_kyber_aarch64_zetas_inv_qinv] "S" (L_kyber_aarch64_zetas_inv_qinv), [L_kyber_aarch64_zetas_mul] "S" (L_kyber_aarch64_zetas_mul), [L_kyber_aarch64_to_msg_neon_low] "S" (L_kyber_aarch64_to_msg_neon_low), [L_kyber_aarch64_to_msg_neon_high] "S" (L_kyber_aarch64_to_msg_neon_high), [L_kyber_aarch64_to_msg_neon_bits] "S" (L_kyber_aarch64_to_msg_neon_bits), [L_kyber_aarch64_from_msg_neon_q1half] "S" (L_kyber_aarch64_from_msg_neon_q1half), [L_kyber_aarch64_from_msg_neon_bits] "S" (L_kyber_aarch64_from_msg_neon_bits), [L_kyber_aarch64_rej_uniform_neon_mask] "S" (L_kyber_aarch64_rej_uniform_neon_mask), [L_kyber_aarch64_rej_uniform_neon_bits] "S" (L_kyber_aarch64_rej_uniform_neon_bits), [L_kyber_aarch64_rej_uniform_neon_indices] "S" (L_kyber_aarch64_rej_uniform_neon_indices) - : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "cc" + : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), + [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), + [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), + [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), + 
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), + [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), + [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), + [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), + [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), + [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), + [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), + [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), + [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), + [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), + [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", + "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", + "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", + "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", + "v31" ); } #endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ -#endif /* WOLFSSL_WC_KYBER */ +#endif /* WOLFSSL_WC_MLKEM */ #endif /* __aarch64__ */ #endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-sha256.c b/wolfcrypt/src/port/arm/armv8-sha256.c index 33df2a2f0..367b74475 100644 --- a/wolfcrypt/src/port/arm/armv8-sha256.c +++ b/wolfcrypt/src/port/arm/armv8-sha256.c @@ -927,10 +927,10 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) : [digest] "m" (sha256->digest), [buffer] "m" (sha256->buffer), "0" (hash) - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v8", "v9", "v10", "v11", "v12", "v13", "v14", - "v15", "v16", "v17", "v18", "v19", "v20", "v21", - "v22", "v23", "v24", "v25" + : "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", 
"v13", "v14", + "v15", "v16", "v17", "v18", "v19", "v20", "v21", + "v22", "v23", "v24", "v25", "cc" ); } else { @@ -1206,7 +1206,8 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) if (sha256->buffLen > WC_SHA256_PAD_SIZE) { word32* bufPt = sha256->buffer; word32* digPt = sha256->digest; - XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_BLOCK_SIZE - sha256->buffLen); + XMEMSET(&local[sha256->buffLen], 0, + WC_SHA256_BLOCK_SIZE - sha256->buffLen); sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen; __asm__ volatile ( "#load leftover data\n" @@ -1645,7 +1646,8 @@ extern void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, #endif /* ARMv8 hardware acceleration Aarch32 and Thumb2 */ -static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) +static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, + word32 len) { int ret = 0; /* do block size increments */ diff --git a/wolfcrypt/src/port/arm/armv8-sha3-asm.S b/wolfcrypt/src/port/arm/armv8-sha3-asm.S index 833051b20..4a792df84 100644 --- a/wolfcrypt/src/port/arm/armv8-sha3-asm.S +++ b/wolfcrypt/src/port/arm/armv8-sha3-asm.S @@ -26,7 +26,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./sha3/sha3.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha3-asm.S + * ruby ./sha3/sha3.rb arm64 \ + * ../wolfssl/wolfcrypt/src/port/arm/armv8-sha3-asm.S */ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ diff --git a/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c index 823724357..04f30817f 100644 --- a/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c @@ -27,7 +27,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./sha3/sha3.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha3-asm.c + * ruby ./sha3/sha3.rb arm64 \ + * ../wolfssl/wolfcrypt/src/port/arm/armv8-sha3-asm.c */ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ @@ -165,7 +166,10 @@ void BlockSha3_crypto(word64* 
state) "st1 {v24.1d}, [%x[state]]\n\t" : [state] "+r" (state) : [L_SHA3_transform_crypto_r] "S" (L_SHA3_transform_crypto_r) - : "memory", "x1", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "cc" + : "memory", "cc", "x1", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", + "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", + "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29", "v30", "v31" ); } @@ -369,7 +373,9 @@ void BlockSha3_base(word64* state) "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state) : [L_SHA3_transform_base_r] "S" (L_SHA3_transform_base_r) - : "memory", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "cc" + : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", + "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", + "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); } diff --git a/wolfcrypt/src/port/arm/armv8-sha512-asm.S b/wolfcrypt/src/port/arm/armv8-sha512-asm.S index 9f5530a76..aeecb9c34 100644 --- a/wolfcrypt/src/port/arm/armv8-sha512-asm.S +++ b/wolfcrypt/src/port/arm/armv8-sha512-asm.S @@ -26,7 +26,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./sha2/sha512.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.S + * ruby ./sha2/sha512.rb arm64 \ + * ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.S */ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ @@ -128,9 +129,9 @@ L_SHA512_transform_neon_len_k: .xword 0x6c44198c4a475817 #ifndef __APPLE__ .text - .type L_SHA512_transform_neon_len_ror8, %object + .type L_SHA512_transform_neon_len_r8, %object .section .rodata - .size L_SHA512_transform_neon_len_ror8, 16 + .size 
L_SHA512_transform_neon_len_r8, 16 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -139,7 +140,7 @@ L_SHA512_transform_neon_len_k: #else .p2align 4 #endif /* __APPLE__ */ -L_SHA512_transform_neon_len_ror8: +L_SHA512_transform_neon_len_r8: .xword 0x7060504030201, 0x80f0e0d0c0b0a09 #ifndef __APPLE__ .text @@ -170,11 +171,11 @@ _Transform_Sha512_Len_neon: add x3, x3, :lo12:L_SHA512_transform_neon_len_k@PAGEOFF #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x27, L_SHA512_transform_neon_len_ror8 - add x27, x27, :lo12:L_SHA512_transform_neon_len_ror8 + adrp x27, L_SHA512_transform_neon_len_r8 + add x27, x27, :lo12:L_SHA512_transform_neon_len_r8 #else - adrp x27, L_SHA512_transform_neon_len_ror8@PAGE - add x27, x27, :lo12:L_SHA512_transform_neon_len_ror8@PAGEOFF + adrp x27, L_SHA512_transform_neon_len_r8@PAGE + add x27, x27, :lo12:L_SHA512_transform_neon_len_r8@PAGEOFF #endif /* __APPLE__ */ ld1 {v11.16b}, [x27] # Load digest into working vars @@ -1095,9 +1096,9 @@ L_sha512_len_neon_start: #ifdef WOLFSSL_ARMASM_CRYPTO_SHA512 #ifndef __APPLE__ .text - .type L_SHA512_transform_crypto_len_k, %object + .type L_SHA512_trans_crypto_len_k, %object .section .rodata - .size L_SHA512_transform_crypto_len_k, 640 + .size L_SHA512_trans_crypto_len_k, 640 #else .section __DATA,__data #endif /* __APPLE__ */ @@ -1106,7 +1107,7 @@ L_sha512_len_neon_start: #else .p2align 3 #endif /* __APPLE__ */ -L_SHA512_transform_crypto_len_k: +L_SHA512_trans_crypto_len_k: .xword 0x428a2f98d728ae22 .xword 0x7137449123ef65cd .xword 0xb5c0fbcfec4d3b2f @@ -1209,11 +1210,11 @@ _Transform_Sha512_Len_crypto: .arch_extension sha3 #endif /* __APPLE__ */ #ifndef __APPLE__ - adrp x4, L_SHA512_transform_crypto_len_k - add x4, x4, :lo12:L_SHA512_transform_crypto_len_k + adrp x4, L_SHA512_trans_crypto_len_k + add x4, x4, :lo12:L_SHA512_trans_crypto_len_k #else - adrp x4, L_SHA512_transform_crypto_len_k@PAGE - add x4, x4, :lo12:L_SHA512_transform_crypto_len_k@PAGEOFF + adrp x4, L_SHA512_trans_crypto_len_k@PAGE 
+ add x4, x4, :lo12:L_SHA512_trans_crypto_len_k@PAGEOFF #endif /* __APPLE__ */ # Load first 16 64-bit words of K permanently ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x4], #0x40 diff --git a/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c index 57e60354c..cf05c02c2 100644 --- a/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c @@ -27,7 +27,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./sha2/sha512.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c + * ruby ./sha2/sha512.rb arm64 \ + * ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c */ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ @@ -78,7 +79,7 @@ static const word64 L_SHA512_transform_neon_len_k[] = { 0x5fcb6fab3ad6faec, 0x6c44198c4a475817, }; -static const word64 L_SHA512_transform_neon_len_ror8[] = { +static const word64 L_SHA512_transform_neon_len_r8[] = { 0x0007060504030201, 0x080f0e0d0c0b0a09, }; @@ -93,11 +94,11 @@ void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len) "add x3, x3, %[L_SHA512_transform_neon_len_k]@PAGEOFF\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x27, %[L_SHA512_transform_neon_len_ror8]\n\t" - "add x27, x27, :lo12:%[L_SHA512_transform_neon_len_ror8]\n\t" + "adrp x27, %[L_SHA512_transform_neon_len_r8]\n\t" + "add x27, x27, :lo12:%[L_SHA512_transform_neon_len_r8]\n\t" #else - "adrp x27, %[L_SHA512_transform_neon_len_ror8]@PAGE\n\t" - "add x27, x27, %[L_SHA512_transform_neon_len_ror8]@PAGEOFF\n\t" + "adrp x27, %[L_SHA512_transform_neon_len_r8]@PAGE\n\t" + "add x27, x27, %[L_SHA512_transform_neon_len_r8]@PAGEOFF\n\t" #endif /* __APPLE__ */ "ld1 {v11.16b}, [x27]\n\t" /* Load digest into working vars */ @@ -1006,13 +1007,17 @@ void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len) "stp x8, x9, [%x[sha512], #32]\n\t" "stp x10, x11, [%x[sha512], #48]\n\t" : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) - : 
[L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k), [L_SHA512_transform_neon_len_ror8] "S" (L_SHA512_transform_neon_len_ror8) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "cc" + : [L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k), + [L_SHA512_transform_neon_len_r8] "S" (L_SHA512_transform_neon_len_r8) + : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2", + "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11" ); } #ifdef WOLFSSL_ARMASM_CRYPTO_SHA512 -static const word64 L_SHA512_transform_crypto_len_k[] = { +static const word64 L_SHA512_trans_crypto_len_k[] = { 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, 0x3956c25bf348b538, 0x59f111f1b605d019, @@ -1055,19 +1060,21 @@ static const word64 L_SHA512_transform_crypto_len_k[] = { 0x5fcb6fab3ad6faec, 0x6c44198c4a475817, }; -void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, word32 len); -void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, word32 len) +void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, + word32 len); +void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, + word32 len) { __asm__ __volatile__ ( #ifdef __APPLE__ ".arch_extension sha3\n\t" #endif /* __APPLE__ */ #ifndef __APPLE__ - "adrp x4, %[L_SHA512_transform_crypto_len_k]\n\t" - "add x4, x4, :lo12:%[L_SHA512_transform_crypto_len_k]\n\t" + "adrp x4, %[L_SHA512_trans_crypto_len_k]\n\t" + "add x4, x4, :lo12:%[L_SHA512_trans_crypto_len_k]\n\t" #else - "adrp x4, %[L_SHA512_transform_crypto_len_k]@PAGE\n\t" - "add x4, x4, %[L_SHA512_transform_crypto_len_k]@PAGEOFF\n\t" + 
"adrp x4, %[L_SHA512_trans_crypto_len_k]@PAGE\n\t" + "add x4, x4, %[L_SHA512_trans_crypto_len_k]@PAGEOFF\n\t" #endif /* __APPLE__ */ /* Load first 16 64-bit words of K permanently */ "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x4], #0x40\n\t" @@ -1576,8 +1583,11 @@ void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, word32 len /* Store digest back */ "st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[sha512]]\n\t" : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) - : [L_SHA512_transform_crypto_len_k] "S" (L_SHA512_transform_crypto_len_k) - : "memory", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "cc" + : [L_SHA512_trans_crypto_len_k] "S" (L_SHA512_trans_crypto_len_k) + : "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", + "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", + "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29", "v30", "v31" ); } diff --git a/wolfcrypt/src/port/arm/armv8-sha512.c b/wolfcrypt/src/port/arm/armv8-sha512.c index 63f108aec..49d84b439 100644 --- a/wolfcrypt/src/port/arm/armv8-sha512.c +++ b/wolfcrypt/src/port/arm/armv8-sha512.c @@ -376,7 +376,8 @@ static void Transform_Sha512(wc_Sha512* sha512) #undef DATA #define DATA ((word64*)data) -static void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) +static void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, + word32 len) { const word64* K = K512; word32 j; @@ -471,7 +472,8 @@ static WC_INLINE void AddLength(wc_Sha512* sha512, word32 len) sha512->hiLen++; /* carry low to high */ } -static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 len) +static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, + word32 len) { int ret = 0; /* do block size increments */ 
diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm.S b/wolfcrypt/src/port/arm/thumb2-aes-asm.S index e78d5324a..cc9bfb479 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm.S @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./aes/aes.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-aes-asm.S + * ruby ./aes/aes.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-aes-asm.S */ #ifdef HAVE_CONFIG_H @@ -298,7 +299,9 @@ L_AES_Thumb2_td_data: .word 0x74486c5c .word 0x42d0b857 #endif /* HAVE_AES_DECRYPT */ -#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ + defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) .text .type L_AES_Thumb2_te_data, %object .size L_AES_Thumb2_te_data, 1024 @@ -560,7 +563,8 @@ L_AES_Thumb2_te_data: .word 0xfca85454 .word 0xd66dbbbb .word 0x3a2c1616 -#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || + * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT .text .type L_AES_Thumb2_td, %object @@ -569,14 +573,17 @@ L_AES_Thumb2_te_data: L_AES_Thumb2_td: .word L_AES_Thumb2_td_data #endif /* HAVE_AES_DECRYPT */ -#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ + defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) .text .type L_AES_Thumb2_te, %object .size L_AES_Thumb2_te, 12 .align 4 L_AES_Thumb2_te: .word L_AES_Thumb2_te_data -#endif /* 
HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || + * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT .text .align 4 @@ -1134,15 +1141,18 @@ L_AES_encrypt_block_nr: POP {pc} /* Cycle Count = 285 */ .size AES_encrypt_block,.-AES_encrypt_block -#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) .text .type L_AES_Thumb2_te_ecb, %object .size L_AES_Thumb2_te_ecb, 12 .align 4 L_AES_Thumb2_te_ecb: .word L_AES_Thumb2_te_data -#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ -#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || + * WOLFSSL_AES_COUNTER */ +#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) .text .align 4 .globl AES_ECB_encrypt @@ -1290,7 +1300,8 @@ L_AES_ECB_encrypt_end: POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 212 */ .size AES_ECB_encrypt,.-AES_ECB_encrypt -#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || + * WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_CBC .text .align 4 @@ -1641,7 +1652,8 @@ L_AES_CTR_encrypt_end: .size AES_CTR_encrypt,.-AES_CTR_encrypt #endif /* WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT -#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) + #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ + defined(HAVE_AES_CBC) .text .align 4 .globl 
AES_decrypt_block diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c index 55436947e..7372dcdb6 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./aes/aes.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-aes-asm.c + * ruby ./aes/aes.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-aes-asm.c */ #ifdef HAVE_CONFIG_H @@ -115,7 +116,9 @@ XALIGNED(16) static const word32 L_AES_Thumb2_td_data[] = { }; #endif /* HAVE_AES_DECRYPT */ -#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ + defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) XALIGNED(16) static const word32 L_AES_Thumb2_te_data[] = { 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, @@ -183,13 +186,17 @@ XALIGNED(16) static const word32 L_AES_Thumb2_te_data[] = { 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, }; -#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || + * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT static const word32* L_AES_Thumb2_td = L_AES_Thumb2_td_data; #endif /* HAVE_AES_DECRYPT */ -#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ + defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) static const word32* L_AES_Thumb2_te = 
L_AES_Thumb2_te_data; -#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || + * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT void AES_invert_key(unsigned char* ks, word32 rounds); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -201,8 +208,12 @@ void AES_invert_key(unsigned char* ks, word32 rounds) #ifndef WOLFSSL_NO_VAR_ASSIGN_REG register unsigned char* ks __asm__ ("r0") = (unsigned char*)ks_p; register word32 rounds __asm__ ("r1") = (word32)rounds_p; - register word32* L_AES_Thumb2_te_c __asm__ ("r2") = (word32*)L_AES_Thumb2_te; - register word32* L_AES_Thumb2_td_c __asm__ ("r3") = (word32*)L_AES_Thumb2_td; + register word32* L_AES_Thumb2_te_c __asm__ ("r2") = + (word32*)L_AES_Thumb2_te; + + register word32* L_AES_Thumb2_td_c __asm__ ("r3") = + (word32*)L_AES_Thumb2_td; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -216,8 +227,8 @@ void AES_invert_key(unsigned char* ks, word32 rounds) #else "L_AES_invert_key_loop_%=:\n\t" #endif - "LDM %[ks], {r2, r3, r4, r5}\n\t" - "LDM r10, {r6, r7, r8, r9}\n\t" + "ldm %[ks], {r2, r3, r4, r5}\n\t" + "ldm r10, {r6, r7, r8, r9}\n\t" "STM r10, {r2, r3, r4, r5}\n\t" "STM %[ks]!, {r6, r7, r8, r9}\n\t" "SUBS r11, r11, #0x2\n\t" @@ -238,7 +249,7 @@ void AES_invert_key(unsigned char* ks, word32 rounds) #else "L_AES_invert_key_mix_loop_%=:\n\t" #endif - "LDM %[ks], {r2, r3, r4, r5}\n\t" + "ldm %[ks], {r2, r3, r4, r5}\n\t" "UBFX r6, r2, #0, #8\n\t" "UBFX r7, r2, #8, #8\n\t" "UBFX r8, r2, #16, #8\n\t" @@ -311,16 +322,12 @@ void AES_invert_key(unsigned char* ks, word32 rounds) #else "BNE.W L_AES_invert_key_mix_loop_%=\n\t" #endif -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ks] "+r" (ks), [rounds] "+r" (rounds), - [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_td] "+r" (L_AES_Thumb2_td_c) + [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), + [L_AES_Thumb2_td] "+r" 
(L_AES_Thumb2_td_c) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" -#else - : [ks] "+r" (ks), [rounds] "+r" (rounds) - : [L_AES_Thumb2_te] "r" (L_AES_Thumb2_te), [L_AES_Thumb2_td] "r" (L_AES_Thumb2_td) - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); } @@ -334,17 +341,24 @@ XALIGNED(16) static const word32 L_AES_Thumb2_rcon[] = { void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char* ks_p) +void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, + unsigned char* ks_p) #else -void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks) +void AES_set_encrypt_key(const unsigned char* key, word32 len, + unsigned char* ks) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* key __asm__ ("r0") = (const unsigned char*)key_p; + register const unsigned char* key __asm__ ("r0") = + (const unsigned char*)key_p; register word32 len __asm__ ("r1") = (word32)len_p; register unsigned char* ks __asm__ ("r2") = (unsigned char*)ks_p; - register word32* L_AES_Thumb2_te_c __asm__ ("r3") = (word32*)L_AES_Thumb2_te; - register word32* L_AES_Thumb2_rcon_c __asm__ ("r4") = (word32*)&L_AES_Thumb2_rcon; + register word32* L_AES_Thumb2_te_c __asm__ ("r3") = + (word32*)L_AES_Thumb2_te; + + register word32* L_AES_Thumb2_rcon_c __asm__ ("r4") = + (word32*)&L_AES_Thumb2_rcon; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -403,9 +417,9 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks "EOR r3, r7, r4, LSL #8\n\t" "EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r6, LSL #24\n\t" - "LDM %[ks]!, {r4, r5, r6, r7}\n\t" + "ldm 
%[ks]!, {r4, r5, r6, r7}\n\t" "EOR r4, r4, r3\n\t" - "LDM lr!, {r3}\n\t" + "ldm lr!, {r3}\n\t" "EOR r4, r4, r3\n\t" "EOR r5, r5, r4\n\t" "EOR r6, r6, r5\n\t" @@ -425,7 +439,7 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks "EOR r3, r3, r4, LSL #8\n\t" "EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r6, LSL #24\n\t" - "LDM %[ks]!, {r4, r5, r6, r7}\n\t" + "ldm %[ks]!, {r4, r5, r6, r7}\n\t" "EOR r4, r4, r3\n\t" "EOR r5, r5, r4\n\t" "EOR r6, r6, r5\n\t" @@ -452,9 +466,9 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks "EOR r3, r7, r4, LSL #8\n\t" "EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r6, LSL #24\n\t" - "LDM %[ks]!, {r4, r5, r6, r7}\n\t" + "ldm %[ks]!, {r4, r5, r6, r7}\n\t" "EOR r4, r4, r3\n\t" - "LDM lr!, {r3}\n\t" + "ldm lr!, {r3}\n\t" "EOR r4, r4, r3\n\t" "EOR r5, r5, r4\n\t" "EOR r6, r6, r5\n\t" @@ -508,9 +522,9 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks "EOR r3, r9, r4, LSL #8\n\t" "EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r6, LSL #24\n\t" - "LDM %[ks]!, {r4, r5, r6, r7, r8, r9}\n\t" + "ldm %[ks]!, {r4, r5, r6, r7, r8, r9}\n\t" "EOR r4, r4, r3\n\t" - "LDM lr!, {r3}\n\t" + "ldm lr!, {r3}\n\t" "EOR r4, r4, r3\n\t" "EOR r5, r5, r4\n\t" "EOR r6, r6, r5\n\t" @@ -537,9 +551,9 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks "EOR r3, r9, r4, LSL #8\n\t" "EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r6, LSL #24\n\t" - "LDM %[ks]!, {r4, r5, r6, r7, r8, r9}\n\t" + "ldm %[ks]!, {r4, r5, r6, r7, r8, r9}\n\t" "EOR r4, r4, r3\n\t" - "LDM lr!, {r3}\n\t" + "ldm lr!, {r3}\n\t" "EOR r4, r4, r3\n\t" "EOR r5, r5, r4\n\t" "EOR r6, r6, r5\n\t" @@ -585,9 +599,9 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks "EOR r3, r7, r4, LSL #8\n\t" "EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r6, LSL #24\n\t" - "LDM %[ks]!, {r4, r5, r6, r7}\n\t" + "ldm %[ks]!, {r4, r5, r6, r7}\n\t" "EOR r4, r4, r3\n\t" - "LDM lr!, {r3}\n\t" 
+ "ldm lr!, {r3}\n\t" "EOR r4, r4, r3\n\t" "EOR r5, r5, r4\n\t" "EOR r6, r6, r5\n\t" @@ -607,22 +621,18 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks #else "L_AES_set_encrypt_key_end_%=:\n\t" #endif -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), - [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_rcon] "+r" (L_AES_Thumb2_rcon_c) + [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), + [L_AES_Thumb2_rcon] "+r" (L_AES_Thumb2_rcon_c) : - : "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "cc" -#else - : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks) - : [L_AES_Thumb2_te] "r" (L_AES_Thumb2_te), [L_AES_Thumb2_rcon] "r" (L_AES_Thumb2_rcon) - : "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10" ); } void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void AES_encrypt_block(const word32* te_p, int nr_p, int len_p, const word32* ks_p) +void AES_encrypt_block(const word32* te_p, int nr_p, int len_p, + const word32* ks_p) #else void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ @@ -683,7 +693,7 @@ void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks) "LDR r11, [%[te], r11, LSL #2]\n\t" "LDR r2, [%[te], r2, LSL #2]\n\t" "EOR lr, lr, r6, ROR #24\n\t" - "LDM %[ks]!, {r4, r5, r6, r7}\n\t" + "ldm %[ks]!, {r4, r5, r6, r7}\n\t" "EOR r11, r11, lr, ROR #24\n\t" "EOR r11, r11, r2, ROR #8\n\t" /* XOR in Key Schedule */ @@ -733,7 +743,7 @@ void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks) "LDR r7, [%[te], r7, LSL #2]\n\t" "LDR r2, [%[te], r2, LSL #2]\n\t" "EOR lr, lr, r10, ROR #24\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" "EOR r7, r7, lr, ROR #24\n\t" "EOR r7, r7, 
r2, ROR #8\n\t" /* XOR in Key Schedule */ @@ -791,7 +801,7 @@ void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks) "LDR r11, [%[te], r11, LSL #2]\n\t" "LDR r2, [%[te], r2, LSL #2]\n\t" "EOR lr, lr, r6, ROR #24\n\t" - "LDM %[ks]!, {r4, r5, r6, r7}\n\t" + "ldm %[ks]!, {r4, r5, r6, r7}\n\t" "EOR r11, r11, lr, ROR #24\n\t" "EOR r11, r11, r2, ROR #8\n\t" /* XOR in Key Schedule */ @@ -841,7 +851,7 @@ void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks) "LDRB lr, [%[te], lr, LSL #2]\n\t" "LDRB r2, [%[te], r2, LSL #2]\n\t" "EOR lr, lr, r11, LSL #16\n\t" - "LDM %[ks], {r8, r9, r10, r11}\n\t" + "ldm %[ks], {r8, r9, r10, r11}\n\t" "EOR r7, r7, lr, LSL #8\n\t" "EOR r7, r7, r2, LSL #16\n\t" /* XOR in Key Schedule */ @@ -851,29 +861,38 @@ void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks) "EOR r7, r7, r11\n\t" : [te] "+r" (te), [nr] "+r" (nr), [len] "+r" (len), [ks] "+r" (ks) : - : "memory", "lr", "cc" + : "memory", "cc", "lr" ); } -#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) static const word32* L_AES_Thumb2_te_ecb = L_AES_Thumb2_te_data; -#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ -#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || + * WOLFSSL_AES_COUNTER */ +#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void AES_ECB_encrypt(const unsigned char* in_p, unsigned 
char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, + unsigned long len_p, const unsigned char* ks_p, int nr_p) #else -void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr) +void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register const unsigned char* in __asm__ ("r0") = + (const unsigned char*)in_p; register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; register unsigned long len __asm__ ("r2") = (unsigned long)len_p; - register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register const unsigned char* ks __asm__ ("r3") = + (const unsigned char*)ks_p; register int nr __asm__ ("r4") = (int)nr_p; - register word32* L_AES_Thumb2_te_ecb_c __asm__ ("r5") = (word32*)L_AES_Thumb2_te_ecb; + register word32* L_AES_Thumb2_te_ecb_c __asm__ ("r5") = + (word32*)L_AES_Thumb2_te_ecb; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -916,7 +935,7 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long "REV r6, r6\n\t" "REV r7, r7\n\t" "PUSH {r1, %[len], lr}\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -972,7 +991,7 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long "REV r6, r6\n\t" "REV r7, r7\n\t" "PUSH {r1, %[len], lr}\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -1028,7 +1047,7 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, 
unsigned long "REV r6, r6\n\t" "REV r7, r7\n\t" "PUSH {r1, %[len], lr}\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -1063,40 +1082,39 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long "L_AES_ECB_encrypt_end_%=:\n\t" #endif "POP {%[ks]}\n\t" -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), - [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), + [nr] "+r" (nr), [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) : - : "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11", "cc" -#else - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks) - : [L_AES_Thumb2_te_ecb] "r" (L_AES_Thumb2_te_ecb) - : "memory", "r12", "lr", "r4", "r6", "r7", "r8", "r9", "r10", "r11", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11" ); -#ifdef WOLFSSL_NO_VAR_ASSIGN_REG - (void)nr; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } -#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || + * WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_CBC void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, + unsigned long len_p, const unsigned char* ks_p, int nr_p, + unsigned char* iv_p) #else -void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned 
char* iv) +void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register const unsigned char* in __asm__ ("r0") = + (const unsigned char*)in_p; register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; register unsigned long len __asm__ ("r2") = (unsigned long)len_p; - register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register const unsigned char* ks __asm__ ("r3") = + (const unsigned char*)ks_p; register int nr __asm__ ("r4") = (int)nr_p; register unsigned char* iv __asm__ ("r5") = (unsigned char*)iv_p; - register word32* L_AES_Thumb2_te_ecb_c __asm__ ("r6") = (word32*)L_AES_Thumb2_te_ecb; + register word32* L_AES_Thumb2_te_ecb_c __asm__ ("r6") = + (word32*)L_AES_Thumb2_te_ecb; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1112,7 +1130,7 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[in]\n\t" "MOV r0, %[L_AES_Thumb2_te_ecb]\n\t" - "LDM r9, {r4, r5, r6, r7}\n\t" + "ldm r9, {r4, r5, r6, r7}\n\t" "PUSH {%[ks], r9}\n\t" "CMP r8, #0xa\n\t" #if defined(__GNUC__) @@ -1145,7 +1163,7 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" "PUSH {r1, %[len], lr}\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" @@ -1205,7 +1223,7 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" "PUSH {r1, %[len], lr}\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" @@ -1265,7 +1283,7 @@ void 
AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" "PUSH {r1, %[len], lr}\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" @@ -1305,23 +1323,12 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long #endif "POP {%[ks], r9}\n\t" "STM r9, {r4, r5, r6, r7}\n\t" -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), + [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) : - : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11", "cc" -#else - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks) - : [L_AES_Thumb2_te_ecb] "r" (L_AES_Thumb2_te_ecb) - : "memory", "r12", "lr", "r4", "r5", "r7", "r8", "r9", "r10", "r11", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); -#ifdef WOLFSSL_NO_VAR_ASSIGN_REG - (void)nr; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ -#ifdef WOLFSSL_NO_VAR_ASSIGN_REG - (void)iv; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #endif /* HAVE_AES_CBC */ @@ -1329,19 +1336,26 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, + unsigned long len_p, const unsigned char* ks_p, int nr_p, + unsigned char* ctr_p) #else -void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const 
unsigned char* ks, int nr, unsigned char* ctr) +void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register const unsigned char* in __asm__ ("r0") = + (const unsigned char*)in_p; register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; register unsigned long len __asm__ ("r2") = (unsigned long)len_p; - register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register const unsigned char* ks __asm__ ("r3") = + (const unsigned char*)ks_p; register int nr __asm__ ("r4") = (int)nr_p; register unsigned char* ctr __asm__ ("r5") = (unsigned char*)ctr_p; - register word32* L_AES_Thumb2_te_ecb_c __asm__ ("r6") = (word32*)L_AES_Thumb2_te_ecb; + register word32* L_AES_Thumb2_te_ecb_c __asm__ ("r6") = + (word32*)L_AES_Thumb2_te_ecb; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1357,7 +1371,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[in]\n\t" "MOV r0, %[L_AES_Thumb2_te_ecb]\n\t" - "LDM r8, {r4, r5, r6, r7}\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" @@ -1393,7 +1407,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long "ADCS r9, r5, #0x0\n\t" "ADC r8, r4, #0x0\n\t" "STM lr, {r8, r9, r10, r11}\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -1420,7 +1434,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "LDM r8, {r4, r5, r6, r7}\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" "SUBS %[len], 
%[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" @@ -1457,7 +1471,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long "ADCS r9, r5, #0x0\n\t" "ADC r8, r4, #0x0\n\t" "STM lr, {r8, r9, r10, r11}\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -1484,7 +1498,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "LDM r8, {r4, r5, r6, r7}\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" @@ -1521,7 +1535,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long "ADCS r9, r5, #0x0\n\t" "ADC r8, r4, #0x0\n\t" "STM lr, {r8, r9, r10, r11}\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -1548,7 +1562,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "LDM r8, {r4, r5, r6, r7}\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" @@ -1571,28 +1585,18 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long "REV r6, r6\n\t" "REV r7, r7\n\t" "STM r8, {r4, r5, r6, r7}\n\t" -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), + [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) : - : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11", "cc" -#else - : [in] "+r" (in), [out] 
"+r" (out), [len] "+r" (len), [ks] "+r" (ks) - : [L_AES_Thumb2_te_ecb] "r" (L_AES_Thumb2_te_ecb) - : "memory", "r12", "lr", "r4", "r5", "r7", "r8", "r9", "r10", "r11", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); -#ifdef WOLFSSL_NO_VAR_ASSIGN_REG - (void)nr; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ -#ifdef WOLFSSL_NO_VAR_ASSIGN_REG - (void)ctr; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #endif /* WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT -#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) + #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ + defined(HAVE_AES_CBC) void AES_decrypt_block(const word32* td, int nr, const byte* td4); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG void AES_decrypt_block(const word32* td_p, int nr_p, const byte* td4_p) @@ -1655,7 +1659,7 @@ void AES_decrypt_block(const word32* td, int nr, const byte* td4) "LDR r11, [%[td], r11, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t" "EOR r12, r12, r4, ROR #24\n\t" - "LDM r3!, {r4, r5, r6, r7}\n\t" + "ldm r3!, {r4, r5, r6, r7}\n\t" "EOR r11, r11, lr, ROR #8\n\t" "EOR r11, r11, r12, ROR #24\n\t" /* XOR in Key Schedule */ @@ -1705,7 +1709,7 @@ void AES_decrypt_block(const word32* td, int nr, const byte* td4) "LDR r7, [%[td], r7, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t" "EOR r12, r12, r8, ROR #24\n\t" - "LDM r3!, {r8, r9, r10, r11}\n\t" + "ldm r3!, {r8, r9, r10, r11}\n\t" "EOR r7, r7, lr, ROR #8\n\t" "EOR r7, r7, r12, ROR #24\n\t" /* XOR in Key Schedule */ @@ -1763,7 +1767,7 @@ void AES_decrypt_block(const word32* td, int nr, const byte* td4) "LDR r11, [%[td], r11, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t" "EOR r12, r12, r4, ROR #24\n\t" - "LDM r3!, {r4, r5, r6, r7}\n\t" + "ldm r3!, {r4, r5, r6, r7}\n\t" "EOR r11, r11, lr, ROR #8\n\t" "EOR r11, r11, r12, ROR #24\n\t" /* XOR in Key Schedule */ @@ -1813,7 +1817,7 @@ void AES_decrypt_block(const word32* td, int nr, const byte* 
td4) "LDRB r7, [%[td4], r7]\n\t" "LDRB lr, [%[td4], lr]\n\t" "EOR r12, r12, r11, LSL #16\n\t" - "LDM r3, {r8, r9, r10, r11}\n\t" + "ldm r3, {r8, r9, r10, r11}\n\t" "EOR r7, r7, r12, LSL #8\n\t" "EOR r7, r7, lr, LSL #16\n\t" /* XOR in Key Schedule */ @@ -1823,7 +1827,7 @@ void AES_decrypt_block(const word32* td, int nr, const byte* td4) "EOR r7, r7, r11\n\t" : [td] "+r" (td), [nr] "+r" (nr), [td4] "+r" (td4) : - : "memory", "lr", "cc" + : "memory", "cc", "lr" ); } @@ -1867,19 +1871,27 @@ static const byte L_AES_Thumb2_td4[] = { void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, + unsigned long len_p, const unsigned char* ks_p, int nr_p) #else -void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr) +void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register const unsigned char* in __asm__ ("r0") = + (const unsigned char*)in_p; register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; register unsigned long len __asm__ ("r2") = (unsigned long)len_p; - register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register const unsigned char* ks __asm__ ("r3") = + (const unsigned char*)ks_p; register int nr __asm__ ("r4") = (int)nr_p; - register word32* L_AES_Thumb2_td_ecb_c __asm__ ("r5") = (word32*)L_AES_Thumb2_td_ecb; - register byte* L_AES_Thumb2_td4_c __asm__ ("r6") = (byte*)&L_AES_Thumb2_td4; + register word32* L_AES_Thumb2_td_ecb_c __asm__ ("r5") = + 
(word32*)L_AES_Thumb2_td_ecb; + + register byte* L_AES_Thumb2_td4_c __asm__ ("r6") = + (byte*)&L_AES_Thumb2_td4; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1923,7 +1935,7 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long "REV r6, r6\n\t" "REV r7, r7\n\t" "PUSH {r1, %[ks], r12, lr}\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -1978,7 +1990,7 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long "REV r6, r6\n\t" "REV r7, r7\n\t" "PUSH {r1, %[ks], r12, lr}\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -2033,7 +2045,7 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long "REV r6, r6\n\t" "REV r7, r7\n\t" "PUSH {r1, %[ks], r12, lr}\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -2066,20 +2078,12 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long #else "L_AES_ECB_decrypt_end_%=:\n\t" #endif -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), - [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c) + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), + [nr] "+r" (nr), [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), + [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c) : - : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11", "cc" -#else - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks) - : [L_AES_Thumb2_td_ecb] "r" (L_AES_Thumb2_td_ecb), [L_AES_Thumb2_td4] "r" (L_AES_Thumb2_td4) - : "memory", "r12", "lr", "r4", "r7", "r8", 
"r9", "r10", "r11", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); -#ifdef WOLFSSL_NO_VAR_ASSIGN_REG - (void)nr; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ @@ -2087,20 +2091,29 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, + unsigned long len_p, const unsigned char* ks_p, int nr_p, + unsigned char* iv_p) #else -void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) +void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register const unsigned char* in __asm__ ("r0") = + (const unsigned char*)in_p; register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; register unsigned long len __asm__ ("r2") = (unsigned long)len_p; - register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register const unsigned char* ks __asm__ ("r3") = + (const unsigned char*)ks_p; register int nr __asm__ ("r4") = (int)nr_p; register unsigned char* iv __asm__ ("r5") = (unsigned char*)iv_p; - register word32* L_AES_Thumb2_td_ecb_c __asm__ ("r6") = (word32*)L_AES_Thumb2_td_ecb; - register byte* L_AES_Thumb2_td4_c __asm__ ("r7") = (byte*)&L_AES_Thumb2_td4; + register word32* L_AES_Thumb2_td_ecb_c __asm__ 
("r6") = + (word32*)L_AES_Thumb2_td_ecb; + + register byte* L_AES_Thumb2_td4_c __asm__ ("r7") = + (byte*)&L_AES_Thumb2_td4; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -2149,7 +2162,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "LDR lr, [sp, #16]\n\t" "STRD r4, r5, [lr, #16]\n\t" "STRD r6, r7, [lr, #24]\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" @@ -2166,7 +2179,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "REV r5, r5\n\t" "REV r6, r6\n\t" "REV r7, r7\n\t" - "LDM lr, {r8, r9, r10, r11}\n\t" + "ldm lr, {r8, r9, r10, r11}\n\t" "POP {r1, r12, lr}\n\t" "LDR %[ks], [sp]\n\t" "EOR r4, r4, r8\n\t" @@ -2195,7 +2208,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "LDR lr, [sp, #16]\n\t" "STRD r4, r5, [lr]\n\t" "STRD r6, r7, [lr, #8]\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" @@ -2255,7 +2268,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "LDR lr, [sp, #16]\n\t" "STRD r4, r5, [lr, #16]\n\t" "STRD r6, r7, [lr, #24]\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" @@ -2272,7 +2285,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "REV r5, r5\n\t" "REV r6, r6\n\t" "REV r7, r7\n\t" - "LDM lr, {r8, r9, r10, r11}\n\t" + "ldm lr, {r8, r9, r10, r11}\n\t" "POP {r1, r12, lr}\n\t" "LDR %[ks], [sp]\n\t" "EOR r4, r4, r8\n\t" @@ -2301,7 +2314,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "LDR lr, [sp, #16]\n\t" "STRD r4, r5, [lr]\n\t" "STRD r6, r7, [lr, #8]\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV 
r6, r6\n\t" @@ -2361,7 +2374,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "LDR lr, [sp, #16]\n\t" "STRD r4, r5, [lr, #16]\n\t" "STRD r6, r7, [lr, #24]\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" @@ -2378,7 +2391,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "REV r5, r5\n\t" "REV r6, r6\n\t" "REV r7, r7\n\t" - "LDM lr, {r8, r9, r10, r11}\n\t" + "ldm lr, {r8, r9, r10, r11}\n\t" "POP {r1, r12, lr}\n\t" "LDR %[ks], [sp]\n\t" "EOR r4, r4, r8\n\t" @@ -2407,7 +2420,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "LDR lr, [sp, #16]\n\t" "STRD r4, r5, [lr]\n\t" "STRD r6, r7, [lr, #8]\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" @@ -2471,23 +2484,13 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long "L_AES_CBC_decrypt_end_%=:\n\t" #endif "POP {%[ks], r4}\n\t" -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), - [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c) + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), + [nr] "+r" (nr), [iv] "+r" (iv), + [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), + [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c) : - : "memory", "r12", "lr", "r8", "r9", "r10", "r11", "cc" -#else - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks) - : [L_AES_Thumb2_td_ecb] "r" (L_AES_Thumb2_td_ecb), [L_AES_Thumb2_td4] "r" (L_AES_Thumb2_td4) - : "memory", "r12", "lr", "r4", "r5", "r8", "r9", "r10", "r11", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r12", "lr", "r8", "r9", "r10", "r11" ); -#ifdef WOLFSSL_NO_VAR_ASSIGN_REG - (void)nr; -#endif /* 
!WOLFSSL_NO_VAR_ASSIGN_REG */ -#ifdef WOLFSSL_NO_VAR_ASSIGN_REG - (void)iv; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #endif /* HAVE_AES_CBC */ @@ -2504,17 +2507,23 @@ XALIGNED(16) static const word32 L_GCM_gmult_len_r[] = { void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned char* data_p, unsigned long len_p) +void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, + const unsigned char* data_p, unsigned long len_p) #else -void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len) +void GCM_gmult_len(unsigned char* x, const unsigned char** m, + const unsigned char* data, unsigned long len) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG register unsigned char* x __asm__ ("r0") = (unsigned char*)x_p; - register const unsigned char** m __asm__ ("r1") = (const unsigned char**)m_p; - register const unsigned char* data __asm__ ("r2") = (const unsigned char*)data_p; + register const unsigned char** m __asm__ ("r1") = + (const unsigned char**)m_p; + register const unsigned char* data __asm__ ("r2") = + (const unsigned char*)data_p; register unsigned long len __asm__ ("r3") = (unsigned long)len_p; - register word32* L_GCM_gmult_len_r_c __asm__ ("r4") = (word32*)&L_GCM_gmult_len_r; + register word32* L_GCM_gmult_len_r_c __asm__ ("r4") = + (word32*)&L_GCM_gmult_len_r; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -2532,7 +2541,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "LSR %[len], r12, #24\n\t" "AND %[len], %[len], #0xf\n\t" "ADD %[len], %[m], %[len], LSL #4\n\t" - "LDM %[len], {r8, r9, r10, r11}\n\t" + "ldm %[len], {r8, r9, r10, r11}\n\t" "LSR r6, r10, #4\n\t" "AND %[len], r11, #0xf\n\t" "LSR r11, r11, #4\n\t" @@ -2542,7 +2551,7 @@ void 
GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2559,7 +2568,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2576,7 +2585,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2593,7 +2602,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2610,7 +2619,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2626,7 +2635,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2643,7 
+2652,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2665,7 +2674,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "LSR %[len], r12, #24\n\t" "AND %[len], %[len], #0xf\n\t" "ADD %[len], %[m], %[len], LSL #4\n\t" - "LDM %[len], {r4, r5, r6, r7}\n\t" + "ldm %[len], {r4, r5, r6, r7}\n\t" "EOR r8, r8, r4\n\t" "EOR r9, r9, r5\n\t" "EOR r10, r10, r6\n\t" @@ -2679,7 +2688,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2696,7 +2705,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2713,7 +2722,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2730,7 +2739,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, 
r4\n\t" @@ -2747,7 +2756,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2763,7 +2772,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2780,7 +2789,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2802,7 +2811,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "LSR %[len], r12, #24\n\t" "AND %[len], %[len], #0xf\n\t" "ADD %[len], %[m], %[len], LSL #4\n\t" - "LDM %[len], {r4, r5, r6, r7}\n\t" + "ldm %[len], {r4, r5, r6, r7}\n\t" "EOR r8, r8, r4\n\t" "EOR r9, r9, r5\n\t" "EOR r10, r10, r6\n\t" @@ -2816,7 +2825,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2833,7 +2842,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR 
#4\n\t" "EOR r8, r8, r4\n\t" @@ -2850,7 +2859,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2867,7 +2876,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2884,7 +2893,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2900,7 +2909,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2917,7 +2926,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2939,7 +2948,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "LSR %[len], r12, #24\n\t" "AND %[len], %[len], #0xf\n\t" "ADD %[len], %[m], %[len], LSL #4\n\t" - "LDM %[len], {r4, r5, r6, r7}\n\t" + "ldm %[len], {r4, r5, r6, r7}\n\t" "EOR r8, r8, 
r4\n\t" "EOR r9, r9, r5\n\t" "EOR r10, r10, r6\n\t" @@ -2953,7 +2962,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2970,7 +2979,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -2987,7 +2996,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -3004,7 +3013,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -3021,7 +3030,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -3037,7 +3046,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, 
r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -3054,7 +3063,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" "LSR r9, r9, #4\n\t" - "LDM r4, {r4, r5, r6, r7}\n\t" + "ldm r4, {r4, r5, r6, r7}\n\t" "EOR r9, r9, r8, LSL #28\n\t" "EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, r8, r4\n\t" @@ -3076,16 +3085,11 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned cha #else "BNE.W L_GCM_gmult_len_start_block_%=\n\t" #endif -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c) : - : "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" -#else - : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len) - : [L_GCM_gmult_len_r] "r" (L_GCM_gmult_len_r) - : "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", + "r11" ); } @@ -3093,19 +3097,26 @@ static const word32* L_AES_Thumb2_te_gcm = L_AES_Thumb2_te_data; void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, + unsigned long len_p, const unsigned char* ks_p, int nr_p, + unsigned char* ctr_p) #else -void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) +void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) #endif /* 
!WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* in __asm__ ("r0") = (const unsigned char*)in_p; + register const unsigned char* in __asm__ ("r0") = + (const unsigned char*)in_p; register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; register unsigned long len __asm__ ("r2") = (unsigned long)len_p; - register const unsigned char* ks __asm__ ("r3") = (const unsigned char*)ks_p; + register const unsigned char* ks __asm__ ("r3") = + (const unsigned char*)ks_p; register int nr __asm__ ("r4") = (int)nr_p; register unsigned char* ctr __asm__ ("r5") = (unsigned char*)ctr_p; - register word32* L_AES_Thumb2_te_gcm_c __asm__ ("r6") = (word32*)L_AES_Thumb2_te_gcm; + register word32* L_AES_Thumb2_te_gcm_c __asm__ ("r6") = + (word32*)L_AES_Thumb2_te_gcm; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3121,7 +3132,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "MOV lr, %[in]\n\t" "MOV r0, %[L_AES_Thumb2_te_gcm]\n\t" - "LDM r8, {r4, r5, r6, r7}\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" @@ -3153,7 +3164,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long "PUSH {r1, %[len], lr}\n\t" "LDR lr, [sp, #16]\n\t" "ADD r7, r7, #0x1\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" "STR r7, [lr, #12]\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" @@ -3181,7 +3192,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "LDM r8, {r4, r5, r6, r7}\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" @@ -3214,7 +3225,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long "PUSH {r1, %[len], 
lr}\n\t" "LDR lr, [sp, #16]\n\t" "ADD r7, r7, #0x1\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" "STR r7, [lr, #12]\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" @@ -3242,7 +3253,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "LDM r8, {r4, r5, r6, r7}\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" @@ -3275,7 +3286,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long "PUSH {r1, %[len], lr}\n\t" "LDR lr, [sp, #16]\n\t" "ADD r7, r7, #0x1\n\t" - "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "ldm %[ks]!, {r8, r9, r10, r11}\n\t" "STR r7, [lr, #12]\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" @@ -3303,7 +3314,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "LDM r8, {r4, r5, r6, r7}\n\t" + "ldm r8, {r4, r5, r6, r7}\n\t" "SUBS %[len], %[len], #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" @@ -3326,23 +3337,12 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long "REV r6, r6\n\t" "REV r7, r7\n\t" "STM r8, {r4, r5, r6, r7}\n\t" -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), + [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_Thumb2_te_gcm] "+r" (L_AES_Thumb2_te_gcm_c) : - : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11", "cc" -#else - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks) - : [L_AES_Thumb2_te_gcm] "r" (L_AES_Thumb2_te_gcm) - : "memory", "r12", "lr", "r4", "r5", "r7", "r8", "r9", "r10", "r11", "cc" -#endif /* 
WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); -#ifdef WOLFSSL_NO_VAR_ASSIGN_REG - (void)nr; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ -#ifdef WOLFSSL_NO_VAR_ASSIGN_REG - (void)ctr; -#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ } #endif /* HAVE_AESGCM */ diff --git a/wolfcrypt/src/port/arm/thumb2-chacha-asm.S b/wolfcrypt/src/port/arm/thumb2-chacha-asm.S index 5422a2f03..a6f1e1c57 100644 --- a/wolfcrypt/src/port/arm/thumb2-chacha-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-chacha-asm.S @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./chacha/chacha.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-chacha-asm.S + * ruby ./chacha/chacha.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-chacha-asm.S */ #ifdef HAVE_CONFIG_H diff --git a/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c b/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c index 9707a9718..b33ce26ac 100644 --- a/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./chacha/chacha.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-chacha-asm.c + * ruby ./chacha/chacha.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-chacha-asm.c */ #ifdef HAVE_CONFIG_H @@ -72,7 +73,7 @@ void wc_chacha_setiv(word32* x, const byte* iv, word32 counter) "STM r3, {r4, r5, r6}\n\t" : [x] "+r" (x), [iv] "+r" (iv), [counter] "+r" (counter) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); } @@ -91,7 +92,9 @@ void wc_chacha_setkey(word32* x, const byte* key, word32 keySz) register word32* x __asm__ ("r0") = (word32*)x_p; register const byte* key __asm__ ("r1") = (const byte*)key_p; register word32 keySz __asm__ ("r2") = (word32)keySz_p; - register word32* L_chacha_thumb2_constants_c __asm__ ("r3") = (word32*)&L_chacha_thumb2_constants; + register word32* L_chacha_thumb2_constants_c __asm__ ("r3") = 
+ (word32*)&L_chacha_thumb2_constants; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -99,7 +102,7 @@ void wc_chacha_setkey(word32* x, const byte* key, word32 keySz) "SUBS %[keySz], %[keySz], #0x10\n\t" "ADD r7, r7, %[keySz]\n\t" /* Start state with constants */ - "LDM r7, {r3, r4, r5, r6}\n\t" + "ldm r7, {r3, r4, r5, r6}\n\t" "STM %[x]!, {r3, r4, r5, r6}\n\t" /* Next is first 16 bytes of key. */ "LDR r3, [%[key]]\n\t" @@ -134,21 +137,16 @@ void wc_chacha_setkey(word32* x, const byte* key, word32 keySz) "L_chacha_thumb2_setkey_same_keyb_ytes_%=:\n\t" #endif "STM %[x], {r3, r4, r5, r6}\n\t" -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz), [L_chacha_thumb2_constants] "+r" (L_chacha_thumb2_constants_c) : - : "memory", "r4", "r5", "r6", "r7", "cc" -#else - : [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz) - : [L_chacha_thumb2_constants] "r" (L_chacha_thumb2_constants) - : "memory", "r4", "r5", "r6", "r7", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r4", "r5", "r6", "r7" ); } #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void wc_chacha_crypt_bytes(ChaCha* ctx_p, byte* c_p, const byte* m_p, word32 len_p) +void wc_chacha_crypt_bytes(ChaCha* ctx_p, byte* c_p, const byte* m_p, + word32 len_p) #else void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ @@ -177,7 +175,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "STRD r4, r5, [sp, #16]\n\t" "STRD r6, r7, [sp, #24]\n\t" /* Load x[0]..x[12] into registers. */ - "LDM lr, {%[ctx], %[c], %[m], %[len], r4, r5, r6, r7, r8, r9, r10, r11, r12}\n\t" + "ldm lr, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12}\n\t" /* 10x 2 full rounds to perform. 
*/ "MOV lr, #0xa\n\t" "STR lr, [sp, #48]\n\t" @@ -316,35 +314,35 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "LDR lr, [sp, #32]\n\t" "MOV r12, sp\n\t" /* Add in original state */ - "LDM lr!, {r8, r9, r10, r11}\n\t" + "ldm lr!, {r8, r9, r10, r11}\n\t" "ADD %[ctx], %[ctx], r8\n\t" "ADD %[c], %[c], r9\n\t" "ADD %[m], %[m], r10\n\t" "ADD %[len], %[len], r11\n\t" - "LDM lr!, {r8, r9, r10, r11}\n\t" + "ldm lr!, {r8, r9, r10, r11}\n\t" "ADD r4, r4, r8\n\t" "ADD r5, r5, r9\n\t" "ADD r6, r6, r10\n\t" "ADD r7, r7, r11\n\t" - "LDM r12, {r8, r9}\n\t" - "LDM lr!, {r10, r11}\n\t" + "ldm r12, {r8, r9}\n\t" + "ldm lr!, {r10, r11}\n\t" "ADD r8, r8, r10\n\t" "ADD r9, r9, r11\n\t" "STM r12!, {r8, r9}\n\t" - "LDM r12, {r8, r9}\n\t" - "LDM lr!, {r10, r11}\n\t" + "ldm r12, {r8, r9}\n\t" + "ldm lr!, {r10, r11}\n\t" "ADD r8, r8, r10\n\t" "ADD r9, r9, r11\n\t" "STM r12!, {r8, r9}\n\t" - "LDM r12, {r8, r9}\n\t" - "LDM lr!, {r10, r11}\n\t" + "ldm r12, {r8, r9}\n\t" + "ldm lr!, {r10, r11}\n\t" "ADD r8, r8, r10\n\t" "ADD r9, r9, r11\n\t" "ADD r10, r10, #0x1\n\t" "STM r12!, {r8, r9}\n\t" "STR r10, [lr, #-8]\n\t" - "LDM r12, {r8, r9}\n\t" - "LDM lr, {r10, r11}\n\t" + "ldm r12, {r8, r9}\n\t" + "ldm lr, {r10, r11}\n\t" "ADD r8, r8, r10\n\t" "ADD r9, r9, r11\n\t" "STM r12, {r8, r9}\n\t" @@ -448,7 +446,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "LDR lr, [sp, #32]\n\t" "ADD r12, lr, #0x44\n\t" "STM r12!, {%[ctx], %[c], %[m], %[len], r4, r5, r6, r7}\n\t" - "LDM sp, {%[ctx], %[c], %[m], %[len], r4, r5, r6, r7}\n\t" + "ldm sp, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" "STM r12, {%[ctx], %[c], %[m], %[len], r4, r5, r6, r7}\n\t" "LDRD %[m], %[len], [sp, #40]\n\t" "LDR %[c], [sp, #36]\n\t" @@ -470,7 +468,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "BLT.N L_chacha_thumb2_crypt_word_loop_%=\n\t" #endif /* 16 bytes of state XORed into message. 
*/ - "LDM lr!, {r4, r5, r6, r7}\n\t" + "ldm lr!, {r4, r5, r6, r7}\n\t" "LDR r8, [%[m]]\n\t" "LDR r9, [%[m], #4]\n\t" "LDR r10, [%[m], #8]\n\t" @@ -580,12 +578,14 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "ADD sp, sp, #0x34\n\t" : [ctx] "+r" (ctx), [c] "+r" (c), [m] "+r" (m), [len] "+r" (len) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12", "lr" ); } #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void wc_chacha_use_over(byte* over_p, byte* output_p, const byte* input_p, word32 len_p) +void wc_chacha_use_over(byte* over_p, byte* output_p, const byte* input_p, + word32 len_p) #else void wc_chacha_use_over(byte* over, byte* output, const byte* input, word32 len) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ @@ -719,9 +719,10 @@ void wc_chacha_use_over(byte* over, byte* output, const byte* input, word32 len) #else "L_chacha_thumb2_over_done_%=:\n\t" #endif - : [over] "+r" (over), [output] "+r" (output), [input] "+r" (input), [len] "+r" (len) + : [over] "+r" (over), [output] "+r" (output), [input] "+r" (input), + [len] "+r" (len) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519.S b/wolfcrypt/src/port/arm/thumb2-curve25519.S index 735fc58e1..aec22a5d2 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519.S +++ b/wolfcrypt/src/port/arm/thumb2-curve25519.S @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./x25519/x25519.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-curve25519.S + * ruby ./x25519/x25519.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-curve25519.S */ #ifdef HAVE_CONFIG_H diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c index 455a26270..d4b2198c9 100644 --- 
a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c +++ b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./x25519/x25519.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-curve25519.c + * ruby ./x25519/x25519.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-curve25519.c */ #ifdef HAVE_CONFIG_H @@ -155,7 +156,7 @@ void fe_add_sub_op() /* Add -modulus on underflow */ "MOV lr, #0x13\n\t" "AND lr, lr, r11, ASR #31\n\t" - "LDM r1, {r4, r5, r6, r7, r8, r9}\n\t" + "ldm r1, {r4, r5, r6, r7, r8, r9}\n\t" "SUBS r4, r4, lr\n\t" "SBCS r5, r5, #0x0\n\t" "SBCS r6, r6, #0x0\n\t" @@ -169,7 +170,7 @@ void fe_add_sub_op() /* Done Add-Sub */ : : - : "memory", "lr", "cc" + : "memory", "cc", "lr" ); } @@ -184,13 +185,13 @@ void fe_sub_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Sub */ - "LDM r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" - "LDM r1!, {r2, r3, r4, r5}\n\t" + "ldm r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" + "ldm r1!, {r2, r3, r4, r5}\n\t" "SUBS r6, r2, r6\n\t" "SBCS r7, r3, r7\n\t" "SBCS r8, r4, r8\n\t" "SBCS r9, r5, r9\n\t" - "LDM r1!, {r2, r3, r4, r5}\n\t" + "ldm r1!, {r2, r3, r4, r5}\n\t" "SBCS r10, r2, r10\n\t" "SBCS r11, r3, r11\n\t" "SBCS r12, r4, r12\n\t" @@ -210,7 +211,7 @@ void fe_sub_op() /* Done Sub */ : : - : "memory", "lr", "cc" + : "memory", "cc", "lr" ); } @@ -230,7 +231,8 @@ void fe_sub(fe r, const fe a, const fe b) "BL fe_sub_op\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -245,13 +247,13 @@ void fe_add_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Add */ - "LDM r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" - "LDM r1!, {r2, r3, r4, r5}\n\t" + "ldm r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" + "ldm r1!, {r2, r3, r4, r5}\n\t" "ADDS r6, 
r2, r6\n\t" "ADCS r7, r3, r7\n\t" "ADCS r8, r4, r8\n\t" "ADCS r9, r5, r9\n\t" - "LDM r1!, {r2, r3, r4, r5}\n\t" + "ldm r1!, {r2, r3, r4, r5}\n\t" "ADCS r10, r2, r10\n\t" "ADCS r11, r3, r11\n\t" "ADCS r12, r4, r12\n\t" @@ -271,7 +273,7 @@ void fe_add_op() /* Done Add */ : : - : "memory", "lr", "cc" + : "memory", "cc", "lr" ); } @@ -291,7 +293,8 @@ void fe_add(fe r, const fe a, const fe b) "BL fe_add_op\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -304,7 +307,8 @@ void fe_frombytes(fe out, const unsigned char* in) { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword32* out __asm__ ("r0") = (sword32*)out_p; - register const unsigned char* in __asm__ ("r1") = (const unsigned char*)in_p; + register const unsigned char* in __asm__ ("r1") = + (const unsigned char*)in_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -327,7 +331,7 @@ void fe_frombytes(fe out, const unsigned char* in) "STR r9, [%[out], #28]\n\t" : [out] "+r" (out), [in] "+r" (in) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -343,7 +347,7 @@ void fe_tobytes(unsigned char* out, const fe n) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "LDM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" + "ldm %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "ADDS r10, r2, #0x13\n\t" "ADCS r10, r3, #0x0\n\t" "ADCS r10, r4, #0x0\n\t" @@ -373,7 +377,7 @@ void fe_tobytes(unsigned char* out, const fe n) "STR r9, [%[out], #28]\n\t" : [out] "+r" (out), [n] "+r" (n) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); } @@ -400,7 +404,7 @@ void fe_1(fe n) "STM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" : [n] "+r" (n) : - : 
"memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -427,7 +431,7 @@ void fe_0(fe n) "STM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" : [n] "+r" (n) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -454,7 +458,7 @@ void fe_copy(fe r, const fe a) "STRD r4, r5, [%[r], #24]\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); } @@ -472,14 +476,14 @@ void fe_neg(fe r, const fe a) __asm__ __volatile__ ( "MVN r7, #0x0\n\t" "MVN r6, #0x12\n\t" - "LDM %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a]!, {r2, r3, r4, r5}\n\t" "SUBS r2, r6, r2\n\t" "SBCS r3, r7, r3\n\t" "SBCS r4, r7, r4\n\t" "SBCS r5, r7, r5\n\t" "STM %[r]!, {r2, r3, r4, r5}\n\t" "MVN r6, #0x80000000\n\t" - "LDM %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a]!, {r2, r3, r4, r5}\n\t" "SBCS r2, r7, r2\n\t" "SBCS r3, r7, r3\n\t" "SBCS r4, r7, r4\n\t" @@ -487,7 +491,7 @@ void fe_neg(fe r, const fe a) "STM %[r]!, {r2, r3, r4, r5}\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); } @@ -502,7 +506,7 @@ int fe_isnonzero(const fe a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" + "ldm %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "ADDS r1, r2, #0x13\n\t" "ADCS r1, r3, #0x0\n\t" "ADCS r1, r4, #0x0\n\t" @@ -531,7 +535,8 @@ int fe_isnonzero(const fe a) "ORR %[a], r2, r4\n\t" : [a] "+r" (a) : - : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", + "r10" ); return (word32)(size_t)a; } @@ -547,12 +552,12 @@ int fe_isnegative(const fe a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "LDM %[a]!, {r2, r3, r4, 
r5}\n\t" + "ldm %[a]!, {r2, r3, r4, r5}\n\t" "ADDS r1, r2, #0x13\n\t" "ADCS r1, r3, #0x0\n\t" "ADCS r1, r4, #0x0\n\t" "ADCS r1, r5, #0x0\n\t" - "LDM %[a], {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" "ADCS r1, r2, #0x0\n\t" "ADCS r1, r3, #0x0\n\t" "ADCS r1, r4, #0x0\n\t" @@ -563,7 +568,7 @@ int fe_isnegative(const fe a) "EOR %[a], %[a], r1\n\t" : [a] "+r" (a) : - : "memory", "r1", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r1", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; } @@ -1548,7 +1553,8 @@ void fe_cmov_table(fe* r, fe* base, signed char b) "STRD r8, r9, [%[r], #88]\n\t" : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r3", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r3", "r10", + "r11", "r12", "lr" ); } @@ -1578,7 +1584,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b) "MOV r12, #0x60\n\t" "MUL %[b], %[b], r12\n\t" "ADD %[base], %[base], %[b]\n\t" - "LDM %[base]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[base]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "AND r4, r4, lr\n\t" "AND r5, r5, lr\n\t" "AND r6, r6, lr\n\t" @@ -1594,7 +1600,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b) "ADD %[r], %[r], r12\n\t" "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "SUB %[r], %[r], r12\n\t" - "LDM %[base]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[base]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "AND r4, r4, lr\n\t" "AND r5, r5, lr\n\t" "AND r6, r6, lr\n\t" @@ -1611,7 +1617,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b) "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "SUB %[r], %[r], r12\n\t" "ADD %[r], %[r], #0x40\n\t" - "LDM %[base]!, {r4, r5, r6, r7}\n\t" + "ldm %[base]!, {r4, r5, r6, r7}\n\t" "MVN r12, #0x12\n\t" "SUBS r8, r12, r4\n\t" "SBCS r9, r3, r5\n\t" @@ -1634,7 +1640,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b) "AND r6, r6, lr\n\t" "AND r7, r7, lr\n\t" "STM %[r]!, {r4, r5, r6, r7}\n\t" - 
"LDM %[base]!, {r4, r5, r6, r7}\n\t" + "ldm %[base]!, {r4, r5, r6, r7}\n\t" "MVN r12, #0x80000000\n\t" "SBCS r8, r3, r4\n\t" "SBCS r9, r3, r5\n\t" @@ -1660,7 +1666,8 @@ void fe_cmov_table(fe* r, fe* base, signed char b) "SUB %[base], %[base], %[b]\n\t" : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -2015,7 +2022,7 @@ void fe_mul_op() "LSL r11, r11, #1\n\t" "ORR r11, r11, r10, LSR #31\n\t" "MUL r11, r11, r12\n\t" - "LDM lr!, {r1, r2}\n\t" + "ldm lr!, {r1, r2}\n\t" "MOV r12, #0x26\n\t" "ADDS r1, r1, r11\n\t" "ADC r11, r0, #0x0\n\t" @@ -2023,21 +2030,21 @@ void fe_mul_op() "ADDS r2, r2, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r2, r11, r4, r12\n\t" - "LDM lr!, {r3, r4}\n\t" + "ldm lr!, {r3, r4}\n\t" "ADDS r3, r3, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r3, r11, r5, r12\n\t" "ADDS r4, r4, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r4, r11, r6, r12\n\t" - "LDM lr!, {r5, r6}\n\t" + "ldm lr!, {r5, r6}\n\t" "ADDS r5, r5, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r5, r11, r7, r12\n\t" "ADDS r6, r6, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r6, r11, r8, r12\n\t" - "LDM lr!, {r7, r8}\n\t" + "ldm lr!, {r7, r8}\n\t" "ADDS r7, r7, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r7, r11, r9, r12\n\t" @@ -2049,7 +2056,7 @@ void fe_mul_op() "ADD sp, sp, #0x28\n\t" : : - : "memory", "lr", "cc" + : "memory", "cc", "lr" ); } @@ -2067,8 +2074,8 @@ void fe_mul_op() "SUB sp, sp, #0x2c\n\t" "STRD r0, r1, [sp, #36]\n\t" "MOV lr, r2\n\t" - "LDM r1, {r0, r1, r2, r3}\n\t" - "LDM lr!, {r4, r5, r6}\n\t" + "ldm r1, {r0, r1, r2, r3}\n\t" + "ldm lr!, {r4, r5, r6}\n\t" "UMULL r10, r11, r0, r4\n\t" "UMULL r12, r7, r1, r4\n\t" "UMAAL r11, r12, r0, r5\n\t" @@ -2078,7 +2085,7 @@ void fe_mul_op() "UMAAL r8, r9, r3, r4\n\t" "STM sp, {r10, r11, r12}\n\t" "UMAAL r7, r8, r2, r5\n\t" - "LDM lr!, {r4}\n\t" + "ldm lr!, {r4}\n\t" "UMULL 
r10, r11, r1, r6\n\t" "UMAAL r8, r9, r2, r6\n\t" "UMAAL r7, r10, r0, r4\n\t" @@ -2088,7 +2095,7 @@ void fe_mul_op() "UMAAL r9, r11, r3, r6\n\t" "UMAAL r9, r10, r2, r4\n\t" "UMAAL r10, r11, r3, r4\n\t" - "LDM lr, {r4, r5, r6, r7}\n\t" + "ldm lr, {r4, r5, r6, r7}\n\t" "MOV r12, #0x0\n\t" "UMLAL r8, r12, r0, r4\n\t" "UMAAL r9, r12, r1, r4\n\t" @@ -2112,48 +2119,48 @@ void fe_mul_op() "UMAAL r4, r6, r2, r7\n\t" "SUB lr, lr, #0x10\n\t" "UMAAL r5, r6, r3, r7\n\t" - "LDM r0, {r0, r1, r2, r3}\n\t" + "ldm r0, {r0, r1, r2, r3}\n\t" "STR r6, [sp, #32]\n\t" - "LDM lr!, {r6}\n\t" + "ldm lr!, {r6}\n\t" "MOV r7, #0x0\n\t" "UMLAL r8, r7, r0, r6\n\t" "UMAAL r9, r7, r1, r6\n\t" "STR r8, [sp, #16]\n\t" "UMAAL r10, r7, r2, r6\n\t" "UMAAL r11, r7, r3, r6\n\t" - "LDM lr!, {r6}\n\t" + "ldm lr!, {r6}\n\t" "MOV r8, #0x0\n\t" "UMLAL r9, r8, r0, r6\n\t" "UMAAL r10, r8, r1, r6\n\t" "STR r9, [sp, #20]\n\t" "UMAAL r11, r8, r2, r6\n\t" "UMAAL r12, r8, r3, r6\n\t" - "LDM lr!, {r6}\n\t" + "ldm lr!, {r6}\n\t" "MOV r9, #0x0\n\t" "UMLAL r10, r9, r0, r6\n\t" "UMAAL r11, r9, r1, r6\n\t" "STR r10, [sp, #24]\n\t" "UMAAL r12, r9, r2, r6\n\t" "UMAAL r4, r9, r3, r6\n\t" - "LDM lr!, {r6}\n\t" + "ldm lr!, {r6}\n\t" "MOV r10, #0x0\n\t" "UMLAL r11, r10, r0, r6\n\t" "UMAAL r12, r10, r1, r6\n\t" "STR r11, [sp, #28]\n\t" "UMAAL r4, r10, r2, r6\n\t" "UMAAL r5, r10, r3, r6\n\t" - "LDM lr!, {r11}\n\t" + "ldm lr!, {r11}\n\t" "UMAAL r12, r7, r0, r11\n\t" "UMAAL r4, r7, r1, r11\n\t" "LDR r6, [sp, #32]\n\t" "UMAAL r5, r7, r2, r11\n\t" "UMAAL r6, r7, r3, r11\n\t" - "LDM lr!, {r11}\n\t" + "ldm lr!, {r11}\n\t" "UMAAL r4, r8, r0, r11\n\t" "UMAAL r5, r8, r1, r11\n\t" "UMAAL r6, r8, r2, r11\n\t" "UMAAL r7, r8, r3, r11\n\t" - "LDM lr, {r11, lr}\n\t" + "ldm lr, {r11, lr}\n\t" "UMAAL r5, r9, r0, r11\n\t" "UMAAL r6, r10, r0, lr\n\t" "UMAAL r6, r9, r1, r11\n\t" @@ -2189,7 +2196,7 @@ void fe_mul_op() "ADD sp, sp, #0x10\n\t" : : - : "memory", "lr", "cc" + : "memory", "cc", "lr" ); } @@ -2210,7 +2217,8 @@ void fe_mul(fe r, const fe a, 
const fe b) "BL fe_mul_op\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -2375,7 +2383,7 @@ void fe_sq_op() "ADD lr, sp, #0x20\n\t" "STM lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" "ADD lr, sp, #0x4\n\t" - "LDM lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" "ADDS r4, r4, r4\n\t" "ADCS r5, r5, r5\n\t" "ADCS r6, r6, r6\n\t" @@ -2384,7 +2392,7 @@ void fe_sq_op() "ADCS r9, r9, r9\n\t" "ADCS r10, r10, r10\n\t" "STM lr!, {r4, r5, r6, r7, r8, r9, r10}\n\t" - "LDM lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" + "ldm lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" "ADCS r3, r3, r3\n\t" "ADCS r4, r4, r4\n\t" "ADCS r5, r5, r5\n\t" @@ -2395,7 +2403,7 @@ void fe_sq_op() "ADC r10, r0, #0x0\n\t" "STM lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" "ADD lr, sp, #0x4\n\t" - "LDM lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" "MOV lr, sp\n\t" /* A[0] * A[0] */ "LDR r12, [r1]\n\t" @@ -2420,7 +2428,7 @@ void fe_sq_op() "UMLAL r9, r11, r12, r12\n\t" "ADDS r10, r10, r11\n\t" "STM lr!, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" - "LDM lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" /* A[4] * A[4] */ "LDR r12, [r1, #16]\n\t" "ADCS r3, r3, #0x0\n\t" @@ -2455,7 +2463,7 @@ void fe_sq_op() "LSL r11, r11, #1\n\t" "ORR r11, r11, r10, LSR #31\n\t" "MUL r11, r11, r12\n\t" - "LDM lr!, {r1, r2}\n\t" + "ldm lr!, {r1, r2}\n\t" "MOV r12, #0x26\n\t" "ADDS r1, r1, r11\n\t" "ADC r11, r0, #0x0\n\t" @@ -2463,21 +2471,21 @@ void fe_sq_op() "ADDS r2, r2, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r2, r11, r4, r12\n\t" - "LDM lr!, {r3, r4}\n\t" + "ldm lr!, {r3, r4}\n\t" "ADDS r3, r3, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r3, r11, r5, r12\n\t" "ADDS r4, r4, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r4, r11, r6, r12\n\t" - "LDM lr!, {r5, 
r6}\n\t" + "ldm lr!, {r5, r6}\n\t" "ADDS r5, r5, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r5, r11, r7, r12\n\t" "ADDS r6, r6, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r6, r11, r8, r12\n\t" - "LDM lr!, {r7, r8}\n\t" + "ldm lr!, {r7, r8}\n\t" "ADDS r7, r7, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r7, r11, r9, r12\n\t" @@ -2489,7 +2497,7 @@ void fe_sq_op() "ADD sp, sp, #0x44\n\t" : : - : "memory", "lr", "cc" + : "memory", "cc", "lr" ); } @@ -2506,7 +2514,7 @@ void fe_sq_op() __asm__ __volatile__ ( "SUB sp, sp, #0x20\n\t" "STR r0, [sp, #28]\n\t" - "LDM r1, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" + "ldm r1, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" /* Square */ "UMULL r9, r10, r0, r0\n\t" "UMULL r11, r12, r0, r1\n\t" @@ -2615,7 +2623,7 @@ void fe_sq_op() "STM lr, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" : : - : "memory", "lr", "cc" + : "memory", "cc", "lr" ); } @@ -2635,7 +2643,8 @@ void fe_sq(fe r, const fe a) "BL fe_sq_op\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -2654,7 +2663,7 @@ void fe_mul121666(fe r, fe a) __asm__ __volatile__ ( /* Multiply by 121666 */ - "LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" + "ldm %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "MOV r12, #0xdb42\n\t" "MOVT r12, #0x1\n\t" "UMULL r2, r10, r2, r12\n\t" @@ -2695,7 +2704,8 @@ void fe_mul121666(fe r, fe a) "STM %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -2713,7 +2723,7 @@ void fe_mul121666(fe r, fe a) __asm__ __volatile__ ( /* Multiply by 121666 */ - "LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" + "ldm %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "MOV r11, #0xdb42\n\t" "MOVT r11, #0x1\n\t" "UMULL r2, 
r12, r2, r11\n\t" @@ -2741,7 +2751,8 @@ void fe_mul121666(fe r, fe a) "STM %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -2782,7 +2793,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADD r3, sp, #0x40\n\t" /* Copy */ - "LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "MOV %[n], #0x1e\n\t" "STR %[n], [sp, #180]\n\t" @@ -2814,8 +2825,8 @@ int curve25519(byte* r, const byte* n, const byte* a) "RSB %[n], %[n], #0x0\n\t" "MOV r3, r0\n\t" "ADD r12, sp, #0x40\n\t" - "LDM r3, {r4, r5}\n\t" - "LDM r12, {r6, r7}\n\t" + "ldm r3, {r4, r5}\n\t" + "ldm r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -2826,8 +2837,8 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r7, r7, r9\n\t" "STM r3!, {r4, r5}\n\t" "STM r12!, {r6, r7}\n\t" - "LDM r3, {r4, r5}\n\t" - "LDM r12, {r6, r7}\n\t" + "ldm r3, {r4, r5}\n\t" + "ldm r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -2838,8 +2849,8 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r7, r7, r9\n\t" "STM r3!, {r4, r5}\n\t" "STM r12!, {r6, r7}\n\t" - "LDM r3, {r4, r5}\n\t" - "LDM r12, {r6, r7}\n\t" + "ldm r3, {r4, r5}\n\t" + "ldm r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -2850,8 +2861,8 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r7, r7, r9\n\t" "STM r3!, {r4, r5}\n\t" "STM r12!, {r6, r7}\n\t" - "LDM r3, {r4, r5}\n\t" - "LDM r12, {r6, r7}\n\t" + "ldm r3, {r4, r5}\n\t" + "ldm r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -2867,8 +2878,8 @@ int curve25519(byte* r, const 
byte* n, const byte* a) "RSB %[n], %[n], #0x0\n\t" "MOV r3, sp\n\t" "ADD r12, sp, #0x20\n\t" - "LDM r3, {r4, r5}\n\t" - "LDM r12, {r6, r7}\n\t" + "ldm r3, {r4, r5}\n\t" + "ldm r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -2879,8 +2890,8 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r7, r7, r9\n\t" "STM r3!, {r4, r5}\n\t" "STM r12!, {r6, r7}\n\t" - "LDM r3, {r4, r5}\n\t" - "LDM r12, {r6, r7}\n\t" + "ldm r3, {r4, r5}\n\t" + "ldm r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -2891,8 +2902,8 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r7, r7, r9\n\t" "STM r3!, {r4, r5}\n\t" "STM r12!, {r6, r7}\n\t" - "LDM r3, {r4, r5}\n\t" - "LDM r12, {r6, r7}\n\t" + "ldm r3, {r4, r5}\n\t" + "ldm r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -2903,8 +2914,8 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r7, r7, r9\n\t" "STM r3!, {r4, r5}\n\t" "STM r12!, {r6, r7}\n\t" - "LDM r3, {r4, r5}\n\t" - "LDM r12, {r6, r7}\n\t" + "ldm r3, {r4, r5}\n\t" + "ldm r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -3237,7 +3248,8 @@ int curve25519(byte* r, const byte* n, const byte* a) "ADD sp, sp, #0xbc\n\t" : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12", "lr" ); return (word32)(size_t)r; } @@ -3283,7 +3295,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADD r3, sp, #0x40\n\t" /* Copy */ - "LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "MOV %[a], #0xfe\n\t" "\n" @@ -3305,7 +3317,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "STR %[a], [sp, #164]\n\t" /* 
Conditional Swap */ "ADD r11, sp, #0xb0\n\t" - "LDM r11, {r4, r5, r6, r7}\n\t" + "ldm r11, {r4, r5, r6, r7}\n\t" "EOR r8, r4, r5\n\t" "EOR r9, r6, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -3383,10 +3395,10 @@ int curve25519(byte* r, const byte* n, const byte* a) #else "BGE.N L_curve25519_bits_%=\n\t" #endif - /* Cycle Count: 171 */ + /* Cycle Count: 166 */ "LDR %[n], [sp, #184]\n\t" /* Copy */ - "LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "STM sp, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" /* Invert */ "ADD r1, sp, #0x0\n\t" @@ -3626,7 +3638,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_mul_op\n\t" /* Ensure result is less than modulus */ "LDR %[r], [sp, #176]\n\t" - "LDM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "MOV %[a], #0x13\n\t" "AND %[a], %[a], r11, ASR #31\n\t" "ADDS r4, r4, %[a]\n\t" @@ -3643,7 +3655,8 @@ int curve25519(byte* r, const byte* n, const byte* a) "ADD sp, sp, #0xc0\n\t" : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12", "lr" ); return (word32)(size_t)r; } @@ -3903,7 +3916,8 @@ void fe_invert(fe r, const fe a) "ADD sp, sp, #0x88\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", + "r9", "r10", "r11" ); } @@ -4070,7 +4084,7 @@ void fe_sq2(fe r, const fe a) "ADD lr, sp, #0x20\n\t" "STM lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" "ADD lr, sp, #0x4\n\t" - "LDM lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" "ADDS r4, r4, r4\n\t" "ADCS r5, r5, r5\n\t" "ADCS r6, r6, r6\n\t" @@ -4079,7 +4093,7 @@ void fe_sq2(fe r, const fe a) "ADCS r9, r9, r9\n\t" "ADCS r10, r10, r10\n\t" "STM lr!, {r4, 
r5, r6, r7, r8, r9, r10}\n\t" - "LDM lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" + "ldm lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" "ADCS r3, r3, r3\n\t" "ADCS r4, r4, r4\n\t" "ADCS r5, r5, r5\n\t" @@ -4090,7 +4104,7 @@ void fe_sq2(fe r, const fe a) "ADC r10, r0, #0x0\n\t" "STM lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" "ADD lr, sp, #0x4\n\t" - "LDM lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" "MOV lr, sp\n\t" /* A[0] * A[0] */ "LDR r12, [r1]\n\t" @@ -4115,7 +4129,7 @@ void fe_sq2(fe r, const fe a) "UMLAL r9, r11, r12, r12\n\t" "ADDS r10, r10, r11\n\t" "STM lr!, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" - "LDM lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + "ldm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" /* A[4] * A[4] */ "LDR r12, [r1, #16]\n\t" "ADCS r3, r3, #0x0\n\t" @@ -4150,7 +4164,7 @@ void fe_sq2(fe r, const fe a) "LSL r11, r11, #1\n\t" "ORR r11, r11, r10, LSR #31\n\t" "MUL r11, r11, r12\n\t" - "LDM lr!, {r1, r2}\n\t" + "ldm lr!, {r1, r2}\n\t" "MOV r12, #0x26\n\t" "ADDS r1, r1, r11\n\t" "ADC r11, r0, #0x0\n\t" @@ -4158,21 +4172,21 @@ void fe_sq2(fe r, const fe a) "ADDS r2, r2, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r2, r11, r4, r12\n\t" - "LDM lr!, {r3, r4}\n\t" + "ldm lr!, {r3, r4}\n\t" "ADDS r3, r3, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r3, r11, r5, r12\n\t" "ADDS r4, r4, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r4, r11, r6, r12\n\t" - "LDM lr!, {r5, r6}\n\t" + "ldm lr!, {r5, r6}\n\t" "ADDS r5, r5, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r5, r11, r7, r12\n\t" "ADDS r6, r6, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r6, r11, r8, r12\n\t" - "LDM lr!, {r7, r8}\n\t" + "ldm lr!, {r7, r8}\n\t" "ADDS r7, r7, r11\n\t" "ADC r11, r0, #0x0\n\t" "UMLAL r7, r11, r9, r12\n\t" @@ -4217,7 +4231,7 @@ void fe_sq2(fe r, const fe a) "ADD sp, sp, #0x44\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "lr", "cc" + : "memory", "cc", "lr" ); } @@ -4236,7 +4250,7 @@ void fe_sq2(fe r, const fe a) __asm__ __volatile__ ( "SUB sp, sp, #0x24\n\t" "STRD r0, r1, 
[sp, #28]\n\t" - "LDM r1, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" + "ldm r1, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" /* Square * 2 */ "UMULL r9, r10, r0, r0\n\t" "UMULL r11, r12, r0, r1\n\t" @@ -4380,7 +4394,7 @@ void fe_sq2(fe r, const fe a) "MOV r1, lr\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "lr", "cc" + : "memory", "cc", "lr" ); } @@ -4637,7 +4651,8 @@ void fe_pow22523(fe r, const fe a) "ADD sp, sp, #0x68\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", + "r9", "r10", "r11" ); } @@ -4673,7 +4688,8 @@ void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p) "ADD sp, sp, #0x8\n\t" : [r] "+r" (r), [p] "+r" (p) : - : "memory", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11", "r12" ); } @@ -4714,7 +4730,8 @@ void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p) "ADD sp, sp, #0x8\n\t" : [r] "+r" (r), [p] "+r" (p) : - : "memory", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11", "r12" ); } @@ -4767,7 +4784,8 @@ void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p) "ADD sp, sp, #0x8\n\t" : [r] "+r" (r), [p] "+r" (p) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -4823,7 +4841,7 @@ void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) "ADD r1, r1, #0x40\n\t" "ADD r0, r0, #0x20\n\t" /* Double */ - "LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADDS r4, r4, r4\n\t" "ADCS r5, r5, r5\n\t" "ADCS r6, r6, r6\n\t" @@ -4855,7 +4873,8 @@ void ge_madd(ge_p1p1 * r, const ge_p3 * p, const 
ge_precomp * q) "ADD sp, sp, #0xc\n\t" : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -4911,7 +4930,7 @@ void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) "ADD r1, r1, #0x40\n\t" "ADD r0, r0, #0x20\n\t" /* Double */ - "LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADDS r4, r4, r4\n\t" "ADCS r5, r5, r5\n\t" "ADCS r6, r6, r6\n\t" @@ -4944,7 +4963,8 @@ void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) "ADD sp, sp, #0xc\n\t" : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -4995,7 +5015,7 @@ void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) "LDR r1, [sp]\n\t" "ADD r0, sp, #0xc\n\t" /* Double */ - "LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADDS r4, r4, r4\n\t" "ADCS r5, r5, r5\n\t" "ADCS r6, r6, r6\n\t" @@ -5033,7 +5053,8 @@ void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) "ADD sp, sp, #0x2c\n\t" : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -5084,7 +5105,7 @@ void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) "LDR r1, [sp]\n\t" "ADD r0, sp, #0xc\n\t" /* Double */ - "LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADDS r4, r4, r4\n\t" "ADCS r5, r5, r5\n\t" "ADCS r6, r6, r6\n\t" @@ -5122,7 +5143,8 @@ void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) "ADD sp, sp, #0x2c\n\t" : [r] "+r" (r), [p] 
"+r" (p), [q] "+r" (q) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -5142,7 +5164,7 @@ void sc_reduce(byte* s) "STR %[s], [sp, #52]\n\t" /* Load bits 252-511 */ "ADD %[s], %[s], #0x1c\n\t" - "LDM %[s], {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" + "ldm %[s], {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "LSR lr, r9, #24\n\t" "LSL r9, r9, #4\n\t" "ORR r9, r9, r8, LSR #28\n\t" @@ -5198,14 +5220,14 @@ void sc_reduce(byte* s) "MOV r1, #0x2c13\n\t" "MOVT r1, #0xa30a\n\t" "MOV lr, #0x0\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "ADDS r11, r11, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r3, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -5215,7 +5237,7 @@ void sc_reduce(byte* s) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r5, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -5225,7 +5247,7 @@ void sc_reduce(byte* s) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r7, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -5241,14 +5263,14 @@ void sc_reduce(byte* s) "MOV r1, #0x9ce5\n\t" "MOVT r1, #0xa7ed\n\t" "MOV lr, #0x0\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "ADDS r11, r11, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r3, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -5258,7 +5280,7 @@ void sc_reduce(byte* s) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r5, r1\n\t" 
"STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -5268,7 +5290,7 @@ void sc_reduce(byte* s) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r7, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -5282,14 +5304,14 @@ void sc_reduce(byte* s) "MOV r1, #0x6329\n\t" "MOVT r1, #0x5d08\n\t" "MOV lr, #0x0\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "ADDS r11, r11, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r3, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -5299,7 +5321,7 @@ void sc_reduce(byte* s) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r5, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -5309,7 +5331,7 @@ void sc_reduce(byte* s) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r7, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -5323,14 +5345,14 @@ void sc_reduce(byte* s) "MOV r1, #0x621\n\t" "MOVT r1, #0xeb21\n\t" "MOV lr, #0x0\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "ADDS r11, r11, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r3, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -5340,7 +5362,7 @@ void sc_reduce(byte* s) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r5, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, 
#0x0\n\t" @@ -5350,7 +5372,7 @@ void sc_reduce(byte* s) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r7, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -5362,19 +5384,19 @@ void sc_reduce(byte* s) "STM r12!, {r10, r11, lr}\n\t" "SUB r12, r12, #0x20\n\t" /* Subtract at 4 * 32 */ - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SUBS r10, r10, r2\n\t" "SBCS r11, r11, r3\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SBCS r10, r10, r4\n\t" "SBCS r11, r11, r5\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SBCS r10, r10, r6\n\t" "SBCS r11, r11, r7\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SBCS r10, r10, r8\n\t" "SBC r11, r11, r9\n\t" "STM r12!, {r10, r11}\n\t" @@ -5397,30 +5419,30 @@ void sc_reduce(byte* s) "AND r4, r4, lr\n\t" "AND r5, r5, lr\n\t" "AND r9, r9, lr\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, r1\n\t" "ADCS r11, r11, r2\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADCS r10, r10, r3\n\t" "ADCS r11, r11, r4\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADCS r10, r10, r5\n\t" "ADCS r11, r11, #0x0\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADCS r10, r10, #0x0\n\t" "ADCS r11, r11, #0x0\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10}\n\t" + "ldm r12, {r10}\n\t" "ADCS r10, r10, #0x0\n\t" "STM r12!, {r10}\n\t" "SUB %[s], %[s], #0x10\n\t" "MOV r12, sp\n\t" /* Load bits 252-376 */ "ADD r12, r12, #0x1c\n\t" - "LDM r12, {r1, r2, r3, r4, r5}\n\t" + "ldm r12, {r1, r2, r3, r4, r5}\n\t" "LSL r5, r5, #4\n\t" "ORR r5, r5, r4, LSR #28\n\t" "LSL r4, r4, #4\n\t" @@ -5437,7 +5459,7 @@ void sc_reduce(byte* s) "MOV r1, #0x2c13\n\t" "MOVT r1, #0xa30a\n\t" "MOV lr, #0x0\n\t" - 
"LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, lr, r2, r1\n\t" "ADDS r7, r7, lr\n\t" "MOV lr, #0x0\n\t" @@ -5457,7 +5479,7 @@ void sc_reduce(byte* s) "MOV r1, #0x9ce5\n\t" "MOVT r1, #0xa7ed\n\t" "MOV r10, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r10, r2, r1\n\t" "ADDS r7, r7, r10\n\t" "MOV r10, #0x0\n\t" @@ -5477,7 +5499,7 @@ void sc_reduce(byte* s) "MOV r1, #0x6329\n\t" "MOVT r1, #0x5d08\n\t" "MOV r11, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r11, r2, r1\n\t" "ADDS r7, r7, r11\n\t" "MOV r11, #0x0\n\t" @@ -5497,7 +5519,7 @@ void sc_reduce(byte* s) "MOV r1, #0x621\n\t" "MOVT r1, #0xeb21\n\t" "MOV r12, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r12, r2, r1\n\t" "ADDS r7, r7, r12\n\t" "MOV r12, #0x0\n\t" @@ -5514,7 +5536,7 @@ void sc_reduce(byte* s) "STM %[s], {r6, r7, r8, r9}\n\t" "ADD %[s], %[s], #0x4\n\t" /* Add overflows at 4 * 32 */ - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "BFC r9, #28, #4\n\t" "ADDS r6, r6, lr\n\t" "ADCS r7, r7, r10\n\t" @@ -5527,7 +5549,7 @@ void sc_reduce(byte* s) "SBCS r9, r9, r5\n\t" "SBC r1, r1, r1\n\t" "SUB %[s], %[s], #0x10\n\t" - "LDM %[s], {r2, r3, r4, r5}\n\t" + "ldm %[s], {r2, r3, r4, r5}\n\t" "MOV r10, #0xd3ed\n\t" "MOVT r10, #0x5cf5\n\t" "MOV r11, #0x631a\n\t" @@ -5556,7 +5578,8 @@ void sc_reduce(byte* s) "ADD sp, sp, #0x38\n\t" : [s] "+r" (s) : - : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11", "r12", "lr" ); } @@ -5576,7 +5599,7 @@ void sc_reduce(byte* s) "STR %[s], [sp, #52]\n\t" /* Load bits 252-511 */ "ADD %[s], %[s], #0x1c\n\t" - "LDM %[s], {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" + "ldm %[s], {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "LSR lr, r9, #24\n\t" "LSL r9, r9, #4\n\t" "ORR 
r9, r9, r8, LSR #28\n\t" @@ -5623,19 +5646,19 @@ void sc_reduce(byte* s) "MOV r1, #0x2c13\n\t" "MOVT r1, #0xa30a\n\t" "MOV lr, #0x0\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "UMAAL r11, lr, r3, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "UMAAL r10, lr, r4, r1\n\t" "UMAAL r11, lr, r5, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "UMAAL r10, lr, r6, r1\n\t" "UMAAL r11, lr, r7, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "UMAAL r10, lr, r8, r1\n\t" "BFC r11, #28, #4\n\t" "UMAAL r11, lr, r9, r1\n\t" @@ -5645,19 +5668,19 @@ void sc_reduce(byte* s) "MOV r1, #0x9ce5\n\t" "MOVT r1, #0xa7ed\n\t" "MOV lr, #0x0\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "UMAAL r11, lr, r3, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r4, r1\n\t" "UMAAL r11, lr, r5, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r6, r1\n\t" "UMAAL r11, lr, r7, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r8, r1\n\t" "UMAAL r11, lr, r9, r1\n\t" "STM r12!, {r10, r11, lr}\n\t" @@ -5665,19 +5688,19 @@ void sc_reduce(byte* s) "MOV r1, #0x6329\n\t" "MOVT r1, #0x5d08\n\t" "MOV lr, #0x0\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "UMAAL r11, lr, r3, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r4, r1\n\t" "UMAAL r11, lr, r5, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r6, r1\n\t" "UMAAL r11, lr, r7, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r8, r1\n\t" "UMAAL 
r11, lr, r9, r1\n\t" "STM r12!, {r10, r11, lr}\n\t" @@ -5685,37 +5708,37 @@ void sc_reduce(byte* s) "MOV r1, #0x621\n\t" "MOVT r1, #0xeb21\n\t" "MOV lr, #0x0\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "UMAAL r11, lr, r3, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r4, r1\n\t" "UMAAL r11, lr, r5, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r6, r1\n\t" "UMAAL r11, lr, r7, r1\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r8, r1\n\t" "UMAAL r11, lr, r9, r1\n\t" "STM r12!, {r10, r11, lr}\n\t" "SUB r12, r12, #0x20\n\t" /* Subtract at 4 * 32 */ - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SUBS r10, r10, r2\n\t" "SBCS r11, r11, r3\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SBCS r10, r10, r4\n\t" "SBCS r11, r11, r5\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SBCS r10, r10, r6\n\t" "SBCS r11, r11, r7\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SBCS r10, r10, r8\n\t" "SBC r11, r11, r9\n\t" "STM r12!, {r10, r11}\n\t" @@ -5738,30 +5761,30 @@ void sc_reduce(byte* s) "AND r4, r4, lr\n\t" "AND r5, r5, lr\n\t" "AND r9, r9, lr\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, r1\n\t" "ADCS r11, r11, r2\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADCS r10, r10, r3\n\t" "ADCS r11, r11, r4\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADCS r10, r10, r5\n\t" "ADCS r11, r11, #0x0\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADCS r10, r10, #0x0\n\t" "ADCS r11, r11, #0x0\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10}\n\t" + "ldm r12, {r10}\n\t" "ADCS r10, r10, 
#0x0\n\t" "STM r12!, {r10}\n\t" "SUB %[s], %[s], #0x10\n\t" "MOV r12, sp\n\t" /* Load bits 252-376 */ "ADD r12, r12, #0x1c\n\t" - "LDM r12, {r1, r2, r3, r4, r5}\n\t" + "ldm r12, {r1, r2, r3, r4, r5}\n\t" "LSL r5, r5, #4\n\t" "ORR r5, r5, r4, LSR #28\n\t" "LSL r4, r4, #4\n\t" @@ -5778,7 +5801,7 @@ void sc_reduce(byte* s) "MOV r1, #0x2c13\n\t" "MOVT r1, #0xa30a\n\t" "MOV lr, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, lr, r2, r1\n\t" "UMAAL r7, lr, r3, r1\n\t" "UMAAL r8, lr, r4, r1\n\t" @@ -5789,7 +5812,7 @@ void sc_reduce(byte* s) "MOV r1, #0x9ce5\n\t" "MOVT r1, #0xa7ed\n\t" "MOV r10, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r10, r2, r1\n\t" "UMAAL r7, r10, r3, r1\n\t" "UMAAL r8, r10, r4, r1\n\t" @@ -5800,7 +5823,7 @@ void sc_reduce(byte* s) "MOV r1, #0x6329\n\t" "MOVT r1, #0x5d08\n\t" "MOV r11, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r11, r2, r1\n\t" "UMAAL r7, r11, r3, r1\n\t" "UMAAL r8, r11, r4, r1\n\t" @@ -5811,7 +5834,7 @@ void sc_reduce(byte* s) "MOV r1, #0x621\n\t" "MOVT r1, #0xeb21\n\t" "MOV r12, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r12, r2, r1\n\t" "UMAAL r7, r12, r3, r1\n\t" "UMAAL r8, r12, r4, r1\n\t" @@ -5819,7 +5842,7 @@ void sc_reduce(byte* s) "STM %[s], {r6, r7, r8, r9}\n\t" "ADD %[s], %[s], #0x4\n\t" /* Add overflows at 4 * 32 */ - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "BFC r9, #28, #4\n\t" "ADDS r6, r6, lr\n\t" "ADCS r7, r7, r10\n\t" @@ -5832,7 +5855,7 @@ void sc_reduce(byte* s) "SBCS r9, r9, r5\n\t" "SBC r1, r1, r1\n\t" "SUB %[s], %[s], #0x10\n\t" - "LDM %[s], {r2, r3, r4, r5}\n\t" + "ldm %[s], {r2, r3, r4, r5}\n\t" "MOV r10, #0xd3ed\n\t" "MOVT r10, #0x5cf5\n\t" "MOV r11, #0x631a\n\t" @@ -5861,7 +5884,8 @@ void sc_reduce(byte* s) "ADD sp, sp, #0x38\n\t" : [s] "+r" (s) : - : "memory", "r1", "r2", "r3", "r4", "r5", "r6", 
"r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11", "r12", "lr" ); } @@ -6214,20 +6238,20 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[s], sp\n\t" /* Add c to a * b */ "LDR lr, [sp, #76]\n\t" - "LDM %[s], {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t" - "LDM lr!, {%[a], r10, r11, r12}\n\t" + "ldm %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" + "ldm lr!, {r1, r10, r11, r12}\n\t" "ADDS %[b], %[b], %[a]\n\t" "ADCS %[c], %[c], r10\n\t" "ADCS r4, r4, r11\n\t" "ADCS r5, r5, r12\n\t" - "LDM lr!, {%[a], r10, r11, r12}\n\t" + "ldm lr!, {r1, r10, r11, r12}\n\t" "ADCS r6, r6, %[a]\n\t" "ADCS r7, r7, r10\n\t" "ADCS r8, r8, r11\n\t" "ADCS r9, r9, r12\n\t" "MOV %[a], r9\n\t" "STM %[s]!, {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t" - "LDM %[s], {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t" + "ldm %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "ADCS %[b], %[b], #0x0\n\t" "ADCS %[c], %[c], #0x0\n\t" "ADCS r4, r4, #0x0\n\t" @@ -6292,14 +6316,14 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x2c13\n\t" "MOVT %[a], #0xa30a\n\t" "MOV lr, #0x0\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "ADDS r11, r11, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, %[c], %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -6309,7 +6333,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r5, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -6319,7 +6343,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r7, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - 
"LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -6335,14 +6359,14 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x9ce5\n\t" "MOVT %[a], #0xa7ed\n\t" "MOV lr, #0x0\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "ADDS r11, r11, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, %[c], %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -6352,7 +6376,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r5, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -6362,7 +6386,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r7, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -6376,14 +6400,14 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x6329\n\t" "MOVT %[a], #0x5d08\n\t" "MOV lr, #0x0\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "ADDS r11, r11, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, %[c], %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -6393,7 +6417,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r5, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ 
-6403,7 +6427,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r7, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -6417,14 +6441,14 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x621\n\t" "MOVT %[a], #0xeb21\n\t" "MOV lr, #0x0\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "ADDS r11, r11, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, %[c], %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -6434,7 +6458,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r5, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -6444,7 +6468,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "ADC lr, lr, #0x0\n\t" "UMLAL r11, lr, r7, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" "MOV lr, #0x0\n\t" "ADC lr, lr, #0x0\n\t" @@ -6456,19 +6480,19 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "STM r12!, {r10, r11, lr}\n\t" "SUB r12, r12, #0x20\n\t" /* Subtract at 4 * 32 */ - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SUBS r10, r10, %[b]\n\t" "SBCS r11, r11, %[c]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SBCS r10, r10, r4\n\t" "SBCS r11, r11, r5\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SBCS r10, r10, r6\n\t" "SBCS r11, r11, r7\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" 
"SBCS r10, r10, r8\n\t" "SBC r11, r11, r9\n\t" "STM r12!, {r10, r11}\n\t" @@ -6491,30 +6515,30 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "AND r4, r4, lr\n\t" "AND r5, r5, lr\n\t" "AND r9, r9, lr\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, %[a]\n\t" "ADCS r11, r11, %[b]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADCS r10, r10, %[c]\n\t" "ADCS r11, r11, r4\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADCS r10, r10, r5\n\t" "ADCS r11, r11, #0x0\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADCS r10, r10, #0x0\n\t" "ADCS r11, r11, #0x0\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10}\n\t" + "ldm r12, {r10}\n\t" "ADCS r10, r10, #0x0\n\t" "STM r12!, {r10}\n\t" "SUB %[s], %[s], #0x10\n\t" "MOV r12, sp\n\t" /* Load bits 252-376 */ "ADD r12, r12, #0x1c\n\t" - "LDM r12, {%[a], %[b], %[c], r4, r5}\n\t" + "ldm r12, {r1, r2, r3, r4, r5}\n\t" "LSL r5, r5, #4\n\t" "ORR r5, r5, r4, LSR #28\n\t" "LSL r4, r4, #4\n\t" @@ -6531,7 +6555,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x2c13\n\t" "MOVT %[a], #0xa30a\n\t" "MOV lr, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, lr, %[b], %[a]\n\t" "ADDS r7, r7, lr\n\t" "MOV lr, #0x0\n\t" @@ -6551,7 +6575,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x9ce5\n\t" "MOVT %[a], #0xa7ed\n\t" "MOV r10, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r10, %[b], %[a]\n\t" "ADDS r7, r7, r10\n\t" "MOV r10, #0x0\n\t" @@ -6571,7 +6595,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x6329\n\t" "MOVT %[a], #0x5d08\n\t" "MOV r11, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r11, %[b], %[a]\n\t" "ADDS r7, r7, 
r11\n\t" "MOV r11, #0x0\n\t" @@ -6591,7 +6615,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x621\n\t" "MOVT %[a], #0xeb21\n\t" "MOV r12, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r12, %[b], %[a]\n\t" "ADDS r7, r7, r12\n\t" "MOV r12, #0x0\n\t" @@ -6608,7 +6632,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "STM %[s], {r6, r7, r8, r9}\n\t" "ADD %[s], %[s], #0x4\n\t" /* Add overflows at 4 * 32 */ - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "BFC r9, #28, #4\n\t" "ADDS r6, r6, lr\n\t" "ADCS r7, r7, r10\n\t" @@ -6621,7 +6645,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "SBCS r9, r9, r5\n\t" "SBC %[a], %[a], %[a]\n\t" "SUB %[s], %[s], #0x10\n\t" - "LDM %[s], {%[b], %[c], r4, r5}\n\t" + "ldm %[s], {r2, r3, r4, r5}\n\t" "MOV r10, #0xd3ed\n\t" "MOVT r10, #0x5cf5\n\t" "MOV r11, #0x631a\n\t" @@ -6657,7 +6681,8 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "ADD sp, sp, #0x50\n\t" : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12", "lr" ); } @@ -6680,8 +6705,8 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "ADD lr, sp, #0x44\n\t" "STM lr, {%[s], %[a], %[c]}\n\t" "MOV lr, %[b]\n\t" - "LDM %[a], {%[s], %[a], %[b], %[c]}\n\t" - "LDM lr!, {r4, r5, r6}\n\t" + "ldm %[a], {r0, r1, r2, r3}\n\t" + "ldm lr!, {r4, r5, r6}\n\t" "UMULL r10, r11, %[s], r4\n\t" "UMULL r12, r7, %[a], r4\n\t" "UMAAL r11, r12, %[s], r5\n\t" @@ -6691,7 +6716,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "UMAAL r8, r9, %[c], r4\n\t" "STM sp, {r10, r11, r12}\n\t" "UMAAL r7, r8, %[b], r5\n\t" - "LDM lr!, {r4}\n\t" + "ldm lr!, {r4}\n\t" "UMULL r10, r11, %[a], r6\n\t" "UMAAL r8, r9, %[b], r6\n\t" "UMAAL r7, 
r10, %[s], r4\n\t" @@ -6701,7 +6726,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "UMAAL r9, r11, %[c], r6\n\t" "UMAAL r9, r10, %[b], r4\n\t" "UMAAL r10, r11, %[c], r4\n\t" - "LDM lr, {r4, r5, r6, r7}\n\t" + "ldm lr, {r4, r5, r6, r7}\n\t" "MOV r12, #0x0\n\t" "UMLAL r8, r12, %[s], r4\n\t" "UMAAL r9, r12, %[a], r4\n\t" @@ -6725,48 +6750,48 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "UMAAL r4, r6, %[b], r7\n\t" "SUB lr, lr, #0x10\n\t" "UMAAL r5, r6, %[c], r7\n\t" - "LDM %[s], {%[s], %[a], %[b], %[c]}\n\t" + "ldm %[s], {r0, r1, r2, r3}\n\t" "STR r6, [sp, #64]\n\t" - "LDM lr!, {r6}\n\t" + "ldm lr!, {r6}\n\t" "MOV r7, #0x0\n\t" "UMLAL r8, r7, %[s], r6\n\t" "UMAAL r9, r7, %[a], r6\n\t" "STR r8, [sp, #16]\n\t" "UMAAL r10, r7, %[b], r6\n\t" "UMAAL r11, r7, %[c], r6\n\t" - "LDM lr!, {r6}\n\t" + "ldm lr!, {r6}\n\t" "MOV r8, #0x0\n\t" "UMLAL r9, r8, %[s], r6\n\t" "UMAAL r10, r8, %[a], r6\n\t" "STR r9, [sp, #20]\n\t" "UMAAL r11, r8, %[b], r6\n\t" "UMAAL r12, r8, %[c], r6\n\t" - "LDM lr!, {r6}\n\t" + "ldm lr!, {r6}\n\t" "MOV r9, #0x0\n\t" "UMLAL r10, r9, %[s], r6\n\t" "UMAAL r11, r9, %[a], r6\n\t" "STR r10, [sp, #24]\n\t" "UMAAL r12, r9, %[b], r6\n\t" "UMAAL r4, r9, %[c], r6\n\t" - "LDM lr!, {r6}\n\t" + "ldm lr!, {r6}\n\t" "MOV r10, #0x0\n\t" "UMLAL r11, r10, %[s], r6\n\t" "UMAAL r12, r10, %[a], r6\n\t" "STR r11, [sp, #28]\n\t" "UMAAL r4, r10, %[b], r6\n\t" "UMAAL r5, r10, %[c], r6\n\t" - "LDM lr!, {r11}\n\t" + "ldm lr!, {r11}\n\t" "UMAAL r12, r7, %[s], r11\n\t" "UMAAL r4, r7, %[a], r11\n\t" "LDR r6, [sp, #64]\n\t" "UMAAL r5, r7, %[b], r11\n\t" "UMAAL r6, r7, %[c], r11\n\t" - "LDM lr!, {r11}\n\t" + "ldm lr!, {r11}\n\t" "UMAAL r4, r8, %[s], r11\n\t" "UMAAL r5, r8, %[a], r11\n\t" "UMAAL r6, r8, %[b], r11\n\t" "UMAAL r7, r8, %[c], r11\n\t" - "LDM lr, {r11, lr}\n\t" + "ldm lr, {r11, lr}\n\t" "UMAAL r5, r9, %[s], r11\n\t" "UMAAL r6, r10, %[s], lr\n\t" "UMAAL r6, r9, %[a], r11\n\t" @@ -6781,20 +6806,20 @@ void sc_muladd(byte* s, 
const byte* a, const byte* b, const byte* c) "MOV %[s], sp\n\t" /* Add c to a * b */ "LDR lr, [sp, #76]\n\t" - "LDM %[s], {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t" - "LDM lr!, {%[a], r10, r11, r12}\n\t" + "ldm %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" + "ldm lr!, {r1, r10, r11, r12}\n\t" "ADDS %[b], %[b], %[a]\n\t" "ADCS %[c], %[c], r10\n\t" "ADCS r4, r4, r11\n\t" "ADCS r5, r5, r12\n\t" - "LDM lr!, {%[a], r10, r11, r12}\n\t" + "ldm lr!, {r1, r10, r11, r12}\n\t" "ADCS r6, r6, %[a]\n\t" "ADCS r7, r7, r10\n\t" "ADCS r8, r8, r11\n\t" "ADCS r9, r9, r12\n\t" "MOV %[a], r9\n\t" "STM %[s]!, {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t" - "LDM %[s], {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t" + "ldm %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "ADCS %[b], %[b], #0x0\n\t" "ADCS %[c], %[c], #0x0\n\t" "ADCS r4, r4, #0x0\n\t" @@ -6850,19 +6875,19 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x2c13\n\t" "MOVT %[a], #0xa30a\n\t" "MOV lr, #0x0\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "UMAAL r11, lr, %[c], %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "UMAAL r10, lr, r4, %[a]\n\t" "UMAAL r11, lr, r5, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "UMAAL r10, lr, r6, %[a]\n\t" "UMAAL r11, lr, r7, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM %[s]!, {r10, r11}\n\t" + "ldm %[s]!, {r10, r11}\n\t" "UMAAL r10, lr, r8, %[a]\n\t" "BFC r11, #28, #4\n\t" "UMAAL r11, lr, r9, %[a]\n\t" @@ -6872,19 +6897,19 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x9ce5\n\t" "MOVT %[a], #0xa7ed\n\t" "MOV lr, #0x0\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "UMAAL r11, lr, %[c], %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r4, %[a]\n\t" "UMAAL r11, lr, r5, %[a]\n\t" "STM 
r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r6, %[a]\n\t" "UMAAL r11, lr, r7, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r8, %[a]\n\t" "UMAAL r11, lr, r9, %[a]\n\t" "STM r12!, {r10, r11, lr}\n\t" @@ -6892,19 +6917,19 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x6329\n\t" "MOVT %[a], #0x5d08\n\t" "MOV lr, #0x0\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "UMAAL r11, lr, %[c], %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r4, %[a]\n\t" "UMAAL r11, lr, r5, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r6, %[a]\n\t" "UMAAL r11, lr, r7, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r8, %[a]\n\t" "UMAAL r11, lr, r9, %[a]\n\t" "STM r12!, {r10, r11, lr}\n\t" @@ -6912,37 +6937,37 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x621\n\t" "MOVT %[a], #0xeb21\n\t" "MOV lr, #0x0\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "UMAAL r11, lr, %[c], %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r4, %[a]\n\t" "UMAAL r11, lr, r5, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r6, %[a]\n\t" "UMAAL r11, lr, r7, %[a]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "UMAAL r10, lr, r8, %[a]\n\t" "UMAAL r11, lr, r9, %[a]\n\t" "STM r12!, {r10, r11, lr}\n\t" "SUB r12, r12, #0x20\n\t" /* Subtract at 4 * 32 */ - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SUBS r10, r10, %[b]\n\t" "SBCS r11, r11, %[c]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, 
r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SBCS r10, r10, r4\n\t" "SBCS r11, r11, r5\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SBCS r10, r10, r6\n\t" "SBCS r11, r11, r7\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "SBCS r10, r10, r8\n\t" "SBC r11, r11, r9\n\t" "STM r12!, {r10, r11}\n\t" @@ -6965,30 +6990,30 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "AND r4, r4, lr\n\t" "AND r5, r5, lr\n\t" "AND r9, r9, lr\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADDS r10, r10, %[a]\n\t" "ADCS r11, r11, %[b]\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADCS r10, r10, %[c]\n\t" "ADCS r11, r11, r4\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADCS r10, r10, r5\n\t" "ADCS r11, r11, #0x0\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10, r11}\n\t" + "ldm r12, {r10, r11}\n\t" "ADCS r10, r10, #0x0\n\t" "ADCS r11, r11, #0x0\n\t" "STM r12!, {r10, r11}\n\t" - "LDM r12, {r10}\n\t" + "ldm r12, {r10}\n\t" "ADCS r10, r10, #0x0\n\t" "STM r12!, {r10}\n\t" "SUB %[s], %[s], #0x10\n\t" "MOV r12, sp\n\t" /* Load bits 252-376 */ "ADD r12, r12, #0x1c\n\t" - "LDM r12, {%[a], %[b], %[c], r4, r5}\n\t" + "ldm r12, {r1, r2, r3, r4, r5}\n\t" "LSL r5, r5, #4\n\t" "ORR r5, r5, r4, LSR #28\n\t" "LSL r4, r4, #4\n\t" @@ -7005,7 +7030,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x2c13\n\t" "MOVT %[a], #0xa30a\n\t" "MOV lr, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, lr, %[b], %[a]\n\t" "UMAAL r7, lr, %[c], %[a]\n\t" "UMAAL r8, lr, r4, %[a]\n\t" @@ -7016,7 +7041,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x9ce5\n\t" "MOVT %[a], #0xa7ed\n\t" "MOV r10, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r10, %[b], %[a]\n\t" "UMAAL 
r7, r10, %[c], %[a]\n\t" "UMAAL r8, r10, r4, %[a]\n\t" @@ -7027,7 +7052,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x6329\n\t" "MOVT %[a], #0x5d08\n\t" "MOV r11, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r11, %[b], %[a]\n\t" "UMAAL r7, r11, %[c], %[a]\n\t" "UMAAL r8, r11, r4, %[a]\n\t" @@ -7038,7 +7063,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "MOV %[a], #0x621\n\t" "MOVT %[a], #0xeb21\n\t" "MOV r12, #0x0\n\t" - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r12, %[b], %[a]\n\t" "UMAAL r7, r12, %[c], %[a]\n\t" "UMAAL r8, r12, r4, %[a]\n\t" @@ -7046,7 +7071,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "STM %[s], {r6, r7, r8, r9}\n\t" "ADD %[s], %[s], #0x4\n\t" /* Add overflows at 4 * 32 */ - "LDM %[s], {r6, r7, r8, r9}\n\t" + "ldm %[s], {r6, r7, r8, r9}\n\t" "BFC r9, #28, #4\n\t" "ADDS r6, r6, lr\n\t" "ADCS r7, r7, r10\n\t" @@ -7059,7 +7084,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "SBCS r9, r9, r5\n\t" "SBC %[a], %[a], %[a]\n\t" "SUB %[s], %[s], #0x10\n\t" - "LDM %[s], {%[b], %[c], r4, r5}\n\t" + "ldm %[s], {r2, r3, r4, r5}\n\t" "MOV r10, #0xd3ed\n\t" "MOVT r10, #0x5cf5\n\t" "MOV r11, #0x631a\n\t" @@ -7095,7 +7120,8 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) "ADD sp, sp, #0x50\n\t" : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12", "lr" ); } diff --git a/wolfcrypt/src/port/arm/thumb2-kyber-asm.S b/wolfcrypt/src/port/arm/thumb2-mlkem-asm.S similarity index 94% rename from wolfcrypt/src/port/arm/thumb2-kyber-asm.S rename to wolfcrypt/src/port/arm/thumb2-mlkem-asm.S index 1c3761d57..1d941bf56 100644 --- a/wolfcrypt/src/port/arm/thumb2-kyber-asm.S +++ 
b/wolfcrypt/src/port/arm/thumb2-mlkem-asm.S @@ -1,4 +1,4 @@ -/* thumb2-kyber-asm +/* thumb2-mlkem-asm * * Copyright (C) 2006-2025 wolfSSL Inc. * @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./kyber/kyber.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-kyber-asm.S + * ruby ./kyber/kyber.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-mlkem-asm.S */ #ifdef HAVE_CONFIG_H @@ -34,12 +35,12 @@ #ifndef WOLFSSL_ARMASM_INLINE .thumb .syntax unified -#ifdef WOLFSSL_WC_KYBER +#ifdef WOLFSSL_WC_MLKEM .text - .type L_kyber_thumb2_ntt_zetas, %object - .size L_kyber_thumb2_ntt_zetas, 256 + .type L_mlkem_thumb2_ntt_zetas, %object + .size L_mlkem_thumb2_ntt_zetas, 256 .align 4 -L_kyber_thumb2_ntt_zetas: +L_mlkem_thumb2_ntt_zetas: .short 0x8ed .short 0xa0b .short 0xb9a @@ -170,18 +171,18 @@ L_kyber_thumb2_ntt_zetas: .short 0x65c .text .align 4 - .globl kyber_thumb2_ntt - .type kyber_thumb2_ntt, %function -kyber_thumb2_ntt: + .globl mlkem_thumb2_ntt + .type mlkem_thumb2_ntt, %function +mlkem_thumb2_ntt: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} SUB sp, sp, #0x8 - ADR r1, L_kyber_thumb2_ntt_zetas + ADR r1, L_mlkem_thumb2_ntt_zetas #ifndef WOLFSSL_ARM_ARCH_7M MOV r12, #0xd01 MOVT r12, #0xcff #endif /* !WOLFSSL_ARM_ARCH_7M */ MOV r2, #0x10 -L_kyber_thumb2_ntt_loop_123: +L_mlkem_thumb2_ntt_loop_123: STR r2, [sp] LDRH lr, [r1, #2] LDR r2, [r0] @@ -615,18 +616,18 @@ L_kyber_thumb2_ntt_loop_123: SUBS r2, r2, #0x1 ADD r0, r0, #0x4 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BNE L_kyber_thumb2_ntt_loop_123 + BNE L_mlkem_thumb2_ntt_loop_123 #else - BNE.N L_kyber_thumb2_ntt_loop_123 + BNE.N L_mlkem_thumb2_ntt_loop_123 #endif SUB r0, r0, #0x40 MOV r3, #0x0 -L_kyber_thumb2_ntt_loop_4_j: +L_mlkem_thumb2_ntt_loop_4_j: STR r3, [sp, #4] ADD lr, r1, r3, LSR #4 MOV r2, #0x4 LDR lr, [lr, #16] -L_kyber_thumb2_ntt_loop_4_i: +L_mlkem_thumb2_ntt_loop_4_i: STR r2, [sp] LDR r2, [r0] LDR r3, [r0, #16] @@ -784,21 +785,21 @@ 
L_kyber_thumb2_ntt_loop_4_i: SUBS r2, r2, #0x1 ADD r0, r0, #0x4 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BNE L_kyber_thumb2_ntt_loop_4_i + BNE L_mlkem_thumb2_ntt_loop_4_i #else - BNE.N L_kyber_thumb2_ntt_loop_4_i + BNE.N L_mlkem_thumb2_ntt_loop_4_i #endif ADD r3, r3, #0x40 RSBS r10, r3, #0x100 ADD r0, r0, #0x70 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BNE L_kyber_thumb2_ntt_loop_4_j + BNE L_mlkem_thumb2_ntt_loop_4_j #else - BNE.N L_kyber_thumb2_ntt_loop_4_j + BNE.N L_mlkem_thumb2_ntt_loop_4_j #endif SUB r0, r0, #0x200 MOV r3, #0x0 -L_kyber_thumb2_ntt_loop_567: +L_mlkem_thumb2_ntt_loop_567: ADD lr, r1, r3, LSR #3 STR r3, [sp, #4] LDRH lr, [lr, #32] @@ -1419,19 +1420,19 @@ L_kyber_thumb2_ntt_loop_567: RSBS r10, r3, #0x100 ADD r0, r0, #0x20 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BNE L_kyber_thumb2_ntt_loop_567 + BNE L_mlkem_thumb2_ntt_loop_567 #else - BNE.N L_kyber_thumb2_ntt_loop_567 + BNE.N L_mlkem_thumb2_ntt_loop_567 #endif ADD sp, sp, #0x8 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 1270 */ - .size kyber_thumb2_ntt,.-kyber_thumb2_ntt + .size mlkem_thumb2_ntt,.-mlkem_thumb2_ntt .text - .type L_kyber_thumb2_invntt_zetas_inv, %object - .size L_kyber_thumb2_invntt_zetas_inv, 256 + .type L_mlkem_invntt_zetas_inv, %object + .size L_mlkem_invntt_zetas_inv, 256 .align 4 -L_kyber_thumb2_invntt_zetas_inv: +L_mlkem_invntt_zetas_inv: .short 0x6a5 .short 0x70f .short 0x5b4 @@ -1562,18 +1563,18 @@ L_kyber_thumb2_invntt_zetas_inv: .short 0x5a1 .text .align 4 - .globl kyber_thumb2_invntt - .type kyber_thumb2_invntt, %function -kyber_thumb2_invntt: + .globl mlkem_thumb2_invntt + .type mlkem_thumb2_invntt, %function +mlkem_thumb2_invntt: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} SUB sp, sp, #0x8 - ADR r1, L_kyber_thumb2_invntt_zetas_inv + ADR r1, L_mlkem_invntt_zetas_inv #ifndef WOLFSSL_ARM_ARCH_7M MOV r12, #0xd01 MOVT r12, #0xcff #endif /* 
!WOLFSSL_ARM_ARCH_7M */ MOV r3, #0x0 -L_kyber_thumb2_invntt_loop_765: +L_mlkem_invntt_loop_765: ADD lr, r1, r3, LSR #1 STR r3, [sp, #4] LDR r2, [r0] @@ -2165,18 +2166,18 @@ L_kyber_thumb2_invntt_loop_765: RSBS r10, r3, #0x100 ADD r0, r0, #0x20 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BNE L_kyber_thumb2_invntt_loop_765 + BNE L_mlkem_invntt_loop_765 #else - BNE.N L_kyber_thumb2_invntt_loop_765 + BNE.N L_mlkem_invntt_loop_765 #endif SUB r0, r0, #0x200 MOV r3, #0x0 -L_kyber_thumb2_invntt_loop_4_j: +L_mlkem_invntt_loop_4_j: STR r3, [sp, #4] ADD lr, r1, r3, LSR #4 MOV r2, #0x4 LDR lr, [lr, #224] -L_kyber_thumb2_invntt_loop_4_i: +L_mlkem_invntt_loop_4_i: STR r2, [sp] LDR r2, [r0] LDR r3, [r0, #16] @@ -2354,21 +2355,21 @@ L_kyber_thumb2_invntt_loop_4_i: SUBS r2, r2, #0x1 ADD r0, r0, #0x4 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BNE L_kyber_thumb2_invntt_loop_4_i + BNE L_mlkem_invntt_loop_4_i #else - BNE.N L_kyber_thumb2_invntt_loop_4_i + BNE.N L_mlkem_invntt_loop_4_i #endif ADD r3, r3, #0x40 RSBS r10, r3, #0x100 ADD r0, r0, #0x70 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BNE L_kyber_thumb2_invntt_loop_4_j + BNE L_mlkem_invntt_loop_4_j #else - BNE.N L_kyber_thumb2_invntt_loop_4_j + BNE.N L_mlkem_invntt_loop_4_j #endif SUB r0, r0, #0x200 MOV r2, #0x10 -L_kyber_thumb2_invntt_loop_321: +L_mlkem_invntt_loop_321: STR r2, [sp] LDRH lr, [r1, #2] LDR r2, [r0] @@ -3178,19 +3179,19 @@ L_kyber_thumb2_invntt_loop_321: SUBS r2, r2, #0x1 ADD r0, r0, #0x4 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BNE L_kyber_thumb2_invntt_loop_321 + BNE L_mlkem_invntt_loop_321 #else - BNE.N L_kyber_thumb2_invntt_loop_321 + BNE.N L_mlkem_invntt_loop_321 #endif ADD sp, sp, #0x8 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 1629 */ - .size kyber_thumb2_invntt,.-kyber_thumb2_invntt + .size mlkem_thumb2_invntt,.-mlkem_thumb2_invntt .text - .type 
L_kyber_thumb2_basemul_mont_zetas, %object - .size L_kyber_thumb2_basemul_mont_zetas, 256 + .type L_mlkem_basemul_mont_zetas, %object + .size L_mlkem_basemul_mont_zetas, 256 .align 4 -L_kyber_thumb2_basemul_mont_zetas: +L_mlkem_basemul_mont_zetas: .short 0x8ed .short 0xa0b .short 0xb9a @@ -3321,18 +3322,18 @@ L_kyber_thumb2_basemul_mont_zetas: .short 0x65c .text .align 4 - .globl kyber_thumb2_basemul_mont - .type kyber_thumb2_basemul_mont, %function -kyber_thumb2_basemul_mont: + .globl mlkem_thumb2_basemul_mont + .type mlkem_thumb2_basemul_mont, %function +mlkem_thumb2_basemul_mont: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - ADR r3, L_kyber_thumb2_basemul_mont_zetas + ADR r3, L_mlkem_basemul_mont_zetas ADD r3, r3, #0x80 #ifndef WOLFSSL_ARM_ARCH_7M MOV r12, #0xd01 MOVT r12, #0xcff #endif /* !WOLFSSL_ARM_ARCH_7M */ MOV r8, #0x0 -L_kyber_thumb2_basemul_mont_loop: +L_mlkem_basemul_mont_loop: LDM r1!, {r4, r5} LDM r2!, {r6, r7} LDR lr, [r3, r8] @@ -3435,27 +3436,27 @@ L_kyber_thumb2_basemul_mont_loop: STM r0!, {r4, r5} POP {r8} #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BNE L_kyber_thumb2_basemul_mont_loop + BNE L_mlkem_basemul_mont_loop #else - BNE.N L_kyber_thumb2_basemul_mont_loop + BNE.N L_mlkem_basemul_mont_loop #endif POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 146 */ - .size kyber_thumb2_basemul_mont,.-kyber_thumb2_basemul_mont + .size mlkem_thumb2_basemul_mont,.-mlkem_thumb2_basemul_mont .text .align 4 - .globl kyber_thumb2_basemul_mont_add - .type kyber_thumb2_basemul_mont_add, %function -kyber_thumb2_basemul_mont_add: + .globl mlkem_thumb2_basemul_mont_add + .type mlkem_thumb2_basemul_mont_add, %function +mlkem_thumb2_basemul_mont_add: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - ADR r3, L_kyber_thumb2_basemul_mont_zetas + ADR r3, L_mlkem_basemul_mont_zetas ADD r3, r3, #0x80 #ifndef WOLFSSL_ARM_ARCH_7M MOV r12, #0xd01 MOVT r12, #0xcff #endif /* !WOLFSSL_ARM_ARCH_7M */ MOV r8, #0x0 
-L_kyber_thumb2_basemul_mont_add_loop: +L_mlkem_thumb2_basemul_mont_add_loop: LDM r1!, {r4, r5} LDM r2!, {r6, r7} LDR lr, [r3, r8] @@ -3570,18 +3571,18 @@ L_kyber_thumb2_basemul_mont_add_loop: STM r0!, {r4, r5} POP {r8} #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BNE L_kyber_thumb2_basemul_mont_add_loop + BNE L_mlkem_thumb2_basemul_mont_add_loop #else - BNE.N L_kyber_thumb2_basemul_mont_add_loop + BNE.N L_mlkem_thumb2_basemul_mont_add_loop #endif POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 162 */ - .size kyber_thumb2_basemul_mont_add,.-kyber_thumb2_basemul_mont_add + .size mlkem_thumb2_basemul_mont_add,.-mlkem_thumb2_basemul_mont_add .text .align 4 - .globl kyber_thumb2_csubq - .type kyber_thumb2_csubq, %function -kyber_thumb2_csubq: + .globl mlkem_thumb2_csubq + .type mlkem_thumb2_csubq, %function +mlkem_thumb2_csubq: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} MOV r11, #0xd01 MOV r12, #0xd01 @@ -3591,7 +3592,7 @@ kyber_thumb2_csubq: MOV lr, #0x8000 MOVT lr, #0x8000 MOV r1, #0x100 -L_kyber_thumb2_csubq_loop: +L_mlkem_thumb2_csubq_loop: LDM r0, {r2, r3, r4, r5} #ifndef WOLFSSL_ARM_ARCH_7M SSUB16 r2, r2, r12 @@ -3659,27 +3660,27 @@ L_kyber_thumb2_csubq_loop: STM r0!, {r2, r3, r4, r5} SUBS r1, r1, #0x8 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BNE L_kyber_thumb2_csubq_loop + BNE L_mlkem_thumb2_csubq_loop #else - BNE.N L_kyber_thumb2_csubq_loop + BNE.N L_mlkem_thumb2_csubq_loop #endif POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 101 */ - .size kyber_thumb2_csubq,.-kyber_thumb2_csubq + .size mlkem_thumb2_csubq,.-mlkem_thumb2_csubq .text .align 4 - .globl kyber_thumb2_rej_uniform - .type kyber_thumb2_rej_uniform, %function -kyber_thumb2_rej_uniform: + .globl mlkem_thumb2_rej_uniform + .type mlkem_thumb2_rej_uniform, %function +mlkem_thumb2_rej_uniform: PUSH {r4, r5, r6, r7, r8, r9, r10, lr} MOV r8, #0xd01 MOV r9, #0x0 -L_kyber_thumb2_rej_uniform_loop_no_fail: 
+L_mlkem_thumb2_rej_uniform_loop_no_fail: CMP r1, #0x8 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BLT L_kyber_thumb2_rej_uniform_done_no_fail + BLT L_mlkem_thumb2_rej_uniform_done_no_fail #else - BLT.N L_kyber_thumb2_rej_uniform_done_no_fail + BLT.N L_mlkem_thumb2_rej_uniform_done_no_fail #endif LDM r2!, {r4, r5, r6} UBFX r7, r4, #0, #12 @@ -3734,166 +3735,166 @@ L_kyber_thumb2_rej_uniform_loop_no_fail: ADD r9, r9, r10, LSL #1 SUBS r3, r3, #0xc #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BNE L_kyber_thumb2_rej_uniform_loop_no_fail + BNE L_mlkem_thumb2_rej_uniform_loop_no_fail #else - BNE.N L_kyber_thumb2_rej_uniform_loop_no_fail + BNE.N L_mlkem_thumb2_rej_uniform_loop_no_fail #endif #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - B L_kyber_thumb2_rej_uniform_done + B L_mlkem_thumb2_rej_uniform_done #else - B.N L_kyber_thumb2_rej_uniform_done + B.N L_mlkem_thumb2_rej_uniform_done #endif -L_kyber_thumb2_rej_uniform_done_no_fail: +L_mlkem_thumb2_rej_uniform_done_no_fail: CMP r1, #0x0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BEQ L_kyber_thumb2_rej_uniform_done + BEQ L_mlkem_thumb2_rej_uniform_done #else - BEQ.N L_kyber_thumb2_rej_uniform_done + BEQ.N L_mlkem_thumb2_rej_uniform_done #endif -L_kyber_thumb2_rej_uniform_loop: +L_mlkem_thumb2_rej_uniform_loop: LDM r2!, {r4, r5, r6} UBFX r7, r4, #0, #12 CMP r7, r8 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BGE L_kyber_thumb2_rej_uniform_fail_0 + BGE L_mlkem_thumb2_rej_uniform_fail_0 #else - BGE.N L_kyber_thumb2_rej_uniform_fail_0 + BGE.N L_mlkem_thumb2_rej_uniform_fail_0 #endif STRH r7, [r0, r9] SUBS r1, r1, #0x1 ADD r9, r9, #0x2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BEQ L_kyber_thumb2_rej_uniform_done + BEQ L_mlkem_thumb2_rej_uniform_done #else - BEQ.N L_kyber_thumb2_rej_uniform_done + BEQ.N 
L_mlkem_thumb2_rej_uniform_done #endif -L_kyber_thumb2_rej_uniform_fail_0: +L_mlkem_thumb2_rej_uniform_fail_0: UBFX r7, r4, #12, #12 CMP r7, r8 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BGE L_kyber_thumb2_rej_uniform_fail_1 + BGE L_mlkem_thumb2_rej_uniform_fail_1 #else - BGE.N L_kyber_thumb2_rej_uniform_fail_1 + BGE.N L_mlkem_thumb2_rej_uniform_fail_1 #endif STRH r7, [r0, r9] SUBS r1, r1, #0x1 ADD r9, r9, #0x2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BEQ L_kyber_thumb2_rej_uniform_done + BEQ L_mlkem_thumb2_rej_uniform_done #else - BEQ.N L_kyber_thumb2_rej_uniform_done + BEQ.N L_mlkem_thumb2_rej_uniform_done #endif -L_kyber_thumb2_rej_uniform_fail_1: +L_mlkem_thumb2_rej_uniform_fail_1: UBFX r7, r4, #24, #8 BFI r7, r5, #8, #4 CMP r7, r8 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BGE L_kyber_thumb2_rej_uniform_fail_2 + BGE L_mlkem_thumb2_rej_uniform_fail_2 #else - BGE.N L_kyber_thumb2_rej_uniform_fail_2 + BGE.N L_mlkem_thumb2_rej_uniform_fail_2 #endif STRH r7, [r0, r9] SUBS r1, r1, #0x1 ADD r9, r9, #0x2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BEQ L_kyber_thumb2_rej_uniform_done + BEQ L_mlkem_thumb2_rej_uniform_done #else - BEQ.N L_kyber_thumb2_rej_uniform_done + BEQ.N L_mlkem_thumb2_rej_uniform_done #endif -L_kyber_thumb2_rej_uniform_fail_2: +L_mlkem_thumb2_rej_uniform_fail_2: UBFX r7, r5, #4, #12 CMP r7, r8 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BGE L_kyber_thumb2_rej_uniform_fail_3 + BGE L_mlkem_thumb2_rej_uniform_fail_3 #else - BGE.N L_kyber_thumb2_rej_uniform_fail_3 + BGE.N L_mlkem_thumb2_rej_uniform_fail_3 #endif STRH r7, [r0, r9] SUBS r1, r1, #0x1 ADD r9, r9, #0x2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BEQ L_kyber_thumb2_rej_uniform_done + BEQ L_mlkem_thumb2_rej_uniform_done #else - BEQ.N L_kyber_thumb2_rej_uniform_done + BEQ.N 
L_mlkem_thumb2_rej_uniform_done #endif -L_kyber_thumb2_rej_uniform_fail_3: +L_mlkem_thumb2_rej_uniform_fail_3: UBFX r7, r5, #16, #12 CMP r7, r8 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BGE L_kyber_thumb2_rej_uniform_fail_4 + BGE L_mlkem_thumb2_rej_uniform_fail_4 #else - BGE.N L_kyber_thumb2_rej_uniform_fail_4 + BGE.N L_mlkem_thumb2_rej_uniform_fail_4 #endif STRH r7, [r0, r9] SUBS r1, r1, #0x1 ADD r9, r9, #0x2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BEQ L_kyber_thumb2_rej_uniform_done + BEQ L_mlkem_thumb2_rej_uniform_done #else - BEQ.N L_kyber_thumb2_rej_uniform_done + BEQ.N L_mlkem_thumb2_rej_uniform_done #endif -L_kyber_thumb2_rej_uniform_fail_4: +L_mlkem_thumb2_rej_uniform_fail_4: UBFX r7, r5, #28, #4 BFI r7, r6, #4, #8 CMP r7, r8 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BGE L_kyber_thumb2_rej_uniform_fail_5 + BGE L_mlkem_thumb2_rej_uniform_fail_5 #else - BGE.N L_kyber_thumb2_rej_uniform_fail_5 + BGE.N L_mlkem_thumb2_rej_uniform_fail_5 #endif STRH r7, [r0, r9] SUBS r1, r1, #0x1 ADD r9, r9, #0x2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BEQ L_kyber_thumb2_rej_uniform_done + BEQ L_mlkem_thumb2_rej_uniform_done #else - BEQ.N L_kyber_thumb2_rej_uniform_done + BEQ.N L_mlkem_thumb2_rej_uniform_done #endif -L_kyber_thumb2_rej_uniform_fail_5: +L_mlkem_thumb2_rej_uniform_fail_5: UBFX r7, r6, #8, #12 CMP r7, r8 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BGE L_kyber_thumb2_rej_uniform_fail_6 + BGE L_mlkem_thumb2_rej_uniform_fail_6 #else - BGE.N L_kyber_thumb2_rej_uniform_fail_6 + BGE.N L_mlkem_thumb2_rej_uniform_fail_6 #endif STRH r7, [r0, r9] SUBS r1, r1, #0x1 ADD r9, r9, #0x2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BEQ L_kyber_thumb2_rej_uniform_done + BEQ L_mlkem_thumb2_rej_uniform_done #else - BEQ.N L_kyber_thumb2_rej_uniform_done + BEQ.N 
L_mlkem_thumb2_rej_uniform_done #endif -L_kyber_thumb2_rej_uniform_fail_6: +L_mlkem_thumb2_rej_uniform_fail_6: UBFX r7, r6, #20, #12 CMP r7, r8 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BGE L_kyber_thumb2_rej_uniform_fail_7 + BGE L_mlkem_thumb2_rej_uniform_fail_7 #else - BGE.N L_kyber_thumb2_rej_uniform_fail_7 + BGE.N L_mlkem_thumb2_rej_uniform_fail_7 #endif STRH r7, [r0, r9] SUBS r1, r1, #0x1 ADD r9, r9, #0x2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BEQ L_kyber_thumb2_rej_uniform_done + BEQ L_mlkem_thumb2_rej_uniform_done #else - BEQ.N L_kyber_thumb2_rej_uniform_done + BEQ.N L_mlkem_thumb2_rej_uniform_done #endif -L_kyber_thumb2_rej_uniform_fail_7: +L_mlkem_thumb2_rej_uniform_fail_7: SUBS r3, r3, #0xc #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BGT L_kyber_thumb2_rej_uniform_loop + BGT L_mlkem_thumb2_rej_uniform_loop #else - BGT.N L_kyber_thumb2_rej_uniform_loop + BGT.N L_mlkem_thumb2_rej_uniform_loop #endif -L_kyber_thumb2_rej_uniform_done: +L_mlkem_thumb2_rej_uniform_done: LSR r0, r9, #1 POP {r4, r5, r6, r7, r8, r9, r10, pc} /* Cycle Count = 225 */ - .size kyber_thumb2_rej_uniform,.-kyber_thumb2_rej_uniform -#endif /* WOLFSSL_WC_KYBER */ + .size mlkem_thumb2_rej_uniform,.-mlkem_thumb2_rej_uniform +#endif /* WOLFSSL_WC_MLKEM */ #endif /* WOLFSSL_ARMASM_THUMB2 */ #endif /* WOLFSSL_ARMASM */ diff --git a/wolfcrypt/src/port/arm/thumb2-kyber-asm_c.c b/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c similarity index 90% rename from wolfcrypt/src/port/arm/thumb2-kyber-asm_c.c rename to wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c index 9c4621110..a693a91ea 100644 --- a/wolfcrypt/src/port/arm/thumb2-kyber-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c @@ -1,4 +1,4 @@ -/* thumb2-kyber-asm +/* thumb2-mlkem-asm * * Copyright (C) 2006-2025 wolfSSL Inc. 
* @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./kyber/kyber.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-kyber-asm.c + * ruby ./kyber/kyber.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-mlkem-asm.c */ #ifdef HAVE_CONFIG_H @@ -43,10 +44,10 @@ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ -#include +#include -#ifdef WOLFSSL_WC_KYBER -XALIGNED(16) static const word16 L_kyber_thumb2_ntt_zetas[] = { +#ifdef WOLFSSL_WC_MLKEM +XALIGNED(16) static const word16 L_mlkem_thumb2_ntt_zetas[] = { 0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca, 0x0c56, 0x026e, 0x0629, 0x00b6, 0x03c2, 0x084f, 0x073f, 0x05bc, 0x023d, 0x07d4, 0x0108, 0x017f, 0x09c4, 0x05b2, 0x06bf, 0x0c7f, @@ -66,19 +67,21 @@ XALIGNED(16) static const word16 L_kyber_thumb2_ntt_zetas[] = { }; #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void kyber_thumb2_ntt(sword16* r_p) +void mlkem_thumb2_ntt(sword16* r_p) #else -void kyber_thumb2_ntt(sword16* r) +void mlkem_thumb2_ntt(sword16* r) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword16* r __asm__ ("r0") = (sword16*)r_p; - register word16* L_kyber_thumb2_ntt_zetas_c __asm__ ("r1") = (word16*)&L_kyber_thumb2_ntt_zetas; + register word16* L_mlkem_thumb2_ntt_zetas_c __asm__ ("r1") = + (word16*)&L_mlkem_thumb2_ntt_zetas; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" - "MOV r1, %[L_kyber_thumb2_ntt_zetas]\n\t" + "MOV r1, %[L_mlkem_thumb2_ntt_zetas]\n\t" #ifndef WOLFSSL_ARM_ARCH_7M "MOV r12, #0xd01\n\t" "MOVT r12, #0xcff\n\t" @@ -86,9 +89,9 @@ void kyber_thumb2_ntt(sword16* r) "MOV r2, #0x10\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_ntt_loop_123:\n\t" + "L_mlkem_thumb2_ntt_loop_123:\n\t" #else - "L_kyber_thumb2_ntt_loop_123_%=:\n\t" + "L_mlkem_thumb2_ntt_loop_123_%=:\n\t" #endif "STR r2, [sp]\n\t" "LDRH lr, [r1, #2]\n\t" @@ -523,19 +526,19 @@ void 
kyber_thumb2_ntt(sword16* r) "SUBS r2, r2, #0x1\n\t" "ADD %[r], %[r], #0x4\n\t" #if defined(__GNUC__) - "BNE L_kyber_thumb2_ntt_loop_123_%=\n\t" + "BNE L_mlkem_thumb2_ntt_loop_123_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_kyber_thumb2_ntt_loop_123\n\t" + "BNE.N L_mlkem_thumb2_ntt_loop_123\n\t" #else - "BNE.N L_kyber_thumb2_ntt_loop_123_%=\n\t" + "BNE.N L_mlkem_thumb2_ntt_loop_123_%=\n\t" #endif "SUB %[r], %[r], #0x40\n\t" "MOV r3, #0x0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_ntt_loop_4_j:\n\t" + "L_mlkem_thumb2_ntt_loop_4_j:\n\t" #else - "L_kyber_thumb2_ntt_loop_4_j_%=:\n\t" + "L_mlkem_thumb2_ntt_loop_4_j_%=:\n\t" #endif "STR r3, [sp, #4]\n\t" "ADD lr, r1, r3, LSR #4\n\t" @@ -543,9 +546,9 @@ void kyber_thumb2_ntt(sword16* r) "LDR lr, [lr, #16]\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_ntt_loop_4_i:\n\t" + "L_mlkem_thumb2_ntt_loop_4_i:\n\t" #else - "L_kyber_thumb2_ntt_loop_4_i_%=:\n\t" + "L_mlkem_thumb2_ntt_loop_4_i_%=:\n\t" #endif "STR r2, [sp]\n\t" "LDR r2, [%[r]]\n\t" @@ -704,29 +707,29 @@ void kyber_thumb2_ntt(sword16* r) "SUBS r2, r2, #0x1\n\t" "ADD %[r], %[r], #0x4\n\t" #if defined(__GNUC__) - "BNE L_kyber_thumb2_ntt_loop_4_i_%=\n\t" + "BNE L_mlkem_thumb2_ntt_loop_4_i_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_kyber_thumb2_ntt_loop_4_i\n\t" + "BNE.N L_mlkem_thumb2_ntt_loop_4_i\n\t" #else - "BNE.N L_kyber_thumb2_ntt_loop_4_i_%=\n\t" + "BNE.N L_mlkem_thumb2_ntt_loop_4_i_%=\n\t" #endif "ADD r3, r3, #0x40\n\t" "RSBS r10, r3, #0x100\n\t" "ADD %[r], %[r], #0x70\n\t" #if defined(__GNUC__) - "BNE L_kyber_thumb2_ntt_loop_4_j_%=\n\t" + "BNE L_mlkem_thumb2_ntt_loop_4_j_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_kyber_thumb2_ntt_loop_4_j\n\t" + "BNE.N L_mlkem_thumb2_ntt_loop_4_j\n\t" #else - "BNE.N L_kyber_thumb2_ntt_loop_4_j_%=\n\t" + "BNE.N L_mlkem_thumb2_ntt_loop_4_j_%=\n\t" #endif 
"SUB %[r], %[r], #0x200\n\t" "MOV r3, #0x0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_ntt_loop_567:\n\t" + "L_mlkem_thumb2_ntt_loop_567:\n\t" #else - "L_kyber_thumb2_ntt_loop_567_%=:\n\t" + "L_mlkem_thumb2_ntt_loop_567_%=:\n\t" #endif "ADD lr, r1, r3, LSR #3\n\t" "STR r3, [sp, #4]\n\t" @@ -1348,27 +1351,22 @@ void kyber_thumb2_ntt(sword16* r) "RSBS r10, r3, #0x100\n\t" "ADD %[r], %[r], #0x20\n\t" #if defined(__GNUC__) - "BNE L_kyber_thumb2_ntt_loop_567_%=\n\t" + "BNE L_mlkem_thumb2_ntt_loop_567_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_kyber_thumb2_ntt_loop_567\n\t" + "BNE.N L_mlkem_thumb2_ntt_loop_567\n\t" #else - "BNE.N L_kyber_thumb2_ntt_loop_567_%=\n\t" + "BNE.N L_mlkem_thumb2_ntt_loop_567_%=\n\t" #endif "ADD sp, sp, #0x8\n\t" -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), - [L_kyber_thumb2_ntt_zetas] "+r" (L_kyber_thumb2_ntt_zetas_c) + [L_mlkem_thumb2_ntt_zetas] "+r" (L_mlkem_thumb2_ntt_zetas_c) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" -#else - : [r] "+r" (r) - : [L_kyber_thumb2_ntt_zetas] "r" (L_kyber_thumb2_ntt_zetas) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } -XALIGNED(16) static const word16 L_kyber_thumb2_invntt_zetas_inv[] = { +XALIGNED(16) static const word16 L_mlkem_invntt_zetas_inv[] = { 0x06a5, 0x070f, 0x05b4, 0x0943, 0x0922, 0x091d, 0x0134, 0x006c, 0x0b23, 0x0366, 0x0356, 0x05e6, 0x09e7, 0x04fe, 0x05fa, 0x04a1, 0x067b, 0x04a3, 0x0c25, 0x036a, 0x0537, 0x083f, 0x0088, 0x04bf, @@ -1388,19 +1386,21 @@ XALIGNED(16) static const word16 L_kyber_thumb2_invntt_zetas_inv[] = { }; #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void kyber_thumb2_invntt(sword16* r_p) +void mlkem_thumb2_invntt(sword16* r_p) #else -void kyber_thumb2_invntt(sword16* r) 
+void mlkem_thumb2_invntt(sword16* r) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword16* r __asm__ ("r0") = (sword16*)r_p; - register word16* L_kyber_thumb2_invntt_zetas_inv_c __asm__ ("r1") = (word16*)&L_kyber_thumb2_invntt_zetas_inv; + register word16* L_mlkem_invntt_zetas_inv_c __asm__ ("r1") = + (word16*)&L_mlkem_invntt_zetas_inv; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" - "MOV r1, %[L_kyber_thumb2_invntt_zetas_inv]\n\t" + "MOV r1, %[L_mlkem_invntt_zetas_inv]\n\t" #ifndef WOLFSSL_ARM_ARCH_7M "MOV r12, #0xd01\n\t" "MOVT r12, #0xcff\n\t" @@ -1408,9 +1408,9 @@ void kyber_thumb2_invntt(sword16* r) "MOV r3, #0x0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_invntt_loop_765:\n\t" + "L_mlkem_invntt_loop_765:\n\t" #else - "L_kyber_thumb2_invntt_loop_765_%=:\n\t" + "L_mlkem_invntt_loop_765_%=:\n\t" #endif "ADD lr, r1, r3, LSR #1\n\t" "STR r3, [sp, #4]\n\t" @@ -2003,19 +2003,19 @@ void kyber_thumb2_invntt(sword16* r) "RSBS r10, r3, #0x100\n\t" "ADD %[r], %[r], #0x20\n\t" #if defined(__GNUC__) - "BNE L_kyber_thumb2_invntt_loop_765_%=\n\t" + "BNE L_mlkem_invntt_loop_765_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_kyber_thumb2_invntt_loop_765\n\t" + "BNE.N L_mlkem_invntt_loop_765\n\t" #else - "BNE.N L_kyber_thumb2_invntt_loop_765_%=\n\t" + "BNE.N L_mlkem_invntt_loop_765_%=\n\t" #endif "SUB %[r], %[r], #0x200\n\t" "MOV r3, #0x0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_invntt_loop_4_j:\n\t" + "L_mlkem_invntt_loop_4_j:\n\t" #else - "L_kyber_thumb2_invntt_loop_4_j_%=:\n\t" + "L_mlkem_invntt_loop_4_j_%=:\n\t" #endif "STR r3, [sp, #4]\n\t" "ADD lr, r1, r3, LSR #4\n\t" @@ -2023,9 +2023,9 @@ void kyber_thumb2_invntt(sword16* r) "LDR lr, [lr, #224]\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_invntt_loop_4_i:\n\t" + 
"L_mlkem_invntt_loop_4_i:\n\t" #else - "L_kyber_thumb2_invntt_loop_4_i_%=:\n\t" + "L_mlkem_invntt_loop_4_i_%=:\n\t" #endif "STR r2, [sp]\n\t" "LDR r2, [%[r]]\n\t" @@ -2204,29 +2204,29 @@ void kyber_thumb2_invntt(sword16* r) "SUBS r2, r2, #0x1\n\t" "ADD %[r], %[r], #0x4\n\t" #if defined(__GNUC__) - "BNE L_kyber_thumb2_invntt_loop_4_i_%=\n\t" + "BNE L_mlkem_invntt_loop_4_i_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_kyber_thumb2_invntt_loop_4_i\n\t" + "BNE.N L_mlkem_invntt_loop_4_i\n\t" #else - "BNE.N L_kyber_thumb2_invntt_loop_4_i_%=\n\t" + "BNE.N L_mlkem_invntt_loop_4_i_%=\n\t" #endif "ADD r3, r3, #0x40\n\t" "RSBS r10, r3, #0x100\n\t" "ADD %[r], %[r], #0x70\n\t" #if defined(__GNUC__) - "BNE L_kyber_thumb2_invntt_loop_4_j_%=\n\t" + "BNE L_mlkem_invntt_loop_4_j_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_kyber_thumb2_invntt_loop_4_j\n\t" + "BNE.N L_mlkem_invntt_loop_4_j\n\t" #else - "BNE.N L_kyber_thumb2_invntt_loop_4_j_%=\n\t" + "BNE.N L_mlkem_invntt_loop_4_j_%=\n\t" #endif "SUB %[r], %[r], #0x200\n\t" "MOV r2, #0x10\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_invntt_loop_321:\n\t" + "L_mlkem_invntt_loop_321:\n\t" #else - "L_kyber_thumb2_invntt_loop_321_%=:\n\t" + "L_mlkem_invntt_loop_321_%=:\n\t" #endif "STR r2, [sp]\n\t" "LDRH lr, [r1, #2]\n\t" @@ -3037,27 +3037,22 @@ void kyber_thumb2_invntt(sword16* r) "SUBS r2, r2, #0x1\n\t" "ADD %[r], %[r], #0x4\n\t" #if defined(__GNUC__) - "BNE L_kyber_thumb2_invntt_loop_321_%=\n\t" + "BNE L_mlkem_invntt_loop_321_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_kyber_thumb2_invntt_loop_321\n\t" + "BNE.N L_mlkem_invntt_loop_321\n\t" #else - "BNE.N L_kyber_thumb2_invntt_loop_321_%=\n\t" + "BNE.N L_mlkem_invntt_loop_321_%=\n\t" #endif "ADD sp, sp, #0x8\n\t" -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), - [L_kyber_thumb2_invntt_zetas_inv] "+r" (L_kyber_thumb2_invntt_zetas_inv_c) + 
[L_mlkem_invntt_zetas_inv] "+r" (L_mlkem_invntt_zetas_inv_c) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" -#else - : [r] "+r" (r) - : [L_kyber_thumb2_invntt_zetas_inv] "r" (L_kyber_thumb2_invntt_zetas_inv) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } -XALIGNED(16) static const word16 L_kyber_thumb2_basemul_mont_zetas[] = { +XALIGNED(16) static const word16 L_mlkem_basemul_mont_zetas[] = { 0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca, 0x0c56, 0x026e, 0x0629, 0x00b6, 0x03c2, 0x084f, 0x073f, 0x05bc, 0x023d, 0x07d4, 0x0108, 0x017f, 0x09c4, 0x05b2, 0x06bf, 0x0c7f, @@ -3077,20 +3072,23 @@ XALIGNED(16) static const word16 L_kyber_thumb2_basemul_mont_zetas[] = { }; #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void kyber_thumb2_basemul_mont(sword16* r_p, const sword16* a_p, const sword16* b_p) +void mlkem_thumb2_basemul_mont(sword16* r_p, const sword16* a_p, + const sword16* b_p) #else -void kyber_thumb2_basemul_mont(sword16* r, const sword16* a, const sword16* b) +void mlkem_thumb2_basemul_mont(sword16* r, const sword16* a, const sword16* b) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword16* r __asm__ ("r0") = (sword16*)r_p; register const sword16* a __asm__ ("r1") = (const sword16*)a_p; register const sword16* b __asm__ ("r2") = (const sword16*)b_p; - register word16* L_kyber_thumb2_basemul_mont_zetas_c __asm__ ("r3") = (word16*)&L_kyber_thumb2_basemul_mont_zetas; + register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r3") = + (word16*)&L_mlkem_basemul_mont_zetas; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r3, %[L_kyber_thumb2_basemul_mont_zetas]\n\t" + "MOV r3, %[L_mlkem_basemul_mont_zetas]\n\t" "ADD r3, r3, #0x80\n\t" #ifndef WOLFSSL_ARM_ARCH_7M "MOV 
r12, #0xd01\n\t" @@ -3099,12 +3097,12 @@ void kyber_thumb2_basemul_mont(sword16* r, const sword16* a, const sword16* b) "MOV r8, #0x0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_basemul_mont_loop:\n\t" + "L_mlkem_basemul_mont_loop:\n\t" #else - "L_kyber_thumb2_basemul_mont_loop_%=:\n\t" + "L_mlkem_basemul_mont_loop_%=:\n\t" #endif - "LDM %[a]!, {r4, r5}\n\t" - "LDM %[b]!, {r6, r7}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "LDR lr, [r3, r8]\n\t" "ADD r8, r8, #0x2\n\t" "PUSH {r8}\n\t" @@ -3205,40 +3203,39 @@ void kyber_thumb2_basemul_mont(sword16* r, const sword16* a, const sword16* b) "STM %[r]!, {r4, r5}\n\t" "POP {r8}\n\t" #if defined(__GNUC__) - "BNE L_kyber_thumb2_basemul_mont_loop_%=\n\t" + "BNE L_mlkem_basemul_mont_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_kyber_thumb2_basemul_mont_loop\n\t" + "BNE.N L_mlkem_basemul_mont_loop\n\t" #else - "BNE.N L_kyber_thumb2_basemul_mont_loop_%=\n\t" + "BNE.N L_mlkem_basemul_mont_loop_%=\n\t" #endif -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), - [L_kyber_thumb2_basemul_mont_zetas] "+r" (L_kyber_thumb2_basemul_mont_zetas_c) + [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" -#else - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [L_kyber_thumb2_basemul_mont_zetas] "r" (L_kyber_thumb2_basemul_mont_zetas) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12", "lr" ); } #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void kyber_thumb2_basemul_mont_add(sword16* r_p, const sword16* a_p, const sword16* b_p) +void mlkem_thumb2_basemul_mont_add(sword16* r_p, const sword16* a_p, + const sword16* b_p) #else -void kyber_thumb2_basemul_mont_add(sword16* r, const sword16* a, 
const sword16* b) +void mlkem_thumb2_basemul_mont_add(sword16* r, const sword16* a, + const sword16* b) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword16* r __asm__ ("r0") = (sword16*)r_p; register const sword16* a __asm__ ("r1") = (const sword16*)a_p; register const sword16* b __asm__ ("r2") = (const sword16*)b_p; - register word16* L_kyber_thumb2_basemul_mont_zetas_c __asm__ ("r3") = (word16*)&L_kyber_thumb2_basemul_mont_zetas; + register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r3") = + (word16*)&L_mlkem_basemul_mont_zetas; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MOV r3, %[L_kyber_thumb2_basemul_mont_zetas]\n\t" + "MOV r3, %[L_mlkem_basemul_mont_zetas]\n\t" "ADD r3, r3, #0x80\n\t" #ifndef WOLFSSL_ARM_ARCH_7M "MOV r12, #0xd01\n\t" @@ -3247,12 +3244,12 @@ void kyber_thumb2_basemul_mont_add(sword16* r, const sword16* a, const sword16* "MOV r8, #0x0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_basemul_mont_add_loop:\n\t" + "L_mlkem_thumb2_basemul_mont_add_loop:\n\t" #else - "L_kyber_thumb2_basemul_mont_add_loop_%=:\n\t" + "L_mlkem_thumb2_basemul_mont_add_loop_%=:\n\t" #endif - "LDM %[a]!, {r4, r5}\n\t" - "LDM %[b]!, {r6, r7}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "LDR lr, [r3, r8]\n\t" "ADD r8, r8, #0x2\n\t" "PUSH {r8}\n\t" @@ -3281,7 +3278,7 @@ void kyber_thumb2_basemul_mont_add(sword16* r, const sword16* a, const sword16* "SMULTB r7, r12, r11\n\t" "SMLABB r9, r12, r6, r9\n\t" "SMLABB r11, r12, r7, r11\n\t" - "LDM %[r], {r4, r5}\n\t" + "ldm %[r], {r4, r5}\n\t" "PKHTB r9, r9, r8, ASR #16\n\t" "PKHTB r11, r11, r10, ASR #16\n\t" "SADD16 r4, r4, r9\n\t" @@ -3348,7 +3345,7 @@ void kyber_thumb2_basemul_mont_add(sword16* r, const sword16* a, const sword16* "SBFX r5, r7, #0, #16\n\t" "MLA r9, r12, r4, r9\n\t" "MLA r11, r12, r5, r11\n\t" - "LDM %[r], {r4, r5}\n\t" + "ldm %[r], {r4, r5}\n\t" "BFC r9, #0, #16\n\t" "BFC r11, #0, #16\n\t" 
"ORR r9, r9, r8, LSR #16\n\t" @@ -3365,34 +3362,31 @@ void kyber_thumb2_basemul_mont_add(sword16* r, const sword16* a, const sword16* "STM %[r]!, {r4, r5}\n\t" "POP {r8}\n\t" #if defined(__GNUC__) - "BNE L_kyber_thumb2_basemul_mont_add_loop_%=\n\t" + "BNE L_mlkem_thumb2_basemul_mont_add_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_kyber_thumb2_basemul_mont_add_loop\n\t" + "BNE.N L_mlkem_thumb2_basemul_mont_add_loop\n\t" #else - "BNE.N L_kyber_thumb2_basemul_mont_add_loop_%=\n\t" + "BNE.N L_mlkem_thumb2_basemul_mont_add_loop_%=\n\t" #endif -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), - [L_kyber_thumb2_basemul_mont_zetas] "+r" (L_kyber_thumb2_basemul_mont_zetas_c) + [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" -#else - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [L_kyber_thumb2_basemul_mont_zetas] "r" (L_kyber_thumb2_basemul_mont_zetas) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12", "lr" ); } #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void kyber_thumb2_csubq(sword16* p_p) +void mlkem_thumb2_csubq(sword16* p_p) #else -void kyber_thumb2_csubq(sword16* p) +void mlkem_thumb2_csubq(sword16* p) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG register sword16* p __asm__ ("r0") = (sword16*)p_p; - register word16* L_kyber_thumb2_basemul_mont_zetas_c __asm__ ("r1") = (word16*)&L_kyber_thumb2_basemul_mont_zetas; + register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r1") = + (word16*)&L_mlkem_basemul_mont_zetas; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3406,11 +3400,11 @@ void kyber_thumb2_csubq(sword16* p) "MOV r1, #0x100\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - 
"L_kyber_thumb2_csubq_loop:\n\t" + "L_mlkem_thumb2_csubq_loop:\n\t" #else - "L_kyber_thumb2_csubq_loop_%=:\n\t" + "L_mlkem_thumb2_csubq_loop_%=:\n\t" #endif - "LDM %[p], {r2, r3, r4, r5}\n\t" + "ldm %[p], {r2, r3, r4, r5}\n\t" #ifndef WOLFSSL_ARM_ARCH_7M "SSUB16 r2, r2, r12\n\t" "SSUB16 r3, r3, r12\n\t" @@ -3477,29 +3471,26 @@ void kyber_thumb2_csubq(sword16* p) "STM %[p]!, {r2, r3, r4, r5}\n\t" "SUBS r1, r1, #0x8\n\t" #if defined(__GNUC__) - "BNE L_kyber_thumb2_csubq_loop_%=\n\t" + "BNE L_mlkem_thumb2_csubq_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_kyber_thumb2_csubq_loop\n\t" + "BNE.N L_mlkem_thumb2_csubq_loop\n\t" #else - "BNE.N L_kyber_thumb2_csubq_loop_%=\n\t" + "BNE.N L_mlkem_thumb2_csubq_loop_%=\n\t" #endif -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [p] "+r" (p), - [L_kyber_thumb2_basemul_mont_zetas] "+r" (L_kyber_thumb2_basemul_mont_zetas_c) + [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" -#else - : [p] "+r" (p) - : [L_kyber_thumb2_basemul_mont_zetas] "r" (L_kyber_thumb2_basemul_mont_zetas) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -unsigned int kyber_thumb2_rej_uniform(sword16* p_p, unsigned int len_p, const byte* r_p, unsigned int rLen_p) +unsigned int mlkem_thumb2_rej_uniform(sword16* p_p, unsigned int len_p, + const byte* r_p, unsigned int rLen_p) #else -unsigned int kyber_thumb2_rej_uniform(sword16* p, unsigned int len, const byte* r, unsigned int rLen) +unsigned int mlkem_thumb2_rej_uniform(sword16* p, unsigned int len, + const byte* r, unsigned int rLen) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -3507,7 +3498,9 @@ unsigned int 
kyber_thumb2_rej_uniform(sword16* p, unsigned int len, const byte* register unsigned int len __asm__ ("r1") = (unsigned int)len_p; register const byte* r __asm__ ("r2") = (const byte*)r_p; register unsigned int rLen __asm__ ("r3") = (unsigned int)rLen_p; - register word16* L_kyber_thumb2_basemul_mont_zetas_c __asm__ ("r4") = (word16*)&L_kyber_thumb2_basemul_mont_zetas; + register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r4") = + (word16*)&L_mlkem_basemul_mont_zetas; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3515,19 +3508,19 @@ unsigned int kyber_thumb2_rej_uniform(sword16* p, unsigned int len, const byte* "MOV r9, #0x0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_rej_uniform_loop_no_fail:\n\t" + "L_mlkem_thumb2_rej_uniform_loop_no_fail:\n\t" #else - "L_kyber_thumb2_rej_uniform_loop_no_fail_%=:\n\t" + "L_mlkem_thumb2_rej_uniform_loop_no_fail_%=:\n\t" #endif "CMP %[len], #0x8\n\t" #if defined(__GNUC__) - "BLT L_kyber_thumb2_rej_uniform_done_no_fail_%=\n\t" + "BLT L_mlkem_thumb2_rej_uniform_done_no_fail_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BLT.N L_kyber_thumb2_rej_uniform_done_no_fail\n\t" + "BLT.N L_mlkem_thumb2_rej_uniform_done_no_fail\n\t" #else - "BLT.N L_kyber_thumb2_rej_uniform_done_no_fail_%=\n\t" + "BLT.N L_mlkem_thumb2_rej_uniform_done_no_fail_%=\n\t" #endif - "LDM %[r]!, {r4, r5, r6}\n\t" + "ldm %[r]!, {r4, r5, r6}\n\t" "UBFX r7, r4, #0, #12\n\t" "STRH r7, [%[p], r9]\n\t" "SUB r10, r7, r8\n\t" @@ -3580,272 +3573,266 @@ unsigned int kyber_thumb2_rej_uniform(sword16* p, unsigned int len, const byte* "ADD r9, r9, r10, LSL #1\n\t" "SUBS %[rLen], %[rLen], #0xc\n\t" #if defined(__GNUC__) - "BNE L_kyber_thumb2_rej_uniform_loop_no_fail_%=\n\t" + "BNE L_mlkem_thumb2_rej_uniform_loop_no_fail_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_kyber_thumb2_rej_uniform_loop_no_fail\n\t" + "BNE.N L_mlkem_thumb2_rej_uniform_loop_no_fail\n\t" 
#else - "BNE.N L_kyber_thumb2_rej_uniform_loop_no_fail_%=\n\t" + "BNE.N L_mlkem_thumb2_rej_uniform_loop_no_fail_%=\n\t" #endif #if defined(__GNUC__) - "B L_kyber_thumb2_rej_uniform_done_%=\n\t" + "B L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "B.N L_kyber_thumb2_rej_uniform_done\n\t" + "B.N L_mlkem_thumb2_rej_uniform_done\n\t" #else - "B.N L_kyber_thumb2_rej_uniform_done_%=\n\t" + "B.N L_mlkem_thumb2_rej_uniform_done_%=\n\t" #endif "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_rej_uniform_done_no_fail:\n\t" + "L_mlkem_thumb2_rej_uniform_done_no_fail:\n\t" #else - "L_kyber_thumb2_rej_uniform_done_no_fail_%=:\n\t" + "L_mlkem_thumb2_rej_uniform_done_no_fail_%=:\n\t" #endif "CMP %[len], #0x0\n\t" #if defined(__GNUC__) - "BEQ L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BEQ.N L_kyber_thumb2_rej_uniform_done\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t" #else - "BEQ.N L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t" #endif "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_rej_uniform_loop:\n\t" + "L_mlkem_thumb2_rej_uniform_loop:\n\t" #else - "L_kyber_thumb2_rej_uniform_loop_%=:\n\t" + "L_mlkem_thumb2_rej_uniform_loop_%=:\n\t" #endif - "LDM %[r]!, {r4, r5, r6}\n\t" + "ldm %[r]!, {r4, r5, r6}\n\t" "UBFX r7, r4, #0, #12\n\t" "CMP r7, r8\n\t" #if defined(__GNUC__) - "BGE L_kyber_thumb2_rej_uniform_fail_0_%=\n\t" + "BGE L_mlkem_thumb2_rej_uniform_fail_0_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BGE.N L_kyber_thumb2_rej_uniform_fail_0\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_0\n\t" #else - "BGE.N L_kyber_thumb2_rej_uniform_fail_0_%=\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_0_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" "SUBS %[len], %[len], #0x1\n\t" "ADD r9, r9, #0x2\n\t" #if 
defined(__GNUC__) - "BEQ L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BEQ.N L_kyber_thumb2_rej_uniform_done\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t" #else - "BEQ.N L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t" #endif "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_rej_uniform_fail_0:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_0:\n\t" #else - "L_kyber_thumb2_rej_uniform_fail_0_%=:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_0_%=:\n\t" #endif "UBFX r7, r4, #12, #12\n\t" "CMP r7, r8\n\t" #if defined(__GNUC__) - "BGE L_kyber_thumb2_rej_uniform_fail_1_%=\n\t" + "BGE L_mlkem_thumb2_rej_uniform_fail_1_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BGE.N L_kyber_thumb2_rej_uniform_fail_1\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_1\n\t" #else - "BGE.N L_kyber_thumb2_rej_uniform_fail_1_%=\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_1_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" "SUBS %[len], %[len], #0x1\n\t" "ADD r9, r9, #0x2\n\t" #if defined(__GNUC__) - "BEQ L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BEQ.N L_kyber_thumb2_rej_uniform_done\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t" #else - "BEQ.N L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t" #endif "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_rej_uniform_fail_1:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_1:\n\t" #else - "L_kyber_thumb2_rej_uniform_fail_1_%=:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_1_%=:\n\t" #endif "UBFX r7, r4, #24, #8\n\t" "BFI r7, r5, #8, #4\n\t" "CMP r7, r8\n\t" #if defined(__GNUC__) - "BGE L_kyber_thumb2_rej_uniform_fail_2_%=\n\t" + "BGE L_mlkem_thumb2_rej_uniform_fail_2_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && 
(__VER__ < 9000000) - "BGE.N L_kyber_thumb2_rej_uniform_fail_2\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_2\n\t" #else - "BGE.N L_kyber_thumb2_rej_uniform_fail_2_%=\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_2_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" "SUBS %[len], %[len], #0x1\n\t" "ADD r9, r9, #0x2\n\t" #if defined(__GNUC__) - "BEQ L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BEQ.N L_kyber_thumb2_rej_uniform_done\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t" #else - "BEQ.N L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t" #endif "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_rej_uniform_fail_2:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_2:\n\t" #else - "L_kyber_thumb2_rej_uniform_fail_2_%=:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_2_%=:\n\t" #endif "UBFX r7, r5, #4, #12\n\t" "CMP r7, r8\n\t" #if defined(__GNUC__) - "BGE L_kyber_thumb2_rej_uniform_fail_3_%=\n\t" + "BGE L_mlkem_thumb2_rej_uniform_fail_3_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BGE.N L_kyber_thumb2_rej_uniform_fail_3\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_3\n\t" #else - "BGE.N L_kyber_thumb2_rej_uniform_fail_3_%=\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_3_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" "SUBS %[len], %[len], #0x1\n\t" "ADD r9, r9, #0x2\n\t" #if defined(__GNUC__) - "BEQ L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BEQ.N L_kyber_thumb2_rej_uniform_done\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t" #else - "BEQ.N L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t" #endif "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_rej_uniform_fail_3:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_3:\n\t" #else - 
"L_kyber_thumb2_rej_uniform_fail_3_%=:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_3_%=:\n\t" #endif "UBFX r7, r5, #16, #12\n\t" "CMP r7, r8\n\t" #if defined(__GNUC__) - "BGE L_kyber_thumb2_rej_uniform_fail_4_%=\n\t" + "BGE L_mlkem_thumb2_rej_uniform_fail_4_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BGE.N L_kyber_thumb2_rej_uniform_fail_4\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_4\n\t" #else - "BGE.N L_kyber_thumb2_rej_uniform_fail_4_%=\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_4_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" "SUBS %[len], %[len], #0x1\n\t" "ADD r9, r9, #0x2\n\t" #if defined(__GNUC__) - "BEQ L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BEQ.N L_kyber_thumb2_rej_uniform_done\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t" #else - "BEQ.N L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t" #endif "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_rej_uniform_fail_4:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_4:\n\t" #else - "L_kyber_thumb2_rej_uniform_fail_4_%=:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_4_%=:\n\t" #endif "UBFX r7, r5, #28, #4\n\t" "BFI r7, r6, #4, #8\n\t" "CMP r7, r8\n\t" #if defined(__GNUC__) - "BGE L_kyber_thumb2_rej_uniform_fail_5_%=\n\t" + "BGE L_mlkem_thumb2_rej_uniform_fail_5_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BGE.N L_kyber_thumb2_rej_uniform_fail_5\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_5\n\t" #else - "BGE.N L_kyber_thumb2_rej_uniform_fail_5_%=\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_5_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" "SUBS %[len], %[len], #0x1\n\t" "ADD r9, r9, #0x2\n\t" #if defined(__GNUC__) - "BEQ L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BEQ.N L_kyber_thumb2_rej_uniform_done\n\t" 
+ "BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t" #else - "BEQ.N L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t" #endif "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_rej_uniform_fail_5:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_5:\n\t" #else - "L_kyber_thumb2_rej_uniform_fail_5_%=:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_5_%=:\n\t" #endif "UBFX r7, r6, #8, #12\n\t" "CMP r7, r8\n\t" #if defined(__GNUC__) - "BGE L_kyber_thumb2_rej_uniform_fail_6_%=\n\t" + "BGE L_mlkem_thumb2_rej_uniform_fail_6_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BGE.N L_kyber_thumb2_rej_uniform_fail_6\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_6\n\t" #else - "BGE.N L_kyber_thumb2_rej_uniform_fail_6_%=\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_6_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" "SUBS %[len], %[len], #0x1\n\t" "ADD r9, r9, #0x2\n\t" #if defined(__GNUC__) - "BEQ L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BEQ.N L_kyber_thumb2_rej_uniform_done\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t" #else - "BEQ.N L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t" #endif "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_rej_uniform_fail_6:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_6:\n\t" #else - "L_kyber_thumb2_rej_uniform_fail_6_%=:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_6_%=:\n\t" #endif "UBFX r7, r6, #20, #12\n\t" "CMP r7, r8\n\t" #if defined(__GNUC__) - "BGE L_kyber_thumb2_rej_uniform_fail_7_%=\n\t" + "BGE L_mlkem_thumb2_rej_uniform_fail_7_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BGE.N L_kyber_thumb2_rej_uniform_fail_7\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_7\n\t" #else - "BGE.N L_kyber_thumb2_rej_uniform_fail_7_%=\n\t" + "BGE.N L_mlkem_thumb2_rej_uniform_fail_7_%=\n\t" #endif "STRH r7, 
[%[p], r9]\n\t" "SUBS %[len], %[len], #0x1\n\t" "ADD r9, r9, #0x2\n\t" #if defined(__GNUC__) - "BEQ L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BEQ.N L_kyber_thumb2_rej_uniform_done\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t" #else - "BEQ.N L_kyber_thumb2_rej_uniform_done_%=\n\t" + "BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t" #endif "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_rej_uniform_fail_7:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_7:\n\t" #else - "L_kyber_thumb2_rej_uniform_fail_7_%=:\n\t" + "L_mlkem_thumb2_rej_uniform_fail_7_%=:\n\t" #endif "SUBS %[rLen], %[rLen], #0xc\n\t" #if defined(__GNUC__) - "BGT L_kyber_thumb2_rej_uniform_loop_%=\n\t" + "BGT L_mlkem_thumb2_rej_uniform_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BGT.N L_kyber_thumb2_rej_uniform_loop\n\t" + "BGT.N L_mlkem_thumb2_rej_uniform_loop\n\t" #else - "BGT.N L_kyber_thumb2_rej_uniform_loop_%=\n\t" + "BGT.N L_mlkem_thumb2_rej_uniform_loop_%=\n\t" #endif "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_kyber_thumb2_rej_uniform_done:\n\t" + "L_mlkem_thumb2_rej_uniform_done:\n\t" #else - "L_kyber_thumb2_rej_uniform_done_%=:\n\t" + "L_mlkem_thumb2_rej_uniform_done_%=:\n\t" #endif "LSR r0, r9, #1\n\t" -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen), - [L_kyber_thumb2_basemul_mont_zetas] "+r" (L_kyber_thumb2_basemul_mont_zetas_c) + [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c) : - : "memory", "r5", "r6", "r7", "r8", "r9", "r10", "cc" -#else - : [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen) - : [L_kyber_thumb2_basemul_mont_zetas] "r" (L_kyber_thumb2_basemul_mont_zetas) - : "memory", "r5", "r6", "r7", "r8", "r9", "r10", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r5", "r6", "r7", "r8", "r9", "r10" ); 
return (word32)(size_t)p; } -#endif /* WOLFSSL_WC_KYBER */ +#endif /* WOLFSSL_WC_MLKEM */ #endif /* WOLFSSL_ARMASM_THUMB2 */ #endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S b/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S index ae81b119d..8e29d7e52 100644 --- a/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./poly1305/poly1305.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S + * ruby ./poly1305/poly1305.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S */ #ifdef HAVE_CONFIG_H diff --git a/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c b/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c index ac6ea4a1c..1bcf57b83 100644 --- a/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./poly1305/poly1305.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-poly1305-asm.c + * ruby ./poly1305/poly1305.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-poly1305-asm.c */ #ifdef HAVE_CONFIG_H @@ -47,9 +48,11 @@ #include #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void poly1305_blocks_thumb2_16(Poly1305* ctx_p, const byte* m_p, word32 len_p, int notLast_p) +void poly1305_blocks_thumb2_16(Poly1305* ctx_p, const byte* m_p, word32 len_p, + int notLast_p) #else -void poly1305_blocks_thumb2_16(Poly1305* ctx, const byte* m, word32 len, int notLast) +void poly1305_blocks_thumb2_16(Poly1305* ctx, const byte* m, word32 len, + int notLast) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -73,7 +76,7 @@ void poly1305_blocks_thumb2_16(Poly1305* ctx, const byte* m, word32 len, int not "STM lr, {%[ctx], %[m], %[len], %[notLast]}\n\t" /* Get h pointer */ "ADD lr, %[ctx], #0x10\n\t" - "LDM lr, {r4, r5, r6, r7, 
r8}\n\t" + "ldm lr, {r4, r5, r6, r7, r8}\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_poly1305_thumb2_16_loop:\n\t" @@ -195,7 +198,7 @@ void poly1305_blocks_thumb2_16(Poly1305* ctx, const byte* m, word32 len, int not "MOV r12, %[ctx]\n\t" "MLA r11, %[notLast], %[len], r11\n\t" #else - "LDM %[m], {%[ctx], %[m], %[len], %[notLast]}\n\t" + "ldm %[m], {r0, r1, r2, r3}\n\t" /* r[0] * h[0] */ "UMULL r10, r11, %[ctx], r4\n\t" /* r[1] * h[0] */ @@ -243,7 +246,7 @@ void poly1305_blocks_thumb2_16(Poly1305* ctx, const byte* m, word32 len, int not /* r[3] * h[4] */ "UMAAL r11, r12, %[notLast], r5\n\t" /* DONE */ - "LDM sp, {r4, r5, r6}\n\t" + "ldm sp, {r4, r5, r6}\n\t" #endif /* WOLFSSL_ARM_ARCH_7M */ /* r12 will be zero because r is masked. */ /* Load length */ @@ -288,9 +291,11 @@ void poly1305_blocks_thumb2_16(Poly1305* ctx, const byte* m, word32 len, int not "L_poly1305_thumb2_16_done_%=:\n\t" #endif "ADD sp, sp, #0x1c\n\t" - : [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len), [notLast] "+r" (notLast) + : [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len), + [notLast] "+r" (notLast) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12", "lr" ); } @@ -307,13 +312,15 @@ void poly1305_set_key(Poly1305* ctx, const byte* key) #ifndef WOLFSSL_NO_VAR_ASSIGN_REG register Poly1305* ctx __asm__ ("r0") = (Poly1305*)ctx_p; register const byte* key __asm__ ("r1") = (const byte*)key_p; - register word32* L_poly1305_thumb2_clamp_c __asm__ ("r2") = (word32*)&L_poly1305_thumb2_clamp; + register word32* L_poly1305_thumb2_clamp_c __asm__ ("r2") = + (word32*)&L_poly1305_thumb2_clamp; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( /* Load mask. */ "MOV r10, %[L_poly1305_thumb2_clamp]\n\t" - "LDM r10, {r6, r7, r8, r9}\n\t" + "ldm r10, {r6, r7, r8, r9}\n\t" /* Load and cache padding. 
*/ "LDR r2, [%[key], #16]\n\t" "LDR r3, [%[key], #20]\n\t" @@ -342,16 +349,10 @@ void poly1305_set_key(Poly1305* ctx, const byte* key) "STM r10, {r5, r6, r7, r8, r9}\n\t" /* Zero leftover */ "STR r5, [%[ctx], #52]\n\t" -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ctx] "+r" (ctx), [key] "+r" (key), [L_poly1305_thumb2_clamp] "+r" (L_poly1305_thumb2_clamp_c) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" -#else - : [ctx] "+r" (ctx), [key] "+r" (key) - : [L_poly1305_thumb2_clamp] "r" (L_poly1305_thumb2_clamp) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); } @@ -368,7 +369,7 @@ void poly1305_final(Poly1305* ctx, byte* mac) __asm__ __volatile__ ( "ADD r11, %[ctx], #0x10\n\t" - "LDM r11, {r2, r3, r4, r5, r6}\n\t" + "ldm r11, {r2, r3, r4, r5, r6}\n\t" /* Add 5 and check for h larger than p. */ "ADDS r7, r2, #0x5\n\t" "ADCS r7, r3, #0x0\n\t" @@ -386,7 +387,7 @@ void poly1305_final(Poly1305* ctx, byte* mac) "ADC r5, r5, #0x0\n\t" /* Add padding */ "ADD r11, %[ctx], #0x24\n\t" - "LDM r11, {r7, r8, r9, r10}\n\t" + "ldm r11, {r7, r8, r9, r10}\n\t" "ADDS r2, r2, r7\n\t" "ADCS r3, r3, r8\n\t" "ADCS r4, r4, r9\n\t" @@ -412,7 +413,8 @@ void poly1305_final(Poly1305* ctx, byte* mac) "STM r11, {r2, r3, r4, r5}\n\t" : [ctx] "+r" (ctx), [mac] "+r" (mac) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11" ); } diff --git a/wolfcrypt/src/port/arm/thumb2-sha256-asm.S b/wolfcrypt/src/port/arm/thumb2-sha256-asm.S index 0cb34864f..7af2b5df7 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha256-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-sha256-asm.S @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./sha2/sha256.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-sha256-asm.S + * ruby ./sha2/sha256.rb \ + * thumb2 
../wolfssl/wolfcrypt/src/port/arm/thumb2-sha256-asm.S */ #ifdef HAVE_CONFIG_H diff --git a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c index fee0d9d23..9804332fa 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./sha2/sha256.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-sha256-asm.c + * ruby ./sha2/sha256.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-sha256-asm.c */ #ifdef HAVE_CONFIG_H @@ -77,7 +78,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) register wc_Sha256* sha256 __asm__ ("r0") = (wc_Sha256*)sha256_p; register const byte* data __asm__ ("r1") = (const byte*)data_p; register word32 len __asm__ ("r2") = (word32)len_p; - register word32* L_SHA256_transform_len_k_c __asm__ ("r3") = (word32*)&L_SHA256_transform_len_k; + register word32* L_SHA256_transform_len_k_c __asm__ ("r3") = + (word32*)&L_SHA256_transform_len_k; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1460,16 +1463,11 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "BNE.W L_SHA256_transform_len_begin_%=\n\t" #endif "ADD sp, sp, #0xc0\n\t" -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" -#else - : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len) - : [L_SHA256_transform_len_k] "r" (L_SHA256_transform_len_k) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12" ); } diff --git a/wolfcrypt/src/port/arm/thumb2-sha3-asm.S b/wolfcrypt/src/port/arm/thumb2-sha3-asm.S index 
17c4d7d9c..50d645d6a 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha3-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-sha3-asm.S @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./sha3/sha3.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-sha3-asm.S + * ruby ./sha3/sha3.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-sha3-asm.S */ #ifdef HAVE_CONFIG_H diff --git a/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c index 64763b9eb..0fa620401 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./sha3/sha3.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-sha3-asm.c + * ruby ./sha3/sha3.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-sha3-asm.c */ #ifdef HAVE_CONFIG_H @@ -69,7 +70,9 @@ void BlockSha3(word64* state) { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG register word64* state __asm__ ("r0") = (word64*)state_p; - register word64* L_sha3_thumb2_rt_c __asm__ ("r1") = (word64*)&L_sha3_thumb2_rt; + register word64* L_sha3_thumb2_rt_c __asm__ ("r1") = + (word64*)&L_sha3_thumb2_rt; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1149,16 +1152,10 @@ void BlockSha3(word64* state) "BNE.W L_sha3_thumb2_begin_%=\n\t" #endif "ADD sp, sp, #0xcc\n\t" -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG - : [state] "+r" (state), - [L_sha3_thumb2_rt] "+r" (L_sha3_thumb2_rt_c) + : [state] "+r" (state), [L_sha3_thumb2_rt] "+r" (L_sha3_thumb2_rt_c) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" -#else - : [state] "+r" (state) - : [L_sha3_thumb2_rt] "r" (L_sha3_thumb2_rt) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } diff --git 
a/wolfcrypt/src/port/arm/thumb2-sha512-asm.S b/wolfcrypt/src/port/arm/thumb2-sha512-asm.S index b3e257cc8..6602e6e51 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha512-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm.S @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./sha2/sha512.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-sha512-asm.S + * ruby ./sha2/sha512.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-sha512-asm.S */ #ifdef HAVE_CONFIG_H diff --git a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c index 4968ab692..637f2894d 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c @@ -21,7 +21,8 @@ /* Generated using (from wolfssl): * cd ../scripts - * ruby ./sha2/sha512.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-sha512-asm.c + * ruby ./sha2/sha512.rb \ + * thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-sha512-asm.c */ #ifdef HAVE_CONFIG_H @@ -101,7 +102,9 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) register wc_Sha512* sha512 __asm__ ("r0") = (wc_Sha512*)sha512_p; register const byte* data __asm__ ("r1") = (const byte*)data_p; register word32 len __asm__ ("r2") = (word32)len_p; - register word64* L_SHA512_transform_len_k_c __asm__ ("r3") = (word64*)&L_SHA512_transform_len_k; + register word64* L_SHA512_transform_len_k_c __asm__ ("r3") = + (word64*)&L_SHA512_transform_len_k; + #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3575,16 +3578,11 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len) #endif "EOR r0, r0, r0\n\t" "ADD sp, sp, #0xc0\n\t" -#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" -#else - : [sha512] "+r" (sha512), [data] "+r" (data), 
[len] "+r" (len) - : [L_SHA512_transform_len_k] "r" (L_SHA512_transform_len_k) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12" ); } diff --git a/wolfcrypt/src/sha3_asm.S b/wolfcrypt/src/sha3_asm.S index 6ca133a92..056d31c7c 100644 --- a/wolfcrypt/src/sha3_asm.S +++ b/wolfcrypt/src/sha3_asm.S @@ -9520,15 +9520,15 @@ L_sha3_block_avx2_start: #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_sha3_blocksx4_avx2 -.type kyber_sha3_blocksx4_avx2,@function +.globl sha3_blocksx4_avx2 +.type sha3_blocksx4_avx2,@function .align 16 -kyber_sha3_blocksx4_avx2: +sha3_blocksx4_avx2: #else .section __TEXT,__text -.globl _kyber_sha3_blocksx4_avx2 +.globl _sha3_blocksx4_avx2 .p2align 4 -_kyber_sha3_blocksx4_avx2: +_sha3_blocksx4_avx2: #endif /* __APPLE__ */ leaq L_sha3_x4_avx2_r(%rip), %rdx vmovdqu (%rdi), %ymm15 @@ -14874,7 +14874,7 @@ _kyber_sha3_blocksx4_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_sha3_blocksx4_avx2,.-kyber_sha3_blocksx4_avx2 +.size sha3_blocksx4_avx2,.-sha3_blocksx4_avx2 #endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ diff --git a/wolfcrypt/src/wc_kyber.c b/wolfcrypt/src/wc_kyber.c deleted file mode 100644 index 781b93c72..000000000 --- a/wolfcrypt/src/wc_kyber.c +++ /dev/null @@ -1,2107 +0,0 @@ -/* wc_kyber.c - * - * Copyright (C) 2006-2025 wolfSSL Inc. - * - * This file is part of wolfSSL. - * - * wolfSSL is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * wolfSSL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA - */ - -/* Implementation based on FIPS 203: - * https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.203.pdf - * - * Original implementation based on NIST 3rd Round submission package. - * See link at: - * https://csrc.nist.gov/Projects/post-quantum-cryptography/post-quantum-cryptography-standardization/round-3-submissions - */ - -/* Possible Kyber options: - * - * WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM Default: OFF - * Uses less dynamic memory to perform key generation. - * Has a small performance trade-off. - * Only usable with C implementation. - * - * WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM Default: OFF - * Uses less dynamic memory to perform encapsulation. - * Affects decapsulation too as encapsulation called. - * Has a small performance trade-off. - * Only usable with C implementation. - * - * WOLFSSL_KYBER_NO_MAKE_KEY Default: OFF - * Disable the make key or key generation API. - * Reduces the code size. - * Turn on when only doing encapsulation. - * - * WOLFSSL_KYBER_NO_ENCAPSULATE Default: OFF - * Disable the encapsulation API. - * Reduces the code size. - * Turn on when doing make key/decapsulation. - * - * WOLFSSL_KYBER_NO_DECAPSULATE Default: OFF - * Disable the decapsulation API. - * Reduces the code size. - * Turn on when only doing encapsulation. - * - * WOLFSSL_MLKEM_CACHE_A Default: OFF - * Stores the matrix A during key generation for use in encapsulation when - * performing decapsulation. - * KyberKey is 8KB larger but decapsulation is significantly faster. - * Turn on when performing make key and decapsualtion with same object. 
- */ - -#ifdef HAVE_CONFIG_H - #include -#endif - -#include -#include -#include -#include -#include -#include - -#ifdef NO_INLINE - #include -#else - #define WOLFSSL_MISC_INCLUDED - #include -#endif - -#if defined(USE_INTEL_SPEEDUP) || \ - (defined(__aarch64__) && defined(WOLFSSL_ARMASM)) - #if defined(WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM) || \ - defined(WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM) - #error "Can't use small memory with assembly optimized code" - #endif -#endif -#if defined(WOLFSSL_MLKEM_CACHE_A) - #if defined(WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM) || \ - defined(WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM) - #error "Can't cache A with small memory code" - #endif -#endif - -#if defined(WOLFSSL_KYBER_NO_MAKE_KEY) && \ - defined(WOLFSSL_KYBER_NO_ENCAPSULATE) && \ - defined(WOLFSSL_KYBER_NO_DECAPSULATE) - #error "No ML-KEM operations to be built." -#endif - -#ifdef WOLFSSL_WC_KYBER - -/******************************************************************************/ - -/* Use SHA3-256 to generate 32-bytes of hash. */ -#define KYBER_HASH_H kyber_hash256 -/* Use SHA3-512 to generate 64-bytes of hash. */ -#define KYBER_HASH_G kyber_hash512 -/* Use SHAKE-256 as a key derivation function (KDF). */ -#if defined(USE_INTEL_SPEEDUP) || \ - (defined(WOLFSSL_ARMASM) && defined(__aarch64__)) - #define KYBER_KDF kyber_kdf -#else - #define KYBER_KDF wc_Shake256Hash -#endif - -/******************************************************************************/ - -/* Declare variable to make compiler not optimize code in kyber_from_msg(). */ -volatile sword16 kyber_opt_blocker = 0; - -/******************************************************************************/ - -/** - * Initialize the Kyber key. - * - * @param [in] type Type of key: - * WC_ML_KEM_512, WC_ML_KEM_768, WC_ML_KEM_1024, - * KYBER512, KYBER768, KYBER1024. - * @param [out] key Kyber key object to initialize. - * @param [in] heap Dynamic memory hint. - * @param [in] devId Device Id. - * @return 0 on success. 
- * @return BAD_FUNC_ARG when key is NULL or type is unrecognized. - * @return NOT_COMPILED_IN when key type is not supported. - */ -int wc_KyberKey_Init(int type, KyberKey* key, void* heap, int devId) -{ - int ret = 0; - - /* Validate key. */ - if (key == NULL) { - ret = BAD_FUNC_ARG; - } - if (ret == 0) { - /* Validate type. */ - switch (type) { - #ifndef WOLFSSL_NO_ML_KEM - case WC_ML_KEM_512: - #ifndef WOLFSSL_WC_ML_KEM_512 - /* Code not compiled in for Kyber-512. */ - ret = NOT_COMPILED_IN; - #endif - break; - case WC_ML_KEM_768: - #ifndef WOLFSSL_WC_ML_KEM_768 - /* Code not compiled in for Kyber-768. */ - ret = NOT_COMPILED_IN; - #endif - break; - case WC_ML_KEM_1024: - #ifndef WOLFSSL_WC_ML_KEM_1024 - /* Code not compiled in for Kyber-1024. */ - ret = NOT_COMPILED_IN; - #endif - break; - #endif - #ifdef WOLFSSL_KYBER_ORIGINAL - case KYBER512: - #ifndef WOLFSSL_KYBER512 - /* Code not compiled in for Kyber-512. */ - ret = NOT_COMPILED_IN; - #endif - break; - case KYBER768: - #ifndef WOLFSSL_KYBER768 - /* Code not compiled in for Kyber-768. */ - ret = NOT_COMPILED_IN; - #endif - break; - case KYBER1024: - #ifndef WOLFSSL_KYBER1024 - /* Code not compiled in for Kyber-1024. */ - ret = NOT_COMPILED_IN; - #endif - break; - #endif - default: - /* No other values supported. */ - ret = BAD_FUNC_ARG; - break; - } - } - if (ret == 0) { - /* Keep type for parameters. */ - key->type = type; - /* Cache heap pointer. */ - key->heap = heap; - #ifdef WOLF_CRYPTO_CB - /* Cache device id - not used in for this algorithm yet. */ - key->devId = devId; - #endif - key->flags = 0; - - /* Zero out all data. */ - XMEMSET(&key->prf, 0, sizeof(key->prf)); - - /* Initialize the hash algorithm object. */ - ret = kyber_hash_new(&key->hash, heap, devId); - } - if (ret == 0) { - /* Initialize the PRF algorithm object. */ - ret = kyber_prf_new(&key->prf, heap, devId); - } - if (ret == 0) { - kyber_init(); - } - - (void)devId; - - return ret; -} - -/** - * Free the Kyber key object. 
- * - * @param [in, out] key Kyber key object to dispose of. - */ -void wc_KyberKey_Free(KyberKey* key) -{ - if (key != NULL) { - /* Dispose of PRF object. */ - kyber_prf_free(&key->prf); - /* Dispose of hash object. */ - kyber_hash_free(&key->hash); - /* Ensure all private data is zeroed. */ - ForceZero(&key->hash, sizeof(key->hash)); - ForceZero(&key->prf, sizeof(key->prf)); - ForceZero(key->priv, sizeof(key->priv)); - ForceZero(key->z, sizeof(key->z)); - } -} - -/******************************************************************************/ - -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY -/** - * Make a Kyber key object using a random number generator. - * - * FIPS 203 - Algorithm 19: ML-KEM.KeyGen() - * Generates an encapsulation key and a corresponding decapsulation key. - * 1: d <- B_32 > d is 32 random bytes - * 2: z <- B_32 > z is 32 random bytes - * 3: if d == NULL or z == NULL then - * 4: return falsum - * > return an error indication if random bit generation failed - * 5: end if - * 6: (ek,dk) <- ML-KEM.KeyGen_Interal(d, z) - * > run internal key generation algorithm - * &: return (ek,dk) - * - * @param [in, out] key Kyber key object. - * @param [in] rng Random number generator. - * @return 0 on success. - * @return BAD_FUNC_ARG when key or rng is NULL. - * @return MEMORY_E when dynamic memory allocation failed. - * @return MEMORY_E when dynamic memory allocation failed. - * @return RNG_FAILURE_E when generating random numbers failed. - * @return DRBG_CONT_FAILURE when random number generator health check fails. - */ -int wc_KyberKey_MakeKey(KyberKey* key, WC_RNG* rng) -{ - int ret = 0; - unsigned char rand[KYBER_MAKEKEY_RAND_SZ]; - - /* Validate parameters. */ - if ((key == NULL) || (rng == NULL)) { - ret = BAD_FUNC_ARG; - } - - if (ret == 0) { - /* Generate random to use with PRFs. 
- * Step 1: d is 32 random bytes - * Step 2: z is 32 random bytes - */ - ret = wc_RNG_GenerateBlock(rng, rand, KYBER_SYM_SZ * 2); - /* Step 3: ret is not zero when d == NULL or z == NULL. */ - } - if (ret == 0) { - /* Make a key pair from the random. - * Step 6. run internal key generation algorithm - * Step 7. public and private key are stored in key - */ - ret = wc_KyberKey_MakeKeyWithRandom(key, rand, sizeof(rand)); - } - - /* Ensure seeds are zeroized. */ - ForceZero((void*)rand, (word32)sizeof(rand)); - - /* Step 4: return ret != 0 on falsum or internal key generation failure. */ - return ret; -} - -/** - * Make a Kyber key object using random data. - * - * FIPS 203 - Algorithm 16: ML-KEM.KeyGen_internal(d,z) - * Uses randomness to generate an encapsulation key and a corresponding - * decapsulation key. - * 1: (ek_PKE,dk_PKE) < K-PKE.KeyGen(d) > run key generation for K-PKE - * ... - * - * FIPS 203 - Algorithm 13: K-PKE.KeyGen(d) - * Uses randomness to generate an encryption key and a corresponding decryption - * key. - * 1: (rho,sigma) <- G(d||k)A - * > expand 32+1 bytes to two pseudorandom 32-byte seeds - * 2: N <- 0 - * 3-7: generate matrix A_hat - * 8-11: generate s - * 12-15: generate e - * 16-18: calculate t_hat from A_hat, s and e - * ... - * - * @param [in, out] key Kyber key ovject. - * @param [in] rand Random data. - * @param [in] len Length of random data in bytes. - * @return 0 on success. - * @return BAD_FUNC_ARG when key or rand is NULL. - * @return BUFFER_E when length is not KYBER_MAKEKEY_RAND_SZ. - * @return NOT_COMPILED_IN when key type is not supported. - * @return MEMORY_E when dynamic memory allocation failed. 
- */ -int wc_KyberKey_MakeKeyWithRandom(KyberKey* key, const unsigned char* rand, - int len) -{ - byte buf[2 * KYBER_SYM_SZ + 1]; - byte* rho = buf; - byte* sigma = buf + KYBER_SYM_SZ; -#ifndef WOLFSSL_NO_MALLOC - sword16* e = NULL; -#else -#ifndef WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM -#ifndef WOLFSSL_MLKEM_CACHE_A - sword16 e[(KYBER_MAX_K + 1) * KYBER_MAX_K * KYBER_N]; -#else - sword16 e[KYBER_MAX_K * KYBER_N]; -#endif -#else - sword16 e[KYBER_MAX_K * KYBER_N]; -#endif -#endif -#ifndef WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM - sword16* a = NULL; -#endif - sword16* s = NULL; - sword16* t = NULL; - int ret = 0; - int k = 0; - - /* Validate parameters. */ - if ((key == NULL) || (rand == NULL)) { - ret = BAD_FUNC_ARG; - } - if ((ret == 0) && (len != KYBER_MAKEKEY_RAND_SZ)) { - ret = BUFFER_E; - } - - if (ret == 0) { - key->flags = 0; - - /* Establish parameters based on key type. */ - switch (key->type) { -#ifndef WOLFSSL_NO_ML_KEM - #ifdef WOLFSSL_WC_ML_KEM_512 - case WC_ML_KEM_512: - k = WC_ML_KEM_512_K; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_768 - case WC_ML_KEM_768: - k = WC_ML_KEM_768_K; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_1024 - case WC_ML_KEM_1024: - k = WC_ML_KEM_1024_K; - break; - #endif -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - #ifdef WOLFSSL_KYBER512 - case KYBER512: - k = KYBER512_K; - break; - #endif - #ifdef WOLFSSL_KYBER768 - case KYBER768: - k = KYBER768_K; - break; - #endif - #ifdef WOLFSSL_KYBER1024 - case KYBER1024: - k = KYBER1024_K; - break; - #endif -#endif - default: - /* No other values supported. */ - ret = NOT_COMPILED_IN; - break; - } - } - -#ifndef WOLFSSL_NO_MALLOC - if (ret == 0) { - /* Allocate dynamic memory for matrix and error vector. 
*/ -#ifndef WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM -#ifndef WOLFSSL_MLKEM_CACHE_A - /* e (v) | a (m) */ - e = (sword16*)XMALLOC((k + 1) * k * KYBER_N * sizeof(sword16), - key->heap, DYNAMIC_TYPE_TMP_BUFFER); -#else - /* e (v) */ - e = (sword16*)XMALLOC(k * KYBER_N * sizeof(sword16), - key->heap, DYNAMIC_TYPE_TMP_BUFFER); -#endif -#else - /* e (v) */ - e = (sword16*)XMALLOC(k * KYBER_N * sizeof(sword16), - key->heap, DYNAMIC_TYPE_TMP_BUFFER); -#endif - if (e == NULL) { - ret = MEMORY_E; - } - } -#endif - if (ret == 0) { - const byte* d = rand; - -#ifdef WOLFSSL_MLKEM_CACHE_A - a = key->a; -#elif !defined(WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM) - /* Matrix A allocated at end of error vector. */ - a = e + (k * KYBER_N); -#endif - -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - if (key->type & KYBER_ORIGINAL) -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - { - /* Expand 32 bytes of random to 32. */ - ret = KYBER_HASH_G(&key->hash, d, KYBER_SYM_SZ, NULL, 0, buf); - } -#endif -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - else -#endif -#ifndef WOLFSSL_NO_ML_KEM - { - buf[0] = k; - /* Expand 33 bytes of random to 32. - * Alg 13: Step 1: (rho,sigma) <- G(d||k) - */ - ret = KYBER_HASH_G(&key->hash, d, KYBER_SYM_SZ, buf, 1, buf); - } -#endif - } - if (ret == 0) { - const byte* z = rand + KYBER_SYM_SZ; - s = key->priv; - t = key->pub; - - /* Cache the public seed for use in encapsulation and encoding public - * key. */ - XMEMCPY(key->pubSeed, rho, KYBER_SYM_SZ); - /* Cache the z value for decapsulation and encoding private key. */ - XMEMCPY(key->z, z, sizeof(key->z)); - - /* Initialize PRF for use in noise generation. */ - kyber_prf_init(&key->prf); -#ifndef WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM - /* Generate noise using PRF. - * Alg 13: Steps 8-15: generate s and e - */ - ret = kyber_get_noise(&key->prf, k, s, e, NULL, sigma); - } - if (ret == 0) { - /* Generate the matrix A. 
- * Alg 13: Steps 3-7 - */ - ret = kyber_gen_matrix(&key->prf, a, k, rho, 0); - } - if (ret == 0) { - /* Generate key pair from random data. - * Alg 13: Steps 16-18. - */ - kyber_keygen(s, t, e, a, k); -#else - /* Generate noise using PRF. - * Alg 13: Steps 8-11: generate s - */ - ret = kyber_get_noise(&key->prf, k, s, NULL, NULL, sigma); - } - if (ret == 0) { - /* Generate key pair from private vector and seeds. - * Alg 13: Steps 3-7: generate matrix A_hat - * Alg 13: 12-15: generate e - * Alg 13: 16-18: calculate t_hat from A_hat, s and e - */ - ret = kyber_keygen_seeds(s, t, &key->prf, e, k, rho, sigma); - } - if (ret == 0) { -#endif - /* Private and public key are set/available. */ - key->flags |= KYBER_FLAG_PRIV_SET | KYBER_FLAG_PUB_SET; -#ifdef WOLFSSL_MLKEM_CACHE_A - key->flags |= KYBER_FLAG_A_SET; -#endif - } - -#ifndef WOLFSSL_NO_MALLOC - /* Free dynamic memory allocated in function. */ - if (key != NULL) { - XFREE(e, key->heap, DYNAMIC_TYPE_TMP_BUFFER); - } -#endif - - return ret; -} -#endif /* !WOLFSSL_KYBER_NO_MAKE_KEY */ - -/******************************************************************************/ - -/** - * Get the size in bytes of cipher text for key. - * - * @param [in] key Kyber key object. - * @param [out] len Length of cipher text in bytes. - * @return 0 on success. - * @return BAD_FUNC_ARG when key or len is NULL. - * @return NOT_COMPILED_IN when key type is not supported. - */ -int wc_KyberKey_CipherTextSize(KyberKey* key, word32* len) -{ - int ret = 0; - - /* Validate parameters. */ - if ((key == NULL) || (len == NULL)) { - ret = BAD_FUNC_ARG; - } - - if (ret == 0) { - /* Return in 'len' size of the cipher text for the type of this key. 
*/ - switch (key->type) { -#ifndef WOLFSSL_NO_ML_KEM - #ifdef WOLFSSL_WC_ML_KEM_512 - case WC_ML_KEM_512: - *len = WC_ML_KEM_512_CIPHER_TEXT_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_768 - case WC_ML_KEM_768: - *len = WC_ML_KEM_768_CIPHER_TEXT_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_1024 - case WC_ML_KEM_1024: - *len = WC_ML_KEM_1024_CIPHER_TEXT_SIZE; - break; - #endif -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - #ifdef WOLFSSL_KYBER512 - case KYBER512: - *len = KYBER512_CIPHER_TEXT_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER768 - case KYBER768: - *len = KYBER768_CIPHER_TEXT_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER1024 - case KYBER1024: - *len = KYBER1024_CIPHER_TEXT_SIZE; - break; - #endif -#endif - default: - /* No other values supported. */ - ret = NOT_COMPILED_IN; - break; - } - } - - return ret; -} - -/** - * Size of a shared secret in bytes. Always KYBER_SS_SZ. - * - * @param [in] key Kyber key object. Not used. - * @param [out] Size of the shared secret created with a Kyber key. - * @return 0 on success. - * @return 0 to indicate success. - */ -int wc_KyberKey_SharedSecretSize(KyberKey* key, word32* len) -{ - (void)key; - - *len = KYBER_SS_SZ; - - return 0; -} - -#if !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) || \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) -/* Encapsulate data and derive secret. - * - * FIPS 203, Algorithm 14: K-PKE.Encrypt(ek_PKE, m, r) - * Uses the encryption key to encrypt a plaintext message using the randomness - * r. 
- * 1: N <- 0 - * 2: t_hat <- ByteDecode_12(ek_PKE[0:384k]) - * > run ByteDecode_12 k times to decode t_hat - * 3: rho <- ek_PKE[384k : 384K + 32] - * > extract 32-byte seed from ek_PKE - * 4-8: generate matrix A_hat - * 9-12: generate y - * 13-16: generate e_1 - * 17: generate e_2 - * 18-19: calculate u - * 20: mu <- Decompress_1(ByteDecode_1(m)) - * 21: calculate v - * 22: c_1 <- ByteEncode_d_u(Compress_d_u(u)) - * > run ByteEncode_d_u and Compress_d_u k times - * 23: c_2 <- ByteEncode_d_v(Compress_d_v(v)) - * 24: return c <- (c_1||c_2) - * - * @param [in] key Kyber key object. - * @param [in] m Random bytes. - * @param [in] r Seed to feed to PRF when generating y, e1 and e2. - * @param [out] c Calculated cipher text. - * @return 0 on success. - * @return NOT_COMPILED_IN when key type is not supported. - */ -static int kyberkey_encapsulate(KyberKey* key, const byte* m, byte* r, byte* c) -{ - int ret = 0; - sword16* a = NULL; -#ifndef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM - sword16* mu = NULL; - sword16* e1 = NULL; - sword16* e2 = NULL; -#endif - unsigned int k = 0; - unsigned int compVecSz = 0; -#ifndef WOLFSSL_NO_MALLOC - sword16* y = NULL; -#else -#ifndef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM - sword16 y[((KYBER_MAX_K + 3) * KYBER_MAX_K + 3) * KYBER_N]; -#else - sword16 y[3 * KYBER_MAX_K * KYBER_N]; -#endif -#endif -#ifdef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM - sword16* u; - sword16* v; -#endif - - /* Establish parameters based on key type. 
*/ - switch (key->type) { -#ifndef WOLFSSL_NO_ML_KEM -#ifdef WOLFSSL_WC_ML_KEM_512 - case WC_ML_KEM_512: - k = WC_ML_KEM_512_K; - compVecSz = WC_ML_KEM_512_POLY_VEC_COMPRESSED_SZ; - break; -#endif -#ifdef WOLFSSL_WC_ML_KEM_768 - case WC_ML_KEM_768: - k = WC_ML_KEM_768_K; - compVecSz = WC_ML_KEM_768_POLY_VEC_COMPRESSED_SZ; - break; -#endif -#ifdef WOLFSSL_WC_ML_KEM_1024 - case WC_ML_KEM_1024: - k = WC_ML_KEM_1024_K; - compVecSz = WC_ML_KEM_1024_POLY_VEC_COMPRESSED_SZ; - break; -#endif -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL -#ifdef WOLFSSL_KYBER512 - case KYBER512: - k = KYBER512_K; - compVecSz = KYBER512_POLY_VEC_COMPRESSED_SZ; - break; -#endif -#ifdef WOLFSSL_KYBER768 - case KYBER768: - k = KYBER768_K; - compVecSz = KYBER768_POLY_VEC_COMPRESSED_SZ; - break; -#endif -#ifdef WOLFSSL_KYBER1024 - case KYBER1024: - k = KYBER1024_K; - compVecSz = KYBER1024_POLY_VEC_COMPRESSED_SZ; - break; -#endif -#endif - default: - /* No other values supported. */ - ret = NOT_COMPILED_IN; - break; - } - -#ifndef WOLFSSL_NO_MALLOC - if (ret == 0) { - /* Allocate dynamic memory for all matrices, vectors and polynomials. */ -#ifndef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM - y = (sword16*)XMALLOC(((k + 3) * k + 3) * KYBER_N * sizeof(sword16), - key->heap, DYNAMIC_TYPE_TMP_BUFFER); -#else - y = (sword16*)XMALLOC(3 * k * KYBER_N * sizeof(sword16), key->heap, - DYNAMIC_TYPE_TMP_BUFFER); -#endif - if (y == NULL) { - ret = MEMORY_E; - } - } -#endif - - if (ret == 0) { -#ifndef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM - /* Assign allocated dynamic memory to pointers. - * y (b) | a (m) | mu (p) | e1 (p) | e2 (v) | u (v) | v (p) */ - a = y + KYBER_N * k; - mu = a + KYBER_N * k * k; - e1 = mu + KYBER_N; - e2 = e1 + KYBER_N * k; -#else - /* Assign allocated dynamic memory to pointers. - * y (v) | a (v) | u (v) */ - a = y + KYBER_N * k; -#endif - -#ifndef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM - /* Convert msg to a polynomial. 
- * Step 20: mu <- Decompress_1(ByteDecode_1(m)) */ - kyber_from_msg(mu, m); - - /* Initialize the PRF for use in the noise generation. */ - kyber_prf_init(&key->prf); - /* Generate noise using PRF. - * Steps 9-17: generate y, e_1, e_2 - */ - ret = kyber_get_noise(&key->prf, k, y, e1, e2, r); - } -#ifdef WOLFSSL_MLKEM_CACHE_A - if ((ret == 0) && ((key->flags & KYBER_FLAG_A_SET) != 0)) { - unsigned int i; - /* Transpose matrix. - * Steps 4-8: generate matrix A_hat (from original) */ - for (i = 0; i < k; i++) { - unsigned int j; - for (j = 0; j < k; j++) { - XMEMCPY(&a[(i * k + j) * KYBER_N], - &key->a[(j * k + i) * KYBER_N], - KYBER_N * 2); - } - } - } - else -#endif - if (ret == 0) { - /* Generate the transposed matrix. - * Step 4-8: generate matrix A_hat */ - ret = kyber_gen_matrix(&key->prf, a, k, key->pubSeed, 1); - } - if (ret == 0) { - sword16* u; - sword16* v; - - /* Assign remaining allocated dynamic memory to pointers. - * y (v) | a (m) | mu (p) | e1 (p) | r2 (v) | u (v) | v (p)*/ - u = e2 + KYBER_N; - v = u + KYBER_N * k; - - /* Perform encapsulation maths. - * Steps 18-19, 21: calculate u and v */ - kyber_encapsulate(key->pub, u, v, a, y, e1, e2, mu, k); -#else - /* Initialize the PRF for use in the noise generation. */ - kyber_prf_init(&key->prf); - /* Generate noise using PRF. - * Steps 9-12: generate y */ - ret = kyber_get_noise(&key->prf, k, y, NULL, NULL, r); - } - if (ret == 0) { - /* Assign remaining allocated dynamic memory to pointers. - * y (v) | at (v) | u (v) */ - u = a + KYBER_N * k; - v = a; - - /* Perform encapsulation maths. 
- * Steps 13-17: generate e_1 and e_2 - * Steps 18-19, 21: calculate u and v */ - ret = kyber_encapsulate_seeds(key->pub, &key->prf, u, a, y, k, m, - key->pubSeed, r); - } - if (ret == 0) { -#endif - byte* c1 = c; - byte* c2 = c + compVecSz; - - #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) - if (k == KYBER512_K) { - /* Step 22: c_1 <- ByteEncode_d_u(Compress_d_u(u)) */ - kyber_vec_compress_10(c1, u, k); - /* Step 23: c_2 <- ByteEncode_d_v(Compress_d_v(v)) */ - kyber_compress_4(c2, v); - /* Step 24: return c <- (c_1||c_2) */ - } - #endif - #if defined(WOLFSSL_KYBER768) || defined(WOLFSSL_WC_ML_KEM_768) - if (k == KYBER768_K) { - /* Step 22: c_1 <- ByteEncode_d_u(Compress_d_u(u)) */ - kyber_vec_compress_10(c1, u, k); - /* Step 23: c_2 <- ByteEncode_d_v(Compress_d_v(v)) */ - kyber_compress_4(c2, v); - /* Step 24: return c <- (c_1||c_2) */ - } - #endif - #if defined(WOLFSSL_KYBER1024) || defined(WOLFSSL_WC_ML_KEM_1024) - if (k == KYBER1024_K) { - /* Step 22: c_1 <- ByteEncode_d_u(Compress_d_u(u)) */ - kyber_vec_compress_11(c1, u); - /* Step 23: c_2 <- ByteEncode_d_v(Compress_d_v(v)) */ - kyber_compress_5(c2, v); - /* Step 24: return c <- (c_1||c_2) */ - } - #endif - } - -#ifndef WOLFSSL_NO_MALLOC - /* Dispose of dynamic memory allocated in function. */ - XFREE(y, key->heap, DYNAMIC_TYPE_TMP_BUFFER); -#endif - - return ret; -} -#endif - -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE -/** - * Encapsulate with random number generator and derive secret. - * - * FIPS 203, Algorithm 20: ML-KEM.Encaps(ek) - * Uses the encapsulation key to generate a shared secret key and an associated - * ciphertext. - * 1: m <- B_32 > m is 32 random bytes - * 2: if m == NULL then - * 3: return falsum - * 4: end if - * 5: (K,c) <- ML-KEM.Encaps_internal(ek,m) - * > run internal encapsulation algorithm - * 6: return (K,c) - * - * @param [in] key Kyber key object. - * @param [out] c Cipher text. - * @param [out] k Shared secret generated. - * @param [in] rng Random number generator. 
- * @return 0 on success. - * @return BAD_FUNC_ARG when key, ct, ss or RNG is NULL. - * @return NOT_COMPILED_IN when key type is not supported. - * @return MEMORY_E when dynamic memory allocation failed. - */ -int wc_KyberKey_Encapsulate(KyberKey* key, unsigned char* c, unsigned char* k, - WC_RNG* rng) -{ - int ret = 0; - unsigned char m[KYBER_ENC_RAND_SZ]; - - /* Validate parameters. */ - if ((key == NULL) || (c == NULL) || (k == NULL) || (rng == NULL)) { - ret = BAD_FUNC_ARG; - } - - if (ret == 0) { - /* Generate seed for use with PRFs. - * Step 1: m is 32 random bytes - */ - ret = wc_RNG_GenerateBlock(rng, m, sizeof(m)); - /* Step 2: ret is not zero when m == NULL. */ - } - if (ret == 0) { - /* Encapsulate with the random. - * Step 5: run internal encapsulation algorithm - */ - ret = wc_KyberKey_EncapsulateWithRandom(key, c, k, m, sizeof(m)); - } - - /* Step 3: return ret != 0 on falsum or internal key generation failure. */ - return ret; -} - -/** - * Encapsulate with random data and derive secret. - * - * FIPS 203, Algorithm 17: ML-KEM.Encaps_internal(ek, m) - * Uses the encapsulation key and randomness to generate a key and an associated - * ciphertext. - * Step 1: (K,r) <- G(m||H(ek)) - * > derive shared secret key K and randomness r - * Step 2: c <- K-PPKE.Encrypt(ek, m, r) - * > encrypt m using K-PKE with randomness r - * Step 3: return (K,c) - * - * @param [out] c Cipher text. - * @param [out] k Shared secret generated. - * @param [in] m Random bytes. - * @param [in] len Length of random bytes. - * @return 0 on success. - * @return BAD_FUNC_ARG when key, c, k or RNG is NULL. - * @return BUFFER_E when len is not KYBER_ENC_RAND_SZ. - * @return NOT_COMPILED_IN when key type is not supported. - * @return MEMORY_E when dynamic memory allocation failed. 
- */ -int wc_KyberKey_EncapsulateWithRandom(KyberKey* key, unsigned char* c, - unsigned char* k, const unsigned char* m, int len) -{ -#ifdef WOLFSSL_KYBER_ORIGINAL - byte msg[KYBER_SYM_SZ]; -#endif - byte kr[2 * KYBER_SYM_SZ + 1]; - int ret = 0; -#ifdef WOLFSSL_KYBER_ORIGINAL - unsigned int cSz = 0; -#endif - - /* Validate parameters. */ - if ((key == NULL) || (c == NULL) || (k == NULL) || (m == NULL)) { - ret = BAD_FUNC_ARG; - } - if ((ret == 0) && (len != KYBER_ENC_RAND_SZ)) { - ret = BUFFER_E; - } - -#ifdef WOLFSSL_KYBER_ORIGINAL - if (ret == 0) { - /* Establish parameters based on key type. */ - switch (key->type) { -#ifndef WOLFSSL_NO_ML_KEM - #ifdef WOLFSSL_WC_ML_KEM_512 - case WC_ML_KEM_512: - #endif - #ifdef WOLFSSL_WC_ML_KEM_768 - case WC_ML_KEM_768: - #endif - #ifdef WOLFSSL_WC_ML_KEM_1024 - case WC_ML_KEM_1024: - #endif - break; -#endif - #ifdef WOLFSSL_KYBER512 - case KYBER512: - cSz = KYBER512_CIPHER_TEXT_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER768 - case KYBER768: - cSz = KYBER768_CIPHER_TEXT_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER1024 - case KYBER1024: - cSz = KYBER1024_CIPHER_TEXT_SIZE; - break; - #endif - default: - /* No other values supported. */ - ret = NOT_COMPILED_IN; - break; - } - } -#endif - - /* If public hash (h) is not stored against key, calculate it - * (fields set explicitly instead of using decode). - * Step 1: ... H(ek)... - */ - if ((ret == 0) && ((key->flags & KYBER_FLAG_H_SET) == 0)) { - #ifndef WOLFSSL_NO_MALLOC - byte* pubKey = NULL; - word32 pubKeyLen; - #else - byte pubKey[KYBER_MAX_PUBLIC_KEY_SIZE]; - word32 pubKeyLen = KYBER_MAX_PUBLIC_KEY_SIZE; - #endif - - #ifndef WOLFSSL_NO_MALLOC - /* Determine how big an encoded public key will be. */ - ret = wc_KyberKey_PublicKeySize(key, &pubKeyLen); - if (ret == 0) { - /* Allocate dynamic memory for encoded public key. 
*/ - pubKey = (byte*)XMALLOC(pubKeyLen, key->heap, - DYNAMIC_TYPE_TMP_BUFFER); - if (pubKey == NULL) { - ret = MEMORY_E; - } - } - if (ret == 0) { - #endif - /* Encode public key - h is hash of encoded public key. */ - ret = wc_KyberKey_EncodePublicKey(key, pubKey, pubKeyLen); - #ifndef WOLFSSL_NO_MALLOC - } - /* Dispose of encoded public key. */ - XFREE(pubKey, key->heap, DYNAMIC_TYPE_TMP_BUFFER); - #endif - } - if ((ret == 0) && ((key->flags & KYBER_FLAG_H_SET) == 0)) { - /* Implementation issue if h not cached and flag set. */ - ret = BAD_STATE_E; - } - -#ifdef WOLFSSL_KYBER_ORIGINAL - if (ret == 0) { -#ifndef WOLFSSL_NO_ML_KEM - if (key->type & KYBER_ORIGINAL) -#endif - { - /* Hash random to anonymize as seed data. */ - ret = KYBER_HASH_H(&key->hash, m, KYBER_SYM_SZ, msg); - } - } -#endif - if (ret == 0) { - /* Hash message into seed buffer. */ -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - if (key->type & KYBER_ORIGINAL) -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - { - ret = KYBER_HASH_G(&key->hash, msg, KYBER_SYM_SZ, key->h, - KYBER_SYM_SZ, kr); - } -#endif -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - else -#endif -#ifndef WOLFSSL_NO_ML_KEM - { - /* Step 1: (K,r) <- G(m||H(ek)) */ - ret = KYBER_HASH_G(&key->hash, m, KYBER_SYM_SZ, key->h, - KYBER_SYM_SZ, kr); - } -#endif - } - - if (ret == 0) { - /* Encapsulate the message using the key and the seed. 
*/ -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - if (key->type & KYBER_ORIGINAL) -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - { - ret = kyberkey_encapsulate(key, msg, kr + KYBER_SYM_SZ, c); - } -#endif -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - else -#endif -#ifndef WOLFSSL_NO_ML_KEM - { - /* Step 2: c <- K-PKE.Encrypt(ek,m,r) */ - ret = kyberkey_encapsulate(key, m, kr + KYBER_SYM_SZ, c); - } -#endif - } - -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - if (key->type & KYBER_ORIGINAL) -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - { - if (ret == 0) { - /* Hash the cipher text after the seed. */ - ret = KYBER_HASH_H(&key->hash, c, cSz, kr + KYBER_SYM_SZ); - } - if (ret == 0) { - /* Derive the secret from the seed and hash of cipher text. */ - ret = KYBER_KDF(kr, 2 * KYBER_SYM_SZ, k, KYBER_SS_SZ); - } - } -#endif -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - else -#endif -#ifndef WOLFSSL_NO_ML_KEM - { - if (ret == 0) { - /* return (K,c) */ - XMEMCPY(k, kr, KYBER_SS_SZ); - } - } -#endif - - return ret; -} -#endif /* !WOLFSSL_KYBER_NO_ENCAPSULATE */ - -/******************************************************************************/ - -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE -/* Decapsulate cipher text to the message using key. - * - * FIPS 203, Algorithm 15: K-PKE.Decrypt(dk_PKE,c) - * Uses the decryption key to decrypt a ciphertext. - * 1: c1 <- c[0 : 32.d_u.k] - * 2: c2 <= c[32.d_u.k : 32(d_u.k + d_v)] - * 3: u' <= Decompress_d_u(ByteDecode_d_u(c1)) - * 4: v' <= Decompress_d_v(ByteDecode_d_v(c2)) - * ... - * 6: w <- v' - InvNTT(s_hat_trans o NTT(u')) - * 7: m <- ByteEncode_1(Compress_1(w)) - * 8: return m - * - * @param [in] key Kyber key object. - * @param [out] m Message than was encapsulated. - * @param [in] c Cipher text. - * @return 0 on success. - * @return NOT_COMPILED_IN when key type is not supported. - * @return MEMORY_E when dynamic memory allocation failed. 
- */ -static KYBER_NOINLINE int kyberkey_decapsulate(KyberKey* key, byte* m, - const byte* c) -{ - int ret = 0; - sword16* v; - sword16* w; - unsigned int k = 0; - unsigned int compVecSz; -#if !defined(USE_INTEL_SPEEDUP) && !defined(WOLFSSL_NO_MALLOC) - sword16* u = NULL; -#else - sword16 u[(KYBER_MAX_K + 1) * KYBER_N]; -#endif - - /* Establish parameters based on key type. */ - switch (key->type) { -#ifndef WOLFSSL_NO_ML_KEM -#ifdef WOLFSSL_WC_ML_KEM_512 - case WC_ML_KEM_512: - k = WC_ML_KEM_512_K; - compVecSz = WC_ML_KEM_512_POLY_VEC_COMPRESSED_SZ; - break; -#endif -#ifdef WOLFSSL_WC_ML_KEM_768 - case WC_ML_KEM_768: - k = WC_ML_KEM_768_K; - compVecSz = WC_ML_KEM_768_POLY_VEC_COMPRESSED_SZ; - break; -#endif -#ifdef WOLFSSL_WC_ML_KEM_1024 - case WC_ML_KEM_1024: - k = WC_ML_KEM_1024_K; - compVecSz = WC_ML_KEM_1024_POLY_VEC_COMPRESSED_SZ; - break; -#endif -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL -#ifdef WOLFSSL_KYBER512 - case KYBER512: - k = KYBER512_K; - compVecSz = KYBER512_POLY_VEC_COMPRESSED_SZ; - break; -#endif -#ifdef WOLFSSL_KYBER768 - case KYBER768: - k = KYBER768_K; - compVecSz = KYBER768_POLY_VEC_COMPRESSED_SZ; - break; -#endif -#ifdef WOLFSSL_KYBER1024 - case KYBER1024: - k = KYBER1024_K; - compVecSz = KYBER1024_POLY_VEC_COMPRESSED_SZ; - break; -#endif -#endif - default: - /* No other values supported. */ - ret = NOT_COMPILED_IN; - break; - } - -#if !defined(USE_INTEL_SPEEDUP) && !defined(WOLFSSL_NO_MALLOC) - if (ret == 0) { - /* Allocate dynamic memory for a vector and a polynomial. */ - u = (sword16*)XMALLOC((k + 1) * KYBER_N * sizeof(sword16), key->heap, - DYNAMIC_TYPE_TMP_BUFFER); - if (u == NULL) { - ret = MEMORY_E; - } - } -#endif - if (ret == 0) { - /* Step 1: c1 <- c[0 : 32.d_u.k] */ - const byte* c1 = c; - /* Step 2: c2 <= c[32.d_u.k : 32(d_u.k + d_v)] */ - const byte* c2 = c + compVecSz; - - /* Assign allocated dynamic memory to pointers. 
- * u (v) | v (p) */ - v = u + k * KYBER_N; - w = u; - - #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) - if (k == KYBER512_K) { - /* Step 3: u' <= Decompress_d_u(ByteDecode_d_u(c1)) */ - kyber_vec_decompress_10(u, c1, k); - /* Step 4: v' <= Decompress_d_v(ByteDecode_d_v(c2)) */ - kyber_decompress_4(v, c2); - } - #endif - #if defined(WOLFSSL_KYBER768) || defined(WOLFSSL_WC_ML_KEM_768) - if (k == KYBER768_K) { - /* Step 3: u' <= Decompress_d_u(ByteDecode_d_u(c1)) */ - kyber_vec_decompress_10(u, c1, k); - /* Step 4: v' <= Decompress_d_v(ByteDecode_d_v(c2)) */ - kyber_decompress_4(v, c2); - } - #endif - #if defined(WOLFSSL_KYBER1024) || defined(WOLFSSL_WC_ML_KEM_1024) - if (k == KYBER1024_K) { - /* Step 3: u' <= Decompress_d_u(ByteDecode_d_u(c1)) */ - kyber_vec_decompress_11(u, c1); - /* Step 4: v' <= Decompress_d_v(ByteDecode_d_v(c2)) */ - kyber_decompress_5(v, c2); - } - #endif - - /* Decapsulate the cipher text into polynomial. - * Step 6: w <- v' - InvNTT(s_hat_trans o NTT(u')) */ - kyber_decapsulate(key->priv, w, u, v, k); - - /* Convert the polynomial into a array of bytes (message). - * Step 7: m <- ByteEncode_1(Compress_1(w)) */ - kyber_to_msg(m, w); - /* Step 8: return m */ - } - -#if !defined(USE_INTEL_SPEEDUP) && !defined(WOLFSSL_NO_MALLOC) - /* Dispose of dynamically memory allocated in function. */ - XFREE(u, key->heap, DYNAMIC_TYPE_TMP_BUFFER); -#endif - - return ret; -} - -#ifndef WOLFSSL_NO_ML_KEM -/* Derive the secret from z and cipher text. - * - * @param [in] z Implicit rejection value. - * @param [in] ct Cipher text. - * @param [in] ctSz Length of cipher text in bytes. - * @param [out] ss Shared secret. - * @return 0 on success. - * @return MEMORY_E when dynamic memory allocation failed. - * @return Other negative when a hash error occurred. 
- */ -static int kyber_derive_secret(const byte* z, const byte* ct, word32 ctSz, - byte* ss) -{ - int ret; - wc_Shake shake; - - ret = wc_InitShake256(&shake, NULL, INVALID_DEVID); - if (ret == 0) { - ret = wc_Shake256_Update(&shake, z, KYBER_SYM_SZ); - if (ret == 0) { - ret = wc_Shake256_Update(&shake, ct, ctSz); - } - if (ret == 0) { - ret = wc_Shake256_Final(&shake, ss, KYBER_SS_SZ); - } - wc_Shake256_Free(&shake); - } - - return ret; -} -#endif - -/** - * Decapsulate the cipher text to calculate the shared secret. - * - * Validates the cipher text by encapsulating and comparing with data passed in. - * - * FIPS 203, Algorithm 21: ML-KEM.Decaps(dk, c) - * Uses the decapsulation key to produce a shared secret key from a ciphertext. - * 1: K' <- ML-KEM.Decaps_internal(dk,c) - * > run internal decapsulation algorithm - * 2: return K' - * - * FIPS 203, Algorithm 18: ML-KEM.Decaps_internal(dk, c) - * Uses the decapsulation key to produce a shared secret key from a ciphertext. - * ... - * 1: dk_PKE <- dk[0 : 384k] - * > extract (from KEM decaps key) the PKE decryption key - * 2: ek_PKE <- dk[384k : 768l + 32] - * > extract PKE encryption key - * 3: h <- dk[768K + 32 : 768k + 64] - * > extract hash of PKE encryption key - * 4: z <- dk[768K + 64 : 768k + 96] - * > extract implicit rejection value - * 5: m' <- K-PKE.Decrypt(dk_PKE, c) > decrypt ciphertext - * 6: (K', r') <- G(m'||h) - * 7: K_bar <- J(z||c) - * 8: c' <- K-PKE.Encrypt(ek_PKE, m', r') - * > re-encrypt using the derived randomness r' - * 9: if c != c' then - * 10: K' <= K_bar - * > if ciphertexts do not match, "implicitly reject" - * 11: end if - * 12: return K' - * - * @param [in] key Kyber key object. - * @param [out] ss Shared secret. - * @param [in] ct Cipher text. - * @param [in] len Length of cipher text. - * @return 0 on success. - * @return BAD_FUNC_ARG when key, ss or cr are NULL. - * @return NOT_COMPILED_IN when key type is not supported. 
- * @return BUFFER_E when len is not the length of cipher text for the key type. - * @return MEMORY_E when dynamic memory allocation failed. - */ -int wc_KyberKey_Decapsulate(KyberKey* key, unsigned char* ss, - const unsigned char* ct, word32 len) -{ - byte msg[KYBER_SYM_SZ]; - byte kr[2 * KYBER_SYM_SZ + 1]; - int ret = 0; - unsigned int ctSz = 0; - unsigned int i = 0; - int fail = 0; -#if !defined(USE_INTEL_SPEEDUP) && !defined(WOLFSSL_NO_MALLOC) - byte* cmp = NULL; -#else - byte cmp[KYBER_MAX_CIPHER_TEXT_SIZE]; -#endif - - /* Validate parameters. */ - if ((key == NULL) || (ss == NULL) || (ct == NULL)) { - ret = BAD_FUNC_ARG; - } - - if (ret == 0) { - /* Establish cipher text size based on key type. */ - switch (key->type) { -#ifndef WOLFSSL_NO_ML_KEM - #ifdef WOLFSSL_WC_ML_KEM_512 - case WC_ML_KEM_512: - ctSz = WC_ML_KEM_512_CIPHER_TEXT_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_768 - case WC_ML_KEM_768: - ctSz = WC_ML_KEM_768_CIPHER_TEXT_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_1024 - case WC_ML_KEM_1024: - ctSz = WC_ML_KEM_1024_CIPHER_TEXT_SIZE; - break; - #endif -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - #ifdef WOLFSSL_KYBER512 - case KYBER512: - ctSz = KYBER512_CIPHER_TEXT_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER768 - case KYBER768: - ctSz = KYBER768_CIPHER_TEXT_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER1024 - case KYBER1024: - ctSz = KYBER1024_CIPHER_TEXT_SIZE; - break; - #endif -#endif - default: - /* No other values supported. */ - ret = NOT_COMPILED_IN; - break; - } - } - - /* Ensure the cipher text passed in is the correct size. */ - if ((ret == 0) && (len != ctSz)) { - ret = BUFFER_E; - } - -#if !defined(USE_INTEL_SPEEDUP) && !defined(WOLFSSL_NO_MALLOC) - if (ret == 0) { - /* Allocate memory for cipher text that is generated. */ - cmp = (byte*)XMALLOC(ctSz, key->heap, DYNAMIC_TYPE_TMP_BUFFER); - if (cmp == NULL) { - ret = MEMORY_E; - } - } -#endif - - if (ret == 0) { - /* Decapsulate the cipher text. 
*/ - ret = kyberkey_decapsulate(key, msg, ct); - } - if (ret == 0) { - /* Hash message into seed buffer. */ - ret = KYBER_HASH_G(&key->hash, msg, KYBER_SYM_SZ, key->h, KYBER_SYM_SZ, - kr); - } - if (ret == 0) { - /* Encapsulate the message. */ - ret = kyberkey_encapsulate(key, msg, kr + KYBER_SYM_SZ, cmp); - } - if (ret == 0) { - /* Compare generated cipher text with that passed in. */ - fail = kyber_cmp(ct, cmp, ctSz); - -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - if (key->type & KYBER_ORIGINAL) -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - { - /* Hash the cipher text after the seed. */ - ret = KYBER_HASH_H(&key->hash, ct, ctSz, kr + KYBER_SYM_SZ); - if (ret == 0) { - /* Change seed to z on comparison failure. */ - for (i = 0; i < KYBER_SYM_SZ; i++) { - kr[i] ^= (kr[i] ^ key->z[i]) & fail; - } - - /* Derive the secret from the seed and hash of cipher text. */ - ret = KYBER_KDF(kr, 2 * KYBER_SYM_SZ, ss, KYBER_SS_SZ); - } - } -#endif -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - else -#endif -#ifndef WOLFSSL_NO_ML_KEM - { - ret = kyber_derive_secret(key->z, ct, ctSz, msg); - if (ret == 0) { - /* Set secret to kr or fake secret on comparison failure. */ - for (i = 0; i < KYBER_SYM_SZ; i++) { - ss[i] = kr[i] ^ ((kr[i] ^ msg[i]) & fail); - } - } - } -#endif - } - -#if !defined(USE_INTEL_SPEEDUP) && !defined(WOLFSSL_NO_MALLOC) - /* Dispose of dynamic memory allocated in function. */ - if (key != NULL) { - XFREE(cmp, key->heap, DYNAMIC_TYPE_TMP_BUFFER); - } -#endif - - return ret; -} -#endif /* WOLFSSL_KYBER_NO_DECAPSULATE */ - -/******************************************************************************/ - -/** - * Get the public key and public seed from bytes. - * - * FIPS 203, Algorithm 14 K-PKE.Encrypt(ek_PKE, m, r) - * ... - * 2: t <- ByteDecode_12(ek_PKE[0 : 384k]) - * 3: rho <- ek_PKE[384k : 384k + 32] - * ... - * - * @param [out] pub Public key - vector. - * @param [out] pubSeed Public seed. 
- * @param [in] p Public key data. - * @param [in] k Number of polynomials in vector. - */ -static void kyberkey_decode_public(sword16* pub, byte* pubSeed, const byte* p, - unsigned int k) -{ - unsigned int i; - - /* Decode public key that is vector of polynomials. - * Step 2: t <- ByteDecode_12(ek_PKE[0 : 384k]) */ - kyber_from_bytes(pub, p, k); - p += k * KYBER_POLY_SIZE; - - /* Read public key seed. - * Step 3: rho <- ek_PKE[384k : 384k + 32] */ - for (i = 0; i < KYBER_SYM_SZ; i++) { - pubSeed[i] = p[i]; - } -} - -/** - * Decode the private key. - * - * Private Vector | Public Key | Public Hash | Randomizer - * - * FIPS 203, Algorithm 18: ML-KEM.Decaps_internal(dk, c) - * 1: dk_PKE <- dk[0 : 384k] - * > extract (from KEM decaps key) the PKE decryption key - * 2: ek_PKE <- dk[384k : 768l + 32] - * > extract PKE encryption key - * 3: h <- dk[768K + 32 : 768k + 64] - * > extract hash of PKE encryption key - * 4: z <- dk[768K + 64 : 768k + 96] - * > extract implicit rejection value - * - * FIPS 203, Algorithm 15: K-PKE.Decrypt(dk_PKE, c) - * ... - * 5: s_hat <= ByteDecode_12(dk_PKE) - * ... - * - * @param [in, out] key Kyber key object. - * @param [in] in Buffer holding encoded key. - * @param [in] len Length of data in buffer. - * @return 0 on success. - * @return BAD_FUNC_ARG when key or in is NULL. - * @return NOT_COMPILED_IN when key type is not supported. - * @return BUFFER_E when len is not the correct size. - */ -int wc_KyberKey_DecodePrivateKey(KyberKey* key, const unsigned char* in, - word32 len) -{ - int ret = 0; - word32 privLen = 0; - word32 pubLen = 0; - unsigned int k = 0; - const unsigned char* p = in; - - /* Validate parameters. */ - if ((key == NULL) || (in == NULL)) { - ret = BAD_FUNC_ARG; - } - - if (ret == 0) { - /* Establish parameters based on key type. 
*/ - switch (key->type) { -#ifndef WOLFSSL_NO_ML_KEM - #ifdef WOLFSSL_WC_ML_KEM_512 - case WC_ML_KEM_512: - k = WC_ML_KEM_512_K; - privLen = WC_ML_KEM_512_PRIVATE_KEY_SIZE; - pubLen = WC_ML_KEM_512_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_768 - case WC_ML_KEM_768: - k = WC_ML_KEM_768_K; - privLen = WC_ML_KEM_768_PRIVATE_KEY_SIZE; - pubLen = WC_ML_KEM_768_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_1024 - case WC_ML_KEM_1024: - k = WC_ML_KEM_1024_K; - privLen = WC_ML_KEM_1024_PRIVATE_KEY_SIZE; - pubLen = WC_ML_KEM_1024_PUBLIC_KEY_SIZE; - break; - #endif -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - #ifdef WOLFSSL_KYBER512 - case KYBER512: - k = KYBER512_K; - privLen = KYBER512_PRIVATE_KEY_SIZE; - pubLen = KYBER512_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER768 - case KYBER768: - k = KYBER768_K; - privLen = KYBER768_PRIVATE_KEY_SIZE; - pubLen = KYBER768_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER1024 - case KYBER1024: - k = KYBER1024_K; - privLen = KYBER1024_PRIVATE_KEY_SIZE; - pubLen = KYBER1024_PUBLIC_KEY_SIZE; - break; - #endif -#endif - default: - /* No other values supported. */ - ret = NOT_COMPILED_IN; - break; - } - } - /* Ensure the data is the correct length for the key type. */ - if ((ret == 0) && (len != privLen)) { - ret = BUFFER_E; - } - - if (ret == 0) { - /* Decode private key that is vector of polynomials. - * Alg 18 Step 1: dk_PKE <- dk[0 : 384k] - * Alg 15 Step 5: s_hat <- ByteDecode_12(dk_PKE) */ - kyber_from_bytes(key->priv, p, k); - p += k * KYBER_POLY_SIZE; - - /* Decode the public key that is after the private key. */ - kyberkey_decode_public(key->pub, key->pubSeed, p, k); - p += pubLen; - - /* Copy the hash of the encoded public key that is after public key. */ - XMEMCPY(key->h, p, sizeof(key->h)); - p += KYBER_SYM_SZ; - /* Copy the z (randomizer) that is after hash. 
*/ - XMEMCPY(key->z, p, sizeof(key->z)); - - /* Set flags */ - key->flags |= KYBER_FLAG_H_SET | KYBER_FLAG_BOTH_SET; - } - - return ret; -} - -/** - * Decode public key. - * - * Public vector | Public Seed - * - * @param [in, out] key Kyber key object. - * @param [in] in Buffer holding encoded key. - * @param [in] len Length of data in buffer. - * @return 0 on success. - * @return BAD_FUNC_ARG when key or in is NULL. - * @return NOT_COMPILED_IN when key type is not supported. - * @return BUFFER_E when len is not the correct size. - */ -int wc_KyberKey_DecodePublicKey(KyberKey* key, const unsigned char* in, - word32 len) -{ - int ret = 0; - word32 pubLen = 0; - unsigned int k = 0; - const unsigned char* p = in; - - if ((key == NULL) || (in == NULL)) { - ret = BAD_FUNC_ARG; - } - - if (ret == 0) { - /* Establish parameters based on key type. */ - switch (key->type) { -#ifndef WOLFSSL_NO_ML_KEM - #ifdef WOLFSSL_WC_ML_KEM_512 - case WC_ML_KEM_512: - k = WC_ML_KEM_512_K; - pubLen = WC_ML_KEM_512_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_768 - case WC_ML_KEM_768: - k = WC_ML_KEM_768_K; - pubLen = WC_ML_KEM_768_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_1024 - case WC_ML_KEM_1024: - k = WC_ML_KEM_1024_K; - pubLen = WC_ML_KEM_1024_PUBLIC_KEY_SIZE; - break; - #endif -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - #ifdef WOLFSSL_KYBER512 - case KYBER512: - k = KYBER512_K; - pubLen = KYBER512_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER768 - case KYBER768: - k = KYBER768_K; - pubLen = KYBER768_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER1024 - case KYBER1024: - k = KYBER1024_K; - pubLen = KYBER1024_PUBLIC_KEY_SIZE; - break; - #endif -#endif - default: - /* No other values supported. */ - ret = NOT_COMPILED_IN; - break; - } - } - /* Ensure the data is the correct length for the key type. 
*/ - if ((ret == 0) && (len != pubLen)) { - ret = BUFFER_E; - } - - if (ret == 0) { - kyberkey_decode_public(key->pub, key->pubSeed, p, k); - - /* Calculate public hash. */ - ret = KYBER_HASH_H(&key->hash, in, len, key->h); - } - if (ret == 0) { - /* Record public key and public hash set. */ - key->flags |= KYBER_FLAG_PUB_SET | KYBER_FLAG_H_SET; - } - - return ret; -} - -/** - * Get the size in bytes of encoded private key for the key. - * - * @param [in] key Kyber key object. - * @param [out] len Length of encoded private key in bytes. - * @return 0 on success. - * @return BAD_FUNC_ARG when key or len is NULL. - * @return NOT_COMPILED_IN when key type is not supported. - */ -int wc_KyberKey_PrivateKeySize(KyberKey* key, word32* len) -{ - int ret = 0; - - /* Validate parameters. */ - if ((key == NULL) || (len == NULL)) { - ret = BAD_FUNC_ARG; - } - - if (ret == 0) { - /* Return in 'len' size of the encoded private key for the type of this - * key. */ - switch (key->type) { -#ifndef WOLFSSL_NO_ML_KEM - #ifdef WOLFSSL_WC_ML_KEM_512 - case WC_ML_KEM_512: - *len = WC_ML_KEM_512_PRIVATE_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_768 - case WC_ML_KEM_768: - *len = WC_ML_KEM_768_PRIVATE_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_1024 - case WC_ML_KEM_1024: - *len = WC_ML_KEM_1024_PRIVATE_KEY_SIZE; - break; - #endif -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - #ifdef WOLFSSL_KYBER512 - case KYBER512: - *len = KYBER512_PRIVATE_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER768 - case KYBER768: - *len = KYBER768_PRIVATE_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER1024 - case KYBER1024: - *len = KYBER1024_PRIVATE_KEY_SIZE; - break; - #endif -#endif - default: - /* No other values supported. */ - ret = NOT_COMPILED_IN; - break; - } - } - - return ret; -} - -/** - * Get the size in bytes of encoded public key for the key. - * - * @param [in] key Kyber key object. - * @param [out] len Length of encoded public key in bytes. - * @return 0 on success. 
- * @return BAD_FUNC_ARG when key or len is NULL. - * @return NOT_COMPILED_IN when key type is not supported. - */ -int wc_KyberKey_PublicKeySize(KyberKey* key, word32* len) -{ - int ret = 0; - - /* Validate parameters. */ - if ((key == NULL) || (len == NULL)) { - ret = BAD_FUNC_ARG; - } - - if (ret == 0) { - /* Return in 'len' size of the encoded public key for the type of this - * key. */ - switch (key->type) { -#ifndef WOLFSSL_NO_ML_KEM - #ifdef WOLFSSL_WC_ML_KEM_512 - case WC_ML_KEM_512: - *len = WC_ML_KEM_512_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_768 - case WC_ML_KEM_768: - *len = WC_ML_KEM_768_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_1024 - case WC_ML_KEM_1024: - *len = WC_ML_KEM_1024_PUBLIC_KEY_SIZE; - break; - #endif -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - #ifdef WOLFSSL_KYBER512 - case KYBER512: - *len = KYBER512_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER768 - case KYBER768: - *len = KYBER768_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER1024 - case KYBER1024: - *len = KYBER1024_PUBLIC_KEY_SIZE; - break; - #endif -#endif - default: - /* No other values supported. */ - ret = NOT_COMPILED_IN; - break; - } - } - - return ret; -} - -/** - * Encode the private key. - * - * Private Vector | Public Key | Public Hash | Randomizer - * - * FIPS 203, Algorithm 16: ML-KEM.KeyGen_internal(d,z) - * ... - * 3: dk <- (dk_PKE||ek||H(ek)||z) - * ... - * FIPS 203, Algorithm 13: K-PKE.KeyGen(d) - * ... - * 20: dk_PKE <- ByteEncode_12(s_hat) - * ... - * - * @param [in] key Kyber key object. - * @param [out] out Buffer to hold data. - * @param [in] len Size of buffer in bytes. - * @return 0 on success. - * @return BAD_FUNC_ARG when key or out is NULL or private/public key not - * available. - * @return NOT_COMPILED_IN when key type is not supported. 
- */ -int wc_KyberKey_EncodePrivateKey(KyberKey* key, unsigned char* out, word32 len) -{ - int ret = 0; - unsigned int k = 0; - unsigned int pubLen = 0; - unsigned int privLen = 0; - unsigned char* p = out; - - if ((key == NULL) || (out == NULL)) { - ret = BAD_FUNC_ARG; - } - if ((ret == 0) && - ((key->flags & KYBER_FLAG_BOTH_SET) != KYBER_FLAG_BOTH_SET)) { - ret = BAD_FUNC_ARG; - } - - if (ret == 0) { - switch (key->type) { -#ifndef WOLFSSL_NO_ML_KEM - #ifdef WOLFSSL_WC_ML_KEM_512 - case WC_ML_KEM_512: - k = WC_ML_KEM_512_K; - pubLen = WC_ML_KEM_512_PUBLIC_KEY_SIZE; - privLen = WC_ML_KEM_512_PRIVATE_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_768 - case WC_ML_KEM_768: - k = WC_ML_KEM_768_K; - pubLen = WC_ML_KEM_768_PUBLIC_KEY_SIZE; - privLen = WC_ML_KEM_768_PRIVATE_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_1024 - case WC_ML_KEM_1024: - k = WC_ML_KEM_1024_K; - pubLen = WC_ML_KEM_1024_PUBLIC_KEY_SIZE; - privLen = WC_ML_KEM_1024_PRIVATE_KEY_SIZE; - break; - #endif -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - #ifdef WOLFSSL_KYBER512 - case KYBER512: - k = KYBER512_K; - pubLen = KYBER512_PUBLIC_KEY_SIZE; - privLen = KYBER512_PRIVATE_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER768 - case KYBER768: - k = KYBER768_K; - pubLen = KYBER768_PUBLIC_KEY_SIZE; - privLen = KYBER768_PRIVATE_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER1024 - case KYBER1024: - k = KYBER1024_K; - pubLen = KYBER1024_PUBLIC_KEY_SIZE; - privLen = KYBER1024_PRIVATE_KEY_SIZE; - break; - #endif -#endif - default: - /* No other values supported. */ - ret = NOT_COMPILED_IN; - break; - } - } - /* Check buffer is big enough for encoding. */ - if ((ret == 0) && (len != privLen)) { - ret = BUFFER_E; - } - - if (ret == 0) { - /* Encode private key that is vector of polynomials. */ - kyber_to_bytes(p, key->priv, k); - p += KYBER_POLY_SIZE * k; - - /* Encode public key. 
*/ - ret = wc_KyberKey_EncodePublicKey(key, p, pubLen); - p += pubLen; - } - /* Ensure hash of public key is available. */ - if ((ret == 0) && ((key->flags & KYBER_FLAG_H_SET) == 0)) { - ret = KYBER_HASH_H(&key->hash, p - pubLen, pubLen, key->h); - } - if (ret == 0) { - /* Public hash is available. */ - key->flags |= KYBER_FLAG_H_SET; - /* Append public hash. */ - XMEMCPY(p, key->h, sizeof(key->h)); - p += KYBER_SYM_SZ; - /* Append z (randomizer). */ - XMEMCPY(p, key->z, sizeof(key->z)); - } - - return ret; -} - -/** - * Encode the public key. - * - * Public vector | Public Seed - * - * FIPS 203, Algorithm 16: ML-KEM.KeyGen_internal(d,z) - * ... - * 2: ek <- ek_PKE - * ... - * FIPS 203, Algorithm 13: K-PKE.KeyGen(d) - * ... - * 19: ek_PKE <- ByteEncode_12(t_hat)||rho - * ... - * - * @param [in] key Kyber key object. - * @param [out] out Buffer to hold data. - * @param [in] len Size of buffer in bytes. - * @return 0 on success. - * @return BAD_FUNC_ARG when key or out is NULL or public key not available. - * @return NOT_COMPILED_IN when key type is not supported. 
- */ -int wc_KyberKey_EncodePublicKey(KyberKey* key, unsigned char* out, word32 len) -{ - int ret = 0; - unsigned int k = 0; - unsigned int pubLen = 0; - unsigned char* p = out; - - if ((key == NULL) || (out == NULL)) { - ret = BAD_FUNC_ARG; - } - if ((ret == 0) && - ((key->flags & KYBER_FLAG_PUB_SET) != KYBER_FLAG_PUB_SET)) { - ret = BAD_FUNC_ARG; - } - - if (ret == 0) { - switch (key->type) { -#ifndef WOLFSSL_NO_ML_KEM - #ifdef WOLFSSL_WC_ML_KEM_512 - case WC_ML_KEM_512: - k = WC_ML_KEM_512_K; - pubLen = WC_ML_KEM_512_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_768 - case WC_ML_KEM_768: - k = WC_ML_KEM_768_K; - pubLen = WC_ML_KEM_768_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_WC_ML_KEM_1024 - case WC_ML_KEM_1024: - k = WC_ML_KEM_1024_K; - pubLen = WC_ML_KEM_1024_PUBLIC_KEY_SIZE; - break; - #endif -#endif -#ifdef WOLFSSL_KYBER_ORIGINAL - #ifdef WOLFSSL_KYBER512 - case KYBER512: - k = KYBER512_K; - pubLen = KYBER512_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER768 - case KYBER768: - k = KYBER768_K; - pubLen = KYBER768_PUBLIC_KEY_SIZE; - break; - #endif - #ifdef WOLFSSL_KYBER1024 - case KYBER1024: - k = KYBER1024_K; - pubLen = KYBER1024_PUBLIC_KEY_SIZE; - break; - #endif -#endif - default: - /* No other values supported. */ - ret = NOT_COMPILED_IN; - break; - } - } - /* Check buffer is big enough for encoding. */ - if ((ret == 0) && (len != pubLen)) { - ret = BUFFER_E; - } - - if (ret == 0) { - int i; - - /* Encode public key polynomial by polynomial. */ - kyber_to_bytes(p, key->pub, k); - p += k * KYBER_POLY_SIZE; - - /* Append public seed. */ - for (i = 0; i < KYBER_SYM_SZ; i++) { - p[i] = key->pubSeed[i]; - } - - /* Make sure public hash is set. */ - if ((key->flags & KYBER_FLAG_H_SET) == 0) { - ret = KYBER_HASH_H(&key->hash, out, len, key->h); - } - } - if (ret == 0) { - /* Public hash is set. 
*/ - key->flags |= KYBER_FLAG_H_SET; - } - - return ret; -} - -#endif /* WOLFSSL_WC_KYBER */ diff --git a/wolfcrypt/src/wc_mlkem.c b/wolfcrypt/src/wc_mlkem.c index e8f1580fa..fce8b5638 100644 --- a/wolfcrypt/src/wc_mlkem.c +++ b/wolfcrypt/src/wc_mlkem.c @@ -24,7 +24,8 @@ * * Original implementation based on NIST 3rd Round submission package. * See link at: - * https://csrc.nist.gov/Projects/post-quantum-cryptography/post-quantum-cryptography-standardization/round-3-submissions + * https://csrc.nist.gov/Projects/post-quantum-cryptography/ + * post-quantum-cryptography-standardization/round-3-submissions */ /* Possible Kyber options: @@ -40,17 +41,17 @@ * Has a small performance trade-off. * Only usable with C implementation. * - * WOLFSSL_KYBER_NO_MAKE_KEY Default: OFF + * WOLFSSL_MLKEM_NO_MAKE_KEY Default: OFF * Disable the make key or key generation API. * Reduces the code size. * Turn on when only doing encapsulation. * - * WOLFSSL_KYBER_NO_ENCAPSULATE Default: OFF + * WOLFSSL_MLKEM_NO_ENCAPSULATE Default: OFF * Disable the encapsulation API. * Reduces the code size. * Turn on when doing make key/decapsulation. * - * WOLFSSL_KYBER_NO_DECAPSULATE Default: OFF + * WOLFSSL_MLKEM_NO_DECAPSULATE Default: OFF * Disable the decapsulation API. * Reduces the code size. * Turn on when only doing encapsulation. @@ -94,9 +95,9 @@ #endif #endif -#if defined(WOLFSSL_KYBER_NO_MAKE_KEY) && \ - defined(WOLFSSL_KYBER_NO_ENCAPSULATE) && \ - defined(WOLFSSL_KYBER_NO_DECAPSULATE) +#if defined(WOLFSSL_MLKEM_NO_MAKE_KEY) && \ + defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) && \ + defined(WOLFSSL_MLKEM_NO_DECAPSULATE) #error "No ML-KEM operations to be built." #endif @@ -105,21 +106,21 @@ /******************************************************************************/ /* Use SHA3-256 to generate 32-bytes of hash. */ -#define KYBER_HASH_H kyber_hash256 +#define MLKEM_HASH_H mlkem_hash256 /* Use SHA3-512 to generate 64-bytes of hash. 
*/ -#define KYBER_HASH_G kyber_hash512 +#define MLKEM_HASH_G mlkem_hash512 /* Use SHAKE-256 as a key derivation function (KDF). */ #if defined(USE_INTEL_SPEEDUP) || \ (defined(WOLFSSL_ARMASM) && defined(__aarch64__)) - #define KYBER_KDF kyber_kdf + #define MLKEM_KDF mlkem_kdf #else - #define KYBER_KDF wc_Shake256Hash + #define MLKEM_KDF wc_Shake256Hash #endif /******************************************************************************/ -/* Declare variable to make compiler not optimize code in kyber_from_msg(). */ -volatile sword16 kyber_opt_blocker = 0; +/* Declare variable to make compiler not optimize code in mlkem_from_msg(). */ +volatile sword16 mlkem_opt_blocker = 0; /******************************************************************************/ @@ -167,7 +168,7 @@ int wc_MlKemKey_Init(MlKemKey* key, int type, void* heap, int devId) #endif break; #endif - #ifdef WOLFSSL_KYBER_ORIGINAL + #ifdef WOLFSSL_MLKEM_KYBER case KYBER512: #ifndef WOLFSSL_KYBER512 /* Code not compiled in for Kyber-512. */ @@ -208,14 +209,14 @@ int wc_MlKemKey_Init(MlKemKey* key, int type, void* heap, int devId) XMEMSET(&key->prf, 0, sizeof(key->prf)); /* Initialize the hash algorithm object. */ - ret = kyber_hash_new(&key->hash, heap, devId); + ret = mlkem_hash_new(&key->hash, heap, devId); } if (ret == 0) { /* Initialize the PRF algorithm object. */ - ret = kyber_prf_new(&key->prf, heap, devId); + ret = mlkem_prf_new(&key->prf, heap, devId); } if (ret == 0) { - kyber_init(); + mlkem_init(); } (void)devId; @@ -233,22 +234,22 @@ int wc_MlKemKey_Free(MlKemKey* key) { if (key != NULL) { /* Dispose of PRF object. */ - kyber_prf_free(&key->prf); + mlkem_prf_free(&key->prf); /* Dispose of hash object. */ - kyber_hash_free(&key->hash); + mlkem_hash_free(&key->hash); /* Ensure all private data is zeroed. 
*/ ForceZero(&key->hash, sizeof(key->hash)); ForceZero(&key->prf, sizeof(key->prf)); ForceZero(key->priv, sizeof(key->priv)); ForceZero(key->z, sizeof(key->z)); } - + return 0; } /******************************************************************************/ -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY /** * Make a Kyber key object using a random number generator. * @@ -276,7 +277,7 @@ int wc_MlKemKey_Free(MlKemKey* key) int wc_MlKemKey_MakeKey(MlKemKey* key, WC_RNG* rng) { int ret = 0; - unsigned char rand[KYBER_MAKEKEY_RAND_SZ]; + unsigned char rand[WC_ML_KEM_MAKEKEY_RAND_SZ]; /* Validate parameters. */ if ((key == NULL) || (rng == NULL)) { @@ -288,7 +289,7 @@ int wc_MlKemKey_MakeKey(MlKemKey* key, WC_RNG* rng) * Step 1: d is 32 random bytes * Step 2: z is 32 random bytes */ - ret = wc_RNG_GenerateBlock(rng, rand, KYBER_SYM_SZ * 2); + ret = wc_RNG_GenerateBlock(rng, rand, WC_ML_KEM_SYM_SZ * 2); /* Step 3: ret is not zero when d == NULL or z == NULL. */ } if (ret == 0) { @@ -332,27 +333,27 @@ int wc_MlKemKey_MakeKey(MlKemKey* key, WC_RNG* rng) * @param [in] len Length of random data in bytes. * @return 0 on success. * @return BAD_FUNC_ARG when key or rand is NULL. - * @return BUFFER_E when length is not KYBER_MAKEKEY_RAND_SZ. + * @return BUFFER_E when length is not WC_ML_KEM_MAKEKEY_RAND_SZ. * @return NOT_COMPILED_IN when key type is not supported. * @return MEMORY_E when dynamic memory allocation failed. 
*/ -int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, WC_RNG* rng, - const unsigned char* rand, int len) +int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, const unsigned char* rand, + int len) { - byte buf[2 * KYBER_SYM_SZ + 1]; + byte buf[2 * WC_ML_KEM_SYM_SZ + 1]; byte* rho = buf; - byte* sigma = buf + KYBER_SYM_SZ; + byte* sigma = buf + WC_ML_KEM_SYM_SZ; #ifndef WOLFSSL_NO_MALLOC sword16* e = NULL; #else #ifndef WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM #ifndef WOLFSSL_MLKEM_CACHE_A - sword16 e[(KYBER_MAX_K + 1) * KYBER_MAX_K * KYBER_N]; + sword16 e[(MLKEM_MAX_K + 1) * MLKEM_MAX_K * MLKEM_N]; #else - sword16 e[KYBER_MAX_K * KYBER_N]; + sword16 e[MLKEM_MAX_K * MLKEM_N]; #endif #else - sword16 e[KYBER_MAX_K * KYBER_N]; + sword16 e[MLKEM_MAX_K * MLKEM_N]; #endif #endif #ifndef WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM @@ -367,7 +368,7 @@ int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, WC_RNG* rng, if ((key == NULL) || (rand == NULL)) { ret = BAD_FUNC_ARG; } - if ((ret == 0) && (len != KYBER_MAKEKEY_RAND_SZ)) { + if ((ret == 0) && (len != WC_ML_KEM_MAKEKEY_RAND_SZ)) { ret = BUFFER_E; } @@ -393,7 +394,7 @@ int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, WC_RNG* rng, break; #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 case KYBER512: k = KYBER512_K; @@ -423,16 +424,16 @@ int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, WC_RNG* rng, #ifndef WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM #ifndef WOLFSSL_MLKEM_CACHE_A /* e (v) | a (m) */ - e = (sword16*)XMALLOC((k + 1) * k * KYBER_N * sizeof(sword16), + e = (sword16*)XMALLOC((k + 1) * k * MLKEM_N * sizeof(sword16), key->heap, DYNAMIC_TYPE_TMP_BUFFER); #else /* e (v) */ - e = (sword16*)XMALLOC(k * KYBER_N * sizeof(sword16), + e = (sword16*)XMALLOC(k * MLKEM_N * sizeof(sword16), key->heap, DYNAMIC_TYPE_TMP_BUFFER); #endif #else /* e (v) */ - e = (sword16*)XMALLOC(k * KYBER_N * sizeof(sword16), + e = (sword16*)XMALLOC(k * MLKEM_N * sizeof(sword16), key->heap, DYNAMIC_TYPE_TMP_BUFFER); #endif if (e == NULL) 
{ @@ -447,19 +448,19 @@ int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, WC_RNG* rng, a = key->a; #elif !defined(WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM) /* Matrix A allocated at end of error vector. */ - a = e + (k * KYBER_N); + a = e + (k * MLKEM_N); #endif -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - if (key->type & KYBER_ORIGINAL) +#if defined(WOLFSSL_MLKEM_KYBER) && !defined(WOLFSSL_NO_ML_KEM) + if (key->type & MLKEM_KYBER) #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER { /* Expand 32 bytes of random to 32. */ - ret = KYBER_HASH_G(&key->hash, d, KYBER_SYM_SZ, NULL, 0, buf); + ret = MLKEM_HASH_G(&key->hash, d, WC_ML_KEM_SYM_SZ, NULL, 0, buf); } #endif -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) +#if defined(WOLFSSL_MLKEM_KYBER) && !defined(WOLFSSL_NO_ML_KEM) else #endif #ifndef WOLFSSL_NO_ML_KEM @@ -468,45 +469,45 @@ int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, WC_RNG* rng, /* Expand 33 bytes of random to 32. * Alg 13: Step 1: (rho,sigma) <- G(d||k) */ - ret = KYBER_HASH_G(&key->hash, d, KYBER_SYM_SZ, buf, 1, buf); + ret = MLKEM_HASH_G(&key->hash, d, WC_ML_KEM_SYM_SZ, buf, 1, buf); } #endif } if (ret == 0) { - const byte* z = rand + KYBER_SYM_SZ; + const byte* z = rand + WC_ML_KEM_SYM_SZ; s = key->priv; t = key->pub; /* Cache the public seed for use in encapsulation and encoding public * key. */ - XMEMCPY(key->pubSeed, rho, KYBER_SYM_SZ); + XMEMCPY(key->pubSeed, rho, WC_ML_KEM_SYM_SZ); /* Cache the z value for decapsulation and encoding private key. */ XMEMCPY(key->z, z, sizeof(key->z)); /* Initialize PRF for use in noise generation. */ - kyber_prf_init(&key->prf); + mlkem_prf_init(&key->prf); #ifndef WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM /* Generate noise using PRF. * Alg 13: Steps 8-15: generate s and e */ - ret = kyber_get_noise(&key->prf, k, s, e, NULL, sigma); + ret = mlkem_get_noise(&key->prf, k, s, e, NULL, sigma); } if (ret == 0) { /* Generate the matrix A. 
* Alg 13: Steps 3-7 */ - ret = kyber_gen_matrix(&key->prf, a, k, rho, 0); + ret = mlkem_gen_matrix(&key->prf, a, k, rho, 0); } if (ret == 0) { /* Generate key pair from random data. * Alg 13: Steps 16-18. */ - kyber_keygen(s, t, e, a, k); + mlkem_keygen(s, t, e, a, k); #else /* Generate noise using PRF. * Alg 13: Steps 8-11: generate s */ - ret = kyber_get_noise(&key->prf, k, s, NULL, NULL, sigma); + ret = mlkem_get_noise(&key->prf, k, s, NULL, NULL, sigma); } if (ret == 0) { /* Generate key pair from private vector and seeds. @@ -514,14 +515,14 @@ int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, WC_RNG* rng, * Alg 13: 12-15: generate e * Alg 13: 16-18: calculate t_hat from A_hat, s and e */ - ret = kyber_keygen_seeds(s, t, &key->prf, e, k, rho, sigma); + ret = mlkem_keygen_seeds(s, t, &key->prf, e, k, rho, sigma); } if (ret == 0) { #endif /* Private and public key are set/available. */ - key->flags |= KYBER_FLAG_PRIV_SET | KYBER_FLAG_PUB_SET; + key->flags |= MLKEM_FLAG_PRIV_SET | MLKEM_FLAG_PUB_SET; #ifdef WOLFSSL_MLKEM_CACHE_A - key->flags |= KYBER_FLAG_A_SET; + key->flags |= MLKEM_FLAG_A_SET; #endif } @@ -534,7 +535,7 @@ int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, WC_RNG* rng, return ret; } -#endif /* !WOLFSSL_KYBER_NO_MAKE_KEY */ +#endif /* !WOLFSSL_MLKEM_NO_MAKE_KEY */ /******************************************************************************/ @@ -576,7 +577,7 @@ int wc_MlKemKey_CipherTextSize(MlKemKey* key, word32* len) break; #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 case KYBER512: *len = KYBER512_CIPHER_TEXT_SIZE; @@ -615,13 +616,13 @@ int wc_MlKemKey_SharedSecretSize(MlKemKey* key, word32* len) { (void)key; - *len = KYBER_SS_SZ; + *len = WC_ML_KEM_SS_SZ; return 0; } -#if !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) || \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) +#if !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) || \ + !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) /* Encapsulate data and derive secret. 
* * FIPS 203, Algorithm 14: K-PKE.Encrypt(ek_PKE, m, r) @@ -666,9 +667,9 @@ static int mlkemkey_encapsulate(MlKemKey* key, const byte* m, byte* r, byte* c) sword16* y = NULL; #else #ifndef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM - sword16 y[((KYBER_MAX_K + 3) * KYBER_MAX_K + 3) * KYBER_N]; + sword16 y[((MLKEM_MAX_K + 3) * MLKEM_MAX_K + 3) * MLKEM_N]; #else - sword16 y[3 * KYBER_MAX_K * KYBER_N]; + sword16 y[3 * MLKEM_MAX_K * MLKEM_N]; #endif #endif #ifdef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM @@ -698,7 +699,7 @@ static int mlkemkey_encapsulate(MlKemKey* key, const byte* m, byte* r, byte* c) break; #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 case KYBER512: k = KYBER512_K; @@ -728,10 +729,10 @@ static int mlkemkey_encapsulate(MlKemKey* key, const byte* m, byte* r, byte* c) if (ret == 0) { /* Allocate dynamic memory for all matrices, vectors and polynomials. */ #ifndef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM - y = (sword16*)XMALLOC(((k + 3) * k + 3) * KYBER_N * sizeof(sword16), + y = (sword16*)XMALLOC(((k + 3) * k + 3) * MLKEM_N * sizeof(sword16), key->heap, DYNAMIC_TYPE_TMP_BUFFER); #else - y = (sword16*)XMALLOC(3 * k * KYBER_N * sizeof(sword16), key->heap, + y = (sword16*)XMALLOC(3 * k * MLKEM_N * sizeof(sword16), key->heap, DYNAMIC_TYPE_TMP_BUFFER); #endif if (y == NULL) { @@ -744,39 +745,39 @@ static int mlkemkey_encapsulate(MlKemKey* key, const byte* m, byte* r, byte* c) #ifndef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM /* Assign allocated dynamic memory to pointers. * y (b) | a (m) | mu (p) | e1 (p) | e2 (v) | u (v) | v (p) */ - a = y + KYBER_N * k; - mu = a + KYBER_N * k * k; - e1 = mu + KYBER_N; - e2 = e1 + KYBER_N * k; + a = y + MLKEM_N * k; + mu = a + MLKEM_N * k * k; + e1 = mu + MLKEM_N; + e2 = e1 + MLKEM_N * k; #else /* Assign allocated dynamic memory to pointers. * y (v) | a (v) | u (v) */ - a = y + KYBER_N * k; + a = y + MLKEM_N * k; #endif #ifndef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM /* Convert msg to a polynomial. 
* Step 20: mu <- Decompress_1(ByteDecode_1(m)) */ - kyber_from_msg(mu, m); + mlkem_from_msg(mu, m); /* Initialize the PRF for use in the noise generation. */ - kyber_prf_init(&key->prf); + mlkem_prf_init(&key->prf); /* Generate noise using PRF. * Steps 9-17: generate y, e_1, e_2 */ - ret = kyber_get_noise(&key->prf, k, y, e1, e2, r); + ret = mlkem_get_noise(&key->prf, k, y, e1, e2, r); } #ifdef WOLFSSL_MLKEM_CACHE_A - if ((ret == 0) && ((key->flags & KYBER_FLAG_A_SET) != 0)) { + if ((ret == 0) && ((key->flags & MLKEM_FLAG_A_SET) != 0)) { unsigned int i; /* Transpose matrix. * Steps 4-8: generate matrix A_hat (from original) */ for (i = 0; i < k; i++) { unsigned int j; for (j = 0; j < k; j++) { - XMEMCPY(&a[(i * k + j) * KYBER_N], - &key->a[(j * k + i) * KYBER_N], - KYBER_N * 2); + XMEMCPY(&a[(i * k + j) * MLKEM_N], + &key->a[(j * k + i) * MLKEM_N], + MLKEM_N * 2); } } } @@ -785,7 +786,7 @@ static int mlkemkey_encapsulate(MlKemKey* key, const byte* m, byte* r, byte* c) if (ret == 0) { /* Generate the transposed matrix. * Step 4-8: generate matrix A_hat */ - ret = kyber_gen_matrix(&key->prf, a, k, key->pubSeed, 1); + ret = mlkem_gen_matrix(&key->prf, a, k, key->pubSeed, 1); } if (ret == 0) { sword16* u; @@ -793,29 +794,29 @@ static int mlkemkey_encapsulate(MlKemKey* key, const byte* m, byte* r, byte* c) /* Assign remaining allocated dynamic memory to pointers. * y (v) | a (m) | mu (p) | e1 (p) | r2 (v) | u (v) | v (p)*/ - u = e2 + KYBER_N; - v = u + KYBER_N * k; + u = e2 + MLKEM_N; + v = u + MLKEM_N * k; /* Perform encapsulation maths. * Steps 18-19, 21: calculate u and v */ - kyber_encapsulate(key->pub, u, v, a, y, e1, e2, mu, k); + mlkem_encapsulate(key->pub, u, v, a, y, e1, e2, mu, k); #else /* Initialize the PRF for use in the noise generation. */ - kyber_prf_init(&key->prf); + mlkem_prf_init(&key->prf); /* Generate noise using PRF. 
* Steps 9-12: generate y */ - ret = kyber_get_noise(&key->prf, k, y, NULL, NULL, r); + ret = mlkem_get_noise(&key->prf, k, y, NULL, NULL, r); } if (ret == 0) { /* Assign remaining allocated dynamic memory to pointers. * y (v) | at (v) | u (v) */ - u = a + KYBER_N * k; + u = a + MLKEM_N * k; v = a; /* Perform encapsulation maths. * Steps 13-17: generate e_1 and e_2 * Steps 18-19, 21: calculate u and v */ - ret = kyber_encapsulate_seeds(key->pub, &key->prf, u, a, y, k, m, + ret = mlkem_encapsulate_seeds(key->pub, &key->prf, u, a, y, k, m, key->pubSeed, r); } if (ret == 0) { @@ -824,29 +825,29 @@ static int mlkemkey_encapsulate(MlKemKey* key, const byte* m, byte* r, byte* c) byte* c2 = c + compVecSz; #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) - if (k == KYBER512_K) { + if (k == WC_ML_KEM_512_K) { /* Step 22: c_1 <- ByteEncode_d_u(Compress_d_u(u)) */ - kyber_vec_compress_10(c1, u, k); + mlkem_vec_compress_10(c1, u, k); /* Step 23: c_2 <- ByteEncode_d_v(Compress_d_v(v)) */ - kyber_compress_4(c2, v); + mlkem_compress_4(c2, v); /* Step 24: return c <- (c_1||c_2) */ } #endif #if defined(WOLFSSL_KYBER768) || defined(WOLFSSL_WC_ML_KEM_768) - if (k == KYBER768_K) { + if (k == WC_ML_KEM_768_K) { /* Step 22: c_1 <- ByteEncode_d_u(Compress_d_u(u)) */ - kyber_vec_compress_10(c1, u, k); + mlkem_vec_compress_10(c1, u, k); /* Step 23: c_2 <- ByteEncode_d_v(Compress_d_v(v)) */ - kyber_compress_4(c2, v); + mlkem_compress_4(c2, v); /* Step 24: return c <- (c_1||c_2) */ } #endif #if defined(WOLFSSL_KYBER1024) || defined(WOLFSSL_WC_ML_KEM_1024) - if (k == KYBER1024_K) { + if (k == WC_ML_KEM_1024_K) { /* Step 22: c_1 <- ByteEncode_d_u(Compress_d_u(u)) */ - kyber_vec_compress_11(c1, u); + mlkem_vec_compress_11(c1, u); /* Step 23: c_2 <- ByteEncode_d_v(Compress_d_v(v)) */ - kyber_compress_5(c2, v); + mlkem_compress_5(c2, v); /* Step 24: return c <- (c_1||c_2) */ } #endif @@ -861,7 +862,7 @@ static int mlkemkey_encapsulate(MlKemKey* key, const byte* m, byte* r, byte* c) 
} #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE /** * Encapsulate with random number generator and derive secret. * @@ -889,7 +890,7 @@ int wc_MlKemKey_Encapsulate(MlKemKey* key, unsigned char* c, unsigned char* k, WC_RNG* rng) { int ret = 0; - unsigned char m[KYBER_ENC_RAND_SZ]; + unsigned char m[WC_ML_KEM_ENC_RAND_SZ]; /* Validate parameters. */ if ((key == NULL) || (c == NULL) || (k == NULL) || (rng == NULL)) { @@ -932,19 +933,19 @@ int wc_MlKemKey_Encapsulate(MlKemKey* key, unsigned char* c, unsigned char* k, * @param [in] len Length of random bytes. * @return 0 on success. * @return BAD_FUNC_ARG when key, c, k or RNG is NULL. - * @return BUFFER_E when len is not KYBER_ENC_RAND_SZ. + * @return BUFFER_E when len is not WC_ML_KEM_ENC_RAND_SZ. * @return NOT_COMPILED_IN when key type is not supported. * @return MEMORY_E when dynamic memory allocation failed. */ int wc_MlKemKey_EncapsulateWithRandom(MlKemKey* key, unsigned char* c, unsigned char* k, const unsigned char* m, int len) { -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER byte msg[KYBER_SYM_SZ]; #endif byte kr[2 * KYBER_SYM_SZ + 1]; int ret = 0; -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER unsigned int cSz = 0; #endif @@ -952,11 +953,11 @@ int wc_MlKemKey_EncapsulateWithRandom(MlKemKey* key, unsigned char* c, if ((key == NULL) || (c == NULL) || (k == NULL) || (m == NULL)) { ret = BAD_FUNC_ARG; } - if ((ret == 0) && (len != KYBER_ENC_RAND_SZ)) { + if ((ret == 0) && (len != WC_ML_KEM_ENC_RAND_SZ)) { ret = BUFFER_E; } -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER if (ret == 0) { /* Establish parameters based on key type. */ switch (key->type) { @@ -999,13 +1000,13 @@ int wc_MlKemKey_EncapsulateWithRandom(MlKemKey* key, unsigned char* c, * (fields set explicitly instead of using decode). * Step 1: ... H(ek)... 
*/ - if ((ret == 0) && ((key->flags & KYBER_FLAG_H_SET) == 0)) { + if ((ret == 0) && ((key->flags & MLKEM_FLAG_H_SET) == 0)) { #ifndef WOLFSSL_NO_MALLOC byte* pubKey = NULL; word32 pubKeyLen; #else - byte pubKey[KYBER_MAX_PUBLIC_KEY_SIZE]; - word32 pubKeyLen = KYBER_MAX_PUBLIC_KEY_SIZE; + byte pubKey[WC_ML_KEM_MAX_PUBLIC_KEY_SIZE]; + word32 pubKeyLen = WC_ML_KEM_MAX_PUBLIC_KEY_SIZE; #endif #ifndef WOLFSSL_NO_MALLOC @@ -1029,100 +1030,100 @@ int wc_MlKemKey_EncapsulateWithRandom(MlKemKey* key, unsigned char* c, XFREE(pubKey, key->heap, DYNAMIC_TYPE_TMP_BUFFER); #endif } - if ((ret == 0) && ((key->flags & KYBER_FLAG_H_SET) == 0)) { + if ((ret == 0) && ((key->flags & MLKEM_FLAG_H_SET) == 0)) { /* Implementation issue if h not cached and flag set. */ ret = BAD_STATE_E; } -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER if (ret == 0) { #ifndef WOLFSSL_NO_ML_KEM - if (key->type & KYBER_ORIGINAL) + if (key->type & MLKEM_KYBER) #endif { /* Hash random to anonymize as seed data. */ - ret = KYBER_HASH_H(&key->hash, m, KYBER_SYM_SZ, msg); + ret = MLKEM_HASH_H(&key->hash, m, WC_ML_KEM_SYM_SZ, msg); } } #endif if (ret == 0) { /* Hash message into seed buffer. 
*/ -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - if (key->type & KYBER_ORIGINAL) +#if defined(WOLFSSL_MLKEM_KYBER) && !defined(WOLFSSL_NO_ML_KEM) + if (key->type & MLKEM_KYBER) #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER { - ret = KYBER_HASH_G(&key->hash, msg, KYBER_SYM_SZ, key->h, - KYBER_SYM_SZ, kr); + ret = MLKEM_HASH_G(&key->hash, msg, WC_ML_KEM_SYM_SZ, key->h, + WC_ML_KEM_SYM_SZ, kr); } #endif -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) +#if defined(WOLFSSL_MLKEM_KYBER) && !defined(WOLFSSL_NO_ML_KEM) else #endif #ifndef WOLFSSL_NO_ML_KEM { /* Step 1: (K,r) <- G(m||H(ek)) */ - ret = KYBER_HASH_G(&key->hash, m, KYBER_SYM_SZ, key->h, - KYBER_SYM_SZ, kr); + ret = MLKEM_HASH_G(&key->hash, m, WC_ML_KEM_SYM_SZ, key->h, + WC_ML_KEM_SYM_SZ, kr); } #endif } if (ret == 0) { /* Encapsulate the message using the key and the seed. */ -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - if (key->type & KYBER_ORIGINAL) +#if defined(WOLFSSL_MLKEM_KYBER) && !defined(WOLFSSL_NO_ML_KEM) + if (key->type & MLKEM_KYBER) #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER { - ret = mlkemkey_encapsulate(key, msg, kr + KYBER_SYM_SZ, c); + ret = mlkemkey_encapsulate(key, msg, kr + WC_ML_KEM_SYM_SZ, c); } #endif -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) +#if defined(WOLFSSL_MLKEM_KYBER) && !defined(WOLFSSL_NO_ML_KEM) else #endif #ifndef WOLFSSL_NO_ML_KEM { /* Step 2: c <- K-PKE.Encrypt(ek,m,r) */ - ret = mlkemkey_encapsulate(key, m, kr + KYBER_SYM_SZ, c); + ret = mlkemkey_encapsulate(key, m, kr + WC_ML_KEM_SYM_SZ, c); } #endif } -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - if (key->type & KYBER_ORIGINAL) +#if defined(WOLFSSL_MLKEM_KYBER) && !defined(WOLFSSL_NO_ML_KEM) + if (key->type & MLKEM_KYBER) #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER { if (ret == 0) { /* Hash the cipher text after the seed. 
*/ - ret = KYBER_HASH_H(&key->hash, c, cSz, kr + KYBER_SYM_SZ); + ret = MLKEM_HASH_H(&key->hash, c, cSz, kr + WC_ML_KEM_SYM_SZ); } if (ret == 0) { /* Derive the secret from the seed and hash of cipher text. */ - ret = KYBER_KDF(kr, 2 * KYBER_SYM_SZ, k, KYBER_SS_SZ); + ret = MLKEM_KDF(kr, 2 * WC_ML_KEM_SYM_SZ, k, WC_ML_KEM_SS_SZ); } } #endif -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) +#if defined(WOLFSSL_MLKEM_KYBER) && !defined(WOLFSSL_NO_ML_KEM) else #endif #ifndef WOLFSSL_NO_ML_KEM { if (ret == 0) { /* return (K,c) */ - XMEMCPY(k, kr, KYBER_SS_SZ); + XMEMCPY(k, kr, WC_ML_KEM_SS_SZ); } } #endif return ret; } -#endif /* !WOLFSSL_KYBER_NO_ENCAPSULATE */ +#endif /* !WOLFSSL_MLKEM_NO_ENCAPSULATE */ /******************************************************************************/ -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE /* Decapsulate cipher text to the message using key. * * FIPS 203, Algorithm 15: K-PKE.Decrypt(dk_PKE,c) @@ -1143,7 +1144,7 @@ int wc_MlKemKey_EncapsulateWithRandom(MlKemKey* key, unsigned char* c, * @return NOT_COMPILED_IN when key type is not supported. * @return MEMORY_E when dynamic memory allocation failed. */ -static KYBER_NOINLINE int mlkemkey_decapsulate(MlKemKey* key, byte* m, +static MLKEM_NOINLINE int mlkemkey_decapsulate(MlKemKey* key, byte* m, const byte* c) { int ret = 0; @@ -1154,7 +1155,7 @@ static KYBER_NOINLINE int mlkemkey_decapsulate(MlKemKey* key, byte* m, #if !defined(USE_INTEL_SPEEDUP) && !defined(WOLFSSL_NO_MALLOC) sword16* u = NULL; #else - sword16 u[(KYBER_MAX_K + 1) * KYBER_N]; + sword16 u[(WC_ML_KEM_MAX_K + 1) * MLKEM_N]; #endif /* Establish parameters based on key type. 
*/ @@ -1179,7 +1180,7 @@ static KYBER_NOINLINE int mlkemkey_decapsulate(MlKemKey* key, byte* m, break; #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 case KYBER512: k = KYBER512_K; @@ -1208,7 +1209,7 @@ static KYBER_NOINLINE int mlkemkey_decapsulate(MlKemKey* key, byte* m, #if !defined(USE_INTEL_SPEEDUP) && !defined(WOLFSSL_NO_MALLOC) if (ret == 0) { /* Allocate dynamic memory for a vector and a polynomial. */ - u = (sword16*)XMALLOC((k + 1) * KYBER_N * sizeof(sword16), key->heap, + u = (sword16*)XMALLOC((k + 1) * MLKEM_N * sizeof(sword16), key->heap, DYNAMIC_TYPE_TMP_BUFFER); if (u == NULL) { ret = MEMORY_E; @@ -1223,41 +1224,41 @@ static KYBER_NOINLINE int mlkemkey_decapsulate(MlKemKey* key, byte* m, /* Assign allocated dynamic memory to pointers. * u (v) | v (p) */ - v = u + k * KYBER_N; + v = u + k * MLKEM_N; w = u; #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) - if (k == KYBER512_K) { + if (k == WC_ML_KEM_512_K) { /* Step 3: u' <= Decompress_d_u(ByteDecode_d_u(c1)) */ - kyber_vec_decompress_10(u, c1, k); + mlkem_vec_decompress_10(u, c1, k); /* Step 4: v' <= Decompress_d_v(ByteDecode_d_v(c2)) */ - kyber_decompress_4(v, c2); + mlkem_decompress_4(v, c2); } #endif #if defined(WOLFSSL_KYBER768) || defined(WOLFSSL_WC_ML_KEM_768) - if (k == KYBER768_K) { + if (k == WC_ML_KEM_768_K) { /* Step 3: u' <= Decompress_d_u(ByteDecode_d_u(c1)) */ - kyber_vec_decompress_10(u, c1, k); + mlkem_vec_decompress_10(u, c1, k); /* Step 4: v' <= Decompress_d_v(ByteDecode_d_v(c2)) */ - kyber_decompress_4(v, c2); + mlkem_decompress_4(v, c2); } #endif #if defined(WOLFSSL_KYBER1024) || defined(WOLFSSL_WC_ML_KEM_1024) - if (k == KYBER1024_K) { + if (k == WC_ML_KEM_1024_K) { /* Step 3: u' <= Decompress_d_u(ByteDecode_d_u(c1)) */ - kyber_vec_decompress_11(u, c1); + mlkem_vec_decompress_11(u, c1); /* Step 4: v' <= Decompress_d_v(ByteDecode_d_v(c2)) */ - kyber_decompress_5(v, c2); + mlkem_decompress_5(v, c2); } #endif /* 
Decapsulate the cipher text into polynomial. * Step 6: w <- v' - InvNTT(s_hat_trans o NTT(u')) */ - kyber_decapsulate(key->priv, w, u, v, k); + mlkem_decapsulate(key->priv, w, u, v, k); /* Convert the polynomial into a array of bytes (message). * Step 7: m <- ByteEncode_1(Compress_1(w)) */ - kyber_to_msg(m, w); + mlkem_to_msg(m, w); /* Step 8: return m */ } @@ -1288,12 +1289,12 @@ static int mlkem_derive_secret(const byte* z, const byte* ct, word32 ctSz, ret = wc_InitShake256(&shake, NULL, INVALID_DEVID); if (ret == 0) { - ret = wc_Shake256_Update(&shake, z, KYBER_SYM_SZ); + ret = wc_Shake256_Update(&shake, z, WC_ML_KEM_SYM_SZ); if (ret == 0) { ret = wc_Shake256_Update(&shake, ct, ctSz); } if (ret == 0) { - ret = wc_Shake256_Final(&shake, ss, KYBER_SS_SZ); + ret = wc_Shake256_Final(&shake, ss, WC_ML_KEM_SS_SZ); } wc_Shake256_Free(&shake); } @@ -1348,8 +1349,8 @@ static int mlkem_derive_secret(const byte* z, const byte* ct, word32 ctSz, int wc_MlKemKey_Decapsulate(MlKemKey* key, unsigned char* ss, const unsigned char* ct, word32 len) { - byte msg[KYBER_SYM_SZ]; - byte kr[2 * KYBER_SYM_SZ + 1]; + byte msg[WC_ML_KEM_SYM_SZ]; + byte kr[2 * WC_ML_KEM_SYM_SZ + 1]; int ret = 0; unsigned int ctSz = 0; unsigned int i = 0; @@ -1357,7 +1358,7 @@ int wc_MlKemKey_Decapsulate(MlKemKey* key, unsigned char* ss, #if !defined(USE_INTEL_SPEEDUP) && !defined(WOLFSSL_NO_MALLOC) byte* cmp = NULL; #else - byte cmp[KYBER_MAX_CIPHER_TEXT_SIZE]; + byte cmp[WC_ML_KEM_MAX_CIPHER_TEXT_SIZE]; #endif /* Validate parameters. */ @@ -1385,7 +1386,7 @@ int wc_MlKemKey_Decapsulate(MlKemKey* key, unsigned char* ss, break; #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 case KYBER512: ctSz = KYBER512_CIPHER_TEXT_SIZE; @@ -1430,36 +1431,36 @@ int wc_MlKemKey_Decapsulate(MlKemKey* key, unsigned char* ss, } if (ret == 0) { /* Hash message into seed buffer. 
*/ - ret = KYBER_HASH_G(&key->hash, msg, KYBER_SYM_SZ, key->h, KYBER_SYM_SZ, - kr); + ret = MLKEM_HASH_G(&key->hash, msg, WC_ML_KEM_SYM_SZ, key->h, + WC_ML_KEM_SYM_SZ, kr); } if (ret == 0) { /* Encapsulate the message. */ - ret = mlkemkey_encapsulate(key, msg, kr + KYBER_SYM_SZ, cmp); + ret = mlkemkey_encapsulate(key, msg, kr + WC_ML_KEM_SYM_SZ, cmp); } if (ret == 0) { /* Compare generated cipher text with that passed in. */ - fail = kyber_cmp(ct, cmp, ctSz); + fail = mlkem_cmp(ct, cmp, ctSz); -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) - if (key->type & KYBER_ORIGINAL) +#if defined(WOLFSSL_MLKEM_KYBER) && !defined(WOLFSSL_NO_ML_KEM) + if (key->type & MLKEM_KYBER) #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER { /* Hash the cipher text after the seed. */ - ret = KYBER_HASH_H(&key->hash, ct, ctSz, kr + KYBER_SYM_SZ); + ret = MLKEM_HASH_H(&key->hash, ct, ctSz, kr + WC_ML_KEM_SYM_SZ); if (ret == 0) { /* Change seed to z on comparison failure. */ - for (i = 0; i < KYBER_SYM_SZ; i++) { + for (i = 0; i < WC_ML_KEM_SYM_SZ; i++) { kr[i] ^= (kr[i] ^ key->z[i]) & fail; } /* Derive the secret from the seed and hash of cipher text. */ - ret = KYBER_KDF(kr, 2 * KYBER_SYM_SZ, ss, KYBER_SS_SZ); + ret = MLKEM_KDF(kr, 2 * WC_ML_KEM_SYM_SZ, ss, WC_ML_KEM_SS_SZ); } } #endif -#if defined(WOLFSSL_KYBER_ORIGINAL) && !defined(WOLFSSL_NO_ML_KEM) +#if defined(WOLFSSL_MLKEM_KYBER) && !defined(WOLFSSL_NO_ML_KEM) else #endif #ifndef WOLFSSL_NO_ML_KEM @@ -1467,7 +1468,7 @@ int wc_MlKemKey_Decapsulate(MlKemKey* key, unsigned char* ss, ret = mlkem_derive_secret(key->z, ct, ctSz, msg); if (ret == 0) { /* Set secret to kr or fake secret on comparison failure. 
*/ - for (i = 0; i < KYBER_SYM_SZ; i++) { + for (i = 0; i < WC_ML_KEM_SYM_SZ; i++) { ss[i] = kr[i] ^ ((kr[i] ^ msg[i]) & fail); } } @@ -1484,7 +1485,7 @@ int wc_MlKemKey_Decapsulate(MlKemKey* key, unsigned char* ss, return ret; } -#endif /* WOLFSSL_KYBER_NO_DECAPSULATE */ +#endif /* WOLFSSL_MLKEM_NO_DECAPSULATE */ /******************************************************************************/ @@ -1509,12 +1510,12 @@ static void mlkemkey_decode_public(sword16* pub, byte* pubSeed, const byte* p, /* Decode public key that is vector of polynomials. * Step 2: t <- ByteDecode_12(ek_PKE[0 : 384k]) */ - kyber_from_bytes(pub, p, k); - p += k * KYBER_POLY_SIZE; + mlkem_from_bytes(pub, p, k); + p += k * WC_ML_KEM_POLY_SIZE; /* Read public key seed. * Step 3: rho <- ek_PKE[384k : 384k + 32] */ - for (i = 0; i < KYBER_SYM_SZ; i++) { + for (i = 0; i < WC_ML_KEM_SYM_SZ; i++) { pubSeed[i] = p[i]; } } @@ -1587,7 +1588,7 @@ int wc_MlKemKey_DecodePrivateKey(MlKemKey* key, const unsigned char* in, break; #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 case KYBER512: k = KYBER512_K; @@ -1625,8 +1626,8 @@ int wc_MlKemKey_DecodePrivateKey(MlKemKey* key, const unsigned char* in, /* Decode private key that is vector of polynomials. * Alg 18 Step 1: dk_PKE <- dk[0 : 384k] * Alg 15 Step 5: s_hat <- ByteDecode_12(dk_PKE) */ - kyber_from_bytes(key->priv, p, k); - p += k * KYBER_POLY_SIZE; + mlkem_from_bytes(key->priv, p, k); + p += k * WC_ML_KEM_POLY_SIZE; /* Decode the public key that is after the private key. */ mlkemkey_decode_public(key->pub, key->pubSeed, p, k); @@ -1634,12 +1635,12 @@ int wc_MlKemKey_DecodePrivateKey(MlKemKey* key, const unsigned char* in, /* Copy the hash of the encoded public key that is after public key. */ XMEMCPY(key->h, p, sizeof(key->h)); - p += KYBER_SYM_SZ; + p += WC_ML_KEM_SYM_SZ; /* Copy the z (randomizer) that is after hash. 
*/ XMEMCPY(key->z, p, sizeof(key->z)); /* Set flags */ - key->flags |= KYBER_FLAG_H_SET | KYBER_FLAG_BOTH_SET; + key->flags |= MLKEM_FLAG_H_SET | MLKEM_FLAG_BOTH_SET; } return ret; @@ -1693,7 +1694,7 @@ int wc_MlKemKey_DecodePublicKey(MlKemKey* key, const unsigned char* in, break; #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 case KYBER512: k = KYBER512_K; @@ -1728,11 +1729,11 @@ int wc_MlKemKey_DecodePublicKey(MlKemKey* key, const unsigned char* in, mlkemkey_decode_public(key->pub, key->pubSeed, p, k); /* Calculate public hash. */ - ret = KYBER_HASH_H(&key->hash, in, len, key->h); + ret = MLKEM_HASH_H(&key->hash, in, len, key->h); } if (ret == 0) { /* Record public key and public hash set. */ - key->flags |= KYBER_FLAG_PUB_SET | KYBER_FLAG_H_SET; + key->flags |= MLKEM_FLAG_PUB_SET | MLKEM_FLAG_H_SET; } return ret; @@ -1777,7 +1778,7 @@ int wc_MlKemKey_PrivateKeySize(MlKemKey* key, word32* len) break; #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 case KYBER512: *len = KYBER512_PRIVATE_KEY_SIZE; @@ -1843,7 +1844,7 @@ int wc_MlKemKey_PublicKeySize(MlKemKey* key, word32* len) break; #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 case KYBER512: *len = KYBER512_PUBLIC_KEY_SIZE; @@ -1904,7 +1905,7 @@ int wc_MlKemKey_EncodePrivateKey(MlKemKey* key, unsigned char* out, word32 len) ret = BAD_FUNC_ARG; } if ((ret == 0) && - ((key->flags & KYBER_FLAG_BOTH_SET) != KYBER_FLAG_BOTH_SET)) { + ((key->flags & MLKEM_FLAG_BOTH_SET) != MLKEM_FLAG_BOTH_SET)) { ret = BAD_FUNC_ARG; } @@ -1933,7 +1934,7 @@ int wc_MlKemKey_EncodePrivateKey(MlKemKey* key, unsigned char* out, word32 len) break; #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 case KYBER512: k = KYBER512_K; @@ -1969,23 +1970,23 @@ int wc_MlKemKey_EncodePrivateKey(MlKemKey* key, unsigned char* out, word32 len) if (ret == 0) { /* 
Encode private key that is vector of polynomials. */ - kyber_to_bytes(p, key->priv, k); - p += KYBER_POLY_SIZE * k; + mlkem_to_bytes(p, key->priv, k); + p += WC_ML_KEM_POLY_SIZE * k; /* Encode public key. */ ret = wc_KyberKey_EncodePublicKey(key, p, pubLen); p += pubLen; } /* Ensure hash of public key is available. */ - if ((ret == 0) && ((key->flags & KYBER_FLAG_H_SET) == 0)) { - ret = KYBER_HASH_H(&key->hash, p - pubLen, pubLen, key->h); + if ((ret == 0) && ((key->flags & MLKEM_FLAG_H_SET) == 0)) { + ret = MLKEM_HASH_H(&key->hash, p - pubLen, pubLen, key->h); } if (ret == 0) { /* Public hash is available. */ - key->flags |= KYBER_FLAG_H_SET; + key->flags |= MLKEM_FLAG_H_SET; /* Append public hash. */ XMEMCPY(p, key->h, sizeof(key->h)); - p += KYBER_SYM_SZ; + p += WC_ML_KEM_SYM_SZ; /* Append z (randomizer). */ XMEMCPY(p, key->z, sizeof(key->z)); } @@ -2025,7 +2026,7 @@ int wc_MlKemKey_EncodePublicKey(MlKemKey* key, unsigned char* out, word32 len) ret = BAD_FUNC_ARG; } if ((ret == 0) && - ((key->flags & KYBER_FLAG_PUB_SET) != KYBER_FLAG_PUB_SET)) { + ((key->flags & MLKEM_FLAG_PUB_SET) != MLKEM_FLAG_PUB_SET)) { ret = BAD_FUNC_ARG; } @@ -2051,7 +2052,7 @@ int wc_MlKemKey_EncodePublicKey(MlKemKey* key, unsigned char* out, word32 len) break; #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 case KYBER512: k = KYBER512_K; @@ -2086,22 +2087,22 @@ int wc_MlKemKey_EncodePublicKey(MlKemKey* key, unsigned char* out, word32 len) int i; /* Encode public key polynomial by polynomial. */ - kyber_to_bytes(p, key->pub, k); - p += k * KYBER_POLY_SIZE; + mlkem_to_bytes(p, key->pub, k); + p += k * WC_ML_KEM_POLY_SIZE; /* Append public seed. */ - for (i = 0; i < KYBER_SYM_SZ; i++) { + for (i = 0; i < WC_ML_KEM_SYM_SZ; i++) { p[i] = key->pubSeed[i]; } /* Make sure public hash is set. 
*/ - if ((key->flags & KYBER_FLAG_H_SET) == 0) { - ret = KYBER_HASH_H(&key->hash, out, len, key->h); + if ((key->flags & MLKEM_FLAG_H_SET) == 0) { + ret = MLKEM_HASH_H(&key->hash, out, len, key->h); } } if (ret == 0) { /* Public hash is set. */ - key->flags |= KYBER_FLAG_H_SET; + key->flags |= MLKEM_FLAG_H_SET; } return ret; diff --git a/wolfcrypt/src/wc_kyber_asm.S b/wolfcrypt/src/wc_mlkem_asm.S similarity index 98% rename from wolfcrypt/src/wc_kyber_asm.S rename to wolfcrypt/src/wc_mlkem_asm.S index b51c3d5d1..ffb77b730 100644 --- a/wolfcrypt/src/wc_kyber_asm.S +++ b/wolfcrypt/src/wc_mlkem_asm.S @@ -1,4 +1,4 @@ -/* wc_kyber_asm.S */ +/* wc_mlkem_asm.S */ /* * Copyright (C) 2006-2025 wolfSSL Inc. * @@ -45,7 +45,7 @@ #define HAVE_INTEL_AVX2 #endif /* NO_AVX2_SUPPORT */ -#ifdef WOLFSSL_WC_KYBER +#ifdef WOLFSSL_WC_MLKEM #ifdef HAVE_INTEL_AVX2 #ifndef __APPLE__ .data @@ -57,7 +57,7 @@ #else .p2align 4 #endif /* __APPLE__ */ -kyber_q: +mlkem_q: .value 0xd01,0xd01 .value 0xd01,0xd01 .value 0xd01,0xd01 @@ -76,7 +76,7 @@ kyber_q: #else .p2align 4 #endif /* __APPLE__ */ -kyber_qinv: +mlkem_qinv: .value 0xf301,0xf301 .value 0xf301,0xf301 .value 0xf301,0xf301 @@ -95,7 +95,7 @@ kyber_qinv: #else .p2align 4 #endif /* __APPLE__ */ -kyber_f: +mlkem_f: .value 0x549,0x549 .value 0x549,0x549 .value 0x549,0x549 @@ -114,7 +114,7 @@ kyber_f: #else .p2align 4 #endif /* __APPLE__ */ -kyber_f_qinv: +mlkem_f_qinv: .value 0x5049,0x5049 .value 0x5049,0x5049 .value 0x5049,0x5049 @@ -133,7 +133,7 @@ kyber_f_qinv: #else .p2align 4 #endif /* __APPLE__ */ -kyber_v: +mlkem_v: .value 0x4ebf,0x4ebf .value 0x4ebf,0x4ebf .value 0x4ebf,0x4ebf @@ -152,7 +152,7 @@ kyber_v: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_avx2_zetas: +L_mlkem_avx2_zetas: .value 0xa0b,0xa0b .value 0xa0b,0xa0b .value 0xa0b,0xa0b @@ -787,7 +787,7 @@ L_kyber_avx2_zetas: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_avx2_zetas_basemul: +L_mlkem_avx2_zetas_basemul: .value 0x8b2,0x81e .value 0xf74e,0xf7e2 .value 0x1ae,0x367 
@@ -926,7 +926,7 @@ L_kyber_avx2_zetas_basemul: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_avx2_zetas_inv: +L_mlkem_avx2_zetas_inv: .value 0x6a5,0x6a5 .value 0x5b4,0x5b4 .value 0x70f,0x70f @@ -1569,23 +1569,23 @@ L_kyber_avx2_zetas_inv: .value 0xd8a1,0xd8a1 #ifndef __APPLE__ .text -.globl kyber_keygen_avx2 -.type kyber_keygen_avx2,@function +.globl mlkem_keygen_avx2 +.type mlkem_keygen_avx2,@function .align 16 -kyber_keygen_avx2: +mlkem_keygen_avx2: #else .section __TEXT,__text -.globl _kyber_keygen_avx2 +.globl _mlkem_keygen_avx2 .p2align 4 -_kyber_keygen_avx2: +_mlkem_keygen_avx2: #endif /* __APPLE__ */ - vmovdqu kyber_q(%rip), %ymm14 - vmovdqu kyber_v(%rip), %ymm15 + vmovdqu mlkem_q(%rip), %ymm14 + vmovdqu mlkem_v(%rip), %ymm15 movslq %r8d, %r9 movq %rdi, %r10 -L_kyber_keygen_avx2_priv: +L_mlkem_keygen_avx2_priv: # ntt - leaq L_kyber_avx2_zetas(%rip), %r11 + leaq L_mlkem_avx2_zetas(%rip), %r11 vmovdqu (%r11), %ymm10 vmovdqu 32(%r11), %ymm12 vmovdqu 128(%r10), %ymm0 @@ -2240,15 +2240,15 @@ L_kyber_keygen_avx2_priv: vmovdqu %ymm9, 480(%r10) addq $0x200, %r10 subq $0x01, %r9 - jg L_kyber_keygen_avx2_priv - vmovdqu kyber_qinv(%rip), %ymm13 + jg L_mlkem_keygen_avx2_priv + vmovdqu mlkem_qinv(%rip), %ymm13 movslq %r8d, %rax movq %rsi, %r10 -L_kyber_keygen_avx2_acc: +L_mlkem_keygen_avx2_acc: # Pointwise acc mont movslq %r8d, %r9 # Base mul mont - leaq L_kyber_avx2_zetas_basemul(%rip), %r11 + leaq L_mlkem_avx2_zetas_basemul(%rip), %r11 vmovdqu (%rcx), %ymm2 vmovdqu 32(%rcx), %ymm3 vpslld $16, %ymm3, %ymm6 @@ -2607,7 +2607,7 @@ L_kyber_keygen_avx2_acc: jz L_pointwise_acc_mont_end_keygen L_pointwise_acc_mont_start_keygen: # Base mul mont add - leaq L_kyber_avx2_zetas_basemul(%rip), %r11 + leaq L_mlkem_avx2_zetas_basemul(%rip), %r11 vmovdqu (%rcx), %ymm2 vmovdqu 32(%rcx), %ymm3 vpslld $16, %ymm3, %ymm6 @@ -2998,7 +2998,7 @@ L_pointwise_acc_mont_start_keygen: jg L_pointwise_acc_mont_start_keygen L_pointwise_acc_mont_end_keygen: # Base mul mont add - leaq 
L_kyber_avx2_zetas_basemul(%rip), %r11 + leaq L_mlkem_avx2_zetas_basemul(%rip), %r11 vmovdqu (%rcx), %ymm2 vmovdqu 32(%rcx), %ymm3 vpslld $16, %ymm3, %ymm6 @@ -3422,13 +3422,13 @@ L_pointwise_acc_mont_end_keygen: subq %r9, %rdi addq $0x200, %r10 subq $0x01, %rax - jg L_kyber_keygen_avx2_acc + jg L_mlkem_keygen_avx2_acc movslq %r8d, %rax - vmovdqu kyber_f(%rip), %ymm12 - vmovdqu kyber_f_qinv(%rip), %ymm13 + vmovdqu mlkem_f(%rip), %ymm12 + vmovdqu mlkem_f_qinv(%rip), %ymm13 movslq %r8d, %rax movq %rsi, %r10 -L_kyber_keygen_avx2_to_mont: +L_mlkem_keygen_avx2_to_mont: # To Mont vmovdqu (%r10), %ymm0 vmovdqu 32(%r10), %ymm1 @@ -3528,11 +3528,11 @@ L_kyber_keygen_avx2_to_mont: vmovdqu %ymm3, 480(%r10) addq $0x200, %r10 subq $0x01, %rax - jg L_kyber_keygen_avx2_to_mont + jg L_mlkem_keygen_avx2_to_mont movslq %r8d, %rax -L_kyber_keygen_avx2_to_mont_ntt_err: +L_mlkem_keygen_avx2_to_mont_ntt_err: # ntt - leaq L_kyber_avx2_zetas(%rip), %r11 + leaq L_mlkem_avx2_zetas(%rip), %r11 vmovdqu (%r11), %ymm10 vmovdqu 32(%r11), %ymm12 vmovdqu 128(%rdx), %ymm0 @@ -4253,23 +4253,23 @@ L_kyber_keygen_avx2_to_mont_ntt_err: addq $0x200, %rdx addq $0x200, %rsi subq $0x01, %rax - jg L_kyber_keygen_avx2_to_mont_ntt_err + jg L_mlkem_keygen_avx2_to_mont_ntt_err vzeroupper repz retq #ifndef __APPLE__ -.size kyber_keygen_avx2,.-kyber_keygen_avx2 +.size mlkem_keygen_avx2,.-mlkem_keygen_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_encapsulate_avx2 -.type kyber_encapsulate_avx2,@function +.globl mlkem_encapsulate_avx2 +.type mlkem_encapsulate_avx2,@function .align 16 -kyber_encapsulate_avx2: +mlkem_encapsulate_avx2: #else .section __TEXT,__text -.globl _kyber_encapsulate_avx2 +.globl _mlkem_encapsulate_avx2 .p2align 4 -_kyber_encapsulate_avx2: +_mlkem_encapsulate_avx2: #endif /* __APPLE__ */ pushq %r12 pushq %r13 @@ -4279,13 +4279,13 @@ _kyber_encapsulate_avx2: movq 48(%rsp), %r10 movq 56(%rsp), %r11 subq $48, %rsp - vmovdqu kyber_q(%rip), %ymm14 - vmovdqu kyber_v(%rip), %ymm15 + 
vmovdqu mlkem_q(%rip), %ymm14 + vmovdqu mlkem_v(%rip), %ymm15 movslq %r11d, %r13 movq %r8, %r14 -L_kyber_encapsulate_avx2_trans: +L_mlkem_encapsulate_avx2_trans: # ntt - leaq L_kyber_avx2_zetas(%rip), %r15 + leaq L_mlkem_avx2_zetas(%rip), %r15 vmovdqu (%r15), %ymm10 vmovdqu 32(%r15), %ymm12 vmovdqu 128(%r14), %ymm0 @@ -4876,14 +4876,14 @@ L_kyber_encapsulate_avx2_trans: vmovdqu %ymm7, 480(%r14) addq $0x200, %r14 subq $0x01, %r13 - jg L_kyber_encapsulate_avx2_trans + jg L_mlkem_encapsulate_avx2_trans movslq %r11d, %r12 -L_kyber_encapsulate_avx2_calc: - vmovdqu kyber_qinv(%rip), %ymm12 +L_mlkem_encapsulate_avx2_calc: + vmovdqu mlkem_qinv(%rip), %ymm12 # Pointwise acc mont movslq %r11d, %r13 # Base mul mont - leaq L_kyber_avx2_zetas_basemul(%rip), %r15 + leaq L_mlkem_avx2_zetas_basemul(%rip), %r15 vmovdqu (%rcx), %ymm2 vmovdqu 32(%rcx), %ymm3 vpslld $16, %ymm3, %ymm6 @@ -5242,7 +5242,7 @@ L_kyber_encapsulate_avx2_calc: jz L_pointwise_acc_mont_end_encap_bp L_pointwise_acc_mont_start_encap_bp: # Base mul mont add - leaq L_kyber_avx2_zetas_basemul(%rip), %r15 + leaq L_mlkem_avx2_zetas_basemul(%rip), %r15 vmovdqu (%rcx), %ymm2 vmovdqu 32(%rcx), %ymm3 vpslld $16, %ymm3, %ymm6 @@ -5633,7 +5633,7 @@ L_pointwise_acc_mont_start_encap_bp: jg L_pointwise_acc_mont_start_encap_bp L_pointwise_acc_mont_end_encap_bp: # Base mul mont add - leaq L_kyber_avx2_zetas_basemul(%rip), %r15 + leaq L_mlkem_avx2_zetas_basemul(%rip), %r15 vmovdqu (%rcx), %ymm2 vmovdqu 32(%rcx), %ymm3 vpslld $16, %ymm3, %ymm6 @@ -6056,7 +6056,7 @@ L_pointwise_acc_mont_end_encap_bp: shl $9, %r13d subq %r13, %r8 # invntt - leaq L_kyber_avx2_zetas_inv(%rip), %r15 + leaq L_mlkem_avx2_zetas_inv(%rip), %r15 vmovdqu (%rsi), %ymm0 vmovdqu 32(%rsi), %ymm1 vmovdqu 64(%rsi), %ymm2 @@ -6956,12 +6956,12 @@ L_pointwise_acc_mont_end_encap_bp: addq $0x200, %r9 addq $0x200, %rsi subq $0x01, %r12 - jg L_kyber_encapsulate_avx2_calc - vmovdqu kyber_qinv(%rip), %ymm12 + jg L_mlkem_encapsulate_avx2_calc + vmovdqu mlkem_qinv(%rip), 
%ymm12 # Pointwise acc mont movslq %r11d, %r13 # Base mul mont - leaq L_kyber_avx2_zetas_basemul(%rip), %r15 + leaq L_mlkem_avx2_zetas_basemul(%rip), %r15 vmovdqu (%rdi), %ymm2 vmovdqu 32(%rdi), %ymm3 vpslld $16, %ymm3, %ymm6 @@ -7320,7 +7320,7 @@ L_pointwise_acc_mont_end_encap_bp: jz L_pointwise_acc_mont_end_encap_v L_pointwise_acc_mont_start_encap_v: # Base mul mont add - leaq L_kyber_avx2_zetas_basemul(%rip), %r15 + leaq L_mlkem_avx2_zetas_basemul(%rip), %r15 vmovdqu (%rdi), %ymm2 vmovdqu 32(%rdi), %ymm3 vpslld $16, %ymm3, %ymm6 @@ -7711,7 +7711,7 @@ L_pointwise_acc_mont_start_encap_v: jg L_pointwise_acc_mont_start_encap_v L_pointwise_acc_mont_end_encap_v: # Base mul mont add - leaq L_kyber_avx2_zetas_basemul(%rip), %r15 + leaq L_mlkem_avx2_zetas_basemul(%rip), %r15 vmovdqu (%rdi), %ymm2 vmovdqu 32(%rdi), %ymm3 vpslld $16, %ymm3, %ymm6 @@ -8134,7 +8134,7 @@ L_pointwise_acc_mont_end_encap_v: shl $9, %r13d subq %r13, %r8 # invntt - leaq L_kyber_avx2_zetas_inv(%rip), %r15 + leaq L_mlkem_avx2_zetas_inv(%rip), %r15 vmovdqu (%rdx), %ymm0 vmovdqu 32(%rdx), %ymm1 vmovdqu 64(%rdx), %ymm2 @@ -9104,27 +9104,27 @@ L_pointwise_acc_mont_end_encap_v: popq %r12 repz retq #ifndef __APPLE__ -.size kyber_encapsulate_avx2,.-kyber_encapsulate_avx2 +.size mlkem_encapsulate_avx2,.-mlkem_encapsulate_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_decapsulate_avx2 -.type kyber_decapsulate_avx2,@function +.globl mlkem_decapsulate_avx2 +.type mlkem_decapsulate_avx2,@function .align 16 -kyber_decapsulate_avx2: +mlkem_decapsulate_avx2: #else .section __TEXT,__text -.globl _kyber_decapsulate_avx2 +.globl _mlkem_decapsulate_avx2 .p2align 4 -_kyber_decapsulate_avx2: +_mlkem_decapsulate_avx2: #endif /* __APPLE__ */ - vmovdqu kyber_q(%rip), %ymm14 - vmovdqu kyber_v(%rip), %ymm15 + vmovdqu mlkem_q(%rip), %ymm14 + vmovdqu mlkem_v(%rip), %ymm15 movslq %r8d, %rax movq %rdx, %r9 -L_kyber_decapsulate_avx2_trans: +L_mlkem_decapsulate_avx2_trans: # ntt - leaq L_kyber_avx2_zetas(%rip), 
%r10 + leaq L_mlkem_avx2_zetas(%rip), %r10 vmovdqu (%r10), %ymm10 vmovdqu 32(%r10), %ymm12 vmovdqu 128(%r9), %ymm0 @@ -9779,12 +9779,12 @@ L_kyber_decapsulate_avx2_trans: vmovdqu %ymm9, 480(%r9) addq $0x200, %r9 subq $0x01, %rax - jg L_kyber_decapsulate_avx2_trans - vmovdqu kyber_qinv(%rip), %ymm12 + jg L_mlkem_decapsulate_avx2_trans + vmovdqu mlkem_qinv(%rip), %ymm12 # Pointwise acc mont movslq %r8d, %rax # Base mul mont - leaq L_kyber_avx2_zetas_basemul(%rip), %r10 + leaq L_mlkem_avx2_zetas_basemul(%rip), %r10 vmovdqu (%rdi), %ymm2 vmovdqu 32(%rdi), %ymm3 vpslld $16, %ymm3, %ymm6 @@ -10143,7 +10143,7 @@ L_kyber_decapsulate_avx2_trans: jz L_pointwise_acc_mont_end_decap L_pointwise_acc_mont_start_decap: # Base mul mont add - leaq L_kyber_avx2_zetas_basemul(%rip), %r10 + leaq L_mlkem_avx2_zetas_basemul(%rip), %r10 vmovdqu (%rdi), %ymm2 vmovdqu 32(%rdi), %ymm3 vpslld $16, %ymm3, %ymm6 @@ -10534,7 +10534,7 @@ L_pointwise_acc_mont_start_decap: jg L_pointwise_acc_mont_start_decap L_pointwise_acc_mont_end_decap: # Base mul mont add - leaq L_kyber_avx2_zetas_basemul(%rip), %r10 + leaq L_mlkem_avx2_zetas_basemul(%rip), %r10 vmovdqu (%rdi), %ymm2 vmovdqu 32(%rdi), %ymm3 vpslld $16, %ymm3, %ymm6 @@ -10957,7 +10957,7 @@ L_pointwise_acc_mont_end_decap: shl $9, %eax subq %rax, %rdx # invntt - leaq L_kyber_avx2_zetas_inv(%rip), %r10 + leaq L_mlkem_avx2_zetas_inv(%rip), %r10 vmovdqu (%rsi), %ymm0 vmovdqu 32(%rsi), %ymm1 vmovdqu 64(%rsi), %ymm2 @@ -11857,21 +11857,21 @@ L_pointwise_acc_mont_end_decap: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_decapsulate_avx2,.-kyber_decapsulate_avx2 +.size mlkem_decapsulate_avx2,.-mlkem_decapsulate_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_csubq_avx2 -.type kyber_csubq_avx2,@function +.globl mlkem_csubq_avx2 +.type mlkem_csubq_avx2,@function .align 16 -kyber_csubq_avx2: +mlkem_csubq_avx2: #else .section __TEXT,__text -.globl _kyber_csubq_avx2 +.globl _mlkem_csubq_avx2 .p2align 4 -_kyber_csubq_avx2: 
+_mlkem_csubq_avx2: #endif /* __APPLE__ */ - vmovdqu kyber_q(%rip), %ymm12 + vmovdqu mlkem_q(%rip), %ymm12 vmovdqu (%rdi), %ymm0 vmovdqu 32(%rdi), %ymm1 vmovdqu 64(%rdi), %ymm2 @@ -11971,7 +11971,7 @@ _kyber_csubq_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_csubq_avx2,.-kyber_csubq_avx2 +.size mlkem_csubq_avx2,.-mlkem_csubq_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data @@ -11983,7 +11983,7 @@ _kyber_csubq_avx2: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_rej_idx: +L_mlkem_rej_idx: .quad 0xffffffffffffffff,0xffffffffffffff00 .quad 0xffffffffffffff02,0xffffffffffff0200 .quad 0xffffffffffffff04,0xffffffffffff0400 @@ -12122,7 +12122,7 @@ L_kyber_rej_idx: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_rej_q: +L_mlkem_rej_q: .quad 0xd010d010d010d01, 0xd010d010d010d01 .quad 0xd010d010d010d01, 0xd010d010d010d01 #ifndef __APPLE__ @@ -12135,7 +12135,7 @@ L_kyber_rej_q: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_rej_ones: +L_mlkem_rej_ones: .quad 0x101010101010101, 0x101010101010101 .quad 0x101010101010101, 0x101010101010101 #ifndef __APPLE__ @@ -12148,7 +12148,7 @@ L_kyber_rej_ones: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_rej_mask: +L_mlkem_rej_mask: .quad 0xfff0fff0fff0fff, 0xfff0fff0fff0fff .quad 0xfff0fff0fff0fff, 0xfff0fff0fff0fff #ifndef __APPLE__ @@ -12161,20 +12161,20 @@ L_kyber_rej_mask: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_rej_shuffle: +L_mlkem_rej_shuffle: .quad 0x504040302010100, 0xb0a0a0908070706 .quad 0x908080706050504, 0xf0e0e0d0c0b0b0a #ifndef __APPLE__ .text -.globl kyber_rej_uniform_n_avx2 -.type kyber_rej_uniform_n_avx2,@function +.globl mlkem_rej_uniform_n_avx2 +.type mlkem_rej_uniform_n_avx2,@function .align 16 -kyber_rej_uniform_n_avx2: +mlkem_rej_uniform_n_avx2: #else .section __TEXT,__text -.globl _kyber_rej_uniform_n_avx2 +.globl _mlkem_rej_uniform_n_avx2 .p2align 4 -_kyber_rej_uniform_n_avx2: +_mlkem_rej_uniform_n_avx2: #endif /* __APPLE__ */ pushq %rbx pushq %r12 @@ -12184,11 +12184,11 @@ 
_kyber_rej_uniform_n_avx2: pushq %rbp movq %rcx, %r8 movl %esi, %eax - vmovdqu L_kyber_rej_q(%rip), %ymm6 - vmovdqu L_kyber_rej_ones(%rip), %ymm7 - vmovdqu L_kyber_rej_mask(%rip), %ymm8 - vmovdqu L_kyber_rej_shuffle(%rip), %ymm9 - leaq L_kyber_rej_idx(%rip), %r9 + vmovdqu L_mlkem_rej_q(%rip), %ymm6 + vmovdqu L_mlkem_rej_ones(%rip), %ymm7 + vmovdqu L_mlkem_rej_mask(%rip), %ymm8 + vmovdqu L_mlkem_rej_shuffle(%rip), %ymm9 + leaq L_mlkem_rej_idx(%rip), %r9 movq $0x1111111111111111, %r14 movq $0xe0c0a0806040200, %rbp movq $0x101010101010101, %r13 @@ -12621,7 +12621,7 @@ _kyber_rej_uniform_n_avx2: subl %r12d, %esi addq $0x150, %rdx subl $0x150, %r8d -L_kyber_rej_uniform_n_avx2_start_256: +L_mlkem_rej_uniform_n_avx2_start_256: vpermq $0x94, (%rdx), %ymm0 vpermq $0x94, 24(%rdx), %ymm1 vpshufb %ymm9, %ymm0, %ymm0 @@ -12686,15 +12686,15 @@ L_kyber_rej_uniform_n_avx2_start_256: addq $48, %rdx subl $48, %r8d cmpl $48, %r8d - jl L_kyber_rej_uniform_n_avx2_done_256 + jl L_mlkem_rej_uniform_n_avx2_done_256 cmpl $32, %esi - jge L_kyber_rej_uniform_n_avx2_start_256 -L_kyber_rej_uniform_n_avx2_done_256: + jge L_mlkem_rej_uniform_n_avx2_start_256 +L_mlkem_rej_uniform_n_avx2_done_256: cmpl $8, %esi - jl L_kyber_rej_uniform_n_avx2_done_128 + jl L_mlkem_rej_uniform_n_avx2_done_128 cmpl $12, %r8d - jl L_kyber_rej_uniform_n_avx2_done_128 -L_kyber_rej_uniform_n_avx2_start_128: + jl L_mlkem_rej_uniform_n_avx2_done_128 +L_mlkem_rej_uniform_n_avx2_start_128: vmovdqu (%rdx), %xmm0 vpshufb %xmm9, %xmm0, %xmm0 vpsrlw $4, %xmm0, %xmm2 @@ -12715,58 +12715,58 @@ L_kyber_rej_uniform_n_avx2_start_128: addq $12, %rdx subl $12, %r8d cmpl $12, %r8d - jl L_kyber_rej_uniform_n_avx2_done_128 + jl L_mlkem_rej_uniform_n_avx2_done_128 cmpl $8, %esi - jge L_kyber_rej_uniform_n_avx2_start_128 -L_kyber_rej_uniform_n_avx2_done_128: + jge L_mlkem_rej_uniform_n_avx2_start_128 +L_mlkem_rej_uniform_n_avx2_done_128: cmpl $0x00, %r8d - je L_kyber_rej_uniform_n_avx2_done_64 + je L_mlkem_rej_uniform_n_avx2_done_64 cmpl 
$0x00, %esi - je L_kyber_rej_uniform_n_avx2_done_64 + je L_mlkem_rej_uniform_n_avx2_done_64 movq $0xfff0fff0fff0fff, %r15 movq $0x2000200020002000, %r10 movq $0xd010d010d010d01, %r11 movq $0x1000100010001000, %r12 -L_kyber_rej_uniform_n_avx2_start_64: +L_mlkem_rej_uniform_n_avx2_start_64: movq (%rdx), %rcx pdepq %r15, %rcx, %rcx cmpw $0xd01, %cx - jge L_kyber_rej_uniform_0_avx2_rej_large_0 + jge L_mlkem_rej_uniform_0_avx2_rej_large_0 movw %cx, (%rdi) addq $2, %rdi subl $0x01, %esi - je L_kyber_rej_uniform_n_avx2_done_64 -L_kyber_rej_uniform_0_avx2_rej_large_0: + je L_mlkem_rej_uniform_n_avx2_done_64 +L_mlkem_rej_uniform_0_avx2_rej_large_0: shrq $16, %rcx cmpw $0xd01, %cx - jge L_kyber_rej_uniform_0_avx2_rej_large_1 + jge L_mlkem_rej_uniform_0_avx2_rej_large_1 movw %cx, (%rdi) addq $2, %rdi subl $0x01, %esi - je L_kyber_rej_uniform_n_avx2_done_64 -L_kyber_rej_uniform_0_avx2_rej_large_1: + je L_mlkem_rej_uniform_n_avx2_done_64 +L_mlkem_rej_uniform_0_avx2_rej_large_1: shrq $16, %rcx cmpw $0xd01, %cx - jge L_kyber_rej_uniform_0_avx2_rej_large_2 + jge L_mlkem_rej_uniform_0_avx2_rej_large_2 movw %cx, (%rdi) addq $2, %rdi subl $0x01, %esi - je L_kyber_rej_uniform_n_avx2_done_64 -L_kyber_rej_uniform_0_avx2_rej_large_2: + je L_mlkem_rej_uniform_n_avx2_done_64 +L_mlkem_rej_uniform_0_avx2_rej_large_2: shrq $16, %rcx cmpw $0xd01, %cx - jge L_kyber_rej_uniform_0_avx2_rej_large_3 + jge L_mlkem_rej_uniform_0_avx2_rej_large_3 movw %cx, (%rdi) addq $2, %rdi subl $0x01, %esi - je L_kyber_rej_uniform_n_avx2_done_64 -L_kyber_rej_uniform_0_avx2_rej_large_3: + je L_mlkem_rej_uniform_n_avx2_done_64 +L_mlkem_rej_uniform_0_avx2_rej_large_3: addq $6, %rdx subl $6, %r8d - jle L_kyber_rej_uniform_n_avx2_done_64 + jle L_mlkem_rej_uniform_n_avx2_done_64 cmpl $0x00, %esi - jg L_kyber_rej_uniform_n_avx2_start_64 -L_kyber_rej_uniform_n_avx2_done_64: + jg L_mlkem_rej_uniform_n_avx2_start_64 +L_mlkem_rej_uniform_n_avx2_done_64: vzeroupper subl %esi, %eax popq %rbp @@ -12777,19 +12777,19 @@ 
L_kyber_rej_uniform_n_avx2_done_64: popq %rbx repz retq #ifndef __APPLE__ -.size kyber_rej_uniform_n_avx2,.-kyber_rej_uniform_n_avx2 +.size mlkem_rej_uniform_n_avx2,.-mlkem_rej_uniform_n_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_rej_uniform_avx2 -.type kyber_rej_uniform_avx2,@function +.globl mlkem_rej_uniform_avx2 +.type mlkem_rej_uniform_avx2,@function .align 16 -kyber_rej_uniform_avx2: +mlkem_rej_uniform_avx2: #else .section __TEXT,__text -.globl _kyber_rej_uniform_avx2 +.globl _mlkem_rej_uniform_avx2 .p2align 4 -_kyber_rej_uniform_avx2: +_mlkem_rej_uniform_avx2: #endif /* __APPLE__ */ pushq %rbx pushq %r12 @@ -12800,19 +12800,19 @@ _kyber_rej_uniform_avx2: movq %rcx, %r8 movl %esi, %eax cmpl $0x00, %esi - je L_kyber_rej_uniform_avx2_done_64 + je L_mlkem_rej_uniform_avx2_done_64 cmpl $8, %esi - jl L_kyber_rej_uniform_avx2_done_128 - vmovdqu L_kyber_rej_q(%rip), %ymm6 - vmovdqu L_kyber_rej_ones(%rip), %ymm7 - vmovdqu L_kyber_rej_mask(%rip), %ymm8 - vmovdqu L_kyber_rej_shuffle(%rip), %ymm9 - leaq L_kyber_rej_idx(%rip), %r9 + jl L_mlkem_rej_uniform_avx2_done_128 + vmovdqu L_mlkem_rej_q(%rip), %ymm6 + vmovdqu L_mlkem_rej_ones(%rip), %ymm7 + vmovdqu L_mlkem_rej_mask(%rip), %ymm8 + vmovdqu L_mlkem_rej_shuffle(%rip), %ymm9 + leaq L_mlkem_rej_idx(%rip), %r9 movq $0x1111111111111111, %r14 movq $0xe0c0a0806040200, %rbp movq $0x101010101010101, %r13 cmpl $32, %esi - jl L_kyber_rej_uniform_avx2_done_256 + jl L_mlkem_rej_uniform_avx2_done_256 vpermq $0x94, (%rdx), %ymm0 vpermq $0x94, 24(%rdx), %ymm1 vpshufb %ymm9, %ymm0, %ymm0 @@ -12877,7 +12877,7 @@ _kyber_rej_uniform_avx2: addq $48, %rdx subl $48, %r8d cmpl $32, %esi - jl L_kyber_rej_uniform_avx2_done_256 + jl L_mlkem_rej_uniform_avx2_done_256 vpermq $0x94, (%rdx), %ymm0 vpermq $0x94, 24(%rdx), %ymm1 vpshufb %ymm9, %ymm0, %ymm0 @@ -12942,8 +12942,8 @@ _kyber_rej_uniform_avx2: addq $48, %rdx subl $48, %r8d cmpl $32, %esi - jl L_kyber_rej_uniform_avx2_done_256 -L_kyber_rej_uniform_avx2_start_256: + jl 
L_mlkem_rej_uniform_avx2_done_256 +L_mlkem_rej_uniform_avx2_start_256: vpermq $0x94, (%rdx), %ymm0 vpermq $0x94, 24(%rdx), %ymm1 vpshufb %ymm9, %ymm0, %ymm0 @@ -13008,15 +13008,15 @@ L_kyber_rej_uniform_avx2_start_256: addq $48, %rdx subl $48, %r8d cmpl $48, %r8d - jl L_kyber_rej_uniform_avx2_done_256 + jl L_mlkem_rej_uniform_avx2_done_256 cmpl $32, %esi - jge L_kyber_rej_uniform_avx2_start_256 -L_kyber_rej_uniform_avx2_done_256: + jge L_mlkem_rej_uniform_avx2_start_256 +L_mlkem_rej_uniform_avx2_done_256: cmpl $8, %esi - jl L_kyber_rej_uniform_avx2_done_128 + jl L_mlkem_rej_uniform_avx2_done_128 cmpl $12, %r8d - jl L_kyber_rej_uniform_avx2_done_128 -L_kyber_rej_uniform_avx2_start_128: + jl L_mlkem_rej_uniform_avx2_done_128 +L_mlkem_rej_uniform_avx2_start_128: vmovdqu (%rdx), %xmm0 vpshufb %xmm9, %xmm0, %xmm0 vpsrlw $4, %xmm0, %xmm2 @@ -13037,58 +13037,58 @@ L_kyber_rej_uniform_avx2_start_128: addq $12, %rdx subl $12, %r8d cmpl $12, %r8d - jl L_kyber_rej_uniform_avx2_done_128 + jl L_mlkem_rej_uniform_avx2_done_128 cmpl $8, %esi - jge L_kyber_rej_uniform_avx2_start_128 -L_kyber_rej_uniform_avx2_done_128: + jge L_mlkem_rej_uniform_avx2_start_128 +L_mlkem_rej_uniform_avx2_done_128: cmpl $0x00, %r8d - je L_kyber_rej_uniform_avx2_done_64 + je L_mlkem_rej_uniform_avx2_done_64 cmpl $0x00, %esi - je L_kyber_rej_uniform_avx2_done_64 + je L_mlkem_rej_uniform_avx2_done_64 movq $0xfff0fff0fff0fff, %r15 movq $0x2000200020002000, %r10 movq $0xd010d010d010d01, %r11 movq $0x1000100010001000, %r12 -L_kyber_rej_uniform_avx2_start_64: +L_mlkem_rej_uniform_avx2_start_64: movq (%rdx), %rcx pdepq %r15, %rcx, %rcx cmpw $0xd01, %cx - jge L_kyber_rej_uniform_avx2_rej_large_0 + jge L_mlkem_rej_uniform_avx2_rej_large_0 movw %cx, (%rdi) addq $2, %rdi subl $0x01, %esi - je L_kyber_rej_uniform_avx2_done_64 -L_kyber_rej_uniform_avx2_rej_large_0: + je L_mlkem_rej_uniform_avx2_done_64 +L_mlkem_rej_uniform_avx2_rej_large_0: shrq $16, %rcx cmpw $0xd01, %cx - jge L_kyber_rej_uniform_avx2_rej_large_1 + 
jge L_mlkem_rej_uniform_avx2_rej_large_1 movw %cx, (%rdi) addq $2, %rdi subl $0x01, %esi - je L_kyber_rej_uniform_avx2_done_64 -L_kyber_rej_uniform_avx2_rej_large_1: + je L_mlkem_rej_uniform_avx2_done_64 +L_mlkem_rej_uniform_avx2_rej_large_1: shrq $16, %rcx cmpw $0xd01, %cx - jge L_kyber_rej_uniform_avx2_rej_large_2 + jge L_mlkem_rej_uniform_avx2_rej_large_2 movw %cx, (%rdi) addq $2, %rdi subl $0x01, %esi - je L_kyber_rej_uniform_avx2_done_64 -L_kyber_rej_uniform_avx2_rej_large_2: + je L_mlkem_rej_uniform_avx2_done_64 +L_mlkem_rej_uniform_avx2_rej_large_2: shrq $16, %rcx cmpw $0xd01, %cx - jge L_kyber_rej_uniform_avx2_rej_large_3 + jge L_mlkem_rej_uniform_avx2_rej_large_3 movw %cx, (%rdi) addq $2, %rdi subl $0x01, %esi - je L_kyber_rej_uniform_avx2_done_64 -L_kyber_rej_uniform_avx2_rej_large_3: + je L_mlkem_rej_uniform_avx2_done_64 +L_mlkem_rej_uniform_avx2_rej_large_3: addq $6, %rdx subl $6, %r8d - jle L_kyber_rej_uniform_avx2_done_64 + jle L_mlkem_rej_uniform_avx2_done_64 cmpl $0x00, %esi - jg L_kyber_rej_uniform_avx2_start_64 -L_kyber_rej_uniform_avx2_done_64: + jg L_mlkem_rej_uniform_avx2_start_64 +L_mlkem_rej_uniform_avx2_done_64: vzeroupper subl %esi, %eax popq %rbp @@ -13099,7 +13099,7 @@ L_kyber_rej_uniform_avx2_done_64: popq %rbx repz retq #ifndef __APPLE__ -.size kyber_rej_uniform_avx2,.-kyber_rej_uniform_avx2 +.size mlkem_rej_uniform_avx2,.-mlkem_rej_uniform_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data @@ -13111,7 +13111,7 @@ L_kyber_rej_uniform_avx2_done_64: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_mask_249: +L_mlkem_mask_249: .quad 0x24924900249249, 0x24924900249249 .quad 0x24924900249249, 0x24924900249249 #ifndef __APPLE__ @@ -13124,7 +13124,7 @@ L_kyber_mask_249: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_mask_6db: +L_mlkem_mask_6db: .quad 0x6db6db006db6db, 0x6db6db006db6db .quad 0x6db6db006db6db, 0x6db6db006db6db #ifndef __APPLE__ @@ -13137,7 +13137,7 @@ L_kyber_mask_6db: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_mask_07: 
+L_mlkem_mask_07: .quad 0x700000007, 0x700000007 .quad 0x700000007, 0x700000007 #ifndef __APPLE__ @@ -13150,7 +13150,7 @@ L_kyber_mask_07: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_mask_70: +L_mlkem_mask_70: .quad 0x7000000070000, 0x7000000070000 .quad 0x7000000070000, 0x7000000070000 #ifndef __APPLE__ @@ -13163,7 +13163,7 @@ L_kyber_mask_70: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_mask_3: +L_mlkem_mask_3: .quad 0x3000300030003, 0x3000300030003 .quad 0x3000300030003, 0x3000300030003 #ifndef __APPLE__ @@ -13176,27 +13176,27 @@ L_kyber_mask_3: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_shuff: +L_mlkem_shuff: .quad 0xff050403ff020100, 0xff0b0a09ff080706 .quad 0xff090807ff060504, 0xff0f0e0dff0c0b0a #ifndef __APPLE__ .text -.globl kyber_cbd_eta3_avx2 -.type kyber_cbd_eta3_avx2,@function +.globl mlkem_cbd_eta3_avx2 +.type mlkem_cbd_eta3_avx2,@function .align 16 -kyber_cbd_eta3_avx2: +mlkem_cbd_eta3_avx2: #else .section __TEXT,__text -.globl _kyber_cbd_eta3_avx2 +.globl _mlkem_cbd_eta3_avx2 .p2align 4 -_kyber_cbd_eta3_avx2: +_mlkem_cbd_eta3_avx2: #endif /* __APPLE__ */ - vmovdqu L_kyber_mask_249(%rip), %ymm8 - vmovdqu L_kyber_mask_6db(%rip), %ymm9 - vmovdqu L_kyber_mask_07(%rip), %ymm10 - vmovdqu L_kyber_mask_70(%rip), %ymm11 - vmovdqu L_kyber_mask_3(%rip), %ymm12 - vmovdqu L_kyber_shuff(%rip), %ymm13 + vmovdqu L_mlkem_mask_249(%rip), %ymm8 + vmovdqu L_mlkem_mask_6db(%rip), %ymm9 + vmovdqu L_mlkem_mask_07(%rip), %ymm10 + vmovdqu L_mlkem_mask_70(%rip), %ymm11 + vmovdqu L_mlkem_mask_3(%rip), %ymm12 + vmovdqu L_mlkem_shuff(%rip), %ymm13 vmovdqu (%rsi), %ymm0 vmovdqu 24(%rsi), %ymm1 vpermq $0x94, %ymm0, %ymm0 @@ -13440,7 +13440,7 @@ _kyber_cbd_eta3_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_cbd_eta3_avx2,.-kyber_cbd_eta3_avx2 +.size mlkem_cbd_eta3_avx2,.-mlkem_cbd_eta3_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data @@ -13452,7 +13452,7 @@ _kyber_cbd_eta3_avx2: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_mask_55: +L_mlkem_mask_55: 
.quad 0x5555555555555555, 0x5555555555555555 .quad 0x5555555555555555, 0x5555555555555555 #ifndef __APPLE__ @@ -13465,7 +13465,7 @@ L_kyber_mask_55: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_mask_33: +L_mlkem_mask_33: .quad 0x3333333333333333, 0x3333333333333333 .quad 0x3333333333333333, 0x3333333333333333 #ifndef __APPLE__ @@ -13478,7 +13478,7 @@ L_kyber_mask_33: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_mask_03: +L_mlkem_mask_03: .quad 0x303030303030303, 0x303030303030303 .quad 0x303030303030303, 0x303030303030303 #ifndef __APPLE__ @@ -13491,25 +13491,25 @@ L_kyber_mask_03: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_mask_0f: +L_mlkem_mask_0f: .quad 0xf0f0f0f0f0f0f0f, 0xf0f0f0f0f0f0f0f .quad 0xf0f0f0f0f0f0f0f, 0xf0f0f0f0f0f0f0f #ifndef __APPLE__ .text -.globl kyber_cbd_eta2_avx2 -.type kyber_cbd_eta2_avx2,@function +.globl mlkem_cbd_eta2_avx2 +.type mlkem_cbd_eta2_avx2,@function .align 16 -kyber_cbd_eta2_avx2: +mlkem_cbd_eta2_avx2: #else .section __TEXT,__text -.globl _kyber_cbd_eta2_avx2 +.globl _mlkem_cbd_eta2_avx2 .p2align 4 -_kyber_cbd_eta2_avx2: +_mlkem_cbd_eta2_avx2: #endif /* __APPLE__ */ - vmovdqu L_kyber_mask_55(%rip), %ymm8 - vmovdqu L_kyber_mask_33(%rip), %ymm9 - vmovdqu L_kyber_mask_03(%rip), %ymm10 - vmovdqu L_kyber_mask_0f(%rip), %ymm11 + vmovdqu L_mlkem_mask_55(%rip), %ymm8 + vmovdqu L_mlkem_mask_33(%rip), %ymm9 + vmovdqu L_mlkem_mask_03(%rip), %ymm10 + vmovdqu L_mlkem_mask_0f(%rip), %ymm11 vmovdqu (%rsi), %ymm0 vmovdqu 32(%rsi), %ymm1 vpsrlw $0x01, %ymm0, %ymm2 @@ -13621,7 +13621,7 @@ _kyber_cbd_eta2_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_cbd_eta2_avx2,.-kyber_cbd_eta2_avx2 +.size mlkem_cbd_eta2_avx2,.-mlkem_cbd_eta2_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data @@ -13633,7 +13633,7 @@ _kyber_cbd_eta2_avx2: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_10_avx2_mask: +L_mlkem_compress_10_avx2_mask: .value 0x3ff,0x3ff .value 0x3ff,0x3ff .value 0x3ff,0x3ff @@ -13652,7 +13652,7 @@ 
L_kyber_compress_10_avx2_mask: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_compress_10_avx2_shift: +L_mlkem_compress_10_avx2_shift: .quad 0x400000104000001, 0x400000104000001 .quad 0x400000104000001, 0x400000104000001 #ifndef __APPLE__ @@ -13665,7 +13665,7 @@ L_kyber_compress_10_avx2_shift: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_compress_10_avx2_shlv: +L_mlkem_compress_10_avx2_shlv: .quad 0xc, 0xc .quad 0xc, 0xc #ifndef __APPLE__ @@ -13678,7 +13678,7 @@ L_kyber_compress_10_avx2_shlv: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_10_avx2_shuf: +L_mlkem_compress_10_avx2_shuf: .value 0x100,0x302 .value 0x804,0xa09 .value 0xc0b,0xffff @@ -13697,7 +13697,7 @@ L_kyber_compress_10_avx2_shuf: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_10_avx2_v: +L_mlkem_compress_10_avx2_v: .value 0x4ebf,0x4ebf .value 0x4ebf,0x4ebf .value 0x4ebf,0x4ebf @@ -13716,7 +13716,7 @@ L_kyber_compress_10_avx2_v: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_10_avx2_offset: +L_mlkem_compress_10_avx2_offset: .value 0xf,0xf .value 0xf,0xf .value 0xf,0xf @@ -13735,7 +13735,7 @@ L_kyber_compress_10_avx2_offset: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_10_avx2_shift12: +L_mlkem_compress_10_avx2_shift12: .value 0x1000,0x1000 .value 0x1000,0x1000 .value 0x1000,0x1000 @@ -13746,26 +13746,26 @@ L_kyber_compress_10_avx2_shift12: .value 0x1000,0x1000 #ifndef __APPLE__ .text -.globl kyber_compress_10_avx2 -.type kyber_compress_10_avx2,@function +.globl mlkem_compress_10_avx2 +.type mlkem_compress_10_avx2,@function .align 16 -kyber_compress_10_avx2: +mlkem_compress_10_avx2: #else .section __TEXT,__text -.globl _kyber_compress_10_avx2 +.globl _mlkem_compress_10_avx2 .p2align 4 -_kyber_compress_10_avx2: +_mlkem_compress_10_avx2: #endif /* __APPLE__ */ vmovdqu (%rsi), %ymm0 - vmovdqu L_kyber_compress_10_avx2_mask(%rip), %ymm9 - vmovdqu L_kyber_compress_10_avx2_shift(%rip), %ymm8 - vmovdqu L_kyber_compress_10_avx2_shlv(%rip), %ymm10 - vmovdqu 
L_kyber_compress_10_avx2_shuf(%rip), %ymm11 - vmovdqu L_kyber_compress_10_avx2_v(%rip), %ymm6 - vmovdqu L_kyber_compress_10_avx2_offset(%rip), %ymm12 - vmovdqu L_kyber_compress_10_avx2_shift12(%rip), %ymm13 + vmovdqu L_mlkem_compress_10_avx2_mask(%rip), %ymm9 + vmovdqu L_mlkem_compress_10_avx2_shift(%rip), %ymm8 + vmovdqu L_mlkem_compress_10_avx2_shlv(%rip), %ymm10 + vmovdqu L_mlkem_compress_10_avx2_shuf(%rip), %ymm11 + vmovdqu L_mlkem_compress_10_avx2_v(%rip), %ymm6 + vmovdqu L_mlkem_compress_10_avx2_offset(%rip), %ymm12 + vmovdqu L_mlkem_compress_10_avx2_shift12(%rip), %ymm13 vpsllw $3, %ymm6, %ymm7 -L_kyber_compress_10_avx2_start: +L_mlkem_compress_10_avx2_start: vmovdqu (%rsi), %ymm0 vmovdqu 32(%rsi), %ymm1 vpmullw %ymm7, %ymm0, %ymm2 @@ -14073,18 +14073,18 @@ L_kyber_compress_10_avx2_start: addq $0x140, %rdi addq $0x200, %rsi subl $0x01, %edx - jg L_kyber_compress_10_avx2_start + jg L_mlkem_compress_10_avx2_start vzeroupper repz retq #ifndef __APPLE__ -.size kyber_compress_10_avx2,.-kyber_compress_10_avx2 +.size mlkem_compress_10_avx2,.-mlkem_compress_10_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ -L_kyber_decompress_10_avx2_mask: +L_mlkem_decompress_10_avx2_mask: .long 0x7fe01ff8,0x7fe01ff8,0x7fe01ff8,0x7fe01ff8 .long 0x7fe01ff8,0x7fe01ff8,0x7fe01ff8,0x7fe01ff8 #ifndef __APPLE__ @@ -14097,7 +14097,7 @@ L_kyber_decompress_10_avx2_mask: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_decompress_10_avx2_sllv: +L_mlkem_decompress_10_avx2_sllv: .quad 0x4, 0x4 .quad 0x4, 0x4 #ifndef __APPLE__ @@ -14105,7 +14105,7 @@ L_kyber_decompress_10_avx2_sllv: #else .section __DATA,__data #endif /* __APPLE__ */ -L_kyber_decompress_10_avx2_q: +L_mlkem_decompress_10_avx2_q: .long 0xd013404,0xd013404,0xd013404,0xd013404 .long 0xd013404,0xd013404,0xd013404,0xd013404 #ifndef __APPLE__ @@ -14118,7 +14118,7 @@ L_kyber_decompress_10_avx2_q: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_decompress_10_avx2_shuf: 
+L_mlkem_decompress_10_avx2_shuf: .value 0x100,0x201 .value 0x302,0x403 .value 0x605,0x706 @@ -14129,21 +14129,21 @@ L_kyber_decompress_10_avx2_shuf: .value 0xa09,0xb0a #ifndef __APPLE__ .text -.globl kyber_decompress_10_avx2 -.type kyber_decompress_10_avx2,@function +.globl mlkem_decompress_10_avx2 +.type mlkem_decompress_10_avx2,@function .align 16 -kyber_decompress_10_avx2: +mlkem_decompress_10_avx2: #else .section __TEXT,__text -.globl _kyber_decompress_10_avx2 +.globl _mlkem_decompress_10_avx2 .p2align 4 -_kyber_decompress_10_avx2: +_mlkem_decompress_10_avx2: #endif /* __APPLE__ */ - vmovdqu L_kyber_decompress_10_avx2_mask(%rip), %ymm4 - vmovdqu L_kyber_decompress_10_avx2_q(%rip), %ymm5 - vmovdqu L_kyber_decompress_10_avx2_shuf(%rip), %ymm6 - vmovdqu L_kyber_decompress_10_avx2_sllv(%rip), %ymm7 -L_kyber_decompress_10_avx2_start: + vmovdqu L_mlkem_decompress_10_avx2_mask(%rip), %ymm4 + vmovdqu L_mlkem_decompress_10_avx2_q(%rip), %ymm5 + vmovdqu L_mlkem_decompress_10_avx2_shuf(%rip), %ymm6 + vmovdqu L_mlkem_decompress_10_avx2_sllv(%rip), %ymm7 +L_mlkem_decompress_10_avx2_start: vpermq $0x94, (%rsi), %ymm0 vpermq $0x94, 20(%rsi), %ymm1 vpermq $0x94, 40(%rsi), %ymm2 @@ -14259,11 +14259,11 @@ L_kyber_decompress_10_avx2_start: addq $0x140, %rsi addq $0x200, %rdi subl $0x01, %edx - jg L_kyber_decompress_10_avx2_start + jg L_mlkem_decompress_10_avx2_start vzeroupper repz retq #ifndef __APPLE__ -.size kyber_decompress_10_avx2,.-kyber_decompress_10_avx2 +.size mlkem_decompress_10_avx2,.-mlkem_decompress_10_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data @@ -14275,7 +14275,7 @@ L_kyber_decompress_10_avx2_start: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_11_avx2_v: +L_mlkem_compress_11_avx2_v: .value 0x4ebf,0x4ebf .value 0x4ebf,0x4ebf .value 0x4ebf,0x4ebf @@ -14294,7 +14294,7 @@ L_kyber_compress_11_avx2_v: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_11_avx2_off: +L_mlkem_compress_11_avx2_off: .value 0x24,0x24 .value 0x24,0x24 .value 
0x24,0x24 @@ -14313,7 +14313,7 @@ L_kyber_compress_11_avx2_off: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_11_avx2_shift13: +L_mlkem_compress_11_avx2_shift13: .value 0x2000,0x2000 .value 0x2000,0x2000 .value 0x2000,0x2000 @@ -14332,7 +14332,7 @@ L_kyber_compress_11_avx2_shift13: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_11_avx2_mask: +L_mlkem_compress_11_avx2_mask: .value 0x7ff,0x7ff .value 0x7ff,0x7ff .value 0x7ff,0x7ff @@ -14351,7 +14351,7 @@ L_kyber_compress_11_avx2_mask: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_compress_11_avx2_shift: +L_mlkem_compress_11_avx2_shift: .quad 0x800000108000001, 0x800000108000001 .quad 0x800000108000001, 0x800000108000001 #ifndef __APPLE__ @@ -14359,7 +14359,7 @@ L_kyber_compress_11_avx2_shift: #else .section __DATA,__data #endif /* __APPLE__ */ -L_kyber_compress_11_avx2_sllvd: +L_mlkem_compress_11_avx2_sllvd: .long 0xa,0x0,0xa,0x0 .long 0xa,0x0,0xa,0x0 #ifndef __APPLE__ @@ -14372,7 +14372,7 @@ L_kyber_compress_11_avx2_sllvd: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_compress_11_avx2_srlvq: +L_mlkem_compress_11_avx2_srlvq: .quad 0xa, 0x1e .quad 0xa, 0x1e #ifndef __APPLE__ @@ -14385,7 +14385,7 @@ L_kyber_compress_11_avx2_srlvq: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_11_avx2_shuf: +L_mlkem_compress_11_avx2_shuf: .value 0x100,0x302 .value 0x504,0x706 .value 0x908,0xff0a @@ -14396,27 +14396,27 @@ L_kyber_compress_11_avx2_shuf: .value 0x201,0x403 #ifndef __APPLE__ .text -.globl kyber_compress_11_avx2 -.type kyber_compress_11_avx2,@function +.globl mlkem_compress_11_avx2 +.type mlkem_compress_11_avx2,@function .align 16 -kyber_compress_11_avx2: +mlkem_compress_11_avx2: #else .section __TEXT,__text -.globl _kyber_compress_11_avx2 +.globl _mlkem_compress_11_avx2 .p2align 4 -_kyber_compress_11_avx2: +_mlkem_compress_11_avx2: #endif /* __APPLE__ */ vmovdqu (%rsi), %ymm0 - vmovdqu L_kyber_compress_11_avx2_v(%rip), %ymm7 - vmovdqu L_kyber_compress_11_avx2_off(%rip), %ymm8 - 
vmovdqu L_kyber_compress_11_avx2_shift13(%rip), %ymm9 - vmovdqu L_kyber_compress_11_avx2_mask(%rip), %ymm10 - vmovdqu L_kyber_compress_11_avx2_shift(%rip), %ymm11 - vmovdqu L_kyber_compress_11_avx2_sllvd(%rip), %ymm12 - vmovdqu L_kyber_compress_11_avx2_srlvq(%rip), %ymm13 - vmovdqu L_kyber_compress_11_avx2_shuf(%rip), %ymm14 + vmovdqu L_mlkem_compress_11_avx2_v(%rip), %ymm7 + vmovdqu L_mlkem_compress_11_avx2_off(%rip), %ymm8 + vmovdqu L_mlkem_compress_11_avx2_shift13(%rip), %ymm9 + vmovdqu L_mlkem_compress_11_avx2_mask(%rip), %ymm10 + vmovdqu L_mlkem_compress_11_avx2_shift(%rip), %ymm11 + vmovdqu L_mlkem_compress_11_avx2_sllvd(%rip), %ymm12 + vmovdqu L_mlkem_compress_11_avx2_srlvq(%rip), %ymm13 + vmovdqu L_mlkem_compress_11_avx2_shuf(%rip), %ymm14 vpsllw $3, %ymm7, %ymm6 -L_kyber_compress_11_avx2_start: +L_mlkem_compress_11_avx2_start: vmovdqu (%rsi), %ymm0 vmovdqu 32(%rsi), %ymm3 vpmullw %ymm6, %ymm0, %ymm1 @@ -14772,11 +14772,11 @@ L_kyber_compress_11_avx2_start: addq $0x160, %rdi addq $0x200, %rsi subl $0x01, %edx - jg L_kyber_compress_11_avx2_start + jg L_mlkem_compress_11_avx2_start vzeroupper repz retq #ifndef __APPLE__ -.size kyber_compress_11_avx2,.-kyber_compress_11_avx2 +.size mlkem_compress_11_avx2,.-mlkem_compress_11_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data @@ -14788,7 +14788,7 @@ L_kyber_compress_11_avx2_start: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_decompress_11_avx2_q: +L_mlkem_decompress_11_avx2_q: .value 0xd01,0xd01 .value 0xd01,0xd01 .value 0xd01,0xd01 @@ -14807,7 +14807,7 @@ L_kyber_decompress_11_avx2_q: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_decompress_11_avx2_shuf: +L_mlkem_decompress_11_avx2_shuf: .value 0x100,0x201 .value 0x302,0x504 .value 0x605,0x706 @@ -14821,7 +14821,7 @@ L_kyber_decompress_11_avx2_shuf: #else .section __DATA,__data #endif /* __APPLE__ */ -L_kyber_decompress_11_avx2_sllv: +L_mlkem_decompress_11_avx2_sllv: .long 0x0,0x1,0x0,0x0 .long 0x0,0x1,0x0,0x0 #ifndef __APPLE__ @@ -14834,7 +14834,7 @@ 
L_kyber_decompress_11_avx2_sllv: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_decompress_11_avx2_srlv: +L_mlkem_decompress_11_avx2_srlv: .quad 0x0, 0x2 .quad 0x0, 0x2 #ifndef __APPLE__ @@ -14847,7 +14847,7 @@ L_kyber_decompress_11_avx2_srlv: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_decompress_11_avx2_shift: +L_mlkem_decompress_11_avx2_shift: .value 0x20,0x4 .value 0x1,0x20 .value 0x8,0x1 @@ -14866,7 +14866,7 @@ L_kyber_decompress_11_avx2_shift: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_decompress_11_avx2_mask: +L_mlkem_decompress_11_avx2_mask: .value 0x7ff0,0x7ff0 .value 0x7ff0,0x7ff0 .value 0x7ff0,0x7ff0 @@ -14877,23 +14877,23 @@ L_kyber_decompress_11_avx2_mask: .value 0x7ff0,0x7ff0 #ifndef __APPLE__ .text -.globl kyber_decompress_11_avx2 -.type kyber_decompress_11_avx2,@function +.globl mlkem_decompress_11_avx2 +.type mlkem_decompress_11_avx2,@function .align 16 -kyber_decompress_11_avx2: +mlkem_decompress_11_avx2: #else .section __TEXT,__text -.globl _kyber_decompress_11_avx2 +.globl _mlkem_decompress_11_avx2 .p2align 4 -_kyber_decompress_11_avx2: +_mlkem_decompress_11_avx2: #endif /* __APPLE__ */ - vmovdqu L_kyber_decompress_11_avx2_q(%rip), %ymm4 - vmovdqu L_kyber_decompress_11_avx2_shuf(%rip), %ymm5 - vmovdqu L_kyber_decompress_11_avx2_sllv(%rip), %ymm6 - vmovdqu L_kyber_decompress_11_avx2_srlv(%rip), %ymm7 - vmovdqu L_kyber_decompress_11_avx2_shift(%rip), %ymm8 - vmovdqu L_kyber_decompress_11_avx2_mask(%rip), %ymm9 -L_kyber_decompress_11_avx2_start: + vmovdqu L_mlkem_decompress_11_avx2_q(%rip), %ymm4 + vmovdqu L_mlkem_decompress_11_avx2_shuf(%rip), %ymm5 + vmovdqu L_mlkem_decompress_11_avx2_sllv(%rip), %ymm6 + vmovdqu L_mlkem_decompress_11_avx2_srlv(%rip), %ymm7 + vmovdqu L_mlkem_decompress_11_avx2_shift(%rip), %ymm8 + vmovdqu L_mlkem_decompress_11_avx2_mask(%rip), %ymm9 +L_mlkem_decompress_11_avx2_start: vpermq $0x94, (%rsi), %ymm0 vpermq $0x94, 22(%rsi), %ymm1 vpermq $0x94, 44(%rsi), %ymm2 @@ -15041,11 +15041,11 @@ 
L_kyber_decompress_11_avx2_start: addq $0x160, %rsi addq $0x200, %rdi subl $0x01, %edx - jg L_kyber_decompress_11_avx2_start + jg L_mlkem_decompress_11_avx2_start vzeroupper repz retq #ifndef __APPLE__ -.size kyber_decompress_11_avx2,.-kyber_decompress_11_avx2 +.size mlkem_decompress_11_avx2,.-mlkem_decompress_11_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data @@ -15057,7 +15057,7 @@ L_kyber_decompress_11_avx2_start: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_4_avx2_mask: +L_mlkem_compress_4_avx2_mask: .value 0xf,0xf .value 0xf,0xf .value 0xf,0xf @@ -15076,7 +15076,7 @@ L_kyber_compress_4_avx2_mask: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_4_avx2_shift: +L_mlkem_compress_4_avx2_shift: .value 0x200,0x200 .value 0x200,0x200 .value 0x200,0x200 @@ -15090,7 +15090,7 @@ L_kyber_compress_4_avx2_shift: #else .section __DATA,__data #endif /* __APPLE__ */ -L_kyber_compress_4_avx2_perm: +L_mlkem_compress_4_avx2_perm: .long 0x0,0x4,0x1,0x5 .long 0x2,0x6,0x3,0x7 #ifndef __APPLE__ @@ -15103,7 +15103,7 @@ L_kyber_compress_4_avx2_perm: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_4_avx2_v: +L_mlkem_compress_4_avx2_v: .value 0x4ebf,0x4ebf .value 0x4ebf,0x4ebf .value 0x4ebf,0x4ebf @@ -15122,7 +15122,7 @@ L_kyber_compress_4_avx2_v: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_4_avx2_shift12: +L_mlkem_compress_4_avx2_shift12: .value 0x1001,0x1001 .value 0x1001,0x1001 .value 0x1001,0x1001 @@ -15133,21 +15133,21 @@ L_kyber_compress_4_avx2_shift12: .value 0x1001,0x1001 #ifndef __APPLE__ .text -.globl kyber_compress_4_avx2 -.type kyber_compress_4_avx2,@function +.globl mlkem_compress_4_avx2 +.type mlkem_compress_4_avx2,@function .align 16 -kyber_compress_4_avx2: +mlkem_compress_4_avx2: #else .section __TEXT,__text -.globl _kyber_compress_4_avx2 +.globl _mlkem_compress_4_avx2 .p2align 4 -_kyber_compress_4_avx2: +_mlkem_compress_4_avx2: #endif /* __APPLE__ */ - vmovdqu L_kyber_compress_4_avx2_mask(%rip), %ymm8 - vmovdqu 
L_kyber_compress_4_avx2_shift(%rip), %ymm9 - vmovdqu L_kyber_compress_4_avx2_perm(%rip), %ymm10 - vmovdqu L_kyber_compress_4_avx2_v(%rip), %ymm11 - vmovdqu L_kyber_compress_4_avx2_shift12(%rip), %ymm12 + vmovdqu L_mlkem_compress_4_avx2_mask(%rip), %ymm8 + vmovdqu L_mlkem_compress_4_avx2_shift(%rip), %ymm9 + vmovdqu L_mlkem_compress_4_avx2_perm(%rip), %ymm10 + vmovdqu L_mlkem_compress_4_avx2_v(%rip), %ymm11 + vmovdqu L_mlkem_compress_4_avx2_shift12(%rip), %ymm12 vpmulhw (%rsi), %ymm11, %ymm0 vpmulhw 32(%rsi), %ymm11, %ymm1 vpmulhw 64(%rsi), %ymm11, %ymm2 @@ -15227,14 +15227,14 @@ _kyber_compress_4_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_compress_4_avx2,.-kyber_compress_4_avx2 +.size mlkem_compress_4_avx2,.-mlkem_compress_4_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ -L_kyber_decompress_4_avx2_mask: +L_mlkem_decompress_4_avx2_mask: .long 0xf0000f,0xf0000f,0xf0000f,0xf0000f .long 0xf0000f,0xf0000f,0xf0000f,0xf0000f #ifndef __APPLE__ @@ -15242,7 +15242,7 @@ L_kyber_decompress_4_avx2_mask: #else .section __DATA,__data #endif /* __APPLE__ */ -L_kyber_decompress_4_avx2_shift: +L_mlkem_decompress_4_avx2_shift: .long 0x800800,0x800800,0x800800,0x800800 .long 0x800800,0x800800,0x800800,0x800800 #ifndef __APPLE__ @@ -15255,7 +15255,7 @@ L_kyber_decompress_4_avx2_shift: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_decompress_4_avx2_q: +L_mlkem_decompress_4_avx2_q: .value 0xd01,0xd01 .value 0xd01,0xd01 .value 0xd01,0xd01 @@ -15274,7 +15274,7 @@ L_kyber_decompress_4_avx2_q: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_decompress_4_avx2_shuf: +L_mlkem_decompress_4_avx2_shuf: .value 0x0,0x0 .value 0x101,0x101 .value 0x202,0x202 @@ -15285,20 +15285,20 @@ L_kyber_decompress_4_avx2_shuf: .value 0x707,0x707 #ifndef __APPLE__ .text -.globl kyber_decompress_4_avx2 -.type kyber_decompress_4_avx2,@function +.globl mlkem_decompress_4_avx2 +.type mlkem_decompress_4_avx2,@function .align 16 
-kyber_decompress_4_avx2: +mlkem_decompress_4_avx2: #else .section __TEXT,__text -.globl _kyber_decompress_4_avx2 +.globl _mlkem_decompress_4_avx2 .p2align 4 -_kyber_decompress_4_avx2: +_mlkem_decompress_4_avx2: #endif /* __APPLE__ */ - vmovdqu L_kyber_decompress_4_avx2_mask(%rip), %ymm4 - vmovdqu L_kyber_decompress_4_avx2_shift(%rip), %ymm5 - vmovdqu L_kyber_decompress_4_avx2_shuf(%rip), %ymm6 - vmovdqu L_kyber_decompress_4_avx2_q(%rip), %ymm7 + vmovdqu L_mlkem_decompress_4_avx2_mask(%rip), %ymm4 + vmovdqu L_mlkem_decompress_4_avx2_shift(%rip), %ymm5 + vmovdqu L_mlkem_decompress_4_avx2_shuf(%rip), %ymm6 + vmovdqu L_mlkem_decompress_4_avx2_q(%rip), %ymm7 vpbroadcastq (%rsi), %ymm0 vpbroadcastq 8(%rsi), %ymm1 vpbroadcastq 16(%rsi), %ymm2 @@ -15398,7 +15398,7 @@ _kyber_decompress_4_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_decompress_4_avx2,.-kyber_decompress_4_avx2 +.size mlkem_decompress_4_avx2,.-mlkem_decompress_4_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data @@ -15410,7 +15410,7 @@ _kyber_decompress_4_avx2: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_5_avx2_v: +L_mlkem_compress_5_avx2_v: .value 0x4ebf,0x4ebf .value 0x4ebf,0x4ebf .value 0x4ebf,0x4ebf @@ -15429,7 +15429,7 @@ L_kyber_compress_5_avx2_v: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_5_avx2_shift: +L_mlkem_compress_5_avx2_shift: .value 0x400,0x400 .value 0x400,0x400 .value 0x400,0x400 @@ -15448,7 +15448,7 @@ L_kyber_compress_5_avx2_shift: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_5_avx2_mask: +L_mlkem_compress_5_avx2_mask: .value 0x1f,0x1f .value 0x1f,0x1f .value 0x1f,0x1f @@ -15467,7 +15467,7 @@ L_kyber_compress_5_avx2_mask: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_5_avx2_shift1: +L_mlkem_compress_5_avx2_shift1: .value 0x2001,0x2001 .value 0x2001,0x2001 .value 0x2001,0x2001 @@ -15481,7 +15481,7 @@ L_kyber_compress_5_avx2_shift1: #else .section __DATA,__data #endif /* __APPLE__ */ -L_kyber_compress_5_avx2_shift2: 
+L_mlkem_compress_5_avx2_shift2: .long 0x4000001,0x4000001,0x4000001,0x4000001 .long 0x4000001,0x4000001,0x4000001,0x4000001 #ifndef __APPLE__ @@ -15494,7 +15494,7 @@ L_kyber_compress_5_avx2_shift2: #else .p2align 5 #endif /* __APPLE__ */ -L_kyber_compress_5_avx2_shlv: +L_mlkem_compress_5_avx2_shlv: .quad 0xc, 0xc .quad 0xc, 0xc #ifndef __APPLE__ @@ -15507,7 +15507,7 @@ L_kyber_compress_5_avx2_shlv: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_compress_5_avx2_shuffle: +L_mlkem_compress_5_avx2_shuffle: .value 0x100,0x302 .value 0xff04,0xffff .value 0xffff,0x908 @@ -15518,24 +15518,24 @@ L_kyber_compress_5_avx2_shuffle: .value 0xffff,0x8ff #ifndef __APPLE__ .text -.globl kyber_compress_5_avx2 -.type kyber_compress_5_avx2,@function +.globl mlkem_compress_5_avx2 +.type mlkem_compress_5_avx2,@function .align 16 -kyber_compress_5_avx2: +mlkem_compress_5_avx2: #else .section __TEXT,__text -.globl _kyber_compress_5_avx2 +.globl _mlkem_compress_5_avx2 .p2align 4 -_kyber_compress_5_avx2: +_mlkem_compress_5_avx2: #endif /* __APPLE__ */ vmovdqu (%rsi), %ymm0 - vmovdqu L_kyber_compress_5_avx2_v(%rip), %ymm2 - vmovdqu L_kyber_compress_5_avx2_shift(%rip), %ymm3 - vmovdqu L_kyber_compress_5_avx2_mask(%rip), %ymm4 - vmovdqu L_kyber_compress_5_avx2_shift1(%rip), %ymm5 - vmovdqu L_kyber_compress_5_avx2_shift2(%rip), %ymm6 - vmovdqu L_kyber_compress_5_avx2_shlv(%rip), %ymm7 - vmovdqu L_kyber_compress_5_avx2_shuffle(%rip), %ymm8 + vmovdqu L_mlkem_compress_5_avx2_v(%rip), %ymm2 + vmovdqu L_mlkem_compress_5_avx2_shift(%rip), %ymm3 + vmovdqu L_mlkem_compress_5_avx2_mask(%rip), %ymm4 + vmovdqu L_mlkem_compress_5_avx2_shift1(%rip), %ymm5 + vmovdqu L_mlkem_compress_5_avx2_shift2(%rip), %ymm6 + vmovdqu L_mlkem_compress_5_avx2_shlv(%rip), %ymm7 + vmovdqu L_mlkem_compress_5_avx2_shuffle(%rip), %ymm8 vpmulhw (%rsi), %ymm2, %ymm0 vpmulhw 32(%rsi), %ymm2, %ymm1 vpmulhrsw %ymm3, %ymm0, %ymm0 @@ -15667,7 +15667,7 @@ _kyber_compress_5_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size 
kyber_compress_5_avx2,.-kyber_compress_5_avx2 +.size mlkem_compress_5_avx2,.-mlkem_compress_5_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data @@ -15679,7 +15679,7 @@ _kyber_compress_5_avx2: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_decompress_5_avx2_q: +L_mlkem_decompress_5_avx2_q: .value 0xd01,0xd01 .value 0xd01,0xd01 .value 0xd01,0xd01 @@ -15698,7 +15698,7 @@ L_kyber_decompress_5_avx2_q: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_decompress_5_avx2_shuf: +L_mlkem_decompress_5_avx2_shuf: .value 0x0,0x100 .value 0x101,0x201 .value 0x302,0x303 @@ -15717,7 +15717,7 @@ L_kyber_decompress_5_avx2_shuf: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_decompress_5_avx2_mask: +L_mlkem_decompress_5_avx2_mask: .value 0x1f,0x3e0 .value 0x7c,0xf80 .value 0x1f0,0x3e @@ -15736,7 +15736,7 @@ L_kyber_decompress_5_avx2_mask: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_decompress_5_avx2_shift: +L_mlkem_decompress_5_avx2_shift: .value 0x400,0x20 .value 0x100,0x8 .value 0x40,0x200 @@ -15747,20 +15747,20 @@ L_kyber_decompress_5_avx2_shift: .value 0x10,0x80 #ifndef __APPLE__ .text -.globl kyber_decompress_5_avx2 -.type kyber_decompress_5_avx2,@function +.globl mlkem_decompress_5_avx2 +.type mlkem_decompress_5_avx2,@function .align 16 -kyber_decompress_5_avx2: +mlkem_decompress_5_avx2: #else .section __TEXT,__text -.globl _kyber_decompress_5_avx2 +.globl _mlkem_decompress_5_avx2 .p2align 4 -_kyber_decompress_5_avx2: +_mlkem_decompress_5_avx2: #endif /* __APPLE__ */ - vmovdqu L_kyber_decompress_5_avx2_q(%rip), %ymm1 - vmovdqu L_kyber_decompress_5_avx2_shuf(%rip), %ymm2 - vmovdqu L_kyber_decompress_5_avx2_mask(%rip), %ymm3 - vmovdqu L_kyber_decompress_5_avx2_shift(%rip), %ymm4 + vmovdqu L_mlkem_decompress_5_avx2_q(%rip), %ymm1 + vmovdqu L_mlkem_decompress_5_avx2_shuf(%rip), %ymm2 + vmovdqu L_mlkem_decompress_5_avx2_mask(%rip), %ymm3 + vmovdqu L_mlkem_decompress_5_avx2_shift(%rip), %ymm4 vbroadcasti128 (%rsi), %ymm0 vpshufb %ymm2, %ymm0, %ymm0 vpand %ymm3, %ymm0, %ymm0 
@@ -15860,14 +15860,14 @@ _kyber_decompress_5_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_decompress_5_avx2,.-kyber_decompress_5_avx2 +.size mlkem_decompress_5_avx2,.-mlkem_decompress_5_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ -L_kyber_from_msg_avx2_shift: +L_mlkem_from_msg_avx2_shift: .long 0x3,0x2,0x1,0x0 .long 0x3,0x2,0x1,0x0 #ifndef __APPLE__ @@ -15880,7 +15880,7 @@ L_kyber_from_msg_avx2_shift: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_from_msg_avx2_shuf: +L_mlkem_from_msg_avx2_shuf: .value 0x100,0x504 .value 0x908,0xd0c .value 0x302,0x706 @@ -15899,7 +15899,7 @@ L_kyber_from_msg_avx2_shuf: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_from_msg_avx2_hqs: +L_mlkem_from_msg_avx2_hqs: .value 0x681,0x681 .value 0x681,0x681 .value 0x681,0x681 @@ -15910,20 +15910,20 @@ L_kyber_from_msg_avx2_hqs: .value 0x681,0x681 #ifndef __APPLE__ .text -.globl kyber_from_msg_avx2 -.type kyber_from_msg_avx2,@function +.globl mlkem_from_msg_avx2 +.type mlkem_from_msg_avx2,@function .align 16 -kyber_from_msg_avx2: +mlkem_from_msg_avx2: #else .section __TEXT,__text -.globl _kyber_from_msg_avx2 +.globl _mlkem_from_msg_avx2 .p2align 4 -_kyber_from_msg_avx2: +_mlkem_from_msg_avx2: #endif /* __APPLE__ */ vmovdqu (%rsi), %ymm0 - vmovdqu L_kyber_from_msg_avx2_shift(%rip), %ymm9 - vmovdqu L_kyber_from_msg_avx2_shuf(%rip), %ymm10 - vmovdqu L_kyber_from_msg_avx2_hqs(%rip), %ymm11 + vmovdqu L_mlkem_from_msg_avx2_shift(%rip), %ymm9 + vmovdqu L_mlkem_from_msg_avx2_shuf(%rip), %ymm10 + vmovdqu L_mlkem_from_msg_avx2_hqs(%rip), %ymm11 vpshufd $0x00, %ymm0, %ymm4 vpsllvd %ymm9, %ymm4, %ymm4 vpshufb %ymm10, %ymm4, %ymm4 @@ -16031,7 +16031,7 @@ _kyber_from_msg_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_from_msg_avx2,.-kyber_from_msg_avx2 +.size mlkem_from_msg_avx2,.-mlkem_from_msg_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data @@ -16043,7 +16043,7 @@ _kyber_from_msg_avx2: #else .p2align 4 #endif /* 
__APPLE__ */ -L_kyber_to_msg_avx2_hqs: +L_mlkem_to_msg_avx2_hqs: .value 0x680,0x680 .value 0x680,0x680 .value 0x680,0x680 @@ -16062,7 +16062,7 @@ L_kyber_to_msg_avx2_hqs: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_to_msg_avx2_hhqs: +L_mlkem_to_msg_avx2_hhqs: .value 0xfcc1,0xfcc1 .value 0xfcc1,0xfcc1 .value 0xfcc1,0xfcc1 @@ -16073,18 +16073,18 @@ L_kyber_to_msg_avx2_hhqs: .value 0xfcc1,0xfcc1 #ifndef __APPLE__ .text -.globl kyber_to_msg_avx2 -.type kyber_to_msg_avx2,@function +.globl mlkem_to_msg_avx2 +.type mlkem_to_msg_avx2,@function .align 16 -kyber_to_msg_avx2: +mlkem_to_msg_avx2: #else .section __TEXT,__text -.globl _kyber_to_msg_avx2 +.globl _mlkem_to_msg_avx2 .p2align 4 -_kyber_to_msg_avx2: +_mlkem_to_msg_avx2: #endif /* __APPLE__ */ - vmovdqu L_kyber_to_msg_avx2_hqs(%rip), %ymm8 - vmovdqu L_kyber_to_msg_avx2_hhqs(%rip), %ymm9 + vmovdqu L_mlkem_to_msg_avx2_hqs(%rip), %ymm8 + vmovdqu L_mlkem_to_msg_avx2_hhqs(%rip), %ymm9 vpsubw (%rsi), %ymm8, %ymm0 vpsubw 32(%rsi), %ymm8, %ymm1 vpsubw 64(%rsi), %ymm8, %ymm2 @@ -16184,7 +16184,7 @@ _kyber_to_msg_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_to_msg_avx2,.-kyber_to_msg_avx2 +.size mlkem_to_msg_avx2,.-mlkem_to_msg_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data @@ -16196,7 +16196,7 @@ _kyber_to_msg_avx2: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_from_bytes_avx2_shuf: +L_mlkem_from_bytes_avx2_shuf: .value 0x100,0xff02 .value 0x403,0xff05 .value 0x706,0xff08 @@ -16210,24 +16210,24 @@ L_kyber_from_bytes_avx2_shuf: #else .section __DATA,__data #endif /* __APPLE__ */ -L_kyber_from_bytes_avx2_mask: +L_mlkem_from_bytes_avx2_mask: .long 0xfff,0xfff,0xfff,0xfff .long 0xfff,0xfff,0xfff,0xfff #ifndef __APPLE__ .text -.globl kyber_from_bytes_avx2 -.type kyber_from_bytes_avx2,@function +.globl mlkem_from_bytes_avx2 +.type mlkem_from_bytes_avx2,@function .align 16 -kyber_from_bytes_avx2: +mlkem_from_bytes_avx2: #else .section __TEXT,__text -.globl _kyber_from_bytes_avx2 +.globl 
_mlkem_from_bytes_avx2 .p2align 4 -_kyber_from_bytes_avx2: +_mlkem_from_bytes_avx2: #endif /* __APPLE__ */ vmovdqu (%rsi), %ymm0 - vmovdqu L_kyber_from_bytes_avx2_shuf(%rip), %ymm12 - vmovdqu L_kyber_from_bytes_avx2_mask(%rip), %ymm13 + vmovdqu L_mlkem_from_bytes_avx2_shuf(%rip), %ymm12 + vmovdqu L_mlkem_from_bytes_avx2_mask(%rip), %ymm13 vmovdqu (%rsi), %ymm0 vmovdqu 32(%rsi), %ymm1 vmovdqu 64(%rsi), %ymm2 @@ -16371,14 +16371,14 @@ _kyber_from_bytes_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_from_bytes_avx2,.-kyber_from_bytes_avx2 +.size mlkem_from_bytes_avx2,.-mlkem_from_bytes_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ -L_kyber_to_bytes_avx2_mask: +L_mlkem_to_bytes_avx2_mask: .long 0xfff,0xfff,0xfff,0xfff .long 0xfff,0xfff,0xfff,0xfff #ifndef __APPLE__ @@ -16391,7 +16391,7 @@ L_kyber_to_bytes_avx2_mask: #else .p2align 4 #endif /* __APPLE__ */ -L_kyber_to_bytes_avx2_shuf: +L_mlkem_to_bytes_avx2_shuf: .value 0x100,0x402 .value 0x605,0x908 .value 0xc0a,0xe0d @@ -16405,25 +16405,25 @@ L_kyber_to_bytes_avx2_shuf: #else .section __DATA,__data #endif /* __APPLE__ */ -L_kyber_to_bytes_avx2_perm: +L_mlkem_to_bytes_avx2_perm: .long 0x0,0x1,0x2,0x7 .long 0x4,0x5,0x3,0x6 #ifndef __APPLE__ .text -.globl kyber_to_bytes_avx2 -.type kyber_to_bytes_avx2,@function +.globl mlkem_to_bytes_avx2 +.type mlkem_to_bytes_avx2,@function .align 16 -kyber_to_bytes_avx2: +mlkem_to_bytes_avx2: #else .section __TEXT,__text -.globl _kyber_to_bytes_avx2 +.globl _mlkem_to_bytes_avx2 .p2align 4 -_kyber_to_bytes_avx2: +_mlkem_to_bytes_avx2: #endif /* __APPLE__ */ - vmovdqu kyber_q(%rip), %ymm12 - vmovdqu L_kyber_to_bytes_avx2_mask(%rip), %ymm13 - vmovdqu L_kyber_to_bytes_avx2_shuf(%rip), %ymm14 - vmovdqu L_kyber_to_bytes_avx2_perm(%rip), %ymm15 + vmovdqu mlkem_q(%rip), %ymm12 + vmovdqu L_mlkem_to_bytes_avx2_mask(%rip), %ymm13 + vmovdqu L_mlkem_to_bytes_avx2_shuf(%rip), %ymm14 + vmovdqu L_mlkem_to_bytes_avx2_perm(%rip), 
%ymm15 vmovdqu (%rsi), %ymm0 vmovdqu 32(%rsi), %ymm1 vmovdqu 64(%rsi), %ymm2 @@ -16647,19 +16647,19 @@ _kyber_to_bytes_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_to_bytes_avx2,.-kyber_to_bytes_avx2 +.size mlkem_to_bytes_avx2,.-mlkem_to_bytes_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_cmp_avx2 -.type kyber_cmp_avx2,@function +.globl mlkem_cmp_avx2 +.type mlkem_cmp_avx2,@function .align 16 -kyber_cmp_avx2: +mlkem_cmp_avx2: #else .section __TEXT,__text -.globl _kyber_cmp_avx2 +.globl _mlkem_cmp_avx2 .p2align 4 -_kyber_cmp_avx2: +_mlkem_cmp_avx2: #endif /* __APPLE__ */ vpxor %ymm2, %ymm2, %ymm2 vpxor %ymm3, %ymm3, %ymm3 @@ -16738,7 +16738,7 @@ _kyber_cmp_avx2: vpor %ymm0, %ymm2, %ymm2 vpor %ymm1, %ymm3, %ymm3 subl $0x300, %edx - jz L_kyber_cmp_avx2_done + jz L_mlkem_cmp_avx2_done vmovdqu 768(%rdi), %ymm0 vmovdqu 800(%rdi), %ymm1 vpxor 768(%rsi), %ymm0, %ymm0 @@ -16770,7 +16770,7 @@ _kyber_cmp_avx2: vpor %ymm0, %ymm2, %ymm2 vpor %ymm1, %ymm3, %ymm3 subl $0x140, %edx - jz L_kyber_cmp_avx2_done + jz L_mlkem_cmp_avx2_done vmovdqu 1088(%rdi), %ymm0 vmovdqu 1120(%rdi), %ymm1 vpxor 1088(%rsi), %ymm0, %ymm0 @@ -16813,7 +16813,7 @@ _kyber_cmp_avx2: vpxor 1504(%rsi), %ymm1, %ymm1 vpor %ymm0, %ymm2, %ymm2 vpor %ymm1, %ymm3, %ymm3 -L_kyber_cmp_avx2_done: +L_mlkem_cmp_avx2_done: vpor %ymm3, %ymm2, %ymm2 vptest %ymm2, %ymm2 cmovzl %ecx, %eax @@ -16821,19 +16821,19 @@ L_kyber_cmp_avx2_done: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_cmp_avx2,.-kyber_cmp_avx2 +.size mlkem_cmp_avx2,.-mlkem_cmp_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_redistribute_21_rand_avx2 -.type kyber_redistribute_21_rand_avx2,@function +.globl mlkem_redistribute_21_rand_avx2 +.type mlkem_redistribute_21_rand_avx2,@function .align 16 -kyber_redistribute_21_rand_avx2: +mlkem_redistribute_21_rand_avx2: #else .section __TEXT,__text -.globl _kyber_redistribute_21_rand_avx2 +.globl _mlkem_redistribute_21_rand_avx2 .p2align 4 
-_kyber_redistribute_21_rand_avx2: +_mlkem_redistribute_21_rand_avx2: #endif /* __APPLE__ */ vmovdqu (%rdi), %ymm0 vmovdqu 32(%rdi), %ymm1 @@ -16926,19 +16926,19 @@ _kyber_redistribute_21_rand_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_redistribute_21_rand_avx2,.-kyber_redistribute_21_rand_avx2 +.size mlkem_redistribute_21_rand_avx2,.-mlkem_redistribute_21_rand_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_redistribute_17_rand_avx2 -.type kyber_redistribute_17_rand_avx2,@function +.globl mlkem_redistribute_17_rand_avx2 +.type mlkem_redistribute_17_rand_avx2,@function .align 16 -kyber_redistribute_17_rand_avx2: +mlkem_redistribute_17_rand_avx2: #else .section __TEXT,__text -.globl _kyber_redistribute_17_rand_avx2 +.globl _mlkem_redistribute_17_rand_avx2 .p2align 4 -_kyber_redistribute_17_rand_avx2: +_mlkem_redistribute_17_rand_avx2: #endif /* __APPLE__ */ vmovdqu (%rdi), %ymm0 vmovdqu 32(%rdi), %ymm1 @@ -17015,19 +17015,19 @@ _kyber_redistribute_17_rand_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_redistribute_17_rand_avx2,.-kyber_redistribute_17_rand_avx2 +.size mlkem_redistribute_17_rand_avx2,.-mlkem_redistribute_17_rand_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_redistribute_16_rand_avx2 -.type kyber_redistribute_16_rand_avx2,@function +.globl mlkem_redistribute_16_rand_avx2 +.type mlkem_redistribute_16_rand_avx2,@function .align 16 -kyber_redistribute_16_rand_avx2: +mlkem_redistribute_16_rand_avx2: #else .section __TEXT,__text -.globl _kyber_redistribute_16_rand_avx2 +.globl _mlkem_redistribute_16_rand_avx2 .p2align 4 -_kyber_redistribute_16_rand_avx2: +_mlkem_redistribute_16_rand_avx2: #endif /* __APPLE__ */ vmovdqu (%rdi), %ymm0 vmovdqu 32(%rdi), %ymm1 @@ -17096,19 +17096,19 @@ _kyber_redistribute_16_rand_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_redistribute_16_rand_avx2,.-kyber_redistribute_16_rand_avx2 +.size 
mlkem_redistribute_16_rand_avx2,.-mlkem_redistribute_16_rand_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .text -.globl kyber_redistribute_8_rand_avx2 -.type kyber_redistribute_8_rand_avx2,@function +.globl mlkem_redistribute_8_rand_avx2 +.type mlkem_redistribute_8_rand_avx2,@function .align 16 -kyber_redistribute_8_rand_avx2: +mlkem_redistribute_8_rand_avx2: #else .section __TEXT,__text -.globl _kyber_redistribute_8_rand_avx2 +.globl _mlkem_redistribute_8_rand_avx2 .p2align 4 -_kyber_redistribute_8_rand_avx2: +_mlkem_redistribute_8_rand_avx2: #endif /* __APPLE__ */ vmovdqu (%rdi), %ymm0 vmovdqu 32(%rdi), %ymm1 @@ -17145,7 +17145,7 @@ _kyber_redistribute_8_rand_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_redistribute_8_rand_avx2,.-kyber_redistribute_8_rand_avx2 +.size mlkem_redistribute_8_rand_avx2,.-mlkem_redistribute_8_rand_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data @@ -17221,15 +17221,15 @@ L_sha3_128_blockx4_seed_avx2_end_mark: .quad 0x8000000000000000, 0x8000000000000000 #ifndef __APPLE__ .text -.globl kyber_sha3_128_blocksx4_seed_avx2 -.type kyber_sha3_128_blocksx4_seed_avx2,@function +.globl mlkem_sha3_128_blocksx4_seed_avx2 +.type mlkem_sha3_128_blocksx4_seed_avx2,@function .align 16 -kyber_sha3_128_blocksx4_seed_avx2: +mlkem_sha3_128_blocksx4_seed_avx2: #else .section __TEXT,__text -.globl _kyber_sha3_128_blocksx4_seed_avx2 +.globl _mlkem_sha3_128_blocksx4_seed_avx2 .p2align 4 -_kyber_sha3_128_blocksx4_seed_avx2: +_mlkem_sha3_128_blocksx4_seed_avx2: #endif /* __APPLE__ */ leaq L_sha3_parallel_4_r(%rip), %rdx movq %rdi, %rax @@ -22581,7 +22581,7 @@ _kyber_sha3_128_blocksx4_seed_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_sha3_128_blocksx4_seed_avx2,.-kyber_sha3_128_blocksx4_seed_avx2 +.size mlkem_sha3_128_blocksx4_seed_avx2,.-mlkem_sha3_128_blocksx4_seed_avx2 #endif /* __APPLE__ */ #ifndef __APPLE__ .data @@ -22598,15 +22598,15 @@ L_sha3_256_blockx4_seed_avx2_end_mark: .quad 0x8000000000000000, 0x8000000000000000 
#ifndef __APPLE__ .text -.globl kyber_sha3_256_blocksx4_seed_avx2 -.type kyber_sha3_256_blocksx4_seed_avx2,@function +.globl mlkem_sha3_256_blocksx4_seed_avx2 +.type mlkem_sha3_256_blocksx4_seed_avx2,@function .align 16 -kyber_sha3_256_blocksx4_seed_avx2: +mlkem_sha3_256_blocksx4_seed_avx2: #else .section __TEXT,__text -.globl _kyber_sha3_256_blocksx4_seed_avx2 +.globl _mlkem_sha3_256_blocksx4_seed_avx2 .p2align 4 -_kyber_sha3_256_blocksx4_seed_avx2: +_mlkem_sha3_256_blocksx4_seed_avx2: #endif /* __APPLE__ */ leaq L_sha3_parallel_4_r(%rip), %rdx movq %rdi, %rax @@ -27959,10 +27959,10 @@ _kyber_sha3_256_blocksx4_seed_avx2: vzeroupper repz retq #ifndef __APPLE__ -.size kyber_sha3_256_blocksx4_seed_avx2,.-kyber_sha3_256_blocksx4_seed_avx2 +.size mlkem_sha3_256_blocksx4_seed_avx2,.-mlkem_sha3_256_blocksx4_seed_avx2 #endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ -#endif /* WOLFSSL_WC_KYBER */ +#endif /* WOLFSSL_WC_MLKEM */ #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/wolfcrypt/src/wc_kyber_poly.c b/wolfcrypt/src/wc_mlkem_poly.c similarity index 76% rename from wolfcrypt/src/wc_kyber_poly.c rename to wolfcrypt/src/wc_mlkem_poly.c index 0030b139d..a25ecd4de 100644 --- a/wolfcrypt/src/wc_kyber_poly.c +++ b/wolfcrypt/src/wc_mlkem_poly.c @@ -1,4 +1,4 @@ -/* wc_kyber_poly.c +/* wc_mlkem_poly.c * * Copyright (C) 2006-2025 wolfSSL Inc. * @@ -24,7 +24,8 @@ * * Original implementation based on NIST 3rd Round submission package. 
* See link at: - * https://csrc.nist.gov/Projects/post-quantum-cryptography/post-quantum-cryptography-standardization/round-3-submissions + * https://csrc.nist.gov/Projects/post-quantum-cryptography/ + * post-quantum-cryptography-standardization/round-3-submissions */ /* Implementation of the functions that operate on polynomials or vectors of @@ -33,7 +34,7 @@ /* Possible Kyber options: * - * WOLFSSL_WC_KYBER Default: OFF + * WOLFSSL_WC_MLKEM Default: OFF * Enables this code, wolfSSL implementation, to be built. * * WOLFSSL_WC_ML_KEM_512 Default: OFF @@ -51,17 +52,17 @@ * * USE_INTEL_SPEEDUP Default: OFF * Compiles in Intel x64 specific implementations that are faster. - * WOLFSSL_KYBER_NO_LARGE_CODE Default: OFF + * WOLFSSL_MLKEM_NO_LARGE_CODE Default: OFF * Compiles smaller, fast code size with a speed trade-off. - * WOLFSSL_KYBER_SMALL Default: OFF + * WOLFSSL_MLKEM_SMALL Default: OFF * Compiles to small code size with a speed trade-off. * WOLFSSL_SMALL_STACK Default: OFF * Use less stack by dynamically allocating local variables. * - * WOLFSSL_KYBER_NTT_UNROLL Default: OFF + * WOLFSSL_MLKEM_NTT_UNROLL Default: OFF * Enable an alternative NTT implementation that may be faster on some * platforms and is smaller in code size. - * WOLFSSL_KYBER_INVNTT_UNROLL Default: OFF + * WOLFSSL_MLKEM_INVNTT_UNROLL Default: OFF * Enables an alternative inverse NTT implementation that may be faster on * some platforms and is smaller in code size. 
*/ @@ -71,11 +72,11 @@ #endif #include -#include +#include #include #include -#ifdef WOLFSSL_WC_KYBER +#ifdef WOLFSSL_WC_MLKEM #ifdef NO_INLINE #include @@ -86,16 +87,16 @@ #if defined(WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM) || \ defined(WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM) -static int kyber_gen_matrix_i(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, +static int mlkem_gen_matrix_i(MLKEM_PRF_T* prf, sword16* a, int k, byte* seed, int i, int transposed); -static int kyber_get_noise_i(KYBER_PRF_T* prf, int k, sword16* vec2, +static int mlkem_get_noise_i(MLKEM_PRF_T* prf, int k, sword16* vec2, byte* seed, int i, int make); -static int kyber_get_noise_eta2_c(KYBER_PRF_T* prf, sword16* p, +static int mlkem_get_noise_eta2_c(MLKEM_PRF_T* prf, sword16* p, const byte* seed); #endif -/* Declared in wc_kyber.c to stop compiler optimizer from simplifying. */ -extern volatile sword16 kyber_opt_blocker; +/* Declared in wc_mlkem.c to stop compiler optimizer from simplifying. */ +extern volatile sword16 mlkem_opt_blocker; #if defined(USE_INTEL_SPEEDUP) || (defined(__aarch64__) && \ defined(WOLFSSL_ARMASM)) @@ -103,26 +104,26 @@ static word32 cpuid_flags = 0; #endif /* Half of Q plus one. Converted message bit value of 1. */ -#define KYBER_Q_1_HALF ((KYBER_Q + 1) / 2) +#define MLKEM_Q_1_HALF ((MLKEM_Q + 1) / 2) /* Half of Q */ -#define KYBER_Q_HALF (KYBER_Q / 2) +#define MLKEM_Q_HALF (MLKEM_Q / 2) /* q^-1 mod 2^16 (inverse of 3329 mod 16384) */ -#define KYBER_QINV 62209 +#define MLKEM_QINV 62209 /* Used in Barrett Reduction: * r = a mod q * => r = a - ((V * a) >> 26) * q), as V based on 2^26 * V is the multiplier that gets the quotient after shifting. */ -#define KYBER_V (((1U << 26) + (KYBER_Q / 2)) / KYBER_Q) +#define MLKEM_V (((1U << 26) + (MLKEM_Q / 2)) / MLKEM_Q) /* Used in converting to Montgomery form. * f is the normalizer = 2^k % m. * 16-bit value cast to sword32 in use. 
*/ -#define KYBER_F ((1ULL << 32) % KYBER_Q) +#define MLKEM_F ((1ULL << 32) % MLKEM_Q) /* Number of bytes in an output block of SHA-3-128 */ #define SHA3_128_BYTES (WC_SHA3_128_COUNT * 8) @@ -131,15 +132,15 @@ static word32 cpuid_flags = 0; /* Number of blocks to generate for matrix. */ #define GEN_MATRIX_NBLOCKS \ - ((12 * KYBER_N / 8 * (1 << 12) / KYBER_Q + XOF_BLOCK_SIZE) / XOF_BLOCK_SIZE) + ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_BLOCK_SIZE) / XOF_BLOCK_SIZE) /* Number of bytes to generate for matrix. */ #define GEN_MATRIX_SIZE GEN_MATRIX_NBLOCKS * XOF_BLOCK_SIZE /* Number of random bytes to generate for ETA3. */ -#define ETA3_RAND_SIZE ((3 * KYBER_N) / 4) +#define ETA3_RAND_SIZE ((3 * MLKEM_N) / 4) /* Number of random bytes to generate for ETA2. */ -#define ETA2_RAND_SIZE ((2 * KYBER_N) / 4) +#define ETA2_RAND_SIZE ((2 * MLKEM_N) / 4) /* Montgomery reduce a. @@ -147,10 +148,10 @@ static word32 cpuid_flags = 0; * @param [in] a 32-bit value to be reduced. * @return Montgomery reduction result. */ -#define KYBER_MONT_RED(a) \ +#define MLKEM_MONT_RED(a) \ (sword16)(((a) - (sword32)(((sword16)((sword16)(a) * \ - (sword16)KYBER_QINV)) * \ - (sword32)KYBER_Q)) >> 16) + (sword16)MLKEM_QINV)) * \ + (sword32)MLKEM_Q)) >> 16) /* Barrett reduce a. r = a mod q. * @@ -159,13 +160,13 @@ static word32 cpuid_flags = 0; * @param [in] a 16-bit value to be reduced to range of q. * @return Modulo result. */ -#define KYBER_BARRETT_RED(a) \ +#define MLKEM_BARRETT_RED(a) \ (sword16)((sword16)(a) - (sword16)((sword16)( \ - ((sword32)((sword32)KYBER_V * (sword16)(a))) >> 26) * (word16)KYBER_Q)) + ((sword32)((sword32)MLKEM_V * (sword16)(a))) >> 26) * (word16)MLKEM_Q)) /* Zetas for NTT. 
*/ -const sword16 zetas[KYBER_N / 2] = { +const sword16 zetas[MLKEM_N / 2] = { 2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182, 962, 2127, 1855, 1468, 573, 2004, 264, 383, 2500, 1458, 1727, 3199, @@ -208,9 +209,9 @@ const sword16 zetas[KYBER_N / 2] = { * * @param [in, out] r Polynomial to transform. */ -static void kyber_ntt(sword16* r) +static void mlkem_ntt(sword16* r) { -#ifdef WOLFSSL_KYBER_SMALL +#ifdef WOLFSSL_MLKEM_SMALL unsigned int len; unsigned int k; unsigned int j; @@ -218,17 +219,17 @@ static void kyber_ntt(sword16* r) /* Step 2 */ k = 1; /* Step 3 */ - for (len = KYBER_N / 2; len >= 2; len >>= 1) { + for (len = MLKEM_N / 2; len >= 2; len >>= 1) { unsigned int start; /* Step 4 */ - for (start = 0; start < KYBER_N; start = j + len) { + for (start = 0; start < MLKEM_N; start = j + len) { /* Step 5, 6*/ sword16 zeta = zetas[k++]; /* Step 7 */ for (j = start; j < start + len; ++j) { /* Step 8 */ sword32 p = (sword32)zeta * r[j + len]; - sword16 t = KYBER_MONT_RED(p); + sword16 t = MLKEM_MONT_RED(p); sword16 rj = r[j]; /* Step 9 */ r[j + len] = rj - t; @@ -239,10 +240,10 @@ static void kyber_ntt(sword16* r) } /* Reduce coefficients with quick algorithm. */ - for (j = 0; j < KYBER_N; ++j) { - r[j] = KYBER_BARRETT_RED(r[j]); + for (j = 0; j < MLKEM_N; ++j) { + r[j] = MLKEM_BARRETT_RED(r[j]); } -#elif defined(WOLFSSL_KYBER_NO_LARGE_CODE) +#elif defined(WOLFSSL_MLKEM_NO_LARGE_CODE) /* Take out the first iteration. 
*/ unsigned int len; unsigned int k = 1; @@ -250,19 +251,19 @@ static void kyber_ntt(sword16* r) unsigned int start; sword16 zeta = zetas[k++]; - for (j = 0; j < KYBER_N / 2; ++j) { - sword32 p = (sword32)zeta * r[j + KYBER_N / 2]; - sword16 t = KYBER_MONT_RED(p); + for (j = 0; j < MLKEM_N / 2; ++j) { + sword32 p = (sword32)zeta * r[j + MLKEM_N / 2]; + sword16 t = MLKEM_MONT_RED(p); sword16 rj = r[j]; - r[j + KYBER_N / 2] = rj - t; + r[j + MLKEM_N / 2] = rj - t; r[j] = rj + t; } - for (len = KYBER_N / 4; len >= 2; len >>= 1) { - for (start = 0; start < KYBER_N; start = j + len) { + for (len = MLKEM_N / 4; len >= 2; len >>= 1) { + for (start = 0; start < MLKEM_N; start = j + len) { zeta = zetas[k++]; for (j = start; j < start + len; ++j) { sword32 p = (sword32)zeta * r[j + len]; - sword16 t = KYBER_MONT_RED(p); + sword16 t = MLKEM_MONT_RED(p); sword16 rj = r[j]; r[j + len] = rj - t; r[j] = rj + t; @@ -271,10 +272,10 @@ static void kyber_ntt(sword16* r) } /* Reduce coefficients with quick algorithm. */ - for (j = 0; j < KYBER_N; ++j) { - r[j] = KYBER_BARRETT_RED(r[j]); + for (j = 0; j < MLKEM_N; ++j) { + r[j] = MLKEM_BARRETT_RED(r[j]); } -#elif defined(WOLFSSL_KYBER_NTT_UNROLL) +#elif defined(WOLFSSL_MLKEM_NTT_UNROLL) /* Unroll len loop (Step 3). 
*/ unsigned int k = 1; unsigned int j; @@ -282,82 +283,82 @@ static void kyber_ntt(sword16* r) sword16 zeta = zetas[k++]; /* len = 128 */ - for (j = 0; j < KYBER_N / 2; ++j) { - sword32 p = (sword32)zeta * r[j + KYBER_N / 2]; - sword16 t = KYBER_MONT_RED(p); + for (j = 0; j < MLKEM_N / 2; ++j) { + sword32 p = (sword32)zeta * r[j + MLKEM_N / 2]; + sword16 t = MLKEM_MONT_RED(p); sword16 rj = r[j]; - r[j + KYBER_N / 2] = rj - t; + r[j + MLKEM_N / 2] = rj - t; r[j] = rj + t; } /* len = 64 */ - for (start = 0; start < KYBER_N; start += 2 * 64) { + for (start = 0; start < MLKEM_N; start += 2 * 64) { zeta = zetas[k++]; for (j = 0; j < 64; ++j) { sword32 p = (sword32)zeta * r[start + j + 64]; - sword16 t = KYBER_MONT_RED(p); + sword16 t = MLKEM_MONT_RED(p); sword16 rj = r[start + j]; r[start + j + 64] = rj - t; r[start + j] = rj + t; } } /* len = 32 */ - for (start = 0; start < KYBER_N; start += 2 * 32) { + for (start = 0; start < MLKEM_N; start += 2 * 32) { zeta = zetas[k++]; for (j = 0; j < 32; ++j) { sword32 p = (sword32)zeta * r[start + j + 32]; - sword16 t = KYBER_MONT_RED(p); + sword16 t = MLKEM_MONT_RED(p); sword16 rj = r[start + j]; r[start + j + 32] = rj - t; r[start + j] = rj + t; } } /* len = 16 */ - for (start = 0; start < KYBER_N; start += 2 * 16) { + for (start = 0; start < MLKEM_N; start += 2 * 16) { zeta = zetas[k++]; for (j = 0; j < 16; ++j) { sword32 p = (sword32)zeta * r[start + j + 16]; - sword16 t = KYBER_MONT_RED(p); + sword16 t = MLKEM_MONT_RED(p); sword16 rj = r[start + j]; r[start + j + 16] = rj - t; r[start + j] = rj + t; } } /* len = 8 */ - for (start = 0; start < KYBER_N; start += 2 * 8) { + for (start = 0; start < MLKEM_N; start += 2 * 8) { zeta = zetas[k++]; for (j = 0; j < 8; ++j) { sword32 p = (sword32)zeta * r[start + j + 8]; - sword16 t = KYBER_MONT_RED(p); + sword16 t = MLKEM_MONT_RED(p); sword16 rj = r[start + j]; r[start + j + 8] = rj - t; r[start + j] = rj + t; } } /* len = 4 */ - for (start = 0; start < KYBER_N; start += 2 * 4) { + 
for (start = 0; start < MLKEM_N; start += 2 * 4) { zeta = zetas[k++]; for (j = 0; j < 4; ++j) { sword32 p = (sword32)zeta * r[start + j + 4]; - sword16 t = KYBER_MONT_RED(p); + sword16 t = MLKEM_MONT_RED(p); sword16 rj = r[start + j]; r[start + j + 4] = rj - t; r[start + j] = rj + t; } } /* len = 2 */ - for (start = 0; start < KYBER_N; start += 2 * 2) { + for (start = 0; start < MLKEM_N; start += 2 * 2) { zeta = zetas[k++]; for (j = 0; j < 2; ++j) { sword32 p = (sword32)zeta * r[start + j + 2]; - sword16 t = KYBER_MONT_RED(p); + sword16 t = MLKEM_MONT_RED(p); sword16 rj = r[start + j]; r[start + j + 2] = rj - t; r[start + j] = rj + t; } } /* Reduce coefficients with quick algorithm. */ - for (j = 0; j < KYBER_N; ++j) { - r[j] = KYBER_BARRETT_RED(r[j]); + for (j = 0; j < MLKEM_N; ++j) { + r[j] = MLKEM_BARRETT_RED(r[j]); } #else /* Unroll len (2, 3, 2) and start loops. */ @@ -371,7 +372,7 @@ static void kyber_ntt(sword16* r) sword16 zeta128 = zetas[1]; sword16 zeta64_0 = zetas[2]; sword16 zeta64_1 = zetas[3]; - for (j = 0; j < KYBER_N / 8; j++) { + for (j = 0; j < MLKEM_N / 8; j++) { sword16 r0 = r[j + 0]; sword16 r1 = r[j + 32]; sword16 r2 = r[j + 64]; @@ -381,10 +382,10 @@ static void kyber_ntt(sword16* r) sword16 r6 = r[j + 192]; sword16 r7 = r[j + 224]; - t0 = KYBER_MONT_RED((sword32)zeta128 * r4); - t1 = KYBER_MONT_RED((sword32)zeta128 * r5); - t2 = KYBER_MONT_RED((sword32)zeta128 * r6); - t3 = KYBER_MONT_RED((sword32)zeta128 * r7); + t0 = MLKEM_MONT_RED((sword32)zeta128 * r4); + t1 = MLKEM_MONT_RED((sword32)zeta128 * r5); + t2 = MLKEM_MONT_RED((sword32)zeta128 * r6); + t3 = MLKEM_MONT_RED((sword32)zeta128 * r7); r4 = r0 - t0; r5 = r1 - t1; r6 = r2 - t2; @@ -394,10 +395,10 @@ static void kyber_ntt(sword16* r) r2 += t2; r3 += t3; - t0 = KYBER_MONT_RED((sword32)zeta64_0 * r2); - t1 = KYBER_MONT_RED((sword32)zeta64_0 * r3); - t2 = KYBER_MONT_RED((sword32)zeta64_1 * r6); - t3 = KYBER_MONT_RED((sword32)zeta64_1 * r7); + t0 = MLKEM_MONT_RED((sword32)zeta64_0 * r2); + 
t1 = MLKEM_MONT_RED((sword32)zeta64_0 * r3); + t2 = MLKEM_MONT_RED((sword32)zeta64_1 * r6); + t3 = MLKEM_MONT_RED((sword32)zeta64_1 * r7); r2 = r0 - t0; r3 = r1 - t1; r6 = r4 - t2; @@ -418,7 +419,7 @@ static void kyber_ntt(sword16* r) } /* len = 32,16,8 */ - for (j = 0; j < KYBER_N; j += 64) { + for (j = 0; j < MLKEM_N; j += 64) { int i; sword16 zeta32 = zetas[ 4 + j / 64 + 0]; sword16 zeta16_0 = zetas[ 8 + j / 32 + 0]; @@ -437,10 +438,10 @@ static void kyber_ntt(sword16* r) sword16 r6 = r[j + i + 48]; sword16 r7 = r[j + i + 56]; - t0 = KYBER_MONT_RED((sword32)zeta32 * r4); - t1 = KYBER_MONT_RED((sword32)zeta32 * r5); - t2 = KYBER_MONT_RED((sword32)zeta32 * r6); - t3 = KYBER_MONT_RED((sword32)zeta32 * r7); + t0 = MLKEM_MONT_RED((sword32)zeta32 * r4); + t1 = MLKEM_MONT_RED((sword32)zeta32 * r5); + t2 = MLKEM_MONT_RED((sword32)zeta32 * r6); + t3 = MLKEM_MONT_RED((sword32)zeta32 * r7); r4 = r0 - t0; r5 = r1 - t1; r6 = r2 - t2; @@ -450,10 +451,10 @@ static void kyber_ntt(sword16* r) r2 += t2; r3 += t3; - t0 = KYBER_MONT_RED((sword32)zeta16_0 * r2); - t1 = KYBER_MONT_RED((sword32)zeta16_0 * r3); - t2 = KYBER_MONT_RED((sword32)zeta16_1 * r6); - t3 = KYBER_MONT_RED((sword32)zeta16_1 * r7); + t0 = MLKEM_MONT_RED((sword32)zeta16_0 * r2); + t1 = MLKEM_MONT_RED((sword32)zeta16_0 * r3); + t2 = MLKEM_MONT_RED((sword32)zeta16_1 * r6); + t3 = MLKEM_MONT_RED((sword32)zeta16_1 * r7); r2 = r0 - t0; r3 = r1 - t1; r6 = r4 - t2; @@ -463,10 +464,10 @@ static void kyber_ntt(sword16* r) r4 += t2; r5 += t3; - t0 = KYBER_MONT_RED((sword32)zeta8_0 * r1); - t1 = KYBER_MONT_RED((sword32)zeta8_1 * r3); - t2 = KYBER_MONT_RED((sword32)zeta8_2 * r5); - t3 = KYBER_MONT_RED((sword32)zeta8_3 * r7); + t0 = MLKEM_MONT_RED((sword32)zeta8_0 * r1); + t1 = MLKEM_MONT_RED((sword32)zeta8_1 * r3); + t2 = MLKEM_MONT_RED((sword32)zeta8_2 * r5); + t3 = MLKEM_MONT_RED((sword32)zeta8_3 * r7); r1 = r0 - t0; r3 = r2 - t1; r5 = r4 - t2; @@ -488,7 +489,7 @@ static void kyber_ntt(sword16* r) } /* len = 4,2 and Final 
reduction */ - for (j = 0; j < KYBER_N; j += 8) { + for (j = 0; j < MLKEM_N; j += 8) { sword16 zeta4 = zetas[32 + j / 8 + 0]; sword16 zeta2_0 = zetas[64 + j / 4 + 0]; sword16 zeta2_1 = zetas[64 + j / 4 + 1]; @@ -501,10 +502,10 @@ static void kyber_ntt(sword16* r) sword16 r6 = r[j + 6]; sword16 r7 = r[j + 7]; - t0 = KYBER_MONT_RED((sword32)zeta4 * r4); - t1 = KYBER_MONT_RED((sword32)zeta4 * r5); - t2 = KYBER_MONT_RED((sword32)zeta4 * r6); - t3 = KYBER_MONT_RED((sword32)zeta4 * r7); + t0 = MLKEM_MONT_RED((sword32)zeta4 * r4); + t1 = MLKEM_MONT_RED((sword32)zeta4 * r5); + t2 = MLKEM_MONT_RED((sword32)zeta4 * r6); + t3 = MLKEM_MONT_RED((sword32)zeta4 * r7); r4 = r0 - t0; r5 = r1 - t1; r6 = r2 - t2; @@ -514,10 +515,10 @@ static void kyber_ntt(sword16* r) r2 += t2; r3 += t3; - t0 = KYBER_MONT_RED((sword32)zeta2_0 * r2); - t1 = KYBER_MONT_RED((sword32)zeta2_0 * r3); - t2 = KYBER_MONT_RED((sword32)zeta2_1 * r6); - t3 = KYBER_MONT_RED((sword32)zeta2_1 * r7); + t0 = MLKEM_MONT_RED((sword32)zeta2_0 * r2); + t1 = MLKEM_MONT_RED((sword32)zeta2_0 * r3); + t2 = MLKEM_MONT_RED((sword32)zeta2_1 * r6); + t3 = MLKEM_MONT_RED((sword32)zeta2_1 * r7); r2 = r0 - t0; r3 = r1 - t1; r6 = r4 - t2; @@ -527,22 +528,22 @@ static void kyber_ntt(sword16* r) r4 += t2; r5 += t3; - r[j + 0] = KYBER_BARRETT_RED(r0); - r[j + 1] = KYBER_BARRETT_RED(r1); - r[j + 2] = KYBER_BARRETT_RED(r2); - r[j + 3] = KYBER_BARRETT_RED(r3); - r[j + 4] = KYBER_BARRETT_RED(r4); - r[j + 5] = KYBER_BARRETT_RED(r5); - r[j + 6] = KYBER_BARRETT_RED(r6); - r[j + 7] = KYBER_BARRETT_RED(r7); + r[j + 0] = MLKEM_BARRETT_RED(r0); + r[j + 1] = MLKEM_BARRETT_RED(r1); + r[j + 2] = MLKEM_BARRETT_RED(r2); + r[j + 3] = MLKEM_BARRETT_RED(r3); + r[j + 4] = MLKEM_BARRETT_RED(r4); + r[j + 5] = MLKEM_BARRETT_RED(r5); + r[j + 6] = MLKEM_BARRETT_RED(r6); + r[j + 7] = MLKEM_BARRETT_RED(r7); } #endif } -#if !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) || \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) +#if !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) || \ + 
!defined(WOLFSSL_MLKEM_NO_DECAPSULATE) /* Zetas for inverse NTT. */ -const sword16 zetas_inv[KYBER_N / 2] = { +const sword16 zetas_inv[MLKEM_N / 2] = { 1701, 1807, 1460, 2371, 2338, 2333, 308, 108, 2851, 870, 854, 1510, 2535, 1278, 1530, 1185, 1659, 1187, 3109, 874, 1335, 2111, 136, 1215, @@ -584,9 +585,9 @@ const sword16 zetas_inv[KYBER_N / 2] = { * * @param [in, out] r Polynomial to transform. */ -static void kyber_invntt(sword16* r) +static void mlkem_invntt(sword16* r) { -#ifdef WOLFSSL_KYBER_SMALL +#ifdef WOLFSSL_MLKEM_SMALL unsigned int len; unsigned int k; unsigned int j; @@ -595,10 +596,10 @@ static void kyber_invntt(sword16* r) /* Step 2 - table reversed */ k = 0; /* Step 3 */ - for (len = 2; len <= KYBER_N / 2; len <<= 1) { + for (len = 2; len <= MLKEM_N / 2; len <<= 1) { unsigned int start; /* Step 4 */ - for (start = 0; start < KYBER_N; start = j + len) { + for (start = 0; start < MLKEM_N; start = j + len) { /* Step 5, 6 */ zeta = zetas_inv[k++]; /* Step 7 */ @@ -609,22 +610,22 @@ static void kyber_invntt(sword16* r) sword16 rjl = r[j + len]; /* Step 9 */ sword16 t = rj + rjl; - r[j] = KYBER_BARRETT_RED(t); + r[j] = MLKEM_BARRETT_RED(t); /* Step 10 */ rjl = rj - rjl; p = (sword32)zeta * rjl; - r[j + len] = KYBER_MONT_RED(p); + r[j + len] = MLKEM_MONT_RED(p); } } } /* Step 14 */ zeta = zetas_inv[127]; - for (j = 0; j < KYBER_N; ++j) { + for (j = 0; j < MLKEM_N; ++j) { sword32 p = (sword32)zeta * r[j]; - r[j] = KYBER_MONT_RED(p); + r[j] = MLKEM_MONT_RED(p); } -#elif defined(WOLFSSL_KYBER_NO_LARGE_CODE) +#elif defined(WOLFSSL_MLKEM_NO_LARGE_CODE) /* Take out last iteration. 
*/ unsigned int len; unsigned int k; @@ -633,41 +634,41 @@ static void kyber_invntt(sword16* r) sword16 zeta2; k = 0; - for (len = 2; len <= KYBER_N / 4; len <<= 1) { + for (len = 2; len <= MLKEM_N / 4; len <<= 1) { unsigned int start; - for (start = 0; start < KYBER_N; start = j + len) { + for (start = 0; start < MLKEM_N; start = j + len) { zeta = zetas_inv[k++]; for (j = start; j < start + len; ++j) { sword32 p; sword16 rj = r[j]; sword16 rjl = r[j + len]; sword16 t = rj + rjl; - r[j] = KYBER_BARRETT_RED(t); + r[j] = MLKEM_BARRETT_RED(t); rjl = rj - rjl; p = (sword32)zeta * rjl; - r[j + len] = KYBER_MONT_RED(p); + r[j + len] = MLKEM_MONT_RED(p); } } } zeta = zetas_inv[126]; zeta2 = zetas_inv[127]; - for (j = 0; j < KYBER_N / 2; ++j) { + for (j = 0; j < MLKEM_N / 2; ++j) { sword32 p; sword16 rj = r[j]; - sword16 rjl = r[j + KYBER_N / 2]; + sword16 rjl = r[j + MLKEM_N / 2]; sword16 t = rj + rjl; rjl = rj - rjl; p = (sword32)zeta * rjl; r[j] = t; - r[j + KYBER_N / 2] = KYBER_MONT_RED(p); + r[j + MLKEM_N / 2] = MLKEM_MONT_RED(p); p = (sword32)zeta2 * r[j]; - r[j] = KYBER_MONT_RED(p); - p = (sword32)zeta2 * r[j + KYBER_N / 2]; - r[j + KYBER_N / 2] = KYBER_MONT_RED(p); + r[j] = MLKEM_MONT_RED(p); + p = (sword32)zeta2 * r[j + MLKEM_N / 2]; + r[j + MLKEM_N / 2] = MLKEM_MONT_RED(p); } -#elif defined(WOLFSSL_KYBER_INVNTT_UNROLL) +#elif defined(WOLFSSL_MLKEM_INVNTT_UNROLL) /* Unroll len loop (Step 3). 
*/ unsigned int k; unsigned int j; @@ -677,7 +678,7 @@ static void kyber_invntt(sword16* r) k = 0; /* len = 2 */ - for (start = 0; start < KYBER_N; start += 2 * 2) { + for (start = 0; start < MLKEM_N; start += 2 * 2) { zeta = zetas_inv[k++]; for (j = 0; j < 2; ++j) { sword32 p; @@ -687,11 +688,11 @@ static void kyber_invntt(sword16* r) r[start + j] = t; rjl = rj - rjl; p = (sword32)zeta * rjl; - r[start + j + 2] = KYBER_MONT_RED(p); + r[start + j + 2] = MLKEM_MONT_RED(p); } } /* len = 4 */ - for (start = 0; start < KYBER_N; start += 2 * 4) { + for (start = 0; start < MLKEM_N; start += 2 * 4) { zeta = zetas_inv[k++]; for (j = 0; j < 4; ++j) { sword32 p; @@ -701,11 +702,11 @@ static void kyber_invntt(sword16* r) r[start + j] = t; rjl = rj - rjl; p = (sword32)zeta * rjl; - r[start + j + 4] = KYBER_MONT_RED(p); + r[start + j + 4] = MLKEM_MONT_RED(p); } } /* len = 8 */ - for (start = 0; start < KYBER_N; start += 2 * 8) { + for (start = 0; start < MLKEM_N; start += 2 * 8) { zeta = zetas_inv[k++]; for (j = 0; j < 8; ++j) { sword32 p; @@ -713,14 +714,14 @@ static void kyber_invntt(sword16* r) sword16 rjl = r[start + j + 8]; sword16 t = rj + rjl; /* Reduce. 
*/ - r[start + j] = KYBER_BARRETT_RED(t); + r[start + j] = MLKEM_BARRETT_RED(t); rjl = rj - rjl; p = (sword32)zeta * rjl; - r[start + j + 8] = KYBER_MONT_RED(p); + r[start + j + 8] = MLKEM_MONT_RED(p); } } /* len = 16 */ - for (start = 0; start < KYBER_N; start += 2 * 16) { + for (start = 0; start < MLKEM_N; start += 2 * 16) { zeta = zetas_inv[k++]; for (j = 0; j < 16; ++j) { sword32 p; @@ -730,11 +731,11 @@ static void kyber_invntt(sword16* r) r[start + j] = t; rjl = rj - rjl; p = (sword32)zeta * rjl; - r[start + j + 16] = KYBER_MONT_RED(p); + r[start + j + 16] = MLKEM_MONT_RED(p); } } /* len = 32 */ - for (start = 0; start < KYBER_N; start += 2 * 32) { + for (start = 0; start < MLKEM_N; start += 2 * 32) { zeta = zetas_inv[k++]; for (j = 0; j < 32; ++j) { sword32 p; @@ -744,11 +745,11 @@ static void kyber_invntt(sword16* r) r[start + j] = t; rjl = rj - rjl; p = (sword32)zeta * rjl; - r[start + j + 32] = KYBER_MONT_RED(p); + r[start + j + 32] = MLKEM_MONT_RED(p); } } /* len = 64 */ - for (start = 0; start < KYBER_N; start += 2 * 64) { + for (start = 0; start < MLKEM_N; start += 2 * 64) { zeta = zetas_inv[k++]; for (j = 0; j < 64; ++j) { sword32 p; @@ -756,29 +757,29 @@ static void kyber_invntt(sword16* r) sword16 rjl = r[start + j + 64]; sword16 t = rj + rjl; /* Reduce. 
*/ - r[start + j] = KYBER_BARRETT_RED(t); + r[start + j] = MLKEM_BARRETT_RED(t); rjl = rj - rjl; p = (sword32)zeta * rjl; - r[start + j + 64] = KYBER_MONT_RED(p); + r[start + j + 64] = MLKEM_MONT_RED(p); } } /* len = 128, 256 */ zeta = zetas_inv[126]; zeta2 = zetas_inv[127]; - for (j = 0; j < KYBER_N / 2; ++j) { + for (j = 0; j < MLKEM_N / 2; ++j) { sword32 p; sword16 rj = r[j]; - sword16 rjl = r[j + KYBER_N / 2]; + sword16 rjl = r[j + MLKEM_N / 2]; sword16 t = rj + rjl; rjl = rj - rjl; p = (sword32)zeta * rjl; r[j] = t; - r[j + KYBER_N / 2] = KYBER_MONT_RED(p); + r[j + MLKEM_N / 2] = MLKEM_MONT_RED(p); p = (sword32)zeta2 * r[j]; - r[j] = KYBER_MONT_RED(p); - p = (sword32)zeta2 * r[j + KYBER_N / 2]; - r[j + KYBER_N / 2] = KYBER_MONT_RED(p); + r[j] = MLKEM_MONT_RED(p); + p = (sword32)zeta2 * r[j + MLKEM_N / 2]; + r[j + MLKEM_N / 2] = MLKEM_MONT_RED(p); } #else /* Unroll len (2, 3, 3) and start loops. */ @@ -793,7 +794,7 @@ static void kyber_invntt(sword16* r) sword16 zeta256; sword32 p; - for (j = 0; j < KYBER_N; j += 8) { + for (j = 0; j < MLKEM_N; j += 8) { sword16 zeta2_0 = zetas_inv[ 0 + j / 4 + 0]; sword16 zeta2_1 = zetas_inv[ 0 + j / 4 + 1]; sword16 zeta4 = zetas_inv[64 + j / 8 + 0]; @@ -807,13 +808,13 @@ static void kyber_invntt(sword16* r) sword16 r7 = r[j + 7]; p = (sword32)zeta2_0 * (sword16)(r0 - r2); - t0 = KYBER_MONT_RED(p); + t0 = MLKEM_MONT_RED(p); p = (sword32)zeta2_0 * (sword16)(r1 - r3); - t1 = KYBER_MONT_RED(p); + t1 = MLKEM_MONT_RED(p); p = (sword32)zeta2_1 * (sword16)(r4 - r6); - t2 = KYBER_MONT_RED(p); + t2 = MLKEM_MONT_RED(p); p = (sword32)zeta2_1 * (sword16)(r5 - r7); - t3 = KYBER_MONT_RED(p); + t3 = MLKEM_MONT_RED(p); r0 += r2; r1 += r3; r4 += r6; @@ -824,13 +825,13 @@ static void kyber_invntt(sword16* r) r7 = t3; p = (sword32)zeta4 * (sword16)(r0 - r4); - t0 = KYBER_MONT_RED(p); + t0 = MLKEM_MONT_RED(p); p = (sword32)zeta4 * (sword16)(r1 - r5); - t1 = KYBER_MONT_RED(p); + t1 = MLKEM_MONT_RED(p); p = (sword32)zeta4 * (sword16)(r2 - r6); - t2 
= KYBER_MONT_RED(p); + t2 = MLKEM_MONT_RED(p); p = (sword32)zeta4 * (sword16)(r3 - r7); - t3 = KYBER_MONT_RED(p); + t3 = MLKEM_MONT_RED(p); r0 += r4; r1 += r5; r2 += r6; @@ -850,7 +851,7 @@ static void kyber_invntt(sword16* r) r[j + 7] = r7; } - for (j = 0; j < KYBER_N; j += 64) { + for (j = 0; j < MLKEM_N; j += 64) { int i; sword16 zeta8_0 = zetas_inv[ 96 + j / 16 + 0]; sword16 zeta8_1 = zetas_inv[ 96 + j / 16 + 1]; @@ -870,30 +871,30 @@ static void kyber_invntt(sword16* r) sword16 r7 = r[j + i + 56]; p = (sword32)zeta8_0 * (sword16)(r0 - r1); - t0 = KYBER_MONT_RED(p); + t0 = MLKEM_MONT_RED(p); p = (sword32)zeta8_1 * (sword16)(r2 - r3); - t1 = KYBER_MONT_RED(p); + t1 = MLKEM_MONT_RED(p); p = (sword32)zeta8_2 * (sword16)(r4 - r5); - t2 = KYBER_MONT_RED(p); + t2 = MLKEM_MONT_RED(p); p = (sword32)zeta8_3 * (sword16)(r6 - r7); - t3 = KYBER_MONT_RED(p); - r0 = KYBER_BARRETT_RED(r0 + r1); - r2 = KYBER_BARRETT_RED(r2 + r3); - r4 = KYBER_BARRETT_RED(r4 + r5); - r6 = KYBER_BARRETT_RED(r6 + r7); + t3 = MLKEM_MONT_RED(p); + r0 = MLKEM_BARRETT_RED(r0 + r1); + r2 = MLKEM_BARRETT_RED(r2 + r3); + r4 = MLKEM_BARRETT_RED(r4 + r5); + r6 = MLKEM_BARRETT_RED(r6 + r7); r1 = t0; r3 = t1; r5 = t2; r7 = t3; p = (sword32)zeta16_0 * (sword16)(r0 - r2); - t0 = KYBER_MONT_RED(p); + t0 = MLKEM_MONT_RED(p); p = (sword32)zeta16_0 * (sword16)(r1 - r3); - t1 = KYBER_MONT_RED(p); + t1 = MLKEM_MONT_RED(p); p = (sword32)zeta16_1 * (sword16)(r4 - r6); - t2 = KYBER_MONT_RED(p); + t2 = MLKEM_MONT_RED(p); p = (sword32)zeta16_1 * (sword16)(r5 - r7); - t3 = KYBER_MONT_RED(p); + t3 = MLKEM_MONT_RED(p); r0 += r2; r1 += r3; r4 += r6; @@ -904,13 +905,13 @@ static void kyber_invntt(sword16* r) r7 = t3; p = (sword32)zeta32 * (sword16)(r0 - r4); - t0 = KYBER_MONT_RED(p); + t0 = MLKEM_MONT_RED(p); p = (sword32)zeta32 * (sword16)(r1 - r5); - t1 = KYBER_MONT_RED(p); + t1 = MLKEM_MONT_RED(p); p = (sword32)zeta32 * (sword16)(r2 - r6); - t2 = KYBER_MONT_RED(p); + t2 = MLKEM_MONT_RED(p); p = (sword32)zeta32 * 
(sword16)(r3 - r7); - t3 = KYBER_MONT_RED(p); + t3 = MLKEM_MONT_RED(p); r0 += r4; r1 += r5; r2 += r6; @@ -935,7 +936,7 @@ static void kyber_invntt(sword16* r) zeta64_1 = zetas_inv[125]; zeta128 = zetas_inv[126]; zeta256 = zetas_inv[127]; - for (j = 0; j < KYBER_N / 8; j++) { + for (j = 0; j < MLKEM_N / 8; j++) { sword16 r0 = r[j + 0]; sword16 r1 = r[j + 32]; sword16 r2 = r[j + 64]; @@ -946,30 +947,30 @@ static void kyber_invntt(sword16* r) sword16 r7 = r[j + 224]; p = (sword32)zeta64_0 * (sword16)(r0 - r2); - t0 = KYBER_MONT_RED(p); + t0 = MLKEM_MONT_RED(p); p = (sword32)zeta64_0 * (sword16)(r1 - r3); - t1 = KYBER_MONT_RED(p); + t1 = MLKEM_MONT_RED(p); p = (sword32)zeta64_1 * (sword16)(r4 - r6); - t2 = KYBER_MONT_RED(p); + t2 = MLKEM_MONT_RED(p); p = (sword32)zeta64_1 * (sword16)(r5 - r7); - t3 = KYBER_MONT_RED(p); - r0 = KYBER_BARRETT_RED(r0 + r2); - r1 = KYBER_BARRETT_RED(r1 + r3); - r4 = KYBER_BARRETT_RED(r4 + r6); - r5 = KYBER_BARRETT_RED(r5 + r7); + t3 = MLKEM_MONT_RED(p); + r0 = MLKEM_BARRETT_RED(r0 + r2); + r1 = MLKEM_BARRETT_RED(r1 + r3); + r4 = MLKEM_BARRETT_RED(r4 + r6); + r5 = MLKEM_BARRETT_RED(r5 + r7); r2 = t0; r3 = t1; r6 = t2; r7 = t3; p = (sword32)zeta128 * (sword16)(r0 - r4); - t0 = KYBER_MONT_RED(p); + t0 = MLKEM_MONT_RED(p); p = (sword32)zeta128 * (sword16)(r1 - r5); - t1 = KYBER_MONT_RED(p); + t1 = MLKEM_MONT_RED(p); p = (sword32)zeta128 * (sword16)(r2 - r6); - t2 = KYBER_MONT_RED(p); + t2 = MLKEM_MONT_RED(p); p = (sword32)zeta128 * (sword16)(r3 - r7); - t3 = KYBER_MONT_RED(p); + t3 = MLKEM_MONT_RED(p); r0 += r4; r1 += r5; r2 += r6; @@ -980,21 +981,21 @@ static void kyber_invntt(sword16* r) r7 = t3; p = (sword32)zeta256 * r0; - r0 = KYBER_MONT_RED(p); + r0 = MLKEM_MONT_RED(p); p = (sword32)zeta256 * r1; - r1 = KYBER_MONT_RED(p); + r1 = MLKEM_MONT_RED(p); p = (sword32)zeta256 * r2; - r2 = KYBER_MONT_RED(p); + r2 = MLKEM_MONT_RED(p); p = (sword32)zeta256 * r3; - r3 = KYBER_MONT_RED(p); + r3 = MLKEM_MONT_RED(p); p = (sword32)zeta256 * r4; - r4 = 
KYBER_MONT_RED(p); + r4 = MLKEM_MONT_RED(p); p = (sword32)zeta256 * r5; - r5 = KYBER_MONT_RED(p); + r5 = MLKEM_MONT_RED(p); p = (sword32)zeta256 * r6; - r6 = KYBER_MONT_RED(p); + r6 = MLKEM_MONT_RED(p); p = (sword32)zeta256 * r7; - r7 = KYBER_MONT_RED(p); + r7 = MLKEM_MONT_RED(p); r[j + 0] = r0; r[j + 32] = r1; @@ -1025,7 +1026,7 @@ static void kyber_invntt(sword16* r) * @param [in] b Second factor. * @param [in] zeta Integer defining the reduction polynomial. */ -static void kyber_basemul(sword16* r, const sword16* a, const sword16* b, +static void mlkem_basemul(sword16* r, const sword16* a, const sword16* b, sword16 zeta) { sword16 r0; @@ -1039,16 +1040,16 @@ static void kyber_basemul(sword16* r, const sword16* a, const sword16* b, /* Step 1 */ p1 = (sword32)a0 * b0; p2 = (sword32)a1 * b1; - r0 = KYBER_MONT_RED(p2); + r0 = MLKEM_MONT_RED(p2); p2 = (sword32)zeta * r0; p2 += p1; - r[0] = KYBER_MONT_RED(p2); + r[0] = MLKEM_MONT_RED(p2); /* Step 2 */ p1 = (sword32)a0 * b1; p2 = (sword32)a1 * b0; p1 += p2; - r[1] = KYBER_MONT_RED(p1); + r[1] = MLKEM_MONT_RED(p1); } /* Multiply two polynomials in NTT domain. r = a * b. @@ -1066,40 +1067,40 @@ static void kyber_basemul(sword16* r, const sword16* a, const sword16* b, * @param [in] a First polynomial multiplier. * @param [in] b Second polynomial multiplier. */ -static void kyber_basemul_mont(sword16* r, const sword16* a, const sword16* b) +static void mlkem_basemul_mont(sword16* r, const sword16* a, const sword16* b) { const sword16* zeta = zetas + 64; -#if defined(WOLFSSL_KYBER_SMALL) +#if defined(WOLFSSL_MLKEM_SMALL) /* Two multiplications per loop. 
*/ unsigned int i; /* Step 1 */ - for (i = 0; i < KYBER_N; i += 4, zeta++) { + for (i = 0; i < MLKEM_N; i += 4, zeta++) { /* Step 2 */ - kyber_basemul(r + i + 0, a + i + 0, b + i + 0, zeta[0]); - kyber_basemul(r + i + 2, a + i + 2, b + i + 2, -zeta[0]); + mlkem_basemul(r + i + 0, a + i + 0, b + i + 0, zeta[0]); + mlkem_basemul(r + i + 2, a + i + 2, b + i + 2, -zeta[0]); } -#elif defined(WOLFSSL_KYBER_NO_LARGE_CODE) +#elif defined(WOLFSSL_MLKEM_NO_LARGE_CODE) /* Four multiplications per loop. */ unsigned int i; - for (i = 0; i < KYBER_N; i += 8, zeta += 2) { - kyber_basemul(r + i + 0, a + i + 0, b + i + 0, zeta[0]); - kyber_basemul(r + i + 2, a + i + 2, b + i + 2, -zeta[0]); - kyber_basemul(r + i + 4, a + i + 4, b + i + 4, zeta[1]); - kyber_basemul(r + i + 6, a + i + 6, b + i + 6, -zeta[1]); + for (i = 0; i < MLKEM_N; i += 8, zeta += 2) { + mlkem_basemul(r + i + 0, a + i + 0, b + i + 0, zeta[0]); + mlkem_basemul(r + i + 2, a + i + 2, b + i + 2, -zeta[0]); + mlkem_basemul(r + i + 4, a + i + 4, b + i + 4, zeta[1]); + mlkem_basemul(r + i + 6, a + i + 6, b + i + 6, -zeta[1]); } #else /* Eight multiplications per loop. 
*/ unsigned int i; - for (i = 0; i < KYBER_N; i += 16, zeta += 4) { - kyber_basemul(r + i + 0, a + i + 0, b + i + 0, zeta[0]); - kyber_basemul(r + i + 2, a + i + 2, b + i + 2, -zeta[0]); - kyber_basemul(r + i + 4, a + i + 4, b + i + 4, zeta[1]); - kyber_basemul(r + i + 6, a + i + 6, b + i + 6, -zeta[1]); - kyber_basemul(r + i + 8, a + i + 8, b + i + 8, zeta[2]); - kyber_basemul(r + i + 10, a + i + 10, b + i + 10, -zeta[2]); - kyber_basemul(r + i + 12, a + i + 12, b + i + 12, zeta[3]); - kyber_basemul(r + i + 14, a + i + 14, b + i + 14, -zeta[3]); + for (i = 0; i < MLKEM_N; i += 16, zeta += 4) { + mlkem_basemul(r + i + 0, a + i + 0, b + i + 0, zeta[0]); + mlkem_basemul(r + i + 2, a + i + 2, b + i + 2, -zeta[0]); + mlkem_basemul(r + i + 4, a + i + 4, b + i + 4, zeta[1]); + mlkem_basemul(r + i + 6, a + i + 6, b + i + 6, -zeta[1]); + mlkem_basemul(r + i + 8, a + i + 8, b + i + 8, zeta[2]); + mlkem_basemul(r + i + 10, a + i + 10, b + i + 10, -zeta[2]); + mlkem_basemul(r + i + 12, a + i + 12, b + i + 12, zeta[3]); + mlkem_basemul(r + i + 14, a + i + 14, b + i + 14, -zeta[3]); } #endif } @@ -1120,39 +1121,39 @@ static void kyber_basemul_mont(sword16* r, const sword16* a, const sword16* b) * @param [in] a First polynomial multiplier. * @param [in] b Second polynomial multiplier. */ -static void kyber_basemul_mont_add(sword16* r, const sword16* a, +static void mlkem_basemul_mont_add(sword16* r, const sword16* a, const sword16* b) { const sword16* zeta = zetas + 64; -#if defined(WOLFSSL_KYBER_SMALL) +#if defined(WOLFSSL_MLKEM_SMALL) /* Two multiplications per loop. 
*/ unsigned int i; - for (i = 0; i < KYBER_N; i += 4, zeta++) { + for (i = 0; i < MLKEM_N; i += 4, zeta++) { sword16 t0[2]; sword16 t2[2]; - kyber_basemul(t0, a + i + 0, b + i + 0, zeta[0]); - kyber_basemul(t2, a + i + 2, b + i + 2, -zeta[0]); + mlkem_basemul(t0, a + i + 0, b + i + 0, zeta[0]); + mlkem_basemul(t2, a + i + 2, b + i + 2, -zeta[0]); r[i + 0] += t0[0]; r[i + 1] += t0[1]; r[i + 2] += t2[0]; r[i + 3] += t2[1]; } -#elif defined(WOLFSSL_KYBER_NO_LARGE_CODE) +#elif defined(WOLFSSL_MLKEM_NO_LARGE_CODE) /* Four multiplications per loop. */ unsigned int i; - for (i = 0; i < KYBER_N; i += 8, zeta += 2) { + for (i = 0; i < MLKEM_N; i += 8, zeta += 2) { sword16 t0[2]; sword16 t2[2]; sword16 t4[2]; sword16 t6[2]; - kyber_basemul(t0, a + i + 0, b + i + 0, zeta[0]); - kyber_basemul(t2, a + i + 2, b + i + 2, -zeta[0]); - kyber_basemul(t4, a + i + 4, b + i + 4, zeta[1]); - kyber_basemul(t6, a + i + 6, b + i + 6, -zeta[1]); + mlkem_basemul(t0, a + i + 0, b + i + 0, zeta[0]); + mlkem_basemul(t2, a + i + 2, b + i + 2, -zeta[0]); + mlkem_basemul(t4, a + i + 4, b + i + 4, zeta[1]); + mlkem_basemul(t6, a + i + 6, b + i + 6, -zeta[1]); r[i + 0] += t0[0]; r[i + 1] += t0[1]; @@ -1166,7 +1167,7 @@ static void kyber_basemul_mont_add(sword16* r, const sword16* a, #else /* Eight multiplications per loop. 
*/ unsigned int i; - for (i = 0; i < KYBER_N; i += 16, zeta += 4) { + for (i = 0; i < MLKEM_N; i += 16, zeta += 4) { sword16 t0[2]; sword16 t2[2]; sword16 t4[2]; @@ -1176,14 +1177,14 @@ static void kyber_basemul_mont_add(sword16* r, const sword16* a, sword16 t12[2]; sword16 t14[2]; - kyber_basemul(t0, a + i + 0, b + i + 0, zeta[0]); - kyber_basemul(t2, a + i + 2, b + i + 2, -zeta[0]); - kyber_basemul(t4, a + i + 4, b + i + 4, zeta[1]); - kyber_basemul(t6, a + i + 6, b + i + 6, -zeta[1]); - kyber_basemul(t8, a + i + 8, b + i + 8, zeta[2]); - kyber_basemul(t10, a + i + 10, b + i + 10, -zeta[2]); - kyber_basemul(t12, a + i + 12, b + i + 12, zeta[3]); - kyber_basemul(t14, a + i + 14, b + i + 14, -zeta[3]); + mlkem_basemul(t0, a + i + 0, b + i + 0, zeta[0]); + mlkem_basemul(t2, a + i + 2, b + i + 2, -zeta[0]); + mlkem_basemul(t4, a + i + 4, b + i + 4, zeta[1]); + mlkem_basemul(t6, a + i + 6, b + i + 6, -zeta[1]); + mlkem_basemul(t8, a + i + 8, b + i + 8, zeta[2]); + mlkem_basemul(t10, a + i + 10, b + i + 10, -zeta[2]); + mlkem_basemul(t12, a + i + 12, b + i + 12, zeta[3]); + mlkem_basemul(t14, a + i + 14, b + i + 14, -zeta[3]); r[i + 0] += t0[0]; r[i + 1] += t0[1]; @@ -1213,21 +1214,21 @@ static void kyber_basemul_mont_add(sword16* r, const sword16* a, * @param [in] b Second vector polynomial to multiply with. * @param [in] k Number of polynomials in vector. 
*/ -static void kyber_pointwise_acc_mont(sword16* r, const sword16* a, +static void mlkem_pointwise_acc_mont(sword16* r, const sword16* a, const sword16* b, unsigned int k) { unsigned int i; - kyber_basemul_mont(r, a, b); -#ifdef WOLFSSL_KYBER_SMALL + mlkem_basemul_mont(r, a, b); +#ifdef WOLFSSL_MLKEM_SMALL for (i = 1; i < k; ++i) { - kyber_basemul_mont_add(r, a + i * KYBER_N, b + i * KYBER_N); + mlkem_basemul_mont_add(r, a + i * MLKEM_N, b + i * MLKEM_N); } #else for (i = 1; i < k - 1; ++i) { - kyber_basemul_mont_add(r, a + i * KYBER_N, b + i * KYBER_N); + mlkem_basemul_mont_add(r, a + i * MLKEM_N, b + i * MLKEM_N); } - kyber_basemul_mont_add(r, a + (k - 1) * KYBER_N, b + (k - 1) * KYBER_N); + mlkem_basemul_mont_add(r, a + (k - 1) * MLKEM_N, b + (k - 1) * MLKEM_N); #endif } @@ -1235,7 +1236,7 @@ static void kyber_pointwise_acc_mont(sword16* r, const sword16* a, /* Initialize Kyber implementation. */ -void kyber_init(void) +void mlkem_init(void) { #if defined(USE_INTEL_SPEEDUP) || (defined(__aarch64__) && \ defined(WOLFSSL_ARMASM)) @@ -1247,7 +1248,7 @@ void kyber_init(void) #if defined(__aarch64__) && defined(WOLFSSL_ARMASM) -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY /* Generate a public-private key pair from randomly generated data. * * FIPS 203, Algorithm 13: K-PKE.KeyGen(d) @@ -1263,7 +1264,7 @@ void kyber_init(void) * @param [in] a Random values in an array of vectors of polynomials. * @param [in] k Number of polynomials in vector. */ -void kyber_keygen(sword16* s, sword16* t, sword16* e, const sword16* a, int k) +void mlkem_keygen(sword16* s, sword16* t, sword16* e, const sword16* a, int k) { int i; @@ -1272,7 +1273,7 @@ void kyber_keygen(sword16* s, sword16* t, sword16* e, const sword16* a, int k) /* Transform private key. All of result used in public key calculation. 
* Step 16: s_hat = NTT(s) */ for (i = 0; i < k; ++i) { - kyber_ntt_sqrdmlsh(s + i * KYBER_N); + mlkem_ntt_sqrdmlsh(s + i * MLKEM_N); } /* For each polynomial in the vectors. @@ -1280,17 +1281,17 @@ void kyber_keygen(sword16* s, sword16* t, sword16* e, const sword16* a, int k) for (i = 0; i < k; ++i) { /* Multiply a by private into public polynomial. * Step 18: ... A_hat o s_hat ... */ - kyber_pointwise_acc_mont(t + i * KYBER_N, a + i * k * KYBER_N, s, + mlkem_pointwise_acc_mont(t + i * MLKEM_N, a + i * k * MLKEM_N, s, k); /* Convert public polynomial to Montgomery form. * Step 18: ... MontRed(A_hat o s_hat) ... */ - kyber_to_mont_sqrdmlsh(t + i * KYBER_N); + mlkem_to_mont_sqrdmlsh(t + i * MLKEM_N); /* Transform error values polynomial. * Step 17: e_hat = NTT(e) */ - kyber_ntt_sqrdmlsh(e + i * KYBER_N); + mlkem_ntt_sqrdmlsh(e + i * MLKEM_N); /* Add errors to public key and reduce. * Step 18: t_hat = BarrettRed(MontRed(A_hat o s_hat) + e_hat) */ - kyber_add_reduce(t + i * KYBER_N, e + i * KYBER_N); + mlkem_add_reduce(t + i * MLKEM_N, e + i * MLKEM_N); } } else @@ -1299,7 +1300,7 @@ void kyber_keygen(sword16* s, sword16* t, sword16* e, const sword16* a, int k) /* Transform private key. All of result used in public key calculation. * Step 16: s_hat = NTT(s) */ for (i = 0; i < k; ++i) { - kyber_ntt(s + i * KYBER_N); + mlkem_ntt(s + i * MLKEM_N); } /* For each polynomial in the vectors. @@ -1307,24 +1308,24 @@ void kyber_keygen(sword16* s, sword16* t, sword16* e, const sword16* a, int k) for (i = 0; i < k; ++i) { /* Multiply a by private into public polynomial. * Step 18: ... A_hat o s_hat ... */ - kyber_pointwise_acc_mont(t + i * KYBER_N, a + i * k * KYBER_N, s, + mlkem_pointwise_acc_mont(t + i * MLKEM_N, a + i * k * MLKEM_N, s, k); /* Convert public polynomial to Montgomery form. * Step 18: ... MontRed(A_hat o s_hat) ... */ - kyber_to_mont(t + i * KYBER_N); + mlkem_to_mont(t + i * MLKEM_N); /* Transform error values polynomial. 
* Step 17: e_hat = NTT(e) */ - kyber_ntt(e + i * KYBER_N); + mlkem_ntt(e + i * MLKEM_N); /* Add errors to public key and reduce. * Step 18: t_hat = BarrettRed(MontRed(A_hat o s_hat) + e_hat) */ - kyber_add_reduce(t + i * KYBER_N, e + i * KYBER_N); + mlkem_add_reduce(t + i * MLKEM_N, e + i * MLKEM_N); } } } -#endif /* WOLFSSL_KYBER_NO_MAKE_KEY */ +#endif /* WOLFSSL_MLKEM_NO_MAKE_KEY */ -#if !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) || \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) +#if !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) || \ + !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) /* Encapsulate message. * * FIPS 203, Algorithm 14: K-PKE.Encrypt(ek_PKE, m, r) @@ -1345,7 +1346,7 @@ void kyber_keygen(sword16* s, sword16* t, sword16* e, const sword16* a, int k) * @param [in] m Message polynomial. * @param [in] k Number of polynomials in vector. */ -void kyber_encapsulate(const sword16* t, sword16* u , sword16* v, +void mlkem_encapsulate(const sword16* t, sword16* u , sword16* v, const sword16* a, sword16* y, const sword16* e1, const sword16* e2, const sword16* m, int k) { @@ -1356,7 +1357,7 @@ void kyber_encapsulate(const sword16* t, sword16* u , sword16* v, /* Transform y. All of result used in calculation of u and v. * Step 18: y_hat <- NTT(y) */ for (i = 0; i < k; ++i) { - kyber_ntt_sqrdmlsh(y + i * KYBER_N); + mlkem_ntt_sqrdmlsh(y + i * MLKEM_N); } /* For each polynomial in the vectors. @@ -1364,22 +1365,22 @@ void kyber_encapsulate(const sword16* t, sword16* u , sword16* v, for (i = 0; i < k; ++i) { /* Multiply at by y into u polynomial. * Step 19: ... A_hat_trans o y_hat ... */ - kyber_pointwise_acc_mont(u + i * KYBER_N, a + i * k * KYBER_N, y, + mlkem_pointwise_acc_mont(u + i * MLKEM_N, a + i * k * MLKEM_N, y, k); /* Inverse transform u polynomial. * Step 19: ... InvNTT(A_hat_trans o y_hat) ... */ - kyber_invntt_sqrdmlsh(u + i * KYBER_N); + mlkem_invntt_sqrdmlsh(u + i * MLKEM_N); /* Add errors to u and reduce. 
* Step 19: u <- InvNTT(A_hat_trans o y_hat) + e_1) */ - kyber_add_reduce(u + i * KYBER_N, e1 + i * KYBER_N); + mlkem_add_reduce(u + i * MLKEM_N, e1 + i * MLKEM_N); } /* Multiply public key by y into v polynomial. * Step 21: ... t_hat_trans o y_hat ... */ - kyber_pointwise_acc_mont(v, t, y, k); + mlkem_pointwise_acc_mont(v, t, y, k); /* Inverse transform v. * Step 22: ... InvNTT(t_hat_trans o y_hat) ... */ - kyber_invntt_sqrdmlsh(v); + mlkem_invntt_sqrdmlsh(v); } else #endif @@ -1387,7 +1388,7 @@ void kyber_encapsulate(const sword16* t, sword16* u , sword16* v, /* Transform y. All of result used in calculation of u and v. * Step 18: y_hat <- NTT(y) */ for (i = 0; i < k; ++i) { - kyber_ntt(y + i * KYBER_N); + mlkem_ntt(y + i * MLKEM_N); } /* For each polynomial in the vectors. @@ -1395,30 +1396,30 @@ void kyber_encapsulate(const sword16* t, sword16* u , sword16* v, for (i = 0; i < k; ++i) { /* Multiply at by y into u polynomial. * Step 19: ... A_hat_trans o y_hat ... */ - kyber_pointwise_acc_mont(u + i * KYBER_N, a + i * k * KYBER_N, y, + mlkem_pointwise_acc_mont(u + i * MLKEM_N, a + i * k * MLKEM_N, y, k); /* Inverse transform u polynomial. * Step 19: ... InvNTT(A_hat_trans o y_hat) ... */ - kyber_invntt(u + i * KYBER_N); + mlkem_invntt(u + i * MLKEM_N); /* Add errors to u and reduce. * Step 19: u <- InvNTT(A_hat_trans o y_hat) + e_1) */ - kyber_add_reduce(u + i * KYBER_N, e1 + i * KYBER_N); + mlkem_add_reduce(u + i * MLKEM_N, e1 + i * MLKEM_N); } /* Multiply public key by y into v polynomial. * Step 21: ... t_hat_trans o y_hat ... */ - kyber_pointwise_acc_mont(v, t, y, k); + mlkem_pointwise_acc_mont(v, t, y, k); /* Inverse transform v. * Step 22: ... InvNTT(t_hat_trans o y_hat) ... */ - kyber_invntt(v); + mlkem_invntt(v); } /* Add errors and message to v and reduce. 
* Step 21: v <- InvNTT(t_hat_trans o y_hat) + e_2 + mu) */ - kyber_add3_reduce(v, e2, m); + mlkem_add3_reduce(v, e2, m); } -#endif /* !WOLFSSL_KYBER_NO_ENCAPSULATE || !WOLFSSL_KYBER_NO_DECAPSULATE */ +#endif /* !WOLFSSL_MLKEM_NO_ENCAPSULATE || !WOLFSSL_MLKEM_NO_DECAPSULATE */ -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE /* Decapsulate message. * * FIPS 203, Algorithm 15: K-PKE.Decrypt(dk_PKE,c) @@ -1433,7 +1434,7 @@ void kyber_encapsulate(const sword16* t, sword16* u , sword16* v, * @param [in] v Encapsulated message polynomial. * @param [in] k Number of polynomials in vector. */ -void kyber_decapsulate(const sword16* s, sword16* w, sword16* u, +void mlkem_decapsulate(const sword16* s, sword16* w, sword16* u, const sword16* v, int k) { int i; @@ -1443,15 +1444,15 @@ void kyber_decapsulate(const sword16* s, sword16* w, sword16* u, /* Transform u. All of result used in calculation of w. * Step 6: ... NTT(u') */ for (i = 0; i < k; ++i) { - kyber_ntt_sqrdmlsh(u + i * KYBER_N); + mlkem_ntt_sqrdmlsh(u + i * MLKEM_N); } /* Multiply private key by u into w polynomial. * Step 6: ... s_hat_trans o NTT(u') */ - kyber_pointwise_acc_mont(w, s, u, k); + mlkem_pointwise_acc_mont(w, s, u, k); /* Inverse transform w. * Step 6: ... InvNTT(s_hat_trans o NTT(u')) */ - kyber_invntt_sqrdmlsh(w); + mlkem_invntt_sqrdmlsh(w); } else #endif @@ -1459,25 +1460,25 @@ void kyber_decapsulate(const sword16* s, sword16* w, sword16* u, /* Transform u. All of result used in calculation of w. * Step 6: ... NTT(u') */ for (i = 0; i < k; ++i) { - kyber_ntt(u + i * KYBER_N); + mlkem_ntt(u + i * MLKEM_N); } /* Multiply private key by u into w polynomial. * Step 6: ... s_hat_trans o NTT(u') */ - kyber_pointwise_acc_mont(w, s, u, k); + mlkem_pointwise_acc_mont(w, s, u, k); /* Inverse transform w. * Step 6: ... InvNTT(s_hat_trans o NTT(u')) */ - kyber_invntt(w); + mlkem_invntt(w); } /* Subtract errors (in w) out of v and reduce into w. 
* Step 6: w <- v' - InvNTT(s_hat_trans o NTT(u')) */ - kyber_rsub_reduce(w, v); + mlkem_rsub_reduce(w, v); } -#endif /* !WOLFSSL_KYBER_NO_DECAPSULATE */ +#endif /* !WOLFSSL_MLKEM_NO_DECAPSULATE */ #else -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY #ifndef WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM /* Generate a public-private key pair from randomly generated data. * @@ -1494,7 +1495,7 @@ void kyber_decapsulate(const sword16* s, sword16* w, sword16* u, * @param [in] a Random values in an array of vectors of polynomials. * @param [in] k Number of polynomials in vector. */ -static void kyber_keygen_c(sword16* s, sword16* t, sword16* e, const sword16* a, +static void mlkem_keygen_c(sword16* s, sword16* t, sword16* e, const sword16* a, int k) { int i; @@ -1502,7 +1503,7 @@ static void kyber_keygen_c(sword16* s, sword16* t, sword16* e, const sword16* a, /* Transform private key. All of result used in public key calculation * Step 16: s_hat = NTT(s) */ for (i = 0; i < k; ++i) { - kyber_ntt(s + i * KYBER_N); + mlkem_ntt(s + i * MLKEM_N); } /* For each polynomial in the vectors. @@ -1512,21 +1513,21 @@ static void kyber_keygen_c(sword16* s, sword16* t, sword16* e, const sword16* a, /* Multiply a by private into public polynomial. * Step 18: ... A_hat o s_hat ... */ - kyber_pointwise_acc_mont(t + i * KYBER_N, a + i * k * KYBER_N, s, k); + mlkem_pointwise_acc_mont(t + i * MLKEM_N, a + i * k * MLKEM_N, s, k); /* Convert public polynomial to Montgomery form. * Step 18: ... MontRed(A_hat o s_hat) ... */ - for (j = 0; j < KYBER_N; ++j) { - sword32 n = t[i * KYBER_N + j] * (sword32)KYBER_F; - t[i * KYBER_N + j] = KYBER_MONT_RED(n); + for (j = 0; j < MLKEM_N; ++j) { + sword32 n = t[i * MLKEM_N + j] * (sword32)MLKEM_F; + t[i * MLKEM_N + j] = MLKEM_MONT_RED(n); } /* Transform error values polynomial. * Step 17: e_hat = NTT(e) */ - kyber_ntt(e + i * KYBER_N); + mlkem_ntt(e + i * MLKEM_N); /* Add errors to public key and reduce. 
* Step 18: t_hat = BarrettRed(MontRed(A_hat o s_hat) + e_hat) */ - for (j = 0; j < KYBER_N; ++j) { - sword16 n = t[i * KYBER_N + j] + e[i * KYBER_N + j]; - t[i * KYBER_N + j] = KYBER_BARRETT_RED(n); + for (j = 0; j < MLKEM_N; ++j) { + sword16 n = t[i * MLKEM_N + j] + e[i * MLKEM_N + j]; + t[i * MLKEM_N + j] = MLKEM_BARRETT_RED(n); } } } @@ -1546,19 +1547,19 @@ static void kyber_keygen_c(sword16* s, sword16* t, sword16* e, const sword16* a, * @param [in] a Random values in an array of vectors of polynomials. * @param [in] k Number of polynomials in vector. */ -void kyber_keygen(sword16* s, sword16* t, sword16* e, const sword16* a, int k) +void mlkem_keygen(sword16* s, sword16* t, sword16* e, const sword16* a, int k) { #ifdef USE_INTEL_SPEEDUP if ((IS_INTEL_AVX2(cpuid_flags)) && (SAVE_VECTOR_REGISTERS2() == 0)) { /* Alg 13: Steps 16-18 */ - kyber_keygen_avx2(s, t, e, a, k); + mlkem_keygen_avx2(s, t, e, a, k); RESTORE_VECTOR_REGISTERS(); } else #endif { /* Alg 13: Steps 16-18 */ - kyber_keygen_c(s, t, e, a, k); + mlkem_keygen_c(s, t, e, a, k); } } @@ -1586,7 +1587,7 @@ void kyber_keygen(sword16* s, sword16* t, sword16* e, const sword16* a, int k) * @param [in] rho Random seed to generate matrix A from. * @param [in] sigma Random seed to generate noise from. */ -int kyber_keygen_seeds(sword16* s, sword16* t, KYBER_PRF_T* prf, +int mlkem_keygen_seeds(sword16* s, sword16* t, MLKEM_PRF_T* prf, sword16* tv, int k, byte* rho, byte* sigma) { int i; @@ -1597,7 +1598,7 @@ int kyber_keygen_seeds(sword16* s, sword16* t, KYBER_PRF_T* prf, /* Transform private key. All of result used in public key calculation * Step 16: s_hat = NTT(s) */ for (i = 0; i < k; ++i) { - kyber_ntt(s + i * KYBER_N); + mlkem_ntt(s + i * MLKEM_N); } /* For each polynomial in the vectors. @@ -1607,35 +1608,35 @@ int kyber_keygen_seeds(sword16* s, sword16* t, KYBER_PRF_T* prf, /* Generate a vector of matrix A. 
* Steps 4-6: generate A[i] */ - ret = kyber_gen_matrix_i(prf, ai, k, rho, i, 0); + ret = mlkem_gen_matrix_i(prf, ai, k, rho, i, 0); if (ret != 0) { break; } /* Multiply a by private into public polynomial. * Step 18: ... A_hat o s_hat ... */ - kyber_pointwise_acc_mont(t + i * KYBER_N, ai, s, k); + mlkem_pointwise_acc_mont(t + i * MLKEM_N, ai, s, k); /* Convert public polynomial to Montgomery form. * Step 18: ... MontRed(A_hat o s_hat) ... */ - for (j = 0; j < KYBER_N; ++j) { - sword32 n = t[i * KYBER_N + j] * (sword32)KYBER_F; - t[i * KYBER_N + j] = KYBER_MONT_RED(n); + for (j = 0; j < MLKEM_N; ++j) { + sword32 n = t[i * MLKEM_N + j] * (sword32)MLKEM_F; + t[i * MLKEM_N + j] = MLKEM_MONT_RED(n); } /* Generate noise using PRF. * Step 9: s[i] <- SamplePolyCBD_eta_1(PRF_eta_1(rho, N)) */ - ret = kyber_get_noise_i(prf, k, e, sigma, i, 1); + ret = mlkem_get_noise_i(prf, k, e, sigma, i, 1); if (ret != 0) { break; } /* Transform error values polynomial. * Step 17: e_hat = NTT(e) */ - kyber_ntt(e); + mlkem_ntt(e); /* Add errors to public key and reduce. * Step 18: t_hat = BarrettRed(MontRed(A_hat o s_hat) + e_hat) */ - for (j = 0; j < KYBER_N; ++j) { - sword16 n = t[i * KYBER_N + j] + e[j]; - t[i * KYBER_N + j] = KYBER_BARRETT_RED(n); + for (j = 0; j < MLKEM_N; ++j) { + sword16 n = t[i * MLKEM_N + j] + e[j]; + t[i * MLKEM_N + j] = MLKEM_BARRETT_RED(n); } } @@ -1643,10 +1644,10 @@ int kyber_keygen_seeds(sword16* s, sword16* t, KYBER_PRF_T* prf, } #endif -#endif /* !WOLFSSL_KYBER_NO_MAKE_KEY */ +#endif /* !WOLFSSL_MLKEM_NO_MAKE_KEY */ -#if !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) || \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) +#if !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) || \ + !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) #ifndef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM /* Encapsulate message. * @@ -1660,7 +1661,7 @@ int kyber_keygen_seeds(sword16* s, sword16* t, KYBER_PRF_T* prf, * @param [in] m Message polynomial. * @param [in] k Number of polynomials in vector. 
*/ -static void kyber_encapsulate_c(const sword16* pub, sword16* u, sword16* v, +static void mlkem_encapsulate_c(const sword16* pub, sword16* u, sword16* v, const sword16* a, sword16* y, const sword16* e1, const sword16* e2, const sword16* m, int k) { @@ -1668,7 +1669,7 @@ static void kyber_encapsulate_c(const sword16* pub, sword16* u, sword16* v, /* Transform y. All of result used in calculation of u and v. */ for (i = 0; i < k; ++i) { - kyber_ntt(y + i * KYBER_N); + mlkem_ntt(y + i * MLKEM_N); } /* For each polynomial in the vectors. */ @@ -1676,24 +1677,24 @@ static void kyber_encapsulate_c(const sword16* pub, sword16* u, sword16* v, unsigned int j; /* Multiply at by y into u polynomial. */ - kyber_pointwise_acc_mont(u + i * KYBER_N, a + i * k * KYBER_N, y, k); + mlkem_pointwise_acc_mont(u + i * MLKEM_N, a + i * k * MLKEM_N, y, k); /* Inverse transform u polynomial. */ - kyber_invntt(u + i * KYBER_N); + mlkem_invntt(u + i * MLKEM_N); /* Add errors to u and reduce. */ - for (j = 0; j < KYBER_N; ++j) { - sword16 t = u[i * KYBER_N + j] + e1[i * KYBER_N + j]; - u[i * KYBER_N + j] = KYBER_BARRETT_RED(t); + for (j = 0; j < MLKEM_N; ++j) { + sword16 t = u[i * MLKEM_N + j] + e1[i * MLKEM_N + j]; + u[i * MLKEM_N + j] = MLKEM_BARRETT_RED(t); } } /* Multiply public key by y into v polynomial. */ - kyber_pointwise_acc_mont(v, pub, y, k); + mlkem_pointwise_acc_mont(v, pub, y, k); /* Inverse transform v. */ - kyber_invntt(v); + mlkem_invntt(v); /* Add errors and message to v and reduce. */ - for (i = 0; i < KYBER_N; ++i) { + for (i = 0; i < MLKEM_N; ++i) { sword16 t = v[i] + e2[i] + m[i]; - v[i] = KYBER_BARRETT_RED(t); + v[i] = MLKEM_BARRETT_RED(t); } } @@ -1709,19 +1710,19 @@ static void kyber_encapsulate_c(const sword16* pub, sword16* u, sword16* v, * @param [in] m Message polynomial. * @param [in] k Number of polynomials in vector. 
*/ -void kyber_encapsulate(const sword16* pub, sword16* u, sword16* v, +void mlkem_encapsulate(const sword16* pub, sword16* u, sword16* v, const sword16* a, sword16* y, const sword16* e1, const sword16* e2, const sword16* m, int k) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - kyber_encapsulate_avx2(pub, u, v, a, y, e1, e2, m, k); + mlkem_encapsulate_avx2(pub, u, v, a, y, e1, e2, m, k); RESTORE_VECTOR_REGISTERS(); } else #endif { - kyber_encapsulate_c(pub, u, v, a, y, e1, e2, m, k); + mlkem_encapsulate_c(pub, u, v, a, y, e1, e2, m, k); } } @@ -1739,7 +1740,7 @@ void kyber_encapsulate(const sword16* pub, sword16* u, sword16* v, * @param [in] seed Random seed to generate matrix A from. * @param [in] coins Random seed to generate noise from. */ -int kyber_encapsulate_seeds(const sword16* pub, KYBER_PRF_T* prf, sword16* u, +int mlkem_encapsulate_seeds(const sword16* pub, MLKEM_PRF_T* prf, sword16* u, sword16* tp, sword16* y, int k, const byte* msg, byte* seed, byte* coins) { int ret = 0; @@ -1747,12 +1748,12 @@ int kyber_encapsulate_seeds(const sword16* pub, KYBER_PRF_T* prf, sword16* u, sword16* a = tp; sword16* e1 = tp; sword16* v = tp; - sword16* e2 = tp + KYBER_N; + sword16* e2 = tp + MLKEM_N; sword16* m = y; /* Transform y. All of result used in calculation of u and v. */ for (i = 0; i < k; ++i) { - kyber_ntt(y + i * KYBER_N); + mlkem_ntt(y + i * MLKEM_N); } /* For each polynomial in the vectors. */ @@ -1760,52 +1761,52 @@ int kyber_encapsulate_seeds(const sword16* pub, KYBER_PRF_T* prf, sword16* u, unsigned int j; /* Generate a vector of matrix A. */ - ret = kyber_gen_matrix_i(prf, a, k, seed, i, 1); + ret = mlkem_gen_matrix_i(prf, a, k, seed, i, 1); if (ret != 0) { break; } /* Multiply at by y into u polynomial. */ - kyber_pointwise_acc_mont(u + i * KYBER_N, a, y, k); + mlkem_pointwise_acc_mont(u + i * MLKEM_N, a, y, k); /* Inverse transform u polynomial. 
*/ - kyber_invntt(u + i * KYBER_N); + mlkem_invntt(u + i * MLKEM_N); /* Generate noise using PRF. */ - ret = kyber_get_noise_i(prf, k, e1, coins, i, 0); + ret = mlkem_get_noise_i(prf, k, e1, coins, i, 0); if (ret != 0) { break; } /* Add errors to u and reduce. */ - for (j = 0; j < KYBER_N; ++j) { - sword16 t = u[i * KYBER_N + j] + e1[j]; - u[i * KYBER_N + j] = KYBER_BARRETT_RED(t); + for (j = 0; j < MLKEM_N; ++j) { + sword16 t = u[i * MLKEM_N + j] + e1[j]; + u[i * MLKEM_N + j] = MLKEM_BARRETT_RED(t); } } /* Multiply public key by y into v polynomial. */ - kyber_pointwise_acc_mont(v, pub, y, k); + mlkem_pointwise_acc_mont(v, pub, y, k); /* Inverse transform v. */ - kyber_invntt(v); + mlkem_invntt(v); - kyber_from_msg(m, msg); + mlkem_from_msg(m, msg); /* Generate noise using PRF. */ - coins[KYBER_SYM_SZ] = 2 * k; - ret = kyber_get_noise_eta2_c(prf, e2, coins); + coins[WC_ML_KEM_SYM_SZ] = 2 * k; + ret = mlkem_get_noise_eta2_c(prf, e2, coins); if (ret == 0) { /* Add errors and message to v and reduce. */ - for (i = 0; i < KYBER_N; ++i) { + for (i = 0; i < MLKEM_N; ++i) { sword16 t = v[i] + e2[i] + m[i]; - tp[i] = KYBER_BARRETT_RED(t); + tp[i] = MLKEM_BARRETT_RED(t); } } return ret; } #endif -#endif /* !WOLFSSL_KYBER_NO_ENCAPSULATE || !WOLFSSL_KYBER_NO_DECAPSULATE */ +#endif /* !WOLFSSL_MLKEM_NO_ENCAPSULATE || !WOLFSSL_MLKEM_NO_DECAPSULATE */ -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE /* Decapsulate message. * @@ -1821,7 +1822,7 @@ int kyber_encapsulate_seeds(const sword16* pub, KYBER_PRF_T* prf, sword16* u, * @param [in] v Encapsulated message polynomial. * @param [in] k Number of polynomials in vector. */ -static void kyber_decapsulate_c(const sword16* s, sword16* w, sword16* u, +static void mlkem_decapsulate_c(const sword16* s, sword16* w, sword16* u, const sword16* v, int k) { int i; @@ -1829,20 +1830,20 @@ static void kyber_decapsulate_c(const sword16* s, sword16* w, sword16* u, /* Transform u. 
All of result used in calculation of w. * Step 6: ... NTT(u') */ for (i = 0; i < k; ++i) { - kyber_ntt(u + i * KYBER_N); + mlkem_ntt(u + i * MLKEM_N); } /* Multiply private key by u into w polynomial. * Step 6: ... s_hat_trans o NTT(u') */ - kyber_pointwise_acc_mont(w, s, u, k); + mlkem_pointwise_acc_mont(w, s, u, k); /* Inverse transform w. * Step 6: ... InvNTT(s_hat_trans o NTT(u')) */ - kyber_invntt(w); + mlkem_invntt(w); /* Subtract errors (in w) out of v and reduce into w. * Step 6: w <- v' - InvNTT(s_hat_trans o NTT(u')) */ - for (i = 0; i < KYBER_N; ++i) { + for (i = 0; i < MLKEM_N; ++i) { sword16 t = v[i] - w[i]; - w[i] = KYBER_BARRETT_RED(t); + w[i] = MLKEM_BARRETT_RED(t); } } @@ -1860,22 +1861,22 @@ static void kyber_decapsulate_c(const sword16* s, sword16* w, sword16* u, * @param [in] v Encapsulated message polynomial. * @param [in] k Number of polynomials in vector. */ -void kyber_decapsulate(const sword16* s, sword16* w, sword16* u, +void mlkem_decapsulate(const sword16* s, sword16* w, sword16* u, const sword16* v, int k) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - kyber_decapsulate_avx2(s, w, u, v, k); + mlkem_decapsulate_avx2(s, w, u, v, k); RESTORE_VECTOR_REGISTERS(); } else #endif { - kyber_decapsulate_c(s, w, u, v, k); + mlkem_decapsulate_c(s, w, u, v, k); } } -#endif /* !WOLFSSL_KYBER_ NO_DECAPSULATE */ +#endif /* !WOLFSSL_MLKEM_NO_DECAPSULATE */ #endif /******************************************************************************/ @@ -1893,7 +1894,7 @@ void kyber_decapsulate(const sword16* s, sword16* w, sword16* u, * @return MEMORY_E when dynamic memory allocation fails. Only possible when * WOLFSSL_SMALL_STACK is defined.
*/ -static int kyber_gen_matrix_k2_avx2(sword16* a, byte* seed, int transposed) +static int mlkem_gen_matrix_k2_avx2(sword16* a, byte* seed, int transposed) { int i; byte rand[4 * GEN_MATRIX_SIZE + 2]; @@ -1921,49 +1922,49 @@ static int kyber_gen_matrix_k2_avx2(sword16* a, byte* seed, int transposed) state[4*4 + 3] = 0x1f0000 + 0x101; } - kyber_sha3_128_blocksx4_seed_avx2(state, seed); - kyber_redistribute_21_rand_avx2(state, rand + 0 * GEN_MATRIX_SIZE, + mlkem_sha3_128_blocksx4_seed_avx2(state, seed); + mlkem_redistribute_21_rand_avx2(state, rand + 0 * GEN_MATRIX_SIZE, rand + 1 * GEN_MATRIX_SIZE, rand + 2 * GEN_MATRIX_SIZE, rand + 3 * GEN_MATRIX_SIZE); for (i = SHA3_128_BYTES; i < GEN_MATRIX_SIZE; i += SHA3_128_BYTES) { - kyber_sha3_blocksx4_avx2(state); - kyber_redistribute_21_rand_avx2(state, rand + i + 0 * GEN_MATRIX_SIZE, + sha3_blocksx4_avx2(state); + mlkem_redistribute_21_rand_avx2(state, rand + i + 0 * GEN_MATRIX_SIZE, rand + i + 1 * GEN_MATRIX_SIZE, rand + i + 2 * GEN_MATRIX_SIZE, rand + i + 3 * GEN_MATRIX_SIZE); } /* Sample random bytes to create a polynomial. */ p = rand; - ctr0 = kyber_rej_uniform_n_avx2(a + 0 * KYBER_N, KYBER_N, p, + ctr0 = mlkem_rej_uniform_n_avx2(a + 0 * MLKEM_N, MLKEM_N, p, GEN_MATRIX_SIZE); p += GEN_MATRIX_SIZE; - ctr1 = kyber_rej_uniform_n_avx2(a + 1 * KYBER_N, KYBER_N, p, + ctr1 = mlkem_rej_uniform_n_avx2(a + 1 * MLKEM_N, MLKEM_N, p, GEN_MATRIX_SIZE); p += GEN_MATRIX_SIZE; - ctr2 = kyber_rej_uniform_n_avx2(a + 2 * KYBER_N, KYBER_N, p, + ctr2 = mlkem_rej_uniform_n_avx2(a + 2 * MLKEM_N, MLKEM_N, p, GEN_MATRIX_SIZE); p += GEN_MATRIX_SIZE; - ctr3 = kyber_rej_uniform_n_avx2(a + 3 * KYBER_N, KYBER_N, p, + ctr3 = mlkem_rej_uniform_n_avx2(a + 3 * MLKEM_N, MLKEM_N, p, GEN_MATRIX_SIZE); /* Create more blocks if too many rejected. 
*/ - while ((ctr0 < KYBER_N) || (ctr1 < KYBER_N) || (ctr2 < KYBER_N) || - (ctr3 < KYBER_N)) { - kyber_sha3_blocksx4_avx2(state); - kyber_redistribute_21_rand_avx2(state, rand + 0 * GEN_MATRIX_SIZE, + while ((ctr0 < MLKEM_N) || (ctr1 < MLKEM_N) || (ctr2 < MLKEM_N) || + (ctr3 < MLKEM_N)) { + sha3_blocksx4_avx2(state); + mlkem_redistribute_21_rand_avx2(state, rand + 0 * GEN_MATRIX_SIZE, rand + 1 * GEN_MATRIX_SIZE, rand + 2 * GEN_MATRIX_SIZE, rand + 3 * GEN_MATRIX_SIZE); p = rand; - ctr0 += kyber_rej_uniform_avx2(a + 0 * KYBER_N + ctr0, KYBER_N - ctr0, + ctr0 += mlkem_rej_uniform_avx2(a + 0 * MLKEM_N + ctr0, MLKEM_N - ctr0, p, XOF_BLOCK_SIZE); p += GEN_MATRIX_SIZE; - ctr1 += kyber_rej_uniform_avx2(a + 1 * KYBER_N + ctr1, KYBER_N - ctr1, + ctr1 += mlkem_rej_uniform_avx2(a + 1 * MLKEM_N + ctr1, MLKEM_N - ctr1, p, XOF_BLOCK_SIZE); p += GEN_MATRIX_SIZE; - ctr2 += kyber_rej_uniform_avx2(a + 2 * KYBER_N + ctr2, KYBER_N - ctr2, + ctr2 += mlkem_rej_uniform_avx2(a + 2 * MLKEM_N + ctr2, MLKEM_N - ctr2, p, XOF_BLOCK_SIZE); p += GEN_MATRIX_SIZE; - ctr3 += kyber_rej_uniform_avx2(a + 3 * KYBER_N + ctr3, KYBER_N - ctr3, + ctr3 += mlkem_rej_uniform_avx2(a + 3 * MLKEM_N + ctr3, MLKEM_N - ctr3, p, XOF_BLOCK_SIZE); } @@ -1983,7 +1984,7 @@ static int kyber_gen_matrix_k2_avx2(sword16* a, byte* seed, int transposed) * @return MEMORY_E when dynamic memory allocation fails. Only possible when * WOLFSSL_SMALL_STACK is defined. 
*/ -static int kyber_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed) +static int mlkem_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed) { int i; int k; @@ -2009,53 +2010,53 @@ static int kyber_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed) } } - kyber_sha3_128_blocksx4_seed_avx2(state, seed); - kyber_redistribute_21_rand_avx2(state, + mlkem_sha3_128_blocksx4_seed_avx2(state, seed); + mlkem_redistribute_21_rand_avx2(state, rand + 0 * GEN_MATRIX_SIZE, rand + 1 * GEN_MATRIX_SIZE, rand + 2 * GEN_MATRIX_SIZE, rand + 3 * GEN_MATRIX_SIZE); for (i = SHA3_128_BYTES; i < GEN_MATRIX_SIZE; i += SHA3_128_BYTES) { - kyber_sha3_blocksx4_avx2(state); - kyber_redistribute_21_rand_avx2(state, + sha3_blocksx4_avx2(state); + mlkem_redistribute_21_rand_avx2(state, rand + i + 0 * GEN_MATRIX_SIZE, rand + i + 1 * GEN_MATRIX_SIZE, rand + i + 2 * GEN_MATRIX_SIZE, rand + i + 3 * GEN_MATRIX_SIZE); } /* Sample random bytes to create a polynomial. */ p = rand; - ctr0 = kyber_rej_uniform_n_avx2(a + 0 * KYBER_N, KYBER_N, p, + ctr0 = mlkem_rej_uniform_n_avx2(a + 0 * MLKEM_N, MLKEM_N, p, GEN_MATRIX_SIZE); p += GEN_MATRIX_SIZE; - ctr1 = kyber_rej_uniform_n_avx2(a + 1 * KYBER_N, KYBER_N, p, + ctr1 = mlkem_rej_uniform_n_avx2(a + 1 * MLKEM_N, MLKEM_N, p, GEN_MATRIX_SIZE); p += GEN_MATRIX_SIZE; - ctr2 = kyber_rej_uniform_n_avx2(a + 2 * KYBER_N, KYBER_N, p, + ctr2 = mlkem_rej_uniform_n_avx2(a + 2 * MLKEM_N, MLKEM_N, p, GEN_MATRIX_SIZE); p += GEN_MATRIX_SIZE; - ctr3 = kyber_rej_uniform_n_avx2(a + 3 * KYBER_N, KYBER_N, p, + ctr3 = mlkem_rej_uniform_n_avx2(a + 3 * MLKEM_N, MLKEM_N, p, GEN_MATRIX_SIZE); /* Create more blocks if too many rejected. 
*/ - while ((ctr0 < KYBER_N) || (ctr1 < KYBER_N) || (ctr2 < KYBER_N) || - (ctr3 < KYBER_N)) { - kyber_sha3_blocksx4_avx2(state); - kyber_redistribute_21_rand_avx2(state, rand + 0 * GEN_MATRIX_SIZE, + while ((ctr0 < MLKEM_N) || (ctr1 < MLKEM_N) || (ctr2 < MLKEM_N) || + (ctr3 < MLKEM_N)) { + sha3_blocksx4_avx2(state); + mlkem_redistribute_21_rand_avx2(state, rand + 0 * GEN_MATRIX_SIZE, rand + 1 * GEN_MATRIX_SIZE, rand + 2 * GEN_MATRIX_SIZE, rand + 3 * GEN_MATRIX_SIZE); p = rand; - ctr0 += kyber_rej_uniform_avx2(a + 0 * KYBER_N + ctr0, - KYBER_N - ctr0, p, XOF_BLOCK_SIZE); + ctr0 += mlkem_rej_uniform_avx2(a + 0 * MLKEM_N + ctr0, + MLKEM_N - ctr0, p, XOF_BLOCK_SIZE); p += GEN_MATRIX_SIZE; - ctr1 += kyber_rej_uniform_avx2(a + 1 * KYBER_N + ctr1, - KYBER_N - ctr1, p, XOF_BLOCK_SIZE); + ctr1 += mlkem_rej_uniform_avx2(a + 1 * MLKEM_N + ctr1, + MLKEM_N - ctr1, p, XOF_BLOCK_SIZE); p += GEN_MATRIX_SIZE; - ctr2 += kyber_rej_uniform_avx2(a + 2 * KYBER_N + ctr2, - KYBER_N - ctr2, p, XOF_BLOCK_SIZE); + ctr2 += mlkem_rej_uniform_avx2(a + 2 * MLKEM_N + ctr2, + MLKEM_N - ctr2, p, XOF_BLOCK_SIZE); p += GEN_MATRIX_SIZE; - ctr3 += kyber_rej_uniform_avx2(a + 3 * KYBER_N + ctr3, - KYBER_N - ctr3, p, XOF_BLOCK_SIZE); + ctr3 += mlkem_rej_uniform_avx2(a + 3 * MLKEM_N + ctr3, + MLKEM_N - ctr3, p, XOF_BLOCK_SIZE); } - a += 4 * KYBER_N; + a += 4 * MLKEM_N; } readUnalignedWords64(state, seed, 4); @@ -2067,7 +2068,8 @@ static int kyber_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed) if (IS_INTEL_BMI2(cpuid_flags)) { sha3_block_bmi2(state); } - else if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + else if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + { sha3_block_avx2(state); RESTORE_VECTOR_REGISTERS(); } @@ -2076,12 +2078,13 @@ static int kyber_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed) } XMEMCPY(rand + i, state, SHA3_128_BYTES); } - ctr0 = kyber_rej_uniform_n_avx2(a, KYBER_N, rand, GEN_MATRIX_SIZE); - while (ctr0 < KYBER_N) { 
+ ctr0 = mlkem_rej_uniform_n_avx2(a, MLKEM_N, rand, GEN_MATRIX_SIZE); + while (ctr0 < MLKEM_N) { if (IS_INTEL_BMI2(cpuid_flags)) { sha3_block_bmi2(state); } - else if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + else if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + { sha3_block_avx2(state); RESTORE_VECTOR_REGISTERS(); } @@ -2089,7 +2092,7 @@ static int kyber_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed) BlockSha3(state); } XMEMCPY(rand, state, SHA3_128_BYTES); - ctr0 += kyber_rej_uniform_avx2(a + ctr0, KYBER_N - ctr0, rand, + ctr0 += mlkem_rej_uniform_avx2(a + ctr0, MLKEM_N - ctr0, rand, XOF_BLOCK_SIZE); } @@ -2108,7 +2111,7 @@ static int kyber_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed) * @return MEMORY_E when dynamic memory allocation fails. Only possible when * WOLFSSL_SMALL_STACK is defined. */ -static int kyber_gen_matrix_k4_avx2(sword16* a, byte* seed, int transposed) +static int mlkem_gen_matrix_k4_avx2(sword16* a, byte* seed, int transposed) { int i; int k; @@ -2134,53 +2137,53 @@ static int kyber_gen_matrix_k4_avx2(sword16* a, byte* seed, int transposed) } } - kyber_sha3_128_blocksx4_seed_avx2(state, seed); - kyber_redistribute_21_rand_avx2(state, + mlkem_sha3_128_blocksx4_seed_avx2(state, seed); + mlkem_redistribute_21_rand_avx2(state, rand + 0 * GEN_MATRIX_SIZE, rand + 1 * GEN_MATRIX_SIZE, rand + 2 * GEN_MATRIX_SIZE, rand + 3 * GEN_MATRIX_SIZE); for (i = SHA3_128_BYTES; i < GEN_MATRIX_SIZE; i += SHA3_128_BYTES) { - kyber_sha3_blocksx4_avx2(state); - kyber_redistribute_21_rand_avx2(state, + sha3_blocksx4_avx2(state); + mlkem_redistribute_21_rand_avx2(state, rand + i + 0 * GEN_MATRIX_SIZE, rand + i + 1 * GEN_MATRIX_SIZE, rand + i + 2 * GEN_MATRIX_SIZE, rand + i + 3 * GEN_MATRIX_SIZE); } /* Sample random bytes to create a polynomial. 
*/ p = rand; - ctr0 = kyber_rej_uniform_n_avx2(a + 0 * KYBER_N, KYBER_N, p, + ctr0 = mlkem_rej_uniform_n_avx2(a + 0 * MLKEM_N, MLKEM_N, p, GEN_MATRIX_SIZE); p += GEN_MATRIX_SIZE; - ctr1 = kyber_rej_uniform_n_avx2(a + 1 * KYBER_N, KYBER_N, p, + ctr1 = mlkem_rej_uniform_n_avx2(a + 1 * MLKEM_N, MLKEM_N, p, GEN_MATRIX_SIZE); p += GEN_MATRIX_SIZE; - ctr2 = kyber_rej_uniform_n_avx2(a + 2 * KYBER_N, KYBER_N, p, + ctr2 = mlkem_rej_uniform_n_avx2(a + 2 * MLKEM_N, MLKEM_N, p, GEN_MATRIX_SIZE); p += GEN_MATRIX_SIZE; - ctr3 = kyber_rej_uniform_n_avx2(a + 3 * KYBER_N, KYBER_N, p, + ctr3 = mlkem_rej_uniform_n_avx2(a + 3 * MLKEM_N, MLKEM_N, p, GEN_MATRIX_SIZE); /* Create more blocks if too many rejected. */ - while ((ctr0 < KYBER_N) || (ctr1 < KYBER_N) || (ctr2 < KYBER_N) || - (ctr3 < KYBER_N)) { - kyber_sha3_blocksx4_avx2(state); - kyber_redistribute_21_rand_avx2(state, rand + 0 * GEN_MATRIX_SIZE, + while ((ctr0 < MLKEM_N) || (ctr1 < MLKEM_N) || (ctr2 < MLKEM_N) || + (ctr3 < MLKEM_N)) { + sha3_blocksx4_avx2(state); + mlkem_redistribute_21_rand_avx2(state, rand + 0 * GEN_MATRIX_SIZE, rand + 1 * GEN_MATRIX_SIZE, rand + 2 * GEN_MATRIX_SIZE, rand + 3 * GEN_MATRIX_SIZE); p = rand; - ctr0 += kyber_rej_uniform_avx2(a + 0 * KYBER_N + ctr0, - KYBER_N - ctr0, p, XOF_BLOCK_SIZE); + ctr0 += mlkem_rej_uniform_avx2(a + 0 * MLKEM_N + ctr0, + MLKEM_N - ctr0, p, XOF_BLOCK_SIZE); p += GEN_MATRIX_SIZE; - ctr1 += kyber_rej_uniform_avx2(a + 1 * KYBER_N + ctr1, - KYBER_N - ctr1, p, XOF_BLOCK_SIZE); + ctr1 += mlkem_rej_uniform_avx2(a + 1 * MLKEM_N + ctr1, + MLKEM_N - ctr1, p, XOF_BLOCK_SIZE); p += GEN_MATRIX_SIZE; - ctr2 += kyber_rej_uniform_avx2(a + 2 * KYBER_N + ctr2, - KYBER_N - ctr2, p, XOF_BLOCK_SIZE); + ctr2 += mlkem_rej_uniform_avx2(a + 2 * MLKEM_N + ctr2, + MLKEM_N - ctr2, p, XOF_BLOCK_SIZE); p += GEN_MATRIX_SIZE; - ctr3 += kyber_rej_uniform_avx2(a + 3 * KYBER_N + ctr3, - KYBER_N - ctr3, p, XOF_BLOCK_SIZE); + ctr3 += mlkem_rej_uniform_avx2(a + 3 * MLKEM_N + ctr3, + MLKEM_N - ctr3, p, 
XOF_BLOCK_SIZE); } - a += 4 * KYBER_N; + a += 4 * MLKEM_N; } return 0; @@ -2199,7 +2202,7 @@ static int kyber_gen_matrix_k4_avx2(sword16* a, byte* seed, int transposed) * @return MEMORY_E when dynamic memory allocation fails. Only possible when * WOLFSSL_SMALL_STACK is defined. */ -static int kyber_gen_matrix_k2_aarch64(sword16* a, byte* seed, int transposed) +static int mlkem_gen_matrix_k2_aarch64(sword16* a, byte* seed, int transposed) { word64 state[3 * 25]; word64* st = (word64*)state; @@ -2219,29 +2222,29 @@ static int kyber_gen_matrix_k2_aarch64(sword16* a, byte* seed, int transposed) state[2*25 + 4] = 0x1f0000 + (0 << 8) + 1; } - kyber_shake128_blocksx3_seed_neon(state, seed); + mlkem_shake128_blocksx3_seed_neon(state, seed); /* Sample random bytes to create a polynomial. */ p = (byte*)st; - ctr0 = kyber_rej_uniform_neon(a + 0 * KYBER_N, KYBER_N, p, XOF_BLOCK_SIZE); + ctr0 = mlkem_rej_uniform_neon(a + 0 * MLKEM_N, MLKEM_N, p, XOF_BLOCK_SIZE); p += 25 * 8; - ctr1 = kyber_rej_uniform_neon(a + 1 * KYBER_N, KYBER_N, p, XOF_BLOCK_SIZE); + ctr1 = mlkem_rej_uniform_neon(a + 1 * MLKEM_N, MLKEM_N, p, XOF_BLOCK_SIZE); p += 25 * 8; - ctr2 = kyber_rej_uniform_neon(a + 2 * KYBER_N, KYBER_N, p, XOF_BLOCK_SIZE); - while ((ctr0 < KYBER_N) || (ctr1 < KYBER_N) || (ctr2 < KYBER_N)) { - kyber_sha3_blocksx3_neon(st); + ctr2 = mlkem_rej_uniform_neon(a + 2 * MLKEM_N, MLKEM_N, p, XOF_BLOCK_SIZE); + while ((ctr0 < MLKEM_N) || (ctr1 < MLKEM_N) || (ctr2 < MLKEM_N)) { + mlkem_sha3_blocksx3_neon(st); p = (byte*)st; - ctr0 += kyber_rej_uniform_neon(a + 0 * KYBER_N + ctr0, KYBER_N - ctr0, + ctr0 += mlkem_rej_uniform_neon(a + 0 * MLKEM_N + ctr0, MLKEM_N - ctr0, p, XOF_BLOCK_SIZE); p += 25 * 8; - ctr1 += kyber_rej_uniform_neon(a + 1 * KYBER_N + ctr1, KYBER_N - ctr1, + ctr1 += mlkem_rej_uniform_neon(a + 1 * MLKEM_N + ctr1, MLKEM_N - ctr1, p, XOF_BLOCK_SIZE); p += 25 * 8; - ctr2 += kyber_rej_uniform_neon(a + 2 * KYBER_N + ctr2, KYBER_N - ctr2, + ctr2 += mlkem_rej_uniform_neon(a + 2 * MLKEM_N 
+ ctr2, MLKEM_N - ctr2, p, XOF_BLOCK_SIZE); } - a += 3 * KYBER_N; + a += 3 * MLKEM_N; readUnalignedWords64(state, seed, 4); /* Transposed value same as not. */ @@ -2250,10 +2253,10 @@ static int kyber_gen_matrix_k2_aarch64(sword16* a, byte* seed, int transposed) state[20] = W64LIT(0x8000000000000000); BlockSha3(state); p = (byte*)state; - ctr0 = kyber_rej_uniform_neon(a, KYBER_N, p, XOF_BLOCK_SIZE); - while (ctr0 < KYBER_N) { + ctr0 = mlkem_rej_uniform_neon(a, MLKEM_N, p, XOF_BLOCK_SIZE); + while (ctr0 < MLKEM_N) { BlockSha3(state); - ctr0 += kyber_rej_uniform_neon(a + ctr0, KYBER_N - ctr0, p, + ctr0 += mlkem_rej_uniform_neon(a + ctr0, MLKEM_N - ctr0, p, XOF_BLOCK_SIZE); } @@ -2273,7 +2276,7 @@ static int kyber_gen_matrix_k2_aarch64(sword16* a, byte* seed, int transposed) * @return MEMORY_E when dynamic memory allocation fails. Only possible when * WOLFSSL_SMALL_STACK is defined. */ -static int kyber_gen_matrix_k3_aarch64(sword16* a, byte* seed, int transposed) +static int mlkem_gen_matrix_k3_aarch64(sword16* a, byte* seed, int transposed) { int i; int k; @@ -2294,33 +2297,33 @@ static int kyber_gen_matrix_k3_aarch64(sword16* a, byte* seed, int transposed) } } - kyber_shake128_blocksx3_seed_neon(state, seed); + mlkem_shake128_blocksx3_seed_neon(state, seed); /* Sample random bytes to create a polynomial. */ p = (byte*)st; - ctr0 = kyber_rej_uniform_neon(a + 0 * KYBER_N, KYBER_N, p, + ctr0 = mlkem_rej_uniform_neon(a + 0 * MLKEM_N, MLKEM_N, p, XOF_BLOCK_SIZE); p += 25 * 8; - ctr1 = kyber_rej_uniform_neon(a + 1 * KYBER_N, KYBER_N, p, + ctr1 = mlkem_rej_uniform_neon(a + 1 * MLKEM_N, MLKEM_N, p, XOF_BLOCK_SIZE); p +=25 * 8; - ctr2 = kyber_rej_uniform_neon(a + 2 * KYBER_N, KYBER_N, p, + ctr2 = mlkem_rej_uniform_neon(a + 2 * MLKEM_N, MLKEM_N, p, XOF_BLOCK_SIZE); /* Create more blocks if too many rejected. 
*/ - while ((ctr0 < KYBER_N) || (ctr1 < KYBER_N) || (ctr2 < KYBER_N)) { - kyber_sha3_blocksx3_neon(st); + while ((ctr0 < MLKEM_N) || (ctr1 < MLKEM_N) || (ctr2 < MLKEM_N)) { + mlkem_sha3_blocksx3_neon(st); p = (byte*)st; - ctr0 += kyber_rej_uniform_neon(a + 0 * KYBER_N + ctr0, - KYBER_N - ctr0, p, XOF_BLOCK_SIZE); + ctr0 += mlkem_rej_uniform_neon(a + 0 * MLKEM_N + ctr0, + MLKEM_N - ctr0, p, XOF_BLOCK_SIZE); p += 25 * 8; - ctr1 += kyber_rej_uniform_neon(a + 1 * KYBER_N + ctr1, - KYBER_N - ctr1, p, XOF_BLOCK_SIZE); + ctr1 += mlkem_rej_uniform_neon(a + 1 * MLKEM_N + ctr1, + MLKEM_N - ctr1, p, XOF_BLOCK_SIZE); p += 25 * 8; - ctr2 += kyber_rej_uniform_neon(a + 2 * KYBER_N + ctr2, - KYBER_N - ctr2, p, XOF_BLOCK_SIZE); + ctr2 += mlkem_rej_uniform_neon(a + 2 * MLKEM_N + ctr2, + MLKEM_N - ctr2, p, XOF_BLOCK_SIZE); } - a += 3 * KYBER_N; + a += 3 * MLKEM_N; } return 0; @@ -2339,7 +2342,7 @@ static int kyber_gen_matrix_k3_aarch64(sword16* a, byte* seed, int transposed) * @return MEMORY_E when dynamic memory allocation fails. Only possible when * WOLFSSL_SMALL_STACK is defined. */ -static int kyber_gen_matrix_k4_aarch64(sword16* a, byte* seed, int transposed) +static int mlkem_gen_matrix_k4_aarch64(sword16* a, byte* seed, int transposed) { int i; int k; @@ -2362,33 +2365,33 @@ static int kyber_gen_matrix_k4_aarch64(sword16* a, byte* seed, int transposed) } } - kyber_shake128_blocksx3_seed_neon(state, seed); + mlkem_shake128_blocksx3_seed_neon(state, seed); /* Sample random bytes to create a polynomial. 
*/ p = (byte*)st; - ctr0 = kyber_rej_uniform_neon(a + 0 * KYBER_N, KYBER_N, p, + ctr0 = mlkem_rej_uniform_neon(a + 0 * MLKEM_N, MLKEM_N, p, XOF_BLOCK_SIZE); p += 25 * 8; - ctr1 = kyber_rej_uniform_neon(a + 1 * KYBER_N, KYBER_N, p, + ctr1 = mlkem_rej_uniform_neon(a + 1 * MLKEM_N, MLKEM_N, p, XOF_BLOCK_SIZE); p += 25 * 8; - ctr2 = kyber_rej_uniform_neon(a + 2 * KYBER_N, KYBER_N, p, + ctr2 = mlkem_rej_uniform_neon(a + 2 * MLKEM_N, MLKEM_N, p, XOF_BLOCK_SIZE); /* Create more blocks if too many rejected. */ - while ((ctr0 < KYBER_N) || (ctr1 < KYBER_N) || (ctr2 < KYBER_N)) { - kyber_sha3_blocksx3_neon(st); + while ((ctr0 < MLKEM_N) || (ctr1 < MLKEM_N) || (ctr2 < MLKEM_N)) { + mlkem_sha3_blocksx3_neon(st); p = (byte*)st; - ctr0 += kyber_rej_uniform_neon(a + 0 * KYBER_N + ctr0, - KYBER_N - ctr0, p, XOF_BLOCK_SIZE); + ctr0 += mlkem_rej_uniform_neon(a + 0 * MLKEM_N + ctr0, + MLKEM_N - ctr0, p, XOF_BLOCK_SIZE); p += 25 * 8; - ctr1 += kyber_rej_uniform_neon(a + 1 * KYBER_N + ctr1, - KYBER_N - ctr1, p, XOF_BLOCK_SIZE); + ctr1 += mlkem_rej_uniform_neon(a + 1 * MLKEM_N + ctr1, + MLKEM_N - ctr1, p, XOF_BLOCK_SIZE); p += 25 * 8; - ctr2 += kyber_rej_uniform_neon(a + 2 * KYBER_N + ctr2, - KYBER_N - ctr2, p, XOF_BLOCK_SIZE); + ctr2 += mlkem_rej_uniform_neon(a + 2 * MLKEM_N + ctr2, + MLKEM_N - ctr2, p, XOF_BLOCK_SIZE); } - a += 3 * KYBER_N; + a += 3 * MLKEM_N; } readUnalignedWords64(state, seed, 4); @@ -2398,10 +2401,10 @@ static int kyber_gen_matrix_k4_aarch64(sword16* a, byte* seed, int transposed) state[20] = W64LIT(0x8000000000000000); BlockSha3(state); p = (byte*)state; - ctr0 = kyber_rej_uniform_neon(a, KYBER_N, p, XOF_BLOCK_SIZE); - while (ctr0 < KYBER_N) { + ctr0 = mlkem_rej_uniform_neon(a, MLKEM_N, p, XOF_BLOCK_SIZE); + while (ctr0 < MLKEM_N) { BlockSha3(state); - ctr0 += kyber_rej_uniform_neon(a + ctr0, KYBER_N - ctr0, p, + ctr0 += mlkem_rej_uniform_neon(a + ctr0, MLKEM_N - ctr0, p, XOF_BLOCK_SIZE); } @@ -2422,7 +2425,7 @@ static int kyber_gen_matrix_k4_aarch64(sword16* a, 
byte* seed, int transposed) * @param [in] len Length of data to absorb in bytes. * @return 0 on success always. */ -static int kyber_xof_absorb(wc_Shake* shake128, byte* seed, int len) +static int mlkem_xof_absorb(wc_Shake* shake128, byte* seed, int len) { int ret; @@ -2444,7 +2447,7 @@ static int kyber_xof_absorb(wc_Shake* shake128, byte* seed, int len) * @param [in] blocks Number of blocks to write. * @return 0 on success always. */ -static int kyber_xof_squeezeblocks(wc_Shake* shake128, byte* out, int blocks) +static int mlkem_xof_squeezeblocks(wc_Shake* shake128, byte* out, int blocks) { return wc_Shake128_SqueezeBlocks(shake128, out, blocks); } @@ -2460,7 +2463,7 @@ static int kyber_xof_squeezeblocks(wc_Shake* shake128, byte* out, int blocks) * @param [in] devId Device id. * @return 0 on success always. */ -int kyber_hash_new(wc_Sha3* hash, void* heap, int devId) +int mlkem_hash_new(wc_Sha3* hash, void* heap, int devId) { return wc_InitSha3_256(hash, heap, devId); } @@ -2472,7 +2475,7 @@ int kyber_hash_new(wc_Sha3* hash, void* heap, int devId) * * @param [in, out] hash SHA-3 object. */ -void kyber_hash_free(wc_Sha3* hash) +void mlkem_hash_free(wc_Sha3* hash) { wc_Sha3_256_Free(hash); } @@ -2488,7 +2491,7 @@ void kyber_hash_free(wc_Sha3* hash) * @param [out] out Hash of data. * @return 0 on success. */ -int kyber_hash256(wc_Sha3* hash, const byte* data, word32 dataLen, byte* out) +int mlkem_hash256(wc_Sha3* hash, const byte* data, word32 dataLen, byte* out) { int ret; @@ -2515,7 +2518,7 @@ int kyber_hash256(wc_Sha3* hash, const byte* data, word32 dataLen, byte* out) * @param [out] out Hash of all data. * @return 0 on success. */ -int kyber_hash512(wc_Sha3* hash, const byte* data1, word32 data1Len, +int mlkem_hash512(wc_Sha3* hash, const byte* data1, word32 data1Len, const byte* data2, word32 data2Len, byte* out) { int ret; @@ -2539,7 +2542,7 @@ int kyber_hash512(wc_Sha3* hash, const byte* data1, word32 data1Len, * * @param [in, out] shake256 SHAKE-256 object. 
*/ -void kyber_prf_init(wc_Shake* prf) +void mlkem_prf_init(wc_Shake* prf) { XMEMSET(prf->s, 0, sizeof(prf->s)); } @@ -2554,7 +2557,7 @@ void kyber_prf_init(wc_Shake* prf) * @param [in] devId Device id. * @return 0 on success always. */ -int kyber_prf_new(wc_Shake* prf, void* heap, int devId) +int mlkem_prf_new(wc_Shake* prf, void* heap, int devId) { return wc_InitShake256(prf, heap, devId); } @@ -2566,7 +2569,7 @@ int kyber_prf_new(wc_Shake* prf, void* heap, int devId) * * @param [in, out] shake256 SHAKE-256 object. */ -void kyber_prf_free(wc_Shake* prf) +void mlkem_prf_free(wc_Shake* prf) { wc_Shake256_Free(prf); } @@ -2580,11 +2583,11 @@ void kyber_prf_free(wc_Shake* prf) * @param [in, out] shake256 SHAKE-256 object. * @param [out] out Buffer to write to. * @param [in] outLen Number of bytes to write. - * @param [in] key Data to derive from. Must be KYBER_SYM_SZ + 1 - * bytes in length. + * @param [in] key Data to derive from. Must be: + * WC_ML_KEM_SYM_SZ + 1 bytes in length. * @return 0 on success always. */ -static int kyber_prf(wc_Shake* shake256, byte* out, unsigned int outLen, +static int mlkem_prf(wc_Shake* shake256, byte* out, unsigned int outLen, const byte* key) { #ifdef USE_INTEL_SPEEDUP @@ -2592,13 +2595,13 @@ static int kyber_prf(wc_Shake* shake256, byte* out, unsigned int outLen, (void)shake256; - /* Put first KYBER_SYM_SZ bytes og key into blank state. */ - readUnalignedWords64(state, key, KYBER_SYM_SZ / sizeof(word64)); + /* Put first WC_ML_KEM_SYM_SZ bytes of key into blank state. */ + readUnalignedWords64(state, key, WC_ML_KEM_SYM_SZ / sizeof(word64)); /* Last byte in with end of content marker. */ - state[KYBER_SYM_SZ / 8] = 0x1f00 | key[KYBER_SYM_SZ]; + state[WC_ML_KEM_SYM_SZ / 8] = 0x1f00 | key[WC_ML_KEM_SYM_SZ]; /* Set rest of state to 0. */ - XMEMSET(state + KYBER_SYM_SZ / 8 + 1, 0, - (25 - KYBER_SYM_SZ / 8 - 1) * sizeof(word64)); + XMEMSET(state + WC_ML_KEM_SYM_SZ / 8 + 1, 0, + (25 - WC_ML_KEM_SYM_SZ / 8 - 1) * sizeof(word64)); /* ...
except for rate marker. */ state[WC_SHA3_256_COUNT - 1] = W64LIT(0x8000000000000000); @@ -2632,7 +2635,7 @@ static int kyber_prf(wc_Shake* shake256, byte* out, unsigned int outLen, int ret; /* Process all data. */ - ret = wc_Shake256_Update(shake256, key, KYBER_SYM_SZ + 1); + ret = wc_Shake256_Update(shake256, key, WC_ML_KEM_SYM_SZ + 1); if (ret == 0) { /* Calculate Hash of data passed in an re-initialize. */ ret = wc_Shake256_Final(shake256, out, outLen); @@ -2643,7 +2646,7 @@ static int kyber_prf(wc_Shake* shake256, byte* out, unsigned int outLen, } #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef USE_INTEL_SPEEDUP /* Create pseudo-random key from the seed using SHAKE-256. * @@ -2653,7 +2656,7 @@ static int kyber_prf(wc_Shake* shake256, byte* out, unsigned int outLen, * @param [in] outLen Number of bytes to derive. * @return 0 on success always. */ -int kyber_kdf(byte* seed, int seedLen, byte* out, int outLen) +int mlkem_kdf(byte* seed, int seedLen, byte* out, int outLen) { word64 state[25]; word32 len64 = seedLen / 8; @@ -2688,7 +2691,7 @@ int kyber_kdf(byte* seed, int seedLen, byte* out, int outLen) * @param [in] outLen Number of bytes to derive. * @return 0 on success always. */ -int kyber_kdf(byte* seed, int seedLen, byte* out, int outLen) +int mlkem_kdf(byte* seed, int seedLen, byte* out, int outLen) { word64 state[25]; word32 len64 = seedLen / 8; @@ -2735,13 +2738,13 @@ int kyber_kdf(byte* seed, int seedLen, byte* out, int outLen) * @param [in] rLen Length of random data in buffer. * @return Number of integers sampled. */ -static unsigned int kyber_rej_uniform_c(sword16* p, unsigned int len, +static unsigned int mlkem_rej_uniform_c(sword16* p, unsigned int len, const byte* r, unsigned int rLen) { unsigned int i; unsigned int j; -#if defined(WOLFSSL_KYBER_SMALL) || !defined(WC_64BIT_CPU) +#if defined(WOLFSSL_MLKEM_SMALL) || !defined(WC_64BIT_CPU) /* Keep sampling until maximum number of integers reached or buffer used up. * Step 4. 
*/ for (i = 0, j = 0; (i < len) && (j <= rLen - 3); j += 3) { @@ -2754,14 +2757,14 @@ static unsigned int kyber_rej_uniform_c(sword16* p, unsigned int len, /* Reject first 12-bit integer if greater than or equal to q. * Step 8 */ - if (v0 < KYBER_Q) { + if (v0 < MLKEM_Q) { /* Steps 9-10 */ p[i++] = v0; } /* Check second if we don't have enough integers yet. * Reject second 12-bit integer if greater than or equal to q. * Step 12 */ - if ((i < len) && (v1 < KYBER_Q)) { + if ((i < len) && (v1 < MLKEM_Q)) { /* Steps 13-14 */ p[i++] = v1; } @@ -2788,13 +2791,13 @@ static unsigned int kyber_rej_uniform_c(sword16* p, unsigned int len, sword16 v3 = (r_word >> 36) & 0xfff; p[i] = v0; - i += (v0 < KYBER_Q); + i += (v0 < MLKEM_Q); p[i] = v1; - i += (v1 < KYBER_Q); + i += (v1 < MLKEM_Q); p[i] = v2; - i += (v2 < KYBER_Q); + i += (v2 < MLKEM_Q); p[i] = v3; - i += (v3 < KYBER_Q); + i += (v3 < MLKEM_Q); /* Move over used bytes. */ r += 6; @@ -2812,13 +2815,13 @@ static unsigned int kyber_rej_uniform_c(sword16* p, unsigned int len, sword16 v3 = (r_word >> 36) & 0xfff; p[i] = v0; - i += (v0 < KYBER_Q); + i += (v0 < MLKEM_Q); p[i] = v1; - i += (v1 < KYBER_Q); + i += (v1 < MLKEM_Q); p[i] = v2; - i += (v2 < KYBER_Q); + i += (v2 < MLKEM_Q); p[i] = v3; - i += (v3 < KYBER_Q); + i += (v3 < MLKEM_Q); /* Move over used bytes. */ r += 6; @@ -2834,22 +2837,22 @@ static unsigned int kyber_rej_uniform_c(sword16* p, unsigned int len, sword16 v3 = (r_word >> 36) & 0xfff; /* Reject first 12-bit integer if greater than or equal to q. */ - if (v0 < KYBER_Q) { + if (v0 < MLKEM_Q) { p[i++] = v0; } /* Check second if we don't have enough integers yet. * Reject second 12-bit integer if greater than or equal to q. */ - if ((i < len) && (v1 < KYBER_Q)) { + if ((i < len) && (v1 < MLKEM_Q)) { p[i++] = v1; } /* Check second if we don't have enough integers yet. * Reject third 12-bit integer if greater than or equal to q. 
*/ - if ((i < len) && (v2 < KYBER_Q)) { + if ((i < len) && (v2 < MLKEM_Q)) { p[i++] = v2; } /* Check second if we don't have enough integers yet. * Reject fourth 12-bit integer if greater than or equal to q. */ - if ((i < len) && (v3 < KYBER_Q)) { + if ((i < len) && (v3 < MLKEM_Q)) { p[i++] = v3; } @@ -2908,7 +2911,7 @@ static unsigned int kyber_rej_uniform_c(sword16* p, unsigned int len, * @return MEMORY_E when dynamic memory allocation fails. Only possible when * WOLFSSL_SMALL_STACK is defined. */ -static int kyber_gen_matrix_c(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, +static int mlkem_gen_matrix_c(MLKEM_PRF_T* prf, sword16* a, int k, byte* seed, int transposed) { #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) @@ -2916,12 +2919,12 @@ static int kyber_gen_matrix_c(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, #else byte rand[GEN_MATRIX_SIZE + 2]; #endif - byte extSeed[KYBER_SYM_SZ + 2]; + byte extSeed[WC_ML_KEM_SYM_SZ + 2]; int ret = 0; int i; /* Copy seed into buffer than has space for i and j to be appended. */ - XMEMCPY(extSeed, seed, KYBER_SYM_SZ); + XMEMCPY(extSeed, seed, WC_ML_KEM_SYM_SZ); #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) /* Allocate large amount of memory to hold random bytes to be samples. */ @@ -2931,7 +2934,7 @@ static int kyber_gen_matrix_c(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, } #endif -#if !defined(WOLFSSL_KYBER_SMALL) && defined(WC_64BIT_CPU) +#if !defined(WOLFSSL_MLKEM_SMALL) && defined(WC_64BIT_CPU) /* Loading 64 bits, only using 48 bits. Loading 2 bytes more than used. */ if (ret == 0) { rand[GEN_MATRIX_SIZE+0] = 0xff; @@ -2941,29 +2944,29 @@ static int kyber_gen_matrix_c(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, /* Generate each vector of polynomials. * Alg 13, Step 3. Alg 14, Step 4. 
*/ - for (i = 0; (ret == 0) && (i < k); i++, a += k * KYBER_N) { + for (i = 0; (ret == 0) && (i < k); i++, a += k * MLKEM_N) { int j; /* Generate each polynomial in vector from seed with indices. * Alg 13, Step 4. Alg 14, Step 5. */ for (j = 0; (ret == 0) && (j < k); j++) { if (transposed) { /* Alg 14, Step 6: .. rho||i||j ... */ - extSeed[KYBER_SYM_SZ + 0] = i; - extSeed[KYBER_SYM_SZ + 1] = j; + extSeed[WC_ML_KEM_SYM_SZ + 0] = i; + extSeed[WC_ML_KEM_SYM_SZ + 1] = j; } else { /* Alg 13, Step 5: .. rho||j||i ... */ - extSeed[KYBER_SYM_SZ + 0] = j; - extSeed[KYBER_SYM_SZ + 1] = i; + extSeed[WC_ML_KEM_SYM_SZ + 0] = j; + extSeed[WC_ML_KEM_SYM_SZ + 1] = i; } /* Absorb the index specific seed. * Alg 7, Step 1-2 */ - ret = kyber_xof_absorb(prf, extSeed, sizeof(extSeed)); + ret = mlkem_xof_absorb(prf, extSeed, sizeof(extSeed)); if (ret == 0) { /* Create data based on the seed. * Alg 7, Step 5. Generating enough to, on average, be able to * get enough valid values. */ - ret = kyber_xof_squeezeblocks(prf, rand, GEN_MATRIX_NBLOCKS); + ret = mlkem_xof_squeezeblocks(prf, rand, GEN_MATRIX_NBLOCKS); } if (ret == 0) { unsigned int ctr; @@ -2971,16 +2974,16 @@ static int kyber_gen_matrix_c(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, /* Sample random bytes to create a polynomial. * Alg 7, Step 3 - implicitly counter is 0. * Alg 7, Step 4-16. */ - ctr = kyber_rej_uniform_c(a + j * KYBER_N, KYBER_N, rand, + ctr = mlkem_rej_uniform_c(a + j * MLKEM_N, MLKEM_N, rand, GEN_MATRIX_SIZE); /* Create more blocks if too many rejected. * Alg 7, Step 4. */ - while (ctr < KYBER_N) { + while (ctr < MLKEM_N) { /* Alg 7, Step 5. */ - kyber_xof_squeezeblocks(prf, rand, 1); + mlkem_xof_squeezeblocks(prf, rand, 1); /* Alg 7, Step 4-16. 
*/ - ctr += kyber_rej_uniform_c(a + j * KYBER_N + ctr, - KYBER_N - ctr, rand, XOF_BLOCK_SIZE); + ctr += mlkem_rej_uniform_c(a + j * MLKEM_N + ctr, + MLKEM_N - ctr, rand, XOF_BLOCK_SIZE); } } } @@ -3011,63 +3014,64 @@ static int kyber_gen_matrix_c(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, * @return MEMORY_E when dynamic memory allocation fails. Only possible when * WOLFSSL_SMALL_STACK is defined. */ -int kyber_gen_matrix(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, +int mlkem_gen_matrix(MLKEM_PRF_T* prf, sword16* a, int k, byte* seed, int transposed) { int ret; #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) - if (k == KYBER512_K) { + if (k == WC_ML_KEM_512_K) { #if defined(WOLFSSL_ARMASM) && defined(__aarch64__) - ret = kyber_gen_matrix_k2_aarch64(a, seed, transposed); + ret = mlkem_gen_matrix_k2_aarch64(a, seed, transposed); #else #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - ret = kyber_gen_matrix_k2_avx2(a, seed, transposed); + ret = mlkem_gen_matrix_k2_avx2(a, seed, transposed); RESTORE_VECTOR_REGISTERS(); } else #endif { - ret = kyber_gen_matrix_c(prf, a, KYBER512_K, seed, transposed); + ret = mlkem_gen_matrix_c(prf, a, WC_ML_KEM_512_K, seed, transposed); } #endif } else #endif #if defined(WOLFSSL_KYBER768) || defined(WOLFSSL_WC_ML_KEM_768) - if (k == KYBER768_K) { + if (k == WC_ML_KEM_768_K) { #if defined(WOLFSSL_ARMASM) && defined(__aarch64__) - ret = kyber_gen_matrix_k3_aarch64(a, seed, transposed); + ret = mlkem_gen_matrix_k3_aarch64(a, seed, transposed); #else #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - ret = kyber_gen_matrix_k3_avx2(a, seed, transposed); + ret = mlkem_gen_matrix_k3_avx2(a, seed, transposed); RESTORE_VECTOR_REGISTERS(); } else #endif { - ret = kyber_gen_matrix_c(prf, a, KYBER768_K, seed, transposed); + ret = mlkem_gen_matrix_c(prf, a, WC_ML_KEM_768_K, seed, transposed); } #endif } else #endif #if 
defined(WOLFSSL_KYBER1024) || defined(WOLFSSL_WC_ML_KEM_1024) - if (k == KYBER1024_K) { + if (k == WC_ML_KEM_1024_K) { #if defined(WOLFSSL_ARMASM) && defined(__aarch64__) - ret = kyber_gen_matrix_k4_aarch64(a, seed, transposed); + ret = mlkem_gen_matrix_k4_aarch64(a, seed, transposed); #else #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - ret = kyber_gen_matrix_k4_avx2(a, seed, transposed); + ret = mlkem_gen_matrix_k4_avx2(a, seed, transposed); RESTORE_VECTOR_REGISTERS(); } else #endif { - ret = kyber_gen_matrix_c(prf, a, KYBER1024_K, seed, transposed); + ret = mlkem_gen_matrix_c(prf, a, WC_ML_KEM_1024_K, seed, + transposed); } #endif } @@ -3114,7 +3118,7 @@ int kyber_gen_matrix(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, * @return MEMORY_E when dynamic memory allocation fails. Only possible when * WOLFSSL_SMALL_STACK is defined. */ -static int kyber_gen_matrix_i(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, +static int mlkem_gen_matrix_i(MLKEM_PRF_T* prf, sword16* a, int k, byte* seed, int i, int transposed) { #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) @@ -3122,11 +3126,11 @@ static int kyber_gen_matrix_i(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, #else byte rand[GEN_MATRIX_SIZE + 2]; #endif - byte extSeed[KYBER_SYM_SZ + 2]; + byte extSeed[WC_ML_KEM_SYM_SZ + 2]; int ret = 0; int j; - XMEMCPY(extSeed, seed, KYBER_SYM_SZ); + XMEMCPY(extSeed, seed, WC_ML_KEM_SYM_SZ); #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) /* Allocate large amount of memory to hold random bytes to be samples. */ @@ -3136,7 +3140,7 @@ static int kyber_gen_matrix_i(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, } #endif -#if !defined(WOLFSSL_KYBER_SMALL) && defined(WC_64BIT_CPU) +#if !defined(WOLFSSL_MLKEM_SMALL) && defined(WC_64BIT_CPU) /* Loading 64 bits, only using 48 bits. Loading 2 bytes more than used. 
*/ if (ret == 0) { rand[GEN_MATRIX_SIZE+0] = 0xff; @@ -3149,22 +3153,22 @@ static int kyber_gen_matrix_i(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, for (j = 0; (ret == 0) && (j < k); j++) { if (transposed) { /* Alg 14, Step 6: .. rho||i||j ... */ - extSeed[KYBER_SYM_SZ + 0] = i; - extSeed[KYBER_SYM_SZ + 1] = j; + extSeed[WC_ML_KEM_SYM_SZ + 0] = i; + extSeed[WC_ML_KEM_SYM_SZ + 1] = j; } else { /* Alg 13, Step 5: .. rho||j||i ... */ - extSeed[KYBER_SYM_SZ + 0] = j; - extSeed[KYBER_SYM_SZ + 1] = i; + extSeed[WC_ML_KEM_SYM_SZ + 0] = j; + extSeed[WC_ML_KEM_SYM_SZ + 1] = i; } /* Absorb the index specific seed. * Alg 7, Step 1-2 */ - ret = kyber_xof_absorb(prf, extSeed, sizeof(extSeed)); + ret = mlkem_xof_absorb(prf, extSeed, sizeof(extSeed)); if (ret == 0) { /* Create out based on the seed. * Alg 7, Step 5. Generating enough to, on average, be able to get * enough valid values. */ - ret = kyber_xof_squeezeblocks(prf, rand, GEN_MATRIX_NBLOCKS); + ret = mlkem_xof_squeezeblocks(prf, rand, GEN_MATRIX_NBLOCKS); } if (ret == 0) { unsigned int ctr; @@ -3172,16 +3176,16 @@ static int kyber_gen_matrix_i(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, /* Sample random bytes to create a polynomial. * Alg 7, Step 3 - implicitly counter is 0. * Alg 7, Step 4-16. */ - ctr = kyber_rej_uniform_c(a + j * KYBER_N, KYBER_N, rand, + ctr = mlkem_rej_uniform_c(a + j * MLKEM_N, MLKEM_N, rand, GEN_MATRIX_SIZE); /* Create more blocks if too many rejected. * Alg 7, Step 4. */ - while (ctr < KYBER_N) { + while (ctr < MLKEM_N) { /* Alg 7, Step 5. */ - kyber_xof_squeezeblocks(prf, rand, 1); + mlkem_xof_squeezeblocks(prf, rand, 1); /* Alg 7, Step 4-16. */ - ctr += kyber_rej_uniform_c(a + j * KYBER_N + ctr, - KYBER_N - ctr, rand, XOF_BLOCK_SIZE); + ctr += mlkem_rej_uniform_c(a + j * MLKEM_N + ctr, + MLKEM_N - ctr, rand, XOF_BLOCK_SIZE); } } } @@ -3223,14 +3227,14 @@ static int kyber_gen_matrix_i(KYBER_PRF_T* prf, sword16* a, int k, byte* seed, * @param [out] p Polynomial computed. 
* @param [in] r Random bytes. */ -static void kyber_cbd_eta2(sword16* p, const byte* r) +static void mlkem_cbd_eta2(sword16* p, const byte* r) { unsigned int i; #ifndef WORD64_AVAILABLE /* Calculate eight integer coefficients at a time. */ - for (i = 0; i < KYBER_N; i += 8) { - #ifdef WOLFSSL_KYBER_SMALL + for (i = 0; i < MLKEM_N; i += 8) { + #ifdef WOLFSSL_MLKEM_SMALL unsigned int j; #endif /* Take the next 4 bytes, little endian, as a 32 bit value. */ @@ -3246,7 +3250,7 @@ static void kyber_cbd_eta2(sword16* p, const byte* r) /* Values 0, 1 or 2 in consecutive 2 bits. * 0 - 1/4, 1 - 2/4, 2 - 1/4. */ - #ifdef WOLFSSL_KYBER_SMALL + #ifdef WOLFSSL_MLKEM_SMALL for (j = 0; j < 8; j++) { p[i + j] = ETA2_SUB(d, j); } @@ -3267,8 +3271,8 @@ static void kyber_cbd_eta2(sword16* p, const byte* r) } #else /* Calculate sixteen integer coefficients at a time. */ - for (i = 0; i < KYBER_N; i += 16) { - #ifdef WOLFSSL_KYBER_SMALL + for (i = 0; i < MLKEM_N; i += 16) { + #ifdef WOLFSSL_MLKEM_SMALL unsigned int j; #endif /* Take the next 8 bytes, little endian, as a 64 bit value. */ @@ -3284,7 +3288,7 @@ static void kyber_cbd_eta2(sword16* p, const byte* r) /* Values 0, 1 or 2 in consecutive 2 bits. * 0 - 1/4, 1 - 2/4, 2 - 1/4. */ - #ifdef WOLFSSL_KYBER_SMALL + #ifdef WOLFSSL_MLKEM_SMALL for (j = 0; j < 16; j++) { p[i + j] = ETA2_SUB(d, j); } @@ -3339,16 +3343,16 @@ static void kyber_cbd_eta2(sword16* p, const byte* r) * @param [out] p Polynomial computed. * @param [in] r Random bytes. */ -static void kyber_cbd_eta3(sword16* p, const byte* r) +static void mlkem_cbd_eta3(sword16* p, const byte* r) { unsigned int i; -#if defined(WOLFSSL_SMALL_STACK) || defined(WOLFSSL_KYBER_NO_LARGE_CODE) || \ +#if defined(WOLFSSL_SMALL_STACK) || defined(WOLFSSL_MLKEM_NO_LARGE_CODE) || \ defined(BIG_ENDIAN_ORDER) #ifndef WORD64_AVAILABLE /* Calculate four integer coefficients at a time. 
*/ - for (i = 0; i < KYBER_N; i += 4) { - #ifdef WOLFSSL_KYBER_SMALL + for (i = 0; i < MLKEM_N; i += 4) { + #ifdef WOLFSSL_MLKEM_SMALL unsigned int j; #endif /* Take the next 3 bytes, little endian, as a 24 bit value. */ @@ -3363,7 +3367,7 @@ static void kyber_cbd_eta3(sword16* p, const byte* r) /* Values 0, 1, 2 or 3 in consecutive 3 bits. * 0 - 1/8, 1 - 3/8, 2 - 3/8, 3 - 1/8. */ - #ifdef WOLFSSL_KYBER_SMALL + #ifdef WOLFSSL_MLKEM_SMALL for (j = 0; j < 4; j++) { p[i + j] = ETA3_SUB(d, j); } @@ -3380,8 +3384,8 @@ static void kyber_cbd_eta3(sword16* p, const byte* r) } #else /* Calculate eight integer coefficients at a time. */ - for (i = 0; i < KYBER_N; i += 8) { - #ifdef WOLFSSL_KYBER_SMALL + for (i = 0; i < MLKEM_N; i += 8) { + #ifdef WOLFSSL_MLKEM_SMALL unsigned int j; #endif /* Take the next 6 bytes, little endian, as a 48 bit value. */ @@ -3399,7 +3403,7 @@ static void kyber_cbd_eta3(sword16* p, const byte* r) /* Values 0, 1, 2 or 3 in consecutive 3 bits. * 0 - 1/8, 1 - 3/8, 2 - 3/8, 3 - 1/8. */ - #ifdef WOLFSSL_KYBER_SMALL + #ifdef WOLFSSL_MLKEM_SMALL for (j = 0; j < 8; j++) { p[i + j] = ETA3_SUB(d, j); } @@ -3421,7 +3425,7 @@ static void kyber_cbd_eta3(sword16* p, const byte* r) #endif /* WORD64_AVAILABLE */ #else /* Calculate eight integer coefficients at a time. */ - for (i = 0; i < KYBER_N; i += 16) { + for (i = 0; i < MLKEM_N; i += 16) { const word32* r32 = (const word32*)r; /* Take the next 12 bytes, little endian, as 24 bit values. */ word32 t0 = r32[0] & 0xffffff; @@ -3470,7 +3474,8 @@ static void kyber_cbd_eta3(sword16* p, const byte* r) /* Move over used bytes. */ r += 12; } -#endif /* WOLFSSL_SMALL_STACK || WOLFSSL_KYBER_NO_LARGE_CODE || BIG_ENDIAN_ORDER */ +#endif /* WOLFSSL_SMALL_STACK || WOLFSSL_MLKEM_NO_LARGE_CODE || + * BIG_ENDIAN_ORDER */ } #endif @@ -3496,7 +3501,7 @@ static void kyber_cbd_eta3(sword16* p, const byte* r) * @param [in] eta1 Size of noise/error integers. * @return 0 on success. 
*/ -static int kyber_get_noise_eta1_c(KYBER_PRF_T* prf, sword16* p, +static int mlkem_get_noise_eta1_c(MLKEM_PRF_T* prf, sword16* p, const byte* seed, byte eta1) { int ret; @@ -3504,14 +3509,14 @@ static int kyber_get_noise_eta1_c(KYBER_PRF_T* prf, sword16* p, (void)eta1; #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) - if (eta1 == KYBER_CBD_ETA3) { + if (eta1 == MLKEM_CBD_ETA3) { byte rand[ETA3_RAND_SIZE]; /* Calculate random bytes from seed with PRF. */ - ret = kyber_prf(prf, rand, sizeof(rand), seed); + ret = mlkem_prf(prf, rand, sizeof(rand), seed); if (ret == 0) { /* Sample for values in range -3..3 from 3 bits of random. */ - kyber_cbd_eta3(p, rand); + mlkem_cbd_eta3(p, rand); } } else @@ -3520,10 +3525,10 @@ static int kyber_get_noise_eta1_c(KYBER_PRF_T* prf, sword16* p, byte rand[ETA2_RAND_SIZE]; /* Calculate random bytes from seed with PRF. */ - ret = kyber_prf(prf, rand, sizeof(rand), seed); + ret = mlkem_prf(prf, rand, sizeof(rand), seed); if (ret == 0) { /* Sample for values in range -2..2 from 2 bits of random. */ - kyber_cbd_eta2(p, rand); + mlkem_cbd_eta2(p, rand); } } @@ -3545,16 +3550,16 @@ static int kyber_get_noise_eta1_c(KYBER_PRF_T* prf, sword16* p, * @param [in] seed Seed to use when calculating random. * @return 0 on success. */ -static int kyber_get_noise_eta2_c(KYBER_PRF_T* prf, sword16* p, +static int mlkem_get_noise_eta2_c(MLKEM_PRF_T* prf, sword16* p, const byte* seed) { int ret; byte rand[ETA2_RAND_SIZE]; /* Calculate random bytes from seed with PRF. */ - ret = kyber_prf(prf, rand, sizeof(rand), seed); + ret = mlkem_prf(prf, rand, sizeof(rand), seed); if (ret == 0) { - kyber_cbd_eta2(p, rand); + mlkem_cbd_eta2(p, rand); } return ret; @@ -3580,7 +3585,7 @@ static int kyber_get_noise_eta2_c(KYBER_PRF_T* prf, sword16* p, * @param [in] seed Seed to generate random from. * @param [in] o Offset of seed count. 
*/ -static void kyber_get_noise_x4_eta2_avx2(byte* rand, byte* seed, byte o) +static void mlkem_get_noise_x4_eta2_avx2(byte* rand, byte* seed, byte o) { int i; word64 state[25 * 4]; @@ -3589,13 +3594,46 @@ static void kyber_get_noise_x4_eta2_avx2(byte* rand, byte* seed, byte o) state[4*4 + i] = 0x1f00 + i + o; } - kyber_sha3_256_blocksx4_seed_avx2(state, seed); - kyber_redistribute_16_rand_avx2(state, rand + 0 * ETA2_RAND_SIZE, + mlkem_sha3_256_blocksx4_seed_avx2(state, seed); + mlkem_redistribute_16_rand_avx2(state, rand + 0 * ETA2_RAND_SIZE, rand + 1 * ETA2_RAND_SIZE, rand + 2 * ETA2_RAND_SIZE, rand + 3 * ETA2_RAND_SIZE); } #endif +#if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) || \ + defined(WOLFSSL_KYBER1024) || defined(WOLFSSL_WC_ML_KEM_1024) +/* Get noise/error by calculating random bytes and sampling to a binomial + * distribution. Values -2..2 + * + * FIPS 203, Algorithm 14: K-PKE.Encrypt(ek_PKE,m,r) + * ... + * 14: e1[i] <- SamplePolyCBD_eta_2(PRF_eta_2(r, N)) + * ... + * 17: e2[i] <- SamplePolyCBD_eta_2(PRF_eta_2(r, N)) + * ... + * + * @param [in, out] prf Pseudo-random function object. + * @param [out] p Polynomial. + * @param [in] seed Seed to use when calculating random. + * @return 0 on success. + */ +static int mlkem_get_noise_eta2_avx2(MLKEM_PRF_T* prf, sword16* p, + const byte* seed) +{ + int ret; + byte rand[ETA2_RAND_SIZE]; + + /* Calculate random bytes from seed with PRF. */ + ret = mlkem_prf(prf, rand, sizeof(rand), seed); + if (ret == 0) { + mlkem_cbd_eta2_avx2(p, rand); + } + + return ret; +} +#endif + #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) /* Get the noise/error by calculating random bytes. * @@ -3610,7 +3648,7 @@ static void kyber_get_noise_x4_eta2_avx2(byte* rand, byte* seed, byte o) * @param [in] seed Seed to generate random from. * @param [in] o Offset of seed count. 
*/ -static void kyber_get_noise_x4_eta3_avx2(byte* rand, byte* seed) +static void mlkem_get_noise_x4_eta3_avx2(byte* rand, byte* seed) { word64 state[25 * 4]; int i; @@ -3620,47 +3658,17 @@ static void kyber_get_noise_x4_eta3_avx2(byte* rand, byte* seed) state[4*4 + 2] = 0x1f00 + 2; state[4*4 + 3] = 0x1f00 + 3; - kyber_sha3_256_blocksx4_seed_avx2(state, seed); - kyber_redistribute_17_rand_avx2(state, rand + 0 * PRF_RAND_SZ, + mlkem_sha3_256_blocksx4_seed_avx2(state, seed); + mlkem_redistribute_17_rand_avx2(state, rand + 0 * PRF_RAND_SZ, rand + 1 * PRF_RAND_SZ, rand + 2 * PRF_RAND_SZ, rand + 3 * PRF_RAND_SZ); i = SHA3_256_BYTES; - kyber_sha3_blocksx4_avx2(state); - kyber_redistribute_8_rand_avx2(state, rand + i + 0 * PRF_RAND_SZ, + sha3_blocksx4_avx2(state); + mlkem_redistribute_8_rand_avx2(state, rand + i + 0 * PRF_RAND_SZ, rand + i + 1 * PRF_RAND_SZ, rand + i + 2 * PRF_RAND_SZ, rand + i + 3 * PRF_RAND_SZ); } -/* Get noise/error by calculating random bytes and sampling to a binomial - * distribution. Values -2..2 - * - * FIPS 203, Algorithm 14: K-PKE.Encrypt(ek_PKE,m,r) - * ... - * 14: e1[i] <- SamplePolyCBD_eta_2(PRF_eta_2(r, N)) - * ... - * 17: e2[i] <- SamplePolyCBD_eta_2(PRF_eta_2(r, N)) - * ... - * - * @param [in, out] prf Pseudo-random function object. - * @param [out] p Polynomial. - * @param [in] seed Seed to use when calculating random. - * @return 0 on success. - */ -static int kyber_get_noise_eta2_avx2(KYBER_PRF_T* prf, sword16* p, - const byte* seed) -{ - int ret; - byte rand[ETA2_RAND_SIZE]; - - /* Calculate random bytes from seed with PRF. */ - ret = kyber_prf(prf, rand, sizeof(rand), seed); - if (ret == 0) { - kyber_cbd_eta2_avx2(p, rand); - } - - return ret; -} - /* Get the noise/error by calculating random bytes and sampling to a binomial * distribution. * @@ -3671,25 +3679,25 @@ static int kyber_get_noise_eta2_avx2(KYBER_PRF_T* prf, sword16* p, * @param [in] seed Seed to use when calculating random. * @return 0 on success. 
*/ -static int kyber_get_noise_k2_avx2(KYBER_PRF_T* prf, sword16* vec1, +static int mlkem_get_noise_k2_avx2(MLKEM_PRF_T* prf, sword16* vec1, sword16* vec2, sword16* poly, byte* seed) { int ret = 0; byte rand[4 * PRF_RAND_SZ]; - kyber_get_noise_x4_eta3_avx2(rand, seed); - kyber_cbd_eta3_avx2(vec1 , rand + 0 * PRF_RAND_SZ); - kyber_cbd_eta3_avx2(vec1 + KYBER_N, rand + 1 * PRF_RAND_SZ); + mlkem_get_noise_x4_eta3_avx2(rand, seed); + mlkem_cbd_eta3_avx2(vec1 , rand + 0 * PRF_RAND_SZ); + mlkem_cbd_eta3_avx2(vec1 + MLKEM_N, rand + 1 * PRF_RAND_SZ); if (poly == NULL) { - kyber_cbd_eta3_avx2(vec2 , rand + 2 * PRF_RAND_SZ); - kyber_cbd_eta3_avx2(vec2 + KYBER_N, rand + 3 * PRF_RAND_SZ); + mlkem_cbd_eta3_avx2(vec2 , rand + 2 * PRF_RAND_SZ); + mlkem_cbd_eta3_avx2(vec2 + MLKEM_N, rand + 3 * PRF_RAND_SZ); } else { - kyber_cbd_eta2_avx2(vec2 , rand + 2 * PRF_RAND_SZ); - kyber_cbd_eta2_avx2(vec2 + KYBER_N, rand + 3 * PRF_RAND_SZ); + mlkem_cbd_eta2_avx2(vec2 , rand + 2 * PRF_RAND_SZ); + mlkem_cbd_eta2_avx2(vec2 + MLKEM_N, rand + 3 * PRF_RAND_SZ); - seed[KYBER_SYM_SZ] = 4; - ret = kyber_get_noise_eta2_avx2(prf, poly, seed); + seed[WC_ML_KEM_SYM_SZ] = 4; + ret = mlkem_get_noise_eta2_avx2(prf, poly, seed); } return ret; @@ -3706,21 +3714,21 @@ static int kyber_get_noise_k2_avx2(KYBER_PRF_T* prf, sword16* vec1, * @param [in] seed Seed to use when calculating random. * @return 0 on success. 
*/ -static int kyber_get_noise_k3_avx2(sword16* vec1, sword16* vec2, sword16* poly, +static int mlkem_get_noise_k3_avx2(sword16* vec1, sword16* vec2, sword16* poly, byte* seed) { byte rand[4 * ETA2_RAND_SIZE]; - kyber_get_noise_x4_eta2_avx2(rand, seed, 0); - kyber_cbd_eta2_avx2(vec1 , rand + 0 * ETA2_RAND_SIZE); - kyber_cbd_eta2_avx2(vec1 + 1 * KYBER_N, rand + 1 * ETA2_RAND_SIZE); - kyber_cbd_eta2_avx2(vec1 + 2 * KYBER_N, rand + 2 * ETA2_RAND_SIZE); - kyber_cbd_eta2_avx2(vec2 , rand + 3 * ETA2_RAND_SIZE); - kyber_get_noise_x4_eta2_avx2(rand, seed, 4); - kyber_cbd_eta2_avx2(vec2 + 1 * KYBER_N, rand + 0 * ETA2_RAND_SIZE); - kyber_cbd_eta2_avx2(vec2 + 2 * KYBER_N, rand + 1 * ETA2_RAND_SIZE); + mlkem_get_noise_x4_eta2_avx2(rand, seed, 0); + mlkem_cbd_eta2_avx2(vec1 , rand + 0 * ETA2_RAND_SIZE); + mlkem_cbd_eta2_avx2(vec1 + 1 * MLKEM_N, rand + 1 * ETA2_RAND_SIZE); + mlkem_cbd_eta2_avx2(vec1 + 2 * MLKEM_N, rand + 2 * ETA2_RAND_SIZE); + mlkem_cbd_eta2_avx2(vec2 , rand + 3 * ETA2_RAND_SIZE); + mlkem_get_noise_x4_eta2_avx2(rand, seed, 4); + mlkem_cbd_eta2_avx2(vec2 + 1 * MLKEM_N, rand + 0 * ETA2_RAND_SIZE); + mlkem_cbd_eta2_avx2(vec2 + 2 * MLKEM_N, rand + 1 * ETA2_RAND_SIZE); if (poly != NULL) { - kyber_cbd_eta2_avx2(poly, rand + 2 * ETA2_RAND_SIZE); + mlkem_cbd_eta2_avx2(poly, rand + 2 * ETA2_RAND_SIZE); } return 0; @@ -3738,7 +3746,7 @@ static int kyber_get_noise_k3_avx2(sword16* vec1, sword16* vec2, sword16* poly, * @param [in] seed Seed to use when calculating random. * @return 0 on success. 
*/ -static int kyber_get_noise_k4_avx2(KYBER_PRF_T* prf, sword16* vec1, +static int mlkem_get_noise_k4_avx2(MLKEM_PRF_T* prf, sword16* vec1, sword16* vec2, sword16* poly, byte* seed) { int ret = 0; @@ -3746,19 +3754,19 @@ static int kyber_get_noise_k4_avx2(KYBER_PRF_T* prf, sword16* vec1, (void)prf; - kyber_get_noise_x4_eta2_avx2(rand, seed, 0); - kyber_cbd_eta2_avx2(vec1 , rand + 0 * ETA2_RAND_SIZE); - kyber_cbd_eta2_avx2(vec1 + 1 * KYBER_N, rand + 1 * ETA2_RAND_SIZE); - kyber_cbd_eta2_avx2(vec1 + 2 * KYBER_N, rand + 2 * ETA2_RAND_SIZE); - kyber_cbd_eta2_avx2(vec1 + 3 * KYBER_N, rand + 3 * ETA2_RAND_SIZE); - kyber_get_noise_x4_eta2_avx2(rand, seed, 4); - kyber_cbd_eta2_avx2(vec2 , rand + 0 * ETA2_RAND_SIZE); - kyber_cbd_eta2_avx2(vec2 + 1 * KYBER_N, rand + 1 * ETA2_RAND_SIZE); - kyber_cbd_eta2_avx2(vec2 + 2 * KYBER_N, rand + 2 * ETA2_RAND_SIZE); - kyber_cbd_eta2_avx2(vec2 + 3 * KYBER_N, rand + 3 * ETA2_RAND_SIZE); + mlkem_get_noise_x4_eta2_avx2(rand, seed, 0); + mlkem_cbd_eta2_avx2(vec1 , rand + 0 * ETA2_RAND_SIZE); + mlkem_cbd_eta2_avx2(vec1 + 1 * MLKEM_N, rand + 1 * ETA2_RAND_SIZE); + mlkem_cbd_eta2_avx2(vec1 + 2 * MLKEM_N, rand + 2 * ETA2_RAND_SIZE); + mlkem_cbd_eta2_avx2(vec1 + 3 * MLKEM_N, rand + 3 * ETA2_RAND_SIZE); + mlkem_get_noise_x4_eta2_avx2(rand, seed, 4); + mlkem_cbd_eta2_avx2(vec2 , rand + 0 * ETA2_RAND_SIZE); + mlkem_cbd_eta2_avx2(vec2 + 1 * MLKEM_N, rand + 1 * ETA2_RAND_SIZE); + mlkem_cbd_eta2_avx2(vec2 + 2 * MLKEM_N, rand + 2 * ETA2_RAND_SIZE); + mlkem_cbd_eta2_avx2(vec2 + 3 * MLKEM_N, rand + 3 * ETA2_RAND_SIZE); if (poly != NULL) { - seed[KYBER_SYM_SZ] = 8; - ret = kyber_get_noise_eta2_avx2(prf, poly, seed); + seed[WC_ML_KEM_SYM_SZ] = 8; + ret = mlkem_get_noise_eta2_avx2(prf, poly, seed); } return ret; @@ -3783,7 +3791,7 @@ static int kyber_get_noise_k4_avx2(KYBER_PRF_T* prf, sword16* vec1, * @param [in] seed Seed to generate random from. * @param [in] o Offset of seed count. 
*/ -static void kyber_get_noise_x3_eta2_aarch64(byte* rand, byte* seed, byte o) +static void mlkem_get_noise_x3_eta2_aarch64(byte* rand, byte* seed, byte o) { word64* state = (word64*)rand; @@ -3791,7 +3799,7 @@ static void kyber_get_noise_x3_eta2_aarch64(byte* rand, byte* seed, byte o) state[1*25 + 4] = 0x1f00 + 1 + o; state[2*25 + 4] = 0x1f00 + 2 + o; - kyber_shake256_blocksx3_seed_neon(state, seed); + mlkem_shake256_blocksx3_seed_neon(state, seed); } #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) @@ -3808,7 +3816,7 @@ static void kyber_get_noise_x3_eta2_aarch64(byte* rand, byte* seed, byte o) * @param [in] seed Seed to generate random from. * @param [in] o Offset of seed count. */ -static void kyber_get_noise_x3_eta3_aarch64(byte* rand, byte* seed, byte o) +static void mlkem_get_noise_x3_eta3_aarch64(byte* rand, byte* seed, byte o) { word64 state[3 * 25]; @@ -3816,11 +3824,11 @@ static void kyber_get_noise_x3_eta3_aarch64(byte* rand, byte* seed, byte o) state[1*25 + 4] = 0x1f00 + 1 + o; state[2*25 + 4] = 0x1f00 + 2 + o; - kyber_shake256_blocksx3_seed_neon(state, seed); + mlkem_shake256_blocksx3_seed_neon(state, seed); XMEMCPY(rand + 0 * ETA3_RAND_SIZE, state + 0*25, SHA3_256_BYTES); XMEMCPY(rand + 1 * ETA3_RAND_SIZE, state + 1*25, SHA3_256_BYTES); XMEMCPY(rand + 2 * ETA3_RAND_SIZE, state + 2*25, SHA3_256_BYTES); - kyber_sha3_blocksx3_neon(state); + mlkem_sha3_blocksx3_neon(state); rand += SHA3_256_BYTES; XMEMCPY(rand + 0 * ETA3_RAND_SIZE, state + 0*25, ETA3_RAND_SIZE - SHA3_256_BYTES); @@ -3844,7 +3852,7 @@ static void kyber_get_noise_x3_eta3_aarch64(byte* rand, byte* seed, byte o) * @param [in] o Offset of seed count. * @return 0 on success. 
*/ -static void kyber_get_noise_eta3_aarch64(byte* rand, byte* seed, byte o) +static void mlkem_get_noise_eta3_aarch64(byte* rand, byte* seed, byte o) { word64 state[25]; @@ -3870,25 +3878,25 @@ static void kyber_get_noise_eta3_aarch64(byte* rand, byte* seed, byte o) * @param [in] seed Seed to use when calculating random. * @return 0 on success. */ -static int kyber_get_noise_k2_aarch64(sword16* vec1, sword16* vec2, +static int mlkem_get_noise_k2_aarch64(sword16* vec1, sword16* vec2, sword16* poly, byte* seed) { int ret = 0; byte rand[3 * 25 * 8]; - kyber_get_noise_x3_eta3_aarch64(rand, seed, 0); - kyber_cbd_eta3(vec1 , rand + 0 * ETA3_RAND_SIZE); - kyber_cbd_eta3(vec1 + KYBER_N, rand + 1 * ETA3_RAND_SIZE); + mlkem_get_noise_x3_eta3_aarch64(rand, seed, 0); + mlkem_cbd_eta3(vec1 , rand + 0 * ETA3_RAND_SIZE); + mlkem_cbd_eta3(vec1 + MLKEM_N, rand + 1 * ETA3_RAND_SIZE); if (poly == NULL) { - kyber_cbd_eta3(vec2 , rand + 2 * ETA3_RAND_SIZE); - kyber_get_noise_eta3_aarch64(rand, seed, 3); - kyber_cbd_eta3(vec2 + KYBER_N, rand ); + mlkem_cbd_eta3(vec2 , rand + 2 * ETA3_RAND_SIZE); + mlkem_get_noise_eta3_aarch64(rand, seed, 3); + mlkem_cbd_eta3(vec2 + MLKEM_N, rand ); } else { - kyber_get_noise_x3_eta2_aarch64(rand, seed, 2); - kyber_cbd_eta2(vec2 , rand + 0 * 25 * 8); - kyber_cbd_eta2(vec2 + KYBER_N, rand + 1 * 25 * 8); - kyber_cbd_eta2(poly , rand + 2 * 25 * 8); + mlkem_get_noise_x3_eta2_aarch64(rand, seed, 2); + mlkem_cbd_eta2(vec2 , rand + 0 * 25 * 8); + mlkem_cbd_eta2(vec2 + MLKEM_N, rand + 1 * 25 * 8); + mlkem_cbd_eta2(poly , rand + 2 * 25 * 8); } return ret; @@ -3910,7 +3918,7 @@ static int kyber_get_noise_k2_aarch64(sword16* vec1, sword16* vec2, * @param [in] o Offset of seed count. * @return 0 on success. 
*/ -static void kyber_get_noise_eta2_aarch64(byte* rand, byte* seed, byte o) +static void mlkem_get_noise_eta2_aarch64(byte* rand, byte* seed, byte o) { word64* state = (word64*)rand; @@ -3934,22 +3942,22 @@ static void kyber_get_noise_eta2_aarch64(byte* rand, byte* seed, byte o) * @param [in] seed Seed to use when calculating random. * @return 0 on success. */ -static int kyber_get_noise_k3_aarch64(sword16* vec1, sword16* vec2, +static int mlkem_get_noise_k3_aarch64(sword16* vec1, sword16* vec2, sword16* poly, byte* seed) { byte rand[3 * 25 * 8]; - kyber_get_noise_x3_eta2_aarch64(rand, seed, 0); - kyber_cbd_eta2(vec1 , rand + 0 * 25 * 8); - kyber_cbd_eta2(vec1 + 1 * KYBER_N, rand + 1 * 25 * 8); - kyber_cbd_eta2(vec1 + 2 * KYBER_N, rand + 2 * 25 * 8); - kyber_get_noise_x3_eta2_aarch64(rand, seed, 3); - kyber_cbd_eta2(vec2 , rand + 0 * 25 * 8); - kyber_cbd_eta2(vec2 + 1 * KYBER_N, rand + 1 * 25 * 8); - kyber_cbd_eta2(vec2 + 2 * KYBER_N, rand + 2 * 25 * 8); + mlkem_get_noise_x3_eta2_aarch64(rand, seed, 0); + mlkem_cbd_eta2(vec1 , rand + 0 * 25 * 8); + mlkem_cbd_eta2(vec1 + 1 * MLKEM_N, rand + 1 * 25 * 8); + mlkem_cbd_eta2(vec1 + 2 * MLKEM_N, rand + 2 * 25 * 8); + mlkem_get_noise_x3_eta2_aarch64(rand, seed, 3); + mlkem_cbd_eta2(vec2 , rand + 0 * 25 * 8); + mlkem_cbd_eta2(vec2 + 1 * MLKEM_N, rand + 1 * 25 * 8); + mlkem_cbd_eta2(vec2 + 2 * MLKEM_N, rand + 2 * 25 * 8); if (poly != NULL) { - kyber_get_noise_eta2_aarch64(rand, seed, 6); - kyber_cbd_eta2(poly , rand + 0 * 25 * 8); + mlkem_get_noise_eta2_aarch64(rand, seed, 6); + mlkem_cbd_eta2(poly , rand + 0 * 25 * 8); } return 0; @@ -3966,25 +3974,25 @@ static int kyber_get_noise_k3_aarch64(sword16* vec1, sword16* vec2, * @param [in] seed Seed to use when calculating random. * @return 0 on success. 
*/ -static int kyber_get_noise_k4_aarch64(sword16* vec1, sword16* vec2, +static int mlkem_get_noise_k4_aarch64(sword16* vec1, sword16* vec2, sword16* poly, byte* seed) { int ret = 0; byte rand[3 * 25 * 8]; - kyber_get_noise_x3_eta2_aarch64(rand, seed, 0); - kyber_cbd_eta2(vec1 , rand + 0 * 25 * 8); - kyber_cbd_eta2(vec1 + 1 * KYBER_N, rand + 1 * 25 * 8); - kyber_cbd_eta2(vec1 + 2 * KYBER_N, rand + 2 * 25 * 8); - kyber_get_noise_x3_eta2_aarch64(rand, seed, 3); - kyber_cbd_eta2(vec1 + 3 * KYBER_N, rand + 0 * 25 * 8); - kyber_cbd_eta2(vec2 , rand + 1 * 25 * 8); - kyber_cbd_eta2(vec2 + 1 * KYBER_N, rand + 2 * 25 * 8); - kyber_get_noise_x3_eta2_aarch64(rand, seed, 6); - kyber_cbd_eta2(vec2 + 2 * KYBER_N, rand + 0 * 25 * 8); - kyber_cbd_eta2(vec2 + 3 * KYBER_N, rand + 1 * 25 * 8); + mlkem_get_noise_x3_eta2_aarch64(rand, seed, 0); + mlkem_cbd_eta2(vec1 , rand + 0 * 25 * 8); + mlkem_cbd_eta2(vec1 + 1 * MLKEM_N, rand + 1 * 25 * 8); + mlkem_cbd_eta2(vec1 + 2 * MLKEM_N, rand + 2 * 25 * 8); + mlkem_get_noise_x3_eta2_aarch64(rand, seed, 3); + mlkem_cbd_eta2(vec1 + 3 * MLKEM_N, rand + 0 * 25 * 8); + mlkem_cbd_eta2(vec2 , rand + 1 * 25 * 8); + mlkem_cbd_eta2(vec2 + 1 * MLKEM_N, rand + 2 * 25 * 8); + mlkem_get_noise_x3_eta2_aarch64(rand, seed, 6); + mlkem_cbd_eta2(vec2 + 2 * MLKEM_N, rand + 0 * 25 * 8); + mlkem_cbd_eta2(vec2 + 3 * MLKEM_N, rand + 1 * 25 * 8); if (poly != NULL) { - kyber_cbd_eta2(poly, rand + 2 * 25 * 8); + mlkem_cbd_eta2(poly, rand + 2 * 25 * 8); } return ret; @@ -4007,36 +4015,36 @@ static int kyber_get_noise_k4_aarch64(sword16* vec1, sword16* vec2, * @param [in] seed Seed to use when calculating random. * @return 0 on success. */ -static int kyber_get_noise_c(KYBER_PRF_T* prf, int k, sword16* vec1, int eta1, +static int mlkem_get_noise_c(MLKEM_PRF_T* prf, int k, sword16* vec1, int eta1, sword16* vec2, int eta2, sword16* poly, byte* seed) { int ret = 0; int i; /* First noise generation has a seed with 0x00 appended. 
*/ - seed[KYBER_SYM_SZ] = 0; + seed[WC_ML_KEM_SYM_SZ] = 0; /* Generate noise as private key. */ for (i = 0; (ret == 0) && (i < k); i++) { /* Generate noise for each dimension of vector. */ - ret = kyber_get_noise_eta1_c(prf, vec1 + i * KYBER_N, seed, eta1); + ret = mlkem_get_noise_eta1_c(prf, vec1 + i * MLKEM_N, seed, eta1); /* Increment value of appended byte. */ - seed[KYBER_SYM_SZ]++; + seed[WC_ML_KEM_SYM_SZ]++; } if ((ret == 0) && (vec2 != NULL)) { /* Generate noise for error. */ for (i = 0; (ret == 0) && (i < k); i++) { /* Generate noise for each dimension of vector. */ - ret = kyber_get_noise_eta1_c(prf, vec2 + i * KYBER_N, seed, eta2); + ret = mlkem_get_noise_eta1_c(prf, vec2 + i * MLKEM_N, seed, eta2); /* Increment value of appended byte. */ - seed[KYBER_SYM_SZ]++; + seed[WC_ML_KEM_SYM_SZ]++; } } else { - seed[KYBER_SYM_SZ] = 2 * k; + seed[WC_ML_KEM_SYM_SZ] = 2 * k; } if ((ret == 0) && (poly != NULL)) { /* Generating random error polynomial. */ - ret = kyber_get_noise_eta2_c(prf, poly, seed); + ret = mlkem_get_noise_eta2_c(prf, poly, seed); } return ret; @@ -4055,70 +4063,70 @@ static int kyber_get_noise_c(KYBER_PRF_T* prf, int k, sword16* vec1, int eta1, * @param [in] seed Seed to use when calculating random. * @return 0 on success. 
*/ -int kyber_get_noise(KYBER_PRF_T* prf, int k, sword16* vec1, sword16* vec2, +int mlkem_get_noise(MLKEM_PRF_T* prf, int k, sword16* vec1, sword16* vec2, sword16* poly, byte* seed) { int ret; #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) - if (k == KYBER512_K) { + if (k == WC_ML_KEM_512_K) { #if defined(WOLFSSL_ARMASM) && defined(__aarch64__) - ret = kyber_get_noise_k2_aarch64(vec1, vec2, poly, seed); + ret = mlkem_get_noise_k2_aarch64(vec1, vec2, poly, seed); #else #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - ret = kyber_get_noise_k2_avx2(prf, vec1, vec2, poly, seed); + ret = mlkem_get_noise_k2_avx2(prf, vec1, vec2, poly, seed); RESTORE_VECTOR_REGISTERS(); } else #endif if (poly == NULL) { - ret = kyber_get_noise_c(prf, k, vec1, KYBER_CBD_ETA3, vec2, - KYBER_CBD_ETA3, NULL, seed); + ret = mlkem_get_noise_c(prf, k, vec1, MLKEM_CBD_ETA3, vec2, + MLKEM_CBD_ETA3, NULL, seed); } else { - ret = kyber_get_noise_c(prf, k, vec1, KYBER_CBD_ETA3, vec2, - KYBER_CBD_ETA2, poly, seed); + ret = mlkem_get_noise_c(prf, k, vec1, MLKEM_CBD_ETA3, vec2, + MLKEM_CBD_ETA2, poly, seed); } #endif } else #endif #if defined(WOLFSSL_KYBER768) || defined(WOLFSSL_WC_ML_KEM_768) - if (k == KYBER768_K) { + if (k == WC_ML_KEM_768_K) { #if defined(WOLFSSL_ARMASM) && defined(__aarch64__) - ret = kyber_get_noise_k3_aarch64(vec1, vec2, poly, seed); + ret = mlkem_get_noise_k3_aarch64(vec1, vec2, poly, seed); #else #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - ret = kyber_get_noise_k3_avx2(vec1, vec2, poly, seed); + ret = mlkem_get_noise_k3_avx2(vec1, vec2, poly, seed); RESTORE_VECTOR_REGISTERS(); } else #endif { - ret = kyber_get_noise_c(prf, k, vec1, KYBER_CBD_ETA2, vec2, - KYBER_CBD_ETA2, poly, seed); + ret = mlkem_get_noise_c(prf, k, vec1, MLKEM_CBD_ETA2, vec2, + MLKEM_CBD_ETA2, poly, seed); } #endif } else #endif #if defined(WOLFSSL_KYBER1024) || defined(WOLFSSL_WC_ML_KEM_1024) - 
if (k == KYBER1024_K) { + if (k == WC_ML_KEM_1024_K) { #if defined(WOLFSSL_ARMASM) && defined(__aarch64__) - ret = kyber_get_noise_k4_aarch64(vec1, vec2, poly, seed); + ret = mlkem_get_noise_k4_aarch64(vec1, vec2, poly, seed); #else #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - ret = kyber_get_noise_k4_avx2(prf, vec1, vec2, poly, seed); + ret = mlkem_get_noise_k4_avx2(prf, vec1, vec2, poly, seed); RESTORE_VECTOR_REGISTERS(); } else #endif { - ret = kyber_get_noise_c(prf, k, vec1, KYBER_CBD_ETA2, vec2, - KYBER_CBD_ETA2, poly, seed); + ret = mlkem_get_noise_c(prf, k, vec1, MLKEM_CBD_ETA2, vec2, + MLKEM_CBD_ETA2, poly, seed); } #endif } @@ -4146,25 +4154,25 @@ int kyber_get_noise(KYBER_PRF_T* prf, int k, sword16* vec1, sword16* vec2, * @param [in] make Indicates generation is for making a key. * @return 0 on success. */ -static int kyber_get_noise_i(KYBER_PRF_T* prf, int k, sword16* vec2, +static int mlkem_get_noise_i(MLKEM_PRF_T* prf, int k, sword16* vec2, byte* seed, int i, int make) { int ret; /* Initialize the PRF (generating matrix A leaves it in uninitialized * state). */ - kyber_prf_init(prf); + mlkem_prf_init(prf); /* Set index of polynomial of second vector into seed. */ - seed[KYBER_SYM_SZ] = k + i; + seed[WC_ML_KEM_SYM_SZ] = k + i; #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) - if ((k == KYBER512_K) && make) { - ret = kyber_get_noise_eta1_c(prf, vec2, seed, KYBER_CBD_ETA3); + if ((k == WC_ML_KEM_512_K) && make) { + ret = mlkem_get_noise_eta1_c(prf, vec2, seed, MLKEM_CBD_ETA3); } else #endif { - ret = kyber_get_noise_eta1_c(prf, vec2, seed, KYBER_CBD_ETA2); + ret = mlkem_get_noise_eta1_c(prf, vec2, seed, MLKEM_CBD_ETA2); } (void)make; @@ -4183,7 +4191,7 @@ static int kyber_get_noise_i(KYBER_PRF_T* prf, int k, sword16* vec2, * @return 0 on success. * @return -1 on failure. 
*/ -static int kyber_cmp_c(const byte* a, const byte* b, int sz) +static int mlkem_cmp_c(const byte* a, const byte* b, int sz) { int i; byte r = 0; @@ -4204,22 +4212,22 @@ static int kyber_cmp_c(const byte* a, const byte* b, int sz) * @return 0 on success. * @return -1 on failure. */ -int kyber_cmp(const byte* a, const byte* b, int sz) +int mlkem_cmp(const byte* a, const byte* b, int sz) { #if defined(__aarch64__) && defined(WOLFSSL_ARMASM) - return kyber_cmp_neon(a, b, sz); + return mlkem_cmp_neon(a, b, sz); #else int fail; #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - fail = kyber_cmp_avx2(a, b, sz); + fail = mlkem_cmp_avx2(a, b, sz); RESTORE_VECTOR_REGISTERS(); } else #endif { - fail = kyber_cmp_c(a, b, sz); + fail = mlkem_cmp_c(a, b, sz); } return fail; @@ -4236,14 +4244,14 @@ int kyber_cmp(const byte* a, const byte* b, int sz) * * @param [in, out] p Polynomial. */ -static KYBER_NOINLINE void kyber_csubq_c(sword16* p) +static MLKEM_NOINLINE void mlkem_csubq_c(sword16* p) { unsigned int i; - for (i = 0; i < KYBER_N; ++i) { - sword16 t = p[i] - KYBER_Q; + for (i = 0; i < MLKEM_N; ++i) { + sword16 t = p[i] - MLKEM_Q; /* When top bit set, -ve number - need to add q back. */ - p[i] = ((t >> 15) & KYBER_Q) + t; + p[i] = ((t >> 15) & MLKEM_Q) + t; } } @@ -4255,7 +4263,7 @@ static KYBER_NOINLINE void kyber_csubq_c(sword16* p) * * @param [in, out] p Polynomial. */ -#define kyber_csubq_c kyber_csubq_neon +#define mlkem_csubq_c mlkem_csubq_neon #elif defined(WOLFSSL_ARMASM_THUMB2) @@ -4265,7 +4273,7 @@ static KYBER_NOINLINE void kyber_csubq_c(sword16* p) * * @param [in, out] p Polynomial. */ -#define kyber_csubq_c kyber_thumb2_csubq +#define mlkem_csubq_c mlkem_thumb2_csubq #else @@ -4275,7 +4283,7 @@ static KYBER_NOINLINE void kyber_csubq_c(sword16* p) * * @param [in, out] p Polynomial. 
*/ -#define kyber_csubq_c kyber_arm32_csubq +#define mlkem_csubq_c mlkem_arm32_csubq #endif @@ -4298,7 +4306,7 @@ static KYBER_NOINLINE void kyber_csubq_c(sword16* p) * @return Compressed value. */ #define TO_COMP_WORD_VEC(v, i, j, k, s, m) \ - ((((word32)v[i * KYBER_N + j + k] << s) + KYBER_Q_HALF) / KYBER_Q) & m + ((((word32)v[i * MLKEM_N + j + k] << s) + MLKEM_Q_HALF) / MLKEM_Q) & m /* Compress value to 10 bits. * @@ -4333,22 +4341,22 @@ static KYBER_NOINLINE void kyber_csubq_c(sword16* p) #else /* Multiplier that does div q. - * ((1 << 53) + KYBER_Q_HALF) / KYBER_Q + * ((1 << 53) + MLKEM_Q_HALF) / MLKEM_Q */ -#define KYBER_V53 0x275f6ed0176UL +#define MLKEM_V53 0x275f6ed0176UL /* Multiplier times half of q. - * KYBER_V53 * (KYBER_Q_HALF + 1) + * MLKEM_V53 * (MLKEM_Q_HALF + 1) */ -#define KYBER_V53_HALF 0x10013afb768076UL +#define MLKEM_V53_HALF 0x10013afb768076UL /* Multiplier that does div q. - * ((1 << 54) + KYBER_Q_HALF) / KYBER_Q + * ((1 << 54) + MLKEM_Q_HALF) / MLKEM_Q */ -#define KYBER_V54 0x4ebedda02ecUL +#define MLKEM_V54 0x4ebedda02ecUL /* Multiplier times half of q. - * KYBER_V54 * (KYBER_Q_HALF + 1) + * MLKEM_V54 * (MLKEM_Q_HALF + 1) */ -#define KYBER_V54_HALF 0x200275f6ed00ecUL +#define MLKEM_V54_HALF 0x200275f6ed00ecUL /* Compress value to 10 bits. * @@ -4363,7 +4371,8 @@ static KYBER_NOINLINE void kyber_csubq_c(sword16* p) * @return Compressed value. */ #define TO_COMP_WORD_10(v, i, j, k) \ - ((((KYBER_V54 << 10) * (v)[(i) * KYBER_N + (j) + (k)]) + KYBER_V54_HALF) >> 54) + ((((MLKEM_V54 << 10) * (v)[(i) * MLKEM_N + (j) + (k)]) + \ + MLKEM_V54_HALF) >> 54) /* Compress value to 11 bits. * @@ -4379,12 +4388,13 @@ static KYBER_NOINLINE void kyber_csubq_c(sword16* p) * @return Compressed value. 
*/ #define TO_COMP_WORD_11(v, i, j, k) \ - ((((KYBER_V53 << 11) * (v)[(i) * KYBER_N + (j) + (k)]) + KYBER_V53_HALF) >> 53) + ((((MLKEM_V53 << 11) * (v)[(i) * MLKEM_N + (j) + (k)]) + \ + MLKEM_V53_HALF) >> 53) #endif /* CONV_WITH_DIV */ -#if !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) || \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) +#if !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) || \ + !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) || \ defined(WOLFSSL_KYBER768) || defined(WOLFSSL_WC_ML_KEM_768) /* Compress the vector of polynomials into a byte array with 10 bits each. @@ -4395,24 +4405,24 @@ static KYBER_NOINLINE void kyber_csubq_c(sword16* p) * @param [in] v Vector of polynomials. * @param [in] k Number of polynomials in vector. */ -static void kyber_vec_compress_10_c(byte* r, sword16* v, unsigned int k) +static void mlkem_vec_compress_10_c(byte* r, sword16* v, unsigned int k) { unsigned int i; unsigned int j; for (i = 0; i < k; i++) { /* Reduce each coefficient to mod q. */ - kyber_csubq_c(v + i * KYBER_N); + mlkem_csubq_c(v + i * MLKEM_N); /* All values are now positive. */ } /* Each polynomial. */ for (i = 0; i < k; i++) { -#if defined(WOLFSSL_SMALL_STACK) || defined(WOLFSSL_KYBER_NO_LARGE_CODE) || \ +#if defined(WOLFSSL_SMALL_STACK) || defined(WOLFSSL_MLKEM_NO_LARGE_CODE) || \ defined(BIG_ENDIAN_ORDER) /* Each 4 polynomial coefficients. */ - for (j = 0; j < KYBER_N; j += 4) { - #ifdef WOLFSSL_KYBER_SMALL + for (j = 0; j < MLKEM_N; j += 4) { + #ifdef WOLFSSL_MLKEM_SMALL unsigned int l; sword16 t[4]; /* Compress four polynomial values to 10 bits each. */ @@ -4446,7 +4456,7 @@ static void kyber_vec_compress_10_c(byte* r, sword16* v, unsigned int k) } #else /* Each 16 polynomial coefficients. */ - for (j = 0; j < KYBER_N; j += 16) { + for (j = 0; j < MLKEM_N; j += 16) { /* Compress four polynomial values to 10 bits each. 
*/ sword16 t0 = TO_COMP_WORD_10(v, i, j, 0); sword16 t1 = TO_COMP_WORD_10(v, i, j, 1); @@ -4493,17 +4503,17 @@ static void kyber_vec_compress_10_c(byte* r, sword16* v, unsigned int k) * @param [in] v Vector of polynomials. * @param [in] k Number of polynomials in vector. */ -void kyber_vec_compress_10(byte* r, sword16* v, unsigned int k) +void mlkem_vec_compress_10(byte* r, sword16* v, unsigned int k) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - kyber_compress_10_avx2(r, v, k); + mlkem_compress_10_avx2(r, v, k); RESTORE_VECTOR_REGISTERS(); } else #endif { - kyber_vec_compress_10_c(r, v, k); + mlkem_vec_compress_10_c(r, v, k); } } #endif @@ -4516,25 +4526,25 @@ void kyber_vec_compress_10(byte* r, sword16* v, unsigned int k) * @param [out] b Array of bytes. * @param [in] v Vector of polynomials. */ -static void kyber_vec_compress_11_c(byte* r, sword16* v) +static void mlkem_vec_compress_11_c(byte* r, sword16* v) { unsigned int i; unsigned int j; -#ifdef WOLFSSL_KYBER_SMALL +#ifdef WOLFSSL_MLKEM_SMALL unsigned int k; #endif for (i = 0; i < 4; i++) { /* Reduce each coefficient to mod q. */ - kyber_csubq_c(v + i * KYBER_N); + mlkem_csubq_c(v + i * MLKEM_N); /* All values are now positive. */ } /* Each polynomial. */ for (i = 0; i < 4; i++) { /* Each 8 polynomial coefficients. */ - for (j = 0; j < KYBER_N; j += 8) { - #ifdef WOLFSSL_KYBER_SMALL + for (j = 0; j < MLKEM_N; j += 8) { + #ifdef WOLFSSL_MLKEM_SMALL sword16 t[8]; /* Compress eight polynomial values to 11 bits each. */ for (k = 0; k < 8; k++) { @@ -4591,23 +4601,23 @@ static void kyber_vec_compress_11_c(byte* r, sword16* v) * @param [out] b Array of bytes. * @param [in] v Vector of polynomials. 
*/ -void kyber_vec_compress_11(byte* r, sword16* v) +void mlkem_vec_compress_11(byte* r, sword16* v) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - kyber_compress_11_avx2(r, v, 4); + mlkem_compress_11_avx2(r, v, 4); RESTORE_VECTOR_REGISTERS(); } else #endif { - kyber_vec_compress_11_c(r, v); + mlkem_vec_compress_11_c(r, v); } } #endif -#endif /* !WOLFSSL_KYBER_NO_ENCAPSULATE || !WOLFSSL_KYBER_NO_DECAPSULATE */ +#endif /* !WOLFSSL_MLKEM_NO_ENCAPSULATE || !WOLFSSL_MLKEM_NO_DECAPSULATE */ -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE /* Decompress a 10 bit value. * * FIPS 203, Section 4.2.1, Compression and decompression @@ -4620,8 +4630,8 @@ void kyber_vec_compress_11(byte* r, sword16* v) * @return Decompressed value. */ #define DECOMP_10(v, i, j, k, t) \ - v[(i) * KYBER_N + 4 * (j) + (k)] = \ - (word16)((((word32)((t) & 0x3ff) * KYBER_Q) + 512) >> 10) + v[(i) * MLKEM_N + 4 * (j) + (k)] = \ + (word16)((((word32)((t) & 0x3ff) * MLKEM_Q) + 512) >> 10) /* Decompress an 11 bit value. * @@ -4635,8 +4645,8 @@ void kyber_vec_compress_11(byte* r, sword16* v) * @return Decompressed value. */ #define DECOMP_11(v, i, j, k, t) \ - v[(i) * KYBER_N + 8 * (j) + (k)] = \ - (word16)((((word32)((t) & 0x7ff) * KYBER_Q) + 1024) >> 11) + v[(i) * MLKEM_N + 8 * (j) + (k)] = \ + (word16)((((word32)((t) & 0x7ff) * MLKEM_Q) + 1024) >> 11) #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) || \ defined(WOLFSSL_KYBER768) || defined(WOLFSSL_WC_ML_KEM_768) @@ -4648,19 +4658,19 @@ void kyber_vec_compress_11(byte* r, sword16* v) * @param [in] b Array of bytes. * @param [in] k Number of polynomials in vector. */ -static void kyber_vec_decompress_10_c(sword16* v, const byte* b, unsigned int k) +static void mlkem_vec_decompress_10_c(sword16* v, const byte* b, unsigned int k) { unsigned int i; unsigned int j; -#ifdef WOLFSSL_KYBER_SMALL +#ifdef WOLFSSL_MLKEM_SMALL unsigned int l; #endif /* Each polynomial. 
*/ for (i = 0; i < k; i++) { /* Each 4 polynomial coefficients. */ - for (j = 0; j < KYBER_N / 4; j++) { - #ifdef WOLFSSL_KYBER_SMALL + for (j = 0; j < MLKEM_N / 4; j++) { + #ifdef WOLFSSL_MLKEM_SMALL word16 t[4]; /* Extract out 4 values of 10 bits each. */ t[0] = (b[0] >> 0) | ((word16)b[ 1] << 8); @@ -4699,17 +4709,17 @@ static void kyber_vec_decompress_10_c(sword16* v, const byte* b, unsigned int k) * @param [in] b Array of bytes. * @param [in] k Number of polynomials in vector. */ -void kyber_vec_decompress_10(sword16* v, const byte* b, unsigned int k) +void mlkem_vec_decompress_10(sword16* v, const byte* b, unsigned int k) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - kyber_decompress_10_avx2(v, b, k); + mlkem_decompress_10_avx2(v, b, k); RESTORE_VECTOR_REGISTERS(); } else #endif { - kyber_vec_decompress_10_c(v, b, k); + mlkem_vec_decompress_10_c(v, b, k); } } #endif @@ -4721,19 +4731,19 @@ void kyber_vec_decompress_10(sword16* v, const byte* b, unsigned int k) * @param [out] v Vector of polynomials. * @param [in] b Array of bytes. */ -static void kyber_vec_decompress_11_c(sword16* v, const byte* b) +static void mlkem_vec_decompress_11_c(sword16* v, const byte* b) { unsigned int i; unsigned int j; -#ifdef WOLFSSL_KYBER_SMALL +#ifdef WOLFSSL_MLKEM_SMALL unsigned int l; #endif /* Each polynomial. */ for (i = 0; i < 4; i++) { /* Each 8 polynomial coefficients. */ - for (j = 0; j < KYBER_N / 8; j++) { - #ifdef WOLFSSL_KYBER_SMALL + for (j = 0; j < MLKEM_N / 8; j++) { + #ifdef WOLFSSL_MLKEM_SMALL word16 t[8]; /* Extract out 8 values of 11 bits each. */ t[0] = (b[0] >> 0) | ((word16)b[ 1] << 8); @@ -4787,21 +4797,21 @@ static void kyber_vec_decompress_11_c(sword16* v, const byte* b) * @param [out] v Vector of polynomials. * @param [in] b Array of bytes. 
*/ -void kyber_vec_decompress_11(sword16* v, const byte* b) +void mlkem_vec_decompress_11(sword16* v, const byte* b) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - kyber_decompress_11_avx2(v, b, 4); + mlkem_decompress_11_avx2(v, b, 4); RESTORE_VECTOR_REGISTERS(); } else #endif { - kyber_vec_decompress_11_c(v, b); + mlkem_vec_decompress_11_c(v, b); } } #endif -#endif /* !WOLFSSL_KYBER_NO_DECAPSULATE */ +#endif /* !WOLFSSL_MLKEM_NO_DECAPSULATE */ #ifdef CONV_WITH_DIV @@ -4819,7 +4829,7 @@ void kyber_vec_decompress_11(sword16* v, const byte* b) * @return Compressed value. */ #define TO_COMP_WORD(v, i, j, s, m) \ - ((((word32)v[i + j] << s) + KYBER_Q_HALF) / KYBER_Q) & m + ((((word32)v[i + j] << s) + MLKEM_Q_HALF) / MLKEM_Q) & m /* Compress value to 4 bits. * @@ -4852,14 +4862,14 @@ void kyber_vec_decompress_11(sword16* v, const byte* b) #else /* Multiplier that does div q. */ -#define KYBER_V28 ((word32)(((1U << 28) + KYBER_Q_HALF)) / KYBER_Q) +#define MLKEM_V28 ((word32)(((1U << 28) + MLKEM_Q_HALF)) / MLKEM_Q) /* Multiplier times half of q. */ -#define KYBER_V28_HALF ((word32)(KYBER_V28 * (KYBER_Q_HALF + 1))) +#define MLKEM_V28_HALF ((word32)(MLKEM_V28 * (MLKEM_Q_HALF + 1))) /* Multiplier that does div q. */ -#define KYBER_V27 ((word32)(((1U << 27) + KYBER_Q_HALF)) / KYBER_Q) +#define MLKEM_V27 ((word32)(((1U << 27) + MLKEM_Q_HALF)) / MLKEM_Q) /* Multiplier times half of q. */ -#define KYBER_V27_HALF ((word32)(KYBER_V27 * KYBER_Q_HALF)) +#define MLKEM_V27_HALF ((word32)(MLKEM_V27 * MLKEM_Q_HALF)) /* Compress value to 4 bits. * @@ -4873,7 +4883,7 @@ void kyber_vec_decompress_11(sword16* v, const byte* b) * @return Compressed value. */ #define TO_COMP_WORD_4(p, i, j) \ - ((((KYBER_V28 << 4) * (p)[(i) + (j)]) + KYBER_V28_HALF) >> 28) + ((((MLKEM_V28 << 4) * (p)[(i) + (j)]) + MLKEM_V28_HALF) >> 28) /* Compress value to 5 bits. 
* @@ -4887,12 +4897,12 @@ void kyber_vec_decompress_11(sword16* v, const byte* b) * @return Compressed value. */ #define TO_COMP_WORD_5(p, i, j) \ - ((((KYBER_V27 << 5) * (p)[(i) + (j)]) + KYBER_V27_HALF) >> 27) + ((((MLKEM_V27 << 5) * (p)[(i) + (j)]) + MLKEM_V27_HALF) >> 27) #endif /* CONV_WITH_DIV */ -#if !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) || \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) +#if !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) || \ + !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) || \ defined(WOLFSSL_KYBER768) || defined(WOLFSSL_WC_ML_KEM_768) /* Compress a polynomial into byte array - on coefficients into 4 bits. @@ -4902,21 +4912,21 @@ void kyber_vec_decompress_11(sword16* v, const byte* b) * @param [out] b Array of bytes. * @param [in] p Polynomial. */ -static void kyber_compress_4_c(byte* b, sword16* p) +static void mlkem_compress_4_c(byte* b, sword16* p) { unsigned int i; -#ifdef WOLFSSL_KYBER_SMALL +#ifdef WOLFSSL_MLKEM_SMALL unsigned int j; byte t[8]; #endif /* Reduce each coefficients to mod q. */ - kyber_csubq_c(p); + mlkem_csubq_c(p); /* All values are now positive. */ /* Each 8 polynomial coefficients. */ - for (i = 0; i < KYBER_N; i += 8) { - #ifdef WOLFSSL_KYBER_SMALL + for (i = 0; i < MLKEM_N; i += 8) { + #ifdef WOLFSSL_MLKEM_SMALL /* Compress eight polynomial values to 4 bits each. */ for (j = 0; j < 8; j++) { t[j] = TO_COMP_WORD_4(p, i, j); @@ -4956,17 +4966,17 @@ static void kyber_compress_4_c(byte* b, sword16* p) * @param [out] b Array of bytes. * @param [in] p Polynomial. 
*/ -void kyber_compress_4(byte* b, sword16* p) +void mlkem_compress_4(byte* b, sword16* p) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - kyber_compress_4_avx2(b, p); + mlkem_compress_4_avx2(b, p); RESTORE_VECTOR_REGISTERS(); } else #endif { - kyber_compress_4_c(b, p); + mlkem_compress_4_c(b, p); } } #endif @@ -4978,20 +4988,20 @@ void kyber_compress_4(byte* b, sword16* p) * @param [out] b Array of bytes. * @param [in] p Polynomial. */ -static void kyber_compress_5_c(byte* b, sword16* p) +static void mlkem_compress_5_c(byte* b, sword16* p) { unsigned int i; -#ifdef WOLFSSL_KYBER_SMALL +#ifdef WOLFSSL_MLKEM_SMALL unsigned int j; byte t[8]; #endif /* Reduce each coefficients to mod q. */ - kyber_csubq_c(p); + mlkem_csubq_c(p); /* All values are now positive. */ - for (i = 0; i < KYBER_N; i += 8) { - #ifdef WOLFSSL_KYBER_SMALL + for (i = 0; i < MLKEM_N; i += 8) { + #ifdef WOLFSSL_MLKEM_SMALL /* Compress eight polynomial values to 5 bits each. */ for (j = 0; j < 8; j++) { t[j] = TO_COMP_WORD_5(p, i, j); @@ -5034,23 +5044,23 @@ static void kyber_compress_5_c(byte* b, sword16* p) * @param [out] b Array of bytes. * @param [in] p Polynomial. */ -void kyber_compress_5(byte* b, sword16* p) +void mlkem_compress_5(byte* b, sword16* p) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - kyber_compress_5_avx2(b, p); + mlkem_compress_5_avx2(b, p); RESTORE_VECTOR_REGISTERS(); } else #endif { - kyber_compress_5_c(b, p); + mlkem_compress_5_c(b, p); } } #endif -#endif /* !WOLFSSL_KYBER_NO_ENCAPSULATE || !WOLFSSL_KYBER_NO_DECAPSULATE */ +#endif /* !WOLFSSL_MLKEM_NO_ENCAPSULATE || !WOLFSSL_MLKEM_NO_DECAPSULATE */ -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE /* Decompress a 4 bit value. * * FIPS 203, Section 4.2.1, Compression and decompression @@ -5062,7 +5072,7 @@ void kyber_compress_5(byte* b, sword16* p) * @return Decompressed value. 
*/ #define DECOMP_4(p, i, j, t) \ - p[(i) + (j)] = ((word16)((t) * KYBER_Q) + 8) >> 4 + p[(i) + (j)] = ((word16)((t) * MLKEM_Q) + 8) >> 4 /* Decompress a 5 bit value. * @@ -5075,7 +5085,7 @@ void kyber_compress_5(byte* b, sword16* p) * @return Decompressed value. */ #define DECOMP_5(p, i, j, t) \ - p[(i) + (j)] = (((word32)((t) & 0x1f) * KYBER_Q) + 16) >> 5 + p[(i) + (j)] = (((word32)((t) & 0x1f) * MLKEM_Q) + 16) >> 5 #if defined(WOLFSSL_KYBER512) || defined(WOLFSSL_WC_ML_KEM_512) || \ defined(WOLFSSL_KYBER768) || defined(WOLFSSL_WC_ML_KEM_768) @@ -5086,12 +5096,12 @@ void kyber_compress_5(byte* b, sword16* p) * @param [out] p Polynomial. * @param [in] b Array of bytes. */ -static void kyber_decompress_4_c(sword16* p, const byte* b) +static void mlkem_decompress_4_c(sword16* p, const byte* b) { unsigned int i; /* 2 coefficients at a time. */ - for (i = 0; i < KYBER_N; i += 2) { + for (i = 0; i < MLKEM_N; i += 2) { /* 2 coefficients decompressed from one byte. */ DECOMP_4(p, i, 0, b[0] & 0xf); DECOMP_4(p, i, 1, b[0] >> 4); @@ -5106,17 +5116,17 @@ static void kyber_decompress_4_c(sword16* p, const byte* b) * @param [out] p Polynomial. * @param [in] b Array of bytes. */ -void kyber_decompress_4(sword16* p, const byte* b) +void mlkem_decompress_4(sword16* p, const byte* b) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - kyber_decompress_4_avx2(p, b); + mlkem_decompress_4_avx2(p, b); RESTORE_VECTOR_REGISTERS(); } else #endif { - kyber_decompress_4_c(p, b); + mlkem_decompress_4_c(p, b); } } #endif @@ -5128,13 +5138,13 @@ void kyber_decompress_4(sword16* p, const byte* b) * @param [out] p Polynomial. * @param [in] b Array of bytes. */ -static void kyber_decompress_5_c(sword16* p, const byte* b) +static void mlkem_decompress_5_c(sword16* p, const byte* b) { unsigned int i; /* Each 8 polynomial coefficients. 
*/ - for (i = 0; i < KYBER_N; i += 8) { - #ifdef WOLFSSL_KYBER_SMALL + for (i = 0; i < MLKEM_N; i += 8) { + #ifdef WOLFSSL_MLKEM_SMALL unsigned int j; byte t[8]; @@ -5185,32 +5195,32 @@ static void kyber_decompress_5_c(sword16* p, const byte* b) * @param [out] p Polynomial. * @param [in] b Array of bytes. */ -void kyber_decompress_5(sword16* p, const byte* b) +void mlkem_decompress_5(sword16* p, const byte* b) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - kyber_decompress_5_avx2(p, b); + mlkem_decompress_5_avx2(p, b); RESTORE_VECTOR_REGISTERS(); } else #endif { - kyber_decompress_5_c(p, b); + mlkem_decompress_5_c(p, b); } } #endif -#endif /* !WOLFSSL_KYBER_NO_DECAPSULATE */ +#endif /* !WOLFSSL_MLKEM_NO_DECAPSULATE */ /******************************************************************************/ #if !(defined(__aarch64__) && defined(WOLFSSL_ARMASM)) -#if !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) || \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) -/* Convert bit from byte to 0 or (KYBER_Q + 1) / 2. +#if !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) || \ + !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) +/* Convert bit from byte to 0 or (MLKEM_Q + 1) / 2. * * Constant time implementation. - * XOR in kyber_opt_blocker to ensure optimizer doesn't know what will be ANDed - * with KYBER_Q_1_HALF and can't optimize to non-constant time code. + * XOR in mlkem_opt_blocker to ensure optimizer doesn't know what will be ANDed + * with MLKEM_Q_1_HALF and can't optimize to non-constant time code. * * FIPS 203, Algorithm 6: ByteDecode_d(B) * @@ -5221,7 +5231,7 @@ void kyber_decompress_5(sword16* p, const byte* b) */ #define FROM_MSG_BIT(p, msg, i, j) \ ((p)[8 * (i) + (j)] = (((sword16)0 - (sword16)(((msg)[i] >> (j)) & 1)) ^ \ - kyber_opt_blocker) & KYBER_Q_1_HALF) + mlkem_opt_blocker) & MLKEM_Q_1_HALF) /* Convert message to polynomial. * @@ -5230,13 +5240,13 @@ void kyber_decompress_5(sword16* p, const byte* b) * @param [out] p Polynomial. 
* @param [in] msg Message as a byte array. */ -static void kyber_from_msg_c(sword16* p, const byte* msg) +static void mlkem_from_msg_c(sword16* p, const byte* msg) { unsigned int i; /* For each byte of the message. */ - for (i = 0; i < KYBER_N / 8; i++) { - #ifdef WOLFSSL_KYBER_SMALL + for (i = 0; i < MLKEM_N / 8; i++) { + #ifdef WOLFSSL_MLKEM_SMALL unsigned int j; /* For each bit of the message. */ for (j = 0; j < 8; j++) { @@ -5262,22 +5272,22 @@ static void kyber_from_msg_c(sword16* p, const byte* msg) * @param [out] p Polynomial. * @param [in] msg Message as a byte array. */ -void kyber_from_msg(sword16* p, const byte* msg) +void mlkem_from_msg(sword16* p, const byte* msg) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { - kyber_from_msg_avx2(p, msg); + mlkem_from_msg_avx2(p, msg); RESTORE_VECTOR_REGISTERS(); } else #endif { - kyber_from_msg_c(p, msg); + mlkem_from_msg_c(p, msg); } } #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE #ifdef CONV_WITH_DIV /* Convert to value to bit. @@ -5292,16 +5302,16 @@ void kyber_from_msg(sword16* p, const byte* msg) * @param [in] j Index of bit in byte. */ #define TO_MSG_BIT(m, p, i, j) \ - m[i] |= (((((sword16)p[8 * i + j] << 1) + KYBER_Q_HALF) / KYBER_Q) & 1) << j + m[i] |= (((((sword16)p[8 * i + j] << 1) + MLKEM_Q_HALF) / MLKEM_Q) & 1) << j #else /* Multiplier that does div q. */ -#define KYBER_V31 (((1U << 31) + (KYBER_Q / 2)) / KYBER_Q) +#define MLKEM_V31 (((1U << 31) + (MLKEM_Q / 2)) / MLKEM_Q) /* 2 * multiplier that does div q. Only need bit 32 of result. */ -#define KYBER_V31_2 ((word32)(KYBER_V31 * 2)) +#define MLKEM_V31_2 ((word32)(MLKEM_V31 * 2)) /* Multiplier times half of q. */ -#define KYBER_V31_HALF ((word32)(KYBER_V31 * KYBER_Q_HALF)) +#define MLKEM_V31_HALF ((word32)(MLKEM_V31 * MLKEM_Q_HALF)) /* Convert to value to bit. * @@ -5315,7 +5325,8 @@ void kyber_from_msg(sword16* p, const byte* msg) * @param [in] j Index of bit in byte. 
*/ #define TO_MSG_BIT(m, p, i, j) \ - (m)[i] |= ((word32)((KYBER_V31_2 * (p)[8 * (i) + (j)]) + KYBER_V31_HALF) >> 31) << (j) + (m)[i] |= ((word32)((MLKEM_V31_2 * (p)[8 * (i) + (j)]) + \ + MLKEM_V31_HALF) >> 31) << (j) #endif /* CONV_WITH_DIV */ @@ -5326,16 +5337,16 @@ void kyber_from_msg(sword16* p, const byte* msg) * @param [out] msg Message as a byte array. * @param [in] p Polynomial. */ -static void kyber_to_msg_c(byte* msg, sword16* p) +static void mlkem_to_msg_c(byte* msg, sword16* p) { unsigned int i; /* Reduce each coefficient to mod q. */ - kyber_csubq_c(p); + mlkem_csubq_c(p); /* All values are now in range. */ - for (i = 0; i < KYBER_N / 8; i++) { - #ifdef WOLFSSL_KYBER_SMALL + for (i = 0; i < MLKEM_N / 8; i++) { + #ifdef WOLFSSL_MLKEM_SMALL unsigned int j; msg[i] = 0; for (j = 0; j < 8; j++) { @@ -5362,24 +5373,24 @@ static void kyber_to_msg_c(byte* msg, sword16* p) * @param [out] msg Message as a byte array. * @param [in] p Polynomial. */ -void kyber_to_msg(byte* msg, sword16* p) +void mlkem_to_msg(byte* msg, sword16* p) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { /* Convert the polynomial into a array of bytes (message). */ - kyber_to_msg_avx2(msg, p); + mlkem_to_msg_avx2(msg, p); RESTORE_VECTOR_REGISTERS(); } else #endif { - kyber_to_msg_c(msg, p); + mlkem_to_msg_c(msg, p); } } -#endif /* !WOLFSSL_KYBER_NO_DECAPSULATE */ +#endif /* !WOLFSSL_MLKEM_NO_DECAPSULATE */ #else -#if !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) || \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) +#if !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) || \ + !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) /* Convert message to polynomial. * * FIPS 203, Algorithm 6: ByteDecode_d(B) @@ -5387,13 +5398,13 @@ void kyber_to_msg(byte* msg, sword16* p) * @param [out] p Polynomial. * @param [in] msg Message as a byte array. 
*/ -void kyber_from_msg(sword16* p, const byte* msg) +void mlkem_from_msg(sword16* p, const byte* msg) { - kyber_from_msg_neon(p, msg); + mlkem_from_msg_neon(p, msg); } -#endif /* !WOLFSSL_KYBER_NO_ENCAPSULATE || !WOLFSSL_KYBER_NO_DECAPSULATE */ +#endif /* !WOLFSSL_MLKEM_NO_ENCAPSULATE || !WOLFSSL_MLKEM_NO_DECAPSULATE */ -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE /* Convert polynomial to message. * * FIPS 203, Algorithm 6: ByteEncode_d(F) @@ -5401,11 +5412,11 @@ void kyber_from_msg(sword16* p, const byte* msg) * @param [out] msg Message as a byte array. * @param [in] p Polynomial. */ -void kyber_to_msg(byte* msg, sword16* p) +void mlkem_to_msg(byte* msg, sword16* p) { - kyber_to_msg_neon(msg, p); + mlkem_to_msg_neon(msg, p); } -#endif /* WOLFSSL_KYBER_NO_DECAPSULATE */ +#endif /* WOLFSSL_MLKEM_NO_DECAPSULATE */ #endif /* !(__aarch64__ && WOLFSSL_ARMASM) */ /******************************************************************************/ @@ -5421,20 +5432,20 @@ void kyber_to_msg(byte* msg, sword16* p) * @param [in] b Array of bytes. * @param [in] k Number of polynomials in vector. */ -static void kyber_from_bytes_c(sword16* p, const byte* b, int k) +static void mlkem_from_bytes_c(sword16* p, const byte* b, int k) { int i; int j; for (j = 0; j < k; j++) { - for (i = 0; i < KYBER_N / 2; i++) { + for (i = 0; i < MLKEM_N / 2; i++) { p[2 * i + 0] = ((b[3 * i + 0] >> 0) | ((word16)b[3 * i + 1] << 8)) & 0xfff; p[2 * i + 1] = ((b[3 * i + 1] >> 4) | ((word16)b[3 * i + 2] << 4)) & 0xfff; } - p += KYBER_N; - b += KYBER_POLY_SIZE; + p += MLKEM_N; + b += WC_ML_KEM_POLY_SIZE; } } @@ -5449,16 +5460,16 @@ static void kyber_from_bytes_c(sword16* p, const byte* b, int k) * @param [in] b Array of bytes. * @param [in] k Number of polynomials in vector. 
*/ -void kyber_from_bytes(sword16* p, const byte* b, int k) +void mlkem_from_bytes(sword16* p, const byte* b, int k) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { int i; for (i = 0; i < k; i++) { - kyber_from_bytes_avx2(p, b); - p += KYBER_N; - b += KYBER_POLY_SIZE; + mlkem_from_bytes_avx2(p, b); + p += MLKEM_N; + b += WC_ML_KEM_POLY_SIZE; } RESTORE_VECTOR_REGISTERS(); @@ -5466,7 +5477,7 @@ void kyber_from_bytes(sword16* p, const byte* b, int k) else #endif { - kyber_from_bytes_c(p, b, k); + mlkem_from_bytes_c(p, b, k); } } @@ -5481,25 +5492,25 @@ void kyber_from_bytes(sword16* p, const byte* b, int k) * @param [in] p Polynomial. * @param [in] k Number of polynomials in vector. */ -static void kyber_to_bytes_c(byte* b, sword16* p, int k) +static void mlkem_to_bytes_c(byte* b, sword16* p, int k) { int i; int j; /* Reduce each coefficient to mod q. */ - kyber_csubq_c(p); + mlkem_csubq_c(p); /* All values are now positive. */ for (j = 0; j < k; j++) { - for (i = 0; i < KYBER_N / 2; i++) { + for (i = 0; i < MLKEM_N / 2; i++) { word16 t0 = p[2 * i]; word16 t1 = p[2 * i + 1]; b[3 * i + 0] = (t0 >> 0); b[3 * i + 1] = (t0 >> 8) | t1 << 4; b[3 * i + 2] = (t1 >> 4); } - p += KYBER_N; - b += KYBER_POLY_SIZE; + p += MLKEM_N; + b += WC_ML_KEM_POLY_SIZE; } } @@ -5514,16 +5525,16 @@ static void kyber_to_bytes_c(byte* b, sword16* p, int k) * @param [in] p Polynomial. * @param [in] k Number of polynomials in vector. 
*/ -void kyber_to_bytes(byte* b, sword16* p, int k) +void mlkem_to_bytes(byte* b, sword16* p, int k) { #ifdef USE_INTEL_SPEEDUP if (IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { int i; for (i = 0; i < k; i++) { - kyber_to_bytes_avx2(b, p); - p += KYBER_N; - b += KYBER_POLY_SIZE; + mlkem_to_bytes_avx2(b, p); + p += MLKEM_N; + b += WC_ML_KEM_POLY_SIZE; } RESTORE_VECTOR_REGISTERS(); @@ -5531,8 +5542,8 @@ void kyber_to_bytes(byte* b, sword16* p, int k) else #endif { - kyber_to_bytes_c(b, p, k); + mlkem_to_bytes_c(b, p, k); } } -#endif /* WOLFSSL_WC_KYBER */ +#endif /* WOLFSSL_WC_MLKEM */ diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index c0d3b711c..b47c2d5bd 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -319,13 +319,13 @@ const byte const_byte_array[] = "A+Gd\0\0\0"; #ifdef HAVE_ED448 #include #endif -#ifdef WOLFSSL_HAVE_KYBER - #include -#ifdef WOLFSSL_WC_KYBER - #include +#ifdef WOLFSSL_HAVE_MLKEM + #include +#ifdef WOLFSSL_WC_MLKEM + #include #endif #if defined(HAVE_LIBOQS) - #include + #include #endif #endif #ifdef HAVE_DILITHIUM @@ -676,8 +676,8 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t scrypt_test(void); #ifdef HAVE_ED448 WOLFSSL_TEST_SUBROUTINE wc_test_ret_t ed448_test(void); #endif -#ifdef WOLFSSL_HAVE_KYBER - WOLFSSL_TEST_SUBROUTINE wc_test_ret_t kyber_test(void); +#ifdef WOLFSSL_HAVE_MLKEM + WOLFSSL_TEST_SUBROUTINE wc_test_ret_t mlkem_test(void); #endif #ifdef HAVE_DILITHIUM WOLFSSL_TEST_SUBROUTINE wc_test_ret_t dilithium_test(void); @@ -2295,11 +2295,11 @@ options: [-s max_relative_stack_bytes] [-m max_relative_heap_memory_bytes]\n\ PRIVATE_KEY_LOCK(); #endif -#ifdef WOLFSSL_HAVE_KYBER - if ( (ret = kyber_test()) != 0) - TEST_FAIL("KYBER test failed!\n", ret); +#ifdef WOLFSSL_HAVE_MLKEM + if ( (ret = mlkem_test()) != 0) + TEST_FAIL("MLKEM test failed!\n", ret); else - TEST_PASS("KYBER test passed!\n"); + TEST_PASS("MLKEM test passed!\n"); #endif #ifdef HAVE_DILITHIUM @@ -38668,36 +38668,36 @@ 
WOLFSSL_TEST_SUBROUTINE wc_test_ret_t ed448_test(void) } #endif /* HAVE_ED448 */ -#ifdef WOLFSSL_HAVE_KYBER -#ifdef WOLFSSL_WC_KYBER /* OQS does not support KATs */ +#ifdef WOLFSSL_HAVE_MLKEM +#ifdef WOLFSSL_WC_MLKEM /* OQS does not support KATs */ #if !defined(WOLFSSL_NO_KYBER512) && !defined(WOLFSSL_NO_ML_KEM_512) -static wc_test_ret_t kyber512_kat(void) +static wc_test_ret_t mlkem512_kat(void) { wc_test_ret_t ret; #ifdef WOLFSSL_SMALL_STACK - KyberKey *key = NULL; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY + MlKemKey *key = NULL; +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY byte *priv = NULL; byte *pub = NULL; #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE byte *ct = NULL; byte *ss = NULL; #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE byte *ss_dec = NULL; #endif #else - KyberKey key[1]; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY + MlKemKey key[1]; +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY byte priv[KYBER512_PRIVATE_KEY_SIZE]; byte pub[KYBER512_PUBLIC_KEY_SIZE]; #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE byte ct[KYBER512_CIPHER_TEXT_SIZE]; byte ss[KYBER_SS_SZ]; #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE byte ss_dec[KYBER_SS_SZ]; #endif #endif @@ -38718,7 +38718,7 @@ static wc_test_ret_t kyber512_kat(void) 0xc8, 0x0e, 0xfe, 0x79, 0xa3, 0xa9, 0xa8, 0x74, 0xcc, 0x09, 0xfe, 0x76, 0xf6, 0x99, 0x76, 0x15 }; -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER WOLFSSL_SMALL_STACK_STATIC const byte kyber512_pk[] = { 0x11, 0x5A, 0xCE, 0x0E, 0x64, 0x67, 0x7C, 0xBB, 0x7D, 0xCF, 0xC9, 0x3C, 0x16, 0xD3, 0xA3, 0x05, @@ -38926,7 +38926,7 @@ static wc_test_ret_t kyber512_kat(void) 0x19, 0x58, 0x5d, 0xea, 0x30, 0x8e, 0xb0, 0x39 }; #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER WOLFSSL_SMALL_STACK_STATIC const byte kyber512_sk[] = { 0x6C, 0x89, 0x2B, 0x02, 0x97, 0xA9, 0xC7, 0x64, 0x14, 0x93, 0xF8, 0x7D, 0xAF, 0x35, 0x33, 0xEE, @@ -39342,7 
+39342,7 @@ static wc_test_ret_t kyber512_kat(void) 0x90, 0xfa, 0x9e, 0x8b, 0x87, 0x2b, 0xfb, 0x8f }; #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER WOLFSSL_SMALL_STACK_STATIC const byte kyber512_ct[] = { 0xED, 0xF2, 0x41, 0x45, 0xE4, 0x3B, 0x4F, 0x6D, 0xC6, 0xBF, 0x83, 0x32, 0xF5, 0x4E, 0x02, 0xCA, @@ -39542,7 +39542,7 @@ static wc_test_ret_t kyber512_kat(void) 0x62, 0xd6, 0x94, 0xc6, 0xd8, 0xc3, 0x3b, 0x52 }; #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER WOLFSSL_SMALL_STACK_STATIC const byte kyber512_ss[] = { 0x0A, 0x69, 0x25, 0x67, 0x6F, 0x24, 0xB2, 0x2C, 0x28, 0x6F, 0x4C, 0x81, 0xA4, 0x22, 0x4C, 0xEC, @@ -39560,42 +39560,42 @@ static wc_test_ret_t kyber512_kat(void) #endif #ifdef WOLFSSL_SMALL_STACK - key = (KyberKey *)XMALLOC(sizeof(KyberKey), HEAP_HINT, + key = (MlKemKey *)XMALLOC(sizeof(MlKemKey), HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (key == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY - priv = (byte *)XMALLOC(KYBER512_PRIVATE_KEY_SIZE, HEAP_HINT, +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY + priv = (byte *)XMALLOC(WC_ML_KEM_512_PRIVATE_KEY_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - pub = (byte *)XMALLOC(KYBER512_PUBLIC_KEY_SIZE, HEAP_HINT, + pub = (byte *)XMALLOC(WC_ML_KEM_512_PUBLIC_KEY_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (pub == NULL || priv == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE - ct = (byte *)XMALLOC(KYBER512_CIPHER_TEXT_SIZE, HEAP_HINT, +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE + ct = (byte *)XMALLOC(WC_ML_KEM_512_CIPHER_TEXT_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - ss = (byte *)XMALLOC(KYBER_SS_SZ, HEAP_HINT, + ss = (byte *)XMALLOC(WC_ML_KEM_SS_SZ, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (ct == NULL || ss == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE - ss_dec = (byte *)XMALLOC(KYBER_SS_SZ, HEAP_HINT, +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE + ss_dec = (byte 
*)XMALLOC(WC_ML_KEM_SS_SZ, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (ss_dec == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER ret = wc_KyberKey_Init(KYBER512, key, HEAP_HINT, INVALID_DEVID); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); else key_inited = 1; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY ret = wc_KyberKey_MakeKeyWithRandom(key, kyber512_rand, sizeof(kyber512_rand)); if (ret != 0) @@ -39623,7 +39623,7 @@ static wc_test_ret_t kyber512_kat(void) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE ret = wc_KyberKey_EncapsulateWithRandom(key, ct, ss, kyber512enc_rand, sizeof(kyber512enc_rand)); if (ret != 0) @@ -39638,7 +39638,7 @@ static wc_test_ret_t kyber512_kat(void) (void)kyber512enc_rand; #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE ret = wc_KyberKey_Decapsulate(key, ss_dec, kyber512_ct, sizeof(kyber512_ct)); if (ret != 0) @@ -39652,23 +39652,23 @@ static wc_test_ret_t kyber512_kat(void) #endif #endif #ifndef WOLFSSL_NO_ML_KEM - ret = wc_KyberKey_Init(WC_ML_KEM_512, key, HEAP_HINT, INVALID_DEVID); + ret = wc_MlKemKey_Init(key, WC_ML_KEM_512, HEAP_HINT, INVALID_DEVID); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); else key_inited = 1; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY - ret = wc_KyberKey_MakeKeyWithRandom(key, kyber512_rand, +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY + ret = wc_MlKemKey_MakeKeyWithRandom(key, kyber512_rand, sizeof(kyber512_rand)); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); - ret = wc_KyberKey_EncodePublicKey(key, pub, WC_ML_KEM_512_PUBLIC_KEY_SIZE); + ret = wc_MlKemKey_EncodePublicKey(key, pub, WC_ML_KEM_512_PUBLIC_KEY_SIZE); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); - ret = wc_KyberKey_EncodePrivateKey(key, priv, + ret = wc_MlKemKey_EncodePrivateKey(key, priv, WC_ML_KEM_512_PRIVATE_KEY_SIZE); if 
(ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); @@ -39681,14 +39681,14 @@ static wc_test_ret_t kyber512_kat(void) #else (void)kyber512_rand; (void)ml_kem_512_pk; - ret = wc_KyberKey_DecodePrivateKey(key, ml_kem_512_sk, + ret = wc_MlKemKey_DecodePrivateKey(key, ml_kem_512_sk, WC_ML_KEM_512_PRIVATE_KEY_SIZE); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE - ret = wc_KyberKey_EncapsulateWithRandom(key, ct, ss, kyber512enc_rand, +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE + ret = wc_MlKemKey_EncapsulateWithRandom(key, ct, ss, kyber512enc_rand, sizeof(kyber512enc_rand)); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); @@ -39702,8 +39702,8 @@ static wc_test_ret_t kyber512_kat(void) (void)kyber512enc_rand; #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE - ret = wc_KyberKey_Decapsulate(key, ss_dec, ml_kem_512_ct, +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE + ret = wc_MlKemKey_Decapsulate(key, ss_dec, ml_kem_512_ct, sizeof(ml_kem_512_ct)); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); @@ -39719,55 +39719,55 @@ static wc_test_ret_t kyber512_kat(void) out: if (key_inited) - wc_KyberKey_Free(key); + wc_MlKemKey_Free(key); #ifdef WOLFSSL_SMALL_STACK XFREE(key, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY XFREE(priv, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); XFREE(pub, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE XFREE(ct, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); XFREE(ss, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE XFREE(ss_dec, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); #endif #endif return ret; } -#endif /* WOLFSSL_KYBER512 */ +#endif /* !WOLFSSL_NO_KYBER512 && !WOLFSSL_NO_ML_KEM_512 */ #if !defined(WOLFSSL_NO_KYBER768) && !defined(WOLFSSL_NO_ML_KEM_768) -static wc_test_ret_t kyber768_kat(void) +static wc_test_ret_t 
mlkem768_kat(void) { wc_test_ret_t ret; #ifdef WOLFSSL_SMALL_STACK - KyberKey *key = NULL; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY + MlKemKey *key = NULL; +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY byte *priv = NULL; byte *pub = NULL; #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE byte *ct = NULL; byte *ss = NULL; #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE byte *ss_dec = NULL; #endif #else - KyberKey key[1]; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY + MlKemKey key[1]; +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY byte priv[KYBER768_PRIVATE_KEY_SIZE]; byte pub[KYBER768_PUBLIC_KEY_SIZE]; #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE byte ct[KYBER768_CIPHER_TEXT_SIZE]; byte ss[KYBER_SS_SZ]; #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE byte ss_dec[KYBER_SS_SZ]; #endif #endif @@ -39789,7 +39789,7 @@ static wc_test_ret_t kyber768_kat(void) 0xcc, 0x09, 0xfe, 0x76, 0xf6, 0x99, 0x76, 0x15 }; -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER WOLFSSL_SMALL_STACK_STATIC const byte kyber768_pk[] = { 0xA7, 0x2C, 0x2D, 0x9C, 0x84, 0x3E, 0xE9, 0xF8, 0x31, 0x3E, 0xCC, 0x7F, 0x86, 0xD6, 0x29, 0x4D, @@ -40093,7 +40093,7 @@ static wc_test_ret_t kyber768_kat(void) 0x00, 0x17, 0xae, 0x13, 0x6e, 0x19, 0xf0, 0x28 }; #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER WOLFSSL_SMALL_STACK_STATIC const byte kyber768_sk[] = { 0x07, 0x63, 0x8F, 0xB6, 0x98, 0x68, 0xF3, 0xD3, 0x20, 0xE5, 0x86, 0x2B, 0xD9, 0x69, 0x33, 0xFE, @@ -40701,7 +40701,7 @@ static wc_test_ret_t kyber768_kat(void) 0x90, 0xfa, 0x9e, 0x8b, 0x87, 0x2b, 0xfb, 0x8f }; #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER WOLFSSL_SMALL_STACK_STATIC const byte kyber768_ct[] = { 0xB5, 0x2C, 0x56, 0xB9, 0x2A, 0x4B, 0x7C, 0xE9, 0xE4, 0xCB, 0x7C, 0x5B, 0x1B, 0x16, 0x31, 0x67, @@ -40981,7 +40981,7 @@ static wc_test_ret_t kyber768_kat(void) 0x53, 0x2a, 0xc3, 0xee, 0x1e, 0x52, 0xd4, 
0x64 }; #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER WOLFSSL_SMALL_STACK_STATIC const byte kyber768_ss[] = { 0x91, 0x4C, 0xB6, 0x7F, 0xE5, 0xC3, 0x8E, 0x73, 0xBF, 0x74, 0x18, 0x1C, 0x0A, 0xC5, 0x04, 0x28, @@ -40999,42 +40999,42 @@ static wc_test_ret_t kyber768_kat(void) #endif #ifdef WOLFSSL_SMALL_STACK - key = (KyberKey *)XMALLOC(sizeof(KyberKey), HEAP_HINT, + key = (MlKemKey *)XMALLOC(sizeof(MlKemKey), HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (key == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY - priv = (byte *)XMALLOC(KYBER768_PRIVATE_KEY_SIZE, HEAP_HINT, +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY + priv = (byte *)XMALLOC(WC_ML_KEM_768_PRIVATE_KEY_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - pub = (byte *)XMALLOC(KYBER768_PUBLIC_KEY_SIZE, HEAP_HINT, + pub = (byte *)XMALLOC(WC_ML_KEM_768_PUBLIC_KEY_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (priv == NULL || pub == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE - ct = (byte *)XMALLOC(KYBER768_CIPHER_TEXT_SIZE, HEAP_HINT, +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE + ct = (byte *)XMALLOC(WC_ML_KEM_768_CIPHER_TEXT_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - ss = (byte *)XMALLOC(KYBER_SS_SZ, HEAP_HINT, + ss = (byte *)XMALLOC(WC_ML_KEM_SS_SZ, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (ct == NULL || ss == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE - ss_dec = (byte *)XMALLOC(KYBER_SS_SZ, HEAP_HINT, +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE + ss_dec = (byte *)XMALLOC(WC_ML_KEM_SS_SZ, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (ss_dec == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER ret = wc_KyberKey_Init(KYBER768, key, HEAP_HINT, INVALID_DEVID); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); else key_inited = 1; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY ret = 
wc_KyberKey_MakeKeyWithRandom(key, kyber768_rand, sizeof(kyber768_rand)); if (ret != 0) @@ -41062,7 +41062,7 @@ static wc_test_ret_t kyber768_kat(void) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE ret = wc_KyberKey_EncapsulateWithRandom(key, ct, ss, kyber768enc_rand, sizeof(kyber768enc_rand)); if (ret != 0) @@ -41077,7 +41077,7 @@ static wc_test_ret_t kyber768_kat(void) (void)kyber768enc_rand; #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE ret = wc_KyberKey_Decapsulate(key, ss_dec, kyber768_ct, sizeof(kyber768_ct)); if (ret != 0) @@ -41091,23 +41091,23 @@ static wc_test_ret_t kyber768_kat(void) #endif #endif #ifndef WOLFSSL_NO_ML_KEM - ret = wc_KyberKey_Init(WC_ML_KEM_768, key, HEAP_HINT, INVALID_DEVID); + ret = wc_MlKemKey_Init(key, WC_ML_KEM_768, HEAP_HINT, INVALID_DEVID); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); else key_inited = 1; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY - ret = wc_KyberKey_MakeKeyWithRandom(key, kyber768_rand, +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY + ret = wc_MlKemKey_MakeKeyWithRandom(key, kyber768_rand, sizeof(kyber768_rand)); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); - ret = wc_KyberKey_EncodePublicKey(key, pub, WC_ML_KEM_768_PUBLIC_KEY_SIZE); + ret = wc_MlKemKey_EncodePublicKey(key, pub, WC_ML_KEM_768_PUBLIC_KEY_SIZE); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); - ret = wc_KyberKey_EncodePrivateKey(key, priv, + ret = wc_MlKemKey_EncodePrivateKey(key, priv, WC_ML_KEM_768_PRIVATE_KEY_SIZE); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); @@ -41120,14 +41120,14 @@ static wc_test_ret_t kyber768_kat(void) #else (void)kyber768_rand; (void)ml_kem_768_pk; - ret = wc_KyberKey_DecodePrivateKey(key, ml_kem_768_sk, + ret = wc_MlKemKey_DecodePrivateKey(key, ml_kem_768_sk, WC_ML_KEM_768_PRIVATE_KEY_SIZE); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE - 
ret = wc_KyberKey_EncapsulateWithRandom(key, ct, ss, kyber768enc_rand, +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE + ret = wc_MlKemKey_EncapsulateWithRandom(key, ct, ss, kyber768enc_rand, sizeof(kyber768enc_rand)); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); @@ -41141,8 +41141,8 @@ static wc_test_ret_t kyber768_kat(void) (void)kyber768enc_rand; #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE - ret = wc_KyberKey_Decapsulate(key, ss_dec, ml_kem_768_ct, +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE + ret = wc_MlKemKey_Decapsulate(key, ss_dec, ml_kem_768_ct, sizeof(ml_kem_768_ct)); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); @@ -41158,56 +41158,56 @@ static wc_test_ret_t kyber768_kat(void) out: if (key_inited) - wc_KyberKey_Free(key); + wc_MlKemKey_Free(key); #ifdef WOLFSSL_SMALL_STACK XFREE(key, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY XFREE(priv, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); XFREE(pub, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE XFREE(ct, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); XFREE(ss, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE XFREE(ss_dec, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); #endif #endif return ret; } -#endif /* WOLFSSL_KYBER768 */ +#endif /* !WOLFSSL_NO_KYBER768 && !WOLFSSL_NO_ML_KEM_768 */ #if !defined(WOLFSSL_NO_KYBER1024) && !defined(WOLFSSL_NO_ML_KEM_1024) -static wc_test_ret_t kyber1024_kat(void) +static wc_test_ret_t mlkem1024_kat(void) { wc_test_ret_t ret; #ifdef WOLFSSL_SMALL_STACK - KyberKey *key = NULL; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY + MlKemKey *key = NULL; +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY byte *priv = NULL; byte *pub = NULL; #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE byte *ct = NULL; byte *ss = NULL; #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef 
WOLFSSL_MLKEM_NO_DECAPSULATE byte *ss_dec = NULL; #endif #else - KyberKey key[1]; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY - byte priv[KYBER1024_PRIVATE_KEY_SIZE]; - byte pub[KYBER1024_PUBLIC_KEY_SIZE]; + MlKemKey key[1]; +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY + byte priv[WC_ML_KEM_1024_PRIVATE_KEY_SIZE]; + byte pub[WC_ML_KEM_1024_PUBLIC_KEY_SIZE]; #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE - byte ct[KYBER1024_CIPHER_TEXT_SIZE]; - byte ss[KYBER_SS_SZ]; +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE + byte ct[WC_ML_KEM_1024_CIPHER_TEXT_SIZE]; + byte ss[WC_ML_KEM_SS_SZ]; #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE - byte ss_dec[KYBER_SS_SZ]; +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE + byte ss_dec[WC_ML_KEM_SS_SZ]; #endif #endif int key_inited = 0; @@ -41227,7 +41227,7 @@ static wc_test_ret_t kyber1024_kat(void) 0xc8, 0x0e, 0xfe, 0x79, 0xa3, 0xa9, 0xa8, 0x74, 0xcc, 0x09, 0xfe, 0x76, 0xf6, 0x99, 0x76, 0x15 }; -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER WOLFSSL_SMALL_STACK_STATIC const byte kyber1024_pk[] = { 0xD2, 0x23, 0x02, 0xCB, 0xD3, 0x39, 0x9F, 0xAC, 0xC6, 0x30, 0x99, 0x1F, 0xC8, 0xF2, 0x8B, 0xDB, @@ -41627,7 +41627,7 @@ static wc_test_ret_t kyber1024_kat(void) 0x47, 0x46, 0x85, 0x0e, 0x0c, 0x48, 0x47, 0xdb }; #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER WOLFSSL_SMALL_STACK_STATIC const byte kyber1024_sk[] = { 0x07, 0x63, 0x8F, 0xB6, 0x98, 0x68, 0xF3, 0xD3, 0x20, 0xE5, 0x86, 0x2B, 0xD9, 0x69, 0x33, 0xFE, @@ -42427,7 +42427,7 @@ static wc_test_ret_t kyber1024_kat(void) 0x90, 0xfa, 0x9e, 0x8b, 0x87, 0x2b, 0xfb, 0x8f }; #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER WOLFSSL_SMALL_STACK_STATIC const byte kyber1024_ct[] = { 0xA6, 0xAF, 0x29, 0xD5, 0xF5, 0xB8, 0x0B, 0xD1, 0x30, 0xF5, 0x18, 0xBA, 0xDD, 0xD6, 0xC8, 0xF1, @@ -42827,7 +42827,7 @@ static wc_test_ret_t kyber1024_kat(void) 0x88, 0xe9, 0xc7, 0x17, 0xdd, 0x44, 0xc9, 0xee }; #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER WOLFSSL_SMALL_STACK_STATIC const byte 
kyber1024_ss[] = { 0xB1, 0x0F, 0x73, 0x94, 0x92, 0x6A, 0xD3, 0xB4, 0x9C, 0x5D, 0x62, 0xD5, 0xAE, 0xB5, 0x31, 0xD5, @@ -42845,42 +42845,42 @@ static wc_test_ret_t kyber1024_kat(void) #endif #ifdef WOLFSSL_SMALL_STACK - key = (KyberKey *)XMALLOC(sizeof(KyberKey), HEAP_HINT, + key = (MlKemKey *)XMALLOC(sizeof(MlKemKey), HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (key == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY - priv = (byte *)XMALLOC(KYBER1024_PRIVATE_KEY_SIZE, HEAP_HINT, +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY + priv = (byte *)XMALLOC(WC_ML_KEM_1024_PRIVATE_KEY_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - pub = (byte *)XMALLOC(KYBER1024_PUBLIC_KEY_SIZE, HEAP_HINT, + pub = (byte *)XMALLOC(WC_ML_KEM_1024_PUBLIC_KEY_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (priv == NULL || pub == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE - ct = (byte *)XMALLOC(KYBER1024_CIPHER_TEXT_SIZE, HEAP_HINT, +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE + ct = (byte *)XMALLOC(WC_ML_KEM_1024_CIPHER_TEXT_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - ss = (byte *)XMALLOC(KYBER_SS_SZ, HEAP_HINT, + ss = (byte *)XMALLOC(WC_ML_KEM_SS_SZ, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (ct == NULL || ss == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE - ss_dec = (byte *)XMALLOC(KYBER_SS_SZ, HEAP_HINT, +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE + ss_dec = (byte *)XMALLOC(WC_ML_KEM_SS_SZ, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (ss_dec == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER ret = wc_KyberKey_Init(KYBER1024, key, HEAP_HINT, INVALID_DEVID); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); else key_inited = 1; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY ret = wc_KyberKey_MakeKeyWithRandom(key, kyber1024_rand, sizeof(kyber1024_rand)); if (ret != 0) @@ -42908,7 +42908,7 @@ static wc_test_ret_t 
kyber1024_kat(void) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE ret = wc_KyberKey_EncapsulateWithRandom(key, ct, ss, kyber1024enc_rand, sizeof(kyber1024enc_rand)); if (ret != 0) @@ -42923,7 +42923,7 @@ static wc_test_ret_t kyber1024_kat(void) (void)kyber1024enc_rand; #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE ret = wc_KyberKey_Decapsulate(key, ss_dec, kyber1024_ct, sizeof(kyber1024_ct)); if (ret != 0) @@ -42937,23 +42937,23 @@ static wc_test_ret_t kyber1024_kat(void) #endif #endif #ifndef WOLFSSL_NO_ML_KEM - ret = wc_KyberKey_Init(WC_ML_KEM_1024, key, HEAP_HINT, INVALID_DEVID); + ret = wc_MlKemKey_Init(key, WC_ML_KEM_1024, HEAP_HINT, INVALID_DEVID); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); else key_inited = 1; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY - ret = wc_KyberKey_MakeKeyWithRandom(key, kyber1024_rand, +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY + ret = wc_MlKemKey_MakeKeyWithRandom(key, kyber1024_rand, sizeof(kyber1024_rand)); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); - ret = wc_KyberKey_EncodePublicKey(key, pub, WC_ML_KEM_MAX_PUBLIC_KEY_SIZE); + ret = wc_MlKemKey_EncodePublicKey(key, pub, WC_ML_KEM_MAX_PUBLIC_KEY_SIZE); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); - ret = wc_KyberKey_EncodePrivateKey(key, priv, + ret = wc_MlKemKey_EncodePrivateKey(key, priv, WC_ML_KEM_MAX_PRIVATE_KEY_SIZE); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); @@ -42966,14 +42966,14 @@ static wc_test_ret_t kyber1024_kat(void) #else (void)kyber1024_rand; (void)ml_kem_1024_pk; - ret = wc_KyberKey_DecodePrivateKey(key, ml_kem_1024_sk, + ret = wc_MlKemKey_DecodePrivateKey(key, ml_kem_1024_sk, WC_ML_KEM_1024_PRIVATE_KEY_SIZE); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE - ret = wc_KyberKey_EncapsulateWithRandom(key, ct, ss, kyber1024enc_rand, +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE + 
ret = wc_MlKemKey_EncapsulateWithRandom(key, ct, ss, kyber1024enc_rand, sizeof(kyber1024enc_rand)); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); @@ -42987,8 +42987,8 @@ static wc_test_ret_t kyber1024_kat(void) (void)kyber1024enc_rand; #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE - ret = wc_KyberKey_Decapsulate(key, ss_dec, ml_kem_1024_ct, +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE + ret = wc_MlKemKey_Decapsulate(key, ss_dec, ml_kem_1024_ct, sizeof(ml_kem_1024_ct)); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); @@ -43004,60 +43004,60 @@ static wc_test_ret_t kyber1024_kat(void) out: if (key_inited) - wc_KyberKey_Free(key); + wc_MlKemKey_Free(key); #ifdef WOLFSSL_SMALL_STACK XFREE(key, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY XFREE(priv, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); XFREE(pub, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); #endif -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE XFREE(ct, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); XFREE(ss, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); #endif -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE XFREE(ss_dec, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); #endif #endif return ret; } -#endif /* WOLFSSL_KYBER1024 */ -#endif /* WOLFSSL_WC_KYBER */ +#endif /* !WOLFSSL_NO_KYBER1024 && !WOLFSSL_NO_ML_KEM_1024 */ +#endif /* WOLFSSL_WC_MLKEM */ -WOLFSSL_TEST_SUBROUTINE wc_test_ret_t kyber_test(void) +WOLFSSL_TEST_SUBROUTINE wc_test_ret_t mlkem_test(void) { wc_test_ret_t ret; WC_RNG rng; int i; #ifdef WOLFSSL_SMALL_STACK - KyberKey *key = NULL; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY + MlKemKey *key = NULL; +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY byte *priv = NULL; byte *pub = NULL; byte *priv2 = NULL; byte *pub2 = NULL; -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE byte *ct = NULL; byte *ss = NULL; -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE byte *ss_dec = NULL; #endif #endif 
#endif #else - KyberKey key[1]; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY - byte priv[KYBER_MAX_PRIVATE_KEY_SIZE]; - byte pub[KYBER_MAX_PUBLIC_KEY_SIZE]; - byte priv2[KYBER_MAX_PRIVATE_KEY_SIZE]; - byte pub2[KYBER_MAX_PUBLIC_KEY_SIZE]; -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE - byte ct[KYBER_MAX_CIPHER_TEXT_SIZE]; - byte ss[KYBER_SS_SZ]; -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE - byte ss_dec[KYBER_SS_SZ]; + MlKemKey key[1]; +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY + byte priv[WC_ML_KEM_MAX_PRIVATE_KEY_SIZE]; + byte pub[WC_ML_KEM_MAX_PUBLIC_KEY_SIZE]; + byte priv2[WC_ML_KEM_MAX_PRIVATE_KEY_SIZE]; + byte pub2[WC_ML_KEM_MAX_PUBLIC_KEY_SIZE]; +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE + byte ct[WC_ML_KEM_MAX_CIPHER_TEXT_SIZE]; + byte ss[WC_ML_KEM_SS_SZ]; +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE + byte ss_dec[WC_ML_KEM_SS_SZ]; #endif #endif #endif @@ -43078,7 +43078,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t kyber_test(void) WC_ML_KEM_1024_PUBLIC_KEY_SIZE, WC_ML_KEM_1024_CIPHER_TEXT_SIZE }, #endif #endif -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER #ifdef WOLFSSL_KYBER512 { KYBER512, KYBER512_PRIVATE_KEY_SIZE, KYBER512_PUBLIC_KEY_SIZE, KYBER512_CIPHER_TEXT_SIZE }, @@ -43093,41 +43093,41 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t kyber_test(void) #endif #endif }; - WOLFSSL_ENTER("kyber_test"); + WOLFSSL_ENTER("mlkem_test"); #ifdef WOLFSSL_SMALL_STACK - key = (KyberKey *)XMALLOC(sizeof(KyberKey), HEAP_HINT, + key = (MlKemKey *)XMALLOC(sizeof(MlKemKey), HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (key == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY - priv = (byte *)XMALLOC(KYBER_MAX_PRIVATE_KEY_SIZE, HEAP_HINT, +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY + priv = (byte *)XMALLOC(WC_ML_KEM_MAX_PRIVATE_KEY_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (priv == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); - pub = (byte *)XMALLOC(KYBER_MAX_PUBLIC_KEY_SIZE, HEAP_HINT, + pub = (byte *)XMALLOC(WC_ML_KEM_MAX_PUBLIC_KEY_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (pub == 
NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); - priv2 = (byte *)XMALLOC(KYBER_MAX_PRIVATE_KEY_SIZE, HEAP_HINT, + priv2 = (byte *)XMALLOC(WC_ML_KEM_MAX_PRIVATE_KEY_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (priv2 == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); - pub2 = (byte *)XMALLOC(KYBER_MAX_PUBLIC_KEY_SIZE, HEAP_HINT, + pub2 = (byte *)XMALLOC(WC_ML_KEM_MAX_PUBLIC_KEY_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (pub2 == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE - ct = (byte *)XMALLOC(KYBER_MAX_CIPHER_TEXT_SIZE, HEAP_HINT, +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE + ct = (byte *)XMALLOC(WC_ML_KEM_MAX_CIPHER_TEXT_SIZE, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (ct == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); - ss = (byte *)XMALLOC(KYBER_SS_SZ, HEAP_HINT, + ss = (byte *)XMALLOC(WC_ML_KEM_SS_SZ, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (ss == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE - ss_dec = (byte *)XMALLOC(KYBER_SS_SZ, HEAP_HINT, +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE + ss_dec = (byte *)XMALLOC(WC_ML_KEM_SS_SZ, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); if (ss_dec == NULL) ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); @@ -43145,65 +43145,65 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t kyber_test(void) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); for (i = 0; i < (int)(sizeof(testData) / sizeof(*testData)); i++) { - ret = wc_KyberKey_Init(testData[i][0], key, HEAP_HINT, INVALID_DEVID); + ret = wc_MlKemKey_Init(key, testData[i][0], HEAP_HINT, INVALID_DEVID); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); else key_inited = 1; -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY - ret = wc_KyberKey_MakeKey(key, &rng); +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY + ret = wc_MlKemKey_MakeKey(key, &rng); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); - ret = wc_KyberKey_EncodePublicKey(key, pub, testData[i][2]); + ret = wc_MlKemKey_EncodePublicKey(key, pub, testData[i][2]); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); - 
ret = wc_KyberKey_EncodePrivateKey(key, priv, testData[i][1]); + ret = wc_MlKemKey_EncodePrivateKey(key, priv, testData[i][1]); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); - ret = wc_KyberKey_Init(testData[i][0], key, HEAP_HINT, INVALID_DEVID); + ret = wc_MlKemKey_Init(key, testData[i][0], HEAP_HINT, INVALID_DEVID); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); - ret = wc_KyberKey_DecodePublicKey(key, pub, testData[i][2]); + ret = wc_MlKemKey_DecodePublicKey(key, pub, testData[i][2]); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE - ret = wc_KyberKey_Encapsulate(key, ct, ss, &rng); +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE + ret = wc_MlKemKey_Encapsulate(key, ct, ss, &rng); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); #endif - ret = wc_KyberKey_EncodePublicKey(key, pub2, testData[i][2]); + ret = wc_MlKemKey_EncodePublicKey(key, pub2, testData[i][2]); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); if (XMEMCMP(pub, pub2, testData[i][2]) != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); - ret = wc_KyberKey_Init(testData[i][0], key, HEAP_HINT, INVALID_DEVID); + ret = wc_MlKemKey_Init(key, testData[i][0], HEAP_HINT, INVALID_DEVID); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); - ret = wc_KyberKey_DecodePrivateKey(key, priv, testData[i][1]); + ret = wc_MlKemKey_DecodePrivateKey(key, priv, testData[i][1]); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); -#if !defined(WOLFSSL_KYBER_NO_ENCAPSULATE) && \ - !defined(WOLFSSL_KYBER_NO_DECAPSULATE) - ret = wc_KyberKey_Decapsulate(key, ss_dec, ct, testData[i][3]); +#if !defined(WOLFSSL_MLKEM_NO_ENCAPSULATE) && \ + !defined(WOLFSSL_MLKEM_NO_DECAPSULATE) + ret = wc_MlKemKey_Decapsulate(key, ss_dec, ct, testData[i][3]); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); - if (XMEMCMP(ss, ss_dec, KYBER_SS_SZ) != 0) + if (XMEMCMP(ss, ss_dec, WC_ML_KEM_SS_SZ) != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); #endif - ret = wc_KyberKey_EncodePrivateKey(key, priv2, 
testData[i][1]); + ret = wc_MlKemKey_EncodePrivateKey(key, priv2, testData[i][1]); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_I(i), out); @@ -43214,40 +43214,40 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t kyber_test(void) wc_FreeRng(&rng); -#ifdef WOLFSSL_WC_KYBER +#ifdef WOLFSSL_WC_MLKEM #if !defined(WOLFSSL_NO_KYBER512) && !defined(WOLFSSL_NO_ML_KEM_512) - ret = kyber512_kat(); + ret = mlkem512_kat(); if (ret != 0) goto out; #endif #if !defined(WOLFSSL_NO_KYBER768) && !defined(WOLFSSL_NO_ML_KEM_768) - ret = kyber768_kat(); + ret = mlkem768_kat(); if (ret != 0) goto out; #endif #if !defined(WOLFSSL_NO_KYBER1024) && !defined(WOLFSSL_NO_ML_KEM_1024) - ret = kyber1024_kat(); + ret = mlkem1024_kat(); if (ret != 0) goto out; #endif -#endif /* WOLFSSL_WC_KYBER */ +#endif /* WOLFSSL_WC_MLKEM */ out: if (key_inited) - wc_KyberKey_Free(key); + wc_MlKemKey_Free(key); #ifdef WOLFSSL_SMALL_STACK XFREE(key, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); -#ifndef WOLFSSL_KYBER_NO_MAKE_KEY +#ifndef WOLFSSL_MLKEM_NO_MAKE_KEY XFREE(priv, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); XFREE(pub, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); XFREE(priv2, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); XFREE(pub2, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); -#ifndef WOLFSSL_KYBER_NO_ENCAPSULATE +#ifndef WOLFSSL_MLKEM_NO_ENCAPSULATE XFREE(ct, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); XFREE(ss, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); -#ifndef WOLFSSL_KYBER_NO_DECAPSULATE +#ifndef WOLFSSL_MLKEM_NO_DECAPSULATE XFREE(ss_dec, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); #endif #endif @@ -43256,7 +43256,7 @@ out: return ret; } -#endif /* WOLFSSL_HAVE_KYBER */ +#endif /* WOLFSSL_HAVE_MLKEM */ #ifdef HAVE_DILITHIUM #ifndef WOLFSSL_DILITHIUM_NO_VERIFY diff --git a/wolfssl-VS2022.vcxproj b/wolfssl-VS2022.vcxproj index 7c011bbc7..d5e13a396 100644 --- a/wolfssl-VS2022.vcxproj +++ b/wolfssl-VS2022.vcxproj @@ -428,7 +428,7 @@ - + @@ -440,8 +440,8 @@ - - + + diff --git a/wolfssl.vcproj b/wolfssl.vcproj index 8f07d8bf1..02554722d 100644 --- a/wolfssl.vcproj +++ 
b/wolfssl.vcproj @@ -264,7 +264,7 @@ > - + @@ -439,8 +439,8 @@ - - + + diff --git a/wolfssl/internal.h b/wolfssl/internal.h index 09f218d33..d2b027354 100644 --- a/wolfssl/internal.h +++ b/wolfssl/internal.h @@ -1908,7 +1908,7 @@ enum Misc { #define WOLFSSL_NAMED_GROUP_IS_FFDHE(group) \ (WOLFSSL_FFDHE_START <= (group) && (group) <= WOLFSSL_FFDHE_END) -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM WOLFSSL_LOCAL int NamedGroupIsPqc(int group); WOLFSSL_LOCAL int NamedGroupIsPqcHybrid(int group); #define WOLFSSL_NAMED_GROUP_IS_PQC(group) NamedGroupIsPqc(group) @@ -1916,7 +1916,7 @@ WOLFSSL_LOCAL int NamedGroupIsPqcHybrid(int group); #else #define WOLFSSL_NAMED_GROUP_IS_PQC(group) ((void)(group), 0) #define WOLFSSL_NAMED_GROUP_IS_PQC_HYBRID(group) ((void)(group), 0) -#endif /* WOLFSSL_HAVE_KYBER */ +#endif /* WOLFSSL_HAVE_MLKEM */ /* minimum Downgrade Minor version */ #ifndef WOLFSSL_MIN_DOWNGRADE @@ -3607,7 +3607,7 @@ typedef struct KeyShareEntry { word32 keyLen; /* Key size (bytes) */ byte* pubKey; /* Public key */ word32 pubKeyLen; /* Public key length */ -#if !defined(NO_DH) || defined(WOLFSSL_HAVE_KYBER) +#if !defined(NO_DH) || defined(WOLFSSL_HAVE_MLKEM) byte* privKey; /* Private key */ word32 privKeyLen;/* Private key length - PQC only */ #endif diff --git a/wolfssl/ssl.h b/wolfssl/ssl.h index 7260799fe..0f1ea5ece 100644 --- a/wolfssl/ssl.h +++ b/wolfssl/ssl.h @@ -4572,7 +4572,7 @@ enum { #ifdef HAVE_PQC -#ifdef WOLFSSL_KYBER_ORIGINAL +#ifdef WOLFSSL_MLKEM_KYBER /* Old code points to keep compatibility with Kyber Round 3. 
* Taken from OQS's openssl provider, see: * https://github.com/open-quantum-safe/oqs-provider/blob/main/oqs-template/ @@ -4589,7 +4589,7 @@ enum { WOLFSSL_X448_KYBER_LEVEL3 = 12176, WOLFSSL_X25519_KYBER_LEVEL3 = 25497, WOLFSSL_P256_KYBER_LEVEL3 = 25498, -#endif /* WOLFSSL_KYBER_ORIGINAL */ +#endif /* WOLFSSL_MLKEM_KYBER */ #ifndef WOLFSSL_NO_ML_KEM /* Taken from draft-connolly-tls-mlkem-key-agreement, see: * https://github.com/dconnolly/draft-connolly-tls-mlkem-key-agreement/ diff --git a/wolfssl/wolfcrypt/cryptocb.h b/wolfssl/wolfcrypt/cryptocb.h index d2d90fe37..ee68c78f2 100644 --- a/wolfssl/wolfcrypt/cryptocb.h +++ b/wolfssl/wolfcrypt/cryptocb.h @@ -74,12 +74,12 @@ #if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384) #include #endif -#ifdef WOLFSSL_HAVE_KYBER - #include -#ifdef WOLFSSL_WC_KYBER - #include +#ifdef WOLFSSL_HAVE_MLKEM + #include +#ifdef WOLFSSL_WC_MLKEM + #include #elif defined(HAVE_LIBOQS) - #include + #include #endif #endif #if defined(HAVE_DILITHIUM) @@ -254,7 +254,7 @@ typedef struct wc_CryptoInfo { byte contextLen; } ed25519verify; #endif - #if defined(WOLFSSL_HAVE_KYBER) + #if defined(WOLFSSL_HAVE_MLKEM) struct { WC_RNG* rng; int size; @@ -544,7 +544,7 @@ WOLFSSL_LOCAL int wc_CryptoCb_Ed25519Verify(const byte* sig, word32 sigLen, const byte* context, byte contextLen); #endif /* HAVE_ED25519 */ -#if defined(WOLFSSL_HAVE_KYBER) +#if defined(WOLFSSL_HAVE_MLKEM) WOLFSSL_LOCAL int wc_CryptoCb_PqcKemGetDevId(int type, void* key); WOLFSSL_LOCAL int wc_CryptoCb_MakePqcKemKey(WC_RNG* rng, int type, @@ -557,7 +557,7 @@ WOLFSSL_LOCAL int wc_CryptoCb_PqcEncapsulate(byte* ciphertext, WOLFSSL_LOCAL int wc_CryptoCb_PqcDecapsulate(const byte* ciphertext, word32 ciphertextLen, byte* sharedSecret, word32 sharedSecretLen, int type, void* key); -#endif /* WOLFSSL_HAVE_KYBER */ +#endif /* WOLFSSL_HAVE_MLKEM */ #if defined(HAVE_FALCON) || defined(HAVE_DILITHIUM) WOLFSSL_LOCAL int wc_CryptoCb_PqcSigGetDevId(int type, void* key); diff --git 
a/wolfssl/wolfcrypt/ext_kyber.h b/wolfssl/wolfcrypt/ext_mlkem.h similarity index 89% rename from wolfssl/wolfcrypt/ext_kyber.h rename to wolfssl/wolfcrypt/ext_mlkem.h index 0fe421f56..53c6c7d8c 100644 --- a/wolfssl/wolfcrypt/ext_kyber.h +++ b/wolfssl/wolfcrypt/ext_mlkem.h @@ -1,4 +1,4 @@ -/* ext_kyber.h +/* ext_mlkem.h * * Copyright (C) 2006-2025 wolfSSL Inc. * @@ -26,14 +26,14 @@ #include #endif -#ifdef WOLFSSL_HAVE_KYBER -#include +#ifdef WOLFSSL_HAVE_MLKEM +#include #if !defined(HAVE_LIBOQS) #error "This code requires liboqs" #endif -#if defined(WOLFSSL_WC_KYBER) +#if defined(WOLFSSL_WC_MLKEM) #error "This code is incompatible with wolfCrypt's implementation of Kyber." #endif @@ -43,7 +43,7 @@ #ifndef WOLFSSL_NO_ML_KEM #define EXT_KYBER_MAX_PRIV_SZ OQS_KEM_ml_kem_1024_length_secret_key #define EXT_KYBER_MAX_PUB_SZ OQS_KEM_ml_kem_1024_length_public_key - #elif defined(WOLFSSL_KYBER_ORIGINAL) + #elif defined(WOLFSSL_MLKEM_KYBER) #define EXT_KYBER_MAX_PRIV_SZ OQS_KEM_kyber_1024_length_secret_key #define EXT_KYBER_MAX_PUB_SZ OQS_KEM_kyber_1024_length_public_key #endif @@ -68,7 +68,7 @@ struct KyberKey { }; #if defined (HAVE_LIBOQS) -WOLFSSL_LOCAL int ext_kyber_enabled(int id); +WOLFSSL_LOCAL int ext_mlkem_enabled(int id); #endif -#endif /* WOLFSSL_HAVE_KYBER */ +#endif /* WOLFSSL_HAVE_MLKEM */ #endif /* EXT_KYBER_H */ diff --git a/wolfssl/wolfcrypt/include.am b/wolfssl/wolfcrypt/include.am index f765ffa4d..15244ce32 100644 --- a/wolfssl/wolfcrypt/include.am +++ b/wolfssl/wolfcrypt/include.am @@ -74,9 +74,9 @@ nobase_include_HEADERS+= \ wolfssl/wolfcrypt/siphash.h \ wolfssl/wolfcrypt/cpuid.h \ wolfssl/wolfcrypt/cryptocb.h \ - wolfssl/wolfcrypt/kyber.h \ - wolfssl/wolfcrypt/wc_kyber.h \ - wolfssl/wolfcrypt/ext_kyber.h \ + wolfssl/wolfcrypt/mlkem.h \ + wolfssl/wolfcrypt/wc_mlkem.h \ + wolfssl/wolfcrypt/ext_mlkem.h \ wolfssl/wolfcrypt/sm2.h \ wolfssl/wolfcrypt/sm3.h \ wolfssl/wolfcrypt/sm4.h \ diff --git a/wolfssl/wolfcrypt/kyber.h b/wolfssl/wolfcrypt/mlkem.h similarity 
index 64% rename from wolfssl/wolfcrypt/kyber.h rename to wolfssl/wolfcrypt/mlkem.h index 5eb5f1d47..4a922a1cf 100644 --- a/wolfssl/wolfcrypt/kyber.h +++ b/wolfssl/wolfcrypt/mlkem.h @@ -1,4 +1,4 @@ -/* kyber.h +/* mlkem.h * * Copyright (C) 2006-2025 wolfSSL Inc. * @@ -20,28 +20,50 @@ */ /*! - \file wolfssl/wolfcrypt/kyber.h + \file wolfssl/wolfcrypt/mlkem.h */ -#ifndef WOLF_CRYPT_KYBER_H -#define WOLF_CRYPT_KYBER_H +#ifndef WOLF_CRYPT_MLKEM_H +#define WOLF_CRYPT_MLKEM_H #include #include -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM + +/* Number of co-efficients in polynomial. */ +#define MLKEM_N 256 /* Define algorithm type when not excluded. */ +#ifndef WOLFSSL_NO_ML_KEM + #if !defined(WOLFSSL_NO_ML_KEM_512) + #define WOLFSSL_WC_ML_KEM_512 + #endif + #if !defined(WOLFSSL_NO_ML_KEM_768) + #define WOLFSSL_WC_ML_KEM_768 + #endif + #if !defined(WOLFSSL_NO_ML_KEM_1024) + #define WOLFSSL_WC_ML_KEM_1024 + #endif -#ifdef WOLFSSL_KYBER_ORIGINAL + #if !defined(WOLFSSL_WC_ML_KEM_512) && !defined(WOLFSSL_WC_ML_KEM_768) && \ + !defined(WOLFSSL_WC_ML_KEM_1024) + #error "No ML-KEM key size chosen." + #endif +#endif + +#ifdef WOLFSSL_MLKEM_KYBER #ifndef WOLFSSL_NO_KYBER512 #define WOLFSSL_KYBER512 + #define WOLFSSL_WC_ML_KEM_512 #endif #ifndef WOLFSSL_NO_KYBER768 #define WOLFSSL_KYBER768 + #define WOLFSSL_WC_ML_KEM_768 #endif #ifndef WOLFSSL_NO_KYBER1024 #define WOLFSSL_KYBER1024 + #define WOLFSSL_WC_ML_KEM_1024 #endif #if !defined(WOLFSSL_KYBER512) && !defined(WOLFSSL_KYBER768) && \ @@ -50,10 +72,105 @@ #endif #endif +/* Size of a polynomial vector based on dimensions. */ +#define MLKEM_POLY_VEC_SZ(k) ((k) * WC_ML_KEM_POLY_SIZE) +/* Size of a compressed polynomial based on bits per coefficient. */ +#define MLKEM_POLY_COMPRESSED_SZ(b) ((b) * (MLKEM_N / 8)) +/* Size of a compressed vector polynomial based on dimensions and bits per + * coefficient. */ +#define MLKEM_POLY_VEC_COMPRESSED_SZ(k, b) ((k) * ((b) * (MLKEM_N / 8))) -/* Number of co-efficients in polynomial. 
*/ -#define KYBER_N 256 +#ifdef WOLFSSL_WC_ML_KEM_512 +#define WC_ML_KEM_512_K 2 +/* Size of a polynomial vector. */ +#define WC_ML_KEM_512_POLY_VEC_SZ MLKEM_POLY_VEC_SZ(WC_ML_KEM_512_K) +/* Size of a compressed polynomial based on bits per coefficient. */ +#define WC_ML_KEM_512_POLY_COMPRESSED_SZ MLKEM_POLY_COMPRESSED_SZ(4) +/* Size of a compressed vector polynomial based on dimensions and bits per + * coefficient. */ +#define WC_ML_KEM_512_POLY_VEC_COMPRESSED_SZ \ + MLKEM_POLY_VEC_COMPRESSED_SZ(WC_ML_KEM_512_K, 10) +/* Public key size. */ +#define WC_ML_KEM_512_PUBLIC_KEY_SIZE \ + (WC_ML_KEM_512_POLY_VEC_SZ + WC_ML_KEM_SYM_SZ) +/* Private key size. */ +#define WC_ML_KEM_512_PRIVATE_KEY_SIZE \ + (WC_ML_KEM_512_POLY_VEC_SZ + WC_ML_KEM_512_PUBLIC_KEY_SIZE + \ + 2 * WC_ML_KEM_SYM_SZ) +/* Cipher text size. */ +#define WC_ML_KEM_512_CIPHER_TEXT_SIZE \ + (WC_ML_KEM_512_POLY_VEC_COMPRESSED_SZ + WC_ML_KEM_512_POLY_COMPRESSED_SZ) +#endif + +#ifdef WOLFSSL_WC_ML_KEM_768 +#define WC_ML_KEM_768_K 3 + +/* Size of a polynomial vector. */ +#define WC_ML_KEM_768_POLY_VEC_SZ MLKEM_POLY_VEC_SZ(WC_ML_KEM_768_K) +/* Size of a compressed polynomial based on bits per coefficient. */ +#define WC_ML_KEM_768_POLY_COMPRESSED_SZ MLKEM_POLY_COMPRESSED_SZ(4) +/* Size of a compressed vector polynomial based on dimensions and bits per + * coefficient. */ +#define WC_ML_KEM_768_POLY_VEC_COMPRESSED_SZ \ + MLKEM_POLY_VEC_COMPRESSED_SZ(WC_ML_KEM_768_K, 10) + +/* Public key size. */ +#define WC_ML_KEM_768_PUBLIC_KEY_SIZE \ + (WC_ML_KEM_768_POLY_VEC_SZ + WC_ML_KEM_SYM_SZ) +/* Private key size. */ +#define WC_ML_KEM_768_PRIVATE_KEY_SIZE \ + (WC_ML_KEM_768_POLY_VEC_SZ + WC_ML_KEM_768_PUBLIC_KEY_SIZE + \ + 2 * WC_ML_KEM_SYM_SZ) +/* Cipher text size. */ +#define WC_ML_KEM_768_CIPHER_TEXT_SIZE \ + (WC_ML_KEM_768_POLY_VEC_COMPRESSED_SZ + WC_ML_KEM_768_POLY_COMPRESSED_SZ) +#endif + +#ifdef WOLFSSL_WC_ML_KEM_1024 +#define WC_ML_KEM_1024_K 4 + +/* Size of a polynomial vector. 
*/ +#define WC_ML_KEM_1024_POLY_VEC_SZ MLKEM_POLY_VEC_SZ(WC_ML_KEM_1024_K) +/* Size of a compressed polynomial based on bits per coefficient. */ +#define WC_ML_KEM_1024_POLY_COMPRESSED_SZ MLKEM_POLY_COMPRESSED_SZ(5) +/* Size of a compressed vector polynomial based on dimensions and bits per + * coefficient. */ +#define WC_ML_KEM_1024_POLY_VEC_COMPRESSED_SZ \ + MLKEM_POLY_VEC_COMPRESSED_SZ(WC_ML_KEM_1024_K, 11) + +/* Public key size. */ +#define WC_ML_KEM_1024_PUBLIC_KEY_SIZE \ + (WC_ML_KEM_1024_POLY_VEC_SZ + WC_ML_KEM_SYM_SZ) +/* Private key size. */ +#define WC_ML_KEM_1024_PRIVATE_KEY_SIZE \ + (WC_ML_KEM_1024_POLY_VEC_SZ + WC_ML_KEM_1024_PUBLIC_KEY_SIZE + \ + 2 * WC_ML_KEM_SYM_SZ) +/* Cipher text size. */ +#define WC_ML_KEM_1024_CIPHER_TEXT_SIZE \ + (WC_ML_KEM_1024_POLY_VEC_COMPRESSED_SZ + WC_ML_KEM_1024_POLY_COMPRESSED_SZ) +#endif + +#ifndef WC_ML_KEM_MAX_K +#ifdef WOLFSSL_WC_ML_KEM_1024 +#define WC_ML_KEM_MAX_K WC_ML_KEM_1024_K +#define WC_ML_KEM_MAX_PRIVATE_KEY_SIZE WC_ML_KEM_1024_PRIVATE_KEY_SIZE +#define WC_ML_KEM_MAX_PUBLIC_KEY_SIZE WC_ML_KEM_1024_PUBLIC_KEY_SIZE +#define WC_ML_KEM_MAX_CIPHER_TEXT_SIZE WC_ML_KEM_1024_CIPHER_TEXT_SIZE +#elif defined(WOLFSSL_WC_ML_KEM_768) +#define WC_ML_KEM_MAX_K WC_ML_KEM_768_K +#define WC_ML_KEM_MAX_PRIVATE_KEY_SIZE WC_ML_KEM_768_PRIVATE_KEY_SIZE +#define WC_ML_KEM_MAX_PUBLIC_KEY_SIZE WC_ML_KEM_768_PUBLIC_KEY_SIZE +#define WC_ML_KEM_MAX_CIPHER_TEXT_SIZE WC_ML_KEM_768_CIPHER_TEXT_SIZE +#elif defined(WOLFSSL_WC_ML_KEM_512) +#define WC_ML_KEM_MAX_K WC_ML_KEM_512_K +#define WC_ML_KEM_MAX_PRIVATE_KEY_SIZE WC_ML_KEM_512_PRIVATE_KEY_SIZE +#define WC_ML_KEM_MAX_PUBLIC_KEY_SIZE WC_ML_KEM_512_PUBLIC_KEY_SIZE +#define WC_ML_KEM_MAX_CIPHER_TEXT_SIZE WC_ML_KEM_512_CIPHER_TEXT_SIZE +#endif +#endif /* WC_ML_KEM_MAX_K */ + +#define KYBER_N MLKEM_N /* Size of a polynomial vector based on dimensions. 
*/ #define KYBER_POLY_VEC_SZ(k) ((k) * KYBER_POLY_SIZE) @@ -152,218 +269,106 @@ #define KYBER_MAX_CIPHER_TEXT_SIZE KYBER512_CIPHER_TEXT_SIZE #endif +#define KYBER_SYM_SZ WC_ML_KEM_SYM_SZ +#define KYBER_SS_SZ WC_ML_KEM_SS_SZ +#define KYBER_MAKEKEY_RAND_SZ WC_ML_KEM_MAKEKEY_RAND_SZ +#define KYBER_ENC_RAND_SZ WC_ML_KEM_ENC_RAND_SZ +#define KYBER_POLY_SIZE WC_ML_KEM_POLY_SIZE + + enum { /* Types of Kyber keys. */ WC_ML_KEM_512 = 0, WC_ML_KEM_768 = 1, WC_ML_KEM_1024 = 2, - KYBER_ORIGINAL = 0x10, - KYBER512 = 0 | KYBER_ORIGINAL, - KYBER768 = 1 | KYBER_ORIGINAL, - KYBER1024 = 2 | KYBER_ORIGINAL, + MLKEM_KYBER = 0x10, + KYBER512 = 0 | MLKEM_KYBER, + KYBER768 = 1 | MLKEM_KYBER, + KYBER1024 = 2 | MLKEM_KYBER, KYBER_LEVEL1 = KYBER512, KYBER_LEVEL3 = KYBER768, KYBER_LEVEL5 = KYBER1024, /* Symmetric data size. */ - KYBER_SYM_SZ = 32, + WC_ML_KEM_SYM_SZ = 32, /* Shared secret size. */ - KYBER_SS_SZ = 32, + WC_ML_KEM_SS_SZ = 32, /* Size of random required for making a key. */ - KYBER_MAKEKEY_RAND_SZ = 2 * KYBER_SYM_SZ, + WC_ML_KEM_MAKEKEY_RAND_SZ = 2 * WC_ML_KEM_SYM_SZ, /* Size of random required for encapsulation. */ - KYBER_ENC_RAND_SZ = KYBER_SYM_SZ, + WC_ML_KEM_ENC_RAND_SZ = WC_ML_KEM_SYM_SZ, /* Encoded polynomial size. */ - KYBER_POLY_SIZE = 384, + WC_ML_KEM_POLY_SIZE = 384, }; /* Different structures for different implementations. 
*/ -typedef struct KyberKey KyberKey; +typedef struct MlKemKey MlKemKey; #ifdef __cplusplus extern "C" { #endif -WOLFSSL_API int wc_KyberKey_Init(int type, KyberKey* key, void* heap, +WOLFSSL_API int wc_MlKemKey_Init(MlKemKey* key, int type, void* heap, int devId); -WOLFSSL_API void wc_KyberKey_Free(KyberKey* key); +WOLFSSL_API int wc_MlKemKey_Free(MlKemKey* key); -WOLFSSL_API int wc_KyberKey_MakeKey(KyberKey* key, WC_RNG* rng); -WOLFSSL_API int wc_KyberKey_MakeKeyWithRandom(KyberKey* key, +WOLFSSL_API int wc_MlKemKey_MakeKey(MlKemKey* key, WC_RNG* rng); +WOLFSSL_API int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, const unsigned char* rand, int len); -WOLFSSL_API int wc_KyberKey_CipherTextSize(KyberKey* key, word32* len); -WOLFSSL_API int wc_KyberKey_SharedSecretSize(KyberKey* key, word32* len); +WOLFSSL_API int wc_MlKemKey_CipherTextSize(MlKemKey* key, word32* len); +WOLFSSL_API int wc_MlKemKey_SharedSecretSize(MlKemKey* key, word32* len); -WOLFSSL_API int wc_KyberKey_Encapsulate(KyberKey* key, unsigned char* ct, +WOLFSSL_API int wc_MlKemKey_Encapsulate(MlKemKey* key, unsigned char* ct, unsigned char* ss, WC_RNG* rng); -WOLFSSL_API int wc_KyberKey_EncapsulateWithRandom(KyberKey* key, +WOLFSSL_API int wc_MlKemKey_EncapsulateWithRandom(MlKemKey* key, unsigned char* ct, unsigned char* ss, const unsigned char* rand, int len); -WOLFSSL_API int wc_KyberKey_Decapsulate(KyberKey* key, unsigned char* ss, +WOLFSSL_API int wc_MlKemKey_Decapsulate(MlKemKey* key, unsigned char* ss, const unsigned char* ct, word32 len); -WOLFSSL_API int wc_KyberKey_DecodePrivateKey(KyberKey* key, +WOLFSSL_API int wc_MlKemKey_DecodePrivateKey(MlKemKey* key, const unsigned char* in, word32 len); -WOLFSSL_API int wc_KyberKey_DecodePublicKey(KyberKey* key, +WOLFSSL_API int wc_MlKemKey_DecodePublicKey(MlKemKey* key, const unsigned char* in, word32 len); -WOLFSSL_API int wc_KyberKey_PrivateKeySize(KyberKey* key, word32* len); -WOLFSSL_API int wc_KyberKey_PublicKeySize(KyberKey* key, word32* len); 
-WOLFSSL_API int wc_KyberKey_EncodePrivateKey(KyberKey* key, unsigned char* out, +WOLFSSL_API int wc_MlKemKey_PrivateKeySize(MlKemKey* key, word32* len); +WOLFSSL_API int wc_MlKemKey_PublicKeySize(MlKemKey* key, word32* len); +WOLFSSL_API int wc_MlKemKey_EncodePrivateKey(MlKemKey* key, unsigned char* out, word32 len); -WOLFSSL_API int wc_KyberKey_EncodePublicKey(KyberKey* key, unsigned char* out, +WOLFSSL_API int wc_MlKemKey_EncodePublicKey(MlKemKey* key, unsigned char* out, word32 len); +#define KyberKey MlKemKey -#ifndef WOLFSSL_NO_ML_KEM - #if !defined(WOLFSSL_NO_ML_KEM_512) - #define WOLFSSL_WC_ML_KEM_512 - #endif - #if !defined(WOLFSSL_NO_ML_KEM_768) - #define WOLFSSL_WC_ML_KEM_768 - #endif - #if !defined(WOLFSSL_NO_ML_KEM_1024) - #define WOLFSSL_WC_ML_KEM_1024 - #endif - - #if !defined(WOLFSSL_WC_ML_KEM_512) && !defined(WOLFSSL_WC_ML_KEM_768) && \ - !defined(WOLFSSL_WC_ML_KEM_1024) - #error "No ML-KEM key size chosen." - #endif -#endif - -#ifdef WOLFSSL_WC_ML_KEM_512 -#define WC_ML_KEM_512_K 2 -/* Size of a polynomial vector. */ -#define WC_ML_KEM_512_POLY_VEC_SZ KYBER_POLY_VEC_SZ(WC_ML_KEM_512_K) -/* Size of a compressed polynomial based on bits per coefficient. */ -#define WC_ML_KEM_512_POLY_COMPRESSED_SZ KYBER_POLY_COMPRESSED_SZ(4) -/* Size of a compressed vector polynomial based on dimensions and bits per - * coefficient. */ -#define WC_ML_KEM_512_POLY_VEC_COMPRESSED_SZ \ - KYBER_POLY_VEC_COMPRESSED_SZ(WC_ML_KEM_512_K, 10) - -/* Public key size. */ -#define WC_ML_KEM_512_PUBLIC_KEY_SIZE \ - (WC_ML_KEM_512_POLY_VEC_SZ + KYBER_SYM_SZ) -/* Private key size. */ -#define WC_ML_KEM_512_PRIVATE_KEY_SIZE \ - (WC_ML_KEM_512_POLY_VEC_SZ + WC_ML_KEM_512_PUBLIC_KEY_SIZE + \ - 2 * KYBER_SYM_SZ) -/* Cipher text size. */ -#define WC_ML_KEM_512_CIPHER_TEXT_SIZE \ - (WC_ML_KEM_512_POLY_VEC_COMPRESSED_SZ + WC_ML_KEM_512_POLY_COMPRESSED_SZ) -#endif - -#ifdef WOLFSSL_WC_ML_KEM_768 -#define WC_ML_KEM_768_K 3 - -/* Size of a polynomial vector. 
*/ -#define WC_ML_KEM_768_POLY_VEC_SZ KYBER_POLY_VEC_SZ(WC_ML_KEM_768_K) -/* Size of a compressed polynomial based on bits per coefficient. */ -#define WC_ML_KEM_768_POLY_COMPRESSED_SZ KYBER_POLY_COMPRESSED_SZ(4) -/* Size of a compressed vector polynomial based on dimensions and bits per - * coefficient. */ -#define WC_ML_KEM_768_POLY_VEC_COMPRESSED_SZ \ - KYBER_POLY_VEC_COMPRESSED_SZ(WC_ML_KEM_768_K, 10) - -/* Public key size. */ -#define WC_ML_KEM_768_PUBLIC_KEY_SIZE \ - (WC_ML_KEM_768_POLY_VEC_SZ + KYBER_SYM_SZ) -/* Private key size. */ -#define WC_ML_KEM_768_PRIVATE_KEY_SIZE \ - (WC_ML_KEM_768_POLY_VEC_SZ + WC_ML_KEM_768_PUBLIC_KEY_SIZE + \ - 2 * KYBER_SYM_SZ) -/* Cipher text size. */ -#define WC_ML_KEM_768_CIPHER_TEXT_SIZE \ - (WC_ML_KEM_768_POLY_VEC_COMPRESSED_SZ + WC_ML_KEM_768_POLY_COMPRESSED_SZ) -#endif - -#ifdef WOLFSSL_WC_ML_KEM_1024 -#define WC_ML_KEM_1024_K 4 - -/* Size of a polynomial vector. */ -#define WC_ML_KEM_1024_POLY_VEC_SZ KYBER_POLY_VEC_SZ(WC_ML_KEM_1024_K) -/* Size of a compressed polynomial based on bits per coefficient. */ -#define WC_ML_KEM_1024_POLY_COMPRESSED_SZ KYBER_POLY_COMPRESSED_SZ(5) -/* Size of a compressed vector polynomial based on dimensions and bits per - * coefficient. */ -#define WC_ML_KEM_1024_POLY_VEC_COMPRESSED_SZ \ - KYBER_POLY_VEC_COMPRESSED_SZ(WC_ML_KEM_1024_K, 11) - -/* Public key size. */ -#define WC_ML_KEM_1024_PUBLIC_KEY_SIZE \ - (WC_ML_KEM_1024_POLY_VEC_SZ + KYBER_SYM_SZ) -/* Private key size. */ -#define WC_ML_KEM_1024_PRIVATE_KEY_SIZE \ - (WC_ML_KEM_1024_POLY_VEC_SZ + WC_ML_KEM_1024_PUBLIC_KEY_SIZE + \ - 2 * KYBER_SYM_SZ) -/* Cipher text size. 
*/ -#define WC_ML_KEM_1024_CIPHER_TEXT_SIZE \ - (WC_ML_KEM_1024_POLY_VEC_COMPRESSED_SZ + WC_ML_KEM_1024_POLY_COMPRESSED_SZ) -#endif - -#ifndef KYBER_MAX_K -#ifdef WOLFSSL_WC_ML_KEM_1024 -#define KYBER_MAX_K WC_ML_KEM_1024_K -#define KYBER_MAX_PRIVATE_KEY_SIZE WC_ML_KEM_1024_PRIVATE_KEY_SIZE -#define KYBER_MAX_PUBLIC_KEY_SIZE WC_ML_KEM_1024_PUBLIC_KEY_SIZE -#define KYBER_MAX_CIPHER_TEXT_SIZE WC_ML_KEM_1024_CIPHER_TEXT_SIZE -#elif defined(WOLFSSL_WC_ML_KEM_768) -#define KYBER_MAX_K WC_ML_KEM_768_K -#define KYBER_MAX_PRIVATE_KEY_SIZE WC_ML_KEM_768_PRIVATE_KEY_SIZE -#define KYBER_MAX_PUBLIC_KEY_SIZE WC_ML_KEM_768_PUBLIC_KEY_SIZE -#define KYBER_MAX_CIPHER_TEXT_SIZE WC_ML_KEM_768_CIPHER_TEXT_SIZE -#elif defined(WOLFSSL_WC_ML_KEM_512) -#define KYBER_MAX_K WC_ML_KEM_512_K -#define KYBER_MAX_PRIVATE_KEY_SIZE WC_ML_KEM_512_PRIVATE_KEY_SIZE -#define KYBER_MAX_PUBLIC_KEY_SIZE WC_ML_KEM_512_PUBLIC_KEY_SIZE -#define KYBER_MAX_CIPHER_TEXT_SIZE WC_ML_KEM_512_CIPHER_TEXT_SIZE -#endif -#endif /* KYBER_MAX_K */ - -#define WC_ML_KEM_MAX_K KYBER_MAX_K -#define WC_ML_KEM_MAX_PRIVATE_KEY_SIZE KYBER_MAX_PRIVATE_KEY_SIZE -#define WC_ML_KEM_MAX_PUBLIC_KEY_SIZE KYBER_MAX_PUBLIC_KEY_SIZE -#define WC_ML_KEM_MAX_CIPHER_TEXT_SIZE KYBER_MAX_CIPHER_TEXT_SIZE - -#define WC_ML_KEM_SYM_SZ KYBER_SYM_SZ -#define WC_ML_KEM_SS_SZ KYBER_SS_SZ -#define WC_ML_KEM_MAKEKEY_RAND_SZ KYBER_MAKEKEY_RAND_SZ -#define WC_ML_KEM_ENC_RAND_SZ KYBER_ENC_RAND_SZ -#define WC_ML_KEM_POLY_SIZE KYBER_POLY_SIZE - -#define MlKemKey KyberKey - -#define wc_MlKemKey_Init(key, type, heap, devId) \ - wc_KyberKey_Init(type, key, heap, devId) -#define wc_MlKemKey_Free wc_KyberKey_Free -#define wc_MlKemKey_MakeKey wc_KyberKey_MakeKey -#define wc_MlKemKey_MakeKeyWithRandom wc_KyberKey_MakeKeyWithRandom -#define wc_MlKemKey_CipherTextSize wc_KyberKey_CipherTextSize -#define wc_MlKemKey_SharedSecretSize wc_KyberKey_SharedSecretSize -#define wc_MlKemKey_Encapsulate wc_KyberKey_Encapsulate -#define wc_MlKemKey_EncapsulateWithRandom 
wc_KyberKey_EncapsulateWithRandom -#define wc_MlKemKey_Decapsulate wc_KyberKey_Decapsulate -#define wc_MlKemKey_DecodePrivateKey wc_KyberKey_DecodePrivateKey -#define wc_MlKemKey_DecodePublicKey wc_KyberKey_DecodePublicKey -#define wc_MlKemKey_PrivateKeySize wc_KyberKey_PrivateKeySize -#define wc_MlKemKey_PublicKeySize wc_KyberKey_PublicKeySize -#define wc_MlKemKey_EncodePrivateKey wc_KyberKey_EncodePrivateKey -#define wc_MlKemKey_EncodePublicKey wc_KyberKey_EncodePublicKey +#define wc_KyberKey_Init(type, key, heap, devId) \ + wc_MlKemKey_Init(key, type, heap, devId) +#define wc_KyberKey_Free wc_MlKemKey_Free +#define wc_KyberKey_MakeKey wc_MlKemKey_MakeKey +#define wc_KyberKey_MakeKeyWithRandom wc_MlKemKey_MakeKeyWithRandom +#define wc_KyberKey_CipherTextSize wc_MlKemKey_CipherTextSize +#define wc_KyberKey_SharedSecretSize wc_MlKemKey_SharedSecretSize +#define wc_KyberKey_Encapsulate wc_MlKemKey_Encapsulate +#define wc_KyberKey_EncapsulateWithRandom wc_MlKemKey_EncapsulateWithRandom +#define wc_KyberKey_Decapsulate wc_MlKemKey_Decapsulate +#define wc_KyberKey_DecodePrivateKey wc_MlKemKey_DecodePrivateKey +#define wc_KyberKey_DecodePublicKey wc_MlKemKey_DecodePublicKey +#define wc_KyberKey_PrivateKeySize wc_MlKemKey_PrivateKeySize +#define wc_KyberKey_PublicKeySize wc_MlKemKey_PublicKeySize +#define wc_KyberKey_EncodePrivateKey wc_MlKemKey_EncodePrivateKey +#define wc_KyberKey_EncodePublicKey wc_MlKemKey_EncodePublicKey #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* WOLFSSL_HAVE_KYBER */ +#endif /* WOLFSSL_HAVE_MLKEM */ -#endif /* WOLF_CRYPT_KYBER_H */ +#endif /* WOLF_CRYPT_MLKEM_H */ diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h index 2688960d1..37f2e3455 100644 --- a/wolfssl/wolfcrypt/settings.h +++ b/wolfssl/wolfcrypt/settings.h @@ -841,17 +841,19 @@ #endif /* ESP_ENABLE_WOLFSSH */ - /* Experimental Kyber. */ + /* ML-KEM. 
*/ #ifdef CONFIG_ESP_WOLFSSL_ENABLE_KYBER + #define CONFIG_ESP_WOLFSSL_ENABLE_MLKEM + #endif + #ifdef CONFIG_ESP_WOLFSSL_ENABLE_MLKEM /* Kyber typically needs a minimum 10K stack */ - #define WOLFSSL_EXPERIMENTAL_SETTINGS - #define WOLFSSL_HAVE_KYBER - #define WOLFSSL_WC_KYBER + #define WOLFSSL_HAVE_MLKEM + #define WOLFSSL_WC_MLKEM #define WOLFSSL_SHA3 #if defined(CONFIG_IDF_TARGET_ESP8266) /* With limited RAM, we'll disable some of the Kyber sizes: */ - #define WOLFSSL_NO_KYBER1024 - #define WOLFSSL_NO_KYBER768 + #define WOLFSSL_NO_ML_KEM_1024 + #define WOLFSSL_NO_ML_KEM_768 #define NO_SESSION_CACHE #endif #endif @@ -4096,7 +4098,7 @@ extern void uITRON4_free(void *p) ; #endif #endif -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM #define HAVE_PQC #endif @@ -4111,11 +4113,14 @@ extern void uITRON4_free(void *p) ; #ifndef WOLFSSL_NO_SPHINCS #define HAVE_SPHINCS #endif -#ifndef WOLFSSL_HAVE_KYBER - #define WOLFSSL_HAVE_KYBER +#ifndef WOLFSSL_HAVE_MLKEM + #define WOLFSSL_HAVE_MLKEM #define WOLFSSL_KYBER512 #define WOLFSSL_KYBER768 #define WOLFSSL_KYBER1024 + #define WOLFSSL_WC_ML_KEM_512 + #define WOLFSSL_WC_ML_KEM_768 + #define WOLFSSL_WC_ML_KEM_1024 #endif #endif @@ -4128,7 +4133,7 @@ extern void uITRON4_free(void *p) ; #error Experimental settings without WOLFSSL_EXPERIMENTAL_SETTINGS #endif -#if defined(HAVE_PQC) && !defined(HAVE_LIBOQS) && !defined(WOLFSSL_HAVE_KYBER) +#if defined(HAVE_PQC) && !defined(HAVE_LIBOQS) && !defined(WOLFSSL_HAVE_MLKEM) #error Please do not define HAVE_PQC yourself. 
#endif diff --git a/wolfssl/wolfcrypt/sha3.h b/wolfssl/wolfcrypt/sha3.h index 26b2ad727..724719a66 100644 --- a/wolfssl/wolfcrypt/sha3.h +++ b/wolfssl/wolfcrypt/sha3.h @@ -225,6 +225,7 @@ WOLFSSL_LOCAL void sha3_block_n_bmi2(word64* s, const byte* data, word32 n, word64 c); WOLFSSL_LOCAL void sha3_block_bmi2(word64* s); WOLFSSL_LOCAL void sha3_block_avx2(word64* s); +WOLFSSL_LOCAL void sha3_blocksx4_avx2(word64* s); WOLFSSL_LOCAL void BlockSha3(word64 *s); #elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 diff --git a/wolfssl/wolfcrypt/types.h b/wolfssl/wolfcrypt/types.h index 97b269957..23e424b25 100644 --- a/wolfssl/wolfcrypt/types.h +++ b/wolfssl/wolfcrypt/types.h @@ -1296,7 +1296,7 @@ typedef struct w64wrapper { WC_PK_TYPE_CURVE25519_KEYGEN = 16, WC_PK_TYPE_RSA_GET_SIZE = 17, #define _WC_PK_TYPE_MAX WC_PK_TYPE_RSA_GET_SIZE - #if defined(WOLFSSL_HAVE_KYBER) + #if defined(WOLFSSL_HAVE_MLKEM) WC_PK_TYPE_PQC_KEM_KEYGEN = 18, WC_PK_TYPE_PQC_KEM_ENCAPS = 19, WC_PK_TYPE_PQC_KEM_DECAPS = 20, @@ -1317,12 +1317,12 @@ typedef struct w64wrapper { WC_PK_TYPE_MAX = _WC_PK_TYPE_MAX }; -#if defined(WOLFSSL_HAVE_KYBER) +#if defined(WOLFSSL_HAVE_MLKEM) /* Post quantum KEM algorithms */ enum wc_PqcKemType { WC_PQC_KEM_TYPE_NONE = 0, #define _WC_PQC_KEM_TYPE_MAX WC_PQC_KEM_TYPE_NONE - #if defined(WOLFSSL_HAVE_KYBER) + #if defined(WOLFSSL_HAVE_MLKEM) WC_PQC_KEM_TYPE_KYBER = 1, #undef _WC_PQC_KEM_TYPE_MAX #define _WC_PQC_KEM_TYPE_MAX WC_PQC_KEM_TYPE_KYBER diff --git a/wolfssl/wolfcrypt/wc_kyber.h b/wolfssl/wolfcrypt/wc_kyber.h deleted file mode 100644 index 96d8fa23c..000000000 --- a/wolfssl/wolfcrypt/wc_kyber.h +++ /dev/null @@ -1,413 +0,0 @@ -/* wc_mlkem.h - * - * Copyright (C) 2006-2025 wolfSSL Inc. - * - * This file is part of wolfSSL. 
- * - * wolfSSL is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * wolfSSL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA - */ - -/*! - \file wolfssl/wolfcrypt/wc_mlkem.h -*/ - - -#ifndef WOLF_CRYPT_WC_MLKEM_H -#define WOLF_CRYPT_WC_MLKEM_H - -#include -#include -#include -#include - -#ifdef WOLFSSL_HAVE_KYBER - -#ifdef noinline - #define KYBER_NOINLINE noinline -#elif defined(_MSC_VER) - #define KYBER_NOINLINE __declspec(noinline) -#elif defined(__GNUC__) - #define KYBER_NOINLINE __attribute__((noinline)) -#else - #define KYBER_NOINLINE -#endif - -enum { - /* Flags of Kyber keys. */ - KYBER_FLAG_PRIV_SET = 0x0001, - KYBER_FLAG_PUB_SET = 0x0002, - KYBER_FLAG_BOTH_SET = 0x0003, - KYBER_FLAG_H_SET = 0x0004, - KYBER_FLAG_A_SET = 0x0008, - - /* 2 bits of random used to create noise value. */ - KYBER_CBD_ETA2 = 2, - /* 3 bits of random used to create noise value. */ - KYBER_CBD_ETA3 = 3, - - /* Number of bits to compress to. */ - KYBER_COMP_4BITS = 4, - KYBER_COMP_5BITS = 5, - KYBER_COMP_10BITS = 10, - KYBER_COMP_11BITS = 11, -}; - - -/* SHAKE128 rate. */ -#define XOF_BLOCK_SIZE 168 - -/* Modulus of co-efficients of polynomial. */ -#define KYBER_Q 3329 - - -/* Kyber-512 parameters */ -#ifdef WOLFSSL_KYBER512 -/* Number of bits of random to create noise from. 
*/ -#define KYBER512_ETA1 KYBER_CBD_ETA3 -#endif /* WOLFSSL_KYBER512 */ - -/* Kyber-768 parameters */ -#ifdef WOLFSSL_KYBER768 -/* Number of bits of random to create noise from. */ -#define KYBER768_ETA1 KYBER_CBD_ETA2 -#endif /* WOLFSSL_KYBER768 */ - -/* Kyber-1024 parameters */ -#ifdef WOLFSSL_KYBER1024 -/* Number of bits of random to create noise from. */ -#define KYBER1024_ETA1 KYBER_CBD_ETA2 -#endif /* WOLFSSL_KYBER1024 */ - - - -/* The data type of the hash function. */ -#define KYBER_HASH_T wc_Sha3 - -/* The data type of the pseudo-random function. */ -#define KYBER_PRF_T wc_Shake - -/* ML-KEM key. */ -struct MlKemKey { - /* Type of key: KYBER512, KYBER768, KYBER1024 */ - int type; - /* Dynamic memory allocation hint. */ - void* heap; -#if defined(WOLF_CRYPTO_CB) - /* Device Id. */ - int devId; -#endif - /* Flags indicating what is stored in the key. */ - int flags; - - /* A pseudo-random function object. */ - KYBER_HASH_T hash; - /* A pseudo-random function object. */ - KYBER_PRF_T prf; - - /* Private key as a vector. */ - sword16 priv[KYBER_MAX_K * KYBER_N]; - /* Public key as a vector. */ - sword16 pub[KYBER_MAX_K * KYBER_N]; - /* Public seed. */ - byte pubSeed[KYBER_SYM_SZ]; - /* Public hash - hash of encoded public key. */ - byte h[KYBER_SYM_SZ]; - /* Randomizer for decapsulation. */ - byte z[KYBER_SYM_SZ]; -#ifdef WOLFSSL_MLKEM_CACHE_A - /* A matrix from key generation. 
*/ - sword16 a[KYBER_MAX_K * KYBER_MAX_K * KYBER_N]; -#endif -}; - -#ifdef __cplusplus - extern "C" { -#endif - -/* For backward compatibility */ -typedef struct MlKemKey KyberKey; - -WOLFSSL_LOCAL -void kyber_init(void); - -#ifndef WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM -WOLFSSL_LOCAL -void kyber_keygen(sword16* priv, sword16* pub, sword16* e, const sword16* a, - int kp); -#else -WOLFSSL_LOCAL -int kyber_keygen_seeds(sword16* priv, sword16* pub, KYBER_PRF_T* prf, - sword16* e, int kp, byte* seed, byte* noiseSeed); -#endif -#ifndef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM -WOLFSSL_LOCAL -void kyber_encapsulate(const sword16* pub, sword16* bp, sword16* v, - const sword16* at, sword16* sp, const sword16* ep, const sword16* epp, - const sword16* m, int kp); -#else -WOLFSSL_LOCAL -int kyber_encapsulate_seeds(const sword16* pub, KYBER_PRF_T* prf, sword16* bp, - sword16* tp, sword16* sp, int kp, const byte* msg, byte* seed, - byte* coins); -#endif -WOLFSSL_LOCAL -void kyber_decapsulate(const sword16* priv, sword16* mp, sword16* bp, - const sword16* v, int kp); - -WOLFSSL_LOCAL -int kyber_gen_matrix(KYBER_PRF_T* prf, sword16* a, int kp, byte* seed, - int transposed); -WOLFSSL_LOCAL -int kyber_get_noise(KYBER_PRF_T* prf, int kp, sword16* vec1, sword16* vec2, - sword16* poly, byte* seed); - -#if defined(USE_INTEL_SPEEDUP) || \ - (defined(WOLFSSL_ARMASM) && defined(__aarch64__)) -WOLFSSL_LOCAL -int kyber_kdf(byte* seed, int seedLen, byte* out, int outLen); -#endif -WOLFSSL_LOCAL -void kyber_hash_init(KYBER_HASH_T* hash); -WOLFSSL_LOCAL -int kyber_hash_new(KYBER_HASH_T* hash, void* heap, int devId); -WOLFSSL_LOCAL -void kyber_hash_free(KYBER_HASH_T* hash); -WOLFSSL_LOCAL -int kyber_hash256(wc_Sha3* hash, const byte* data, word32 dataLen, byte* out); -WOLFSSL_LOCAL -int kyber_hash512(wc_Sha3* hash, const byte* data1, word32 data1Len, - const byte* data2, word32 data2Len, byte* out); - -WOLFSSL_LOCAL -void kyber_prf_init(KYBER_PRF_T* prf); -WOLFSSL_LOCAL -int kyber_prf_new(KYBER_PRF_T* 
prf, void* heap, int devId); -WOLFSSL_LOCAL -void kyber_prf_free(KYBER_PRF_T* prf); - -WOLFSSL_LOCAL -int kyber_cmp(const byte* a, const byte* b, int sz); - -WOLFSSL_LOCAL -void kyber_vec_compress_10(byte* r, sword16* v, unsigned int kp); -WOLFSSL_LOCAL -void kyber_vec_compress_11(byte* r, sword16* v); -WOLFSSL_LOCAL -void kyber_vec_decompress_10(sword16* v, const unsigned char* b, - unsigned int kp); -WOLFSSL_LOCAL -void kyber_vec_decompress_11(sword16* v, const unsigned char* b); - -WOLFSSL_LOCAL -void kyber_compress_4(byte* b, sword16* p); -WOLFSSL_LOCAL -void kyber_compress_5(byte* b, sword16* p); -WOLFSSL_LOCAL -void kyber_decompress_4(sword16* p, const unsigned char* b); -WOLFSSL_LOCAL -void kyber_decompress_5(sword16* p, const unsigned char* b); - -WOLFSSL_LOCAL -void kyber_from_msg(sword16* p, const byte* msg); -WOLFSSL_LOCAL -void kyber_to_msg(byte* msg, sword16* p); -WOLFSSL_LOCAL -void kyber_from_bytes(sword16* p, const byte* b, int k); -WOLFSSL_LOCAL -void kyber_to_bytes(byte* b, sword16* p, int k); - -#ifdef USE_INTEL_SPEEDUP -WOLFSSL_LOCAL -void kyber_keygen_avx2(sword16* priv, sword16* pub, sword16* e, - const sword16* a, int kp); -WOLFSSL_LOCAL -void kyber_encapsulate_avx2(const sword16* pub, sword16* bp, sword16* v, - const sword16* at, sword16* sp, const sword16* ep, const sword16* epp, - const sword16* m, int kp); -WOLFSSL_LOCAL -void kyber_decapsulate_avx2(const sword16* priv, sword16* mp, sword16* bp, - const sword16* v, int kp); - -WOLFSSL_LOCAL -unsigned int kyber_rej_uniform_n_avx2(sword16* p, unsigned int len, - const byte* r, unsigned int rLen); -WOLFSSL_LOCAL -unsigned int kyber_rej_uniform_avx2(sword16* p, unsigned int len, const byte* r, - unsigned int rLen); -WOLFSSL_LOCAL -void kyber_redistribute_21_rand_avx2(const word64* s, byte* r0, byte* r1, - byte* r2, byte* r3); -void kyber_redistribute_17_rand_avx2(const word64* s, byte* r0, byte* r1, - byte* r2, byte* r3); -void kyber_redistribute_16_rand_avx2(const word64* s, byte* r0, byte* 
r1, - byte* r2, byte* r3); -void kyber_redistribute_8_rand_avx2(const word64* s, byte* r0, byte* r1, - byte* r2, byte* r3); - -WOLFSSL_LOCAL -void kyber_sha3_blocksx4_avx2(word64* s); -WOLFSSL_LOCAL -void kyber_sha3_128_blocksx4_seed_avx2(word64* s, byte* seed); -WOLFSSL_LOCAL -void kyber_sha3_256_blocksx4_seed_avx2(word64* s, byte* seed); - -WOLFSSL_LOCAL -void kyber_cbd_eta2_avx2(sword16* p, const byte* r); -WOLFSSL_LOCAL -void kyber_cbd_eta3_avx2(sword16* p, const byte* r); - -WOLFSSL_LOCAL -void kyber_from_msg_avx2(sword16* p, const byte* msg); -WOLFSSL_LOCAL -void kyber_to_msg_avx2(byte* msg, sword16* p); - -WOLFSSL_LOCAL -void kyber_from_bytes_avx2(sword16* p, const byte* b); -WOLFSSL_LOCAL -void kyber_to_bytes_avx2(byte* b, sword16* p); - -WOLFSSL_LOCAL -void kyber_compress_10_avx2(byte* r, const sword16* p, int n); -WOLFSSL_LOCAL -void kyber_decompress_10_avx2(sword16* p, const byte* r, int n); -WOLFSSL_LOCAL -void kyber_compress_11_avx2(byte* r, const sword16* p, int n); -WOLFSSL_LOCAL -void kyber_decompress_11_avx2(sword16* p, const byte* r, int n); - -WOLFSSL_LOCAL -void kyber_compress_4_avx2(byte* r, const sword16* p); -WOLFSSL_LOCAL -void kyber_decompress_4_avx2(sword16* p, const byte* r); -WOLFSSL_LOCAL -void kyber_compress_5_avx2(byte* r, const sword16* p); -WOLFSSL_LOCAL -void kyber_decompress_5_avx2(sword16* p, const byte* r); - - -WOLFSSL_LOCAL -int kyber_cmp_avx2(const byte* a, const byte* b, int sz); -#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) -WOLFSSL_LOCAL void kyber_ntt(sword16* r); -WOLFSSL_LOCAL void kyber_invntt(sword16* r); -WOLFSSL_LOCAL void kyber_ntt_sqrdmlsh(sword16* r); -WOLFSSL_LOCAL void kyber_invntt_sqrdmlsh(sword16* r); -WOLFSSL_LOCAL void kyber_basemul_mont(sword16* r, const sword16* a, - const sword16* b); -WOLFSSL_LOCAL void kyber_basemul_mont_add(sword16* r, const sword16* a, - const sword16* b); -WOLFSSL_LOCAL void kyber_add_reduce(sword16* r, const sword16* a); -WOLFSSL_LOCAL void kyber_add3_reduce(sword16* r, 
const sword16* a, - const sword16* b); -WOLFSSL_LOCAL void kyber_rsub_reduce(sword16* r, const sword16* a); -WOLFSSL_LOCAL void kyber_to_mont(sword16* p); -WOLFSSL_LOCAL void kyber_to_mont_sqrdmlsh(sword16* p); -WOLFSSL_LOCAL void kyber_sha3_blocksx3_neon(word64* state); -WOLFSSL_LOCAL void kyber_shake128_blocksx3_seed_neon(word64* state, byte* seed); -WOLFSSL_LOCAL void kyber_shake256_blocksx3_seed_neon(word64* state, byte* seed); -WOLFSSL_LOCAL unsigned int kyber_rej_uniform_neon(sword16* p, unsigned int len, - const byte* r, unsigned int rLen); -WOLFSSL_LOCAL int kyber_cmp_neon(const byte* a, const byte* b, int sz); -WOLFSSL_LOCAL void kyber_csubq_neon(sword16* p); -WOLFSSL_LOCAL void kyber_from_msg_neon(sword16* p, const byte* msg); -WOLFSSL_LOCAL void kyber_to_msg_neon(byte* msg, sword16* p); -#elif defined(WOLFSSL_ARMASM_THUMB2) && defined(WOLFSSL_ARMASM) -#define kyber_ntt kyber_thumb2_ntt -#define kyber_invntt kyber_thumb2_invntt -#define kyber_basemul_mont kyber_thumb2_basemul_mont -#define kyber_basemul_mont_add kyber_thumb2_basemul_mont_add -#define kyber_rej_uniform_c kyber_thumb2_rej_uniform - -WOLFSSL_LOCAL void kyber_thumb2_ntt(sword16* r); -WOLFSSL_LOCAL void kyber_thumb2_invntt(sword16* r); -WOLFSSL_LOCAL void kyber_thumb2_basemul_mont(sword16* r, const sword16* a, - const sword16* b); -WOLFSSL_LOCAL void kyber_thumb2_basemul_mont_add(sword16* r, const sword16* a, - const sword16* b); -WOLFSSL_LOCAL void kyber_thumb2_csubq(sword16* p); -WOLFSSL_LOCAL unsigned int kyber_thumb2_rej_uniform(sword16* p, - unsigned int len, const byte* r, unsigned int rLen); -#elif defined(WOLFSSL_ARMASM) -#define kyber_ntt kyber_arm32_ntt -#define kyber_invntt kyber_arm32_invntt -#define kyber_basemul_mont kyber_arm32_basemul_mont -#define kyber_basemul_mont_add kyber_arm32_basemul_mont_add -#define kyber_rej_uniform_c kyber_arm32_rej_uniform - -WOLFSSL_LOCAL void kyber_arm32_ntt(sword16* r); -WOLFSSL_LOCAL void kyber_arm32_invntt(sword16* r); -WOLFSSL_LOCAL void 
kyber_arm32_basemul_mont(sword16* r, const sword16* a, - const sword16* b); -WOLFSSL_LOCAL void kyber_arm32_basemul_mont_add(sword16* r, const sword16* a, - const sword16* b); -WOLFSSL_LOCAL void kyber_arm32_csubq(sword16* p); -WOLFSSL_LOCAL unsigned int kyber_arm32_rej_uniform(sword16* p, unsigned int len, - const byte* r, unsigned int rLen); -#endif - -#ifdef __cplusplus - } /* extern "C" */ -#endif - -/* ML-KEM API */ -WOLFSSL_API int wc_MlKemKey_Init(MlKemKey* key, int type); -WOLFSSL_API void wc_MlKemKey_Free(MlKemKey* key); -WOLFSSL_API int wc_MlKemKey_PrivateKeySize(MlKemKey* key); -WOLFSSL_API int wc_MlKemKey_PublicKeySize(MlKemKey* key); -WOLFSSL_API int wc_MlKemKey_CipherTextSize(MlKemKey* key); -WOLFSSL_API int wc_MlKemKey_SharedSecretSize(MlKemKey* key); -WOLFSSL_API int wc_MlKemKey_MakeKey(MlKemKey* key, WC_RNG* rng); -WOLFSSL_API int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, WC_RNG* rng, - const byte* seed, word32 seedSz); -WOLFSSL_API int wc_MlKemKey_Encapsulate(MlKemKey* key, WC_RNG* rng, - byte* ct, word32 ctSz, - byte* ss, word32 ssSz); -WOLFSSL_API int wc_MlKemKey_EncapsulateWithRandom(MlKemKey* key, WC_RNG* rng, - byte* ct, word32 ctSz, - byte* ss, word32 ssSz, - const byte* seed, word32 seedSz); -WOLFSSL_API int wc_MlKemKey_Decapsulate(MlKemKey* key, - byte* ss, word32 ssSz, - const byte* ct, word32 ctSz); -WOLFSSL_API int wc_MlKemKey_DecodePrivateKey(MlKemKey* key, - const byte* priv, word32 privSz); -WOLFSSL_API int wc_MlKemKey_DecodePublicKey(MlKemKey* key, - const byte* pub, word32 pubSz); -WOLFSSL_API int wc_MlKemKey_EncodePrivateKey(MlKemKey* key, - byte* priv, word32 privSz); -WOLFSSL_API int wc_MlKemKey_EncodePublicKey(MlKemKey* key, - byte* pub, word32 pubSz); - -/* Backward compatibility defines */ -#define wc_KyberKey_Init(type, key) wc_MlKemKey_Init((key), (type)) -#define wc_KyberKey_Free wc_MlKemKey_Free -#define wc_KyberKey_PrivateKeySize wc_MlKemKey_PrivateKeySize -#define wc_KyberKey_PublicKeySize wc_MlKemKey_PublicKeySize 
-#define wc_KyberKey_CipherTextSize wc_MlKemKey_CipherTextSize -#define wc_KyberKey_SharedSecretSize wc_MlKemKey_SharedSecretSize -#define wc_KyberKey_MakeKey wc_MlKemKey_MakeKey -#define wc_KyberKey_MakeKeyWithRandom wc_MlKemKey_MakeKeyWithRandom -#define wc_KyberKey_Encapsulate wc_MlKemKey_Encapsulate -#define wc_KyberKey_EncapsulateWithRandom wc_MlKemKey_EncapsulateWithRandom -#define wc_KyberKey_Decapsulate wc_MlKemKey_Decapsulate -#define wc_KyberKey_DecodePrivateKey wc_MlKemKey_DecodePrivateKey -#define wc_KyberKey_DecodePublicKey wc_MlKemKey_DecodePublicKey -#define wc_KyberKey_EncodePrivateKey wc_MlKemKey_EncodePrivateKey -#define wc_KyberKey_EncodePublicKey wc_MlKemKey_EncodePublicKey - -#endif /* WOLFSSL_HAVE_KYBER */ - -#endif /* WOLF_CRYPT_WC_MLKEM_H */ - diff --git a/wolfssl/wolfcrypt/wc_mlkem.h b/wolfssl/wolfcrypt/wc_mlkem.h index 218b39f19..ee6c9c878 100644 --- a/wolfssl/wolfcrypt/wc_mlkem.h +++ b/wolfssl/wolfcrypt/wc_mlkem.h @@ -30,38 +30,48 @@ #include #include #include -#include +#include -#ifdef WOLFSSL_HAVE_KYBER +#ifdef WOLFSSL_HAVE_MLKEM + +#ifdef WOLFSSL_KYBER_NO_MAKE_KEY + #define WOLFSSL_MLKEM_NO_MAKE_KEY +#endif +#ifdef WOLFSSL_KYBER_NO_ENCAPSULATE + #define WOLFSSL_MLKEM_NO_ENCAPSULATE +#endif +#ifdef WOLFSSL_KYBER_NO_DECAPSULATE + #define WOLFSSL_MLKEM_NO_DECAPSULATE +#endif #ifdef noinline - #define KYBER_NOINLINE noinline + #define MLKEM_NOINLINE noinline #elif defined(_MSC_VER) - #define KYBER_NOINLINE __declspec(noinline) + #define MLKEM_NOINLINE __declspec(noinline) #elif defined(__GNUC__) - #define KYBER_NOINLINE __attribute__((noinline)) + #define MLKEM_NOINLINE __attribute__((noinline)) #else - #define KYBER_NOINLINE + #define MLKEM_NOINLINE #endif enum { /* Flags of Kyber keys. 
*/ - KYBER_FLAG_PRIV_SET = 0x0001, - KYBER_FLAG_PUB_SET = 0x0002, - KYBER_FLAG_BOTH_SET = 0x0003, - KYBER_FLAG_H_SET = 0x0004, - KYBER_FLAG_A_SET = 0x0008, + MLKEM_FLAG_PRIV_SET = 0x0001, + MLKEM_FLAG_PUB_SET = 0x0002, + MLKEM_FLAG_BOTH_SET = 0x0003, + MLKEM_FLAG_H_SET = 0x0004, + MLKEM_FLAG_A_SET = 0x0008, /* 2 bits of random used to create noise value. */ - KYBER_CBD_ETA2 = 2, + MLKEM_CBD_ETA2 = 2, /* 3 bits of random used to create noise value. */ - KYBER_CBD_ETA3 = 3, + MLKEM_CBD_ETA3 = 3, /* Number of bits to compress to. */ - KYBER_COMP_4BITS = 4, - KYBER_COMP_5BITS = 5, - KYBER_COMP_10BITS = 10, - KYBER_COMP_11BITS = 11, + MLKEM_COMP_4BITS = 4, + MLKEM_COMP_5BITS = 5, + MLKEM_COMP_10BITS = 10, + MLKEM_COMP_11BITS = 11, }; @@ -69,38 +79,38 @@ enum { #define XOF_BLOCK_SIZE 168 /* Modulus of co-efficients of polynomial. */ -#define KYBER_Q 3329 +#define MLKEM_Q 3329 /* Kyber-512 parameters */ -#ifdef WOLFSSL_KYBER512 +#ifdef WOLFSSL_WC_ML_KEM_512 /* Number of bits of random to create noise from. */ -#define KYBER512_ETA1 KYBER_CBD_ETA3 -#endif /* WOLFSSL_KYBER512 */ +#define WC_ML_KEM_512_ETA1 MLKEM_CBD_ETA3 +#endif /* WOLFSSL_WC_ML_KEM_512 */ /* Kyber-768 parameters */ -#ifdef WOLFSSL_KYBER768 +#ifdef WOLFSSL_WC_ML_KEM_768 /* Number of bits of random to create noise from. */ -#define KYBER768_ETA1 KYBER_CBD_ETA2 -#endif /* WOLFSSL_KYBER768 */ +#define WC_ML_KEM_768_ETA1 MLKEM_CBD_ETA2 +#endif /* WOLFSSL_WC_ML_KEM_768 */ /* Kyber-1024 parameters */ -#ifdef WOLFSSL_KYBER1024 +#ifdef WOLFSSL_WC_ML_KEM_1024 /* Number of bits of random to create noise from. */ -#define KYBER1024_ETA1 KYBER_CBD_ETA2 +#define WC_ML_KEM_1024_ETA1 MLKEM_CBD_ETA2 #endif /* WOLFSSL_KYBER1024 */ /* The data type of the hash function. */ -#define KYBER_HASH_T wc_Sha3 +#define MLKEM_HASH_T wc_Sha3 /* The data type of the pseudo-random function. */ -#define KYBER_PRF_T wc_Shake +#define MLKEM_PRF_T wc_Shake /* ML-KEM key. 
*/ struct MlKemKey { - /* Type of key: KYBER512, KYBER768, KYBER1024 */ + /* Type of key: WC_ML_KEM_512, WC_ML_KEM_768, WC_ML_KEM_1024 */ int type; /* Dynamic memory allocation hint. */ void* heap; @@ -112,23 +122,23 @@ struct MlKemKey { int flags; /* A pseudo-random function object. */ - KYBER_HASH_T hash; + MLKEM_HASH_T hash; /* A pseudo-random function object. */ - KYBER_PRF_T prf; + MLKEM_PRF_T prf; /* Private key as a vector. */ - sword16 priv[KYBER_MAX_K * KYBER_N]; + sword16 priv[WC_ML_KEM_MAX_K * MLKEM_N]; /* Public key as a vector. */ - sword16 pub[KYBER_MAX_K * KYBER_N]; + sword16 pub[WC_ML_KEM_MAX_K * MLKEM_N]; /* Public seed. */ - byte pubSeed[KYBER_SYM_SZ]; + byte pubSeed[WC_ML_KEM_SYM_SZ]; /* Public hash - hash of encoded public key. */ - byte h[KYBER_SYM_SZ]; + byte h[WC_ML_KEM_SYM_SZ]; /* Randomizer for decapsulation. */ - byte z[KYBER_SYM_SZ]; + byte z[WC_ML_KEM_SYM_SZ]; #ifdef WOLFSSL_MLKEM_CACHE_A /* A matrix from key generation. */ - sword16 a[KYBER_MAX_K * KYBER_MAX_K * KYBER_N]; + sword16 a[WC_ML_KEM_MAX_K * WC_ML_KEM_MAX_K * MLKEM_N]; #endif }; @@ -140,220 +150,218 @@ struct MlKemKey { typedef struct MlKemKey KyberKey; WOLFSSL_LOCAL -void kyber_init(void); +void mlkem_init(void); #ifndef WOLFSSL_MLKEM_MAKEKEY_SMALL_MEM WOLFSSL_LOCAL -void kyber_keygen(sword16* priv, sword16* pub, sword16* e, const sword16* a, +void mlkem_keygen(sword16* priv, sword16* pub, sword16* e, const sword16* a, int kp); #else WOLFSSL_LOCAL -int kyber_keygen_seeds(sword16* priv, sword16* pub, KYBER_PRF_T* prf, +int mlkem_keygen_seeds(sword16* priv, sword16* pub, MLKEM_PRF_T* prf, sword16* e, int kp, byte* seed, byte* noiseSeed); #endif #ifndef WOLFSSL_MLKEM_ENCAPSULATE_SMALL_MEM WOLFSSL_LOCAL -void kyber_encapsulate(const sword16* pub, sword16* bp, sword16* v, +void mlkem_encapsulate(const sword16* pub, sword16* bp, sword16* v, const sword16* at, sword16* sp, const sword16* ep, const sword16* epp, const sword16* m, int kp); #else WOLFSSL_LOCAL -int 
kyber_encapsulate_seeds(const sword16* pub, KYBER_PRF_T* prf, sword16* bp, +int mlkem_encapsulate_seeds(const sword16* pub, MLKEM_PRF_T* prf, sword16* bp, sword16* tp, sword16* sp, int kp, const byte* msg, byte* seed, byte* coins); #endif WOLFSSL_LOCAL -void kyber_decapsulate(const sword16* priv, sword16* mp, sword16* bp, +void mlkem_decapsulate(const sword16* priv, sword16* mp, sword16* bp, const sword16* v, int kp); WOLFSSL_LOCAL -int kyber_gen_matrix(KYBER_PRF_T* prf, sword16* a, int kp, byte* seed, +int mlkem_gen_matrix(MLKEM_PRF_T* prf, sword16* a, int kp, byte* seed, int transposed); WOLFSSL_LOCAL -int kyber_get_noise(KYBER_PRF_T* prf, int kp, sword16* vec1, sword16* vec2, +int mlkem_get_noise(MLKEM_PRF_T* prf, int kp, sword16* vec1, sword16* vec2, sword16* poly, byte* seed); #if defined(USE_INTEL_SPEEDUP) || \ (defined(WOLFSSL_ARMASM) && defined(__aarch64__)) WOLFSSL_LOCAL -int kyber_kdf(byte* seed, int seedLen, byte* out, int outLen); +int mlkem_kdf(byte* seed, int seedLen, byte* out, int outLen); #endif WOLFSSL_LOCAL -void kyber_hash_init(KYBER_HASH_T* hash); +void mlkem_hash_init(MLKEM_HASH_T* hash); WOLFSSL_LOCAL -int kyber_hash_new(KYBER_HASH_T* hash, void* heap, int devId); +int mlkem_hash_new(MLKEM_HASH_T* hash, void* heap, int devId); WOLFSSL_LOCAL -void kyber_hash_free(KYBER_HASH_T* hash); +void mlkem_hash_free(MLKEM_HASH_T* hash); WOLFSSL_LOCAL -int kyber_hash256(wc_Sha3* hash, const byte* data, word32 dataLen, byte* out); +int mlkem_hash256(wc_Sha3* hash, const byte* data, word32 dataLen, byte* out); WOLFSSL_LOCAL -int kyber_hash512(wc_Sha3* hash, const byte* data1, word32 data1Len, +int mlkem_hash512(wc_Sha3* hash, const byte* data1, word32 data1Len, const byte* data2, word32 data2Len, byte* out); WOLFSSL_LOCAL -void kyber_prf_init(KYBER_PRF_T* prf); +void mlkem_prf_init(MLKEM_PRF_T* prf); WOLFSSL_LOCAL -int kyber_prf_new(KYBER_PRF_T* prf, void* heap, int devId); +int mlkem_prf_new(MLKEM_PRF_T* prf, void* heap, int devId); WOLFSSL_LOCAL -void 
kyber_prf_free(KYBER_PRF_T* prf); +void mlkem_prf_free(MLKEM_PRF_T* prf); WOLFSSL_LOCAL -int kyber_cmp(const byte* a, const byte* b, int sz); +int mlkem_cmp(const byte* a, const byte* b, int sz); WOLFSSL_LOCAL -void kyber_vec_compress_10(byte* r, sword16* v, unsigned int kp); +void mlkem_vec_compress_10(byte* r, sword16* v, unsigned int kp); WOLFSSL_LOCAL -void kyber_vec_compress_11(byte* r, sword16* v); +void mlkem_vec_compress_11(byte* r, sword16* v); WOLFSSL_LOCAL -void kyber_vec_decompress_10(sword16* v, const unsigned char* b, +void mlkem_vec_decompress_10(sword16* v, const unsigned char* b, unsigned int kp); WOLFSSL_LOCAL -void kyber_vec_decompress_11(sword16* v, const unsigned char* b); +void mlkem_vec_decompress_11(sword16* v, const unsigned char* b); WOLFSSL_LOCAL -void kyber_compress_4(byte* b, sword16* p); +void mlkem_compress_4(byte* b, sword16* p); WOLFSSL_LOCAL -void kyber_compress_5(byte* b, sword16* p); +void mlkem_compress_5(byte* b, sword16* p); WOLFSSL_LOCAL -void kyber_decompress_4(sword16* p, const unsigned char* b); +void mlkem_decompress_4(sword16* p, const unsigned char* b); WOLFSSL_LOCAL -void kyber_decompress_5(sword16* p, const unsigned char* b); +void mlkem_decompress_5(sword16* p, const unsigned char* b); WOLFSSL_LOCAL -void kyber_from_msg(sword16* p, const byte* msg); +void mlkem_from_msg(sword16* p, const byte* msg); WOLFSSL_LOCAL -void kyber_to_msg(byte* msg, sword16* p); +void mlkem_to_msg(byte* msg, sword16* p); WOLFSSL_LOCAL -void kyber_from_bytes(sword16* p, const byte* b, int k); +void mlkem_from_bytes(sword16* p, const byte* b, int k); WOLFSSL_LOCAL -void kyber_to_bytes(byte* b, sword16* p, int k); +void mlkem_to_bytes(byte* b, sword16* p, int k); #ifdef USE_INTEL_SPEEDUP WOLFSSL_LOCAL -void kyber_keygen_avx2(sword16* priv, sword16* pub, sword16* e, +void mlkem_keygen_avx2(sword16* priv, sword16* pub, sword16* e, const sword16* a, int kp); WOLFSSL_LOCAL -void kyber_encapsulate_avx2(const sword16* pub, sword16* bp, sword16* v, 
+void mlkem_encapsulate_avx2(const sword16* pub, sword16* bp, sword16* v, const sword16* at, sword16* sp, const sword16* ep, const sword16* epp, const sword16* m, int kp); WOLFSSL_LOCAL -void kyber_decapsulate_avx2(const sword16* priv, sword16* mp, sword16* bp, +void mlkem_decapsulate_avx2(const sword16* priv, sword16* mp, sword16* bp, const sword16* v, int kp); WOLFSSL_LOCAL -unsigned int kyber_rej_uniform_n_avx2(sword16* p, unsigned int len, +unsigned int mlkem_rej_uniform_n_avx2(sword16* p, unsigned int len, const byte* r, unsigned int rLen); WOLFSSL_LOCAL -unsigned int kyber_rej_uniform_avx2(sword16* p, unsigned int len, const byte* r, +unsigned int mlkem_rej_uniform_avx2(sword16* p, unsigned int len, const byte* r, unsigned int rLen); WOLFSSL_LOCAL -void kyber_redistribute_21_rand_avx2(const word64* s, byte* r0, byte* r1, +void mlkem_redistribute_21_rand_avx2(const word64* s, byte* r0, byte* r1, byte* r2, byte* r3); -void kyber_redistribute_17_rand_avx2(const word64* s, byte* r0, byte* r1, +void mlkem_redistribute_17_rand_avx2(const word64* s, byte* r0, byte* r1, byte* r2, byte* r3); -void kyber_redistribute_16_rand_avx2(const word64* s, byte* r0, byte* r1, +void mlkem_redistribute_16_rand_avx2(const word64* s, byte* r0, byte* r1, byte* r2, byte* r3); -void kyber_redistribute_8_rand_avx2(const word64* s, byte* r0, byte* r1, +void mlkem_redistribute_8_rand_avx2(const word64* s, byte* r0, byte* r1, byte* r2, byte* r3); WOLFSSL_LOCAL -void kyber_sha3_blocksx4_avx2(word64* s); +void mlkem_sha3_128_blocksx4_seed_avx2(word64* s, byte* seed); WOLFSSL_LOCAL -void kyber_sha3_128_blocksx4_seed_avx2(word64* s, byte* seed); -WOLFSSL_LOCAL -void kyber_sha3_256_blocksx4_seed_avx2(word64* s, byte* seed); +void mlkem_sha3_256_blocksx4_seed_avx2(word64* s, byte* seed); WOLFSSL_LOCAL -void kyber_cbd_eta2_avx2(sword16* p, const byte* r); +void mlkem_cbd_eta2_avx2(sword16* p, const byte* r); WOLFSSL_LOCAL -void kyber_cbd_eta3_avx2(sword16* p, const byte* r); +void 
mlkem_cbd_eta3_avx2(sword16* p, const byte* r); WOLFSSL_LOCAL -void kyber_from_msg_avx2(sword16* p, const byte* msg); +void mlkem_from_msg_avx2(sword16* p, const byte* msg); WOLFSSL_LOCAL -void kyber_to_msg_avx2(byte* msg, sword16* p); +void mlkem_to_msg_avx2(byte* msg, sword16* p); WOLFSSL_LOCAL -void kyber_from_bytes_avx2(sword16* p, const byte* b); +void mlkem_from_bytes_avx2(sword16* p, const byte* b); WOLFSSL_LOCAL -void kyber_to_bytes_avx2(byte* b, sword16* p); +void mlkem_to_bytes_avx2(byte* b, sword16* p); WOLFSSL_LOCAL -void kyber_compress_10_avx2(byte* r, const sword16* p, int n); +void mlkem_compress_10_avx2(byte* r, const sword16* p, int n); WOLFSSL_LOCAL -void kyber_decompress_10_avx2(sword16* p, const byte* r, int n); +void mlkem_decompress_10_avx2(sword16* p, const byte* r, int n); WOLFSSL_LOCAL -void kyber_compress_11_avx2(byte* r, const sword16* p, int n); +void mlkem_compress_11_avx2(byte* r, const sword16* p, int n); WOLFSSL_LOCAL -void kyber_decompress_11_avx2(sword16* p, const byte* r, int n); +void mlkem_decompress_11_avx2(sword16* p, const byte* r, int n); WOLFSSL_LOCAL -void kyber_compress_4_avx2(byte* r, const sword16* p); +void mlkem_compress_4_avx2(byte* r, const sword16* p); WOLFSSL_LOCAL -void kyber_decompress_4_avx2(sword16* p, const byte* r); +void mlkem_decompress_4_avx2(sword16* p, const byte* r); WOLFSSL_LOCAL -void kyber_compress_5_avx2(byte* r, const sword16* p); +void mlkem_compress_5_avx2(byte* r, const sword16* p); WOLFSSL_LOCAL -void kyber_decompress_5_avx2(sword16* p, const byte* r); +void mlkem_decompress_5_avx2(sword16* p, const byte* r); WOLFSSL_LOCAL -int kyber_cmp_avx2(const byte* a, const byte* b, int sz); +int mlkem_cmp_avx2(const byte* a, const byte* b, int sz); #elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) -WOLFSSL_LOCAL void kyber_ntt(sword16* r); -WOLFSSL_LOCAL void kyber_invntt(sword16* r); -WOLFSSL_LOCAL void kyber_ntt_sqrdmlsh(sword16* r); -WOLFSSL_LOCAL void kyber_invntt_sqrdmlsh(sword16* r); 
-WOLFSSL_LOCAL void kyber_basemul_mont(sword16* r, const sword16* a, +WOLFSSL_LOCAL void mlkem_ntt(sword16* r); +WOLFSSL_LOCAL void mlkem_invntt(sword16* r); +WOLFSSL_LOCAL void mlkem_ntt_sqrdmlsh(sword16* r); +WOLFSSL_LOCAL void mlkem_invntt_sqrdmlsh(sword16* r); +WOLFSSL_LOCAL void mlkem_basemul_mont(sword16* r, const sword16* a, const sword16* b); -WOLFSSL_LOCAL void kyber_basemul_mont_add(sword16* r, const sword16* a, +WOLFSSL_LOCAL void mlkem_basemul_mont_add(sword16* r, const sword16* a, const sword16* b); -WOLFSSL_LOCAL void kyber_add_reduce(sword16* r, const sword16* a); -WOLFSSL_LOCAL void kyber_add3_reduce(sword16* r, const sword16* a, +WOLFSSL_LOCAL void mlkem_add_reduce(sword16* r, const sword16* a); +WOLFSSL_LOCAL void mlkem_add3_reduce(sword16* r, const sword16* a, const sword16* b); -WOLFSSL_LOCAL void kyber_rsub_reduce(sword16* r, const sword16* a); -WOLFSSL_LOCAL void kyber_to_mont(sword16* p); -WOLFSSL_LOCAL void kyber_to_mont_sqrdmlsh(sword16* p); -WOLFSSL_LOCAL void kyber_sha3_blocksx3_neon(word64* state); -WOLFSSL_LOCAL void kyber_shake128_blocksx3_seed_neon(word64* state, byte* seed); -WOLFSSL_LOCAL void kyber_shake256_blocksx3_seed_neon(word64* state, byte* seed); -WOLFSSL_LOCAL unsigned int kyber_rej_uniform_neon(sword16* p, unsigned int len, +WOLFSSL_LOCAL void mlkem_rsub_reduce(sword16* r, const sword16* a); +WOLFSSL_LOCAL void mlkem_to_mont(sword16* p); +WOLFSSL_LOCAL void mlkem_to_mont_sqrdmlsh(sword16* p); +WOLFSSL_LOCAL void mlkem_sha3_blocksx3_neon(word64* state); +WOLFSSL_LOCAL void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed); +WOLFSSL_LOCAL void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed); +WOLFSSL_LOCAL unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, unsigned int rLen); -WOLFSSL_LOCAL int kyber_cmp_neon(const byte* a, const byte* b, int sz); -WOLFSSL_LOCAL void kyber_csubq_neon(sword16* p); -WOLFSSL_LOCAL void kyber_from_msg_neon(sword16* p, const byte* msg); 
-WOLFSSL_LOCAL void kyber_to_msg_neon(byte* msg, sword16* p); +WOLFSSL_LOCAL int mlkem_cmp_neon(const byte* a, const byte* b, int sz); +WOLFSSL_LOCAL void mlkem_csubq_neon(sword16* p); +WOLFSSL_LOCAL void mlkem_from_msg_neon(sword16* p, const byte* msg); +WOLFSSL_LOCAL void mlkem_to_msg_neon(byte* msg, sword16* p); #elif defined(WOLFSSL_ARMASM_THUMB2) && defined(WOLFSSL_ARMASM) -#define kyber_ntt kyber_thumb2_ntt -#define kyber_invntt kyber_thumb2_invntt -#define kyber_basemul_mont kyber_thumb2_basemul_mont -#define kyber_basemul_mont_add kyber_thumb2_basemul_mont_add -#define kyber_rej_uniform_c kyber_thumb2_rej_uniform +#define mlkem_ntt mlkem_thumb2_ntt +#define mlkem_invntt mlkem_thumb2_invntt +#define mlkem_basemul_mont mlkem_thumb2_basemul_mont +#define mlkem_basemul_mont_add mlkem_thumb2_basemul_mont_add +#define mlkem_rej_uniform_c mlkem_thumb2_rej_uniform -WOLFSSL_LOCAL void kyber_thumb2_ntt(sword16* r); -WOLFSSL_LOCAL void kyber_thumb2_invntt(sword16* r); -WOLFSSL_LOCAL void kyber_thumb2_basemul_mont(sword16* r, const sword16* a, +WOLFSSL_LOCAL void mlkem_thumb2_ntt(sword16* r); +WOLFSSL_LOCAL void mlkem_thumb2_invntt(sword16* r); +WOLFSSL_LOCAL void mlkem_thumb2_basemul_mont(sword16* r, const sword16* a, const sword16* b); -WOLFSSL_LOCAL void kyber_thumb2_basemul_mont_add(sword16* r, const sword16* a, +WOLFSSL_LOCAL void mlkem_thumb2_basemul_mont_add(sword16* r, const sword16* a, const sword16* b); -WOLFSSL_LOCAL void kyber_thumb2_csubq(sword16* p); -WOLFSSL_LOCAL unsigned int kyber_thumb2_rej_uniform(sword16* p, +WOLFSSL_LOCAL void mlkem_thumb2_csubq(sword16* p); +WOLFSSL_LOCAL unsigned int mlkem_thumb2_rej_uniform(sword16* p, unsigned int len, const byte* r, unsigned int rLen); #elif defined(WOLFSSL_ARMASM) -#define kyber_ntt kyber_arm32_ntt -#define kyber_invntt kyber_arm32_invntt -#define kyber_basemul_mont kyber_arm32_basemul_mont -#define kyber_basemul_mont_add kyber_arm32_basemul_mont_add -#define kyber_rej_uniform_c kyber_arm32_rej_uniform 
+#define mlkem_ntt mlkem_arm32_ntt +#define mlkem_invntt mlkem_arm32_invntt +#define mlkem_basemul_mont mlkem_arm32_basemul_mont +#define mlkem_basemul_mont_add mlkem_arm32_basemul_mont_add +#define mlkem_rej_uniform_c mlkem_arm32_rej_uniform -WOLFSSL_LOCAL void kyber_arm32_ntt(sword16* r); -WOLFSSL_LOCAL void kyber_arm32_invntt(sword16* r); -WOLFSSL_LOCAL void kyber_arm32_basemul_mont(sword16* r, const sword16* a, +WOLFSSL_LOCAL void mlkem_arm32_ntt(sword16* r); +WOLFSSL_LOCAL void mlkem_arm32_invntt(sword16* r); +WOLFSSL_LOCAL void mlkem_arm32_basemul_mont(sword16* r, const sword16* a, const sword16* b); -WOLFSSL_LOCAL void kyber_arm32_basemul_mont_add(sword16* r, const sword16* a, +WOLFSSL_LOCAL void mlkem_arm32_basemul_mont_add(sword16* r, const sword16* a, const sword16* b); -WOLFSSL_LOCAL void kyber_arm32_csubq(sword16* p); -WOLFSSL_LOCAL unsigned int kyber_arm32_rej_uniform(sword16* p, unsigned int len, +WOLFSSL_LOCAL void mlkem_arm32_csubq(sword16* p); +WOLFSSL_LOCAL unsigned int mlkem_arm32_rej_uniform(sword16* p, unsigned int len, const byte* r, unsigned int rLen); #endif @@ -361,52 +369,6 @@ WOLFSSL_LOCAL unsigned int kyber_arm32_rej_uniform(sword16* p, unsigned int len, } /* extern "C" */ #endif -/* ML-KEM API */ -WOLFSSL_API int wc_MlKemKey_Init(MlKemKey* key, int type); -WOLFSSL_API void wc_MlKemKey_Free(MlKemKey* key); -WOLFSSL_API int wc_MlKemKey_PrivateKeySize(MlKemKey* key); -WOLFSSL_API int wc_MlKemKey_PublicKeySize(MlKemKey* key); -WOLFSSL_API int wc_MlKemKey_CipherTextSize(MlKemKey* key); -WOLFSSL_API int wc_MlKemKey_SharedSecretSize(MlKemKey* key); -WOLFSSL_API int wc_MlKemKey_MakeKey(MlKemKey* key, WC_RNG* rng); -WOLFSSL_API int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, WC_RNG* rng, - const byte* seed, word32 seedSz); -WOLFSSL_API int wc_MlKemKey_Encapsulate(MlKemKey* key, WC_RNG* rng, - byte* ct, word32 ctSz, - byte* ss, word32 ssSz); -WOLFSSL_API int wc_MlKemKey_EncapsulateWithRandom(MlKemKey* key, WC_RNG* rng, - byte* ct, word32 ctSz, - 
byte* ss, word32 ssSz, - const byte* seed, word32 seedSz); -WOLFSSL_API int wc_MlKemKey_Decapsulate(MlKemKey* key, - byte* ss, word32 ssSz, - const byte* ct, word32 ctSz); -WOLFSSL_API int wc_MlKemKey_DecodePrivateKey(MlKemKey* key, - const byte* priv, word32 privSz); -WOLFSSL_API int wc_MlKemKey_DecodePublicKey(MlKemKey* key, - const byte* pub, word32 pubSz); -WOLFSSL_API int wc_MlKemKey_EncodePrivateKey(MlKemKey* key, - byte* priv, word32 privSz); -WOLFSSL_API int wc_MlKemKey_EncodePublicKey(MlKemKey* key, - byte* pub, word32 pubSz); - -/* Backward compatibility defines */ -#define wc_KyberKey_Init(type, key) wc_MlKemKey_Init((key), (type)) -#define wc_KyberKey_Free wc_MlKemKey_Free -#define wc_KyberKey_PrivateKeySize wc_MlKemKey_PrivateKeySize -#define wc_KyberKey_PublicKeySize wc_MlKemKey_PublicKeySize -#define wc_KyberKey_CipherTextSize wc_MlKemKey_CipherTextSize -#define wc_KyberKey_SharedSecretSize wc_MlKemKey_SharedSecretSize -#define wc_KyberKey_MakeKey wc_MlKemKey_MakeKey -#define wc_KyberKey_MakeKeyWithRandom wc_MlKemKey_MakeKeyWithRandom -#define wc_KyberKey_Encapsulate wc_MlKemKey_Encapsulate -#define wc_KyberKey_EncapsulateWithRandom wc_MlKemKey_EncapsulateWithRandom -#define wc_KyberKey_Decapsulate wc_MlKemKey_Decapsulate -#define wc_KyberKey_DecodePrivateKey wc_MlKemKey_DecodePrivateKey -#define wc_KyberKey_DecodePublicKey wc_MlKemKey_DecodePublicKey -#define wc_KyberKey_EncodePrivateKey wc_MlKemKey_EncodePrivateKey -#define wc_KyberKey_EncodePublicKey wc_MlKemKey_EncodePublicKey - -#endif /* WOLFSSL_HAVE_KYBER */ +#endif /* WOLFSSL_HAVE_MLKEM */ #endif /* WOLF_CRYPT_WC_MLKEM_H */ diff --git a/wrapper/CSharp/wolfssl.vcxproj b/wrapper/CSharp/wolfssl.vcxproj index 534c4255c..1d142db4b 100644 --- a/wrapper/CSharp/wolfssl.vcxproj +++ b/wrapper/CSharp/wolfssl.vcxproj @@ -308,7 +308,7 @@ - + @@ -320,8 +320,8 @@ - - + + diff --git a/zephyr/CMakeLists.txt b/zephyr/CMakeLists.txt index 7117beaeb..6f1157afa 100644 --- a/zephyr/CMakeLists.txt +++ 
b/zephyr/CMakeLists.txt @@ -78,7 +78,7 @@ if(CONFIG_WOLFSSL) zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/ed25519.c) zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/ed448.c) zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/error.c) - zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/ext_kyber.c) + zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/ext_mlkem.c) zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/falcon.c) zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/fe_448.c) zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/fe_low_mem.c) @@ -117,8 +117,8 @@ if(CONFIG_WOLFSSL) zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/tfm.c) zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/wc_dsp.c) zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/wc_encrypt.c) - zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/wc_kyber.c) - zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/wc_kyber_poly.c) + zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/wc_mlkem.c) + zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/wc_mlkem_poly.c) zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/wc_lms.c) zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/wc_lms_impl.c) zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/wc_pkcs11.c)