From 668af9b32f1b9cc07bdc7f7eb23fa285d168f25f Mon Sep 17 00:00:00 2001 From: JacobBarthelmeh Date: Mon, 7 Nov 2016 15:28:30 -0800 Subject: [PATCH 1/3] POLY1305 : clang 32 bit warning about macros --- wolfssl/wolfcrypt/poly1305.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/wolfssl/wolfcrypt/poly1305.h b/wolfssl/wolfcrypt/poly1305.h index e64cabf94..75e30d7fb 100644 --- a/wolfssl/wolfcrypt/poly1305.h +++ b/wolfssl/wolfcrypt/poly1305.h @@ -32,12 +32,21 @@ #endif /* auto detect between 32bit / 64bit */ -#define HAS_SIZEOF_INT128_64BIT (defined(__SIZEOF_INT128__) && defined(__LP64__)) -#define HAS_MSVC_64BIT (defined(_MSC_VER) && defined(_M_X64)) -#define HAS_GCC_4_4_64BIT (defined(__GNUC__) && defined(__LP64__) && \ - ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)))) +#if defined(__SIZEOF_INT128__) && defined(__LP64__) +#define WC_HAS_SIZEOF_INT128_64BIT +#endif -#if (HAS_SIZEOF_INT128_64BIT || HAS_MSVC_64BIT || HAS_GCC_4_4_64BIT) +#if defined(_MSC_VER) && defined(_M_X64) +#define WC_HAS_MSVC_64BIT +#endif + +#if (defined(__GNUC__) && defined(__LP64__) && \ + ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)))) +#define WC_HAS_GCC_4_4_64BIT +#endif + +#if (defined(WC_HAS_SIZEOF_INT128_64BIT) || defined(WC_HAS_MSVC_64BIT) || \ + defined(WC_HAS_GCC_4_4_64BIT)) #define POLY130564 #else #define POLY130532 From 46dee9e79289bbfdb0c55df5d9892c43153639ca Mon Sep 17 00:00:00 2001 From: JacobBarthelmeh Date: Mon, 7 Nov 2016 15:48:06 -0800 Subject: [PATCH 2/3] ARMv8 : clang 32 bit build --- wolfcrypt/src/port/arm/armv8-aes.c | 103 +++++++++++--------------- wolfcrypt/src/port/arm/armv8-sha256.c | 2 +- 2 files changed, 43 insertions(+), 62 deletions(-) diff --git a/wolfcrypt/src/port/arm/armv8-aes.c b/wolfcrypt/src/port/arm/armv8-aes.c index 2c5e0ee1f..c6c8ea2bf 100644 --- a/wolfcrypt/src/port/arm/armv8-aes.c +++ b/wolfcrypt/src/port/arm/armv8-aes.c @@ -85,10 +85,10 @@ static const byte rcon[] = { #define SBOX(x) \ do { \ __asm__ volatile ( \ - "VDUP.32 q1, %r[in] \n" \ + "VDUP.32 q1, %[in] \n" \ "VMOV.i32 q0, #0 \n" \ "AESE.8 q0, q1 \n" \ - "VMOV.32 %r[out], d0[0] \n" \ + "VMOV.32 %[out], d0[0] \n" \ : [out] "=r"((x)) \ : [in] "r" ((x)) \ : "cc", "memory", "q0", "q1"\ @@ -2868,10 +2868,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, word32* keyPt = aes->key; __asm__ __volatile__ ( "VLD1.32 {q0}, [%[CtrIn]] \n" - "VLD1.32 {q1}, [%[Key]]! \n" - "VLD1.32 {q2}, [%[Key]]! \n" - "VLD1.32 {q3}, [%[Key]]! \n" - "VLD1.32 {q4}, [%[Key]]! \n" + "VLDM %[Key]!, {q1-q4} \n" "AESE.8 q0, q1\n" "AESMC.8 q0, q0\n" @@ -2899,8 +2896,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "AESMC.8 q0, q0\n" "AESE.8 q0, q2\n" - "#subtract rounds done so far and see if should continue\n" - "MOV r12, %r[R] \n" + "MOV r12, %[R] \n" "CMP r12, #10 \n" "BEQ 1f \n" "VLD1.32 {q1}, [%[Key]]! \n" @@ -2925,9 +2921,10 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "VEOR.32 q0, q0, q1\n" "VST1.32 {q0}, [%[CtrOut]] \n" - :[CtrOut] "=r" (outBlock) - :"0" (outBlock), [Key] "r" (keyPt), [R] "r" (aes->rounds), - [CtrIn] "r" (inBlock) + :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds), + "=r" (inBlock) + :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (aes->rounds), + [CtrIn] "3" (inBlock) : "cc", "memory", "r12", "q0", "q1", "q2", "q3", "q4" ); @@ -2944,68 +2941,66 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, sub bytes for shifted rows */ + word32* keyPt = aes->key; __asm__ __volatile__ ( "VLD1.32 {q0}, [%[CtrIn]] \n" - "VLD1.32 {q1}, %[Key]! \n" - "VLD1.32 {q2}, %[Key]! \n" - "VLD1.32 {q3}, %[Key]! \n" - "VLD1.32 {q4}, %[Key]! \n" + "VLDM %[Key]!, {q1-q4} \n" "AESD.8 q0, q1\n" "AESIMC.8 q0, q0\n" "AESD.8 q0, q2\n" "AESIMC.8 q0, q0\n" - "VLD1.32 {q1}, %[Key]! \n" + "VLD1.32 {q1}, [%[Key]]! \n" "AESD.8 q0, q3\n" "AESIMC.8 q0, q0\n" - "VLD1.32 {q2}, %[Key]! \n" + "VLD1.32 {q2}, [%[Key]]! \n" "AESD.8 q0, q4\n" "AESIMC.8 q0, q0\n" - "VLD1.32 {q3}, %[Key]! \n" + "VLD1.32 {q3}, [%[Key]]! \n" "AESD.8 q0, q1\n" "AESIMC.8 q0, q0\n" - "VLD1.32 {q4}, %[Key]! \n" + "VLD1.32 {q4}, [%[Key]]! \n" "AESD.8 q0, q2\n" "AESIMC.8 q0, q0\n" "AESD.8 q0, q3\n" "AESIMC.8 q0, q0\n" - "VLD1.32 {q1}, %[Key]! \n" + "VLD1.32 {q1}, [%[Key]]! \n" "AESD.8 q0, q4\n" "AESIMC.8 q0, q0\n" - "VLD1.32 {q2}, %[Key]! \n" + "VLD1.32 {q2}, [%[Key]]! \n" "AESD.8 q0, q1\n" "AESIMC.8 q0, q0\n" "AESD.8 q0, q2\n" - "#subtract rounds done so far and see if should continue\n" - "MOV r12, %r[R] \n" - "CMP r12, #10 \n" + "MOV r12, %[R] \n" + "CMP r12, #10 \n" "BEQ 1f \n" - "VLD1.32 {q1}, %[Key]! \n" + "VLD1.32 {q1}, [%[Key]]! \n" "AESIMC.8 q0, q0\n" - "VLD1.32 {q2}, %[Key]! \n" + "VLD1.32 {q2}, [%[Key]]! \n" "AESD.8 q0, q1\n" "AESIMC.8 q0, q0\n" "AESD.8 q0, q2\n" "CMP r12, #12 \n" "BEQ 1f \n" - "VLD1.32 {q1}, %[Key]! \n" + "VLD1.32 {q1}, [%[Key]]! \n" "AESIMC.8 q0, q0\n" - "VLD1.32 {q2}, %[Key]! \n" + "VLD1.32 {q2}, [%[Key]]! \n" "AESD.8 q0, q1\n" "AESIMC.8 q0, q0\n" "AESD.8 q0, q2\n" "#Final AddRoundKey then store result \n" "1: \n" - "VLD1.32 {q1}, %[Key]! \n" + "VLD1.32 {q1}, [%[Key]]! \n" "VEOR.32 q0, q0, q1\n" "VST1.32 {q0}, [%[CtrOut]] \n" - :[CtrOut] "=r" (outBlock) - :[Key] "m" (aes->key), "0" (outBlock), [R] "r" (aes->rounds), - [CtrIn] "r" (inBlock) + :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds), + "=r" (inBlock) + :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (aes->rounds), + [CtrIn] "3" (inBlock) : "cc", "memory", "r12", "q0", "q1", "q2", "q3", "q4" ); @@ -3038,7 +3033,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, switch(aes->rounds) { case 10: /* AES 128 BLOCK */ __asm__ __volatile__ ( - "MOV r11, %r[blocks] \n" + "MOV r11, %[blocks] \n" "VLD1.32 {q1}, [%[Key]]! \n" "VLD1.32 {q2}, [%[Key]]! \n" "VLD1.32 {q3}, [%[Key]]! \n" @@ -3098,7 +3093,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, case 12: /* AES 192 BLOCK */ __asm__ __volatile__ ( - "MOV r11, %r[blocks] \n" + "MOV r11, %[blocks] \n" "VLD1.32 {q1}, [%[Key]]! \n" "VLD1.32 {q2}, [%[Key]]! \n" "VLD1.32 {q3}, [%[Key]]! \n" @@ -3164,7 +3159,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, case 14: /* AES 256 BLOCK */ __asm__ __volatile__ ( - "MOV r11, %r[blocks] \n" + "MOV r11, %[blocks] \n" "VLD1.32 {q1}, [%[Key]]! \n" "VLD1.32 {q2}, [%[Key]]! \n" "VLD1.32 {q3}, [%[Key]]! \n" @@ -3260,7 +3255,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, switch(aes->rounds) { case 10: /* AES 128 BLOCK */ __asm__ __volatile__ ( - "MOV r11, %r[blocks] \n" + "MOV r11, %[blocks] \n" "VLD1.32 {q1}, [%[Key]]! \n" "VLD1.32 {q2}, [%[Key]]! \n" "VLD1.32 {q3}, [%[Key]]! \n" @@ -3322,7 +3317,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, case 12: /* AES 192 BLOCK */ __asm__ __volatile__ ( - "MOV r11, %r[blocks] \n" + "MOV r11, %[blocks] \n" "VLD1.32 {q1}, [%[Key]]! \n" "VLD1.32 {q2}, [%[Key]]! \n" "VLD1.32 {q3}, [%[Key]]! \n" @@ -3390,7 +3385,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, case 14: /* AES 256 BLOCK */ __asm__ __volatile__ ( - "MOV r11, %r[blocks] \n" + "MOV r11, %[blocks] \n" "VLD1.32 {q1}, [%[Key]]! \n" "VLD1.32 {q2}, [%[Key]]! \n" "VLD1.32 {q3}, [%[Key]]! \n" @@ -3513,7 +3508,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, switch(aes->rounds) { case 10: /* AES 128 BLOCK */ __asm__ __volatile__ ( - "MOV r11, %r[blocks] \n" + "MOV r11, %[blocks] \n" "VLDM %[Key]!, {q1-q4} \n" "#Create vector with the value 1 \n" @@ -3649,24 +3644,17 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, case 12: /* AES 192 BLOCK */ __asm__ __volatile__ ( - "MOV r11, %r[blocks] \n" - "VLD1.32 {q1}, [%[Key]]! \n" - "VLD1.32 {q2}, [%[Key]]! \n" - "VLD1.32 {q3}, [%[Key]]! \n" - "VLD1.32 {q4}, [%[Key]]! \n" + "MOV r11, %[blocks] \n" + "VLDM %[Key]!, {q1-q4} \n" "#Create vector with the value 1 \n" "VMOV.u32 q15, #1 \n" "VSHR.u64 q15, q15, #32 \n" - "VLD1.32 {q5}, [%[Key]]! \n" - "VLD1.32 {q6}, [%[Key]]! \n" - "VLD1.32 {q7}, [%[Key]]! \n" - "VLD1.32 {q8}, [%[Key]]! \n" + "VLDM %[Key]!, {q5-q8} \n" "VEOR.32 q14, q14, q14 \n" "VEXT.8 q14, q15, q14, #8\n" - "VLD1.32 {q9}, [%[Key]]! \n" - "VLD1.32 {q10}, [%[Key]]!\n" + "VLDM %[Key]!, {q9-q10} \n" "VLD1.32 {q13}, [%[reg]]\n" /* double block */ @@ -3815,24 +3803,17 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, case 14: /* AES 256 BLOCK */ __asm__ __volatile__ ( - "MOV r11, %r[blocks] \n" - "VLD1.32 {q1}, [%[Key]]! \n" - "VLD1.32 {q2}, [%[Key]]! \n" - "VLD1.32 {q3}, [%[Key]]! \n" - "VLD1.32 {q4}, [%[Key]]! \n" + "MOV r11, %[blocks] \n" + "VLDM %[Key]!, {q1-q4} \n" "#Create vector with the value 1 \n" "VMOV.u32 q15, #1 \n" "VSHR.u64 q15, q15, #32 \n" - "VLD1.32 {q5}, [%[Key]]! \n" - "VLD1.32 {q6}, [%[Key]]! \n" - "VLD1.32 {q7}, [%[Key]]! \n" - "VLD1.32 {q8}, [%[Key]]! \n" + "VLDM %[Key]!, {q5-q8} \n" "VEOR.32 q14, q14, q14 \n" "VEXT.8 q14, q15, q14, #8\n" - "VLD1.32 {q9}, [%[Key]]! \n" - "VLD1.32 {q10}, [%[Key]]! \n" + "VLDM %[Key]!, {q9-q10} \n" "VLD1.32 {q13}, [%[reg]]\n" /* double block */ diff --git a/wolfcrypt/src/port/arm/armv8-sha256.c b/wolfcrypt/src/port/arm/armv8-sha256.c index b5fcb6831..cceb8c865 100644 --- a/wolfcrypt/src/port/arm/armv8-sha256.c +++ b/wolfcrypt/src/port/arm/armv8-sha256.c @@ -706,7 +706,7 @@ int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len) "#load current digest\n" "VLDM %[digest], {q12-q13} \n" - "MOV r8, %r[blocks] \n" + "MOV r8, %[blocks] \n" "VREV32.8 q0, q0 \n" "VREV32.8 q1, q1 \n" "VREV32.8 q2, q2 \n" From 208f747a47e917d71100997143ee1678f95f47ff Mon Sep 17 00:00:00 2001 From: Jacob Barthelmeh Date: Tue, 8 Nov 2016 10:28:01 -0700 Subject: [PATCH 3/3] ARMv8 : add armv8-aes.c to EXTRA_DIST --- wolfcrypt/src/include.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wolfcrypt/src/include.am b/wolfcrypt/src/include.am index b1fae4d3e..111ad6331 100644 --- a/wolfcrypt/src/include.am +++ b/wolfcrypt/src/include.am @@ -45,7 +45,8 @@ EXTRA_DIST += wolfcrypt/src/port/ti/ti-aes.c \ wolfcrypt/src/port/ti/ti-ccm.c \ wolfcrypt/src/port/pic32/pic32mz-hash.c \ wolfcrypt/src/port/nrf51.c \ - wolfcrypt/src/port/arm/armv8-sha256.c + wolfcrypt/src/port/arm/armv8-sha256.c \ + wolfcrypt/src/port/arm/armv8-aes.c if BUILD_CAVIUM src_libwolfssl_la_SOURCES += wolfcrypt/src/port/cavium/cavium_nitrox.c