From 68666101b706eecc8f31fd62b9028102d8117694 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Tue, 29 May 2018 09:25:38 +1000 Subject: [PATCH] Fix for g++ 7.3 - macro strings in asm --- wolfcrypt/src/aes.c | 2630 ++++++++++++++++++++-------------------- wolfcrypt/src/sha256.c | 622 +++++----- wolfcrypt/src/sha512.c | 398 +++--- 3 files changed, 1825 insertions(+), 1825 deletions(-) diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 827293b83..2e63ea841 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -3711,7 +3711,7 @@ while (0) -#define _VAR(a) ""#a"" +#define _VAR(a) "" #a "" #define VAR(a) _VAR(a) #define HR %%xmm14 @@ -3739,12 +3739,12 @@ while (0) "aesenc %%xmm12, %%xmm10\n\t" \ "aesenc %%xmm12, %%xmm11\n\t" -#define AESENC_SET(o) \ - "movdqa "#o"(%[KEY]), %%xmm12\n\t" \ +#define AESENC_SET(o) \ + "movdqa " #o "(%[KEY]), %%xmm12\n\t" \ AESENC() #define AESENC_CTR() \ - "movdqu "VAR(CTR1)", %%xmm4\n\t" \ + "movdqu " VAR(CTR1) ", %%xmm4\n\t" \ "movdqa %[BSWAP_EPI64], %%xmm1\n\t" \ "movdqu %%xmm4, %%xmm0\n\t" \ "pshufb %%xmm1, %%xmm4\n\t" \ @@ -3771,241 +3771,241 @@ while (0) "pshufb %%xmm1, %%xmm11\n\t" \ "paddd %[EIGHT], %%xmm0\n\t" -#define AESENC_XOR() \ - "movdqa (%[KEY]), %%xmm12\n\t" \ - "movdqu %%xmm0, "VAR(CTR1)"\n\t" \ - "pxor %%xmm12, %%xmm4\n\t" \ - "pxor %%xmm12, %%xmm5\n\t" \ - "pxor %%xmm12, %%xmm6\n\t" \ - "pxor %%xmm12, %%xmm7\n\t" \ - "pxor %%xmm12, %%xmm8\n\t" \ - "pxor %%xmm12, %%xmm9\n\t" \ - "pxor %%xmm12, %%xmm10\n\t" \ +#define AESENC_XOR() \ + "movdqa (%[KEY]), %%xmm12\n\t" \ + "movdqu %%xmm0, " VAR(CTR1) "\n\t" \ + "pxor %%xmm12, %%xmm4\n\t" \ + "pxor %%xmm12, %%xmm5\n\t" \ + "pxor %%xmm12, %%xmm6\n\t" \ + "pxor %%xmm12, %%xmm7\n\t" \ + "pxor %%xmm12, %%xmm8\n\t" \ + "pxor %%xmm12, %%xmm9\n\t" \ + "pxor %%xmm12, %%xmm10\n\t" \ "pxor %%xmm12, %%xmm11\n\t" /* Encrypt and carry-less multiply for AVX1. 
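The change repeated throughout these hunks is mechanical: every stringified parameter (#o, #src, #r, ...) and every VAR(...) expansion inside the asm string concatenations gains a space on each side. Below is a minimal sketch, not part of the patch, of why the unspaced form trips g++ 7.3; the CTR1 definition here is a made-up placeholder (the real one lives elsewhere in aes.c). In C++11 and later, a string literal immediately followed by an identifier is lexed as a user-defined literal with that identifier as its suffix, so the compiler diagnoses the token instead of expanding the macro.

#define _VAR(a) "" #a ""   // spaces keep #a a separate preprocessing token
#define VAR(a)  _VAR(a)    // extra level so the argument is macro-expanded before stringizing
#define CTR1    96(%%rsp)  // placeholder operand for illustration only

// Diagnosed by g++ 7.3: "movdqu "VAR is lexed as a user-defined string
// literal with suffix VAR, so VAR(CTR1) is not treated as a macro call:
//     "movdqu "VAR(CTR1)", %%xmm4\n\t"
// Accepted: the space ends the literal, VAR(CTR1) expands, and the pieces
// concatenate into a single asm template string:
static const char movdqu_tmpl[] = "movdqu " VAR(CTR1) ", %%xmm4\n\t";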
*/ -#define AESENC_PCLMUL_1(src, o1, o2, o3) \ - "movdqu "#o3"("VAR(HTR)"), %%xmm12\n\t" \ - "movdqu "#o2"("#src"), %%xmm0\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm4\n\t" \ - "pshufb %[BSWAP_MASK], %%xmm0\n\t" \ - "pxor %%xmm2, %%xmm0\n\t" \ - "pshufd $0x4e, %%xmm12, %%xmm1\n\t" \ - "pshufd $0x4e, %%xmm0, %%xmm14\n\t" \ - "pxor %%xmm12, %%xmm1\n\t" \ - "pxor %%xmm0, %%xmm14\n\t" \ - "movdqa %%xmm0, %%xmm3\n\t" \ - "pclmulqdq $0x11, %%xmm12, %%xmm3\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm5\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm6\n\t" \ - "movdqa %%xmm0, %%xmm2\n\t" \ - "pclmulqdq $0x00, %%xmm12, %%xmm2\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm7\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm8\n\t" \ - "pclmulqdq $0x00, %%xmm14, %%xmm1\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm9\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm10\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm11\n\t" \ - "pxor %%xmm2, %%xmm1\n\t" \ - "pxor %%xmm3, %%xmm1\n\t" \ +#define AESENC_PCLMUL_1(src, o1, o2, o3) \ + "movdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \ + "movdqu " #o2 "(" #src "), %%xmm0\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm4\n\t" \ + "pshufb %[BSWAP_MASK], %%xmm0\n\t" \ + "pxor %%xmm2, %%xmm0\n\t" \ + "pshufd $0x4e, %%xmm12, %%xmm1\n\t" \ + "pshufd $0x4e, %%xmm0, %%xmm14\n\t" \ + "pxor %%xmm12, %%xmm1\n\t" \ + "pxor %%xmm0, %%xmm14\n\t" \ + "movdqa %%xmm0, %%xmm3\n\t" \ + "pclmulqdq $0x11, %%xmm12, %%xmm3\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm5\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm6\n\t" \ + "movdqa %%xmm0, %%xmm2\n\t" \ + "pclmulqdq $0x00, %%xmm12, %%xmm2\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm7\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm8\n\t" \ + "pclmulqdq $0x00, %%xmm14, %%xmm1\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm9\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm10\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm11\n\t" \ + "pxor %%xmm2, %%xmm1\n\t" \ + "pxor %%xmm3, %%xmm1\n\t" \ -#define AESENC_PCLMUL_N(src, o1, o2, o3) \ - "movdqu "#o3"("VAR(HTR)"), %%xmm12\n\t" \ - "movdqu "#o2"("#src"), %%xmm0\n\t" \ - "pshufd $0x4e, %%xmm12, %%xmm13\n\t" \ - "pshufb %[BSWAP_MASK], %%xmm0\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm4\n\t" \ - "pxor %%xmm12, %%xmm13\n\t" \ - "pshufd $0x4e, %%xmm0, %%xmm14\n\t" \ - "pxor %%xmm0, %%xmm14\n\t" \ - "movdqa %%xmm0, %%xmm15\n\t" \ - "pclmulqdq $0x11, %%xmm12, %%xmm15\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm5\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm6\n\t" \ - "pclmulqdq $0x00, %%xmm0, %%xmm12\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm7\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm8\n\t" \ - "pclmulqdq $0x00, %%xmm14, %%xmm13\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm9\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm10\n\t" \ - "aesenc "#o1"(%[KEY]), %%xmm11\n\t" \ - "pxor %%xmm12, %%xmm1\n\t" \ - "pxor %%xmm12, %%xmm2\n\t" \ - "pxor %%xmm15, %%xmm1\n\t" \ - "pxor %%xmm15, %%xmm3\n\t" \ - "pxor %%xmm13, %%xmm1\n\t" \ +#define AESENC_PCLMUL_N(src, o1, o2, o3) \ + "movdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \ + "movdqu " #o2 "(" #src" ), %%xmm0\n\t" \ + "pshufd $0x4e, %%xmm12, %%xmm13\n\t" \ + "pshufb %[BSWAP_MASK], %%xmm0\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm4\n\t" \ + "pxor %%xmm12, %%xmm13\n\t" \ + "pshufd $0x4e, %%xmm0, %%xmm14\n\t" \ + "pxor %%xmm0, %%xmm14\n\t" \ + "movdqa %%xmm0, %%xmm15\n\t" \ + "pclmulqdq $0x11, %%xmm12, %%xmm15\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm5\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm6\n\t" \ + "pclmulqdq $0x00, %%xmm0, %%xmm12\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm7\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm8\n\t" \ + "pclmulqdq $0x00, %%xmm14, %%xmm13\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm9\n\t" \ + "aesenc " #o1 "(%[KEY]), %%xmm10\n\t" \ + 
"aesenc " #o1 "(%[KEY]), %%xmm11\n\t" \ + "pxor %%xmm12, %%xmm1\n\t" \ + "pxor %%xmm12, %%xmm2\n\t" \ + "pxor %%xmm15, %%xmm1\n\t" \ + "pxor %%xmm15, %%xmm3\n\t" \ + "pxor %%xmm13, %%xmm1\n\t" \ -#define AESENC_PCLMUL_L(o) \ - "movdqa %%xmm1, %%xmm14\n\t" \ - "psrldq $8, %%xmm1\n\t" \ - "pslldq $8, %%xmm14\n\t" \ - "aesenc "#o"(%[KEY]), %%xmm4\n\t" \ - "pxor %%xmm14, %%xmm2\n\t" \ - "pxor %%xmm1, %%xmm3\n\t" \ - "movdqa %%xmm2, %%xmm12\n\t" \ - "movdqa %%xmm2, %%xmm13\n\t" \ - "movdqa %%xmm2, %%xmm14\n\t" \ - "aesenc "#o"(%[KEY]), %%xmm5\n\t" \ - "pslld $31, %%xmm12\n\t" \ - "pslld $30, %%xmm13\n\t" \ - "pslld $25, %%xmm14\n\t" \ - "aesenc "#o"(%[KEY]), %%xmm6\n\t" \ - "pxor %%xmm13, %%xmm12\n\t" \ - "pxor %%xmm14, %%xmm12\n\t" \ - "aesenc "#o"(%[KEY]), %%xmm7\n\t" \ - "movdqa %%xmm12, %%xmm13\n\t" \ - "pslldq $12, %%xmm12\n\t" \ - "psrldq $4, %%xmm13\n\t" \ - "aesenc "#o"(%[KEY]), %%xmm8\n\t" \ - "pxor %%xmm12, %%xmm2\n\t" \ - "movdqa %%xmm2, %%xmm14\n\t" \ - "movdqa %%xmm2, %%xmm1\n\t" \ - "movdqa %%xmm2, %%xmm0\n\t" \ - "aesenc "#o"(%[KEY]), %%xmm9\n\t" \ - "psrld $1, %%xmm14\n\t" \ - "psrld $2, %%xmm1\n\t" \ - "psrld $7, %%xmm0\n\t" \ - "aesenc "#o"(%[KEY]), %%xmm10\n\t" \ - "pxor %%xmm1, %%xmm14\n\t" \ - "pxor %%xmm0, %%xmm14\n\t" \ - "aesenc "#o"(%[KEY]), %%xmm11\n\t" \ - "pxor %%xmm13, %%xmm14\n\t" \ - "pxor %%xmm14, %%xmm2\n\t" \ - "pxor %%xmm3, %%xmm2\n\t" \ +#define AESENC_PCLMUL_L(o) \ + "movdqa %%xmm1, %%xmm14\n\t" \ + "psrldq $8, %%xmm1\n\t" \ + "pslldq $8, %%xmm14\n\t" \ + "aesenc " #o "(%[KEY]), %%xmm4\n\t" \ + "pxor %%xmm14, %%xmm2\n\t" \ + "pxor %%xmm1, %%xmm3\n\t" \ + "movdqa %%xmm2, %%xmm12\n\t" \ + "movdqa %%xmm2, %%xmm13\n\t" \ + "movdqa %%xmm2, %%xmm14\n\t" \ + "aesenc " #o "(%[KEY]), %%xmm5\n\t" \ + "pslld $31, %%xmm12\n\t" \ + "pslld $30, %%xmm13\n\t" \ + "pslld $25, %%xmm14\n\t" \ + "aesenc " #o "(%[KEY]), %%xmm6\n\t" \ + "pxor %%xmm13, %%xmm12\n\t" \ + "pxor %%xmm14, %%xmm12\n\t" \ + "aesenc " #o "(%[KEY]), %%xmm7\n\t" \ + "movdqa %%xmm12, %%xmm13\n\t" \ + "pslldq $12, %%xmm12\n\t" \ + "psrldq $4, %%xmm13\n\t" \ + "aesenc " #o "(%[KEY]), %%xmm8\n\t" \ + "pxor %%xmm12, %%xmm2\n\t" \ + "movdqa %%xmm2, %%xmm14\n\t" \ + "movdqa %%xmm2, %%xmm1\n\t" \ + "movdqa %%xmm2, %%xmm0\n\t" \ + "aesenc " #o "(%[KEY]), %%xmm9\n\t" \ + "psrld $1, %%xmm14\n\t" \ + "psrld $2, %%xmm1\n\t" \ + "psrld $7, %%xmm0\n\t" \ + "aesenc " #o "(%[KEY]), %%xmm10\n\t" \ + "pxor %%xmm1, %%xmm14\n\t" \ + "pxor %%xmm0, %%xmm14\n\t" \ + "aesenc " #o "(%[KEY]), %%xmm11\n\t" \ + "pxor %%xmm13, %%xmm14\n\t" \ + "pxor %%xmm14, %%xmm2\n\t" \ + "pxor %%xmm3, %%xmm2\n\t" \ /* Encrypt and carry-less multiply with last key. 
*/ -#define AESENC_LAST(in, out) \ - "aesenclast %%xmm12, %%xmm4\n\t" \ - "aesenclast %%xmm12, %%xmm5\n\t" \ - "movdqu ("#in"),%%xmm0\n\t" \ - "movdqu 16("#in"),%%xmm1\n\t" \ - "pxor %%xmm0, %%xmm4\n\t" \ - "pxor %%xmm1, %%xmm5\n\t" \ - "movdqu %%xmm4, ("#out")\n\t" \ - "movdqu %%xmm5, 16("#out")\n\t" \ - "aesenclast %%xmm12, %%xmm6\n\t" \ - "aesenclast %%xmm12, %%xmm7\n\t" \ - "movdqu 32("#in"),%%xmm0\n\t" \ - "movdqu 48("#in"),%%xmm1\n\t" \ - "pxor %%xmm0, %%xmm6\n\t" \ - "pxor %%xmm1, %%xmm7\n\t" \ - "movdqu %%xmm6, 32("#out")\n\t" \ - "movdqu %%xmm7, 48("#out")\n\t" \ - "aesenclast %%xmm12, %%xmm8\n\t" \ - "aesenclast %%xmm12, %%xmm9\n\t" \ - "movdqu 64("#in"),%%xmm0\n\t" \ - "movdqu 80("#in"),%%xmm1\n\t" \ - "pxor %%xmm0, %%xmm8\n\t" \ - "pxor %%xmm1, %%xmm9\n\t" \ - "movdqu %%xmm8, 64("#out")\n\t" \ - "movdqu %%xmm9, 80("#out")\n\t" \ - "aesenclast %%xmm12, %%xmm10\n\t" \ - "aesenclast %%xmm12, %%xmm11\n\t" \ - "movdqu 96("#in"),%%xmm0\n\t" \ - "movdqu 112("#in"),%%xmm1\n\t" \ - "pxor %%xmm0, %%xmm10\n\t" \ - "pxor %%xmm1, %%xmm11\n\t" \ - "movdqu %%xmm10, 96("#out")\n\t" \ - "movdqu %%xmm11, 112("#out")\n\t" +#define AESENC_LAST(in, out) \ + "aesenclast %%xmm12, %%xmm4\n\t" \ + "aesenclast %%xmm12, %%xmm5\n\t" \ + "movdqu (" #in "),%%xmm0\n\t" \ + "movdqu 16(" #in "),%%xmm1\n\t" \ + "pxor %%xmm0, %%xmm4\n\t" \ + "pxor %%xmm1, %%xmm5\n\t" \ + "movdqu %%xmm4, (" #out ")\n\t" \ + "movdqu %%xmm5, 16(" #out ")\n\t" \ + "aesenclast %%xmm12, %%xmm6\n\t" \ + "aesenclast %%xmm12, %%xmm7\n\t" \ + "movdqu 32(" #in "),%%xmm0\n\t" \ + "movdqu 48(" #in "),%%xmm1\n\t" \ + "pxor %%xmm0, %%xmm6\n\t" \ + "pxor %%xmm1, %%xmm7\n\t" \ + "movdqu %%xmm6, 32(" #out ")\n\t" \ + "movdqu %%xmm7, 48(" #out ")\n\t" \ + "aesenclast %%xmm12, %%xmm8\n\t" \ + "aesenclast %%xmm12, %%xmm9\n\t" \ + "movdqu 64(" #in "),%%xmm0\n\t" \ + "movdqu 80(" #in "),%%xmm1\n\t" \ + "pxor %%xmm0, %%xmm8\n\t" \ + "pxor %%xmm1, %%xmm9\n\t" \ + "movdqu %%xmm8, 64(" #out ")\n\t" \ + "movdqu %%xmm9, 80(" #out ")\n\t" \ + "aesenclast %%xmm12, %%xmm10\n\t" \ + "aesenclast %%xmm12, %%xmm11\n\t" \ + "movdqu 96(" #in "),%%xmm0\n\t" \ + "movdqu 112(" #in "),%%xmm1\n\t" \ + "pxor %%xmm0, %%xmm10\n\t" \ + "pxor %%xmm1, %%xmm11\n\t" \ + "movdqu %%xmm10, 96(" #out ")\n\t" \ + "movdqu %%xmm11, 112(" #out ")\n\t" #define _AESENC_AVX(r) \ - "aesenc 16(%[KEY]), "#r"\n\t" \ - "aesenc 32(%[KEY]), "#r"\n\t" \ - "aesenc 48(%[KEY]), "#r"\n\t" \ - "aesenc 64(%[KEY]), "#r"\n\t" \ - "aesenc 80(%[KEY]), "#r"\n\t" \ - "aesenc 96(%[KEY]), "#r"\n\t" \ - "aesenc 112(%[KEY]), "#r"\n\t" \ - "aesenc 128(%[KEY]), "#r"\n\t" \ - "aesenc 144(%[KEY]), "#r"\n\t" \ + "aesenc 16(%[KEY]), " #r "\n\t" \ + "aesenc 32(%[KEY]), " #r "\n\t" \ + "aesenc 48(%[KEY]), " #r "\n\t" \ + "aesenc 64(%[KEY]), " #r "\n\t" \ + "aesenc 80(%[KEY]), " #r "\n\t" \ + "aesenc 96(%[KEY]), " #r "\n\t" \ + "aesenc 112(%[KEY]), " #r "\n\t" \ + "aesenc 128(%[KEY]), " #r "\n\t" \ + "aesenc 144(%[KEY]), " #r "\n\t" \ "cmpl $11, %[nr]\n\t" \ "movdqa 160(%[KEY]), %%xmm5\n\t" \ "jl %=f\n\t" \ - "aesenc %%xmm5, "#r"\n\t" \ - "aesenc 176(%[KEY]), "#r"\n\t" \ + "aesenc %%xmm5, " #r "\n\t" \ + "aesenc 176(%[KEY]), " #r "\n\t" \ "cmpl $13, %[nr]\n\t" \ "movdqa 192(%[KEY]), %%xmm5\n\t" \ "jl %=f\n\t" \ - "aesenc %%xmm5, "#r"\n\t" \ - "aesenc 208(%[KEY]), "#r"\n\t" \ + "aesenc %%xmm5, " #r "\n\t" \ + "aesenc 208(%[KEY]), " #r "\n\t" \ "movdqa 224(%[KEY]), %%xmm5\n\t" \ "%=:\n\t" \ - "aesenclast %%xmm5, "#r"\n\t" + "aesenclast %%xmm5, " #r "\n\t" #define AESENC_AVX(r) \ _AESENC_AVX(r) #define AESENC_BLOCK(in, out) 
\ - "movdqu "VAR(CTR1)", %%xmm4\n\t" \ + "movdqu " VAR(CTR1) ", %%xmm4\n\t" \ "movdqu %%xmm4, %%xmm5\n\t" \ "pshufb %[BSWAP_EPI64], %%xmm4\n\t" \ "paddd %[ONE], %%xmm5\n\t" \ "pxor (%[KEY]), %%xmm4\n\t" \ - "movdqu %%xmm5, "VAR(CTR1)"\n\t" \ + "movdqu %%xmm5, " VAR(CTR1) "\n\t" \ AESENC_AVX(%%xmm4) \ - "movdqu ("#in"), %%xmm5\n\t" \ + "movdqu (" #in "), %%xmm5\n\t" \ "pxor %%xmm5, %%xmm4\n\t" \ - "movdqu %%xmm4, ("#out")\n\t" \ + "movdqu %%xmm4, (" #out ")\n\t" \ "pshufb %[BSWAP_MASK], %%xmm4\n\t" \ - "pxor %%xmm4, "VAR(XR)"\n\t" + "pxor %%xmm4, " VAR(XR) "\n\t" -#define _AESENC_GFMUL(in, out, H, X) \ - "movdqu "VAR(CTR1)", %%xmm4\n\t" \ - "movdqu %%xmm4, %%xmm5\n\t" \ - "pshufb %[BSWAP_EPI64], %%xmm4\n\t" \ - "paddd %[ONE], %%xmm5\n\t" \ - "pxor (%[KEY]), %%xmm4\n\t" \ - "movdqu %%xmm5, "VAR(CTR1)"\n\t" \ - "movdqa "#X", %%xmm6\n\t" \ - "pclmulqdq $0x10, "#H", %%xmm6\n\t" \ - "aesenc 16(%[KEY]), %%xmm4\n\t" \ - "aesenc 32(%[KEY]), %%xmm4\n\t" \ - "movdqa "#X", %%xmm7\n\t" \ - "pclmulqdq $0x01, "#H", %%xmm7\n\t" \ - "aesenc 48(%[KEY]), %%xmm4\n\t" \ - "aesenc 64(%[KEY]), %%xmm4\n\t" \ - "movdqa "#X", %%xmm8\n\t" \ - "pclmulqdq $0x00, "#H", %%xmm8\n\t" \ - "aesenc 80(%[KEY]), %%xmm4\n\t" \ - "movdqa "#X", %%xmm1\n\t" \ - "pclmulqdq $0x11, "#H", %%xmm1\n\t" \ - "aesenc 96(%[KEY]), %%xmm4\n\t" \ - "pxor %%xmm7, %%xmm6\n\t" \ - "movdqa %%xmm6, %%xmm2\n\t" \ - "psrldq $8, %%xmm6\n\t" \ - "pslldq $8, %%xmm2\n\t" \ - "aesenc 112(%[KEY]), %%xmm4\n\t" \ - "movdqa %%xmm1, %%xmm3\n\t" \ - "pxor %%xmm8, %%xmm2\n\t" \ - "pxor %%xmm6, %%xmm3\n\t" \ - "movdqa %[MOD2_128], %%xmm0\n\t" \ - "movdqa %%xmm2, %%xmm7\n\t" \ - "pclmulqdq $0x10, %%xmm0, %%xmm7\n\t" \ - "aesenc 128(%[KEY]), %%xmm4\n\t" \ - "pshufd $0x4e, %%xmm2, %%xmm6\n\t" \ - "pxor %%xmm7, %%xmm6\n\t" \ - "movdqa %%xmm6, %%xmm7\n\t" \ - "pclmulqdq $0x10, %%xmm0, %%xmm7\n\t" \ - "aesenc 144(%[KEY]), %%xmm4\n\t" \ - "pshufd $0x4e, %%xmm6, "VAR(XR)"\n\t" \ - "pxor %%xmm7, "VAR(XR)"\n\t" \ - "pxor %%xmm3, "VAR(XR)"\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "movdqu 160(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "aesenc %%xmm5, %%xmm4\n\t" \ - "aesenc 176(%[KEY]), %%xmm4\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "movdqu 192(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "aesenc %%xmm5, %%xmm4\n\t" \ - "aesenc 208(%[KEY]), %%xmm4\n\t" \ - "movdqa 224(%[KEY]), %%xmm5\n\t" \ - "%=:\n\t" \ - "aesenclast %%xmm5, %%xmm4\n\t" \ - "movdqu ("#in"), %%xmm5\n\t" \ - "pxor %%xmm5, %%xmm4\n\t" \ - "movdqu %%xmm4, ("#out")\n\t" -#define AESENC_GFMUL(in, out, H, X) \ +#define _AESENC_GFMUL(in, out, H, X) \ + "movdqu " VAR(CTR1) ", %%xmm4\n\t" \ + "movdqu %%xmm4, %%xmm5\n\t" \ + "pshufb %[BSWAP_EPI64], %%xmm4\n\t" \ + "paddd %[ONE], %%xmm5\n\t" \ + "pxor (%[KEY]), %%xmm4\n\t" \ + "movdqu %%xmm5, " VAR(CTR1) "\n\t" \ + "movdqa " #X ", %%xmm6\n\t" \ + "pclmulqdq $0x10, " #H ", %%xmm6\n\t" \ + "aesenc 16(%[KEY]), %%xmm4\n\t" \ + "aesenc 32(%[KEY]), %%xmm4\n\t" \ + "movdqa " #X ", %%xmm7\n\t" \ + "pclmulqdq $0x01, " #H ", %%xmm7\n\t" \ + "aesenc 48(%[KEY]), %%xmm4\n\t" \ + "aesenc 64(%[KEY]), %%xmm4\n\t" \ + "movdqa " #X ", %%xmm8\n\t" \ + "pclmulqdq $0x00, " #H ", %%xmm8\n\t" \ + "aesenc 80(%[KEY]), %%xmm4\n\t" \ + "movdqa " #X ", %%xmm1\n\t" \ + "pclmulqdq $0x11, " #H ", %%xmm1\n\t" \ + "aesenc 96(%[KEY]), %%xmm4\n\t" \ + "pxor %%xmm7, %%xmm6\n\t" \ + "movdqa %%xmm6, %%xmm2\n\t" \ + "psrldq $8, %%xmm6\n\t" \ + "pslldq $8, %%xmm2\n\t" \ + "aesenc 112(%[KEY]), %%xmm4\n\t" \ + "movdqa %%xmm1, %%xmm3\n\t" \ + "pxor %%xmm8, %%xmm2\n\t" \ + "pxor %%xmm6, %%xmm3\n\t" \ + "movdqa %[MOD2_128], 
%%xmm0\n\t" \ + "movdqa %%xmm2, %%xmm7\n\t" \ + "pclmulqdq $0x10, %%xmm0, %%xmm7\n\t" \ + "aesenc 128(%[KEY]), %%xmm4\n\t" \ + "pshufd $0x4e, %%xmm2, %%xmm6\n\t" \ + "pxor %%xmm7, %%xmm6\n\t" \ + "movdqa %%xmm6, %%xmm7\n\t" \ + "pclmulqdq $0x10, %%xmm0, %%xmm7\n\t" \ + "aesenc 144(%[KEY]), %%xmm4\n\t" \ + "pshufd $0x4e, %%xmm6, " VAR(XR) "\n\t" \ + "pxor %%xmm7, " VAR(XR) "\n\t" \ + "pxor %%xmm3, " VAR(XR) "\n\t" \ + "cmpl $11, %[nr]\n\t" \ + "movdqu 160(%[KEY]), %%xmm5\n\t" \ + "jl %=f\n\t" \ + "aesenc %%xmm5, %%xmm4\n\t" \ + "aesenc 176(%[KEY]), %%xmm4\n\t" \ + "cmpl $13, %[nr]\n\t" \ + "movdqu 192(%[KEY]), %%xmm5\n\t" \ + "jl %=f\n\t" \ + "aesenc %%xmm5, %%xmm4\n\t" \ + "aesenc 208(%[KEY]), %%xmm4\n\t" \ + "movdqa 224(%[KEY]), %%xmm5\n\t" \ + "%=:\n\t" \ + "aesenclast %%xmm5, %%xmm4\n\t" \ + "movdqu (" #in "), %%xmm5\n\t" \ + "pxor %%xmm5, %%xmm4\n\t" \ + "movdqu %%xmm4, (" #out ")\n\t" +#define AESENC_GFMUL(in, out, H, X) \ _AESENC_GFMUL(in, out, H, X) #define _GHASH_GFMUL_AVX(r, r2, a, b) \ @@ -4022,11 +4022,11 @@ while (0) "pxor %%xmm3, %%xmm1\n\t" \ "movdqa %%xmm1, %%xmm2\n\t" \ "movdqa %%xmm0, "#r2"\n\t" \ - "movdqa %%xmm3, "#r"\n\t" \ + "movdqa %%xmm3, " #r "\n\t" \ "pslldq $8, %%xmm2\n\t" \ "psrldq $8, %%xmm1\n\t" \ "pxor %%xmm2, "#r2"\n\t" \ - "pxor %%xmm1, "#r"\n\t" + "pxor %%xmm1, " #r "\n\t" #define GHASH_GFMUL_AVX(r, r2, a, b) \ _GHASH_GFMUL_AVX(r, r2, a, b) @@ -4044,28 +4044,28 @@ while (0) "pxor %%xmm3, %%xmm1\n\t" \ "movdqa %%xmm1, %%xmm2\n\t" \ "pxor %%xmm0, "#r2"\n\t" \ - "pxor %%xmm3, "#r"\n\t" \ + "pxor %%xmm3, " #r "\n\t" \ "pslldq $8, %%xmm2\n\t" \ "psrldq $8, %%xmm1\n\t" \ "pxor %%xmm2, "#r2"\n\t" \ - "pxor %%xmm1, "#r"\n\t" + "pxor %%xmm1, " #r "\n\t" #define GHASH_GFMUL_XOR_AVX(r, r2, a, b) \ _GHASH_GFMUL_XOR_AVX(r, r2, a, b) #define GHASH_MID_AVX(r, r2) \ "movdqa "#r2", %%xmm0\n\t" \ - "movdqa "#r", %%xmm1\n\t" \ + "movdqa " #r ", %%xmm1\n\t" \ "psrld $31, %%xmm0\n\t" \ "psrld $31, %%xmm1\n\t" \ "pslld $1, "#r2"\n\t" \ - "pslld $1, "#r"\n\t" \ + "pslld $1, " #r "\n\t" \ "movdqa %%xmm0, %%xmm2\n\t" \ "pslldq $4, %%xmm0\n\t" \ "psrldq $12, %%xmm2\n\t" \ "pslldq $4, %%xmm1\n\t" \ - "por %%xmm2, "#r"\n\t" \ + "por %%xmm2, " #r "\n\t" \ "por %%xmm0, "#r2"\n\t" \ - "por %%xmm1, "#r"\n\t" + "por %%xmm1, " #r "\n\t" #define _GHASH_GFMUL_RED_AVX(r, a, b) \ "pshufd $0x4e, "#a", %%xmm5\n\t" \ @@ -4080,11 +4080,11 @@ while (0) "pxor %%xmm4, %%xmm5\n\t" \ "pxor %%xmm7, %%xmm5\n\t" \ "movdqa %%xmm5, %%xmm6\n\t" \ - "movdqa %%xmm7, "#r"\n\t" \ + "movdqa %%xmm7, " #r "\n\t" \ "pslldq $8, %%xmm6\n\t" \ "psrldq $8, %%xmm5\n\t" \ "pxor %%xmm6, %%xmm4\n\t" \ - "pxor %%xmm5, "#r"\n\t" \ + "pxor %%xmm5, " #r "\n\t" \ "movdqa %%xmm4, %%xmm8\n\t" \ "movdqa %%xmm4, %%xmm9\n\t" \ "movdqa %%xmm4, %%xmm10\n\t" \ @@ -4107,7 +4107,7 @@ while (0) "pxor %%xmm5, %%xmm10\n\t" \ "pxor %%xmm9, %%xmm10\n\t" \ "pxor %%xmm4, %%xmm10\n\t" \ - "pxor %%xmm10, "#r"\n\t" + "pxor %%xmm10, " #r "\n\t" #define GHASH_GFMUL_RED_AVX(r, a, b) \ _GHASH_GFMUL_RED_AVX(r, a, b) @@ -4134,7 +4134,7 @@ while (0) "pxor %%xmm0, %%xmm2\n\t" \ "pxor %%xmm1, %%xmm2\n\t" \ "pxor "#r2", %%xmm2\n\t" \ - "pxor %%xmm2, "#r"\n\t" + "pxor %%xmm2, " #r "\n\t" #define GHASH_GFMUL_RED_XOR_AVX(r, r2, a, b) \ GHASH_GFMUL_XOR_AVX(r, r2, a, b) \ @@ -4154,65 +4154,65 @@ while (0) "pinsrd $3, %%ecx, %%xmm13\n\t" \ "# H = Encrypt X(=0) and T = Encrypt counter\n\t" \ "movdqu %%xmm13, %%xmm1\n\t" \ - "movdqa 0(%[KEY]), "VAR(HR)"\n\t" \ - "pxor "VAR(HR)", %%xmm1\n\t" \ + "movdqa 0(%[KEY]), " VAR(HR) "\n\t" \ + "pxor " VAR(HR) ", %%xmm1\n\t" \ 
"movdqa 16(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, "VAR(HR)"\n\t" \ + "aesenc %%xmm12, " VAR(HR) "\n\t" \ "aesenc %%xmm12, %%xmm1\n\t" \ "movdqa 32(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, "VAR(HR)"\n\t" \ + "aesenc %%xmm12, " VAR(HR) "\n\t" \ "aesenc %%xmm12, %%xmm1\n\t" \ "movdqa 48(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, "VAR(HR)"\n\t" \ + "aesenc %%xmm12, " VAR(HR) "\n\t" \ "aesenc %%xmm12, %%xmm1\n\t" \ "movdqa 64(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, "VAR(HR)"\n\t" \ + "aesenc %%xmm12, " VAR(HR) "\n\t" \ "aesenc %%xmm12, %%xmm1\n\t" \ "movdqa 80(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, "VAR(HR)"\n\t" \ + "aesenc %%xmm12, " VAR(HR) "\n\t" \ "aesenc %%xmm12, %%xmm1\n\t" \ "movdqa 96(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, "VAR(HR)"\n\t" \ + "aesenc %%xmm12, " VAR(HR) "\n\t" \ "aesenc %%xmm12, %%xmm1\n\t" \ "movdqa 112(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, "VAR(HR)"\n\t" \ + "aesenc %%xmm12, " VAR(HR) "\n\t" \ "aesenc %%xmm12, %%xmm1\n\t" \ "movdqa 128(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, "VAR(HR)"\n\t" \ + "aesenc %%xmm12, " VAR(HR) "\n\t" \ "aesenc %%xmm12, %%xmm1\n\t" \ "movdqa 144(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, "VAR(HR)"\n\t" \ + "aesenc %%xmm12, " VAR(HR) "\n\t" \ "aesenc %%xmm12, %%xmm1\n\t" \ "cmpl $11, %[nr]\n\t" \ "movdqa 160(%[KEY]), %%xmm12\n\t" \ "jl 31f\n\t" \ - "aesenc %%xmm12, "VAR(HR)"\n\t" \ + "aesenc %%xmm12, " VAR(HR) "\n\t" \ "aesenc %%xmm12, %%xmm1\n\t" \ "movdqa 176(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, "VAR(HR)"\n\t" \ + "aesenc %%xmm12, " VAR(HR) "\n\t" \ "aesenc %%xmm12, %%xmm1\n\t" \ "cmpl $13, %[nr]\n\t" \ "movdqa 192(%[KEY]), %%xmm12\n\t" \ "jl 31f\n\t" \ - "aesenc %%xmm12, "VAR(HR)"\n\t" \ + "aesenc %%xmm12, " VAR(HR) "\n\t" \ "aesenc %%xmm12, %%xmm1\n\t" \ "movdqu 208(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, "VAR(HR)"\n\t" \ + "aesenc %%xmm12, " VAR(HR) "\n\t" \ "aesenc %%xmm12, %%xmm1\n\t" \ "movdqu 224(%[KEY]), %%xmm12\n\t" \ "31:\n\t" \ - "aesenclast %%xmm12, "VAR(HR)"\n\t" \ + "aesenclast %%xmm12, " VAR(HR) "\n\t" \ "aesenclast %%xmm12, %%xmm1\n\t" \ - "pshufb %[BSWAP_MASK], "VAR(HR)"\n\t" \ - "movdqu %%xmm1, "VAR(TR)"\n\t" \ + "pshufb %[BSWAP_MASK], " VAR(HR) "\n\t" \ + "movdqu %%xmm1, " VAR(TR) "\n\t" \ "jmp 39f\n\t" #define CALC_IV() \ "# Calculate values when IV is not 12 bytes\n\t" \ "# H = Encrypt X(=0)\n\t" \ - "movdqa 0(%[KEY]), "VAR(HR)"\n\t" \ + "movdqa 0(%[KEY]), " VAR(HR) "\n\t" \ AESENC_AVX(HR) \ - "pshufb %[BSWAP_MASK], "VAR(HR)"\n\t" \ + "pshufb %[BSWAP_MASK], " VAR(HR) "\n\t" \ "# Calc counter\n\t" \ "# Initialization vector\n\t" \ "cmpl $0, %%edx\n\t" \ @@ -4264,7 +4264,7 @@ while (0) "movdqa 0(%[KEY]), %%xmm4\n\t" \ "pxor %%xmm13, %%xmm4\n\t" \ AESENC_AVX(%%xmm4) \ - "movdqu %%xmm4, "VAR(TR)"\n\t" + "movdqu %%xmm4, " VAR(TR) "\n\t" #define CALC_AAD() \ "# Additional authentication data\n\t" \ @@ -4280,7 +4280,7 @@ while (0) "23:\n\t" \ "movdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ "pshufb %[BSWAP_MASK], %%xmm4\n\t" \ - "pxor %%xmm4, "VAR(XR)"\n\t" \ + "pxor %%xmm4, " VAR(XR) "\n\t" \ GHASH_FULL_AVX(XR, %%xmm12, XR, HR) \ "addl $16, %%ecx\n\t" \ "cmpl %%edx, %%ecx\n\t" \ @@ -4304,148 +4304,148 @@ while (0) "movdqu (%%rsp), %%xmm4\n\t" \ "addq $16, %%rsp\n\t" \ "pshufb %[BSWAP_MASK], %%xmm4\n\t" \ - "pxor %%xmm4, "VAR(XR)"\n\t" \ + "pxor %%xmm4, " VAR(XR) "\n\t" \ GHASH_FULL_AVX(XR, %%xmm12, XR, HR) \ "\n" \ "25:\n\t" -#define CALC_HT_8_AVX() \ - "movdqa "VAR(XR)", %%xmm2\n\t" \ - "# H ^ 1\n\t" \ - "movdqu "VAR(HR)", 0("VAR(HTR)")\n\t" \ - "# H ^ 2\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm0, HR, HR) \ - 
"movdqu %%xmm0 , 16("VAR(HTR)")\n\t" \ - "# H ^ 3\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm1, HR, %%xmm0) \ - "movdqu %%xmm1 , 32("VAR(HTR)")\n\t" \ - "# H ^ 4\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm3, %%xmm0, %%xmm0) \ - "movdqu %%xmm3 , 48("VAR(HTR)")\n\t" \ - "# H ^ 5\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm0, %%xmm1) \ - "movdqu %%xmm12, 64("VAR(HTR)")\n\t" \ - "# H ^ 6\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm1, %%xmm1) \ - "movdqu %%xmm12, 80("VAR(HTR)")\n\t" \ - "# H ^ 7\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm1, %%xmm3) \ - "movdqu %%xmm12, 96("VAR(HTR)")\n\t" \ - "# H ^ 8\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm3, %%xmm3) \ - "movdqu %%xmm12, 112("VAR(HTR)")\n\t" +#define CALC_HT_8_AVX() \ + "movdqa " VAR(XR) ", %%xmm2\n\t" \ + "# H ^ 1\n\t" \ + "movdqu " VAR(HR) ", 0(" VAR(HTR) ")\n\t" \ + "# H ^ 2\n\t" \ + GHASH_GFMUL_RED_AVX(%%xmm0, HR, HR) \ + "movdqu %%xmm0 , 16(" VAR(HTR) ")\n\t" \ + "# H ^ 3\n\t" \ + GHASH_GFMUL_RED_AVX(%%xmm1, HR, %%xmm0) \ + "movdqu %%xmm1 , 32(" VAR(HTR) ")\n\t" \ + "# H ^ 4\n\t" \ + GHASH_GFMUL_RED_AVX(%%xmm3, %%xmm0, %%xmm0) \ + "movdqu %%xmm3 , 48(" VAR(HTR) ")\n\t" \ + "# H ^ 5\n\t" \ + GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm0, %%xmm1) \ + "movdqu %%xmm12, 64(" VAR(HTR) ")\n\t" \ + "# H ^ 6\n\t" \ + GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm1, %%xmm1) \ + "movdqu %%xmm12, 80(" VAR(HTR) ")\n\t" \ + "# H ^ 7\n\t" \ + GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm1, %%xmm3) \ + "movdqu %%xmm12, 96(" VAR(HTR) ")\n\t" \ + "# H ^ 8\n\t" \ + GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm3, %%xmm3) \ + "movdqu %%xmm12, 112(" VAR(HTR) ")\n\t" -#define AESENC_128_GHASH_AVX(src, o) \ - "leaq (%[in],"VAR(KR64)",1), %%rcx\n\t" \ - "leaq (%[out],"VAR(KR64)",1), %%rdx\n\t" \ - /* src is either %%rcx or %%rdx */ \ - AESENC_CTR() \ - AESENC_XOR() \ - AESENC_PCLMUL_1(src, 16, o-128, 112) \ - AESENC_PCLMUL_N(src, 32, o-112, 96) \ - AESENC_PCLMUL_N(src, 48, o -96, 80) \ - AESENC_PCLMUL_N(src, 64, o -80, 64) \ - AESENC_PCLMUL_N(src, 80, o -64, 48) \ - AESENC_PCLMUL_N(src, 96, o -48, 32) \ - AESENC_PCLMUL_N(src, 112, o -32, 16) \ - AESENC_PCLMUL_N(src, 128, o -16, 0) \ - AESENC_PCLMUL_L(144) \ - "cmpl $11, %[nr]\n\t" \ - "movdqa 160(%[KEY]), %%xmm12\n\t" \ - "jl 4f\n\t" \ - AESENC() \ - AESENC_SET(176) \ - "cmpl $13, %[nr]\n\t" \ - "movdqa 192(%[KEY]), %%xmm12\n\t" \ - "jl 4f\n\t" \ - AESENC() \ - AESENC_SET(208) \ - "movdqa 224(%[KEY]), %%xmm12\n\t" \ - "\n" \ -"4:\n\t" \ +#define AESENC_128_GHASH_AVX(src, o) \ + "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" \ + "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" \ + /* src is either %%rcx or %%rdx */ \ + AESENC_CTR() \ + AESENC_XOR() \ + AESENC_PCLMUL_1(src, 16, o-128, 112) \ + AESENC_PCLMUL_N(src, 32, o-112, 96) \ + AESENC_PCLMUL_N(src, 48, o -96, 80) \ + AESENC_PCLMUL_N(src, 64, o -80, 64) \ + AESENC_PCLMUL_N(src, 80, o -64, 48) \ + AESENC_PCLMUL_N(src, 96, o -48, 32) \ + AESENC_PCLMUL_N(src, 112, o -32, 16) \ + AESENC_PCLMUL_N(src, 128, o -16, 0) \ + AESENC_PCLMUL_L(144) \ + "cmpl $11, %[nr]\n\t" \ + "movdqa 160(%[KEY]), %%xmm12\n\t" \ + "jl 4f\n\t" \ + AESENC() \ + AESENC_SET(176) \ + "cmpl $13, %[nr]\n\t" \ + "movdqa 192(%[KEY]), %%xmm12\n\t" \ + "jl 4f\n\t" \ + AESENC() \ + AESENC_SET(208) \ + "movdqa 224(%[KEY]), %%xmm12\n\t" \ + "\n" \ +"4:\n\t" \ AESENC_LAST(%%rcx, %%rdx) -#define AESENC_LAST15_ENC_AVX() \ - "movl %[nbytes], %%ecx\n\t" \ - "movl %%ecx, %%edx\n\t" \ - "andl $0x0f, %%ecx\n\t" \ - "jz 55f\n\t" \ - "movdqu "VAR(CTR1)", %%xmm13\n\t" \ - "pshufb %[BSWAP_EPI64], %%xmm13\n\t" \ - "pxor 0(%[KEY]), %%xmm13\n\t" \ - AESENC_AVX(%%xmm13) \ - "subq $16, 
%%rsp\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "movdqu %%xmm13, (%%rsp)\n\t" \ - "\n" \ - "51:\n\t" \ - "movzbl (%[in],"VAR(KR64)",1), %%r13d\n\t" \ - "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ - "movb %%r13b, (%[out],"VAR(KR64)",1)\n\t" \ - "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ - "incl "VAR(KR)"\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %%edx, "VAR(KR)"\n\t" \ - "jl 51b\n\t" \ - "xorq %%r13, %%r13\n\t" \ - "cmpl $16, %%ecx\n\t" \ - "je 53f\n\t" \ - "\n" \ - "52:\n\t" \ - "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ - "incl %%ecx\n\t" \ - "cmpl $16, %%ecx\n\t" \ - "jl 52b\n\t" \ - "53:\n\t" \ - "movdqu (%%rsp), %%xmm13\n\t" \ - "addq $16, %%rsp\n\t" \ - "pshufb %[BSWAP_MASK], %%xmm13\n\t" \ - "pxor %%xmm13, "VAR(XR)"\n\t" \ - GHASH_GFMUL_RED_AVX(XR, HR, XR) \ +#define AESENC_LAST15_ENC_AVX() \ + "movl %[nbytes], %%ecx\n\t" \ + "movl %%ecx, %%edx\n\t" \ + "andl $0x0f, %%ecx\n\t" \ + "jz 55f\n\t" \ + "movdqu " VAR(CTR1) ", %%xmm13\n\t" \ + "pshufb %[BSWAP_EPI64], %%xmm13\n\t" \ + "pxor 0(%[KEY]), %%xmm13\n\t" \ + AESENC_AVX(%%xmm13) \ + "subq $16, %%rsp\n\t" \ + "xorl %%ecx, %%ecx\n\t" \ + "movdqu %%xmm13, (%%rsp)\n\t" \ + "\n" \ + "51:\n\t" \ + "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \ + "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ + "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \ + "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ + "incl " VAR(KR) "\n\t" \ + "incl %%ecx\n\t" \ + "cmpl %%edx, " VAR(KR) "\n\t" \ + "jl 51b\n\t" \ + "xorq %%r13, %%r13\n\t" \ + "cmpl $16, %%ecx\n\t" \ + "je 53f\n\t" \ + "\n" \ + "52:\n\t" \ + "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ + "incl %%ecx\n\t" \ + "cmpl $16, %%ecx\n\t" \ + "jl 52b\n\t" \ + "53:\n\t" \ + "movdqu (%%rsp), %%xmm13\n\t" \ + "addq $16, %%rsp\n\t" \ + "pshufb %[BSWAP_MASK], %%xmm13\n\t" \ + "pxor %%xmm13, " VAR(XR) "\n\t" \ + GHASH_GFMUL_RED_AVX(XR, HR, XR) \ -#define AESENC_LAST15_DEC_AVX() \ - "movl %[nbytes], %%ecx\n\t" \ - "movl %%ecx, %%edx\n\t" \ - "andl $0x0f, %%ecx\n\t" \ - "jz 55f\n\t" \ - "movdqu "VAR(CTR1)", %%xmm13\n\t" \ - "pshufb %[BSWAP_EPI64], %%xmm13\n\t" \ - "pxor 0(%[KEY]), %%xmm13\n\t" \ - AESENC_AVX(%%xmm13) \ - "subq $32, %%rsp\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "movdqu %%xmm13, (%%rsp)\n\t" \ - "pxor %%xmm0, %%xmm0\n\t" \ - "movdqu %%xmm0, 16(%%rsp)\n\t" \ - "\n" \ - "51:\n\t" \ - "movzbl (%[in],"VAR(KR64)",1), %%r13d\n\t" \ - "movb %%r13b, 16(%%rsp,%%rcx,1)\n\t" \ - "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ - "movb %%r13b, (%[out],"VAR(KR64)",1)\n\t" \ - "incl "VAR(KR)"\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %%edx, "VAR(KR)"\n\t" \ - "jl 51b\n\t" \ - "53:\n\t" \ - "movdqu 16(%%rsp), %%xmm13\n\t" \ - "addq $32, %%rsp\n\t" \ - "pshufb %[BSWAP_MASK], %%xmm13\n\t" \ - "pxor %%xmm13, "VAR(XR)"\n\t" \ - GHASH_GFMUL_RED_AVX(XR, HR, XR) \ +#define AESENC_LAST15_DEC_AVX() \ + "movl %[nbytes], %%ecx\n\t" \ + "movl %%ecx, %%edx\n\t" \ + "andl $0x0f, %%ecx\n\t" \ + "jz 55f\n\t" \ + "movdqu " VAR(CTR1) ", %%xmm13\n\t" \ + "pshufb %[BSWAP_EPI64], %%xmm13\n\t" \ + "pxor 0(%[KEY]), %%xmm13\n\t" \ + AESENC_AVX(%%xmm13) \ + "subq $32, %%rsp\n\t" \ + "xorl %%ecx, %%ecx\n\t" \ + "movdqu %%xmm13, (%%rsp)\n\t" \ + "pxor %%xmm0, %%xmm0\n\t" \ + "movdqu %%xmm0, 16(%%rsp)\n\t" \ + "\n" \ + "51:\n\t" \ + "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \ + "movb %%r13b, 16(%%rsp,%%rcx,1)\n\t" \ + "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ + "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \ + "incl " VAR(KR) "\n\t" \ + "incl %%ecx\n\t" \ + "cmpl %%edx, " VAR(KR) "\n\t" \ + "jl 51b\n\t" \ + "53:\n\t" \ + "movdqu 16(%%rsp), %%xmm13\n\t" \ + "addq $32, %%rsp\n\t" \ + "pshufb %[BSWAP_MASK], %%xmm13\n\t" \ + "pxor 
%%xmm13, " VAR(XR) "\n\t" \ + GHASH_GFMUL_RED_AVX(XR, HR, XR) \ -#define CALC_TAG() \ - "movl %[nbytes], %%edx\n\t" \ - "movl %[abytes], %%ecx\n\t" \ - "shlq $3, %%rdx\n\t" \ - "shlq $3, %%rcx\n\t" \ - "pinsrq $0, %%rdx, %%xmm0\n\t" \ - "pinsrq $1, %%rcx, %%xmm0\n\t" \ - "pxor %%xmm0, "VAR(XR)"\n\t" \ - GHASH_GFMUL_RED_AVX(XR, HR, XR) \ - "pshufb %[BSWAP_MASK], "VAR(XR)"\n\t" \ - "movdqu "VAR(TR)", %%xmm0\n\t" \ - "pxor "VAR(XR)", %%xmm0\n\t" \ +#define CALC_TAG() \ + "movl %[nbytes], %%edx\n\t" \ + "movl %[abytes], %%ecx\n\t" \ + "shlq $3, %%rdx\n\t" \ + "shlq $3, %%rcx\n\t" \ + "pinsrq $0, %%rdx, %%xmm0\n\t" \ + "pinsrq $1, %%rcx, %%xmm0\n\t" \ + "pxor %%xmm0, " VAR(XR) "\n\t" \ + GHASH_GFMUL_RED_AVX(XR, HR, XR) \ + "pshufb %[BSWAP_MASK], " VAR(XR) "\n\t" \ + "movdqu " VAR(TR) ", %%xmm0\n\t" \ + "pxor " VAR(XR) ", %%xmm0\n\t" \ #define STORE_TAG() \ "cmpl $16, %[tbytes]\n\t" \ @@ -4509,10 +4509,10 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, register unsigned int ivLen asm("ebx") = ibytes; __asm__ __volatile__ ( - "subq $"VAR(STACK_OFFSET)", %%rsp\n\t" + "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t" /* Counter is xmm13 */ "pxor %%xmm13, %%xmm13\n\t" - "pxor "VAR(XR)", "VAR(XR)"\n\t" + "pxor " VAR(XR) ", " VAR(XR) "\n\t" "movl %[ibytes], %%edx\n\t" "cmpl $12, %%edx\n\t" "jne 35f\n\t" @@ -4527,20 +4527,20 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, "# Calculate counter and H\n\t" "pshufb %[BSWAP_EPI64], %%xmm13\n\t" - "movdqa "VAR(HR)", %%xmm5\n\t" + "movdqa " VAR(HR) ", %%xmm5\n\t" "paddd %[ONE], %%xmm13\n\t" - "movdqa "VAR(HR)", %%xmm4\n\t" - "movdqu %%xmm13, "VAR(CTR1)"\n\t" + "movdqa " VAR(HR) ", %%xmm4\n\t" + "movdqu %%xmm13, " VAR(CTR1) "\n\t" "psrlq $63, %%xmm5\n\t" "psllq $1, %%xmm4\n\t" "pslldq $8, %%xmm5\n\t" "por %%xmm5, %%xmm4\n\t" - "pshufd $0xff, "VAR(HR)", "VAR(HR)"\n\t" - "psrad $31, "VAR(HR)"\n\t" - "pand %[MOD2_128], "VAR(HR)"\n\t" - "pxor %%xmm4, "VAR(HR)"\n\t" + "pshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t" + "psrad $31, " VAR(HR) "\n\t" + "pand %[MOD2_128], " VAR(HR) "\n\t" + "pxor %%xmm4, " VAR(HR) "\n\t" - "xorl "VAR(KR)", "VAR(KR)"\n\t" + "xorl " VAR(KR) ", " VAR(KR) "\n\t" #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) "cmpl $128, %[nbytes]\n\t" @@ -4578,15 +4578,15 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, AESENC_LAST(%[in], %[out]) "cmpl $128, %%r13d\n\t" - "movl $128, "VAR(KR)"\n\t" + "movl $128, " VAR(KR) "\n\t" "jle 2f\n\t" "# More 128 bytes of input\n\t" "\n" "3:\n\t" AESENC_128_GHASH_AVX(%%rdx, 0) - "addl $128, "VAR(KR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "addl $128, " VAR(KR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jl 3b\n\t" "\n" "2:\n\t" @@ -4601,51 +4601,51 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, "pshufb %%xmm13, %%xmm10\n\t" "pshufb %%xmm13, %%xmm11\n\t" - "movdqu 112("VAR(HTR)"), %%xmm12\n\t" + "movdqu 112(" VAR(HTR) "), %%xmm12\n\t" GHASH_GFMUL_AVX(XR, %%xmm13, %%xmm4, %%xmm12) - "movdqu 96("VAR(HTR)"), %%xmm12\n\t" + "movdqu 96(" VAR(HTR) "), %%xmm12\n\t" GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm5, %%xmm12) - "movdqu 80("VAR(HTR)"), %%xmm12\n\t" + "movdqu 80(" VAR(HTR) "), %%xmm12\n\t" GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm6, %%xmm12) - "movdqu 64("VAR(HTR)"), %%xmm12\n\t" + "movdqu 64(" VAR(HTR) "), %%xmm12\n\t" GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm7, %%xmm12) - "movdqu 48("VAR(HTR)"), %%xmm12\n\t" + "movdqu 48(" VAR(HTR) "), %%xmm12\n\t" GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm8, %%xmm12) - 
"movdqu 32("VAR(HTR)"), %%xmm12\n\t" + "movdqu 32(" VAR(HTR) "), %%xmm12\n\t" GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm9, %%xmm12) - "movdqu 16("VAR(HTR)"), %%xmm12\n\t" + "movdqu 16(" VAR(HTR) "), %%xmm12\n\t" GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm10, %%xmm12) - "movdqu ("VAR(HTR)"), %%xmm12\n\t" + "movdqu (" VAR(HTR) "), %%xmm12\n\t" GHASH_GFMUL_RED_XOR_AVX(XR, %%xmm13, %%xmm11, %%xmm12) - "movdqu 0("VAR(HTR)"), "VAR(HR)"\n\t" + "movdqu 0(" VAR(HTR) "), " VAR(HR) "\n\t" "\n" "5:\n\t" "movl %[nbytes], %%edx\n\t" - "cmpl %%edx, "VAR(KR)"\n\t" + "cmpl %%edx, " VAR(KR) "\n\t" "jge 55f\n\t" #endif "movl %[nbytes], %%r13d\n\t" "andl $0xfffffff0, %%r13d\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jge 14f\n\t" - "leaq (%[in],"VAR(KR64)",1), %%rcx\n\t" - "leaq (%[out],"VAR(KR64)",1), %%rdx\n\t" + "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" + "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" AESENC_BLOCK(%%rcx, %%rdx) - "addl $16, "VAR(KR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "addl $16, " VAR(KR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jge 13f\n\t" "\n" "12:\n\t" - "leaq (%[in],"VAR(KR64)",1), %%rcx\n\t" - "leaq (%[out],"VAR(KR64)",1), %%rdx\n\t" + "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" + "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" AESENC_GFMUL(%%rcx, %%rdx, HR, XR) "pshufb %[BSWAP_MASK], %%xmm4\n\t" - "pxor %%xmm4, "VAR(XR)"\n\t" - "addl $16, "VAR(KR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "pxor %%xmm4, " VAR(XR) "\n\t" + "addl $16, " VAR(KR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jl 12b\n\t" "\n" "13:\n\t" @@ -4659,7 +4659,7 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, CALC_TAG() STORE_TAG() - "addq $"VAR(STACK_OFFSET)", %%rsp\n\t" + "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t" : : [KEY] "r" (key), @@ -4700,7 +4700,7 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, VAESENC() #define VAESENC_CTR() \ - "vmovdqu "VAR(CTR1)", %%xmm0\n\t" \ + "vmovdqu " VAR(CTR1) ", %%xmm0\n\t" \ "vmovdqa %[BSWAP_EPI64], %%xmm1\n\t" \ "vpshufb %%xmm1, %%xmm0, %%xmm4\n\t" \ "vpaddd %[ONE], %%xmm0, %%xmm5\n\t" \ @@ -4721,7 +4721,7 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, #define VAESENC_XOR() \ "vmovdqa (%[KEY]), %%xmm12\n\t" \ - "vmovdqu %%xmm0, "VAR(CTR1)"\n\t" \ + "vmovdqu %%xmm0, " VAR(CTR1) "\n\t" \ "vpxor %%xmm12, %%xmm4, %%xmm4\n\t" \ "vpxor %%xmm12, %%xmm5, %%xmm5\n\t" \ "vpxor %%xmm12, %%xmm6, %%xmm6\n\t" \ @@ -4759,53 +4759,53 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, VAESENC_LAST(%[in], %[out]) /* Encrypt and carry-less multiply for AVX1. 
*/ -#define VAESENC_PCLMUL_1(src, o1, o2, o3) \ - "vmovdqu "#o3"("VAR(HTR)"), %%xmm12\n\t" \ - "vmovdqu "#o2"("#src"), %%xmm0\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm0, %%xmm0\n\t" \ - "vpxor %%xmm2, %%xmm0, %%xmm0\n\t" \ - "vpshufd $0x4e, %%xmm12, %%xmm1\n\t" \ - "vpshufd $0x4e, %%xmm0, %%xmm14\n\t" \ - "vpxor %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm0, %%xmm14, %%xmm14\n\t" \ - "vpclmulqdq $0x11, %%xmm12, %%xmm0, %%xmm3\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm5, %%xmm5\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x00, %%xmm12, %%xmm0, %%xmm2\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm7, %%xmm7\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm8, %%xmm8\n\t" \ - "vpclmulqdq $0x00, %%xmm14, %%xmm1, %%xmm1\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm9, %%xmm9\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm10, %%xmm10\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm11, %%xmm11\n\t" \ - "vpxor %%xmm2, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm3, %%xmm1, %%xmm1\n\t" \ +#define VAESENC_PCLMUL_1(src, o1, o2, o3) \ + "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \ + "vmovdqu " #o2 "(" #src "), %%xmm0\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm0, %%xmm0\n\t" \ + "vpxor %%xmm2, %%xmm0, %%xmm0\n\t" \ + "vpshufd $0x4e, %%xmm12, %%xmm1\n\t" \ + "vpshufd $0x4e, %%xmm0, %%xmm14\n\t" \ + "vpxor %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vpxor %%xmm0, %%xmm14, %%xmm14\n\t" \ + "vpclmulqdq $0x11, %%xmm12, %%xmm0, %%xmm3\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm5, %%xmm5\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm6, %%xmm6\n\t" \ + "vpclmulqdq $0x00, %%xmm12, %%xmm0, %%xmm2\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm7, %%xmm7\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm8, %%xmm8\n\t" \ + "vpclmulqdq $0x00, %%xmm14, %%xmm1, %%xmm1\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm9, %%xmm9\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm10, %%xmm10\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm11, %%xmm11\n\t" \ + "vpxor %%xmm2, %%xmm1, %%xmm1\n\t" \ + "vpxor %%xmm3, %%xmm1, %%xmm1\n\t" \ -#define VAESENC_PCLMUL_N(src, o1, o2, o3) \ - "vmovdqu "#o3"("VAR(HTR)"), %%xmm12\n\t" \ - "vmovdqu "#o2"("#src"), %%xmm0\n\t" \ - "vpshufd $0x4e, %%xmm12, %%xmm13\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm0, %%xmm0\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm12, %%xmm13, %%xmm13\n\t" \ - "vpshufd $0x4e, %%xmm0, %%xmm14\n\t" \ - "vpxor %%xmm0, %%xmm14, %%xmm14\n\t" \ - "vpclmulqdq $0x11, %%xmm12, %%xmm0, %%xmm15\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm5, %%xmm5\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x00, %%xmm12, %%xmm0, %%xmm12\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm7, %%xmm7\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm8, %%xmm8\n\t" \ - "vpclmulqdq $0x00, %%xmm14, %%xmm13, %%xmm13\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm9, %%xmm9\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm10, %%xmm10\n\t" \ - "vaesenc "#o1"(%[KEY]), %%xmm11, %%xmm11\n\t" \ - "vpxor %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm12, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm15, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm15, %%xmm3, %%xmm3\n\t" \ - "vpxor %%xmm13, %%xmm1, %%xmm1\n\t" \ +#define VAESENC_PCLMUL_N(src, o1, o2, o3) \ + "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \ + "vmovdqu " #o2 "(" #src "), %%xmm0\n\t" \ + "vpshufd $0x4e, %%xmm12, %%xmm13\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm0, %%xmm0\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vpxor %%xmm12, %%xmm13, %%xmm13\n\t" \ + "vpshufd $0x4e, %%xmm0, %%xmm14\n\t" \ + "vpxor %%xmm0, %%xmm14, %%xmm14\n\t" \ + "vpclmulqdq $0x11, %%xmm12, 
%%xmm0, %%xmm15\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm5, %%xmm5\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm6, %%xmm6\n\t" \ + "vpclmulqdq $0x00, %%xmm12, %%xmm0, %%xmm12\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm7, %%xmm7\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm8, %%xmm8\n\t" \ + "vpclmulqdq $0x00, %%xmm14, %%xmm13, %%xmm13\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm9, %%xmm9\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm10, %%xmm10\n\t" \ + "vaesenc " #o1 "(%[KEY]), %%xmm11, %%xmm11\n\t" \ + "vpxor %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vpxor %%xmm12, %%xmm2, %%xmm2\n\t" \ + "vpxor %%xmm15, %%xmm1, %%xmm1\n\t" \ + "vpxor %%xmm15, %%xmm3, %%xmm3\n\t" \ + "vpxor %%xmm13, %%xmm1, %%xmm1\n\t" \ #define VAESENC_PCLMUL_L(o) \ "vpslldq $8, %%xmm1, %%xmm14\n\t" \ @@ -4842,120 +4842,120 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, #define VAESENC_LAST(in, out) \ "vaesenclast %%xmm12, %%xmm4, %%xmm4\n\t" \ "vaesenclast %%xmm12, %%xmm5, %%xmm5\n\t" \ - "vmovdqu ("#in"), %%xmm0\n\t" \ - "vmovdqu 16("#in"), %%xmm1\n\t" \ + "vmovdqu (" #in "), %%xmm0\n\t" \ + "vmovdqu 16(" #in "), %%xmm1\n\t" \ "vpxor %%xmm0, %%xmm4, %%xmm4\n\t" \ "vpxor %%xmm1, %%xmm5, %%xmm5\n\t" \ - "vmovdqu %%xmm4, ("#out")\n\t" \ - "vmovdqu %%xmm5, 16("#out")\n\t" \ + "vmovdqu %%xmm4, (" #out ")\n\t" \ + "vmovdqu %%xmm5, 16(" #out ")\n\t" \ "vaesenclast %%xmm12, %%xmm6, %%xmm6\n\t" \ "vaesenclast %%xmm12, %%xmm7, %%xmm7\n\t" \ - "vmovdqu 32("#in"), %%xmm0\n\t" \ - "vmovdqu 48("#in"), %%xmm1\n\t" \ + "vmovdqu 32(" #in "), %%xmm0\n\t" \ + "vmovdqu 48(" #in "), %%xmm1\n\t" \ "vpxor %%xmm0, %%xmm6, %%xmm6\n\t" \ "vpxor %%xmm1, %%xmm7, %%xmm7\n\t" \ - "vmovdqu %%xmm6, 32("#out")\n\t" \ - "vmovdqu %%xmm7, 48("#out")\n\t" \ + "vmovdqu %%xmm6, 32(" #out ")\n\t" \ + "vmovdqu %%xmm7, 48(" #out ")\n\t" \ "vaesenclast %%xmm12, %%xmm8, %%xmm8\n\t" \ "vaesenclast %%xmm12, %%xmm9, %%xmm9\n\t" \ - "vmovdqu 64("#in"), %%xmm0\n\t" \ - "vmovdqu 80("#in"), %%xmm1\n\t" \ + "vmovdqu 64(" #in "), %%xmm0\n\t" \ + "vmovdqu 80(" #in "), %%xmm1\n\t" \ "vpxor %%xmm0, %%xmm8, %%xmm8\n\t" \ "vpxor %%xmm1, %%xmm9, %%xmm9\n\t" \ - "vmovdqu %%xmm8, 64("#out")\n\t" \ - "vmovdqu %%xmm9, 80("#out")\n\t" \ + "vmovdqu %%xmm8, 64(" #out ")\n\t" \ + "vmovdqu %%xmm9, 80(" #out ")\n\t" \ "vaesenclast %%xmm12, %%xmm10, %%xmm10\n\t" \ "vaesenclast %%xmm12, %%xmm11, %%xmm11\n\t" \ - "vmovdqu 96("#in"), %%xmm0\n\t" \ - "vmovdqu 112("#in"), %%xmm1\n\t" \ + "vmovdqu 96(" #in "), %%xmm0\n\t" \ + "vmovdqu 112(" #in "), %%xmm1\n\t" \ "vpxor %%xmm0, %%xmm10, %%xmm10\n\t" \ "vpxor %%xmm1, %%xmm11, %%xmm11\n\t" \ - "vmovdqu %%xmm10, 96("#out")\n\t" \ - "vmovdqu %%xmm11, 112("#out")\n\t" + "vmovdqu %%xmm10, 96(" #out ")\n\t" \ + "vmovdqu %%xmm11, 112(" #out ")\n\t" -#define VAESENC_BLOCK() \ - "vmovdqu "VAR(CTR1)", %%xmm5\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" \ - "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" \ - "vmovdqu %%xmm5, "VAR(CTR1)"\n\t" \ - "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "cmpl $13, 
%[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ - "%=:\n\t" \ - "vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vmovdqu (%[in],"VAR(KR64)",1), %%xmm5\n\t" \ - "vpxor %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vmovdqu %%xmm4, (%[out],"VAR(KR64)",1)\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, "VAR(XR)", "VAR(XR)"\n\t" +#define VAESENC_BLOCK() \ + "vmovdqu " VAR(CTR1) ", %%xmm5\n\t" \ + "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" \ + "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" \ + "vmovdqu %%xmm5, " VAR(CTR1) "\n\t" \ + "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "cmpl $11, %[nr]\n\t" \ + "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ + "jl %=f\n\t" \ + "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ + "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "cmpl $13, %[nr]\n\t" \ + "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ + "jl %=f\n\t" \ + "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ + "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ + "%=:\n\t" \ + "vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \ + "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm5\n\t" \ + "vpxor %%xmm5, %%xmm4, %%xmm4\n\t" \ + "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ + "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" -#define _VAESENC_GFMUL(in, H, X) \ - "vmovdqu "VAR(CTR1)", %%xmm5\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" \ - "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" \ - "vmovdqu %%xmm5, "VAR(CTR1)"\n\t" \ - "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpclmulqdq $0x10, "#H", "#X", %%xmm6\n\t" \ - "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpclmulqdq $0x01, "#H", "#X", %%xmm7\n\t" \ - "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpclmulqdq $0x00, "#H", "#X", %%xmm8\n\t" \ - "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpclmulqdq $0x11, "#H", "#X", %%xmm1\n\t" \ - "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm7, %%xmm6, %%xmm6\n\t" \ - "vpslldq $8, %%xmm6, %%xmm2\n\t" \ - "vpsrldq $8, %%xmm6, %%xmm6\n\t" \ - "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm8, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm6, %%xmm1, %%xmm3\n\t" \ - "vmovdqa %[MOD2_128], %%xmm0\n\t" \ - "vpclmulqdq $0x10, %%xmm0, %%xmm2, %%xmm7\n\t" \ - "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpshufd $0x4e, %%xmm2, %%xmm6\n\t" \ - "vpxor %%xmm7, %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x10, %%xmm0, %%xmm6, %%xmm7\n\t" \ - "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm7, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm3, %%xmm6, "VAR(XR)"\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ - "jl 1f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ - "jl 1f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ - "1:\n\t" \ - 
"vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vmovdqu "#in", %%xmm0\n\t" \ - "vpxor %%xmm0, %%xmm4, %%xmm4\n\t" \ - "vmovdqu %%xmm4, (%[out],"VAR(KR64)",1)\n\t" -#define VAESENC_GFMUL(in, H, X) \ +#define _VAESENC_GFMUL(in, H, X) \ + "vmovdqu " VAR(CTR1) ", %%xmm5\n\t" \ + "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" \ + "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" \ + "vmovdqu %%xmm5, " VAR(CTR1) "\n\t" \ + "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vpclmulqdq $0x10, " #H ", " #X ", %%xmm6\n\t" \ + "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vpclmulqdq $0x01, " #H ", " #X ", %%xmm7\n\t" \ + "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vpclmulqdq $0x00, " #H ", " #X ", %%xmm8\n\t" \ + "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vpclmulqdq $0x11, " #H ", " #X ", %%xmm1\n\t" \ + "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vpxor %%xmm7, %%xmm6, %%xmm6\n\t" \ + "vpslldq $8, %%xmm6, %%xmm2\n\t" \ + "vpsrldq $8, %%xmm6, %%xmm6\n\t" \ + "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vpxor %%xmm8, %%xmm2, %%xmm2\n\t" \ + "vpxor %%xmm6, %%xmm1, %%xmm3\n\t" \ + "vmovdqa %[MOD2_128], %%xmm0\n\t" \ + "vpclmulqdq $0x10, %%xmm0, %%xmm2, %%xmm7\n\t" \ + "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vpshufd $0x4e, %%xmm2, %%xmm6\n\t" \ + "vpxor %%xmm7, %%xmm6, %%xmm6\n\t" \ + "vpclmulqdq $0x10, %%xmm0, %%xmm6, %%xmm7\n\t" \ + "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ + "vpxor %%xmm7, %%xmm6, %%xmm6\n\t" \ + "vpxor %%xmm3, %%xmm6, " VAR(XR) "\n\t" \ + "cmpl $11, %[nr]\n\t" \ + "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ + "jl 1f\n\t" \ + "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ + "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "cmpl $13, %[nr]\n\t" \ + "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ + "jl 1f\n\t" \ + "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ + "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ + "1:\n\t" \ + "vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \ + "vmovdqu " #in ", %%xmm0\n\t" \ + "vpxor %%xmm0, %%xmm4, %%xmm4\n\t" \ + "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" +#define VAESENC_GFMUL(in, H, X) \ _VAESENC_GFMUL(in, H, X) @@ -4970,11 +4970,11 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, "vpxor %%xmm0, %%xmm1, %%xmm1\n\t" \ "vpxor %%xmm3, %%xmm1, %%xmm1\n\t" \ "vmovdqa %%xmm0, "#r2"\n\t" \ - "vmovdqa %%xmm3, "#r"\n\t" \ + "vmovdqa %%xmm3, " #r "\n\t" \ "vpslldq $8, %%xmm1, %%xmm2\n\t" \ "vpsrldq $8, %%xmm1, %%xmm1\n\t" \ "vpxor %%xmm2, "#r2", "#r2"\n\t" \ - "vpxor %%xmm1, "#r", "#r"\n\t" + "vpxor %%xmm1, " #r ", " #r "\n\t" #define GHASH_GFMUL_AVX1(r, r2, a, b) \ _GHASH_GFMUL_AVX1(r, r2, a, b) @@ -4989,25 +4989,25 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, "vpxor %%xmm0, %%xmm1, %%xmm1\n\t" \ "vpxor %%xmm3, %%xmm1, %%xmm1\n\t" \ "vpxor %%xmm0, "#r2", "#r2"\n\t" \ - "vpxor %%xmm3, "#r", "#r"\n\t" \ + "vpxor %%xmm3, " #r ", " #r "\n\t" \ "vpslldq $8, %%xmm1, %%xmm2\n\t" \ "vpsrldq $8, %%xmm1, %%xmm1\n\t" \ "vpxor %%xmm2, "#r2", "#r2"\n\t" \ - "vpxor %%xmm1, "#r", "#r"\n\t" + "vpxor %%xmm1, " #r ", " #r "\n\t" #define GHASH_GFMUL_XOR_AVX1(r, r2, a, b) \ _GHASH_GFMUL_XOR_AVX1(r, r2, a, b) -#define GHASH_MID_AVX1(r, r2) \ - "vpsrld $31, "#r2", %%xmm0\n\t" \ - "vpsrld $31, "#r", %%xmm1\n\t" \ - "vpslld $1, "#r2", "#r2"\n\t" \ - "vpslld $1, "#r", "#r"\n\t" \ - "vpsrldq $12, %%xmm0, %%xmm2\n\t" \ - "vpslldq $4, %%xmm0, %%xmm0\n\t" \ - "vpslldq $4, %%xmm1, %%xmm1\n\t" \ - "vpor %%xmm2, "#r", 
"#r"\n\t" \ - "vpor %%xmm0, "#r2", "#r2"\n\t" \ - "vpor %%xmm1, "#r", "#r"\n\t" +#define GHASH_MID_AVX1(r, r2) \ + "vpsrld $31, "#r2", %%xmm0\n\t" \ + "vpsrld $31, " #r ", %%xmm1\n\t" \ + "vpslld $1, "#r2", "#r2"\n\t" \ + "vpslld $1, " #r ", " #r "\n\t" \ + "vpsrldq $12, %%xmm0, %%xmm2\n\t" \ + "vpslldq $4, %%xmm0, %%xmm0\n\t" \ + "vpslldq $4, %%xmm1, %%xmm1\n\t" \ + "vpor %%xmm2, " #r ", " #r "\n\t" \ + "vpor %%xmm0, "#r2", "#r2"\n\t" \ + "vpor %%xmm1, " #r ", " #r "\n\t" #define _GHASH_GFMUL_RED_AVX1(r, a, b) \ "vpshufd $0x4e, "#a", %%xmm5\n\t" \ @@ -5022,7 +5022,7 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, "vpslldq $8, %%xmm5, %%xmm6\n\t" \ "vpsrldq $8, %%xmm5, %%xmm5\n\t" \ "vpxor %%xmm6, %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm5, %%xmm7, "#r"\n\t" \ + "vpxor %%xmm5, %%xmm7, " #r "\n\t" \ "vpslld $31, %%xmm4, %%xmm8\n\t" \ "vpslld $30, %%xmm4, %%xmm9\n\t" \ "vpslld $25, %%xmm4, %%xmm10\n\t" \ @@ -5038,13 +5038,13 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, "vpxor %%xmm5, %%xmm10, %%xmm10\n\t" \ "vpxor %%xmm9, %%xmm10, %%xmm10\n\t" \ "vpxor %%xmm4, %%xmm10, %%xmm10\n\t" \ - "vpxor %%xmm10, "#r", "#r"\n\t" + "vpxor %%xmm10, " #r ", " #r "\n\t" #define GHASH_GFMUL_RED_AVX1(r, a, b) \ _GHASH_GFMUL_RED_AVX1(r, a, b) #define _GHASH_GFSQR_RED_AVX1(r, a) \ "vpclmulqdq $0x00, "#a", "#a", %%xmm4\n\t" \ - "vpclmulqdq $0x11, "#a", "#a", "#r"\n\t" \ + "vpclmulqdq $0x11, "#a", "#a", " #r "\n\t" \ "vpslld $31, %%xmm4, %%xmm8\n\t" \ "vpslld $30, %%xmm4, %%xmm9\n\t" \ "vpslld $25, %%xmm4, %%xmm10\n\t" \ @@ -5060,7 +5060,7 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, "vpxor %%xmm5, %%xmm10, %%xmm10\n\t" \ "vpxor %%xmm9, %%xmm10, %%xmm10\n\t" \ "vpxor %%xmm4, %%xmm10, %%xmm10\n\t" \ - "vpxor %%xmm10, "#r", "#r"\n\t" + "vpxor %%xmm10, " #r ", " #r "\n\t" #define GHASH_GFSQR_RED_AVX1(r, a) \ _GHASH_GFSQR_RED_AVX1(r, a) @@ -5081,7 +5081,7 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, "vpxor %%xmm0, %%xmm2, %%xmm2\n\t" \ "vpxor %%xmm1, %%xmm2, %%xmm2\n\t" \ "vpxor "#r2", %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm2, "#r", "#r"\n\t" + "vpxor %%xmm2, " #r ", " #r "\n\t" #define GHASH_GFMUL_RED_XOR_AVX1(r, r2, a, b) \ GHASH_GFMUL_XOR_AVX1(r, r2, a, b) \ @@ -5092,198 +5092,198 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, GHASH_MID_AVX1(r, r2) \ GHASH_RED_AVX1(r, r2) -#define CALC_IV_12_AVX1() \ - "# Calculate values when IV is 12 bytes\n\t" \ - "# Set counter based on IV\n\t" \ - "movl $0x01000000, %%ecx\n\t" \ - "vpinsrq $0, 0(%%rax), %%xmm13, %%xmm13\n\t" \ - "vpinsrd $2, 8(%%rax), %%xmm13, %%xmm13\n\t" \ - "vpinsrd $3, %%ecx, %%xmm13, %%xmm13\n\t" \ - "# H = Encrypt X(=0) and T = Encrypt counter\n\t" \ - "vmovdqa 0(%[KEY]), "VAR(HR)"\n\t" \ - "vpxor "VAR(HR)", %%xmm13, %%xmm1\n\t" \ - "vmovdqa 16(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 32(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 48(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 64(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 80(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 96(%[KEY]), %%xmm12\n\t" \ - "vaesenc 
%%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 112(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 128(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 144(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm12\n\t" \ - "jl 31f\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 176(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm12\n\t" \ - "jl 31f\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 208(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqu 224(%[KEY]), %%xmm12\n\t" \ - "31:\n\t" \ - "vaesenclast %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenclast %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vpshufb %[BSWAP_MASK], "VAR(HR)", "VAR(HR)"\n\t" \ - "vmovdqu %%xmm1, "VAR(TR)"\n\t" \ +#define CALC_IV_12_AVX1() \ + "# Calculate values when IV is 12 bytes\n\t" \ + "# Set counter based on IV\n\t" \ + "movl $0x01000000, %%ecx\n\t" \ + "vpinsrq $0, 0(%%rax), %%xmm13, %%xmm13\n\t" \ + "vpinsrd $2, 8(%%rax), %%xmm13, %%xmm13\n\t" \ + "vpinsrd $3, %%ecx, %%xmm13, %%xmm13\n\t" \ + "# H = Encrypt X(=0) and T = Encrypt counter\n\t" \ + "vmovdqa 0(%[KEY]), " VAR(HR) "\n\t" \ + "vpxor " VAR(HR) ", %%xmm13, %%xmm1\n\t" \ + "vmovdqa 16(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 32(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 48(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 64(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 80(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 96(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 112(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 128(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 144(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "cmpl $11, %[nr]\n\t" \ + "vmovdqa 160(%[KEY]), %%xmm12\n\t" \ + "jl 31f\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 176(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "cmpl $13, %[nr]\n\t" \ + "vmovdqa 192(%[KEY]), %%xmm12\n\t" \ + "jl 31f\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 208(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + 
"vmovdqu 224(%[KEY]), %%xmm12\n\t" \ + "31:\n\t" \ + "vaesenclast %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenclast %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vpshufb %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \ + "vmovdqu %%xmm1, " VAR(TR) "\n\t" \ "jmp 39f\n\t" -#define CALC_IV_AVX1() \ - "# Calculate values when IV is not 12 bytes\n\t" \ - "# H = Encrypt X(=0)\n\t" \ - "vmovdqa 0(%[KEY]), "VAR(HR)"\n\t" \ - VAESENC_AVX(HR) \ - "vpshufb %[BSWAP_MASK], "VAR(HR)", "VAR(HR)"\n\t" \ - "# Calc counter\n\t" \ - "# Initialization vector\n\t" \ - "cmpl $0, %%edx\n\t" \ - "movq $0, %%rcx\n\t" \ - "je 45f\n\t" \ - "cmpl $16, %%edx\n\t" \ - "jl 44f\n\t" \ - "andl $0xfffffff0, %%edx\n\t" \ - "\n" \ - "43:\n\t" \ - "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ - GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR) \ - "addl $16, %%ecx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 43b\n\t" \ - "movl %[ibytes], %%edx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "je 45f\n\t" \ - "\n" \ - "44:\n\t" \ - "subq $16, %%rsp\n\t" \ - "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ - "xorl %%ebx, %%ebx\n\t" \ - "vmovdqu %%xmm4, (%%rsp)\n\t" \ - "42:\n\t" \ - "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ - "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ - "incl %%ecx\n\t" \ - "incl %%ebx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 42b\n\t" \ - "vmovdqu (%%rsp), %%xmm4\n\t" \ - "addq $16, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ - GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR) \ - "\n" \ - "45:\n\t" \ - "# T = Encrypt counter\n\t" \ - "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \ - "shll $3, %%edx\n\t" \ - "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \ - "vpxor %%xmm0, %%xmm13, %%xmm13\n\t" \ - GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR) \ - "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ - "# Encrypt counter\n\t" \ - "vmovdqa 0(%[KEY]), %%xmm4\n\t" \ - "vpxor %%xmm13, %%xmm4, %%xmm4\n\t" \ - VAESENC_AVX(%%xmm4) \ - "vmovdqu %%xmm4, "VAR(TR)"\n\t" +#define CALC_IV_AVX1() \ + "# Calculate values when IV is not 12 bytes\n\t" \ + "# H = Encrypt X(=0)\n\t" \ + "vmovdqa 0(%[KEY]), " VAR(HR) "\n\t" \ + VAESENC_AVX(HR) \ + "vpshufb %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \ + "# Calc counter\n\t" \ + "# Initialization vector\n\t" \ + "cmpl $0, %%edx\n\t" \ + "movq $0, %%rcx\n\t" \ + "je 45f\n\t" \ + "cmpl $16, %%edx\n\t" \ + "jl 44f\n\t" \ + "andl $0xfffffff0, %%edx\n\t" \ + "\n" \ + "43:\n\t" \ + "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ + "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ + GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR) \ + "addl $16, %%ecx\n\t" \ + "cmpl %%edx, %%ecx\n\t" \ + "jl 43b\n\t" \ + "movl %[ibytes], %%edx\n\t" \ + "cmpl %%edx, %%ecx\n\t" \ + "je 45f\n\t" \ + "\n" \ + "44:\n\t" \ + "subq $16, %%rsp\n\t" \ + "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ + "xorl %%ebx, %%ebx\n\t" \ + "vmovdqu %%xmm4, (%%rsp)\n\t" \ + "42:\n\t" \ + "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ + "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ + "incl %%ecx\n\t" \ + "incl %%ebx\n\t" \ + "cmpl %%edx, %%ecx\n\t" \ + "jl 42b\n\t" \ + "vmovdqu (%%rsp), %%xmm4\n\t" \ + "addq $16, %%rsp\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ + "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ + GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR) \ + "\n" \ + "45:\n\t" \ + "# T = Encrypt counter\n\t" \ + "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \ + "shll $3, %%edx\n\t" \ + "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \ + "vpxor %%xmm0, %%xmm13, 
%%xmm13\n\t" \ + GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR) \ + "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ + "# Encrypt counter\n\t" \ + "vmovdqa 0(%[KEY]), %%xmm4\n\t" \ + "vpxor %%xmm13, %%xmm4, %%xmm4\n\t" \ + VAESENC_AVX(%%xmm4) \ + "vmovdqu %%xmm4, " VAR(TR) "\n\t" -#define CALC_AAD_AVX1() \ - "# Additional authentication data\n\t" \ - "movl %[abytes], %%edx\n\t" \ - "cmpl $0, %%edx\n\t" \ - "je 25f\n\t" \ - "movq %[addt], %%rax\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "cmpl $16, %%edx\n\t" \ - "jl 24f\n\t" \ - "andl $0xfffffff0, %%edx\n\t" \ - "\n" \ - "23:\n\t" \ - "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, "VAR(XR)", "VAR(XR)"\n\t" \ - GHASH_FULL_AVX1(XR, %%xmm12, XR, HR) \ - "addl $16, %%ecx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 23b\n\t" \ - "movl %[abytes], %%edx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "je 25f\n\t" \ - "\n" \ - "24:\n\t" \ - "subq $16, %%rsp\n\t" \ - "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ - "xorl %%ebx, %%ebx\n\t" \ - "vmovdqu %%xmm4, (%%rsp)\n\t" \ - "22:\n\t" \ - "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ - "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ - "incl %%ecx\n\t" \ - "incl %%ebx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 22b\n\t" \ - "vmovdqu (%%rsp), %%xmm4\n\t" \ - "addq $16, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, "VAR(XR)", "VAR(XR)"\n\t" \ - GHASH_FULL_AVX1(XR, %%xmm12, XR, HR) \ - "\n" \ +#define CALC_AAD_AVX1() \ + "# Additional authentication data\n\t" \ + "movl %[abytes], %%edx\n\t" \ + "cmpl $0, %%edx\n\t" \ + "je 25f\n\t" \ + "movq %[addt], %%rax\n\t" \ + "xorl %%ecx, %%ecx\n\t" \ + "cmpl $16, %%edx\n\t" \ + "jl 24f\n\t" \ + "andl $0xfffffff0, %%edx\n\t" \ + "\n" \ + "23:\n\t" \ + "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ + "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" \ + GHASH_FULL_AVX1(XR, %%xmm12, XR, HR) \ + "addl $16, %%ecx\n\t" \ + "cmpl %%edx, %%ecx\n\t" \ + "jl 23b\n\t" \ + "movl %[abytes], %%edx\n\t" \ + "cmpl %%edx, %%ecx\n\t" \ + "je 25f\n\t" \ + "\n" \ + "24:\n\t" \ + "subq $16, %%rsp\n\t" \ + "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ + "xorl %%ebx, %%ebx\n\t" \ + "vmovdqu %%xmm4, (%%rsp)\n\t" \ + "22:\n\t" \ + "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ + "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ + "incl %%ecx\n\t" \ + "incl %%ebx\n\t" \ + "cmpl %%edx, %%ecx\n\t" \ + "jl 22b\n\t" \ + "vmovdqu (%%rsp), %%xmm4\n\t" \ + "addq $16, %%rsp\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ + "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" \ + GHASH_FULL_AVX1(XR, %%xmm12, XR, HR) \ + "\n" \ "25:\n\t" #define CALC_HT_8_AVX1() \ - "vmovdqa "VAR(XR)", %%xmm2\n\t" \ + "vmovdqa " VAR(XR) ", %%xmm2\n\t" \ "# H ^ 1\n\t" \ - "vmovdqu "VAR(HR)", 0("VAR(HTR)")\n\t" \ + "vmovdqu " VAR(HR) ", 0(" VAR(HTR) ")\n\t" \ "# H ^ 2\n\t" \ GHASH_GFSQR_RED_AVX1(%%xmm0, HR) \ - "vmovdqu %%xmm0 , 16("VAR(HTR)")\n\t" \ + "vmovdqu %%xmm0 , 16(" VAR(HTR) ")\n\t" \ "# H ^ 3\n\t" \ GHASH_GFMUL_RED_AVX1(%%xmm1, HR, %%xmm0) \ - "vmovdqu %%xmm1 , 32("VAR(HTR)")\n\t" \ + "vmovdqu %%xmm1 , 32(" VAR(HTR) ")\n\t" \ "# H ^ 4\n\t" \ GHASH_GFSQR_RED_AVX1(%%xmm3, %%xmm0) \ - "vmovdqu %%xmm3 , 48("VAR(HTR)")\n\t" \ + "vmovdqu %%xmm3 , 48(" VAR(HTR) ")\n\t" \ "# H ^ 5\n\t" \ GHASH_GFMUL_RED_AVX1(%%xmm12, %%xmm0, %%xmm1) \ - "vmovdqu %%xmm12, 64("VAR(HTR)")\n\t" \ + "vmovdqu %%xmm12, 64(" VAR(HTR) ")\n\t" \ "# H ^ 6\n\t" \ GHASH_GFSQR_RED_AVX1(%%xmm12, %%xmm1) \ - "vmovdqu %%xmm12, 80("VAR(HTR)")\n\t" \ + "vmovdqu %%xmm12, 80(" VAR(HTR) ")\n\t" \ "# H ^ 7\n\t" 
\ GHASH_GFMUL_RED_AVX1(%%xmm12, %%xmm1, %%xmm3) \ - "vmovdqu %%xmm12, 96("VAR(HTR)")\n\t" \ + "vmovdqu %%xmm12, 96(" VAR(HTR) ")\n\t" \ "# H ^ 8\n\t" \ GHASH_GFSQR_RED_AVX1(%%xmm12, %%xmm3) \ - "vmovdqu %%xmm12, 112("VAR(HTR)")\n\t" + "vmovdqu %%xmm12, 112(" VAR(HTR) ")\n\t" -#define VAESENC_128_GHASH_AVX1(src, o) \ - "leaq (%[in],"VAR(KR64)",1), %%rcx\n\t" \ - "leaq (%[out],"VAR(KR64)",1), %%rdx\n\t" \ +#define VAESENC_128_GHASH_AVX1(src, o) \ + "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" \ + "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" \ /* src is either %%rcx or %%rdx */ \ VAESENC_CTR() \ VAESENC_XOR() \ @@ -5311,112 +5311,112 @@ static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, "4:\n\t" \ VAESENC_LAST(%%rcx, %%rdx) -#define _VAESENC_AVX(r) \ - "vaesenc 16(%[KEY]), "#r", "#r"\n\t" \ - "vaesenc 32(%[KEY]), "#r", "#r"\n\t" \ - "vaesenc 48(%[KEY]), "#r", "#r"\n\t" \ - "vaesenc 64(%[KEY]), "#r", "#r"\n\t" \ - "vaesenc 80(%[KEY]), "#r", "#r"\n\t" \ - "vaesenc 96(%[KEY]), "#r", "#r"\n\t" \ - "vaesenc 112(%[KEY]), "#r", "#r"\n\t" \ - "vaesenc 128(%[KEY]), "#r", "#r"\n\t" \ - "vaesenc 144(%[KEY]), "#r", "#r"\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, "#r", "#r"\n\t" \ - "vaesenc 176(%[KEY]), "#r", "#r"\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, "#r", "#r"\n\t" \ - "vaesenc 208(%[KEY]), "#r", "#r"\n\t" \ - "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ - "%=:\n\t" \ - "vaesenclast %%xmm5, "#r", "#r"\n\t" -#define VAESENC_AVX(r) \ +#define _VAESENC_AVX(r) \ + "vaesenc 16(%[KEY]), " #r ", " #r "\n\t" \ + "vaesenc 32(%[KEY]), " #r ", " #r "\n\t" \ + "vaesenc 48(%[KEY]), " #r ", " #r "\n\t" \ + "vaesenc 64(%[KEY]), " #r ", " #r "\n\t" \ + "vaesenc 80(%[KEY]), " #r ", " #r "\n\t" \ + "vaesenc 96(%[KEY]), " #r ", " #r "\n\t" \ + "vaesenc 112(%[KEY]), " #r ", " #r "\n\t" \ + "vaesenc 128(%[KEY]), " #r ", " #r "\n\t" \ + "vaesenc 144(%[KEY]), " #r ", " #r "\n\t" \ + "cmpl $11, %[nr]\n\t" \ + "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ + "jl %=f\n\t" \ + "vaesenc %%xmm5, " #r ", " #r "\n\t" \ + "vaesenc 176(%[KEY]), " #r ", " #r "\n\t" \ + "cmpl $13, %[nr]\n\t" \ + "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ + "jl %=f\n\t" \ + "vaesenc %%xmm5, " #r ", " #r "\n\t" \ + "vaesenc 208(%[KEY]), " #r ", " #r "\n\t" \ + "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ + "%=:\n\t" \ + "vaesenclast %%xmm5, " #r ", " #r "\n\t" +#define VAESENC_AVX(r) \ _VAESENC_AVX(r) -#define AESENC_LAST15_ENC_AVX1() \ - "movl %[nbytes], %%ecx\n\t" \ - "movl %%ecx, %%edx\n\t" \ - "andl $0x0f, %%ecx\n\t" \ - "jz 55f\n\t" \ - "vmovdqu "VAR(CTR1)", %%xmm13\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \ - "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \ - VAESENC_AVX(%%xmm13) \ - "subq $16, %%rsp\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "vmovdqu %%xmm13, (%%rsp)\n\t" \ - "\n" \ - "51:\n\t" \ - "movzbl (%[in],"VAR(KR64)",1), %%r13d\n\t" \ - "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ - "movb %%r13b, (%[out],"VAR(KR64)",1)\n\t" \ - "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ - "incl "VAR(KR)"\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %%edx, "VAR(KR)"\n\t" \ - "jl 51b\n\t" \ - "xorq %%r13, %%r13\n\t" \ - "cmpl $16, %%ecx\n\t" \ - "je 53f\n\t" \ - "\n" \ - "52:\n\t" \ - "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ - "incl %%ecx\n\t" \ - "cmpl $16, %%ecx\n\t" \ - "jl 52b\n\t" \ - "53:\n\t" \ - "vmovdqu (%%rsp), %%xmm13\n\t" \ - "addq $16, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ - "vpxor %%xmm13, "VAR(XR)", "VAR(XR)"\n\t" \ - 
GHASH_GFMUL_RED_AVX1(XR, HR, XR) \ +#define AESENC_LAST15_ENC_AVX1() \ + "movl %[nbytes], %%ecx\n\t" \ + "movl %%ecx, %%edx\n\t" \ + "andl $0x0f, %%ecx\n\t" \ + "jz 55f\n\t" \ + "vmovdqu " VAR(CTR1) ", %%xmm13\n\t" \ + "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \ + "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \ + VAESENC_AVX(%%xmm13) \ + "subq $16, %%rsp\n\t" \ + "xorl %%ecx, %%ecx\n\t" \ + "vmovdqu %%xmm13, (%%rsp)\n\t" \ + "\n" \ + "51:\n\t" \ + "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \ + "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ + "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \ + "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ + "incl " VAR(KR) "\n\t" \ + "incl %%ecx\n\t" \ + "cmpl %%edx, " VAR(KR) "\n\t" \ + "jl 51b\n\t" \ + "xorq %%r13, %%r13\n\t" \ + "cmpl $16, %%ecx\n\t" \ + "je 53f\n\t" \ + "\n" \ + "52:\n\t" \ + "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ + "incl %%ecx\n\t" \ + "cmpl $16, %%ecx\n\t" \ + "jl 52b\n\t" \ + "53:\n\t" \ + "vmovdqu (%%rsp), %%xmm13\n\t" \ + "addq $16, %%rsp\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ + "vpxor %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t" \ + GHASH_GFMUL_RED_AVX1(XR, HR, XR) \ -#define AESENC_LAST15_DEC_AVX1() \ - "movl %[nbytes], %%ecx\n\t" \ - "movl %%ecx, %%edx\n\t" \ - "andl $0x0f, %%ecx\n\t" \ - "jz 55f\n\t" \ - "vmovdqu "VAR(CTR1)", %%xmm13\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \ - "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \ - VAESENC_AVX(%%xmm13) \ - "subq $32, %%rsp\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "vmovdqu %%xmm13, (%%rsp)\n\t" \ - "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \ - "vmovdqu %%xmm0, 16(%%rsp)\n\t" \ - "\n" \ - "51:\n\t" \ - "movzbl (%[in],"VAR(KR64)",1), %%r13d\n\t" \ - "movb %%r13b, 16(%%rsp,%%rcx,1)\n\t" \ - "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ - "movb %%r13b, (%[out],"VAR(KR64)",1)\n\t" \ - "incl "VAR(KR)"\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %%edx, "VAR(KR)"\n\t" \ - "jl 51b\n\t" \ - "53:\n\t" \ - "vmovdqu 16(%%rsp), %%xmm13\n\t" \ - "addq $32, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ - "vpxor %%xmm13, "VAR(XR)", "VAR(XR)"\n\t" \ - GHASH_GFMUL_RED_AVX1(XR, HR, XR) \ +#define AESENC_LAST15_DEC_AVX1() \ + "movl %[nbytes], %%ecx\n\t" \ + "movl %%ecx, %%edx\n\t" \ + "andl $0x0f, %%ecx\n\t" \ + "jz 55f\n\t" \ + "vmovdqu " VAR(CTR1) ", %%xmm13\n\t" \ + "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \ + "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \ + VAESENC_AVX(%%xmm13) \ + "subq $32, %%rsp\n\t" \ + "xorl %%ecx, %%ecx\n\t" \ + "vmovdqu %%xmm13, (%%rsp)\n\t" \ + "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \ + "vmovdqu %%xmm0, 16(%%rsp)\n\t" \ + "\n" \ + "51:\n\t" \ + "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \ + "movb %%r13b, 16(%%rsp,%%rcx,1)\n\t" \ + "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ + "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \ + "incl " VAR(KR) "\n\t" \ + "incl %%ecx\n\t" \ + "cmpl %%edx, " VAR(KR) "\n\t" \ + "jl 51b\n\t" \ + "53:\n\t" \ + "vmovdqu 16(%%rsp), %%xmm13\n\t" \ + "addq $32, %%rsp\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ + "vpxor %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t" \ + GHASH_GFMUL_RED_AVX1(XR, HR, XR) \ -#define CALC_TAG_AVX1() \ - "movl %[nbytes], %%edx\n\t" \ - "movl %[abytes], %%ecx\n\t" \ - "shlq $3, %%rdx\n\t" \ - "shlq $3, %%rcx\n\t" \ - "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \ - "vpinsrq $1, %%rcx, %%xmm0, %%xmm0\n\t" \ - "vpxor %%xmm0, "VAR(XR)", "VAR(XR)"\n\t" \ - GHASH_GFMUL_RED_AVX1(XR, HR, XR) \ - "vpshufb %[BSWAP_MASK], "VAR(XR)", "VAR(XR)"\n\t" \ - "vpxor "VAR(TR)", "VAR(XR)", %%xmm0\n\t" \ +#define CALC_TAG_AVX1() \ + "movl %[nbytes], %%edx\n\t" \ + "movl 
%[abytes], %%ecx\n\t" \ + "shlq $3, %%rdx\n\t" \ + "shlq $3, %%rcx\n\t" \ + "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \ + "vpinsrq $1, %%rcx, %%xmm0, %%xmm0\n\t" \ + "vpxor %%xmm0, " VAR(XR) ", " VAR(XR) "\n\t" \ + GHASH_GFMUL_RED_AVX1(XR, HR, XR) \ + "vpshufb %[BSWAP_MASK], " VAR(XR) ", " VAR(XR) "\n\t" \ + "vpxor " VAR(TR) ", " VAR(XR) ", %%xmm0\n\t" \ #define STORE_TAG_AVX() \ "cmpl $16, %[tbytes]\n\t" \ @@ -5479,10 +5479,10 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, register unsigned int ivLen asm("ebx") = ibytes; __asm__ __volatile__ ( - "subq $"VAR(STACK_OFFSET)", %%rsp\n\t" + "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t" /* Counter is xmm13 */ "vpxor %%xmm13, %%xmm13, %%xmm13\n\t" - "vpxor "VAR(XR)", "VAR(XR)", "VAR(XR)"\n\t" + "vpxor " VAR(XR) ", " VAR(XR) ", " VAR(XR) "\n\t" "movl %[ibytes], %%edx\n\t" "cmpl $12, %%edx\n\t" "jne 35f\n\t" @@ -5496,19 +5496,19 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, CALC_AAD_AVX1() "# Calculate counter and H\n\t" - "vpsrlq $63, "VAR(HR)", %%xmm5\n\t" - "vpsllq $1, "VAR(HR)", %%xmm4\n\t" + "vpsrlq $63, " VAR(HR) ", %%xmm5\n\t" + "vpsllq $1, " VAR(HR) ", %%xmm4\n\t" "vpslldq $8, %%xmm5, %%xmm5\n\t" "vpor %%xmm5, %%xmm4, %%xmm4\n\t" - "vpshufd $0xff, "VAR(HR)", "VAR(HR)"\n\t" - "vpsrad $31, "VAR(HR)", "VAR(HR)"\n\t" + "vpshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t" + "vpsrad $31, " VAR(HR) ", " VAR(HR) "\n\t" "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" - "vpand %[MOD2_128], "VAR(HR)", "VAR(HR)"\n\t" + "vpand %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t" "vpaddd %[ONE], %%xmm13, %%xmm13\n\t" - "vpxor %%xmm4, "VAR(HR)", "VAR(HR)"\n\t" - "vmovdqu %%xmm13, "VAR(CTR1)"\n\t" + "vpxor %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t" + "vmovdqu %%xmm13, " VAR(CTR1) "\n\t" - "xorl "VAR(KR)", "VAR(KR)"\n\t" + "xorl " VAR(KR) ", " VAR(KR) "\n\t" #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) "cmpl $128, %[nbytes]\n\t" @@ -5522,15 +5522,15 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, VAESENC_128() "cmpl $128, %%r13d\n\t" - "movl $128, "VAR(KR)"\n\t" + "movl $128, " VAR(KR) "\n\t" "jle 2f\n\t" "# More 128 bytes of input\n\t" "\n" "3:\n\t" VAESENC_128_GHASH_AVX1(%%rdx, 0) - "addl $128, "VAR(KR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "addl $128, " VAR(KR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jl 3b\n\t" "\n" "2:\n\t" @@ -5545,48 +5545,48 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "vpshufb %%xmm13, %%xmm10, %%xmm10\n\t" "vpshufb %%xmm13, %%xmm11, %%xmm11\n\t" - "vmovdqu ("VAR(HTR)"), %%xmm12\n\t" - "vmovdqu 16("VAR(HTR)"), %%xmm14\n\t" + "vmovdqu (" VAR(HTR) "), %%xmm12\n\t" + "vmovdqu 16(" VAR(HTR) "), %%xmm14\n\t" GHASH_GFMUL_AVX1(XR, %%xmm13, %%xmm11, %%xmm12) GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm10, %%xmm14) - "vmovdqu 32("VAR(HTR)"), %%xmm12\n\t" - "vmovdqu 48("VAR(HTR)"), %%xmm14\n\t" + "vmovdqu 32(" VAR(HTR) "), %%xmm12\n\t" + "vmovdqu 48(" VAR(HTR) "), %%xmm14\n\t" GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm9, %%xmm12) GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm8, %%xmm14) - "vmovdqu 64("VAR(HTR)"), %%xmm12\n\t" - "vmovdqu 80("VAR(HTR)"), %%xmm14\n\t" + "vmovdqu 64(" VAR(HTR) "), %%xmm12\n\t" + "vmovdqu 80(" VAR(HTR) "), %%xmm14\n\t" GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm7, %%xmm12) GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm6, %%xmm14) - "vmovdqu 96("VAR(HTR)"), %%xmm12\n\t" - "vmovdqu 112("VAR(HTR)"), %%xmm14\n\t" + "vmovdqu 96(" VAR(HTR) "), %%xmm12\n\t" + "vmovdqu 112(" VAR(HTR) "), %%xmm14\n\t" 
GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm5, %%xmm12) GHASH_GFMUL_RED_XOR_AVX1(XR, %%xmm13, %%xmm4, %%xmm14) - "vmovdqu 0("VAR(HTR)"), "VAR(HR)"\n\t" + "vmovdqu 0(" VAR(HTR) "), " VAR(HR) "\n\t" "\n" "5:\n\t" "movl %[nbytes], %%edx\n\t" - "cmpl %%edx, "VAR(KR)"\n\t" + "cmpl %%edx, " VAR(KR) "\n\t" "jge 55f\n\t" #endif "movl %[nbytes], %%r13d\n\t" "andl $0xfffffff0, %%r13d\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jge 14f\n\t" VAESENC_BLOCK() - "addl $16, "VAR(KR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "addl $16, " VAR(KR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jge 13f\n\t" "\n" "12:\n\t" - "vmovdqu (%[in],"VAR(KR64)",1), %%xmm9\n\t" + "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm9\n\t" VAESENC_GFMUL(%%xmm9, HR, XR) "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" - "addl $16, "VAR(KR)"\n\t" - "vpxor %%xmm4, "VAR(XR)", "VAR(XR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "addl $16, " VAR(KR) "\n\t" + "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jl 12b\n\t" "\n" "13:\n\t" @@ -5600,7 +5600,7 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, CALC_TAG_AVX1() STORE_TAG_AVX() - "addq $"VAR(STACK_OFFSET)", %%rsp\n\t" + "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t" "vzeroupper\n\t" : @@ -5628,10 +5628,10 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, #ifdef HAVE_INTEL_AVX2 /* Encrypt and carry-less multiply for AVX2. */ #define VAESENC_PCLMUL_AVX2_1(src, o1, o2, o3) \ - "vmovdqu "#o2"("#src"), %%xmm12\n\t" \ - "vmovdqa "#o1"(%[KEY]), %%xmm0\n\t" \ + "vmovdqu " #o2 "(" #src "), %%xmm12\n\t" \ + "vmovdqa " #o1 "(%[KEY]), %%xmm0\n\t" \ "vpshufb %[BSWAP_MASK], %%xmm12, %%xmm12\n\t" \ - "vmovdqu "#o3"("VAR(HTR)"), %%xmm13\n\t" \ + "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm13\n\t" \ "vpxor %%xmm2, %%xmm12, %%xmm12\n\t" \ "vpclmulqdq $0x10, %%xmm13, %%xmm12, %%xmm1\n\t" \ "vpclmulqdq $0x01, %%xmm13, %%xmm12, %%xmm14\n\t" \ @@ -5647,15 +5647,15 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "vaesenc %%xmm0, %%xmm11, %%xmm11\n\t" \ #define VAESENC_PCLMUL_AVX2_2(src, o1, o2, o3) \ - "vmovdqu "#o2"("#src"), %%xmm12\n\t" \ - "vmovdqu "#o3"("VAR(HTR)"), %%xmm0\n\t" \ + "vmovdqu " #o2 "(" #src "), %%xmm12\n\t" \ + "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm0\n\t" \ "vpshufb %[BSWAP_MASK], %%xmm12, %%xmm12\n\t" \ "vpxor %%xmm14, %%xmm1, %%xmm1\n\t" \ "vpclmulqdq $0x10, %%xmm0, %%xmm12, %%xmm13\n\t" \ "vpclmulqdq $0x01, %%xmm0, %%xmm12, %%xmm14\n\t" \ "vpclmulqdq $0x00, %%xmm0, %%xmm12, %%xmm15\n\t" \ "vpclmulqdq $0x11, %%xmm0, %%xmm12, %%xmm12\n\t" \ - "vmovdqa "#o1"(%[KEY]), %%xmm0\n\t" \ + "vmovdqa " #o1 "(%[KEY]), %%xmm0\n\t" \ "vpxor %%xmm13, %%xmm1, %%xmm1\n\t" \ "vpxor %%xmm12, %%xmm3, %%xmm3\n\t" \ "vaesenc %%xmm0, %%xmm4, %%xmm4\n\t" \ @@ -5668,8 +5668,8 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "vaesenc %%xmm0, %%xmm11, %%xmm11\n\t" \ #define VAESENC_PCLMUL_AVX2_N(src, o1, o2, o3) \ - "vmovdqu "#o2"("#src"), %%xmm12\n\t" \ - "vmovdqu "#o3"("VAR(HTR)"), %%xmm0\n\t" \ + "vmovdqu " #o2 "(" #src "), %%xmm12\n\t" \ + "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm0\n\t" \ "vpshufb %[BSWAP_MASK], %%xmm12, %%xmm12\n\t" \ "vpxor %%xmm14, %%xmm1, %%xmm1\n\t" \ "vpxor %%xmm15, %%xmm2, %%xmm2\n\t" \ @@ -5677,7 +5677,7 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "vpclmulqdq $0x01, %%xmm0, %%xmm12, %%xmm14\n\t" \ "vpclmulqdq $0x00, %%xmm0, %%xmm12, %%xmm15\n\t" \ "vpclmulqdq $0x11, %%xmm0, %%xmm12, %%xmm12\n\t" \ - "vmovdqa 
"#o1"(%[KEY]), %%xmm0\n\t" \ + "vmovdqa " #o1 "(%[KEY]), %%xmm0\n\t" \ "vpxor %%xmm13, %%xmm1, %%xmm1\n\t" \ "vpxor %%xmm12, %%xmm3, %%xmm3\n\t" \ "vaesenc %%xmm0, %%xmm4, %%xmm4\n\t" \ @@ -5714,39 +5714,39 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "vpxor %%xmm3, %%xmm2, %%xmm2\n\t" \ "vaesenc %%xmm15, %%xmm11, %%xmm11\n\t" -#define VAESENC_BLOCK_AVX2() \ - "vmovdqu "VAR(CTR1)", %%xmm5\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" \ - "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" \ - "vmovdqu %%xmm5, "VAR(CTR1)"\n\t" \ - "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ - "%=:\n\t" \ - "vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vmovdqu (%[in],"VAR(KR64)",1), %%xmm5\n\t" \ - "vpxor %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vmovdqu %%xmm4, (%[out],"VAR(KR64)",1)\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, "VAR(XR)", "VAR(XR)"\n\t" +#define VAESENC_BLOCK_AVX2() \ + "vmovdqu " VAR(CTR1) ", %%xmm5\n\t" \ + "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" \ + "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" \ + "vmovdqu %%xmm5, " VAR(CTR1) "\n\t" \ + "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "cmpl $11, %[nr]\n\t" \ + "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ + "jl %=f\n\t" \ + "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ + "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "cmpl $13, %[nr]\n\t" \ + "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ + "jl %=f\n\t" \ + "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ + "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \ + "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ + "%=:\n\t" \ + "vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \ + "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm5\n\t" \ + "vpxor %%xmm5, %%xmm4, %%xmm4\n\t" \ + "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ + "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" /* Karatsuba multiplication - slower * H01 = H[1] ^ H[0] (top and bottom 64-bits XORed) @@ -5775,36 +5775,36 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ "%=:\n\t" \ "vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vmovdqu "#in", %%xmm0\n\t" \ + "vmovdqu " #in ", %%xmm0\n\t" \ "vpxor %%xmm0, %%xmm4, %%xmm4\n\t" \ -\ - "vpsrldq $8, "#X", %%xmm2\n\t" \ - "vpxor "#X", %%xmm2, %%xmm2\n\t" \ - "vpclmulqdq $0x00, "#H", "#X", %%xmm5\n\t" \ - "vpclmulqdq $0x11, "#H", "#X", %%xmm8\n\t" \ - 
"vpclmulqdq $0x00, "#H01", %%xmm2, %%xmm7\n\t" \ - "vpxor %%xmm5, %%xmm7, %%xmm7\n\t" \ - "vpxor %%xmm8, %%xmm7, %%xmm7\n\t" \ - "vpslldq $8, %%xmm7, %%xmm6\n\t" \ - "vpsrldq $8, %%xmm7, %%xmm7\n\t" \ - "vpxor %%xmm7, %%xmm8, %%xmm8\n\t" \ - "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ -\ - "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm8, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm5, %%xmm6, "VAR(XR)"\n\t" + \ + "vpsrldq $8, " #X ", %%xmm2\n\t" \ + "vpxor " #X ", %%xmm2, %%xmm2\n\t" \ + "vpclmulqdq $0x00, " #H ", " #X ", %%xmm5\n\t" \ + "vpclmulqdq $0x11, " #H ", " #X ", %%xmm8\n\t" \ + "vpclmulqdq $0x00, "#H01", %%xmm2, %%xmm7\n\t" \ + "vpxor %%xmm5, %%xmm7, %%xmm7\n\t" \ + "vpxor %%xmm8, %%xmm7, %%xmm7\n\t" \ + "vpslldq $8, %%xmm7, %%xmm6\n\t" \ + "vpsrldq $8, %%xmm7, %%xmm7\n\t" \ + "vpxor %%xmm7, %%xmm8, %%xmm8\n\t" \ + "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ + \ + "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \ + "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ + "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ + "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \ + "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ + "vpxor %%xmm8, %%xmm6, %%xmm6\n\t" \ + "vpxor %%xmm5, %%xmm6, " VAR(XR) "\n\t" #define VAESENC_GFMUL_AVX2(in, H, X, ctr1) \ _VAESENC_GFMUL_AVX2(in, H, X, ctr1) #define _VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1) \ - "vpclmulqdq $0x10, "#H", "#X", %%xmm7\n\t" \ - "vpclmulqdq $0x01, "#H", "#X", %%xmm6\n\t" \ - "vpclmulqdq $0x00, "#H", "#X", %%xmm5\n\t" \ - "vpclmulqdq $0x11, "#H", "#X", %%xmm8\n\t" \ + "vpclmulqdq $0x10, " #H ", " #X ", %%xmm7\n\t" \ + "vpclmulqdq $0x01, " #H ", " #X ", %%xmm6\n\t" \ + "vpclmulqdq $0x00, " #H ", " #X ", %%xmm5\n\t" \ + "vpclmulqdq $0x11, " #H ", " #X ", %%xmm8\n\t" \ "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \ "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ "vpxor %%xmm6, %%xmm7, %%xmm7\n\t" \ @@ -5839,8 +5839,8 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "vmovdqa 224(%[KEY]), %%xmm3\n\t" \ "%=:\n\t" \ "vaesenclast %%xmm3, %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm5, %%xmm6, "VAR(XR)"\n\t" \ - "vmovdqu "#in", %%xmm5\n\t" \ + "vpxor %%xmm5, %%xmm6, " VAR(XR) "\n\t" \ + "vmovdqu " #in ", %%xmm5\n\t" \ "vpxor %%xmm5, %%xmm4, %%xmm4\n\t" #define VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1) \ _VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1) @@ -5855,21 +5855,21 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "vpslldq $8, %%xmm2, %%xmm1\n\t" \ "vpsrldq $8, %%xmm2, %%xmm2\n\t" \ "vpxor %%xmm1, %%xmm0, "#r2"\n\t" \ - "vpxor %%xmm2, %%xmm3, "#r"\n\t" + "vpxor %%xmm2, %%xmm3, " #r "\n\t" #define GHASH_GFMUL_AVX2(r, r2, a, b) \ _GHASH_GFMUL_AVX2(r, r2, a, b) -#define GHASH_MID_AVX2(r, r2) \ - "vpsrld $31, "#r2", %%xmm0\n\t" \ - "vpsrld $31, "#r", %%xmm1\n\t" \ - "vpslld $1, "#r2", "#r2"\n\t" \ - "vpslld $1, "#r", "#r"\n\t" \ - "vpsrldq $12, %%xmm0, %%xmm2\n\t" \ - "vpslldq $4, %%xmm0, %%xmm0\n\t" \ - "vpslldq $4, %%xmm1, %%xmm1\n\t" \ - "vpor %%xmm2, "#r", "#r"\n\t" \ - "vpor %%xmm0, "#r2", "#r2"\n\t" \ - "vpor %%xmm1, "#r", "#r"\n\t" +#define GHASH_MID_AVX2(r, r2) \ + "vpsrld $31, "#r2", %%xmm0\n\t" \ + "vpsrld $31, " #r ", %%xmm1\n\t" \ + "vpslld $1, "#r2", "#r2"\n\t" \ + "vpslld $1, " #r ", " #r "\n\t" \ + "vpsrldq $12, %%xmm0, %%xmm2\n\t" \ + "vpslldq $4, %%xmm0, %%xmm0\n\t" \ + "vpslldq $4, %%xmm1, %%xmm1\n\t" \ + "vpor %%xmm2, " #r ", " #r "\n\t" \ + "vpor %%xmm0, 
"#r2", "#r2"\n\t" \ + "vpor %%xmm1, " #r ", " #r "\n\t" #define _GHASH_GFMUL_RED_AVX2(r, a, b) \ "vpclmulqdq $0x10, "#a", "#b", %%xmm7\n\t" \ @@ -5887,7 +5887,7 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ "vpxor %%xmm7, %%xmm8, %%xmm8\n\t" \ "vpxor %%xmm8, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm5, %%xmm6, "#r"\n\t" + "vpxor %%xmm5, %%xmm6, " #r "\n\t" #define GHASH_GFMUL_RED_AVX2(r, a, b) \ _GHASH_GFMUL_RED_AVX2(r, a, b) @@ -5900,7 +5900,7 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "vpclmulqdq $0x10, "#mod128", %%xmm6, %%xmm5\n\t" \ "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm6, %%xmm8, "#r"\n\t" + "vpxor %%xmm6, %%xmm8, " #r "\n\t" #define GHASH_GFSQR_RED2_AVX2(r, a, mod128) \ _GHASH_GFSQR_RED2_AVX2(r, a, mod128) @@ -5935,23 +5935,23 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, #define CALC_HT_8_AVX2() \ "vmovdqa %[MOD2_128], %%xmm11\n\t" \ - "vmovdqa "VAR(XR)", %%xmm2\n\t" \ + "vmovdqa " VAR(XR) ", %%xmm2\n\t" \ "# H ^ 1 and H ^ 2\n\t" \ GHASH_GFSQR_RED2_AVX2(%%xmm0, HR, %%xmm11) \ - "vmovdqu "VAR(HR)", 0("VAR(HTR)")\n\t" \ - "vmovdqu %%xmm0 , 16("VAR(HTR)")\n\t" \ + "vmovdqu " VAR(HR) ", 0(" VAR(HTR) ")\n\t" \ + "vmovdqu %%xmm0 , 16(" VAR(HTR) ")\n\t" \ "# H ^ 3 and H ^ 4\n\t" \ GHASH_GFMUL_SQR_RED2_AVX2(%%xmm1, %%xmm3, HR, %%xmm0, %%xmm11) \ - "vmovdqu %%xmm1 , 32("VAR(HTR)")\n\t" \ - "vmovdqu %%xmm3 , 48("VAR(HTR)")\n\t" \ + "vmovdqu %%xmm1 , 32(" VAR(HTR) ")\n\t" \ + "vmovdqu %%xmm3 , 48(" VAR(HTR) ")\n\t" \ "# H ^ 5 and H ^ 6\n\t" \ GHASH_GFMUL_SQR_RED2_AVX2(%%xmm12, %%xmm0, %%xmm0, %%xmm1, %%xmm11) \ - "vmovdqu %%xmm12, 64("VAR(HTR)")\n\t" \ - "vmovdqu %%xmm0 , 80("VAR(HTR)")\n\t" \ + "vmovdqu %%xmm12, 64(" VAR(HTR) ")\n\t" \ + "vmovdqu %%xmm0 , 80(" VAR(HTR) ")\n\t" \ "# H ^ 7 and H ^ 8\n\t" \ GHASH_GFMUL_SQR_RED2_AVX2(%%xmm12, %%xmm0, %%xmm1, %%xmm3, %%xmm11) \ - "vmovdqu %%xmm12, 96("VAR(HTR)")\n\t" \ - "vmovdqu %%xmm0 , 112("VAR(HTR)")\n\t" + "vmovdqu %%xmm12, 96(" VAR(HTR) ")\n\t" \ + "vmovdqu %%xmm0 , 112(" VAR(HTR) ")\n\t" #define _GHASH_RED_AVX2(r, r2) \ "vmovdqa %[MOD2_128], %%xmm2\n\t" \ @@ -5961,7 +5961,7 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "vpclmulqdq $0x10, %%xmm2, %%xmm1, %%xmm0\n\t" \ "vpshufd $0x4e, %%xmm1, %%xmm1\n\t" \ "vpxor %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm1, "#r", "#r"\n\t" + "vpxor %%xmm1, " #r ", " #r "\n\t" #define GHASH_RED_AVX2(r, r2) \ _GHASH_RED_AVX2(r, r2) @@ -5974,7 +5974,7 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "vpclmulqdq $0x10, "#a", "#b", "#r3"\n\t" \ "vpclmulqdq $0x01, "#a", "#b", %%xmm1\n\t" \ "vpclmulqdq $0x00, "#a", "#b", "#r2"\n\t" \ - "vpclmulqdq $0x11, "#a", "#b", "#r"\n\t" \ + "vpclmulqdq $0x11, "#a", "#b", " #r "\n\t" \ "vpxor %%xmm1, "#r3", "#r3"\n\t" #define GFMUL_3V_AVX2(r, r2, r3, a, b) \ _GFMUL_3V_AVX2(r, r2, r3, a, b) @@ -5985,200 +5985,200 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "vpclmulqdq $0x00, "#a", "#b", %%xmm0\n\t" \ "vpclmulqdq $0x11, "#a", "#b", %%xmm3\n\t" \ "vpxor %%xmm1, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm3, "#r", "#r"\n\t" \ + "vpxor %%xmm3, " #r ", " #r "\n\t" \ "vpxor %%xmm2, "#r3", "#r3"\n\t" \ "vpxor %%xmm0, "#r2", "#r2"\n\t" #define GFMUL_XOR_3V_AVX2(r, r2, r3, a, b) \ _GFMUL_XOR_3V_AVX2(r, r2, r3, a, b) #define GHASH_GFMUL_RED_8_AVX2() \ - "vmovdqu ("VAR(HTR)"), %%xmm12\n\t" \ + "vmovdqu (" 
VAR(HTR) "), %%xmm12\n\t" \ GFMUL_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm11, %%xmm12) \ - "vmovdqu 16("VAR(HTR)"), %%xmm12\n\t" \ + "vmovdqu 16(" VAR(HTR) "), %%xmm12\n\t" \ GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm10, %%xmm12) \ - "vmovdqu 32("VAR(HTR)"), %%xmm11\n\t" \ - "vmovdqu 48("VAR(HTR)"), %%xmm12\n\t" \ + "vmovdqu 32(" VAR(HTR) "), %%xmm11\n\t" \ + "vmovdqu 48(" VAR(HTR) "), %%xmm12\n\t" \ GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm9, %%xmm11) \ GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm8, %%xmm12) \ - "vmovdqu 64("VAR(HTR)"), %%xmm11\n\t" \ - "vmovdqu 80("VAR(HTR)"), %%xmm12\n\t" \ + "vmovdqu 64(" VAR(HTR) "), %%xmm11\n\t" \ + "vmovdqu 80(" VAR(HTR) "), %%xmm12\n\t" \ GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm7, %%xmm11) \ GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm6, %%xmm12) \ - "vmovdqu 96("VAR(HTR)"), %%xmm11\n\t" \ - "vmovdqu 112("VAR(HTR)"), %%xmm12\n\t" \ + "vmovdqu 96(" VAR(HTR) "), %%xmm11\n\t" \ + "vmovdqu 112(" VAR(HTR) "), %%xmm12\n\t" \ GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm5, %%xmm11) \ GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm4, %%xmm12) \ "vpslldq $8, %%xmm14, %%xmm12\n\t" \ "vpsrldq $8, %%xmm14, %%xmm14\n\t" \ "vpxor %%xmm12, %%xmm13, %%xmm13\n\t" \ - "vpxor %%xmm14, "VAR(XR)", "VAR(XR)"\n\t" \ + "vpxor %%xmm14, " VAR(XR) ", " VAR(XR) "\n\t" \ GHASH_RED_AVX2(XR, %%xmm13) -#define CALC_IV_12_AVX2() \ - "# Calculate values when IV is 12 bytes\n\t" \ - "# Set counter based on IV\n\t" \ - "movl $0x01000000, %%ecx\n\t" \ - "vpinsrq $0, 0(%%rax), %%xmm13, %%xmm13\n\t" \ - "vpinsrd $2, 8(%%rax), %%xmm13, %%xmm13\n\t" \ - "vpinsrd $3, %%ecx, %%xmm13, %%xmm13\n\t" \ - "# H = Encrypt X(=0) and T = Encrypt counter\n\t" \ - "vmovdqa 0(%[KEY]), "VAR(HR)"\n\t" \ - "vmovdqa 16(%[KEY]), %%xmm12\n\t" \ - "vpxor "VAR(HR)", %%xmm13, %%xmm1\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 32(%[KEY]), %%xmm0\n\t" \ - "vmovdqa 48(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm0, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 64(%[KEY]), %%xmm0\n\t" \ - "vmovdqa 80(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm0, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 96(%[KEY]), %%xmm0\n\t" \ - "vmovdqa 112(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm0, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 128(%[KEY]), %%xmm0\n\t" \ - "vmovdqa 144(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm0, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm0\n\t" \ - "jl 31f\n\t" \ - "vmovdqa 176(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm0, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm0\n\t" \ - "jl 31f\n\t" \ - "vmovdqa 208(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm0, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vaesenc %%xmm12, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqu 224(%[KEY]), %%xmm0\n\t" \ - 
"31:\n\t" \ - "vaesenclast %%xmm0, "VAR(HR)", "VAR(HR)"\n\t" \ - "vaesenclast %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vpshufb %[BSWAP_MASK], "VAR(HR)", "VAR(HR)"\n\t" \ - "vmovdqu %%xmm1, "VAR(TR)"\n\t" \ +#define CALC_IV_12_AVX2() \ + "# Calculate values when IV is 12 bytes\n\t" \ + "# Set counter based on IV\n\t" \ + "movl $0x01000000, %%ecx\n\t" \ + "vpinsrq $0, 0(%%rax), %%xmm13, %%xmm13\n\t" \ + "vpinsrd $2, 8(%%rax), %%xmm13, %%xmm13\n\t" \ + "vpinsrd $3, %%ecx, %%xmm13, %%xmm13\n\t" \ + "# H = Encrypt X(=0) and T = Encrypt counter\n\t" \ + "vmovdqa 0(%[KEY]), " VAR(HR) "\n\t" \ + "vmovdqa 16(%[KEY]), %%xmm12\n\t" \ + "vpxor " VAR(HR) ", %%xmm13, %%xmm1\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 32(%[KEY]), %%xmm0\n\t" \ + "vmovdqa 48(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 64(%[KEY]), %%xmm0\n\t" \ + "vmovdqa 80(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 96(%[KEY]), %%xmm0\n\t" \ + "vmovdqa 112(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqa 128(%[KEY]), %%xmm0\n\t" \ + "vmovdqa 144(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "cmpl $11, %[nr]\n\t" \ + "vmovdqa 160(%[KEY]), %%xmm0\n\t" \ + "jl 31f\n\t" \ + "vmovdqa 176(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "cmpl $13, %[nr]\n\t" \ + "vmovdqa 192(%[KEY]), %%xmm0\n\t" \ + "jl 31f\n\t" \ + "vmovdqa 208(%[KEY]), %%xmm12\n\t" \ + "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ + "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ + "vmovdqu 224(%[KEY]), %%xmm0\n\t" \ + "31:\n\t" \ + "vaesenclast %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ + "vaesenclast %%xmm0, %%xmm1, %%xmm1\n\t" \ + "vpshufb %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \ + "vmovdqu %%xmm1, " VAR(TR) "\n\t" \ -#define CALC_IV_AVX2() \ - "# Calculate values when IV is not 12 bytes\n\t" \ - "# H = Encrypt X(=0)\n\t" \ - "vmovdqa 0(%[KEY]), "VAR(HR)"\n\t" \ - VAESENC_AVX(HR) \ - "vpshufb %[BSWAP_MASK], "VAR(HR)", "VAR(HR)"\n\t" \ - "# Calc counter\n\t" \ - "# Initialization vector\n\t" \ - "cmpl $0, %%edx\n\t" \ - "movq $0, %%rcx\n\t" \ - "je 45f\n\t" \ - "cmpl $16, %%edx\n\t" \ - "jl 44f\n\t" \ - "andl $0xfffffff0, %%edx\n\t" \ - "\n" \ - "43:\n\t" \ - "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ - GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR) \ - "addl $16, %%ecx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 43b\n\t" \ - "movl %[ibytes], %%edx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "je 45f\n\t" \ - "\n" \ - "44:\n\t" \ - "subq $16, %%rsp\n\t" \ - "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ - "xorl %%ebx, 
%%ebx\n\t" \ - "vmovdqu %%xmm4, (%%rsp)\n\t" \ - "42:\n\t" \ - "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ - "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ - "incl %%ecx\n\t" \ - "incl %%ebx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 42b\n\t" \ - "vmovdqu (%%rsp), %%xmm4\n\t" \ - "addq $16, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ - GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR) \ - "\n" \ - "45:\n\t" \ - "# T = Encrypt counter\n\t" \ - "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \ - "shll $3, %%edx\n\t" \ - "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \ - "vpxor %%xmm0, %%xmm13, %%xmm13\n\t" \ - GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR) \ - "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ - "# Encrypt counter\n\t" \ - "vmovdqa 0(%[KEY]), %%xmm4\n\t" \ - "vpxor %%xmm13, %%xmm4, %%xmm4\n\t" \ - VAESENC_AVX(%%xmm4) \ - "vmovdqu %%xmm4, "VAR(TR)"\n\t" +#define CALC_IV_AVX2() \ + "# Calculate values when IV is not 12 bytes\n\t" \ + "# H = Encrypt X(=0)\n\t" \ + "vmovdqa 0(%[KEY]), " VAR(HR) "\n\t" \ + VAESENC_AVX(HR) \ + "vpshufb %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \ + "# Calc counter\n\t" \ + "# Initialization vector\n\t" \ + "cmpl $0, %%edx\n\t" \ + "movq $0, %%rcx\n\t" \ + "je 45f\n\t" \ + "cmpl $16, %%edx\n\t" \ + "jl 44f\n\t" \ + "andl $0xfffffff0, %%edx\n\t" \ + "\n" \ + "43:\n\t" \ + "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ + "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ + GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR) \ + "addl $16, %%ecx\n\t" \ + "cmpl %%edx, %%ecx\n\t" \ + "jl 43b\n\t" \ + "movl %[ibytes], %%edx\n\t" \ + "cmpl %%edx, %%ecx\n\t" \ + "je 45f\n\t" \ + "\n" \ + "44:\n\t" \ + "subq $16, %%rsp\n\t" \ + "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ + "xorl %%ebx, %%ebx\n\t" \ + "vmovdqu %%xmm4, (%%rsp)\n\t" \ + "42:\n\t" \ + "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ + "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ + "incl %%ecx\n\t" \ + "incl %%ebx\n\t" \ + "cmpl %%edx, %%ecx\n\t" \ + "jl 42b\n\t" \ + "vmovdqu (%%rsp), %%xmm4\n\t" \ + "addq $16, %%rsp\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ + "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ + GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR) \ + "\n" \ + "45:\n\t" \ + "# T = Encrypt counter\n\t" \ + "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \ + "shll $3, %%edx\n\t" \ + "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \ + "vpxor %%xmm0, %%xmm13, %%xmm13\n\t" \ + GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR) \ + "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ + "# Encrypt counter\n\t" \ + "vmovdqa 0(%[KEY]), %%xmm4\n\t" \ + "vpxor %%xmm13, %%xmm4, %%xmm4\n\t" \ + VAESENC_AVX(%%xmm4) \ + "vmovdqu %%xmm4, " VAR(TR) "\n\t" -#define CALC_AAD_AVX2() \ - "# Additional authentication data\n\t" \ - "movl %[abytes], %%edx\n\t" \ - "cmpl $0, %%edx\n\t" \ - "je 25f\n\t" \ - "movq %[addt], %%rax\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "cmpl $16, %%edx\n\t" \ - "jl 24f\n\t" \ - "andl $0xfffffff0, %%edx\n\t" \ - "\n" \ - "23:\n\t" \ - "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, "VAR(XR)", "VAR(XR)"\n\t" \ - GHASH_FULL_AVX2(XR, %%xmm12, XR, HR) \ - "addl $16, %%ecx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 23b\n\t" \ - "movl %[abytes], %%edx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "je 25f\n\t" \ - "\n" \ - "24:\n\t" \ - "subq $16, %%rsp\n\t" \ - "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ - "xorl %%ebx, %%ebx\n\t" \ - "vmovdqu %%xmm4, (%%rsp)\n\t" \ - "22:\n\t" \ - "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ - "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ - 
"incl %%ecx\n\t" \ - "incl %%ebx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 22b\n\t" \ - "vmovdqu (%%rsp), %%xmm4\n\t" \ - "addq $16, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, "VAR(XR)", "VAR(XR)"\n\t" \ - GHASH_FULL_AVX2(XR, %%xmm12, XR, HR) \ - "\n" \ +#define CALC_AAD_AVX2() \ + "# Additional authentication data\n\t" \ + "movl %[abytes], %%edx\n\t" \ + "cmpl $0, %%edx\n\t" \ + "je 25f\n\t" \ + "movq %[addt], %%rax\n\t" \ + "xorl %%ecx, %%ecx\n\t" \ + "cmpl $16, %%edx\n\t" \ + "jl 24f\n\t" \ + "andl $0xfffffff0, %%edx\n\t" \ + "\n" \ + "23:\n\t" \ + "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ + "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" \ + GHASH_FULL_AVX2(XR, %%xmm12, XR, HR) \ + "addl $16, %%ecx\n\t" \ + "cmpl %%edx, %%ecx\n\t" \ + "jl 23b\n\t" \ + "movl %[abytes], %%edx\n\t" \ + "cmpl %%edx, %%ecx\n\t" \ + "je 25f\n\t" \ + "\n" \ + "24:\n\t" \ + "subq $16, %%rsp\n\t" \ + "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ + "xorl %%ebx, %%ebx\n\t" \ + "vmovdqu %%xmm4, (%%rsp)\n\t" \ + "22:\n\t" \ + "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ + "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ + "incl %%ecx\n\t" \ + "incl %%ebx\n\t" \ + "cmpl %%edx, %%ecx\n\t" \ + "jl 22b\n\t" \ + "vmovdqu (%%rsp), %%xmm4\n\t" \ + "addq $16, %%rsp\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ + "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" \ + GHASH_FULL_AVX2(XR, %%xmm12, XR, HR) \ + "\n" \ "25:\n\t" -#define VAESENC_128_GHASH_AVX2(src, o) \ - "leaq (%[in],"VAR(KR64)",1), %%rcx\n\t" \ - "leaq (%[out],"VAR(KR64)",1), %%rdx\n\t" \ +#define VAESENC_128_GHASH_AVX2(src, o) \ + "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" \ + "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" \ /* src is either %%rcx or %%rdx */ \ VAESENC_CTR() \ VAESENC_XOR() \ @@ -6206,86 +6206,86 @@ static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, "4:\n\t" \ VAESENC_LAST(%%rcx, %%rdx) -#define AESENC_LAST15_ENC_AVX2() \ - "movl %[nbytes], %%ecx\n\t" \ - "movl %%ecx, %%edx\n\t" \ - "andl $0x0f, %%ecx\n\t" \ - "jz 55f\n\t" \ - "vmovdqu "VAR(CTR1)", %%xmm13\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \ - "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \ - VAESENC_AVX(%%xmm13) \ - "subq $16, %%rsp\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "vmovdqu %%xmm13, (%%rsp)\n\t" \ - "\n" \ - "51:\n\t" \ - "movzbl (%[in],"VAR(KR64)",1), %%r13d\n\t" \ - "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ - "movb %%r13b, (%[out],"VAR(KR64)",1)\n\t" \ - "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ - "incl "VAR(KR)"\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %%edx, "VAR(KR)"\n\t" \ - "jl 51b\n\t" \ - "xorq %%r13, %%r13\n\t" \ - "cmpl $16, %%ecx\n\t" \ - "je 53f\n\t" \ - "\n" \ - "52:\n\t" \ - "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ - "incl %%ecx\n\t" \ - "cmpl $16, %%ecx\n\t" \ - "jl 52b\n\t" \ - "53:\n\t" \ - "vmovdqu (%%rsp), %%xmm13\n\t" \ - "addq $16, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ - "vpxor %%xmm13, "VAR(XR)", "VAR(XR)"\n\t" \ - GHASH_GFMUL_RED_AVX2(XR, HR, XR) \ +#define AESENC_LAST15_ENC_AVX2() \ + "movl %[nbytes], %%ecx\n\t" \ + "movl %%ecx, %%edx\n\t" \ + "andl $0x0f, %%ecx\n\t" \ + "jz 55f\n\t" \ + "vmovdqu " VAR(CTR1) ", %%xmm13\n\t" \ + "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \ + "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \ + VAESENC_AVX(%%xmm13) \ + "subq $16, %%rsp\n\t" \ + "xorl %%ecx, %%ecx\n\t" \ + "vmovdqu %%xmm13, (%%rsp)\n\t" \ + "\n" \ + "51:\n\t" \ + "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \ + "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ + "movb %%r13b, (%[out]," 
VAR(KR64) ",1)\n\t" \ + "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ + "incl " VAR(KR) "\n\t" \ + "incl %%ecx\n\t" \ + "cmpl %%edx, " VAR(KR) "\n\t" \ + "jl 51b\n\t" \ + "xorq %%r13, %%r13\n\t" \ + "cmpl $16, %%ecx\n\t" \ + "je 53f\n\t" \ + "\n" \ + "52:\n\t" \ + "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ + "incl %%ecx\n\t" \ + "cmpl $16, %%ecx\n\t" \ + "jl 52b\n\t" \ + "53:\n\t" \ + "vmovdqu (%%rsp), %%xmm13\n\t" \ + "addq $16, %%rsp\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ + "vpxor %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t" \ + GHASH_GFMUL_RED_AVX2(XR, HR, XR) \ -#define AESENC_LAST15_DEC_AVX2() \ - "movl %[nbytes], %%ecx\n\t" \ - "movl %%ecx, %%edx\n\t" \ - "andl $0x0f, %%ecx\n\t" \ - "jz 55f\n\t" \ - "vmovdqu "VAR(CTR1)", %%xmm13\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \ - "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \ - VAESENC_AVX(%%xmm13) \ - "subq $32, %%rsp\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "vmovdqu %%xmm13, (%%rsp)\n\t" \ - "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \ - "vmovdqu %%xmm0, 16(%%rsp)\n\t" \ - "\n" \ - "51:\n\t" \ - "movzbl (%[in],"VAR(KR64)",1), %%r13d\n\t" \ - "movb %%r13b, 16(%%rsp,%%rcx,1)\n\t" \ - "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ - "movb %%r13b, (%[out],"VAR(KR64)",1)\n\t" \ - "incl "VAR(KR)"\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %%edx, "VAR(KR)"\n\t" \ - "jl 51b\n\t" \ - "53:\n\t" \ - "vmovdqu 16(%%rsp), %%xmm13\n\t" \ - "addq $32, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ - "vpxor %%xmm13, "VAR(XR)", "VAR(XR)"\n\t" \ - GHASH_GFMUL_RED_AVX2(XR, HR, XR) \ +#define AESENC_LAST15_DEC_AVX2() \ + "movl %[nbytes], %%ecx\n\t" \ + "movl %%ecx, %%edx\n\t" \ + "andl $0x0f, %%ecx\n\t" \ + "jz 55f\n\t" \ + "vmovdqu " VAR(CTR1) ", %%xmm13\n\t" \ + "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \ + "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \ + VAESENC_AVX(%%xmm13) \ + "subq $32, %%rsp\n\t" \ + "xorl %%ecx, %%ecx\n\t" \ + "vmovdqu %%xmm13, (%%rsp)\n\t" \ + "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \ + "vmovdqu %%xmm0, 16(%%rsp)\n\t" \ + "\n" \ + "51:\n\t" \ + "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \ + "movb %%r13b, 16(%%rsp,%%rcx,1)\n\t" \ + "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ + "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \ + "incl " VAR(KR) "\n\t" \ + "incl %%ecx\n\t" \ + "cmpl %%edx, " VAR(KR) "\n\t" \ + "jl 51b\n\t" \ + "53:\n\t" \ + "vmovdqu 16(%%rsp), %%xmm13\n\t" \ + "addq $32, %%rsp\n\t" \ + "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ + "vpxor %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t" \ + GHASH_GFMUL_RED_AVX2(XR, HR, XR) \ -#define CALC_TAG_AVX2() \ - "movl %[nbytes], %%edx\n\t" \ - "movl %[abytes], %%ecx\n\t" \ - "shlq $3, %%rdx\n\t" \ - "shlq $3, %%rcx\n\t" \ - "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \ - "vpinsrq $1, %%rcx, %%xmm0, %%xmm0\n\t" \ - "vpxor %%xmm0, "VAR(XR)", "VAR(XR)"\n\t" \ - GHASH_GFMUL_RED_AVX2(XR, HR, XR) \ - "vpshufb %[BSWAP_MASK], "VAR(XR)", "VAR(XR)"\n\t" \ - "vpxor "VAR(TR)", "VAR(XR)", %%xmm0\n\t" \ +#define CALC_TAG_AVX2() \ + "movl %[nbytes], %%edx\n\t" \ + "movl %[abytes], %%ecx\n\t" \ + "shlq $3, %%rdx\n\t" \ + "shlq $3, %%rcx\n\t" \ + "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \ + "vpinsrq $1, %%rcx, %%xmm0, %%xmm0\n\t" \ + "vpxor %%xmm0, " VAR(XR) ", " VAR(XR) "\n\t" \ + GHASH_GFMUL_RED_AVX2(XR, HR, XR) \ + "vpshufb %[BSWAP_MASK], " VAR(XR) ", " VAR(XR) "\n\t" \ + "vpxor " VAR(TR) ", " VAR(XR) ", %%xmm0\n\t" \ static void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out, @@ -6299,10 +6299,10 @@ static void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out, register unsigned int ivLen 
asm("ebx") = ibytes; __asm__ __volatile__ ( - "subq $"VAR(STACK_OFFSET)", %%rsp\n\t" + "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t" /* Counter is xmm13 */ "vpxor %%xmm13, %%xmm13, %%xmm13\n\t" - "vpxor "VAR(XR)", "VAR(XR)", "VAR(XR)"\n\t" + "vpxor " VAR(XR) ", " VAR(XR) ", " VAR(XR) "\n\t" "movl %[ibytes], %%edx\n\t" "cmpl $12, %%edx\n\t" "jne 35f\n\t" @@ -6317,19 +6317,19 @@ static void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out, CALC_AAD_AVX2() "# Calculate counter and H\n\t" - "vpsrlq $63, "VAR(HR)", %%xmm5\n\t" - "vpsllq $1, "VAR(HR)", %%xmm4\n\t" + "vpsrlq $63, " VAR(HR) ", %%xmm5\n\t" + "vpsllq $1, " VAR(HR) ", %%xmm4\n\t" "vpslldq $8, %%xmm5, %%xmm5\n\t" "vpor %%xmm5, %%xmm4, %%xmm4\n\t" - "vpshufd $0xff, "VAR(HR)", "VAR(HR)"\n\t" - "vpsrad $31, "VAR(HR)", "VAR(HR)"\n\t" + "vpshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t" + "vpsrad $31, " VAR(HR) ", " VAR(HR) "\n\t" "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" - "vpand %[MOD2_128], "VAR(HR)", "VAR(HR)"\n\t" + "vpand %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t" "vpaddd %[ONE], %%xmm13, %%xmm13\n\t" - "vpxor %%xmm4, "VAR(HR)", "VAR(HR)"\n\t" - "vmovdqu %%xmm13, "VAR(CTR1)"\n\t" + "vpxor %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t" + "vmovdqu %%xmm13, " VAR(CTR1) "\n\t" - "xorl "VAR(KR)", "VAR(KR)"\n\t" + "xorl " VAR(KR) ", " VAR(KR) "\n\t" #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL) "cmpl $128, %[nbytes]\n\t" @@ -6343,15 +6343,15 @@ static void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out, VAESENC_128() "cmpl $128, %%r13d\n\t" - "movl $128, "VAR(KR)"\n\t" + "movl $128, " VAR(KR) "\n\t" "jle 2f\n\t" "# More 128 bytes of input\n\t" "\n" "3:\n\t" VAESENC_128_GHASH_AVX2(%%rdx, 0) - "addl $128, "VAR(KR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "addl $128, " VAR(KR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jl 3b\n\t" "\n" "2:\n\t" @@ -6368,37 +6368,37 @@ static void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out, GHASH_GFMUL_RED_8_AVX2() - "vmovdqu 0("VAR(HTR)"), "VAR(HR)"\n\t" + "vmovdqu 0(" VAR(HTR) "), " VAR(HR) "\n\t" "\n" "5:\n\t" "movl %[nbytes], %%edx\n\t" - "cmpl %%edx, "VAR(KR)"\n\t" + "cmpl %%edx, " VAR(KR) "\n\t" "jge 55f\n\t" #endif "movl %[nbytes], %%r13d\n\t" "andl $0xfffffff0, %%r13d\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jge 14f\n\t" VAESENC_BLOCK_AVX2() - "addl $16, "VAR(KR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "addl $16, " VAR(KR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jge 13f\n\t" "vmovdqa %[MOD2_128], %%xmm0\n\t" "\n" "12:\n\t" - "vmovdqu (%[in],"VAR(KR64)",1), %%xmm9\n\t" - "vmovdqu "VAR(CTR1)", %%xmm5\n\t" + "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm9\n\t" + "vmovdqu " VAR(CTR1) ", %%xmm5\n\t" "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" - "vmovdqu %%xmm5, "VAR(CTR1)"\n\t" + "vmovdqu %%xmm5, " VAR(CTR1) "\n\t" VAESENC_GFMUL_SB_AVX2(%%xmm9, HR, XR, CTR1) - "vmovdqu %%xmm4, (%[out],"VAR(KR64)",1)\n\t" + "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" - "addl $16, "VAR(KR)"\n\t" - "vpxor %%xmm4, "VAR(XR)", "VAR(XR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "addl $16, " VAR(KR) "\n\t" + "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jl 12b\n\t" "\n" "13:\n\t" @@ -6412,7 +6412,7 @@ static void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out, CALC_TAG_AVX2() STORE_TAG_AVX() - "addq $"VAR(STACK_OFFSET)", %%rsp\n\t" + "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t" "vzeroupper\n\t" : @@ -6454,7 +6454,7 @@ 
static void AES_GCM_decrypt(const unsigned char *in, unsigned char *out, __asm__ __volatile__ ( "pushq %%rdx\n\t" - "subq $"VAR(STACK_OFFSET)", %%rsp\n\t" + "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t" /* Counter is xmm13 */ "pxor %%xmm13, %%xmm13\n\t" "pxor %%xmm15, %%xmm15\n\t" @@ -6472,20 +6472,20 @@ static void AES_GCM_decrypt(const unsigned char *in, unsigned char *out, "# Calculate counter and H\n\t" "pshufb %[BSWAP_EPI64], %%xmm13\n\t" - "movdqa "VAR(HR)", %%xmm5\n\t" + "movdqa " VAR(HR) ", %%xmm5\n\t" "paddd %[ONE], %%xmm13\n\t" - "movdqa "VAR(HR)", %%xmm4\n\t" - "movdqu %%xmm13, "VAR(CTR1)"\n\t" + "movdqa " VAR(HR) ", %%xmm4\n\t" + "movdqu %%xmm13, " VAR(CTR1) "\n\t" "psrlq $63, %%xmm5\n\t" "psllq $1, %%xmm4\n\t" "pslldq $8, %%xmm5\n\t" "por %%xmm5, %%xmm4\n\t" - "pshufd $0xff, "VAR(HR)", "VAR(HR)"\n\t" - "psrad $31, "VAR(HR)"\n\t" - "pand %[MOD2_128], "VAR(HR)"\n\t" - "pxor %%xmm4, "VAR(HR)"\n\t" + "pshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t" + "psrad $31, " VAR(HR) "\n\t" + "pand %[MOD2_128], " VAR(HR) "\n\t" + "pxor %%xmm4, " VAR(HR) "\n\t" - "xorl "VAR(KR)", "VAR(KR)"\n\t" + "xorl " VAR(KR) ", " VAR(KR) "\n\t" #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) "cmpl $128, %[nbytes]\n\t" @@ -6498,33 +6498,33 @@ static void AES_GCM_decrypt(const unsigned char *in, unsigned char *out, "\n" "2:\n\t" AESENC_128_GHASH_AVX(%%rcx, 128) - "addl $128, "VAR(KR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "addl $128, " VAR(KR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jl 2b\n\t" - "movdqa %%xmm2, "VAR(XR)"\n\t" - "movdqu (%%rsp), "VAR(HR)"\n\t" + "movdqa %%xmm2, " VAR(XR) "\n\t" + "movdqu (%%rsp), " VAR(HR) "\n\t" "5:\n\t" "movl %[nbytes], %%edx\n\t" - "cmpl %%edx, "VAR(KR)"\n\t" + "cmpl %%edx, " VAR(KR) "\n\t" "jge 55f\n\t" #endif "movl %[nbytes], %%r13d\n\t" "andl $0xfffffff0, %%r13d\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jge 13f\n\t" "\n" "12:\n\t" - "leaq (%[in],"VAR(KR64)",1), %%rcx\n\t" - "leaq (%[out],"VAR(KR64)",1), %%rdx\n\t" + "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" + "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" "movdqu (%%rcx), %%xmm1\n\t" - "movdqa "VAR(HR)", %%xmm0\n\t" + "movdqa " VAR(HR) ", %%xmm0\n\t" "pshufb %[BSWAP_MASK], %%xmm1\n\t" - "pxor "VAR(XR)", %%xmm1\n\t" + "pxor " VAR(XR) ", %%xmm1\n\t" AESENC_GFMUL(%%rcx, %%rdx, %%xmm0, %%xmm1) - "addl $16, "VAR(KR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "addl $16, " VAR(KR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jl 12b\n\t" "\n" "13:\n\t" @@ -6534,7 +6534,7 @@ static void AES_GCM_decrypt(const unsigned char *in, unsigned char *out, "55:\n\t" CALC_TAG() - "addq $"VAR(STACK_OFFSET)", %%rsp\n\t" + "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t" "popq %%rdx\n\t" CMP_TAG() @@ -6574,7 +6574,7 @@ static void AES_GCM_decrypt_avx1(const unsigned char *in, unsigned char *out, __asm__ __volatile__ ( "pushq %%rdx\n\t" - "subq $"VAR(STACK_OFFSET)", %%rsp\n\t" + "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t" /* Counter is xmm13 */ "vpxor %%xmm13, %%xmm13, %%xmm13\n\t" "vpxor %%xmm15, %%xmm15, %%xmm15\n\t" @@ -6591,19 +6591,19 @@ static void AES_GCM_decrypt_avx1(const unsigned char *in, unsigned char *out, CALC_AAD_AVX1() "# Calculate counter and H\n\t" - "vpsrlq $63, "VAR(HR)", %%xmm5\n\t" - "vpsllq $1, "VAR(HR)", %%xmm4\n\t" + "vpsrlq $63, " VAR(HR) ", %%xmm5\n\t" + "vpsllq $1, " VAR(HR) ", %%xmm4\n\t" "vpslldq $8, %%xmm5, %%xmm5\n\t" "vpor %%xmm5, %%xmm4, %%xmm4\n\t" - "vpshufd $0xff, "VAR(HR)", "VAR(HR)"\n\t" - "vpsrad $31, "VAR(HR)", "VAR(HR)"\n\t" + "vpshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t" + 
"vpsrad $31, " VAR(HR) ", " VAR(HR) "\n\t" "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" - "vpand %[MOD2_128], "VAR(HR)", "VAR(HR)"\n\t" + "vpand %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t" "vpaddd %[ONE], %%xmm13, %%xmm13\n\t" - "vpxor %%xmm4, "VAR(HR)", "VAR(HR)"\n\t" - "vmovdqu %%xmm13, "VAR(CTR1)"\n\t" + "vpxor %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t" + "vmovdqu %%xmm13, " VAR(CTR1) "\n\t" - "xorl "VAR(KR)", "VAR(KR)"\n\t" + "xorl " VAR(KR) ", " VAR(KR) "\n\t" #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) "cmpl $128, %[nbytes]\n\t" @@ -6616,31 +6616,31 @@ static void AES_GCM_decrypt_avx1(const unsigned char *in, unsigned char *out, "\n" "2:\n\t" VAESENC_128_GHASH_AVX1(%%rcx, 128) - "addl $128, "VAR(KR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "addl $128, " VAR(KR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jl 2b\n\t" - "vmovdqa %%xmm2, "VAR(XR)"\n\t" - "vmovdqu (%%rsp), "VAR(HR)"\n\t" + "vmovdqa %%xmm2, " VAR(XR) "\n\t" + "vmovdqu (%%rsp), " VAR(HR) "\n\t" "5:\n\t" "movl %[nbytes], %%edx\n\t" - "cmpl %%edx, "VAR(KR)"\n\t" + "cmpl %%edx, " VAR(KR) "\n\t" "jge 55f\n\t" #endif "movl %[nbytes], %%r13d\n\t" "andl $0xfffffff0, %%r13d\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jge 13f\n\t" "\n" "12:\n\t" - "vmovdqu (%[in],"VAR(KR64)",1), %%xmm9\n\t" - "vmovdqa "VAR(HR)", %%xmm0\n\t" + "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm9\n\t" + "vmovdqa " VAR(HR) ", %%xmm0\n\t" "vpshufb %[BSWAP_MASK], %%xmm9, %%xmm1\n\t" - "vpxor "VAR(XR)", %%xmm1, %%xmm1\n\t" + "vpxor " VAR(XR) ", %%xmm1, %%xmm1\n\t" VAESENC_GFMUL(%%xmm9, %%xmm0, %%xmm1) - "addl $16, "VAR(KR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "addl $16, " VAR(KR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jl 12b\n\t" "\n" "13:\n\t" @@ -6650,7 +6650,7 @@ static void AES_GCM_decrypt_avx1(const unsigned char *in, unsigned char *out, "55:\n\t" CALC_TAG_AVX1() - "addq $"VAR(STACK_OFFSET)", %%rsp\n\t" + "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t" "popq %%rdx\n\t" CMP_TAG_AVX() "vzeroupper\n\t" @@ -6691,7 +6691,7 @@ static void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out, __asm__ __volatile__ ( "pushq %%rdx\n\t" - "subq $"VAR(STACK_OFFSET)", %%rsp\n\t" + "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t" /* Counter is xmm13 */ "vpxor %%xmm13, %%xmm13, %%xmm13\n\t" "vpxor %%xmm15, %%xmm15, %%xmm15\n\t" @@ -6709,19 +6709,19 @@ static void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out, CALC_AAD_AVX2() "# Calculate counter and H\n\t" - "vpsrlq $63, "VAR(HR)", %%xmm5\n\t" - "vpsllq $1, "VAR(HR)", %%xmm4\n\t" + "vpsrlq $63, " VAR(HR) ", %%xmm5\n\t" + "vpsllq $1, " VAR(HR) ", %%xmm4\n\t" "vpslldq $8, %%xmm5, %%xmm5\n\t" "vpor %%xmm5, %%xmm4, %%xmm4\n\t" - "vpshufd $0xff, "VAR(HR)", "VAR(HR)"\n\t" - "vpsrad $31, "VAR(HR)", "VAR(HR)"\n\t" + "vpshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t" + "vpsrad $31, " VAR(HR) ", " VAR(HR) "\n\t" "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" - "vpand %[MOD2_128], "VAR(HR)", "VAR(HR)"\n\t" + "vpand %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t" "vpaddd %[ONE], %%xmm13, %%xmm13\n\t" - "vpxor %%xmm4, "VAR(HR)", "VAR(HR)"\n\t" - "vmovdqu %%xmm13, "VAR(CTR1)"\n\t" + "vpxor %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t" + "vmovdqu %%xmm13, " VAR(CTR1) "\n\t" - "xorl "VAR(KR)", "VAR(KR)"\n\t" + "xorl " VAR(KR) ", " VAR(KR) "\n\t" #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL) "cmpl $128, %[nbytes]\n\t" @@ -6734,36 +6734,36 @@ static void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out, "\n" "2:\n\t" VAESENC_128_GHASH_AVX2(%%rcx, 128) - 
"addl $128, "VAR(KR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "addl $128, " VAR(KR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jl 2b\n\t" - "vmovdqa %%xmm2, "VAR(XR)"\n\t" - "vmovdqu (%%rsp), "VAR(HR)"\n\t" + "vmovdqa %%xmm2, " VAR(XR) "\n\t" + "vmovdqu (%%rsp), " VAR(HR) "\n\t" "5:\n\t" "movl %[nbytes], %%edx\n\t" - "cmpl %%edx, "VAR(KR)"\n\t" + "cmpl %%edx, " VAR(KR) "\n\t" "jge 55f\n\t" #endif "movl %[nbytes], %%r13d\n\t" "andl $0xfffffff0, %%r13d\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jge 13f\n\t" "vmovdqa %[MOD2_128], %%xmm0\n\t" "\n" "12:\n\t" - "vmovdqu (%[in],"VAR(KR64)",1), %%xmm9\n\t" - "vmovdqu "VAR(CTR1)", %%xmm5\n\t" + "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm9\n\t" + "vmovdqu " VAR(CTR1) ", %%xmm5\n\t" "vpshufb %[BSWAP_MASK], %%xmm9, %%xmm1\n\t" "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" - "vpxor "VAR(XR)", %%xmm1, %%xmm1\n\t" - "vmovdqu %%xmm5, "VAR(CTR1)"\n\t" + "vpxor " VAR(XR) ", %%xmm1, %%xmm1\n\t" + "vmovdqu %%xmm5, " VAR(CTR1) "\n\t" VAESENC_GFMUL_SB_AVX2(%%xmm9, HR, %%xmm1, CTR1) - "vmovdqu %%xmm4, (%[out],"VAR(KR64)",1)\n\t" - "addl $16, "VAR(KR)"\n\t" - "cmpl %%r13d, "VAR(KR)"\n\t" + "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" + "addl $16, " VAR(KR) "\n\t" + "cmpl %%r13d, " VAR(KR) "\n\t" "jl 12b\n\t" "\n" "13:\n\t" @@ -6773,7 +6773,7 @@ static void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out, "55:\n\t" CALC_TAG_AVX2() - "addq $"VAR(STACK_OFFSET)", %%rsp\n\t" + "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t" "popq %%rdx\n\t" CMP_TAG_AVX() "vzeroupper\n\t" diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index 1efe335eb..731e1605f 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -875,231 +875,231 @@ static int InitSha256(wc_Sha256* sha256) #if defined(HAVE_INTEL_RORX) #define RND_STEP_RORX_0_1(a, b, c, d, e, f, g, h, i) \ /* L3 = f */ \ - "movl %"#f", "L3"\n\t" \ + "movl %" #f ", " L3 "\n\t" \ /* L2 = e>>>11 */ \ - "rorx $11, %"#e", "L2"\n\t" \ + "rorx $11, %" #e ", " L2 "\n\t" \ /* h += w_k */ \ - "addl ("#i")*4("WK"), %"#h"\n\t" \ + "addl (" #i ")*4(" WK "), %" #h "\n\t" \ #define RND_STEP_RORX_0_2(a, b, c, d, e, f, g, h, i) \ /* L2 = (e>>>6) ^ (e>>>11) */ \ - "xorl "L1", "L2"\n\t" \ + "xorl " L1 ", " L2 "\n\t" \ /* L3 = f ^ g */ \ - "xorl %"#g", "L3"\n\t" \ + "xorl %" #g ", " L3 "\n\t" \ /* L1 = e>>>25 */ \ - "rorx $25, %"#e", "L1"\n\t" \ + "rorx $25, %" #e ", " L1 "\n\t" \ #define RND_STEP_RORX_0_3(a, b, c, d, e, f, g, h, i) \ /* L3 = (f ^ g) & e */ \ - "andl %"#e", "L3"\n\t" \ + "andl %" #e ", " L3 "\n\t" \ /* L1 = Sigma1(e) */ \ - "xorl "L2", "L1"\n\t" \ + "xorl " L2 ", " L1 "\n\t" \ /* L2 = a>>>13 */ \ - "rorx $13, %"#a", "L2"\n\t" \ + "rorx $13, %" #a ", " L2 "\n\t" \ #define RND_STEP_RORX_0_4(a, b, c, d, e, f, g, h, i) \ /* h += Sigma1(e) */ \ - "addl "L1", %"#h"\n\t" \ + "addl " L1 ", %" #h "\n\t" \ /* L1 = a>>>2 */ \ - "rorx $2, %"#a", "L1"\n\t" \ + "rorx $2, %" #a ", " L1 "\n\t" \ /* L3 = Ch(e,f,g) */ \ - "xorl %"#g", "L3"\n\t" \ + "xorl %" #g ", " L3 "\n\t" \ #define RND_STEP_RORX_0_5(a, b, c, d, e, f, g, h, i) \ /* L2 = (a>>>2) ^ (a>>>13) */ \ - "xorl "L1", "L2"\n\t" \ + "xorl " L1 ", " L2 "\n\t" \ /* L1 = a>>>22 */ \ - "rorx $22, %"#a", "L1"\n\t" \ + "rorx $22, %" #a ", " L1 "\n\t" \ /* h += Ch(e,f,g) */ \ - "addl "L3", %"#h"\n\t" \ + "addl " L3 ", %" #h "\n\t" \ #define RND_STEP_RORX_0_6(a, b, c, d, e, f, g, h, i) \ /* L1 = Sigma0(a) */ \ - "xorl "L2", "L1"\n\t" \ + "xorl " L2 ", " L1 "\n\t" \ /* L3 = b */ \ - "movl %"#b", "L3"\n\t" \ + "movl 
%" #b ", " L3 "\n\t" \ /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \ - "addl %"#h", %"#d"\n\t" \ + "addl %" #h ", %" #d "\n\t" \ #define RND_STEP_RORX_0_7(a, b, c, d, e, f, g, h, i) \ /* L3 = a ^ b */ \ - "xorl %"#a", "L3"\n\t" \ + "xorl %" #a ", " L3 "\n\t" \ /* h += Sigma0(a) */ \ - "addl "L1", %"#h"\n\t" \ + "addl " L1 ", %" #h "\n\t" \ /* L4 = (a ^ b) & (b ^ c) */ \ - "andl "L3", "L4"\n\t" \ + "andl " L3 ", " L4 "\n\t" \ #define RND_STEP_RORX_0_8(a, b, c, d, e, f, g, h, i) \ /* L4 = Maj(a,b,c) */ \ - "xorl %"#b", "L4"\n\t" \ + "xorl %" #b ", " L4 "\n\t" \ /* L1 = d>>>6 (= e>>>6 next RND) */ \ - "rorx $6, %"#d", "L1"\n\t" \ + "rorx $6, %" #d ", " L1 "\n\t" \ /* h += Maj(a,b,c) */ \ - "addl "L4", %"#h"\n\t" \ + "addl " L4 ", %" #h "\n\t" \ #define RND_STEP_RORX_1_1(a, b, c, d, e, f, g, h, i) \ /* L4 = f */ \ - "movl %"#f", "L4"\n\t" \ + "movl %" #f ", " L4 "\n\t" \ /* L2 = e>>>11 */ \ - "rorx $11, %"#e", "L2"\n\t" \ + "rorx $11, %" #e ", " L2 "\n\t" \ /* h += w_k */ \ - "addl ("#i")*4("WK"), %"#h"\n\t" \ + "addl (" #i ")*4(" WK "), %" #h "\n\t" \ #define RND_STEP_RORX_1_2(a, b, c, d, e, f, g, h, i) \ /* L2 = (e>>>6) ^ (e>>>11) */ \ - "xorl "L1", "L2"\n\t" \ + "xorl " L1 ", " L2 "\n\t" \ /* L4 = f ^ g */ \ - "xorl %"#g", "L4"\n\t" \ + "xorl %" #g ", " L4 "\n\t" \ /* L1 = e>>>25 */ \ - "rorx $25, %"#e", "L1"\n\t" \ + "rorx $25, %" #e ", " L1 "\n\t" \ #define RND_STEP_RORX_1_3(a, b, c, d, e, f, g, h, i) \ /* L4 = (f ^ g) & e */ \ - "andl %"#e", "L4"\n\t" \ + "andl %" #e ", " L4 "\n\t" \ /* L1 = Sigma1(e) */ \ - "xorl "L2", "L1"\n\t" \ + "xorl " L2 ", " L1 "\n\t" \ /* L2 = a>>>13 */ \ - "rorx $13, %"#a", "L2"\n\t" \ + "rorx $13, %" #a ", " L2 "\n\t" \ #define RND_STEP_RORX_1_4(a, b, c, d, e, f, g, h, i) \ /* h += Sigma1(e) */ \ - "addl "L1", %"#h"\n\t" \ + "addl " L1 ", %" #h "\n\t" \ /* L1 = a>>>2 */ \ - "rorx $2, %"#a", "L1"\n\t" \ + "rorx $2, %" #a ", " L1 "\n\t" \ /* L4 = Ch(e,f,g) */ \ - "xorl %"#g", "L4"\n\t" \ + "xorl %" #g ", " L4 "\n\t" \ #define RND_STEP_RORX_1_5(a, b, c, d, e, f, g, h, i) \ /* L2 = (a>>>2) ^ (a>>>13) */ \ - "xorl "L1", "L2"\n\t" \ + "xorl " L1 ", " L2 "\n\t" \ /* L1 = a>>>22 */ \ - "rorx $22, %"#a", "L1"\n\t" \ + "rorx $22, %" #a ", " L1 "\n\t" \ /* h += Ch(e,f,g) */ \ - "addl "L4", %"#h"\n\t" \ + "addl " L4 ", %" #h "\n\t" \ #define RND_STEP_RORX_1_6(a, b, c, d, e, f, g, h, i) \ /* L1 = Sigma0(a) */ \ - "xorl "L2", "L1"\n\t" \ + "xorl " L2 ", " L1 "\n\t" \ /* L4 = b */ \ - "movl %"#b", "L4"\n\t" \ + "movl %" #b ", " L4 "\n\t" \ /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \ - "addl %"#h", %"#d"\n\t" \ + "addl %" #h ", %" #d "\n\t" \ #define RND_STEP_RORX_1_7(a, b, c, d, e, f, g, h, i) \ /* L4 = a ^ b */ \ - "xorl %"#a", "L4"\n\t" \ + "xorl %" #a ", " L4 "\n\t" \ /* h += Sigma0(a) */ \ - "addl "L1", %"#h"\n\t" \ + "addl " L1 ", %" #h "\n\t" \ /* L3 = (a ^ b) & (b ^ c) */ \ - "andl "L4", "L3"\n\t" \ + "andl " L4 ", " L3 "\n\t" \ #define RND_STEP_RORX_1_8(a, b, c, d, e, f, g, h, i) \ /* L3 = Maj(a,b,c) */ \ - "xorl %"#b", "L3"\n\t" \ + "xorl %" #b ", " L3 "\n\t" \ /* L1 = d>>>6 (= e>>>6 next RND) */ \ - "rorx $6, %"#d", "L1"\n\t" \ + "rorx $6, %" #d ", " L1 "\n\t" \ /* h += Maj(a,b,c) */ \ - "addl "L3", %"#h"\n\t" \ + "addl " L3 ", %" #h "\n\t" \ #define _RND_RORX_X_0(a, b, c, d, e, f, g, h, i) \ /* L1 = e>>>6 */ \ - "rorx $6, %"#e", "L1"\n\t" \ + "rorx $6, %" #e ", " L1 "\n\t" \ /* L2 = e>>>11 */ \ - "rorx $11, %"#e", "L2"\n\t" \ + "rorx $11, %" #e ", " L2 "\n\t" \ /* Prev RND: h += Maj(a,b,c) */ \ - "addl "L3", %"#a"\n\t" \ + "addl " L3 ", %" #a "\n\t" \ /* h += w_k 
*/ \ - "addl ("#i")*4("WK"), %"#h"\n\t" \ + "addl (" #i ")*4(" WK "), %" #h "\n\t" \ /* L3 = f */ \ - "movl %"#f", "L3"\n\t" \ + "movl %" #f ", " L3 "\n\t" \ /* L2 = (e>>>6) ^ (e>>>11) */ \ - "xorl "L1", "L2"\n\t" \ + "xorl " L1 ", " L2 "\n\t" \ /* L3 = f ^ g */ \ - "xorl %"#g", "L3"\n\t" \ + "xorl %" #g ", " L3 "\n\t" \ /* L1 = e>>>25 */ \ - "rorx $25, %"#e", "L1"\n\t" \ + "rorx $25, %" #e ", " L1 "\n\t" \ /* L1 = Sigma1(e) */ \ - "xorl "L2", "L1"\n\t" \ + "xorl " L2 ", " L1 "\n\t" \ /* L3 = (f ^ g) & e */ \ - "andl %"#e", "L3"\n\t" \ + "andl %" #e ", " L3 "\n\t" \ /* h += Sigma1(e) */ \ - "addl "L1", %"#h"\n\t" \ + "addl " L1 ", %" #h "\n\t" \ /* L1 = a>>>2 */ \ - "rorx $2, %"#a", "L1"\n\t" \ + "rorx $2, %" #a ", " L1 "\n\t" \ /* L2 = a>>>13 */ \ - "rorx $13, %"#a", "L2"\n\t" \ + "rorx $13, %" #a ", " L2 "\n\t" \ /* L3 = Ch(e,f,g) */ \ - "xorl %"#g", "L3"\n\t" \ + "xorl %" #g ", " L3 "\n\t" \ /* L2 = (a>>>2) ^ (a>>>13) */ \ - "xorl "L1", "L2"\n\t" \ + "xorl " L1 ", " L2 "\n\t" \ /* L1 = a>>>22 */ \ - "rorx $22, %"#a", "L1"\n\t" \ + "rorx $22, %" #a ", " L1 "\n\t" \ /* h += Ch(e,f,g) */ \ - "addl "L3", %"#h"\n\t" \ + "addl " L3 ", %" #h "\n\t" \ /* L1 = Sigma0(a) */ \ - "xorl "L2", "L1"\n\t" \ + "xorl " L2 ", " L1 "\n\t" \ /* L3 = b */ \ - "movl %"#b", "L3"\n\t" \ + "movl %" #b ", " L3 "\n\t" \ /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \ - "addl %"#h", %"#d"\n\t" \ + "addl %" #h ", %" #d "\n\t" \ /* L3 = a ^ b */ \ - "xorl %"#a", "L3"\n\t" \ + "xorl %" #a ", " L3 "\n\t" \ /* L4 = (a ^ b) & (b ^ c) */ \ - "andl "L3", "L4"\n\t" \ + "andl " L3 ", " L4 "\n\t" \ /* h += Sigma0(a) */ \ - "addl "L1", %"#h"\n\t" \ + "addl " L1 ", %" #h "\n\t" \ /* L4 = Maj(a,b,c) */ \ - "xorl %"#b", "L4"\n\t" \ + "xorl %" #b ", " L4 "\n\t" \ #define _RND_RORX_X_1(a, b, c, d, e, f, g, h, i) \ /* L1 = e>>>6 */ \ - "rorx $6, %"#e", "L1"\n\t" \ + "rorx $6, %" #e ", " L1 "\n\t" \ /* L2 = e>>>11 */ \ - "rorx $11, %"#e", "L2"\n\t" \ + "rorx $11, %" #e ", " L2 "\n\t" \ /* Prev RND: h += Maj(a,b,c) */ \ - "addl "L4", %"#a"\n\t" \ + "addl " L4 ", %" #a "\n\t" \ /* h += w_k */ \ - "addl ("#i")*4("WK"), %"#h"\n\t" \ + "addl (" #i ")*4(" WK "), %" #h "\n\t" \ /* L4 = f */ \ - "movl %"#f", "L4"\n\t" \ + "movl %" #f ", " L4 "\n\t" \ /* L2 = (e>>>6) ^ (e>>>11) */ \ - "xorl "L1", "L2"\n\t" \ + "xorl " L1 ", " L2 "\n\t" \ /* L4 = f ^ g */ \ - "xorl %"#g", "L4"\n\t" \ + "xorl %" #g ", " L4 "\n\t" \ /* L1 = e>>>25 */ \ - "rorx $25, %"#e", "L1"\n\t" \ + "rorx $25, %" #e ", " L1 "\n\t" \ /* L1 = Sigma1(e) */ \ - "xorl "L2", "L1"\n\t" \ + "xorl " L2 ", " L1 "\n\t" \ /* L4 = (f ^ g) & e */ \ - "andl %"#e", "L4"\n\t" \ + "andl %" #e ", " L4 "\n\t" \ /* h += Sigma1(e) */ \ - "addl "L1", %"#h"\n\t" \ + "addl " L1 ", %" #h "\n\t" \ /* L1 = a>>>2 */ \ - "rorx $2, %"#a", "L1"\n\t" \ + "rorx $2, %" #a ", " L1 "\n\t" \ /* L2 = a>>>13 */ \ - "rorx $13, %"#a", "L2"\n\t" \ + "rorx $13, %" #a ", " L2 "\n\t" \ /* L4 = Ch(e,f,g) */ \ - "xorl %"#g", "L4"\n\t" \ + "xorl %" #g ", " L4 "\n\t" \ /* L2 = (a>>>2) ^ (a>>>13) */ \ - "xorl "L1", "L2"\n\t" \ + "xorl " L1 ", " L2 "\n\t" \ /* L1 = a>>>22 */ \ - "rorx $22, %"#a", "L1"\n\t" \ + "rorx $22, %" #a ", " L1 "\n\t" \ /* h += Ch(e,f,g) */ \ - "addl "L4", %"#h"\n\t" \ + "addl " L4 ", %" #h "\n\t" \ /* L1 = Sigma0(a) */ \ - "xorl "L2", "L1"\n\t" \ + "xorl " L2 ", " L1 "\n\t" \ /* L4 = b */ \ - "movl %"#b", "L4"\n\t" \ + "movl %" #b ", " L4 "\n\t" \ /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \ - "addl %"#h", %"#d"\n\t" \ + "addl %" #h ", %" #d "\n\t" \ /* L4 = a ^ b */ \ - "xorl %"#a", "L4"\n\t" \ + "xorl 
%" #a ", " L4 "\n\t" \ /* L2 = (a ^ b) & (b ^ c) */ \ - "andl "L4", "L3"\n\t" \ + "andl " L4 ", " L3 "\n\t" \ /* h += Sigma0(a) */ \ - "addl "L1", %"#h"\n\t" \ + "addl " L1 ", %" #h "\n\t" \ /* L3 = Maj(a,b,c) */ \ - "xorl %"#b", "L3"\n\t" \ + "xorl %" #b ", " L3 "\n\t" \ #define RND_RORX_X_0(a,b,c,d,e,f,g,h,i) \ @@ -1117,247 +1117,247 @@ static int InitSha256(wc_Sha256* sha256) #define RND_STEP_0_1(a,b,c,d,e,f,g,h,i) \ /* L1 = e>>>14 */ \ - "rorl $14, "L1"\n\t" \ + "rorl $14, " L1 "\n\t" \ #define RND_STEP_0_2(a,b,c,d,e,f,g,h,i) \ /* L3 = b */ \ - "movl %"#b", "L3"\n\t" \ + "movl %" #b ", " L3 "\n\t" \ /* L2 = f */ \ - "movl %"#f", "L2"\n\t" \ + "movl %" #f ", " L2 "\n\t" \ /* h += w_k */ \ - "addl ("#i")*4("WK"), %"#h"\n\t" \ + "addl (" #i ")*4(" WK "), %" #h "\n\t" \ /* L2 = f ^ g */ \ - "xorl %"#g", "L2"\n\t" \ + "xorl %" #g ", " L2 "\n\t" \ #define RND_STEP_0_3(a,b,c,d,e,f,g,h,i) \ /* L1 = (e>>>14) ^ e */ \ - "xorl %"#e", "L1"\n\t" \ + "xorl %" #e ", " L1 "\n\t" \ /* L2 = (f ^ g) & e */ \ - "andl %"#e", "L2"\n\t" \ - + "andl %" #e ", " L2 "\n\t" \ + #define RND_STEP_0_4(a,b,c,d,e,f,g,h,i) \ /* L1 = ((e>>>14) ^ e) >>> 5 */ \ - "rorl $5, "L1"\n\t" \ + "rorl $5, " L1 "\n\t" \ /* L2 = Ch(e,f,g) */ \ - "xorl %"#g", "L2"\n\t" \ + "xorl %" #g ", " L2 "\n\t" \ /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */ \ - "xorl %"#e", "L1"\n\t" \ + "xorl %" #e ", " L1 "\n\t" \ /* h += Ch(e,f,g) */ \ - "addl "L2", %"#h"\n\t" \ + "addl " L2 ", %" #h "\n\t" \ #define RND_STEP_0_5(a,b,c,d,e,f,g,h,i) \ /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 */ \ - "rorl $6, "L1"\n\t" \ + "rorl $6, " L1 "\n\t" \ /* L3 = a ^ b (= b ^ c of next RND) */ \ - "xorl %"#a", "L3"\n\t" \ + "xorl %" #a ", " L3 "\n\t" \ /* h = h + w_k + Sigma1(e) */ \ - "addl "L1", %"#h"\n\t" \ + "addl " L1 ", %" #h "\n\t" \ /* L2 = a */ \ - "movl %"#a", "L2"\n\t" \ + "movl %" #a ", " L2 "\n\t" \ #define RND_STEP_0_6(a,b,c,d,e,f,g,h,i) \ /* L3 = (a ^ b) & (b ^ c) */ \ - "andl "L3", "L4"\n\t" \ + "andl " L3 ", " L4 "\n\t" \ /* L2 = a>>>9 */ \ - "rorl $9, "L2"\n\t" \ + "rorl $9, " L2 "\n\t" \ /* L2 = (a>>>9) ^ a */ \ - "xorl %"#a", "L2"\n\t" \ + "xorl %" #a ", " L2 "\n\t" \ /* L1 = Maj(a,b,c) */ \ - "xorl %"#b", "L4"\n\t" \ + "xorl %" #b ", " L4 "\n\t" \ #define RND_STEP_0_7(a,b,c,d,e,f,g,h,i) \ /* L2 = ((a>>>9) ^ a) >>> 11 */ \ - "rorl $11, "L2"\n\t" \ + "rorl $11, " L2 "\n\t" \ /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \ - "addl %"#h", %"#d"\n\t" \ + "addl %" #h ", %" #d "\n\t" \ /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */ \ - "xorl %"#a", "L2"\n\t" \ + "xorl %" #a ", " L2 "\n\t" \ /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */ \ - "addl "L4", %"#h"\n\t" \ + "addl " L4 ", %" #h "\n\t" \ #define RND_STEP_0_8(a,b,c,d,e,f,g,h,i) \ /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 */ \ - "rorl $2, "L2"\n\t" \ + "rorl $2, " L2 "\n\t" \ /* L1 = d (e of next RND) */ \ - "movl %"#d", "L1"\n\t" \ + "movl %" #d ", " L1 "\n\t" \ /* h = h + w_k + Sigma1(e) Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \ - "addl "L2", %"#h"\n\t" \ + "addl " L2 ", %" #h "\n\t" \ #define RND_STEP_1_1(a,b,c,d,e,f,g,h,i) \ /* L1 = e>>>14 */ \ - "rorl $14, "L1"\n\t" \ - + "rorl $14, " L1 "\n\t" \ + #define RND_STEP_1_2(a,b,c,d,e,f,g,h,i) \ /* L3 = b */ \ - "movl %"#b", "L4"\n\t" \ + "movl %" #b ", " L4 "\n\t" \ /* L2 = f */ \ - "movl %"#f", "L2"\n\t" \ + "movl %" #f ", " L2 "\n\t" \ /* h += w_k */ \ - "addl ("#i")*4("WK"), %"#h"\n\t" \ + "addl (" #i ")*4(" WK "), %" #h "\n\t" \ /* L2 = f ^ g */ \ - "xorl %"#g", "L2"\n\t" \ - + "xorl %" #g ", " L2 "\n\t" \ + #define RND_STEP_1_3(a,b,c,d,e,f,g,h,i) \ /* L1 
= (e>>>14) ^ e */ \ - "xorl %"#e", "L1"\n\t" \ + "xorl %" #e ", " L1 "\n\t" \ /* L2 = (f ^ g) & e */ \ - "andl %"#e", "L2"\n\t" \ - + "andl %" #e ", " L2 "\n\t" \ + #define RND_STEP_1_4(a,b,c,d,e,f,g,h,i) \ /* L1 = ((e>>>14) ^ e) >>> 5 */ \ - "rorl $5, "L1"\n\t" \ + "rorl $5, " L1 "\n\t" \ /* L2 = Ch(e,f,g) */ \ - "xorl %"#g", "L2"\n\t" \ + "xorl %" #g ", " L2 "\n\t" \ /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */ \ - "xorl %"#e", "L1"\n\t" \ + "xorl %" #e ", " L1 "\n\t" \ /* h += Ch(e,f,g) */ \ - "addl "L2", %"#h"\n\t" \ + "addl " L2 ", %" #h "\n\t" \ #define RND_STEP_1_5(a,b,c,d,e,f,g,h,i) \ /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 */ \ - "rorl $6, "L1"\n\t" \ + "rorl $6, " L1 "\n\t" \ /* L4 = a ^ b (= b ^ c of next RND) */ \ - "xorl %"#a", "L4"\n\t" \ + "xorl %" #a ", " L4 "\n\t" \ /* h = h + w_k + Sigma1(e) */ \ - "addl "L1", %"#h"\n\t" \ + "addl " L1 ", %" #h "\n\t" \ /* L2 = a */ \ - "movl %"#a", "L2"\n\t" \ + "movl %" #a ", " L2 "\n\t" \ #define RND_STEP_1_6(a,b,c,d,e,f,g,h,i) \ /* L3 = (a ^ b) & (b ^ c) */ \ - "andl "L4", "L3"\n\t" \ + "andl " L4 ", " L3 "\n\t" \ /* L2 = a>>>9 */ \ - "rorl $9, "L2"\n\t" \ + "rorl $9, " L2 "\n\t" \ /* L2 = (a>>>9) ^ a */ \ - "xorl %"#a", "L2"\n\t" \ + "xorl %" #a ", " L2 "\n\t" \ /* L1 = Maj(a,b,c) */ \ - "xorl %"#b", "L3"\n\t" \ + "xorl %" #b ", " L3 "\n\t" \ #define RND_STEP_1_7(a,b,c,d,e,f,g,h,i) \ /* L2 = ((a>>>9) ^ a) >>> 11 */ \ - "rorl $11, "L2"\n\t" \ + "rorl $11, " L2 "\n\t" \ /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \ - "addl %"#h", %"#d"\n\t" \ + "addl %" #h ", %" #d "\n\t" \ /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */ \ - "xorl %"#a", "L2"\n\t" \ + "xorl %" #a ", " L2 "\n\t" \ /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */ \ - "addl "L3", %"#h"\n\t" \ + "addl " L3 ", %" #h "\n\t" \ #define RND_STEP_1_8(a,b,c,d,e,f,g,h,i) \ /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 */ \ - "rorl $2, "L2"\n\t" \ + "rorl $2, " L2 "\n\t" \ /* L1 = d (e of next RND) */ \ - "movl %"#d", "L1"\n\t" \ + "movl %" #d ", " L1 "\n\t" \ /* h = h + w_k + Sigma1(e) Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \ - "addl "L2", %"#h"\n\t" \ + "addl " L2 ", %" #h "\n\t" \ #define _RND_ALL_0(a,b,c,d,e,f,g,h,i) \ /* h += w_k */ \ - "addl ("#i")*4("WK"), %"#h"\n\t" \ + "addl (" #i ")*4(" WK "), %" #h "\n\t" \ /* L2 = f */ \ - "movl %"#f", "L2"\n\t" \ + "movl %" #f ", " L2 "\n\t" \ /* L3 = b */ \ - "movl %"#b", "L3"\n\t" \ + "movl %" #b ", " L3 "\n\t" \ /* L2 = f ^ g */ \ - "xorl %"#g", "L2"\n\t" \ + "xorl %" #g ", " L2 "\n\t" \ /* L1 = e>>>14 */ \ - "rorl $14, "L1"\n\t" \ + "rorl $14, " L1 "\n\t" \ /* L2 = (f ^ g) & e */ \ - "andl %"#e", "L2"\n\t" \ + "andl %" #e ", " L2 "\n\t" \ /* L1 = (e>>>14) ^ e */ \ - "xorl %"#e", "L1"\n\t" \ + "xorl %" #e ", " L1 "\n\t" \ /* L2 = Ch(e,f,g) */ \ - "xorl %"#g", "L2"\n\t" \ + "xorl %" #g ", " L2 "\n\t" \ /* L1 = ((e>>>14) ^ e) >>> 5 */ \ - "rorl $5, "L1"\n\t" \ + "rorl $5, " L1 "\n\t" \ /* h += Ch(e,f,g) */ \ - "addl "L2", %"#h"\n\t" \ + "addl " L2 ", %" #h "\n\t" \ /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */ \ - "xorl %"#e", "L1"\n\t" \ + "xorl %" #e ", " L1 "\n\t" \ /* L3 = a ^ b */ \ - "xorl %"#a", "L3"\n\t" \ + "xorl %" #a ", " L3 "\n\t" \ /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 */ \ - "rorl $6, "L1"\n\t" \ + "rorl $6, " L1 "\n\t" \ /* L2 = a */ \ - "movl %"#a", "L2"\n\t" \ + "movl %" #a ", " L2 "\n\t" \ /* h = h + w_k + Sigma1(e) */ \ - "addl "L1", %"#h"\n\t" \ + "addl " L1 ", %" #h "\n\t" \ /* L2 = a>>>9 */ \ - "rorl $9, "L2"\n\t" \ + "rorl $9, " L2 "\n\t" \ /* L3 = (a ^ b) & (b ^ c) */ \ - "andl "L3", "L4"\n\t" \ + "andl " L3 ", " L4 "\n\t" \ 
/* L2 = (a>>>9) ^ a */ \ - "xorl %"#a", "L2"\n\t" \ + "xorl %" #a ", " L2 "\n\t" \ /* L1 = Maj(a,b,c) */ \ - "xorl %"#b", "L4"\n\t" \ + "xorl %" #b ", " L4 "\n\t" \ /* L2 = ((a>>>9) ^ a) >>> 11 */ \ - "rorl $11, "L2"\n\t" \ + "rorl $11, " L2 "\n\t" \ /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \ - "addl %"#h", %"#d"\n\t" \ + "addl %" #h ", %" #d "\n\t" \ /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */ \ - "xorl %"#a", "L2"\n\t" \ + "xorl %" #a ", " L2 "\n\t" \ /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */ \ - "addl "L4", %"#h"\n\t" \ + "addl " L4 ", %" #h "\n\t" \ /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 */ \ - "rorl $2, "L2"\n\t" \ + "rorl $2, " L2 "\n\t" \ /* L1 = d (e of next RND) */ \ - "movl %"#d", "L1"\n\t" \ + "movl %" #d ", " L1 "\n\t" \ /* h = h + w_k + Sigma1(e) Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \ - "addl "L2", %"#h"\n\t" \ + "addl " L2 ", %" #h "\n\t" \ #define _RND_ALL_1(a,b,c,d,e,f,g,h,i) \ /* h += w_k */ \ - "addl ("#i")*4("WK"), %"#h"\n\t" \ + "addl (" #i ")*4(" WK "), %" #h "\n\t" \ /* L2 = f */ \ - "movl %"#f", "L2"\n\t" \ + "movl %" #f ", " L2 "\n\t" \ /* L3 = b */ \ - "movl %"#b", "L4"\n\t" \ + "movl %" #b ", " L4 "\n\t" \ /* L2 = f ^ g */ \ - "xorl %"#g", "L2"\n\t" \ + "xorl %" #g ", " L2 "\n\t" \ /* L1 = e>>>14 */ \ - "rorl $14, "L1"\n\t" \ + "rorl $14, " L1 "\n\t" \ /* L2 = (f ^ g) & e */ \ - "andl %"#e", "L2"\n\t" \ + "andl %" #e ", " L2 "\n\t" \ /* L1 = (e>>>14) ^ e */ \ - "xorl %"#e", "L1"\n\t" \ + "xorl %" #e ", " L1 "\n\t" \ /* L2 = Ch(e,f,g) */ \ - "xorl %"#g", "L2"\n\t" \ + "xorl %" #g ", " L2 "\n\t" \ /* L1 = ((e>>>14) ^ e) >>> 5 */ \ - "rorl $5, "L1"\n\t" \ + "rorl $5, " L1 "\n\t" \ /* h += Ch(e,f,g) */ \ - "addl "L2", %"#h"\n\t" \ + "addl " L2 ", %" #h "\n\t" \ /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */ \ - "xorl %"#e", "L1"\n\t" \ + "xorl %" #e ", " L1 "\n\t" \ /* L3 = a ^ b */ \ - "xorl %"#a", "L4"\n\t" \ + "xorl %" #a ", " L4 "\n\t" \ /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 */ \ - "rorl $6, "L1"\n\t" \ + "rorl $6, " L1 "\n\t" \ /* L2 = a */ \ - "movl %"#a", "L2"\n\t" \ + "movl %" #a ", " L2 "\n\t" \ /* h = h + w_k + Sigma1(e) */ \ - "addl "L1", %"#h"\n\t" \ + "addl " L1 ", %" #h "\n\t" \ /* L2 = a>>>9 */ \ - "rorl $9, "L2"\n\t" \ + "rorl $9, " L2 "\n\t" \ /* L3 = (a ^ b) & (b ^ c) */ \ - "andl "L4", "L3"\n\t" \ + "andl " L4 ", " L3 "\n\t" \ /* L2 = (a>>>9) ^ a */ \ - "xorl %"#a", "L2"\n\t" \ + "xorl %" #a", " L2 "\n\t" \ /* L1 = Maj(a,b,c) */ \ - "xorl %"#b", "L3"\n\t" \ + "xorl %" #b ", " L3 "\n\t" \ /* L2 = ((a>>>9) ^ a) >>> 11 */ \ - "rorl $11, "L2"\n\t" \ + "rorl $11, " L2 "\n\t" \ /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \ - "addl %"#h", %"#d"\n\t" \ + "addl %" #h ", %" #d "\n\t" \ /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */ \ - "xorl %"#a", "L2"\n\t" \ + "xorl %" #a ", " L2 "\n\t" \ /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */ \ - "addl "L3", %"#h"\n\t" \ + "addl " L3 ", %" #h "\n\t" \ /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 */ \ - "rorl $2, "L2"\n\t" \ + "rorl $2, " L2 "\n\t" \ /* L1 = d (e of next RND) */ \ - "movl %"#d", "L1"\n\t" \ + "movl %" #d ", " L1 "\n\t" \ /* h = h + w_k + Sigma1(e) Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \ - "addl "L2", %"#h"\n\t" \ + "addl " L2 ", %" #h "\n\t" \ #define RND_ALL_0(a, b, c, d, e, f, g, h, i) \ @@ -1376,43 +1376,43 @@ static int InitSha256(wc_Sha256* sha256) #if defined(HAVE_INTEL_AVX1) /* inline Assember for Intel AVX1 instructions */ #define _VPALIGNR(op1, op2, op3, op4) \ - "vpalignr $"#op4", %"#op3", %"#op2", %"#op1"\n\t" + "vpalignr $" #op4", %" #op3", %" #op2", %" #op1"\n\t" #define 
VPALIGNR(op1, op2, op3, op4) \ _VPALIGNR(op1, op2, op3, op4) #define _VPADDD(op1, op2, op3) \ - "vpaddd %"#op3", %"#op2", %"#op1"\n\t" + "vpaddd %" #op3", %" #op2", %" #op1"\n\t" #define VPADDD(op1, op2, op3) \ _VPADDD(op1, op2, op3) #define _VPSRLD(op1, op2, op3) \ - "vpsrld $"#op3", %"#op2", %"#op1"\n\t" + "vpsrld $" #op3", %" #op2", %" #op1"\n\t" #define VPSRLD(op1, op2, op3) \ _VPSRLD(op1, op2, op3) #define _VPSRLQ(op1, op2, op3) \ - "vpsrlq $"#op3", %"#op2", %"#op1"\n\t" + "vpsrlq $" #op3", %" #op2", %" #op1"\n\t" #define VPSRLQ(op1,op2,op3) \ _VPSRLQ(op1,op2,op3) #define _VPSLLD(op1,op2,op3) \ - "vpslld $"#op3", %"#op2", %"#op1"\n\t" + "vpslld $" #op3", %" #op2", %" #op1"\n\t" #define VPSLLD(op1,op2,op3) \ _VPSLLD(op1,op2,op3) #define _VPOR(op1,op2,op3) \ - "vpor %"#op3", %"#op2", %"#op1"\n\t" + "vpor %" #op3", %" #op2", %" #op1"\n\t" #define VPOR(op1,op2,op3) \ _VPOR(op1,op2,op3) #define _VPXOR(op1,op2,op3) \ - "vpxor %"#op3", %"#op2", %"#op1"\n\t" + "vpxor %" #op3", %" #op2", %" #op1"\n\t" #define VPXOR(op1,op2,op3) \ _VPXOR(op1,op2,op3) #define _VPSHUFD(op1,op2,op3) \ - "vpshufd $"#op3", %"#op2", %"#op1"\n\t" + "vpshufd $" #op3", %" #op2", %" #op1"\n\t" #define VPSHUFD(op1,op2,op3) \ _VPSHUFD(op1,op2,op3) #define _VPSHUFB(op1,op2,op3) \ - "vpshufb %"#op3", %"#op2", %"#op1"\n\t" + "vpshufb %" #op3", %" #op2", %" #op1"\n\t" #define VPSHUFB(op1,op2,op3) \ _VPSHUFB(op1,op2,op3) #define _VPSLLDQ(op1,op2,op3) \ - "vpslldq $"#op3", %"#op2", %"#op1"\n\t" + "vpslldq $" #op3", %" #op2", %" #op1"\n\t" #define VPSLLDQ(op1,op2,op3) \ _VPSLLDQ(op1,op2,op3) @@ -1554,12 +1554,12 @@ static int InitSha256(wc_Sha256* sha256) #define _W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK) \ "# X0, X1, X2, X3 = W[0..15]\n\t" \ - "vmovdqu (%%rax), %"#X0"\n\t" \ - "vmovdqu 16(%%rax), %"#X1"\n\t" \ + "vmovdqu (%%rax), %" #X0 "\n\t" \ + "vmovdqu 16(%%rax), %" #X1 "\n\t" \ VPSHUFB(X0, X0, BYTE_FLIP_MASK) \ VPSHUFB(X1, X1, BYTE_FLIP_MASK) \ - "vmovdqu 32(%%rax), %"#X2"\n\t" \ - "vmovdqu 48(%%rax), %"#X3"\n\t" \ + "vmovdqu 32(%%rax), %" #X2 "\n\t" \ + "vmovdqu 48(%%rax), %" #X3 "\n\t" \ VPSHUFB(X2, X2, BYTE_FLIP_MASK) \ VPSHUFB(X3, X3, BYTE_FLIP_MASK) @@ -1568,14 +1568,14 @@ static int InitSha256(wc_Sha256* sha256) #define _SET_W_K_XFER_4(i) \ - "vpaddd ("#i"*4)+ 0+%[K], %%xmm0, %%xmm4\n\t" \ - "vpaddd ("#i"*4)+16+%[K], %%xmm1, %%xmm5\n\t" \ - "vmovdqu %%xmm4, ("WK")\n\t" \ - "vmovdqu %%xmm5, 16("WK")\n\t" \ - "vpaddd ("#i"*4)+32+%[K], %%xmm2, %%xmm6\n\t" \ - "vpaddd ("#i"*4)+48+%[K], %%xmm3, %%xmm7\n\t" \ - "vmovdqu %%xmm6, 32("WK")\n\t" \ - "vmovdqu %%xmm7, 48("WK")\n\t" + "vpaddd (" #i "*4)+ 0+%[K], %%xmm0, %%xmm4\n\t" \ + "vpaddd (" #i "*4)+16+%[K], %%xmm1, %%xmm5\n\t" \ + "vmovdqu %%xmm4, (" WK ")\n\t" \ + "vmovdqu %%xmm5, 16(" WK ")\n\t" \ + "vpaddd (" #i "*4)+32+%[K], %%xmm2, %%xmm6\n\t" \ + "vpaddd (" #i "*4)+48+%[K], %%xmm3, %%xmm7\n\t" \ + "vmovdqu %%xmm6, 32(" WK ")\n\t" \ + "vmovdqu %%xmm7, 48(" WK ")\n\t" #define SET_W_K_XFER_4(i) \ _SET_W_K_XFER_4(i) @@ -1588,10 +1588,10 @@ static const ALIGN32 word64 mSHUF_DC00[] = static const ALIGN32 word64 mBYTE_FLIP_MASK[] = { 0x0405060700010203, 0x0c0d0e0f08090a0b }; -#define _Init_Masks(mask1, mask2, mask3) \ - "vmovdqa %[FLIP], %"#mask1"\n\t" \ - "vmovdqa %[SHUF00BA], %"#mask2"\n\t" \ - "vmovdqa %[SHUFDC00], %"#mask3"\n\t" +#define _Init_Masks(mask1, mask2, mask3) \ + "vmovdqa %[FLIP], %" #mask1 "\n\t" \ + "vmovdqa %[SHUF00BA], %" #mask2 "\n\t" \ + "vmovdqa %[SHUFDC00], %" #mask3 "\n\t" #define Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) \ 
_Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) @@ -1626,9 +1626,9 @@ SHA256_NOINLINE static int Transform_Sha256_AVX1(wc_Sha256* sha256) W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK) - "movl %%r9d, "L4"\n\t" - "movl %%r12d, "L1"\n\t" - "xorl %%r10d, "L4"\n\t" + "movl %%r9d, " L4 "\n\t" + "movl %%r12d, " L1 "\n\t" + "xorl %%r10d, " L4 "\n\t" SET_W_K_XFER_4(0) MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0) @@ -1686,9 +1686,9 @@ SHA256_NOINLINE static int Transform_Sha256_AVX1_Len(wc_Sha256* sha256, W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK) - "movl %%r9d, "L4"\n\t" - "movl %%r12d, "L1"\n\t" - "xorl %%r10d, "L4"\n\t" + "movl %%r9d, " L4 "\n\t" + "movl %%r12d, " L1 "\n\t" + "xorl %%r10d, " L4 "\n\t" SET_W_K_XFER_4(0) MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0) @@ -1755,9 +1755,9 @@ SHA256_NOINLINE static int Transform_Sha256_AVX1_RORX(wc_Sha256* sha256) LOAD_DIGEST() SET_W_K_XFER_4(0) - "movl %%r9d, "L4"\n\t" - "rorx $6, %%r12d, "L1"\n\t" - "xorl %%r10d, "L4"\n\t" + "movl %%r9d, " L4 "\n\t" + "rorx $6, %%r12d, " L1 "\n\t" + "xorl %%r10d, " L4 "\n\t" MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0) MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4) MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8) @@ -1776,13 +1776,13 @@ SHA256_NOINLINE static int Transform_Sha256_AVX1_RORX(wc_Sha256* sha256) MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12) SET_W_K_XFER_4(48) - "xorl "L3", "L3"\n\t" + "xorl " L3 ", " L3 "\n\t" RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0) RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4) RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8) RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12) /* Prev RND: h += Maj(a,b,c) */ - "addl "L3", %%r8d\n\t" + "addl " L3 ", %%r8d\n\t" STORE_ADD_DIGEST() @@ -1817,9 +1817,9 @@ SHA256_NOINLINE static int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256, W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK) SET_W_K_XFER_4(0) - "movl %%r9d, "L4"\n\t" - "rorx $6, %%r12d, "L1"\n\t" - "xorl %%r10d, "L4"\n\t" + "movl %%r9d, " L4 "\n\t" + "rorx $6, %%r12d, " L1 "\n\t" + "xorl %%r10d, " L4 "\n\t" MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0) MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4) MsgSched_RORX(X2, X3, X0, X1, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 8) @@ -1838,14 +1838,14 @@ SHA256_NOINLINE static int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256, MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12) SET_W_K_XFER_4(48) - "xorl "L3", "L3"\n\t" - "xorl "L2", "L2"\n\t" + "xorl " L3 ", " L3 "\n\t" + "xorl " L2 ", " L2 "\n\t" RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0) RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4) RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8) RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12) /* Prev RND: h += Maj(a,b,c) */ - "addl "L3", %%r8d\n\t" + "addl " L3 ", %%r8d\n\t" "movq 120(%[sha256]), %%rax\n\t" ADD_DIGEST() @@ -2027,43 +2027,43 @@ SHA256_NOINLINE static int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256, #endif /* HAVE_INTEL_RORX */ #define _VINSERTI128(op1,op2,op3,op4) \ - "vinserti128 $"#op4", %"#op3", %"#op2", %"#op1"\n\t" + "vinserti128 $" #op4 ", %" #op3 ", %" #op2 ", %" #op1 "\n\t" #define VINSERTI128(op1,op2,op3,op4) \ _VINSERTI128(op1,op2,op3,op4) -#define _LOAD_W_K_LOW(BYTE_FLIP_MASK, reg) \ - "# X0, X1, X2, X3 = W[0..15]\n\t" \ - "vmovdqu 
(%%"#reg"), %%xmm0\n\t" \ - "vmovdqu 16(%%"#reg"), %%xmm1\n\t" \ - VPSHUFB(X0, X0, BYTE_FLIP_MASK) \ - VPSHUFB(X1, X1, BYTE_FLIP_MASK) \ - "vmovdqu 32(%%"#reg"), %%xmm2\n\t" \ - "vmovdqu 48(%%"#reg"), %%xmm3\n\t" \ - VPSHUFB(X2, X2, BYTE_FLIP_MASK) \ +#define _LOAD_W_K_LOW(BYTE_FLIP_MASK, reg) \ + "# X0, X1, X2, X3 = W[0..15]\n\t" \ + "vmovdqu (%%" #reg "), %%xmm0\n\t" \ + "vmovdqu 16(%%" #reg "), %%xmm1\n\t" \ + VPSHUFB(X0, X0, BYTE_FLIP_MASK) \ + VPSHUFB(X1, X1, BYTE_FLIP_MASK) \ + "vmovdqu 32(%%" #reg "), %%xmm2\n\t" \ + "vmovdqu 48(%%" #reg "), %%xmm3\n\t" \ + VPSHUFB(X2, X2, BYTE_FLIP_MASK) \ VPSHUFB(X3, X3, BYTE_FLIP_MASK) #define LOAD_W_K_LOW(BYTE_FLIP_MASK, reg) \ _LOAD_W_K_LOW(BYTE_FLIP_MASK, reg) -#define _LOAD_W_K(BYTE_FLIP_Y_MASK, reg) \ - "# X0, X1, X2, X3 = W[0..15]\n\t" \ - "vmovdqu (%%"#reg"), %%xmm0\n\t" \ - "vmovdqu 16(%%"#reg"), %%xmm1\n\t" \ - "vmovdqu 64(%%"#reg"), %%xmm4\n\t" \ - "vmovdqu 80(%%"#reg"), %%xmm5\n\t" \ - VINSERTI128(Y0, Y0, XTMP0, 1) \ - VINSERTI128(Y1, Y1, XTMP1, 1) \ - VPSHUFB(Y0, Y0, BYTE_FLIP_Y_MASK) \ - VPSHUFB(Y1, Y1, BYTE_FLIP_Y_MASK) \ - "vmovdqu 32(%%"#reg"), %%xmm2\n\t" \ - "vmovdqu 48(%%"#reg"), %%xmm3\n\t" \ - "vmovdqu 96(%%"#reg"), %%xmm6\n\t" \ - "vmovdqu 112(%%"#reg"), %%xmm7\n\t" \ - VINSERTI128(Y2, Y2, XTMP2, 1) \ - VINSERTI128(Y3, Y3, XTMP3, 1) \ - VPSHUFB(Y2, Y2, BYTE_FLIP_Y_MASK) \ +#define _LOAD_W_K(BYTE_FLIP_Y_MASK, reg) \ + "# X0, X1, X2, X3 = W[0..15]\n\t" \ + "vmovdqu (%%" #reg "), %%xmm0\n\t" \ + "vmovdqu 16(%%" #reg "), %%xmm1\n\t" \ + "vmovdqu 64(%%" #reg "), %%xmm4\n\t" \ + "vmovdqu 80(%%" #reg "), %%xmm5\n\t" \ + VINSERTI128(Y0, Y0, XTMP0, 1) \ + VINSERTI128(Y1, Y1, XTMP1, 1) \ + VPSHUFB(Y0, Y0, BYTE_FLIP_Y_MASK) \ + VPSHUFB(Y1, Y1, BYTE_FLIP_Y_MASK) \ + "vmovdqu 32(%%" #reg "), %%xmm2\n\t" \ + "vmovdqu 48(%%" #reg "), %%xmm3\n\t" \ + "vmovdqu 96(%%" #reg "), %%xmm6\n\t" \ + "vmovdqu 112(%%" #reg "), %%xmm7\n\t" \ + VINSERTI128(Y2, Y2, XTMP2, 1) \ + VINSERTI128(Y3, Y3, XTMP3, 1) \ + VPSHUFB(Y2, Y2, BYTE_FLIP_Y_MASK) \ VPSHUFB(Y3, Y3, BYTE_FLIP_Y_MASK) #define LOAD_W_K(BYTE_FLIP_Y_MASK, reg) \ @@ -2071,14 +2071,14 @@ SHA256_NOINLINE static int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256, #define _SET_W_Y_4(i) \ - "vpaddd ("#i"*8)+ 0+%[K], %%ymm0, %%ymm4\n\t" \ - "vpaddd ("#i"*8)+32+%[K], %%ymm1, %%ymm5\n\t" \ - "vmovdqu %%ymm4, ("#i"*8)+ 0("WK")\n\t" \ - "vmovdqu %%ymm5, ("#i"*8)+32("WK")\n\t" \ - "vpaddd ("#i"*8)+64+%[K], %%ymm2, %%ymm4\n\t" \ - "vpaddd ("#i"*8)+96+%[K], %%ymm3, %%ymm5\n\t" \ - "vmovdqu %%ymm4, ("#i"*8)+64("WK")\n\t" \ - "vmovdqu %%ymm5, ("#i"*8)+96("WK")\n\t" + "vpaddd (" #i "*8)+ 0+%[K], %%ymm0, %%ymm4\n\t" \ + "vpaddd (" #i "*8)+32+%[K], %%ymm1, %%ymm5\n\t" \ + "vmovdqu %%ymm4, (" #i "*8)+ 0(" WK ")\n\t" \ + "vmovdqu %%ymm5, (" #i "*8)+32(" WK ")\n\t" \ + "vpaddd (" #i "*8)+64+%[K], %%ymm2, %%ymm4\n\t" \ + "vpaddd (" #i "*8)+96+%[K], %%ymm3, %%ymm5\n\t" \ + "vmovdqu %%ymm4, (" #i "*8)+64(" WK ")\n\t" \ + "vmovdqu %%ymm5, (" #i "*8)+96(" WK ")\n\t" #define SET_W_Y_4(i) \ _SET_W_Y_4(i) @@ -2095,9 +2095,9 @@ static const ALIGN32 word64 mBYTE_FLIP_Y_MASK[] = 0x0405060700010203, 0x0c0d0e0f08090a0b }; #define _INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) \ - "vmovdqa %[FLIP], %"#BYTE_FLIP_MASK"\n\t" \ - "vmovdqa %[SHUF00BA], %"#SHUF_00BA"\n\t" \ - "vmovdqa %[SHUFDC00], %"#SHUF_DC00"\n\t" + "vmovdqa %[FLIP], %" #BYTE_FLIP_MASK "\n\t" \ + "vmovdqa %[SHUF00BA], %" #SHUF_00BA "\n\t" \ + "vmovdqa %[SHUFDC00], %" #SHUF_DC00 "\n\t" #define INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) \ 
_INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) @@ -2149,9 +2149,9 @@ SHA256_NOINLINE static int Transform_Sha256_AVX2(wc_Sha256* sha256) LOAD_W_K_LOW(BYTE_FLIP_MASK, rax) - "movl %%r9d, "L4"\n\t" - "movl %%r12d, "L1"\n\t" - "xorl %%r10d, "L4"\n\t" + "movl %%r9d, " L4 "\n\t" + "movl %%r12d, " L1 "\n\t" + "xorl %%r10d, " L4 "\n\t" SET_W_Y_4(0) MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0) @@ -2218,9 +2218,9 @@ SHA256_NOINLINE static int Transform_Sha256_AVX2_Len(wc_Sha256* sha256, LOAD_W_K(BYTE_FLIP_Y_MASK, rax) - "movl %%r9d, "L4"\n\t" - "movl %%r12d, "L1"\n\t" - "xorl %%r10d, "L4"\n\t" + "movl %%r9d, " L4 "\n\t" + "movl %%r12d, " L1 "\n\t" + "xorl %%r10d, " L4 "\n\t" SET_W_Y_4(0) MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0) @@ -2249,9 +2249,9 @@ SHA256_NOINLINE static int Transform_Sha256_AVX2_Len(wc_Sha256* sha256, ADD_DIGEST() STORE_DIGEST() - "movl %%r9d, "L4"\n\t" - "movl %%r12d, "L1"\n\t" - "xorl %%r10d, "L4"\n\t" + "movl %%r9d, " L4 "\n\t" + "movl %%r12d, " L1 "\n\t" + "xorl %%r10d, " L4 "\n\t" RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 4) RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12) @@ -2309,9 +2309,9 @@ SHA256_NOINLINE static int Transform_Sha256_AVX2_RORX(wc_Sha256* sha256) LOAD_DIGEST() - "movl %%r9d, "L4"\n\t" - "rorx $6, %%r12d, "L1"\n\t" - "xorl %%r10d, "L4"\n\t" + "movl %%r9d, " L4 "\n\t" + "rorx $6, %%r12d, " L1 "\n\t" + "xorl %%r10d, " L4 "\n\t" SET_W_Y_4(0) MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0) @@ -2332,14 +2332,14 @@ SHA256_NOINLINE static int Transform_Sha256_AVX2_RORX(wc_Sha256* sha256) MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88) SET_W_Y_4(48) - "xorl "L3", "L3"\n\t" - "xorl "L2", "L2"\n\t" + "xorl " L3 ", " L3 "\n\t" + "xorl " L2 ", " L2 "\n\t" RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 96) RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104) RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112) RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120) /* Prev RND: h += Maj(a,b,c) */ - "addl "L3", %%r8d\n\t" + "addl " L3 ", %%r8d\n\t" STORE_ADD_DIGEST() @@ -2382,9 +2382,9 @@ SHA256_NOINLINE static int Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256, LOAD_W_K(BYTE_FLIP_Y_MASK, rax) - "movl %%r9d, "L4"\n\t" - "rorx $6, %%r12d, "L1"\n\t" - "xorl %%r10d, "L4"\n\t" + "movl %%r9d, " L4 "\n\t" + "rorx $6, %%r12d, " L1 "\n\t" + "xorl %%r10d, " L4 "\n\t" SET_W_Y_4(0) MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0) @@ -2405,22 +2405,22 @@ SHA256_NOINLINE static int Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256, MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88) SET_W_Y_4(48) - "xorl "L3", "L3"\n\t" - "xorl "L2", "L2"\n\t" + "xorl " L3 ", " L3 "\n\t" + "xorl " L2 ", " L2 "\n\t" RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 96) RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104) RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112) RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120) /* Prev RND: h += Maj(a,b,c) */ - "addl "L3", %%r8d\n\t" - "xorl "L2", "L2"\n\t" + "addl " L3 ", %%r8d\n\t" + "xorl " L2 ", " L2 "\n\t" ADD_DIGEST() STORE_DIGEST() - "movl %%r9d, "L4"\n\t" - "xorl "L3", "L3"\n\t" - "xorl %%r10d, "L4"\n\t" + "movl %%r9d, " L4 "\n\t" + "xorl " L3 ", " L3 "\n\t" + "xorl %%r10d, " L4 "\n\t" RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 4) RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12) @@ -2439,7 +2439,7 @@ SHA256_NOINLINE static int 
Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256, RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 116) RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 124) /* Prev RND: h += Maj(a,b,c) */ - "addl "L3", %%r8d\n\t" + "addl " L3 ", %%r8d\n\t" "movq 120(%[sha256]), %%rax\n\t" ADD_DIGEST() diff --git a/wolfcrypt/src/sha512.c b/wolfcrypt/src/sha512.c index b96b29ad6..9def45576 100644 --- a/wolfcrypt/src/sha512.c +++ b/wolfcrypt/src/sha512.c @@ -764,33 +764,33 @@ static word64 mBYTE_FLIP_MASK[] = { 0x0001020304050607, 0x08090a0b0c0d0e0f }; "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", \ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" -#define _VPALIGNR(dest, src1, src2, bits) \ - "vpalignr $"#bits", %%"#src2", %%"#src1", %%"#dest"\n\t" +#define _VPALIGNR(dest, src1, src2, bits) \ + "vpalignr $" #bits ", %%" #src2 ", %%" #src1 ", %%" #dest "\n\t" #define VPALIGNR(dest, src1, src2, bits) \ _VPALIGNR(dest, src1, src2, bits) #define _V_SHIFT_R(dest, src, bits) \ - "vpsrlq $"#bits", %%"#src", %%"#dest"\n\t" + "vpsrlq $" #bits ", %%" #src ", %%" #dest "\n\t" #define V_SHIFT_R(dest, src, bits) \ _V_SHIFT_R(dest, src, bits) #define _V_SHIFT_L(dest, src, bits) \ - "vpsllq $"#bits", %%"#src", %%"#dest"\n\t" + "vpsllq $" #bits ", %%" #src ", %%" #dest "\n\t" #define V_SHIFT_L(dest, src, bits) \ _V_SHIFT_L(dest, src, bits) #define _V_ADD(dest, src1, src2) \ - "vpaddq %%"#src1", %%"#src2", %%"#dest"\n\t" + "vpaddq %%" #src1 ", %%" #src2 ", %%" #dest "\n\t" #define V_ADD(dest, src1, src2) \ _V_ADD(dest, src1, src2) #define _V_XOR(dest, src1, src2) \ - "vpxor %%"#src1", %%"#src2", %%"#dest"\n\t" + "vpxor %%" #src1 ", %%" #src2 ", %%" #dest "\n\t" #define V_XOR(dest, src1, src2) \ _V_XOR(dest, src1, src2) #define _V_OR(dest, src1, src2) \ - "vpor %%"#src1", %%"#src2", %%"#dest"\n\t" + "vpor %%" #src1 ", %%" #src2 ", %%" #dest "\n\t" #define V_OR(dest, src1, src2) \ _V_OR(dest, src1, src2) @@ -815,179 +815,179 @@ static word64 mBYTE_FLIP_MASK[] = { 0x0001020304050607, 0x08090a0b0c0d0e0f }; #define RND_0_1(a,b,c,d,e,f,g,h,i) \ /* L1 = e >>> 23 */ \ - "rorq $23, "L1"\n\t" \ + "rorq $23, " L1 "\n\t" \ #define RND_0_2(a,b,c,d,e,f,g,h,i) \ /* L3 = a */ \ - "movq "#a", "L3"\n\t" \ + "movq "#a", " L3 "\n\t" \ /* L2 = f */ \ - "movq "#f", "L2"\n\t" \ + "movq "#f", " L2 "\n\t" \ /* h += W_X[i] */ \ - "addq ("#i")*8("WX"), "#h"\n\t" \ + "addq ("#i")*8(" WX "), "#h"\n\t" \ /* L2 = f ^ g */ \ - "xorq "#g", "L2"\n\t" \ + "xorq "#g", " L2 "\n\t" \ #define RND_0_2_A(a,b,c,d,e,f,g,h,i) \ /* L3 = a */ \ - "movq "#a", "L3"\n\t" \ + "movq "#a", " L3 "\n\t" \ /* L2 = f */ \ - "movq "#f", "L2"\n\t" \ + "movq "#f", " L2 "\n\t" \ #define RND_0_2_B(a,b,c,d,e,f,g,h,i) \ /* h += W_X[i] */ \ - "addq ("#i")*8("WX"), "#h"\n\t" \ + "addq ("#i")*8(" WX "), "#h"\n\t" \ /* L2 = f ^ g */ \ - "xorq "#g", "L2"\n\t" \ + "xorq "#g", " L2 "\n\t" \ #define RND_0_3(a,b,c,d,e,f,g,h,i) \ /* L1 = (e >>> 23) ^ e */ \ - "xorq "#e", "L1"\n\t" \ + "xorq "#e", " L1 "\n\t" \ /* L2 = (f ^ g) & e */ \ - "andq "#e", "L2"\n\t" \ + "andq "#e", " L2 "\n\t" \ #define RND_0_4(a,b,c,d,e,f,g,h,i) \ /* L1 = ((e >>> 23) ^ e) >>> 4 */ \ - "rorq $4, "L1"\n\t" \ + "rorq $4, " L1 "\n\t" \ /* L2 = ((f ^ g) & e) ^ g */ \ - "xorq "#g", "L2"\n\t" \ + "xorq "#g", " L2 "\n\t" \ #define RND_0_5(a,b,c,d,e,f,g,h,i) \ /* L1 = (((e >>> 23) ^ e) >>> 4) ^ e */ \ - "xorq "#e", "L1"\n\t" \ + "xorq "#e", " L1 "\n\t" \ /* h += Ch(e,f,g) */ \ - "addq "L2", "#h"\n\t" \ + "addq " L2 ", "#h"\n\t" \ #define RND_0_6(a,b,c,d,e,f,g,h,i) \ /* L1 = ((((e >>> 
23) ^ e) >>> 4) ^ e) >>> 14 */ \ - "rorq $14, "L1"\n\t" \ + "rorq $14, " L1 "\n\t" \ /* L3 = a ^ b */ \ - "xorq "#b", "L3"\n\t" \ + "xorq "#b", " L3 "\n\t" \ #define RND_0_7(a,b,c,d,e,f,g,h,i) \ /* h += Sigma1(e) */ \ - "addq "L1", "#h"\n\t" \ + "addq " L1 ", "#h"\n\t" \ /* L2 = a */ \ - "movq "#a", "L2"\n\t" \ + "movq "#a", " L2 "\n\t" \ #define RND_0_8(a,b,c,d,e,f,g,h,i) \ /* L4 = (a ^ b) & (b ^ c) */ \ - "andq "L3", "L4"\n\t" \ + "andq " L3 ", " L4 "\n\t" \ /* L2 = a >>> 5 */ \ - "rorq $5, "L2"\n\t" \ + "rorq $5, " L2 "\n\t" \ #define RND_0_9(a,b,c,d,e,f,g,h,i) \ /* L2 = (a >>> 5) ^ a */ \ - "xorq "#a", "L2"\n\t" \ + "xorq "#a", " L2 "\n\t" \ /* L4 = ((a ^ b) & (b ^ c) ^ b */ \ - "xorq "#b", "L4"\n\t" \ + "xorq "#b", " L4 "\n\t" \ #define RND_0_10(a,b,c,d,e,f,g,h,i) \ /* L2 = ((a >>> 5) ^ a) >>> 6 */ \ - "rorq $6, "L2"\n\t" \ + "rorq $6, " L2 "\n\t" \ /* d += h */ \ "addq "#h", "#d"\n\t" \ #define RND_0_11(a,b,c,d,e,f,g,h,i) \ /* L2 = (((a >>> 5) ^ a) >>> 6) ^ a */ \ - "xorq "#a", "L2"\n\t" \ + "xorq "#a", " L2 "\n\t" \ /* h += Sigma0(a) */ \ - "addq "L4", "#h"\n\t" \ + "addq " L4 ", "#h"\n\t" \ #define RND_0_12(a,b,c,d,e,f,g,h,i) \ /* L2 = ((((a >>> 5) ^ a) >>> 6) ^ a) >>> 28 */ \ - "rorq $28, "L2"\n\t" \ + "rorq $28, " L2 "\n\t" \ /* d (= e next RND) */ \ - "movq "#d", "L1"\n\t" \ + "movq "#d", " L1 "\n\t" \ /* h += Maj(a,b,c) */ \ - "addq "L2", "#h"\n\t" \ + "addq " L2 ", "#h"\n\t" \ #define RND_1_1(a,b,c,d,e,f,g,h,i) \ /* L1 = e >>> 23 */ \ - "rorq $23, "L1"\n\t" \ + "rorq $23, " L1 "\n\t" \ #define RND_1_2(a,b,c,d,e,f,g,h,i) \ /* L4 = a */ \ - "movq "#a", "L4"\n\t" \ + "movq "#a", " L4 "\n\t" \ /* L2 = f */ \ - "movq "#f", "L2"\n\t" \ + "movq "#f", " L2 "\n\t" \ /* h += W_X[i] */ \ - "addq ("#i")*8("WX"), "#h"\n\t" \ + "addq ("#i")*8(" WX "), "#h"\n\t" \ /* L2 = f ^ g */ \ - "xorq "#g", "L2"\n\t" \ + "xorq "#g", " L2 "\n\t" \ #define RND_1_2_A(a,b,c,d,e,f,g,h,i) \ /* L4 = a */ \ - "movq "#a", "L4"\n\t" \ + "movq "#a", " L4 "\n\t" \ /* L2 = f */ \ - "movq "#f", "L2"\n\t" \ + "movq "#f", " L2 "\n\t" \ #define RND_1_2_B(a,b,c,d,e,f,g,h,i) \ /* h += W_X[i] */ \ - "addq ("#i")*8("WX"), "#h"\n\t" \ + "addq ("#i")*8(" WX "), "#h"\n\t" \ /* L2 = f ^ g */ \ - "xorq "#g", "L2"\n\t" \ + "xorq "#g", " L2 "\n\t" \ #define RND_1_3(a,b,c,d,e,f,g,h,i) \ /* L1 = (e >>> 23) ^ e */ \ - "xorq "#e", "L1"\n\t" \ + "xorq "#e", " L1 "\n\t" \ /* L2 = (f ^ g) & e */ \ - "andq "#e", "L2"\n\t" \ + "andq "#e", " L2 "\n\t" \ #define RND_1_4(a,b,c,d,e,f,g,h,i) \ /* ((e >>> 23) ^ e) >>> 4 */ \ - "rorq $4, "L1"\n\t" \ + "rorq $4, " L1 "\n\t" \ /* ((f ^ g) & e) ^ g */ \ - "xorq "#g", "L2"\n\t" \ + "xorq "#g", " L2 "\n\t" \ #define RND_1_5(a,b,c,d,e,f,g,h,i) \ /* (((e >>> 23) ^ e) >>> 4) ^ e */ \ - "xorq "#e", "L1"\n\t" \ + "xorq "#e", " L1 "\n\t" \ /* h += Ch(e,f,g) */ \ - "addq "L2", "#h"\n\t" \ + "addq " L2 ", "#h"\n\t" \ #define RND_1_6(a,b,c,d,e,f,g,h,i) \ /* L1 = ((((e >>> 23) ^ e) >>> 4) ^ e) >>> 14 */ \ - "rorq $14, "L1"\n\t" \ + "rorq $14, " L1 "\n\t" \ /* L4 = a ^ b */ \ - "xorq "#b", "L4"\n\t" \ + "xorq "#b", " L4 "\n\t" \ #define RND_1_7(a,b,c,d,e,f,g,h,i) \ /* h += Sigma1(e) */ \ - "addq "L1", "#h"\n\t" \ + "addq " L1 ", "#h"\n\t" \ /* L2 = a */ \ - "movq "#a", "L2"\n\t" \ - + "movq "#a", " L2 "\n\t" \ + #define RND_1_8(a,b,c,d,e,f,g,h,i) \ /* L3 = (a ^ b) & (b ^ c) */ \ - "andq "L4", "L3"\n\t" \ + "andq " L4 ", " L3 "\n\t" \ /* L2 = a >>> 5 */ \ - "rorq $5, "L2"\n\t" \ + "rorq $5, " L2 "\n\t" \ #define RND_1_9(a,b,c,d,e,f,g,h,i) \ /* L2 = (a >>> 5) ^ a */ \ - "xorq "#a", "L2"\n\t" \ + "xorq "#a", " L2 
"\n\t" \ /* L3 = ((a ^ b) & (b ^ c) ^ b */ \ - "xorq "#b", "L3"\n\t" \ + "xorq "#b", " L3 "\n\t" \ #define RND_1_10(a,b,c,d,e,f,g,h,i) \ /* L2 = ((a >>> 5) ^ a) >>> 6 */ \ - "rorq $6, "L2"\n\t" \ + "rorq $6, " L2 "\n\t" \ /* d += h */ \ "addq "#h", "#d"\n\t" \ #define RND_1_11(a,b,c,d,e,f,g,h,i) \ /* L2 = (((a >>> 5) ^ a) >>> 6) ^ a */ \ - "xorq "#a", "L2"\n\t" \ + "xorq "#a", " L2 "\n\t" \ /* h += Sigma0(a) */ \ - "addq "L3", "#h"\n\t" \ + "addq " L3 ", "#h"\n\t" \ #define RND_1_12(a,b,c,d,e,f,g,h,i) \ /* L2 = ((((a >>> 5) ^ a) >>> 6) ^ a) >>> 28 */ \ - "rorq $28, "L2"\n\t" \ + "rorq $28, " L2 "\n\t" \ /* d (= e next RND) */ \ - "movq "#d", "L1"\n\t" \ + "movq "#d", " L1 "\n\t" \ /* h += Maj(a,b,c) */ \ - "addq "L2", "#h"\n\t" \ + "addq " L2 ", "#h"\n\t" \ #define MsgSched2(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,a,b,c,d,e,f,g,h,i) \ @@ -1070,131 +1070,131 @@ static word64 mBYTE_FLIP_MASK[] = { 0x0001020304050607, 0x08090a0b0c0d0e0f }; #define RND_RORX_0_1(a, b, c, d, e, f, g, h, i) \ /* L1 = e>>>14 */ \ - "rorxq $14, "#e", "L1"\n\t" \ + "rorxq $14, "#e", " L1 "\n\t" \ /* L2 = e>>>18 */ \ - "rorxq $18, "#e", "L2"\n\t" \ + "rorxq $18, "#e", " L2 "\n\t" \ /* Prev RND: h += Maj(a,b,c) */ \ - "addq "L3", "#a"\n\t" \ + "addq " L3 ", "#a"\n\t" \ #define RND_RORX_0_2(a, b, c, d, e, f, g, h, i) \ /* h += w_k */ \ - "addq ("#i")*8("WX"), "#h"\n\t" \ + "addq ("#i")*8(" WX "), "#h"\n\t" \ /* L3 = f */ \ - "movq "#f", "L3"\n\t" \ + "movq "#f", " L3 "\n\t" \ /* L2 = (e>>>14) ^ (e>>>18) */ \ - "xorq "L1", "L2"\n\t" \ + "xorq " L1 ", " L2 "\n\t" \ #define RND_RORX_0_3(a, b, c, d, e, f, g, h, i) \ /* L3 = f ^ g */ \ - "xorq "#g", "L3"\n\t" \ + "xorq "#g", " L3 "\n\t" \ /* L1 = e>>>41 */ \ - "rorxq $41, "#e", "L1"\n\t" \ + "rorxq $41, "#e", " L1 "\n\t" \ /* L1 = Sigma1(e) */ \ - "xorq "L2", "L1"\n\t" \ + "xorq " L2 ", " L1 "\n\t" \ #define RND_RORX_0_4(a, b, c, d, e, f, g, h, i) \ /* L3 = (f ^ g) & e */ \ - "andq "#e", "L3"\n\t" \ + "andq "#e", " L3 "\n\t" \ /* h += Sigma1(e) */ \ - "addq "L1", "#h"\n\t" \ + "addq " L1 ", "#h"\n\t" \ /* L1 = a>>>28 */ \ - "rorxq $28, "#a", "L1"\n\t" \ + "rorxq $28, "#a", " L1 "\n\t" \ #define RND_RORX_0_5(a, b, c, d, e, f, g, h, i) \ /* L2 = a>>>34 */ \ - "rorxq $34, "#a", "L2"\n\t" \ + "rorxq $34, "#a", " L2 "\n\t" \ /* L3 = Ch(e,f,g) */ \ - "xorq "#g", "L3"\n\t" \ + "xorq "#g", " L3 "\n\t" \ /* L2 = (a>>>28) ^ (a>>>34) */ \ - "xorq "L1", "L2"\n\t" \ + "xorq " L1 ", " L2 "\n\t" \ #define RND_RORX_0_6(a, b, c, d, e, f, g, h, i) \ /* L1 = a>>>39 */ \ - "rorxq $39, "#a", "L1"\n\t" \ + "rorxq $39, "#a", " L1 "\n\t" \ /* h += Ch(e,f,g) */ \ - "addq "L3", "#h"\n\t" \ + "addq " L3 ", "#h"\n\t" \ /* L1 = Sigma0(a) */ \ - "xorq "L2", "L1"\n\t" \ + "xorq " L2 ", " L1 "\n\t" \ #define RND_RORX_0_7(a, b, c, d, e, f, g, h, i) \ /* L3 = b */ \ - "movq "#b", "L3"\n\t" \ + "movq "#b", " L3 "\n\t" \ /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \ "addq "#h", "#d"\n\t" \ /* L3 = a ^ b */ \ - "xorq "#a", "L3"\n\t" \ + "xorq "#a", " L3 "\n\t" \ #define RND_RORX_0_8(a, b, c, d, e, f, g, h, i) \ /* L4 = (a ^ b) & (b ^ c) */ \ - "andq "L3", "L4"\n\t" \ + "andq " L3 ", " L4 "\n\t" \ /* h += Sigma0(a) */ \ - "addq "L1", "#h"\n\t" \ + "addq " L1 ", "#h"\n\t" \ /* L4 = Maj(a,b,c) */ \ - "xorq "#b", "L4"\n\t" \ + "xorq "#b", " L4 "\n\t" \ #define RND_RORX_1_1(a, b, c, d, e, f, g, h, i) \ /* L1 = e>>>14 */ \ - "rorxq $14, "#e", "L1"\n\t" \ + "rorxq $14, "#e", " L1 "\n\t" \ /* L2 = e>>>18 */ \ - "rorxq $18, "#e", "L2"\n\t" \ + "rorxq $18, "#e", " L2 "\n\t" \ /* Prev RND: h += Maj(a,b,c) */ \ - "addq "L4", 
"#a"\n\t" \ + "addq " L4 ", "#a"\n\t" \ #define RND_RORX_1_2(a, b, c, d, e, f, g, h, i) \ /* h += w_k */ \ - "addq ("#i")*8("WX"), "#h"\n\t" \ + "addq ("#i")*8(" WX "), "#h"\n\t" \ /* L4 = f */ \ - "movq "#f", "L4"\n\t" \ + "movq "#f", " L4 "\n\t" \ /* L2 = (e>>>14) ^ (e>>>18) */ \ - "xorq "L1", "L2"\n\t" \ + "xorq " L1 ", " L2 "\n\t" \ #define RND_RORX_1_3(a, b, c, d, e, f, g, h, i) \ /* L4 = f ^ g */ \ - "xorq "#g", "L4"\n\t" \ + "xorq "#g", " L4 "\n\t" \ /* L1 = e>>>41 */ \ - "rorxq $41, "#e", "L1"\n\t" \ + "rorxq $41, "#e", " L1 "\n\t" \ /* L1 = Sigma1(e) */ \ - "xorq "L2", "L1"\n\t" \ + "xorq " L2 ", " L1 "\n\t" \ #define RND_RORX_1_4(a, b, c, d, e, f, g, h, i) \ /* L4 = (f ^ g) & e */ \ - "andq "#e", "L4"\n\t" \ + "andq "#e", " L4 "\n\t" \ /* h += Sigma1(e) */ \ - "addq "L1", "#h"\n\t" \ + "addq " L1 ", "#h"\n\t" \ /* L1 = a>>>28 */ \ - "rorxq $28, "#a", "L1"\n\t" \ + "rorxq $28, "#a", " L1 "\n\t" \ #define RND_RORX_1_5(a, b, c, d, e, f, g, h, i) \ /* L2 = a>>>34 */ \ - "rorxq $34, "#a", "L2"\n\t" \ + "rorxq $34, "#a", " L2 "\n\t" \ /* L4 = Ch(e,f,g) */ \ - "xorq "#g", "L4"\n\t" \ + "xorq "#g", " L4 "\n\t" \ /* L2 = (a>>>28) ^ (a>>>34) */ \ - "xorq "L1", "L2"\n\t" \ + "xorq " L1 ", " L2 "\n\t" \ #define RND_RORX_1_6(a, b, c, d, e, f, g, h, i) \ /* L1 = a>>>39 */ \ - "rorxq $39, "#a", "L1"\n\t" \ + "rorxq $39, "#a", " L1 "\n\t" \ /* h += Ch(e,f,g) */ \ - "addq "L4", "#h"\n\t" \ + "addq " L4 ", "#h"\n\t" \ /* L1 = Sigma0(a) */ \ - "xorq "L2", "L1"\n\t" \ + "xorq " L2 ", " L1 "\n\t" \ #define RND_RORX_1_7(a, b, c, d, e, f, g, h, i) \ /* L4 = b */ \ - "movq "#b", "L4"\n\t" \ + "movq "#b", " L4 "\n\t" \ /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \ "addq "#h", "#d"\n\t" \ /* L4 = a ^ b */ \ - "xorq "#a", "L4"\n\t" \ + "xorq "#a", " L4 "\n\t" \ #define RND_RORX_1_8(a, b, c, d, e, f, g, h, i) \ /* L2 = (a ^ b) & (b ^ c) */ \ - "andq "L4", "L3"\n\t" \ + "andq " L4 ", " L3 "\n\t" \ /* h += Sigma0(a) */ \ - "addq "L1", "#h"\n\t" \ + "addq " L1 ", "#h"\n\t" \ /* L3 = Maj(a,b,c) */ \ - "xorq "#b", "L3"\n\t" \ + "xorq "#b", " L3 "\n\t" \ #define RND_RORX_ALL_2(a, b, c, d, e, f, g, h, i) \ RND_RORX_0_1(a, b, c, d, e, f, g, h, i+0) \ @@ -1262,15 +1262,15 @@ static word64 mBYTE_FLIP_MASK[] = { 0x0001020304050607, 0x08090a0b0c0d0e0f }; #endif #define _INIT_MASK(mask) \ - "vmovdqu %[mask], %%"#mask"\n\t" + "vmovdqu %[mask], %%" #mask "\n\t" #define INIT_MASK(mask) \ _INIT_MASK(mask) -#define _LOAD_W_2(i1, i2, xmm1, xmm2, mask, reg) \ - "vmovdqu "#i1"*16(%%"#reg"), %%"#xmm1"\n\t" \ - "vmovdqu "#i2"*16(%%"#reg"), %%"#xmm2"\n\t" \ - "vpshufb %%"#mask", %%"#xmm1", %%"#xmm1"\n\t" \ - "vpshufb %%"#mask", %%"#xmm2", %%"#xmm2"\n\t" +#define _LOAD_W_2(i1, i2, xmm1, xmm2, mask, reg) \ + "vmovdqu " #i1 "*16(%%" #reg "), %%" #xmm1 "\n\t" \ + "vmovdqu " #i2 "*16(%%" #reg "), %%" #xmm2 "\n\t" \ + "vpshufb %%" #mask ", %%" #xmm1 ", %%" #xmm1 "\n\t" \ + "vpshufb %%" #mask ", %%" #xmm2 ", %%" #xmm2 "\n\t" #define LOAD_W_2(i1, i2, xmm1, xmm2, mask, reg) \ _LOAD_W_2(i1, i2, xmm1, xmm2, mask, reg) @@ -1281,11 +1281,11 @@ static word64 mBYTE_FLIP_MASK[] = { 0x0001020304050607, 0x08090a0b0c0d0e0f }; LOAD_W_2(4, 5, W_8 , W_10, mask, reg) \ LOAD_W_2(6, 7, W_12, W_14, mask, reg) -#define _SET_W_X_2(xmm0, xmm1, reg, i) \ - "vpaddq "#i"+ 0(%%"#reg"), %%"#xmm0", %%xmm8\n\t" \ - "vpaddq "#i"+16(%%"#reg"), %%"#xmm1", %%xmm9\n\t" \ - "vmovdqu %%xmm8, "#i"+ 0("WX")\n\t" \ - "vmovdqu %%xmm9, "#i"+16("WX")\n\t" \ +#define _SET_W_X_2(xmm0, xmm1, reg, i) \ + "vpaddq " #i "+ 0(%%" #reg "), %%" #xmm0 ", %%xmm8\n\t" \ + "vpaddq " #i 
"+16(%%" #reg "), %%" #xmm1 ", %%xmm9\n\t" \ + "vmovdqu %%xmm8, " #i "+ 0(" WX ")\n\t" \ + "vmovdqu %%xmm9, " #i "+16(" WX ")\n\t" \ #define SET_W_X_2(xmm0, xmm1, reg, i) \ _SET_W_X_2(xmm0, xmm1, reg, i) @@ -1354,14 +1354,14 @@ static int Transform_Sha512_AVX1(wc_Sha512* sha512) LOAD_W(MASK, rax) - "movl $4, 16*8("WX")\n\t" + "movl $4, 16*8(" WX ")\n\t" "leaq %[K512], %%rsi\n\t" /* b */ - "movq %%r9, "L4"\n\t" + "movq %%r9, " L4 "\n\t" /* e */ - "movq %%r12, "L1"\n\t" + "movq %%r12, " L1 "\n\t" /* b ^ c */ - "xorq %%r10, "L4"\n\t" + "xorq %%r10, " L4 "\n\t" "# Start of 16 rounds\n" "1:\n\t" @@ -1379,7 +1379,7 @@ static int Transform_Sha512_AVX1(wc_Sha512* sha512) MsgSched2(W_12,W_14,W_0,W_2,W_4,W_6,W_8,W_10,RE,RF,RG,RH,RA,RB,RC,RD,12) MsgSched2(W_14,W_0,W_2,W_4,W_6,W_8,W_10,W_12,RC,RD,RE,RF,RG,RH,RA,RB,14) - "subl $1, 16*8("WX")\n\t" + "subl $1, 16*8(" WX ")\n\t" "jne 1b\n\t" SET_W_X(rsi) @@ -1427,13 +1427,13 @@ static int Transform_Sha512_AVX1_Len(wc_Sha512* sha512, word32 len) LOAD_W(MASK, rsi) - "movl $4, 16*8("WX")\n\t" + "movl $4, 16*8(" WX ")\n\t" /* b */ - "movq %%r9, "L4"\n\t" + "movq %%r9, " L4 "\n\t" /* e */ - "movq %%r12, "L1"\n\t" + "movq %%r12, " L1 "\n\t" /* b ^ c */ - "xorq %%r10, "L4"\n\t" + "xorq %%r10, " L4 "\n\t" SET_W_X(rdx) @@ -1456,7 +1456,7 @@ static int Transform_Sha512_AVX1_Len(wc_Sha512* sha512, word32 len) SET_W_X(rdx) - "subl $1, 16*8("WX")\n\t" + "subl $1, 16*8(" WX ")\n\t" "jne 1b\n\t" RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0) @@ -1506,14 +1506,14 @@ static int Transform_Sha512_AVX1_RORX(wc_Sha512* sha512) LOAD_W(MASK, rax) - "movl $4, 16*8("WX")\n\t" + "movl $4, 16*8(" WX ")\n\t" "leaq %[K512], %%rsi\n\t" /* L4 = b */ - "movq %%r9, "L4"\n\t" + "movq %%r9, " L4 "\n\t" /* L3 = 0 (add to prev h) */ - "xorq "L3", "L3"\n\t" + "xorq " L3 ", " L3 "\n\t" /* L4 = b ^ c */ - "xorq %%r10, "L4"\n\t" + "xorq %%r10, " L4 "\n\t" SET_W_X(rsi) @@ -1533,7 +1533,7 @@ static int Transform_Sha512_AVX1_RORX(wc_Sha512* sha512) SET_W_X(rsi) - "subl $1, 16*8("WX")\n\t" + "subl $1, 16*8(" WX ")\n\t" "jne 1b\n\t" RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0) @@ -1547,7 +1547,7 @@ static int Transform_Sha512_AVX1_RORX(wc_Sha512* sha512) RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14) /* Prev RND: h += Maj(a,b,c) */ - "addq "L3", %%r8\n\t" + "addq " L3 ", %%r8\n\t" "addq $144, %%rsp\n\t" STORE_ADD_DIGEST() @@ -1581,13 +1581,13 @@ static int Transform_Sha512_AVX1_RORX_Len(wc_Sha512* sha512, word32 len) LOAD_W(MASK, rsi) - "movl $4, 16*8("WX")\n\t" + "movl $4, 16*8(" WX ")\n\t" /* L4 = b */ - "movq %%r9, "L4"\n\t" + "movq %%r9, " L4 "\n\t" /* L3 = 0 (add to prev h) */ - "xorq "L3", "L3"\n\t" + "xorq " L3 ", " L3 "\n\t" /* L4 = b ^ c */ - "xorq %%r10, "L4"\n\t" + "xorq %%r10, " L4 "\n\t" SET_W_X(rcx) @@ -1610,7 +1610,7 @@ static int Transform_Sha512_AVX1_RORX_Len(wc_Sha512* sha512, word32 len) SET_W_X(rcx) - "subl $1, 16*8("WX")\n\t" + "subl $1, 16*8(" WX ")\n\t" "jne 1b\n\t" SET_W_X(rcx) @@ -1626,7 +1626,7 @@ static int Transform_Sha512_AVX1_RORX_Len(wc_Sha512* sha512, word32 len) RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14) /* Prev RND: h += Maj(a,b,c) */ - "addq "L3", %%r8\n\t" + "addq " L3 ", %%r8\n\t" "addq $256, %%rsp\n\t" ADD_DIGEST() @@ -1694,28 +1694,28 @@ static const unsigned long mBYTE_FLIP_MASK_Y[] = "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", \ "xmm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" -#define _VPERM2I128(dest, src1, src2, sel) \ - "vperm2I128 $"#sel", %%"#src2", %%"#src1", %%"#dest"\n\t" +#define _VPERM2I128(dest, src1, src2, sel) \ + 
"vperm2I128 $" #sel ", %%" #src2 ", %%" #src1 ", %%" #dest "\n\t" #define VPERM2I128(dest, src1, src2, sel) \ _VPERM2I128(dest, src1, src2, sel) -#define _VPERMQ(dest, src, sel) \ - "vpermq $"#sel", %%"#src", %%"#dest"\n\t" +#define _VPERMQ(dest, src, sel) \ + "vpermq $" #sel ", %%" #src ", %%" #dest "\n\t" #define VPERMQ(dest, src, sel) \ _VPERMQ(dest, src, sel) -#define _VPBLENDD(dest, src1, src2, sel) \ - "vpblendd $"#sel", %%"#src2", %%"#src1", %%"#dest"\n\t" +#define _VPBLENDD(dest, src1, src2, sel) \ + "vpblendd $" #sel ", %%" #src2 ", %%" #src1 ", %%" #dest "\n\t" #define VPBLENDD(dest, src1, src2, sel) \ _VPBLENDD(dest, src1, src2, sel) -#define _V_ADD_I(dest, src1, addr, i) \ - "vpaddq "#i"*8(%%"#addr"), %%"#src1", %%"#dest"\n\t" +#define _V_ADD_I(dest, src1, addr, i) \ + "vpaddq "#i"*8(%%" #addr "), %%" #src1 ", %%" #dest "\n\t" #define V_ADD_I(dest, src1, addr, i) \ _V_ADD_I(dest, src1, addr, i) -#define _VMOVDQU_I(addr, i, src) \ - "vmovdqu %%"#src", "#i"*8(%%"#addr")\n\t" +#define _VMOVDQU_I(addr, i, src) \ + "vmovdqu %%" #src ", " #i "*8(%%" #addr ")\n\t" #define VMOVDQU_I(addr, i, src) \ _VMOVDQU_I(addr, i, src) @@ -2052,12 +2052,12 @@ static const unsigned long mBYTE_FLIP_MASK_Y[] = _INIT_MASK_Y(mask) /* Load into YMM registers and swap endian. */ -#define _LOAD_BLOCK_W_Y_2(mask, ymm0, ymm1, reg, i) \ - /* buffer[0..15] => ymm0..ymm3; */ \ - "vmovdqu "#i"+ 0(%%"#reg"), %%"#ymm0"\n\t" \ - "vmovdqu "#i"+32(%%"#reg"), %%"#ymm1"\n\t" \ - "vpshufb %%"#mask", %%"#ymm0", %%"#ymm0"\n\t" \ - "vpshufb %%"#mask", %%"#ymm1", %%"#ymm1"\n\t" +#define _LOAD_BLOCK_W_Y_2(mask, ymm0, ymm1, reg, i) \ + /* buffer[0..15] => ymm0..ymm3; */ \ + "vmovdqu " #i "+ 0(%%" #reg "), %%" #ymm0 "\n\t" \ + "vmovdqu " #i "+32(%%" #reg "), %%" #ymm1 "\n\t" \ + "vpshufb %%" #mask ", %%" #ymm0 ", %%" #ymm0 "\n\t" \ + "vpshufb %%" #mask ", %%" #ymm1 ", %%" #ymm1 "\n\t" #define LOAD_BLOCK_W_Y_2(mask, ymm1, ymm2, reg, i) \ _LOAD_BLOCK_W_Y_2(mask, ymm1, ymm2, reg, i) @@ -2066,11 +2066,11 @@ static const unsigned long mBYTE_FLIP_MASK_Y[] = LOAD_BLOCK_W_Y_2(mask, W_Y_0, W_Y_4 , reg, 0) \ LOAD_BLOCK_W_Y_2(mask, W_Y_8, W_Y_12, reg, 64) -#define _SET_W_Y_2(ymm0, ymm1, ymm2, ymm3, reg, i) \ - "vpaddq "#i"+ 0(%%"#reg"), %%"#ymm0", %%"#ymm2"\n\t" \ - "vpaddq "#i"+32(%%"#reg"), %%"#ymm1", %%"#ymm3"\n\t" \ - "vmovdqu %%"#ymm2", "#i"+ 0("WX")\n\t" \ - "vmovdqu %%"#ymm3", "#i"+32("WX")\n\t" +#define _SET_W_Y_2(ymm0, ymm1, ymm2, ymm3, reg, i) \ + "vpaddq " #i "+ 0(%%" #reg "), %%" #ymm0 ", %%" #ymm2 "\n\t" \ + "vpaddq " #i "+32(%%" #reg "), %%" #ymm1 ", %%" #ymm3 "\n\t" \ + "vmovdqu %%" #ymm2 ", " #i "+ 0(" WX ")\n\t" \ + "vmovdqu %%" #ymm3 ", " #i "+32(" WX ")\n\t" #define SET_W_Y_2(ymm0, ymm1, ymm2, ymm3, reg, i) \ _SET_W_Y_2(ymm0, ymm1, ymm2, ymm3, reg, i) @@ -2081,14 +2081,14 @@ static const unsigned long mBYTE_FLIP_MASK_Y[] = /* Load into YMM registers and swap endian. 
*/ #define _LOAD_BLOCK2_W_Y_2(mask, Y0, Y1, X0, X1, X8, X9, reg, i) \ - "vmovdqu "#i"+ 0(%%"#reg"), %%"#X0"\n\t" \ - "vmovdqu "#i"+ 16(%%"#reg"), %%"#X1"\n\t" \ - "vmovdqu "#i"+128(%%"#reg"), %%"#X8"\n\t" \ - "vmovdqu "#i"+144(%%"#reg"), %%"#X9"\n\t" \ - "vinserti128 $1, %%"#X8", %%"#Y0", %%"#Y0"\n\t" \ - "vinserti128 $1, %%"#X9", %%"#Y1", %%"#Y1"\n\t" \ - "vpshufb %%"#mask", %%"#Y0", %%"#Y0"\n\t" \ - "vpshufb %%"#mask", %%"#Y1", %%"#Y1"\n\t" + "vmovdqu " #i "+ 0(%%" #reg "), %%" #X0 "\n\t" \ + "vmovdqu " #i "+ 16(%%" #reg "), %%" #X1 "\n\t" \ + "vmovdqu " #i "+128(%%" #reg "), %%" #X8 "\n\t" \ + "vmovdqu " #i "+144(%%" #reg "), %%" #X9 "\n\t" \ + "vinserti128 $1, %%" #X8 ", %%" #Y0 ", %%" #Y0 "\n\t" \ + "vinserti128 $1, %%" #X9 ", %%" #Y1 ", %%" #Y1 "\n\t" \ + "vpshufb %%" #mask ", %%" #Y0 ", %%" #Y0 "\n\t" \ + "vpshufb %%" #mask ", %%" #Y1 ", %%" #Y1 "\n\t" #define LOAD_BLOCK2_W_Y_2(mask, Y0, Y1, X0, X1, X8, X9, reg, i) \ _LOAD_BLOCK2_W_Y_2(mask, Y0, Y1, X0, X1, X8, X9, reg, i) @@ -2202,14 +2202,14 @@ static int Transform_Sha512_AVX2(wc_Sha512* sha512) LOAD_BLOCK_W_Y(MASK_Y, rax) - "movl $4, 16*8("WX")\n\t" + "movl $4, 16*8(" WX ")\n\t" "leaq %[K512], %%rsi\n\t" /* b */ - "movq %%r9, "L4"\n\t" + "movq %%r9, " L4 "\n\t" /* e */ - "movq %%r12, "L1"\n\t" + "movq %%r12, " L1 "\n\t" /* b ^ c */ - "xorq %%r10, "L4"\n\t" + "xorq %%r10, " L4 "\n\t" SET_BLOCK_W_Y(rsi) @@ -2225,7 +2225,7 @@ static int Transform_Sha512_AVX2(wc_Sha512* sha512) SET_BLOCK_W_Y(rsi) - "subl $1, 16*8("WX")\n\t" + "subl $1, 16*8(" WX ")\n\t" "jne 1b\n\t" RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0) @@ -2277,14 +2277,14 @@ static int Transform_Sha512_AVX2_Len(wc_Sha512* sha512, word32 len) "leaq %[K512], %%rsi\n\t" /* L4 = b */ - "movq %%r9, "L4"\n\t" + "movq %%r9, " L4 "\n\t" /* e */ - "movq %%r12, "L1"\n\t" + "movq %%r12, " L1 "\n\t" LOAD_BLOCK2_W_Y(MASK_Y, rcx) /* L4 = b ^ c */ - "xorq %%r10, "L4"\n\t" + "xorq %%r10, " L4 "\n\t" "\n" "1:\n\t" SET_BLOCK2_W_Y(rsi) @@ -2317,11 +2317,11 @@ static int Transform_Sha512_AVX2_Len(wc_Sha512* sha512, word32 len) STORE_DIGEST() /* L4 = b */ - "movq %%r9, "L4"\n\t" + "movq %%r9, " L4 "\n\t" /* e */ - "movq %%r12, "L1"\n\t" + "movq %%r12, " L1 "\n\t" /* L4 = b ^ c */ - "xorq %%r10, "L4"\n\t" + "xorq %%r10, " L4 "\n\t" "movq $5, %%rsi\n\t" "\n" @@ -2370,21 +2370,21 @@ static int Transform_Sha512_AVX2_RORX(wc_Sha512* sha512) /* 16 Ws plus loop counter. 
*/ "subq $136, %%rsp\n\t" - "leaq 64(%[sha512]), "L2"\n\t" + "leaq 64(%[sha512]), " L2 "\n\t" INIT_MASK(MASK_Y) LOAD_DIGEST() LOAD_BLOCK_W_Y(MASK_Y, rcx) - "movl $4, 16*8("WX")\n\t" + "movl $4, 16*8(" WX ")\n\t" "leaq %[K512], %%rsi\n\t" /* b */ - "movq %%r9, "L4"\n\t" + "movq %%r9, " L4 "\n\t" /* L3 = 0 (add to prev h) */ - "xorq "L3", "L3"\n\t" + "xorq " L3 ", " L3 "\n\t" /* b ^ c */ - "xorq %%r10, "L4"\n\t" + "xorq %%r10, " L4 "\n\t" SET_BLOCK_W_Y(rsi) @@ -2406,7 +2406,7 @@ static int Transform_Sha512_AVX2_RORX(wc_Sha512* sha512) RND_RORX_ALL_4(RA,RB,RC,RD,RE,RF,RG,RH, 8) RND_RORX_ALL_4(RE,RF,RG,RH,RA,RB,RC,RD,12) /* Prev RND: h += Maj(a,b,c) */ - "addq "L3", %%r8\n\t" + "addq " L3 ", %%r8\n\t" "addq $136, %%rsp\n\t" STORE_ADD_DIGEST() @@ -2446,14 +2446,14 @@ static int Transform_Sha512_AVX2_RORX_Len(wc_Sha512* sha512, word32 len) "leaq %[K512], %%rsi\n\t" /* L4 = b */ - "movq %%r9, "L4"\n\t" + "movq %%r9, " L4 "\n\t" /* L3 = 0 (add to prev h) */ - "xorq "L3", "L3"\n\t" + "xorq " L3 ", " L3 "\n\t" LOAD_BLOCK2_W_Y(MASK_Y, rax) /* L4 = b ^ c */ - "xorq %%r10, "L4"\n\t" + "xorq %%r10, " L4 "\n\t" "\n" "1:\n\t" SET_BLOCK2_W_Y(rsi) @@ -2480,18 +2480,18 @@ static int Transform_Sha512_AVX2_RORX_Len(wc_Sha512* sha512, word32 len) RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,20) RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,24) RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,28) - "addq "L3", %%r8\n\t" + "addq " L3 ", %%r8\n\t" "subq $1024, %%rsp\n\t" ADD_DIGEST() STORE_DIGEST() /* L4 = b */ - "movq %%r9, "L4"\n\t" + "movq %%r9, " L4 "\n\t" /* L3 = 0 (add to prev h) */ - "xorq "L3", "L3"\n\t" + "xorq " L3 ", " L3 "\n\t" /* L4 = b ^ c */ - "xorq %%r10, "L4"\n\t" + "xorq %%r10, " L4 "\n\t" "movq $5, %%rsi\n\t" "\n" @@ -2509,7 +2509,7 @@ static int Transform_Sha512_AVX2_RORX_Len(wc_Sha512* sha512, word32 len) "subq $1, %%rsi\n\t" "jnz 3b\n\t" - "addq "L3", %%r8\n\t" + "addq " L3 ", %%r8\n\t" ADD_DIGEST()