ARM64 AES ASM base: TD4 is only 256 bytes long

Pre-fetch fewer entries of TD4 than of TD, as TD4 is only 256 bytes long.
This commit is contained in:
Sean Parkinson
2026-02-19 09:31:00 +10:00
parent 3a1aa8310e
commit 88451a71b3
2 changed files with 0 additions and 144 deletions
-72
View File
@@ -53564,18 +53564,6 @@ L_AES_ECB_decrypt_loop_nr:
ldr x14, [x6, #64]
ldr x14, [x6, #128]
ldr x14, [x6, #192]
ldr x14, [x6, #256]
ldr x14, [x6, #320]
ldr x14, [x6, #384]
ldr x14, [x6, #448]
ldr x14, [x6, #512]
ldr x14, [x6, #576]
ldr x14, [x6, #640]
ldr x14, [x6, #704]
ldr x14, [x6, #768]
ldr x14, [x6, #832]
ldr x14, [x6, #896]
ldr x14, [x6, #960]
ldrb w7, [x6, x7, LSL 0]
ldrb w10, [x6, x10, LSL 0]
ldrb w15, [x6, x15, LSL 0]
@@ -53889,18 +53877,6 @@ L_AES_CBC_decrypt_loop_nr_even:
ldr x15, [x6, #64]
ldr x15, [x6, #128]
ldr x15, [x6, #192]
ldr x15, [x6, #256]
ldr x15, [x6, #320]
ldr x15, [x6, #384]
ldr x15, [x6, #448]
ldr x15, [x6, #512]
ldr x15, [x6, #576]
ldr x15, [x6, #640]
ldr x15, [x6, #704]
ldr x15, [x6, #768]
ldr x15, [x6, #832]
ldr x15, [x6, #896]
ldr x15, [x6, #960]
ldrb w8, [x6, x8, LSL 0]
ldrb w11, [x6, x11, LSL 0]
ldrb w16, [x6, x16, LSL 0]
@@ -54178,18 +54154,6 @@ L_AES_CBC_decrypt_loop_nr_odd:
ldr x15, [x6, #64]
ldr x15, [x6, #128]
ldr x15, [x6, #192]
ldr x15, [x6, #256]
ldr x15, [x6, #320]
ldr x15, [x6, #384]
ldr x15, [x6, #448]
ldr x15, [x6, #512]
ldr x15, [x6, #576]
ldr x15, [x6, #640]
ldr x15, [x6, #704]
ldr x15, [x6, #768]
ldr x15, [x6, #832]
ldr x15, [x6, #896]
ldr x15, [x6, #960]
ldrb w8, [x6, x8, LSL 0]
ldrb w11, [x6, x11, LSL 0]
ldrb w16, [x6, x16, LSL 0]
@@ -56594,18 +56558,6 @@ L_AES_XTS_decrypt_loop_nr:
ldr x20, [x9, #64]
ldr x20, [x9, #128]
ldr x20, [x9, #192]
ldr x20, [x9, #256]
ldr x20, [x9, #320]
ldr x20, [x9, #384]
ldr x20, [x9, #448]
ldr x20, [x9, #512]
ldr x20, [x9, #576]
ldr x20, [x9, #640]
ldr x20, [x9, #704]
ldr x20, [x9, #768]
ldr x20, [x9, #832]
ldr x20, [x9, #896]
ldr x20, [x9, #960]
ldrb w12, [x9, x12, LSL 0]
ldrb w15, [x9, x15, LSL 0]
ldrb w21, [x9, x21, LSL 0]
@@ -56890,18 +56842,6 @@ L_AES_XTS_decrypt_loop_nr_partial_1:
ldr x20, [x9, #64]
ldr x20, [x9, #128]
ldr x20, [x9, #192]
ldr x20, [x9, #256]
ldr x20, [x9, #320]
ldr x20, [x9, #384]
ldr x20, [x9, #448]
ldr x20, [x9, #512]
ldr x20, [x9, #576]
ldr x20, [x9, #640]
ldr x20, [x9, #704]
ldr x20, [x9, #768]
ldr x20, [x9, #832]
ldr x20, [x9, #896]
ldr x20, [x9, #960]
ldrb w12, [x9, x12, LSL 0]
ldrb w15, [x9, x15, LSL 0]
ldrb w21, [x9, x21, LSL 0]
@@ -57185,18 +57125,6 @@ L_AES_XTS_decrypt_loop_nr_partial_2:
ldr x20, [x9, #64]
ldr x20, [x9, #128]
ldr x20, [x9, #192]
ldr x20, [x9, #256]
ldr x20, [x9, #320]
ldr x20, [x9, #384]
ldr x20, [x9, #448]
ldr x20, [x9, #512]
ldr x20, [x9, #576]
ldr x20, [x9, #640]
ldr x20, [x9, #704]
ldr x20, [x9, #768]
ldr x20, [x9, #832]
ldr x20, [x9, #896]
ldr x20, [x9, #960]
ldrb w12, [x9, x12, LSL 0]
ldrb w15, [x9, x15, LSL 0]
ldrb w21, [x9, x21, LSL 0]
-72
View File
@@ -53256,18 +53256,6 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
"ldr x14, [%[td4], #64]\n\t"
"ldr x14, [%[td4], #128]\n\t"
"ldr x14, [%[td4], #192]\n\t"
"ldr x14, [%[td4], #256]\n\t"
"ldr x14, [%[td4], #320]\n\t"
"ldr x14, [%[td4], #384]\n\t"
"ldr x14, [%[td4], #448]\n\t"
"ldr x14, [%[td4], #512]\n\t"
"ldr x14, [%[td4], #576]\n\t"
"ldr x14, [%[td4], #640]\n\t"
"ldr x14, [%[td4], #704]\n\t"
"ldr x14, [%[td4], #768]\n\t"
"ldr x14, [%[td4], #832]\n\t"
"ldr x14, [%[td4], #896]\n\t"
"ldr x14, [%[td4], #960]\n\t"
"ldrb w7, [%[td4], x7, LSL 0]\n\t"
"ldrb w10, [%[td4], x10, LSL 0]\n\t"
"ldrb w15, [%[td4], x15, LSL 0]\n\t"
@@ -53562,18 +53550,6 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
"ldr x15, [%[td4], #64]\n\t"
"ldr x15, [%[td4], #128]\n\t"
"ldr x15, [%[td4], #192]\n\t"
"ldr x15, [%[td4], #256]\n\t"
"ldr x15, [%[td4], #320]\n\t"
"ldr x15, [%[td4], #384]\n\t"
"ldr x15, [%[td4], #448]\n\t"
"ldr x15, [%[td4], #512]\n\t"
"ldr x15, [%[td4], #576]\n\t"
"ldr x15, [%[td4], #640]\n\t"
"ldr x15, [%[td4], #704]\n\t"
"ldr x15, [%[td4], #768]\n\t"
"ldr x15, [%[td4], #832]\n\t"
"ldr x15, [%[td4], #896]\n\t"
"ldr x15, [%[td4], #960]\n\t"
"ldrb w8, [%[td4], x8, LSL 0]\n\t"
"ldrb w11, [%[td4], x11, LSL 0]\n\t"
"ldrb w16, [%[td4], x16, LSL 0]\n\t"
@@ -53852,18 +53828,6 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
"ldr x15, [%[td4], #64]\n\t"
"ldr x15, [%[td4], #128]\n\t"
"ldr x15, [%[td4], #192]\n\t"
"ldr x15, [%[td4], #256]\n\t"
"ldr x15, [%[td4], #320]\n\t"
"ldr x15, [%[td4], #384]\n\t"
"ldr x15, [%[td4], #448]\n\t"
"ldr x15, [%[td4], #512]\n\t"
"ldr x15, [%[td4], #576]\n\t"
"ldr x15, [%[td4], #640]\n\t"
"ldr x15, [%[td4], #704]\n\t"
"ldr x15, [%[td4], #768]\n\t"
"ldr x15, [%[td4], #832]\n\t"
"ldr x15, [%[td4], #896]\n\t"
"ldr x15, [%[td4], #960]\n\t"
"ldrb w8, [%[td4], x8, LSL 0]\n\t"
"ldrb w11, [%[td4], x11, LSL 0]\n\t"
"ldrb w16, [%[td4], x16, LSL 0]\n\t"
@@ -56169,18 +56133,6 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i,
"ldr x20, [%[td4], #64]\n\t"
"ldr x20, [%[td4], #128]\n\t"
"ldr x20, [%[td4], #192]\n\t"
"ldr x20, [%[td4], #256]\n\t"
"ldr x20, [%[td4], #320]\n\t"
"ldr x20, [%[td4], #384]\n\t"
"ldr x20, [%[td4], #448]\n\t"
"ldr x20, [%[td4], #512]\n\t"
"ldr x20, [%[td4], #576]\n\t"
"ldr x20, [%[td4], #640]\n\t"
"ldr x20, [%[td4], #704]\n\t"
"ldr x20, [%[td4], #768]\n\t"
"ldr x20, [%[td4], #832]\n\t"
"ldr x20, [%[td4], #896]\n\t"
"ldr x20, [%[td4], #960]\n\t"
"ldrb w12, [%[td4], x12, LSL 0]\n\t"
"ldrb w15, [%[td4], x15, LSL 0]\n\t"
"ldrb w21, [%[td4], x21, LSL 0]\n\t"
@@ -56467,18 +56419,6 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i,
"ldr x20, [%[td4], #64]\n\t"
"ldr x20, [%[td4], #128]\n\t"
"ldr x20, [%[td4], #192]\n\t"
"ldr x20, [%[td4], #256]\n\t"
"ldr x20, [%[td4], #320]\n\t"
"ldr x20, [%[td4], #384]\n\t"
"ldr x20, [%[td4], #448]\n\t"
"ldr x20, [%[td4], #512]\n\t"
"ldr x20, [%[td4], #576]\n\t"
"ldr x20, [%[td4], #640]\n\t"
"ldr x20, [%[td4], #704]\n\t"
"ldr x20, [%[td4], #768]\n\t"
"ldr x20, [%[td4], #832]\n\t"
"ldr x20, [%[td4], #896]\n\t"
"ldr x20, [%[td4], #960]\n\t"
"ldrb w12, [%[td4], x12, LSL 0]\n\t"
"ldrb w15, [%[td4], x15, LSL 0]\n\t"
"ldrb w21, [%[td4], x21, LSL 0]\n\t"
@@ -56764,18 +56704,6 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i,
"ldr x20, [%[td4], #64]\n\t"
"ldr x20, [%[td4], #128]\n\t"
"ldr x20, [%[td4], #192]\n\t"
"ldr x20, [%[td4], #256]\n\t"
"ldr x20, [%[td4], #320]\n\t"
"ldr x20, [%[td4], #384]\n\t"
"ldr x20, [%[td4], #448]\n\t"
"ldr x20, [%[td4], #512]\n\t"
"ldr x20, [%[td4], #576]\n\t"
"ldr x20, [%[td4], #640]\n\t"
"ldr x20, [%[td4], #704]\n\t"
"ldr x20, [%[td4], #768]\n\t"
"ldr x20, [%[td4], #832]\n\t"
"ldr x20, [%[td4], #896]\n\t"
"ldr x20, [%[td4], #960]\n\t"
"ldrb w12, [%[td4], x12, LSL 0]\n\t"
"ldrb w15, [%[td4], x15, LSL 0]\n\t"
"ldrb w21, [%[td4], x21, LSL 0]\n\t"