forked from espressif/esp-idf
change(mbedtls/port): optimize gcm_mult()
1) Pre-shift the GCM last4 table to use a 32-bit shift. On 32-bit architectures such as AArch32, RV32 and Xtensa, shifting a 64-bit variable by 32 bits is free, because it only changes which register holds each half of the 64-bit value. The last4 entries are therefore stored pre-shifted by 16 bits (as uint32_t), so that the shift applied in gcm_mult() becomes << 32 instead of << 48.
2) Unroll the first GCM iteration. The first iteration of the loop in gcm_mult() differs from the others. By unrolling it separately, the remaining iterations can take advantage of the zero-overhead loop construct, in addition to saving a conditional branch inside the loop.
This commit is contained in:
committed by
Mahavir Jain
parent
357e28826d
commit
0b51c24238
@ -192,11 +192,11 @@ static int gcm_gen_table( esp_gcm_context *ctx )
|
|||||||
* last4[x] = x times P^128
|
* last4[x] = x times P^128
|
||||||
* where x and last4[x] are seen as elements of GF(2^128) as in [MGV]
|
* where x and last4[x] are seen as elements of GF(2^128) as in [MGV]
|
||||||
*/
|
*/
|
||||||
static const uint64_t last4[16] = {
|
static const uint32_t last4[16] = {
|
||||||
0x0000, 0x1c20, 0x3840, 0x2460,
|
0x00000000, 0x1c200000, 0x38400000, 0x24600000,
|
||||||
0x7080, 0x6ca0, 0x48c0, 0x54e0,
|
0x70800000, 0x6ca00000, 0x48c00000, 0x54e00000,
|
||||||
0xe100, 0xfd20, 0xd940, 0xc560,
|
0xe1000000, 0xfd200000, 0xd9400000, 0xc5600000,
|
||||||
0x9180, 0x8da0, 0xa9c0, 0xb5e0
|
0x91800000, 0x8da00000, 0xa9c00000, 0xb5e00000
|
||||||
};
|
};
|
||||||
/* Based on MbedTLS's implementation
|
/* Based on MbedTLS's implementation
|
||||||
*
|
*
|
||||||
@ -211,28 +211,33 @@ static void gcm_mult( esp_gcm_context *ctx, const unsigned char x[16],
|
|||||||
uint64_t zh, zl;
|
uint64_t zh, zl;
|
||||||
|
|
||||||
lo = x[15] & 0xf;
|
lo = x[15] & 0xf;
|
||||||
|
hi = x[15] >> 4;
|
||||||
|
|
||||||
zh = ctx->HH[lo];
|
zh = ctx->HH[lo];
|
||||||
zl = ctx->HL[lo];
|
zl = ctx->HL[lo];
|
||||||
|
|
||||||
for ( i = 15; i >= 0; i-- ) {
|
rem = (unsigned char) zl & 0xf;
|
||||||
|
zl = ( zh << 60 ) | ( zl >> 4 );
|
||||||
|
zh = ( zh >> 4 );
|
||||||
|
zh ^= (uint64_t) last4[rem] << 32;
|
||||||
|
zh ^= ctx->HH[hi];
|
||||||
|
zl ^= ctx->HL[hi];
|
||||||
|
|
||||||
|
for ( i = 14; i >= 0; i-- ) {
|
||||||
lo = x[i] & 0xf;
|
lo = x[i] & 0xf;
|
||||||
hi = x[i] >> 4;
|
hi = x[i] >> 4;
|
||||||
|
|
||||||
if ( i != 15 ) {
|
|
||||||
rem = (unsigned char) zl & 0xf;
|
|
||||||
zl = ( zh << 60 ) | ( zl >> 4 );
|
|
||||||
zh = ( zh >> 4 );
|
|
||||||
zh ^= (uint64_t) last4[rem] << 48;
|
|
||||||
zh ^= ctx->HH[lo];
|
|
||||||
zl ^= ctx->HL[lo];
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
rem = (unsigned char) zl & 0xf;
|
rem = (unsigned char) zl & 0xf;
|
||||||
zl = ( zh << 60 ) | ( zl >> 4 );
|
zl = ( zh << 60 ) | ( zl >> 4 );
|
||||||
zh = ( zh >> 4 );
|
zh = ( zh >> 4 );
|
||||||
zh ^= (uint64_t) last4[rem] << 48;
|
zh ^= (uint64_t) last4[rem] << 32;
|
||||||
|
zh ^= ctx->HH[lo];
|
||||||
|
zl ^= ctx->HL[lo];
|
||||||
|
|
||||||
|
rem = (unsigned char) zl & 0xf;
|
||||||
|
zl = ( zh << 60 ) | ( zl >> 4 );
|
||||||
|
zh = ( zh >> 4 );
|
||||||
|
zh ^= (uint64_t) last4[rem] << 32;
|
||||||
zh ^= ctx->HH[hi];
|
zh ^= ctx->HH[hi];
|
||||||
zl ^= ctx->HL[hi];
|
zl ^= ctx->HL[hi];
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user