crypto-native: 256-bit AES CBC support

Used on Intel client CPUs which support the VAES instruction set without AVX512.

Type: improvement
Change-Id: I5f816a1ea9f89a8d298d2c0f38d8d7c06f414ba0
Signed-off-by: Damjan Marion <damarion@cisco.com>
Committed by: Benoît Ganne
Parent: 1ca681838c
Commit: adeaf16960
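The new "adl" variant targets Intel client parts (Alder Lake and later) that implement VAES/VPCLMULQDQ on 256-bit vectors but do not expose AVX-512, so the existing 512-bit "icl" code path cannot run on them. The sketch below is illustrative only and is not part of the patch: it shows how that CPU class can be identified from CPUID. Inside VPP the equivalent checks are the clib_cpu_supports_vaes () / clib_cpu_supports_avx512f () calls visible in the main.c hunk further down.

/* Illustrative sketch, not VPP code: detect VAES-without-AVX512 CPUs. */
#include <cpuid.h>
#include <stdio.h>

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx))
    return 1;

  int has_vaes = (ecx >> 9) & 1;     /* CPUID.(7,0):ECX bit 9  */
  int has_avx512f = (ebx >> 16) & 1; /* CPUID.(7,0):EBX bit 16 */

  if (has_vaes && !has_avx512f)
    printf ("VAES without AVX512 -> new 'adl' variant\n");
  else if (has_vaes && has_avx512f)
    printf ("VAES with AVX512 -> existing 'icl' variant\n");
  else
    printf ("no VAES -> skx/hsw/slm fallbacks\n");
  return 0;
}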
@@ -20,6 +20,9 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
   if(compiler_flag_march_icelake_client AND compiler_flag_mprefer_vector_width_512)
     list(APPEND VARIANTS "icl\;-march=icelake-client -mprefer-vector-width=512")
   endif()
+  if(compiler_flag_march_alderlake)
+    list(APPEND VARIANTS "adl\;-march=alderlake -mprefer-vector-width=256")
+  endif()
   set (COMPILE_FILES aes_cbc.c aes_gcm.c)
   set (COMPILE_OPTS -Wall -fno-common -maes)
 endif()
@@ -48,7 +48,7 @@ aes_enc_round (u8x16 a, u8x16 k)
 #endif
 }
 
-#if defined (__VAES__)
+#if defined(__VAES__) && defined(__AVX512F__)
 static_always_inline u8x64
 aes_enc_round_x4 (u8x64 a, u8x64 k)
 {
@@ -74,6 +74,32 @@ aes_dec_last_round_x4 (u8x64 a, u8x64 k)
 }
 #endif
 
+#ifdef __VAES__
+static_always_inline u8x32
+aes_enc_round_x2 (u8x32 a, u8x32 k)
+{
+  return (u8x32) _mm256_aesenc_epi128 ((__m256i) a, (__m256i) k);
+}
+
+static_always_inline u8x32
+aes_enc_last_round_x2 (u8x32 a, u8x32 k)
+{
+  return (u8x32) _mm256_aesenclast_epi128 ((__m256i) a, (__m256i) k);
+}
+
+static_always_inline u8x32
+aes_dec_round_x2 (u8x32 a, u8x32 k)
+{
+  return (u8x32) _mm256_aesdec_epi128 ((__m256i) a, (__m256i) k);
+}
+
+static_always_inline u8x32
+aes_dec_last_round_x2 (u8x32 a, u8x32 k)
+{
+  return (u8x32) _mm256_aesdeclast_epi128 ((__m256i) a, (__m256i) k);
+}
+#endif
+
 static_always_inline u8x16
 aes_enc_last_round (u8x16 a, u8x16 k)
 {
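The new *_x2 helpers above wrap the 256-bit VAES round instructions, which process two independent 16-byte AES blocks per instruction using only AVX2-width registers. A minimal standalone sketch of the same idea follows; it is illustrative only, the function name and key-schedule handling are assumptions rather than the plugin's API. It encrypts two blocks with a pre-expanded AES-128 key, broadcasting each 128-bit round key to both lanes much like the patch does with u8x32_splat_u8x16 (). Build with something like -mavx2 -mvaes or -march=alderlake.

#include <immintrin.h>

/* Hypothetical helper, not VPP API: encrypt two independent 16-byte
   blocks in one pass using the 256-bit (VAES) AES round instructions.
   rk[] is a pre-expanded AES-128 key schedule (11 round keys). */
static inline void
aes128_encrypt_x2 (const __m128i rk[11], const unsigned char in[32],
                   unsigned char out[32])
{
  /* two plaintext blocks side by side in one 256-bit register */
  __m256i b = _mm256_loadu_si256 ((const __m256i *) in);

  /* broadcast each 128-bit round key to both lanes */
  b = _mm256_xor_si256 (b, _mm256_broadcastsi128_si256 (rk[0]));
  for (int i = 1; i < 10; i++)
    b = _mm256_aesenc_epi128 (b, _mm256_broadcastsi128_si256 (rk[i]));
  b = _mm256_aesenclast_epi128 (b, _mm256_broadcastsi128_si256 (rk[10]));

  _mm256_storeu_si256 ((__m256i *) out, b);
}

Since CBC encryption chains blocks within one stream, the 2-wide (and 4-wide) forms pay off by interleaving blocks from independent operations rather than consecutive blocks of the same buffer.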
File diff suppressed because it is too large.
@@ -26,7 +26,7 @@
 #pragma GCC optimize ("O3")
 #endif
 
-#ifdef __VAES__
+#if defined(__VAES__) && defined(__AVX512F__)
 #define NUM_HI 32
 #else
 #define NUM_HI 8
@@ -38,7 +38,7 @@ typedef struct
   const u8x16 Hi[NUM_HI];
   /* extracted AES key */
   const u8x16 Ke[15];
-#ifdef __VAES__
+#if defined(__VAES__) && defined(__AVX512F__)
   const u8x64 Ke4[15];
 #endif
 } aes_gcm_key_data_t;
@@ -63,7 +63,6 @@ typedef enum
 
 static const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };
 
-#ifndef __VAES__
 static_always_inline void
 aes_gcm_enc_first_round (u8x16 * r, aes_gcm_counter_t * ctr, u8x16 k,
                          int n_blocks)
@@ -107,7 +106,6 @@ aes_gcm_enc_last_round (u8x16 * r, u8x16 * d, u8x16 const *k,
   for (int i = 0; i < n_blocks; i++)
     d[i] ^= aes_enc_last_round (r[i], k[rounds]);
 }
-#endif
 
 static_always_inline u8x16
 aes_gcm_ghash_blocks (u8x16 T, aes_gcm_key_data_t * kd,
@@ -163,11 +161,10 @@ aes_gcm_ghash (u8x16 T, aes_gcm_key_data_t * kd, u8x16u * in, u32 n_left)
   return T;
 }
 
-#ifndef __VAES__
-static_always_inline u8x16
-aes_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
-              aes_gcm_counter_t * ctr, u8x16u * inv, u8x16u * outv,
-              int rounds, int n, int last_block_bytes, aes_gcm_flags_t f)
+static_always_inline __clib_unused u8x16
+aes_gcm_calc (u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d,
+              aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv, int rounds,
+              int n, int last_block_bytes, aes_gcm_flags_t f)
 {
   u8x16 r[n];
   ghash_data_t _gd = { }, *gd = &_gd;
@@ -258,9 +255,9 @@ aes_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
   return T;
 }
 
-static_always_inline u8x16
-aes_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
-                     aes_gcm_counter_t * ctr, u8x16u * inv, u8x16u * outv,
+static_always_inline __clib_unused u8x16
+aes_gcm_calc_double (u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d,
+                     aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv,
                      int rounds, aes_gcm_flags_t f)
 {
   u8x16 r[4];
@@ -396,9 +393,9 @@ aes_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
   return ghash_final (gd);
 }
 
-static_always_inline u8x16
-aes_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
-                    int n_blocks, int n_bytes)
+static_always_inline __clib_unused u8x16
+aes_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d, int n_blocks,
+                    int n_bytes)
 {
   ghash_data_t _gd, *gd = &_gd;
   u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;
@@ -417,9 +414,8 @@ aes_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
   ghash_reduce2 (gd);
   return ghash_final (gd);
 }
-#endif
 
-#ifdef __VAES__
+#if defined(__VAES__) && defined(__AVX512F__)
 static const u32x16 ctr_inv_1234 = {
   0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
 };
@@ -757,7 +753,7 @@ aes_gcm_enc (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
   if (n_left == 0)
     return T;
 
-#if __VAES__
+#if defined(__VAES__) && defined(__AVX512F__)
   u8x64 d4[4];
   if (n_left < 256)
     {
@@ -939,7 +935,7 @@ aes_gcm_dec (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
              u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
 {
   aes_gcm_flags_t f = AES_GCM_F_WITH_GHASH | AES_GCM_F_DECRYPT;
-#ifdef __VAES__
+#if defined(__VAES__) && defined(__AVX512F__)
   u8x64 d4[4] = { };
 
   while (n_left >= 512)
@@ -1045,7 +1041,7 @@ aes_gcm (u8x16u *in, u8x16u *out, u8x16u *addt, u8 *ivp, u8x16u *tag,
   Y0.as_u64x2[0] = *(u64u *) ivp;
   Y0.as_u32x4[2] = *(u32u *) (ivp + 8);
   Y0.as_u32x4 += ctr_inv_1;
-#ifdef __VAES__
+#if defined(__VAES__) && defined(__AVX512F__)
   ctr->Y4 = u32x16_splat_u32x4 (Y0.as_u32x4) + ctr_inv_1234;
 #else
   ctr->Y = Y0.as_u32x4 + ctr_inv_1;
@@ -1177,7 +1173,7 @@ aes_gcm_key_exp (vnet_crypto_key_t * key, aes_key_size_t ks)
   H = aes_encrypt_block (u8x16_splat (0), kd->Ke, ks);
   H = u8x16_reflect (H);
   ghash_precompute (H, (u8x16 *) kd->Hi, NUM_HI);
-#ifdef __VAES__
+#if defined(__VAES__) && defined(__AVX512F__)
   u8x64 *Ke4 = (u8x64 *) kd->Ke4;
   for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
     Ke4[i] = u8x64_splat_u8x16 (kd->Ke[i]);
@@ -1201,8 +1197,8 @@ foreach_aes_gcm_handler_type;
 #undef _
 
 clib_error_t *
-#ifdef __VAES__
-crypto_native_aes_gcm_init_icl (vlib_main_t * vm)
+#if defined(__VAES__) && defined(__AVX512F__)
+crypto_native_aes_gcm_init_icl (vlib_main_t *vm)
 #elif __AVX512F__
 crypto_native_aes_gcm_init_skx (vlib_main_t * vm)
 #elif __AVX2__
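A note on the hunks above: instead of compiling the 128-bit GCM helpers out with #ifndef __VAES__, they are now kept in every variant and tagged __clib_unused, which in the VPP headers maps to the compiler's "unused" attribute, so builds whose fast path never references them stay quiet under -Wall. A tiny standalone illustration of that attribute follows; the names are made up and it is not VPP code.

/* Illustrative only: compile with -Wall -Wunused-function. */
#define my_unused __attribute__ ((unused)) /* roughly what __clib_unused does */

static my_unused int
rarely_used_helper (int x) /* not referenced in this build: no warning */
{
  return x * 2;
}

int
main (void)
{
  return 0;
}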
@@ -29,7 +29,8 @@ typedef struct
 
 extern crypto_native_main_t crypto_native_main;
 
-#define foreach_crypto_native_march_variant _(slm) _(hsw) _(skx) _(icl) _(neon)
+#define foreach_crypto_native_march_variant                                  \
+  _ (slm) _ (hsw) _ (skx) _ (icl) _ (adl) _ (neon)
 
 #define _(v) \
   clib_error_t __clib_weak *crypto_native_aes_cbc_init_##v (vlib_main_t * vm); \
@@ -257,7 +257,7 @@ ghash_mul (u8x16 a, u8x16 b)
   return ghash_final (gd);
 }
 
-#ifdef __VPCLMULQDQ__
+#if defined(__VPCLMULQDQ__) && defined(__AVX512F__)
 
 static const u8x64 ghash4_poly2 = {
   0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
@@ -78,6 +78,8 @@ crypto_native_init (vlib_main_t * vm)
   else if (crypto_native_aes_cbc_init_icl && clib_cpu_supports_vaes () &&
            clib_cpu_supports_avx512f ())
     error = crypto_native_aes_cbc_init_icl (vm);
+  else if (crypto_native_aes_cbc_init_adl && clib_cpu_supports_vaes ())
+    error = crypto_native_aes_cbc_init_adl (vm);
   else if (crypto_native_aes_cbc_init_skx && clib_cpu_supports_avx512f ())
     error = crypto_native_aes_cbc_init_skx (vm);
   else if (crypto_native_aes_cbc_init_hsw && clib_cpu_supports_avx2 ())
@@ -101,6 +103,8 @@ crypto_native_init (vlib_main_t * vm)
   if (crypto_native_aes_gcm_init_icl && clib_cpu_supports_vaes () &&
       clib_cpu_supports_avx512f ())
     error = crypto_native_aes_gcm_init_icl (vm);
+  else if (crypto_native_aes_gcm_init_adl && clib_cpu_supports_vaes ())
+    error = crypto_native_aes_gcm_init_adl (vm);
   else if (crypto_native_aes_gcm_init_skx && clib_cpu_supports_avx512f ())
     error = crypto_native_aes_gcm_init_skx (vm);
   else if (crypto_native_aes_gcm_init_hsw && clib_cpu_supports_avx2 ())
@@ -213,6 +213,16 @@ u32x8_hxor (u32x8 v)
   return v4[0];
 }
 
+static_always_inline u8x32
+u8x32_xor3 (u8x32 a, u8x32 b, u8x32 c)
+{
+#if __AVX512F__
+  return (u8x32) _mm256_ternarylogic_epi32 ((__m256i) a, (__m256i) b,
+                                            (__m256i) c, 0x96);
+#endif
+  return a ^ b ^ c;
+}
+
 static_always_inline u16x16
 u16x16_mask_last (u16x16 v, u8 n_last)
 {
@@ -391,6 +401,12 @@ u64x4_transpose (u64x4 a[8])
   a[3] = u64x4_permute_lanes (r[1], r[3], 0x31);
 }
 
+static_always_inline u8x32
+u8x32_splat_u8x16 (u8x16 a)
+{
+  return (u8x32) _mm256_broadcastsi128_si256 ((__m128i) a);
+}
+
 #endif /* included_vector_avx2_h */
 
 /*
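The 0x96 immediate passed to _mm256_ternarylogic_epi32 in the new u8x32_xor3 helper is simply the 8-entry truth table of a ^ b ^ c, indexed by the bit triple (a, b, c). A throwaway check of that value (illustrative only, not part of the patch):

#include <stdio.h>

int
main (void)
{
  unsigned char imm = 0;

  /* set bit (a<<2 | b<<1 | c) whenever a ^ b ^ c is 1 */
  for (int a = 0; a < 2; a++)
    for (int b = 0; b < 2; b++)
      for (int c = 0; c < 2; c++)
        if (a ^ b ^ c)
          imm |= 1u << ((a << 2) | (b << 1) | c);

  printf ("0x%02x\n", imm); /* prints 0x96 */
  return 0;
}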