bihash key compare improvements
Looks like the CPU doesn't like overlapping loads. In some cases this new code shows a 3-4 clock improvement. Change-Id: Ia1b49976ad95140c573f892fdc0a32eebbfa06c8 Signed-off-by: Damjan Marion <damarion@cisco.com>
This commit is contained in:

committed by
Marco Varlese

parent
697faeace7
commit
09fdf9d074
@ -72,9 +72,8 @@ clib_bihash_key_compare_24_8 (u64 * a, u64 * b)
|
||||
u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
|
||||
return (u64x8_is_zero_mask (v) & 0x7) == 0;
|
||||
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
|
||||
u64x2 v;
|
||||
v = u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b);
|
||||
v |= u64x2_load_unaligned (a + 1) ^ u64x2_load_unaligned (b + 1);
|
||||
u64x2 v = { a[2] ^ b[2], 0 };
|
||||
v |= u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b);
|
||||
return u64x2_is_all_zero (v);
|
||||
#else
|
||||
return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0;
|
||||
|
@ -74,15 +74,13 @@ clib_bihash_key_compare_40_8 (u64 * a, u64 * b)
|
||||
v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
|
||||
return (u64x8_is_zero_mask (v) & 0x1f) == 0;
|
||||
#elif defined (CLIB_HAVE_VEC256)
|
||||
u64x4 v;
|
||||
v = u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b);
|
||||
v |= u64x4_load_unaligned (a + 1) ^ u64x4_load_unaligned (b + 1);
|
||||
u64x4 v = { a[4] ^ b[4], 0, 0, 0 };
|
||||
v |= u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b);
|
||||
return u64x4_is_all_zero (v);
|
||||
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
|
||||
u64x2 v;
|
||||
v = u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b);
|
||||
u64x2 v = { a[4] ^ b[4], 0 };
|
||||
v |= u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b);
|
||||
v |= u64x2_load_unaligned (a + 2) ^ u64x2_load_unaligned (b + 2);
|
||||
v |= u64x2_load_unaligned (a + 3) ^ u64x2_load_unaligned (b + 3);
|
||||
return u64x2_is_all_zero (v);
|
||||
#else
|
||||
return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3])
|
||||
|
@ -75,9 +75,10 @@ clib_bihash_key_compare_48_8 (u64 * a, u64 * b)
|
||||
u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
|
||||
return (u64x8_is_zero_mask (v) & 0x3f) == 0;
|
||||
#elif defined (CLIB_HAVE_VEC256)
|
||||
u64x4 v;
|
||||
v = u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b);
|
||||
v |= u64x4_load_unaligned (a + 2) ^ u64x4_load_unaligned (b + 2);
|
||||
u64x4 v = { 0 };
|
||||
v = u64x4_insert_lo (v, u64x2_load_unaligned (a + 4) ^
|
||||
u64x2_load_unaligned (b + 4));
|
||||
v |= u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b);
|
||||
return u64x4_is_all_zero (v);
|
||||
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
|
||||
u64x2 v;
|
||||
|
Reference in New Issue
Block a user