Re-enable aarch64 NEON instructions in vlib_buffer_free_inline
int vaddvq_u8 (uint8x16_t __a) is not appropriate for implementing xxx_is_all_zero, as the across-vector byte sum may overflow and produce an incorrect return value. For example, given u8x16 x = {0 <repeats 12 times>, 1, 255, 0, 0}, the sum 1 + 255 wraps to 0 in the 8-bit result, so !vaddvq_u8(x) wrongly reports the vector as all-zero. Change-Id: Ia6a10bdf8da360dec12db902d028751a1a77e9a4 Signed-off-by: Lijian Zhang <Lijian.Zhang@arm.com> Reviewed-by: Sirshak Das <Sirshak.Das@arm.com>
This commit is contained in:
Committed by: Damjan Marion
Parent commit: e225f71766
This commit: e6a47cf557
@ -712,7 +712,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
|
|||||||
u8 buffer_pool_index = ~0;
|
u8 buffer_pool_index = ~0;
|
||||||
u32 n_queue = 0, queue[queue_size + 4];
|
u32 n_queue = 0, queue[queue_size + 4];
|
||||||
vlib_buffer_t bt = { };
|
vlib_buffer_t bt = { };
|
||||||
#if defined(CLIB_HAVE_VEC128) && !__aarch64__
|
#if defined(CLIB_HAVE_VEC128)
|
||||||
vlib_buffer_t bpi_mask = {.buffer_pool_index = ~0 };
|
vlib_buffer_t bpi_mask = {.buffer_pool_index = ~0 };
|
||||||
vlib_buffer_t bpi_vec = {.buffer_pool_index = ~0 };
|
vlib_buffer_t bpi_vec = {.buffer_pool_index = ~0 };
|
||||||
vlib_buffer_t flags_refs_mask = {
|
vlib_buffer_t flags_refs_mask = {
|
||||||
@ -737,7 +737,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
|
|||||||
vlib_prefetch_buffer_header (b[6], LOAD);
|
vlib_prefetch_buffer_header (b[6], LOAD);
|
||||||
vlib_prefetch_buffer_header (b[7], LOAD);
|
vlib_prefetch_buffer_header (b[7], LOAD);
|
||||||
|
|
||||||
#if defined(CLIB_HAVE_VEC128) && !__aarch64__
|
#if defined(CLIB_HAVE_VEC128)
|
||||||
u8x16 p0, p1, p2, p3, r;
|
u8x16 p0, p1, p2, p3, r;
|
||||||
p0 = u8x16_load_unaligned (b[0]);
|
p0 = u8x16_load_unaligned (b[0]);
|
||||||
p1 = u8x16_load_unaligned (b[1]);
|
p1 = u8x16_load_unaligned (b[1]);
|
||||||
@ -815,7 +815,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
|
|||||||
}
|
}
|
||||||
|
|
||||||
buffer_pool_index = b[0]->buffer_pool_index;
|
buffer_pool_index = b[0]->buffer_pool_index;
|
||||||
#if defined(CLIB_HAVE_VEC128) && !__aarch64__
|
#if defined(CLIB_HAVE_VEC128)
|
||||||
bpi_vec.buffer_pool_index = buffer_pool_index;
|
bpi_vec.buffer_pool_index = buffer_pool_index;
|
||||||
#endif
|
#endif
|
||||||
bp = vlib_get_buffer_pool (vm, buffer_pool_index);
|
bp = vlib_get_buffer_pool (vm, buffer_pool_index);
|
||||||
|
@ -22,6 +22,13 @@
|
|||||||
#define i16x8_sub_saturate(a,b) vsubq_s16(a,b)
|
#define i16x8_sub_saturate(a,b) vsubq_s16(a,b)
|
||||||
/* Dummy. Aid making uniform macros */
|
/* Dummy. Aid making uniform macros */
|
||||||
#define vreinterpretq_u8_u8(a) a
|
#define vreinterpretq_u8_u8(a) a
|
||||||
|
/* Implement the missing intrinsics to make uniform macros */
|
||||||
|
#define vminvq_u64(x) \
|
||||||
|
({ \
|
||||||
|
u64 x0 = vgetq_lane_u64(x, 0); \
|
||||||
|
u64 x1 = vgetq_lane_u64(x, 1); \
|
||||||
|
x0 < x1 ? x0 : x1; \
|
||||||
|
})
|
||||||
|
|
||||||
/* Converts all ones/zeros compare mask to bitmap. */
|
/* Converts all ones/zeros compare mask to bitmap. */
|
||||||
always_inline u32
|
always_inline u32
|
||||||
@ -62,11 +69,11 @@ t##s##x##c##_store_unaligned (t##s##x##c v, void *p) \
|
|||||||
\
|
\
|
||||||
static_always_inline int \
|
static_always_inline int \
|
||||||
t##s##x##c##_is_all_zero (t##s##x##c x) \
|
t##s##x##c##_is_all_zero (t##s##x##c x) \
|
||||||
{ return !(vaddvq_##i (x)); } \
|
{ return !!(vminvq_u##s (vceqq_##i (vdupq_n_##i(0), x))); } \
|
||||||
\
|
\
|
||||||
static_always_inline int \
|
static_always_inline int \
|
||||||
t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \
|
t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \
|
||||||
{ return t##s##x##c##_is_all_zero (a ^ b); } \
|
{ return !!(vminvq_u##s (vceqq_##i (a, b))); } \
|
||||||
\
|
\
|
||||||
static_always_inline int \
|
static_always_inline int \
|
||||||
t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \
|
t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \
|
||||||
|
Reference in New Issue
Block a user