vppinfra: vector shuffle cleanup

Type: refactor
Change-Id: I8b3fc2ce30df313467274a174c5ac6adbf296153
Signed-off-by: Damjan Marion <damarion@cisco.com>
This commit is contained in:
Damjan Marion
2021-12-02 13:02:38 +01:00
committed by Ole Trøan
parent 9ab2e5d8d7
commit efd6de87d3
6 changed files with 80 additions and 53 deletions
+18 -17
View File
@@ -291,7 +291,6 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
sample_next_t next_index;
u32 pkts_swapped = 0;
/* Vector shuffle mask to swap src, dst */
u8x16 swapmac = { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -345,8 +344,10 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
src_dst0 = ((u8x16 *) en0)[0];
src_dst1 = ((u8x16 *) en1)[0];
src_dst0 = u8x16_shuffle (src_dst0, swapmac);
src_dst1 = u8x16_shuffle (src_dst1, swapmac);
src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
4, 5, 12, 13, 14, 15);
src_dst1 = u8x16_shuffle (src_dst1, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
4, 5, 12, 13, 14, 15);
((u8x16 *) en0)[0] = src_dst0;
((u8x16 *) en1)[0] = src_dst1;
@@ -418,7 +419,8 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
en0 = vlib_buffer_get_current (b0);
src_dst0 = ((u8x16 *) en0)[0];
src_dst0 = u8x16_shuffle (src_dst0, swapmac);
src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
4, 5, 12, 13, 14, 15);
((u8x16 *) en0)[0] = src_dst0;
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
@@ -469,7 +471,6 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
#ifdef VERSION_3
#define u8x16_shuffle __builtin_shuffle
/* This would normally be a stack local, but since it's a constant... */
static const u16 nexts[VLIB_FRAME_SIZE] = { 0 };
@@ -479,7 +480,6 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 n_left_from, *from;
u32 pkts_swapped = 0;
/* Vector shuffle mask to swap src, dst */
u8x16 swapmac = { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
/* See comment below about sending all pkts to the same place... */
u16 *next __attribute__ ((unused));
@@ -518,10 +518,14 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
src_dst2 = ((u8x16 *) vlib_buffer_get_current (b[2]))[0];
src_dst3 = ((u8x16 *) vlib_buffer_get_current (b[3]))[0];
src_dst0 = u8x16_shuffle (src_dst0, swapmac);
src_dst1 = u8x16_shuffle (src_dst1, swapmac);
src_dst2 = u8x16_shuffle (src_dst2, swapmac);
src_dst3 = u8x16_shuffle (src_dst3, swapmac);
src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
12, 13, 14, 15);
src_dst1 = u8x16_shuffle (src_dst1, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
12, 13, 14, 15);
src_dst2 = u8x16_shuffle (src_dst2, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
12, 13, 14, 15);
src_dst3 = u8x16_shuffle (src_dst3, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
12, 13, 14, 15);
((u8x16 *) vlib_buffer_get_current (b[0]))[0] = src_dst0;
((u8x16 *) vlib_buffer_get_current (b[1]))[0] = src_dst1;
@@ -552,7 +556,8 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u8x16 src_dst0;
src_dst0 = ((u8x16 *) vlib_buffer_get_current (b[0]))[0];
src_dst0 = u8x16_shuffle (src_dst0, swapmac);
src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
12, 13, 14, 15);
((u8x16 *) vlib_buffer_get_current (b[0]))[0] = src_dst0;
vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
vnet_buffer (b[0])->sw_if_index[VLIB_RX];
@@ -611,18 +616,14 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
#ifdef VERSION_4
#define u8x16_shuffle __builtin_shuffle
static u8x16 swapmac =
{ 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
/* Final stage in the pipeline, do the mac swap */
static inline u32
last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
{
u8x16 src_dst0;
src_dst0 = ((u8x16 *) vlib_buffer_get_current (b))[0];
src_dst0 = u8x16_shuffle (src_dst0, swapmac);
src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12,
13, 14, 15);
((u8x16 *) vlib_buffer_get_current (b))[0] = src_dst0;
vnet_buffer (b)->sw_if_index[VLIB_TX] =
vnet_buffer (b)->sw_if_index[VLIB_RX];
+62
View File
@@ -126,6 +126,68 @@ foreach_vec
#undef _vector_size
/* _shuffle and _shuffle2 */
/* Element shuffles taking an immediate list of element indices.
   clang provides __builtin_shufflevector natively; for GCC, map it onto
   __builtin_shuffle, which takes the index list as a vector constant.
   _shuffle picks elements of a single vector; _shuffle2 picks from the
   concatenation of two vectors (indices >= lanes select from v2).
   Fixes: u16x32_shuffle / u16x32_shuffle2 cast through the undeclared
   type `u16u32` and carried a stray trailing `;` that would break any
   expression use of the macro — corrected to `u16x32`, no semicolon. */
#if defined(__GNUC__) && !defined(__clang__)
#define __builtin_shufflevector(v1, v2, ...)                                  \
  __builtin_shuffle ((v1), (v2), (__typeof__ (v1)){ __VA_ARGS__ })
#endif

#define u8x16_shuffle(v1, ...)                                                \
  (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v1), __VA_ARGS__)
#define u8x32_shuffle(v1, ...)                                                \
  (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v1), __VA_ARGS__)
#define u8x64_shuffle(v1, ...)                                                \
  (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v1), __VA_ARGS__)

#define u16x8_shuffle(v1, ...)                                                \
  (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v1), __VA_ARGS__)
#define u16x16_shuffle(v1, ...)                                               \
  (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v1), __VA_ARGS__)
#define u16x32_shuffle(v1, ...)                                               \
  (u16x32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v1), __VA_ARGS__)

#define u32x4_shuffle(v1, ...)                                                \
  (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v1), __VA_ARGS__)
#define u32x8_shuffle(v1, ...)                                                \
  (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v1), __VA_ARGS__)
#define u32x16_shuffle(v1, ...)                                               \
  (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v1), __VA_ARGS__)

#define u64x2_shuffle(v1, ...)                                                \
  (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v1), __VA_ARGS__)
#define u64x4_shuffle(v1, ...)                                                \
  (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v1), __VA_ARGS__)
#define u64x8_shuffle(v1, ...)                                                \
  (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v1), __VA_ARGS__)

#define u8x16_shuffle2(v1, v2, ...)                                           \
  (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v2), __VA_ARGS__)
#define u8x32_shuffle2(v1, v2, ...)                                           \
  (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v2), __VA_ARGS__)
#define u8x64_shuffle2(v1, v2, ...)                                           \
  (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v2), __VA_ARGS__)

#define u16x8_shuffle2(v1, v2, ...)                                           \
  (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v2), __VA_ARGS__)
#define u16x16_shuffle2(v1, v2, ...)                                          \
  (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v2), __VA_ARGS__)
#define u16x32_shuffle2(v1, v2, ...)                                          \
  (u16x32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v2), __VA_ARGS__)

#define u32x4_shuffle2(v1, v2, ...)                                           \
  (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v2), __VA_ARGS__)
#define u32x8_shuffle2(v1, v2, ...)                                           \
  (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v2), __VA_ARGS__)
#define u32x16_shuffle2(v1, v2, ...)                                          \
  (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v2), __VA_ARGS__)

#define u64x2_shuffle2(v1, v2, ...)                                           \
  (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v2), __VA_ARGS__)
#define u64x4_shuffle2(v1, v2, ...)                                           \
  (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v2), __VA_ARGS__)
#define u64x8_shuffle2(v1, v2, ...)                                           \
  (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v2), __VA_ARGS__)
#define VECTOR_WORD_TYPE(t) t##x
#define VECTOR_WORD_TYPE_LEN(t) (sizeof (VECTOR_WORD_TYPE(t)) / sizeof (t))
-6
View File
@@ -183,12 +183,6 @@ u16x16_byte_swap (u16x16 v)
return (u16x16) _mm256_shuffle_epi8 ((__m256i) v, (__m256i) swap);
}
/* Byte shuffle of v under control mask m via AVX2 VPSHUFB
   (_mm256_shuffle_epi8).  NOTE(review): VPSHUFB operates independently
   within each 128-bit lane, and a set high bit in a mask byte zeroes the
   result byte — callers must account for this; confirm against the Intel
   intrinsics guide. */
static_always_inline u8x32
u8x32_shuffle (u8x32 v, u8x32 m)
{
  return (u8x32) _mm256_shuffle_epi8 ((__m256i) v, (__m256i) m);
}
#define u8x32_align_right(a, b, imm) \
(u8x32) _mm256_alignr_epi8 ((__m256i) a, (__m256i) b, imm)
-6
View File
@@ -196,12 +196,6 @@ u8x64_reflect_u8x16 (u8x64 x)
return (u8x64) _mm512_shuffle_epi8 ((__m512i) x, (__m512i) mask);
}
/* Byte shuffle of v under control mask m via AVX-512BW VPSHUFB
   (_mm512_shuffle_epi8).  NOTE(review): shuffles are per 128-bit lane,
   as with the SSE/AVX2 variants — confirm against the Intel intrinsics
   guide. */
static_always_inline u8x64
u8x64_shuffle (u8x64 v, u8x64 m)
{
  return (u8x64) _mm512_shuffle_epi8 ((__m512i) v, (__m512i) m);
}
#define u8x64_align_right(a, b, imm) \
(u8x64) _mm512_alignr_epi8 ((__m512i) a, (__m512i) b, imm)
-6
View File
@@ -129,12 +129,6 @@ u32x4_byte_swap (u32x4 v)
return (u32x4) vrev32q_u8 ((u8x16) v);
}
/* Byte shuffle of v under index vector m using the NEON single-table
   lookup vqtbl1q_u8.  NOTE(review): per the Arm intrinsics reference,
   out-of-range indices (>= 16) yield 0 rather than the PSHUFB high-bit
   rule — verify callers rely only on in-range indices. */
static_always_inline u8x16
u8x16_shuffle (u8x16 v, u8x16 m)
{
  return (u8x16) vqtbl1q_u8 (v, m);
}
static_always_inline u32x4
u32x4_hadd (u32x4 v1, u32x4 v2)
{
-18
View File
@@ -411,24 +411,6 @@ u32x4_sum_elts (u32x4 sum4)
return sum4[0];
}
/* Byte shuffle of v under control mask m via SSSE3 PSHUFB
   (_mm_shuffle_epi8).  NOTE(review): a set high bit in a mask byte
   zeroes the corresponding result byte — confirm against the Intel
   intrinsics guide. */
static_always_inline u8x16
u8x16_shuffle (u8x16 v, u8x16 m)
{
  return (u8x16) _mm_shuffle_epi8 ((__m128i) v, (__m128i) m);
}
/* Select 32-bit elements: returns { v[a], v[b], v[c], v[d] }.
   Indices a..d are expected in [0, 3]. */
static_always_inline u32x4
u32x4_shuffle (u32x4 v, const int a, const int b, const int c, const int d)
{
#if defined(__clang__) || !__OPTIMIZE__
  /* Generic element-indexing fallback — presumably because
     _mm_shuffle_epi32 needs its selector as a compile-time immediate,
     which clang / unoptimized builds cannot guarantee here; TODO confirm. */
  u32x4 r = { v[a], v[b], v[c], v[d] };
  return r;
#else
  /* PSHUFD packs the four 2-bit indices into one immediate byte. */
  return (u32x4) _mm_shuffle_epi32 ((__m128i) v,
				    a | b << 2 | c << 4 | d << 6);
#endif
}
/* _from_ */
/* *INDENT-OFF* */
#define _(f,t,i) \