vppinfra: new vectorized ip checksum functions incl. csum_and_copy
Type: improvement Change-Id: Id5810b7f4a6d6e4ce16b73c235b50db5d475ebf7 Signed-off-by: Damjan Marion <damarion@cisco.com>
This commit is contained in:

committed by
Florin Coras

parent
29355644c5
commit
aa63bc6cf4
@@ -19,6 +19,7 @@
|
||||
#include <vlib/unix/unix.h>
|
||||
#include <vlib/pci/pci.h>
|
||||
#include <vppinfra/ring.h>
|
||||
#include <vppinfra/vector/ip_csum.h>
|
||||
|
||||
#include <vnet/ethernet/ethernet.h>
|
||||
#include <vnet/ip/ip4_packet.h>
|
||||
@@ -110,7 +111,7 @@ avf_tx_prepare_cksum (vlib_buffer_t * b, u8 is_tso)
|
||||
is_tso ? 0 :
|
||||
clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
|
||||
(l4_hdr_offset - l3_hdr_offset));
|
||||
sum = ~ip_csum (&psh, sizeof (psh));
|
||||
sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -119,7 +120,7 @@ avf_tx_prepare_cksum (vlib_buffer_t * b, u8 is_tso)
|
||||
psh.dst = ip6->dst_address;
|
||||
psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
|
||||
psh.l4len = is_tso ? 0 : ip6->payload_length;
|
||||
sum = ~ip_csum (&psh, sizeof (psh));
|
||||
sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
|
||||
}
|
||||
|
||||
/* ip_csum does a byte swap for some reason... */
|
||||
|
@@ -42,6 +42,7 @@
|
||||
|
||||
#include <vnet/ip/ip.h>
|
||||
#include <vnet/ethernet/ethernet.h>
|
||||
#include <vppinfra/vector/ip_csum.h>
|
||||
|
||||
typedef enum
|
||||
{
|
||||
@@ -63,15 +64,16 @@ check_ver_opt_csum (ip4_header_t * ip, u8 * error, int verify_checksum)
|
||||
if ((ip->ip_version_and_header_length & 0xf) != 5)
|
||||
{
|
||||
*error = IP4_ERROR_OPTIONS;
|
||||
if (verify_checksum && ip_csum (ip, ip4_header_bytes (ip)) != 0)
|
||||
if (verify_checksum &&
|
||||
clib_ip_csum ((u8 *) ip, ip4_header_bytes (ip)) != 0)
|
||||
*error = IP4_ERROR_BAD_CHECKSUM;
|
||||
}
|
||||
else
|
||||
*error = IP4_ERROR_VERSION;
|
||||
}
|
||||
else
|
||||
if (PREDICT_FALSE (verify_checksum &&
|
||||
ip_csum (ip, sizeof (ip4_header_t)) != 0))
|
||||
else if (PREDICT_FALSE (verify_checksum &&
|
||||
clib_ip_csum ((u8 *) ip, sizeof (ip4_header_t)) !=
|
||||
0))
|
||||
*error = IP4_ERROR_BAD_CHECKSUM;
|
||||
}
|
||||
|
||||
|
@@ -149,98 +149,6 @@ STATIC_ASSERT_SIZEOF (ip_ecn_t, 1);
|
||||
|
||||
extern u8 *format_ip_ecn (u8 * s, va_list * va);
|
||||
|
||||
/* IP checksum support. */
|
||||
|
||||
static_always_inline u16
|
||||
ip_csum (void *data, u16 n_left)
|
||||
{
|
||||
u32 sum;
|
||||
#ifdef CLIB_HAVE_VEC256
|
||||
u16x16 v1, v2;
|
||||
u32x8 zero = { 0 };
|
||||
u32x8 sum8 = { 0 };
|
||||
u32x4 sum4;
|
||||
#endif
|
||||
|
||||
/* if there is odd number of bytes, pad by zero and store in sum */
|
||||
sum = (n_left & 1) ? ((u8 *) data)[n_left - 1] << 8 : 0;
|
||||
|
||||
/* we deal with words */
|
||||
n_left >>= 1;
|
||||
|
||||
#ifdef CLIB_HAVE_VEC256
|
||||
while (n_left >= 32)
|
||||
{
|
||||
v1 = u16x16_load_unaligned (data);
|
||||
v2 = u16x16_load_unaligned (data + 32);
|
||||
|
||||
#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
|
||||
v1 = u16x16_byte_swap (v1);
|
||||
v2 = u16x16_byte_swap (v2);
|
||||
#endif
|
||||
sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
|
||||
sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
|
||||
sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v2));
|
||||
sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v2));
|
||||
n_left -= 32;
|
||||
data += 64;
|
||||
}
|
||||
|
||||
if (n_left >= 16)
|
||||
{
|
||||
v1 = u16x16_load_unaligned (data);
|
||||
#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
|
||||
v1 = u16x16_byte_swap (v1);
|
||||
#endif
|
||||
sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
|
||||
sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
|
||||
n_left -= 16;
|
||||
data += 32;
|
||||
}
|
||||
|
||||
if (n_left)
|
||||
{
|
||||
v1 = u16x16_load_unaligned (data);
|
||||
#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
|
||||
v1 = u16x16_byte_swap (v1);
|
||||
#endif
|
||||
v1 = u16x16_mask_last (v1, 16 - n_left);
|
||||
sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
|
||||
sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
|
||||
}
|
||||
|
||||
sum8 = u32x8_hadd (sum8, zero);
|
||||
sum4 = u32x8_extract_lo (sum8) + u32x8_extract_hi (sum8);
|
||||
sum += sum4[0] + sum4[1];
|
||||
|
||||
#else
|
||||
/* scalar version */
|
||||
while (n_left >= 8)
|
||||
{
|
||||
sum += clib_net_to_host_u16 (*((u16 *) data + 0));
|
||||
sum += clib_net_to_host_u16 (*((u16 *) data + 1));
|
||||
sum += clib_net_to_host_u16 (*((u16 *) data + 2));
|
||||
sum += clib_net_to_host_u16 (*((u16 *) data + 3));
|
||||
sum += clib_net_to_host_u16 (*((u16 *) data + 4));
|
||||
sum += clib_net_to_host_u16 (*((u16 *) data + 5));
|
||||
sum += clib_net_to_host_u16 (*((u16 *) data + 6));
|
||||
sum += clib_net_to_host_u16 (*((u16 *) data + 7));
|
||||
n_left -= 8;
|
||||
data += 16;
|
||||
}
|
||||
while (n_left)
|
||||
{
|
||||
sum += clib_net_to_host_u16 (*(u16 *) data);
|
||||
n_left -= 1;
|
||||
data += 2;
|
||||
}
|
||||
#endif
|
||||
|
||||
sum = (sum & 0xffff) + (sum >> 16);
|
||||
sum = (sum & 0xffff) + (sum >> 16);
|
||||
return ~((u16) sum);
|
||||
}
|
||||
|
||||
/* Incremental checksum update. */
|
||||
typedef uword ip_csum_t;
|
||||
|
||||
|
@@ -7,6 +7,7 @@
|
||||
#define included_ip_psh_cksum_h
|
||||
|
||||
#include <vnet/ip/ip.h>
|
||||
#include <vppinfra/vector/ip_csum.h>
|
||||
|
||||
typedef struct _ip4_psh
|
||||
{
|
||||
@@ -37,7 +38,8 @@ ip4_pseudo_header_cksum (ip4_header_t *ip4)
|
||||
psh.proto = ip4->protocol;
|
||||
psh.l4len = clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
|
||||
sizeof (ip4_header_t));
|
||||
return ~clib_net_to_host_u16 (ip_csum (&psh, sizeof (ip4_psh_t)));
|
||||
return ~clib_net_to_host_u16 (
|
||||
clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t)));
|
||||
}
|
||||
|
||||
static_always_inline u16
|
||||
@@ -48,7 +50,8 @@ ip6_pseudo_header_cksum (ip6_header_t *ip6)
|
||||
psh.dst = ip6->dst_address;
|
||||
psh.l4len = ip6->payload_length;
|
||||
psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
|
||||
return ~clib_net_to_host_u16 (ip_csum (&psh, sizeof (ip6_psh_t)));
|
||||
return ~clib_net_to_host_u16 (
|
||||
clib_ip_csum ((u8 *) &psh, sizeof (ip6_psh_t)));
|
||||
}
|
||||
|
||||
#endif /* included_ip_psh_cksum_h */
|
||||
|
@@ -194,6 +194,7 @@ set(VPPINFRA_HEADERS
|
||||
vector/compress.h
|
||||
vector/count_equal.h
|
||||
vector/index_to_ptr.h
|
||||
vector/ip_csum.h
|
||||
vector/mask_compare.h
|
||||
vector.h
|
||||
vector_neon.h
|
||||
@@ -275,6 +276,7 @@ set(test_files
|
||||
vector/test/compress.c
|
||||
vector/test/count_equal.c
|
||||
vector/test/index_to_ptr.c
|
||||
vector/test/ip_csum.c
|
||||
vector/test/mask_compare.c
|
||||
vector/test/memcpy_x86_64.c
|
||||
)
|
||||
|
339
src/vppinfra/vector/ip_csum.h
Normal file
339
src/vppinfra/vector/ip_csum.h
Normal file
File diff suppressed because it is too large
Load Diff
120
src/vppinfra/vector/test/ip_csum.c
Normal file
120
src/vppinfra/vector/test/ip_csum.c
Normal file
@@ -0,0 +1,120 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright(c) 2021 Cisco Systems, Inc.
|
||||
*/
|
||||
|
||||
#include <vppinfra/format.h>
|
||||
#include <vppinfra/vector/test/test.h>
|
||||
#include <vppinfra/vector/ip_csum.h>
|
||||
|
||||
typedef struct
|
||||
{
|
||||
struct
|
||||
{
|
||||
u8 *src;
|
||||
u32 count;
|
||||
} chunk[5];
|
||||
u16 result;
|
||||
} ip_csum_test_t;
|
||||
|
||||
/*
 * Fixed input for the first two test cases: 20 payload bytes followed by
 * one extra 0x00 byte that TEST_LEN () leaves out of the checksummed length.
 * NOTE(review): the 20 bytes look like an IPv4 header with a zeroed
 * checksum field - confirm before relying on that reading.
 */
static u8 test1[] = {
  0x45, 0x00, 0x00, 0x73,
  0x00, 0x00, 0x40, 0x00,
  0x40, 0x11, 0x00, 0x00,
  0xc0, 0xa8, 0x00, 0x01,
  0xc0, 0xa8, 0x00, 0xc7,
  0x00
};

/* Number of array elements minus the final one. */
#define TEST_LEN(x) (ARRAY_LEN (x) - 1)
|
||||
|
||||
static ip_csum_test_t tests[] = { {
|
||||
.chunk[0].src = test1,
|
||||
.chunk[0].count = TEST_LEN (test1),
|
||||
.result = 0x61b8,
|
||||
},
|
||||
{
|
||||
.chunk[0].src = test1,
|
||||
.chunk[0].count = 1,
|
||||
.chunk[1].src = test1 + 1,
|
||||
.chunk[1].count = 2,
|
||||
.chunk[2].src = test1 + 3,
|
||||
.chunk[2].count = 3,
|
||||
.chunk[3].src = test1 + 6,
|
||||
.chunk[3].count = 4,
|
||||
.chunk[4].src = test1 + 10,
|
||||
.chunk[4].count = TEST_LEN (test1) - 10,
|
||||
.result = 0x61b8,
|
||||
},
|
||||
{
|
||||
.chunk[0].count = 1,
|
||||
.result = 0xff0f,
|
||||
},
|
||||
{
|
||||
.chunk[0].count = 2,
|
||||
.result = 0x080f,
|
||||
},
|
||||
{
|
||||
.chunk[0].count = 3,
|
||||
.result = 0x0711,
|
||||
},
|
||||
{
|
||||
.chunk[0].count = 4,
|
||||
.result = 0x1210,
|
||||
},
|
||||
{
|
||||
.chunk[0].count = 63,
|
||||
.result = 0xda01,
|
||||
},
|
||||
{
|
||||
.chunk[0].count = 64,
|
||||
.result = 0xe100,
|
||||
},
|
||||
{
|
||||
.chunk[0].count = 65,
|
||||
.result = 0xe010,
|
||||
},
|
||||
{
|
||||
.chunk[0].count = 65535,
|
||||
.result = 0xfc84,
|
||||
},
|
||||
{
|
||||
.chunk[0].count = 65536,
|
||||
.result = 0xffff,
|
||||
} };
|
||||
|
||||
/*
 * Run every entry of tests[] through clib_ip_csum_chunk () /
 * clib_ip_csum_fold () and compare the folded value with the expected one.
 *
 * Chunks without a source pointer are read from a 65536-byte buffer filled
 * with the pattern 0xf0 + ((i * 7) & 0xf). The run stops at the first
 * mismatch.
 *
 * Takes the incoming error (passed on to clib_error_return) and returns it
 * unchanged when all cases match, otherwise the error produced for the
 * first failing case.
 */
static clib_error_t *
test_clib_ip_csum (clib_error_t *err)
{
  u8 *pattern = clib_mem_alloc_aligned (65536, CLIB_CACHE_LINE_BYTES);

  for (int i = 0; i < 65536; i++)
    pattern[i] = 0xf0 + ((i * 7) & 0xf);

  for (int i = 0; i < ARRAY_LEN (tests); i++)
    {
      ip_csum_test_t *tc = &tests[i];
      clib_ip_csum_t state = {};
      u16 got;

      /* feed the chunks in order; a zero count marks an unused slot */
      for (int j = 0; j < ARRAY_LEN (tc->chunk); j++)
        {
          if (tc->chunk[j].count == 0)
            continue;

          if (tc->chunk[j].src != 0)
            clib_ip_csum_chunk (&state, tc->chunk[j].src, tc->chunk[j].count);
          else
            clib_ip_csum_chunk (&state, pattern, tc->chunk[j].count);
        }

      got = clib_ip_csum_fold (&state);
      if (got == tc->result)
        continue;

      err = clib_error_return (err,
                               "bad checksum in test case %u (expected "
                               "0x%04x, calculated 0x%04x)",
                               i, tc->result, got);
      break;
    }

  clib_mem_free (pattern);
  return err;
}
|
||||
|
||||
REGISTER_TEST (clib_ip_csum) = {
|
||||
.name = "clib_ip_csum",
|
||||
.fn = test_clib_ip_csum,
|
||||
};
|
@@ -211,6 +211,18 @@ u32x4_min_scalar (u32x4 v)
|
||||
#define u8x16_word_shift_left(x,n) vextq_u8(u8x16_splat (0), x, 16 - n)
|
||||
#define u8x16_word_shift_right(x,n) vextq_u8(x, u8x16_splat (0), n)
|
||||
|
||||
always_inline u32x4
|
||||
u32x4_interleave_hi (u32x4 a, u32x4 b)
|
||||
{
|
||||
return (u32x4) vzip2q_u32 (a, b);
|
||||
}
|
||||
|
||||
always_inline u32x4
|
||||
u32x4_interleave_lo (u32x4 a, u32x4 b)
|
||||
{
|
||||
return (u32x4) vzip1q_u32 (a, b);
|
||||
}
|
||||
|
||||
static_always_inline u8x16
|
||||
u8x16_reflect (u8x16 v)
|
||||
{
|
||||
|
Reference in New Issue
Block a user