vppinfra: new vectorized ip checksum functions incl. csum_and_copy

Type: improvement
Change-Id: Id5810b7f4a6d6e4ce16b73c235b50db5d475ebf7
Signed-off-by: Damjan Marion <damarion@cisco.com>
This commit is contained in:
Damjan Marion
2021-11-08 11:18:30 +00:00
committed by Florin Coras
parent 29355644c5
commit aa63bc6cf4
8 changed files with 487 additions and 100 deletions

View File

@@ -19,6 +19,7 @@
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
#include <vppinfra/ring.h>
#include <vppinfra/vector/ip_csum.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/ip4_packet.h>
@@ -110,7 +111,7 @@ avf_tx_prepare_cksum (vlib_buffer_t * b, u8 is_tso)
is_tso ? 0 :
clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
(l4_hdr_offset - l3_hdr_offset));
sum = ~ip_csum (&psh, sizeof (psh));
sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
}
else
{
@@ -119,7 +120,7 @@ avf_tx_prepare_cksum (vlib_buffer_t * b, u8 is_tso)
psh.dst = ip6->dst_address;
psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
psh.l4len = is_tso ? 0 : ip6->payload_length;
sum = ~ip_csum (&psh, sizeof (psh));
sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
}
/* ip_csum does a byte swap for some reason... */

View File

@@ -42,6 +42,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
#include <vppinfra/vector/ip_csum.h>
typedef enum
{
@@ -63,15 +64,16 @@ check_ver_opt_csum (ip4_header_t * ip, u8 * error, int verify_checksum)
if ((ip->ip_version_and_header_length & 0xf) != 5)
{
*error = IP4_ERROR_OPTIONS;
if (verify_checksum && ip_csum (ip, ip4_header_bytes (ip)) != 0)
if (verify_checksum &&
clib_ip_csum ((u8 *) ip, ip4_header_bytes (ip)) != 0)
*error = IP4_ERROR_BAD_CHECKSUM;
}
else
*error = IP4_ERROR_VERSION;
}
else
if (PREDICT_FALSE (verify_checksum &&
ip_csum (ip, sizeof (ip4_header_t)) != 0))
else if (PREDICT_FALSE (verify_checksum &&
clib_ip_csum ((u8 *) ip, sizeof (ip4_header_t)) !=
0))
*error = IP4_ERROR_BAD_CHECKSUM;
}

View File

@@ -149,98 +149,6 @@ STATIC_ASSERT_SIZEOF (ip_ecn_t, 1);
extern u8 *format_ip_ecn (u8 * s, va_list * va);
/*
 * IP checksum support.
 *
 * ip_csum() sums `n_left` bytes starting at `data` as 16-bit words that are
 * converted from network to host order, folds the 32-bit accumulator down to
 * 16 bits and returns the one's complement of that value.
 *
 * data   - start of the region to checksum; the vector path uses unaligned
 *          loads. NOTE(review): the scalar path dereferences `(u16 *) data`
 *          directly, so it relies on the target tolerating unaligned 16-bit
 *          reads - confirm for all supported architectures.
 * n_left - length in bytes (u16, so at most 65535); an odd trailing byte is
 *          treated as the high byte of a word whose low byte is zero.
 *
 * NOTE(review): `data + N` is arithmetic on a `void *`, which is a GNU C
 * extension rather than ISO C.
 */
static_always_inline u16
ip_csum (void *data, u16 n_left)
{
u32 sum;
#ifdef CLIB_HAVE_VEC256
u16x16 v1, v2;
u32x8 zero = { 0 };
u32x8 sum8 = { 0 };
u32x4 sum4;
#endif
/* if there is an odd number of bytes, pad the last one with a zero low byte
 * and use it as the initial value of sum */
sum = (n_left & 1) ? ((u8 *) data)[n_left - 1] << 8 : 0;
/* from here on n_left counts 16-bit words, not bytes */
n_left >>= 1;
#ifdef CLIB_HAVE_VEC256
/* main loop: 32 words (64 bytes) per iteration, as two 16-lane vectors */
while (n_left >= 32)
{
v1 = u16x16_load_unaligned (data);
v2 = u16x16_load_unaligned (data + 32);
#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
/* bring each word into host order before accumulating */
v1 = u16x16_byte_swap (v1);
v2 = u16x16_byte_swap (v2);
#endif
/* widen each half to 32-bit lanes so per-lane sums have headroom */
sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v2));
sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v2));
n_left -= 32;
data += 64;
}
/* at most one more full 16-word (32-byte) vector */
if (n_left >= 16)
{
v1 = u16x16_load_unaligned (data);
#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
v1 = u16x16_byte_swap (v1);
#endif
sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
n_left -= 16;
data += 32;
}
/* remaining 1..15 words.
 * NOTE(review): this still issues a full 16-lane load, so it appears to read
 * up to 32 bytes even when fewer remain; u16x16_mask_last presumably clears
 * the last (16 - n_left) lanes afterwards - confirm callers always have
 * readable memory past the end of the region. */
if (n_left)
{
v1 = u16x16_load_unaligned (data);
#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
v1 = u16x16_byte_swap (v1);
#endif
v1 = u16x16_mask_last (v1, 16 - n_left);
sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
}
/* reduce the eight 32-bit lanes to a scalar: horizontal add against zero,
 * then add the low and high 128-bit halves and the two lanes that
 * (presumably) hold the partial sums after the hadd */
sum8 = u32x8_hadd (sum8, zero);
sum4 = u32x8_extract_lo (sum8) + u32x8_extract_hi (sum8);
sum += sum4[0] + sum4[1];
#else
/* scalar version: eight words per iteration ... */
while (n_left >= 8)
{
sum += clib_net_to_host_u16 (*((u16 *) data + 0));
sum += clib_net_to_host_u16 (*((u16 *) data + 1));
sum += clib_net_to_host_u16 (*((u16 *) data + 2));
sum += clib_net_to_host_u16 (*((u16 *) data + 3));
sum += clib_net_to_host_u16 (*((u16 *) data + 4));
sum += clib_net_to_host_u16 (*((u16 *) data + 5));
sum += clib_net_to_host_u16 (*((u16 *) data + 6));
sum += clib_net_to_host_u16 (*((u16 *) data + 7));
n_left -= 8;
data += 16;
}
/* ... then one word at a time for the remainder */
while (n_left)
{
sum += clib_net_to_host_u16 (*(u16 *) data);
n_left -= 1;
data += 2;
}
#endif
/* fold the carries back into the low 16 bits; the second fold absorbs the
 * carry that the first fold can itself produce */
sum = (sum & 0xffff) + (sum >> 16);
sum = (sum & 0xffff) + (sum >> 16);
/* one's complement of the folded sum, truncated to 16 bits */
return ~((u16) sum);
}
/* Incremental checksum update. */
typedef uword ip_csum_t;

View File

@@ -7,6 +7,7 @@
#define included_ip_psh_cksum_h
#include <vnet/ip/ip.h>
#include <vppinfra/vector/ip_csum.h>
typedef struct _ip4_psh
{
@@ -37,7 +38,8 @@ ip4_pseudo_header_cksum (ip4_header_t *ip4)
psh.proto = ip4->protocol;
psh.l4len = clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
sizeof (ip4_header_t));
return ~clib_net_to_host_u16 (ip_csum (&psh, sizeof (ip4_psh_t)));
return ~clib_net_to_host_u16 (
clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t)));
}
static_always_inline u16
@@ -48,7 +50,8 @@ ip6_pseudo_header_cksum (ip6_header_t *ip6)
psh.dst = ip6->dst_address;
psh.l4len = ip6->payload_length;
psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
return ~clib_net_to_host_u16 (ip_csum (&psh, sizeof (ip6_psh_t)));
return ~clib_net_to_host_u16 (
clib_ip_csum ((u8 *) &psh, sizeof (ip6_psh_t)));
}
#endif /* included_ip_psh_cksum_h */

View File

@@ -194,6 +194,7 @@ set(VPPINFRA_HEADERS
vector/compress.h
vector/count_equal.h
vector/index_to_ptr.h
vector/ip_csum.h
vector/mask_compare.h
vector.h
vector_neon.h
@@ -275,6 +276,7 @@ set(test_files
vector/test/compress.c
vector/test/count_equal.c
vector/test/index_to_ptr.c
vector/test/ip_csum.c
vector/test/mask_compare.c
vector/test/memcpy_x86_64.c
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,120 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright(c) 2021 Cisco Systems, Inc.
*/
#include <vppinfra/format.h>
#include <vppinfra/vector/test/test.h>
#include <vppinfra/vector/ip_csum.h>
/* One checksum test case: up to five input chunks that are fed to the
 * checksum state in index order, plus the expected folded result. */
typedef struct
{
struct
{
u8 *src; /* chunk data; 0 makes the test use its generated pattern buffer */
u32 count; /* chunk length; a chunk with count 0 is skipped */
} chunk[5];
u16 result; /* value the test expects from clib_ip_csum_fold () */
} ip_csum_test_t;
/* 21 bytes: a 20-byte IPv4 header (version/IHL 0x45, protocol 0x11,
 * 192.168.0.1 -> 192.168.0.199) with the checksum field at offsets 10-11
 * zeroed, followed by one extra 0x00 byte that TEST_LEN () leaves out of
 * the checksummed length. */
static u8 test1[] = { 0x45, 0x00, 0x00, 0x73, 0x00, 0x00, 0x40,
0x00, 0x40, 0x11, 0x00, 0x00, 0xc0, 0xa8,
0x00, 0x01, 0xc0, 0xa8, 0x00, 0xc7, 0x00 };
/* Number of elements in test vector x, not counting its final element. */
#define TEST_LEN(x) (ARRAY_LEN (x) - 1)
/* Table of test cases. Entries that leave .src unset (0) are run against the
 * pattern buffer generated in test_clib_ip_csum (); only chunk[0] is used in
 * those entries, so .count is the total length being checksummed. */
static ip_csum_test_t tests[] = { {
/* whole IPv4 header in a single chunk */
.chunk[0].src = test1,
.chunk[0].count = TEST_LEN (test1),
.result = 0x61b8,
},
{
/* same header split into chunks of 1, 2, 3, 4 and the remaining bytes,
 * so chunk boundaries fall on both odd and even offsets; the result
 * must match the single-chunk case above */
.chunk[0].src = test1,
.chunk[0].count = 1,
.chunk[1].src = test1 + 1,
.chunk[1].count = 2,
.chunk[2].src = test1 + 3,
.chunk[2].count = 3,
.chunk[3].src = test1 + 6,
.chunk[3].count = 4,
.chunk[4].src = test1 + 10,
.chunk[4].count = TEST_LEN (test1) - 10,
.result = 0x61b8,
},
/* very short inputs from the pattern buffer: 1 to 4 bytes */
{
.chunk[0].count = 1,
.result = 0xff0f,
},
{
.chunk[0].count = 2,
.result = 0x080f,
},
{
.chunk[0].count = 3,
.result = 0x0711,
},
{
.chunk[0].count = 4,
.result = 0x1210,
},
/* lengths just below, at and just above 64 bytes */
{
.chunk[0].count = 63,
.result = 0xda01,
},
{
.chunk[0].count = 64,
.result = 0xe100,
},
{
.chunk[0].count = 65,
.result = 0xe010,
},
/* largest lengths: 65535 bytes and the full 65536-byte buffer */
{
.chunk[0].count = 65535,
.result = 0xfc84,
},
{
/* NOTE(review): 0xffff is also what folding an empty sum would give.
 * Summing the 65536-byte pattern by hand appears to yield 0x0384
 * instead, so this case may only pass because the count is truncated
 * to 16 bits (65536 -> 0) on the way into clib_ip_csum_chunk () -
 * confirm that function's count parameter type. */
.chunk[0].count = 65536,
.result = 0xffff,
} };
/*
 * Run every entry of tests[] through clib_ip_csum_chunk () /
 * clib_ip_csum_fold () and compare against the expected result.
 *
 * A 65536-byte scratch buffer is filled with a repeating byte pattern and
 * used as the data source for any chunk whose src pointer is 0. The first
 * mismatch is reported through clib_error_return () and stops the run.
 *
 * err - incoming error chain, handed to clib_error_return () on failure.
 *
 * Returns err unchanged when all cases pass, otherwise the value produced by
 * clib_error_return (). The scratch buffer is freed on every path.
 */
static clib_error_t *
test_clib_ip_csum (clib_error_t *err)
{
  u8 *pattern = clib_mem_alloc_aligned (65536, CLIB_CACHE_LINE_BYTES);
  int idx;

  /* bytes 0xf0..0xff, stepping by 7 modulo 16 */
  for (idx = 0; idx < 65536; idx++)
    pattern[idx] = 0xf0 + ((idx * 7) & 0xf);

  for (idx = 0; idx < ARRAY_LEN (tests); idx++)
    {
      ip_csum_test_t *tc = &tests[idx];
      clib_ip_csum_t state = {};
      u16 computed;

      /* feed the non-empty chunks in order into one checksum state */
      for (int k = 0; k < ARRAY_LEN (tc->chunk); k++)
	{
	  u8 *from;

	  if (tc->chunk[k].count == 0)
	    continue;

	  /* a null source selects the generated pattern buffer */
	  from = tc->chunk[k].src == 0 ? pattern : tc->chunk[k].src;
	  clib_ip_csum_chunk (&state, from, tc->chunk[k].count);
	}

      computed = clib_ip_csum_fold (&state);
      if (computed == tc->result)
	continue;

      err = clib_error_return (err,
			       "bad checksum in test case %u (expected "
			       "0x%04x, calculated 0x%04x)",
			       idx, tc->result, computed);
      break;
    }

  clib_mem_free (pattern);
  return err;
}
/* Registration record tying the name "clib_ip_csum" to test_clib_ip_csum ();
 * presumably consumed by the test runner declared in
 * vppinfra/vector/test/test.h - confirm against that header. */
REGISTER_TEST (clib_ip_csum) = {
.name = "clib_ip_csum",
.fn = test_clib_ip_csum,
};

View File

@@ -211,6 +211,18 @@ u32x4_min_scalar (u32x4 v)
#define u8x16_word_shift_left(x,n) vextq_u8(u8x16_splat (0), x, 16 - n)
#define u8x16_word_shift_right(x,n) vextq_u8(x, u8x16_splat (0), n)
/* Interleave the upper two 32-bit lanes of a and b using the NEON ZIP2
 * intrinsic; per the Arm intrinsics reference the result is
 * { a[2], b[2], a[3], b[3] }. */
always_inline u32x4
u32x4_interleave_hi (u32x4 a, u32x4 b)
{
  u32x4 zipped = (u32x4) vzip2q_u32 (a, b);

  return zipped;
}
/* Interleave the lower two 32-bit lanes of a and b using the NEON ZIP1
 * intrinsic; per the Arm intrinsics reference the result is
 * { a[0], b[0], a[1], b[1] }. */
always_inline u32x4
u32x4_interleave_lo (u32x4 a, u32x4 b)
{
  u32x4 zipped = (u32x4) vzip1q_u32 (a, b);

  return zipped;
}
static_always_inline u8x16
u8x16_reflect (u8x16 v)
{