tcp: custom checksum calculations for IPv4/IPv6

Type: feature

Based on the configuration, the checksum offload capability can be disabled,
in which case the checksum is calculated in software while pushing the TCP and IP headers.
This saves some cycles when the VPP stack is used with legacy hardware devices.

Signed-off-by: Srikanth A <srakula@cisco.com>
Change-Id: Ic1b3fcf3040917e47ee65263694ebf7437ac5668
This commit is contained in:
Srikanth A
2019-10-02 17:48:58 -07:00
committed by John Lo
parent 43b36af1fc
commit 3642782a27
6 changed files with 194 additions and 122 deletions

View File

@ -195,6 +195,85 @@ ip_incremental_checksum_buffer (vlib_main_t * vm,
return sum;
}
/**
 * Finish an L4 checksum over a payload that may span a buffer chain.
 *
 * Exactly one of three input layouts is expected:
 *  - l4h set, p0 NULL: the whole payload is contiguous at l4h.
 *  - p0 set, iph set: iph points into p0 and the L4 data starts
 *    ip_header_size bytes after it.
 *  - p0 set, iph NULL: the L4 data starts at p0's current position.
 *
 * @param vm              vlib main, used to resolve chained buffers
 * @param p0              first buffer, or NULL when l4h is given
 * @param sum0            running sum already seeded by the caller
 * @param payload_length  number of L4 bytes to sum
 * @param iph             optional pointer to the IP header inside p0
 * @param ip_header_size  bytes between iph and the L4 data
 * @param l4h             optional pointer to contiguous L4 data
 * @return complemented, folded 16-bit sum; 0xfefe when the chain ends
 *         early or the headers do not fit in the first buffer
 */
always_inline u16
ip_calculate_l4_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
                          ip_csum_t sum0, u32 payload_length,
                          u8 * iph, u32 ip_header_size, u8 * l4h)
{
  u8 *seg;
  u32 seg_len;
  u32 remaining = payload_length;

  if (l4h)
    {
      /* Flat L4 data, no buffer (and hence no chain) to walk. */
      ASSERT (p0 == NULL);
      seg = l4h;
      seg_len = payload_length;
    }
  else
    {
      ASSERT (p0);
      if (!iph)
        {
          /* No IP header in the buffer: current data is the L4 header. */
          seg = vlib_buffer_get_current (p0);
          seg_len = p0->current_length;
        }
      else
        {
          u32 ip_bytes_here;

          ASSERT (ip_header_size);
          seg = iph + ip_header_size;
          seg_len = payload_length;

          /* Bytes of this buffer lying at or after the IP header. */
          ip_bytes_here =
            p0->current_length - (((u8 *) iph - p0->data) -
                                  p0->current_data);
          if (PREDICT_FALSE (payload_length + ip_header_size >
                             ip_bytes_here))
            {
              seg_len = ip_bytes_here - ip_header_size;
              /* A set top bit means the subtraction wrapped: the
               * headers themselves do not fit in this buffer. */
              if (PREDICT_FALSE (seg_len >> 31))
                {
                  ASSERT (0);
                  return 0xfefe;
                }
            }
        }
      seg_len = clib_min (seg_len, remaining);
    }

  for (;;)
    {
      u8 prev_odd;

      sum0 = ip_incremental_checksum (sum0, seg, seg_len);
      remaining -= seg_len;
      if (remaining == 0)
        break;

      if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
        {
          /* Payload length promises more data than the chain holds. */
          ASSERT (0);
          return 0xfefe;
        }

      prev_odd = (seg_len & 1);

      p0 = vlib_get_buffer (vm, p0->next_buffer);
      seg = vlib_buffer_get_current (p0);
      seg_len = clib_min (p0->current_length, remaining);

      if (PREDICT_FALSE (prev_odd))
        {
          /* The previous segment ended mid 16-bit word. Step back one
           * byte, account for it, and zero it so the words of this
           * segment stay aligned. Note this writes the byte just before
           * the buffer's current data. */
          seg--;
          seg_len++;
          remaining++;
          seg[0] = 0;
        }
    }

  return (u16) ~ip_csum_fold (sum0);
}
void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index);
extern vlib_node_registration_t ip4_inacl_node;

View File

@ -1301,10 +1301,6 @@ ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
{
ip_csum_t sum0;
u32 ip_header_length, payload_length_host_byte_order;
u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
u16 sum16;
u8 *data_this_buffer;
u8 length_odd;
/* Initialize checksum with ip header. */
ip_header_length = ip4_header_bytes (ip0);
@ -1327,45 +1323,9 @@ ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
sum0 =
ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
n_bytes_left = n_this_buffer = payload_length_host_byte_order;
data_this_buffer = (u8 *) ip0 + ip_header_length;
n_ip_bytes_this_buffer =
p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
{
n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
n_ip_bytes_this_buffer - ip_header_length : 0;
}
while (1)
{
sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
n_bytes_left -= n_this_buffer;
if (n_bytes_left == 0)
break;
ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
return 0xfefe;
length_odd = (n_this_buffer & 1);
p0 = vlib_get_buffer (vm, p0->next_buffer);
data_this_buffer = vlib_buffer_get_current (p0);
n_this_buffer = clib_min (p0->current_length, n_bytes_left);
if (PREDICT_FALSE (length_odd))
{
/* Prepend a 0 or the resulting checksum will be incorrect. */
data_this_buffer--;
n_this_buffer++;
n_bytes_left++;
data_this_buffer[0] = 0;
}
}
sum16 = ~ip_csum_fold (sum0);
return sum16;
return ip_calculate_l4_checksum (vm, p0, sum0,
payload_length_host_byte_order, (u8 *) ip0,
ip_header_length, NULL);
}
u32

View File

@ -683,7 +683,8 @@ vlib_buffer_push_ip6 (vlib_main_t * vm, vlib_buffer_t * b,
sizeof (ip6h->src_address));
clib_memcpy_fast (ip6h->dst_address.as_u8, dst->as_u8,
sizeof (ip6h->src_address));
b->flags |= VNET_BUFFER_F_IS_IP6;
vnet_buffer (b)->l3_hdr_offset = (u8 *) ip6h - b->data;
b->flags |= VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
return ip6h;
}

View File

@ -1014,11 +1014,10 @@ ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
ip6_header_t * ip0, int *bogus_lengthp)
{
ip_csum_t sum0;
u16 sum16, payload_length_host_byte_order;
u32 i, n_this_buffer, n_bytes_left;
u16 payload_length_host_byte_order;
u32 i;
u32 headers_size = sizeof (ip0[0]);
u8 *data_this_buffer;
u8 length_odd;
ASSERT (bogus_lengthp);
*bogus_lengthp = 0;
@ -1030,14 +1029,10 @@ ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
{
sum0 = ip_csum_with_carry (sum0,
clib_mem_unaligned (&ip0->
src_address.as_uword[i],
uword));
sum0 =
ip_csum_with_carry (sum0,
clib_mem_unaligned (&ip0->dst_address.as_uword[i],
uword));
sum0 = ip_csum_with_carry
(sum0, clib_mem_unaligned (&ip0->src_address.as_uword[i], uword));
sum0 = ip_csum_with_carry
(sum0, clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword));
}
/* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets)
@ -1059,52 +1054,14 @@ ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
headers_size += skip_bytes;
}
n_bytes_left = n_this_buffer = payload_length_host_byte_order;
if (p0)
{
u32 n_ip_bytes_this_buffer =
p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
if (n_this_buffer + headers_size > n_ip_bytes_this_buffer)
{
n_this_buffer = p0->current_length > headers_size ?
n_ip_bytes_this_buffer - headers_size : 0;
}
}
while (1)
{
sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
n_bytes_left -= n_this_buffer;
if (n_bytes_left == 0)
break;
ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
{
*bogus_lengthp = 1;
return 0xfefe;
}
length_odd = (n_this_buffer & 1);
p0 = vlib_get_buffer (vm, p0->next_buffer);
data_this_buffer = vlib_buffer_get_current (p0);
n_this_buffer = clib_min (p0->current_length, n_bytes_left);
if (PREDICT_FALSE (length_odd))
{
/* Prepend a 0 or the resulting checksum will be incorrect. */
data_this_buffer--;
n_this_buffer++;
n_bytes_left++;
data_this_buffer[0] = 0;
}
}
sum16 = ~ip_csum_fold (sum0);
return sum16;
return ip_calculate_l4_checksum (vm, p0, sum0,
payload_length_host_byte_order,
(u8 *) ip0, headers_size, NULL);
else
return ip_calculate_l4_checksum (vm, 0, sum0,
payload_length_host_byte_order, NULL, 0,
data_this_buffer);
}
u32

View File

@ -120,6 +120,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
_(RATE_SAMPLE, "Conn does rate sampling") \
_(TRACK_BURST, "Track burst") \
_(ZERO_RWND_SENT, "Zero RWND sent") \
_(NO_CSUM_OFFLOAD, "No Checksum Offload") \
typedef enum _tcp_connection_flag_bits
{
@ -1233,6 +1234,8 @@ vlib_buffer_push_tcp_net_order (vlib_buffer_t * b, u16 sp, u16 dp, u32 seq,
th->window = wnd;
th->checksum = 0;
th->urgent_pointer = 0;
vnet_buffer (b)->l4_hdr_offset = (u8 *) th - b->data;
b->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
return th;
}

View File

@ -444,6 +444,78 @@ tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
}
/**
 * Compute the TCP checksum in software for an IPv6 connection, for use
 * when checksum offloading is disabled for that connection.
 *
 * Seeds the sum with the pseudo-header terms (upper-layer length, the TCP
 * protocol value and both addresses), then hands the buffer chain to
 * ip_calculate_l4_checksum() with no IP header pointer, so summing starts
 * at the buffer's current position.
 *
 * @param vm   vlib main, used to measure and walk the buffer chain
 * @param p0   first buffer of the segment
 * @param src  source address; the ip6 member is read
 * @param dst  destination address; the ip6 member is read
 * @return the value produced by ip_calculate_l4_checksum()
 */
u16
ip6_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
                                 ip46_address_t * src, ip46_address_t * dst)
{
  ip_csum_t sum0;
  u32 payload_length_host_byte_order;
  u32 i;

  /* Measure the chain once and keep the full 32-bit value: a u16 would
   * silently truncate lengths >= 65536 before they reach the helper,
   * which takes a u32 (the IPv4 variant already uses u32). */
  payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);

  /* Pseudo-header: 32-bit upper-layer length plus next-header value.
   * After folding, a 32-bit network-order length contributes the same as
   * its 16-bit form whenever the length fits in 16 bits. The carry-aware
   * add keeps this safe where ip_csum_t is only 32 bits wide. */
  sum0 = clib_host_to_net_u32 (payload_length_host_byte_order);
  sum0 = ip_csum_with_carry (sum0, clib_host_to_net_u16 (IP_PROTOCOL_TCP));

  for (i = 0; i < ARRAY_LEN (src->ip6.as_uword); i++)
    {
      sum0 = ip_csum_with_carry
        (sum0, clib_mem_unaligned (&src->ip6.as_uword[i], uword));
      sum0 = ip_csum_with_carry
        (sum0, clib_mem_unaligned (&dst->ip6.as_uword[i], uword));
    }

  return ip_calculate_l4_checksum (vm, p0, sum0,
                                   payload_length_host_byte_order, NULL, 0,
                                   NULL);
}
/**
 * Compute the TCP checksum in software for an IPv4 connection.
 *
 * Seeds the sum with the pseudo-header terms (length combined with the TCP
 * protocol value, then both addresses) and lets
 * ip_calculate_l4_checksum() sum the buffer chain starting at the buffer's
 * current position.
 *
 * @param vm   vlib main, used to measure and walk the buffer chain
 * @param p0   first buffer of the segment
 * @param src  source address; the ip4 member is read
 * @param dst  destination address; the ip4 member is read
 * @return the value produced by ip_calculate_l4_checksum()
 */
u16
ip4_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
                                 ip46_address_t * src, ip46_address_t * dst)
{
  u32 l4_len = vlib_buffer_length_in_chain (vm, p0);
  u32 len_and_proto = l4_len + (IP_PROTOCOL_TCP << 16);
  ip_csum_t csum = clib_host_to_net_u32 (len_and_proto);

  /* Fold both pseudo-header addresses into the running sum. */
  csum = ip_csum_with_carry (csum, clib_mem_unaligned (&src->ip4, u32));
  csum = ip_csum_with_carry (csum, clib_mem_unaligned (&dst->ip4, u32));

  return ip_calculate_l4_checksum (vm, p0, csum, l4_len, NULL, 0, NULL);
}
/**
 * Produce the value to store in a segment's TCP checksum field.
 *
 * When the connection does not carry TCP_CONN_NO_CSUM_OFFLOAD, the buffer
 * is flagged with VNET_BUFFER_F_OFFLOAD_TCP_CKSUM and 0 is returned.
 * Otherwise the checksum is computed in software from the connection's
 * local and remote addresses, using the IPv4 or IPv6 variant.
 *
 * @param tc  connection owning the segment
 * @param b   buffer holding the segment
 * @return 0 when offload is requested, else the software checksum
 */
static inline u16
tcp_compute_checksum (tcp_connection_t * tc, vlib_buffer_t * b)
{
  vlib_main_t *vm;

  if (PREDICT_TRUE (!(tc->flags & TCP_CONN_NO_CSUM_OFFLOAD)))
    {
      b->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
      return 0;
    }

  /* Software path: use the vlib main of the connection's worker. */
  vm = tcp_get_worker (tc->c_thread_index)->vm;

  if (tc->c_is_ip4)
    return ip4_tcp_compute_checksum_custom (vm, b, &tc->c_lcl_ip,
                                            &tc->c_rmt_ip);

  return ip6_tcp_compute_checksum_custom (vm, b, &tc->c_lcl_ip,
                                          &tc->c_rmt_ip);
}
/**
* Prepare ACK
*/
@ -466,6 +538,9 @@ tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state,
tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
tcp_options_write ((u8 *) (th + 1), snd_opts);
th->checksum = tcp_compute_checksum (tc, b);
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
if (wnd == 0)
@ -517,6 +592,7 @@ tcp_make_syn (tcp_connection_t * tc, vlib_buffer_t * b)
initial_wnd);
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
tcp_options_write ((u8 *) (th + 1), &snd_opts);
th->checksum = tcp_compute_checksum (tc, b);
}
/**
@ -541,6 +617,7 @@ tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b)
tcp_options_write ((u8 *) (th + 1), snd_opts);
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
th->checksum = tcp_compute_checksum (tc, b);
}
always_inline void
@ -786,7 +863,8 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
{
ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
&pkt_ih4->src_address, IP_PROTOCOL_TCP, 1);
&pkt_ih4->src_address, IP_PROTOCOL_TCP,
(!(tc->flags & TCP_CONN_NO_CSUM_OFFLOAD)));
th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
}
else
@ -833,6 +911,7 @@ tcp_send_reset (tcp_connection_t * tc)
tc->rcv_nxt, tcp_hdr_opts_len, flags,
advertise_wnd);
opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
th->checksum = tcp_compute_checksum (tc, b);
ASSERT (opts_write_len == tc->snd_opts_len);
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
@ -851,7 +930,8 @@ tcp_push_ip_hdr (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
{
ip4_header_t *ih;
ih = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip4,
&tc->c_rmt_ip4, IP_PROTOCOL_TCP, 1);
&tc->c_rmt_ip4, IP_PROTOCOL_TCP,
(!(tc->flags & TCP_CONN_NO_CSUM_OFFLOAD)));
th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih);
}
else
@ -1082,6 +1162,9 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt,
tc->bytes_out += data_len;
tc->data_segs_out += 1;
th->checksum = tcp_compute_checksum (tc, b);
TCP_EVT (TCP_EVT_PKTIZE, tc);
}
@ -2154,30 +2237,19 @@ always_inline void
tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0,
tcp_connection_t * tc0, u8 is_ip4)
{
tcp_header_t *th0 = 0;
u8 __clib_unused *ih0;
tcp_header_t __clib_unused *th0 = vlib_buffer_get_current (b0);
th0 = vlib_buffer_get_current (b0);
TCP_EVT (TCP_EVT_OUTPUT, tc0, th0->flags, b0->current_length);
if (is_ip4)
{
vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
IP_PROTOCOL_TCP, 1);
b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
th0->checksum = 0;
}
ih0 = vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
IP_PROTOCOL_TCP,
(!(tc0->flags & TCP_CONN_NO_CSUM_OFFLOAD)));
else
{
ip6_header_t *ih0;
ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6,
&tc0->c_rmt_ip6, IP_PROTOCOL_TCP);
b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data;
vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
b0->flags |=
VNET_BUFFER_F_L3_HDR_OFFSET_VALID | VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
th0->checksum = 0;
}
ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6, &tc0->c_rmt_ip6,
IP_PROTOCOL_TCP);
}
always_inline void