gso: do not gro on small packets

This has two benefits. First, it fixes an issue where small packets could
be padded to 64 bytes by the ethernet layer, and those padding bytes were
included in the tcp stream. Second, receiving a small packet signals that
the tcp stream is likely more latency-sensitive than throughput-sensitive,
so skipping gro buffering should be beneficial.

Type: fix
Change-Id: I165b97673d8cdce95ebf0a66c362ae9f9e6f3f34
Signed-off-by: Aloys Augustin <aloaugus@cisco.com>
This commit is contained in:
Aloys Augustin
2021-09-15 16:06:04 +02:00
committed by Benoît Ganne
parent b21fd4b7e0
commit 86490da4ce
2 changed files with 143 additions and 3 deletions

View File

@@ -21,10 +21,14 @@
#include <vnet/gso/hdr_offset_parser.h>
#include <vnet/ip/ip4.h>
#include <vnet/ip/ip6.h>
#include <vnet/ip/ip6_inlines.h>
#include <vnet/udp/udp_packet.h>
#include <vnet/tcp/tcp_packet.h>
#include <vnet/vnet.h>
/* Length threshold in bytes: packets whose length is at or below this value
 * are excluded from GRO coalescing and do not look up or create a flow. */
#define GRO_MIN_PACKET_SIZE 256
/* Length threshold in bytes: packets whose length is at or below this value
 * are checked for trailing padding, which is trimmed from the buffer. */
#define GRO_PADDED_PACKET_SIZE 64
static_always_inline u8
gro_is_bad_packet (vlib_buffer_t * b, u8 flags, i16 l234_sz)
{
@@ -159,6 +163,34 @@ gro_validate_checksum (vlib_main_t * vm, vlib_buffer_t * b0,
return flags;
}
/*
 * Trim trailing padding from a short TCP packet.
 *
 * The TCP payload size is derived from the IP header's own length field
 * (IPv4 total length minus the IPv4 header and L4 header sizes, or IPv6
 * payload length minus the L4 header size).  When headers plus payload come
 * out shorter than pkt_len0, the surplus is treated as padding and
 * b0->current_length is shrunk to drop it.
 *
 * b0       - buffer holding the packet; its current_length may be rewritten
 * gho0     - parsed header offsets; gho_flags selects the IPv4 or IPv6 path
 * ip4_0    - IPv4 header, read only when GHO_F_IP4 is set in gho0->gho_flags
 * ip6_0    - IPv6 header, read otherwise
 * pkt_len0 - packet length as seen by the caller
 * l234_sz0 - header size preceding the TCP payload (presumably the combined
 *            L2 + L3 + L4 size; confirm against the caller)
 *
 * Returns the packet length after any trimming; pkt_len0 is returned
 * unchanged when no padding is detected.
 */
static_always_inline u32
gro_fix_padded_packet_len (vlib_buffer_t *b0, generic_header_offset_t *gho0,
			   ip4_header_t *ip4_0, ip6_header_t *ip6_0,
			   u32 pkt_len0, u16 l234_sz0)
{
  u32 payload_sz = 0;
  u32 expected_len;

  if (gho0->gho_flags & GHO_F_IP4)
    payload_sz = clib_net_to_host_u16 (ip4_0->length) -
		 ip4_header_bytes (ip4_0) - gho0->l4_hdr_sz;
  else
    payload_sz =
      clib_net_to_host_u16 (ip6_0->payload_length) - gho0->l4_hdr_sz;

  /* length the packet should have according to its own headers */
  expected_len = l234_sz0 + payload_sz;

  ASSERT (expected_len <= pkt_len0);

  /* common case: nothing beyond the payload, leave the buffer untouched */
  if (PREDICT_TRUE (expected_len >= pkt_len0))
    return pkt_len0;

  /* small packet with padding at the end, remove padding */
  b0->current_length = expected_len;
  return b0->current_length;
}
static_always_inline u32
gro_get_packet_data (vlib_main_t *vm, vlib_buffer_t *b0,
generic_header_offset_t *gho0, gro_flow_key_t *flow_key0,
@@ -222,6 +254,11 @@ gro_get_packet_data (vlib_main_t *vm, vlib_buffer_t *b0,
if (PREDICT_FALSE (pkt_len0 >= TCP_MAX_GSO_SZ))
return 0;
if (PREDICT_FALSE (pkt_len0 <= GRO_PADDED_PACKET_SIZE))
{
pkt_len0 =
gro_fix_padded_packet_len (b0, gho0, ip4_0, ip6_0, pkt_len0, l234_sz0);
}
return pkt_len0;
}
@@ -264,8 +301,8 @@ gro_coalesce_buffers (vlib_main_t *vm, vlib_buffer_t *b0, vlib_buffer_t *b1,
pkt_len0 = vlib_buffer_length_in_chain (vm, b0);
pkt_len1 = vlib_buffer_length_in_chain (vm, b1);
if (((gho0.gho_flags & GHO_F_TCP) == 0)
|| ((gho1.gho_flags & GHO_F_TCP) == 0))
if (((gho0.gho_flags & GHO_F_TCP) == 0 || pkt_len0 <= GRO_MIN_PACKET_SIZE) ||
((gho1.gho_flags & GHO_F_TCP) == 0 || pkt_len1 <= GRO_MIN_PACKET_SIZE))
return 0;
ip4_0 =
@@ -483,7 +520,8 @@ vnet_gro_flow_table_inline (vlib_main_t * vm, gro_flow_table_t * flow_table,
}
tcp0 = (tcp_header_t *) (vlib_buffer_get_current (b0) + gho0.l4_hdr_offset);
if (PREDICT_TRUE ((tcp0->flags & TCP_FLAG_PSH) == 0))
if (PREDICT_TRUE (((tcp0->flags & TCP_FLAG_PSH) == 0) &&
(pkt_len0 > GRO_MIN_PACKET_SIZE)))
gro_flow = gro_flow_table_find_or_add_flow (flow_table, &flow_key0);
else
{

View File

@@ -138,5 +138,107 @@ class TestGRO(VppTestCase):
self.assertEqual(rx[TCP].sport, 1234)
self.assertEqual(rx[TCP].dport, 4321)
#
# Same test with IPv6
#
p = []
s = 0
for n in range(0, 88):
p.append((Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) /
IPv6(src=self.pg0.remote_ip6, dst=self.pg2.remote_ip6) /
TCP(sport=1234, dport=4321, seq=s, ack=n, flags='A') /
Raw(b'\xa5' * 1460)))
s += 1460
p[-1][TCP].flags = 'AP' # push to flush second packet
rxs = self.send_and_expect(self.pg0, p, self.pg2, n_rx=2)
i = 0
for rx in rxs:
i += 1
self.assertEqual(rx[Ether].src, self.pg2.local_mac)
self.assertEqual(rx[Ether].dst, self.pg2.remote_mac)
self.assertEqual(rx[IPv6].src, self.pg0.remote_ip6)
self.assertEqual(rx[IPv6].dst, self.pg2.remote_ip6)
self.assertEqual(rx[IPv6].plen, 64260) # 1460 * 44 + 20 < 65536
self.assertEqual(rx[TCP].sport, 1234)
self.assertEqual(rx[TCP].dport, 4321)
self.assertEqual(rx[TCP].ack, (44*i - 1))
#
# Send a series of 1500 bytes packets each followed by a packet with a
# PSH flag. Verify that GRO stops every time a PSH flag is encountered
#
p = []
s = 0
for n in range(0, n_packets):
p.append((Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) /
IP(src=self.pg0.remote_ip4, dst=self.pg2.remote_ip4,
flags='DF') /
TCP(sport=1234, dport=4321, seq=s, ack=2*n, flags='A') /
Raw(b'\xa5' * 1460)))
s += 1460
p.append((Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) /
IP(src=self.pg0.remote_ip4, dst=self.pg2.remote_ip4,
flags='DF') /
TCP(sport=1234, dport=4321, seq=s, ack=2*n+1,
flags='AP') /
Raw(b'\xa5' * 1340)))
s += 1340
rxs = self.send_and_expect(self.pg0, p, self.pg2, n_rx=n_packets)
i = 0
for rx in rxs:
self.assertEqual(rx[Ether].src, self.pg2.local_mac)
self.assertEqual(rx[Ether].dst, self.pg2.remote_mac)
self.assertEqual(rx[IP].src, self.pg0.remote_ip4)
self.assertEqual(rx[IP].dst, self.pg2.remote_ip4)
self.assertEqual(rx[IP].len, 40 + 1460 + 1340)
self.assertEqual(rx[TCP].sport, 1234)
self.assertEqual(rx[TCP].dport, 4321)
self.assertEqual(rx[TCP].ack, (2*i + 1))
i += 1
#
# Send a series of 1500 bytes packets each followed by a short packet
# with padding. Verify that GRO removes the padding and stops on short
# packets
#
p = []
s = 0
for n in range(0, n_packets):
i = self.pg0
p.append((Ether(src=i.remote_mac, dst=i.local_mac) /
IP(src=i.remote_ip4, dst=self.pg2.remote_ip4,
flags='DF') /
TCP(sport=1234, dport=4321, seq=s, ack=2*n, flags='A') /
Raw(b'\xa5' * 1459)))
s += 1459
p2 = (Ether(src=i.remote_mac, dst=i.local_mac) /
IP(src=i.remote_ip4, dst=self.pg2.remote_ip4,
flags='DF', len=41) /
TCP(sport=1234, dport=4321, seq=s, ack=2*n+1, flags='A') /
Raw(b'\xa5'))
# first compute csum of pkt w/o padding to work around scapy bug
p2 = Ether(bytes(p2))
p.append(p2 / Raw(b'\xa5' * 5)) # 1 byte data + 5 bytes padding
s += 1
rxs = self.send_and_expect(self.pg0, p, self.pg2, n_rx=n_packets)
i = 0
for rx in rxs:
self.assertEqual(rx[Ether].src, self.pg2.local_mac)
self.assertEqual(rx[Ether].dst, self.pg2.remote_mac)
self.assertEqual(rx[IP].src, self.pg0.remote_ip4)
self.assertEqual(rx[IP].dst, self.pg2.remote_ip4)
self.assertEqual(rx[IP].len, 40 + 1459 + 1)
self.assertEqual(rx[TCP].sport, 1234)
self.assertEqual(rx[TCP].dport, 4321)
self.assertEqual(rx[TCP].ack, (2*i + 1))
i += 1
if __name__ == '__main__':
unittest.main(testRunner=VppTestRunner)