diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 6d5f07bda2c..3444eb88b7c 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -39,6 +39,7 @@ #include #include +#include #include /* for ethernet_header_t */ #include /* for ethernet_arp_header_t */ #include @@ -2013,6 +2014,8 @@ typedef enum { IP4_REWRITE_NEXT_DROP, IP4_REWRITE_NEXT_ICMP_ERROR, + IP4_REWRITE_NEXT_FRAGMENT, + IP4_REWRITE_N_NEXT /* Last */ } ip4_rewrite_next_t; /** @@ -2042,8 +2045,10 @@ ip4_mtu_check (vlib_buffer_t * b, u16 packet_len, } else { - /* Add support for fragmentation here */ - *next = IP4_REWRITE_NEXT_DROP; + /* IP fragmentation */ + ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes, + IP4_FRAG_NEXT_IP4_LOOKUP, 0); + *next = IP4_REWRITE_NEXT_FRAGMENT; } } } @@ -2539,10 +2544,11 @@ VLIB_REGISTER_NODE (ip4_rewrite_node) = { .format_trace = format_ip4_rewrite_trace, - .n_next_nodes = 2, + .n_next_nodes = IP4_REWRITE_N_NEXT, .next_nodes = { [IP4_REWRITE_NEXT_DROP] = "ip4-drop", [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag", }, }; VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite) diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index a66070302c0..f7946b0b3af 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -39,6 +39,7 @@ #include #include +#include #include #include /* for ethernet_header_t */ #include /* for srp_hw_interface_class */ @@ -1542,6 +1543,8 @@ typedef enum { IP6_REWRITE_NEXT_DROP, IP6_REWRITE_NEXT_ICMP_ERROR, + IP6_REWRITE_NEXT_FRAGMENT, + IP6_REWRITE_N_NEXT /* Last */ } ip6_rewrite_next_t; /** @@ -1552,14 +1555,25 @@ typedef enum always_inline void ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes, - u16 adj_packet_bytes, u32 * next, u32 * error) + u16 adj_packet_bytes, bool is_locally_generated, + u32 * next, u32 * error) { if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes) { - *error = IP6_ERROR_MTU_EXCEEDED; - icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0, - adj_packet_bytes); - *next = IP6_REWRITE_NEXT_ICMP_ERROR; + if (is_locally_generated) + { + /* IP fragmentation */ + ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes, + IP6_FRAG_NEXT_IP6_LOOKUP, 0); + *next = IP6_REWRITE_NEXT_FRAGMENT; + } + else + { + *error = IP6_ERROR_MTU_EXCEEDED; + icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0, + adj_packet_bytes); + *next = IP6_REWRITE_NEXT_ICMP_ERROR; + } } } @@ -1591,6 +1605,7 @@ ip6_rewrite_inline (vlib_main_t * vm, u32 pi0, rw_len0, next0, error0, adj_index0; u32 pi1, rw_len1, next1, error1, adj_index1; u32 tx_sw_if_index0, tx_sw_if_index1; + bool is_locally_originated0, is_locally_originated1; /* Prefetch next iteration. */ { @@ -1629,7 +1644,9 @@ ip6_rewrite_inline (vlib_main_t * vm, error0 = error1 = IP6_ERROR_NONE; next0 = next1 = IP6_REWRITE_NEXT_DROP; - if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) + is_locally_originated0 = + p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; + if (PREDICT_TRUE (!is_locally_originated0)) { i32 hop_limit0 = ip0->hop_limit; @@ -1658,7 +1675,9 @@ ip6_rewrite_inline (vlib_main_t * vm, { p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; } - if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) + is_locally_originated1 = + p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; + if (PREDICT_TRUE (!is_locally_originated1)) { i32 hop_limit1 = ip1->hop_limit; @@ -1711,11 +1730,11 @@ ip6_rewrite_inline (vlib_main_t * vm, ip6_mtu_check (p0, clib_net_to_host_u16 (ip0->payload_length) + sizeof (ip6_header_t), adj0[0].rewrite_header.max_l3_packet_bytes, - &next0, &error0); + is_locally_originated0, &next0, &error0); ip6_mtu_check (p1, clib_net_to_host_u16 (ip1->payload_length) + sizeof (ip6_header_t), adj1[0].rewrite_header.max_l3_packet_bytes, - &next1, &error1); + is_locally_originated1, &next1, &error1); /* Don't adjust the buffer for hop count issue; icmp-error node * wants to see the IP headerr */ @@ -1789,6 +1808,7 @@ ip6_rewrite_inline (vlib_main_t * vm, u32 pi0, rw_len0; u32 adj_index0, next0, error0; u32 tx_sw_if_index0; + bool is_locally_originated0; pi0 = to_next[0] = from[0]; @@ -1804,7 +1824,9 @@ ip6_rewrite_inline (vlib_main_t * vm, next0 = IP6_REWRITE_NEXT_DROP; /* Check hop limit */ - if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) + is_locally_originated0 = + p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; + if (PREDICT_TRUE (!is_locally_originated0)) { i32 hop_limit0 = ip0->hop_limit; @@ -1852,7 +1874,7 @@ ip6_rewrite_inline (vlib_main_t * vm, ip6_mtu_check (p0, clib_net_to_host_u16 (ip0->payload_length) + sizeof (ip6_header_t), adj0[0].rewrite_header.max_l3_packet_bytes, - &next0, &error0); + is_locally_originated0, &next0, &error0); /* Don't adjust the buffer for hop count issue; icmp-error node * wants to see the IP header */ @@ -1968,11 +1990,12 @@ VLIB_REGISTER_NODE (ip6_rewrite_node) = .name = "ip6-rewrite", .vector_size = sizeof (u32), .format_trace = format_ip6_rewrite_trace, - .n_next_nodes = 2, + .n_next_nodes = IP6_REWRITE_N_NEXT, .next_nodes = { [IP6_REWRITE_NEXT_DROP] = "ip6-drop", [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error", + [IP6_REWRITE_NEXT_FRAGMENT] = "ip6-frag", }, }; /* *INDENT-ON* */ diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c index fba25fffff7..1207ec54b49 100644 --- a/src/vnet/ip/ip_frag.c +++ b/src/vnet/ip/ip_frag.c @@ -85,6 +85,12 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer, return; } + if (p->flags & VLIB_BUFFER_NEXT_PRESENT) + { + *error = IP_FRAG_ERROR_MALFORMED; + return; + } + if (ip4_is_fragment (ip4)) { ip_frag_id = ip4->fragment_id; @@ -342,6 +348,12 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer, return; } + if (p->flags & VLIB_BUFFER_NEXT_PRESENT) + { + *error = IP_FRAG_ERROR_MALFORMED; + return; + } + u8 has_more; u16 initial_offset; if (*next_header == IP_PROTOCOL_IPV6_FRAGMENTATION) diff --git a/test/test_mtu.py b/test/test_mtu.py index 1327c4bfd8e..d6be2e2afb5 100644 --- a/test/test_mtu.py +++ b/test/test_mtu.py @@ -119,7 +119,6 @@ class TestMTU(VppTestCase): # n.show2() self.validate_bytes(str(p[1]), icmp4_reply) - ''' # Now with DF off. Expect fragments. # First go with 1500 byte packets. p_payload = UDP(sport=1234, dport=1234) / self.payload( @@ -127,19 +126,18 @@ class TestMTU(VppTestCase): p4 = p_ether / p_ip4 / p_payload p4.flags = 0 p4_reply = p_ip4 / p_payload - p4_reply.ttl = 62 # check this + p4_reply.ttl = 62 # check this p4_reply.flags = 0 p4_reply.id = 256 self.pg_enable_capture() self.pg0.add_stream(p4*1) self.pg_start() rx = self.pg1.get_capture(3) - print('RX', len(rx)) reass_pkt = reassemble(rx) self.validate(reass_pkt, p4_reply) + ''' # Now what happens with a 9K frame - ''' p_payload = UDP(sport=1234, dport=1234) / self.payload( current_mtu - 20 - 8) p4 = p_ether / p_ip4 / p_payload @@ -158,6 +156,7 @@ class TestMTU(VppTestCase): p4_reply.show2() self.validate(reass_pkt, p4_reply) ''' + # Reset MTU self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, [current_mtu, 0, 0, 0])