diff --git a/src/vnet/l2/l2_flood.c b/src/vnet/l2/l2_flood.c index ed9e5ac2258..97f1387ee94 100644 --- a/src/vnet/l2/l2_flood.c +++ b/src/vnet/l2/l2_flood.c @@ -58,6 +58,10 @@ typedef struct /* convenience variables */ vlib_main_t *vlib_main; vnet_main_t *vnet_main; + + /* per-cpu vector of cloned packets */ + u32 **clones; + l2_flood_member_t ***members; } l2flood_main_t; typedef struct @@ -131,158 +135,6 @@ typedef enum * could be turned into an ICMP reply. If BVI processing is not performed * last, the modified packet would be replicated to the remaining members. */ - -static_always_inline void -l2flood_process (vlib_main_t * vm, - vlib_node_runtime_t * node, - l2flood_main_t * msm, - u64 * counter_base, - vlib_buffer_t * b0, - u32 * sw_if_index0, - l2fib_entry_key_t * key0, - u32 * bucket0, l2fib_entry_result_t * result0, u32 * next0) -{ - u16 bd_index0; - l2_bridge_domain_t *bd_config; - l2_flood_member_t *members; - i32 current_member; /* signed */ - replication_context_t *ctx; - u8 in_shg = vnet_buffer (b0)->l2.shg; - - if (!replication_is_recycled (b0)) - { - - /* Do flood "prep node" processing */ - - /* Get config for the bridge domain interface */ - bd_index0 = vnet_buffer (b0)->l2.bd_index; - bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index0); - members = bd_config->members; - - /* Find first member that passes the reflection and SHG checks */ - current_member = bd_config->flood_count - 1; - while ((current_member >= 0) && - ((members[current_member].sw_if_index == *sw_if_index0) || - (in_shg && members[current_member].shg == in_shg))) - { - current_member--; - } - - if (current_member < 0) - { - /* No members to flood to */ - *next0 = L2FLOOD_NEXT_DROP; - b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS]; - return; - } - - if ((current_member > 0) && - ((current_member > 1) || - ((members[0].sw_if_index != *sw_if_index0) && - (!in_shg || members[0].shg != in_shg)))) - { - /* If more than one member then initiate replication */ - ctx = - 
replication_prep (vm, b0, l2flood_node.index, 1 /* l2_packet */ ); - ctx->feature_replicas = (uword) members; - ctx->feature_counter = current_member; - } - - } - else - { - vnet_buffer_opaque_t *vnet_buff_op; - - /* Do flood "recycle node" processing */ - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL)) - { - (void) replication_recycle (vm, b0, 1 /* is_last */ ); - *next0 = L2FLOOD_NEXT_DROP; - b0->error = node->errors[L2FLOOD_ERROR_REPL_FAIL]; - return; - } - - ctx = replication_get_ctx (b0); - replication_clear_recycled (b0); - - members = (l2_flood_member_t *) (intptr_t) ctx->feature_replicas; - current_member = (i32) ctx->feature_counter - 1; - - /* Need to update input index from saved packet context */ - vnet_buff_op = (vnet_buffer_opaque_t *) ctx->vnet_buffer; - *sw_if_index0 = vnet_buff_op->sw_if_index[VLIB_RX]; - - /* Find next member that passes the reflection and SHG check */ - while ((current_member >= 0) && - ((members[current_member].sw_if_index == *sw_if_index0) || - (in_shg && members[current_member].shg == in_shg))) - { - current_member--; - } - - if (current_member < 0) - { - /* - * No more members to flood to. - * Terminate replication and drop packet. 
- */ - - replication_recycle (vm, b0, 1 /* is_last */ ); - - *next0 = L2FLOOD_NEXT_DROP; - /* Ideally we woudn't bump a counter here, just silently complete */ - b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS]; - return; - } - - /* Restore packet and context and continue replication */ - ctx->feature_counter = current_member; - replication_recycle (vm, b0, ((current_member == 0) || /*is_last */ - ((current_member == 1) && - ((members[0].sw_if_index == - *sw_if_index0) || (in_shg - && members[0].shg == - in_shg))))); - } - - /* Forward packet to the current member */ - if (PREDICT_FALSE (members[current_member].flags & L2_FLOOD_MEMBER_BVI)) - { - /* Do BVI processing */ - u32 rc; - rc = l2_to_bvi (vm, - msm->vnet_main, - b0, - members[current_member].sw_if_index, - &msm->l3_next, next0); - - if (PREDICT_FALSE (rc)) - { - if (rc == TO_BVI_ERR_BAD_MAC) - { - b0->error = node->errors[L2FLOOD_ERROR_BVI_BAD_MAC]; - *next0 = L2FLOOD_NEXT_DROP; - } - else if (rc == TO_BVI_ERR_ETHERTYPE) - { - b0->error = node->errors[L2FLOOD_ERROR_BVI_ETHERTYPE]; - *next0 = L2FLOOD_NEXT_DROP; - } - } - } - else - { - /* Do normal L2 forwarding */ - vnet_buffer (b0)->sw_if_index[VLIB_TX] = - members[current_member].sw_if_index; - *next0 = L2FLOOD_NEXT_L2_OUTPUT; - - } - -} - - static uword l2flood_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -290,161 +142,214 @@ l2flood_node_fn (vlib_main_t * vm, u32 n_left_from, *from, *to_next; l2flood_next_t next_index; l2flood_main_t *msm = &l2flood_main; - vlib_node_t *n = vlib_get_node (vm, l2flood_node.index); - u32 node_counter_base_index = n->error_heap_index; - vlib_error_main_t *em = &vm->error_main; + u32 thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; /* number of packets to process */ + n_left_from = frame->n_vectors; next_index = node->cached_next_index; while (n_left_from > 0) { u32 n_left_to_next; - /* get space to enqueue frame to graph node 
"next_index" */ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_left_from >= 6 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - u32 next0, next1; - u32 sw_if_index0, sw_if_index1; - l2fib_entry_key_t key0, key1; - l2fib_entry_result_t result0, result1; - u32 bucket0, bucket1; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p2, *p3, *p4, *p5; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - - /* Prefetch the buffer header for the N+2 loop iteration */ - vlib_prefetch_buffer_header (p4, LOAD); - vlib_prefetch_buffer_header (p5, LOAD); - - /* Prefetch the replication context for the N+1 loop iteration */ - /* This depends on the buffer header above */ - replication_prefetch_ctx (p2); - replication_prefetch_ctx (p3); - - /* Prefetch the packet for the N+1 loop iteration */ - CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); - } - - /* speculatively enqueue b0 and b1 to the current next frame */ - /* bi is "buffer index", b is pointer to the buffer */ - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - from += 2; - to_next += 2; - n_left_from -= 2; - n_left_to_next -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - /* RX interface handles */ - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; - - /* process 2 pkts */ - em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 2; - - l2flood_process (vm, node, msm, - &em->counters[node_counter_base_index], b0, - &sw_if_index0, &key0, &bucket0, &result0, &next0); - - l2flood_process (vm, node, msm, - &em->counters[node_counter_base_index], b1, - &sw_if_index1, &key1, &bucket1, &result1, &next1); - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - { - if (PREDICT_FALSE 
(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - l2flood_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - ethernet_header_t *h0 = vlib_buffer_get_current (b0); - t->sw_if_index = sw_if_index0; - t->bd_index = vnet_buffer (b0)->l2.bd_index; - clib_memcpy (t->src, h0->src_address, 6); - clib_memcpy (t->dst, h0->dst_address, 6); - } - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - l2flood_trace_t *t = - vlib_add_trace (vm, node, b1, sizeof (*t)); - ethernet_header_t *h1 = vlib_buffer_get_current (b1); - t->sw_if_index = sw_if_index1; - t->bd_index = vnet_buffer (b1)->l2.bd_index; - clib_memcpy (t->src, h1->src_address, 6); - clib_memcpy (t->dst, h1->dst_address, 6); - } - } - - /* verify speculative enqueues, maybe switch current next frame */ - /* if next0==next1==next_index then nothing special needs to be done */ - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - while (n_left_from > 0 && n_left_to_next > 0) { - u32 bi0; - vlib_buffer_t *b0; - u32 next0; - u32 sw_if_index0; - l2fib_entry_key_t key0; - l2fib_entry_result_t result0; - u32 bucket0; + u32 next0, sw_if_index0, bi0, ci0; + u16 n_clones, n_cloned, clone0; + l2_bridge_domain_t *bd_config; + l2_flood_member_t *member; + vlib_buffer_t *b0, *c0; + u8 in_shg; + i32 mi; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; - to_next[0] = bi0; from += 1; - to_next += 1; n_left_from -= 1; - n_left_to_next -= 1; + next0 = L2FLOOD_NEXT_DROP; b0 = vlib_get_buffer (vm, bi0); + /* Get config for the bridge domain interface */ + bd_config = vec_elt_at_index (l2input_main.bd_configs, + vnet_buffer (b0)->l2.bd_index); + in_shg = vnet_buffer (b0)->l2.shg; sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - /* process 1 pkt */ - em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 1; + vec_validate (msm->members[thread_index], + vec_len (bd_config->members)); - l2flood_process (vm, node, msm, - 
&em->counters[node_counter_base_index], b0, - &sw_if_index0, &key0, &bucket0, &result0, &next0); + vec_reset_length (msm->members[thread_index]); + + /* Find all members that pass the reflection and SHG checks */ + for (mi = bd_config->flood_count - 1; mi >= 0; mi--) + { + member = &bd_config->members[mi]; + if ((member->sw_if_index != sw_if_index0) && + (!in_shg || (member->shg != in_shg))) + { + vec_add1 (msm->members[thread_index], member); + } + } + + n_clones = vec_len (msm->members[thread_index]); + + if (0 == n_clones) + { + /* No members to flood to */ + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + + b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, L2FLOOD_NEXT_DROP); + continue; + } + + vec_validate (msm->clones[thread_index], n_clones); + vec_reset_length (msm->clones[thread_index]); + + /* + * the header offset needs to be large enough to incorporate + * all the L3 headers that could be touched when doing BVI + * processing.
So take the current l2 length plus 2 * IPv6 + * headers (for tunnel encap) + */ + n_cloned = vlib_buffer_clone (vm, bi0, + msm->clones[thread_index], + n_clones, + (vnet_buffer (b0)->l2.l2_len + + sizeof (udp_header_t) + + 2 * sizeof (ip6_header_t))); + + if (PREDICT_FALSE (n_cloned != n_clones)) + { + b0->error = node->errors[L2FLOOD_ERROR_REPL_FAIL]; + } + + /* + * for all but the last clone, these are not BVI bound + */ + for (clone0 = 0; clone0 < n_cloned - 1; clone0++) + { + member = msm->members[thread_index][clone0]; + ci0 = msm->clones[thread_index][clone0]; + c0 = vlib_get_buffer (vm, ci0); + + to_next[0] = ci0; + to_next += 1; + n_left_to_next -= 1; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + ethernet_header_t *h0; + l2flood_trace_t *t; + + if (c0 != b0) + vlib_buffer_copy_trace_flag (vm, b0, ci0); + + t = vlib_add_trace (vm, node, c0, sizeof (*t)); + h0 = vlib_buffer_get_current (c0); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer (c0)->l2.bd_index; + clib_memcpy (t->src, h0->src_address, 6); + clib_memcpy (t->dst, h0->dst_address, 6); + } + + /* Do normal L2 forwarding */ + vnet_buffer (c0)->sw_if_index[VLIB_TX] = member->sw_if_index; + next0 = L2FLOOD_NEXT_L2_OUTPUT; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + ci0, next0); + if (PREDICT_FALSE (0 == n_left_to_next)) + { + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + } + } + + /* + * the last clone that might go to a BVI + */ + member = msm->members[thread_index][clone0]; + ci0 = msm->clones[thread_index][clone0]; + c0 = vlib_get_buffer (vm, ci0); + + to_next[0] = ci0; + to_next += 1; + n_left_to_next -= 1; if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && (b0->flags & VLIB_BUFFER_IS_TRACED))) { - l2flood_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); - ethernet_header_t *h0 = 
vlib_buffer_get_current (b0); + ethernet_header_t *h0; + l2flood_trace_t *t; + + if (c0 != b0) + vlib_buffer_copy_trace_flag (vm, b0, ci0); + + t = vlib_add_trace (vm, node, c0, sizeof (*t)); + h0 = vlib_buffer_get_current (c0); t->sw_if_index = sw_if_index0; - t->bd_index = vnet_buffer (b0)->l2.bd_index; + t->bd_index = vnet_buffer (c0)->l2.bd_index; clib_memcpy (t->src, h0->src_address, 6); clib_memcpy (t->dst, h0->dst_address, 6); } - /* verify speculative enqueue, maybe switch current next frame */ + + /* Forward packet to the current member */ + if (PREDICT_FALSE (member->flags & L2_FLOOD_MEMBER_BVI)) + { + /* Do BVI processing */ + u32 rc; + rc = l2_to_bvi (vm, + msm->vnet_main, + c0, member->sw_if_index, &msm->l3_next, &next0); + + if (PREDICT_FALSE (rc)) + { + if (rc == TO_BVI_ERR_BAD_MAC) + { + c0->error = node->errors[L2FLOOD_ERROR_BVI_BAD_MAC]; + } + else if (rc == TO_BVI_ERR_ETHERTYPE) + { + c0->error = node->errors[L2FLOOD_ERROR_BVI_ETHERTYPE]; + } + } + } + else + { + /* Do normal L2 forwarding */ + vnet_buffer (c0)->sw_if_index[VLIB_TX] = member->sw_if_index; + next0 = L2FLOOD_NEXT_L2_OUTPUT; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, - bi0, next0); + ci0, next0); + if (PREDICT_FALSE (0 == n_left_to_next)) + { + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + } } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } + vlib_node_increment_counter (vm, node->node_index, + L2FLOOD_ERROR_L2FLOOD, frame->n_vectors); + return frame->n_vectors; } @@ -478,6 +383,9 @@ VLIB_NODE_FUNCTION_MULTIARCH (l2flood_node, l2flood_node_fn) mp->vlib_main = vm; mp->vnet_main = vnet_get_main (); + vec_validate (mp->clones, vlib_num_workers ()); + vec_validate (mp->members, vlib_num_workers ()); + /* Initialize the feature next-node indexes */ feat_bitmap_init_next_nodes (vm, l2flood_node.index, @@ -485,7 +393,7 @@ 
VLIB_NODE_FUNCTION_MULTIARCH (l2flood_node, l2flood_node_fn) l2input_get_feat_names (), mp->feat_next_node_index); - return 0; + return NULL; } VLIB_INIT_FUNCTION (l2flood_init); diff --git a/test/test_l2_flood.py b/test/test_l2_flood.py new file mode 100644 index 00000000000..36ec309ad97 --- /dev/null +++ b/test/test_l2_flood.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python + +import unittest +import socket + +from framework import VppTestCase, VppTestRunner +from vpp_ip_route import VppIpRoute, VppRoutePath + +from scapy.packet import Raw +from scapy.layers.l2 import Ether +from scapy.layers.inet import IP, UDP + + +class TestL2Flood(VppTestCase): + """ L2-flood """ + + def setUp(self): + super(TestL2Flood, self).setUp() + + # 12 l2 interface and one l3 + self.create_pg_interfaces(range(13)) + self.create_loopback_interfaces(1) + + for i in self.pg_interfaces: + i.admin_up() + for i in self.lo_interfaces: + i.admin_up() + + self.pg12.config_ip4() + self.pg12.resolve_arp() + self.loop0.config_ip4() + + def tearDown(self): + self.pg12.unconfig_ip4() + self.loop0.unconfig_ip4() + + for i in self.pg_interfaces: + i.admin_down() + for i in self.lo_interfaces: + i.admin_down() + super(TestL2Flood, self).tearDown() + + def test_flood(self): + """ L2 Flood Tests """ + + # + # Create a single bridge Domain + # + self.vapi.bridge_domain_add_del(1) + + # + # add each interface to the BD. 
4 interfaces per split horizon group + # + for i in self.pg_interfaces[0:4]: + self.vapi.sw_interface_set_l2_bridge(i.sw_if_index, 1, 0) + for i in self.pg_interfaces[4:8]: + self.vapi.sw_interface_set_l2_bridge(i.sw_if_index, 1, 1) + for i in self.pg_interfaces[8:12]: + self.vapi.sw_interface_set_l2_bridge(i.sw_if_index, 1, 2) + for i in self.lo_interfaces: + self.vapi.sw_interface_set_l2_bridge(i.sw_if_index, 1, 2, bvi=1) + + p = (Ether(dst="ff:ff:ff:ff:ff:ff", + src="00:00:de:ad:be:ef") / + IP(src="10.10.10.10", dst="1.1.1.1") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)) + + # + # input on pg0 expect copies on pg1->11 + # this is in SHG=0 so it's flooded to all except pg0, since that's + # the ingress link + # + self.pg0.add_stream(p*65) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + for i in self.pg_interfaces[1:12]: + rx0 = i.get_capture(65, timeout=1) + + self.logger.error(self.vapi.cli("sh trace")) + + # + # input on pg4 (SHG=1) expect copies on pg0->3 (SHG=0) + # and pg8->11 (SHG=2) + # + self.pg4.add_stream(p*65) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + for i in self.pg_interfaces[:4]: + rx0 = i.get_capture(65, timeout=1) + for i in self.pg_interfaces[8:12]: + rx0 = i.get_capture(65, timeout=1) + for i in self.pg_interfaces[4:8]: + i.assert_nothing_captured(remark="Different SH group") + + # + # An IP route so the packet that hits the BVI is sent out of pg12 + # + ip_route = VppIpRoute(self, "1.1.1.1", 32, + [VppRoutePath(self.pg12.remote_ip4, + self.pg12.sw_if_index)]) + ip_route.add_vpp_config() + + self.logger.info(self.vapi.cli("sh bridge 1 detail")) + + # + # input on pg0 expect copies on pg1->12 + # this is in SHG=0 so it's flooded to all except pg0, since that's + # the ingress link + # + self.pg0.add_stream(p*65) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + for i in self.pg_interfaces[1:]: + rx0 = i.get_capture(65, timeout=1) + + # + # input on pg4 (SHG=1) expect
copies on pg0->3 (SHG=0) + # and pg8->12 (SHG=2) + # + self.pg4.add_stream(p*65) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + for i in self.pg_interfaces[:4]: + rx0 = i.get_capture(65, timeout=1) + for i in self.pg_interfaces[8:13]: + rx0 = i.get_capture(65, timeout=1) + for i in self.pg_interfaces[4:8]: + i.assert_nothing_captured(remark="Different SH group") + + # + # cleanup + # + for i in self.pg_interfaces[:12]: + self.vapi.sw_interface_set_l2_bridge(i.sw_if_index, 1, enable=0) + for i in self.lo_interfaces: + self.vapi.sw_interface_set_l2_bridge(i.sw_if_index, 1, 2, + bvi=1, enable=0) + + self.vapi.bridge_domain_add_del(1, is_add=0) + + +if __name__ == '__main__': + unittest.main(testRunner=VppTestRunner)