From d1f5d047988655a001655357f1ce35152161bedf Mon Sep 17 00:00:00 2001 From: John Lo Date: Tue, 12 Apr 2016 18:20:39 -0400 Subject: [PATCH] Change ARP and IP6-ND nodes to use interface-output node for output The current mechanism for setting up arp-input and ip6-discover-neighbor output nodes for interfaces using their interface link up/down callback function is inefficient and has potential timing issue, as observed for bonded interface. Now both nodes will setup output interface sw_if_index in the the sw_if_index[VLIB_TX] field of current packet buffer and then use the interface-ouput node to tx the packet. One side effect is that vlib_node_add_next_with_slot() needs to be modified to allow the same output node-id to be put at the specified slot, even if another slot contain that same node-id already exist. This requirement is caused by BVI support where all loopback interfaces set up as BVIs will have the same output node-id being l2-input while, for output-interface node, the output slot must match the hw_if_index of the interface. Change-Id: I18bd1d4fe9bea047018796f7b8a4d4c20ee31d6e Signed-off-by: John Lo --- vlib/vlib/node.c | 9 +++---- vnet/vnet/ethernet/arp.c | 52 +++++++++--------------------------- vnet/vnet/interface_output.c | 5 ---- vnet/vnet/ip/ip6.h | 2 -- vnet/vnet/ip/ip6_forward.c | 30 +++------------------ vnet/vnet/l2/l2_input.c | 24 +++++++---------- 6 files changed, 28 insertions(+), 94 deletions(-) diff --git a/vlib/vlib/node.c b/vlib/vlib/node.c index 40ef7c71a51..573b3a79567 100644 --- a/vlib/vlib/node.c +++ b/vlib/vlib/node.c @@ -177,10 +177,9 @@ vlib_node_add_next_with_slot (vlib_main_t * vm, if ((p = hash_get (node->next_slot_by_node, next_node_index))) { - /* Next already exists: slot must match. */ - if (slot != ~0) - ASSERT (slot == p[0]); - return p[0]; + /* Next already exists: use it if slot not specified or the same. */ + if ((slot == ~0) || (slot == p[0])) + return p[0]; } if (slot == ~0) @@ -190,7 +189,7 @@ vlib_node_add_next_with_slot (vlib_main_t * vm, vec_validate (node->n_vectors_by_next_node, slot); node->next_nodes[slot] = next_node_index; - hash_set (node->next_slot_by_node, next_node_index, slot); + if (!p) hash_set (node->next_slot_by_node, next_node_index, slot); vlib_node_runtime_update (vm, node_index, slot); diff --git a/vnet/vnet/ethernet/arp.c b/vnet/vnet/ethernet/arp.c index 205023a6ffb..a5b015c715a 100644 --- a/vnet/vnet/ethernet/arp.c +++ b/vnet/vnet/ethernet/arp.c @@ -70,8 +70,6 @@ typedef struct { uword * mac_changes_by_address; pending_resolution_t * mac_changes; - u32 * arp_input_next_index_by_hw_if_index; - ethernet_arp_ip4_entry_t * ip4_entry_pool; mhash_t ip4_entry_by_key; @@ -627,6 +625,7 @@ int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, /* Either we drop the packet or we send a reply to the sender. */ typedef enum { ARP_INPUT_NEXT_DROP, + ARP_INPUT_NEXT_REPLY_TX, ARP_INPUT_N_NEXT, } arp_input_next_t; @@ -704,12 +703,11 @@ static void unset_random_arp_entry (void) e->key.fib_index, &delme); } -static u32 arp_unnumbered (vlib_buffer_t * p0, - u32 pi0, - ethernet_header_t * eth0, - ip_interface_address_t * ifa0) +static void arp_unnumbered (vlib_buffer_t * p0, + u32 pi0, + ethernet_header_t * eth0, + ip_interface_address_t * ifa0) { - ethernet_arp_main_t * am = ðernet_arp_main; vlib_main_t * vm = vlib_get_main(); vnet_main_t * vnm = vnet_get_main(); vnet_interface_main_t * vim = &vnm->interface_main; @@ -833,13 +831,11 @@ static u32 arp_unnumbered (vlib_buffer_t * p0, } } - hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[0]); + /* The regular path outputs the original pkt.. */ + vnet_buffer (p0)->sw_if_index[VLIB_TX] = broadcast_swifs[0]; vec_free (broadcast_swifs); vec_free (buffers); - - /* The regular path outputs the original pkt.. */ - return vec_elt (am->arp_input_next_index_by_hw_if_index, hi->hw_if_index); } static uword @@ -982,14 +978,9 @@ arp_input (vlib_main_t * vm, vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); - /* Can happen in a multi-core env. */ - if (PREDICT_FALSE(hw_if0->hw_if_index >= vec_len (am->arp_input_next_index_by_hw_if_index))) - { - error0 = ETHERNET_ARP_ERROR_missing_interface_address; - goto drop2; - } - - next0 = vec_elt (am->arp_input_next_index_by_hw_if_index, hw_if0->hw_if_index); + /* Send reply back through input interface */ + vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; + next0 = ARP_INPUT_NEXT_REPLY_TX; arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply); @@ -1015,7 +1006,7 @@ arp_input (vlib_main_t * vm, goto drop2; } if (is_unnum0) - next0 = arp_unnumbered (p0, pi0, eth0, ifa0); + arp_unnumbered (p0, pi0, eth0, ifa0); else vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes); } @@ -1117,32 +1108,13 @@ VLIB_REGISTER_NODE (arp_input_node,static) = { .n_next_nodes = ARP_INPUT_N_NEXT, .next_nodes = { [ARP_INPUT_NEXT_DROP] = "error-drop", + [ARP_INPUT_NEXT_REPLY_TX] = "interface-output", }, .format_buffer = format_ethernet_arp_header, .format_trace = format_ethernet_arp_input_trace, }; -clib_error_t * -ethernet_arp_hw_interface_link_up_down (vnet_main_t * vnm, - u32 hw_if_index, - u32 flags) -{ - ethernet_arp_main_t * am = ðernet_arp_main; - vnet_hw_interface_t * hw_if; - - hw_if = vnet_get_hw_interface (vnm, hw_if_index); - - /* Fill in lookup tables with default table (0). */ - vec_validate_init_empty (am->arp_input_next_index_by_hw_if_index, hw_if_index, ~0); - am->arp_input_next_index_by_hw_if_index[hw_if_index] - = vlib_node_add_next (vnm->vlib_main, arp_input_node.index, hw_if->output_node_index); - - return 0; -} - -VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (ethernet_arp_hw_interface_link_up_down); - static int ip4_arp_entry_sort (void *a1, void *a2) { diff --git a/vnet/vnet/interface_output.c b/vnet/vnet/interface_output.c index 84dc0392a51..033e9fc44b3 100644 --- a/vnet/vnet/interface_output.c +++ b/vnet/vnet/interface_output.c @@ -1147,11 +1147,6 @@ VLIB_REGISTER_NODE (punt_buffers,static) = { .validate_frame = validate_error_frame, }; -static clib_error_t * -vnet_per_buffer_interface_output_hw_interface_add_del (vnet_main_t * vnm, - u32 hw_if_index, - u32 is_create); - VLIB_REGISTER_NODE (vnet_per_buffer_interface_output_node,static) = { .function = vnet_per_buffer_interface_output, .name = "interface-output", diff --git a/vnet/vnet/ip/ip6.h b/vnet/vnet/ip/ip6.h index b32b1b85796..ff65d3ae9fe 100644 --- a/vnet/vnet/ip/ip6.h +++ b/vnet/vnet/ip/ip6.h @@ -163,8 +163,6 @@ typedef struct ip6_main_t { /* Template used to generate IP6 neighbor solicitation packets. */ vlib_packet_template_t discover_neighbor_packet_template; - u32 * discover_neighbor_next_index_by_hw_if_index; - /* ip6 lookup table config parameters */ u32 lookup_table_nbuckets; uword lookup_table_size; diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c index a79bae6905a..4a161e7271c 100644 --- a/vnet/vnet/ip/ip6_forward.c +++ b/vnet/vnet/ip/ip6_forward.c @@ -1768,6 +1768,7 @@ void ip6_register_protocol (u32 protocol, u32 node_index) typedef enum { IP6_DISCOVER_NEIGHBOR_NEXT_DROP, + IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX, IP6_DISCOVER_NEIGHBOR_N_NEXT, } ip6_discover_neighbor_next_t; @@ -1933,11 +1934,7 @@ ip6_discover_neighbor (vlib_main_t * vm, sizeof (ethernet_header_t)); vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); - /* $$$$ hack in case next0 == 0 */ - b0->error = node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_DROP]; - next0 = - vec_elt (im->discover_neighbor_next_index_by_hw_if_index, - hw_if0->hw_if_index); + next0 = IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX; vlib_set_next_frame_buffer (vm, node, next0, bi0); } @@ -1969,31 +1966,10 @@ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = { .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT, .next_nodes = { [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop", + [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output", }, }; -clib_error_t * -ip6_discover_neighbor_hw_interface_link_up_down (vnet_main_t * vnm, - u32 hw_if_index, - u32 flags) -{ - vlib_main_t * vm = vnm->vlib_main; - ip6_main_t * im = &ip6_main; - vnet_hw_interface_t * hw_if; - - hw_if = vnet_get_hw_interface (vnm, hw_if_index); - - vec_validate_init_empty - (im->discover_neighbor_next_index_by_hw_if_index, hw_if_index, 0); - im->discover_neighbor_next_index_by_hw_if_index[hw_if_index] - = vlib_node_add_next (vm, ip6_discover_neighbor_node.index, - hw_if->output_node_index); - return 0; -} - -VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION -(ip6_discover_neighbor_hw_interface_link_up_down); - clib_error_t * ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index) { diff --git a/vnet/vnet/l2/l2_input.c b/vnet/vnet/l2/l2_input.c index a42fcae059b..3d3d51f54b4 100644 --- a/vnet/vnet/l2/l2_input.c +++ b/vnet/vnet/l2/l2_input.c @@ -35,14 +35,8 @@ #include extern clib_error_t * -ethernet_arp_hw_interface_link_up_down (vnet_main_t * vnm, - u32 hw_if_index, - u32 flags); - -extern clib_error_t * -ip6_discover_neighbor_hw_interface_link_up_down (vnet_main_t * vnm, - u32 hw_if_index, - u32 flags); +vnet_per_buffer_interface_output_hw_interface_add_del ( + vnet_main_t * vnm, u32 hw_if_index, u32 is_create); // Feature graph node names static char * l2input_feat_names[] = { @@ -567,9 +561,9 @@ u32 set_int_l2_mode (vlib_main_t * vm, mac = *((u64 *)hi->hw_address); l2fib_del_entry (mac, config->bd_index); - // Let ARP and NDP know that the output node index changed - ethernet_arp_hw_interface_link_up_down(vnet_main, hi->hw_if_index, 0); - ip6_discover_neighbor_hw_interface_link_up_down(vnet_main, hi->hw_if_index, 0); + // Let interface-output node know that the output node index changed + vnet_per_buffer_interface_output_hw_interface_add_del( + vnet_main, hi->hw_if_index, 0); } l2_if_adjust--; } else if (config->xconnect) { @@ -653,10 +647,10 @@ u32 set_int_l2_mode (vlib_main_t * vm, // Disable learning by default. no use since l2fib entry is static. config->feature_bitmap &= ~L2INPUT_FEAT_LEARN; - // Let ARP and NDP know that the output_index_node changed so they - // can send requests via BVI to BD - ethernet_arp_hw_interface_link_up_down(vnet_main, hi->hw_if_index, 0); - ip6_discover_neighbor_hw_interface_link_up_down(vnet_main, hi->hw_if_index, 0); + // Let interface-output node know that the output node index changed + // so output can be sent via BVI to BD + vnet_per_buffer_interface_output_hw_interface_add_del( + vnet_main, hi->hw_if_index, 0); } // Add interface to bridge-domain flood vector