From dc30c6d3d6fadea3fca54ebedc4ca066597c369c Mon Sep 17 00:00:00 2001 From: John Lo Date: Mon, 26 Jun 2017 01:40:20 -0400 Subject: [PATCH] Send GARP/NA on bonded intf slave up/down if in active-backup mode If a bonded interface is in active-backup mode and configured with IPv4 and/or IPv6 addresses, on slave interface link up/down, send a GARP packet if configured with an IPv4 address and an unsolicited NA if configured with an IPv6 address. These packets can help with faster route convergence in the next hop router/switch. Change-Id: I68ccb11a4a40cda414704fa08ee0171c952befa2 Signed-off-by: John Lo (cherry picked from commit 8b81cb43359380e50d3fc216d93ff05894149939) --- src/plugins/dpdk/device/common.c | 62 ++++++++++++++++++++++++ src/plugins/dpdk/device/dpdk.h | 8 +++ src/plugins/dpdk/device/init.c | 23 ++++++--- src/vnet/ethernet/arp.c | 83 ++++++++++++++++++++++++++++++++ src/vnet/ethernet/arp_packet.h | 9 ++++ src/vnet/ip/ip6.h | 2 + src/vnet/ip/ip6_neighbor.c | 53 ++++++++++++++++++++ 7 files changed, 232 insertions(+), 8 deletions(-) diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c index 1a9688e75e5..df52c58fa18 100644 --- a/src/plugins/dpdk/device/common.c +++ b/src/plugins/dpdk/device/common.c @@ -12,13 +12,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include #include #include #include #include +#include #include +#include #include #include @@ -178,6 +181,65 @@ dpdk_device_stop (dpdk_device_t * xd) } } +void +dpdk_port_state_callback (uint8_t port_id, + enum rte_eth_event_type type, void *param) +{ + struct rte_eth_link link; + vlib_main_t *vm = vlib_get_main (); + dpdk_device_t *xd = &dpdk_main.devices[port_id]; + + RTE_SET_USED (param); + if (type != RTE_ETH_EVENT_INTR_LSC) + { + clib_warning ("Unknown event %d received for port %d", type, port_id); + return; + } + + rte_eth_link_get_nowait (port_id, &link); + u8 link_up = link.link_status; + + if (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE) + { + u8 bd_port = xd->bond_port; + int bd_mode = rte_eth_bond_mode_get (bd_port); + + if ((link_up && !(xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE_UP)) || + (!link_up && (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE_UP))) + { + clib_warning ("Port %d state to %s, " + "slave of port %d BondEthernet%d in mode %d", + port_id, (link_up) ? "UP" : "DOWN", + bd_port, xd->port_id, bd_mode); + if (bd_mode == BONDING_MODE_ACTIVE_BACKUP) + { + rte_eth_link_get_nowait (bd_port, &link); + if (link.link_status) /* bonded interface up */ + { + u32 hw_if_index = dpdk_main.devices[bd_port].hw_if_index; + vlib_process_signal_event + (vm, send_garp_na_process_node_index, SEND_GARP_NA, + hw_if_index); + } + } + } + if (link_up) /* Update slave link status */ + xd->flags |= DPDK_DEVICE_FLAG_BOND_SLAVE_UP; + else + xd->flags &= ~DPDK_DEVICE_FLAG_BOND_SLAVE_UP; + } + else /* Should not happen as callback not setup for "normal" links */ + { + if (link_up) + clib_warning ("Port %d Link Up - speed %u Mbps - %s", + port_id, (unsigned) link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+ "full-duplex" : "half-duplex"); + else + clib_warning ("Port %d Link Down\n\n", port_id); + } +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h index d82ba5ddaca..c6fd7388fdb 100644 --- a/src/plugins/dpdk/device/dpdk.h +++ b/src/plugins/dpdk/device/dpdk.h @@ -173,6 +173,8 @@ typedef struct #define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4) #define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5) #define DPDK_DEVICE_FLAG_HQOS (1 << 6) +#define DPDK_DEVICE_FLAG_BOND_SLAVE (1 << 7) +#define DPDK_DEVICE_FLAG_BOND_SLAVE_UP (1 << 8) u16 nb_tx_desc; CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); @@ -197,6 +199,10 @@ typedef struct /* af_packet or BondEthernet instance number */ u8 port_id; + /* Bonded interface port# of a slave - + only valid if DPDK_DEVICE_FLAG_BOND_SLAVE bit is set */ + u8 bond_port; + struct rte_eth_link link; f64 time_last_link_update; @@ -408,6 +414,8 @@ typedef struct void dpdk_device_setup (dpdk_device_t * xd); void dpdk_device_start (dpdk_device_t * xd); void dpdk_device_stop (dpdk_device_t * xd); +void dpdk_port_state_callback (uint8_t port_id, + enum rte_eth_event_type type, void *param); #define foreach_dpdk_error \ _(NONE, "no error") \ diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 90968075c0b..d9ab0756f2d 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -1373,8 +1373,10 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) /* * Extra set up for bond interfaces: * 1. Setup MACs for bond interfaces and their slave links which was set - * in dpdk_device_setup() but needs to be done again here to take effect. - * 2. Set up info for bond interface related CLI support. + * in dpdk_device_setup() but needs to be done again here to take + * effect. + * 2. Set up info and register slave link state change callback handling. + * 3. Set up info for bond interface related CLI support. 
*/ int nports = rte_eth_dev_count (); if (nports > 0) @@ -1399,7 +1401,8 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) (slink[0], (struct ether_addr *) addr); /* Set MAC of bounded interface to that of 1st slave link */ - clib_warning ("Set MAC for bond dev# %d", i); + clib_warning ("Set MAC for bond port %d BondEthernet%d", + i, xd->port_id); rv = rte_eth_bond_mac_address_set (i, (struct ether_addr *) addr); if (rv) @@ -1428,34 +1431,38 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) /* Add MAC to all slave links except the first one */ if (nlink) { - clib_warning ("Add MAC for slave dev# %d", slave); + clib_warning ("Add MAC for slave port %d", slave); rv = rte_eth_dev_mac_addr_add (slave, (struct ether_addr *) addr, 0); if (rv) clib_warning ("Add MAC addr failure rv=%d", rv); } + /* Setup slave link state change callback handling */ + rte_eth_dev_callback_register + (slave, RTE_ETH_EVENT_INTR_LSC, + dpdk_port_state_callback, NULL); + dpdk_device_t *sxd = &dm->devices[slave]; + sxd->flags |= DPDK_DEVICE_FLAG_BOND_SLAVE; + sxd->bond_port = i; /* Set slaves bitmap for bonded interface */ bhi->bond_info = clib_bitmap_set (bhi->bond_info, sdev->hw_if_index, 1); - /* Set slave link flags on slave interface */ + /* Set MACs and slave link flags on slave interface */ shi = vnet_get_hw_interface (vnm, sdev->hw_if_index); ssi = vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index); sei = pool_elt_at_index (em->interfaces, shi->hw_instance); - shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE; ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE; clib_memcpy (shi->hw_address, addr, 6); clib_memcpy (sei->address, addr, 6); - /* Set l3 packet size allowed as the lowest of slave */ if (bhi->max_l3_packet_bytes[VLIB_RX] > shi->max_l3_packet_bytes[VLIB_RX]) bhi->max_l3_packet_bytes[VLIB_RX] = bhi->max_l3_packet_bytes[VLIB_TX] = shi->max_l3_packet_bytes[VLIB_RX]; - /* Set max packet size allowed as the lowest of slave */ 
if (bhi->max_packet_bytes > shi->max_packet_bytes) bhi->max_packet_bytes = shi->max_packet_bytes; diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index d5dc9cceb39..df68175055e 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -110,6 +110,9 @@ typedef struct static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 }; +/* Node index for send_garp_na_process */ +u32 send_garp_na_process_node_index; + static void set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t * a); @@ -2378,6 +2381,86 @@ ethernet_arp_change_mac (u32 sw_if_index) /* *INDENT-ON* */ } +void static +send_ip4_garp (vlib_main_t * vm, vnet_hw_interface_t * hi) +{ + ip4_main_t *i4m = &ip4_main; + u32 sw_if_index = hi->sw_if_index; + ip4_address_t *ip4_addr = ip4_interface_first_address (i4m, sw_if_index, 0); + + if (ip4_addr) + { + clib_warning ("Sending GARP for IP4 address %U on sw_if_idex %d", + format_ip4_address, ip4_addr, sw_if_index); + + /* Form GARP packet for output - Gratuitous ARP is an ARP request packet + where the interface IP/MAC pair is used for both source and request + MAC/IP pairs in the request */ + u32 bi = 0; + ethernet_arp_header_t *h = vlib_packet_template_get_packet + (vm, &i4m->ip4_arp_request_packet_template, &bi); + clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, + sizeof (h->ip4_over_ethernet[0].ethernet)); + clib_memcpy (h->ip4_over_ethernet[1].ethernet, hi->hw_address, + sizeof (h->ip4_over_ethernet[1].ethernet)); + h->ip4_over_ethernet[0].ip4 = ip4_addr[0]; + h->ip4_over_ethernet[1].ip4 = ip4_addr[0]; + + /* Setup MAC header with ARP Etype and broadcast DMAC */ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_buffer_advance (b, -sizeof (ethernet_header_t)); + ethernet_header_t *e = vlib_buffer_get_current (b); + e->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP); + clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address)); + memset (e->dst_address, 0xff, sizeof 
(e->dst_address)); + + /* Send GARP packet out the specified interface */ + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; + vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index); + u32 *to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); + } +} + +static vlib_node_registration_t send_garp_na_proc_node; + +static uword +send_garp_na_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + vnet_main_t *vnm = vnet_get_main (); + uword event_type, *event_data = 0; + + send_garp_na_process_node_index = send_garp_na_proc_node.index; + + while (1) + { + vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &event_data); + if ((event_type == SEND_GARP_NA) && (vec_len (event_data) >= 1)) + { + u32 hw_if_index = event_data[0]; + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); + send_ip4_garp (vm, hi); + send_ip6_na (vm, hi); + } + vec_reset_length (event_data); + } + return 0; +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (send_garp_na_proc_node, static) = { + .function = send_garp_na_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "send-garp-na-process", +}; +/* *INDENT-ON* */ + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/ethernet/arp_packet.h b/src/vnet/ethernet/arp_packet.h index 17e64f43049..d740b844e12 100644 --- a/src/vnet/ethernet/arp_packet.h +++ b/src/vnet/ethernet/arp_packet.h @@ -167,6 +167,15 @@ typedef struct ethernet_arp_ip4_entry_t *ip4_neighbor_entries (u32 sw_if_index); u8 *format_ethernet_arp_ip4_entry (u8 * s, va_list * va); +/* Node index for send_garp_na_process */ +extern u32 send_garp_na_process_node_index; + +/* Event type for send_garp_na_process */ +enum +{ + SEND_GARP_NA = 1, +} dpdk_send_garp_na_process_event_t; + #endif /* included_ethernet_arp_packet_h */ /* diff --git a/src/vnet/ip/ip6.h 
b/src/vnet/ip/ip6.h index d623c95f52f..cf52994e720 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -375,6 +375,8 @@ int vnet_ip6_nd_term (vlib_main_t * vm, ethernet_header_t * eth, ip6_header_t * ip, u32 sw_if_index, u16 bd_index); +void send_ip6_na (vlib_main_t * vm, vnet_hw_interface_t * hi); + u8 *format_ip6_forward_next_trace (u8 * s, va_list * args); u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0); diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index ba7ea143318..b8f6f9b10e7 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -4192,6 +4192,59 @@ ethernet_ndp_change_mac (u32 sw_if_index) /* *INDENT-ON* */ } +void +send_ip6_na (vlib_main_t * vm, vnet_hw_interface_t * hi) +{ + ip6_main_t *i6m = &ip6_main; + u32 sw_if_index = hi->sw_if_index; + ip6_address_t *ip6_addr = ip6_interface_first_address (i6m, sw_if_index); + if (ip6_addr) + { + clib_warning + ("Sending unsolicitated NA IP6 address %U on sw_if_idex %d", + format_ip6_address, ip6_addr, sw_if_index); + + /* Form unsolicited neighbor advertisement packet from NS pkt template */ + int bogus_length; + u32 bi = 0; + icmp6_neighbor_solicitation_header_t *h = + vlib_packet_template_get_packet (vm, + &i6m->discover_neighbor_packet_template, + &bi); + ip6_set_reserved_multicast_address (&h->ip.dst_address, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_hosts); + h->ip.src_address = ip6_addr[0]; + h->neighbor.icmp.type = ICMP6_neighbor_advertisement; + h->neighbor.target_address = ip6_addr[0]; + h->neighbor.advertisement_flags = clib_host_to_net_u32 + (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE); + clib_memcpy (h->link_layer_option.ethernet_address, + hi->hw_address, vec_len (hi->hw_address)); + h->neighbor.icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length); + ASSERT (bogus_length == 0); + + /* Setup MAC header with IP6 Etype and mcast DMAC */ + vlib_buffer_t *b = vlib_get_buffer 
(vm, bi); + vlib_buffer_advance (b, -sizeof (ethernet_header_t)); + ethernet_header_t *e = vlib_buffer_get_current (b); + e->type = clib_host_to_net_u16 (ETHERNET_TYPE_IP6); + clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address)); + ip6_multicast_ethernet_address (e->dst_address, + IP6_MULTICAST_GROUP_ID_all_hosts); + + /* Send unsolicited ND advertisement packet out the specified interface */ + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; + vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index); + u32 *to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); + } +} + /* * fd.io coding-style-patch-verification: ON *