NOTE(review): the header names on the `#include` lines of the first common.c
hunk were already missing from the captured patch text; restore them from the
target tree before applying.

diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c
index 1a9688e75e5..df52c58fa18 100644
--- a/src/plugins/dpdk/device/common.c
+++ b/src/plugins/dpdk/device/common.c
@@ -12,13 +12,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include
 #include
 #include
 #include
 #include
 
+#include
 #include
+#include
 #include
 
 #include
@@ -178,6 +181,84 @@ dpdk_device_stop (dpdk_device_t * xd)
     }
 }
 
+/*
+ * Link state change (LSC) event handler for DPDK ports.
+ *
+ * Events other than RTE_ETH_EVENT_INTR_LSC are logged and ignored.  For a
+ * port flagged DPDK_DEVICE_FLAG_BOND_SLAVE, a change of the slave link state
+ * is logged and, if the owning bond is in active-backup mode and its own
+ * link is up, the send-garp-na process is signalled with the bond's
+ * hw_if_index.  The slave's DPDK_DEVICE_FLAG_BOND_SLAVE_UP bit is then
+ * updated.  Ports that are not bond slaves only get their state logged.
+ *
+ * @param port_id DPDK port raising the event; indexes dpdk_main.devices
+ * @param type    DPDK event type
+ * @param param   unused
+ *
+ * NOTE(review): DPDK documents that event callbacks run in its interrupt
+ * thread - confirm vlib_process_signal_event() is safe to call from there.
+ */
+void
+dpdk_port_state_callback (uint8_t port_id,
+                          enum rte_eth_event_type type, void *param)
+{
+  struct rte_eth_link link;
+  vlib_main_t *vm = vlib_get_main ();
+  dpdk_device_t *xd = &dpdk_main.devices[port_id];
+
+  RTE_SET_USED (param);
+  if (type != RTE_ETH_EVENT_INTR_LSC)
+    {
+      clib_warning ("Unknown event %d received for port %d", type, port_id);
+      return;
+    }
+
+  rte_eth_link_get_nowait (port_id, &link);
+  u8 link_up = link.link_status;
+
+  if (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE)
+    {
+      u8 bd_port = xd->bond_port;
+      /* BondEthernet instance is kept in the bond's entry, not the slave's */
+      dpdk_device_t *bd_xd = &dpdk_main.devices[bd_port];
+      int bd_mode = rte_eth_bond_mode_get (bd_port);
+
+      if ((link_up && !(xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE_UP)) ||
+          (!link_up && (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE_UP)))
+        {
+          clib_warning ("Port %d state to %s, "
+                        "slave of port %d BondEthernet%d in mode %d",
+                        port_id, (link_up) ? "UP" : "DOWN",
+                        bd_port, bd_xd->port_id, bd_mode);
+          if (bd_mode == BONDING_MODE_ACTIVE_BACKUP)
+            {
+              rte_eth_link_get_nowait (bd_port, &link);
+              if (link.link_status)     /* bonded interface up */
+                {
+                  u32 hw_if_index = bd_xd->hw_if_index;
+                  vlib_process_signal_event
+                    (vm, send_garp_na_process_node_index, SEND_GARP_NA,
+                     hw_if_index);
+                }
+            }
+        }
+      if (link_up)              /* Update slave link status */
+        xd->flags |= DPDK_DEVICE_FLAG_BOND_SLAVE_UP;
+      else
+        xd->flags &= ~DPDK_DEVICE_FLAG_BOND_SLAVE_UP;
+    }
+  else                          /* Should not happen as callback not setup for "normal" links */
+    {
+      if (link_up)
+        clib_warning ("Port %d Link Up - speed %u Mbps - %s",
+                      port_id, (unsigned) link.link_speed,
+                      (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+                      "full-duplex" : "half-duplex");
+      else
+        clib_warning ("Port %d Link Down\n\n", port_id);
+    }
+}
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h
index d82ba5ddaca..c6fd7388fdb 100644
--- a/src/plugins/dpdk/device/dpdk.h
+++ b/src/plugins/dpdk/device/dpdk.h
@@ -173,6 +173,8 @@ typedef struct
 #define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4)
 #define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5)
 #define DPDK_DEVICE_FLAG_HQOS (1 << 6)
+#define DPDK_DEVICE_FLAG_BOND_SLAVE (1 << 7)
+#define DPDK_DEVICE_FLAG_BOND_SLAVE_UP (1 << 8)
 
   u16 nb_tx_desc;
     CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
@@ -197,6 +199,10 @@ typedef struct
   /* af_packet or BondEthernet instance number */
   u8 port_id;
 
+  /* Bonded interface port# of a slave -
+     only valid if DPDK_DEVICE_FLAG_BOND_SLAVE bit is set */
+  u8 bond_port;
+
   struct rte_eth_link link;
   f64 time_last_link_update;
 
@@ -408,6 +414,8 @@ typedef struct
 void dpdk_device_setup (dpdk_device_t * xd);
 void dpdk_device_start (dpdk_device_t * xd);
 void dpdk_device_stop (dpdk_device_t * xd);
+void dpdk_port_state_callback (uint8_t port_id,
+                               enum rte_eth_event_type type, void *param);
 
 #define foreach_dpdk_error \
   _(NONE, "no error") \
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index 90968075c0b..d9ab0756f2d 100755
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -1373,8 +1373,10 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
   /*
    * Extra set up for bond interfaces:
    *  1. Setup MACs for bond interfaces and their slave links which was set
-   *     in dpdk_device_setup() but needs to be done again here to take effect.
-   *  2. Set up info for bond interface related CLI support.
+   *     in dpdk_device_setup() but needs to be done again here to take
+   *     effect.
+   *  2. Set up info and register slave link state change callback handling.
+   *  3. Set up info for bond interface related CLI support.
    */
   int nports = rte_eth_dev_count ();
   if (nports > 0)
@@ -1399,7 +1401,8 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
                     (slink[0], (struct ether_addr *) addr);
 
                   /* Set MAC of bounded interface to that of 1st slave link */
-                  clib_warning ("Set MAC for bond dev# %d", i);
+                  clib_warning ("Set MAC for bond port %d BondEthernet%d",
+                                i, xd->port_id);
                   rv = rte_eth_bond_mac_address_set
                     (i, (struct ether_addr *) addr);
                   if (rv)
@@ -1428,34 +1431,40 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
                       /* Add MAC to all slave links except the first one */
                       if (nlink)
                         {
-                          clib_warning ("Add MAC for slave dev# %d", slave);
+                          clib_warning ("Add MAC for slave port %d", slave);
                           rv = rte_eth_dev_mac_addr_add
                             (slave, (struct ether_addr *) addr, 0);
                           if (rv)
                             clib_warning ("Add MAC addr failure rv=%d", rv);
                         }
+                      /* Setup slave link state change callback handling */
+                      rv = rte_eth_dev_callback_register
+                        (slave, RTE_ETH_EVENT_INTR_LSC,
+                         dpdk_port_state_callback, NULL);
+                      if (rv)
+                        clib_warning ("Callback register failure rv=%d", rv);
+                      dpdk_device_t *sxd = &dm->devices[slave];
+                      sxd->flags |= DPDK_DEVICE_FLAG_BOND_SLAVE;
+                      sxd->bond_port = i;
                       /* Set slaves bitmap for bonded interface */
                       bhi->bond_info = clib_bitmap_set
                         (bhi->bond_info, sdev->hw_if_index, 1);
-                      /* Set slave link flags on slave interface */
+                      /* Set MACs and slave link flags on slave interface */
                       shi = vnet_get_hw_interface (vnm, sdev->hw_if_index);
                       ssi =
                         vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index);
                       sei = pool_elt_at_index (em->interfaces,
                                                shi->hw_instance);
-
                       shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE;
                       ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE;
                       clib_memcpy (shi->hw_address, addr, 6);
                       clib_memcpy (sei->address, addr, 6);
-
                       /* Set l3 packet size allowed as the lowest of slave */
                       if (bhi->max_l3_packet_bytes[VLIB_RX] >
                           shi->max_l3_packet_bytes[VLIB_RX])
                         bhi->max_l3_packet_bytes[VLIB_RX] =
                           bhi->max_l3_packet_bytes[VLIB_TX] =
                           shi->max_l3_packet_bytes[VLIB_RX];
-
                       /* Set max packet size allowed as the lowest of slave */
                       if (bhi->max_packet_bytes > shi->max_packet_bytes)
                         bhi->max_packet_bytes = shi->max_packet_bytes;
diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c
index d5dc9cceb39..df68175055e 100644
--- a/src/vnet/ethernet/arp.c
+++ b/src/vnet/ethernet/arp.c
@@ -110,6 +110,9 @@ typedef struct
 
 static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 };
 
+/* Node index for send_garp_na_process */
+u32 send_garp_na_process_node_index;
+
 static void
 set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
                                     * a);
@@ -2378,6 +2381,106 @@ ethernet_arp_change_mac (u32 sw_if_index)
   /* *INDENT-ON* */
 }
 
+/*
+ * Send a gratuitous ARP out of hw interface hi for the address returned by
+ * ip4_interface_first_address() on its sw_if_index.  Nothing is sent when
+ * no address is returned or no packet buffer can be obtained.
+ */
+static void
+send_ip4_garp (vlib_main_t * vm, vnet_hw_interface_t * hi)
+{
+  ip4_main_t *i4m = &ip4_main;
+  u32 sw_if_index = hi->sw_if_index;
+  ip4_address_t *ip4_addr = ip4_interface_first_address (i4m, sw_if_index, 0);
+
+  if (ip4_addr)
+    {
+      clib_warning ("Sending GARP for IP4 address %U on sw_if_idex %d",
+                    format_ip4_address, ip4_addr, sw_if_index);
+
+      /* Form GARP packet for output - Gratuitous ARP is an ARP request packet
+         where the interface IP/MAC pair is used for both source and request
+         MAC/IP pairs in the request */
+      u32 bi = 0;
+      ethernet_arp_header_t *h = vlib_packet_template_get_packet
+        (vm, &i4m->ip4_arp_request_packet_template, &bi);
+      if (!h)                   /* no buffer could be allocated */
+        return;
+
+      clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
+                   sizeof (h->ip4_over_ethernet[0].ethernet));
+      clib_memcpy (h->ip4_over_ethernet[1].ethernet, hi->hw_address,
+                   sizeof (h->ip4_over_ethernet[1].ethernet));
+      h->ip4_over_ethernet[0].ip4 = ip4_addr[0];
+      h->ip4_over_ethernet[1].ip4 = ip4_addr[0];
+
+      /* Setup MAC header with ARP Etype and broadcast DMAC */
+      vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+      vlib_buffer_advance (b, -sizeof (ethernet_header_t));
+      ethernet_header_t *e = vlib_buffer_get_current (b);
+      e->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
+      clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address));
+      memset (e->dst_address, 0xff, sizeof (e->dst_address));
+
+      /* Send GARP packet out the specified interface */
+      vnet_buffer (b)->sw_if_index[VLIB_RX] =
+        vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+      vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+      u32 *to_next = vlib_frame_vector_args (f);
+      to_next[0] = bi;
+      f->n_vectors = 1;
+      vlib_put_frame_to_node (vm, hi->output_node_index, f);
+    }
+}
+
+static vlib_node_registration_t send_garp_na_proc_node;
+
+/*
+ * Process node: waits for SEND_GARP_NA events and, for every hw_if_index
+ * carried in the event data, sends a gratuitous ARP and an unsolicited
+ * IPv6 neighbor advertisement on that interface.
+ */
+static uword
+send_garp_na_process (vlib_main_t * vm,
+                      vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  uword event_type, *event_data = 0;
+  uword i;
+
+  /* Publish our node index so that other code can signal this process */
+  send_garp_na_process_node_index = send_garp_na_proc_node.index;
+
+  while (1)
+    {
+      vlib_process_wait_for_event (vm);
+      event_type = vlib_process_get_events (vm, &event_data);
+      if (event_type == SEND_GARP_NA)
+        {
+          /* Several signals may have been queued before this wakeup */
+          for (i = 0; i < vec_len (event_data); i++)
+            {
+              u32 hw_if_index = event_data[i];
+              vnet_hw_interface_t *hi =
+                vnet_get_hw_interface (vnm, hw_if_index);
+              send_ip4_garp (vm, hi);
+              send_ip6_na (vm, hi);
+            }
+        }
+      vec_reset_length (event_data);
+    }
+  return 0;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (send_garp_na_proc_node, static) = {
+  .function = send_garp_na_process,
+  .type = VLIB_NODE_TYPE_PROCESS,
+  .name = "send-garp-na-process",
+};
+/* *INDENT-ON* */
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
diff --git a/src/vnet/ethernet/arp_packet.h b/src/vnet/ethernet/arp_packet.h
index 17e64f43049..d740b844e12 100644
--- a/src/vnet/ethernet/arp_packet.h
+++ b/src/vnet/ethernet/arp_packet.h
@@ -167,6 +167,15 @@ typedef struct
 ethernet_arp_ip4_entry_t *ip4_neighbor_entries (u32 sw_if_index);
 u8 *format_ethernet_arp_ip4_entry (u8 * s, va_list * va);
 
+/* Node index for send_garp_na_process */
+extern u32 send_garp_na_process_node_index;
+
+/* Event type for send_garp_na_process */
+typedef enum
+{
+  SEND_GARP_NA = 1,
+} dpdk_send_garp_na_process_event_t;
+
 #endif /* included_ethernet_arp_packet_h */
 
 /*
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
index d623c95f52f..cf52994e720 100644
--- a/src/vnet/ip/ip6.h
+++ b/src/vnet/ip/ip6.h
@@ -375,6 +375,8 @@ int vnet_ip6_nd_term (vlib_main_t * vm,
                       ethernet_header_t * eth,
                       ip6_header_t * ip, u32 sw_if_index, u16 bd_index);
 
+void send_ip6_na (vlib_main_t * vm, vnet_hw_interface_t * hi);
+
 u8 *format_ip6_forward_next_trace (u8 * s, va_list * args);
 
 u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0);
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
index ba7ea143318..b8f6f9b10e7 100644
--- a/src/vnet/ip/ip6_neighbor.c
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -4192,6 +4192,72 @@ ethernet_ndp_change_mac (u32 sw_if_index)
   /* *INDENT-ON* */
 }
 
+/*
+ * Send an unsolicited IPv6 neighbor advertisement out of hw interface hi
+ * for the address returned by ip6_interface_first_address() on its
+ * sw_if_index.  Nothing is sent when no address is returned or no packet
+ * buffer can be obtained.
+ */
+void
+send_ip6_na (vlib_main_t * vm, vnet_hw_interface_t * hi)
+{
+  ip6_main_t *i6m = &ip6_main;
+  u32 sw_if_index = hi->sw_if_index;
+  ip6_address_t *ip6_addr = ip6_interface_first_address (i6m, sw_if_index);
+  if (ip6_addr)
+    {
+      clib_warning
+        ("Sending unsolicitated NA IP6 address %U on sw_if_idex %d",
+         format_ip6_address, ip6_addr, sw_if_index);
+
+      /* Form unsolicited neighbor advertisement packet from NS pkt template */
+      int bogus_length;
+      u32 bi = 0;
+      icmp6_neighbor_solicitation_header_t *h =
+        vlib_packet_template_get_packet (vm,
+                                         &i6m->discover_neighbor_packet_template,
+                                         &bi);
+      if (!h)                   /* no buffer could be allocated */
+        return;
+
+      ip6_set_reserved_multicast_address (&h->ip.dst_address,
+                                          IP6_MULTICAST_SCOPE_link_local,
+                                          IP6_MULTICAST_GROUP_ID_all_hosts);
+      h->ip.src_address = ip6_addr[0];
+      h->neighbor.icmp.type = ICMP6_neighbor_advertisement;
+      h->neighbor.target_address = ip6_addr[0];
+      h->neighbor.advertisement_flags = clib_host_to_net_u32
+        (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+      /* The template is a solicitation; an advertisement must carry the
+         target (not source) link-layer address option */
+      h->link_layer_option.header.type =
+        ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
+      clib_memcpy (h->link_layer_option.ethernet_address, hi->hw_address,
+                   sizeof (h->link_layer_option.ethernet_address));
+      h->neighbor.icmp.checksum =
+        ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
+      ASSERT (bogus_length == 0);
+
+      /* Setup MAC header with IP6 Etype and mcast DMAC */
+      vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+      vlib_buffer_advance (b, -sizeof (ethernet_header_t));
+      ethernet_header_t *e = vlib_buffer_get_current (b);
+      e->type = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
+      clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address));
+      ip6_multicast_ethernet_address (e->dst_address,
+                                      IP6_MULTICAST_GROUP_ID_all_hosts);
+
+      /* Send unsolicited ND advertisement packet out the specified interface */
+      vnet_buffer (b)->sw_if_index[VLIB_RX] =
+        vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+      vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+      u32 *to_next = vlib_frame_vector_args (f);
+      to_next[0] = bi;
+      f->n_vectors = 1;
+      vlib_put_frame_to_node (vm, hi->output_node_index, f);
+    }
+}
+
 /*
  * fd.io coding-style-patch-verification: ON
  *