Send GARP/NA on bonded intf slave up/down if in active-backup mode

If a bonded interface is in active-backup mode and configured with
IPv4 and/or IPv6 addresses, on slave interface link up/down, send
a GARP packet if configured with an IPv4 address and an unsolicited
NA if configured with an IPv6 address. These packets can help with
faster route convergence in the next hop router/switch.

Change-Id: I68ccb11a4a40cda414704fa08ee0171c952befa2
Signed-off-by: John Lo <loj@cisco.com>
(cherry picked from commit 8b81cb43359380e50d3fc216d93ff05894149939)
This commit is contained in:
John Lo
2017-06-26 01:40:20 -04:00
committed by Neale Ranns
parent 0786710856
commit dc30c6d3d6
7 changed files with 232 additions and 8 deletions

View File

@ -12,13 +12,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vnet/vnet.h>
#include <vppinfra/vec.h>
#include <vppinfra/format.h>
#include <vlib/unix/cj.h>
#include <assert.h>
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ethernet/arp_packet.h>
#include <dpdk/device/dpdk.h>
#include <dpdk/device/dpdk_priv.h>
@ -178,6 +181,65 @@ dpdk_device_stop (dpdk_device_t * xd)
}
}
void
dpdk_port_state_callback (uint8_t port_id,
enum rte_eth_event_type type, void *param)
{
struct rte_eth_link link;
vlib_main_t *vm = vlib_get_main ();
dpdk_device_t *xd = &dpdk_main.devices[port_id];
RTE_SET_USED (param);
if (type != RTE_ETH_EVENT_INTR_LSC)
{
clib_warning ("Unknown event %d received for port %d", type, port_id);
return;
}
rte_eth_link_get_nowait (port_id, &link);
u8 link_up = link.link_status;
if (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE)
{
u8 bd_port = xd->bond_port;
int bd_mode = rte_eth_bond_mode_get (bd_port);
if ((link_up && !(xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE_UP)) ||
(!link_up && (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE_UP)))
{
clib_warning ("Port %d state to %s, "
"slave of port %d BondEthernet%d in mode %d",
port_id, (link_up) ? "UP" : "DOWN",
bd_port, xd->port_id, bd_mode);
if (bd_mode == BONDING_MODE_ACTIVE_BACKUP)
{
rte_eth_link_get_nowait (bd_port, &link);
if (link.link_status) /* bonded interface up */
{
u32 hw_if_index = dpdk_main.devices[bd_port].hw_if_index;
vlib_process_signal_event
(vm, send_garp_na_process_node_index, SEND_GARP_NA,
hw_if_index);
}
}
}
if (link_up) /* Update slave link status */
xd->flags |= DPDK_DEVICE_FLAG_BOND_SLAVE_UP;
else
xd->flags &= ~DPDK_DEVICE_FLAG_BOND_SLAVE_UP;
}
else /* Should not happen as callback not setup for "normal" links */
{
if (link_up)
clib_warning ("Port %d Link Up - speed %u Mbps - %s",
port_id, (unsigned) link.link_speed,
(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
"full-duplex" : "half-duplex");
else
clib_warning ("Port %d Link Down\n\n", port_id);
}
}
/*
* fd.io coding-style-patch-verification: ON
*

View File

@ -173,6 +173,8 @@ typedef struct
#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4)
#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5)
#define DPDK_DEVICE_FLAG_HQOS (1 << 6)
#define DPDK_DEVICE_FLAG_BOND_SLAVE (1 << 7)
#define DPDK_DEVICE_FLAG_BOND_SLAVE_UP (1 << 8)
u16 nb_tx_desc;
CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
@ -197,6 +199,10 @@ typedef struct
/* af_packet or BondEthernet instance number */
u8 port_id;
/* Bonded interface port# of a slave -
only valid if DPDK_DEVICE_FLAG_BOND_SLAVE bit is set */
u8 bond_port;
struct rte_eth_link link;
f64 time_last_link_update;
@ -408,6 +414,8 @@ typedef struct
void dpdk_device_setup (dpdk_device_t * xd);
void dpdk_device_start (dpdk_device_t * xd);
void dpdk_device_stop (dpdk_device_t * xd);
/* Port event callback; registered with rte_eth_dev_callback_register() for
   RTE_ETH_EVENT_INTR_LSC (link state change) on bond slave ports. */
void dpdk_port_state_callback (uint8_t port_id,
enum rte_eth_event_type type, void *param);
#define foreach_dpdk_error \
_(NONE, "no error") \

View File

@ -1373,8 +1373,10 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
/*
* Extra set up for bond interfaces:
* 1. Setup MACs for bond interfaces and their slave links which was set
* in dpdk_device_setup() but needs to be done again here to take effect.
* 2. Set up info for bond interface related CLI support.
* in dpdk_device_setup() but needs to be done again here to take
* effect.
* 2. Set up info and register slave link state change callback handling.
* 3. Set up info for bond interface related CLI support.
*/
int nports = rte_eth_dev_count ();
if (nports > 0)
@ -1399,7 +1401,8 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
(slink[0], (struct ether_addr *) addr);
/* Set MAC of bounded interface to that of 1st slave link */
clib_warning ("Set MAC for bond dev# %d", i);
clib_warning ("Set MAC for bond port %d BondEthernet%d",
i, xd->port_id);
rv = rte_eth_bond_mac_address_set
(i, (struct ether_addr *) addr);
if (rv)
@ -1428,34 +1431,38 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
/* Add MAC to all slave links except the first one */
if (nlink)
{
clib_warning ("Add MAC for slave dev# %d", slave);
clib_warning ("Add MAC for slave port %d", slave);
rv = rte_eth_dev_mac_addr_add
(slave, (struct ether_addr *) addr, 0);
if (rv)
clib_warning ("Add MAC addr failure rv=%d", rv);
}
/* Setup slave link state change callback handling */
rte_eth_dev_callback_register
(slave, RTE_ETH_EVENT_INTR_LSC,
dpdk_port_state_callback, NULL);
dpdk_device_t *sxd = &dm->devices[slave];
sxd->flags |= DPDK_DEVICE_FLAG_BOND_SLAVE;
sxd->bond_port = i;
/* Set slaves bitmap for bonded interface */
bhi->bond_info = clib_bitmap_set
(bhi->bond_info, sdev->hw_if_index, 1);
/* Set slave link flags on slave interface */
/* Set MACs and slave link flags on slave interface */
shi = vnet_get_hw_interface (vnm, sdev->hw_if_index);
ssi = vnet_get_sw_interface
(vnm, sdev->vlib_sw_if_index);
sei = pool_elt_at_index
(em->interfaces, shi->hw_instance);
shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE;
ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE;
clib_memcpy (shi->hw_address, addr, 6);
clib_memcpy (sei->address, addr, 6);
/* Set l3 packet size allowed as the lowest of slave */
if (bhi->max_l3_packet_bytes[VLIB_RX] >
shi->max_l3_packet_bytes[VLIB_RX])
bhi->max_l3_packet_bytes[VLIB_RX] =
bhi->max_l3_packet_bytes[VLIB_TX] =
shi->max_l3_packet_bytes[VLIB_RX];
/* Set max packet size allowed as the lowest of slave */
if (bhi->max_packet_bytes > shi->max_packet_bytes)
bhi->max_packet_bytes = shi->max_packet_bytes;

View File

@ -110,6 +110,9 @@ typedef struct
static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 };
/* Node index for send_garp_na_process.  Assigned by the process function
   itself when it starts running; used as the destination node when
   signalling a SEND_GARP_NA event. */
u32 send_garp_na_process_node_index;
static void
set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
* a);
@ -2378,6 +2381,86 @@ ethernet_arp_change_mac (u32 sw_if_index)
/* *INDENT-ON* */
}
/**
 * Send a gratuitous ARP for the first IPv4 address configured on the
 * software interface of the given hardware interface.
 *
 * Does nothing if the interface has no IPv4 address or if no packet
 * buffer could be obtained from the ARP request template.
 *
 * @param vm  vlib main.
 * @param hi  hardware interface whose MAC address, sw_if_index and output
 *            node are used to build and transmit the packet.
 */
static void
send_ip4_garp (vlib_main_t * vm, vnet_hw_interface_t * hi)
{
  ip4_main_t *i4m = &ip4_main;
  u32 sw_if_index = hi->sw_if_index;
  ip4_address_t *ip4_addr = ip4_interface_first_address (i4m, sw_if_index, 0);

  if (ip4_addr)
    {
      clib_warning ("Sending GARP for IP4 address %U on sw_if_idex %d",
                    format_ip4_address, ip4_addr, sw_if_index);

      /* Form GARP packet for output - Gratuitous ARP is an ARP request packet
         where the interface IP/MAC pair is used for both source and request
         MAC/IP pairs in the request */
      u32 bi = 0;
      ethernet_arp_header_t *h = vlib_packet_template_get_packet
        (vm, &i4m->ip4_arp_request_packet_template, &bi);

      /* No buffer available: skip this GARP rather than write through NULL */
      if (!h)
        return;

      clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
                   sizeof (h->ip4_over_ethernet[0].ethernet));
      clib_memcpy (h->ip4_over_ethernet[1].ethernet, hi->hw_address,
                   sizeof (h->ip4_over_ethernet[1].ethernet));
      h->ip4_over_ethernet[0].ip4 = ip4_addr[0];
      h->ip4_over_ethernet[1].ip4 = ip4_addr[0];

      /* Setup MAC header with ARP Etype and broadcast DMAC */
      vlib_buffer_t *b = vlib_get_buffer (vm, bi);
      /* Move the buffer start back to make room for the Ethernet header */
      vlib_buffer_advance (b, -sizeof (ethernet_header_t));
      ethernet_header_t *e = vlib_buffer_get_current (b);
      e->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
      clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address));
      memset (e->dst_address, 0xff, sizeof (e->dst_address));

      /* Send GARP packet out the specified interface */
      vnet_buffer (b)->sw_if_index[VLIB_RX] =
        vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
      vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
      u32 *to_next = vlib_frame_vector_args (f);
      to_next[0] = bi;
      f->n_vectors = 1;
      vlib_put_frame_to_node (vm, hi->output_node_index, f);
    }
}
static vlib_node_registration_t send_garp_na_proc_node;
/**
 * Process node that sends a gratuitous ARP and an unsolicited neighbor
 * advertisement on request.
 *
 * Publishes its own node index in send_garp_na_process_node_index, then
 * loops forever waiting for events.  Each SEND_GARP_NA event carries a
 * hw_if_index as its data word; for every such word received the GARP and
 * NA are sent on that hardware interface.  Events of other types are
 * discarded.
 *
 * @return never returns in practice (the trailing return only satisfies
 *         the function signature).
 */
static uword
send_garp_na_process (vlib_main_t * vm,
                      vlib_node_runtime_t * rt, vlib_frame_t * f)
{
  vnet_main_t *vnm = vnet_get_main ();
  uword event_type, *event_data = 0;
  uword i;

  send_garp_na_process_node_index = send_garp_na_proc_node.index;

  while (1)
    {
      vlib_process_wait_for_event (vm);
      event_type = vlib_process_get_events (vm, &event_data);
      if (event_type == SEND_GARP_NA)
        {
          /* Several events may have been queued before this process got
             to run; handle each one instead of only the first */
          for (i = 0; i < vec_len (event_data); i++)
            {
              u32 hw_if_index = event_data[i];
              vnet_hw_interface_t *hi =
                vnet_get_hw_interface (vnm, hw_if_index);
              send_ip4_garp (vm, hi);
              send_ip6_na (vm, hi);
            }
        }
      /* Keep the vector's storage for reuse on the next wakeup */
      vec_reset_length (event_data);
    }
  return 0;
}
/* *INDENT-OFF* */
/* Register send_garp_na_process as a process-type node named
   "send-garp-na-process". */
VLIB_REGISTER_NODE (send_garp_na_proc_node, static) = {
.function = send_garp_na_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "send-garp-na-process",
};
/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
*

View File

@ -167,6 +167,15 @@ typedef struct
ethernet_arp_ip4_entry_t *ip4_neighbor_entries (u32 sw_if_index);
u8 *format_ethernet_arp_ip4_entry (u8 * s, va_list * va);
/* Node index for send_garp_na_process */
extern u32 send_garp_na_process_node_index;
/* Event type for send_garp_na_process.
   Declared as a typedef so that including this header only introduces a
   type name and does not define a global object in every translation
   unit. */
typedef enum
{
  SEND_GARP_NA = 1,
} dpdk_send_garp_na_process_event_t;
#endif /* included_ethernet_arp_packet_h */
/*

View File

@ -375,6 +375,8 @@ int vnet_ip6_nd_term (vlib_main_t * vm,
ethernet_header_t * eth,
ip6_header_t * ip, u32 sw_if_index, u16 bd_index);
/* Send an unsolicited neighbor advertisement for the first IPv6 address on
   the software interface of hardware interface hi (no-op if none). */
void send_ip6_na (vlib_main_t * vm, vnet_hw_interface_t * hi);
u8 *format_ip6_forward_next_trace (u8 * s, va_list * args);
u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0);

View File

@ -4192,6 +4192,59 @@ ethernet_ndp_change_mac (u32 sw_if_index)
/* *INDENT-ON* */
}
/**
 * Send an unsolicited neighbor advertisement for the first IPv6 address
 * configured on the software interface of the given hardware interface.
 *
 * The packet is built from the neighbor solicitation template and
 * addressed to the link-local all-hosts multicast group.  Does nothing if
 * the interface has no IPv6 address or if no packet buffer could be
 * obtained from the template.
 *
 * @param vm  vlib main.
 * @param hi  hardware interface whose MAC address, sw_if_index and output
 *            node are used to build and transmit the packet.
 */
void
send_ip6_na (vlib_main_t * vm, vnet_hw_interface_t * hi)
{
  ip6_main_t *i6m = &ip6_main;
  u32 sw_if_index = hi->sw_if_index;
  ip6_address_t *ip6_addr = ip6_interface_first_address (i6m, sw_if_index);

  if (ip6_addr)
    {
      clib_warning
        ("Sending unsolicitated NA IP6 address %U on sw_if_idex %d",
         format_ip6_address, ip6_addr, sw_if_index);

      /* Form unsolicited neighbor advertisement packet from NS pkt template */
      int bogus_length;
      u32 bi = 0;
      icmp6_neighbor_solicitation_header_t *h =
        vlib_packet_template_get_packet (vm,
                                         &i6m->discover_neighbor_packet_template,
                                         &bi);

      /* No buffer available: skip this NA rather than write through NULL */
      if (!h)
        return;

      ip6_set_reserved_multicast_address (&h->ip.dst_address,
                                          IP6_MULTICAST_SCOPE_link_local,
                                          IP6_MULTICAST_GROUP_ID_all_hosts);
      h->ip.src_address = ip6_addr[0];
      h->neighbor.icmp.type = ICMP6_neighbor_advertisement;
      h->neighbor.target_address = ip6_addr[0];
      h->neighbor.advertisement_flags = clib_host_to_net_u32
        (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
      /* An advertisement carries the target link-layer address option,
         so the option type inherited from the NS template is replaced */
      h->link_layer_option.header.type =
        ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
      clib_memcpy (h->link_layer_option.ethernet_address,
                   hi->hw_address, vec_len (hi->hw_address));
      /* Checksum is computed last, after every ICMP field is final */
      h->neighbor.icmp.checksum =
        ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
      ASSERT (bogus_length == 0);

      /* Setup MAC header with IP6 Etype and mcast DMAC */
      vlib_buffer_t *b = vlib_get_buffer (vm, bi);
      /* Move the buffer start back to make room for the Ethernet header */
      vlib_buffer_advance (b, -sizeof (ethernet_header_t));
      ethernet_header_t *e = vlib_buffer_get_current (b);
      e->type = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
      clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address));
      ip6_multicast_ethernet_address (e->dst_address,
                                      IP6_MULTICAST_GROUP_ID_all_hosts);

      /* Send unsolicited ND advertisement packet out the specified interface */
      vnet_buffer (b)->sw_if_index[VLIB_RX] =
        vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
      vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
      u32 *to_next = vlib_frame_vector_args (f);
      to_next[0] = bi;
      f->n_vectors = 1;
      vlib_put_frame_to_node (vm, hi->output_node_index, f);
    }
}
/*
* fd.io coding-style-patch-verification: ON
*