bonding: add weight support for active-backup mode

Not all interfaces have the same characteristics within the bonding group.
For active-backup mode, we should do our best to select the slave that
performs the best as the primary slave. We already did that by preferring
the slave that is local numa. Sometimes, this is not enough. For example,
when all slaves are local numa, the selection is arbitrary. Some slave interfaces
may have higher speed or better qos than the others. But this is hard to
infer.

One rule does not fit all. So we let the operator optionally specify the
weight for each slave interface. Our primary slave selection rule is now
1. biggest weight
2. is local numa
3. current primary slave (to avoid churn)
4. lowest sw_if_index (for deterministic behavior)

This selection rule only applies to active-backup mode, in which only one slave
is used for forwarding traffic until it becomes unreachable. At that time,
the next "best" slave candidate is automatically promoted. The slaves are
sorted according to the preference rule when they are up. So there is no need
to find the next best candidate when the primary slave goes down.

Another good thing about this rule is when the down slave comes back up, it
is selected as the primary slave again unless there is indeed a "better"
slave than this down slave that was added during that period.

To set the weight for the slave interface, do this after the interface is
enslaved

set interface bond <interface-name> weight <value>

Type: feature

Signed-off-by: Steven Luong <sluong@cisco.com>
Change-Id: I59ced6d20ce1dec532e667dbe1afd1b4243e04f9
This commit is contained in:
Steven Luong
2019-08-20 16:58:00 -07:00
committed by Damjan Marion
parent ffbfe3a2d6
commit a1876b84e5
8 changed files with 332 additions and 96 deletions
+56 -6
View File
@@ -1999,6 +1999,49 @@ static void vl_api_bond_detach_slave_reply_t_handler_json
vam->result_ready = 1;
}
/*
 * VAT command: sw_interface_set_bond_weight
 *
 * Parses "<intfc> | sw_if_index <nn>" and "weight <value>" from the VAT
 * input, then sends a SW_INTERFACE_SET_BOND_WEIGHT request and waits for
 * the reply.
 *
 * Returns -99 if the interface or the weight is missing from the input;
 * otherwise returns the value stored in ret by the W() macro.
 */
static int
api_sw_interface_set_bond_weight (vat_main_t * vam)
{
  unformat_input_t *input = vam->input;
  vl_api_sw_interface_set_bond_weight_t *mp;
  u32 sw_if_index = ~0;
  u32 weight = 0;
  u8 have_weight = 0;
  int ret;

  /* Consume recognized tokens; stop at end of input or the first
     token that matches none of the patterns. */
  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
	continue;
      if (unformat (input, "sw_if_index %d", &sw_if_index))
	continue;
      if (unformat (input, "weight %u", &weight))
	{
	  /* An explicit flag is needed: 0 is also the initial weight value */
	  have_weight = 1;
	  continue;
	}
      break;
    }

  /* Both arguments are mandatory; the interface is checked first */
  if (sw_if_index == ~0)
    {
      errmsg ("missing interface name or sw_if_index");
      return -99;
    }
  if (!have_weight)
    {
      errmsg ("missing valid weight");
      return -99;
    }

  /* Build the request; both fields are byte-swapped with ntohl */
  M (SW_INTERFACE_SET_BOND_WEIGHT, mp);
  mp->weight = ntohl (weight);
  mp->sw_if_index = ntohl (sw_if_index);

  /* Send it and wait for the reply */
  S (mp);
  W (ret);
  return ret;
}
static void vl_api_sw_interface_bond_details_t_handler
(vl_api_sw_interface_bond_details_t * mp)
{
@@ -2064,8 +2107,9 @@ static void vl_api_sw_interface_slave_details_t_handler
vat_main_t *vam = &vat_main;
print (vam->ofp,
"%-25s %-12d %-12d %d", mp->interface_name,
ntohl (mp->sw_if_index), mp->is_passive, mp->is_long_timeout);
"%-25s %-12d %-7d %-12d %-10d %-10d", mp->interface_name,
ntohl (mp->sw_if_index), mp->is_passive, mp->is_long_timeout,
ntohl (mp->weight), mp->is_local_numa);
}
static void vl_api_sw_interface_slave_details_t_handler_json
@@ -2087,6 +2131,8 @@ static void vl_api_sw_interface_slave_details_t_handler_json
mp->interface_name);
vat_json_object_add_uint (node, "passive", mp->is_passive);
vat_json_object_add_uint (node, "long_timeout", mp->is_long_timeout);
vat_json_object_add_uint (node, "weight", ntohl (mp->weight));
vat_json_object_add_uint (node, "is_local_numa", mp->is_local_numa);
}
static int
@@ -2117,8 +2163,9 @@ api_sw_interface_slave_dump (vat_main_t * vam)
}
print (vam->ofp,
"\n%-25s %-12s %-12s %s",
"slave interface name", "sw_if_index", "passive", "long_timeout");
"\n%-25s %-12s %-7s %-12s %-10s %-10s",
"slave interface name", "sw_if_index", "passive", "long_timeout",
"weight", "local numa");
/* Get list of bond interfaces */
M (SW_INTERFACE_SLAVE_DUMP, mp);
@@ -5040,6 +5087,7 @@ _(sw_interface_set_vxlan_bypass_reply) \
_(sw_interface_set_geneve_bypass_reply) \
_(sw_interface_set_vxlan_gpe_bypass_reply) \
_(sw_interface_set_l2_bridge_reply) \
_(sw_interface_set_bond_weight_reply) \
_(bridge_domain_add_del_reply) \
_(sw_interface_set_l2_xconnect_reply) \
_(l2fib_add_del_reply) \
@@ -5242,6 +5290,7 @@ _(BOND_CREATE_REPLY, bond_create_reply) \
_(BOND_DELETE_REPLY, bond_delete_reply) \
_(BOND_ENSLAVE_REPLY, bond_enslave_reply) \
_(BOND_DETACH_SLAVE_REPLY, bond_detach_slave_reply) \
_(SW_INTERFACE_SET_BOND_WEIGHT_REPLY, sw_interface_set_bond_weight_reply) \
_(SW_INTERFACE_BOND_DETAILS, sw_interface_bond_details) \
_(SW_INTERFACE_SLAVE_DETAILS, sw_interface_slave_details) \
_(IP_ROUTE_ADD_DEL_REPLY, ip_route_add_del_reply) \
@@ -21724,13 +21773,14 @@ _(sw_interface_virtio_pci_dump, "") \
_(bond_create, \
"[hw-addr <mac-addr>] {round-robin | active-backup | " \
"broadcast | {lacp | xor} [load-balance { l2 | l23 | l34 }]} " \
"[id <if-id>]") \
"[id <if-id>]") \
_(bond_delete, \
"<vpp-if-name> | sw_if_index <id>") \
_(bond_enslave, \
"sw_if_index <n> bond <sw_if_index> [is_passive] [is_long_timeout]") \
"sw_if_index <n> bond <sw_if_index> [is_passive] [is_long_timeout]") \
_(bond_detach_slave, \
"sw_if_index <n>") \
_(sw_interface_set_bond_weight, "<intfc> | sw_if_index <nn> weight <value>") \
_(sw_interface_bond_dump, "") \
_(sw_interface_slave_dump, \
"<vpp-if-name> | sw_if_index <id>") \
+19 -1
View File
@@ -19,7 +19,7 @@
the bonding device driver
*/
option version = "1.0.1";
option version = "1.0.2";
/** \brief Initialize a new bond interface with the given parameters
@param client_index - opaque cookie to identify the sender
@@ -154,6 +154,8 @@ define sw_interface_slave_dump
@param interface_name - name of interface
@param is_passive - interface does not initiate the lacp protocol, remote must be active speaker
@param is_long_timeout - 90 seconds vs default 3 seconds neighbor timeout
@param is_local_numa - the slave interface is local numa
@param weight - the weight for the slave interface (active-backup mode only)
*/
define sw_interface_slave_details
{
@@ -162,6 +164,22 @@ define sw_interface_slave_details
u8 interface_name[64];
u8 is_passive;
u8 is_long_timeout;
u8 is_local_numa;
u32 weight;
};
/** \brief Interface set bond weight
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param sw_if_index - slave interface for which to set the weight
                     (sent in network byte order)
@param weight - weight value to be set for the slave interface
                (active-backup mode only; sent in network byte order)
*/
autoreply define sw_interface_set_bond_weight
{
u32 client_index;
u32 context;
u32 sw_if_index;
u32 weight;
};
/*
+22
View File
@@ -47,6 +47,7 @@
_(BOND_CREATE, bond_create) \
_(BOND_DELETE, bond_delete) \
_(BOND_ENSLAVE, bond_enslave) \
_(SW_INTERFACE_SET_BOND_WEIGHT, sw_interface_set_bond_weight) \
_(BOND_DETACH_SLAVE, bond_detach_slave) \
_(SW_INTERFACE_BOND_DUMP, sw_interface_bond_dump)\
_(SW_INTERFACE_SLAVE_DUMP, sw_interface_slave_dump)
@@ -116,6 +117,25 @@ vl_api_bond_enslave_t_handler (vl_api_bond_enslave_t * mp)
REPLY_MACRO (VL_API_BOND_ENSLAVE_REPLY);
}
/*
 * API handler for sw_interface_set_bond_weight.
 *
 * Converts the request fields from network to host byte order, hands them
 * to bond_set_intf_weight() and replies with the status that call stored
 * in the argument block.
 *
 * mp - the received request message.
 */
static void
  vl_api_sw_interface_set_bond_weight_t_handler
  (vl_api_sw_interface_set_bond_weight_t * mp)
{
  vlib_main_t *vm = vlib_get_main ();
  bond_set_intf_weight_args_t _a, *ap = &_a;
  /* rmp and rv are not referenced directly below; presumably REPLY_MACRO
     uses them to build the reply -- confirm against its definition */
  vl_api_sw_interface_set_bond_weight_reply_t *rmp;
  int rv = 0;

  /* Start from a zeroed argument block so rv/error are clean on entry */
  clib_memset (ap, 0, sizeof (*ap));

  ap->sw_if_index = ntohl (mp->sw_if_index);
  ap->weight = ntohl (mp->weight);

  bond_set_intf_weight (vm, ap);
  /* Report the callee's result instead of unconditionally replying 0 */
  rv = ap->rv;
  /* NOTE(review): ap->error is neither inspected nor released here --
     confirm who owns it when bond_set_intf_weight sets it */

  REPLY_MACRO (VL_API_SW_INTERFACE_SET_BOND_WEIGHT_REPLY);
}
static void
vl_api_bond_detach_slave_t_handler (vl_api_bond_detach_slave_t * mp)
{
@@ -200,6 +220,8 @@ bond_send_sw_interface_slave_details (vpe_api_main_t * am,
strlen ((const char *) slave_if->interface_name)));
mp->is_passive = slave_if->is_passive;
mp->is_long_timeout = slave_if->is_long_timeout;
mp->is_local_numa = slave_if->is_local_numa;
mp->weight = htonl (slave_if->weight);
mp->context = context;
vl_api_send_msg (reg, (u8 *) mp);
+198 -76
View File
File diff suppressed because it is too large Load Diff
+19 -5
View File
@@ -110,6 +110,15 @@ typedef struct
clib_error_t *error;
} bond_detach_slave_args_t;
/** Arguments and result block for bond_set_intf_weight () */
typedef struct
{
  /* slave interface whose weight is being set */
  u32 sw_if_index;
  /* weight value to assign to the slave interface */
  u32 weight;
  /* return */
  /* status code filled in by the callee (presumably 0 on success -- confirm) */
  int rv;
  /* error detail filled in by the callee, if any */
  clib_error_t *error;
} bond_set_intf_weight_args_t;
/** BOND interface details struct */
typedef struct
{
@@ -130,6 +139,8 @@ typedef struct
u8 interface_name[64];
u8 is_passive;
u8 is_long_timeout;
u8 is_local_numa;
u32 weight;
u32 active_slaves;
} slave_interface_details_t;
@@ -159,11 +170,6 @@ typedef struct
u8 mode;
u8 lb;
/* This flag works for active-backup mode only
and marks if the working port is local numa. */
u8 is_local_numa;
/* current working sw_if_index in active-bakeup mode. */
u32 sw_if_index_working;
/* the last slave index for the rr lb */
u32 lb_rr_last_index;
@@ -239,6 +245,9 @@ typedef struct
/* neighbor vlib hw_if_index */
u32 hw_if_index;
/* weight -- valid only for active backup */
u32 weight;
/* actor does not initiate the protocol exchange */
u8 is_passive;
@@ -336,6 +345,9 @@ typedef struct
/* pdu sent */
u64 marker_pdu_sent;
/* slave is on the local numa node */
u8 is_local_numa;
} slave_if_t;
typedef void (*lacp_enable_disable_func) (vlib_main_t * vm, bond_if_t * bif,
@@ -398,6 +410,8 @@ void bond_disable_collecting_distributing (vlib_main_t * vm,
void bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif);
u8 *format_bond_interface_name (u8 * s, va_list * args);
void bond_set_intf_weight (vlib_main_t * vm,
bond_set_intf_weight_args_t * args);
void bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args);
int bond_delete_if (vlib_main_t * vm, u32 sw_if_index);
void bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args);
+13
View File
@@ -700,6 +700,18 @@ static void *vl_api_bond_enslave_t_print
FINISH;
}
/*
 * Render a sw_interface_set_bond_weight request as a "SCRIPT:" line.
 *
 * mp     - the message to print; its fields are converted with ntohl.
 * handle - opaque print handle; not used directly here, presumably
 *          consumed by the FINISH macro -- confirm against its definition.
 */
static void *vl_api_sw_interface_set_bond_weight_t_print
  (vl_api_sw_interface_set_bond_weight_t * mp, void *handle)
{
  /* Decode the message fields up front */
  u32 sw_if_index = ntohl (mp->sw_if_index);
  u32 weight = ntohl (mp->weight);
  /* The vector must stay named "s": FINISH may refer to it by name */
  u8 *s = format (0, "SCRIPT: sw_interface_set_bond_weight ");

  s = format (s, "sw_if_index %u ", sw_if_index);
  s = format (s, "weight %u ", weight);

  FINISH;
}
static void *vl_api_bond_detach_slave_t_print
(vl_api_bond_detach_slave_t * mp, void *handle)
{
@@ -3774,6 +3786,7 @@ _(BOND_CREATE, bond_create) \
_(BOND_DELETE, bond_delete) \
_(BOND_ENSLAVE, bond_enslave) \
_(BOND_DETACH_SLAVE, bond_detach_slave) \
_(SW_INTERFACE_SET_BOND_WEIGHT, sw_interface_set_bond_weight) \
_(SW_INTERFACE_SLAVE_DUMP, sw_interface_slave_dump) \
_(SW_INTERFACE_BOND_DUMP, sw_interface_bond_dump) \
_(SW_INTERFACE_RX_PLACEMENT_DUMP, sw_interface_rx_placement_dump) \
+3 -6
View File
@@ -86,13 +86,9 @@ class TestBondInterface(VppTestCase):
# enslave pg0 and pg1 to BondEthernet0
self.logger.info("bond enslave interface pg0 to BondEthernet0")
bond0.enslave_vpp_bond_interface(sw_if_index=self.pg0.sw_if_index,
is_passive=0,
is_long_timeout=0)
bond0.enslave_vpp_bond_interface(sw_if_index=self.pg0.sw_if_index)
self.logger.info("bond enslave interface pg1 to BondEthernet0")
bond0.enslave_vpp_bond_interface(sw_if_index=self.pg1.sw_if_index,
is_passive=0,
is_long_timeout=0)
bond0.enslave_vpp_bond_interface(sw_if_index=self.pg1.sw_if_index)
# verify both slaves in BondEthernet0
if_dump = self.vapi.sw_interface_slave_dump(bond0.sw_if_index)
@@ -276,5 +272,6 @@ class TestBondInterface(VppTestCase):
if_dump = self.vapi.sw_interface_bond_dump()
self.assertFalse(bond0.is_interface_config_in_dump(if_dump))
if __name__ == '__main__':
unittest.main(testRunner=VppTestRunner)
+2 -2
View File
@@ -29,8 +29,8 @@ class VppBondInterface(VppInterface):
def enslave_vpp_bond_interface(self,
sw_if_index,
is_passive,
is_long_timeout):
is_passive=0,
is_long_timeout=0):
self.test.vapi.bond_enslave(sw_if_index,
self.sw_if_index,
is_passive,