Distributed Virtual Router Support

A distributed virtual router works by attempting to switch a packet, but on failing to find a local consumer (i.e. a locally attached host to which the packet is destined) the packet is sent unmodified 'upstream' to where the rest of the 'distributed' router is present. When L3 switching a packet this means the L2 header must not be modified. This patch adds a 'l2-bridge' object to the L3 FIB which re-injects packets from the L3 path back into the L2 path - use with extreme caution.

Change-Id: I069724eb45956647d7980cbe40a80a788ee6ee82
Signed-off-by: Neale Ranns <nranns@cisco.com>
This commit is contained in:
Neale Ranns
2017-10-03 08:20:21 -07:00
committed by Damjan Marion
parent d3c008d108
commit 6f63115603
16 changed files with 727 additions and 109 deletions
+2 -1
View File
@@ -1015,7 +1015,8 @@ libvnet_la_SOURCES += \
vnet/dpo/interface_rx_dpo.c \
vnet/dpo/interface_tx_dpo.c \
vnet/dpo/mpls_disposition.c \
vnet/dpo/mpls_label_dpo.c
vnet/dpo/mpls_label_dpo.c \
vnet/dpo/l2_bridge_dpo.c
nobase_include_HEADERS += \
vnet/dpo/load_balance.h \
+2
View File
@@ -40,6 +40,7 @@
#include <vnet/dpo/interface_rx_dpo.h>
#include <vnet/dpo/interface_tx_dpo.h>
#include <vnet/dpo/mpls_disposition.h>
#include <vnet/dpo/l2_bridge_dpo.h>
/**
* Array of char* names for the DPO types and protos
@@ -523,6 +524,7 @@ dpo_module_init (vlib_main_t * vm)
interface_rx_dpo_module_init();
interface_tx_dpo_module_init();
mpls_disp_dpo_module_init();
l2_bridge_dpo_module_init();
return (NULL);
}
+3 -1
View File
@@ -114,6 +114,7 @@ typedef enum dpo_type_t_ {
DPO_MFIB_ENTRY,
DPO_INTERFACE_RX,
DPO_INTERFACE_TX,
DPO_L2_BRIDGE,
DPO_LAST,
} __attribute__((packed)) dpo_type_t;
@@ -140,7 +141,8 @@ typedef enum dpo_type_t_ {
[DPO_MPLS_DISPOSITION] = "dpo-mpls-diposition", \
[DPO_MFIB_ENTRY] = "dpo-mfib_entry", \
[DPO_INTERFACE_RX] = "dpo-interface-rx", \
[DPO_INTERFACE_TX] = "dpo-interface-tx" \
[DPO_INTERFACE_TX] = "dpo-interface-tx", \
[DPO_L2_BRIDGE] = "dpo-l2-bridge" \
}
/**
File diff suppressed because it is too large Load Diff
+56
View File
@@ -0,0 +1,56 @@
/*
* Copyright (c) 2016 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __L2_BRIDGE_DPO_H__
#define __L2_BRIDGE_DPO_H__
#include <vnet/dpo/dpo.h>
/**
 * @brief
 * The data-path object representing an L2 bridge.
 * If a packet encounters an object of this type in the L3 data-path, it
 * is injected back into the L2 bridge.
 */
typedef struct l2_bridge_dpo_t_
{
/**
 * The software interface index of the interface on which the packets
 * will be output.
 */
u32 l2b_sw_if_index;
/**
 * Number of locks held on this object.
 * NOTE(review): presumably a reference count managed through
 * l2_bridge_dpo_add_or_lock() and the DPO unlock path; the
 * implementation is not visible from this header - confirm.
 */
u16 l2b_locks;
} l2_bridge_dpo_t;
/**
 * @brief Obtain the L2 bridge DPO for an interface.
 *
 * @param sw_if_index software interface index the DPO is associated with
 * @param dpo         DPO identifier handed in by the caller; callers in
 *                    this change read dpo->dpoi_index afterwards and
 *                    release the DPO again with dpo_reset().
 *
 * NOTE(review): going by the name, an existing object is locked rather
 * than a second one being created - confirm against l2_bridge_dpo.c.
 */
extern void l2_bridge_dpo_add_or_lock (u32 sw_if_index,
dpo_id_t *dpo);
/**
 * @brief Module initialisation hook; invoked from dpo_module_init().
 */
extern void l2_bridge_dpo_module_init(void);
/**
 * @brief pool of all L2 bridge DPOs
 *
 * NOTE(review): this is a (tentative) definition in a header, not an
 * 'extern' declaration, so every translation unit that includes this
 * file emits its own copy of the symbol. That only links when the
 * toolchain merges common symbols; it fails with -fno-common.
 * Consider declaring it 'extern' here and defining it exactly once in
 * l2_bridge_dpo.c.
 */
l2_bridge_dpo_t *l2_bridge_dpo_pool;
/**
 * @brief Fetch the L2 bridge DPO stored at a given index of
 *        l2_bridge_dpo_pool.
 *
 * @param index pool index of the object (e.g. a dpo_id_t's dpoi_index)
 * @return pointer to the pool element at that index
 */
static inline l2_bridge_dpo_t *
l2_bridge_dpo_get (index_t index)
{
    l2_bridge_dpo_t *l2b;

    l2b = pool_elt_at_index(l2_bridge_dpo_pool, index);

    return (l2b);
}
#endif
+1
View File
@@ -40,6 +40,7 @@ add_del_route_t_handler (u8 is_multipath,
u8 is_resolve_attached,
u8 is_interface_rx,
u8 is_rpf_id,
u8 is_l2_bridged,
u32 fib_index,
const fib_prefix_t * prefix,
dpo_proto_t next_hop_proto,
+42 -22
View File
@@ -23,6 +23,7 @@
#include <vnet/dpo/lookup_dpo.h>
#include <vnet/dpo/interface_rx_dpo.h>
#include <vnet/dpo/mpls_disposition.h>
#include <vnet/dpo/l2_bridge_dpo.h>
#include <vnet/adj/adj.h>
#include <vnet/adj/adj_mcast.h>
@@ -771,11 +772,18 @@ fib_path_unresolve (fib_path_t *path)
}
break;
case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
case FIB_PATH_TYPE_ATTACHED:
adj_child_remove(path->fp_dpo.dpoi_index,
path->fp_sibling);
adj_unlock(path->fp_dpo.dpoi_index);
break;
case FIB_PATH_TYPE_ATTACHED:
if (DPO_PROTO_ETHERNET != path->fp_nh_proto)
{
adj_child_remove(path->fp_dpo.dpoi_index,
path->fp_sibling);
adj_unlock(path->fp_dpo.dpoi_index);
}
break;
case FIB_PATH_TYPE_EXCLUSIVE:
dpo_reset(&path->exclusive.fp_ex_dpo);
break;
@@ -1594,28 +1602,35 @@ fib_path_resolve (fib_node_index_t path_index)
fib_path_attached_next_hop_set(path);
break;
case FIB_PATH_TYPE_ATTACHED:
/*
* path->attached.fp_interface
*/
if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
path->attached.fp_interface))
{
path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
}
dpo_set(&path->fp_dpo,
DPO_ADJACENCY,
path->fp_nh_proto,
fib_path_attached_get_adj(path,
dpo_proto_to_link(path->fp_nh_proto)));
/*
* become a child of the adjacency so we receive updates
* when the interface state changes
*/
path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
FIB_NODE_TYPE_PATH,
fib_path_get_index(path));
if (DPO_PROTO_ETHERNET == path->fp_nh_proto)
{
l2_bridge_dpo_add_or_lock(path->attached.fp_interface,
&path->fp_dpo);
}
else
{
/*
* path->attached.fp_interface
*/
if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
path->attached.fp_interface))
{
path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
}
dpo_set(&path->fp_dpo,
DPO_ADJACENCY,
path->fp_nh_proto,
fib_path_attached_get_adj(path,
dpo_proto_to_link(path->fp_nh_proto)));
/*
* become a child of the adjacency so we receive updates
* when the interface state changes
*/
path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
FIB_NODE_TYPE_PATH,
fib_path_get_index(path));
}
break;
case FIB_PATH_TYPE_RECURSIVE:
{
@@ -1996,6 +2011,11 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
dpo_copy(dpo, &path->exclusive.fp_ex_dpo);
break;
case FIB_PATH_TYPE_ATTACHED:
if (DPO_PROTO_ETHERNET == path->fp_nh_proto)
{
dpo_copy(dpo, &path->fp_dpo);
break;
}
switch (fct)
{
case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+2 -1
View File
@@ -481,7 +481,8 @@ fib_table_route_path_fixup (const fib_prefix_t *prefix,
}
if (fib_prefix_is_host(prefix) &&
ip46_address_is_zero(&path->frp_addr) &&
path->frp_sw_if_index != ~0)
path->frp_sw_if_index != ~0 &&
path->frp_proto != DPO_PROTO_ETHERNET)
{
path->frp_addr = prefix->fp_addr;
path->frp_flags |= FIB_ROUTE_PATH_ATTACHED;
+53 -77
View File
@@ -27,9 +27,11 @@
#include <vnet/bfd/bfd_main.h>
#include <vnet/dpo/interface_rx_dpo.h>
#include <vnet/dpo/replicate_dpo.h>
#include <vnet/dpo/l2_bridge_dpo.h>
#include <vnet/mpls/mpls.h>
#include <vnet/fib/fib_test.h>
#include <vnet/fib/fib_path_list.h>
#include <vnet/fib/fib_entry_src.h>
#include <vnet/fib/fib_walk.h>
@@ -266,83 +268,6 @@ fib_test_build_rewrite (u8 *eth_addr)
return (rewrite);
}
/**
 * Type tag for fib_test_lb_bucket_t: names the kind of object a FIB
 * unit test expects to find in a load-balance bucket.
 * NOTE(review): the "_O_" in the names presumably reads "over"
 * (e.g. label over adjacency) - confirm.
 */
typedef enum fib_test_lb_bucket_type_t_ {
FT_LB_LABEL_O_ADJ,
FT_LB_LABEL_STACK_O_ADJ,
FT_LB_LABEL_O_LB,
FT_LB_O_LB,
FT_LB_SPECIAL,
FT_LB_ADJ,
FT_LB_INTF,
} fib_test_lb_bucket_type_t;
/**
 * Description of one expected load-balance bucket, as passed to the
 * FIB test validation helpers: a type tag followed by an anonymous
 * union holding the expected values for that type.
 */
typedef struct fib_test_lb_bucket_t_ {
/* selects which union member below is meaningful */
fib_test_lb_bucket_type_t type;
union
{
/* expected values for a single label stacked on an adjacency */
struct
{
mpls_eos_bit_t eos;
mpls_label_t label;
u8 ttl;
adj_index_t adj;
} label_o_adj;
/* expected values for a stack of up to 8 labels on an adjacency */
struct
{
mpls_eos_bit_t eos;
mpls_label_t label_stack[8];
u8 label_stack_size;
u8 ttl;
adj_index_t adj;
} label_stack_o_adj;
/* expected values for a single label stacked on a load-balance */
struct
{
mpls_eos_bit_t eos;
mpls_label_t label;
u8 ttl;
index_t lb;
} label_o_lb;
/* expected index; validation compares it with the bucket's dpoi_index */
struct
{
index_t adj;
} adj;
/* expected load-balance index */
struct
{
index_t lb;
} lb;
/* expected index for a special bucket */
struct
{
index_t adj;
} special;
};
} fib_test_lb_bucket_t;
/**
 * Type tag for fib_test_rep_bucket_t: names the kind of object a FIB
 * unit test expects to find in a replicate bucket.
 */
typedef enum fib_test_rep_bucket_type_t_ {
FT_REP_LABEL_O_ADJ,
FT_REP_DISP_MFIB_LOOKUP,
FT_REP_INTF,
} fib_test_rep_bucket_type_t;
/**
 * Description of one expected replicate bucket: a type tag followed by
 * an anonymous union holding the expected values for that type.
 */
typedef struct fib_test_rep_bucket_t_ {
/* selects which union member below is meaningful */
fib_test_rep_bucket_type_t type;
union
{
/* expected values for a single label stacked on an adjacency */
struct
{
mpls_eos_bit_t eos;
mpls_label_t label;
u8 ttl;
adj_index_t adj;
} label_o_adj;
/* expected adjacency index */
struct
{
adj_index_t adj;
} adj;
};
} fib_test_rep_bucket_t;
#define FIB_TEST_LB(_cond, _comment, _args...) \
{ \
if (!FIB_TEST_I(_cond, _comment, ##_args)) { \
@@ -598,6 +523,16 @@ fib_test_validate_lb_v (const load_balance_t *lb,
bucket,
exp->adj.adj);
break;
case FT_LB_L2:
FIB_TEST_I((DPO_L2_BRIDGE == dpo->dpoi_type),
"bucket %d stacks on %U",
bucket,
format_dpo_type, dpo->dpoi_type);
FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index),
"bucket %d stacks on adj %d",
bucket,
exp->adj.adj);
break;
case FT_LB_O_LB:
FIB_TEST_I((DPO_LOAD_BALANCE == dpo->dpoi_type),
"bucket %d stacks on %U",
@@ -4066,6 +4001,45 @@ fib_test_v4 (void)
"Table and LB newhash config match: %U",
format_ip_flow_hash_config, lb->lb_hash_config);
/*
* A route via an L2 Bridge
*/
fei = fib_table_entry_path_add(fib_index,
&pfx_10_10_10_3_s_32,
FIB_SOURCE_API,
FIB_ENTRY_FLAG_NONE,
DPO_PROTO_ETHERNET,
&zero_addr,
tm->hw[0]->sw_if_index,
~0,
1,
NULL,
FIB_ROUTE_PATH_FLAG_NONE);
dpo_id_t l2_dpo = DPO_INVALID;
l2_bridge_dpo_add_or_lock(tm->hw[0]->sw_if_index, &l2_dpo);
fib_test_lb_bucket_t ip_o_l2 = {
.type = FT_LB_L2,
.adj = {
.adj = l2_dpo.dpoi_index,
},
};
FIB_TEST(fib_test_validate_entry(fei,
FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
1,
&ip_o_l2),
"10.10.10.3 via L2 on Eth0");
fib_table_entry_path_remove(fib_index,
&pfx_10_10_10_3_s_32,
FIB_SOURCE_API,
DPO_PROTO_ETHERNET,
&zero_addr,
tm->hw[0]->sw_if_index,
fib_index,
1,
FIB_ROUTE_PATH_FLAG_NONE);
dpo_reset(&l2_dpo);
/*
* CLEANUP
* remove adj-fibs:
@@ -4165,6 +4139,8 @@ fib_test_v4 (void)
pool_elts(load_balance_map_pool));
FIB_TEST((lb_count == pool_elts(load_balance_pool)), "LB pool size is %d",
pool_elts(load_balance_pool));
FIB_TEST((0 == pool_elts(l2_bridge_dpo_pool)), "L2 DPO pool size is %d",
pool_elts(l2_bridge_dpo_pool));
return 0;
}
+2
View File
@@ -26,6 +26,7 @@ typedef enum fib_test_lb_bucket_type_t_ {
FT_LB_SPECIAL,
FT_LB_ADJ,
FT_LB_INTF,
FT_LB_L2,
} fib_test_lb_bucket_type_t;
typedef struct fib_test_lb_bucket_t_ {
@@ -72,6 +73,7 @@ typedef struct fib_test_lb_bucket_t_ {
typedef enum fib_test_rep_bucket_type_t_ {
FT_REP_LABEL_O_ADJ,
FT_REP_DISP_MFIB_LOOKUP,
FT_REP_INTF,
} fib_test_rep_bucket_type_t;
+1
View File
@@ -397,6 +397,7 @@ autoreply define ip_add_del_route
u8 is_multipath;
u8 is_resolve_host;
u8 is_resolve_attached;
u8 is_l2_bridged;
/* Is last/not-last message in group of multiple add/del messages. */
u8 not_last;
u8 next_hop_weight;
+5
View File
@@ -769,6 +769,7 @@ add_del_route_t_handler (u8 is_multipath,
u8 is_resolve_attached,
u8 is_interface_rx,
u8 is_rpf_id,
u8 is_l2_bridged,
u32 fib_index,
const fib_prefix_t * prefix,
dpo_proto_t next_hop_proto,
@@ -806,6 +807,8 @@ add_del_route_t_handler (u8 is_multipath,
path.frp_local_label = next_hop_via_label;
path.frp_eos = MPLS_NON_EOS;
}
if (is_l2_bridged)
path.frp_proto = DPO_PROTO_ETHERNET;
if (is_resolve_host)
path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_HOST;
if (is_resolve_attached)
@@ -1043,6 +1046,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
mp->classify_table_index,
mp->is_resolve_host,
mp->is_resolve_attached, 0, 0,
mp->is_l2_bridged,
fib_index, &pfx, DPO_PROTO_IP4,
&nh,
ntohl (mp->next_hop_sw_if_index),
@@ -1102,6 +1106,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
mp->classify_table_index,
mp->is_resolve_host,
mp->is_resolve_attached, 0, 0,
mp->is_l2_bridged,
fib_index, &pfx, DPO_PROTO_IP6,
&nh, ntohl (mp->next_hop_sw_if_index),
next_hop_fib_index,
+1 -7
View File
@@ -223,13 +223,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm,
0, // mp->is_unreach,
0, // mp->is_prohibit,
0, // mp->is_local,
mp->mr_is_multicast,
mp->mr_is_classify,
mp->mr_classify_table_index,
mp->mr_is_resolve_host,
mp->mr_is_resolve_attached,
mp->mr_is_interface_rx,
mp->mr_is_rpf_id,
mp->mr_is_multicast, mp->mr_is_classify, mp->mr_classify_table_index, mp->mr_is_resolve_host, mp->mr_is_resolve_attached, mp->mr_is_interface_rx, mp->mr_is_rpf_id, 0, // l2_bridged
fib_index, &pfx,
mp->mr_next_hop_proto,
&nh, ntohl (mp->mr_next_hop_sw_if_index),
+178
View File
@@ -0,0 +1,178 @@
#!/usr/bin/env python
import random
import socket
import unittest
from framework import VppTestCase, VppTestRunner
from vpp_sub_interface import VppSubInterface, VppDot1QSubint
from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto
from vpp_papi_provider import L2_VTR_OP
from scapy.packet import Raw
from scapy.layers.l2 import Ether, Dot1Q, ARP
from scapy.layers.inet import IP, UDP
from util import ppp
class TestDVR(VppTestCase):
    """ Distributed Virtual Router """

    def setUp(self):
        super(TestDVR, self).setUp()

        # four packet-generator interfaces plus one loopback; the loopback
        # is later made the BVI of the bridge domain under test
        self.create_pg_interfaces(range(4))
        self.create_loopback_interfaces(range(1))

        for i in self.pg_interfaces:
            i.admin_up()

        self.loop0.config_ip4()

    def tearDown(self):
        for i in self.pg_interfaces:
            i.admin_down()
        self.loop0.unconfig_ip4()

        super(TestDVR, self).tearDown()

    def test_dvr(self):
        """ Distributed Virtual Router """

        #
        # Destination addresses that are L2 bridged via a non-tagged
        # interface and via a tagged sub-interface respectively
        #
        ip_non_tag_bridged = "10.10.10.10"
        ip_tag_bridged = "10.10.10.11"
        any_src_addr = "1.1.1.1"

        #
        # Both packets arrive untagged on pg0 and are addressed to the
        # BVI's MAC so that they enter the L3 path
        #
        pkt_no_tag = (Ether(src=self.pg0.remote_mac,
                            dst=self.loop0.local_mac) /
                      IP(src=any_src_addr,
                         dst=ip_non_tag_bridged) /
                      UDP(sport=1234, dport=1234) /
                      Raw('\xa5' * 100))
        pkt_tag = (Ether(src=self.pg0.remote_mac,
                         dst=self.loop0.local_mac) /
                   IP(src=any_src_addr,
                      dst=ip_tag_bridged) /
                   UDP(sport=1234, dport=1234) /
                   Raw('\xa5' * 100))

        #
        # Two sub-interfaces so we can test VLAN tag push/pop
        #
        sub_if_on_pg2 = VppDot1QSubint(self, self.pg2, 92)
        sub_if_on_pg3 = VppDot1QSubint(self, self.pg3, 93)
        sub_if_on_pg2.admin_up()
        sub_if_on_pg3.admin_up()

        #
        # Put all the interfaces into a new bridge domain
        #
        self.vapi.sw_interface_set_l2_bridge(self.pg0.sw_if_index, 1)
        self.vapi.sw_interface_set_l2_bridge(self.pg1.sw_if_index, 1)
        self.vapi.sw_interface_set_l2_bridge(sub_if_on_pg2.sw_if_index, 1)
        self.vapi.sw_interface_set_l2_bridge(sub_if_on_pg3.sw_if_index, 1)
        self.vapi.sw_interface_set_l2_bridge(self.loop0.sw_if_index, 1, bvi=1)

        self.vapi.sw_interface_set_l2_tag_rewrite(sub_if_on_pg2.sw_if_index,
                                                  L2_VTR_OP.L2_POP_1,
                                                  92)
        self.vapi.sw_interface_set_l2_tag_rewrite(sub_if_on_pg3.sw_if_index,
                                                  L2_VTR_OP.L2_POP_1,
                                                  93)

        # diagnostic dump of the bridge domain; not an error condition
        self.logger.info(self.vapi.ppcli("show bridge-domain 1 detail"))

        #
        # Add a route to bridge the traffic via a non-tagged interface
        #
        route_no_tag = VppIpRoute(
            self, ip_non_tag_bridged, 32,
            [VppRoutePath("0.0.0.0",
                          self.pg1.sw_if_index,
                          proto=DpoProto.DPO_PROTO_ETHERNET)])
        route_no_tag.add_vpp_config()

        #
        # Inject the packet that arrives and leaves on a non-tagged interface
        # Since it's 'bridged' expect that the MAC header is unchanged.
        #
        self.pg0.add_stream(pkt_no_tag)

        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        rx = self.pg1.get_capture(1)

        self.assertEqual(rx[0][Ether].dst, pkt_no_tag[Ether].dst)
        self.assertEqual(rx[0][Ether].src, pkt_no_tag[Ether].src)

        #
        # Add a route to bridge the traffic via a tagged sub-interface
        #
        route_tag = VppIpRoute(
            self, ip_tag_bridged, 32,
            [VppRoutePath("0.0.0.0",
                          sub_if_on_pg3.sw_if_index,
                          proto=DpoProto.DPO_PROTO_ETHERNET)])
        route_tag.add_vpp_config()

        #
        # Inject the packet that arrives on a non-tagged interface and
        # leaves on a tagged sub-interface.
        # Since it's 'bridged' expect that the MAC header is unchanged
        # and that the egress VLAN tag has been pushed.
        #
        self.pg0.add_stream(pkt_tag)

        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        rx = self.pg3.get_capture(1)

        self.assertEqual(rx[0][Ether].dst, pkt_tag[Ether].dst)
        self.assertEqual(rx[0][Ether].src, pkt_tag[Ether].src)
        self.assertEqual(rx[0][Dot1Q].vlan, 93)

        #
        # Tag to tag
        #
        pkt_tag_to_tag = (Ether(src=self.pg2.remote_mac,
                                dst=self.loop0.local_mac) /
                          Dot1Q(vlan=92) /
                          IP(src=any_src_addr,
                             dst=ip_tag_bridged) /
                          UDP(sport=1234, dport=1234) /
                          Raw('\xa5' * 100))

        self.pg2.add_stream(pkt_tag_to_tag)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()
        rx = self.pg3.get_capture(1)

        self.assertEqual(rx[0][Ether].dst, pkt_tag_to_tag[Ether].dst)
        self.assertEqual(rx[0][Ether].src, pkt_tag_to_tag[Ether].src)
        self.assertEqual(rx[0][Dot1Q].vlan, 93)

        #
        # Tag to non-Tag
        #
        pkt_tag_to_non_tag = (Ether(src=self.pg2.remote_mac,
                                    dst=self.loop0.local_mac) /
                              Dot1Q(vlan=92) /
                              IP(src=any_src_addr,
                                 dst=ip_non_tag_bridged) /
                              UDP(sport=1234, dport=1234) /
                              Raw('\xa5' * 100))

        self.pg2.add_stream(pkt_tag_to_non_tag)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()
        rx = self.pg1.get_capture(1)

        # compare against the packet that was actually sent in this step
        self.assertEqual(rx[0][Ether].dst, pkt_tag_to_non_tag[Ether].dst)
        self.assertEqual(rx[0][Ether].src, pkt_tag_to_non_tag[Ether].src)
        self.assertFalse(rx[0].haslayer(Dot1Q))
# When executed directly, run this module's tests with the VPP test runner.
if __name__ == '__main__':
unittest.main(testRunner=VppTestRunner)
+2
View File
@@ -193,6 +193,8 @@ class VppIpRoute(VppObject):
next_hop_via_label=path.nh_via_label,
next_hop_table_id=path.nh_table_id,
is_ipv6=self.is_ip6,
is_l2_bridged=1
if path.proto == DpoProto.DPO_PROTO_ETHERNET else 0,
is_resolve_host=path.is_resolve_host,
is_resolve_attached=path.is_resolve_attached,
is_multipath=1 if len(self.paths) > 1 else 0)
+2
View File
@@ -714,6 +714,7 @@ class VppPapiProvider(object):
is_local=0,
is_classify=0,
is_multipath=0,
is_l2_bridged=0,
not_last=0):
"""
@@ -754,6 +755,7 @@ class VppPapiProvider(object):
'is_multipath': is_multipath,
'is_resolve_host': is_resolve_host,
'is_resolve_attached': is_resolve_attached,
'is_l2_bridged': is_l2_bridged,
'not_last': not_last,
'next_hop_weight': next_hop_weight,
'dst_address_length': dst_address_length,