Rework kube-proxy into LB plugin
Add support of NAT66 Change-Id: Ie6aa79078a3835f989829b9a597c448dfd2f9ea3 Signed-off-by: Hongjun Ni <hongjun.ni@intel.com>
This commit is contained in:

committed by
Damjan Marion

parent
afe56de947
commit
d92a0b553f
@ -229,7 +229,6 @@ PLUGIN_ENABLED(igmp)
|
||||
PLUGIN_ENABLED(ila)
|
||||
PLUGIN_ENABLED(ioam)
|
||||
PLUGIN_ENABLED(ixge)
|
||||
PLUGIN_ENABLED(kubeproxy)
|
||||
PLUGIN_ENABLED(l2e)
|
||||
PLUGIN_ENABLED(lacp)
|
||||
PLUGIN_ENABLED(lb)
|
||||
|
@ -75,10 +75,6 @@ if ENABLE_IXGE_PLUGIN
|
||||
include ixge.am
|
||||
endif
|
||||
|
||||
if ENABLE_KUBEPROXY_PLUGIN
|
||||
include kubeproxy.am
|
||||
endif
|
||||
|
||||
if ENABLE_LACP_PLUGIN
|
||||
include lacp.am
|
||||
endif
|
||||
|
@ -1,38 +0,0 @@
|
||||
# Copyright (c) 2017 Intel Corporation, Inc.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at:
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
vppapitestplugins_LTLIBRARIES += kubeproxy_test_plugin.la
|
||||
vppplugins_LTLIBRARIES += kubeproxy_plugin.la
|
||||
|
||||
kubeproxy_plugin_la_SOURCES = \
|
||||
kubeproxy/kp.c \
|
||||
kubeproxy/kp_node.c \
|
||||
kubeproxy/kp_cli.c \
|
||||
kubeproxy/kp_api.c
|
||||
|
||||
BUILT_SOURCES += \
|
||||
kubeproxy/kp.api.h \
|
||||
kubeproxy/kp.api.json
|
||||
|
||||
API_FILES += kubeproxy/kp.api
|
||||
|
||||
noinst_HEADERS += \
|
||||
kubeproxy/kp.h \
|
||||
kubeproxy/kphash.h \
|
||||
kubeproxy/kp.api.h
|
||||
|
||||
kubeproxy_test_plugin_la_SOURCES = \
|
||||
kubeproxy/kp_test.c \
|
||||
kubeproxy/kp_plugin.api.h
|
||||
|
||||
# vi:syntax=automake
|
@ -1,81 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Intel and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
option version = "1.0.0";
|
||||
|
||||
/** \brief Configure Kube-proxy global parameters
|
||||
@param client_index - opaque cookie to identify the sender
|
||||
@param context - sender context, to match reply w/ request
|
||||
@param sticky_buckets_per_core - Number of buckets *per worker thread* in the
|
||||
established flow table (must be power of 2).
|
||||
@param flow_timeout - Time in seconds after which, if no packet is received
|
||||
for a given flow, the flow is removed from the established flow table.
|
||||
*/
|
||||
autoreply define kp_conf
|
||||
{
|
||||
u32 client_index;
|
||||
u32 context;
|
||||
u32 sticky_buckets_per_core;
|
||||
u32 flow_timeout;
|
||||
};
|
||||
|
||||
/** \brief Add a virtual address (or prefix)
|
||||
@param client_index - opaque cookie to identify the sender
|
||||
@param context - sender context, to match reply w/ request
|
||||
@param ip_prefix - IP address (IPv4 in lower order 32 bits).
|
||||
@param prefix_length - IP prefix length (96 + 'IPv4 prefix length' for IPv4).
|
||||
@param is_ipv6 - Is IPv6 addresss.
|
||||
@param port - service port;
|
||||
@param target_port - Pod's port corresponding to specific service.
|
||||
@param node_port - Node's port.
|
||||
@param is_nat4 - DNAT is NAT44 (NAT64 otherwise).
|
||||
@param new_flows_table_length - Size of the new connections flow table used
|
||||
for this VIP (must be power of 2).
|
||||
@param is_del - The VIP should be removed.
|
||||
*/
|
||||
autoreply define kp_add_del_vip {
|
||||
u32 client_index;
|
||||
u32 context;
|
||||
u8 ip_prefix[16];
|
||||
u8 prefix_length;
|
||||
u8 is_ipv6;
|
||||
u16 port;
|
||||
u16 target_port;
|
||||
u16 node_port;
|
||||
u8 is_nat4;
|
||||
u32 new_flows_table_length;
|
||||
u8 is_del;
|
||||
};
|
||||
|
||||
/** \brief Add a pod for a given VIP
|
||||
@param client_index - opaque cookie to identify the sender
|
||||
@param context - sender context, to match reply w/ request
|
||||
@param vip_ip_prefix - VIP IP address (IPv4 in lower order 32 bits).
|
||||
@param vip_ip_prefix - VIP IP prefix length (96 + 'IPv4 prefix length' for IPv4).
|
||||
@param vip_is_ipv6 - VIP is IPv6 addresss.
|
||||
@param pod_address - The pod's IP address (IPv4 in lower order 32 bits).
|
||||
@param pod_is_ipv6 - Pod is IPv6 addresss.
|
||||
@param is_del - The Pod should be removed.
|
||||
*/
|
||||
autoreply define kp_add_del_pod {
|
||||
u32 client_index;
|
||||
u32 context;
|
||||
u8 vip_ip_prefix[16];
|
||||
u8 vip_prefix_length;
|
||||
u8 vip_is_ipv6;
|
||||
u8 pod_address[16];
|
||||
u8 pod_is_ipv6;
|
||||
u8 is_del;
|
||||
};
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,249 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Intel and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "POD IS" BPODIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <kubeproxy/kp.h>
|
||||
|
||||
#include <vppinfra/byte_order.h>
|
||||
#include <vlibapi/api.h>
|
||||
#include <vlibmemory/api.h>
|
||||
|
||||
#define vl_msg_id(n,h) n,
|
||||
typedef enum {
|
||||
#include <kubeproxy/kp.api.h>
|
||||
/* We'll want to know how many messages IDs we need... */
|
||||
VL_MSG_FIRST_AVAILABLE,
|
||||
} vl_msg_id_t;
|
||||
#undef vl_msg_id
|
||||
|
||||
|
||||
/* define message structures */
|
||||
#define vl_typedefs
|
||||
#include <kubeproxy/kp.api.h>
|
||||
#undef vl_typedefs
|
||||
|
||||
/* define generated endian-swappers */
|
||||
#define vl_endianfun
|
||||
#include <kubeproxy/kp.api.h>
|
||||
#undef vl_endianfun
|
||||
|
||||
#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
|
||||
|
||||
/* Get the API version number */
|
||||
#define vl_api_version(n,v) static u32 api_version=(v);
|
||||
#include <kubeproxy/kp.api.h>
|
||||
#undef vl_api_version
|
||||
|
||||
#define vl_msg_name_crc_list
|
||||
#include <kubeproxy/kp.api.h>
|
||||
#undef vl_msg_name_crc_list
|
||||
|
||||
|
||||
#define REPLY_MSG_ID_BASE kpm->msg_id_base
|
||||
#include <vlibapi/api_helper_macros.h>
|
||||
|
||||
static void
|
||||
setup_message_id_table (kp_main_t * kpm, api_main_t * am)
|
||||
{
|
||||
#define _(id,n,crc) \
|
||||
vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + kpm->msg_id_base);
|
||||
foreach_vl_msg_name_crc_kp;
|
||||
#undef _
|
||||
}
|
||||
|
||||
/* Macro to finish up custom dump fns */
|
||||
#define FINISH \
|
||||
vec_add1 (s, 0); \
|
||||
vl_print (handle, (char *)s); \
|
||||
vec_free (s); \
|
||||
return handle;
|
||||
|
||||
static void
|
||||
vl_api_kp_conf_t_handler
|
||||
(vl_api_kp_conf_t * mp)
|
||||
{
|
||||
kp_main_t *kpm = &kp_main;
|
||||
vl_api_kp_conf_reply_t * rmp;
|
||||
int rv = 0;
|
||||
|
||||
rv = kp_conf(mp->sticky_buckets_per_core,
|
||||
mp->flow_timeout);
|
||||
|
||||
REPLY_MACRO (VL_API_KP_CONF_REPLY);
|
||||
}
|
||||
|
||||
static void *vl_api_kp_conf_t_print
|
||||
(vl_api_kp_conf_t *mp, void * handle)
|
||||
{
|
||||
u8 * s;
|
||||
s = format (0, "SCRIPT: kp_conf ");
|
||||
s = format (s, "%u ", mp->sticky_buckets_per_core);
|
||||
s = format (s, "%u ", mp->flow_timeout);
|
||||
FINISH;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
vl_api_kp_add_del_vip_t_handler
|
||||
(vl_api_kp_add_del_vip_t * mp)
|
||||
{
|
||||
kp_main_t *kpm = &kp_main;
|
||||
vl_api_kp_conf_reply_t * rmp;
|
||||
int rv = 0;
|
||||
ip46_address_t prefix;
|
||||
u8 prefix_length = mp->prefix_length;
|
||||
|
||||
if (mp->is_ipv6 == 0)
|
||||
{
|
||||
prefix_length += 96;
|
||||
memcpy(&prefix.ip4, mp->ip_prefix, sizeof(prefix.ip4));
|
||||
prefix.pad[0] = prefix.pad[1] = prefix.pad[2] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(&prefix.ip6, mp->ip_prefix, sizeof(prefix.ip6));
|
||||
}
|
||||
|
||||
if (mp->is_del) {
|
||||
u32 vip_index;
|
||||
if (!(rv = kp_vip_find_index(&prefix, prefix_length, &vip_index)))
|
||||
rv = kp_vip_del(vip_index);
|
||||
} else {
|
||||
u32 vip_index;
|
||||
kp_vip_type_t type;
|
||||
if (mp->is_ipv6 == 0) {
|
||||
type = mp->is_nat4?KP_VIP_TYPE_IP4_NAT44:KP_VIP_TYPE_IP4_NAT46;
|
||||
} else {
|
||||
type = mp->is_nat4?KP_VIP_TYPE_IP6_NAT64:KP_VIP_TYPE_IP6_NAT66;
|
||||
}
|
||||
|
||||
rv = kp_vip_add(&prefix, prefix_length, type,
|
||||
ntohl(mp->new_flows_table_length), &vip_index,
|
||||
ntohs(mp->port), ntohs(mp->target_port),
|
||||
ntohs(mp->node_port));
|
||||
}
|
||||
REPLY_MACRO (VL_API_KP_CONF_REPLY);
|
||||
}
|
||||
|
||||
static void *vl_api_kp_add_del_vip_t_print
|
||||
(vl_api_kp_add_del_vip_t *mp, void * handle)
|
||||
{
|
||||
u8 * s;
|
||||
s = format (0, "SCRIPT: kp_add_del_vip ");
|
||||
s = format (s, "%U ", format_ip46_prefix,
|
||||
(ip46_address_t *)mp->ip_prefix, mp->prefix_length, IP46_TYPE_ANY);
|
||||
s = format (s, "port %u ", mp->port);
|
||||
s = format (s, "target_port %u ", mp->target_port);
|
||||
s = format (s, "node_port %u ", mp->node_port);
|
||||
s = format (s, "%s ", mp->is_nat4?"nat4":"nat6");
|
||||
s = format (s, "%u ", mp->new_flows_table_length);
|
||||
s = format (s, "%s ", mp->is_del?"del":"add");
|
||||
FINISH;
|
||||
}
|
||||
|
||||
static void
|
||||
vl_api_kp_add_del_pod_t_handler
|
||||
(vl_api_kp_add_del_pod_t * mp)
|
||||
{
|
||||
kp_main_t *kpm = &kp_main;
|
||||
vl_api_kp_conf_reply_t * rmp;
|
||||
int rv = 0;
|
||||
u32 vip_index;
|
||||
|
||||
ip46_address_t vip_ip_prefix;
|
||||
u8 vip_prefix_length = mp->vip_prefix_length;
|
||||
|
||||
if (mp->vip_is_ipv6 == 0)
|
||||
{
|
||||
vip_prefix_length += 96;
|
||||
memcpy(&vip_ip_prefix.ip4, mp->vip_ip_prefix,
|
||||
sizeof(vip_ip_prefix.ip4));
|
||||
vip_ip_prefix.pad[0] = vip_ip_prefix.pad[1] = vip_ip_prefix.pad[2] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(&vip_ip_prefix.ip6, mp->vip_ip_prefix,
|
||||
sizeof(vip_ip_prefix.ip6));
|
||||
}
|
||||
|
||||
ip46_address_t pod_address;
|
||||
|
||||
if (mp->pod_is_ipv6 == 0)
|
||||
{
|
||||
memcpy(&pod_address.ip4, mp->pod_address,
|
||||
sizeof(pod_address.ip4));
|
||||
pod_address.pad[0] = pod_address.pad[1] = pod_address.pad[2] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(&pod_address.ip6, mp->pod_address,
|
||||
sizeof(pod_address.ip6));
|
||||
}
|
||||
|
||||
if ((rv = kp_vip_find_index(&vip_ip_prefix, vip_prefix_length, &vip_index)))
|
||||
goto done;
|
||||
|
||||
if (mp->is_del)
|
||||
rv = kp_vip_del_pods(vip_index, &pod_address, 1);
|
||||
else
|
||||
rv = kp_vip_add_pods(vip_index, &pod_address, 1);
|
||||
|
||||
done:
|
||||
REPLY_MACRO (VL_API_KP_CONF_REPLY);
|
||||
}
|
||||
|
||||
static void *vl_api_kp_add_del_pod_t_print
|
||||
(vl_api_kp_add_del_pod_t *mp, void * handle)
|
||||
{
|
||||
u8 * s;
|
||||
s = format (0, "SCRIPT: kp_add_del_pod ");
|
||||
s = format (s, "%U ", format_ip46_prefix,
|
||||
(ip46_address_t *)mp->vip_ip_prefix, mp->vip_prefix_length, IP46_TYPE_ANY);
|
||||
s = format (s, "%U ", format_ip46_address,
|
||||
(ip46_address_t *)mp->pod_address, IP46_TYPE_ANY);
|
||||
s = format (s, "%s ", mp->is_del?"del":"add");
|
||||
FINISH;
|
||||
}
|
||||
|
||||
/* List of message types that this plugin understands */
|
||||
#define foreach_kp_plugin_api_msg \
|
||||
_(KP_CONF, kp_conf) \
|
||||
_(KP_ADD_DEL_VIP, kp_add_del_vip) \
|
||||
_(KP_ADD_DEL_POD, kp_add_del_pod)
|
||||
|
||||
static clib_error_t * kp_api_init (vlib_main_t * vm)
|
||||
{
|
||||
kp_main_t *kpm = &kp_main;
|
||||
u8 *name = format (0, "kp_%08x%c", api_version, 0);
|
||||
kpm->msg_id_base = vl_msg_api_get_msg_ids
|
||||
((char *) name, VL_MSG_FIRST_AVAILABLE);
|
||||
|
||||
#define _(N,n) \
|
||||
vl_msg_api_set_handlers((VL_API_##N + kpm->msg_id_base), \
|
||||
#n, \
|
||||
vl_api_##n##_t_handler, \
|
||||
vl_noop_handler, \
|
||||
vl_api_##n##_t_endian, \
|
||||
vl_api_##n##_t_print, \
|
||||
sizeof(vl_api_##n##_t), 1);
|
||||
foreach_kp_plugin_api_msg;
|
||||
#undef _
|
||||
|
||||
/* Add our API messages to the global name_crc hash table */
|
||||
setup_message_id_table (kpm, &api_main);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
VLIB_INIT_FUNCTION (kp_api_init);
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,105 +0,0 @@
|
||||
# Kube-proxy plugin for VPP {#kp_plugin_doc}
|
||||
|
||||
## Overview
|
||||
|
||||
This plugin provides kube-proxy data plane on user space,
|
||||
which is used to replace linux kernal's kube-proxy based on iptables.
|
||||
The idea is largely inspired from VPP LB plugin.
|
||||
|
||||
Currently, kube-proxy plugin supports three service types:
|
||||
1) Cluster IP plus Port: support any protocols, including TCP, UDP.
|
||||
2) Node IP plus Node Port: currently only support UDP.
|
||||
3) External Load Balancer.
|
||||
|
||||
For Cluster IP plus Port case:
|
||||
kube-proxy is configured with a set of Virtual IPs (VIP, which can be
|
||||
prefixes), and for each VIP, with a set of POD addresses (PODs).
|
||||
|
||||
For a specific session received for a given VIP (or VIP prefix),
|
||||
first packet selects a Pod according to internal load balancing algorithm,
|
||||
then does DNAT operation and sent to chosen Pod.
|
||||
At the same time, will create a session entry to store Pod chosen result.
|
||||
Following packets for that session will look up session table first,
|
||||
which ensures that a given session will always be routed to the same Pod.
|
||||
|
||||
For returned packet from Pod, it will do SNAT operation and sent out.
|
||||
|
||||
Please refer to below for details:
|
||||
https://schd.ws/hosted_files/ossna2017/1e/VPP_K8S_GTPU_OSSNA.pdf
|
||||
|
||||
|
||||
## Configuration
|
||||
|
||||
### Global KP parameters
|
||||
|
||||
The kube-proxy needs to be configured with some parameters:
|
||||
|
||||
ku conf [buckets <n>] [timeout <s>]
|
||||
|
||||
buckets: the *per-thread* established-connections-table number of buckets.
|
||||
|
||||
timeout: the number of seconds a connection will remain in the
|
||||
established-connections-table while no packet for this flow
|
||||
is received.
|
||||
|
||||
### Configure VIPs and Ports
|
||||
|
||||
ku vip <prefix> port <n> target_port <n> node_port <n> \
|
||||
[nat4|nat6)] [new_len <n>] [del]
|
||||
|
||||
new_len is the size of the new-connection-table. It should be 1 or 2 orders of
|
||||
magnitude bigger than the number of PODs for the VIP in order to ensure a good
|
||||
load balancing.
|
||||
|
||||
Examples:
|
||||
|
||||
ku vip 90.0.0.0/8 nat44 new_len 2048
|
||||
ku vip 2003::/16 nat66 new_len 2048
|
||||
|
||||
### Configure PODs (for each VIP)
|
||||
|
||||
ku pod <vip-prefix> [<address> [<address> [...]]] [del]
|
||||
|
||||
You can add (or delete) as many PODs at a time (for a single VIP).
|
||||
|
||||
Examples:
|
||||
|
||||
ku pod 90.0.0.0/8 10.0.0.1
|
||||
ku pod 2002::/16 2001::2 2001::3 2001::4
|
||||
|
||||
### Configure SNAT
|
||||
|
||||
ku set interface nat4 in <intfc> [del]
|
||||
|
||||
Set SNAT feature in a specific interface.
|
||||
|
||||
|
||||
## Monitoring
|
||||
|
||||
The plugin provides quite a bunch of counters and information.
|
||||
|
||||
show ku
|
||||
show ku vip verbose
|
||||
show node counters
|
||||
|
||||
|
||||
## Design notes
|
||||
|
||||
### Multi-Threading
|
||||
|
||||
This implementation implement parallelism by using
|
||||
one established-connections table per thread. This is equivalent to assuming
|
||||
that RSS will make a job similar to ECMP, and is pretty useful as threads don't
|
||||
need to get a lock in order to write in the table.
|
||||
|
||||
### Hash Table
|
||||
|
||||
A kube-proxy requires an efficient read and write Hash table. The Hash table
|
||||
used by ip6-forward is very read-efficient, but not so much for writing. In
|
||||
addition, it is not a big deal if writing into the Hash table fails.
|
||||
|
||||
The plugin therefore uses a very specific Hash table.
|
||||
- Fixed (and power of 2) number of buckets (configured at runtime)
|
||||
- Fixed (and power of 2) elements per buckets (configured at compilation time)
|
||||
|
||||
|
@ -1,268 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Intel and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "POD IS" BPODIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <vat/vat.h>
|
||||
#include <vlibapi/api.h>
|
||||
#include <vlibmemory/api.h>
|
||||
#include <vppinfra/error.h>
|
||||
#include <kubeproxy/kp.h>
|
||||
|
||||
#define __plugin_msg_base kp_test_main.msg_id_base
|
||||
#include <vlibapi/vat_helper_macros.h>
|
||||
|
||||
//TODO: Move that to vat/plugin_api.c
|
||||
//////////////////////////
|
||||
uword unformat_ip46_address (unformat_input_t * input, va_list * args)
|
||||
{
|
||||
ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
|
||||
ip46_type_t type = va_arg (*args, ip46_type_t);
|
||||
if ((type != IP46_TYPE_IP6) &&
|
||||
unformat(input, "%U", unformat_ip4_address, &ip46->ip4)) {
|
||||
ip46_address_mask_ip4(ip46);
|
||||
return 1;
|
||||
} else if ((type != IP46_TYPE_IP4) &&
|
||||
unformat(input, "%U", unformat_ip6_address, &ip46->ip6)) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
uword unformat_ip46_prefix (unformat_input_t * input, va_list * args)
|
||||
{
|
||||
ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
|
||||
u8 *len = va_arg (*args, u8 *);
|
||||
ip46_type_t type = va_arg (*args, ip46_type_t);
|
||||
|
||||
u32 l;
|
||||
if ((type != IP46_TYPE_IP6) && unformat(input, "%U/%u", unformat_ip4_address, &ip46->ip4, &l)) {
|
||||
if (l > 32)
|
||||
return 0;
|
||||
*len = l + 96;
|
||||
ip46->pad[0] = ip46->pad[1] = ip46->pad[2] = 0;
|
||||
} else if ((type != IP46_TYPE_IP4) && unformat(input, "%U/%u", unformat_ip6_address, &ip46->ip6, &l)) {
|
||||
if (l > 128)
|
||||
return 0;
|
||||
*len = l;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
/////////////////////////
|
||||
|
||||
#define vl_msg_id(n,h) n,
|
||||
typedef enum {
|
||||
#include <kubeproxy/kp.api.h>
|
||||
/* We'll want to know how many messages IDs we need... */
|
||||
VL_MSG_FIRST_AVAILABLE,
|
||||
} vl_msg_id_t;
|
||||
#undef vl_msg_id
|
||||
|
||||
/* define message structures */
|
||||
#define vl_typedefs
|
||||
#include <kubeproxy/kp.api.h>
|
||||
#undef vl_typedefs
|
||||
|
||||
/* declare message handlers for each api */
|
||||
|
||||
#define vl_endianfun /* define message structures */
|
||||
#include <kubeproxy/kp.api.h>
|
||||
#undef vl_endianfun
|
||||
|
||||
/* instantiate all the print functions we know about */
|
||||
#define vl_print(handle, ...)
|
||||
#define vl_printfun
|
||||
#include <kubeproxy/kp.api.h>
|
||||
#undef vl_printfun
|
||||
|
||||
/* Get the API version number. */
|
||||
#define vl_api_version(n,v) static u32 api_version=(v);
|
||||
#include <kubeproxy/kp.api.h>
|
||||
#undef vl_api_version
|
||||
|
||||
typedef struct {
|
||||
/* API message ID base */
|
||||
u16 msg_id_base;
|
||||
vat_main_t *vat_main;
|
||||
} kp_test_main_t;
|
||||
|
||||
kp_test_main_t kp_test_main;
|
||||
|
||||
#define foreach_standard_reply_retval_handler \
|
||||
_(kp_conf_reply) \
|
||||
_(kp_add_del_vip_reply) \
|
||||
_(kp_add_del_pod_reply)
|
||||
|
||||
#define _(n) \
|
||||
static void vl_api_##n##_t_handler \
|
||||
(vl_api_##n##_t * mp) \
|
||||
{ \
|
||||
vat_main_t * vam = kp_test_main.vat_main; \
|
||||
i32 retval = ntohl(mp->retval); \
|
||||
if (vam->async_mode) { \
|
||||
vam->async_errors += (retval < 0); \
|
||||
} else { \
|
||||
vam->retval = retval; \
|
||||
vam->result_ready = 1; \
|
||||
} \
|
||||
}
|
||||
foreach_standard_reply_retval_handler;
|
||||
#undef _
|
||||
|
||||
/*
|
||||
* Table of message reply handlers, must include boilerplate handlers
|
||||
* we just generated
|
||||
*/
|
||||
#define foreach_vpe_api_reply_msg \
|
||||
_(KP_CONF_REPLY, kp_conf_reply) \
|
||||
_(KP_ADD_DEL_VIP_REPLY, kp_add_del_vip_reply) \
|
||||
_(KP_ADD_DEL_POD_REPLY, kp_add_del_pod_reply)
|
||||
|
||||
static int api_kp_conf (vat_main_t * vam)
|
||||
{
|
||||
unformat_input_t *i = vam->input;
|
||||
vl_api_kp_conf_t mps, *mp;
|
||||
int ret;
|
||||
|
||||
if (!unformat(i, "%u %u",
|
||||
&mps.sticky_buckets_per_core,
|
||||
&mps.flow_timeout)) {
|
||||
errmsg ("invalid arguments\n");
|
||||
return -99;
|
||||
}
|
||||
|
||||
M(KP_CONF, mp);
|
||||
S(mp);
|
||||
W (ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int api_kp_add_del_vip (vat_main_t * vam)
|
||||
{
|
||||
unformat_input_t * i = vam->input;
|
||||
vl_api_kp_add_del_vip_t mps, *mp;
|
||||
int ret;
|
||||
mps.is_del = 0;
|
||||
mps.is_nat4 = 0;
|
||||
|
||||
if (!unformat(i, "%U",
|
||||
unformat_ip46_prefix, mps.ip_prefix, &mps.prefix_length, IP46_TYPE_ANY)) {
|
||||
errmsg ("invalid prefix\n");
|
||||
return -99;
|
||||
}
|
||||
|
||||
if (unformat(i, "nat4")) {
|
||||
mps.is_nat4 = 1;
|
||||
} else if (unformat(i, "nat6")) {
|
||||
mps.is_nat4 = 0;
|
||||
} else {
|
||||
errmsg ("no nat\n");
|
||||
return -99;
|
||||
}
|
||||
|
||||
if (!unformat(i, "%d", &mps.new_flows_table_length)) {
|
||||
errmsg ("no table lentgh\n");
|
||||
return -99;
|
||||
}
|
||||
|
||||
if (unformat(i, "del")) {
|
||||
mps.is_del = 1;
|
||||
}
|
||||
|
||||
M(KP_ADD_DEL_VIP, mp);
|
||||
S(mp);
|
||||
W (ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int api_kp_add_del_pod (vat_main_t * vam)
|
||||
{
|
||||
unformat_input_t * i = vam->input;
|
||||
vl_api_kp_add_del_pod_t mps, *mp;
|
||||
int ret;
|
||||
mps.is_del = 0;
|
||||
|
||||
if (!unformat(i, "%U %U",
|
||||
unformat_ip46_prefix, mps.vip_ip_prefix, &mps.vip_prefix_length, IP46_TYPE_ANY,
|
||||
unformat_ip46_address, mps.pod_address)) {
|
||||
errmsg ("invalid prefix or address\n");
|
||||
return -99;
|
||||
}
|
||||
|
||||
if (unformat(i, "del")) {
|
||||
mps.is_del = 1;
|
||||
}
|
||||
|
||||
M(KP_ADD_DEL_POD, mp);
|
||||
S(mp);
|
||||
W (ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* List of messages that the api test plugin sends,
|
||||
* and that the data plane plugin processes
|
||||
*/
|
||||
#define foreach_vpe_api_msg \
|
||||
_(kp_conf, "<sticky_buckets_per_core> <flow_timeout>") \
|
||||
_(kp_add_del_vip, "<ip-prefix> <port> <target_port> <node_port> " \
|
||||
"[nat4|nat6] <new_table_len> [del]") \
|
||||
_(kp_add_del_pod, "<vip-ip-prefix> <address> [del]")
|
||||
|
||||
static void
|
||||
kp_vat_api_hookup (vat_main_t *vam)
|
||||
{
|
||||
kp_test_main_t * kptm = &kp_test_main;
|
||||
/* Hook up handlers for replies from the data plane plug-in */
|
||||
#define _(N,n) \
|
||||
vl_msg_api_set_handlers((VL_API_##N + kptm->msg_id_base), \
|
||||
#n, \
|
||||
vl_api_##n##_t_handler, \
|
||||
vl_noop_handler, \
|
||||
vl_api_##n##_t_endian, \
|
||||
vl_api_##n##_t_print, \
|
||||
sizeof(vl_api_##n##_t), 1);
|
||||
foreach_vpe_api_reply_msg;
|
||||
#undef _
|
||||
|
||||
/* API messages we can send */
|
||||
#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
|
||||
foreach_vpe_api_msg;
|
||||
#undef _
|
||||
|
||||
/* Help strings */
|
||||
#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
|
||||
foreach_vpe_api_msg;
|
||||
#undef _
|
||||
}
|
||||
|
||||
clib_error_t * vat_plugin_register (vat_main_t *vam)
|
||||
{
|
||||
kp_test_main_t * kptm = &kp_test_main;
|
||||
|
||||
u8 * name;
|
||||
|
||||
kptm->vat_main = vam;
|
||||
|
||||
/* Ask the vpp engine for the first assigned message-id */
|
||||
name = format (0, "kp_%08x%c", api_version, 0);
|
||||
kptm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
|
||||
|
||||
if (kptm->msg_id_base != (u16) ~0)
|
||||
kp_vat_api_hookup (vam);
|
||||
|
||||
vec_free(name);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,216 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Intel and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* vppinfra already includes tons of different hash tables.
|
||||
* MagLev flow table is a bit different. It has to be very efficient
|
||||
* for both writing and reading operations. But it does not need to
|
||||
* be 100% reliable (write can fail). It also needs to recycle
|
||||
* old entries in a lazy way.
|
||||
*
|
||||
* This hash table is the most dummy hash table you can do.
|
||||
* Fixed total size, fixed bucket size.
|
||||
* Advantage is that it could be very efficient (maybe).
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef KP_PLUGIN_KP_KPHASH_H_
|
||||
#define KP_PLUGIN_KP_KPHASH_H_
|
||||
|
||||
#include <vnet/vnet.h>
|
||||
#include <vppinfra/xxhash.h>
|
||||
#include <vppinfra/crc32.h>
|
||||
|
||||
/*
|
||||
* @brief Number of entries per bucket.
|
||||
*/
|
||||
#define KPHASH_ENTRY_PER_BUCKET 4
|
||||
|
||||
#define KP_HASH_DO_NOT_USE_SSE_BUCKETS 0
|
||||
|
||||
/**
|
||||
* 32 bits integer comparison for running values.
|
||||
* 1 > 0 is true. But 1 > 0xffffffff also is.
|
||||
*/
|
||||
#define clib_u32_loop_gt(a, b) (((u32)(a)) - ((u32)(b)) < 0x7fffffff)
|
||||
|
||||
/*
|
||||
* @brief One bucket contains 4 entries.
|
||||
* Each bucket takes one 64B cache line in memory.
|
||||
*/
|
||||
typedef struct {
|
||||
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
|
||||
u32 hash[KPHASH_ENTRY_PER_BUCKET];
|
||||
u32 timeout[KPHASH_ENTRY_PER_BUCKET];
|
||||
u32 vip[KPHASH_ENTRY_PER_BUCKET];
|
||||
u32 value[KPHASH_ENTRY_PER_BUCKET];
|
||||
} kp_hash_bucket_t;
|
||||
|
||||
typedef struct {
|
||||
u32 buckets_mask;
|
||||
u32 timeout;
|
||||
kp_hash_bucket_t buckets[];
|
||||
} kp_hash_t;
|
||||
|
||||
#define kp_hash_nbuckets(h) (((h)->buckets_mask) + 1)
|
||||
#define kp_hash_size(h) ((h)->buckets_mask + KPHASH_ENTRY_PER_BUCKET)
|
||||
|
||||
#define kp_hash_foreach_bucket(h, bucket) \
|
||||
for (bucket = (h)->buckets; \
|
||||
bucket < (h)->buckets + kp_hash_nbuckets(h); \
|
||||
bucket++)
|
||||
|
||||
#define kp_hash_foreach_entry(h, bucket, i) \
|
||||
kp_hash_foreach_bucket(h, bucket) \
|
||||
for (i = 0; i < KPHASH_ENTRY_PER_BUCKET; i++)
|
||||
|
||||
#define kp_hash_foreach_valid_entry(h, bucket, i, now) \
|
||||
kp_hash_foreach_entry(h, bucket, i) \
|
||||
if (!clib_u32_loop_gt((now), bucket->timeout[i]))
|
||||
|
||||
static_always_inline
|
||||
kp_hash_t *kp_hash_alloc(u32 buckets, u32 timeout)
|
||||
{
|
||||
if (!is_pow2(buckets))
|
||||
return NULL;
|
||||
|
||||
// Allocate 1 more bucket for prefetch
|
||||
u32 size = ((u64)&((kp_hash_t *)(0))->buckets[0]) +
|
||||
sizeof(kp_hash_bucket_t) * (buckets + 1);
|
||||
u8 *mem = 0;
|
||||
kp_hash_t *h;
|
||||
vec_alloc_aligned(mem, size, CLIB_CACHE_LINE_BYTES);
|
||||
h = (kp_hash_t *)mem;
|
||||
h->buckets_mask = (buckets - 1);
|
||||
h->timeout = timeout;
|
||||
return h;
|
||||
}
|
||||
|
||||
static_always_inline
|
||||
void kp_hash_free(kp_hash_t *h)
|
||||
{
|
||||
u8 *mem = (u8 *)h;
|
||||
vec_free(mem);
|
||||
}
|
||||
|
||||
static_always_inline
|
||||
u32 kp_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4)
|
||||
{
|
||||
#ifdef clib_crc32c_uses_intrinsics
|
||||
u64 key[5];
|
||||
key[0] = k0;
|
||||
key[1] = k1;
|
||||
key[2] = k2;
|
||||
key[3] = k3;
|
||||
key[4] = k4;
|
||||
return clib_crc32c ((u8 *) key, 40);
|
||||
#else
|
||||
u64 tmp = k0 ^ k1 ^ k2 ^ k3 ^ k4;
|
||||
return (u32)clib_xxhash (tmp);
|
||||
#endif
|
||||
}
|
||||
|
||||
static_always_inline
|
||||
void kp_hash_prefetch_bucket(kp_hash_t *ht, u32 hash)
|
||||
{
|
||||
kp_hash_bucket_t *bucket = &ht->buckets[hash & ht->buckets_mask];
|
||||
CLIB_PREFETCH(bucket, sizeof(*bucket), READ);
|
||||
}
|
||||
|
||||
static_always_inline
|
||||
void kp_hash_get(kp_hash_t *ht, u32 hash, u32 vip, u32 time_now,
|
||||
u32 *available_index, u32 *found_value)
|
||||
{
|
||||
kp_hash_bucket_t *bucket = &ht->buckets[hash & ht->buckets_mask];
|
||||
*found_value = ~0;
|
||||
*available_index = ~0;
|
||||
#if __SSE4_2__ && KP_HASH_DO_NOT_USE_SSE_BUCKETS == 0
|
||||
u32 bitmask, found_index;
|
||||
__m128i mask;
|
||||
|
||||
// mask[*] = timeout[*] > now
|
||||
mask = _mm_cmpgt_epi32(_mm_loadu_si128 ((__m128i *) bucket->timeout),
|
||||
_mm_set1_epi32 (time_now));
|
||||
// bitmask[*] = now <= timeout[*/4]
|
||||
bitmask = (~_mm_movemask_epi8(mask)) & 0xffff;
|
||||
// Get first index with now <= timeout[*], if any.
|
||||
*available_index = (bitmask)?__builtin_ctz(bitmask)/4:*available_index;
|
||||
|
||||
// mask[*] = (timeout[*] > now) && (hash[*] == hash)
|
||||
mask = _mm_and_si128(mask,
|
||||
_mm_cmpeq_epi32(
|
||||
_mm_loadu_si128 ((__m128i *) bucket->hash),
|
||||
_mm_set1_epi32 (hash)));
|
||||
|
||||
// Load the array of vip values
|
||||
// mask[*] = (timeout[*] > now) && (hash[*] == hash) && (vip[*] == vip)
|
||||
mask = _mm_and_si128(mask,
|
||||
_mm_cmpeq_epi32(
|
||||
_mm_loadu_si128 ((__m128i *) bucket->vip),
|
||||
_mm_set1_epi32 (vip)));
|
||||
|
||||
// mask[*] = (timeout[*x4] > now) && (hash[*x4] == hash) && (vip[*x4] == vip)
|
||||
bitmask = _mm_movemask_epi8(mask);
|
||||
// Get first index, if any
|
||||
found_index = (bitmask)?__builtin_ctzll(bitmask)/4:0;
|
||||
ASSERT(found_index < 4);
|
||||
*found_value = (bitmask)?bucket->value[found_index]:*found_value;
|
||||
bucket->timeout[found_index] =
|
||||
(bitmask)?time_now + ht->timeout:bucket->timeout[found_index];
|
||||
#else
|
||||
u32 i;
|
||||
for (i = 0; i < KPHASH_ENTRY_PER_BUCKET; i++) {
|
||||
u8 cmp = (bucket->hash[i] == hash && bucket->vip[i] == vip);
|
||||
u8 timeouted = clib_u32_loop_gt(time_now, bucket->timeout[i]);
|
||||
*found_value = (cmp || timeouted)?*found_value:bucket->value[i];
|
||||
bucket->timeout[i] = (cmp || timeouted)?time_now + ht->timeout:bucket->timeout[i];
|
||||
*available_index = (timeouted && (*available_index == ~0))?i:*available_index;
|
||||
|
||||
if (!cmp)
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static_always_inline
|
||||
u32 kp_hash_available_value(kp_hash_t *h, u32 hash, u32 available_index)
|
||||
{
|
||||
return h->buckets[hash & h->buckets_mask].value[available_index];
|
||||
}
|
||||
|
||||
static_always_inline
|
||||
void kp_hash_put(kp_hash_t *h, u32 hash, u32 value, u32 vip,
|
||||
u32 available_index, u32 time_now)
|
||||
{
|
||||
kp_hash_bucket_t *bucket = &h->buckets[hash & h->buckets_mask];
|
||||
bucket->hash[available_index] = hash;
|
||||
bucket->value[available_index] = value;
|
||||
bucket->timeout[available_index] = time_now + h->timeout;
|
||||
bucket->vip[available_index] = vip;
|
||||
}
|
||||
|
||||
static_always_inline
|
||||
u32 kp_hash_elts(kp_hash_t *h, u32 time_now)
|
||||
{
|
||||
u32 tot = 0;
|
||||
kp_hash_bucket_t *bucket;
|
||||
u32 i;
|
||||
kp_hash_foreach_valid_entry(h, bucket, i, time_now) {
|
||||
tot++;
|
||||
}
|
||||
return tot;
|
||||
}
|
||||
|
||||
#endif /* KP_PLUGIN_KP_KPHASH_H_ */
|
@ -107,33 +107,52 @@ vl_api_lb_add_del_vip_t_handler
|
||||
lb_main_t *lbm = &lb_main;
|
||||
vl_api_lb_conf_reply_t * rmp;
|
||||
int rv = 0;
|
||||
ip46_address_t prefix;
|
||||
memcpy(&prefix.ip6, mp->ip_prefix, sizeof(prefix.ip6));
|
||||
lb_vip_add_args_t args;
|
||||
|
||||
memcpy (&(args.prefix.ip6), mp->ip_prefix, sizeof(args.prefix.ip6));
|
||||
|
||||
if (mp->is_del) {
|
||||
u32 vip_index;
|
||||
if (!(rv = lb_vip_find_index(&prefix, mp->prefix_length, &vip_index)))
|
||||
if (!(rv = lb_vip_find_index(&(args.prefix), mp->prefix_length, &vip_index)))
|
||||
rv = lb_vip_del(vip_index);
|
||||
} else {
|
||||
u32 vip_index;
|
||||
lb_vip_type_t type = 0;
|
||||
|
||||
if (ip46_prefix_is_ip4(&prefix, mp->prefix_length)) {
|
||||
if (ip46_prefix_is_ip4(&(args.prefix), mp->prefix_length)) {
|
||||
if (mp->encap == LB_ENCAP_TYPE_GRE4)
|
||||
type = LB_VIP_TYPE_IP4_GRE4;
|
||||
else if (mp->encap == LB_ENCAP_TYPE_GRE6)
|
||||
type = LB_VIP_TYPE_IP4_GRE6;
|
||||
else if (mp->encap == LB_ENCAP_TYPE_L3DSR)
|
||||
type = LB_VIP_TYPE_IP4_L3DSR;
|
||||
else if (mp->encap == LB_ENCAP_TYPE_NAT4)
|
||||
type = LB_VIP_TYPE_IP4_NAT4;
|
||||
} else {
|
||||
if (mp->encap == LB_ENCAP_TYPE_GRE4)
|
||||
type = LB_VIP_TYPE_IP6_GRE4;
|
||||
else if (mp->encap == LB_ENCAP_TYPE_GRE6)
|
||||
type = LB_VIP_TYPE_IP6_GRE6;
|
||||
else if (mp->encap == LB_ENCAP_TYPE_NAT6)
|
||||
type = LB_VIP_TYPE_IP6_NAT6;
|
||||
}
|
||||
|
||||
rv = lb_vip_add(&prefix, mp->prefix_length, type, mp->dscp,
|
||||
mp->new_flows_table_length, &vip_index);
|
||||
args.plen = mp->prefix_length;
|
||||
args.type = type;
|
||||
args.new_length = mp->new_flows_table_length;
|
||||
|
||||
if (mp->encap == LB_ENCAP_TYPE_L3DSR) {
|
||||
args.encap_args.dscp = (u8)(mp->dscp & 0x3F);
|
||||
}
|
||||
else if ((mp->encap == LB_ENCAP_TYPE_NAT4)
|
||||
||(mp->encap == LB_ENCAP_TYPE_NAT6)) {
|
||||
args.encap_args.srv_type = mp->type;
|
||||
args.encap_args.port = ntohs(mp->port);
|
||||
args.encap_args.target_port = ntohs(mp->target_port);
|
||||
args.encap_args.node_port = ntohs(mp->node_port);
|
||||
}
|
||||
|
||||
rv = lb_vip_add(args, &vip_index);
|
||||
}
|
||||
REPLY_MACRO (VL_API_LB_CONF_REPLY);
|
||||
}
|
||||
@ -146,8 +165,26 @@ static void *vl_api_lb_add_del_vip_t_print
|
||||
s = format (s, "%U ", format_ip46_prefix,
|
||||
(ip46_address_t *)mp->ip_prefix, mp->prefix_length, IP46_TYPE_ANY);
|
||||
|
||||
s = format (s, "%s ", (mp->encap==LB_ENCAP_TYPE_GRE4)?
|
||||
"gre4":(mp->encap==LB_ENCAP_TYPE_GRE6)?"gre6":"l3dsr");
|
||||
s = format (s, "%s ", (mp->encap == LB_ENCAP_TYPE_GRE4)? "gre4"
|
||||
: (mp->encap == LB_ENCAP_TYPE_GRE6)? "gre6"
|
||||
: (mp->encap == LB_ENCAP_TYPE_NAT4)? "nat4"
|
||||
: (mp->encap == LB_ENCAP_TYPE_NAT6)? "nat6"
|
||||
: "l3dsr");
|
||||
|
||||
if (mp->encap==LB_ENCAP_TYPE_L3DSR)
|
||||
{
|
||||
s = format (s, "dscp %u ", mp->dscp);
|
||||
}
|
||||
|
||||
if ((mp->encap==LB_ENCAP_TYPE_NAT4)
|
||||
|| (mp->encap==LB_ENCAP_TYPE_NAT6))
|
||||
{
|
||||
s = format (s, "type %u ", mp->type);
|
||||
s = format (s, "port %u ", mp->port);
|
||||
s = format (s, "target_port %u ", mp->target_port);
|
||||
s = format (s, "node_port %u ", mp->node_port);
|
||||
}
|
||||
|
||||
s = format (s, "%u ", mp->new_flows_table_length);
|
||||
s = format (s, "%s ", mp->is_del?"del":"add");
|
||||
FINISH;
|
||||
@ -161,14 +198,23 @@ vl_api_lb_add_del_as_t_handler
|
||||
vl_api_lb_conf_reply_t * rmp;
|
||||
int rv = 0;
|
||||
u32 vip_index;
|
||||
if ((rv = lb_vip_find_index((ip46_address_t *)mp->vip_ip_prefix,
|
||||
mp->vip_prefix_length, &vip_index)))
|
||||
ip46_address_t vip_ip_prefix;
|
||||
|
||||
memcpy(&vip_ip_prefix.ip6, mp->vip_ip_prefix,
|
||||
sizeof(vip_ip_prefix.ip6));
|
||||
|
||||
ip46_address_t as_address;
|
||||
|
||||
memcpy(&as_address.ip6, mp->as_address,
|
||||
sizeof(as_address.ip6));
|
||||
|
||||
if ((rv = lb_vip_find_index(&vip_ip_prefix, mp->vip_prefix_length, &vip_index)))
|
||||
goto done;
|
||||
|
||||
if (mp->is_del)
|
||||
rv = lb_vip_del_ass(vip_index, (ip46_address_t *)mp->as_address, 1);
|
||||
rv = lb_vip_del_ass(vip_index, &as_address, 1);
|
||||
else
|
||||
rv = lb_vip_add_ass(vip_index, (ip46_address_t *)mp->as_address, 1);
|
||||
rv = lb_vip_add_ass(vip_index, &as_address, 1);
|
||||
|
||||
done:
|
||||
REPLY_MACRO (VL_API_LB_CONF_REPLY);
|
||||
|
@ -21,20 +21,24 @@ lb_vip_command_fn (vlib_main_t * vm,
|
||||
unformat_input_t * input, vlib_cli_command_t * cmd)
|
||||
{
|
||||
unformat_input_t _line_input, *line_input = &_line_input;
|
||||
ip46_address_t prefix;
|
||||
u8 plen;
|
||||
u32 new_len = 1024;
|
||||
lb_vip_add_args_t args;
|
||||
u8 del = 0;
|
||||
int ret;
|
||||
u32 encap = 0;
|
||||
u32 dscp = ~0;
|
||||
lb_vip_type_t type = 0;
|
||||
u32 srv_type = LB_SRV_TYPE_CLUSTERIP;
|
||||
u32 port = 0;
|
||||
u32 target_port = 0;
|
||||
u32 node_port = 0;
|
||||
clib_error_t *error = 0;
|
||||
|
||||
args.new_length = 1024;
|
||||
|
||||
if (!unformat_user (input, unformat_line_input, line_input))
|
||||
return 0;
|
||||
|
||||
if (!unformat(line_input, "%U", unformat_ip46_prefix, &prefix, &plen, IP46_TYPE_ANY)) {
|
||||
if (!unformat(line_input, "%U", unformat_ip46_prefix, &(args.prefix),
|
||||
&(args.plen), IP46_TYPE_ANY, &(args.plen))) {
|
||||
error = clib_error_return (0, "invalid vip prefix: '%U'",
|
||||
format_unformat_error, line_input);
|
||||
goto done;
|
||||
@ -42,7 +46,7 @@ lb_vip_command_fn (vlib_main_t * vm,
|
||||
|
||||
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
|
||||
{
|
||||
if (unformat(line_input, "new_len %d", &new_len))
|
||||
if (unformat(line_input, "new_len %d", &(args.new_length)))
|
||||
;
|
||||
else if (unformat(line_input, "del"))
|
||||
del = 1;
|
||||
@ -52,8 +56,22 @@ lb_vip_command_fn (vlib_main_t * vm,
|
||||
encap = LB_ENCAP_TYPE_GRE6;
|
||||
else if (unformat(line_input, "encap l3dsr"))
|
||||
encap = LB_ENCAP_TYPE_L3DSR;
|
||||
else if (unformat(line_input, "encap nat4"))
|
||||
encap = LB_ENCAP_TYPE_NAT4;
|
||||
else if (unformat(line_input, "encap nat6"))
|
||||
encap = LB_ENCAP_TYPE_NAT6;
|
||||
else if (unformat(line_input, "dscp %d", &dscp))
|
||||
;
|
||||
else if (unformat(line_input, "type clusterip"))
|
||||
srv_type = LB_SRV_TYPE_CLUSTERIP;
|
||||
else if (unformat(line_input, "type nodeport"))
|
||||
srv_type = LB_SRV_TYPE_NODEPORT;
|
||||
else if (unformat(line_input, "port %d", &port))
|
||||
;
|
||||
else if (unformat(line_input, "target_port %d", &target_port))
|
||||
;
|
||||
else if (unformat(line_input, "node_port %d", &node_port))
|
||||
;
|
||||
else {
|
||||
error = clib_error_return (0, "parse error: '%U'",
|
||||
format_unformat_error, line_input);
|
||||
@ -75,32 +93,61 @@ lb_vip_command_fn (vlib_main_t * vm,
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (ip46_prefix_is_ip4(&prefix, plen)) {
|
||||
if (ip46_prefix_is_ip4(&(args.prefix), (args.plen)))
|
||||
{
|
||||
if (encap == LB_ENCAP_TYPE_GRE4)
|
||||
type = LB_VIP_TYPE_IP4_GRE4;
|
||||
args.type = LB_VIP_TYPE_IP4_GRE4;
|
||||
else if (encap == LB_ENCAP_TYPE_GRE6)
|
||||
type = LB_VIP_TYPE_IP4_GRE6;
|
||||
args.type = LB_VIP_TYPE_IP4_GRE6;
|
||||
else if (encap == LB_ENCAP_TYPE_L3DSR)
|
||||
type = LB_VIP_TYPE_IP4_L3DSR;
|
||||
} else {
|
||||
args.type = LB_VIP_TYPE_IP4_L3DSR;
|
||||
else if (encap == LB_ENCAP_TYPE_NAT4)
|
||||
args.type = LB_VIP_TYPE_IP4_NAT4;
|
||||
else if (encap == LB_ENCAP_TYPE_NAT6)
|
||||
{
|
||||
error = clib_error_return(0, "currently does not support NAT46");
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (encap == LB_ENCAP_TYPE_GRE4)
|
||||
type = LB_VIP_TYPE_IP6_GRE4;
|
||||
args.type = LB_VIP_TYPE_IP6_GRE4;
|
||||
else if (encap == LB_ENCAP_TYPE_GRE6)
|
||||
type = LB_VIP_TYPE_IP6_GRE6;
|
||||
args.type = LB_VIP_TYPE_IP6_GRE6;
|
||||
else if (encap == LB_ENCAP_TYPE_NAT6)
|
||||
args.type = LB_VIP_TYPE_IP6_NAT6;
|
||||
else if (encap == LB_ENCAP_TYPE_NAT4)
|
||||
{
|
||||
error = clib_error_return(0, "currently does not support NAT64");
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
lb_garbage_collection();
|
||||
|
||||
u32 index;
|
||||
if (!del) {
|
||||
if ((ret = lb_vip_add(&prefix, plen, type, (u8)(dscp & 0x3F), new_len, &index))) {
|
||||
if (encap == LB_ENCAP_TYPE_L3DSR) {
|
||||
args.encap_args.dscp = (u8)(dscp & 0x3F);
|
||||
}
|
||||
else if ((encap == LB_ENCAP_TYPE_NAT4)
|
||||
|| (encap == LB_ENCAP_TYPE_NAT6))
|
||||
{
|
||||
args.encap_args.srv_type = (u8) srv_type;
|
||||
args.encap_args.port = (u16) port;
|
||||
args.encap_args.target_port = (u16) target_port;
|
||||
args.encap_args.node_port = (u16) node_port;
|
||||
}
|
||||
|
||||
if ((ret = lb_vip_add(args, &index))) {
|
||||
error = clib_error_return (0, "lb_vip_add error %d", ret);
|
||||
goto done;
|
||||
} else {
|
||||
vlib_cli_output(vm, "lb_vip_add ok %d", index);
|
||||
}
|
||||
} else {
|
||||
if ((ret = lb_vip_find_index(&prefix, plen, &index))) {
|
||||
if ((ret = lb_vip_find_index(&(args.prefix), args.plen, &index))) {
|
||||
error = clib_error_return (0, "lb_vip_find_index error %d", ret);
|
||||
goto done;
|
||||
} else if ((ret = lb_vip_del(index))) {
|
||||
@ -118,7 +165,10 @@ done:
|
||||
VLIB_CLI_COMMAND (lb_vip_command, static) =
|
||||
{
|
||||
.path = "lb vip",
|
||||
.short_help = "lb vip <prefix> [encap (gre6|gre4|l3dsr)] [dscp <n>] [new_len <n>] [del]",
|
||||
.short_help = "lb vip <prefix> [encap (gre6|gre4|l3dsr|nat4|nat6)] "
|
||||
"[dscp <n>] "
|
||||
"[type (nodeport|clusterip) port <n> target_port <n> node_port <n>] "
|
||||
"[new_len <n>] [del]",
|
||||
.function = lb_vip_command_fn,
|
||||
};
|
||||
|
||||
@ -300,6 +350,99 @@ VLIB_CLI_COMMAND (lb_show_vips_command, static) =
|
||||
.function = lb_show_vips_command_fn,
|
||||
};
|
||||
|
||||
static clib_error_t *
|
||||
lb_set_interface_nat_command_fn (vlib_main_t * vm,
|
||||
unformat_input_t * input,
|
||||
vlib_cli_command_t * cmd,
|
||||
u8 is_nat6)
|
||||
{
|
||||
unformat_input_t _line_input, *line_input = &_line_input;
|
||||
vnet_main_t * vnm = vnet_get_main();
|
||||
clib_error_t * error = 0;
|
||||
u32 * sw_if_index = 0;
|
||||
u32 * inside_sw_if_indices = 0;
|
||||
int is_del = 0;
|
||||
|
||||
/* Get a line of input. */
|
||||
if (!unformat_user (input, unformat_line_input, line_input))
|
||||
return 0;
|
||||
|
||||
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
|
||||
{
|
||||
if (unformat (line_input, "in %U", unformat_vnet_sw_interface,
|
||||
vnm, sw_if_index))
|
||||
vec_add1 (inside_sw_if_indices, *sw_if_index);
|
||||
else if (unformat (line_input, "del"))
|
||||
is_del = 1;
|
||||
else
|
||||
{
|
||||
error = clib_error_return (0, "unknown input '%U'",
|
||||
format_unformat_error, line_input);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
vec_foreach (sw_if_index, inside_sw_if_indices)
|
||||
{
|
||||
if (!is_nat6)
|
||||
{
|
||||
if (lb_nat4_interface_add_del (*sw_if_index, is_del))
|
||||
{
|
||||
error = clib_error_return(
|
||||
0, "%s %U failed", is_del ? "del" : "add",
|
||||
format_vnet_sw_interface_name, vnm,
|
||||
vnet_get_sw_interface (vnm, *sw_if_index));
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (lb_nat6_interface_add_del (*sw_if_index, is_del))
|
||||
{
|
||||
error = clib_error_return(
|
||||
0, "%s %U failed", is_del ? "del" : "add",
|
||||
format_vnet_sw_interface_name, vnm,
|
||||
vnet_get_sw_interface (vnm, *sw_if_index));
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
unformat_free (line_input);
|
||||
vec_free (inside_sw_if_indices);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static clib_error_t *
|
||||
lb_set_interface_nat4_command_fn (vlib_main_t * vm,
|
||||
unformat_input_t * input,
|
||||
vlib_cli_command_t * cmd)
|
||||
{
|
||||
return lb_set_interface_nat_command_fn(vm, input, cmd, 0);
|
||||
}
|
||||
|
||||
VLIB_CLI_COMMAND (lb_set_interface_nat4_command, static) = {
|
||||
.path = "lb set interface nat4",
|
||||
.function = lb_set_interface_nat4_command_fn,
|
||||
.short_help = "lb set interface nat4 in <intfc> [del]",
|
||||
};
|
||||
|
||||
static clib_error_t *
|
||||
lb_set_interface_nat6_command_fn (vlib_main_t * vm,
|
||||
unformat_input_t * input,
|
||||
vlib_cli_command_t * cmd)
|
||||
{
|
||||
return lb_set_interface_nat_command_fn(vm, input, cmd, 1);
|
||||
}
|
||||
|
||||
VLIB_CLI_COMMAND (lb_set_interface_nat6_command, static) = {
|
||||
.path = "lb set interface nat6",
|
||||
.function = lb_set_interface_nat6_command_fn,
|
||||
.short_help = "lb set interface nat6 in <intfc> [del]",
|
||||
};
|
||||
|
||||
static clib_error_t *
|
||||
lb_flowtable_flush_command_fn (vlib_main_t * vm,
|
||||
unformat_input_t * input, vlib_cli_command_t * cmd)
|
||||
|
@ -3,9 +3,9 @@ option version = "1.0.0";
|
||||
/** \brief Configure Load-Balancer global parameters
|
||||
@param client_index - opaque cookie to identify the sender
|
||||
@param context - sender context, to match reply w/ request
|
||||
@param ip4_src_address - IPv4 address to be used as source for IPv4 GRE traffic.
|
||||
@param ip6_src_address - IPv6 address to be used as source for IPv6 GRE traffic.
|
||||
@param n_sticky_buckets - Number of buckets *per worker thread* in the
|
||||
@param ip4_src_address - IPv4 address to be used as source for IPv4 traffic(applicable in GRE4/GRE6/NAT4/NAT6 mode only).
|
||||
@param ip6_src_address - IPv6 address to be used as source for IPv6 traffic(applicable in GRE4/GRE6/NAT4/NAT6 mode only).
|
||||
@param sticky_buckets_per_core - Number of buckets *per worker thread* in the
|
||||
established flow table (must be power of 2).
|
||||
@param flow_timeout - Time in seconds after which, if no packet is received
|
||||
for a given flow, the flow is removed from the established flow table.
|
||||
@ -25,8 +25,12 @@ autoreply define lb_conf
|
||||
@param context - sender context, to match reply w/ request
|
||||
@param ip_prefix - IP address (IPv4 in lower order 32 bits).
|
||||
@param prefix_length - IP prefix length (96 + 'IPv4 prefix length' for IPv4).
|
||||
@param encap - Encap is ip4 GRE(0) or ip6 GRE(1) or L3DSR(2).
|
||||
@param encap - Encap is ip4 GRE(0) or ip6 GRE(1) or L3DSR(2) or NAT4(3) or NAT6(4).
|
||||
@param dscp - DSCP bit corresponding to VIP(applicable in L3DSR mode only).
|
||||
@param type - service type(applicable in NAT4/NAT6 mode only).
|
||||
@param port - service port(applicable in NAT4/NAT6 mode only).
|
||||
@param target_port - Pod's port corresponding to specific service(applicable in NAT4/NAT6 mode only).
|
||||
@param node_port - Node's port(applicable in NAT4/NAT6 mode only).
|
||||
@param new_flows_table_length - Size of the new connections flow table used
|
||||
for this VIP (must be power of 2).
|
||||
@param is_del - The VIP should be removed.
|
||||
@ -38,6 +42,10 @@ autoreply define lb_add_del_vip {
|
||||
u8 prefix_length;
|
||||
u8 encap;
|
||||
u8 dscp;
|
||||
u8 type;
|
||||
u16 port;
|
||||
u16 target_port;
|
||||
u16 node_port;
|
||||
u32 new_flows_table_length;
|
||||
u8 is_del;
|
||||
};
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -29,6 +29,32 @@ Both VIPs or ASs can be IPv4 or IPv6, but for a given VIP, all ASs must be using
|
||||
the same encap. type (i.e. IPv4+GRE or IPv6+GRE or IPv4+L3DSR).
|
||||
Meaning that for a given VIP, all AS addresses must be of the same family.
|
||||
|
||||
3). IPv4/IPv6 + NAT4/NAT6 encap types:
|
||||
This type provides kube-proxy data plane on user space,
|
||||
which is used to replace linux kernal's kube-proxy based on iptables.
|
||||
|
||||
Currently, load balancer plugin supports three service types:
|
||||
a) Cluster IP plus Port: support any protocols, including TCP, UDP.
|
||||
b) Node IP plus Node Port: currently only support UDP.
|
||||
c) External Load Balancer.
|
||||
|
||||
For Cluster IP plus Port case:
|
||||
kube-proxy is configured with a set of Virtual IPs (VIP, which can be
|
||||
prefixes), and for each VIP, with a set of AS addresses (ASs).
|
||||
|
||||
For a specific session received for a given VIP (or VIP prefix),
|
||||
first packet selects a AS according to internal load balancing algorithm,
|
||||
then does DNAT operation and sent to chosen AS.
|
||||
At the same time, will create a session entry to store AS chosen result.
|
||||
Following packets for that session will look up session table first,
|
||||
which ensures that a given session will always be routed to the same AS.
|
||||
|
||||
For returned packet from AS, it will do SNAT operation and sent out.
|
||||
|
||||
Please refer to below for details:
|
||||
https://schd.ws/hosted_files/ossna2017/1e/VPP_K8S_GTPU_OSSNA.pdf
|
||||
|
||||
|
||||
## Performances
|
||||
|
||||
The load balancer has been tested up to 1 millions flows and still forwards more
|
||||
@ -45,9 +71,11 @@ The load balancer needs to be configured with some parameters:
|
||||
lb conf [ip4-src-address <addr>] [ip6-src-address <addr>]
|
||||
[buckets <n>] [timeout <s>]
|
||||
|
||||
ip4-src-address: the source address used to send encap. packets using IPv4.
|
||||
ip4-src-address: the source address used to send encap. packets using IPv4 for GRE4 mode.
|
||||
or Node IP4 address for NAT4 mode.
|
||||
|
||||
ip6-src-address: the source address used to send encap. packets using IPv6.
|
||||
ip6-src-address: the source address used to send encap. packets using IPv6 for GRE6 mode.
|
||||
or Node IP6 address for NAT6 mode.
|
||||
|
||||
buckets: the *per-thread* established-connexions-table number of buckets.
|
||||
|
||||
@ -57,13 +85,15 @@ timeout: the number of seconds a connection will remain in the
|
||||
|
||||
### Configure the VIPs
|
||||
|
||||
lb vip <prefix> [encap (gre6|gre4|l3dsr)] [dscp <n>] [new_len <n>] [del]
|
||||
lb vip <prefix> [encap (gre6|gre4|l3dsr|nat4|nat6)] \
|
||||
[dscp <n>] [port <n> target_port <n> node_port <n>] [new_len <n>] [del]
|
||||
|
||||
new_len is the size of the new-connection-table. It should be 1 or 2 orders of
|
||||
magnitude bigger than the number of ASs for the VIP in order to ensure a good
|
||||
load balancing.
|
||||
Encap l3dsr and dscp is used to map VIP to dscp bit and rewrite DSCP bit in packets.
|
||||
So the selected server could get VIP from DSCP bit in this packet and perform DSR.
|
||||
Encap nat4/nat6 and port/target_port/node_port is used to do kube-proxy data plane.
|
||||
|
||||
Examples:
|
||||
|
||||
@ -72,6 +102,8 @@ Examples:
|
||||
lb vip 80.0.0.0/8 encap gre6 new_len 16
|
||||
lb vip 90.0.0.0/8 encap gre4 new_len 1024
|
||||
lb vip 100.0.0.0/8 encap l3dsr dscp 2 new_len 32
|
||||
lb vip 90.1.2.1/32 encap nat4 port 3306 target_port 3307 node_port 30964 new_len 1024
|
||||
lb vip 2004::/16 encap nat6 port 6306 target_port 6307 node_port 30966 new_len 1024
|
||||
|
||||
### Configure the ASs (for each VIP)
|
||||
|
||||
@ -87,7 +119,17 @@ Examples:
|
||||
lb as 80.0.0.0/8 2001::2
|
||||
lb as 90.0.0.0/8 10.0.0.1
|
||||
|
||||
### Configure SNAT
|
||||
|
||||
lb set interface nat4 in <intfc> [del]
|
||||
|
||||
Set SNAT feature in a specific interface.
|
||||
(applicable in NAT4 mode only)
|
||||
|
||||
lb set interface nat6 in <intfc> [del]
|
||||
|
||||
Set SNAT feature in a specific interface.
|
||||
(applicable in NAT6 mode only)
|
||||
|
||||
## Monitoring
|
||||
|
||||
|
@ -171,6 +171,10 @@ static int api_lb_add_del_vip (vat_main_t * vam)
|
||||
mps.encap = LB_ENCAP_TYPE_GRE6;
|
||||
} else if (unformat(i, "l3dsr")) {
|
||||
mps.encap = LB_ENCAP_TYPE_L3DSR;
|
||||
} else if (unformat(i, "nat4")) {
|
||||
mps.encap = LB_ENCAP_TYPE_NAT4;
|
||||
} else if (unformat(i, "nat6")) {
|
||||
mps.encap = LB_ENCAP_TYPE_NAT6;
|
||||
} else {
|
||||
errmsg ("no encap\n");
|
||||
return -99;
|
||||
@ -221,7 +225,9 @@ static int api_lb_add_del_as (vat_main_t * vam)
|
||||
*/
|
||||
#define foreach_vpe_api_msg \
|
||||
_(lb_conf, "<ip4-src-addr> <ip6-src-address> <sticky_buckets_per_core> <flow_timeout>") \
|
||||
_(lb_add_del_vip, "<ip-prefix> [gre4|gre6] <new_table_len> [del]") \
|
||||
_(lb_add_del_vip, "<ip-prefix> [gre4|gre6|l3dsr|nat4|nat6] " \
|
||||
"<dscp> <port> <target_port> <node_port> " \
|
||||
"<new_table_len> [del]") \
|
||||
_(lb_add_del_as, "<vip-ip-prefix> <address> [del]")
|
||||
|
||||
static void
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,207 +0,0 @@
|
||||
import socket
|
||||
import unittest
|
||||
|
||||
from scapy.layers.inet import IP, UDP
|
||||
from scapy.layers.inet6 import IPv6
|
||||
from scapy.layers.l2 import Ether
|
||||
from scapy.packet import Raw
|
||||
|
||||
from framework import VppTestCase, running_extended_tests
|
||||
from util import ppp
|
||||
|
||||
""" TestKP is a subclass of VPPTestCase classes.
|
||||
|
||||
TestKP class defines Four NAT test case for:
|
||||
- IP4 to IP4 NAT
|
||||
- IP4 to IP6 NAT
|
||||
- IP6 to IP4 NAT
|
||||
- IP6 to IP6 NAT
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class TestKP(VppTestCase):
|
||||
""" Kube-proxy Test Case """
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
super(TestKP, cls).setUpClass()
|
||||
cls.pods = range(5)
|
||||
cls.packets = range(5)
|
||||
|
||||
try:
|
||||
cls.create_pg_interfaces(range(2))
|
||||
cls.interfaces = list(cls.pg_interfaces)
|
||||
|
||||
for i in cls.interfaces:
|
||||
i.admin_up()
|
||||
i.config_ip4()
|
||||
i.config_ip6()
|
||||
i.disable_ipv6_ra()
|
||||
i.resolve_arp()
|
||||
i.resolve_ndp()
|
||||
dst4 = socket.inet_pton(socket.AF_INET, "10.0.0.0")
|
||||
dst6 = socket.inet_pton(socket.AF_INET6, "2002::")
|
||||
cls.vapi.ip_add_del_route(dst4, 24, cls.pg1.remote_ip4n)
|
||||
cls.vapi.ip_add_del_route(dst6, 16, cls.pg1.remote_ip6n, is_ipv6=1)
|
||||
except Exception:
|
||||
super(TestKP, cls).tearDownClass()
|
||||
raise
|
||||
|
||||
def tearDown(self):
|
||||
super(TestKP, self).tearDown()
|
||||
if not self.vpp_dead:
|
||||
self.logger.info(self.vapi.cli("show ku vip verbose"))
|
||||
|
||||
def getIPv4Flow(self, id):
|
||||
return (IP(dst="90.0.%u.%u" % (id / 255, id % 255),
|
||||
src="40.0.%u.%u" % (id / 255, id % 255)) /
|
||||
UDP(sport=10000 + id, dport=3306))
|
||||
|
||||
def getIPv6Flow(self, id):
|
||||
return (IPv6(dst="2001::%u" % (id), src="fd00:f00d:ffff::%u" % (id)) /
|
||||
UDP(sport=10000 + id, dport=3306))
|
||||
|
||||
def generatePackets(self, src_if, isv4):
|
||||
self.reset_packet_infos()
|
||||
pkts = []
|
||||
for pktid in self.packets:
|
||||
info = self.create_packet_info(src_if, self.pg1)
|
||||
payload = self.info_to_payload(info)
|
||||
ip = self.getIPv4Flow(pktid) if isv4 else self.getIPv6Flow(pktid)
|
||||
packet = (Ether(dst=src_if.local_mac, src=src_if.remote_mac) /
|
||||
ip /
|
||||
Raw(payload))
|
||||
self.extend_packet(packet, 128)
|
||||
info.data = packet.copy()
|
||||
pkts.append(packet)
|
||||
return pkts
|
||||
|
||||
def checkInner(self, udp):
|
||||
self.assertEqual(udp.dport, 3307)
|
||||
|
||||
def checkCapture(self, nat4, isv4):
|
||||
self.pg0.assert_nothing_captured()
|
||||
out = self.pg1.get_capture(len(self.packets))
|
||||
|
||||
load = [0] * len(self.pods)
|
||||
self.info = None
|
||||
for p in out:
|
||||
try:
|
||||
podid = 0
|
||||
udp = None
|
||||
if nat4:
|
||||
ip = p[IP]
|
||||
podid = int(ip.dst.split(".")[3])
|
||||
self.assertEqual(ip.version, 4)
|
||||
self.assertEqual(ip.flags, 0)
|
||||
self.assertEqual(ip.dst, "10.0.0.%u" % podid)
|
||||
self.assertEqual(ip.proto, 17)
|
||||
self.assertEqual(len(ip.options), 0)
|
||||
self.assertGreaterEqual(ip.ttl, 63)
|
||||
udp = p[UDP]
|
||||
else:
|
||||
ip = p[IPv6]
|
||||
podid = ip.dst.split(":")
|
||||
podid = podid[len(podid) - 1]
|
||||
podid = 0 if podid == "" else int(podid)
|
||||
self.assertEqual(ip.version, 6)
|
||||
self.assertEqual(ip.tc, 0)
|
||||
self.assertEqual(ip.fl, 0)
|
||||
self.assertEqual(
|
||||
socket.inet_pton(socket.AF_INET6, ip.dst),
|
||||
socket.inet_pton(socket.AF_INET6, "2002::%u" % podid)
|
||||
)
|
||||
self.assertEqual(ip.nh, 17)
|
||||
self.assertGreaterEqual(ip.hlim, 63)
|
||||
udp = UDP(str(p[IPv6].payload))
|
||||
# self.assertEqual(len(ip.options), 0)
|
||||
self.checkInner(udp)
|
||||
load[podid] += 1
|
||||
except:
|
||||
self.logger.error(ppp("Unexpected or invalid packet:", p))
|
||||
raise
|
||||
|
||||
# This is just to roughly check that the balancing algorithm
|
||||
# is not completly biased.
|
||||
for podid in self.pods:
|
||||
if load[podid] < len(self.packets) / (len(self.pods) * 2):
|
||||
self.log(
|
||||
"Pod isn't balanced: load[%d] = %d" % (podid, load[podid]))
|
||||
raise Exception("Kube-proxy algorithm is biased")
|
||||
|
||||
def test_kp_ip4_nat4(self):
|
||||
""" Kube-proxy NAT44 """
|
||||
try:
|
||||
self.vapi.cli("ku vip 90.0.0.0/8 port 3306 target_port 3307 nat4")
|
||||
for podid in self.pods:
|
||||
self.vapi.cli("ku pod 90.0.0.0/8 10.0.0.%u" % (podid))
|
||||
|
||||
self.pg0.add_stream(self.generatePackets(self.pg0, isv4=True))
|
||||
self.pg_enable_capture(self.pg_interfaces)
|
||||
self.pg_start()
|
||||
self.checkCapture(nat4=True, isv4=True)
|
||||
|
||||
finally:
|
||||
for podid in self.pods:
|
||||
self.vapi.cli("ku pod 90.0.0.0/8 10.0.0.%u del" % (podid))
|
||||
self.vapi.cli("ku vip 90.0.0.0/8 nat4 del")
|
||||
self.vapi.cli("test kube-proxy flowtable flush")
|
||||
|
||||
@unittest.skip("this test is broken")
|
||||
def test_kp_ip6_nat4(self):
|
||||
""" Kube-proxy NAT64 """
|
||||
|
||||
try:
|
||||
self.vapi.cli("ku vip 90.0.0.0/8 port 3306 target_port 3307 nat4")
|
||||
for podid in self.pods:
|
||||
self.vapi.cli("ku pod 2001::/16 10.0.0.%u" % (podid))
|
||||
|
||||
self.pg0.add_stream(self.generatePackets(self.pg0, isv4=False))
|
||||
self.pg_enable_capture(self.pg_interfaces)
|
||||
self.pg_start()
|
||||
|
||||
self.checkCapture(nat4=True, isv4=False)
|
||||
finally:
|
||||
for podid in self.pods:
|
||||
self.vapi.cli("ku pod 2001::/16 10.0.0.%u del" % (podid))
|
||||
self.vapi.cli("ku vip 2001::/16 nat4 del")
|
||||
self.vapi.cli("test kube-proxy flowtable flush")
|
||||
|
||||
@unittest.skip("this test is broken")
|
||||
def test_kp_ip4_nat6(self):
|
||||
""" Kube-proxy NAT46 """
|
||||
try:
|
||||
self.vapi.cli("ku vip 90.0.0.0/8 port 3306 target_port 3307 nat6")
|
||||
for podid in self.pods:
|
||||
self.vapi.cli("ku pod 90.0.0.0/8 2002::%u" % (podid))
|
||||
|
||||
self.pg0.add_stream(self.generatePackets(self.pg0, isv4=True))
|
||||
self.pg_enable_capture(self.pg_interfaces)
|
||||
self.pg_start()
|
||||
|
||||
self.checkCapture(nat4=False, isv4=True)
|
||||
finally:
|
||||
for podid in self.pods:
|
||||
self.vapi.cli("ku pod 90.0.0.0/8 2002::%u del" % (podid))
|
||||
self.vapi.cli("ku vip 90.0.0.0/8 nat6 del")
|
||||
self.vapi.cli("test kube-proxy flowtable flush")
|
||||
|
||||
@unittest.skipUnless(running_extended_tests(), "part of extended tests")
|
||||
def test_kp_ip6_nat6(self):
|
||||
""" Kube-proxy NAT66 """
|
||||
try:
|
||||
self.vapi.cli("ku vip 2001::/16 port 3306 target_port 3307 nat6")
|
||||
for podid in self.pods:
|
||||
self.vapi.cli("ku pod 2001::/16 2002::%u" % (podid))
|
||||
|
||||
self.pg0.add_stream(self.generatePackets(self.pg0, isv4=False))
|
||||
self.pg_enable_capture(self.pg_interfaces)
|
||||
self.pg_start()
|
||||
|
||||
self.checkCapture(nat4=False, isv4=False)
|
||||
finally:
|
||||
for podid in self.pods:
|
||||
self.vapi.cli("ku pod 2001::/16 2002::%u del" % (podid))
|
||||
self.vapi.cli("ku vip 2001::/16 nat6 del")
|
||||
self.vapi.cli("test kube-proxy flowtable flush")
|
@ -16,6 +16,8 @@ from util import ppp
|
||||
- IP6 to GRE4 encap
|
||||
- IP6 to GRE6 encap
|
||||
- IP4 to L3DSR encap
|
||||
- IP4 to NAT4 encap
|
||||
- IP6 to NAT6 encap
|
||||
|
||||
As stated in comments below, GRE has issues with IPv6.
|
||||
All test cases involving IPv6 are executed, but
|
||||
@ -135,7 +137,7 @@ class TestLB(VppTestCase):
|
||||
# self.assertEqual(len(ip.options), 0)
|
||||
gre = GRE(str(p[IPv6].payload))
|
||||
self.checkInner(gre, isv4)
|
||||
if (encap == 'l3dsr'):
|
||||
elif (encap == 'l3dsr'):
|
||||
ip = p[IP]
|
||||
asid = int(ip.dst.split(".")[3])
|
||||
self.assertEqual(ip.version, 4)
|
||||
@ -143,6 +145,33 @@ class TestLB(VppTestCase):
|
||||
self.assertEqual(ip.dst, "10.0.0.%u" % asid)
|
||||
self.assertEqual(ip.tos, 0x1c)
|
||||
self.assertEqual(len(ip.options), 0)
|
||||
elif (encap == 'nat4'):
|
||||
ip = p[IP]
|
||||
asid = int(ip.dst.split(".")[3])
|
||||
self.assertEqual(ip.version, 4)
|
||||
self.assertEqual(ip.flags, 0)
|
||||
self.assertEqual(ip.dst, "10.0.0.%u" % asid)
|
||||
self.assertEqual(ip.proto, 17)
|
||||
self.assertEqual(len(ip.options), 0)
|
||||
self.assertGreaterEqual(ip.ttl, 63)
|
||||
udp = p[UDP]
|
||||
self.assertEqual(udp.dport, 3307)
|
||||
elif (encap == 'nat6'):
|
||||
ip = p[IPv6]
|
||||
asid = ip.dst.split(":")
|
||||
asid = asid[len(asid) - 1]
|
||||
asid = 0 if asid == "" else int(asid)
|
||||
self.assertEqual(ip.version, 6)
|
||||
self.assertEqual(ip.tc, 0)
|
||||
self.assertEqual(ip.fl, 0)
|
||||
self.assertEqual(
|
||||
socket.inet_pton(socket.AF_INET6, ip.dst),
|
||||
socket.inet_pton(socket.AF_INET6, "2002::%u" % asid)
|
||||
)
|
||||
self.assertEqual(ip.nh, 17)
|
||||
self.assertGreaterEqual(ip.hlim, 63)
|
||||
udp = UDP(str(p[IPv6].payload))
|
||||
self.assertEqual(udp.dport, 3307)
|
||||
load[asid] += 1
|
||||
except:
|
||||
self.logger.error(ppp("Unexpected or invalid packet:", p))
|
||||
@ -246,3 +275,43 @@ class TestLB(VppTestCase):
|
||||
self.vapi.cli("lb as 90.0.0.0/8 10.0.0.%u del" % (asid))
|
||||
self.vapi.cli("lb vip 90.0.0.0/8 encap l3dsr dscp 7 del")
|
||||
self.vapi.cli("test lb flowtable flush")
|
||||
|
||||
def test_lb_ip4_nat4(self):
|
||||
""" Load Balancer IP4 NAT4 """
|
||||
try:
|
||||
self.vapi.cli("lb vip 90.0.0.0/8 encap nat4"
|
||||
" type clusterip port 3306 target_port 3307")
|
||||
for asid in self.ass:
|
||||
self.vapi.cli("lb as 90.0.0.0/8 10.0.0.%u" % (asid))
|
||||
|
||||
self.pg0.add_stream(self.generatePackets(self.pg0, isv4=True))
|
||||
self.pg_enable_capture(self.pg_interfaces)
|
||||
self.pg_start()
|
||||
self.checkCapture(encap='nat4', isv4=True)
|
||||
|
||||
finally:
|
||||
for asid in self.ass:
|
||||
self.vapi.cli("lb as 90.0.0.0/8 10.0.0.%u del" % (asid))
|
||||
self.vapi.cli("lb vip 90.0.0.0/8 encap nat4"
|
||||
" type clusterip port 3306 target_port 3307 del")
|
||||
self.vapi.cli("test lb flowtable flush")
|
||||
|
||||
def test_lb_ip6_nat6(self):
|
||||
""" Load Balancer IP6 NAT6 """
|
||||
try:
|
||||
self.vapi.cli("lb vip 2001::/16 encap nat6"
|
||||
" type clusterip port 3306 target_port 3307")
|
||||
for asid in self.ass:
|
||||
self.vapi.cli("lb as 2001::/16 2002::%u" % (asid))
|
||||
|
||||
self.pg0.add_stream(self.generatePackets(self.pg0, isv4=False))
|
||||
self.pg_enable_capture(self.pg_interfaces)
|
||||
self.pg_start()
|
||||
self.checkCapture(encap='nat6', isv4=False)
|
||||
|
||||
finally:
|
||||
for asid in self.ass:
|
||||
self.vapi.cli("lb as 2001::/16 2002::%u del" % (asid))
|
||||
self.vapi.cli("lb vip 2001::/16 encap nat6"
|
||||
" type clusterip port 3306 target_port 3307 del")
|
||||
self.vapi.cli("test lb flowtable flush")
|
||||
|
Reference in New Issue
Block a user