VPP-130: MagLev-like Load Balancer
This plugin provides load balancing for VPP in a way that is largely inspired from Google's MagLev: http://research.google.com/pubs/pub44824.html More info in the README.md Change-Id: I1223f495d5c2d5200808a398504119f2830337e9 Signed-off-by: Pierre Pfister <ppfister@cisco.com>
This commit is contained in:
Pierre Pfister
committed by
Dave Barach
parent
3590ac5881
commit
041eacc816
@ -47,3 +47,7 @@ endif
|
||||
if ENABLE_ila_PLUGIN
|
||||
SUBDIRS += ila-plugin
|
||||
endif
|
||||
|
||||
# Descend into lb-plugin only when the ENABLE_lb_PLUGIN automake conditional is true.
if ENABLE_lb_PLUGIN
|
||||
SUBDIRS += lb-plugin
|
||||
endif
|
||||
|
@ -57,6 +57,7 @@ PLUGIN_ENABLED(sixrd)
|
||||
PLUGIN_ENABLED(ioam)
|
||||
PLUGIN_ENABLED(snat)
|
||||
PLUGIN_ENABLED(ila)
|
||||
PLUGIN_ENABLED(lb)
|
||||
|
||||
# Disabled plugins, require --enable-XXX-plugin
|
||||
PLUGIN_DISABLED(vcgn)
|
||||
|
42
plugins/lb-plugin/Makefile.am
Normal file
42
plugins/lb-plugin/Makefile.am
Normal file
@ -0,0 +1,42 @@
|
||||
# Copyright (c) 2016 Cisco Systems, Inc.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at:
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Build rules for the load-balancer plugin (lb_plugin.la) and for its
# API test companion (lb_test_plugin.la).
AUTOMAKE_OPTIONS = foreign subdir-objects

AM_CFLAGS = -Wall
AM_LDFLAGS = -module -shared -avoid-version

vppapitestpluginsdir = ${libdir}/vpp_api_test_plugins
vpppluginsdir = ${libdir}/vpp_plugins

vppapitestplugins_LTLIBRARIES = lb_test_plugin.la
vppplugins_LTLIBRARIES = lb_plugin.la

lb_plugin_la_SOURCES = lb/lb.c lb/node.c lb/cli.c lb/util.c lb/refcount.c lb/api.c

# lb/lb.api.h is generated from lb/lb.api and #included by lb/api.c and
# lb/lb_test.c, so it has to exist before any object is compiled.
BUILT_SOURCES = lb/lb.api.h

SUFFIXES = .api.h .api

# Preprocess the .api description and feed it to vppapigen.
# '&&' stops the recipe if the output directory cannot be created.
%.api.h: %.api
	mkdir -p `dirname $@` && \
	$(CC) $(CPPFLAGS) -E -P -C -x c $< \
	| vppapigen --input - --output $@ --show-name $@

noinst_HEADERS = lb/lb.h lb/util.h lb/refcount.h lb/lbhash.h lb/lb.api.h

# The test plugin uses the same generated header as the plugin itself.
lb_test_plugin_la_SOURCES = \
  lb/lb_test.c lb/lb.api.h

# Remove *.la files.
# $(DESTDIR) is prepended so that staged installs clean the staging tree
# rather than the live system directories (it is empty for plain installs).
install-data-hook:
	@(cd $(DESTDIR)$(vpppluginsdir) && $(RM) $(vppplugins_LTLIBRARIES))
	@(cd $(DESTDIR)$(vppapitestpluginsdir) && $(RM) $(vppapitestplugins_LTLIBRARIES))
|
141
plugins/lb-plugin/README.md
Normal file
141
plugins/lb-plugin/README.md
Normal file
@ -0,0 +1,141 @@
|
||||
# Load Balancer plugin for VPP
|
||||
|
||||
## Version
|
||||
|
||||
The load balancer plugin is currently in *beta* version.
|
||||
Both CLIs and APIs are subject to *heavy* changes.
|
||||
Which also means feedback is really welcome regarding features, APIs, etc.
|
||||
|
||||
## Overview
|
||||
|
||||
This plugin provides load balancing for VPP in a way that is largely inspired
|
||||
from Google's MagLev: http://research.google.com/pubs/pub44824.html
|
||||
|
||||
The load balancer is configured with a set of Virtual IPs (VIP, which can be
|
||||
prefixes), and for each VIP, with a set of Application Server addresses (ASs).
|
||||
|
||||
Traffic received for a given VIP (or VIP prefix) is tunneled using GRE towards
|
||||
the different ASs in a way that (tries to) ensure that a given session will
|
||||
always be tunneled to the same AS.
|
||||
|
||||
Both VIPs and ASs can be IPv4 or IPv6, but for a given VIP, all ASs must be using
|
||||
the same encap. type (i.e. IPv4+GRE or IPv6+GRE). Meaning that for a given VIP,
|
||||
all AS addresses must be of the same family.
|
||||
|
||||
## Performances
|
||||
|
||||
The load balancer has been tested with up to 1 million flows and still forwards more
|
||||
than 3Mpps per core in such circumstances.
|
||||
Although 3Mpps already seems good, it is likely that performance will be improved
|
||||
in next versions.
|
||||
|
||||
## Configuration
|
||||
|
||||
### Global LB parameters
|
||||
|
||||
The load balancer needs to be configured with some parameters:
|
||||
|
||||
lb conf [ip4-src-address <addr>] [ip6-src-address <addr>]
|
||||
[buckets <n>] [timeout <s>]
|
||||
|
||||
ip4-src-address: the source address used to send encap. packets using IPv4.
|
||||
|
||||
ip6-src-address: the source address used to send encap. packets using IPv6.
|
||||
|
||||
buckets: the number of buckets of the *per-thread* established-connections-table.
|
||||
|
||||
timeout: the number of seconds a connection will remain in the
|
||||
established-connections-table while no packet for this flow
|
||||
is received.
|
||||
|
||||
|
||||
### Configure the VIPs
|
||||
|
||||
lb vip <prefix> [encap (gre6|gre4)] [new_len <n>] [del]
|
||||
|
||||
new_len is the size of the new-connection-table. It should be 1 or 2 orders of
|
||||
magnitude bigger than the number of ASs for the VIP in order to ensure a good
|
||||
load balancing.
|
||||
|
||||
Examples:
|
||||
|
||||
lb vip 2002::/16 encap gre6 new_len 1024
|
||||
lb vip 2003::/16 encap gre4 new_len 2048
|
||||
lb vip 80.0.0.0/8 encap gre6 new_len 16
|
||||
lb vip 90.0.0.0/8 encap gre4 new_len 1024
|
||||
|
||||
### Configure the ASs (for each VIP)
|
||||
|
||||
lb as <vip-prefix> [<address> [<address> [...]]] [del]
|
||||
|
||||
You can add (or delete) several ASs at a time (for a single VIP).
|
||||
Note that the AS address family must correspond to the VIP encap. IP family.
|
||||
|
||||
Examples:
|
||||
|
||||
lb as 2002::/16 2001::2 2001::3 2001::4
|
||||
lb as 2003::/16 10.0.0.1 10.0.0.2
|
||||
lb as 80.0.0.0/8 2001::2
|
||||
lb as 90.0.0.0/8 10.0.0.1
|
||||
|
||||
|
||||
|
||||
## Monitoring
|
||||
|
||||
The plugin provides quite a bunch of counters and information.
|
||||
These are still subject to quite significant changes.
|
||||
|
||||
show lb
|
||||
show lb vips
|
||||
show lb vips verbose
|
||||
|
||||
show node counters
|
||||
|
||||
|
||||
## Design notes
|
||||
|
||||
### Multi-Threading
|
||||
|
||||
MagLev is a distributed system which pseudo-randomly generates a
|
||||
new-connections-table based on AS names such that each server configured with
|
||||
the same set of ASs ends up with the same table. Connection stickiness is then
|
||||
ensured with an established-connections-table. Using ECMP, it is assumed (but
|
||||
not relied on) that servers will mostly receive traffic for different flows.
|
||||
|
||||
This implementation pushes the parallelism a little bit further by using
|
||||
one established-connections table per thread. This is equivalent to assuming
|
||||
that RSS will do a job similar to ECMP, and is pretty useful as threads don't
|
||||
need to get a lock in order to write in the table.
|
||||
|
||||
### Hash Table
|
||||
|
||||
A load balancer requires an efficient read and write hash table. The hash table
|
||||
used by ip6-forward is very read-efficient, but not so much for writing. In
|
||||
addition, it is not a big deal if writing into the hash table fails (again,
|
||||
MagLev uses a flow table but does not heavily rely on it).
|
||||
|
||||
The plugin therefore uses a very specific (and stupid) hash table.
|
||||
- Fixed (and power of 2) number of buckets (configured at runtime)
|
||||
- Fixed (and power of 2) elements per buckets (configured at compilation time)
|
||||
|
||||
### Reference counting
|
||||
|
||||
When an AS is removed, there are two possible ways to react.
|
||||
- Keep using the AS for established connections
|
||||
- Change AS for established connections (likely to cause error for TCP)
|
||||
|
||||
In the first case, although an AS is removed from the configuration, its
|
||||
associated state needs to stay around as long as it is used by at least one
|
||||
thread.
|
||||
|
||||
In order to avoid locks, a specific reference counter is used. The design is quite
|
||||
similar to clib counters but:
|
||||
- It is possible to decrease the value
|
||||
- Summing will not zero the per-thread counters
|
||||
- Only the thread can reallocate its own counters vector (to avoid concurrency issues)
|
||||
|
||||
This reference counter is lock free, but reading a count of 0 does not mean
|
||||
the value can be freed unless it is ensured by *other* means that no other thread
|
||||
is concurrently referencing the object. In the case of this plugin, it is assumed
|
||||
that no concurrent event will take place after a few seconds.
|
||||
|
9
plugins/lb-plugin/configure.ac
Normal file
9
plugins/lb-plugin/configure.ac
Normal file
@ -0,0 +1,9 @@
|
||||
# Autoconf input for building the lb plugin on its own.

# Package name and version.
AC_INIT([lb_plugin], [1.0])
AM_INIT_AUTOMAKE
# Silent build rules are on by default.
AM_SILENT_RULES([yes])
# Install under /usr unless --prefix says otherwise.
AC_PREFIX_DEFAULT([/usr])

AC_PROG_LIBTOOL
AC_PROG_CC

# Generate the Makefile from Makefile.in.
AC_CONFIG_FILES([Makefile])
AC_OUTPUT
|
212
plugins/lb-plugin/lb/api.c
Normal file
212
plugins/lb-plugin/lb/api.c
Normal file
@ -0,0 +1,212 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Cisco and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <lb/lb.h>
|
||||
|
||||
#include <vppinfra/byte_order.h>
|
||||
#include <vlibapi/api.h>
|
||||
#include <vlibapi/api.h>
|
||||
#include <vlibmemory/api.h>
|
||||
#include <vlibsocket/api.h>
|
||||
|
||||
/*
 * lb/lb.api.h is included several times below. Before each inclusion a
 * different selector macro is defined, so each pass pulls a different
 * part out of the generated header.
 */

/* Message identifiers, gathered into an enum. */
#define vl_msg_id(n,h) n,
typedef enum {
#include <lb/lb.api.h>
    /* We'll want to know how many messages IDs we need... */
    VL_MSG_FIRST_AVAILABLE,
} vl_msg_id_t;
#undef vl_msg_id

/* define message structures */
#define vl_typedefs
#include <lb/lb.api.h>
#undef vl_typedefs

/* define generated endian-swappers */
#define vl_endianfun
#include <lb/lb.api.h>
#undef vl_endianfun

/* Output routine used by FINISH. */
#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)

/* Get the API version number */
#define vl_api_version(n,v) static u32 api_version=(v);
#include <lb/lb.api.h>
#undef vl_api_version

/*
 * Macro to finish up custom dump fns.
 * Expects `s` (u8 vector) and `handle` in scope: appends a 0 byte to s,
 * prints it through vl_print, frees it and returns handle from the
 * enclosing function.
 */
#define FINISH                                  \
    vec_add1 (s, 0);                            \
    vl_print (handle, (char *)s);               \
    vec_free (s);                               \
    return handle;
|
||||
|
||||
/*
 * A handy macro to set up and send a message reply.
 * Assumes that the following variables are available:
 * mp - pointer to request message
 * rmp - pointer to reply message type
 * rv - return value
 * lbm - pointer to the lb_main_t (supplies msg_id_base)
 *
 * When the client's input queue cannot be found, the macro returns from
 * the enclosing (void) handler without sending anything.
 *
 * There is deliberately no ';' after while(0): the call site provides it,
 * which makes the macro a single statement that is safe inside an
 * un-braced if/else. Host-to-network conversions are spelled htons/htonl.
 */

#define REPLY_MACRO(t)                                          \
do {                                                            \
    unix_shared_memory_queue_t * q =                            \
    vl_api_client_index_to_input_queue (mp->client_index);      \
    if (!q)                                                     \
        return;                                                 \
                                                                \
    rmp = vl_msg_api_alloc (sizeof (*rmp));                     \
    rmp->_vl_msg_id = htons((t)+lbm->msg_id_base);              \
    rmp->context = mp->context;                                 \
    rmp->retval = htonl(rv);                                    \
                                                                \
    vl_msg_api_send_shmem (q, (u8 *)&rmp);                      \
} while(0)
|
||||
|
||||
static void
|
||||
vl_api_lb_conf_t_handler
|
||||
(vl_api_lb_conf_t * mp)
|
||||
{
|
||||
lb_main_t *lbm = &lb_main;
|
||||
vl_api_lb_conf_reply_t * rmp;
|
||||
int rv = 0;
|
||||
|
||||
rv = lb_conf((ip4_address_t *)&mp->ip4_src_address,
|
||||
(ip6_address_t *)mp->ip6_src_address,
|
||||
mp->sticky_buckets_per_core,
|
||||
mp->flow_timeout);
|
||||
|
||||
REPLY_MACRO (VL_API_LB_CONF_REPLY);
|
||||
}
|
||||
|
||||
static void *vl_api_lb_conf_t_print
|
||||
(vl_api_lb_conf_t *mp, void * handle)
|
||||
{
|
||||
u8 * s;
|
||||
s = format (0, "SCRIPT: lb_conf ");
|
||||
s = format (s, "%U ", format_ip4_address, (ip4_address_t *)&mp->ip4_src_address);
|
||||
s = format (s, "%U ", format_ip6_address, (ip6_address_t *)mp->ip6_src_address);
|
||||
s = format (s, "%u ", mp->sticky_buckets_per_core);
|
||||
s = format (s, "%u ", mp->flow_timeout);
|
||||
FINISH;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
vl_api_lb_add_del_vip_t_handler
|
||||
(vl_api_lb_add_del_vip_t * mp)
|
||||
{
|
||||
lb_main_t *lbm = &lb_main;
|
||||
vl_api_lb_conf_reply_t * rmp;
|
||||
int rv = 0;
|
||||
ip46_address_t prefix;
|
||||
memcpy(&prefix.ip6, mp->ip_prefix, sizeof(prefix.ip6));
|
||||
|
||||
if (mp->is_del) {
|
||||
u32 vip_index;
|
||||
if (!(rv = lb_vip_find_index(&prefix, mp->prefix_length, &vip_index)))
|
||||
rv = lb_vip_del(vip_index);
|
||||
} else {
|
||||
u32 vip_index;
|
||||
lb_vip_type_t type;
|
||||
if (ip46_prefix_is_ip4(&prefix, mp->prefix_length)) {
|
||||
type = mp->is_gre4?LB_VIP_TYPE_IP4_GRE4:LB_VIP_TYPE_IP4_GRE6;
|
||||
} else {
|
||||
type = mp->is_gre4?LB_VIP_TYPE_IP6_GRE4:LB_VIP_TYPE_IP6_GRE6;
|
||||
}
|
||||
|
||||
rv = lb_vip_add(&prefix, mp->prefix_length, type,
|
||||
mp->new_flows_table_length, &vip_index);
|
||||
}
|
||||
REPLY_MACRO (VL_API_LB_CONF_REPLY);
|
||||
}
|
||||
|
||||
static void *vl_api_lb_add_del_vip_t_print
|
||||
(vl_api_lb_add_del_vip_t *mp, void * handle)
|
||||
{
|
||||
u8 * s;
|
||||
s = format (0, "SCRIPT: lb_add_del_vip ");
|
||||
s = format (s, "%U ", format_ip46_prefix,
|
||||
(ip46_address_t *)mp->ip_prefix, mp->prefix_length, IP46_TYPE_ANY);
|
||||
s = format (s, "%s ", mp->is_gre4?"gre4":"gre6");
|
||||
s = format (s, "%u ", mp->new_flows_table_length);
|
||||
s = format (s, "%s ", mp->is_del?"del":"add");
|
||||
FINISH;
|
||||
}
|
||||
|
||||
static void
|
||||
vl_api_lb_add_del_as_t_handler
|
||||
(vl_api_lb_add_del_as_t * mp)
|
||||
{
|
||||
lb_main_t *lbm = &lb_main;
|
||||
vl_api_lb_conf_reply_t * rmp;
|
||||
int rv = 0;
|
||||
u32 vip_index;
|
||||
if ((rv = lb_vip_find_index((ip46_address_t *)mp->vip_ip_prefix,
|
||||
mp->vip_prefix_length, &vip_index)))
|
||||
goto done;
|
||||
|
||||
if (mp->is_del)
|
||||
rv = lb_vip_del_ass(vip_index, (ip46_address_t *)mp->as_address, 1);
|
||||
else
|
||||
rv = lb_vip_add_ass(vip_index, (ip46_address_t *)mp->as_address, 1);
|
||||
|
||||
done:
|
||||
REPLY_MACRO (VL_API_LB_CONF_REPLY);
|
||||
}
|
||||
|
||||
static void *vl_api_lb_add_del_as_t_print
|
||||
(vl_api_lb_add_del_as_t *mp, void * handle)
|
||||
{
|
||||
u8 * s;
|
||||
s = format (0, "SCRIPT: lb_add_del_as ");
|
||||
s = format (s, "%U ", format_ip46_prefix,
|
||||
(ip46_address_t *)mp->vip_ip_prefix, mp->vip_prefix_length, IP46_TYPE_ANY);
|
||||
s = format (s, "%U ", format_ip46_address,
|
||||
(ip46_address_t *)mp->as_address, IP46_TYPE_ANY);
|
||||
s = format (s, "%s ", mp->is_del?"del":"add");
|
||||
FINISH;
|
||||
}
|
||||
|
||||
/* List of message types that this plugin understands */
|
||||
#define foreach_lb_plugin_api_msg \
|
||||
_(LB_CONF, lb_conf) \
|
||||
_(LB_ADD_DEL_VIP, lb_add_del_vip) \
|
||||
_(LB_ADD_DEL_AS, lb_add_del_as)
|
||||
|
||||
static clib_error_t * lb_api_init (vlib_main_t * vm)
|
||||
{
|
||||
lb_main_t *lbm = &lb_main;
|
||||
u8 *name = format (0, "lb_%08x%c", api_version, 0);
|
||||
lbm->msg_id_base = vl_msg_api_get_msg_ids
|
||||
((char *) name, VL_MSG_FIRST_AVAILABLE);
|
||||
|
||||
#define _(N,n) \
|
||||
vl_msg_api_set_handlers((VL_API_##N + lbm->msg_id_base), \
|
||||
#n, \
|
||||
vl_api_##n##_t_handler, \
|
||||
vl_noop_handler, \
|
||||
vl_api_##n##_t_endian, \
|
||||
vl_api_##n##_t_print, \
|
||||
sizeof(vl_api_##n##_t), 1);
|
||||
foreach_lb_plugin_api_msg;
|
||||
#undef _
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
VLIB_INIT_FUNCTION (lb_api_init);
|
250
plugins/lb-plugin/lb/cli.c
Normal file
250
plugins/lb-plugin/lb/cli.c
Normal file
@ -0,0 +1,250 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Cisco and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <lb/lb.h>
|
||||
#include <lb/util.h>
|
||||
|
||||
static clib_error_t *
|
||||
lb_vip_command_fn (vlib_main_t * vm,
|
||||
unformat_input_t * input, vlib_cli_command_t * cmd)
|
||||
{
|
||||
unformat_input_t _line_input, *line_input = &_line_input;
|
||||
ip46_address_t prefix;
|
||||
u8 plen;
|
||||
u32 new_len = 1024;
|
||||
u8 del = 0;
|
||||
int ret;
|
||||
u32 gre4 = 0;
|
||||
lb_vip_type_t type;
|
||||
|
||||
if (!unformat_user (input, unformat_line_input, line_input))
|
||||
return 0;
|
||||
|
||||
if (!unformat(line_input, "%U", unformat_ip46_prefix, &prefix, &plen, IP46_TYPE_ANY, &plen))
|
||||
return clib_error_return (0, "invalid vip prefix: '%U'",
|
||||
format_unformat_error, line_input);
|
||||
|
||||
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
|
||||
{
|
||||
if (unformat(line_input, "new_len %d", &new_len))
|
||||
;
|
||||
else if (unformat(line_input, "del"))
|
||||
del = 1;
|
||||
else if (unformat(line_input, "encap gre4"))
|
||||
gre4 = 1;
|
||||
else if (unformat(line_input, "encap gre6"))
|
||||
gre4 = 0;
|
||||
else
|
||||
return clib_error_return (0, "parse error: '%U'",
|
||||
format_unformat_error, line_input);
|
||||
}
|
||||
|
||||
unformat_free (line_input);
|
||||
|
||||
|
||||
if (ip46_prefix_is_ip4(&prefix, plen)) {
|
||||
type = (gre4)?LB_VIP_TYPE_IP4_GRE4:LB_VIP_TYPE_IP4_GRE6;
|
||||
} else {
|
||||
type = (gre4)?LB_VIP_TYPE_IP6_GRE4:LB_VIP_TYPE_IP6_GRE6;
|
||||
}
|
||||
|
||||
lb_garbage_collection();
|
||||
|
||||
u32 index;
|
||||
if (!del) {
|
||||
if ((ret = lb_vip_add(&prefix, plen, type, new_len, &index))) {
|
||||
return clib_error_return (0, "lb_vip_add error %d", ret);
|
||||
} else {
|
||||
vlib_cli_output(vm, "lb_vip_add ok %d", index);
|
||||
}
|
||||
} else {
|
||||
if ((ret = lb_vip_find_index(&prefix, plen, &index)))
|
||||
return clib_error_return (0, "lb_vip_find_index error %d", ret);
|
||||
else if ((ret = lb_vip_del(index)))
|
||||
return clib_error_return (0, "lb_vip_del error %d", ret);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
VLIB_CLI_COMMAND (lb_vip_command, static) =
|
||||
{
|
||||
.path = "lb vip",
|
||||
.short_help = "lb vip <prefix> [encap (gre6|gre4)] [new_len <n>] [del]",
|
||||
.function = lb_vip_command_fn,
|
||||
};
|
||||
|
||||
static clib_error_t *
|
||||
lb_as_command_fn (vlib_main_t * vm,
|
||||
unformat_input_t * input, vlib_cli_command_t * cmd)
|
||||
{
|
||||
unformat_input_t _line_input, *line_input = &_line_input;
|
||||
ip46_address_t vip_prefix, as_addr;
|
||||
u8 vip_plen;
|
||||
ip46_address_t *as_array = 0;
|
||||
u32 vip_index;
|
||||
u8 del = 0;
|
||||
int ret;
|
||||
|
||||
if (!unformat_user (input, unformat_line_input, line_input))
|
||||
return 0;
|
||||
|
||||
if (!unformat(line_input, "%U", unformat_ip46_prefix, &vip_prefix, &vip_plen, IP46_TYPE_ANY))
|
||||
return clib_error_return (0, "invalid as address: '%U'",
|
||||
format_unformat_error, line_input);
|
||||
|
||||
if ((ret = lb_vip_find_index(&vip_prefix, vip_plen, &vip_index)))
|
||||
return clib_error_return (0, "lb_vip_find_index error %d", ret);
|
||||
|
||||
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
|
||||
{
|
||||
if (unformat(line_input, "%U", unformat_ip46_address, &as_addr, IP46_TYPE_ANY)) {
|
||||
vec_add1(as_array, as_addr);
|
||||
} else if (unformat(line_input, "del")) {
|
||||
del = 1;
|
||||
} else {
|
||||
vec_free(as_array);
|
||||
return clib_error_return (0, "parse error: '%U'",
|
||||
format_unformat_error, line_input);
|
||||
}
|
||||
}
|
||||
|
||||
if (!vec_len(as_array)) {
|
||||
vec_free(as_array);
|
||||
return clib_error_return (0, "No AS address provided");
|
||||
}
|
||||
|
||||
lb_garbage_collection();
|
||||
clib_warning("vip index is %d", vip_index);
|
||||
|
||||
if (del) {
|
||||
if ((ret = lb_vip_del_ass(vip_index, as_array, vec_len(as_array)))) {
|
||||
vec_free(as_array);
|
||||
return clib_error_return (0, "lb_vip_del_ass error %d", ret);
|
||||
}
|
||||
} else {
|
||||
if ((ret = lb_vip_add_ass(vip_index, as_array, vec_len(as_array)))) {
|
||||
vec_free(as_array);
|
||||
return clib_error_return (0, "lb_vip_add_ass error %d", ret);
|
||||
}
|
||||
}
|
||||
|
||||
vec_free(as_array);
|
||||
return 0;
|
||||
}
|
||||
|
||||
VLIB_CLI_COMMAND (lb_as_command, static) =
|
||||
{
|
||||
.path = "lb as",
|
||||
.short_help = "lb as <vip-prefix> [<address> [<address> [...]]] [del]",
|
||||
.function = lb_as_command_fn,
|
||||
};
|
||||
|
||||
static clib_error_t *
|
||||
lb_conf_command_fn (vlib_main_t * vm,
|
||||
unformat_input_t * input, vlib_cli_command_t * cmd)
|
||||
{
|
||||
lb_main_t *lbm = &lb_main;
|
||||
unformat_input_t _line_input, *line_input = &_line_input;
|
||||
ip4_address_t ip4 = lbm->ip4_src_address;
|
||||
ip6_address_t ip6 = lbm->ip6_src_address;
|
||||
u32 per_cpu_sticky_buckets = lbm->per_cpu_sticky_buckets;
|
||||
u32 per_cpu_sticky_buckets_log2 = 0;
|
||||
u32 flow_timeout = lbm->flow_timeout;
|
||||
int ret;
|
||||
|
||||
if (!unformat_user (input, unformat_line_input, line_input))
|
||||
return 0;
|
||||
|
||||
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
|
||||
{
|
||||
if (unformat(line_input, "ip4-src-address %U", unformat_ip4_address, &ip4))
|
||||
;
|
||||
else if (unformat(line_input, "ip6-src-address %U", unformat_ip6_address, &ip6))
|
||||
;
|
||||
else if (unformat(line_input, "buckets %d", &per_cpu_sticky_buckets))
|
||||
;
|
||||
else if (unformat(line_input, "buckets-log2 %d", &per_cpu_sticky_buckets_log2)) {
|
||||
if (per_cpu_sticky_buckets_log2 >= 32)
|
||||
return clib_error_return (0, "buckets-log2 value is too high");
|
||||
per_cpu_sticky_buckets = 1 << per_cpu_sticky_buckets_log2;
|
||||
} else if (unformat(line_input, "timeout %d", &flow_timeout))
|
||||
;
|
||||
else
|
||||
return clib_error_return (0, "parse error: '%U'",
|
||||
format_unformat_error, line_input);
|
||||
}
|
||||
|
||||
unformat_free (line_input);
|
||||
|
||||
lb_garbage_collection();
|
||||
|
||||
if ((ret = lb_conf(&ip4, &ip6, per_cpu_sticky_buckets, flow_timeout)))
|
||||
return clib_error_return (0, "lb_conf error %d", ret);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
VLIB_CLI_COMMAND (lb_conf_command, static) =
|
||||
{
|
||||
.path = "lb conf",
|
||||
.short_help = "lb conf [ip4-src-address <addr>] [ip6-src-address <addr>] [buckets <n>] [timeout <s>]",
|
||||
.function = lb_conf_command_fn,
|
||||
};
|
||||
|
||||
static clib_error_t *
|
||||
lb_show_command_fn (vlib_main_t * vm,
|
||||
unformat_input_t * input, vlib_cli_command_t * cmd)
|
||||
{
|
||||
vlib_cli_output(vm, "%U", format_lb_main);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
VLIB_CLI_COMMAND (lb_show_command, static) =
|
||||
{
|
||||
.path = "show lb",
|
||||
.short_help = "show lb",
|
||||
.function = lb_show_command_fn,
|
||||
};
|
||||
|
||||
static clib_error_t *
|
||||
lb_show_vips_command_fn (vlib_main_t * vm,
|
||||
unformat_input_t * input, vlib_cli_command_t * cmd)
|
||||
{
|
||||
unformat_input_t line_input;
|
||||
lb_main_t *lbm = &lb_main;
|
||||
lb_vip_t *vip;
|
||||
u8 verbose = 0;
|
||||
|
||||
if (!unformat_user (input, unformat_line_input, &line_input))
|
||||
return 0;
|
||||
|
||||
if (unformat(&line_input, "verbose"))
|
||||
verbose = 1;
|
||||
|
||||
pool_foreach(vip, lbm->vips, {
|
||||
vlib_cli_output(vm, "%U\n", verbose?format_lb_vip_detailed:format_lb_vip, vip);
|
||||
});
|
||||
|
||||
unformat_free (&line_input);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
VLIB_CLI_COMMAND (lb_show_vips_command, static) =
|
||||
{
|
||||
.path = "show lb vips",
|
||||
.short_help = "show lb vips [verbose]",
|
||||
.function = lb_show_vips_command_fn,
|
||||
};
|
71
plugins/lb-plugin/lb/lb.api
Normal file
71
plugins/lb-plugin/lb/lb.api
Normal file
@ -0,0 +1,71 @@
|
||||
/** \brief Configure Load-Balancer global parameters
    @param client_index - opaque cookie to identify the sender
    @param context - sender context, to match reply w/ request
    @param ip4_src_address - IPv4 address to be used as source for IPv4 GRE traffic.
    @param ip6_src_address - IPv6 address to be used as source for IPv6 GRE traffic.
    @param sticky_buckets_per_core - Number of buckets *per worker thread* in the
           established flow table (must be power of 2).
    @param flow_timeout - Time in seconds after which, if no packet is received
           for a given flow, the flow is removed from the established flow table.
*/
define lb_conf {
  u32 client_index;
  u32 context;
  u32 ip4_src_address;
  u8 ip6_src_address[16];
  u32 sticky_buckets_per_core;
  u32 flow_timeout;
};

/** \brief Reply to lb_conf
    @param context - sender context, copied from the request
    @param retval - return value of the operation
*/
define lb_conf_reply {
  u32 context;
  i32 retval;
};
|
||||
|
||||
/** \brief Add or delete a virtual address (or prefix)
    @param client_index - opaque cookie to identify the sender
    @param context - sender context, to match reply w/ request
    @param ip_prefix - IP address (IPv4 in lower order 32 bits).
    @param prefix_length - IP prefix length (96 + 'IPv4 prefix length' for IPv4).
    @param is_gre4 - Encap is ip4 GRE (ip6 GRE otherwise).
    @param new_flows_table_length - Size of the new connections flow table used
           for this VIP (must be power of 2).
    @param is_del - The VIP should be removed.
*/
define lb_add_del_vip {
  u32 client_index;
  u32 context;
  u8 ip_prefix[16];
  u8 prefix_length;
  u8 is_gre4;
  u32 new_flows_table_length;
  u8 is_del;
};

/** \brief Reply to lb_add_del_vip
    @param context - sender context, copied from the request
    @param retval - return value of the operation
*/
define lb_add_del_vip_reply {
  u32 context;
  i32 retval;
};
|
||||
|
||||
/** \brief Add or delete an application server for a given VIP
    @param client_index - opaque cookie to identify the sender
    @param context - sender context, to match reply w/ request
    @param vip_ip_prefix - VIP IP address (IPv4 in lower order 32 bits).
    @param vip_prefix_length - VIP IP prefix length (96 + 'IPv4 prefix length' for IPv4).
    @param as_address - The application server address (IPv4 in lower order 32 bits).
    @param is_del - The AS should be removed.
*/
define lb_add_del_as {
  u32 client_index;
  u32 context;
  u8 vip_ip_prefix[16];
  u8 vip_prefix_length;
  u8 as_address[16];
  u8 is_del;
};

/** \brief Reply to lb_add_del_as
    @param context - sender context, copied from the request
    @param retval - return value of the operation
*/
define lb_add_del_as_reply {
  u32 context;
  i32 retval;
};
|
746
plugins/lb-plugin/lb/lb.c
Normal file
746
plugins/lb-plugin/lb/lb.c
Normal file
File diff suppressed because it is too large
Load Diff
304
plugins/lb-plugin/lb/lb.h
Normal file
304
plugins/lb-plugin/lb/lb.h
Normal file
File diff suppressed because it is too large
Load Diff
293
plugins/lb-plugin/lb/lb_test.c
Normal file
293
plugins/lb-plugin/lb/lb_test.c
Normal file
@ -0,0 +1,293 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Cisco and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <vat/vat.h>
|
||||
#include <vlibapi/api.h>
|
||||
#include <vlibmemory/api.h>
|
||||
#include <vlibsocket/api.h>
|
||||
#include <vppinfra/error.h>
|
||||
#include <lb/lb.h>
|
||||
|
||||
//TODO: Move that to vat/plugin_api.c
|
||||
//////////////////////////
|
||||
/*
 * unformat helper for an IPv4 or IPv6 address.
 * Arguments (via va_list): ip46_address_t *result, ip46_type_t accepted.
 * IPv4 is tried first unless only IPv6 is accepted; a parsed IPv4 address
 * is passed through ip46_address_mask_ip4(). Returns 1 on match, else 0.
 */
uword
unformat_ip46_address (unformat_input_t * input, va_list * args)
{
  ip46_address_t *addr = va_arg (*args, ip46_address_t *);
  ip46_type_t accepted = va_arg (*args, ip46_type_t);

  if (accepted != IP46_TYPE_IP6
      && unformat (input, "%U", unformat_ip4_address, &addr->ip4))
    {
      ip46_address_mask_ip4 (addr);
      return 1;
    }

  if (accepted != IP46_TYPE_IP4
      && unformat (input, "%U", unformat_ip6_address, &addr->ip6))
    return 1;

  return 0;
}
|
||||
/*
 * unformat helper for an "<address>/<length>" prefix.
 * Arguments (via va_list): ip46_address_t *result, u8 *length,
 * ip46_type_t accepted.
 * An IPv4 prefix (length <= 32) is stored with the three pad words zeroed
 * and its length offset by 96; an IPv6 prefix (length <= 128) is stored
 * as is. Returns 1 on match, 0 otherwise (including out-of-range lengths).
 */
uword
unformat_ip46_prefix (unformat_input_t * input, va_list * args)
{
  ip46_address_t *addr = va_arg (*args, ip46_address_t *);
  u8 *len = va_arg (*args, u8 *);
  ip46_type_t accepted = va_arg (*args, ip46_type_t);
  u32 plen;

  if (accepted != IP46_TYPE_IP6
      && unformat (input, "%U/%u", unformat_ip4_address, &addr->ip4, &plen))
    {
      if (plen > 32)
        return 0;
      *len = plen + 96;
      addr->pad[0] = addr->pad[1] = addr->pad[2] = 0;
      return 1;
    }

  if (accepted != IP46_TYPE_IP4
      && unformat (input, "%U/%u", unformat_ip6_address, &addr->ip6, &plen))
    {
      if (plen > 128)
        return 0;
      *len = plen;
      return 1;
    }

  return 0;
}
|
||||
/////////////////////////
|
||||
|
||||
/*
 * lb/lb.api.h is included several times below. Before each inclusion a
 * different selector macro is defined, so each pass pulls a different
 * part out of the generated header.
 */

/* Message identifiers, gathered into an enum. */
#define vl_msg_id(n,h) n,
typedef enum {
#include <lb/lb.api.h>
    /* We'll want to know how many messages IDs we need... */
    VL_MSG_FIRST_AVAILABLE,
} vl_msg_id_t;
#undef vl_msg_id

/* define message structures */
#define vl_typedefs
#include <lb/lb.api.h>
#undef vl_typedefs

/* define generated endian-swappers */
#define vl_endianfun
#include <lb/lb.api.h>
#undef vl_endianfun

/* instantiate all the print functions we know about;
 * vl_print expands to nothing here. */
#define vl_print(handle, ...)
#define vl_printfun
#include <lb/lb.api.h>
#undef vl_printfun

/* Get the API version number. */
#define vl_api_version(n,v) static u32 api_version=(v);
#include <lb/lb.api.h>
#undef vl_api_version

/* State of the lb test plugin. */
typedef struct {
    /* API message ID base */
    u16 msg_id_base;
    /* Test-tool main structure, read by the reply handlers */
    vat_main_t *vat_main;
} lb_test_main_t;

lb_test_main_t lb_test_main;
|
||||
|
||||
#define foreach_standard_reply_retval_handler \
|
||||
_(lb_conf_reply) \
|
||||
_(lb_add_del_vip_reply) \
|
||||
_(lb_add_del_as_reply)
|
||||
|
||||
#define _(n) \
|
||||
static void vl_api_##n##_t_handler \
|
||||
(vl_api_##n##_t * mp) \
|
||||
{ \
|
||||
vat_main_t * vam = lb_test_main.vat_main; \
|
||||
i32 retval = ntohl(mp->retval); \
|
||||
if (vam->async_mode) { \
|
||||
vam->async_errors += (retval < 0); \
|
||||
} else { \
|
||||
vam->retval = retval; \
|
||||
vam->result_ready = 1; \
|
||||
} \
|
||||
}
|
||||
foreach_standard_reply_retval_handler;
|
||||
#undef _
|
||||
|
||||
/*
|
||||
* Table of message reply handlers, must include boilerplate handlers
|
||||
* we just generated
|
||||
*/
|
||||
#define foreach_vpe_api_reply_msg \
|
||||
_(LB_CONF_REPLY, lb_conf_reply) \
|
||||
_(LB_ADD_DEL_VIP_REPLY, lb_add_del_vip_reply) \
|
||||
_(LB_ADD_DEL_AS_REPLY, lb_add_del_as_reply)
|
||||
|
||||
/*
 * The three macros below expect these locals in the calling function:
 * vam, lbtm, mp, mps and (for W) timeout.
 * NOTE(review): M and W keep their trailing ';' after while(0), exactly as
 * before; call sites write "M(...); S; W;".
 */

/* M: construct, but don't yet send a message.
 * The message is allocated, filled from the local template mps, then
 * stamped with its message id and the client index. */
#define M(T,t)                                                  \
do {                                                            \
    vam->result_ready = 0;                                      \
    mp = vl_msg_api_alloc(sizeof(*mp));                         \
    memcpy (mp, &mps, sizeof (*mp));                            \
    mp->_vl_msg_id = ntohs (VL_API_##T + lbtm->msg_id_base);    \
    mp->client_index = vam->my_client_index;                    \
} while(0);

/* S: send a message */
#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp))

/* W: wait for results, with timeout.
 * Polls result_ready for up to one second and returns vam->retval from
 * the calling function, or -99 when the wait times out. */
#define W                                       \
do {                                            \
    timeout = vat_time_now (vam) + 1.0;         \
                                                \
    while (vat_time_now (vam) < timeout) {      \
        if (vam->result_ready == 1) {           \
            return (vam->retval);               \
        }                                       \
    }                                           \
    return -99;                                 \
} while(0);
|
||||
|
||||
/*
 * VAT command "lb_conf".
 * Parses "<ip4-src> <ip6-src> <sticky_buckets_per_core> <flow_timeout>"
 * from vam->input, sends an LB_CONF message and waits for the reply.
 *
 * Returns the reply's retval, or -99 on a parse error or timeout.
 */
static int api_lb_conf (vat_main_t * vam)
{
  lb_test_main_t *lbtm = &lb_test_main;
  unformat_input_t *i = vam->input;
  f64 timeout;
  vl_api_lb_conf_t mps, *mp;

  /* M() copies the whole template into the outgoing message, so every
   * byte that the parser does not fill must be zero rather than stack
   * garbage. */
  memset (&mps, 0, sizeof (mps));

  if (!unformat(i, "%U %U %u %u",
                unformat_ip4_address, &mps.ip4_src_address,
                unformat_ip6_address, mps.ip6_src_address,
                &mps.sticky_buckets_per_core,
                &mps.flow_timeout)) {
    errmsg ("invalid arguments\n");
    return -99;
  }

  M(LB_CONF, lb_conf); S; W;

  /* NOTREACHED */
  return 0;
}
|
||||
|
||||
/*
 * VAT command "lb_add_del_vip".
 * Parses "<ip-prefix> gre4|gre6 <new_table_len> [del]" from vam->input,
 * sends an LB_ADD_DEL_VIP message and waits for the reply.
 *
 * Returns the reply's retval, or -99 on a parse error or timeout.
 */
static int api_lb_add_del_vip (vat_main_t * vam)
{
  lb_test_main_t *lbtm = &lb_test_main;
  unformat_input_t * i = vam->input;
  f64 timeout;
  vl_api_lb_add_del_vip_t mps, *mp;

  /* M() copies the whole template into the outgoing message; zeroing it
   * also gives is_del and is_gre4 their default value of 0. */
  memset (&mps, 0, sizeof (mps));

  if (!unformat(i, "%U",
                unformat_ip46_prefix, mps.ip_prefix, &mps.prefix_length, IP46_TYPE_ANY)) {
    errmsg ("invalid prefix\n");
    return -99;
  }

  /* The encapsulation keyword is mandatory. */
  if (unformat(i, "gre4")) {
    mps.is_gre4 = 1;
  } else if (unformat(i, "gre6")) {
    mps.is_gre4 = 0;
  } else {
    errmsg ("no encap\n");
    return -99;
  }

  if (!unformat(i, "%d", &mps.new_flows_table_length)) {
    errmsg ("no table length\n");
    return -99;
  }

  if (unformat(i, "del")) {
    mps.is_del = 1;
  }

  M(LB_ADD_DEL_VIP, lb_add_del_vip); S; W;

  /* NOTREACHED */
  return 0;
}
|
||||
|
||||
/*
 * VAT command "lb_add_del_as".
 * Parses "<vip-ip-prefix> <address> [del]" from vam->input, sends an
 * LB_ADD_DEL_AS message and waits for the reply.
 *
 * Returns the reply's retval, or -99 on a parse error or timeout.
 */
static int api_lb_add_del_as (vat_main_t * vam)
{
  lb_test_main_t *lbtm = &lb_test_main;
  unformat_input_t * i = vam->input;
  f64 timeout;
  vl_api_lb_add_del_as_t mps, *mp;

  /* M() copies the whole template into the outgoing message; zeroing it
   * also gives is_del its default value of 0. */
  memset (&mps, 0, sizeof (mps));

  /* NOTE(review): unformat_ip46_address is given no argument besides the
   * destination here - confirm that matches its expected va_list. */
  if (!unformat(i, "%U %U",
                unformat_ip46_prefix, mps.vip_ip_prefix, &mps.vip_prefix_length, IP46_TYPE_ANY,
                unformat_ip46_address, mps.as_address)) {
    errmsg ("invalid prefix or address\n");
    return -99;
  }

  if (unformat(i, "del")) {
    mps.is_del = 1;
  }

  M(LB_ADD_DEL_AS, lb_add_del_as); S; W;

  /* NOTREACHED */
  return 0;
}
|
||||
|
||||
/*
 * List of messages that the api test plugin sends,
 * and that the data plane plugin processes.
 * Each entry is (command name, help string); api_<name> must exist.
 * The gre4|gre6 keyword of lb_add_del_vip is mandatory: the parser
 * rejects the command with "no encap" when it is absent.
 */
#define foreach_vpe_api_msg \
_(lb_conf, "<ip4-src-addr> <ip6-src-address> <sticky_buckets_per_core> <flow_timeout>") \
_(lb_add_del_vip, "<ip-prefix> gre4|gre6 <new_table_len> [del]") \
_(lb_add_del_as, "<vip-ip-prefix> <address> [del]")
|
||||
|
||||
/*
 * Register this plugin with VAT: install a handler for every reply in
 * foreach_vpe_api_reply_msg (message ids offset by msg_id_base), then
 * publish each command of foreach_vpe_api_msg together with its help
 * string.
 */
void vat_api_hookup (vat_main_t *vam)
{
  lb_test_main_t * lbtm = &lb_test_main;

  /* Hook up handlers for replies from the data plane plug-in */
#define _(N,n)                                                  \
  vl_msg_api_set_handlers((VL_API_##N + lbtm->msg_id_base),     \
                          #n,                                   \
                          vl_api_##n##_t_handler,               \
                          vl_noop_handler,                      \
                          vl_api_##n##_t_endian,                \
                          vl_api_##n##_t_print,                 \
                          sizeof(vl_api_##n##_t), 1);
  foreach_vpe_api_reply_msg;
#undef _

  /* API messages we can send, and their help strings */
#define _(n,h)                                          \
  hash_set_mem (vam->function_by_name, #n, api_##n);    \
  hash_set_mem (vam->help_by_name, #n, h);
  foreach_vpe_api_msg;
#undef _
}
|
||||
|
||||
/*
 * VAT plugin entry point.
 * Looks up the first message id assigned to "lb_<api_version>" and, if
 * one was assigned (anything but (u16)~0), hooks up handlers and
 * commands. Always returns 0.
 */
clib_error_t * vat_plugin_register (vat_main_t *vam)
{
  lb_test_main_t * lbtm = &lb_test_main;
  /* NUL-terminated vector "lb_XXXXXXXX", built from the API version */
  u8 * plugin_name = format (0, "lb_%08x%c", api_version, 0);

  lbtm->vat_main = vam;

  /* Ask the vpp engine for the first assigned message-id */
  lbtm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) plugin_name);
  vec_free(plugin_name);

  if (lbtm->msg_id_base != (u16) ~0)
    vat_api_hookup (vam);

  return 0;
}
|
182
plugins/lb-plugin/lb/lbhash.h
Normal file
182
plugins/lb-plugin/lb/lbhash.h
Normal file
@ -0,0 +1,182 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Cisco and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* vppinfra already includes tons of different hash tables.
|
||||
* MagLev flow table is a bit different. It has to be very efficient
|
||||
* for both writing and reading operations. But it does not need to
|
||||
* be 100% reliable (write can fail). It also needs to recycle
|
||||
* old entries in a lazy way.
|
||||
*
|
||||
 * This hash table is about as simple as a hash table can be.
|
||||
* Fixed total size, fixed bucket size.
|
||||
* Advantage is that it could be very efficient (maybe).
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef LB_PLUGIN_LB_LBHASH_H_
|
||||
#define LB_PLUGIN_LB_LBHASH_H_
|
||||
|
||||
#include <vnet/vnet.h>
|
||||
|
||||
#define LBHASH_ENTRY_PER_BUCKET_LOG2 2
|
||||
#define LBHASH_ENTRY_PER_BUCKET (1 << LBHASH_ENTRY_PER_BUCKET_LOG2)
|
||||
#define LBHASH_ENTRY_PER_BUCKET_MASK (LBHASH_ENTRY_PER_BUCKET - 1)
|
||||
|
||||
typedef struct {
|
||||
u64 key[5];
|
||||
u32 value;
|
||||
u32 last_seen;
|
||||
} lb_hash_entry_t;
|
||||
|
||||
typedef struct {
|
||||
u32 buckets_mask;
|
||||
u32 timeout;
|
||||
lb_hash_entry_t entries[];
|
||||
} lb_hash_t;
|
||||
|
||||
#define lb_hash_nbuckets(h) (((h)->buckets_mask >> LBHASH_ENTRY_PER_BUCKET_LOG2) + 1)
|
||||
#define lb_hash_size(h) ((h)->buckets_mask + LBHASH_ENTRY_PER_BUCKET)
|
||||
|
||||
#define lb_hash_foreach_entry(h, e) \
|
||||
for (e = (h)->entries; e < h->entries + lb_hash_size(h); e++)
|
||||
|
||||
#define lb_hash_foreach_valid_entry(h, e, now) \
|
||||
lb_hash_foreach_entry(h, e) \
|
||||
if (!clib_u32_loop_gt((now), (e)->last_seen + (h)->timeout))
|
||||
|
||||
static_always_inline
|
||||
lb_hash_t *lb_hash_alloc(u32 buckets, u32 timeout)
|
||||
{
|
||||
if ((!is_pow2(buckets)) ||
|
||||
((buckets << LBHASH_ENTRY_PER_BUCKET_LOG2) == 0))
|
||||
return NULL;
|
||||
|
||||
// Allocate 1 more bucket for prefetch
|
||||
u32 size = sizeof(lb_hash_t) + ((buckets << LBHASH_ENTRY_PER_BUCKET_LOG2) + 1)* sizeof(lb_hash_entry_t);
|
||||
u8 *mem = 0;
|
||||
lb_hash_t *h;
|
||||
vec_alloc_aligned(mem, size, CLIB_CACHE_LINE_BYTES);
|
||||
h = (lb_hash_t *)mem;
|
||||
h->buckets_mask = (buckets - 1) << LBHASH_ENTRY_PER_BUCKET_LOG2;
|
||||
h->timeout = timeout;
|
||||
return h;
|
||||
}
|
||||
|
||||
static_always_inline
|
||||
void lb_hash_free(lb_hash_t *h)
|
||||
{
|
||||
vec_free(h);
|
||||
}
|
||||
|
||||
#if __SSE4_2__
|
||||
static_always_inline
|
||||
u32 lb_hash_crc_u32(u32 data, u32 value)
|
||||
{
|
||||
__asm__ volatile( "crc32l %[data], %[value];"
|
||||
: [value] "+r" (value)
|
||||
: [data] "rm" (data));
|
||||
return value;
|
||||
}
|
||||
|
||||
static_always_inline
|
||||
u32 lb_hash_hash(u64 k[5])
|
||||
{
|
||||
u32 * dp = (u32 *) k;
|
||||
u32 value = 0;
|
||||
|
||||
value = lb_hash_crc_u32 (dp[0], value);
|
||||
value = lb_hash_crc_u32 (dp[1], value);
|
||||
value = lb_hash_crc_u32 (dp[2], value);
|
||||
value = lb_hash_crc_u32 (dp[3], value);
|
||||
value = lb_hash_crc_u32 (dp[4], value);
|
||||
value = lb_hash_crc_u32 (dp[5], value);
|
||||
value = lb_hash_crc_u32 (dp[6], value);
|
||||
value = lb_hash_crc_u32 (dp[7], value);
|
||||
value = lb_hash_crc_u32 (dp[8], value);
|
||||
value = lb_hash_crc_u32 (dp[9], value);
|
||||
return value;
|
||||
}
|
||||
#else
|
||||
static_always_inline
|
||||
u32 lb_hash_hash(u64 k[5])
|
||||
{
|
||||
u64 tmp = k[0] ^ k[1] ^ k[2] ^ k[3] ^ k[4];
|
||||
return (u32)clib_xxhash (tmp);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
static_always_inline
|
||||
void lb_hash_get(lb_hash_t *h, u64 k[5], u32 hash, u32 time_now, u32 *available_index, u32 *value)
|
||||
{
|
||||
lb_hash_entry_t *e = &h->entries[hash & h->buckets_mask];
|
||||
u32 i;
|
||||
*value = ~0;
|
||||
*available_index = ~0;
|
||||
CLIB_PREFETCH (&(e[1]), sizeof(lb_hash_entry_t), STORE);
|
||||
for (i=0; i<LBHASH_ENTRY_PER_BUCKET; i++) {
|
||||
CLIB_PREFETCH (&(e[i+2]), sizeof(lb_hash_entry_t), STORE); //+2 somehow performs best
|
||||
u64 cmp =
|
||||
(e[i].key[0] ^ k[0]) |
|
||||
(e[i].key[1] ^ k[1]) |
|
||||
(e[i].key[2] ^ k[2]) |
|
||||
(e[i].key[3] ^ k[3]) |
|
||||
(e[i].key[4] ^ k[4]);
|
||||
|
||||
u8 timeouted = clib_u32_loop_gt(time_now, e[i].last_seen + h->timeout);
|
||||
|
||||
*value = (cmp || timeouted)?*value:e[i].value;
|
||||
e[i].last_seen = (cmp || timeouted)?e[i].last_seen:time_now;
|
||||
*available_index = (timeouted && (*available_index == ~0))?(&e[i] - h->entries):*available_index;
|
||||
|
||||
if (!cmp)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static_always_inline
|
||||
u32 lb_hash_available_value(lb_hash_t *h, u32 available_index)
|
||||
{
|
||||
return h->entries[available_index].value;
|
||||
}
|
||||
|
||||
static_always_inline
|
||||
u32 lb_hash_put(lb_hash_t *h, u64 k[5], u32 value, u32 available_index, u32 time_now)
|
||||
{
|
||||
lb_hash_entry_t *e = &h->entries[available_index];
|
||||
e->key[0] = k[0];
|
||||
e->key[1] = k[1];
|
||||
e->key[2] = k[2];
|
||||
e->key[3] = k[3];
|
||||
e->key[4] = k[4];
|
||||
e->value = value;
|
||||
e->last_seen = time_now;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static_always_inline
|
||||
u32 lb_hash_elts(lb_hash_t *h, u32 time_now)
|
||||
{
|
||||
u32 tot = 0;
|
||||
lb_hash_entry_t *e;
|
||||
lb_hash_foreach_valid_entry(h, e, time_now) {
|
||||
tot++;
|
||||
}
|
||||
return tot;
|
||||
}
|
||||
|
||||
#endif /* LB_PLUGIN_LB_LBHASH_H_ */
|
393
plugins/lb-plugin/lb/node.c
Normal file
393
plugins/lb-plugin/lb/node.c
Normal file
File diff suppressed because it is too large
Load Diff
41
plugins/lb-plugin/lb/refcount.c
Normal file
41
plugins/lb-plugin/lb/refcount.c
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Cisco and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <lb/refcount.h>
|
||||
|
||||
/*
 * Grow one per-cpu counter array so that index 'size' becomes valid.
 *
 * A new zeroed vector is allocated, the existing counters are copied
 * into it, the pointer is swapped, and only then is 'length' raised
 * (after a memory barrier) and the old vector freed.
 */
void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size)
{
  u32 *new_counter = 0, *old_counter;
  u32 n_copy;

  vec_validate(new_counter, size);

  /* 'length' is a number of u32 counters, not a number of bytes: the
   * copy must be scaled by the element size or most counters are lost.
   * Never copy more than the new vector holds, and skip the call when
   * there is nothing (and possibly no source pointer) to copy from. */
  n_copy = clib_min(per_cpu->length, (u32) vec_len(new_counter));
  if (n_copy)
    memcpy(new_counter, per_cpu->counters, n_copy * sizeof(new_counter[0]));

  old_counter = per_cpu->counters;
  per_cpu->counters = new_counter;
  /* Publish the new pointer before the larger length */
  CLIB_MEMORY_BARRIER();
  per_cpu->length = vec_len(new_counter);
  vec_free(old_counter);
}
|
||||
|
||||
/*
 * Sum counter 'index' over all tm->n_vlib_mains per-cpu arrays.
 * Arrays too short to contain 'index' contribute nothing.
 */
u64 vlib_refcount_get(vlib_refcount_t *r, u32 index)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  u64 total = 0;
  u32 thread;

  for (thread = 0; thread < tm->n_vlib_mains; thread++) {
    vlib_refcount_per_cpu_t *pc = &r->per_cpu[thread];
    if (index < pc->length)
      total += pc->counters[index];
  }

  return total;
}
|
||||
|
67
plugins/lb-plugin/lb/refcount.h
Normal file
67
plugins/lb-plugin/lb/refcount.h
Normal file
@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Cisco and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* vlib provides lock-free counters but those
|
||||
* - Have 16bits per-CPU counter, which may overflow.
|
||||
* - Would only increment.
|
||||
*
|
||||
* This is very similar to vlib counters, but may be used to count reference.
|
||||
* Such a counter includes an arbitrary number of counters. Each counter
|
||||
* is identified by its index. This is used to aggregate per-cpu memory.
|
||||
*
|
||||
* Warning:
|
||||
* This reference counter is lock-free but is not race-condition free.
|
||||
* The counting result is approximate and another mechanism needs to be used
|
||||
* in order to ensure that an object may be freed.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <vnet/vnet.h>
|
||||
|
||||
typedef struct {
|
||||
u32 *counters;
|
||||
u32 length;
|
||||
u32 *reader_lengths;
|
||||
CLIB_CACHE_LINE_ALIGN_MARK(o);
|
||||
} vlib_refcount_per_cpu_t;
|
||||
|
||||
typedef struct {
|
||||
vlib_refcount_per_cpu_t *per_cpu;
|
||||
} vlib_refcount_t;
|
||||
|
||||
void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size);
|
||||
|
||||
static_always_inline
|
||||
void vlib_refcount_add(vlib_refcount_t *r, u32 cpu_index, u32 counter_index, i32 v)
|
||||
{
|
||||
vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[cpu_index];
|
||||
if (PREDICT_FALSE(counter_index >= per_cpu->length))
|
||||
__vlib_refcount_resize(per_cpu, clib_max(counter_index + 16, per_cpu->length * 2));
|
||||
|
||||
per_cpu->counters[counter_index] += v;
|
||||
}
|
||||
|
||||
u64 vlib_refcount_get(vlib_refcount_t *r, u32 index);
|
||||
|
||||
static_always_inline
|
||||
void vlib_refcount_init(vlib_refcount_t *r)
|
||||
{
|
||||
vlib_thread_main_t *tm = vlib_get_thread_main ();
|
||||
r->per_cpu = 0;
|
||||
vec_validate (r->per_cpu, tm->n_vlib_mains - 1);
|
||||
}
|
||||
|
||||
|
72
plugins/lb-plugin/lb/util.c
Normal file
72
plugins/lb-plugin/lb/util.c
Normal file
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Cisco and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <lb/util.h>
|
||||
|
||||
/*
 * Clear every bit of 'prefix' beyond the first 'plen' bits.
 *
 * plen is expressed on the 128-bit ip46 representation. Values of 128
 * or more leave the address untouched (the original code shifted by a
 * negative amount for plen > 128, which is undefined behaviour).
 */
void ip46_prefix_normalize(ip46_address_t *prefix, u8 plen)
{
  if (plen == 0) {
    prefix->as_u64[0] = 0;
    prefix->as_u64[1] = 0;
  } else if (plen <= 64) {
    /* Mask lives entirely in the upper 64 bits; the lower half is cleared */
    prefix->as_u64[0] &= clib_host_to_net_u64(0xffffffffffffffffL << (64 - plen));
    prefix->as_u64[1] = 0;
  } else if (plen < 128) {
    prefix->as_u64[1] &= clib_host_to_net_u64(0xffffffffffffffffL << (128 - plen));
  }
  /* plen >= 128: every bit is significant, nothing to clear */
}
|
||||
|
||||
/*
 * unformat function for "<address>/<length>".
 *
 * va_list arguments: ip46_address_t *address, u8 *length, ip46_type_t type.
 * IPv4 is tried unless type is IP46_TYPE_IP6; its length (at most 32) is
 * stored with 96 added and the three pad words are zeroed. IPv6 is tried
 * unless type is IP46_TYPE_IP4; its length (at most 128) is stored as is.
 *
 * Returns 1 on success, 0 otherwise.
 */
uword unformat_ip46_prefix (unformat_input_t * input, va_list * args)
{
  ip46_address_t *addr = va_arg (*args, ip46_address_t *);
  u8 *plen = va_arg (*args, u8 *);
  ip46_type_t type = va_arg (*args, ip46_type_t);
  u32 parsed;

  if ((type != IP46_TYPE_IP6) &&
      unformat(input, "%U/%u", unformat_ip4_address, &addr->ip4, &parsed)) {
    if (parsed > 32)
      return 0;
    *plen = parsed + 96;
    addr->pad[0] = addr->pad[1] = addr->pad[2] = 0;
    return 1;
  }

  if ((type != IP46_TYPE_IP4) &&
      unformat(input, "%U/%u", unformat_ip6_address, &addr->ip6, &parsed)) {
    if (parsed > 128)
      return 0;
    *plen = parsed;
    return 1;
  }

  return 0;
}
|
||||
|
||||
/*
 * format function printing "<address>/<length>".
 *
 * va_list arguments: ip46_address_t *address, u32 length, ip46_type_t type.
 * IP46_TYPE_IP4 / IP46_TYPE_IP6 force the family; any other type prints
 * IPv4 only when length >= 96 and the address is an IPv4 one. For IPv4
 * the printed length is 'length - 96'.
 */
u8 *format_ip46_prefix (u8 * s, va_list * args)
{
  ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
  u32 len = va_arg (*args, u32); //va_arg cannot use u8 or u16
  ip46_type_t type = va_arg (*args, ip46_type_t);
  int as_ip4;

  switch (type) {
    case IP46_TYPE_IP4:
      as_ip4 = 1;
      break;
    case IP46_TYPE_IP6:
      as_ip4 = 0;
      break;
    default:
      as_ip4 = (len >= 96) && ip46_address_is_ip4(ip46);
      break;
  }

  if (as_ip4)
    return format(s, "%U/%d", format_ip4_address, &ip46->ip4, len - 96);

  return format(s, "%U/%d", format_ip6_address, &ip46->ip6, len);
}
|
||||
|
40
plugins/lb-plugin/lb/util.h
Normal file
40
plugins/lb-plugin/lb/util.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Cisco and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Non-LB specific stuff comes here
|
||||
*/
|
||||
|
||||
#ifndef LB_PLUGIN_LB_UTIL_H_
|
||||
#define LB_PLUGIN_LB_UTIL_H_
|
||||
|
||||
#include <vnet/vnet.h>
|
||||
#include <vnet/ip/ip.h>
|
||||
|
||||
#define ip46_address_type(ip46) (ip46_address_is_ip4(ip46)?IP46_TYPE_IP4:IP46_TYPE_IP6)
|
||||
#define ip46_prefix_is_ip4(ip46, len) ((len) >= 96 && ip46_address_is_ip4(ip46))
|
||||
#define ip46_prefix_type(ip46, len) (ip46_prefix_is_ip4(ip46, len)?IP46_TYPE_IP4:IP46_TYPE_IP6)
|
||||
|
||||
void ip46_prefix_normalize(ip46_address_t *prefix, u8 plen);
|
||||
uword unformat_ip46_prefix (unformat_input_t * input, va_list * args);
|
||||
u8 *format_ip46_prefix (u8 * s, va_list * args);
|
||||
|
||||
/**
 * 32 bits integer comparison for running (wrapping) values.
 * 1 > 0 is true. But 1 > 0xffffffff also is.
 * Note: equal values also compare as true, since (a - b) is then 0.
 */
|
||||
#define clib_u32_loop_gt(a, b) (((u32)(a)) - ((u32)(b)) < 0x7fffffff)
|
||||
|
||||
#endif /* LB_PLUGIN_LB_UTIL_H_ */
|
@ -86,7 +86,8 @@ _(INVALID_EID_TYPE, -92, "Unsupported LSIP EID type") \
|
||||
_(CANNOT_CREATE_PCAP_FILE, -93, "Cannot create pcap file") \
|
||||
_(INCORRECT_ADJACENCY_TYPE, -94, "Invalid adjacency type for this operation") \
|
||||
_(EXCEEDED_NUMBER_OF_RANGES_CAPACITY, -95, "Operation would exceed configured capacity of ranges") \
|
||||
_(EXCEEDED_NUMBER_OF_PORTS_CAPACITY, -96, "Operation would exceed capacity of number of ports")
|
||||
_(EXCEEDED_NUMBER_OF_PORTS_CAPACITY, -96, "Operation would exceed capacity of number of ports") \
|
||||
_(INVALID_ADDRESS_FAMILY, -97, "Invalid address family")
|
||||
|
||||
typedef enum
|
||||
{
|
||||
|
Reference in New Issue
Block a user