f46c5525d2
fix use of vip after it was deleted
Type: fix
Fixes: 041eacc816
Change-Id: I5723485c5da7507fbc6c86ff6eb9f77127439f67
Signed-off-by: Georgy Borodin <bor1-go@yandex-team.ru>
1467 lines
42 KiB
C
1467 lines
42 KiB
C
/*
|
|
* Copyright (c) 2016 Cisco and/or its affiliates.
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at:
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include <lb/lb.h>
|
|
#include <vnet/plugin/plugin.h>
|
|
#include <vpp/app/version.h>
|
|
#include <vnet/api_errno.h>
|
|
#include <vnet/udp/udp_local.h>
|
|
#include <vppinfra/lock.h>
|
|
|
|
//GC runs at most once every so many seconds
|
|
#define LB_GARBAGE_RUN 60
|
|
|
|
//After so many seconds. It is assumed that inter-core race condition will not occur.
|
|
#define LB_CONCURRENCY_TIMEOUT 10
|
|
|
|
// FIB source for adding routes
|
|
static fib_source_t lb_fib_src;
|
|
|
|
lb_main_t lb_main;
|
|
|
|
#define lb_get_writer_lock() clib_spinlock_lock (&lb_main.writer_lock)
|
|
#define lb_put_writer_lock() clib_spinlock_unlock (&lb_main.writer_lock)
|
|
|
|
static void lb_as_stack (lb_as_t *as);
|
|
|
|
|
|
const static char * const lb_dpo_gre4_ip4[] = { "lb4-gre4" , NULL };
|
|
const static char * const lb_dpo_gre4_ip6[] = { "lb6-gre4" , NULL };
|
|
const static char* const * const lb_dpo_gre4_nodes[DPO_PROTO_NUM] =
|
|
{
|
|
[DPO_PROTO_IP4] = lb_dpo_gre4_ip4,
|
|
[DPO_PROTO_IP6] = lb_dpo_gre4_ip6,
|
|
};
|
|
|
|
const static char * const lb_dpo_gre6_ip4[] = { "lb4-gre6" , NULL };
|
|
const static char * const lb_dpo_gre6_ip6[] = { "lb6-gre6" , NULL };
|
|
const static char* const * const lb_dpo_gre6_nodes[DPO_PROTO_NUM] =
|
|
{
|
|
[DPO_PROTO_IP4] = lb_dpo_gre6_ip4,
|
|
[DPO_PROTO_IP6] = lb_dpo_gre6_ip6,
|
|
};
|
|
|
|
const static char * const lb_dpo_gre4_ip4_port[] = { "lb4-gre4-port" , NULL };
|
|
const static char * const lb_dpo_gre4_ip6_port[] = { "lb6-gre4-port" , NULL };
|
|
const static char* const * const lb_dpo_gre4_port_nodes[DPO_PROTO_NUM] =
|
|
{
|
|
[DPO_PROTO_IP4] = lb_dpo_gre4_ip4_port,
|
|
[DPO_PROTO_IP6] = lb_dpo_gre4_ip6_port,
|
|
};
|
|
|
|
const static char * const lb_dpo_gre6_ip4_port[] = { "lb4-gre6-port" , NULL };
|
|
const static char * const lb_dpo_gre6_ip6_port[] = { "lb6-gre6-port" , NULL };
|
|
const static char* const * const lb_dpo_gre6_port_nodes[DPO_PROTO_NUM] =
|
|
{
|
|
[DPO_PROTO_IP4] = lb_dpo_gre6_ip4_port,
|
|
[DPO_PROTO_IP6] = lb_dpo_gre6_ip6_port,
|
|
};
|
|
|
|
const static char * const lb_dpo_l3dsr_ip4[] = {"lb4-l3dsr" , NULL};
|
|
const static char* const * const lb_dpo_l3dsr_nodes[DPO_PROTO_NUM] =
|
|
{
|
|
[DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4,
|
|
};
|
|
|
|
const static char * const lb_dpo_l3dsr_ip4_port[] = {"lb4-l3dsr-port" , NULL};
|
|
const static char* const * const lb_dpo_l3dsr_port_nodes[DPO_PROTO_NUM] =
|
|
{
|
|
[DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4_port,
|
|
};
|
|
|
|
const static char * const lb_dpo_nat4_ip4_port[] = { "lb4-nat4-port" , NULL };
|
|
const static char* const * const lb_dpo_nat4_port_nodes[DPO_PROTO_NUM] =
|
|
{
|
|
[DPO_PROTO_IP4] = lb_dpo_nat4_ip4_port,
|
|
};
|
|
|
|
const static char * const lb_dpo_nat6_ip6_port[] = { "lb6-nat6-port" , NULL };
|
|
const static char* const * const lb_dpo_nat6_port_nodes[DPO_PROTO_NUM] =
|
|
{
|
|
[DPO_PROTO_IP6] = lb_dpo_nat6_ip6_port,
|
|
};
|
|
|
|
/**
 * Time reference (seconds) used to age sticky hash-table entries.
 * A constant offset keeps the returned value strictly positive.
 */
u32 lb_hash_time_now(vlib_main_t * vm)
{
  f64 now = vlib_time_now(vm);
  return (u32) (now + 10000);
}
|
|
|
|
/**
 * Format global load-balancer state: configured source addresses,
 * VIP/AS pool sizes and, for each worker that has one, the sticky
 * hash table's timeout and occupancy.
 */
u8 *format_lb_main (u8 * s, va_list * args)
{
  vlib_thread_main_t *tm = vlib_get_thread_main();
  lb_main_t *lbm = &lb_main;
  s = format(s, "lb_main");
  s = format(s, " ip4-src-address: %U \n", format_ip4_address, &lbm->ip4_src_address);
  s = format(s, " ip6-src-address: %U \n", format_ip6_address, &lbm->ip6_src_address);
  s = format(s, " #vips: %u\n", pool_elts(lbm->vips));
  // -1: presumably one pool slot is a reserved placeholder AS — confirm in lb init
  s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1);

  u32 thread_index;
  for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
    // Sticky tables are allocated lazily per worker; skip cores without one.
    lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
    if (h) {
      s = format(s, "core %d\n", thread_index);
      s = format(s, " timeout: %ds\n", h->timeout);
      s = format(s, " usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())), lb_hash_size(h));
    }
  }

  return s;
}
|
|
|
|
/* Human-readable names for each VIP type, indexed by lb_vip_type_t.
 * Used by both the format and unformat callbacks below. */
static char *lb_vip_type_strings[] = {
  [LB_VIP_TYPE_IP6_GRE6] = "ip6-gre6",
  [LB_VIP_TYPE_IP6_GRE4] = "ip6-gre4",
  [LB_VIP_TYPE_IP4_GRE6] = "ip4-gre6",
  [LB_VIP_TYPE_IP4_GRE4] = "ip4-gre4",
  [LB_VIP_TYPE_IP4_L3DSR] = "ip4-l3dsr",
  [LB_VIP_TYPE_IP4_NAT4] = "ip4-nat4",
  [LB_VIP_TYPE_IP6_NAT6] = "ip6-nat6",
};
|
|
|
|
/**
 * Format callback for lb_vip_type_t.
 * Prints the type's name, or "_WRONG_TYPE_" for out-of-range values.
 */
u8 *format_lb_vip_type (u8 * s, va_list * args)
{
  lb_vip_type_t vipt = va_arg (*args, lb_vip_type_t);
  /* Direct bounds check replaces the original O(n) equality scan.
   * The cast keeps a (theoretically) negative enum value out of range.
   * "%s" also avoids passing the table entry as a format string. */
  if ((u32) vipt < LB_VIP_N_TYPES)
    return format(s, "%s", lb_vip_type_strings[vipt]);
  return format(s, "_WRONG_TYPE_");
}
|
|
|
|
/**
 * unformat callback: parse one of the names in lb_vip_type_strings and
 * store the corresponding lb_vip_type_t through the pointer argument.
 *
 * @return 1 if a type name matched, 0 otherwise.
 */
uword unformat_lb_vip_type (unformat_input_t * input, va_list * args)
{
  lb_vip_type_t *vipt = va_arg (*args, lb_vip_type_t *);
  u32 i;
  for (i=0; i<LB_VIP_N_TYPES; i++)
    if (unformat(input, lb_vip_type_strings[i])) {
      *vipt = i;
      return 1;
    }
  return 0;
}
|
|
|
|
/**
 * One-line summary formatter for a VIP: type, prefix, flow-table size,
 * AS count, removal status, plus per-port and encap-specific details.
 */
u8 *format_lb_vip (u8 * s, va_list * args)
{
  lb_vip_t *vip = va_arg (*args, lb_vip_t *);
  s = format(s, "%U %U new_size:%u #as:%u%s",
             format_lb_vip_type, vip->type,
             format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY,
             vip->new_flow_table_mask + 1,
             pool_elts(vip->as_indexes),
             (vip->flags & LB_VIP_FLAGS_USED)?"":" removed");

  // port != 0 means this is a per-port VIP
  if (vip->port != 0)
    {
      s = format(s, "  protocol:%u port:%u ", vip->protocol, vip->port);
    }

  if (vip->type == LB_VIP_TYPE_IP4_L3DSR)
    {
      s = format(s, "  dscp:%u", vip->encap_args.dscp);
    }
  else if ((vip->type == LB_VIP_TYPE_IP4_NAT4)
      || (vip->type == LB_VIP_TYPE_IP6_NAT6))
    {
      // NAT ports are stored in network byte order, hence ntohs for display
      s = format (s, "  type:%s port:%u target_port:%u",
                  (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)?"clusterip":
                      "nodeport",
                  ntohs(vip->port), ntohs(vip->encap_args.target_port));
    }

  return s;
}
|
|
|
|
/**
 * Format a single application server: address plus whether the entry
 * is in use or pending garbage collection.
 */
u8 *format_lb_as (u8 * s, va_list * args)
{
  lb_as_t *as = va_arg (*args, lb_as_t *);
  const char *state = (as->flags & LB_AS_FLAGS_USED) ? "used" : "removed";
  s = format (s, "%U %s", format_ip46_address, &as->address, IP46_TYPE_ANY, state);
  return s;
}
|
|
|
|
/**
 * Verbose formatter for a VIP: header line with pool index and flags,
 * per-port/encap details, per-VIP counters, and one line per AS with
 * its new-flow-table bucket count and live flow refcount.
 */
u8 *format_lb_vip_detailed (u8 * s, va_list * args)
{
  lb_main_t *lbm = &lb_main;
  lb_vip_t *vip = va_arg (*args, lb_vip_t *);
  u32 indent = format_get_indent (s);

  /* clang-format off */
  s = format(s, "%U %U [%lu] %U%s%s\n"
             "%U  new_size:%u\n",
             format_white_space, indent,
             format_lb_vip_type, vip->type,
             vip - lbm->vips,
             format_ip46_prefix, &vip->prefix, (u32) vip->plen, IP46_TYPE_ANY,
             lb_vip_is_src_ip_sticky (vip) ? " src_ip_sticky" : "",
             (vip->flags & LB_VIP_FLAGS_USED)?"":" removed",
             format_white_space, indent,
             vip->new_flow_table_mask + 1);
  /* clang-format on */

  // port != 0 means this is a per-port VIP
  if (vip->port != 0)
    {
      s = format(s, "%U  protocol:%u port:%u\n",
                 format_white_space, indent,
                 vip->protocol, vip->port);
    }

  if (vip->type == LB_VIP_TYPE_IP4_L3DSR)
    {
      s = format(s, "%U  dscp:%u\n",
                 format_white_space, indent,
                 vip->encap_args.dscp);
    }
  else if ((vip->type == LB_VIP_TYPE_IP4_NAT4)
      || (vip->type == LB_VIP_TYPE_IP6_NAT6))
    {
      // NAT ports are stored in network byte order, hence ntohs for display
      s = format (s, "%U  type:%s port:%u target_port:%u",
                  format_white_space, indent,
                  (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)?"clusterip":
                      "nodeport",
                  ntohs(vip->port), ntohs(vip->encap_args.target_port));
    }

  //Print counters
  s = format(s, "%U  counters:\n",
             format_white_space, indent);
  u32 i;
  for (i=0; i<LB_N_VIP_COUNTERS; i++)
    s = format(s, "%U    %s: %Lu\n",
               format_white_space, indent,
               lbm->vip_counters[i].name,
               vlib_get_simple_counter(&lbm->vip_counters[i], vip - lbm->vips));


  s = format(s, "%U  #as:%u\n",
             format_white_space, indent,
             pool_elts(vip->as_indexes));

  //Let's count the buckets for each AS
  u32 *count = 0;
  // Indexed by AS pool index; sized for the whole AS pool
  vec_validate(count, pool_len(lbm->ass)); //Possibly big alloc for not much...
  lb_new_flow_entry_t *nfe;
  vec_foreach(nfe, vip->new_flow_table)
    count[nfe->as_index]++;

  lb_as_t *as;
  u32 *as_index;
  pool_foreach (as_index, vip->as_indexes) {
    as = &lbm->ass[*as_index];
    s = format(s, "%U    %U %u buckets %Lu flows dpo:%u %s\n",
               format_white_space, indent,
               format_ip46_address, &as->address, IP46_TYPE_ANY,
               count[as - lbm->ass],
               vlib_refcount_get(&lbm->as_refcount, as - lbm->ass),
               as->dpo.dpoi_index,
               (as->flags & LB_AS_FLAGS_USED)?"used":" removed");
  }

  vec_free(count);
  return s;
}
|
|
|
|
/* Per-AS state for the MagLev-style permutation used when rebuilding a
 * VIP's new-flow table (see lb_vip_update_new_flow_table). */
typedef struct {
  u32 as_index; // Index of the AS in lb_main.ass
  u32 last;     // Next bucket this AS will try to claim
  u32 skip;     // Odd stride used to walk the 2^n bucket space
} lb_pseudorand_t;
|
|
|
|
static int lb_pseudorand_compare(void *a, void *b)
|
|
{
|
|
lb_as_t *asa, *asb;
|
|
lb_main_t *lbm = &lb_main;
|
|
asa = &lbm->ass[((lb_pseudorand_t *)a)->as_index];
|
|
asb = &lbm->ass[((lb_pseudorand_t *)b)->as_index];
|
|
return memcmp(&asa->address, &asb->address, sizeof(asb->address));
|
|
}
|
|
|
|
/**
 * Reclaim the removed ASs of one VIP.
 *
 * An AS is freed only when it is flagged unused, its grace period
 * (LB_CONCURRENCY_TIMEOUT) has elapsed, and no data-plane flow still
 * references it. For NAT VIPs the matching SNAT mapping is dropped too.
 * Runs at most once per LB_GARBAGE_RUN seconds per VIP.
 * Caller must hold the writer lock.
 */
static void lb_vip_garbage_collection(lb_vip_t *vip)
{
  lb_main_t *lbm = &lb_main;
  lb_snat4_key_t m_key4;
  clib_bihash_kv_8_8_t kv4, value4;
  lb_snat6_key_t m_key6;
  clib_bihash_kv_24_8_t kv6, value6;
  lb_snat_mapping_t *m = 0;
  CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);

  u32 now = (u32) vlib_time_now(vlib_get_main());
  // Rate limit: skip if GC ran for this VIP within the last LB_GARBAGE_RUN s
  if (!clib_u32_loop_gt(now, vip->last_garbage_collection + LB_GARBAGE_RUN))
    return;

  vip->last_garbage_collection = now;
  lb_as_t *as;
  u32 *as_index;
  // NOTE(review): elements are pool_put() inside this pool_foreach; the
  // vppinfra pool macros tolerate removing the current element, but
  // verify this still holds for the macro version in use.
  pool_foreach (as_index, vip->as_indexes) {
    as = &lbm->ass[*as_index];
    if (!(as->flags & LB_AS_FLAGS_USED) && //Not used
        clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) &&
        (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0))
      { //Not referenced

        // Remove the SNAT mapping keyed by this AS address (NAT modes only)
        if (lb_vip_is_nat4_port(vip)) {
          m_key4.addr = as->address.ip4;
          m_key4.port = vip->encap_args.target_port;
          m_key4.protocol = 0;
          m_key4.fib_index = 0;

          kv4.key = m_key4.as_u64;
          // NOTE(review): if this search fails, m keeps its previous value
          // (initially 0) and the ASSERT below checks a stale pointer —
          // confirm the mapping is guaranteed to exist here.
          if(!clib_bihash_search_8_8(&lbm->mapping_by_as4, &kv4, &value4))
            m = pool_elt_at_index (lbm->snat_mappings, value4.value);
          ASSERT (m);

          kv4.value = m - lbm->snat_mappings;
          clib_bihash_add_del_8_8(&lbm->mapping_by_as4, &kv4, 0);
          pool_put (lbm->snat_mappings, m);
        } else if (lb_vip_is_nat6_port(vip)) {
          m_key6.addr.as_u64[0] = as->address.ip6.as_u64[0];
          m_key6.addr.as_u64[1] = as->address.ip6.as_u64[1];
          m_key6.port = vip->encap_args.target_port;
          m_key6.protocol = 0;
          m_key6.fib_index = 0;

          kv6.key[0] = m_key6.as_u64[0];
          kv6.key[1] = m_key6.as_u64[1];
          kv6.key[2] = m_key6.as_u64[2];

          if (!clib_bihash_search_24_8 (&lbm->mapping_by_as6, &kv6, &value6))
            m = pool_elt_at_index (lbm->snat_mappings, value6.value);
          ASSERT (m);

          kv6.value = m - lbm->snat_mappings;
          clib_bihash_add_del_24_8(&lbm->mapping_by_as6, &kv6, 0);
          pool_put (lbm->snat_mappings, m);
        }
        // Detach from the next-hop FIB entry we registered with in
        // lb_vip_add_ass, then release the RR route.
        fib_entry_child_remove(as->next_hop_fib_entry_index,
                               as->next_hop_child_index);
        fib_table_entry_delete_index(as->next_hop_fib_entry_index,
                                     FIB_SOURCE_RR);
        as->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID;

        // Finally release both the membership slot and the AS itself
        pool_put(vip->as_indexes, as_index);
        pool_put(lbm->ass, as);
      }
  }
}
|
|
|
|
/**
 * Global garbage collection: run per-VIP GC on every VIP, then free
 * VIPs that are flagged removed and have no AS left.
 *
 * Empty removed VIPs are first collected into a temporary vector and
 * only freed after the pool_foreach completes, so the VIP pool is never
 * mutated mid-iteration and no vip pointer is used after its pool_put.
 */
void lb_garbage_collection()
{
  lb_main_t *lbm = &lb_main;
  lb_get_writer_lock();
  lb_vip_t *vip;
  u32 *to_be_removed_vips = 0, *i;
  pool_foreach (vip, lbm->vips) {
    lb_vip_garbage_collection(vip);

    if (!(vip->flags & LB_VIP_FLAGS_USED) &&
        (pool_elts(vip->as_indexes) == 0)) {
      // Record the index; deletion is deferred until after iteration
      vec_add1(to_be_removed_vips, vip - lbm->vips);
    }
  }

  vec_foreach(i, to_be_removed_vips) {
    vip = &lbm->vips[*i];
    pool_free (vip->as_indexes);
    pool_put (lbm->vips, vip);
  }

  vec_free(to_be_removed_vips);
  lb_put_writer_lock();
}
|
|
|
|
/**
 * (Re)build a VIP's new-flow table (MagLev-style consistent hashing):
 * every bucket of the 2^n-sized table is assigned an active AS using
 * per-AS pseudo-random permutations, so most buckets keep their AS
 * across rebuilds. If no AS is active, all buckets point at index 0
 * (presumably the reserved "no AS" entry — confirm in lb init).
 * Caller must hold the writer lock.
 */
static void lb_vip_update_new_flow_table(lb_vip_t *vip)
{
  lb_main_t *lbm = &lb_main;
  lb_new_flow_entry_t *old_table;
  u32 i, *as_index;
  lb_new_flow_entry_t *new_flow_table = 0;
  lb_as_t *as;
  lb_pseudorand_t *pr, *sort_arr = 0;

  CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock); // We must have the lock

  //Check if some AS is configured or not
  i = 0;
  pool_foreach (as_index, vip->as_indexes) {
    as = &lbm->ass[*as_index];
    if (as->flags & LB_AS_FLAGS_USED) { // At least one AS is still active
      i = 1;
      goto out; //Not sure 'break' works in this macro-loop
    }
  }

out:
  if (i == 0) {
    //Only the default. i.e. no AS
    vec_validate(new_flow_table, vip->new_flow_table_mask);
    for (i=0; i<vec_len(new_flow_table); i++)
      new_flow_table[i].as_index = 0;

    goto finished;
  }

  //First, let's sort the ASs
  vec_validate (sort_arr, pool_elts (vip->as_indexes) - 1);

  i = 0;
  pool_foreach (as_index, vip->as_indexes) {
    as = &lbm->ass[*as_index];
    if (!(as->flags & LB_AS_FLAGS_USED)) //Not used anymore
      continue;

    sort_arr[i].as_index = as - lbm->ass;
    i++;
  }
  // Shrink to the number of active ASs actually collected
  vec_set_len (sort_arr, i);

  // Deterministic order (by AS address) regardless of pool layout
  vec_sort_with_function(sort_arr, lb_pseudorand_compare);

  //Now let's pseudo-randomly generate permutations
  vec_foreach(pr, sort_arr) {
    lb_as_t *as = &lbm->ass[pr->as_index];

    // Seed the permutation from the AS address so it is stable per AS
    u64 seed = clib_xxhash(as->address.as_u64[0] ^
                           as->address.as_u64[1]);
    /* We have 2^n buckets.
     * skip must be prime with 2^n.
     * So skip must be odd.
     * MagLev actually state that M should be prime,
     * but this has a big computation cost (% operation).
     * Using 2^n is more better (& operation).
     */
    pr->skip = ((seed & 0xffffffff) | 1) & vip->new_flow_table_mask;
    pr->last = (seed >> 32) & vip->new_flow_table_mask;
  }

  //Let's create a new flow table
  vec_validate(new_flow_table, vip->new_flow_table_mask);
  for (i=0; i<vec_len(new_flow_table); i++)
    new_flow_table[i].as_index = 0;

  // Round-robin over the ASs: each claims its next free bucket along its
  // permutation until every bucket is assigned.
  u32 done = 0;
  while (1) {
    vec_foreach(pr, sort_arr) {
      while (1) {
        u32 last = pr->last;
        pr->last = (pr->last + pr->skip) & vip->new_flow_table_mask;
        if (new_flow_table[last].as_index == 0) {
          new_flow_table[last].as_index = pr->as_index;
          break;
        }
      }
      done++;
      if (done == vec_len(new_flow_table))
        goto finished;
    }
  }

finished:
  vec_free(sort_arr);

  // Swap in the new table, then free the old one
  old_table = vip->new_flow_table;
  vip->new_flow_table = new_flow_table;
  vec_free(old_table);
}
|
|
|
|
/**
 * Set the global LB configuration: encapsulation source addresses, the
 * per-core sticky-table bucket count (must be a power of two) and the
 * sticky flow timeout.
 *
 * @return 0 on success, VNET_API_ERROR_INVALID_MEMORY_SIZE when
 *         per_cpu_sticky_buckets is not a power of two.
 */
int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address,
            u32 per_cpu_sticky_buckets, u32 flow_timeout)
{
  lb_main_t *lbm = &lb_main;

  if (!is_pow2(per_cpu_sticky_buckets))
    return VNET_API_ERROR_INVALID_MEMORY_SIZE;

  /* Serialize against other control-plane writers */
  lb_get_writer_lock();
  lbm->ip4_src_address = *ip4_address;
  lbm->ip6_src_address = *ip6_address;
  lbm->per_cpu_sticky_buckets = per_cpu_sticky_buckets;
  lbm->flow_timeout = flow_timeout;
  lb_put_writer_lock();

  return 0;
}
|
|
|
|
|
|
|
|
static
|
|
int lb_vip_port_find_index(ip46_address_t *prefix, u8 plen,
|
|
u8 protocol, u16 port,
|
|
lb_lkp_type_t lkp_type,
|
|
u32 *vip_index)
|
|
{
|
|
lb_main_t *lbm = &lb_main;
|
|
lb_vip_t *vip;
|
|
/* This must be called with the lock owned */
|
|
CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
|
|
ip46_prefix_normalize(prefix, plen);
|
|
pool_foreach (vip, lbm->vips) {
|
|
if ((vip->flags & LB_AS_FLAGS_USED) &&
|
|
vip->plen == plen &&
|
|
vip->prefix.as_u64[0] == prefix->as_u64[0] &&
|
|
vip->prefix.as_u64[1] == prefix->as_u64[1])
|
|
{
|
|
if((lkp_type == LB_LKP_SAME_IP_PORT &&
|
|
vip->protocol == protocol &&
|
|
vip->port == port) ||
|
|
(lkp_type == LB_LKP_ALL_PORT_IP &&
|
|
vip->port == 0) ||
|
|
(lkp_type == LB_LKP_DIFF_IP_PORT &&
|
|
(vip->protocol != protocol ||
|
|
vip->port != port) ) )
|
|
{
|
|
*vip_index = vip - lbm->vips;
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
return VNET_API_ERROR_NO_SUCH_ENTRY;
|
|
}
|
|
|
|
/* Exact (prefix, protocol, port) lookup; writer lock must be held. */
static
int lb_vip_port_find_index_with_lock(ip46_address_t *prefix, u8 plen,
                                     u8 protocol, u16 port, u32 *vip_index)
{
  return lb_vip_port_find_index(prefix, plen, protocol, port,
                                LB_LKP_SAME_IP_PORT, vip_index);
}
|
|
|
|
/* Find the all-port VIP (port == 0) for a prefix, if any.
 * Writer lock must be held. */
static
int lb_vip_port_find_all_port_vip(ip46_address_t *prefix, u8 plen,
                                  u32 *vip_index)
{
  return lb_vip_port_find_index(prefix, plen, ~0, 0,
                                LB_LKP_ALL_PORT_IP, vip_index);
}
|
|
|
|
/* Find a per-port VIP on the same prefix but with a different
 * protocol/port pair. Writer lock must be held. */
static
int lb_vip_port_find_diff_port(ip46_address_t *prefix, u8 plen,
                               u8 protocol, u16 port, u32 *vip_index)
{
  return lb_vip_port_find_index(prefix, plen, protocol, port,
                                LB_LKP_DIFF_IP_PORT, vip_index);
}
|
|
|
|
int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u8 protocol,
|
|
u16 port, u32 *vip_index)
|
|
{
|
|
int ret;
|
|
lb_get_writer_lock();
|
|
ret = lb_vip_port_find_index_with_lock(prefix, plen,
|
|
protocol, port, vip_index);
|
|
lb_put_writer_lock();
|
|
return ret;
|
|
}
|
|
|
|
/**
 * Linear search for an AS with the given address among a VIP's ASs.
 * Caller must hold the writer lock.
 *
 * @return 0 with *as_index set on success, -1 if not found.
 */
static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_index)
{
  lb_main_t *lbm = &lb_main;
  u32 *candidate;
  /* This must be called with the lock owned */
  CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
  pool_foreach (candidate, vip->as_indexes) {
    lb_as_t *as = &lbm->ass[*candidate];
    int same_vip = (as->vip_index == (u32) (vip - lbm->vips));
    int same_addr = (as->address.as_u64[0] == address->as_u64[0]) &&
                    (as->address.as_u64[1] == address->as_u64[1]);
    if (same_vip && same_addr)
      {
        *as_index = *candidate;
        return 0;
      }
  }
  return -1;
}
|
|
|
|
/**
 * Add (or revive) application servers for a VIP.
 *
 * Each address is either: rejected (already present and in use ->
 * VNET_API_ERROR_VALUE_EXIST; wrong family -> INVALID_ADDRESS_FAMILY),
 * revived (present but flagged removed), or created. New ASs register
 * as FIB children of their next-hop route, get stacked, and — for NAT
 * VIPs — get an SNAT static mapping. The flow table is then rebuilt.
 *
 * @return 0 on success, VNET_API_ERROR_* otherwise.
 */
int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
{
  lb_main_t *lbm = &lb_main;
  lb_get_writer_lock();
  lb_vip_t *vip;
  if (!(vip = lb_vip_get_by_index(vip_index))) {
    lb_put_writer_lock();
    return VNET_API_ERROR_NO_SUCH_ENTRY;
  }

  // Address family the encapsulation expects for this VIP's ASs
  ip46_type_t type = lb_encap_is_ip4(vip)?IP46_TYPE_IP4:IP46_TYPE_IP6;
  u32 *to_be_added = 0;
  u32 *to_be_updated = 0;
  u32 i;
  u32 *ip;
  lb_snat_mapping_t *m;

  //Sanity check
  while (n--) {

    // Already known for this VIP?
    if (!lb_as_find_index_vip(vip, &addresses[n], &i)) {
      if (lbm->ass[i].flags & LB_AS_FLAGS_USED) {
        // Active duplicate: fail the whole request
        vec_free(to_be_added);
        vec_free(to_be_updated);
        lb_put_writer_lock();
        return VNET_API_ERROR_VALUE_EXIST;
      }
      // Flagged removed: revive instead of recreating
      vec_add1(to_be_updated, i);
      goto next;
    }

    if (ip46_address_type(&addresses[n]) != type) {
      vec_free(to_be_added);
      vec_free(to_be_updated);
      lb_put_writer_lock();
      return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
    }

    if (n) {
      u32 n2 = n;
      while(n2--) //Check for duplicates
        if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
            addresses[n2].as_u64[1] == addresses[n].as_u64[1])
          goto next; // Will be added when the earlier occurrence is reached

    }

    vec_add1(to_be_added, n);

next:
    continue;
  }

  //Update reused ASs
  vec_foreach(ip, to_be_updated) {
    lbm->ass[*ip].flags = LB_AS_FLAGS_USED;
  }
  vec_free(to_be_updated);

  //Create those who have to be created
  vec_foreach(ip, to_be_added) {
    lb_as_t *as;
    u32 *as_index;
    pool_get(lbm->ass, as);
    as->address = addresses[*ip];
    as->flags = LB_AS_FLAGS_USED;
    as->vip_index = vip_index;
    pool_get(vip->as_indexes, as_index);
    *as_index = as - lbm->ass;

    /*
     * become a child of the FIB entry
     * so we are informed when its forwarding changes
     */
    fib_prefix_t nh = {};
    if (lb_encap_is_ip4(vip)) {
      nh.fp_addr.ip4 = as->address.ip4;
      nh.fp_len = 32;
      nh.fp_proto = FIB_PROTOCOL_IP4;
    } else {
      nh.fp_addr.ip6 = as->address.ip6;
      nh.fp_len = 128;
      nh.fp_proto = FIB_PROTOCOL_IP6;
    }

    as->next_hop_fib_entry_index =
      fib_table_entry_special_add(0,
                                  &nh,
                                  FIB_SOURCE_RR,
                                  FIB_ENTRY_FLAG_NONE);
    as->next_hop_child_index =
      fib_entry_child_add(as->next_hop_fib_entry_index,
                          lbm->fib_node_type,
                          as - lbm->ass);

    // Resolve the AS's DPO through its next-hop FIB entry
    lb_as_stack(as);

    if ( lb_vip_is_nat4_port(vip) || lb_vip_is_nat6_port(vip) )
      {
        /* Add SNAT static mapping */
        pool_get (lbm->snat_mappings, m);
        clib_memset (m, 0, sizeof (*m));
        if (lb_vip_is_nat4_port(vip)) {
          lb_snat4_key_t m_key4;
          clib_bihash_kv_8_8_t kv4;
          m_key4.addr = as->address.ip4;
          m_key4.port = vip->encap_args.target_port;
          m_key4.protocol = 0;
          m_key4.fib_index = 0;

          // Clusterip SNATs to the VIP itself; nodeport to the node address
          if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
            {
              m->src_ip.ip4 = vip->prefix.ip4;
            }
          else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT)
            {
              m->src_ip.ip4 = lbm->ip4_src_address;
            }
          m->src_ip_is_ipv6 = 0;
          m->as_ip.ip4 = as->address.ip4;
          m->as_ip_is_ipv6 = 0;
          m->src_port = vip->port;
          m->target_port = vip->encap_args.target_port;
          m->vrf_id = 0;
          m->fib_index = 0;

          kv4.key = m_key4.as_u64;
          kv4.value = m - lbm->snat_mappings;
          clib_bihash_add_del_8_8(&lbm->mapping_by_as4, &kv4, 1);
        } else {
          lb_snat6_key_t m_key6;
          clib_bihash_kv_24_8_t kv6;
          m_key6.addr.as_u64[0] = as->address.ip6.as_u64[0];
          m_key6.addr.as_u64[1] = as->address.ip6.as_u64[1];
          m_key6.port = vip->encap_args.target_port;
          m_key6.protocol = 0;
          m_key6.fib_index = 0;

          if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
            {
              m->src_ip.ip6.as_u64[0] = vip->prefix.ip6.as_u64[0];
              m->src_ip.ip6.as_u64[1] = vip->prefix.ip6.as_u64[1];
            }
          else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT)
            {
              m->src_ip.ip6.as_u64[0] = lbm->ip6_src_address.as_u64[0];
              m->src_ip.ip6.as_u64[1] = lbm->ip6_src_address.as_u64[1];
            }
          m->src_ip_is_ipv6 = 1;
          m->as_ip.ip6.as_u64[0] = as->address.ip6.as_u64[0];
          m->as_ip.ip6.as_u64[1] = as->address.ip6.as_u64[1];
          m->as_ip_is_ipv6 = 1;
          m->src_port = vip->port;
          m->target_port = vip->encap_args.target_port;
          m->vrf_id = 0;
          m->fib_index = 0;

          kv6.key[0] = m_key6.as_u64[0];
          kv6.key[1] = m_key6.as_u64[1];
          kv6.key[2] = m_key6.as_u64[2];
          kv6.value = m - lbm->snat_mappings;
          clib_bihash_add_del_24_8(&lbm->mapping_by_as6, &kv6, 1);
        }
      }
  }
  vec_free(to_be_added);

  //Recompute flows
  lb_vip_update_new_flow_table(vip);

  //Garbage collection maybe
  lb_vip_garbage_collection(vip);

  lb_put_writer_lock();
  return 0;
}
|
|
|
|
/**
 * Flush sticky (per-flow) hash-table entries on every worker.
 *
 * Selection: vip_index == ~0 flushes everything and frees each per-core
 * table; as_index == ~0 flushes all entries of the given VIP; otherwise
 * only entries mapping to the given (vip, as) pair are flushed.
 * Each flushed entry's refcount is moved to AS 0 (the placeholder AS).
 *
 * @return always 0.
 */
int
lb_flush_vip_as (u32 vip_index, u32 as_index)
{
  u32 thread_index;
  vlib_thread_main_t *tm = vlib_get_thread_main();
  lb_main_t *lbm = &lb_main;

  for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
    lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
    if (h != NULL) {
      u32 i;
      lb_hash_bucket_t *b;

      lb_hash_foreach_entry(h, b, i) {
        if ((vip_index == ~0)
            || ((b->vip[i] == vip_index) && (as_index == ~0))
            || ((b->vip[i] == vip_index) && (b->value[i] == as_index)))
          {
            // Transfer the flow's reference from its AS to AS 0
            vlib_refcount_add(&lbm->as_refcount, thread_index, b->value[i], -1);
            vlib_refcount_add(&lbm->as_refcount, thread_index, 0, 1);
            b->vip[i] = ~0;
            b->value[i] = 0;
          }
      }
      if (vip_index == ~0)
        {
          // Full flush: also drop the per-core table itself
          lb_hash_free(h);
          lbm->per_cpu[thread_index].sticky_ht = 0;
        }
    }
  }

  return 0;
}
|
|
|
|
int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n,
|
|
u8 flush)
|
|
{
|
|
lb_main_t *lbm = &lb_main;
|
|
u32 now = (u32) vlib_time_now(vlib_get_main());
|
|
u32 *ip = 0;
|
|
u32 as_index = 0;
|
|
|
|
lb_vip_t *vip;
|
|
if (!(vip = lb_vip_get_by_index(vip_index))) {
|
|
return VNET_API_ERROR_NO_SUCH_ENTRY;
|
|
}
|
|
|
|
u32 *indexes = NULL;
|
|
while (n--) {
|
|
if (lb_as_find_index_vip(vip, &addresses[n], &as_index)) {
|
|
vec_free(indexes);
|
|
return VNET_API_ERROR_NO_SUCH_ENTRY;
|
|
}
|
|
|
|
if (n) { //Check for duplicates
|
|
u32 n2 = n - 1;
|
|
while(n2--) {
|
|
if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
|
|
addresses[n2].as_u64[1] == addresses[n].as_u64[1])
|
|
goto next;
|
|
}
|
|
}
|
|
|
|
vec_add1(indexes, as_index);
|
|
next:
|
|
continue;
|
|
}
|
|
|
|
//Garbage collection maybe
|
|
lb_vip_garbage_collection(vip);
|
|
|
|
if (indexes != NULL) {
|
|
vec_foreach(ip, indexes) {
|
|
lbm->ass[*ip].flags &= ~LB_AS_FLAGS_USED;
|
|
lbm->ass[*ip].last_used = now;
|
|
|
|
if(flush)
|
|
{
|
|
/* flush flow table for deleted ASs*/
|
|
lb_flush_vip_as(vip_index, *ip);
|
|
}
|
|
}
|
|
|
|
//Recompute flows
|
|
lb_vip_update_new_flow_table(vip);
|
|
}
|
|
|
|
vec_free(indexes);
|
|
return 0;
|
|
}
|
|
|
|
/**
 * Remove ASs from a VIP — locking wrapper around
 * lb_vip_del_ass_withlock().
 */
int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n, u8 flush)
{
  int rv;

  lb_get_writer_lock();
  rv = lb_vip_del_ass_withlock(vip_index, addresses, n, flush);
  lb_put_writer_lock();

  return rv;
}
|
|
|
|
static int
|
|
lb_vip_prefix_index_alloc (lb_main_t *lbm)
|
|
{
|
|
/*
|
|
* Check for dynamically allocated instance number.
|
|
*/
|
|
u32 bit;
|
|
|
|
bit = clib_bitmap_first_clear (lbm->vip_prefix_indexes);
|
|
|
|
lbm->vip_prefix_indexes = clib_bitmap_set(lbm->vip_prefix_indexes, bit, 1);
|
|
|
|
return bit;
|
|
}
|
|
|
|
static int
|
|
lb_vip_prefix_index_free (lb_main_t *lbm, u32 instance)
|
|
{
|
|
|
|
if (clib_bitmap_get (lbm->vip_prefix_indexes, instance) == 0)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
lbm->vip_prefix_indexes = clib_bitmap_set (lbm->vip_prefix_indexes,
|
|
instance, 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
 * Add the VIP adjacency to the ip4 or ip6 fib: install an exclusive
 * route for the VIP prefix whose DPO steers packets into the LB node
 * matching the VIP's encapsulation type.
 *
 * For per-port VIPs sharing a prefix, only the first VIP installs the
 * route; later ones reuse the existing prefix index. *vip_prefix_index
 * receives the (possibly shared) prefix index.
 */
static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip,
                                 u32 *vip_prefix_index)
{
  dpo_proto_t proto = 0;
  dpo_type_t dpo_type = 0;
  u32 vip_idx = 0;

  if (vip->port != 0)
    {
      /* for per-port vip, if VIP adjacency has been added,
       * no need to add adjacency. */
      if (!lb_vip_port_find_diff_port(&(vip->prefix), vip->plen,
                                      vip->protocol, vip->port, &vip_idx))
        {
          // Reuse the prefix index of the sibling per-port VIP
          lb_vip_t *exists_vip = lb_vip_get_by_index(vip_idx);
          *vip_prefix_index = exists_vip ? exists_vip->vip_prefix_index : ~0;
          return;
        }

      /* Allocate an index for per-port vip */
      *vip_prefix_index = lb_vip_prefix_index_alloc(lbm);
    }
  else
    {
      // All-port VIPs simply use their pool index as prefix index
      *vip_prefix_index = vip - lbm->vips;
    }

  dpo_id_t dpo = DPO_INVALID;
  fib_prefix_t pfx = {};
  if (lb_vip_is_ip4(vip->type)) {
    pfx.fp_addr.ip4 = vip->prefix.ip4;
    // plen is stored IPv6-style (mapped); subtract 96 for the IPv4 length
    pfx.fp_len = vip->plen - 96;
    pfx.fp_proto = FIB_PROTOCOL_IP4;
    proto = DPO_PROTO_IP4;
  } else {
    pfx.fp_addr.ip6 = vip->prefix.ip6;
    pfx.fp_len = vip->plen;
    pfx.fp_proto = FIB_PROTOCOL_IP6;
    proto = DPO_PROTO_IP6;
  }

  // Pick the DPO type registered for this VIP's encapsulation
  if (lb_vip_is_gre4(vip))
    dpo_type = lbm->dpo_gre4_type;
  else if (lb_vip_is_gre6(vip))
    dpo_type = lbm->dpo_gre6_type;
  else if (lb_vip_is_gre4_port(vip))
    dpo_type = lbm->dpo_gre4_port_type;
  else if (lb_vip_is_gre6_port(vip))
    dpo_type = lbm->dpo_gre6_port_type;
  else if (lb_vip_is_l3dsr(vip))
    dpo_type = lbm->dpo_l3dsr_type;
  else if (lb_vip_is_l3dsr_port(vip))
    dpo_type = lbm->dpo_l3dsr_port_type;
  else if(lb_vip_is_nat4_port(vip))
    dpo_type = lbm->dpo_nat4_port_type;
  else if (lb_vip_is_nat6_port(vip))
    dpo_type = lbm->dpo_nat6_port_type;

  // The DPO index carries the vip_prefix_index to the data plane
  dpo_set(&dpo, dpo_type, proto, *vip_prefix_index);
  fib_table_entry_special_dpo_add(0,
                                  &pfx,
                                  lb_fib_src,
                                  FIB_ENTRY_FLAG_EXCLUSIVE,
                                  &dpo);
  dpo_reset(&dpo);
}
|
|
|
|
/**
 * Add the VIP filter entry: map (vip_prefix_index, protocol, port) to
 * the VIP's pool index in the per-port lookup table.
 *
 * @return always 0.
 */
static int lb_vip_add_port_filter(lb_main_t *lbm, lb_vip_t *vip,
                                  u32 vip_prefix_index, u32 vip_idx)
{
  vip_port_key_t key;
  clib_bihash_kv_8_8_t kv;

  key.vip_prefix_index = vip_prefix_index;
  key.protocol = vip->protocol;
  key.port = clib_host_to_net_u16(vip->port); // keys use network byte order
  key.rsv = 0; // reserved bits must be zero so keys compare equal

  kv.key = key.as_u64;
  kv.value = vip_idx;
  clib_bihash_add_del_8_8(&lbm->vip_index_per_port, &kv, 1);

  return 0;
}
|
|
|
|
/**
 * Del the VIP filter entry: remove the (vip_prefix_index, protocol,
 * port) -> vip_index mapping from the per-port lookup table.
 *
 * @return 0 on success, VNET_API_ERROR_NO_SUCH_ENTRY if no entry.
 */
static int lb_vip_del_port_filter(lb_main_t *lbm, lb_vip_t *vip)
{
  vip_port_key_t key;
  clib_bihash_kv_8_8_t kv, value;
  lb_vip_t *m = 0;

  key.vip_prefix_index = vip->vip_prefix_index;
  key.protocol = vip->protocol;
  key.port = clib_host_to_net_u16(vip->port); // keys use network byte order
  key.rsv = 0;

  kv.key = key.as_u64;
  if(clib_bihash_search_8_8(&lbm->vip_index_per_port, &kv, &value) != 0)
    {
      clib_warning("looking up vip_index_per_port failed.");
      return VNET_API_ERROR_NO_SUCH_ENTRY;
    }
  // Resolve the stored index back to the pool element (also validates it)
  m = pool_elt_at_index (lbm->vips, value.value);
  ASSERT (m);

  kv.value = m - lbm->vips;
  clib_bihash_add_del_8_8(&lbm->vip_index_per_port, &kv, 0);

  return 0;
}
|
|
|
|
/**
 * Deletes the adjacency associated with the VIP: removes the exclusive
 * FIB route for the VIP prefix, unless another per-port VIP still
 * shares the prefix (in which case the route must stay).
 */
static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip)
{
  fib_prefix_t pfx = {};
  u32 vip_idx = 0;

  if (vip->port != 0)
    {
      /* If this vip adjacency is used by other per-port vip,
       * no need to del this adjacency. */
      if (!lb_vip_port_find_diff_port(&(vip->prefix), vip->plen,
                                      vip->protocol, vip->port, &vip_idx))
        {
          /* NOTE(review): this helper releases the writer lock on the
           * early-return path, while lb_vip_add_adjacency never touches
           * the lock. Verify against the caller that this does not lead
           * to a double unlock when the caller also releases the lock. */
          lb_put_writer_lock();
          return;
        }

      /* Return vip_prefix_index for per-port vip */
      lb_vip_prefix_index_free(lbm, vip->vip_prefix_index);

    }

  if (lb_vip_is_ip4(vip->type)) {
    pfx.fp_addr.ip4 = vip->prefix.ip4;
    // plen is stored IPv6-style (mapped); subtract 96 for the IPv4 length
    pfx.fp_len = vip->plen - 96;
    pfx.fp_proto = FIB_PROTOCOL_IP4;
  } else {
    pfx.fp_addr.ip6 = vip->prefix.ip6;
    pfx.fp_len = vip->plen;
    pfx.fp_proto = FIB_PROTOCOL_IP6;
  }
  fib_table_entry_special_remove(0, &pfx, lb_fib_src);
}
|
|
|
|
int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
|
|
{
|
|
lb_main_t *lbm = &lb_main;
|
|
vlib_main_t *vm = vlib_get_main();
|
|
lb_vip_t *vip;
|
|
lb_vip_type_t type = args.type;
|
|
u32 vip_prefix_index = 0;
|
|
|
|
lb_get_writer_lock();
|
|
ip46_prefix_normalize(&(args.prefix), args.plen);
|
|
|
|
if (!lb_vip_port_find_index_with_lock(&(args.prefix), args.plen,
|
|
args.protocol, args.port,
|
|
vip_index))
|
|
{
|
|
lb_put_writer_lock();
|
|
return VNET_API_ERROR_VALUE_EXIST;
|
|
}
|
|
|
|
/* Make sure we can't add a per-port VIP entry
|
|
* when there already is an all-port VIP for the same prefix. */
|
|
if ((args.port != 0) &&
|
|
!lb_vip_port_find_all_port_vip(&(args.prefix), args.plen, vip_index))
|
|
{
|
|
lb_put_writer_lock();
|
|
return VNET_API_ERROR_VALUE_EXIST;
|
|
}
|
|
|
|
/* Make sure we can't add a all-port VIP entry
|
|
* when there already is an per-port VIP for the same prefix. */
|
|
if ((args.port == 0) &&
|
|
!lb_vip_port_find_diff_port(&(args.prefix), args.plen,
|
|
args.protocol, args.port, vip_index))
|
|
{
|
|
lb_put_writer_lock();
|
|
return VNET_API_ERROR_VALUE_EXIST;
|
|
}
|
|
|
|
/* Make sure all VIP for a given prefix (using different ports) have the same type. */
|
|
if ((args.port != 0) &&
|
|
!lb_vip_port_find_diff_port(&(args.prefix), args.plen,
|
|
args.protocol, args.port, vip_index)
|
|
&& (args.type != lbm->vips[*vip_index].type))
|
|
{
|
|
lb_put_writer_lock();
|
|
return VNET_API_ERROR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
if (!is_pow2(args.new_length)) {
|
|
lb_put_writer_lock();
|
|
return VNET_API_ERROR_INVALID_MEMORY_SIZE;
|
|
}
|
|
|
|
if (ip46_prefix_is_ip4(&(args.prefix), args.plen) &&
|
|
!lb_vip_is_ip4(type)) {
|
|
lb_put_writer_lock();
|
|
return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
|
|
}
|
|
|
|
if ((!ip46_prefix_is_ip4(&(args.prefix), args.plen)) &&
|
|
!lb_vip_is_ip6(type)) {
|
|
lb_put_writer_lock();
|
|
return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
|
|
}
|
|
|
|
if ((type == LB_VIP_TYPE_IP4_L3DSR) &&
|
|
(args.encap_args.dscp >= 64) )
|
|
{
|
|
lb_put_writer_lock();
|
|
return VNET_API_ERROR_VALUE_EXIST;
|
|
}
|
|
|
|
//Allocate
|
|
pool_get(lbm->vips, vip);
|
|
|
|
//Init
|
|
memcpy (&(vip->prefix), &(args.prefix), sizeof(args.prefix));
|
|
vip->plen = args.plen;
|
|
if (args.port != 0)
|
|
{
|
|
vip->protocol = args.protocol;
|
|
vip->port = args.port;
|
|
}
|
|
else
|
|
{
|
|
vip->protocol = (u8)~0;
|
|
vip->port = 0;
|
|
}
|
|
vip->last_garbage_collection = (u32) vlib_time_now(vlib_get_main());
|
|
vip->type = args.type;
|
|
|
|
if (args.type == LB_VIP_TYPE_IP4_L3DSR) {
|
|
vip->encap_args.dscp = args.encap_args.dscp;
|
|
}
|
|
else if ((args.type == LB_VIP_TYPE_IP4_NAT4)
|
|
||(args.type == LB_VIP_TYPE_IP6_NAT6)) {
|
|
vip->encap_args.srv_type = args.encap_args.srv_type;
|
|
vip->encap_args.target_port =
|
|
clib_host_to_net_u16(args.encap_args.target_port);
|
|
}
|
|
|
|
vip->flags = LB_VIP_FLAGS_USED;
|
|
if (args.src_ip_sticky)
|
|
{
|
|
vip->flags |= LB_VIP_FLAGS_SRC_IP_STICKY;
|
|
}
|
|
vip->as_indexes = 0;
|
|
|
|
//Validate counters
|
|
u32 i;
|
|
for (i = 0; i < LB_N_VIP_COUNTERS; i++) {
|
|
vlib_validate_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
|
|
vlib_zero_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
|
|
}
|
|
|
|
//Configure new flow table
|
|
vip->new_flow_table_mask = args.new_length - 1;
|
|
vip->new_flow_table = 0;
|
|
|
|
//Update flow hash table
|
|
lb_vip_update_new_flow_table(vip);
|
|
|
|
//Create adjacency to direct traffic
|
|
lb_vip_add_adjacency(lbm, vip, &vip_prefix_index);
|
|
|
|
if ( (lb_vip_is_nat4_port(vip) || lb_vip_is_nat6_port(vip))
|
|
&& (args.encap_args.srv_type == LB_SRV_TYPE_NODEPORT) )
|
|
{
|
|
u32 key;
|
|
uword * entry;
|
|
|
|
//Create maping from nodeport to vip_index
|
|
key = clib_host_to_net_u16(args.port);
|
|
entry = hash_get_mem (lbm->vip_index_by_nodeport, &key);
|
|
if (entry) {
|
|
lb_put_writer_lock();
|
|
return VNET_API_ERROR_VALUE_EXIST;
|
|
}
|
|
|
|
hash_set_mem (lbm->vip_index_by_nodeport, &key, vip - lbm->vips);
|
|
|
|
/* receive packets destined to NodeIP:NodePort */
|
|
udp_register_dst_port (vm, args.port, lb4_nodeport_node.index, 1);
|
|
udp_register_dst_port (vm, args.port, lb6_nodeport_node.index, 0);
|
|
}
|
|
|
|
*vip_index = vip - lbm->vips;
|
|
//Create per-port vip filtering table
|
|
if (args.port != 0)
|
|
{
|
|
lb_vip_add_port_filter(lbm, vip, vip_prefix_index, *vip_index);
|
|
vip->vip_prefix_index = vip_prefix_index;
|
|
}
|
|
|
|
lb_put_writer_lock();
|
|
return 0;
|
|
}
|
|
|
|
/**
 * Delete the VIP at @a vip_index and release its resources.
 *
 * Under the writer lock: removes every AS attached to the VIP, deletes the
 * forwarding adjacency, removes the per-port filtering entry (for per-port
 * VIPs) and finally clears the USED flag.  The pool slot itself is not
 * reclaimed here; it is only marked unused (see the FIXME below).
 *
 * The statement order matters: everything that dereferences @c vip must run
 * before the USED flag is cleared.
 *
 * @param vip_index  Index of the VIP in lbm->vips.
 * @return 0 on success, VNET_API_ERROR_* on failure.
 */
int lb_vip_del(u32 vip_index)
{
  lb_main_t *lbm = &lb_main;
  lb_vip_t *vip;
  int rv = 0;

  /* Does not remove default vip, i.e. vip_index = 0 */
  if (vip_index == 0)
    return VNET_API_ERROR_INVALID_VALUE;

  lb_get_writer_lock();
  if (!(vip = lb_vip_get_by_index(vip_index))) {
    lb_put_writer_lock();
    return VNET_API_ERROR_NO_SUCH_ENTRY;
  }

  //FIXME: This operation is actually not working
  //We will need to remove state before performing this.

  {
    //Remove all ASs
    ip46_address_t *ass = 0;
    lb_as_t *as;
    u32 *as_index;

    /* Snapshot the AS addresses first: lb_vip_del_ass_withlock() mutates
     * vip->as_indexes, so we must not delete while iterating the pool. */
    pool_foreach (as_index, vip->as_indexes) {
      as = &lbm->ass[*as_index];
      vec_add1(ass, as->address);
    }
    if (vec_len(ass))
      lb_vip_del_ass_withlock(vip_index, ass, vec_len(ass), 0);
    vec_free(ass);
  }

  //Delete adjacency
  lb_vip_del_adjacency(lbm, vip);

  /* Delete per-port vip filtering entry.  Kept before the USED flag is
   * cleared below, since lb_vip_del_port_filter() dereferences vip. */
  if (vip->port != 0)
    {
      rv = lb_vip_del_port_filter(lbm, vip);
    }

  //Set the VIP as unused
  vip->flags &= ~LB_VIP_FLAGS_USED;

  lb_put_writer_lock();
  return rv;
}
|
|
|
|
/* Register this shared object as a VPP plugin. */
VLIB_PLUGIN_REGISTER () = {
    .version = VPP_BUILD_VER,
    .description = "Load Balancer (LB)",
};
|
|
|
|
/**
 * vppinfra format callback for an LB DPO: prints the VIP whose pool index
 * is carried in the DPO.  The indent argument is consumed but unused.
 */
u8 *format_lb_dpo (u8 * s, va_list * va)
{
  index_t vip_pool_index = va_arg (*va, index_t);
  CLIB_UNUSED(u32 indent) = va_arg (*va, u32);
  return format (s, "%U", format_lb_vip,
                 pool_elt_at_index (lb_main.vips, vip_pool_index));
}
|
|
|
|
/* LB DPOs live as long as their VIP; no per-DPO refcounting needed. */
static void lb_dpo_lock (dpo_id_t *dpo) {}
|
|
/* Matching no-op unlock for the un-refcounted LB DPO type. */
static void lb_dpo_unlock (dpo_id_t *dpo) {}
|
|
|
|
static fib_node_t *
|
|
lb_fib_node_get_node (fib_node_index_t index)
|
|
{
|
|
lb_main_t *lbm = &lb_main;
|
|
lb_as_t *as = pool_elt_at_index (lbm->ass, index);
|
|
return (&as->fib_node);
|
|
}
|
|
|
|
/* FIB graph callback: AS lifetime is managed by the LB plugin itself,
 * so nothing to do when the last FIB lock is released. */
static void
lb_fib_node_last_lock_gone (fib_node_t *node)
{
}
|
|
|
|
static lb_as_t *
|
|
lb_as_from_fib_node (fib_node_t *node)
|
|
{
|
|
return ((lb_as_t*)(((char*)node) -
|
|
STRUCT_OFFSET_OF(lb_as_t, fib_node)));
|
|
}
|
|
|
|
/**
 * Stack the AS's DPO on the IP-forwarding contribution of its next-hop
 * FIB entry, selecting the DPO type that matches the owning VIP's
 * encapsulation flavor.
 */
static void
lb_as_stack (lb_as_t *as)
{
  lb_main_t *lbm = &lb_main;
  lb_vip_t *vip = &lbm->vips[as->vip_index];
  /* First matching predicate wins.
   * NOTE(review): if no predicate matches, dpo_type stays 0; presumably a
   * VIP always matches exactly one encap type — confirm. */
  dpo_type_t dpo_type = 0;

  if (lb_vip_is_gre4(vip))
    dpo_type = lbm->dpo_gre4_type;
  else if (lb_vip_is_gre6(vip))
    dpo_type = lbm->dpo_gre6_type;
  else if (lb_vip_is_gre4_port(vip))
    dpo_type = lbm->dpo_gre4_port_type;
  else if (lb_vip_is_gre6_port(vip))
    dpo_type = lbm->dpo_gre6_port_type;
  else if (lb_vip_is_l3dsr(vip))
    dpo_type = lbm->dpo_l3dsr_type;
  else if (lb_vip_is_l3dsr_port(vip))
    dpo_type = lbm->dpo_l3dsr_port_type;
  else if(lb_vip_is_nat4_port(vip))
    dpo_type = lbm->dpo_nat4_port_type;
  else if (lb_vip_is_nat6_port(vip))
    dpo_type = lbm->dpo_nat6_port_type;

  dpo_stack(dpo_type,
            lb_vip_is_ip4(vip->type)?DPO_PROTO_IP4:DPO_PROTO_IP6,
            &as->dpo,
            fib_entry_contribute_ip_forwarding(
                as->next_hop_fib_entry_index));
}
|
|
|
|
static fib_node_back_walk_rc_t
|
|
lb_fib_node_back_walk_notify (fib_node_t *node,
|
|
fib_node_back_walk_ctx_t *ctx)
|
|
{
|
|
lb_as_stack(lb_as_from_fib_node(node));
|
|
return (FIB_NODE_BACK_WALK_CONTINUE);
|
|
}
|
|
|
|
/**
 * Attach or detach the "lb-nat4-in2out" feature on an interface's
 * ip4-unicast arc.
 *
 * @param sw_if_index  Interface to configure.
 * @param is_del       Non-zero to disable the feature, zero to enable it.
 * @return always 0.
 */
int lb_nat4_interface_add_del (u32 sw_if_index, int is_del)
{
  /* Single call; the enable flag is simply the inverse of is_del. */
  vnet_feature_enable_disable ("ip4-unicast", "lb-nat4-in2out",
                               sw_if_index, is_del ? 0 : 1, 0, 0);
  return 0;
}
|
|
|
|
/**
 * Attach or detach the "lb-nat6-in2out" feature on an interface's
 * ip6-unicast arc.
 *
 * @param sw_if_index  Interface to configure.
 * @param is_del       Non-zero to disable the feature, zero to enable it.
 * @return always 0.
 */
int lb_nat6_interface_add_del (u32 sw_if_index, int is_del)
{
  /* Single call; the enable flag is simply the inverse of is_del. */
  vnet_feature_enable_disable ("ip6-unicast", "lb-nat6-in2out",
                               sw_if_index, is_del ? 0 : 1, 0, 0);
  return 0;
}
|
|
|
|
/**
 * Plugin init: set up lb_main, register DPO and FIB node types, and
 * create the default (catch-all) VIP and AS at pool index 0.
 *
 * Registration order matters: dpo_register_new_type() returns the type id
 * stored in lbm, so the dpo_* fields are assigned in the order the types
 * are registered.
 *
 * @return NULL on success (no clib_error_t is ever produced here).
 */
clib_error_t *
lb_init (vlib_main_t * vm)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  lb_main_t *lbm = &lb_main;
  lbm->vnet_main = vnet_get_main ();
  lbm->vlib_main = vm;

  lb_vip_t *default_vip;
  lb_as_t *default_as;
  /* Virtual function tables handed to the FIB and DPO registries below. */
  fib_node_vft_t lb_fib_node_vft = {
      .fnv_get = lb_fib_node_get_node,
      .fnv_last_lock = lb_fib_node_last_lock_gone,
      .fnv_back_walk = lb_fib_node_back_walk_notify,
  };
  dpo_vft_t lb_vft = {
      .dv_lock = lb_dpo_lock,
      .dv_unlock = lb_dpo_unlock,
      .dv_format = format_lb_dpo,
  };

  //Allocate and init default VIP.
  lbm->vips = 0;
  pool_get(lbm->vips, default_vip);
  default_vip->new_flow_table_mask = 0;
  /* All-ones prefix marks the slot as the sentinel/default VIP. */
  default_vip->prefix.ip6.as_u64[0] = 0xffffffffffffffffL;
  default_vip->prefix.ip6.as_u64[1] = 0xffffffffffffffffL;
  default_vip->protocol = ~0;
  default_vip->port = 0;
  default_vip->flags = LB_VIP_FLAGS_USED;

  lbm->per_cpu = 0;
  vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1);
  clib_spinlock_init (&lbm->writer_lock);
  lbm->per_cpu_sticky_buckets = LB_DEFAULT_PER_CPU_STICKY_BUCKETS;
  lbm->flow_timeout = LB_DEFAULT_FLOW_TIMEOUT;
  /* All-ones source addresses mean "not configured yet". */
  lbm->ip4_src_address.as_u32 = 0xffffffff;
  lbm->ip6_src_address.as_u64[0] = 0xffffffffffffffffL;
  lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL;
  /* Register one DPO type per encapsulation flavor. */
  lbm->dpo_gre4_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_nodes);
  lbm->dpo_gre6_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_nodes);
  lbm->dpo_gre4_port_type = dpo_register_new_type(&lb_vft,
                                                  lb_dpo_gre4_port_nodes);
  lbm->dpo_gre6_port_type = dpo_register_new_type(&lb_vft,
                                                  lb_dpo_gre6_port_nodes);
  lbm->dpo_l3dsr_type = dpo_register_new_type(&lb_vft,
                                              lb_dpo_l3dsr_nodes);
  lbm->dpo_l3dsr_port_type = dpo_register_new_type(&lb_vft,
                                                   lb_dpo_l3dsr_port_nodes);
  lbm->dpo_nat4_port_type = dpo_register_new_type(&lb_vft,
                                                  lb_dpo_nat4_port_nodes);
  lbm->dpo_nat6_port_type = dpo_register_new_type(&lb_vft,
                                                  lb_dpo_nat6_port_nodes);
  lbm->fib_node_type = fib_node_register_new_type ("lb", &lb_fib_node_vft);

  //Init AS reference counters
  vlib_refcount_init(&lbm->as_refcount);

  //Allocate and init default AS.
  lbm->ass = 0;
  pool_get(lbm->ass, default_as);
  default_as->flags = 0;
  /* Default AS drops traffic: used as the fallback flow-table target. */
  default_as->dpo.dpoi_next_node = LB_NEXT_DROP;
  default_as->vip_index = ~0;
  default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL;
  default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL;

  /* Generate a valid flow table for default VIP */
  default_vip->as_indexes = NULL;
  lb_get_writer_lock();
  lb_vip_update_new_flow_table(default_vip);
  lb_put_writer_lock();

  lbm->vip_index_by_nodeport
    = hash_create_mem (0, sizeof(u16), sizeof (uword));

  clib_bihash_init_8_8 (&lbm->vip_index_per_port,
                        "vip_index_per_port", LB_VIP_PER_PORT_BUCKETS,
                        LB_VIP_PER_PORT_MEMORY_SIZE);

  clib_bihash_init_8_8 (&lbm->mapping_by_as4,
                        "mapping_by_as4", LB_MAPPING_BUCKETS,
                        LB_MAPPING_MEMORY_SIZE);

  clib_bihash_init_24_8 (&lbm->mapping_by_as6,
                         "mapping_by_as6", LB_MAPPING_BUCKETS,
                         LB_MAPPING_MEMORY_SIZE);

  /* Assign human-readable names to the per-VIP counters. */
#define _(a,b,c) lbm->vip_counters[c].name = b;
  lb_foreach_vip_counter
#undef _

  lb_fib_src = fib_source_allocate("lb",
                                   FIB_SOURCE_PRIORITY_HI,
                                   FIB_SOURCE_BH_SIMPLE);

  return NULL;
}
|
|
|
|
/* Run lb_init() during VPP start-up. */
VLIB_INIT_FUNCTION (lb_init);