cnat: maglev fixes & improvements

This fixes the maglev logic which previously
included a wrong simplification.
It moves the maglev logic to its own file,
and adds a test function in the debug cli.

Type: improvement

Change-Id: I2790ae2a26fc1c5739ff02f41d436bfcafd5b380
Signed-off-by: Nathan Skrzypczak <nathan.skrzypczak@gmail.com>
This commit is contained in:
Nathan Skrzypczak
2022-01-24 17:10:41 +01:00
committed by Damjan Marion
parent c454e8993d
commit 6de58f5fd0
7 changed files with 447 additions and 103 deletions
+1
View File
@@ -24,6 +24,7 @@ add_vpp_plugin(cnat
cnat_types.c
cnat_snat_policy.c
cnat_src_policy.c
cnat_maglev.c
API_FILES
cnat.api
File diff suppressed because it is too large Load Diff
+21
View File
@@ -0,0 +1,21 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright(c) 2022 Cisco Systems, Inc.
*/
#ifndef __CNAT_MAGLEV_H__
#define __CNAT_MAGLEV_H__
#include <cnat/cnat_types.h>
#include <cnat/cnat_translation.h>
/*
 * Per-backend maglev permutation parameters.
 * NOTE(review): the code consuming this struct is not visible here;
 * presumably offset/skip define the backend's slot sequence
 * (offset + j * skip) % table_len -- confirm in cnat_maglev.c.
 */
typedef struct
{
/* offset & skip used for sorting, should be first */
u32 offset;
u32 skip;
/* presumably the position of the backend in the active path vector
 * -- TODO confirm */
u32 index;
} cnat_maglev_perm_t;
/*
 * Initialise the maglev state of translation ct.
 * NOTE(review): the definition is not part of this header; presumably
 * it lives in cnat_maglev.c and fills ct->lb_maglev -- confirm there.
 */
extern void cnat_translation_init_maglev (cnat_translation_t *ct);
#endif
+1 -103
View File
@@ -20,6 +20,7 @@
#include <vnet/dpo/drop_dpo.h>
#include <cnat/cnat_translation.h>
#include <cnat/cnat_maglev.h>
#include <cnat/cnat_session.h>
#include <cnat/cnat_client.h>
@@ -200,110 +201,7 @@ cnat_remove_translation_from_db (index_t cci, cnat_endpoint_t * vip,
clib_bihash_add_del_8_8 (&cnat_translation_db, &bkey, 0);
}
/*
 * Scratch record describing one backend while the maglev table of a
 * translation is being built by cnat_translation_init_maglev ().
 */
typedef struct
{
/* endpoint tracker this entry was built from; used as the sort key */
cnat_ep_trk_t *trk;
/* position of trk in ct_active_paths; the value stored in lb_maglev */
u32 index;
/* first slot probed: h1 % maglev_len */
u32 offset;
/* stride between probed slots: h2 % (maglev_len - 1) + 1, never 0 */
u32 skip;
} cnat_maglev_entry_t;
/*
 * Ordering callback for vec_sort_with_function () over a vector of
 * cnat_maglev_entry_t.
 *
 * Entries are ordered by the endpoints of their tracker, comparing in
 * turn: TX address, TX port, RX address, RX port.
 *
 * @param _a, _b  pointers to the two cnat_maglev_entry_t to compare
 * @return        <0, 0 or >0 when a sorts before, equal to or after b
 */
static int
cnat_maglev_entry_compare (void *_a, void *_b)
{
  cnat_ep_trk_t *a = ((cnat_maglev_entry_t *) _a)->trk;
  cnat_ep_trk_t *b = ((cnat_maglev_entry_t *) _b)->trk;
  int rv = 0;

  if ((rv =
	 ip_address_cmp (&a->ct_ep[VLIB_TX].ce_ip, &b->ct_ep[VLIB_TX].ce_ip)))
    return rv;
  /* compare a against b: subtracting a's port from itself always
   * yields 0 and silently drops the port from the sort key */
  if ((rv = a->ct_ep[VLIB_TX].ce_port - b->ct_ep[VLIB_TX].ce_port))
    return rv;
  if ((rv =
	 ip_address_cmp (&a->ct_ep[VLIB_RX].ce_ip, &b->ct_ep[VLIB_RX].ce_ip)))
    return rv;
  if ((rv = a->ct_ep[VLIB_RX].ce_port - b->ct_ep[VLIB_RX].ce_port))
    return rv;
  return 0;
}
/*
 * (Re)build the maglev lookup table ct->lb_maglev from the active
 * paths of the translation.
 *
 * Each active path gets an (offset, skip) pair derived from a hash of
 * its TX/RX addresses and ports. The paths are then sorted and take
 * turns claiming the first still-free slot of their own sequence
 * (offset + j * skip) % maglev_len, until maglev_len slots are claimed.
 * Nothing is done when there is no active path.
 */
static void
cnat_translation_init_maglev (cnat_translation_t *ct)
{
  cnat_maglev_entry_t *entries = NULL, *be;
  cnat_main_t *cm = &cnat_main;
  cnat_ep_trk_t *path;
  u32 n_filled = 0;
  int n_paths = 0;

  /* Pass 1: derive one entry per active path */
  vec_foreach (path, ct->ct_active_paths)
    {
      cnat_maglev_entry_t entry;
      u32 h1, h2;

      if (AF_IP4 != ip_addr_version (&path->ct_ep[VLIB_TX].ce_ip))
	{
	  u64 tx_ip, ports, rx_ip;

	  /* fold each 128 bit address down to 64 bits */
	  tx_ip = ip_addr_v6 (&path->ct_ep[VLIB_TX].ce_ip).as_u64[0] ^
		  ip_addr_v6 (&path->ct_ep[VLIB_TX].ce_ip).as_u64[1];
	  ports = (u64) path->ct_ep[VLIB_TX].ce_port << 16 |
		  (u64) path->ct_ep[VLIB_RX].ce_port;
	  rx_ip = ip_addr_v6 (&path->ct_ep[VLIB_RX].ce_ip).as_u64[0] ^
		  ip_addr_v6 (&path->ct_ep[VLIB_RX].ce_ip).as_u64[1];

	  hash_mix64 (tx_ip, ports, rx_ip);
	  h1 = rx_ip;
	  h2 = ports;
	}
      else
	{
	  u32 tx_ip, ports, rx_ip;

	  tx_ip = ip_addr_v4 (&path->ct_ep[VLIB_TX].ce_ip).data_u32;
	  ports = (u64) path->ct_ep[VLIB_TX].ce_port << 16 |
		  (u64) path->ct_ep[VLIB_RX].ce_port;
	  rx_ip = ip_addr_v4 (&path->ct_ep[VLIB_RX].ce_ip).data_u32;

	  hash_v3_mix32 (tx_ip, ports, rx_ip);
	  hash_v3_finalize32 (tx_ip, ports, rx_ip);
	  h1 = rx_ip;
	  h2 = ports;
	}

      entry.trk = path;
      entry.index = n_paths++;
      entry.offset = h1 % cm->maglev_len;
      /* skip is kept in [1, maglev_len - 1] so the sequence moves */
      entry.skip = h2 % (cm->maglev_len - 1) + 1;
      vec_add1 (entries, entry);
    }

  if (!n_paths)
    return;

  vec_sort_with_function (entries, cnat_maglev_entry_compare);

  /* Reuse a previously allocated table; mark every slot as free (-1) */
  vec_validate (ct->lb_maglev, cm->maglev_len);
  vec_set (ct->lb_maglev, -1);

  /* Pass 2: round-robin over the entries, one slot claimed per turn */
  while (n_filled != cm->maglev_len)
    {
      vec_foreach (be, entries)
	{
	  u32 probe = 0;
	  u32 slot;

	  /* walk this entry's sequence up to the first free slot */
	  for (;;)
	    {
	      slot = (be->offset + probe * be->skip) % cm->maglev_len;
	      if ((u32) -1 == ct->lb_maglev[slot])
		break;
	      probe++;
	    }

	  ct->lb_maglev[slot] = be->index;
	  if (++n_filled == cm->maglev_len)
	    break;
	}
    }

  vec_free (entries);
}
static void
cnat_translation_stack (cnat_translation_t * ct)
+2
View File
@@ -62,6 +62,8 @@ typedef enum cnat_trk_flag_t_
/* Don't translate this endpoint, but still
* forward. Used by maglev for DSR */
CNAT_TRK_FLAG_NO_NAT = (1 << 1),
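/* NOTE(review): presumably marks an endpoint as disabled by the maglev
 * test function of the debug cli -- TODO confirm */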
CNAT_TRK_FLAG_TEST_DISABLED = (1 << 7),
} cnat_trk_flag_t;
typedef enum
+1
View File
@@ -98,6 +98,7 @@ _(format_hex_bytes_no_wrap);
_(format_white_space);
_(format_f64);
_(format_time_interval);
_ (format_duration);
#ifdef CLIB_UNIX
/* Unix specific formats. */
+46
View File
@@ -134,6 +134,52 @@ format_white_space (u8 * s, va_list * va)
return s;
}
/*
 * format () helper printing a duration as its non-zero components,
 * e.g. "1days 2h 3min 4s 5ms 6us".
 *
 * Consumes one f64 argument: the duration, in seconds. Components that
 * are zero are omitted; a duration with no non-zero component (down to
 * the microsecond) is printed as "0".
 *
 * @param s     vector to append to
 * @param args  va_list holding the f64 duration
 * @return      the (possibly reallocated) vector
 */
u8 *
format_duration (u8 *s, va_list *args)
{
  f64 t = va_arg (*args, f64);
  s = format (s, "");

  const f64 seconds_per_minute = 60;
  const f64 seconds_per_hour = 60 * seconds_per_minute;
  const f64 seconds_per_day = 24 * seconds_per_hour;
  uword days, hours, minutes, secs, msecs, usecs;

  /* peel off each unit in turn; t keeps the remainder */
  days = t / seconds_per_day;
  t -= days * seconds_per_day;

  hours = t / seconds_per_hour;
  t -= hours * seconds_per_hour;

  minutes = t / seconds_per_minute;
  t -= minutes * seconds_per_minute;

  secs = t;
  t -= secs;

  /* t is now the sub-second fraction */
  msecs = 1e3 * t;

  usecs = 1e6 * t;
  usecs = usecs % 1000;

  /* Test the components rather than t: at this point t only holds the
   * sub-second remainder, so testing it would prepend a stray "0" to
   * every whole-second duration (5.0 would print as "05s "). */
  if (!(days || hours || minutes || secs || msecs || usecs))
    s = format (s, "0");
  if (days)
    s = format (s, "%ddays ", days);
  if (hours)
    s = format (s, "%dh ", hours);
  if (minutes)
    s = format (s, "%dmin ", minutes);
  if (secs)
    s = format (s, "%ds ", secs);
  if (msecs)
    s = format (s, "%dms ", msecs);
  if (usecs)
    s = format (s, "%dus", usecs);
  return (s);
}
u8 *
format_time_interval (u8 * s, va_list * args)
{