cnat: remove rwlock on ts

Type: improvement

Remove rwlock contention on timestamps. ~10% pps with
10k sessions. Use fixed-size-pools of increasing sizes
starting with 4K, and with a x2 step each time.
We don't free/shrink allocated pools.

Change-Id: I5fea51faba40430106c823275a6356e81709d118
Signed-off-by: Nathan Skrzypczak <nathan.skrzypczak@gmail.com>
This commit is contained in:
Nathan Skrzypczak
2020-11-04 11:41:05 +01:00
committed by Beno�t Ganne
parent 897844d1e9
commit 6d733a93b2
7 changed files with 133 additions and 61 deletions

View File

@ -20,10 +20,9 @@
#include <cnat/cnat_translation.h>
cnat_client_t *cnat_client_pool;
cnat_client_db_t cnat_client_db;
dpo_type_t cnat_client_dpo;
fib_source_t cnat_fib_source;
static_always_inline u8
cnat_client_is_clone (cnat_client_t * cc)
@ -302,7 +301,6 @@ cnat_client_show (vlib_main_t * vm,
vlib_cli_output(vm, "%U", format_cnat_client, cci, 0);
vlib_cli_output (vm, "%d clients", pool_elts (cnat_client_pool));
vlib_cli_output (vm, "%d timestamps", pool_elts (cnat_timestamps));
}
else
{
@ -389,6 +387,9 @@ cnat_client_init (vlib_main_t * vm)
clib_bihash_init_16_8 (&cnat_client_db.cc_ip_id_hash, "CNat client DB",
cm->client_hash_buckets, cm->client_hash_memory);
cnat_fib_source = fib_source_allocate ("cnat", CNAT_FIB_SOURCE_PRIORITY,
FIB_SOURCE_BH_SIMPLE);
clib_spinlock_init (&cnat_client_db.throttle_lock);
cnat_client_db.throttle_mem =
hash_create_mem (0, sizeof (ip_address_t), sizeof (uword));

View File

@ -86,8 +86,6 @@ extern void cnat_client_free_by_ip (ip46_address_t * addr, u8 af);
extern cnat_client_t *cnat_client_pool;
extern dpo_type_t cnat_client_dpo;
#define CC_INDEX_INVALID ((u32)(~0))
static_always_inline cnat_client_t *
cnat_client_get (index_t i)
{

View File

@ -19,72 +19,122 @@
#include <cnat/cnat_types.h>
always_inline int
cnat_ts_is_free_index (u32 index)
{
u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
return pool_is_free_index (cnat_timestamps.ts_pools[pidx], index);
}
always_inline cnat_timestamp_t *
cnat_timestamp_get (u32 index)
{
/* 6 top bits for choosing pool */
u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
return pool_elt_at_index (cnat_timestamps.ts_pools[pidx], index);
}
always_inline cnat_timestamp_t *
cnat_timestamp_get_if_valid (u32 index)
{
/* 6 top bits for choosing pool */
u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
if (pidx >= cnat_timestamps.next_empty_pool_idx)
return (NULL);
if (pool_is_free_index (cnat_timestamps.ts_pools[pidx], index))
return (NULL);
return pool_elt_at_index (cnat_timestamps.ts_pools[pidx], index);
}
always_inline index_t
cnat_timestamp_alloc ()
{
cnat_timestamp_t *ts;
u32 index, pool_sz;
uword pidx;
clib_spinlock_lock (&cnat_timestamps.ts_lock);
pidx = clib_bitmap_first_set (cnat_timestamps.ts_free);
pool_sz = 1 << (CNAT_TS_BASE_SIZE + pidx);
ASSERT (pidx <= cnat_timestamps.next_empty_pool_idx);
if (pidx == cnat_timestamps.next_empty_pool_idx)
pool_init_fixed (
cnat_timestamps.ts_pools[cnat_timestamps.next_empty_pool_idx++],
pool_sz);
pool_get (cnat_timestamps.ts_pools[pidx], ts);
if (pool_elts (cnat_timestamps.ts_pools[pidx]) == pool_sz)
clib_bitmap_set (cnat_timestamps.ts_free, pidx, 0);
clib_spinlock_unlock (&cnat_timestamps.ts_lock);
index = (u32) pidx << (32 - CNAT_TS_MPOOL_BITS);
return index | (ts - cnat_timestamps.ts_pools[pidx]);
}
always_inline void
cnat_timestamp_destroy (u32 index)
{
u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
clib_spinlock_lock (&cnat_timestamps.ts_lock);
pool_put_index (cnat_timestamps.ts_pools[pidx], index);
clib_bitmap_set (cnat_timestamps.ts_free, pidx, 1);
clib_spinlock_unlock (&cnat_timestamps.ts_lock);
}
always_inline u32
cnat_timestamp_new (f64 t)
{
u32 index;
cnat_timestamp_t *ts;
clib_rwlock_writer_lock (&cnat_main.ts_lock);
pool_get (cnat_timestamps, ts);
index_t index = cnat_timestamp_alloc ();
cnat_timestamp_t *ts = cnat_timestamp_get (index);
ts->last_seen = t;
ts->lifetime = cnat_main.session_max_age;
ts->refcnt = CNAT_TIMESTAMP_INIT_REFCNT;
index = ts - cnat_timestamps;
clib_rwlock_writer_unlock (&cnat_main.ts_lock);
return index;
}
always_inline void
cnat_timestamp_inc_refcnt (u32 index)
{
clib_rwlock_reader_lock (&cnat_main.ts_lock);
cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
ts->refcnt++;
clib_rwlock_reader_unlock (&cnat_main.ts_lock);
cnat_timestamp_t *ts = cnat_timestamp_get (index);
clib_atomic_add_fetch (&ts->refcnt, 1);
}
always_inline void
cnat_timestamp_update (u32 index, f64 t)
{
clib_rwlock_reader_lock (&cnat_main.ts_lock);
cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
cnat_timestamp_t *ts = cnat_timestamp_get (index);
ts->last_seen = t;
clib_rwlock_reader_unlock (&cnat_main.ts_lock);
}
always_inline void
cnat_timestamp_set_lifetime (u32 index, u16 lifetime)
{
clib_rwlock_reader_lock (&cnat_main.ts_lock);
cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
cnat_timestamp_t *ts = cnat_timestamp_get (index);
ts->lifetime = lifetime;
clib_rwlock_reader_unlock (&cnat_main.ts_lock);
}
always_inline f64
cnat_timestamp_exp (u32 index)
{
f64 t;
if (INDEX_INVALID == index)
cnat_timestamp_t *ts = cnat_timestamp_get_if_valid (index);
if (NULL == ts)
return -1;
clib_rwlock_reader_lock (&cnat_main.ts_lock);
cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
t = ts->last_seen + (f64) ts->lifetime;
clib_rwlock_reader_unlock (&cnat_main.ts_lock);
return t;
}
always_inline void
cnat_timestamp_free (u32 index)
{
if (INDEX_INVALID == index)
cnat_timestamp_t *ts = cnat_timestamp_get_if_valid (index);
if (NULL == ts)
return;
clib_rwlock_writer_lock (&cnat_main.ts_lock);
cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
ts->refcnt--;
if (0 == ts->refcnt)
pool_put (cnat_timestamps, ts);
clib_rwlock_writer_unlock (&cnat_main.ts_lock);
if (0 == clib_atomic_sub_fetch (&ts->refcnt, 1))
cnat_timestamp_destroy (index);
}
/*

View File

@ -14,6 +14,7 @@
*/
#include <cnat/cnat_session.h>
#include <vlibmemory/api.h>
#include <cnat/cnat_client.h>
static uword

View File

@ -94,7 +94,8 @@ format_cnat_session (u8 * s, va_list * args)
cnat_session_t *sess = va_arg (*args, cnat_session_t *);
CLIB_UNUSED (int verbose) = va_arg (*args, int);
f64 ts = 0;
if (!pool_is_free_index (cnat_timestamps, sess->value.cs_ts_index))
if (!cnat_ts_is_free_index (sess->value.cs_ts_index))
ts = cnat_timestamp_exp (sess->value.cs_ts_index);
s = format (
@ -279,6 +280,12 @@ cnat_session_init (vlib_main_t * vm)
cm->session_hash_memory);
BV (clib_bihash_set_kvp_format_fn) (&cnat_session_db, format_cnat_session);
cnat_timestamps.next_empty_pool_idx = 0;
clib_bitmap_alloc (cnat_timestamps.ts_free, 1 << CNAT_TS_MPOOL_BITS);
clib_bitmap_set_region (cnat_timestamps.ts_free, 0, 1,
1 << CNAT_TS_MPOOL_BITS);
clib_spinlock_init (&cnat_timestamps.ts_lock);
return (NULL);
}
@ -289,21 +296,38 @@ cnat_timestamp_show (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
cnat_timestamp_t *ts;
clib_rwlock_reader_lock (&cnat_main.ts_lock);
pool_foreach (ts, cnat_timestamps)
int ts_cnt = 0, cnt;
u8 verbose = 0;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
vlib_cli_output (vm, "[%d] last_seen:%f lifetime:%u ref:%u",
ts - cnat_timestamps, ts->last_seen, ts->lifetime,
ts->refcnt);
if (unformat (input, "verbose"))
verbose = 1;
else
return (clib_error_return (0, "unknown input '%U'",
format_unformat_error, input));
}
clib_rwlock_reader_unlock (&cnat_main.ts_lock);
for (int i = 0; i < cnat_timestamps.next_empty_pool_idx; i++)
{
cnt = pool_elts (cnat_timestamps.ts_pools[i]);
ts_cnt += cnt;
vlib_cli_output (vm, "-- Pool %d [%d/%d]", i, cnt,
pool_header (cnat_timestamps.ts_pools[i])->max_elts);
if (!verbose)
continue;
pool_foreach (ts, cnat_timestamps.ts_pools[i])
vlib_cli_output (vm, "[%d] last_seen:%f lifetime:%u ref:%u",
ts - cnat_timestamps.ts_pools[i], ts->last_seen,
ts->lifetime, ts->refcnt);
}
vlib_cli_output (vm, "Total timestamps %d", ts_cnt);
return (NULL);
}
VLIB_CLI_COMMAND (cnat_timestamp_show_cmd, static) = {
.path = "show cnat timestamp",
.function = cnat_timestamp_show,
.short_help = "show cnat timestamp",
.short_help = "show cnat timestamp [verbose]",
.is_mp_safe = 1,
};

View File

@ -16,8 +16,7 @@
#include <cnat/cnat_types.h>
cnat_main_t cnat_main;
fib_source_t cnat_fib_source;
cnat_timestamp_t *cnat_timestamps;
cnat_timestamp_mpool_t cnat_timestamps;
char *cnat_error_strings[] = {
#define cnat_error(n,s) s,
@ -152,19 +151,6 @@ format_cnat_endpoint (u8 * s, va_list * args)
return (s);
}
static clib_error_t *
cnat_types_init (vlib_main_t * vm)
{
cnat_fib_source = fib_source_allocate ("cnat",
CNAT_FIB_SOURCE_PRIORITY,
FIB_SOURCE_BH_SIMPLE);
clib_rwlock_init (&cnat_main.ts_lock);
return (NULL);
}
void
cnat_enable_disable_scanner (cnat_scanner_cmd_t event_type)
{
@ -258,7 +244,6 @@ cnat_get_main ()
}
VLIB_EARLY_CONFIG_FUNCTION (cnat_config, "cnat");
VLIB_INIT_FUNCTION (cnat_types_init);
/*
* fd.io coding-style-patch-verification: ON

View File

@ -148,9 +148,6 @@ typedef struct cnat_main_
/* delay in seconds between two scans of session/clients tables */
f64 scanner_timeout;
/* Lock for the timestamp pool */
clib_rwlock_t ts_lock;
/* Index of the scanner process node */
uword scanner_node_index;
@ -175,6 +172,23 @@ typedef struct cnat_timestamp_t_
u16 refcnt;
} cnat_timestamp_t;
/* Create the first pool with 1 << CNAT_TS_BASE_SIZE elts */
#define CNAT_TS_BASE_SIZE (8)
/* reserve the top CNAT_TS_MPOOL_BITS bits for finding the pool */
#define CNAT_TS_MPOOL_BITS (6)
typedef struct cnat_timestamp_mpool_t_
{
/* Increasing fixed size pools of timestamps */
cnat_timestamp_t *ts_pools[1 << CNAT_TS_MPOOL_BITS];
/* Bitmap of pools with free space */
uword *ts_free;
/* Index of next pool to init */
u8 next_empty_pool_idx;
/* ts creation lock */
clib_spinlock_t ts_lock;
} cnat_timestamp_mpool_t;
typedef struct cnat_node_ctx_
{
f64 now;
@ -188,8 +202,7 @@ extern u8 *format_cnat_endpoint (u8 * s, va_list * args);
extern uword unformat_cnat_ep_tuple (unformat_input_t * input,
va_list * args);
extern uword unformat_cnat_ep (unformat_input_t * input, va_list * args);
extern cnat_timestamp_t *cnat_timestamps;
extern fib_source_t cnat_fib_source;
extern cnat_timestamp_mpool_t cnat_timestamps;
extern cnat_main_t cnat_main;
extern char *cnat_error_strings[];