buffers: support cases where numa node memory is not available

Change-Id: I550ef893e41d86310d4e5af16d6100b8e557b68a
Signed-off-by: Damjan Marion <damarion@cisco.com>
This commit is contained in:
Damjan Marion
2019-02-28 23:16:11 +01:00
committed by Neale Ranns
parent 470a3704e3
commit b592d1b641
3 changed files with 83 additions and 41 deletions

View File

@ -493,9 +493,9 @@ vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm,
   return copied;
 }
-clib_error_t *
-vlib_buffer_pool_create (vlib_main_t * vm, u8 index, char *name,
-                         u32 data_size, u32 physmem_map_index)
+u8
+vlib_buffer_pool_create (vlib_main_t * vm, char *name, u32 data_size,
+                         u32 physmem_map_index)
 {
   vlib_buffer_main_t *bm = vm->buffer_main;
   vlib_buffer_pool_t *bp;
@ -503,17 +503,12 @@ vlib_buffer_pool_create (vlib_main_t * vm, u8 index, char *name,
   uword start = pointer_to_uword (m->base);
   uword size = (uword) m->n_pages << m->log2_page_size;
   uword i, j;
-  u32 alloc_size, n_alloc_per_page;;
+  u32 alloc_size, n_alloc_per_page;
-  vec_validate_aligned (bm->buffer_pools, index, CLIB_CACHE_LINE_BYTES);
-  bp = vec_elt_at_index (bm->buffer_pools, index);
-  if (bp->start)
-    return clib_error_return (0, "buffer with index %u already exists",
-                              index);
-  if (index >= 255)
-    return clib_error_return (0, "buffer index must be < 255", index);
+  if (vec_len (bm->buffer_pools) >= 255)
+    return ~0;
+  vec_add2_aligned (bm->buffer_pools, bp, 1, CLIB_LOG2_CACHE_LINE_BYTES);
   if (bm->buffer_mem_size == 0)
     {
@ -581,8 +576,7 @@ vlib_buffer_pool_create (vlib_main_t * vm, u8 index, char *name,
} }
bp->n_buffers = vec_len (bp->buffers); bp->n_buffers = vec_len (bp->buffers);
return bp->index;
return 0;
} }
static u8 * static u8 *
@ -659,7 +653,8 @@ vlib_buffer_worker_init (vlib_main_t * vm)
 VLIB_WORKER_INIT_FUNCTION (vlib_buffer_worker_init);
 static clib_error_t *
-vlib_buffer_main_init_numa_node (struct vlib_main_t *vm, u32 numa_node)
+vlib_buffer_main_init_numa_node (struct vlib_main_t *vm, u32 numa_node,
+                                 u8 * index)
 {
   vlib_buffer_main_t *bm = vm->buffer_main;
   clib_error_t *error;
@ -703,9 +698,14 @@ retry:
vec_reset_length (name); vec_reset_length (name);
name = format (name, "default-numa-%d%c", numa_node, 0); name = format (name, "default-numa-%d%c", numa_node, 0);
return vlib_buffer_pool_create (vm, numa_node, (char *) name, *index = vlib_buffer_pool_create (vm, (char *) name,
vlib_buffer_get_default_data_size (vm), vlib_buffer_get_default_data_size (vm),
physmem_map_index); physmem_map_index);
if (*index == (u8) ~ 0)
return clib_error_return (0, "maximum number of buffer pools reached");
return 0;
} }
void void
@ -790,10 +790,10 @@ vlib_buffer_main_init (struct vlib_main_t * vm)
{ {
vlib_buffer_main_t *bm; vlib_buffer_main_t *bm;
clib_error_t *err; clib_error_t *err;
clib_bitmap_t *bmp = 0; clib_bitmap_t *bmp = 0, *bmp_has_memory = 0;
u32 numa_node; u32 numa_node;
vlib_buffer_pool_t *bp; vlib_buffer_pool_t *bp;
u8 *name; u8 *name = 0, first_valid_buffer_pool_index = ~0;
vlib_buffer_main_alloc (vm); vlib_buffer_main_alloc (vm);
@ -803,42 +803,82 @@ vlib_buffer_main_init (struct vlib_main_t * vm)
clib_spinlock_init (&bm->buffer_known_hash_lockp); clib_spinlock_init (&bm->buffer_known_hash_lockp);
err = clib_sysfs_read ("/sys/devices/system/node/possible", "%U", if ((err = clib_sysfs_read ("/sys/devices/system/node/online", "%U",
unformat_bitmap_list, &bmp); unformat_bitmap_list, &bmp)))
if (err) clib_error_free (err);
{
/* no info from sysfs, assuming that only numa 0 exists */ if ((err = clib_sysfs_read ("/sys/devices/system/node/has_memory", "%U",
clib_error_free (err); unformat_bitmap_list, &bmp_has_memory)))
bmp = clib_bitmap_set (bmp, 0, 1); clib_error_free (err);
}
if (bmp && bmp_has_memory)
bmp = clib_bitmap_and (bmp, bmp_has_memory);
/* no info from sysfs, assuming that only numa 0 exists */
if (bmp == 0)
bmp = clib_bitmap_set (bmp, 0, 1);
if (clib_bitmap_last_set (bmp) >= VLIB_BUFFER_MAX_NUMA_NODES)
clib_panic ("system have more than %u NUMA nodes",
VLIB_BUFFER_MAX_NUMA_NODES);
/* *INDENT-OFF* */ /* *INDENT-OFF* */
clib_bitmap_foreach (numa_node, bmp, { clib_bitmap_foreach (numa_node, bmp,
if ((err = vlib_buffer_main_init_numa_node(vm, numa_node))) {
goto done; u8 *index = bm->default_buffer_pool_index_for_numa + numa_node;
index[0] = ~0;
if ((err = vlib_buffer_main_init_numa_node (vm, numa_node, index)))
{
clib_error_report (err);
clib_error_free (err);
continue;
}
if (first_valid_buffer_pool_index == 0xff)
first_valid_buffer_pool_index = index[0];
}); });
/* *INDENT-ON* */ /* *INDENT-ON* */
bm->n_numa_nodes = clib_bitmap_last_set (bmp) + 1; if (first_valid_buffer_pool_index == (u8) ~ 0)
{
err = clib_error_return (0, "failed to allocate buffer pool(s)");
goto done;
}
/* *INDENT-OFF* */
clib_bitmap_foreach (numa_node, bmp,
{
if (bm->default_buffer_pool_index_for_numa[numa_node] == (u8) ~0)
bm->default_buffer_pool_index_for_numa[numa_node] =
first_valid_buffer_pool_index;
});
/* *INDENT-ON* */
vec_foreach (bp, bm->buffer_pools) vec_foreach (bp, bm->buffer_pools)
{ {
name = format (0, "/buffer-pools/%s/cached%c", bp->name, 0); if (bp->n_buffers == 0)
continue;
vec_reset_length (name);
name = format (name, "/buffer-pools/%s/cached%c", bp->name, 0);
stat_segment_register_gauge (name, buffer_gauges_update_cached_fn, stat_segment_register_gauge (name, buffer_gauges_update_cached_fn,
bp - bm->buffer_pools); bp - bm->buffer_pools);
vec_free (name);
name = format (0, "/buffer-pools/%s/used%c", bp->name, 0); vec_reset_length (name);
name = format (name, "/buffer-pools/%s/used%c", bp->name, 0);
stat_segment_register_gauge (name, buffer_gauges_update_used_fn, stat_segment_register_gauge (name, buffer_gauges_update_used_fn,
bp - bm->buffer_pools); bp - bm->buffer_pools);
vec_free (name);
name = format (0, "/buffer-pools/%s/available%c", bp->name, 0); vec_reset_length (name);
name = format (name, "/buffer-pools/%s/available%c", bp->name, 0);
stat_segment_register_gauge (name, buffer_gauges_update_available_fn, stat_segment_register_gauge (name, buffer_gauges_update_available_fn,
bp - bm->buffer_pools); bp - bm->buffer_pools);
vec_free (name);
} }
done: done:
vec_free (bmp); vec_free (bmp);
vec_free (bmp_has_memory);
vec_free (name);
return err; return err;
} }

View File

@ -394,6 +394,8 @@ typedef struct
   vlib_buffer_t buffer_template;
 } vlib_buffer_pool_t;
+#define VLIB_BUFFER_MAX_NUMA_NODES 32
 typedef struct
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@ -409,7 +411,7 @@ typedef struct
      has never been allocated. */
   uword *buffer_known_hash;
   clib_spinlock_t buffer_known_hash_lockp;
-  u32 n_numa_nodes;
+  u8 default_buffer_pool_index_for_numa[VLIB_BUFFER_MAX_NUMA_NODES];
   /* config */
   u32 buffers_per_numa;

View File

@ -126,8 +126,8 @@ vlib_buffer_copy_template (vlib_buffer_t * b, vlib_buffer_t * bt)
 always_inline u8
 vlib_buffer_pool_get_default_for_numa (vlib_main_t * vm, u32 numa_node)
 {
-  ASSERT (numa_node < vm->buffer_main->n_numa_nodes);
-  return numa_node;
+  ASSERT (numa_node < VLIB_BUFFER_MAX_NUMA_NODES);
+  return vm->buffer_main->default_buffer_pool_index_for_numa[numa_node];
 }
 /** \brief Translate array of buffer indices into buffer pointers with offset