vlib: handoff rework

Type: improvement
Change-Id: I1e199ae31e969154319e94c5cd286b8d8adc6660
Signed-off-by: Damjan Marion <damarion@cisco.com>
This commit is contained in:
Damjan Marion
2021-05-11 09:39:24 +02:00
committed by Florin Coras
parent 7cf80af582
commit c0d9ca7fe1
5 changed files with 188 additions and 293 deletions

File diff suppressed because it is too large Load Diff

View File

@ -765,7 +765,6 @@ typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u64 head;
u64 head_hint;
u64 tail;
u32 n_in_use;
u32 nelts;

View File

@ -346,26 +346,13 @@ vlib_frame_queue_alloc (int nelts)
fq = clib_mem_alloc_aligned (sizeof (*fq), CLIB_CACHE_LINE_BYTES);
clib_memset (fq, 0, sizeof (*fq));
fq->nelts = nelts;
fq->vector_threshold = 128; // packets
fq->vector_threshold = 2 * VLIB_FRAME_SIZE;
vec_validate_aligned (fq->elts, nelts - 1, CLIB_CACHE_LINE_BYTES);
if (1)
if (nelts & (nelts - 1))
{
if (((uword) & fq->tail) & (CLIB_CACHE_LINE_BYTES - 1))
fformat (stderr, "WARNING: fq->tail unaligned\n");
if (((uword) & fq->head) & (CLIB_CACHE_LINE_BYTES - 1))
fformat (stderr, "WARNING: fq->head unaligned\n");
if (((uword) fq->elts) & (CLIB_CACHE_LINE_BYTES - 1))
fformat (stderr, "WARNING: fq->elts unaligned\n");
if (sizeof (fq->elts[0]) % CLIB_CACHE_LINE_BYTES)
fformat (stderr, "WARNING: fq->elts[0] size %d\n",
sizeof (fq->elts[0]));
if (nelts & (nelts - 1))
{
fformat (stderr, "FATAL: nelts MUST be a power of 2\n");
abort ();
}
fformat (stderr, "FATAL: nelts MUST be a power of 2\n");
abort ();
}
return (fq);
@ -1587,23 +1574,13 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
fqm->node_index = node_index;
fqm->frame_queue_nelts = frame_queue_nelts;
fqm->queue_hi_thresh = frame_queue_nelts - num_threads;
vec_validate (fqm->vlib_frame_queues, tm->n_vlib_mains - 1);
vec_validate (fqm->per_thread_data, tm->n_vlib_mains - 1);
_vec_len (fqm->vlib_frame_queues) = 0;
for (i = 0; i < tm->n_vlib_mains; i++)
{
vlib_frame_queue_per_thread_data_t *ptd;
fq = vlib_frame_queue_alloc (frame_queue_nelts);
vec_add1 (fqm->vlib_frame_queues, fq);
ptd = vec_elt_at_index (fqm->per_thread_data, i);
vec_validate (ptd->handoff_queue_elt_by_thread_index,
tm->n_vlib_mains - 1);
vec_validate_init_empty (ptd->congested_handoff_queue_by_thread_index,
tm->n_vlib_mains - 1,
(vlib_frame_queue_t *) (~0));
}
return (fqm - tm->frame_queue_mains);

View File

@ -64,20 +64,16 @@ typedef struct vlib_thread_registration_
#define VLIB_LOG2_THREAD_STACK_SIZE (21)
#define VLIB_THREAD_STACK_SIZE (1<<VLIB_LOG2_THREAD_STACK_SIZE)
/* Message kinds that can be stored in the msg_type field of a frame queue
 * element; also the type of the last argument of vlib_frame_queue_enqueue. */
typedef enum
{
/* the only kind defined here; vlib_get_frame_queue_elt writes it into every element it hands out */
VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME,
} vlib_frame_queue_msg_type_t;
/* One slot of a frame queue ring: a small header on its own cache line
 * followed by an array of VLIB_FRAME_SIZE buffer indices. */
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
/* set to 1 by vlib_put_frame_queue_elt after a memory barrier;
 * vlib_get_frame_queue_elt spins while it is non-zero before reusing the slot */
volatile u32 valid;
/* written as VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME by vlib_get_frame_queue_elt */
u32 msg_type;
/* NOTE(review): no user of this bit is visible here - presumably marks that
 * the element may carry traced buffers; confirm against the handoff code */
u32 maybe_trace : 1;
/* n_vectors and last_n_vectors are both reset to 0 by vlib_get_frame_queue_elt */
u32 n_vectors;
u32 last_n_vectors;
/* NOTE(review): no user of this field is visible here - presumably a position
 * within buffer_index; confirm against the dequeue code */
u32 offset;
STRUCT_MARK (end_of_reset);
/* 256 * 4 = 1024 bytes, even mult of cache line size
 * (assumes VLIB_FRAME_SIZE is 256 - TODO confirm) */
CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
u32 buffer_index[VLIB_FRAME_SIZE];
}
vlib_frame_queue_elt_t;
@ -117,42 +113,29 @@ extern vlib_worker_thread_t *vlib_worker_threads;
/* Frame queue ring shared between the threads that enqueue into it and the
 * thread that dequeues from it; members are grouped per cache line.
 * NOTE(review): cacheline1, cacheline2, tail, head, trace and
 * vector_threshold are each declared twice in this span, which looks like
 * two layouts of the struct interleaved - confirm which one is intended. */
typedef struct
{
/* enqueue side */
/* static data */
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
/* atomically incremented by vlib_get_frame_queue_elt to claim a slot */
volatile u64 tail;
/* incremented by is_vlib_frame_queue_congested when tail reaches
 * head_hint + queue_hi_thresh */
u32 enqueue_full_events;
/* dequeue side */
CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
volatile u64 head;
u64 trace;
/* initialised by vlib_frame_queue_alloc */
u64 vector_threshold;
/* dequeue hint to enqueue side */
CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
/* read by vlib_get_frame_queue_elt and is_vlib_frame_queue_congested to
 * judge how much room is left in the ring */
volatile u64 head_hint;
/* read-only, constant, shared */
CLIB_CACHE_LINE_ALIGN_MARK (cacheline3);
/* ring storage; vlib_frame_queue_alloc sizes it to nelts elements */
vlib_frame_queue_elt_t *elts;
u64 vector_threshold;
u64 trace;
/* ring size; vlib_frame_queue_alloc aborts unless it is a power of 2,
 * which lets slots be selected with "& (nelts - 1)" */
u32 nelts;
/* modified by enqueue side */
CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
volatile u64 tail;
/* modified by dequeue side */
CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
volatile u64 head;
}
vlib_frame_queue_t;
/* Per-thread handoff scratch state kept in vlib_frame_queue_main_t.
 * vlib_frame_queue_main_init sizes both vectors to one entry per vlib main. */
typedef struct
{
/* zero-filled by vec_validate in vlib_frame_queue_main_init;
 * NOTE(review): presumably the element currently being filled for each
 * destination thread (see vlib_get_worker_handoff_queue_elt) - confirm */
vlib_frame_queue_elt_t **handoff_queue_elt_by_thread_index;
/* every entry starts as (vlib_frame_queue_t *) ~0; this is the same marker
 * is_vlib_frame_queue_congested uses for "no queue recorded yet" */
vlib_frame_queue_t **congested_handoff_queue_by_thread_index;
} vlib_frame_queue_per_thread_data_t;
typedef struct
{
u32 node_index;
u32 frame_queue_nelts;
u32 queue_hi_thresh;
vlib_frame_queue_t **vlib_frame_queues;
vlib_frame_queue_per_thread_data_t *per_thread_data;
/* for frame queue tracing */
frame_queue_trace_t *frame_queue_traces;
@ -169,10 +152,6 @@ typedef struct
/* Called early, in thread 0's context */
clib_error_t *vlib_thread_init (vlib_main_t * vm);
int vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
u32 frame_queue_index, vlib_frame_t * frame,
vlib_frame_queue_msg_type_t type);
void vlib_worker_thread_node_runtime_update (void);
void vlib_create_worker_threads (vlib_main_t * vm, int n,
@ -510,94 +489,6 @@ vlib_thread_is_main_w_barrier (void)
&& vlib_worker_threads->wait_at_barrier[0])));
}
/*
 * Publish a filled frame queue element by setting its valid flag.
 *
 * hf: element previously obtained from the ring.
 */
static inline void
vlib_put_frame_queue_elt (vlib_frame_queue_elt_t * hf)
{
/* the barrier is issued before the flag is raised, so the element's contents
 * are written ahead of valid becoming 1 */
CLIB_MEMORY_BARRIER ();
hf->valid = 1;
}
/*
 * Claim the next free element in the frame queue of one thread.
 *
 * frame_queue_index: index into the thread main's frame_queue_mains vector.
 * index: selects the queue in that frame queue main's vlib_frame_queues
 *        vector.
 *
 * Returns the claimed element with msg_type set to
 * VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME and n_vectors / last_n_vectors set
 * to 0. The function busy-waits until a slot is available, so it does not
 * return NULL.
 */
static inline vlib_frame_queue_elt_t *
vlib_get_frame_queue_elt (u32 frame_queue_index, u32 index)
{
vlib_frame_queue_t *fq;
vlib_frame_queue_elt_t *elt;
vlib_thread_main_t *tm = &vlib_thread_main;
vlib_frame_queue_main_t *fqm =
vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
u64 new_tail;
fq = fqm->vlib_frame_queues[index];
ASSERT (fq);
/* claim a position atomically; going by the helper's name, new_tail is the
 * tail value after the increment */
new_tail = clib_atomic_add_fetch (&fq->tail, 1);
/* Wait until a ring slot is available: the claimed position must be less
 * than nelts ahead of head_hint. The barrier check runs on every spin;
 * presumably it lets this thread honor a pending worker barrier - confirm */
while (new_tail >= fq->head_hint + fq->nelts)
vlib_worker_thread_barrier_check ();
/* nelts is a power of 2 (enforced by vlib_frame_queue_alloc), so the mask
 * wraps the position into the ring */
elt = fq->elts + (new_tail & (fq->nelts - 1));
/* this would be very bad... the slot is still flagged valid, so spin until
 * the flag is cleared (the code that clears it is not in this view) */
while (elt->valid)
;
elt->msg_type = VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME;
elt->last_n_vectors = elt->n_vectors = 0;
return elt;
}
/*
 * Check whether the frame queue feeding a given thread has reached the
 * congestion threshold.
 *
 * frame_queue_index: index into the thread main's frame_queue_mains vector.
 * index: selects both the queue and the cache entry to consult.
 * queue_hi_thresh: occupancy (tail minus head_hint) at which the queue
 *                  counts as congested.
 * handoff_queue_by_worker_index: caller-owned cache; an entry equal to
 *                  (vlib_frame_queue_t *) ~0 means nothing is recorded yet.
 *
 * Returns the cached entry when one is recorded, the queue itself when it
 * has just been found congested (the cache and the queue's
 * enqueue_full_events counter are updated), and NULL otherwise.
 */
static inline vlib_frame_queue_t *
is_vlib_frame_queue_congested (u32 frame_queue_index,
			       u32 index,
			       u32 queue_hi_thresh,
			       vlib_frame_queue_t **
			       handoff_queue_by_worker_index)
{
  vlib_thread_main_t *thread_main = &vlib_thread_main;
  vlib_frame_queue_main_t *queue_main =
    vec_elt_at_index (thread_main->frame_queue_mains, frame_queue_index);
  vlib_frame_queue_t *queue = handoff_queue_by_worker_index[index];

  /* anything other than the ~0 marker is an answer recorded earlier */
  if (queue != (vlib_frame_queue_t *) (~0))
    return queue;

  queue = queue_main->vlib_frame_queues[index];
  ASSERT (queue);

  /* common case: occupancy is still below the threshold */
  if (!PREDICT_FALSE (queue->tail >= (queue->head_hint + queue_hi_thresh)))
    return NULL;

  /* remember the congested queue so later calls return it straight away */
  handoff_queue_by_worker_index[index] = queue;
  queue->enqueue_full_events++;
  return queue;
}
/*
 * Return the frame queue element cached for a worker, claiming a new one
 * from that worker's queue when the cache entry is empty.
 *
 * frame_queue_index: passed through to vlib_get_frame_queue_elt.
 * vlib_worker_index: selects both the cache entry and the target queue.
 * handoff_queue_elt_by_worker_index: caller-owned cache of elements; a
 *                  null entry means no element is cached for that worker.
 *
 * Returns the cached or newly claimed element; a newly claimed element is
 * stored back into the cache.
 */
static inline vlib_frame_queue_elt_t *
vlib_get_worker_handoff_queue_elt (u32 frame_queue_index,
				   u32 vlib_worker_index,
				   vlib_frame_queue_elt_t **
				   handoff_queue_elt_by_worker_index)
{
  vlib_frame_queue_elt_t **slot =
    &handoff_queue_elt_by_worker_index[vlib_worker_index];

  /* fill the cache entry only when it is empty */
  if (!*slot)
    *slot = vlib_get_frame_queue_elt (frame_queue_index, vlib_worker_index);

  return *slot;
}
u8 *vlib_thread_stack_init (uword thread_index);
int vlib_thread_cb_register (struct vlib_main_t *vm,
vlib_thread_callbacks_t * cb);

View File

@ -290,8 +290,8 @@ show_frame_queue_internal (vlib_main_t * vm,
vlib_cli_output (vm,
" vector-threshold %d ring size %d in use %d\n",
fqt->threshold, fqt->nelts, fqt->n_in_use);
vlib_cli_output (vm, " head %12d head_hint %12d tail %12d\n",
fqt->head, fqt->head_hint, fqt->tail);
vlib_cli_output (vm, " head %12d tail %12d\n", fqt->head,
fqt->tail);
vlib_cli_output (vm,
" %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d\n",
fqt->n_vectors[0], fqt->n_vectors[1],