Store per-thread node error counters

This fixes a performance issue in multi-threaded setups
caused by multiple threads sharing the same cacheline

Change-Id: I930ee44c17a83d4da350d15b4b97b8bb4633a9b0
Signed-off-by: Damjan Marion <damarion@cisco.com>
This commit is contained in:
Damjan Marion
2015-12-17 14:28:18 +01:00
parent be9bf426b9
commit bc20bdf207
2 changed files with 65 additions and 15 deletions

View File

@@ -151,6 +151,8 @@ void vlib_register_errors (vlib_main_t * vm,
vlib_node_t * n = vlib_get_node (vm, node_index);
uword l;
ASSERT(os_get_cpu_number() == 0);
/* Free up any previous error strings. */
if (n->n_errors > 0)
heap_dealloc (em->error_strings_heap, n->error_heap_handle);
@@ -209,26 +211,59 @@ show_errors (vlib_main_t * vm,
vlib_node_t * n;
u32 code, i, ni;
u64 c;
int index = 0;
int verbose = 0;
u64 * sums = 0;
if (unformat (input, "verbose"))
verbose = 1;
vec_validate(sums, vec_len(em->counters));
vlib_cli_output (vm, "%=16s%=40s%=20s", "Count", "Node", "Reason");
foreach_vlib_main(({
em = &this_vlib_main->error_main;
if (verbose)
vlib_cli_output(vm, "Thread %u (%v):", index, vlib_worker_threads[index].name);
for (ni = 0; ni < vec_len (this_vlib_main->node_main.nodes); ni++)
{
n = vlib_get_node (this_vlib_main, ni);
for (code = 0; code < n->n_errors; code++)
{
i = n->error_heap_index + code;
c = em->counters[i];
if (i < vec_len (em->counters_last_clear))
c -= em->counters_last_clear[i];
sums[i] += c;
if (c == 0 || !verbose)
continue;
vlib_cli_output (vm, "%16Ld%=40v%s", c, n->name, em->error_strings_heap[i]);
}
}
index++;
}));
if (verbose)
vlib_cli_output(vm, "Total:");
for (ni = 0; ni < vec_len (vm->node_main.nodes); ni++)
{
n = vlib_get_node (vm, ni);
for (code = 0; code < n->n_errors; code++)
{
i = n->error_heap_index + code;
c = em->counters[i];
if (i < vec_len (em->counters_last_clear))
c -= em->counters_last_clear[i];
if (c == 0)
continue;
vlib_cli_output (vm, "%16Ld%=40v%s", c, n->name, em->error_strings_heap[i]);
if (sums[i])
vlib_cli_output (vm, "%16Ld%=40v%s", sums[i], n->name, em->error_strings_heap[i]);
}
}
vec_free(sums);
return 0;
}
@@ -249,12 +284,15 @@ clear_error_counters (vlib_main_t * vm,
unformat_input_t * input,
vlib_cli_command_t * cmd)
{
vlib_error_main_t * em = &vm->error_main;
vlib_error_main_t * em;
u32 i;
vec_validate (em->counters_last_clear, vec_len (em->counters) - 1);
for (i = 0; i < vec_len (em->counters); i++)
em->counters_last_clear[i] = em->counters[i];
foreach_vlib_main(({
em = &this_vlib_main->error_main;
vec_validate (em->counters_last_clear, vec_len (em->counters) - 1);
for (i = 0; i < vec_len (em->counters); i++)
em->counters_last_clear[i] = em->counters[i];
}));
return 0;
}

View File

@@ -70,7 +70,7 @@ os_get_cpu_number (void)
/* Get any old stack address. */
sp = &sp;
n = ((uword)sp - (uword)vlib_thread_stacks[0])
n = ((uword)sp - (uword)vlib_thread_stacks[0])
>> VLIB_LOG2_THREAD_STACK_SIZE;
/* "processes" have their own stacks, and they always run in thread 0 */
@@ -675,6 +675,11 @@ static clib_error_t * start_workers (vlib_main_t * vm)
unix_physmem_init (vm_clone, 0 /* physmem not required */);
vm_clone->error_main.counters =
vec_dup(vlib_mains[0]->error_main.counters);
vm_clone->error_main.counters_last_clear =
vec_dup(vlib_mains[0]->error_main.counters_last_clear);
/* Fork the vlib_buffer_main_t free lists, etc. */
bm_clone = vec_dup (vm_clone->buffer_main);
vm_clone->buffer_main = bm_clone;
@@ -817,16 +822,23 @@ void vlib_worker_thread_node_runtime_update(void)
vlib_node_runtime_t * rt;
w = vlib_worker_threads + i;
oldheap = clib_mem_set_heap (w->thread_mheap);
vm_clone = vlib_mains[i];
/* Re-clone error heap */
u64 * old_counters = vm_clone->error_main.counters;
u64 * old_counters_all_clear = vm_clone->error_main.counters_last_clear;
memcpy (&vm_clone->error_main, &vm->error_main, sizeof (vm->error_main));
j = vec_len(vm->error_main.counters) - 1;
vec_validate_aligned(old_counters, j, CLIB_CACHE_LINE_BYTES);
vec_validate_aligned(old_counters_all_clear, j, CLIB_CACHE_LINE_BYTES);
vm_clone->error_main.counters = old_counters;
vm_clone->error_main.counters_last_clear = old_counters_all_clear;
nm_clone = &vm_clone->node_main;
vec_free (nm_clone->next_frames);
nm_clone->next_frames = vec_dup (nm->next_frames);
for (j = 0; j < vec_len (nm_clone->next_frames); j++)
{
vlib_next_frame_t *nf = &nm_clone->next_frames[j];