perfmon: fix perf event user page read
When mmap()-ing a perf event in userspace, we must adhere to the kernel update protocol to read consistent values. Also, 'offset' is a value to add to the counter value, not something to apply to the PMC index.

Type: fix
Change-Id: I59106bb3a48185ff3fcb0d2f09097269a67bb6d6
Signed-off-by: Benoît Ganne <bganne@cisco.com>
This commit is contained in:

committed by
Damjan Marion

parent
03f2a01599
commit
4e3af51a66
@@ -25,8 +25,100 @@
|
||||
|
||||
#include <perfmon/perfmon.h>
|
||||
|
||||
static_always_inline u64
|
||||
perfmon_mmap_read_pmc1 (const struct perf_event_mmap_page *mmap_page)
|
||||
{
|
||||
u64 count;
|
||||
u32 seq;
|
||||
|
||||
/* See documentation in /usr/include/linux/perf_event.h, for more details
|
||||
* but the 2 main important things are:
|
||||
* 1) if seq != mmap_page->lock, it means the kernel is currently updating
|
||||
* the user page and we need to read it again
|
||||
* 2) if idx == 0, it means the perf event is currently turned off and we
|
||||
* just need to read the kernel-updated 'offset', otherwise we must also
|
||||
* add the current hw value (hence rdmpc) */
|
||||
do
|
||||
{
|
||||
u32 idx;
|
||||
|
||||
seq = mmap_page->lock;
|
||||
CLIB_COMPILER_BARRIER ();
|
||||
|
||||
idx = mmap_page->index;
|
||||
count = mmap_page->offset;
|
||||
if (idx)
|
||||
count += _rdpmc (idx - 1);
|
||||
|
||||
CLIB_COMPILER_BARRIER ();
|
||||
}
|
||||
while (mmap_page->lock != seq);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static_always_inline void
|
||||
perfmon_read_pmcs (u64 *counters, int *pmc_index, u8 n_counters)
|
||||
perfmon_mmap_read_pmcs (u64 *counters,
|
||||
struct perf_event_mmap_page **mmap_pages,
|
||||
u8 n_counters)
|
||||
{
|
||||
switch (n_counters)
|
||||
{
|
||||
default:
|
||||
case 7:
|
||||
counters[6] = perfmon_mmap_read_pmc1 (mmap_pages[6]);
|
||||
case 6:
|
||||
counters[5] = perfmon_mmap_read_pmc1 (mmap_pages[5]);
|
||||
case 5:
|
||||
counters[4] = perfmon_mmap_read_pmc1 (mmap_pages[4]);
|
||||
case 4:
|
||||
counters[3] = perfmon_mmap_read_pmc1 (mmap_pages[3]);
|
||||
case 3:
|
||||
counters[2] = perfmon_mmap_read_pmc1 (mmap_pages[2]);
|
||||
case 2:
|
||||
counters[1] = perfmon_mmap_read_pmc1 (mmap_pages[1]);
|
||||
case 1:
|
||||
counters[0] = perfmon_mmap_read_pmc1 (mmap_pages[0]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
uword
|
||||
perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
vlib_frame_t *frame)
|
||||
{
|
||||
perfmon_main_t *pm = &perfmon_main;
|
||||
perfmon_thread_runtime_t *rt =
|
||||
vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
|
||||
perfmon_node_stats_t *s =
|
||||
vec_elt_at_index (rt->node_stats, node->node_index);
|
||||
|
||||
u8 n_events = rt->n_events;
|
||||
|
||||
u64 before[PERF_MAX_EVENTS];
|
||||
u64 after[PERF_MAX_EVENTS];
|
||||
uword rv;
|
||||
|
||||
clib_prefetch_load (s);
|
||||
|
||||
perfmon_mmap_read_pmcs (&before[0], rt->mmap_pages, n_events);
|
||||
rv = node->function (vm, node, frame);
|
||||
perfmon_mmap_read_pmcs (&after[0], rt->mmap_pages, n_events);
|
||||
|
||||
if (rv == 0)
|
||||
return rv;
|
||||
|
||||
s->n_calls += 1;
|
||||
s->n_packets += rv;
|
||||
|
||||
for (int i = 0; i < n_events; i++)
|
||||
s->value[i] += after[i] - before[i];
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static_always_inline void
|
||||
perfmon_metric_read_pmcs (u64 *counters, int *pmc_index, u8 n_counters)
|
||||
{
|
||||
switch (n_counters)
|
||||
{
|
||||
@@ -49,73 +141,12 @@ perfmon_read_pmcs (u64 *counters, int *pmc_index, u8 n_counters)
|
||||
}
|
||||
}
|
||||
|
||||
static_always_inline int
|
||||
perfmon_calc_mmap_offset (perfmon_thread_runtime_t *tr, u8 i)
|
||||
{
|
||||
return (int) (tr->mmap_pages[i]->index + tr->mmap_pages[i]->offset);
|
||||
}
|
||||
|
||||
static_always_inline int
|
||||
perfmon_metric_index (perfmon_bundle_t *b, u8 i)
|
||||
{
|
||||
return (int) (b->metrics[i]);
|
||||
}
|
||||
|
||||
uword
|
||||
perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
vlib_frame_t *frame)
|
||||
{
|
||||
perfmon_main_t *pm = &perfmon_main;
|
||||
perfmon_thread_runtime_t *rt =
|
||||
vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
|
||||
perfmon_node_stats_t *s =
|
||||
vec_elt_at_index (rt->node_stats, node->node_index);
|
||||
|
||||
u8 n_events = rt->n_events;
|
||||
|
||||
u64 before[PERF_MAX_EVENTS];
|
||||
u64 after[PERF_MAX_EVENTS];
|
||||
int pmc_index[PERF_MAX_EVENTS];
|
||||
uword rv;
|
||||
|
||||
clib_prefetch_load (s);
|
||||
|
||||
switch (n_events)
|
||||
{
|
||||
default:
|
||||
case 7:
|
||||
pmc_index[6] = perfmon_calc_mmap_offset (rt, 6);
|
||||
case 6:
|
||||
pmc_index[5] = perfmon_calc_mmap_offset (rt, 5);
|
||||
case 5:
|
||||
pmc_index[4] = perfmon_calc_mmap_offset (rt, 4);
|
||||
case 4:
|
||||
pmc_index[3] = perfmon_calc_mmap_offset (rt, 3);
|
||||
case 3:
|
||||
pmc_index[2] = perfmon_calc_mmap_offset (rt, 2);
|
||||
case 2:
|
||||
pmc_index[1] = perfmon_calc_mmap_offset (rt, 1);
|
||||
case 1:
|
||||
pmc_index[0] = perfmon_calc_mmap_offset (rt, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
perfmon_read_pmcs (&before[0], pmc_index, n_events);
|
||||
rv = node->function (vm, node, frame);
|
||||
perfmon_read_pmcs (&after[0], pmc_index, n_events);
|
||||
|
||||
if (rv == 0)
|
||||
return rv;
|
||||
|
||||
s->n_calls += 1;
|
||||
s->n_packets += rv;
|
||||
|
||||
for (int i = 0; i < n_events; i++)
|
||||
s->value[i] += after[i] - before[i];
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
uword
|
||||
perfmon_dispatch_wrapper_metrics (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
vlib_frame_t *frame)
|
||||
@@ -154,11 +185,11 @@ perfmon_dispatch_wrapper_metrics (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
break;
|
||||
}
|
||||
|
||||
perfmon_read_pmcs (&before[0], pmc_index, n_events);
|
||||
perfmon_metric_read_pmcs (&before[0], pmc_index, n_events);
|
||||
rv = node->function (vm, node, frame);
|
||||
|
||||
clib_memcpy_fast (&s->t[0].value[0], &before, sizeof (before));
|
||||
perfmon_read_pmcs (&s->t[1].value[0], pmc_index, n_events);
|
||||
perfmon_metric_read_pmcs (&s->t[1].value[0], pmc_index, n_events);
|
||||
|
||||
if (rv == 0)
|
||||
return rv;
|
||||
|
Reference in New Issue
Block a user