perfmon: adding support for papi TMAM
Adding support for Linux papi TMAM on Intel Snowridge. Also adds the ability to indicate whether a bundle should be of the thread or the node bundle type, based on the available CPU features (rdpmc support). Type: feature Signed-off-by: Ray Kinsella <mdr@ashroe.eu> Change-Id: Ib871b2644fdb2410fbb580e0d21c3a8e2be13aba
This commit is contained in:
committed by
Damjan Marion
parent
6f98d9900d
commit
c3cb2075de
@@ -128,7 +128,8 @@ show_perfmon_bundle_command_fn (vlib_main_t *vm, unformat_input_t *input,
|
||||
vlib_cli_output (vm, "%U\n", format_perfmon_bundle, 0, 0);
|
||||
|
||||
for (int i = 0; i < vec_len (vb); i++)
|
||||
if (!vb[i]->cpu_supports || vb[i]->cpu_supports ())
|
||||
/* bundle type will be unknown if no cpu_supports matched */
|
||||
if (vb[i]->type != PERFMON_BUNDLE_TYPE_UNKNOWN)
|
||||
vlib_cli_output (vm, "%U\n", format_perfmon_bundle, vb[i], verbose);
|
||||
|
||||
vec_free (vb);
|
||||
@@ -312,7 +313,7 @@ show_perfmon_stats_command_fn (vlib_main_t *vm, unformat_input_t *input,
|
||||
{
|
||||
in = vec_elt_at_index (it->instances, i);
|
||||
r = vec_elt_at_index (readings, i);
|
||||
table_format_cell (t, col, -1, "%s", in->name);
|
||||
table_format_cell (t, col, -1, "%s", in->name, b->type);
|
||||
if (b->type == PERFMON_BUNDLE_TYPE_NODE)
|
||||
{
|
||||
perfmon_thread_runtime_t *tr;
|
||||
@@ -322,19 +323,20 @@ show_perfmon_stats_command_fn (vlib_main_t *vm, unformat_input_t *input,
|
||||
{
|
||||
perfmon_node_stats_t ns;
|
||||
table_format_cell (t, ++col, -1, "%U", format_vlib_node_name,
|
||||
vm, j);
|
||||
vm, j, b->type);
|
||||
table_set_cell_align (t, col, -1, TTAA_RIGHT);
|
||||
table_set_cell_fg_color (t, col, -1, TTAC_CYAN);
|
||||
clib_memcpy_fast (&ns, tr->node_stats + j, sizeof (ns));
|
||||
|
||||
for (int j = 0; j < n_row; j++)
|
||||
table_format_cell (t, col, j, "%U", b->format_fn, &ns, j);
|
||||
table_format_cell (t, col, j, "%U", b->format_fn, &ns, j,
|
||||
b->type);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int j = 0; j < n_row; j++)
|
||||
table_format_cell (t, i, j, "%U", b->format_fn, r, j);
|
||||
table_format_cell (t, i, j, "%U", b->format_fn, r, j, b->type);
|
||||
}
|
||||
col++;
|
||||
}
|
||||
|
||||
@@ -27,29 +27,42 @@
|
||||
|
||||
typedef enum
|
||||
{
|
||||
TOPDOWN_E_METRIC_RETIRING = 0,
|
||||
TOPDOWN_E_METRIC_BAD_SPEC,
|
||||
TOPDOWN_E_METRIC_FE_BOUND,
|
||||
TOPDOWN_E_METRIC_BE_BOUND,
|
||||
} topdown_lvl1_counters_t;
|
||||
TOPDOWN_E_RETIRING = 0,
|
||||
TOPDOWN_E_BAD_SPEC,
|
||||
TOPDOWN_E_FE_BOUND,
|
||||
TOPDOWN_E_BE_BOUND,
|
||||
} topdown_lvl1_t;
|
||||
|
||||
enum
|
||||
{
|
||||
TOPDOWN_SLOTS = 0,
|
||||
TOPDOWN_METRICS,
|
||||
} topdown_lvl1_metrics_t;
|
||||
TOPDOWN_E_RDPMC_SLOTS = 0,
|
||||
TOPDOWN_E_RDPMC_METRICS,
|
||||
};
|
||||
|
||||
static_always_inline f32
|
||||
topdown_lvl1_parse_row (perfmon_node_stats_t *ns, topdown_lvl1_counters_t e)
|
||||
typedef f64 (topdown_lvl1_parse_fn_t) (void *, topdown_lvl1_t);
|
||||
|
||||
/*
 * Parse thread level stats from a perfmon_reading_t.
 *
 * ps - opaque pointer; it is cast to perfmon_reading_t * below.
 * e  - level-1 top-down metric to extract.
 *
 * Returns the selected metric expressed as a percentage of the slots count,
 * i.e. value[e + 1] / value[0] * 100.
 *
 * NOTE(review): there is no guard for value[0] == 0; the floating-point
 * division would then yield inf/NaN - confirm callers never pass an empty
 * reading.
 */
|
||||
static_always_inline f64
|
||||
topdown_lvl1_perf_reading (void *ps, topdown_lvl1_t e)
|
||||
{
|
||||
perfmon_reading_t *ss = (perfmon_reading_t *) ps;
|
||||
|
||||
/* slots are at value[0], everything else follows at +1 */
|
||||
return ((f64) ss->value[e + 1] / ss->value[0]) * 100;
|
||||
}
|
||||
|
||||
static_always_inline f64
|
||||
topdown_lvl1_rdpmc_metric (void *ps, topdown_lvl1_t e)
|
||||
{
|
||||
perfmon_node_stats_t *ss = (perfmon_node_stats_t *) ps;
|
||||
f64 slots_t0 =
|
||||
ns->t[0].value[TOPDOWN_SLOTS] *
|
||||
((f64) GET_METRIC (ns->t[0].value[TOPDOWN_METRICS], e) / 0xff);
|
||||
ss->t[0].value[TOPDOWN_E_RDPMC_SLOTS] *
|
||||
((f64) GET_METRIC (ss->t[0].value[TOPDOWN_E_RDPMC_METRICS], e) / 0xff);
|
||||
f64 slots_t1 =
|
||||
ns->t[1].value[TOPDOWN_SLOTS] *
|
||||
((f64) GET_METRIC (ns->t[1].value[TOPDOWN_METRICS], e) / 0xff);
|
||||
u64 slots_delta =
|
||||
ns->t[1].value[TOPDOWN_SLOTS] - ns->t[0].value[TOPDOWN_SLOTS];
|
||||
ss->t[1].value[TOPDOWN_E_RDPMC_SLOTS] *
|
||||
((f64) GET_METRIC (ss->t[1].value[TOPDOWN_E_RDPMC_METRICS], e) / 0xff);
|
||||
u64 slots_delta = ss->t[1].value[TOPDOWN_E_RDPMC_SLOTS] -
|
||||
ss->t[0].value[TOPDOWN_E_RDPMC_SLOTS];
|
||||
|
||||
slots_t1 = slots_t1 - slots_t0;
|
||||
|
||||
@@ -59,53 +72,60 @@ topdown_lvl1_parse_row (perfmon_node_stats_t *ns, topdown_lvl1_counters_t e)
|
||||
static u8 *
|
||||
format_topdown_lvl1 (u8 *s, va_list *args)
|
||||
{
|
||||
perfmon_node_stats_t *st = va_arg (*args, perfmon_node_stats_t *);
|
||||
u64 row = va_arg (*args, int);
|
||||
void *ps = va_arg (*args, void *);
|
||||
u64 idx = va_arg (*args, int);
|
||||
perfmon_bundle_type_t type = va_arg (*args, perfmon_bundle_type_t);
|
||||
f64 sv = 0;
|
||||
|
||||
switch (row)
|
||||
topdown_lvl1_parse_fn_t *parse_fn,
|
||||
*parse_fns[PERFMON_BUNDLE_TYPE_MAX] = { 0, topdown_lvl1_rdpmc_metric,
|
||||
topdown_lvl1_perf_reading, 0 };
|
||||
parse_fn = parse_fns[type];
|
||||
ASSERT (parse_fn);
|
||||
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
s = format (s, "%f",
|
||||
topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BAD_SPEC) +
|
||||
topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_RETIRING));
|
||||
sv =
|
||||
parse_fn (ps, TOPDOWN_E_BAD_SPEC) + parse_fn (ps, TOPDOWN_E_RETIRING);
|
||||
break;
|
||||
case 1:
|
||||
s = format (s, "%f",
|
||||
topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BE_BOUND) +
|
||||
topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_FE_BOUND));
|
||||
sv =
|
||||
parse_fn (ps, TOPDOWN_E_BE_BOUND) + parse_fn (ps, TOPDOWN_E_FE_BOUND);
|
||||
break;
|
||||
case 2:
|
||||
s = format (s, "%f",
|
||||
topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_RETIRING));
|
||||
break;
|
||||
case 3:
|
||||
s = format (s, "%f",
|
||||
topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BAD_SPEC));
|
||||
break;
|
||||
case 4:
|
||||
s = format (s, "%f",
|
||||
topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_FE_BOUND));
|
||||
break;
|
||||
case 5:
|
||||
s = format (s, "%f",
|
||||
topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BE_BOUND));
|
||||
default:
|
||||
sv = parse_fn (ps, (topdown_lvl1_t) idx - 2);
|
||||
break;
|
||||
}
|
||||
|
||||
s = format (s, "%f", sv);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
PERFMON_REGISTER_BUNDLE (topdown_lvl1) = {
|
||||
/*
 * CPU feature table for the topdown-level1 bundle. Each entry pairs a CPU
 * feature predicate with the bundle type to use when that predicate holds.
 *
 * Entry order is significant: perfmon_cpu_supported_bundle_type () walks the
 * whole table and keeps the type of the last matching entry, so when both
 * predicates hold the NODE entry (listed last) takes precedence over THREAD.
 */
static perfmon_cpu_supports_t topdown_lvl1_cpu_supports[] = {
|
||||
/* Intel SNR supports papi/thread only */
|
||||
{ clib_cpu_supports_movdiri, PERFMON_BUNDLE_TYPE_THREAD },
|
||||
/* Intel ICX supports papi/thread or rdpmc/node */
|
||||
{ clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE }
|
||||
};
|
||||
|
||||
PERFMON_REGISTER_BUNDLE (topdown_lvl1_metric) = {
|
||||
.name = "topdown-level1",
|
||||
.description = "Top-down Microarchitecture Analysis Level 1",
|
||||
.source = "intel-core",
|
||||
.type = PERFMON_BUNDLE_TYPE_NODE,
|
||||
.offset_type = PERFMON_OFFSET_TYPE_METRICS,
|
||||
.events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
|
||||
.events[1] = INTEL_CORE_E_TOPDOWN_L1_METRICS,
|
||||
.events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
|
||||
.events[2] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
|
||||
.events[3] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
|
||||
.events[4] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
|
||||
.n_events = 5,
|
||||
.metrics[0] = RDPMC_FIXED_SLOTS | FIXED_COUNTER_SLOTS,
|
||||
.metrics[1] = RDPMC_L1_METRICS | METRIC_COUNTER_TOPDOWN_L1,
|
||||
.n_events = 2,
|
||||
.cpu_supports = clib_cpu_supports_avx512_bitalg,
|
||||
.n_metrics = 2,
|
||||
.cpu_supports = topdown_lvl1_cpu_supports,
|
||||
.n_cpu_supports = ARRAY_LEN (topdown_lvl1_cpu_supports),
|
||||
.format_fn = format_topdown_lvl1,
|
||||
.column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS",
|
||||
"% ST.FE", "% ST.BE"),
|
||||
|
||||
@@ -29,8 +29,14 @@
|
||||
"Reference cycles when the core is not in halt state.") \
|
||||
_ (0x00, 0x04, 0, 0, 0, 0x00, TOPDOWN, SLOTS, \
|
||||
"TMA slots available for an unhalted logical processor.") \
|
||||
_ (0x00, 0x80, 0, 0, 0, 0x00, TOPDOWN, L1_METRICS, \
|
||||
"TMA slots metrics for an unhalted logical processor.") \
|
||||
_ (0x00, 0x80, 0, 0, 0, 0x00, TOPDOWN, L1_RETIRING_METRIC, \
|
||||
"TMA retiring slots for an unhalted logical processor.") \
|
||||
_ (0x00, 0x81, 0, 0, 0, 0x00, TOPDOWN, L1_BAD_SPEC_METRIC, \
|
||||
"TMA bad spec slots or an unhalted logical processor.") \
|
||||
_ (0x00, 0x82, 0, 0, 0, 0x00, TOPDOWN, L1_FE_BOUND_METRIC, \
|
||||
"TMA fe bound slots for an unhalted logical processor.") \
|
||||
_ (0x00, 0x83, 0, 0, 0, 0x00, TOPDOWN, L1_BE_BOUND_METRIC, \
|
||||
"TMA be bound slots for an unhalted logical processor.") \
|
||||
_ (0x03, 0x02, 0, 0, 0, 0x00, LD_BLOCKS, STORE_FORWARD, \
|
||||
"Loads blocked due to overlapping with a preceding store that cannot be" \
|
||||
" forwarded.") \
|
||||
|
||||
@@ -31,6 +31,7 @@ typedef enum
|
||||
PERFMON_BUNDLE_TYPE_NODE,
|
||||
PERFMON_BUNDLE_TYPE_THREAD,
|
||||
PERFMON_BUNDLE_TYPE_SYSTEM,
|
||||
PERFMON_BUNDLE_TYPE_MAX,
|
||||
} perfmon_bundle_type_t;
|
||||
|
||||
typedef enum
|
||||
@@ -95,6 +96,12 @@ struct perfmon_bundle;
|
||||
typedef clib_error_t *(perfmon_bundle_init_fn_t) (vlib_main_t *vm,
|
||||
struct perfmon_bundle *);
|
||||
|
||||
/*
 * One row of a bundle's CPU support table: associates a CPU feature check
 * with the bundle type that should be selected when the check succeeds.
 */
typedef struct
|
||||
{
|
||||
/* feature predicate; invoked with no arguments, non-zero means supported */
clib_cpu_supports_func_t cpu_supports;
|
||||
/* bundle type assigned to the bundle when cpu_supports () returns non-zero */
perfmon_bundle_type_t bundle_type;
|
||||
} perfmon_cpu_supports_t;
|
||||
|
||||
typedef struct perfmon_bundle
|
||||
{
|
||||
char *name;
|
||||
@@ -104,14 +111,18 @@ typedef struct perfmon_bundle
|
||||
perfmon_bundle_type_t type;
|
||||
perfmon_offset_type_t offset_type;
|
||||
u32 events[PERF_MAX_EVENTS];
|
||||
u32 metrics[PERF_MAX_EVENTS];
|
||||
u32 n_events;
|
||||
|
||||
u32 metrics[PERF_MAX_EVENTS];
|
||||
u32 n_metrics;
|
||||
|
||||
perfmon_cpu_supports_t *cpu_supports;
|
||||
u32 n_cpu_supports;
|
||||
|
||||
perfmon_bundle_init_fn_t *init_fn;
|
||||
|
||||
char **column_headers;
|
||||
format_function_t *format_fn;
|
||||
clib_cpu_supports_func_t cpu_supports;
|
||||
|
||||
/* do not set manually */
|
||||
perfmon_source_t *src;
|
||||
@@ -168,6 +179,24 @@ typedef struct
|
||||
|
||||
extern perfmon_main_t perfmon_main;
|
||||
|
||||
/*
 * Work out which bundle type the running CPU supports for bundle b.
 *
 * b - bundle to inspect; reads b->cpu_supports, b->n_cpu_supports and
 *     b->type.
 *
 * Returns b->type unchanged when the bundle has no cpu_supports table.
 * Otherwise every table entry is evaluated and the bundle_type of the last
 * entry whose predicate returns non-zero is returned; if no entry matches,
 * 0 is returned.
 *
 * NOTE(review): 0 is presumably PERFMON_BUNDLE_TYPE_UNKNOWN - confirm against
 * the perfmon_bundle_type_t enum.
 */
always_inline uword
|
||||
perfmon_cpu_supported_bundle_type (perfmon_bundle_t *b)
|
||||
{
|
||||
perfmon_cpu_supports_t *supports = b->cpu_supports;
|
||||
uword type = 0;
|
||||
|
||||
/* if nothing specific for this bundle, go with the default */
|
||||
if (!supports)
|
||||
return b->type;
|
||||
|
||||
/* no early exit: the last matching entry always wins */
/* NOTE(review): int index is compared against the u32 n_cpu_supports */
|
||||
for (int i = 0; i < b->n_cpu_supports; ++i)
|
||||
if (supports[i].cpu_supports ())
|
||||
type = supports[i].bundle_type;
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
#define PERFMON_REGISTER_SOURCE(x) \
|
||||
perfmon_source_t __perfmon_source_##x; \
|
||||
static void __clib_constructor __perfmon_source_registration_##x (void) \
|
||||
@@ -184,6 +213,8 @@ extern perfmon_main_t perfmon_main;
|
||||
{ \
|
||||
perfmon_main_t *pm = &perfmon_main; \
|
||||
__perfmon_bundle_##x.next = pm->bundles; \
|
||||
__perfmon_bundle_##x.type = \
|
||||
perfmon_cpu_supported_bundle_type (&__perfmon_bundle_##x); \
|
||||
pm->bundles = &__perfmon_bundle_##x; \
|
||||
} \
|
||||
perfmon_bundle_t __perfmon_bundle_##x
|
||||
|
||||
Reference in New Issue
Block a user