perfmon: top down level 1 support
Add TMAM (Top-down Microarchitecture Analysis Method) support for perfmon node bundles on ICX. Type: improvement Signed-off-by: Ray Kinsella <mdr@ashroe.eu> Change-Id: I48a9a9ff6a72efc28eaf0cb11ef39fb62cebb126
This commit is contained in:
@@ -30,4 +30,5 @@ add_vpp_plugin(perfmon
|
||||
intel/bundle/cache_hit_miss.c
|
||||
intel/bundle/branch_mispred.c
|
||||
intel/bundle/power_license.c
|
||||
intel/bundle/topdown_metrics.c
|
||||
)
|
||||
|
||||
@@ -128,7 +128,8 @@ show_perfmon_bundle_command_fn (vlib_main_t *vm, unformat_input_t *input,
|
||||
vlib_cli_output (vm, "%U\n", format_perfmon_bundle, 0, 0);
|
||||
|
||||
for (int i = 0; i < vec_len (vb); i++)
|
||||
vlib_cli_output (vm, "%U\n", format_perfmon_bundle, vb[i], verbose);
|
||||
if (!vb[i]->cpu_supports || vb[i]->cpu_supports ())
|
||||
vlib_cli_output (vm, "%U\n", format_perfmon_bundle, vb[i], verbose);
|
||||
|
||||
vec_free (vb);
|
||||
return 0;
|
||||
@@ -290,7 +291,8 @@ show_perfmon_stats_command_fn (vlib_main_t *vm, unformat_input_t *input,
|
||||
n_instances = vec_len (it->instances);
|
||||
vec_validate (readings, n_instances - 1);
|
||||
|
||||
for (int i = 0; i < n_instances; i++)
|
||||
/*Only perform read() for THREAD or SYSTEM bundles*/
|
||||
for (int i = 0; i < n_instances && b->type != PERFMON_BUNDLE_TYPE_NODE; i++)
|
||||
{
|
||||
in = vec_elt_at_index (it->instances, i);
|
||||
r = vec_elt_at_index (readings, i);
|
||||
@@ -340,6 +342,7 @@ show_perfmon_stats_command_fn (vlib_main_t *vm, unformat_input_t *input,
|
||||
table_set_cell_align (t, col, -1, TTAA_RIGHT);
|
||||
table_set_cell_fg_color (t, col, -1, TTAC_CYAN);
|
||||
clib_memcpy_fast (&ns, tr->node_stats + j, sizeof (ns));
|
||||
|
||||
for (int j = 0; j < n_row; j++)
|
||||
table_format_cell (t, col, j, "%U", b->format_fn, &ns, j);
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "vppinfra/string.h"
|
||||
#include <vnet/vnet.h>
|
||||
|
||||
#include <vlibapi/api.h>
|
||||
@@ -49,24 +50,32 @@ perfmon_read_pmcs (u64 *counters, int *pmc_index, u8 n_counters)
|
||||
}
|
||||
|
||||
static_always_inline int
|
||||
perfmon_calc_pmc_index (perfmon_thread_runtime_t *tr, u8 i)
|
||||
perfmon_calc_mmap_offset (perfmon_thread_runtime_t *tr, u8 i)
|
||||
{
|
||||
return (int) (tr->mmap_pages[i]->index + tr->mmap_pages[i]->offset);
|
||||
}
|
||||
|
||||
static_always_inline int
|
||||
perfmon_metric_index (perfmon_bundle_t *b, u8 i)
|
||||
{
|
||||
return (int) (b->metrics[i]);
|
||||
}
|
||||
|
||||
uword
|
||||
perfmon_dispatch_wrapper (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
vlib_frame_t *frame)
|
||||
perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
vlib_frame_t *frame)
|
||||
{
|
||||
perfmon_main_t *pm = &perfmon_main;
|
||||
perfmon_thread_runtime_t *rt =
|
||||
vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
|
||||
perfmon_node_stats_t *s =
|
||||
vec_elt_at_index (rt->node_stats, node->node_index);
|
||||
|
||||
u8 n_events = rt->n_events;
|
||||
int pmc_index[PERF_MAX_EVENTS];
|
||||
|
||||
u64 before[PERF_MAX_EVENTS];
|
||||
u64 after[PERF_MAX_EVENTS];
|
||||
int pmc_index[PERF_MAX_EVENTS];
|
||||
uword rv;
|
||||
|
||||
clib_prefetch_load (s);
|
||||
@@ -75,33 +84,87 @@ perfmon_dispatch_wrapper (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
{
|
||||
default:
|
||||
case 7:
|
||||
pmc_index[6] = perfmon_calc_pmc_index (rt, 6);
|
||||
pmc_index[6] = perfmon_calc_mmap_offset (rt, 6);
|
||||
case 6:
|
||||
pmc_index[5] = perfmon_calc_pmc_index (rt, 5);
|
||||
pmc_index[5] = perfmon_calc_mmap_offset (rt, 5);
|
||||
case 5:
|
||||
pmc_index[4] = perfmon_calc_pmc_index (rt, 4);
|
||||
pmc_index[4] = perfmon_calc_mmap_offset (rt, 4);
|
||||
case 4:
|
||||
pmc_index[3] = perfmon_calc_pmc_index (rt, 3);
|
||||
pmc_index[3] = perfmon_calc_mmap_offset (rt, 3);
|
||||
case 3:
|
||||
pmc_index[2] = perfmon_calc_pmc_index (rt, 2);
|
||||
pmc_index[2] = perfmon_calc_mmap_offset (rt, 2);
|
||||
case 2:
|
||||
pmc_index[1] = perfmon_calc_pmc_index (rt, 1);
|
||||
pmc_index[1] = perfmon_calc_mmap_offset (rt, 1);
|
||||
case 1:
|
||||
pmc_index[0] = perfmon_calc_pmc_index (rt, 0);
|
||||
pmc_index[0] = perfmon_calc_mmap_offset (rt, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
perfmon_read_pmcs (before, pmc_index, n_events);
|
||||
perfmon_read_pmcs (&before[0], pmc_index, n_events);
|
||||
rv = node->function (vm, node, frame);
|
||||
perfmon_read_pmcs (after, pmc_index, n_events);
|
||||
perfmon_read_pmcs (&after[0], pmc_index, n_events);
|
||||
|
||||
if (rv == 0)
|
||||
return rv;
|
||||
|
||||
s->n_calls += 1;
|
||||
s->n_packets += rv;
|
||||
|
||||
for (int i = 0; i < n_events; i++)
|
||||
s->value[i] += after[i] - before[i];
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
uword
|
||||
perfmon_dispatch_wrapper_metrics (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
vlib_frame_t *frame)
|
||||
{
|
||||
perfmon_main_t *pm = &perfmon_main;
|
||||
perfmon_thread_runtime_t *rt =
|
||||
vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
|
||||
perfmon_node_stats_t *s =
|
||||
vec_elt_at_index (rt->node_stats, node->node_index);
|
||||
|
||||
u8 n_events = rt->n_events;
|
||||
|
||||
u64 before[PERF_MAX_EVENTS];
|
||||
int pmc_index[PERF_MAX_EVENTS];
|
||||
uword rv;
|
||||
|
||||
clib_prefetch_load (s);
|
||||
|
||||
switch (n_events)
|
||||
{
|
||||
default:
|
||||
case 7:
|
||||
pmc_index[6] = perfmon_metric_index (rt->bundle, 6);
|
||||
case 6:
|
||||
pmc_index[5] = perfmon_metric_index (rt->bundle, 5);
|
||||
case 5:
|
||||
pmc_index[4] = perfmon_metric_index (rt->bundle, 4);
|
||||
case 4:
|
||||
pmc_index[3] = perfmon_metric_index (rt->bundle, 3);
|
||||
case 3:
|
||||
pmc_index[2] = perfmon_metric_index (rt->bundle, 2);
|
||||
case 2:
|
||||
pmc_index[1] = perfmon_metric_index (rt->bundle, 1);
|
||||
case 1:
|
||||
pmc_index[0] = perfmon_metric_index (rt->bundle, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
perfmon_read_pmcs (&before[0], pmc_index, n_events);
|
||||
rv = node->function (vm, node, frame);
|
||||
|
||||
clib_memcpy_fast (&s->t[0].value[0], &before, sizeof (before));
|
||||
perfmon_read_pmcs (&s->t[1].value[0], pmc_index, n_events);
|
||||
|
||||
if (rv == 0)
|
||||
return rv;
|
||||
|
||||
s->n_calls += 1;
|
||||
s->n_packets += rv;
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
115
src/plugins/perfmon/intel/bundle/topdown_metrics.c
Normal file
115
src/plugins/perfmon/intel/bundle/topdown_metrics.c
Normal file
@@ -0,0 +1,115 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Intel and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <vnet/vnet.h>
|
||||
#include <perfmon/perfmon.h>
|
||||
#include <perfmon/intel/core.h>
|
||||
|
||||
/* Extract byte number `i` (0 = least significant) from the packed word `m`.
 * Both arguments are fully parenthesised so expression arguments such as
 * `base + 1` select the intended field. */
#define GET_METRIC(m, i) (((m) >> ((i) * 8)) & 0xff)
/* Extract 32-bit field number `i` (0 = least significant) from `m`. */
#define GET_RATIO(m, i) (((m) >> ((i) * 32)) & 0xffffffff)

/* Flag bits OR-ed into the bundle's metrics[] entries. */
#define RDPMC_FIXED_SLOTS (1 << 30) /* fixed slots */
#define RDPMC_L1_METRICS  (1 << 29) /* l1 metric counters */

/* Counter numbers combined with the flag bits above. */
#define FIXED_COUNTER_SLOTS       3
#define METRIC_COUNTER_TOPDOWN_L1 0
|
||||
|
||||
/*
 * Level-1 top-down categories. The numeric value is the byte position
 * passed to GET_METRIC() when a category is pulled out of the packed
 * metrics word.
 */
typedef enum
{
  TOPDOWN_E_METRIC_RETIRING = 0,
  TOPDOWN_E_METRIC_BAD_SPEC = 1,
  TOPDOWN_E_METRIC_FE_BOUND = 2,
  TOPDOWN_E_METRIC_BE_BOUND = 3,
} topdown_lvl1_counters_t;
|
||||
|
||||
/*
 * Positions of the two readings inside each per-node value[] snapshot:
 * the slots count first, the packed level-1 metrics word second.
 *
 * Declared with `typedef` so that topdown_lvl1_metrics_t names a type;
 * without it the declaration defines a global variable of an anonymous
 * enum type.
 */
typedef enum
{
  TOPDOWN_SLOTS = 0,
  TOPDOWN_METRICS,
} topdown_lvl1_metrics_t;
|
||||
|
||||
static_always_inline f32
|
||||
topdown_lvl1_parse_row (perfmon_node_stats_t *ns, topdown_lvl1_counters_t e)
|
||||
{
|
||||
f64 slots_t0 =
|
||||
ns->t[0].value[TOPDOWN_SLOTS] *
|
||||
((f64) GET_METRIC (ns->t[0].value[TOPDOWN_METRICS], e) / 0xff);
|
||||
f64 slots_t1 =
|
||||
ns->t[1].value[TOPDOWN_SLOTS] *
|
||||
((f64) GET_METRIC (ns->t[1].value[TOPDOWN_METRICS], e) / 0xff);
|
||||
u64 slots_delta =
|
||||
ns->t[1].value[TOPDOWN_SLOTS] - ns->t[0].value[TOPDOWN_SLOTS];
|
||||
|
||||
slots_t1 = slots_t1 - slots_t0;
|
||||
|
||||
return (slots_t1 / slots_delta) * 100;
|
||||
}
|
||||
|
||||
/*
 * format() callback for the topdown-level1 bundle.
 *
 * Variadic arguments: a perfmon_node_stats_t pointer followed by an int
 * row number. Rows 0-5 append one "%f" percentage to `s`:
 *   0: bad speculation + retiring     1: backend bound + frontend bound
 *   2: retiring                       3: bad speculation
 *   4: frontend bound                 5: backend bound
 * Any other row number leaves `s` untouched.
 */
static u8 *
format_topdown_lvl1 (u8 *s, va_list *args)
{
  perfmon_node_stats_t *st = va_arg (*args, perfmon_node_stats_t *);
  int row = va_arg (*args, int);
  f32 pct;

  switch (row)
    {
    case 0:
      pct = topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BAD_SPEC) +
            topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_RETIRING);
      break;
    case 1:
      pct = topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BE_BOUND) +
            topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_FE_BOUND);
      break;
    case 2:
      pct = topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_RETIRING);
      break;
    case 3:
      pct = topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BAD_SPEC);
      break;
    case 4:
      pct = topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_FE_BOUND);
      break;
    case 5:
      pct = topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BE_BOUND);
      break;
    default:
      /* Unknown row: nothing to print. */
      return s;
    }

  return format (s, "%f", pct);
}
|
||||
|
||||
PERFMON_REGISTER_BUNDLE (topdown_lvl1) = {
|
||||
.name = "topdown-level1",
|
||||
.description = "Top-down Microarchitecture Analysis Level 1",
|
||||
.source = "intel-core",
|
||||
.type = PERFMON_BUNDLE_TYPE_NODE,
|
||||
.offset_type = PERFMON_OFFSET_TYPE_METRICS,
|
||||
.events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
|
||||
.events[1] = INTEL_CORE_E_TOPDOWN_L1_METRICS,
|
||||
.metrics[0] = RDPMC_FIXED_SLOTS | FIXED_COUNTER_SLOTS,
|
||||
.metrics[1] = RDPMC_L1_METRICS | METRIC_COUNTER_TOPDOWN_L1,
|
||||
.n_events = 2,
|
||||
.cpu_supports = clib_cpu_supports_avx512_bitalg,
|
||||
.format_fn = format_topdown_lvl1,
|
||||
.column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS",
|
||||
"% ST.FE", "% ST.BE"),
|
||||
.footer = "Not Stalled (NS),STalled (ST),\n"
|
||||
" Retiring (RT), Bad Speculation (BS),\n"
|
||||
" FrontEnd bound (FE), BackEnd bound (BE)",
|
||||
};
|
||||
@@ -20,12 +20,12 @@
|
||||
|
||||
static perfmon_event_t events[] = {
|
||||
#define _(event, umask, edge, any, inv, cmask, n, suffix, desc) \
|
||||
[INTEL_CORE_E_##n##_##suffix] = { \
|
||||
.type = PERF_TYPE_RAW, \
|
||||
.config = PERF_INTEL_CODE (event, umask, edge, any, inv, cmask), \
|
||||
.name = #n "." #suffix, \
|
||||
.description = desc, \
|
||||
},
|
||||
[INTEL_CORE_E_##n##_##suffix] = { .type = PERF_TYPE_RAW, \
|
||||
.config = PERF_INTEL_CODE ( \
|
||||
event, umask, edge, any, inv, cmask), \
|
||||
.name = #n "." #suffix, \
|
||||
.description = desc, \
|
||||
.exclude_kernel = 1 },
|
||||
|
||||
foreach_perf_intel_core_event
|
||||
#undef _
|
||||
|
||||
@@ -27,6 +27,10 @@
|
||||
"Core cycles when the thread is not in halt state") \
|
||||
_ (0x00, 0x03, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, REF_TSC, \
|
||||
"Reference cycles when the core is not in halt state.") \
|
||||
_ (0x00, 0x04, 0, 0, 0, 0x00, TOPDOWN, SLOTS, \
|
||||
"TMA slots available for an unhalted logical processor.") \
|
||||
_ (0x00, 0x80, 0, 0, 0, 0x00, TOPDOWN, L1_METRICS, \
|
||||
"TMA slots metrics for an unhalted logical processor.") \
|
||||
_ (0x03, 0x02, 0, 0, 0, 0x00, LD_BLOCKS, STORE_FORWARD, \
|
||||
"Loads blocked due to overlapping with a preceding store that cannot be" \
|
||||
" forwarded.") \
|
||||
|
||||
@@ -193,6 +193,7 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
|
||||
{
|
||||
perfmon_thread_runtime_t *rt;
|
||||
rt = vec_elt_at_index (pm->thread_runtimes, i);
|
||||
rt->bundle = b;
|
||||
rt->n_events = b->n_events;
|
||||
rt->n_nodes = n_nodes;
|
||||
vec_validate_aligned (rt->node_stats, n_nodes - 1,
|
||||
@@ -235,11 +236,20 @@ perfmon_start (vlib_main_t *vm, perfmon_bundle_t *b)
|
||||
return clib_error_return_unix (0, "ioctl(PERF_EVENT_IOC_ENABLE)");
|
||||
}
|
||||
}
|
||||
if (pm->active_bundle->type == PERFMON_BUNDLE_TYPE_NODE)
|
||||
if (b->type == PERFMON_BUNDLE_TYPE_NODE)
|
||||
{
|
||||
|
||||
vlib_node_function_t *funcs[PERFMON_OFFSET_TYPE_MAX];
|
||||
#define _(type, pfunc) funcs[type] = pfunc;
|
||||
|
||||
foreach_permon_offset_type
|
||||
#undef _
|
||||
|
||||
ASSERT (funcs[b->offset_type]);
|
||||
|
||||
for (int i = 0; i < vlib_get_n_threads (); i++)
|
||||
vlib_node_set_dispatch_wrapper (vlib_get_main_by_index (i),
|
||||
perfmon_dispatch_wrapper);
|
||||
funcs[b->offset_type]);
|
||||
}
|
||||
|
||||
pm->sample_time = vlib_time_now (vm);
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include <vppinfra/clib.h>
|
||||
#include <vppinfra/format.h>
|
||||
#include <vppinfra/error.h>
|
||||
#include <vppinfra/cpu.h>
|
||||
#include <vlib/vlib.h>
|
||||
|
||||
#define PERF_MAX_EVENTS 7 /* 3 fixed and 4 programmable */
|
||||
@@ -32,6 +33,13 @@ typedef enum
|
||||
PERFMON_BUNDLE_TYPE_SYSTEM,
|
||||
} perfmon_bundle_type_t;
|
||||
|
||||
/*
 * How a node bundle locates its counters; selects which dispatch wrapper
 * perfmon_start() installs. PERFMON_OFFSET_TYPE_MAX is the number of real
 * entries and sizes the wrapper lookup table.
 */
typedef enum
{
  PERFMON_OFFSET_TYPE_MMAP = 0,    /* perfmon_dispatch_wrapper_mmap */
  PERFMON_OFFSET_TYPE_METRICS = 1, /* perfmon_dispatch_wrapper_metrics */
  PERFMON_OFFSET_TYPE_MAX = 2,
} perfmon_offset_type_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
u32 type_from_instance : 1;
|
||||
@@ -61,7 +69,12 @@ typedef struct
|
||||
} perfmon_instance_type_t;
|
||||
|
||||
struct perfmon_source;
|
||||
vlib_node_function_t perfmon_dispatch_wrapper;
|
||||
vlib_node_function_t perfmon_dispatch_wrapper_mmap;
|
||||
vlib_node_function_t perfmon_dispatch_wrapper_metrics;
|
||||
|
||||
#define foreach_permon_offset_type \
|
||||
_ (PERFMON_OFFSET_TYPE_MMAP, perfmon_dispatch_wrapper_mmap) \
|
||||
_ (PERFMON_OFFSET_TYPE_METRICS, perfmon_dispatch_wrapper_metrics)
|
||||
|
||||
typedef clib_error_t *(perfmon_source_init_fn_t) (vlib_main_t *vm,
|
||||
struct perfmon_source *);
|
||||
@@ -78,8 +91,10 @@ typedef struct perfmon_source
|
||||
} perfmon_source_t;
|
||||
|
||||
struct perfmon_bundle;
|
||||
|
||||
typedef clib_error_t *(perfmon_bundle_init_fn_t) (vlib_main_t *vm,
|
||||
struct perfmon_bundle *);
|
||||
|
||||
typedef struct perfmon_bundle
|
||||
{
|
||||
char *name;
|
||||
@@ -87,7 +102,9 @@ typedef struct perfmon_bundle
|
||||
char *source;
|
||||
char *footer;
|
||||
perfmon_bundle_type_t type;
|
||||
perfmon_offset_type_t offset_type;
|
||||
u32 events[PERF_MAX_EVENTS];
|
||||
u32 metrics[PERF_MAX_EVENTS];
|
||||
u32 n_events;
|
||||
|
||||
perfmon_bundle_init_fn_t *init_fn;
|
||||
@@ -95,6 +112,7 @@ typedef struct perfmon_bundle
|
||||
char **column_headers;
|
||||
char **raw_column_headers;
|
||||
format_function_t *format_fn;
|
||||
clib_cpu_supports_func_t cpu_supports;
|
||||
|
||||
/* do not set manually */
|
||||
perfmon_source_t *src;
|
||||
@@ -114,7 +132,14 @@ typedef struct
|
||||
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
|
||||
u64 n_calls;
|
||||
u64 n_packets;
|
||||
u64 value[PERF_MAX_EVENTS];
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u64 value[PERF_MAX_EVENTS];
|
||||
} t[2];
|
||||
u64 value[PERF_MAX_EVENTS * 2];
|
||||
};
|
||||
} perfmon_node_stats_t;
|
||||
|
||||
typedef struct
|
||||
@@ -122,6 +147,7 @@ typedef struct
|
||||
u8 n_events;
|
||||
u16 n_nodes;
|
||||
perfmon_node_stats_t *node_stats;
|
||||
perfmon_bundle_t *bundle;
|
||||
struct perf_event_mmap_page *mmap_pages[PERF_MAX_EVENTS];
|
||||
} perfmon_thread_runtime_t;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user