perfmon: topdown level 1 and 2 for icx
Topdown level 1 and 2 for Intel Ice Lake (ICX). Limiting topdown support to THREAD for the moment on Ice Lake, as NODE support is still unreliable. Also removing Topdown Level 1 from Sapphire Rapids onwards, as Topdown Level 2 also shows Level 1 on Sapphire, and it reduces the overall number of bundles. Type: improvement Signed-off-by: Ray Kinsella <mdr@ashroe.eu> Change-Id: Iaa68b711dc8b6fb1090880b411debadb3c37f8bc
This commit is contained in:

committed by
Damjan Marion

parent
7e8aeb876b
commit
9d0c638b0f
@@ -32,6 +32,7 @@ add_vpp_plugin(perfmon
|
||||
intel/bundle/branch_mispred.c
|
||||
intel/bundle/power_license.c
|
||||
intel/bundle/topdown_metrics.c
|
||||
intel/bundle/topdown_icelake.c
|
||||
intel/bundle/topdown_tremont.c
|
||||
intel/bundle/frontend_bound_bw.c
|
||||
intel/bundle/frontend_bound_lat.c
|
||||
|
176
src/plugins/perfmon/intel/bundle/topdown_icelake.c
Normal file
176
src/plugins/perfmon/intel/bundle/topdown_icelake.c
Normal file
@@ -0,0 +1,176 @@
|
||||
/*
|
||||
* Copyright (c) 2022 Intel and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include <perfmon/perfmon.h>
|
||||
#include <perfmon/intel/core.h>
|
||||
|
||||
/*
 * CPU-support predicate used by the Ice Lake topdown bundle.
 *
 * Returns non-zero when the CPU reports AVX512_BITALG and does not report
 * MOVDIR64B, zero otherwise.  The MOVDIR64B probe is only made when the
 * AVX512_BITALG probe succeeds.
 *
 * NOTE(review): the MOVDIR64B exclusion presumably keeps cores newer than
 * Ice Lake off this bundle -- confirm against the CPU feature tables.
 *
 * The parameter list is spelled (void) so the definition is a real
 * prototype rather than an old-style "unspecified arguments" declarator.
 */
static int
is_icelake (void)
{
  if (!clib_cpu_supports_avx512_bitalg ())
    return 0;

  return !clib_cpu_supports_movdir64b ();
}
|
||||
|
||||
/* CPU-support table for the Ice Lake topdown bundle: a single entry that
 * pairs the is_icelake predicate with PERFMON_BUNDLE_TYPE_THREAD. */
static perfmon_cpu_supports_t topdown_lvl2_cpu_supports_icx[] = {
|
||||
{ is_icelake, PERFMON_BUNDLE_TYPE_THREAD }
|
||||
};
|
||||
|
||||
/* Extract byte number i (0 = least significant) of m and yield it as f64.
 * Both arguments and the whole expansion are parenthesized so expression
 * arguments such as GET_METRIC (m, i + 1) shift by the intended amount. */
#define GET_METRIC(m, i) ((f64) (((m) >> ((i) * 8)) & 0xff))
|
||||
|
||||
/*
 * Indices into perfmon_reading_t::value[] used by the helpers in this file.
 * The numbering mirrors the order of the .events[] entries of the
 * topdown_lvl2_metric_icx bundle registration.
 */
enum
{
  TD_SLOTS = 0,		 /* .events[0]  TOPDOWN_SLOTS */
  STALLS_MEM_ANY = 1,	 /* .events[1]  CYCLE_ACTIVITY_STALLS_MEM_ANY */
  STALLS_TOTAL = 2,	 /* .events[2]  CYCLE_ACTIVITY_STALLS_TOTAL */
  BOUND_ON_STORES = 3,	 /* .events[3]  EXE_ACTIVITY_BOUND_ON_STORES */
  RECOVERY_CYCLES = 4,	 /* .events[4]  INT_MISC_RECOVERY_CYCLES */
  UOP_DROPPING = 5,	 /* .events[5]  INT_MISC_UOP_DROPPING */
  UOP_NOT_DELIVERED = 6, /* .events[6]  IDQ_UOPS_NOT_DELIVERED_CORE */
  TD_RETIRING = 7,	 /* .events[7]  TOPDOWN_L1_RETIRING_METRIC */
  TD_BAD_SPEC = 8,	 /* .events[8]  TOPDOWN_L1_BAD_SPEC_METRIC */
  TD_FE_BOUND = 9,	 /* .events[9]  TOPDOWN_L1_FE_BOUND_METRIC */
  TD_BE_BOUND = 10,	 /* .events[10] TOPDOWN_L1_BE_BOUND_METRIC */
};
|
||||
|
||||
static_always_inline f64
|
||||
memory_bound_fraction (perfmon_reading_t *ss)
|
||||
{
|
||||
return (ss->value[STALLS_MEM_ANY] + ss->value[BOUND_ON_STORES]) /
|
||||
(f64) (ss->value[STALLS_TOTAL] + ss->value[BOUND_ON_STORES]);
|
||||
}
|
||||
|
||||
static_always_inline f64
|
||||
perf_metrics_sum (perfmon_reading_t *ss)
|
||||
{
|
||||
return ss->value[TD_RETIRING] + ss->value[TD_BAD_SPEC] +
|
||||
ss->value[TD_FE_BOUND] + ss->value[TD_BE_BOUND];
|
||||
}
|
||||
|
||||
static_always_inline f64
|
||||
retiring (perfmon_reading_t *ss)
|
||||
{
|
||||
return ss->value[TD_RETIRING] / perf_metrics_sum (ss);
|
||||
}
|
||||
|
||||
static_always_inline f64
|
||||
bad_speculation (perfmon_reading_t *ss)
|
||||
{
|
||||
return ss->value[TD_BAD_SPEC] / perf_metrics_sum (ss);
|
||||
}
|
||||
|
||||
static_always_inline f64
|
||||
frontend_bound (perfmon_reading_t *ss)
|
||||
{
|
||||
return (ss->value[TD_FE_BOUND] / perf_metrics_sum (ss)) -
|
||||
(ss->value[UOP_DROPPING] / perf_metrics_sum (ss));
|
||||
}
|
||||
|
||||
static_always_inline f64
|
||||
backend_bound (perfmon_reading_t *ss)
|
||||
{
|
||||
return (ss->value[TD_BE_BOUND] / perf_metrics_sum (ss)) +
|
||||
((5 * ss->value[RECOVERY_CYCLES]) / perf_metrics_sum (ss));
|
||||
}
|
||||
|
||||
static_always_inline f64
|
||||
fetch_latency (perfmon_reading_t *ss)
|
||||
{
|
||||
f64 r = ((5 * ss->value[UOP_NOT_DELIVERED] - ss->value[UOP_DROPPING]) /
|
||||
(f64) ss->value[TD_SLOTS]);
|
||||
return r;
|
||||
}
|
||||
|
||||
static_always_inline f64
|
||||
fetch_bandwidth (perfmon_reading_t *ss)
|
||||
{
|
||||
return clib_max (0, frontend_bound (ss) - fetch_latency (ss));
|
||||
}
|
||||
|
||||
static_always_inline f64
|
||||
memory_bound (perfmon_reading_t *ss)
|
||||
{
|
||||
return backend_bound (ss) * memory_bound_fraction (ss);
|
||||
}
|
||||
|
||||
static_always_inline f64
|
||||
core_bound (perfmon_reading_t *ss)
|
||||
{
|
||||
return backend_bound (ss) - memory_bound (ss);
|
||||
}
|
||||
|
||||
static u8 *
|
||||
format_topdown_lvl2_icx (u8 *s, va_list *args)
|
||||
{
|
||||
perfmon_reading_t *ss = va_arg (*args, perfmon_reading_t *);
|
||||
u64 idx = va_arg (*args, int);
|
||||
f64 sv = 0;
|
||||
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
sv = retiring (ss);
|
||||
break;
|
||||
case 1:
|
||||
sv = bad_speculation (ss);
|
||||
break;
|
||||
case 2:
|
||||
sv = frontend_bound (ss);
|
||||
break;
|
||||
case 3:
|
||||
sv = backend_bound (ss);
|
||||
break;
|
||||
case 4:
|
||||
sv = fetch_latency (ss);
|
||||
break;
|
||||
case 5:
|
||||
sv = fetch_bandwidth (ss);
|
||||
break;
|
||||
case 6:
|
||||
sv = memory_bound (ss);
|
||||
break;
|
||||
case 7:
|
||||
sv = core_bound (ss);
|
||||
break;
|
||||
}
|
||||
|
||||
s = format (s, "%f", sv * 100);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/*
 * Registration of the Ice Lake "topdown" bundle (level 1 and 2 metrics).
 *
 * - .events[] lists eleven intel-core events; their order matches the
 *   TD_SLOTS .. TD_BE_BOUND enum that the helper functions use to index
 *   the reading.
 * - .cpu_supports restricts the bundle via topdown_lvl2_cpu_supports_icx.
 * - .column_headers has eight entries, in the same order as index 0..7
 *   handled by format_topdown_lvl2_icx.
 */
PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric_icx) = {
|
||||
.name = "topdown",
|
||||
.description = "Top-down Microarchitecture Analysis Level 1 & 2",
|
||||
.source = "intel-core",
|
||||
.events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
|
||||
.events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY,
|
||||
.events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_TOTAL,
|
||||
.events[3] = INTEL_CORE_E_EXE_ACTIVITY_BOUND_ON_STORES,
|
||||
.events[4] = INTEL_CORE_E_INT_MISC_RECOVERY_CYCLES,
|
||||
.events[5] = INTEL_CORE_E_INT_MISC_UOP_DROPPING,
|
||||
.events[6] = INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CORE,
|
||||
.events[7] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
|
||||
.events[8] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
|
||||
.events[9] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
|
||||
.events[10] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
|
||||
.n_events = 11,
|
||||
.cpu_supports = topdown_lvl2_cpu_supports_icx,
|
||||
.n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports_icx),
|
||||
.format_fn = format_topdown_lvl2_icx,
|
||||
/* Level-1 columns first (RT, BS, FE, BE), then the level-2 breakdown. */
.column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% FE.FL",
|
||||
"% FE.FB", "% BE.MB", "% BE.CB"),
|
||||
.footer = "Retiring (RT), Bad Speculation (BS),\n"
|
||||
" FrontEnd bound (FE), BackEnd bound (BE),\n"
|
||||
" Fetch Latency (FL), Fetch Bandwidth (FB),\n"
|
||||
" Memory Bound (MB), Core Bound (CB)",
|
||||
};
|
@@ -79,66 +79,6 @@ topdown_lvl1_rdpmc_metric (void *ps, topdown_e_t e)
|
||||
return (slots_t1 / slots_delta) * 100;
|
||||
}
|
||||
|
||||
static u8 *
|
||||
format_topdown_lvl1 (u8 *s, va_list *args)
|
||||
{
|
||||
void *ps = va_arg (*args, void *);
|
||||
u64 idx = va_arg (*args, int);
|
||||
perfmon_bundle_type_t type = va_arg (*args, perfmon_bundle_type_t);
|
||||
f64 sv = 0;
|
||||
|
||||
topdown_lvl1_parse_fn_t *parse_fn,
|
||||
*parse_fns[PERFMON_BUNDLE_TYPE_MAX] = { 0, topdown_lvl1_rdpmc_metric,
|
||||
topdown_lvl1_perf_reading, 0 };
|
||||
parse_fn = parse_fns[type];
|
||||
ASSERT (parse_fn);
|
||||
|
||||
switch (idx)
|
||||
{
|
||||
case 0:
|
||||
sv =
|
||||
parse_fn (ps, TOPDOWN_E_BAD_SPEC) + parse_fn (ps, TOPDOWN_E_RETIRING);
|
||||
break;
|
||||
case 1:
|
||||
sv =
|
||||
parse_fn (ps, TOPDOWN_E_BE_BOUND) + parse_fn (ps, TOPDOWN_E_FE_BOUND);
|
||||
break;
|
||||
default:
|
||||
sv = parse_fn (ps, (topdown_e_t) idx - 2);
|
||||
break;
|
||||
}
|
||||
|
||||
s = format (s, "%f", sv);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/* CPU-support table for the level-1 topdown bundle: a single entry that
 * pairs the AVX512_BITALG probe with PERFMON_BUNDLE_TYPE_NODE_OR_THREAD. */
static perfmon_cpu_supports_t topdown_lvl1_cpu_supports[] = {
|
||||
/* Intel ICX supports papi/thread or rdpmc/node */
|
||||
{ clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE_OR_THREAD }
|
||||
};
|
||||
|
||||
/*
 * Registration of the "topdown-level1" bundle.
 *
 * Five intel-core events are collected (slots plus the four level-1
 * metrics).  format_topdown_lvl1 renders six columns: two aggregates
 * (NS, ST) followed by the four individual level-1 metrics.
 */
PERFMON_REGISTER_BUNDLE (topdown_lvl1_metric) = {
|
||||
.name = "topdown-level1",
|
||||
.description = "Top-down Microarchitecture Analysis Level 1",
|
||||
.source = "intel-core",
|
||||
.events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
|
||||
.events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
|
||||
.events[2] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
|
||||
.events[3] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
|
||||
.events[4] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
|
||||
.n_events = 5,
|
||||
/* NOTE(review): 0x1F has five bits set, presumably one per event above
 * -- confirm against the preserve_samples field definition. */
.preserve_samples = 0x1F,
|
||||
.cpu_supports = topdown_lvl1_cpu_supports,
|
||||
.n_cpu_supports = ARRAY_LEN (topdown_lvl1_cpu_supports),
|
||||
.format_fn = format_topdown_lvl1,
|
||||
.column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS",
|
||||
"% ST.FE", "% ST.BE"),
|
||||
.footer = "Not Stalled (NS),STalled (ST),\n"
|
||||
" Retiring (RT), Bad Speculation (BS),\n"
|
||||
" FrontEnd bound (FE), BackEnd bound (BE)",
|
||||
};
|
||||
|
||||
/* Convert the TopDown enum to the perf reading index */
|
||||
#define TO_LVL2_PERF_IDX(e) \
|
||||
({ \
|
||||
@@ -245,8 +185,8 @@ static perfmon_cpu_supports_t topdown_lvl2_cpu_supports[] = {
|
||||
};
|
||||
|
||||
PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric) = {
|
||||
.name = "topdown-level2",
|
||||
.description = "Top-down Microarchitecture Analysis Level 2",
|
||||
.name = "topdown",
|
||||
.description = "Top-down Microarchitecture Analysis Level 1 & 2",
|
||||
.source = "intel-core",
|
||||
.events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
|
||||
.events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
|
||||
|
@@ -146,7 +146,10 @@
|
||||
_ (0x83, 0x04, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_STALL, \
|
||||
"Cycles where a code fetch is stalled due to L1 instruction cache tag " \
|
||||
"miss.") \
|
||||
_ (0x9C, 0x01, 0, 0, 0, 0x00, IDQ_UOPS_NOT_DELIVERED, CORE, \
|
||||
_ (0x83, 0x02, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_MISS, \
|
||||
"Instruction fetch tag lookups that miss in the instruction cache " \
|
||||
"(L1I). Counts at 64-byte cache-line granularity.") \
|
||||
_ (0x9C, 0x01, 0, 0, 0, 0x05, IDQ_UOPS_NOT_DELIVERED, CORE, \
|
||||
"Uops not delivered to Resource Allocation Table (RAT) per thread when " \
|
||||
"backend of the machine is not stalled") \
|
||||
_ (0xA1, 0x01, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_0, \
|
||||
|
Reference in New Issue
Block a user