perfmon: topdown backend bound core bundle
Add a bundle to measure topdown backend-bound core cycles; it indicates whether any given execution port has contention.
Type: improvement
Signed-off-by: Ray Kinsella <mdr@ashroe.eu>
Change-Id: I37d1b38c101ac42d51c10fa4452b822d34b729c9
This commit is contained in:
committed by
Damjan Marion
parent
1f2070a0fe
commit
fe85d87235
@@ -24,6 +24,7 @@ add_vpp_plugin(perfmon
|
||||
intel/core.c
|
||||
intel/uncore.c
|
||||
intel/bundle/backend_bound_mem.c
|
||||
intel/bundle/backend_bound_core.c
|
||||
intel/bundle/inst_and_clock.c
|
||||
intel/bundle/load_blocks.c
|
||||
intel/bundle/mem_bw.c
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Copyright (c) 2022 Intel and/or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <perfmon/perfmon.h>
|
||||
#include <perfmon/intel/core.h>
|
||||
|
||||
enum
|
||||
{
|
||||
PORT0 = 0,
|
||||
PORT1 = 1,
|
||||
PORT5 = 2,
|
||||
PORT6 = 3,
|
||||
PORT2_3 = 4,
|
||||
PORT4_9 = 5,
|
||||
PORT7_8 = 6,
|
||||
DISTRIBUTED = 7,
|
||||
};
|
||||
|
||||
static u8 *
|
||||
format_intel_backend_bound_core (u8 *s, va_list *args)
|
||||
{
|
||||
perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
|
||||
int row = va_arg (*args, int);
|
||||
f64 sv = 0;
|
||||
|
||||
if (!ss->n_packets)
|
||||
return s;
|
||||
|
||||
if (0 == row)
|
||||
{
|
||||
sv = ss->value[DISTRIBUTED] / ss->n_packets;
|
||||
|
||||
s = format (s, "%.0f", sv);
|
||||
return s;
|
||||
}
|
||||
|
||||
switch (row)
|
||||
{
|
||||
case 1:
|
||||
sv = ss->value[PORT0] / (f64) ss->value[DISTRIBUTED];
|
||||
break;
|
||||
case 2:
|
||||
sv = ss->value[PORT1] / (f64) ss->value[DISTRIBUTED];
|
||||
break;
|
||||
case 3:
|
||||
sv = ss->value[PORT5] / (f64) ss->value[DISTRIBUTED];
|
||||
break;
|
||||
case 4:
|
||||
sv = ss->value[PORT6] / (f64) ss->value[DISTRIBUTED];
|
||||
break;
|
||||
case 5:
|
||||
sv = (ss->value[PORT2_3]) / (f64) (2 * ss->value[DISTRIBUTED]);
|
||||
break;
|
||||
case 6:
|
||||
sv = (ss->value[PORT4_9] + ss->value[PORT7_8]) /
|
||||
(f64) (4 * ss->value[DISTRIBUTED]);
|
||||
break;
|
||||
}
|
||||
|
||||
sv = clib_max (sv * 100, 0);
|
||||
s = format (s, "%04.1f", sv);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static perfmon_cpu_supports_t backend_bound_core_cpu_supports[] = {
|
||||
{ clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
|
||||
};
|
||||
|
||||
PERFMON_REGISTER_BUNDLE (intel_core_backend_bound_core) = {
|
||||
.name = "td-backend-core",
|
||||
.description = "Topdown BackEnd-bound Core - % cycles core resources busy",
|
||||
.source = "intel-core",
|
||||
.events[0] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_0, /* 0xFF */
|
||||
.events[1] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_1, /* 0xFF */
|
||||
.events[2] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_5, /* 0xFF */
|
||||
.events[3] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_6, /* 0xFF */
|
||||
.events[4] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_2_3, /* 0xFF */
|
||||
.events[5] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_4_9, /* 0xFF */
|
||||
.events[6] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_7_8, /* 0xFF */
|
||||
.events[7] = INTEL_CORE_E_CPU_CLK_UNHALTED_DISTRIBUTED, /* 0xFF */
|
||||
.n_events = 8,
|
||||
.format_fn = format_intel_backend_bound_core,
|
||||
.cpu_supports = backend_bound_core_cpu_supports,
|
||||
.n_cpu_supports = ARRAY_LEN (backend_bound_core_cpu_supports),
|
||||
.column_headers = PERFMON_STRINGS ("Clocks/Packet", "%Port0", "%Port1",
|
||||
"%Port5", "%Port6", "%Load", "%Store"),
|
||||
};
|
||||
@@ -149,6 +149,20 @@
|
||||
_ (0x9C, 0x01, 0, 0, 0, 0x00, IDQ_UOPS_NOT_DELIVERED, CORE, \
|
||||
"Uops not delivered to Resource Allocation Table (RAT) per thread when " \
|
||||
"backend of the machine is not stalled") \
|
||||
_ (0xA1, 0x01, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_0, \
|
||||
"Number of uops executed on port 0") \
|
||||
_ (0xA1, 0x02, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_1, \
|
||||
"Number of uops executed on port 1") \
|
||||
_ (0xA1, 0x04, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_2_3, \
|
||||
"Number of uops executed on port 2 and 3") \
|
||||
_ (0xA1, 0x10, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_4_9, \
|
||||
"Number of uops executed on port 4 and 9") \
|
||||
_ (0xA1, 0x20, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_5, \
|
||||
"Number of uops executed on port 5") \
|
||||
_ (0xA1, 0x40, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_6, \
|
||||
"Number of uops executed on port 6") \
|
||||
_ (0xA1, 0x80, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_7_8, \
|
||||
"Number of uops executed on port 7 and 8") \
|
||||
_ (0xA2, 0x08, 0, 0, 0, 0x00, RESOURCE_STALLS, SB, \
|
||||
"Counts allocation stall cycles caused by the store buffer (SB) being " \
|
||||
"full. This counts cycles that the pipeline back-end blocked uop " \
|
||||
@@ -230,6 +244,9 @@
|
||||
"Counts the total number when the front end is resteered, mainly when " \
|
||||
"the BPU cannot provide a correct prediction and this is corrected by " \
|
||||
"other branch handling mechanisms at the front end.") \
|
||||
_ (0xEC, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, DISTRIBUTED, \
|
||||
"Cycle counts are evenly distributed between active threads in the " \
|
||||
" Core") \
|
||||
_ (0xF0, 0x40, 0, 0, 0, 0x00, L2_TRANS, L2_WB, \
|
||||
"L2 writebacks that access L2 cache") \
|
||||
_ (0xF1, 0x1F, 0, 0, 0, 0x00, L2_LINES_IN, ALL, \
|
||||
|
||||
Reference in New Issue
Block a user