compare.py: add normal and t-student utest

This commit is contained in:
Vicente Adolfo Bolea Sanchez 2022-09-20 15:28:38 -04:00
parent 706bd8d2b7
commit 19f10b9a0b
2 changed files with 34 additions and 8 deletions

@ -55,6 +55,15 @@ def create_parser():
default=True,
action="store_false",
help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format(report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS))
utest.add_argument(
'--dist',
dest='utest_dist',
choices=['mannwhitney', 'normal', 't'],
default='mannwhitney',
type=str,
help="Utest probabilistic distribution to use")
alpha_default = 0.05
utest.add_argument(
"--alpha",
@ -239,7 +248,7 @@ def main():
# Diff and output
output_lines = gbench.report.generate_difference_report(
json1, json2, args.display_aggregates_only,
args.utest, args.utest_alpha, sys.stdout.isatty())
args.utest, args.utest_dist, args.utest_alpha, sys.stdout.isatty())
print(description)
for ln in output_lines:
print(ln)

@ -6,7 +6,11 @@ import re
import copy
from scipy.stats import mannwhitneyu
from scipy.stats import norm
from scipy.stats import t
from statistics import mean
from statistics import stdev
class BenchmarkColor(object):
def __init__(self, name, code):
@ -38,6 +42,7 @@ BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better.
UTEST_COL_NAME = "_pvalue"
UTEST_NORM_MIN_STDEV = 0.05
def color_format(use_color, fmt_str, *args, **kwargs):
@ -154,7 +159,7 @@ def extract_field(partition, field_name):
rhs = [x[field_name] for x in partition[1]]
return [lhs, rhs]
def calc_utest(timings_cpu, timings_time):
def calc_utest(utest_dist, timings_cpu, timings_time):
min_rep_cnt = min(len(timings_time[0]),
len(timings_time[1]),
len(timings_cpu[0]),
@ -164,20 +169,30 @@ def calc_utest(timings_cpu, timings_time):
if min_rep_cnt < UTEST_MIN_REPETITIONS:
return False, None, None
time_pvalue = mannwhitneyu(
timings_time[0], timings_time[1], alternative='two-sided').pvalue
cpu_pvalue = mannwhitneyu(
timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue
pvalue_fn = str
if utest_dist == "normal":
pvalue_fn = lambda x : norm.sf(mean(x[1]), mean(x[0]),
max(stdev(x[0]), mean(x[0])*UTEST_NORM_MIN_STDEV))
elif utest_dist == "t":
pvalue_fn = lambda x : t.sf(mean(x[1]), len(x[0]) - 1, mean(x[0]),
max(stdev(x[0]), mean(x[0])*UTEST_NORM_MIN_STDEV))
elif utest_dist == "mannwhitney":
pvalue_fn = lambda x : mannwhitneyu(x[0], x[1], alternative='two-sided').pvalue
else:
return False, None, None
time_pvalue = pvalue_fn(timings_time)
cpu_pvalue = pvalue_fn(timings_cpu)
return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
def print_utest(partition, utest_alpha, first_col_width, use_color=True):
def print_utest(partition, utest_dist, utest_alpha, first_col_width, use_color=True):
def get_utest_color(pval):
return BC_FAIL if pval >= utest_alpha else BC_OKGREEN
timings_time = extract_field(partition, 'real_time')
timings_cpu = extract_field(partition, 'cpu_time')
have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time)
have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(utest_dist, timings_cpu, timings_time)
# Check if we failed miserably with minimum required repetitions for utest
if not have_optimal_repetitions and cpu_pvalue is None and time_pvalue is None:
@ -212,6 +227,7 @@ def generate_difference_report(
json2,
display_aggregates_only=False,
utest=False,
utest_dist="mannwhitney",
utest_alpha=0.05,
use_color=True):
"""
@ -279,6 +295,7 @@ def generate_difference_report(
# After processing the whole partition, if requested, do the U test.
if utest:
output_strs += print_utest(partition,
utest_dist=utest_dist,
utest_alpha=utest_alpha,
first_col_width=first_col_width,
use_color=use_color)