benchmarks: pass unparsed args to Google Benchmark

- It also adds Google Benchmark's compare.py script
  - It is installed to the build directory.

- It adds a wrapper script called compare-benchmarks.py, which:
  - Lets you run each of the benchmarks with different devices (see the
    example invocation below)

- It adds a README.md explaining how to run the benchmarks

- Makes the BenchmarkDeviceAdapter input size range parametrizable at compile time
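
  For illustration (the benchmark binary name and the VTK-m device flag shown
  here are assumptions; the mode given after the bare "--" is one of the modes
  accepted by compare.py):

    ./compare-benchmarks.py \
        --benchmark1='./BenchmarkDeviceAdapter --device=Serial' \
        --benchmark2='./BenchmarkDeviceAdapter --device=TBB' \
        -- benchmarks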

Signed-off-by: Vicente Adolfo Bolea Sanchez <vicente.bolea@kitware.com>
Vicente Adolfo Bolea Sanchez 2020-03-19 15:15:32 -04:00
parent 098c50b382
commit b05bd33d3c
20 changed files with 1713 additions and 472 deletions

@@ -1,157 +0,0 @@
#!/usr/bin/env python3
#
# Compares the output from BenchmarkDeviceAdapter from the serial
# device to a parallel device and prints a table containing the results.
#
# Example usage:
#
# $ BenchmarkDeviceAdapter_SERIAL > serial.out
# $ BenchmarkDeviceAdapter_TBB > tbb.out
# $ benchCompare.py serial.out tbb.out
#
#
# The number of threads (optional -- only used to generate the "Warn" column)
maxThreads = 4
#
# Print debugging output:
doDebug = False
#
# End config options.
import re
import sys
assert(len(sys.argv) == 3)
def debug(str):
if (doDebug): print(str)
# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64
typeParser = re.compile("\\*{3} ([^*]+) on device ([^*]+) \\*{15}")
# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name
nameParser = re.compile("Benchmark '([^-]+)' results:")
# Parses "mean = 0.0125s" --> 0.0125
meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")
# Parses "std dev = 0.0125s" --> 0.0125
stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")
serialFilename = sys.argv[1]
parallelFilename = sys.argv[2]
serialFile = open(serialFilename, 'r')
parallelFile = open(parallelFilename, 'r')
class BenchKey:
def __init__(self, name_, type_):
self.name = name_
self.type = type_
def __eq__(self, other):
return self.name == other.name and self.type == other.type
def __lt__(self, other):
if self.name < other.name: return True
elif self.name > other.name: return False
else: return self.type < other.type
def __hash__(self):
return (self.name + self.type).__hash__()
class BenchData:
def __init__(self, mean_, stdDev_):
self.mean = mean_
self.stdDev = stdDev_
def parseFile(f, benchmarks):
type = ""
bench = ""
mean = -1.
stdDev = -1.
for line in f:
debug("Line: {}".format(line))
typeRes = typeParser.match(line)
if typeRes:
type = typeRes.group(1)
debug("Found type: {}".format(type))
continue
nameRes = nameParser.match(line)
if nameRes:
name = nameRes.group(1)
debug("Found name: {}".format(name))
continue
meanRes = meanParser.match(line)
if meanRes:
mean = float(meanRes.group(1))
debug("Found mean: {}".format(mean))
continue
stdDevRes = stdDevParser.match(line)
if stdDevRes:
stdDev = float(stdDevRes.group(1))
debug("Found stddev: {}".format(stdDev))
# stdDev is always the last parse for a given benchmark, add entry now
benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev)
debug("{} records found.".format(len(benchmarks)))
mean = -1.
stdDev = -1.
continue
serialBenchmarks = {}
parallelBenchmarks = {}
parseFile(serialFile, serialBenchmarks)
parseFile(parallelFile, parallelBenchmarks)
serialKeys = set(serialBenchmarks.keys())
parallelKeys = set(parallelBenchmarks.keys())
commonKeys = sorted(list(serialKeys.intersection(parallelKeys)))
serialOnlyKeys = sorted(list(serialKeys.difference(parallelKeys)))
parallelOnlyKeys = sorted(list(parallelKeys.difference(serialKeys)))
debug("{} serial keys\n{} parallel keys\n{} common keys\n{} serialOnly keys\n{} parallelOnly keys.".format(
len(serialKeys), len(parallelKeys), len(commonKeys), len(serialOnlyKeys), len(parallelOnlyKeys)))
if len(serialOnlyKeys) > 0:
print("Keys found only in serial:")
for k in serialOnlyKeys:
print("%s (%s)"%(k.name, k.type))
print("")
if len(parallelOnlyKeys) > 0:
print("Keys found only in parallel:")
for k in parallelOnlyKeys:
print("%s (%s)"%(k.name, k.type))
print("")
print("Comparison:")
print("| %7s | %4s | %8s %8s | %8s %8s | %s (%s) |"%(
"Speedup", "Warn", "serial", "", "parallel", "", "Benchmark", "Type"))
print("|-%7s-|-%4s-|-%8s----%8s-|-%8s----%8s-|-%s--%s--|"%(
"-"*7, "-"*4, "-"*8, "-"*8, "-"*8, "-"*8, "-"*9, "-"*4))
for key in commonKeys:
sData = serialBenchmarks[key]
pData = parallelBenchmarks[key]
speedup = sData.mean / pData.mean if pData.mean != 0. else 0.
if speedup > maxThreads * .9:
flag = " "
elif speedup > maxThreads * .75:
flag = "! "
elif speedup > maxThreads * .5:
flag = "!! "
elif speedup > maxThreads * .25:
flag = "!!! "
else:
flag = "!!!!"
print("| %7.3f | %4s | %08.6f +- %08.6f | %08.6f +- %08.6f | %s (%s) |"%(
speedup, flag, sData.mean, sData.stdDev, pData.mean, pData.stdDev, key.name, key.type))

@@ -1,111 +0,0 @@
#!/usr/bin/env python
#
# Prints a concise summary of a benchmark output as a TSV blob.
#
# Example usage:
#
# $ BenchmarkXXX_DEVICE > bench.out
# $ benchSummary.py bench.out
#
# Options SortByType, SortByName, or SortByMean may be passed after the
# filename to sort the output by the indicated quantity. If no sort option
# is provided, the output order matches the input. If multiple options are
# specified, the list will be sorted repeatedly in the order requested.
import re
import sys
assert(len(sys.argv) >= 2)
# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64
typeParser = re.compile("\\*{3} ([^*]+) \\*{15}")
# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name
nameParser = re.compile("Benchmark '([^-]+)' results:")
# Parses "mean = 0.0125s" --> 0.0125
meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")
# Parses "std dev = 0.0125s" --> 0.0125
stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")
filename = sys.argv[1]
benchFile = open(filename, 'r')
sortOpt = None
if len(sys.argv) > 2:
sortOpt = sys.argv[2:]
class BenchKey:
def __init__(self, name_, type_):
self.name = name_
self.type = type_
def __eq__(self, other):
return self.name == other.name and self.type == other.type
def __lt__(self, other):
if self.name < other.name: return True
elif self.name > other.name: return False
else: return self.type < other.type
def __hash__(self):
return (self.name + self.type).__hash__()
class BenchData:
def __init__(self, mean_, stdDev_):
self.mean = mean_
self.stdDev = stdDev_
def parseFile(f, benchmarks):
type = ""
bench = ""
mean = -1.
stdDev = -1.
for line in f:
typeRes = typeParser.match(line)
if typeRes:
type = typeRes.group(1)
continue
nameRes = nameParser.match(line)
if nameRes:
name = nameRes.group(1)
continue
meanRes = meanParser.match(line)
if meanRes:
mean = float(meanRes.group(1))
continue
stdDevRes = stdDevParser.match(line)
if stdDevRes:
stdDev = float(stdDevRes.group(1))
# stdDev is always the last parse for a given benchmark, add entry now
benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev)
mean = -1.
stdDev = -1.
continue
benchmarks = {}
parseFile(benchFile, benchmarks)
# Sort keys by type:
keys = benchmarks.keys()
if sortOpt:
for opt in sortOpt:
if opt.lower() == "sortbytype":
keys = sorted(keys, key=lambda k: k.type)
elif opt.lower() == "sortbyname":
keys = sorted(keys, key=lambda k: k.name)
elif opt.lower() == "sortbymean":
keys = sorted(keys, key=lambda k: benchmarks[k].mean)
print("# Summary: (%s)"%filename)
print("%-9s\t%-9s\t%-9s\t%-s"%("Mean", "Stdev", "Stdev%", "Benchmark (type)"))
for key in keys:
data = benchmarks[key]
print("%9.6f\t%9.6f\t%9.6f\t%s (%s)"%(data.mean, data.stdDev, data.stdDev / data.mean * 100., key.name, key.type))

@@ -1,156 +0,0 @@
#!/usr/bin/env python
#
# Prints a concise summary of a benchmark output as a TSV blob. Benchmarks are
# expected to have "Baseline" in the name, and a matching benchmark with the
# same name but Baseline replaced with something else. For example,
#
# Baseline benchmark name: "Some benchmark: Baseline, Size=4"
# Test benchmark name: "Some benchmark: Blahblah, Size=4"
#
# The output will print the baseline, test, and overhead times for the
# benchmarks.
#
# Example usage:
#
# $ BenchmarkXXX_DEVICE > bench.out
# $ benchSummaryWithBaselines.py bench.out
#
# Options SortByType, SortByName, SortByOverhead, or SortByRatio
# (testtime/baseline) may be passed after the filename to sort the output by
# the indicated quantity. If no sort option is provided, the output order
# matches the input. If multiple options are specified, the list will be sorted
# repeatedly in the order requested.
import re
import sys
assert(len(sys.argv) >= 2)
# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64
typeParser = re.compile("\\*{3} ([^*]+) \\*{15}")
# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name
nameParser = re.compile("Benchmark '([^-]+)' results:")
# Parses "mean = 0.0125s" --> 0.0125
meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")
# Parses "std dev = 0.0125s" --> 0.0125
stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")
# Parses "SomeText Baseline Other Text" --> ("SomeText ", " Other Text")
baselineParser = re.compile("(.*)Baseline(.*)")
filename = sys.argv[1]
benchFile = open(filename, 'r')
sortOpt = None
if len(sys.argv) > 2:
sortOpt = sys.argv[2:]
class BenchKey:
def __init__(self, name_, type_):
self.name = name_
self.type = type_
def __eq__(self, other):
return self.name == other.name and self.type == other.type
def __lt__(self, other):
if self.name < other.name: return True
elif self.name > other.name: return False
else: return self.type < other.type
def __hash__(self):
return (self.name + self.type).__hash__()
class BenchData:
def __init__(self, mean_, stdDev_):
self.mean = mean_
self.stdDev = stdDev_
def parseFile(f, benchmarks):
type = ""
bench = ""
mean = -1.
stdDev = -1.
for line in f:
typeRes = typeParser.match(line)
if typeRes:
type = typeRes.group(1)
continue
nameRes = nameParser.match(line)
if nameRes:
name = nameRes.group(1)
continue
meanRes = meanParser.match(line)
if meanRes:
mean = float(meanRes.group(1))
continue
stdDevRes = stdDevParser.match(line)
if stdDevRes:
stdDev = float(stdDevRes.group(1))
# stdDev is always the last parse for a given benchmark, add entry now
benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev)
mean = -1.
stdDev = -1.
continue
class BaselinedBenchData:
def __init__(self, baseline, test):
self.baseline = baseline.mean
self.test = test.mean
self.overhead = test.mean - baseline.mean
def findBaselines(benchmarks):
result = {}
for baseKey in benchmarks.keys():
# Look for baseline entries
baselineRes = baselineParser.match(baseKey.name)
if baselineRes:
prefix = baselineRes.group(1)
suffix = baselineRes.group(2)
# Find the test entry matching the baseline:
for testKey in benchmarks.keys():
if baseKey.type != testKey.type: # Need same type
continue
if baseKey.name == testKey.name: # Skip the base key
continue
if testKey.name.startswith(prefix) and testKey.name.endswith(suffix):
newName = (prefix + suffix).replace(", ,", ",")
newKey = BenchKey(newName, testKey.type)
newVal = BaselinedBenchData(benchmarks[baseKey], benchmarks[testKey])
result[newKey] = newVal
return result
benchmarks = {}
parseFile(benchFile, benchmarks)
benchmarks = findBaselines(benchmarks)
# Sort keys by type:
keys = benchmarks.keys()
if sortOpt:
for opt in sortOpt:
if opt.lower() == "sortbytype":
keys = sorted(keys, key=lambda k: k.type)
elif opt.lower() == "sortbyname":
keys = sorted(keys, key=lambda k: k.name)
elif opt.lower() == "sortbyoverhead":
keys = sorted(keys, key=lambda k: benchmarks[k].overhead)
elif opt.lower() == "sortbyratio":
keys = sorted(keys, key=lambda k: benchmarks[k].overhead / benchmarks[k].baseline)
print("# Summary: (%s)"%filename)
print("%-9s\t%-9s\t%-9s\t%-9s\t%-s"%("Baseline", "TestTime", "Overhead", "Test/Base", "Benchmark (type)"))
for key in keys:
data = benchmarks[key]
print("%9.6f\t%9.6f\t%9.6f\t%9.6f\t%s (%s)"%(data.baseline, data.test,
data.overhead, data.test / data.baseline, key.name, key.type))

@@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""
compare-benchmarks.py - VTKm + Google Benchmarks compare.py
"""
import getopt
import subprocess
import sys
import time
import os
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
COMPARE_PY_PATH = os.path.join(CURRENT_DIR, 'compare.py')
COMPARE_PY = sys.executable + " " + COMPARE_PY_PATH
class Bench():
def __init__(self):
self.__cmd = None
@property
def cmd(self):
return self.__cmd
@cmd.setter
def cmd(self, c):
self.__cmd = c
def launch(self):
output_file = "bench-%d.json" % time.time()
cmd_exec = "%s --benchmark_out=%s --benchmark_out_format=json" \
% (self.cmd, output_file)
print(cmd_exec)
subprocess.call(cmd_exec, shell=True)
return output_file
def print_help(error_msg = None):
if error_msg != None:
print(error_msg)
print("usage: compare-benchmarks <opts>\n" \
" --benchmark1='<benchmark1> [arg1] [arg2] ...'"\
" [--filter1=<filter1>]\n"\
" --benchmark2='<benchmark2> [arg1] [arg2] ...'"\
" [--filter2=<filter2>]\n"\
" -- [-opt] benchmarks|filters|benchmarksfiltered\n\n" \
"compare.py help:")
subprocess.call(COMPARE_PY, shell=True)
sys.exit(0)
# -----------------------------------------------------------------------------
def main():
is_filters = False
filter1 = str()
filter2 = str()
bench1 = Bench()
bench2 = Bench()
options, remainder = getopt.gnu_getopt(sys.argv[1:], '',
['help','benchmark1=', 'benchmark2=', 'filter1=', 'filter2='])
for opt, arg in options:
if opt == "--benchmark1":
bench1.cmd = arg
if opt == "--benchmark2":
bench2.cmd = arg
if opt == "--filter1":
filter1 = arg
if opt == "--filter2":
filter2 = arg
if opt == "--help":
print_help()
if bench1.cmd == None:
print_help("ERROR: no benchmarks chosen")
for arg in remainder:
if arg == "filters":
is_filters = True
if is_filters and bench2.cmd != None:
print_help("ERROR: filters option can only accept --benchmark1= and --filter1")
b1_output = bench1.launch()
b2_output = bench2.launch() if not is_filters else filter1 + " " + filter2
cmd = "%s %s %s %s" % (COMPARE_PY, " ".join(remainder), b1_output, b2_output)
print(cmd)
subprocess.call(cmd, shell=True)
os.remove(b1_output)
if not is_filters:
os.remove(b2_output)
if __name__ == '__main__':
main()
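# Illustrative usage (a sketch, not part of the original script): the benchmark
# binary name and its --device flag are assumptions; the 'filters' mode name
# comes from compare.py. This compares two benchmark filters taken from a single
# run on one device:
#
#   ./compare-benchmarks.py \
#       --benchmark1='./BenchmarkDeviceAdapter --device=TBB' \
#       --filter1=BenchCopy --filter2=BenchUpperBounds -- filters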

Utilities/Scripts/compare.py (new executable file, 408 lines)

@@ -0,0 +1,408 @@
#!/usr/bin/env python
import unittest
"""
compare.py - versatile benchmark output compare tool
"""
import argparse
from argparse import ArgumentParser
import sys
import gbench
from gbench import util, report
from gbench.util import *
def check_inputs(in1, in2, flags):
"""
Perform checking on the user provided inputs and diagnose any abnormalities
"""
in1_kind, in1_err = classify_input_file(in1)
in2_kind, in2_err = classify_input_file(in2)
output_file = find_benchmark_flag('--benchmark_out=', flags)
output_type = find_benchmark_flag('--benchmark_out_format=', flags)
if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file:
print(("WARNING: '--benchmark_out=%s' will be passed to both "
"benchmarks causing it to be overwritten") % output_file)
if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0:
print("WARNING: passing optional flags has no effect since both "
"inputs are JSON")
if output_type is not None and output_type != 'json':
print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`"
" is not supported.") % output_type)
sys.exit(1)
def create_parser():
parser = ArgumentParser(
description='versatile benchmark output compare tool')
parser.add_argument(
'-a',
'--display_aggregates_only',
dest='display_aggregates_only',
action="store_true",
help="If there are repetitions, by default, we display everything - the"
" actual runs, and the aggregates computed. Sometimes, it is "
"desirable to only view the aggregates. E.g. when there are a lot "
"of repetitions. Do note that only the display is affected. "
"Internally, all the actual runs are still used, e.g. for U test.")
utest = parser.add_argument_group()
utest.add_argument(
'--no-utest',
dest='utest',
default=True,
action="store_false",
help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format(report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS))
alpha_default = 0.05
utest.add_argument(
"--alpha",
dest='utest_alpha',
default=alpha_default,
type=float,
help=("significance level alpha. if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)") %
alpha_default)
subparsers = parser.add_subparsers(
help='This tool has multiple modes of operation:',
dest='mode')
parser_a = subparsers.add_parser(
'benchmarks',
help='The most simple use-case, compare all the output of these two benchmarks')
baseline = parser_a.add_argument_group(
'baseline', 'The benchmark baseline')
baseline.add_argument(
'test_baseline',
metavar='test_baseline',
type=argparse.FileType('r'),
nargs=1,
help='A benchmark executable or JSON output file')
contender = parser_a.add_argument_group(
'contender', 'The benchmark that will be compared against the baseline')
contender.add_argument(
'test_contender',
metavar='test_contender',
type=argparse.FileType('r'),
nargs=1,
help='A benchmark executable or JSON output file')
parser_a.add_argument(
'benchmark_options',
metavar='benchmark_options',
nargs=argparse.REMAINDER,
help='Arguments to pass when running benchmark executables')
parser_b = subparsers.add_parser(
'filters', help='Compare filter one with the filter two of benchmark')
baseline = parser_b.add_argument_group(
'baseline', 'The benchmark baseline')
baseline.add_argument(
'test',
metavar='test',
type=argparse.FileType('r'),
nargs=1,
help='A benchmark executable or JSON output file')
baseline.add_argument(
'filter_baseline',
metavar='filter_baseline',
type=str,
nargs=1,
help='The first filter, that will be used as baseline')
contender = parser_b.add_argument_group(
'contender', 'The benchmark that will be compared against the baseline')
contender.add_argument(
'filter_contender',
metavar='filter_contender',
type=str,
nargs=1,
help='The second filter, that will be compared against the baseline')
parser_b.add_argument(
'benchmark_options',
metavar='benchmark_options',
nargs=argparse.REMAINDER,
help='Arguments to pass when running benchmark executables')
parser_c = subparsers.add_parser(
'benchmarksfiltered',
help='Compare filter one of first benchmark with filter two of the second benchmark')
baseline = parser_c.add_argument_group(
'baseline', 'The benchmark baseline')
baseline.add_argument(
'test_baseline',
metavar='test_baseline',
type=argparse.FileType('r'),
nargs=1,
help='A benchmark executable or JSON output file')
baseline.add_argument(
'filter_baseline',
metavar='filter_baseline',
type=str,
nargs=1,
help='The first filter, that will be used as baseline')
contender = parser_c.add_argument_group(
'contender', 'The benchmark that will be compared against the baseline')
contender.add_argument(
'test_contender',
metavar='test_contender',
type=argparse.FileType('r'),
nargs=1,
help='The second benchmark executable or JSON output file, that will be compared against the baseline')
contender.add_argument(
'filter_contender',
metavar='filter_contender',
type=str,
nargs=1,
help='The second filter, that will be compared against the baseline')
parser_c.add_argument(
'benchmark_options',
metavar='benchmark_options',
nargs=argparse.REMAINDER,
help='Arguments to pass when running benchmark executables')
return parser
def main():
# Parse the command line flags
parser = create_parser()
args, unknown_args = parser.parse_known_args()
if args.mode is None:
parser.print_help()
exit(1)
assert not unknown_args
benchmark_options = args.benchmark_options
if args.mode == 'benchmarks':
test_baseline = args.test_baseline[0].name
test_contender = args.test_contender[0].name
filter_baseline = ''
filter_contender = ''
# NOTE: if test_baseline == test_contender, you are analyzing the stdev
description = 'Comparing %s to %s' % (test_baseline, test_contender)
elif args.mode == 'filters':
test_baseline = args.test[0].name
test_contender = args.test[0].name
filter_baseline = args.filter_baseline[0]
filter_contender = args.filter_contender[0]
# NOTE: if filter_baseline == filter_contender, you are analyzing the
# stdev
description = 'Comparing %s to %s (from %s)' % (
filter_baseline, filter_contender, args.test[0].name)
elif args.mode == 'benchmarksfiltered':
test_baseline = args.test_baseline[0].name
test_contender = args.test_contender[0].name
filter_baseline = args.filter_baseline[0]
filter_contender = args.filter_contender[0]
# NOTE: if test_baseline == test_contender and
# filter_baseline == filter_contender, you are analyzing the stdev
description = 'Comparing %s (from %s) to %s (from %s)' % (
filter_baseline, test_baseline, filter_contender, test_contender)
else:
# should never happen
print("Unrecognized mode of operation: '%s'" % args.mode)
parser.print_help()
exit(1)
check_inputs(test_baseline, test_contender, benchmark_options)
if args.display_aggregates_only:
benchmark_options += ['--benchmark_display_aggregates_only=true']
options_baseline = []
options_contender = []
if filter_baseline and filter_contender:
options_baseline = ['--benchmark_filter=%s' % filter_baseline]
options_contender = ['--benchmark_filter=%s' % filter_contender]
# Run the benchmarks and report the results
json1 = json1_orig = gbench.util.run_or_load_benchmark(
test_baseline, benchmark_options + options_baseline)
json2 = json2_orig = gbench.util.run_or_load_benchmark(
test_contender, benchmark_options + options_contender)
# Now, filter the benchmarks so that the difference report can work
if filter_baseline and filter_contender:
replacement = '[%s vs. %s]' % (filter_baseline, filter_contender)
json1 = gbench.report.filter_benchmark(
json1_orig, filter_baseline, replacement)
json2 = gbench.report.filter_benchmark(
json2_orig, filter_contender, replacement)
# Diff and output
output_lines = gbench.report.generate_difference_report(
json1, json2, args.display_aggregates_only,
args.utest, args.utest_alpha)
print(description)
for ln in output_lines:
print(ln)
class TestParser(unittest.TestCase):
def setUp(self):
self.parser = create_parser()
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'gbench',
'Inputs')
self.testInput0 = os.path.join(testInputs, 'test1_run1.json')
self.testInput1 = os.path.join(testInputs, 'test1_run2.json')
def test_benchmarks_basic(self):
parsed = self.parser.parse_args(
['benchmarks', self.testInput0, self.testInput1])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_basic_without_utest(self):
parsed = self.parser.parse_args(
['--no-utest', 'benchmarks', self.testInput0, self.testInput1])
self.assertFalse(parsed.display_aggregates_only)
self.assertFalse(parsed.utest)
self.assertEqual(parsed.utest_alpha, 0.05)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_basic_display_aggregates_only(self):
parsed = self.parser.parse_args(
['-a', 'benchmarks', self.testInput0, self.testInput1])
self.assertTrue(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_basic_with_utest_alpha(self):
parsed = self.parser.parse_args(
['--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.utest_alpha, 0.314)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_basic_without_utest_with_utest_alpha(self):
parsed = self.parser.parse_args(
['--no-utest', '--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1])
self.assertFalse(parsed.display_aggregates_only)
self.assertFalse(parsed.utest)
self.assertEqual(parsed.utest_alpha, 0.314)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_with_remainder(self):
parsed = self.parser.parse_args(
['benchmarks', self.testInput0, self.testInput1, 'd'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.benchmark_options, ['d'])
def test_benchmarks_with_remainder_after_doubleminus(self):
parsed = self.parser.parse_args(
['benchmarks', self.testInput0, self.testInput1, '--', 'e'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.benchmark_options, ['e'])
def test_filters_basic(self):
parsed = self.parser.parse_args(
['filters', self.testInput0, 'c', 'd'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'filters')
self.assertEqual(parsed.test[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.filter_contender[0], 'd')
self.assertFalse(parsed.benchmark_options)
def test_filters_with_remainder(self):
parsed = self.parser.parse_args(
['filters', self.testInput0, 'c', 'd', 'e'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'filters')
self.assertEqual(parsed.test[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.filter_contender[0], 'd')
self.assertEqual(parsed.benchmark_options, ['e'])
def test_filters_with_remainder_after_doubleminus(self):
parsed = self.parser.parse_args(
['filters', self.testInput0, 'c', 'd', '--', 'f'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'filters')
self.assertEqual(parsed.test[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.filter_contender[0], 'd')
self.assertEqual(parsed.benchmark_options, ['f'])
def test_benchmarksfiltered_basic(self):
parsed = self.parser.parse_args(
['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarksfiltered')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.filter_contender[0], 'e')
self.assertFalse(parsed.benchmark_options)
def test_benchmarksfiltered_with_remainder(self):
parsed = self.parser.parse_args(
['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', 'f'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarksfiltered')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.filter_contender[0], 'e')
self.assertEqual(parsed.benchmark_options[0], 'f')
def test_benchmarksfiltered_with_remainder_after_doubleminus(self):
parsed = self.parser.parse_args(
['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', '--', 'g'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarksfiltered')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.filter_contender[0], 'e')
self.assertEqual(parsed.benchmark_options[0], 'g')
if __name__ == '__main__':
# unittest.main()
main()
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;

@@ -0,0 +1,8 @@
"""Google Benchmark tooling"""
__author__ = 'Eric Fiselier'
__email__ = 'eric@efcs.ca'
__versioninfo__ = (0, 5, 0)
__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
__all__ = []

@@ -0,0 +1,541 @@
import unittest
"""report.py - Utilities for reporting statistics about benchmark results
"""
import os
import re
import copy
from scipy.stats import mannwhitneyu
class BenchmarkColor(object):
def __init__(self, name, code):
self.name = name
self.code = code
def __repr__(self):
return '%s%r' % (self.__class__.__name__,
(self.name, self.code))
def __format__(self, format):
return self.code
# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better.
UTEST_COL_NAME = "_pvalue"
def color_format(use_color, fmt_str, *args, **kwargs):
"""
Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
is False then all color codes in 'args' and 'kwargs' are replaced with
the empty string.
"""
assert use_color is True or use_color is False
if not use_color:
args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
for arg in args]
kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
for key, arg in kwargs.items()}
return fmt_str.format(*args, **kwargs)
def find_longest_name(benchmark_list):
"""
Return the length of the longest benchmark name in a given list of
benchmark JSON objects
"""
longest_name = 1
for bc in benchmark_list:
if len(bc['name']) > longest_name:
longest_name = len(bc['name'])
return longest_name
def calculate_change(old_val, new_val):
"""
Return a float representing the decimal change between old_val and new_val.
"""
if old_val == 0 and new_val == 0:
return 0.0
if old_val == 0:
return float(new_val - old_val) / (float(old_val + new_val) / 2)
return float(new_val - old_val) / abs(old_val)
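# For example, calculate_change(100, 110) returns 0.10 (a 10% increase, matching
# the BM_10PercentSlower case in the unit tests below), while calculate_change(0, 10)
# falls back to the midpoint-relative form and returns 2.0.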
def filter_benchmark(json_orig, family, replacement=""):
"""
Apply a filter to the json, and only leave the 'family' of benchmarks.
"""
regex = re.compile(family)
filtered = {}
filtered['benchmarks'] = []
for be in json_orig['benchmarks']:
if not regex.search(be['name']):
continue
filteredbench = copy.deepcopy(be) # Do NOT modify the old name!
filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
filtered['benchmarks'].append(filteredbench)
return filtered
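# For example (as exercised by TestReportDifferenceBetweenFamilies below):
# filter_benchmark(json, "BM_Z.ro", ".") keeps only the benchmarks whose names
# match the regex "BM_Z.ro" and rewrites the matched portion of each name to ".",
# so two families from the same run can be diffed against each other.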
def get_unique_benchmark_names(json):
"""
While *keeping* the order, give all the unique 'names' used for benchmarks.
"""
seen = set()
uniqued = [x['name'] for x in json['benchmarks']
if x['name'] not in seen and
(seen.add(x['name']) or True)]
return uniqued
def intersect(list1, list2):
"""
Given two lists, get a new list consisting of the elements only contained
in *both of the input lists*, while preserving the ordering.
"""
return [x for x in list1 if x in list2]
def is_potentially_comparable_benchmark(x):
return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x)
def partition_benchmarks(json1, json2):
"""
While preserving the ordering, find benchmarks with the same names in
both of the inputs, and group them.
(i.e. partition/filter into groups with common name)
"""
json1_unique_names = get_unique_benchmark_names(json1)
json2_unique_names = get_unique_benchmark_names(json2)
names = intersect(json1_unique_names, json2_unique_names)
partitions = []
for name in names:
time_unit = None
# Pick the time unit from the first entry of the lhs benchmark.
# We should be careful not to crash with unexpected input.
for x in json1['benchmarks']:
if (x['name'] == name and is_potentially_comparable_benchmark(x)):
time_unit = x['time_unit']
break
if time_unit is None:
continue
# Filter by name and time unit.
# All the repetitions are assumed to be comparable.
lhs = [x for x in json1['benchmarks'] if x['name'] == name and
x['time_unit'] == time_unit]
rhs = [x for x in json2['benchmarks'] if x['name'] == name and
x['time_unit'] == time_unit]
partitions.append([lhs, rhs])
return partitions
def extract_field(partition, field_name):
# The count of elements may be different. We want *all* of them.
lhs = [x[field_name] for x in partition[0]]
rhs = [x[field_name] for x in partition[1]]
return [lhs, rhs]
def calc_utest(timings_cpu, timings_time):
min_rep_cnt = min(len(timings_time[0]),
len(timings_time[1]),
len(timings_cpu[0]),
len(timings_cpu[1]))
# Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
if min_rep_cnt < UTEST_MIN_REPETITIONS:
return False, None, None
time_pvalue = mannwhitneyu(
timings_time[0], timings_time[1], alternative='two-sided').pvalue
cpu_pvalue = mannwhitneyu(
timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue
return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
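# For example, if either side of the comparison has only a single repetition,
# min_rep_cnt is 1, which is below UTEST_MIN_REPETITIONS, so (False, None, None)
# is returned and no p-values are computed.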
def print_utest(partition, utest_alpha, first_col_width, use_color=True):
def get_utest_color(pval):
return BC_FAIL if pval >= utest_alpha else BC_OKGREEN
timings_time = extract_field(partition, 'real_time')
timings_cpu = extract_field(partition, 'cpu_time')
have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time)
# Check if we failed miserably with minimum required repetitions for utest
if not have_optimal_repetitions and cpu_pvalue is None and time_pvalue is None:
return []
dsc = "U Test, Repetitions: {} vs {}".format(
len(timings_cpu[0]), len(timings_cpu[1]))
dsc_color = BC_OKGREEN
# We still got some results to show but issue a warning about it.
if not have_optimal_repetitions:
dsc_color = BC_WARNING
dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
UTEST_OPTIMAL_REPETITIONS)
special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"
last_name = partition[0][0]['name']
return [color_format(use_color,
special_str,
BC_HEADER,
"{}{}".format(last_name, UTEST_COL_NAME),
first_col_width,
get_utest_color(time_pvalue), time_pvalue,
get_utest_color(cpu_pvalue), cpu_pvalue,
dsc_color, dsc,
endc=BC_ENDC)]
def generate_difference_report(
json1,
json2,
display_aggregates_only=False,
utest=False,
utest_alpha=0.05,
use_color=True):
"""
Calculate and report the difference between each test of two benchmarks
runs specified as 'json1' and 'json2'.
"""
assert utest is True or utest is False
first_col_width = find_longest_name(json1['benchmarks'])
def find_test(name):
for b in json2['benchmarks']:
if b['name'] == name:
return b
return None
first_col_width = max(
first_col_width,
len('Benchmark'))
first_col_width += len(UTEST_COL_NAME)
first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format(
'Benchmark', 12 + first_col_width)
output_strs = [first_line, '-' * len(first_line)]
partitions = partition_benchmarks(json1, json2)
for partition in partitions:
# Careful, we may have different repetition count.
for i in range(min(len(partition[0]), len(partition[1]))):
bn = partition[0][i]
other_bench = partition[1][i]
# *If* we were asked to only display aggregates,
# and if it is non-aggregate, then skip it.
if display_aggregates_only and 'run_type' in bn and 'run_type' in other_bench:
assert bn['run_type'] == other_bench['run_type']
if bn['run_type'] != 'aggregate':
continue
fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
def get_color(res):
if res > 0.05:
return BC_FAIL
elif res > -0.07:
return BC_WHITE
else:
return BC_CYAN
tres = calculate_change(bn['real_time'], other_bench['real_time'])
cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
output_strs += [color_format(use_color,
fmt_str,
BC_HEADER,
bn['name'],
first_col_width,
get_color(tres),
tres,
get_color(cpures),
cpures,
bn['real_time'],
other_bench['real_time'],
bn['cpu_time'],
other_bench['cpu_time'],
endc=BC_ENDC)]
# After processing the whole partition, if requested, do the U test.
if utest:
output_strs += print_utest(partition,
utest_alpha=utest_alpha,
first_col_width=first_col_width,
use_color=use_color)
return output_strs
###############################################################################
# Unit tests
class TestGetUniqueBenchmarkNames(unittest.TestCase):
def load_results(self):
import json
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'Inputs')
testOutput = os.path.join(testInputs, 'test3_run0.json')
with open(testOutput, 'r') as f:
json = json.load(f)
return json
def test_basic(self):
expect_lines = [
'BM_One',
'BM_Two',
'short', # These two are not sorted
'medium', # These two are not sorted
]
json = self.load_results()
output_lines = get_unique_benchmark_names(json)
print("\n")
print("\n".join(output_lines))
self.assertEqual(len(output_lines), len(expect_lines))
for i in range(0, len(output_lines)):
self.assertEqual(expect_lines[i], output_lines[i])
class TestReportDifference(unittest.TestCase):
def load_results(self):
import json
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'Inputs')
testOutput1 = os.path.join(testInputs, 'test1_run1.json')
testOutput2 = os.path.join(testInputs, 'test1_run2.json')
with open(testOutput1, 'r') as f:
json1 = json.load(f)
with open(testOutput2, 'r') as f:
json2 = json.load(f)
return json1, json2
def test_basic(self):
expect_lines = [
['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
['BM_100xSlower', '+99.0000', '+99.0000',
'100', '10000', '100', '10000'],
['BM_100xFaster', '-0.9900', '-0.9900',
'10000', '100', '10000', '100'],
['BM_10PercentCPUToTime', '+0.1000',
'-0.1000', '100', '110', '100', '90'],
['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
]
json1, json2 = self.load_results()
output_lines_with_header = generate_difference_report(
json1, json2, use_color=False)
output_lines = output_lines_with_header[2:]
print("\n")
print("\n".join(output_lines_with_header))
self.assertEqual(len(output_lines), len(expect_lines))
for i in range(0, len(output_lines)):
parts = [x for x in output_lines[i].split(' ') if x]
self.assertEqual(len(parts), 7)
self.assertEqual(expect_lines[i], parts)
class TestReportDifferenceBetweenFamilies(unittest.TestCase):
def load_result(self):
import json
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'Inputs')
testOutput = os.path.join(testInputs, 'test2_run.json')
with open(testOutput, 'r') as f:
json = json.load(f)
return json
def test_basic(self):
expect_lines = [
['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
]
json = self.load_result()
json1 = filter_benchmark(json, "BM_Z.ro", ".")
json2 = filter_benchmark(json, "BM_O.e", ".")
output_lines_with_header = generate_difference_report(
json1, json2, use_color=False)
output_lines = output_lines_with_header[2:]
print("\n")
print("\n".join(output_lines_with_header))
self.assertEqual(len(output_lines), len(expect_lines))
for i in range(0, len(output_lines)):
parts = [x for x in output_lines[i].split(' ') if x]
self.assertEqual(len(parts), 7)
self.assertEqual(expect_lines[i], parts)
class TestReportDifferenceWithUTest(unittest.TestCase):
def load_results(self):
import json
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'Inputs')
testOutput1 = os.path.join(testInputs, 'test3_run0.json')
testOutput2 = os.path.join(testInputs, 'test3_run1.json')
with open(testOutput1, 'r') as f:
json1 = json.load(f)
with open(testOutput2, 'r') as f:
json2 = json.load(f)
return json1, json2
def test_utest(self):
expect_lines = []
expect_lines = [
['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
['BM_Two_pvalue',
'0.6985',
'0.6985',
'U',
'Test,',
'Repetitions:',
'2',
'vs',
'2.',
'WARNING:',
'Results',
'unreliable!',
'9+',
'repetitions',
'recommended.'],
['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
['short_pvalue',
'0.7671',
'0.1489',
'U',
'Test,',
'Repetitions:',
'2',
'vs',
'3.',
'WARNING:',
'Results',
'unreliable!',
'9+',
'repetitions',
'recommended.'],
['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
]
json1, json2 = self.load_results()
output_lines_with_header = generate_difference_report(
json1, json2, utest=True, utest_alpha=0.05, use_color=False)
output_lines = output_lines_with_header[2:]
print("\n")
print("\n".join(output_lines_with_header))
self.assertEqual(len(output_lines), len(expect_lines))
for i in range(0, len(output_lines)):
parts = [x for x in output_lines[i].split(' ') if x]
self.assertEqual(expect_lines[i], parts)
class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
unittest.TestCase):
def load_results(self):
import json
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'Inputs')
testOutput1 = os.path.join(testInputs, 'test3_run0.json')
testOutput2 = os.path.join(testInputs, 'test3_run1.json')
with open(testOutput1, 'r') as f:
json1 = json.load(f)
with open(testOutput2, 'r') as f:
json2 = json.load(f)
return json1, json2
def test_utest(self):
expect_lines = []
expect_lines = [
['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
['BM_Two_pvalue',
'0.6985',
'0.6985',
'U',
'Test,',
'Repetitions:',
'2',
'vs',
'2.',
'WARNING:',
'Results',
'unreliable!',
'9+',
'repetitions',
'recommended.'],
['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
['short_pvalue',
'0.7671',
'0.1489',
'U',
'Test,',
'Repetitions:',
'2',
'vs',
'3.',
'WARNING:',
'Results',
'unreliable!',
'9+',
'repetitions',
'recommended.'],
]
json1, json2 = self.load_results()
output_lines_with_header = generate_difference_report(
json1, json2, display_aggregates_only=True,
utest=True, utest_alpha=0.05, use_color=False)
output_lines = output_lines_with_header[2:]
print("\n")
print("\n".join(output_lines_with_header))
self.assertEqual(len(output_lines), len(expect_lines))
for i in range(0, len(output_lines)):
parts = [x for x in output_lines[i].split(' ') if x]
self.assertEqual(expect_lines[i], parts)
if __name__ == '__main__':
unittest.main()
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;

@@ -0,0 +1,164 @@
"""util.py - General utilities for running, loading, and processing benchmarks
"""
import json
import os
import tempfile
import subprocess
import sys
# Input file type enumeration
IT_Invalid = 0
IT_JSON = 1
IT_Executable = 2
_num_magic_bytes = 2 if sys.platform.startswith('win') else 4
def is_executable_file(filename):
"""
Return 'True' if 'filename' names a valid file which is likely
an executable. A file is considered an executable if it starts with the
magic bytes for an EXE, Mach-O, or ELF file.
"""
if not os.path.isfile(filename):
return False
with open(filename, mode='rb') as f:
magic_bytes = f.read(_num_magic_bytes)
if sys.platform == 'darwin':
return magic_bytes in [
b'\xfe\xed\xfa\xce', # MH_MAGIC
b'\xce\xfa\xed\xfe', # MH_CIGAM
b'\xfe\xed\xfa\xcf', # MH_MAGIC_64
b'\xcf\xfa\xed\xfe', # MH_CIGAM_64
b'\xca\xfe\xba\xbe', # FAT_MAGIC
b'\xbe\xba\xfe\xca' # FAT_CIGAM
]
elif sys.platform.startswith('win'):
return magic_bytes == b'MZ'
else:
return magic_bytes == b'\x7FELF'
def is_json_file(filename):
"""
Returns 'True' if 'filename' names a valid JSON output file.
'False' otherwise.
"""
try:
with open(filename, 'r') as f:
json.load(f)
return True
except BaseException:
pass
return False
def classify_input_file(filename):
"""
Return a tuple (type, msg) where 'type' specifies the classified type
of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable
string representing the error.
"""
ftype = IT_Invalid
err_msg = None
if not os.path.exists(filename):
err_msg = "'%s' does not exist" % filename
elif not os.path.isfile(filename):
err_msg = "'%s' does not name a file" % filename
elif is_executable_file(filename):
ftype = IT_Executable
elif is_json_file(filename):
ftype = IT_JSON
else:
err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename
return ftype, err_msg
def check_input_file(filename):
"""
Classify the file named by 'filename' and return the classification.
If the file is classified as 'IT_Invalid' print an error message and exit
the program.
"""
ftype, msg = classify_input_file(filename)
if ftype == IT_Invalid:
print("Invalid input file: %s" % msg)
sys.exit(1)
return ftype
def find_benchmark_flag(prefix, benchmark_flags):
"""
Search the specified list of flags for a flag matching `<prefix><arg>` and
if it is found return the arg it specifies. If specified more than once the
last value is returned. If the flag is not found None is returned.
"""
assert prefix.startswith('--') and prefix.endswith('=')
result = None
for f in benchmark_flags:
if f.startswith(prefix):
result = f[len(prefix):]
return result
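# For example, find_benchmark_flag('--benchmark_out=',
# ['--benchmark_out=a.json', '--benchmark_out=b.json']) returns 'b.json',
# since the last occurrence wins.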
def remove_benchmark_flags(prefix, benchmark_flags):
"""
Return a new list containing the specified benchmark_flags except those
with the specified prefix.
"""
assert prefix.startswith('--') and prefix.endswith('=')
return [f for f in benchmark_flags if not f.startswith(prefix)]
def load_benchmark_results(fname):
"""
Read benchmark output from a file and return the JSON object.
REQUIRES: 'fname' names a file containing JSON benchmark output.
"""
with open(fname, 'r') as f:
return json.load(f)
def run_benchmark(exe_name, benchmark_flags):
"""
Run a benchmark specified by 'exe_name' with the specified
'benchmark_flags'. The benchmark is run directly as a subprocess to preserve
real time console output.
RETURNS: A JSON object representing the benchmark output
"""
output_name = find_benchmark_flag('--benchmark_out=',
benchmark_flags)
is_temp_output = False
if output_name is None:
is_temp_output = True
thandle, output_name = tempfile.mkstemp()
os.close(thandle)
benchmark_flags = list(benchmark_flags) + \
['--benchmark_out=%s' % output_name]
cmd = [exe_name] + benchmark_flags
print("RUNNING: %s" % ' '.join(cmd))
exitCode = subprocess.call(cmd)
if exitCode != 0:
print('TEST FAILED...')
sys.exit(exitCode)
json_res = load_benchmark_results(output_name)
if is_temp_output:
os.unlink(output_name)
return json_res
def run_or_load_benchmark(filename, benchmark_flags):
"""
Get the results for a specified benchmark. If 'filename' specifies
an executable benchmark then the results are generated by running the
benchmark. Otherwise 'filename' must name a valid JSON output file,
which is loaded and the result returned.
"""
ftype = check_input_file(filename)
if ftype == IT_JSON:
return load_benchmark_results(filename)
elif ftype == IT_Executable:
return run_benchmark(filename, benchmark_flags)
else:
assert False # This branch is unreachable

Utilities/Scripts/strip_asm.py (new executable file, 151 lines)

@@ -0,0 +1,151 @@
#!/usr/bin/env python
"""
strip_asm.py - Cleanup ASM output for the specified file
"""
from argparse import ArgumentParser
import sys
import os
import re
def find_used_labels(asm):
found = set()
label_re = re.compile("\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
for l in asm.splitlines():
m = label_re.match(l)
if m:
found.add('.L%s' % m.group(1))
return found
def normalize_labels(asm):
decls = set()
label_decl = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
for l in asm.splitlines():
m = label_decl.match(l)
if m:
decls.add(m.group(0))
if len(decls) == 0:
return asm
needs_dot = next(iter(decls))[0] != '.'
if not needs_dot:
return asm
for ld in decls:
asm = re.sub("(^|\s+)" + ld + "(?=:|\s)", '\\1.' + ld, asm)
return asm
def transform_labels(asm):
asm = normalize_labels(asm)
used_decls = find_used_labels(asm)
new_asm = ''
label_decl = re.compile("^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
for l in asm.splitlines():
m = label_decl.match(l)
if not m or m.group(0) in used_decls:
new_asm += l
new_asm += '\n'
return new_asm
def is_identifier(tk):
if len(tk) == 0:
return False
first = tk[0]
if not first.isalpha() and first != '_':
return False
for i in range(1, len(tk)):
c = tk[i]
if not c.isalnum() and c != '_':
return False
return True
def process_identifiers(l):
"""
process_identifiers - process all identifiers and modify them to have
consistent names across all platforms; specifically across ELF and Mach-O.
For example, Mach-O inserts an additional underscore at the beginning of
names. This function removes that.
"""
parts = re.split(r'([a-zA-Z0-9_]+)', l)
new_line = ''
for tk in parts:
if is_identifier(tk):
if tk.startswith('__Z'):
tk = tk[1:]
elif tk.startswith('_') and len(tk) > 1 and \
tk[1].isalpha() and tk[1] != 'Z':
tk = tk[1:]
new_line += tk
return new_line
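# For example (illustrative mangled name): on Mach-O the token '__ZN4vtkm3fooEv'
# is rewritten to '_ZN4vtkm3fooEv', matching the ELF spelling of the same symbol.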
def process_asm(asm):
"""
Strip the ASM of unwanted directives and lines
"""
new_contents = ''
asm = transform_labels(asm)
# TODO: Add more things we want to remove
discard_regexes = [
re.compile("\s+\..*$"), # directive
re.compile("\s*#(NO_APP|APP)$"), #inline ASM
re.compile("\s*#.*$"), # comment line
re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), #global directive
re.compile("\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
]
keep_regexes = [
]
fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")
for l in asm.splitlines():
# Remove Mach-O attribute
l = l.replace('@GOTPCREL', '')
add_line = True
for reg in discard_regexes:
if reg.match(l) is not None:
add_line = False
break
for reg in keep_regexes:
if reg.match(l) is not None:
add_line = True
break
if add_line:
if fn_label_def.match(l) and len(new_contents) != 0:
new_contents += '\n'
l = process_identifiers(l)
new_contents += l
new_contents += '\n'
return new_contents
def main():
parser = ArgumentParser(
description='generate a stripped assembly file')
parser.add_argument(
'input', metavar='input', type=str, nargs=1,
help='An input assembly file')
parser.add_argument(
'out', metavar='output', type=str, nargs=1,
help='The output file')
args, unknown_args = parser.parse_known_args()
input = args.input[0]
output = args.out[0]
if not os.path.isfile(input):
print(("ERROR: input file '%s' does not exist") % input)
sys.exit(1)
contents = None
with open(input, 'r') as f:
contents = f.read()
new_contents = process_asm(contents)
with open(output, 'w') as f:
f.write(new_contents)
if __name__ == '__main__':
main()
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;

@@ -473,12 +473,25 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(BenchExecToContReadWrite,
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
// Initialize command line args
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when the user requested help
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@@ -506,11 +506,24 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when the user requested help
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@@ -95,11 +95,23 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(CopySpeed,
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
// Setup device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when the user requested help
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// Handle NumThreads command-line arg:
#ifdef VTKM_ENABLE_TBB
@@ -126,5 +138,5 @@ int main(int argc, char* argv[])
#endif // TBB
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@@ -39,6 +39,40 @@
namespace
{
// Parametrize the input size samples for most of the benchmarks.
//
// Define at compile time:
//
//   let VTKm_BENCHS_RANGE_LOWER_BOUNDARY be b0, and
//   let VTKm_BENCHS_RANGE_UPPER_BOUNDARY be b1.
//
// This will create the following sample sizes: b0, b0*2^3, b0*2^6, ..., b1.
//
// Notice that setting VTKm_BENCHS_RANGE_LOWER_BOUNDARY / VTKm_BENCHS_RANGE_UPPER_BOUNDARY
// affects both SmallRange and FullRange.
//
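// For instance (illustrative values; relies on the default x8 sampling noted
// below), compiling with
//   -DVTKm_BENCHS_RANGE_LOWER_BOUNDARY=4096 -DVTKm_BENCHS_RANGE_UPPER_BOUNDARY=1048576
// samples at 4 KiB, 32 KiB, 256 KiB, and 1 MiB (min and max are always included).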
#ifndef VTKm_BENCHS_RANGE_LOWER_BOUNDARY
#define FULL_RANGE_LOWER_BOUNDARY (1 << 12) // 4 KiB
#define SHORT_RANGE_LOWER_BOUNDARY (1 << 15) // 32 KiB
#else
#define FULL_RANGE_LOWER_BOUNDARY (VTKm_BENCHS_RANGE_LOWER_BOUNDARY)
#define SHORT_RANGE_LOWER_BOUNDARY (VTKm_BENCHS_RANGE_LOWER_BOUNDARY)
#endif
#ifndef VTKm_BENCHS_RANGE_UPPER_BOUNDARY
#define FULL_RANGE_UPPER_BOUNDARY (1 << 27) // 128 MiB
#define SHORT_RANGE_UPPER_BOUNDARY (1 << 27) // 128 MiB
#define BITFIELD_TO_UNORDEREDSET_MAX_SAMPLING (1 << 26) // 64 MiB
#else
#define FULL_RANGE_UPPER_BOUNDARY (VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
#define SHORT_RANGE_UPPER_BOUNDARY (VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
#define BITFIELD_TO_UNORDEREDSET_MAX_SAMPLING (VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
#endif
// Default sampling rate is x8 and always includes min/max,
// so this will generate 7 samples at:
// 1: 4 KiB
@ -47,15 +81,17 @@ namespace
// 4: 2 MiB
// 5: 16 MiB
// 6: 128 MiB
static const std::pair<int64_t, int64_t> FullRange{ 1 << 12, 1 << 27 }; // 4KiB, 128MiB
static const std::pair<int64_t, int64_t> FullRange{ FULL_RANGE_LOWER_BOUNDARY,
FULL_RANGE_UPPER_BOUNDARY };
// Smaller range that can be used to reduce the number of benchmarks. Used
// with `RangeMultiplier(SmallRangeMultiplier)`, this produces:
// 1: 32 KiB
// 2: 2 MiB
// 3: 128 MiB
static const std::pair<int64_t, int64_t> SmallRange{ 1 << 15, 1 << 27 }; // 4KiB, 128MiB
static constexpr int SmallRangeMultiplier = 1 << 21; // Ensure a sample at 2MiB
static const std::pair<int64_t, int64_t> SmallRange{ SHORT_RANGE_LOWER_BOUNDARY,
SHORT_RANGE_UPPER_BOUNDARY };
static constexpr int SmallRangeMultiplier = 1 << 21; // Ensure a sample at 2MiB
using TypeList = vtkm::List<vtkm::UInt8,
vtkm::Float32,
@ -351,7 +387,7 @@ void BenchBitFieldToUnorderedSetGenerator(benchmark::internal::Benchmark* bm)
{
// Use a reduced NUM_BYTES_MAX value here -- these benchmarks allocate one
// 8-byte id per bit, so this caps the index array out at 512 MB:
static constexpr int64_t numBytesMax = 1 << 26; // 64 MiB of bits
static int64_t numBytesMax = std::min(1 << 29, BITFIELD_TO_UNORDEREDSET_MAX_SAMPLING);
bm->UseManualTime();
bm->ArgNames({ "Size", "C" });
@ -393,6 +429,7 @@ void BenchCopy(benchmark::State& state)
state.SetBytesProcessed(static_cast<int64_t>(numBytes) * iterations);
state.SetItemsProcessed(static_cast<int64_t>(numValues) * iterations);
};
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchCopy, ->Ranges({ FullRange })->ArgName("Size"), TypeList);
template <typename ValueType>
@ -534,7 +571,7 @@ void BenchCountSetBitsGenerator(benchmark::internal::Benchmark* bm)
for (int64_t config = 0; config < 6; ++config)
{
bm->Ranges({ FullRange, { config, config } });
bm->Ranges({ { FullRange.first, FullRange.second }, { config, config } });
}
}
VTKM_BENCHMARK_APPLY(BenchCountSetBits, BenchCountSetBitsGenerator);
@ -1053,8 +1090,10 @@ void BenchmarkStableSortIndicesUniqueGenerator(benchmark::internal::Benchmark* b
bm->ArgNames({ "Size", "%Uniq" });
for (int64_t pcntUnique = 0; pcntUnique <= 100; pcntUnique += 25)
{
// Cap the max size here at 21 MiB. This sort is too slow.
bm->Ranges({ { SmallRange.first, 1 << 21 }, { pcntUnique, pcntUnique } });
// Cap the max size here at 2 MiB. This sort is too slow.
const int64_t maxSize = 1 << 21;
bm->Ranges(
{ { SmallRange.first, std::min(maxSize, SmallRange.second) }, { pcntUnique, pcntUnique } });
}
}
@ -1167,12 +1206,23 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(BenchUpperBounds,
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
// Setup device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when a help flag was requested
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// Handle NumThreads command-line arg:
#ifdef VTKM_ENABLE_TBB
@ -1199,5 +1249,5 @@ int main(int argc, char* argv[])
#endif // TBB
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@ -942,12 +942,24 @@ VTKM_BENCHMARK(Bench2VirtualImplicitFunctions);
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
// Setup device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when a help flag was requested
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@ -1040,12 +1040,23 @@ void InitDataSet(int& argc, char** argv)
int main(int argc, char* argv[])
{
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
Config = vtkm::cont::Initialize(argc, argv, opts);
// Setup device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
InitDataSet(argc, argv);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when a help flag was requested
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
InitDataSet(argc, args.data());
}
const std::string dataSetSummary = []() -> std::string {
std::ostringstream out;
@ -1054,5 +1065,5 @@ int main(int argc, char* argv[])
}();
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, dataSetSummary);
VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, args.data(), dataSetSummary);
}

@ -116,13 +116,24 @@ VTKM_BENCHMARK(BenchRayTracing);
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
// Setup device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when a help flag was requested
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@ -380,12 +380,24 @@ VTKM_BENCHMARK_TEMPLATES(BenchClassificationDynamic, ValueTypes);
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
// Setup device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when a help flag was requested
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@ -388,6 +388,37 @@ static inline vtkm::Id ExecuteBenchmarks(int& argc,
return static_cast<vtkm::Id>(num);
}
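// Pre-processes the command line before handing it to vtkm::cont::Initialize:
// injects --help when no arguments are given, keeps the argv convention of a
// trailing null pointer, and downgrades `opts` to InitializeOptions::None when
// a help flag (--help, -help, or -h) is found so that main() prints the usage.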
void InitializeArgs(int* argc, std::vector<char*>& args, vtkm::cont::InitializeOptions& opts)
{
bool isHelp = false;
// If no arguments were given, inject --help so that the usage text is printed
if (*argc == 1)
{
const char* help = "--help"; // string literal: guaranteed static storage duration
args.push_back(const_cast<char*>(help));
*argc = *argc + 1;
}
args.push_back(nullptr); // keep the argv convention of a trailing null pointer
for (size_t i = 0; i < static_cast<size_t>(*argc); ++i)
{
auto opt_s = std::string(args[i]);
if (opt_s == "--help" || opt_s == "-help" || opt_s == "-h")
{
isHelp = true;
}
}
if (!isHelp)
{
return;
}
opts = vtkm::cont::InitializeOptions::None;
}
}
}
} // end namespace vtkm::bench::detail

@ -47,10 +47,17 @@ set(benchmarks
BenchmarkTopologyAlgorithms
)
set(VTKm_BENCHS_RANGE_LOWER_BOUNDARY 4096 CACHE STRING "Smallest input-size sample for the BenchmarkDeviceAdapter benchmarks")
set(VTKm_BENCHS_RANGE_UPPER_BOUNDARY 134217728 CACHE STRING "Largest input-size sample for the BenchmarkDeviceAdapter benchmarks")
mark_as_advanced(VTKm_BENCHS_RANGE_LOWER_BOUNDARY VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
foreach (benchmark ${benchmarks})
add_benchmark(NAME ${benchmark} FILE ${benchmark}.cxx LIBS vtkm_source vtkm_filter)
endforeach ()
target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_LOWER_BOUNDARY=${VTKm_BENCHS_RANGE_LOWER_BOUNDARY})
target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_UPPER_BOUNDARY=${VTKm_BENCHS_RANGE_UPPER_BOUNDARY})
if(TARGET vtkm_rendering)
add_benchmark(NAME BenchmarkRayTracing FILE BenchmarkRayTracing.cxx LIBS vtkm_rendering)
endif()
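For illustration only (not part of this change): the two new cache variables can
be overridden at configure time. The paths and values below are placeholders and
assume a CMake new enough to support `-S`/`-B`:

```sh
# Hypothetical configure/build invocation; adjust the source and build paths.
cmake -S /path/to/vtk-m -B build \
      -DVTKm_ENABLE_BENCHMARKS=ON \
      -DVTKm_BENCHS_RANGE_LOWER_BOUNDARY=32768 \
      -DVTKm_BENCHS_RANGE_UPPER_BOUNDARY=33554432
cmake --build build --target BenchmarkDeviceAdapter
```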

benchmarking/README.md

@ -0,0 +1,120 @@
# BENCHMARKING VTK-m
## TL;DR
When configuring _VTK-m_ with _CMake_, pass the flag `-DVTKm_ENABLE_BENCHMARKS=1`.
In the build directory you will see the following binaries:
$ ls bin/Benchmark*
bin/BenchmarkArrayTransfer* bin/BenchmarkCopySpeeds* bin/BenchmarkFieldAlgorithms*
bin/BenchmarkRayTracing* bin/BenchmarkAtomicArray* bin/BenchmarkDeviceAdapter*
bin/BenchmarkFilters* bin/BenchmarkTopologyAlgorithms*
Taking `BenchmarkArrayTransfer` as an example, we can run it as follows:
$ bin/BenchmarkArrayTransfer -d Any
---
## Parts of this Document
0. [TL;DR](#TL;DR)
1. [Devices](#choosing-devices)
2. [Filters](#run-a-subset-of-your-benchmarks)
3. [Compare with baseline](#compare-with-baseline)
4. [Installing compare-benchmarks.py](#installing-compare-benchmarkspy)
---
## Choosing devices
Taking `BenchmarkArrayTransfer` as an example, we can determine which devices
it can run on by simply invoking it without arguments:
$ bin/BenchmarkArrayTransfer
...
Valid devices: "Any" "Serial"
...
From the listed _Valid devices_, you can choose the device on which to run the benchmark:
$ bin/BenchmarkArrayTransfer -d Serial
## Run a subset of your benchmarks
_VTK-m_ benchmarks use [Google Benchmarks], which allows you to choose a subset
of benchmarks using the flag `--benchmark_filter=REGEX`.
For instance, if you want to run all the benchmarks that write something, you
would run:
$ bin/BenchmarkArrayTransfer -d Serial --benchmark_filter='Write'
Note that you can list all of the available benchmarks with the option
`--benchmark_list_tests`.
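For instance, following the same invocation pattern as above (the exact list
will vary with your build configuration):

```sh
$ bin/BenchmarkArrayTransfer -d Serial --benchmark_list_tests
```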
## Compare with baseline
_VTK-m_ ships with a helper script named `compare-benchmarks.py`, based on
[Google Benchmarks]' `compare.py`, which lets you compare benchmarks using different
devices, filters, and binaries. After building _VTK-m_ it will appear in the
`bin` directory within your `build` directory.
When running `compare-benchmarks.py`:
- You can specify the baseline benchmark binary path and its arguments in
`--benchmark1=`
- The contender benchmark binary path and its arguments in `--benchmark2=`
- Extra options to be passed to `compare.py` must come after `--`
### Compare between filters
When comparing filters, we can only use one benchmark binary with a single device,
as shown in the following example:
```sh
$ ./compare-benchmarks.py --benchmark1='./BenchmarkArrayTransfer -d Any
--benchmark_filter=1024' --filter1='Read' --filter2=Write -- filters
# It will output something like this:
Benchmark Time CPU Time Old Time New CPU Old CPU New
---------------------------------------------------------------------------------------------------------------------------------------------------------------
BenchContToExec[Read vs. Write]<F32>/Bytes:1024/manual_time +0.2694 +0.2655 18521 23511 18766 23749
BenchExecToCont[Read vs. Write]<F32>/Bytes:1024/manual_time +0.0212 +0.0209 25910 26460 26152 26698
```
### Compare between devices
When comparing two benchmark runs on two different devices, use the `benchmarks`
_option_ after `--` and call `./compare-benchmarks.py` as follows:
```sh
$ ./compare-benchmarks.py --benchmark1='./BenchmarkArrayTransfer -d Serial
--benchmark_filter=1024' --benchmark2='./BenchmarkArrayTransfer -d Cuda
--benchmark_filter=1024' -- benchmarks
# It will output something like this:
Benchmark Time CPU Time Old Time New CPU Old CPU New
---------------------------------------------------------------------------------------------------------------------------------------------------
BenchContToExecRead<F32>/Bytes:1024/manual_time +0.0127 +0.0120 18388 18622 18632 18856
BenchContToExecWrite<F32>/Bytes:1024/manual_time +0.0010 +0.0006 23471 23496 23712 23726
BenchContToExecReadWrite<F32>/Bytes:1024/manual_time -0.0034 -0.0041 26363 26274 26611 26502
BenchRoundTripRead<F32>/Bytes:1024/manual_time +0.0055 +0.0056 20635 20748 21172 21291
BenchRoundTripReadWrite<F32>/Bytes:1024/manual_time +0.0084 +0.0082 29288 29535 29662 29905
BenchExecToContRead<F32>/Bytes:1024/manual_time +0.0025 +0.0021 25883 25947 26122 26178
BenchExecToContWrite<F32>/Bytes:1024/manual_time -0.0027 -0.0038 26375 26305 26622 26522
BenchExecToContReadWrite<F32>/Bytes:1024/manual_time +0.0041 +0.0039 25639 25745 25871 25972
```
## Installing compare-benchmarks.py
`compare-benchmarks.py` relies on `compare.py` from Google Benchmarks, which in
turn relies on `SciPy`; you can find installation instructions [here][SciPy].
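For example, one common way to install it (assuming a Python 3 environment with
`pip` available) is:

```sh
$ python3 -m pip install --user scipy
```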
[Google Benchmarks]: https://github.com/google/benchmark
[Compare.py]: https://github.com/google/benchmark/blob/master/tools/compare.py
[SciPy]: https://www.scipy.org/install.html