diff --git a/Utilities/Scripts/benchCompare.py b/Utilities/Scripts/benchCompare.py deleted file mode 100755 index c7c6b42a8..000000000 --- a/Utilities/Scripts/benchCompare.py +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env python3 -# -# Compares the output from BenchmarkDeviceAdapter from the serial -# device to a parallel device and prints a table containing the results. -# -# Example usage: -# -# $ BenchmarkDeviceAdapter_SERIAL > serial.out -# $ BenchmarkDeviceAdapter_TBB > tbb.out -# $ benchCompare.py serial.out tbb.out -# -# -# The number of threads (optional -- only used to generate the "Warn" column) -maxThreads = 4 -# -# Print debugging output: -doDebug = False -# -# End config options. - -import re -import sys - -assert(len(sys.argv) == 3) - -def debug(str): - if (doDebug): print(str) - -# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64 -typeParser = re.compile("\\*{3} ([^*]+) on device ([^*]+) \\*{15}") - -# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name -nameParser = re.compile("Benchmark '([^-]+)' results:") - -# Parses "mean = 0.0125s" --> 0.0125 -meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s") - -# Parses "std dev = 0.0125s" --> 0.0125 -stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s") - -serialFilename = sys.argv[1] -parallelFilename = sys.argv[2] - -serialFile = open(serialFilename, 'r') -parallelFile = open(parallelFilename, 'r') - -class BenchKey: - def __init__(self, name_, type_): - self.name = name_ - self.type = type_ - - def __eq__(self, other): - return self.name == other.name and self.type == other.type - - def __lt__(self, other): - if self.name < other.name: return True - elif self.name > other.name: return False - else: return self.type < other.type - - def __hash__(self): - return (self.name + self.type).__hash__() - -class BenchData: - def __init__(self, mean_, stdDev_): - self.mean = mean_ - self.stdDev = stdDev_ - -def parseFile(f, benchmarks): - type = "" - bench = "" - mean = -1. - stdDev = -1. - for line in f: - debug("Line: {}".format(line)) - - typeRes = typeParser.match(line) - if typeRes: - type = typeRes.group(1) - debug("Found type: {}".format(type)) - continue - - nameRes = nameParser.match(line) - if nameRes: - name = nameRes.group(1) - debug("Found name: {}".format(name)) - continue - - meanRes = meanParser.match(line) - if meanRes: - mean = float(meanRes.group(1)) - debug("Found mean: {}".format(mean)) - continue - - stdDevRes = stdDevParser.match(line) - if stdDevRes: - stdDev = float(stdDevRes.group(1)) - debug("Found stddev: {}".format(stdDev)) - - # stdDev is always the last parse for a given benchmark, add entry now - benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev) - debug("{} records found.".format(len(benchmarks))) - - mean = -1. - stdDev = -1. 
- - continue - -serialBenchmarks = {} -parallelBenchmarks = {} - -parseFile(serialFile, serialBenchmarks) -parseFile(parallelFile, parallelBenchmarks) - -serialKeys = set(serialBenchmarks.keys()) -parallelKeys = set(parallelBenchmarks.keys()) - -commonKeys = sorted(list(serialKeys.intersection(parallelKeys))) - -serialOnlyKeys = sorted(list(serialKeys.difference(parallelKeys))) -parallelOnlyKeys = sorted(list(parallelKeys.difference(serialKeys))) - -debug("{} serial keys\n{} parallel keys\n{} common keys\n{} serialOnly keys\n{} parallelOnly keys.".format( - len(serialKeys), len(parallelKeys), len(commonKeys), len(serialOnlyKeys), len(parallelOnlyKeys))) - -if len(serialOnlyKeys) > 0: - print("Keys found only in serial:") - for k in serialOnlyKeys: - print("%s (%s)"%(k.name, k.type)) - print("") - -if len(parallelOnlyKeys) > 0: - print("Keys found only in parallel:") - for k in parallelOnlyKeys: - print("%s (%s)"%(k.name, k.type)) - print("") - -print("Comparison:") -print("| %7s | %4s | %8s %8s | %8s %8s | %s (%s) |"%( - "Speedup", "Warn", "serial", "", "parallel", "", "Benchmark", "Type")) -print("|-%7s-|-%4s-|-%8s----%8s-|-%8s----%8s-|-%s--%s--|"%( - "-"*7, "-"*4, "-"*8, "-"*8, "-"*8, "-"*8, "-"*9, "-"*4)) -for key in commonKeys: - sData = serialBenchmarks[key] - pData = parallelBenchmarks[key] - speedup = sData.mean / pData.mean if pData.mean != 0. else 0. - if speedup > maxThreads * .9: - flag = " " - elif speedup > maxThreads * .75: - flag = "! " - elif speedup > maxThreads * .5: - flag = "!! " - elif speedup > maxThreads * .25: - flag = "!!! " - else: - flag = "!!!!" - print("| %7.3f | %4s | %08.6f +- %08.6f | %08.6f +- %08.6f | %s (%s) |"%( - speedup, flag, sData.mean, sData.stdDev, pData.mean, pData.stdDev, key.name, key.type)) diff --git a/Utilities/Scripts/benchSummary.py b/Utilities/Scripts/benchSummary.py deleted file mode 100755 index 722add00c..000000000 --- a/Utilities/Scripts/benchSummary.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env python -# -# Prints a concise summary of a benchmark output as a TSV blob. -# -# Example usage: -# -# $ BenchmarkXXX_DEVICE > bench.out -# $ benchSummary.py bench.out -# -# Options SortByType, SortByName, or SortByMean may be passed after the -# filename to sort the output by the indicated quantity. If no sort option -# is provided, the output order matches the input. If multiple options are -# specified, the list will be sorted repeatedly in the order requested. 
- -import re -import sys - -assert(len(sys.argv) >= 2) - -# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64 -typeParser = re.compile("\\*{3} ([^*]+) \\*{15}") - -# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name -nameParser = re.compile("Benchmark '([^-]+)' results:") - -# Parses "mean = 0.0125s" --> 0.0125 -meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s") - -# Parses "std dev = 0.0125s" --> 0.0125 -stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s") - -filename = sys.argv[1] -benchFile = open(filename, 'r') - -sortOpt = None -if len(sys.argv) > 2: - sortOpt = sys.argv[2:] - -class BenchKey: - def __init__(self, name_, type_): - self.name = name_ - self.type = type_ - - def __eq__(self, other): - return self.name == other.name and self.type == other.type - - def __lt__(self, other): - if self.name < other.name: return True - elif self.name > other.name: return False - else: return self.type < other.type - - def __hash__(self): - return (self.name + self.type).__hash__() - -class BenchData: - def __init__(self, mean_, stdDev_): - self.mean = mean_ - self.stdDev = stdDev_ - -def parseFile(f, benchmarks): - type = "" - bench = "" - mean = -1. - stdDev = -1. - for line in f: - typeRes = typeParser.match(line) - if typeRes: - type = typeRes.group(1) - continue - - nameRes = nameParser.match(line) - if nameRes: - name = nameRes.group(1) - continue - - meanRes = meanParser.match(line) - if meanRes: - mean = float(meanRes.group(1)) - continue - - stdDevRes = stdDevParser.match(line) - if stdDevRes: - stdDev = float(stdDevRes.group(1)) - - # stdDev is always the last parse for a given benchmark, add entry now - benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev) - - mean = -1. - stdDev = -1. - - continue - -benchmarks = {} -parseFile(benchFile, benchmarks) - -# Sort keys by type: -keys = benchmarks.keys() -if sortOpt: - for opt in sortOpt: - if opt.lower() == "sortbytype": - keys = sorted(keys, key=lambda k: k.type) - elif opt.lower() == "sortbyname": - keys = sorted(keys, key=lambda k: k.name) - elif opt.lower() == "sortbymean": - keys = sorted(keys, key=lambda k: benchmarks[k].mean) - -print("# Summary: (%s)"%filename) -print("%-9s\t%-9s\t%-9s\t%-s"%("Mean", "Stdev", "Stdev%", "Benchmark (type)")) -for key in keys: - data = benchmarks[key] - print("%9.6f\t%9.6f\t%9.6f\t%s (%s)"%(data.mean, data.stdDev, data.stdDev / data.mean * 100., key.name, key.type)) diff --git a/Utilities/Scripts/benchSummaryWithBaselines.py b/Utilities/Scripts/benchSummaryWithBaselines.py deleted file mode 100755 index c875b07ba..000000000 --- a/Utilities/Scripts/benchSummaryWithBaselines.py +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env python -# -# Prints a concise summary of a benchmark output as a TSV blob. Benchmarks are -# expected to have "Baseline" in the name, and a matching benchmark with the -# same name but Baseline replaced with something else. For example, -# -# Baseline benchmark name: "Some benchmark: Baseline, Size=4" -# Test benchmark name: "Some benchmark: Blahblah, Size=4" -# -# The output will print the baseline, test, and overhead times for the -# benchmarks. -# -# Example usage: -# -# $ BenchmarkXXX_DEVICE > bench.out -# $ benchSummaryWithBaselines.py bench.out -# -# Options SortByType, SortByName, SortByOverhead, or SortByRatio -# (testtime/baseline) may be passed after the filename to sort the output by -# the indicated quantity. If no sort option is provided, the output order -# matches the input. 
If multiple options are specified, the list will be sorted -# repeatedly in the order requested. - -import re -import sys - -assert(len(sys.argv) >= 2) - -# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64 -typeParser = re.compile("\\*{3} ([^*]+) \\*{15}") - -# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name -nameParser = re.compile("Benchmark '([^-]+)' results:") - -# Parses "mean = 0.0125s" --> 0.0125 -meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s") - -# Parses "std dev = 0.0125s" --> 0.0125 -stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s") - -# Parses "SomeText Baseline Other Text" --> ("SomeText ", " Other Text") -baselineParser = re.compile("(.*)Baseline(.*)") - -filename = sys.argv[1] -benchFile = open(filename, 'r') - -sortOpt = None -if len(sys.argv) > 2: - sortOpt = sys.argv[2:] - -class BenchKey: - def __init__(self, name_, type_): - self.name = name_ - self.type = type_ - - def __eq__(self, other): - return self.name == other.name and self.type == other.type - - def __lt__(self, other): - if self.name < other.name: return True - elif self.name > other.name: return False - else: return self.type < other.type - - def __hash__(self): - return (self.name + self.type).__hash__() - -class BenchData: - def __init__(self, mean_, stdDev_): - self.mean = mean_ - self.stdDev = stdDev_ - -def parseFile(f, benchmarks): - type = "" - bench = "" - mean = -1. - stdDev = -1. - for line in f: - typeRes = typeParser.match(line) - if typeRes: - type = typeRes.group(1) - continue - - nameRes = nameParser.match(line) - if nameRes: - name = nameRes.group(1) - continue - - meanRes = meanParser.match(line) - if meanRes: - mean = float(meanRes.group(1)) - continue - - stdDevRes = stdDevParser.match(line) - if stdDevRes: - stdDev = float(stdDevRes.group(1)) - - # stdDev is always the last parse for a given benchmark, add entry now - benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev) - - mean = -1. - stdDev = -1. 
- - continue - -class BaselinedBenchData: - def __init__(self, baseline, test): - self.baseline = baseline.mean - self.test = test.mean - self.overhead = test.mean - baseline.mean - -def findBaselines(benchmarks): - result = {} - - for baseKey in benchmarks.keys(): - # Look for baseline entries - baselineRes = baselineParser.match(baseKey.name) - if baselineRes: - prefix = baselineRes.group(1) - suffix = baselineRes.group(2) - - # Find the test entry matching the baseline: - for testKey in benchmarks.keys(): - if baseKey.type != testKey.type: # Need same type - continue - if baseKey.name == testKey.name: # Skip the base key - continue - if testKey.name.startswith(prefix) and testKey.name.endswith(suffix): - newName = (prefix + suffix).replace(", ,", ",") - newKey = BenchKey(newName, testKey.type) - newVal = BaselinedBenchData(benchmarks[baseKey], benchmarks[testKey]) - result[newKey] = newVal - return result - -benchmarks = {} -parseFile(benchFile, benchmarks) -benchmarks = findBaselines(benchmarks) - -# Sort keys by type: -keys = benchmarks.keys() -if sortOpt: - for opt in sortOpt: - if opt.lower() == "sortbytype": - keys = sorted(keys, key=lambda k: k.type) - elif opt.lower() == "sortbyname": - keys = sorted(keys, key=lambda k: k.name) - elif opt.lower() == "sortbyoverhead": - keys = sorted(keys, key=lambda k: benchmarks[k].overhead) - elif opt.lower() == "sortbyratio": - keys = sorted(keys, key=lambda k: benchmarks[k].overhead / benchmarks[k].baseline) - -print("# Summary: (%s)"%filename) -print("%-9s\t%-9s\t%-9s\t%-9s\t%-s"%("Baseline", "TestTime", "Overhead", "Test/Base", "Benchmark (type)")) -for key in keys: - data = benchmarks[key] - print("%9.6f\t%9.6f\t%9.6f\t%9.6f\t%s (%s)"%(data.baseline, data.test, - data.overhead, data.test / data.baseline, key.name, key.type)) diff --git a/Utilities/Scripts/compare-benchmarks.py b/Utilities/Scripts/compare-benchmarks.py new file mode 100755 index 000000000..9aa676d6f --- /dev/null +++ b/Utilities/Scripts/compare-benchmarks.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +""" +compare-benchmarks.py - VTKm + Google Benchmarks compare.py +""" + +import getopt +import subprocess +import sys +import time +import os + +CURRENT_DIR = os.path.dirname(os.path.realpath(__file__)) +COMPARE_PY_PATH = os.path.join(CURRENT_DIR, 'compare.py') +COMPARE_PY = sys.executable + " " + COMPARE_PY_PATH + +class Bench(): + def __init__(self): + self.__cmd = None + + @property + def cmd(self): + return self.__cmd + + @cmd.setter + def cmd(self, c): + self.__cmd = c + + def launch(self): + output_file = "bench-%d.json" % time.time() + cmd_exec = "%s --benchmark_out=%s --benchmark_out_format=json" \ + % (self.cmd, output_file) + print(cmd_exec) + subprocess.call(cmd_exec, shell=True) + return output_file + +def print_help(error_msg = None): + if error_msg != None: + print(error_msg) + + print("usage: compare-benchmarks \n" \ + " --benchmark1=' [arg1] [arg2] ...'"\ + " [--filter1=]\n"\ + " --benchmark2=' [arg1] [arg2] ...'"\ + " [--filter2=]\n"\ + " -- [-opt] benchmarks|filters|benchmarksfiltered\n\n" \ + "compare.py help:") + + subprocess.call(COMPARE_PY, shell=True) + sys.exit(0) + +# ----------------------------------------------------------------------------- +def main(): + is_filters = False + filter1 = str() + filter2 = str() + bench1 = Bench() + bench2 = Bench() + + options, remainder = getopt.gnu_getopt(sys.argv[1:], '', + ['help','benchmark1=', 'benchmark2=', 'filter1=', 'filter2=']) + + for opt, arg in options: + if opt == "--benchmark1": + bench1.cmd = arg + + 
if opt == "--benchmark2": + bench2.cmd = arg + + if opt == "--filter1": + filter1 = arg + + if opt == "--filter2": + filter2 = arg + + if opt == "--help": + print_help() + + if bench1.cmd == None: + print_help("ERROR: no benchmarks chosen") + + for arg in remainder: + if arg == "filters": + is_filters = True + + if is_filters and bench2.cmd != None: + print_help("ERROR: filters option can only accept --benchmark1= and --filter1") + + b1_output = bench1.launch() + b2_output = bench2.launch() if not is_filters else filter1 + " " + filter2 + + cmd = "%s %s %s %s" % (COMPARE_PY, " ".join(remainder), b1_output, b2_output) + print(cmd) + subprocess.call(cmd, shell=True) + + os.remove(b1_output) + + if not is_filters: + os.remove(b2_output) + +if __name__ == '__main__': + main() diff --git a/Utilities/Scripts/compare.py b/Utilities/Scripts/compare.py new file mode 100755 index 000000000..539ace6fb --- /dev/null +++ b/Utilities/Scripts/compare.py @@ -0,0 +1,408 @@ +#!/usr/bin/env python + +import unittest +""" +compare.py - versatile benchmark output compare tool +""" + +import argparse +from argparse import ArgumentParser +import sys +import gbench +from gbench import util, report +from gbench.util import * + + +def check_inputs(in1, in2, flags): + """ + Perform checking on the user provided inputs and diagnose any abnormalities + """ + in1_kind, in1_err = classify_input_file(in1) + in2_kind, in2_err = classify_input_file(in2) + output_file = find_benchmark_flag('--benchmark_out=', flags) + output_type = find_benchmark_flag('--benchmark_out_format=', flags) + if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file: + print(("WARNING: '--benchmark_out=%s' will be passed to both " + "benchmarks causing it to be overwritten") % output_file) + if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0: + print("WARNING: passing optional flags has no effect since both " + "inputs are JSON") + if output_type is not None and output_type != 'json': + print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`" + " is not supported.") % output_type) + sys.exit(1) + + +def create_parser(): + parser = ArgumentParser( + description='versatile benchmark output compare tool') + + parser.add_argument( + '-a', + '--display_aggregates_only', + dest='display_aggregates_only', + action="store_true", + help="If there are repetitions, by default, we display everything - the" + " actual runs, and the aggregates computed. Sometimes, it is " + "desirable to only view the aggregates. E.g. when there are a lot " + "of repetitions. Do note that only the display is affected. " + "Internally, all the actual runs are still used, e.g. for U test.") + + utest = parser.add_argument_group() + utest.add_argument( + '--no-utest', + dest='utest', + default=True, + action="store_false", + help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format(report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS)) + alpha_default = 0.05 + utest.add_argument( + "--alpha", + dest='utest_alpha', + default=alpha_default, + type=float, + help=("significance level alpha. 
if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)") % + alpha_default) + + subparsers = parser.add_subparsers( + help='This tool has multiple modes of operation:', + dest='mode') + + parser_a = subparsers.add_parser( + 'benchmarks', + help='The most simple use-case, compare all the output of these two benchmarks') + baseline = parser_a.add_argument_group( + 'baseline', 'The benchmark baseline') + baseline.add_argument( + 'test_baseline', + metavar='test_baseline', + type=argparse.FileType('r'), + nargs=1, + help='A benchmark executable or JSON output file') + contender = parser_a.add_argument_group( + 'contender', 'The benchmark that will be compared against the baseline') + contender.add_argument( + 'test_contender', + metavar='test_contender', + type=argparse.FileType('r'), + nargs=1, + help='A benchmark executable or JSON output file') + parser_a.add_argument( + 'benchmark_options', + metavar='benchmark_options', + nargs=argparse.REMAINDER, + help='Arguments to pass when running benchmark executables') + + parser_b = subparsers.add_parser( + 'filters', help='Compare filter one with the filter two of benchmark') + baseline = parser_b.add_argument_group( + 'baseline', 'The benchmark baseline') + baseline.add_argument( + 'test', + metavar='test', + type=argparse.FileType('r'), + nargs=1, + help='A benchmark executable or JSON output file') + baseline.add_argument( + 'filter_baseline', + metavar='filter_baseline', + type=str, + nargs=1, + help='The first filter, that will be used as baseline') + contender = parser_b.add_argument_group( + 'contender', 'The benchmark that will be compared against the baseline') + contender.add_argument( + 'filter_contender', + metavar='filter_contender', + type=str, + nargs=1, + help='The second filter, that will be compared against the baseline') + parser_b.add_argument( + 'benchmark_options', + metavar='benchmark_options', + nargs=argparse.REMAINDER, + help='Arguments to pass when running benchmark executables') + + parser_c = subparsers.add_parser( + 'benchmarksfiltered', + help='Compare filter one of first benchmark with filter two of the second benchmark') + baseline = parser_c.add_argument_group( + 'baseline', 'The benchmark baseline') + baseline.add_argument( + 'test_baseline', + metavar='test_baseline', + type=argparse.FileType('r'), + nargs=1, + help='A benchmark executable or JSON output file') + baseline.add_argument( + 'filter_baseline', + metavar='filter_baseline', + type=str, + nargs=1, + help='The first filter, that will be used as baseline') + contender = parser_c.add_argument_group( + 'contender', 'The benchmark that will be compared against the baseline') + contender.add_argument( + 'test_contender', + metavar='test_contender', + type=argparse.FileType('r'), + nargs=1, + help='The second benchmark executable or JSON output file, that will be compared against the baseline') + contender.add_argument( + 'filter_contender', + metavar='filter_contender', + type=str, + nargs=1, + help='The second filter, that will be compared against the baseline') + parser_c.add_argument( + 'benchmark_options', + metavar='benchmark_options', + nargs=argparse.REMAINDER, + help='Arguments to pass when running benchmark executables') + + return parser + + +def main(): + # Parse the command line flags + parser = create_parser() + args, unknown_args = parser.parse_known_args() + if args.mode is None: + parser.print_help() + exit(1) + assert not unknown_args + 
benchmark_options = args.benchmark_options + + if args.mode == 'benchmarks': + test_baseline = args.test_baseline[0].name + test_contender = args.test_contender[0].name + filter_baseline = '' + filter_contender = '' + + # NOTE: if test_baseline == test_contender, you are analyzing the stdev + + description = 'Comparing %s to %s' % (test_baseline, test_contender) + elif args.mode == 'filters': + test_baseline = args.test[0].name + test_contender = args.test[0].name + filter_baseline = args.filter_baseline[0] + filter_contender = args.filter_contender[0] + + # NOTE: if filter_baseline == filter_contender, you are analyzing the + # stdev + + description = 'Comparing %s to %s (from %s)' % ( + filter_baseline, filter_contender, args.test[0].name) + elif args.mode == 'benchmarksfiltered': + test_baseline = args.test_baseline[0].name + test_contender = args.test_contender[0].name + filter_baseline = args.filter_baseline[0] + filter_contender = args.filter_contender[0] + + # NOTE: if test_baseline == test_contender and + # filter_baseline == filter_contender, you are analyzing the stdev + + description = 'Comparing %s (from %s) to %s (from %s)' % ( + filter_baseline, test_baseline, filter_contender, test_contender) + else: + # should never happen + print("Unrecognized mode of operation: '%s'" % args.mode) + parser.print_help() + exit(1) + + check_inputs(test_baseline, test_contender, benchmark_options) + + if args.display_aggregates_only: + benchmark_options += ['--benchmark_display_aggregates_only=true'] + + options_baseline = [] + options_contender = [] + + if filter_baseline and filter_contender: + options_baseline = ['--benchmark_filter=%s' % filter_baseline] + options_contender = ['--benchmark_filter=%s' % filter_contender] + + # Run the benchmarks and report the results + json1 = json1_orig = gbench.util.run_or_load_benchmark( + test_baseline, benchmark_options + options_baseline) + json2 = json2_orig = gbench.util.run_or_load_benchmark( + test_contender, benchmark_options + options_contender) + + # Now, filter the benchmarks so that the difference report can work + if filter_baseline and filter_contender: + replacement = '[%s vs. 
%s]' % (filter_baseline, filter_contender) + json1 = gbench.report.filter_benchmark( + json1_orig, filter_baseline, replacement) + json2 = gbench.report.filter_benchmark( + json2_orig, filter_contender, replacement) + + # Diff and output + output_lines = gbench.report.generate_difference_report( + json1, json2, args.display_aggregates_only, + args.utest, args.utest_alpha) + print(description) + for ln in output_lines: + print(ln) + + +class TestParser(unittest.TestCase): + def setUp(self): + self.parser = create_parser() + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'gbench', + 'Inputs') + self.testInput0 = os.path.join(testInputs, 'test1_run1.json') + self.testInput1 = os.path.join(testInputs, 'test1_run2.json') + + def test_benchmarks_basic(self): + parsed = self.parser.parse_args( + ['benchmarks', self.testInput0, self.testInput1]) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertFalse(parsed.benchmark_options) + + def test_benchmarks_basic_without_utest(self): + parsed = self.parser.parse_args( + ['--no-utest', 'benchmarks', self.testInput0, self.testInput1]) + self.assertFalse(parsed.display_aggregates_only) + self.assertFalse(parsed.utest) + self.assertEqual(parsed.utest_alpha, 0.05) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertFalse(parsed.benchmark_options) + + def test_benchmarks_basic_display_aggregates_only(self): + parsed = self.parser.parse_args( + ['-a', 'benchmarks', self.testInput0, self.testInput1]) + self.assertTrue(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertFalse(parsed.benchmark_options) + + def test_benchmarks_basic_with_utest_alpha(self): + parsed = self.parser.parse_args( + ['--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1]) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.utest_alpha, 0.314) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertFalse(parsed.benchmark_options) + + def test_benchmarks_basic_without_utest_with_utest_alpha(self): + parsed = self.parser.parse_args( + ['--no-utest', '--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1]) + self.assertFalse(parsed.display_aggregates_only) + self.assertFalse(parsed.utest) + self.assertEqual(parsed.utest_alpha, 0.314) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertFalse(parsed.benchmark_options) + + def test_benchmarks_with_remainder(self): + parsed = self.parser.parse_args( + ['benchmarks', self.testInput0, self.testInput1, 'd']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + 
self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertEqual(parsed.benchmark_options, ['d']) + + def test_benchmarks_with_remainder_after_doubleminus(self): + parsed = self.parser.parse_args( + ['benchmarks', self.testInput0, self.testInput1, '--', 'e']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertEqual(parsed.benchmark_options, ['e']) + + def test_filters_basic(self): + parsed = self.parser.parse_args( + ['filters', self.testInput0, 'c', 'd']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'filters') + self.assertEqual(parsed.test[0].name, self.testInput0) + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_contender[0], 'd') + self.assertFalse(parsed.benchmark_options) + + def test_filters_with_remainder(self): + parsed = self.parser.parse_args( + ['filters', self.testInput0, 'c', 'd', 'e']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'filters') + self.assertEqual(parsed.test[0].name, self.testInput0) + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_contender[0], 'd') + self.assertEqual(parsed.benchmark_options, ['e']) + + def test_filters_with_remainder_after_doubleminus(self): + parsed = self.parser.parse_args( + ['filters', self.testInput0, 'c', 'd', '--', 'f']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'filters') + self.assertEqual(parsed.test[0].name, self.testInput0) + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_contender[0], 'd') + self.assertEqual(parsed.benchmark_options, ['f']) + + def test_benchmarksfiltered_basic(self): + parsed = self.parser.parse_args( + ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'benchmarksfiltered') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertEqual(parsed.filter_contender[0], 'e') + self.assertFalse(parsed.benchmark_options) + + def test_benchmarksfiltered_with_remainder(self): + parsed = self.parser.parse_args( + ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', 'f']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'benchmarksfiltered') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertEqual(parsed.filter_contender[0], 'e') + self.assertEqual(parsed.benchmark_options[0], 'f') + + def test_benchmarksfiltered_with_remainder_after_doubleminus(self): + parsed = self.parser.parse_args( + ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', '--', 'g']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'benchmarksfiltered') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + 
self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertEqual(parsed.filter_contender[0], 'e') + self.assertEqual(parsed.benchmark_options[0], 'g') + + +if __name__ == '__main__': + # unittest.main() + main() + +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; +# kate: indent-mode python; remove-trailing-spaces modified; diff --git a/Utilities/Scripts/gbench/__init__.py b/Utilities/Scripts/gbench/__init__.py new file mode 100644 index 000000000..fce1a1acf --- /dev/null +++ b/Utilities/Scripts/gbench/__init__.py @@ -0,0 +1,8 @@ +"""Google Benchmark tooling""" + +__author__ = 'Eric Fiselier' +__email__ = 'eric@efcs.ca' +__versioninfo__ = (0, 5, 0) +__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev' + +__all__ = [] diff --git a/Utilities/Scripts/gbench/report.py b/Utilities/Scripts/gbench/report.py new file mode 100644 index 000000000..5bd3a8d85 --- /dev/null +++ b/Utilities/Scripts/gbench/report.py @@ -0,0 +1,541 @@ +import unittest +"""report.py - Utilities for reporting statistics about benchmark results +""" +import os +import re +import copy + +from scipy.stats import mannwhitneyu + + +class BenchmarkColor(object): + def __init__(self, name, code): + self.name = name + self.code = code + + def __repr__(self): + return '%s%r' % (self.__class__.__name__, + (self.name, self.code)) + + def __format__(self, format): + return self.code + + +# Benchmark Colors Enumeration +BC_NONE = BenchmarkColor('NONE', '') +BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m') +BC_CYAN = BenchmarkColor('CYAN', '\033[96m') +BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m') +BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m') +BC_HEADER = BenchmarkColor('HEADER', '\033[92m') +BC_WARNING = BenchmarkColor('WARNING', '\033[93m') +BC_WHITE = BenchmarkColor('WHITE', '\033[97m') +BC_FAIL = BenchmarkColor('FAIL', '\033[91m') +BC_ENDC = BenchmarkColor('ENDC', '\033[0m') +BC_BOLD = BenchmarkColor('BOLD', '\033[1m') +BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m') + +UTEST_MIN_REPETITIONS = 2 +UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better. +UTEST_COL_NAME = "_pvalue" + + +def color_format(use_color, fmt_str, *args, **kwargs): + """ + Return the result of 'fmt_str.format(*args, **kwargs)' after transforming + 'args' and 'kwargs' according to the value of 'use_color'. If 'use_color' + is False then all color codes in 'args' and 'kwargs' are replaced with + the empty string. + """ + assert use_color is True or use_color is False + if not use_color: + args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE + for arg in args] + kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE + for key, arg in kwargs.items()} + return fmt_str.format(*args, **kwargs) + + +def find_longest_name(benchmark_list): + """ + Return the length of the longest benchmark name in a given list of + benchmark JSON objects + """ + longest_name = 1 + for bc in benchmark_list: + if len(bc['name']) > longest_name: + longest_name = len(bc['name']) + return longest_name + + +def calculate_change(old_val, new_val): + """ + Return a float representing the decimal change between old_val and new_val. 
+ """ + if old_val == 0 and new_val == 0: + return 0.0 + if old_val == 0: + return float(new_val - old_val) / (float(old_val + new_val) / 2) + return float(new_val - old_val) / abs(old_val) + + +def filter_benchmark(json_orig, family, replacement=""): + """ + Apply a filter to the json, and only leave the 'family' of benchmarks. + """ + regex = re.compile(family) + filtered = {} + filtered['benchmarks'] = [] + for be in json_orig['benchmarks']: + if not regex.search(be['name']): + continue + filteredbench = copy.deepcopy(be) # Do NOT modify the old name! + filteredbench['name'] = regex.sub(replacement, filteredbench['name']) + filtered['benchmarks'].append(filteredbench) + return filtered + + +def get_unique_benchmark_names(json): + """ + While *keeping* the order, give all the unique 'names' used for benchmarks. + """ + seen = set() + uniqued = [x['name'] for x in json['benchmarks'] + if x['name'] not in seen and + (seen.add(x['name']) or True)] + return uniqued + + +def intersect(list1, list2): + """ + Given two lists, get a new list consisting of the elements only contained + in *both of the input lists*, while preserving the ordering. + """ + return [x for x in list1 if x in list2] + + +def is_potentially_comparable_benchmark(x): + return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x) + + +def partition_benchmarks(json1, json2): + """ + While preserving the ordering, find benchmarks with the same names in + both of the inputs, and group them. + (i.e. partition/filter into groups with common name) + """ + json1_unique_names = get_unique_benchmark_names(json1) + json2_unique_names = get_unique_benchmark_names(json2) + names = intersect(json1_unique_names, json2_unique_names) + partitions = [] + for name in names: + time_unit = None + # Pick the time unit from the first entry of the lhs benchmark. + # We should be careful not to crash with unexpected input. + for x in json1['benchmarks']: + if (x['name'] == name and is_potentially_comparable_benchmark(x)): + time_unit = x['time_unit'] + break + if time_unit is None: + continue + # Filter by name and time unit. + # All the repetitions are assumed to be comparable. + lhs = [x for x in json1['benchmarks'] if x['name'] == name and + x['time_unit'] == time_unit] + rhs = [x for x in json2['benchmarks'] if x['name'] == name and + x['time_unit'] == time_unit] + partitions.append([lhs, rhs]) + return partitions + + +def extract_field(partition, field_name): + # The count of elements may be different. We want *all* of them. + lhs = [x[field_name] for x in partition[0]] + rhs = [x[field_name] for x in partition[1]] + return [lhs, rhs] + +def calc_utest(timings_cpu, timings_time): + min_rep_cnt = min(len(timings_time[0]), + len(timings_time[1]), + len(timings_cpu[0]), + len(timings_cpu[1])) + + # Does *everything* has at least UTEST_MIN_REPETITIONS repetitions? 
+ if min_rep_cnt < UTEST_MIN_REPETITIONS: + return False, None, None + + time_pvalue = mannwhitneyu( + timings_time[0], timings_time[1], alternative='two-sided').pvalue + cpu_pvalue = mannwhitneyu( + timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue + + return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue + +def print_utest(partition, utest_alpha, first_col_width, use_color=True): + def get_utest_color(pval): + return BC_FAIL if pval >= utest_alpha else BC_OKGREEN + + timings_time = extract_field(partition, 'real_time') + timings_cpu = extract_field(partition, 'cpu_time') + have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time) + + # Check if we failed miserably with minimum required repetitions for utest + if not have_optimal_repetitions and cpu_pvalue is None and time_pvalue is None: + return [] + + dsc = "U Test, Repetitions: {} vs {}".format( + len(timings_cpu[0]), len(timings_cpu[1])) + dsc_color = BC_OKGREEN + + # We still got some results to show but issue a warning about it. + if not have_optimal_repetitions: + dsc_color = BC_WARNING + dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format( + UTEST_OPTIMAL_REPETITIONS) + + special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}" + + last_name = partition[0][0]['name'] + return [color_format(use_color, + special_str, + BC_HEADER, + "{}{}".format(last_name, UTEST_COL_NAME), + first_col_width, + get_utest_color(time_pvalue), time_pvalue, + get_utest_color(cpu_pvalue), cpu_pvalue, + dsc_color, dsc, + endc=BC_ENDC)] + + +def generate_difference_report( + json1, + json2, + display_aggregates_only=False, + utest=False, + utest_alpha=0.05, + use_color=True): + """ + Calculate and report the difference between each test of two benchmarks + runs specified as 'json1' and 'json2'. + """ + assert utest is True or utest is False + first_col_width = find_longest_name(json1['benchmarks']) + + def find_test(name): + for b in json2['benchmarks']: + if b['name'] == name: + return b + return None + + first_col_width = max( + first_col_width, + len('Benchmark')) + first_col_width += len(UTEST_COL_NAME) + first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format( + 'Benchmark', 12 + first_col_width) + output_strs = [first_line, '-' * len(first_line)] + + partitions = partition_benchmarks(json1, json2) + for partition in partitions: + # Careful, we may have different repetition count. + for i in range(min(len(partition[0]), len(partition[1]))): + bn = partition[0][i] + other_bench = partition[1][i] + + # *If* we were asked to only display aggregates, + # and if it is non-aggregate, then skip it. 
+ if display_aggregates_only and 'run_type' in bn and 'run_type' in other_bench: + assert bn['run_type'] == other_bench['run_type'] + if bn['run_type'] != 'aggregate': + continue + + fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}" + + def get_color(res): + if res > 0.05: + return BC_FAIL + elif res > -0.07: + return BC_WHITE + else: + return BC_CYAN + + tres = calculate_change(bn['real_time'], other_bench['real_time']) + cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time']) + output_strs += [color_format(use_color, + fmt_str, + BC_HEADER, + bn['name'], + first_col_width, + get_color(tres), + tres, + get_color(cpures), + cpures, + bn['real_time'], + other_bench['real_time'], + bn['cpu_time'], + other_bench['cpu_time'], + endc=BC_ENDC)] + + # After processing the whole partition, if requested, do the U test. + if utest: + output_strs += print_utest(partition, + utest_alpha=utest_alpha, + first_col_width=first_col_width, + use_color=use_color) + + return output_strs + + +############################################################################### +# Unit tests + + +class TestGetUniqueBenchmarkNames(unittest.TestCase): + def load_results(self): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput = os.path.join(testInputs, 'test3_run0.json') + with open(testOutput, 'r') as f: + json = json.load(f) + return json + + def test_basic(self): + expect_lines = [ + 'BM_One', + 'BM_Two', + 'short', # These two are not sorted + 'medium', # These two are not sorted + ] + json = self.load_results() + output_lines = get_unique_benchmark_names(json) + print("\n") + print("\n".join(output_lines)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + self.assertEqual(expect_lines[i], output_lines[i]) + + +class TestReportDifference(unittest.TestCase): + def load_results(self): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test1_run1.json') + testOutput2 = os.path.join(testInputs, 'test1_run2.json') + with open(testOutput1, 'r') as f: + json1 = json.load(f) + with open(testOutput2, 'r') as f: + json2 = json.load(f) + return json1, json2 + + def test_basic(self): + expect_lines = [ + ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'], + ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'], + ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'], + ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'], + ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'], + ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'], + ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'], + ['BM_100xSlower', '+99.0000', '+99.0000', + '100', '10000', '100', '10000'], + ['BM_100xFaster', '-0.9900', '-0.9900', + '10000', '100', '10000', '100'], + ['BM_10PercentCPUToTime', '+0.1000', + '-0.1000', '100', '110', '100', '90'], + ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'], + ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'], + ] + json1, json2 = self.load_results() + output_lines_with_header = generate_difference_report( + json1, json2, use_color=False) + output_lines = output_lines_with_header[2:] + print("\n") + print("\n".join(output_lines_with_header)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in 
range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(' ') if x] + self.assertEqual(len(parts), 7) + self.assertEqual(expect_lines[i], parts) + + +class TestReportDifferenceBetweenFamilies(unittest.TestCase): + def load_result(self): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput = os.path.join(testInputs, 'test2_run.json') + with open(testOutput, 'r') as f: + json = json.load(f) + return json + + def test_basic(self): + expect_lines = [ + ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'], + ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'], + ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'], + ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'], + ] + json = self.load_result() + json1 = filter_benchmark(json, "BM_Z.ro", ".") + json2 = filter_benchmark(json, "BM_O.e", ".") + output_lines_with_header = generate_difference_report( + json1, json2, use_color=False) + output_lines = output_lines_with_header[2:] + print("\n") + print("\n".join(output_lines_with_header)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(' ') if x] + self.assertEqual(len(parts), 7) + self.assertEqual(expect_lines[i], parts) + + +class TestReportDifferenceWithUTest(unittest.TestCase): + def load_results(self): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test3_run0.json') + testOutput2 = os.path.join(testInputs, 'test3_run1.json') + with open(testOutput1, 'r') as f: + json1 = json.load(f) + with open(testOutput2, 'r') as f: + json2 = json.load(f) + return json1, json2 + + def test_utest(self): + expect_lines = [] + expect_lines = [ + ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], + ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], + ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], + ['BM_Two_pvalue', + '0.6985', + '0.6985', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '2.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], + ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], + ['short_pvalue', + '0.7671', + '0.1489', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '3.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], + ] + json1, json2 = self.load_results() + output_lines_with_header = generate_difference_report( + json1, json2, utest=True, utest_alpha=0.05, use_color=False) + output_lines = output_lines_with_header[2:] + print("\n") + print("\n".join(output_lines_with_header)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(' ') if x] + self.assertEqual(expect_lines[i], parts) + + +class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( + unittest.TestCase): + def load_results(self): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test3_run0.json') + testOutput2 = os.path.join(testInputs, 'test3_run1.json') + with open(testOutput1, 'r') as f: + json1 = json.load(f) + with open(testOutput2, 'r') as f: + json2 = json.load(f) + return json1, json2 + + def test_utest(self): + expect_lines 
= [] + expect_lines = [ + ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], + ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], + ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], + ['BM_Two_pvalue', + '0.6985', + '0.6985', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '2.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], + ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], + ['short_pvalue', + '0.7671', + '0.1489', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '3.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ] + json1, json2 = self.load_results() + output_lines_with_header = generate_difference_report( + json1, json2, display_aggregates_only=True, + utest=True, utest_alpha=0.05, use_color=False) + output_lines = output_lines_with_header[2:] + print("\n") + print("\n".join(output_lines_with_header)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(' ') if x] + self.assertEqual(expect_lines[i], parts) + + +if __name__ == '__main__': + unittest.main() + +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; +# kate: indent-mode python; remove-trailing-spaces modified; diff --git a/Utilities/Scripts/gbench/util.py b/Utilities/Scripts/gbench/util.py new file mode 100644 index 000000000..1f8e8e2c4 --- /dev/null +++ b/Utilities/Scripts/gbench/util.py @@ -0,0 +1,164 @@ +"""util.py - General utilities for running, loading, and processing benchmarks +""" +import json +import os +import tempfile +import subprocess +import sys + +# Input file type enumeration +IT_Invalid = 0 +IT_JSON = 1 +IT_Executable = 2 + +_num_magic_bytes = 2 if sys.platform.startswith('win') else 4 + + +def is_executable_file(filename): + """ + Return 'True' if 'filename' names a valid file which is likely + an executable. A file is considered an executable if it starts with the + magic bytes for a EXE, Mach O, or ELF file. + """ + if not os.path.isfile(filename): + return False + with open(filename, mode='rb') as f: + magic_bytes = f.read(_num_magic_bytes) + if sys.platform == 'darwin': + return magic_bytes in [ + b'\xfe\xed\xfa\xce', # MH_MAGIC + b'\xce\xfa\xed\xfe', # MH_CIGAM + b'\xfe\xed\xfa\xcf', # MH_MAGIC_64 + b'\xcf\xfa\xed\xfe', # MH_CIGAM_64 + b'\xca\xfe\xba\xbe', # FAT_MAGIC + b'\xbe\xba\xfe\xca' # FAT_CIGAM + ] + elif sys.platform.startswith('win'): + return magic_bytes == b'MZ' + else: + return magic_bytes == b'\x7FELF' + + +def is_json_file(filename): + """ + Returns 'True' if 'filename' names a valid JSON output file. + 'False' otherwise. + """ + try: + with open(filename, 'r') as f: + json.load(f) + return True + except BaseException: + pass + return False + + +def classify_input_file(filename): + """ + Return a tuple (type, msg) where 'type' specifies the classified type + of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable + string represeting the error. 
+ """ + ftype = IT_Invalid + err_msg = None + if not os.path.exists(filename): + err_msg = "'%s' does not exist" % filename + elif not os.path.isfile(filename): + err_msg = "'%s' does not name a file" % filename + elif is_executable_file(filename): + ftype = IT_Executable + elif is_json_file(filename): + ftype = IT_JSON + else: + err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename + return ftype, err_msg + + +def check_input_file(filename): + """ + Classify the file named by 'filename' and return the classification. + If the file is classified as 'IT_Invalid' print an error message and exit + the program. + """ + ftype, msg = classify_input_file(filename) + if ftype == IT_Invalid: + print("Invalid input file: %s" % msg) + sys.exit(1) + return ftype + + +def find_benchmark_flag(prefix, benchmark_flags): + """ + Search the specified list of flags for a flag matching `` and + if it is found return the arg it specifies. If specified more than once the + last value is returned. If the flag is not found None is returned. + """ + assert prefix.startswith('--') and prefix.endswith('=') + result = None + for f in benchmark_flags: + if f.startswith(prefix): + result = f[len(prefix):] + return result + + +def remove_benchmark_flags(prefix, benchmark_flags): + """ + Return a new list containing the specified benchmark_flags except those + with the specified prefix. + """ + assert prefix.startswith('--') and prefix.endswith('=') + return [f for f in benchmark_flags if not f.startswith(prefix)] + + +def load_benchmark_results(fname): + """ + Read benchmark output from a file and return the JSON object. + REQUIRES: 'fname' names a file containing JSON benchmark output. + """ + with open(fname, 'r') as f: + return json.load(f) + + +def run_benchmark(exe_name, benchmark_flags): + """ + Run a benchmark specified by 'exe_name' with the specified + 'benchmark_flags'. The benchmark is run directly as a subprocess to preserve + real time console output. + RETURNS: A JSON object representing the benchmark output + """ + output_name = find_benchmark_flag('--benchmark_out=', + benchmark_flags) + is_temp_output = False + if output_name is None: + is_temp_output = True + thandle, output_name = tempfile.mkstemp() + os.close(thandle) + benchmark_flags = list(benchmark_flags) + \ + ['--benchmark_out=%s' % output_name] + + cmd = [exe_name] + benchmark_flags + print("RUNNING: %s" % ' '.join(cmd)) + exitCode = subprocess.call(cmd) + if exitCode != 0: + print('TEST FAILED...') + sys.exit(exitCode) + json_res = load_benchmark_results(output_name) + if is_temp_output: + os.unlink(output_name) + return json_res + + +def run_or_load_benchmark(filename, benchmark_flags): + """ + Get the results for a specified benchmark. If 'filename' specifies + an executable benchmark then the results are generated by running the + benchmark. Otherwise 'filename' must name a valid JSON output file, + which is loaded and the result returned. 
+ """ + ftype = check_input_file(filename) + if ftype == IT_JSON: + return load_benchmark_results(filename) + elif ftype == IT_Executable: + return run_benchmark(filename, benchmark_flags) + else: + assert False # This branch is unreachable diff --git a/Utilities/Scripts/strip_asm.py b/Utilities/Scripts/strip_asm.py new file mode 100755 index 000000000..9030550b4 --- /dev/null +++ b/Utilities/Scripts/strip_asm.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python + +""" +strip_asm.py - Cleanup ASM output for the specified file +""" + +from argparse import ArgumentParser +import sys +import os +import re + +def find_used_labels(asm): + found = set() + label_re = re.compile("\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)") + for l in asm.splitlines(): + m = label_re.match(l) + if m: + found.add('.L%s' % m.group(1)) + return found + + +def normalize_labels(asm): + decls = set() + label_decl = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)") + for l in asm.splitlines(): + m = label_decl.match(l) + if m: + decls.add(m.group(0)) + if len(decls) == 0: + return asm + needs_dot = next(iter(decls))[0] != '.' + if not needs_dot: + return asm + for ld in decls: + asm = re.sub("(^|\s+)" + ld + "(?=:|\s)", '\\1.' + ld, asm) + return asm + + +def transform_labels(asm): + asm = normalize_labels(asm) + used_decls = find_used_labels(asm) + new_asm = '' + label_decl = re.compile("^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)") + for l in asm.splitlines(): + m = label_decl.match(l) + if not m or m.group(0) in used_decls: + new_asm += l + new_asm += '\n' + return new_asm + + +def is_identifier(tk): + if len(tk) == 0: + return False + first = tk[0] + if not first.isalpha() and first != '_': + return False + for i in range(1, len(tk)): + c = tk[i] + if not c.isalnum() and c != '_': + return False + return True + +def process_identifiers(l): + """ + process_identifiers - process all identifiers and modify them to have + consistent names across all platforms; specifically across ELF and MachO. + For example, MachO inserts an additional understore at the beginning of + names. This function removes that. 
+ """ + parts = re.split(r'([a-zA-Z0-9_]+)', l) + new_line = '' + for tk in parts: + if is_identifier(tk): + if tk.startswith('__Z'): + tk = tk[1:] + elif tk.startswith('_') and len(tk) > 1 and \ + tk[1].isalpha() and tk[1] != 'Z': + tk = tk[1:] + new_line += tk + return new_line + + +def process_asm(asm): + """ + Strip the ASM of unwanted directives and lines + """ + new_contents = '' + asm = transform_labels(asm) + + # TODO: Add more things we want to remove + discard_regexes = [ + re.compile("\s+\..*$"), # directive + re.compile("\s*#(NO_APP|APP)$"), #inline ASM + re.compile("\s*#.*$"), # comment line + re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), #global directive + re.compile("\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"), + ] + keep_regexes = [ + + ] + fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:") + for l in asm.splitlines(): + # Remove Mach-O attribute + l = l.replace('@GOTPCREL', '') + add_line = True + for reg in discard_regexes: + if reg.match(l) is not None: + add_line = False + break + for reg in keep_regexes: + if reg.match(l) is not None: + add_line = True + break + if add_line: + if fn_label_def.match(l) and len(new_contents) != 0: + new_contents += '\n' + l = process_identifiers(l) + new_contents += l + new_contents += '\n' + return new_contents + +def main(): + parser = ArgumentParser( + description='generate a stripped assembly file') + parser.add_argument( + 'input', metavar='input', type=str, nargs=1, + help='An input assembly file') + parser.add_argument( + 'out', metavar='output', type=str, nargs=1, + help='The output file') + args, unknown_args = parser.parse_known_args() + input = args.input[0] + output = args.out[0] + if not os.path.isfile(input): + print(("ERROR: input file '%s' does not exist") % input) + sys.exit(1) + contents = None + with open(input, 'r') as f: + contents = f.read() + new_contents = process_asm(contents) + with open(output, 'w') as f: + f.write(new_contents) + + +if __name__ == '__main__': + main() + +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; +# kate: indent-mode python; remove-trailing-spaces modified; diff --git a/benchmarking/BenchmarkArrayTransfer.cxx b/benchmarking/BenchmarkArrayTransfer.cxx index 55358c816..71b463f56 100644 --- a/benchmarking/BenchmarkArrayTransfer.cxx +++ b/benchmarking/BenchmarkArrayTransfer.cxx @@ -473,12 +473,25 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(BenchExecToContReadWrite, int main(int argc, char* argv[]) { - // Parse VTK-m options: - auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp; - Config = vtkm::cont::Initialize(argc, argv, opts); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; - vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + // Initialize command line args + std::vector args(argv, argv + argc); + vtkm::bench::detail::InitializeArgs(&argc, args, opts); + + // Parse VTK-m options: + Config = vtkm::cont::Initialize(argc, args.data(), opts); + + // This occurs when it is help + if (opts == vtkm::cont::InitializeOptions::None) + { + std::cout << Config.Usage << std::endl; + } + else + { + vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + } // handle benchmarking related args and run benchmarks: - VTKM_EXECUTE_BENCHMARKS(argc, argv); + VTKM_EXECUTE_BENCHMARKS(argc, args.data()); } diff --git a/benchmarking/BenchmarkAtomicArray.cxx b/benchmarking/BenchmarkAtomicArray.cxx index 
48002aa1a..e32205867 100644 --- a/benchmarking/BenchmarkAtomicArray.cxx +++ b/benchmarking/BenchmarkAtomicArray.cxx @@ -506,11 +506,24 @@ VTKM_BENCHMARK_TEMPLATES_OPTS( int main(int argc, char* argv[]) { // Parse VTK-m options: - auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp; - Config = vtkm::cont::Initialize(argc, argv, opts); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; - vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + std::vector args(argv, argv + argc); + vtkm::bench::detail::InitializeArgs(&argc, args, opts); + + // Parse VTK-m options: + Config = vtkm::cont::Initialize(argc, args.data(), opts); + + // This occurs when it is help + if (opts == vtkm::cont::InitializeOptions::None) + { + std::cout << Config.Usage << std::endl; + } + else + { + vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + } // handle benchmarking related args and run benchmarks: - VTKM_EXECUTE_BENCHMARKS(argc, argv); + VTKM_EXECUTE_BENCHMARKS(argc, args.data()); } diff --git a/benchmarking/BenchmarkCopySpeeds.cxx b/benchmarking/BenchmarkCopySpeeds.cxx index 8deddad47..1bd5fe244 100644 --- a/benchmarking/BenchmarkCopySpeeds.cxx +++ b/benchmarking/BenchmarkCopySpeeds.cxx @@ -95,11 +95,23 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(CopySpeed, int main(int argc, char* argv[]) { // Parse VTK-m options: - auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp; - Config = vtkm::cont::Initialize(argc, argv, opts); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; - // Setup device: - vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + std::vector args(argv, argv + argc); + vtkm::bench::detail::InitializeArgs(&argc, args, opts); + + // Parse VTK-m options: + Config = vtkm::cont::Initialize(argc, args.data(), opts); + + // This occurs when it is help + if (opts == vtkm::cont::InitializeOptions::None) + { + std::cout << Config.Usage << std::endl; + } + else + { + vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + } // Handle NumThreads command-line arg: #ifdef VTKM_ENABLE_TBB @@ -126,5 +138,5 @@ int main(int argc, char* argv[]) #endif // TBB // handle benchmarking related args and run benchmarks: - VTKM_EXECUTE_BENCHMARKS(argc, argv); + VTKM_EXECUTE_BENCHMARKS(argc, args.data()); } diff --git a/benchmarking/BenchmarkDeviceAdapter.cxx b/benchmarking/BenchmarkDeviceAdapter.cxx index 049e93500..637a91982 100644 --- a/benchmarking/BenchmarkDeviceAdapter.cxx +++ b/benchmarking/BenchmarkDeviceAdapter.cxx @@ -39,6 +39,40 @@ namespace { +// Parametrize the input size samples for most of the benchmarks +// +// Define at compile time: +// +// Being VTKm_BENCHS_RANGE_LOWER_BOUNDARY b0 and, +// being VTKm_BENCHS_RANGE_UPPER_BOUNDARY b1 +// +// This will create the following sample sizes b0, b0*2^3, b0*2^6, ..., b1. +// +// Notice that setting up VTKm_BENCHS_RANGE_LOWER_BOUNDARY / VTKm_BENCHS_RANGE_UPPER_BOUNDARY +// will affect both ShortRange and FullRange. 
+// +#ifndef VTKm_BENCHS_RANGE_LOWER_BOUNDARY +#define FULL_RANGE_LOWER_BOUNDARY (1 << 12) // 4 KiB +#define SHORT_RANGE_LOWER_BOUNDARY (1 << 15) // 32 KiB + +#else +#define FULL_RANGE_LOWER_BOUNDARY (VTKm_BENCHS_RANGE_LOWER_BOUNDARY) +#define SHORT_RANGE_LOWER_BOUNDARY (VTKm_BENCHS_RANGE_LOWER_BOUNDARY) + +#endif + +#ifndef VTKm_BENCHS_RANGE_UPPER_BOUNDARY +#define FULL_RANGE_UPPER_BOUNDARY (1 << 27) // 128 MiB +#define SHORT_RANGE_UPPER_BOUNDARY (1 << 27) // 128 MiB +#define BITFIELD_TO_UNORDEREDSET_MAX_SAMPLING (1 << 26) // 64 MiB + +#else +#define FULL_RANGE_UPPER_BOUNDARY (VTKm_BENCHS_RANGE_UPPER_BOUNDARY) +#define SHORT_RANGE_UPPER_BOUNDARY (VTKm_BENCHS_RANGE_UPPER_BOUNDARY) +#define BITFIELD_TO_UNORDEREDSET_MAX_SAMPLING (VTKm_BENCHS_RANGE_UPPER_BOUNDARY) + +#endif + // Default sampling rate is x8 and always includes min/max, // so this will generate 7 samples at: // 1: 4 KiB @@ -47,15 +81,17 @@ namespace // 4: 2 MiB // 5: 16 MiB // 6: 128 MiB -static const std::pair FullRange{ 1 << 12, 1 << 27 }; // 4KiB, 128MiB +static const std::pair FullRange{ FULL_RANGE_LOWER_BOUNDARY, + FULL_RANGE_UPPER_BOUNDARY }; // Smaller range that can be used to reduce the number of benchmarks. Used // with `RangeMultiplier(SmallRangeMultiplier)`, this produces: // 1: 32 KiB // 2: 2 MiB // 3: 128 MiB -static const std::pair SmallRange{ 1 << 15, 1 << 27 }; // 4KiB, 128MiB -static constexpr int SmallRangeMultiplier = 1 << 21; // Ensure a sample at 2MiB +static const std::pair SmallRange{ SHORT_RANGE_LOWER_BOUNDARY, + SHORT_RANGE_UPPER_BOUNDARY }; +static constexpr int SmallRangeMultiplier = 1 << 21; // Ensure a sample at 2MiB using TypeList = vtkm::ListUseManualTime(); bm->ArgNames({ "Size", "C" }); @@ -393,6 +429,7 @@ void BenchCopy(benchmark::State& state) state.SetBytesProcessed(static_cast(numBytes) * iterations); state.SetItemsProcessed(static_cast(numValues) * iterations); }; + VTKM_BENCHMARK_TEMPLATES_OPTS(BenchCopy, ->Ranges({ FullRange })->ArgName("Size"), TypeList); template @@ -534,7 +571,7 @@ void BenchCountSetBitsGenerator(benchmark::internal::Benchmark* bm) for (int64_t config = 0; config < 6; ++config) { - bm->Ranges({ FullRange, { config, config } }); + bm->Ranges({ { FullRange.first, FullRange.second }, { config, config } }); } } VTKM_BENCHMARK_APPLY(BenchCountSetBits, BenchCountSetBitsGenerator); @@ -1053,8 +1090,10 @@ void BenchmarkStableSortIndicesUniqueGenerator(benchmark::internal::Benchmark* b bm->ArgNames({ "Size", "%Uniq" }); for (int64_t pcntUnique = 0; pcntUnique <= 100; pcntUnique += 25) { - // Cap the max size here at 21 MiB. This sort is too slow. - bm->Ranges({ { SmallRange.first, 1 << 21 }, { pcntUnique, pcntUnique } }); + // Cap the max size here at 2 MiB. This sort is too slow. 
+ const int64_t maxSize = 1 << 21; + bm->Ranges( + { { SmallRange.first, std::min(maxSize, SmallRange.second) }, { pcntUnique, pcntUnique } }); } } @@ -1167,12 +1206,23 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(BenchUpperBounds, int main(int argc, char* argv[]) { - // Parse VTK-m options: - auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp; - Config = vtkm::cont::Initialize(argc, argv, opts); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; - // Setup device: - vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + std::vector args(argv, argv + argc); + vtkm::bench::detail::InitializeArgs(&argc, args, opts); + + // Parse VTK-m options: + Config = vtkm::cont::Initialize(argc, args.data(), opts); + + // This occurs when it is help + if (opts == vtkm::cont::InitializeOptions::None) + { + std::cout << Config.Usage << std::endl; + } + else + { + vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + } // Handle NumThreads command-line arg: #ifdef VTKM_ENABLE_TBB @@ -1199,5 +1249,5 @@ int main(int argc, char* argv[]) #endif // TBB // handle benchmarking related args and run benchmarks: - VTKM_EXECUTE_BENCHMARKS(argc, argv); + VTKM_EXECUTE_BENCHMARKS(argc, args.data()); } diff --git a/benchmarking/BenchmarkFieldAlgorithms.cxx b/benchmarking/BenchmarkFieldAlgorithms.cxx index 4a607145f..a0817f2ca 100644 --- a/benchmarking/BenchmarkFieldAlgorithms.cxx +++ b/benchmarking/BenchmarkFieldAlgorithms.cxx @@ -942,12 +942,24 @@ VTKM_BENCHMARK(Bench2VirtualImplicitFunctions); int main(int argc, char* argv[]) { // Parse VTK-m options: - auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp; - Config = vtkm::cont::Initialize(argc, argv, opts); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; - // Setup device: - vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + std::vector args(argv, argv + argc); + vtkm::bench::detail::InitializeArgs(&argc, args, opts); + + // Parse VTK-m options: + Config = vtkm::cont::Initialize(argc, args.data(), opts); + + // This occurs when it is help + if (opts == vtkm::cont::InitializeOptions::None) + { + std::cout << Config.Usage << std::endl; + } + else + { + vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + } // handle benchmarking related args and run benchmarks: - VTKM_EXECUTE_BENCHMARKS(argc, argv); + VTKM_EXECUTE_BENCHMARKS(argc, args.data()); } diff --git a/benchmarking/BenchmarkFilters.cxx b/benchmarking/BenchmarkFilters.cxx index 033bcaeb6..d3e3c6e85 100644 --- a/benchmarking/BenchmarkFilters.cxx +++ b/benchmarking/BenchmarkFilters.cxx @@ -1040,12 +1040,23 @@ void InitDataSet(int& argc, char** argv) int main(int argc, char* argv[]) { auto opts = vtkm::cont::InitializeOptions::RequireDevice; - Config = vtkm::cont::Initialize(argc, argv, opts); - // Setup device: - vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + std::vector args(argv, argv + argc); + vtkm::bench::detail::InitializeArgs(&argc, args, opts); - InitDataSet(argc, argv); + // Parse VTK-m options: + Config = vtkm::cont::Initialize(argc, args.data(), opts); + + // This occurs when it is help + if (opts == vtkm::cont::InitializeOptions::None) + { + std::cout << Config.Usage << std::endl; + } + else + { + vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + InitDataSet(argc, args.data()); + } const std::string dataSetSummary = []() -> std::string { std::ostringstream out; @@ -1054,5 +1065,5 @@ int main(int argc, char* 
argv[]) }(); // handle benchmarking related args and run benchmarks: - VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, dataSetSummary); + VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, args.data(), dataSetSummary); } diff --git a/benchmarking/BenchmarkRayTracing.cxx b/benchmarking/BenchmarkRayTracing.cxx index a8a9ffb6a..1b1585984 100644 --- a/benchmarking/BenchmarkRayTracing.cxx +++ b/benchmarking/BenchmarkRayTracing.cxx @@ -116,13 +116,24 @@ VTKM_BENCHMARK(BenchRayTracing); int main(int argc, char* argv[]) { - // Parse VTK-m options: - auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp; - Config = vtkm::cont::Initialize(argc, argv, opts); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; - // Setup device: - vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + std::vector args(argv, argv + argc); + vtkm::bench::detail::InitializeArgs(&argc, args, opts); + + // Parse VTK-m options: + Config = vtkm::cont::Initialize(argc, args.data(), opts); + + // This occurs when it is help + if (opts == vtkm::cont::InitializeOptions::None) + { + std::cout << Config.Usage << std::endl; + } + else + { + vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + } // handle benchmarking related args and run benchmarks: - VTKM_EXECUTE_BENCHMARKS(argc, argv); + VTKM_EXECUTE_BENCHMARKS(argc, args.data()); } diff --git a/benchmarking/BenchmarkTopologyAlgorithms.cxx b/benchmarking/BenchmarkTopologyAlgorithms.cxx index 708028d4a..b55f5a783 100644 --- a/benchmarking/BenchmarkTopologyAlgorithms.cxx +++ b/benchmarking/BenchmarkTopologyAlgorithms.cxx @@ -380,12 +380,24 @@ VTKM_BENCHMARK_TEMPLATES(BenchClassificationDynamic, ValueTypes); int main(int argc, char* argv[]) { // Parse VTK-m options: - auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp; - Config = vtkm::cont::Initialize(argc, argv, opts); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; - // Setup device: - vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + std::vector args(argv, argv + argc); + vtkm::bench::detail::InitializeArgs(&argc, args, opts); + + // Parse VTK-m options: + Config = vtkm::cont::Initialize(argc, args.data(), opts); + + // This occurs when it is help + if (opts == vtkm::cont::InitializeOptions::None) + { + std::cout << Config.Usage << std::endl; + } + else + { + vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); + } // handle benchmarking related args and run benchmarks: - VTKM_EXECUTE_BENCHMARKS(argc, argv); + VTKM_EXECUTE_BENCHMARKS(argc, args.data()); } diff --git a/benchmarking/Benchmarker.h b/benchmarking/Benchmarker.h index f4035767e..0e44da5d2 100644 --- a/benchmarking/Benchmarker.h +++ b/benchmarking/Benchmarker.h @@ -388,6 +388,37 @@ static inline vtkm::Id ExecuteBenchmarks(int& argc, return static_cast(num); } + +void InitializeArgs(int* argc, std::vector& args, vtkm::cont::InitializeOptions& opts) +{ + bool isHelp = false; + + // Inject --help + if (*argc == 1) + { + const char* help = "--help"; // We want it to be static + args.push_back(const_cast(help)); + *argc = *argc + 1; + } + + args.push_back(nullptr); + + for (size_t i = 0; i < static_cast(*argc); ++i) + { + auto opt_s = std::string(args[i]); + if (opt_s == "--help" || opt_s == "-help" || opt_s == "-h") + { + isHelp = true; + } + } + + if (!isHelp) + { + return; + } + + opts = vtkm::cont::InitializeOptions::None; +} } } } // end namespace vtkm::bench::detail diff --git a/benchmarking/CMakeLists.txt 
b/benchmarking/CMakeLists.txt index ba603e031..a504d395e 100644 --- a/benchmarking/CMakeLists.txt +++ b/benchmarking/CMakeLists.txt @@ -47,10 +47,17 @@ set(benchmarks BenchmarkTopologyAlgorithms ) +set(VTKm_BENCHS_RANGE_LOWER_BOUNDARY 4096 CACHE STRING "Smallest sample for input size bench for BenchmarkDeviceAdapter") +set(VTKm_BENCHS_RANGE_UPPER_BOUNDARY 134217728 CACHE STRING "Biggest sample for input size bench for BenchmarkDeviceAdapter") +mark_as_advanced(VTKm_BENCHS_RANGE_LOWER_BOUNDARY VTKm_BENCHS_RANGE_UPPER_BOUNDARY) + foreach (benchmark ${benchmarks}) add_benchmark(NAME ${benchmark} FILE ${benchmark}.cxx LIBS vtkm_source vtkm_filter) endforeach () +target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_LOWER_BOUNDARY=${VTKm_BENCHS_RANGE_LOWER_BOUNDARY}) +target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_UPPER_BOUNDARY=${VTKm_BENCHS_RANGE_UPPER_BOUNDARY}) + if(TARGET vtkm_rendering) add_benchmark(NAME BenchmarkRayTracing FILE BenchmarkRayTracing.cxx LIBS vtkm_rendering) endif() diff --git a/benchmarking/README.md b/benchmarking/README.md new file mode 100644 index 000000000..a8e1b5948 --- /dev/null +++ b/benchmarking/README.md @@ -0,0 +1,120 @@ +# BENCHMARKING VTK-m + +## TL;DR + +When configuring _VTK-m_ with _CMake_, pass the flag `-DVTKm_ENABLE_BENCHMARKS=1`. +In the build directory you will then see the following binaries: + + $ ls bin/Benchmark* + bin/BenchmarkArrayTransfer* bin/BenchmarkCopySpeeds* bin/BenchmarkFieldAlgorithms* + bin/BenchmarkRayTracing* bin/BenchmarkAtomicArray* bin/BenchmarkDeviceAdapter* + bin/BenchmarkFilters* bin/BenchmarkTopologyAlgorithms* + +Taking `BenchmarkArrayTransfer` as an example, we can run it as: + + $ bin/BenchmarkArrayTransfer -d Any + +--- + +## Parts of this Document + +0. [TL;DR](#TL;DR) +1. [Devices](#choosing-devices) +2. [Filters](#run-a-subset-of-your-benchmarks) +3. [Compare with baseline](#compare-with-baseline) +4. [Installing compare-benchmarks.py](#installing-compare-benchmarkspy) + +--- + +## Choosing devices + +Taking `BenchmarkArrayTransfer` as an example, we can determine which +devices it can run on by simply running it without arguments: + + $ bin/BenchmarkArrayTransfer + ... + Valid devices: "Any" "Serial" + ... + +From the listed _Valid devices_ you can then choose the device on which to run the benchmark: + + $ bin/BenchmarkArrayTransfer -d Serial + + +## Run a subset of your benchmarks + +_VTK-m_ benchmarks use [Google Benchmarks], which allows you to choose a subset +of benchmarks with the flag `--benchmark_filter=REGEX`. + +For instance, if you want to run all the benchmarks that write something, you +would run: + + $ bin/BenchmarkArrayTransfer -d Serial --benchmark_filter='Write' + +Note that you can list all of the available benchmarks with the option +`--benchmark_list_tests`. + +## Compare with baseline + +_VTK-m_ ships with a helper script named `compare-benchmarks.py`, based on the +[Google Benchmarks] script `compare.py`, which lets you compare benchmarks across different +devices, filters, and binaries. After building _VTK-m_ it appears in the +`bin` directory of your build directory.
+ +When running `compare-benchmarks.py`: + - Specify the baseline benchmark binary path and its arguments with + `--benchmark1=` + - Specify the contender benchmark binary path and its arguments with `--benchmark2=` + - Extra options to be passed to `compare.py` must come after `--` + +### Compare between filters + +When comparing filters, only one benchmark binary with a single device can be used, +as shown in the following example: + +```sh +$ ./compare-benchmarks.py --benchmark1='./BenchmarkArrayTransfer -d Any +--benchmark_filter=1024' --filter1='Read' --filter2=Write -- filters + +# It will output something like this: + +Benchmark Time CPU Time Old Time New CPU Old CPU New +--------------------------------------------------------------------------------------------------------------------------------------------------------------- +BenchContToExec[Read vs. Write]/Bytes:1024/manual_time +0.2694 +0.2655 18521 23511 18766 23749 +BenchExecToCont[Read vs. Write]/Bytes:1024/manual_time +0.0212 +0.0209 25910 26460 26152 26698 +``` + +### Compare between devices + +When comparing runs of the same benchmarks on two different devices, use the _option_ `benchmarks` +after `--` and call `./compare-benchmarks.py` as follows: + +```sh +$ ./compare-benchmarks.py --benchmark1='./BenchmarkArrayTransfer -d Serial +--benchmark_filter=1024' --benchmark2='./BenchmarkArrayTransfer -d Cuda +--benchmark_filter=1024' -- benchmarks + + +# It will output something like this: + +Benchmark Time CPU Time Old Time New CPU Old CPU New +--------------------------------------------------------------------------------------------------------------------------------------------------- +BenchContToExecRead/Bytes:1024/manual_time +0.0127 +0.0120 18388 18622 18632 18856 +BenchContToExecWrite/Bytes:1024/manual_time +0.0010 +0.0006 23471 23496 23712 23726 +BenchContToExecReadWrite/Bytes:1024/manual_time -0.0034 -0.0041 26363 26274 26611 26502 +BenchRoundTripRead/Bytes:1024/manual_time +0.0055 +0.0056 20635 20748 21172 21291 +BenchRoundTripReadWrite/Bytes:1024/manual_time +0.0084 +0.0082 29288 29535 29662 29905 +BenchExecToContRead/Bytes:1024/manual_time +0.0025 +0.0021 25883 25947 26122 26178 +BenchExecToContWrite/Bytes:1024/manual_time -0.0027 -0.0038 26375 26305 26622 26522 +BenchExecToContReadWrite/Bytes:1024/manual_time +0.0041 +0.0039 25639 25745 25871 25972 +``` + +## Installing compare-benchmarks.py + +`compare-benchmarks.py` relies on `compare.py` from [Google Benchmarks], which in turn +requires `SciPy`; you can find installation instructions [here][SciPy]. + +[Google Benchmarks]: https://github.com/google/benchmark +[Compare.py]: https://github.com/google/benchmark/blob/master/tools/compare.py +[SciPy]: https://www.scipy.org/install.html
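The CMakeLists.txt hunk above also introduces two advanced cache variables, `VTKm_BENCHS_RANGE_LOWER_BOUNDARY` and `VTKm_BENCHS_RANGE_UPPER_BOUNDARY`, which bound the input-size sweep of `BenchmarkDeviceAdapter` (per the source comment they affect both the ShortRange and FullRange samples). Below is a minimal configure-time sketch; the values shown are the cache defaults from the diff (4 KiB and 128 MiB), and the source path is a placeholder.

```sh
# Sketch: override the BenchmarkDeviceAdapter input-size range at configure time.
# 4096 and 134217728 are the cache defaults added in benchmarking/CMakeLists.txt;
# /path/to/vtk-m is hypothetical, and this is run from an empty build directory.
cmake /path/to/vtk-m \
      -DVTKm_ENABLE_BENCHMARKS=1 \
      -DVTKm_BENCHS_RANGE_LOWER_BOUNDARY=4096 \
      -DVTKm_BENCHS_RANGE_UPPER_BOUNDARY=134217728
```

Since the variables are marked advanced and turned into compile definitions for `BenchmarkDeviceAdapter` only, changing them triggers a rebuild of that one benchmark rather than the whole tree.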