Mirror of https://gitlab.kitware.com/vtk/vtk-m (synced 2024-10-05 01:49:02 +00:00)
benchmarks: pass unparsed args to Google benchmark
- It also adds Google Benchmark's compare.py script
  - It is installed to the build directory.
- It adds a wrapper script called compare-benchmarks.py which:
  - Lets you run each of the benchmarks with different devices
- It adds a README.md explaining how to run the benchmarks
- BenchmarkDeviceAdapter input size range is parametrized at compile time

Signed-off-by: Vicente Adolfo Bolea Sanchez <vicente.bolea@kitware.com>
This commit is contained in:
parent 098c50b382
commit b05bd33d3c
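The wrapper added here is driven from the command line; the following is a sketch of one possible invocation, pieced together from the usage text in compare-benchmarks.py below (the benchmark binary names are borrowed from the older benchCompare.py example and are illustrative, not part of this commit):

$ ./compare-benchmarks.py \
    --benchmark1='./BenchmarkDeviceAdapter_SERIAL' \
    --benchmark2='./BenchmarkDeviceAdapter_TBB' \
    -- benchmarks

Each --benchmarkN command is launched with --benchmark_out=<file> --benchmark_out_format=json appended, and the resulting JSON files are handed to compare.py in its 'benchmarks' mode.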
@@ -1,157 +0,0 @@
#!/usr/bin/env python3
#
# Compares the output from BenchmarkDeviceAdapter from the serial
# device to a parallel device and prints a table containing the results.
#
# Example usage:
#
# $ BenchmarkDeviceAdapter_SERIAL > serial.out
# $ BenchmarkDeviceAdapter_TBB > tbb.out
# $ benchCompare.py serial.out tbb.out
#
#
# The number of threads (optional -- only used to generate the "Warn" column)
maxThreads = 4
#
# Print debugging output:
doDebug = False
#
# End config options.

import re
import sys

assert(len(sys.argv) == 3)

def debug(str):
  if (doDebug): print(str)

# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64
typeParser = re.compile("\\*{3} ([^*]+) on device ([^*]+) \\*{15}")

# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name
nameParser = re.compile("Benchmark '([^-]+)' results:")

# Parses "mean = 0.0125s" --> 0.0125
meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")

# Parses "std dev = 0.0125s" --> 0.0125
stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")

serialFilename = sys.argv[1]
parallelFilename = sys.argv[2]

serialFile = open(serialFilename, 'r')
parallelFile = open(parallelFilename, 'r')

class BenchKey:
  def __init__(self, name_, type_):
    self.name = name_
    self.type = type_

  def __eq__(self, other):
    return self.name == other.name and self.type == other.type

  def __lt__(self, other):
    if self.name < other.name: return True
    elif self.name > other.name: return False
    else: return self.type < other.type

  def __hash__(self):
    return (self.name + self.type).__hash__()

class BenchData:
  def __init__(self, mean_, stdDev_):
    self.mean = mean_
    self.stdDev = stdDev_

def parseFile(f, benchmarks):
  type = ""
  bench = ""
  mean = -1.
  stdDev = -1.
  for line in f:
    debug("Line: {}".format(line))

    typeRes = typeParser.match(line)
    if typeRes:
      type = typeRes.group(1)
      debug("Found type: {}".format(type))
      continue

    nameRes = nameParser.match(line)
    if nameRes:
      name = nameRes.group(1)
      debug("Found name: {}".format(name))
      continue

    meanRes = meanParser.match(line)
    if meanRes:
      mean = float(meanRes.group(1))
      debug("Found mean: {}".format(mean))
      continue

    stdDevRes = stdDevParser.match(line)
    if stdDevRes:
      stdDev = float(stdDevRes.group(1))
      debug("Found stddev: {}".format(stdDev))

      # stdDev is always the last parse for a given benchmark, add entry now
      benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev)
      debug("{} records found.".format(len(benchmarks)))

      mean = -1.
      stdDev = -1.

      continue

serialBenchmarks = {}
parallelBenchmarks = {}

parseFile(serialFile, serialBenchmarks)
parseFile(parallelFile, parallelBenchmarks)

serialKeys = set(serialBenchmarks.keys())
parallelKeys = set(parallelBenchmarks.keys())

commonKeys = sorted(list(serialKeys.intersection(parallelKeys)))

serialOnlyKeys = sorted(list(serialKeys.difference(parallelKeys)))
parallelOnlyKeys = sorted(list(parallelKeys.difference(serialKeys)))

debug("{} serial keys\n{} parallel keys\n{} common keys\n{} serialOnly keys\n{} parallelOnly keys.".format(
  len(serialKeys), len(parallelKeys), len(commonKeys), len(serialOnlyKeys), len(parallelOnlyKeys)))

if len(serialOnlyKeys) > 0:
  print("Keys found only in serial:")
  for k in serialOnlyKeys:
    print("%s (%s)"%(k.name, k.type))
  print("")

if len(parallelOnlyKeys) > 0:
  print("Keys found only in parallel:")
  for k in parallelOnlyKeys:
    print("%s (%s)"%(k.name, k.type))
  print("")

print("Comparison:")
print("| %7s | %4s | %8s %8s | %8s %8s | %s (%s) |"%(
  "Speedup", "Warn", "serial", "", "parallel", "", "Benchmark", "Type"))
print("|-%7s-|-%4s-|-%8s----%8s-|-%8s----%8s-|-%s--%s--|"%(
  "-"*7, "-"*4, "-"*8, "-"*8, "-"*8, "-"*8, "-"*9, "-"*4))
for key in commonKeys:
  sData = serialBenchmarks[key]
  pData = parallelBenchmarks[key]
  speedup = sData.mean / pData.mean if pData.mean != 0. else 0.
  if speedup > maxThreads * .9:
    flag = "    "
  elif speedup > maxThreads * .75:
    flag = "!   "
  elif speedup > maxThreads * .5:
    flag = "!!  "
  elif speedup > maxThreads * .25:
    flag = "!!! "
  else:
    flag = "!!!!"
  print("| %7.3f | %4s | %08.6f +- %08.6f | %08.6f +- %08.6f | %s (%s) |"%(
    speedup, flag, sData.mean, sData.stdDev, pData.mean, pData.stdDev, key.name, key.type))
@@ -1,111 +0,0 @@
#!/usr/bin/env python
#
# Prints a concise summary of a benchmark output as a TSV blob.
#
# Example usage:
#
# $ BenchmarkXXX_DEVICE > bench.out
# $ benchSummary.py bench.out
#
# Options SortByType, SortByName, or SortByMean may be passed after the
# filename to sort the output by the indicated quantity. If no sort option
# is provided, the output order matches the input. If multiple options are
# specified, the list will be sorted repeatedly in the order requested.

import re
import sys

assert(len(sys.argv) >= 2)

# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64
typeParser = re.compile("\\*{3} ([^*]+) \\*{15}")

# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name
nameParser = re.compile("Benchmark '([^-]+)' results:")

# Parses "mean = 0.0125s" --> 0.0125
meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")

# Parses "std dev = 0.0125s" --> 0.0125
stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")

filename = sys.argv[1]
benchFile = open(filename, 'r')

sortOpt = None
if len(sys.argv) > 2:
  sortOpt = sys.argv[2:]

class BenchKey:
  def __init__(self, name_, type_):
    self.name = name_
    self.type = type_

  def __eq__(self, other):
    return self.name == other.name and self.type == other.type

  def __lt__(self, other):
    if self.name < other.name: return True
    elif self.name > other.name: return False
    else: return self.type < other.type

  def __hash__(self):
    return (self.name + self.type).__hash__()

class BenchData:
  def __init__(self, mean_, stdDev_):
    self.mean = mean_
    self.stdDev = stdDev_

def parseFile(f, benchmarks):
  type = ""
  bench = ""
  mean = -1.
  stdDev = -1.
  for line in f:
    typeRes = typeParser.match(line)
    if typeRes:
      type = typeRes.group(1)
      continue

    nameRes = nameParser.match(line)
    if nameRes:
      name = nameRes.group(1)
      continue

    meanRes = meanParser.match(line)
    if meanRes:
      mean = float(meanRes.group(1))
      continue

    stdDevRes = stdDevParser.match(line)
    if stdDevRes:
      stdDev = float(stdDevRes.group(1))

      # stdDev is always the last parse for a given benchmark, add entry now
      benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev)

      mean = -1.
      stdDev = -1.

      continue

benchmarks = {}
parseFile(benchFile, benchmarks)

# Sort keys by type:
keys = benchmarks.keys()
if sortOpt:
  for opt in sortOpt:
    if opt.lower() == "sortbytype":
      keys = sorted(keys, key=lambda k: k.type)
    elif opt.lower() == "sortbyname":
      keys = sorted(keys, key=lambda k: k.name)
    elif opt.lower() == "sortbymean":
      keys = sorted(keys, key=lambda k: benchmarks[k].mean)

print("# Summary: (%s)"%filename)
print("%-9s\t%-9s\t%-9s\t%-s"%("Mean", "Stdev", "Stdev%", "Benchmark (type)"))
for key in keys:
  data = benchmarks[key]
  print("%9.6f\t%9.6f\t%9.6f\t%s (%s)"%(data.mean, data.stdDev, data.stdDev / data.mean * 100., key.name, key.type))
@@ -1,156 +0,0 @@
#!/usr/bin/env python
#
# Prints a concise summary of a benchmark output as a TSV blob. Benchmarks are
# expected to have "Baseline" in the name, and a matching benchmark with the
# same name but Baseline replaced with something else. For example,
#
# Baseline benchmark name: "Some benchmark: Baseline, Size=4"
# Test benchmark name: "Some benchmark: Blahblah, Size=4"
#
# The output will print the baseline, test, and overhead times for the
# benchmarks.
#
# Example usage:
#
# $ BenchmarkXXX_DEVICE > bench.out
# $ benchSummaryWithBaselines.py bench.out
#
# Options SortByType, SortByName, SortByOverhead, or SortByRatio
# (testtime/baseline) may be passed after the filename to sort the output by
# the indicated quantity. If no sort option is provided, the output order
# matches the input. If multiple options are specified, the list will be sorted
# repeatedly in the order requested.

import re
import sys

assert(len(sys.argv) >= 2)

# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64
typeParser = re.compile("\\*{3} ([^*]+) \\*{15}")

# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name
nameParser = re.compile("Benchmark '([^-]+)' results:")

# Parses "mean = 0.0125s" --> 0.0125
meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")

# Parses "std dev = 0.0125s" --> 0.0125
stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")

# Parses "SomeText Baseline Other Text" --> ("SomeText ", " Other Text")
baselineParser = re.compile("(.*)Baseline(.*)")

filename = sys.argv[1]
benchFile = open(filename, 'r')

sortOpt = None
if len(sys.argv) > 2:
  sortOpt = sys.argv[2:]

class BenchKey:
  def __init__(self, name_, type_):
    self.name = name_
    self.type = type_

  def __eq__(self, other):
    return self.name == other.name and self.type == other.type

  def __lt__(self, other):
    if self.name < other.name: return True
    elif self.name > other.name: return False
    else: return self.type < other.type

  def __hash__(self):
    return (self.name + self.type).__hash__()

class BenchData:
  def __init__(self, mean_, stdDev_):
    self.mean = mean_
    self.stdDev = stdDev_

def parseFile(f, benchmarks):
  type = ""
  bench = ""
  mean = -1.
  stdDev = -1.
  for line in f:
    typeRes = typeParser.match(line)
    if typeRes:
      type = typeRes.group(1)
      continue

    nameRes = nameParser.match(line)
    if nameRes:
      name = nameRes.group(1)
      continue

    meanRes = meanParser.match(line)
    if meanRes:
      mean = float(meanRes.group(1))
      continue

    stdDevRes = stdDevParser.match(line)
    if stdDevRes:
      stdDev = float(stdDevRes.group(1))

      # stdDev is always the last parse for a given benchmark, add entry now
      benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev)

      mean = -1.
      stdDev = -1.

      continue

class BaselinedBenchData:
  def __init__(self, baseline, test):
    self.baseline = baseline.mean
    self.test = test.mean
    self.overhead = test.mean - baseline.mean

def findBaselines(benchmarks):
  result = {}

  for baseKey in benchmarks.keys():
    # Look for baseline entries
    baselineRes = baselineParser.match(baseKey.name)
    if baselineRes:
      prefix = baselineRes.group(1)
      suffix = baselineRes.group(2)

      # Find the test entry matching the baseline:
      for testKey in benchmarks.keys():
        if baseKey.type != testKey.type: # Need same type
          continue
        if baseKey.name == testKey.name: # Skip the base key
          continue
        if testKey.name.startswith(prefix) and testKey.name.endswith(suffix):
          newName = (prefix + suffix).replace(", ,", ",")
          newKey = BenchKey(newName, testKey.type)
          newVal = BaselinedBenchData(benchmarks[baseKey], benchmarks[testKey])
          result[newKey] = newVal
  return result

benchmarks = {}
parseFile(benchFile, benchmarks)
benchmarks = findBaselines(benchmarks)

# Sort keys by type:
keys = benchmarks.keys()
if sortOpt:
  for opt in sortOpt:
    if opt.lower() == "sortbytype":
      keys = sorted(keys, key=lambda k: k.type)
    elif opt.lower() == "sortbyname":
      keys = sorted(keys, key=lambda k: k.name)
    elif opt.lower() == "sortbyoverhead":
      keys = sorted(keys, key=lambda k: benchmarks[k].overhead)
    elif opt.lower() == "sortbyratio":
      keys = sorted(keys, key=lambda k: benchmarks[k].overhead / benchmarks[k].baseline)

print("# Summary: (%s)"%filename)
print("%-9s\t%-9s\t%-9s\t%-9s\t%-s"%("Baseline", "TestTime", "Overhead", "Test/Base", "Benchmark (type)"))
for key in keys:
  data = benchmarks[key]
  print("%9.6f\t%9.6f\t%9.6f\t%9.6f\t%s (%s)"%(data.baseline, data.test,
    data.overhead, data.test / data.baseline, key.name, key.type))
Utilities/Scripts/compare-benchmarks.py (new executable file, 101 lines)
@@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""
compare-benchmarks.py - VTKm + Google Benchmarks compare.py
"""

import getopt
import subprocess
import sys
import time
import os

CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
COMPARE_PY_PATH = os.path.join(CURRENT_DIR, 'compare.py')
COMPARE_PY = sys.executable + " " + COMPARE_PY_PATH


class Bench():
    def __init__(self):
        self.__cmd = None

    @property
    def cmd(self):
        return self.__cmd

    @cmd.setter
    def cmd(self, c):
        self.__cmd = c

    def launch(self):
        output_file = "bench-%d.json" % time.time()
        cmd_exec = "%s --benchmark_out=%s --benchmark_out_format=json" \
            % (self.cmd, output_file)
        print(cmd_exec)
        subprocess.call(cmd_exec, shell=True)
        return output_file


def print_help(error_msg = None):
    if error_msg != None:
        print(error_msg)

    print("usage: compare-benchmarks <opts>\n" \
        " --benchmark1='<benchmark1> [arg1] [arg2] ...'"\
        " [--filter1=<filter1>]\n"\
        " --benchmark2='<benchmark2> [arg1] [arg2] ...'"\
        " [--filter2=<filter2>]\n"\
        " -- [-opt] benchmarks|filters|benchmarksfiltered\n\n" \
        "compare.py help:")

    subprocess.call(COMPARE_PY, shell=True)
    sys.exit(0)


# -----------------------------------------------------------------------------
def main():
    is_filters = False
    filter1 = str()
    filter2 = str()
    bench1 = Bench()
    bench2 = Bench()

    options, remainder = getopt.gnu_getopt(sys.argv[1:], '',
        ['help','benchmark1=', 'benchmark2=', 'filter1=', 'filter2='])

    for opt, arg in options:
        if opt == "--benchmark1":
            bench1.cmd = arg

        if opt == "--benchmark2":
            bench2.cmd = arg

        if opt == "--filter1":
            filter1 = arg

        if opt == "--filter2":
            filter2 = arg

        if opt == "--help":
            print_help()

    if bench1.cmd == None:
        print_help("ERROR: no benchmarks chosen")

    for arg in remainder:
        if arg == "filters":
            is_filters = True

    if is_filters and bench2.cmd != None:
        print_help("ERROR: filters option can only accept --benchmark1= and --filter1")

    b1_output = bench1.launch()
    b2_output = bench2.launch() if not is_filters else filter1 + " " + filter2

    cmd = "%s %s %s %s" % (COMPARE_PY, " ".join(remainder), b1_output, b2_output)
    print(cmd)
    subprocess.call(cmd, shell=True)

    os.remove(b1_output)

    if not is_filters:
        os.remove(b2_output)

if __name__ == '__main__':
    main()
Utilities/Scripts/compare.py (new executable file, 408 lines)
@@ -0,0 +1,408 @@
#!/usr/bin/env python

import unittest
"""
compare.py - versatile benchmark output compare tool
"""

import argparse
from argparse import ArgumentParser
import sys
import gbench
from gbench import util, report
from gbench.util import *


def check_inputs(in1, in2, flags):
    """
    Perform checking on the user provided inputs and diagnose any abnormalities
    """
    in1_kind, in1_err = classify_input_file(in1)
    in2_kind, in2_err = classify_input_file(in2)
    output_file = find_benchmark_flag('--benchmark_out=', flags)
    output_type = find_benchmark_flag('--benchmark_out_format=', flags)
    if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file:
        print(("WARNING: '--benchmark_out=%s' will be passed to both "
               "benchmarks causing it to be overwritten") % output_file)
    if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0:
        print("WARNING: passing optional flags has no effect since both "
              "inputs are JSON")
    if output_type is not None and output_type != 'json':
        print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`"
               " is not supported.") % output_type)
        sys.exit(1)


def create_parser():
    parser = ArgumentParser(
        description='versatile benchmark output compare tool')

    parser.add_argument(
        '-a',
        '--display_aggregates_only',
        dest='display_aggregates_only',
        action="store_true",
        help="If there are repetitions, by default, we display everything - the"
             " actual runs, and the aggregates computed. Sometimes, it is "
             "desirable to only view the aggregates. E.g. when there are a lot "
             "of repetitions. Do note that only the display is affected. "
             "Internally, all the actual runs are still used, e.g. for U test.")

    utest = parser.add_argument_group()
    utest.add_argument(
        '--no-utest',
        dest='utest',
        default=True,
        action="store_false",
        help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format(report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS))
    alpha_default = 0.05
    utest.add_argument(
        "--alpha",
        dest='utest_alpha',
        default=alpha_default,
        type=float,
        help=("significance level alpha. if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)") %
        alpha_default)

    subparsers = parser.add_subparsers(
        help='This tool has multiple modes of operation:',
        dest='mode')

    parser_a = subparsers.add_parser(
        'benchmarks',
        help='The most simple use-case, compare all the output of these two benchmarks')
    baseline = parser_a.add_argument_group(
        'baseline', 'The benchmark baseline')
    baseline.add_argument(
        'test_baseline',
        metavar='test_baseline',
        type=argparse.FileType('r'),
        nargs=1,
        help='A benchmark executable or JSON output file')
    contender = parser_a.add_argument_group(
        'contender', 'The benchmark that will be compared against the baseline')
    contender.add_argument(
        'test_contender',
        metavar='test_contender',
        type=argparse.FileType('r'),
        nargs=1,
        help='A benchmark executable or JSON output file')
    parser_a.add_argument(
        'benchmark_options',
        metavar='benchmark_options',
        nargs=argparse.REMAINDER,
        help='Arguments to pass when running benchmark executables')

    parser_b = subparsers.add_parser(
        'filters', help='Compare filter one with the filter two of benchmark')
    baseline = parser_b.add_argument_group(
        'baseline', 'The benchmark baseline')
    baseline.add_argument(
        'test',
        metavar='test',
        type=argparse.FileType('r'),
        nargs=1,
        help='A benchmark executable or JSON output file')
    baseline.add_argument(
        'filter_baseline',
        metavar='filter_baseline',
        type=str,
        nargs=1,
        help='The first filter, that will be used as baseline')
    contender = parser_b.add_argument_group(
        'contender', 'The benchmark that will be compared against the baseline')
    contender.add_argument(
        'filter_contender',
        metavar='filter_contender',
        type=str,
        nargs=1,
        help='The second filter, that will be compared against the baseline')
    parser_b.add_argument(
        'benchmark_options',
        metavar='benchmark_options',
        nargs=argparse.REMAINDER,
        help='Arguments to pass when running benchmark executables')

    parser_c = subparsers.add_parser(
        'benchmarksfiltered',
        help='Compare filter one of first benchmark with filter two of the second benchmark')
    baseline = parser_c.add_argument_group(
        'baseline', 'The benchmark baseline')
    baseline.add_argument(
        'test_baseline',
        metavar='test_baseline',
        type=argparse.FileType('r'),
        nargs=1,
        help='A benchmark executable or JSON output file')
    baseline.add_argument(
        'filter_baseline',
        metavar='filter_baseline',
        type=str,
        nargs=1,
        help='The first filter, that will be used as baseline')
    contender = parser_c.add_argument_group(
        'contender', 'The benchmark that will be compared against the baseline')
    contender.add_argument(
        'test_contender',
        metavar='test_contender',
        type=argparse.FileType('r'),
        nargs=1,
        help='The second benchmark executable or JSON output file, that will be compared against the baseline')
    contender.add_argument(
        'filter_contender',
        metavar='filter_contender',
        type=str,
        nargs=1,
        help='The second filter, that will be compared against the baseline')
    parser_c.add_argument(
        'benchmark_options',
        metavar='benchmark_options',
        nargs=argparse.REMAINDER,
        help='Arguments to pass when running benchmark executables')

    return parser


def main():
    # Parse the command line flags
    parser = create_parser()
    args, unknown_args = parser.parse_known_args()
    if args.mode is None:
        parser.print_help()
        exit(1)
    assert not unknown_args
    benchmark_options = args.benchmark_options

    if args.mode == 'benchmarks':
        test_baseline = args.test_baseline[0].name
        test_contender = args.test_contender[0].name
        filter_baseline = ''
        filter_contender = ''

        # NOTE: if test_baseline == test_contender, you are analyzing the stdev

        description = 'Comparing %s to %s' % (test_baseline, test_contender)
    elif args.mode == 'filters':
        test_baseline = args.test[0].name
        test_contender = args.test[0].name
        filter_baseline = args.filter_baseline[0]
        filter_contender = args.filter_contender[0]

        # NOTE: if filter_baseline == filter_contender, you are analyzing the
        # stdev

        description = 'Comparing %s to %s (from %s)' % (
            filter_baseline, filter_contender, args.test[0].name)
    elif args.mode == 'benchmarksfiltered':
        test_baseline = args.test_baseline[0].name
        test_contender = args.test_contender[0].name
        filter_baseline = args.filter_baseline[0]
        filter_contender = args.filter_contender[0]

        # NOTE: if test_baseline == test_contender and
        # filter_baseline == filter_contender, you are analyzing the stdev

        description = 'Comparing %s (from %s) to %s (from %s)' % (
            filter_baseline, test_baseline, filter_contender, test_contender)
    else:
        # should never happen
        print("Unrecognized mode of operation: '%s'" % args.mode)
        parser.print_help()
        exit(1)

    check_inputs(test_baseline, test_contender, benchmark_options)

    if args.display_aggregates_only:
        benchmark_options += ['--benchmark_display_aggregates_only=true']

    options_baseline = []
    options_contender = []

    if filter_baseline and filter_contender:
        options_baseline = ['--benchmark_filter=%s' % filter_baseline]
        options_contender = ['--benchmark_filter=%s' % filter_contender]

    # Run the benchmarks and report the results
    json1 = json1_orig = gbench.util.run_or_load_benchmark(
        test_baseline, benchmark_options + options_baseline)
    json2 = json2_orig = gbench.util.run_or_load_benchmark(
        test_contender, benchmark_options + options_contender)

    # Now, filter the benchmarks so that the difference report can work
    if filter_baseline and filter_contender:
        replacement = '[%s vs. %s]' % (filter_baseline, filter_contender)
        json1 = gbench.report.filter_benchmark(
            json1_orig, filter_baseline, replacement)
        json2 = gbench.report.filter_benchmark(
            json2_orig, filter_contender, replacement)

    # Diff and output
    output_lines = gbench.report.generate_difference_report(
        json1, json2, args.display_aggregates_only,
        args.utest, args.utest_alpha)
    print(description)
    for ln in output_lines:
        print(ln)


class TestParser(unittest.TestCase):
    def setUp(self):
        self.parser = create_parser()
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'gbench',
            'Inputs')
        self.testInput0 = os.path.join(testInputs, 'test1_run1.json')
        self.testInput1 = os.path.join(testInputs, 'test1_run2.json')

    def test_benchmarks_basic(self):
        parsed = self.parser.parse_args(
            ['benchmarks', self.testInput0, self.testInput1])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, 'benchmarks')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_basic_without_utest(self):
        parsed = self.parser.parse_args(
            ['--no-utest', 'benchmarks', self.testInput0, self.testInput1])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertFalse(parsed.utest)
        self.assertEqual(parsed.utest_alpha, 0.05)
        self.assertEqual(parsed.mode, 'benchmarks')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_basic_display_aggregates_only(self):
        parsed = self.parser.parse_args(
            ['-a', 'benchmarks', self.testInput0, self.testInput1])
        self.assertTrue(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, 'benchmarks')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_basic_with_utest_alpha(self):
        parsed = self.parser.parse_args(
            ['--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.utest_alpha, 0.314)
        self.assertEqual(parsed.mode, 'benchmarks')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_basic_without_utest_with_utest_alpha(self):
        parsed = self.parser.parse_args(
            ['--no-utest', '--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertFalse(parsed.utest)
        self.assertEqual(parsed.utest_alpha, 0.314)
        self.assertEqual(parsed.mode, 'benchmarks')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_with_remainder(self):
        parsed = self.parser.parse_args(
            ['benchmarks', self.testInput0, self.testInput1, 'd'])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, 'benchmarks')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.benchmark_options, ['d'])

    def test_benchmarks_with_remainder_after_doubleminus(self):
        parsed = self.parser.parse_args(
            ['benchmarks', self.testInput0, self.testInput1, '--', 'e'])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, 'benchmarks')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.benchmark_options, ['e'])

    def test_filters_basic(self):
        parsed = self.parser.parse_args(
            ['filters', self.testInput0, 'c', 'd'])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, 'filters')
        self.assertEqual(parsed.test[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], 'c')
        self.assertEqual(parsed.filter_contender[0], 'd')
        self.assertFalse(parsed.benchmark_options)

    def test_filters_with_remainder(self):
        parsed = self.parser.parse_args(
            ['filters', self.testInput0, 'c', 'd', 'e'])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, 'filters')
        self.assertEqual(parsed.test[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], 'c')
        self.assertEqual(parsed.filter_contender[0], 'd')
        self.assertEqual(parsed.benchmark_options, ['e'])

    def test_filters_with_remainder_after_doubleminus(self):
        parsed = self.parser.parse_args(
            ['filters', self.testInput0, 'c', 'd', '--', 'f'])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, 'filters')
        self.assertEqual(parsed.test[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], 'c')
        self.assertEqual(parsed.filter_contender[0], 'd')
        self.assertEqual(parsed.benchmark_options, ['f'])

    def test_benchmarksfiltered_basic(self):
        parsed = self.parser.parse_args(
            ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e'])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, 'benchmarksfiltered')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], 'c')
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.filter_contender[0], 'e')
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarksfiltered_with_remainder(self):
        parsed = self.parser.parse_args(
            ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', 'f'])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, 'benchmarksfiltered')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], 'c')
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.filter_contender[0], 'e')
        self.assertEqual(parsed.benchmark_options[0], 'f')

    def test_benchmarksfiltered_with_remainder_after_doubleminus(self):
        parsed = self.parser.parse_args(
            ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', '--', 'g'])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, 'benchmarksfiltered')
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], 'c')
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.filter_contender[0], 'e')
        self.assertEqual(parsed.benchmark_options[0], 'g')


if __name__ == '__main__':
    # unittest.main()
    main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;
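For reference, the three modes wired up in the parser above map onto command lines of the following shape (a sketch inferred from the positional arguments; the file names and filter expressions such as BM_Scan are hypothetical placeholders, not taken from the commit):

$ compare.py benchmarks base.json contender.json
$ compare.py filters bench.json BM_Scan BM_Sort
$ compare.py benchmarksfiltered base.json BM_Scan contender.json BM_Scan

In 'filters' mode a single run is compared against itself under two different --benchmark_filter expressions; in 'benchmarksfiltered' mode the two filters come from two separate runs.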
Utilities/Scripts/gbench/__init__.py (new file, 8 lines)
@@ -0,0 +1,8 @@
"""Google Benchmark tooling"""

__author__ = 'Eric Fiselier'
__email__ = 'eric@efcs.ca'
__versioninfo__ = (0, 5, 0)
__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'

__all__ = []
Utilities/Scripts/gbench/report.py (new file, 541 lines; the page cuts off partway through this diff)
@@ -0,0 +1,541 @@
import unittest
"""report.py - Utilities for reporting statistics about benchmark results
"""
import os
import re
import copy

from scipy.stats import mannwhitneyu


class BenchmarkColor(object):
    def __init__(self, name, code):
        self.name = name
        self.code = code

    def __repr__(self):
        return '%s%r' % (self.__class__.__name__,
                         (self.name, self.code))

    def __format__(self, format):
        return self.code


# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')

UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number, More is better.
UTEST_COL_NAME = "_pvalue"


def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if not use_color:
        args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                for arg in args]
        kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                  for key, arg in kwargs.items()}
    return fmt_str.format(*args, **kwargs)


def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects
    """
    longest_name = 1
    for bc in benchmark_list:
        if len(bc['name']) > longest_name:
            longest_name = len(bc['name'])
    return longest_name


def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and new_val.
    """
    if old_val == 0 and new_val == 0:
        return 0.0
    if old_val == 0:
        return float(new_val - old_val) / (float(old_val + new_val) / 2)
    return float(new_val - old_val) / abs(old_val)


def filter_benchmark(json_orig, family, replacement=""):
    """
    Apply a filter to the json, and only leave the 'family' of benchmarks.
    """
    regex = re.compile(family)
    filtered = {}
    filtered['benchmarks'] = []
    for be in json_orig['benchmarks']:
        if not regex.search(be['name']):
            continue
        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
        filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
        filtered['benchmarks'].append(filteredbench)
    return filtered


def get_unique_benchmark_names(json):
    """
    While *keeping* the order, give all the unique 'names' used for benchmarks.
    """
    seen = set()
    uniqued = [x['name'] for x in json['benchmarks']
               if x['name'] not in seen and
               (seen.add(x['name']) or True)]
    return uniqued


def intersect(list1, list2):
    """
    Given two lists, get a new list consisting of the elements only contained
    in *both of the input lists*, while preserving the ordering.
    """
    return [x for x in list1 if x in list2]


def is_potentially_comparable_benchmark(x):
    return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x)


def partition_benchmarks(json1, json2):
    """
    While preserving the ordering, find benchmarks with the same names in
    both of the inputs, and group them.
    (i.e. partition/filter into groups with common name)
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)
    partitions = []
    for name in names:
        time_unit = None
        # Pick the time unit from the first entry of the lhs benchmark.
        # We should be careful not to crash with unexpected input.
        for x in json1['benchmarks']:
            if (x['name'] == name and is_potentially_comparable_benchmark(x)):
                time_unit = x['time_unit']
                break
        if time_unit is None:
            continue
        # Filter by name and time unit.
        # All the repetitions are assumed to be comparable.
        lhs = [x for x in json1['benchmarks'] if x['name'] == name and
               x['time_unit'] == time_unit]
        rhs = [x for x in json2['benchmarks'] if x['name'] == name and
               x['time_unit'] == time_unit]
        partitions.append([lhs, rhs])
    return partitions


def extract_field(partition, field_name):
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]

def calc_utest(timings_cpu, timings_time):
    min_rep_cnt = min(len(timings_time[0]),
                      len(timings_time[1]),
                      len(timings_cpu[0]),
                      len(timings_cpu[1]))

    # Does *everything* has at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative='two-sided').pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue

def print_utest(partition, utest_alpha, first_col_width, use_color=True):
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    timings_time = extract_field(partition, 'real_time')
    timings_cpu = extract_field(partition, 'cpu_time')
    have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time)

    # Check if we failed miserably with minimum required repetitions for utest
    if not have_optimal_repetitions and cpu_pvalue is None and time_pvalue is None:
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        len(timings_cpu[0]), len(timings_cpu[1]))
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not have_optimal_repetitions:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS)

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"

    last_name = partition[0][0]['name']
    return [color_format(use_color,
                         special_str,
                         BC_HEADER,
                         "{}{}".format(last_name, UTEST_COL_NAME),
                         first_col_width,
                         get_utest_color(time_pvalue), time_pvalue,
                         get_utest_color(cpu_pvalue), cpu_pvalue,
                         dsc_color, dsc,
                         endc=BC_ENDC)]


def generate_difference_report(
        json1,
        json2,
        display_aggregates_only=False,
        utest=False,
        utest_alpha=0.05,
        use_color=True):
    """
    Calculate and report the difference between each test of two benchmarks
    runs specified as 'json1' and 'json2'.
    """
    assert utest is True or utest is False
    first_col_width = find_longest_name(json1['benchmarks'])

    def find_test(name):
        for b in json2['benchmarks']:
            if b['name'] == name:
                return b
        return None

    first_col_width = max(
        first_col_width,
        len('Benchmark'))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
        'Benchmark', 12 + first_col_width)
    output_strs = [first_line, '-' * len(first_line)]

    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        # Careful, we may have different repetition count.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]

            # *If* we were asked to only display aggregates,
            # and if it is non-aggregate, then skip it.
            if display_aggregates_only and 'run_type' in bn and 'run_type' in other_bench:
                assert bn['run_type'] == other_bench['run_type']
                if bn['run_type'] != 'aggregate':
                    continue

            fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"

            def get_color(res):
                if res > 0.05:
                    return BC_FAIL
                elif res > -0.07:
                    return BC_WHITE
                else:
                    return BC_CYAN

            tres = calculate_change(bn['real_time'], other_bench['real_time'])
            cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
            output_strs += [color_format(use_color,
                                         fmt_str,
                                         BC_HEADER,
                                         bn['name'],
                                         first_col_width,
                                         get_color(tres),
                                         tres,
                                         get_color(cpures),
                                         cpures,
                                         bn['real_time'],
                                         other_bench['real_time'],
                                         bn['cpu_time'],
                                         other_bench['cpu_time'],
                                         endc=BC_ENDC)]

        # After processing the whole partition, if requested, do the U test.
        if utest:
            output_strs += print_utest(partition,
                                       utest_alpha=utest_alpha,
                                       first_col_width=first_col_width,
                                       use_color=use_color)

    return output_strs


###############################################################################
# Unit tests


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput = os.path.join(testInputs, 'test3_run0.json')
        with open(testOutput, 'r') as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            'BM_One',
            'BM_Two',
            'short',  # These two are not sorted
            'medium',  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput1 = os.path.join(testInputs, 'test1_run1.json')
        testOutput2 = os.path.join(testInputs, 'test1_run2.json')
        with open(testOutput1, 'r') as f:
            json1 = json.load(f)
        with open(testOutput2, 'r') as f:
            json2 = json.load(f)
        return json1, json2

    def test_basic(self):
        expect_lines = [
            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
            ['BM_100xSlower', '+99.0000', '+99.0000',
             '100', '10000', '100', '10000'],
            ['BM_100xFaster', '-0.9900', '-0.9900',
             '10000', '100', '10000', '100'],
            ['BM_10PercentCPUToTime', '+0.1000',
             '-0.1000', '100', '110', '100', '90'],
            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
            ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
        ]
        json1, json2 = self.load_results()
        output_lines_with_header = generate_difference_report(
            json1, json2, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    def load_result(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput = os.path.join(testInputs, 'test2_run.json')
        with open(testOutput, 'r') as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
        ]
        json = self.load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        output_lines_with_header = generate_difference_report(
            json1, json2, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)


class TestReportDifferenceWithUTest(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput1 = os.path.join(testInputs, 'test3_run0.json')
        testOutput2 = os.path.join(testInputs, 'test3_run1.json')
        with open(testOutput1, 'r') as f:
            json1 = json.load(f)
        with open(testOutput2, 'r') as f:
            json2 = json.load(f)
        return json1, json2

    def test_utest(self):
        expect_lines = []
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '0.6985',
             '0.6985',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.1489',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
        ]
        json1, json2 = self.load_results()
        output_lines_with_header = generate_difference_report(
            json1, json2, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
        unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput1 = os.path.join(testInputs, 'test3_run0.json')
        testOutput2 = os.path.join(testInputs, 'test3_run1.json')
        with open(testOutput1, 'r') as f:
            json1 = json.load(f)
        with open(testOutput2, 'r') as f:
            json2 = json.load(f)
        return json1, json2

    def test_utest(self):
        expect_lines = []
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '0.6985',
             '0.6985',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.1489',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
        ]
        json1, json2 = self.load_results()
        output_lines_with_header = generate_difference_report(
|
||||||
|
json1, json2, display_aggregates_only=True,
|
||||||
|
utest=True, utest_alpha=0.05, use_color=False)
|
||||||
|
output_lines = output_lines_with_header[2:]
|
||||||
|
print("\n")
|
||||||
|
print("\n".join(output_lines_with_header))
|
||||||
|
self.assertEqual(len(output_lines), len(expect_lines))
|
||||||
|
for i in range(0, len(output_lines)):
|
||||||
|
parts = [x for x in output_lines[i].split(' ') if x]
|
||||||
|
self.assertEqual(expect_lines[i], parts)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
||||||
|
|
||||||
|
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
|
||||||
|
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
|
||||||
|
# kate: indent-mode python; remove-trailing-spaces modified;
|
Utilities/Scripts/gbench/util.py (new file, 164 lines)
@@ -0,0 +1,164 @@
"""util.py - General utilities for running, loading, and processing benchmarks
"""
import json
import os
import tempfile
import subprocess
import sys

# Input file type enumeration
IT_Invalid = 0
IT_JSON = 1
IT_Executable = 2

_num_magic_bytes = 2 if sys.platform.startswith('win') else 4


def is_executable_file(filename):
    """
    Return 'True' if 'filename' names a valid file which is likely
    an executable. A file is considered an executable if it starts with the
    magic bytes for a EXE, Mach O, or ELF file.
    """
    if not os.path.isfile(filename):
        return False
    with open(filename, mode='rb') as f:
        magic_bytes = f.read(_num_magic_bytes)
    if sys.platform == 'darwin':
        return magic_bytes in [
            b'\xfe\xed\xfa\xce',  # MH_MAGIC
            b'\xce\xfa\xed\xfe',  # MH_CIGAM
            b'\xfe\xed\xfa\xcf',  # MH_MAGIC_64
            b'\xcf\xfa\xed\xfe',  # MH_CIGAM_64
            b'\xca\xfe\xba\xbe',  # FAT_MAGIC
            b'\xbe\xba\xfe\xca'   # FAT_CIGAM
        ]
    elif sys.platform.startswith('win'):
        return magic_bytes == b'MZ'
    else:
        return magic_bytes == b'\x7FELF'


def is_json_file(filename):
    """
    Returns 'True' if 'filename' names a valid JSON output file.
    'False' otherwise.
    """
    try:
        with open(filename, 'r') as f:
            json.load(f)
        return True
    except BaseException:
        pass
    return False


def classify_input_file(filename):
    """
    Return a tuple (type, msg) where 'type' specifies the classified type
    of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable
    string representing the error.
    """
    ftype = IT_Invalid
    err_msg = None
    if not os.path.exists(filename):
        err_msg = "'%s' does not exist" % filename
    elif not os.path.isfile(filename):
        err_msg = "'%s' does not name a file" % filename
    elif is_executable_file(filename):
        ftype = IT_Executable
    elif is_json_file(filename):
        ftype = IT_JSON
    else:
        err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename
    return ftype, err_msg


def check_input_file(filename):
    """
    Classify the file named by 'filename' and return the classification.
    If the file is classified as 'IT_Invalid' print an error message and exit
    the program.
    """
    ftype, msg = classify_input_file(filename)
    if ftype == IT_Invalid:
        print("Invalid input file: %s" % msg)
        sys.exit(1)
    return ftype


def find_benchmark_flag(prefix, benchmark_flags):
    """
    Search the specified list of flags for a flag matching `<prefix><arg>` and
    if it is found return the arg it specifies. If specified more than once the
    last value is returned. If the flag is not found None is returned.
    """
    assert prefix.startswith('--') and prefix.endswith('=')
    result = None
    for f in benchmark_flags:
        if f.startswith(prefix):
            result = f[len(prefix):]
    return result


def remove_benchmark_flags(prefix, benchmark_flags):
    """
    Return a new list containing the specified benchmark_flags except those
    with the specified prefix.
    """
    assert prefix.startswith('--') and prefix.endswith('=')
    return [f for f in benchmark_flags if not f.startswith(prefix)]


def load_benchmark_results(fname):
    """
    Read benchmark output from a file and return the JSON object.
    REQUIRES: 'fname' names a file containing JSON benchmark output.
    """
    with open(fname, 'r') as f:
        return json.load(f)


def run_benchmark(exe_name, benchmark_flags):
    """
    Run a benchmark specified by 'exe_name' with the specified
    'benchmark_flags'. The benchmark is run directly as a subprocess to preserve
    real time console output.
    RETURNS: A JSON object representing the benchmark output
    """
    output_name = find_benchmark_flag('--benchmark_out=',
                                      benchmark_flags)
    is_temp_output = False
    if output_name is None:
        is_temp_output = True
        thandle, output_name = tempfile.mkstemp()
        os.close(thandle)
        benchmark_flags = list(benchmark_flags) + \
            ['--benchmark_out=%s' % output_name]

    cmd = [exe_name] + benchmark_flags
    print("RUNNING: %s" % ' '.join(cmd))
    exitCode = subprocess.call(cmd)
    if exitCode != 0:
        print('TEST FAILED...')
        sys.exit(exitCode)
    json_res = load_benchmark_results(output_name)
    if is_temp_output:
        os.unlink(output_name)
    return json_res


def run_or_load_benchmark(filename, benchmark_flags):
    """
    Get the results for a specified benchmark. If 'filename' specifies
    an executable benchmark then the results are generated by running the
    benchmark. Otherwise 'filename' must name a valid JSON output file,
    which is loaded and the result returned.
    """
    ftype = check_input_file(filename)
    if ftype == IT_JSON:
        return load_benchmark_results(filename)
    elif ftype == IT_Executable:
        return run_benchmark(filename, benchmark_flags)
    else:
        assert False  # This branch is unreachable
Utilities/Scripts/strip_asm.py (new executable file, 151 lines)
@@ -0,0 +1,151 @@
#!/usr/bin/env python

"""
strip_asm.py - Cleanup ASM output for the specified file
"""

from argparse import ArgumentParser
import sys
import os
import re


def find_used_labels(asm):
    found = set()
    label_re = re.compile("\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    for l in asm.splitlines():
        m = label_re.match(l)
        if m:
            found.add('.L%s' % m.group(1))
    return found


def normalize_labels(asm):
    decls = set()
    label_decl = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    for l in asm.splitlines():
        m = label_decl.match(l)
        if m:
            decls.add(m.group(0))
    if len(decls) == 0:
        return asm
    needs_dot = next(iter(decls))[0] != '.'
    if not needs_dot:
        return asm
    for ld in decls:
        asm = re.sub("(^|\s+)" + ld + "(?=:|\s)", '\\1.' + ld, asm)
    return asm


def transform_labels(asm):
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    new_asm = ''
    label_decl = re.compile("^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    for l in asm.splitlines():
        m = label_decl.match(l)
        if not m or m.group(0) in used_decls:
            new_asm += l
            new_asm += '\n'
    return new_asm


def is_identifier(tk):
    if len(tk) == 0:
        return False
    first = tk[0]
    if not first.isalpha() and first != '_':
        return False
    for i in range(1, len(tk)):
        c = tk[i]
        if not c.isalnum() and c != '_':
            return False
    return True


def process_identifiers(l):
    """
    process_identifiers - process all identifiers and modify them to have
    consistent names across all platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that.
    """
    parts = re.split(r'([a-zA-Z0-9_]+)', l)
    new_line = ''
    for tk in parts:
        if is_identifier(tk):
            if tk.startswith('__Z'):
                tk = tk[1:]
            elif tk.startswith('_') and len(tk) > 1 and \
                    tk[1].isalpha() and tk[1] != 'Z':
                tk = tk[1:]
        new_line += tk
    return new_line


def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines
    """
    new_contents = ''
    asm = transform_labels(asm)

    # TODO: Add more things we want to remove
    discard_regexes = [
        re.compile("\s+\..*$"),  # directive
        re.compile("\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile("\s*#.*$"),  # comment line
        re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
        re.compile("\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
    ]
    keep_regexes = [

    ]
    fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")
    for l in asm.splitlines():
        # Remove Mach-O attribute
        l = l.replace('@GOTPCREL', '')
        add_line = True
        for reg in discard_regexes:
            if reg.match(l) is not None:
                add_line = False
                break
        for reg in keep_regexes:
            if reg.match(l) is not None:
                add_line = True
                break
        if add_line:
            if fn_label_def.match(l) and len(new_contents) != 0:
                new_contents += '\n'
            l = process_identifiers(l)
            new_contents += l
            new_contents += '\n'
    return new_contents


def main():
    parser = ArgumentParser(
        description='generate a stripped assembly file')
    parser.add_argument(
        'input', metavar='input', type=str, nargs=1,
        help='An input assembly file')
    parser.add_argument(
        'out', metavar='output', type=str, nargs=1,
        help='The output file')
    args, unknown_args = parser.parse_known_args()
    input = args.input[0]
    output = args.out[0]
    if not os.path.isfile(input):
        print(("ERROR: input file '%s' does not exist") % input)
        sys.exit(1)
    contents = None
    with open(input, 'r') as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output, 'w') as f:
        f.write(new_contents)


if __name__ == '__main__':
    main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;
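Per the argument parser above, the script takes an input assembly file and an output path; a typical invocation (the file names here are only illustrative) is:

```sh
# Strip directives, comments, and unused labels from a generated assembly file.
$ python strip_asm.py BenchmarkDeviceAdapter.s BenchmarkDeviceAdapter.stripped.s
```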
@@ -473,12 +473,25 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(BenchExecToContReadWrite,

 int main(int argc, char* argv[])
 {
-  // Parse VTK-m options:
-  auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
-  Config = vtkm::cont::Initialize(argc, argv, opts);
+  auto opts = vtkm::cont::InitializeOptions::RequireDevice;

-  vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  // Initialize command line args
+  std::vector<char*> args(argv, argv + argc);
+  vtkm::bench::detail::InitializeArgs(&argc, args, opts);
+
+  // Parse VTK-m options:
+  Config = vtkm::cont::Initialize(argc, args.data(), opts);
+
+  // This occurs when it is help
+  if (opts == vtkm::cont::InitializeOptions::None)
+  {
+    std::cout << Config.Usage << std::endl;
+  }
+  else
+  {
+    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  }

   // handle benchmarking related args and run benchmarks:
-  VTKM_EXECUTE_BENCHMARKS(argc, argv);
+  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
 }
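With this pattern in place, anything that VTK-m's `Initialize` does not consume is forwarded through `args` to Google Benchmark, which is the point of this commit. A hedged, illustrative invocation (flag spellings depend on the build; `--benchmark_repetitions` and `--benchmark_out` are standard Google Benchmark options):

```sh
# Illustrative only: '-d' selects the VTK-m device, the remaining flags are
# forwarded unchanged to Google Benchmark.
$ bin/BenchmarkArrayTransfer -d Serial --benchmark_repetitions=3 --benchmark_out=baseline.json
```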
@@ -506,11 +506,24 @@
 int main(int argc, char* argv[])
 {
   // Parse VTK-m options:
-  auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
-  Config = vtkm::cont::Initialize(argc, argv, opts);
+  auto opts = vtkm::cont::InitializeOptions::RequireDevice;

-  vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  std::vector<char*> args(argv, argv + argc);
+  vtkm::bench::detail::InitializeArgs(&argc, args, opts);
+
+  // Parse VTK-m options:
+  Config = vtkm::cont::Initialize(argc, args.data(), opts);
+
+  // This occurs when it is help
+  if (opts == vtkm::cont::InitializeOptions::None)
+  {
+    std::cout << Config.Usage << std::endl;
+  }
+  else
+  {
+    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  }

   // handle benchmarking related args and run benchmarks:
-  VTKM_EXECUTE_BENCHMARKS(argc, argv);
+  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
 }
@@ -95,11 +95,23 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(CopySpeed,
 int main(int argc, char* argv[])
 {
   // Parse VTK-m options:
-  auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
-  Config = vtkm::cont::Initialize(argc, argv, opts);
+  auto opts = vtkm::cont::InitializeOptions::RequireDevice;

-  // Setup device:
-  vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  std::vector<char*> args(argv, argv + argc);
+  vtkm::bench::detail::InitializeArgs(&argc, args, opts);
+
+  // Parse VTK-m options:
+  Config = vtkm::cont::Initialize(argc, args.data(), opts);
+
+  // This occurs when it is help
+  if (opts == vtkm::cont::InitializeOptions::None)
+  {
+    std::cout << Config.Usage << std::endl;
+  }
+  else
+  {
+    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  }

   // Handle NumThreads command-line arg:
 #ifdef VTKM_ENABLE_TBB
@@ -126,5 +138,5 @@ int main(int argc, char* argv[])
 #endif // TBB

   // handle benchmarking related args and run benchmarks:
-  VTKM_EXECUTE_BENCHMARKS(argc, argv);
+  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
 }
@@ -39,6 +39,40 @@
 namespace
 {

+// Parametrize the input size samples for most of the benchmarks
+//
+// Define at compile time:
+//
+// Being VTKm_BENCHS_RANGE_LOWER_BOUNDARY b0 and,
+// being VTKm_BENCHS_RANGE_UPPER_BOUNDARY b1
+//
+// This will create the following sample sizes b0, b0*2^3, b0*2^6, ..., b1.
+//
+// Notice that setting up VTKm_BENCHS_RANGE_LOWER_BOUNDARY / VTKm_BENCHS_RANGE_UPPER_BOUNDARY
+// will affect both ShortRange and FullRange.
+//
+#ifndef VTKm_BENCHS_RANGE_LOWER_BOUNDARY
+#define FULL_RANGE_LOWER_BOUNDARY (1 << 12)  // 4 KiB
+#define SHORT_RANGE_LOWER_BOUNDARY (1 << 15) // 32 KiB
+
+#else
+#define FULL_RANGE_LOWER_BOUNDARY (VTKm_BENCHS_RANGE_LOWER_BOUNDARY)
+#define SHORT_RANGE_LOWER_BOUNDARY (VTKm_BENCHS_RANGE_LOWER_BOUNDARY)
+
+#endif
+
+#ifndef VTKm_BENCHS_RANGE_UPPER_BOUNDARY
+#define FULL_RANGE_UPPER_BOUNDARY (1 << 27)  // 128 MiB
+#define SHORT_RANGE_UPPER_BOUNDARY (1 << 27) // 128 MiB
+#define BITFIELD_TO_UNORDEREDSET_MAX_SAMPLING (1 << 26) // 64 MiB
+
+#else
+#define FULL_RANGE_UPPER_BOUNDARY (VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
+#define SHORT_RANGE_UPPER_BOUNDARY (VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
+#define BITFIELD_TO_UNORDEREDSET_MAX_SAMPLING (VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
+
+#endif
+
 // Default sampling rate is x8 and always includes min/max,
 // so this will generate 7 samples at:
 // 1: 4 KiB
@@ -47,15 +81,17 @@ namespace
 // 4: 2 MiB
 // 5: 16 MiB
 // 6: 128 MiB
-static const std::pair<int64_t, int64_t> FullRange{ 1 << 12, 1 << 27 }; // 4KiB, 128MiB
+static const std::pair<int64_t, int64_t> FullRange{ FULL_RANGE_LOWER_BOUNDARY,
+                                                    FULL_RANGE_UPPER_BOUNDARY };

 // Smaller range that can be used to reduce the number of benchmarks. Used
 // with `RangeMultiplier(SmallRangeMultiplier)`, this produces:
 // 1: 32 KiB
 // 2: 2 MiB
 // 3: 128 MiB
-static const std::pair<int64_t, int64_t> SmallRange{ 1 << 15, 1 << 27 }; // 4KiB, 128MiB
-static constexpr int SmallRangeMultiplier = 1 << 21; // Ensure a sample at 2MiB
+static const std::pair<int64_t, int64_t> SmallRange{ SHORT_RANGE_LOWER_BOUNDARY,
+                                                     SHORT_RANGE_UPPER_BOUNDARY };
+static constexpr int SmallRangeMultiplier = 1 << 21; // Ensure a sample at 2MiB

 using TypeList = vtkm::List<vtkm::UInt8,
                             vtkm::Float32,
@@ -351,7 +387,7 @@ void BenchBitFieldToUnorderedSetGenerator(benchmark::internal::Benchmark* bm)
 {
   // Use a reduced NUM_BYTES_MAX value here -- these benchmarks allocate one
   // 8-byte id per bit, so this caps the index array out at 512 MB:
-  static constexpr int64_t numBytesMax = 1 << 26; // 64 MiB of bits
+  static int64_t numBytesMax = std::min(1 << 29, BITFIELD_TO_UNORDEREDSET_MAX_SAMPLING);

   bm->UseManualTime();
   bm->ArgNames({ "Size", "C" });
@@ -393,6 +429,7 @@ void BenchCopy(benchmark::State& state)
   state.SetBytesProcessed(static_cast<int64_t>(numBytes) * iterations);
   state.SetItemsProcessed(static_cast<int64_t>(numValues) * iterations);
 };
+
 VTKM_BENCHMARK_TEMPLATES_OPTS(BenchCopy, ->Ranges({ FullRange })->ArgName("Size"), TypeList);

 template <typename ValueType>
@@ -534,7 +571,7 @@ void BenchCountSetBitsGenerator(benchmark::internal::Benchmark* bm)

   for (int64_t config = 0; config < 6; ++config)
   {
-    bm->Ranges({ FullRange, { config, config } });
+    bm->Ranges({ { FullRange.first, FullRange.second }, { config, config } });
   }
 }
 VTKM_BENCHMARK_APPLY(BenchCountSetBits, BenchCountSetBitsGenerator);
@@ -1053,8 +1090,10 @@ void BenchmarkStableSortIndicesUniqueGenerator(benchmark::internal::Benchmark* b
   bm->ArgNames({ "Size", "%Uniq" });
   for (int64_t pcntUnique = 0; pcntUnique <= 100; pcntUnique += 25)
   {
-    // Cap the max size here at 21 MiB. This sort is too slow.
-    bm->Ranges({ { SmallRange.first, 1 << 21 }, { pcntUnique, pcntUnique } });
+    // Cap the max size here at 2 MiB. This sort is too slow.
+    const int64_t maxSize = 1 << 21;
+    bm->Ranges(
+      { { SmallRange.first, std::min(maxSize, SmallRange.second) }, { pcntUnique, pcntUnique } });
   }
 }

@@ -1167,12 +1206,23 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(BenchUpperBounds,

 int main(int argc, char* argv[])
 {
-  // Parse VTK-m options:
-  auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
-  Config = vtkm::cont::Initialize(argc, argv, opts);
+  auto opts = vtkm::cont::InitializeOptions::RequireDevice;

-  // Setup device:
-  vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  std::vector<char*> args(argv, argv + argc);
+  vtkm::bench::detail::InitializeArgs(&argc, args, opts);
+
+  // Parse VTK-m options:
+  Config = vtkm::cont::Initialize(argc, args.data(), opts);
+
+  // This occurs when it is help
+  if (opts == vtkm::cont::InitializeOptions::None)
+  {
+    std::cout << Config.Usage << std::endl;
+  }
+  else
+  {
+    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  }

   // Handle NumThreads command-line arg:
 #ifdef VTKM_ENABLE_TBB
@@ -1199,5 +1249,5 @@ int main(int argc, char* argv[])
 #endif // TBB

   // handle benchmarking related args and run benchmarks:
-  VTKM_EXECUTE_BENCHMARKS(argc, argv);
+  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
 }
@@ -942,12 +942,24 @@ VTKM_BENCHMARK(Bench2VirtualImplicitFunctions);
 int main(int argc, char* argv[])
 {
   // Parse VTK-m options:
-  auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
-  Config = vtkm::cont::Initialize(argc, argv, opts);
+  auto opts = vtkm::cont::InitializeOptions::RequireDevice;

-  // Setup device:
-  vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  std::vector<char*> args(argv, argv + argc);
+  vtkm::bench::detail::InitializeArgs(&argc, args, opts);
+
+  // Parse VTK-m options:
+  Config = vtkm::cont::Initialize(argc, args.data(), opts);
+
+  // This occurs when it is help
+  if (opts == vtkm::cont::InitializeOptions::None)
+  {
+    std::cout << Config.Usage << std::endl;
+  }
+  else
+  {
+    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  }

   // handle benchmarking related args and run benchmarks:
-  VTKM_EXECUTE_BENCHMARKS(argc, argv);
+  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
 }
@@ -1040,12 +1040,23 @@ void InitDataSet(int& argc, char** argv)
 int main(int argc, char* argv[])
 {
   auto opts = vtkm::cont::InitializeOptions::RequireDevice;
-  Config = vtkm::cont::Initialize(argc, argv, opts);

-  // Setup device:
-  vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  std::vector<char*> args(argv, argv + argc);
+  vtkm::bench::detail::InitializeArgs(&argc, args, opts);

-  InitDataSet(argc, argv);
+  // Parse VTK-m options:
+  Config = vtkm::cont::Initialize(argc, args.data(), opts);
+
+  // This occurs when it is help
+  if (opts == vtkm::cont::InitializeOptions::None)
+  {
+    std::cout << Config.Usage << std::endl;
+  }
+  else
+  {
+    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+    InitDataSet(argc, args.data());
+  }

   const std::string dataSetSummary = []() -> std::string {
     std::ostringstream out;
@@ -1054,5 +1065,5 @@ int main(int argc, char* argv[])
   }();

   // handle benchmarking related args and run benchmarks:
-  VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, dataSetSummary);
+  VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, args.data(), dataSetSummary);
 }
@@ -116,13 +116,24 @@ VTKM_BENCHMARK(BenchRayTracing);

 int main(int argc, char* argv[])
 {
-  // Parse VTK-m options:
-  auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
-  Config = vtkm::cont::Initialize(argc, argv, opts);
+  auto opts = vtkm::cont::InitializeOptions::RequireDevice;

-  // Setup device:
-  vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  std::vector<char*> args(argv, argv + argc);
+  vtkm::bench::detail::InitializeArgs(&argc, args, opts);
+
+  // Parse VTK-m options:
+  Config = vtkm::cont::Initialize(argc, args.data(), opts);
+
+  // This occurs when it is help
+  if (opts == vtkm::cont::InitializeOptions::None)
+  {
+    std::cout << Config.Usage << std::endl;
+  }
+  else
+  {
+    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  }

   // handle benchmarking related args and run benchmarks:
-  VTKM_EXECUTE_BENCHMARKS(argc, argv);
+  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
 }
@@ -380,12 +380,24 @@ VTKM_BENCHMARK_TEMPLATES(BenchClassificationDynamic, ValueTypes);
 int main(int argc, char* argv[])
 {
   // Parse VTK-m options:
-  auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
-  Config = vtkm::cont::Initialize(argc, argv, opts);
+  auto opts = vtkm::cont::InitializeOptions::RequireDevice;

-  // Setup device:
-  vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  std::vector<char*> args(argv, argv + argc);
+  vtkm::bench::detail::InitializeArgs(&argc, args, opts);
+
+  // Parse VTK-m options:
+  Config = vtkm::cont::Initialize(argc, args.data(), opts);
+
+  // This occurs when it is help
+  if (opts == vtkm::cont::InitializeOptions::None)
+  {
+    std::cout << Config.Usage << std::endl;
+  }
+  else
+  {
+    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  }

   // handle benchmarking related args and run benchmarks:
-  VTKM_EXECUTE_BENCHMARKS(argc, argv);
+  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
 }
@@ -388,6 +388,37 @@ static inline vtkm::Id ExecuteBenchmarks(int& argc,

   return static_cast<vtkm::Id>(num);
 }
+
+void InitializeArgs(int* argc, std::vector<char*>& args, vtkm::cont::InitializeOptions& opts)
+{
+  bool isHelp = false;
+
+  // Inject --help
+  if (*argc == 1)
+  {
+    const char* help = "--help"; // We want it to be static
+    args.push_back(const_cast<char*>(help));
+    *argc = *argc + 1;
+  }
+
+  args.push_back(nullptr);
+
+  for (size_t i = 0; i < static_cast<size_t>(*argc); ++i)
+  {
+    auto opt_s = std::string(args[i]);
+    if (opt_s == "--help" || opt_s == "-help" || opt_s == "-h")
+    {
+      isHelp = true;
+    }
+  }
+
+  if (!isHelp)
+  {
+    return;
+  }
+
+  opts = vtkm::cont::InitializeOptions::None;
+}
 }
 }
 } // end namespace vtkm::bench::detail
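A consequence of the `--help` injection above is that running a benchmark binary with no arguments at all no longer fails on the missing required device; a hedged illustration (binary name is only an example):

```sh
$ bin/BenchmarkDeviceAdapter
# argc == 1, so InitializeArgs() appends --help and switches opts to
# InitializeOptions::None; main() then prints Config.Usage.
```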
@@ -47,10 +47,17 @@ set(benchmarks
   BenchmarkTopologyAlgorithms
   )

+set(VTKm_BENCHS_RANGE_LOWER_BOUNDARY 4096 CACHE STRING "Smallest sample for input size bench for BenchmarkDeviceAdapter")
+set(VTKm_BENCHS_RANGE_UPPER_BOUNDARY 134217728 CACHE STRING "Biggest sample for input size bench for BenchmarkDeviceAdapter")
+mark_as_advanced(VTKm_BENCHS_RANGE_LOWER_BOUNDARY VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
+
 foreach (benchmark ${benchmarks})
   add_benchmark(NAME ${benchmark} FILE ${benchmark}.cxx LIBS vtkm_source vtkm_filter)
 endforeach ()

+target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_LOWER_BOUNDARY=${VTKm_BENCHS_RANGE_LOWER_BOUNDARY})
+target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_UPPER_BOUNDARY=${VTKm_BENCHS_RANGE_UPPER_BOUNDARY})
+
 if(TARGET vtkm_rendering)
   add_benchmark(NAME BenchmarkRayTracing FILE BenchmarkRayTracing.cxx LIBS vtkm_rendering)
 endif()
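Since these are standard CMake cache variables, they can be overridden at configure time; a minimal sketch (the source path is illustrative and the values shown are the defaults from the hunk above):

```sh
$ cmake -DVTKm_ENABLE_BENCHMARKS=ON \
        -DVTKm_BENCHS_RANGE_LOWER_BOUNDARY=4096 \
        -DVTKm_BENCHS_RANGE_UPPER_BOUNDARY=134217728 \
        /path/to/vtk-m
```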
benchmarking/README.md (new file, 120 lines)
@@ -0,0 +1,120 @@
# BENCHMARKING VTK-m

## TL;DR

When configuring _VTK-m_ with _CMake_, pass the flag `-DVTKm_ENABLE_BENCHMARKS=1`.
In the build directory you will see the following binaries:

    $ ls bin/Benchmark*
    bin/BenchmarkArrayTransfer* bin/BenchmarkCopySpeeds* bin/BenchmarkFieldAlgorithms*
    bin/BenchmarkRayTracing* bin/BenchmarkAtomicArray* bin/BenchmarkDeviceAdapter*
    bin/BenchmarkFilters* bin/BenchmarkTopologyAlgorithms*

Taking `BenchmarkArrayTransfer` as an example, we can run it as:

    $ bin/BenchmarkArrayTransfer -d Any

---

## Parts of this Document

0. [TL;DR](#TL;DR)
1. [Devices](#choosing-devices)
2. [Filters](#run-a-subset-of-your-benchmarks)
3. [Compare with baseline](#compare-with-baseline)
4. [Installing compare-benchmarks.py](#installing-compare-benchmarkspy)

---

## Choosing devices

Taking `BenchmarkArrayTransfer` as an example, we can determine which devices
it can run on simply by invoking it without a device:

    $ bin/BenchmarkArrayTransfer
    ...
    Valid devices: "Any" "Serial"
    ...

Given the listed _Valid devices_, you can choose the device to run the
benchmark on:

    $ bin/BenchmarkArrayTransfer -d Serial

## Run a subset of your benchmarks

_VTK-m_ benchmarks use [Google Benchmarks], which allows you to choose a subset
of benchmarks using the flag `--benchmark_filter=REGEX`.

For instance, if you want to run all the benchmarks that write something, you
would run:

    $ bin/BenchmarkArrayTransfer -d Serial --benchmark_filter='Write'

Note that you can list all of the available benchmarks with the option
`--benchmark_list_tests`.
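For example, to see every benchmark a binary defines before filtering it down (binary and device are only illustrative):

    $ bin/BenchmarkArrayTransfer -d Serial --benchmark_list_tests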
## Compare with baseline

_VTK-m_ ships with a helper script, `compare-benchmarks.py`, based on the
`compare.py` script from [Google Benchmarks], which lets you compare benchmarks
using different devices, filters, and binaries. After building _VTK-m_ it will
appear in the `bin` directory within your `build` directory.

When running `compare-benchmarks.py`:

- You can specify the baseline benchmark binary path and its arguments in
  `--benchmark1=`
- The contender benchmark binary path and its arguments in `--benchmark2=`
- Extra options to be passed to `compare.py` must come after `--`

### Compare between filters

When comparing filters, we can only use one benchmark binary with a single
device, as shown in the following example:

```sh
$ ./compare-benchmarks.py --benchmark1='./BenchmarkArrayTransfer -d Any
--benchmark_filter=1024' --filter1='Read' --filter2=Write -- filters

# It will output something like this:

Benchmark                                                     Time     CPU  Time Old  Time New  CPU Old  CPU New
---------------------------------------------------------------------------------------------------------------------------------------------------------------
BenchContToExec[Read vs. Write]<F32>/Bytes:1024/manual_time  +0.2694  +0.2655  18521  23511  18766  23749
BenchExecToCont[Read vs. Write]<F32>/Bytes:1024/manual_time  +0.0212  +0.0209  25910  26460  26152  26698
```

### Compare between devices

When comparing two benchmarks using two devices, use the option `benchmarks`
after `--` and call `./compare-benchmarks.py` as follows:

```sh
$ ./compare-benchmarks.py --benchmark1='./BenchmarkArrayTransfer -d Serial
--benchmark_filter=1024' --benchmark2='./BenchmarkArrayTransfer -d Cuda
--benchmark_filter=1024' -- benchmarks

# It will output something like this:

Benchmark                                             Time     CPU  Time Old  Time New  CPU Old  CPU New
---------------------------------------------------------------------------------------------------------------------
BenchContToExecRead<F32>/Bytes:1024/manual_time       +0.0127  +0.0120  18388  18622  18632  18856
BenchContToExecWrite<F32>/Bytes:1024/manual_time      +0.0010  +0.0006  23471  23496  23712  23726
BenchContToExecReadWrite<F32>/Bytes:1024/manual_time  -0.0034  -0.0041  26363  26274  26611  26502
BenchRoundTripRead<F32>/Bytes:1024/manual_time        +0.0055  +0.0056  20635  20748  21172  21291
BenchRoundTripReadWrite<F32>/Bytes:1024/manual_time   +0.0084  +0.0082  29288  29535  29662  29905
BenchExecToContRead<F32>/Bytes:1024/manual_time       +0.0025  +0.0021  25883  25947  26122  26178
BenchExecToContWrite<F32>/Bytes:1024/manual_time      -0.0027  -0.0038  26375  26305  26622  26522
BenchExecToContReadWrite<F32>/Bytes:1024/manual_time  +0.0041  +0.0039  25639  25745  25871  25972
```

## Installing compare-benchmarks.py

`compare-benchmarks.py` relies on `compare.py` from Google Benchmarks, which in
turn relies on `SciPy`; you can find instructions [here][SciPy] regarding its
installation.
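As a minimal sketch (assuming `pip` is available on your system), installing SciPy for the current user is usually enough:

```sh
$ python -m pip install --user scipy
```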
[Google Benchmarks]: https://github.com/google/benchmark
[Compare.py]: https://github.com/google/benchmark/blob/master/tools/compare.py
[SciPy]: https://www.scipy.org/install.html