//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_benchmarking_Benchmarker_h
#define vtk_m_benchmarking_Benchmarker_h

#include <vtkm/cont/Initialize.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/Timer.h>

#include <vtkm/List.h>
#include <vtkm/internal/Meta.h>

#include <benchmark/benchmark.h>

#include <ostream>
#include <sstream>
#include <string>
#include <vector>

/// \file Benchmarker.h
/// \brief Benchmarking utilities
///
/// VTK-m's benchmarking framework is built on top of Google Benchmark.
///
/// A benchmark is a single function that is passed to a macro:
///
/// ```
/// void MyBenchmark(::benchmark::State& state)
/// {
///   MyClass someClass;
///
///   // Optional: Add a descriptive label with additional benchmark details:
///   state.SetLabel("Blah blah blah.");
///
///   // Must use a vtkm timer to properly capture e.g. CUDA execution times.
///   vtkm::cont::Timer timer;
///   for (auto _ : state)
///   {
///     someClass.Reset();
///
///     timer.Start();
///     someClass.DoWork();
///     timer.Stop();
///
///     state.SetIterationTime(timer.GetElapsedTime());
///   }
///
///   // Optional: Report items and/or bytes processed per iteration in output:
///   state.SetItemsProcessed(state.iterations() * someClass.GetNumberOfItems());
///   state.SetBytesProcessed(state.iterations() * someClass.GetNumberOfBytes());
/// }
/// VTKM_BENCHMARK(MyBenchmark);
/// ```
///
/// Google benchmark also makes it easy to implement parameter sweep benchmarks:
///
/// ```
/// void MyParameterSweep(::benchmark::State& state)
/// {
///   // The current value in the sweep:
///   const vtkm::Id currentValue = state.range(0);
///
///   MyClass someClass;
///   someClass.SetSomeParameter(currentValue);
///
///   vtkm::cont::Timer timer;
///   for (auto _ : state)
///   {
///     someClass.Reset();
///
///     timer.Start();
///     someClass.DoWork();
///     timer.Stop();
///
///     state.SetIterationTime(timer.GetElapsedTime());
///   }
/// }
/// VTKM_BENCHMARK_OPTS(MyParameterSweep, ->ArgName("Param")->Range(32, 1024 * 1024));
/// ```
///
/// This will generate and launch several benchmarks, exploring the parameter space of
/// `SetSomeParameter` between the values of 32 and (1024*1024). The chain of
/// function calls in the second argument is applied to an instance of
/// `::benchmark::internal::Benchmark`. See Google Benchmark's documentation for
/// more details.
///
/// For more complex benchmark configurations, the VTKM_BENCHMARK_APPLY macro
/// accepts a function with the signature
/// `void Func(::benchmark::internal::Benchmark*)` that is applied to the
/// generated benchmark to configure it.
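///
/// For example, a configuration function might look like the following sketch (the
/// `Configure` name and the specific argument settings are illustrative only):
///
/// ```
/// void Configure(::benchmark::internal::Benchmark* bm)
/// {
///   bm->ArgName("ArraySize")->RangeMultiplier(4)->Range(1 << 10, 1 << 20);
/// }
/// VTKM_BENCHMARK_APPLY(MyBenchmark, Configure);
/// ```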
///
/// To instantiate a templated benchmark across a list of types, the
/// VTKM_BENCHMARK_TEMPLATES* macros take a vtkm::List of types as an additional
/// parameter. The templated benchmark function will be instantiated and called
/// for each type in the list:
///
/// ```
/// template <typename T>
/// void MyBenchmark(::benchmark::State& state)
/// {
///   MyClass<T> someClass;
///
///   // Must use a vtkm timer to properly capture e.g. CUDA execution times.
///   vtkm::cont::Timer timer;
///   for (auto _ : state)
///   {
///     someClass.Reset();
///
///     timer.Start();
///     someClass.DoWork();
///     timer.Stop();
///
///     state.SetIterationTime(timer.GetElapsedTime());
///   }
/// }
/// VTKM_BENCHMARK_TEMPLATES(MyBenchmark, vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
/// ```
///
/// The benchmarks are executed by calling the `VTKM_EXECUTE_BENCHMARKS(argc, argv)`
/// macro from `main`. There is also a `VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, some_string)`
/// macro that appends the contents of `some_string` to the Google Benchmark preamble.
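///
/// A minimal `main` might look like the following sketch (the use of
/// `vtkm::cont::Initialize` and the specific options are illustrative; individual
/// benchmark executables may handle VTK-m arguments differently):
///
/// ```
/// int main(int argc, char* argv[])
/// {
///   // Let VTK-m consume its own command line arguments (e.g. device selection).
///   auto opts = vtkm::cont::InitializeOptions::RequireDevice;
///   vtkm::cont::Initialize(argc, argv, opts);
///
///   // Hand the remaining arguments to Google Benchmark and run.
///   VTKM_EXECUTE_BENCHMARKS(argc, argv);
///   return 0;
/// }
/// ```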
///
/// If a benchmark is not compatible with some configuration, it may call
/// `state.SkipWithError("Error message");` on the `::benchmark::State` object and return. This is
/// useful, for instance in the filter tests when the input is not compatible with the filter.
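///
/// For example, a benchmark that cannot run in the current configuration might do the
/// following (the `InputIsStructured` check is illustrative only):
///
/// ```
/// void MyBenchmark(::benchmark::State& state)
/// {
///   if (!InputIsStructured())
///   {
///     state.SkipWithError("Benchmark requires structured data.");
///     return;
///   }
///   // ... remainder of the benchmark ...
/// }
/// ```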
///
/// When launching a benchmark executable, the following options are supported by Google Benchmark:
///
/// - `--benchmark_list_tests`: List all available tests.
/// - `--benchmark_filter="[regex]"`: Only run benchmarks whose names match `[regex]`.
/// - `--benchmark_filter="-[regex]"`: Only run benchmarks whose names DON'T match `[regex]`.
/// - `--benchmark_min_time=[float]`: Make sure each benchmark repetition gathers `[float]` seconds
///   of data.
/// - `--benchmark_repetitions=[int]`: Run each benchmark `[int]` times and report aggregate
///   statistics (mean, stdev, etc.). A "repetition" is one complete execution of the benchmark
///   function; an "iteration" is a single pass through the `for (auto _ : state) { ... }` loop
///   inside it.
/// - `--benchmark_report_aggregates_only="true|false"`: If true, only the aggregate statistics are
///   reported (affects both console and file output). Requires `--benchmark_repetitions` to be
///   useful.
/// - `--benchmark_display_aggregates_only="true|false"`: If true, only the aggregate statistics are
///   printed to the terminal. Any file output will still contain all repetition info.
/// - `--benchmark_format="console|json|csv"`: Specify terminal output format: human readable
///   (`console`) or `csv`/`json` formats.
/// - `--benchmark_out_format="console|json|csv"`: Specify file output format: human readable
///   (`console`) or `csv`/`json` formats.
/// - `--benchmark_out=[filename]`: Specify output file.
/// - `--benchmark_color="true|false"`: Toggle color output in terminal when using `console` output.
/// - `--benchmark_counters_tabular="true|false"`: Print counter information (e.g. bytes/sec,
///   items/sec) in the table, rather than appending it as a label.
///
/// For more information and examples of practical usage, take a look at the existing benchmarks in
/// vtk-m/benchmarking/.

/// \def VTKM_EXECUTE_BENCHMARKS(argc, argv)
///
/// Run the benchmarks defined in the current file. Benchmarks may be filtered
/// and modified using the passed arguments; see the Google Benchmark documentation
/// for more details.
#define VTKM_EXECUTE_BENCHMARKS(argc, argv) vtkm::bench::detail::ExecuteBenchmarks(argc, argv)

/// \def VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble)
///
/// Run the benchmarks defined in the current file. Benchmarks may be filtered
/// and modified using the passed arguments; see the Google Benchmark documentation
/// for more details. The `preamble` string may be used to supply additional
/// information that will be appended to the output's preamble.
#define VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble) \
  vtkm::bench::detail::ExecuteBenchmarks(argc, argv, preamble)

/// \def VTKM_BENCHMARK(BenchFunc)
///
/// Define a simple benchmark. A single benchmark will be generated that executes
/// `BenchFunc`. `BenchFunc` must have the signature:
///
/// ```
/// void BenchFunc(::benchmark::State& state)
/// ```
#define VTKM_BENCHMARK(BenchFunc) \
  BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond)

/// \def VTKM_BENCHMARK_OPTS(BenchFunc, Args)
///
/// Similar to `VTKM_BENCHMARK`, but allows additional options to be specified
/// on the `::benchmark::internal::Benchmark` object. Example usage:
///
/// ```
/// VTKM_BENCHMARK_OPTS(MyBenchmark, ->ArgName("MyParam")->Range(32, 1024*1024));
/// ```
///
/// Note the similarity to the raw Google Benchmark usage of
/// `BENCHMARK(MyBenchmark)->ArgName("MyParam")->Range(32, 1024*1024);`. See
/// the Google Benchmark documentation for more details on the available options.
#define VTKM_BENCHMARK_OPTS(BenchFunc, options) \
  BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond) options

/// \def VTKM_BENCHMARK_APPLY(BenchFunc, ConfigFunc)
///
/// Similar to `VTKM_BENCHMARK`, but allows advanced benchmark configuration
/// via a supplied ConfigFunc, similar to Google Benchmark's
/// `BENCHMARK(BenchFunc)->Apply(ConfigFunc)`. `ConfigFunc` must have the
/// signature:
///
/// ```
/// void ConfigFunc(::benchmark::internal::Benchmark*);
/// ```
///
/// See the Google Benchmark documentation for more details on the available options.
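///
/// Example usage (the `ConfigureBench` function is illustrative only):
///
/// ```
/// void ConfigureBench(::benchmark::internal::Benchmark* bm)
/// {
///   bm->ArgNames({ "Size", "Threads" })->Ranges({ { 32, 1024 }, { 1, 4 } });
/// }
/// VTKM_BENCHMARK_APPLY(MyBenchmark, ConfigureBench);
/// ```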
#define VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor) \
  BENCHMARK(BenchFunc)->Apply(applyFunctor)->UseManualTime()->Unit(benchmark::kMillisecond)

/// \def VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList)
///
/// Define a family of benchmark that vary by template argument. A single
/// benchmark will be generated for each type in `TypeList` (a vtkm::List of
/// types) that executes `BenchFunc<T>`. `BenchFunc` must have the signature:
///
/// ```
/// template <typename T>
/// void BenchFunc(::benchmark::State& state)
/// ```
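///
/// Example usage:
///
/// ```
/// VTKM_BENCHMARK_TEMPLATES(MyBenchmark, vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
/// ```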
#define VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList) \
  VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, vtkm::bench::detail::NullApply, TypeList)

/// \def VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, Args, TypeList)
///
/// Similar to `VTKM_BENCHMARK_TEMPLATES`, but allows additional options to be specified
/// on the `::benchmark::internal::Benchmark` object. Example usage:
///
/// ```
/// VTKM_BENCHMARK_TEMPLATES_OPTS(MyBenchmark,
///                               ->ArgName("MyParam")->Range(32, 1024*1024),
///                               vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
/// ```
#define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList) \
  VTKM_BENCHMARK_TEMPLATES_APPLY( \
    BenchFunc, \
    [](::benchmark::internal::Benchmark* bm) { bm options->Unit(benchmark::kMillisecond); }, \
    TypeList)

/// \def VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ConfigFunc, TypeList)
///
/// Similar to `VTKM_BENCHMARK_TEMPLATES`, but allows advanced benchmark configuration
/// via a supplied ConfigFunc, similar to Google Benchmark's
/// `BENCHMARK(BenchFunc)->Apply(ConfigFunc)`. `ConfigFunc` must have the
/// signature:
///
/// ```
/// void ConfigFunc(::benchmark::internal::Benchmark*);
/// ```
///
/// See the Google Benchmark documentation for more details on the available options.
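///
/// Example usage (the `TemplateConfig` function is illustrative only):
///
/// ```
/// void TemplateConfig(::benchmark::internal::Benchmark* bm)
/// {
///   bm->ArgName("NumValues")->Range(1 << 10, 1 << 20);
/// }
/// VTKM_BENCHMARK_TEMPLATES_APPLY(MyBenchmark, TemplateConfig, vtkm::List<vtkm::Float32, vtkm::Float64>);
/// ```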
#define VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ApplyFunctor, TypeList) \
  namespace \
  { /* A template function cannot be used as a template parameter, so wrap the function with \
     * a template struct to get it into the GenerateTemplateBenchmarks class. */ \
  template <typename... Ts> \
  struct VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
  { \
    static ::benchmark::internal::Function* GetFunction() { return BenchFunc<Ts...>; } \
  }; \
  } /* end anon namespace */ \
  int BENCHMARK_PRIVATE_NAME(BenchFunc) = \
    vtkm::bench::detail::GenerateTemplateBenchmarks<VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc), \
                                                    TypeList>::Register(#BenchFunc, ApplyFunctor)

// Internal use only:
#define VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
  BENCHMARK_PRIVATE_CONCAT(_wrapper_, BenchFunc, __LINE__)

namespace vtkm
{
namespace bench
{
namespace detail
{
static inline void NullApply(::benchmark::internal::Benchmark*) {}
/// Do not use directly. The VTKM_BENCHMARK_TEMPLATES macros should be used
/// instead.
// TypeLists could be expanded to compute cross products if we ever have that
// need.
template <template <typename...> class BenchType, typename TypeList>
struct GenerateTemplateBenchmarks
{
private:
  template <typename T>
  using MakeBenchType = BenchType<T>;

  using Benchmarks = vtkm::ListTransform<TypeList, MakeBenchType>;

  template <typename ApplyFunctor>
  struct RegisterImpl
  {
    std::string BenchName;
    ApplyFunctor Apply;

    template <typename P>
    void operator()(vtkm::internal::meta::Type<BenchType<P>>) const
    {
      std::ostringstream name;
      name << this->BenchName << "<" << vtkm::cont::TypeToString<P>() << ">";

      auto bm = ::benchmark::internal::RegisterBenchmarkInternal(
        new ::benchmark::internal::FunctionBenchmark(name.str().c_str(),
                                                     BenchType<P>::GetFunction()));
      this->Apply(bm);

      // Always use manual time with vtkm::cont::Timer to capture CUDA times accurately.
      bm->UseManualTime()->Unit(benchmark::kMillisecond);
    }
  };

public:
  template <typename ApplyFunctor>
  static int Register(const std::string& benchName, ApplyFunctor&& apply)
  {
    vtkm::ListForEach(RegisterImpl<ApplyFunctor>{ benchName, std::forward<ApplyFunctor>(apply) },
                      vtkm::ListTransform<Benchmarks, vtkm::internal::meta::Type>{});
    return 0;
  }
};

class VTKmConsoleReporter : public ::benchmark::ConsoleReporter
{
  std::string UserPreamble;

public:
  VTKmConsoleReporter() = default;

  explicit VTKmConsoleReporter(const std::string& preamble)
    : UserPreamble{ preamble }
  {
  }

  bool ReportContext(const Context& context) override
  {
    if (!::benchmark::ConsoleReporter::ReportContext(context))
    {
      return false;
    }

    // The rest of the preamble is printed to the error stream, so be consistent:
    auto& out = this->GetErrorStream();

    // Print list of devices:
    out << "VTK-m Device State:\n";
    vtkm::cont::GetRuntimeDeviceTracker().PrintSummary(out);

    if (!this->UserPreamble.empty())
    {
      out << this->UserPreamble << "\n";
    }

    out.flush();
    return true;
  }
};

// Returns the number of executed benchmarks (note: returns 1 if unrecognized command line
// arguments are encountered):
static inline vtkm::Id ExecuteBenchmarks(int& argc,
                                         char* argv[],
                                         const std::string& preamble = std::string{})
{
  ::benchmark::Initialize(&argc, argv);
  if (::benchmark::ReportUnrecognizedArguments(argc, argv))
  {
    return 1;
  }

  VTKmConsoleReporter reporter{ preamble };

  vtkm::cont::Timer timer;
  timer.Start();
  std::size_t num = ::benchmark::RunSpecifiedBenchmarks(&reporter);
  timer.Stop();

  reporter.GetOutputStream().flush();
  reporter.GetErrorStream().flush();

  reporter.GetErrorStream() << "Ran " << num << " benchmarks in " << timer.GetElapsedTime()
                            << " seconds." << std::endl;

  return static_cast<vtkm::Id>(num);
}

// Prepares the argument list before VTK-m initialization. Expects `args` to already contain
// the argv pointers. If no arguments were given, `--help` is injected; when help is requested,
// `opts` is reset to InitializeOptions::None.
static inline void InitializeArgs(int* argc,
                                  std::vector<char*>& args,
                                  vtkm::cont::InitializeOptions& opts)
{
  bool isHelp = false;

  // Inject --help
  if (*argc == 1)
  {
    const char* help = "--help"; // We want it to be static
    args.push_back(const_cast<char*>(help));
    *argc = *argc + 1;
  }

  args.push_back(nullptr);

  for (size_t i = 0; i < static_cast<size_t>(*argc); ++i)
  {
    auto opt_s = std::string(args[i]);
    if (opt_s == "--help" || opt_s == "-help" || opt_s == "-h")
    {
      isHelp = true;
    }
  }

  if (!isHelp)
  {
    return;
  }

  opts = vtkm::cont::InitializeOptions::None;
}

} // namespace detail
} // namespace bench
} // namespace vtkm

#endif