920392b6d8
Instead, consistently use `vtkm::List`. (All the actual benchmark code already uses `vtkm::List` instead of `brigand::list`.)
428 lines
16 KiB
C++
428 lines
16 KiB
C++
//============================================================================
|
|
// Copyright (c) Kitware, Inc.
|
|
// All rights reserved.
|
|
// See LICENSE.txt for details.
|
|
//
|
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
// PURPOSE. See the above copyright notice for more information.
|
|
//============================================================================
|
|
|
|
#ifndef vtk_m_benchmarking_Benchmarker_h
|
|
#define vtk_m_benchmarking_Benchmarker_h
|
|
#include <sstream>
|
|
#include <vtkm/cont/Initialize.h>
|
|
#include <vtkm/cont/Logging.h>
|
|
#include <vtkm/cont/RuntimeDeviceTracker.h>
|
|
#include <vtkm/cont/Timer.h>
|
|
|
|
#include <vtkm/List.h>
|
|
|
|
#include <vtkm/internal/Meta.h>
|
|
|
|
#include <benchmark/benchmark.h>
|
|
|
|
#include <ostream>
|
|
|
|
/// \file Benchmarker.h
|
|
/// \brief Benchmarking utilities
|
|
///
|
|
/// VTK-m's benchmarking framework is built on top of Google Benchmark.
|
|
///
|
|
/// A benchmark is now a single function, which is passed to a macro:
|
|
///
|
|
/// ```
|
|
/// void MyBenchmark(::benchmark::State& state)
|
|
/// {
|
|
/// MyClass someClass;
|
|
///
|
|
/// // Optional: Add a descriptive label with additional benchmark details:
|
|
/// state.SetLabel("Blah blah blah.");
|
|
///
|
|
/// // Must use a vtkm timer to properly capture eg. CUDA execution times.
|
|
/// vtkm::cont::Timer timer;
|
|
/// for (auto _ : state)
|
|
/// {
|
|
/// someClass.Reset();
|
|
///
|
|
/// timer.Start();
|
|
/// someClass.DoWork();
|
|
/// timer.Stop();
|
|
///
|
|
/// state.SetIterationTime(timer.GetElapsedTime());
|
|
/// }
|
|
///
|
|
/// // Optional: Report items and/or bytes processed per iteration in output:
|
|
/// state.SetItemsProcessed(state.iterations() * someClass.GetNumberOfItems());
|
|
/// state.SetBytesProcessed(state.iterations() * someClass.GetNumberOfBytes());
|
|
/// }
|
|
/// VTKM_BENCHMARK(MyBenchmark);
|
|
/// ```
|
|
///
|
|
/// Google benchmark also makes it easy to implement parameter sweep benchmarks:
|
|
///
|
|
/// ```
|
|
/// void MyParameterSweep(::benchmark::State& state)
|
|
/// {
|
|
/// // The current value in the sweep:
|
|
/// const vtkm::Id currentValue = state.range(0);
|
|
///
|
|
/// MyClass someClass;
|
|
/// someClass.SetSomeParameter(currentValue);
|
|
///
|
|
/// vtkm::cont::Timer timer;
|
|
/// for (auto _ : state)
|
|
/// {
|
|
/// someClass.Reset();
|
|
///
|
|
/// timer.Start();
|
|
/// someClass.DoWork();
|
|
/// timer.Stop();
|
|
///
|
|
/// state.SetIterationTime(timer.GetElapsedTime());
|
|
/// }
|
|
/// }
|
|
/// VTKM_BENCHMARK_OPTS(MyParameterSweep, ->ArgName("Param")->Range(32, 1024 * 1024));
|
|
/// ```
|
|
///
|
|
/// will generate and launch several benchmarks, exploring the parameter space of
|
|
/// `SetSomeParameter` between the values of 32 and (1024*1024). The chain of
|
|
/// functions calls in the second argument is applied to an instance of
|
|
/// ::benchmark::internal::Benchmark. See Google Benchmark's documentation for
|
|
/// more details.
|
|
///
|
|
/// For more complex benchmark configurations, the VTKM_BENCHMARK_APPLY macro
|
|
/// accepts a function with the signature
|
|
/// `void Func(::benchmark::internal::Benchmark*)` that may be used to generate
|
|
/// more complex configurations.
|
|
///
|
|
/// To instantiate a templated benchmark across a list of types, the
|
|
/// VTKM_BENCHMARK_TEMPLATES* macros take a vtkm::List of types as an additional
|
|
/// parameter. The templated benchmark function will be instantiated and called
|
|
/// for each type in the list:
|
|
///
|
|
/// ```
|
|
/// template <typename T>
|
|
/// void MyBenchmark(::benchmark::State& state)
|
|
/// {
|
|
/// MyClass<T> someClass;
|
|
///
|
|
/// // Must use a vtkm timer to properly capture eg. CUDA execution times.
|
|
/// vtkm::cont::Timer timer;
|
|
/// for (auto _ : state)
|
|
/// {
|
|
/// someClass.Reset();
|
|
///
|
|
/// timer.Start();
|
|
/// someClass.DoWork();
|
|
/// timer.Stop();
|
|
///
|
|
/// state.SetIterationTime(timer.GetElapsedTime());
|
|
/// }
|
|
/// }
|
|
/// VTKM_BENCHMARK_TEMPLATES(MyBenchmark, vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
|
|
/// ```
|
|
///
|
|
/// The benchmarks are executed by calling the `VTKM_EXECUTE_BENCHMARKS(argc, argv)`
|
|
/// macro from `main`. There is also a `VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, some_string)`
|
|
/// macro that appends the contents of `some_string` to the Google Benchmark preamble.
|
|
///
|
|
/// If a benchmark is not compatible with some configuration, it may call
|
|
/// `state.SkipWithError("Error message");` on the `::benchmark::State` object and return. This is
|
|
/// useful, for instance in the filter tests when the input is not compatible with the filter.
|
|
///
|
|
/// When launching a benchmark executable, the following options are supported by Google Benchmark:
|
|
///
|
|
/// - `--benchmark_list_tests`: List all available tests.
|
|
/// - `--benchmark_filter="[regex]"`: Only run benchmark with names that match `[regex]`.
|
|
/// - `--benchmark_filter="-[regex]"`: Only run benchmark with names that DON'T match `[regex]`.
|
|
/// - `--benchmark_min_time=[float]`: Make sure each benchmark repetition gathers `[float]` seconds
|
|
/// of data.
|
|
/// - `--benchmark_repetitions=[int]`: Run each benchmark `[int]` times and report aggregate statistics
|
|
/// (mean, stdev, etc). A "repetition" refers to a single execution of the benchmark function, not
|
|
/// an "iteration", which is a loop of the `for(auto _:state){...}` section.
|
|
/// - `--benchmark_report_aggregates_only="true|false"`: If true, only the aggregate statistics are
|
|
/// reported (affects both console and file output). Requires `--benchmark_repetitions` to be useful.
|
|
/// - `--benchmark_display_aggregates_only="true|false"`: If true, only the aggregate statistics are
|
|
/// printed to the terminal. Any file output will still contain all repetition info.
|
|
/// - `--benchmark_format="console|json|csv"`: Specify terminal output format: human readable
|
|
/// (`console`) or `csv`/`json` formats.
|
|
/// - `--benchmark_out_format="console|json|csv"`: Specify file output format: human readable
|
|
/// (`console`) or `csv`/`json` formats.
|
|
/// - `--benchmark_out=[filename]`: Specify output file.
|
|
/// - `--benchmark_color="true|false"`: Toggle color output in terminal when using `console` output.
|
|
/// - `--benchmark_counters_tabular="true|false"`: Print counter information (e.g. bytes/sec, items/sec)
|
|
/// in the table, rather than appending them as a label.
|
|
///
|
|
/// For more information and examples of practical usage, take a look at the existing benchmarks in
|
|
/// vtk-m/benchmarking/.
|
|
|
|
/// \def VTKM_EXECUTE_BENCHMARKS(argc, argv)
///
/// Execute the registered benchmarks. `argc` and `argv` are handed to Google
/// Benchmark, which uses them to filter and configure the run; see the Google
/// Benchmark documentation for the supported options.
///
/// Expands to a call to `vtkm::bench::detail::ExecuteBenchmarks`, so the
/// expression evaluates to that function's `vtkm::Id` result.
#define VTKM_EXECUTE_BENCHMARKS(argc, argv) vtkm::bench::detail::ExecuteBenchmarks(argc, argv)
|
|
|
|
/// \def VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble)
///
/// Same as `VTKM_EXECUTE_BENCHMARKS`, with one addition: the `preamble` string
/// is printed after the VTK-m device summary in the output's preamble. Use it
/// to record extra information about the run.
///
/// `argc` and `argv` are handed to Google Benchmark for filtering and
/// configuration; see the Google Benchmark documentation for more details.
#define VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble)                                     \
  vtkm::bench::detail::ExecuteBenchmarks(argc, argv, preamble)
|
|
|
|
/// \def VTKM_BENCHMARK(BenchFunc)
///
/// Register `BenchFunc` as a single benchmark. The benchmark is configured to
/// use manual timing (the function must call `state.SetIterationTime(...)`)
/// and to report in milliseconds. `BenchFunc` must have the signature:
///
/// ```
/// void BenchFunc(::benchmark::State& state)
/// ```
#define VTKM_BENCHMARK(BenchFunc)                                                                  \
  BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond)
|
|
|
|
/// \def VTKM_BENCHMARK_OPTS(BenchFunc, options)
///
/// Like `VTKM_BENCHMARK`, but `options` is pasted verbatim after the default
/// configuration, so it must be a chain of member calls on the
/// `::benchmark::internal::Benchmark` object, each starting with `->`:
///
/// ```
/// VTKM_BENCHMARK_OPTS(MyBenchmark, ->ArgName("MyParam")->Range(32, 1024*1024));
/// ```
///
/// This mirrors the raw Google Benchmark spelling
/// `BENCHMARK(MyBenchmark)->ArgName("MyParam")->Range(32, 1024*1024);`. See
/// the Google Benchmark documentation for the available options.
#define VTKM_BENCHMARK_OPTS(BenchFunc, options)                                                    \
  BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond) options
|
|
|
|
/// \def VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor)
///
/// Like `VTKM_BENCHMARK`, but runs `applyFunctor` on the benchmark object for
/// advanced configuration, in the manner of Google Benchmark's
/// `BENCHMARK(BenchFunc)->Apply(applyFunctor)`. The functor is applied first;
/// manual timing and the millisecond unit are set afterwards. `applyFunctor`
/// must be callable as:
///
/// ```
/// void ConfigFunc(::benchmark::internal::Benchmark*);
/// ```
///
/// See the Google Benchmark documentation for the available options.
#define VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor)                                              \
  BENCHMARK(BenchFunc)->Apply(applyFunctor)->UseManualTime()->Unit(benchmark::kMillisecond)
|
|
|
|
/// \def VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList)
///
/// Register a family of benchmarks that differ only by template argument. One
/// benchmark running `BenchFunc<T>` is generated for every type `T` in
/// `TypeList` (a vtkm::List of types). `BenchFunc` must have the signature:
///
/// ```
/// template <typename T>
/// void BenchFunc(::benchmark::State& state)
/// ```
///
/// Implemented as `VTKM_BENCHMARK_TEMPLATES_APPLY` with a no-op configuration
/// function.
#define VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList)                                              \
  VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, vtkm::bench::detail::NullApply, TypeList)
|
|
|
|
/// \def VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList)
///
/// Like `VTKM_BENCHMARK_TEMPLATES`, but `options` (a chain of `->` member
/// calls) is applied to the `::benchmark::internal::Benchmark` object of every
/// generated benchmark. The options are wrapped in a lambda that is passed to
/// `VTKM_BENCHMARK_TEMPLATES_APPLY`. Example usage:
///
/// ```
/// VTKM_BENCHMARK_TEMPLATES_OPTS(MyBenchmark,
///                                 ->ArgName("MyParam")->Range(32, 1024*1024),
///                               vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
/// ```
#define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList)                                \
  VTKM_BENCHMARK_TEMPLATES_APPLY(                                                                  \
    BenchFunc,                                                                                     \
    [](::benchmark::internal::Benchmark* bm) { bm options->Unit(benchmark::kMillisecond); },       \
    TypeList)
|
|
|
|
/// \def VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ApplyFunctor, TypeList)
///
/// Like `VTKM_BENCHMARK_TEMPLATES`, but runs `ApplyFunctor` on each generated
/// benchmark for advanced configuration, in the manner of Google Benchmark's
/// `BENCHMARK(BenchFunc)->Apply(ConfigFunc)`. `ApplyFunctor` must be callable
/// as:
///
/// ```
/// void ConfigFunc(::benchmark::internal::Benchmark*);
/// ```
///
/// The expansion defines a wrapper struct in an anonymous namespace and a
/// namespace-scope `int` whose initializer performs the registration, so the
/// macro must be used at namespace scope.
///
/// See the Google Benchmark documentation for the available options.
#define VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ApplyFunctor, TypeList)                          \
  namespace                                                                                        \
  { /* A function template cannot be passed as a template argument, so BenchFunc is wrapped       \
     * in a class template that GenerateTemplateBenchmarks can instantiate per type. */            \
  template <typename... Ts>                                                                        \
  struct VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)                                                    \
  {                                                                                                \
    static ::benchmark::internal::Function* GetFunction() { return BenchFunc<Ts...>; }             \
  };                                                                                               \
  } /* end anon namespace */                                                                       \
  int BENCHMARK_PRIVATE_NAME(BenchFunc) =                                                          \
    vtkm::bench::detail::GenerateTemplateBenchmarks<VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc),        \
                                                    TypeList>::Register(#BenchFunc, ApplyFunctor)
|
|
|
|
// Internal use only: builds the name of the wrapper struct emitted by
// VTKM_BENCHMARK_TEMPLATES_APPLY from the benchmark name and the current line.
#define VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)                                                     \
  BENCHMARK_PRIVATE_CONCAT(_wrapper_, BenchFunc, __LINE__)
|
|
|
|
namespace vtkm
|
|
{
|
|
namespace bench
|
|
{
|
|
namespace detail
|
|
{
|
|
|
|
// No-op configuration callback; VTKM_BENCHMARK_TEMPLATES passes it when no extra setup is needed.
static inline void NullApply(::benchmark::internal::Benchmark*) {}
|
|
|
|
/// Do not use directly. The VTKM_BENCHMARK_TEMPLATES macros should be used
|
|
/// instead.
|
|
// TypeLists could be expanded to compute cross products if we ever have that
|
|
// need.
|
|
template <template <typename...> class BenchType, typename TypeList>
|
|
struct GenerateTemplateBenchmarks
|
|
{
|
|
private:
|
|
template <typename T>
|
|
using MakeBenchType = BenchType<T>;
|
|
|
|
using Benchmarks = vtkm::ListTransform<TypeList, MakeBenchType>;
|
|
|
|
template <typename ApplyFunctor>
|
|
struct RegisterImpl
|
|
{
|
|
std::string BenchName;
|
|
ApplyFunctor Apply;
|
|
|
|
template <typename P>
|
|
void operator()(vtkm::internal::meta::Type<BenchType<P>>) const
|
|
{
|
|
std::ostringstream name;
|
|
name << this->BenchName << "<" << vtkm::cont::TypeToString<P>() << ">";
|
|
auto bm = ::benchmark::internal::RegisterBenchmarkInternal(
|
|
new ::benchmark::internal::FunctionBenchmark(name.str().c_str(),
|
|
BenchType<P>::GetFunction()));
|
|
this->Apply(bm);
|
|
|
|
// Always use manual time with vtkm::cont::Timer to capture CUDA times accurately.
|
|
bm->UseManualTime()->Unit(benchmark::kMillisecond);
|
|
}
|
|
};
|
|
|
|
public:
|
|
template <typename ApplyFunctor>
|
|
static int Register(const std::string& benchName, ApplyFunctor&& apply)
|
|
{
|
|
vtkm::ListForEach(RegisterImpl<ApplyFunctor>{ benchName, std::forward<ApplyFunctor>(apply) },
|
|
vtkm::ListTransform<Benchmarks, vtkm::internal::meta::Type>{});
|
|
return 0;
|
|
}
|
|
};
|
|
|
|
class VTKmConsoleReporter : public ::benchmark::ConsoleReporter
|
|
{
|
|
std::string UserPreamble;
|
|
|
|
public:
|
|
VTKmConsoleReporter() = default;
|
|
|
|
explicit VTKmConsoleReporter(const std::string& preamble)
|
|
: UserPreamble{ preamble }
|
|
{
|
|
}
|
|
|
|
bool ReportContext(const Context& context) override
|
|
{
|
|
if (!::benchmark::ConsoleReporter::ReportContext(context))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
// The rest of the preamble is printed to the error stream, so be consistent:
|
|
auto& out = this->GetErrorStream();
|
|
|
|
// Print list of devices:
|
|
out << "VTK-m Device State:\n";
|
|
vtkm::cont::GetRuntimeDeviceTracker().PrintSummary(out);
|
|
if (!this->UserPreamble.empty())
|
|
{
|
|
out << this->UserPreamble << "\n";
|
|
}
|
|
out.flush();
|
|
|
|
return true;
|
|
}
|
|
};
|
|
|
|
// Returns the number of executed benchmarks:
|
|
static inline vtkm::Id ExecuteBenchmarks(int& argc,
|
|
char* argv[],
|
|
const std::string& preamble = std::string{})
|
|
{
|
|
::benchmark::Initialize(&argc, argv);
|
|
if (::benchmark::ReportUnrecognizedArguments(argc, argv))
|
|
{
|
|
return 1;
|
|
}
|
|
|
|
VTKmConsoleReporter reporter{ preamble };
|
|
|
|
vtkm::cont::Timer timer;
|
|
timer.Start();
|
|
std::size_t num = ::benchmark::RunSpecifiedBenchmarks(&reporter);
|
|
timer.Stop();
|
|
|
|
reporter.GetOutputStream().flush();
|
|
reporter.GetErrorStream().flush();
|
|
|
|
reporter.GetErrorStream() << "Ran " << num << " benchmarks in " << timer.GetElapsedTime()
|
|
<< " seconds." << std::endl;
|
|
|
|
return static_cast<vtkm::Id>(num);
|
|
}
|
|
|
|
void InitializeArgs(int* argc, std::vector<char*>& args, vtkm::cont::InitializeOptions& opts)
|
|
{
|
|
bool isHelp = false;
|
|
|
|
// Inject --help
|
|
if (*argc == 1)
|
|
{
|
|
const char* help = "--help"; // We want it to be static
|
|
args.push_back(const_cast<char*>(help));
|
|
*argc = *argc + 1;
|
|
}
|
|
|
|
args.push_back(nullptr);
|
|
|
|
for (size_t i = 0; i < static_cast<size_t>(*argc); ++i)
|
|
{
|
|
auto opt_s = std::string(args[i]);
|
|
if (opt_s == "--help" || opt_s == "-help" || opt_s == "-h")
|
|
{
|
|
isHelp = true;
|
|
}
|
|
}
|
|
|
|
if (!isHelp)
|
|
{
|
|
return;
|
|
}
|
|
|
|
opts = vtkm::cont::InitializeOptions::None;
|
|
}
|
|
}
|
|
}
|
|
} // end namespace vtkm::bench::detail
|
|
|
|
#endif
|