Port benchmarking framework to Google Benchmark.

Allison Vacanti 2019-12-26 14:48:51 -05:00
parent 39d981bcf9
commit 539f6e5ad7
14 changed files with 4038 additions and 5465 deletions

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -8,23 +8,19 @@
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/TypeTraits.h>
#include "Benchmarker.h"
#include <vtkm/cont/Algorithm.h>
#include <vtkm/cont/DeviceAdapter.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorBadAllocation.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
#include <vtkm/internal/Configure.h>
#include <vtkm/testing/Testing.h>
#include <iomanip>
#include <iostream>
#include <vtkm/List.h>
#include <sstream>
#ifdef VTKM_ENABLE_TBB
@@ -34,145 +30,78 @@
// For the TBB implementation, the number of threads can be customized using a
// "NumThreads [numThreads]" argument.
namespace vtkm
{
namespace benchmarking
{
const vtkm::UInt64 COPY_SIZE_MIN = (1 << 10); // 1 KiB
const vtkm::UInt64 COPY_SIZE_MAX = (1 << 29); // 512 MiB
const vtkm::UInt64 COPY_SIZE_INC = 1; // Used as 'size <<= INC'
const size_t COL_WIDTH = 32;
template <typename ValueType, typename DeviceAdapter>
struct MeasureCopySpeed
{
using Algo = vtkm::cont::Algorithm;
vtkm::cont::ArrayHandle<ValueType> Source;
vtkm::cont::ArrayHandle<ValueType> Destination;
vtkm::UInt64 NumBytes;
VTKM_CONT
MeasureCopySpeed(vtkm::UInt64 bytes)
: NumBytes(bytes)
{
vtkm::Id numValues = static_cast<vtkm::Id>(bytes / sizeof(ValueType));
this->Source.Allocate(numValues);
}
VTKM_CONT vtkm::Float64 operator()()
{
vtkm::cont::Timer timer{ DeviceAdapter() };
timer.Start();
Algo::Copy(this->Source, this->Destination);
return timer.GetElapsedTime();
}
VTKM_CONT std::string Description() const
{
vtkm::UInt64 actualSize = sizeof(ValueType);
actualSize *= static_cast<vtkm::UInt64>(this->Source.GetNumberOfValues());
std::ostringstream out;
out << "Copying " << vtkm::cont::GetHumanReadableSize(this->NumBytes)
<< " (actual=" << vtkm::cont::GetHumanReadableSize(actualSize) << ") of "
<< vtkm::testing::TypeName<ValueType>::Name() << "\n";
return out.str();
}
};
void PrintRow(std::ostream& out, const std::string& label, const std::string& data)
{
out << "| " << std::setw(COL_WIDTH) << label << " | " << std::setw(COL_WIDTH) << data << " |"
<< std::endl;
}
void PrintDivider(std::ostream& out)
{
const std::string fillStr(COL_WIDTH, '-');
out << "|-" << fillStr << "-|-" << fillStr << "-|" << std::endl;
}
template <typename ValueType, typename DeviceAdapter>
void BenchmarkValueType(vtkm::cont::DeviceAdapterId id)
{
PrintRow(std::cout, vtkm::testing::TypeName<ValueType>::Name(), id.GetName());
PrintDivider(std::cout);
Benchmarker bench(15, 100);
for (vtkm::UInt64 size = COPY_SIZE_MIN; size <= COPY_SIZE_MAX; size <<= COPY_SIZE_INC)
{
MeasureCopySpeed<ValueType, DeviceAdapter> functor(size);
bench.Reset();
std::string speedStr;
try
{
bench.GatherSamples(functor);
vtkm::Float64 speed = static_cast<Float64>(size) / stats::Mean(bench.GetSamples());
speedStr = vtkm::cont::GetHumanReadableSize(static_cast<UInt64>(speed)) + std::string("/s");
}
catch (vtkm::cont::ErrorBadAllocation&)
{
speedStr = "[allocation too large]";
}
PrintRow(std::cout, vtkm::cont::GetHumanReadableSize(size), speedStr);
}
std::cout << "\n";
}
}
} // end namespace vtkm::benchmarking
namespace
{
using namespace vtkm::benchmarking;
struct BenchmarkValueTypeFunctor
// Make this global so benchmarks can access the current device id:
vtkm::cont::InitializeResult Config;
const vtkm::UInt64 COPY_SIZE_MIN = (1 << 10); // 1 KiB
const vtkm::UInt64 COPY_SIZE_MAX = (1 << 30); // 1 GiB
using TypeList = vtkm::List<vtkm::UInt8,
vtkm::Vec2ui_8,
vtkm::Vec3ui_8,
vtkm::Vec4ui_8,
vtkm::UInt32,
vtkm::Vec2ui_32,
vtkm::UInt64,
vtkm::Vec2ui_64,
vtkm::Float32,
vtkm::Vec2f_32,
vtkm::Float64,
vtkm::Vec2f_64,
vtkm::Pair<vtkm::UInt32, vtkm::Float32>,
vtkm::Pair<vtkm::UInt32, vtkm::Float64>,
vtkm::Pair<vtkm::UInt64, vtkm::Float32>,
vtkm::Pair<vtkm::UInt64, vtkm::Float64>>;
template <typename ValueType>
void CopySpeed(benchmark::State& state)
{
template <typename DeviceAdapter>
bool operator()(DeviceAdapter id)
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::UInt64 numBytes = static_cast<vtkm::UInt64>(state.range(0));
const vtkm::Id numValues = static_cast<vtkm::Id>(numBytes / sizeof(ValueType));
state.SetLabel(vtkm::cont::GetHumanReadableSize(numBytes));
vtkm::cont::ArrayHandle<ValueType> src;
vtkm::cont::ArrayHandle<ValueType> dst;
src.Allocate(numValues);
dst.Allocate(numValues);
vtkm::cont::Timer timer(device);
for (auto _ : state)
{
BenchmarkValueType<vtkm::UInt8, DeviceAdapter>(id);
BenchmarkValueType<vtkm::Vec2ui_8, DeviceAdapter>(id);
BenchmarkValueType<vtkm::Vec3ui_8, DeviceAdapter>(id);
BenchmarkValueType<vtkm::Vec4ui_8, DeviceAdapter>(id);
(void)_;
timer.Start();
vtkm::cont::Algorithm::Copy(device, src, dst);
timer.Stop();
BenchmarkValueType<vtkm::UInt32, DeviceAdapter>(id);
BenchmarkValueType<vtkm::Vec2ui_32, DeviceAdapter>(id);
BenchmarkValueType<vtkm::UInt64, DeviceAdapter>(id);
BenchmarkValueType<vtkm::Vec2ui_64, DeviceAdapter>(id);
BenchmarkValueType<vtkm::Float32, DeviceAdapter>(id);
BenchmarkValueType<vtkm::Vec2f_32, DeviceAdapter>(id);
BenchmarkValueType<vtkm::Float64, DeviceAdapter>(id);
BenchmarkValueType<vtkm::Vec2f_64, DeviceAdapter>(id);
BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float32>, DeviceAdapter>(id);
BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float64>, DeviceAdapter>(id);
BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float32>, DeviceAdapter>(id);
BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float64>, DeviceAdapter>(id);
return true;
state.SetIterationTime(timer.GetElapsedTime());
}
};
const int64_t iterations = static_cast<int64_t>(state.iterations());
state.SetBytesProcessed(static_cast<int64_t>(numBytes) * iterations);
state.SetItemsProcessed(static_cast<int64_t>(numValues) * iterations);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(CopySpeed,
->Range(COPY_SIZE_MIN, COPY_SIZE_MAX)
->ArgName("Bytes"),
TypeList);
} // end anon namespace
int main(int argc, char* argv[])
{
auto opts = vtkm::cont::InitializeOptions::RequireDevice |
vtkm::cont::InitializeOptions::ErrorOnBadOption | vtkm::cont::InitializeOptions::AddHelp;
auto config = vtkm::cont::Initialize(argc, argv, opts);
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
// Set up device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
// Handle NumThreads command-line arg:
#ifdef VTKM_ENABLE_TBB
int numThreads = tbb::task_scheduler_init::automatic;
#endif // TBB
@@ -196,6 +125,6 @@ int main(int argc, char* argv[])
tbb::task_scheduler_init init(numThreads);
#endif // TBB
BenchmarkValueTypeFunctor functor;
vtkm::cont::TryExecuteOnDevice(config.Device, functor);
// Handle benchmarking-related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -14,6 +14,7 @@
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/cont/testing/MakeTestDataSet.h>
@@ -31,114 +32,97 @@
#include <string>
#include <vector>
using namespace vtkm::benchmarking;
namespace vtkm
{
namespace benchmarking
namespace
{
template <typename Precision, typename DeviceAdapter>
struct BenchRayTracing
// Hold configuration state (e.g. active device)
vtkm::cont::InitializeResult Config;
void BenchRayTracing(::benchmark::State& state)
{
vtkm::rendering::raytracing::RayTracer Tracer;
vtkm::rendering::raytracing::Camera RayCamera;
vtkm::cont::ArrayHandle<vtkm::Id4> Indices;
vtkm::rendering::raytracing::Ray<Precision> Rays;
vtkm::cont::CoordinateSystem Coords;
vtkm::cont::DataSet Data;
const vtkm::Id3 dims(128, 128, 128);
VTKM_CONT ~BenchRayTracing() {}
vtkm::cont::testing::MakeTestDataSet maker;
auto dataset = maker.Make3DUniformDataSet3(dims);
auto coords = dataset.GetCoordinateSystem();
VTKM_CONT BenchRayTracing()
vtkm::rendering::Camera camera;
vtkm::Bounds bounds = dataset.GetCoordinateSystem().GetBounds();
camera.ResetToBounds(bounds);
vtkm::cont::DynamicCellSet cellset = dataset.GetCellSet();
vtkm::rendering::raytracing::TriangleExtractor triExtractor;
triExtractor.ExtractCells(cellset);
auto triIntersector = std::make_shared<vtkm::rendering::raytracing::TriangleIntersector>(
vtkm::rendering::raytracing::TriangleIntersector());
vtkm::rendering::raytracing::RayTracer tracer;
triIntersector->SetData(coords, triExtractor.GetTriangles());
tracer.AddShapeIntersector(triIntersector);
vtkm::rendering::CanvasRayTracer canvas(1920, 1080);
vtkm::rendering::raytracing::Camera rayCamera;
rayCamera.SetParameters(camera, canvas);
vtkm::rendering::raytracing::Ray<vtkm::Float32> rays;
rayCamera.CreateRays(rays, coords.GetBounds());
rays.Buffers.at(0).InitConst(0.f);
vtkm::cont::Field field = dataset.GetField("pointvar");
vtkm::Range range = field.GetRange().GetPortalConstControl().Get(0);
tracer.SetField(field, range);
vtkm::cont::ArrayHandle<vtkm::Vec4ui_8> temp;
vtkm::cont::ColorTable table("cool to warm");
table.Sample(100, temp);
vtkm::cont::ArrayHandle<vtkm::Vec4f_32> colors;
colors.Allocate(100);
auto portal = colors.GetPortalControl();
auto colorPortal = temp.GetPortalConstControl();
constexpr vtkm::Float32 conversionToFloatSpace = (1.0f / 255.0f);
for (vtkm::Id i = 0; i < 100; ++i)
{
vtkm::Id3 dims(128, 128, 128);
vtkm::cont::testing::MakeTestDataSet maker;
Data = maker.Make3DUniformDataSet3(dims);
Coords = Data.GetCoordinateSystem();
vtkm::rendering::Camera camera;
vtkm::Bounds bounds = Data.GetCoordinateSystem().GetBounds();
camera.ResetToBounds(bounds);
vtkm::cont::DynamicCellSet cellset = Data.GetCellSet();
vtkm::rendering::raytracing::TriangleExtractor triExtractor;
triExtractor.ExtractCells(cellset);
auto triIntersector = std::make_shared<vtkm::rendering::raytracing::TriangleIntersector>(
vtkm::rendering::raytracing::TriangleIntersector());
triIntersector->SetData(Coords, triExtractor.GetTriangles());
Tracer.AddShapeIntersector(triIntersector);
vtkm::rendering::CanvasRayTracer canvas(1920, 1080);
RayCamera.SetParameters(camera, canvas);
RayCamera.CreateRays(Rays, Coords.GetBounds());
Rays.Buffers.at(0).InitConst(0.f);
vtkm::cont::Field field = Data.GetField("pointvar");
vtkm::Range range = field.GetRange().GetPortalConstControl().Get(0);
Tracer.SetField(field, range);
vtkm::cont::ArrayHandle<vtkm::Vec4ui_8> temp;
vtkm::cont::ColorTable table("cool to warm");
table.Sample(100, temp);
vtkm::cont::ArrayHandle<vtkm::Vec4f_32> colors;
colors.Allocate(100);
auto portal = colors.GetPortalControl();
auto colorPortal = temp.GetPortalConstControl();
constexpr vtkm::Float32 conversionToFloatSpace = (1.0f / 255.0f);
for (vtkm::Id i = 0; i < 100; ++i)
{
auto color = colorPortal.Get(i);
vtkm::Vec4f_32 t(color[0] * conversionToFloatSpace,
color[1] * conversionToFloatSpace,
color[2] * conversionToFloatSpace,
color[3] * conversionToFloatSpace);
portal.Set(i, t);
}
Tracer.SetColorMap(colors);
Tracer.Render(Rays);
auto color = colorPortal.Get(i);
vtkm::Vec4f_32 t(color[0] * conversionToFloatSpace,
color[1] * conversionToFloatSpace,
color[2] * conversionToFloatSpace,
color[3] * conversionToFloatSpace);
portal.Set(i, t);
}
VTKM_CONT
vtkm::Float64 operator()()
tracer.SetColorMap(colors);
tracer.Render(rays);
vtkm::cont::Timer timer{ Config.Device };
for (auto _ : state)
{
vtkm::cont::Timer timer{ DeviceAdapter() };
(void)_;
timer.Start();
rayCamera.CreateRays(rays, coords.GetBounds());
tracer.Render(rays);
timer.Stop();
RayCamera.CreateRays(Rays, Coords.GetBounds());
try
{
Tracer.Render(Rays);
}
catch (vtkm::cont::ErrorBadValue& e)
{
std::cout << "exception " << e.what() << "\n";
}
return timer.GetElapsedTime();
state.SetIterationTime(timer.GetElapsedTime());
}
VTKM_CONT
std::string Description() const { return "A ray tracing benchmark"; }
};
VTKM_MAKE_BENCHMARK(RayTracing, BenchRayTracing);
}
} // end namespace vtkm::benchmarking
VTKM_BENCHMARK(BenchRayTracing);
} // end anon namespace
int main(int argc, char* argv[])
{
auto opts =
vtkm::cont::InitializeOptions::DefaultAnyDevice | vtkm::cont::InitializeOptions::Strict;
auto config = vtkm::cont::Initialize(argc, argv, opts);
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
VTKM_RUN_BENCHMARK(RayTracing, vtkm::List<vtkm::Float32>(), config.Device);
return 0;
// Set up device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
// Handle benchmarking-related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
}

@@ -7,40 +7,37 @@
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include "Benchmarker.h"
#include <vtkm/Math.h>
#include <vtkm/VectorAnalysis.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/CellSetStructured.h>
#include <vtkm/cont/Invoker.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/worklet/DispatcherMapField.h>
#include <vtkm/worklet/DispatcherMapTopology.h>
#include <vtkm/worklet/WorkletMapField.h>
#include <vtkm/worklet/WorkletMapTopology.h>
#include "Benchmarker.h"
#include <vtkm/cont/testing/Testing.h>
#include <cctype>
#include <random>
#include <string>
namespace vtkm
{
namespace benchmarking
namespace
{
#define CUBE_SIZE 256
static const std::string DIVIDER(40, '-');
enum BenchmarkName
{
CELL_TO_POINT = 1 << 1,
POINT_TO_CELL = 1 << 2,
MC_CLASSIFY = 1 << 3,
ALL = CELL_TO_POINT | POINT_TO_CELL | MC_CLASSIFY
};
using ValueTypes = vtkm::List<vtkm::UInt32, vtkm::Int32, vtkm::Int64, vtkm::Float32, vtkm::Float64>;
using ValueVariantHandle = vtkm::cont::VariantArrayHandleBase<ValueTypes>;
// Hold configuration state (e.g. active device)
vtkm::cont::InitializeResult Config;
class AveragePointToCell : public vtkm::worklet::WorkletVisitCellsWithPoints
{
@@ -118,373 +115,277 @@ public:
}
};
using ValueTypes = vtkm::List<vtkm::UInt32, vtkm::Int32, vtkm::Int64, vtkm::Float32, vtkm::Float64>;
/// This class runs a series of micro-benchmarks to measure
/// performance of different field operations
class BenchmarkTopologyAlgorithms
template <typename T, typename Enable = void>
struct NumberGenerator
{
using StorageTag = vtkm::cont::StorageTagBasic;
};
using Timer = vtkm::cont::Timer;
using ValueVariantHandle = vtkm::cont::VariantArrayHandleBase<ValueTypes>;
private:
template <typename T, typename Enable = void>
struct NumberGenerator
template <typename T>
struct NumberGenerator<T, typename std::enable_if<std::is_floating_point<T>::value>::type>
{
std::mt19937 rng;
std::uniform_real_distribution<T> distribution;
NumberGenerator(T low, T high)
: rng()
, distribution(low, high)
{
};
}
T next() { return distribution(rng); }
};
template <typename T>
struct NumberGenerator<T, typename std::enable_if<std::is_floating_point<T>::value>::type>
template <typename T>
struct NumberGenerator<T, typename std::enable_if<!std::is_floating_point<T>::value>::type>
{
std::mt19937 rng;
std::uniform_int_distribution<T> distribution;
NumberGenerator(T low, T high)
: rng()
, distribution(low, high)
{
std::mt19937 rng;
std::uniform_real_distribution<T> distribution;
NumberGenerator(T low, T high)
: rng()
, distribution(low, high)
{
}
T next() { return distribution(rng); }
};
}
T next() { return distribution(rng); }
};
template <typename T>
struct NumberGenerator<T, typename std::enable_if<!std::is_floating_point<T>::value>::type>
// Returns an extra random value.
// Like, an additional random value.
// Not a random value that's somehow "extra random".
template <typename ArrayT>
VTKM_CONT typename ArrayT::ValueType FillRandomValues(ArrayT& array,
vtkm::Id size,
vtkm::Float64 min,
vtkm::Float64 max)
{
using ValueType = typename ArrayT::ValueType;
NumberGenerator<ValueType> generator{ static_cast<ValueType>(min), static_cast<ValueType>(max) };
array.Allocate(size);
auto portal = array.GetPortalControl();
for (vtkm::Id i = 0; i < size; ++i)
{
std::mt19937 rng;
std::uniform_int_distribution<T> distribution;
portal.Set(i, generator.next());
}
return generator.next();
}
NumberGenerator(T low, T high)
: rng()
, distribution(low, high)
{
}
T next() { return distribution(rng); }
};
template <typename Value>
struct BenchCellToPointAvgImpl
{
vtkm::cont::ArrayHandle<Value> Input;
template <typename Value, typename DeviceAdapter>
struct BenchCellToPointAvg
::benchmark::State& State;
vtkm::Id CubeSize;
vtkm::Id NumCells;
vtkm::cont::Timer Timer;
vtkm::cont::Invoker Invoker;
VTKM_CONT
BenchCellToPointAvgImpl(::benchmark::State& state)
: State{ state }
, CubeSize{ CUBE_SIZE }
, NumCells{ (this->CubeSize - 1) * (this->CubeSize - 1) * (this->CubeSize - 1) }
, Timer{ Config.Device }
, Invoker{ Config.Device }
{
std::vector<Value> input;
vtkm::cont::ArrayHandle<Value, StorageTag> InputHandle;
std::size_t DomainSize;
FillRandomValues(this->Input, this->NumCells, 1., 100.);
VTKM_CONT
BenchCellToPointAvg()
{
NumberGenerator<Value> generator(static_cast<Value>(1.0), static_cast<Value>(100.0));
//cube size is points in each dim
this->DomainSize = (CUBE_SIZE - 1) * (CUBE_SIZE - 1) * (CUBE_SIZE - 1);
this->input.resize(DomainSize);
for (std::size_t i = 0; i < DomainSize; ++i)
{
this->input[i] = generator.next();
}
this->InputHandle = vtkm::cont::make_ArrayHandle(this->input);
{ // Configure label:
std::ostringstream desc;
desc << "CubeSize:" << this->CubeSize;
this->State.SetLabel(desc.str());
}
}
VTKM_CONT
vtkm::Float64 operator()()
{
vtkm::cont::CellSetStructured<3> cellSet;
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
vtkm::cont::ArrayHandle<Value, StorageTag> result;
Timer timer{ DeviceAdapter() };
timer.Start();
vtkm::worklet::DispatcherMapTopology<AverageCellToPoint> dispatcher;
dispatcher.Invoke(this->InputHandle, cellSet, result);
return timer.GetElapsedTime();
}
virtual std::string Type() const { return std::string("Static"); }
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "Computing Cell To Point Average "
<< "[" << this->Type() << "] "
<< "with a domain size of: " << this->DomainSize;
return description.str();
}
};
template <typename Value, typename DeviceAdapter>
struct BenchCellToPointAvgDynamic : public BenchCellToPointAvg<Value, DeviceAdapter>
template <typename BenchArrayType>
VTKM_CONT void Run(const BenchArrayType& input)
{
vtkm::cont::CellSetStructured<3> cellSet;
cellSet.SetPointDimensions(vtkm::Id3{ this->CubeSize, this->CubeSize, this->CubeSize });
vtkm::cont::ArrayHandle<Value> result;
VTKM_CONT
vtkm::Float64 operator()()
for (auto _ : this->State)
{
vtkm::cont::CellSetStructured<3> cellSet;
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
(void)_;
this->Timer.Start();
this->Invoker(AverageCellToPoint{}, input, cellSet, result);
this->Timer.Stop();
ValueVariantHandle dinput(this->InputHandle);
vtkm::cont::ArrayHandle<Value, StorageTag> result;
Timer timer{ DeviceAdapter() };
timer.Start();
vtkm::worklet::DispatcherMapTopology<AverageCellToPoint> dispatcher;
dispatcher.Invoke(dinput, cellSet, result);
return timer.GetElapsedTime();
this->State.SetIterationTime(this->Timer.GetElapsedTime());
}
virtual std::string Type() const { return std::string("Dynamic"); }
};
VTKM_MAKE_BENCHMARK(CellToPointAvg, BenchCellToPointAvg);
VTKM_MAKE_BENCHMARK(CellToPointAvgDynamic, BenchCellToPointAvgDynamic);
template <typename Value, typename DeviceAdapter>
struct BenchPointToCellAvg
{
std::vector<Value> input;
vtkm::cont::ArrayHandle<Value, StorageTag> InputHandle;
std::size_t DomainSize;
VTKM_CONT
BenchPointToCellAvg()
{
NumberGenerator<Value> generator(static_cast<Value>(1.0), static_cast<Value>(100.0));
this->DomainSize = (CUBE_SIZE) * (CUBE_SIZE) * (CUBE_SIZE);
this->input.resize(DomainSize);
for (std::size_t i = 0; i < DomainSize; ++i)
{
this->input[i] = generator.next();
}
this->InputHandle = vtkm::cont::make_ArrayHandle(this->input);
}
VTKM_CONT
vtkm::Float64 operator()()
{
vtkm::cont::CellSetStructured<3> cellSet;
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
vtkm::cont::ArrayHandle<Value, StorageTag> result;
Timer timer{ DeviceAdapter() };
timer.Start();
vtkm::worklet::DispatcherMapTopology<AveragePointToCell> dispatcher;
dispatcher.Invoke(this->InputHandle, cellSet, result);
return timer.GetElapsedTime();
}
virtual std::string Type() const { return std::string("Static"); }
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "Computing Point To Cell Average "
<< "[" << this->Type() << "] "
<< "with a domain size of: " << this->DomainSize;
return description.str();
}
};
template <typename Value, typename DeviceAdapter>
struct BenchPointToCellAvgDynamic : public BenchPointToCellAvg<Value, DeviceAdapter>
{
VTKM_CONT
vtkm::Float64 operator()()
{
vtkm::cont::CellSetStructured<3> cellSet;
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
ValueVariantHandle dinput(this->InputHandle);
vtkm::cont::ArrayHandle<Value, StorageTag> result;
Timer timer{ DeviceAdapter() };
timer.Start();
vtkm::worklet::DispatcherMapTopology<AveragePointToCell> dispatcher;
dispatcher.Invoke(dinput, cellSet, result);
return timer.GetElapsedTime();
}
virtual std::string Type() const { return std::string("Dynamic"); }
};
VTKM_MAKE_BENCHMARK(PointToCellAvg, BenchPointToCellAvg);
VTKM_MAKE_BENCHMARK(PointToCellAvgDynamic, BenchPointToCellAvgDynamic);
template <typename Value, typename DeviceAdapter>
struct BenchClassification
{
std::vector<Value> input;
vtkm::cont::ArrayHandle<Value, StorageTag> InputHandle;
Value IsoValue;
size_t DomainSize;
VTKM_CONT
BenchClassification()
{
NumberGenerator<Value> generator(static_cast<Value>(1.0), static_cast<Value>(100.0));
this->DomainSize = (CUBE_SIZE) * (CUBE_SIZE) * (CUBE_SIZE);
this->input.resize(DomainSize);
for (std::size_t i = 0; i < DomainSize; ++i)
{
this->input[i] = generator.next();
}
this->InputHandle = vtkm::cont::make_ArrayHandle(this->input);
this->IsoValue = generator.next();
}
VTKM_CONT
vtkm::Float64 operator()()
{
vtkm::cont::CellSetStructured<3> cellSet;
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
vtkm::cont::ArrayHandle<vtkm::IdComponent, StorageTag> result;
ValueVariantHandle dinput(this->InputHandle);
Timer timer{ DeviceAdapter() };
timer.Start();
Classification<Value> worklet(this->IsoValue);
vtkm::worklet::DispatcherMapTopology<Classification<Value>> dispatcher(worklet);
dispatcher.Invoke(dinput, cellSet, result);
return timer.GetElapsedTime();
}
virtual std::string Type() const { return std::string("Static"); }
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "Computing Marching Cubes Classification "
<< "[" << this->Type() << "] "
<< "with a domain size of: " << this->DomainSize;
return description.str();
}
};
template <typename Value, typename DeviceAdapter>
struct BenchClassificationDynamic : public BenchClassification<Value, DeviceAdapter>
{
VTKM_CONT
vtkm::Float64 operator()()
{
vtkm::cont::CellSetStructured<3> cellSet;
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
vtkm::cont::ArrayHandle<vtkm::IdComponent, StorageTag> result;
Timer timer{ DeviceAdapter() };
timer.Start();
Classification<Value> worklet(this->IsoValue);
vtkm::worklet::DispatcherMapTopology<Classification<Value>> dispatcher(worklet);
dispatcher.Invoke(this->InputHandle, cellSet, result);
timer.Stop();
return timer.GetElapsedTime();
}
virtual std::string Type() const { return std::string("Dynamic"); }
};
VTKM_MAKE_BENCHMARK(Classification, BenchClassification);
VTKM_MAKE_BENCHMARK(ClassificationDynamic, BenchClassificationDynamic);
public:
static VTKM_CONT int Run(int benchmarks, vtkm::cont::DeviceAdapterId id)
{
std::cout << DIVIDER << "\nRunning Topology Algorithm benchmarks\n";
if (benchmarks & CELL_TO_POINT)
{
std::cout << DIVIDER << "\nBenchmarking Cell To Point Average\n";
VTKM_RUN_BENCHMARK(CellToPointAvg, ValueTypes(), id);
VTKM_RUN_BENCHMARK(CellToPointAvgDynamic, ValueTypes(), id);
}
if (benchmarks & POINT_TO_CELL)
{
std::cout << DIVIDER << "\nBenchmarking Point to Cell Average\n";
VTKM_RUN_BENCHMARK(PointToCellAvg, ValueTypes(), id);
VTKM_RUN_BENCHMARK(PointToCellAvgDynamic, ValueTypes(), id);
}
if (benchmarks & MC_CLASSIFY)
{
std::cout << DIVIDER << "\nBenchmarking Hex/Voxel MC Classification\n";
VTKM_RUN_BENCHMARK(Classification, ValueTypes(), id);
VTKM_RUN_BENCHMARK(ClassificationDynamic, ValueTypes(), id);
}
return 0;
// #items = #points
const int64_t iterations = static_cast<int64_t>(this->State.iterations());
this->State.SetItemsProcessed(static_cast<int64_t>(cellSet.GetNumberOfPoints()) * iterations);
}
};
#undef ARRAY_SIZE
}
} // namespace vtkm::benchmarking
int main(int argc, char* argv[])
template <typename ValueType>
void BenchCellToPointAvgStatic(::benchmark::State& state)
{
auto opts = vtkm::cont::InitializeOptions::DefaultAnyDevice;
auto config = vtkm::cont::Initialize(argc, argv, opts);
BenchCellToPointAvgImpl<ValueType> impl{ state };
impl.Run(impl.Input);
};
VTKM_BENCHMARK_TEMPLATES(BenchCellToPointAvgStatic, ValueTypes);
int benchmarks = 0;
if (argc <= 1)
template <typename ValueType>
void BenchCellToPointAvgDynamic(::benchmark::State& state)
{
BenchCellToPointAvgImpl<ValueType> impl{ state };
impl.Run(ValueVariantHandle{ impl.Input });
};
VTKM_BENCHMARK_TEMPLATES(BenchCellToPointAvgDynamic, ValueTypes);
template <typename Value>
struct BenchPointToCellAvgImpl
{
vtkm::cont::ArrayHandle<Value> Input;
::benchmark::State& State;
vtkm::Id CubeSize;
vtkm::Id NumPoints;
vtkm::cont::Timer Timer;
vtkm::cont::Invoker Invoker;
VTKM_CONT
BenchPointToCellAvgImpl(::benchmark::State& state)
: State{ state }
, CubeSize{ CUBE_SIZE }
, NumPoints{ (this->CubeSize) * (this->CubeSize) * (this->CubeSize) }
, Timer{ Config.Device }
, Invoker{ Config.Device }
{
benchmarks = vtkm::benchmarking::ALL;
}
else
{
for (int i = 1; i < argc; ++i)
{
std::string arg = argv[i];
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
});
if (arg == "celltopoint")
{
benchmarks |= vtkm::benchmarking::CELL_TO_POINT;
}
else if (arg == "pointtocell")
{
benchmarks |= vtkm::benchmarking::POINT_TO_CELL;
}
else if (arg == "classify")
{
benchmarks |= vtkm::benchmarking::MC_CLASSIFY;
}
else
{
std::cerr << "Unrecognized benchmark: " << argv[i] << std::endl;
std::cerr << "USAGE: " << argv[0] << " [options] [<benchmarks>]" << std::endl;
std::cerr << "Options are: " << std::endl;
std::cerr << config.Usage << std::endl;
std::cerr << "Benchmarks are one or more of the following:" << std::endl;
std::cerr << " CellToPoint\tFind average of point data on each cell" << std::endl;
std::cerr << " PointToCell\tFind average of cell data on each point" << std::endl;
std::cerr << " Classify\tFind Marching Cube case of each cell" << std::endl;
std::cerr << "If no benchmarks are specified, all are run." << std::endl;
return 1;
}
FillRandomValues(this->Input, this->NumPoints, 1., 100.);
{ // Configure label:
std::ostringstream desc;
desc << "CubeSize:" << this->CubeSize;
this->State.SetLabel(desc.str());
}
}
//now actually execute the benchmarks
template <typename BenchArrayType>
VTKM_CONT void Run(const BenchArrayType& input)
{
vtkm::cont::CellSetStructured<3> cellSet;
cellSet.SetPointDimensions(vtkm::Id3{ this->CubeSize, this->CubeSize, this->CubeSize });
vtkm::cont::ArrayHandle<Value> result;
return vtkm::benchmarking::BenchmarkTopologyAlgorithms::Run(benchmarks, config.Device);
for (auto _ : this->State)
{
(void)_;
this->Timer.Start();
this->Invoker(AveragePointToCell{}, input, cellSet, result);
this->Timer.Stop();
this->State.SetIterationTime(this->Timer.GetElapsedTime());
}
// #items = #cells
const int64_t iterations = static_cast<int64_t>(this->State.iterations());
this->State.SetItemsProcessed(static_cast<int64_t>(cellSet.GetNumberOfCells()) * iterations);
}
};
template <typename ValueType>
void BenchPointToCellAvgStatic(::benchmark::State& state)
{
BenchPointToCellAvgImpl<ValueType> impl{ state };
impl.Run(impl.Input);
};
VTKM_BENCHMARK_TEMPLATES(BenchPointToCellAvgStatic, ValueTypes);
template <typename ValueType>
void BenchPointToCellAvgDynamic(::benchmark::State& state)
{
BenchPointToCellAvgImpl<ValueType> impl{ state };
impl.Run(ValueVariantHandle{ impl.Input });
};
VTKM_BENCHMARK_TEMPLATES(BenchPointToCellAvgDynamic, ValueTypes);
template <typename Value>
struct BenchClassificationImpl
{
vtkm::cont::ArrayHandle<Value> Input;
::benchmark::State& State;
vtkm::Id CubeSize;
vtkm::Id DomainSize;
Value IsoValue;
vtkm::cont::Timer Timer;
vtkm::cont::Invoker Invoker;
VTKM_CONT
BenchClassificationImpl(::benchmark::State& state)
: State{ state }
, CubeSize{ CUBE_SIZE }
, DomainSize{ this->CubeSize * this->CubeSize * this->CubeSize }
, Timer{ Config.Device }
, Invoker{ Config.Device }
{
this->IsoValue = FillRandomValues(this->Input, this->DomainSize, 1., 100.);
{ // Configure label:
std::ostringstream desc;
desc << "CubeSize:" << this->CubeSize;
this->State.SetLabel(desc.str());
}
}
template <typename BenchArrayType>
VTKM_CONT void Run(const BenchArrayType& input)
{
vtkm::cont::CellSetStructured<3> cellSet;
cellSet.SetPointDimensions(vtkm::Id3{ this->CubeSize, this->CubeSize, this->CubeSize });
vtkm::cont::ArrayHandle<vtkm::IdComponent> result;
Classification<Value> worklet(this->IsoValue);
for (auto _ : this->State)
{
(void)_;
this->Timer.Start();
this->Invoker(worklet, input, cellSet, result);
this->Timer.Stop();
this->State.SetIterationTime(this->Timer.GetElapsedTime());
}
// #items = #cells
const int64_t iterations = static_cast<int64_t>(this->State.iterations());
this->State.SetItemsProcessed(static_cast<int64_t>(cellSet.GetNumberOfCells()) * iterations);
}
};
template <typename ValueType>
void BenchClassificationStatic(::benchmark::State& state)
{
BenchClassificationImpl<ValueType> impl{ state };
impl.Run(impl.Input);
};
VTKM_BENCHMARK_TEMPLATES(BenchClassificationStatic, ValueTypes);
template <typename ValueType>
void BenchClassificationDynamic(::benchmark::State& state)
{
BenchClassificationImpl<ValueType> impl{ state };
impl.Run(ValueVariantHandle{ impl.Input });
};
VTKM_BENCHMARK_TEMPLATES(BenchClassificationDynamic, ValueTypes);
} // end anon namespace
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
// Set up device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
// Handle benchmarking-related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
}

@@ -11,330 +11,385 @@
#ifndef vtk_m_benchmarking_Benchmarker_h
#define vtk_m_benchmarking_Benchmarker_h
#include <vtkm/Math.h>
#include <vtkm/cont/DeviceAdapterTag.h>
#include <vtkm/cont/TryExecute.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/cont/testing/Testing.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <vtkm/internal/brigand.hpp>
/*
* Writing a Benchmark
* -------------------
* To write a benchmark you must provide a functor that will run the operations
* you want to time and return the run time of those operations using the timer
* for the device. The benchmark should also be templated on the value type being
* operated on. Then use VTKM_MAKE_BENCHMARK to generate a maker functor and
* VTKM_RUN_BENCHMARK to run the benchmark on a list of types.
*
* For Example:
*
* template<typename Value>
* struct BenchSilly {
* // Setup anything that doesn't need to change per run in the constructor
* VTKM_CONT BenchSilly(){}
*
* // The overloaded call operator will run the operations being timed and
* // return the execution time
* VTKM_CONT
* vtkm::Float64 operator()(){
* return 0.05;
* }
*
* // The benchmark must also provide a method describing itself, this is
* // used when printing out run time statistics
* VTKM_CONT
* std::string Description() const {
* return "A silly benchmark";
* }
* };
*
* // Now use the VTKM_MAKE_BENCHMARK macro to generate a maker functor for
* // your benchmark. This lets us generate the benchmark functor for each type
* // we want to test
* VTKM_MAKE_BENCHMARK(Silly, BenchSilly);
*
* // You can also optionally pass arguments to the constructor like so:
* // VTKM_MAKE_BENCHMARK(Blah, BenchBlah, 1, 2, 3);
* // Note that benchmark names (the first argument) must be unique so different
* // parameters to the constructor should have different names
*
* // We can now run our benchmark using VTKM_RUN_BENCHMARK, passing the
* // benchmark name and type list to run on
* int main(int, char**){
* VTKM_RUN_BENCHMARK(Silly, vtkm::List<vtkm::Float32>());
* return 0;
* }
*
* Check out vtkm/benchmarking/BenchmarkDeviceAdapter.h for some example usage
*/
#include <benchmark/benchmark.h>
/*
* Use the VTKM_MAKE_BENCHMARK macro to define a maker functor for your benchmark.
* This is used to allow you to template the benchmark functor on the type being benchmarked
* and the device adapter so you can write init code in the constructor. Then the maker will
* return a constructed instance of your benchmark for the type being benchmarked.
* The VA_ARGS are used to pass any extra arguments needed by your benchmark
*/
#define VTKM_MAKE_BENCHMARK(Name, Bench, ...) \
struct MakeBench##Name \
{ \
template <typename Value, typename DeviceAdapter> \
VTKM_CONT Bench<Value, DeviceAdapter> operator()(const Value vtkmNotUsed(v), \
DeviceAdapter vtkmNotUsed(id)) const \
{ \
return Bench<Value, DeviceAdapter>(__VA_ARGS__); \
} \
}
#include <ostream>
/*
* Use the VTKM_RUN_BENCHMARK macro to run your benchmark on the type list passed.
* You must have previously defined a maker functor with VTKM_MAKE_BENCHMARK that this
* macro will look for and use
*/
#define VTKM_RUN_BENCHMARK(Name, Types, Id) \
vtkm::benchmarking::BenchmarkTypes(MakeBench##Name(), (Types), (Id))
/// \file Benchmarker.h
/// \brief Benchmarking utilities
///
/// VTK-m's benchmarking framework is built on top of Google Benchmark.
///
/// A benchmark is now a single function, which is passed to a macro:
///
/// ```
/// void MyBenchmark(::benchmark::State& state)
/// {
/// MyClass someClass;
///
/// // Optional: Add a descriptive label with additional benchmark details:
/// state.SetLabel("Blah blah blah.");
///
/// // Must use a vtkm timer to properly capture e.g. CUDA execution times.
/// vtkm::cont::Timer timer;
/// for (auto _ : state)
/// {
/// someClass.Reset();
///
/// timer.Start();
/// someClass.DoWork();
/// timer.Stop();
///
/// state.SetIterationTime(timer.GetElapsedTime());
/// }
///
/// // Optional: Report items and/or bytes processed per iteration in output:
/// state.SetItemsProcessed(state.iterations() * someClass.GetNumberOfItems());
/// state.SetBytesProcessed(state.iterations() * someClass.GetNumberOfBytes());
/// }
/// VTKM_BENCHMARK(MyBenchmark);
/// ```
///
/// Google Benchmark also makes it easy to implement parameter sweep benchmarks:
///
/// ```
/// void MyParameterSweep(::benchmark::State& state)
/// {
/// // The current value in the sweep:
/// const vtkm::Id currentValue = state.range(0);
///
/// MyClass someClass;
/// someClass.SetSomeParameter(currentValue);
///
/// vtkm::cont::Timer timer;
/// for (auto _ : state)
/// {
/// someClass.Reset();
///
/// timer.Start();
/// someClass.DoWork();
/// timer.Stop();
///
/// state.SetIterationTime(timer.GetElapsedTime());
/// }
/// }
/// VTKM_BENCHMARK_OPTS(MyParameterSweep, ->ArgName("Param")->Range(32, 1024 * 1024));
/// ```
///
/// will generate and launch several benchmarks, exploring the parameter space of
/// `SetSomeParameter` between the values of 32 and (1024*1024). The chain of
/// function calls in the second argument is applied to an instance of
/// ::benchmark::internal::Benchmark. See Google Benchmark's documentation for
/// more details.
///
/// For more complex benchmark configurations, the VTKM_BENCHMARK_APPLY macro
/// accepts a function with the signature
/// `void Func(::benchmark::internal::Benchmark*)`, which can apply arbitrary
/// settings to the generated benchmark.
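///
/// As an illustration, a sketch that registers explicit sizes (the function name
/// `ConfigureSizes` and the specific values are hypothetical):
///
/// ```
/// void ConfigureSizes(::benchmark::internal::Benchmark* bm)
/// {
///   bm->ArgName("Size");
///   for (int64_t size = 512; size <= 1024 * 1024; size *= 8)
///   {
///     bm->Arg(size);
///   }
/// }
/// VTKM_BENCHMARK_APPLY(MyBenchmark, ConfigureSizes);
/// ```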
///
/// To instantiate a templated benchmark across a list of types, the
/// VTKM_BENCHMARK_TEMPLATE* macros take a vtkm::List of types as an additional
/// parameter. The templated benchmark function will be instantiated and called
/// for each type in the list:
///
/// ```
/// template <typename T>
/// void MyBenchmark(::benchmark::State& state)
/// {
/// MyClass<T> someClass;
///
/// // Must use a vtkm timer to properly capture e.g. CUDA execution times.
/// vtkm::cont::Timer timer;
/// for (auto _ : state)
/// {
/// someClass.Reset();
///
/// timer.Start();
/// someClass.DoWork();
/// timer.Stop();
///
/// state.SetIterationTime(timer.GetElapsedTime());
/// }
/// }
/// VTKM_BENCHMARK_TEMPLATES(MyBenchmark, vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
/// ```
///
/// The benchmarks are executed by calling the `VTKM_EXECUTE_BENCHMARKS(argc, argv)`
/// macro from `main`. There is also a `VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, some_string)`
/// macro that appends the contents of `some_string` to the Google Benchmark preamble.
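///
/// A typical `main` then looks like the following sketch, modeled on the existing
/// benchmarks in vtk-m/benchmarking/:
///
/// ```
/// int main(int argc, char* argv[])
/// {
///   // Parse VTK-m options and force the requested device:
///   auto opts = vtkm::cont::InitializeOptions::RequireDevice |
///     vtkm::cont::InitializeOptions::AddHelp;
///   auto config = vtkm::cont::Initialize(argc, argv, opts);
///   vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(config.Device);
///
///   // Handle benchmarking-related args and run benchmarks:
///   VTKM_EXECUTE_BENCHMARKS(argc, argv);
/// }
/// ```
///
/// (The ported benchmarks store the `InitializeResult` in a global so the benchmark
/// functions can query the active device.)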
///
/// If a benchmark is not compatible with some configuration, it may call
/// `state.SkipWithError("Error message");` on the `::benchmark::State` object and return. This is
/// useful, for instance, in the filter tests when the input is not compatible with the filter.
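///
/// For instance, a sketch of a benchmark that skips gracefully when an allocation
/// fails (the names and the size argument are illustrative):
///
/// ```
/// void MyAllocatingBenchmark(::benchmark::State& state)
/// {
///   vtkm::cont::ArrayHandle<vtkm::Float32> data;
///   try
///   {
///     data.Allocate(static_cast<vtkm::Id>(state.range(0)));
///   }
///   catch (vtkm::cont::ErrorBadAllocation&)
///   {
///     state.SkipWithError("Allocation too large for this device.");
///     return;
///   }
///   // ... timing loop as shown above ...
/// }
/// ```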
///
/// When launching a benchmark executable, the following options are supported by Google Benchmark:
///
/// - `--benchmark_list_tests`: List all available tests.
/// - `--benchmark_filter="[regex]"`: Only run benchmark with names that match `[regex]`.
/// - `--benchmark_filter="-[regex]"`: Only run benchmark with names that DON'T match `[regex]`.
/// - `--benchmark_min_time=[float]`: Make sure each benchmark repetition gathers `[float]` seconds
/// of data.
/// - `--benchmark_repetitions=[int]`: Run each benchmark `[int]` times and report aggregate statistics
/// (mean, stdev, etc.). A "repetition" refers to a single execution of the benchmark function, not
/// an "iteration", which is a single pass through the `for (auto _ : state) { ... }` loop.
/// - `--benchmark_report_aggregates_only="true|false"`: If true, only the aggregate statistics are
/// reported (affects both console and file output). Requires `--benchmark_repetitions` to be useful.
/// - `--benchmark_display_aggregates_only="true|false"`: If true, only the aggregate statistics are
/// printed to the terminal. Any file output will still contain all repetition info.
/// - `--benchmark_format="console|json|csv"`: Specify terminal output format: human readable
/// (`console`) or `csv`/`json` formats.
/// - `--benchmark_out_format="console|json|csv"`: Specify file output format: human readable
/// (`console`) or `csv`/`json` formats.
/// - `--benchmark_out=[filename]`: Specify output file.
/// - `--benchmark_color="true|false"`: Toggle color output in terminal when using `console` output.
/// - `--benchmark_counters_tabular="true|false"`: Print counter information (e.g. bytes/sec, items/sec)
/// in the table, rather than appending them as a label.
///
/// For more information and examples of practical usage, take a look at the existing benchmarks in
/// vtk-m/benchmarking/.
/// \def VTKM_EXECUTE_BENCHMARKS(argc, argv)
///
/// Run the benchmarks defined in the current file. Benchmarks may be filtered
/// and modified using the passed arguments; see the Google Benchmark documentation
/// for more details.
#define VTKM_EXECUTE_BENCHMARKS(argc, argv) vtkm::bench::detail::ExecuteBenchmarks(argc, argv)
/// \def VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble)
///
/// Run the benchmarks defined in the current file. Benchmarks may be filtered
/// and modified using the passed arguments; see the Google Benchmark documentation
/// for more details. The `preamble` string may be used to supply additional
/// information that will be appended to the output's preamble.
#define VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble) \
vtkm::bench::detail::ExecuteBenchmarks(argc, argv, preamble)
/// \def VTKM_BENCHMARK(BenchFunc)
///
/// Define a simple benchmark. A single benchmark will be generated that executes
/// `BenchFunc`. `BenchFunc` must have the signature:
///
/// ```
/// void BenchFunc(::benchmark::State& state)
/// ```
#define VTKM_BENCHMARK(BenchFunc) BENCHMARK(BenchFunc)->UseManualTime()
/// \def VTKM_BENCHMARK_OPTS(BenchFunc, Args)
///
/// Similar to `VTKM_BENCHMARK`, but allows additional options to be specified
/// on the `::benchmark::internal::Benchmark` object. Example usage:
///
/// ```
/// VTKM_BENCHMARK_OPTS(MyBenchmark, ->ArgName("MyParam")->Range(32, 1024*1024));
/// ```
///
/// Note the similarity to the raw Google Benchmark usage of
/// `BENCHMARK(MyBenchmark)->ArgName("MyParam")->Range(32, 1024*1024);`. See
/// the Google Benchmark documentation for more details on the available options.
#define VTKM_BENCHMARK_OPTS(BenchFunc, options) BENCHMARK(BenchFunc)->UseManualTime() options
/// \def VTKM_BENCHMARK_APPLY(BenchFunc, ConfigFunc)
///
/// Similar to `VTKM_BENCHMARK`, but allows advanced benchmark configuration
/// via a supplied ConfigFunc, similar to Google Benchmark's
/// `BENCHMARK(BenchFunc)->Apply(ConfigFunc)`. `ConfigFunc` must have the
/// signature:
///
/// ```
/// void ConfigFunc(::benchmark::internal::Benchmark*);
/// ```
///
/// See the Google Benchmark documentation for more details on the available options.
#define VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor) \
BENCHMARK(BenchFunc)->Apply(applyFunctor)->UseManualTime()
/// \def VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList)
///
/// Define a family of benchmark that vary by template argument. A single
/// benchmark will be generated for each type in `TypeList` (a vtkm::List of
/// types) that executes `BenchFunc<T>`. `BenchFunc` must have the signature:
///
/// ```
/// template <typename T>
/// void BenchFunc(::benchmark::State& state)
/// ```
#define VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList) \
VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, vtkm::bench::detail::NullApply, TypeList)
/// \def VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, Args, TypeList)
///
/// Similar to `VTKM_BENCHMARK_TEMPLATES`, but allows additional options to be specified
/// on the `::benchmark::internal::Benchmark` object. Example usage:
///
/// ```
/// VTKM_BENCHMARK_TEMPLATES_OPTS(MyBenchmark,
/// ->ArgName("MyParam")->Range(32, 1024*1024),
/// vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
/// ```
#define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList) \
VTKM_BENCHMARK_TEMPLATES_APPLY( \
BenchFunc, [](::benchmark::internal::Benchmark* bm) { bm options; }, TypeList)
/// \def VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ConfigFunc, TypeList)
///
/// Similar to `VTKM_BENCHMARK_TEMPLATES`, but allows advanced benchmark configuration
/// via a supplied ConfigFunc, similar to Google Benchmark's
/// `BENCHMARK(BenchFunc)->Apply(ConfigFunc)`. `ConfigFunc` must have the
/// signature:
///
/// ```
/// void ConfigFunc(::benchmark::internal::Benchmark*);
/// ```
///
/// See the Google Benchmark documentation for more details on the available options.
#define VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ApplyFunctor, TypeList) \
namespace \
{ /* A template function cannot be used as a template parameter, so wrap the function with \
* a template struct to get it into the GenerateTemplateBenchmarks class. */ \
template <typename... Ts> \
struct VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
{ \
static ::benchmark::internal::Function* GetFunction() { return BenchFunc<Ts...>; } \
}; \
} /* end anon namespace */ \
int BENCHMARK_PRIVATE_NAME(BenchFunc) = vtkm::bench::detail::GenerateTemplateBenchmarks< \
brigand::bind<VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)>, \
TypeList>::Register(#BenchFunc, ApplyFunctor)
// Internal use only:
#define VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
BENCHMARK_PRIVATE_CONCAT(_wrapper_, BenchFunc, __LINE__)
namespace vtkm
{
namespace benchmarking
namespace bench
{
namespace stats
namespace detail
{
// Checks that the sequence is sorted, returns true if it's sorted, false
// otherwise
template <typename ForwardIt>
bool is_sorted(ForwardIt first, ForwardIt last)
static inline void NullApply(::benchmark::internal::Benchmark*)
{
ForwardIt next = first;
++next;
for (; next != last; ++next, ++first)
}
/// Do not use directly. The VTKM_BENCHMARK_TEMPLATES macros should be used
/// instead.
// TypeLists could be expanded to compute cross products if we ever have that
// need.
template <typename BoundBench, typename TypeLists>
struct GenerateTemplateBenchmarks;
template <template <typename...> class BenchType, typename TypeList>
struct GenerateTemplateBenchmarks<brigand::bind<BenchType>, TypeList>
{
private:
template <typename T>
using MakeBenchType = BenchType<T>;
using Benchmarks = brigand::transform<TypeList, brigand::bind<MakeBenchType, brigand::_1>>;
template <typename ApplyFunctor>
struct RegisterImpl
{
if (*first > *next)
std::string BenchName;
ApplyFunctor Apply;
template <typename P>
void operator()(brigand::type_<BenchType<P>>) const
{
std::ostringstream name;
name << this->BenchName << "<" << vtkm::testing::TypeName<P>::Name() << ">";
auto bm = ::benchmark::internal::RegisterBenchmarkInternal(
new ::benchmark::internal::FunctionBenchmark(name.str().c_str(),
BenchType<P>::GetFunction()));
this->Apply(bm);
// Always use manual time with vtkm::cont::Timer to capture CUDA times accurately.
bm->UseManualTime();
}
};
public:
template <typename ApplyFunctor>
static int Register(const std::string& benchName, ApplyFunctor&& apply)
{
brigand::for_each<Benchmarks>(
RegisterImpl<ApplyFunctor>{ benchName, std::forward<ApplyFunctor>(apply) });
return 0;
}
};
class VTKmConsoleReporter : public ::benchmark::ConsoleReporter
{
std::string UserPreamble;
public:
VTKmConsoleReporter() = default;
explicit VTKmConsoleReporter(const std::string& preamble)
: UserPreamble{ preamble }
{
}
bool ReportContext(const Context& context) override
{
if (!::benchmark::ConsoleReporter::ReportContext(context))
{
return false;
}
}
return true;
}
// Get the value representing the `percent` percentile of the
// sorted samples using linear interpolation
vtkm::Float64 PercentileValue(const std::vector<vtkm::Float64>& samples,
const vtkm::Float64 percent)
{
VTKM_ASSERT(!samples.empty());
if (samples.size() == 1)
{
return samples.front();
}
VTKM_ASSERT(percent >= 0.0);
VTKM_ASSERT(percent <= 100.0);
VTKM_ASSERT(vtkm::benchmarking::stats::is_sorted(samples.begin(), samples.end()));
if (percent == 100.0)
{
return samples.back();
}
// Find the two nearest percentile values and linearly
// interpolate between them
const vtkm::Float64 rank = percent / 100.0 * (static_cast<vtkm::Float64>(samples.size()) - 1.0);
const vtkm::Float64 low_rank = vtkm::Floor(rank);
const vtkm::Float64 dist = rank - low_rank;
const size_t k = static_cast<size_t>(low_rank);
const vtkm::Float64 low = samples[k];
const vtkm::Float64 high = samples[k + 1];
return low + (high - low) * dist;
}
// Winsorize the samples to clean up any very extreme outliers
// Will replace all samples below `percent` and above 100 - `percent` percentiles
// with the value at the percentile
// NOTE: Assumes the samples have been sorted, as we make use of PercentileValue
void Winsorize(std::vector<vtkm::Float64>& samples, const vtkm::Float64 percent)
{
const vtkm::Float64 low_percentile = PercentileValue(samples, percent);
const vtkm::Float64 high_percentile = PercentileValue(samples, 100.0 - percent);
for (std::vector<vtkm::Float64>::iterator it = samples.begin(); it != samples.end(); ++it)
{
if (*it < low_percentile)
// The rest of the preamble is printed to the error stream, so be consistent:
auto& out = this->GetErrorStream();
// Print list of devices:
out << "VTK-m Device State:\n";
vtkm::cont::GetRuntimeDeviceTracker().PrintSummary(out);
if (!this->UserPreamble.empty())
{
*it = low_percentile;
out << this->UserPreamble << "\n";
}
else if (*it > high_percentile)
{
*it = high_percentile;
}
}
}
// Compute the mean value of the dataset
vtkm::Float64 Mean(const std::vector<vtkm::Float64>& samples)
{
vtkm::Float64 mean = 0;
for (std::vector<vtkm::Float64>::const_iterator it = samples.begin(); it != samples.end(); ++it)
{
mean += *it;
}
return mean / static_cast<vtkm::Float64>(samples.size());
}
// Compute the sample variance of the samples
vtkm::Float64 Variance(const std::vector<vtkm::Float64>& samples)
{
vtkm::Float64 mean = Mean(samples);
vtkm::Float64 square_deviations = 0;
for (std::vector<vtkm::Float64>::const_iterator it = samples.begin(); it != samples.end(); ++it)
{
square_deviations += vtkm::Pow(*it - mean, 2.0);
}
return square_deviations / (static_cast<vtkm::Float64>(samples.size()) - 1.0);
}
// Compute the standard deviation of the samples
vtkm::Float64 StandardDeviation(const std::vector<vtkm::Float64>& samples)
{
return vtkm::Sqrt(Variance(samples));
}
// Compute the median absolute deviation of the dataset
vtkm::Float64 MedianAbsDeviation(const std::vector<vtkm::Float64>& samples)
{
std::vector<vtkm::Float64> abs_deviations;
abs_deviations.reserve(samples.size());
const vtkm::Float64 median = PercentileValue(samples, 50.0);
for (std::vector<vtkm::Float64>::const_iterator it = samples.begin(); it != samples.end(); ++it)
{
abs_deviations.push_back(vtkm::Abs(*it - median));
}
std::sort(abs_deviations.begin(), abs_deviations.end());
return PercentileValue(abs_deviations, 50.0);
}
} // stats
out.flush();
/*
* The benchmarker takes a functor to benchmark and runs it multiple times,
* printing out statistics of the run time at the end.
* The functor passed should return the run time of the thing being benchmarked
* in seconds, this lets us avoid including any per-run setup time in the benchmark.
* However any one-time setup should be done in the functor's constructor
*/
struct Benchmarker
{
std::vector<vtkm::Float64> Samples;
std::string BenchmarkName;
const vtkm::Float64 MaxRuntime;
const size_t MaxIterations;
public:
VTKM_CONT
Benchmarker(vtkm::Float64 maxRuntime = 30, std::size_t maxIterations = 100)
: MaxRuntime(maxRuntime)
, MaxIterations(maxIterations)
{
}
template <typename Functor>
VTKM_CONT void GatherSamples(Functor func)
{
this->Samples.clear();
this->BenchmarkName = func.Description();
// Do a warm-up run. If the benchmark allocates any additional memory
// eg. storage for output results, this will let it do that and
// allow us to avoid measuring the allocation time in the actual benchmark run
func();
this->Samples.reserve(this->MaxIterations);
// Run each benchmark for MAX_RUNTIME seconds or MAX_ITERATIONS iterations, whichever
// takes less time. This kind of assumes that running for 500 iterations or 30s will give
// good statistics, but if median abs dev and/or std dev are too high both these limits
// could be increased
size_t iter = 0;
for (vtkm::Float64 elapsed = 0.0; elapsed < this->MaxRuntime && iter < this->MaxIterations;
elapsed += this->Samples.back(), ++iter)
{
this->Samples.push_back(func());
}
std::sort(this->Samples.begin(), this->Samples.end());
stats::Winsorize(this->Samples, 5.0);
}
VTKM_CONT void PrintSummary(std::ostream& out = std::cout)
{
out << "Benchmark \'" << this->BenchmarkName << "\' results:\n";
if (this->Samples.empty())
{
out << "\tNo samples gathered!\n";
return;
}
out << "\tnumSamples = " << this->Samples.size() << "\n"
<< "\tmedian = " << stats::PercentileValue(this->Samples, 50.0) << "s\n"
<< "\tmedian abs dev = " << stats::MedianAbsDeviation(this->Samples) << "s\n"
<< "\tmean = " << stats::Mean(this->Samples) << "s\n"
<< "\tstd dev = " << stats::StandardDeviation(this->Samples) << "s\n"
<< "\tmin = " << this->Samples.front() << "s\n"
<< "\tmax = " << this->Samples.back() << "s\n";
}
template <typename DeviceAdapter, typename MakerFunctor, typename T>
VTKM_CONT bool operator()(DeviceAdapter id, MakerFunctor&& makerFunctor, T t)
{
auto func = makerFunctor(t, id);
std::cout << "Running '" << func.Description() << "'" << std::endl;
this->GatherSamples(func);
this->PrintSummary();
return true;
}
VTKM_CONT const std::vector<vtkm::Float64>& GetSamples() const { return this->Samples; }
VTKM_CONT void Reset()
{
this->Samples.clear();
this->BenchmarkName.clear();
}
};
template <typename MakerFunctor>
class InternalPrintTypeAndBench
// Returns the number of executed benchmarks:
static inline vtkm::Id ExecuteBenchmarks(int& argc,
char* argv[],
const std::string& preamble = std::string{})
{
MakerFunctor Maker;
public:
VTKM_CONT
InternalPrintTypeAndBench(MakerFunctor maker)
: Maker(maker)
::benchmark::Initialize(&argc, argv);
if (::benchmark::ReportUnrecognizedArguments(argc, argv))
{
return 1;
}
template <typename T>
VTKM_CONT void operator()(T t, vtkm::cont::DeviceAdapterId id) const
{
std::cout << "*** " << vtkm::testing::TypeName<T>::Name() << " on device " << id.GetName()
<< " ***************" << std::endl;
Benchmarker bench;
try
{
vtkm::cont::TryExecuteOnDevice(id, bench, Maker, t);
}
catch (std::exception& e)
{
std::cout << "\n"
<< "An exception occurring during a benchmark:\n\t" << e.what() << "\n"
<< "Attempting to continue with remaining benchmarks...\n\n";
}
}
};
VTKmConsoleReporter reporter{ preamble };
template <class MakerFunctor, class TypeList>
VTKM_CONT void BenchmarkTypes(MakerFunctor&& maker, TypeList, vtkm::cont::DeviceAdapterId id)
{
vtkm::ListForEach(
InternalPrintTypeAndBench<MakerFunctor>(std::forward<MakerFunctor>(maker)), TypeList(), id);
vtkm::cont::Timer timer;
timer.Start();
std::size_t num = ::benchmark::RunSpecifiedBenchmarks(&reporter);
timer.Stop();
reporter.GetOutputStream().flush();
reporter.GetErrorStream().flush();
reporter.GetErrorStream() << "Ran " << num << " benchmarks in " << timer.GetElapsedTime()
<< " seconds." << std::endl;
return static_cast<vtkm::Id>(num);
}
}
}
} // end namespace vtkm::bench::detail
#endif

@@ -7,18 +7,24 @@
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
# Find Google Benchmark. Note that benchmark_DIR must be pointed at an
# installation, not a build directory.
find_package(benchmark)
function(add_benchmark)
set(options)
set(oneValueArgs NAME FILE)
set(multiValueArgs LIBS)
cmake_parse_arguments(VTKm_AB
"${options}" "${oneValueArgs}" "${multiValueArgs}"
${ARGN}
)
"${options}" "${oneValueArgs}" "${multiValueArgs}"
${ARGN}
)
set(exe_name ${VTKm_AB_NAME})
add_executable(${exe_name} ${VTKm_AB_FILE})
target_link_libraries(${exe_name} PRIVATE ${VTKm_AB_LIBS})
target_link_libraries(${exe_name} PRIVATE benchmark::benchmark)
vtkm_add_drop_unused_function_flags(${exe_name})
vtkm_add_target_information(${exe_name})

@@ -0,0 +1,133 @@
# Updated Benchmark Framework
The benchmarking framework has been updated to use Google Benchmark.
A benchmark is now a single function, which is passed to a macro:
```
void MyBenchmark(::benchmark::State& state)
{
MyClass someClass;
// Optional: Add a descriptive label with additional benchmark details:
state.SetLabel("Blah blah blah.");
// Must use a vtkm timer to properly capture e.g. CUDA execution times.
vtkm::cont::Timer timer;
for (auto _ : state)
{
someClass.Reset();
timer.Start();
someClass.DoWork();
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
}
// Optional: Report items and/or bytes processed per iteration in output:
state.SetItemsProcessed(state.iterations() * someClass.GetNumberOfItems());
state.SetBytesProcessed(state.iterations() * someClass.GetNumberOfBytes());
}
VTKM_BENCHMARK(MyBenchmark);
```
Google Benchmark also makes it easy to implement parameter sweep benchmarks:
```
void MyParameterSweep(::benchmark::State& state)
{
// The current value in the sweep:
const vtkm::Id currentValue = state.range(0);
MyClass someClass;
someClass.SetSomeParameter(currentValue);
vtkm::cont::Timer timer;
for (auto _ : state)
{
someClass.Reset();
timer.Start();
someClass.DoWork();
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
}
}
VTKM_BENCHMARK_OPTS(MyParameterSweep, ->ArgName("Param")->Range(32, 1024 * 1024));
```
will generate and launch several benchmarks, exploring the parameter space of
`SetSomeParameter` between the values of 32 and (1024*1024). The chain of
function calls in the second argument is applied to an instance of
::benchmark::internal::Benchmark. See Google Benchmark's documentation for
more details.
For more complex benchmark configurations, the VTKM_BENCHMARK_APPLY macro
accepts a function with the signature
`void Func(::benchmark::internal::Benchmark*)`, which can apply arbitrary
settings to the generated benchmark.
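
As an illustration, a sketch that registers explicit sizes (the function name
`ConfigureSizes` and the specific values are hypothetical):

```
void ConfigureSizes(::benchmark::internal::Benchmark* bm)
{
  bm->ArgName("Size");
  for (int64_t size = 512; size <= 1024 * 1024; size *= 8)
  {
    bm->Arg(size);
  }
}
VTKM_BENCHMARK_APPLY(MyBenchmark, ConfigureSizes);
```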
To instantiate a templated benchmark across a list of types, the
VTKM_BENCHMARK_TEMPLATE* macros take a vtkm::List of types as an additional
parameter. The templated benchmark function will be instantiated and called
for each type in the list:
```
template <typename T>
void MyBenchmark(::benchmark::State& state)
{
MyClass<T> someClass;
// Must use a vtkm timer to properly capture e.g. CUDA execution times.
vtkm::cont::Timer timer;
for (auto _ : state)
{
someClass.Reset();
timer.Start();
someClass.DoWork();
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
}
}
VTKM_BENCHMARK_TEMPLATES(MyBenchmark, vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
```
The benchmarks are executed by calling the `VTKM_EXECUTE_BENCHMARKS(argc, argv)`
macro from `main`. There is also a `VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, some_string)`
macro that appends the contents of `some_string` to the Google Benchmark preamble.
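
A typical `main` then looks like the following sketch, modeled on the ported
benchmark executables:

```
int main(int argc, char* argv[])
{
  // Parse VTK-m options and force the requested device:
  auto opts = vtkm::cont::InitializeOptions::RequireDevice |
    vtkm::cont::InitializeOptions::AddHelp;
  auto config = vtkm::cont::Initialize(argc, argv, opts);
  vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(config.Device);

  // Handle benchmarking-related args and run benchmarks:
  VTKM_EXECUTE_BENCHMARKS(argc, argv);
}
```

(The ported benchmarks store the `InitializeResult` in a global so the benchmark
functions can query the active device.)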
If a benchmark is not compatible with some configuration, it may call
`state.SkipWithError("Error message");` on the `::benchmark::State` object and return. This is
useful, for instance, in the filter tests when the input is not compatible with the filter.
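
For instance, a sketch of a benchmark that skips gracefully when an allocation
fails (the names and the size argument are illustrative):

```
void MyAllocatingBenchmark(::benchmark::State& state)
{
  vtkm::cont::ArrayHandle<vtkm::Float32> data;
  try
  {
    data.Allocate(static_cast<vtkm::Id>(state.range(0)));
  }
  catch (vtkm::cont::ErrorBadAllocation&)
  {
    state.SkipWithError("Allocation too large for this device.");
    return;
  }
  // ... timing loop as shown above ...
}
```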
When launching a benchmark executable, the following options are supported by Google Benchmark:
- `--benchmark_list_tests`: List all available tests.
- `--benchmark_filter="[regex]"`: Only run benchmark with names that match `[regex]`.
- `--benchmark_filter="-[regex]"`: Only run benchmark with names that DON'T match `[regex]`.
- `--benchmark_min_time=[float]`: Make sure each benchmark repetition gathers `[float]` seconds
of data.
- `--benchmark_repetitions=[int]`: Run each benchmark `[int]` times and report aggregate statistics
(mean, stdev, etc.). A "repetition" refers to a single execution of the benchmark function, not
an "iteration", which is a single pass through the `for (auto _ : state) { ... }` loop.
- `--benchmark_report_aggregates_only="true|false"`: If true, only the aggregate statistics are
reported (affects both console and file output). Requires `--benchmark_repetitions` to be useful.
- `--benchmark_display_aggregates_only="true|false"`: If true, only the aggregate statistics are
printed to the terminal. Any file output will still contain all repetition info.
- `--benchmark_format="console|json|csv"`: Specify terminal output format: human readable
(`console`) or `csv`/`json` formats.
- `--benchmark_out_format="console|json|csv"`: Specify file output format: human readable
(`console`) or `csv`/`json` formats.
- `--benchmark_out=[filename]`: Specify output file.
- `--benchmark_color="true|false"`: Toggle color output in terminal when using `console` output.
- `--benchmark_counters_tabular="true|false"`: Print counter information (e.g. bytes/sec, items/sec)
in the table, rather than appending them as a label.
For more information and examples of practical usage, take a look at the existing benchmarks in
vtk-m/benchmarking/.

@@ -68,6 +68,13 @@ public:
{
}
};
/// A convenience function for creating an ArrayHandleIndex. It takes the
/// size of the array and generates an array holding vtkm::Id values in [0, size - 1].
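///
/// For example, an implicit index array of 100 values:
/// ```
/// // Holds [0, 1, ..., 99]:
/// vtkm::cont::ArrayHandleIndex indices = vtkm::cont::make_ArrayHandleIndex(100);
/// ```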
VTKM_CONT inline vtkm::cont::ArrayHandleIndex make_ArrayHandleIndex(vtkm::Id length)
{
return vtkm::cont::ArrayHandleIndex(length);
}
}
} // namespace vtkm::cont

@@ -47,7 +47,6 @@ void DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagCuda>::Reset()
void DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagCuda>::Start()
{
VTKM_CUDA_CALL(cudaEventRecord(this->StartEvent, cudaStreamPerThread));
VTKM_CUDA_CALL(cudaEventSynchronize(this->StartEvent));
this->StartReady = true;
}

@@ -22,6 +22,35 @@ namespace vtkm
namespace exec
{
namespace detail
{
// Clang-7 as host compiler under nvcc returns types from std::make_unsigned
// that are not compatible with the AtomicInterface API, so we define our own
// mapping. This must exist for every entry in vtkm::cont::AtomicArrayTypeList.
template <typename>
struct MakeUnsigned;
template <>
struct MakeUnsigned<vtkm::UInt32>
{
using type = vtkm::UInt32;
};
template <>
struct MakeUnsigned<vtkm::Int32>
{
using type = vtkm::UInt32;
};
template <>
struct MakeUnsigned<vtkm::UInt64>
{
using type = vtkm::UInt64;
};
template <>
struct MakeUnsigned<vtkm::Int64>
{
using type = vtkm::UInt64;
};
}
template <typename T, typename Device>
class AtomicArrayExecutionObject
{
@@ -66,7 +95,7 @@ public:
// We only support 32/64 bit signed/unsigned ints, and AtomicInterface
// currently only provides API for unsigned types.
// We'll cast the signed types to unsigned to work around this.
using APIType = typename std::make_unsigned<ValueType>::type;
using APIType = typename detail::MakeUnsigned<ValueType>::type;
return static_cast<T>(
AtomicInterface::Load(reinterpret_cast<const APIType*>(this->Data + index)));
@@ -89,7 +118,7 @@ public:
// This is safe, since the only difference between signed/unsigned types
// is how overflow works, and signed overflow is already undefined. We also
// document that overflow is undefined for this operation.
using APIType = typename std::make_unsigned<ValueType>::type;
using APIType = typename detail::MakeUnsigned<ValueType>::type;
return static_cast<T>(AtomicInterface::Add(reinterpret_cast<APIType*>(this->Data + index),
static_cast<APIType>(value)));
@@ -116,7 +145,7 @@ public:
// This is safe, since the only difference between signed/unsigned types
// is how overflow works, and signed overflow is already undefined. We also
// document that overflow is undefined for this operation.
using APIType = typename std::make_unsigned<ValueType>::type;
using APIType = typename detail::MakeUnsigned<ValueType>::type;
AtomicInterface::Store(reinterpret_cast<APIType*>(this->Data + index),
static_cast<APIType>(value));
@@ -169,7 +198,7 @@ public:
// We'll cast the signed types to unsigned to work around this.
// This is safe, since the only difference between signed/unsigned types
// is how overflow works, and signed overflow is already undefined.
using APIType = typename std::make_unsigned<ValueType>::type;
using APIType = typename detail::MakeUnsigned<ValueType>::type;
return static_cast<T>(
AtomicInterface::CompareAndSwap(reinterpret_cast<APIType*>(this->Data + index),