mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-10-08 03:18:58 +00:00
Port benchmarking framework to Google Benchmark.
This commit is contained in:
parent
39d981bcf9
commit
539f6e5ad7
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -8,23 +8,19 @@
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//============================================================================
|
||||
|
||||
#include <vtkm/TypeTraits.h>
|
||||
|
||||
#include "Benchmarker.h"
|
||||
|
||||
#include <vtkm/cont/Algorithm.h>
|
||||
#include <vtkm/cont/DeviceAdapter.h>
|
||||
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
|
||||
#include <vtkm/cont/ErrorBadAllocation.h>
|
||||
#include <vtkm/cont/RuntimeDeviceTracker.h>
|
||||
#include <vtkm/cont/Timer.h>
|
||||
|
||||
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
|
||||
|
||||
#include <vtkm/internal/Configure.h>
|
||||
|
||||
#include <vtkm/testing/Testing.h>
|
||||
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <vtkm/List.h>
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#ifdef VTKM_ENABLE_TBB
|
||||
@ -34,145 +30,78 @@
|
||||
// For the TBB implementation, the number of threads can be customized using a
|
||||
// "NumThreads [numThreads]" argument.
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace benchmarking
|
||||
{
|
||||
|
||||
const vtkm::UInt64 COPY_SIZE_MIN = (1 << 10); // 1 KiB
|
||||
const vtkm::UInt64 COPY_SIZE_MAX = (1 << 29); // 512 MiB
|
||||
const vtkm::UInt64 COPY_SIZE_INC = 1; // Used as 'size <<= INC'
|
||||
|
||||
const size_t COL_WIDTH = 32;
|
||||
|
||||
// Benchmark functor that times one vtkm::cont::Algorithm::Copy from Source to
// Destination. ValueType is the element type of both arrays; DeviceAdapter is
// default-constructed and passed to the vtkm::cont::Timer used for measurement.
template <typename ValueType, typename DeviceAdapter>
|
||||
struct MeasureCopySpeed
|
||||
{
|
||||
using Algo = vtkm::cont::Algorithm;
|
||||
|
||||
vtkm::cont::ArrayHandle<ValueType> Source;
|
||||
vtkm::cont::ArrayHandle<ValueType> Destination;
|
||||
// Requested size in bytes, exactly as passed to the constructor.
vtkm::UInt64 NumBytes;
|
||||
|
||||
// Allocates Source with bytes / sizeof(ValueType) values. The division is an
// integer division, so the allocated byte count can be smaller than `bytes`.
// Destination is not allocated here.
VTKM_CONT
|
||||
MeasureCopySpeed(vtkm::UInt64 bytes)
|
||||
: NumBytes(bytes)
|
||||
{
|
||||
vtkm::Id numValues = static_cast<vtkm::Id>(bytes / sizeof(ValueType));
|
||||
this->Source.Allocate(numValues);
|
||||
}
|
||||
|
||||
// Starts a timer, copies Source into Destination, and returns the value of
// timer.GetElapsedTime(). The timer is not explicitly stopped before the read.
VTKM_CONT vtkm::Float64 operator()()
|
||||
{
|
||||
vtkm::cont::Timer timer{ DeviceAdapter() };
|
||||
timer.Start();
|
||||
Algo::Copy(this->Source, this->Destination);
|
||||
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
|
||||
// Returns a newline-terminated label containing the requested size, the
// actual size (sizeof(ValueType) * number of values in Source), and the
// name of ValueType.
VTKM_CONT std::string Description() const
|
||||
{
|
||||
vtkm::UInt64 actualSize = sizeof(ValueType);
|
||||
actualSize *= static_cast<vtkm::UInt64>(this->Source.GetNumberOfValues());
|
||||
std::ostringstream out;
|
||||
out << "Copying " << vtkm::cont::GetHumanReadableSize(this->NumBytes)
|
||||
<< " (actual=" << vtkm::cont::GetHumanReadableSize(actualSize) << ") of "
|
||||
<< vtkm::testing::TypeName<ValueType>::Name() << "\n";
|
||||
return out.str();
|
||||
}
|
||||
};
|
||||
|
||||
// Writes one two-column table row to `out`: `label` and `data` are each
// padded to COL_WIDTH with std::setw and separated by '|' characters.
// The row is terminated with std::endl, which also flushes the stream.
void PrintRow(std::ostream& out, const std::string& label, const std::string& data)
|
||||
{
|
||||
out << "| " << std::setw(COL_WIDTH) << label << " | " << std::setw(COL_WIDTH) << data << " |"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
// Writes a horizontal divider row to `out` whose two columns are each filled
// with COL_WIDTH '-' characters, using the same "|-" / "-|" framing width as
// the rows produced by PrintRow.
void PrintDivider(std::ostream& out)
|
||||
{
|
||||
const std::string fillStr(COL_WIDTH, '-');
|
||||
|
||||
out << "|-" << fillStr << "-|-" << fillStr << "-|" << std::endl;
|
||||
}
|
||||
|
||||
// Prints a table of copy throughput for one ValueType on one device.
// The header row shows the type name and id.GetName(); each following row
// shows a copy size and the measured speed, for sizes from COPY_SIZE_MIN up to
// and including COPY_SIZE_MAX, shifting left by COPY_SIZE_INC bits per step.
template <typename ValueType, typename DeviceAdapter>
|
||||
void BenchmarkValueType(vtkm::cont::DeviceAdapterId id)
|
||||
{
|
||||
PrintRow(std::cout, vtkm::testing::TypeName<ValueType>::Name(), id.GetName());
|
||||
|
||||
PrintDivider(std::cout);
|
||||
|
||||
// NOTE(review): the meaning of the two Benchmarker constructor arguments is
// defined in Benchmarker.h, which is not visible here -- confirm before changing.
Benchmarker bench(15, 100);
|
||||
for (vtkm::UInt64 size = COPY_SIZE_MIN; size <= COPY_SIZE_MAX; size <<= COPY_SIZE_INC)
|
||||
{
|
||||
MeasureCopySpeed<ValueType, DeviceAdapter> functor(size);
|
||||
bench.Reset();
|
||||
|
||||
std::string speedStr;
|
||||
|
||||
try
|
||||
{
|
||||
bench.GatherSamples(functor);
|
||||
// Speed is the requested size divided by the mean of the gathered samples.
vtkm::Float64 speed = static_cast<Float64>(size) / stats::Mean(bench.GetSamples());
|
||||
speedStr = vtkm::cont::GetHumanReadableSize(static_cast<UInt64>(speed)) + std::string("/s");
|
||||
}
|
||||
// An allocation failure is reported in the table instead of aborting the run.
catch (vtkm::cont::ErrorBadAllocation&)
|
||||
{
|
||||
speedStr = "[allocation too large]";
|
||||
}
|
||||
|
||||
PrintRow(std::cout, vtkm::cont::GetHumanReadableSize(size), speedStr);
|
||||
}
|
||||
|
||||
std::cout << "\n";
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::benchmarking
|
||||
|
||||
namespace
|
||||
{
|
||||
using namespace vtkm::benchmarking;
|
||||
|
||||
struct BenchmarkValueTypeFunctor
|
||||
// Make this global so benchmarks can access the current device id:
|
||||
vtkm::cont::InitializeResult Config;
|
||||
|
||||
const vtkm::UInt64 COPY_SIZE_MIN = (1 << 10); // 1 KiB
|
||||
const vtkm::UInt64 COPY_SIZE_MAX = (1 << 30); // 1 GiB
|
||||
|
||||
// Value types passed to the CopySpeed benchmark registration: unsigned 8/32/64-bit
// integers and 32/64-bit floats, small Vec variants of each, and
// Pair<integer, float> combinations.
using TypeList = vtkm::List<vtkm::UInt8,
|
||||
vtkm::Vec2ui_8,
|
||||
vtkm::Vec3ui_8,
|
||||
vtkm::Vec4ui_8,
|
||||
vtkm::UInt32,
|
||||
vtkm::Vec2ui_32,
|
||||
vtkm::UInt64,
|
||||
vtkm::Vec2ui_64,
|
||||
vtkm::Float32,
|
||||
vtkm::Vec2f_32,
|
||||
vtkm::Float64,
|
||||
vtkm::Vec2f_64,
|
||||
vtkm::Pair<vtkm::UInt32, vtkm::Float32>,
|
||||
vtkm::Pair<vtkm::UInt32, vtkm::Float64>,
|
||||
vtkm::Pair<vtkm::UInt64, vtkm::Float32>,
|
||||
vtkm::Pair<vtkm::UInt64, vtkm::Float64>>;
|
||||
|
||||
template <typename ValueType>
|
||||
void CopySpeed(benchmark::State& state)
|
||||
{
|
||||
template <typename DeviceAdapter>
|
||||
bool operator()(DeviceAdapter id)
|
||||
const vtkm::cont::DeviceAdapterId device = Config.Device;
|
||||
const vtkm::UInt64 numBytes = static_cast<vtkm::UInt64>(state.range(0));
|
||||
const vtkm::Id numValues = static_cast<vtkm::Id>(numBytes / sizeof(ValueType));
|
||||
|
||||
state.SetLabel(vtkm::cont::GetHumanReadableSize(numBytes));
|
||||
|
||||
vtkm::cont::ArrayHandle<ValueType> src;
|
||||
vtkm::cont::ArrayHandle<ValueType> dst;
|
||||
src.Allocate(numValues);
|
||||
dst.Allocate(numValues);
|
||||
|
||||
vtkm::cont::Timer timer(device);
|
||||
for (auto _ : state)
|
||||
{
|
||||
BenchmarkValueType<vtkm::UInt8, DeviceAdapter>(id);
|
||||
BenchmarkValueType<vtkm::Vec2ui_8, DeviceAdapter>(id);
|
||||
BenchmarkValueType<vtkm::Vec3ui_8, DeviceAdapter>(id);
|
||||
BenchmarkValueType<vtkm::Vec4ui_8, DeviceAdapter>(id);
|
||||
(void)_;
|
||||
timer.Start();
|
||||
vtkm::cont::Algorithm::Copy(device, src, dst);
|
||||
timer.Stop();
|
||||
|
||||
BenchmarkValueType<vtkm::UInt32, DeviceAdapter>(id);
|
||||
BenchmarkValueType<vtkm::Vec2ui_32, DeviceAdapter>(id);
|
||||
|
||||
BenchmarkValueType<vtkm::UInt64, DeviceAdapter>(id);
|
||||
BenchmarkValueType<vtkm::Vec2ui_64, DeviceAdapter>(id);
|
||||
|
||||
BenchmarkValueType<vtkm::Float32, DeviceAdapter>(id);
|
||||
BenchmarkValueType<vtkm::Vec2f_32, DeviceAdapter>(id);
|
||||
|
||||
BenchmarkValueType<vtkm::Float64, DeviceAdapter>(id);
|
||||
BenchmarkValueType<vtkm::Vec2f_64, DeviceAdapter>(id);
|
||||
|
||||
BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float32>, DeviceAdapter>(id);
|
||||
BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float64>, DeviceAdapter>(id);
|
||||
BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float32>, DeviceAdapter>(id);
|
||||
BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float64>, DeviceAdapter>(id);
|
||||
|
||||
return true;
|
||||
state.SetIterationTime(timer.GetElapsedTime());
|
||||
}
|
||||
};
|
||||
|
||||
const int64_t iterations = static_cast<int64_t>(state.iterations());
|
||||
state.SetBytesProcessed(static_cast<int64_t>(numBytes) * iterations);
|
||||
state.SetItemsProcessed(static_cast<int64_t>(numValues) * iterations);
|
||||
}
|
||||
VTKM_BENCHMARK_TEMPLATES_OPTS(CopySpeed,
|
||||
->Range(COPY_SIZE_MIN, COPY_SIZE_MAX)
|
||||
->ArgName("Bytes"),
|
||||
TypeList);
|
||||
|
||||
} // end anon namespace
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
auto opts = vtkm::cont::InitializeOptions::RequireDevice |
|
||||
vtkm::cont::InitializeOptions::ErrorOnBadOption | vtkm::cont::InitializeOptions::AddHelp;
|
||||
auto config = vtkm::cont::Initialize(argc, argv, opts);
|
||||
// Parse VTK-m options:
|
||||
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
|
||||
Config = vtkm::cont::Initialize(argc, argv, opts);
|
||||
|
||||
// Setup device:
|
||||
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
|
||||
|
||||
// Handle NumThreads command-line arg:
|
||||
#ifdef VTKM_ENABLE_TBB
|
||||
int numThreads = tbb::task_scheduler_init::automatic;
|
||||
#endif // TBB
|
||||
@ -196,6 +125,6 @@ int main(int argc, char* argv[])
|
||||
tbb::task_scheduler_init init(numThreads);
|
||||
#endif // TBB
|
||||
|
||||
BenchmarkValueTypeFunctor functor;
|
||||
vtkm::cont::TryExecuteOnDevice(config.Device, functor);
|
||||
// handle benchmarking related args and run benchmarks:
|
||||
VTKM_EXECUTE_BENCHMARKS(argc, argv);
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -14,6 +14,7 @@
|
||||
|
||||
#include <vtkm/cont/ArrayHandle.h>
|
||||
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
|
||||
#include <vtkm/cont/Initialize.h>
|
||||
#include <vtkm/cont/Timer.h>
|
||||
#include <vtkm/cont/testing/MakeTestDataSet.h>
|
||||
|
||||
@ -31,114 +32,97 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace vtkm::benchmarking;
|
||||
namespace vtkm
|
||||
{
|
||||
namespace benchmarking
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Precision, typename DeviceAdapter>
|
||||
struct BenchRayTracing
|
||||
// Hold configuration state (e.g. active device)
|
||||
vtkm::cont::InitializeResult Config;
|
||||
|
||||
void BenchRayTracing(::benchmark::State& state)
|
||||
{
|
||||
vtkm::rendering::raytracing::RayTracer Tracer;
|
||||
vtkm::rendering::raytracing::Camera RayCamera;
|
||||
vtkm::cont::ArrayHandle<vtkm::Id4> Indices;
|
||||
vtkm::rendering::raytracing::Ray<Precision> Rays;
|
||||
vtkm::cont::CoordinateSystem Coords;
|
||||
vtkm::cont::DataSet Data;
|
||||
const vtkm::Id3 dims(128, 128, 128);
|
||||
|
||||
VTKM_CONT ~BenchRayTracing() {}
|
||||
vtkm::cont::testing::MakeTestDataSet maker;
|
||||
auto dataset = maker.Make3DUniformDataSet3(dims);
|
||||
auto coords = dataset.GetCoordinateSystem();
|
||||
|
||||
VTKM_CONT BenchRayTracing()
|
||||
vtkm::rendering::Camera camera;
|
||||
vtkm::Bounds bounds = dataset.GetCoordinateSystem().GetBounds();
|
||||
camera.ResetToBounds(bounds);
|
||||
|
||||
vtkm::cont::DynamicCellSet cellset = dataset.GetCellSet();
|
||||
|
||||
vtkm::rendering::raytracing::TriangleExtractor triExtractor;
|
||||
triExtractor.ExtractCells(cellset);
|
||||
|
||||
auto triIntersector = std::make_shared<vtkm::rendering::raytracing::TriangleIntersector>(
|
||||
vtkm::rendering::raytracing::TriangleIntersector());
|
||||
|
||||
vtkm::rendering::raytracing::RayTracer tracer;
|
||||
triIntersector->SetData(coords, triExtractor.GetTriangles());
|
||||
tracer.AddShapeIntersector(triIntersector);
|
||||
|
||||
vtkm::rendering::CanvasRayTracer canvas(1920, 1080);
|
||||
vtkm::rendering::raytracing::Camera rayCamera;
|
||||
rayCamera.SetParameters(camera, canvas);
|
||||
vtkm::rendering::raytracing::Ray<vtkm::Float32> rays;
|
||||
rayCamera.CreateRays(rays, coords.GetBounds());
|
||||
|
||||
rays.Buffers.at(0).InitConst(0.f);
|
||||
|
||||
vtkm::cont::Field field = dataset.GetField("pointvar");
|
||||
vtkm::Range range = field.GetRange().GetPortalConstControl().Get(0);
|
||||
|
||||
tracer.SetField(field, range);
|
||||
|
||||
vtkm::cont::ArrayHandle<vtkm::Vec4ui_8> temp;
|
||||
vtkm::cont::ColorTable table("cool to warm");
|
||||
table.Sample(100, temp);
|
||||
|
||||
vtkm::cont::ArrayHandle<vtkm::Vec4f_32> colors;
|
||||
colors.Allocate(100);
|
||||
auto portal = colors.GetPortalControl();
|
||||
auto colorPortal = temp.GetPortalConstControl();
|
||||
constexpr vtkm::Float32 conversionToFloatSpace = (1.0f / 255.0f);
|
||||
for (vtkm::Id i = 0; i < 100; ++i)
|
||||
{
|
||||
vtkm::Id3 dims(128, 128, 128);
|
||||
vtkm::cont::testing::MakeTestDataSet maker;
|
||||
Data = maker.Make3DUniformDataSet3(dims);
|
||||
Coords = Data.GetCoordinateSystem();
|
||||
|
||||
vtkm::rendering::Camera camera;
|
||||
vtkm::Bounds bounds = Data.GetCoordinateSystem().GetBounds();
|
||||
camera.ResetToBounds(bounds);
|
||||
|
||||
vtkm::cont::DynamicCellSet cellset = Data.GetCellSet();
|
||||
|
||||
vtkm::rendering::raytracing::TriangleExtractor triExtractor;
|
||||
triExtractor.ExtractCells(cellset);
|
||||
|
||||
auto triIntersector = std::make_shared<vtkm::rendering::raytracing::TriangleIntersector>(
|
||||
vtkm::rendering::raytracing::TriangleIntersector());
|
||||
|
||||
triIntersector->SetData(Coords, triExtractor.GetTriangles());
|
||||
Tracer.AddShapeIntersector(triIntersector);
|
||||
|
||||
vtkm::rendering::CanvasRayTracer canvas(1920, 1080);
|
||||
RayCamera.SetParameters(camera, canvas);
|
||||
RayCamera.CreateRays(Rays, Coords.GetBounds());
|
||||
|
||||
Rays.Buffers.at(0).InitConst(0.f);
|
||||
|
||||
vtkm::cont::Field field = Data.GetField("pointvar");
|
||||
vtkm::Range range = field.GetRange().GetPortalConstControl().Get(0);
|
||||
|
||||
Tracer.SetField(field, range);
|
||||
|
||||
vtkm::cont::ArrayHandle<vtkm::Vec4ui_8> temp;
|
||||
vtkm::cont::ColorTable table("cool to warm");
|
||||
table.Sample(100, temp);
|
||||
|
||||
vtkm::cont::ArrayHandle<vtkm::Vec4f_32> colors;
|
||||
colors.Allocate(100);
|
||||
auto portal = colors.GetPortalControl();
|
||||
auto colorPortal = temp.GetPortalConstControl();
|
||||
constexpr vtkm::Float32 conversionToFloatSpace = (1.0f / 255.0f);
|
||||
for (vtkm::Id i = 0; i < 100; ++i)
|
||||
{
|
||||
auto color = colorPortal.Get(i);
|
||||
vtkm::Vec4f_32 t(color[0] * conversionToFloatSpace,
|
||||
color[1] * conversionToFloatSpace,
|
||||
color[2] * conversionToFloatSpace,
|
||||
color[3] * conversionToFloatSpace);
|
||||
portal.Set(i, t);
|
||||
}
|
||||
|
||||
Tracer.SetColorMap(colors);
|
||||
Tracer.Render(Rays);
|
||||
auto color = colorPortal.Get(i);
|
||||
vtkm::Vec4f_32 t(color[0] * conversionToFloatSpace,
|
||||
color[1] * conversionToFloatSpace,
|
||||
color[2] * conversionToFloatSpace,
|
||||
color[3] * conversionToFloatSpace);
|
||||
portal.Set(i, t);
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
vtkm::Float64 operator()()
|
||||
tracer.SetColorMap(colors);
|
||||
tracer.Render(rays);
|
||||
|
||||
vtkm::cont::Timer timer{ Config.Device };
|
||||
for (auto _ : state)
|
||||
{
|
||||
vtkm::cont::Timer timer{ DeviceAdapter() };
|
||||
(void)_;
|
||||
timer.Start();
|
||||
rayCamera.CreateRays(rays, coords.GetBounds());
|
||||
tracer.Render(rays);
|
||||
timer.Stop();
|
||||
|
||||
RayCamera.CreateRays(Rays, Coords.GetBounds());
|
||||
try
|
||||
{
|
||||
Tracer.Render(Rays);
|
||||
}
|
||||
catch (vtkm::cont::ErrorBadValue& e)
|
||||
{
|
||||
std::cout << "exception " << e.what() << "\n";
|
||||
}
|
||||
|
||||
return timer.GetElapsedTime();
|
||||
state.SetIterationTime(timer.GetElapsedTime());
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
std::string Description() const { return "A ray tracing benchmark"; }
|
||||
};
|
||||
|
||||
VTKM_MAKE_BENCHMARK(RayTracing, BenchRayTracing);
|
||||
}
|
||||
} // end namespace vtkm::benchmarking
|
||||
|
||||
VTKM_BENCHMARK(BenchRayTracing);
|
||||
|
||||
} // end namespace vtkm::benchmarking
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
auto opts =
|
||||
vtkm::cont::InitializeOptions::DefaultAnyDevice | vtkm::cont::InitializeOptions::Strict;
|
||||
auto config = vtkm::cont::Initialize(argc, argv, opts);
|
||||
// Parse VTK-m options:
|
||||
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
|
||||
Config = vtkm::cont::Initialize(argc, argv, opts);
|
||||
|
||||
VTKM_RUN_BENCHMARK(RayTracing, vtkm::List<vtkm::Float32>(), config.Device);
|
||||
return 0;
|
||||
// Setup device:
|
||||
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
|
||||
|
||||
// handle benchmarking related args and run benchmarks:
|
||||
VTKM_EXECUTE_BENCHMARKS(argc, argv);
|
||||
}
|
||||
|
@ -7,40 +7,37 @@
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//============================================================================
|
||||
|
||||
#include "Benchmarker.h"
|
||||
|
||||
#include <vtkm/Math.h>
|
||||
#include <vtkm/VectorAnalysis.h>
|
||||
|
||||
#include <vtkm/cont/ArrayHandle.h>
|
||||
#include <vtkm/cont/CellSetStructured.h>
|
||||
#include <vtkm/cont/Invoker.h>
|
||||
#include <vtkm/cont/Timer.h>
|
||||
|
||||
#include <vtkm/worklet/DispatcherMapField.h>
|
||||
#include <vtkm/worklet/DispatcherMapTopology.h>
|
||||
#include <vtkm/worklet/WorkletMapField.h>
|
||||
#include <vtkm/worklet/WorkletMapTopology.h>
|
||||
|
||||
#include "Benchmarker.h"
|
||||
#include <vtkm/cont/testing/Testing.h>
|
||||
|
||||
#include <cctype>
|
||||
#include <random>
|
||||
#include <string>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace benchmarking
|
||||
namespace
|
||||
{
|
||||
|
||||
#define CUBE_SIZE 256
|
||||
static const std::string DIVIDER(40, '-');
|
||||
|
||||
// Bit flags selecting which topology benchmarks to run; values can be OR-ed
// together. Flag values start at bit 1 (bit 0 is unused). ALL is the union of
// the three individual flags.
enum BenchmarkName
|
||||
{
|
||||
CELL_TO_POINT = 1 << 1,
|
||||
POINT_TO_CELL = 1 << 2,
|
||||
MC_CLASSIFY = 1 << 3,
|
||||
ALL = CELL_TO_POINT | POINT_TO_CELL | MC_CLASSIFY
|
||||
};
|
||||
using ValueTypes = vtkm::List<vtkm::UInt32, vtkm::Int32, vtkm::Int64, vtkm::Float32, vtkm::Float64>;
|
||||
|
||||
using ValueVariantHandle = vtkm::cont::VariantArrayHandleBase<ValueTypes>;
|
||||
|
||||
// Hold configuration state (e.g. active device)
|
||||
vtkm::cont::InitializeResult Config;
|
||||
|
||||
class AveragePointToCell : public vtkm::worklet::WorkletVisitCellsWithPoints
|
||||
{
|
||||
@ -118,373 +115,277 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
using ValueTypes = vtkm::List<vtkm::UInt32, vtkm::Int32, vtkm::Int64, vtkm::Float32, vtkm::Float64>;
|
||||
|
||||
/// This class runs a series of micro-benchmarks to measure
|
||||
/// performance of different field operations
|
||||
class BenchmarkTopologyAlgorithms
|
||||
template <typename T, typename Enable = void>
|
||||
struct NumberGenerator
|
||||
{
|
||||
using StorageTag = vtkm::cont::StorageTagBasic;
|
||||
};
|
||||
|
||||
using Timer = vtkm::cont::Timer;
|
||||
|
||||
using ValueVariantHandle = vtkm::cont::VariantArrayHandleBase<ValueTypes>;
|
||||
|
||||
private:
|
||||
template <typename T, typename Enable = void>
|
||||
struct NumberGenerator
|
||||
template <typename T>
|
||||
struct NumberGenerator<T, typename std::enable_if<std::is_floating_point<T>::value>::type>
|
||||
{
|
||||
std::mt19937 rng;
|
||||
std::uniform_real_distribution<T> distribution;
|
||||
NumberGenerator(T low, T high)
|
||||
: rng()
|
||||
, distribution(low, high)
|
||||
{
|
||||
};
|
||||
}
|
||||
T next() { return distribution(rng); }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct NumberGenerator<T, typename std::enable_if<std::is_floating_point<T>::value>::type>
|
||||
template <typename T>
|
||||
struct NumberGenerator<T, typename std::enable_if<!std::is_floating_point<T>::value>::type>
|
||||
{
|
||||
std::mt19937 rng;
|
||||
std::uniform_int_distribution<T> distribution;
|
||||
|
||||
NumberGenerator(T low, T high)
|
||||
: rng()
|
||||
, distribution(low, high)
|
||||
{
|
||||
std::mt19937 rng;
|
||||
std::uniform_real_distribution<T> distribution;
|
||||
NumberGenerator(T low, T high)
|
||||
: rng()
|
||||
, distribution(low, high)
|
||||
{
|
||||
}
|
||||
T next() { return distribution(rng); }
|
||||
};
|
||||
}
|
||||
T next() { return distribution(rng); }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct NumberGenerator<T, typename std::enable_if<!std::is_floating_point<T>::value>::type>
|
||||
// Fills `array` with `size` random values and returns one additional random
|
||||
// value drawn from the same generator after the fill.
|
||||
// ("Extra" means "one more value", not "more random".)
|
||||
template <typename ArrayT>
|
||||
VTKM_CONT typename ArrayT::ValueType FillRandomValues(ArrayT& array,
|
||||
vtkm::Id size,
|
||||
vtkm::Float64 min,
|
||||
vtkm::Float64 max)
|
||||
{
|
||||
using ValueType = typename ArrayT::ValueType;
|
||||
|
||||
NumberGenerator<ValueType> generator{ static_cast<ValueType>(min), static_cast<ValueType>(max) };
|
||||
array.Allocate(size);
|
||||
auto portal = array.GetPortalControl();
|
||||
for (vtkm::Id i = 0; i < size; ++i)
|
||||
{
|
||||
std::mt19937 rng;
|
||||
std::uniform_int_distribution<T> distribution;
|
||||
portal.Set(i, generator.next());
|
||||
}
|
||||
return generator.next();
|
||||
}
|
||||
|
||||
NumberGenerator(T low, T high)
|
||||
: rng()
|
||||
, distribution(low, high)
|
||||
{
|
||||
}
|
||||
T next() { return distribution(rng); }
|
||||
};
|
||||
template <typename Value>
|
||||
struct BenchCellToPointAvgImpl
|
||||
{
|
||||
vtkm::cont::ArrayHandle<Value> Input;
|
||||
|
||||
template <typename Value, typename DeviceAdapter>
|
||||
struct BenchCellToPointAvg
|
||||
::benchmark::State& State;
|
||||
vtkm::Id CubeSize;
|
||||
vtkm::Id NumCells;
|
||||
|
||||
vtkm::cont::Timer Timer;
|
||||
vtkm::cont::Invoker Invoker;
|
||||
|
||||
VTKM_CONT
|
||||
BenchCellToPointAvgImpl(::benchmark::State& state)
|
||||
: State{ state }
|
||||
, CubeSize{ CUBE_SIZE }
|
||||
, NumCells{ (this->CubeSize - 1) * (this->CubeSize - 1) * (this->CubeSize - 1) }
|
||||
, Timer{ Config.Device }
|
||||
, Invoker{ Config.Device }
|
||||
{
|
||||
std::vector<Value> input;
|
||||
vtkm::cont::ArrayHandle<Value, StorageTag> InputHandle;
|
||||
std::size_t DomainSize;
|
||||
FillRandomValues(this->Input, this->NumCells, 1., 100.);
|
||||
|
||||
VTKM_CONT
|
||||
BenchCellToPointAvg()
|
||||
{
|
||||
NumberGenerator<Value> generator(static_cast<Value>(1.0), static_cast<Value>(100.0));
|
||||
// CUBE_SIZE is the number of points along each dimension, so the cell count is (CUBE_SIZE - 1)^3.
|
||||
this->DomainSize = (CUBE_SIZE - 1) * (CUBE_SIZE - 1) * (CUBE_SIZE - 1);
|
||||
this->input.resize(DomainSize);
|
||||
for (std::size_t i = 0; i < DomainSize; ++i)
|
||||
{
|
||||
this->input[i] = generator.next();
|
||||
}
|
||||
this->InputHandle = vtkm::cont::make_ArrayHandle(this->input);
|
||||
{ // Configure label:
|
||||
std::ostringstream desc;
|
||||
desc << "CubeSize:" << this->CubeSize;
|
||||
this->State.SetLabel(desc.str());
|
||||
}
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
vtkm::Float64 operator()()
|
||||
{
|
||||
vtkm::cont::CellSetStructured<3> cellSet;
|
||||
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
|
||||
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
||||
|
||||
Timer timer{ DeviceAdapter() };
|
||||
timer.Start();
|
||||
|
||||
vtkm::worklet::DispatcherMapTopology<AverageCellToPoint> dispatcher;
|
||||
dispatcher.Invoke(this->InputHandle, cellSet, result);
|
||||
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
|
||||
virtual std::string Type() const { return std::string("Static"); }
|
||||
|
||||
VTKM_CONT
|
||||
std::string Description() const
|
||||
{
|
||||
|
||||
std::stringstream description;
|
||||
description << "Computing Cell To Point Average "
|
||||
<< "[" << this->Type() << "] "
|
||||
<< "with a domain size of: " << this->DomainSize;
|
||||
return description.str();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Value, typename DeviceAdapter>
|
||||
struct BenchCellToPointAvgDynamic : public BenchCellToPointAvg<Value, DeviceAdapter>
|
||||
template <typename BenchArrayType>
|
||||
VTKM_CONT void Run(const BenchArrayType& input)
|
||||
{
|
||||
vtkm::cont::CellSetStructured<3> cellSet;
|
||||
cellSet.SetPointDimensions(vtkm::Id3{ this->CubeSize, this->CubeSize, this->CubeSize });
|
||||
vtkm::cont::ArrayHandle<Value> result;
|
||||
|
||||
VTKM_CONT
|
||||
vtkm::Float64 operator()()
|
||||
for (auto _ : this->State)
|
||||
{
|
||||
vtkm::cont::CellSetStructured<3> cellSet;
|
||||
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
|
||||
(void)_;
|
||||
this->Timer.Start();
|
||||
this->Invoker(AverageCellToPoint{}, input, cellSet, result);
|
||||
this->Timer.Stop();
|
||||
|
||||
ValueVariantHandle dinput(this->InputHandle);
|
||||
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
||||
|
||||
Timer timer{ DeviceAdapter() };
|
||||
timer.Start();
|
||||
|
||||
vtkm::worklet::DispatcherMapTopology<AverageCellToPoint> dispatcher;
|
||||
dispatcher.Invoke(dinput, cellSet, result);
|
||||
|
||||
return timer.GetElapsedTime();
|
||||
this->State.SetIterationTime(this->Timer.GetElapsedTime());
|
||||
}
|
||||
|
||||
virtual std::string Type() const { return std::string("Dynamic"); }
|
||||
};
|
||||
|
||||
VTKM_MAKE_BENCHMARK(CellToPointAvg, BenchCellToPointAvg);
|
||||
VTKM_MAKE_BENCHMARK(CellToPointAvgDynamic, BenchCellToPointAvgDynamic);
|
||||
|
||||
// Benchmark functor that times the AveragePointToCell worklet on a structured
// 3D cell set with CUBE_SIZE points per dimension. The input field holds one
// random value per point and is passed to the dispatcher as a concrete
// ArrayHandle (reported as "Static" by Type()).
template <typename Value, typename DeviceAdapter>
|
||||
struct BenchPointToCellAvg
|
||||
{
|
||||
// NOTE(review): the vector is kept as a member next to the handle built from
// it; presumably the handle refers to this storage -- confirm against
// vtkm::cont::make_ArrayHandle.
std::vector<Value> input;
|
||||
vtkm::cont::ArrayHandle<Value, StorageTag> InputHandle;
|
||||
std::size_t DomainSize;
|
||||
|
||||
// Fills `input` with CUBE_SIZE^3 values from NumberGenerator<Value>(1, 100)
// and wraps it in InputHandle.
VTKM_CONT
|
||||
BenchPointToCellAvg()
|
||||
{
|
||||
NumberGenerator<Value> generator(static_cast<Value>(1.0), static_cast<Value>(100.0));
|
||||
|
||||
this->DomainSize = (CUBE_SIZE) * (CUBE_SIZE) * (CUBE_SIZE);
|
||||
this->input.resize(DomainSize);
|
||||
for (std::size_t i = 0; i < DomainSize; ++i)
|
||||
{
|
||||
this->input[i] = generator.next();
|
||||
}
|
||||
this->InputHandle = vtkm::cont::make_ArrayHandle(this->input);
|
||||
}
|
||||
|
||||
// Runs the worklet once and returns timer.GetElapsedTime(). The cell set and
// result array are created before the timer starts; dispatcher construction
// and Invoke happen inside the timed region.
VTKM_CONT
|
||||
vtkm::Float64 operator()()
|
||||
{
|
||||
vtkm::cont::CellSetStructured<3> cellSet;
|
||||
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
|
||||
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
||||
|
||||
Timer timer{ DeviceAdapter() };
|
||||
timer.Start();
|
||||
|
||||
vtkm::worklet::DispatcherMapTopology<AveragePointToCell> dispatcher;
|
||||
dispatcher.Invoke(this->InputHandle, cellSet, result);
|
||||
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
|
||||
// Label inserted into Description(); virtual so a derived benchmark can
// report a different dispatch kind.
virtual std::string Type() const { return std::string("Static"); }
|
||||
|
||||
// Returns a description containing the Type() label and DomainSize.
VTKM_CONT
|
||||
std::string Description() const
|
||||
{
|
||||
|
||||
std::stringstream description;
|
||||
description << "Computing Point To Cell Average "
|
||||
<< "[" << this->Type() << "] "
|
||||
<< "with a domain size of: " << this->DomainSize;
|
||||
return description.str();
|
||||
}
|
||||
};
|
||||
|
||||
// Variant of BenchPointToCellAvg that wraps the inherited InputHandle in a
// ValueVariantHandle before invoking the worklet, and reports "Dynamic" from
// Type(). Data generation and Description() are inherited from the base.
template <typename Value, typename DeviceAdapter>
|
||||
struct BenchPointToCellAvgDynamic : public BenchPointToCellAvg<Value, DeviceAdapter>
|
||||
{
|
||||
|
||||
// Hides the base operator() (which is not virtual). The variant handle is
// constructed before the timer starts, so only dispatcher construction and
// Invoke are inside the timed region.
VTKM_CONT
|
||||
vtkm::Float64 operator()()
|
||||
{
|
||||
vtkm::cont::CellSetStructured<3> cellSet;
|
||||
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
|
||||
|
||||
ValueVariantHandle dinput(this->InputHandle);
|
||||
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
||||
|
||||
Timer timer{ DeviceAdapter() };
|
||||
timer.Start();
|
||||
|
||||
vtkm::worklet::DispatcherMapTopology<AveragePointToCell> dispatcher;
|
||||
dispatcher.Invoke(dinput, cellSet, result);
|
||||
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
|
||||
virtual std::string Type() const { return std::string("Dynamic"); }
|
||||
};
|
||||
|
||||
VTKM_MAKE_BENCHMARK(PointToCellAvg, BenchPointToCellAvg);
|
||||
VTKM_MAKE_BENCHMARK(PointToCellAvgDynamic, BenchPointToCellAvgDynamic);
|
||||
|
||||
// Benchmark functor that times the Classification<Value> worklet on a
// structured 3D cell set with CUBE_SIZE points per dimension. The input field
// holds one random value per point; IsoValue is one further value drawn from
// the same generator after the field has been filled.
//
// NOTE(review): this class reports "Static" from Type() but passes the input
// through a ValueVariantHandle, while BenchClassificationDynamic reports
// "Dynamic" and passes the concrete handle. That is the opposite pairing from
// the PointToCellAvg benchmarks -- the labels look swapped; confirm intent.
template <typename Value, typename DeviceAdapter>
|
||||
struct BenchClassification
|
||||
{
|
||||
std::vector<Value> input;
|
||||
vtkm::cont::ArrayHandle<Value, StorageTag> InputHandle;
|
||||
Value IsoValue;
|
||||
size_t DomainSize;
|
||||
|
||||
// Fills `input` with CUBE_SIZE^3 values from NumberGenerator<Value>(1, 100),
// wraps it in InputHandle, then draws IsoValue from the same generator.
VTKM_CONT
|
||||
BenchClassification()
|
||||
{
|
||||
NumberGenerator<Value> generator(static_cast<Value>(1.0), static_cast<Value>(100.0));
|
||||
|
||||
this->DomainSize = (CUBE_SIZE) * (CUBE_SIZE) * (CUBE_SIZE);
|
||||
this->input.resize(DomainSize);
|
||||
for (std::size_t i = 0; i < DomainSize; ++i)
|
||||
{
|
||||
this->input[i] = generator.next();
|
||||
}
|
||||
this->InputHandle = vtkm::cont::make_ArrayHandle(this->input);
|
||||
this->IsoValue = generator.next();
|
||||
}
|
||||
|
||||
// Runs the worklet once and returns timer.GetElapsedTime(). Worklet and
// dispatcher construction are inside the timed region; the cell set, result
// array and variant handle are created before the timer starts.
VTKM_CONT
|
||||
vtkm::Float64 operator()()
|
||||
{
|
||||
vtkm::cont::CellSetStructured<3> cellSet;
|
||||
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
|
||||
vtkm::cont::ArrayHandle<vtkm::IdComponent, StorageTag> result;
|
||||
|
||||
ValueVariantHandle dinput(this->InputHandle);
|
||||
|
||||
Timer timer{ DeviceAdapter() };
|
||||
timer.Start();
|
||||
|
||||
Classification<Value> worklet(this->IsoValue);
|
||||
vtkm::worklet::DispatcherMapTopology<Classification<Value>> dispatcher(worklet);
|
||||
dispatcher.Invoke(dinput, cellSet, result);
|
||||
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
|
||||
// Label inserted into Description(); virtual so a derived benchmark can
// report a different dispatch kind.
virtual std::string Type() const { return std::string("Static"); }
|
||||
|
||||
// Returns a description containing the Type() label and DomainSize.
VTKM_CONT
|
||||
std::string Description() const
|
||||
{
|
||||
|
||||
std::stringstream description;
|
||||
description << "Computing Marching Cubes Classification "
|
||||
<< "[" << this->Type() << "] "
|
||||
<< "with a domain size of: " << this->DomainSize;
|
||||
return description.str();
|
||||
}
|
||||
};
|
||||
|
||||
// Variant of BenchClassification that reports "Dynamic" from Type(). Data
// generation, IsoValue and Description() are inherited from the base.
//
// NOTE(review): despite the "Dynamic" label, this version passes the concrete
// InputHandle to the dispatcher, whereas the base ("Static") wraps it in a
// ValueVariantHandle -- the two look swapped; confirm intent.
template <typename Value, typename DeviceAdapter>
|
||||
struct BenchClassificationDynamic : public BenchClassification<Value, DeviceAdapter>
|
||||
{
|
||||
// Hides the base operator() (which is not virtual). Unlike the other
// functors in this view, the timer is explicitly stopped before the elapsed
// time is read.
VTKM_CONT
|
||||
vtkm::Float64 operator()()
|
||||
{
|
||||
vtkm::cont::CellSetStructured<3> cellSet;
|
||||
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
|
||||
vtkm::cont::ArrayHandle<vtkm::IdComponent, StorageTag> result;
|
||||
|
||||
Timer timer{ DeviceAdapter() };
|
||||
timer.Start();
|
||||
|
||||
Classification<Value> worklet(this->IsoValue);
|
||||
vtkm::worklet::DispatcherMapTopology<Classification<Value>> dispatcher(worklet);
|
||||
dispatcher.Invoke(this->InputHandle, cellSet, result);
|
||||
|
||||
timer.Stop();
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
|
||||
virtual std::string Type() const { return std::string("Dynamic"); }
|
||||
};
|
||||
|
||||
VTKM_MAKE_BENCHMARK(Classification, BenchClassification);
|
||||
VTKM_MAKE_BENCHMARK(ClassificationDynamic, BenchClassificationDynamic);
|
||||
|
||||
public:
|
||||
static VTKM_CONT int Run(int benchmarks, vtkm::cont::DeviceAdapterId id)
|
||||
{
|
||||
std::cout << DIVIDER << "\nRunning Topology Algorithm benchmarks\n";
|
||||
|
||||
if (benchmarks & CELL_TO_POINT)
|
||||
{
|
||||
std::cout << DIVIDER << "\nBenchmarking Cell To Point Average\n";
|
||||
VTKM_RUN_BENCHMARK(CellToPointAvg, ValueTypes(), id);
|
||||
VTKM_RUN_BENCHMARK(CellToPointAvgDynamic, ValueTypes(), id);
|
||||
}
|
||||
|
||||
if (benchmarks & POINT_TO_CELL)
|
||||
{
|
||||
std::cout << DIVIDER << "\nBenchmarking Point to Cell Average\n";
|
||||
VTKM_RUN_BENCHMARK(PointToCellAvg, ValueTypes(), id);
|
||||
VTKM_RUN_BENCHMARK(PointToCellAvgDynamic, ValueTypes(), id);
|
||||
}
|
||||
|
||||
if (benchmarks & MC_CLASSIFY)
|
||||
{
|
||||
std::cout << DIVIDER << "\nBenchmarking Hex/Voxel MC Classification\n";
|
||||
VTKM_RUN_BENCHMARK(Classification, ValueTypes(), id);
|
||||
VTKM_RUN_BENCHMARK(ClassificationDynamic, ValueTypes(), id);
|
||||
}
|
||||
|
||||
return 0;
|
||||
// #items = #points
|
||||
const int64_t iterations = static_cast<int64_t>(this->State.iterations());
|
||||
this->State.SetItemsProcessed(static_cast<int64_t>(cellSet.GetNumberOfPoints()) * iterations);
|
||||
}
|
||||
};
|
||||
|
||||
#undef ARRAY_SIZE
|
||||
}
|
||||
} // namespace vtkm::benchmarking
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
template <typename ValueType>
|
||||
void BenchCellToPointAvgStatic(::benchmark::State& state)
|
||||
{
|
||||
auto opts = vtkm::cont::InitializeOptions::DefaultAnyDevice;
|
||||
auto config = vtkm::cont::Initialize(argc, argv, opts);
|
||||
BenchCellToPointAvgImpl<ValueType> impl{ state };
|
||||
impl.Run(impl.Input);
|
||||
};
|
||||
VTKM_BENCHMARK_TEMPLATES(BenchCellToPointAvgStatic, ValueTypes);
|
||||
|
||||
int benchmarks = 0;
|
||||
if (argc <= 1)
|
||||
template <typename ValueType>
|
||||
void BenchCellToPointAvgDynamic(::benchmark::State& state)
|
||||
{
|
||||
BenchCellToPointAvgImpl<ValueType> impl{ state };
|
||||
impl.Run(ValueVariantHandle{ impl.Input });
|
||||
};
|
||||
VTKM_BENCHMARK_TEMPLATES(BenchCellToPointAvgDynamic, ValueTypes);
|
||||
|
||||
template <typename Value>
|
||||
struct BenchPointToCellAvgImpl
|
||||
{
|
||||
vtkm::cont::ArrayHandle<Value> Input;
|
||||
|
||||
::benchmark::State& State;
|
||||
vtkm::Id CubeSize;
|
||||
vtkm::Id NumPoints;
|
||||
|
||||
vtkm::cont::Timer Timer;
|
||||
vtkm::cont::Invoker Invoker;
|
||||
|
||||
VTKM_CONT
|
||||
BenchPointToCellAvgImpl(::benchmark::State& state)
|
||||
: State{ state }
|
||||
, CubeSize{ CUBE_SIZE }
|
||||
, NumPoints{ (this->CubeSize) * (this->CubeSize) * (this->CubeSize) }
|
||||
, Timer{ Config.Device }
|
||||
, Invoker{ Config.Device }
|
||||
{
|
||||
benchmarks = vtkm::benchmarking::ALL;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 1; i < argc; ++i)
|
||||
{
|
||||
std::string arg = argv[i];
|
||||
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
||||
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
});
|
||||
if (arg == "celltopoint")
|
||||
{
|
||||
benchmarks |= vtkm::benchmarking::CELL_TO_POINT;
|
||||
}
|
||||
else if (arg == "pointtocell")
|
||||
{
|
||||
benchmarks |= vtkm::benchmarking::POINT_TO_CELL;
|
||||
}
|
||||
else if (arg == "classify")
|
||||
{
|
||||
benchmarks |= vtkm::benchmarking::MC_CLASSIFY;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cerr << "Unrecognized benchmark: " << argv[i] << std::endl;
|
||||
std::cerr << "USAGE: " << argv[0] << " [options] [<benchmarks>]" << std::endl;
|
||||
std::cerr << "Options are: " << std::endl;
|
||||
std::cerr << config.Usage << std::endl;
|
||||
std::cerr << "Benchmarks are one or more of the following:" << std::endl;
|
||||
std::cerr << " CellToPoint\tFind average of point data on each cell" << std::endl;
|
||||
std::cerr << " PointToCell\tFind average of cell data on each point" << std::endl;
|
||||
std::cerr << " Classify\tFind Marching Cube case of each cell" << std::endl;
|
||||
std::cerr << "If no benchmarks are specified, all are run." << std::endl;
|
||||
return 1;
|
||||
}
|
||||
FillRandomValues(this->Input, this->NumPoints, 1., 100.);
|
||||
|
||||
{ // Configure label:
|
||||
std::ostringstream desc;
|
||||
desc << "CubeSize:" << this->CubeSize;
|
||||
this->State.SetLabel(desc.str());
|
||||
}
|
||||
}
|
||||
|
||||
//now actually execute the benchmarks
|
||||
template <typename BenchArrayType>
|
||||
VTKM_CONT void Run(const BenchArrayType& input)
|
||||
{
|
||||
vtkm::cont::CellSetStructured<3> cellSet;
|
||||
cellSet.SetPointDimensions(vtkm::Id3{ this->CubeSize, this->CubeSize, this->CubeSize });
|
||||
vtkm::cont::ArrayHandle<Value> result;
|
||||
|
||||
return vtkm::benchmarking::BenchmarkTopologyAlgorithms::Run(benchmarks, config.Device);
|
||||
for (auto _ : this->State)
|
||||
{
|
||||
(void)_;
|
||||
this->Timer.Start();
|
||||
this->Invoker(AveragePointToCell{}, input, cellSet, result);
|
||||
this->Timer.Stop();
|
||||
|
||||
this->State.SetIterationTime(this->Timer.GetElapsedTime());
|
||||
}
|
||||
|
||||
// #items = #cells
|
||||
const int64_t iterations = static_cast<int64_t>(this->State.iterations());
|
||||
this->State.SetItemsProcessed(static_cast<int64_t>(cellSet.GetNumberOfCells()) * iterations);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ValueType>
|
||||
void BenchPointToCellAvgStatic(::benchmark::State& state)
|
||||
{
|
||||
BenchPointToCellAvgImpl<ValueType> impl{ state };
|
||||
impl.Run(impl.Input);
|
||||
};
|
||||
VTKM_BENCHMARK_TEMPLATES(BenchPointToCellAvgStatic, ValueTypes);
|
||||
|
||||
template <typename ValueType>
|
||||
void BenchPointToCellAvgDynamic(::benchmark::State& state)
|
||||
{
|
||||
BenchPointToCellAvgImpl<ValueType> impl{ state };
|
||||
impl.Run(ValueVariantHandle{ impl.Input });
|
||||
};
|
||||
VTKM_BENCHMARK_TEMPLATES(BenchPointToCellAvgDynamic, ValueTypes);
|
||||
|
||||
template <typename Value>
|
||||
struct BenchClassificationImpl
|
||||
{
|
||||
vtkm::cont::ArrayHandle<Value> Input;
|
||||
|
||||
::benchmark::State& State;
|
||||
vtkm::Id CubeSize;
|
||||
vtkm::Id DomainSize;
|
||||
Value IsoValue;
|
||||
|
||||
vtkm::cont::Timer Timer;
|
||||
vtkm::cont::Invoker Invoker;
|
||||
|
||||
VTKM_CONT
|
||||
BenchClassificationImpl(::benchmark::State& state)
|
||||
: State{ state }
|
||||
, CubeSize{ CUBE_SIZE }
|
||||
, DomainSize{ this->CubeSize * this->CubeSize * this->CubeSize }
|
||||
, Timer{ Config.Device }
|
||||
, Invoker{ Config.Device }
|
||||
{
|
||||
this->IsoValue = FillRandomValues(this->Input, this->DomainSize, 1., 100.);
|
||||
|
||||
{ // Configure label:
|
||||
std::ostringstream desc;
|
||||
desc << "CubeSize:" << this->CubeSize;
|
||||
this->State.SetLabel(desc.str());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename BenchArrayType>
|
||||
VTKM_CONT void Run(const BenchArrayType& input)
|
||||
{
|
||||
vtkm::cont::CellSetStructured<3> cellSet;
|
||||
cellSet.SetPointDimensions(vtkm::Id3{ this->CubeSize, this->CubeSize, this->CubeSize });
|
||||
vtkm::cont::ArrayHandle<vtkm::IdComponent> result;
|
||||
|
||||
Classification<Value> worklet(this->IsoValue);
|
||||
|
||||
for (auto _ : this->State)
|
||||
{
|
||||
(void)_;
|
||||
this->Timer.Start();
|
||||
this->Invoker(worklet, input, cellSet, result);
|
||||
this->Timer.Stop();
|
||||
|
||||
this->State.SetIterationTime(this->Timer.GetElapsedTime());
|
||||
}
|
||||
|
||||
// #items = #cells
|
||||
const int64_t iterations = static_cast<int64_t>(this->State.iterations());
|
||||
this->State.SetItemsProcessed(static_cast<int64_t>(cellSet.GetNumberOfCells()) * iterations);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ValueType>
|
||||
void BenchClassificationStatic(::benchmark::State& state)
|
||||
{
|
||||
BenchClassificationImpl<ValueType> impl{ state };
|
||||
impl.Run(impl.Input);
|
||||
};
|
||||
VTKM_BENCHMARK_TEMPLATES(BenchClassificationStatic, ValueTypes);
|
||||
|
||||
template <typename ValueType>
|
||||
void BenchClassificationDynamic(::benchmark::State& state)
|
||||
{
|
||||
BenchClassificationImpl<ValueType> impl{ state };
|
||||
impl.Run(ValueVariantHandle{ impl.Input });
|
||||
};
|
||||
VTKM_BENCHMARK_TEMPLATES(BenchClassificationDynamic, ValueTypes);
|
||||
|
||||
} // end anon namespace
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
// Parse VTK-m options:
|
||||
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
|
||||
Config = vtkm::cont::Initialize(argc, argv, opts);
|
||||
|
||||
// Setup device:
|
||||
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
|
||||
|
||||
// handle benchmarking related args and run benchmarks:
|
||||
VTKM_EXECUTE_BENCHMARKS(argc, argv);
|
||||
}
|
||||
|
@ -11,330 +11,385 @@
|
||||
#ifndef vtk_m_benchmarking_Benchmarker_h
|
||||
#define vtk_m_benchmarking_Benchmarker_h
|
||||
|
||||
#include <vtkm/Math.h>
|
||||
#include <vtkm/cont/DeviceAdapterTag.h>
|
||||
#include <vtkm/cont/TryExecute.h>
|
||||
#include <vtkm/cont/RuntimeDeviceTracker.h>
|
||||
#include <vtkm/cont/Timer.h>
|
||||
|
||||
#include <vtkm/cont/testing/Testing.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <vtkm/internal/brigand.hpp>
|
||||
|
||||
/*
|
||||
* Writing a Benchmark
|
||||
* -------------------
|
||||
* To write a benchmark you must provide a functor that will run the operations
|
||||
* you want to time and return the run time of those operations using the timer
|
||||
* for the device. The benchmark should also be templated on the value type being
|
||||
* operated on. Then use VTKM_MAKE_BENCHMARK to generate a maker functor and
|
||||
* VTKM_RUN_BENCHMARK to run the benchmark on a list of types.
|
||||
*
|
||||
* For Example:
|
||||
*
|
||||
* template<typename Value>
|
||||
* struct BenchSilly {
|
||||
* // Setup anything that doesn't need to change per run in the constructor
|
||||
* VTKM_CONT BenchSilly(){}
|
||||
*
|
||||
* // The overloaded call operator will run the operations being timed and
|
||||
* // return the execution time
|
||||
* VTKM_CONT
|
||||
* vtkm::Float64 operator()(){
|
||||
* return 0.05;
|
||||
* }
|
||||
*
|
||||
* // The benchmark must also provide a method describing itself, this is
|
||||
* // used when printing out run time statistics
|
||||
* VTKM_CONT
|
||||
* std::string Description() const {
|
||||
* return "A silly benchmark";
|
||||
* }
|
||||
* };
|
||||
*
|
||||
* // Now use the VTKM_MAKE_BENCHMARK macro to generate a maker functor for
|
||||
* // your benchmark. This lets us generate the benchmark functor for each type
|
||||
* // we want to test
|
||||
* VTKM_MAKE_BENCHMARK(Silly, BenchSilly);
|
||||
*
|
||||
* // You can also optionally pass arguments to the constructor like so:
|
||||
* // VTKM_MAKE_BENCHMARK(Blah, BenchBlah, 1, 2, 3);
|
||||
* // Note that benchmark names (the first argument) must be unique so different
|
||||
* // parameters to the constructor should have different names
|
||||
*
|
||||
* // We can now run our benchmark using VTKM_RUN_BENCHMARK, passing the
|
||||
* // benchmark name and type list to run on
|
||||
* int main(int, char**){
|
||||
* VTKM_RUN_BENCHMARK(Silly, vtkm::List<vtkm::Float32>());
|
||||
* return 0;
|
||||
* }
|
||||
*
|
||||
* Check out vtkm/benchmarking/BenchmarkDeviceAdapter.h for some example usage
|
||||
*/
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
/*
|
||||
* Use the VTKM_MAKE_BENCHMARK macro to define a maker functor for your benchmark.
|
||||
* This is used to allow you to template the benchmark functor on the type being benchmarked
|
||||
* and the device adapter so you can write init code in the constructor. Then the maker will
|
||||
* return a constructed instance of your benchmark for the type being benchmarked.
|
||||
* The VA_ARGS are used to pass any extra arguments needed by your benchmark
|
||||
*/
|
||||
#define VTKM_MAKE_BENCHMARK(Name, Bench, ...) \
|
||||
struct MakeBench##Name \
|
||||
{ \
|
||||
template <typename Value, typename DeviceAdapter> \
|
||||
VTKM_CONT Bench<Value, DeviceAdapter> operator()(const Value vtkmNotUsed(v), \
|
||||
DeviceAdapter vtkmNotUsed(id)) const \
|
||||
{ \
|
||||
return Bench<Value, DeviceAdapter>(__VA_ARGS__); \
|
||||
} \
|
||||
}
|
||||
#include <ostream>
|
||||
|
||||
/*
|
||||
* Use the VTKM_RUN_BENCHMARK macro to run your benchmark on the type list passed.
|
||||
* You must have previously defined a maker functor with VTKM_MAKE_BENCHMARK that this
|
||||
* macro will look for and use
|
||||
*/
|
||||
#define VTKM_RUN_BENCHMARK(Name, Types, Id) \
|
||||
vtkm::benchmarking::BenchmarkTypes(MakeBench##Name(), (Types), (Id))
|
||||
/// \file Benchmarker.h
|
||||
/// \brief Benchmarking utilities
|
||||
///
|
||||
/// VTK-m's benchmarking framework is built on top of Google Benchmark.
|
||||
///
|
||||
/// A benchmark is now a single function, which is passed to a macro:
|
||||
///
|
||||
/// ```
|
||||
/// void MyBenchmark(::benchmark::State& state)
|
||||
/// {
|
||||
/// MyClass someClass;
|
||||
///
|
||||
/// // Optional: Add a descriptive label with additional benchmark details:
|
||||
/// state.SetLabel("Blah blah blah.");
|
||||
///
|
||||
/// // Must use a vtkm timer to properly capture e.g. CUDA execution times.
|
||||
/// vtkm::cont::Timer timer;
|
||||
/// for (auto _ : state)
|
||||
/// {
|
||||
/// someClass.Reset();
|
||||
///
|
||||
/// timer.Start();
|
||||
/// someClass.DoWork();
|
||||
/// timer.Stop();
|
||||
///
|
||||
/// state.SetIterationTime(timer.GetElapsedTime());
|
||||
/// }
|
||||
///
|
||||
/// // Optional: Report items and/or bytes processed per iteration in output:
|
||||
/// state.SetItemsProcessed(state.iterations() * someClass.GetNumberOfItems());
|
||||
/// state.SetBytesProcessed(state.iterations() * someClass.GetNumberOfBytes());
|
||||
/// }
|
||||
/// VTKM_BENCHMARK(MyBenchmark);
|
||||
/// ```
|
||||
///
|
||||
/// Google benchmark also makes it easy to implement parameter sweep benchmarks:
|
||||
///
|
||||
/// ```
|
||||
/// void MyParameterSweep(::benchmark::State& state)
|
||||
/// {
|
||||
/// // The current value in the sweep:
|
||||
/// const vtkm::Id currentValue = state.range(0);
|
||||
///
|
||||
/// MyClass someClass;
|
||||
/// someClass.SetSomeParameter(currentValue);
|
||||
///
|
||||
/// vtkm::cont::Timer timer;
|
||||
/// for (auto _ : state)
|
||||
/// {
|
||||
/// someClass.Reset();
|
||||
///
|
||||
/// timer.Start();
|
||||
/// someClass.DoWork();
|
||||
/// timer.Stop();
|
||||
///
|
||||
/// state.SetIterationTime(timer.GetElapsedTime());
|
||||
/// }
|
||||
/// }
|
||||
/// VTKM_BENCHMARK_OPTS(MyParameterSweep, ->ArgName("Param")->Range(32, 1024 * 1024));
|
||||
/// ```
|
||||
///
|
||||
/// will generate and launch several benchmarks, exploring the parameter space of
|
||||
/// `SetSomeParameter` between the values of 32 and (1024*1024). The chain of
|
||||
/// function calls in the second argument is applied to an instance of
|
||||
/// ::benchmark::internal::Benchmark. See Google Benchmark's documentation for
|
||||
/// more details.
|
||||
///
|
||||
/// For more complex benchmark configurations, the VTKM_BENCHMARK_APPLY macro
|
||||
/// accepts a function with the signature
|
||||
/// `void Func(::benchmark::internal::Benchmark*)` that may be used to generate
|
||||
/// more complex configurations.
|
||||
///
|
||||
/// To instantiate a templated benchmark across a list of types, the
|
||||
/// VTKM_BENCHMARK_TEMPLATE* macros take a vtkm::List of types as an additional
|
||||
/// parameter. The templated benchmark function will be instantiated and called
|
||||
/// for each type in the list:
|
||||
///
|
||||
/// ```
|
||||
/// template <typename T>
|
||||
/// void MyBenchmark(::benchmark::State& state)
|
||||
/// {
|
||||
/// MyClass<T> someClass;
|
||||
///
|
||||
/// // Must use a vtkm timer to properly capture e.g. CUDA execution times.
|
||||
/// vtkm::cont::Timer timer;
|
||||
/// for (auto _ : state)
|
||||
/// {
|
||||
/// someClass.Reset();
|
||||
///
|
||||
/// timer.Start();
|
||||
/// someClass.DoWork();
|
||||
/// timer.Stop();
|
||||
///
|
||||
/// state.SetIterationTime(timer.GetElapsedTime());
|
||||
/// }
|
||||
/// }
|
||||
/// VTKM_BENCHMARK_TEMPLATES(MyBenchmark, vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
|
||||
/// ```
|
||||
///
|
||||
/// The benchmarks are executed by calling the `VTKM_EXECUTE_BENCHMARKS(argc, argv)`
|
||||
/// macro from `main`. There is also a `VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, some_string)`
|
||||
/// macro that appends the contents of `some_string` to the Google Benchmark preamble.
|
||||
///
|
||||
/// If a benchmark is not compatible with some configuration, it may call
|
||||
/// `state.SkipWithError("Error message");` on the `::benchmark::State` object and return. This is
|
||||
/// useful, for instance in the filter tests when the input is not compatible with the filter.
|
||||
///
|
||||
/// When launching a benchmark executable, the following options are supported by Google Benchmark:
|
||||
///
|
||||
/// - `--benchmark_list_tests`: List all available tests.
|
||||
/// - `--benchmark_filter="[regex]"`: Only run benchmarks with names that match `[regex]`.
|
||||
/// - `--benchmark_filter="-[regex]"`: Only run benchmarks with names that DON'T match `[regex]`.
|
||||
/// - `--benchmark_min_time=[float]`: Make sure each benchmark repetition gathers `[float]` seconds
|
||||
/// of data.
|
||||
/// - `--benchmark_repetitions=[int]`: Run each benchmark `[int]` times and report aggregate statistics
|
||||
/// (mean, stdev, etc). A "repetition" refers to a single execution of the benchmark function, not
|
||||
/// an "iteration", which is a loop of the `for(auto _:state){...}` section.
|
||||
/// - `--benchmark_report_aggregates_only="true|false"`: If true, only the aggregate statistics are
|
||||
/// reported (affects both console and file output). Requires `--benchmark_repetitions` to be useful.
|
||||
/// - `--benchmark_display_aggregates_only="true|false"`: If true, only the aggregate statistics are
|
||||
/// printed to the terminal. Any file output will still contain all repetition info.
|
||||
/// - `--benchmark_format="console|json|csv"`: Specify terminal output format: human readable
|
||||
/// (`console`) or `csv`/`json` formats.
|
||||
/// - `--benchmark_out_format="console|json|csv"`: Specify file output format: human readable
|
||||
/// (`console`) or `csv`/`json` formats.
|
||||
/// - `--benchmark_out=[filename]`: Specify output file.
|
||||
/// - `--benchmark_color="true|false"`: Toggle color output in terminal when using `console` output.
|
||||
/// - `--benchmark_counters_tabular="true|false"`: Print counter information (e.g. bytes/sec, items/sec)
|
||||
/// in the table, rather than appending them as a label.
|
||||
///
|
||||
/// For more information and examples of practical usage, take a look at the existing benchmarks in
|
||||
/// vtk-m/benchmarking/.
|
||||
|
||||
/// \def VTKM_EXECUTE_BENCHMARKS(argc, argv)
|
||||
///
|
||||
/// Run the benchmarks defined in the current file. Benchmarks may be filtered
|
||||
/// and modified using the passed arguments; see the Google Benchmark documentation
|
||||
/// for more details.
|
||||
#define VTKM_EXECUTE_BENCHMARKS(argc, argv) vtkm::bench::detail::ExecuteBenchmarks(argc, argv)
|
||||
|
||||
/// \def VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble)
|
||||
///
|
||||
/// Run the benchmarks defined in the current file. Benchmarks may be filtered
|
||||
/// and modified using the passed arguments; see the Google Benchmark documentation
|
||||
/// for more details. The `preamble` string may be used to supply additional
|
||||
/// information that will be appended to the output's preamble.
|
||||
#define VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble) \
|
||||
vtkm::bench::detail::ExecuteBenchmarks(argc, argv, preamble)
|
||||
|
||||
/// \def VTKM_BENCHMARK(BenchFunc)
|
||||
///
|
||||
/// Define a simple benchmark. A single benchmark will be generated that executes
|
||||
/// `BenchFunc`. `BenchFunc` must have the signature:
|
||||
///
|
||||
/// ```
|
||||
/// void BenchFunc(::benchmark::State& state)
|
||||
/// ```
|
||||
#define VTKM_BENCHMARK(BenchFunc) BENCHMARK(BenchFunc)->UseManualTime()
|
||||
|
||||
/// \def VTKM_BENCHMARK_OPTS(BenchFunc, Args)
|
||||
///
|
||||
/// Similar to `VTKM_BENCHMARK`, but allows additional options to be specified
|
||||
/// on the `::benchmark::internal::Benchmark` object. Example usage:
|
||||
///
|
||||
/// ```
|
||||
/// VTKM_BENCHMARK_OPTS(MyBenchmark, ->ArgName("MyParam")->Range(32, 1024*1024));
|
||||
/// ```
|
||||
///
|
||||
/// Note the similarity to the raw Google Benchmark usage of
|
||||
/// `BENCHMARK(MyBenchmark)->ArgName("MyParam")->Range(32, 1024*1024);`. See
|
||||
/// the Google Benchmark documentation for more details on the available options.
|
||||
#define VTKM_BENCHMARK_OPTS(BenchFunc, options) BENCHMARK(BenchFunc)->UseManualTime() options
|
||||
|
||||
/// \def VTKM_BENCHMARK_APPLY(BenchFunc, ConfigFunc)
|
||||
///
|
||||
/// Similar to `VTKM_BENCHMARK`, but allows advanced benchmark configuration
|
||||
/// via a supplied ConfigFunc, similar to Google Benchmark's
|
||||
/// `BENCHMARK(BenchFunc)->Apply(ConfigFunc)`. `ConfigFunc` must have the
|
||||
/// signature:
|
||||
///
|
||||
/// ```
|
||||
/// void ConfigFunc(::benchmark::internal::Benchmark*);
|
||||
/// ```
|
||||
///
|
||||
/// See the Google Benchmark documentation for more details on the available options.
|
||||
#define VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor) \
|
||||
BENCHMARK(BenchFunc)->Apply(applyFunctor)->UseManualTime()
|
||||
|
||||
/// \def VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList)
|
||||
///
|
||||
/// Define a family of benchmarks that vary by template argument. A single
|
||||
/// benchmark will be generated for each type in `TypeList` (a vtkm::List of
|
||||
/// types) that executes `BenchFunc<T>`. `BenchFunc` must have the signature:
|
||||
///
|
||||
/// ```
|
||||
/// template <typename T>
|
||||
/// void BenchFunc(::benchmark::State& state)
|
||||
/// ```
|
||||
#define VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList) \
|
||||
VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, vtkm::bench::detail::NullApply, TypeList)
|
||||
|
||||
/// \def VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, Args, TypeList)
|
||||
///
|
||||
/// Similar to `VTKM_BENCHMARK_TEMPLATES`, but allows additional options to be specified
|
||||
/// on the `::benchmark::internal::Benchmark` object. Example usage:
|
||||
///
|
||||
/// ```
|
||||
/// VTKM_BENCHMARK_TEMPLATES_OPTS(MyBenchmark,
|
||||
/// ->ArgName("MyParam")->Range(32, 1024*1024),
|
||||
/// vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
|
||||
/// ```
|
||||
#define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList) \
|
||||
VTKM_BENCHMARK_TEMPLATES_APPLY( \
|
||||
BenchFunc, [](::benchmark::internal::Benchmark* bm) { bm options; }, TypeList)
|
||||
|
||||
/// \def VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ConfigFunc, TypeList)
|
||||
///
|
||||
/// Similar to `VTKM_BENCHMARK_TEMPLATES`, but allows advanced benchmark configuration
|
||||
/// via a supplied ConfigFunc, similar to Google Benchmark's
|
||||
/// `BENCHMARK(BenchFunc)->Apply(ConfigFunc)`. `ConfigFunc` must have the
|
||||
/// signature:
|
||||
///
|
||||
/// ```
|
||||
/// void ConfigFunc(::benchmark::internal::Benchmark*);
|
||||
/// ```
|
||||
///
|
||||
/// See the Google Benchmark documentation for more details on the available options.
|
||||
#define VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ApplyFunctor, TypeList) \
|
||||
namespace \
|
||||
{ /* A template function cannot be used as a template parameter, so wrap the function with \
|
||||
* a template struct to get it into the GenerateTemplateBenchmarks class. */ \
|
||||
template <typename... Ts> \
|
||||
struct VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
|
||||
{ \
|
||||
static ::benchmark::internal::Function* GetFunction() { return BenchFunc<Ts...>; } \
|
||||
}; \
|
||||
} /* end anon namespace */ \
|
||||
int BENCHMARK_PRIVATE_NAME(BenchFunc) = vtkm::bench::detail::GenerateTemplateBenchmarks< \
|
||||
brigand::bind<VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)>, \
|
||||
TypeList>::Register(#BenchFunc, ApplyFunctor)
|
||||
|
||||
// Internal use only:
|
||||
#define VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
|
||||
BENCHMARK_PRIVATE_CONCAT(_wrapper_, BenchFunc, __LINE__)
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace benchmarking
|
||||
namespace bench
|
||||
{
|
||||
namespace stats
|
||||
namespace detail
|
||||
{
|
||||
// Checks that the sequence is sorted, returns true if it's sorted, false
|
||||
// otherwise
|
||||
template <typename ForwardIt>
|
||||
bool is_sorted(ForwardIt first, ForwardIt last)
|
||||
|
||||
static inline void NullApply(::benchmark::internal::Benchmark*)
|
||||
{
|
||||
ForwardIt next = first;
|
||||
++next;
|
||||
for (; next != last; ++next, ++first)
|
||||
}
|
||||
|
||||
/// Do not use directly. The VTKM_BENCHMARK_TEMPLATES macros should be used
|
||||
/// instead.
|
||||
// TypeLists could be expanded to compute cross products if we ever have that
|
||||
// need.
|
||||
template <typename BoundBench, typename TypeLists>
|
||||
struct GenerateTemplateBenchmarks;
|
||||
|
||||
template <template <typename...> class BenchType, typename TypeList>
|
||||
struct GenerateTemplateBenchmarks<brigand::bind<BenchType>, TypeList>
|
||||
{
|
||||
private:
|
||||
template <typename T>
|
||||
using MakeBenchType = BenchType<T>;
|
||||
|
||||
using Benchmarks = brigand::transform<TypeList, brigand::bind<MakeBenchType, brigand::_1>>;
|
||||
|
||||
template <typename ApplyFunctor>
|
||||
struct RegisterImpl
|
||||
{
|
||||
if (*first > *next)
|
||||
std::string BenchName;
|
||||
ApplyFunctor Apply;
|
||||
|
||||
template <typename P>
|
||||
void operator()(brigand::type_<BenchType<P>>) const
|
||||
{
|
||||
std::ostringstream name;
|
||||
name << this->BenchName << "<" << vtkm::testing::TypeName<P>::Name() << ">";
|
||||
auto bm = ::benchmark::internal::RegisterBenchmarkInternal(
|
||||
new ::benchmark::internal::FunctionBenchmark(name.str().c_str(),
|
||||
BenchType<P>::GetFunction()));
|
||||
this->Apply(bm);
|
||||
|
||||
// Always use manual time with vtkm::cont::Timer to capture CUDA times accurately.
|
||||
bm->UseManualTime();
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
template <typename ApplyFunctor>
|
||||
static int Register(const std::string& benchName, ApplyFunctor&& apply)
|
||||
{
|
||||
brigand::for_each<Benchmarks>(
|
||||
RegisterImpl<ApplyFunctor>{ benchName, std::forward<ApplyFunctor>(apply) });
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
class VTKmConsoleReporter : public ::benchmark::ConsoleReporter
|
||||
{
|
||||
std::string UserPreamble;
|
||||
|
||||
public:
|
||||
VTKmConsoleReporter() = default;
|
||||
|
||||
explicit VTKmConsoleReporter(const std::string& preamble)
|
||||
: UserPreamble{ preamble }
|
||||
{
|
||||
}
|
||||
|
||||
bool ReportContext(const Context& context) override
|
||||
{
|
||||
if (!::benchmark::ConsoleReporter::ReportContext(context))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Get the value representing the `percent` percentile of the
|
||||
// sorted samples using linear interpolation
|
||||
vtkm::Float64 PercentileValue(const std::vector<vtkm::Float64>& samples,
|
||||
const vtkm::Float64 percent)
|
||||
{
|
||||
VTKM_ASSERT(!samples.empty());
|
||||
if (samples.size() == 1)
|
||||
{
|
||||
return samples.front();
|
||||
}
|
||||
VTKM_ASSERT(percent >= 0.0);
|
||||
VTKM_ASSERT(percent <= 100.0);
|
||||
VTKM_ASSERT(vtkm::benchmarking::stats::is_sorted(samples.begin(), samples.end()));
|
||||
if (percent == 100.0)
|
||||
{
|
||||
return samples.back();
|
||||
}
|
||||
// Find the two nearest percentile values and linearly
|
||||
// interpolate between them
|
||||
const vtkm::Float64 rank = percent / 100.0 * (static_cast<vtkm::Float64>(samples.size()) - 1.0);
|
||||
const vtkm::Float64 low_rank = vtkm::Floor(rank);
|
||||
const vtkm::Float64 dist = rank - low_rank;
|
||||
const size_t k = static_cast<size_t>(low_rank);
|
||||
const vtkm::Float64 low = samples[k];
|
||||
const vtkm::Float64 high = samples[k + 1];
|
||||
return low + (high - low) * dist;
|
||||
}
|
||||
// Winsorize the samples to clean up any very extreme outliers
|
||||
// Will replace all samples below `percent` and above 100 - `percent` percentiles
|
||||
// with the value at the percentile
|
||||
// NOTE: Assumes the samples have been sorted, as we make use of PercentileValue
|
||||
void Winsorize(std::vector<vtkm::Float64>& samples, const vtkm::Float64 percent)
|
||||
{
|
||||
const vtkm::Float64 low_percentile = PercentileValue(samples, percent);
|
||||
const vtkm::Float64 high_percentile = PercentileValue(samples, 100.0 - percent);
|
||||
for (std::vector<vtkm::Float64>::iterator it = samples.begin(); it != samples.end(); ++it)
|
||||
{
|
||||
if (*it < low_percentile)
|
||||
// The rest of the preamble is printed to the error stream, so be consistent:
|
||||
auto& out = this->GetErrorStream();
|
||||
|
||||
// Print list of devices:
|
||||
out << "VTK-m Device State:\n";
|
||||
vtkm::cont::GetRuntimeDeviceTracker().PrintSummary(out);
|
||||
if (!this->UserPreamble.empty())
|
||||
{
|
||||
*it = low_percentile;
|
||||
out << this->UserPreamble << "\n";
|
||||
}
|
||||
else if (*it > high_percentile)
|
||||
{
|
||||
*it = high_percentile;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Compute the mean value of the dataset
|
||||
vtkm::Float64 Mean(const std::vector<vtkm::Float64>& samples)
|
||||
{
|
||||
vtkm::Float64 mean = 0;
|
||||
for (std::vector<vtkm::Float64>::const_iterator it = samples.begin(); it != samples.end(); ++it)
|
||||
{
|
||||
mean += *it;
|
||||
}
|
||||
return mean / static_cast<vtkm::Float64>(samples.size());
|
||||
}
|
||||
// Compute the sample variance of the samples
|
||||
vtkm::Float64 Variance(const std::vector<vtkm::Float64>& samples)
|
||||
{
|
||||
vtkm::Float64 mean = Mean(samples);
|
||||
vtkm::Float64 square_deviations = 0;
|
||||
for (std::vector<vtkm::Float64>::const_iterator it = samples.begin(); it != samples.end(); ++it)
|
||||
{
|
||||
square_deviations += vtkm::Pow(*it - mean, 2.0);
|
||||
}
|
||||
return square_deviations / (static_cast<vtkm::Float64>(samples.size()) - 1.0);
|
||||
}
|
||||
// Compute the standard deviation of the samples
|
||||
vtkm::Float64 StandardDeviation(const std::vector<vtkm::Float64>& samples)
|
||||
{
|
||||
return vtkm::Sqrt(Variance(samples));
|
||||
}
|
||||
// Compute the median absolute deviation of the dataset
|
||||
vtkm::Float64 MedianAbsDeviation(const std::vector<vtkm::Float64>& samples)
|
||||
{
|
||||
std::vector<vtkm::Float64> abs_deviations;
|
||||
abs_deviations.reserve(samples.size());
|
||||
const vtkm::Float64 median = PercentileValue(samples, 50.0);
|
||||
for (std::vector<vtkm::Float64>::const_iterator it = samples.begin(); it != samples.end(); ++it)
|
||||
{
|
||||
abs_deviations.push_back(vtkm::Abs(*it - median));
|
||||
}
|
||||
std::sort(abs_deviations.begin(), abs_deviations.end());
|
||||
return PercentileValue(abs_deviations, 50.0);
|
||||
}
|
||||
} // stats
|
||||
out.flush();
|
||||
|
||||
/*
|
||||
* The benchmarker takes a functor to benchmark and runs it multiple times,
|
||||
* printing out statistics of the run time at the end.
|
||||
* The functor passed should return the run time of the thing being benchmarked
|
||||
* in seconds, this lets us avoid including any per-run setup time in the benchmark.
|
||||
* However any one-time setup should be done in the functor's constructor
|
||||
*/
|
||||
struct Benchmarker
|
||||
{
|
||||
std::vector<vtkm::Float64> Samples;
|
||||
std::string BenchmarkName;
|
||||
|
||||
const vtkm::Float64 MaxRuntime;
|
||||
const size_t MaxIterations;
|
||||
|
||||
public:
|
||||
VTKM_CONT
|
||||
Benchmarker(vtkm::Float64 maxRuntime = 30, std::size_t maxIterations = 100)
|
||||
: MaxRuntime(maxRuntime)
|
||||
, MaxIterations(maxIterations)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename Functor>
|
||||
VTKM_CONT void GatherSamples(Functor func)
|
||||
{
|
||||
this->Samples.clear();
|
||||
this->BenchmarkName = func.Description();
|
||||
|
||||
// Do a warm-up run. If the benchmark allocates any additional memory
|
||||
// eg. storage for output results, this will let it do that and
|
||||
// allow us to avoid measuring the allocation time in the actual benchmark run
|
||||
func();
|
||||
|
||||
this->Samples.reserve(this->MaxIterations);
|
||||
|
||||
// Run each benchmark for MAX_RUNTIME seconds or MAX_ITERATIONS iterations, whichever
|
||||
// takes less time. This kind of assumes that running for 100 iterations or 30s (the defaults) will give
|
||||
// good statistics, but if median abs dev and/or std dev are too high both these limits
|
||||
// could be increased
|
||||
size_t iter = 0;
|
||||
for (vtkm::Float64 elapsed = 0.0; elapsed < this->MaxRuntime && iter < this->MaxIterations;
|
||||
elapsed += this->Samples.back(), ++iter)
|
||||
{
|
||||
this->Samples.push_back(func());
|
||||
}
|
||||
|
||||
std::sort(this->Samples.begin(), this->Samples.end());
|
||||
stats::Winsorize(this->Samples, 5.0);
|
||||
}
|
||||
|
||||
VTKM_CONT void PrintSummary(std::ostream& out = std::cout)
|
||||
{
|
||||
out << "Benchmark \'" << this->BenchmarkName << "\' results:\n";
|
||||
|
||||
if (this->Samples.empty())
|
||||
{
|
||||
out << "\tNo samples gathered!\n";
|
||||
return;
|
||||
}
|
||||
|
||||
out << "\tnumSamples = " << this->Samples.size() << "\n"
|
||||
<< "\tmedian = " << stats::PercentileValue(this->Samples, 50.0) << "s\n"
|
||||
<< "\tmedian abs dev = " << stats::MedianAbsDeviation(this->Samples) << "s\n"
|
||||
<< "\tmean = " << stats::Mean(this->Samples) << "s\n"
|
||||
<< "\tstd dev = " << stats::StandardDeviation(this->Samples) << "s\n"
|
||||
<< "\tmin = " << this->Samples.front() << "s\n"
|
||||
<< "\tmax = " << this->Samples.back() << "s\n";
|
||||
}
|
||||
|
||||
template <typename DeviceAdapter, typename MakerFunctor, typename T>
|
||||
VTKM_CONT bool operator()(DeviceAdapter id, MakerFunctor&& makerFunctor, T t)
|
||||
{
|
||||
auto func = makerFunctor(t, id);
|
||||
std::cout << "Running '" << func.Description() << "'" << std::endl;
|
||||
this->GatherSamples(func);
|
||||
this->PrintSummary();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Read-only access to the samples currently stored in this object.
VTKM_CONT const std::vector<vtkm::Float64>& GetSamples() const
{
  return this->Samples;
}
|
||||
|
||||
// Discards the stored benchmark name and all gathered samples.
VTKM_CONT void Reset()
{
  this->BenchmarkName.clear();
  this->Samples.clear();
}
|
||||
};
|
||||
|
||||
template <typename MakerFunctor>
|
||||
class InternalPrintTypeAndBench
|
||||
// Returns the number of executed benchmarks:
|
||||
static inline vtkm::Id ExecuteBenchmarks(int& argc,
|
||||
char* argv[],
|
||||
const std::string& preamble = std::string{})
|
||||
{
|
||||
MakerFunctor Maker;
|
||||
|
||||
public:
|
||||
VTKM_CONT
|
||||
InternalPrintTypeAndBench(MakerFunctor maker)
|
||||
: Maker(maker)
|
||||
::benchmark::Initialize(&argc, argv);
|
||||
if (::benchmark::ReportUnrecognizedArguments(argc, argv))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
VTKM_CONT void operator()(T t, vtkm::cont::DeviceAdapterId id) const
|
||||
{
|
||||
std::cout << "*** " << vtkm::testing::TypeName<T>::Name() << " on device " << id.GetName()
|
||||
<< " ***************" << std::endl;
|
||||
Benchmarker bench;
|
||||
try
|
||||
{
|
||||
vtkm::cont::TryExecuteOnDevice(id, bench, Maker, t);
|
||||
}
|
||||
catch (std::exception& e)
|
||||
{
|
||||
std::cout << "\n"
|
||||
<< "An exception occurring during a benchmark:\n\t" << e.what() << "\n"
|
||||
<< "Attempting to continue with remaining benchmarks...\n\n";
|
||||
}
|
||||
}
|
||||
};
|
||||
VTKmConsoleReporter reporter{ preamble };
|
||||
|
||||
template <class MakerFunctor, class TypeList>
|
||||
VTKM_CONT void BenchmarkTypes(MakerFunctor&& maker, TypeList, vtkm::cont::DeviceAdapterId id)
|
||||
{
|
||||
vtkm::ListForEach(
|
||||
InternalPrintTypeAndBench<MakerFunctor>(std::forward<MakerFunctor>(maker)), TypeList(), id);
|
||||
vtkm::cont::Timer timer;
|
||||
timer.Start();
|
||||
std::size_t num = ::benchmark::RunSpecifiedBenchmarks(&reporter);
|
||||
timer.Stop();
|
||||
|
||||
reporter.GetOutputStream().flush();
|
||||
reporter.GetErrorStream().flush();
|
||||
|
||||
reporter.GetErrorStream() << "Ran " << num << " benchmarks in " << timer.GetElapsedTime()
|
||||
<< " seconds." << std::endl;
|
||||
|
||||
return static_cast<vtkm::Id>(num);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::bench::detail
|
||||
|
||||
#endif
|
||||
|
@ -7,18 +7,24 @@
|
||||
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
## PURPOSE. See the above copyright notice for more information.
|
||||
##============================================================================
|
||||
|
||||
# Find Google Benchmark. Note that benchmark_DIR must be pointed at an
|
||||
# installation, not a build directory.
|
||||
find_package(benchmark)
|
||||
|
||||
function(add_benchmark)
|
||||
set(options)
|
||||
set(oneValueArgs NAME FILE)
|
||||
set(multiValueArgs LIBS)
|
||||
cmake_parse_arguments(VTKm_AB
|
||||
"${options}" "${oneValueArgs}" "${multiValueArgs}"
|
||||
${ARGN}
|
||||
)
|
||||
"${options}" "${oneValueArgs}" "${multiValueArgs}"
|
||||
${ARGN}
|
||||
)
|
||||
set(exe_name ${VTKm_AB_NAME})
|
||||
|
||||
add_executable(${exe_name} ${VTKm_AB_FILE})
|
||||
target_link_libraries(${exe_name} PRIVATE ${VTKm_AB_LIBS})
|
||||
target_link_libraries(${exe_name} PRIVATE benchmark::benchmark)
|
||||
vtkm_add_drop_unused_function_flags(${exe_name})
|
||||
vtkm_add_target_information(${exe_name})
|
||||
|
||||
|
133
docs/changelog/google_benchmark.md
Normal file
133
docs/changelog/google_benchmark.md
Normal file
@ -0,0 +1,133 @@
|
||||
# Updated Benchmark Framework
|
||||
|
||||
The benchmarking framework has been updated to use Google Benchmark.
|
||||
|
||||
A benchmark is now a single function, which is passed to a macro:
|
||||
|
||||
```
|
||||
void MyBenchmark(::benchmark::State& state)
|
||||
{
|
||||
MyClass someClass;
|
||||
|
||||
// Optional: Add a descriptive label with additional benchmark details:
|
||||
state.SetLabel("Blah blah blah.");
|
||||
|
||||
// Must use a vtkm timer to properly capture e.g. CUDA execution times.
|
||||
vtkm::cont::Timer timer;
|
||||
for (auto _ : state)
|
||||
{
|
||||
someClass.Reset();
|
||||
|
||||
timer.Start();
|
||||
someClass.DoWork();
|
||||
timer.Stop();
|
||||
|
||||
state.SetIterationTime(timer.GetElapsedTime());
|
||||
}
|
||||
|
||||
// Optional: Report items and/or bytes processed per iteration in output:
|
||||
state.SetItemsProcessed(state.iterations() * someClass.GetNumberOfItems());
|
||||
state.SetBytesProcessed(state.iterations() * someClass.GetNumberOfBytes());
|
||||
}
|
||||
VTKM_BENCHMARK(MyBenchmark);
|
||||
```
|
||||
|
||||
Google benchmark also makes it easy to implement parameter sweep benchmarks:
|
||||
|
||||
```
|
||||
void MyParameterSweep(::benchmark::State& state)
|
||||
{
|
||||
// The current value in the sweep:
|
||||
const vtkm::Id currentValue = state.range(0);
|
||||
|
||||
MyClass someClass;
|
||||
someClass.SetSomeParameter(currentValue);
|
||||
|
||||
vtkm::cont::Timer timer;
|
||||
for (auto _ : state)
|
||||
{
|
||||
someClass.Reset();
|
||||
|
||||
timer.Start();
|
||||
someClass.DoWork();
|
||||
timer.Stop();
|
||||
|
||||
state.SetIterationTime(timer.GetElapsedTime());
|
||||
}
|
||||
}
|
||||
VTKM_BENCHMARK_OPTS(MyParameterSweep, ->ArgName("Param")->Range(32, 1024 * 1024));
|
||||
```
|
||||
|
||||
will generate and launch several benchmarks, exploring the parameter space of
|
||||
`SetSomeParameter` between the values of 32 and (1024*1024). The chain of
|
||||
function calls in the second argument is applied to an instance of
|
||||
`::benchmark::internal::Benchmark`. See Google Benchmark's documentation for
|
||||
more details.
|
||||
|
||||
For more complex benchmark configurations, the VTKM_BENCHMARK_APPLY macro
|
||||
accepts a function with the signature
|
||||
`void Func(::benchmark::internal::Benchmark*)` that may be used to generate
|
||||
more complex configurations.
|
||||
|
||||
To instantiate a templated benchmark across a list of types, the
|
||||
VTKM_BENCHMARK_TEMPLATE* macros take a vtkm::List of types as an additional
|
||||
parameter. The templated benchmark function will be instantiated and called
|
||||
for each type in the list:
|
||||
|
||||
```
|
||||
template <typename T>
|
||||
void MyBenchmark(::benchmark::State& state)
|
||||
{
|
||||
MyClass<T> someClass;
|
||||
|
||||
// Must use a vtkm timer to properly capture e.g. CUDA execution times.
|
||||
vtkm::cont::Timer timer;
|
||||
for (auto _ : state)
|
||||
{
|
||||
someClass.Reset();
|
||||
|
||||
timer.Start();
|
||||
someClass.DoWork();
|
||||
timer.Stop();
|
||||
|
||||
state.SetIterationTime(timer.GetElapsedTime());
|
||||
}
|
||||
}
|
||||
VTKM_BENCHMARK_TEMPLATE(MyBenchmark, vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
|
||||
```
|
||||
|
||||
The benchmarks are executed by calling the `VTKM_EXECUTE_BENCHMARKS(argc, argv)`
|
||||
macro from `main`. There is also a `VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, some_string)`
|
||||
macro that appends the contents of `some_string` to the Google Benchmark preamble.
|
||||
|
||||
If a benchmark is not compatible with some configuration, it may call
|
||||
`state.SkipWithError("Error message");` on the `::benchmark::State` object and return. This is
|
||||
useful, for instance in the filter tests when the input is not compatible with the filter.
|
||||
|
||||
When launching a benchmark executable, the following options are supported by Google Benchmark:
|
||||
|
||||
- `--benchmark_list_tests`: List all available tests.
|
||||
- `--benchmark_filter="[regex]"`: Only run benchmarks with names that match `[regex]`.
|
||||
- `--benchmark_filter="-[regex]"`: Only run benchmarks with names that DON'T match `[regex]`.
|
||||
- `--benchmark_min_time=[float]`: Make sure each benchmark repetition gathers `[float]` seconds
|
||||
of data.
|
||||
- `--benchmark_repetitions=[int]`: Run each benchmark `[int]` times and report aggregate statistics
|
||||
(mean, stdev, etc). A "repetition" refers to a single execution of the benchmark function, not
|
||||
an "iteration", which is a loop of the `for(auto _:state){...}` section.
|
||||
- `--benchmark_report_aggregates_only="true|false"`: If true, only the aggregate statistics are
|
||||
reported (affects both console and file output). Requires `--benchmark_repetitions` to be useful.
|
||||
- `--benchmark_display_aggregates_only="true|false"`: If true, only the aggregate statistics are
|
||||
printed to the terminal. Any file output will still contain all repetition info.
|
||||
- `--benchmark_format="console|json|csv"`: Specify terminal output format: human readable
|
||||
(`console`) or `csv`/`json` formats.
|
||||
- `--benchmark_out_format="console|json|csv"`: Specify file output format: human readable
|
||||
(`console`) or `csv`/`json` formats.
|
||||
- `--benchmark_out=[filename]`: Specify output file.
|
||||
- `--benchmark_color="true|false"`: Toggle color output in terminal when using `console` output.
|
||||
- `--benchmark_counters_tabular="true|false"`: Print counter information (e.g. bytes/sec, items/sec)
|
||||
in the table, rather than appending them as a label.
|
||||
|
||||
For more information and examples of practical usage, take a look at the existing benchmarks in
|
||||
vtk-m/benchmarking/.
|
@ -68,6 +68,13 @@ public:
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/// A convenience function for creating an ArrayHandleIndex. It takes the
|
||||
/// size of the array and generates an array holding vtkm::Id from [0, size - 1]
|
||||
VTKM_CONT inline vtkm::cont::ArrayHandleIndex make_ArrayHandleIndex(vtkm::Id length)
|
||||
{
|
||||
return vtkm::cont::ArrayHandleIndex(length);
|
||||
}
|
||||
}
|
||||
} // namespace vtkm::cont
|
||||
|
||||
|
@ -47,7 +47,6 @@ void DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagCuda>::Reset()
|
||||
// Starts the timer: records StartEvent on cudaStreamPerThread, waits on that
// event via cudaEventSynchronize, and then sets StartReady.
// NOTE(review): the hunk header for this region (-47,7 +47,6) indicates that
// one line here is removed by the change; presumably it is the
// cudaEventSynchronize call -- confirm against the checked-in file.
void DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagCuda>::Start()
|
||||
{
|
||||
VTKM_CUDA_CALL(cudaEventRecord(this->StartEvent, cudaStreamPerThread));
|
||||
VTKM_CUDA_CALL(cudaEventSynchronize(this->StartEvent));
|
||||
this->StartReady = true;
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,35 @@ namespace vtkm
|
||||
namespace exec
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
// Clang-7 as host compiler under nvcc returns types from std::make_unsigned
|
||||
// that are not compatible with the AtomicInterface API, so we define our own
|
||||
// mapping. This must exist for every entry in vtkm::cont::AtomicArrayTypeList.
|
||||
template <typename>
|
||||
struct MakeUnsigned;
|
||||
template <>
|
||||
struct MakeUnsigned<vtkm::UInt32>
|
||||
{
|
||||
using type = vtkm::UInt32;
|
||||
};
|
||||
template <>
|
||||
struct MakeUnsigned<vtkm::Int32>
|
||||
{
|
||||
using type = vtkm::UInt32;
|
||||
};
|
||||
template <>
|
||||
struct MakeUnsigned<vtkm::UInt64>
|
||||
{
|
||||
using type = vtkm::UInt64;
|
||||
};
|
||||
template <>
|
||||
struct MakeUnsigned<vtkm::Int64>
|
||||
{
|
||||
using type = vtkm::UInt64;
|
||||
};
|
||||
}
|
||||
|
||||
template <typename T, typename Device>
|
||||
class AtomicArrayExecutionObject
|
||||
{
|
||||
@ -66,7 +95,7 @@ public:
|
||||
// We only support 32/64 bit signed/unsigned ints, and AtomicInterface
|
||||
// currently only provides API for unsigned types.
|
||||
// We'll cast the signed types to unsigned to work around this.
|
||||
using APIType = typename std::make_unsigned<ValueType>::type;
|
||||
using APIType = typename detail::MakeUnsigned<ValueType>::type;
|
||||
|
||||
return static_cast<T>(
|
||||
AtomicInterface::Load(reinterpret_cast<const APIType*>(this->Data + index)));
|
||||
@ -89,7 +118,7 @@ public:
|
||||
// This is safe, since the only difference between signed/unsigned types
|
||||
// is how overflow works, and signed overflow is already undefined. We also
|
||||
// document that overflow is undefined for this operation.
|
||||
using APIType = typename std::make_unsigned<ValueType>::type;
|
||||
using APIType = typename detail::MakeUnsigned<ValueType>::type;
|
||||
|
||||
return static_cast<T>(AtomicInterface::Add(reinterpret_cast<APIType*>(this->Data + index),
|
||||
static_cast<APIType>(value)));
|
||||
@ -116,7 +145,7 @@ public:
|
||||
// This is safe, since the only difference between signed/unsigned types
|
||||
// is how overflow works, and signed overflow is already undefined. We also
|
||||
// document that overflow is undefined for this operation.
|
||||
using APIType = typename std::make_unsigned<ValueType>::type;
|
||||
using APIType = typename detail::MakeUnsigned<ValueType>::type;
|
||||
|
||||
AtomicInterface::Store(reinterpret_cast<APIType*>(this->Data + index),
|
||||
static_cast<APIType>(value));
|
||||
@ -169,7 +198,7 @@ public:
|
||||
// We'll cast the signed types to unsigned to work around this.
|
||||
// This is safe, since the only difference between signed/unsigned types
|
||||
// is how overflow works, and signed overflow is already undefined.
|
||||
using APIType = typename std::make_unsigned<ValueType>::type;
|
||||
using APIType = typename detail::MakeUnsigned<ValueType>::type;
|
||||
|
||||
return static_cast<T>(
|
||||
AtomicInterface::CompareAndSwap(reinterpret_cast<APIType*>(this->Data + index),
|
||||
|
Loading…
Reference in New Issue
Block a user