From 634f523d92ae854f13ae9ffec87d75c493e92e19 Mon Sep 17 00:00:00 2001 From: Haocheng LIU Date: Fri, 15 Feb 2019 14:47:07 -0500 Subject: [PATCH] Merge benchmark executables into a device dependent shared library VTK-m has been updated to replace old per device benchmark executables with a device dependent shared library so that it's able to accept a device adapter at runtime through the "--device=" argument. --- benchmarking/BenchmarkArrayTransfer.cxx | 92 ++-- benchmarking/BenchmarkAtomicArray.cxx | 103 ++-- benchmarking/BenchmarkCopySpeeds.cxx | 94 ++-- benchmarking/BenchmarkDeviceAdapter.cxx | 487 +++++++++--------- benchmarking/BenchmarkFieldAlgorithms.cxx | 128 ++--- benchmarking/BenchmarkFilters.cxx | 232 +++++---- benchmarking/BenchmarkRayTracing.cxx | 15 +- benchmarking/BenchmarkTopologyAlgorithms.cxx | 67 ++- benchmarking/Benchmarker.h | 39 +- benchmarking/CMakeLists.txt | 53 +- docs/changelog/merge-benchmark-executables.md | 6 + vtkm/cont/Algorithm.h | 9 +- 12 files changed, 666 insertions(+), 659 deletions(-) create mode 100644 docs/changelog/merge-benchmark-executables.md diff --git a/benchmarking/BenchmarkArrayTransfer.cxx b/benchmarking/BenchmarkArrayTransfer.cxx index 7bbc9d161..d3a689017 100644 --- a/benchmarking/BenchmarkArrayTransfer.cxx +++ b/benchmarking/BenchmarkArrayTransfer.cxx @@ -22,7 +22,9 @@ #include +#include #include +#include #include #include @@ -40,10 +42,9 @@ namespace vtkm namespace benchmarking { -template struct BenchmarkArrayTransfer { - using Algo = vtkm::cont::DeviceAdapterAlgorithm; + using Algo = vtkm::cont::Algorithm; using StorageTag = vtkm::cont::StorageTagBasic; using Timer = vtkm::cont::Timer; @@ -139,11 +140,10 @@ struct BenchmarkArrayTransfer // Copies NumValues from control environment to execution environment and // accesses them as read-only. - template + template struct BenchContToExecRead { using ArrayType = vtkm::cont::ArrayHandle; - using PortalType = typename ArrayType::template ExecutionTypes::PortalConst; using ValueTypeTraits = vtkm::TypeTraits; vtkm::Id NumValues; @@ -164,7 +164,7 @@ struct BenchmarkArrayTransfer } VTKM_CONT - vtkm::Float64 operator()() + vtkm::Float64 operator()() const { std::vector vec(static_cast(this->NumValues), ValueTypeTraits::ZeroInitialization()); @@ -173,8 +173,8 @@ struct BenchmarkArrayTransfer // Time the copy: Timer timer{ DeviceAdapter() }; timer.Start(); - ReadValues functor(array.PrepareForInput(DeviceAdapter()), - ValueTypeTraits::ZeroInitialization()); + auto portal = array.PrepareForInput(DeviceAdapter()); + ReadValues functor(portal, ValueTypeTraits::ZeroInitialization()); Algo::Schedule(functor, this->NumValues); return timer.GetElapsedTime(); } @@ -183,11 +183,10 @@ struct BenchmarkArrayTransfer // Writes values to ArrayHandle in execution environment. There is no actual // copy between control/execution in this case. - template + template struct BenchContToExecWrite { using ArrayType = vtkm::cont::ArrayHandle; - using PortalType = typename ArrayType::template ExecutionTypes::Portal; using ValueTypeTraits = vtkm::TypeTraits; vtkm::Id NumValues; @@ -208,14 +207,15 @@ struct BenchmarkArrayTransfer } VTKM_CONT - vtkm::Float64 operator()() + vtkm::Float64 operator()() const { ArrayType array; // Time the write: Timer timer{ DeviceAdapter() }; timer.Start(); - WriteValues functor(array.PrepareForOutput(this->NumValues, DeviceAdapter())); + auto portal = array.PrepareForOutput(this->NumValues, DeviceAdapter()); + WriteValues functor(portal); Algo::Schedule(functor, this->NumValues); return timer.GetElapsedTime(); @@ -225,11 +225,10 @@ struct BenchmarkArrayTransfer // Copies NumValues from control environment to execution environment and // both reads and writes them. - template + template struct BenchContToExecReadWrite { using ArrayType = vtkm::cont::ArrayHandle; - using PortalType = typename ArrayType::template ExecutionTypes::Portal; using ValueTypeTraits = vtkm::TypeTraits; vtkm::Id NumValues; @@ -250,7 +249,7 @@ struct BenchmarkArrayTransfer } VTKM_CONT - vtkm::Float64 operator()() + vtkm::Float64 operator()() const { std::vector vec(static_cast(this->NumValues), ValueTypeTraits::ZeroInitialization()); @@ -259,7 +258,8 @@ struct BenchmarkArrayTransfer // Time the copy: Timer timer{ DeviceAdapter() }; timer.Start(); - ReadWriteValues functor(array.PrepareForInPlace(DeviceAdapter())); + auto portal = array.PrepareForInPlace(DeviceAdapter()); + ReadWriteValues functor(portal); Algo::Schedule(functor, this->NumValues); return timer.GetElapsedTime(); } @@ -268,7 +268,7 @@ struct BenchmarkArrayTransfer // Copies NumValues from control environment to execution environment and // back, then accesses them as read-only. - template + template struct BenchRoundTripRead { using ArrayType = vtkm::cont::ArrayHandle; @@ -295,7 +295,7 @@ struct BenchmarkArrayTransfer } VTKM_CONT - vtkm::Float64 operator()() + vtkm::Float64 operator()() const { std::vector vec(static_cast(this->NumValues), ValueTypeTraits::ZeroInitialization()); @@ -309,8 +309,8 @@ struct BenchmarkArrayTransfer timer.Start(); // Copy to device: - ReadValues functor(array.PrepareForInput(DeviceAdapter()), - ValueTypeTraits::ZeroInitialization()); + auto portal = array.PrepareForInput(DeviceAdapter()); + ReadValues functor(portal, ValueTypeTraits::ZeroInitialization()); Algo::Schedule(functor, this->NumValues); // Copy back to host and read: @@ -328,7 +328,7 @@ struct BenchmarkArrayTransfer // Copies NumValues from control environment to execution environment and // back, then reads and writes them in-place. - template + template struct BenchRoundTripReadWrite { using ArrayType = vtkm::cont::ArrayHandle; @@ -355,7 +355,7 @@ struct BenchmarkArrayTransfer } VTKM_CONT - vtkm::Float64 operator()() + vtkm::Float64 operator()() const { std::vector vec(static_cast(this->NumValues), ValueTypeTraits::ZeroInitialization()); @@ -369,7 +369,8 @@ struct BenchmarkArrayTransfer timer.Start(); // Do work on device: - ReadWriteValues functor(array.PrepareForInPlace(DeviceAdapter())); + auto portal = array.PrepareForInPlace(DeviceAdapter()); + ReadWriteValues functor(portal); Algo::Schedule(functor, this->NumValues); ReadWriteValues cFunctor(array.GetPortalControl()); @@ -385,7 +386,7 @@ struct BenchmarkArrayTransfer // Write NumValues to device allocated memory and copies them back to control // for reading. - template + template struct BenchExecToContRead { using ArrayType = vtkm::cont::ArrayHandle; @@ -412,7 +413,7 @@ struct BenchmarkArrayTransfer } VTKM_CONT - vtkm::Float64 operator()() + vtkm::Float64 operator()() const { ArrayType array; @@ -421,7 +422,8 @@ struct BenchmarkArrayTransfer timer.Start(); // Allocate/write data on device - WriteValues functor(array.PrepareForOutput(this->NumValues, DeviceAdapter())); + auto portal = array.PrepareForOutput(this->NumValues, DeviceAdapter()); + WriteValues functor(portal); Algo::Schedule(functor, this->NumValues); // Read back on host: @@ -439,7 +441,7 @@ struct BenchmarkArrayTransfer // Write NumValues to device allocated memory and copies them back to control // and overwrites them. - template + template struct BenchExecToContWrite { using ArrayType = vtkm::cont::ArrayHandle; @@ -475,7 +477,8 @@ struct BenchmarkArrayTransfer timer.Start(); // Allocate/write data on device - WriteValues functor(array.PrepareForOutput(this->NumValues, DeviceAdapter())); + auto portal = array.PrepareForOutput(this->NumValues, DeviceAdapter()); + WriteValues functor(portal); Algo::Schedule(functor, this->NumValues); // Read back on host: @@ -492,7 +495,7 @@ struct BenchmarkArrayTransfer // Write NumValues to device allocated memory and copies them back to control // for reading and writing. - template + template struct BenchExecToContReadWrite { using ArrayType = vtkm::cont::ArrayHandle; @@ -528,7 +531,8 @@ struct BenchmarkArrayTransfer timer.Start(); // Allocate/write data on device - WriteValues functor(array.PrepareForOutput(this->NumValues, DeviceAdapter())); + auto portal = array.PrepareForOutput(this->NumValues, DeviceAdapter()); + WriteValues functor(portal); Algo::Schedule(functor, this->NumValues); // Read back on host: @@ -547,17 +551,16 @@ struct BenchmarkArrayTransfer using TestTypes = vtkm::ListTagBase; - static VTKM_CONT bool Run() + static VTKM_CONT bool Run(vtkm::cont::DeviceAdapterId id) { - VTKM_RUN_BENCHMARK(ContToExecRead, TestTypes()); - VTKM_RUN_BENCHMARK(ContToExecWrite, TestTypes()); - VTKM_RUN_BENCHMARK(ContToExecReadWrite, TestTypes()); - VTKM_RUN_BENCHMARK(RoundTripRead, TestTypes()); - VTKM_RUN_BENCHMARK(RoundTripReadWrite, TestTypes()); - VTKM_RUN_BENCHMARK(ExecToContRead, TestTypes()); - VTKM_RUN_BENCHMARK(ExecToContWrite, TestTypes()); - VTKM_RUN_BENCHMARK(ExecToContReadWrite, TestTypes()); - + VTKM_RUN_BENCHMARK(ContToExecRead, TestTypes(), id); + VTKM_RUN_BENCHMARK(ContToExecWrite, TestTypes(), id); + VTKM_RUN_BENCHMARK(ContToExecReadWrite, TestTypes(), id); + VTKM_RUN_BENCHMARK(RoundTripRead, TestTypes(), id); + VTKM_RUN_BENCHMARK(RoundTripReadWrite, TestTypes(), id); + VTKM_RUN_BENCHMARK(ExecToContRead, TestTypes(), id); + VTKM_RUN_BENCHMARK(ExecToContWrite, TestTypes(), id); + VTKM_RUN_BENCHMARK(ExecToContReadWrite, TestTypes(), id); return true; } }; @@ -566,14 +569,11 @@ struct BenchmarkArrayTransfer int main(int argc, char* argv[]) { - vtkm::cont::InitLogging(argc, argv); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; + auto config = vtkm::cont::Initialize(argc, argv, opts); - using DeviceAdapter = VTKM_DEFAULT_DEVICE_ADAPTER_TAG; - using Benchmarks = vtkm::benchmarking::BenchmarkArrayTransfer; + using Benchmarks = vtkm::benchmarking::BenchmarkArrayTransfer; - auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker(); - tracker.ForceDevice(DeviceAdapter{}); - - bool result = Benchmarks::Run(); + bool result = Benchmarks::Run(config.Device); return result ? EXIT_SUCCESS : EXIT_FAILURE; } diff --git a/benchmarking/BenchmarkAtomicArray.cxx b/benchmarking/BenchmarkAtomicArray.cxx index 92b5cb495..b05ac8c1f 100644 --- a/benchmarking/BenchmarkAtomicArray.cxx +++ b/benchmarking/BenchmarkAtomicArray.cxx @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -48,25 +49,24 @@ static constexpr vtkm::Id NumWrites = 33554432; // 2^25 VTKM_MAKE_BENCHMARK(Name##32768, Class, 32768); \ VTKM_MAKE_BENCHMARK(Name##1048576, Class, 1048576) -#define RUN_ATOMIC_BENCHMARKS(Name) \ - VTKM_RUN_BENCHMARK(Name##1, vtkm::cont::AtomicArrayTypeListTag{}); \ - VTKM_RUN_BENCHMARK(Name##8, vtkm::cont::AtomicArrayTypeListTag{}); \ - VTKM_RUN_BENCHMARK(Name##32, vtkm::cont::AtomicArrayTypeListTag{}); \ - VTKM_RUN_BENCHMARK(Name##512, vtkm::cont::AtomicArrayTypeListTag{}); \ - VTKM_RUN_BENCHMARK(Name##2048, vtkm::cont::AtomicArrayTypeListTag{}); \ - VTKM_RUN_BENCHMARK(Name##32768, vtkm::cont::AtomicArrayTypeListTag{}); \ - VTKM_RUN_BENCHMARK(Name##1048576, vtkm::cont::AtomicArrayTypeListTag{}) +#define RUN_ATOMIC_BENCHMARKS(Name, id) \ + VTKM_RUN_BENCHMARK(Name##1, vtkm::cont::AtomicArrayTypeListTag{}, id); \ + VTKM_RUN_BENCHMARK(Name##8, vtkm::cont::AtomicArrayTypeListTag{}, id); \ + VTKM_RUN_BENCHMARK(Name##32, vtkm::cont::AtomicArrayTypeListTag{}, id); \ + VTKM_RUN_BENCHMARK(Name##512, vtkm::cont::AtomicArrayTypeListTag{}, id); \ + VTKM_RUN_BENCHMARK(Name##2048, vtkm::cont::AtomicArrayTypeListTag{}, id); \ + VTKM_RUN_BENCHMARK(Name##32768, vtkm::cont::AtomicArrayTypeListTag{}, id); \ + VTKM_RUN_BENCHMARK(Name##1048576, vtkm::cont::AtomicArrayTypeListTag{}, id) -template class BenchmarkAtomicArray { public: - using Algo = vtkm::cont::DeviceAdapterAlgorithm; + using Algo = vtkm::cont::Algorithm; using Timer = vtkm::cont::Timer; // Benchmarks AtomicArray::Add such that each work index writes to adjacent // indices. - template + template struct BenchAddSeq { vtkm::Id ArraySize; @@ -92,17 +92,17 @@ public: BenchAddSeq(vtkm::Id arraySize) : ArraySize(arraySize) { - this->Data.PrepareForOutput(this->ArraySize, Device{}); + this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter()); } VTKM_CONT vtkm::Float64 operator()() { vtkm::cont::AtomicArray array(this->Data); - auto portal = array.PrepareForExecution(Device{}); + auto portal = array.PrepareForExecution(DeviceAdapter()); Worker worker{ this->ArraySize, portal }; - Timer timer{ Device() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algo::Schedule(worker, NumWrites); @@ -120,7 +120,7 @@ public: MAKE_ATOMIC_BENCHMARKS(AddSeq, BenchAddSeq); // Provides a non-atomic baseline for BenchAddSeq - template + template struct BenchAddSeqBaseline { vtkm::Id ArraySize; @@ -155,10 +155,10 @@ public: VTKM_CONT vtkm::Float64 operator()() { - auto portal = this->Data.PrepareForOutput(this->ArraySize, Device{}); + auto portal = this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter()); Worker worker{ this->ArraySize, portal }; - Timer timer{ Device() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algo::Schedule(worker, NumWrites); @@ -177,7 +177,7 @@ public: // Benchmarks AtomicArray::Add such that each work index writes to a strided // index ( floor(i / stride) + stride * (i % stride) - template + template struct BenchAddStride { vtkm::Id ArraySize; @@ -211,17 +211,17 @@ public: : ArraySize(arraySize) , Stride(stride) { - this->Data.PrepareForOutput(this->ArraySize, Device{}); + this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter()); } VTKM_CONT vtkm::Float64 operator()() { vtkm::cont::AtomicArray array(this->Data); - auto portal = array.PrepareForExecution(Device{}); + auto portal = array.PrepareForExecution(DeviceAdapter()); Worker worker{ this->ArraySize, this->Stride, portal }; - Timer timer{ Device() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algo::Schedule(worker, NumWrites); @@ -240,7 +240,7 @@ public: MAKE_ATOMIC_BENCHMARKS(AddStride, BenchAddStride); // Non-atomic baseline for AddStride - template + template struct BenchAddStrideBaseline { vtkm::Id ArraySize; @@ -279,10 +279,10 @@ public: VTKM_CONT vtkm::Float64 operator()() { - auto portal = this->Data.PrepareForOutput(this->ArraySize, Device{}); + auto portal = this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter()); Worker worker{ this->ArraySize, this->Stride, portal }; - Timer timer{ Device() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algo::Schedule(worker, NumWrites); @@ -302,7 +302,7 @@ public: // Benchmarks AtomicArray::CompareAndSwap such that each work index writes to adjacent // indices. - template + template struct BenchCASSeq { vtkm::Id ArraySize; @@ -340,17 +340,17 @@ public: BenchCASSeq(vtkm::Id arraySize) : ArraySize(arraySize) { - this->Data.PrepareForOutput(this->ArraySize, Device{}); + this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter()); } VTKM_CONT vtkm::Float64 operator()() { vtkm::cont::AtomicArray array(this->Data); - auto portal = array.PrepareForExecution(Device{}); + auto portal = array.PrepareForExecution(DeviceAdapter()); Worker worker{ this->ArraySize, portal }; - Timer timer{ Device() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algo::Schedule(worker, NumWrites); @@ -368,7 +368,7 @@ public: MAKE_ATOMIC_BENCHMARKS(CASSeq, BenchCASSeq); // Provides a non-atomic baseline for BenchCASSeq - template + template struct BenchCASSeqBaseline { vtkm::Id ArraySize; @@ -405,10 +405,10 @@ public: VTKM_CONT vtkm::Float64 operator()() { - auto portal = this->Data.PrepareForOutput(this->ArraySize, Device{}); + auto portal = this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter()); Worker worker{ this->ArraySize, portal }; - Timer timer{ Device() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algo::Schedule(worker, NumWrites); return timer.GetElapsedTime(); @@ -427,7 +427,7 @@ public: // Benchmarks AtomicArray::CompareAndSwap such that each work index writes to // a strided index: // ( floor(i / stride) + stride * (i % stride) - template + template struct BenchCASStride { vtkm::Id ArraySize; @@ -469,17 +469,17 @@ public: : ArraySize(arraySize) , Stride(stride) { - this->Data.PrepareForOutput(this->ArraySize, Device{}); + this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter()); } VTKM_CONT vtkm::Float64 operator()() { vtkm::cont::AtomicArray array(this->Data); - auto portal = array.PrepareForExecution(Device{}); + auto portal = array.PrepareForExecution(DeviceAdapter()); Worker worker{ this->ArraySize, this->Stride, portal }; - Timer timer{ Device() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algo::Schedule(worker, NumWrites); @@ -498,7 +498,7 @@ public: MAKE_ATOMIC_BENCHMARKS(CASStride, BenchCASStride); // Non-atomic baseline for CASStride - template + template struct BenchCASStrideBaseline { vtkm::Id ArraySize; @@ -539,10 +539,10 @@ public: VTKM_CONT vtkm::Float64 operator()() { - auto portal = this->Data.PrepareForOutput(this->ArraySize, Device{}); + auto portal = this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter()); Worker worker{ this->ArraySize, this->Stride, portal }; - Timer timer{ Device() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algo::Schedule(worker, NumWrites); @@ -560,17 +560,17 @@ public: }; MAKE_ATOMIC_BENCHMARKS(CASStrideBase, BenchCASStrideBaseline); - static void Run() + static void Run(vtkm::cont::DeviceAdapterId id) { - RUN_ATOMIC_BENCHMARKS(AddSeq); - RUN_ATOMIC_BENCHMARKS(AddSeqBase); - RUN_ATOMIC_BENCHMARKS(AddStride); - RUN_ATOMIC_BENCHMARKS(AddStrideBase); + RUN_ATOMIC_BENCHMARKS(AddSeq, id); + RUN_ATOMIC_BENCHMARKS(AddSeqBase, id); + RUN_ATOMIC_BENCHMARKS(AddStride, id); + RUN_ATOMIC_BENCHMARKS(AddStrideBase, id); - RUN_ATOMIC_BENCHMARKS(CASSeq); - RUN_ATOMIC_BENCHMARKS(CASSeqBase); - RUN_ATOMIC_BENCHMARKS(CASStride); - RUN_ATOMIC_BENCHMARKS(CASStrideBase); + RUN_ATOMIC_BENCHMARKS(CASSeq, id); + RUN_ATOMIC_BENCHMARKS(CASSeqBase, id); + RUN_ATOMIC_BENCHMARKS(CASStride, id); + RUN_ATOMIC_BENCHMARKS(CASStrideBase, id); } }; } @@ -578,15 +578,12 @@ public: int main(int argc, char* argv[]) { - vtkm::cont::InitLogging(argc, argv); - - using Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG; - auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker(); - tracker.ForceDevice(Device{}); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; + auto config = vtkm::cont::Initialize(argc, argv, opts); try { - vtkm::benchmarking::BenchmarkAtomicArray::Run(); + vtkm::benchmarking::BenchmarkAtomicArray::Run(config.Device); } catch (std::exception& e) { diff --git a/benchmarking/BenchmarkCopySpeeds.cxx b/benchmarking/BenchmarkCopySpeeds.cxx index beaf2e1a9..782a5c087 100644 --- a/benchmarking/BenchmarkCopySpeeds.cxx +++ b/benchmarking/BenchmarkCopySpeeds.cxx @@ -37,7 +37,7 @@ #include #include -#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB +#ifdef VTKM_ENABLE_TBB #include #endif // TBB @@ -58,7 +58,7 @@ const size_t COL_WIDTH = 32; template struct MeasureCopySpeed { - using Algo = vtkm::cont::DeviceAdapterAlgorithm; + using Algo = vtkm::cont::Algorithm; vtkm::cont::ArrayHandle Source; vtkm::cont::ArrayHandle Destination; @@ -106,19 +106,17 @@ void PrintDivider(std::ostream& out) out << "|-" << fillStr << "-|-" << fillStr << "-|" << std::endl; } -template -void BenchmarkValueType() +template +void BenchmarkValueType(vtkm::cont::DeviceAdapterId id) { - PrintRow(std::cout, - vtkm::testing::TypeName::Name(), - vtkm::cont::DeviceAdapterTraits::GetName()); + PrintRow(std::cout, vtkm::testing::TypeName::Name(), id.GetName()); PrintDivider(std::cout); Benchmarker bench(15, 100); for (vtkm::UInt64 size = COPY_SIZE_MIN; size <= COPY_SIZE_MAX; size <<= COPY_SIZE_INC) { - MeasureCopySpeed functor(size); + MeasureCopySpeed functor(size); bench.Reset(); std::string speedStr; @@ -142,22 +140,58 @@ void BenchmarkValueType() } } // end namespace vtkm::benchmarking +namespace +{ +using namespace vtkm::benchmarking; + +struct BenchmarkValueTypeFunctor +{ + template + bool operator()(DeviceAdapter id) + { + BenchmarkValueType(id); + BenchmarkValueType, DeviceAdapter>(id); + BenchmarkValueType, DeviceAdapter>(id); + BenchmarkValueType, DeviceAdapter>(id); + + BenchmarkValueType(id); + BenchmarkValueType, DeviceAdapter>(id); + + BenchmarkValueType(id); + BenchmarkValueType, DeviceAdapter>(id); + + BenchmarkValueType(id); + BenchmarkValueType, DeviceAdapter>(id); + + BenchmarkValueType(id); + BenchmarkValueType, DeviceAdapter>(id); + + BenchmarkValueType, DeviceAdapter>(id); + BenchmarkValueType, DeviceAdapter>(id); + BenchmarkValueType, DeviceAdapter>(id); + BenchmarkValueType, DeviceAdapter>(id); + + return true; + } +}; +} + int main(int argc, char* argv[]) { - vtkm::cont::InitLogging(argc, argv); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; + auto config = vtkm::cont::Initialize(argc, argv, opts); - using namespace vtkm::benchmarking; -#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB +#ifdef VTKM_ENABLE_TBB int numThreads = tbb::task_scheduler_init::automatic; #endif // TBB - if (argc == 3) + if (config.Arguments.size() == 2) { - if (std::string(argv[1]) == "NumThreads") + if (std::string(config.Arguments[0]) == "NumThreads") { -#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB - std::istringstream parse(argv[2]); +#ifdef VTKM_ENABLE_TBB + std::istringstream parse(config.Arguments[1]); parse >> numThreads; std::cout << "Selected " << numThreads << " TBB threads." << std::endl; #else @@ -166,35 +200,11 @@ int main(int argc, char* argv[]) } } -#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB +#ifdef VTKM_ENABLE_TBB // Must not be destroyed as long as benchmarks are running: tbb::task_scheduler_init init(numThreads); #endif // TBB - using Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG; - auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker(); - tracker.ForceDevice(Device{}); - - - BenchmarkValueType(); - BenchmarkValueType>(); - BenchmarkValueType>(); - BenchmarkValueType>(); - - BenchmarkValueType(); - BenchmarkValueType>(); - - BenchmarkValueType(); - BenchmarkValueType>(); - - BenchmarkValueType(); - BenchmarkValueType>(); - - BenchmarkValueType(); - BenchmarkValueType>(); - - BenchmarkValueType>(); - BenchmarkValueType>(); - BenchmarkValueType>(); - BenchmarkValueType>(); + BenchmarkValueTypeFunctor functor; + vtkm::cont::TryExecuteOnDevice(config.Device, functor); } diff --git a/benchmarking/BenchmarkDeviceAdapter.cxx b/benchmarking/BenchmarkDeviceAdapter.cxx index 2cb14edb5..8e94c8aaf 100644 --- a/benchmarking/BenchmarkDeviceAdapter.cxx +++ b/benchmarking/BenchmarkDeviceAdapter.cxx @@ -44,9 +44,10 @@ #include -#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB +#ifdef VTKM_ENABLE_TBB #include -#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP +#endif +#ifdef VTKM_ENABLE_OPENMP #include #endif @@ -168,25 +169,23 @@ static const std::string DIVIDER(40, '-'); /// This class runs a series of micro-benchmarks to measure /// performance of the parallel primitives provided by each /// device adapter -template class BenchmarkDeviceAdapter { using StorageTag = vtkm::cont::StorageTagBasic; using IdArrayHandle = vtkm::cont::ArrayHandle; - using Algorithm = vtkm::cont::DeviceAdapterAlgorithm; + using Algorithm = vtkm::cont::Algorithm; using Timer = vtkm::cont::Timer; public: // Various kernels used by the different benchmarks to accelerate // initialization of data - template + template struct FillTestValueKernel : vtkm::exec::FunctorBase { using ValueArrayHandle = vtkm::cont::ArrayHandle; - using PortalType = typename ValueArrayHandle::template ExecutionTypes::Portal; PortalType Output; @@ -199,11 +198,10 @@ public: VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i, Value())); } }; - template + template struct FillScaledTestValueKernel : vtkm::exec::FunctorBase { using ValueArrayHandle = vtkm::cont::ArrayHandle; - using PortalType = typename ValueArrayHandle::template ExecutionTypes::Portal; PortalType Output; const vtkm::Id IdScale; @@ -218,11 +216,10 @@ public: VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i * IdScale, Value())); } }; - template + template struct FillModuloTestValueKernel : vtkm::exec::FunctorBase { using ValueArrayHandle = vtkm::cont::ArrayHandle; - using PortalType = typename ValueArrayHandle::template ExecutionTypes::Portal; PortalType Output; const vtkm::Id Modulus; @@ -237,11 +234,10 @@ public: VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i % Modulus, Value())); } }; - template + template struct FillBinaryTestValueKernel : vtkm::exec::FunctorBase { using ValueArrayHandle = vtkm::cont::ArrayHandle; - using PortalType = typename ValueArrayHandle::template ExecutionTypes::Portal; PortalType Output; const vtkm::Id Modulus; @@ -260,7 +256,7 @@ public: }; private: - template + template struct BenchCopy { using ValueArrayHandle = vtkm::cont::ArrayHandle; @@ -284,7 +280,7 @@ private: VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::Copy(ValueHandle_src, ValueHandle_dst); @@ -305,7 +301,7 @@ private: }; VTKM_MAKE_BENCHMARK(Copy, BenchCopy); - template + template struct BenchCopyIf { using ValueArrayHandle = vtkm::cont::ArrayHandle; @@ -322,18 +318,18 @@ private: { vtkm::Id arraySize = Config.ComputeSize(); vtkm::Id modulo = arraySize / N_VALID; - Algorithm::Schedule( - FillTestValueKernel(ValueHandle.PrepareForOutput(arraySize, DeviceAdapterTag())), - arraySize); - Algorithm::Schedule(FillBinaryTestValueKernel( - modulo, StencilHandle.PrepareForOutput(arraySize, DeviceAdapterTag())), + auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter()); + Algorithm::Schedule(FillTestValueKernel(vHPortal), arraySize); + + auto sHPortal = StencilHandle.PrepareForOutput(arraySize, DeviceAdapter()); + Algorithm::Schedule(FillBinaryTestValueKernel(modulo, sHPortal), arraySize); } VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::CopyIf(ValueHandle, StencilHandle, OutHandle); @@ -365,7 +361,7 @@ private: VTKM_MAKE_BENCHMARK(CopyIf75, BenchCopyIf, 75); VTKM_MAKE_BENCHMARK(CopyIf100, BenchCopyIf, 100); - template + template struct BenchLowerBounds { using ValueArrayHandle = vtkm::cont::ArrayHandle; @@ -381,18 +377,18 @@ private: , PERCENT_VALUES(value_percent) { vtkm::Id arraySize = Config.ComputeSize(); - Algorithm::Schedule( - FillTestValueKernel(InputHandle.PrepareForOutput(arraySize, DeviceAdapterTag())), - arraySize); - Algorithm::Schedule(FillScaledTestValueKernel( - 2, ValueHandle.PrepareForOutput(N_VALS, DeviceAdapterTag())), + auto iHPortal = InputHandle.PrepareForOutput(arraySize, DeviceAdapter()); + Algorithm::Schedule(FillTestValueKernel(iHPortal), arraySize); + auto vHPortal = ValueHandle.PrepareForOutput(N_VALS, DeviceAdapter()); + Algorithm::Schedule(FillScaledTestValueKernel(2, vHPortal), N_VALS); } VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + + Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::LowerBounds(InputHandle, ValueHandle, OutHandle); @@ -424,7 +420,7 @@ private: VTKM_MAKE_BENCHMARK(LowerBounds75, BenchLowerBounds, 75); VTKM_MAKE_BENCHMARK(LowerBounds100, BenchLowerBounds, 100); - template + template struct BenchReduce { using ValueArrayHandle = vtkm::cont::ArrayHandle; @@ -438,9 +434,8 @@ private: BenchReduce() { vtkm::Id arraySize = Config.ComputeSize(); - Algorithm::Schedule( - FillTestValueKernel(InputHandle.PrepareForOutput(arraySize, DeviceAdapterTag())), - arraySize); + auto iHPortal = this->InputHandle.PrepareForOutput(arraySize, DeviceAdapter()); + Algorithm::Schedule(FillTestValueKernel(iHPortal), arraySize); this->Result = Algorithm::Reduce(this->InputHandle, vtkm::TypeTraits::ZeroInitialization()); } @@ -448,9 +443,11 @@ private: VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + + Timer timer{ DeviceAdapter() }; timer.Start(); - Value tmp = Algorithm::Reduce(InputHandle, vtkm::TypeTraits::ZeroInitialization()); + Value tmp = + Algorithm::Reduce(this->InputHandle, vtkm::TypeTraits::ZeroInitialization()); vtkm::Float64 time = timer.GetElapsedTime(); if (tmp != this->Result) { @@ -473,7 +470,7 @@ private: }; VTKM_MAKE_BENCHMARK(Reduce, BenchReduce); - template + template struct BenchReduceByKey { using ValueArrayHandle = vtkm::cont::ArrayHandle; @@ -489,11 +486,10 @@ private: , PERCENT_KEYS(key_percent) { vtkm::Id arraySize = Config.ComputeSize(); - Algorithm::Schedule( - FillTestValueKernel(ValueHandle.PrepareForOutput(arraySize, DeviceAdapterTag())), - arraySize); - Algorithm::Schedule(FillModuloTestValueKernel( - N_KEYS, KeyHandle.PrepareForOutput(arraySize, DeviceAdapterTag())), + auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter()); + Algorithm::Schedule(FillTestValueKernel(vHPortal), arraySize); + auto kHPortal = KeyHandle.PrepareForOutput(arraySize, DeviceAdapter()); + Algorithm::Schedule(FillModuloTestValueKernel(N_KEYS, kHPortal), arraySize); Algorithm::SortByKey(KeyHandle, ValueHandle); } @@ -501,7 +497,7 @@ private: VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::ReduceByKey(KeyHandle, ValueHandle, KeysOut, ValuesOut, vtkm::Add()); return timer.GetElapsedTime(); @@ -532,7 +528,7 @@ private: VTKM_MAKE_BENCHMARK(ReduceByKey75, BenchReduceByKey, 75); VTKM_MAKE_BENCHMARK(ReduceByKey100, BenchReduceByKey, 100); - template + template struct BenchScanInclusive { using ValueArrayHandle = vtkm::cont::ArrayHandle; @@ -542,15 +538,14 @@ private: BenchScanInclusive() { vtkm::Id arraySize = Config.ComputeSize(); - Algorithm::Schedule( - FillTestValueKernel(ValueHandle.PrepareForOutput(arraySize, DeviceAdapterTag())), - arraySize); + auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter()); + Algorithm::Schedule(FillTestValueKernel(vHPortal), arraySize); } VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::ScanInclusive(ValueHandle, OutHandle); return timer.GetElapsedTime(); @@ -570,7 +565,7 @@ private: }; VTKM_MAKE_BENCHMARK(ScanInclusive, BenchScanInclusive); - template + template struct BenchScanExclusive { using ValueArrayHandle = vtkm::cont::ArrayHandle; @@ -581,15 +576,15 @@ private: BenchScanExclusive() { vtkm::Id arraySize = Config.ComputeSize(); - Algorithm::Schedule( - FillTestValueKernel(ValueHandle.PrepareForOutput(arraySize, DeviceAdapterTag())), - arraySize); + auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter()); + Algorithm::Schedule(FillTestValueKernel(vHPortal), arraySize); } VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + + Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::ScanExclusive(ValueHandle, OutHandle); return timer.GetElapsedTime(); @@ -609,7 +604,7 @@ private: }; VTKM_MAKE_BENCHMARK(ScanExclusive, BenchScanExclusive); - template + template struct BenchSort { using ValueArrayHandle = vtkm::cont::ArrayHandle; @@ -634,7 +629,7 @@ private: ValueArrayHandle array; Algorithm::Copy(this->ValueHandle, array); - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::Sort(array); return timer.GetElapsedTime(); @@ -654,7 +649,7 @@ private: }; VTKM_MAKE_BENCHMARK(Sort, BenchSort); - template + template struct BenchSortByKey { using ValueArrayHandle = vtkm::cont::ArrayHandle; @@ -677,8 +672,8 @@ private: { portal.Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value())); } - Algorithm::Schedule(FillModuloTestValueKernel( - N_KEYS, KeyHandle.PrepareForOutput(arraySize, DeviceAdapterTag())), + auto kHPortal = KeyHandle.PrepareForOutput(arraySize, DeviceAdapter()); + Algorithm::Schedule(FillModuloTestValueKernel(N_KEYS, kHPortal), arraySize); } @@ -690,7 +685,7 @@ private: Algorithm::Copy(this->KeyHandle, keys); Algorithm::Copy(this->ValueHandle, values); - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::SortByKey(keys, values); return timer.GetElapsedTime(); @@ -721,7 +716,7 @@ private: VTKM_MAKE_BENCHMARK(SortByKey75, BenchSortByKey, 75); VTKM_MAKE_BENCHMARK(SortByKey100, BenchSortByKey, 100); - template + template struct BenchStableSortIndices { using SSI = vtkm::worklet::StableSortIndices; @@ -749,7 +744,7 @@ private: vtkm::cont::ArrayHandle indices; Algorithm::Copy(vtkm::cont::ArrayHandleIndex(arraySize), indices); - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); SSI::Sort(ValueHandle, indices); return timer.GetElapsedTime(); @@ -769,7 +764,7 @@ private: }; VTKM_MAKE_BENCHMARK(StableSortIndices, BenchStableSortIndices); - template + template struct BenchStableSortIndicesUnique { using SSI = vtkm::worklet::StableSortIndices; @@ -787,19 +782,19 @@ private: , PERCENT_VALID(percent_valid) { vtkm::Id arraySize = Config.ComputeSize(); - Algorithm::Schedule( - FillModuloTestValueKernel( - N_VALID, this->ValueHandle.PrepareForOutput(arraySize, DeviceAdapterTag())), - arraySize); + auto vHPortal = this->ValueHandle.PrepareForOutput(arraySize, DeviceAdapter()); + Algorithm::Schedule(FillModuloTestValueKernel(N_VALID, vHPortal), + arraySize); this->IndexHandle = SSI::Sort(this->ValueHandle); } VTKM_CONT vtkm::Float64 operator()() { + IndexArrayHandle indices; Algorithm::Copy(this->IndexHandle, indices); - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); SSI::Unique(this->ValueHandle, indices); return timer.GetElapsedTime(); @@ -830,7 +825,7 @@ private: VTKM_MAKE_BENCHMARK(StableSortIndicesUnique75, BenchStableSortIndicesUnique, 75); VTKM_MAKE_BENCHMARK(StableSortIndicesUnique100, BenchStableSortIndicesUnique, 100); - template + template struct BenchUnique { using ValueArrayHandle = vtkm::cont::ArrayHandle; @@ -845,8 +840,8 @@ private: , PERCENT_VALID(percent_valid) { vtkm::Id arraySize = Config.ComputeSize(); - Algorithm::Schedule(FillModuloTestValueKernel( - N_VALID, ValueHandle.PrepareForOutput(arraySize, DeviceAdapterTag())), + auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter()); + Algorithm::Schedule(FillModuloTestValueKernel(N_VALID, vHPortal), arraySize); Algorithm::Sort(ValueHandle); } @@ -854,10 +849,11 @@ private: VTKM_CONT vtkm::Float64 operator()() { + ValueArrayHandle array; Algorithm::Copy(this->ValueHandle, array); - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::Unique(array); return timer.GetElapsedTime(); @@ -888,7 +884,7 @@ private: VTKM_MAKE_BENCHMARK(Unique75, BenchUnique, 75); VTKM_MAKE_BENCHMARK(Unique100, BenchUnique, 100); - template + template struct BenchUpperBounds { using ValueArrayHandle = vtkm::cont::ArrayHandle; @@ -904,18 +900,17 @@ private: , PERCENT_VALS(percent_vals) { vtkm::Id arraySize = Config.ComputeSize(); - Algorithm::Schedule( - FillTestValueKernel(InputHandle.PrepareForOutput(arraySize, DeviceAdapterTag())), - arraySize); - Algorithm::Schedule(FillScaledTestValueKernel( - 2, ValueHandle.PrepareForOutput(N_VALS, DeviceAdapterTag())), + auto iHPortal = InputHandle.PrepareForOutput(arraySize, DeviceAdapter()); + Algorithm::Schedule(FillTestValueKernel(iHPortal), arraySize); + auto vHPortal = ValueHandle.PrepareForOutput(N_VALS, DeviceAdapter()); + Algorithm::Schedule(FillScaledTestValueKernel(2, vHPortal), N_VALS); } VTKM_CONT vtkm::Float64 operator()() { - vtkm::cont::Timer timer; + vtkm::cont::Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::UpperBounds(InputHandle, ValueHandle, OutHandle); return timer.GetElapsedTime(); @@ -948,11 +943,8 @@ private: VTKM_MAKE_BENCHMARK(UpperBounds100, BenchUpperBounds, 100); public: - static VTKM_CONT int Run() + static VTKM_CONT int Run(vtkm::cont::DeviceAdapterId id) { - std::cout << DIVIDER << "\nRunning DeviceAdapter benchmarks\n"; - vtkm::cont::GetGlobalRuntimeDeviceTracker().ForceDevice(DeviceAdapterTag()); - // Run fixed bytes / size tests: for (int sizeType = 0; sizeType < 2; ++sizeType) { @@ -962,11 +954,11 @@ public: Config.DoByteSizes = true; if (!Config.ExtendedTypeList) { - RunInternal(); + RunInternal(id); } else { - RunInternal(); + RunInternal(id); } } if (sizeType == 1 && Config.TestArraySizeValues) @@ -975,11 +967,11 @@ public: Config.DoByteSizes = false; if (!Config.ExtendedTypeList) { - RunInternal(); + RunInternal(id); } else { - RunInternal(); + RunInternal(id); } } } @@ -988,12 +980,12 @@ public: } template - static VTKM_CONT void RunInternal() + static VTKM_CONT void RunInternal(vtkm::cont::DeviceAdapterId id) { if (Config.BenchmarkFlags & COPY) { std::cout << DIVIDER << "\nBenchmarking Copy\n"; - VTKM_RUN_BENCHMARK(Copy, ValueTypes()); + VTKM_RUN_BENCHMARK(Copy, ValueTypes(), id); } if (Config.BenchmarkFlags & COPY_IF) @@ -1001,26 +993,26 @@ public: std::cout << "\n" << DIVIDER << "\nBenchmarking CopyIf\n"; if (Config.DetailedOutputRangeScaling) { - VTKM_RUN_BENCHMARK(CopyIf5, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf10, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf15, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf20, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf25, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf30, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf35, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf40, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf45, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf50, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf75, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf100, ValueTypes()); + VTKM_RUN_BENCHMARK(CopyIf5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf10, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf15, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf20, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf30, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf35, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf40, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf45, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf100, ValueTypes(), id); } else { - VTKM_RUN_BENCHMARK(CopyIf5, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf25, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf50, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf75, ValueTypes()); - VTKM_RUN_BENCHMARK(CopyIf100, ValueTypes()); + VTKM_RUN_BENCHMARK(CopyIf5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CopyIf100, ValueTypes(), id); } } @@ -1029,33 +1021,33 @@ public: std::cout << DIVIDER << "\nBenchmarking LowerBounds\n"; if (Config.DetailedOutputRangeScaling) { - VTKM_RUN_BENCHMARK(LowerBounds5, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds10, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds15, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds20, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds25, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds30, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds35, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds40, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds45, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds50, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds75, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds100, ValueTypes()); + VTKM_RUN_BENCHMARK(LowerBounds5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds10, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds15, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds20, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds30, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds35, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds40, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds45, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds100, ValueTypes(), id); } else { - VTKM_RUN_BENCHMARK(LowerBounds5, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds25, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds50, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds75, ValueTypes()); - VTKM_RUN_BENCHMARK(LowerBounds100, ValueTypes()); + VTKM_RUN_BENCHMARK(LowerBounds5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(LowerBounds100, ValueTypes(), id); } } if (Config.BenchmarkFlags & REDUCE) { std::cout << "\n" << DIVIDER << "\nBenchmarking Reduce\n"; - VTKM_RUN_BENCHMARK(Reduce, ValueTypes()); + VTKM_RUN_BENCHMARK(Reduce, ValueTypes(), id); } if (Config.BenchmarkFlags & REDUCE_BY_KEY) @@ -1063,45 +1055,45 @@ public: std::cout << "\n" << DIVIDER << "\nBenchmarking ReduceByKey\n"; if (Config.DetailedOutputRangeScaling) { - VTKM_RUN_BENCHMARK(ReduceByKey5, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey10, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey15, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey20, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey25, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey30, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey35, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey40, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey45, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey50, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey75, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey100, ValueTypes()); + VTKM_RUN_BENCHMARK(ReduceByKey5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey10, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey15, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey20, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey30, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey35, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey40, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey45, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey100, ValueTypes(), id); } else { - VTKM_RUN_BENCHMARK(ReduceByKey5, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey25, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey50, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey75, ValueTypes()); - VTKM_RUN_BENCHMARK(ReduceByKey100, ValueTypes()); + VTKM_RUN_BENCHMARK(ReduceByKey5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ReduceByKey100, ValueTypes(), id); } } if (Config.BenchmarkFlags & SCAN_INCLUSIVE) { std::cout << "\n" << DIVIDER << "\nBenchmarking ScanInclusive\n"; - VTKM_RUN_BENCHMARK(ScanInclusive, ValueTypes()); + VTKM_RUN_BENCHMARK(ScanInclusive, ValueTypes(), id); } if (Config.BenchmarkFlags & SCAN_EXCLUSIVE) { std::cout << "\n" << DIVIDER << "\nBenchmarking ScanExclusive\n"; - VTKM_RUN_BENCHMARK(ScanExclusive, ValueTypes()); + VTKM_RUN_BENCHMARK(ScanExclusive, ValueTypes(), id); } if (Config.BenchmarkFlags & SORT) { std::cout << "\n" << DIVIDER << "\nBenchmarking Sort\n"; - VTKM_RUN_BENCHMARK(Sort, ValueTypes()); + VTKM_RUN_BENCHMARK(Sort, ValueTypes(), id); } if (Config.BenchmarkFlags & SORT_BY_KEY) @@ -1109,33 +1101,33 @@ public: std::cout << "\n" << DIVIDER << "\nBenchmarking SortByKey\n"; if (Config.DetailedOutputRangeScaling) { - VTKM_RUN_BENCHMARK(SortByKey5, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey10, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey15, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey20, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey25, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey30, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey35, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey40, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey45, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey50, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey75, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey100, ValueTypes()); + VTKM_RUN_BENCHMARK(SortByKey5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey10, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey15, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey20, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey30, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey35, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey40, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey45, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey100, ValueTypes(), id); } else { - VTKM_RUN_BENCHMARK(SortByKey5, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey25, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey50, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey75, ValueTypes()); - VTKM_RUN_BENCHMARK(SortByKey100, ValueTypes()); + VTKM_RUN_BENCHMARK(SortByKey5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(SortByKey100, ValueTypes(), id); } } if (Config.BenchmarkFlags & STABLE_SORT_INDICES) { std::cout << "\n" << DIVIDER << "\nBenchmarking StableSortIndices::Sort\n"; - VTKM_RUN_BENCHMARK(StableSortIndices, ValueTypes()); + VTKM_RUN_BENCHMARK(StableSortIndices, ValueTypes(), id); } if (Config.BenchmarkFlags & STABLE_SORT_INDICES_UNIQUE) @@ -1143,26 +1135,26 @@ public: std::cout << "\n" << DIVIDER << "\nBenchmarking StableSortIndices::Unique\n"; if (Config.DetailedOutputRangeScaling) { - VTKM_RUN_BENCHMARK(StableSortIndicesUnique5, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique10, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique15, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique20, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique25, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique30, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique35, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique40, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique45, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique50, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique75, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique100, ValueTypes()); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique10, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique15, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique20, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique30, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique35, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique40, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique45, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique100, ValueTypes(), id); } else { - VTKM_RUN_BENCHMARK(StableSortIndicesUnique5, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique25, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique50, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique75, ValueTypes()); - VTKM_RUN_BENCHMARK(StableSortIndicesUnique100, ValueTypes()); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(StableSortIndicesUnique100, ValueTypes(), id); } } @@ -1171,26 +1163,26 @@ public: std::cout << "\n" << DIVIDER << "\nBenchmarking Unique\n"; if (Config.DetailedOutputRangeScaling) { - VTKM_RUN_BENCHMARK(Unique5, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique10, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique15, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique20, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique25, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique30, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique35, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique40, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique45, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique50, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique75, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique100, ValueTypes()); + VTKM_RUN_BENCHMARK(Unique5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique10, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique15, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique20, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique30, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique35, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique40, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique45, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique100, ValueTypes(), id); } else { - VTKM_RUN_BENCHMARK(Unique5, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique25, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique50, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique75, ValueTypes()); - VTKM_RUN_BENCHMARK(Unique100, ValueTypes()); + VTKM_RUN_BENCHMARK(Unique5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(Unique100, ValueTypes(), id); } } @@ -1199,26 +1191,26 @@ public: std::cout << "\n" << DIVIDER << "\nBenchmarking UpperBounds\n"; if (Config.DetailedOutputRangeScaling) { - VTKM_RUN_BENCHMARK(UpperBounds5, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds10, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds15, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds20, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds25, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds30, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds35, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds40, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds45, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds50, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds75, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds100, ValueTypes()); + VTKM_RUN_BENCHMARK(UpperBounds5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds10, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds15, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds20, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds30, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds35, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds40, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds45, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds100, ValueTypes(), id); } else { - VTKM_RUN_BENCHMARK(UpperBounds5, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds25, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds50, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds75, ValueTypes()); - VTKM_RUN_BENCHMARK(UpperBounds100, ValueTypes()); + VTKM_RUN_BENCHMARK(UpperBounds5, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds25, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds50, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds75, ValueTypes(), id); + VTKM_RUN_BENCHMARK(UpperBounds100, ValueTypes(), id); } } } @@ -1230,19 +1222,28 @@ public: int main(int argc, char* argv[]) { - vtkm::cont::InitLogging(argc, argv); + auto opt = vtkm::cont::InitializeOptions::RequireDevice; + auto initConfig = vtkm::cont::Initialize(argc, argv, opt); -#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB - int numThreads = tbb::task_scheduler_init::automatic; -#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP - int numThreads = omp_get_max_threads(); -#endif // TBB + int numThreads{ 0 }; +#ifdef VTKM_ENABLE_TBB + if (initConfig.Device == vtkm::cont::DeviceAdapterTagTBB()) + { + numThreads = tbb::task_scheduler_init::automatic; + } +#endif +#ifdef VTKM_ENABLE_OPENMP + if (initConfig.Device == vtkm::cont::DeviceAdapterTagOpenMP()) + { + numThreads = omp_get_max_threads(); + } +#endif vtkm::benchmarking::BenchDevAlgoConfig& config = vtkm::benchmarking::Config; - for (int i = 1; i < argc; ++i) + for (size_t i = 0; i < initConfig.Arguments.size(); ++i) { - std::string arg = argv[i]; + std::string arg = initConfig.Arguments[i]; std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) { return static_cast(std::tolower(static_cast(c))); }); @@ -1301,7 +1302,7 @@ int main(int argc, char* argv[]) else if (arg == "typelist") { ++i; - arg = argv[i]; + arg = initConfig.Arguments[i]; std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) { return static_cast(std::tolower(static_cast(c))); }); @@ -1315,14 +1316,14 @@ int main(int argc, char* argv[]) } else { - std::cerr << "Unrecognized TypeList: " << argv[i] << std::endl; + std::cerr << "Unrecognized TypeList: " << initConfig.Arguments[i] << std::endl; return 1; } } else if (arg == "fixbytes") { ++i; - arg = argv[i]; + arg = initConfig.Arguments[i]; std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) { return static_cast(std::tolower(static_cast(c))); }); @@ -1340,7 +1341,7 @@ int main(int argc, char* argv[]) else if (arg == "fixsizes") { ++i; - arg = argv[i]; + arg = initConfig.Arguments[i]; std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) { return static_cast(std::tolower(static_cast(c))); }); @@ -1362,30 +1363,38 @@ int main(int argc, char* argv[]) else if (arg == "numthreads") { ++i; -#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB - std::istringstream parse(argv[i]); - parse >> numThreads; - std::cout << "Selected " << numThreads << " TBB threads." << std::endl; -#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP - std::istringstream parse(argv[i]); - parse >> numThreads; - std::cout << "Selected " << numThreads << " OpenMP threads." << std::endl; -#else - std::cerr << "NumThreads not valid on this device. Ignoring." << std::endl; -#endif // TBB + if (initConfig.Device == vtkm::cont::DeviceAdapterTagTBB() || + initConfig.Device == vtkm::cont::DeviceAdapterTagOpenMP()) + { + std::istringstream parse(initConfig.Arguments[i]); + parse >> numThreads; + std::cout << "Selected " << numThreads << " " << initConfig.Device.GetName() << " threads." + << std::endl; + } + else + { + std::cerr << "NumThreads not valid on this device. Ignoring." << std::endl; + } } else { - std::cerr << "Unrecognized benchmark: " << argv[i] << std::endl; + std::cerr << "Unrecognized benchmark: " << initConfig.Arguments[i] << std::endl; return 1; } } -#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB +#ifdef VTKM_ENABLE_TBB // Must not be destroyed as long as benchmarks are running: - tbb::task_scheduler_init init(numThreads); -#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP - omp_set_num_threads(numThreads); + if (initConfig.Device == vtkm::cont::DeviceAdapterTagTBB()) + { + tbb::task_scheduler_init init(numThreads); + } +#endif +#ifdef VTKM_ENABLE_OPENMP + if (initConfig.Device == vtkm::cont::DeviceAdapterTagOpenMP()) + { + omp_set_num_threads(numThreads); + } #endif // TBB if (config.BenchmarkFlags == 0) @@ -1394,5 +1403,5 @@ int main(int argc, char* argv[]) } //now actually execute the benchmarks - return vtkm::benchmarking::BenchmarkDeviceAdapter::Run(); + return vtkm::benchmarking::BenchmarkDeviceAdapter::Run(initConfig.Device); } diff --git a/benchmarking/BenchmarkFieldAlgorithms.cxx b/benchmarking/BenchmarkFieldAlgorithms.cxx index d5d2c55cc..b9a75c5dc 100644 --- a/benchmarking/BenchmarkFieldAlgorithms.cxx +++ b/benchmarking/BenchmarkFieldAlgorithms.cxx @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -311,7 +312,6 @@ struct InterpValueTypes : vtkm::ListTagBase class BenchmarkFieldAlgorithms { using StorageTag = vtkm::cont::StorageTagBasic; @@ -323,7 +323,7 @@ class BenchmarkFieldAlgorithms using EdgeIdVariantHandle = vtkm::cont::VariantArrayHandleBase; private: - template + template struct BenchBlackScholes { using ValueArrayHandle = vtkm::cont::ArrayHandle; @@ -366,11 +366,10 @@ private: const Value RISKFREE = 0.02f; const Value VOLATILITY = 0.30f; - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); BlackScholes worklet(RISKFREE, VOLATILITY); vtkm::worklet::DispatcherMapField> dispatcher(worklet); - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke( this->StockPrice, this->OptionStrike, this->OptionYears, callResultHandle, putResultHandle); @@ -391,8 +390,8 @@ private: } }; - template - struct BenchBlackScholesDynamic : public BenchBlackScholes + template + struct BenchBlackScholesDynamic : public BenchBlackScholes { VTKM_CONT @@ -406,11 +405,10 @@ private: const Value RISKFREE = 0.02f; const Value VOLATILITY = 0.30f; - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); BlackScholes worklet(RISKFREE, VOLATILITY); vtkm::worklet::DispatcherMapField> dispatcher(worklet); - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(dstocks, dstrikes, doptions, callResultHandle, putResultHandle); @@ -423,7 +421,7 @@ private: VTKM_MAKE_BENCHMARK(BlackScholes, BenchBlackScholes); VTKM_MAKE_BENCHMARK(BlackScholesDynamic, BenchBlackScholesDynamic); - template + template struct BenchMath { std::vector> input; @@ -450,10 +448,10 @@ private: vtkm::cont::ArrayHandle tempHandle1; vtkm::cont::ArrayHandle tempHandle2; - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); - vtkm::worklet::Invoker invoke(DeviceAdapterTag{}); + vtkm::worklet::Invoker invoke(DeviceAdapter{}); invoke(Mag{}, this->InputHandle, tempHandle1); invoke(Sin{}, tempHandle1, tempHandle2); invoke(Square{}, tempHandle2, tempHandle1); @@ -475,8 +473,8 @@ private: } }; - template - struct BenchMathDynamic : public BenchMath + template + struct BenchMathDynamic : public BenchMath { VTKM_CONT @@ -490,10 +488,10 @@ private: ValueVariantHandle dtemp1(temp1); ValueVariantHandle dtemp2(temp2); - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); - vtkm::worklet::Invoker invoke(DeviceAdapterTag{}); + vtkm::worklet::Invoker invoke(DeviceAdapter{}); invoke(Mag{}, dinput, dtemp1); invoke(Sin{}, dtemp1, dtemp2); invoke(Square{}, dtemp2, dtemp1); @@ -508,7 +506,7 @@ private: VTKM_MAKE_BENCHMARK(Math, BenchMath); VTKM_MAKE_BENCHMARK(MathDynamic, BenchMathDynamic); - template + template struct BenchFusedMath { std::vector> input; @@ -534,10 +532,9 @@ private: { vtkm::cont::ArrayHandle result; - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); vtkm::worklet::DispatcherMapField dispatcher; - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(this->InputHandle, result); return timer.GetElapsedTime(); @@ -556,8 +553,8 @@ private: } }; - template - struct BenchFusedMathDynamic : public BenchFusedMath + template + struct BenchFusedMathDynamic : public BenchFusedMath { VTKM_CONT @@ -569,10 +566,9 @@ private: vtkm::cont::ArrayHandle result; - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); vtkm::worklet::DispatcherMapField dispatcher; - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(dinput, result); return timer.GetElapsedTime(); @@ -584,7 +580,7 @@ private: VTKM_MAKE_BENCHMARK(FusedMath, BenchFusedMath); VTKM_MAKE_BENCHMARK(FusedMathDynamic, BenchFusedMathDynamic); - template + template struct BenchEdgeInterp { std::vector weight; @@ -617,7 +613,6 @@ private: this->EdgePairHandle.Allocate(numberOfEdges); vtkm::worklet::DispatcherMapTopology dispatcher; - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(cellSet, this->EdgePairHandle); this->weight.resize(esize); @@ -641,10 +636,9 @@ private: { vtkm::cont::ArrayHandle result; - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); vtkm::worklet::DispatcherMapField dispatcher; - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(this->EdgePairHandle, this->WeightHandle, this->FieldHandle, result); return timer.GetElapsedTime(); @@ -664,8 +658,8 @@ private: } }; - template - struct BenchEdgeInterpDynamic : public BenchEdgeInterp + template + struct BenchEdgeInterpDynamic : public BenchEdgeInterp { VTKM_CONT @@ -676,10 +670,9 @@ private: EdgeIdVariantHandle dedges(this->EdgePairHandle); vtkm::cont::ArrayHandle result; - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); vtkm::worklet::DispatcherMapField dispatcher; - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(dedges, dweight, dfield, result); return timer.GetElapsedTime(); @@ -724,7 +717,7 @@ private: return data; } - template + template struct BenchImplicitFunction { BenchImplicitFunction() @@ -739,14 +732,12 @@ private: using EvalDispatcher = vtkm::worklet::DispatcherMapField; auto handle = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere1); - auto function = - static_cast(handle.PrepareForExecution(DeviceAdapterTag())); + auto function = static_cast(handle.PrepareForExecution(DeviceAdapter())); EvalWorklet eval(function); - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); EvalDispatcher dispatcher(eval); - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(this->Internal.Points, this->Internal.Result); return timer.GetElapsedTime(); @@ -764,7 +755,7 @@ private: ImplicitFunctionBenchData Internal; }; - template + template struct BenchVirtualImplicitFunction { BenchVirtualImplicitFunction() @@ -779,12 +770,11 @@ private: using EvalDispatcher = vtkm::worklet::DispatcherMapField; auto sphere = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere1); - EvalWorklet eval(sphere.PrepareForExecution(DeviceAdapterTag())); + EvalWorklet eval(sphere.PrepareForExecution(DeviceAdapter())); - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); EvalDispatcher dispatcher(eval); - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(this->Internal.Points, this->Internal.Result); return timer.GetElapsedTime(); @@ -802,7 +792,7 @@ private: ImplicitFunctionBenchData Internal; }; - template + template struct Bench2ImplicitFunctions { Bench2ImplicitFunctions() @@ -818,14 +808,13 @@ private: auto h1 = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere1); auto h2 = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere2); - auto f1 = static_cast(h1.PrepareForExecution(DeviceAdapterTag())); - auto f2 = static_cast(h2.PrepareForExecution(DeviceAdapterTag())); + auto f1 = static_cast(h1.PrepareForExecution(DeviceAdapter())); + auto f2 = static_cast(h2.PrepareForExecution(DeviceAdapter())); EvalWorklet eval(f1, f2); - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); EvalDispatcher dispatcher(eval); - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(this->Internal.Points, this->Internal.Result); return timer.GetElapsedTime(); @@ -843,7 +832,7 @@ private: ImplicitFunctionBenchData Internal; }; - template + template struct Bench2VirtualImplicitFunctions { Bench2VirtualImplicitFunctions() @@ -860,13 +849,12 @@ private: auto s1 = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere1); auto s2 = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere2); - EvalWorklet eval(s1.PrepareForExecution(DeviceAdapterTag()), - s2.PrepareForExecution(DeviceAdapterTag())); + EvalWorklet eval(s1.PrepareForExecution(DeviceAdapter()), + s2.PrepareForExecution(DeviceAdapter())); - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); EvalDispatcher dispatcher(eval); - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(this->Internal.Points, this->Internal.Result); return timer.GetElapsedTime(); @@ -890,36 +878,36 @@ private: VTKM_MAKE_BENCHMARK(ImplicitFunctionVirtual2, Bench2VirtualImplicitFunctions); public: - static VTKM_CONT int Run(int benchmarks) + static VTKM_CONT int Run(int benchmarks, vtkm::cont::DeviceAdapterId id) { std::cout << DIVIDER << "\nRunning Field Algorithm benchmarks\n"; if (benchmarks & BLACK_SCHOLES) { std::cout << DIVIDER << "\nBenchmarking BlackScholes\n"; - VTKM_RUN_BENCHMARK(BlackScholes, ValueTypes()); - VTKM_RUN_BENCHMARK(BlackScholesDynamic, ValueTypes()); + VTKM_RUN_BENCHMARK(BlackScholes, ValueTypes(), id); + VTKM_RUN_BENCHMARK(BlackScholesDynamic, ValueTypes(), id); } if (benchmarks & MATH) { std::cout << DIVIDER << "\nBenchmarking Multiple Math Worklets\n"; - VTKM_RUN_BENCHMARK(Math, ValueTypes()); - VTKM_RUN_BENCHMARK(MathDynamic, ValueTypes()); + VTKM_RUN_BENCHMARK(Math, ValueTypes(), id); + VTKM_RUN_BENCHMARK(MathDynamic, ValueTypes(), id); } if (benchmarks & FUSED_MATH) { std::cout << DIVIDER << "\nBenchmarking Single Fused Math Worklet\n"; - VTKM_RUN_BENCHMARK(FusedMath, ValueTypes()); - VTKM_RUN_BENCHMARK(FusedMathDynamic, ValueTypes()); + VTKM_RUN_BENCHMARK(FusedMath, ValueTypes(), id); + VTKM_RUN_BENCHMARK(FusedMathDynamic, ValueTypes(), id); } if (benchmarks & INTERPOLATE_FIELD) { std::cout << DIVIDER << "\nBenchmarking Edge Based Field InterpolationWorklet\n"; - VTKM_RUN_BENCHMARK(EdgeInterp, InterpValueTypes()); - VTKM_RUN_BENCHMARK(EdgeInterpDynamic, InterpValueTypes()); + VTKM_RUN_BENCHMARK(EdgeInterp, InterpValueTypes(), id); + VTKM_RUN_BENCHMARK(EdgeInterpDynamic, InterpValueTypes(), id); } if (benchmarks & IMPLICIT_FUNCTION) @@ -927,10 +915,10 @@ public: using FloatDefaultType = vtkm::ListTagBase; std::cout << "\nBenchmarking Implicit Function\n"; - VTKM_RUN_BENCHMARK(ImplicitFunction, FloatDefaultType()); - VTKM_RUN_BENCHMARK(ImplicitFunctionVirtual, FloatDefaultType()); - VTKM_RUN_BENCHMARK(ImplicitFunction2, FloatDefaultType()); - VTKM_RUN_BENCHMARK(ImplicitFunctionVirtual2, FloatDefaultType()); + VTKM_RUN_BENCHMARK(ImplicitFunction, FloatDefaultType(), id); + VTKM_RUN_BENCHMARK(ImplicitFunctionVirtual, FloatDefaultType(), id); + VTKM_RUN_BENCHMARK(ImplicitFunction2, FloatDefaultType(), id); + VTKM_RUN_BENCHMARK(ImplicitFunctionVirtual2, FloatDefaultType(), id); } return 0; @@ -943,18 +931,19 @@ public: int main(int argc, char* argv[]) { - vtkm::cont::InitLogging(argc, argv); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; + auto config = vtkm::cont::Initialize(argc, argv, opts); int benchmarks = 0; - if (argc < 2) + if (!config.Arguments.size()) { benchmarks = vtkm::benchmarking::ALL; } else { - for (int i = 1; i < argc; ++i) + for (size_t i = 0; i < config.Arguments.size(); ++i) { - std::string arg = argv[i]; + std::string arg = config.Arguments[i]; std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) { return static_cast(std::tolower(static_cast(c))); }); @@ -980,16 +969,13 @@ int main(int argc, char* argv[]) } else { - std::cout << "Unrecognized benchmark: " << argv[i] << std::endl; + std::cout << "Unrecognized benchmark: " << config.Arguments[i] << std::endl; return 1; } } } //now actually execute the benchmarks - using Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG; - auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker(); - tracker.ForceDevice(Device{}); - return vtkm::benchmarking::BenchmarkFieldAlgorithms::Run(benchmarks); + return vtkm::benchmarking::BenchmarkFieldAlgorithms::Run(benchmarks, config.Device); } diff --git a/benchmarking/BenchmarkFilters.cxx b/benchmarking/BenchmarkFilters.cxx index b2dfac186..8c25b7902 100644 --- a/benchmarking/BenchmarkFilters.cxx +++ b/benchmarking/BenchmarkFilters.cxx @@ -62,9 +62,10 @@ #include #include -#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB +#ifdef VTKM_ENABLE_TBB #include -#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP +#endif +#ifdef VTKM_ENABLE_OPENMP #include #endif @@ -97,8 +98,6 @@ namespace { -using Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG; -using DevTraits = vtkm::cont::DeviceAdapterTraits; // unscoped enum so we can use bitwise ops without a lot of hassle: enum BenchmarkName @@ -156,6 +155,16 @@ using AllCellList = vtkm::ListTagJoin; using CoordinateList = vtkm::ListTagBase, vtkm::Vec>; +struct WaveletGeneratorDataFunctor +{ + template + bool operator()(DeviceAdapter, vtkm::worklet::WaveletGenerator& gen) + { + InputDataSet = gen.GenerateDataSet(); + return true; + } +}; + class BenchmarkFilterPolicy : public vtkm::filter::PolicyBase { public: @@ -169,7 +178,6 @@ public: }; // Class implementing all filter benchmarks: -template class BenchmarkFilters { using Timer = vtkm::cont::Timer; @@ -185,7 +193,7 @@ class BenchmarkFilters ScalarInput = 1 << 6 }; - template + template struct BenchGradient { vtkm::filter::Gradient Filter; @@ -229,7 +237,7 @@ class BenchmarkFilters VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy()); return timer.GetElapsedTime(); @@ -293,7 +301,7 @@ class BenchmarkFilters BenchGradient, Gradient | PointGradient | Divergence | Vorticity | QCriterion); - template + template struct BenchThreshold { vtkm::filter::Threshold Filter; @@ -317,7 +325,7 @@ class BenchmarkFilters VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy()); return timer.GetElapsedTime(); @@ -328,7 +336,7 @@ class BenchmarkFilters }; VTKM_MAKE_BENCHMARK(Threshold, BenchThreshold); - template + template struct BenchThresholdPoints { bool CompactPoints; @@ -356,7 +364,7 @@ class BenchmarkFilters VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy()); return timer.GetElapsedTime(); @@ -368,7 +376,7 @@ class BenchmarkFilters VTKM_MAKE_BENCHMARK(ThresholdPoints, BenchThresholdPoints, false); VTKM_MAKE_BENCHMARK(ThresholdPointsCompact, BenchThresholdPoints, true); - template + template struct BenchCellAverage { vtkm::filter::CellAverage Filter; @@ -382,7 +390,7 @@ class BenchmarkFilters VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy()); return timer.GetElapsedTime(); @@ -393,7 +401,7 @@ class BenchmarkFilters }; VTKM_MAKE_BENCHMARK(CellAverage, BenchCellAverage); - template + template struct BenchPointAverage { vtkm::filter::PointAverage Filter; @@ -407,7 +415,7 @@ class BenchmarkFilters VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy()); return timer.GetElapsedTime(); @@ -418,7 +426,7 @@ class BenchmarkFilters }; VTKM_MAKE_BENCHMARK(PointAverage, BenchPointAverage); - template + template struct BenchWarpScalar { vtkm::filter::WarpScalar Filter; @@ -435,7 +443,7 @@ class BenchmarkFilters VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy()); return timer.GetElapsedTime(); @@ -446,7 +454,7 @@ class BenchmarkFilters }; VTKM_MAKE_BENCHMARK(WarpScalar, BenchWarpScalar); - template + template struct BenchWarpVector { vtkm::filter::WarpVector Filter; @@ -462,7 +470,7 @@ class BenchmarkFilters VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy()); return timer.GetElapsedTime(); @@ -473,7 +481,7 @@ class BenchmarkFilters }; VTKM_MAKE_BENCHMARK(WarpVector, BenchWarpVector); - template + template struct BenchMarchingCubes { vtkm::filter::MarchingCubes Filter; @@ -505,7 +513,7 @@ class BenchmarkFilters VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy()); return timer.GetElapsedTime(); @@ -535,7 +543,7 @@ class BenchmarkFilters VTKM_MAKE_BENCHMARK(MarchingCubes3FTT, BenchMarchingCubes, 3, false, true, true); VTKM_MAKE_BENCHMARK(MarchingCubes12FTT, BenchMarchingCubes, 12, false, true, true); - template + template struct BenchExternalFaces { vtkm::filter::ExternalFaces Filter; @@ -550,7 +558,7 @@ class BenchmarkFilters VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy()); return timer.GetElapsedTime(); @@ -571,7 +579,7 @@ class BenchmarkFilters VTKM_MAKE_BENCHMARK(ExternalFaces, BenchExternalFaces, false); VTKM_MAKE_BENCHMARK(ExternalFacesCompact, BenchExternalFaces, true); - template + template struct BenchTetrahedralize { vtkm::filter::Tetrahedralize Filter; @@ -585,7 +593,7 @@ class BenchmarkFilters VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy()); return timer.GetElapsedTime(); @@ -596,7 +604,7 @@ class BenchmarkFilters }; VTKM_MAKE_BENCHMARK(Tetrahedralize, BenchTetrahedralize); - template + template struct BenchVertexClustering { vtkm::filter::VertexClustering Filter; @@ -611,7 +619,7 @@ class BenchmarkFilters VTKM_CONT vtkm::Float64 operator()() { - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy()); return timer.GetElapsedTime(); @@ -633,7 +641,7 @@ class BenchmarkFilters VTKM_MAKE_BENCHMARK(VertexClustering512, BenchVertexClustering, 512); VTKM_MAKE_BENCHMARK(VertexClustering1024, BenchVertexClustering, 1024); - template + template struct BenchCellToPoint { struct PrepareForInput @@ -661,10 +669,10 @@ class BenchmarkFilters vtkm::TopologyElementTagPoint{}); } - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); cellSet.PrepareForInput( - Device{}, vtkm::TopologyElementTagCell{}, vtkm::TopologyElementTagPoint{}); + DeviceAdapter(), vtkm::TopologyElementTagCell{}, vtkm::TopologyElementTagPoint{}); this->Time = timer.GetElapsedTime(); } }; @@ -692,7 +700,7 @@ class BenchmarkFilters VTKM_MAKE_BENCHMARK(CellToPoint, BenchCellToPoint); public: - static VTKM_CONT int Run(int benches) + static VTKM_CONT int Run(int benches, vtkm::cont::DeviceAdapterId id) { // This has no influence on the benchmarks. See issue #286. auto dummyTypes = vtkm::ListTagBase{}; @@ -703,104 +711,104 @@ public: { if (ReducedOptions) { - VTKM_RUN_BENCHMARK(GradientScalar, dummyTypes); - VTKM_RUN_BENCHMARK(GradientVector, dummyTypes); - VTKM_RUN_BENCHMARK(GradientVectorRow, dummyTypes); - VTKM_RUN_BENCHMARK(GradientKitchenSink, dummyTypes); + VTKM_RUN_BENCHMARK(GradientScalar, dummyTypes, id); + VTKM_RUN_BENCHMARK(GradientVector, dummyTypes, id); + VTKM_RUN_BENCHMARK(GradientVectorRow, dummyTypes, id); + VTKM_RUN_BENCHMARK(GradientKitchenSink, dummyTypes, id); } else { - VTKM_RUN_BENCHMARK(GradientScalar, dummyTypes); - VTKM_RUN_BENCHMARK(GradientVector, dummyTypes); - VTKM_RUN_BENCHMARK(GradientVectorRow, dummyTypes); - VTKM_RUN_BENCHMARK(GradientPoint, dummyTypes); - VTKM_RUN_BENCHMARK(GradientDivergence, dummyTypes); - VTKM_RUN_BENCHMARK(GradientVorticity, dummyTypes); - VTKM_RUN_BENCHMARK(GradientQCriterion, dummyTypes); - VTKM_RUN_BENCHMARK(GradientKitchenSink, dummyTypes); + VTKM_RUN_BENCHMARK(GradientScalar, dummyTypes, id); + VTKM_RUN_BENCHMARK(GradientVector, dummyTypes, id); + VTKM_RUN_BENCHMARK(GradientVectorRow, dummyTypes, id); + VTKM_RUN_BENCHMARK(GradientPoint, dummyTypes, id); + VTKM_RUN_BENCHMARK(GradientDivergence, dummyTypes, id); + VTKM_RUN_BENCHMARK(GradientVorticity, dummyTypes, id); + VTKM_RUN_BENCHMARK(GradientQCriterion, dummyTypes, id); + VTKM_RUN_BENCHMARK(GradientKitchenSink, dummyTypes, id); } } if (benches & BenchmarkName::THRESHOLD) { - VTKM_RUN_BENCHMARK(Threshold, dummyTypes); + VTKM_RUN_BENCHMARK(Threshold, dummyTypes, id); } if (benches & BenchmarkName::THRESHOLD_POINTS) { - VTKM_RUN_BENCHMARK(ThresholdPoints, dummyTypes); - VTKM_RUN_BENCHMARK(ThresholdPointsCompact, dummyTypes); + VTKM_RUN_BENCHMARK(ThresholdPoints, dummyTypes, id); + VTKM_RUN_BENCHMARK(ThresholdPointsCompact, dummyTypes, id); } if (benches & BenchmarkName::CELL_AVERAGE) { - VTKM_RUN_BENCHMARK(CellAverage, dummyTypes); + VTKM_RUN_BENCHMARK(CellAverage, dummyTypes, id); } if (benches & BenchmarkName::POINT_AVERAGE) { - VTKM_RUN_BENCHMARK(PointAverage, dummyTypes); + VTKM_RUN_BENCHMARK(PointAverage, dummyTypes, id); } if (benches & BenchmarkName::WARP_SCALAR) { - VTKM_RUN_BENCHMARK(WarpScalar, dummyTypes); + VTKM_RUN_BENCHMARK(WarpScalar, dummyTypes, id); } if (benches & BenchmarkName::WARP_VECTOR) { - VTKM_RUN_BENCHMARK(WarpVector, dummyTypes); + VTKM_RUN_BENCHMARK(WarpVector, dummyTypes, id); } if (benches & BenchmarkName::MARCHING_CUBES) { if (ReducedOptions) { - VTKM_RUN_BENCHMARK(MarchingCubes1FFF, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes12FFF, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes12TFF, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes12FTF, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes12FTT, dummyTypes); + VTKM_RUN_BENCHMARK(MarchingCubes1FFF, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes12FFF, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes12TFF, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes12FTF, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes12FTT, dummyTypes, id); } else { - VTKM_RUN_BENCHMARK(MarchingCubes1FFF, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes3FFF, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes12FFF, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes1TFF, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes3TFF, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes12TFF, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes1FTF, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes3FTF, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes12FTF, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes1FTT, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes3FTT, dummyTypes); - VTKM_RUN_BENCHMARK(MarchingCubes12FTT, dummyTypes); + VTKM_RUN_BENCHMARK(MarchingCubes1FFF, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes3FFF, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes12FFF, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes1TFF, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes3TFF, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes12TFF, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes1FTF, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes3FTF, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes12FTF, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes1FTT, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes3FTT, dummyTypes, id); + VTKM_RUN_BENCHMARK(MarchingCubes12FTT, dummyTypes, id); } } if (benches & BenchmarkName::EXTERNAL_FACES) { - VTKM_RUN_BENCHMARK(ExternalFaces, dummyTypes); - VTKM_RUN_BENCHMARK(ExternalFacesCompact, dummyTypes); + VTKM_RUN_BENCHMARK(ExternalFaces, dummyTypes, id); + VTKM_RUN_BENCHMARK(ExternalFacesCompact, dummyTypes, id); } if (benches & BenchmarkName::TETRAHEDRALIZE) { - VTKM_RUN_BENCHMARK(Tetrahedralize, dummyTypes); + VTKM_RUN_BENCHMARK(Tetrahedralize, dummyTypes, id); } if (benches & BenchmarkName::VERTEX_CLUSTERING) { if (ReducedOptions) { - VTKM_RUN_BENCHMARK(VertexClustering32, dummyTypes); - VTKM_RUN_BENCHMARK(VertexClustering256, dummyTypes); - VTKM_RUN_BENCHMARK(VertexClustering1024, dummyTypes); + VTKM_RUN_BENCHMARK(VertexClustering32, dummyTypes, id); + VTKM_RUN_BENCHMARK(VertexClustering256, dummyTypes, id); + VTKM_RUN_BENCHMARK(VertexClustering1024, dummyTypes, id); } else { - VTKM_RUN_BENCHMARK(VertexClustering32, dummyTypes); - VTKM_RUN_BENCHMARK(VertexClustering64, dummyTypes); - VTKM_RUN_BENCHMARK(VertexClustering128, dummyTypes); - VTKM_RUN_BENCHMARK(VertexClustering256, dummyTypes); - VTKM_RUN_BENCHMARK(VertexClustering512, dummyTypes); - VTKM_RUN_BENCHMARK(VertexClustering1024, dummyTypes); + VTKM_RUN_BENCHMARK(VertexClustering32, dummyTypes, id); + VTKM_RUN_BENCHMARK(VertexClustering64, dummyTypes, id); + VTKM_RUN_BENCHMARK(VertexClustering128, dummyTypes, id); + VTKM_RUN_BENCHMARK(VertexClustering256, dummyTypes, id); + VTKM_RUN_BENCHMARK(VertexClustering512, dummyTypes, id); + VTKM_RUN_BENCHMARK(VertexClustering1024, dummyTypes, id); } } if (benches & BenchmarkName::CELL_TO_POINT) { - VTKM_RUN_BENCHMARK(CellToPoint, dummyTypes); + VTKM_RUN_BENCHMARK(CellToPoint, dummyTypes, id); } return 0; @@ -936,7 +944,6 @@ void CreateFields(bool needPointScalars, bool needCellScalars, bool needPointVec PointVectorGenerator worklet(bounds); vtkm::worklet::DispatcherMapField dispatch(worklet); - dispatch.SetDevice(Device()); dispatch.Invoke(points, pvecs); InputDataSet.AddField( vtkm::cont::Field("GeneratedPointVectors", vtkm::cont::Field::Association::POINTS, pvecs)); @@ -1050,18 +1057,21 @@ void AssertFields(bool needPointScalars, bool needCellScalars, bool needPointVec } } -int BenchmarkBody(int argc, char* argv[]) +int BenchmarkBody(const std::vector& argv, vtkm::cont::DeviceAdapterId id) { int numThreads = 1; -#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB - numThreads = tbb::task_scheduler_init::automatic; -#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP - numThreads = omp_get_max_threads(); -#endif // TBB - - // Force the requested device in case a tracker is used internally by a filter: - auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker(); - tracker.ForceDevice(Device()); +#ifdef VTKM_ENABLE_TBB + if (id == vtkm::cont::DeviceAdapterTagTBB()) + { + numThreads = tbb::task_scheduler_init::automatic; + } +#endif +#ifdef VTKM_ENABLE_OPENMP + if (id == vtkm::cont::DeviceAdapterTagOpenMP()) + { + numThreads = omp_get_max_threads(); + } +#endif int benches = BenchmarkName::NONE; std::string filename; @@ -1073,7 +1083,7 @@ int BenchmarkBody(int argc, char* argv[]) ReducedOptions = false; - for (int i = 1; i < argc; ++i) + for (size_t i = 0; i < argv.size(); ++i) { std::string arg = argv[i]; std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) { @@ -1174,13 +1184,11 @@ int BenchmarkBody(int argc, char* argv[]) else if (arg == "numthreads") { ++i; - if (Device{} == vtkm::cont::DeviceAdapterTagOpenMP{} || - Device{} == vtkm::cont::DeviceAdapterTagTBB{}) + if (id == vtkm::cont::DeviceAdapterTagOpenMP() || id == vtkm::cont::DeviceAdapterTagTBB()) { std::istringstream parse(argv[i]); parse >> numThreads; - std::cout << "Selected " << numThreads << " " << DevTraits::GetName() << " threads." - << std::endl; + std::cout << "Selected " << numThreads << " " << id.GetName() << " threads." << std::endl; } else { @@ -1194,12 +1202,19 @@ int BenchmarkBody(int argc, char* argv[]) } } -#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB +#ifdef VTKM_ENABLE_TBB // Must not be destroyed as long as benchmarks are running: - tbb::task_scheduler_init init(numThreads); -#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP - omp_set_num_threads(numThreads); -#endif // TBB + if (id == vtkm::cont::DeviceAdapterTagTBB()) + { + tbb::task_scheduler_init init(numThreads); + } +#endif +#ifdef VTKM_ENABLE_OPENMP + if (id == vtkm::cont::DeviceAdapterTagOpenMP()) + { + omp_set_num_threads(numThreads); + } +#endif if (benches == BenchmarkName::NONE) { @@ -1222,7 +1237,10 @@ int BenchmarkBody(int argc, char* argv[]) << " wavelet...\n"; vtkm::worklet::WaveletGenerator gen; gen.SetExtent({ 0 }, { waveletDim }); - InputDataSet = gen.GenerateDataSet(); + + // WaveletGenerator needs a template device argument not a id to deduce the portal type. + WaveletGeneratorDataFunctor genFunctor; + vtkm::cont::TryExecuteOnDevice(id, genFunctor, gen); } if (tetra) @@ -1271,7 +1289,7 @@ int BenchmarkBody(int argc, char* argv[]) std::cout << "\n"; //now actually execute the benchmarks - int result = BenchmarkFilters::Run(benches); + int result = BenchmarkFilters::Run(benches, id); // Explicitly free resources before exit. InputDataSet.Clear(); @@ -1283,15 +1301,13 @@ int BenchmarkBody(int argc, char* argv[]) int main(int argc, char* argv[]) { - vtkm::cont::Initialize(argc, argv); - - auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker(); - tracker.ForceDevice(Device{}); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; + auto config = vtkm::cont::Initialize(argc, argv, opts); int retval = 1; try { - retval = BenchmarkBody(argc, argv); + retval = BenchmarkBody(config.Arguments, config.Device); } catch (std::exception& e) { diff --git a/benchmarking/BenchmarkRayTracing.cxx b/benchmarking/BenchmarkRayTracing.cxx index d6708effb..92da753c9 100644 --- a/benchmarking/BenchmarkRayTracing.cxx +++ b/benchmarking/BenchmarkRayTracing.cxx @@ -47,7 +47,7 @@ namespace vtkm namespace benchmarking { -template +template struct BenchRayTracing { vtkm::rendering::raytracing::RayTracer Tracer; @@ -118,9 +118,7 @@ struct BenchRayTracing VTKM_CONT vtkm::Float64 operator()() { - - - vtkm::cont::Timer timer; + vtkm::cont::Timer timer{ DeviceAdapter() }; timer.Start(); RayCamera.CreateRays(Rays, Coords.GetBounds()); @@ -147,12 +145,9 @@ VTKM_MAKE_BENCHMARK(RayTracing, BenchRayTracing); int main(int argc, char* argv[]) { - vtkm::cont::InitLogging(argc, argv); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; + auto config = vtkm::cont::Initialize(argc, argv, opts); - using Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG; - auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker(); - tracker.ForceDevice(Device{}); - - VTKM_RUN_BENCHMARK(RayTracing, vtkm::ListTagBase()); + VTKM_RUN_BENCHMARK(RayTracing, vtkm::ListTagBase(), config.Device); return 0; } diff --git a/benchmarking/BenchmarkTopologyAlgorithms.cxx b/benchmarking/BenchmarkTopologyAlgorithms.cxx index 0f9b38fe3..ab87a59cb 100644 --- a/benchmarking/BenchmarkTopologyAlgorithms.cxx +++ b/benchmarking/BenchmarkTopologyAlgorithms.cxx @@ -135,7 +135,6 @@ struct ValueTypes /// This class runs a series of micro-benchmarks to measure /// performance of different field operations -template class BenchmarkTopologyAlgorithms { using StorageTag = vtkm::cont::StorageTagBasic; @@ -177,7 +176,7 @@ private: T next() { return distribution(rng); } }; - template + template struct BenchCellToPointAvg { std::vector input; @@ -205,11 +204,10 @@ private: cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)); vtkm::cont::ArrayHandle result; - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); vtkm::worklet::DispatcherMapTopology dispatcher; - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(this->InputHandle, cellSet, result); return timer.GetElapsedTime(); @@ -229,8 +227,8 @@ private: } }; - template - struct BenchCellToPointAvgDynamic : public BenchCellToPointAvg + template + struct BenchCellToPointAvgDynamic : public BenchCellToPointAvg { VTKM_CONT @@ -242,11 +240,10 @@ private: ValueVariantHandle dinput(this->InputHandle); vtkm::cont::ArrayHandle result; - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); vtkm::worklet::DispatcherMapTopology dispatcher; - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(dinput, cellSet, result); return timer.GetElapsedTime(); @@ -258,7 +255,7 @@ private: VTKM_MAKE_BENCHMARK(CellToPointAvg, BenchCellToPointAvg); VTKM_MAKE_BENCHMARK(CellToPointAvgDynamic, BenchCellToPointAvgDynamic); - template + template struct BenchPointToCellAvg { std::vector input; @@ -286,11 +283,10 @@ private: cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)); vtkm::cont::ArrayHandle result; - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); vtkm::worklet::DispatcherMapTopology dispatcher; - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(this->InputHandle, cellSet, result); return timer.GetElapsedTime(); @@ -310,8 +306,8 @@ private: } }; - template - struct BenchPointToCellAvgDynamic : public BenchPointToCellAvg + template + struct BenchPointToCellAvgDynamic : public BenchPointToCellAvg { VTKM_CONT @@ -323,11 +319,10 @@ private: ValueVariantHandle dinput(this->InputHandle); vtkm::cont::ArrayHandle result; - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); vtkm::worklet::DispatcherMapTopology dispatcher; - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(dinput, cellSet, result); return timer.GetElapsedTime(); @@ -339,7 +334,7 @@ private: VTKM_MAKE_BENCHMARK(PointToCellAvg, BenchPointToCellAvg); VTKM_MAKE_BENCHMARK(PointToCellAvgDynamic, BenchPointToCellAvgDynamic); - template + template struct BenchClassification { std::vector input; @@ -371,12 +366,11 @@ private: ValueVariantHandle dinput(this->InputHandle); - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Classification worklet(this->IsoValue); vtkm::worklet::DispatcherMapTopology> dispatcher(worklet); - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(dinput, cellSet, result); return timer.GetElapsedTime(); @@ -396,8 +390,8 @@ private: } }; - template - struct BenchClassificationDynamic : public BenchClassification + template + struct BenchClassificationDynamic : public BenchClassification { VTKM_CONT vtkm::Float64 operator()() @@ -406,12 +400,11 @@ private: cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE)); vtkm::cont::ArrayHandle result; - Timer timer{ DeviceAdapterTag() }; + Timer timer{ DeviceAdapter() }; timer.Start(); Classification worklet(this->IsoValue); vtkm::worklet::DispatcherMapTopology> dispatcher(worklet); - dispatcher.SetDevice(DeviceAdapterTag()); dispatcher.Invoke(this->InputHandle, cellSet, result); timer.Stop(); @@ -425,29 +418,29 @@ private: VTKM_MAKE_BENCHMARK(ClassificationDynamic, BenchClassificationDynamic); public: - static VTKM_CONT int Run(int benchmarks) + static VTKM_CONT int Run(int benchmarks, vtkm::cont::DeviceAdapterId id) { std::cout << DIVIDER << "\nRunning Topology Algorithm benchmarks\n"; if (benchmarks & CELL_TO_POINT) { std::cout << DIVIDER << "\nBenchmarking Cell To Point Average\n"; - VTKM_RUN_BENCHMARK(CellToPointAvg, ValueTypes()); - VTKM_RUN_BENCHMARK(CellToPointAvgDynamic, ValueTypes()); + VTKM_RUN_BENCHMARK(CellToPointAvg, ValueTypes(), id); + VTKM_RUN_BENCHMARK(CellToPointAvgDynamic, ValueTypes(), id); } if (benchmarks & POINT_TO_CELL) { std::cout << DIVIDER << "\nBenchmarking Point to Cell Average\n"; - VTKM_RUN_BENCHMARK(PointToCellAvg, ValueTypes()); - VTKM_RUN_BENCHMARK(PointToCellAvgDynamic, ValueTypes()); + VTKM_RUN_BENCHMARK(PointToCellAvg, ValueTypes(), id); + VTKM_RUN_BENCHMARK(PointToCellAvgDynamic, ValueTypes(), id); } if (benchmarks & MC_CLASSIFY) { std::cout << DIVIDER << "\nBenchmarking Hex/Voxel MC Classification\n"; - VTKM_RUN_BENCHMARK(Classification, ValueTypes()); - VTKM_RUN_BENCHMARK(ClassificationDynamic, ValueTypes()); + VTKM_RUN_BENCHMARK(Classification, ValueTypes(), id); + VTKM_RUN_BENCHMARK(ClassificationDynamic, ValueTypes(), id); } return 0; @@ -460,18 +453,19 @@ public: int main(int argc, char* argv[]) { - vtkm::cont::InitLogging(argc, argv); + auto opts = vtkm::cont::InitializeOptions::RequireDevice; + auto config = vtkm::cont::Initialize(argc, argv, opts); int benchmarks = 0; - if (argc < 2) + if (!config.Arguments.size()) { benchmarks = vtkm::benchmarking::ALL; } else { - for (int i = 1; i < argc; ++i) + for (size_t i = 0; i < config.Arguments.size(); ++i) { - std::string arg = argv[i]; + std::string arg = config.Arguments[i]; std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) { return static_cast(std::tolower(static_cast(c))); }); @@ -489,16 +483,13 @@ int main(int argc, char* argv[]) } else { - std::cout << "Unrecognized benchmark: " << argv[i] << std::endl; + std::cout << "Unrecognized benchmark: " << config.Arguments[i] << std::endl; return 1; } } } //now actually execute the benchmarks - using Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG; - auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker(); - tracker.ForceDevice(Device{}); - return vtkm::benchmarking::BenchmarkTopologyAlgorithms::Run(benchmarks); + return vtkm::benchmarking::BenchmarkTopologyAlgorithms::Run(benchmarks, config.Device); } diff --git a/benchmarking/Benchmarker.h b/benchmarking/Benchmarker.h index b899ba9d2..bfd88cf78 100644 --- a/benchmarking/Benchmarker.h +++ b/benchmarking/Benchmarker.h @@ -23,6 +23,8 @@ #include #include +#include +#include #include #include @@ -83,17 +85,18 @@ /* * Use the VTKM_MAKE_BENCHMARK macro to define a maker functor for your benchmark. * This is used to allow you to template the benchmark functor on the type being benchmarked - * so you can write init code in the constructor. Then the maker will return a constructed - * instance of your benchmark for the type being benchmarked. The VA_ARGS are used to - * pass any extra arguments needed by your benchmark + * and the device adapter so you can write init code in the constructor. Then the maker will + * return a constructed instance of your benchmark for the type being benchmarked. + * The VA_ARGS are used to pass any extra arguments needed by your benchmark */ #define VTKM_MAKE_BENCHMARK(Name, Bench, ...) \ struct MakeBench##Name \ { \ - template \ - VTKM_CONT Bench operator()(const Value vtkmNotUsed(v)) const \ + template \ + VTKM_CONT Bench operator()(const Value vtkmNotUsed(v), \ + DeviceAdapter vtkmNotUsed(id)) const \ { \ - return Bench(__VA_ARGS__); \ + return Bench(__VA_ARGS__); \ } \ } @@ -102,8 +105,8 @@ * You must have previously defined a maker functor with VTKM_MAKE_BENCHMARK that this * macro will look for and use */ -#define VTKM_RUN_BENCHMARK(Name, Types) \ - vtkm::benchmarking::BenchmarkTypes(MakeBench##Name(), (Types)) +#define VTKM_RUN_BENCHMARK(Name, Types, Id) \ + vtkm::benchmarking::BenchmarkTypes(MakeBench##Name(), (Types), (Id)) namespace vtkm { @@ -223,7 +226,7 @@ vtkm::Float64 MedianAbsDeviation(const std::vector& samples) * in seconds, this lets us avoid including any per-run setup time in the benchmark. * However any one-time setup should be done in the functor's constructor */ -class Benchmarker +struct Benchmarker { std::vector Samples; std::string BenchmarkName; @@ -286,11 +289,13 @@ public: << "\tmax = " << this->Samples.back() << "s\n"; } - template - VTKM_CONT void operator()(Functor func) + template + VTKM_CONT bool operator()(DeviceAdapter id, MakerFunctor&& makerFunctor, T t) { + auto func = makerFunctor(t, id); this->GatherSamples(func); this->PrintSummary(); + return true; } VTKM_CONT const std::vector& GetSamples() const { return this->Samples; } @@ -315,13 +320,14 @@ public: } template - VTKM_CONT void operator()(T t) const + VTKM_CONT void operator()(T t, vtkm::cont::DeviceAdapterId id) const { - std::cout << "*** " << vtkm::testing::TypeName::Name() << " ***************" << std::endl; + std::cout << "*** " << vtkm::testing::TypeName::Name() << " on device " << id.GetName() + << " ***************" << std::endl; Benchmarker bench; try { - bench(Maker(t)); + vtkm::cont::TryExecuteOnDevice(id, bench, Maker, t); } catch (std::exception& e) { @@ -333,9 +339,10 @@ public: }; template -VTKM_CONT void BenchmarkTypes(const MakerFunctor& maker, TypeList) +VTKM_CONT void BenchmarkTypes(MakerFunctor&& maker, TypeList, vtkm::cont::DeviceAdapterId id) { - vtkm::ListForEach(InternalPrintTypeAndBench(maker), TypeList()); + vtkm::ListForEach( + InternalPrintTypeAndBench(std::forward(maker)), TypeList(), id); } } } diff --git a/benchmarking/CMakeLists.txt b/benchmarking/CMakeLists.txt index 3ebef0afa..b646f6a3d 100644 --- a/benchmarking/CMakeLists.txt +++ b/benchmarking/CMakeLists.txt @@ -17,43 +17,32 @@ ## Laboratory (LANL), the U.S. Government retains certain rights in ## this software. ##============================================================================ -function(add_benchmark name files lib) - set(benchmarks ) - - add_executable(${name}_SERIAL ${files}) - list(APPEND benchmarks ${name}_SERIAL) - target_compile_definitions(${name}_SERIAL PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_SERIAL") - - if (TARGET vtkm::tbb) - add_executable(${name}_TBB ${files}) - list(APPEND benchmarks ${name}_TBB) - target_compile_definitions(${name}_TBB PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_TBB") - endif() - - if (TARGET vtkm::openmp) - add_executable(${name}_OPENMP ${files}) - list(APPEND benchmarks ${name}_OPENMP) - target_compile_definitions(${name}_OPENMP PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_OPENMP") - endif() - +function(add_benchmark) + set(options) + set(oneValueArgs NAME FILE) + set(multiValueArgs LIBS) + cmake_parse_arguments(VTKm_AB + "${options}" "${oneValueArgs}" "${multiValueArgs}" + ${ARGN} + ) + set(exe_name ${VTKm_AB_NAME}) if (TARGET vtkm::cuda) - get_filename_component(fname "${name}" NAME_WE) - get_filename_component(fullpath "${name}.cxx" ABSOLUTE) + get_filename_component(fname ${VTKm_AB_FILE} NAME_WE) + get_filename_component(fullpath ${VTKm_AB_FILE} ABSOLUTE) file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${fname}.cu CONTENT "#include \"${fullpath}\"") - add_executable(${name}_CUDA ${CMAKE_CURRENT_BINARY_DIR}/${fname}.cu) - list(APPEND benchmarks ${name}_CUDA) - target_compile_definitions(${name}_CUDA PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_CUDA") + add_executable(${exe_name} ${CMAKE_CURRENT_BINARY_DIR}/${fname}.cu) + set_property(TARGET ${exe_name} PROPERTY CUDA_SEPARABLE_COMPILATION ON) + else() + add_executable(${exe_name} ${VTKm_AB_FILE}) endif() - foreach(benchmark ${benchmarks}) - target_link_libraries(${benchmark} PRIVATE ${lib}) - set_target_properties(${benchmark} PROPERTIES - RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH} - ) - endforeach() + target_link_libraries(${exe_name} PRIVATE ${VTKm_AB_LIBS}) + set_target_properties(${exe_name} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH} + ) endfunction() @@ -69,9 +58,9 @@ set(benchmarks ) foreach (benchmark ${benchmarks}) - add_benchmark(${benchmark} ${benchmark}.cxx vtkm_filter) + add_benchmark(NAME ${benchmark} FILE ${benchmark}.cxx LIBS vtkm_filter vtkm_cont) endforeach () if(TARGET vtkm_rendering) - add_benchmark(BenchmarkRayTracing BenchmarkRayTracing.cxx vtkm_rendering) + add_benchmark(NAME BenchmarkRayTracing FILE BenchmarkRayTracing.cxx LIBS vtkm_rendering) endif() diff --git a/docs/changelog/merge-benchmark-executables.md b/docs/changelog/merge-benchmark-executables.md new file mode 100644 index 000000000..15695ab77 --- /dev/null +++ b/docs/changelog/merge-benchmark-executables.md @@ -0,0 +1,6 @@ +# Merge benchmark executables into a device dependent shared library + +VTK-m has been updated to replace old per device benchmark executables with a device +dependent shared library so that it's able to accept a device adapter at runtime through +the "--device=" argument. + diff --git a/vtkm/cont/Algorithm.h b/vtkm/cont/Algorithm.h index 6383c31b0..566a874dc 100644 --- a/vtkm/cont/Algorithm.h +++ b/vtkm/cont/Algorithm.h @@ -122,7 +122,7 @@ struct ReduceFunctor U result; ReduceFunctor() - : result(U(0)) + : result(vtkm::TypeTraits::ZeroInitialization()) { } @@ -148,12 +148,13 @@ struct ReduceByKeyFunctor } }; -template +template struct ScanInclusiveResultFunctor { - T result; + U result; + ScanInclusiveResultFunctor() - : result(T(0)) + : result(vtkm::TypeTraits::ZeroInitialization()) { }