mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-09-16 17:22:55 +00:00
Merge benchmark executables into a device-dependent shared library
VTK-m has been updated to replace the old per-device benchmark executables with a device-dependent shared library, so that each benchmark can accept a device adapter at runtime through the "--device=" argument.
This commit is contained in:
parent
c27a33669b
commit
634f523d92
@ -22,7 +22,9 @@
|
|||||||
|
|
||||||
#include <vtkm/TypeTraits.h>
|
#include <vtkm/TypeTraits.h>
|
||||||
|
|
||||||
|
#include <vtkm/cont/Algorithm.h>
|
||||||
#include <vtkm/cont/ArrayHandle.h>
|
#include <vtkm/cont/ArrayHandle.h>
|
||||||
|
#include <vtkm/cont/DeviceAdapter.h>
|
||||||
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
|
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
|
||||||
#include <vtkm/cont/Timer.h>
|
#include <vtkm/cont/Timer.h>
|
||||||
|
|
||||||
@ -40,10 +42,9 @@ namespace vtkm
|
|||||||
namespace benchmarking
|
namespace benchmarking
|
||||||
{
|
{
|
||||||
|
|
||||||
template <typename DeviceAdapter>
|
|
||||||
struct BenchmarkArrayTransfer
|
struct BenchmarkArrayTransfer
|
||||||
{
|
{
|
||||||
using Algo = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapter>;
|
using Algo = vtkm::cont::Algorithm;
|
||||||
using StorageTag = vtkm::cont::StorageTagBasic;
|
using StorageTag = vtkm::cont::StorageTagBasic;
|
||||||
using Timer = vtkm::cont::Timer;
|
using Timer = vtkm::cont::Timer;
|
||||||
|
|
||||||
@ -139,11 +140,10 @@ struct BenchmarkArrayTransfer
|
|||||||
|
|
||||||
// Copies NumValues from control environment to execution environment and
|
// Copies NumValues from control environment to execution environment and
|
||||||
// accesses them as read-only.
|
// accesses them as read-only.
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchContToExecRead
|
struct BenchContToExecRead
|
||||||
{
|
{
|
||||||
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
||||||
using PortalType = typename ArrayType::template ExecutionTypes<DeviceAdapter>::PortalConst;
|
|
||||||
using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
|
using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
|
||||||
|
|
||||||
vtkm::Id NumValues;
|
vtkm::Id NumValues;
|
||||||
@ -164,7 +164,7 @@ struct BenchmarkArrayTransfer
|
|||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()() const
|
||||||
{
|
{
|
||||||
std::vector<ValueType> vec(static_cast<std::size_t>(this->NumValues),
|
std::vector<ValueType> vec(static_cast<std::size_t>(this->NumValues),
|
||||||
ValueTypeTraits::ZeroInitialization());
|
ValueTypeTraits::ZeroInitialization());
|
||||||
@ -173,8 +173,8 @@ struct BenchmarkArrayTransfer
|
|||||||
// Time the copy:
|
// Time the copy:
|
||||||
Timer timer{ DeviceAdapter() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
ReadValues<PortalType> functor(array.PrepareForInput(DeviceAdapter()),
|
auto portal = array.PrepareForInput(DeviceAdapter());
|
||||||
ValueTypeTraits::ZeroInitialization());
|
ReadValues<decltype(portal)> functor(portal, ValueTypeTraits::ZeroInitialization());
|
||||||
Algo::Schedule(functor, this->NumValues);
|
Algo::Schedule(functor, this->NumValues);
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
}
|
}
|
||||||
@ -183,11 +183,10 @@ struct BenchmarkArrayTransfer
|
|||||||
|
|
||||||
// Writes values to ArrayHandle in execution environment. There is no actual
|
// Writes values to ArrayHandle in execution environment. There is no actual
|
||||||
// copy between control/execution in this case.
|
// copy between control/execution in this case.
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchContToExecWrite
|
struct BenchContToExecWrite
|
||||||
{
|
{
|
||||||
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
||||||
using PortalType = typename ArrayType::template ExecutionTypes<DeviceAdapter>::Portal;
|
|
||||||
using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
|
using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
|
||||||
|
|
||||||
vtkm::Id NumValues;
|
vtkm::Id NumValues;
|
||||||
@ -208,14 +207,15 @@ struct BenchmarkArrayTransfer
|
|||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()() const
|
||||||
{
|
{
|
||||||
ArrayType array;
|
ArrayType array;
|
||||||
|
|
||||||
// Time the write:
|
// Time the write:
|
||||||
Timer timer{ DeviceAdapter() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
WriteValues<PortalType> functor(array.PrepareForOutput(this->NumValues, DeviceAdapter()));
|
auto portal = array.PrepareForOutput(this->NumValues, DeviceAdapter());
|
||||||
|
WriteValues<decltype(portal)> functor(portal);
|
||||||
Algo::Schedule(functor, this->NumValues);
|
Algo::Schedule(functor, this->NumValues);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -225,11 +225,10 @@ struct BenchmarkArrayTransfer
|
|||||||
|
|
||||||
// Copies NumValues from control environment to execution environment and
|
// Copies NumValues from control environment to execution environment and
|
||||||
// both reads and writes them.
|
// both reads and writes them.
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchContToExecReadWrite
|
struct BenchContToExecReadWrite
|
||||||
{
|
{
|
||||||
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
||||||
using PortalType = typename ArrayType::template ExecutionTypes<DeviceAdapter>::Portal;
|
|
||||||
using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
|
using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
|
||||||
|
|
||||||
vtkm::Id NumValues;
|
vtkm::Id NumValues;
|
||||||
@ -250,7 +249,7 @@ struct BenchmarkArrayTransfer
|
|||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()() const
|
||||||
{
|
{
|
||||||
std::vector<ValueType> vec(static_cast<std::size_t>(this->NumValues),
|
std::vector<ValueType> vec(static_cast<std::size_t>(this->NumValues),
|
||||||
ValueTypeTraits::ZeroInitialization());
|
ValueTypeTraits::ZeroInitialization());
|
||||||
@ -259,7 +258,8 @@ struct BenchmarkArrayTransfer
|
|||||||
// Time the copy:
|
// Time the copy:
|
||||||
Timer timer{ DeviceAdapter() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
ReadWriteValues<PortalType> functor(array.PrepareForInPlace(DeviceAdapter()));
|
auto portal = array.PrepareForInPlace(DeviceAdapter());
|
||||||
|
ReadWriteValues<decltype(portal)> functor(portal);
|
||||||
Algo::Schedule(functor, this->NumValues);
|
Algo::Schedule(functor, this->NumValues);
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
}
|
}
|
||||||
@ -268,7 +268,7 @@ struct BenchmarkArrayTransfer
|
|||||||
|
|
||||||
// Copies NumValues from control environment to execution environment and
|
// Copies NumValues from control environment to execution environment and
|
||||||
// back, then accesses them as read-only.
|
// back, then accesses them as read-only.
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchRoundTripRead
|
struct BenchRoundTripRead
|
||||||
{
|
{
|
||||||
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
||||||
@ -295,7 +295,7 @@ struct BenchmarkArrayTransfer
|
|||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()() const
|
||||||
{
|
{
|
||||||
std::vector<ValueType> vec(static_cast<std::size_t>(this->NumValues),
|
std::vector<ValueType> vec(static_cast<std::size_t>(this->NumValues),
|
||||||
ValueTypeTraits::ZeroInitialization());
|
ValueTypeTraits::ZeroInitialization());
|
||||||
@ -309,8 +309,8 @@ struct BenchmarkArrayTransfer
|
|||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
// Copy to device:
|
// Copy to device:
|
||||||
ReadValues<PortalExecType> functor(array.PrepareForInput(DeviceAdapter()),
|
auto portal = array.PrepareForInput(DeviceAdapter());
|
||||||
ValueTypeTraits::ZeroInitialization());
|
ReadValues<PortalExecType> functor(portal, ValueTypeTraits::ZeroInitialization());
|
||||||
Algo::Schedule(functor, this->NumValues);
|
Algo::Schedule(functor, this->NumValues);
|
||||||
|
|
||||||
// Copy back to host and read:
|
// Copy back to host and read:
|
||||||
@ -328,7 +328,7 @@ struct BenchmarkArrayTransfer
|
|||||||
|
|
||||||
// Copies NumValues from control environment to execution environment and
|
// Copies NumValues from control environment to execution environment and
|
||||||
// back, then reads and writes them in-place.
|
// back, then reads and writes them in-place.
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchRoundTripReadWrite
|
struct BenchRoundTripReadWrite
|
||||||
{
|
{
|
||||||
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
||||||
@ -355,7 +355,7 @@ struct BenchmarkArrayTransfer
|
|||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()() const
|
||||||
{
|
{
|
||||||
std::vector<ValueType> vec(static_cast<std::size_t>(this->NumValues),
|
std::vector<ValueType> vec(static_cast<std::size_t>(this->NumValues),
|
||||||
ValueTypeTraits::ZeroInitialization());
|
ValueTypeTraits::ZeroInitialization());
|
||||||
@ -369,7 +369,8 @@ struct BenchmarkArrayTransfer
|
|||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
// Do work on device:
|
// Do work on device:
|
||||||
ReadWriteValues<PortalExecType> functor(array.PrepareForInPlace(DeviceAdapter()));
|
auto portal = array.PrepareForInPlace(DeviceAdapter());
|
||||||
|
ReadWriteValues<PortalExecType> functor(portal);
|
||||||
Algo::Schedule(functor, this->NumValues);
|
Algo::Schedule(functor, this->NumValues);
|
||||||
|
|
||||||
ReadWriteValues<PortalContType> cFunctor(array.GetPortalControl());
|
ReadWriteValues<PortalContType> cFunctor(array.GetPortalControl());
|
||||||
@ -385,7 +386,7 @@ struct BenchmarkArrayTransfer
|
|||||||
|
|
||||||
// Write NumValues to device allocated memory and copies them back to control
|
// Write NumValues to device allocated memory and copies them back to control
|
||||||
// for reading.
|
// for reading.
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchExecToContRead
|
struct BenchExecToContRead
|
||||||
{
|
{
|
||||||
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
||||||
@ -412,7 +413,7 @@ struct BenchmarkArrayTransfer
|
|||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()() const
|
||||||
{
|
{
|
||||||
ArrayType array;
|
ArrayType array;
|
||||||
|
|
||||||
@ -421,7 +422,8 @@ struct BenchmarkArrayTransfer
|
|||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
// Allocate/write data on device
|
// Allocate/write data on device
|
||||||
WriteValues<PortalExecType> functor(array.PrepareForOutput(this->NumValues, DeviceAdapter()));
|
auto portal = array.PrepareForOutput(this->NumValues, DeviceAdapter());
|
||||||
|
WriteValues<PortalExecType> functor(portal);
|
||||||
Algo::Schedule(functor, this->NumValues);
|
Algo::Schedule(functor, this->NumValues);
|
||||||
|
|
||||||
// Read back on host:
|
// Read back on host:
|
||||||
@ -439,7 +441,7 @@ struct BenchmarkArrayTransfer
|
|||||||
|
|
||||||
// Write NumValues to device allocated memory and copies them back to control
|
// Write NumValues to device allocated memory and copies them back to control
|
||||||
// and overwrites them.
|
// and overwrites them.
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchExecToContWrite
|
struct BenchExecToContWrite
|
||||||
{
|
{
|
||||||
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
||||||
@ -475,7 +477,8 @@ struct BenchmarkArrayTransfer
|
|||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
// Allocate/write data on device
|
// Allocate/write data on device
|
||||||
WriteValues<PortalExecType> functor(array.PrepareForOutput(this->NumValues, DeviceAdapter()));
|
auto portal = array.PrepareForOutput(this->NumValues, DeviceAdapter());
|
||||||
|
WriteValues<PortalExecType> functor(portal);
|
||||||
Algo::Schedule(functor, this->NumValues);
|
Algo::Schedule(functor, this->NumValues);
|
||||||
|
|
||||||
// Read back on host:
|
// Read back on host:
|
||||||
@ -492,7 +495,7 @@ struct BenchmarkArrayTransfer
|
|||||||
|
|
||||||
// Write NumValues to device allocated memory and copies them back to control
|
// Write NumValues to device allocated memory and copies them back to control
|
||||||
// for reading and writing.
|
// for reading and writing.
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchExecToContReadWrite
|
struct BenchExecToContReadWrite
|
||||||
{
|
{
|
||||||
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
|
||||||
@ -528,7 +531,8 @@ struct BenchmarkArrayTransfer
|
|||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
// Allocate/write data on device
|
// Allocate/write data on device
|
||||||
WriteValues<PortalExecType> functor(array.PrepareForOutput(this->NumValues, DeviceAdapter()));
|
auto portal = array.PrepareForOutput(this->NumValues, DeviceAdapter());
|
||||||
|
WriteValues<PortalExecType> functor(portal);
|
||||||
Algo::Schedule(functor, this->NumValues);
|
Algo::Schedule(functor, this->NumValues);
|
||||||
|
|
||||||
// Read back on host:
|
// Read back on host:
|
||||||
@ -547,17 +551,16 @@ struct BenchmarkArrayTransfer
|
|||||||
|
|
||||||
using TestTypes = vtkm::ListTagBase<vtkm::Float32>;
|
using TestTypes = vtkm::ListTagBase<vtkm::Float32>;
|
||||||
|
|
||||||
static VTKM_CONT bool Run()
|
static VTKM_CONT bool Run(vtkm::cont::DeviceAdapterId id)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(ContToExecRead, TestTypes());
|
VTKM_RUN_BENCHMARK(ContToExecRead, TestTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ContToExecWrite, TestTypes());
|
VTKM_RUN_BENCHMARK(ContToExecWrite, TestTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ContToExecReadWrite, TestTypes());
|
VTKM_RUN_BENCHMARK(ContToExecReadWrite, TestTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(RoundTripRead, TestTypes());
|
VTKM_RUN_BENCHMARK(RoundTripRead, TestTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(RoundTripReadWrite, TestTypes());
|
VTKM_RUN_BENCHMARK(RoundTripReadWrite, TestTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ExecToContRead, TestTypes());
|
VTKM_RUN_BENCHMARK(ExecToContRead, TestTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ExecToContWrite, TestTypes());
|
VTKM_RUN_BENCHMARK(ExecToContWrite, TestTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ExecToContReadWrite, TestTypes());
|
VTKM_RUN_BENCHMARK(ExecToContReadWrite, TestTypes(), id);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -566,14 +569,11 @@ struct BenchmarkArrayTransfer
|
|||||||
|
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
vtkm::cont::InitLogging(argc, argv);
|
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
|
||||||
|
auto config = vtkm::cont::Initialize(argc, argv, opts);
|
||||||
|
|
||||||
using DeviceAdapter = VTKM_DEFAULT_DEVICE_ADAPTER_TAG;
|
using Benchmarks = vtkm::benchmarking::BenchmarkArrayTransfer;
|
||||||
using Benchmarks = vtkm::benchmarking::BenchmarkArrayTransfer<DeviceAdapter>;
|
|
||||||
|
|
||||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
bool result = Benchmarks::Run(config.Device);
|
||||||
tracker.ForceDevice(DeviceAdapter{});
|
|
||||||
|
|
||||||
bool result = Benchmarks::Run();
|
|
||||||
return result ? EXIT_SUCCESS : EXIT_FAILURE;
|
return result ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
#include <vtkm/cont/AtomicArray.h>
|
#include <vtkm/cont/AtomicArray.h>
|
||||||
#include <vtkm/cont/RuntimeDeviceTracker.h>
|
#include <vtkm/cont/RuntimeDeviceTracker.h>
|
||||||
#include <vtkm/cont/Timer.h>
|
#include <vtkm/cont/Timer.h>
|
||||||
|
#include <vtkm/cont/internal/DeviceAdapterTag.h>
|
||||||
|
|
||||||
#include <vtkm/exec/FunctorBase.h>
|
#include <vtkm/exec/FunctorBase.h>
|
||||||
|
|
||||||
@ -48,25 +49,24 @@ static constexpr vtkm::Id NumWrites = 33554432; // 2^25
|
|||||||
VTKM_MAKE_BENCHMARK(Name##32768, Class, 32768); \
|
VTKM_MAKE_BENCHMARK(Name##32768, Class, 32768); \
|
||||||
VTKM_MAKE_BENCHMARK(Name##1048576, Class, 1048576)
|
VTKM_MAKE_BENCHMARK(Name##1048576, Class, 1048576)
|
||||||
|
|
||||||
#define RUN_ATOMIC_BENCHMARKS(Name) \
|
#define RUN_ATOMIC_BENCHMARKS(Name, id) \
|
||||||
VTKM_RUN_BENCHMARK(Name##1, vtkm::cont::AtomicArrayTypeListTag{}); \
|
VTKM_RUN_BENCHMARK(Name##1, vtkm::cont::AtomicArrayTypeListTag{}, id); \
|
||||||
VTKM_RUN_BENCHMARK(Name##8, vtkm::cont::AtomicArrayTypeListTag{}); \
|
VTKM_RUN_BENCHMARK(Name##8, vtkm::cont::AtomicArrayTypeListTag{}, id); \
|
||||||
VTKM_RUN_BENCHMARK(Name##32, vtkm::cont::AtomicArrayTypeListTag{}); \
|
VTKM_RUN_BENCHMARK(Name##32, vtkm::cont::AtomicArrayTypeListTag{}, id); \
|
||||||
VTKM_RUN_BENCHMARK(Name##512, vtkm::cont::AtomicArrayTypeListTag{}); \
|
VTKM_RUN_BENCHMARK(Name##512, vtkm::cont::AtomicArrayTypeListTag{}, id); \
|
||||||
VTKM_RUN_BENCHMARK(Name##2048, vtkm::cont::AtomicArrayTypeListTag{}); \
|
VTKM_RUN_BENCHMARK(Name##2048, vtkm::cont::AtomicArrayTypeListTag{}, id); \
|
||||||
VTKM_RUN_BENCHMARK(Name##32768, vtkm::cont::AtomicArrayTypeListTag{}); \
|
VTKM_RUN_BENCHMARK(Name##32768, vtkm::cont::AtomicArrayTypeListTag{}, id); \
|
||||||
VTKM_RUN_BENCHMARK(Name##1048576, vtkm::cont::AtomicArrayTypeListTag{})
|
VTKM_RUN_BENCHMARK(Name##1048576, vtkm::cont::AtomicArrayTypeListTag{}, id)
|
||||||
|
|
||||||
template <class Device>
|
|
||||||
class BenchmarkAtomicArray
|
class BenchmarkAtomicArray
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
using Algo = vtkm::cont::DeviceAdapterAlgorithm<Device>;
|
using Algo = vtkm::cont::Algorithm;
|
||||||
using Timer = vtkm::cont::Timer;
|
using Timer = vtkm::cont::Timer;
|
||||||
|
|
||||||
// Benchmarks AtomicArray::Add such that each work index writes to adjacent
|
// Benchmarks AtomicArray::Add such that each work index writes to adjacent
|
||||||
// indices.
|
// indices.
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchAddSeq
|
struct BenchAddSeq
|
||||||
{
|
{
|
||||||
vtkm::Id ArraySize;
|
vtkm::Id ArraySize;
|
||||||
@ -92,17 +92,17 @@ public:
|
|||||||
BenchAddSeq(vtkm::Id arraySize)
|
BenchAddSeq(vtkm::Id arraySize)
|
||||||
: ArraySize(arraySize)
|
: ArraySize(arraySize)
|
||||||
{
|
{
|
||||||
this->Data.PrepareForOutput(this->ArraySize, Device{});
|
this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter());
|
||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
vtkm::cont::AtomicArray<ValueType> array(this->Data);
|
vtkm::cont::AtomicArray<ValueType> array(this->Data);
|
||||||
auto portal = array.PrepareForExecution(Device{});
|
auto portal = array.PrepareForExecution(DeviceAdapter());
|
||||||
Worker<decltype(portal)> worker{ this->ArraySize, portal };
|
Worker<decltype(portal)> worker{ this->ArraySize, portal };
|
||||||
|
|
||||||
Timer timer{ Device() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algo::Schedule(worker, NumWrites);
|
Algo::Schedule(worker, NumWrites);
|
||||||
|
|
||||||
@ -120,7 +120,7 @@ public:
|
|||||||
MAKE_ATOMIC_BENCHMARKS(AddSeq, BenchAddSeq);
|
MAKE_ATOMIC_BENCHMARKS(AddSeq, BenchAddSeq);
|
||||||
|
|
||||||
// Provides a non-atomic baseline for BenchAddSeq
|
// Provides a non-atomic baseline for BenchAddSeq
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchAddSeqBaseline
|
struct BenchAddSeqBaseline
|
||||||
{
|
{
|
||||||
vtkm::Id ArraySize;
|
vtkm::Id ArraySize;
|
||||||
@ -155,10 +155,10 @@ public:
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
auto portal = this->Data.PrepareForOutput(this->ArraySize, Device{});
|
auto portal = this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter());
|
||||||
Worker<decltype(portal)> worker{ this->ArraySize, portal };
|
Worker<decltype(portal)> worker{ this->ArraySize, portal };
|
||||||
|
|
||||||
Timer timer{ Device() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algo::Schedule(worker, NumWrites);
|
Algo::Schedule(worker, NumWrites);
|
||||||
|
|
||||||
@ -177,7 +177,7 @@ public:
|
|||||||
|
|
||||||
// Benchmarks AtomicArray::Add such that each work index writes to a strided
|
// Benchmarks AtomicArray::Add such that each work index writes to a strided
|
||||||
// index ( floor(i / stride) + stride * (i % stride)
|
// index ( floor(i / stride) + stride * (i % stride)
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchAddStride
|
struct BenchAddStride
|
||||||
{
|
{
|
||||||
vtkm::Id ArraySize;
|
vtkm::Id ArraySize;
|
||||||
@ -211,17 +211,17 @@ public:
|
|||||||
: ArraySize(arraySize)
|
: ArraySize(arraySize)
|
||||||
, Stride(stride)
|
, Stride(stride)
|
||||||
{
|
{
|
||||||
this->Data.PrepareForOutput(this->ArraySize, Device{});
|
this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter());
|
||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
vtkm::cont::AtomicArray<ValueType> array(this->Data);
|
vtkm::cont::AtomicArray<ValueType> array(this->Data);
|
||||||
auto portal = array.PrepareForExecution(Device{});
|
auto portal = array.PrepareForExecution(DeviceAdapter());
|
||||||
Worker<decltype(portal)> worker{ this->ArraySize, this->Stride, portal };
|
Worker<decltype(portal)> worker{ this->ArraySize, this->Stride, portal };
|
||||||
|
|
||||||
Timer timer{ Device() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algo::Schedule(worker, NumWrites);
|
Algo::Schedule(worker, NumWrites);
|
||||||
|
|
||||||
@ -240,7 +240,7 @@ public:
|
|||||||
MAKE_ATOMIC_BENCHMARKS(AddStride, BenchAddStride);
|
MAKE_ATOMIC_BENCHMARKS(AddStride, BenchAddStride);
|
||||||
|
|
||||||
// Non-atomic baseline for AddStride
|
// Non-atomic baseline for AddStride
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchAddStrideBaseline
|
struct BenchAddStrideBaseline
|
||||||
{
|
{
|
||||||
vtkm::Id ArraySize;
|
vtkm::Id ArraySize;
|
||||||
@ -279,10 +279,10 @@ public:
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
auto portal = this->Data.PrepareForOutput(this->ArraySize, Device{});
|
auto portal = this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter());
|
||||||
Worker<decltype(portal)> worker{ this->ArraySize, this->Stride, portal };
|
Worker<decltype(portal)> worker{ this->ArraySize, this->Stride, portal };
|
||||||
|
|
||||||
Timer timer{ Device() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algo::Schedule(worker, NumWrites);
|
Algo::Schedule(worker, NumWrites);
|
||||||
|
|
||||||
@ -302,7 +302,7 @@ public:
|
|||||||
|
|
||||||
// Benchmarks AtomicArray::CompareAndSwap such that each work index writes to adjacent
|
// Benchmarks AtomicArray::CompareAndSwap such that each work index writes to adjacent
|
||||||
// indices.
|
// indices.
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchCASSeq
|
struct BenchCASSeq
|
||||||
{
|
{
|
||||||
vtkm::Id ArraySize;
|
vtkm::Id ArraySize;
|
||||||
@ -340,17 +340,17 @@ public:
|
|||||||
BenchCASSeq(vtkm::Id arraySize)
|
BenchCASSeq(vtkm::Id arraySize)
|
||||||
: ArraySize(arraySize)
|
: ArraySize(arraySize)
|
||||||
{
|
{
|
||||||
this->Data.PrepareForOutput(this->ArraySize, Device{});
|
this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter());
|
||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
vtkm::cont::AtomicArray<ValueType> array(this->Data);
|
vtkm::cont::AtomicArray<ValueType> array(this->Data);
|
||||||
auto portal = array.PrepareForExecution(Device{});
|
auto portal = array.PrepareForExecution(DeviceAdapter());
|
||||||
Worker<decltype(portal)> worker{ this->ArraySize, portal };
|
Worker<decltype(portal)> worker{ this->ArraySize, portal };
|
||||||
|
|
||||||
Timer timer{ Device() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algo::Schedule(worker, NumWrites);
|
Algo::Schedule(worker, NumWrites);
|
||||||
|
|
||||||
@ -368,7 +368,7 @@ public:
|
|||||||
MAKE_ATOMIC_BENCHMARKS(CASSeq, BenchCASSeq);
|
MAKE_ATOMIC_BENCHMARKS(CASSeq, BenchCASSeq);
|
||||||
|
|
||||||
// Provides a non-atomic baseline for BenchCASSeq
|
// Provides a non-atomic baseline for BenchCASSeq
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchCASSeqBaseline
|
struct BenchCASSeqBaseline
|
||||||
{
|
{
|
||||||
vtkm::Id ArraySize;
|
vtkm::Id ArraySize;
|
||||||
@ -405,10 +405,10 @@ public:
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
auto portal = this->Data.PrepareForOutput(this->ArraySize, Device{});
|
auto portal = this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter());
|
||||||
Worker<decltype(portal)> worker{ this->ArraySize, portal };
|
Worker<decltype(portal)> worker{ this->ArraySize, portal };
|
||||||
|
|
||||||
Timer timer{ Device() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algo::Schedule(worker, NumWrites);
|
Algo::Schedule(worker, NumWrites);
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -427,7 +427,7 @@ public:
|
|||||||
// Benchmarks AtomicArray::CompareAndSwap such that each work index writes to
|
// Benchmarks AtomicArray::CompareAndSwap such that each work index writes to
|
||||||
// a strided index:
|
// a strided index:
|
||||||
// ( floor(i / stride) + stride * (i % stride)
|
// ( floor(i / stride) + stride * (i % stride)
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchCASStride
|
struct BenchCASStride
|
||||||
{
|
{
|
||||||
vtkm::Id ArraySize;
|
vtkm::Id ArraySize;
|
||||||
@ -469,17 +469,17 @@ public:
|
|||||||
: ArraySize(arraySize)
|
: ArraySize(arraySize)
|
||||||
, Stride(stride)
|
, Stride(stride)
|
||||||
{
|
{
|
||||||
this->Data.PrepareForOutput(this->ArraySize, Device{});
|
this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter());
|
||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
vtkm::cont::AtomicArray<ValueType> array(this->Data);
|
vtkm::cont::AtomicArray<ValueType> array(this->Data);
|
||||||
auto portal = array.PrepareForExecution(Device{});
|
auto portal = array.PrepareForExecution(DeviceAdapter());
|
||||||
Worker<decltype(portal)> worker{ this->ArraySize, this->Stride, portal };
|
Worker<decltype(portal)> worker{ this->ArraySize, this->Stride, portal };
|
||||||
|
|
||||||
Timer timer{ Device() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algo::Schedule(worker, NumWrites);
|
Algo::Schedule(worker, NumWrites);
|
||||||
|
|
||||||
@ -498,7 +498,7 @@ public:
|
|||||||
MAKE_ATOMIC_BENCHMARKS(CASStride, BenchCASStride);
|
MAKE_ATOMIC_BENCHMARKS(CASStride, BenchCASStride);
|
||||||
|
|
||||||
// Non-atomic baseline for CASStride
|
// Non-atomic baseline for CASStride
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct BenchCASStrideBaseline
|
struct BenchCASStrideBaseline
|
||||||
{
|
{
|
||||||
vtkm::Id ArraySize;
|
vtkm::Id ArraySize;
|
||||||
@ -539,10 +539,10 @@ public:
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
auto portal = this->Data.PrepareForOutput(this->ArraySize, Device{});
|
auto portal = this->Data.PrepareForOutput(this->ArraySize, DeviceAdapter());
|
||||||
Worker<decltype(portal)> worker{ this->ArraySize, this->Stride, portal };
|
Worker<decltype(portal)> worker{ this->ArraySize, this->Stride, portal };
|
||||||
|
|
||||||
Timer timer{ Device() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algo::Schedule(worker, NumWrites);
|
Algo::Schedule(worker, NumWrites);
|
||||||
|
|
||||||
@ -560,17 +560,17 @@ public:
|
|||||||
};
|
};
|
||||||
MAKE_ATOMIC_BENCHMARKS(CASStrideBase, BenchCASStrideBaseline);
|
MAKE_ATOMIC_BENCHMARKS(CASStrideBase, BenchCASStrideBaseline);
|
||||||
|
|
||||||
static void Run()
|
static void Run(vtkm::cont::DeviceAdapterId id)
|
||||||
{
|
{
|
||||||
RUN_ATOMIC_BENCHMARKS(AddSeq);
|
RUN_ATOMIC_BENCHMARKS(AddSeq, id);
|
||||||
RUN_ATOMIC_BENCHMARKS(AddSeqBase);
|
RUN_ATOMIC_BENCHMARKS(AddSeqBase, id);
|
||||||
RUN_ATOMIC_BENCHMARKS(AddStride);
|
RUN_ATOMIC_BENCHMARKS(AddStride, id);
|
||||||
RUN_ATOMIC_BENCHMARKS(AddStrideBase);
|
RUN_ATOMIC_BENCHMARKS(AddStrideBase, id);
|
||||||
|
|
||||||
RUN_ATOMIC_BENCHMARKS(CASSeq);
|
RUN_ATOMIC_BENCHMARKS(CASSeq, id);
|
||||||
RUN_ATOMIC_BENCHMARKS(CASSeqBase);
|
RUN_ATOMIC_BENCHMARKS(CASSeqBase, id);
|
||||||
RUN_ATOMIC_BENCHMARKS(CASStride);
|
RUN_ATOMIC_BENCHMARKS(CASStride, id);
|
||||||
RUN_ATOMIC_BENCHMARKS(CASStrideBase);
|
RUN_ATOMIC_BENCHMARKS(CASStrideBase, id);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -578,15 +578,12 @@ public:
|
|||||||
|
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
vtkm::cont::InitLogging(argc, argv);
|
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
|
||||||
|
auto config = vtkm::cont::Initialize(argc, argv, opts);
|
||||||
using Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG;
|
|
||||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
|
||||||
tracker.ForceDevice(Device{});
|
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
vtkm::benchmarking::BenchmarkAtomicArray<Device>::Run();
|
vtkm::benchmarking::BenchmarkAtomicArray::Run(config.Device);
|
||||||
}
|
}
|
||||||
catch (std::exception& e)
|
catch (std::exception& e)
|
||||||
{
|
{
|
||||||
|
@ -37,7 +37,7 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
#ifdef VTKM_ENABLE_TBB
|
||||||
#include <tbb/task_scheduler_init.h>
|
#include <tbb/task_scheduler_init.h>
|
||||||
#endif // TBB
|
#endif // TBB
|
||||||
|
|
||||||
@ -58,7 +58,7 @@ const size_t COL_WIDTH = 32;
|
|||||||
template <typename ValueType, typename DeviceAdapter>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
struct MeasureCopySpeed
|
struct MeasureCopySpeed
|
||||||
{
|
{
|
||||||
using Algo = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapter>;
|
using Algo = vtkm::cont::Algorithm;
|
||||||
|
|
||||||
vtkm::cont::ArrayHandle<ValueType> Source;
|
vtkm::cont::ArrayHandle<ValueType> Source;
|
||||||
vtkm::cont::ArrayHandle<ValueType> Destination;
|
vtkm::cont::ArrayHandle<ValueType> Destination;
|
||||||
@ -106,19 +106,17 @@ void PrintDivider(std::ostream& out)
|
|||||||
out << "|-" << fillStr << "-|-" << fillStr << "-|" << std::endl;
|
out << "|-" << fillStr << "-|-" << fillStr << "-|" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename ValueType>
|
template <typename ValueType, typename DeviceAdapter>
|
||||||
void BenchmarkValueType()
|
void BenchmarkValueType(vtkm::cont::DeviceAdapterId id)
|
||||||
{
|
{
|
||||||
PrintRow(std::cout,
|
PrintRow(std::cout, vtkm::testing::TypeName<ValueType>::Name(), id.GetName());
|
||||||
vtkm::testing::TypeName<ValueType>::Name(),
|
|
||||||
vtkm::cont::DeviceAdapterTraits<VTKM_DEFAULT_DEVICE_ADAPTER_TAG>::GetName());
|
|
||||||
|
|
||||||
PrintDivider(std::cout);
|
PrintDivider(std::cout);
|
||||||
|
|
||||||
Benchmarker bench(15, 100);
|
Benchmarker bench(15, 100);
|
||||||
for (vtkm::UInt64 size = COPY_SIZE_MIN; size <= COPY_SIZE_MAX; size <<= COPY_SIZE_INC)
|
for (vtkm::UInt64 size = COPY_SIZE_MIN; size <= COPY_SIZE_MAX; size <<= COPY_SIZE_INC)
|
||||||
{
|
{
|
||||||
MeasureCopySpeed<ValueType, VTKM_DEFAULT_DEVICE_ADAPTER_TAG> functor(size);
|
MeasureCopySpeed<ValueType, DeviceAdapter> functor(size);
|
||||||
bench.Reset();
|
bench.Reset();
|
||||||
|
|
||||||
std::string speedStr;
|
std::string speedStr;
|
||||||
@ -142,22 +140,58 @@ void BenchmarkValueType()
|
|||||||
}
|
}
|
||||||
} // end namespace vtkm::benchmarking
|
} // end namespace vtkm::benchmarking
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
using namespace vtkm::benchmarking;
|
||||||
|
|
||||||
|
struct BenchmarkValueTypeFunctor
|
||||||
|
{
|
||||||
|
template <typename DeviceAdapter>
|
||||||
|
bool operator()(DeviceAdapter id)
|
||||||
|
{
|
||||||
|
BenchmarkValueType<vtkm::UInt8, DeviceAdapter>(id);
|
||||||
|
BenchmarkValueType<vtkm::Vec<vtkm::UInt8, 2>, DeviceAdapter>(id);
|
||||||
|
BenchmarkValueType<vtkm::Vec<vtkm::UInt8, 3>, DeviceAdapter>(id);
|
||||||
|
BenchmarkValueType<vtkm::Vec<vtkm::UInt8, 4>, DeviceAdapter>(id);
|
||||||
|
|
||||||
|
BenchmarkValueType<vtkm::UInt32, DeviceAdapter>(id);
|
||||||
|
BenchmarkValueType<vtkm::Vec<vtkm::UInt32, 2>, DeviceAdapter>(id);
|
||||||
|
|
||||||
|
BenchmarkValueType<vtkm::UInt64, DeviceAdapter>(id);
|
||||||
|
BenchmarkValueType<vtkm::Vec<vtkm::UInt64, 2>, DeviceAdapter>(id);
|
||||||
|
|
||||||
|
BenchmarkValueType<vtkm::Float32, DeviceAdapter>(id);
|
||||||
|
BenchmarkValueType<vtkm::Vec<vtkm::Float32, 2>, DeviceAdapter>(id);
|
||||||
|
|
||||||
|
BenchmarkValueType<vtkm::Float64, DeviceAdapter>(id);
|
||||||
|
BenchmarkValueType<vtkm::Vec<vtkm::Float64, 2>, DeviceAdapter>(id);
|
||||||
|
|
||||||
|
BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float32>, DeviceAdapter>(id);
|
||||||
|
BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float64>, DeviceAdapter>(id);
|
||||||
|
BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float32>, DeviceAdapter>(id);
|
||||||
|
BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float64>, DeviceAdapter>(id);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
vtkm::cont::InitLogging(argc, argv);
|
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
|
||||||
|
auto config = vtkm::cont::Initialize(argc, argv, opts);
|
||||||
|
|
||||||
using namespace vtkm::benchmarking;
|
|
||||||
|
|
||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
#ifdef VTKM_ENABLE_TBB
|
||||||
int numThreads = tbb::task_scheduler_init::automatic;
|
int numThreads = tbb::task_scheduler_init::automatic;
|
||||||
#endif // TBB
|
#endif // TBB
|
||||||
|
|
||||||
if (argc == 3)
|
if (config.Arguments.size() == 2)
|
||||||
{
|
{
|
||||||
if (std::string(argv[1]) == "NumThreads")
|
if (std::string(config.Arguments[0]) == "NumThreads")
|
||||||
{
|
{
|
||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
#ifdef VTKM_ENABLE_TBB
|
||||||
std::istringstream parse(argv[2]);
|
std::istringstream parse(config.Arguments[1]);
|
||||||
parse >> numThreads;
|
parse >> numThreads;
|
||||||
std::cout << "Selected " << numThreads << " TBB threads." << std::endl;
|
std::cout << "Selected " << numThreads << " TBB threads." << std::endl;
|
||||||
#else
|
#else
|
||||||
@ -166,35 +200,11 @@ int main(int argc, char* argv[])
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
#ifdef VTKM_ENABLE_TBB
|
||||||
// Must not be destroyed as long as benchmarks are running:
|
// Must not be destroyed as long as benchmarks are running:
|
||||||
tbb::task_scheduler_init init(numThreads);
|
tbb::task_scheduler_init init(numThreads);
|
||||||
#endif // TBB
|
#endif // TBB
|
||||||
|
|
||||||
using Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG;
|
BenchmarkValueTypeFunctor functor;
|
||||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
vtkm::cont::TryExecuteOnDevice(config.Device, functor);
|
||||||
tracker.ForceDevice(Device{});
|
|
||||||
|
|
||||||
|
|
||||||
BenchmarkValueType<vtkm::UInt8>();
|
|
||||||
BenchmarkValueType<vtkm::Vec<vtkm::UInt8, 2>>();
|
|
||||||
BenchmarkValueType<vtkm::Vec<vtkm::UInt8, 3>>();
|
|
||||||
BenchmarkValueType<vtkm::Vec<vtkm::UInt8, 4>>();
|
|
||||||
|
|
||||||
BenchmarkValueType<vtkm::UInt32>();
|
|
||||||
BenchmarkValueType<vtkm::Vec<vtkm::UInt32, 2>>();
|
|
||||||
|
|
||||||
BenchmarkValueType<vtkm::UInt64>();
|
|
||||||
BenchmarkValueType<vtkm::Vec<vtkm::UInt64, 2>>();
|
|
||||||
|
|
||||||
BenchmarkValueType<vtkm::Float32>();
|
|
||||||
BenchmarkValueType<vtkm::Vec<vtkm::Float32, 2>>();
|
|
||||||
|
|
||||||
BenchmarkValueType<vtkm::Float64>();
|
|
||||||
BenchmarkValueType<vtkm::Vec<vtkm::Float64, 2>>();
|
|
||||||
|
|
||||||
BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float32>>();
|
|
||||||
BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float64>>();
|
|
||||||
BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float32>>();
|
|
||||||
BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float64>>();
|
|
||||||
}
|
}
|
||||||
|
@ -44,9 +44,10 @@
|
|||||||
|
|
||||||
#include <vtkm/internal/Windows.h>
|
#include <vtkm/internal/Windows.h>
|
||||||
|
|
||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
#ifdef VTKM_ENABLE_TBB
|
||||||
#include <tbb/task_scheduler_init.h>
|
#include <tbb/task_scheduler_init.h>
|
||||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
#endif
|
||||||
|
#ifdef VTKM_ENABLE_OPENMP
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -168,25 +169,23 @@ static const std::string DIVIDER(40, '-');
|
|||||||
/// This class runs a series of micro-benchmarks to measure
|
/// This class runs a series of micro-benchmarks to measure
|
||||||
/// performance of the parallel primitives provided by each
|
/// performance of the parallel primitives provided by each
|
||||||
/// device adapter
|
/// device adapter
|
||||||
template <class DeviceAdapterTag>
|
|
||||||
class BenchmarkDeviceAdapter
|
class BenchmarkDeviceAdapter
|
||||||
{
|
{
|
||||||
using StorageTag = vtkm::cont::StorageTagBasic;
|
using StorageTag = vtkm::cont::StorageTagBasic;
|
||||||
|
|
||||||
using IdArrayHandle = vtkm::cont::ArrayHandle<vtkm::Id, StorageTag>;
|
using IdArrayHandle = vtkm::cont::ArrayHandle<vtkm::Id, StorageTag>;
|
||||||
|
|
||||||
using Algorithm = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapterTag>;
|
using Algorithm = vtkm::cont::Algorithm;
|
||||||
|
|
||||||
using Timer = vtkm::cont::Timer;
|
using Timer = vtkm::cont::Timer;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// Various kernels used by the different benchmarks to accelerate
|
// Various kernels used by the different benchmarks to accelerate
|
||||||
// initialization of data
|
// initialization of data
|
||||||
template <typename Value>
|
template <typename Value, typename PortalType>
|
||||||
struct FillTestValueKernel : vtkm::exec::FunctorBase
|
struct FillTestValueKernel : vtkm::exec::FunctorBase
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
using PortalType = typename ValueArrayHandle::template ExecutionTypes<DeviceAdapterTag>::Portal;
|
|
||||||
|
|
||||||
PortalType Output;
|
PortalType Output;
|
||||||
|
|
||||||
@ -199,11 +198,10 @@ public:
|
|||||||
VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i, Value())); }
|
VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i, Value())); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename PortalType>
|
||||||
struct FillScaledTestValueKernel : vtkm::exec::FunctorBase
|
struct FillScaledTestValueKernel : vtkm::exec::FunctorBase
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
using PortalType = typename ValueArrayHandle::template ExecutionTypes<DeviceAdapterTag>::Portal;
|
|
||||||
|
|
||||||
PortalType Output;
|
PortalType Output;
|
||||||
const vtkm::Id IdScale;
|
const vtkm::Id IdScale;
|
||||||
@ -218,11 +216,10 @@ public:
|
|||||||
VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i * IdScale, Value())); }
|
VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i * IdScale, Value())); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename PortalType>
|
||||||
struct FillModuloTestValueKernel : vtkm::exec::FunctorBase
|
struct FillModuloTestValueKernel : vtkm::exec::FunctorBase
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
using PortalType = typename ValueArrayHandle::template ExecutionTypes<DeviceAdapterTag>::Portal;
|
|
||||||
|
|
||||||
PortalType Output;
|
PortalType Output;
|
||||||
const vtkm::Id Modulus;
|
const vtkm::Id Modulus;
|
||||||
@ -237,11 +234,10 @@ public:
|
|||||||
VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i % Modulus, Value())); }
|
VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i % Modulus, Value())); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename PortalType>
|
||||||
struct FillBinaryTestValueKernel : vtkm::exec::FunctorBase
|
struct FillBinaryTestValueKernel : vtkm::exec::FunctorBase
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
using PortalType = typename ValueArrayHandle::template ExecutionTypes<DeviceAdapterTag>::Portal;
|
|
||||||
|
|
||||||
PortalType Output;
|
PortalType Output;
|
||||||
const vtkm::Id Modulus;
|
const vtkm::Id Modulus;
|
||||||
@ -260,7 +256,7 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchCopy
|
struct BenchCopy
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
@ -284,7 +280,7 @@ private:
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algorithm::Copy(ValueHandle_src, ValueHandle_dst);
|
Algorithm::Copy(ValueHandle_src, ValueHandle_dst);
|
||||||
|
|
||||||
@ -305,7 +301,7 @@ private:
|
|||||||
};
|
};
|
||||||
VTKM_MAKE_BENCHMARK(Copy, BenchCopy);
|
VTKM_MAKE_BENCHMARK(Copy, BenchCopy);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchCopyIf
|
struct BenchCopyIf
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
@ -322,18 +318,18 @@ private:
|
|||||||
{
|
{
|
||||||
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
||||||
vtkm::Id modulo = arraySize / N_VALID;
|
vtkm::Id modulo = arraySize / N_VALID;
|
||||||
Algorithm::Schedule(
|
auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter());
|
||||||
FillTestValueKernel<Value>(ValueHandle.PrepareForOutput(arraySize, DeviceAdapterTag())),
|
Algorithm::Schedule(FillTestValueKernel<Value, decltype(vHPortal)>(vHPortal), arraySize);
|
||||||
arraySize);
|
|
||||||
Algorithm::Schedule(FillBinaryTestValueKernel<vtkm::Id>(
|
auto sHPortal = StencilHandle.PrepareForOutput(arraySize, DeviceAdapter());
|
||||||
modulo, StencilHandle.PrepareForOutput(arraySize, DeviceAdapterTag())),
|
Algorithm::Schedule(FillBinaryTestValueKernel<vtkm::Id, decltype(sHPortal)>(modulo, sHPortal),
|
||||||
arraySize);
|
arraySize);
|
||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algorithm::CopyIf(ValueHandle, StencilHandle, OutHandle);
|
Algorithm::CopyIf(ValueHandle, StencilHandle, OutHandle);
|
||||||
|
|
||||||
@ -365,7 +361,7 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(CopyIf75, BenchCopyIf, 75);
|
VTKM_MAKE_BENCHMARK(CopyIf75, BenchCopyIf, 75);
|
||||||
VTKM_MAKE_BENCHMARK(CopyIf100, BenchCopyIf, 100);
|
VTKM_MAKE_BENCHMARK(CopyIf100, BenchCopyIf, 100);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchLowerBounds
|
struct BenchLowerBounds
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
@ -381,18 +377,18 @@ private:
|
|||||||
, PERCENT_VALUES(value_percent)
|
, PERCENT_VALUES(value_percent)
|
||||||
{
|
{
|
||||||
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
||||||
Algorithm::Schedule(
|
auto iHPortal = InputHandle.PrepareForOutput(arraySize, DeviceAdapter());
|
||||||
FillTestValueKernel<Value>(InputHandle.PrepareForOutput(arraySize, DeviceAdapterTag())),
|
Algorithm::Schedule(FillTestValueKernel<Value, decltype(iHPortal)>(iHPortal), arraySize);
|
||||||
arraySize);
|
auto vHPortal = ValueHandle.PrepareForOutput(N_VALS, DeviceAdapter());
|
||||||
Algorithm::Schedule(FillScaledTestValueKernel<Value>(
|
Algorithm::Schedule(FillScaledTestValueKernel<Value, decltype(vHPortal)>(2, vHPortal),
|
||||||
2, ValueHandle.PrepareForOutput(N_VALS, DeviceAdapterTag())),
|
|
||||||
N_VALS);
|
N_VALS);
|
||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
|
||||||
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algorithm::LowerBounds(InputHandle, ValueHandle, OutHandle);
|
Algorithm::LowerBounds(InputHandle, ValueHandle, OutHandle);
|
||||||
|
|
||||||
@ -424,7 +420,7 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(LowerBounds75, BenchLowerBounds, 75);
|
VTKM_MAKE_BENCHMARK(LowerBounds75, BenchLowerBounds, 75);
|
||||||
VTKM_MAKE_BENCHMARK(LowerBounds100, BenchLowerBounds, 100);
|
VTKM_MAKE_BENCHMARK(LowerBounds100, BenchLowerBounds, 100);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchReduce
|
struct BenchReduce
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
@ -438,9 +434,8 @@ private:
|
|||||||
BenchReduce()
|
BenchReduce()
|
||||||
{
|
{
|
||||||
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
||||||
Algorithm::Schedule(
|
auto iHPortal = this->InputHandle.PrepareForOutput(arraySize, DeviceAdapter());
|
||||||
FillTestValueKernel<Value>(InputHandle.PrepareForOutput(arraySize, DeviceAdapterTag())),
|
Algorithm::Schedule(FillTestValueKernel<Value, decltype(iHPortal)>(iHPortal), arraySize);
|
||||||
arraySize);
|
|
||||||
this->Result =
|
this->Result =
|
||||||
Algorithm::Reduce(this->InputHandle, vtkm::TypeTraits<Value>::ZeroInitialization());
|
Algorithm::Reduce(this->InputHandle, vtkm::TypeTraits<Value>::ZeroInitialization());
|
||||||
}
|
}
|
||||||
@ -448,9 +443,11 @@ private:
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
|
||||||
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Value tmp = Algorithm::Reduce(InputHandle, vtkm::TypeTraits<Value>::ZeroInitialization());
|
Value tmp =
|
||||||
|
Algorithm::Reduce(this->InputHandle, vtkm::TypeTraits<Value>::ZeroInitialization());
|
||||||
vtkm::Float64 time = timer.GetElapsedTime();
|
vtkm::Float64 time = timer.GetElapsedTime();
|
||||||
if (tmp != this->Result)
|
if (tmp != this->Result)
|
||||||
{
|
{
|
||||||
@ -473,7 +470,7 @@ private:
|
|||||||
};
|
};
|
||||||
VTKM_MAKE_BENCHMARK(Reduce, BenchReduce);
|
VTKM_MAKE_BENCHMARK(Reduce, BenchReduce);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchReduceByKey
|
struct BenchReduceByKey
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
@ -489,11 +486,10 @@ private:
|
|||||||
, PERCENT_KEYS(key_percent)
|
, PERCENT_KEYS(key_percent)
|
||||||
{
|
{
|
||||||
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
||||||
Algorithm::Schedule(
|
auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter());
|
||||||
FillTestValueKernel<Value>(ValueHandle.PrepareForOutput(arraySize, DeviceAdapterTag())),
|
Algorithm::Schedule(FillTestValueKernel<Value, decltype(vHPortal)>(vHPortal), arraySize);
|
||||||
arraySize);
|
auto kHPortal = KeyHandle.PrepareForOutput(arraySize, DeviceAdapter());
|
||||||
Algorithm::Schedule(FillModuloTestValueKernel<vtkm::Id>(
|
Algorithm::Schedule(FillModuloTestValueKernel<vtkm::Id, decltype(kHPortal)>(N_KEYS, kHPortal),
|
||||||
N_KEYS, KeyHandle.PrepareForOutput(arraySize, DeviceAdapterTag())),
|
|
||||||
arraySize);
|
arraySize);
|
||||||
Algorithm::SortByKey(KeyHandle, ValueHandle);
|
Algorithm::SortByKey(KeyHandle, ValueHandle);
|
||||||
}
|
}
|
||||||
@ -501,7 +497,7 @@ private:
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algorithm::ReduceByKey(KeyHandle, ValueHandle, KeysOut, ValuesOut, vtkm::Add());
|
Algorithm::ReduceByKey(KeyHandle, ValueHandle, KeysOut, ValuesOut, vtkm::Add());
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -532,7 +528,7 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(ReduceByKey75, BenchReduceByKey, 75);
|
VTKM_MAKE_BENCHMARK(ReduceByKey75, BenchReduceByKey, 75);
|
||||||
VTKM_MAKE_BENCHMARK(ReduceByKey100, BenchReduceByKey, 100);
|
VTKM_MAKE_BENCHMARK(ReduceByKey100, BenchReduceByKey, 100);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchScanInclusive
|
struct BenchScanInclusive
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
@ -542,15 +538,14 @@ private:
|
|||||||
BenchScanInclusive()
|
BenchScanInclusive()
|
||||||
{
|
{
|
||||||
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
||||||
Algorithm::Schedule(
|
auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter());
|
||||||
FillTestValueKernel<Value>(ValueHandle.PrepareForOutput(arraySize, DeviceAdapterTag())),
|
Algorithm::Schedule(FillTestValueKernel<Value, decltype(vHPortal)>(vHPortal), arraySize);
|
||||||
arraySize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algorithm::ScanInclusive(ValueHandle, OutHandle);
|
Algorithm::ScanInclusive(ValueHandle, OutHandle);
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -570,7 +565,7 @@ private:
|
|||||||
};
|
};
|
||||||
VTKM_MAKE_BENCHMARK(ScanInclusive, BenchScanInclusive);
|
VTKM_MAKE_BENCHMARK(ScanInclusive, BenchScanInclusive);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchScanExclusive
|
struct BenchScanExclusive
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
@ -581,15 +576,15 @@ private:
|
|||||||
BenchScanExclusive()
|
BenchScanExclusive()
|
||||||
{
|
{
|
||||||
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
||||||
Algorithm::Schedule(
|
auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter());
|
||||||
FillTestValueKernel<Value>(ValueHandle.PrepareForOutput(arraySize, DeviceAdapterTag())),
|
Algorithm::Schedule(FillTestValueKernel<Value, decltype(vHPortal)>(vHPortal), arraySize);
|
||||||
arraySize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
|
||||||
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algorithm::ScanExclusive(ValueHandle, OutHandle);
|
Algorithm::ScanExclusive(ValueHandle, OutHandle);
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -609,7 +604,7 @@ private:
|
|||||||
};
|
};
|
||||||
VTKM_MAKE_BENCHMARK(ScanExclusive, BenchScanExclusive);
|
VTKM_MAKE_BENCHMARK(ScanExclusive, BenchScanExclusive);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchSort
|
struct BenchSort
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
@ -634,7 +629,7 @@ private:
|
|||||||
ValueArrayHandle array;
|
ValueArrayHandle array;
|
||||||
Algorithm::Copy(this->ValueHandle, array);
|
Algorithm::Copy(this->ValueHandle, array);
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algorithm::Sort(array);
|
Algorithm::Sort(array);
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -654,7 +649,7 @@ private:
|
|||||||
};
|
};
|
||||||
VTKM_MAKE_BENCHMARK(Sort, BenchSort);
|
VTKM_MAKE_BENCHMARK(Sort, BenchSort);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchSortByKey
|
struct BenchSortByKey
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
@ -677,8 +672,8 @@ private:
|
|||||||
{
|
{
|
||||||
portal.Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value()));
|
portal.Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value()));
|
||||||
}
|
}
|
||||||
Algorithm::Schedule(FillModuloTestValueKernel<vtkm::Id>(
|
auto kHPortal = KeyHandle.PrepareForOutput(arraySize, DeviceAdapter());
|
||||||
N_KEYS, KeyHandle.PrepareForOutput(arraySize, DeviceAdapterTag())),
|
Algorithm::Schedule(FillModuloTestValueKernel<vtkm::Id, decltype(kHPortal)>(N_KEYS, kHPortal),
|
||||||
arraySize);
|
arraySize);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -690,7 +685,7 @@ private:
|
|||||||
Algorithm::Copy(this->KeyHandle, keys);
|
Algorithm::Copy(this->KeyHandle, keys);
|
||||||
Algorithm::Copy(this->ValueHandle, values);
|
Algorithm::Copy(this->ValueHandle, values);
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algorithm::SortByKey(keys, values);
|
Algorithm::SortByKey(keys, values);
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -721,7 +716,7 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(SortByKey75, BenchSortByKey, 75);
|
VTKM_MAKE_BENCHMARK(SortByKey75, BenchSortByKey, 75);
|
||||||
VTKM_MAKE_BENCHMARK(SortByKey100, BenchSortByKey, 100);
|
VTKM_MAKE_BENCHMARK(SortByKey100, BenchSortByKey, 100);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchStableSortIndices
|
struct BenchStableSortIndices
|
||||||
{
|
{
|
||||||
using SSI = vtkm::worklet::StableSortIndices;
|
using SSI = vtkm::worklet::StableSortIndices;
|
||||||
@ -749,7 +744,7 @@ private:
|
|||||||
vtkm::cont::ArrayHandle<vtkm::Id> indices;
|
vtkm::cont::ArrayHandle<vtkm::Id> indices;
|
||||||
Algorithm::Copy(vtkm::cont::ArrayHandleIndex(arraySize), indices);
|
Algorithm::Copy(vtkm::cont::ArrayHandleIndex(arraySize), indices);
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
SSI::Sort(ValueHandle, indices);
|
SSI::Sort(ValueHandle, indices);
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -769,7 +764,7 @@ private:
|
|||||||
};
|
};
|
||||||
VTKM_MAKE_BENCHMARK(StableSortIndices, BenchStableSortIndices);
|
VTKM_MAKE_BENCHMARK(StableSortIndices, BenchStableSortIndices);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchStableSortIndicesUnique
|
struct BenchStableSortIndicesUnique
|
||||||
{
|
{
|
||||||
using SSI = vtkm::worklet::StableSortIndices;
|
using SSI = vtkm::worklet::StableSortIndices;
|
||||||
@ -787,19 +782,19 @@ private:
|
|||||||
, PERCENT_VALID(percent_valid)
|
, PERCENT_VALID(percent_valid)
|
||||||
{
|
{
|
||||||
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
||||||
Algorithm::Schedule(
|
auto vHPortal = this->ValueHandle.PrepareForOutput(arraySize, DeviceAdapter());
|
||||||
FillModuloTestValueKernel<Value>(
|
Algorithm::Schedule(FillModuloTestValueKernel<Value, decltype(vHPortal)>(N_VALID, vHPortal),
|
||||||
N_VALID, this->ValueHandle.PrepareForOutput(arraySize, DeviceAdapterTag())),
|
arraySize);
|
||||||
arraySize);
|
|
||||||
this->IndexHandle = SSI::Sort(this->ValueHandle);
|
this->IndexHandle = SSI::Sort(this->ValueHandle);
|
||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
|
|
||||||
IndexArrayHandle indices;
|
IndexArrayHandle indices;
|
||||||
Algorithm::Copy(this->IndexHandle, indices);
|
Algorithm::Copy(this->IndexHandle, indices);
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
SSI::Unique(this->ValueHandle, indices);
|
SSI::Unique(this->ValueHandle, indices);
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -830,7 +825,7 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(StableSortIndicesUnique75, BenchStableSortIndicesUnique, 75);
|
VTKM_MAKE_BENCHMARK(StableSortIndicesUnique75, BenchStableSortIndicesUnique, 75);
|
||||||
VTKM_MAKE_BENCHMARK(StableSortIndicesUnique100, BenchStableSortIndicesUnique, 100);
|
VTKM_MAKE_BENCHMARK(StableSortIndicesUnique100, BenchStableSortIndicesUnique, 100);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchUnique
|
struct BenchUnique
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
@ -845,8 +840,8 @@ private:
|
|||||||
, PERCENT_VALID(percent_valid)
|
, PERCENT_VALID(percent_valid)
|
||||||
{
|
{
|
||||||
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
||||||
Algorithm::Schedule(FillModuloTestValueKernel<Value>(
|
auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter());
|
||||||
N_VALID, ValueHandle.PrepareForOutput(arraySize, DeviceAdapterTag())),
|
Algorithm::Schedule(FillModuloTestValueKernel<Value, decltype(vHPortal)>(N_VALID, vHPortal),
|
||||||
arraySize);
|
arraySize);
|
||||||
Algorithm::Sort(ValueHandle);
|
Algorithm::Sort(ValueHandle);
|
||||||
}
|
}
|
||||||
@ -854,10 +849,11 @@ private:
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
|
|
||||||
ValueArrayHandle array;
|
ValueArrayHandle array;
|
||||||
Algorithm::Copy(this->ValueHandle, array);
|
Algorithm::Copy(this->ValueHandle, array);
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algorithm::Unique(array);
|
Algorithm::Unique(array);
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -888,7 +884,7 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(Unique75, BenchUnique, 75);
|
VTKM_MAKE_BENCHMARK(Unique75, BenchUnique, 75);
|
||||||
VTKM_MAKE_BENCHMARK(Unique100, BenchUnique, 100);
|
VTKM_MAKE_BENCHMARK(Unique100, BenchUnique, 100);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchUpperBounds
|
struct BenchUpperBounds
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
@ -904,18 +900,17 @@ private:
|
|||||||
, PERCENT_VALS(percent_vals)
|
, PERCENT_VALS(percent_vals)
|
||||||
{
|
{
|
||||||
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
vtkm::Id arraySize = Config.ComputeSize<Value>();
|
||||||
Algorithm::Schedule(
|
auto iHPortal = InputHandle.PrepareForOutput(arraySize, DeviceAdapter());
|
||||||
FillTestValueKernel<Value>(InputHandle.PrepareForOutput(arraySize, DeviceAdapterTag())),
|
Algorithm::Schedule(FillTestValueKernel<Value, decltype(iHPortal)>(iHPortal), arraySize);
|
||||||
arraySize);
|
auto vHPortal = ValueHandle.PrepareForOutput(N_VALS, DeviceAdapter());
|
||||||
Algorithm::Schedule(FillScaledTestValueKernel<Value>(
|
Algorithm::Schedule(FillScaledTestValueKernel<Value, decltype(vHPortal)>(2, vHPortal),
|
||||||
2, ValueHandle.PrepareForOutput(N_VALS, DeviceAdapterTag())),
|
|
||||||
N_VALS);
|
N_VALS);
|
||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
vtkm::cont::Timer timer;
|
vtkm::cont::Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
Algorithm::UpperBounds(InputHandle, ValueHandle, OutHandle);
|
Algorithm::UpperBounds(InputHandle, ValueHandle, OutHandle);
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -948,11 +943,8 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(UpperBounds100, BenchUpperBounds, 100);
|
VTKM_MAKE_BENCHMARK(UpperBounds100, BenchUpperBounds, 100);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static VTKM_CONT int Run()
|
static VTKM_CONT int Run(vtkm::cont::DeviceAdapterId id)
|
||||||
{
|
{
|
||||||
std::cout << DIVIDER << "\nRunning DeviceAdapter benchmarks\n";
|
|
||||||
vtkm::cont::GetGlobalRuntimeDeviceTracker().ForceDevice(DeviceAdapterTag());
|
|
||||||
|
|
||||||
// Run fixed bytes / size tests:
|
// Run fixed bytes / size tests:
|
||||||
for (int sizeType = 0; sizeType < 2; ++sizeType)
|
for (int sizeType = 0; sizeType < 2; ++sizeType)
|
||||||
{
|
{
|
||||||
@ -962,11 +954,11 @@ public:
|
|||||||
Config.DoByteSizes = true;
|
Config.DoByteSizes = true;
|
||||||
if (!Config.ExtendedTypeList)
|
if (!Config.ExtendedTypeList)
|
||||||
{
|
{
|
||||||
RunInternal<BaseTypes>();
|
RunInternal<BaseTypes>(id);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
RunInternal<ExtendedTypes>();
|
RunInternal<ExtendedTypes>(id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (sizeType == 1 && Config.TestArraySizeValues)
|
if (sizeType == 1 && Config.TestArraySizeValues)
|
||||||
@ -975,11 +967,11 @@ public:
|
|||||||
Config.DoByteSizes = false;
|
Config.DoByteSizes = false;
|
||||||
if (!Config.ExtendedTypeList)
|
if (!Config.ExtendedTypeList)
|
||||||
{
|
{
|
||||||
RunInternal<BaseTypes>();
|
RunInternal<BaseTypes>(id);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
RunInternal<ExtendedTypes>();
|
RunInternal<ExtendedTypes>(id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -988,12 +980,12 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename ValueTypes>
|
template <typename ValueTypes>
|
||||||
static VTKM_CONT void RunInternal()
|
static VTKM_CONT void RunInternal(vtkm::cont::DeviceAdapterId id)
|
||||||
{
|
{
|
||||||
if (Config.BenchmarkFlags & COPY)
|
if (Config.BenchmarkFlags & COPY)
|
||||||
{
|
{
|
||||||
std::cout << DIVIDER << "\nBenchmarking Copy\n";
|
std::cout << DIVIDER << "\nBenchmarking Copy\n";
|
||||||
VTKM_RUN_BENCHMARK(Copy, ValueTypes());
|
VTKM_RUN_BENCHMARK(Copy, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Config.BenchmarkFlags & COPY_IF)
|
if (Config.BenchmarkFlags & COPY_IF)
|
||||||
@ -1001,26 +993,26 @@ public:
|
|||||||
std::cout << "\n" << DIVIDER << "\nBenchmarking CopyIf\n";
|
std::cout << "\n" << DIVIDER << "\nBenchmarking CopyIf\n";
|
||||||
if (Config.DetailedOutputRangeScaling)
|
if (Config.DetailedOutputRangeScaling)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(CopyIf5, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf10, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf10, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf15, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf15, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf20, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf20, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf25, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf30, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf30, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf35, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf35, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf40, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf40, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf45, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf45, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf50, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf75, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf100, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(CopyIf5, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf25, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf50, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf75, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CopyIf100, ValueTypes());
|
VTKM_RUN_BENCHMARK(CopyIf100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1029,33 +1021,33 @@ public:
|
|||||||
std::cout << DIVIDER << "\nBenchmarking LowerBounds\n";
|
std::cout << DIVIDER << "\nBenchmarking LowerBounds\n";
|
||||||
if (Config.DetailedOutputRangeScaling)
|
if (Config.DetailedOutputRangeScaling)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds5, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds10, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds10, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds15, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds15, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds20, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds20, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds25, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds30, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds30, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds35, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds35, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds40, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds40, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds45, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds45, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds50, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds75, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds100, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds5, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds25, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds50, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds75, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(LowerBounds100, ValueTypes());
|
VTKM_RUN_BENCHMARK(LowerBounds100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Config.BenchmarkFlags & REDUCE)
|
if (Config.BenchmarkFlags & REDUCE)
|
||||||
{
|
{
|
||||||
std::cout << "\n" << DIVIDER << "\nBenchmarking Reduce\n";
|
std::cout << "\n" << DIVIDER << "\nBenchmarking Reduce\n";
|
||||||
VTKM_RUN_BENCHMARK(Reduce, ValueTypes());
|
VTKM_RUN_BENCHMARK(Reduce, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Config.BenchmarkFlags & REDUCE_BY_KEY)
|
if (Config.BenchmarkFlags & REDUCE_BY_KEY)
|
||||||
@ -1063,45 +1055,45 @@ public:
|
|||||||
std::cout << "\n" << DIVIDER << "\nBenchmarking ReduceByKey\n";
|
std::cout << "\n" << DIVIDER << "\nBenchmarking ReduceByKey\n";
|
||||||
if (Config.DetailedOutputRangeScaling)
|
if (Config.DetailedOutputRangeScaling)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey5, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey10, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey10, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey15, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey15, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey20, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey20, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey25, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey30, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey30, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey35, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey35, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey40, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey40, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey45, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey45, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey50, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey75, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey100, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey5, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey25, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey50, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey75, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ReduceByKey100, ValueTypes());
|
VTKM_RUN_BENCHMARK(ReduceByKey100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Config.BenchmarkFlags & SCAN_INCLUSIVE)
|
if (Config.BenchmarkFlags & SCAN_INCLUSIVE)
|
||||||
{
|
{
|
||||||
std::cout << "\n" << DIVIDER << "\nBenchmarking ScanInclusive\n";
|
std::cout << "\n" << DIVIDER << "\nBenchmarking ScanInclusive\n";
|
||||||
VTKM_RUN_BENCHMARK(ScanInclusive, ValueTypes());
|
VTKM_RUN_BENCHMARK(ScanInclusive, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Config.BenchmarkFlags & SCAN_EXCLUSIVE)
|
if (Config.BenchmarkFlags & SCAN_EXCLUSIVE)
|
||||||
{
|
{
|
||||||
std::cout << "\n" << DIVIDER << "\nBenchmarking ScanExclusive\n";
|
std::cout << "\n" << DIVIDER << "\nBenchmarking ScanExclusive\n";
|
||||||
VTKM_RUN_BENCHMARK(ScanExclusive, ValueTypes());
|
VTKM_RUN_BENCHMARK(ScanExclusive, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Config.BenchmarkFlags & SORT)
|
if (Config.BenchmarkFlags & SORT)
|
||||||
{
|
{
|
||||||
std::cout << "\n" << DIVIDER << "\nBenchmarking Sort\n";
|
std::cout << "\n" << DIVIDER << "\nBenchmarking Sort\n";
|
||||||
VTKM_RUN_BENCHMARK(Sort, ValueTypes());
|
VTKM_RUN_BENCHMARK(Sort, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Config.BenchmarkFlags & SORT_BY_KEY)
|
if (Config.BenchmarkFlags & SORT_BY_KEY)
|
||||||
@ -1109,33 +1101,33 @@ public:
|
|||||||
std::cout << "\n" << DIVIDER << "\nBenchmarking SortByKey\n";
|
std::cout << "\n" << DIVIDER << "\nBenchmarking SortByKey\n";
|
||||||
if (Config.DetailedOutputRangeScaling)
|
if (Config.DetailedOutputRangeScaling)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(SortByKey5, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey10, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey10, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey15, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey15, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey20, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey20, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey25, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey30, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey30, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey35, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey35, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey40, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey40, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey45, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey45, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey50, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey75, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey100, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(SortByKey5, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey25, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey50, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey75, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(SortByKey100, ValueTypes());
|
VTKM_RUN_BENCHMARK(SortByKey100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Config.BenchmarkFlags & STABLE_SORT_INDICES)
|
if (Config.BenchmarkFlags & STABLE_SORT_INDICES)
|
||||||
{
|
{
|
||||||
std::cout << "\n" << DIVIDER << "\nBenchmarking StableSortIndices::Sort\n";
|
std::cout << "\n" << DIVIDER << "\nBenchmarking StableSortIndices::Sort\n";
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndices, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndices, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Config.BenchmarkFlags & STABLE_SORT_INDICES_UNIQUE)
|
if (Config.BenchmarkFlags & STABLE_SORT_INDICES_UNIQUE)
|
||||||
@ -1143,26 +1135,26 @@ public:
|
|||||||
std::cout << "\n" << DIVIDER << "\nBenchmarking StableSortIndices::Unique\n";
|
std::cout << "\n" << DIVIDER << "\nBenchmarking StableSortIndices::Unique\n";
|
||||||
if (Config.DetailedOutputRangeScaling)
|
if (Config.DetailedOutputRangeScaling)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique5, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique10, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique10, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique15, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique15, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique20, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique20, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique25, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique30, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique30, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique35, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique35, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique40, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique40, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique45, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique45, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique50, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique75, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique100, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique5, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique25, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique50, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique75, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(StableSortIndicesUnique100, ValueTypes());
|
VTKM_RUN_BENCHMARK(StableSortIndicesUnique100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1171,26 +1163,26 @@ public:
|
|||||||
std::cout << "\n" << DIVIDER << "\nBenchmarking Unique\n";
|
std::cout << "\n" << DIVIDER << "\nBenchmarking Unique\n";
|
||||||
if (Config.DetailedOutputRangeScaling)
|
if (Config.DetailedOutputRangeScaling)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(Unique5, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique10, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique10, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique15, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique15, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique20, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique20, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique25, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique30, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique30, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique35, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique35, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique40, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique40, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique45, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique45, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique50, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique75, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique100, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(Unique5, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique25, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique50, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique75, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(Unique100, ValueTypes());
|
VTKM_RUN_BENCHMARK(Unique100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1199,26 +1191,26 @@ public:
|
|||||||
std::cout << "\n" << DIVIDER << "\nBenchmarking UpperBounds\n";
|
std::cout << "\n" << DIVIDER << "\nBenchmarking UpperBounds\n";
|
||||||
if (Config.DetailedOutputRangeScaling)
|
if (Config.DetailedOutputRangeScaling)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds5, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds10, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds10, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds15, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds15, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds20, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds20, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds25, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds30, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds30, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds35, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds35, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds40, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds40, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds45, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds45, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds50, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds75, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds100, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds5, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds5, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds25, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds25, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds50, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds50, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds75, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds75, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(UpperBounds100, ValueTypes());
|
VTKM_RUN_BENCHMARK(UpperBounds100, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1230,19 +1222,28 @@ public:
|
|||||||
|
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
vtkm::cont::InitLogging(argc, argv);
|
auto opt = vtkm::cont::InitializeOptions::RequireDevice;
|
||||||
|
auto initConfig = vtkm::cont::Initialize(argc, argv, opt);
|
||||||
|
|
||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
int numThreads{ 0 };
|
||||||
int numThreads = tbb::task_scheduler_init::automatic;
|
#ifdef VTKM_ENABLE_TBB
|
||||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
if (initConfig.Device == vtkm::cont::DeviceAdapterTagTBB())
|
||||||
int numThreads = omp_get_max_threads();
|
{
|
||||||
#endif // TBB
|
numThreads = tbb::task_scheduler_init::automatic;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef VTKM_ENABLE_OPENMP
|
||||||
|
if (initConfig.Device == vtkm::cont::DeviceAdapterTagOpenMP())
|
||||||
|
{
|
||||||
|
numThreads = omp_get_max_threads();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
vtkm::benchmarking::BenchDevAlgoConfig& config = vtkm::benchmarking::Config;
|
vtkm::benchmarking::BenchDevAlgoConfig& config = vtkm::benchmarking::Config;
|
||||||
|
|
||||||
for (int i = 1; i < argc; ++i)
|
for (size_t i = 0; i < initConfig.Arguments.size(); ++i)
|
||||||
{
|
{
|
||||||
std::string arg = argv[i];
|
std::string arg = initConfig.Arguments[i];
|
||||||
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
||||||
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||||
});
|
});
|
||||||
@ -1301,7 +1302,7 @@ int main(int argc, char* argv[])
|
|||||||
else if (arg == "typelist")
|
else if (arg == "typelist")
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
arg = argv[i];
|
arg = initConfig.Arguments[i];
|
||||||
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
||||||
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||||
});
|
});
|
||||||
@ -1315,14 +1316,14 @@ int main(int argc, char* argv[])
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::cerr << "Unrecognized TypeList: " << argv[i] << std::endl;
|
std::cerr << "Unrecognized TypeList: " << initConfig.Arguments[i] << std::endl;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (arg == "fixbytes")
|
else if (arg == "fixbytes")
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
arg = argv[i];
|
arg = initConfig.Arguments[i];
|
||||||
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
||||||
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||||
});
|
});
|
||||||
@ -1340,7 +1341,7 @@ int main(int argc, char* argv[])
|
|||||||
else if (arg == "fixsizes")
|
else if (arg == "fixsizes")
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
arg = argv[i];
|
arg = initConfig.Arguments[i];
|
||||||
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
||||||
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||||
});
|
});
|
||||||
@ -1362,30 +1363,38 @@ int main(int argc, char* argv[])
|
|||||||
else if (arg == "numthreads")
|
else if (arg == "numthreads")
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
if (initConfig.Device == vtkm::cont::DeviceAdapterTagTBB() ||
|
||||||
std::istringstream parse(argv[i]);
|
initConfig.Device == vtkm::cont::DeviceAdapterTagOpenMP())
|
||||||
parse >> numThreads;
|
{
|
||||||
std::cout << "Selected " << numThreads << " TBB threads." << std::endl;
|
std::istringstream parse(initConfig.Arguments[i]);
|
||||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
parse >> numThreads;
|
||||||
std::istringstream parse(argv[i]);
|
std::cout << "Selected " << numThreads << " " << initConfig.Device.GetName() << " threads."
|
||||||
parse >> numThreads;
|
<< std::endl;
|
||||||
std::cout << "Selected " << numThreads << " OpenMP threads." << std::endl;
|
}
|
||||||
#else
|
else
|
||||||
std::cerr << "NumThreads not valid on this device. Ignoring." << std::endl;
|
{
|
||||||
#endif // TBB
|
std::cerr << "NumThreads not valid on this device. Ignoring." << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::cerr << "Unrecognized benchmark: " << argv[i] << std::endl;
|
std::cerr << "Unrecognized benchmark: " << initConfig.Arguments[i] << std::endl;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
#ifdef VTKM_ENABLE_TBB
|
||||||
// Must not be destroyed as long as benchmarks are running:
|
// Must not be destroyed as long as benchmarks are running:
|
||||||
tbb::task_scheduler_init init(numThreads);
|
if (initConfig.Device == vtkm::cont::DeviceAdapterTagTBB())
|
||||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
{
|
||||||
omp_set_num_threads(numThreads);
|
tbb::task_scheduler_init init(numThreads);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef VTKM_ENABLE_OPENMP
|
||||||
|
if (initConfig.Device == vtkm::cont::DeviceAdapterTagOpenMP())
|
||||||
|
{
|
||||||
|
omp_set_num_threads(numThreads);
|
||||||
|
}
|
||||||
#endif // TBB
|
#endif // TBB
|
||||||
|
|
||||||
if (config.BenchmarkFlags == 0)
|
if (config.BenchmarkFlags == 0)
|
||||||
@ -1394,5 +1403,5 @@ int main(int argc, char* argv[])
|
|||||||
}
|
}
|
||||||
|
|
||||||
//now actually execute the benchmarks
|
//now actually execute the benchmarks
|
||||||
return vtkm::benchmarking::BenchmarkDeviceAdapter<VTKM_DEFAULT_DEVICE_ADAPTER_TAG>::Run();
|
return vtkm::benchmarking::BenchmarkDeviceAdapter::Run(initConfig.Device);
|
||||||
}
|
}
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
#include <vtkm/cont/ArrayHandle.h>
|
#include <vtkm/cont/ArrayHandle.h>
|
||||||
#include <vtkm/cont/CellSetStructured.h>
|
#include <vtkm/cont/CellSetStructured.h>
|
||||||
#include <vtkm/cont/ImplicitFunctionHandle.h>
|
#include <vtkm/cont/ImplicitFunctionHandle.h>
|
||||||
|
#include <vtkm/cont/Initialize.h>
|
||||||
#include <vtkm/cont/Timer.h>
|
#include <vtkm/cont/Timer.h>
|
||||||
#include <vtkm/cont/VariantArrayHandle.h>
|
#include <vtkm/cont/VariantArrayHandle.h>
|
||||||
|
|
||||||
@ -311,7 +312,6 @@ struct InterpValueTypes : vtkm::ListTagBase<vtkm::Float32, vtkm::Vec<vtkm::Float
|
|||||||
|
|
||||||
/// This class runs a series of micro-benchmarks to measure
|
/// This class runs a series of micro-benchmarks to measure
|
||||||
/// performance of different field operations
|
/// performance of different field operations
|
||||||
template <class DeviceAdapterTag>
|
|
||||||
class BenchmarkFieldAlgorithms
|
class BenchmarkFieldAlgorithms
|
||||||
{
|
{
|
||||||
using StorageTag = vtkm::cont::StorageTagBasic;
|
using StorageTag = vtkm::cont::StorageTagBasic;
|
||||||
@ -323,7 +323,7 @@ class BenchmarkFieldAlgorithms
|
|||||||
using EdgeIdVariantHandle = vtkm::cont::VariantArrayHandleBase<vtkm::TypeListTagId2>;
|
using EdgeIdVariantHandle = vtkm::cont::VariantArrayHandleBase<vtkm::TypeListTagId2>;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchBlackScholes
|
struct BenchBlackScholes
|
||||||
{
|
{
|
||||||
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
|
||||||
@ -366,11 +366,10 @@ private:
|
|||||||
const Value RISKFREE = 0.02f;
|
const Value RISKFREE = 0.02f;
|
||||||
const Value VOLATILITY = 0.30f;
|
const Value VOLATILITY = 0.30f;
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
BlackScholes<Value> worklet(RISKFREE, VOLATILITY);
|
BlackScholes<Value> worklet(RISKFREE, VOLATILITY);
|
||||||
vtkm::worklet::DispatcherMapField<BlackScholes<Value>> dispatcher(worklet);
|
vtkm::worklet::DispatcherMapField<BlackScholes<Value>> dispatcher(worklet);
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
|
|
||||||
dispatcher.Invoke(
|
dispatcher.Invoke(
|
||||||
this->StockPrice, this->OptionStrike, this->OptionYears, callResultHandle, putResultHandle);
|
this->StockPrice, this->OptionStrike, this->OptionYears, callResultHandle, putResultHandle);
|
||||||
@ -391,8 +390,8 @@ private:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchBlackScholesDynamic : public BenchBlackScholes<Value>
|
struct BenchBlackScholesDynamic : public BenchBlackScholes<Value, DeviceAdapter>
|
||||||
{
|
{
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
@ -406,11 +405,10 @@ private:
|
|||||||
const Value RISKFREE = 0.02f;
|
const Value RISKFREE = 0.02f;
|
||||||
const Value VOLATILITY = 0.30f;
|
const Value VOLATILITY = 0.30f;
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
BlackScholes<Value> worklet(RISKFREE, VOLATILITY);
|
BlackScholes<Value> worklet(RISKFREE, VOLATILITY);
|
||||||
vtkm::worklet::DispatcherMapField<BlackScholes<Value>> dispatcher(worklet);
|
vtkm::worklet::DispatcherMapField<BlackScholes<Value>> dispatcher(worklet);
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
|
|
||||||
dispatcher.Invoke(dstocks, dstrikes, doptions, callResultHandle, putResultHandle);
|
dispatcher.Invoke(dstocks, dstrikes, doptions, callResultHandle, putResultHandle);
|
||||||
|
|
||||||
@ -423,7 +421,7 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(BlackScholes, BenchBlackScholes);
|
VTKM_MAKE_BENCHMARK(BlackScholes, BenchBlackScholes);
|
||||||
VTKM_MAKE_BENCHMARK(BlackScholesDynamic, BenchBlackScholesDynamic);
|
VTKM_MAKE_BENCHMARK(BlackScholesDynamic, BenchBlackScholesDynamic);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchMath
|
struct BenchMath
|
||||||
{
|
{
|
||||||
std::vector<vtkm::Vec<Value, 3>> input;
|
std::vector<vtkm::Vec<Value, 3>> input;
|
||||||
@ -450,10 +448,10 @@ private:
|
|||||||
vtkm::cont::ArrayHandle<Value> tempHandle1;
|
vtkm::cont::ArrayHandle<Value> tempHandle1;
|
||||||
vtkm::cont::ArrayHandle<Value> tempHandle2;
|
vtkm::cont::ArrayHandle<Value> tempHandle2;
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
vtkm::worklet::Invoker invoke(DeviceAdapterTag{});
|
vtkm::worklet::Invoker invoke(DeviceAdapter{});
|
||||||
invoke(Mag{}, this->InputHandle, tempHandle1);
|
invoke(Mag{}, this->InputHandle, tempHandle1);
|
||||||
invoke(Sin{}, tempHandle1, tempHandle2);
|
invoke(Sin{}, tempHandle1, tempHandle2);
|
||||||
invoke(Square{}, tempHandle2, tempHandle1);
|
invoke(Square{}, tempHandle2, tempHandle1);
|
||||||
@ -475,8 +473,8 @@ private:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchMathDynamic : public BenchMath<Value>
|
struct BenchMathDynamic : public BenchMath<Value, DeviceAdapter>
|
||||||
{
|
{
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
@ -490,10 +488,10 @@ private:
|
|||||||
ValueVariantHandle dtemp1(temp1);
|
ValueVariantHandle dtemp1(temp1);
|
||||||
ValueVariantHandle dtemp2(temp2);
|
ValueVariantHandle dtemp2(temp2);
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
vtkm::worklet::Invoker invoke(DeviceAdapterTag{});
|
vtkm::worklet::Invoker invoke(DeviceAdapter{});
|
||||||
invoke(Mag{}, dinput, dtemp1);
|
invoke(Mag{}, dinput, dtemp1);
|
||||||
invoke(Sin{}, dtemp1, dtemp2);
|
invoke(Sin{}, dtemp1, dtemp2);
|
||||||
invoke(Square{}, dtemp2, dtemp1);
|
invoke(Square{}, dtemp2, dtemp1);
|
||||||
@ -508,7 +506,7 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(Math, BenchMath);
|
VTKM_MAKE_BENCHMARK(Math, BenchMath);
|
||||||
VTKM_MAKE_BENCHMARK(MathDynamic, BenchMathDynamic);
|
VTKM_MAKE_BENCHMARK(MathDynamic, BenchMathDynamic);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchFusedMath
|
struct BenchFusedMath
|
||||||
{
|
{
|
||||||
std::vector<vtkm::Vec<Value, 3>> input;
|
std::vector<vtkm::Vec<Value, 3>> input;
|
||||||
@ -534,10 +532,9 @@ private:
|
|||||||
{
|
{
|
||||||
vtkm::cont::ArrayHandle<Value> result;
|
vtkm::cont::ArrayHandle<Value> result;
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
vtkm::worklet::DispatcherMapField<FusedMath> dispatcher;
|
vtkm::worklet::DispatcherMapField<FusedMath> dispatcher;
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(this->InputHandle, result);
|
dispatcher.Invoke(this->InputHandle, result);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -556,8 +553,8 @@ private:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchFusedMathDynamic : public BenchFusedMath<Value>
|
struct BenchFusedMathDynamic : public BenchFusedMath<Value, DeviceAdapter>
|
||||||
{
|
{
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
@ -569,10 +566,9 @@ private:
|
|||||||
|
|
||||||
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
vtkm::worklet::DispatcherMapField<FusedMath> dispatcher;
|
vtkm::worklet::DispatcherMapField<FusedMath> dispatcher;
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(dinput, result);
|
dispatcher.Invoke(dinput, result);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -584,7 +580,7 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(FusedMath, BenchFusedMath);
|
VTKM_MAKE_BENCHMARK(FusedMath, BenchFusedMath);
|
||||||
VTKM_MAKE_BENCHMARK(FusedMathDynamic, BenchFusedMathDynamic);
|
VTKM_MAKE_BENCHMARK(FusedMathDynamic, BenchFusedMathDynamic);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchEdgeInterp
|
struct BenchEdgeInterp
|
||||||
{
|
{
|
||||||
std::vector<vtkm::Float32> weight;
|
std::vector<vtkm::Float32> weight;
|
||||||
@ -617,7 +613,6 @@ private:
|
|||||||
|
|
||||||
this->EdgePairHandle.Allocate(numberOfEdges);
|
this->EdgePairHandle.Allocate(numberOfEdges);
|
||||||
vtkm::worklet::DispatcherMapTopology<GenerateEdges> dispatcher;
|
vtkm::worklet::DispatcherMapTopology<GenerateEdges> dispatcher;
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(cellSet, this->EdgePairHandle);
|
dispatcher.Invoke(cellSet, this->EdgePairHandle);
|
||||||
|
|
||||||
this->weight.resize(esize);
|
this->weight.resize(esize);
|
||||||
@ -641,10 +636,9 @@ private:
|
|||||||
{
|
{
|
||||||
vtkm::cont::ArrayHandle<Value> result;
|
vtkm::cont::ArrayHandle<Value> result;
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
vtkm::worklet::DispatcherMapField<InterpolateField> dispatcher;
|
vtkm::worklet::DispatcherMapField<InterpolateField> dispatcher;
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(this->EdgePairHandle, this->WeightHandle, this->FieldHandle, result);
|
dispatcher.Invoke(this->EdgePairHandle, this->WeightHandle, this->FieldHandle, result);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -664,8 +658,8 @@ private:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchEdgeInterpDynamic : public BenchEdgeInterp<Value>
|
struct BenchEdgeInterpDynamic : public BenchEdgeInterp<Value, DeviceAdapter>
|
||||||
{
|
{
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
@ -676,10 +670,9 @@ private:
|
|||||||
EdgeIdVariantHandle dedges(this->EdgePairHandle);
|
EdgeIdVariantHandle dedges(this->EdgePairHandle);
|
||||||
vtkm::cont::ArrayHandle<Value> result;
|
vtkm::cont::ArrayHandle<Value> result;
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
vtkm::worklet::DispatcherMapField<InterpolateField> dispatcher;
|
vtkm::worklet::DispatcherMapField<InterpolateField> dispatcher;
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(dedges, dweight, dfield, result);
|
dispatcher.Invoke(dedges, dweight, dfield, result);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -724,7 +717,7 @@ private:
|
|||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchImplicitFunction
|
struct BenchImplicitFunction
|
||||||
{
|
{
|
||||||
BenchImplicitFunction()
|
BenchImplicitFunction()
|
||||||
@ -739,14 +732,12 @@ private:
|
|||||||
using EvalDispatcher = vtkm::worklet::DispatcherMapField<EvalWorklet>;
|
using EvalDispatcher = vtkm::worklet::DispatcherMapField<EvalWorklet>;
|
||||||
|
|
||||||
auto handle = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere1);
|
auto handle = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere1);
|
||||||
auto function =
|
auto function = static_cast<const vtkm::Sphere*>(handle.PrepareForExecution(DeviceAdapter()));
|
||||||
static_cast<const vtkm::Sphere*>(handle.PrepareForExecution(DeviceAdapterTag()));
|
|
||||||
EvalWorklet eval(function);
|
EvalWorklet eval(function);
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
EvalDispatcher dispatcher(eval);
|
EvalDispatcher dispatcher(eval);
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(this->Internal.Points, this->Internal.Result);
|
dispatcher.Invoke(this->Internal.Points, this->Internal.Result);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -764,7 +755,7 @@ private:
|
|||||||
ImplicitFunctionBenchData Internal;
|
ImplicitFunctionBenchData Internal;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchVirtualImplicitFunction
|
struct BenchVirtualImplicitFunction
|
||||||
{
|
{
|
||||||
BenchVirtualImplicitFunction()
|
BenchVirtualImplicitFunction()
|
||||||
@ -779,12 +770,11 @@ private:
|
|||||||
using EvalDispatcher = vtkm::worklet::DispatcherMapField<EvalWorklet>;
|
using EvalDispatcher = vtkm::worklet::DispatcherMapField<EvalWorklet>;
|
||||||
|
|
||||||
auto sphere = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere1);
|
auto sphere = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere1);
|
||||||
EvalWorklet eval(sphere.PrepareForExecution(DeviceAdapterTag()));
|
EvalWorklet eval(sphere.PrepareForExecution(DeviceAdapter()));
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
EvalDispatcher dispatcher(eval);
|
EvalDispatcher dispatcher(eval);
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(this->Internal.Points, this->Internal.Result);
|
dispatcher.Invoke(this->Internal.Points, this->Internal.Result);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -802,7 +792,7 @@ private:
|
|||||||
ImplicitFunctionBenchData Internal;
|
ImplicitFunctionBenchData Internal;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct Bench2ImplicitFunctions
|
struct Bench2ImplicitFunctions
|
||||||
{
|
{
|
||||||
Bench2ImplicitFunctions()
|
Bench2ImplicitFunctions()
|
||||||
@ -818,14 +808,13 @@ private:
|
|||||||
|
|
||||||
auto h1 = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere1);
|
auto h1 = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere1);
|
||||||
auto h2 = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere2);
|
auto h2 = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere2);
|
||||||
auto f1 = static_cast<const vtkm::Sphere*>(h1.PrepareForExecution(DeviceAdapterTag()));
|
auto f1 = static_cast<const vtkm::Sphere*>(h1.PrepareForExecution(DeviceAdapter()));
|
||||||
auto f2 = static_cast<const vtkm::Sphere*>(h2.PrepareForExecution(DeviceAdapterTag()));
|
auto f2 = static_cast<const vtkm::Sphere*>(h2.PrepareForExecution(DeviceAdapter()));
|
||||||
EvalWorklet eval(f1, f2);
|
EvalWorklet eval(f1, f2);
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
EvalDispatcher dispatcher(eval);
|
EvalDispatcher dispatcher(eval);
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(this->Internal.Points, this->Internal.Result);
|
dispatcher.Invoke(this->Internal.Points, this->Internal.Result);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -843,7 +832,7 @@ private:
|
|||||||
ImplicitFunctionBenchData Internal;
|
ImplicitFunctionBenchData Internal;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct Bench2VirtualImplicitFunctions
|
struct Bench2VirtualImplicitFunctions
|
||||||
{
|
{
|
||||||
Bench2VirtualImplicitFunctions()
|
Bench2VirtualImplicitFunctions()
|
||||||
@ -860,13 +849,12 @@ private:
|
|||||||
|
|
||||||
auto s1 = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere1);
|
auto s1 = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere1);
|
||||||
auto s2 = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere2);
|
auto s2 = vtkm::cont::make_ImplicitFunctionHandle(Internal.Sphere2);
|
||||||
EvalWorklet eval(s1.PrepareForExecution(DeviceAdapterTag()),
|
EvalWorklet eval(s1.PrepareForExecution(DeviceAdapter()),
|
||||||
s2.PrepareForExecution(DeviceAdapterTag()));
|
s2.PrepareForExecution(DeviceAdapter()));
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
EvalDispatcher dispatcher(eval);
|
EvalDispatcher dispatcher(eval);
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(this->Internal.Points, this->Internal.Result);
|
dispatcher.Invoke(this->Internal.Points, this->Internal.Result);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -890,36 +878,36 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(ImplicitFunctionVirtual2, Bench2VirtualImplicitFunctions);
|
VTKM_MAKE_BENCHMARK(ImplicitFunctionVirtual2, Bench2VirtualImplicitFunctions);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static VTKM_CONT int Run(int benchmarks)
|
static VTKM_CONT int Run(int benchmarks, vtkm::cont::DeviceAdapterId id)
|
||||||
{
|
{
|
||||||
std::cout << DIVIDER << "\nRunning Field Algorithm benchmarks\n";
|
std::cout << DIVIDER << "\nRunning Field Algorithm benchmarks\n";
|
||||||
|
|
||||||
if (benchmarks & BLACK_SCHOLES)
|
if (benchmarks & BLACK_SCHOLES)
|
||||||
{
|
{
|
||||||
std::cout << DIVIDER << "\nBenchmarking BlackScholes\n";
|
std::cout << DIVIDER << "\nBenchmarking BlackScholes\n";
|
||||||
VTKM_RUN_BENCHMARK(BlackScholes, ValueTypes());
|
VTKM_RUN_BENCHMARK(BlackScholes, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(BlackScholesDynamic, ValueTypes());
|
VTKM_RUN_BENCHMARK(BlackScholesDynamic, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (benchmarks & MATH)
|
if (benchmarks & MATH)
|
||||||
{
|
{
|
||||||
std::cout << DIVIDER << "\nBenchmarking Multiple Math Worklets\n";
|
std::cout << DIVIDER << "\nBenchmarking Multiple Math Worklets\n";
|
||||||
VTKM_RUN_BENCHMARK(Math, ValueTypes());
|
VTKM_RUN_BENCHMARK(Math, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(MathDynamic, ValueTypes());
|
VTKM_RUN_BENCHMARK(MathDynamic, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (benchmarks & FUSED_MATH)
|
if (benchmarks & FUSED_MATH)
|
||||||
{
|
{
|
||||||
std::cout << DIVIDER << "\nBenchmarking Single Fused Math Worklet\n";
|
std::cout << DIVIDER << "\nBenchmarking Single Fused Math Worklet\n";
|
||||||
VTKM_RUN_BENCHMARK(FusedMath, ValueTypes());
|
VTKM_RUN_BENCHMARK(FusedMath, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(FusedMathDynamic, ValueTypes());
|
VTKM_RUN_BENCHMARK(FusedMathDynamic, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (benchmarks & INTERPOLATE_FIELD)
|
if (benchmarks & INTERPOLATE_FIELD)
|
||||||
{
|
{
|
||||||
std::cout << DIVIDER << "\nBenchmarking Edge Based Field InterpolationWorklet\n";
|
std::cout << DIVIDER << "\nBenchmarking Edge Based Field InterpolationWorklet\n";
|
||||||
VTKM_RUN_BENCHMARK(EdgeInterp, InterpValueTypes());
|
VTKM_RUN_BENCHMARK(EdgeInterp, InterpValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(EdgeInterpDynamic, InterpValueTypes());
|
VTKM_RUN_BENCHMARK(EdgeInterpDynamic, InterpValueTypes(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (benchmarks & IMPLICIT_FUNCTION)
|
if (benchmarks & IMPLICIT_FUNCTION)
|
||||||
@ -927,10 +915,10 @@ public:
|
|||||||
using FloatDefaultType = vtkm::ListTagBase<vtkm::FloatDefault>;
|
using FloatDefaultType = vtkm::ListTagBase<vtkm::FloatDefault>;
|
||||||
|
|
||||||
std::cout << "\nBenchmarking Implicit Function\n";
|
std::cout << "\nBenchmarking Implicit Function\n";
|
||||||
VTKM_RUN_BENCHMARK(ImplicitFunction, FloatDefaultType());
|
VTKM_RUN_BENCHMARK(ImplicitFunction, FloatDefaultType(), id);
|
||||||
VTKM_RUN_BENCHMARK(ImplicitFunctionVirtual, FloatDefaultType());
|
VTKM_RUN_BENCHMARK(ImplicitFunctionVirtual, FloatDefaultType(), id);
|
||||||
VTKM_RUN_BENCHMARK(ImplicitFunction2, FloatDefaultType());
|
VTKM_RUN_BENCHMARK(ImplicitFunction2, FloatDefaultType(), id);
|
||||||
VTKM_RUN_BENCHMARK(ImplicitFunctionVirtual2, FloatDefaultType());
|
VTKM_RUN_BENCHMARK(ImplicitFunctionVirtual2, FloatDefaultType(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -943,18 +931,19 @@ public:
|
|||||||
|
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
vtkm::cont::InitLogging(argc, argv);
|
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
|
||||||
|
auto config = vtkm::cont::Initialize(argc, argv, opts);
|
||||||
|
|
||||||
int benchmarks = 0;
|
int benchmarks = 0;
|
||||||
if (argc < 2)
|
if (!config.Arguments.size())
|
||||||
{
|
{
|
||||||
benchmarks = vtkm::benchmarking::ALL;
|
benchmarks = vtkm::benchmarking::ALL;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int i = 1; i < argc; ++i)
|
for (size_t i = 0; i < config.Arguments.size(); ++i)
|
||||||
{
|
{
|
||||||
std::string arg = argv[i];
|
std::string arg = config.Arguments[i];
|
||||||
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
||||||
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||||
});
|
});
|
||||||
@ -980,16 +969,13 @@ int main(int argc, char* argv[])
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::cout << "Unrecognized benchmark: " << argv[i] << std::endl;
|
std::cout << "Unrecognized benchmark: " << config.Arguments[i] << std::endl;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//now actually execute the benchmarks
|
//now actually execute the benchmarks
|
||||||
using Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG;
|
|
||||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
|
||||||
tracker.ForceDevice(Device{});
|
|
||||||
|
|
||||||
return vtkm::benchmarking::BenchmarkFieldAlgorithms<Device>::Run(benchmarks);
|
return vtkm::benchmarking::BenchmarkFieldAlgorithms::Run(benchmarks, config.Device);
|
||||||
}
|
}
|
||||||
|
@ -62,9 +62,10 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
#ifdef VTKM_ENABLE_TBB
|
||||||
#include <tbb/task_scheduler_init.h>
|
#include <tbb/task_scheduler_init.h>
|
||||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
#endif
|
||||||
|
#ifdef VTKM_ENABLE_OPENMP
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -97,8 +98,6 @@
|
|||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
|
|
||||||
using Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG;
|
|
||||||
using DevTraits = vtkm::cont::DeviceAdapterTraits<Device>;
|
|
||||||
|
|
||||||
// unscoped enum so we can use bitwise ops without a lot of hassle:
|
// unscoped enum so we can use bitwise ops without a lot of hassle:
|
||||||
enum BenchmarkName
|
enum BenchmarkName
|
||||||
@ -156,6 +155,16 @@ using AllCellList = vtkm::ListTagJoin<StructuredCellList, UnstructuredCellList>;
|
|||||||
|
|
||||||
using CoordinateList = vtkm::ListTagBase<vtkm::Vec<vtkm::Float32, 3>, vtkm::Vec<vtkm::Float64, 3>>;
|
using CoordinateList = vtkm::ListTagBase<vtkm::Vec<vtkm::Float32, 3>, vtkm::Vec<vtkm::Float64, 3>>;
|
||||||
|
|
||||||
|
struct WaveletGeneratorDataFunctor
|
||||||
|
{
|
||||||
|
template <typename DeviceAdapter>
|
||||||
|
bool operator()(DeviceAdapter, vtkm::worklet::WaveletGenerator& gen)
|
||||||
|
{
|
||||||
|
InputDataSet = gen.GenerateDataSet<DeviceAdapter>();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class BenchmarkFilterPolicy : public vtkm::filter::PolicyBase<BenchmarkFilterPolicy>
|
class BenchmarkFilterPolicy : public vtkm::filter::PolicyBase<BenchmarkFilterPolicy>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -169,7 +178,6 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Class implementing all filter benchmarks:
|
// Class implementing all filter benchmarks:
|
||||||
template <class DeviceAdapterTag>
|
|
||||||
class BenchmarkFilters
|
class BenchmarkFilters
|
||||||
{
|
{
|
||||||
using Timer = vtkm::cont::Timer;
|
using Timer = vtkm::cont::Timer;
|
||||||
@ -185,7 +193,7 @@ class BenchmarkFilters
|
|||||||
ScalarInput = 1 << 6
|
ScalarInput = 1 << 6
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename>
|
template <typename, typename DeviceAdapter>
|
||||||
struct BenchGradient
|
struct BenchGradient
|
||||||
{
|
{
|
||||||
vtkm::filter::Gradient Filter;
|
vtkm::filter::Gradient Filter;
|
||||||
@ -229,7 +237,7 @@ class BenchmarkFilters
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -293,7 +301,7 @@ class BenchmarkFilters
|
|||||||
BenchGradient,
|
BenchGradient,
|
||||||
Gradient | PointGradient | Divergence | Vorticity | QCriterion);
|
Gradient | PointGradient | Divergence | Vorticity | QCriterion);
|
||||||
|
|
||||||
template <typename>
|
template <typename, typename DeviceAdapter>
|
||||||
struct BenchThreshold
|
struct BenchThreshold
|
||||||
{
|
{
|
||||||
vtkm::filter::Threshold Filter;
|
vtkm::filter::Threshold Filter;
|
||||||
@ -317,7 +325,7 @@ class BenchmarkFilters
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -328,7 +336,7 @@ class BenchmarkFilters
|
|||||||
};
|
};
|
||||||
VTKM_MAKE_BENCHMARK(Threshold, BenchThreshold);
|
VTKM_MAKE_BENCHMARK(Threshold, BenchThreshold);
|
||||||
|
|
||||||
template <typename>
|
template <typename, typename DeviceAdapter>
|
||||||
struct BenchThresholdPoints
|
struct BenchThresholdPoints
|
||||||
{
|
{
|
||||||
bool CompactPoints;
|
bool CompactPoints;
|
||||||
@ -356,7 +364,7 @@ class BenchmarkFilters
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -368,7 +376,7 @@ class BenchmarkFilters
|
|||||||
VTKM_MAKE_BENCHMARK(ThresholdPoints, BenchThresholdPoints, false);
|
VTKM_MAKE_BENCHMARK(ThresholdPoints, BenchThresholdPoints, false);
|
||||||
VTKM_MAKE_BENCHMARK(ThresholdPointsCompact, BenchThresholdPoints, true);
|
VTKM_MAKE_BENCHMARK(ThresholdPointsCompact, BenchThresholdPoints, true);
|
||||||
|
|
||||||
template <typename>
|
template <typename, typename DeviceAdapter>
|
||||||
struct BenchCellAverage
|
struct BenchCellAverage
|
||||||
{
|
{
|
||||||
vtkm::filter::CellAverage Filter;
|
vtkm::filter::CellAverage Filter;
|
||||||
@ -382,7 +390,7 @@ class BenchmarkFilters
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -393,7 +401,7 @@ class BenchmarkFilters
|
|||||||
};
|
};
|
||||||
VTKM_MAKE_BENCHMARK(CellAverage, BenchCellAverage);
|
VTKM_MAKE_BENCHMARK(CellAverage, BenchCellAverage);
|
||||||
|
|
||||||
template <typename>
|
template <typename, typename DeviceAdapter>
|
||||||
struct BenchPointAverage
|
struct BenchPointAverage
|
||||||
{
|
{
|
||||||
vtkm::filter::PointAverage Filter;
|
vtkm::filter::PointAverage Filter;
|
||||||
@ -407,7 +415,7 @@ class BenchmarkFilters
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -418,7 +426,7 @@ class BenchmarkFilters
|
|||||||
};
|
};
|
||||||
VTKM_MAKE_BENCHMARK(PointAverage, BenchPointAverage);
|
VTKM_MAKE_BENCHMARK(PointAverage, BenchPointAverage);
|
||||||
|
|
||||||
template <typename>
|
template <typename, typename DeviceAdapter>
|
||||||
struct BenchWarpScalar
|
struct BenchWarpScalar
|
||||||
{
|
{
|
||||||
vtkm::filter::WarpScalar Filter;
|
vtkm::filter::WarpScalar Filter;
|
||||||
@ -435,7 +443,7 @@ class BenchmarkFilters
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -446,7 +454,7 @@ class BenchmarkFilters
|
|||||||
};
|
};
|
||||||
VTKM_MAKE_BENCHMARK(WarpScalar, BenchWarpScalar);
|
VTKM_MAKE_BENCHMARK(WarpScalar, BenchWarpScalar);
|
||||||
|
|
||||||
template <typename>
|
template <typename, typename DeviceAdapter>
|
||||||
struct BenchWarpVector
|
struct BenchWarpVector
|
||||||
{
|
{
|
||||||
vtkm::filter::WarpVector Filter;
|
vtkm::filter::WarpVector Filter;
|
||||||
@ -462,7 +470,7 @@ class BenchmarkFilters
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -473,7 +481,7 @@ class BenchmarkFilters
|
|||||||
};
|
};
|
||||||
VTKM_MAKE_BENCHMARK(WarpVector, BenchWarpVector);
|
VTKM_MAKE_BENCHMARK(WarpVector, BenchWarpVector);
|
||||||
|
|
||||||
template <typename>
|
template <typename, typename DeviceAdapter>
|
||||||
struct BenchMarchingCubes
|
struct BenchMarchingCubes
|
||||||
{
|
{
|
||||||
vtkm::filter::MarchingCubes Filter;
|
vtkm::filter::MarchingCubes Filter;
|
||||||
@ -505,7 +513,7 @@ class BenchmarkFilters
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -535,7 +543,7 @@ class BenchmarkFilters
|
|||||||
VTKM_MAKE_BENCHMARK(MarchingCubes3FTT, BenchMarchingCubes, 3, false, true, true);
|
VTKM_MAKE_BENCHMARK(MarchingCubes3FTT, BenchMarchingCubes, 3, false, true, true);
|
||||||
VTKM_MAKE_BENCHMARK(MarchingCubes12FTT, BenchMarchingCubes, 12, false, true, true);
|
VTKM_MAKE_BENCHMARK(MarchingCubes12FTT, BenchMarchingCubes, 12, false, true, true);
|
||||||
|
|
||||||
template <typename>
|
template <typename, typename DeviceAdapter>
|
||||||
struct BenchExternalFaces
|
struct BenchExternalFaces
|
||||||
{
|
{
|
||||||
vtkm::filter::ExternalFaces Filter;
|
vtkm::filter::ExternalFaces Filter;
|
||||||
@ -550,7 +558,7 @@ class BenchmarkFilters
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -571,7 +579,7 @@ class BenchmarkFilters
|
|||||||
VTKM_MAKE_BENCHMARK(ExternalFaces, BenchExternalFaces, false);
|
VTKM_MAKE_BENCHMARK(ExternalFaces, BenchExternalFaces, false);
|
||||||
VTKM_MAKE_BENCHMARK(ExternalFacesCompact, BenchExternalFaces, true);
|
VTKM_MAKE_BENCHMARK(ExternalFacesCompact, BenchExternalFaces, true);
|
||||||
|
|
||||||
template <typename>
|
template <typename, typename DeviceAdapter>
|
||||||
struct BenchTetrahedralize
|
struct BenchTetrahedralize
|
||||||
{
|
{
|
||||||
vtkm::filter::Tetrahedralize Filter;
|
vtkm::filter::Tetrahedralize Filter;
|
||||||
@ -585,7 +593,7 @@ class BenchmarkFilters
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -596,7 +604,7 @@ class BenchmarkFilters
|
|||||||
};
|
};
|
||||||
VTKM_MAKE_BENCHMARK(Tetrahedralize, BenchTetrahedralize);
|
VTKM_MAKE_BENCHMARK(Tetrahedralize, BenchTetrahedralize);
|
||||||
|
|
||||||
template <typename>
|
template <typename, typename DeviceAdapter>
|
||||||
struct BenchVertexClustering
|
struct BenchVertexClustering
|
||||||
{
|
{
|
||||||
vtkm::filter::VertexClustering Filter;
|
vtkm::filter::VertexClustering Filter;
|
||||||
@ -611,7 +619,7 @@ class BenchmarkFilters
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
this->Filter.Execute(InputDataSet, BenchmarkFilterPolicy());
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -633,7 +641,7 @@ class BenchmarkFilters
|
|||||||
VTKM_MAKE_BENCHMARK(VertexClustering512, BenchVertexClustering, 512);
|
VTKM_MAKE_BENCHMARK(VertexClustering512, BenchVertexClustering, 512);
|
||||||
VTKM_MAKE_BENCHMARK(VertexClustering1024, BenchVertexClustering, 1024);
|
VTKM_MAKE_BENCHMARK(VertexClustering1024, BenchVertexClustering, 1024);
|
||||||
|
|
||||||
template <typename>
|
template <typename, typename DeviceAdapter>
|
||||||
struct BenchCellToPoint
|
struct BenchCellToPoint
|
||||||
{
|
{
|
||||||
struct PrepareForInput
|
struct PrepareForInput
|
||||||
@ -661,10 +669,10 @@ class BenchmarkFilters
|
|||||||
vtkm::TopologyElementTagPoint{});
|
vtkm::TopologyElementTagPoint{});
|
||||||
}
|
}
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
cellSet.PrepareForInput(
|
cellSet.PrepareForInput(
|
||||||
Device{}, vtkm::TopologyElementTagCell{}, vtkm::TopologyElementTagPoint{});
|
DeviceAdapter(), vtkm::TopologyElementTagCell{}, vtkm::TopologyElementTagPoint{});
|
||||||
this->Time = timer.GetElapsedTime();
|
this->Time = timer.GetElapsedTime();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -692,7 +700,7 @@ class BenchmarkFilters
|
|||||||
VTKM_MAKE_BENCHMARK(CellToPoint, BenchCellToPoint);
|
VTKM_MAKE_BENCHMARK(CellToPoint, BenchCellToPoint);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static VTKM_CONT int Run(int benches)
|
static VTKM_CONT int Run(int benches, vtkm::cont::DeviceAdapterId id)
|
||||||
{
|
{
|
||||||
// This has no influence on the benchmarks. See issue #286.
|
// This has no influence on the benchmarks. See issue #286.
|
||||||
auto dummyTypes = vtkm::ListTagBase<vtkm::Int32>{};
|
auto dummyTypes = vtkm::ListTagBase<vtkm::Int32>{};
|
||||||
@ -703,104 +711,104 @@ public:
|
|||||||
{
|
{
|
||||||
if (ReducedOptions)
|
if (ReducedOptions)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(GradientScalar, dummyTypes);
|
VTKM_RUN_BENCHMARK(GradientScalar, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(GradientVector, dummyTypes);
|
VTKM_RUN_BENCHMARK(GradientVector, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(GradientVectorRow, dummyTypes);
|
VTKM_RUN_BENCHMARK(GradientVectorRow, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(GradientKitchenSink, dummyTypes);
|
VTKM_RUN_BENCHMARK(GradientKitchenSink, dummyTypes, id);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(GradientScalar, dummyTypes);
|
VTKM_RUN_BENCHMARK(GradientScalar, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(GradientVector, dummyTypes);
|
VTKM_RUN_BENCHMARK(GradientVector, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(GradientVectorRow, dummyTypes);
|
VTKM_RUN_BENCHMARK(GradientVectorRow, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(GradientPoint, dummyTypes);
|
VTKM_RUN_BENCHMARK(GradientPoint, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(GradientDivergence, dummyTypes);
|
VTKM_RUN_BENCHMARK(GradientDivergence, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(GradientVorticity, dummyTypes);
|
VTKM_RUN_BENCHMARK(GradientVorticity, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(GradientQCriterion, dummyTypes);
|
VTKM_RUN_BENCHMARK(GradientQCriterion, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(GradientKitchenSink, dummyTypes);
|
VTKM_RUN_BENCHMARK(GradientKitchenSink, dummyTypes, id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (benches & BenchmarkName::THRESHOLD)
|
if (benches & BenchmarkName::THRESHOLD)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(Threshold, dummyTypes);
|
VTKM_RUN_BENCHMARK(Threshold, dummyTypes, id);
|
||||||
}
|
}
|
||||||
if (benches & BenchmarkName::THRESHOLD_POINTS)
|
if (benches & BenchmarkName::THRESHOLD_POINTS)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(ThresholdPoints, dummyTypes);
|
VTKM_RUN_BENCHMARK(ThresholdPoints, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(ThresholdPointsCompact, dummyTypes);
|
VTKM_RUN_BENCHMARK(ThresholdPointsCompact, dummyTypes, id);
|
||||||
}
|
}
|
||||||
if (benches & BenchmarkName::CELL_AVERAGE)
|
if (benches & BenchmarkName::CELL_AVERAGE)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(CellAverage, dummyTypes);
|
VTKM_RUN_BENCHMARK(CellAverage, dummyTypes, id);
|
||||||
}
|
}
|
||||||
if (benches & BenchmarkName::POINT_AVERAGE)
|
if (benches & BenchmarkName::POINT_AVERAGE)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(PointAverage, dummyTypes);
|
VTKM_RUN_BENCHMARK(PointAverage, dummyTypes, id);
|
||||||
}
|
}
|
||||||
if (benches & BenchmarkName::WARP_SCALAR)
|
if (benches & BenchmarkName::WARP_SCALAR)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(WarpScalar, dummyTypes);
|
VTKM_RUN_BENCHMARK(WarpScalar, dummyTypes, id);
|
||||||
}
|
}
|
||||||
if (benches & BenchmarkName::WARP_VECTOR)
|
if (benches & BenchmarkName::WARP_VECTOR)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(WarpVector, dummyTypes);
|
VTKM_RUN_BENCHMARK(WarpVector, dummyTypes, id);
|
||||||
}
|
}
|
||||||
if (benches & BenchmarkName::MARCHING_CUBES)
|
if (benches & BenchmarkName::MARCHING_CUBES)
|
||||||
{
|
{
|
||||||
if (ReducedOptions)
|
if (ReducedOptions)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes1FFF, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes1FFF, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes12FFF, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes12FFF, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes12TFF, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes12TFF, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes12FTF, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes12FTF, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes12FTT, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes12FTT, dummyTypes, id);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes1FFF, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes1FFF, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes3FFF, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes3FFF, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes12FFF, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes12FFF, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes1TFF, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes1TFF, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes3TFF, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes3TFF, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes12TFF, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes12TFF, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes1FTF, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes1FTF, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes3FTF, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes3FTF, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes12FTF, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes12FTF, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes1FTT, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes1FTT, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes3FTT, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes3FTT, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(MarchingCubes12FTT, dummyTypes);
|
VTKM_RUN_BENCHMARK(MarchingCubes12FTT, dummyTypes, id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (benches & BenchmarkName::EXTERNAL_FACES)
|
if (benches & BenchmarkName::EXTERNAL_FACES)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(ExternalFaces, dummyTypes);
|
VTKM_RUN_BENCHMARK(ExternalFaces, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(ExternalFacesCompact, dummyTypes);
|
VTKM_RUN_BENCHMARK(ExternalFacesCompact, dummyTypes, id);
|
||||||
}
|
}
|
||||||
if (benches & BenchmarkName::TETRAHEDRALIZE)
|
if (benches & BenchmarkName::TETRAHEDRALIZE)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(Tetrahedralize, dummyTypes);
|
VTKM_RUN_BENCHMARK(Tetrahedralize, dummyTypes, id);
|
||||||
}
|
}
|
||||||
if (benches & BenchmarkName::VERTEX_CLUSTERING)
|
if (benches & BenchmarkName::VERTEX_CLUSTERING)
|
||||||
{
|
{
|
||||||
if (ReducedOptions)
|
if (ReducedOptions)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(VertexClustering32, dummyTypes);
|
VTKM_RUN_BENCHMARK(VertexClustering32, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(VertexClustering256, dummyTypes);
|
VTKM_RUN_BENCHMARK(VertexClustering256, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(VertexClustering1024, dummyTypes);
|
VTKM_RUN_BENCHMARK(VertexClustering1024, dummyTypes, id);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(VertexClustering32, dummyTypes);
|
VTKM_RUN_BENCHMARK(VertexClustering32, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(VertexClustering64, dummyTypes);
|
VTKM_RUN_BENCHMARK(VertexClustering64, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(VertexClustering128, dummyTypes);
|
VTKM_RUN_BENCHMARK(VertexClustering128, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(VertexClustering256, dummyTypes);
|
VTKM_RUN_BENCHMARK(VertexClustering256, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(VertexClustering512, dummyTypes);
|
VTKM_RUN_BENCHMARK(VertexClustering512, dummyTypes, id);
|
||||||
VTKM_RUN_BENCHMARK(VertexClustering1024, dummyTypes);
|
VTKM_RUN_BENCHMARK(VertexClustering1024, dummyTypes, id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (benches & BenchmarkName::CELL_TO_POINT)
|
if (benches & BenchmarkName::CELL_TO_POINT)
|
||||||
{
|
{
|
||||||
VTKM_RUN_BENCHMARK(CellToPoint, dummyTypes);
|
VTKM_RUN_BENCHMARK(CellToPoint, dummyTypes, id);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -936,7 +944,6 @@ void CreateFields(bool needPointScalars, bool needCellScalars, bool needPointVec
|
|||||||
|
|
||||||
PointVectorGenerator worklet(bounds);
|
PointVectorGenerator worklet(bounds);
|
||||||
vtkm::worklet::DispatcherMapField<PointVectorGenerator> dispatch(worklet);
|
vtkm::worklet::DispatcherMapField<PointVectorGenerator> dispatch(worklet);
|
||||||
dispatch.SetDevice(Device());
|
|
||||||
dispatch.Invoke(points, pvecs);
|
dispatch.Invoke(points, pvecs);
|
||||||
InputDataSet.AddField(
|
InputDataSet.AddField(
|
||||||
vtkm::cont::Field("GeneratedPointVectors", vtkm::cont::Field::Association::POINTS, pvecs));
|
vtkm::cont::Field("GeneratedPointVectors", vtkm::cont::Field::Association::POINTS, pvecs));
|
||||||
@ -1050,18 +1057,21 @@ void AssertFields(bool needPointScalars, bool needCellScalars, bool needPointVec
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int BenchmarkBody(int argc, char* argv[])
|
int BenchmarkBody(const std::vector<std::string>& argv, vtkm::cont::DeviceAdapterId id)
|
||||||
{
|
{
|
||||||
int numThreads = 1;
|
int numThreads = 1;
|
||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
#ifdef VTKM_ENABLE_TBB
|
||||||
numThreads = tbb::task_scheduler_init::automatic;
|
if (id == vtkm::cont::DeviceAdapterTagTBB())
|
||||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
{
|
||||||
numThreads = omp_get_max_threads();
|
numThreads = tbb::task_scheduler_init::automatic;
|
||||||
#endif // TBB
|
}
|
||||||
|
#endif
|
||||||
// Force the requested device in case a tracker is used internally by a filter:
|
#ifdef VTKM_ENABLE_OPENMP
|
||||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
if (id == vtkm::cont::DeviceAdapterTagOpenMP())
|
||||||
tracker.ForceDevice(Device());
|
{
|
||||||
|
numThreads = omp_get_max_threads();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
int benches = BenchmarkName::NONE;
|
int benches = BenchmarkName::NONE;
|
||||||
std::string filename;
|
std::string filename;
|
||||||
@ -1073,7 +1083,7 @@ int BenchmarkBody(int argc, char* argv[])
|
|||||||
|
|
||||||
ReducedOptions = false;
|
ReducedOptions = false;
|
||||||
|
|
||||||
for (int i = 1; i < argc; ++i)
|
for (size_t i = 0; i < argv.size(); ++i)
|
||||||
{
|
{
|
||||||
std::string arg = argv[i];
|
std::string arg = argv[i];
|
||||||
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
||||||
@ -1174,13 +1184,11 @@ int BenchmarkBody(int argc, char* argv[])
|
|||||||
else if (arg == "numthreads")
|
else if (arg == "numthreads")
|
||||||
{
|
{
|
||||||
++i;
|
++i;
|
||||||
if (Device{} == vtkm::cont::DeviceAdapterTagOpenMP{} ||
|
if (id == vtkm::cont::DeviceAdapterTagOpenMP() || id == vtkm::cont::DeviceAdapterTagTBB())
|
||||||
Device{} == vtkm::cont::DeviceAdapterTagTBB{})
|
|
||||||
{
|
{
|
||||||
std::istringstream parse(argv[i]);
|
std::istringstream parse(argv[i]);
|
||||||
parse >> numThreads;
|
parse >> numThreads;
|
||||||
std::cout << "Selected " << numThreads << " " << DevTraits::GetName() << " threads."
|
std::cout << "Selected " << numThreads << " " << id.GetName() << " threads." << std::endl;
|
||||||
<< std::endl;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1194,12 +1202,19 @@ int BenchmarkBody(int argc, char* argv[])
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
#ifdef VTKM_ENABLE_TBB
|
||||||
// Must not be destroyed as long as benchmarks are running:
|
// Must not be destroyed as long as benchmarks are running:
|
||||||
tbb::task_scheduler_init init(numThreads);
|
if (id == vtkm::cont::DeviceAdapterTagTBB())
|
||||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
{
|
||||||
omp_set_num_threads(numThreads);
|
tbb::task_scheduler_init init(numThreads);
|
||||||
#endif // TBB
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef VTKM_ENABLE_OPENMP
|
||||||
|
if (id == vtkm::cont::DeviceAdapterTagOpenMP())
|
||||||
|
{
|
||||||
|
omp_set_num_threads(numThreads);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (benches == BenchmarkName::NONE)
|
if (benches == BenchmarkName::NONE)
|
||||||
{
|
{
|
||||||
@ -1222,7 +1237,10 @@ int BenchmarkBody(int argc, char* argv[])
|
|||||||
<< " wavelet...\n";
|
<< " wavelet...\n";
|
||||||
vtkm::worklet::WaveletGenerator gen;
|
vtkm::worklet::WaveletGenerator gen;
|
||||||
gen.SetExtent({ 0 }, { waveletDim });
|
gen.SetExtent({ 0 }, { waveletDim });
|
||||||
InputDataSet = gen.GenerateDataSet<Device>();
|
|
||||||
|
// WaveletGenerator needs a template device argument, not an id, to deduce the portal type.
|
||||||
|
WaveletGeneratorDataFunctor genFunctor;
|
||||||
|
vtkm::cont::TryExecuteOnDevice(id, genFunctor, gen);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tetra)
|
if (tetra)
|
||||||
@ -1271,7 +1289,7 @@ int BenchmarkBody(int argc, char* argv[])
|
|||||||
std::cout << "\n";
|
std::cout << "\n";
|
||||||
|
|
||||||
//now actually execute the benchmarks
|
//now actually execute the benchmarks
|
||||||
int result = BenchmarkFilters<Device>::Run(benches);
|
int result = BenchmarkFilters::Run(benches, id);
|
||||||
|
|
||||||
// Explicitly free resources before exit.
|
// Explicitly free resources before exit.
|
||||||
InputDataSet.Clear();
|
InputDataSet.Clear();
|
||||||
@ -1283,15 +1301,13 @@ int BenchmarkBody(int argc, char* argv[])
|
|||||||
|
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
vtkm::cont::Initialize(argc, argv);
|
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
|
||||||
|
auto config = vtkm::cont::Initialize(argc, argv, opts);
|
||||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
|
||||||
tracker.ForceDevice(Device{});
|
|
||||||
|
|
||||||
int retval = 1;
|
int retval = 1;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
retval = BenchmarkBody(argc, argv);
|
retval = BenchmarkBody(config.Arguments, config.Device);
|
||||||
}
|
}
|
||||||
catch (std::exception& e)
|
catch (std::exception& e)
|
||||||
{
|
{
|
||||||
|
@ -47,7 +47,7 @@ namespace vtkm
|
|||||||
namespace benchmarking
|
namespace benchmarking
|
||||||
{
|
{
|
||||||
|
|
||||||
template <typename Precision>
|
template <typename Precision, typename DeviceAdapter>
|
||||||
struct BenchRayTracing
|
struct BenchRayTracing
|
||||||
{
|
{
|
||||||
vtkm::rendering::raytracing::RayTracer Tracer;
|
vtkm::rendering::raytracing::RayTracer Tracer;
|
||||||
@ -118,9 +118,7 @@ struct BenchRayTracing
|
|||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
{
|
{
|
||||||
|
vtkm::cont::Timer timer{ DeviceAdapter() };
|
||||||
|
|
||||||
vtkm::cont::Timer timer;
|
|
||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
RayCamera.CreateRays(Rays, Coords.GetBounds());
|
RayCamera.CreateRays(Rays, Coords.GetBounds());
|
||||||
@ -147,12 +145,9 @@ VTKM_MAKE_BENCHMARK(RayTracing, BenchRayTracing);
|
|||||||
|
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
vtkm::cont::InitLogging(argc, argv);
|
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
|
||||||
|
auto config = vtkm::cont::Initialize(argc, argv, opts);
|
||||||
|
|
||||||
using Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG;
|
VTKM_RUN_BENCHMARK(RayTracing, vtkm::ListTagBase<vtkm::Float32>(), config.Device);
|
||||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
|
||||||
tracker.ForceDevice(Device{});
|
|
||||||
|
|
||||||
VTKM_RUN_BENCHMARK(RayTracing, vtkm::ListTagBase<vtkm::Float32>());
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -135,7 +135,6 @@ struct ValueTypes
|
|||||||
|
|
||||||
/// This class runs a series of micro-benchmarks to measure
|
/// This class runs a series of micro-benchmarks to measure
|
||||||
/// performance of different field operations
|
/// performance of different field operations
|
||||||
template <class DeviceAdapterTag>
|
|
||||||
class BenchmarkTopologyAlgorithms
|
class BenchmarkTopologyAlgorithms
|
||||||
{
|
{
|
||||||
using StorageTag = vtkm::cont::StorageTagBasic;
|
using StorageTag = vtkm::cont::StorageTagBasic;
|
||||||
@ -177,7 +176,7 @@ private:
|
|||||||
T next() { return distribution(rng); }
|
T next() { return distribution(rng); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchCellToPointAvg
|
struct BenchCellToPointAvg
|
||||||
{
|
{
|
||||||
std::vector<Value> input;
|
std::vector<Value> input;
|
||||||
@ -205,11 +204,10 @@ private:
|
|||||||
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
|
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
|
||||||
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
vtkm::worklet::DispatcherMapTopology<AverageCellToPoint> dispatcher;
|
vtkm::worklet::DispatcherMapTopology<AverageCellToPoint> dispatcher;
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(this->InputHandle, cellSet, result);
|
dispatcher.Invoke(this->InputHandle, cellSet, result);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -229,8 +227,8 @@ private:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchCellToPointAvgDynamic : public BenchCellToPointAvg<Value>
|
struct BenchCellToPointAvgDynamic : public BenchCellToPointAvg<Value, DeviceAdapter>
|
||||||
{
|
{
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
@ -242,11 +240,10 @@ private:
|
|||||||
ValueVariantHandle dinput(this->InputHandle);
|
ValueVariantHandle dinput(this->InputHandle);
|
||||||
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
vtkm::worklet::DispatcherMapTopology<AverageCellToPoint> dispatcher;
|
vtkm::worklet::DispatcherMapTopology<AverageCellToPoint> dispatcher;
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(dinput, cellSet, result);
|
dispatcher.Invoke(dinput, cellSet, result);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -258,7 +255,7 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(CellToPointAvg, BenchCellToPointAvg);
|
VTKM_MAKE_BENCHMARK(CellToPointAvg, BenchCellToPointAvg);
|
||||||
VTKM_MAKE_BENCHMARK(CellToPointAvgDynamic, BenchCellToPointAvgDynamic);
|
VTKM_MAKE_BENCHMARK(CellToPointAvgDynamic, BenchCellToPointAvgDynamic);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchPointToCellAvg
|
struct BenchPointToCellAvg
|
||||||
{
|
{
|
||||||
std::vector<Value> input;
|
std::vector<Value> input;
|
||||||
@ -286,11 +283,10 @@ private:
|
|||||||
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
|
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
|
||||||
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
vtkm::worklet::DispatcherMapTopology<AveragePointToCell> dispatcher;
|
vtkm::worklet::DispatcherMapTopology<AveragePointToCell> dispatcher;
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(this->InputHandle, cellSet, result);
|
dispatcher.Invoke(this->InputHandle, cellSet, result);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -310,8 +306,8 @@ private:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchPointToCellAvgDynamic : public BenchPointToCellAvg<Value>
|
struct BenchPointToCellAvgDynamic : public BenchPointToCellAvg<Value, DeviceAdapter>
|
||||||
{
|
{
|
||||||
|
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
@ -323,11 +319,10 @@ private:
|
|||||||
ValueVariantHandle dinput(this->InputHandle);
|
ValueVariantHandle dinput(this->InputHandle);
|
||||||
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
vtkm::cont::ArrayHandle<Value, StorageTag> result;
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
vtkm::worklet::DispatcherMapTopology<AveragePointToCell> dispatcher;
|
vtkm::worklet::DispatcherMapTopology<AveragePointToCell> dispatcher;
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(dinput, cellSet, result);
|
dispatcher.Invoke(dinput, cellSet, result);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -339,7 +334,7 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(PointToCellAvg, BenchPointToCellAvg);
|
VTKM_MAKE_BENCHMARK(PointToCellAvg, BenchPointToCellAvg);
|
||||||
VTKM_MAKE_BENCHMARK(PointToCellAvgDynamic, BenchPointToCellAvgDynamic);
|
VTKM_MAKE_BENCHMARK(PointToCellAvgDynamic, BenchPointToCellAvgDynamic);
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchClassification
|
struct BenchClassification
|
||||||
{
|
{
|
||||||
std::vector<Value> input;
|
std::vector<Value> input;
|
||||||
@ -371,12 +366,11 @@ private:
|
|||||||
|
|
||||||
ValueVariantHandle dinput(this->InputHandle);
|
ValueVariantHandle dinput(this->InputHandle);
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
Classification<Value> worklet(this->IsoValue);
|
Classification<Value> worklet(this->IsoValue);
|
||||||
vtkm::worklet::DispatcherMapTopology<Classification<Value>> dispatcher(worklet);
|
vtkm::worklet::DispatcherMapTopology<Classification<Value>> dispatcher(worklet);
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(dinput, cellSet, result);
|
dispatcher.Invoke(dinput, cellSet, result);
|
||||||
|
|
||||||
return timer.GetElapsedTime();
|
return timer.GetElapsedTime();
|
||||||
@ -396,8 +390,8 @@ private:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value, typename DeviceAdapter>
|
||||||
struct BenchClassificationDynamic : public BenchClassification<Value>
|
struct BenchClassificationDynamic : public BenchClassification<Value, DeviceAdapter>
|
||||||
{
|
{
|
||||||
VTKM_CONT
|
VTKM_CONT
|
||||||
vtkm::Float64 operator()()
|
vtkm::Float64 operator()()
|
||||||
@ -406,12 +400,11 @@ private:
|
|||||||
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
|
cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
|
||||||
vtkm::cont::ArrayHandle<vtkm::IdComponent, StorageTag> result;
|
vtkm::cont::ArrayHandle<vtkm::IdComponent, StorageTag> result;
|
||||||
|
|
||||||
Timer timer{ DeviceAdapterTag() };
|
Timer timer{ DeviceAdapter() };
|
||||||
timer.Start();
|
timer.Start();
|
||||||
|
|
||||||
Classification<Value> worklet(this->IsoValue);
|
Classification<Value> worklet(this->IsoValue);
|
||||||
vtkm::worklet::DispatcherMapTopology<Classification<Value>> dispatcher(worklet);
|
vtkm::worklet::DispatcherMapTopology<Classification<Value>> dispatcher(worklet);
|
||||||
dispatcher.SetDevice(DeviceAdapterTag());
|
|
||||||
dispatcher.Invoke(this->InputHandle, cellSet, result);
|
dispatcher.Invoke(this->InputHandle, cellSet, result);
|
||||||
|
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
@ -425,29 +418,29 @@ private:
|
|||||||
VTKM_MAKE_BENCHMARK(ClassificationDynamic, BenchClassificationDynamic);
|
VTKM_MAKE_BENCHMARK(ClassificationDynamic, BenchClassificationDynamic);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static VTKM_CONT int Run(int benchmarks)
|
static VTKM_CONT int Run(int benchmarks, vtkm::cont::DeviceAdapterId id)
|
||||||
{
|
{
|
||||||
std::cout << DIVIDER << "\nRunning Topology Algorithm benchmarks\n";
|
std::cout << DIVIDER << "\nRunning Topology Algorithm benchmarks\n";
|
||||||
|
|
||||||
if (benchmarks & CELL_TO_POINT)
|
if (benchmarks & CELL_TO_POINT)
|
||||||
{
|
{
|
||||||
std::cout << DIVIDER << "\nBenchmarking Cell To Point Average\n";
|
std::cout << DIVIDER << "\nBenchmarking Cell To Point Average\n";
|
||||||
VTKM_RUN_BENCHMARK(CellToPointAvg, ValueTypes());
|
VTKM_RUN_BENCHMARK(CellToPointAvg, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(CellToPointAvgDynamic, ValueTypes());
|
VTKM_RUN_BENCHMARK(CellToPointAvgDynamic, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (benchmarks & POINT_TO_CELL)
|
if (benchmarks & POINT_TO_CELL)
|
||||||
{
|
{
|
||||||
std::cout << DIVIDER << "\nBenchmarking Point to Cell Average\n";
|
std::cout << DIVIDER << "\nBenchmarking Point to Cell Average\n";
|
||||||
VTKM_RUN_BENCHMARK(PointToCellAvg, ValueTypes());
|
VTKM_RUN_BENCHMARK(PointToCellAvg, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(PointToCellAvgDynamic, ValueTypes());
|
VTKM_RUN_BENCHMARK(PointToCellAvgDynamic, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (benchmarks & MC_CLASSIFY)
|
if (benchmarks & MC_CLASSIFY)
|
||||||
{
|
{
|
||||||
std::cout << DIVIDER << "\nBenchmarking Hex/Voxel MC Classification\n";
|
std::cout << DIVIDER << "\nBenchmarking Hex/Voxel MC Classification\n";
|
||||||
VTKM_RUN_BENCHMARK(Classification, ValueTypes());
|
VTKM_RUN_BENCHMARK(Classification, ValueTypes(), id);
|
||||||
VTKM_RUN_BENCHMARK(ClassificationDynamic, ValueTypes());
|
VTKM_RUN_BENCHMARK(ClassificationDynamic, ValueTypes(), id);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -460,18 +453,19 @@ public:
|
|||||||
|
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
vtkm::cont::InitLogging(argc, argv);
|
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
|
||||||
|
auto config = vtkm::cont::Initialize(argc, argv, opts);
|
||||||
|
|
||||||
int benchmarks = 0;
|
int benchmarks = 0;
|
||||||
if (argc < 2)
|
if (!config.Arguments.size())
|
||||||
{
|
{
|
||||||
benchmarks = vtkm::benchmarking::ALL;
|
benchmarks = vtkm::benchmarking::ALL;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int i = 1; i < argc; ++i)
|
for (size_t i = 0; i < config.Arguments.size(); ++i)
|
||||||
{
|
{
|
||||||
std::string arg = argv[i];
|
std::string arg = config.Arguments[i];
|
||||||
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
||||||
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||||
});
|
});
|
||||||
@ -489,16 +483,13 @@ int main(int argc, char* argv[])
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::cout << "Unrecognized benchmark: " << argv[i] << std::endl;
|
std::cout << "Unrecognized benchmark: " << config.Arguments[i] << std::endl;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//now actually execute the benchmarks
|
//now actually execute the benchmarks
|
||||||
using Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG;
|
|
||||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
|
||||||
tracker.ForceDevice(Device{});
|
|
||||||
|
|
||||||
return vtkm::benchmarking::BenchmarkTopologyAlgorithms<Device>::Run(benchmarks);
|
return vtkm::benchmarking::BenchmarkTopologyAlgorithms::Run(benchmarks, config.Device);
|
||||||
}
|
}
|
||||||
|
@ -23,6 +23,8 @@
|
|||||||
|
|
||||||
#include <vtkm/ListTag.h>
|
#include <vtkm/ListTag.h>
|
||||||
#include <vtkm/Math.h>
|
#include <vtkm/Math.h>
|
||||||
|
#include <vtkm/cont/TryExecute.h>
|
||||||
|
#include <vtkm/cont/internal/DeviceAdapterTag.h>
|
||||||
#include <vtkm/cont/testing/Testing.h>
|
#include <vtkm/cont/testing/Testing.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@ -83,17 +85,18 @@
|
|||||||
/*
|
/*
|
||||||
* Use the VTKM_MAKE_BENCHMARK macro to define a maker functor for your benchmark.
|
* Use the VTKM_MAKE_BENCHMARK macro to define a maker functor for your benchmark.
|
||||||
* This is used to allow you to template the benchmark functor on the type being benchmarked
|
* This is used to allow you to template the benchmark functor on the type being benchmarked
|
||||||
* so you can write init code in the constructor. Then the maker will return a constructed
|
* and the device adapter so you can write init code in the constructor. Then the maker will
|
||||||
* instance of your benchmark for the type being benchmarked. The VA_ARGS are used to
|
* return a constructed instance of your benchmark for the type being benchmarked.
|
||||||
* pass any extra arguments needed by your benchmark
|
* The VA_ARGS are used to pass any extra arguments needed by your benchmark
|
||||||
*/
|
*/
|
||||||
#define VTKM_MAKE_BENCHMARK(Name, Bench, ...) \
|
#define VTKM_MAKE_BENCHMARK(Name, Bench, ...) \
|
||||||
struct MakeBench##Name \
|
struct MakeBench##Name \
|
||||||
{ \
|
{ \
|
||||||
template <typename Value> \
|
template <typename Value, typename DeviceAdapter> \
|
||||||
VTKM_CONT Bench<Value> operator()(const Value vtkmNotUsed(v)) const \
|
VTKM_CONT Bench<Value, DeviceAdapter> operator()(const Value vtkmNotUsed(v), \
|
||||||
|
DeviceAdapter vtkmNotUsed(id)) const \
|
||||||
{ \
|
{ \
|
||||||
return Bench<Value>(__VA_ARGS__); \
|
return Bench<Value, DeviceAdapter>(__VA_ARGS__); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -102,8 +105,8 @@
|
|||||||
* You must have previously defined a maker functor with VTKM_MAKE_BENCHMARK that this
|
* You must have previously defined a maker functor with VTKM_MAKE_BENCHMARK that this
|
||||||
* macro will look for and use
|
* macro will look for and use
|
||||||
*/
|
*/
|
||||||
#define VTKM_RUN_BENCHMARK(Name, Types) \
|
#define VTKM_RUN_BENCHMARK(Name, Types, Id) \
|
||||||
vtkm::benchmarking::BenchmarkTypes(MakeBench##Name(), (Types))
|
vtkm::benchmarking::BenchmarkTypes(MakeBench##Name(), (Types), (Id))
|
||||||
|
|
||||||
namespace vtkm
|
namespace vtkm
|
||||||
{
|
{
|
||||||
@ -223,7 +226,7 @@ vtkm::Float64 MedianAbsDeviation(const std::vector<vtkm::Float64>& samples)
|
|||||||
* in seconds, this lets us avoid including any per-run setup time in the benchmark.
|
* in seconds, this lets us avoid including any per-run setup time in the benchmark.
|
||||||
* However any one-time setup should be done in the functor's constructor
|
* However any one-time setup should be done in the functor's constructor
|
||||||
*/
|
*/
|
||||||
class Benchmarker
|
struct Benchmarker
|
||||||
{
|
{
|
||||||
std::vector<vtkm::Float64> Samples;
|
std::vector<vtkm::Float64> Samples;
|
||||||
std::string BenchmarkName;
|
std::string BenchmarkName;
|
||||||
@ -286,11 +289,13 @@ public:
|
|||||||
<< "\tmax = " << this->Samples.back() << "s\n";
|
<< "\tmax = " << this->Samples.back() << "s\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Functor>
|
template <typename DeviceAdapter, typename MakerFunctor, typename T>
|
||||||
VTKM_CONT void operator()(Functor func)
|
VTKM_CONT bool operator()(DeviceAdapter id, MakerFunctor&& makerFunctor, T t)
|
||||||
{
|
{
|
||||||
|
auto func = makerFunctor(t, id);
|
||||||
this->GatherSamples(func);
|
this->GatherSamples(func);
|
||||||
this->PrintSummary();
|
this->PrintSummary();
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
VTKM_CONT const std::vector<vtkm::Float64>& GetSamples() const { return this->Samples; }
|
VTKM_CONT const std::vector<vtkm::Float64>& GetSamples() const { return this->Samples; }
|
||||||
@ -315,13 +320,14 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
VTKM_CONT void operator()(T t) const
|
VTKM_CONT void operator()(T t, vtkm::cont::DeviceAdapterId id) const
|
||||||
{
|
{
|
||||||
std::cout << "*** " << vtkm::testing::TypeName<T>::Name() << " ***************" << std::endl;
|
std::cout << "*** " << vtkm::testing::TypeName<T>::Name() << " on device " << id.GetName()
|
||||||
|
<< " ***************" << std::endl;
|
||||||
Benchmarker bench;
|
Benchmarker bench;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
bench(Maker(t));
|
vtkm::cont::TryExecuteOnDevice(id, bench, Maker, t);
|
||||||
}
|
}
|
||||||
catch (std::exception& e)
|
catch (std::exception& e)
|
||||||
{
|
{
|
||||||
@ -333,9 +339,10 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <class MakerFunctor, class TypeList>
|
template <class MakerFunctor, class TypeList>
|
||||||
VTKM_CONT void BenchmarkTypes(const MakerFunctor& maker, TypeList)
|
VTKM_CONT void BenchmarkTypes(MakerFunctor&& maker, TypeList, vtkm::cont::DeviceAdapterId id)
|
||||||
{
|
{
|
||||||
vtkm::ListForEach(InternalPrintTypeAndBench<MakerFunctor>(maker), TypeList());
|
vtkm::ListForEach(
|
||||||
|
InternalPrintTypeAndBench<MakerFunctor>(std::forward<MakerFunctor>(maker)), TypeList(), id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,43 +17,32 @@
|
|||||||
## Laboratory (LANL), the U.S. Government retains certain rights in
|
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
## this software.
|
## this software.
|
||||||
##============================================================================
|
##============================================================================
|
||||||
function(add_benchmark name files lib)
|
function(add_benchmark)
|
||||||
set(benchmarks )
|
set(options)
|
||||||
|
set(oneValueArgs NAME FILE)
|
||||||
add_executable(${name}_SERIAL ${files})
|
set(multiValueArgs LIBS)
|
||||||
list(APPEND benchmarks ${name}_SERIAL)
|
cmake_parse_arguments(VTKm_AB
|
||||||
target_compile_definitions(${name}_SERIAL PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_SERIAL")
|
"${options}" "${oneValueArgs}" "${multiValueArgs}"
|
||||||
|
${ARGN}
|
||||||
if (TARGET vtkm::tbb)
|
)
|
||||||
add_executable(${name}_TBB ${files})
|
set(exe_name ${VTKm_AB_NAME})
|
||||||
list(APPEND benchmarks ${name}_TBB)
|
|
||||||
target_compile_definitions(${name}_TBB PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_TBB")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (TARGET vtkm::openmp)
|
|
||||||
add_executable(${name}_OPENMP ${files})
|
|
||||||
list(APPEND benchmarks ${name}_OPENMP)
|
|
||||||
target_compile_definitions(${name}_OPENMP PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_OPENMP")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (TARGET vtkm::cuda)
|
if (TARGET vtkm::cuda)
|
||||||
get_filename_component(fname "${name}" NAME_WE)
|
get_filename_component(fname ${VTKm_AB_FILE} NAME_WE)
|
||||||
get_filename_component(fullpath "${name}.cxx" ABSOLUTE)
|
get_filename_component(fullpath ${VTKm_AB_FILE} ABSOLUTE)
|
||||||
file(GENERATE
|
file(GENERATE
|
||||||
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${fname}.cu
|
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${fname}.cu
|
||||||
CONTENT "#include \"${fullpath}\"")
|
CONTENT "#include \"${fullpath}\"")
|
||||||
|
|
||||||
add_executable(${name}_CUDA ${CMAKE_CURRENT_BINARY_DIR}/${fname}.cu)
|
add_executable(${exe_name} ${CMAKE_CURRENT_BINARY_DIR}/${fname}.cu)
|
||||||
list(APPEND benchmarks ${name}_CUDA)
|
set_property(TARGET ${exe_name} PROPERTY CUDA_SEPARABLE_COMPILATION ON)
|
||||||
target_compile_definitions(${name}_CUDA PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_CUDA")
|
else()
|
||||||
|
add_executable(${exe_name} ${VTKm_AB_FILE})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
foreach(benchmark ${benchmarks})
|
target_link_libraries(${exe_name} PRIVATE ${VTKm_AB_LIBS})
|
||||||
target_link_libraries(${benchmark} PRIVATE ${lib})
|
set_target_properties(${exe_name} PROPERTIES
|
||||||
set_target_properties(${benchmark} PROPERTIES
|
RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH}
|
||||||
RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH}
|
)
|
||||||
)
|
|
||||||
endforeach()
|
|
||||||
|
|
||||||
|
|
||||||
endfunction()
|
endfunction()
|
||||||
@ -69,9 +58,9 @@ set(benchmarks
|
|||||||
)
|
)
|
||||||
|
|
||||||
foreach (benchmark ${benchmarks})
|
foreach (benchmark ${benchmarks})
|
||||||
add_benchmark(${benchmark} ${benchmark}.cxx vtkm_filter)
|
add_benchmark(NAME ${benchmark} FILE ${benchmark}.cxx LIBS vtkm_filter vtkm_cont)
|
||||||
endforeach ()
|
endforeach ()
|
||||||
|
|
||||||
if(TARGET vtkm_rendering)
|
if(TARGET vtkm_rendering)
|
||||||
add_benchmark(BenchmarkRayTracing BenchmarkRayTracing.cxx vtkm_rendering)
|
add_benchmark(NAME BenchmarkRayTracing FILE BenchmarkRayTracing.cxx LIBS vtkm_rendering)
|
||||||
endif()
|
endif()
|
||||||
|
6
docs/changelog/merge-benchmark-executables.md
Normal file
6
docs/changelog/merge-benchmark-executables.md
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# Merge benchmark executables into a device dependent shared library
|
||||||
|
|
||||||
|
VTK-m has been updated to replace old per device benchmark executables with a device
|
||||||
|
dependent shared library so that it's able to accept a device adapter at runtime through
|
||||||
|
the "--device=" argument.
|
||||||
|
|
@ -122,7 +122,7 @@ struct ReduceFunctor
|
|||||||
U result;
|
U result;
|
||||||
|
|
||||||
ReduceFunctor()
|
ReduceFunctor()
|
||||||
: result(U(0))
|
: result(vtkm::TypeTraits<U>::ZeroInitialization())
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -148,12 +148,13 @@ struct ReduceByKeyFunctor
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T>
|
template <typename U>
|
||||||
struct ScanInclusiveResultFunctor
|
struct ScanInclusiveResultFunctor
|
||||||
{
|
{
|
||||||
T result;
|
U result;
|
||||||
|
|
||||||
ScanInclusiveResultFunctor()
|
ScanInclusiveResultFunctor()
|
||||||
: result(T(0))
|
: result(vtkm::TypeTraits<U>::ZeroInitialization())
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user