diff --git a/vtkm/benchmarking/BenchmarkDeviceAdapter.cxx b/vtkm/benchmarking/BenchmarkDeviceAdapter.cxx index 20a49e1a0..44cffefb2 100644 --- a/vtkm/benchmarking/BenchmarkDeviceAdapter.cxx +++ b/vtkm/benchmarking/BenchmarkDeviceAdapter.cxx @@ -22,9 +22,58 @@ #include -int BenchmarkDeviceAdapter(int, char *[]) +#include +#include +#include +#include + +int BenchmarkDeviceAdapter(int argc, char *argv[]) { - return vtkm::benchmarking::BenchmarkDeviceAdapter - ::Run(); + int benchmarks = 0; + if (argc < 2){ + benchmarks = vtkm::benchmarking::ALL; + } + else { + for (int i = 1; i < argc; ++i){ + std::string arg = argv[i]; + std::transform(arg.begin(), arg.end(), arg.begin(), ::tolower); + if (arg == "lowerbounds"){ + benchmarks |= vtkm::benchmarking::LOWER_BOUNDS; + } + else if (arg == "reduce"){ + benchmarks |= vtkm::benchmarking::REDUCE; + } + else if (arg == "reducebykey"){ + benchmarks |= vtkm::benchmarking::REDUCE_BY_KEY; + } + else if (arg == "scaninclusive"){ + benchmarks |= vtkm::benchmarking::SCAN_INCLUSIVE; + } + else if (arg == "scanexclusive"){ + benchmarks |= vtkm::benchmarking::SCAN_EXCLUSIVE; + } + else if (arg == "sort"){ + benchmarks |= vtkm::benchmarking::SORT; + } + else if (arg == "sortbykey"){ + benchmarks |= vtkm::benchmarking::SORT_BY_KEY; + } + else if (arg == "streamcompact"){ + benchmarks |= vtkm::benchmarking::STREAM_COMPACT; + } + else if (arg == "unique"){ + benchmarks |= vtkm::benchmarking::UNIQUE; + } + else if (arg == "upperbounds"){ + benchmarks |= vtkm::benchmarking::UPPER_BOUNDS; + } + else { + std::cout << "Unrecognized benchmark: " << argv[i] << std::endl; + return 1; + } + } + } + return vtkm::benchmarking::BenchmarkDeviceAdapter + ::Run(benchmarks); } diff --git a/vtkm/benchmarking/BenchmarkDeviceAdapter.h b/vtkm/benchmarking/BenchmarkDeviceAdapter.h index 9309ab79f..37ed747c3 100644 --- a/vtkm/benchmarking/BenchmarkDeviceAdapter.h +++ b/vtkm/benchmarking/BenchmarkDeviceAdapter.h @@ -33,10 +33,9 @@ #include #include #include - 
#include - #include +#include #include @@ -58,16 +57,30 @@ namespace vtkm { namespace benchmarking { -#define ARRAY_SIZE (1 << 20) +#define ARRAY_SIZE (1 << 21) const static std::string DIVIDER(40, '-'); +enum BenchmarkName { + LOWER_BOUNDS = 1, + REDUCE = 1 << 1, + REDUCE_BY_KEY = 1 << 2, + SCAN_INCLUSIVE = 1 << 3, + SCAN_EXCLUSIVE = 1 << 4, + SORT = 1 << 5, + SORT_BY_KEY = 1 << 6, + STREAM_COMPACT = 1 << 7, + UNIQUE = 1 << 8, + UPPER_BOUNDS = 1 << 9, + ALL = LOWER_BOUNDS | REDUCE | REDUCE_BY_KEY | SCAN_INCLUSIVE + | SCAN_EXCLUSIVE | SORT | SORT_BY_KEY | STREAM_COMPACT | UNIQUE + | UPPER_BOUNDS +}; + /// This class runs a series of micro-benchmarks to measure /// performance of the parallel primitives provided by each /// device adapter -/// template -struct BenchmarkDeviceAdapter { -private: +class BenchmarkDeviceAdapter { typedef vtkm::cont::StorageTagBasic StorageTagBasic; typedef vtkm::cont::StorageTagBasic StorageTag; @@ -78,314 +91,470 @@ private: typedef vtkm::cont::Timer Timer; +public: + // Various kernels used by the different benchmarks to accelerate + // initialization of data + template + struct FillTestValueKernel : vtkm::exec::FunctorBase { + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef typename ValueArrayHandle::template ExecutionTypes + ::Portal PortalType; + + PortalType Output; + + VTKM_CONT_EXPORT + FillTestValueKernel(PortalType out) : Output(out){} + + VTKM_EXEC_EXPORT void operator()(vtkm::Id i) const { + Output.Set(i, TestValue(i, Value())); + } + }; + + template + struct FillScaledTestValueKernel : vtkm::exec::FunctorBase { + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef typename ValueArrayHandle::template ExecutionTypes + ::Portal PortalType; + + PortalType Output; + const vtkm::Id IdScale; + + VTKM_CONT_EXPORT + FillScaledTestValueKernel(vtkm::Id id_scale, PortalType out) : Output(out), IdScale(id_scale) {} + + VTKM_EXEC_EXPORT void operator()(vtkm::Id i) const { + Output.Set(i, TestValue(i * IdScale, 
Value())); + } + }; + + template + struct FillModuloTestValueKernel : vtkm::exec::FunctorBase { + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef typename ValueArrayHandle::template ExecutionTypes + ::Portal PortalType; + + PortalType Output; + const vtkm::Id Modulus; + + VTKM_CONT_EXPORT + FillModuloTestValueKernel(vtkm::Id modulus, PortalType out) : Output(out), Modulus(modulus) {} + + VTKM_EXEC_EXPORT void operator()(vtkm::Id i) const { + Output.Set(i, TestValue(i % Modulus, Value())); + } + }; + + template + struct FillBinaryTestValueKernel : vtkm::exec::FunctorBase { + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef typename ValueArrayHandle::template ExecutionTypes + ::Portal PortalType; + + PortalType Output; + const vtkm::Id Modulus; + + VTKM_CONT_EXPORT + FillBinaryTestValueKernel(vtkm::Id modulus, PortalType out) : Output(out), Modulus(modulus) {} + + VTKM_EXEC_EXPORT void operator()(vtkm::Id i) const { + Output.Set(i, i % Modulus == 0 ? TestValue(vtkm::Id(1), Value()) : Value()); + } + }; + +private: + template struct BenchLowerBounds { - template - VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const { - typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + const vtkm::Id N_VALS; + ValueArrayHandle InputHandle, ValueHandle; + IdArrayHandle OutHandle; + + VTKM_CONT_EXPORT + BenchLowerBounds(vtkm::Id value_percent) : N_VALS((ARRAY_SIZE * value_percent) / 100) + { + Algorithm::Schedule(FillTestValueKernel( + InputHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())), ARRAY_SIZE); + Algorithm::Schedule(FillScaledTestValueKernel(2, + ValueHandle.PrepareForOutput(N_VALS, DeviceAdapterTag())), N_VALS); + } + + VTKM_CONT_EXPORT + vtkm::Float64 operator()(){ Timer timer; + Algorithm::LowerBounds(InputHandle, ValueHandle, OutHandle); + return timer.GetElapsedTime(); + } - std::vector input(ARRAY_SIZE, Value()); - for (size_t i = 0; i < input.size(); ++i){ - input[i] = 
TestValue(vtkm::Id(i), Value()); - } - ValueArrayHandle input_handle = vtkm::cont::make_ArrayHandle(input); - - // We benchmark finding indices for the elements using various - // ratios of values to input from 5-30% of # of elements in input - for (size_t p = 5; p <= 30; p += 5){ - size_t n_vals = (ARRAY_SIZE * p) / 100; - std::vector values(n_vals, Value()); - for (size_t i = 0; i < values.size(); ++i){ - values[i] = TestValue(vtkm::Id(2 * i), Value()); - } - ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values); - IdArrayHandle out_handle; - timer.Reset(); - Algorithm::LowerBounds(input_handle, value_handle, out_handle); - vtkm::Float64 elapsed = timer.GetElapsedTime(); - std::cout << "LowerBounds on " << ARRAY_SIZE << " input and " - << n_vals << " values took " << elapsed << "s\n"; - } + VTKM_CONT_EXPORT + std::string Description() const { + std::stringstream description; + description << "LowerBounds on " << ARRAY_SIZE << " input and " + << N_VALS << " values"; + return description.str(); } }; + VTKM_MAKE_BENCHMARK(LowerBounds5, BenchLowerBounds, 5); + VTKM_MAKE_BENCHMARK(LowerBounds10, BenchLowerBounds, 10); + VTKM_MAKE_BENCHMARK(LowerBounds15, BenchLowerBounds, 15); + VTKM_MAKE_BENCHMARK(LowerBounds20, BenchLowerBounds, 20); + VTKM_MAKE_BENCHMARK(LowerBounds25, BenchLowerBounds, 25); + VTKM_MAKE_BENCHMARK(LowerBounds30, BenchLowerBounds, 30); + template struct BenchReduce { - template - VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const { - typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + ValueArrayHandle InputHandle; + + VTKM_CONT_EXPORT + BenchReduce(){ + Algorithm::Schedule(FillTestValueKernel( + InputHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())), ARRAY_SIZE); + } + + VTKM_CONT_EXPORT + vtkm::Float64 operator()(){ Timer timer; - std::vector input(ARRAY_SIZE, Value()); - for (size_t i = 0; i < input.size(); ++i){ - input[i] = TestValue(vtkm::Id(i), Value()); - 
} - ValueArrayHandle input_handle = vtkm::cont::make_ArrayHandle(input); - timer.Reset(); - Algorithm::Reduce(input_handle, Value()); - vtkm::Float64 elapsed = timer.GetElapsedTime(); - std::cout << "Reduce on " << ARRAY_SIZE - << " values took " << elapsed << "s\n"; + Algorithm::Reduce(InputHandle, Value()); + return timer.GetElapsedTime(); + } + + VTKM_CONT_EXPORT + std::string Description() const { + std::stringstream description; + description << "Reduce on " << ARRAY_SIZE << " values"; + return description.str(); } }; + VTKM_MAKE_BENCHMARK(Reduce, BenchReduce); + template struct BenchReduceByKey { - template - VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const { - typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + const vtkm::Id N_KEYS; + ValueArrayHandle ValueHandle, ValuesOut; + IdArrayHandle KeyHandle, KeysOut; + + VTKM_CONT_EXPORT + BenchReduceByKey(vtkm::Id key_percent) : N_KEYS((ARRAY_SIZE * key_percent) / 100) + { + Algorithm::Schedule(FillTestValueKernel( + ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())), ARRAY_SIZE); + Algorithm::Schedule(FillModuloTestValueKernel(N_KEYS, + KeyHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())), ARRAY_SIZE); + Algorithm::SortByKey(KeyHandle, ValueHandle); + } + + VTKM_CONT_EXPORT + vtkm::Float64 operator()(){ Timer timer; - // We benchmark 5% to 30% of ARRAY_SIZE keys in 5% increments - for (size_t p = 5; p <= 30; p += 5){ - size_t n_keys = (ARRAY_SIZE * p) / 100; - std::vector values(ARRAY_SIZE, Value()); - std::vector keys(ARRAY_SIZE, 0); - for (size_t i = 0; i < values.size(); ++i){ - values[i] = TestValue(vtkm::Id(i), Value()); - keys[i] = vtkm::Id(i % n_keys); - } - ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values); - ValueArrayHandle values_out; - IdArrayHandle key_handle = vtkm::cont::make_ArrayHandle(keys); - IdArrayHandle keys_out; - Algorithm::SortByKey(key_handle, value_handle); - timer.Reset(); - 
Algorithm::ReduceByKey(key_handle, value_handle, keys_out, values_out, - vtkm::internal::Add()); - vtkm::Float64 elapsed = timer.GetElapsedTime(); - std::cout << "ReduceByKey on " << ARRAY_SIZE - << " values with " << n_keys << " distinct vtkm::Id" - << " keys took " << elapsed << "s\n"; - } + Algorithm::ReduceByKey(KeyHandle, ValueHandle, KeysOut, ValuesOut, + vtkm::internal::Add()); + return timer.GetElapsedTime(); + } + + VTKM_CONT_EXPORT + std::string Description() const { + std::stringstream description; + description << "ReduceByKey on " << ARRAY_SIZE + << " values with " << N_KEYS << " distinct vtkm::Id keys"; + return description.str(); } }; + VTKM_MAKE_BENCHMARK(ReduceByKey5, BenchReduceByKey, 5); + VTKM_MAKE_BENCHMARK(ReduceByKey10, BenchReduceByKey, 10); + VTKM_MAKE_BENCHMARK(ReduceByKey15, BenchReduceByKey, 15); + VTKM_MAKE_BENCHMARK(ReduceByKey20, BenchReduceByKey, 20); + VTKM_MAKE_BENCHMARK(ReduceByKey25, BenchReduceByKey, 25); + VTKM_MAKE_BENCHMARK(ReduceByKey30, BenchReduceByKey, 30); + template struct BenchScanInclusive { - template - VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const { - typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + ValueArrayHandle ValueHandle, OutHandle; + VTKM_CONT_EXPORT + BenchScanInclusive(){ + Algorithm::Schedule(FillTestValueKernel( + ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())), ARRAY_SIZE); + } + + VTKM_CONT_EXPORT + vtkm::Float64 operator()(){ Timer timer; - std::vector values(ARRAY_SIZE, Value()); - for (size_t i = 0; i < values.size(); ++i){ - values[i] = TestValue(vtkm::Id(i), Value()); - } - ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values); - ValueArrayHandle out_handle; - timer.Reset(); - Algorithm::ScanInclusive(value_handle, out_handle); - vtkm::Float64 elapsed = timer.GetElapsedTime(); - std::cout << "ScanInclusive on " << ARRAY_SIZE - << " values took " << elapsed << "s\n"; + 
Algorithm::ScanInclusive(ValueHandle, OutHandle); + return timer.GetElapsedTime(); + } + + VTKM_CONT_EXPORT + std::string Description() const { + std::stringstream description; + description << "ScanInclusive on " << ARRAY_SIZE << " values"; + return description.str(); } }; + VTKM_MAKE_BENCHMARK(ScanInclusive, BenchScanInclusive); + template struct BenchScanExclusive { - template - VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const { - typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + ValueArrayHandle ValueHandle, OutHandle; + + VTKM_CONT_EXPORT + BenchScanExclusive(){ + Algorithm::Schedule(FillTestValueKernel( + ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())), ARRAY_SIZE); + } + + VTKM_CONT_EXPORT + vtkm::Float64 operator()(){ Timer timer; - std::vector values(ARRAY_SIZE, Value()); - for (size_t i = 0; i < values.size(); ++i){ - values[i] = TestValue(vtkm::Id(i), Value()); - } - ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values); - ValueArrayHandle out_handle; - timer.Reset(); - Algorithm::ScanExclusive(value_handle, out_handle); - vtkm::Float64 elapsed = timer.GetElapsedTime(); - std::cout << "ScanExclusive on " << ARRAY_SIZE - << " values took " << elapsed << "s\n"; + Algorithm::ScanExclusive(ValueHandle, OutHandle); + return timer.GetElapsedTime(); + } + + VTKM_CONT_EXPORT + std::string Description() const { + std::stringstream description; + description << "ScanExclusive on " << ARRAY_SIZE << " values"; + return description.str(); } }; + VTKM_MAKE_BENCHMARK(ScanExclusive, BenchScanExclusive); - /// This benchmark tests sort on a few configurations of data - /// sorted, reverse-ordered, almost sorted and random - /// TODO: Is it really worth testing all these possible configurations - /// of data? How often will we really care about anything besides unsorted data? 
+ template struct BenchSort { - template - VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const { - typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + std::vector Values; + ValueArrayHandle ValueHandle; + boost::mt19937 Rng; + + VTKM_CONT_EXPORT + BenchSort() : Values(ARRAY_SIZE, Value()) { + ValueHandle = vtkm::cont::make_ArrayHandle(Values); + } + + VTKM_CONT_EXPORT + vtkm::Float64 operator()(){ + for (size_t i = 0; i < Values.size(); ++i){ + ValueHandle.GetPortalControl().Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value())); + } Timer timer; - std::vector values(ARRAY_SIZE, Value()); - // Test sort on already sorted data - { - for (size_t i = 0; i < values.size(); ++i){ - values[i] = TestValue(vtkm::Id(i), Value()); - } - ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values); - timer.Reset(); - Algorithm::Sort(value_handle); - vtkm::Float64 elapsed = timer.GetElapsedTime(); - std::cout << "Sort on " << ARRAY_SIZE << " already sorted " - << " values took " << elapsed << "s\n"; - } - // Test sort on reverse-sorted data - { - for (size_t i = 0; i < values.size(); ++i){ - values[i] = TestValue(vtkm::Id(values.size() - i), Value()); - } - ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values); - timer.Reset(); - Algorithm::Sort(value_handle); - vtkm::Float64 elapsed = timer.GetElapsedTime(); - std::cout << "Sort on " << ARRAY_SIZE << " reverse-ordered " - << " values took " << elapsed << "s\n"; - } - // Test on almost sorted data - { - size_t modulus = values.size() / 4; - for (size_t i = 0; i < values.size(); ++i){ - values[i] = TestValue(vtkm::Id(i % modulus), Value()); - } - ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values); - timer.Reset(); - Algorithm::Sort(value_handle); - vtkm::Float64 elapsed = timer.GetElapsedTime(); - std::cout << "Sort on " << ARRAY_SIZE << " almost-sorted " - << " values took " << elapsed << "s\n"; - } - // Test on random 
data - { - boost::mt19937 rng; - for (size_t i = 0; i < values.size(); ++i){ - values[i] = TestValue(vtkm::Id(rng()), Value()); - } - ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values); - timer.Reset(); - Algorithm::Sort(value_handle); - vtkm::Float64 elapsed = timer.GetElapsedTime(); - std::cout << "Sort on " << ARRAY_SIZE << " random " - << " values took " << elapsed << "s\n"; - } + Algorithm::Sort(ValueHandle); + return timer.GetElapsedTime(); + } + + VTKM_CONT_EXPORT + std::string Description() const { + std::stringstream description; + description << "Sort on " << ARRAY_SIZE << " random values"; + return description.str(); } }; + VTKM_MAKE_BENCHMARK(Sort, BenchSort); + template struct BenchSortByKey { - template - VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const { - typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef vtkm::cont::ArrayHandle ValueArrayHandle; - Timer timer; - boost::mt19937 rng; - // We benchmark 5% to 30% of ARRAY_SIZE keys in 5% increments - for (size_t p = 5; p <= 30; p += 5){ - size_t n_keys = (ARRAY_SIZE * p) / 100; - std::vector values(ARRAY_SIZE, Value()); - std::vector keys(ARRAY_SIZE, 0); - for (size_t i = 0; i < values.size(); ++i){ - values[i] = TestValue(vtkm::Id(rng()), Value()); - keys[i] = vtkm::Id(i % n_keys); - } - ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values); - IdArrayHandle key_handle = vtkm::cont::make_ArrayHandle(keys); - timer.Reset(); - Algorithm::SortByKey(value_handle, key_handle); - vtkm::Float64 elapsed = timer.GetElapsedTime(); - std::cout << "SortByKey on " << ARRAY_SIZE - << " random values with " << n_keys << " different vtkm::Id keys took " - << elapsed << "s\n"; + boost::mt19937 Rng; + vtkm::Id N_KEYS; + std::vector Values; + ValueArrayHandle ValueHandle; + IdArrayHandle KeyHandle; + + VTKM_CONT_EXPORT + BenchSortByKey(vtkm::Id percent_key) : N_KEYS((ARRAY_SIZE * percent_key) / 100), + Values(ARRAY_SIZE, Value()) + { + ValueHandle = 
vtkm::cont::make_ArrayHandle(Values); + } + + VTKM_CONT_EXPORT + vtkm::Float64 operator()(){ + for (size_t i = 0; i < Values.size(); ++i){ + ValueHandle.GetPortalControl().Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value())); } + Algorithm::Schedule(FillModuloTestValueKernel(N_KEYS, + KeyHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())), ARRAY_SIZE); + Timer timer; + Algorithm::SortByKey(ValueHandle, KeyHandle); + return timer.GetElapsedTime(); + } + + VTKM_CONT_EXPORT + std::string Description() const { + std::stringstream description; + description << "SortByKey on " << ARRAY_SIZE + << " random values with " << N_KEYS << " different vtkm::Id keys"; + return description.str(); } }; + VTKM_MAKE_BENCHMARK(SortByKey5, BenchSortByKey, 5); + VTKM_MAKE_BENCHMARK(SortByKey10, BenchSortByKey, 10); + VTKM_MAKE_BENCHMARK(SortByKey15, BenchSortByKey, 15); + VTKM_MAKE_BENCHMARK(SortByKey20, BenchSortByKey, 20); + VTKM_MAKE_BENCHMARK(SortByKey25, BenchSortByKey, 25); + VTKM_MAKE_BENCHMARK(SortByKey30, BenchSortByKey, 30); + template struct BenchStreamCompact { - template - VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const { - typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + const vtkm::Id N_VALID; + ValueArrayHandle ValueHandle; + IdArrayHandle OutHandle; + + VTKM_CONT_EXPORT + BenchStreamCompact(vtkm::Id percent_valid) : N_VALID((ARRAY_SIZE * percent_valid) / 100) + { + vtkm::Id modulo = ARRAY_SIZE / N_VALID; + Algorithm::Schedule(FillBinaryTestValueKernel(modulo, + ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())), ARRAY_SIZE); + } + + VTKM_CONT_EXPORT + vtkm::Float64 operator()() { Timer timer; - // We benchmark 5% to 30% valid values in 5% increments - for (size_t p = 5; p <= 30; p += 5){ - size_t n_valid = (ARRAY_SIZE * p) / 100; - size_t modulo = ARRAY_SIZE / n_valid; - std::vector values(ARRAY_SIZE, Value()); - for (size_t i = 0; i < values.size(); ++i){ - values[i] = i % modulo 
== 0 ? TestValue(1, Value()) : Value(); - } - ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values); - IdArrayHandle out_handle; - timer.Reset(); - Algorithm::StreamCompact(value_handle, out_handle); - vtkm::Float64 elapsed = timer.GetElapsedTime(); - std::cout << "StreamCompact on " << ARRAY_SIZE << " " - << " values with " << out_handle.GetNumberOfValues() - << " valid values took " << elapsed << "s\n"; + Algorithm::StreamCompact(ValueHandle, OutHandle); + return timer.GetElapsedTime(); + } - std::vector stencil(ARRAY_SIZE, 0); - for (size_t i = 0; i < stencil.size(); ++i){ - stencil[i] = i % modulo == 0 ? 1 : vtkm::Id(); - } - IdArrayHandle stencil_handle = vtkm::cont::make_ArrayHandle(stencil); - ValueArrayHandle out_val_handle; - timer.Reset(); - Algorithm::StreamCompact(value_handle, stencil_handle, out_val_handle); - elapsed = timer.GetElapsedTime(); - std::cout << "StreamCompact with stencil on " << ARRAY_SIZE - << " values with " << out_val_handle.GetNumberOfValues() - << " valid values took " << elapsed << "s\n"; - } + VTKM_CONT_EXPORT + std::string Description() const { + std::stringstream description; + description << "StreamCompact on " << ARRAY_SIZE << " " + << " values with " << OutHandle.GetNumberOfValues() + << " valid values"; + return description.str(); } }; + VTKM_MAKE_BENCHMARK(StreamCompact5, BenchStreamCompact, 5); + VTKM_MAKE_BENCHMARK(StreamCompact10, BenchStreamCompact, 10); + VTKM_MAKE_BENCHMARK(StreamCompact15, BenchStreamCompact, 15); + VTKM_MAKE_BENCHMARK(StreamCompact20, BenchStreamCompact, 20); + VTKM_MAKE_BENCHMARK(StreamCompact25, BenchStreamCompact, 25); + VTKM_MAKE_BENCHMARK(StreamCompact30, BenchStreamCompact, 30); + template + struct BenchStreamCompactStencil { + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + + const vtkm::Id N_VALID; + ValueArrayHandle ValueHandle; + IdArrayHandle StencilHandle, OutHandle; + + VTKM_CONT_EXPORT + BenchStreamCompactStencil(vtkm::Id percent_valid) : N_VALID((ARRAY_SIZE * 
percent_valid) / 100) + { + vtkm::Id modulo = ARRAY_SIZE / N_VALID; + Algorithm::Schedule(FillTestValueKernel( + ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())), ARRAY_SIZE); + Algorithm::Schedule(FillBinaryTestValueKernel(modulo, + StencilHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())), ARRAY_SIZE); + } + + VTKM_CONT_EXPORT + vtkm::Float64 operator()() { + Timer timer; + Algorithm::StreamCompact(ValueHandle, StencilHandle, OutHandle); + return timer.GetElapsedTime(); + } + + VTKM_CONT_EXPORT + std::string Description() const { + std::stringstream description; + description << "StreamCompactStencil on " << ARRAY_SIZE << " " + << " values with " << OutHandle.GetNumberOfValues() + << " valid values"; + return description.str(); + } + }; + VTKM_MAKE_BENCHMARK(StreamCompactStencil5, BenchStreamCompactStencil, 5); + VTKM_MAKE_BENCHMARK(StreamCompactStencil10, BenchStreamCompactStencil, 10); + VTKM_MAKE_BENCHMARK(StreamCompactStencil15, BenchStreamCompactStencil, 15); + VTKM_MAKE_BENCHMARK(StreamCompactStencil20, BenchStreamCompactStencil, 20); + VTKM_MAKE_BENCHMARK(StreamCompactStencil25, BenchStreamCompactStencil, 25); + VTKM_MAKE_BENCHMARK(StreamCompactStencil30, BenchStreamCompactStencil, 30); + + template struct BenchUnique { - template - VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const { - typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + const vtkm::Id N_VALID; + ValueArrayHandle ValueHandle; + + VTKM_CONT_EXPORT + BenchUnique(vtkm::Id percent_valid) : N_VALID((ARRAY_SIZE * percent_valid) / 100) + {} + + VTKM_CONT_EXPORT + vtkm::Float64 operator()(){ + Algorithm::Schedule(FillModuloTestValueKernel(N_VALID, + ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())), ARRAY_SIZE); + Algorithm::Sort(ValueHandle); Timer timer; - // We benchmark 5% to 30% valid values in 5% increments - for (size_t p = 5; p <= 30; p += 5){ - size_t n_valid = (ARRAY_SIZE * p) / 100; - 
std::vector values(ARRAY_SIZE, Value()); - for (size_t i = 0; i < values.size(); ++i){ - values[i] = TestValue(vtkm::Id(i % n_valid), Value()); - } - ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values); - Algorithm::Sort(value_handle); - timer.Reset(); - Algorithm::Unique(value_handle); - vtkm::Float64 elapsed = timer.GetElapsedTime(); - std::cout << "Unique on " << ARRAY_SIZE << " values with " - << value_handle.GetNumberOfValues() << " valid values took " - << elapsed << "s\n"; - } + Algorithm::Unique(ValueHandle); + return timer.GetElapsedTime(); + } + + VTKM_CONT_EXPORT + std::string Description() const { + std::stringstream description; + description << "Unique on " << ARRAY_SIZE << " values with " + << ValueHandle.GetNumberOfValues() << " valid values"; + return description.str(); } }; + VTKM_MAKE_BENCHMARK(Unique5, BenchUnique, 5); + VTKM_MAKE_BENCHMARK(Unique10, BenchUnique, 10); + VTKM_MAKE_BENCHMARK(Unique15, BenchUnique, 15); + VTKM_MAKE_BENCHMARK(Unique20, BenchUnique, 20); + VTKM_MAKE_BENCHMARK(Unique25, BenchUnique, 25); + VTKM_MAKE_BENCHMARK(Unique30, BenchUnique, 30); + template struct BenchUpperBounds { - template - VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const { - typedef vtkm::cont::ArrayHandle ValueArrayHandle; + typedef vtkm::cont::ArrayHandle ValueArrayHandle; + const vtkm::Id N_VALS; + ValueArrayHandle InputHandle, ValueHandle; + IdArrayHandle OutHandle; + + VTKM_CONT_EXPORT + BenchUpperBounds(vtkm::Id percent_vals) : N_VALS((ARRAY_SIZE * percent_vals) / 100) + { + Algorithm::Schedule(FillTestValueKernel( + InputHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())), ARRAY_SIZE); + Algorithm::Schedule(FillScaledTestValueKernel(2, + ValueHandle.PrepareForOutput(N_VALS, DeviceAdapterTag())), N_VALS); + } + + VTKM_CONT_EXPORT + vtkm::Float64 operator()(){ Timer timer; - std::vector input(ARRAY_SIZE, Value()); - for (size_t i = 0; i < input.size(); ++i){ - input[i] = TestValue(vtkm::Id(i), Value()); - } - 
ValueArrayHandle input_handle = vtkm::cont::make_ArrayHandle(input); + Algorithm::UpperBounds(InputHandle, ValueHandle, OutHandle); + return timer.GetElapsedTime(); + } - // We benchmark finding indices for the elements using various - // ratios of values to input from 5-30% of # of elements in input - for (size_t p = 5; p <= 30; p += 5){ - size_t n_vals = (ARRAY_SIZE * p) / 100; - std::vector values(n_vals, Value()); - for (size_t i = 0; i < values.size(); ++i){ - values[i] = TestValue(vtkm::Id(2 * i), Value()); - } - ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values); - IdArrayHandle out_handle; - timer.Reset(); - Algorithm::UpperBounds(input_handle, value_handle, out_handle); - vtkm::Float64 elapsed = timer.GetElapsedTime(); - std::cout << "UpperBounds on " << ARRAY_SIZE << " input and " - << n_vals << " values took " << elapsed << "s\n"; - } + VTKM_CONT_EXPORT + std::string Description() const { + std::stringstream description; + description << "UpperBounds on " << ARRAY_SIZE << " input and " + << N_VALS << " values"; + return description.str(); } }; + VTKM_MAKE_BENCHMARK(UpperBounds5, BenchUpperBounds, 5); + VTKM_MAKE_BENCHMARK(UpperBounds10, BenchUpperBounds, 10); + VTKM_MAKE_BENCHMARK(UpperBounds15, BenchUpperBounds, 15); + VTKM_MAKE_BENCHMARK(UpperBounds20, BenchUpperBounds, 20); + VTKM_MAKE_BENCHMARK(UpperBounds25, BenchUpperBounds, 25); + VTKM_MAKE_BENCHMARK(UpperBounds30, BenchUpperBounds, 30); public: @@ -395,40 +564,89 @@ public: vtkm::Float64, vtkm::Vec, vtkm::Vec >{}; + static VTKM_CONT_EXPORT int Run(int benchmarks){ + std::cout << DIVIDER << "\nRunning DeviceAdapter benchmarks\n"; - static VTKM_CONT_EXPORT int Run(){ - std::cout << DIVIDER << "\nRunning DeviceAdapter benchmarks\n"; - + if (benchmarks & LOWER_BOUNDS){ std::cout << DIVIDER << "\nBenchmarking LowerBounds\n"; - vtkm::testing::Testing::TryTypes(BenchLowerBounds(), ValueTypes()); + VTKM_RUN_BENCHMARK(LowerBounds5, ValueTypes()); + VTKM_RUN_BENCHMARK(LowerBounds10, 
ValueTypes()); + VTKM_RUN_BENCHMARK(LowerBounds15, ValueTypes()); + VTKM_RUN_BENCHMARK(LowerBounds20, ValueTypes()); + VTKM_RUN_BENCHMARK(LowerBounds25, ValueTypes()); + VTKM_RUN_BENCHMARK(LowerBounds30, ValueTypes()); + } + if (benchmarks & REDUCE){ std::cout << "\n" << DIVIDER << "\nBenchmarking Reduce\n"; - vtkm::testing::Testing::TryTypes(BenchReduce(), ValueTypes()); + VTKM_RUN_BENCHMARK(Reduce, ValueTypes()); + } + if (benchmarks & REDUCE_BY_KEY){ std::cout << "\n" << DIVIDER << "\nBenchmarking ReduceByKey\n"; - vtkm::testing::Testing::TryTypes(BenchReduceByKey(), ValueTypes()); + VTKM_RUN_BENCHMARK(ReduceByKey5, ValueTypes()); + VTKM_RUN_BENCHMARK(ReduceByKey10, ValueTypes()); + VTKM_RUN_BENCHMARK(ReduceByKey15, ValueTypes()); + VTKM_RUN_BENCHMARK(ReduceByKey20, ValueTypes()); + VTKM_RUN_BENCHMARK(ReduceByKey25, ValueTypes()); + VTKM_RUN_BENCHMARK(ReduceByKey30, ValueTypes()); + } + if (benchmarks & SCAN_INCLUSIVE){ std::cout << "\n" << DIVIDER << "\nBenchmarking ScanInclusive\n"; - vtkm::testing::Testing::TryTypes(BenchScanInclusive(), ValueTypes()); + VTKM_RUN_BENCHMARK(ScanInclusive, ValueTypes()); + } + if (benchmarks & SCAN_EXCLUSIVE){ std::cout << "\n" << DIVIDER << "\nBenchmarking ScanExclusive\n"; - vtkm::testing::Testing::TryTypes(BenchScanExclusive(), ValueTypes()); + VTKM_RUN_BENCHMARK(ScanExclusive, ValueTypes()); + } + if (benchmarks & SORT){ std::cout << "\n" << DIVIDER << "\nBenchmarking Sort\n"; - vtkm::testing::Testing::TryTypes(BenchSort(), ValueTypes()); + VTKM_RUN_BENCHMARK(Sort, ValueTypes()); + } + if (benchmarks & SORT_BY_KEY){ std::cout << "\n" << DIVIDER << "\nBenchmarking SortByKey\n"; - vtkm::testing::Testing::TryTypes(BenchSortByKey(), ValueTypes()); + VTKM_RUN_BENCHMARK(SortByKey5, ValueTypes()); + VTKM_RUN_BENCHMARK(SortByKey10, ValueTypes()); + VTKM_RUN_BENCHMARK(SortByKey15, ValueTypes()); + VTKM_RUN_BENCHMARK(SortByKey20, ValueTypes()); + VTKM_RUN_BENCHMARK(SortByKey25, ValueTypes()); + VTKM_RUN_BENCHMARK(SortByKey30, 
ValueTypes()); + } + if (benchmarks & STREAM_COMPACT){ std::cout << "\n" << DIVIDER << "\nBenchmarking StreamCompact\n"; - vtkm::testing::Testing::TryTypes(BenchStreamCompact(), ValueTypes()); + VTKM_RUN_BENCHMARK(StreamCompact5, ValueTypes()); + VTKM_RUN_BENCHMARK(StreamCompact10, ValueTypes()); + VTKM_RUN_BENCHMARK(StreamCompact15, ValueTypes()); + VTKM_RUN_BENCHMARK(StreamCompact20, ValueTypes()); + VTKM_RUN_BENCHMARK(StreamCompact25, ValueTypes()); + VTKM_RUN_BENCHMARK(StreamCompact30, ValueTypes()); + } + if (benchmarks & UNIQUE){ std::cout << "\n" << DIVIDER << "\nBenchmarking Unique\n"; - vtkm::testing::Testing::TryTypes(BenchUnique(), ValueTypes()); + VTKM_RUN_BENCHMARK(Unique5, ValueTypes()); + VTKM_RUN_BENCHMARK(Unique10, ValueTypes()); + VTKM_RUN_BENCHMARK(Unique15, ValueTypes()); + VTKM_RUN_BENCHMARK(Unique20, ValueTypes()); + VTKM_RUN_BENCHMARK(Unique25, ValueTypes()); + VTKM_RUN_BENCHMARK(Unique30, ValueTypes()); + } + if (benchmarks & UPPER_BOUNDS){ std::cout << "\n" << DIVIDER << "\nBenchmarking UpperBounds\n"; - vtkm::testing::Testing::TryTypes(BenchUpperBounds(), ValueTypes()); - return 0; + VTKM_RUN_BENCHMARK(UpperBounds5, ValueTypes()); + VTKM_RUN_BENCHMARK(UpperBounds10, ValueTypes()); + VTKM_RUN_BENCHMARK(UpperBounds15, ValueTypes()); + VTKM_RUN_BENCHMARK(UpperBounds20, ValueTypes()); + VTKM_RUN_BENCHMARK(UpperBounds25, ValueTypes()); + VTKM_RUN_BENCHMARK(UpperBounds30, ValueTypes()); + } + return 0; } }; diff --git a/vtkm/benchmarking/Benchmarker.h b/vtkm/benchmarking/Benchmarker.h new file mode 100644 index 000000000..48b8915f1 --- /dev/null +++ b/vtkm/benchmarking/Benchmarker.h @@ -0,0 +1,256 @@ +//============================================================================ +// Copyright (c) Kitware, Inc. +// All rights reserved. +// See LICENSE.txt for details. +// This software is distributed WITHOUT ANY WARRANTY; without even +// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. 
See the above copyright notice for more information. +// +// Copyright 2014 Sandia Corporation. +// Copyright 2014 UT-Battelle, LLC. +// Copyright 2014 Los Alamos National Security. +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National +// Laboratory (LANL), the U.S. Government retains certain rights in +// this software. +//============================================================================ + +#ifndef vtk_m_benchmarking_Benchmarker_h +#define vtk_m_benchmarking_Benchmarker_h + +#include + +#include +#include +#include + +/* + * Writing a Benchmark + * ------------------- + * To write a benchmark you must provide a functor that will run the operations + * you want to time and return the run time of those operations using the timer + * for the device. The benchmark should also be templated on the value type being + * operated on. Then use VTKM_MAKE_BENCHMARK to generate a maker functor and + * VTKM_RUN_BENCHMARK to run the benchmark on a list of types. + * + * For Example: + * + * template + * struct BenchSilly { + * // Setup anything that doesn't need to change per run in the constructor + * VTKM_CONT_EXPORT BenchSilly(){} + * + * // The overloaded call operator will run the operations being timed and + * // return the execution time + * VTKM_CONT_EXPORT + * vtkm::Float64 operator()(){ + * return 0.05; + * } + * + * // The benchmark must also provide a method describing itself, this is + * // used when printing out run time statistics + * VTKM_CONT_EXPORT + * std::string Description() const { + * return "A silly benchmark"; + * } + * }; + * + * // Now use the VTKM_MAKE_BENCHMARK macro to generate a maker functor for + * // your benchmark. 
This lets us generate the benchmark functor for each type + * // we want to test + * VTKM_MAKE_BENCHMARK(Silly, BenchSilly); + * + * // You can also optionally pass arguments to the constructor like so: + * // VTKM_MAKE_BENCHMARK(Blah, BenchBlah, 1, 2, 3); + * // Note that benchmark names (the first argument) must be unique so different + * // parameters to the constructor should have different names + * + * // We can now run our benchmark using VTKM_RUN_BENCHMARK, passing the + * // benchmark name and type list to run on + * int main(int, char**){ + * VTKM_RUN_BENCHMARK(Silly, vtkm::ListTagBase()); + * return 0; + * } + * + * Check out vtkm/benchmarking/BenchmarkDeviceAdapter.h for some example usage + */ + +/* + * Use the VTKM_MAKE_BENCHMARK macro to define a maker functor for your benchmark. + * This is used to allow you to template the benchmark functor on the type being benchmarked + * so you can write init code in the constructor. Then the maker will return a constructed + * instance of your benchmark for the type being benchmarked. The VA_ARGS are used to + * pass any extra arguments needed by your benchmark + */ +#define VTKM_MAKE_BENCHMARK(Name, Bench, ...) \ + struct MakeBench##Name { \ + template \ + VTKM_CONT_EXPORT \ + Bench operator()(const Value vtkmNotUsed(v)) const { \ + return Bench(__VA_ARGS__); \ + } \ + } + +/* + * Use the VTKM_RUN_BENCHMARK macro to run your benchmark on the type list passed. 
+ * You must have previously defined a maker functor with VTKM_MAKE_BENCHMARK that this + * macro will look for and use + */ +#define VTKM_RUN_BENCHMARK(Name, Types) \ + vtkm::benchmarking::BenchmarkTypes(MakeBench##Name(), (Types)) + +namespace vtkm { +namespace benchmarking { +namespace stats { + +// Get the value representing the `percent` percentile of the +// sorted samples using linear interpolation +vtkm::Float64 PercentileValue(const std::vector &samples, const vtkm::Float64 percent){ + VTKM_ASSERT_CONT(!samples.empty()); + if (samples.size() == 1){ + return samples.front(); + } + VTKM_ASSERT_CONT(percent >= 0.0); + VTKM_ASSERT_CONT(percent <= 100.0); + VTKM_ASSERT_CONT(std::is_sorted(samples.begin(), samples.end())); + if (percent == 100.0){ + return samples.back(); + } + // Find the two nearest percentile values and linearly + // interpolate between them + const vtkm::Float64 rank = percent / 100.0 * (static_cast(samples.size()) - 1.0); + const vtkm::Float64 low_rank = vtkm::Floor(rank); + const vtkm::Float64 dist = rank - low_rank; + const size_t k = static_cast(low_rank); + const vtkm::Float64 low = samples[k]; + const vtkm::Float64 high = samples[k + 1]; + return low + (high - low) * dist; +} +// Winsorize the samples to clean up any very extreme outliers +// Will replace all samples below `percent` and above 100 - `percent` percentiles +// with the value at the percentile +// NOTE: Assumes the samples have been sorted, as we make use of PercentileValue +void Winsorize(std::vector &samples, const vtkm::Float64 percent){ + const vtkm::Float64 low_percentile = PercentileValue(samples, percent); + const vtkm::Float64 high_percentile = PercentileValue(samples, 100.0 - percent); + for (std::vector::iterator it = samples.begin(); it != samples.end(); ++it){ + if (*it < low_percentile){ + *it = low_percentile; + } + else if (*it > high_percentile){ + *it = high_percentile; + } + } +} +// Compute the mean value of the dataset +vtkm::Float64 Mean(const 
std::vector &samples){ + vtkm::Float64 mean = 0; + for (std::vector::const_iterator it = samples.begin(); it != samples.end(); ++it){ + mean += *it; + } + return mean / static_cast(samples.size()); +} +// Compute the sample variance of the samples +vtkm::Float64 Variance(const std::vector &samples){ + vtkm::Float64 mean = Mean(samples); + vtkm::Float64 square_deviations = 0; + for (std::vector::const_iterator it = samples.begin(); it != samples.end(); ++it){ + square_deviations += vtkm::Pow(*it - mean, 2.0); + } + return square_deviations / (static_cast(samples.size()) - 1.0); +} +// Compute the standard deviation of the samples +vtkm::Float64 StandardDeviation(const std::vector &samples){ + return vtkm::Sqrt(Variance(samples)); +} +// Compute the median absolute deviation of the dataset +vtkm::Float64 MedianAbsDeviation(const std::vector &samples){ + std::vector abs_deviations; + abs_deviations.reserve(samples.size()); + const vtkm::Float64 median = PercentileValue(samples, 50.0); + for (std::vector::const_iterator it = samples.begin(); it != samples.end(); ++it){ + abs_deviations.push_back(vtkm::Abs(*it - median)); + } + return PercentileValue(abs_deviations, 50.0); +} +} // stats + +/* + * The benchmarker takes a functor to benchmark and runs it multiple times, + * printing out statistics of the run time at the end. + * The functor passed should return the run time of the thing being benchmarked + * in seconds, this lets us avoid including any per-run setup time in the benchmark. + * However any one-time setup should be done in the functor's constructor + */ +struct Benchmarker { + const vtkm::Float64 MAX_RUNTIME; + const size_t MAX_ITERATIONS; + + Benchmarker() : MAX_RUNTIME(1.5), MAX_ITERATIONS(500){} + + template + VTKM_CONT_EXPORT + void operator()(Functor func) const { + std::vector samples; + // Do a warm-up run. If the benchmark allocates any additional memory + // eg. 
storage for output results, this will let it do that and + // allow us to avoid measuring the allocation time in the actual benchmark run + func(); + + samples.reserve(MAX_ITERATIONS); + // Run each benchmark for MAX_RUNTIME seconds or MAX_ITERATIONS iterations, whichever + // takes less time. This kind of assumes that running for 500 iterations or 1.5s will give + // good statistics, but if median abs dev and/or std dev are too high both these limits + // could be increased + size_t iter = 0; + for (vtkm::Float64 elapsed = 0.0; elapsed < MAX_RUNTIME && iter < MAX_ITERATIONS; + elapsed += samples.back(), ++iter) + { + samples.push_back(func()); + } + std::sort(samples.begin(), samples.end()); + stats::Winsorize(samples, 5.0); + std::cout << "Benchmark \'" + << func.Description() << "\' results:\n" + << "\tmedian = " << stats::PercentileValue(samples, 50.0) << "s\n" + << "\tmedian abs dev = " << stats::MedianAbsDeviation(samples) << "s\n" + << "\tmean = " << stats::Mean(samples) << "s\n" + << "\tstd dev = " << stats::StandardDeviation(samples) << "s\n" + << "\tmin = " << samples.front() << "s\n" + << "\tmax = " << samples.back() << "s\n"; + } +}; + +template +class InternalPrintTypeAndBench { + MakerFunctor Maker; + +public: + VTKM_CONT_EXPORT + InternalPrintTypeAndBench(MakerFunctor maker) : Maker(maker) {} + + template + VTKM_CONT_EXPORT + void operator()(T t) const { + std::cout << "*** " + << vtkm::testing::TypeName::Name() + << " ***************" << std::endl; + Benchmarker bench; + bench(Maker(t)); + } +}; + +template +VTKM_CONT_EXPORT +void BenchmarkTypes(const MakerFunctor &maker, TypeList){ + vtkm::ListForEach(InternalPrintTypeAndBench(maker), TypeList()); +} + +} +} + +#endif +