// vtk-m/benchmarking/BenchmarkAtomicArray.cxx
//============================================================================
//  Copyright (c) Kitware, Inc.
//  All rights reserved.
//  See LICENSE.txt for details.
//
//  This software is distributed WITHOUT ANY WARRANTY; without even
//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
//  PURPOSE.  See the above copyright notice for more information.
//============================================================================
#include "Benchmarker.h"

#include <vtkm/cont/Algorithm.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/AtomicArray.h>
#include <vtkm/cont/DeviceAdapterTag.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/cont/Invoker.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/worklet/WorkletMapField.h>

#include <vtkm/TypeTraits.h>

#include <sstream>
#include <string>
namespace
2018-08-29 16:03:07 +00:00
{
// Provide access to the requested device to the benchmark functions:
vtkm::cont::InitializeResult Config;
// Range for array sizes
static constexpr vtkm::Id ARRAY_SIZE_MIN = 1;
static constexpr vtkm::Id ARRAY_SIZE_MAX = 1 << 20;
2018-08-29 16:03:07 +00:00
// This is 32x larger than the largest array size.
static constexpr vtkm::Id NUM_WRITES = 33554432; // 2^25
static constexpr vtkm::Id STRIDE = 32;
// Benchmarks AtomicArray::Add such that each work index writes to adjacent indices.
struct AddSeqWorker : public vtkm::worklet::WorkletMapField
2018-08-29 16:03:07 +00:00
{
using ControlSignature = void(FieldIn, AtomicArrayInOut);
using ExecutionSignature = void(InputIndex, _1, _2);
template <typename T, typename AtomicPortal>
VTKM_EXEC void operator()(const vtkm::Id i, const T& val, AtomicPortal& portal) const
2018-08-29 16:03:07 +00:00
{
portal.Add(i % portal.GetNumberOfValues(), val);
}
};
2018-08-29 16:03:07 +00:00
// Driver for AddSeqWorker: issues `AtomicOps` atomic adds of 1, spread
// sequentially over an array of `AtomicsValues` elements. Timing uses the
// device timer so only kernel execution is measured.
template <typename ValueType>
void BenchAddSeq(benchmark::State& state)
{
  const vtkm::cont::DeviceAdapterId device = Config.Device;
  const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
  const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));

  auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);

  vtkm::cont::ArrayHandle<ValueType> atomicArray;
  atomicArray.AllocateAndFill(numValues, vtkm::TypeTraits<ValueType>::ZeroInitialization());

  vtkm::cont::Invoker invoker{ device };
  vtkm::cont::Timer timer{ device };
  for (auto _ : state)
  {
    (void)_;
    timer.Start();
    invoker(AddSeqWorker{}, ones, atomicArray);
    timer.Stop();

    state.SetIterationTime(timer.GetElapsedTime());
  }

  const int64_t iterations = static_cast<int64_t>(state.iterations());
  const int64_t valsWritten = static_cast<int64_t>(numWrites);
  const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
  state.SetItemsProcessed(valsWritten * iterations);
  // Fixed: the original called SetItemsProcessed twice, silently overwriting
  // the items counter with the byte count. Bytes belong in SetBytesProcessed.
  state.SetBytesProcessed(bytesWritten * iterations);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchAddSeq,
                                ->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX },
                                           { NUM_WRITES, NUM_WRITES } })
                                ->ArgNames({ "AtomicsValues", "AtomicOps" }),
                              vtkm::cont::AtomicArrayTypeList);
// Provides a non-atomic baseline for BenchAddSeq.
struct AddSeqBaselineWorker : public vtkm::worklet::WorkletMapField
{
  using ControlSignature = void(FieldIn, WholeArrayInOut);
  using ExecutionSignature = void(InputIndex, _1, _2);

  template <typename T, typename Portal>
  VTKM_EXEC void operator()(const vtkm::Id index, const T& increment, Portal& values) const
  {
    const vtkm::Id target = index % values.GetNumberOfValues();
    // Plain read-modify-write; racy by design to measure the non-atomic cost.
    values.Set(target, values.Get(target) + increment);
  }
};
// Non-atomic counterpart of BenchAddSeq; identical setup, but the worklet
// performs unsynchronized read-modify-writes.
template <typename ValueType>
void BenchAddSeqBaseline(benchmark::State& state)
{
  const vtkm::cont::DeviceAdapterId device = Config.Device;
  const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
  const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));

  auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);

  vtkm::cont::ArrayHandle<ValueType> array;
  array.AllocateAndFill(numValues, vtkm::TypeTraits<ValueType>::ZeroInitialization());

  vtkm::cont::Invoker invoker{ device };
  vtkm::cont::Timer timer{ device };
  for (auto _ : state)
  {
    (void)_;
    timer.Start();
    invoker(AddSeqBaselineWorker{}, ones, array);
    timer.Stop();

    state.SetIterationTime(timer.GetElapsedTime());
  }

  const int64_t iterations = static_cast<int64_t>(state.iterations());
  const int64_t valsWritten = static_cast<int64_t>(numWrites);
  const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
  state.SetItemsProcessed(valsWritten * iterations);
  // Fixed: bytes go through SetBytesProcessed; the original's second
  // SetItemsProcessed call clobbered the items counter.
  state.SetBytesProcessed(bytesWritten * iterations);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchAddSeqBaseline,
                                ->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX },
                                           { NUM_WRITES, NUM_WRITES } })
                                ->ArgNames({ "Values", "Ops" }),
                              vtkm::cont::AtomicArrayTypeList);
// Benchmarks AtomicArray::Add such that each work index writes to a strided
// index ( floor(i / stride) + stride * (i % stride)
struct AddStrideWorker : public vtkm::worklet::WorkletMapField
{
using ControlSignature = void(FieldIn, AtomicArrayInOut);
using ExecutionSignature = void(InputIndex, _1, _2);
vtkm::Id Stride;
AddStrideWorker(vtkm::Id stride)
: Stride{ stride }
2018-08-29 16:03:07 +00:00
{
}
2018-08-29 16:03:07 +00:00
template <typename T, typename AtomicPortal>
VTKM_EXEC void operator()(const vtkm::Id i, const T& val, AtomicPortal& portal) const
{
const vtkm::Id numVals = portal.GetNumberOfValues();
const vtkm::Id j = (i / this->Stride + this->Stride * (i % this->Stride)) % numVals;
portal.Add(j, val);
}
};
2018-08-29 16:03:07 +00:00
// Driver for AddStrideWorker: same as BenchAddSeq but with a strided write
// pattern controlled by the third benchmark argument.
template <typename ValueType>
void BenchAddStride(benchmark::State& state)
{
  const vtkm::cont::DeviceAdapterId device = Config.Device;
  const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
  const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));
  const vtkm::Id stride = static_cast<vtkm::Id>(state.range(2));

  auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);

  vtkm::cont::ArrayHandle<ValueType> atomicArray;
  atomicArray.AllocateAndFill(numValues, vtkm::TypeTraits<ValueType>::ZeroInitialization());

  vtkm::cont::Invoker invoker{ device };
  vtkm::cont::Timer timer{ device };
  for (auto _ : state)
  {
    (void)_;
    timer.Start();
    invoker(AddStrideWorker{ stride }, ones, atomicArray);
    timer.Stop();

    state.SetIterationTime(timer.GetElapsedTime());
  }

  const int64_t iterations = static_cast<int64_t>(state.iterations());
  const int64_t valsWritten = static_cast<int64_t>(numWrites);
  const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
  state.SetItemsProcessed(valsWritten * iterations);
  // Fixed: report bytes via SetBytesProcessed instead of overwriting the
  // items counter with a second SetItemsProcessed call.
  state.SetBytesProcessed(bytesWritten * iterations);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(
  BenchAddStride,
    ->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX }, { NUM_WRITES, NUM_WRITES }, { STRIDE, STRIDE } })
    ->ArgNames({ "AtomicsValues", "AtomicOps", "Stride" }),
  vtkm::cont::AtomicArrayTypeList);
// Non-atomic baseline for AddStride
struct AddStrideBaselineWorker : public vtkm::worklet::WorkletMapField
{
using ControlSignature = void(FieldIn, WholeArrayInOut);
using ExecutionSignature = void(InputIndex, _1, _2);
2018-08-29 16:03:07 +00:00
vtkm::Id Stride;
AddStrideBaselineWorker(vtkm::Id stride)
: Stride{ stride }
{
}
2018-08-29 16:03:07 +00:00
template <typename T, typename Portal>
VTKM_EXEC void operator()(const vtkm::Id i, const T& val, Portal& portal) const
2018-08-29 16:03:07 +00:00
{
const vtkm::Id numVals = portal.GetNumberOfValues();
const vtkm::Id j = (i / this->Stride + this->Stride * (i % this->Stride)) % numVals;
portal.Set(j, portal.Get(j) + val);
}
};
2018-08-29 16:03:07 +00:00
// Non-atomic counterpart of BenchAddStride; identical setup with an
// unsynchronized worklet.
template <typename ValueType>
void BenchAddStrideBaseline(benchmark::State& state)
{
  const vtkm::cont::DeviceAdapterId device = Config.Device;
  const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
  const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));
  const vtkm::Id stride = static_cast<vtkm::Id>(state.range(2));

  auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);

  vtkm::cont::ArrayHandle<ValueType> array;
  array.AllocateAndFill(numValues, vtkm::TypeTraits<ValueType>::ZeroInitialization());

  vtkm::cont::Invoker invoker{ device };
  vtkm::cont::Timer timer{ device };
  for (auto _ : state)
  {
    (void)_;
    timer.Start();
    invoker(AddStrideBaselineWorker{ stride }, ones, array);
    timer.Stop();

    state.SetIterationTime(timer.GetElapsedTime());
  }

  const int64_t iterations = static_cast<int64_t>(state.iterations());
  const int64_t valsWritten = static_cast<int64_t>(numWrites);
  const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
  state.SetItemsProcessed(valsWritten * iterations);
  // Fixed: bytes belong in SetBytesProcessed; the second SetItemsProcessed
  // call in the original clobbered the items counter.
  state.SetBytesProcessed(bytesWritten * iterations);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(
  BenchAddStrideBaseline,
    ->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX }, { NUM_WRITES, NUM_WRITES }, { STRIDE, STRIDE } })
    ->ArgNames({ "Values", "Ops", "Stride" }),
  vtkm::cont::AtomicArrayTypeList);
// NOTE: AtomicArray::CompareExchange follows std::atomic-style
// compare_exchange semantics: CompareExchange(idx, &expected, desired)
// returns true on success and refreshes `expected` with the observed
// value on failure, which makes retry loops straightforward.
// Benchmarks AtomicArray::CompareExchange such that each work index writes to adjacent
// indices.
struct CASSeqWorker : public vtkm::worklet::WorkletMapField
{
using ControlSignature = void(FieldIn, AtomicArrayInOut);
using ExecutionSignature = void(InputIndex, _1, _2);
template <typename T, typename AtomicPortal>
VTKM_EXEC void operator()(const vtkm::Id i, const T& in, AtomicPortal& portal) const
2018-08-29 16:03:07 +00:00
{
const vtkm::Id idx = i % portal.GetNumberOfValues();
const T val = static_cast<T>(i) + in;
T oldVal = portal.Get(idx);
Change interface of atomic compare and swap The old atomic compare and swap operations (`vtkm::AtomicCompareAndSwap` and `vtkm::exec::AtomicArrayExecutionObject::CompareAndSwap`) had an order of arguments that was confusing. The order of the arguments was shared pointer (or index), desired value, expected value. Most people probably assume expected value comes before desired value. And this order conflicts with the order in the `std` methods, GCC atomics, and Kokkos. Change the interface of atomic operations to be patterned off the `std::atomic_compare_exchange` and `std::atomic<T>::compare_exchange` methods. First, these methods have a more intuitive order of parameters (shared pointer, expected, desired). Second, rather than take a value for the expected and return the actual old value, they take a pointer to the expected value (or reference in `AtomicArrayExecutionObject`) and modify this value in the case that it does not match the actual value. This makes it harder to mix up the expected and desired parameters. Also, because the methods return a bool indicating whether the value was changed, there is an additional benefit that compare-exchange loops are implemented easier. For example, consider you want to apply the function `MyOp` on a `sharedValue` atomically. With the old interface, you would have to do something like this. ```cpp T oldValue; T newValue; do { oldValue = *sharedValue; newValue = MyOp(oldValue); } while (vtkm::AtomicCompareAndSwap(sharedValue, newValue, oldValue) != oldValue); ``` With the new interface, this is simplfied to this. ```cpp T oldValue = *sharedValue; while (!vtkm::AtomicCompareExchange(sharedValue, &oldValue, MyOp(oldValue)); ```
2020-09-25 00:02:59 +00:00
while (!portal.CompareExchange(idx, &oldVal, oldVal + val))
;
}
};
2018-08-29 16:03:07 +00:00
// Driver for CASSeqWorker: issues `AtomicOps` compare-exchange updates spread
// sequentially over an array of `AtomicsValues` elements.
template <typename ValueType>
void BenchCASSeq(benchmark::State& state)
{
  const vtkm::cont::DeviceAdapterId device = Config.Device;
  const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
  const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));

  auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);

  vtkm::cont::ArrayHandle<ValueType> atomicArray;
  atomicArray.AllocateAndFill(numValues, vtkm::TypeTraits<ValueType>::ZeroInitialization());

  vtkm::cont::Invoker invoker{ device };
  vtkm::cont::Timer timer{ device };
  for (auto _ : state)
  {
    (void)_;
    timer.Start();
    invoker(CASSeqWorker{}, ones, atomicArray);
    timer.Stop();

    state.SetIterationTime(timer.GetElapsedTime());
  }

  const int64_t iterations = static_cast<int64_t>(state.iterations());
  const int64_t valsWritten = static_cast<int64_t>(numWrites);
  const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
  state.SetItemsProcessed(valsWritten * iterations);
  // Fixed: report bytes via SetBytesProcessed instead of overwriting the
  // items counter with a second SetItemsProcessed call.
  state.SetBytesProcessed(bytesWritten * iterations);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchCASSeq,
                                ->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX },
                                           { NUM_WRITES, NUM_WRITES } })
                                ->ArgNames({ "AtomicsValues", "AtomicOps" }),
                              vtkm::cont::AtomicArrayTypeList);
// Provides a non-atomic baseline for BenchCASSeq
struct CASSeqBaselineWorker : public vtkm::worklet::WorkletMapField
{
using ControlSignature = void(FieldIn, WholeArrayInOut);
using ExecutionSignature = void(InputIndex, _1, _2);
2018-08-29 16:03:07 +00:00
template <typename T, typename Portal>
VTKM_EXEC void operator()(const vtkm::Id i, const T& in, Portal& portal) const
{
const vtkm::Id idx = i % portal.GetNumberOfValues();
const T val = static_cast<T>(i) + in;
const T oldVal = portal.Get(idx);
portal.Set(idx, oldVal + val);
}
};
// Non-atomic counterpart of BenchCASSeq; identical setup with an
// unsynchronized worklet.
template <typename ValueType>
void BenchCASSeqBaseline(benchmark::State& state)
{
  const vtkm::cont::DeviceAdapterId device = Config.Device;
  const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
  const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));

  auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);

  vtkm::cont::ArrayHandle<ValueType> array;
  array.AllocateAndFill(numValues, vtkm::TypeTraits<ValueType>::ZeroInitialization());

  vtkm::cont::Invoker invoker{ device };
  vtkm::cont::Timer timer{ device };
  for (auto _ : state)
  {
    (void)_;
    timer.Start();
    invoker(CASSeqBaselineWorker{}, ones, array);
    timer.Stop();

    state.SetIterationTime(timer.GetElapsedTime());
  }

  const int64_t iterations = static_cast<int64_t>(state.iterations());
  const int64_t valsWritten = static_cast<int64_t>(numWrites);
  const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
  state.SetItemsProcessed(valsWritten * iterations);
  // Fixed: bytes belong in SetBytesProcessed; the original's second
  // SetItemsProcessed call clobbered the items counter.
  state.SetBytesProcessed(bytesWritten * iterations);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchCASSeqBaseline,
                                ->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX },
                                           { NUM_WRITES, NUM_WRITES } })
                                ->ArgNames({ "Values", "Ops" }),
                              vtkm::cont::AtomicArrayTypeList);
// The strided CAS benchmarks below use the same std::atomic-style
// compare-exchange retry idiom as the sequential ones above.
// Benchmarks AtomicArray::CompareExchange such that each work index writes to
// a strided index:
// ( floor(i / stride) + stride * (i % stride)
struct CASStrideWorker : public vtkm::worklet::WorkletMapField
{
using ControlSignature = void(FieldIn, AtomicArrayInOut);
using ExecutionSignature = void(InputIndex, _1, _2);
2018-08-29 16:03:07 +00:00
vtkm::Id Stride;
CASStrideWorker(vtkm::Id stride)
: Stride{ stride }
{
}
2018-08-29 16:03:07 +00:00
template <typename T, typename AtomicPortal>
VTKM_EXEC void operator()(const vtkm::Id i, const T& in, AtomicPortal& portal) const
2018-08-29 16:03:07 +00:00
{
const vtkm::Id numVals = portal.GetNumberOfValues();
const vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % numVals;
const T val = static_cast<T>(i) + in;
T oldVal = portal.Get(idx);
Change interface of atomic compare and swap The old atomic compare and swap operations (`vtkm::AtomicCompareAndSwap` and `vtkm::exec::AtomicArrayExecutionObject::CompareAndSwap`) had an order of arguments that was confusing. The order of the arguments was shared pointer (or index), desired value, expected value. Most people probably assume expected value comes before desired value. And this order conflicts with the order in the `std` methods, GCC atomics, and Kokkos. Change the interface of atomic operations to be patterned off the `std::atomic_compare_exchange` and `std::atomic<T>::compare_exchange` methods. First, these methods have a more intuitive order of parameters (shared pointer, expected, desired). Second, rather than take a value for the expected and return the actual old value, they take a pointer to the expected value (or reference in `AtomicArrayExecutionObject`) and modify this value in the case that it does not match the actual value. This makes it harder to mix up the expected and desired parameters. Also, because the methods return a bool indicating whether the value was changed, there is an additional benefit that compare-exchange loops are implemented easier. For example, consider you want to apply the function `MyOp` on a `sharedValue` atomically. With the old interface, you would have to do something like this. ```cpp T oldValue; T newValue; do { oldValue = *sharedValue; newValue = MyOp(oldValue); } while (vtkm::AtomicCompareAndSwap(sharedValue, newValue, oldValue) != oldValue); ``` With the new interface, this is simplfied to this. ```cpp T oldValue = *sharedValue; while (!vtkm::AtomicCompareExchange(sharedValue, &oldValue, MyOp(oldValue)); ```
2020-09-25 00:02:59 +00:00
while (!portal.CompareExchange(idx, &oldVal, oldVal + val))
;
2018-08-29 16:03:07 +00:00
}
};
// Driver for CASStrideWorker: compare-exchange updates with a strided write
// pattern controlled by the third benchmark argument.
template <typename ValueType>
void BenchCASStride(benchmark::State& state)
{
  const vtkm::cont::DeviceAdapterId device = Config.Device;
  const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
  const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));
  const vtkm::Id stride = static_cast<vtkm::Id>(state.range(2));

  auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);

  vtkm::cont::ArrayHandle<ValueType> atomicArray;
  atomicArray.AllocateAndFill(numValues, vtkm::TypeTraits<ValueType>::ZeroInitialization());

  vtkm::cont::Invoker invoker{ device };
  vtkm::cont::Timer timer{ device };
  for (auto _ : state)
  {
    (void)_;
    timer.Start();
    invoker(CASStrideWorker{ stride }, ones, atomicArray);
    timer.Stop();

    state.SetIterationTime(timer.GetElapsedTime());
  }

  const int64_t iterations = static_cast<int64_t>(state.iterations());
  const int64_t valsWritten = static_cast<int64_t>(numWrites);
  const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
  state.SetItemsProcessed(valsWritten * iterations);
  // Fixed: report bytes via SetBytesProcessed instead of overwriting the
  // items counter with a second SetItemsProcessed call.
  state.SetBytesProcessed(bytesWritten * iterations);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(
  BenchCASStride,
    ->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX }, { NUM_WRITES, NUM_WRITES }, { STRIDE, STRIDE } })
    ->ArgNames({ "AtomicsValues", "AtomicOps", "Stride" }),
  vtkm::cont::AtomicArrayTypeList);
// Non-atomic baseline for CASStride
struct CASStrideBaselineWorker : public vtkm::worklet::WorkletMapField
{
using ControlSignature = void(FieldIn, AtomicArrayInOut);
using ExecutionSignature = void(InputIndex, _1, _2);
vtkm::Id Stride;
CASStrideBaselineWorker(vtkm::Id stride)
: Stride{ stride }
{
}
2018-08-29 16:03:07 +00:00
template <typename T, typename AtomicPortal>
VTKM_EXEC void operator()(const vtkm::Id i, const T& in, AtomicPortal& portal) const
2018-08-29 16:03:07 +00:00
{
const vtkm::Id numVals = portal.GetNumberOfValues();
const vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % numVals;
const T val = static_cast<T>(i) + in;
T oldVal = portal.Get(idx);
portal.Set(idx, oldVal + val);
2018-08-29 16:03:07 +00:00
}
};
// Non-atomic counterpart of BenchCASStride; identical setup with an
// unsynchronized worklet.
template <typename ValueType>
void BenchCASStrideBaseline(benchmark::State& state)
{
  const vtkm::cont::DeviceAdapterId device = Config.Device;
  const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
  const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));
  const vtkm::Id stride = static_cast<vtkm::Id>(state.range(2));

  auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);

  vtkm::cont::ArrayHandle<ValueType> array;
  array.AllocateAndFill(numValues, vtkm::TypeTraits<ValueType>::ZeroInitialization());

  vtkm::cont::Invoker invoker{ device };
  vtkm::cont::Timer timer{ device };
  for (auto _ : state)
  {
    (void)_;
    timer.Start();
    invoker(CASStrideBaselineWorker{ stride }, ones, array);
    timer.Stop();

    state.SetIterationTime(timer.GetElapsedTime());
  }

  const int64_t iterations = static_cast<int64_t>(state.iterations());
  const int64_t valsWritten = static_cast<int64_t>(numWrites);
  const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
  state.SetItemsProcessed(valsWritten * iterations);
  // Fixed: bytes belong in SetBytesProcessed; the original's second
  // SetItemsProcessed call clobbered the items counter.
  state.SetBytesProcessed(bytesWritten * iterations);
}
// ArgNames changed from "AtomicsValues"/"AtomicOps" to "Values"/"Ops" for
// consistency with the other non-atomic baseline benchmarks.
VTKM_BENCHMARK_TEMPLATES_OPTS(
  BenchCASStrideBaseline,
    ->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX }, { NUM_WRITES, NUM_WRITES }, { STRIDE, STRIDE } })
    ->ArgNames({ "Values", "Ops", "Stride" }),
  vtkm::cont::AtomicArrayTypeList);
} // end anon namespace
int main(int argc, char* argv[])
{
  auto opts = vtkm::cont::InitializeOptions::RequireDevice;

  // Copy argv so the helpers below can rewrite/consume arguments.
  std::vector<char*> args(argv, argv + argc);
  vtkm::bench::detail::InitializeArgs(&argc, args, opts);

  // Parse VTK-m options:
  Config = vtkm::cont::Initialize(argc, args.data(), opts);

  if (opts == vtkm::cont::InitializeOptions::None)
  {
    // Help was requested; just print usage instead of forcing a device.
    std::cout << Config.Usage << std::endl;
  }
  else
  {
    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
  }

  // Handle benchmarking related args and run benchmarks:
  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}