28ecf3636d
The old atomic compare and swap operations (`vtkm::AtomicCompareAndSwap` and `vtkm::exec::AtomicArrayExecutionObject::CompareAndSwap`) had an order of arguments that was confusing. The order of the arguments was shared pointer (or index), desired value, expected value. Most people probably assume expected value comes before desired value. And this order conflicts with the order in the `std` methods, GCC atomics, and Kokkos. Change the interface of atomic operations to be patterned off the `std::atomic_compare_exchange` and `std::atomic<T>::compare_exchange` methods. First, these methods have a more intuitive order of parameters (shared pointer, expected, desired). Second, rather than take a value for the expected and return the actual old value, they take a pointer to the expected value (or reference in `AtomicArrayExecutionObject`) and modify this value in the case that it does not match the actual value. This makes it harder to mix up the expected and desired parameters. Also, because the methods return a bool indicating whether the value was changed, there is an additional benefit that compare-exchange loops are easier to implement. For example, consider you want to apply the function `MyOp` on a `sharedValue` atomically. With the old interface, you would have to do something like this. ```cpp T oldValue; T newValue; do { oldValue = *sharedValue; newValue = MyOp(oldValue); } while (vtkm::AtomicCompareAndSwap(sharedValue, newValue, oldValue) != oldValue); ``` With the new interface, this is simplified to this. ```cpp T oldValue = *sharedValue; while (!vtkm::AtomicCompareExchange(sharedValue, &oldValue, MyOp(oldValue))) ; ```
522 lines
18 KiB
C++
522 lines
18 KiB
C++
//============================================================================
|
|
// Copyright (c) Kitware, Inc.
|
|
// All rights reserved.
|
|
// See LICENSE.txt for details.
|
|
//
|
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
// PURPOSE. See the above copyright notice for more information.
|
|
//============================================================================
|
|
|
|
#include "Benchmarker.h"
|
|
|
|
#include <vtkm/cont/ArrayHandle.h>
|
|
#include <vtkm/cont/AtomicArray.h>
|
|
#include <vtkm/cont/DeviceAdapterTag.h>
|
|
#include <vtkm/cont/Initialize.h>
|
|
#include <vtkm/cont/Invoker.h>
|
|
#include <vtkm/cont/RuntimeDeviceTracker.h>
|
|
#include <vtkm/cont/Timer.h>
|
|
|
|
#include <vtkm/worklet/WorkletMapField.h>
|
|
|
|
#include <vtkm/TypeTraits.h>
|
|
|
|
#include <sstream>
|
|
#include <string>
|
|
|
|
namespace
|
|
{
|
|
|
|
// Provide access to the requested device to the benchmark functions:
|
|
vtkm::cont::InitializeResult Config;
|
|
|
|
// Range for array sizes
|
|
static constexpr vtkm::Id ARRAY_SIZE_MIN = 1;
|
|
static constexpr vtkm::Id ARRAY_SIZE_MAX = 1 << 20;
|
|
|
|
// This is 32x larger than the largest array size.
|
|
static constexpr vtkm::Id NUM_WRITES = 33554432; // 2^25
|
|
|
|
static constexpr vtkm::Id STRIDE = 32;
|
|
|
|
// Benchmarks AtomicArray::Add such that each work index writes to adjacent indices.
|
|
struct AddSeqWorker : public vtkm::worklet::WorkletMapField
|
|
{
|
|
using ControlSignature = void(FieldIn, AtomicArrayInOut);
|
|
using ExecutionSignature = void(InputIndex, _1, _2);
|
|
|
|
template <typename T, typename AtomicPortal>
|
|
VTKM_EXEC void operator()(const vtkm::Id i, const T& val, AtomicPortal& portal) const
|
|
{
|
|
portal.Add(i % portal.GetNumberOfValues(), val);
|
|
}
|
|
};
|
|
|
|
template <typename ValueType>
|
|
void BenchAddSeq(benchmark::State& state)
|
|
{
|
|
const vtkm::cont::DeviceAdapterId device = Config.Device;
|
|
const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
|
|
const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));
|
|
|
|
auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);
|
|
|
|
vtkm::cont::ArrayHandle<ValueType> atomicArray;
|
|
vtkm::cont::Algorithm::Fill(
|
|
atomicArray, vtkm::TypeTraits<ValueType>::ZeroInitialization(), numValues);
|
|
|
|
vtkm::cont::Invoker invoker{ device };
|
|
vtkm::cont::Timer timer{ device };
|
|
for (auto _ : state)
|
|
{
|
|
(void)_;
|
|
timer.Start();
|
|
invoker(AddSeqWorker{}, ones, atomicArray);
|
|
timer.Stop();
|
|
|
|
state.SetIterationTime(timer.GetElapsedTime());
|
|
}
|
|
|
|
const int64_t iterations = static_cast<int64_t>(state.iterations());
|
|
const int64_t valsWritten = static_cast<int64_t>(numWrites);
|
|
const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
|
|
state.SetItemsProcessed(valsWritten * iterations);
|
|
state.SetItemsProcessed(bytesWritten * iterations);
|
|
}
|
|
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchAddSeq,
|
|
->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX },
|
|
{ NUM_WRITES, NUM_WRITES } })
|
|
->ArgNames({ "AtomicsValues", "AtomicOps" }),
|
|
vtkm::cont::AtomicArrayTypeList);
|
|
|
|
// Provides a non-atomic baseline for BenchAddSeq
|
|
struct AddSeqBaselineWorker : public vtkm::worklet::WorkletMapField
|
|
{
|
|
using ControlSignature = void(FieldIn, WholeArrayInOut);
|
|
using ExecutionSignature = void(InputIndex, _1, _2);
|
|
|
|
template <typename T, typename Portal>
|
|
VTKM_EXEC void operator()(const vtkm::Id i, const T& val, Portal& portal) const
|
|
{
|
|
const vtkm::Id j = i % portal.GetNumberOfValues();
|
|
portal.Set(j, portal.Get(j) + val);
|
|
}
|
|
};
|
|
|
|
template <typename ValueType>
|
|
void BenchAddSeqBaseline(benchmark::State& state)
|
|
{
|
|
const vtkm::cont::DeviceAdapterId device = Config.Device;
|
|
const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
|
|
const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));
|
|
|
|
auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);
|
|
|
|
vtkm::cont::ArrayHandle<ValueType> array;
|
|
vtkm::cont::Algorithm::Fill(array, vtkm::TypeTraits<ValueType>::ZeroInitialization(), numValues);
|
|
|
|
vtkm::cont::Invoker invoker{ device };
|
|
vtkm::cont::Timer timer{ device };
|
|
for (auto _ : state)
|
|
{
|
|
(void)_;
|
|
timer.Start();
|
|
invoker(AddSeqBaselineWorker{}, ones, array);
|
|
timer.Stop();
|
|
|
|
state.SetIterationTime(timer.GetElapsedTime());
|
|
}
|
|
|
|
const int64_t iterations = static_cast<int64_t>(state.iterations());
|
|
const int64_t valsWritten = static_cast<int64_t>(numWrites);
|
|
const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
|
|
state.SetItemsProcessed(valsWritten * iterations);
|
|
state.SetItemsProcessed(bytesWritten * iterations);
|
|
}
|
|
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchAddSeqBaseline,
|
|
->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX },
|
|
{ NUM_WRITES, NUM_WRITES } })
|
|
->ArgNames({ "Values", "Ops" }),
|
|
vtkm::cont::AtomicArrayTypeList);
|
|
|
|
// Benchmarks AtomicArray::Add such that each work index writes to a strided
|
|
// index ( floor(i / stride) + stride * (i % stride)
|
|
struct AddStrideWorker : public vtkm::worklet::WorkletMapField
|
|
{
|
|
using ControlSignature = void(FieldIn, AtomicArrayInOut);
|
|
using ExecutionSignature = void(InputIndex, _1, _2);
|
|
|
|
vtkm::Id Stride;
|
|
|
|
AddStrideWorker(vtkm::Id stride)
|
|
: Stride{ stride }
|
|
{
|
|
}
|
|
|
|
template <typename T, typename AtomicPortal>
|
|
VTKM_EXEC void operator()(const vtkm::Id i, const T& val, AtomicPortal& portal) const
|
|
{
|
|
const vtkm::Id numVals = portal.GetNumberOfValues();
|
|
const vtkm::Id j = (i / this->Stride + this->Stride * (i % this->Stride)) % numVals;
|
|
portal.Add(j, val);
|
|
}
|
|
};
|
|
|
|
template <typename ValueType>
|
|
void BenchAddStride(benchmark::State& state)
|
|
{
|
|
const vtkm::cont::DeviceAdapterId device = Config.Device;
|
|
const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
|
|
const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));
|
|
const vtkm::Id stride = static_cast<vtkm::Id>(state.range(2));
|
|
|
|
auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);
|
|
|
|
vtkm::cont::ArrayHandle<ValueType> atomicArray;
|
|
vtkm::cont::Algorithm::Fill(
|
|
atomicArray, vtkm::TypeTraits<ValueType>::ZeroInitialization(), numValues);
|
|
|
|
vtkm::cont::Invoker invoker{ device };
|
|
vtkm::cont::Timer timer{ device };
|
|
for (auto _ : state)
|
|
{
|
|
(void)_;
|
|
timer.Start();
|
|
invoker(AddStrideWorker{ stride }, ones, atomicArray);
|
|
timer.Stop();
|
|
|
|
state.SetIterationTime(timer.GetElapsedTime());
|
|
}
|
|
|
|
const int64_t iterations = static_cast<int64_t>(state.iterations());
|
|
const int64_t valsWritten = static_cast<int64_t>(numWrites);
|
|
const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
|
|
state.SetItemsProcessed(valsWritten * iterations);
|
|
state.SetItemsProcessed(bytesWritten * iterations);
|
|
}
|
|
VTKM_BENCHMARK_TEMPLATES_OPTS(
|
|
BenchAddStride,
|
|
->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX }, { NUM_WRITES, NUM_WRITES }, { STRIDE, STRIDE } })
|
|
->ArgNames({ "AtomicsValues", "AtomicOps", "Stride" }),
|
|
vtkm::cont::AtomicArrayTypeList);
|
|
|
|
// Non-atomic baseline for AddStride
|
|
struct AddStrideBaselineWorker : public vtkm::worklet::WorkletMapField
|
|
{
|
|
using ControlSignature = void(FieldIn, WholeArrayInOut);
|
|
using ExecutionSignature = void(InputIndex, _1, _2);
|
|
|
|
vtkm::Id Stride;
|
|
|
|
AddStrideBaselineWorker(vtkm::Id stride)
|
|
: Stride{ stride }
|
|
{
|
|
}
|
|
|
|
template <typename T, typename Portal>
|
|
VTKM_EXEC void operator()(const vtkm::Id i, const T& val, Portal& portal) const
|
|
{
|
|
const vtkm::Id numVals = portal.GetNumberOfValues();
|
|
const vtkm::Id j = (i / this->Stride + this->Stride * (i % this->Stride)) % numVals;
|
|
portal.Set(j, portal.Get(j) + val);
|
|
}
|
|
};
|
|
|
|
template <typename ValueType>
|
|
void BenchAddStrideBaseline(benchmark::State& state)
|
|
{
|
|
const vtkm::cont::DeviceAdapterId device = Config.Device;
|
|
const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
|
|
const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));
|
|
const vtkm::Id stride = static_cast<vtkm::Id>(state.range(2));
|
|
|
|
auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);
|
|
|
|
vtkm::cont::ArrayHandle<ValueType> array;
|
|
vtkm::cont::Algorithm::Fill(array, vtkm::TypeTraits<ValueType>::ZeroInitialization(), numValues);
|
|
|
|
vtkm::cont::Invoker invoker{ device };
|
|
vtkm::cont::Timer timer{ device };
|
|
for (auto _ : state)
|
|
{
|
|
(void)_;
|
|
timer.Start();
|
|
invoker(AddStrideBaselineWorker{ stride }, ones, array);
|
|
timer.Stop();
|
|
|
|
state.SetIterationTime(timer.GetElapsedTime());
|
|
}
|
|
|
|
const int64_t iterations = static_cast<int64_t>(state.iterations());
|
|
const int64_t valsWritten = static_cast<int64_t>(numWrites);
|
|
const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
|
|
state.SetItemsProcessed(valsWritten * iterations);
|
|
state.SetItemsProcessed(bytesWritten * iterations);
|
|
}
|
|
VTKM_BENCHMARK_TEMPLATES_OPTS(
|
|
BenchAddStrideBaseline,
|
|
->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX }, { NUM_WRITES, NUM_WRITES }, { STRIDE, STRIDE } })
|
|
->ArgNames({ "Values", "Ops", "Stride" }),
|
|
vtkm::cont::AtomicArrayTypeList);
|
|
|
|
// Benchmarks AtomicArray::CompareExchange such that each work index writes to adjacent
|
|
// indices.
|
|
struct CASSeqWorker : public vtkm::worklet::WorkletMapField
|
|
{
|
|
using ControlSignature = void(FieldIn, AtomicArrayInOut);
|
|
using ExecutionSignature = void(InputIndex, _1, _2);
|
|
|
|
template <typename T, typename AtomicPortal>
|
|
VTKM_EXEC void operator()(const vtkm::Id i, const T& in, AtomicPortal& portal) const
|
|
{
|
|
const vtkm::Id idx = i % portal.GetNumberOfValues();
|
|
const T val = static_cast<T>(i) + in;
|
|
T oldVal = portal.Get(idx);
|
|
while (!portal.CompareExchange(idx, &oldVal, oldVal + val))
|
|
;
|
|
}
|
|
};
|
|
|
|
template <typename ValueType>
|
|
void BenchCASSeq(benchmark::State& state)
|
|
{
|
|
const vtkm::cont::DeviceAdapterId device = Config.Device;
|
|
const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
|
|
const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));
|
|
|
|
auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);
|
|
|
|
vtkm::cont::ArrayHandle<ValueType> atomicArray;
|
|
vtkm::cont::Algorithm::Fill(
|
|
atomicArray, vtkm::TypeTraits<ValueType>::ZeroInitialization(), numValues);
|
|
|
|
vtkm::cont::Invoker invoker{ device };
|
|
vtkm::cont::Timer timer{ device };
|
|
for (auto _ : state)
|
|
{
|
|
(void)_;
|
|
timer.Start();
|
|
invoker(CASSeqWorker{}, ones, atomicArray);
|
|
timer.Stop();
|
|
|
|
state.SetIterationTime(timer.GetElapsedTime());
|
|
}
|
|
|
|
const int64_t iterations = static_cast<int64_t>(state.iterations());
|
|
const int64_t valsWritten = static_cast<int64_t>(numWrites);
|
|
const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
|
|
state.SetItemsProcessed(valsWritten * iterations);
|
|
state.SetItemsProcessed(bytesWritten * iterations);
|
|
}
|
|
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchCASSeq,
|
|
->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX },
|
|
{ NUM_WRITES, NUM_WRITES } })
|
|
->ArgNames({ "AtomicsValues", "AtomicOps" }),
|
|
vtkm::cont::AtomicArrayTypeList);
|
|
|
|
// Provides a non-atomic baseline for BenchCASSeq
|
|
struct CASSeqBaselineWorker : public vtkm::worklet::WorkletMapField
|
|
{
|
|
using ControlSignature = void(FieldIn, WholeArrayInOut);
|
|
using ExecutionSignature = void(InputIndex, _1, _2);
|
|
|
|
template <typename T, typename Portal>
|
|
VTKM_EXEC void operator()(const vtkm::Id i, const T& in, Portal& portal) const
|
|
{
|
|
const vtkm::Id idx = i % portal.GetNumberOfValues();
|
|
const T val = static_cast<T>(i) + in;
|
|
const T oldVal = portal.Get(idx);
|
|
portal.Set(idx, oldVal + val);
|
|
}
|
|
};
|
|
|
|
template <typename ValueType>
|
|
void BenchCASSeqBaseline(benchmark::State& state)
|
|
{
|
|
const vtkm::cont::DeviceAdapterId device = Config.Device;
|
|
const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
|
|
const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));
|
|
|
|
auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);
|
|
|
|
vtkm::cont::ArrayHandle<ValueType> array;
|
|
vtkm::cont::Algorithm::Fill(array, vtkm::TypeTraits<ValueType>::ZeroInitialization(), numValues);
|
|
|
|
vtkm::cont::Invoker invoker{ device };
|
|
vtkm::cont::Timer timer{ device };
|
|
for (auto _ : state)
|
|
{
|
|
(void)_;
|
|
timer.Start();
|
|
invoker(CASSeqBaselineWorker{}, ones, array);
|
|
timer.Stop();
|
|
|
|
state.SetIterationTime(timer.GetElapsedTime());
|
|
}
|
|
|
|
const int64_t iterations = static_cast<int64_t>(state.iterations());
|
|
const int64_t valsWritten = static_cast<int64_t>(numWrites);
|
|
const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
|
|
state.SetItemsProcessed(valsWritten * iterations);
|
|
state.SetItemsProcessed(bytesWritten * iterations);
|
|
}
|
|
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchCASSeqBaseline,
|
|
->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX },
|
|
{ NUM_WRITES, NUM_WRITES } })
|
|
->ArgNames({ "Values", "Ops" }),
|
|
vtkm::cont::AtomicArrayTypeList);
|
|
|
|
// Benchmarks AtomicArray::CompareExchange such that each work index writes to
|
|
// a strided index:
|
|
// ( floor(i / stride) + stride * (i % stride)
|
|
struct CASStrideWorker : public vtkm::worklet::WorkletMapField
|
|
{
|
|
using ControlSignature = void(FieldIn, AtomicArrayInOut);
|
|
using ExecutionSignature = void(InputIndex, _1, _2);
|
|
|
|
vtkm::Id Stride;
|
|
|
|
CASStrideWorker(vtkm::Id stride)
|
|
: Stride{ stride }
|
|
{
|
|
}
|
|
|
|
template <typename T, typename AtomicPortal>
|
|
VTKM_EXEC void operator()(const vtkm::Id i, const T& in, AtomicPortal& portal) const
|
|
{
|
|
const vtkm::Id numVals = portal.GetNumberOfValues();
|
|
const vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % numVals;
|
|
const T val = static_cast<T>(i) + in;
|
|
T oldVal = portal.Get(idx);
|
|
while (!portal.CompareExchange(idx, &oldVal, oldVal + val))
|
|
;
|
|
}
|
|
};
|
|
|
|
template <typename ValueType>
|
|
void BenchCASStride(benchmark::State& state)
|
|
{
|
|
const vtkm::cont::DeviceAdapterId device = Config.Device;
|
|
const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
|
|
const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));
|
|
const vtkm::Id stride = static_cast<vtkm::Id>(state.range(2));
|
|
|
|
auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);
|
|
|
|
vtkm::cont::ArrayHandle<ValueType> atomicArray;
|
|
vtkm::cont::Algorithm::Fill(
|
|
atomicArray, vtkm::TypeTraits<ValueType>::ZeroInitialization(), numValues);
|
|
|
|
vtkm::cont::Invoker invoker{ device };
|
|
vtkm::cont::Timer timer{ device };
|
|
for (auto _ : state)
|
|
{
|
|
(void)_;
|
|
timer.Start();
|
|
invoker(CASStrideWorker{ stride }, ones, atomicArray);
|
|
timer.Stop();
|
|
|
|
state.SetIterationTime(timer.GetElapsedTime());
|
|
}
|
|
|
|
const int64_t iterations = static_cast<int64_t>(state.iterations());
|
|
const int64_t valsWritten = static_cast<int64_t>(numWrites);
|
|
const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
|
|
state.SetItemsProcessed(valsWritten * iterations);
|
|
state.SetItemsProcessed(bytesWritten * iterations);
|
|
}
|
|
VTKM_BENCHMARK_TEMPLATES_OPTS(
|
|
BenchCASStride,
|
|
->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX }, { NUM_WRITES, NUM_WRITES }, { STRIDE, STRIDE } })
|
|
->ArgNames({ "AtomicsValues", "AtomicOps", "Stride" }),
|
|
vtkm::cont::AtomicArrayTypeList);
|
|
|
|
// Non-atomic baseline for CASStride
|
|
struct CASStrideBaselineWorker : public vtkm::worklet::WorkletMapField
|
|
{
|
|
using ControlSignature = void(FieldIn, AtomicArrayInOut);
|
|
using ExecutionSignature = void(InputIndex, _1, _2);
|
|
|
|
vtkm::Id Stride;
|
|
|
|
CASStrideBaselineWorker(vtkm::Id stride)
|
|
: Stride{ stride }
|
|
{
|
|
}
|
|
|
|
template <typename T, typename AtomicPortal>
|
|
VTKM_EXEC void operator()(const vtkm::Id i, const T& in, AtomicPortal& portal) const
|
|
{
|
|
const vtkm::Id numVals = portal.GetNumberOfValues();
|
|
const vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % numVals;
|
|
const T val = static_cast<T>(i) + in;
|
|
T oldVal = portal.Get(idx);
|
|
portal.Set(idx, oldVal + val);
|
|
}
|
|
};
|
|
|
|
template <typename ValueType>
|
|
void BenchCASStrideBaseline(benchmark::State& state)
|
|
{
|
|
const vtkm::cont::DeviceAdapterId device = Config.Device;
|
|
const vtkm::Id numValues = static_cast<vtkm::Id>(state.range(0));
|
|
const vtkm::Id numWrites = static_cast<vtkm::Id>(state.range(1));
|
|
const vtkm::Id stride = static_cast<vtkm::Id>(state.range(2));
|
|
|
|
auto ones = vtkm::cont::make_ArrayHandleConstant<ValueType>(static_cast<ValueType>(1), numWrites);
|
|
|
|
vtkm::cont::ArrayHandle<ValueType> array;
|
|
vtkm::cont::Algorithm::Fill(array, vtkm::TypeTraits<ValueType>::ZeroInitialization(), numValues);
|
|
|
|
vtkm::cont::Invoker invoker{ device };
|
|
vtkm::cont::Timer timer{ device };
|
|
for (auto _ : state)
|
|
{
|
|
(void)_;
|
|
timer.Start();
|
|
invoker(CASStrideBaselineWorker{ stride }, ones, array);
|
|
timer.Stop();
|
|
|
|
state.SetIterationTime(timer.GetElapsedTime());
|
|
}
|
|
|
|
const int64_t iterations = static_cast<int64_t>(state.iterations());
|
|
const int64_t valsWritten = static_cast<int64_t>(numWrites);
|
|
const int64_t bytesWritten = static_cast<int64_t>(sizeof(ValueType)) * valsWritten;
|
|
state.SetItemsProcessed(valsWritten * iterations);
|
|
state.SetItemsProcessed(bytesWritten * iterations);
|
|
}
|
|
VTKM_BENCHMARK_TEMPLATES_OPTS(
|
|
BenchCASStrideBaseline,
|
|
->Ranges({ { ARRAY_SIZE_MIN, ARRAY_SIZE_MAX }, { NUM_WRITES, NUM_WRITES }, { STRIDE, STRIDE } })
|
|
->ArgNames({ "AtomicsValues", "AtomicOps", "Stride" }),
|
|
vtkm::cont::AtomicArrayTypeList);
|
|
|
|
} // end anon namespace
|
|
|
|
int main(int argc, char* argv[])
|
|
{
|
|
// Parse VTK-m options:
|
|
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
|
|
|
|
std::vector<char*> args(argv, argv + argc);
|
|
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
|
|
|
|
// Parse VTK-m options:
|
|
Config = vtkm::cont::Initialize(argc, args.data(), opts);
|
|
|
|
// This occurs when it is help
|
|
if (opts == vtkm::cont::InitializeOptions::None)
|
|
{
|
|
std::cout << Config.Usage << std::endl;
|
|
}
|
|
else
|
|
{
|
|
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
|
|
}
|
|
|
|
// handle benchmarking related args and run benchmarks:
|
|
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
|
|
}
|