vtk-m/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h

1107 lines
45 KiB
C
Raw Normal View History

//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
2016-09-15 23:46:09 +00:00
#ifndef vtk_m_cont_internal_DeviceAdapterAlgorithmGeneral_h
#define vtk_m_cont_internal_DeviceAdapterAlgorithmGeneral_h
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleImplicit.h>
#include <vtkm/cont/ArrayHandleIndex.h>
2017-05-18 14:51:24 +00:00
#include <vtkm/cont/ArrayHandleStreaming.h>
#include <vtkm/cont/ArrayHandleZip.h>
#include <vtkm/cont/internal/FunctorsGeneral.h>
#include <vtkm/exec/internal/ErrorMessageBuffer.h>
#include <vtkm/exec/internal/TaskSingular.h>
#include <vtkm/TypeTraits.h>
#include <vtkm/internal/Windows.h>
2017-05-18 14:29:41 +00:00
namespace vtkm
{
namespace cont
{
namespace internal
{
/// \brief General device adapter algorithms built on top of Schedule and Synchronize.
///
/// This struct provides "general" implementations of the device adapter
/// algorithms. If a device adapter provides implementations for Schedule and
/// Synchronize, the rest of the algorithms can be implemented by calling
/// these two functions.
///
/// It should be noted that we recommend that you also implement Sort,
/// ScanInclusive, and ScanExclusive for improved performance.
///
/// An easy way to implement the DeviceAdapterAlgorithm specialization is to
/// subclass this and override the implementation of methods as necessary.
/// As an example, the code would look something like this.
///
/// \code{.cpp}
/// template<>
/// struct DeviceAdapterAlgorithm<DeviceAdapterTagFoo>
/// : DeviceAdapterAlgorithmGeneral<DeviceAdapterAlgorithm<DeviceAdapterTagFoo>,
/// DeviceAdapterTagFoo>
/// {
/// template<class Functor>
/// VTKM_CONT static void Schedule(Functor functor,
/// vtkm::Id numInstances)
/// {
/// ...
/// }
///
/// template<class Functor>
/// VTKM_CONT static void Schedule(Functor functor,
/// vtkm::Id3 maxRange)
/// {
/// ...
/// }
///
/// VTKM_CONT static void Synchronize()
/// {
/// ...
/// }
/// };
/// \endcode
///
/// You might note that DeviceAdapterAlgorithmGeneral has two template
/// parameters that are redundant. Although the first parameter (the actual
/// DeviceAdapterAlgorithm class containing Schedule and Synchronize) is the
/// same as DeviceAdapterAlgorithm<DeviceAdapterTag>, it is made a separate
/// template parameter to avoid a recursive dependence between
/// DeviceAdapterAlgorithmGeneral.h and DeviceAdapterAlgorithm.h.
///
2017-05-18 14:29:41 +00:00
template <class DerivedAlgorithm, class DeviceAdapterTag>
struct DeviceAdapterAlgorithmGeneral
{
//--------------------------------------------------------------------------
// Get Execution Value
// This method is used internally to get a single element from the execution
// array. Might want to expose this and/or allow actual device adapter
// implementations to provide one.
private:
2017-05-18 14:29:41 +00:00
/// Reads one element of \c input back to the control environment.
///
/// A single-instance CopyKernel is scheduled that writes into a temporary
/// one-element basic array; element 0 of that array is then returned through
/// its control-side portal. \c index is forwarded to the kernel as its third
/// constructor argument (the input start position, as used by CopySubRange).
template <typename T, class CIn>
VTKM_CONT static T GetExecutionValue(const vtkm::cont::ArrayHandle<T, CIn>& input, vtkm::Id index)
{
  using SourceHandle = vtkm::cont::ArrayHandle<T, CIn>;
  using ScratchHandle = vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>;
  using SourcePortal =
    typename SourceHandle::template ExecutionTypes<DeviceAdapterTag>::PortalConst;
  using ScratchPortal = typename ScratchHandle::template ExecutionTypes<DeviceAdapterTag>::Portal;

  ScratchHandle scratch;
  CopyKernel<SourcePortal, ScratchPortal> fetchOne(input.PrepareForInput(DeviceAdapterTag()),
                                                   scratch.PrepareForOutput(1, DeviceAdapterTag()),
                                                   index);
  DerivedAlgorithm::Schedule(fetchOne, 1);

  return scratch.GetPortalConstControl().Get(0);
}
public:
//--------------------------------------------------------------------------
// Copy
2017-05-18 14:29:41 +00:00
/// Copies \c input into \c output.
///
/// \c output is prepared for output with the same number of values as
/// \c input, and one CopyKernel instance is scheduled per value.
template <typename T, typename U, class CIn, class COut>
VTKM_CONT static void Copy(const vtkm::cont::ArrayHandle<T, CIn>& input,
                           vtkm::cont::ArrayHandle<U, COut>& output)
{
  using SourcePortal = typename vtkm::cont::ArrayHandle<T, CIn>::template ExecutionTypes<
    DeviceAdapterTag>::PortalConst;
  using TargetPortal =
    typename vtkm::cont::ArrayHandle<U, COut>::template ExecutionTypes<DeviceAdapterTag>::Portal;

  const vtkm::Id valueCount = input.GetNumberOfValues();

  CopyKernel<SourcePortal, TargetPortal> copier(
    input.PrepareForInput(DeviceAdapterTag()),
    output.PrepareForOutput(valueCount, DeviceAdapterTag()));
  DerivedAlgorithm::Schedule(copier, valueCount);
}
//--------------------------------------------------------------------------
// CopyIf
2017-05-18 14:29:41 +00:00
/// Copies selected entries of \c input into \c output, driven by \c stencil
/// and \c unary_predicate.
///
/// \c input and \c stencil must hold the same number of values (asserted).
/// The work is done in three scheduled steps:
///  1. StencilToIndexFlagKernel runs over the stencil and fills a temporary
///     vtkm::Id array.
///  2. That array is exclusive-scanned in place; the value returned by the
///     scan is used as the length of \c output.
///  3. CopyIfKernel runs over every input position and writes into \c output.
template <typename T, typename U, class CIn, class CStencil, class COut, class UnaryPredicate>
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
                             const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
                             vtkm::cont::ArrayHandle<T, COut>& output,
                             UnaryPredicate unary_predicate)
{
  using IndexArrayType = vtkm::cont::ArrayHandle<vtkm::Id, vtkm::cont::StorageTagBasic>;
  using StencilPortalType = typename vtkm::cont::ArrayHandle<U, CStencil>::template ExecutionTypes<
    DeviceAdapterTag>::PortalConst;
  using IndexPortalType =
    typename IndexArrayType::template ExecutionTypes<DeviceAdapterTag>::Portal;
  using InputPortalType = typename vtkm::cont::ArrayHandle<T, CIn>::template ExecutionTypes<
    DeviceAdapterTag>::PortalConst;
  using OutputPortalType =
    typename vtkm::cont::ArrayHandle<T, COut>::template ExecutionTypes<DeviceAdapterTag>::Portal;

  VTKM_ASSERT(input.GetNumberOfValues() == stencil.GetNumberOfValues());
  vtkm::Id entryCount = stencil.GetNumberOfValues();

  // Step 1: one kernel instance per stencil entry, writing into `indices`.
  IndexArrayType indices;
  StencilPortalType stencilPortal = stencil.PrepareForInput(DeviceAdapterTag());
  IndexPortalType indexPortal = indices.PrepareForOutput(entryCount, DeviceAdapterTag());
  StencilToIndexFlagKernel<StencilPortalType, IndexPortalType, UnaryPredicate> flagKernel(
    stencilPortal, indexPortal, unary_predicate);
  DerivedAlgorithm::Schedule(flagKernel, entryCount);

  // Step 2: in-place exclusive scan; its return value sizes the output.
  vtkm::Id compactedLength = DerivedAlgorithm::ScanExclusive(indices, indices);

  // Step 3: write the selected values to the output array.
  InputPortalType inputPortal = input.PrepareForInput(DeviceAdapterTag());
  OutputPortalType outputPortal = output.PrepareForOutput(compactedLength, DeviceAdapterTag());
  CopyIfKernel<InputPortalType,
               StencilPortalType,
               IndexPortalType,
               OutputPortalType,
               UnaryPredicate>
    compactKernel(inputPortal, stencilPortal, indexPortal, outputPortal, unary_predicate);
  DerivedAlgorithm::Schedule(compactKernel, entryCount);
}
2017-05-18 14:29:41 +00:00
/// CopyIf overload without an explicit predicate: forwards to the
/// predicate-taking CopyIf using a default-constructed
/// ::vtkm::NotZeroInitialized functor.
template <typename T, typename U, class CIn, class CStencil, class COut>
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
                             const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
                             vtkm::cont::ArrayHandle<T, COut>& output)
{
  ::vtkm::NotZeroInitialized defaultPredicate;
  DerivedAlgorithm::CopyIf(input, stencil, output, defaultPredicate);
}
//--------------------------------------------------------------------------
// CopySubRange
2017-05-18 14:29:41 +00:00
/// Copies a contiguous range of \c input into \c output starting at
/// \c outputIndex.
///
/// \param input                   source array
/// \param inputStartIndex         first input position to copy
/// \param numberOfElementsToCopy  requested count; reduced when it would run
///                                past the end of \c input
/// \param output                  destination array; grown (preserving its
///                                current contents) when it is too short
/// \param outputIndex             first output position to write
/// \return false when any index/count is negative or \c inputStartIndex is
///         not inside \c input; true once the copy has been scheduled.
template <typename T, typename U, class CIn, class COut>
VTKM_CONT static bool CopySubRange(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                   vtkm::Id inputStartIndex,
                                   vtkm::Id numberOfElementsToCopy,
                                   vtkm::cont::ArrayHandle<U, COut>& output,
                                   vtkm::Id outputIndex = 0)
{
  using SourcePortal = typename vtkm::cont::ArrayHandle<T, CIn>::template ExecutionTypes<
    DeviceAdapterTag>::PortalConst;
  using TargetPortal =
    typename vtkm::cont::ArrayHandle<U, COut>::template ExecutionTypes<DeviceAdapterTag>::Portal;
  using RangeCopyKernel = CopyKernel<SourcePortal, TargetPortal>;

  const vtkm::Id inSize = input.GetNumberOfValues();

  // Guard clauses: reject invalid parameters before touching the output.
  const bool anyNegative =
    (inputStartIndex < 0) || (numberOfElementsToCopy < 0) || (outputIndex < 0);
  if (anyNegative || inputStartIndex >= inSize)
  {
    return false;
  }

  // Shorten the request when it extends past the end of the input.
  if (inSize < (inputStartIndex + numberOfElementsToCopy))
  {
    numberOfElementsToCopy = (inSize - inputStartIndex);
  }

  const vtkm::Id outSize = output.GetNumberOfValues();
  const vtkm::Id copyOutEnd = outputIndex + numberOfElementsToCopy;
  if (outSize < copyOutEnd)
  {
    if (outSize == 0)
    {
      // Nothing to preserve: simply allocate the required length.
      output.Allocate(copyOutEnd);
    }
    else
    {
      // Existing data must survive the resize, so copy it into a larger
      // array and make that the output.
      vtkm::cont::ArrayHandle<U, COut> grown;
      grown.Allocate(copyOutEnd);
      DerivedAlgorithm::CopySubRange(output, 0, outSize, grown);
      output = grown;
    }
  }

  RangeCopyKernel rangeCopy(input.PrepareForInput(DeviceAdapterTag()),
                            output.PrepareForInPlace(DeviceAdapterTag()),
                            inputStartIndex,
                            outputIndex);
  DerivedAlgorithm::Schedule(rangeCopy, numberOfElementsToCopy);
  return true;
}
//--------------------------------------------------------------------------
// Lower Bounds
2017-05-18 14:29:41 +00:00
/// Schedules a LowerBoundsKernel with one instance per entry of \c values.
///
/// \c output is prepared for output with the same number of entries as
/// \c values; \c input is the array being searched.
template <typename T, class CIn, class CVal, class COut>
VTKM_CONT static void LowerBounds(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                  const vtkm::cont::ArrayHandle<T, CVal>& values,
                                  vtkm::cont::ArrayHandle<vtkm::Id, COut>& output)
{
  using SearchedPortal = typename vtkm::cont::ArrayHandle<T, CIn>::template ExecutionTypes<
    DeviceAdapterTag>::PortalConst;
  using QueryPortal = typename vtkm::cont::ArrayHandle<T, CVal>::template ExecutionTypes<
    DeviceAdapterTag>::PortalConst;
  using ResultPortal = typename vtkm::cont::ArrayHandle<vtkm::Id, COut>::template ExecutionTypes<
    DeviceAdapterTag>::Portal;

  vtkm::Id queryCount = values.GetNumberOfValues();

  LowerBoundsKernel<SearchedPortal, QueryPortal, ResultPortal> search(
    input.PrepareForInput(DeviceAdapterTag()),
    values.PrepareForInput(DeviceAdapterTag()),
    output.PrepareForOutput(queryCount, DeviceAdapterTag()));
  DerivedAlgorithm::Schedule(search, queryCount);
}
2017-05-18 14:29:41 +00:00
/// LowerBounds variant that forwards a caller-supplied \c binary_compare to
/// LowerBoundsComparisonKernel.
///
/// One kernel instance is scheduled per entry of \c values, and \c output is
/// prepared for output with that many entries.
template <typename T, class CIn, class CVal, class COut, class BinaryCompare>
VTKM_CONT static void LowerBounds(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                  const vtkm::cont::ArrayHandle<T, CVal>& values,
                                  vtkm::cont::ArrayHandle<vtkm::Id, COut>& output,
                                  BinaryCompare binary_compare)
{
  using SearchedPortal = typename vtkm::cont::ArrayHandle<T, CIn>::template ExecutionTypes<
    DeviceAdapterTag>::PortalConst;
  using QueryPortal = typename vtkm::cont::ArrayHandle<T, CVal>::template ExecutionTypes<
    DeviceAdapterTag>::PortalConst;
  using ResultPortal = typename vtkm::cont::ArrayHandle<vtkm::Id, COut>::template ExecutionTypes<
    DeviceAdapterTag>::Portal;
  using SearchKernel =
    LowerBoundsComparisonKernel<SearchedPortal, QueryPortal, ResultPortal, BinaryCompare>;

  vtkm::Id queryCount = values.GetNumberOfValues();

  SearchKernel search(input.PrepareForInput(DeviceAdapterTag()),
                      values.PrepareForInput(DeviceAdapterTag()),
                      output.PrepareForOutput(queryCount, DeviceAdapterTag()),
                      binary_compare);
  DerivedAlgorithm::Schedule(search, queryCount);
}
2017-05-18 14:29:41 +00:00
/// In-place LowerBounds for vtkm::Id arrays: \c values_output is passed to the
/// three-argument general LowerBounds both as the values to look up and as
/// the array that receives the results.
template <class CIn, class COut>
VTKM_CONT static void LowerBounds(const vtkm::cont::ArrayHandle<vtkm::Id, CIn>& input,
                                  vtkm::cont::ArrayHandle<vtkm::Id, COut>& values_output)
{
  // Deliberately calls this class's own implementation rather than going
  // through DerivedAlgorithm.
  using GeneralAlgorithm = DeviceAdapterAlgorithmGeneral<DerivedAlgorithm, DeviceAdapterTag>;
  GeneralAlgorithm::LowerBounds(input, values_output, values_output);
}
//--------------------------------------------------------------------------
// Reduce
2017-05-18 14:29:41 +00:00
/// Reduce overload without an explicit operator: forwards to the
/// functor-taking Reduce with a default-constructed vtkm::Add.
template <typename T, typename U, class CIn>
VTKM_CONT static U Reduce(const vtkm::cont::ArrayHandle<T, CIn>& input, U initialValue)
{
  vtkm::Add addOperator;
  return DerivedAlgorithm::Reduce(input, initialValue, addOperator);
}
2017-05-18 14:29:41 +00:00
/// Reduces \c input to a single value with \c binary_functor, starting from
/// \c initialValue.
///
/// Strategy: the input is wrapped in an implicit array whose functor
/// (ReduceKernel) is handed the input portal, the initial value and the
/// operator, so that each implicit entry stands for a group of up to 16 input
/// values. The implicit array is therefore ceil(N / 16) long, and an inclusive
/// scan over it yields the final result as its return value.
///
/// \param input           array to reduce
/// \param initialValue    starting value of the reduction
/// \param binary_functor  operator used to combine values
/// \return the reduced value; \c initialValue when \c input is empty.
template <typename T, typename U, class CIn, class BinaryFunctor>
VTKM_CONT static U Reduce(const vtkm::cont::ArrayHandle<T, CIn>& input,
                          U initialValue,
                          BinaryFunctor binary_functor)
{
  using InputPortalType = typename vtkm::cont::ArrayHandle<T, CIn>::template ExecutionTypes<
    DeviceAdapterTag>::PortalConst;
  using ReduceKernelType = ReduceKernel<InputPortalType, U, BinaryFunctor>;
  using ReduceHandleType = vtkm::cont::ArrayHandleImplicit<ReduceKernelType>;
  using TempArrayType = vtkm::cont::ArrayHandle<U, vtkm::cont::StorageTagBasic>;

  const vtkm::Id numValues = input.GetNumberOfValues();
  if (numValues <= 0)
  {
    // With no input the implicit array below would be empty and the general
    // ScanInclusive would hand back a zero-initialized U, silently dropping
    // initialValue. An empty reduction is just its starting value.
    return initialValue;
  }

  ReduceKernelType kernel(
    input.PrepareForInput(DeviceAdapterTag()), initialValue, binary_functor);

  // Number of 16-wide groups, rounded up.
  vtkm::Id length = numValues / 16;
  length += (numValues % 16 == 0) ? 0 : 1;
  ReduceHandleType reduced = vtkm::cont::make_ArrayHandleImplicit(kernel, length);

  TempArrayType inclusiveScanStorage;
  const U scanResult =
    DerivedAlgorithm::ScanInclusive(reduced, inclusiveScanStorage, binary_functor);
  return scanResult;
}
2016-09-29 21:06:34 +00:00
//--------------------------------------------------------------------------
// Streaming Reduce
2017-05-18 14:29:41 +00:00
/// StreamingReduce overload without an explicit operator: forwards to the
/// functor-taking StreamingReduce with a default-constructed vtkm::Add.
template <typename T, typename U, class CIn>
VTKM_CONT static U StreamingReduce(const vtkm::Id numBlocks,
                                   const vtkm::cont::ArrayHandle<T, CIn>& input,
                                   U initialValue)
{
  vtkm::Add addOperator;
  return DerivedAlgorithm::StreamingReduce(numBlocks, input, initialValue, addOperator);
}
2017-05-18 14:29:41 +00:00
/// Reduces \c input in \c numBlocks consecutive pieces, feeding the result of
/// each piece in as the initial value of the next.
///
/// Each piece is wrapped in an ArrayHandleStreaming built from
/// (input, block index, block size, number of values in this block) and
/// reduced with DerivedAlgorithm::Reduce.
///
/// \param numBlocks       number of pieces to split the input into; must be
///                        positive (asserted)
/// \param input           array to reduce
/// \param initialValue    starting value for the first block
/// \param binary_functor  operator used to combine values
/// \return the result of the last block that was reduced.
template <typename T, typename U, class CIn, class BinaryFunctor>
VTKM_CONT static U StreamingReduce(const vtkm::Id numBlocks,
                                   const vtkm::cont::ArrayHandle<T, CIn>& input,
                                   U initialValue,
                                   BinaryFunctor binary_functor)
{
  using StreamingInputType = vtkm::cont::ArrayHandleStreaming<vtkm::cont::ArrayHandle<T, CIn>>;

  // A zero block count would divide by zero below.
  VTKM_ASSERT(numBlocks > 0);

  const vtkm::Id fullSize = input.GetNumberOfValues();
  vtkm::Id blockSize = fullSize / numBlocks;
  if (fullSize % numBlocks != 0)
  {
    blockSize += 1;
  }

  // Seeding with initialValue makes every block (including the first) use the
  // same call, and guarantees a defined return value if the loop never runs.
  U lastResult = initialValue;
  for (vtkm::Id block = 0; block < numBlocks; block++)
  {
    // blockSize is rounded up, so blockSize * block can reach or exceed
    // fullSize before the last block. Clamp the count to what is actually
    // left and stop once the input is exhausted, instead of handing a
    // too-large or negative count to the streaming array.
    const vtkm::Id remaining = fullSize - blockSize * block;
    if (block > 0 && remaining <= 0)
    {
      break;
    }
    const vtkm::Id numberOfInstances = (remaining < blockSize) ? remaining : blockSize;

    StreamingInputType streamIn(input, block, blockSize, numberOfInstances);
    lastResult = DerivedAlgorithm::Reduce(streamIn, lastResult, binary_functor);
  }
  return lastResult;
}
2015-05-04 19:53:35 +00:00
//--------------------------------------------------------------------------
// Reduce By Key
/// Combines the values of each run of keys with \c binary_functor.
///
/// \c keys and \c values must have the same length (asserted). With zero or
/// one key the inputs are copied straight to the outputs. Otherwise:
///  - ReduceStencilGeneration is scheduled over the keys to fill a
///    ReduceKeySeriesStates array,
///  - an inclusive scan with ReduceByKeyAdd runs over the zipped
///    (values, key state) array,
///  - CopyIf with ReduceByKeyUnaryStencilOp writes the selected scan results
///    to \c values_output,
///  - \c keys_output is produced by copying \c keys and calling Unique on it.
template <typename T,
          typename U,
          class KIn,
          class VIn,
          class KOut,
          class VOut,
          class BinaryFunctor>
VTKM_CONT static void ReduceByKey(const vtkm::cont::ArrayHandle<T, KIn>& keys,
                                  const vtkm::cont::ArrayHandle<U, VIn>& values,
                                  vtkm::cont::ArrayHandle<T, KOut>& keys_output,
                                  vtkm::cont::ArrayHandle<U, VOut>& values_output,
                                  BinaryFunctor binary_functor)
{
  using KeyStateHandle = vtkm::cont::ArrayHandle<ReduceKeySeriesStates>;

  VTKM_ASSERT(keys.GetNumberOfValues() == values.GetNumberOfValues());
  const vtkm::Id numberOfKeys = keys.GetNumberOfValues();

  if (numberOfKeys <= 1)
  {
    // At most one key/value pair: the input already is the answer.
    DerivedAlgorithm::Copy(keys, keys_output);
    DerivedAlgorithm::Copy(values, values_output);
    return;
  }

  // Work out the state of every key (start, middle or end of a series, or
  // the special "start and end" state).
  KeyStateHandle keystate;
  {
    using KeyPortal = typename vtkm::cont::ArrayHandle<T, KIn>::template ExecutionTypes<
      DeviceAdapterTag>::PortalConst;
    using KeyStatePortal =
      typename KeyStateHandle::template ExecutionTypes<DeviceAdapterTag>::Portal;

    KeyPortal keyPortal = keys.PrepareForInput(DeviceAdapterTag());
    KeyStatePortal statePortal = keystate.PrepareForOutput(numberOfKeys, DeviceAdapterTag());
    ReduceStencilGeneration<KeyPortal, KeyStatePortal> classifyKeys(keyPortal, statePortal);
    DerivedAlgorithm::Schedule(classifyKeys, numberOfKeys);
  }

  // Reduce the values of each key with an inclusive scan over the values
  // zipped with the key states. The scan writes a pair per entry: the running
  // value and a stencil entry marking the values that must be written out
  // (END or START_AND_END).
  {
    using ReducedHandle = vtkm::cont::ArrayHandle<U, VOut>;
    using ZippedInput =
      vtkm::cont::ArrayHandleZip<vtkm::cont::ArrayHandle<U, VIn>, KeyStateHandle>;
    using ZippedOutput = vtkm::cont::ArrayHandleZip<ReducedHandle, KeyStateHandle>;

    KeyStateHandle stencil;
    ReducedHandle reducedValues;

    ZippedInput scanInput(values, keystate);
    ZippedOutput scanOutput(reducedValues, stencil);
    DerivedAlgorithm::ScanInclusive(
      scanInput, scanOutput, ReduceByKeyAdd<BinaryFunctor>(binary_functor));

    // The key states are no longer needed once the scan has run.
    keystate.ReleaseResources();

    // Write back only the stencil-selected scan results.
    DerivedAlgorithm::CopyIf(reducedValues, stencil, values_output, ReduceByKeyUnaryStencilOp());
  } // temporaries of the scan go out of scope here

  // The output keys are the unique input keys.
  DerivedAlgorithm::Copy(keys, keys_output);
  DerivedAlgorithm::Unique(keys_output);
}
//--------------------------------------------------------------------------
// Scan Exclusive
2017-05-18 14:29:41 +00:00
/// Exclusive scan of \c input into \c output using \c binaryFunctor, seeded
/// with \c initialValue.
///
/// Implemented as an inclusive scan into a temporary basic array followed by
/// an InclusiveToExclusiveKernel pass that produces \c output.
///
/// \param input          array to scan
/// \param output         receives the scan; sized to match \c input (emptied
///                       when \c input is empty)
/// \param binaryFunctor  operator used to combine values
/// \param initialValue   seed of the scan
/// \return \c binaryFunctor(initialValue, inclusive-scan total), or
///         \c initialValue when \c input is empty.
template <typename T, class CIn, class COut, class BinaryFunctor>
VTKM_CONT static T ScanExclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                 vtkm::cont::ArrayHandle<T, COut>& output,
                                 BinaryFunctor binaryFunctor,
                                 const T& initialValue)
{
  using TempArrayType = vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>;
  using OutputArrayType = vtkm::cont::ArrayHandle<T, COut>;
  using SrcPortalType =
    typename TempArrayType::template ExecutionTypes<DeviceAdapterTag>::PortalConst;
  using DestPortalType =
    typename OutputArrayType::template ExecutionTypes<DeviceAdapterTag>::Portal;

  vtkm::Id numValues = input.GetNumberOfValues();
  if (numValues <= 0)
  {
    // Keep the "output has as many values as input" contract for empty input
    // too; otherwise a reused output array would retain its stale contents.
    output.Shrink(0);
    return initialValue;
  }

  TempArrayType inclusiveScan;
  T result = DerivedAlgorithm::ScanInclusive(input, inclusiveScan, binaryFunctor);

  InclusiveToExclusiveKernel<SrcPortalType, DestPortalType, BinaryFunctor> inclusiveToExclusive(
    inclusiveScan.PrepareForInput(DeviceAdapterTag()),
    output.PrepareForOutput(numValues, DeviceAdapterTag()),
    binaryFunctor,
    initialValue);
  DerivedAlgorithm::Schedule(inclusiveToExclusive, numValues);

  return binaryFunctor(initialValue, result);
}
2017-05-18 14:29:41 +00:00
/// ScanExclusive overload without an explicit operator or seed: forwards to
/// the full overload with vtkm::Sum and a zero-initialized T.
template <typename T, class CIn, class COut>
VTKM_CONT static T ScanExclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                 vtkm::cont::ArrayHandle<T, COut>& output)
{
  const T zeroSeed = vtkm::TypeTraits<T>::ZeroInitialization();
  return DerivedAlgorithm::ScanExclusive(input, output, vtkm::Sum(), zeroSeed);
}
2017-04-13 17:50:56 +00:00
//--------------------------------------------------------------------------
// Scan Exclusive By Key
2017-05-18 14:29:41 +00:00
/// Exclusive scan of \c values restarted at every run of equal \c keys.
///
/// \c keys and \c values must have the same length (asserted). Steps:
///  0. Zero keys: return without touching \c output. One key: \c output is
///     the single value \c initialValue.
///  1. ReduceStencilGeneration is scheduled over the keys to fill a
///     ReduceKeySeriesStates array.
///  2. ShiftCopyAndInit builds a temporary value array from \c values, the key
///     states and \c initialValue.
///  3. ScanInclusiveByKey over the temporary array produces \c output.
///
/// \param keys           key of each value
/// \param values         values to scan (element type \c U)
/// \param output         receives the per-key exclusive scan
/// \param initialValue   seed used at the start of each key series
/// \param binaryFunctor  operator used to combine values
template <typename T, typename U, typename KIn, typename VIn, typename VOut, class BinaryFunctor>
VTKM_CONT static void ScanExclusiveByKey(const vtkm::cont::ArrayHandle<T, KIn>& keys,
                                         const vtkm::cont::ArrayHandle<U, VIn>& values,
                                         vtkm::cont::ArrayHandle<U, VOut>& output,
                                         const U& initialValue,
                                         BinaryFunctor binaryFunctor)
{
  VTKM_ASSERT(keys.GetNumberOfValues() == values.GetNumberOfValues());

  // 0. Special case for 0 and 1 element input
  vtkm::Id numberOfKeys = keys.GetNumberOfValues();
  if (numberOfKeys == 0)
  {
    return;
  }
  else if (numberOfKeys == 1)
  {
    output.PrepareForOutput(1, DeviceAdapterTag());
    output.GetPortalControl().Set(0, initialValue);
    return;
  }

  // 1. Create head flags
  // Determine the state of each key: start, middle or end of a series, or the
  // special state "start and end of a series".
  vtkm::cont::ArrayHandle<ReduceKeySeriesStates> keystate;
  {
    using KeyPortalType = typename vtkm::cont::ArrayHandle<T, KIn>::template ExecutionTypes<
      DeviceAdapterTag>::PortalConst;
    using KeyStatePortalType =
      typename vtkm::cont::ArrayHandle<ReduceKeySeriesStates>::template ExecutionTypes<
        DeviceAdapterTag>::Portal;

    KeyPortalType keyPortal = keys.PrepareForInput(DeviceAdapterTag());
    KeyStatePortalType keyStatePortal =
      keystate.PrepareForOutput(numberOfKeys, DeviceAdapterTag());
    ReduceStencilGeneration<KeyPortalType, KeyStatePortalType> kernel(keyPortal, keyStatePortal);
    DerivedAlgorithm::Schedule(kernel, numberOfKeys);
  }

  // 2. Shift input and initialize elements at head flags position to initValue
  // The temporary array holds *values*, so it (and the portal reading the
  // values) must use the value type U and value storage VIn, not the key
  // type T / key storage KIn.
  using TempArrayType = vtkm::cont::ArrayHandle<U, vtkm::cont::StorageTagBasic>;
  using TempPortalType = typename TempArrayType::template ExecutionTypes<DeviceAdapterTag>::Portal;
  TempArrayType temp;
  {
    using ValuePortalType = typename vtkm::cont::ArrayHandle<U, VIn>::template ExecutionTypes<
      DeviceAdapterTag>::PortalConst;
    using KeyStatePortalType =
      typename vtkm::cont::ArrayHandle<ReduceKeySeriesStates>::template ExecutionTypes<
        DeviceAdapterTag>::PortalConst;

    ValuePortalType valuePortal = values.PrepareForInput(DeviceAdapterTag());
    KeyStatePortalType keyStatePortal = keystate.PrepareForInput(DeviceAdapterTag());
    TempPortalType tempPortal = temp.PrepareForOutput(numberOfKeys, DeviceAdapterTag());

    ShiftCopyAndInit<U, ValuePortalType, KeyStatePortalType, TempPortalType> kernel(
      valuePortal, keyStatePortal, tempPortal, initialValue);
    DerivedAlgorithm::Schedule(kernel, numberOfKeys);
  }

  // 3. Perform a ScanInclusiveByKey
  DerivedAlgorithm::ScanInclusiveByKey(keys, temp, output, binaryFunctor);
}
2017-05-18 14:29:41 +00:00
/// ScanExclusiveByKey overload without an explicit seed or operator: forwards
/// to the full overload with a zero-initialized U and vtkm::Sum.
template <typename T, typename U, class KIn, typename VIn, typename VOut>
VTKM_CONT static void ScanExclusiveByKey(const vtkm::cont::ArrayHandle<T, KIn>& keys,
                                         const vtkm::cont::ArrayHandle<U, VIn>& values,
                                         vtkm::cont::ArrayHandle<U, VOut>& output)
{
  const U zeroSeed = vtkm::TypeTraits<U>::ZeroInitialization();
  DerivedAlgorithm::ScanExclusiveByKey(keys, values, output, zeroSeed, vtkm::Sum());
}
//--------------------------------------------------------------------------
// Streaming exclusive scan
2017-05-18 14:29:41 +00:00
/// StreamingScanExclusive overload without an explicit operator or seed:
/// forwards to the full overload with vtkm::Sum and a zero-initialized T.
template <typename T, class CIn, class COut>
VTKM_CONT static T StreamingScanExclusive(const vtkm::Id numBlocks,
                                          const vtkm::cont::ArrayHandle<T, CIn>& input,
                                          vtkm::cont::ArrayHandle<T, COut>& output)
{
  const T zeroSeed = vtkm::TypeTraits<T>::ZeroInitialization();
  return DerivedAlgorithm::StreamingScanExclusive(
    numBlocks, input, output, vtkm::Sum(), zeroSeed);
}
2017-05-18 14:29:41 +00:00
/// Exclusive scan of \c input into \c output performed in \c numBlocks
/// consecutive pieces, carrying the result of each piece in as the seed of
/// the next.
///
/// Each piece of \c input and \c output is wrapped in an ArrayHandleStreaming
/// built from (array, block index, block size, number of values in this
/// block). The full output is allocated while processing the first block, and
/// SyncControlArray is called on the output stream after every block.
///
/// \param numBlocks       number of pieces to split the arrays into; must be
///                        positive (asserted)
/// \param input           array to scan
/// \param output          receives the scan
/// \param binary_functor  operator used to combine values
/// \param initialValue    seed for the first block
/// \return the value returned by ScanExclusive for the last processed block.
template <typename T, class CIn, class COut, class BinaryFunctor>
VTKM_CONT static T StreamingScanExclusive(const vtkm::Id numBlocks,
                                          const vtkm::cont::ArrayHandle<T, CIn>& input,
                                          vtkm::cont::ArrayHandle<T, COut>& output,
                                          BinaryFunctor binary_functor,
                                          const T& initialValue)
{
  using StreamingInputType = vtkm::cont::ArrayHandleStreaming<vtkm::cont::ArrayHandle<T, CIn>>;
  using StreamingOutputType = vtkm::cont::ArrayHandleStreaming<vtkm::cont::ArrayHandle<T, COut>>;

  // A zero block count would divide by zero below.
  VTKM_ASSERT(numBlocks > 0);

  const vtkm::Id fullSize = input.GetNumberOfValues();
  vtkm::Id blockSize = fullSize / numBlocks;
  if (fullSize % numBlocks != 0)
  {
    blockSize += 1;
  }

  // Seeding with initialValue lets every block use the same call and
  // guarantees a defined return value.
  T lastResult = initialValue;
  for (vtkm::Id block = 0; block < numBlocks; block++)
  {
    // blockSize is rounded up, so blockSize * block can reach or exceed
    // fullSize before the last block. Clamp the count to what is left and
    // stop once the input is exhausted. Block 0 always runs so that the
    // output is allocated even for empty input.
    const vtkm::Id remaining = fullSize - blockSize * block;
    if (block > 0 && remaining <= 0)
    {
      break;
    }
    const vtkm::Id numberOfInstances = (remaining < blockSize) ? remaining : blockSize;

    StreamingInputType streamIn(input, block, blockSize, numberOfInstances);
    StreamingOutputType streamOut(output, block, blockSize, numberOfInstances);

    if (block == 0)
    {
      streamOut.AllocateFullArray(fullSize);
    }
    lastResult = DerivedAlgorithm::ScanExclusive(streamIn, streamOut, binary_functor, lastResult);

    streamOut.SyncControlArray();
  }
  return lastResult;
}
//--------------------------------------------------------------------------
// Scan Inclusive
2017-05-18 14:29:41 +00:00
/// ScanInclusive overload without an explicit operator: forwards to the
/// functor-taking ScanInclusive with a default-constructed vtkm::Add.
template <typename T, class CIn, class COut>
VTKM_CONT static T ScanInclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                 vtkm::cont::ArrayHandle<T, COut>& output)
{
  vtkm::Add addOperator;
  return DerivedAlgorithm::ScanInclusive(input, output, addOperator);
}
2017-05-18 14:29:41 +00:00
/// Inclusive scan of \c input into \c output using \c binary_functor.
///
/// \c input is first copied to \c output, which is then scanned in place by
/// repeatedly scheduling ScanKernel: a first pass with the stride doubling
/// from 2 while (stride - 1) is below the array length, then a second pass
/// with the stride halving back down to 2.
///
/// \return the last element of the scanned output, or a zero-initialized T
///         when the array is empty.
template <typename T, class CIn, class COut, class BinaryFunctor>
VTKM_CONT static T ScanInclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                 vtkm::cont::ArrayHandle<T, COut>& output,
                                 BinaryFunctor binary_functor)
{
  using PortalType =
    typename vtkm::cont::ArrayHandle<T, COut>::template ExecutionTypes<DeviceAdapterTag>::Portal;
  using ScanKernelType = ScanKernel<PortalType, BinaryFunctor>;

  DerivedAlgorithm::Copy(input, output);

  const vtkm::Id numValues = output.GetNumberOfValues();
  if (numValues < 1)
  {
    return vtkm::TypeTraits<T>::ZeroInitialization();
  }

  PortalType portal = output.PrepareForInPlace(DeviceAdapterTag());

  // First pass: strides 2, 4, 8, ... while stride - 1 < numValues.
  vtkm::Id stride = 2;
  while (stride - 1 < numValues)
  {
    ScanKernelType forwardPass(portal, binary_functor, stride, stride / 2 - 1);
    DerivedAlgorithm::Schedule(forwardPass, numValues / stride);
    stride *= 2;
  }

  // Do reverse operation on odd indices. Start at the stride we were just at.
  stride /= 2;
  while (stride > 1)
  {
    ScanKernelType reversePass(portal, binary_functor, stride, stride - 1);
    DerivedAlgorithm::Schedule(reversePass, numValues / stride);
    stride /= 2;
  }

  return GetExecutionValue(output, numValues - 1);
}
2017-05-18 14:29:41 +00:00
/// ScanInclusiveByKey overload without an explicit operator: forwards to the
/// functor-taking ScanInclusiveByKey with a default-constructed vtkm::Add.
template <typename T, typename U, class KIn, class VIn, class VOut>
VTKM_CONT static void ScanInclusiveByKey(const vtkm::cont::ArrayHandle<T, KIn>& keys,
                                         const vtkm::cont::ArrayHandle<U, VIn>& values,
                                         vtkm::cont::ArrayHandle<U, VOut>& values_output)
{
  vtkm::Add addOperator;
  DerivedAlgorithm::ScanInclusiveByKey(keys, values, values_output, addOperator);
}
2017-05-18 14:29:41 +00:00
/// Inclusive scan of \c values restarted at every run of equal \c keys.
///
/// \c keys and \c values must have the same length (asserted). With zero or
/// one key, \c values is copied straight to \c values_output. Otherwise
/// ReduceStencilGeneration fills a ReduceKeySeriesStates array from the keys,
/// an inclusive scan with ReduceByKeyAdd runs over the zipped
/// (values, key state) array, and the value half of the scan result is copied
/// to \c values_output.
template <typename T, typename U, class KIn, class VIn, class VOut, class BinaryFunctor>
VTKM_CONT static void ScanInclusiveByKey(const vtkm::cont::ArrayHandle<T, KIn>& keys,
                                         const vtkm::cont::ArrayHandle<U, VIn>& values,
                                         vtkm::cont::ArrayHandle<U, VOut>& values_output,
                                         BinaryFunctor binary_functor)
{
  using KeyStateHandle = vtkm::cont::ArrayHandle<ReduceKeySeriesStates>;

  VTKM_ASSERT(keys.GetNumberOfValues() == values.GetNumberOfValues());
  const vtkm::Id numberOfKeys = keys.GetNumberOfValues();

  if (numberOfKeys <= 1)
  {
    // At most one key/value pair: the input already is the answer.
    DerivedAlgorithm::Copy(values, values_output);
    return;
  }

  // Work out the state of every key (start, middle or end of a series, or
  // the special "start and end" state).
  KeyStateHandle keystate;
  {
    using KeyPortal = typename vtkm::cont::ArrayHandle<T, KIn>::template ExecutionTypes<
      DeviceAdapterTag>::PortalConst;
    using KeyStatePortal =
      typename KeyStateHandle::template ExecutionTypes<DeviceAdapterTag>::Portal;

    KeyPortal keyPortal = keys.PrepareForInput(DeviceAdapterTag());
    KeyStatePortal statePortal = keystate.PrepareForOutput(numberOfKeys, DeviceAdapterTag());
    ReduceStencilGeneration<KeyPortal, KeyStatePortal> classifyKeys(keyPortal, statePortal);
    DerivedAlgorithm::Schedule(classifyKeys, numberOfKeys);
  }

  // Scan the values zipped with the key states. The scan writes a pair per
  // entry: the running value and a stencil entry.
  {
    using ScannedHandle = vtkm::cont::ArrayHandle<U, VOut>;
    using ZippedInput =
      vtkm::cont::ArrayHandleZip<vtkm::cont::ArrayHandle<U, VIn>, KeyStateHandle>;
    using ZippedOutput = vtkm::cont::ArrayHandleZip<ScannedHandle, KeyStateHandle>;

    KeyStateHandle stencil;
    vtkm::cont::ArrayHandle<U> tempArray;
    ScannedHandle reducedValues(tempArray);

    ZippedInput scanInput(values, keystate);
    ZippedOutput scanOutput(reducedValues, stencil);
    DerivedAlgorithm::ScanInclusive(
      scanInput, scanOutput, ReduceByKeyAdd<BinaryFunctor>(binary_functor));

    // The key states are no longer needed once the scan has run.
    keystate.ReleaseResources();

    DerivedAlgorithm::Copy(reducedValues, values_output);
  }
}
2017-04-13 17:50:56 +00:00
2016-09-15 23:46:09 +00:00
//--------------------------------------------------------------------------
// Sort
2017-05-18 14:29:41 +00:00
/// Sorts \c values in place, ordering its elements with \c binary_compare.
///
/// Arrays holding fewer than two values are returned untouched. Otherwise the
/// array is prepared for in-place access on the device and sorted by
/// scheduling a BitonicSortCrossoverKernel pass for each power-of-two
/// crossover size, each followed by a cascade of BitonicSortMergeKernel
/// passes whose merge size halves until it reaches zero.
template <typename T, class Storage, class BinaryCompare>
VTKM_CONT static void Sort(vtkm::cont::ArrayHandle<T, Storage>& values,
                           BinaryCompare binary_compare)
{
  using ArrayType = vtkm::cont::ArrayHandle<T, Storage>;
  using PortalType = typename ArrayType::template ExecutionTypes<DeviceAdapterTag>::Portal;
  using MergeKernel = BitonicSortMergeKernel<PortalType, BinaryCompare>;
  using CrossoverKernel = BitonicSortCrossoverKernel<PortalType, BinaryCompare>;

  const vtkm::Id numValues = values.GetNumberOfValues();
  if (numValues < 2)
  {
    // Zero or one value: nothing to reorder.
    return;
  }

  PortalType portal = values.PrepareForInPlace(DeviceAdapterTag());

  // Find the smallest power of two that is >= numValues; every kernel pass is
  // scheduled with half that many threads.
  vtkm::Id paddedSize = 1;
  while (paddedSize < numValues)
  {
    paddedSize *= 2;
  }
  const vtkm::Id numThreads = paddedSize / 2;

  for (vtkm::Id crossover = 1; crossover < numValues; crossover *= 2)
  {
    DerivedAlgorithm::Schedule(CrossoverKernel(portal, binary_compare, crossover), numThreads);

    // Follow each crossover pass with merge passes of decreasing size.
    vtkm::Id merge = crossover / 2;
    while (merge > 0)
    {
      DerivedAlgorithm::Schedule(MergeKernel(portal, binary_compare, merge), numThreads);
      merge /= 2;
    }
  }
}
2017-05-18 14:29:41 +00:00
/// Sorts \c values in place using DefaultCompareFunctor as the ordering.
///
/// Dispatches through DerivedAlgorithm so that a device-specific Sort
/// provided by the derived algorithm class is the one that runs.
template <typename T, class Storage>
VTKM_CONT static void Sort(vtkm::cont::ArrayHandle<T, Storage>& values)
{
  DerivedAlgorithm::Sort(values, DefaultCompareFunctor());
}
//--------------------------------------------------------------------------
// Sort by Key
public:
2017-05-18 14:29:41 +00:00
/// Sorts \c keys and \c values together, ordering the pairs by key only.
///
/// The two arrays are zipped into a single array of (key, value) pairs, which
/// is then sorted with a default-constructed internal::KeyCompare<T, U> so
/// that only the key side of each pair takes part in the comparison.
template <typename T, typename U, class StorageT, class StorageU>
VTKM_CONT static void SortByKey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
                                vtkm::cont::ArrayHandle<U, StorageU>& values)
{
  using KeyHandleType = vtkm::cont::ArrayHandle<T, StorageT>;
  using ValueHandleType = vtkm::cont::ArrayHandle<U, StorageU>;
  using ZipHandleType = vtkm::cont::ArrayHandleZip<KeyHandleType, ValueHandleType>;

  // Zip keys with values; we then sort the zipped array with a comparison
  // wrapper that looks only at the key half of each pair.
  ZipHandleType keyValuePairs = vtkm::cont::make_ArrayHandleZip(keys, values);
  DerivedAlgorithm::Sort(keyValuePairs, internal::KeyCompare<T, U>());
}
2017-05-18 14:29:41 +00:00
/// Sorts \c keys and \c values together, ordering the pairs by key using the
/// caller-supplied \c binary_compare.
///
/// The two arrays are zipped into a single array of (key, value) pairs, which
/// is then sorted with an internal::KeyCompare wrapper built around
/// \c binary_compare so that only the key side of each pair is compared.
template <typename T, typename U, class StorageT, class StorageU, class BinaryCompare>
VTKM_CONT static void SortByKey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
                                vtkm::cont::ArrayHandle<U, StorageU>& values,
                                BinaryCompare binary_compare)
{
  using KeyHandleType = vtkm::cont::ArrayHandle<T, StorageT>;
  using ValueHandleType = vtkm::cont::ArrayHandle<U, StorageU>;
  using ZipHandleType = vtkm::cont::ArrayHandleZip<KeyHandleType, ValueHandleType>;
  using KeyOnlyCompare = internal::KeyCompare<T, U, BinaryCompare>;

  // Zip keys with values; we then sort the zipped array with a wrapper that
  // forwards only the key half of each pair to the user's comparison functor.
  ZipHandleType keyValuePairs = vtkm::cont::make_ArrayHandleZip(keys, values);
  DerivedAlgorithm::Sort(keyValuePairs, KeyOnlyCompare(binary_compare));
}
//--------------------------------------------------------------------------
// Unique
2017-05-18 14:29:41 +00:00
/// Reduces \c values in place using std::equal_to<T> as the equality test.
///
/// Forwards to the comparison-taking Unique overload.
template <typename T, class Storage>
VTKM_CONT static void Unique(vtkm::cont::ArrayHandle<T, Storage>& values)
{
  Unique(values, std::equal_to<T>());
}
2017-05-18 14:29:41 +00:00
/// Reduces \c values in place, using \c binary_compare as the equality test.
///
/// Works in three steps:
///  1. a ClassifyUniqueComparisonKernel is scheduled over every input value
///     and fills a stencil array of vtkm::Id (one entry per value),
///  2. CopyIf gathers the values selected by that stencil into a temporary,
///  3. \c values is reallocated to the temporary's length and the temporary
///     is copied back into it.
/// NOTE(review): the stencil presumably flags the first value of each run of
/// equal neighbours -- confirm against ClassifyUniqueComparisonKernel.
template <typename T, class Storage, class BinaryCompare>
VTKM_CONT static void Unique(vtkm::cont::ArrayHandle<T, Storage>& values,
                             BinaryCompare binary_compare)
{
  using StencilArrayType = vtkm::cont::ArrayHandle<vtkm::Id, vtkm::cont::StorageTagBasic>;
  using InputPortalType = typename vtkm::cont::ArrayHandle<T, Storage>::template ExecutionTypes<
    DeviceAdapterTag>::PortalConst;
  using StencilPortalType =
    typename StencilArrayType::template ExecutionTypes<DeviceAdapterTag>::Portal;
  using WrappedBOpType = internal::WrappedBinaryOperator<bool, BinaryCompare>;
  using ClassifyKernelType =
    ClassifyUniqueComparisonKernel<InputPortalType, StencilPortalType, WrappedBOpType>;

  StencilArrayType stencilArray;
  vtkm::Id inputSize = values.GetNumberOfValues();

  // Wrap the user's functor so the kernel sees a bool-returning operator.
  WrappedBOpType wrappedCompare(binary_compare);

  // Step 1: classify every value into the stencil.
  ClassifyKernelType classifyKernel(values.PrepareForInput(DeviceAdapterTag()),
                                    stencilArray.PrepareForOutput(inputSize, DeviceAdapterTag()),
                                    wrappedCompare);
  DerivedAlgorithm::Schedule(classifyKernel, inputSize);

  // Step 2: compact the selected values into a temporary array.
  vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> compacted;
  DerivedAlgorithm::CopyIf(values, stencilArray, compacted);

  // Step 3: resize the caller's array and copy the compacted values back.
  values.Allocate(compacted.GetNumberOfValues());
  DerivedAlgorithm::Copy(compacted, values);
}
//--------------------------------------------------------------------------
// Upper bounds
2017-05-18 14:29:41 +00:00
/// Runs UpperBoundsKernel once per entry of \c values, searching \c input and
/// writing one vtkm::Id per entry into \c output.
///
/// \c output is allocated to the same length as \c values.
/// NOTE(review): \c input is presumably expected to be sorted, as for any
/// upper-bound search -- confirm against UpperBoundsKernel.
template <typename T, class CIn, class CVal, class COut>
VTKM_CONT static void UpperBounds(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                  const vtkm::cont::ArrayHandle<T, CVal>& values,
                                  vtkm::cont::ArrayHandle<vtkm::Id, COut>& output)
{
  using InputPortalType =
    typename vtkm::cont::ArrayHandle<T, CIn>::template ExecutionTypes<DeviceAdapterTag>::PortalConst;
  using ValuesPortalType = typename vtkm::cont::ArrayHandle<T, CVal>::template ExecutionTypes<
    DeviceAdapterTag>::PortalConst;
  using OutputPortalType = typename vtkm::cont::ArrayHandle<vtkm::Id, COut>::template ExecutionTypes<
    DeviceAdapterTag>::Portal;
  using KernelType = UpperBoundsKernel<InputPortalType, ValuesPortalType, OutputPortalType>;

  // One output slot, and one scheduled thread, per value being looked up.
  vtkm::Id arraySize = values.GetNumberOfValues();

  KernelType kernel(input.PrepareForInput(DeviceAdapterTag()),
                    values.PrepareForInput(DeviceAdapterTag()),
                    output.PrepareForOutput(arraySize, DeviceAdapterTag()));
  DerivedAlgorithm::Schedule(kernel, arraySize);
}
2017-05-18 14:29:41 +00:00
/// Runs UpperBoundsKernelComparisonKernel once per entry of \c values,
/// searching \c input with the ordering given by \c binary_compare and
/// writing one vtkm::Id per entry into \c output.
///
/// \c output is allocated to the same length as \c values.
/// NOTE(review): \c input is presumably expected to be sorted consistently
/// with \c binary_compare -- confirm against the kernel.
template <typename T, class CIn, class CVal, class COut, class BinaryCompare>
VTKM_CONT static void UpperBounds(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                  const vtkm::cont::ArrayHandle<T, CVal>& values,
                                  vtkm::cont::ArrayHandle<vtkm::Id, COut>& output,
                                  BinaryCompare binary_compare)
{
  using InputPortalType =
    typename vtkm::cont::ArrayHandle<T, CIn>::template ExecutionTypes<DeviceAdapterTag>::PortalConst;
  using ValuesPortalType = typename vtkm::cont::ArrayHandle<T, CVal>::template ExecutionTypes<
    DeviceAdapterTag>::PortalConst;
  using OutputPortalType = typename vtkm::cont::ArrayHandle<vtkm::Id, COut>::template ExecutionTypes<
    DeviceAdapterTag>::Portal;
  using KernelType = UpperBoundsKernelComparisonKernel<InputPortalType,
                                                       ValuesPortalType,
                                                       OutputPortalType,
                                                       BinaryCompare>;

  // One output slot, and one scheduled thread, per value being looked up.
  vtkm::Id arraySize = values.GetNumberOfValues();

  KernelType kernel(input.PrepareForInput(DeviceAdapterTag()),
                    values.PrepareForInput(DeviceAdapterTag()),
                    output.PrepareForOutput(arraySize, DeviceAdapterTag()),
                    binary_compare);
  DerivedAlgorithm::Schedule(kernel, arraySize);
}
2017-05-18 14:29:41 +00:00
/// In-place form of UpperBounds for vtkm::Id arrays.
///
/// \c values_output supplies the values to look up in \c input and also
/// receives the resulting indices: it is passed to the three-argument
/// overload as both the \c values and the \c output array.
template <class CIn, class COut>
VTKM_CONT static void UpperBounds(const vtkm::cont::ArrayHandle<vtkm::Id, CIn>& input,
                                  vtkm::cont::ArrayHandle<vtkm::Id, COut>& values_output)
{
  using GeneralAlgorithm = DeviceAdapterAlgorithmGeneral<DerivedAlgorithm, DeviceAdapterTag>;
  GeneralAlgorithm::UpperBounds(input, values_output, values_output);
}
};
}
}
} // namespace vtkm::cont::internal
2017-05-18 14:29:41 +00:00
namespace vtkm
{
namespace cont
{
2016-02-10 15:51:31 +00:00
/// \brief Class providing a device-specific atomic interface.
///
/// This class provides the actual implementation used by vtkm::exec::AtomicArray.
/// A serial default implementation is provided, but each device will have a different
/// implementation.
///
/// Serial requires no form of atomicity
///
2017-05-18 14:29:41 +00:00
template <typename T, typename DeviceTag>
2016-02-10 15:51:31 +00:00
class DeviceAdapterAtomicArrayImplementation
{
public:
VTKM_CONT
DeviceAdapterAtomicArrayImplementation(
2017-05-18 14:29:41 +00:00
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle)
: Iterators(IteratorsType(handle.PrepareForInPlace(DeviceTag())))
2016-02-10 15:51:31 +00:00
{
}
VTKM_EXEC
2016-02-10 15:51:31 +00:00
T Add(vtkm::Id index, const T& value) const
{
T* lockedValue;
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp =
2017-05-18 14:29:41 +00:00
&(*(Iterators.GetBegin() + static_cast<std::ptrdiff_t>(index)));
lockedValue = temp;
return vtkmAtomicAdd(lockedValue, value);
#else
2017-05-18 14:29:41 +00:00
lockedValue = (Iterators.GetBegin() + index);
return vtkmAtomicAdd(lockedValue, value);
#endif
2016-02-10 15:51:31 +00:00
}
VTKM_EXEC
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
{
T* lockedValue;
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp =
2017-05-18 14:29:41 +00:00
&(*(Iterators.GetBegin() + static_cast<std::ptrdiff_t>(index)));
lockedValue = temp;
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#else
2017-05-18 14:29:41 +00:00
lockedValue = (Iterators.GetBegin() + index);
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#endif
}
2016-02-10 15:51:31 +00:00
private:
2017-05-18 14:29:41 +00:00
typedef typename vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>::template ExecutionTypes<
DeviceTag>::Portal PortalType;
typedef vtkm::cont::ArrayPortalToIterators<PortalType> IteratorsType;
IteratorsType Iterators;
#if defined(VTKM_MSVC) //MSVC atomics
VTKM_EXEC
2017-05-18 14:29:41 +00:00
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const
{
2017-05-18 14:29:41 +00:00
return InterlockedExchangeAdd(reinterpret_cast<volatile long*>(address), value);
}
VTKM_EXEC
2017-05-18 14:29:41 +00:00
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const
{
2017-05-18 14:29:41 +00:00
return InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(address), value);
}
VTKM_EXEC
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address,
const vtkm::Int32& newValue,
2017-05-18 14:29:41 +00:00
const vtkm::Int32& oldValue) const
{
return InterlockedCompareExchange(
reinterpret_cast<volatile long*>(address), newValue, oldValue);
}
VTKM_EXEC
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address,
const vtkm::Int64& newValue,
2017-05-18 14:29:41 +00:00
const vtkm::Int64& oldValue) const
{
return InterlockedCompareExchange64(
reinterpret_cast<volatile long long*>(address), newValue, oldValue);
}
#else //gcc built-in atomics
VTKM_EXEC
2017-05-18 14:29:41 +00:00
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const
{
2017-05-18 14:29:41 +00:00
return __sync_fetch_and_add(address, value);
}
VTKM_EXEC
2017-05-18 14:29:41 +00:00
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const
{
2017-05-18 14:29:41 +00:00
return __sync_fetch_and_add(address, value);
}
VTKM_EXEC
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address,
const vtkm::Int32& newValue,
2017-05-18 14:29:41 +00:00
const vtkm::Int32& oldValue) const
{
2017-05-18 14:29:41 +00:00
return __sync_val_compare_and_swap(address, oldValue, newValue);
}
VTKM_EXEC
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address,
const vtkm::Int64& newValue,
2017-05-18 14:29:41 +00:00
const vtkm::Int64& oldValue) const
{
2017-05-18 14:29:41 +00:00
return __sync_val_compare_and_swap(address, oldValue, newValue);
}
#endif
2016-02-10 15:51:31 +00:00
};
/// \brief Class providing a device-specific support for selecting the optimal
/// Task type for a given worklet.
///
/// When worklets are launched inside the execution environment we need to
/// ask the device adapter what the preferred execution style is, be it
/// a tiled iteration pattern, or strided. This class answers that question
/// through its MakeTask methods.
///
/// By default if not specialized for a device adapter the default
/// is to use vtkm::exec::internal::TaskSingular
///
template <typename DeviceTag>
class DeviceTaskTypes
{
public:
  /// Builds the task used for a one-dimensional (vtkm::Id) schedule range.
  ///
  /// The range argument itself is unused; the task is constructed from the
  /// worklet, the invocation and \c globalIndexOffset only.
  template <typename WorkletType, typename InvocationType>
  static vtkm::exec::internal::TaskSingular<WorkletType, InvocationType> MakeTask(
    const WorkletType& worklet,
    const InvocationType& invocation,
    vtkm::Id,
    vtkm::Id globalIndexOffset = 0)
  {
    using SingularTask = vtkm::exec::internal::TaskSingular<WorkletType, InvocationType>;
    return SingularTask(worklet, invocation, globalIndexOffset);
  }

  /// Builds the task used for a three-dimensional (vtkm::Id3) schedule range.
  ///
  /// The range argument itself is unused; the task is constructed from the
  /// worklet, the invocation and \c globalIndexOffset only.
  template <typename WorkletType, typename InvocationType>
  static vtkm::exec::internal::TaskSingular<WorkletType, InvocationType> MakeTask(
    const WorkletType& worklet,
    const InvocationType& invocation,
    vtkm::Id3,
    vtkm::Id globalIndexOffset = 0)
  {
    using SingularTask = vtkm::exec::internal::TaskSingular<WorkletType, InvocationType>;
    return SingularTask(worklet, invocation, globalIndexOffset);
  }
};
2016-02-10 16:21:38 +00:00
}
} // namespace vtkm::cont
#endif //vtk_m_cont_internal_DeviceAdapterAlgorithmGeneral_h