vtk-m/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h
Robert Maynard 1e19101eee Obey VisualStudio checked iterator levels, and NULL checked iterators
When compiling under VisualStudio we need to first determine if checked
iterators are enabled ( _ITERATOR_DEBUG_LEVEL ). We don't want to use the
NDEBUG key, as we could be inside a project that is in Debug mode with
disabled checked iterators.

Secondly if they are enabled we need to handle the use case of NULL iterators
that get advanced by length zero. This last case is valid, but isn't supported
by the checked iterators so we need to work around it
2016-06-15 13:13:02 -04:00

813 lines
31 KiB
C++

//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_internal_DeviceAdapterAlgorithmGeneral_h
#define vtk_m_cont_internal_DeviceAdapterAlgorithmGeneral_h
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleImplicit.h>
#include <vtkm/cont/ArrayHandleIndex.h>
#include <vtkm/cont/ArrayHandleZip.h>
#include <vtkm/cont/internal/FunctorsGeneral.h>
#include <vtkm/exec/internal/ErrorMessageBuffer.h>
VTKM_THIRDPARTY_PRE_INCLUDE
#if defined(VTKM_MSVC)
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <Windows.h>
#undef WIN32_LEAN_AND_MEAN
#undef NOMINMAX
#endif
VTKM_THIRDPARTY_POST_INCLUDE
namespace vtkm {
namespace cont {
namespace internal {
/// \brief
///
/// This struct provides algorithms that implement "general" device adapter
/// algorithms. If a device adapter provides implementations for Schedule,
/// and Synchronize, the rest of the algorithms can be implemented by calling
/// these functions.
///
/// It should be noted that we recommend that you also implement Sort,
/// ScanInclusive, and ScanExclusive for improved performance.
///
/// An easy way to implement the DeviceAdapterAlgorithm specialization is to
/// subclass this and override the implementation of methods as necessary.
/// As an example, the code would look something like this.
///
/// \code{.cpp}
/// template<>
/// struct DeviceAdapterAlgorithm<DeviceAdapterTagFoo>
/// : DeviceAdapterAlgorithmGeneral<DeviceAdapterAlgorithm<DeviceAdapterTagFoo>,
/// DeviceAdapterTagFoo>
/// {
/// template<class Functor>
/// VTKM_CONT_EXPORT static void Schedule(Functor functor,
/// vtkm::Id numInstances)
/// {
/// ...
/// }
///
/// template<class Functor>
/// VTKM_CONT_EXPORT static void Schedule(Functor functor,
/// vtkm::Id3 maxRange)
/// {
/// ...
/// }
///
/// VTKM_CONT_EXPORT static void Synchronize()
/// {
/// ...
/// }
/// };
/// \endcode
///
/// You might note that DeviceAdapterAlgorithmGeneral has two template
/// parameters that are redundant. Although the first parameter, the class for
/// the actual DeviceAdapterAlgorithm class containing Schedule, and
/// Synchronize is the same as DeviceAdapterAlgorithm<DeviceAdapterTag>, it is
/// made a separate template parameter to avoid a recursive dependence between
/// DeviceAdapterAlgorithmGeneral.h and DeviceAdapterAlgorithm.h
///
template<class DerivedAlgorithm, class DeviceAdapterTag>
struct DeviceAdapterAlgorithmGeneral
{
//--------------------------------------------------------------------------
// Get Execution Value
// This method is used internally to get a single element from the execution
// array. Might want to expose this and/or allow actual device adapter
// implementations to provide one.
private:
template<typename T, class CIn>
VTKM_CONT_EXPORT
static T GetExecutionValue(const vtkm::cont::ArrayHandle<T, CIn> &input,
vtkm::Id index)
{
typedef vtkm::cont::ArrayHandle<T,CIn> InputArrayType;
typedef vtkm::cont::ArrayHandle<T,vtkm::cont::StorageTagBasic>
OutputArrayType;
OutputArrayType output;
CopyKernel<
typename InputArrayType::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
typename OutputArrayType::template ExecutionTypes<DeviceAdapterTag>::Portal>
kernel(input.PrepareForInput(DeviceAdapterTag()),
output.PrepareForOutput(1, DeviceAdapterTag()),
index);
DerivedAlgorithm::Schedule(kernel, 1);
return output.GetPortalConstControl().Get(0);
}
public:
//--------------------------------------------------------------------------
// Copy
template<typename T, typename U, class CIn, class COut>
VTKM_CONT_EXPORT static void Copy(const vtkm::cont::ArrayHandle<T, CIn> &input,
vtkm::cont::ArrayHandle<U, COut> &output)
{
vtkm::Id arraySize = input.GetNumberOfValues();
CopyKernel<
typename vtkm::cont::ArrayHandle<T,CIn>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
typename vtkm::cont::ArrayHandle<U,COut>::template ExecutionTypes<DeviceAdapterTag>::Portal>
kernel(input.PrepareForInput(DeviceAdapterTag()),
output.PrepareForOutput(arraySize, DeviceAdapterTag()));
DerivedAlgorithm::Schedule(kernel, arraySize);
}
//--------------------------------------------------------------------------
// Lower Bounds
template<typename T, class CIn, class CVal, class COut>
VTKM_CONT_EXPORT static void LowerBounds(
const vtkm::cont::ArrayHandle<T,CIn> &input,
const vtkm::cont::ArrayHandle<T,CVal> &values,
vtkm::cont::ArrayHandle<vtkm::Id,COut> &output)
{
vtkm::Id arraySize = values.GetNumberOfValues();
LowerBoundsKernel<
typename vtkm::cont::ArrayHandle<T,CIn>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
typename vtkm::cont::ArrayHandle<T,CVal>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
typename vtkm::cont::ArrayHandle<vtkm::Id,COut>::template ExecutionTypes<DeviceAdapterTag>::Portal>
kernel(input.PrepareForInput(DeviceAdapterTag()),
values.PrepareForInput(DeviceAdapterTag()),
output.PrepareForOutput(arraySize, DeviceAdapterTag()));
DerivedAlgorithm::Schedule(kernel, arraySize);
}
template<typename T, class CIn, class CVal, class COut, class BinaryCompare>
VTKM_CONT_EXPORT static void LowerBounds(
const vtkm::cont::ArrayHandle<T,CIn> &input,
const vtkm::cont::ArrayHandle<T,CVal> &values,
vtkm::cont::ArrayHandle<vtkm::Id,COut> &output,
BinaryCompare binary_compare)
{
vtkm::Id arraySize = values.GetNumberOfValues();
LowerBoundsComparisonKernel<
typename vtkm::cont::ArrayHandle<T,CIn>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
typename vtkm::cont::ArrayHandle<T,CVal>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
typename vtkm::cont::ArrayHandle<vtkm::Id,COut>::template ExecutionTypes<DeviceAdapterTag>::Portal,
BinaryCompare>
kernel(input.PrepareForInput(DeviceAdapterTag()),
values.PrepareForInput(DeviceAdapterTag()),
output.PrepareForOutput(arraySize, DeviceAdapterTag()),
binary_compare);
DerivedAlgorithm::Schedule(kernel, arraySize);
}
template<class CIn, class COut>
VTKM_CONT_EXPORT static void LowerBounds(
const vtkm::cont::ArrayHandle<vtkm::Id,CIn> &input,
vtkm::cont::ArrayHandle<vtkm::Id,COut> &values_output)
{
DeviceAdapterAlgorithmGeneral<
DerivedAlgorithm,DeviceAdapterTag>::LowerBounds(input,
values_output,
values_output);
}
//--------------------------------------------------------------------------
// Reduce
template<typename T, class CIn>
VTKM_CONT_EXPORT static T Reduce(
const vtkm::cont::ArrayHandle<T,CIn> &input, T initialValue)
{
return DerivedAlgorithm::Reduce(input, initialValue, vtkm::internal::Add());
}
template<typename T, class CIn, class BinaryFunctor>
VTKM_CONT_EXPORT static T Reduce(
const vtkm::cont::ArrayHandle<T,CIn> &input,
T initialValue,
BinaryFunctor binary_functor)
{
//Crazy Idea:
//We create a implicit array handle that wraps the input
//array handle. The implicit functor is passed the input array handle, and
//the number of elements it needs to sum. This way the implicit handle
//acts as the first level reduction. Say for example reducing 16 values
//at a time.
//
//Now that we have an implicit array that is 1/16 the length of full array
//we can use scan inclusive to compute the final sum
typedef typename vtkm::cont::ArrayHandle<T,CIn>::template ExecutionTypes<DeviceAdapterTag>
::PortalConst InputPortalType;
typedef ReduceKernel<
InputPortalType,
BinaryFunctor
> ReduceKernelType;
typedef vtkm::cont::ArrayHandleImplicit<
T,
ReduceKernelType > ReduceHandleType;
typedef vtkm::cont::ArrayHandle<
T,
vtkm::cont::StorageTagBasic> TempArrayType;
ReduceKernelType kernel(input.PrepareForInput( DeviceAdapterTag() ),
binary_functor);
vtkm::Id length = (input.GetNumberOfValues() / 16);
length += (input.GetNumberOfValues() % 16 == 0) ? 0 : 1;
ReduceHandleType reduced = vtkm::cont::make_ArrayHandleImplicit<T>(kernel,
length);
TempArrayType inclusiveScanStorage;
T scanResult = DerivedAlgorithm::ScanInclusive(reduced,
inclusiveScanStorage,
binary_functor);
return binary_functor(initialValue, scanResult);
}
//--------------------------------------------------------------------------
// Reduce By Key
template<typename T, typename U, class KIn, class VIn, class KOut, class VOut,
class BinaryFunctor>
VTKM_CONT_EXPORT static void ReduceByKey(
const vtkm::cont::ArrayHandle<T,KIn> &keys,
const vtkm::cont::ArrayHandle<U,VIn> &values,
vtkm::cont::ArrayHandle<T,KOut> &keys_output,
vtkm::cont::ArrayHandle<U,VOut> &values_output,
BinaryFunctor binary_functor)
{
VTKM_ASSERT(keys.GetNumberOfValues() == values.GetNumberOfValues());
const vtkm::Id numberOfKeys = keys.GetNumberOfValues();
if(numberOfKeys <= 1)
{ //we only have a single key/value so that is our output
DerivedAlgorithm::Copy(keys, keys_output);
DerivedAlgorithm::Copy(values, values_output);
return;
}
//we need to determine based on the keys what is the keystate for
//each key. The states are start, middle, end of a series and the special
//state start and end of a series
vtkm::cont::ArrayHandle< ReduceKeySeriesStates > keystate;
{
typedef typename vtkm::cont::ArrayHandle<T,KIn>::template ExecutionTypes<DeviceAdapterTag>
::PortalConst InputPortalType;
typedef typename vtkm::cont::ArrayHandle< ReduceKeySeriesStates >::template ExecutionTypes<DeviceAdapterTag>
::Portal KeyStatePortalType;
InputPortalType inputPortal = keys.PrepareForInput(DeviceAdapterTag());
KeyStatePortalType keyStatePortal = keystate.PrepareForOutput(numberOfKeys,
DeviceAdapterTag());
ReduceStencilGeneration<InputPortalType, KeyStatePortalType> kernel(inputPortal, keyStatePortal);
DerivedAlgorithm::Schedule(kernel, numberOfKeys);
}
//next step is we need to reduce the values for each key. This is done
//by running an inclusive scan over the values array using the stencil.
//
// this inclusive scan will write out two values, the first being
// the value summed currently, the second being 0 or 1, with 1 being used
// when this is a value of a key we need to write ( END or START_AND_END)
{
typedef vtkm::cont::ArrayHandle<U,VIn> ValueInHandleType;
typedef vtkm::cont::ArrayHandle<U,VOut> ValueOutHandleType;
typedef vtkm::cont::ArrayHandle< ReduceKeySeriesStates> StencilHandleType;
typedef vtkm::cont::ArrayHandleZip<ValueInHandleType,
StencilHandleType> ZipInHandleType;
typedef vtkm::cont::ArrayHandleZip<ValueOutHandleType,
StencilHandleType> ZipOutHandleType;
StencilHandleType stencil;
ValueOutHandleType reducedValues;
ZipInHandleType scanInput( values, keystate);
ZipOutHandleType scanOutput( reducedValues, stencil);
DerivedAlgorithm::ScanInclusive(scanInput,
scanOutput,
ReduceByKeyAdd<BinaryFunctor>(binary_functor) );
//at this point we are done with keystate, so free the memory
keystate.ReleaseResources();
// all we need know is an efficient way of doing the write back to the
// reduced global memory. this is done by using StreamCompact with the
// stencil and values we just created with the inclusive scan
DerivedAlgorithm::StreamCompact( reducedValues,
stencil,
values_output,
ReduceByKeyUnaryStencilOp());
} //release all temporary memory
//find all the unique keys
DerivedAlgorithm::Copy(keys,keys_output);
DerivedAlgorithm::Unique(keys_output);
}
//--------------------------------------------------------------------------
// Scan Exclusive
template<typename T, class CIn, class COut, class BinaryFunctor>
VTKM_CONT_EXPORT static T ScanExclusive(
const vtkm::cont::ArrayHandle<T,CIn> &input,
vtkm::cont::ArrayHandle<T,COut>& output,
BinaryFunctor binaryFunctor,
const T& initialValue)
{
typedef vtkm::cont::ArrayHandle<T,vtkm::cont::StorageTagBasic>
TempArrayType;
typedef vtkm::cont::ArrayHandle<T,COut> OutputArrayType;
typedef typename TempArrayType::template ExecutionTypes<DeviceAdapterTag>
::PortalConst SrcPortalType;
typedef typename OutputArrayType::template ExecutionTypes<DeviceAdapterTag>
::Portal DestPortalType;
vtkm::Id numValues = input.GetNumberOfValues();
if (numValues <= 0)
{
return initialValue;
}
TempArrayType inclusiveScan;
T result = DerivedAlgorithm::ScanInclusive(input, inclusiveScan, binaryFunctor);
InclusiveToExclusiveKernel<SrcPortalType, DestPortalType, BinaryFunctor>
inclusiveToExclusive(inclusiveScan.PrepareForInput(DeviceAdapterTag()),
output.PrepareForOutput(numValues, DeviceAdapterTag()),
binaryFunctor,
initialValue);
DerivedAlgorithm::Schedule(inclusiveToExclusive, numValues);
return binaryFunctor(initialValue, result);
}
template<typename T, class CIn, class COut>
VTKM_CONT_EXPORT static T ScanExclusive(
const vtkm::cont::ArrayHandle<T,CIn> &input,
vtkm::cont::ArrayHandle<T,COut>& output)
{
return ScanExclusive(input, output, vtkm::Sum(),
vtkm::TypeTraits<T>::ZeroInitialization());
}
//--------------------------------------------------------------------------
// Scan Inclusive
template<typename T, class CIn, class COut>
VTKM_CONT_EXPORT static T ScanInclusive(
const vtkm::cont::ArrayHandle<T,CIn> &input,
vtkm::cont::ArrayHandle<T,COut>& output)
{
return DerivedAlgorithm::ScanInclusive(input,
output,
vtkm::internal::Add());
}
template<typename T, class CIn, class COut, class BinaryFunctor>
VTKM_CONT_EXPORT static T ScanInclusive(
const vtkm::cont::ArrayHandle<T,CIn> &input,
vtkm::cont::ArrayHandle<T,COut>& output,
BinaryFunctor binary_functor)
{
typedef typename
vtkm::cont::ArrayHandle<T,COut>
::template ExecutionTypes<DeviceAdapterTag>::Portal PortalType;
typedef ScanKernel<PortalType,BinaryFunctor> ScanKernelType;
DerivedAlgorithm::Copy(input, output);
vtkm::Id numValues = output.GetNumberOfValues();
if (numValues < 1)
{
return output.GetPortalConstControl().Get(0);
}
PortalType portal = output.PrepareForInPlace(DeviceAdapterTag());
vtkm::Id stride;
for (stride = 2; stride-1 < numValues; stride *= 2)
{
ScanKernelType kernel(portal, binary_functor, stride, stride/2 - 1);
DerivedAlgorithm::Schedule(kernel, numValues/stride);
}
// Do reverse operation on odd indices. Start at stride we were just at.
for (stride /= 2; stride > 1; stride /= 2)
{
ScanKernelType kernel(portal, binary_functor, stride, stride - 1);
DerivedAlgorithm::Schedule(kernel, numValues/stride);
}
return GetExecutionValue(output, numValues-1);
}
//--------------------------------------------------------------------------
// Sort
template<typename T, class Storage, class BinaryCompare>
VTKM_CONT_EXPORT static void Sort(
vtkm::cont::ArrayHandle<T,Storage> &values,
BinaryCompare binary_compare)
{
typedef typename vtkm::cont::ArrayHandle<T,Storage> ArrayType;
typedef typename ArrayType::template ExecutionTypes<DeviceAdapterTag>
::Portal PortalType;
vtkm::Id numValues = values.GetNumberOfValues();
if (numValues < 2) { return; }
PortalType portal = values.PrepareForInPlace(DeviceAdapterTag());
vtkm::Id numThreads = 1;
while (numThreads < numValues) { numThreads *= 2; }
numThreads /= 2;
typedef BitonicSortMergeKernel<PortalType,BinaryCompare> MergeKernel;
typedef BitonicSortCrossoverKernel<PortalType,BinaryCompare> CrossoverKernel;
for (vtkm::Id crossoverSize = 1;
crossoverSize < numValues;
crossoverSize *= 2)
{
DerivedAlgorithm::Schedule(CrossoverKernel(portal,binary_compare,crossoverSize),
numThreads);
for (vtkm::Id mergeSize = crossoverSize/2; mergeSize > 0; mergeSize /= 2)
{
DerivedAlgorithm::Schedule(MergeKernel(portal,binary_compare,mergeSize),
numThreads);
}
}
}
template<typename T, class Storage>
VTKM_CONT_EXPORT static void Sort(
vtkm::cont::ArrayHandle<T,Storage> &values)
{
DerivedAlgorithm::Sort(values, DefaultCompareFunctor());
}
//--------------------------------------------------------------------------
// Sort by Key
public:
template<typename T, typename U, class StorageT, class StorageU>
VTKM_CONT_EXPORT static void SortByKey(
vtkm::cont::ArrayHandle<T,StorageT> &keys,
vtkm::cont::ArrayHandle<U,StorageU> &values)
{
//combine the keys and values into a ZipArrayHandle
//we than need to specify a custom compare function wrapper
//that only checks for key side of the pair, using a custom compare functor.
typedef vtkm::cont::ArrayHandle<T,StorageT> KeyType;
typedef vtkm::cont::ArrayHandle<U,StorageU> ValueType;
typedef vtkm::cont::ArrayHandleZip<KeyType,ValueType> ZipHandleType;
ZipHandleType zipHandle =
vtkm::cont::make_ArrayHandleZip(keys,values);
DerivedAlgorithm::Sort(zipHandle,internal::KeyCompare<T,U>());
}
template<typename T, typename U, class StorageT, class StorageU, class BinaryCompare>
VTKM_CONT_EXPORT static void SortByKey(
vtkm::cont::ArrayHandle<T,StorageT> &keys,
vtkm::cont::ArrayHandle<U,StorageU> &values,
BinaryCompare binary_compare)
{
//combine the keys and values into a ZipArrayHandle
//we than need to specify a custom compare function wrapper
//that only checks for key side of the pair, using the custom compare
//functor that the user passed in
typedef vtkm::cont::ArrayHandle<T,StorageT> KeyType;
typedef vtkm::cont::ArrayHandle<U,StorageU> ValueType;
typedef vtkm::cont::ArrayHandleZip<KeyType,ValueType> ZipHandleType;
ZipHandleType zipHandle =
vtkm::cont::make_ArrayHandleZip(keys,values);
DerivedAlgorithm::Sort(zipHandle,internal::KeyCompare<T,U,BinaryCompare>(binary_compare));
}
//--------------------------------------------------------------------------
// Stream Compact
template<typename T, typename U, class CIn, class CStencil,
class COut, class UnaryPredicate>
VTKM_CONT_EXPORT static void StreamCompact(
const vtkm::cont::ArrayHandle<T,CIn>& input,
const vtkm::cont::ArrayHandle<U,CStencil>& stencil,
vtkm::cont::ArrayHandle<T,COut>& output,
UnaryPredicate unary_predicate)
{
VTKM_ASSERT(input.GetNumberOfValues() == stencil.GetNumberOfValues());
vtkm::Id arrayLength = stencil.GetNumberOfValues();
typedef vtkm::cont::ArrayHandle<
vtkm::Id, vtkm::cont::StorageTagBasic> IndexArrayType;
IndexArrayType indices;
typedef typename vtkm::cont::ArrayHandle<U,CStencil>
::template ExecutionTypes<DeviceAdapterTag>::PortalConst
StencilPortalType;
StencilPortalType stencilPortal =
stencil.PrepareForInput(DeviceAdapterTag());
typedef typename IndexArrayType
::template ExecutionTypes<DeviceAdapterTag>::Portal IndexPortalType;
IndexPortalType indexPortal =
indices.PrepareForOutput(arrayLength, DeviceAdapterTag());
StencilToIndexFlagKernel< StencilPortalType,
IndexPortalType,
UnaryPredicate> indexKernel(stencilPortal,
indexPortal,
unary_predicate);
DerivedAlgorithm::Schedule(indexKernel, arrayLength);
vtkm::Id outArrayLength = DerivedAlgorithm::ScanExclusive(indices, indices);
typedef typename vtkm::cont::ArrayHandle<T,CIn>
::template ExecutionTypes<DeviceAdapterTag>::PortalConst
InputPortalType;
InputPortalType inputPortal = input.PrepareForInput(DeviceAdapterTag());
typedef typename vtkm::cont::ArrayHandle<T,COut>
::template ExecutionTypes<DeviceAdapterTag>::Portal OutputPortalType;
OutputPortalType outputPortal =
output.PrepareForOutput(outArrayLength, DeviceAdapterTag());
CopyIfKernel<
InputPortalType,
StencilPortalType,
IndexPortalType,
OutputPortalType,
UnaryPredicate> copyKernel(inputPortal,
stencilPortal,
indexPortal,
outputPortal,
unary_predicate);
DerivedAlgorithm::Schedule(copyKernel, arrayLength);
}
template<typename T, typename U, class CIn, class CStencil, class COut>
VTKM_CONT_EXPORT static void StreamCompact(
const vtkm::cont::ArrayHandle<T,CIn>& input,
const vtkm::cont::ArrayHandle<U,CStencil>& stencil,
vtkm::cont::ArrayHandle<T,COut>& output)
{
::vtkm::NotZeroInitialized unary_predicate;
DerivedAlgorithm::StreamCompact(input, stencil, output, unary_predicate);
}
template<typename T, class CStencil, class COut>
VTKM_CONT_EXPORT static void StreamCompact(
const vtkm::cont::ArrayHandle<T,CStencil> &stencil,
vtkm::cont::ArrayHandle<vtkm::Id,COut> &output)
{
vtkm::cont::ArrayHandleIndex input(stencil.GetNumberOfValues());
DerivedAlgorithm::StreamCompact(input, stencil, output);
}
//--------------------------------------------------------------------------
// Unique
template<typename T, class Storage>
VTKM_CONT_EXPORT static void Unique(
vtkm::cont::ArrayHandle<T,Storage> &values)
{
Unique(values, std::equal_to<T>());
}
template<typename T, class Storage, class BinaryCompare>
VTKM_CONT_EXPORT static void Unique(
vtkm::cont::ArrayHandle<T,Storage> &values,
BinaryCompare binary_compare)
{
vtkm::cont::ArrayHandle<vtkm::Id, vtkm::cont::StorageTagBasic>
stencilArray;
vtkm::Id inputSize = values.GetNumberOfValues();
typedef internal::WrappedBinaryOperator<bool,BinaryCompare> WrappedBOpType;
WrappedBOpType wrappedCompare(binary_compare);
ClassifyUniqueComparisonKernel<
typename vtkm::cont::ArrayHandle<T,Storage>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
typename vtkm::cont::ArrayHandle<vtkm::Id,vtkm::cont::StorageTagBasic>::template ExecutionTypes<DeviceAdapterTag>::Portal,
WrappedBOpType>
classifyKernel(values.PrepareForInput(DeviceAdapterTag()),
stencilArray.PrepareForOutput(inputSize, DeviceAdapterTag()),
wrappedCompare);
DerivedAlgorithm::Schedule(classifyKernel, inputSize);
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>
outputArray;
DerivedAlgorithm::StreamCompact(values, stencilArray, outputArray);
DerivedAlgorithm::Copy(outputArray, values);
}
//--------------------------------------------------------------------------
// Upper bounds
template<typename T, class CIn, class CVal, class COut>
VTKM_CONT_EXPORT static void UpperBounds(
const vtkm::cont::ArrayHandle<T,CIn> &input,
const vtkm::cont::ArrayHandle<T,CVal> &values,
vtkm::cont::ArrayHandle<vtkm::Id,COut> &output)
{
vtkm::Id arraySize = values.GetNumberOfValues();
UpperBoundsKernel<
typename vtkm::cont::ArrayHandle<T,CIn>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
typename vtkm::cont::ArrayHandle<T,CVal>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
typename vtkm::cont::ArrayHandle<vtkm::Id,COut>::template ExecutionTypes<DeviceAdapterTag>::Portal>
kernel(input.PrepareForInput(DeviceAdapterTag()),
values.PrepareForInput(DeviceAdapterTag()),
output.PrepareForOutput(arraySize, DeviceAdapterTag()));
DerivedAlgorithm::Schedule(kernel, arraySize);
}
template<typename T, class CIn, class CVal, class COut, class BinaryCompare>
VTKM_CONT_EXPORT static void UpperBounds(
const vtkm::cont::ArrayHandle<T,CIn> &input,
const vtkm::cont::ArrayHandle<T,CVal> &values,
vtkm::cont::ArrayHandle<vtkm::Id,COut> &output,
BinaryCompare binary_compare)
{
vtkm::Id arraySize = values.GetNumberOfValues();
UpperBoundsKernelComparisonKernel<
typename vtkm::cont::ArrayHandle<T,CIn>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
typename vtkm::cont::ArrayHandle<T,CVal>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
typename vtkm::cont::ArrayHandle<vtkm::Id,COut>::template ExecutionTypes<DeviceAdapterTag>::Portal,
BinaryCompare>
kernel(input.PrepareForInput(DeviceAdapterTag()),
values.PrepareForInput(DeviceAdapterTag()),
output.PrepareForOutput(arraySize, DeviceAdapterTag()),
binary_compare);
DerivedAlgorithm::Schedule(kernel, arraySize);
}
template<class CIn, class COut>
VTKM_CONT_EXPORT static void UpperBounds(
const vtkm::cont::ArrayHandle<vtkm::Id,CIn> &input,
vtkm::cont::ArrayHandle<vtkm::Id,COut> &values_output)
{
DeviceAdapterAlgorithmGeneral<DerivedAlgorithm,
DeviceAdapterTag>::UpperBounds(input, values_output, values_output);
}
};
}
}
} // namespace vtkm::cont::internal
namespace vtkm {
namespace cont {
/// \brief Class providing a device-specific atomic interface.
///
/// The class provide the actual implementation used by vtkm::exec::AtomicArray.
/// A serial default implementation is provided. But each device will have a different
/// implementation.
///
/// Serial requires no form of atomicity
///
template<typename T, typename DeviceTag>
class DeviceAdapterAtomicArrayImplementation
{
public:
VTKM_CONT_EXPORT
DeviceAdapterAtomicArrayImplementation(
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle):
Iterators( IteratorsType( handle.PrepareForInPlace(DeviceTag()) ) )
{
}
VTKM_EXEC_EXPORT
T Add(vtkm::Id index, const T& value) const
{
T* lockedValue;
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp =
&(*(Iterators.GetBegin() + static_cast<std::ptrdiff_t>(index)));
lockedValue = temp;
return vtkmAtomicAdd(lockedValue, value);
#else
lockedValue = (Iterators.GetBegin()+index);
return vtkmAtomicAdd(lockedValue, value);
#endif
}
VTKM_EXEC_EXPORT
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
{
T* lockedValue;
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp =
&(*(Iterators.GetBegin()+static_cast<std::ptrdiff_t>(index)));
lockedValue = temp;
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#else
lockedValue = (Iterators.GetBegin()+index);
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#endif
}
private:
typedef typename vtkm::cont::ArrayHandle<T,vtkm::cont::StorageTagBasic>
::template ExecutionTypes<DeviceTag>::Portal PortalType;
typedef vtkm::cont::ArrayPortalToIterators<PortalType> IteratorsType;
IteratorsType Iterators;
#if defined(VTKM_MSVC) //MSVC atomics
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
{
return InterlockedExchangeAdd(reinterpret_cast<volatile long *>(address),value);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
{
return InterlockedExchangeAdd64(reinterpret_cast<volatile long long *>(address),value);
}
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
return InterlockedCompareExchange(reinterpret_cast<volatile long *>(address),newValue,oldValue);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
return InterlockedCompareExchange64(reinterpret_cast<volatile long long *>(address),newValue, oldValue);
}
#else //gcc built-in atomics
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
{
return __sync_fetch_and_add(address,value);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
{
return __sync_fetch_and_add(address,value);
}
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
return __sync_val_compare_and_swap(address,oldValue, newValue);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
return __sync_val_compare_and_swap(address,oldValue,newValue);
}
#endif
};
}
} // namespace vtkm::cont
#endif //vtk_m_cont_internal_DeviceAdapterAlgorithmGeneral_h