mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-10-05 18:08:59 +00:00
1e19101eee
When compiling under VisualStudio we need to first determine if checked iterators are enabled ( _ITERATOR_DEBUG_LEVEL ). We don't want to use the NDEBUG key, as we could be inside a project that is in Debug mode with disabled checked iterators. Secondly if they are enabled we need to handle the use case of NULL iterators that get advanced by length zero. This last case is valid, but isn't supported by the checked iterators so we need to work around it
813 lines
31 KiB
C++
813 lines
31 KiB
C++
//============================================================================
|
|
// Copyright (c) Kitware, Inc.
|
|
// All rights reserved.
|
|
// See LICENSE.txt for details.
|
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
// PURPOSE. See the above copyright notice for more information.
|
|
//
|
|
// Copyright 2014 Sandia Corporation.
|
|
// Copyright 2014 UT-Battelle, LLC.
|
|
// Copyright 2014 Los Alamos National Security.
|
|
//
|
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
// the U.S. Government retains certain rights in this software.
|
|
//
|
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
|
// this software.
|
|
//============================================================================
|
|
#ifndef vtk_m_cont_internal_DeviceAdapterAlgorithmGeneral_h
|
|
#define vtk_m_cont_internal_DeviceAdapterAlgorithmGeneral_h
|
|
|
|
#include <vtkm/cont/ArrayHandle.h>
|
|
#include <vtkm/cont/ArrayHandleImplicit.h>
|
|
#include <vtkm/cont/ArrayHandleIndex.h>
|
|
#include <vtkm/cont/ArrayHandleZip.h>
|
|
#include <vtkm/cont/internal/FunctorsGeneral.h>
|
|
|
|
#include <vtkm/exec/internal/ErrorMessageBuffer.h>
|
|
|
|
VTKM_THIRDPARTY_PRE_INCLUDE
|
|
#if defined(VTKM_MSVC)
|
|
#define WIN32_LEAN_AND_MEAN
|
|
#define NOMINMAX
|
|
#include <Windows.h>
|
|
#undef WIN32_LEAN_AND_MEAN
|
|
#undef NOMINMAX
|
|
#endif
|
|
VTKM_THIRDPARTY_POST_INCLUDE
|
|
|
|
namespace vtkm {
|
|
namespace cont {
|
|
namespace internal {
|
|
|
|
/// \brief
|
|
///
|
|
/// This struct provides algorithms that implement "general" device adapter
|
|
/// algorithms. If a device adapter provides implementations for Schedule,
|
|
/// and Synchronize, the rest of the algorithms can be implemented by calling
|
|
/// these functions.
|
|
///
|
|
/// It should be noted that we recommend that you also implement Sort,
|
|
/// ScanInclusive, and ScanExclusive for improved performance.
|
|
///
|
|
/// An easy way to implement the DeviceAdapterAlgorithm specialization is to
|
|
/// subclass this and override the implementation of methods as necessary.
|
|
/// As an example, the code would look something like this.
|
|
///
|
|
/// \code{.cpp}
|
|
/// template<>
|
|
/// struct DeviceAdapterAlgorithm<DeviceAdapterTagFoo>
|
|
/// : DeviceAdapterAlgorithmGeneral<DeviceAdapterAlgorithm<DeviceAdapterTagFoo>,
|
|
/// DeviceAdapterTagFoo>
|
|
/// {
|
|
/// template<class Functor>
|
|
/// VTKM_CONT_EXPORT static void Schedule(Functor functor,
|
|
/// vtkm::Id numInstances)
|
|
/// {
|
|
/// ...
|
|
/// }
|
|
///
|
|
/// template<class Functor>
|
|
/// VTKM_CONT_EXPORT static void Schedule(Functor functor,
|
|
/// vtkm::Id3 maxRange)
|
|
/// {
|
|
/// ...
|
|
/// }
|
|
///
|
|
/// VTKM_CONT_EXPORT static void Synchronize()
|
|
/// {
|
|
/// ...
|
|
/// }
|
|
/// };
|
|
/// \endcode
|
|
///
|
|
/// You might note that DeviceAdapterAlgorithmGeneral has two template
|
|
/// parameters that are redundant. Although the first parameter, the class for
|
|
/// the actual DeviceAdapterAlgorithm class containing Schedule, and
|
|
/// Synchronize is the same as DeviceAdapterAlgorithm<DeviceAdapterTag>, it is
|
|
/// made a separate template parameter to avoid a recursive dependence between
|
|
/// DeviceAdapterAlgorithmGeneral.h and DeviceAdapterAlgorithm.h
|
|
///
|
|
template<class DerivedAlgorithm, class DeviceAdapterTag>
|
|
struct DeviceAdapterAlgorithmGeneral
|
|
{
|
|
//--------------------------------------------------------------------------
|
|
// Get Execution Value
|
|
// This method is used internally to get a single element from the execution
|
|
// array. Might want to expose this and/or allow actual device adapter
|
|
// implementations to provide one.
|
|
private:
|
|
template<typename T, class CIn>
|
|
VTKM_CONT_EXPORT
|
|
static T GetExecutionValue(const vtkm::cont::ArrayHandle<T, CIn> &input,
|
|
vtkm::Id index)
|
|
{
|
|
typedef vtkm::cont::ArrayHandle<T,CIn> InputArrayType;
|
|
typedef vtkm::cont::ArrayHandle<T,vtkm::cont::StorageTagBasic>
|
|
OutputArrayType;
|
|
|
|
OutputArrayType output;
|
|
|
|
CopyKernel<
|
|
typename InputArrayType::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
|
|
typename OutputArrayType::template ExecutionTypes<DeviceAdapterTag>::Portal>
|
|
kernel(input.PrepareForInput(DeviceAdapterTag()),
|
|
output.PrepareForOutput(1, DeviceAdapterTag()),
|
|
index);
|
|
|
|
DerivedAlgorithm::Schedule(kernel, 1);
|
|
|
|
return output.GetPortalConstControl().Get(0);
|
|
}
|
|
|
|
|
|
public:
|
|
//--------------------------------------------------------------------------
|
|
// Copy
|
|
template<typename T, typename U, class CIn, class COut>
|
|
VTKM_CONT_EXPORT static void Copy(const vtkm::cont::ArrayHandle<T, CIn> &input,
|
|
vtkm::cont::ArrayHandle<U, COut> &output)
|
|
{
|
|
vtkm::Id arraySize = input.GetNumberOfValues();
|
|
|
|
CopyKernel<
|
|
typename vtkm::cont::ArrayHandle<T,CIn>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
|
|
typename vtkm::cont::ArrayHandle<U,COut>::template ExecutionTypes<DeviceAdapterTag>::Portal>
|
|
kernel(input.PrepareForInput(DeviceAdapterTag()),
|
|
output.PrepareForOutput(arraySize, DeviceAdapterTag()));
|
|
|
|
DerivedAlgorithm::Schedule(kernel, arraySize);
|
|
}
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Lower Bounds
|
|
template<typename T, class CIn, class CVal, class COut>
|
|
VTKM_CONT_EXPORT static void LowerBounds(
|
|
const vtkm::cont::ArrayHandle<T,CIn> &input,
|
|
const vtkm::cont::ArrayHandle<T,CVal> &values,
|
|
vtkm::cont::ArrayHandle<vtkm::Id,COut> &output)
|
|
{
|
|
vtkm::Id arraySize = values.GetNumberOfValues();
|
|
|
|
LowerBoundsKernel<
|
|
typename vtkm::cont::ArrayHandle<T,CIn>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
|
|
typename vtkm::cont::ArrayHandle<T,CVal>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
|
|
typename vtkm::cont::ArrayHandle<vtkm::Id,COut>::template ExecutionTypes<DeviceAdapterTag>::Portal>
|
|
kernel(input.PrepareForInput(DeviceAdapterTag()),
|
|
values.PrepareForInput(DeviceAdapterTag()),
|
|
output.PrepareForOutput(arraySize, DeviceAdapterTag()));
|
|
|
|
DerivedAlgorithm::Schedule(kernel, arraySize);
|
|
}
|
|
|
|
template<typename T, class CIn, class CVal, class COut, class BinaryCompare>
|
|
VTKM_CONT_EXPORT static void LowerBounds(
|
|
const vtkm::cont::ArrayHandle<T,CIn> &input,
|
|
const vtkm::cont::ArrayHandle<T,CVal> &values,
|
|
vtkm::cont::ArrayHandle<vtkm::Id,COut> &output,
|
|
BinaryCompare binary_compare)
|
|
{
|
|
vtkm::Id arraySize = values.GetNumberOfValues();
|
|
|
|
LowerBoundsComparisonKernel<
|
|
typename vtkm::cont::ArrayHandle<T,CIn>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
|
|
typename vtkm::cont::ArrayHandle<T,CVal>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
|
|
typename vtkm::cont::ArrayHandle<vtkm::Id,COut>::template ExecutionTypes<DeviceAdapterTag>::Portal,
|
|
BinaryCompare>
|
|
kernel(input.PrepareForInput(DeviceAdapterTag()),
|
|
values.PrepareForInput(DeviceAdapterTag()),
|
|
output.PrepareForOutput(arraySize, DeviceAdapterTag()),
|
|
binary_compare);
|
|
|
|
DerivedAlgorithm::Schedule(kernel, arraySize);
|
|
}
|
|
|
|
template<class CIn, class COut>
|
|
VTKM_CONT_EXPORT static void LowerBounds(
|
|
const vtkm::cont::ArrayHandle<vtkm::Id,CIn> &input,
|
|
vtkm::cont::ArrayHandle<vtkm::Id,COut> &values_output)
|
|
{
|
|
DeviceAdapterAlgorithmGeneral<
|
|
DerivedAlgorithm,DeviceAdapterTag>::LowerBounds(input,
|
|
values_output,
|
|
values_output);
|
|
}
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Reduce
|
|
template<typename T, class CIn>
|
|
VTKM_CONT_EXPORT static T Reduce(
|
|
const vtkm::cont::ArrayHandle<T,CIn> &input, T initialValue)
|
|
{
|
|
return DerivedAlgorithm::Reduce(input, initialValue, vtkm::internal::Add());
|
|
}
|
|
|
|
template<typename T, class CIn, class BinaryFunctor>
|
|
VTKM_CONT_EXPORT static T Reduce(
|
|
const vtkm::cont::ArrayHandle<T,CIn> &input,
|
|
T initialValue,
|
|
BinaryFunctor binary_functor)
|
|
{
|
|
//Crazy Idea:
|
|
//We create a implicit array handle that wraps the input
|
|
//array handle. The implicit functor is passed the input array handle, and
|
|
//the number of elements it needs to sum. This way the implicit handle
|
|
//acts as the first level reduction. Say for example reducing 16 values
|
|
//at a time.
|
|
//
|
|
//Now that we have an implicit array that is 1/16 the length of full array
|
|
//we can use scan inclusive to compute the final sum
|
|
typedef typename vtkm::cont::ArrayHandle<T,CIn>::template ExecutionTypes<DeviceAdapterTag>
|
|
::PortalConst InputPortalType;
|
|
|
|
typedef ReduceKernel<
|
|
InputPortalType,
|
|
BinaryFunctor
|
|
> ReduceKernelType;
|
|
|
|
typedef vtkm::cont::ArrayHandleImplicit<
|
|
T,
|
|
ReduceKernelType > ReduceHandleType;
|
|
typedef vtkm::cont::ArrayHandle<
|
|
T,
|
|
vtkm::cont::StorageTagBasic> TempArrayType;
|
|
|
|
ReduceKernelType kernel(input.PrepareForInput( DeviceAdapterTag() ),
|
|
binary_functor);
|
|
|
|
vtkm::Id length = (input.GetNumberOfValues() / 16);
|
|
length += (input.GetNumberOfValues() % 16 == 0) ? 0 : 1;
|
|
ReduceHandleType reduced = vtkm::cont::make_ArrayHandleImplicit<T>(kernel,
|
|
length);
|
|
|
|
TempArrayType inclusiveScanStorage;
|
|
T scanResult = DerivedAlgorithm::ScanInclusive(reduced,
|
|
inclusiveScanStorage,
|
|
binary_functor);
|
|
return binary_functor(initialValue, scanResult);
|
|
}
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Reduce By Key
|
|
template<typename T, typename U, class KIn, class VIn, class KOut, class VOut,
|
|
class BinaryFunctor>
|
|
VTKM_CONT_EXPORT static void ReduceByKey(
|
|
const vtkm::cont::ArrayHandle<T,KIn> &keys,
|
|
const vtkm::cont::ArrayHandle<U,VIn> &values,
|
|
vtkm::cont::ArrayHandle<T,KOut> &keys_output,
|
|
vtkm::cont::ArrayHandle<U,VOut> &values_output,
|
|
BinaryFunctor binary_functor)
|
|
{
|
|
VTKM_ASSERT(keys.GetNumberOfValues() == values.GetNumberOfValues());
|
|
const vtkm::Id numberOfKeys = keys.GetNumberOfValues();
|
|
|
|
if(numberOfKeys <= 1)
|
|
{ //we only have a single key/value so that is our output
|
|
DerivedAlgorithm::Copy(keys, keys_output);
|
|
DerivedAlgorithm::Copy(values, values_output);
|
|
return;
|
|
}
|
|
|
|
//we need to determine based on the keys what is the keystate for
|
|
//each key. The states are start, middle, end of a series and the special
|
|
//state start and end of a series
|
|
vtkm::cont::ArrayHandle< ReduceKeySeriesStates > keystate;
|
|
|
|
{
|
|
typedef typename vtkm::cont::ArrayHandle<T,KIn>::template ExecutionTypes<DeviceAdapterTag>
|
|
::PortalConst InputPortalType;
|
|
|
|
typedef typename vtkm::cont::ArrayHandle< ReduceKeySeriesStates >::template ExecutionTypes<DeviceAdapterTag>
|
|
::Portal KeyStatePortalType;
|
|
|
|
InputPortalType inputPortal = keys.PrepareForInput(DeviceAdapterTag());
|
|
KeyStatePortalType keyStatePortal = keystate.PrepareForOutput(numberOfKeys,
|
|
DeviceAdapterTag());
|
|
ReduceStencilGeneration<InputPortalType, KeyStatePortalType> kernel(inputPortal, keyStatePortal);
|
|
DerivedAlgorithm::Schedule(kernel, numberOfKeys);
|
|
}
|
|
|
|
//next step is we need to reduce the values for each key. This is done
|
|
//by running an inclusive scan over the values array using the stencil.
|
|
//
|
|
// this inclusive scan will write out two values, the first being
|
|
// the value summed currently, the second being 0 or 1, with 1 being used
|
|
// when this is a value of a key we need to write ( END or START_AND_END)
|
|
{
|
|
typedef vtkm::cont::ArrayHandle<U,VIn> ValueInHandleType;
|
|
typedef vtkm::cont::ArrayHandle<U,VOut> ValueOutHandleType;
|
|
typedef vtkm::cont::ArrayHandle< ReduceKeySeriesStates> StencilHandleType;
|
|
typedef vtkm::cont::ArrayHandleZip<ValueInHandleType,
|
|
StencilHandleType> ZipInHandleType;
|
|
typedef vtkm::cont::ArrayHandleZip<ValueOutHandleType,
|
|
StencilHandleType> ZipOutHandleType;
|
|
|
|
StencilHandleType stencil;
|
|
ValueOutHandleType reducedValues;
|
|
|
|
ZipInHandleType scanInput( values, keystate);
|
|
ZipOutHandleType scanOutput( reducedValues, stencil);
|
|
|
|
DerivedAlgorithm::ScanInclusive(scanInput,
|
|
scanOutput,
|
|
ReduceByKeyAdd<BinaryFunctor>(binary_functor) );
|
|
|
|
//at this point we are done with keystate, so free the memory
|
|
keystate.ReleaseResources();
|
|
|
|
// all we need know is an efficient way of doing the write back to the
|
|
// reduced global memory. this is done by using StreamCompact with the
|
|
// stencil and values we just created with the inclusive scan
|
|
DerivedAlgorithm::StreamCompact( reducedValues,
|
|
stencil,
|
|
values_output,
|
|
ReduceByKeyUnaryStencilOp());
|
|
|
|
} //release all temporary memory
|
|
|
|
|
|
//find all the unique keys
|
|
DerivedAlgorithm::Copy(keys,keys_output);
|
|
DerivedAlgorithm::Unique(keys_output);
|
|
}
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Scan Exclusive
|
|
template<typename T, class CIn, class COut, class BinaryFunctor>
|
|
VTKM_CONT_EXPORT static T ScanExclusive(
|
|
const vtkm::cont::ArrayHandle<T,CIn> &input,
|
|
vtkm::cont::ArrayHandle<T,COut>& output,
|
|
BinaryFunctor binaryFunctor,
|
|
const T& initialValue)
|
|
{
|
|
typedef vtkm::cont::ArrayHandle<T,vtkm::cont::StorageTagBasic>
|
|
TempArrayType;
|
|
typedef vtkm::cont::ArrayHandle<T,COut> OutputArrayType;
|
|
|
|
typedef typename TempArrayType::template ExecutionTypes<DeviceAdapterTag>
|
|
::PortalConst SrcPortalType;
|
|
typedef typename OutputArrayType::template ExecutionTypes<DeviceAdapterTag>
|
|
::Portal DestPortalType;
|
|
|
|
vtkm::Id numValues = input.GetNumberOfValues();
|
|
if (numValues <= 0)
|
|
{
|
|
return initialValue;
|
|
}
|
|
|
|
TempArrayType inclusiveScan;
|
|
T result = DerivedAlgorithm::ScanInclusive(input, inclusiveScan, binaryFunctor);
|
|
|
|
InclusiveToExclusiveKernel<SrcPortalType, DestPortalType, BinaryFunctor>
|
|
inclusiveToExclusive(inclusiveScan.PrepareForInput(DeviceAdapterTag()),
|
|
output.PrepareForOutput(numValues, DeviceAdapterTag()),
|
|
binaryFunctor,
|
|
initialValue);
|
|
|
|
DerivedAlgorithm::Schedule(inclusiveToExclusive, numValues);
|
|
|
|
return binaryFunctor(initialValue, result);
|
|
}
|
|
|
|
template<typename T, class CIn, class COut>
|
|
VTKM_CONT_EXPORT static T ScanExclusive(
|
|
const vtkm::cont::ArrayHandle<T,CIn> &input,
|
|
vtkm::cont::ArrayHandle<T,COut>& output)
|
|
{
|
|
return ScanExclusive(input, output, vtkm::Sum(),
|
|
vtkm::TypeTraits<T>::ZeroInitialization());
|
|
}
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Scan Inclusive
|
|
template<typename T, class CIn, class COut>
|
|
VTKM_CONT_EXPORT static T ScanInclusive(
|
|
const vtkm::cont::ArrayHandle<T,CIn> &input,
|
|
vtkm::cont::ArrayHandle<T,COut>& output)
|
|
{
|
|
return DerivedAlgorithm::ScanInclusive(input,
|
|
output,
|
|
vtkm::internal::Add());
|
|
}
|
|
|
|
template<typename T, class CIn, class COut, class BinaryFunctor>
|
|
VTKM_CONT_EXPORT static T ScanInclusive(
|
|
const vtkm::cont::ArrayHandle<T,CIn> &input,
|
|
vtkm::cont::ArrayHandle<T,COut>& output,
|
|
BinaryFunctor binary_functor)
|
|
{
|
|
typedef typename
|
|
vtkm::cont::ArrayHandle<T,COut>
|
|
::template ExecutionTypes<DeviceAdapterTag>::Portal PortalType;
|
|
|
|
typedef ScanKernel<PortalType,BinaryFunctor> ScanKernelType;
|
|
|
|
DerivedAlgorithm::Copy(input, output);
|
|
|
|
vtkm::Id numValues = output.GetNumberOfValues();
|
|
if (numValues < 1)
|
|
{
|
|
return output.GetPortalConstControl().Get(0);
|
|
}
|
|
|
|
PortalType portal = output.PrepareForInPlace(DeviceAdapterTag());
|
|
|
|
vtkm::Id stride;
|
|
for (stride = 2; stride-1 < numValues; stride *= 2)
|
|
{
|
|
ScanKernelType kernel(portal, binary_functor, stride, stride/2 - 1);
|
|
DerivedAlgorithm::Schedule(kernel, numValues/stride);
|
|
}
|
|
|
|
// Do reverse operation on odd indices. Start at stride we were just at.
|
|
for (stride /= 2; stride > 1; stride /= 2)
|
|
{
|
|
ScanKernelType kernel(portal, binary_functor, stride, stride - 1);
|
|
DerivedAlgorithm::Schedule(kernel, numValues/stride);
|
|
}
|
|
|
|
return GetExecutionValue(output, numValues-1);
|
|
}
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Sort
|
|
template<typename T, class Storage, class BinaryCompare>
|
|
VTKM_CONT_EXPORT static void Sort(
|
|
vtkm::cont::ArrayHandle<T,Storage> &values,
|
|
BinaryCompare binary_compare)
|
|
{
|
|
typedef typename vtkm::cont::ArrayHandle<T,Storage> ArrayType;
|
|
typedef typename ArrayType::template ExecutionTypes<DeviceAdapterTag>
|
|
::Portal PortalType;
|
|
|
|
vtkm::Id numValues = values.GetNumberOfValues();
|
|
if (numValues < 2) { return; }
|
|
|
|
PortalType portal = values.PrepareForInPlace(DeviceAdapterTag());
|
|
|
|
vtkm::Id numThreads = 1;
|
|
while (numThreads < numValues) { numThreads *= 2; }
|
|
numThreads /= 2;
|
|
|
|
typedef BitonicSortMergeKernel<PortalType,BinaryCompare> MergeKernel;
|
|
typedef BitonicSortCrossoverKernel<PortalType,BinaryCompare> CrossoverKernel;
|
|
|
|
for (vtkm::Id crossoverSize = 1;
|
|
crossoverSize < numValues;
|
|
crossoverSize *= 2)
|
|
{
|
|
DerivedAlgorithm::Schedule(CrossoverKernel(portal,binary_compare,crossoverSize),
|
|
numThreads);
|
|
for (vtkm::Id mergeSize = crossoverSize/2; mergeSize > 0; mergeSize /= 2)
|
|
{
|
|
DerivedAlgorithm::Schedule(MergeKernel(portal,binary_compare,mergeSize),
|
|
numThreads);
|
|
}
|
|
}
|
|
}
|
|
|
|
template<typename T, class Storage>
|
|
VTKM_CONT_EXPORT static void Sort(
|
|
vtkm::cont::ArrayHandle<T,Storage> &values)
|
|
{
|
|
DerivedAlgorithm::Sort(values, DefaultCompareFunctor());
|
|
}
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Sort by Key
|
|
public:
|
|
|
|
template<typename T, typename U, class StorageT, class StorageU>
|
|
VTKM_CONT_EXPORT static void SortByKey(
|
|
vtkm::cont::ArrayHandle<T,StorageT> &keys,
|
|
vtkm::cont::ArrayHandle<U,StorageU> &values)
|
|
{
|
|
//combine the keys and values into a ZipArrayHandle
|
|
//we than need to specify a custom compare function wrapper
|
|
//that only checks for key side of the pair, using a custom compare functor.
|
|
typedef vtkm::cont::ArrayHandle<T,StorageT> KeyType;
|
|
typedef vtkm::cont::ArrayHandle<U,StorageU> ValueType;
|
|
typedef vtkm::cont::ArrayHandleZip<KeyType,ValueType> ZipHandleType;
|
|
|
|
ZipHandleType zipHandle =
|
|
vtkm::cont::make_ArrayHandleZip(keys,values);
|
|
DerivedAlgorithm::Sort(zipHandle,internal::KeyCompare<T,U>());
|
|
}
|
|
|
|
template<typename T, typename U, class StorageT, class StorageU, class BinaryCompare>
|
|
VTKM_CONT_EXPORT static void SortByKey(
|
|
vtkm::cont::ArrayHandle<T,StorageT> &keys,
|
|
vtkm::cont::ArrayHandle<U,StorageU> &values,
|
|
BinaryCompare binary_compare)
|
|
{
|
|
//combine the keys and values into a ZipArrayHandle
|
|
//we than need to specify a custom compare function wrapper
|
|
//that only checks for key side of the pair, using the custom compare
|
|
//functor that the user passed in
|
|
typedef vtkm::cont::ArrayHandle<T,StorageT> KeyType;
|
|
typedef vtkm::cont::ArrayHandle<U,StorageU> ValueType;
|
|
typedef vtkm::cont::ArrayHandleZip<KeyType,ValueType> ZipHandleType;
|
|
|
|
ZipHandleType zipHandle =
|
|
vtkm::cont::make_ArrayHandleZip(keys,values);
|
|
DerivedAlgorithm::Sort(zipHandle,internal::KeyCompare<T,U,BinaryCompare>(binary_compare));
|
|
}
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Stream Compact
|
|
template<typename T, typename U, class CIn, class CStencil,
|
|
class COut, class UnaryPredicate>
|
|
VTKM_CONT_EXPORT static void StreamCompact(
|
|
const vtkm::cont::ArrayHandle<T,CIn>& input,
|
|
const vtkm::cont::ArrayHandle<U,CStencil>& stencil,
|
|
vtkm::cont::ArrayHandle<T,COut>& output,
|
|
UnaryPredicate unary_predicate)
|
|
{
|
|
VTKM_ASSERT(input.GetNumberOfValues() == stencil.GetNumberOfValues());
|
|
vtkm::Id arrayLength = stencil.GetNumberOfValues();
|
|
|
|
typedef vtkm::cont::ArrayHandle<
|
|
vtkm::Id, vtkm::cont::StorageTagBasic> IndexArrayType;
|
|
IndexArrayType indices;
|
|
|
|
typedef typename vtkm::cont::ArrayHandle<U,CStencil>
|
|
::template ExecutionTypes<DeviceAdapterTag>::PortalConst
|
|
StencilPortalType;
|
|
StencilPortalType stencilPortal =
|
|
stencil.PrepareForInput(DeviceAdapterTag());
|
|
|
|
typedef typename IndexArrayType
|
|
::template ExecutionTypes<DeviceAdapterTag>::Portal IndexPortalType;
|
|
IndexPortalType indexPortal =
|
|
indices.PrepareForOutput(arrayLength, DeviceAdapterTag());
|
|
|
|
StencilToIndexFlagKernel< StencilPortalType,
|
|
IndexPortalType,
|
|
UnaryPredicate> indexKernel(stencilPortal,
|
|
indexPortal,
|
|
unary_predicate);
|
|
|
|
DerivedAlgorithm::Schedule(indexKernel, arrayLength);
|
|
|
|
vtkm::Id outArrayLength = DerivedAlgorithm::ScanExclusive(indices, indices);
|
|
|
|
typedef typename vtkm::cont::ArrayHandle<T,CIn>
|
|
::template ExecutionTypes<DeviceAdapterTag>::PortalConst
|
|
InputPortalType;
|
|
InputPortalType inputPortal = input.PrepareForInput(DeviceAdapterTag());
|
|
|
|
typedef typename vtkm::cont::ArrayHandle<T,COut>
|
|
::template ExecutionTypes<DeviceAdapterTag>::Portal OutputPortalType;
|
|
OutputPortalType outputPortal =
|
|
output.PrepareForOutput(outArrayLength, DeviceAdapterTag());
|
|
|
|
CopyIfKernel<
|
|
InputPortalType,
|
|
StencilPortalType,
|
|
IndexPortalType,
|
|
OutputPortalType,
|
|
UnaryPredicate> copyKernel(inputPortal,
|
|
stencilPortal,
|
|
indexPortal,
|
|
outputPortal,
|
|
unary_predicate);
|
|
DerivedAlgorithm::Schedule(copyKernel, arrayLength);
|
|
}
|
|
|
|
template<typename T, typename U, class CIn, class CStencil, class COut>
|
|
VTKM_CONT_EXPORT static void StreamCompact(
|
|
const vtkm::cont::ArrayHandle<T,CIn>& input,
|
|
const vtkm::cont::ArrayHandle<U,CStencil>& stencil,
|
|
vtkm::cont::ArrayHandle<T,COut>& output)
|
|
{
|
|
::vtkm::NotZeroInitialized unary_predicate;
|
|
DerivedAlgorithm::StreamCompact(input, stencil, output, unary_predicate);
|
|
}
|
|
|
|
template<typename T, class CStencil, class COut>
|
|
VTKM_CONT_EXPORT static void StreamCompact(
|
|
const vtkm::cont::ArrayHandle<T,CStencil> &stencil,
|
|
vtkm::cont::ArrayHandle<vtkm::Id,COut> &output)
|
|
{
|
|
vtkm::cont::ArrayHandleIndex input(stencil.GetNumberOfValues());
|
|
DerivedAlgorithm::StreamCompact(input, stencil, output);
|
|
}
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Unique
|
|
template<typename T, class Storage>
|
|
VTKM_CONT_EXPORT static void Unique(
|
|
vtkm::cont::ArrayHandle<T,Storage> &values)
|
|
{
|
|
Unique(values, std::equal_to<T>());
|
|
}
|
|
|
|
template<typename T, class Storage, class BinaryCompare>
|
|
VTKM_CONT_EXPORT static void Unique(
|
|
vtkm::cont::ArrayHandle<T,Storage> &values,
|
|
BinaryCompare binary_compare)
|
|
{
|
|
vtkm::cont::ArrayHandle<vtkm::Id, vtkm::cont::StorageTagBasic>
|
|
stencilArray;
|
|
vtkm::Id inputSize = values.GetNumberOfValues();
|
|
|
|
typedef internal::WrappedBinaryOperator<bool,BinaryCompare> WrappedBOpType;
|
|
WrappedBOpType wrappedCompare(binary_compare);
|
|
|
|
ClassifyUniqueComparisonKernel<
|
|
typename vtkm::cont::ArrayHandle<T,Storage>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
|
|
typename vtkm::cont::ArrayHandle<vtkm::Id,vtkm::cont::StorageTagBasic>::template ExecutionTypes<DeviceAdapterTag>::Portal,
|
|
WrappedBOpType>
|
|
classifyKernel(values.PrepareForInput(DeviceAdapterTag()),
|
|
stencilArray.PrepareForOutput(inputSize, DeviceAdapterTag()),
|
|
wrappedCompare);
|
|
|
|
DerivedAlgorithm::Schedule(classifyKernel, inputSize);
|
|
|
|
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>
|
|
outputArray;
|
|
|
|
DerivedAlgorithm::StreamCompact(values, stencilArray, outputArray);
|
|
|
|
DerivedAlgorithm::Copy(outputArray, values);
|
|
}
|
|
|
|
//--------------------------------------------------------------------------
|
|
// Upper bounds
|
|
template<typename T, class CIn, class CVal, class COut>
|
|
VTKM_CONT_EXPORT static void UpperBounds(
|
|
const vtkm::cont::ArrayHandle<T,CIn> &input,
|
|
const vtkm::cont::ArrayHandle<T,CVal> &values,
|
|
vtkm::cont::ArrayHandle<vtkm::Id,COut> &output)
|
|
{
|
|
vtkm::Id arraySize = values.GetNumberOfValues();
|
|
|
|
UpperBoundsKernel<
|
|
typename vtkm::cont::ArrayHandle<T,CIn>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
|
|
typename vtkm::cont::ArrayHandle<T,CVal>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
|
|
typename vtkm::cont::ArrayHandle<vtkm::Id,COut>::template ExecutionTypes<DeviceAdapterTag>::Portal>
|
|
kernel(input.PrepareForInput(DeviceAdapterTag()),
|
|
values.PrepareForInput(DeviceAdapterTag()),
|
|
output.PrepareForOutput(arraySize, DeviceAdapterTag()));
|
|
|
|
DerivedAlgorithm::Schedule(kernel, arraySize);
|
|
}
|
|
|
|
template<typename T, class CIn, class CVal, class COut, class BinaryCompare>
|
|
VTKM_CONT_EXPORT static void UpperBounds(
|
|
const vtkm::cont::ArrayHandle<T,CIn> &input,
|
|
const vtkm::cont::ArrayHandle<T,CVal> &values,
|
|
vtkm::cont::ArrayHandle<vtkm::Id,COut> &output,
|
|
BinaryCompare binary_compare)
|
|
{
|
|
vtkm::Id arraySize = values.GetNumberOfValues();
|
|
|
|
UpperBoundsKernelComparisonKernel<
|
|
typename vtkm::cont::ArrayHandle<T,CIn>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
|
|
typename vtkm::cont::ArrayHandle<T,CVal>::template ExecutionTypes<DeviceAdapterTag>::PortalConst,
|
|
typename vtkm::cont::ArrayHandle<vtkm::Id,COut>::template ExecutionTypes<DeviceAdapterTag>::Portal,
|
|
BinaryCompare>
|
|
kernel(input.PrepareForInput(DeviceAdapterTag()),
|
|
values.PrepareForInput(DeviceAdapterTag()),
|
|
output.PrepareForOutput(arraySize, DeviceAdapterTag()),
|
|
binary_compare);
|
|
|
|
DerivedAlgorithm::Schedule(kernel, arraySize);
|
|
}
|
|
|
|
template<class CIn, class COut>
|
|
VTKM_CONT_EXPORT static void UpperBounds(
|
|
const vtkm::cont::ArrayHandle<vtkm::Id,CIn> &input,
|
|
vtkm::cont::ArrayHandle<vtkm::Id,COut> &values_output)
|
|
{
|
|
DeviceAdapterAlgorithmGeneral<DerivedAlgorithm,
|
|
DeviceAdapterTag>::UpperBounds(input, values_output, values_output);
|
|
}
|
|
|
|
};
|
|
|
|
|
|
}
|
|
}
|
|
} // namespace vtkm::cont::internal
|
|
|
|
namespace vtkm {
|
|
namespace cont {
|
|
/// \brief Class providing a device-specific atomic interface.
|
|
///
|
|
/// The class provide the actual implementation used by vtkm::exec::AtomicArray.
|
|
/// A serial default implementation is provided. But each device will have a different
|
|
/// implementation.
|
|
///
|
|
/// Serial requires no form of atomicity
|
|
///
|
|
template<typename T, typename DeviceTag>
|
|
class DeviceAdapterAtomicArrayImplementation
|
|
{
|
|
public:
|
|
VTKM_CONT_EXPORT
|
|
DeviceAdapterAtomicArrayImplementation(
|
|
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle):
|
|
Iterators( IteratorsType( handle.PrepareForInPlace(DeviceTag()) ) )
|
|
{
|
|
}
|
|
|
|
VTKM_EXEC_EXPORT
|
|
T Add(vtkm::Id index, const T& value) const
|
|
{
|
|
T* lockedValue;
|
|
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0
|
|
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
|
|
typename IteratorType::pointer temp =
|
|
&(*(Iterators.GetBegin() + static_cast<std::ptrdiff_t>(index)));
|
|
lockedValue = temp;
|
|
return vtkmAtomicAdd(lockedValue, value);
|
|
#else
|
|
lockedValue = (Iterators.GetBegin()+index);
|
|
return vtkmAtomicAdd(lockedValue, value);
|
|
#endif
|
|
}
|
|
|
|
VTKM_EXEC_EXPORT
|
|
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
|
|
{
|
|
T* lockedValue;
|
|
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0
|
|
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
|
|
typename IteratorType::pointer temp =
|
|
&(*(Iterators.GetBegin()+static_cast<std::ptrdiff_t>(index)));
|
|
lockedValue = temp;
|
|
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
|
|
#else
|
|
lockedValue = (Iterators.GetBegin()+index);
|
|
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
|
|
#endif
|
|
}
|
|
|
|
private:
|
|
typedef typename vtkm::cont::ArrayHandle<T,vtkm::cont::StorageTagBasic>
|
|
::template ExecutionTypes<DeviceTag>::Portal PortalType;
|
|
typedef vtkm::cont::ArrayPortalToIterators<PortalType> IteratorsType;
|
|
IteratorsType Iterators;
|
|
|
|
#if defined(VTKM_MSVC) //MSVC atomics
|
|
VTKM_EXEC_EXPORT
|
|
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
|
|
{
|
|
return InterlockedExchangeAdd(reinterpret_cast<volatile long *>(address),value);
|
|
}
|
|
|
|
VTKM_EXEC_EXPORT
|
|
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
|
|
{
|
|
return InterlockedExchangeAdd64(reinterpret_cast<volatile long long *>(address),value);
|
|
}
|
|
|
|
VTKM_EXEC_EXPORT
|
|
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
|
|
{
|
|
return InterlockedCompareExchange(reinterpret_cast<volatile long *>(address),newValue,oldValue);
|
|
}
|
|
|
|
VTKM_EXEC_EXPORT
|
|
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
|
|
{
|
|
return InterlockedCompareExchange64(reinterpret_cast<volatile long long *>(address),newValue, oldValue);
|
|
}
|
|
|
|
#else //gcc built-in atomics
|
|
|
|
VTKM_EXEC_EXPORT
|
|
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
|
|
{
|
|
return __sync_fetch_and_add(address,value);
|
|
}
|
|
|
|
VTKM_EXEC_EXPORT
|
|
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
|
|
{
|
|
return __sync_fetch_and_add(address,value);
|
|
}
|
|
|
|
VTKM_EXEC_EXPORT
|
|
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
|
|
{
|
|
return __sync_val_compare_and_swap(address,oldValue, newValue);
|
|
}
|
|
|
|
VTKM_EXEC_EXPORT
|
|
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
|
|
{
|
|
return __sync_val_compare_and_swap(address,oldValue,newValue);
|
|
}
|
|
|
|
#endif
|
|
};
|
|
|
|
}
|
|
} // namespace vtkm::cont
|
|
|
|
#endif //vtk_m_cont_internal_DeviceAdapterAlgorithmGeneral_h
|