vtk-m2/vtkm/cont/serial/internal/DeviceAdapterAlgorithmSerial.h

498 lines
19 KiB
C
Raw Normal View History

2017-05-18 14:29:41 +00:00
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_serial_internal_DeviceAdapterAlgorithmSerial_h
#define vtk_m_cont_serial_internal_DeviceAdapterAlgorithmSerial_h
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleZip.h>
#include <vtkm/cont/ArrayPortalToIterators.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorExecution.h>
#include <vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h>
#include <vtkm/cont/serial/internal/DeviceAdapterTagSerial.h>
#include <vtkm/BinaryOperators.h>
#include <vtkm/exec/serial/internal/TaskTiling.h>
#include <algorithm>
#include <numeric>
2017-05-18 14:29:41 +00:00
namespace vtkm
{
namespace cont
{
2017-05-18 14:29:41 +00:00
template <>
struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagSerial>
: vtkm::cont::internal::DeviceAdapterAlgorithmGeneral<
DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagSerial>,
vtkm::cont::DeviceAdapterTagSerial>
{
private:
using Device = vtkm::cont::DeviceAdapterTagSerial;
public:
// History note (commit dated 2017-10-10 16:42:13 +00:00):
// Use std::copy in serial Copy implementation.
//
// I had assumed that the compiler would be clever enough to turn the
// iterative implementation of Copy into a memcpy, but inspecting the
// disassembly on a release GCC build shows that this is not the case, likely
// because it can't assume that the memory ranges do not overlap.
//
// Replacing the loop with std::copy speeds things up (about 30-50%) for most
// data types, though there is a slight (usually < 5%) slowdown for Vec types.
// The uint8 copy improved by a factor of 8.
//
// Comparison:
//
// | Speedup | iteration            | std::copy            | Benchmark (Type)                                    |
// |---------|----------------------|----------------------|-----------------------------------------------------|
// | 1.363   | 0.001590 +- 0.000087 | 0.001166 +- 0.000049 | Copy 2097152 values (vtkm::Float32)                 |
// | 1.487   | 0.003429 +- 0.000185 | 0.002305 +- 0.000146 | Copy 2097152 values (vtkm::Float64)                 |
// | 1.379   | 0.001568 +- 0.000072 | 0.001137 +- 0.000093 | Copy 2097152 values (vtkm::Int32)                   |
// | 1.420   | 0.003410 +- 0.000173 | 0.002402 +- 0.000101 | Copy 2097152 values (vtkm::Int64)                   |
// | 1.303   | 0.001564 +- 0.000083 | 0.001201 +- 0.000078 | Copy 2097152 values (vtkm::UInt32)                  |
// | 7.204   | 0.002441 +- 0.000104 | 0.000339 +- 0.000029 | Copy 2097152 values (vtkm::UInt8)                   |
// | 0.987   | 0.006602 +- 0.000266 | 0.006688 +- 0.000291 | Copy 2097152 values (vtkm::Vec< vtkm::Float32, 4 >) |
// | 0.965   | 0.010065 +- 0.000528 | 0.010427 +- 0.000617 | Copy 2097152 values (vtkm::Vec< vtkm::Float64, 3 >) |
// | 0.979   | 0.003327 +- 0.000191 | 0.003398 +- 0.000142 | Copy 2097152 values (vtkm::Vec< vtkm::Int32, 2 >)   |
// | 0.851   | 0.001579 +- 0.000090 | 0.001856 +- 0.000098 | Copy 2097152 values (vtkm::Vec< vtkm::UInt8, 4 >)   |
/// Copies every value of \c input into \c output.
///
/// \c output is prepared for output with the same number of values as
/// \c input, so whatever it held before is not preserved. The element types
/// \c T and \c U may differ; the conversion is whatever std::copy performs
/// when assigning through the output iterator.
template <typename T, typename U, class CIn, class COut>
VTKM_CONT static void Copy(const vtkm::cont::ArrayHandle<T, CIn>& input,
                           vtkm::cont::ArrayHandle<U, COut>& output)
{
  const vtkm::Id numberOfValues = input.GetNumberOfValues();

  auto sourcePortal = input.PrepareForInput(DeviceAdapterTagSerial());
  auto targetPortal = output.PrepareForOutput(numberOfValues, DeviceAdapterTagSerial());

  auto sourceBegin = vtkm::cont::ArrayPortalToIteratorBegin(sourcePortal);
  auto sourceEnd = vtkm::cont::ArrayPortalToIteratorEnd(sourcePortal);
  auto targetBegin = vtkm::cont::ArrayPortalToIteratorBegin(targetPortal);

  std::copy(sourceBegin, sourceEnd, targetBegin);
}
2017-09-19 15:08:10 +00:00
/// Stream compaction with the default predicate.
///
/// Forwards to the predicate overload of CopyIf using
/// ::vtkm::NotZeroInitialized as the test applied to each stencil value.
template <typename T, typename U, class CIn, class CStencil, class COut>
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
                             const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
                             vtkm::cont::ArrayHandle<T, COut>& output)
{
  using DefaultPredicate = ::vtkm::NotZeroInitialized;
  CopyIf(input, stencil, output, DefaultPredicate());
}
/// Stream compaction: copies \c input[i] to \c output for every index \c i
/// at which \c predicate(stencil[i]) is true, keeping the original order.
///
/// \c input and \c stencil are asserted to have the same length. \c output is
/// first sized to hold every input value and then shrunk to the number of
/// values actually kept.
template <typename T, typename U, class CIn, class CStencil, class COut, class UnaryPredicate>
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
                             const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
                             vtkm::cont::ArrayHandle<T, COut>& output,
                             UnaryPredicate predicate)
{
  const vtkm::Id numberOfValues = input.GetNumberOfValues();
  VTKM_ASSERT(numberOfValues == stencil.GetNumberOfValues());

  auto inputPortal = input.PrepareForInput(DeviceAdapterTagSerial());
  auto stencilPortal = stencil.PrepareForInput(DeviceAdapterTagSerial());
  auto outputPortal = output.PrepareForOutput(numberOfValues, DeviceAdapterTagSerial());

  // Number of values kept so far; doubles as the next free output slot.
  vtkm::Id numberKept = 0;
  for (vtkm::Id index = 0; index < numberOfValues; ++index)
  {
    if (!predicate(stencilPortal.Get(index)))
    {
      continue;
    }
    outputPortal.Set(numberKept, inputPortal.Get(index));
    ++numberKept;
  }

  output.Shrink(numberKept);
}
// History note (commit dated 2017-10-10 16:42:13 +00:00):
// Use std::copy in serial Copy implementation.
//
// I had assumed that the compiler would be clever enough to turn the
// iterative implementation of Copy into a memcpy, but inspecting the
// disassembly on a release GCC build shows that this is not the case, likely
// because it can't assume that the memory ranges do not overlap.
//
// Replacing the loop with std::copy speeds things up (about 30-50%) for most
// data types, though there is a slight (usually < 5%) slowdown for Vec types.
// The uint8 copy improved by a factor of 8.
//
// Comparison:
//
// | Speedup | iteration            | std::copy            | Benchmark (Type)                                    |
// |---------|----------------------|----------------------|-----------------------------------------------------|
// | 1.363   | 0.001590 +- 0.000087 | 0.001166 +- 0.000049 | Copy 2097152 values (vtkm::Float32)                 |
// | 1.487   | 0.003429 +- 0.000185 | 0.002305 +- 0.000146 | Copy 2097152 values (vtkm::Float64)                 |
// | 1.379   | 0.001568 +- 0.000072 | 0.001137 +- 0.000093 | Copy 2097152 values (vtkm::Int32)                   |
// | 1.420   | 0.003410 +- 0.000173 | 0.002402 +- 0.000101 | Copy 2097152 values (vtkm::Int64)                   |
// | 1.303   | 0.001564 +- 0.000083 | 0.001201 +- 0.000078 | Copy 2097152 values (vtkm::UInt32)                  |
// | 7.204   | 0.002441 +- 0.000104 | 0.000339 +- 0.000029 | Copy 2097152 values (vtkm::UInt8)                   |
// | 0.987   | 0.006602 +- 0.000266 | 0.006688 +- 0.000291 | Copy 2097152 values (vtkm::Vec< vtkm::Float32, 4 >) |
// | 0.965   | 0.010065 +- 0.000528 | 0.010427 +- 0.000617 | Copy 2097152 values (vtkm::Vec< vtkm::Float64, 3 >) |
// | 0.979   | 0.003327 +- 0.000191 | 0.003398 +- 0.000142 | Copy 2097152 values (vtkm::Vec< vtkm::Int32, 2 >)   |
// | 0.851   | 0.001579 +- 0.000090 | 0.001856 +- 0.000098 | Copy 2097152 values (vtkm::Vec< vtkm::UInt8, 4 >)   |
/// Copies up to \c numberOfElementsToCopy values from \c input, starting at
/// \c inputStartIndex, into \c output starting at \c outputIndex.
///
/// The count is clamped so that reading stops at the end of \c input. If
/// \c output is too small to receive the values it is grown; values it
/// already holds are carried over into the enlarged array.
///
/// \return \c false, before \c output is touched, when any index or the count
/// is negative, when \c inputStartIndex is not a valid index into \c input,
/// or when \c input and \c output compare equal and the requested source and
/// destination ranges overlap. Returns \c true after a successful copy.
template <typename T, typename U, class CIn, class COut>
VTKM_CONT static bool CopySubRange(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                   vtkm::Id inputStartIndex,
                                   vtkm::Id numberOfElementsToCopy,
                                   vtkm::cont::ArrayHandle<U, COut>& output,
                                   vtkm::Id outputIndex = 0)
{
  const vtkm::Id inSize = input.GetNumberOfValues();

  // Reject invalid parameters first so that the arithmetic below only ever
  // sees non-negative values. Every input rejected here was also rejected by
  // the previous ordering of the checks, so results are unchanged.
  if (inputStartIndex < 0 || numberOfElementsToCopy < 0 || outputIndex < 0 ||
      inputStartIndex >= inSize)
  { //invalid parameters
    return false;
  }

  // Check if the ranges overlap and fail if they do. Two ranges of the same
  // length overlap exactly when their start indices are closer together than
  // that length. Comparing the distance, rather than computing start + count,
  // cannot overflow even when the caller passes a very large count.
  if (input == output)
  {
    const vtkm::Id startDistance = (outputIndex >= inputStartIndex)
      ? (outputIndex - inputStartIndex)
      : (inputStartIndex - outputIndex);
    if (startDistance < numberOfElementsToCopy)
    {
      return false;
    }
  }

  //determine if the numberOfElementsToCopy needs to be reduced
  // (inputStartIndex < inSize was verified above, so this is positive)
  const vtkm::Id numberAvailable = inSize - inputStartIndex;
  if (numberOfElementsToCopy > numberAvailable)
  { //adjust the size
    numberOfElementsToCopy = numberAvailable;
  }

  const vtkm::Id outSize = output.GetNumberOfValues();
  const vtkm::Id copyOutEnd = outputIndex + numberOfElementsToCopy;
  if (outSize < copyOutEnd)
  { //output is not large enough
    if (outSize == 0)
    { //since output has nothing, just need to allocate to correct length
      output.Allocate(copyOutEnd);
    }
    else
    { //we currently have data in this array, so preserve it in the new
      //resized array
      vtkm::cont::ArrayHandle<U, COut> temp;
      temp.Allocate(copyOutEnd);
      CopySubRange(output, 0, outSize, temp);
      output = temp;
    }
  }

  auto inputPortal = input.PrepareForInput(DeviceAdapterTagSerial());
  auto outputPortal = output.PrepareForInPlace(DeviceAdapterTagSerial());
  auto inIter = vtkm::cont::ArrayPortalToIteratorBegin(inputPortal);
  auto outIter = vtkm::cont::ArrayPortalToIteratorBegin(outputPortal);

  // std::copy replaced a hand-written element loop here because it
  // benchmarked faster for most value types (roughly 30-50%, far more for
  // UInt8, and slightly slower for some Vec types).
  std::copy(inIter + inputStartIndex,
            inIter + inputStartIndex + numberOfElementsToCopy,
            outIter + outputIndex);

  return true;
}
2017-05-18 14:29:41 +00:00
/// Reduces \c input to a single value, starting from \c initialValue.
///
/// Forwards to the functor overload of Reduce with vtkm::Add as the
/// combining operation.
template <typename T, typename U, class CIn>
VTKM_CONT static U Reduce(const vtkm::cont::ArrayHandle<T, CIn>& input, U initialValue)
{
  using DefaultOperator = vtkm::Add;
  return Reduce(input, initialValue, DefaultOperator());
}
2017-05-18 14:29:41 +00:00
/// Reduces \c input to a single value of type \c U by folding
/// \c binary_functor over the values from first to last, starting from
/// \c initialValue.
///
/// The functor is wrapped in internal::WrappedBinaryOperator<U, ...> before
/// being handed to std::accumulate.
template <typename T, typename U, class CIn, class BinaryFunctor>
VTKM_CONT static U Reduce(const vtkm::cont::ArrayHandle<T, CIn>& input,
                          U initialValue,
                          BinaryFunctor binary_functor)
{
  auto portal = input.PrepareForInput(Device());
  auto first = vtkm::cont::ArrayPortalToIteratorBegin(portal);
  auto last = vtkm::cont::ArrayPortalToIteratorEnd(portal);

  internal::WrappedBinaryOperator<U, BinaryFunctor> combine(binary_functor);
  return std::accumulate(first, last, initialValue, combine);
}
/// Collapses each run of consecutive equal keys into a single entry.
///
/// For every maximal run of adjacent, equal values in \c keys, one key is
/// written to \c keys_output and the values of that run, combined left to
/// right with \c binary_functor, are written to \c values_output. Only
/// adjacent keys are compared, so equal keys separated by a different key
/// form separate runs.
///
/// \c keys and \c values are asserted to have the same length. When they are
/// empty both outputs are shrunk to zero length.
template <typename T,
          typename U,
          class KIn,
          class VIn,
          class KOut,
          class VOut,
          class BinaryFunctor>
VTKM_CONT static void ReduceByKey(const vtkm::cont::ArrayHandle<T, KIn>& keys,
                                  const vtkm::cont::ArrayHandle<U, VIn>& values,
                                  vtkm::cont::ArrayHandle<T, KOut>& keys_output,
                                  vtkm::cont::ArrayHandle<U, VOut>& values_output,
                                  BinaryFunctor binary_functor)
{
  auto keysIn = keys.PrepareForInput(Device());
  auto valuesIn = values.PrepareForInput(Device());

  const vtkm::Id numberOfKeys = keys.GetNumberOfValues();
  VTKM_ASSERT(numberOfKeys == values.GetNumberOfValues());
  if (numberOfKeys == 0)
  {
    keys_output.Shrink(0);
    values_output.Shrink(0);
    return;
  }

  // Worst case every key is distinct, so reserve room for all of them.
  auto keysOut = keys_output.PrepareForOutput(numberOfKeys, Device());
  auto valuesOut = values_output.PrepareForOutput(numberOfKeys, Device());

  // State of the run currently being accumulated.
  T runKey = keysIn.Get(0);
  U runValue = valuesIn.Get(0);

  // Number of finished runs already written to the outputs.
  vtkm::Id runsWritten = 0;

  for (vtkm::Id index = 1; index < numberOfKeys; ++index)
  {
    if (runKey == keysIn.Get(index))
    {
      // Same key: fold this value into the running result.
      runValue = binary_functor(runValue, valuesIn.Get(index));
    }
    else
    {
      // Key changed: flush the finished run and start a new one here.
      keysOut.Set(runsWritten, runKey);
      valuesOut.Set(runsWritten, runValue);
      ++runsWritten;

      runKey = keysIn.Get(index);
      runValue = valuesIn.Get(index);
    }
  }

  // The final run is still pending; write it out.
  keysOut.Set(runsWritten, runKey);
  valuesOut.Set(runsWritten, runValue);

  // runsWritten does not count the final run, hence the + 1.
  keys_output.Shrink(runsWritten + 1);
  values_output.Shrink(runsWritten + 1);
}
2017-05-18 14:29:41 +00:00
/// Inclusive prefix sum: \c output[i] receives the sum of \c input[0..i].
///
/// \return the last value written to \c output (the sum of all inputs), or
/// vtkm::TypeTraits<T>::ZeroInitialization() when \c input is empty.
template <typename T, class CIn, class COut>
VTKM_CONT static T ScanInclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                 vtkm::cont::ArrayHandle<T, COut>& output)
{
  const vtkm::Id length = input.GetNumberOfValues();

  auto sourcePortal = input.PrepareForInput(Device());
  auto resultPortal = output.PrepareForOutput(length, Device());

  if (length <= 0)
  {
    return vtkm::TypeTraits<T>::ZeroInitialization();
  }

  auto sourceBegin = vtkm::cont::ArrayPortalToIteratorBegin(sourcePortal);
  auto sourceEnd = vtkm::cont::ArrayPortalToIteratorEnd(sourcePortal);
  auto resultBegin = vtkm::cont::ArrayPortalToIteratorBegin(resultPortal);
  std::partial_sum(sourceBegin, sourceEnd, resultBegin);

  // The final entry of an inclusive scan is the total.
  return resultPortal.Get(length - 1);
}
2017-05-18 14:29:41 +00:00
/// Inclusive scan with a caller-supplied operator: \c output[i] receives
/// \c input[0..i] combined left to right with \c binary_functor.
///
/// The functor is wrapped in internal::WrappedBinaryOperator<T, ...> before
/// being handed to std::partial_sum.
///
/// \return the last value written to \c output, or
/// vtkm::TypeTraits<T>::ZeroInitialization() when \c input is empty.
template <typename T, class CIn, class COut, class BinaryFunctor>
VTKM_CONT static T ScanInclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                 vtkm::cont::ArrayHandle<T, COut>& output,
                                 BinaryFunctor binary_functor)
{
  internal::WrappedBinaryOperator<T, BinaryFunctor> combine(binary_functor);

  const vtkm::Id length = input.GetNumberOfValues();

  auto sourcePortal = input.PrepareForInput(Device());
  auto resultPortal = output.PrepareForOutput(length, Device());

  if (length <= 0)
  {
    return vtkm::TypeTraits<T>::ZeroInitialization();
  }

  auto sourceBegin = vtkm::cont::ArrayPortalToIteratorBegin(sourcePortal);
  auto sourceEnd = vtkm::cont::ArrayPortalToIteratorEnd(sourcePortal);
  auto resultBegin = vtkm::cont::ArrayPortalToIteratorBegin(resultPortal);
  std::partial_sum(sourceBegin, sourceEnd, resultBegin, combine);

  // The final entry of an inclusive scan is the full reduction.
  return resultPortal.Get(length - 1);
}
2017-05-18 14:29:41 +00:00
/// Exclusive scan with a caller-supplied operator and initial value:
/// \c output[0] is \c initialValue and \c output[i] is \c initialValue
/// combined with \c input[0..i-1] using \c binaryFunctor.
///
/// \return the combination of \c initialValue with every input value, or
/// \c initialValue itself when \c input is empty.
template <typename T, class CIn, class COut, class BinaryFunctor>
VTKM_CONT static T ScanExclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                 vtkm::cont::ArrayHandle<T, COut>& output,
                                 BinaryFunctor binaryFunctor,
                                 const T& initialValue)
{
  internal::WrappedBinaryOperator<T, BinaryFunctor> combine(binaryFunctor);

  const vtkm::Id length = input.GetNumberOfValues();

  auto sourcePortal = input.PrepareForInput(Device());
  auto resultPortal = output.PrepareForOutput(length, Device());

  if (length <= 0)
  {
    return initialValue;
  }

  // Remember the final input value before anything is written: it does not
  // appear in the shifted output but is needed for the returned total.
  T finalInput = sourcePortal.Get(length - 1);

  // Shift the input right by one slot, walking from the back to the front.
  // Going backwards keeps the algorithm correct when input and output are the
  // same array; a forward walk would smear the first element over all slots.
  // Note: std::copy_backwards is deliberately not used. The ICC compiler has
  // been found to improperly optimize it into a standard forward copy, which
  // reintroduces exactly that problem.
  for (vtkm::Id target = length - 1; target > 0; --target)
  {
    resultPortal.Set(target, sourcePortal.Get(target - 1));
  }
  resultPortal.Set(0, initialValue);

  // An in-place inclusive scan of the shifted data is the exclusive scan.
  auto resultBegin = vtkm::cont::ArrayPortalToIteratorBegin(resultPortal);
  auto resultEnd = vtkm::cont::ArrayPortalToIteratorEnd(resultPortal);
  std::partial_sum(resultBegin, resultEnd, resultBegin, combine);

  return combine(resultPortal.Get(length - 1), finalInput);
}
2017-05-18 14:29:41 +00:00
/// Exclusive prefix sum.
///
/// Forwards to the functor overload of ScanExclusive with vtkm::Sum as the
/// operator and vtkm::TypeTraits<T>::ZeroInitialization() as the initial
/// value.
template <typename T, class CIn, class COut>
VTKM_CONT static T ScanExclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
                                 vtkm::cont::ArrayHandle<T, COut>& output)
{
  using DefaultOperator = vtkm::Sum;
  return ScanExclusive(
    input, output, DefaultOperator(), vtkm::TypeTraits<T>::ZeroInitialization());
}
/// Runs a 1D tiling task over \c size instances. Only declared here; the
/// definition is not part of this header. Called by the 1D Schedule overload.
// NOTE(review): VTKM_CONT_EXPORT presumably marks the symbol as exported from
// the compiled library -- confirm against the macro definition.
VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::serial::internal::TaskTiling1D& functor,
vtkm::Id size);
/// Runs a 3D tiling task over a \c size index range. Only declared here; the
/// definition is not part of this header. Called by the 3D Schedule overload.
VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::serial::internal::TaskTiling3D& functor,
vtkm::Id3 size);
/// Schedules \c functor over a 1D range of \c size instances by wrapping it
/// in a TaskTiling1D and passing that to ScheduleTask.
template <class FunctorType>
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id size)
{
  using TaskType = vtkm::exec::serial::internal::TaskTiling1D;

  TaskType task(functor);
  ScheduleTask(task, size);
}
/// Schedules \c functor over a 3D index range of extent \c size by wrapping
/// it in a TaskTiling3D and passing that to ScheduleTask.
template <class FunctorType>
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id3 size)
{
  using TaskType = vtkm::exec::serial::internal::TaskTiling3D;

  TaskType task(functor);
  ScheduleTask(task, size);
}
private:
/// Reorders \c values according to \c index: for every position \c i,
/// \c values_out[i] is set to \c values[index[i]].
///
/// \c values and \c index are asserted to have the same length, and
/// \c values_out is allocated to that length.
template <typename Vin,
          typename I,
          typename Vout,
          class StorageVin,
          class StorageI,
          class StorageVout>
VTKM_CONT static void Scatter(vtkm::cont::ArrayHandle<Vin, StorageVin>& values,
                              vtkm::cont::ArrayHandle<I, StorageI>& index,
                              vtkm::cont::ArrayHandle<Vout, StorageVout>& values_out)
{
  const vtkm::Id count = values.GetNumberOfValues();
  VTKM_ASSERT(count == index.GetNumberOfValues());

  auto sourcePortal = values.PrepareForInput(Device());
  auto lookupPortal = index.PrepareForInput(Device());
  auto resultPortal = values_out.PrepareForOutput(count, Device());

  for (vtkm::Id position = 0; position < count; ++position)
  {
    // Each output slot pulls the value named by the matching index entry.
    resultPortal.Set(position, sourcePortal.Get(lookupPortal.Get(position)));
  }
}
private:
2015-06-08 16:40:44 +00:00
/// Reorder the value array along with the sorting algorithm
2017-05-18 14:29:41 +00:00
/// Sorts \c keys and moves the entries of \c values along with them.
///
/// The two arrays are zipped into one array of pairs, which is then sorted
/// with a comparator that applies \c binary_compare to the key half of each
/// pair only.
template <typename T, typename U, class StorageT, class StorageU, class BinaryCompare>
VTKM_CONT static void SortByKeyDirect(vtkm::cont::ArrayHandle<T, StorageT>& keys,
                                      vtkm::cont::ArrayHandle<U, StorageU>& values,
                                      BinaryCompare binary_compare)
{
  // Adapter that makes the user's key comparison usable on zipped pairs.
  using KeyOnlyCompare = internal::KeyCompare<T, U, BinaryCompare>;

  auto keyValuePairs = vtkm::cont::make_ArrayHandleZip(keys, values);
  Sort(keyValuePairs, KeyOnlyCompare(binary_compare));
}
public:
2017-05-18 14:29:41 +00:00
/// Sorts \c keys, reordering \c values to match, using std::less<T> as the
/// key comparison.
template <typename T, typename U, class StorageT, class StorageU>
VTKM_CONT static void SortByKey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
                                vtkm::cont::ArrayHandle<U, StorageU>& values)
{
  using DefaultCompare = std::less<T>;
  SortByKey(keys, values, DefaultCompare());
}
2017-05-18 14:29:41 +00:00
/// Sorts \c keys with \c binary_compare and reorders \c values to match.
///
/// When a value is no larger than a vtkm::Int64 the keys and values are
/// sorted together directly. For larger value types the keys are sorted
/// together with an index array instead, and the values are reordered once
/// at the end using the sorted indices.
template <typename T, typename U, class StorageT, class StorageU, class BinaryCompare>
VTKM_CONT static void SortByKey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
                                vtkm::cont::ArrayHandle<U, StorageU>& values,
                                const BinaryCompare& binary_compare)
{
  internal::WrappedBinaryOperator<bool, BinaryCompare> wrappedCompare(binary_compare);

  VTKM_CONSTEXPR bool valuesFitIn64Bits = !(sizeof(U) > sizeof(vtkm::Int64));
  if (valuesFitIn64Bits)
  {
    SortByKeyDirect(keys, values, wrappedCompare);
    return;
  }

  /// More efficient sort for large values:
  /// move value indexes while sorting and reorder the value array last.
  vtkm::cont::ArrayHandle<vtkm::Id> sortedIndices;
  vtkm::cont::ArrayHandle<U, StorageU> reorderedValues;

  Copy(ArrayHandleIndex(keys.GetNumberOfValues()), sortedIndices);
  SortByKeyDirect(keys, sortedIndices, wrappedCompare);
  Scatter(values, sortedIndices, reorderedValues);

  Copy(reorderedValues, values);
}
2017-05-18 14:29:41 +00:00
/// Sorts \c values in place using std::less<T> as the comparison.
template <typename T, class Storage>
VTKM_CONT static void Sort(vtkm::cont::ArrayHandle<T, Storage>& values)
{
  using DefaultCompare = std::less<T>;
  Sort(values, DefaultCompare());
}
2017-05-18 14:29:41 +00:00
/// Sorts \c values in place with std::sort, ordering elements by
/// \c binary_compare (wrapped in internal::WrappedBinaryOperator<bool, ...>).
template <typename T, class Storage, class BinaryCompare>
VTKM_CONT static void Sort(vtkm::cont::ArrayHandle<T, Storage>& values,
                           BinaryCompare binary_compare)
{
  auto portal = values.PrepareForInPlace(Device());
  vtkm::cont::ArrayPortalToIterators<decltype(portal)> range(portal);

  internal::WrappedBinaryOperator<bool, BinaryCompare> isBefore(binary_compare);

  auto first = range.GetBegin();
  auto last = range.GetEnd();
  std::sort(first, last, isBefore);
}
/// Removes consecutive duplicate values from \c values, using
/// std::equal_to<T> to decide whether two neighbours are duplicates.
template <typename T, class Storage>
VTKM_CONT static void Unique(vtkm::cont::ArrayHandle<T, Storage>& values)
{
  using DefaultCompare = std::equal_to<T>;
  Unique(values, DefaultCompare());
}
/// Removes consecutive duplicates from \c values in place.
///
/// Neighbouring elements for which \c binary_compare returns true are
/// treated as duplicates and collapsed with std::unique; the array is then
/// shrunk to the number of elements that remain.
template <typename T, class Storage, class BinaryCompare>
VTKM_CONT static void Unique(vtkm::cont::ArrayHandle<T, Storage>& values,
                             BinaryCompare binary_compare)
{
  auto portal = values.PrepareForInPlace(Device());
  vtkm::cont::ArrayPortalToIterators<decltype(portal)> range(portal);

  internal::WrappedBinaryOperator<bool, BinaryCompare> isDuplicate(binary_compare);

  auto uniqueEnd = std::unique(range.GetBegin(), range.GetEnd(), isDuplicate);
  const vtkm::Id numberRemaining = static_cast<vtkm::Id>(uniqueEnd - range.GetBegin());
  values.Shrink(numberRemaining);
}
/// Synchronization point for the serial device. The body is intentionally
/// empty, so calling it has no effect.
VTKM_CONT static void Synchronize()
{
// Nothing to do. This device is serial and has no asynchronous operations.
}
};
/// Specialization of DeviceTaskTypes for the serial device: builds the
/// tiling task objects that run a worklet with a given invocation.
template <>
class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagSerial>
{
public:
  /// Builds a 1D tiling task from \c worklet and \c invocation.
  /// The unnamed vtkm::Id parameter is accepted but not used.
  template <typename WorkletType, typename InvocationType>
  static vtkm::exec::serial::internal::TaskTiling1D MakeTask(const WorkletType& worklet,
                                                             const InvocationType& invocation,
                                                             vtkm::Id,
                                                             vtkm::Id globalIndexOffset = 0)
  {
    using TaskType = vtkm::exec::serial::internal::TaskTiling1D;
    return TaskType(worklet, invocation, globalIndexOffset);
  }

  /// Builds a 3D tiling task from \c worklet and \c invocation.
  /// The unnamed vtkm::Id3 parameter is accepted but not used.
  template <typename WorkletType, typename InvocationType>
  static vtkm::exec::serial::internal::TaskTiling3D MakeTask(const WorkletType& worklet,
                                                             const InvocationType& invocation,
                                                             vtkm::Id3,
                                                             vtkm::Id globalIndexOffset = 0)
  {
    using TaskType = vtkm::exec::serial::internal::TaskTiling3D;
    return TaskType(worklet, invocation, globalIndexOffset);
  }
};
}
} // namespace vtkm::cont
#endif //vtk_m_cont_serial_internal_DeviceAdapterAlgorithmSerial_h