1398 lines
43 KiB
C++
1398 lines
43 KiB
C++
//============================================================================
|
|
// Copyright (c) Kitware, Inc.
|
|
// All rights reserved.
|
|
// See LICENSE.txt for details.
|
|
//
|
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
// PURPOSE. See the above copyright notice for more information.
|
|
//============================================================================
|
|
#ifndef vtk_m_cont_tbb_internal_FunctorsTBB_h
|
|
#define vtk_m_cont_tbb_internal_FunctorsTBB_h
|
|
|
|
#include <vtkm/TypeTraits.h>
|
|
#include <vtkm/Types.h>
|
|
#include <vtkm/cont/ArrayPortalToIterators.h>
|
|
#include <vtkm/cont/Error.h>
|
|
#include <vtkm/cont/internal/FunctorsGeneral.h>
|
|
#include <vtkm/exec/internal/ErrorMessageBuffer.h>
|
|
|
|
#include <algorithm>
|
|
#include <iterator>
|
|
#include <sstream>
|
|
#include <type_traits>
|
|
|
|
VTKM_THIRDPARTY_PRE_INCLUDE
|
|
|
|
#if defined(VTKM_MSVC)
|
|
|
|
// TBB's header include a #pragma comment(lib,"tbb.lib") line to make all
|
|
// consuming libraries link to tbb, this is bad behavior in a header
|
|
// based project
|
|
#pragma push_macro("__TBB_NO_IMPLICITLINKAGE")
|
|
#define __TBB_NO_IMPLICIT_LINKAGE 1
|
|
|
|
#endif // defined(VTKM_MSVC)
|
|
|
|
// TBB includes windows.h, so instead we want to include windows.h with the
|
|
// correct settings so that we don't clobber any existing function
|
|
#include <vtkm/internal/Windows.h>
|
|
|
|
#include <tbb/tbb_stddef.h>
|
|
#if (TBB_VERSION_MAJOR == 4) && (TBB_VERSION_MINOR == 2)
|
|
//we provide an patched implementation of tbb parallel_sort
|
|
//that fixes ADL for std::swap. This patch has been submitted to Intel
|
|
//and is fixed in TBB 4.2 update 2.
|
|
#include <vtkm/cont/tbb/internal/parallel_sort.h>
|
|
#else
|
|
#include <tbb/parallel_sort.h>
|
|
#endif
|
|
|
|
#include <numeric>
|
|
#include <tbb/blocked_range.h>
|
|
#include <tbb/blocked_range3d.h>
|
|
#include <tbb/parallel_for.h>
|
|
#include <tbb/parallel_reduce.h>
|
|
#include <tbb/parallel_scan.h>
|
|
#include <tbb/partitioner.h>
|
|
#include <tbb/tick_count.h>
|
|
|
|
#if defined(VTKM_MSVC)
|
|
#pragma pop_macro("__TBB_NO_IMPLICITLINKAGE")
|
|
#endif
|
|
|
|
VTKM_THIRDPARTY_POST_INCLUDE
|
|
|
|
namespace vtkm
|
|
{
|
|
namespace cont
|
|
{
|
|
namespace tbb
|
|
{
|
|
|
|
namespace internal
|
|
{
|
|
template <typename ResultType, typename Function>
|
|
using WrappedBinaryOperator = vtkm::cont::internal::WrappedBinaryOperator<ResultType, Function>;
|
|
}
|
|
|
|
// The "grain size" of scheduling with TBB. Not a lot of thought has gone
|
|
// into picking this size.
|
|
static constexpr vtkm::Id TBB_GRAIN_SIZE = 1024;
|
|
|
|
template <typename InputPortalType, typename OutputPortalType>
|
|
struct CopyBody
|
|
{
|
|
InputPortalType InputPortal;
|
|
OutputPortalType OutputPortal;
|
|
vtkm::Id InputOffset;
|
|
vtkm::Id OutputOffset;
|
|
|
|
CopyBody(const InputPortalType& inPortal,
|
|
const OutputPortalType& outPortal,
|
|
vtkm::Id inOffset,
|
|
vtkm::Id outOffset)
|
|
: InputPortal(inPortal)
|
|
, OutputPortal(outPortal)
|
|
, InputOffset(inOffset)
|
|
, OutputOffset(outOffset)
|
|
{
|
|
}
|
|
|
|
// MSVC likes complain about narrowing type conversions in std::copy and
|
|
// provides no reasonable way to disable the warning. As a work-around, this
|
|
// template calls std::copy if and only if the types match, otherwise falls
|
|
// back to a iterative casting approach. Since std::copy can only really
|
|
// optimize same-type copies, this shouldn't affect performance.
|
|
template <typename InIter, typename OutIter>
|
|
void DoCopy(InIter src, InIter srcEnd, OutIter dst, std::false_type) const
|
|
{
|
|
using InputType = typename InputPortalType::ValueType;
|
|
using OutputType = typename OutputPortalType::ValueType;
|
|
while (src != srcEnd)
|
|
{
|
|
// The conversion to InputType and then OutputType looks weird, but it is necessary.
|
|
// *src actually returns an ArrayPortalValueReference, which can automatically convert
|
|
// itself to InputType but not necessarily OutputType. Thus, we first convert to
|
|
// InputType, and then allow the conversion to OutputType.
|
|
*dst = static_cast<OutputType>(static_cast<InputType>(*src));
|
|
++src;
|
|
++dst;
|
|
}
|
|
}
|
|
|
|
|
|
template <typename InIter, typename OutIter>
|
|
void DoCopy(InIter src, InIter srcEnd, OutIter dst, std::true_type) const
|
|
{
|
|
std::copy(src, srcEnd, dst);
|
|
}
|
|
|
|
|
|
|
|
void operator()(const ::tbb::blocked_range<vtkm::Id>& range) const
|
|
{
|
|
if (range.empty())
|
|
{
|
|
return;
|
|
}
|
|
|
|
auto inIter = vtkm::cont::ArrayPortalToIteratorBegin(this->InputPortal);
|
|
auto outIter = vtkm::cont::ArrayPortalToIteratorBegin(this->OutputPortal);
|
|
|
|
using InputType = typename InputPortalType::ValueType;
|
|
using OutputType = typename OutputPortalType::ValueType;
|
|
|
|
this->DoCopy(inIter + this->InputOffset + range.begin(),
|
|
inIter + this->InputOffset + range.end(),
|
|
outIter + this->OutputOffset + range.begin(),
|
|
std::is_same<InputType, OutputType>());
|
|
}
|
|
};
|
|
|
|
template <typename InputPortalType, typename OutputPortalType>
|
|
void CopyPortals(const InputPortalType& inPortal,
|
|
const OutputPortalType& outPortal,
|
|
vtkm::Id inOffset,
|
|
vtkm::Id outOffset,
|
|
vtkm::Id numValues)
|
|
{
|
|
using Kernel = CopyBody<InputPortalType, OutputPortalType>;
|
|
Kernel kernel(inPortal, outPortal, inOffset, outOffset);
|
|
::tbb::blocked_range<vtkm::Id> range(0, numValues, TBB_GRAIN_SIZE);
|
|
::tbb::parallel_for(range, kernel);
|
|
}
|
|
|
|
template <typename InputPortalType,
|
|
typename StencilPortalType,
|
|
typename OutputPortalType,
|
|
typename UnaryPredicateType>
|
|
struct CopyIfBody
|
|
{
|
|
using ValueType = typename InputPortalType::ValueType;
|
|
using StencilType = typename StencilPortalType::ValueType;
|
|
|
|
struct Range
|
|
{
|
|
vtkm::Id InputBegin;
|
|
vtkm::Id InputEnd;
|
|
vtkm::Id OutputBegin;
|
|
vtkm::Id OutputEnd;
|
|
|
|
|
|
Range()
|
|
: InputBegin(-1)
|
|
, InputEnd(-1)
|
|
, OutputBegin(-1)
|
|
, OutputEnd(-1)
|
|
{
|
|
}
|
|
|
|
|
|
Range(vtkm::Id inputBegin, vtkm::Id inputEnd, vtkm::Id outputBegin, vtkm::Id outputEnd)
|
|
: InputBegin(inputBegin)
|
|
, InputEnd(inputEnd)
|
|
, OutputBegin(outputBegin)
|
|
, OutputEnd(outputEnd)
|
|
{
|
|
this->AssertSane();
|
|
}
|
|
|
|
|
|
void AssertSane() const
|
|
{
|
|
VTKM_ASSERT("Input begin precedes end" && this->InputBegin <= this->InputEnd);
|
|
VTKM_ASSERT("Output begin precedes end" && this->OutputBegin <= this->OutputEnd);
|
|
VTKM_ASSERT("Output not past input" && this->OutputBegin <= this->InputBegin &&
|
|
this->OutputEnd <= this->InputEnd);
|
|
VTKM_ASSERT("Output smaller than input" &&
|
|
(this->OutputEnd - this->OutputBegin) <= (this->InputEnd - this->InputBegin));
|
|
}
|
|
|
|
|
|
bool IsNext(const Range& next) const { return this->InputEnd == next.InputBegin; }
|
|
};
|
|
|
|
InputPortalType InputPortal;
|
|
StencilPortalType StencilPortal;
|
|
OutputPortalType OutputPortal;
|
|
UnaryPredicateType UnaryPredicate;
|
|
Range Ranges;
|
|
|
|
VTKM_CONT
|
|
CopyIfBody(const InputPortalType& inputPortal,
|
|
const StencilPortalType& stencilPortal,
|
|
const OutputPortalType& outputPortal,
|
|
UnaryPredicateType unaryPredicate)
|
|
: InputPortal(inputPortal)
|
|
, StencilPortal(stencilPortal)
|
|
, OutputPortal(outputPortal)
|
|
, UnaryPredicate(unaryPredicate)
|
|
{
|
|
}
|
|
|
|
|
|
CopyIfBody(const CopyIfBody& body, ::tbb::split)
|
|
: InputPortal(body.InputPortal)
|
|
, StencilPortal(body.StencilPortal)
|
|
, OutputPortal(body.OutputPortal)
|
|
, UnaryPredicate(body.UnaryPredicate)
|
|
{
|
|
}
|
|
|
|
|
|
|
|
void operator()(const ::tbb::blocked_range<vtkm::Id>& range)
|
|
{
|
|
if (range.empty())
|
|
{
|
|
return;
|
|
}
|
|
|
|
bool firstRun = this->Ranges.OutputBegin < 0; // First use of this body object
|
|
if (firstRun)
|
|
{
|
|
this->Ranges.InputBegin = range.begin();
|
|
}
|
|
else
|
|
{
|
|
// Must be a continuation of the previous input range:
|
|
VTKM_ASSERT(this->Ranges.InputEnd == range.begin());
|
|
}
|
|
this->Ranges.InputEnd = range.end();
|
|
this->Ranges.AssertSane();
|
|
|
|
using InputIteratorsType = vtkm::cont::ArrayPortalToIterators<InputPortalType>;
|
|
using StencilIteratorsType = vtkm::cont::ArrayPortalToIterators<StencilPortalType>;
|
|
using OutputIteratorsType = vtkm::cont::ArrayPortalToIterators<OutputPortalType>;
|
|
|
|
InputIteratorsType inputIters(this->InputPortal);
|
|
StencilIteratorsType stencilIters(this->StencilPortal);
|
|
OutputIteratorsType outputIters(this->OutputPortal);
|
|
|
|
using InputIteratorType = typename InputIteratorsType::IteratorType;
|
|
using StencilIteratorType = typename StencilIteratorsType::IteratorType;
|
|
using OutputIteratorType = typename OutputIteratorsType::IteratorType;
|
|
|
|
InputIteratorType inIter = inputIters.GetBegin();
|
|
StencilIteratorType stencilIter = stencilIters.GetBegin();
|
|
OutputIteratorType outIter = outputIters.GetBegin();
|
|
|
|
vtkm::Id readPos = range.begin();
|
|
const vtkm::Id readEnd = range.end();
|
|
|
|
// Determine output index. If we're reusing the body, pick up where the
|
|
// last block left off. If not, use the input range.
|
|
vtkm::Id writePos;
|
|
if (firstRun)
|
|
{
|
|
this->Ranges.OutputBegin = range.begin();
|
|
this->Ranges.OutputEnd = range.begin();
|
|
writePos = range.begin();
|
|
}
|
|
else
|
|
{
|
|
writePos = this->Ranges.OutputEnd;
|
|
}
|
|
this->Ranges.AssertSane();
|
|
|
|
// We're either writing at the end of a previous block, or at the input
|
|
// location. Either way, the write position will never be greater than
|
|
// the read position.
|
|
VTKM_ASSERT(writePos <= readPos);
|
|
|
|
UnaryPredicateType predicate(this->UnaryPredicate);
|
|
for (; readPos < readEnd; ++readPos)
|
|
{
|
|
if (predicate(stencilIter[readPos]))
|
|
{
|
|
outIter[writePos] = inIter[readPos];
|
|
++writePos;
|
|
}
|
|
}
|
|
|
|
this->Ranges.OutputEnd = writePos;
|
|
}
|
|
|
|
|
|
|
|
void join(const CopyIfBody& rhs)
|
|
{
|
|
using OutputIteratorsType = vtkm::cont::ArrayPortalToIterators<OutputPortalType>;
|
|
using OutputIteratorType = typename OutputIteratorsType::IteratorType;
|
|
|
|
OutputIteratorsType outputIters(this->OutputPortal);
|
|
OutputIteratorType outIter = outputIters.GetBegin();
|
|
|
|
this->Ranges.AssertSane();
|
|
rhs.Ranges.AssertSane();
|
|
// Ensure that we're joining two consecutive subsets of the input:
|
|
VTKM_ASSERT(this->Ranges.IsNext(rhs.Ranges));
|
|
|
|
vtkm::Id srcBegin = rhs.Ranges.OutputBegin;
|
|
const vtkm::Id srcEnd = rhs.Ranges.OutputEnd;
|
|
const vtkm::Id dstBegin = this->Ranges.OutputEnd;
|
|
|
|
// move data:
|
|
if (srcBegin != dstBegin && srcBegin != srcEnd)
|
|
{
|
|
// Sanity check:
|
|
VTKM_ASSERT(srcBegin < srcEnd);
|
|
std::copy(outIter + srcBegin, outIter + srcEnd, outIter + dstBegin);
|
|
}
|
|
|
|
this->Ranges.InputEnd = rhs.Ranges.InputEnd;
|
|
this->Ranges.OutputEnd += srcEnd - srcBegin;
|
|
this->Ranges.AssertSane();
|
|
}
|
|
};
|
|
|
|
|
|
template <typename InputPortalType,
|
|
typename StencilPortalType,
|
|
typename OutputPortalType,
|
|
typename UnaryPredicateType>
|
|
VTKM_CONT vtkm::Id CopyIfPortals(InputPortalType inputPortal,
|
|
StencilPortalType stencilPortal,
|
|
OutputPortalType outputPortal,
|
|
UnaryPredicateType unaryPredicate)
|
|
{
|
|
const vtkm::Id inputLength = inputPortal.GetNumberOfValues();
|
|
VTKM_ASSERT(inputLength == stencilPortal.GetNumberOfValues());
|
|
|
|
if (inputLength == 0)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
CopyIfBody<InputPortalType, StencilPortalType, OutputPortalType, UnaryPredicateType> body(
|
|
inputPortal, stencilPortal, outputPortal, unaryPredicate);
|
|
::tbb::blocked_range<vtkm::Id> range(0, inputLength, TBB_GRAIN_SIZE);
|
|
|
|
::tbb::parallel_reduce(range, body);
|
|
|
|
body.Ranges.AssertSane();
|
|
VTKM_ASSERT(body.Ranges.InputBegin == 0 && body.Ranges.InputEnd == inputLength &&
|
|
body.Ranges.OutputBegin == 0 && body.Ranges.OutputEnd <= inputLength);
|
|
|
|
return body.Ranges.OutputEnd;
|
|
}
|
|
|
|
template <class InputPortalType, class T, class BinaryOperationType>
|
|
struct ReduceBody
|
|
{
|
|
T Sum;
|
|
T InitialValue;
|
|
bool FirstCall;
|
|
InputPortalType InputPortal;
|
|
BinaryOperationType BinaryOperation;
|
|
|
|
VTKM_CONT
|
|
ReduceBody(const InputPortalType& inputPortal,
|
|
T initialValue,
|
|
BinaryOperationType binaryOperation)
|
|
: Sum(vtkm::TypeTraits<T>::ZeroInitialization())
|
|
, InitialValue(initialValue)
|
|
, FirstCall(true)
|
|
, InputPortal(inputPortal)
|
|
, BinaryOperation(binaryOperation)
|
|
{
|
|
}
|
|
|
|
|
|
ReduceBody(const ReduceBody& body, ::tbb::split)
|
|
: Sum(vtkm::TypeTraits<T>::ZeroInitialization())
|
|
, InitialValue(body.InitialValue)
|
|
, FirstCall(true)
|
|
, InputPortal(body.InputPortal)
|
|
, BinaryOperation(body.BinaryOperation)
|
|
{
|
|
}
|
|
|
|
|
|
|
|
void operator()(const ::tbb::blocked_range<vtkm::Id>& range)
|
|
{
|
|
using InputIteratorsType = vtkm::cont::ArrayPortalToIterators<InputPortalType>;
|
|
InputIteratorsType inputIterators(this->InputPortal);
|
|
|
|
//use temp, and iterators instead of member variable to reduce false sharing
|
|
typename InputIteratorsType::IteratorType inIter =
|
|
inputIterators.GetBegin() + static_cast<std::ptrdiff_t>(range.begin());
|
|
|
|
T temp = this->BinaryOperation(*inIter, *(inIter + 1));
|
|
++inIter;
|
|
++inIter;
|
|
for (vtkm::Id index = range.begin() + 2; index != range.end(); ++index, ++inIter)
|
|
{
|
|
temp = this->BinaryOperation(temp, *inIter);
|
|
}
|
|
|
|
//determine if we also have to add the initial value to temp
|
|
if (range.begin() == 0)
|
|
{
|
|
temp = this->BinaryOperation(temp, this->InitialValue);
|
|
}
|
|
|
|
//Now we can save temp back to sum, taking into account if
|
|
//this task has been called before, and the sum value needs
|
|
//to also be reduced.
|
|
if (this->FirstCall)
|
|
{
|
|
this->Sum = temp;
|
|
}
|
|
else
|
|
{
|
|
this->Sum = this->BinaryOperation(this->Sum, temp);
|
|
}
|
|
|
|
this->FirstCall = false;
|
|
}
|
|
|
|
|
|
|
|
void join(const ReduceBody& left)
|
|
{
|
|
// std::cout << "join" << std::endl;
|
|
this->Sum = this->BinaryOperation(left.Sum, this->Sum);
|
|
}
|
|
};
|
|
|
|
|
|
template <class InputPortalType, typename T, class BinaryOperationType>
|
|
VTKM_CONT static auto ReducePortals(InputPortalType inputPortal,
|
|
T initialValue,
|
|
BinaryOperationType binaryOperation)
|
|
-> decltype(binaryOperation(initialValue, inputPortal.Get(0)))
|
|
{
|
|
using ResultType = decltype(binaryOperation(initialValue, inputPortal.Get(0)));
|
|
using WrappedBinaryOp = internal::WrappedBinaryOperator<ResultType, BinaryOperationType>;
|
|
|
|
WrappedBinaryOp wrappedBinaryOp(binaryOperation);
|
|
ReduceBody<InputPortalType, ResultType, WrappedBinaryOp> body(
|
|
inputPortal, initialValue, wrappedBinaryOp);
|
|
vtkm::Id arrayLength = inputPortal.GetNumberOfValues();
|
|
|
|
if (arrayLength > 1)
|
|
{
|
|
::tbb::blocked_range<vtkm::Id> range(0, arrayLength, TBB_GRAIN_SIZE);
|
|
::tbb::parallel_reduce(range, body);
|
|
return body.Sum;
|
|
}
|
|
else if (arrayLength == 1)
|
|
{
|
|
//ReduceBody does not work with an array of size 1.
|
|
return binaryOperation(initialValue, inputPortal.Get(0));
|
|
}
|
|
else // arrayLength == 0
|
|
{
|
|
// ReduceBody does not work with an array of size 0.
|
|
return static_cast<ResultType>(initialValue);
|
|
}
|
|
}
|
|
|
|
// Define this to print out timing information from the reduction and join
|
|
// operations in the tbb ReduceByKey algorithm:
|
|
//#define VTKM_DEBUG_TBB_RBK
|
|
|
|
template <typename KeysInPortalType,
|
|
typename ValuesInPortalType,
|
|
typename KeysOutPortalType,
|
|
typename ValuesOutPortalType,
|
|
class BinaryOperationType>
|
|
struct ReduceByKeyBody
|
|
{
|
|
using KeyType = typename KeysInPortalType::ValueType;
|
|
using ValueType = typename ValuesInPortalType::ValueType;
|
|
|
|
struct Range
|
|
{
|
|
vtkm::Id InputBegin;
|
|
vtkm::Id InputEnd;
|
|
vtkm::Id OutputBegin;
|
|
vtkm::Id OutputEnd;
|
|
|
|
|
|
Range()
|
|
: InputBegin(-1)
|
|
, InputEnd(-1)
|
|
, OutputBegin(-1)
|
|
, OutputEnd(-1)
|
|
{
|
|
}
|
|
|
|
|
|
Range(vtkm::Id inputBegin, vtkm::Id inputEnd, vtkm::Id outputBegin, vtkm::Id outputEnd)
|
|
: InputBegin(inputBegin)
|
|
, InputEnd(inputEnd)
|
|
, OutputBegin(outputBegin)
|
|
, OutputEnd(outputEnd)
|
|
{
|
|
this->AssertSane();
|
|
}
|
|
|
|
|
|
void AssertSane() const
|
|
{
|
|
VTKM_ASSERT("Input begin precedes end" && this->InputBegin <= this->InputEnd);
|
|
VTKM_ASSERT("Output begin precedes end" && this->OutputBegin <= this->OutputEnd);
|
|
VTKM_ASSERT("Output not past input" && this->OutputBegin <= this->InputBegin &&
|
|
this->OutputEnd <= this->InputEnd);
|
|
VTKM_ASSERT("Output smaller than input" &&
|
|
(this->OutputEnd - this->OutputBegin) <= (this->InputEnd - this->InputBegin));
|
|
}
|
|
|
|
|
|
bool IsNext(const Range& next) const { return this->InputEnd == next.InputBegin; }
|
|
};
|
|
|
|
KeysInPortalType KeysInPortal;
|
|
ValuesInPortalType ValuesInPortal;
|
|
KeysOutPortalType KeysOutPortal;
|
|
ValuesOutPortalType ValuesOutPortal;
|
|
BinaryOperationType BinaryOperation;
|
|
Range Ranges;
|
|
#ifdef VTKM_DEBUG_TBB_RBK
|
|
double ReduceTime;
|
|
double JoinTime;
|
|
#endif
|
|
|
|
VTKM_CONT
|
|
ReduceByKeyBody(const KeysInPortalType& keysInPortal,
|
|
const ValuesInPortalType& valuesInPortal,
|
|
const KeysOutPortalType& keysOutPortal,
|
|
const ValuesOutPortalType& valuesOutPortal,
|
|
BinaryOperationType binaryOperation)
|
|
: KeysInPortal(keysInPortal)
|
|
, ValuesInPortal(valuesInPortal)
|
|
, KeysOutPortal(keysOutPortal)
|
|
, ValuesOutPortal(valuesOutPortal)
|
|
, BinaryOperation(binaryOperation)
|
|
#ifdef VTKM_DEBUG_TBB_RBK
|
|
, ReduceTime(0)
|
|
, JoinTime(0)
|
|
#endif
|
|
{
|
|
}
|
|
|
|
|
|
ReduceByKeyBody(const ReduceByKeyBody& body, ::tbb::split)
|
|
: KeysInPortal(body.KeysInPortal)
|
|
, ValuesInPortal(body.ValuesInPortal)
|
|
, KeysOutPortal(body.KeysOutPortal)
|
|
, ValuesOutPortal(body.ValuesOutPortal)
|
|
, BinaryOperation(body.BinaryOperation)
|
|
#ifdef VTKM_DEBUG_TBB_RBK
|
|
, ReduceTime(0)
|
|
, JoinTime(0)
|
|
#endif
|
|
{
|
|
}
|
|
|
|
|
|
|
|
void operator()(const ::tbb::blocked_range<vtkm::Id>& range)
|
|
{
|
|
#ifdef VTKM_DEBUG_TBB_RBK
|
|
::tbb::tick_count startTime = ::tbb::tick_count::now();
|
|
#endif // VTKM_DEBUG_TBB_RBK
|
|
if (range.empty())
|
|
{
|
|
return;
|
|
}
|
|
|
|
bool firstRun = this->Ranges.OutputBegin < 0; // First use of this body object
|
|
if (firstRun)
|
|
{
|
|
this->Ranges.InputBegin = range.begin();
|
|
}
|
|
else
|
|
{
|
|
// Must be a continuation of the previous input range:
|
|
VTKM_ASSERT(this->Ranges.InputEnd == range.begin());
|
|
}
|
|
this->Ranges.InputEnd = range.end();
|
|
this->Ranges.AssertSane();
|
|
|
|
using KeysInIteratorsType = vtkm::cont::ArrayPortalToIterators<KeysInPortalType>;
|
|
using ValuesInIteratorsType = vtkm::cont::ArrayPortalToIterators<ValuesInPortalType>;
|
|
using KeysOutIteratorsType = vtkm::cont::ArrayPortalToIterators<KeysOutPortalType>;
|
|
using ValuesOutIteratorsType = vtkm::cont::ArrayPortalToIterators<ValuesOutPortalType>;
|
|
|
|
KeysInIteratorsType keysInIters(this->KeysInPortal);
|
|
ValuesInIteratorsType valuesInIters(this->ValuesInPortal);
|
|
KeysOutIteratorsType keysOutIters(this->KeysOutPortal);
|
|
ValuesOutIteratorsType valuesOutIters(this->ValuesOutPortal);
|
|
|
|
using KeysInIteratorType = typename KeysInIteratorsType::IteratorType;
|
|
using ValuesInIteratorType = typename ValuesInIteratorsType::IteratorType;
|
|
using KeysOutIteratorType = typename KeysOutIteratorsType::IteratorType;
|
|
using ValuesOutIteratorType = typename ValuesOutIteratorsType::IteratorType;
|
|
|
|
KeysInIteratorType keysIn = keysInIters.GetBegin();
|
|
ValuesInIteratorType valuesIn = valuesInIters.GetBegin();
|
|
KeysOutIteratorType keysOut = keysOutIters.GetBegin();
|
|
ValuesOutIteratorType valuesOut = valuesOutIters.GetBegin();
|
|
|
|
vtkm::Id readPos = range.begin();
|
|
const vtkm::Id readEnd = range.end();
|
|
|
|
// Determine output index. If we're reusing the body, pick up where the
|
|
// last block left off. If not, use the input range.
|
|
vtkm::Id writePos;
|
|
if (firstRun)
|
|
{
|
|
this->Ranges.OutputBegin = range.begin();
|
|
this->Ranges.OutputEnd = range.begin();
|
|
writePos = range.begin();
|
|
}
|
|
else
|
|
{
|
|
writePos = this->Ranges.OutputEnd;
|
|
}
|
|
this->Ranges.AssertSane();
|
|
|
|
// We're either writing at the end of a previous block, or at the input
|
|
// location. Either way, the write position will never be greater than
|
|
// the read position.
|
|
VTKM_ASSERT(writePos <= readPos);
|
|
|
|
// Initialize reduction variables:
|
|
BinaryOperationType functor(this->BinaryOperation);
|
|
KeyType currentKey = keysIn[readPos];
|
|
ValueType currentValue = valuesIn[readPos];
|
|
++readPos;
|
|
|
|
// If the start of the current range continues a previous key block,
|
|
// initialize with the previous result and decrement the write index.
|
|
VTKM_ASSERT(firstRun || writePos > 0);
|
|
if (!firstRun && keysOut[writePos - 1] == currentKey)
|
|
{
|
|
// Ensure that we'll overwrite the continued key values:
|
|
--writePos;
|
|
|
|
// Update our accumulator with the partial value:
|
|
currentValue = functor(valuesOut[writePos], currentValue);
|
|
}
|
|
|
|
// Special case: single value in range
|
|
if (readPos >= readEnd)
|
|
{
|
|
keysOut[writePos] = currentKey;
|
|
valuesOut[writePos] = currentValue;
|
|
++writePos;
|
|
|
|
this->Ranges.OutputEnd = writePos;
|
|
return;
|
|
}
|
|
|
|
for (;;)
|
|
{
|
|
while (readPos < readEnd && currentKey == keysIn[readPos])
|
|
{
|
|
currentValue = functor(currentValue, valuesIn[readPos]);
|
|
++readPos;
|
|
}
|
|
|
|
VTKM_ASSERT(writePos <= readPos);
|
|
keysOut[writePos] = currentKey;
|
|
valuesOut[writePos] = currentValue;
|
|
++writePos;
|
|
|
|
if (readPos < readEnd)
|
|
{
|
|
currentKey = keysIn[readPos];
|
|
currentValue = valuesIn[readPos];
|
|
++readPos;
|
|
continue;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
this->Ranges.OutputEnd = writePos;
|
|
|
|
#ifdef VTKM_DEBUG_TBB_RBK
|
|
::tbb::tick_count endTime = ::tbb::tick_count::now();
|
|
double time = (endTime - startTime).seconds();
|
|
this->ReduceTime += time;
|
|
std::ostringstream out;
|
|
out << "Reduced " << range.size() << " key/value pairs in " << time << "s. "
|
|
<< "InRange: " << this->Ranges.InputBegin << " " << this->Ranges.InputEnd << " "
|
|
<< "OutRange: " << this->Ranges.OutputBegin << " " << this->Ranges.OutputEnd << "\n";
|
|
std::cerr << out.str();
|
|
#endif
|
|
}
|
|
|
|
|
|
|
|
void join(const ReduceByKeyBody& rhs)
|
|
{
|
|
using KeysIteratorsType = vtkm::cont::ArrayPortalToIterators<KeysOutPortalType>;
|
|
using ValuesIteratorsType = vtkm::cont::ArrayPortalToIterators<ValuesOutPortalType>;
|
|
using KeysIteratorType = typename KeysIteratorsType::IteratorType;
|
|
using ValuesIteratorType = typename ValuesIteratorsType::IteratorType;
|
|
|
|
#ifdef VTKM_DEBUG_TBB_RBK
|
|
::tbb::tick_count startTime = ::tbb::tick_count::now();
|
|
#endif
|
|
|
|
this->Ranges.AssertSane();
|
|
rhs.Ranges.AssertSane();
|
|
// Ensure that we're joining two consecutive subsets of the input:
|
|
VTKM_ASSERT(this->Ranges.IsNext(rhs.Ranges));
|
|
|
|
KeysIteratorsType keysIters(this->KeysOutPortal);
|
|
ValuesIteratorsType valuesIters(this->ValuesOutPortal);
|
|
KeysIteratorType keys = keysIters.GetBegin();
|
|
ValuesIteratorType values = valuesIters.GetBegin();
|
|
|
|
const vtkm::Id dstBegin = this->Ranges.OutputEnd;
|
|
const vtkm::Id lastDstIdx = this->Ranges.OutputEnd - 1;
|
|
|
|
vtkm::Id srcBegin = rhs.Ranges.OutputBegin;
|
|
const vtkm::Id srcEnd = rhs.Ranges.OutputEnd;
|
|
|
|
// Merge boundaries if needed:
|
|
if (keys[srcBegin] == keys[lastDstIdx])
|
|
{
|
|
values[lastDstIdx] = this->BinaryOperation(values[lastDstIdx], values[srcBegin]);
|
|
++srcBegin; // Don't copy the key/value we just reduced
|
|
}
|
|
|
|
// move data:
|
|
if (srcBegin != dstBegin && srcBegin != srcEnd)
|
|
{
|
|
// Sanity check:
|
|
VTKM_ASSERT(srcBegin < srcEnd);
|
|
std::copy(keys + srcBegin, keys + srcEnd, keys + dstBegin);
|
|
std::copy(values + srcBegin, values + srcEnd, values + dstBegin);
|
|
}
|
|
|
|
this->Ranges.InputEnd = rhs.Ranges.InputEnd;
|
|
this->Ranges.OutputEnd += srcEnd - srcBegin;
|
|
this->Ranges.AssertSane();
|
|
|
|
#ifdef VTKM_DEBUG_TBB_RBK
|
|
::tbb::tick_count endTime = ::tbb::tick_count::now();
|
|
double time = (endTime - startTime).seconds();
|
|
this->JoinTime += rhs.JoinTime + time;
|
|
std::ostringstream out;
|
|
out << "Joined " << (srcEnd - srcBegin) << " rhs values into body in " << time << "s. "
|
|
<< "InRange: " << this->Ranges.InputBegin << " " << this->Ranges.InputEnd << " "
|
|
<< "OutRange: " << this->Ranges.OutputBegin << " " << this->Ranges.OutputEnd << "\n";
|
|
std::cerr << out.str();
|
|
#endif
|
|
}
|
|
};
|
|
|
|
|
|
template <typename KeysInPortalType,
|
|
typename ValuesInPortalType,
|
|
typename KeysOutPortalType,
|
|
typename ValuesOutPortalType,
|
|
typename BinaryOperationType>
|
|
VTKM_CONT vtkm::Id ReduceByKeyPortals(KeysInPortalType keysInPortal,
|
|
ValuesInPortalType valuesInPortal,
|
|
KeysOutPortalType keysOutPortal,
|
|
ValuesOutPortalType valuesOutPortal,
|
|
BinaryOperationType binaryOperation)
|
|
{
|
|
const vtkm::Id inputLength = keysInPortal.GetNumberOfValues();
|
|
VTKM_ASSERT(inputLength == valuesInPortal.GetNumberOfValues());
|
|
|
|
if (inputLength == 0)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
using ValueType = typename ValuesInPortalType::ValueType;
|
|
using WrappedBinaryOp = internal::WrappedBinaryOperator<ValueType, BinaryOperationType>;
|
|
WrappedBinaryOp wrappedBinaryOp(binaryOperation);
|
|
|
|
ReduceByKeyBody<KeysInPortalType,
|
|
ValuesInPortalType,
|
|
KeysOutPortalType,
|
|
ValuesOutPortalType,
|
|
WrappedBinaryOp>
|
|
body(keysInPortal, valuesInPortal, keysOutPortal, valuesOutPortal, wrappedBinaryOp);
|
|
::tbb::blocked_range<vtkm::Id> range(0, inputLength, TBB_GRAIN_SIZE);
|
|
|
|
#ifdef VTKM_DEBUG_TBB_RBK
|
|
std::cerr << "\n\nTBB ReduceByKey:\n";
|
|
#endif
|
|
|
|
::tbb::parallel_reduce(range, body);
|
|
|
|
#ifdef VTKM_DEBUG_TBB_RBK
|
|
std::cerr << "Total reduce time: " << body.ReduceTime << "s\n";
|
|
std::cerr << "Total join time: " << body.JoinTime << "s\n";
|
|
std::cerr << "\nend\n";
|
|
#endif
|
|
|
|
body.Ranges.AssertSane();
|
|
VTKM_ASSERT(body.Ranges.InputBegin == 0 && body.Ranges.InputEnd == inputLength &&
|
|
body.Ranges.OutputBegin == 0 && body.Ranges.OutputEnd <= inputLength);
|
|
|
|
return body.Ranges.OutputEnd;
|
|
}
|
|
|
|
#ifdef VTKM_DEBUG_TBB_RBK
|
|
#undef VTKM_DEBUG_TBB_RBK
|
|
#endif
|
|
|
|
template <class InputPortalType, class OutputPortalType, class BinaryOperationType>
|
|
struct ScanInclusiveBody
|
|
{
|
|
using ValueType = typename std::remove_reference<typename OutputPortalType::ValueType>::type;
|
|
ValueType Sum;
|
|
bool FirstCall;
|
|
InputPortalType InputPortal;
|
|
OutputPortalType OutputPortal;
|
|
BinaryOperationType BinaryOperation;
|
|
|
|
VTKM_CONT
|
|
ScanInclusiveBody(const InputPortalType& inputPortal,
|
|
const OutputPortalType& outputPortal,
|
|
BinaryOperationType binaryOperation)
|
|
: Sum(vtkm::TypeTraits<ValueType>::ZeroInitialization())
|
|
, FirstCall(true)
|
|
, InputPortal(inputPortal)
|
|
, OutputPortal(outputPortal)
|
|
, BinaryOperation(binaryOperation)
|
|
{
|
|
}
|
|
|
|
|
|
ScanInclusiveBody(const ScanInclusiveBody& body, ::tbb::split)
|
|
: Sum(vtkm::TypeTraits<ValueType>::ZeroInitialization())
|
|
, FirstCall(true)
|
|
, InputPortal(body.InputPortal)
|
|
, OutputPortal(body.OutputPortal)
|
|
, BinaryOperation(body.BinaryOperation)
|
|
{
|
|
}
|
|
|
|
|
|
|
|
void operator()(const ::tbb::blocked_range<vtkm::Id>& range, ::tbb::pre_scan_tag)
|
|
{
|
|
using InputIteratorsType = vtkm::cont::ArrayPortalToIterators<InputPortalType>;
|
|
InputIteratorsType inputIterators(this->InputPortal);
|
|
|
|
//use temp, and iterators instead of member variable to reduce false sharing
|
|
typename InputIteratorsType::IteratorType inIter =
|
|
inputIterators.GetBegin() + static_cast<std::ptrdiff_t>(range.begin());
|
|
ValueType temp = this->FirstCall ? *inIter++ : this->BinaryOperation(this->Sum, *inIter++);
|
|
this->FirstCall = false;
|
|
for (vtkm::Id index = range.begin() + 1; index != range.end(); ++index, ++inIter)
|
|
{
|
|
temp = this->BinaryOperation(temp, *inIter);
|
|
}
|
|
this->Sum = temp;
|
|
}
|
|
|
|
|
|
|
|
void operator()(const ::tbb::blocked_range<vtkm::Id>& range, ::tbb::final_scan_tag)
|
|
{
|
|
using InputIteratorsType = vtkm::cont::ArrayPortalToIterators<InputPortalType>;
|
|
using OutputIteratorsType = vtkm::cont::ArrayPortalToIterators<OutputPortalType>;
|
|
|
|
InputIteratorsType inputIterators(this->InputPortal);
|
|
OutputIteratorsType outputIterators(this->OutputPortal);
|
|
|
|
//use temp, and iterators instead of member variable to reduce false sharing
|
|
typename InputIteratorsType::IteratorType inIter =
|
|
inputIterators.GetBegin() + static_cast<std::ptrdiff_t>(range.begin());
|
|
typename OutputIteratorsType::IteratorType outIter =
|
|
outputIterators.GetBegin() + static_cast<std::ptrdiff_t>(range.begin());
|
|
ValueType temp = this->FirstCall ? *inIter++ : this->BinaryOperation(this->Sum, *inIter++);
|
|
this->FirstCall = false;
|
|
*outIter++ = temp;
|
|
for (vtkm::Id index = range.begin() + 1; index != range.end(); ++index, ++inIter, ++outIter)
|
|
{
|
|
*outIter = temp = this->BinaryOperation(temp, *inIter);
|
|
}
|
|
this->Sum = temp;
|
|
}
|
|
|
|
|
|
|
|
void reverse_join(const ScanInclusiveBody& left)
|
|
{
|
|
this->Sum = this->BinaryOperation(left.Sum, this->Sum);
|
|
}
|
|
|
|
|
|
|
|
void assign(const ScanInclusiveBody& src) { this->Sum = src.Sum; }
|
|
};
|
|
|
|
template <class InputPortalType, class OutputPortalType, class BinaryOperationType>
|
|
struct ScanExclusiveBody
|
|
{
|
|
using ValueType = typename std::remove_reference<typename OutputPortalType::ValueType>::type;
|
|
|
|
ValueType Sum;
|
|
bool FirstCall;
|
|
InputPortalType InputPortal;
|
|
OutputPortalType OutputPortal;
|
|
BinaryOperationType BinaryOperation;
|
|
|
|
VTKM_CONT
|
|
ScanExclusiveBody(const InputPortalType& inputPortal,
|
|
const OutputPortalType& outputPortal,
|
|
BinaryOperationType binaryOperation,
|
|
const ValueType& initialValue)
|
|
: Sum(initialValue)
|
|
, FirstCall(true)
|
|
, InputPortal(inputPortal)
|
|
, OutputPortal(outputPortal)
|
|
, BinaryOperation(binaryOperation)
|
|
{
|
|
}
|
|
|
|
|
|
ScanExclusiveBody(const ScanExclusiveBody& body, ::tbb::split)
|
|
: Sum(body.Sum)
|
|
, FirstCall(true)
|
|
, InputPortal(body.InputPortal)
|
|
, OutputPortal(body.OutputPortal)
|
|
, BinaryOperation(body.BinaryOperation)
|
|
{
|
|
}
|
|
|
|
|
|
|
|
void operator()(const ::tbb::blocked_range<vtkm::Id>& range, ::tbb::pre_scan_tag)
|
|
{
|
|
using InputIteratorsType = vtkm::cont::ArrayPortalToIterators<InputPortalType>;
|
|
InputIteratorsType inputIterators(this->InputPortal);
|
|
|
|
//move the iterator to the first item
|
|
typename InputIteratorsType::IteratorType iter =
|
|
inputIterators.GetBegin() + static_cast<std::ptrdiff_t>(range.begin());
|
|
|
|
ValueType temp = *iter;
|
|
++iter;
|
|
if (!(this->FirstCall && range.begin() > 0))
|
|
{
|
|
temp = this->BinaryOperation(this->Sum, temp);
|
|
}
|
|
for (vtkm::Id index = range.begin() + 1; index != range.end(); ++index, ++iter)
|
|
{
|
|
temp = this->BinaryOperation(temp, *iter);
|
|
}
|
|
this->Sum = temp;
|
|
this->FirstCall = false;
|
|
}
|
|
|
|
|
|
|
|
void operator()(const ::tbb::blocked_range<vtkm::Id>& range, ::tbb::final_scan_tag)
|
|
{
|
|
using InputIteratorsType = vtkm::cont::ArrayPortalToIterators<InputPortalType>;
|
|
using OutputIteratorsType = vtkm::cont::ArrayPortalToIterators<OutputPortalType>;
|
|
|
|
InputIteratorsType inputIterators(this->InputPortal);
|
|
OutputIteratorsType outputIterators(this->OutputPortal);
|
|
|
|
//move the iterators to the first item
|
|
typename InputIteratorsType::IteratorType inIter =
|
|
inputIterators.GetBegin() + static_cast<std::ptrdiff_t>(range.begin());
|
|
typename OutputIteratorsType::IteratorType outIter =
|
|
outputIterators.GetBegin() + static_cast<std::ptrdiff_t>(range.begin());
|
|
|
|
ValueType temp = this->Sum;
|
|
for (vtkm::Id index = range.begin(); index != range.end(); ++index, ++inIter, ++outIter)
|
|
{
|
|
//copy into a local reference since Input and Output portal
|
|
//could point to the same memory location
|
|
ValueType v = *inIter;
|
|
*outIter = temp;
|
|
temp = this->BinaryOperation(temp, v);
|
|
}
|
|
this->Sum = temp;
|
|
this->FirstCall = false;
|
|
}
|
|
|
|
|
|
|
|
void reverse_join(const ScanExclusiveBody& left)
|
|
{
|
|
//The contract we have with TBB is that they will only join
|
|
//two objects that have been scanned, or two objects which
|
|
//haven't been scanned
|
|
VTKM_ASSERT(left.FirstCall == this->FirstCall);
|
|
if (!left.FirstCall && !this->FirstCall)
|
|
{
|
|
this->Sum = this->BinaryOperation(left.Sum, this->Sum);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void assign(const ScanExclusiveBody& src) { this->Sum = src.Sum; }
|
|
};
|
|
|
|
|
|
template <class InputPortalType, class OutputPortalType, class BinaryOperationType>
|
|
VTKM_CONT static typename std::remove_reference<typename OutputPortalType::ValueType>::type
|
|
ScanInclusivePortals(InputPortalType inputPortal,
|
|
OutputPortalType outputPortal,
|
|
BinaryOperationType binaryOperation)
|
|
{
|
|
using ValueType = typename std::remove_reference<typename OutputPortalType::ValueType>::type;
|
|
|
|
using WrappedBinaryOp = internal::WrappedBinaryOperator<ValueType, BinaryOperationType>;
|
|
|
|
WrappedBinaryOp wrappedBinaryOp(binaryOperation);
|
|
ScanInclusiveBody<InputPortalType, OutputPortalType, WrappedBinaryOp> body(
|
|
inputPortal, outputPortal, wrappedBinaryOp);
|
|
vtkm::Id arrayLength = inputPortal.GetNumberOfValues();
|
|
|
|
::tbb::blocked_range<vtkm::Id> range(0, arrayLength, TBB_GRAIN_SIZE);
|
|
::tbb::parallel_scan(range, body);
|
|
return body.Sum;
|
|
}
|
|
|
|
|
|
template <class InputPortalType, class OutputPortalType, class BinaryOperationType>
|
|
VTKM_CONT static typename std::remove_reference<typename OutputPortalType::ValueType>::type
|
|
ScanExclusivePortals(
|
|
InputPortalType inputPortal,
|
|
OutputPortalType outputPortal,
|
|
BinaryOperationType binaryOperation,
|
|
typename std::remove_reference<typename OutputPortalType::ValueType>::type initialValue)
|
|
{
|
|
using ValueType = typename std::remove_reference<typename OutputPortalType::ValueType>::type;
|
|
|
|
using WrappedBinaryOp = internal::WrappedBinaryOperator<ValueType, BinaryOperationType>;
|
|
|
|
WrappedBinaryOp wrappedBinaryOp(binaryOperation);
|
|
ScanExclusiveBody<InputPortalType, OutputPortalType, WrappedBinaryOp> body(
|
|
inputPortal, outputPortal, wrappedBinaryOp, initialValue);
|
|
vtkm::Id arrayLength = inputPortal.GetNumberOfValues();
|
|
|
|
::tbb::blocked_range<vtkm::Id> range(0, arrayLength, TBB_GRAIN_SIZE);
|
|
::tbb::parallel_scan(range, body);
|
|
|
|
// Seems a little weird to me that we would return the last value in the
|
|
// array rather than the sum, but that is how the function is specified.
|
|
return body.Sum;
|
|
}
|
|
|
|
template <typename InputPortalType, typename IndexPortalType, typename OutputPortalType>
|
|
class ScatterKernel
|
|
{
|
|
public:
|
|
VTKM_CONT ScatterKernel(InputPortalType inputPortal,
|
|
IndexPortalType indexPortal,
|
|
OutputPortalType outputPortal)
|
|
: ValuesPortal(inputPortal)
|
|
, IndexPortal(indexPortal)
|
|
, OutputPortal(outputPortal)
|
|
{
|
|
}
|
|
|
|
VTKM_CONT
|
|
void operator()(const ::tbb::blocked_range<vtkm::Id>& range) const
|
|
{
|
|
// The TBB device adapter causes array classes to be shared between
|
|
// control and execution environment. This means that it is possible for
|
|
// an exception to be thrown even though this is typically not allowed.
|
|
// Throwing an exception from here is bad because there are several
|
|
// simultaneous threads running. Get around the problem by catching the
|
|
// error and setting the message buffer as expected.
|
|
try
|
|
{
|
|
VTKM_VECTORIZATION_PRE_LOOP
|
|
for (vtkm::Id i = range.begin(); i < range.end(); i++)
|
|
{
|
|
VTKM_VECTORIZATION_IN_LOOP
|
|
OutputPortal.Set(i, ValuesPortal.Get(IndexPortal.Get(i)));
|
|
}
|
|
}
|
|
catch (vtkm::cont::Error& error)
|
|
{
|
|
this->ErrorMessage.RaiseError(error.GetMessage().c_str());
|
|
}
|
|
catch (...)
|
|
{
|
|
this->ErrorMessage.RaiseError("Unexpected error in execution environment.");
|
|
}
|
|
}
|
|
|
|
private:
|
|
InputPortalType ValuesPortal;
|
|
IndexPortalType IndexPortal;
|
|
OutputPortalType OutputPortal;
|
|
vtkm::exec::internal::ErrorMessageBuffer ErrorMessage;
|
|
};
|
|
|
|
|
|
template <typename InputPortalType, typename IndexPortalType, typename OutputPortalType>
|
|
VTKM_CONT static void ScatterPortal(InputPortalType inputPortal,
|
|
IndexPortalType indexPortal,
|
|
OutputPortalType outputPortal)
|
|
{
|
|
const vtkm::Id size = inputPortal.GetNumberOfValues();
|
|
VTKM_ASSERT(size == indexPortal.GetNumberOfValues());
|
|
|
|
ScatterKernel<InputPortalType, IndexPortalType, OutputPortalType> scatter(
|
|
inputPortal, indexPortal, outputPortal);
|
|
|
|
::tbb::blocked_range<vtkm::Id> range(0, size, TBB_GRAIN_SIZE);
|
|
::tbb::parallel_for(range, scatter);
|
|
}
|
|
|
|
template <typename PortalType, typename BinaryOperationType>
|
|
struct UniqueBody
|
|
{
|
|
using ValueType = typename PortalType::ValueType;
|
|
|
|
struct Range
|
|
{
|
|
vtkm::Id InputBegin;
|
|
vtkm::Id InputEnd;
|
|
vtkm::Id OutputBegin;
|
|
vtkm::Id OutputEnd;
|
|
|
|
|
|
|
|
Range()
|
|
: InputBegin(-1)
|
|
, InputEnd(-1)
|
|
, OutputBegin(-1)
|
|
, OutputEnd(-1)
|
|
{
|
|
}
|
|
|
|
|
|
|
|
Range(vtkm::Id inputBegin, vtkm::Id inputEnd, vtkm::Id outputBegin, vtkm::Id outputEnd)
|
|
: InputBegin(inputBegin)
|
|
, InputEnd(inputEnd)
|
|
, OutputBegin(outputBegin)
|
|
, OutputEnd(outputEnd)
|
|
{
|
|
this->AssertSane();
|
|
}
|
|
|
|
|
|
|
|
void AssertSane() const
|
|
{
|
|
VTKM_ASSERT("Input begin precedes end" && this->InputBegin <= this->InputEnd);
|
|
VTKM_ASSERT("Output begin precedes end" && this->OutputBegin <= this->OutputEnd);
|
|
VTKM_ASSERT("Output not past input" && this->OutputBegin <= this->InputBegin &&
|
|
this->OutputEnd <= this->InputEnd);
|
|
VTKM_ASSERT("Output smaller than input" &&
|
|
(this->OutputEnd - this->OutputBegin) <= (this->InputEnd - this->InputBegin));
|
|
}
|
|
|
|
|
|
|
|
bool IsNext(const Range& next) const { return this->InputEnd == next.InputBegin; }
|
|
};
|
|
|
|
PortalType Portal;
|
|
BinaryOperationType BinaryOperation;
|
|
Range Ranges;
|
|
|
|
VTKM_CONT
|
|
UniqueBody(const PortalType& portal, BinaryOperationType binaryOperation)
|
|
: Portal(portal)
|
|
, BinaryOperation(binaryOperation)
|
|
{
|
|
}
|
|
|
|
VTKM_CONT
|
|
UniqueBody(const UniqueBody& body, ::tbb::split)
|
|
: Portal(body.Portal)
|
|
, BinaryOperation(body.BinaryOperation)
|
|
{
|
|
}
|
|
|
|
|
|
|
|
void operator()(const ::tbb::blocked_range<vtkm::Id>& range)
|
|
{
|
|
if (range.empty())
|
|
{
|
|
return;
|
|
}
|
|
|
|
bool firstRun = this->Ranges.OutputBegin < 0; // First use of this body object
|
|
if (firstRun)
|
|
{
|
|
this->Ranges.InputBegin = range.begin();
|
|
}
|
|
else
|
|
{
|
|
// Must be a continuation of the previous input range:
|
|
VTKM_ASSERT(this->Ranges.InputEnd == range.begin());
|
|
}
|
|
this->Ranges.InputEnd = range.end();
|
|
this->Ranges.AssertSane();
|
|
|
|
using IteratorsType = vtkm::cont::ArrayPortalToIterators<PortalType>;
|
|
using IteratorType = typename IteratorsType::IteratorType;
|
|
|
|
IteratorsType iters(this->Portal);
|
|
IteratorType data = iters.GetBegin();
|
|
|
|
vtkm::Id readPos = range.begin();
|
|
const vtkm::Id readEnd = range.end();
|
|
|
|
// Determine output index. If we're reusing the body, pick up where the
|
|
// last block left off. If not, use the input range.
|
|
vtkm::Id writePos;
|
|
if (firstRun)
|
|
{
|
|
this->Ranges.OutputBegin = range.begin();
|
|
this->Ranges.OutputEnd = range.begin();
|
|
writePos = range.begin();
|
|
}
|
|
else
|
|
{
|
|
writePos = this->Ranges.OutputEnd;
|
|
}
|
|
this->Ranges.AssertSane();
|
|
|
|
// We're either writing at the end of a previous block, or at the input
|
|
// location. Either way, the write position will never be greater than
|
|
// the read position.
|
|
VTKM_ASSERT(writePos <= readPos);
|
|
|
|
// Initialize loop variables:
|
|
BinaryOperationType functor(this->BinaryOperation);
|
|
ValueType current = data[readPos];
|
|
++readPos;
|
|
|
|
// If the start of the current range continues a previous block of
|
|
// identical elements, initialize with the previous result and decrement
|
|
// the write index.
|
|
VTKM_ASSERT(firstRun || writePos > 0);
|
|
if (!firstRun && functor(data[writePos - 1], current))
|
|
{
|
|
// Ensure that we'll overwrite the duplicate value:
|
|
--writePos;
|
|
|
|
// Copy the last data value into current. TestingDeviceAdapter's test
|
|
// using the FuseAll functor requires that the first value in a set of
|
|
// duplicates must be used, and this ensures that condition is met.
|
|
current = data[writePos];
|
|
}
|
|
|
|
// Special case: single value in range
|
|
if (readPos >= readEnd)
|
|
{
|
|
data[writePos] = current;
|
|
++writePos;
|
|
|
|
this->Ranges.OutputEnd = writePos;
|
|
return;
|
|
}
|
|
|
|
for (;;)
|
|
{
|
|
// Advance readPos until the value changes
|
|
while (readPos < readEnd && functor(current, data[readPos]))
|
|
{
|
|
++readPos;
|
|
}
|
|
|
|
// Write out the unique value
|
|
VTKM_ASSERT(writePos <= readPos);
|
|
data[writePos] = current;
|
|
++writePos;
|
|
|
|
// Update the current value if there are more entries
|
|
if (readPos < readEnd)
|
|
{
|
|
current = data[readPos];
|
|
++readPos;
|
|
continue;
|
|
}
|
|
|
|
// Input range is exhausted if we reach this point.
|
|
break;
|
|
}
|
|
|
|
this->Ranges.OutputEnd = writePos;
|
|
}
|
|
|
|
|
|
|
|
void join(const UniqueBody& rhs)
|
|
{
|
|
using IteratorsType = vtkm::cont::ArrayPortalToIterators<PortalType>;
|
|
using IteratorType = typename IteratorsType::IteratorType;
|
|
|
|
this->Ranges.AssertSane();
|
|
rhs.Ranges.AssertSane();
|
|
|
|
// Ensure that we're joining two consecutive subsets of the input:
|
|
VTKM_ASSERT(this->Ranges.IsNext(rhs.Ranges));
|
|
|
|
IteratorsType iters(this->Portal);
|
|
IteratorType data = iters.GetBegin();
|
|
BinaryOperationType functor(this->BinaryOperation);
|
|
|
|
const vtkm::Id dstBegin = this->Ranges.OutputEnd;
|
|
const vtkm::Id lastDstIdx = this->Ranges.OutputEnd - 1;
|
|
|
|
vtkm::Id srcBegin = rhs.Ranges.OutputBegin;
|
|
const vtkm::Id srcEnd = rhs.Ranges.OutputEnd;
|
|
|
|
// Merge boundaries if needed:
|
|
if (functor(data[srcBegin], data[lastDstIdx]))
|
|
{
|
|
++srcBegin; // Don't copy the duplicate value
|
|
}
|
|
|
|
// move data:
|
|
if (srcBegin != dstBegin && srcBegin != srcEnd)
|
|
{
|
|
// Sanity check:
|
|
VTKM_ASSERT(srcBegin < srcEnd);
|
|
std::copy(data + srcBegin, data + srcEnd, data + dstBegin);
|
|
}
|
|
|
|
this->Ranges.InputEnd = rhs.Ranges.InputEnd;
|
|
this->Ranges.OutputEnd += srcEnd - srcBegin;
|
|
this->Ranges.AssertSane();
|
|
}
|
|
};
|
|
|
|
|
|
template <typename PortalType, typename BinaryOperationType>
|
|
VTKM_CONT vtkm::Id UniquePortals(PortalType portal, BinaryOperationType binaryOperation)
|
|
{
|
|
const vtkm::Id inputLength = portal.GetNumberOfValues();
|
|
if (inputLength == 0)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
using WrappedBinaryOp = internal::WrappedBinaryOperator<bool, BinaryOperationType>;
|
|
WrappedBinaryOp wrappedBinaryOp(binaryOperation);
|
|
|
|
UniqueBody<PortalType, WrappedBinaryOp> body(portal, wrappedBinaryOp);
|
|
::tbb::blocked_range<vtkm::Id> range(0, inputLength, TBB_GRAIN_SIZE);
|
|
|
|
::tbb::parallel_reduce(range, body);
|
|
|
|
body.Ranges.AssertSane();
|
|
VTKM_ASSERT(body.Ranges.InputBegin == 0 && body.Ranges.InputEnd == inputLength &&
|
|
body.Ranges.OutputBegin == 0 && body.Ranges.OutputEnd <= inputLength);
|
|
|
|
return body.Ranges.OutputEnd;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif //vtk_m_cont_tbb_internal_FunctorsTBB_h
|