//============================================================================ // Copyright (c) Kitware, Inc. // All rights reserved. // See LICENSE.txt for details. // // This software is distributed WITHOUT ANY WARRANTY; without even // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // PURPOSE. See the above copyright notice for more information. //============================================================================ #ifndef vtk_m_cont_tbb_internal_FunctorsTBB_h #define vtk_m_cont_tbb_internal_FunctorsTBB_h #include #include #include #include #include #include #include #include #include #include VTKM_THIRDPARTY_PRE_INCLUDE #if defined(VTKM_MSVC) // TBB's header include a #pragma comment(lib,"tbb.lib") line to make all // consuming libraries link to tbb, this is bad behavior in a header // based project #pragma push_macro("__TBB_NO_IMPLICITLINKAGE") #define __TBB_NO_IMPLICIT_LINKAGE 1 #endif // defined(VTKM_MSVC) // TBB includes windows.h, so instead we want to include windows.h with the // correct settings so that we don't clobber any existing function #include #include #if (TBB_VERSION_MAJOR == 4) && (TBB_VERSION_MINOR == 2) //we provide an patched implementation of tbb parallel_sort //that fixes ADL for std::swap. This patch has been submitted to Intel //and is fixed in TBB 4.2 update 2. #include #else #include #endif #include #include #include #include #include #include #include #include #if defined(VTKM_MSVC) #pragma pop_macro("__TBB_NO_IMPLICITLINKAGE") #endif VTKM_THIRDPARTY_POST_INCLUDE namespace vtkm { namespace cont { namespace tbb { namespace internal { template using WrappedBinaryOperator = vtkm::cont::internal::WrappedBinaryOperator; } // The "grain size" of scheduling with TBB. Not a lot of thought has gone // into picking this size. static constexpr vtkm::Id TBB_GRAIN_SIZE = 1024; template struct CopyBody { InputPortalType InputPortal; OutputPortalType OutputPortal; vtkm::Id InputOffset; vtkm::Id OutputOffset; CopyBody(const InputPortalType& inPortal, const OutputPortalType& outPortal, vtkm::Id inOffset, vtkm::Id outOffset) : InputPortal(inPortal) , OutputPortal(outPortal) , InputOffset(inOffset) , OutputOffset(outOffset) { } // MSVC likes complain about narrowing type conversions in std::copy and // provides no reasonable way to disable the warning. As a work-around, this // template calls std::copy if and only if the types match, otherwise falls // back to a iterative casting approach. Since std::copy can only really // optimize same-type copies, this shouldn't affect performance. template void DoCopy(InIter src, InIter srcEnd, OutIter dst, std::false_type) const { using InputType = typename InputPortalType::ValueType; using OutputType = typename OutputPortalType::ValueType; while (src != srcEnd) { // The conversion to InputType and then OutputType looks weird, but it is necessary. // *src actually returns an ArrayPortalValueReference, which can automatically convert // itself to InputType but not necessarily OutputType. Thus, we first convert to // InputType, and then allow the conversion to OutputType. *dst = static_cast(static_cast(*src)); ++src; ++dst; } } template void DoCopy(InIter src, InIter srcEnd, OutIter dst, std::true_type) const { std::copy(src, srcEnd, dst); } void operator()(const ::tbb::blocked_range& range) const { if (range.empty()) { return; } auto inIter = vtkm::cont::ArrayPortalToIteratorBegin(this->InputPortal); auto outIter = vtkm::cont::ArrayPortalToIteratorBegin(this->OutputPortal); using InputType = typename InputPortalType::ValueType; using OutputType = typename OutputPortalType::ValueType; this->DoCopy(inIter + this->InputOffset + range.begin(), inIter + this->InputOffset + range.end(), outIter + this->OutputOffset + range.begin(), std::is_same()); } }; template void CopyPortals(const InputPortalType& inPortal, const OutputPortalType& outPortal, vtkm::Id inOffset, vtkm::Id outOffset, vtkm::Id numValues) { using Kernel = CopyBody; Kernel kernel(inPortal, outPortal, inOffset, outOffset); ::tbb::blocked_range range(0, numValues, TBB_GRAIN_SIZE); ::tbb::parallel_for(range, kernel); } template struct CopyIfBody { using ValueType = typename InputPortalType::ValueType; using StencilType = typename StencilPortalType::ValueType; struct Range { vtkm::Id InputBegin; vtkm::Id InputEnd; vtkm::Id OutputBegin; vtkm::Id OutputEnd; Range() : InputBegin(-1) , InputEnd(-1) , OutputBegin(-1) , OutputEnd(-1) { } Range(vtkm::Id inputBegin, vtkm::Id inputEnd, vtkm::Id outputBegin, vtkm::Id outputEnd) : InputBegin(inputBegin) , InputEnd(inputEnd) , OutputBegin(outputBegin) , OutputEnd(outputEnd) { this->AssertSane(); } void AssertSane() const { VTKM_ASSERT("Input begin precedes end" && this->InputBegin <= this->InputEnd); VTKM_ASSERT("Output begin precedes end" && this->OutputBegin <= this->OutputEnd); VTKM_ASSERT("Output not past input" && this->OutputBegin <= this->InputBegin && this->OutputEnd <= this->InputEnd); VTKM_ASSERT("Output smaller than input" && (this->OutputEnd - this->OutputBegin) <= (this->InputEnd - this->InputBegin)); } bool IsNext(const Range& next) const { return this->InputEnd == next.InputBegin; } }; InputPortalType InputPortal; StencilPortalType StencilPortal; OutputPortalType OutputPortal; UnaryPredicateType UnaryPredicate; Range Ranges; VTKM_CONT CopyIfBody(const InputPortalType& inputPortal, const StencilPortalType& stencilPortal, const OutputPortalType& outputPortal, UnaryPredicateType unaryPredicate) : InputPortal(inputPortal) , StencilPortal(stencilPortal) , OutputPortal(outputPortal) , UnaryPredicate(unaryPredicate) { } CopyIfBody(const CopyIfBody& body, ::tbb::split) : InputPortal(body.InputPortal) , StencilPortal(body.StencilPortal) , OutputPortal(body.OutputPortal) , UnaryPredicate(body.UnaryPredicate) { } void operator()(const ::tbb::blocked_range& range) { if (range.empty()) { return; } bool firstRun = this->Ranges.OutputBegin < 0; // First use of this body object if (firstRun) { this->Ranges.InputBegin = range.begin(); } else { // Must be a continuation of the previous input range: VTKM_ASSERT(this->Ranges.InputEnd == range.begin()); } this->Ranges.InputEnd = range.end(); this->Ranges.AssertSane(); using InputIteratorsType = vtkm::cont::ArrayPortalToIterators; using StencilIteratorsType = vtkm::cont::ArrayPortalToIterators; using OutputIteratorsType = vtkm::cont::ArrayPortalToIterators; InputIteratorsType inputIters(this->InputPortal); StencilIteratorsType stencilIters(this->StencilPortal); OutputIteratorsType outputIters(this->OutputPortal); using InputIteratorType = typename InputIteratorsType::IteratorType; using StencilIteratorType = typename StencilIteratorsType::IteratorType; using OutputIteratorType = typename OutputIteratorsType::IteratorType; InputIteratorType inIter = inputIters.GetBegin(); StencilIteratorType stencilIter = stencilIters.GetBegin(); OutputIteratorType outIter = outputIters.GetBegin(); vtkm::Id readPos = range.begin(); const vtkm::Id readEnd = range.end(); // Determine output index. If we're reusing the body, pick up where the // last block left off. If not, use the input range. vtkm::Id writePos; if (firstRun) { this->Ranges.OutputBegin = range.begin(); this->Ranges.OutputEnd = range.begin(); writePos = range.begin(); } else { writePos = this->Ranges.OutputEnd; } this->Ranges.AssertSane(); // We're either writing at the end of a previous block, or at the input // location. Either way, the write position will never be greater than // the read position. VTKM_ASSERT(writePos <= readPos); UnaryPredicateType predicate(this->UnaryPredicate); for (; readPos < readEnd; ++readPos) { if (predicate(stencilIter[readPos])) { outIter[writePos] = inIter[readPos]; ++writePos; } } this->Ranges.OutputEnd = writePos; } void join(const CopyIfBody& rhs) { using OutputIteratorsType = vtkm::cont::ArrayPortalToIterators; using OutputIteratorType = typename OutputIteratorsType::IteratorType; OutputIteratorsType outputIters(this->OutputPortal); OutputIteratorType outIter = outputIters.GetBegin(); this->Ranges.AssertSane(); rhs.Ranges.AssertSane(); // Ensure that we're joining two consecutive subsets of the input: VTKM_ASSERT(this->Ranges.IsNext(rhs.Ranges)); vtkm::Id srcBegin = rhs.Ranges.OutputBegin; const vtkm::Id srcEnd = rhs.Ranges.OutputEnd; const vtkm::Id dstBegin = this->Ranges.OutputEnd; // move data: if (srcBegin != dstBegin && srcBegin != srcEnd) { // Sanity check: VTKM_ASSERT(srcBegin < srcEnd); std::copy(outIter + srcBegin, outIter + srcEnd, outIter + dstBegin); } this->Ranges.InputEnd = rhs.Ranges.InputEnd; this->Ranges.OutputEnd += srcEnd - srcBegin; this->Ranges.AssertSane(); } }; template VTKM_CONT vtkm::Id CopyIfPortals(InputPortalType inputPortal, StencilPortalType stencilPortal, OutputPortalType outputPortal, UnaryPredicateType unaryPredicate) { const vtkm::Id inputLength = inputPortal.GetNumberOfValues(); VTKM_ASSERT(inputLength == stencilPortal.GetNumberOfValues()); if (inputLength == 0) { return 0; } CopyIfBody body( inputPortal, stencilPortal, outputPortal, unaryPredicate); ::tbb::blocked_range range(0, inputLength, TBB_GRAIN_SIZE); ::tbb::parallel_reduce(range, body); body.Ranges.AssertSane(); VTKM_ASSERT(body.Ranges.InputBegin == 0 && body.Ranges.InputEnd == inputLength && body.Ranges.OutputBegin == 0 && body.Ranges.OutputEnd <= inputLength); return body.Ranges.OutputEnd; } template struct ReduceBody { T Sum; T InitialValue; bool FirstCall; InputPortalType InputPortal; BinaryOperationType BinaryOperation; VTKM_CONT ReduceBody(const InputPortalType& inputPortal, T initialValue, BinaryOperationType binaryOperation) : Sum(vtkm::TypeTraits::ZeroInitialization()) , InitialValue(initialValue) , FirstCall(true) , InputPortal(inputPortal) , BinaryOperation(binaryOperation) { } ReduceBody(const ReduceBody& body, ::tbb::split) : Sum(vtkm::TypeTraits::ZeroInitialization()) , InitialValue(body.InitialValue) , FirstCall(true) , InputPortal(body.InputPortal) , BinaryOperation(body.BinaryOperation) { } void operator()(const ::tbb::blocked_range& range) { using InputIteratorsType = vtkm::cont::ArrayPortalToIterators; InputIteratorsType inputIterators(this->InputPortal); //use temp, and iterators instead of member variable to reduce false sharing typename InputIteratorsType::IteratorType inIter = inputIterators.GetBegin() + static_cast(range.begin()); T temp = this->BinaryOperation(*inIter, *(inIter + 1)); ++inIter; ++inIter; for (vtkm::Id index = range.begin() + 2; index != range.end(); ++index, ++inIter) { temp = this->BinaryOperation(temp, *inIter); } //determine if we also have to add the initial value to temp if (range.begin() == 0) { temp = this->BinaryOperation(temp, this->InitialValue); } //Now we can save temp back to sum, taking into account if //this task has been called before, and the sum value needs //to also be reduced. if (this->FirstCall) { this->Sum = temp; } else { this->Sum = this->BinaryOperation(this->Sum, temp); } this->FirstCall = false; } void join(const ReduceBody& left) { // std::cout << "join" << std::endl; this->Sum = this->BinaryOperation(left.Sum, this->Sum); } }; template VTKM_CONT static T ReducePortals(InputPortalType inputPortal, T initialValue, BinaryOperationType binaryOperation) { using WrappedBinaryOp = internal::WrappedBinaryOperator; WrappedBinaryOp wrappedBinaryOp(binaryOperation); ReduceBody body(inputPortal, initialValue, wrappedBinaryOp); vtkm::Id arrayLength = inputPortal.GetNumberOfValues(); if (arrayLength > 1) { ::tbb::blocked_range range(0, arrayLength, TBB_GRAIN_SIZE); ::tbb::parallel_reduce(range, body); return body.Sum; } else if (arrayLength == 1) { //ReduceBody does not work with an array of size 1. return binaryOperation(initialValue, inputPortal.Get(0)); } else // arrayLength == 0 { // ReduceBody does not work with an array of size 0. return initialValue; } } // Define this to print out timing information from the reduction and join // operations in the tbb ReduceByKey algorithm: //#define VTKM_DEBUG_TBB_RBK template struct ReduceByKeyBody { using KeyType = typename KeysInPortalType::ValueType; using ValueType = typename ValuesInPortalType::ValueType; struct Range { vtkm::Id InputBegin; vtkm::Id InputEnd; vtkm::Id OutputBegin; vtkm::Id OutputEnd; Range() : InputBegin(-1) , InputEnd(-1) , OutputBegin(-1) , OutputEnd(-1) { } Range(vtkm::Id inputBegin, vtkm::Id inputEnd, vtkm::Id outputBegin, vtkm::Id outputEnd) : InputBegin(inputBegin) , InputEnd(inputEnd) , OutputBegin(outputBegin) , OutputEnd(outputEnd) { this->AssertSane(); } void AssertSane() const { VTKM_ASSERT("Input begin precedes end" && this->InputBegin <= this->InputEnd); VTKM_ASSERT("Output begin precedes end" && this->OutputBegin <= this->OutputEnd); VTKM_ASSERT("Output not past input" && this->OutputBegin <= this->InputBegin && this->OutputEnd <= this->InputEnd); VTKM_ASSERT("Output smaller than input" && (this->OutputEnd - this->OutputBegin) <= (this->InputEnd - this->InputBegin)); } bool IsNext(const Range& next) const { return this->InputEnd == next.InputBegin; } }; KeysInPortalType KeysInPortal; ValuesInPortalType ValuesInPortal; KeysOutPortalType KeysOutPortal; ValuesOutPortalType ValuesOutPortal; BinaryOperationType BinaryOperation; Range Ranges; #ifdef VTKM_DEBUG_TBB_RBK double ReduceTime; double JoinTime; #endif VTKM_CONT ReduceByKeyBody(const KeysInPortalType& keysInPortal, const ValuesInPortalType& valuesInPortal, const KeysOutPortalType& keysOutPortal, const ValuesOutPortalType& valuesOutPortal, BinaryOperationType binaryOperation) : KeysInPortal(keysInPortal) , ValuesInPortal(valuesInPortal) , KeysOutPortal(keysOutPortal) , ValuesOutPortal(valuesOutPortal) , BinaryOperation(binaryOperation) #ifdef VTKM_DEBUG_TBB_RBK , ReduceTime(0) , JoinTime(0) #endif { } ReduceByKeyBody(const ReduceByKeyBody& body, ::tbb::split) : KeysInPortal(body.KeysInPortal) , ValuesInPortal(body.ValuesInPortal) , KeysOutPortal(body.KeysOutPortal) , ValuesOutPortal(body.ValuesOutPortal) , BinaryOperation(body.BinaryOperation) #ifdef VTKM_DEBUG_TBB_RBK , ReduceTime(0) , JoinTime(0) #endif { } void operator()(const ::tbb::blocked_range& range) { #ifdef VTKM_DEBUG_TBB_RBK ::tbb::tick_count startTime = ::tbb::tick_count::now(); #endif // VTKM_DEBUG_TBB_RBK if (range.empty()) { return; } bool firstRun = this->Ranges.OutputBegin < 0; // First use of this body object if (firstRun) { this->Ranges.InputBegin = range.begin(); } else { // Must be a continuation of the previous input range: VTKM_ASSERT(this->Ranges.InputEnd == range.begin()); } this->Ranges.InputEnd = range.end(); this->Ranges.AssertSane(); using KeysInIteratorsType = vtkm::cont::ArrayPortalToIterators; using ValuesInIteratorsType = vtkm::cont::ArrayPortalToIterators; using KeysOutIteratorsType = vtkm::cont::ArrayPortalToIterators; using ValuesOutIteratorsType = vtkm::cont::ArrayPortalToIterators; KeysInIteratorsType keysInIters(this->KeysInPortal); ValuesInIteratorsType valuesInIters(this->ValuesInPortal); KeysOutIteratorsType keysOutIters(this->KeysOutPortal); ValuesOutIteratorsType valuesOutIters(this->ValuesOutPortal); using KeysInIteratorType = typename KeysInIteratorsType::IteratorType; using ValuesInIteratorType = typename ValuesInIteratorsType::IteratorType; using KeysOutIteratorType = typename KeysOutIteratorsType::IteratorType; using ValuesOutIteratorType = typename ValuesOutIteratorsType::IteratorType; KeysInIteratorType keysIn = keysInIters.GetBegin(); ValuesInIteratorType valuesIn = valuesInIters.GetBegin(); KeysOutIteratorType keysOut = keysOutIters.GetBegin(); ValuesOutIteratorType valuesOut = valuesOutIters.GetBegin(); vtkm::Id readPos = range.begin(); const vtkm::Id readEnd = range.end(); // Determine output index. If we're reusing the body, pick up where the // last block left off. If not, use the input range. vtkm::Id writePos; if (firstRun) { this->Ranges.OutputBegin = range.begin(); this->Ranges.OutputEnd = range.begin(); writePos = range.begin(); } else { writePos = this->Ranges.OutputEnd; } this->Ranges.AssertSane(); // We're either writing at the end of a previous block, or at the input // location. Either way, the write position will never be greater than // the read position. VTKM_ASSERT(writePos <= readPos); // Initialize reduction variables: BinaryOperationType functor(this->BinaryOperation); KeyType currentKey = keysIn[readPos]; ValueType currentValue = valuesIn[readPos]; ++readPos; // If the start of the current range continues a previous key block, // initialize with the previous result and decrement the write index. VTKM_ASSERT(firstRun || writePos > 0); if (!firstRun && keysOut[writePos - 1] == currentKey) { // Ensure that we'll overwrite the continued key values: --writePos; // Update our accumulator with the partial value: currentValue = functor(valuesOut[writePos], currentValue); } // Special case: single value in range if (readPos >= readEnd) { keysOut[writePos] = currentKey; valuesOut[writePos] = currentValue; ++writePos; this->Ranges.OutputEnd = writePos; return; } for (;;) { while (readPos < readEnd && currentKey == keysIn[readPos]) { currentValue = functor(currentValue, valuesIn[readPos]); ++readPos; } VTKM_ASSERT(writePos <= readPos); keysOut[writePos] = currentKey; valuesOut[writePos] = currentValue; ++writePos; if (readPos < readEnd) { currentKey = keysIn[readPos]; currentValue = valuesIn[readPos]; ++readPos; continue; } break; } this->Ranges.OutputEnd = writePos; #ifdef VTKM_DEBUG_TBB_RBK ::tbb::tick_count endTime = ::tbb::tick_count::now(); double time = (endTime - startTime).seconds(); this->ReduceTime += time; std::ostringstream out; out << "Reduced " << range.size() << " key/value pairs in " << time << "s. " << "InRange: " << this->Ranges.InputBegin << " " << this->Ranges.InputEnd << " " << "OutRange: " << this->Ranges.OutputBegin << " " << this->Ranges.OutputEnd << "\n"; std::cerr << out.str(); #endif } void join(const ReduceByKeyBody& rhs) { using KeysIteratorsType = vtkm::cont::ArrayPortalToIterators; using ValuesIteratorsType = vtkm::cont::ArrayPortalToIterators; using KeysIteratorType = typename KeysIteratorsType::IteratorType; using ValuesIteratorType = typename ValuesIteratorsType::IteratorType; #ifdef VTKM_DEBUG_TBB_RBK ::tbb::tick_count startTime = ::tbb::tick_count::now(); #endif this->Ranges.AssertSane(); rhs.Ranges.AssertSane(); // Ensure that we're joining two consecutive subsets of the input: VTKM_ASSERT(this->Ranges.IsNext(rhs.Ranges)); KeysIteratorsType keysIters(this->KeysOutPortal); ValuesIteratorsType valuesIters(this->ValuesOutPortal); KeysIteratorType keys = keysIters.GetBegin(); ValuesIteratorType values = valuesIters.GetBegin(); const vtkm::Id dstBegin = this->Ranges.OutputEnd; const vtkm::Id lastDstIdx = this->Ranges.OutputEnd - 1; vtkm::Id srcBegin = rhs.Ranges.OutputBegin; const vtkm::Id srcEnd = rhs.Ranges.OutputEnd; // Merge boundaries if needed: if (keys[srcBegin] == keys[lastDstIdx]) { values[lastDstIdx] = this->BinaryOperation(values[lastDstIdx], values[srcBegin]); ++srcBegin; // Don't copy the key/value we just reduced } // move data: if (srcBegin != dstBegin && srcBegin != srcEnd) { // Sanity check: VTKM_ASSERT(srcBegin < srcEnd); std::copy(keys + srcBegin, keys + srcEnd, keys + dstBegin); std::copy(values + srcBegin, values + srcEnd, values + dstBegin); } this->Ranges.InputEnd = rhs.Ranges.InputEnd; this->Ranges.OutputEnd += srcEnd - srcBegin; this->Ranges.AssertSane(); #ifdef VTKM_DEBUG_TBB_RBK ::tbb::tick_count endTime = ::tbb::tick_count::now(); double time = (endTime - startTime).seconds(); this->JoinTime += rhs.JoinTime + time; std::ostringstream out; out << "Joined " << (srcEnd - srcBegin) << " rhs values into body in " << time << "s. " << "InRange: " << this->Ranges.InputBegin << " " << this->Ranges.InputEnd << " " << "OutRange: " << this->Ranges.OutputBegin << " " << this->Ranges.OutputEnd << "\n"; std::cerr << out.str(); #endif } }; template VTKM_CONT vtkm::Id ReduceByKeyPortals(KeysInPortalType keysInPortal, ValuesInPortalType valuesInPortal, KeysOutPortalType keysOutPortal, ValuesOutPortalType valuesOutPortal, BinaryOperationType binaryOperation) { const vtkm::Id inputLength = keysInPortal.GetNumberOfValues(); VTKM_ASSERT(inputLength == valuesInPortal.GetNumberOfValues()); if (inputLength == 0) { return 0; } using ValueType = typename ValuesInPortalType::ValueType; using WrappedBinaryOp = internal::WrappedBinaryOperator; WrappedBinaryOp wrappedBinaryOp(binaryOperation); ReduceByKeyBody body(keysInPortal, valuesInPortal, keysOutPortal, valuesOutPortal, wrappedBinaryOp); ::tbb::blocked_range range(0, inputLength, TBB_GRAIN_SIZE); #ifdef VTKM_DEBUG_TBB_RBK std::cerr << "\n\nTBB ReduceByKey:\n"; #endif ::tbb::parallel_reduce(range, body); #ifdef VTKM_DEBUG_TBB_RBK std::cerr << "Total reduce time: " << body.ReduceTime << "s\n"; std::cerr << "Total join time: " << body.JoinTime << "s\n"; std::cerr << "\nend\n"; #endif body.Ranges.AssertSane(); VTKM_ASSERT(body.Ranges.InputBegin == 0 && body.Ranges.InputEnd == inputLength && body.Ranges.OutputBegin == 0 && body.Ranges.OutputEnd <= inputLength); return body.Ranges.OutputEnd; } #ifdef VTKM_DEBUG_TBB_RBK #undef VTKM_DEBUG_TBB_RBK #endif template struct ScanInclusiveBody { using ValueType = typename std::remove_reference::type; ValueType Sum; bool FirstCall; InputPortalType InputPortal; OutputPortalType OutputPortal; BinaryOperationType BinaryOperation; VTKM_CONT ScanInclusiveBody(const InputPortalType& inputPortal, const OutputPortalType& outputPortal, BinaryOperationType binaryOperation) : Sum(vtkm::TypeTraits::ZeroInitialization()) , FirstCall(true) , InputPortal(inputPortal) , OutputPortal(outputPortal) , BinaryOperation(binaryOperation) { } ScanInclusiveBody(const ScanInclusiveBody& body, ::tbb::split) : Sum(vtkm::TypeTraits::ZeroInitialization()) , FirstCall(true) , InputPortal(body.InputPortal) , OutputPortal(body.OutputPortal) , BinaryOperation(body.BinaryOperation) { } void operator()(const ::tbb::blocked_range& range, ::tbb::pre_scan_tag) { using InputIteratorsType = vtkm::cont::ArrayPortalToIterators; InputIteratorsType inputIterators(this->InputPortal); //use temp, and iterators instead of member variable to reduce false sharing typename InputIteratorsType::IteratorType inIter = inputIterators.GetBegin() + static_cast(range.begin()); ValueType temp = this->FirstCall ? *inIter++ : this->BinaryOperation(this->Sum, *inIter++); this->FirstCall = false; for (vtkm::Id index = range.begin() + 1; index != range.end(); ++index, ++inIter) { temp = this->BinaryOperation(temp, *inIter); } this->Sum = temp; } void operator()(const ::tbb::blocked_range& range, ::tbb::final_scan_tag) { using InputIteratorsType = vtkm::cont::ArrayPortalToIterators; using OutputIteratorsType = vtkm::cont::ArrayPortalToIterators; InputIteratorsType inputIterators(this->InputPortal); OutputIteratorsType outputIterators(this->OutputPortal); //use temp, and iterators instead of member variable to reduce false sharing typename InputIteratorsType::IteratorType inIter = inputIterators.GetBegin() + static_cast(range.begin()); typename OutputIteratorsType::IteratorType outIter = outputIterators.GetBegin() + static_cast(range.begin()); ValueType temp = this->FirstCall ? *inIter++ : this->BinaryOperation(this->Sum, *inIter++); this->FirstCall = false; *outIter++ = temp; for (vtkm::Id index = range.begin() + 1; index != range.end(); ++index, ++inIter, ++outIter) { *outIter = temp = this->BinaryOperation(temp, *inIter); } this->Sum = temp; } void reverse_join(const ScanInclusiveBody& left) { this->Sum = this->BinaryOperation(left.Sum, this->Sum); } void assign(const ScanInclusiveBody& src) { this->Sum = src.Sum; } }; template struct ScanExclusiveBody { using ValueType = typename std::remove_reference::type; ValueType Sum; bool FirstCall; InputPortalType InputPortal; OutputPortalType OutputPortal; BinaryOperationType BinaryOperation; VTKM_CONT ScanExclusiveBody(const InputPortalType& inputPortal, const OutputPortalType& outputPortal, BinaryOperationType binaryOperation, const ValueType& initialValue) : Sum(initialValue) , FirstCall(true) , InputPortal(inputPortal) , OutputPortal(outputPortal) , BinaryOperation(binaryOperation) { } ScanExclusiveBody(const ScanExclusiveBody& body, ::tbb::split) : Sum(body.Sum) , FirstCall(true) , InputPortal(body.InputPortal) , OutputPortal(body.OutputPortal) , BinaryOperation(body.BinaryOperation) { } void operator()(const ::tbb::blocked_range& range, ::tbb::pre_scan_tag) { using InputIteratorsType = vtkm::cont::ArrayPortalToIterators; InputIteratorsType inputIterators(this->InputPortal); //move the iterator to the first item typename InputIteratorsType::IteratorType iter = inputIterators.GetBegin() + static_cast(range.begin()); ValueType temp = *iter; ++iter; if (!(this->FirstCall && range.begin() > 0)) { temp = this->BinaryOperation(this->Sum, temp); } for (vtkm::Id index = range.begin() + 1; index != range.end(); ++index, ++iter) { temp = this->BinaryOperation(temp, *iter); } this->Sum = temp; this->FirstCall = false; } void operator()(const ::tbb::blocked_range& range, ::tbb::final_scan_tag) { using InputIteratorsType = vtkm::cont::ArrayPortalToIterators; using OutputIteratorsType = vtkm::cont::ArrayPortalToIterators; InputIteratorsType inputIterators(this->InputPortal); OutputIteratorsType outputIterators(this->OutputPortal); //move the iterators to the first item typename InputIteratorsType::IteratorType inIter = inputIterators.GetBegin() + static_cast(range.begin()); typename OutputIteratorsType::IteratorType outIter = outputIterators.GetBegin() + static_cast(range.begin()); ValueType temp = this->Sum; for (vtkm::Id index = range.begin(); index != range.end(); ++index, ++inIter, ++outIter) { //copy into a local reference since Input and Output portal //could point to the same memory location ValueType v = *inIter; *outIter = temp; temp = this->BinaryOperation(temp, v); } this->Sum = temp; this->FirstCall = false; } void reverse_join(const ScanExclusiveBody& left) { //The contract we have with TBB is that they will only join //two objects that have been scanned, or two objects which //haven't been scanned VTKM_ASSERT(left.FirstCall == this->FirstCall); if (!left.FirstCall && !this->FirstCall) { this->Sum = this->BinaryOperation(left.Sum, this->Sum); } } void assign(const ScanExclusiveBody& src) { this->Sum = src.Sum; } }; template VTKM_CONT static typename std::remove_reference::type ScanInclusivePortals(InputPortalType inputPortal, OutputPortalType outputPortal, BinaryOperationType binaryOperation) { using ValueType = typename std::remove_reference::type; using WrappedBinaryOp = internal::WrappedBinaryOperator; WrappedBinaryOp wrappedBinaryOp(binaryOperation); ScanInclusiveBody body( inputPortal, outputPortal, wrappedBinaryOp); vtkm::Id arrayLength = inputPortal.GetNumberOfValues(); ::tbb::blocked_range range(0, arrayLength, TBB_GRAIN_SIZE); ::tbb::parallel_scan(range, body); return body.Sum; } template VTKM_CONT static typename std::remove_reference::type ScanExclusivePortals( InputPortalType inputPortal, OutputPortalType outputPortal, BinaryOperationType binaryOperation, typename std::remove_reference::type initialValue) { using ValueType = typename std::remove_reference::type; using WrappedBinaryOp = internal::WrappedBinaryOperator; WrappedBinaryOp wrappedBinaryOp(binaryOperation); ScanExclusiveBody body( inputPortal, outputPortal, wrappedBinaryOp, initialValue); vtkm::Id arrayLength = inputPortal.GetNumberOfValues(); ::tbb::blocked_range range(0, arrayLength, TBB_GRAIN_SIZE); ::tbb::parallel_scan(range, body); // Seems a little weird to me that we would return the last value in the // array rather than the sum, but that is how the function is specified. return body.Sum; } template class ScatterKernel { public: VTKM_CONT ScatterKernel(InputPortalType inputPortal, IndexPortalType indexPortal, OutputPortalType outputPortal) : ValuesPortal(inputPortal) , IndexPortal(indexPortal) , OutputPortal(outputPortal) { } VTKM_CONT void operator()(const ::tbb::blocked_range& range) const { // The TBB device adapter causes array classes to be shared between // control and execution environment. This means that it is possible for // an exception to be thrown even though this is typically not allowed. // Throwing an exception from here is bad because there are several // simultaneous threads running. Get around the problem by catching the // error and setting the message buffer as expected. try { VTKM_VECTORIZATION_PRE_LOOP for (vtkm::Id i = range.begin(); i < range.end(); i++) { VTKM_VECTORIZATION_IN_LOOP OutputPortal.Set(i, ValuesPortal.Get(IndexPortal.Get(i))); } } catch (vtkm::cont::Error& error) { this->ErrorMessage.RaiseError(error.GetMessage().c_str()); } catch (...) { this->ErrorMessage.RaiseError("Unexpected error in execution environment."); } } private: InputPortalType ValuesPortal; IndexPortalType IndexPortal; OutputPortalType OutputPortal; vtkm::exec::internal::ErrorMessageBuffer ErrorMessage; }; template VTKM_CONT static void ScatterPortal(InputPortalType inputPortal, IndexPortalType indexPortal, OutputPortalType outputPortal) { const vtkm::Id size = inputPortal.GetNumberOfValues(); VTKM_ASSERT(size == indexPortal.GetNumberOfValues()); ScatterKernel scatter( inputPortal, indexPortal, outputPortal); ::tbb::blocked_range range(0, size, TBB_GRAIN_SIZE); ::tbb::parallel_for(range, scatter); } template struct UniqueBody { using ValueType = typename PortalType::ValueType; struct Range { vtkm::Id InputBegin; vtkm::Id InputEnd; vtkm::Id OutputBegin; vtkm::Id OutputEnd; Range() : InputBegin(-1) , InputEnd(-1) , OutputBegin(-1) , OutputEnd(-1) { } Range(vtkm::Id inputBegin, vtkm::Id inputEnd, vtkm::Id outputBegin, vtkm::Id outputEnd) : InputBegin(inputBegin) , InputEnd(inputEnd) , OutputBegin(outputBegin) , OutputEnd(outputEnd) { this->AssertSane(); } void AssertSane() const { VTKM_ASSERT("Input begin precedes end" && this->InputBegin <= this->InputEnd); VTKM_ASSERT("Output begin precedes end" && this->OutputBegin <= this->OutputEnd); VTKM_ASSERT("Output not past input" && this->OutputBegin <= this->InputBegin && this->OutputEnd <= this->InputEnd); VTKM_ASSERT("Output smaller than input" && (this->OutputEnd - this->OutputBegin) <= (this->InputEnd - this->InputBegin)); } bool IsNext(const Range& next) const { return this->InputEnd == next.InputBegin; } }; PortalType Portal; BinaryOperationType BinaryOperation; Range Ranges; VTKM_CONT UniqueBody(const PortalType& portal, BinaryOperationType binaryOperation) : Portal(portal) , BinaryOperation(binaryOperation) { } VTKM_CONT UniqueBody(const UniqueBody& body, ::tbb::split) : Portal(body.Portal) , BinaryOperation(body.BinaryOperation) { } void operator()(const ::tbb::blocked_range& range) { if (range.empty()) { return; } bool firstRun = this->Ranges.OutputBegin < 0; // First use of this body object if (firstRun) { this->Ranges.InputBegin = range.begin(); } else { // Must be a continuation of the previous input range: VTKM_ASSERT(this->Ranges.InputEnd == range.begin()); } this->Ranges.InputEnd = range.end(); this->Ranges.AssertSane(); using IteratorsType = vtkm::cont::ArrayPortalToIterators; using IteratorType = typename IteratorsType::IteratorType; IteratorsType iters(this->Portal); IteratorType data = iters.GetBegin(); vtkm::Id readPos = range.begin(); const vtkm::Id readEnd = range.end(); // Determine output index. If we're reusing the body, pick up where the // last block left off. If not, use the input range. vtkm::Id writePos; if (firstRun) { this->Ranges.OutputBegin = range.begin(); this->Ranges.OutputEnd = range.begin(); writePos = range.begin(); } else { writePos = this->Ranges.OutputEnd; } this->Ranges.AssertSane(); // We're either writing at the end of a previous block, or at the input // location. Either way, the write position will never be greater than // the read position. VTKM_ASSERT(writePos <= readPos); // Initialize loop variables: BinaryOperationType functor(this->BinaryOperation); ValueType current = data[readPos]; ++readPos; // If the start of the current range continues a previous block of // identical elements, initialize with the previous result and decrement // the write index. VTKM_ASSERT(firstRun || writePos > 0); if (!firstRun && functor(data[writePos - 1], current)) { // Ensure that we'll overwrite the duplicate value: --writePos; // Copy the last data value into current. TestingDeviceAdapter's test // using the FuseAll functor requires that the first value in a set of // duplicates must be used, and this ensures that condition is met. current = data[writePos]; } // Special case: single value in range if (readPos >= readEnd) { data[writePos] = current; ++writePos; this->Ranges.OutputEnd = writePos; return; } for (;;) { // Advance readPos until the value changes while (readPos < readEnd && functor(current, data[readPos])) { ++readPos; } // Write out the unique value VTKM_ASSERT(writePos <= readPos); data[writePos] = current; ++writePos; // Update the current value if there are more entries if (readPos < readEnd) { current = data[readPos]; ++readPos; continue; } // Input range is exhausted if we reach this point. break; } this->Ranges.OutputEnd = writePos; } void join(const UniqueBody& rhs) { using IteratorsType = vtkm::cont::ArrayPortalToIterators; using IteratorType = typename IteratorsType::IteratorType; this->Ranges.AssertSane(); rhs.Ranges.AssertSane(); // Ensure that we're joining two consecutive subsets of the input: VTKM_ASSERT(this->Ranges.IsNext(rhs.Ranges)); IteratorsType iters(this->Portal); IteratorType data = iters.GetBegin(); BinaryOperationType functor(this->BinaryOperation); const vtkm::Id dstBegin = this->Ranges.OutputEnd; const vtkm::Id lastDstIdx = this->Ranges.OutputEnd - 1; vtkm::Id srcBegin = rhs.Ranges.OutputBegin; const vtkm::Id srcEnd = rhs.Ranges.OutputEnd; // Merge boundaries if needed: if (functor(data[srcBegin], data[lastDstIdx])) { ++srcBegin; // Don't copy the duplicate value } // move data: if (srcBegin != dstBegin && srcBegin != srcEnd) { // Sanity check: VTKM_ASSERT(srcBegin < srcEnd); std::copy(data + srcBegin, data + srcEnd, data + dstBegin); } this->Ranges.InputEnd = rhs.Ranges.InputEnd; this->Ranges.OutputEnd += srcEnd - srcBegin; this->Ranges.AssertSane(); } }; template VTKM_CONT vtkm::Id UniquePortals(PortalType portal, BinaryOperationType binaryOperation) { const vtkm::Id inputLength = portal.GetNumberOfValues(); if (inputLength == 0) { return 0; } using WrappedBinaryOp = internal::WrappedBinaryOperator; WrappedBinaryOp wrappedBinaryOp(binaryOperation); UniqueBody body(portal, wrappedBinaryOp); ::tbb::blocked_range range(0, inputLength, TBB_GRAIN_SIZE); ::tbb::parallel_reduce(range, body); body.Ranges.AssertSane(); VTKM_ASSERT(body.Ranges.InputBegin == 0 && body.Ranges.InputEnd == inputLength && body.Ranges.OutputBegin == 0 && body.Ranges.OutputEnd <= inputLength); return body.Ranges.OutputEnd; } } } } #endif //vtk_m_cont_tbb_internal_FunctorsTBB_h