vtk-m/vtkm/worklet/MaskSelect.cxx
Kenneth Moreland 68f39b86a8 Deprecate VariantArrayHandle
`VariantArrayHandle` has been replaced by `UnknownArrayHandle` and
`UncertainArrayHandle`. Officially make it deprecated and point users to
the new implementations.
2021-04-07 16:12:38 -06:00

140 lines
5.2 KiB
C++

//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/worklet/MaskSelect.h>
#include <vtkm/worklet/DispatcherMapField.h>
#include <vtkm/worklet/WorkletMapField.h>
#include <vtkm/cont/Algorithm.h>
#include <vtkm/cont/ArrayHandleCast.h>
#include <vtkm/cont/ArrayHandleView.h>
namespace
{
// Map-field worklet that inverts the output-to-thread map.
//
// For each entry whose mask value is true, the entry's own index (InputIndex)
// is written into the thread-to-output map at the slot named by that entry's
// value in the output-to-thread map. Entries whose mask value is false write
// nothing.
struct ReverseOutputToThreadMap : vtkm::worklet::WorkletMapField
{
  using ControlSignature = void(FieldIn outputToThreadMap,
                                FieldIn maskArray,
                                WholeArrayOut threadToOutputMap);
  // Argument order of operator(): _1 -> threadIndex, InputIndex -> outputIndex,
  // _2 -> mask, _3 -> threadToOutput.
  using ExecutionSignature = void(_1, InputIndex, _2, _3);

  template <typename MaskValueType, typename OutputPortalType>
  VTKM_EXEC void operator()(vtkm::Id threadIndex,
                            vtkm::Id outputIndex,
                            MaskValueType mask,
                            OutputPortalType threadToOutput) const
  {
    // A masked-off entry has no thread assigned to it, so there is nothing to record.
    if (!mask)
    {
      return;
    }
    threadToOutput.Set(threadIndex, outputIndex);
  }
};
// Builds the thread-to-output map by searching the output-to-thread map once
// per thread.
//
// numThreads        - number of entries in the returned map.
// outputToThreadMap - per-output thread index (the caller in this file passes
//                     the exclusive scan of the mask).
// device            - device on which the search is run.
//
// The search is done on outputToThreadMap with its first entry dropped. For
// thread index t, the upper bound in that shifted array is the position of the
// first output whose scan value exceeds t (assuming the usual upper-bound
// semantics of Algorithm::UpperBounds). With a 0/1 mask this is the index of
// the (t+1)-th selected output.
VTKM_CONT static vtkm::worklet::MaskSelect::ThreadToOutputMapType BuildThreadToOutputMapWithFind(
  vtkm::Id numThreads,
  vtkm::cont::ArrayHandle<vtkm::Id> outputToThreadMap,
  vtkm::cont::DeviceAdapterId device)
{
  using MapType = vtkm::worklet::MaskSelect::ThreadToOutputMapType;

  // View of every scan entry except the first one.
  const vtkm::Id numOutputs = outputToThreadMap.GetNumberOfValues();
  auto shiftedScan = vtkm::cont::make_ArrayHandleView(outputToThreadMap, 1, numOutputs - 1);

  // The values searched for are simply the thread indices 0 .. numThreads-1.
  vtkm::cont::ArrayHandleIndex searchValues(numThreads);

  MapType result;
  vtkm::cont::Algorithm::UpperBounds(device, shiftedScan, searchValues, result);
  return result;
}
// Builds the thread-to-output map by visiting every output and letting each
// selected output write its own index into the map (see
// ReverseOutputToThreadMap).
//
// numThreads        - size to which the returned map is allocated.
// outputToThreadMap - per-output thread index; gives the slot each selected
//                     output writes to.
// maskArray         - per-output mask; only entries that evaluate to true write.
// device            - device on which the worklet is dispatched.
template <typename MaskArrayType>
VTKM_CONT static vtkm::worklet::MaskSelect::ThreadToOutputMapType BuildThreadToOutputMapWithCopy(
  vtkm::Id numThreads,
  const vtkm::cont::ArrayHandle<vtkm::Id>& outputToThreadMap,
  const MaskArrayType& maskArray,
  vtkm::cont::DeviceAdapterId device)
{
  using Dispatcher = vtkm::worklet::DispatcherMapField<ReverseOutputToThreadMap>;

  // The map is handed to the worklet as a whole-array output, so it is sized
  // here before the invoke.
  vtkm::worklet::MaskSelect::ThreadToOutputMapType result;
  result.Allocate(numThreads);

  Dispatcher reverseMap;
  reverseMap.SetDevice(device);
  reverseMap.Invoke(outputToThreadMap, maskArray, result);
  return result;
}
// Builds the thread-to-output map for the case where every output is selected:
// the map is the sequence 0, 1, ..., numThreads-1.
//
// numThreads - number of entries in the returned map.
// device     - device on which the copy is performed.
VTKM_CONT static vtkm::worklet::MaskSelect::ThreadToOutputMapType BuildThreadToOutputMapAllOn(
  vtkm::Id numThreads,
  vtkm::cont::DeviceAdapterId device)
{
  using MapType = vtkm::worklet::MaskSelect::ThreadToOutputMapType;

  MapType identityMap;
  identityMap.Allocate(numThreads);

  // Counting array starting at 0 with step 1, numThreads values long.
  auto sequence = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(0, 1, numThreads);
  vtkm::cont::Algorithm::Copy(device, sequence, identityMap);
  return identityMap;
}
struct MaskBuilder
{
template <typename ArrayHandleType>
void operator()(const ArrayHandleType& maskArray,
vtkm::worklet::MaskSelect::ThreadToOutputMapType& threadToOutputMap,
vtkm::cont::DeviceAdapterId device)
{
vtkm::cont::ArrayHandle<vtkm::Id> outputToThreadMap;
vtkm::Id numThreads = vtkm::cont::Algorithm::ScanExclusive(
device, vtkm::cont::make_ArrayHandleCast<vtkm::Id>(maskArray), outputToThreadMap);
VTKM_ASSERT(numThreads <= maskArray.GetNumberOfValues());
// We have implemented two different ways to compute the thread to output map. The first way is
// to use a binary search on each thread index into the output map. The second way is to
// schedule on each output and copy the the index to the thread map. The first way is faster
// for output sizes that are small relative to the input and also tends to be well load
// balanced. The second way is faster for larger outputs.
//
// The former is obviously faster for one thread and the latter is obviously faster when all
// outputs have a thread. We have to guess for values in the middle. I'm using if the square of
// the number of threads is less than the number of outputs because it is easy to compute.
if (numThreads == maskArray.GetNumberOfValues())
{ //fast path when everything is on
threadToOutputMap = BuildThreadToOutputMapAllOn(numThreads, device);
}
else if ((numThreads * numThreads) < maskArray.GetNumberOfValues())
{
threadToOutputMap = BuildThreadToOutputMapWithFind(numThreads, outputToThreadMap, device);
}
else
{
threadToOutputMap =
BuildThreadToOutputMapWithCopy(numThreads, outputToThreadMap, maskArray, device);
}
}
};
} // anonymous namespace
// Builds the thread-to-output map for a mask array of unknown type.
//
// maskArray - mask with one entry per output; it is resolved against the value
//             types in MaskTypes with basic storage only.
// device    - device on which the map is computed.
//
// Returns the map filled in by MaskBuilder. What happens when the array matches
// none of the tried types is decided by CastAndCallForTypes, not by this
// function.
vtkm::worklet::MaskSelect::ThreadToOutputMapType vtkm::worklet::MaskSelect::Build(
  const vtkm::cont::UnknownArrayHandle& maskArray,
  vtkm::cont::DeviceAdapterId device)
{
  VTKM_LOG_SCOPE(vtkm::cont::LogLevel::Perf, "MaskSelect::Build");

  using BasicStorageOnly = vtkm::List<vtkm::cont::StorageTagBasic>;

  ThreadToOutputMapType result;
  maskArray.CastAndCallForTypes<MaskTypes, BasicStorageOnly>(MaskBuilder{}, result, device);
  return result;
}