Fix slow ArrayCopy calls

The `ArrayCopy` method has been changed to be precompiled. The precompiled
version handles most standard array types, but some special `ArrayHandle`
types are not handled by it and instead go to a slow fallback. This commit
finds places in the code that use that fallback and fixes them.

There are also some instances of replacing an `ArrayHandleCounting` with
an `ArrayHandleIndex`. This change is probably not strictly necessary to
make `ArrayCopy` faster, but when it can be used, `ArrayHandleIndex`
is generally better.
This commit is contained in:
Kenneth Moreland 2022-01-20 10:53:16 -07:00
parent 63c45efc04
commit 170a10e4b4
17 changed files with 97 additions and 66 deletions

@ -208,6 +208,15 @@ public:
FirstStorage::CreateWritePortal(FirstArrayBuffers(buffers), device, token),
SecondStorage::CreateWritePortal(SecondArrayBuffers(buffers), device, token));
}
/// Builds and returns a `vtkm::cont::ArrayHandle<T1, ST1>` from the buffers
/// that `FirstArrayBuffers` selects out of the given `buffers` pointer.
///
/// \param buffers Pointer to the buffers belonging to this storage.
/// \return An array handle brace-initialized from `FirstArrayBuffers(buffers)`.
vtkm::cont::ArrayHandle<T1, ST1> GetFirstArray(const vtkm::cont::internal::Buffer* buffers)
{
return { FirstArrayBuffers(buffers) };
}
/// Builds and returns a `vtkm::cont::ArrayHandle<T2, ST2>` from the buffers
/// that `SecondArrayBuffers` selects out of the given `buffers` pointer.
///
/// \param buffers Pointer to the buffers belonging to this storage.
/// \return An array handle brace-initialized from `SecondArrayBuffers(buffers)`.
vtkm::cont::ArrayHandle<T2, ST2> GetSecondArray(const vtkm::cont::internal::Buffer* buffers)
{
return { SecondArrayBuffers(buffers) };
}
};
} // namespace internal
@ -238,6 +247,15 @@ public:
: Superclass(vtkm::cont::internal::CreateBuffers(firstArray, secondArray))
{
}
/// Returns the first array as a `FirstHandleType`.
///
/// The result is obtained by passing this handle's buffers to the storage
/// object's `GetFirstArray`.
FirstHandleType GetFirstArray() const
{
return this->GetStorage().GetFirstArray(this->GetBuffers());
}
/// Returns the second array as a `SecondHandleType`.
///
/// The result is obtained by passing this handle's buffers to the storage
/// object's `GetSecondArray`.
SecondHandleType GetSecondArray() const
{
return this->GetStorage().GetSecondArray(this->GetBuffers());
}
};
/// A convenience function for creating an ArrayHandleZip. It takes the two

@ -10,6 +10,7 @@
#ifndef vtk_m_cont_DataSetBuilderRectilinear_h
#define vtk_m_cont_DataSetBuilderRectilinear_h
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayHandleCartesianProduct.h>
#include <vtkm/cont/ArrayPortalToIterators.h>
#include <vtkm/cont/CoordinateSystem.h>
@ -34,7 +35,7 @@ class VTKM_CONT_EXPORT DataSetBuilderRectilinear
VTKM_CONT static void CopyInto(const vtkm::cont::ArrayHandle<T>& input,
vtkm::cont::ArrayHandle<U>& output)
{
vtkm::cont::UnknownArrayHandle(output).DeepCopyFrom(input);
vtkm::cont::ArrayCopy(input, output);
}
template <typename T, typename U>

@ -12,7 +12,6 @@
#define vtk_m_cont_ParticleArrayCopy_hxx
#include <vtkm/cont/Algorithm.h>
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayHandleTransform.h>
#include <vtkm/cont/Invoker.h>
#include <vtkm/cont/ParticleArrayCopy.h>
@ -84,7 +83,7 @@ VTKM_ALWAYS_EXPORT inline void ParticleArrayCopy(
vtkm::cont::Algorithm::CopyIf(posTrn, termTrn, outPos);
}
else
vtkm::cont::ArrayCopy(posTrn, outPos);
vtkm::cont::Algorithm::Copy(posTrn, outPos);
}

@ -21,7 +21,7 @@
#include <vtkm/cont/Algorithm.h>
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayHandleCounting.h>
#include <vtkm/cont/ArrayHandleIndex.h>
#include <vtkm/cont/Invoker.h>
#include <vtkm/worklet/WorkletMapField.h>
@ -82,9 +82,8 @@ void PointLocatorSparseGrid::Build()
static_cast<vtkm::FloatDefault>(this->Range[2].Max));
// generate unique id for each input point
vtkm::cont::ArrayHandleCounting<vtkm::Id> pointCounting(
0, 1, this->GetCoordinates().GetNumberOfValues());
vtkm::cont::ArrayCopy(pointCounting, this->PointIds);
vtkm::cont::ArrayHandleIndex pointIndex(this->GetCoordinates().GetNumberOfValues());
vtkm::cont::ArrayCopy(pointIndex, this->PointIds);
using internal::BinPointsWorklet;

@ -426,7 +426,7 @@ private:
SetPortal(basicArray.WritePortal());
vtkm::cont::ArrayHandleSOA<ValueType> soaArray;
vtkm::cont::ArrayCopy(basicArray, soaArray);
vtkm::cont::Invoker{}(PassThrough{}, basicArray, soaArray);
VTKM_TEST_ASSERT(soaArray.GetNumberOfValues() == ARRAY_SIZE);
for (vtkm::IdComponent componentIndex = 0; componentIndex < NUM_COMPONENTS; ++componentIndex)
@ -1085,13 +1085,13 @@ private:
VTKM_EXEC void operator()(const InputType& input, vtkm::Id workIndex, vtkm::Id& dummyOut) const
{
using ComponentType = typename InputType::ComponentType;
vtkm::IdComponent expectedSize = static_cast<vtkm::IdComponent>(workIndex + 1);
vtkm::IdComponent expectedSize = static_cast<vtkm::IdComponent>(workIndex);
if (expectedSize != input.GetNumberOfComponents())
{
this->RaiseError("Got unexpected number of components.");
}
vtkm::Id valueIndex = workIndex * (workIndex + 1) / 2;
vtkm::Id valueIndex = workIndex * (workIndex - 1) / 2;
dummyOut = valueIndex;
for (vtkm::IdComponent componentIndex = 0; componentIndex < expectedSize; componentIndex++)
{
@ -1113,8 +1113,7 @@ private:
vtkm::Id sourceArraySize;
vtkm::cont::ArrayHandle<vtkm::Id> numComponentsArray;
vtkm::cont::ArrayCopy(vtkm::cont::ArrayHandleCounting<vtkm::IdComponent>(1, 1, ARRAY_SIZE),
numComponentsArray);
vtkm::cont::ArrayCopy(vtkm::cont::ArrayHandleIndex(ARRAY_SIZE), numComponentsArray);
vtkm::cont::ArrayHandle<vtkm::Id> offsetsArray =
vtkm::cont::ConvertNumComponentsToOffsets(numComponentsArray, sourceArraySize);
@ -1147,13 +1146,13 @@ private:
VTKM_EXEC void operator()(OutputType& output, vtkm::Id workIndex) const
{
using ComponentType = typename OutputType::ComponentType;
vtkm::IdComponent expectedSize = static_cast<vtkm::IdComponent>(workIndex + 1);
vtkm::IdComponent expectedSize = static_cast<vtkm::IdComponent>(workIndex);
if (expectedSize != output.GetNumberOfComponents())
{
this->RaiseError("Got unexpected number of components.");
}
vtkm::Id valueIndex = workIndex * (workIndex + 1) / 2;
vtkm::Id valueIndex = workIndex * (workIndex - 1) / 2;
for (vtkm::IdComponent componentIndex = 0; componentIndex < expectedSize; componentIndex++)
{
output[componentIndex] = TestValue(valueIndex, ComponentType());
@ -1170,8 +1169,7 @@ private:
vtkm::Id sourceArraySize;
vtkm::cont::ArrayHandle<vtkm::Id> numComponentsArray;
vtkm::cont::ArrayCopy(vtkm::cont::ArrayHandleCounting<vtkm::IdComponent>(1, 1, ARRAY_SIZE),
numComponentsArray);
vtkm::cont::ArrayCopy(vtkm::cont::ArrayHandleIndex(ARRAY_SIZE), numComponentsArray);
vtkm::cont::ArrayHandle<vtkm::Id> offsetsArray = vtkm::cont::ConvertNumComponentsToOffsets(
numComponentsArray, sourceArraySize, DeviceAdapterTag());

@ -10,8 +10,12 @@
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayCopyDevice.h>
#include <vtkm/cont/ArrayHandleConcatenate.h>
#include <vtkm/cont/ArrayHandleConstant.h>
#include <vtkm/cont/ArrayHandleCounting.h>
#include <vtkm/cont/ArrayHandleIndex.h>
#include <vtkm/cont/ArrayHandlePermutation.h>
#include <vtkm/cont/ArrayHandleView.h>
#include <vtkm/cont/UncertainArrayHandle.h>
#include <vtkm/cont/UnknownArrayHandle.h>
@ -140,6 +144,25 @@ void TryCopy()
TestValues(input, output);
}
{
// Copy from an ArrayHandleView into a basic array. The view (not the
// underlying basic array) must be the source passed to ArrayCopy and to
// TestValues; otherwise this case only repeats a basic -> basic copy and
// the view path is never exercised.
std::cout << "view -> basic" << std::endl;
vtkm::cont::ArrayHandle<ValueType> input = MakeInputArray<ValueType>();
// View over `input` starting at index 1 with ARRAY_SIZE / 2 values.
auto viewInput = vtkm::cont::make_ArrayHandleView(input, 1, ARRAY_SIZE / 2);
vtkm::cont::ArrayHandle<ValueType> output;
vtkm::cont::ArrayCopy(viewInput, output);
TestValues(viewInput, output);
}
{
// Copy from an ArrayHandleConcatenate into a basic array. The source is a
// basic array followed by a constant array of ARRAY_SIZE / 2 values.
// NOTE(review): "concatinate" in the message below is a misspelling of
// "concatenate"; left unchanged here because it is runtime output.
std::cout << "concatinate -> basic" << std::endl;
vtkm::cont::ArrayHandle<ValueType> input1 = MakeInputArray<ValueType>();
vtkm::cont::ArrayHandleConstant<ValueType> input2(TestValue(6, ValueType{}), ARRAY_SIZE / 2);
auto concatInput = vtkm::cont::make_ArrayHandleConcatenate(input1, input2);
vtkm::cont::ArrayHandle<ValueType> output;
// The concatenated handle itself is both the copy source and the reference
// that the output is checked against.
vtkm::cont::ArrayCopy(concatInput, output);
TestValues(concatInput, output);
}
{
std::cout << "permutation -> basic" << std::endl;
vtkm::cont::ArrayHandle<vtkm::Id> indices;

@ -8,7 +8,7 @@
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayCopyDevice.h>
#include <vtkm/cont/ArrayHandleBasic.h>
#include <vtkm/cont/ArrayHandleCartesianProduct.h>
#include <vtkm/cont/ArrayHandleCompositeVector.h>
@ -84,13 +84,13 @@ void FillArray(vtkm::cont::ArrayHandle<T, S>& array)
using Traits = vtkm::VecTraits<T>;
vtkm::IdComponent numComponents = Traits::NUM_COMPONENTS;
vtkm::cont::ArrayCopy(vtkm::cont::make_ArrayHandleConstant(T{}, ARRAY_SIZE), array);
array.AllocateAndFill(ARRAY_SIZE, vtkm::TypeTraits<T>::ZeroInitialization());
for (vtkm::IdComponent component = 0; component < numComponents; ++component)
{
vtkm::cont::ArrayHandleRandomUniformReal<vtkm::Float64> randomArray(ARRAY_SIZE);
auto dest = vtkm::cont::make_ArrayHandleExtractComponent(array, component);
vtkm::cont::ArrayCopy(randomArray, dest);
vtkm::cont::ArrayCopyDevice(randomArray, dest);
}
}

@ -7,7 +7,7 @@
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayCopyDevice.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleConstant.h>
#include <vtkm/cont/ArrayHandleCounting.h>
@ -143,16 +143,13 @@ auto PermutationArray = vtkm::cont::ArrayHandleCounting<vtkm::Id>(0, 2, BaseLine
vtkm::cont::CellSetExplicit<> MakeCellSetExplicit()
{
vtkm::cont::ArrayHandle<vtkm::UInt8> shapes;
vtkm::cont::ArrayCopy(vtkm::cont::ArrayHandleConstant<vtkm::UInt8>{ vtkm::CELL_SHAPE_HEXAHEDRON,
BaseLineNumberOfCells },
shapes);
shapes.AllocateAndFill(BaseLineNumberOfCells, vtkm::CELL_SHAPE_HEXAHEDRON);
vtkm::cont::ArrayHandle<vtkm::IdComponent> numIndices;
vtkm::cont::ArrayCopy(
vtkm::cont::ArrayHandleConstant<vtkm::IdComponent>{ 8, BaseLineNumberOfCells }, numIndices);
numIndices.AllocateAndFill(BaseLineNumberOfCells, 8);
vtkm::cont::ArrayHandle<vtkm::Id> connectivity;
vtkm::cont::ArrayCopy(BaseLineConnectivity, connectivity);
vtkm::cont::ArrayCopyDevice(BaseLineConnectivity, connectivity);
auto offsets = vtkm::cont::ConvertNumComponentsToOffsets(numIndices);

@ -10,7 +10,7 @@
#ifndef vtk_m_worklet_RemoveUnusedPoints_h
#define vtk_m_worklet_RemoveUnusedPoints_h
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayCopyDevice.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleConstant.h>
#include <vtkm/cont/ArrayHandlePermutation.h>
@ -102,9 +102,7 @@ public:
if (this->MaskArray.GetNumberOfValues() < 1)
{
// Initialize mask array to 0.
vtkm::cont::ArrayCopy(
vtkm::cont::ArrayHandleConstant<vtkm::IdComponent>(0, inCellSet.GetNumberOfPoints()),
this->MaskArray);
this->MaskArray.AllocateAndFill(inCellSet.GetNumberOfPoints(), 0);
}
VTKM_ASSERT(this->MaskArray.GetNumberOfValues() == inCellSet.GetNumberOfPoints());
@ -257,7 +255,7 @@ public:
VTKM_CONT void MapPointFieldDeep(const vtkm::cont::ArrayHandle<InT, InS>& inArray,
vtkm::cont::ArrayHandle<OutT, OutS>& outArray) const
{
vtkm::cont::ArrayCopy(this->MapPointFieldShallow(inArray), outArray);
vtkm::cont::ArrayCopyDevice(this->MapPointFieldShallow(inArray), outArray);
}
template <typename T, typename S>

@ -23,6 +23,6 @@ set(libraries
vtkm_unit_tests(
SOURCES ${unit_tests}
LIBRARIES ${libraries}
ALL_BACKENDS # use ArrayCopy
ALL_BACKENDS # UnitTestParticleDensity.cxx uses DescriptiveStatistics worklet
USE_VTKM_JOB_POOL
)

@ -8,6 +8,8 @@
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayCopyDevice.h>
#include <vtkm/cont/ArrayHandleRandomUniformReal.h>
#include <vtkm/cont/DataSetBuilderExplicit.h>
#include <vtkm/cont/testing/Testing.h>
@ -27,7 +29,7 @@ void TestNGP()
vtkm::cont::ArrayHandleRandomUniformReal<vtkm::Float32>(N, 0xdeed),
vtkm::cont::ArrayHandleRandomUniformReal<vtkm::Float32>(N, 0xabba));
vtkm::cont::ArrayHandle<vtkm::Vec3f> positions;
vtkm::cont::ArrayCopy(composite, positions);
vtkm::cont::ArrayCopyDevice(composite, positions);
vtkm::cont::ArrayHandle<vtkm::Id> connectivity;
vtkm::cont::ArrayCopy(vtkm::cont::make_ArrayHandleIndex(N), connectivity);
@ -36,8 +38,8 @@ void TestNGP()
positions, vtkm::CellShapeTagVertex{}, 1, connectivity);
vtkm::cont::ArrayHandle<vtkm::FloatDefault> mass;
vtkm::cont::ArrayCopy(vtkm::cont::ArrayHandleRandomUniformReal<vtkm::FloatDefault>(N, 0xd1ce),
mass);
vtkm::cont::ArrayCopyDevice(
vtkm::cont::ArrayHandleRandomUniformReal<vtkm::FloatDefault>(N, 0xd1ce), mass);
dataSet.AddCellField("mass", mass);
auto cellDims = vtkm::Id3{ 3, 3, 3 };
@ -78,7 +80,7 @@ void TestCIC()
vtkm::cont::ArrayHandleRandomUniformReal<vtkm::Float32>(N, 0xdeed),
vtkm::cont::ArrayHandleRandomUniformReal<vtkm::Float32>(N, 0xabba));
vtkm::cont::ArrayHandle<vtkm::Vec3f> positions;
vtkm::cont::ArrayCopy(composite, positions);
vtkm::cont::ArrayCopyDevice(composite, positions);
vtkm::cont::ArrayHandle<vtkm::Id> connectivity;
vtkm::cont::ArrayCopy(vtkm::cont::make_ArrayHandleIndex(N), connectivity);
@ -87,7 +89,8 @@ void TestCIC()
positions, vtkm::CellShapeTagVertex{}, 1, connectivity);
vtkm::cont::ArrayHandle<vtkm::Float32> mass;
vtkm::cont::ArrayCopy(vtkm::cont::ArrayHandleRandomUniformReal<vtkm::Float32>(N, 0xd1ce), mass);
vtkm::cont::ArrayCopyDevice(vtkm::cont::ArrayHandleRandomUniformReal<vtkm::Float32>(N, 0xd1ce),
mass);
dataSet.AddCellField("mass", mass);
auto cellDims = vtkm::Id3{ 3, 3, 3 };

@ -18,6 +18,7 @@
#include <vtkm/cont/Algorithm.h>
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayCopyDevice.h>
#include <vtkm/cont/ArrayGetValues.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleConcatenate.h>
@ -940,7 +941,9 @@ public:
auto offsetsArray =
vtkm::cont::make_ArrayHandleConcatenate(faceOffsetsTrim, adjustedPolyDataOffsets);
OffsetsArrayType joinedOffsets;
vtkm::cont::ArrayCopy(offsetsArray, joinedOffsets);
// Need to compile a special device copy because the precompiled ArrayCopy does not
// know how to copy the ArrayHandleTransform.
vtkm::cont::ArrayCopyDevice(offsetsArray, joinedOffsets);
vtkm::cont::ArrayHandleConcatenate<vtkm::cont::ArrayHandle<vtkm::Id>,
vtkm::cont::ArrayHandle<vtkm::Id>>

@ -11,7 +11,7 @@
#define vtk_m_worklet_AverageByKey_h
#include <vtkm/VecTraits.h>
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayCopyDevice.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/worklet/DescriptiveStatistics.h>
#include <vtkm/worklet/Keys.h>
@ -86,23 +86,13 @@ struct AverageByKey
return outAverages;
}
struct ExtractKey
{
template <typename First, typename Second>
VTKM_EXEC First operator()(const vtkm::Pair<First, Second>& pair) const
{
return pair.first;
}
};
struct ExtractMean
{
template <typename KeyType, typename ValueType>
VTKM_EXEC ValueType operator()(
const vtkm::Pair<KeyType, vtkm::worklet::DescriptiveStatistics::StatState<ValueType>>& pair)
const
template <typename ValueType>
VTKM_EXEC ValueType
operator()(const vtkm::worklet::DescriptiveStatistics::StatState<ValueType>& state) const
{
return pair.second.Mean();
return state.Mean();
}
};
@ -131,13 +121,15 @@ struct AverageByKey
VTKM_LOG_SCOPE(vtkm::cont::LogLevel::Perf, "AverageByKey::Run");
auto results = vtkm::worklet::DescriptiveStatistics::Run(keyArray, valueArray);
// Copy/TransformCopy from results to outputKeyArray and outputValueArray
auto results_key = vtkm::cont::make_ArrayHandleTransform(results, ExtractKey{});
auto results_mean = vtkm::cont::make_ArrayHandleTransform(results, ExtractMean{});
vtkm::cont::ArrayCopy(results_key, outputKeyArray);
vtkm::cont::ArrayCopy(results_mean, outputValueArray);
// Extract results to outputKeyArray and outputValueArray
outputKeyArray = results.GetFirstArray();
// TODO: DescriptiveStatistics should write its output to a SOA instead of an AOS.
// An ArrayHandle of a weird struct by itself is not useful in any general algorithm.
// In fact, using DescriptiveStatistics at all seems like way overkill. It computes
// all sorts of statistics, and we then throw them all away except for mean.
auto resultsMean =
vtkm::cont::make_ArrayHandleTransform(results.GetSecondArray(), ExtractMean{});
vtkm::cont::ArrayCopyDevice(resultsMean, outputValueArray);
}
};
}

@ -56,7 +56,7 @@
// global libraries
#include <vtkm/cont/Algorithm.h>
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayCopyDevice.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandlePermutation.h>
#include <vtkm/cont/ArrayHandleTransform.h>
@ -106,7 +106,7 @@ inline void PermuteArray(const ArrayType& input, IdArrayType& permute, ArrayType
// fancy vtkm array so that we do not actually copy any data here
permute_type permutedInput(maskedPermuteIndex, input);
// Finally, copy the permuted values to the output array
vtkm::cont::ArrayCopy(permutedInput, output);
vtkm::cont::ArrayCopyDevice(permutedInput, output);
} // permuteValues()

@ -225,7 +225,7 @@ public:
vtkm::cont::ArrayHandle<vtkm::Id> cellIndex;
vtkm::Id connectivityLen = vtkm::cont::Algorithm::ScanExclusive(numPoints, cellIndex);
vtkm::cont::ArrayHandleCounting<vtkm::Id> connCount(0, 1, connectivityLen);
vtkm::cont::ArrayHandleIndex connCount(connectivityLen);
vtkm::cont::ArrayHandle<vtkm::Id> connectivity;
vtkm::cont::ArrayCopy(connCount, connectivity);

@ -70,7 +70,8 @@ void TryKeyType(KeyType)
VTKM_TEST_ASSERT(keys.GetInputRange() == NUM_UNIQUE, "Keys has bad input range.");
// Create values array
vtkm::cont::ArrayHandleCounting<vtkm::FloatDefault> valuesArray(0.0f, 1.0f, ARRAY_SIZE);
vtkm::cont::ArrayHandle<vtkm::FloatDefault> valuesArray;
vtkm::cont::ArrayCopy(vtkm::cont::ArrayHandleIndex(ARRAY_SIZE), valuesArray);
std::cout << " Try average with Keys object" << std::endl;
CheckAverageByKey(keys.GetUniqueKeys(), vtkm::worklet::AverageByKey::Run(keys, valuesArray));

@ -152,8 +152,7 @@ inline vtkm::cont::DataSet MakeRadiantDataSet::Make3DRadiantDataSet(vtkm::IdComp
DataArrayHandle distanceToOther(coordinates, EuclideanNorm(CoordType(1., 1., 1.)));
vtkm::cont::ArrayHandle<vtkm::FloatDefault> cellFieldArray;
vtkm::cont::ArrayCopy(vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(0, 1, nCells),
cellFieldArray);
vtkm::cont::ArrayCopy(vtkm::cont::make_ArrayHandleIndex(nCells), cellFieldArray);
ConnectivityArrayHandle connectivity(
vtkm::cont::ArrayHandleCounting<vtkm::Id>(0, 1, nCells * HexTraits::NUM_POINTS),