Use std::copy in serial Copy implementation.
I had assumed that the compiler would be clever enough to turn the iterative implementation of Copy into a memcpy, but inspecting the disassembly on a release GCC build shows that this is not the case, likely because it can't assume that the memory ranges do not overlap.

Replacing the loop with std::copy speeds things up (about 30-50%) for most data types, though there is a slight (usually < 5%) slowdown for Vec types. The uint8 copy improved by a factor of 8.

Comparison:

| Speedup | iteration            | std::copy            | Benchmark (Type) |
|---------|----------------------|----------------------|------------------|
| 1.363   | 0.001590 +- 0.000087 | 0.001166 +- 0.000049 | Copy 2097152 values (vtkm::Float32) |
| 1.487   | 0.003429 +- 0.000185 | 0.002305 +- 0.000146 | Copy 2097152 values (vtkm::Float64) |
| 1.379   | 0.001568 +- 0.000072 | 0.001137 +- 0.000093 | Copy 2097152 values (vtkm::Int32) |
| 1.420   | 0.003410 +- 0.000173 | 0.002402 +- 0.000101 | Copy 2097152 values (vtkm::Int64) |
| 1.303   | 0.001564 +- 0.000083 | 0.001201 +- 0.000078 | Copy 2097152 values (vtkm::UInt32) |
| 7.204   | 0.002441 +- 0.000104 | 0.000339 +- 0.000029 | Copy 2097152 values (vtkm::UInt8) |
| 0.987   | 0.006602 +- 0.000266 | 0.006688 +- 0.000291 | Copy 2097152 values (vtkm::Vec< vtkm::Float32, 4 >) |
| 0.965   | 0.010065 +- 0.000528 | 0.010427 +- 0.000617 | Copy 2097152 values (vtkm::Vec< vtkm::Float64, 3 >) |
| 0.979   | 0.003327 +- 0.000191 | 0.003398 +- 0.000142 | Copy 2097152 values (vtkm::Vec< vtkm::Int32, 2 >) |
| 0.851   | 0.001579 +- 0.000090 | 0.001856 +- 0.000098 | Copy 2097152 values (vtkm::Vec< vtkm::UInt8, 4 >) |
This commit is contained in:
parent
b396716f86
commit
825f351d04
@ -50,6 +50,19 @@ private:
|
||||
using Device = vtkm::cont::DeviceAdapterTagSerial;
|
||||
|
||||
public:
|
||||
template <typename T, typename U, class CIn, class COut>
|
||||
VTKM_CONT static void Copy(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||
vtkm::cont::ArrayHandle<U, COut>& output)
|
||||
{
|
||||
const vtkm::Id inSize = input.GetNumberOfValues();
|
||||
auto inputPortal = input.PrepareForInput(DeviceAdapterTagSerial());
|
||||
auto outputPortal = output.PrepareForOutput(inSize, DeviceAdapterTagSerial());
|
||||
|
||||
std::copy(vtkm::cont::ArrayPortalToIteratorBegin(inputPortal),
|
||||
vtkm::cont::ArrayPortalToIteratorEnd(inputPortal),
|
||||
vtkm::cont::ArrayPortalToIteratorBegin(outputPortal));
|
||||
}
|
||||
|
||||
template <typename T, typename U, class CIn, class CStencil, class COut>
|
||||
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||
const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
|
||||
@ -87,6 +100,56 @@ public:
|
||||
output.Shrink(writePos);
|
||||
}
|
||||
|
||||
template <typename T, typename U, class CIn, class COut>
|
||||
VTKM_CONT static bool CopySubRange(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||
vtkm::Id inputStartIndex,
|
||||
vtkm::Id numberOfElementsToCopy,
|
||||
vtkm::cont::ArrayHandle<U, COut>& output,
|
||||
vtkm::Id outputIndex = 0)
|
||||
{
|
||||
const vtkm::Id inSize = input.GetNumberOfValues();
|
||||
if (inputStartIndex < 0 || numberOfElementsToCopy < 0 || outputIndex < 0 ||
|
||||
inputStartIndex >= inSize)
|
||||
{ //invalid parameters
|
||||
return false;
|
||||
}
|
||||
|
||||
//determine if the numberOfElementsToCopy needs to be reduced
|
||||
if (inSize < (inputStartIndex + numberOfElementsToCopy))
|
||||
{ //adjust the size
|
||||
numberOfElementsToCopy = (inSize - inputStartIndex);
|
||||
}
|
||||
|
||||
const vtkm::Id outSize = output.GetNumberOfValues();
|
||||
const vtkm::Id copyOutEnd = outputIndex + numberOfElementsToCopy;
|
||||
if (outSize < copyOutEnd)
|
||||
{ //output is not large enough
|
||||
if (outSize == 0)
|
||||
{ //since output has nothing, just need to allocate to correct length
|
||||
output.Allocate(copyOutEnd);
|
||||
}
|
||||
else
|
||||
{ //we currently have data in this array, so preserve it in the new
|
||||
//resized array
|
||||
vtkm::cont::ArrayHandle<U, COut> temp;
|
||||
temp.Allocate(copyOutEnd);
|
||||
CopySubRange(output, 0, outSize, temp);
|
||||
output = temp;
|
||||
}
|
||||
}
|
||||
|
||||
auto inputPortal = input.PrepareForInput(DeviceAdapterTagSerial());
|
||||
auto outputPortal = output.PrepareForInPlace(DeviceAdapterTagSerial());
|
||||
auto inIter = vtkm::cont::ArrayPortalToIteratorBegin(inputPortal);
|
||||
auto outIter = vtkm::cont::ArrayPortalToIteratorBegin(outputPortal);
|
||||
|
||||
std::copy(inIter + inputStartIndex,
|
||||
inIter + inputStartIndex + numberOfElementsToCopy,
|
||||
outIter + outputIndex);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T, typename U, class CIn>
|
||||
VTKM_CONT static U Reduce(const vtkm::cont::ArrayHandle<T, CIn>& input, U initialValue)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user