Pin user provided memory in ArrayHandle

Often when a user gives memory to an `ArrayHandle`, she wants data to be
written into the memory given to be used elsewhere. Previously, the
`Buffer` objects would delete the given buffer as soon as a write buffer
was created elsewhere. That was a problem if a user wants VTK-m to write
results right into a given buffer.

Instead, when a user provides memory, "pin" that memory so that the
`ArrayHandle` never deletes it.
This commit is contained in:
Kenneth Moreland 2020-06-24 17:27:36 -06:00
parent 56bec1dd7b
commit a47fd42bc1
11 changed files with 422 additions and 119 deletions

@ -47,17 +47,35 @@ public:
throw vtkm::cont::ErrorBadDevice("Tried to manage memory on an invalid device.");
}
VTKM_CONT virtual void CopyHostToDevice(const vtkm::cont::internal::BufferInfo&,
const vtkm::cont::internal::BufferInfo&) const override
{
throw vtkm::cont::ErrorBadDevice("Tried to manage memory on an invalid device.");
}
VTKM_CONT virtual vtkm::cont::internal::BufferInfo CopyDeviceToHost(
const vtkm::cont::internal::BufferInfo&) const override
{
throw vtkm::cont::ErrorBadDevice("Tried to manage memory on an invalid device.");
}
VTKM_CONT virtual void CopyDeviceToHost(const vtkm::cont::internal::BufferInfo&,
const vtkm::cont::internal::BufferInfo&) const override
{
throw vtkm::cont::ErrorBadDevice("Tried to manage memory on an invalid device.");
}
VTKM_CONT virtual vtkm::cont::internal::BufferInfo CopyDeviceToDevice(
const vtkm::cont::internal::BufferInfo&) const override
{
throw vtkm::cont::ErrorBadDevice("Tried to manage memory on an invalid device.");
}
VTKM_CONT virtual void CopyDeviceToDevice(const vtkm::cont::internal::BufferInfo&,
const vtkm::cont::internal::BufferInfo&) const override
{
throw vtkm::cont::ErrorBadDevice("Tried to manage memory on an invalid device.");
}
};
struct VTKM_NEVER_EXPORT InitializeDeviceNames

@ -15,6 +15,8 @@
#include <vtkm/cont/ErrorBadAllocation.h>
#include <vtkm/Math.h>
namespace
{
@ -97,12 +99,12 @@ DeviceAdapterMemoryManager<vtkm::cont::DeviceAdapterTagCuda>::CopyHostToDevice(
{
VTKM_ASSERT(src.GetDevice() == vtkm::cont::DeviceAdapterTagUndefined{});
using vtkm::cont::cuda::internal::CudaAllocator;
if (CudaAllocator::IsManagedPointer(src.GetPointer()))
if (vtkm::cont::cuda::internal::CudaAllocator::IsManagedPointer(src.GetPointer()))
{
// In the current code structure, we don't know whether this buffer is going to be used
// for input or output. (Currently, I don't think there is any difference.)
CudaAllocator::PrepareForOutput(src.GetPointer(), static_cast<std::size_t>(src.GetSize()));
vtkm::cont::cuda::internal::CudaAllocator::PrepareForOutput(
src.GetPointer(), static_cast<std::size_t>(src.GetSize()));
// The provided control pointer is already cuda managed and can be accessed on the device
// via unified memory. Just shallow copy the pointer.
@ -113,54 +115,96 @@ DeviceAdapterMemoryManager<vtkm::cont::DeviceAdapterTagCuda>::CopyHostToDevice(
// Make a new buffer
vtkm::cont::internal::BufferInfo dest = this->Allocate(src.GetSize());
// Copy the data to the new buffer
VTKM_LOG_F(vtkm::cont::LogLevel::MemTransfer,
"Copying host --> CUDA dev: %s (%lld bytes)",
vtkm::cont::GetHumanReadableSize(static_cast<std::size_t>(src.GetSize())).c_str(),
src.GetSize());
VTKM_CUDA_CALL(cudaMemcpyAsync(dest.GetPointer(),
src.GetPointer(),
static_cast<std::size_t>(src.GetSize()),
cudaMemcpyHostToDevice,
cudaStreamPerThread));
this->CopyHostToDevice(src, dest);
return dest;
}
}
void DeviceAdapterMemoryManager<vtkm::cont::DeviceAdapterTagCuda>::CopyHostToDevice(
const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const
{
if (vtkm::cont::cuda::internal::CudaAllocator::IsManagedPointer(src.GetPointer()) &&
src.GetPointer() == dest.GetPointer())
{
// In the current code structure, we don't know whether this buffer is going to be used
// for input or output. (Currently, I don't think there is any difference.)
vtkm::cont::cuda::internal::CudaAllocator::PrepareForOutput(
src.GetPointer(), static_cast<std::size_t>(src.GetSize()));
// The provided pointers are both cuda managed and the same, so the data are already
// the same.
}
else
{
vtkm::BufferSizeType size = vtkm::Min(src.GetSize(), dest.GetSize());
VTKM_LOG_F(vtkm::cont::LogLevel::MemTransfer,
"Copying host --> CUDA dev: %s (%lld bytes)",
vtkm::cont::GetHumanReadableSize(static_cast<std::size_t>(size)).c_str(),
size);
VTKM_CUDA_CALL(cudaMemcpyAsync(
dest.GetPointer(), src.GetPointer(), size, cudaMemcpyHostToDevice, cudaStreamPerThread));
}
}
vtkm::cont::internal::BufferInfo
DeviceAdapterMemoryManager<vtkm::cont::DeviceAdapterTagCuda>::CopyDeviceToHost(
const vtkm::cont::internal::BufferInfo& src) const
{
using vtkm::cont::cuda::internal::CudaAllocator;
VTKM_ASSERT(src.GetDevice() == vtkm::cont::DeviceAdapterTagCuda{});
vtkm::cont::internal::BufferInfo dest;
if (CudaAllocator::IsManagedPointer(src.GetPointer()))
if (vtkm::cont::cuda::internal::CudaAllocator::IsManagedPointer(src.GetPointer()))
{
// The provided control pointer is already cuda managed and can be accessed on the host
// via unified memory. Just shallow copy the pointer.
CudaAllocator::PrepareForControl(src.GetPointer(), static_cast<std::size_t>(src.GetSize()));
dest = vtkm::cont::internal::BufferInfo(src, vtkm::cont::DeviceAdapterTagCuda{});
vtkm::cont::cuda::internal::CudaAllocator::PrepareForControl(
src.GetPointer(), static_cast<std::size_t>(src.GetSize()));
dest = vtkm::cont::internal::BufferInfo(src, vtkm::cont::DeviceAdapterTagUndefined{});
//In all cases we have possibly multiple async calls queued up in
//our stream. We need to block on the copy back to control since
//we don't wanting it accessing memory that hasn't finished
//being used by the GPU
vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapterTagCuda>::Synchronize();
}
else
{
// Make a new buffer
dest = vtkm::cont::internal::AllocateOnHost(src.GetSize());
// Copy the data to the new buffer
this->CopyDeviceToHost(src, dest);
}
return dest;
}
void DeviceAdapterMemoryManager<vtkm::cont::DeviceAdapterTagCuda>::CopyDeviceToHost(
const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const
{
if (vtkm::cont::cuda::internal::CudaAllocator::IsManagedPointer(dest.GetPointer()) &&
src.GetPointer() == dest.GetPointer())
{
// The provided pointers are both cuda managed and the same, so the data are already
// the same.
}
else
{
vtkm::BufferSizeType size = vtkm::Min(src.GetSize(), dest.GetSize());
VTKM_LOG_F(vtkm::cont::LogLevel::MemTransfer,
"Copying CUDA dev --> host: %s (%lld bytes)",
vtkm::cont::GetHumanReadableSize(static_cast<vtkm::UInt64>(src.GetSize())).c_str(),
src.GetSize());
vtkm::cont::GetHumanReadableSize(static_cast<std::size_t>(size)).c_str(),
size);
VTKM_CUDA_CALL(cudaMemcpyAsync(dest.GetPointer(),
src.GetPointer(),
static_cast<std::size_t>(src.GetSize()),
cudaMemcpyDeviceToHost,
cudaStreamPerThread));
VTKM_CUDA_CALL(cudaMemcpyAsync(
dest.GetPointer(), src.GetPointer(), size, cudaMemcpyDeviceToHost, cudaStreamPerThread));
}
//In all cases we have possibly multiple async calls queued up in
@ -168,8 +212,6 @@ DeviceAdapterMemoryManager<vtkm::cont::DeviceAdapterTagCuda>::CopyDeviceToHost(
//we don't wanting it accessing memory that hasn't finished
//being used by the GPU
vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapterTagCuda>::Synchronize();
return dest;
}
vtkm::cont::internal::BufferInfo
@ -177,13 +219,20 @@ DeviceAdapterMemoryManager<vtkm::cont::DeviceAdapterTagCuda>::CopyDeviceToDevice
const vtkm::cont::internal::BufferInfo& src) const
{
vtkm::cont::internal::BufferInfo dest = this->Allocate(src.GetSize());
this->CopyDeviceToDevice(src, dest);
return dest;
}
void DeviceAdapterMemoryManager<vtkm::cont::DeviceAdapterTagCuda>::CopyDeviceToDevice(
const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const
{
VTKM_CUDA_CALL(cudaMemcpyAsync(dest.GetPointer(),
src.GetPointer(),
static_cast<std::size_t>(src.GetSize()),
cudaMemcpyDeviceToDevice,
cudaStreamPerThread));
return dest;
}
}
}

@ -33,11 +33,23 @@ public:
VTKM_CONT vtkm::cont::internal::BufferInfo CopyHostToDevice(
const vtkm::cont::internal::BufferInfo& src) const override;
VTKM_CONT virtual void CopyHostToDevice(
const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const override;
VTKM_CONT vtkm::cont::internal::BufferInfo CopyDeviceToHost(
const vtkm::cont::internal::BufferInfo& src) const override;
VTKM_CONT virtual void CopyDeviceToHost(
const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const override;
VTKM_CONT vtkm::cont::internal::BufferInfo CopyDeviceToDevice(
const vtkm::cont::internal::BufferInfo& src) const override;
VTKM_CONT virtual void CopyDeviceToDevice(
const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const override;
};
}
}

@ -10,6 +10,7 @@
#include <vtkm/internal/Assume.h>
#include <vtkm/cont/ErrorBadAllocation.h>
#include <vtkm/cont/ErrorBadDevice.h>
#include <vtkm/cont/RuntimeDeviceInformation.h>
@ -21,8 +22,65 @@
#include <deque>
#include <map>
namespace
{
using LockType = std::unique_lock<std::mutex>;
struct BufferState
{
vtkm::cont::internal::BufferInfo Info;
bool Pinned = false;
bool UpToDate = false;
BufferState() = default;
BufferState(const vtkm::cont::internal::BufferInfo& info,
bool pinned = false,
bool upToDate = true)
: Info(info)
, Pinned(pinned)
, UpToDate(upToDate)
{
}
// Automatically convert to BufferInfo
operator vtkm::cont::internal::BufferInfo&() { return this->Info; }
operator const vtkm::cont::internal::BufferInfo&() const { return this->Info; }
// Pass through common BufferInfo methods.
void* GetPointer() const { return this->Info.GetPointer(); }
vtkm::BufferSizeType GetSize() const { return this->Info.GetSize(); }
vtkm::cont::DeviceAdapterId GetDevice() const { return this->Info.GetDevice(); }
void Reallocate(vtkm::BufferSizeType newSize)
{
if (this->Info.GetSize() != newSize)
{
if (this->Pinned)
{
throw vtkm::cont::ErrorBadAllocation("Attempted to reallocate a pinned buffer.");
}
this->Info.Reallocate(newSize);
}
}
/// Releases the buffer. If the memory is not pinned, it is deleted. In any case, it is
/// marked as no longer up to date.
void Release()
{
if (!this->Pinned)
{
this->Info = vtkm::cont::internal::BufferInfo{};
}
this->UpToDate = false;
}
};
} // anonymous namespace
namespace vtkm
{
namespace cont
@ -33,7 +91,7 @@ namespace internal
class Buffer::InternalsStruct
{
public:
using DeviceBufferMap = std::map<vtkm::cont::DeviceAdapterId, vtkm::cont::internal::BufferInfo>;
using DeviceBufferMap = std::map<vtkm::cont::DeviceAdapterId, BufferState>;
private:
vtkm::cont::Token::ReferenceCount ReadCount = 0;
@ -51,7 +109,7 @@ private:
vtkm::BufferSizeType NumberOfBytes = 0;
DeviceBufferMap DeviceBuffers;
vtkm::cont::internal::BufferInfo HostBuffer;
BufferState HostBuffer;
public:
std::mutex Mutex;
@ -82,7 +140,7 @@ public:
return this->DeviceBuffers;
}
VTKM_CONT vtkm::cont::internal::BufferInfo& GetHostBuffer(const LockType& lock)
VTKM_CONT BufferState& GetHostBuffer(const LockType& lock)
{
this->CheckLock(lock);
return this->HostBuffer;
@ -245,9 +303,9 @@ struct VTKM_NEVER_EXPORT BufferHelper
AccessMode accessMode)
{
Wait(internals, lock, token, accessMode);
vtkm::cont::internal::BufferInfo& hostBuffer = internals->GetHostBuffer(lock);
BufferState& hostBuffer = internals->GetHostBuffer(lock);
vtkm::BufferSizeType targetSize = internals->GetNumberOfBytes(lock);
if (hostBuffer.GetPointer() != nullptr)
if (hostBuffer.UpToDate)
{
// Buffer already exists on the host. Make sure it is the right size.
if (hostBuffer.GetSize() != targetSize)
@ -260,7 +318,7 @@ struct VTKM_NEVER_EXPORT BufferHelper
// Buffer does not exist on host. See if we can find data on a device.
for (auto&& deviceBuffer : internals->GetDeviceBuffers(lock))
{
if (deviceBuffer.second.GetPointer() == nullptr)
if (!deviceBuffer.second.UpToDate)
{
continue;
}
@ -271,21 +329,39 @@ struct VTKM_NEVER_EXPORT BufferHelper
if (deviceBuffer.second.GetSize() > targetSize)
{
// Device buffer too large. Resize.
memoryManager.Reallocate(deviceBuffer.second, targetSize);
deviceBuffer.second.Reallocate(targetSize);
}
hostBuffer = memoryManager.CopyDeviceToHost(deviceBuffer.second);
if (!hostBuffer.Pinned)
{
hostBuffer = memoryManager.CopyDeviceToHost(deviceBuffer.second);
}
else
{
hostBuffer.Reallocate(targetSize);
memoryManager.CopyDeviceToHost(deviceBuffer.second, hostBuffer);
}
if (hostBuffer.GetSize() != targetSize)
{
hostBuffer.Reallocate(targetSize);
}
hostBuffer.UpToDate = true;
return;
}
// Buffer not allocated on host or any device, so just allocate a buffer.
hostBuffer = vtkm::cont::internal::AllocateOnHost(targetSize);
// Buffer not up to date on host or any device, so just allocate a buffer.
if (!hostBuffer.Pinned)
{
hostBuffer = vtkm::cont::internal::AllocateOnHost(targetSize);
}
else
{
hostBuffer.Reallocate(targetSize);
hostBuffer.UpToDate = true;
}
}
static void AllocateOnDevice(const std::shared_ptr<Buffer::InternalsStruct>& internals,
@ -300,7 +376,7 @@ struct VTKM_NEVER_EXPORT BufferHelper
vtkm::cont::internal::DeviceAdapterMemoryManagerBase& memoryManager =
vtkm::cont::RuntimeDeviceInformation().GetMemoryManager(device);
if (deviceBuffers[device].GetPointer() != nullptr)
if (deviceBuffers[device].UpToDate)
{
// Buffer already exists on the device. Make sure it is the right size.
if (deviceBuffers[device].GetSize() != targetSize)
@ -314,11 +390,11 @@ struct VTKM_NEVER_EXPORT BufferHelper
// Buffer does not exist on device. Check to see if it is on another device but not the host.
// We currently do not support device-to-device transfers, so the data has to go to the
// host first.
if (internals->GetHostBuffer(lock).GetPointer() == nullptr)
if (!internals->GetHostBuffer(lock).UpToDate)
{
for (auto&& deviceBuffer : deviceBuffers)
{
if (deviceBuffer.second.GetPointer() != nullptr)
if (deviceBuffer.second.UpToDate)
{
// Copy data to host.
AllocateOnHost(internals, lock, token, accessMode);
@ -328,8 +404,8 @@ struct VTKM_NEVER_EXPORT BufferHelper
}
// If the buffer is now on the host, copy it to the device.
vtkm::cont::internal::BufferInfo& hostBuffer = internals->GetHostBuffer(lock);
if (hostBuffer.GetPointer() != nullptr)
BufferState& hostBuffer = internals->GetHostBuffer(lock);
if (hostBuffer.UpToDate)
{
if (hostBuffer.GetSize() > targetSize)
{
@ -337,7 +413,15 @@ struct VTKM_NEVER_EXPORT BufferHelper
hostBuffer.Reallocate(targetSize);
}
deviceBuffers[device] = memoryManager.CopyHostToDevice(hostBuffer);
if (!deviceBuffers[device].Pinned)
{
deviceBuffers[device] = memoryManager.CopyHostToDevice(hostBuffer);
}
else
{
deviceBuffers[device].Reallocate(targetSize);
memoryManager.CopyHostToDevice(hostBuffer, deviceBuffers[device]);
}
if (deviceBuffers[device].GetSize() != targetSize)
{
@ -345,11 +429,21 @@ struct VTKM_NEVER_EXPORT BufferHelper
}
VTKM_ASSERT(deviceBuffers[device].GetSize() == targetSize);
deviceBuffers[device].UpToDate = true;
return;
}
// Buffer not allocated anywhere, so just allocate a buffer.
deviceBuffers[device] = memoryManager.Allocate(targetSize);
// Buffer not up to date anywhere, so just allocate a buffer.
if (!deviceBuffers[device].Pinned)
{
deviceBuffers[device] = memoryManager.Allocate(targetSize);
}
else
{
deviceBuffers[device].Reallocate(targetSize);
deviceBuffers[device].UpToDate = true;
}
}
static void CopyOnHost(
@ -362,20 +456,23 @@ struct VTKM_NEVER_EXPORT BufferHelper
WaitToRead(srcInternals, srcLock, token);
WaitToWrite(destInternals, destLock, token);
vtkm::BufferSizeType size = srcInternals->GetNumberOfBytes(srcLock);
// Any current buffers in destination can be (and should be) deleted.
destInternals->GetDeviceBuffers(destLock).clear();
// Do this before allocating on the host to avoid unnecessary data copies.
for (auto&& deviceBuffer : destInternals->GetDeviceBuffers(destLock))
{
deviceBuffer.second.Release();
}
// Do the copy
vtkm::cont::internal::BufferInfo& destBuffer = destInternals->GetHostBuffer(destLock);
vtkm::cont::internal::BufferInfo& srcBuffer = srcInternals->GetHostBuffer(srcLock);
destInternals->SetNumberOfBytes(destLock, size);
AllocateOnHost(destInternals, destLock, token, AccessMode::WRITE);
destBuffer = vtkm::cont::internal::AllocateOnHost(srcBuffer.GetSize());
AllocateOnHost(srcInternals, srcLock, token, AccessMode::READ);
std::memcpy(destBuffer.GetPointer(),
srcBuffer.GetPointer(),
static_cast<std::size_t>(srcBuffer.GetSize()));
destInternals->SetNumberOfBytes(destLock, srcInternals->GetNumberOfBytes(srcLock));
std::memcpy(destInternals->GetHostBuffer(destLock).GetPointer(),
srcInternals->GetHostBuffer(srcLock).GetPointer(),
static_cast<std::size_t>(size));
}
static void CopyOnDevice(
@ -390,15 +487,30 @@ struct VTKM_NEVER_EXPORT BufferHelper
WaitToWrite(destInternals, destLock, token);
// Any current buffers in destination can be (and should be) deleted.
destInternals->GetHostBuffer(destLock) = vtkm::cont::internal::BufferInfo{};
destInternals->GetDeviceBuffers(destLock).clear();
// Do this before allocating on the host to avoid unnecessary data copies.
vtkm::cont::internal::Buffer::InternalsStruct::DeviceBufferMap& destDeviceBuffers =
destInternals->GetDeviceBuffers(destLock);
destInternals->GetHostBuffer(destLock).Release();
for (auto&& deviceBuffer : destDeviceBuffers)
{
deviceBuffer.second.Release();
}
// Do the copy
vtkm::cont::internal::DeviceAdapterMemoryManagerBase& memoryManager =
vtkm::cont::RuntimeDeviceInformation().GetMemoryManager(device);
destInternals->GetDeviceBuffers(destLock)[device] =
memoryManager.CopyDeviceToDevice(srcInternals->GetDeviceBuffers(srcLock)[device]);
if (!destDeviceBuffers[device].Pinned)
{
destDeviceBuffers[device] =
memoryManager.CopyDeviceToDevice(srcInternals->GetDeviceBuffers(srcLock)[device]);
}
else
{
memoryManager.CopyDeviceToDevice(srcInternals->GetDeviceBuffers(srcLock)[device],
destDeviceBuffers[device]);
destDeviceBuffers[device].UpToDate = true;
}
destInternals->SetNumberOfBytes(destLock, srcInternals->GetNumberOfBytes(srcLock));
}
@ -471,9 +583,12 @@ void Buffer::SetNumberOfBytes(vtkm::BufferSizeType numberOfBytes, vtkm::CopyFlag
this->Internals->SetNumberOfBytes(lock, numberOfBytes);
if ((preserve == vtkm::CopyFlag::Off) || (numberOfBytes == 0))
{
// No longer need these buffers. Just delete them.
this->Internals->GetHostBuffer(lock) = vtkm::cont::internal::BufferInfo{};
this->Internals->GetDeviceBuffers(lock).clear();
// No longer need these buffers. Just release them.
this->Internals->GetHostBuffer(lock).Release();
for (auto&& deviceBuffer : this->Internals->GetDeviceBuffers(lock))
{
deviceBuffer.second.Release();
}
}
else
{
@ -486,7 +601,7 @@ void Buffer::SetNumberOfBytes(vtkm::BufferSizeType numberOfBytes, vtkm::CopyFlag
bool Buffer::IsAllocatedOnHost() const
{
LockType lock = this->Internals->GetLock();
return (this->Internals->GetHostBuffer(lock).GetPointer() != nullptr);
return this->Internals->GetHostBuffer(lock).UpToDate;
}
bool Buffer::IsAllocatedOnDevice(vtkm::cont::DeviceAdapterId device) const
@ -494,7 +609,7 @@ bool Buffer::IsAllocatedOnDevice(vtkm::cont::DeviceAdapterId device) const
if (device.IsValueValid())
{
LockType lock = this->Internals->GetLock();
return (this->Internals->GetDeviceBuffers(lock)[device].GetPointer() != nullptr);
return this->Internals->GetDeviceBuffers(lock)[device].UpToDate;
}
else if (device == vtkm::cont::DeviceAdapterTagUndefined{})
{
@ -507,7 +622,7 @@ bool Buffer::IsAllocatedOnDevice(vtkm::cont::DeviceAdapterId device) const
LockType lock = this->Internals->GetLock();
for (auto&& deviceBuffer : this->Internals->GetDeviceBuffers(lock))
{
if (deviceBuffer.second.GetPointer() != nullptr)
if (deviceBuffer.second.UpToDate)
{
return true;
}
@ -559,7 +674,10 @@ void* Buffer::WritePointerHost(vtkm::cont::Token& token) const
this->Internals, lock, token, detail::BufferHelper::AccessMode::WRITE);
// Array is being written on host. All other buffers invalidated, so delete them.
this->Internals->GetDeviceBuffers(lock).clear();
for (auto&& deviceBuffer : this->Internals->GetDeviceBuffers(lock))
{
deviceBuffer.second.Release();
}
return this->Internals->GetHostBuffer(lock).GetPointer();
}
@ -574,13 +692,14 @@ void* Buffer::WritePointerDevice(vtkm::cont::DeviceAdapterId device, vtkm::cont:
this->Internals, lock, token, device, detail::BufferHelper::AccessMode::WRITE);
// Array is being written on this device. All other buffers invalided, so delete them.
this->Internals->GetHostBuffer(lock) = vtkm::cont::internal::BufferInfo{};
InternalsStruct::DeviceBufferMap& deviceBuffers = this->Internals->GetDeviceBuffers(lock);
auto iter = deviceBuffers.find(device);
deviceBuffers.erase(deviceBuffers.begin(), iter);
iter = deviceBuffers.find(device);
std::advance(iter, 1);
deviceBuffers.erase(iter, deviceBuffers.end());
this->Internals->GetHostBuffer(lock).Release();
for (auto&& deviceBuffer : this->Internals->GetDeviceBuffers(lock))
{
if (deviceBuffer.first != device)
{
deviceBuffer.second.Release();
}
}
return this->Internals->GetDeviceBuffers(lock)[device].GetPointer();
}
@ -615,7 +734,7 @@ void Buffer::DeepCopy(vtkm::cont::internal::Buffer& dest) const
// If we are on a device, copy there.
for (auto&& deviceBuffer : this->Internals->GetDeviceBuffers(srcLock))
{
if (deviceBuffer.second.GetPointer() != nullptr)
if (deviceBuffer.second.UpToDate)
{
detail::BufferHelper::CopyOnDevice(
deviceBuffer.first, this->Internals, srcLock, dest.Internals, destLock, token);
@ -624,7 +743,7 @@ void Buffer::DeepCopy(vtkm::cont::internal::Buffer& dest) const
}
// If we are here, there were no devices to copy on. Copy on host if possible.
if (this->Internals->GetHostBuffer(srcLock).GetPointer() != nullptr)
if (this->Internals->GetHostBuffer(srcLock).UpToDate)
{
detail::BufferHelper::CopyOnHost(this->Internals, srcLock, dest.Internals, destLock, token);
}
@ -654,17 +773,19 @@ void Buffer::Reset(const vtkm::cont::internal::BufferInfo& bufferInfo)
{
LockType lock = this->Internals->GetLock();
// Clear out any old buffers.
this->Internals->GetHostBuffer(lock) = vtkm::cont::internal::BufferInfo{};
// Clear out any old buffers. Because we are resetting the object, we will also get rid of
// pinned memory.
this->Internals->GetHostBuffer(lock) = BufferState{};
this->Internals->GetDeviceBuffers(lock).clear();
if (bufferInfo.GetDevice().IsValueValid())
{
this->Internals->GetDeviceBuffers(lock)[bufferInfo.GetDevice()] = bufferInfo;
this->Internals->GetDeviceBuffers(lock)[bufferInfo.GetDevice()] =
BufferState{ bufferInfo, true, true };
}
else if (bufferInfo.GetDevice() == vtkm::cont::DeviceAdapterTagUndefined{})
{
this->Internals->GetHostBuffer(lock) = bufferInfo;
this->Internals->GetHostBuffer(lock) = BufferState{ bufferInfo, true, true };
}
else
{

@ -158,11 +158,15 @@ public:
vtkm::cont::DeviceAdapterId device) const;
/// @}
/// \brief Resets the `Buffer` to the memory allocated at the information.
/// \brief Resets the `Buffer` to the memory allocated at the `BufferInfo`.
///
/// The `Buffer` is initialized to a state that contains the given `buffer` of data. The
/// `BufferInfo` object self-describes the pointer, size, and device of the memory.
///
/// The given memory is "pinned" in the `Buffer`. This means that this memory will always
/// be used on the given host or device. If `SetNumberOfBytes` is later called with a size
/// that is inconsistent with the size of this buffer, an exception will be thrown.
///
VTKM_CONT void Reset(const vtkm::cont::internal::BufferInfo& buffer);
/// \brief Gets the `BufferInfo` object to the memory allocated on the host.

@ -128,25 +128,28 @@ namespace cont
namespace internal
{
namespace detail
{
//----------------------------------------------------------------------------------------
// The BufferInfo internals behaves much like a std::shared_ptr. However, we do not use
// std::shared_ptr for compile efficiency issues.
struct BufferInfo::InternalsStruct
struct BufferInfoInternals
{
void* Memory;
void* Container;
Deleter* Delete;
Reallocater* Reallocate;
BufferInfo::Deleter* Delete;
BufferInfo::Reallocater* Reallocate;
vtkm::BufferSizeType Size;
using CountType = vtkm::IdComponent;
std::atomic<CountType> Count;
VTKM_CONT InternalsStruct(void* memory,
void* container,
vtkm::BufferSizeType size,
Deleter deleter,
Reallocater reallocater)
VTKM_CONT BufferInfoInternals(void* memory,
void* container,
vtkm::BufferSizeType size,
BufferInfo::Deleter deleter,
BufferInfo::Reallocater reallocater)
: Memory(memory)
, Container(container)
, Delete(deleter)
@ -156,10 +159,12 @@ struct BufferInfo::InternalsStruct
{
}
InternalsStruct(const InternalsStruct&) = delete;
void operator=(const InternalsStruct&) = delete;
BufferInfoInternals(const BufferInfoInternals&) = delete;
void operator=(const BufferInfoInternals&) = delete;
};
} // namespace detail
void* BufferInfo::GetPointer() const
{
return this->Internals->Memory;
@ -176,7 +181,7 @@ vtkm::cont::DeviceAdapterId BufferInfo::GetDevice() const
}
BufferInfo::BufferInfo()
: Internals(new InternalsStruct(nullptr, nullptr, 0, HostDeleter, HostReallocate))
: Internals(new detail::BufferInfoInternals(nullptr, nullptr, 0, HostDeleter, HostReallocate))
, Device(vtkm::cont::DeviceAdapterTagUndefined{})
{
}
@ -185,7 +190,7 @@ BufferInfo::~BufferInfo()
{
if (this->Internals != nullptr)
{
InternalsStruct::CountType oldCount =
detail::BufferInfoInternals::CountType oldCount =
this->Internals->Count.fetch_sub(1, std::memory_order::memory_order_seq_cst);
if (oldCount == 1)
{
@ -253,7 +258,7 @@ BufferInfo::BufferInfo(vtkm::cont::DeviceAdapterId device,
vtkm::BufferSizeType size,
Deleter deleter,
Reallocater reallocater)
: Internals(new InternalsStruct(memory, container, size, deleter, reallocater))
: Internals(new detail::BufferInfoInternals(memory, container, size, deleter, reallocater))
, Device(device)
{
}

@ -29,6 +29,13 @@ namespace cont
namespace internal
{
namespace detail
{
struct BufferInfoInternals;
} // namespace detail
class VTKM_CONT_EXPORT BufferInfo
{
public:
@ -91,8 +98,7 @@ public:
VTKM_CONT void Reallocate(vtkm::BufferSizeType newSize);
private:
struct InternalsStruct;
InternalsStruct* Internals;
detail::BufferInfoInternals* Internals;
vtkm::cont::DeviceAdapterId Device;
};
@ -135,15 +141,30 @@ public:
VTKM_CONT virtual vtkm::cont::internal::BufferInfo CopyHostToDevice(
const vtkm::cont::internal::BufferInfo& src) const = 0;
/// Copies data from the provided host buffer into the provided pre-allocated device buffer. The
/// `BufferInfo` object for the device was created by a previous call to this object.
VTKM_CONT virtual void CopyHostToDevice(const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const = 0;
/// Copies data from the device buffer provided to the host. The passed in `BufferInfo` object
/// was created by a previous call to this object.
VTKM_CONT virtual vtkm::cont::internal::BufferInfo CopyDeviceToHost(
const vtkm::cont::internal::BufferInfo& src) const = 0;
/// Copies data from the device buffer provided into the provided pre-allocated host buffer. The
/// `BufferInfo` object for the device was created by a previous call to this object.
VTKM_CONT virtual void CopyDeviceToHost(const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const = 0;
/// Deep copies data from one device buffer to another device buffer. The passed in `BufferInfo`
/// object was created by a previous call to this object.
VTKM_CONT virtual vtkm::cont::internal::BufferInfo CopyDeviceToDevice(
const vtkm::cont::internal::BufferInfo& src) const = 0;
/// Deep copies data from one device buffer to another device buffer. The passed in `BufferInfo`
/// objects were created by a previous call to this object.
VTKM_CONT virtual void CopyDeviceToDevice(const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const = 0;
};
/// \brief The device adapter memory manager.

@ -33,6 +33,18 @@ vtkm::cont::internal::BufferInfo DeviceAdapterMemoryManagerShared::CopyHostToDev
return vtkm::cont::internal::BufferInfo(src, this->GetDevice());
}
void DeviceAdapterMemoryManagerShared::CopyHostToDevice(
const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const
{
VTKM_ASSERT(src.GetDevice() == vtkm::cont::DeviceAdapterTagUndefined{});
VTKM_ASSERT(dest.GetDevice() == this->GetDevice());
if (src.GetPointer() != dest.GetPointer())
{
this->CopyDeviceToDevice(src, dest);
}
}
vtkm::cont::internal::BufferInfo DeviceAdapterMemoryManagerShared::CopyDeviceToHost(
const vtkm::cont::internal::BufferInfo& src) const
{
@ -40,6 +52,18 @@ vtkm::cont::internal::BufferInfo DeviceAdapterMemoryManagerShared::CopyDeviceToH
return vtkm::cont::internal::BufferInfo(src, vtkm::cont::DeviceAdapterTagUndefined{});
}
void DeviceAdapterMemoryManagerShared::CopyDeviceToHost(
const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const
{
VTKM_ASSERT(src.GetDevice() == this->GetDevice());
VTKM_ASSERT(dest.GetDevice() == vtkm::cont::DeviceAdapterTagUndefined{});
if (src.GetPointer() != dest.GetPointer())
{
this->CopyDeviceToDevice(src, dest);
}
}
vtkm::cont::internal::BufferInfo DeviceAdapterMemoryManagerShared::CopyDeviceToDevice(
const vtkm::cont::internal::BufferInfo& src) const
{
@ -47,9 +71,18 @@ vtkm::cont::internal::BufferInfo DeviceAdapterMemoryManagerShared::CopyDeviceToD
vtkm::BufferSizeType size = src.GetSize();
vtkm::cont::internal::BufferInfo dest = this->Allocate(size);
std::memcpy(dest.GetPointer(), src.GetPointer(), static_cast<std::size_t>(size));
this->CopyDeviceToDevice(src, dest);
return dest;
}
void DeviceAdapterMemoryManagerShared::CopyDeviceToDevice(
const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const
{
VTKM_ASSERT(src.GetSize() == dest.GetSize());
std::memcpy(dest.GetPointer(), src.GetPointer(), static_cast<std::size_t>(src.GetSize()));
}
}
}
} // namespace vtkm::cont::internal

@ -33,11 +33,23 @@ public:
VTKM_CONT virtual vtkm::cont::internal::BufferInfo CopyHostToDevice(
const vtkm::cont::internal::BufferInfo& src) const override;
VTKM_CONT virtual void CopyHostToDevice(
const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const override;
VTKM_CONT virtual vtkm::cont::internal::BufferInfo CopyDeviceToHost(
const vtkm::cont::internal::BufferInfo& src) const override;
VTKM_CONT virtual void CopyDeviceToHost(
const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const override;
VTKM_CONT virtual vtkm::cont::internal::BufferInfo CopyDeviceToDevice(
const vtkm::cont::internal::BufferInfo& src) const override;
VTKM_CONT virtual void CopyDeviceToDevice(
const vtkm::cont::internal::BufferInfo& src,
const vtkm::cont::internal::BufferInfo& dest) const override;
};
}
}

@ -198,7 +198,8 @@ private:
buffer[static_cast<std::size_t>(index)] = TestValue(index, T());
}
vtkm::cont::ArrayHandle<T> arrayHandle = vtkm::cont::make_ArrayHandle(buffer);
vtkm::cont::ArrayHandle<T> arrayHandle =
vtkm::cont::make_ArrayHandle(buffer, vtkm::CopyFlag::Off);
VTKM_TEST_ASSERT(arrayHandle.GetNumberOfValues() == ARRAY_SIZE,
"ArrayHandle has wrong number of entries.");
@ -234,22 +235,45 @@ private:
array_handle_testing::CheckArray(result);
}
//clear out user array for next test
std::fill(buffer.begin(), buffer.end(), static_cast<T>(-1));
std::cout << "Check out output." << std::endl;
{ //as output with same length as user provided. This should work
//as no new memory needs to be allocated
vtkm::cont::Token token;
arrayHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag(), token);
auto outputPortal = arrayHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag(), token);
//we can't verify output contents as those aren't fetched, we
//can just make sure the allocation didn't throw an exception
//fill array on device
Algorithm::Schedule(AssignTestValue<T, decltype(outputPortal)>{ outputPortal }, ARRAY_SIZE);
//sync data, which should fill up the user buffer
token.DetachFromAll();
arrayHandle.SyncControlArray();
//check that we got the proper values in the user array
array_handle_testing::CheckValues(buffer.begin(), buffer.end(), T{});
}
{ //an output with a length larger than the memory provided by the user
//this should drop the user's data
vtkm::cont::Token token;
arrayHandle.PrepareForOutput(ARRAY_SIZE * 2, DeviceAdapterTag(), token);
token.DetachFromAll();
arrayHandle.WritePortal();
{ //as output with a length larger than the memory provided by the user
//this should fail
bool gotException = false;
try
{
//you should not be able to allocate a size larger than the
//user provided and get the results
vtkm::cont::Token token;
arrayHandle.PrepareForOutput(ARRAY_SIZE * 2, DeviceAdapterTag(), token);
token.DetachFromAll();
arrayHandle.WritePortal();
}
catch (vtkm::cont::Error&)
{
gotException = true;
}
VTKM_TEST_ASSERT(gotException,
"PrepareForOutput should fail when asked to "
"re-allocate user provided memory.");
}
}
};
@ -314,8 +338,8 @@ private:
//can just make sure the allocation didn't throw an exception
}
{ //as the memory ownership has been transferred to VTK-m this should
//allow VTK-m to free the memory and allocate a new block
{ //as output with a length larger than the memory provided by the user
//this should fail
bool gotException = false;
try
{
@ -330,9 +354,9 @@ private:
{
gotException = true;
}
VTKM_TEST_ASSERT(!gotException,
"PrepareForOutput shouldn't fail when asked to "
"re-allocate user transferred memory.");
VTKM_TEST_ASSERT(gotException,
"PrepareForOutput should fail when asked to "
"re-allocate user provided memory.");
}
}
};

@ -1329,18 +1329,22 @@ private:
{
std::cout << "-------------------------------------------------" << std::endl;
std::cout << "Testing Unique with comparison object" << std::endl;
std::vector<vtkm::Id> testData(ARRAY_SIZE);
for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
IdArrayHandle input;
input.Allocate(ARRAY_SIZE);
{
testData[i] = static_cast<vtkm::Id>(OFFSET + (i % 50));
auto portal = input.WritePortal();
for (vtkm::Id index = 0; index < ARRAY_SIZE; ++index)
{
portal.Set(index, OFFSET + (index % 50));
}
}
IdArrayHandle input = vtkm::cont::make_ArrayHandle(testData);
Algorithm::Sort(input);
Algorithm::Unique(input, FuseAll());
// Check to make sure that input was resized correctly during Unique.
// (This was a discovered bug at one point.)
input.ReadPortal(); // Forces copy back to control.
input.SyncControlArray(); // Forces copy back to control.
input.ReleaseResourcesExecution(); // Make sure not counting on execution.
VTKM_TEST_ASSERT(input.GetNumberOfValues() == 1,
"Unique did not resize array (or size did not copy to control).");