Copying CPU memory to Pascal managed memory now works consistently.
When copying small arrays from CPU memory to Pascal managed memory, we would see subsequent kernels fail because the memory transfer hadn't finished. This is a bug, as each stream should act like a FIFO queue. So for now, when encountering this use case, we explicitly synchronize after the memcpy.
This commit is contained in:
parent
a4b16c4b4e
commit
e0b6e69878
@ -150,6 +150,15 @@ void ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::CopyFromControl(
|
||||
static_cast<std::size_t>(numBytes),
|
||||
cudaMemcpyHostToDevice,
|
||||
cudaStreamPerThread));
|
||||
if (CudaAllocator::IsManagedPointer(executionPtr))
|
||||
{
|
||||
//If we are moving memory from unmanaged host memory
|
||||
//to managed host memory we have the possibility that
|
||||
//the memcpy will not finish before the first usage is finished
|
||||
//to work around this bug we explicitly synchronize for this
|
||||
//one use case
|
||||
cudaStreamSynchronize(cudaStreamPerThread);
|
||||
}
|
||||
}
|
||||
|
||||
void ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::CopyToControl(const void* executionPtr,
|
||||
|
@ -205,6 +205,7 @@ public:
|
||||
vtkm::cont::cuda::internal::IteratorBegin(portal),
|
||||
vtkm::cont::cuda::internal::IteratorEnd(portal),
|
||||
thrust::cuda::pointer<ValueType>(beginPointer));
|
||||
cudaStreamSynchronize(cudaStreamPerThread);
|
||||
|
||||
//unmap the resource
|
||||
this->Resource->UnMap();
|
||||
|
Loading…
Reference in New Issue
Block a user