Merge topic 'wait_for_cuda_streams_to_finish_before_host_access'

82cdae002 VTK-m waits for cuda streams to finish before host access

Acked-by: Kitware Robot <kwrobot@kitware.com>
Acked-by: Allison Vacanti <allison.vacanti@kitware.com>
Merge-request: !1244
This commit is contained in:
Robert Maynard 2018-06-01 16:53:15 +00:00 committed by Kitware Robot
commit b8468761c7
2 changed files with 14 additions and 15 deletions

@ -432,6 +432,7 @@ private:
{
cuda::internal::throwAsVTKmException();
}
VTKM_CUDA_CALL(cudaStreamSynchronize(cudaStreamPerThread));
return sum[2];
}

@ -150,15 +150,6 @@ void ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::CopyFromControl(
static_cast<std::size_t>(numBytes),
cudaMemcpyHostToDevice,
cudaStreamPerThread));
if (CudaAllocator::IsManagedPointer(executionPtr))
{
//If we are moving memory from unmanaged host memory
//to managed host memory we have the possibility that
//the memcpy will not finish before the first usage is finished.
//To work around this bug we explicitly synchronize for this
//one use case.
cudaStreamSynchronize(cudaStreamPerThread);
}
}
void ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::CopyToControl(const void* executionPtr,
@ -179,14 +170,21 @@ void ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::CopyToControl(const voi
// If it is managed, just return and let CUDA handle the migration for us.
CudaAllocator::PrepareForControl(controlPtr, numBytes);
return;
}
else
{
VTKM_CUDA_CALL(cudaMemcpyAsync(controlPtr,
executionPtr,
static_cast<std::size_t>(numBytes),
cudaMemcpyDeviceToHost,
cudaStreamPerThread));
}
VTKM_CUDA_CALL(cudaMemcpyAsync(controlPtr,
executionPtr,
static_cast<std::size_t>(numBytes),
cudaMemcpyDeviceToHost,
cudaStreamPerThread));
//In all cases we possibly have multiple async calls queued up in
//our stream. We need to block on the copy back to control since
//we don't want the host accessing memory that hasn't finished
//being used by the GPU.
cudaStreamSynchronize(cudaStreamPerThread);
}
void ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::UsingForRead(