Simplify the DeviceAdapterRuntimeDetectorCuda to not do a kernel launch.

The kernel launch component of the runtime device adapter is fairly
pointless. If the hardware supports CUDA we should expect that
VTK-m has the correct kernel versions.

Plus in the original version if the CUDA device was being used
and the kernel launch returns cudaErrorDevicesUnavailable it
was never possible to restore CUDA support. Now what happens
is that the runtime tracker is marked as failed, but the
calling code can always go back and trying the device again.
This commit is contained in:
Robert Maynard 2019-01-23 15:35:53 -05:00
parent 610bfc21ee
commit d0a70946b8
2 changed files with 22 additions and 40 deletions

@ -0,0 +1,17 @@
# VTK-m CUDA detection properly handles busy devices
When an application that uses VTK-m is first launched it will
do a check to see if CUDA is supported at runtime. If for
some reason that CUDA card is not allowing kernel execution
VTK-m would report the hardware doesn't have CUDA support.
This was problematic as was over aggressive in disabling CUDA
support for hardware that could support kernel execution in
the future. With the fact that every VTK-m worklet is executed
through a TryExecute it is no longer necessary to be so
aggressive in disabling CUDA support.
Now the behavior is that VTK-m considers a machine to have
CUDA runtime support if it has 1+ GPU's of Kepler or
higher hardware (SM_30+).

@ -27,24 +27,6 @@
#include <vtkm/Math.h>
#include <vtkm/cont/cuda/ErrorCuda.h>
namespace vtkm
{
namespace cont
{
namespace cuda
{
namespace internal
{
static __global__ void DetermineIfValidCudaDevice()
{
//used only to see if we can launch kernels. It is possible to have a
//CUDA capable device, but still fail to have CUDA support.
}
}
}
}
}
namespace
{
static std::once_flag deviceQueryFlag;
@ -64,28 +46,11 @@ void queryNumberOfDevicesandHighestArchSupported(vtkm::Int32& nod, vtkm::Int32&
for (vtkm::Int32 i = 0; i < numDevices; i++)
{
cudaDeviceProp prop;
VTKM_CUDA_CALL(cudaGetDeviceProperties(&prop, i));
const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
archVersion = vtkm::Max(arch, archVersion);
}
//Make sure we can actually launch a kernel. This could fail for any
//of the following reasons:
//
// 1. cudaErrorInsufficientDriver, caused by out of data drives
// 2. cudaErrorDevicesUnavailable, caused by another process locking the
// device or somebody disabling cuda support on the device
// 3. cudaErrorNoKernelImageForDevice we built for a compute version
// greater than the device we are running on
// Most likely others that I don't even know about
if (numDevices > 0)
{
vtkm::cont::cuda::internal::DetermineIfValidCudaDevice<<<1, 1, 0, cudaStreamPerThread>>>();
cudaStreamSynchronize(cudaStreamPerThread);
if (cudaSuccess != cudaGetLastError())
res = cudaGetDeviceProperties(&prop, i);
if (res == cudaSuccess)
{
numDevices = 0;
archVersion = 0;
const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
archVersion = vtkm::Max(arch, archVersion);
}
}
});
@ -112,7 +77,7 @@ DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>::DeviceAdapterRun
bool DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>::Exists() const
{
return this->NumberOfDevices > 0 && this->HighestArchSupported >= 20;
return this->NumberOfDevices > 0 && this->HighestArchSupported >= 30;
}
}
} // namespace vtkm::cont