Simplify the DeviceAdapterRuntimeDetectorCuda to not do a kernel launch.

The kernel launch component of the runtime device adapter is fairly pointless. If the hardware supports CUDA we should expect that VTK-m has the correct kernel versions. Plus in the original version if the CUDA device was being used and the kernel launch returns cudaErrorDevicesUnavailable it was never possible to restore CUDA support. Now what happens is that the runtime tracker is marked as failed, but the calling code can always go back and trying the device again.
2024-09-16 17:22:55 +00:00 · 2019-01-23 15:35:53 -05:00 · 2019-01-23 15:35:53 -05:00 · d0a70946b8
commit d0a70946b8
parent 610bfc21ee
2 changed files with 22 additions and 40 deletions
--- a/docs/changelog/vtkm-handles-busy-cuda-devices-better.md
+++ b/docs/changelog/vtkm-handles-busy-cuda-devices-better.md
@ -0,0 +1,17 @@
+# VTK-m CUDA detection properly handles busy devices
+
+When an application that uses VTK-m is first launched it will
+do a check to see if CUDA is supported at runtime. If for
+some reason that CUDA card is not allowing kernel execution
+VTK-m would report the hardware doesn't have CUDA support.
+
+This was problematic as was over aggressive in disabling CUDA
+support for hardware that could support kernel execution in
+the future. With the fact that every VTK-m worklet is executed
+through a TryExecute it is no longer necessary to be so
+aggressive in disabling CUDA support.
+
+Now the behavior is that VTK-m considers a machine to have
+CUDA runtime support if it has 1+ GPU's of Kepler or
+higher hardware (SM_30+).
+
--- a/vtkm/cont/cuda/internal/DeviceAdapterRuntimeDetectorCuda.cxx
+++ b/vtkm/cont/cuda/internal/DeviceAdapterRuntimeDetectorCuda.cxx
@ -27,24 +27,6 @@
 #include <vtkm/Math.h>
 #include <vtkm/cont/cuda/ErrorCuda.h>

-namespace vtkm
-{
-namespace cont
-{
-namespace cuda
-{
-namespace internal
-{
-
-static __global__ void DetermineIfValidCudaDevice()
-{
-  //used only to see if we can launch kernels. It is possible to have a
-  //CUDA capable device, but still fail to have CUDA support.
-}
-}
-}
-}
-}
 namespace
 {
 static std::once_flag deviceQueryFlag;
@ -64,28 +46,11 @@ void queryNumberOfDevicesandHighestArchSupported(vtkm::Int32& nod, vtkm::Int32&
    for (vtkm::Int32 i = 0; i < numDevices; i++)
    {
      cudaDeviceProp prop;
-      VTKM_CUDA_CALL(cudaGetDeviceProperties(&prop, i));
-      const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
-      archVersion = vtkm::Max(arch, archVersion);
-    }
-
-    //Make sure we can actually launch a kernel. This could fail for any
-    //of the following reasons:
-    //
-    // 1. cudaErrorInsufficientDriver, caused by out of data drives
-    // 2. cudaErrorDevicesUnavailable, caused by another process locking the
-    //    device or somebody disabling cuda support on the device
-    // 3. cudaErrorNoKernelImageForDevice we built for a compute version
-    //    greater than the device we are running on
-    // Most likely others that I don't even know about
-    if (numDevices > 0)
-    {
-      vtkm::cont::cuda::internal::DetermineIfValidCudaDevice<<<1, 1, 0, cudaStreamPerThread>>>();
-      cudaStreamSynchronize(cudaStreamPerThread);
-      if (cudaSuccess != cudaGetLastError())
+      res = cudaGetDeviceProperties(&prop, i);
+      if (res == cudaSuccess)
      {
-        numDevices = 0;
-        archVersion = 0;
+        const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
+        archVersion = vtkm::Max(arch, archVersion);
      }
    }
  });
@ -112,7 +77,7 @@ DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>::DeviceAdapterRun

 bool DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>::Exists() const
 {
-  return this->NumberOfDevices > 0 && this->HighestArchSupported >= 20;
+  return this->NumberOfDevices > 0 && this->HighestArchSupported >= 30;
 }
 }
 } // namespace vtkm::cont