remove cudaGetDevice calls, favor runtime device config

This commit is contained in:
Nickolas Davis 2021-08-26 14:17:15 -06:00
parent adac415f15
commit 9730de8074
5 changed files with 36 additions and 29 deletions

@ -239,8 +239,10 @@ void CudaAllocator::PrepareForInput(const void* ptr, std::size_t numBytes)
if (IsManagedPointer(ptr) && numBytes >= Threshold)
{
#if CUDART_VERSION >= 8000
int dev;
VTKM_CUDA_CALL(cudaGetDevice(&dev));
vtkm::Id dev;
vtkm::cont::RuntimeDeviceInformation()
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagCuda())
.GetDeviceInstance(dev);
// VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, dev));
// VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetReadMostly, dev));
VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, dev));
@ -254,8 +256,10 @@ void CudaAllocator::PrepareForOutput(const void* ptr, std::size_t numBytes)
if (IsManagedPointer(ptr) && numBytes >= Threshold)
{
#if CUDART_VERSION >= 8000
int dev;
VTKM_CUDA_CALL(cudaGetDevice(&dev));
vtkm::Id dev;
vtkm::cont::RuntimeDeviceInformation()
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagCuda())
.GetDeviceInstance(dev);
// VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, dev));
// VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseUnsetReadMostly, dev));
VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, dev));

@ -10,6 +10,10 @@
#include <vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h>
#include <vtkm/cont/RuntimeDeviceInformation.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
#include <vtkm/cont/cuda/internal/RuntimeDeviceConfigurationCuda.h>
#include <atomic>
#include <cstring>
#include <functional>
@ -133,14 +137,14 @@ VTKM_CONT_EXPORT void SetupKernelSchedulingParameters()
std::call_once(lookupBuiltFlag, []() {
ScheduleParameterBuilder builder;
//iterate over all devices
int count = 0;
VTKM_CUDA_CALL(cudaGetDeviceCount(&count));
for (int deviceId = 0; deviceId < count; ++deviceId)
auto cudaDeviceConfig = dynamic_cast<
vtkm::cont::internal::RuntimeDeviceConfiguration<vtkm::cont::DeviceAdapterTagCuda>&>(
vtkm::cont::RuntimeDeviceInformation{}.GetRuntimeConfiguration(
vtkm::cont::DeviceAdapterTagCuda()));
std::vector<cudaDeviceProp> cudaDevices;
cudaDeviceConfig.GetCudaDeviceProp(cudaDevices);
for (const auto& deviceProp : cudaDevices)
{
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, deviceId);
ScheduleParameters params = builder.Compute(deviceProp.name,
deviceProp.major,
deviceProp.minor,
@ -204,8 +208,10 @@ void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetBlocksAndThrea
(void)size;
vtkm::cont::cuda::internal::SetupKernelSchedulingParameters();
int deviceId;
VTKM_CUDA_CALL(cudaGetDevice(&deviceId)); //get deviceid from cuda
vtkm::Id deviceId;
vtkm::cont::RuntimeDeviceInformation()
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagCuda())
.GetDeviceInstance(deviceId);
const auto& params = cuda::internal::scheduling_1d_parameters[static_cast<size_t>(deviceId)];
blocks = static_cast<vtkm::UInt32>(params.first);
threadsPerBlock = static_cast<vtkm::UInt32>(params.second);
@ -218,8 +224,10 @@ void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetBlocksAndThrea
{
vtkm::cont::cuda::internal::SetupKernelSchedulingParameters();
int deviceId;
VTKM_CUDA_CALL(cudaGetDevice(&deviceId)); //get deviceid from cuda
vtkm::Id deviceId;
vtkm::cont::RuntimeDeviceInformation()
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagCuda())
.GetDeviceInstance(deviceId);
if (size.z <= 1)
{ //2d images
const auto& params = cuda::internal::scheduling_2d_parameters[static_cast<size_t>(deviceId)];

@ -25,6 +25,11 @@ static int archVersion = 0;
void queryNumberOfDevicesandHighestArchSupported(vtkm::Int32& nod, vtkm::Int32& has)
{
// We currently cannot use RuntimeDeviceInformation{}.GetRuntimeConfiguration(
// vtkm::cont::DeviceAdapterTagCuda()) in this function due to constraints in
// Initialize, which queries device existence before the Runtime
// Configuration is initialized. Once those constraints are removed/fixed, this
// file can be updated to use that call instead of querying the CUDA device directly.
std::call_once(deviceQueryFlag, []() {
//first query for the number of devices
auto res = cudaGetDeviceCount(&numDevices);

@ -62,17 +62,8 @@ public:
<< value << " >= " << this->CudaDeviceCount);
return RuntimeDeviceConfigReturnCode::INVALID_VALUE;
}
try
{
VTKM_CUDA_CALL(cudaSetDevice(value));
return RuntimeDeviceConfigReturnCode::SUCCESS;
}
catch (const vtkm::cont::cuda::ErrorCuda& err)
{
VTKM_LOG_S(vtkm::cont::LogLevel::Error,
"Failed to set CudaDeviceInstance: " << err.GetMessage());
return RuntimeDeviceConfigReturnCode::INTERNAL_ERROR;
}
VTKM_CUDA_CALL(cudaSetDevice(value));
return RuntimeDeviceConfigReturnCode::SUCCESS;
}
VTKM_CONT virtual RuntimeDeviceConfigReturnCode GetDeviceInstance(

@ -24,14 +24,13 @@ namespace cont
namespace internal
{
enum class RuntimeDeviceConfigReturnCode : vtkm::Id
enum class RuntimeDeviceConfigReturnCode
{
SUCCESS,
OUT_OF_BOUNDS,
INVALID_FOR_DEVICE,
INVALID_VALUE,
NOT_APPLIED,
INTERNAL_ERROR
NOT_APPLIED
};
class VTKM_CONT_EXPORT RuntimeDeviceConfigurationBase