Use async memory allocation for CUDA when managed memory is off.

Dave Pugmire 2022-08-10 10:21:48 -04:00
parent baf80acb31
commit 0d30520bb3
2 changed files with 35 additions and 1 deletion

@@ -158,7 +158,11 @@ void* CudaAllocator::Allocate(std::size_t numBytes)
}
else
{
#if CUDART_VERSION >= 11030
VTKM_CUDA_CALL(cudaMallocAsync(&ptr, numBytes, cudaStreamPerThread));
#else
VTKM_CUDA_CALL(cudaMalloc(&ptr, numBytes));
#endif
}
{
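
For reference, here is a minimal, self-contained sketch (not part of the commit) of the allocation decision this hunk introduces. The helper name allocateDeviceMemory, the CUDA_CHECK macro, and the managed branch using cudaMallocManaged are illustrative assumptions; only the non-managed branch is visible in the diff above.

#include <cuda_runtime.h>
#include <cstddef>
#include <cstdio>
#include <cstdlib>

// Abort with a readable message if a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                  \
  do                                                                      \
  {                                                                       \
    cudaError_t err_ = (call);                                            \
    if (err_ != cudaSuccess)                                              \
    {                                                                     \
      std::fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err_)); \
      std::exit(EXIT_FAILURE);                                            \
    }                                                                     \
  } while (0)

void* allocateDeviceMemory(std::size_t numBytes, bool useManagedMemory)
{
  void* ptr = nullptr;
  if (useManagedMemory)
  {
    // Managed path: one pointer usable from both host and device.
    CUDA_CHECK(cudaMallocManaged(&ptr, numBytes));
  }
  else
  {
#if CUDART_VERSION >= 11030
    // Stream-ordered path: the allocation is queued on the per-thread
    // default stream, so the pointer is valid for work ordered after it
    // on cudaStreamPerThread.
    CUDA_CHECK(cudaMallocAsync(&ptr, numBytes, cudaStreamPerThread));
#else
    // Older runtimes fall back to the synchronous allocator.
    CUDA_CHECK(cudaMalloc(&ptr, numBytes));
#endif
  }
  return ptr;
}

int main()
{
  // Exercise the non-managed path once; the matching release is sketched
  // after the Free() hunk below.
  void* ptr = allocateDeviceMemory(1 << 20, /*useManagedMemory=*/false);
#if CUDART_VERSION >= 11030
  CUDA_CHECK(cudaFreeAsync(ptr, cudaStreamPerThread));
  CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
#else
  CUDA_CHECK(cudaFree(ptr));
#endif
  return 0;
}
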
@@ -195,7 +199,19 @@ void CudaAllocator::Free(void* ptr)
}
VTKM_LOG_F(vtkm::cont::LogLevel::MemExec, "Freeing CUDA allocation at %p.", ptr);
#if CUDART_VERSION >= 11030
if (ManagedMemoryEnabled)
{
VTKM_CUDA_CALL(cudaFree(ptr));
}
else
{
VTKM_CUDA_CALL(cudaFreeAsync(ptr, cudaStreamPerThread));
}
#else
VTKM_CUDA_CALL(cudaFree(ptr));
#endif
}
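
For context (not part of the commit), a minimal example of the stream-ordered allocate/use/free pairing that the updated Free relies on when managed memory is off: allocation, use, and release are all ordered on cudaStreamPerThread, and none of it is guaranteed complete until the stream is synchronized.

#include <cuda_runtime.h>
#include <cstddef>
#include <cstdio>

int main()
{
#if CUDART_VERSION >= 11030
  void* ptr = nullptr;
  std::size_t numBytes = 1 << 20;

  // Allocation, use, and release are queued, in order, on the same stream.
  cudaMallocAsync(&ptr, numBytes, cudaStreamPerThread);
  cudaMemsetAsync(ptr, 0, numBytes, cudaStreamPerThread);
  cudaFreeAsync(ptr, cudaStreamPerThread);

  // Only after synchronizing is all of the above guaranteed to have run.
  cudaStreamSynchronize(cudaStreamPerThread);
  std::printf("stream-ordered allocate/use/free: %s\n",
              cudaGetErrorString(cudaGetLastError()));
#else
  std::printf("CUDA runtime predates cudaMallocAsync; nothing to demonstrate.\n");
#endif
  return 0;
}
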
void CudaAllocator::FreeDeferred(void* ptr, std::size_t numBytes)

@@ -16,6 +16,10 @@
#include <vtkm/cont/ArrayHandleExtractComponent.h>
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/cont/DeviceAdapterTag.h>
#include <vtkm/cont/RuntimeDeviceInformation.h>
#include <vtkm/cont/cuda/internal/CudaAllocator.h>
#include <algorithm>
#include <vector>
@@ -548,7 +552,7 @@ struct VerifyFill
}
};
VTKM_CONT void Run()
VTKM_CONT void RunTests()
{
vtkm::testing::Testing::TryTypes(VerifyEmptyArrays{});
vtkm::testing::Testing::TryTypes(VerifyUserOwnedMemory{});
@@ -561,6 +565,20 @@ VTKM_CONT void Run()
vtkm::testing::Testing::TryTypes(VerifyFill{});
}
VTKM_CONT void Run()
{
#ifdef VTKM_CUDA
//For CUDA, run the tests both with and without managed memory.
//When managed memory is off, cudaMallocAsync is used for allocations.
vtkm::cont::cuda::internal::CudaAllocator::ForceManagedMemoryOff();
RunTests();
vtkm::cont::cuda::internal::CudaAllocator::ForceManagedMemoryOn();
RunTests();
#else
RunTests();
#endif
}
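
A hypothetical variant (not in this commit) of the new Run() wrapper, guarding the allocator toggle with vtkm::cont::RuntimeDeviceInformation, whose header this change includes, so the managed/async modes are only forced when a CUDA device is actually available at runtime. The Exists() check and the RunGuarded name are assumptions for illustration; RunTests() is the helper defined above in this test.

#ifdef VTKM_CUDA
#include <vtkm/cont/DeviceAdapterTag.h>
#include <vtkm/cont/RuntimeDeviceInformation.h>
#include <vtkm/cont/cuda/internal/CudaAllocator.h>
#endif

VTKM_CONT void RunGuarded()
{
#ifdef VTKM_CUDA
  vtkm::cont::RuntimeDeviceInformation runtime;
  if (runtime.Exists(vtkm::cont::DeviceAdapterTagCuda{}))
  {
    // Exercise the cudaMallocAsync path first, then the managed path.
    vtkm::cont::cuda::internal::CudaAllocator::ForceManagedMemoryOff();
    RunTests();
    vtkm::cont::cuda::internal::CudaAllocator::ForceManagedMemoryOn();
  }
#endif
  RunTests();
}
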
} // anonymous namespace
int UnitTestArrayHandle(int argc, char* argv[])