Merge branch 'cuda-timer-bug' into 'master'

Synchronize the CUDA timer on both the start and end events Previously, the timer for CUDA devices only called cudaEventSynchronize at the end event when asking for the elapsed time. This, however, could allow time to pass from when the timer was reset to when the start event happened that was not recorded in the timer. This added synchronization should make sure that all time spent in CUDA is recorded. See merge request !291
2024-09-19 18:45:43 +00:00 · 2016-01-14 18:22:08 -05:00 · 2016-01-14 18:22:08 -05:00 · f71b59e733
commit f71b59e733
parent 1d1334e2ea 3f446ad261
4 changed files with 114 additions and 17 deletions
--- a/vtkm/cont/cuda/CMakeLists.txt
+++ b/vtkm/cont/cuda/CMakeLists.txt
@ -22,6 +22,7 @@ set(headers
  ArrayHandleCuda.h
  ChooseCudaDevice.h
  DeviceAdapterCuda.h
  ErrorControlCuda.h
  )
 #-----------------------------------------------------------------------------
--- a/vtkm/cont/cuda/ErrorControlCuda.h
+++ b/vtkm/cont/cuda/ErrorControlCuda.h
@ -0,0 +1,95 @@
 //============================================================================
 //  Copyright (c) Kitware, Inc.
 //  All rights reserved.
 //  See LICENSE.txt for details.
 //  This software is distributed WITHOUT ANY WARRANTY; without even
 //  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 //  PURPOSE.  See the above copyright notice for more information.
 //
 //  Copyright 2015 Sandia Corporation.
 //  Copyright 2015 UT-Battelle, LLC.
 //  Copyright 2015 Los Alamos National Security.
 //
 //  Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 //  the U.S. Government retains certain rights in this software.
 //
 //  Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
 //  Laboratory (LANL), the U.S. Government retains certain rights in
 //  this software.
 //============================================================================
 #ifndef vtk_m_cont_cuda_ErrorControlCuda_h
 #define vtk_m_cont_cuda_ErrorControlCuda_h
 #include <vtkm/Types.h>
 #include <vtkm/cont/ErrorControl.h>
 #include <cuda.h>
 #include <sstream>
 /// A macro that can be used to check to see if there are any unchecked
 /// CUDA errors. Will throw an ErrorControlCuda if there are.
 ///
 #define VTKM_CUDA_CHECK_ASYNCHRONOUS_ERROR() \
  do { \
    const cudaError_t vtkm_cuda_check_async_error = cudaGetLastError(); \
    if (vtkm_cuda_check_async_error != cudaSuccess) \
    { \
      throw ::vtkm::cont::cuda::ErrorControlCuda( \
        vtkm_cuda_check_async_error, \
        __FILE__, \
        __LINE__, \
        "Unchecked asycnronous error"); \
    } \
  } while(false)
 /// A macro that can be wrapped around a CUDA command and will throw an
 /// ErrorControlCuda exception if the CUDA command fails.
 ///
 #define VTKM_CUDA_CALL(command) \
  VTKM_CUDA_CHECK_ASYNCHRONOUS_ERROR(); \
  do { \
    const cudaError_t vtkm_cuda_call_error = command; \
    if (vtkm_cuda_call_error != cudaSuccess) \
    { \
      throw ::vtkm::cont::cuda::ErrorControlCuda(vtkm_cuda_call_error, \
                                                 __FILE__, \
                                                 __LINE__, \
                                                 #command); \
    } \
  } while(false)
 namespace vtkm {
 namespace cont {
 namespace cuda {
 /// This error is thrown whenever an unidentified CUDA runtime error is
 /// encountered.
 ///
 class ErrorControlCuda : public vtkm::cont::ErrorControl
 {
 public:
  ErrorControlCuda(cudaError_t error)
  {
    std::stringstream message;
    message << "CUDA Error: " << cudaGetErrorString(error);
    this->SetMessage(message.str());
  }
  ErrorControlCuda(cudaError_t error,
                   const std::string &file,
                   vtkm::Id line,
                   const std::string &description)
  {
    std::stringstream message;
    message << "CUDA Error: " << cudaGetErrorString(error) << std::endl
            << description << " @ " << file << ":" << line;
    this->SetMessage(message.str());
  }
 };
 }
 }
 } // namespace vtkm::cont:cuda
 #endif //vtk_m_cont_cuda_ErrorControlCuda_h
--- a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
+++ b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
@ -20,11 +20,11 @@
 #ifndef vtk_m_cont_cuda_internal_DeviceAdapterAlgorithmCuda_h
 #define vtk_m_cont_cuda_internal_DeviceAdapterAlgorithmCuda_h
 #include <vtkm/cont/cuda/ErrorControlCuda.h>
 #include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
 #include <vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.h>
 #include <vtkm/cont/DeviceAdapterAlgorithm.h>
 #include <vtkm/cont/ErrorControlInternal.h>
 #include <vtkm/Math.h>
@ -62,11 +62,7 @@ struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>
  VTKM_CONT_EXPORT static void Synchronize()
  {
-    cudaError_t error = cudaDeviceSynchronize();
+    VTKM_CUDA_CALL(cudaDeviceSynchronize());
    if (error != cudaSuccess)
    {
      throw vtkm::cont::ErrorControlInternal(cudaGetErrorString(error));
    }
  }
 };
@ -79,29 +75,30 @@ class DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagCuda>
 public:
  VTKM_CONT_EXPORT DeviceAdapterTimerImplementation()
  {
-    cudaEventCreate(&this->StartEvent);
+    VTKM_CUDA_CALL(cudaEventCreate(&this->StartEvent));
-    cudaEventCreate(&this->EndEvent);
+    VTKM_CUDA_CALL(cudaEventCreate(&this->EndEvent));
    this->Reset();
  }
  VTKM_CONT_EXPORT ~DeviceAdapterTimerImplementation()
  {
-    cudaEventDestroy(this->StartEvent);
+    VTKM_CUDA_CALL(cudaEventDestroy(this->StartEvent));
-    cudaEventDestroy(this->EndEvent);
+    VTKM_CUDA_CALL(cudaEventDestroy(this->EndEvent));
  }
  VTKM_CONT_EXPORT void Reset()
  {
-    cudaEventRecord(this->StartEvent, 0);
+    VTKM_CUDA_CALL(cudaEventRecord(this->StartEvent, 0));
    VTKM_CUDA_CALL(cudaEventSynchronize(this->StartEvent));
  }
  VTKM_CONT_EXPORT vtkm::Float64 GetElapsedTime()
  {
-    cudaEventRecord(this->EndEvent, 0);
+    VTKM_CUDA_CALL(cudaEventRecord(this->EndEvent, 0));
-    cudaEventSynchronize(this->EndEvent);
+    VTKM_CUDA_CALL(cudaEventSynchronize(this->EndEvent));
    float elapsedTimeMilliseconds;
-    cudaEventElapsedTime(&elapsedTimeMilliseconds,
+    VTKM_CUDA_CALL(cudaEventElapsedTime(&elapsedTimeMilliseconds,
-                         this->StartEvent,
+                                        this->StartEvent,
-                         this->EndEvent);
+                                        this->EndEvent));
    return static_cast<vtkm::Float64>(0.001f*elapsedTimeMilliseconds);
  }
@ -145,7 +142,7 @@ public:
      for (vtkm::Int32 i = 0; i < numDevices; i++)
      {
        cudaDeviceProp prop;
-        cudaGetDeviceProperties(&prop, i);
+        VTKM_CUDA_CALL(cudaGetDeviceProperties(&prop, i));
        const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
        archVersion = vtkm::Max(arch, archVersion);
      }
--- a/vtkm/cont/testing/TestingDeviceAdapter.h
+++ b/vtkm/cont/testing/TestingDeviceAdapter.h
@ -405,12 +405,16 @@ private:
    vtkm::cont::Timer<DeviceAdapterTag> timer;
    std::cout << "Timer started. Sleeping..." << std::endl;
 #ifndef _WIN32
    sleep(1);
 #else
    Sleep(1000);
 #endif
    std::cout << "Woke up. Check time." << std::endl;
    vtkm::Float64 elapsedTime = timer.GetElapsedTime();
    std::cout << "Elapsed time: " << elapsedTime << std::endl;