From f5827298033f5f5f71c9a506867af708e9e6dba9 Mon Sep 17 00:00:00 2001
From: Kenneth Moreland <kmorel@sandia.gov>
Date: Mon, 14 Dec 2015 11:11:01 -0700
Subject: [PATCH 1/2] Synchronize the CUDA timer on both the start and end
 events

Previously, the timer for CUDA devices only called cudaEventSynchronize
at the end event when asking for the elapsed time. This, however, could
allow time to pass from when the timer was reset to when the start event
happened that was not recorded in the timer. This added synchronization
should make sure that all time spent in CUDA is recorded.
---
 vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h | 1 +
 1 file changed, 1 insertion(+)
diff --git a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
index c46ec7cba..4d288fce2 100644
--- a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
+++ b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
@@ -92,6 +92,7 @@ public:
   VTKM_CONT_EXPORT void Reset()
   {
     cudaEventRecord(this->StartEvent, 0);
+    cudaEventSynchronize(this->StartEvent);
   }
 
   VTKM_CONT_EXPORT vtkm::Float64 GetElapsedTime()

From 3f446ad261cc68a8c8cf3b3580f57004f0e92700 Mon Sep 17 00:00:00 2001
From: Kenneth Moreland <kmorel@sandia.gov>
Date: Tue, 15 Dec 2015 10:48:10 -0700
Subject: [PATCH 2/2] Add ErrorControlCuda for better CUDA error checking.

Add lots of checks to CUDA calls in the timer to try to identify any
problems that might be showing up on the dashboard.

Also adding some print statements around the sleep function in the
device adapter testing. For some reason the problem just went away with
them.
---
 vtkm/cont/cuda/CMakeLists.txt                 |  1 +
 vtkm/cont/cuda/ErrorControlCuda.h             | 95 +++++++++++++++++++
 .../internal/DeviceAdapterAlgorithmCuda.h     | 32 +++----
 vtkm/cont/testing/TestingDeviceAdapter.h      |  4 +
 4 files changed, 114 insertions(+), 18 deletions(-)
 create mode 100644 vtkm/cont/cuda/ErrorControlCuda.h

diff --git a/vtkm/cont/cuda/CMakeLists.txt b/vtkm/cont/cuda/CMakeLists.txt
index 5d4b7f4eb..1b72b3178 100644
--- a/vtkm/cont/cuda/CMakeLists.txt
+++ b/vtkm/cont/cuda/CMakeLists.txt
@@ -22,6 +22,7 @@ set(headers
   ArrayHandleCuda.h
   ChooseCudaDevice.h
   DeviceAdapterCuda.h
+  ErrorControlCuda.h
   )
 
 #-----------------------------------------------------------------------------
diff --git a/vtkm/cont/cuda/ErrorControlCuda.h b/vtkm/cont/cuda/ErrorControlCuda.h
new file mode 100644
index 000000000..4fdce4d5c
--- /dev/null
+++ b/vtkm/cont/cuda/ErrorControlCuda.h
@@ -0,0 +1,95 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//
+//  Copyright 2015 Sandia Corporation.
+//  Copyright 2015 UT-Battelle, LLC.
+//  Copyright 2015 Los Alamos National Security.
+//
+//  Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+//  the U.S. Government retains certain rights in this software.
+//
+//  Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
+//  Laboratory (LANL), the U.S. Government retains certain rights in
+//  this software.
+//============================================================================
+#ifndef vtk_m_cont_cuda_ErrorControlCuda_h
+#define vtk_m_cont_cuda_ErrorControlCuda_h
+
+#include <vtkm/Types.h>
+#include <vtkm/cont/ErrorControl.h>
+
+#include <cuda.h>
+
+#include <sstream>
+
+/// A macro that can be used to check to see if there are any unchecked
+/// CUDA errors. Will throw an ErrorControlCuda if there are.
+///
+#define VTKM_CUDA_CHECK_ASYNCHRONOUS_ERROR() \
+  do { \
+    const cudaError_t vtkm_cuda_check_async_error = cudaGetLastError(); \
+    if (vtkm_cuda_check_async_error != cudaSuccess) \
+    { \
+      throw ::vtkm::cont::cuda::ErrorControlCuda( \
+        vtkm_cuda_check_async_error, \
+        __FILE__, \
+        __LINE__, \
+        "Unchecked asycnronous error"); \
+    } \
+  } while(false)
+
+/// A macro that can be wrapped around a CUDA command and will throw an
+/// ErrorControlCuda exception if the CUDA command fails.
+///
+#define VTKM_CUDA_CALL(command) \
+  VTKM_CUDA_CHECK_ASYNCHRONOUS_ERROR(); \
+  do { \
+    const cudaError_t vtkm_cuda_call_error = command; \
+    if (vtkm_cuda_call_error != cudaSuccess) \
+    { \
+      throw ::vtkm::cont::cuda::ErrorControlCuda(vtkm_cuda_call_error, \
+                                                 __FILE__, \
+                                                 __LINE__, \
+                                                 #command); \
+    } \
+  } while(false)
+
+namespace vtkm {
+namespace cont {
+namespace cuda {
+
+/// This error is thrown whenever an unidentified CUDA runtime error is
+/// encountered.
+///
+class ErrorControlCuda : public vtkm::cont::ErrorControl
+{
+public:
+  ErrorControlCuda(cudaError_t error)
+  {
+    std::stringstream message;
+    message << "CUDA Error: " << cudaGetErrorString(error);
+    this->SetMessage(message.str());
+  }
+
+  ErrorControlCuda(cudaError_t error,
+                   const std::string &file,
+                   vtkm::Id line,
+                   const std::string &description)
+  {
+    std::stringstream message;
+    message << "CUDA Error: " << cudaGetErrorString(error) << std::endl
+            << description << " @ " << file << ":" << line;
+    this->SetMessage(message.str());
+  }
+};
+
+}
+}
+} // namespace vtkm::cont:cuda
+
+#endif //vtk_m_cont_cuda_ErrorControlCuda_h
diff --git a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
index 4d288fce2..ca66b27d9 100644
--- a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
+++ b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
@@ -20,11 +20,11 @@
 #ifndef vtk_m_cont_cuda_internal_DeviceAdapterAlgorithmCuda_h
 #define vtk_m_cont_cuda_internal_DeviceAdapterAlgorithmCuda_h
 
+#include <vtkm/cont/cuda/ErrorControlCuda.h>
 #include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
 #include <vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.h>
 
 #include <vtkm/cont/DeviceAdapterAlgorithm.h>
-#include <vtkm/cont/ErrorControlInternal.h>
 
 #include <vtkm/Math.h>
 
@@ -62,11 +62,7 @@ struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>
 
   VTKM_CONT_EXPORT static void Synchronize()
   {
-    cudaError_t error = cudaDeviceSynchronize();
-    if (error != cudaSuccess)
-    {
-      throw vtkm::cont::ErrorControlInternal(cudaGetErrorString(error));
-    }
+    VTKM_CUDA_CALL(cudaDeviceSynchronize());
   }
 
 };
@@ -79,30 +75,30 @@ class DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagCuda>
 public:
   VTKM_CONT_EXPORT DeviceAdapterTimerImplementation()
   {
-    cudaEventCreate(&this->StartEvent);
-    cudaEventCreate(&this->EndEvent);
+    VTKM_CUDA_CALL(cudaEventCreate(&this->StartEvent));
+    VTKM_CUDA_CALL(cudaEventCreate(&this->EndEvent));
     this->Reset();
   }
   VTKM_CONT_EXPORT ~DeviceAdapterTimerImplementation()
   {
-    cudaEventDestroy(this->StartEvent);
-    cudaEventDestroy(this->EndEvent);
+    VTKM_CUDA_CALL(cudaEventDestroy(this->StartEvent));
+    VTKM_CUDA_CALL(cudaEventDestroy(this->EndEvent));
   }
 
   VTKM_CONT_EXPORT void Reset()
   {
-    cudaEventRecord(this->StartEvent, 0);
-    cudaEventSynchronize(this->StartEvent);
+    VTKM_CUDA_CALL(cudaEventRecord(this->StartEvent, 0));
+    VTKM_CUDA_CALL(cudaEventSynchronize(this->StartEvent));
   }
 
   VTKM_CONT_EXPORT vtkm::Float64 GetElapsedTime()
   {
-    cudaEventRecord(this->EndEvent, 0);
-    cudaEventSynchronize(this->EndEvent);
+    VTKM_CUDA_CALL(cudaEventRecord(this->EndEvent, 0));
+    VTKM_CUDA_CALL(cudaEventSynchronize(this->EndEvent));
     float elapsedTimeMilliseconds;
-    cudaEventElapsedTime(&elapsedTimeMilliseconds,
-                         this->StartEvent,
-                         this->EndEvent);
+    VTKM_CUDA_CALL(cudaEventElapsedTime(&elapsedTimeMilliseconds,
+                                        this->StartEvent,
+                                        this->EndEvent));
     return static_cast<vtkm::Float64>(0.001f*elapsedTimeMilliseconds);
   }
 
@@ -146,7 +142,7 @@ public:
       for (vtkm::Int32 i = 0; i < numDevices; i++)
       {
         cudaDeviceProp prop;
-        cudaGetDeviceProperties(&prop, i);
+        VTKM_CUDA_CALL(cudaGetDeviceProperties(&prop, i));
         const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
         archVersion = vtkm::Max(arch, archVersion);
       }
diff --git a/vtkm/cont/testing/TestingDeviceAdapter.h b/vtkm/cont/testing/TestingDeviceAdapter.h
index a32b1e6cd..3ca160e42 100644
--- a/vtkm/cont/testing/TestingDeviceAdapter.h
+++ b/vtkm/cont/testing/TestingDeviceAdapter.h
@@ -405,12 +405,16 @@ private:
 
     vtkm::cont::Timer<DeviceAdapterTag> timer;
 
+    std::cout << "Timer started. Sleeping..." << std::endl;
+
 #ifndef _WIN32
     sleep(1);
 #else
     Sleep(1000);
 #endif
 
+    std::cout << "Woke up. Check time." << std::endl;
+
     vtkm::Float64 elapsedTime = timer.GetElapsedTime();
 
     std::cout << "Elapsed time: " << elapsedTime << std::endl;