Merge branch 'cuda-timer-bug' into 'master'

Synchronize the CUDA timer on both the start and end events

Previously, the timer for CUDA devices only called cudaEventSynchronize
at the end event when asking for the elapsed time. This, however, could
allow time to pass from when the timer was reset to when the start event
happened that was not recorded in the timer. This added synchronization
should make sure that all time spent in CUDA is recorded.

See merge request !291
This commit is contained in:
Kenneth Moreland 2016-01-14 18:22:08 -05:00
commit f71b59e733
4 changed files with 114 additions and 17 deletions

@ -22,6 +22,7 @@ set(headers
ArrayHandleCuda.h ArrayHandleCuda.h
ChooseCudaDevice.h ChooseCudaDevice.h
DeviceAdapterCuda.h DeviceAdapterCuda.h
ErrorControlCuda.h
) )
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------

@ -0,0 +1,95 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2015 Sandia Corporation.
// Copyright 2015 UT-Battelle, LLC.
// Copyright 2015 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_cuda_ErrorControlCuda_h
#define vtk_m_cont_cuda_ErrorControlCuda_h
#include <vtkm/Types.h>
#include <vtkm/cont/ErrorControl.h>
#include <cuda.h>
#include <sstream>
/// A macro that can be used to check to see if there are any unchecked
/// CUDA errors. Will throw an ErrorControlCuda if there are.
///
#define VTKM_CUDA_CHECK_ASYNCHRONOUS_ERROR() \
do { \
const cudaError_t vtkm_cuda_check_async_error = cudaGetLastError(); \
if (vtkm_cuda_check_async_error != cudaSuccess) \
{ \
throw ::vtkm::cont::cuda::ErrorControlCuda( \
vtkm_cuda_check_async_error, \
__FILE__, \
__LINE__, \
"Unchecked asycnronous error"); \
} \
} while(false)
/// A macro that can be wrapped around a CUDA command and will throw an
/// ErrorControlCuda exception if the CUDA command fails.
///
#define VTKM_CUDA_CALL(command) \
VTKM_CUDA_CHECK_ASYNCHRONOUS_ERROR(); \
do { \
const cudaError_t vtkm_cuda_call_error = command; \
if (vtkm_cuda_call_error != cudaSuccess) \
{ \
throw ::vtkm::cont::cuda::ErrorControlCuda(vtkm_cuda_call_error, \
__FILE__, \
__LINE__, \
#command); \
} \
} while(false)
namespace vtkm {
namespace cont {
namespace cuda {
/// This error is thrown whenever an unidentified CUDA runtime error is
/// encountered.
///
class ErrorControlCuda : public vtkm::cont::ErrorControl
{
public:
ErrorControlCuda(cudaError_t error)
{
std::stringstream message;
message << "CUDA Error: " << cudaGetErrorString(error);
this->SetMessage(message.str());
}
ErrorControlCuda(cudaError_t error,
const std::string &file,
vtkm::Id line,
const std::string &description)
{
std::stringstream message;
message << "CUDA Error: " << cudaGetErrorString(error) << std::endl
<< description << " @ " << file << ":" << line;
this->SetMessage(message.str());
}
};
}
}
} // namespace vtkm::cont:cuda
#endif //vtk_m_cont_cuda_ErrorControlCuda_h

@ -20,11 +20,11 @@
#ifndef vtk_m_cont_cuda_internal_DeviceAdapterAlgorithmCuda_h #ifndef vtk_m_cont_cuda_internal_DeviceAdapterAlgorithmCuda_h
#define vtk_m_cont_cuda_internal_DeviceAdapterAlgorithmCuda_h #define vtk_m_cont_cuda_internal_DeviceAdapterAlgorithmCuda_h
#include <vtkm/cont/cuda/ErrorControlCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h> #include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
#include <vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.h> #include <vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h> #include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorControlInternal.h>
#include <vtkm/Math.h> #include <vtkm/Math.h>
@ -62,11 +62,7 @@ struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>
VTKM_CONT_EXPORT static void Synchronize() VTKM_CONT_EXPORT static void Synchronize()
{ {
cudaError_t error = cudaDeviceSynchronize(); VTKM_CUDA_CALL(cudaDeviceSynchronize());
if (error != cudaSuccess)
{
throw vtkm::cont::ErrorControlInternal(cudaGetErrorString(error));
}
} }
}; };
@ -79,29 +75,30 @@ class DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagCuda>
public: public:
VTKM_CONT_EXPORT DeviceAdapterTimerImplementation() VTKM_CONT_EXPORT DeviceAdapterTimerImplementation()
{ {
cudaEventCreate(&this->StartEvent); VTKM_CUDA_CALL(cudaEventCreate(&this->StartEvent));
cudaEventCreate(&this->EndEvent); VTKM_CUDA_CALL(cudaEventCreate(&this->EndEvent));
this->Reset(); this->Reset();
} }
VTKM_CONT_EXPORT ~DeviceAdapterTimerImplementation() VTKM_CONT_EXPORT ~DeviceAdapterTimerImplementation()
{ {
cudaEventDestroy(this->StartEvent); VTKM_CUDA_CALL(cudaEventDestroy(this->StartEvent));
cudaEventDestroy(this->EndEvent); VTKM_CUDA_CALL(cudaEventDestroy(this->EndEvent));
} }
VTKM_CONT_EXPORT void Reset() VTKM_CONT_EXPORT void Reset()
{ {
cudaEventRecord(this->StartEvent, 0); VTKM_CUDA_CALL(cudaEventRecord(this->StartEvent, 0));
VTKM_CUDA_CALL(cudaEventSynchronize(this->StartEvent));
} }
VTKM_CONT_EXPORT vtkm::Float64 GetElapsedTime() VTKM_CONT_EXPORT vtkm::Float64 GetElapsedTime()
{ {
cudaEventRecord(this->EndEvent, 0); VTKM_CUDA_CALL(cudaEventRecord(this->EndEvent, 0));
cudaEventSynchronize(this->EndEvent); VTKM_CUDA_CALL(cudaEventSynchronize(this->EndEvent));
float elapsedTimeMilliseconds; float elapsedTimeMilliseconds;
cudaEventElapsedTime(&elapsedTimeMilliseconds, VTKM_CUDA_CALL(cudaEventElapsedTime(&elapsedTimeMilliseconds,
this->StartEvent, this->StartEvent,
this->EndEvent); this->EndEvent));
return static_cast<vtkm::Float64>(0.001f*elapsedTimeMilliseconds); return static_cast<vtkm::Float64>(0.001f*elapsedTimeMilliseconds);
} }
@ -145,7 +142,7 @@ public:
for (vtkm::Int32 i = 0; i < numDevices; i++) for (vtkm::Int32 i = 0; i < numDevices; i++)
{ {
cudaDeviceProp prop; cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, i); VTKM_CUDA_CALL(cudaGetDeviceProperties(&prop, i));
const vtkm::Int32 arch = (prop.major * 10) + prop.minor; const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
archVersion = vtkm::Max(arch, archVersion); archVersion = vtkm::Max(arch, archVersion);
} }

@ -405,12 +405,16 @@ private:
vtkm::cont::Timer<DeviceAdapterTag> timer; vtkm::cont::Timer<DeviceAdapterTag> timer;
std::cout << "Timer started. Sleeping..." << std::endl;
#ifndef _WIN32 #ifndef _WIN32
sleep(1); sleep(1);
#else #else
Sleep(1000); Sleep(1000);
#endif #endif
std::cout << "Woke up. Check time." << std::endl;
vtkm::Float64 elapsedTime = timer.GetElapsedTime(); vtkm::Float64 elapsedTime = timer.GetElapsedTime();
std::cout << "Elapsed time: " << elapsedTime << std::endl; std::cout << "Elapsed time: " << elapsedTime << std::endl;