mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-09-16 17:22:55 +00:00
Introduce a log level that details kernel launch parameters
This commit is contained in:
parent
990b0241a3
commit
8aaf922aa4
12
docs/changelog/add-cuda-kernel-details-to-logging.md
Normal file
12
docs/changelog/add-cuda-kernel-details-to-logging.md
Normal file
@ -0,0 +1,12 @@
|
||||
# VTK-m logs details about each CUDA kernel launch
|
||||
|
||||
The VTK-m logging infrastructure has been extended with a new log level
|
||||
`KernelLaunches` which exists between `MemTransfer` and `Cast`.
|
||||
|
||||
This log level reports the number of blocks, threads per block, and the
|
||||
PTX version of each CUDA kernel launched.
|
||||
|
||||
This logging level was primarily introduced to help developers that are
|
||||
tracking down issues that occur when VTK-m components have been built with
|
||||
different `sm_XX` flags and help people looking to do kernel performance
|
||||
tuning.
|
@ -115,6 +115,7 @@ void InitLogging(int& argc, char* argv[])
|
||||
SetLogLevelName(vtkm::cont::LogLevel::MemCont, "MemC");
|
||||
SetLogLevelName(vtkm::cont::LogLevel::MemExec, "MemE");
|
||||
SetLogLevelName(vtkm::cont::LogLevel::MemTransfer, "MemT");
|
||||
SetLogLevelName(vtkm::cont::LogLevel::KernelLaunches, "Kern");
|
||||
SetLogLevelName(vtkm::cont::LogLevel::Cast, "Cast");
|
||||
|
||||
|
||||
@ -152,6 +153,16 @@ void SetStderrLogLevel(LogLevel level)
|
||||
#endif // VTKM_ENABLE_LOGGING
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
vtkm::cont::LogLevel GetStderrLogLevel()
|
||||
{
|
||||
#ifdef VTKM_ENABLE_LOGGING
|
||||
return static_cast<vtkm::cont::LogLevel>(loguru::g_stderr_verbosity);
|
||||
#else // VTKM_ENABLE_LOGGING
|
||||
return vtkm::cont::LogLevel::Off;
|
||||
#endif // VTKM_ENABLE_LOGGING
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
void SetLogThreadName(const std::string& name)
|
||||
{
|
||||
|
@ -104,6 +104,8 @@
|
||||
/// execution environments, respectively.
|
||||
/// - MemTransfer: This level logs memory transfers between the control and host
|
||||
/// environments.
|
||||
/// - KernelLaunches: This level logs details about each device side kernel launch
|
||||
/// such as the CUDA PTX, Warps, and Grids used.
|
||||
/// - Cast: Logs details of dynamic object resolution.
|
||||
///
|
||||
/// The log may be shared and extended by applications that use VTK-m. There
|
||||
@ -310,6 +312,9 @@ enum class LogLevel
|
||||
/// Host->device / device->host data copies
|
||||
MemTransfer,
|
||||
|
||||
/// Details on Device-side Kernel Launches
|
||||
KernelLaunches,
|
||||
|
||||
/// When a dynamic object is (or isn't) resolved via CastAndCall, etc.
|
||||
Cast,
|
||||
|
||||
@ -350,6 +355,13 @@ VTKM_CONT_EXPORT
|
||||
VTKM_CONT
|
||||
void SetStderrLogLevel(vtkm::cont::LogLevel level);
|
||||
|
||||
/**
|
||||
* Get the active highest log level that will be printed to stderr.
|
||||
*/
|
||||
VTKM_CONT_EXPORT
|
||||
VTKM_CONT
|
||||
vtkm::cont::LogLevel GetStderrLogLevel();
|
||||
|
||||
/**
|
||||
* Register a custom name to identify a log level. The name will be truncated
|
||||
* to 4 characters internally.
|
||||
@ -416,20 +428,23 @@ std::string GetSizeString(vtkm::UInt64 bytes, int prec = 2);
|
||||
* enabled and the platform supports it, the type name will also be demangled.
|
||||
* @{
|
||||
*/
|
||||
static inline VTKM_CONT std::string TypeToString(const std::type_info& t)
|
||||
{
|
||||
#ifdef VTKM_ENABLE_LOGGING
|
||||
return loguru::demangle(t.name()).c_str();
|
||||
#else // VTKM_ENABLE_LOGGING
|
||||
return t.name();
|
||||
#endif // VTKM_ENABLE_LOGGING
|
||||
}
|
||||
template <typename T>
|
||||
static inline VTKM_CONT std::string TypeToString()
|
||||
{
|
||||
#ifdef VTKM_ENABLE_LOGGING
|
||||
return loguru::demangle(typeid(T).name()).c_str();
|
||||
#else // VTKM_ENABLE_LOGGING
|
||||
return typeid(T).name();
|
||||
#endif // VTKM_ENABLE_LOGGING
|
||||
return TypeToString(typeid(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline VTKM_CONT std::string TypeToString(const T&)
|
||||
static inline VTKM_CONT std::string TypeToString(const T& t)
|
||||
{
|
||||
return TypeToString<T>();
|
||||
return TypeToString(typeid(t));
|
||||
}
|
||||
/**@}*/
|
||||
}
|
||||
|
@ -233,5 +233,45 @@ void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetBlocksAndThrea
|
||||
threadsPerBlock = params.second;
|
||||
}
|
||||
}
|
||||
|
||||
void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::LogKernelLaunch(
|
||||
const cudaFuncAttributes& func_attrs,
|
||||
const std::type_info& worklet_info,
|
||||
vtkm::UInt32 blocks,
|
||||
vtkm::UInt32 threadsPerBlock,
|
||||
vtkm::Id)
|
||||
{
|
||||
(void)func_attrs;
|
||||
(void)blocks;
|
||||
(void)threadsPerBlock;
|
||||
std::string name = vtkm::cont::TypeToString(worklet_info);
|
||||
VTKM_LOG_F(vtkm::cont::LogLevel::KernelLaunches,
|
||||
"Launching 1D kernel %s on CUDA [ptx=%i, blocks=%i, threadsPerBlock=%i]",
|
||||
name.c_str(),
|
||||
(func_attrs.ptxVersion * 10),
|
||||
blocks,
|
||||
threadsPerBlock);
|
||||
}
|
||||
|
||||
void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::LogKernelLaunch(
|
||||
const cudaFuncAttributes& func_attrs,
|
||||
const std::type_info& worklet_info,
|
||||
vtkm::UInt32 blocks,
|
||||
dim3 threadsPerBlock,
|
||||
const dim3&)
|
||||
{
|
||||
(void)func_attrs;
|
||||
(void)blocks;
|
||||
(void)threadsPerBlock;
|
||||
std::string name = vtkm::cont::TypeToString(worklet_info);
|
||||
VTKM_LOG_F(vtkm::cont::LogLevel::KernelLaunches,
|
||||
"Launching 3D kernel %s on CUDA [ptx=%i, blocks=%i, threadsPerBlock=%i, %i, %i]",
|
||||
name.c_str(),
|
||||
(func_attrs.ptxVersion * 10),
|
||||
blocks,
|
||||
threadsPerBlock.x,
|
||||
threadsPerBlock.y,
|
||||
threadsPerBlock.z);
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont
|
||||
|
@ -132,7 +132,6 @@ VTKM_CONT_EXPORT void InitScheduleParameters(
|
||||
int maxThreadsPerMultiProcessor,
|
||||
int maxThreadsPerBlock));
|
||||
|
||||
|
||||
namespace internal
|
||||
{
|
||||
|
||||
@ -169,7 +168,6 @@ __global__ void TaskStrided3DLaunch(TaskType task, dim3 size)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T, typename BinaryOperationType>
|
||||
__global__ void SumExclusiveScan(T a, T b, T result, BinaryOperationType binary_op)
|
||||
{
|
||||
@ -1418,6 +1416,20 @@ public:
|
||||
VTKM_CONT_EXPORT
|
||||
static void GetBlocksAndThreads(vtkm::UInt32& blocks, dim3& threadsPerBlock, const dim3& size);
|
||||
|
||||
VTKM_CONT_EXPORT
|
||||
static void LogKernelLaunch(const cudaFuncAttributes& func_attrs,
|
||||
const std::type_info& worklet_info,
|
||||
vtkm::UInt32 blocks,
|
||||
vtkm::UInt32 threadsPerBlock,
|
||||
vtkm::Id size);
|
||||
|
||||
VTKM_CONT_EXPORT
|
||||
static void LogKernelLaunch(const cudaFuncAttributes& func_attrs,
|
||||
const std::type_info& worklet_info,
|
||||
vtkm::UInt32 blocks,
|
||||
dim3 threadsPerBlock,
|
||||
const dim3& size);
|
||||
|
||||
public:
|
||||
template <typename WType, typename IType>
|
||||
static void ScheduleTask(vtkm::exec::cuda::internal::TaskStrided1D<WType, IType>& functor,
|
||||
@ -1436,6 +1448,17 @@ public:
|
||||
vtkm::UInt32 blocks, threadsPerBlock;
|
||||
GetBlocksAndThreads(blocks, threadsPerBlock, numInstances);
|
||||
|
||||
#ifdef VTKM_ENABLE_LOGGING
|
||||
if (GetStderrLogLevel() >= vtkm::cont::LogLevel::KernelLaunches)
|
||||
{
|
||||
using FunctorType = vtkm::exec::cuda::internal::TaskStrided1D<WType, IType>;
|
||||
cudaFuncAttributes empty_kernel_attrs;
|
||||
VTKM_CUDA_CALL(cudaFuncGetAttributes(&empty_kernel_attrs,
|
||||
cuda::internal::TaskStrided1DLaunch<FunctorType>));
|
||||
LogKernelLaunch(empty_kernel_attrs, typeid(WType), blocks, threadsPerBlock, numInstances);
|
||||
}
|
||||
#endif
|
||||
|
||||
cuda::internal::TaskStrided1DLaunch<<<blocks, threadsPerBlock, 0, cudaStreamPerThread>>>(
|
||||
functor, numInstances);
|
||||
}
|
||||
@ -1462,6 +1485,17 @@ public:
|
||||
dim3 threadsPerBlock;
|
||||
GetBlocksAndThreads(blocks, threadsPerBlock, ranges);
|
||||
|
||||
#ifdef VTKM_ENABLE_LOGGING
|
||||
if (GetStderrLogLevel() >= vtkm::cont::LogLevel::KernelLaunches)
|
||||
{
|
||||
using FunctorType = vtkm::exec::cuda::internal::TaskStrided3D<WType, IType>;
|
||||
cudaFuncAttributes empty_kernel_attrs;
|
||||
VTKM_CUDA_CALL(cudaFuncGetAttributes(&empty_kernel_attrs,
|
||||
cuda::internal::TaskStrided3DLaunch<FunctorType>));
|
||||
LogKernelLaunch(empty_kernel_attrs, typeid(WType), blocks, threadsPerBlock, ranges);
|
||||
}
|
||||
#endif
|
||||
|
||||
cuda::internal::TaskStrided3DLaunch<<<blocks, threadsPerBlock, 0, cudaStreamPerThread>>>(
|
||||
functor, ranges);
|
||||
}
|
||||
@ -1472,6 +1506,7 @@ public:
|
||||
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
|
||||
|
||||
vtkm::exec::cuda::internal::TaskStrided1D<Functor, vtkm::internal::NullType> kernel(functor);
|
||||
|
||||
ScheduleTask(kernel, numInstances);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user