From 46a613d183e758ec89b5320f3d7534c8088f57e8 Mon Sep 17 00:00:00 2001 From: Kenneth Moreland Date: Wed, 7 Jun 2023 17:15:33 -0400 Subject: [PATCH] Speed up compilation of ArrayRangeCompute.cxx The file `ArrayRangeCompute.cxx` was taking a long time to compile with some device compilers. This is because it precompiles the range computation for many types of array structures. It thus compiled the same operation many times over. The new implementation compiles just as many cases. However, the compilation is split into many different translation units using the instantiations feature of VTK-m's configuration. Although this rarely reduces the overall CPU time spent during compiling, it prevents parallel compiles from waiting for this one build to complete. It also avoids potential issues with compilers running out of resources as they try to build a monolithic file. --- CMake/VTKmWrappers.cmake | 4 + docs/changelog/array-range-instantiations.md | 14 ++ vtkm/cont/ArrayRangeComputeTemplate.h | 156 ++++++++++++++++++- vtkm/cont/CMakeLists.txt | 8 + 4 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/array-range-instantiations.md diff --git a/CMake/VTKmWrappers.cmake b/CMake/VTKmWrappers.cmake index 10ba21e06..c1833a374 100644 --- a/CMake/VTKmWrappers.cmake +++ b/CMake/VTKmWrappers.cmake @@ -658,6 +658,10 @@ function(vtkm_add_instantiations instantiations_list) set(file_template_source ${instantiations_file}) endif() + set_property(DIRECTORY + APPEND + PROPERTY CMAKE_CONFIGURE_DEPENDS ${instantiations_file}) + # Extract explicit instantiations _vtkm_extract_instantiations(instantiations ${instantiations_file}) diff --git a/docs/changelog/array-range-instantiations.md b/docs/changelog/array-range-instantiations.md new file mode 100644 index 000000000..1bf4e98ff --- /dev/null +++ b/docs/changelog/array-range-instantiations.md @@ -0,0 +1,14 @@ +# Sped up compilation of ArrayRangeCompute.cxx + +The file `ArrayRangeCompute.cxx` was taking a long 
time to compile with +some device compilers. This is because it precompiles the range computation +for many types of array structures. It thus compiled the same operation +many times over. + +The new implementation compiles just as many cases. However, the +compilation is split into many different translation units using the +instantiations feature of VTK-m's configuration. Although this rarely +reduces the overall CPU time spent during compiling, it prevents parallel +compiles from waiting for this one build to complete. It also avoids +potential issues with compilers running out of resources as they try to +build a monolithic file. diff --git a/vtkm/cont/ArrayRangeComputeTemplate.h b/vtkm/cont/ArrayRangeComputeTemplate.h index aeab8c3c5..8e29f1166 100644 --- a/vtkm/cont/ArrayRangeComputeTemplate.h +++ b/vtkm/cont/ArrayRangeComputeTemplate.h @@ -16,6 +16,8 @@ #include #include +#include + #include #include @@ -114,7 +116,7 @@ struct ArrayRangeComputeImpl template -inline vtkm::cont::ArrayHandle ArrayRangeComputeTemplate( +vtkm::cont::ArrayHandle ArrayRangeComputeTemplate( const ArrayHandleType& input, vtkm::cont::DeviceAdapterId device = vtkm::cont::DeviceAdapterTagAny{}) { @@ -134,4 +136,156 @@ inline vtkm::cont::ArrayHandle ArrayRangeCompute( } } // namespace vtkm::cont +#define VTK_M_ARRAY_RANGE_COMPUTE_ALL_SCALARS(modifiers, ...) 
\ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle& input, vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle& input, vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const 
vtkm::cont::ArrayHandle& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle& input, \ + vtkm::cont::DeviceAdapterId device) + +#define VTK_M_ARRAY_RANGE_COMPUTE_ALL_VECN(modifiers, N, ...) \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& input, \ + 
vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& input, \ + vtkm::cont::DeviceAdapterId device); \ + modifiers vtkm::cont::ArrayHandle vtkm::cont::ArrayRangeComputeTemplate( \ + const vtkm::cont::ArrayHandle, __VA_ARGS__>& \ + input, \ + vtkm::cont::DeviceAdapterId device) + +VTKM_INSTANTIATION_BEGIN +VTK_M_ARRAY_RANGE_COMPUTE_ALL_SCALARS(extern template VTKM_CONT_TEMPLATE_EXPORT, + vtkm::cont::StorageTagBasic); +VTKM_INSTANTIATION_END +VTKM_INSTANTIATION_BEGIN +VTK_M_ARRAY_RANGE_COMPUTE_ALL_VECN(extern template VTKM_CONT_TEMPLATE_EXPORT, + 2, + vtkm::cont::StorageTagBasic); +VTKM_INSTANTIATION_END +VTKM_INSTANTIATION_BEGIN +VTK_M_ARRAY_RANGE_COMPUTE_ALL_VECN(extern template VTKM_CONT_TEMPLATE_EXPORT, + 3, + vtkm::cont::StorageTagBasic); +VTKM_INSTANTIATION_END +VTKM_INSTANTIATION_BEGIN +VTK_M_ARRAY_RANGE_COMPUTE_ALL_VECN(extern template VTKM_CONT_TEMPLATE_EXPORT, + 4, + vtkm::cont::StorageTagBasic); +VTKM_INSTANTIATION_END + +VTKM_INSTANTIATION_BEGIN +VTK_M_ARRAY_RANGE_COMPUTE_ALL_VECN(extern template VTKM_CONT_TEMPLATE_EXPORT, + 2, + vtkm::cont::StorageTagSOA); +VTKM_INSTANTIATION_END +VTKM_INSTANTIATION_BEGIN +VTK_M_ARRAY_RANGE_COMPUTE_ALL_VECN(extern template VTKM_CONT_TEMPLATE_EXPORT, + 3, + vtkm::cont::StorageTagSOA); +VTKM_INSTANTIATION_END +VTKM_INSTANTIATION_BEGIN +VTK_M_ARRAY_RANGE_COMPUTE_ALL_VECN(extern template VTKM_CONT_TEMPLATE_EXPORT, + 4, + vtkm::cont::StorageTagSOA); +VTKM_INSTANTIATION_END + +VTKM_INSTANTIATION_BEGIN +VTK_M_ARRAY_RANGE_COMPUTE_ALL_VECN( + extern template 
VTKM_CONT_TEMPLATE_EXPORT, + 3, + vtkm::cont::StorageTagCartesianProduct); +VTKM_INSTANTIATION_END + +VTKM_INSTANTIATION_BEGIN +VTK_M_ARRAY_RANGE_COMPUTE_ALL_SCALARS(extern template VTKM_CONT_TEMPLATE_EXPORT, + vtkm::cont::StorageTagConstant); +VTKM_INSTANTIATION_END +VTKM_INSTANTIATION_BEGIN +VTK_M_ARRAY_RANGE_COMPUTE_ALL_VECN(extern template VTKM_CONT_TEMPLATE_EXPORT, + 2, + vtkm::cont::StorageTagConstant); +VTKM_INSTANTIATION_END +VTKM_INSTANTIATION_BEGIN +VTK_M_ARRAY_RANGE_COMPUTE_ALL_VECN(extern template VTKM_CONT_TEMPLATE_EXPORT, + 3, + vtkm::cont::StorageTagConstant); +VTKM_INSTANTIATION_END +VTKM_INSTANTIATION_BEGIN +VTK_M_ARRAY_RANGE_COMPUTE_ALL_VECN(extern template VTKM_CONT_TEMPLATE_EXPORT, + 4, + vtkm::cont::StorageTagConstant); +VTKM_INSTANTIATION_END + #endif //vtk_m_cont_ArrayRangeComputeTemplate_h diff --git a/vtkm/cont/CMakeLists.txt b/vtkm/cont/CMakeLists.txt index 9e824ff95..c5de8ac6f 100644 --- a/vtkm/cont/CMakeLists.txt +++ b/vtkm/cont/CMakeLists.txt @@ -263,6 +263,14 @@ vtkm_install_headers(vtkm/cont ${VTKm_BINARY_INCLUDE_DIR}/${kit_dir}/DefaultTypes.h ) +#----------------------------------------------------------------------------- +# Some operations are pre-compiled for many different types. Improve parallel +# compiles by breaking them up into smaller units. +vtkm_add_instantiations(array_range_instantiations + INSTANTIATIONS_FILE ArrayRangeComputeTemplate.h + ) +list(APPEND device_sources ${array_range_instantiations}) + #----------------------------------------------------------------------------- vtkm_library( NAME vtkm_cont SOURCES ${sources}