Enable shared CUDA builds when not compiling virtuals

The reason why we did not support shared libraries when CUDA compiles
were on is that virtual methods require a special linking step to pull
together all virtual methods that might be called. I other words, you
cannot call a virtual CUDA method defined inside a library. This
requirement goes away when virtuals are removed.

Also removed the necessity of using seprable compilation with cuda.
Again, this is only needed when a CUDA function is defined in one
translation unit and used in another. Now we can enforce that all
translation units define their own CUDA functions.

Also, suppress warnings in cuda/internal/ExecutionPolicy.h

This is where we call the thrust algorithms. There must be some loop
where it, on some code path, calls back a host function. This must be in
an execution path that never happens. The thrust version has its own
suppress, but that does not seem to actually suppress the warning (it
just means that the warning does not tell you where the actual call is).

Get around the problem by suppressing the warnings in VTK-m.

Co-authored-by: Kenneth Moreland <morelandkd@ornl.gov>
Co-authored-by: Vicente Adolfo Bolea Sanchez <vicente.bolea@kitware.com>

Signed-off-by: Vicente Adolfo Bolea Sanchez <vicente.bolea@kitware.com>
This commit is contained in:
Vicente Adolfo Bolea Sanchez 2021-08-20 18:01:30 -04:00
parent a6c4e8479f
commit d348b11183
8 changed files with 29 additions and 14 deletions

@ -86,7 +86,7 @@
- .docker_image
.ubuntu1804_cuda_kokkos: &ubuntu1804_cuda_kokkos
image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20201016"
image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20210819"
extends:
- .docker_image

@ -17,7 +17,7 @@ build:centos7_gcc73:
variables:
CMAKE_BUILD_TYPE: RelWithDebInfo
CMAKE_GENERATOR: "Unix Makefiles"
VTKM_SETTINGS: "cuda+turing+32bit_ids+no_rendering"
VTKM_SETTINGS: "cuda+turing+32bit_ids+no_rendering+no_virtual+shared"
test:centos7_gcc73:
tags:

@ -40,7 +40,7 @@ RUN mkdir -p /opt/kokkos/build && \
-DKokkos_ENABLE_CUDA_CONSTEXPR=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
-DKokkos_ENABLE_CUDA_LDG_INTRINSIC=ON \
-DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON \
-DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=OFF \
-DKokkos_ENABLE_CUDA_UVM=ON \
-DKokkos_ARCH_TURING75=ON && \
cmake --build . -j 8 && \

@ -18,7 +18,7 @@ build:ubuntu1604_gcc5:
CC: "gcc-5"
CXX: "g++-5"
CMAKE_BUILD_TYPE: RelWithDebInfo
VTKM_SETTINGS: "cuda+pascal+no_virtual+ascent_types+32bit_ids+64bit_floats"
VTKM_SETTINGS: "cuda+pascal+no_virtual+shared+ascent_types+32bit_ids+64bit_floats"
test:ubuntu1604_gcc5:
tags:
@ -56,7 +56,7 @@ build:ubuntu1604_gcc5_2:
CC: "gcc-5"
CXX: "g++-5"
CMAKE_BUILD_TYPE: Release
VTKM_SETTINGS: "openmp+cuda+pascal+examples"
VTKM_SETTINGS: "openmp+cuda+pascal+examples+static"
test:ubuntu1804_test_ubuntu1604_gcc5_2:
tags:

@ -56,7 +56,7 @@ build:ubuntu1804_gcc7:
CC: "gcc-7"
CXX: "g++-7"
CUDAHOSTCXX: "g++-7"
VTKM_SETTINGS: "benchmarks+cuda+turing+mpi+64bit_floats+no_virtual"
VTKM_SETTINGS: "benchmarks+cuda+turing+mpi+64bit_floats+no_virtual+shared"
test:ubuntu1804_gcc7:
tags:
@ -96,7 +96,7 @@ build:ubuntu1804_clang_cuda:
CC: "clang-8"
CXX: "clang++-8"
CUDAHOSTCXX: "clang++-8"
VTKM_SETTINGS: "cuda+pascal+tbb+static+examples"
VTKM_SETTINGS: "cuda+pascal+tbb+examples+no_virtual+shared"
test:ubuntu1804_clang_cuda:
tags:
@ -202,7 +202,7 @@ build:ubuntu1804_kokkos:
variables:
CMAKE_GENERATOR: "Ninja"
CMAKE_BUILD_TYPE: Release
VTKM_SETTINGS: "benchmarks+kokkos+turing+static+64bit_floats"
VTKM_SETTINGS: "benchmarks+kokkos+turing+64bit_floats+no_virtual+shared"
test:ubuntu1804_kokkos:
tags:

@ -361,8 +361,10 @@ function(vtkm_add_target_information uses_vtkm_target)
endforeach()
# set the required target properties
set_target_properties(${targets} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${targets} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
if(NOT VTKm_NO_DEPRECATED_VIRTUAL)
set_target_properties(${targets} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${targets} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
endif()
# CUDA_ARCHITECTURES added in CMake 3.18
set_target_properties(${targets} PROPERTIES CUDA_ARCHITECTURES OFF)
@ -385,7 +387,7 @@ function(vtkm_add_target_information uses_vtkm_target)
#
# This is required as CUDA currently doesn't support device side calls across
# dynamic library boundaries.
if((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
if((NOT VTKm_NO_DEPRECATED_VIRTUAL) AND ((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda)))
foreach(target IN LISTS targets)
get_target_property(lib_type ${target} TYPE)
if (TARGET vtkm::cuda)
@ -403,12 +405,14 @@ function(vtkm_add_target_information uses_vtkm_target)
if(PROJECT_NAME STREQUAL "VTKm")
message(SEND_ERROR "${target} needs to be built STATIC as CUDA doesn't"
" support virtual methods across dynamic library boundaries. You"
" need to set the CMake option BUILD_SHARED_LIBS to `OFF`.")
" need to set the CMake option BUILD_SHARED_LIBS to `OFF` or"
" (better) turn VTKm_NO_DEPRECATED_VIRTUAL to `ON`.")
else()
message(SEND_ERROR "${target} needs to be built STATIC as CUDA doesn't"
" support virtual methods across dynamic library boundaries. You"
" should either explicitly call add_library with the `STATIC` keyword"
" or set the CMake option BUILD_SHARED_LIBS to `OFF`.")
" or set the CMake option BUILD_SHARED_LIBS to `OFF` or"
" (better) turn VTKm_NO_DEPRECATED_VIRTUAL to `ON`.")
endif()
endif()
endforeach()

@ -139,7 +139,7 @@ vtkm_option(VTKm_INSTALL_ONLY_LIBRARIES "install only vtk-m libraries and no hea
# VTK does.
vtkm_option(VTKm_HIDE_PRIVATE_SYMBOLS "Hide symbols from libraries." ON)
vtkm_option(BUILD_SHARED_LIBS "Build VTK-m with shared libraries" OFF)
vtkm_option(BUILD_SHARED_LIBS "Build VTK-m with shared libraries" ON)
set(VTKm_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
# This flag can be used to prevent VTK-m from exporting its warning flags in its

@ -31,6 +31,7 @@ struct vtkm_cuda_policy : thrust::device_execution_policy<vtkm_cuda_policy>
//The purpose of this is that for 32bit types (UInt32,Int32,Float32) thrust
//will call a super fast radix sort only if the operator is thrust::less
//or thrust::greater.
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename T>
__host__ __device__ void sort(
const vtkm_cuda_policy& exec,
@ -42,6 +43,7 @@ __host__ __device__ void sort(
return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::less<T>());
}
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename T, typename RandomAccessIterator>
__host__ __device__ void sort_by_key(
const vtkm_cuda_policy& exec,
@ -55,6 +57,7 @@ __host__ __device__ void sort_by_key(
ThrustCudaPolicyPerThread, first, last, values_first, thrust::less<T>());
}
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename T>
__host__ __device__ void sort(
const vtkm_cuda_policy& exec,
@ -66,6 +69,7 @@ __host__ __device__ void sort(
return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::less<T>());
}
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename T, typename RandomAccessIterator>
__host__ __device__ void sort_by_key(
const vtkm_cuda_policy& exec,
@ -79,6 +83,7 @@ __host__ __device__ void sort_by_key(
ThrustCudaPolicyPerThread, first, last, values_first, thrust::less<T>());
}
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename T>
__host__ __device__ void sort(
const vtkm_cuda_policy& exec,
@ -90,6 +95,7 @@ __host__ __device__ void sort(
return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::greater<T>());
}
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename T, typename RandomAccessIterator>
__host__ __device__ void sort_by_key(
const vtkm_cuda_policy& exec,
@ -103,6 +109,7 @@ __host__ __device__ void sort_by_key(
ThrustCudaPolicyPerThread, first, last, values_first, thrust::greater<T>());
}
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename T>
__host__ __device__ void sort(
const vtkm_cuda_policy& exec,
@ -114,6 +121,7 @@ __host__ __device__ void sort(
return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::greater<T>());
}
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename T, typename RandomAccessIterator>
__host__ __device__ void sort_by_key(
const vtkm_cuda_policy& exec,
@ -127,6 +135,7 @@ __host__ __device__ void sort_by_key(
ThrustCudaPolicyPerThread, first, last, values_first, thrust::greater<T>());
}
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename RandomAccessIterator, typename StrictWeakOrdering>
__host__ __device__ void sort(const vtkm_cuda_policy& exec,
RandomAccessIterator first,
@ -140,6 +149,7 @@ __host__ __device__ void sort(const vtkm_cuda_policy& exec,
return thrust::sort(ThrustCudaPolicyPerThread, first, last, comp);
}
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename RandomAccessIteratorKeys,
typename RandomAccessIteratorValues,
typename StrictWeakOrdering>
@ -156,6 +166,7 @@ __host__ __device__ void sort_by_key(const vtkm_cuda_policy& exec,
return thrust::sort_by_key(ThrustCudaPolicyPerThread, first, last, values_first, comp);
}
VTKM_SUPPRESS_EXEC_WARNINGS
template <typename T,
typename InputIterator2,
typename OutputIterator1,