Enable shared CUDA builds when not compiling virtuals

The reason why we did not support shared libraries when CUDA compiles were on is that virtual methods require a special linking step to pull together all virtual methods that might be called. I other words, you cannot call a virtual CUDA method defined inside a library. This requirement goes away when virtuals are removed. Also removed the necessity of using seprable compilation with cuda. Again, this is only needed when a CUDA function is defined in one translation unit and used in another. Now we can enforce that all translation units define their own CUDA functions. Also, suppress warnings in cuda/internal/ExecutionPolicy.h This is where we call the thrust algorithms. There must be some loop where it, on some code path, calls back a host function. This must be in an execution path that never happens. The thrust version has its own suppress, but that does not seem to actually suppress the warning (it just means that the warning does not tell you where the actual call is). Get around the problem by suppressing the warnings in VTK-m. Co-authored-by: Kenneth Moreland <morelandkd@ornl.gov> Co-authored-by: Vicente Adolfo Bolea Sanchez <vicente.bolea@kitware.com> Signed-off-by: Vicente Adolfo Bolea Sanchez <vicente.bolea@kitware.com>
2021-08-20 18:01:30 -04:00 · 2021-08-20 18:01:30 -04:00 · d348b11183
commit d348b11183
parent a6c4e8479f
8 changed files with 29 additions and 14 deletions
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@ -86,7 +86,7 @@
    - .docker_image

 .ubuntu1804_cuda_kokkos: &ubuntu1804_cuda_kokkos
-  image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20201016"
+  image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20210819"
  extends:
    - .docker_image

--- a/.gitlab/ci/centos7.yml
+++ b/.gitlab/ci/centos7.yml
@ -17,7 +17,7 @@ build:centos7_gcc73:
  variables:
    CMAKE_BUILD_TYPE: RelWithDebInfo
    CMAKE_GENERATOR: "Unix Makefiles"
-    VTKM_SETTINGS: "cuda+turing+32bit_ids+no_rendering"
+    VTKM_SETTINGS: "cuda+turing+32bit_ids+no_rendering+no_virtual+shared"

 test:centos7_gcc73:
  tags:
--- a/.gitlab/ci/docker/ubuntu1804/kokkos-cuda/Dockerfile
+++ b/.gitlab/ci/docker/ubuntu1804/kokkos-cuda/Dockerfile
@ -40,7 +40,7 @@ RUN mkdir -p /opt/kokkos/build && \
          -DKokkos_ENABLE_CUDA_CONSTEXPR=ON \
          -DKokkos_ENABLE_CUDA_LAMBDA=ON \
          -DKokkos_ENABLE_CUDA_LDG_INTRINSIC=ON \
-          -DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON \
+          -DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=OFF \
          -DKokkos_ENABLE_CUDA_UVM=ON \
          -DKokkos_ARCH_TURING75=ON && \
    cmake --build . -j 8 && \
--- a/.gitlab/ci/ubuntu1604.yml
+++ b/.gitlab/ci/ubuntu1604.yml
@ -18,7 +18,7 @@ build:ubuntu1604_gcc5:
    CC: "gcc-5"
    CXX: "g++-5"
    CMAKE_BUILD_TYPE: RelWithDebInfo
-    VTKM_SETTINGS: "cuda+pascal+no_virtual+ascent_types+32bit_ids+64bit_floats"
+    VTKM_SETTINGS: "cuda+pascal+no_virtual+shared+ascent_types+32bit_ids+64bit_floats"

 test:ubuntu1604_gcc5:
  tags:
@ -56,7 +56,7 @@ build:ubuntu1604_gcc5_2:
    CC: "gcc-5"
    CXX: "g++-5"
    CMAKE_BUILD_TYPE: Release
-    VTKM_SETTINGS: "openmp+cuda+pascal+examples"
+    VTKM_SETTINGS: "openmp+cuda+pascal+examples+static"

 test:ubuntu1804_test_ubuntu1604_gcc5_2:
  tags:
--- a/.gitlab/ci/ubuntu1804.yml
+++ b/.gitlab/ci/ubuntu1804.yml
@ -56,7 +56,7 @@ build:ubuntu1804_gcc7:
    CC: "gcc-7"
    CXX: "g++-7"
    CUDAHOSTCXX: "g++-7"
-    VTKM_SETTINGS: "benchmarks+cuda+turing+mpi+64bit_floats+no_virtual"
+    VTKM_SETTINGS: "benchmarks+cuda+turing+mpi+64bit_floats+no_virtual+shared"

 test:ubuntu1804_gcc7:
  tags:
@ -96,7 +96,7 @@ build:ubuntu1804_clang_cuda:
    CC: "clang-8"
    CXX: "clang++-8"
    CUDAHOSTCXX: "clang++-8"
-    VTKM_SETTINGS: "cuda+pascal+tbb+static+examples"
+    VTKM_SETTINGS: "cuda+pascal+tbb+examples+no_virtual+shared"

 test:ubuntu1804_clang_cuda:
  tags:
@ -202,7 +202,7 @@ build:ubuntu1804_kokkos:
  variables:
    CMAKE_GENERATOR: "Ninja"
    CMAKE_BUILD_TYPE: Release
-    VTKM_SETTINGS: "benchmarks+kokkos+turing+static+64bit_floats"
+    VTKM_SETTINGS: "benchmarks+kokkos+turing+64bit_floats+no_virtual+shared"

 test:ubuntu1804_kokkos:
  tags:
--- a/CMake/VTKmWrappers.cmake
+++ b/CMake/VTKmWrappers.cmake
@ -361,8 +361,10 @@ function(vtkm_add_target_information uses_vtkm_target)
  endforeach()

  # set the required target properties
-  set_target_properties(${targets} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-  set_target_properties(${targets} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+  if(NOT VTKm_NO_DEPRECATED_VIRTUAL)
+    set_target_properties(${targets} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+    set_target_properties(${targets} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+  endif()
  # CUDA_ARCHITECTURES added in CMake 3.18
  set_target_properties(${targets} PROPERTIES CUDA_ARCHITECTURES OFF)

@ -385,7 +387,7 @@ function(vtkm_add_target_information uses_vtkm_target)
  #
  # This is required as CUDA currently doesn't support device side calls across
  # dynamic library boundaries.
-  if((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
+  if((NOT VTKm_NO_DEPRECATED_VIRTUAL) AND ((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda)))
    foreach(target IN LISTS targets)
      get_target_property(lib_type ${target} TYPE)
      if (TARGET vtkm::cuda)
@ -403,12 +405,14 @@ function(vtkm_add_target_information uses_vtkm_target)
        if(PROJECT_NAME STREQUAL "VTKm")
          message(SEND_ERROR "${target} needs to be built STATIC as CUDA doesn't"
                " support virtual methods across dynamic library boundaries. You"
-                " need to set the CMake option BUILD_SHARED_LIBS to `OFF`.")
+                " need to set the CMake option BUILD_SHARED_LIBS to `OFF` or"
+                " (better) turn VTKm_NO_DEPRECATED_VIRTUAL to `ON`.")
        else()
          message(SEND_ERROR "${target} needs to be built STATIC as CUDA doesn't"
                  " support virtual methods across dynamic library boundaries. You"
                  " should either explicitly call add_library with the `STATIC` keyword"
-                  " or set the CMake option BUILD_SHARED_LIBS to `OFF`.")
+                  " or set the CMake option BUILD_SHARED_LIBS to `OFF` or"
+                  " (better) turn VTKm_NO_DEPRECATED_VIRTUAL to `ON`.")
        endif()
      endif()
    endforeach()
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -139,7 +139,7 @@ vtkm_option(VTKm_INSTALL_ONLY_LIBRARIES "install only vtk-m libraries and no hea
 # VTK does.
 vtkm_option(VTKm_HIDE_PRIVATE_SYMBOLS "Hide symbols from libraries." ON)

-vtkm_option(BUILD_SHARED_LIBS "Build VTK-m with shared libraries" OFF)
+vtkm_option(BUILD_SHARED_LIBS "Build VTK-m with shared libraries" ON)
 set(VTKm_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})

 # This flag can be used to prevent VTK-m from exporting its warning flags in its
--- a/vtkm/exec/cuda/internal/ExecutionPolicy.h
+++ b/vtkm/exec/cuda/internal/ExecutionPolicy.h
@ -31,6 +31,7 @@ struct vtkm_cuda_policy : thrust::device_execution_policy<vtkm_cuda_policy>
 //The purpose of this is that for 32bit types (UInt32,Int32,Float32) thrust
 //will call a super fast radix sort only if the operator is thrust::less
 //or thrust::greater.
+VTKM_SUPPRESS_EXEC_WARNINGS
 template <typename T>
 __host__ __device__ void sort(
  const vtkm_cuda_policy& exec,
@ -42,6 +43,7 @@ __host__ __device__ void sort(
  return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::less<T>());
 }

+VTKM_SUPPRESS_EXEC_WARNINGS
 template <typename T, typename RandomAccessIterator>
 __host__ __device__ void sort_by_key(
  const vtkm_cuda_policy& exec,
@ -55,6 +57,7 @@ __host__ __device__ void sort_by_key(
    ThrustCudaPolicyPerThread, first, last, values_first, thrust::less<T>());
 }

+VTKM_SUPPRESS_EXEC_WARNINGS
 template <typename T>
 __host__ __device__ void sort(
  const vtkm_cuda_policy& exec,
@ -66,6 +69,7 @@ __host__ __device__ void sort(
  return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::less<T>());
 }

+VTKM_SUPPRESS_EXEC_WARNINGS
 template <typename T, typename RandomAccessIterator>
 __host__ __device__ void sort_by_key(
  const vtkm_cuda_policy& exec,
@ -79,6 +83,7 @@ __host__ __device__ void sort_by_key(
    ThrustCudaPolicyPerThread, first, last, values_first, thrust::less<T>());
 }

+VTKM_SUPPRESS_EXEC_WARNINGS
 template <typename T>
 __host__ __device__ void sort(
  const vtkm_cuda_policy& exec,
@ -90,6 +95,7 @@ __host__ __device__ void sort(
  return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::greater<T>());
 }

+VTKM_SUPPRESS_EXEC_WARNINGS
 template <typename T, typename RandomAccessIterator>
 __host__ __device__ void sort_by_key(
  const vtkm_cuda_policy& exec,
@ -103,6 +109,7 @@ __host__ __device__ void sort_by_key(
    ThrustCudaPolicyPerThread, first, last, values_first, thrust::greater<T>());
 }

+VTKM_SUPPRESS_EXEC_WARNINGS
 template <typename T>
 __host__ __device__ void sort(
  const vtkm_cuda_policy& exec,
@ -114,6 +121,7 @@ __host__ __device__ void sort(
  return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::greater<T>());
 }

+VTKM_SUPPRESS_EXEC_WARNINGS
 template <typename T, typename RandomAccessIterator>
 __host__ __device__ void sort_by_key(
  const vtkm_cuda_policy& exec,
@ -127,6 +135,7 @@ __host__ __device__ void sort_by_key(
    ThrustCudaPolicyPerThread, first, last, values_first, thrust::greater<T>());
 }

+VTKM_SUPPRESS_EXEC_WARNINGS
 template <typename RandomAccessIterator, typename StrictWeakOrdering>
 __host__ __device__ void sort(const vtkm_cuda_policy& exec,
                              RandomAccessIterator first,
@ -140,6 +149,7 @@ __host__ __device__ void sort(const vtkm_cuda_policy& exec,
  return thrust::sort(ThrustCudaPolicyPerThread, first, last, comp);
 }

+VTKM_SUPPRESS_EXEC_WARNINGS
 template <typename RandomAccessIteratorKeys,
          typename RandomAccessIteratorValues,
          typename StrictWeakOrdering>
@ -156,6 +166,7 @@ __host__ __device__ void sort_by_key(const vtkm_cuda_policy& exec,
  return thrust::sort_by_key(ThrustCudaPolicyPerThread, first, last, values_first, comp);
 }

+VTKM_SUPPRESS_EXEC_WARNINGS
 template <typename T,
          typename InputIterator2,
          typename OutputIterator1,