mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-10-05 01:49:02 +00:00
Enable shared CUDA builds when not compiling virtuals
The reason why we did not support shared libraries when CUDA compiles were on is that virtual methods require a special linking step to pull together all virtual methods that might be called. I other words, you cannot call a virtual CUDA method defined inside a library. This requirement goes away when virtuals are removed. Also removed the necessity of using seprable compilation with cuda. Again, this is only needed when a CUDA function is defined in one translation unit and used in another. Now we can enforce that all translation units define their own CUDA functions. Also, suppress warnings in cuda/internal/ExecutionPolicy.h This is where we call the thrust algorithms. There must be some loop where it, on some code path, calls back a host function. This must be in an execution path that never happens. The thrust version has its own suppress, but that does not seem to actually suppress the warning (it just means that the warning does not tell you where the actual call is). Get around the problem by suppressing the warnings in VTK-m. Co-authored-by: Kenneth Moreland <morelandkd@ornl.gov> Co-authored-by: Vicente Adolfo Bolea Sanchez <vicente.bolea@kitware.com> Signed-off-by: Vicente Adolfo Bolea Sanchez <vicente.bolea@kitware.com>
This commit is contained in:
parent
a6c4e8479f
commit
d348b11183
@ -86,7 +86,7 @@
|
|||||||
- .docker_image
|
- .docker_image
|
||||||
|
|
||||||
.ubuntu1804_cuda_kokkos: &ubuntu1804_cuda_kokkos
|
.ubuntu1804_cuda_kokkos: &ubuntu1804_cuda_kokkos
|
||||||
image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20201016"
|
image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20210819"
|
||||||
extends:
|
extends:
|
||||||
- .docker_image
|
- .docker_image
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ build:centos7_gcc73:
|
|||||||
variables:
|
variables:
|
||||||
CMAKE_BUILD_TYPE: RelWithDebInfo
|
CMAKE_BUILD_TYPE: RelWithDebInfo
|
||||||
CMAKE_GENERATOR: "Unix Makefiles"
|
CMAKE_GENERATOR: "Unix Makefiles"
|
||||||
VTKM_SETTINGS: "cuda+turing+32bit_ids+no_rendering"
|
VTKM_SETTINGS: "cuda+turing+32bit_ids+no_rendering+no_virtual+shared"
|
||||||
|
|
||||||
test:centos7_gcc73:
|
test:centos7_gcc73:
|
||||||
tags:
|
tags:
|
||||||
|
@ -40,7 +40,7 @@ RUN mkdir -p /opt/kokkos/build && \
|
|||||||
-DKokkos_ENABLE_CUDA_CONSTEXPR=ON \
|
-DKokkos_ENABLE_CUDA_CONSTEXPR=ON \
|
||||||
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
|
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
|
||||||
-DKokkos_ENABLE_CUDA_LDG_INTRINSIC=ON \
|
-DKokkos_ENABLE_CUDA_LDG_INTRINSIC=ON \
|
||||||
-DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON \
|
-DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=OFF \
|
||||||
-DKokkos_ENABLE_CUDA_UVM=ON \
|
-DKokkos_ENABLE_CUDA_UVM=ON \
|
||||||
-DKokkos_ARCH_TURING75=ON && \
|
-DKokkos_ARCH_TURING75=ON && \
|
||||||
cmake --build . -j 8 && \
|
cmake --build . -j 8 && \
|
||||||
|
@ -18,7 +18,7 @@ build:ubuntu1604_gcc5:
|
|||||||
CC: "gcc-5"
|
CC: "gcc-5"
|
||||||
CXX: "g++-5"
|
CXX: "g++-5"
|
||||||
CMAKE_BUILD_TYPE: RelWithDebInfo
|
CMAKE_BUILD_TYPE: RelWithDebInfo
|
||||||
VTKM_SETTINGS: "cuda+pascal+no_virtual+ascent_types+32bit_ids+64bit_floats"
|
VTKM_SETTINGS: "cuda+pascal+no_virtual+shared+ascent_types+32bit_ids+64bit_floats"
|
||||||
|
|
||||||
test:ubuntu1604_gcc5:
|
test:ubuntu1604_gcc5:
|
||||||
tags:
|
tags:
|
||||||
@ -56,7 +56,7 @@ build:ubuntu1604_gcc5_2:
|
|||||||
CC: "gcc-5"
|
CC: "gcc-5"
|
||||||
CXX: "g++-5"
|
CXX: "g++-5"
|
||||||
CMAKE_BUILD_TYPE: Release
|
CMAKE_BUILD_TYPE: Release
|
||||||
VTKM_SETTINGS: "openmp+cuda+pascal+examples"
|
VTKM_SETTINGS: "openmp+cuda+pascal+examples+static"
|
||||||
|
|
||||||
test:ubuntu1804_test_ubuntu1604_gcc5_2:
|
test:ubuntu1804_test_ubuntu1604_gcc5_2:
|
||||||
tags:
|
tags:
|
||||||
|
@ -56,7 +56,7 @@ build:ubuntu1804_gcc7:
|
|||||||
CC: "gcc-7"
|
CC: "gcc-7"
|
||||||
CXX: "g++-7"
|
CXX: "g++-7"
|
||||||
CUDAHOSTCXX: "g++-7"
|
CUDAHOSTCXX: "g++-7"
|
||||||
VTKM_SETTINGS: "benchmarks+cuda+turing+mpi+64bit_floats+no_virtual"
|
VTKM_SETTINGS: "benchmarks+cuda+turing+mpi+64bit_floats+no_virtual+shared"
|
||||||
|
|
||||||
test:ubuntu1804_gcc7:
|
test:ubuntu1804_gcc7:
|
||||||
tags:
|
tags:
|
||||||
@ -96,7 +96,7 @@ build:ubuntu1804_clang_cuda:
|
|||||||
CC: "clang-8"
|
CC: "clang-8"
|
||||||
CXX: "clang++-8"
|
CXX: "clang++-8"
|
||||||
CUDAHOSTCXX: "clang++-8"
|
CUDAHOSTCXX: "clang++-8"
|
||||||
VTKM_SETTINGS: "cuda+pascal+tbb+static+examples"
|
VTKM_SETTINGS: "cuda+pascal+tbb+examples+no_virtual+shared"
|
||||||
|
|
||||||
test:ubuntu1804_clang_cuda:
|
test:ubuntu1804_clang_cuda:
|
||||||
tags:
|
tags:
|
||||||
@ -202,7 +202,7 @@ build:ubuntu1804_kokkos:
|
|||||||
variables:
|
variables:
|
||||||
CMAKE_GENERATOR: "Ninja"
|
CMAKE_GENERATOR: "Ninja"
|
||||||
CMAKE_BUILD_TYPE: Release
|
CMAKE_BUILD_TYPE: Release
|
||||||
VTKM_SETTINGS: "benchmarks+kokkos+turing+static+64bit_floats"
|
VTKM_SETTINGS: "benchmarks+kokkos+turing+64bit_floats+no_virtual+shared"
|
||||||
|
|
||||||
test:ubuntu1804_kokkos:
|
test:ubuntu1804_kokkos:
|
||||||
tags:
|
tags:
|
||||||
|
@ -361,8 +361,10 @@ function(vtkm_add_target_information uses_vtkm_target)
|
|||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
# set the required target properties
|
# set the required target properties
|
||||||
set_target_properties(${targets} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
if(NOT VTKm_NO_DEPRECATED_VIRTUAL)
|
||||||
set_target_properties(${targets} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
set_target_properties(${targets} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||||
|
set_target_properties(${targets} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||||
|
endif()
|
||||||
# CUDA_ARCHITECTURES added in CMake 3.18
|
# CUDA_ARCHITECTURES added in CMake 3.18
|
||||||
set_target_properties(${targets} PROPERTIES CUDA_ARCHITECTURES OFF)
|
set_target_properties(${targets} PROPERTIES CUDA_ARCHITECTURES OFF)
|
||||||
|
|
||||||
@ -385,7 +387,7 @@ function(vtkm_add_target_information uses_vtkm_target)
|
|||||||
#
|
#
|
||||||
# This is required as CUDA currently doesn't support device side calls across
|
# This is required as CUDA currently doesn't support device side calls across
|
||||||
# dynamic library boundaries.
|
# dynamic library boundaries.
|
||||||
if((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
|
if((NOT VTKm_NO_DEPRECATED_VIRTUAL) AND ((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda)))
|
||||||
foreach(target IN LISTS targets)
|
foreach(target IN LISTS targets)
|
||||||
get_target_property(lib_type ${target} TYPE)
|
get_target_property(lib_type ${target} TYPE)
|
||||||
if (TARGET vtkm::cuda)
|
if (TARGET vtkm::cuda)
|
||||||
@ -403,12 +405,14 @@ function(vtkm_add_target_information uses_vtkm_target)
|
|||||||
if(PROJECT_NAME STREQUAL "VTKm")
|
if(PROJECT_NAME STREQUAL "VTKm")
|
||||||
message(SEND_ERROR "${target} needs to be built STATIC as CUDA doesn't"
|
message(SEND_ERROR "${target} needs to be built STATIC as CUDA doesn't"
|
||||||
" support virtual methods across dynamic library boundaries. You"
|
" support virtual methods across dynamic library boundaries. You"
|
||||||
" need to set the CMake option BUILD_SHARED_LIBS to `OFF`.")
|
" need to set the CMake option BUILD_SHARED_LIBS to `OFF` or"
|
||||||
|
" (better) turn VTKm_NO_DEPRECATED_VIRTUAL to `ON`.")
|
||||||
else()
|
else()
|
||||||
message(SEND_ERROR "${target} needs to be built STATIC as CUDA doesn't"
|
message(SEND_ERROR "${target} needs to be built STATIC as CUDA doesn't"
|
||||||
" support virtual methods across dynamic library boundaries. You"
|
" support virtual methods across dynamic library boundaries. You"
|
||||||
" should either explicitly call add_library with the `STATIC` keyword"
|
" should either explicitly call add_library with the `STATIC` keyword"
|
||||||
" or set the CMake option BUILD_SHARED_LIBS to `OFF`.")
|
" or set the CMake option BUILD_SHARED_LIBS to `OFF` or"
|
||||||
|
" (better) turn VTKm_NO_DEPRECATED_VIRTUAL to `ON`.")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endforeach()
|
endforeach()
|
||||||
|
@ -139,7 +139,7 @@ vtkm_option(VTKm_INSTALL_ONLY_LIBRARIES "install only vtk-m libraries and no hea
|
|||||||
# VTK does.
|
# VTK does.
|
||||||
vtkm_option(VTKm_HIDE_PRIVATE_SYMBOLS "Hide symbols from libraries." ON)
|
vtkm_option(VTKm_HIDE_PRIVATE_SYMBOLS "Hide symbols from libraries." ON)
|
||||||
|
|
||||||
vtkm_option(BUILD_SHARED_LIBS "Build VTK-m with shared libraries" OFF)
|
vtkm_option(BUILD_SHARED_LIBS "Build VTK-m with shared libraries" ON)
|
||||||
set(VTKm_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
|
set(VTKm_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
|
||||||
|
|
||||||
# This flag can be used to prevent VTK-m from exporting its warning flags in its
|
# This flag can be used to prevent VTK-m from exporting its warning flags in its
|
||||||
|
@ -31,6 +31,7 @@ struct vtkm_cuda_policy : thrust::device_execution_policy<vtkm_cuda_policy>
|
|||||||
//The purpose of this is that for 32bit types (UInt32,Int32,Float32) thrust
|
//The purpose of this is that for 32bit types (UInt32,Int32,Float32) thrust
|
||||||
//will call a super fast radix sort only if the operator is thrust::less
|
//will call a super fast radix sort only if the operator is thrust::less
|
||||||
//or thrust::greater.
|
//or thrust::greater.
|
||||||
|
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__host__ __device__ void sort(
|
__host__ __device__ void sort(
|
||||||
const vtkm_cuda_policy& exec,
|
const vtkm_cuda_policy& exec,
|
||||||
@ -42,6 +43,7 @@ __host__ __device__ void sort(
|
|||||||
return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::less<T>());
|
return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::less<T>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||||
template <typename T, typename RandomAccessIterator>
|
template <typename T, typename RandomAccessIterator>
|
||||||
__host__ __device__ void sort_by_key(
|
__host__ __device__ void sort_by_key(
|
||||||
const vtkm_cuda_policy& exec,
|
const vtkm_cuda_policy& exec,
|
||||||
@ -55,6 +57,7 @@ __host__ __device__ void sort_by_key(
|
|||||||
ThrustCudaPolicyPerThread, first, last, values_first, thrust::less<T>());
|
ThrustCudaPolicyPerThread, first, last, values_first, thrust::less<T>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__host__ __device__ void sort(
|
__host__ __device__ void sort(
|
||||||
const vtkm_cuda_policy& exec,
|
const vtkm_cuda_policy& exec,
|
||||||
@ -66,6 +69,7 @@ __host__ __device__ void sort(
|
|||||||
return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::less<T>());
|
return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::less<T>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||||
template <typename T, typename RandomAccessIterator>
|
template <typename T, typename RandomAccessIterator>
|
||||||
__host__ __device__ void sort_by_key(
|
__host__ __device__ void sort_by_key(
|
||||||
const vtkm_cuda_policy& exec,
|
const vtkm_cuda_policy& exec,
|
||||||
@ -79,6 +83,7 @@ __host__ __device__ void sort_by_key(
|
|||||||
ThrustCudaPolicyPerThread, first, last, values_first, thrust::less<T>());
|
ThrustCudaPolicyPerThread, first, last, values_first, thrust::less<T>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__host__ __device__ void sort(
|
__host__ __device__ void sort(
|
||||||
const vtkm_cuda_policy& exec,
|
const vtkm_cuda_policy& exec,
|
||||||
@ -90,6 +95,7 @@ __host__ __device__ void sort(
|
|||||||
return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::greater<T>());
|
return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::greater<T>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||||
template <typename T, typename RandomAccessIterator>
|
template <typename T, typename RandomAccessIterator>
|
||||||
__host__ __device__ void sort_by_key(
|
__host__ __device__ void sort_by_key(
|
||||||
const vtkm_cuda_policy& exec,
|
const vtkm_cuda_policy& exec,
|
||||||
@ -103,6 +109,7 @@ __host__ __device__ void sort_by_key(
|
|||||||
ThrustCudaPolicyPerThread, first, last, values_first, thrust::greater<T>());
|
ThrustCudaPolicyPerThread, first, last, values_first, thrust::greater<T>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__host__ __device__ void sort(
|
__host__ __device__ void sort(
|
||||||
const vtkm_cuda_policy& exec,
|
const vtkm_cuda_policy& exec,
|
||||||
@ -114,6 +121,7 @@ __host__ __device__ void sort(
|
|||||||
return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::greater<T>());
|
return thrust::sort(ThrustCudaPolicyPerThread, first, last, thrust::greater<T>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||||
template <typename T, typename RandomAccessIterator>
|
template <typename T, typename RandomAccessIterator>
|
||||||
__host__ __device__ void sort_by_key(
|
__host__ __device__ void sort_by_key(
|
||||||
const vtkm_cuda_policy& exec,
|
const vtkm_cuda_policy& exec,
|
||||||
@ -127,6 +135,7 @@ __host__ __device__ void sort_by_key(
|
|||||||
ThrustCudaPolicyPerThread, first, last, values_first, thrust::greater<T>());
|
ThrustCudaPolicyPerThread, first, last, values_first, thrust::greater<T>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||||
template <typename RandomAccessIterator, typename StrictWeakOrdering>
|
template <typename RandomAccessIterator, typename StrictWeakOrdering>
|
||||||
__host__ __device__ void sort(const vtkm_cuda_policy& exec,
|
__host__ __device__ void sort(const vtkm_cuda_policy& exec,
|
||||||
RandomAccessIterator first,
|
RandomAccessIterator first,
|
||||||
@ -140,6 +149,7 @@ __host__ __device__ void sort(const vtkm_cuda_policy& exec,
|
|||||||
return thrust::sort(ThrustCudaPolicyPerThread, first, last, comp);
|
return thrust::sort(ThrustCudaPolicyPerThread, first, last, comp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||||
template <typename RandomAccessIteratorKeys,
|
template <typename RandomAccessIteratorKeys,
|
||||||
typename RandomAccessIteratorValues,
|
typename RandomAccessIteratorValues,
|
||||||
typename StrictWeakOrdering>
|
typename StrictWeakOrdering>
|
||||||
@ -156,6 +166,7 @@ __host__ __device__ void sort_by_key(const vtkm_cuda_policy& exec,
|
|||||||
return thrust::sort_by_key(ThrustCudaPolicyPerThread, first, last, values_first, comp);
|
return thrust::sort_by_key(ThrustCudaPolicyPerThread, first, last, values_first, comp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||||
template <typename T,
|
template <typename T,
|
||||||
typename InputIterator2,
|
typename InputIterator2,
|
||||||
typename OutputIterator1,
|
typename OutputIterator1,
|
||||||
|
Loading…
Reference in New Issue
Block a user