diff --git a/CMake/UseVTKmCUDA.cmake b/CMake/UseVTKmCUDA.cmake
index 219886150..715df3128 100644
--- a/CMake/UseVTKmCUDA.cmake
+++ b/CMake/UseVTKmCUDA.cmake
@@ -51,6 +51,90 @@ if (VTKm_Base_FOUND)
     endif ()
   endif ()
 
+  if(VTKm_CUDA_FOUND)
+    #---------------------------------------------------------------------------
+    # Setup build flags for CUDA
+    #---------------------------------------------------------------------------
+    # Populates CUDA_NVCC_FLAGS with the best set of flags to compile for a
+    # given GPU architecture. The majority of developers should leave the
+    # option at the default of 'native' which uses system introspection to
+    # determine the smallest number of virtual and real architectures it
+    # should target.
+    #
+    # The option of 'all' is provided for people generating libraries that
+    # will be deployed to any number of machines. It compiles all CUDA code
+    # for all major virtual architectures, guaranteeing that the code will run
+    # anywhere.
+    #
+    #
+    # 1 - native
+    #   - Uses system introspection to determine compile flags
+    # 2 - fermi
+    #   - Uses: --generate-code arch=compute_20,code=compute_20
+    # 3 - kepler
+    #   - Uses: --generate-code arch=compute_30,code=compute_30
+    #   - Uses: --generate-code arch=compute_35,code=compute_35
+    # 4 - maxwell
+    #   - Uses: --generate-code arch=compute_50,code=compute_50
+    #   - Uses: --generate-code arch=compute_52,code=compute_52
+    # 5 - all
+    #   - Uses: --generate-code arch=compute_20,code=compute_20
+    #   - Uses: --generate-code arch=compute_30,code=compute_30
+    #   - Uses: --generate-code arch=compute_35,code=compute_35
+    #   - Uses: --generate-code arch=compute_50,code=compute_50
+    #   - Uses: --generate-code arch=compute_52,code=compute_52
+    #
+
+    #specify the property
+    set(VTKm_CUDA_Architecture "native" CACHE STRING "Which GPU Architecture(s) to compile for")
+    set_property(CACHE VTKm_CUDA_Architecture PROPERTY STRINGS native fermi kepler maxwell all)
+
+    #detect what the property is set to
+    if(VTKm_CUDA_Architecture STREQUAL "native")
+      #compile and run the detection program with nvcc, it prints the
+      #--generate-code flags for the gpu's found on this machine
+      execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${CMAKE_CURRENT_LIST_DIR}/VTKmDetectCUDAVersion.cxx"
+                      RESULT_VARIABLE ran_properly
+                      OUTPUT_VARIABLE run_output)
+
+      #find the position of the "--generate-code" output. With some compilers such as
+      #msvc we get compile output plus run output. So we need to strip out just the
+      #run output. A position of -1 means that no flags were printed at all.
+      set(position -1)
+      if(ran_properly EQUAL 0)
+        string(FIND "${run_output}" "--generate-code" position)
+      endif()
+
+      if(position GREATER -1)
+        string(SUBSTRING "${run_output}" ${position} -1 run_output)
+        string(STRIP "${run_output}" run_output)
+        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${run_output}")
+      else()
+        message(STATUS "Unable to run \"${CUDA_NVCC_EXECUTABLE}\" to autodetect GPU architecture. "
+                       "Falling back to fermi, please manually specify if you want something else.")
+        set(VTKm_CUDA_Architecture "fermi")
+      endif()
+    endif()
+
+    #this is a separate if() instead of an elseif() of the 'native' test so that
+    #the fermi fallback selected above really adds its flags
+    if(VTKm_CUDA_Architecture STREQUAL "fermi")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_20,code=compute_20")
+    elseif(VTKm_CUDA_Architecture STREQUAL "kepler")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_30,code=compute_30")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_35,code=compute_35")
+    elseif(VTKm_CUDA_Architecture STREQUAL "maxwell")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_50,code=compute_50")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_52,code=compute_52")
+    elseif(VTKm_CUDA_Architecture STREQUAL "all")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_20,code=compute_20")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_30,code=compute_30")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_35,code=compute_35")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_50,code=compute_50")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_52,code=compute_52")
+    endif()
+  endif()
+
#--------------------------------------------------------------------------- # Find Thrust library. #--------------------------------------------------------------------------- diff --git a/CMake/VTKmDetectCUDAVersion.cxx b/CMake/VTKmDetectCUDAVersion.cxx new file mode 100644 index 000000000..0c95a5d69 --- /dev/null +++ b/CMake/VTKmDetectCUDAVersion.cxx @@ -0,0 +1,88 @@ +//============================================================================= +// +// Copyright (c) Kitware, Inc. +// All rights reserved. +// See LICENSE.txt for details. +// +// This software is distributed WITHOUT ANY WARRANTY; without even +// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. See the above copyright notice for more information. +// +// Copyright 2015 Sandia Corporation. +// Copyright 2015 UT-Battelle, LLC. +// Copyright 2015 Los Alamos National Security. +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National +// Laboratory (LANL), the U.S. Government retains certain rights in +// this software. 
+// +//============================================================================= +#include +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + std::map< int, std::string > arch_to_compute; + arch_to_compute[11] = "compute_11"; + arch_to_compute[12] = "compute_12"; + arch_to_compute[13] = "compute_13"; + arch_to_compute[20] = "compute_20"; + arch_to_compute[21] = "compute_20"; + arch_to_compute[30] = "compute_30"; + arch_to_compute[32] = "compute_32"; + arch_to_compute[35] = "compute_35"; + arch_to_compute[37] = "compute_37"; + arch_to_compute[50] = "compute_50"; + arch_to_compute[52] = "compute_52"; + arch_to_compute[53] = "compute_53"; + + int nDevices; + cudaGetDeviceCount(&nDevices); + if(nDevices == 0) + { //return failure if no cuda devices found + return 1; + } + + //iterate over the devices outputting a string that would be the compile + //flags needed to target all gpu's on this machine. + int prev_arch = 0; + for (int i = 0; i < nDevices; i++) + { + cudaDeviceProp prop; + cudaGetDeviceProperties(&prop, i); + + //convert 2.1 to 21, 3.5 to 35, etc + int arch = (prop.major * 10) + prop.minor; + + //if we have multiple gpu's make sure they have different arch's + //instead of adding the same compile options multiple times + if(prev_arch == arch) + { + continue; + } + prev_arch = arch; + + //look up the closest virtual architecture, if the arch we are building + //for is not found + if(arch_to_compute.find(arch) != arch_to_compute.end() ) + { + std::string compute_level = arch_to_compute[arch]; + std::cout << "--generate-code arch=" << compute_level << ",code=sm_"<< arch << " "; + } + else + { + //if not found default to known highest arch, and compile to a virtual arch + //instead of a known sm. 
+ std::map< int, std::string >::const_iterator i = arch_to_compute.end(); + --i; + std::string compute_level = i->second; + std::cout << "--generate-code arch=" << compute_level << ",code=" << compute_level << " "; + } + } + return 0; +} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index a8b9b84ae..ab88f453e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -314,6 +314,7 @@ install( install( FILES ${VTKm_SOURCE_DIR}/CMake/VTKmCompilerOptimizations.cmake + ${VTKm_SOURCE_DIR}/CMake/VTKmDetectCUDAVersion.cxx DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR} )