Make VTKmDetectCUDAVersion work with C compilers.

2024-09-16 17:22:55 +00:00 · 2016-09-21 15:07:34 -04:00 · 2016-09-21 15:07:34 -04:00 · c8726814b8
commit c8726814b8
parent c4a78d952c
3 changed files with 63 additions and 98 deletions
--- a/CMake/VTKmConfigureComponents.cmake
+++ b/CMake/VTKmConfigureComponents.cmake
@ -293,7 +293,10 @@ macro(vtkm_configure_component_CUDA)
    # 4 - maxwell
    #   - Uses: --generate-code arch=compute_50,code=compute_50
    #   - Uses: --generate-code arch=compute_52,code=compute_52
-    # 5 - all
+    # 5 - pascal
+    #   - Uses: --generate-code arch=compute_60,code=compute_60
+    #   - Uses: --generate-code arch=compute_61,code=compute_61
+    # 6 - all
    #   - Uses: --generate-code arch=compute_20,code=compute_20
    #   - Uses: --generate-code arch=compute_30,code=compute_30
    #   - Uses: --generate-code arch=compute_35,code=compute_35
@ -309,14 +312,14 @@ macro(vtkm_configure_component_CUDA)

      if(VTKM_CUDA_NATIVE_EXE_PROCESS_RAN_OUTPUT)
        #Use the cached value
-        list(APPEND CUDA_NVCC_FLAG S{VTKM_CUDA_NATIVE_EXE_PROCESS_RAN_OUTPUT})
+        list(APPEND CUDA_NVCC_FLAGS ${VTKM_CUDA_NATIVE_EXE_PROCESS_RAN_OUTPUT})
      else()

        #run execute_process to do auto_detection
        if(CMAKE_GENERATOR MATCHES "Visual Studio")
-          set(args "-ccbin" "${CMAKE_CXX_COMPILER}" "--run" "${VTKm_CMAKE_MODULE_PATH}/VTKmDetectCUDAVersion.cxx")
+          set(args "-ccbin" "${CMAKE_CXX_COMPILER}" "--run" "${VTKm_CMAKE_MODULE_PATH}/VTKmDetectCUDAVersion.cu")
        else()
-          set(args "-ccbin" "${CUDA_HOST_COMPILER}" "--run" "${VTKm_CMAKE_MODULE_PATH}/VTKmDetectCUDAVersion.cxx")
+          set(args "-ccbin" "${CUDA_HOST_COMPILER}" "--run" "${VTKm_CMAKE_MODULE_PATH}/VTKmDetectCUDAVersion.cu")
        endif()

        execute_process(
@ -328,10 +331,11 @@ macro(vtkm_configure_component_CUDA)
          #find the position of the "--generate-code" output. With some compilers such as
          #msvc we get compile output plus run output. So we need to strip out just the
          #run output
+          message(STATUS "run_output: ${run_output}")
          string(FIND "${run_output}" "--generate-code" position)
          string(SUBSTRING "${run_output}" ${position} -1 run_output)

-          list(APPEND CUDA_NVCC_FLAG S{run_output})
+          list(APPEND CUDA_NVCC_FLAGS ${run_output})
          set(VTKM_CUDA_NATIVE_EXE_PROCESS_RAN_OUTPUT ${run_output} CACHE INTERNAL
              "device type(s) for cuda[native]")
        else()
@ -353,12 +357,17 @@ Falling back to fermi, please manually specify if you want something else.")
    elseif(VTKm_CUDA_Architecture STREQUAL "maxwell")
      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_50,code=compute_50")
      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_52,code=compute_52")
+    elseif(VTKm_CUDA_Architecture STREQUAL "pascal")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_60,code=compute_60")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_61,code=compute_61")
    elseif(VTKm_CUDA_Architecture STREQUAL "all")
      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_20,code=compute_20")
      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_30,code=compute_30")
      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_35,code=compute_35")
      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_50,code=compute_50")
      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_52,code=compute_52")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_60,code=compute_60")
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_61,code=compute_61")
    endif()

    if(WIN32)
--- a/CMake/VTKmDetectCUDAVersion.cu
+++ b/CMake/VTKmDetectCUDAVersion.cu
@ -0,0 +1,49 @@
+//=============================================================================
+//
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//
+//  Copyright 2015 Sandia Corporation.
+//  Copyright 2015 UT-Battelle, LLC.
+//  Copyright 2015 Los Alamos National Security.
+//
+//  Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+//  the U.S. Government retains certain rights in this software.
+//  Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
+//  Laboratory (LANL), the U.S. Government retains certain rights in
+//  this software.
+//
+//=============================================================================
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <cstdio>
+// Prints one "--generate-code arch=compute_XX,code=sm_YY " flag for each
+// distinct compute capability among the CUDA devices the runtime reports.
+// Returns 0 when at least one flag was printed, and 1 otherwise (the device
+// count query failed, no devices exist, or no device's properties could be
+// queried).
+int main()
+{
+  int count = 0;
+  if (cudaSuccess != cudaGetDeviceCount(&count)) return 1;
+  if (count == 0) return 1;
+
+  //seen[arch] records every architecture already printed, so a repeated
+  //architecture is skipped even when the matching cards are not adjacent
+  //in the enumeration order.
+  const int max_arch = 1024;
+  bool seen[max_arch] = { false };
+  int emitted = 0;
+  for (int device = 0; device < count; ++device)
+    {
+    cudaDeviceProp prop;
+    if (cudaSuccess == cudaGetDeviceProperties(&prop, device))
+      {
+      //convert 2.1 to 21, 3.5 to 35, etc
+      int arch = (prop.major * 10) + prop.minor;
+      int compute_level = arch;
+      //arch 21 has no equivalent compute level.
+      if(compute_level == 21) { compute_level = 20; }
+
+      //handle multiple cards of the same architecture; values outside the
+      //table are printed without de-duplication rather than dropped
+      if(arch >= 0 && arch < max_arch)
+        {
+        if(seen[arch]) { continue; }
+        seen[arch] = true;
+        }
+      printf("--generate-code arch=compute_%d,code=sm_%d ", compute_level, arch);
+      ++emitted;
+      }
+    }
+  //if nothing was printed the caller would receive empty output, so report
+  //failure instead of success
+  return (emitted > 0) ? 0 : 1;
+}
--- a/CMake/VTKmDetectCUDAVersion.cxx
+++ b/CMake/VTKmDetectCUDAVersion.cxx
@ -1,93 +0,0 @@
-//=============================================================================
-//
-//  Copyright (c) Kitware, Inc.
-//  All rights reserved.
-//  See LICENSE.txt for details.
-//
-//  This software is distributed WITHOUT ANY WARRANTY; without even
-//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
-//  PURPOSE.  See the above copyright notice for more information.
-//
-//  Copyright 2015 Sandia Corporation.
-//  Copyright 2015 UT-Battelle, LLC.
-//  Copyright 2015 Los Alamos National Security.
-//
-//  Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-//  the U.S. Government retains certain rights in this software.
-//  Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
-//  Laboratory (LANL), the U.S. Government retains certain rights in
-//  this software.
-//
-//=============================================================================
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <iostream>
-#include <string>
-#include <map>
-
-int main(int argc, char **argv)
-{
-  std::map< int, std::string > arch_to_compute;
-  arch_to_compute[11] = "compute_11";
-  arch_to_compute[12] = "compute_12";
-  arch_to_compute[13] = "compute_13";
-  arch_to_compute[20] = "compute_20";
-  arch_to_compute[21] = "compute_20";
-  arch_to_compute[30] = "compute_30";
-  arch_to_compute[32] = "compute_32";
-  arch_to_compute[35] = "compute_35";
-  arch_to_compute[37] = "compute_37";
-  arch_to_compute[50] = "compute_50";
-  arch_to_compute[52] = "compute_52";
-  arch_to_compute[53] = "compute_53";
-
-  cudaError_t err;
-  int nDevices;
-  err = cudaGetDeviceCount(&nDevices);
-  if(err != cudaSuccess || nDevices < 1)
-  { //return failure if no cuda devices found
-    return 1;
-  }
-
-  //iterate over the devices outputting a string that would be the compile
-  //flags needed to target all gpu's on this machine.
-  int prev_arch = 0;
-  for (int i = 0; i < nDevices; i++)
-  {
-    cudaDeviceProp prop;
-    err = cudaGetDeviceProperties(&prop, i);
-    if(err != cudaSuccess)
-    {
-      continue;
-    }
-
-    //convert 2.1 to 21, 3.5 to 35, etc
-    int arch = (prop.major * 10) + prop.minor;
-
-    //if we have multiple gpu's make sure they have different arch's
-    //instead of adding the same compile options multiple times
-    if(prev_arch == arch)
-    {
-      continue;
-    }
-    prev_arch = arch;
-
-    //look up the closest virtual architecture, if the arch we are building
-    //for is not found
-    if(arch_to_compute.find(arch) != arch_to_compute.end() )
-    {
-      std::string compute_level = arch_to_compute[arch];
-      std::cout << "--generate-code arch=" << compute_level
-                << ",code=sm_"<< arch << " ";
-    }
-    else
-    {
-      //if not found default to known highest arch, and compile to a virtual
-      //arch instead of a known sm.
-      std::string compute_level = arch_to_compute.rbegin()->second;
-      std::cout << "--generate-code arch=" << compute_level
-                << ",code=" << compute_level << " ";
-    }
-  }
-  return 0;
-}