mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-09-16 17:22:55 +00:00
Make VTKmDetectCUDAVersion work with C compilers.
This commit is contained in:
parent
c4a78d952c
commit
c8726814b8
@ -293,7 +293,10 @@ macro(vtkm_configure_component_CUDA)
|
||||
# 4 - maxwell
|
||||
# - Uses: --generate-code arch=compute_50,code=compute_50
|
||||
# - Uses: --generate-code arch=compute_52,code=compute_52
|
||||
# 5 - all
|
||||
# 5 - pascal
|
||||
# - Uses: --generate-code arch=compute_60,code=compute_60
|
||||
# - Uses: --generate-code arch=compute_61,code=compute_61
|
||||
# 6 - all
|
||||
# - Uses: --generate-code arch=compute_20,code=compute_20
|
||||
# - Uses: --generate-code arch=compute_30,code=compute_30
|
||||
# - Uses: --generate-code arch=compute_35,code=compute_35
|
||||
@ -309,14 +312,14 @@ macro(vtkm_configure_component_CUDA)
|
||||
|
||||
if(VTKM_CUDA_NATIVE_EXE_PROCESS_RAN_OUTPUT)
|
||||
#Use the cached value
|
||||
list(APPEND CUDA_NVCC_FLAG S{VTKM_CUDA_NATIVE_EXE_PROCESS_RAN_OUTPUT})
|
||||
list(APPEND CUDA_NVCC_FLAGS ${VTKM_CUDA_NATIVE_EXE_PROCESS_RAN_OUTPUT})
|
||||
else()
|
||||
|
||||
#run execute_process to do auto_detection
|
||||
if(CMAKE_GENERATOR MATCHES "Visual Studio")
|
||||
set(args "-ccbin" "${CMAKE_CXX_COMPILER}" "--run" "${VTKm_CMAKE_MODULE_PATH}/VTKmDetectCUDAVersion.cxx")
|
||||
set(args "-ccbin" "${CMAKE_CXX_COMPILER}" "--run" "${VTKm_CMAKE_MODULE_PATH}/VTKmDetectCUDAVersion.cu")
|
||||
else()
|
||||
set(args "-ccbin" "${CUDA_HOST_COMPILER}" "--run" "${VTKm_CMAKE_MODULE_PATH}/VTKmDetectCUDAVersion.cxx")
|
||||
set(args "-ccbin" "${CUDA_HOST_COMPILER}" "--run" "${VTKm_CMAKE_MODULE_PATH}/VTKmDetectCUDAVersion.cu")
|
||||
endif()
|
||||
|
||||
execute_process(
|
||||
@ -328,10 +331,11 @@ macro(vtkm_configure_component_CUDA)
|
||||
#find the position of the "--generate-code" output. With some compilers such as
|
||||
#msvc we get compile output plus run output. So we need to strip out just the
|
||||
#run output
|
||||
message(STATUS "run_output: ${run_output}")
|
||||
string(FIND "${run_output}" "--generate-code" position)
|
||||
string(SUBSTRING "${run_output}" ${position} -1 run_output)
|
||||
|
||||
list(APPEND CUDA_NVCC_FLAG S{run_output})
|
||||
list(APPEND CUDA_NVCC_FLAGS ${run_output})
|
||||
set(VTKM_CUDA_NATIVE_EXE_PROCESS_RAN_OUTPUT ${run_output} CACHE INTERNAL
|
||||
"device type(s) for cuda[native]")
|
||||
else()
|
||||
@ -353,12 +357,17 @@ Falling back to fermi, please manually specify if you want something else.")
|
||||
elseif(VTKm_CUDA_Architecture STREQUAL "maxwell")
|
||||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_50,code=compute_50")
|
||||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_52,code=compute_52")
|
||||
elseif(VTKm_CUDA_Architecture STREQUAL "pascal")
|
||||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_60,code=compute_60")
|
||||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_61,code=compute_61")
|
||||
elseif(VTKm_CUDA_Architecture STREQUAL "all")
|
||||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_20,code=compute_20")
|
||||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_30,code=compute_30")
|
||||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_35,code=compute_35")
|
||||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_50,code=compute_50")
|
||||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_52,code=compute_52")
|
||||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_60,code=compute_60")
|
||||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --generate-code arch=compute_61,code=compute_61")
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
|
49
CMake/VTKmDetectCUDAVersion.cu
Normal file
49
CMake/VTKmDetectCUDAVersion.cu
Normal file
@ -0,0 +1,49 @@
|
||||
//=============================================================================
|
||||
//
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
//
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2015 Sandia Corporation.
|
||||
// Copyright 2015 UT-Battelle, LLC.
|
||||
// Copyright 2015 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//
|
||||
//=============================================================================
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cstdio>
|
||||
// Detects the compute capability of every CUDA device visible to the runtime
// and prints, on stdout, one nvcc flag group per distinct architecture in the
// form "--generate-code arch=compute_XX,code=sm_YY ".
//
// Returns 0 when at least one flag group was printed. Returns 1 when the
// device count cannot be queried, when there are no devices, or when no
// device's properties could be read, so that a caller never receives a
// success status together with empty output.
int main()
{
  int count = 0;
  if (cudaSuccess != cudaGetDeviceCount(&count)) return 1;
  if (count == 0) return 1;

  // Architectures already printed. Remembering all of them (not just the
  // previous one) also removes duplicates when identical cards are not
  // enumerated next to each other.
  const int max_tracked = 64;
  int emitted_archs[max_tracked];
  int num_tracked = 0;
  bool printed_any = false;

  for (int device = 0; device < count; ++device)
  {
    cudaDeviceProp prop;
    if (cudaSuccess != cudaGetDeviceProperties(&prop, device))
    {
      // Skip devices whose properties cannot be read; the others may still
      // yield usable flags.
      continue;
    }

    // Convert 2.1 to 21, 3.5 to 35, etc.
    const int arch = (prop.major * 10) + prop.minor;

    // Handle multiple cards of the same architecture.
    bool already_emitted = false;
    for (int i = 0; i < num_tracked; ++i)
    {
      if (emitted_archs[i] == arch)
      {
        already_emitted = true;
        break;
      }
    }
    if (already_emitted) { continue; }

    // If the table is full the flag is still printed; only the duplicate
    // suppression for this architecture is lost.
    if (num_tracked < max_tracked)
    {
      emitted_archs[num_tracked] = arch;
      ++num_tracked;
    }

    // arch 21 has no equivalent compute level.
    int compute_level = arch;
    if (compute_level == 21) { compute_level = 20; }

    printf("--generate-code arch=compute_%d,code=sm_%d ", compute_level, arch);
    printed_any = true;
  }

  // Report failure when nothing was printed so the caller can fall back to a
  // default architecture rather than parse empty output.
  return printed_any ? 0 : 1;
}
|
@ -1,93 +0,0 @@
|
||||
//=============================================================================
|
||||
//
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
//
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2015 Sandia Corporation.
|
||||
// Copyright 2015 UT-Battelle, LLC.
|
||||
// Copyright 2015 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//
|
||||
//=============================================================================
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
// Prints the nvcc "--generate-code" flags needed to target every CUDA device
// found on this machine. Returns 1 when the device count cannot be queried or
// no device is present, and 0 otherwise. argc and argv are not used.
int main(int argc, char **argv)
{
  typedef std::map< int, std::string > ArchTable;

  // Device architecture (major * 10 + minor) -> virtual architecture name.
  ArchTable virtual_arch_for;
  virtual_arch_for[11] = "compute_11";
  virtual_arch_for[12] = "compute_12";
  virtual_arch_for[13] = "compute_13";
  virtual_arch_for[20] = "compute_20";
  virtual_arch_for[21] = "compute_20";
  virtual_arch_for[30] = "compute_30";
  virtual_arch_for[32] = "compute_32";
  virtual_arch_for[35] = "compute_35";
  virtual_arch_for[37] = "compute_37";
  virtual_arch_for[50] = "compute_50";
  virtual_arch_for[52] = "compute_52";
  virtual_arch_for[53] = "compute_53";

  //a failed query or an empty machine are both reported as failure
  int device_count;
  const cudaError_t count_status = cudaGetDeviceCount(&device_count);
  if(!(count_status == cudaSuccess && device_count >= 1))
  {
    return 1;
  }

  //walk the devices, emitting one group of compile flags each time the
  //architecture differs from the one seen on the previous device
  int last_arch = 0;
  for (int dev = 0; dev < device_count; ++dev)
  {
    cudaDeviceProp props;
    if(cudaGetDeviceProperties(&props, dev) != cudaSuccess)
    {
      continue;
    }

    //2.1 becomes 21, 3.5 becomes 35, and so on
    const int arch = (props.major * 10) + props.minor;

    //do not repeat the options for a run of identical gpu's
    if(arch == last_arch)
    {
      continue;
    }
    last_arch = arch;

    ArchTable::iterator match = virtual_arch_for.find(arch);
    if(match == virtual_arch_for.end())
    {
      //unknown architecture: use the highest entry in the table and build
      //for that virtual arch rather than for a specific sm
      const std::string newest_level = virtual_arch_for.rbegin()->second;
      std::cout << "--generate-code arch=" << newest_level
                << ",code=" << newest_level << " ";
    }
    else
    {
      std::cout << "--generate-code arch=" << match->second
                << ",code=sm_" << arch << " ";
    }
  }
  return 0;
}
|
Loading…
Reference in New Issue
Block a user