// vtk-m/CMake/VTKmDetectCUDAVersion.cxx

//=============================================================================
//
//  Copyright (c) Kitware, Inc.
//  All rights reserved.
//  See LICENSE.txt for details.
//
//  This software is distributed WITHOUT ANY WARRANTY; without even
//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
//  PURPOSE.  See the above copyright notice for more information.
//
//  Copyright 2015 Sandia Corporation.
//  Copyright 2015 UT-Battelle, LLC.
//  Copyright 2015 Los Alamos National Security.
//
//  Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
//  the U.S. Government retains certain rights in this software.
//  Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
//  Laboratory (LANL), the U.S. Government retains certain rights in
//  this software.
//
//=============================================================================
#include <cuda.h>
#include <cuda_runtime.h>

#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>

//Probe the CUDA devices visible to the runtime and print, on stdout, the
//nvcc --generate-code flags needed to target every distinct GPU on this
//machine. Each flag is followed by a single space; no newline is written.
//
//Returns 0 when at least one flag was printed. Returns 1 when the device
//count could not be obtained, no device exists, or no device could be
//mapped onto a known virtual architecture (so the caller can fall back to
//its own defaults instead of consuming an empty flag string).
int main(int argc, char **argv)
{
  //the command line is not used
  (void)argc;
  (void)argv;

  //compute capability encoded as (major * 10 + minor) -> virtual arch name.
  //Note that 2.1 hardware is compiled against the compute_20 virtual arch.
  typedef std::map< int, std::string > ArchMap;
  ArchMap arch_to_compute;
  arch_to_compute[11] = "compute_11";
  arch_to_compute[12] = "compute_12";
  arch_to_compute[13] = "compute_13";
  arch_to_compute[20] = "compute_20";
  arch_to_compute[21] = "compute_20";
  arch_to_compute[30] = "compute_30";
  arch_to_compute[32] = "compute_32";
  arch_to_compute[35] = "compute_35";
  arch_to_compute[37] = "compute_37";
  arch_to_compute[50] = "compute_50";
  arch_to_compute[52] = "compute_52";
  arch_to_compute[53] = "compute_53";

  int nDevices = 0;
  cudaError_t err = cudaGetDeviceCount(&nDevices);
  if(err != cudaSuccess || nDevices < 1)
  { //return failure if no cuda devices found
    return 1;
  }

  //every flag already written, so that identical compile options are never
  //emitted twice no matter in which order the devices are enumerated
  std::set< std::string > emitted;

  //iterate over the devices outputting a string that would be the compile
  //flags needed to target all gpu's on this machine.
  for (int i = 0; i < nDevices; i++)
  {
    cudaDeviceProp prop;
    err = cudaGetDeviceProperties(&prop, i);
    if(err != cudaSuccess)
    {
      //best effort: a device we cannot query is simply not targeted
      continue;
    }

    //convert 2.1 to 21, 3.5 to 35, etc
    const int arch = (prop.major * 10) + prop.minor;

    //find the closest known architecture that is not newer than the device:
    //upper_bound gives the first entry > arch, so the one before it is the
    //greatest entry <= arch.
    ArchMap::const_iterator next = arch_to_compute.upper_bound(arch);
    if(next == arch_to_compute.begin())
    {
      //the device is older than every architecture we know about; code for
      //a newer virtual arch could not run on it, so emit nothing for it
      continue;
    }
    ArchMap::const_iterator closest = next;
    --closest;

    std::ostringstream flag;
    flag << "--generate-code arch=" << closest->second;
    if(closest->first == arch)
    {
      //exact match, generate machine code for the real architecture
      flag << ",code=sm_" << arch;
    }
    else
    {
      //unknown device: compile to the closest virtual arch instead of a
      //known sm, leaving final code generation to the driver
      flag << ",code=" << closest->second;
    }

    //insert().second is false when this exact flag was written before
    const std::string text = flag.str();
    if(emitted.insert(text).second)
    {
      std::cout << text << " ";
    }
  }

  return emitted.empty() ? 1 : 0;
}