Cycles: Add support for adaptive kernel compilation to OptiX device

This modifies the common CUDA implementation for adaptive kernel compilation slightly to support both CUBIN and PTX output (the latter of which is then used in the OptiX device). It also fixes adaptive kernel compilation on Windows. Reviewed By: brecht Differential Revision: https://developer.blender.org/D6851
2020-02-17 13:35:31 +01:00 · 2020-02-17 13:35:31 +01:00 · 2278aa0da9
commit 2278aa0da9
parent 12b6ddaf95
6 changed files with 122 additions and 134 deletions
--- a/extern/cuew/src/cuew.c
+++ b/extern/cuew/src/cuew.c
@ -683,23 +683,23 @@ static int cuewNvrtcInit(void) {


 int cuewInit(cuuint32_t flags) {
-	int result = CUEW_SUCCESS;
+  int result = CUEW_SUCCESS;

-	if (flags & CUEW_INIT_CUDA) {
-		result = cuewCudaInit();
-		if (result != CUEW_SUCCESS) {
-			return result;
-		}
-	}
+  if (flags & CUEW_INIT_CUDA) {
+    result = cuewCudaInit();
+    if (result != CUEW_SUCCESS) {
+      return result;
+    }
+  }

-	if (flags & CUEW_INIT_NVRTC) {
-		result = cuewNvrtcInit();
-		if (result != CUEW_SUCCESS) {
-			return result;
-		}
-	}
+  if (flags & CUEW_INIT_NVRTC) {
+    result = cuewNvrtcInit();
+    if (result != CUEW_SUCCESS) {
+      return result;
+    }
+  }

-	return result;
+  return result;
 }


@ -798,7 +798,10 @@ static int path_exists(const char *path) {

 const char *cuewCompilerPath(void) {
 #ifdef _WIN32
-  const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
+  const char *defaultpaths[] = {
+    "C:/CUDA/bin",
+    "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin",
+    NULL};
  const char *executable = "nvcc.exe";
 #else
  const char *defaultpaths[] = {
@ -832,9 +835,12 @@ const char *cuewCompilerPath(void) {
    }
  }

-#ifndef _WIN32
  {
+#ifdef _WIN32
+    FILE *handle = popen("where nvcc", "r");
+#else
    FILE *handle = popen("which nvcc", "r");
+#endif
    if (handle) {
      char buffer[4096] = {0};
      int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
@ -845,7 +851,6 @@ const char *cuewCompilerPath(void) {
      }
    }
  }
-#endif

  return NULL;
 }
@ -859,23 +864,6 @@ int cuewNvrtcVersion(void) {
  return 0;
 }

-static size_t safe_strnlen(const char *s, size_t maxlen) {
-  size_t length;
-  for (length = 0; length < maxlen; s++, length++) {
-    if (*s == '\0') {
-      break;
-    }
-  }
-  return length;
-}
-
-static char *safe_strncpy(char *dest, const char *src, size_t n) {
-  const size_t src_len = safe_strnlen(src, n - 1);
-  memcpy(dest, src, src_len);
-  dest[src_len] = '\0';
-  return dest;
-}
-
 int cuewCompilerVersion(void) {
  const char *path = cuewCompilerPath();
  const char *marker = "Cuda compilation tools, release ";
@ -891,8 +879,9 @@ int cuewCompilerVersion(void) {
  }

  /* get --version output */
-  safe_strncpy(command, path, sizeof(command));
-  strncat(command, " --version", sizeof(command) - strlen(path));
+  strncat(command, "\"", 1);
+  strncat(command, path, sizeof(command) - 1);
+  strncat(command, "\" --version", sizeof(command) - strlen(path) - 1);
  pipe = popen(command, "r");
  if (!pipe) {
    fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
@ -922,4 +911,3 @@ int cuewCompilerVersion(void) {

  return 10 * major + minor;
 }
-
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@ -228,11 +228,8 @@ if(WITH_CYCLES_DEVICE_OPTIX)
      SYSTEM
      ${OPTIX_INCLUDE_DIR}
      )
-
-    # Need pre-compiled CUDA binaries in the OptiX device
-    set(WITH_CYCLES_CUDA_BINARIES ON)
  else()
-    message(STATUS "Optix not found, disabling it from Cycles")
+    message(STATUS "OptiX not found, disabling it from Cycles")
    set(WITH_CYCLES_DEVICE_OPTIX OFF)
  endif()
 endif()
--- a/intern/cycles/device/cuda/device_cuda.h
+++ b/intern/cycles/device/cuda/device_cuda.h
@ -109,15 +109,13 @@ class CUDADevice : public Device {

  bool use_split_kernel();

-  string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features,
-                                          bool filter = false,
-                                          bool split = false);
-
-  bool compile_check_compiler();
+  virtual string compile_kernel_get_common_cflags(
+      const DeviceRequestedFeatures &requested_features, bool filter = false, bool split = false);

  string compile_kernel(const DeviceRequestedFeatures &requested_features,
-                        bool filter = false,
-                        bool split = false);
+                        const char *name,
+                        const char *base = "cuda",
+                        bool force_ptx = false);

  virtual bool load_kernels(const DeviceRequestedFeatures &requested_features);

--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@ -329,70 +329,27 @@ string CUDADevice::compile_kernel_get_common_cflags(
  return cflags;
 }

-bool CUDADevice::compile_check_compiler()
-{
-  const char *nvcc = cuewCompilerPath();
-  if (nvcc == NULL) {
-    cuda_error_message(
-        "CUDA nvcc compiler not found. "
-        "Install CUDA toolkit in default location.");
-    return false;
-  }
-  const int cuda_version = cuewCompilerVersion();
-  VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version << ".";
-  const int major = cuda_version / 10, minor = cuda_version % 10;
-  if (cuda_version == 0) {
-    cuda_error_message("CUDA nvcc compiler version could not be parsed.");
-    return false;
-  }
-  if (cuda_version < 80) {
-    printf(
-        "Unsupported CUDA version %d.%d detected, "
-        "you need CUDA 8.0 or newer.\n",
-        major,
-        minor);
-    return false;
-  }
-  else if (cuda_version != 101) {
-    printf(
-        "CUDA version %d.%d detected, build may succeed but only "
-        "CUDA 10.1 is officially supported.\n",
-        major,
-        minor);
-  }
-  return true;
-}
-
 string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_features,
-                                  bool filter,
-                                  bool split)
+                                  const char *name,
+                                  const char *base,
+                                  bool force_ptx)
 {
-  const char *name, *source;
-  if (filter) {
-    name = "filter";
-    source = "filter.cu";
-  }
-  else if (split) {
-    name = "kernel_split";
-    source = "kernel_split.cu";
-  }
-  else {
-    name = "kernel";
-    source = "kernel.cu";
-  }
-  /* Compute cubin name. */
+  /* Compute kernel name. */
  int major, minor;
  cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
  cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);

  /* Attempt to use kernel provided with Blender. */
  if (!use_adaptive_compilation()) {
-    const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
-    VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
-    if (path_exists(cubin)) {
-      VLOG(1) << "Using precompiled kernel.";
-      return cubin;
+    if (!force_ptx) {
+      const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
+      VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
+      if (path_exists(cubin)) {
+        VLOG(1) << "Using precompiled kernel.";
+        return cubin;
+      }
    }
+
    const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor));
    VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
    if (path_exists(ptx)) {
@ -401,19 +358,21 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
    }
  }

-  const string common_cflags = compile_kernel_get_common_cflags(requested_features, filter, split);
-
  /* Try to use locally compiled kernel. */
-  const string source_path = path_get("source");
-  const string kernel_md5 = path_files_md5_hash(source_path);
+  string source_path = path_get("source");
+  const string source_md5 = path_files_md5_hash(source_path);

  /* We include cflags into md5 so changing cuda toolkit or changing other
   * compiler command line arguments makes sure cubin gets re-built.
   */
-  const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags);
+  string common_cflags = compile_kernel_get_common_cflags(
+      requested_features, strstr(name, "filter") != NULL, strstr(name, "split") != NULL);
+  const string kernel_md5 = util_md5_string(source_md5 + common_cflags);

+  const char *const kernel_ext = force_ptx ? "ptx" : "cubin";
+  const char *const kernel_arch = force_ptx ? "compute" : "sm";
  const string cubin_file = string_printf(
-      "cycles_%s_sm%d%d_%s.cubin", name, major, minor, cubin_md5.c_str());
+      "cycles_%s_%s_%d%d_%s.%s", name, kernel_arch, major, minor, kernel_md5.c_str(), kernel_ext);
  const string cubin = path_cache_get(path_join("kernels", cubin_file));
  VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
  if (path_exists(cubin)) {
@ -422,7 +381,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
  }

 #  ifdef _WIN32
-  if (have_precompiled_kernels()) {
+  if (!use_adaptive_compilation() && have_precompiled_kernels()) {
    if (major < 3) {
      cuda_error_message(
          string_printf("CUDA device requires compute capability 3.0 or up, "
@ -437,42 +396,69 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
                        major,
                        minor));
    }
-    return "";
+    return string();
  }
 #  endif

  /* Compile. */
-  if (!compile_check_compiler()) {
-    return "";
+  const char *const nvcc = cuewCompilerPath();
+  if (nvcc == NULL) {
+    cuda_error_message(
+        "CUDA nvcc compiler not found. "
+        "Install CUDA toolkit in default location.");
+    return string();
  }
-  const char *nvcc = cuewCompilerPath();
-  const string kernel = path_join(path_join(source_path, "kernel"),
-                                  path_join("kernels", path_join("cuda", source)));
+
+  const int nvcc_cuda_version = cuewCompilerVersion();
+  VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << nvcc_cuda_version << ".";
+  if (nvcc_cuda_version < 80) {
+    printf(
+        "Unsupported CUDA version %d.%d detected, "
+        "you need CUDA 8.0 or newer.\n",
+        nvcc_cuda_version / 10,
+        nvcc_cuda_version % 10);
+    return string();
+  }
+  else if (nvcc_cuda_version != 101) {
+    printf(
+        "CUDA version %d.%d detected, build may succeed but only "
+        "CUDA 10.1 is officially supported.\n",
+        nvcc_cuda_version / 10,
+        nvcc_cuda_version % 10);
+  }
+
  double starttime = time_dt();
-  printf("Compiling CUDA kernel ...\n");

  path_create_directories(cubin);

+  source_path = path_join(path_join(source_path, "kernel"),
+                          path_join("kernels", path_join(base, string_printf("%s.cu", name))));
+
  string command = string_printf(
      "\"%s\" "
-      "-arch=sm_%d%d "
-      "--cubin \"%s\" "
+      "-arch=%s_%d%d "
+      "--%s \"%s\" "
      "-o \"%s\" "
-      "%s ",
+      "%s",
      nvcc,
+      kernel_arch,
      major,
      minor,
-      kernel.c_str(),
+      kernel_ext,
+      source_path.c_str(),
      cubin.c_str(),
      common_cflags.c_str());

-  printf("%s\n", command.c_str());
+  printf("Compiling CUDA kernel ...\n%s\n", command.c_str());

-  if (system(command.c_str()) == -1) {
+#ifdef _WIN32
+  command = "call " + command;
+#endif
+  if (system(command.c_str()) != 0) {
    cuda_error_message(
        "Failed to execute compilation command, "
        "see console for details.");
-    return "";
+    return string();
  }

  /* Verify if compilation succeeded */
@ -480,7 +466,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
    cuda_error_message(
        "CUDA kernel compilation failed, "
        "see console for details.");
-    return "";
+    return string();
  }

  printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
@ -509,12 +495,14 @@ bool CUDADevice::load_kernels(const DeviceRequestedFeatures &requested_features)
    return false;

  /* get kernel */
-  string cubin = compile_kernel(requested_features, false, use_split_kernel());
-  if (cubin == "")
+  const char *kernel_name = use_split_kernel() ? "kernel_split" : "kernel";
+  string cubin = compile_kernel(requested_features, kernel_name);
+  if (cubin.empty())
    return false;

-  string filter_cubin = compile_kernel(requested_features, true, false);
-  if (filter_cubin == "")
+  const char *filter_name = "filter";
+  string filter_cubin = compile_kernel(requested_features, filter_name);
+  if (filter_cubin.empty())
    return false;

  /* open module */
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@ -43,7 +43,6 @@ bool device_cuda_init()
      VLOG(1) << "Found precompiled kernels";
      result = true;
    }
-#    ifndef _WIN32
    else if (cuewCompilerPath() != NULL) {
      VLOG(1) << "Found CUDA compiler " << cuewCompilerPath();
      result = true;
@ -52,7 +51,6 @@ bool device_cuda_init()
      VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found,"
              << " unable to use CUDA";
    }
-#    endif
  }
  else {
    VLOG(1) << "CUEW initialization failed: "
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@ -293,6 +293,23 @@ class OptiXDevice : public CUDADevice {
    return BVH_LAYOUT_OPTIX;
  }

+  string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features,
+                                          bool filter,
+                                          bool /*split*/) override
+  {
+    // Split kernel is not supported in OptiX
+    string common_cflags = CUDADevice::compile_kernel_get_common_cflags(
+        requested_features, filter, false);
+
+    // Add OptiX SDK include directory to include paths
+    const char *optix_sdk_path = getenv("OPTIX_ROOT_DIR");
+    if (optix_sdk_path) {
+      common_cflags += string_printf(" -I\"%s/include\"", optix_sdk_path);
+    }
+
+    return common_cflags;
+  }
+
  bool load_kernels(const DeviceRequestedFeatures &requested_features) override
  {
    if (have_error()) {
@ -367,9 +384,11 @@ class OptiXDevice : public CUDADevice {
    }

    {  // Load and compile PTX module with OptiX kernels
-      string ptx_data;
-      const string ptx_filename = "lib/kernel_optix.ptx";
-      if (!path_read_text(path_get(ptx_filename), ptx_data)) {
+      string ptx_data, ptx_filename = path_get("lib/kernel_optix.ptx");
+      if (use_adaptive_compilation()) {
+        ptx_filename = compile_kernel(requested_features, "kernel_optix", "optix", true);
+      }
+      if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) {
        set_error("Failed loading OptiX kernel " + ptx_filename + ".");
        return false;
      }