cycles: Add an nvrtc based cubin cli compiler.
nvcc is very picky regarding compiler versions, severely limiting the compiler we can use, this commit adds a nvrtc based compiler that'll allow us to build the cubins even if the host compiler is unsupported. for details see D2913. Differential Revision: http://developer.blender.org/D2913
This commit is contained in:
parent
db989e1f11
commit
a5052770b8
@ -409,12 +409,14 @@ option(WITH_CYCLES_STANDALONE_GUI "Build Cycles standalone with GUI" OFF)
|
||||
option(WITH_CYCLES_OSL "Build Cycles with OSL support" ${_init_CYCLES_OSL})
|
||||
option(WITH_CYCLES_OPENSUBDIV "Build Cycles with OpenSubdiv support" ${_init_CYCLES_OPENSUBDIV})
|
||||
option(WITH_CYCLES_CUDA_BINARIES "Build Cycles CUDA binaries" OFF)
|
||||
option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF)
|
||||
set(CYCLES_CUDA_BINARIES_ARCH sm_20 sm_21 sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 CACHE STRING "CUDA architectures to build binaries for")
|
||||
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
|
||||
unset(PLATFORM_DEFAULT)
|
||||
option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON)
|
||||
option(WITH_CYCLES_DEBUG "Build Cycles with extra debug capabilities" OFF)
|
||||
option(WITH_CYCLES_NATIVE_ONLY "Build Cycles with native kernel only (which fits current CPU, use for development only)" OFF)
|
||||
mark_as_advanced(WITH_CYCLES_CUBIN_COMPILER)
|
||||
mark_as_advanced(WITH_CYCLES_LOGGING)
|
||||
mark_as_advanced(WITH_CYCLES_DEBUG)
|
||||
mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
|
||||
|
1
extern/cuew/include/cuew.h
vendored
1
extern/cuew/include/cuew.h
vendored
@ -1323,6 +1323,7 @@ int cuewInit(void);
|
||||
const char *cuewErrorString(CUresult result);
|
||||
const char *cuewCompilerPath(void);
|
||||
int cuewCompilerVersion(void);
|
||||
int cuewNvrtcVersion(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
11
extern/cuew/src/cuew.c
vendored
11
extern/cuew/src/cuew.c
vendored
@ -329,7 +329,7 @@ int cuewInit(void) {
|
||||
#ifdef _WIN32
|
||||
/* Expected in c:/windows/system or similar, no path needed. */
|
||||
const char *cuda_paths[] = {"nvcuda.dll", NULL};
|
||||
const char *nvrtc_paths[] = {"nvrtc.dll", NULL};
|
||||
const char *nvrtc_paths[] = {"nvrtc64_80.dll", "nvrtc64_90.dll", "nvrtc64_91.dll", NULL};
|
||||
#elif defined(__APPLE__)
|
||||
/* Default installation path. */
|
||||
const char *cuda_paths[] = {"/usr/local/cuda/lib/libcuda.dylib", NULL};
|
||||
@ -766,6 +766,15 @@ const char *cuewCompilerPath(void) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int cuewNvrtcVersion(void) {
|
||||
int major, minor;
|
||||
if (nvrtcVersion) {
|
||||
nvrtcVersion(&major, &minor);
|
||||
return 10 * major + minor;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cuewCompilerVersion(void) {
|
||||
const char *path = cuewCompilerPath();
|
||||
const char *marker = "Cuda compilation tools, release ";
|
||||
|
@ -242,6 +242,24 @@ if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
unset(_has_no_error_unused_macros)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
|
||||
if(MSVC)
|
||||
set(MAX_MSVC 1800)
|
||||
if(${CUDA_VERSION} EQUAL "8.0")
|
||||
set(MAX_MSVC 1900)
|
||||
elseif(${CUDA_VERSION} EQUAL "9.0")
|
||||
set(MAX_MSVC 1910)
|
||||
elseif(${CUDA_VERSION} EQUAL "9.1")
|
||||
set(MAX_MSVC 1911)
|
||||
endif()
|
||||
if (NOT MSVC_VERSION LESS ${MAX_MSVC})
|
||||
message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
|
||||
set(WITH_CYCLES_CUBIN_COMPILER ON)
|
||||
endif()
|
||||
unset(MAX_MSVC)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
# Subdirectories
|
||||
|
||||
@ -254,7 +272,7 @@ if(WITH_CYCLES_NETWORK)
|
||||
add_definitions(-DWITH_NETWORK)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_STANDALONE OR WITH_CYCLES_NETWORK)
|
||||
if(WITH_CYCLES_STANDALONE OR WITH_CYCLES_NETWORK OR WITH_CYCLES_CUBIN_COMPILER)
|
||||
add_subdirectory(app)
|
||||
endif()
|
||||
|
||||
|
@ -120,3 +120,27 @@ if(WITH_CYCLES_NETWORK)
|
||||
endif()
|
||||
unset(SRC)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_CUBIN_COMPILER)
|
||||
# 32 bit windows is special, nvrtc is not supported on x86, so even
|
||||
# though we are building 32 bit blender a 64 bit cubin_cc will have
|
||||
# to be build to compile the cubins.
|
||||
if(MSVC AND NOT CMAKE_CL_64)
|
||||
Message("cycles_cubin_cc not supported on x86")
|
||||
else()
|
||||
set(SRC cycles_cubin_cc.cpp)
|
||||
set(INC ../../../extern/cuew/include)
|
||||
add_executable(cycles_cubin_cc ${SRC})
|
||||
include_directories(${INC})
|
||||
target_link_libraries(cycles_cubin_cc
|
||||
extern_cuew
|
||||
${OPENIMAGEIO_LIBRARIES}
|
||||
${PLATFORM_LINKLIBS}
|
||||
)
|
||||
if(NOT CYCLES_STANDALONE_REPOSITORY)
|
||||
target_link_libraries(cycles_cubin_cc bf_intern_guardedalloc)
|
||||
endif()
|
||||
unset(SRC)
|
||||
unset(INC)
|
||||
endif()
|
||||
endif()
|
||||
|
284
intern/cycles/app/cycles_cubin_cc.cpp
Normal file
284
intern/cycles/app/cycles_cubin_cc.cpp
Normal file
@ -0,0 +1,284 @@
|
||||
/*
|
||||
* Copyright 2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <OpenImageIO/argparse.h>
|
||||
#include <OpenImageIO/filesystem.h>
|
||||
|
||||
#include "cuew.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include <Windows.h>
|
||||
#endif
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
class CompilationSettings
|
||||
{
|
||||
public:
|
||||
CompilationSettings()
|
||||
: target_arch(0),
|
||||
bits(64),
|
||||
verbose(false),
|
||||
fast_math(false)
|
||||
{}
|
||||
|
||||
string cuda_toolkit_dir;
|
||||
string input_file;
|
||||
string output_file;
|
||||
string ptx_file;
|
||||
vector<string> defines;
|
||||
vector<string> includes;
|
||||
int target_arch;
|
||||
int bits;
|
||||
bool verbose;
|
||||
bool fast_math;
|
||||
};
|
||||
|
||||
bool compile_cuda(CompilationSettings &settings)
|
||||
{
|
||||
const char* headers[] = {"stdlib.h" , "float.h", "math.h", "stdio.h"};
|
||||
const char* header_content[] = {"\n", "\n", "\n", "\n"};
|
||||
|
||||
printf("Building %s\n", settings.input_file.c_str());
|
||||
|
||||
string code;
|
||||
if(!OIIO::Filesystem::read_text_file(settings.input_file, code)) {
|
||||
fprintf(stderr, "Error: unable to read %s\n", settings.input_file.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<string> options;
|
||||
for(size_t i = 0; i < settings.includes.size(); i++) {
|
||||
options.push_back("-I" + settings.includes[i]);
|
||||
}
|
||||
|
||||
for(size_t i = 0; i < settings.defines.size(); i++) {
|
||||
options.push_back("-D" + settings.defines[i]);
|
||||
}
|
||||
|
||||
options.push_back("-arch=compute_" + std::to_string(settings.target_arch));
|
||||
options.push_back("--device-as-default-execution-space");
|
||||
if(settings.fast_math)
|
||||
options.push_back("--use_fast_math");
|
||||
|
||||
nvrtcProgram prog;
|
||||
nvrtcResult result = nvrtcCreateProgram(&prog,
|
||||
code.c_str(), // buffer
|
||||
NULL, // name
|
||||
sizeof(headers) / sizeof(void*), // numHeaders
|
||||
header_content, // headers
|
||||
headers); // includeNames
|
||||
|
||||
if(result != NVRTC_SUCCESS) {
|
||||
fprintf(stderr, "Error: nvrtcCreateProgram failed (%x)\n\n", result);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Tranfer options to a classic C array. */
|
||||
vector<const char*> opts(options.size());
|
||||
for(size_t i = 0; i < options.size(); i++) {
|
||||
opts[i] = options[i].c_str();
|
||||
}
|
||||
|
||||
result = nvrtcCompileProgram(prog, options.size(), &opts[0]);
|
||||
|
||||
if(result != NVRTC_SUCCESS) {
|
||||
fprintf(stderr, "Error: nvrtcCompileProgram failed (%x)\n\n", result);
|
||||
|
||||
size_t log_size;
|
||||
nvrtcGetProgramLogSize(prog, &log_size);
|
||||
|
||||
vector<char> log(log_size);
|
||||
nvrtcGetProgramLog(prog, &log[0]);
|
||||
fprintf(stderr, "%s\n", &log[0]);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Retrieve the ptx code. */
|
||||
size_t ptx_size;
|
||||
result = nvrtcGetPTXSize(prog, &ptx_size);
|
||||
if(result != NVRTC_SUCCESS) {
|
||||
fprintf(stderr, "Error: nvrtcGetPTXSize failed (%x)\n\n", result);
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<char> ptx_code(ptx_size);
|
||||
result = nvrtcGetPTX(prog, &ptx_code[0]);
|
||||
if(result != NVRTC_SUCCESS) {
|
||||
fprintf(stderr, "Error: nvrtcGetPTX failed (%x)\n\n", result);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Write a file in the temp folder with the ptx code. */
|
||||
settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" + OIIO::Filesystem::unique_path();
|
||||
FILE * f= fopen(settings.ptx_file.c_str(), "wb");
|
||||
fwrite(&ptx_code[0], 1, ptx_size, f);
|
||||
fclose(f);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool link_ptxas(CompilationSettings &settings)
|
||||
{
|
||||
string cudapath = "";
|
||||
if(settings.cuda_toolkit_dir.size())
|
||||
cudapath = settings.cuda_toolkit_dir + "/bin/";
|
||||
|
||||
string ptx = "\"" +cudapath + "ptxas\" " + settings.ptx_file +
|
||||
" -o " + settings.output_file +
|
||||
" --gpu-name sm_" + std::to_string(settings.target_arch) +
|
||||
" -m" + std::to_string(settings.bits);
|
||||
|
||||
if(settings.verbose)
|
||||
ptx += " --verbose";
|
||||
|
||||
int pxresult = system(ptx.c_str());
|
||||
if(pxresult) {
|
||||
fprintf(stderr, "Error: ptxas failed (%x)\n\n", pxresult);
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!OIIO::Filesystem::remove(settings.ptx_file)) {
|
||||
fprintf(stderr, "Error: removing %s\n\n", settings.ptx_file.c_str());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool init(CompilationSettings &settings)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
if(settings.cuda_toolkit_dir.size()) {
|
||||
SetDllDirectory((settings.cuda_toolkit_dir + "/bin").c_str());
|
||||
}
|
||||
#endif
|
||||
|
||||
int cuewresult = cuewInit();
|
||||
if(cuewresult != CUEW_SUCCESS) {
|
||||
fprintf(stderr, "Error: cuew init fialed (0x%x)\n\n", cuewresult);
|
||||
return false;
|
||||
}
|
||||
|
||||
if(cuewNvrtcVersion() < 80) {
|
||||
fprintf(stderr, "Error: only cuda 8 and higher is supported, %d\n\n", cuewCompilerVersion());
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!nvrtcCreateProgram) {
|
||||
fprintf(stderr, "Error: nvrtcCreateProgram not resolved\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!nvrtcCompileProgram) {
|
||||
fprintf(stderr, "Error: nvrtcCompileProgram not resolved\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!nvrtcGetProgramLogSize) {
|
||||
fprintf(stderr, "Error: nvrtcGetProgramLogSize not resolved\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!nvrtcGetProgramLog) {
|
||||
fprintf(stderr, "Error: nvrtcGetProgramLog not resolved\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!nvrtcGetPTXSize) {
|
||||
fprintf(stderr, "Error: nvrtcGetPTXSize not resolved\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!nvrtcGetPTX) {
|
||||
fprintf(stderr, "Error: nvrtcGetPTX not resolved\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool parse_parameters(int argc, const char **argv, CompilationSettings &settings)
|
||||
{
|
||||
OIIO::ArgParse ap;
|
||||
ap.options("Usage: cycles_cubin_cc [options]",
|
||||
"-target %d", &settings.target_arch, "target shader model",
|
||||
"-m %d", &settings.bits, "Cuda architecture bits",
|
||||
"-i %s", &settings.input_file, "Input source filename",
|
||||
"-o %s", &settings.output_file, "Output cubin filename",
|
||||
"-I %L", &settings.includes, "Add additional includepath",
|
||||
"-D %L", &settings.defines, "Add additional defines",
|
||||
"-v", &settings.verbose, "Use verbose logging",
|
||||
"--use_fast_math", &settings.fast_math, "Use fast math",
|
||||
"-cuda-toolkit-dir %s", &settings.cuda_toolkit_dir, "path to the cuda toolkit binary directory",
|
||||
NULL);
|
||||
|
||||
if(ap.parse(argc, argv) < 0) {
|
||||
fprintf(stderr, "%s\n", ap.geterror().c_str());
|
||||
ap.usage();
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!settings.output_file.size()) {
|
||||
fprintf(stderr, "Error: Output file not set(-o), required\n\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!settings.input_file.size()) {
|
||||
fprintf(stderr, "Error: Input file not set(-i, required\n\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!settings.target_arch) {
|
||||
fprintf(stderr, "Error: target shader model not set (-target), required\n\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
CompilationSettings settings;
|
||||
|
||||
if(!parse_parameters(argc, argv, settings)) {
|
||||
fprintf(stderr, "Error: invalid parameters, exiting\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if(!init(settings)) {
|
||||
fprintf(stderr, "Error: initialization error, exiting\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if(!compile_cuda(settings)) {
|
||||
fprintf(stderr, "Error: compilation error, exiting\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if(!link_ptxas(settings)) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -348,7 +348,6 @@ public:
|
||||
const DeviceRequestedFeatures& requested_features,
|
||||
bool filter=false, bool split=false)
|
||||
{
|
||||
const int cuda_version = cuewCompilerVersion();
|
||||
const int machine = system_cpu_bits();
|
||||
const string source_path = path_get("source");
|
||||
const string include_path = source_path;
|
||||
@ -356,10 +355,8 @@ public:
|
||||
"--ptxas-options=\"-v\" "
|
||||
"--use_fast_math "
|
||||
"-DNVCC "
|
||||
"-D__KERNEL_CUDA_VERSION__=%d "
|
||||
"-I\"%s\"",
|
||||
machine,
|
||||
cuda_version,
|
||||
include_path.c_str());
|
||||
if(!filter && use_adaptive_compilation()) {
|
||||
cflags += " " + requested_features.get_build_options();
|
||||
|
@ -356,55 +356,67 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
set(cuda_cubins)
|
||||
|
||||
macro(CYCLES_CUDA_KERNEL_ADD arch name flags sources experimental)
|
||||
set(cuda_cubin ${name}_${arch}.cubin)
|
||||
set(cuda_kernel_src "/kernels/cuda/${name}.cu")
|
||||
|
||||
set(cuda_flags
|
||||
-D CCL_NAMESPACE_BEGIN=
|
||||
-D CCL_NAMESPACE_END=
|
||||
-D NVCC
|
||||
-m ${CUDA_BITS}
|
||||
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
|
||||
-I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda
|
||||
--use_fast_math
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin})
|
||||
|
||||
if(${experimental})
|
||||
set(flags ${flags} -D__KERNEL_EXPERIMENTAL__)
|
||||
set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__)
|
||||
set(name ${name}_experimental)
|
||||
endif()
|
||||
|
||||
set(cuda_cubin ${name}_${arch}.cubin)
|
||||
|
||||
if(WITH_CYCLES_DEBUG)
|
||||
set(cuda_debug_flags "-D__KERNEL_DEBUG__")
|
||||
else()
|
||||
set(cuda_debug_flags "")
|
||||
set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__)
|
||||
endif()
|
||||
|
||||
set(cuda_nvcc_command ${CUDA_NVCC_EXECUTABLE})
|
||||
set(cuda_nvcc_version ${CUDA_VERSION})
|
||||
if(WITH_CYCLES_CUBIN_COMPILER)
|
||||
string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)
|
||||
|
||||
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${cuda_nvcc_version}")
|
||||
set(cuda_math_flags "--use_fast_math")
|
||||
|
||||
set(cuda_kernel_src "/kernels/cuda/${name}.cu")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${cuda_cubin}
|
||||
COMMAND ${cuda_nvcc_command}
|
||||
-arch=${arch}
|
||||
${CUDA_NVCC_FLAGS}
|
||||
-m${CUDA_BITS}
|
||||
--cubin ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
|
||||
--ptxas-options="-v"
|
||||
${cuda_arch_flags}
|
||||
${cuda_version_flags}
|
||||
${cuda_math_flags}
|
||||
${flags}
|
||||
${cuda_debug_flags}
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/..
|
||||
-DCCL_NAMESPACE_BEGIN=
|
||||
-DCCL_NAMESPACE_END=
|
||||
-DNVCC
|
||||
DEPENDS ${sources})
|
||||
# Needed to find libnvrtc-builtins.so. Can't do it from inside
|
||||
# cycles_cubin_cc since the env variable is read before main()
|
||||
if(APPLE)
|
||||
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
|
||||
-E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
|
||||
elseif(UNIX)
|
||||
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
|
||||
-E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${cuda_cubin}
|
||||
COMMAND ${CUBIN_CC_ENV}
|
||||
"$<TARGET_FILE:cycles_cubin_cc>"
|
||||
-target ${CUDA_ARCH}
|
||||
-i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
|
||||
${cuda_flags}
|
||||
-v
|
||||
-cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
|
||||
DEPENDS ${sources} cycles_cubin_cc)
|
||||
else()
|
||||
add_custom_command(
|
||||
OUTPUT ${cuda_cubin}
|
||||
COMMAND ${CUDA_NVCC_EXECUTABLE}
|
||||
-arch=${arch}
|
||||
${CUDA_NVCC_FLAGS}
|
||||
--cubin
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
|
||||
--ptxas-options="-v"
|
||||
${cuda_flags}
|
||||
DEPENDS ${sources})
|
||||
endif()
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
list(APPEND cuda_cubins ${cuda_cubin})
|
||||
|
||||
unset(cuda_extra_flags)
|
||||
unset(cuda_debug_flags)
|
||||
|
||||
unset(cuda_nvcc_command)
|
||||
unset(cuda_nvcc_version)
|
||||
endmacro()
|
||||
|
||||
foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
|
||||
@ -412,12 +424,12 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
message(STATUS "CUDA binaries for ${arch} disabled, not supported by CUDA 9.")
|
||||
else()
|
||||
# Compile regular kernel
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} kernel "" "${cuda_sources}" FALSE)
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} filter "" "${cuda_filter_sources}" FALSE)
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} kernel "" "${cuda_sources}" FALSE)
|
||||
|
||||
if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES)
|
||||
# Compile split kernel
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} kernel_split "-D__SPLIT__" ${cuda_sources} FALSE)
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} kernel_split "-D __SPLIT__" ${cuda_sources} FALSE)
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
@ -30,10 +30,22 @@
|
||||
# define __NODES_FEATURES__ NODE_FEATURE_ALL
|
||||
#endif
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cuda_fp16.h>
|
||||
#include <float.h>
|
||||
#include <stdint.h>
|
||||
/* Manual definitions so we can compile without CUDA toolkit. */
|
||||
|
||||
typedef unsigned int uint32_t;
|
||||
typedef unsigned long long uint64_t;
|
||||
typedef unsigned short half;
|
||||
typedef unsigned long long CUtexObject;
|
||||
|
||||
#define FLT_MAX 1.175494350822287507969e-38f
|
||||
#define FLT_MIN 340282346638528859811704183484516925440.0f
|
||||
|
||||
__device__ half __float2half(const float f)
|
||||
{
|
||||
half val;
|
||||
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
|
||||
return val;
|
||||
}
|
||||
|
||||
/* Qualifier wrappers for different names on different devices */
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user