Merge branch 'master' into blender2.8
This commit is contained in:
commit
b9b88d59dd
@ -404,7 +404,7 @@ option(WITH_CYCLES_CUDA_BINARIES "Build Cycles CUDA binaries" OFF)
|
||||
option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF)
|
||||
option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
|
||||
mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
|
||||
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 CACHE STRING "CUDA architectures to build binaries for")
|
||||
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_72 sm_75 CACHE STRING "CUDA architectures to build binaries for")
|
||||
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
|
||||
unset(PLATFORM_DEFAULT)
|
||||
option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON)
|
||||
|
@ -72,9 +72,6 @@ if 'cmake' in builder:
|
||||
if builder.endswith('x86_64_10_9_cmake'):
|
||||
cmake_extra_options.append('-DCMAKE_OSX_ARCHITECTURES:STRING=x86_64')
|
||||
cmake_extra_options.append('-DCMAKE_OSX_DEPLOYMENT_TARGET=10.9')
|
||||
# Used to trick CUDA into seeing Clang as an older version.
|
||||
# cmake_extra_options.append('-DCUDA_HOST_COMPILER=/usr/local/cuda-hack/clang')
|
||||
# cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-hack/nvcc')
|
||||
|
||||
elif builder.startswith('win'):
|
||||
if builder.endswith('_vs2017'):
|
||||
@ -119,12 +116,28 @@ if 'cmake' in builder:
|
||||
cmake_extra_options.extend(["-DCMAKE_C_COMPILER=/usr/bin/gcc-7",
|
||||
"-DCMAKE_CXX_COMPILER=/usr/bin/g++-7"])
|
||||
|
||||
# Workaround to build only sm_7x kernels with CUDA 10, until
# older kernels work well with this version.
#
# CUDA 9.1 is passed as the regular toolkit (CUDA_VERSION, CUDA_TOOLKIT_*,
# CUDA_NVCC_EXECUTABLE), while the CUDA 10.0 nvcc and toolkit root are passed
# separately through the CUDA10_* options.
# NOTE(review): the CUDA10_* options are presumably consumed by the Cycles
# kernel CMake code when compiling sm_7x architectures -- confirm there.
if builder.startswith('win'):
    cmake_extra_options.extend([
        '-DCUDA_VERSION=9.1',
        '-DCUDA_TOOLKIT_INCLUDE:PATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.1/include',
        '-DCUDA_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.1',
        '-DCUDA_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.1/bin/nvcc.exe',
        '-DCUDA10_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/nvcc.exe',
        '-DCUDA10_TOOLKIT_ROOT_DIR:PATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0',
    ])
elif builder.startswith('linux'):
    cmake_extra_options.extend([
        '-DCUDA_VERSION=9.1',
        # Spelled CUDA_TOOLKIT_INCLUDE to match the Windows branch; the
        # previous 'INCLDUE' spelling defined an unrelated cache entry, so
        # the include directory override was silently ignored.
        '-DCUDA_TOOLKIT_INCLUDE:PATH=/usr/local/cuda-9.1/include',
        '-DCUDA_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-9.1',
        '-DCUDA_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-9.1/bin/nvcc',
        '-DCUDA10_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda-10.0/bin/nvcc',
        '-DCUDA10_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda-10.0',
    ])
|
||||
|
||||
cmake_options.append("-C" + os.path.join(blender_dir, cmake_config_file))
|
||||
|
||||
# Prepare CMake options needed to configure cuda binaries compilation, 64bit only.
|
||||
if bits == 64:
|
||||
cuda_cmake_options.append("-DWITH_CYCLES_CUDA_BINARIES=%s" % ('ON' if build_cubins else 'OFF'))
|
||||
cuda_cmake_options.append("-DCYCLES_CUDA_BINARIES_ARCH=sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70")
|
||||
if build_cubins or 'cuda' in targets:
|
||||
cuda_cmake_options.append("-DCUDA_64_BIT_DEVICE_CODE=ON")
|
||||
|
||||
|
@ -52,7 +52,7 @@ set(WITH_X11_XF86VMODE ON CACHE BOOL "" FORCE)
|
||||
|
||||
set(WITH_MEM_JEMALLOC ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
|
||||
set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61 CACHE STRING "" FORCE)
|
||||
set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_72;sm_75 CACHE STRING "" FORCE)
|
||||
|
||||
# platform dependent options
|
||||
if(UNIX AND NOT APPLE)
|
||||
|
6
extern/cuew/src/cuew.c
vendored
6
extern/cuew/src/cuew.c
vendored
@ -619,7 +619,11 @@ static int cuewNvrtcInit(void) {
|
||||
/* Library paths. */
|
||||
#ifdef _WIN32
|
||||
/* Expected in c:/windows/system or similar, no path needed. */
|
||||
const char *nvrtc_paths[] = {"nvrtc64_80.dll", "nvrtc64_90.dll", "nvrtc64_91.dll", NULL};
|
||||
const char *nvrtc_paths[] = {"nvrtc64_80.dll",
|
||||
"nvrtc64_90.dll",
|
||||
"nvrtc64_91.dll",
|
||||
"nvrtc64_10_0.dll",
|
||||
NULL};
|
||||
#elif defined(__APPLE__)
|
||||
/* Default installation path. */
|
||||
const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL};
|
||||
|
@ -315,6 +315,13 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC.
#
# When CUDA binaries are requested together with the nvrtc based cubin
# compiler, but the CUDA version is 10.0 or newer, switch the cubin compiler
# off. set() without CACHE only overrides the option as a normal variable.
if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER)
  # Quote the expansion: an empty or undefined CUDA_VERSION then simply
  # compares as false instead of producing a malformed if() expression.
  if("${CUDA_VERSION}" VERSION_GREATER_EQUAL 10.0)
    message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.")
    set(WITH_CYCLES_CUBIN_COMPILER OFF)
  endif()
endif()
|
||||
|
||||
# Subdirectories
|
||||
|
||||
|
@ -343,11 +343,11 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
|
||||
|
||||
# warn for other versions
|
||||
if(CUDA_VERSION MATCHES "90" OR CUDA_VERSION MATCHES "91")
|
||||
if(CUDA_VERSION MATCHES "90" OR CUDA_VERSION MATCHES "91" OR CUDA_VERSION MATCHES "100")
|
||||
else()
|
||||
message(WARNING
|
||||
"CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
|
||||
"build may succeed but only CUDA 9.0 and 9.1 are officially supported")
|
||||
"build may succeed but only CUDA 9.0, 9.1 and 10.0 are officially supported")
|
||||
endif()
|
||||
|
||||
# build for each arch
|
||||
@ -397,17 +397,29 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_CUBIN_COMPILER)
|
||||
# Workaround to build only sm_7x kernels with CUDA 10, until
|
||||
# older kernels work well with this version.
|
||||
if(DEFINED CUDA10_NVCC_EXECUTABLE AND (${arch} MATCHES "sm_7."))
|
||||
set(with_cubin_compiler OFF)
|
||||
set(cuda_nvcc_executable "${CUDA10_NVCC_EXECUTABLE}")
|
||||
set(cuda_toolkit_root_dir "${CUDA10_TOOLKIT_ROOT_DIR}")
|
||||
else()
|
||||
set(with_cubin_compiler ${WITH_CYCLES_CUBIN_COMPILER})
|
||||
set(cuda_nvcc_executable "${CUDA_NVCC_EXECUTABLE}")
|
||||
set(cuda_toolkit_root_dir "${CUDA_TOOLKIT_ROOT_DIR}")
|
||||
endif()
|
||||
|
||||
if(with_cubin_compiler)
|
||||
string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)
|
||||
|
||||
# Needed to find libnvrtc-builtins.so. Can't do it from inside
|
||||
# cycles_cubin_cc since the env variable is read before main()
|
||||
if(APPLE)
|
||||
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
|
||||
-E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
|
||||
-E env DYLD_LIBRARY_PATH="${cuda_toolkit_root_dir}/lib")
|
||||
elseif(UNIX)
|
||||
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
|
||||
-E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
|
||||
-E env LD_LIBRARY_PATH="${cuda_toolkit_root_dir}/lib64")
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
@ -418,12 +430,12 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
-i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
|
||||
${cuda_flags}
|
||||
-v
|
||||
-cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
|
||||
-cuda-toolkit-dir "${cuda_toolkit_root_dir}"
|
||||
DEPENDS ${kernel_sources} cycles_cubin_cc)
|
||||
else()
|
||||
add_custom_command(
|
||||
OUTPUT ${cuda_cubin}
|
||||
COMMAND ${CUDA_NVCC_EXECUTABLE}
|
||||
COMMAND ${cuda_nvcc_executable}
|
||||
-arch=${arch}
|
||||
${CUDA_NVCC_FLAGS}
|
||||
--cubin
|
||||
@ -442,6 +454,8 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
|
||||
if(${arch} MATCHES "sm_2.")
|
||||
message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
|
||||
elseif(${arch} MATCHES "sm_7." AND NOT (${CUDA_VERSION} VERSION_GREATER_EQUAL 10.0))
|
||||
message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.")
|
||||
else()
|
||||
# Compile regular kernel
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE)
|
||||
|
@ -52,8 +52,8 @@
|
||||
# define CUDA_KERNEL_MAX_REGISTERS 63
|
||||
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
|
||||
|
||||
/* 5.0, 5.2, 5.3, 6.0, 6.1 */
|
||||
#elif __CUDA_ARCH__ >= 500
|
||||
/* 5.x, 6.x */
|
||||
#elif __CUDA_ARCH__ <= 699
|
||||
# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
|
||||
# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32
|
||||
# define CUDA_BLOCK_MAX_THREADS 1024
|
||||
@ -62,13 +62,25 @@
|
||||
/* tunable parameters */
|
||||
# define CUDA_THREADS_BLOCK_WIDTH 16
|
||||
/* CUDA 9.0 seems to cause slowdowns on high-end Pascal cards unless we increase the number of registers */
|
||||
# if __CUDACC_VER_MAJOR__ == 9 && __CUDA_ARCH__ >= 600
|
||||
# if __CUDACC_VER_MAJOR__ >= 9 && __CUDA_ARCH__ >= 600
|
||||
# define CUDA_KERNEL_MAX_REGISTERS 64
|
||||
# else
|
||||
# define CUDA_KERNEL_MAX_REGISTERS 48
|
||||
# endif
|
||||
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
|
||||
|
||||
/* 7.x */
|
||||
#elif __CUDA_ARCH__ <= 799
|
||||
# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
|
||||
# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32
|
||||
# define CUDA_BLOCK_MAX_THREADS 1024
|
||||
# define CUDA_THREAD_MAX_REGISTERS 255
|
||||
|
||||
/* tunable parameters */
|
||||
# define CUDA_THREADS_BLOCK_WIDTH 16
|
||||
# define CUDA_KERNEL_MAX_REGISTERS 64
|
||||
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 72
|
||||
|
||||
|
||||
/* unknown architecture */
|
||||
#else
|
||||
|
Loading…
Reference in New Issue
Block a user