forked from bartvdbraak/blender
fb3f32760d
Now we build 2 .cubins per architecture (e.g. kernel_sm_21.cubin, kernel_experimental_sm_21.cubin). The experimental kernel can be used by switching to the Experimental Feature Set: http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Experimental_Features This enables Subsurface Scattering and Correlated Multi Jitter Sampling on GPU, while keeping the stability and performance of the regular kernel. Differential Revision: https://developer.blender.org/D762 Patch by Sergey and myself. Developer / Builder Note: CUDA Toolkit 6.5 is highly recommended for this, also note that building the experimental kernel requires a lot of system memory (~7-8GB).
118 lines
4.1 KiB
Python
118 lines
4.1 KiB
Python
#!/usr/bin/env python
|
|
#
|
|
# ***** BEGIN GPL LICENSE BLOCK *****
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# as published by the Free Software Foundation; either version 2
|
|
# of the License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software Foundation,
|
|
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
#
|
|
# The Original Code is Copyright (C) 2011, Blender Foundation
|
|
# All rights reserved.
|
|
#
|
|
# The Original Code is: all of this file.
|
|
#
|
|
# Contributor(s): Nathan Letwory.
|
|
#
|
|
# ***** END GPL LICENSE BLOCK *****
|
|
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import os
|
|
import Blender as B
|
|
import btools
|
|
|
|
def normpath(path):
    """Return `path` as a normalized, absolute path.

    The path is first collapsed with os.path.normpath() (redundant
    separators and "." / ".." components removed) and then made absolute
    with os.path.abspath(), which resolves relative paths against the
    current working directory.
    """
    return os.path.abspath(os.path.normpath(path))
|
|
|
|
# Pull the SCons construction environment into this script's namespace.
Import ('env')

# Paths of the .cubin files scheduled for building; returned to the caller
# at the end of this script.
kernel_binaries = []

# Bitness: passed to nvcc as -m32 / -m64. Anything other than an explicit
# 32 reported by the Blender build module is treated as 64.
if B.bitness == 32:
    bits = 32
else:
    bits = 64
|
|
|
|
# When precompiled CUDA binaries are requested, register one nvcc command per
# (architecture, kernel variant) pair and collect the resulting .cubin paths.
if env['WITH_BF_CYCLES_CUDA_BINARIES']:
    kernel = env.Clone()

    # cuda info
    nvcc = env['BF_CYCLES_CUDA_NVCC']
    cuda_archs = env['BF_CYCLES_CUDA_BINARIES_ARCH']

    # build directory
    root_build_dir = normpath(env['BF_BUILDDIR'])
    build_dir = os.path.join(root_build_dir, 'intern/cycles/kernel')

    # source directories and files
    source_dir = Dir('.').srcnode().path
    kernel_file = os.path.join(source_dir, "kernel.cu")
    util_dir = os.path.join(source_dir, "../util")
    svm_dir = os.path.join(source_dir, "../svm")
    geom_dir = os.path.join(source_dir, "../geom")
    closure_dir = os.path.join(source_dir, "../closure")

    # get CUDA version
    output = btools.get_command_output([nvcc, "--version"])
    # The dot is escaped so that only a literal "major.minor" pair matches.
    cuda_major_minor = re.findall(r'release (\d+)\.(\d+)', output)[0]
    # Encode the version as major*10 + minor, e.g. 6.5 -> 65.
    cuda_version = int(cuda_major_minor[0])*10 + int(cuda_major_minor[1])

    if cuda_version != 65:
        # Floor division keeps the major number an integer on Python 2 and 3.
        print("CUDA version %d.%d detected, build may succeed but only CUDA 6.5 is officially supported." % (cuda_version//10, cuda_version%10))

    # nvcc flags
    nvcc_flags = "-m%s" % (bits)
    nvcc_flags += " --cubin --ptxas-options=\"-v\" --use_fast_math"
    nvcc_flags += " -D__KERNEL_CUDA_VERSION__=%d" % (cuda_version)
    nvcc_flags += " -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC"
    nvcc_flags += " -I \"%s\" -I \"%s\" -I \"%s\" -I \"%s\"" % (util_dir, svm_dir, geom_dir, closure_dir)

    # dependencies
    # NOTE(review): the include paths above point at ../svm, ../geom and
    # ../closure, while these globs look at svm/, geom/ and closure/ relative
    # to this directory -- confirm which layout is the intended one.
    dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h')
    last_cubin_file = None

    # (output file name pattern, extra nvcc flags) for each kernel variant
    configs = (("kernel_%s.cubin", ''),
               ("kernel_experimental_%s.cubin", ' -D__KERNEL_CUDA_EXPERIMENTAL__'))

    # add command for each cuda architecture
    for arch in cuda_archs:
        # TODO(sergey): Use dict instead of a tuple in order to increase readability?
        for name, extra_flags in configs:
            cubin_file = os.path.join(build_dir, name % arch)
            current_flags = nvcc_flags + extra_flags

            if env['BF_CYCLES_CUDA_ENV']:
                # Run the Windows SDK environment setup script before nvcc.
                MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd"
                command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, current_flags, kernel_file, cubin_file)
            else:
                command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, current_flags, kernel_file, cubin_file)

            kernel.Command(cubin_file, 'kernel.cu', command)
            kernel.Depends(cubin_file, dependencies)

            kernel_binaries.append(cubin_file)

            if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']:
                # trick to compile one kernel at a time to reduce memory usage:
                # each cubin depends on the previous one, which chains the
                # nvcc invocations one after another.
                if last_cubin_file:
                    kernel.Depends(cubin_file, last_cubin_file)
                last_cubin_file = cubin_file

# Hand the list of cubin paths (empty when CUDA binaries are disabled) back
# to the calling SConscript.
Return('kernel_binaries')
|
|
|