forked from bartvdbraak/blender
Cycles
* Compile all of Cycles with -ffast-math again. * Add SCons compilation of CUDA binaries, tested on Mac/Linux. * Add UI option for supported/experimental features, to make it clearer what is supported; OpenCL and subdivision are experimental. * Remove the Cycles XML exporter, which was just for testing.
This commit is contained in:
parent
f6f7e270e3
commit
45de380771
@ -582,6 +582,14 @@ if env['OURPLATFORM']!='darwin':
|
||||
source=['intern/cycles/doc/license/'+s for s in source]
|
||||
scriptinstall.append(env.Install(dir=dir,source=source))
|
||||
|
||||
# cuda binaries
|
||||
if env['WITH_BF_CYCLES_CUDA_BINARIES']:
|
||||
dir=os.path.join(env['BF_INSTALLDIR'], VERSION, 'scripts', 'addons','cycles', 'lib')
|
||||
for arch in env['BF_CYCLES_CUDA_BINARIES_ARCH']:
|
||||
kernel_build_dir = os.path.join(B.root_build_dir, 'intern/cycles/kernel')
|
||||
cubin_file = os.path.join(kernel_build_dir, "kernel_%s.cubin" % arch)
|
||||
scriptinstall.append(env.Install(dir=dir,source=cubin_file))
|
||||
|
||||
if env['WITH_BF_INTERNATIONAL']:
|
||||
internationalpaths=['release' + os.sep + 'datafiles']
|
||||
|
||||
|
@ -283,7 +283,7 @@ BF_PCRE_LIBPATH = '${BF_PCRE}/lib'
|
||||
#BF_EXPAT_LIB = 'expat'
|
||||
#BF_EXPAT_LIBPATH = '/usr/lib'
|
||||
|
||||
#Cycles
|
||||
# Cycles
|
||||
WITH_BF_CYCLES = True
|
||||
|
||||
WITH_BF_OIIO = True
|
||||
@ -298,6 +298,10 @@ BF_BOOST_INC = BF_BOOST + '/include'
|
||||
BF_BOOST_LIB = 'boost_date_time-mt boost_filesystem-mt boost_regex-mt boost_system-mt boost_thread-mt'
|
||||
BF_BOOST_LIBPATH = BF_BOOST + '/lib'
|
||||
|
||||
WITH_BF_CYCLES_CUDA_BINARIES = False
|
||||
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
|
||||
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_13', 'sm_20', 'sm_21']
|
||||
|
||||
#Ray trace optimization
|
||||
if MACOSX_ARCHITECTURE == 'x86_64' or MACOSX_ARCHITECTURE == 'i386':
|
||||
WITH_BF_RAYOPTIMIZATION = True
|
||||
|
@ -219,6 +219,10 @@ BF_BOOST_LIBPATH = BF_BOOST + '/lib'
|
||||
|
||||
WITH_BF_CYCLES = WITH_BF_OIIO and WITH_BF_BOOST
|
||||
|
||||
WITH_BF_CYCLES_CUDA_BINARIES = False
|
||||
BF_CYCLES_CUDA_NVCC = '/usr/local/cuda/bin/nvcc'
|
||||
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_13', 'sm_20', 'sm_21']
|
||||
|
||||
WITH_BF_OPENMP = True
|
||||
|
||||
#Ray trace optimization
|
||||
|
@ -588,6 +588,8 @@ def AppIt(target=None, source=None, env=None):
|
||||
commands.getoutput(cmd)
|
||||
cmd = 'mkdir %s/kernel' % (cinstalldir)
|
||||
commands.getoutput(cmd)
|
||||
cmd = 'mkdir %s/lib' % (cinstalldir)
|
||||
commands.getoutput(cmd)
|
||||
cmd = 'cp -R %s/blender/addon/*.py %s/' % (croot, cinstalldir)
|
||||
commands.getoutput(cmd)
|
||||
cmd = 'cp -R %s/doc/license %s/license' % (croot, cinstalldir)
|
||||
@ -596,6 +598,8 @@ def AppIt(target=None, source=None, env=None):
|
||||
commands.getoutput(cmd)
|
||||
cmd = 'cp -R %s/kernel/svm %s/util/util_color.h %s/util/util_math.h %s/util/util_transform.h %s/util/util_types.h %s/kernel/' % (croot, croot, croot, croot, croot, cinstalldir)
|
||||
commands.getoutput(cmd)
|
||||
cmd = 'cp -R %s/../intern/cycles/kernel/*.cubin %s/lib/' % (builddir, cinstalldir)
|
||||
commands.getoutput(cmd)
|
||||
|
||||
if env['WITH_OSX_STATICPYTHON']:
|
||||
cmd = 'mkdir %s/%s.app/Contents/MacOS/%s/python/'%(installdir,binary, VERSION)
|
||||
|
@ -157,7 +157,7 @@ def validate_arguments(args, bc):
|
||||
'WITH_BF_JEMALLOC', 'WITH_BF_STATICJEMALLOC', 'BF_JEMALLOC', 'BF_JEMALLOC_INC', 'BF_JEMALLOC_LIBPATH', 'BF_JEMALLOC_LIB', 'BF_JEMALLOC_LIB_STATIC',
|
||||
'BUILDBOT_BRANCH',
|
||||
'WITH_BF_3DMOUSE', 'WITH_BF_STATIC3DMOUSE', 'BF_3DMOUSE', 'BF_3DMOUSE_INC', 'BF_3DMOUSE_LIB', 'BF_3DMOUSE_LIBPATH', 'BF_3DMOUSE_LIB_STATIC',
|
||||
'WITH_BF_CYCLES', 'WITH_BF_CYCLES_BINARIES' 'BF_CYCLES_BINARIES_ARCH',
|
||||
# Each option name must be its own list element: adjacent string literals
# without a comma are silently concatenated into a single bogus name.
'WITH_BF_CYCLES', 'WITH_BF_CYCLES_CUDA_BINARIES', 'BF_CYCLES_CUDA_NVCC', 'BF_CYCLES_CUDA_BINARIES_ARCH',
|
||||
'WITH_BF_OIIO', 'WITH_BF_STATICOIIO', 'BF_OIIO', 'BF_OIIO_INC', 'BF_OIIO_LIB', 'BF_OIIO_LIB_STATIC', 'BF_OIIO_LIBPATH',
|
||||
'WITH_BF_BOOST', 'WITH_BF_STATICBOOST', 'BF_BOOST', 'BF_BOOST_INC', 'BF_BOOST_LIB', 'BF_BOOST_LIB_STATIC', 'BF_BOOST_LIBPATH'
|
||||
]
|
||||
@ -544,7 +544,9 @@ def read_opts(env, cfg, args):
|
||||
|
||||
localopts.AddVariables(
|
||||
(BoolVariable('WITH_BF_CYCLES', 'Build with the Cycles engine', True)),
|
||||
(BoolVariable('WITH_BF_CYCLES_BINARIES', 'Build with precompiled CUDA binaries', False)),
|
||||
(BoolVariable('WITH_BF_CYCLES_CUDA_BINARIES', 'Build with precompiled CUDA binaries', False)),
|
||||
('BF_CYCLES_CUDA_NVCC', 'CUDA nvcc compiler path', ''),
|
||||
('BF_CYCLES_CUDA_BINARIES_ARCH', 'CUDA architectures to compile binaries for', []),
|
||||
|
||||
(BoolVariable('WITH_BF_OIIO', 'Build with OpenImageIO', False)),
|
||||
(BoolVariable('WITH_BF_STATICOIIO', 'Staticly link to OpenImageIO', False)),
|
||||
|
@ -10,12 +10,14 @@ include(cmake/external_libs.cmake)
|
||||
|
||||
if(WITH_RAYOPTIMIZATION AND SUPPORT_SSE_BUILD)
|
||||
set(WITH_CYCLES_OPTIMIZED_KERNEL ON)
|
||||
endif()
|
||||
|
||||
if(WIN32 AND MSVC)
|
||||
set(CYCLES_OPTIMIZED_KERNEL_FLAGS "/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast")
|
||||
elseif(CMAKE_COMPILER_IS_GNUCC)
|
||||
if(WIN32 AND MSVC)
|
||||
set(CYCLES_OPTIMIZED_KERNEL_FLAGS "/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /EHsc")
|
||||
elseif(CMAKE_COMPILER_IS_GNUCC)
|
||||
set(CYCLES_OPTIMIZED_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
|
||||
endif()
|
||||
|
||||
# for OSL, not needed yet
|
||||
|
@ -14,6 +14,7 @@ sources.remove(path.join('kernel', 'kernel_optimized.cpp'))
|
||||
|
||||
incs = []
|
||||
defs = []
|
||||
cxxflags = []
|
||||
|
||||
defs.append('CCL_NAMESPACE_BEGIN=namespace ccl {')
|
||||
defs.append('CCL_NAMESPACE_END=}')
|
||||
@ -22,7 +23,7 @@ defs.append('WITH_OPENCL')
|
||||
defs.append('WITH_MULTI')
|
||||
defs.append('WITH_CUDA')
|
||||
|
||||
if env['WITH_BF_CYCLES_BINARIES']:
|
||||
if env['WITH_BF_CYCLES_CUDA_BINARIES']:
|
||||
defs.append('WITH_CUDA_BINARIES')
|
||||
|
||||
incs.extend('. bvh render device kernel kernel/osl kernel/svm util subd'.split())
|
||||
@ -33,12 +34,17 @@ incs.append(cycles['BF_OIIO_INC'])
|
||||
incs.append(cycles['BF_BOOST_INC'])
|
||||
incs.append(cycles['BF_PYTHON_INC'])
|
||||
|
||||
if env['OURPLATFORM'] in ('win32-vc', 'win64-vc'):
|
||||
cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc'.split())
|
||||
else:
|
||||
cxxflags.append('-ffast-math'.split())
|
||||
|
||||
# optimized kernel
|
||||
if env['WITH_BF_RAYOPTIMIZATION']:
|
||||
optim_cxxflags = []
|
||||
|
||||
if env['OURPLATFORM'] in ('win32-vc', 'win64-vc'):
|
||||
optim_cxxflags.append('/Ox /Ot /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /EHsc /fp:fast'.split())
|
||||
optim_cxxflags.append('/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc'.split())
|
||||
else:
|
||||
optim_cxxflags.append('-ffast-math -msse -msse2 -msse3'.split())
|
||||
|
||||
@ -48,5 +54,10 @@ if env['WITH_BF_RAYOPTIMIZATION']:
|
||||
cycles_optim = cycles.Clone()
|
||||
cycles_optim.BlenderLib('bf_intern_cycles_optimized', optim_sources, incs, optim_defs, libtype=['intern'], priority=[0], compileflags=[None], cxx_compileflags=optim_cxxflags)
|
||||
|
||||
cycles.BlenderLib('bf_intern_cycles', sources, incs, defs, libtype=['intern'], priority=[0], compileflags=[None])
|
||||
cycles.BlenderLib('bf_intern_cycles', sources, incs, defs, libtype=['intern'], priority=[0], compileflags=[None], cxx_compileflags=cxxflags)
|
||||
|
||||
# cuda kernel binaries
|
||||
if env['WITH_BF_CYCLES_CUDA_BINARIES']:
|
||||
kernel_binaries = SConscript(['kernel/SConscript'])
|
||||
cycles.Depends("device/device_cuda.o", kernel_binaries)
|
||||
|
||||
|
@ -39,7 +39,6 @@ set(ADDON_FILES
|
||||
addon/presets.py
|
||||
addon/properties.py
|
||||
addon/ui.py
|
||||
addon/xml.py
|
||||
)
|
||||
|
||||
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}")
|
||||
|
@ -33,7 +33,7 @@ bl_info = {
|
||||
"category": "Render"}
|
||||
|
||||
import bpy
|
||||
from . import ui, properties, xml, engine, presets
|
||||
from . import ui, properties, engine, presets
|
||||
|
||||
|
||||
class CyclesRender(bpy.types.RenderEngine):
|
||||
@ -77,13 +77,11 @@ class CyclesRender(bpy.types.RenderEngine):
|
||||
def register():
|
||||
properties.register()
|
||||
ui.register()
|
||||
xml.register()
|
||||
presets.register()
|
||||
bpy.utils.register_module(__name__)
|
||||
|
||||
|
||||
def unregister():
|
||||
xml.unregister()
|
||||
ui.unregister()
|
||||
properties.unregister()
|
||||
presets.unregister()
|
||||
|
@ -41,7 +41,12 @@ devices = (
|
||||
|
||||
gpu_type = (
|
||||
("CUDA", "CUDA", "NVidia only"),
|
||||
("OPENCL", "OpenCL (incomplete)", ""),
|
||||
("OPENCL", "OpenCL", ""),
|
||||
)
|
||||
|
||||
feature_set = (
|
||||
("SUPPORTED", "Supported", "Only use finished and supported features"),
|
||||
("EXPERIMENTAL", "Experimental", "Use experimental and incomplete features that might be broken or change in the future"),
|
||||
)
|
||||
|
||||
shading_systems = (
|
||||
|
@ -41,6 +41,9 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
||||
cls.gpu_type = EnumProperty(name="GPU Type", description="Processing system to use on the GPU",
|
||||
items=enums.gpu_type, default="CUDA")
|
||||
|
||||
cls.feature_set = EnumProperty(name="Feature Set", description="Feature set to use for rendering",
|
||||
items=enums.feature_set, default="SUPPORTED")
|
||||
|
||||
cls.shading_system = EnumProperty(name="Shading System", description="Shading system to use for rendering",
|
||||
items=enums.shading_systems, default="GPU_COMPATIBLE")
|
||||
|
||||
|
@ -294,7 +294,12 @@ class Cycles_PT_mesh_displacement(CyclesButtonsPanel, Panel):
|
||||
|
||||
@classmethod
|
||||
def poll(cls, context):
|
||||
return CyclesButtonsPanel.poll(context) and (context.mesh or context.curve or context.meta_ball)
|
||||
if CyclesButtonsPanel.poll(context):
|
||||
if context.mesh or context.curve or context.meta_ball:
|
||||
if context.scene.cycles.feature_set == 'EXPERIMENTAL':
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def draw(self, context):
|
||||
layout = self.layout
|
||||
@ -706,18 +711,20 @@ def draw_device(self, context):
|
||||
if scene.render.engine == "CYCLES":
|
||||
cscene = scene.cycles
|
||||
|
||||
layout.prop(cscene, "feature_set")
|
||||
experimental = cscene.feature_set == 'EXPERIMENTAL'
|
||||
|
||||
available_devices = engine.available_devices()
|
||||
available_cuda = 'cuda' in available_devices
|
||||
available_opencl = 'opencl' in available_devices
|
||||
available_opencl = experimental and 'opencl' in available_devices
|
||||
|
||||
if available_cuda or available_opencl:
|
||||
layout.prop(cscene, "device")
|
||||
if cscene.device == 'GPU' and available_cuda and available_opencl:
|
||||
layout.prop(cscene, "gpu_type")
|
||||
if cscene.device == 'CPU' and engine.with_osl():
|
||||
if experimental and cscene.device == 'CPU' and engine.with_osl():
|
||||
layout.prop(cscene, "shading_system")
|
||||
|
||||
|
||||
def draw_pause(self, context):
|
||||
layout = self.layout
|
||||
scene = context.scene
|
||||
|
@ -1,105 +0,0 @@
|
||||
#
|
||||
# Copyright 2011, Blender Foundation.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software Foundation,
|
||||
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#
|
||||
|
||||
# <pep8 compliant>
|
||||
|
||||
# XML exporter for generating test files, not intended for end users
|
||||
|
||||
import os
|
||||
import bpy
|
||||
from bpy_extras.io_utils import ExportHelper
|
||||
import xml.etree.ElementTree as etree
|
||||
import xml.dom.minidom as dom
|
||||
|
||||
|
||||
def strip(root):
    """Clear ``text`` and ``tail`` on *root* and on every element below it.

    The tree is modified in place; nothing is returned.
    """
    # Element.iter() yields root itself first and then all descendants in
    # document order, so a flat loop visits the same nodes a recursive walk
    # over the children would.
    for node in root.iter():
        node.text = None
        node.tail = None
|
||||
|
||||
|
||||
def write(node, fname):
    """Write the XML tree rooted at *node* to the file *fname*, pretty-printed.

    *node* is passed through ``strip`` first (which modifies it in place),
    serialized with ElementTree and re-indented through minidom.
    """
    strip(node)

    serialized = etree.tostring(node)
    pretty = dom.parseString(serialized).toprettyxml()

    # Use a context manager so the handle is flushed and closed even if the
    # write fails, instead of leaving it to the garbage collector.
    with open(fname, "w") as f:
        f.write(pretty)
|
||||
|
||||
|
||||
class ExportCyclesXML(bpy.types.Operator, ExportHelper):
    ''''''
    # Operator that writes the active object's mesh as a single <mesh> XML
    # node with the attributes 'nverts', 'verts' and 'P'.
    bl_idname = "export_mesh.cycles_xml"
    bl_label = "Export Cycles XML"

    filename_ext = ".xml"

    @classmethod
    def poll(cls, context):
        # Only available when there is an active object to export.
        return context.active_object is not None

    def execute(self, context):
        filepath = bpy.path.ensure_ext(self.filepath, ".xml")

        # get mesh
        scene = context.scene
        obj = context.object

        if not obj:
            raise Exception("No active object")

        mesh = obj.to_mesh(scene, True, 'PREVIEW')

        if not mesh:
            raise Exception("No mesh data in active object")

        try:
            # generate mesh node
            # Each attribute is a space-separated list; join() builds it in
            # one pass instead of repeated string concatenation.
            P = "".join("%f %f %f " % (v.co[0], v.co[1], v.co[2])
                        for v in mesh.vertices)
            nverts = "".join(str(len(f.vertices)) + " " for f in mesh.faces)
            # Vertex indices of one face, followed by an extra space that
            # separates it from the next face.
            verts = "".join("".join(str(v) + " " for v in f.vertices) + " "
                            for f in mesh.faces)

            node = etree.Element('mesh', attrib={'nverts': nverts, 'verts': verts, 'P': P})

            # write to file
            write(node, filepath)
        finally:
            # to_mesh() created a temporary datablock that is only needed for
            # the export; remove it so it does not accumulate in the .blend.
            bpy.data.meshes.remove(mesh)

        return {'FINISHED'}
|
||||
|
||||
|
||||
def register():
    """Registration hook for this module; intentionally does nothing."""
    return None
|
||||
|
||||
|
||||
def unregister():
    """Unregistration hook for this module; intentionally does nothing."""
    return None
|
||||
|
||||
if __name__ == "__main__":
|
||||
register()
|
@ -281,7 +281,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated)
|
||||
mesh->name = ustring(b_ob_data.name().c_str());
|
||||
|
||||
if(b_mesh) {
|
||||
if(cmesh.data && RNA_boolean_get(&cmesh, "use_subdivision"))
|
||||
if(cmesh.data && experimental && RNA_boolean_get(&cmesh, "use_subdivision"))
|
||||
create_subd_mesh(mesh, b_mesh, &cmesh, used_shaders);
|
||||
else
|
||||
create_mesh(scene, mesh, b_mesh, used_shaders);
|
||||
@ -294,7 +294,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated)
|
||||
if(cmesh.data) {
|
||||
int method = RNA_enum_get(&cmesh, "displacement_method");
|
||||
|
||||
if(method == 0)
|
||||
if(method == 0 || !experimental)
|
||||
mesh->displacement_method = Mesh::DISPLACE_BUMP;
|
||||
else if(method == 1)
|
||||
mesh->displacement_method = Mesh::DISPLACE_TRUE;
|
||||
|
@ -47,7 +47,8 @@ BlenderSync::BlenderSync(BL::BlendData b_data_, BL::Scene b_scene_, Scene *scene
|
||||
mesh_map(&scene_->meshes),
|
||||
light_map(&scene_->lights),
|
||||
world_map(NULL),
|
||||
world_recalc(false)
|
||||
world_recalc(false),
|
||||
experimental(false)
|
||||
{
|
||||
scene = scene_;
|
||||
preview = preview_;
|
||||
@ -134,6 +135,8 @@ void BlenderSync::sync_integrator()
|
||||
{
|
||||
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
|
||||
|
||||
experimental = (RNA_enum_get(&cscene, "feature_set") != 0);
|
||||
|
||||
Integrator *integrator = scene->integrator;
|
||||
Integrator previntegrator = *integrator;
|
||||
|
||||
@ -253,6 +256,9 @@ SessionParams BlenderSync::get_session_params(BL::Scene b_scene, bool background
|
||||
SessionParams params;
|
||||
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
|
||||
|
||||
/* feature set */
|
||||
bool experimental = (RNA_enum_get(&cscene, "feature_set") != 0);
|
||||
|
||||
/* device type */
|
||||
params.device_type = DEVICE_CPU;
|
||||
|
||||
@ -262,7 +268,7 @@ SessionParams BlenderSync::get_session_params(BL::Scene b_scene, bool background
|
||||
|
||||
if(device_type_available(types, dtype))
|
||||
params.device_type = dtype;
|
||||
else if(device_type_available(types, DEVICE_OPENCL))
|
||||
else if(experimental && device_type_available(types, DEVICE_OPENCL))
|
||||
params.device_type = DEVICE_OPENCL;
|
||||
else if(device_type_available(types, DEVICE_CUDA))
|
||||
params.device_type = DEVICE_CUDA;
|
||||
|
@ -100,6 +100,7 @@ private:
|
||||
|
||||
Scene *scene;
|
||||
bool preview;
|
||||
bool experimental;
|
||||
|
||||
struct RenderLayerInfo {
|
||||
RenderLayerInfo()
|
||||
|
@ -237,6 +237,9 @@ public:
|
||||
return cubin;
|
||||
|
||||
#ifdef WITH_CUDA_BINARIES
|
||||
if(major <= 1 && minor <= 2)
|
||||
cuda_error(string_printf("CUDA device supported only with shader model 1.3 or up, found %d.%d.", major, minor));
|
||||
else
|
||||
cuda_error("CUDA binary kernel for this graphics card not found.");
|
||||
return "";
|
||||
#else
|
||||
|
50
intern/cycles/kernel/SConscript
Normal file
50
intern/cycles/kernel/SConscript
Normal file
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/python
|
||||
import sys
|
||||
import os
|
||||
|
||||
def normpath(path):
    """Return *path* normalized and made absolute."""
    collapsed = os.path.normpath(path)
    return os.path.abspath(collapsed)
|
||||
|
||||
Import('env')

# Paths of the .cubin files scheduled by this script; returned to the caller.
kernel_binaries = []

if env['WITH_BF_CYCLES_CUDA_BINARIES']:
    kernel = env.Clone()

    # cuda info
    nvcc = env['BF_CYCLES_CUDA_NVCC']
    cuda_archs = env['BF_CYCLES_CUDA_BINARIES_ARCH']

    # build directory
    root_build_dir = normpath(env['BF_BUILDDIR'])
    build_dir = os.path.join(root_build_dir, 'intern/cycles/kernel')

    # source directories and files
    source_dir = Dir('.').srcnode().path
    kernel_file = os.path.join(source_dir, "kernel.cu")
    util_dir = os.path.join(source_dir, "../util")
    # svm is a subdirectory of the kernel directory (the dependency glob
    # below uses 'svm/*.h'), not a sibling of it.
    svm_dir = os.path.join(source_dir, "svm")

    # nvcc flags
    nvcc_flags = "--cubin -use_fast_math --ptxas-options=\"-v\" --maxrregcount=24"
    nvcc_flags += " --opencc-options -OPT:Olimit=0"
    nvcc_flags += " -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC"
    nvcc_flags += " -I \"%s\" -I \"%s\"" % (util_dir, svm_dir)

    # dependencies: rebuild the binaries when the kernel source or any of
    # the headers globbed here changes
    dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h')

    # add command for each cuda architecture
    for arch in cuda_archs:
        cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)

        command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, kernel_file, cubin_file)

        kernel.Command(cubin_file, 'kernel.cu', command)
        kernel.Depends(cubin_file, dependencies)

        kernel_binaries.append(cubin_file)

Return('kernel_binaries')
|
||||
|
Loading…
Reference in New Issue
Block a user