From 500e0e9a3d5a9e7982d4df31a1477b8732c76e71 Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Tue, 2 Aug 2016 15:04:34 +0200 Subject: [PATCH] Cycles: Some more inline policy tweaks for CUDA 8 Makes it so toolkit does exactly the same decision about what to inline, but unfortunately it has really barely visible difference on GTX-980. --- intern/cycles/kernel/closure/bsdf.h | 11 ++++++++- intern/cycles/kernel/kernel_path_branched.h | 14 ++++++------ intern/cycles/kernel/kernel_shader.h | 25 +++++++++++++++------ intern/cycles/kernel/svm/svm_attribute.h | 20 ++++++++++++----- intern/cycles/kernel/svm/svm_wireframe.h | 10 ++++----- 5 files changed, 55 insertions(+), 25 deletions(-) diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index 55bdf3ecbb4..86e1a7f317f 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -144,7 +144,16 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg, return label; } -ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, const float3 omega_in, float *pdf) +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_inline +#endif +float3 bsdf_eval(KernelGlobals *kg, + ShaderData *sd, + const ShaderClosure *sc, + const float3 omega_in, + float *pdf) { float3 eval; diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index 56516967d8f..64f1468eacf 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -18,13 +18,13 @@ CCL_NAMESPACE_BEGIN #ifdef __BRANCHED_PATH__ -ccl_device void kernel_branched_path_ao(KernelGlobals *kg, - ShaderData *sd, - ShaderData *emission_sd, - PathRadiance *L, - PathState *state, - RNG *rng, - float3 throughput) +ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg, + ShaderData *sd, + ShaderData *emission_sd, + PathRadiance *L, + PathState *state, + RNG *rng, + float3 throughput) { int 
num_samples = kernel_data.integrator.ao_samples; float num_samples_inv = 1.0f/num_samples; diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index bb3fe933b2c..98d321c9c16 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -149,7 +149,12 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, /* ShaderData setup from BSSRDF scatter */ #ifdef __SUBSURFACE__ -ccl_device void shader_setup_from_subsurface( +# ifndef __KERNEL_CUDA__ +ccl_device +# else +ccl_device_inline +# endif +void shader_setup_from_subsurface( KernelGlobals *kg, ShaderData *sd, const Intersection *isect, @@ -533,12 +538,18 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg, } #endif -ccl_device void shader_bsdf_eval(KernelGlobals *kg, - ShaderData *sd, - const float3 omega_in, - BsdfEval *eval, - float light_pdf, - bool use_mis) + +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_inline +#endif +void shader_bsdf_eval(KernelGlobals *kg, + ShaderData *sd, + const float3 omega_in, + BsdfEval *eval, + float light_pdf, + bool use_mis) { bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index ff92920c610..bd6013e9205 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -87,7 +87,12 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u } } -ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_noinline +#endif +void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { NodeAttributeType type, mesh_type; AttributeElement elem; @@ -123,10 +128,15 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, 
float * } } -ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint4 node) +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_noinline +#endif +void svm_node_attr_bump_dy(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { NodeAttributeType type, mesh_type; AttributeElement elem; diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h index 30ccd523add..6eed9bc1a99 100644 --- a/intern/cycles/kernel/svm/svm_wireframe.h +++ b/intern/cycles/kernel/svm/svm_wireframe.h @@ -34,11 +34,11 @@ CCL_NAMESPACE_BEGIN /* Wireframe Node */ -ccl_device float wireframe(KernelGlobals *kg, - ShaderData *sd, - float size, - int pixel_size, - float3 *P) +ccl_device_inline float wireframe(KernelGlobals *kg, + ShaderData *sd, + float size, + int pixel_size, + float3 *P) { #ifdef __HAIR__ if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)