Cycles: Some more inline policy tweaks for CUDA 8

Makes it so the toolkit makes exactly the same decisions about what to inline,
but unfortunately this has only a barely visible effect on the GTX-980.
This commit is contained in:
Sergey Sharybin 2016-08-02 15:04:34 +02:00
parent b416168d85
commit 500e0e9a3d
5 changed files with 55 additions and 25 deletions

@ -144,7 +144,16 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
return label; return label;
} }
ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, const float3 omega_in, float *pdf) #ifndef __KERNEL_CUDA__
ccl_device
#else
ccl_device_inline
#endif
float3 bsdf_eval(KernelGlobals *kg,
ShaderData *sd,
const ShaderClosure *sc,
const float3 omega_in,
float *pdf)
{ {
float3 eval; float3 eval;

@ -18,13 +18,13 @@ CCL_NAMESPACE_BEGIN
#ifdef __BRANCHED_PATH__ #ifdef __BRANCHED_PATH__
ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
ShaderData *sd, ShaderData *sd,
ShaderData *emission_sd, ShaderData *emission_sd,
PathRadiance *L, PathRadiance *L,
PathState *state, PathState *state,
RNG *rng, RNG *rng,
float3 throughput) float3 throughput)
{ {
int num_samples = kernel_data.integrator.ao_samples; int num_samples = kernel_data.integrator.ao_samples;
float num_samples_inv = 1.0f/num_samples; float num_samples_inv = 1.0f/num_samples;

@ -149,7 +149,12 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
/* ShaderData setup from BSSRDF scatter */ /* ShaderData setup from BSSRDF scatter */
#ifdef __SUBSURFACE__ #ifdef __SUBSURFACE__
ccl_device void shader_setup_from_subsurface( # ifndef __KERNEL_CUDA__
ccl_device
# else
ccl_device_inline
# endif
void shader_setup_from_subsurface(
KernelGlobals *kg, KernelGlobals *kg,
ShaderData *sd, ShaderData *sd,
const Intersection *isect, const Intersection *isect,
@ -533,12 +538,18 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
} }
#endif #endif
ccl_device void shader_bsdf_eval(KernelGlobals *kg,
ShaderData *sd, #ifndef __KERNEL_CUDA__
const float3 omega_in, ccl_device
BsdfEval *eval, #else
float light_pdf, ccl_device_inline
bool use_mis) #endif
void shader_bsdf_eval(KernelGlobals *kg,
ShaderData *sd,
const float3 omega_in,
BsdfEval *eval,
float light_pdf,
bool use_mis)
{ {
bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);

@ -87,7 +87,12 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
} }
} }
ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) #ifndef __KERNEL_CUDA__
ccl_device
#else
ccl_device_noinline
#endif
void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{ {
NodeAttributeType type, mesh_type; NodeAttributeType type, mesh_type;
AttributeElement elem; AttributeElement elem;
@ -123,10 +128,15 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *
} }
} }
ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, #ifndef __KERNEL_CUDA__
ShaderData *sd, ccl_device
float *stack, #else
uint4 node) ccl_device_noinline
#endif
void svm_node_attr_bump_dy(KernelGlobals *kg,
ShaderData *sd,
float *stack,
uint4 node)
{ {
NodeAttributeType type, mesh_type; NodeAttributeType type, mesh_type;
AttributeElement elem; AttributeElement elem;

@ -34,11 +34,11 @@ CCL_NAMESPACE_BEGIN
/* Wireframe Node */ /* Wireframe Node */
ccl_device float wireframe(KernelGlobals *kg, ccl_device_inline float wireframe(KernelGlobals *kg,
ShaderData *sd, ShaderData *sd,
float size, float size,
int pixel_size, int pixel_size,
float3 *P) float3 *P)
{ {
#ifdef __HAIR__ #ifdef __HAIR__
if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)