Cycles: Some more inline policy tweaks for CUDA 8

Makes the toolkit make exactly the same decisions about what to inline,
but unfortunately this has only a barely visible effect on a GTX-980.
This commit is contained in:
Sergey Sharybin 2016-08-02 15:04:34 +02:00
parent b416168d85
commit 500e0e9a3d
5 changed files with 55 additions and 25 deletions

@ -144,7 +144,16 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
return label;
}
ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, const float3 omega_in, float *pdf)
/* Inline policy: ccl_device_inline when __KERNEL_CUDA__ is defined,
 * plain ccl_device for every other target. */
#ifndef __KERNEL_CUDA__
ccl_device
#else
ccl_device_inline
#endif
float3 bsdf_eval(KernelGlobals *kg,
ShaderData *sd,
const ShaderClosure *sc,
const float3 omega_in,
float *pdf)
{
float3 eval;

@ -18,13 +18,13 @@ CCL_NAMESPACE_BEGIN
#ifdef __BRANCHED_PATH__
ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
PathState *state,
RNG *rng,
float3 throughput)
ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
PathState *state,
RNG *rng,
float3 throughput)
{
int num_samples = kernel_data.integrator.ao_samples;
float num_samples_inv = 1.0f/num_samples;

@ -149,7 +149,12 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
/* ShaderData setup from BSSRDF scatter */
#ifdef __SUBSURFACE__
ccl_device void shader_setup_from_subsurface(
/* Inline policy: ccl_device_inline when __KERNEL_CUDA__ is defined,
 * plain ccl_device for every other target. */
# ifndef __KERNEL_CUDA__
ccl_device
# else
ccl_device_inline
# endif
void shader_setup_from_subsurface(
KernelGlobals *kg,
ShaderData *sd,
const Intersection *isect,
@ -533,12 +538,18 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
}
#endif
ccl_device void shader_bsdf_eval(KernelGlobals *kg,
ShaderData *sd,
const float3 omega_in,
BsdfEval *eval,
float light_pdf,
bool use_mis)
/* Inline policy: ccl_device_inline when __KERNEL_CUDA__ is defined,
 * plain ccl_device for every other target. */
#ifndef __KERNEL_CUDA__
ccl_device
#else
ccl_device_inline
#endif
void shader_bsdf_eval(KernelGlobals *kg,
ShaderData *sd,
const float3 omega_in,
BsdfEval *eval,
float light_pdf,
bool use_mis)
{
bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);

@ -87,7 +87,12 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
}
}
ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
/* Inline policy: ccl_device_noinline when __KERNEL_CUDA__ is defined,
 * plain ccl_device for every other target. */
#ifndef __KERNEL_CUDA__
ccl_device
#else
ccl_device_noinline
#endif
void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
NodeAttributeType type, mesh_type;
AttributeElement elem;
@ -123,10 +128,15 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *
}
}
ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg,
ShaderData *sd,
float *stack,
uint4 node)
/* Inline policy: ccl_device_noinline when __KERNEL_CUDA__ is defined,
 * plain ccl_device for every other target. */
#ifndef __KERNEL_CUDA__
ccl_device
#else
ccl_device_noinline
#endif
void svm_node_attr_bump_dy(KernelGlobals *kg,
ShaderData *sd,
float *stack,
uint4 node)
{
NodeAttributeType type, mesh_type;
AttributeElement elem;

@ -34,11 +34,11 @@ CCL_NAMESPACE_BEGIN
/* Wireframe Node */
ccl_device float wireframe(KernelGlobals *kg,
ShaderData *sd,
float size,
int pixel_size,
float3 *P)
ccl_device_inline float wireframe(KernelGlobals *kg,
ShaderData *sd,
float size,
int pixel_size,
float3 *P)
{
#ifdef __HAIR__
if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)