forked from bartvdbraak/blender
Cycles: Some more inline policy tweaks for CUDA 8
Makes it so the toolkit makes exactly the same decisions about what to inline, but unfortunately this makes only a barely visible difference on the GTX-980.
This commit is contained in:
parent
b416168d85
commit
500e0e9a3d
@ -144,7 +144,16 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
|
|||||||
return label;
|
return label;
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, const float3 omega_in, float *pdf)
|
#ifndef __KERNEL_CUDS__
|
||||||
|
ccl_device
|
||||||
|
#else
|
||||||
|
ccl_device_inline
|
||||||
|
#endif
|
||||||
|
float3 bsdf_eval(KernelGlobals *kg,
|
||||||
|
ShaderData *sd,
|
||||||
|
const ShaderClosure *sc,
|
||||||
|
const float3 omega_in,
|
||||||
|
float *pdf)
|
||||||
{
|
{
|
||||||
float3 eval;
|
float3 eval;
|
||||||
|
|
||||||
|
@ -18,13 +18,13 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
#ifdef __BRANCHED_PATH__
|
#ifdef __BRANCHED_PATH__
|
||||||
|
|
||||||
ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
|
ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
|
||||||
ShaderData *sd,
|
ShaderData *sd,
|
||||||
ShaderData *emission_sd,
|
ShaderData *emission_sd,
|
||||||
PathRadiance *L,
|
PathRadiance *L,
|
||||||
PathState *state,
|
PathState *state,
|
||||||
RNG *rng,
|
RNG *rng,
|
||||||
float3 throughput)
|
float3 throughput)
|
||||||
{
|
{
|
||||||
int num_samples = kernel_data.integrator.ao_samples;
|
int num_samples = kernel_data.integrator.ao_samples;
|
||||||
float num_samples_inv = 1.0f/num_samples;
|
float num_samples_inv = 1.0f/num_samples;
|
||||||
|
@ -149,7 +149,12 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
|
|||||||
/* ShaderData setup from BSSRDF scatter */
|
/* ShaderData setup from BSSRDF scatter */
|
||||||
|
|
||||||
#ifdef __SUBSURFACE__
|
#ifdef __SUBSURFACE__
|
||||||
ccl_device void shader_setup_from_subsurface(
|
# ifndef __KERNEL_CUDS__
|
||||||
|
ccl_device
|
||||||
|
# else
|
||||||
|
ccl_device_inline
|
||||||
|
# endif
|
||||||
|
void shader_setup_from_subsurface(
|
||||||
KernelGlobals *kg,
|
KernelGlobals *kg,
|
||||||
ShaderData *sd,
|
ShaderData *sd,
|
||||||
const Intersection *isect,
|
const Intersection *isect,
|
||||||
@ -533,12 +538,18 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ccl_device void shader_bsdf_eval(KernelGlobals *kg,
|
|
||||||
ShaderData *sd,
|
#ifndef __KERNEL_CUDS__
|
||||||
const float3 omega_in,
|
ccl_device
|
||||||
BsdfEval *eval,
|
#else
|
||||||
float light_pdf,
|
ccl_device_inline
|
||||||
bool use_mis)
|
#endif
|
||||||
|
void shader_bsdf_eval(KernelGlobals *kg,
|
||||||
|
ShaderData *sd,
|
||||||
|
const float3 omega_in,
|
||||||
|
BsdfEval *eval,
|
||||||
|
float light_pdf,
|
||||||
|
bool use_mis)
|
||||||
{
|
{
|
||||||
bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
|
bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
|
||||||
|
|
||||||
|
@ -87,7 +87,12 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
|
#ifndef __KERNEL_CUDS__
|
||||||
|
ccl_device
|
||||||
|
#else
|
||||||
|
ccl_device_noinline
|
||||||
|
#endif
|
||||||
|
void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
|
||||||
{
|
{
|
||||||
NodeAttributeType type, mesh_type;
|
NodeAttributeType type, mesh_type;
|
||||||
AttributeElement elem;
|
AttributeElement elem;
|
||||||
@ -123,10 +128,15 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg,
|
#ifndef __KERNEL_CUDS__
|
||||||
ShaderData *sd,
|
ccl_device
|
||||||
float *stack,
|
#else
|
||||||
uint4 node)
|
ccl_device_noinline
|
||||||
|
#endif
|
||||||
|
void svm_node_attr_bump_dy(KernelGlobals *kg,
|
||||||
|
ShaderData *sd,
|
||||||
|
float *stack,
|
||||||
|
uint4 node)
|
||||||
{
|
{
|
||||||
NodeAttributeType type, mesh_type;
|
NodeAttributeType type, mesh_type;
|
||||||
AttributeElement elem;
|
AttributeElement elem;
|
||||||
|
@ -34,11 +34,11 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
/* Wireframe Node */
|
/* Wireframe Node */
|
||||||
|
|
||||||
ccl_device float wireframe(KernelGlobals *kg,
|
ccl_device_inline float wireframe(KernelGlobals *kg,
|
||||||
ShaderData *sd,
|
ShaderData *sd,
|
||||||
float size,
|
float size,
|
||||||
int pixel_size,
|
int pixel_size,
|
||||||
float3 *P)
|
float3 *P)
|
||||||
{
|
{
|
||||||
#ifdef __HAIR__
|
#ifdef __HAIR__
|
||||||
if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)
|
if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)
|
||||||
|
Loading…
Reference in New Issue
Block a user