forked from bartvdbraak/blender
Cycles: Some more inline policy tweaks for CUDA 8
Makes the toolkit take exactly the same decisions about what to inline; unfortunately, this makes a barely visible difference on a GTX-980.
This commit is contained in:
parent
b416168d85
commit
500e0e9a3d
@ -144,7 +144,16 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
|
||||
return label;
|
||||
}
|
||||
|
||||
ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, const float3 omega_in, float *pdf)
|
||||
#ifndef __KERNEL_CUDA__  /* CUDA builds take the #else qualifier below. */
|
||||
ccl_device
|
||||
#else
|
||||
ccl_device_inline
|
||||
#endif
|
||||
float3 bsdf_eval(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const ShaderClosure *sc,
|
||||
const float3 omega_in,
|
||||
float *pdf)
|
||||
{
|
||||
float3 eval;
|
||||
|
||||
|
@ -18,13 +18,13 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __BRANCHED_PATH__
|
||||
|
||||
ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
ShaderData *emission_sd,
|
||||
PathRadiance *L,
|
||||
PathState *state,
|
||||
RNG *rng,
|
||||
float3 throughput)
|
||||
ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
ShaderData *emission_sd,
|
||||
PathRadiance *L,
|
||||
PathState *state,
|
||||
RNG *rng,
|
||||
float3 throughput)
|
||||
{
|
||||
int num_samples = kernel_data.integrator.ao_samples;
|
||||
float num_samples_inv = 1.0f/num_samples;
|
||||
|
@ -149,7 +149,12 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
|
||||
/* ShaderData setup from BSSRDF scatter */
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
ccl_device void shader_setup_from_subsurface(
|
||||
# ifndef __KERNEL_CUDA__  /* CUDA builds take the #else qualifier below. */
|
||||
ccl_device
|
||||
# else
|
||||
ccl_device_inline
|
||||
# endif
|
||||
void shader_setup_from_subsurface(
|
||||
KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const Intersection *isect,
|
||||
@ -533,12 +538,18 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
|
||||
}
|
||||
#endif
|
||||
|
||||
ccl_device void shader_bsdf_eval(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const float3 omega_in,
|
||||
BsdfEval *eval,
|
||||
float light_pdf,
|
||||
bool use_mis)
|
||||
|
||||
#ifndef __KERNEL_CUDA__  /* CUDA builds take the #else qualifier below. */
|
||||
ccl_device
|
||||
#else
|
||||
ccl_device_inline
|
||||
#endif
|
||||
void shader_bsdf_eval(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const float3 omega_in,
|
||||
BsdfEval *eval,
|
||||
float light_pdf,
|
||||
bool use_mis)
|
||||
{
|
||||
bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
|
||||
|
||||
|
@ -87,7 +87,12 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
|
||||
#ifndef __KERNEL_CUDA__  /* CUDA builds take the #else (noinline) qualifier below. */
|
||||
ccl_device
|
||||
#else
|
||||
ccl_device_noinline
|
||||
#endif
|
||||
void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
|
||||
{
|
||||
NodeAttributeType type, mesh_type;
|
||||
AttributeElement elem;
|
||||
@ -123,10 +128,15 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
float *stack,
|
||||
uint4 node)
|
||||
#ifndef __KERNEL_CUDA__  /* CUDA builds take the #else (noinline) qualifier below. */
|
||||
ccl_device
|
||||
#else
|
||||
ccl_device_noinline
|
||||
#endif
|
||||
void svm_node_attr_bump_dy(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
float *stack,
|
||||
uint4 node)
|
||||
{
|
||||
NodeAttributeType type, mesh_type;
|
||||
AttributeElement elem;
|
||||
|
@ -34,11 +34,11 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Wireframe Node */
|
||||
|
||||
ccl_device float wireframe(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
float size,
|
||||
int pixel_size,
|
||||
float3 *P)
|
||||
ccl_device_inline float wireframe(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
float size,
|
||||
int pixel_size,
|
||||
float3 *P)
|
||||
{
|
||||
#ifdef __HAIR__
|
||||
if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)
|
||||
|
Loading…
Reference in New Issue
Block a user