Cycles: Remove few function arguments needed only for the split kernel

Use KernelGlobals to access all the global arrays for the intermediate
storage instead of passing all this storage things explicitly.

Tested here with Intel OpenCL, NVIDIA GTX580 and AMD Fiji, didn't see
any artifacts, so guess it's all good.

Reviewers: juicyfruit, dingto, lukasstockner97

Differential Revision: https://developer.blender.org/D1736
This commit is contained in:
Sergey Sharybin 2016-01-14 19:58:22 +05:00
parent 53ef03d20f
commit e2161ca854
14 changed files with 62 additions and 87 deletions

@ -1968,8 +1968,7 @@ public:
cl_mem AOAlpha_coop;
cl_mem AOBSDF_coop;
cl_mem AOLightRay_coop;
cl_mem Intersection_coop_AO;
cl_mem Intersection_coop_DL;
cl_mem Intersection_coop_shadow;
#ifdef WITH_CYCLES_DEBUG
/* DebugData memory */
@ -2133,8 +2132,7 @@ public:
BSDFEval_coop = NULL;
ISLamp_coop = NULL;
LightRay_coop = NULL;
Intersection_coop_AO = NULL;
Intersection_coop_DL = NULL;
Intersection_coop_shadow = NULL;
#ifdef WITH_CYCLES_DEBUG
debugdata_coop = NULL;
@ -2259,6 +2257,8 @@ public:
ccl_global type *name;
#include "kernel_textures.h"
#undef KERNEL_TEX
void *sd_input;
void *isect_shadow;
} KernelGlobals;
return sizeof(KernelGlobals);
@ -2475,8 +2475,7 @@ public:
release_mem_object_safe(BSDFEval_coop);
release_mem_object_safe(ISLamp_coop);
release_mem_object_safe(LightRay_coop);
release_mem_object_safe(Intersection_coop_AO);
release_mem_object_safe(Intersection_coop_DL);
release_mem_object_safe(Intersection_coop_shadow);
#ifdef WITH_CYCLES_DEBUG
release_mem_object_safe(debugdata_coop);
#endif
@ -2672,8 +2671,7 @@ public:
BSDFEval_coop = mem_alloc(num_global_elements * sizeof(BsdfEval));
ISLamp_coop = mem_alloc(num_global_elements * sizeof(int));
LightRay_coop = mem_alloc(num_global_elements * sizeof(Ray));
Intersection_coop_AO = mem_alloc(num_global_elements * sizeof(Intersection));
Intersection_coop_DL = mem_alloc(num_global_elements * sizeof(Intersection));
Intersection_coop_shadow = mem_alloc(2 * num_global_elements * sizeof(Intersection));
#ifdef WITH_CYCLES_DEBUG
debugdata_coop = mem_alloc(num_global_elements * sizeof(DebugData));
@ -2779,6 +2777,7 @@ public:
PathRadiance_coop,
Ray_coop,
PathState_coop,
Intersection_coop_shadow,
ray_state);
/* TODO(sergey): Avoid map lookup here. */
@ -2838,7 +2837,6 @@ public:
0,
kgbuffer,
d_data,
sd,
throughput_coop,
PathRadiance_coop,
Ray_coop,
@ -2864,7 +2862,6 @@ public:
0,
kgbuffer,
d_data,
sd,
per_sample_output_buffers,
d_rng_state,
rng_coop,
@ -2946,7 +2943,6 @@ public:
kgbuffer,
d_data,
sd,
sd_DL_shadow,
rng_coop,
PathState_coop,
ISLamp_coop,
@ -2965,8 +2961,6 @@ public:
PathState_coop,
LightRay_coop,
AOLightRay_coop,
Intersection_coop_AO,
Intersection_coop_DL,
ray_state,
Queue_data,
Queue_index,

@ -18,15 +18,16 @@ CCL_NAMESPACE_BEGIN
/* Direction Emission */
ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
LightSample *ls, ccl_addr_space PathState *state, float3 I, differential3 dI, float t, float time
#ifdef __SPLIT_KERNEL__
,ShaderData *sd_input
#endif
)
LightSample *ls,
ccl_addr_space PathState *state,
float3 I,
differential3 dI,
float t,
float time)
{
/* setup shading at emitter */
#ifdef __SPLIT_KERNEL__
ShaderData *sd = sd_input;
ShaderData *sd = kg->sd_input;
#else
ShaderData sd_object;
ShaderData *sd = &sd_object;
@ -76,12 +77,13 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
return eval;
}
ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd,
LightSample *ls, ccl_addr_space PathState *state, Ray *ray, BsdfEval *eval, bool *is_lamp
#ifdef __SPLIT_KERNEL__
, ShaderData *sd_DL
#endif
)
ccl_device_noinline bool direct_emission(KernelGlobals *kg,
ShaderData *sd,
LightSample *ls,
ccl_addr_space PathState *state,
Ray *ray,
BsdfEval *eval,
bool *is_lamp)
{
if(ls->pdf == 0.0f)
return false;
@ -91,11 +93,13 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd,
/* evaluate closure */
float3 light_eval = direct_emissive_eval(kg, ls, state, -ls->D, dD, ls->t, ccl_fetch(sd, time)
#ifdef __SPLIT_KERNEL__
,sd_DL
#endif
);
float3 light_eval = direct_emissive_eval(kg,
ls,
state,
-ls->D,
dD,
ls->t,
ccl_fetch(sd, time));
if(is_zero(light_eval))
return false;
@ -193,11 +197,10 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader
/* Indirect Lamp Emission */
ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, ccl_addr_space PathState *state, Ray *ray, float3 *emission
#ifdef __SPLIT_KERNEL__
,ShaderData *sd
#endif
)
ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
ccl_addr_space PathState *state,
Ray *ray,
float3 *emission)
{
bool hit_lamp = false;
@ -221,11 +224,13 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, ccl_addr_spac
}
#endif
float3 L = direct_emissive_eval(kg, &ls, state, -ray->D, ray->dD, ls.t, ray->time
#ifdef __SPLIT_KERNEL__
,sd
#endif
);
float3 L = direct_emissive_eval(kg,
&ls,
state,
-ray->D,
ray->dD,
ls.t,
ray->time);
#ifdef __VOLUME__
if(state->volume_stack[0].shader != SHADER_NONE) {
@ -254,11 +259,9 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, ccl_addr_spac
/* Indirect Background */
ccl_device_noinline float3 indirect_background(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space Ray *ray
#ifdef __SPLIT_KERNEL__
,ShaderData *sd_global
#endif
)
ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
ccl_addr_space PathState *state,
ccl_addr_space Ray *ray)
{
#ifdef __BACKGROUND__
int shader = kernel_data.background.surface_shader;
@ -274,13 +277,13 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, ccl_addr_space
return make_float3(0.0f, 0.0f, 0.0f);
}
#ifdef __SPLIT_KERNEL__
/* evaluate background closure */
#ifdef __SPLIT_KERNEL__
Ray priv_ray = *ray;
shader_setup_from_background(kg, sd_global, &priv_ray);
shader_setup_from_background(kg, kg->sd_input, &priv_ray);
path_state_modify_bounce(state, true);
float3 L = shader_eval_background(kg, sd_global, state, state->flag, SHADER_CONTEXT_EMISSION);
float3 L = shader_eval_background(kg, kg->sd_input, state, state->flag, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
#else
ShaderData sd;

@ -86,6 +86,11 @@ typedef ccl_addr_space struct KernelGlobals {
#define KERNEL_TEX(type, ttype, name) \
ccl_global type *name;
#include "kernel_textures.h"
#ifdef __SPLIT_KERNEL__
ShaderData *sd_input;
Intersection *isect_shadow;
#endif
} KernelGlobals;
#endif

@ -186,11 +186,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
ccl_addr_space PathState *state,
ccl_addr_space Ray *ray_input,
float3 *shadow
#ifdef __SPLIT_KERNEL__
, ShaderData *sd_mem, Intersection *isect_mem
#endif
)
float3 *shadow)
{
*shadow = make_float3(1.0f, 1.0f, 1.0f);
@ -205,7 +201,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
#endif
#ifdef __SPLIT_KERNEL__
Intersection *isect = isect_mem;
Intersection *isect = &kg->isect_shadow[TIDX];
#else
Intersection isect_object;
Intersection *isect = &isect_object;
@ -254,7 +250,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
/* setup shader data at surface */
#ifdef __SPLIT_KERNEL__
ShaderData *sd = sd_mem;
ShaderData *sd = kg->sd_input;
#else
ShaderData sd_object;
ShaderData *sd = &sd_object;

@ -19,7 +19,6 @@
__kernel void kernel_ocl_path_trace_background_buffer_update(
ccl_global char *kg,
ccl_constant KernelData *data,
ccl_global char *sd,
ccl_global float *per_sample_output_buffers,
ccl_global uint *rng_state,
ccl_global uint *rng_coop, /* Required for buffer Update */
@ -84,7 +83,6 @@ __kernel void kernel_ocl_path_trace_background_buffer_update(
#endif
enqueue_flag =
kernel_background_buffer_update((KernelGlobals *)kg,
(ShaderData *)sd,
per_sample_output_buffers,
rng_state,
rng_coop,

@ -111,6 +111,7 @@ __kernel void kernel_ocl_path_trace_data_init(
PathRadiance *PathRadiance_coop, /* PathRadiance array to store PathRadiance values for all rays */
ccl_global Ray *Ray_coop, /* Ray array to store Ray information for all rays */
ccl_global PathState *PathState_coop, /* PathState array to store PathState information for all rays */
Intersection *Intersection_coop_shadow,
ccl_global char *ray_state, /* Stores information on current state of a ray */
#define KERNEL_TEX(type, ttype, name) \
@ -206,6 +207,7 @@ __kernel void kernel_ocl_path_trace_data_init(
PathRadiance_coop,
Ray_coop,
PathState_coop,
Intersection_coop_shadow,
ray_state,
#define KERNEL_TEX(type, ttype, name) name,

@ -20,7 +20,6 @@ __kernel void kernel_ocl_path_trace_direct_lighting(
ccl_global char *kg,
ccl_constant KernelData *data,
ccl_global char *sd, /* Required for direct lighting */
ccl_global char *sd_DL, /* Required for direct lighting */
ccl_global uint *rng_coop, /* Required for direct lighting */
ccl_global PathState *PathState_coop, /* Required for direct lighting */
ccl_global int *ISLamp_coop, /* Required for direct lighting */
@ -63,7 +62,6 @@ __kernel void kernel_ocl_path_trace_direct_lighting(
#endif
enqueue_flag = kernel_direct_lighting((KernelGlobals *)kg,
(ShaderData *)sd,
(ShaderData *)sd_DL,
rng_coop,
PathState_coop,
ISLamp_coop,

@ -19,7 +19,6 @@
__kernel void kernel_ocl_path_trace_lamp_emission(
ccl_global char *kg,
ccl_constant KernelData *data,
ccl_global char *sd, /* Required for lamp emission */
ccl_global float3 *throughput_coop, /* Required for lamp emission */
PathRadiance *PathRadiance_coop, /* Required for lamp emission */
ccl_global Ray *Ray_coop, /* Required for lamp emission */
@ -69,7 +68,6 @@ __kernel void kernel_ocl_path_trace_lamp_emission(
}
kernel_lamp_emission((KernelGlobals *)kg,
(ShaderData *)sd,
throughput_coop,
PathRadiance_coop,
Ray_coop,

@ -23,8 +23,6 @@ __kernel void kernel_ocl_path_trace_shadow_blocked(
ccl_global PathState *PathState_coop, /* Required for shadow blocked */
ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */
ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */
Intersection *Intersection_coop_AO,
Intersection *Intersection_coop_DL,
ccl_global char *ray_state,
ccl_global int *Queue_data, /* Queue memory */
ccl_global int *Queue_index, /* Tracks the number of elements in each queue */
@ -73,8 +71,6 @@ __kernel void kernel_ocl_path_trace_shadow_blocked(
PathState_coop,
LightRay_dl_coop,
LightRay_ao_coop,
Intersection_coop_AO,
Intersection_coop_DL,
ray_state,
total_num_rays,
shadow_blocked_type,

@ -71,7 +71,6 @@
*/
ccl_device char kernel_background_buffer_update(
KernelGlobals *kg,
ShaderData *sd,
ccl_global float *per_sample_output_buffers,
ccl_global uint *rng_state,
ccl_global uint *rng_coop, /* Required for buffer Update */
@ -158,7 +157,7 @@ ccl_device char kernel_background_buffer_update(
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
#ifdef __BACKGROUND__
/* sample background shader */
float3 L_background = indirect_background(kg, state, ray, sd);
float3 L_background = indirect_background(kg, state, ray);
path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
#endif
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);

@ -145,6 +145,7 @@ ccl_device void kernel_data_init(
PathRadiance *PathRadiance_coop, /* PathRadiance array to store PathRadiance values for all rays */
ccl_global Ray *Ray_coop, /* Ray array to store Ray information for all rays */
ccl_global PathState *PathState_coop, /* PathState array to store PathState information for all rays */
Intersection *Intersection_coop_shadow,
ccl_global char *ray_state, /* Stores information on current state of a ray */
#define KERNEL_TEX(type, ttype, name) \
@ -170,6 +171,8 @@ ccl_device void kernel_data_init(
int parallel_samples) /* Number of samples to be processed in parallel */
{
kg->data = data;
kg->sd_input = sd_DL_shadow;
kg->isect_shadow = Intersection_coop_shadow;
#define KERNEL_TEX(type, ttype, name) \
kg->name = name;
#include "../kernel_textures.h"

@ -36,7 +36,6 @@
* kg (globals) -------------------------------------| |
* queuesize ----------------------------------------| |
*
* note on sd_DL : sd_DL is neither input nor output to this kernel; sd_DL is filled and consumed in this kernel itself.
* Note on Queues :
* This kernel only reads from the QUEUE_ACTIVE_AND_REGENERATED_RAYS queue and processes
* only the rays of state RAY_ACTIVE; If a ray needs to execute the corresponding shadow_blocked
@ -51,7 +50,6 @@
ccl_device char kernel_direct_lighting(
KernelGlobals *kg,
ShaderData *sd, /* Required for direct lighting */
ShaderData *sd_DL, /* Required for direct lighting */
ccl_global uint *rng_coop, /* Required for direct lighting */
ccl_global PathState *PathState_coop, /* Required for direct lighting */
ccl_global int *ISLamp_coop, /* Required for direct lighting */
@ -90,9 +88,7 @@ ccl_device char kernel_direct_lighting(
BsdfEval L_light;
bool is_lamp;
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp,
sd_DL))
{
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* Write intermediate data to global memory to access from
* the next kernel.
*/

@ -36,12 +36,9 @@
* sw -------------------------------------------------| |
* sh -------------------------------------------------| |
* parallel_samples -----------------------------------| |
*
* note : sd is neither input nor output. Its just filled and consumed in the same, kernel_lamp_emission, kernel.
*/
ccl_device void kernel_lamp_emission(
KernelGlobals *kg,
ShaderData *sd, /* Required for lamp emission */
ccl_global float3 *throughput_coop, /* Required for lamp emission */
PathRadiance *PathRadiance_coop, /* Required for lamp emission */
ccl_global Ray *Ray_coop, /* Required for lamp emission */
@ -79,7 +76,7 @@ ccl_device void kernel_lamp_emission(
/* intersect with lamp */
float3 emission;
if(indirect_lamp_emission(kg, state, &light_ray, &emission, sd)) {
if(indirect_lamp_emission(kg, state, &light_ray, &emission)) {
path_radiance_accum_emission(L, throughput, emission, state->bounce);
}
}

@ -51,8 +51,6 @@ ccl_device void kernel_shadow_blocked(
ccl_global PathState *PathState_coop, /* Required for shadow blocked */
ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */
ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */
Intersection *Intersection_coop_AO,
Intersection *Intersection_coop_DL,
ccl_global char *ray_state,
int total_num_rays,
char shadow_blocked_type,
@ -67,25 +65,17 @@ ccl_device void kernel_shadow_blocked(
ccl_global PathState *state = &PathState_coop[ray_index];
ccl_global Ray *light_ray_dl_global = &LightRay_dl_coop[ray_index];
ccl_global Ray *light_ray_ao_global = &LightRay_ao_coop[ray_index];
Intersection *isect_ao_global = &Intersection_coop_AO[ray_index];
Intersection *isect_dl_global = &Intersection_coop_DL[ray_index];
ccl_global Ray *light_ray_global =
shadow_blocked_type == RAY_SHADOW_RAY_CAST_AO
? light_ray_ao_global
: light_ray_dl_global;
Intersection *isect_global =
shadow_blocked_type == RAY_SHADOW_RAY_CAST_AO
? isect_ao_global
: isect_dl_global;
float3 shadow;
update_path_radiance = !(shadow_blocked(kg,
state,
light_ray_global,
&shadow,
sd_shadow,
isect_global));
&shadow));
/* We use light_ray_global's P and t to store shadow and
* update_path_radiance.