Cycles: Code cleanup, split kernel

This commit is contained in:
Sergey Sharybin 2015-05-27 00:13:32 +05:00
parent da192fb3a7
commit 92022218c2
9 changed files with 161 additions and 78 deletions

@ -130,18 +130,24 @@ ccl_device char kernel_background_buffer_update(
#ifdef __WORK_STEALING__ #ifdef __WORK_STEALING__
my_work = work_array[ray_index]; my_work = work_array[ray_index];
sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample; sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index); get_pixel_tile_position(&pixel_x, &pixel_y,
&tile_x, &tile_y,
my_work,
sw, sh, sx, sy,
parallel_samples,
ray_index);
my_sample_tile = 0; my_sample_tile = 0;
initial_per_sample_output_buffers = per_sample_output_buffers; initial_per_sample_output_buffers = per_sample_output_buffers;
initial_rng = rng_state; initial_rng = rng_state;
#else // __WORK_STEALING__ #else /* __WORK_STEALING__ */
sample = work_array[ray_index]; sample = work_array[ray_index];
int tile_index = ray_index / parallel_samples; int tile_index = ray_index / parallel_samples;
/* buffer and rng_state's stride is "stride". Find x and y using ray_index */ /* buffer and rng_state's stride is "stride". Find x and y using ray_index */
tile_x = tile_index % sw; tile_x = tile_index % sw;
tile_y = tile_index / sw; tile_y = tile_index / sw;
my_sample_tile = ray_index - (tile_index * parallel_samples); my_sample_tile = ray_index - (tile_index * parallel_samples);
#endif #endif /* __WORK_STEALING__ */
rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride; rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride; per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
@ -189,11 +195,12 @@ ccl_device char kernel_background_buffer_update(
/* If work is invalid, this means no more work is available and the thread may exit */ /* If work is invalid, this means no more work is available and the thread may exit */
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
} }
#else #else /* __WORK_STEALING__ */
if((sample + parallel_samples) >= end_sample) { if((sample + parallel_samples) >= end_sample) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
} }
#endif #endif /* __WORK_STEALING__ */
if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
#ifdef __WORK_STEALING__ #ifdef __WORK_STEALING__
work_array[ray_index] = my_work; work_array[ray_index] = my_work;
@ -208,20 +215,22 @@ ccl_device char kernel_background_buffer_update(
/* Remap per_sample_output_buffers according to the current work */ /* Remap per_sample_output_buffers according to the current work */
per_sample_output_buffers = initial_per_sample_output_buffers per_sample_output_buffers = initial_per_sample_output_buffers
+ (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride; + (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
#else #else /* __WORK_STEALING__ */
work_array[ray_index] = sample + parallel_samples; work_array[ray_index] = sample + parallel_samples;
sample = work_array[ray_index]; sample = work_array[ray_index];
/* Get ray position from ray index */ /* Get ray position from ray index */
pixel_x = sx + ((ray_index / parallel_samples) % sw); pixel_x = sx + ((ray_index / parallel_samples) % sw);
pixel_y = sy + ((ray_index / parallel_samples) / sw); pixel_y = sy + ((ray_index / parallel_samples) / sw);
#endif #endif /* __WORK_STEALING__ */
/* initialize random numbers and ray */ /* Initialize random numbers and ray. */
kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, rng, ray); kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, rng, ray);
if(ray->t != 0.0f) { if(ray->t != 0.0f) {
/* Initialize throughput, L_transparent, Ray, PathState; These rays proceed with path-iteration*/ /* Initialize throughput, L_transparent, Ray, PathState;
* These rays proceed with path-iteration.
*/
*throughput = make_float3(1.0f, 1.0f, 1.0f); *throughput = make_float3(1.0f, 1.0f, 1.0f);
*L_transparent = 0.0f; *L_transparent = 0.0f;
path_radiance_init(L, kernel_data.film.use_light_pass); path_radiance_init(L, kernel_data.film.use_light_pass);
@ -232,9 +241,9 @@ ccl_device char kernel_background_buffer_update(
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
enqueue_flag = 1; enqueue_flag = 1;
} else { } else {
/*These rays do not participate in path-iteration */ /* These rays do not participate in path-iteration. */
float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
/* accumulate result in output buffer */ /* Accumulate result in output buffer. */
kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad); kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad);
path_rng_end(kg, rng_state, *rng); path_rng_end(kg, rng_state, *rng);

@ -287,9 +287,9 @@ ccl_device void kernel_data_init(
work_pool_wgs[group_index] = 0; work_pool_wgs[group_index] = 0;
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
#endif // __WORK_STEALING__ #endif /* __WORK_STEALING__ */
/* Initialize queue data and queue index */ /* Initialize queue data and queue index. */
if(thread_index < queuesize) { if(thread_index < queuesize) {
/* Initialize active ray queue */ /* Initialize active ray queue */
Queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; Queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
@ -319,7 +319,9 @@ ccl_device void kernel_data_init(
int ray_index = x + y * (sw * parallel_samples); int ray_index = x + y * (sw * parallel_samples);
/* This is the first assignment to ray_state; So we dont use ASSIGN_RAY_STATE macro */ /* This is the first assignment to ray_state;
* So we don't use ASSIGN_RAY_STATE macro.
*/
ray_state[ray_index] = RAY_ACTIVE; ray_state[ray_index] = RAY_ACTIVE;
unsigned int my_sample; unsigned int my_sample;
@ -331,58 +333,76 @@ ccl_device void kernel_data_init(
#ifdef __WORK_STEALING__ #ifdef __WORK_STEALING__
unsigned int my_work = 0; unsigned int my_work = 0;
/* get work */ /* Get work. */
get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index); get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index);
/* Get the sample associated with the work */ /* Get the sample associated with the work. */
my_sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample; my_sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
my_sample_tile = 0; my_sample_tile = 0;
/* Get pixel and tile position associated with the work */ /* Get pixel and tile position associated with the work. */
get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index); get_pixel_tile_position(&pixel_x, &pixel_y,
&tile_x, &tile_y,
my_work,
sw, sh, sx, sy,
parallel_samples,
ray_index);
work_array[ray_index] = my_work; work_array[ray_index] = my_work;
#else // __WORK_STEALING__ #else /* __WORK_STEALING__ */
unsigned int tile_index = ray_index / parallel_samples; unsigned int tile_index = ray_index / parallel_samples;
tile_x = tile_index % sw; tile_x = tile_index % sw;
tile_y = tile_index / sw; tile_y = tile_index / sw;
my_sample_tile = ray_index - (tile_index * parallel_samples); my_sample_tile = ray_index - (tile_index * parallel_samples);
my_sample = my_sample_tile + start_sample; my_sample = my_sample_tile + start_sample;
/* Initialize work array */ /* Initialize work array. */
work_array[ray_index] = my_sample ; work_array[ray_index] = my_sample ;
/* Calculate pixel position of this ray */ /* Calculate pixel position of this ray. */
pixel_x = sx + tile_x; pixel_x = sx + tile_x;
pixel_y = sy + tile_y; pixel_y = sy + tile_y;
#endif // __WORK_STEALING__ #endif /* __WORK_STEALING__ */
rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride; rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
/* Initialise per_sample_output_buffers to all zeros */ /* Initialise per_sample_output_buffers to all zeros. */
per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + (my_sample_tile)) * kernel_data.film.pass_stride; per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + (my_sample_tile)) * kernel_data.film.pass_stride;
int per_sample_output_buffers_iterator = 0; int per_sample_output_buffers_iterator = 0;
for(per_sample_output_buffers_iterator = 0; per_sample_output_buffers_iterator < kernel_data.film.pass_stride; per_sample_output_buffers_iterator++) { for(per_sample_output_buffers_iterator = 0;
per_sample_output_buffers_iterator < kernel_data.film.pass_stride;
per_sample_output_buffers_iterator++)
{
per_sample_output_buffers[per_sample_output_buffers_iterator] = 0.0f; per_sample_output_buffers[per_sample_output_buffers_iterator] = 0.0f;
} }
/* initialize random numbers and ray */ /* Initialize random numbers and ray. */
kernel_path_trace_setup(kg, rng_state, my_sample, pixel_x, pixel_y, &rng_coop[ray_index], &Ray_coop[ray_index]); kernel_path_trace_setup(kg,
rng_state,
my_sample,
pixel_x, pixel_y,
&rng_coop[ray_index],
&Ray_coop[ray_index]);
if(Ray_coop[ray_index].t != 0.0f) { if(Ray_coop[ray_index].t != 0.0f) {
/* Initialize throuput, L_transparent, Ray, PathState; These rays proceed with path-iteration*/ /* Initialize throuput, L_transparent, Ray, PathState;
* These rays proceed with path-iteration.
*/
throughput_coop[ray_index] = make_float3(1.0f, 1.0f, 1.0f); throughput_coop[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
L_transparent_coop[ray_index] = 0.0f; L_transparent_coop[ray_index] = 0.0f;
path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass); path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass);
path_state_init(kg, &PathState_coop[ray_index], &rng_coop[ray_index], my_sample, &Ray_coop[ray_index]); path_state_init(kg,
&PathState_coop[ray_index],
&rng_coop[ray_index],
my_sample,
&Ray_coop[ray_index]);
#ifdef __KERNEL_DEBUG__ #ifdef __KERNEL_DEBUG__
debug_data_init(&debugdata_coop[ray_index]); debug_data_init(&debugdata_coop[ray_index]);
#endif #endif
} else { } else {
/*These rays do not participate in path-iteration */ /* These rays do not participate in path-iteration. */
float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
/* accumulate result in output buffer */ /* Accumulate result in output buffer. */
kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad); kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
path_rng_end(kg, rng_state, rng_coop[ray_index]); path_rng_end(kg, rng_state, rng_coop[ray_index]);
@ -390,7 +410,7 @@ ccl_device void kernel_data_init(
} }
} }
/* Mark rest of the ray-state indices as RAY_INACTIVE */ /* Mark rest of the ray-state indices as RAY_INACTIVE. */
if(thread_index < (get_global_size(0) * get_global_size(1)) - (sh * (sw * parallel_samples))) { if(thread_index < (get_global_size(0) * get_global_size(1)) - (sh * (sw * parallel_samples))) {
/* First assignment, hence we dont use ASSIGN_RAY_STATE macro */ /* First assignment, hence we dont use ASSIGN_RAY_STATE macro */
ray_state[((sw * parallel_samples) * sh) + thread_index] = RAY_INACTIVE; ray_state[((sw * parallel_samples) * sh) + thread_index] = RAY_INACTIVE;

@ -63,7 +63,7 @@ ccl_device char kernel_direct_lighting(
{ {
char enqueue_flag = 0; char enqueue_flag = 0;
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
/* Load kernel globals structure and ShaderData structure */ /* Load kernel globals structure and ShaderData structure. */
KernelGlobals *kg = (KernelGlobals *)globals; KernelGlobals *kg = (KernelGlobals *)globals;
ShaderData *sd = (ShaderData *)shader_data; ShaderData *sd = (ShaderData *)shader_data;
ShaderData *sd_DL = (ShaderData *)shader_DL; ShaderData *sd_DL = (ShaderData *)shader_DL;
@ -72,15 +72,22 @@ ccl_device char kernel_direct_lighting(
/* direct lighting */ /* direct lighting */
#ifdef __EMISSION__ #ifdef __EMISSION__
if((kernel_data.integrator.use_direct_light && (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL))) { if((kernel_data.integrator.use_direct_light &&
/* sample illumination from lights to find path contribution */ (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL)))
{
/* Sample illumination from lights to find path contribution. */
ccl_global RNG* rng = &rng_coop[ray_index]; ccl_global RNG* rng = &rng_coop[ray_index];
float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT); float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
float light_u, light_v; float light_u, light_v;
path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v); path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
LightSample ls; LightSample ls;
light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls); light_sample(kg,
light_t, light_u, light_v,
ccl_fetch(sd, time),
ccl_fetch(sd, P),
state->bounce,
&ls);
Ray light_ray; Ray light_ray;
#ifdef __OBJECT_MOTION__ #ifdef __OBJECT_MOTION__
@ -89,17 +96,21 @@ ccl_device char kernel_direct_lighting(
BsdfEval L_light; BsdfEval L_light;
bool is_lamp; bool is_lamp;
if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce, sd_DL)) { if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp,
/* write intermediate data to global memory to access from the next kernel */ state->bounce, state->transparent_bounce, sd_DL))
{
/* Write intermediate data to global memory to access from
* the next kernel.
*/
LightRay_coop[ray_index] = light_ray; LightRay_coop[ray_index] = light_ray;
BSDFEval_coop[ray_index] = L_light; BSDFEval_coop[ray_index] = L_light;
ISLamp_coop[ray_index] = is_lamp; ISLamp_coop[ray_index] = is_lamp;
/// mark ray state for next shadow kernel /* Mark ray state for next shadow kernel. */
ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL); ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
enqueue_flag = 1; enqueue_flag = 1;
} }
} }
#endif #endif /* __EMISSION__ */
} }
return enqueue_flag; return enqueue_flag;
} }

@ -121,21 +121,30 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
#ifdef __WORK_STEALING__ #ifdef __WORK_STEALING__
my_work = work_array[ray_index]; my_work = work_array[ray_index];
sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample; sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index); get_pixel_tile_position(&pixel_x, &pixel_y,
&tile_x, &tile_y,
my_work,
sw, sh, sx, sy,
parallel_samples,
ray_index);
my_sample_tile = 0; my_sample_tile = 0;
#else // __WORK_STEALING__ #else /* __WORK_STEALING__ */
sample = work_array[ray_index]; sample = work_array[ray_index];
/* buffer's stride is "stride"; Find x and y using ray_index */ /* Buffer's stride is "stride"; Find x and y using ray_index. */
int tile_index = ray_index / parallel_samples; int tile_index = ray_index / parallel_samples;
tile_x = tile_index % sw; tile_x = tile_index % sw;
tile_y = tile_index / sw; tile_y = tile_index / sw;
my_sample_tile = ray_index - (tile_index * parallel_samples); my_sample_tile = ray_index - (tile_index * parallel_samples);
#endif // __WORK_STEALING__ #endif /* __WORK_STEALING__ */
per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride; per_sample_output_buffers +=
(((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) *
kernel_data.film.pass_stride;
/* holdout */ /* holdout */
#ifdef __HOLDOUT__ #ifdef __HOLDOUT__
if((ccl_fetch(sd, flag) & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state->flag & PATH_RAY_CAMERA)) { if((ccl_fetch(sd, flag) & (SD_HOLDOUT|SD_HOLDOUT_MASK)) &&
(state->flag & PATH_RAY_CAMERA))
{
if(kernel_data.background.transparent) { if(kernel_data.background.transparent) {
float3 holdout_weight; float3 holdout_weight;
@ -153,20 +162,24 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
*enqueue_flag = 1; *enqueue_flag = 1;
} }
} }
#endif #endif /* __HOLDOUT__ */
} }
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
PathRadiance *L = &PathRadiance_coop[ray_index]; PathRadiance *L = &PathRadiance_coop[ray_index];
/* holdout mask objects do not write data passes */ /* Holdout mask objects do not write data passes. */
kernel_write_data_passes(kg, per_sample_output_buffers, L, sd, sample, state, throughput); kernel_write_data_passes(kg,
per_sample_output_buffers,
/* blurring of bsdf after bounces, for rays that have a small likelihood L,
* of following this particular path (diffuse, rough glossy) */ sd,
sample,
state,
throughput);
/* Blurring of bsdf after bounces, for rays that have a small likelihood
* of following this particular path (diffuse, rough glossy).
*/
if(kernel_data.integrator.filter_glossy != FLT_MAX) { if(kernel_data.integrator.filter_glossy != FLT_MAX) {
float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf; float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
if(blur_pdf < 1.0f) { if(blur_pdf < 1.0f) {
float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f; float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
shader_bsdf_blur(kg, sd, blur_roughness); shader_bsdf_blur(kg, sd, blur_roughness);
@ -176,15 +189,21 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
#ifdef __EMISSION__ #ifdef __EMISSION__
/* emission */ /* emission */
if(ccl_fetch(sd, flag) & SD_EMISSION) { if(ccl_fetch(sd, flag) & SD_EMISSION) {
/* todo: is isect.t wrong here for transparent surfaces? */ /* TODO(sergey): is isect.t wrong here for transparent surfaces? */
float3 emission = indirect_primitive_emission(kg, sd, Intersection_coop[ray_index].t, state->flag, state->ray_pdf); float3 emission = indirect_primitive_emission(
kg,
sd,
Intersection_coop[ray_index].t,
state->flag,
state->ray_pdf);
path_radiance_accum_emission(L, throughput, emission, state->bounce); path_radiance_accum_emission(L, throughput, emission, state->bounce);
} }
#endif #endif /* __EMISSION__ */
/* path termination. this is a strange place to put the termination, it's /* Path termination. this is a strange place to put the termination, it's
* mainly due to the mixed in MIS that we use. gives too many unneeded * mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate */ * shader evaluations, only need emission if we are going to terminate.
*/
float probability = path_state_terminate_probability(kg, state, throughput); float probability = path_state_terminate_probability(kg, state, throughput);
if(probability == 0.0f) { if(probability == 0.0f) {
@ -195,7 +214,6 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
if(probability != 1.0f) { if(probability != 1.0f) {
float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE); float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
if(terminate >= probability) { if(terminate >= probability) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
*enqueue_flag = 1; *enqueue_flag = 1;
@ -209,7 +227,9 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
#ifdef __AO__ #ifdef __AO__
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
/* ambient occlusion */ /* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (ccl_fetch(sd, flag) & SD_AO)) { if(kernel_data.integrator.use_ambient_occlusion ||
(ccl_fetch(sd, flag) & SD_AO))
{
/* todo: solve correlation */ /* todo: solve correlation */
float bsdf_u, bsdf_v; float bsdf_u, bsdf_v;
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
@ -240,5 +260,5 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
} }
} }
} }
#endif #endif /* __AO__ */
} }

@ -56,7 +56,9 @@ ccl_device void kernel_lamp_emission(
int parallel_samples, /* Number of samples to be processed in parallel */ int parallel_samples, /* Number of samples to be processed in parallel */
int ray_index) int ray_index)
{ {
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) || IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND))
{
KernelGlobals *kg = (KernelGlobals *)globals; KernelGlobals *kg = (KernelGlobals *)globals;
ShaderData *sd = (ShaderData *)shader_data; ShaderData *sd = (ShaderData *)shader_data;
PathRadiance *L = &PathRadiance_coop[ray_index]; PathRadiance *L = &PathRadiance_coop[ray_index];
@ -84,7 +86,8 @@ ccl_device void kernel_lamp_emission(
path_radiance_accum_emission(L, throughput, emission, state.bounce); path_radiance_accum_emission(L, throughput, emission, state.bounce);
} }
} }
#endif #endif /* __LAMP_MIS__ */
/* __VOLUME__ feature is disabled */ /* __VOLUME__ feature is disabled */
#if 0 #if 0
#ifdef __VOLUME__ #ifdef __VOLUME__
@ -149,7 +152,7 @@ ccl_device void kernel_lamp_emission(
} }
} }
else else
#endif #endif /* __VOLUME_DECOUPLED__ */
{ {
/* integrate along volume segment with distance sampling */ /* integrate along volume segment with distance sampling */
ShaderData volume_sd; ShaderData volume_sd;
@ -167,10 +170,10 @@ ccl_device void kernel_lamp_emission(
else else
break; break;
} }
#endif #endif /* __VOLUME_SCATTER__ */
} }
} }
#endif #endif /* __VOLUME__ */
#endif #endif
} }
} }

@ -81,14 +81,16 @@ ccl_device char kernel_next_iteration_setup(
{ {
char enqueue_flag = 0; char enqueue_flag = 0;
/* Load kernel globals structure and ShaderData structure */ /* Load kernel globals structure and ShaderData structure. */
KernelGlobals *kg = (KernelGlobals *)globals; KernelGlobals *kg = (KernelGlobals *)globals;
ShaderData *sd = (ShaderData *)shader_data; ShaderData *sd = (ShaderData *)shader_data;
PathRadiance *L = 0x0; PathRadiance *L = 0x0;
ccl_global PathState *state = 0x0; ccl_global PathState *state = 0x0;
/* Path radiance update for AO/Direct_lighting's shadow blocked */ /* Path radiance update for AO/Direct_lighting's shadow blocked. */
if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) || IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) { if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||
IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO))
{
state = &PathState_coop[ray_index]; state = &PathState_coop[ray_index];
L = &PathRadiance_coop[ray_index]; L = &PathRadiance_coop[ray_index];
float3 _throughput = throughput_coop[ray_index]; float3 _throughput = throughput_coop[ray_index];
@ -97,7 +99,12 @@ ccl_device char kernel_next_iteration_setup(
float3 shadow = LightRay_ao_coop[ray_index].P; float3 shadow = LightRay_ao_coop[ray_index].P;
char update_path_radiance = LightRay_ao_coop[ray_index].t; char update_path_radiance = LightRay_ao_coop[ray_index].t;
if(update_path_radiance) { if(update_path_radiance) {
path_radiance_accum_ao(L, _throughput, AOAlpha_coop[ray_index], AOBSDF_coop[ray_index], shadow, state->bounce); path_radiance_accum_ao(L,
_throughput,
AOAlpha_coop[ray_index],
AOBSDF_coop[ray_index],
shadow,
state->bounce);
} }
REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO); REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
} }
@ -107,7 +114,13 @@ ccl_device char kernel_next_iteration_setup(
char update_path_radiance = LightRay_dl_coop[ray_index].t; char update_path_radiance = LightRay_dl_coop[ray_index].t;
if(update_path_radiance) { if(update_path_radiance) {
BsdfEval L_light = BSDFEval_coop[ray_index]; BsdfEval L_light = BSDFEval_coop[ray_index];
path_radiance_accum_light(L, _throughput, &L_light, shadow, 1.0f, state->bounce, ISLamp_coop[ray_index]); path_radiance_accum_light(L,
_throughput,
&L_light,
shadow,
1.0f,
state->bounce,
ISLamp_coop[ray_index]);
} }
REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL); REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
} }

@ -63,7 +63,12 @@ ccl_device void kernel_shader_eval(
ccl_global PathState *state = &PathState_coop[ray_index]; ccl_global PathState *state = &PathState_coop[ray_index];
Ray ray = Ray_coop[ray_index]; Ray ray = Ray_coop[ray_index];
shader_setup_from_ray(kg, sd, isect, &ray, state->bounce, state->transparent_bounce); shader_setup_from_ray(kg,
sd,
isect,
&ray,
state->bounce,
state->transparent_bounce);
float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF); float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
shader_eval_surface(kg, sd, rbsdf, state->flag, SHADER_CONTEXT_MAIN); shader_eval_surface(kg, sd, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
} }

@ -14,8 +14,8 @@
* limitations under the License. * limitations under the License.
*/ */
#ifndef _KERNEL_SPLIT_H_ #ifndef __KERNEL_SPLIT_H__
#define _KERNEL_SPLIT_H_ #define __KERNEL_SPLIT_H__
#include "kernel_compat_opencl.h" #include "kernel_compat_opencl.h"
#include "kernel_math.h" #include "kernel_math.h"
@ -59,4 +59,4 @@
#include "kernel_queues.h" #include "kernel_queues.h"
#include "kernel_work_stealing.h" #include "kernel_work_stealing.h"
#endif #endif /* __KERNEL_SPLIT_H__ */

@ -48,7 +48,9 @@ ccl_device void kernel_sum_all_radiance(
for(sample_iterator = 0; sample_iterator < parallel_samples; sample_iterator++) { for(sample_iterator = 0; sample_iterator < parallel_samples; sample_iterator++) {
for(pass_stride_iterator = 0; pass_stride_iterator < num_floats; pass_stride_iterator++) { for(pass_stride_iterator = 0; pass_stride_iterator < num_floats; pass_stride_iterator++) {
*(buffer + pass_stride_iterator) = (start_sample == 0 && sample_iterator == 0) ? *(per_sample_output_buffer + pass_stride_iterator) *(buffer + pass_stride_iterator) =
(start_sample == 0 && sample_iterator == 0)
? *(per_sample_output_buffer + pass_stride_iterator)
: *(buffer + pass_stride_iterator) + *(per_sample_output_buffer + pass_stride_iterator); : *(buffer + pass_stride_iterator) + *(per_sample_output_buffer + pass_stride_iterator);
} }
per_sample_output_buffer += sample_stride; per_sample_output_buffer += sample_stride;