forked from bartvdbraak/blender
Cycles: Code cleanup, split kernel
This commit is contained in:
parent
da192fb3a7
commit
92022218c2
@ -130,18 +130,24 @@ ccl_device char kernel_background_buffer_update(
|
|||||||
#ifdef __WORK_STEALING__
|
#ifdef __WORK_STEALING__
|
||||||
my_work = work_array[ray_index];
|
my_work = work_array[ray_index];
|
||||||
sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
|
sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
|
||||||
get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index);
|
get_pixel_tile_position(&pixel_x, &pixel_y,
|
||||||
|
&tile_x, &tile_y,
|
||||||
|
my_work,
|
||||||
|
sw, sh, sx, sy,
|
||||||
|
parallel_samples,
|
||||||
|
ray_index);
|
||||||
my_sample_tile = 0;
|
my_sample_tile = 0;
|
||||||
initial_per_sample_output_buffers = per_sample_output_buffers;
|
initial_per_sample_output_buffers = per_sample_output_buffers;
|
||||||
initial_rng = rng_state;
|
initial_rng = rng_state;
|
||||||
#else // __WORK_STEALING__
|
#else /* __WORK_STEALING__ */
|
||||||
sample = work_array[ray_index];
|
sample = work_array[ray_index];
|
||||||
int tile_index = ray_index / parallel_samples;
|
int tile_index = ray_index / parallel_samples;
|
||||||
/* buffer and rng_state's stride is "stride". Find x and y using ray_index */
|
/* buffer and rng_state's stride is "stride". Find x and y using ray_index */
|
||||||
tile_x = tile_index % sw;
|
tile_x = tile_index % sw;
|
||||||
tile_y = tile_index / sw;
|
tile_y = tile_index / sw;
|
||||||
my_sample_tile = ray_index - (tile_index * parallel_samples);
|
my_sample_tile = ray_index - (tile_index * parallel_samples);
|
||||||
#endif
|
#endif /* __WORK_STEALING__ */
|
||||||
|
|
||||||
rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
|
rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
|
||||||
per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
|
per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
|
||||||
|
|
||||||
@ -189,11 +195,12 @@ ccl_device char kernel_background_buffer_update(
|
|||||||
/* If work is invalid, this means no more work is available and the thread may exit */
|
/* If work is invalid, this means no more work is available and the thread may exit */
|
||||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
|
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
|
||||||
}
|
}
|
||||||
#else
|
#else /* __WORK_STEALING__ */
|
||||||
if((sample + parallel_samples) >= end_sample) {
|
if((sample + parallel_samples) >= end_sample) {
|
||||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
|
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
|
||||||
}
|
}
|
||||||
#endif
|
#endif /* __WORK_STEALING__ */
|
||||||
|
|
||||||
if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
|
if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
|
||||||
#ifdef __WORK_STEALING__
|
#ifdef __WORK_STEALING__
|
||||||
work_array[ray_index] = my_work;
|
work_array[ray_index] = my_work;
|
||||||
@ -208,20 +215,22 @@ ccl_device char kernel_background_buffer_update(
|
|||||||
/* Remap per_sample_output_buffers according to the current work */
|
/* Remap per_sample_output_buffers according to the current work */
|
||||||
per_sample_output_buffers = initial_per_sample_output_buffers
|
per_sample_output_buffers = initial_per_sample_output_buffers
|
||||||
+ (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
|
+ (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
|
||||||
#else
|
#else /* __WORK_STEALING__ */
|
||||||
work_array[ray_index] = sample + parallel_samples;
|
work_array[ray_index] = sample + parallel_samples;
|
||||||
sample = work_array[ray_index];
|
sample = work_array[ray_index];
|
||||||
|
|
||||||
/* Get ray position from ray index */
|
/* Get ray position from ray index */
|
||||||
pixel_x = sx + ((ray_index / parallel_samples) % sw);
|
pixel_x = sx + ((ray_index / parallel_samples) % sw);
|
||||||
pixel_y = sy + ((ray_index / parallel_samples) / sw);
|
pixel_y = sy + ((ray_index / parallel_samples) / sw);
|
||||||
#endif
|
#endif /* __WORK_STEALING__ */
|
||||||
|
|
||||||
/* initialize random numbers and ray */
|
/* Initialize random numbers and ray. */
|
||||||
kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, rng, ray);
|
kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, rng, ray);
|
||||||
|
|
||||||
if(ray->t != 0.0f) {
|
if(ray->t != 0.0f) {
|
||||||
/* Initialize throughput, L_transparent, Ray, PathState; These rays proceed with path-iteration*/
|
/* Initialize throughput, L_transparent, Ray, PathState;
|
||||||
|
* These rays proceed with path-iteration.
|
||||||
|
*/
|
||||||
*throughput = make_float3(1.0f, 1.0f, 1.0f);
|
*throughput = make_float3(1.0f, 1.0f, 1.0f);
|
||||||
*L_transparent = 0.0f;
|
*L_transparent = 0.0f;
|
||||||
path_radiance_init(L, kernel_data.film.use_light_pass);
|
path_radiance_init(L, kernel_data.film.use_light_pass);
|
||||||
@ -232,9 +241,9 @@ ccl_device char kernel_background_buffer_update(
|
|||||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
|
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
|
||||||
enqueue_flag = 1;
|
enqueue_flag = 1;
|
||||||
} else {
|
} else {
|
||||||
/*These rays do not participate in path-iteration */
|
/* These rays do not participate in path-iteration. */
|
||||||
float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||||
/* accumulate result in output buffer */
|
/* Accumulate result in output buffer. */
|
||||||
kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad);
|
kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad);
|
||||||
path_rng_end(kg, rng_state, *rng);
|
path_rng_end(kg, rng_state, *rng);
|
||||||
|
|
||||||
|
@ -287,9 +287,9 @@ ccl_device void kernel_data_init(
|
|||||||
work_pool_wgs[group_index] = 0;
|
work_pool_wgs[group_index] = 0;
|
||||||
}
|
}
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
#endif // __WORK_STEALING__
|
#endif /* __WORK_STEALING__ */
|
||||||
|
|
||||||
/* Initialize queue data and queue index */
|
/* Initialize queue data and queue index. */
|
||||||
if(thread_index < queuesize) {
|
if(thread_index < queuesize) {
|
||||||
/* Initialize active ray queue */
|
/* Initialize active ray queue */
|
||||||
Queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
|
Queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
|
||||||
@ -319,7 +319,9 @@ ccl_device void kernel_data_init(
|
|||||||
|
|
||||||
int ray_index = x + y * (sw * parallel_samples);
|
int ray_index = x + y * (sw * parallel_samples);
|
||||||
|
|
||||||
/* This is the first assignment to ray_state; So we dont use ASSIGN_RAY_STATE macro */
|
/* This is the first assignment to ray_state;
|
||||||
|
* So we don't use ASSIGN_RAY_STATE macro.
|
||||||
|
*/
|
||||||
ray_state[ray_index] = RAY_ACTIVE;
|
ray_state[ray_index] = RAY_ACTIVE;
|
||||||
|
|
||||||
unsigned int my_sample;
|
unsigned int my_sample;
|
||||||
@ -331,58 +333,76 @@ ccl_device void kernel_data_init(
|
|||||||
|
|
||||||
#ifdef __WORK_STEALING__
|
#ifdef __WORK_STEALING__
|
||||||
unsigned int my_work = 0;
|
unsigned int my_work = 0;
|
||||||
/* get work */
|
/* Get work. */
|
||||||
get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index);
|
get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index);
|
||||||
/* Get the sample associated with the work */
|
/* Get the sample associated with the work. */
|
||||||
my_sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
|
my_sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
|
||||||
|
|
||||||
my_sample_tile = 0;
|
my_sample_tile = 0;
|
||||||
|
|
||||||
/* Get pixel and tile position associated with the work */
|
/* Get pixel and tile position associated with the work. */
|
||||||
get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index);
|
get_pixel_tile_position(&pixel_x, &pixel_y,
|
||||||
|
&tile_x, &tile_y,
|
||||||
|
my_work,
|
||||||
|
sw, sh, sx, sy,
|
||||||
|
parallel_samples,
|
||||||
|
ray_index);
|
||||||
work_array[ray_index] = my_work;
|
work_array[ray_index] = my_work;
|
||||||
#else // __WORK_STEALING__
|
#else /* __WORK_STEALING__ */
|
||||||
|
|
||||||
unsigned int tile_index = ray_index / parallel_samples;
|
unsigned int tile_index = ray_index / parallel_samples;
|
||||||
tile_x = tile_index % sw;
|
tile_x = tile_index % sw;
|
||||||
tile_y = tile_index / sw;
|
tile_y = tile_index / sw;
|
||||||
my_sample_tile = ray_index - (tile_index * parallel_samples);
|
my_sample_tile = ray_index - (tile_index * parallel_samples);
|
||||||
my_sample = my_sample_tile + start_sample;
|
my_sample = my_sample_tile + start_sample;
|
||||||
|
|
||||||
/* Initialize work array */
|
/* Initialize work array. */
|
||||||
work_array[ray_index] = my_sample ;
|
work_array[ray_index] = my_sample ;
|
||||||
|
|
||||||
/* Calculate pixel position of this ray */
|
/* Calculate pixel position of this ray. */
|
||||||
pixel_x = sx + tile_x;
|
pixel_x = sx + tile_x;
|
||||||
pixel_y = sy + tile_y;
|
pixel_y = sy + tile_y;
|
||||||
#endif // __WORK_STEALING__
|
#endif /* __WORK_STEALING__ */
|
||||||
|
|
||||||
rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
|
rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride;
|
||||||
|
|
||||||
/* Initialise per_sample_output_buffers to all zeros */
|
/* Initialise per_sample_output_buffers to all zeros. */
|
||||||
per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + (my_sample_tile)) * kernel_data.film.pass_stride;
|
per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + (my_sample_tile)) * kernel_data.film.pass_stride;
|
||||||
int per_sample_output_buffers_iterator = 0;
|
int per_sample_output_buffers_iterator = 0;
|
||||||
for(per_sample_output_buffers_iterator = 0; per_sample_output_buffers_iterator < kernel_data.film.pass_stride; per_sample_output_buffers_iterator++) {
|
for(per_sample_output_buffers_iterator = 0;
|
||||||
|
per_sample_output_buffers_iterator < kernel_data.film.pass_stride;
|
||||||
|
per_sample_output_buffers_iterator++)
|
||||||
|
{
|
||||||
per_sample_output_buffers[per_sample_output_buffers_iterator] = 0.0f;
|
per_sample_output_buffers[per_sample_output_buffers_iterator] = 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* initialize random numbers and ray */
|
/* Initialize random numbers and ray. */
|
||||||
kernel_path_trace_setup(kg, rng_state, my_sample, pixel_x, pixel_y, &rng_coop[ray_index], &Ray_coop[ray_index]);
|
kernel_path_trace_setup(kg,
|
||||||
|
rng_state,
|
||||||
|
my_sample,
|
||||||
|
pixel_x, pixel_y,
|
||||||
|
&rng_coop[ray_index],
|
||||||
|
&Ray_coop[ray_index]);
|
||||||
|
|
||||||
if(Ray_coop[ray_index].t != 0.0f) {
|
if(Ray_coop[ray_index].t != 0.0f) {
|
||||||
/* Initialize throuput, L_transparent, Ray, PathState; These rays proceed with path-iteration*/
|
/* Initialize throughput, L_transparent, Ray, PathState;
|
||||||
|
* These rays proceed with path-iteration.
|
||||||
|
*/
|
||||||
throughput_coop[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
|
throughput_coop[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
|
||||||
L_transparent_coop[ray_index] = 0.0f;
|
L_transparent_coop[ray_index] = 0.0f;
|
||||||
path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass);
|
path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass);
|
||||||
path_state_init(kg, &PathState_coop[ray_index], &rng_coop[ray_index], my_sample, &Ray_coop[ray_index]);
|
path_state_init(kg,
|
||||||
|
&PathState_coop[ray_index],
|
||||||
|
&rng_coop[ray_index],
|
||||||
|
my_sample,
|
||||||
|
&Ray_coop[ray_index]);
|
||||||
#ifdef __KERNEL_DEBUG__
|
#ifdef __KERNEL_DEBUG__
|
||||||
debug_data_init(&debugdata_coop[ray_index]);
|
debug_data_init(&debugdata_coop[ray_index]);
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
/*These rays do not participate in path-iteration */
|
/* These rays do not participate in path-iteration. */
|
||||||
|
|
||||||
float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||||
/* accumulate result in output buffer */
|
/* Accumulate result in output buffer. */
|
||||||
kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
|
kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
|
||||||
path_rng_end(kg, rng_state, rng_coop[ray_index]);
|
path_rng_end(kg, rng_state, rng_coop[ray_index]);
|
||||||
|
|
||||||
@ -390,7 +410,7 @@ ccl_device void kernel_data_init(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Mark rest of the ray-state indices as RAY_INACTIVE */
|
/* Mark rest of the ray-state indices as RAY_INACTIVE. */
|
||||||
if(thread_index < (get_global_size(0) * get_global_size(1)) - (sh * (sw * parallel_samples))) {
|
if(thread_index < (get_global_size(0) * get_global_size(1)) - (sh * (sw * parallel_samples))) {
|
||||||
/* First assignment, hence we dont use ASSIGN_RAY_STATE macro */
|
/* First assignment, hence we don't use ASSIGN_RAY_STATE macro. */
|
||||||
ray_state[((sw * parallel_samples) * sh) + thread_index] = RAY_INACTIVE;
|
ray_state[((sw * parallel_samples) * sh) + thread_index] = RAY_INACTIVE;
|
||||||
|
@ -63,7 +63,7 @@ ccl_device char kernel_direct_lighting(
|
|||||||
{
|
{
|
||||||
char enqueue_flag = 0;
|
char enqueue_flag = 0;
|
||||||
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
|
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
|
||||||
/* Load kernel globals structure and ShaderData structure */
|
/* Load kernel globals structure and ShaderData structure. */
|
||||||
KernelGlobals *kg = (KernelGlobals *)globals;
|
KernelGlobals *kg = (KernelGlobals *)globals;
|
||||||
ShaderData *sd = (ShaderData *)shader_data;
|
ShaderData *sd = (ShaderData *)shader_data;
|
||||||
ShaderData *sd_DL = (ShaderData *)shader_DL;
|
ShaderData *sd_DL = (ShaderData *)shader_DL;
|
||||||
@ -72,15 +72,22 @@ ccl_device char kernel_direct_lighting(
|
|||||||
|
|
||||||
/* direct lighting */
|
/* direct lighting */
|
||||||
#ifdef __EMISSION__
|
#ifdef __EMISSION__
|
||||||
if((kernel_data.integrator.use_direct_light && (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL))) {
|
if((kernel_data.integrator.use_direct_light &&
|
||||||
/* sample illumination from lights to find path contribution */
|
(ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL)))
|
||||||
|
{
|
||||||
|
/* Sample illumination from lights to find path contribution. */
|
||||||
ccl_global RNG* rng = &rng_coop[ray_index];
|
ccl_global RNG* rng = &rng_coop[ray_index];
|
||||||
float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
|
float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
|
||||||
float light_u, light_v;
|
float light_u, light_v;
|
||||||
path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
|
path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
|
||||||
|
|
||||||
LightSample ls;
|
LightSample ls;
|
||||||
light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
|
light_sample(kg,
|
||||||
|
light_t, light_u, light_v,
|
||||||
|
ccl_fetch(sd, time),
|
||||||
|
ccl_fetch(sd, P),
|
||||||
|
state->bounce,
|
||||||
|
&ls);
|
||||||
|
|
||||||
Ray light_ray;
|
Ray light_ray;
|
||||||
#ifdef __OBJECT_MOTION__
|
#ifdef __OBJECT_MOTION__
|
||||||
@ -89,17 +96,21 @@ ccl_device char kernel_direct_lighting(
|
|||||||
|
|
||||||
BsdfEval L_light;
|
BsdfEval L_light;
|
||||||
bool is_lamp;
|
bool is_lamp;
|
||||||
if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce, sd_DL)) {
|
if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp,
|
||||||
/* write intermediate data to global memory to access from the next kernel */
|
state->bounce, state->transparent_bounce, sd_DL))
|
||||||
|
{
|
||||||
|
/* Write intermediate data to global memory to access from
|
||||||
|
* the next kernel.
|
||||||
|
*/
|
||||||
LightRay_coop[ray_index] = light_ray;
|
LightRay_coop[ray_index] = light_ray;
|
||||||
BSDFEval_coop[ray_index] = L_light;
|
BSDFEval_coop[ray_index] = L_light;
|
||||||
ISLamp_coop[ray_index] = is_lamp;
|
ISLamp_coop[ray_index] = is_lamp;
|
||||||
/// mark ray state for next shadow kernel
|
/* Mark ray state for next shadow kernel. */
|
||||||
ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
|
ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
|
||||||
enqueue_flag = 1;
|
enqueue_flag = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif /* __EMISSION__ */
|
||||||
}
|
}
|
||||||
return enqueue_flag;
|
return enqueue_flag;
|
||||||
}
|
}
|
||||||
|
@ -121,21 +121,30 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
|
|||||||
#ifdef __WORK_STEALING__
|
#ifdef __WORK_STEALING__
|
||||||
my_work = work_array[ray_index];
|
my_work = work_array[ray_index];
|
||||||
sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
|
sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
|
||||||
get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index);
|
get_pixel_tile_position(&pixel_x, &pixel_y,
|
||||||
|
&tile_x, &tile_y,
|
||||||
|
my_work,
|
||||||
|
sw, sh, sx, sy,
|
||||||
|
parallel_samples,
|
||||||
|
ray_index);
|
||||||
my_sample_tile = 0;
|
my_sample_tile = 0;
|
||||||
#else // __WORK_STEALING__
|
#else /* __WORK_STEALING__ */
|
||||||
sample = work_array[ray_index];
|
sample = work_array[ray_index];
|
||||||
/* buffer's stride is "stride"; Find x and y using ray_index */
|
/* Buffer's stride is "stride"; Find x and y using ray_index. */
|
||||||
int tile_index = ray_index / parallel_samples;
|
int tile_index = ray_index / parallel_samples;
|
||||||
tile_x = tile_index % sw;
|
tile_x = tile_index % sw;
|
||||||
tile_y = tile_index / sw;
|
tile_y = tile_index / sw;
|
||||||
my_sample_tile = ray_index - (tile_index * parallel_samples);
|
my_sample_tile = ray_index - (tile_index * parallel_samples);
|
||||||
#endif // __WORK_STEALING__
|
#endif /* __WORK_STEALING__ */
|
||||||
per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
|
per_sample_output_buffers +=
|
||||||
|
(((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) *
|
||||||
|
kernel_data.film.pass_stride;
|
||||||
|
|
||||||
/* holdout */
|
/* holdout */
|
||||||
#ifdef __HOLDOUT__
|
#ifdef __HOLDOUT__
|
||||||
if((ccl_fetch(sd, flag) & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state->flag & PATH_RAY_CAMERA)) {
|
if((ccl_fetch(sd, flag) & (SD_HOLDOUT|SD_HOLDOUT_MASK)) &&
|
||||||
|
(state->flag & PATH_RAY_CAMERA))
|
||||||
|
{
|
||||||
if(kernel_data.background.transparent) {
|
if(kernel_data.background.transparent) {
|
||||||
float3 holdout_weight;
|
float3 holdout_weight;
|
||||||
|
|
||||||
@ -153,20 +162,24 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
|
|||||||
*enqueue_flag = 1;
|
*enqueue_flag = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif /* __HOLDOUT__ */
|
||||||
}
|
}
|
||||||
|
|
||||||
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
|
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
|
||||||
|
|
||||||
PathRadiance *L = &PathRadiance_coop[ray_index];
|
PathRadiance *L = &PathRadiance_coop[ray_index];
|
||||||
/* holdout mask objects do not write data passes */
|
/* Holdout mask objects do not write data passes. */
|
||||||
kernel_write_data_passes(kg, per_sample_output_buffers, L, sd, sample, state, throughput);
|
kernel_write_data_passes(kg,
|
||||||
|
per_sample_output_buffers,
|
||||||
/* blurring of bsdf after bounces, for rays that have a small likelihood
|
L,
|
||||||
* of following this particular path (diffuse, rough glossy) */
|
sd,
|
||||||
|
sample,
|
||||||
|
state,
|
||||||
|
throughput);
|
||||||
|
/* Blurring of bsdf after bounces, for rays that have a small likelihood
|
||||||
|
* of following this particular path (diffuse, rough glossy).
|
||||||
|
*/
|
||||||
if(kernel_data.integrator.filter_glossy != FLT_MAX) {
|
if(kernel_data.integrator.filter_glossy != FLT_MAX) {
|
||||||
float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
|
float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
|
||||||
|
|
||||||
if(blur_pdf < 1.0f) {
|
if(blur_pdf < 1.0f) {
|
||||||
float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
|
float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
|
||||||
shader_bsdf_blur(kg, sd, blur_roughness);
|
shader_bsdf_blur(kg, sd, blur_roughness);
|
||||||
@ -176,15 +189,21 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
|
|||||||
#ifdef __EMISSION__
|
#ifdef __EMISSION__
|
||||||
/* emission */
|
/* emission */
|
||||||
if(ccl_fetch(sd, flag) & SD_EMISSION) {
|
if(ccl_fetch(sd, flag) & SD_EMISSION) {
|
||||||
/* todo: is isect.t wrong here for transparent surfaces? */
|
/* TODO(sergey): is isect.t wrong here for transparent surfaces? */
|
||||||
float3 emission = indirect_primitive_emission(kg, sd, Intersection_coop[ray_index].t, state->flag, state->ray_pdf);
|
float3 emission = indirect_primitive_emission(
|
||||||
|
kg,
|
||||||
|
sd,
|
||||||
|
Intersection_coop[ray_index].t,
|
||||||
|
state->flag,
|
||||||
|
state->ray_pdf);
|
||||||
path_radiance_accum_emission(L, throughput, emission, state->bounce);
|
path_radiance_accum_emission(L, throughput, emission, state->bounce);
|
||||||
}
|
}
|
||||||
#endif
|
#endif /* __EMISSION__ */
|
||||||
|
|
||||||
/* path termination. this is a strange place to put the termination, it's
|
/* Path termination. This is a strange place to put the termination, it's
|
||||||
* mainly due to the mixed in MIS that we use. gives too many unneeded
|
* mainly due to the mixed in MIS that we use. Gives too many unneeded
|
||||||
* shader evaluations, only need emission if we are going to terminate */
|
* shader evaluations, only need emission if we are going to terminate.
|
||||||
|
*/
|
||||||
float probability = path_state_terminate_probability(kg, state, throughput);
|
float probability = path_state_terminate_probability(kg, state, throughput);
|
||||||
|
|
||||||
if(probability == 0.0f) {
|
if(probability == 0.0f) {
|
||||||
@ -195,7 +214,6 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
|
|||||||
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
|
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
|
||||||
if(probability != 1.0f) {
|
if(probability != 1.0f) {
|
||||||
float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
|
float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
|
||||||
|
|
||||||
if(terminate >= probability) {
|
if(terminate >= probability) {
|
||||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
||||||
*enqueue_flag = 1;
|
*enqueue_flag = 1;
|
||||||
@ -209,7 +227,9 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
|
|||||||
#ifdef __AO__
|
#ifdef __AO__
|
||||||
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
|
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
|
||||||
/* ambient occlusion */
|
/* ambient occlusion */
|
||||||
if(kernel_data.integrator.use_ambient_occlusion || (ccl_fetch(sd, flag) & SD_AO)) {
|
if(kernel_data.integrator.use_ambient_occlusion ||
|
||||||
|
(ccl_fetch(sd, flag) & SD_AO))
|
||||||
|
{
|
||||||
/* todo: solve correlation */
|
/* todo: solve correlation */
|
||||||
float bsdf_u, bsdf_v;
|
float bsdf_u, bsdf_v;
|
||||||
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
|
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
|
||||||
@ -240,5 +260,5 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif /* __AO__ */
|
||||||
}
|
}
|
||||||
|
@ -56,7 +56,9 @@ ccl_device void kernel_lamp_emission(
|
|||||||
int parallel_samples, /* Number of samples to be processed in parallel */
|
int parallel_samples, /* Number of samples to be processed in parallel */
|
||||||
int ray_index)
|
int ray_index)
|
||||||
{
|
{
|
||||||
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) || IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
|
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
|
||||||
|
IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND))
|
||||||
|
{
|
||||||
KernelGlobals *kg = (KernelGlobals *)globals;
|
KernelGlobals *kg = (KernelGlobals *)globals;
|
||||||
ShaderData *sd = (ShaderData *)shader_data;
|
ShaderData *sd = (ShaderData *)shader_data;
|
||||||
PathRadiance *L = &PathRadiance_coop[ray_index];
|
PathRadiance *L = &PathRadiance_coop[ray_index];
|
||||||
@ -84,7 +86,8 @@ ccl_device void kernel_lamp_emission(
|
|||||||
path_radiance_accum_emission(L, throughput, emission, state.bounce);
|
path_radiance_accum_emission(L, throughput, emission, state.bounce);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif /* __LAMP_MIS__ */
|
||||||
|
|
||||||
/* __VOLUME__ feature is disabled */
|
/* __VOLUME__ feature is disabled */
|
||||||
#if 0
|
#if 0
|
||||||
#ifdef __VOLUME__
|
#ifdef __VOLUME__
|
||||||
@ -149,7 +152,7 @@ ccl_device void kernel_lamp_emission(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif /* __VOLUME_DECOUPLED__ */
|
||||||
{
|
{
|
||||||
/* integrate along volume segment with distance sampling */
|
/* integrate along volume segment with distance sampling */
|
||||||
ShaderData volume_sd;
|
ShaderData volume_sd;
|
||||||
@ -167,10 +170,10 @@ ccl_device void kernel_lamp_emission(
|
|||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#endif
|
#endif /* __VOLUME_SCATTER__ */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif /* __VOLUME__ */
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -81,14 +81,16 @@ ccl_device char kernel_next_iteration_setup(
|
|||||||
{
|
{
|
||||||
char enqueue_flag = 0;
|
char enqueue_flag = 0;
|
||||||
|
|
||||||
/* Load kernel globals structure and ShaderData structure */
|
/* Load kernel globals structure and ShaderData structure. */
|
||||||
KernelGlobals *kg = (KernelGlobals *)globals;
|
KernelGlobals *kg = (KernelGlobals *)globals;
|
||||||
ShaderData *sd = (ShaderData *)shader_data;
|
ShaderData *sd = (ShaderData *)shader_data;
|
||||||
PathRadiance *L = 0x0;
|
PathRadiance *L = 0x0;
|
||||||
ccl_global PathState *state = 0x0;
|
ccl_global PathState *state = 0x0;
|
||||||
|
|
||||||
/* Path radiance update for AO/Direct_lighting's shadow blocked */
|
/* Path radiance update for AO/Direct_lighting's shadow blocked. */
|
||||||
if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) || IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) {
|
if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||
|
||||||
|
IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO))
|
||||||
|
{
|
||||||
state = &PathState_coop[ray_index];
|
state = &PathState_coop[ray_index];
|
||||||
L = &PathRadiance_coop[ray_index];
|
L = &PathRadiance_coop[ray_index];
|
||||||
float3 _throughput = throughput_coop[ray_index];
|
float3 _throughput = throughput_coop[ray_index];
|
||||||
@ -97,7 +99,12 @@ ccl_device char kernel_next_iteration_setup(
|
|||||||
float3 shadow = LightRay_ao_coop[ray_index].P;
|
float3 shadow = LightRay_ao_coop[ray_index].P;
|
||||||
char update_path_radiance = LightRay_ao_coop[ray_index].t;
|
char update_path_radiance = LightRay_ao_coop[ray_index].t;
|
||||||
if(update_path_radiance) {
|
if(update_path_radiance) {
|
||||||
path_radiance_accum_ao(L, _throughput, AOAlpha_coop[ray_index], AOBSDF_coop[ray_index], shadow, state->bounce);
|
path_radiance_accum_ao(L,
|
||||||
|
_throughput,
|
||||||
|
AOAlpha_coop[ray_index],
|
||||||
|
AOBSDF_coop[ray_index],
|
||||||
|
shadow,
|
||||||
|
state->bounce);
|
||||||
}
|
}
|
||||||
REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
|
REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
|
||||||
}
|
}
|
||||||
@ -107,7 +114,13 @@ ccl_device char kernel_next_iteration_setup(
|
|||||||
char update_path_radiance = LightRay_dl_coop[ray_index].t;
|
char update_path_radiance = LightRay_dl_coop[ray_index].t;
|
||||||
if(update_path_radiance) {
|
if(update_path_radiance) {
|
||||||
BsdfEval L_light = BSDFEval_coop[ray_index];
|
BsdfEval L_light = BSDFEval_coop[ray_index];
|
||||||
path_radiance_accum_light(L, _throughput, &L_light, shadow, 1.0f, state->bounce, ISLamp_coop[ray_index]);
|
path_radiance_accum_light(L,
|
||||||
|
_throughput,
|
||||||
|
&L_light,
|
||||||
|
shadow,
|
||||||
|
1.0f,
|
||||||
|
state->bounce,
|
||||||
|
ISLamp_coop[ray_index]);
|
||||||
}
|
}
|
||||||
REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
|
REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
|
||||||
}
|
}
|
||||||
|
@ -63,7 +63,12 @@ ccl_device void kernel_shader_eval(
|
|||||||
ccl_global PathState *state = &PathState_coop[ray_index];
|
ccl_global PathState *state = &PathState_coop[ray_index];
|
||||||
Ray ray = Ray_coop[ray_index];
|
Ray ray = Ray_coop[ray_index];
|
||||||
|
|
||||||
shader_setup_from_ray(kg, sd, isect, &ray, state->bounce, state->transparent_bounce);
|
shader_setup_from_ray(kg,
|
||||||
|
sd,
|
||||||
|
isect,
|
||||||
|
&ray,
|
||||||
|
state->bounce,
|
||||||
|
state->transparent_bounce);
|
||||||
float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
|
float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
|
||||||
shader_eval_surface(kg, sd, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
|
shader_eval_surface(kg, sd, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
|
||||||
}
|
}
|
||||||
|
@ -14,8 +14,8 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _KERNEL_SPLIT_H_
|
#ifndef __KERNEL_SPLIT_H__
|
||||||
#define _KERNEL_SPLIT_H_
|
#define __KERNEL_SPLIT_H__
|
||||||
|
|
||||||
#include "kernel_compat_opencl.h"
|
#include "kernel_compat_opencl.h"
|
||||||
#include "kernel_math.h"
|
#include "kernel_math.h"
|
||||||
@ -59,4 +59,4 @@
|
|||||||
#include "kernel_queues.h"
|
#include "kernel_queues.h"
|
||||||
#include "kernel_work_stealing.h"
|
#include "kernel_work_stealing.h"
|
||||||
|
|
||||||
#endif
|
#endif /* __KERNEL_SPLIT_H__ */
|
||||||
|
@ -48,8 +48,10 @@ ccl_device void kernel_sum_all_radiance(
|
|||||||
|
|
||||||
for(sample_iterator = 0; sample_iterator < parallel_samples; sample_iterator++) {
|
for(sample_iterator = 0; sample_iterator < parallel_samples; sample_iterator++) {
|
||||||
for(pass_stride_iterator = 0; pass_stride_iterator < num_floats; pass_stride_iterator++) {
|
for(pass_stride_iterator = 0; pass_stride_iterator < num_floats; pass_stride_iterator++) {
|
||||||
*(buffer + pass_stride_iterator) = (start_sample == 0 && sample_iterator == 0) ? *(per_sample_output_buffer + pass_stride_iterator)
|
*(buffer + pass_stride_iterator) =
|
||||||
: *(buffer + pass_stride_iterator) + *(per_sample_output_buffer + pass_stride_iterator);
|
(start_sample == 0 && sample_iterator == 0)
|
||||||
|
? *(per_sample_output_buffer + pass_stride_iterator)
|
||||||
|
: *(buffer + pass_stride_iterator) + *(per_sample_output_buffer + pass_stride_iterator);
|
||||||
}
|
}
|
||||||
per_sample_output_buffer += sample_stride;
|
per_sample_output_buffer += sample_stride;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user