Cycles: Code cleanup in split kernel, whitespaces

This commit is contained in:
Sergey Sharybin 2015-07-03 10:56:40 +02:00
parent 80f344fd95
commit b9f89b1647
6 changed files with 50 additions and 61 deletions

@ -27,15 +27,14 @@
/* /*
* Enqueue ray index into the queue * Enqueue ray index into the queue
*/ */
ccl_device void enqueue_ray_index ( ccl_device void enqueue_ray_index(
int ray_index, /* Ray index to be enqueued */ int ray_index, /* Ray index to be enqueued. */
int queue_number, /* Queue in which the ray index should be enqueued*/ int queue_number, /* Queue in which the ray index should be enqueued. */
ccl_global int *queues, /* Buffer of all queues */ ccl_global int *queues, /* Buffer of all queues. */
int queue_size, /* Size of each queue */ int queue_size, /* Size of each queue. */
ccl_global int *queue_index /* Array of size num_queues; Used for atomic increment */ ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */
)
{ {
/* This thread's queue index */ /* This thread's queue index. */
int my_queue_index = atomic_inc(&queue_index[queue_number]) + (queue_number * queue_size); int my_queue_index = atomic_inc(&queue_index[queue_number]) + (queue_number * queue_size);
queues[my_queue_index] = ray_index; queues[my_queue_index] = ray_index;
} }
@ -47,52 +46,48 @@ ccl_device void enqueue_ray_index (
* i.e All ray's in the queue has been successfully allocated and there * i.e All ray's in the queue has been successfully allocated and there
* is no more ray to allocate to other threads. * is no more ray to allocate to other threads.
*/ */
ccl_device int get_ray_index ( ccl_device int get_ray_index(
int thread_index, /* Global thread index */ int thread_index, /* Global thread index. */
int queue_number, /* Queue to operate on */ int queue_number, /* Queue to operate on. */
ccl_global int *queues, /* Buffer of all queues */ ccl_global int *queues, /* Buffer of all queues. */
int queuesize, /* Size of a queue */ int queuesize, /* Size of a queue. */
int empty_queue /* Empty the queue slot as soon as we fetch the ray index */ int empty_queue) /* Empty the queue slot as soon as we fetch the ray index. */
)
{ {
int ray_index = queues[queue_number * queuesize + thread_index]; int ray_index = queues[queue_number * queuesize + thread_index];
if(empty_queue && ray_index != QUEUE_EMPTY_SLOT) { if(empty_queue && ray_index != QUEUE_EMPTY_SLOT) {
queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT; queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
} }
return ray_index; return ray_index;
} }
/* The following functions are to realize Local memory variant of enqueue ray index function */ /* The following functions are to realize Local memory variant of enqueue ray index function. */
/* All threads should call this function */ /* All threads should call this function. */
ccl_device void enqueue_ray_index_local( ccl_device void enqueue_ray_index_local(
int ray_index, /* Ray index to enqueue*/ int ray_index, /* Ray index to enqueue. */
int queue_number, /* Queue in which to enqueue ray index */ int queue_number, /* Queue in which to enqueue ray index. */
char enqueue_flag, /* True for threads whose ray index has to be enqueued */ char enqueue_flag, /* True for threads whose ray index has to be enqueued. */
int queuesize, /* queue size */ int queuesize, /* queue size. */
ccl_local unsigned int *local_queue_atomics, /* To to local queue atomics */ ccl_local unsigned int *local_queue_atomics, /* To to local queue atomics. */
ccl_global int *Queue_data, /* Queues */ ccl_global int *Queue_data, /* Queues. */
ccl_global int *Queue_index /* To do global queue atomics */ ccl_global int *Queue_index) /* To do global queue atomics. */
)
{ {
int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0); int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0);
/* Get local queue id */ /* Get local queue id .*/
unsigned int lqidx; unsigned int lqidx;
if(enqueue_flag) { if(enqueue_flag) {
lqidx = atomic_inc(local_queue_atomics); lqidx = atomic_inc(local_queue_atomics);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
/* Get global queue offset */ /* Get global queue offset. */
if(lidx == 0) { if(lidx == 0) {
*local_queue_atomics = atomic_add(&Queue_index[queue_number], *local_queue_atomics); *local_queue_atomics = atomic_add(&Queue_index[queue_number], *local_queue_atomics);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
/* Get global queue index and enqueue ray */ /* Get global queue index and enqueue ray. */
if(enqueue_flag) { if(enqueue_flag) {
unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx; unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx;
Queue_data[my_gqidx] = ray_index; Queue_data[my_gqidx] = ray_index;
@ -100,30 +95,28 @@ ccl_device void enqueue_ray_index_local(
} }
ccl_device unsigned int get_local_queue_index( ccl_device unsigned int get_local_queue_index(
int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */ int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
ccl_local unsigned int *local_queue_atomics ccl_local unsigned int *local_queue_atomics)
)
{ {
int my_lqidx = atomic_inc(&local_queue_atomics[queue_number]); int my_lqidx = atomic_inc(&local_queue_atomics[queue_number]);
return my_lqidx; return my_lqidx;
} }
ccl_device unsigned int get_global_per_queue_offset( ccl_device unsigned int get_global_per_queue_offset(
int queue_number, int queue_number,
ccl_local unsigned int *local_queue_atomics, ccl_local unsigned int *local_queue_atomics,
ccl_global int* global_queue_atomics ccl_global int* global_queue_atomics)
)
{ {
unsigned int queue_offset = atomic_add((&global_queue_atomics[queue_number]), local_queue_atomics[queue_number]); unsigned int queue_offset = atomic_add(&global_queue_atomics[queue_number],
local_queue_atomics[queue_number]);
return queue_offset; return queue_offset;
} }
ccl_device unsigned int get_global_queue_index( ccl_device unsigned int get_global_queue_index(
int queue_number, int queue_number,
int queuesize, int queuesize,
unsigned int lqidx, unsigned int lqidx,
ccl_local unsigned int * global_per_queue_offset ccl_local unsigned int * global_per_queue_offset)
)
{ {
int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number]; int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number];
return my_gqidx; return my_gqidx;

@ -161,15 +161,14 @@ ccl_device char kernel_background_buffer_update(
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
} }
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
{
#ifdef __BACKGROUND__ #ifdef __BACKGROUND__
/* sample background shader */ /* sample background shader */
float3 L_background = indirect_background(kg, state, ray, sd); float3 L_background = indirect_background(kg, state, ray, sd);
path_radiance_accum_background(L, (*throughput), L_background, state->bounce); path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
#endif #endif
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
} }
} }
if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {

@ -291,13 +291,13 @@ ccl_device void kernel_data_init(
/* Initialize queue data and queue index. */ /* Initialize queue data and queue index. */
if(thread_index < queuesize) { if(thread_index < queuesize) {
/* Initialize active ray queue */ /* Initialize active ray queue. */
Queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; Queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
/* Initialize background and buffer update queue */ /* Initialize background and buffer update queue. */
Queue_data[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; Queue_data[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
/* Initialize shadow ray cast of AO queue */ /* Initialize shadow ray cast of AO queue. */
Queue_data[QUEUE_SHADOW_RAY_CAST_AO_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; Queue_data[QUEUE_SHADOW_RAY_CAST_AO_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
/* Initialize shadow ray cast of direct lighting queue */ /* Initialize shadow ray cast of direct lighting queue. */
Queue_data[QUEUE_SHADOW_RAY_CAST_DL_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; Queue_data[QUEUE_SHADOW_RAY_CAST_DL_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
} }
@ -316,7 +316,6 @@ ccl_device void kernel_data_init(
int y = get_global_id(1); int y = get_global_id(1);
if(x < (sw * parallel_samples) && y < sh) { if(x < (sw * parallel_samples) && y < sh) {
int ray_index = x + y * (sw * parallel_samples); int ray_index = x + y * (sw * parallel_samples);
/* This is the first assignment to ray_state; /* This is the first assignment to ray_state;
@ -400,12 +399,10 @@ ccl_device void kernel_data_init(
#endif #endif
} else { } else {
/* These rays do not participate in path-iteration. */ /* These rays do not participate in path-iteration. */
float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
/* Accumulate result in output buffer. */ /* Accumulate result in output buffer. */
kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad); kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
path_rng_end(kg, rng_state, rng_coop[ray_index]); path_rng_end(kg, rng_state, rng_coop[ray_index]);
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
} }
} }

@ -127,14 +127,13 @@ ccl_device char kernel_next_iteration_setup(
} }
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
ccl_global float3 *throughput = &throughput_coop[ray_index]; ccl_global float3 *throughput = &throughput_coop[ray_index];
ccl_global Ray *ray = &Ray_coop[ray_index]; ccl_global Ray *ray = &Ray_coop[ray_index];
ccl_global RNG* rng = &rng_coop[ray_index]; ccl_global RNG* rng = &rng_coop[ray_index];
state = &PathState_coop[ray_index]; state = &PathState_coop[ray_index];
L = &PathRadiance_coop[ray_index]; L = &PathRadiance_coop[ray_index];
/* compute direct lighting and next bounce */ /* Compute direct lighting and next bounce. */
if(!kernel_path_surface_bounce(kg, rng, sd, throughput, state, L, ray)) { if(!kernel_path_surface_bounce(kg, rng, sd, throughput, state, L, ray)) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
enqueue_flag = 1; enqueue_flag = 1;

@ -131,7 +131,8 @@ ccl_device void kernel_scene_intersect(
if(!hit) { if(!hit) {
/* Change the state of rays that hit the background; /* Change the state of rays that hit the background;
* These rays undergo special processing in the * These rays undergo special processing in the
* background_bufferUpdate kernel*/ * background_bufferUpdate kernel.
*/
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
} }
} }

@ -65,7 +65,7 @@ ccl_device void kernel_shadow_blocked(
if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) || if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||
IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO))
{ {
/* Load kernel global structure */ /* Load kernel global structure. */
KernelGlobals *kg = (KernelGlobals *)globals; KernelGlobals *kg = (KernelGlobals *)globals;
ShaderData *sd_shadow = (ShaderData *)shader_shadow; ShaderData *sd_shadow = (ShaderData *)shader_shadow;