forked from bartvdbraak/blender
Cycles: Code cleanup in split kernel, whitespaces
This commit is contained in:
parent
80f344fd95
commit
b9f89b1647
@ -27,15 +27,14 @@
|
|||||||
/*
|
/*
|
||||||
* Enqueue ray index into the queue
|
* Enqueue ray index into the queue
|
||||||
*/
|
*/
|
||||||
ccl_device void enqueue_ray_index (
|
ccl_device void enqueue_ray_index(
|
||||||
int ray_index, /* Ray index to be enqueued */
|
int ray_index, /* Ray index to be enqueued. */
|
||||||
int queue_number, /* Queue in which the ray index should be enqueued*/
|
int queue_number, /* Queue in which the ray index should be enqueued. */
|
||||||
ccl_global int *queues, /* Buffer of all queues */
|
ccl_global int *queues, /* Buffer of all queues. */
|
||||||
int queue_size, /* Size of each queue */
|
int queue_size, /* Size of each queue. */
|
||||||
ccl_global int *queue_index /* Array of size num_queues; Used for atomic increment */
|
ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */
|
||||||
)
|
|
||||||
{
|
{
|
||||||
/* This thread's queue index */
|
/* This thread's queue index. */
|
||||||
int my_queue_index = atomic_inc(&queue_index[queue_number]) + (queue_number * queue_size);
|
int my_queue_index = atomic_inc(&queue_index[queue_number]) + (queue_number * queue_size);
|
||||||
queues[my_queue_index] = ray_index;
|
queues[my_queue_index] = ray_index;
|
||||||
}
|
}
|
||||||
@ -47,52 +46,48 @@ ccl_device void enqueue_ray_index (
|
|||||||
* i.e All ray's in the queue has been successfully allocated and there
|
* i.e All ray's in the queue has been successfully allocated and there
|
||||||
* is no more ray to allocate to other threads.
|
* is no more ray to allocate to other threads.
|
||||||
*/
|
*/
|
||||||
ccl_device int get_ray_index (
|
ccl_device int get_ray_index(
|
||||||
int thread_index, /* Global thread index */
|
int thread_index, /* Global thread index. */
|
||||||
int queue_number, /* Queue to operate on */
|
int queue_number, /* Queue to operate on. */
|
||||||
ccl_global int *queues, /* Buffer of all queues */
|
ccl_global int *queues, /* Buffer of all queues. */
|
||||||
int queuesize, /* Size of a queue */
|
int queuesize, /* Size of a queue. */
|
||||||
int empty_queue /* Empty the queue slot as soon as we fetch the ray index */
|
int empty_queue) /* Empty the queue slot as soon as we fetch the ray index. */
|
||||||
)
|
|
||||||
{
|
{
|
||||||
int ray_index = queues[queue_number * queuesize + thread_index];
|
int ray_index = queues[queue_number * queuesize + thread_index];
|
||||||
|
|
||||||
if(empty_queue && ray_index != QUEUE_EMPTY_SLOT) {
|
if(empty_queue && ray_index != QUEUE_EMPTY_SLOT) {
|
||||||
queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
|
queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ray_index;
|
return ray_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The following functions are to realize Local memory variant of enqueue ray index function */
|
/* The following functions are to realize Local memory variant of enqueue ray index function. */
|
||||||
|
|
||||||
/* All threads should call this function */
|
/* All threads should call this function. */
|
||||||
ccl_device void enqueue_ray_index_local(
|
ccl_device void enqueue_ray_index_local(
|
||||||
int ray_index, /* Ray index to enqueue*/
|
int ray_index, /* Ray index to enqueue. */
|
||||||
int queue_number, /* Queue in which to enqueue ray index */
|
int queue_number, /* Queue in which to enqueue ray index. */
|
||||||
char enqueue_flag, /* True for threads whose ray index has to be enqueued */
|
char enqueue_flag, /* True for threads whose ray index has to be enqueued. */
|
||||||
int queuesize, /* queue size */
|
int queuesize, /* queue size. */
|
||||||
ccl_local unsigned int *local_queue_atomics, /* To to local queue atomics */
|
ccl_local unsigned int *local_queue_atomics, /* To to local queue atomics. */
|
||||||
ccl_global int *Queue_data, /* Queues */
|
ccl_global int *Queue_data, /* Queues. */
|
||||||
ccl_global int *Queue_index /* To do global queue atomics */
|
ccl_global int *Queue_index) /* To do global queue atomics. */
|
||||||
)
|
|
||||||
{
|
{
|
||||||
int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0);
|
int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0);
|
||||||
|
|
||||||
/* Get local queue id */
|
/* Get local queue id .*/
|
||||||
unsigned int lqidx;
|
unsigned int lqidx;
|
||||||
if(enqueue_flag) {
|
if(enqueue_flag) {
|
||||||
lqidx = atomic_inc(local_queue_atomics);
|
lqidx = atomic_inc(local_queue_atomics);
|
||||||
}
|
}
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
/* Get global queue offset */
|
/* Get global queue offset. */
|
||||||
if(lidx == 0) {
|
if(lidx == 0) {
|
||||||
*local_queue_atomics = atomic_add(&Queue_index[queue_number], *local_queue_atomics);
|
*local_queue_atomics = atomic_add(&Queue_index[queue_number], *local_queue_atomics);
|
||||||
}
|
}
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
/* Get global queue index and enqueue ray */
|
/* Get global queue index and enqueue ray. */
|
||||||
if(enqueue_flag) {
|
if(enqueue_flag) {
|
||||||
unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx;
|
unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx;
|
||||||
Queue_data[my_gqidx] = ray_index;
|
Queue_data[my_gqidx] = ray_index;
|
||||||
@ -100,30 +95,28 @@ ccl_device void enqueue_ray_index_local(
|
|||||||
}
|
}
|
||||||
|
|
||||||
ccl_device unsigned int get_local_queue_index(
|
ccl_device unsigned int get_local_queue_index(
|
||||||
int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
|
int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
|
||||||
ccl_local unsigned int *local_queue_atomics
|
ccl_local unsigned int *local_queue_atomics)
|
||||||
)
|
|
||||||
{
|
{
|
||||||
int my_lqidx = atomic_inc(&local_queue_atomics[queue_number]);
|
int my_lqidx = atomic_inc(&local_queue_atomics[queue_number]);
|
||||||
return my_lqidx;
|
return my_lqidx;
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_device unsigned int get_global_per_queue_offset(
|
ccl_device unsigned int get_global_per_queue_offset(
|
||||||
int queue_number,
|
int queue_number,
|
||||||
ccl_local unsigned int *local_queue_atomics,
|
ccl_local unsigned int *local_queue_atomics,
|
||||||
ccl_global int* global_queue_atomics
|
ccl_global int* global_queue_atomics)
|
||||||
)
|
|
||||||
{
|
{
|
||||||
unsigned int queue_offset = atomic_add((&global_queue_atomics[queue_number]), local_queue_atomics[queue_number]);
|
unsigned int queue_offset = atomic_add(&global_queue_atomics[queue_number],
|
||||||
|
local_queue_atomics[queue_number]);
|
||||||
return queue_offset;
|
return queue_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_device unsigned int get_global_queue_index(
|
ccl_device unsigned int get_global_queue_index(
|
||||||
int queue_number,
|
int queue_number,
|
||||||
int queuesize,
|
int queuesize,
|
||||||
unsigned int lqidx,
|
unsigned int lqidx,
|
||||||
ccl_local unsigned int * global_per_queue_offset
|
ccl_local unsigned int * global_per_queue_offset)
|
||||||
)
|
|
||||||
{
|
{
|
||||||
int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number];
|
int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number];
|
||||||
return my_gqidx;
|
return my_gqidx;
|
||||||
|
@ -161,15 +161,14 @@ ccl_device char kernel_background_buffer_update(
|
|||||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND))
|
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
|
||||||
{
|
|
||||||
#ifdef __BACKGROUND__
|
#ifdef __BACKGROUND__
|
||||||
/* sample background shader */
|
/* sample background shader */
|
||||||
float3 L_background = indirect_background(kg, state, ray, sd);
|
float3 L_background = indirect_background(kg, state, ray, sd);
|
||||||
path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
|
path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
|
||||||
#endif
|
#endif
|
||||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
|
if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
|
||||||
|
@ -291,13 +291,13 @@ ccl_device void kernel_data_init(
|
|||||||
|
|
||||||
/* Initialize queue data and queue index. */
|
/* Initialize queue data and queue index. */
|
||||||
if(thread_index < queuesize) {
|
if(thread_index < queuesize) {
|
||||||
/* Initialize active ray queue */
|
/* Initialize active ray queue. */
|
||||||
Queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
|
Queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
|
||||||
/* Initialize background and buffer update queue */
|
/* Initialize background and buffer update queue. */
|
||||||
Queue_data[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
|
Queue_data[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
|
||||||
/* Initialize shadow ray cast of AO queue */
|
/* Initialize shadow ray cast of AO queue. */
|
||||||
Queue_data[QUEUE_SHADOW_RAY_CAST_AO_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
|
Queue_data[QUEUE_SHADOW_RAY_CAST_AO_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
|
||||||
/* Initialize shadow ray cast of direct lighting queue */
|
/* Initialize shadow ray cast of direct lighting queue. */
|
||||||
Queue_data[QUEUE_SHADOW_RAY_CAST_DL_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
|
Queue_data[QUEUE_SHADOW_RAY_CAST_DL_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -316,7 +316,6 @@ ccl_device void kernel_data_init(
|
|||||||
int y = get_global_id(1);
|
int y = get_global_id(1);
|
||||||
|
|
||||||
if(x < (sw * parallel_samples) && y < sh) {
|
if(x < (sw * parallel_samples) && y < sh) {
|
||||||
|
|
||||||
int ray_index = x + y * (sw * parallel_samples);
|
int ray_index = x + y * (sw * parallel_samples);
|
||||||
|
|
||||||
/* This is the first assignment to ray_state;
|
/* This is the first assignment to ray_state;
|
||||||
@ -400,12 +399,10 @@ ccl_device void kernel_data_init(
|
|||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
/* These rays do not participate in path-iteration. */
|
/* These rays do not participate in path-iteration. */
|
||||||
|
|
||||||
float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||||
/* Accumulate result in output buffer. */
|
/* Accumulate result in output buffer. */
|
||||||
kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
|
kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
|
||||||
path_rng_end(kg, rng_state, rng_coop[ray_index]);
|
path_rng_end(kg, rng_state, rng_coop[ray_index]);
|
||||||
|
|
||||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
|
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -127,14 +127,13 @@ ccl_device char kernel_next_iteration_setup(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
|
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
|
||||||
|
|
||||||
ccl_global float3 *throughput = &throughput_coop[ray_index];
|
ccl_global float3 *throughput = &throughput_coop[ray_index];
|
||||||
ccl_global Ray *ray = &Ray_coop[ray_index];
|
ccl_global Ray *ray = &Ray_coop[ray_index];
|
||||||
ccl_global RNG* rng = &rng_coop[ray_index];
|
ccl_global RNG* rng = &rng_coop[ray_index];
|
||||||
state = &PathState_coop[ray_index];
|
state = &PathState_coop[ray_index];
|
||||||
L = &PathRadiance_coop[ray_index];
|
L = &PathRadiance_coop[ray_index];
|
||||||
|
|
||||||
/* compute direct lighting and next bounce */
|
/* Compute direct lighting and next bounce. */
|
||||||
if(!kernel_path_surface_bounce(kg, rng, sd, throughput, state, L, ray)) {
|
if(!kernel_path_surface_bounce(kg, rng, sd, throughput, state, L, ray)) {
|
||||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
||||||
enqueue_flag = 1;
|
enqueue_flag = 1;
|
||||||
|
@ -131,7 +131,8 @@ ccl_device void kernel_scene_intersect(
|
|||||||
if(!hit) {
|
if(!hit) {
|
||||||
/* Change the state of rays that hit the background;
|
/* Change the state of rays that hit the background;
|
||||||
* These rays undergo special processing in the
|
* These rays undergo special processing in the
|
||||||
* background_bufferUpdate kernel*/
|
* background_bufferUpdate kernel.
|
||||||
|
*/
|
||||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
|
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -65,7 +65,7 @@ ccl_device void kernel_shadow_blocked(
|
|||||||
if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||
|
if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||
|
||||||
IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO))
|
IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO))
|
||||||
{
|
{
|
||||||
/* Load kernel global structure */
|
/* Load kernel global structure. */
|
||||||
KernelGlobals *kg = (KernelGlobals *)globals;
|
KernelGlobals *kg = (KernelGlobals *)globals;
|
||||||
ShaderData *sd_shadow = (ShaderData *)shader_shadow;
|
ShaderData *sd_shadow = (ShaderData *)shader_shadow;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user