blender/intern/cycles/kernel/kernel_queues.h
Sergey Sharybin cb4b5e12ab Cycles: Cleanup, spacing after preprocessor
It is supposed to be two spaces before comment stating which if
else/endif statements corresponds to. Was mainly violated in the
header guards.
2018-11-09 11:34:54 +01:00

149 lines
5.2 KiB
C

/*
* Copyright 2011-2015 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __KERNEL_QUEUE_H__
#define __KERNEL_QUEUE_H__
CCL_NAMESPACE_BEGIN
/*
* Queue utility functions for split kernel
*/
/* The queue helpers in this file use 32-bit atomic operations on both global
* and local counters; when building for OpenCL, request the corresponding
* base-atomics extensions. */
#ifdef __KERNEL_OPENCL__
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
#endif
/*
 * Enqueue ray index into the queue.
 *
 * The per-queue counter queue_index[queue_number] is atomically incremented
 * and its previous value, offset by queue_number * queue_size, selects the
 * slot of the queues buffer that receives ray_index.
 * No check against queue_size is performed here.
 */
ccl_device void enqueue_ray_index(
        int ray_index,               /* Ray index to be enqueued. */
        int queue_number,            /* Queue in which the ray index should be enqueued. */
        ccl_global int *queues,      /* Buffer of all queues. */
        int queue_size,              /* Size of each queue. */
        ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */
{
    /* Counter of the queue we are appending to. */
    ccl_global uint *counter = (ccl_global uint*)&queue_index[queue_number];
    /* First slot of that queue inside the shared buffer. */
    const int queue_base = queue_number * queue_size;
    /* Slot reserved for this thread. */
    const int slot = atomic_fetch_and_inc_uint32(counter) + queue_base;

    queues[slot] = ray_index;
}
/*
 * Get the ray index for this thread.
 *
 * Returns the ray index stored in slot thread_index of the given queue for
 * threads that have work to do, or QUEUE_EMPTY_SLOT for threads whose slot
 * holds no ray, i.e. all rays in the queue have already been handed out and
 * there are none left for the remaining threads.
 *
 * When empty_queue is non-zero, a slot that held a ray is reset to
 * QUEUE_EMPTY_SLOT after it has been read. kg is not used by this function.
 */
ccl_device int get_ray_index(
        KernelGlobals *kg,
        int thread_index,       /* Global thread index. */
        int queue_number,       /* Queue to operate on. */
        ccl_global int *queues, /* Buffer of all queues. */
        int queuesize,          /* Size of a queue. */
        int empty_queue)        /* Empty the queue slot as soon as we fetch the ray index. */
{
    /* Slot of this thread inside the selected queue. */
    ccl_global int *slot = &queues[queue_number * queuesize + thread_index];
    const int ray_index = *slot;

    /* Nothing to clear: either clearing was not requested or the slot is already empty. */
    if(!empty_queue || ray_index == QUEUE_EMPTY_SLOT) {
        return ray_index;
    }

    *slot = QUEUE_EMPTY_SLOT;
    return ray_index;
}
/* The following functions are to realize Local memory variant of enqueue ray index function. */
/*
* Enqueue ray indices for a group of threads using a single atomic on the
* global queue counter.
*
* The function works in three phases separated by ccl_barrier() calls:
* 1. every thread with enqueue_flag set reserves a slot by atomically
*    incrementing *local_queue_atomics;
* 2. the thread whose local index is 0 atomically adds the value of
*    *local_queue_atomics to Queue_index[queue_number] and stores the
*    previous value of that global counter back into *local_queue_atomics;
* 3. every thread with enqueue_flag set writes its ray index to
*    Queue_data at queue_number * queuesize + *local_queue_atomics + its
*    reserved slot.
*
* All threads should call this function, including those with enqueue_flag
* unset, because the barriers are executed unconditionally.
*
* NOTE(review): *local_queue_atomics is not reset here; presumably callers
* zero it before the call - confirm against the call sites.
*/
ccl_device void enqueue_ray_index_local(
int ray_index, /* Ray index to enqueue. */
int queue_number, /* Queue in which to enqueue ray index. */
char enqueue_flag, /* True for threads whose ray index has to be enqueued. */
int queuesize, /* Queue size. */
ccl_local_param unsigned int *local_queue_atomics, /* To do local queue atomics. */
ccl_global int *Queue_data, /* Queues. */
ccl_global int *Queue_index) /* To do global queue atomics. */
{
/* Linear index of this thread, built from its two local ids. */
int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
/* Get local queue id. Only assigned, and only read, when enqueue_flag is set. */
unsigned int lqidx;
if(enqueue_flag) {
lqidx = atomic_fetch_and_inc_uint32(local_queue_atomics);
}
/* Wait until all local slots have been reserved before reading the count. */
ccl_barrier(CCL_LOCAL_MEM_FENCE);
/* Get global queue offset. */
if(lidx == 0) {
/* Reserve room in the global queue for the whole group at once; the
* local counter now holds the previous global counter value. */
*local_queue_atomics = atomic_fetch_and_add_uint32((ccl_global uint*)&Queue_index[queue_number],
*local_queue_atomics);
}
/* Make the offset written by thread 0 visible to the other threads. */
ccl_barrier(CCL_LOCAL_MEM_FENCE);
/* Get global queue index and enqueue ray. */
if(enqueue_flag) {
unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx;
Queue_data[my_gqidx] = ray_index;
}
}
/*
 * Reserve a slot in the local counter of the given queue.
 *
 * Atomically increments local_queue_atomics[queue_number] and returns the
 * value the counter held before the increment.
 *
 * queue_number is used directly as an array index, so it must refer to an
 * existing entry of local_queue_atomics; no "no queue" value is handled here.
 */
ccl_device unsigned int get_local_queue_index(
        int queue_number, /* Queue in which to enqueue the ray; used as index into local_queue_atomics. */
        ccl_local_param unsigned int *local_queue_atomics)
{
    /* Return the counter value as-is instead of passing it through a signed
     * int, which would be a needless unsigned -> signed -> unsigned round trip. */
    return atomic_fetch_and_inc_uint32(&local_queue_atomics[queue_number]);
}
/*
 * Reserve room in the global counter of a queue for all locally counted rays.
 *
 * Atomically adds local_queue_atomics[queue_number] to
 * global_queue_atomics[queue_number] and returns the value the global counter
 * held before the addition.
 */
ccl_device unsigned int get_global_per_queue_offset(
        int queue_number,
        ccl_local_param unsigned int *local_queue_atomics,
        ccl_global int* global_queue_atomics)
{
    /* Global counter of the selected queue. */
    ccl_global uint *global_counter = (ccl_global uint*)&global_queue_atomics[queue_number];
    /* Number of entries counted locally for that queue. */
    const unsigned int local_count = local_queue_atomics[queue_number];

    const unsigned int previous_total = atomic_fetch_and_add_uint32(global_counter, local_count);
    return previous_total;
}
/*
 * Compute the position of an entry inside the buffer holding all queues.
 *
 * The result is queuesize * queue_number (start of the queue) plus lqidx plus
 * global_per_queue_offset[queue_number].
 */
ccl_device unsigned int get_global_queue_index(
        int queue_number,   /* Queue the entry belongs to. */
        int queuesize,      /* Size of each queue. */
        unsigned int lqidx, /* Slot within the group; presumably the value returned by get_local_queue_index(). */
        ccl_local_param unsigned int * global_per_queue_offset) /* Per-queue offsets, indexed by queue number. */
{
    /* The sum already has unsigned type; return it directly rather than
     * narrowing it through a signed int first. */
    return queuesize * queue_number + lqidx + global_per_queue_offset[queue_number];
}
/*
 * Remove the most recently enqueued ray index from a queue.
 *
 * Atomically decrements queue_index[queue_number]; the decremented value is
 * the slot to read. Returns the ray index stored in that slot, or
 * QUEUE_EMPTY_SLOT when the slot number is negative (the queue was empty).
 *
 * NOTE(review): the counter is decremented even when the queue is already
 * empty, so it is presumably left below zero afterwards - confirm that callers
 * reset the counter before enqueueing again.
 */
ccl_device int dequeue_ray_index(
        int queue_number,
        ccl_global int *queues,
        int queue_size,
        ccl_global int *queue_index)
{
    ccl_global uint *counter = (ccl_global uint*)&queue_index[queue_number];
    /* Slot holding the last entry, or a negative value if there was none. */
    const int slot = atomic_fetch_and_dec_uint32(counter)-1;

    if(slot >= 0) {
        return queues[slot + queue_number * queue_size];
    }
    return QUEUE_EMPTY_SLOT;
}
CCL_NAMESPACE_END
#endif // __KERNEL_QUEUE_H__