blender/intern/cycles/kernel/kernel_holdout_emission_blurring_pathtermination_ao.cl

284 lines
13 KiB
Common Lisp
Raw Normal View History

2015-05-09 14:34:30 +00:00
/*
* Copyright 2011-2015 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel_split.h"
/*
* Note on kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao kernel.
* This is the sixth kernel in the ray tracing logic. This is the fifth
* of the path iteration kernels. This kernel takes care of the logic to process
* "material of type holdout", indirect primitive emission, bsdf blurring,
* probabilistic path termination and AO.
*
* This kernels determines the rays for which a shadow_blocked() function associated with AO should be executed.
* Those rays for which a shadow_blocked() function for AO must be executed are marked with flag RAY_SHADOW_RAY_CAST_ao and
* enqueued into the queue QUEUE_SHADOW_RAY_CAST_AO_RAYS
*
* Ray state of rays that are terminated in this kernel are changed to RAY_UPDATE_BUFFER
*
* The input and output are as follows,
*
* rng_coop ---------------------------------------------|--- kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao ---|--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
* throughput_coop --------------------------------------| |--- PathState_coop
* PathRadiance_coop ------------------------------------| |--- throughput_coop
* Intersection_coop ------------------------------------| |--- L_transparent_coop
* PathState_coop ---------------------------------------| |--- per_sample_output_buffers
* L_transparent_coop -----------------------------------| |--- PathRadiance_coop
* shader_data ------------------------------------------| |--- ShaderData
* ray_state --------------------------------------------| |--- ray_state
* Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
* Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| |--- AOAlpha_coop
* kg (globals + data) ----------------------------------| |--- AOBSDF_coop
* parallel_samples -------------------------------------| |--- AOLightRay_coop
* per_sample_output_buffers ----------------------------| |
* sw ---------------------------------------------------| |
* sh ---------------------------------------------------| |
* sx ---------------------------------------------------| |
* sy ---------------------------------------------------| |
* stride -----------------------------------------------| |
* work_array -------------------------------------------| |
* queuesize --------------------------------------------| |
* start_sample -----------------------------------------| |
*
* Note on Queues :
* This kernel fetches rays from the queue QUEUE_ACTIVE_AND_REGENERATED_RAYS and processes only
* the rays of state RAY_ACTIVE.
* There are different points in this kernel where a ray may terminate and reach RAY_UPDATE_BUFFER
* state. These rays are enqueued into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. These rays will
* still be present in QUEUE_ACTIVE_AND_REGENERATED_RAYS queue, but since their ray-state has been
* changed to RAY_UPDATE_BUFFER, there is no problem.
*
* State of queues when this kernel is called :
* At entry,
* QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
* QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE rays.
* QUEUE_SHADOW_RAY_CAST_AO_RAYS will be empty.
* At exit,
* QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE, RAY_REGENERATED and RAY_UPDATE_BUFFER rays
* QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays
* QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with flag RAY_SHADOW_RAY_CAST_AO
*/
__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao(
ccl_global char *globals,
ccl_constant KernelData *data,
ccl_global char *shader_data, /* Required throughout the kernel except probabilistic path termination and AO */
ccl_global float *per_sample_output_buffers,
ccl_global uint *rng_coop, /* Required for "kernel_write_data_passes" and AO */
ccl_global float3 *throughput_coop, /* Required for handling holdout material and AO */
ccl_global float *L_transparent_coop, /* Required for handling holdout material */
PathRadiance *PathRadiance_coop, /* Required for "kernel_write_data_passes" and indirect primitive emission */
ccl_global PathState *PathState_coop, /* Required throughout the kernel and AO */
Intersection *Intersection_coop, /* Required for indirect primitive emission */
ccl_global float3 *AOAlpha_coop, /* Required for AO */
ccl_global float3 *AOBSDF_coop, /* Required for AO */
ccl_global Ray *AOLightRay_coop, /* Required for AO */
int sw, int sh, int sx, int sy, int stride,
ccl_global char *ray_state, /* Denotes the state of each ray */
ccl_global unsigned int *work_array, /* Denotes the work that each ray belongs to */
ccl_global int *Queue_data, /* Queue memory */
ccl_global int *Queue_index, /* Tracks the number of elements in each queue */
int queuesize, /* Size (capacity) of each queue */
#ifdef __WORK_STEALING__
unsigned int start_sample,
#endif
int parallel_samples /* Number of samples to be processed in parallel */
)
{
ccl_local unsigned int local_queue_atomics_bg;
ccl_local unsigned int local_queue_atomics_ao;
if(get_local_id(0) == 0 && get_local_id(1) == 0) {
local_queue_atomics_bg = 0;
local_queue_atomics_ao = 0;
}
barrier(CLK_LOCAL_MEM_FENCE);
char enqueue_flag = 0;
char enqueue_flag_AO_SHADOW_RAY_CAST = 0;
int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
ray_index = get_ray_index(ray_index, QUEUE_ACTIVE_AND_REGENERATED_RAYS, Queue_data, queuesize, 0);
#ifdef __COMPUTE_DEVICE_GPU__
/* If we are executing on a GPU device, we exit all threads that are not required
* If we are executing on a CPU device, then we need to keep all threads active
* since we have barrier() calls later in the kernel. CPU devices
* expect all threads to execute barrier statement.
*/
if(ray_index == QUEUE_EMPTY_SLOT)
return;
#endif
#ifndef __COMPUTE_DEVICE_GPU__
if(ray_index != QUEUE_EMPTY_SLOT) {
#endif
/* Load kernel globals structure and ShaderData structure */
KernelGlobals *kg = (KernelGlobals *)globals;
ShaderData *sd = (ShaderData *)shader_data;
#ifdef __WORK_STEALING__
unsigned int my_work;
unsigned int pixel_x;
unsigned int pixel_y;
#endif
unsigned int tile_x;
unsigned int tile_y;
int my_sample_tile;
unsigned int sample;
ccl_global RNG *rng = 0x0;
ccl_global PathState *state = 0x0;
float3 throughput;
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
throughput = throughput_coop[ray_index];
state = &PathState_coop[ray_index];
rng = &rng_coop[ray_index];
#ifdef __WORK_STEALING__
my_work = work_array[ray_index];
sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample;
get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index);
my_sample_tile = 0;
#else // __WORK_STEALING__
sample = work_array[ray_index];
/* buffer's stride is "stride"; Find x and y using ray_index */
int tile_index = ray_index / parallel_samples;
tile_x = tile_index % sw;
tile_y = tile_index / sw;
my_sample_tile = ray_index - (tile_index * parallel_samples);
#endif // __WORK_STEALING__
per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride;
/* holdout */
#ifdef __HOLDOUT__
if((ccl_fetch(sd, flag) & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state->flag & PATH_RAY_CAMERA)) {
if(kernel_data.background.transparent) {
float3 holdout_weight;
if(ccl_fetch(sd, flag) & SD_HOLDOUT_MASK)
holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
else
holdout_weight = shader_holdout_eval(kg, sd);
/* any throughput is ok, should all be identical here */
L_transparent_coop[ray_index] += average(holdout_weight*throughput);
}
if(ccl_fetch(sd, flag) & SD_HOLDOUT_MASK) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
enqueue_flag = 1;
}
}
#endif
}
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
PathRadiance *L = &PathRadiance_coop[ray_index];
/* holdout mask objects do not write data passes */
kernel_write_data_passes(kg, per_sample_output_buffers, L, sd, sample, state, throughput);
/* blurring of bsdf after bounces, for rays that have a small likelihood
* of following this particular path (diffuse, rough glossy) */
if(kernel_data.integrator.filter_glossy != FLT_MAX) {
float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
if(blur_pdf < 1.0f) {
float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
shader_bsdf_blur(kg, sd, blur_roughness);
}
}
#ifdef __EMISSION__
/* emission */
if(ccl_fetch(sd, flag) & SD_EMISSION) {
/* todo: is isect.t wrong here for transparent surfaces? */
float3 emission = indirect_primitive_emission(kg, sd, Intersection_coop[ray_index].t, state->flag, state->ray_pdf);
path_radiance_accum_emission(L, throughput, emission, state->bounce);
}
#endif
/* path termination. this is a strange place to put the termination, it's
* mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate */
float probability = path_state_terminate_probability(kg, state, throughput);
if(probability == 0.0f) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
enqueue_flag = 1;
}
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
if(probability != 1.0f) {
float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
if(terminate >= probability) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
enqueue_flag = 1;
} else {
throughput_coop[ray_index] = throughput/probability;
}
}
}
}
#ifdef __AO__
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (ccl_fetch(sd, flag) & SD_AO)) {
/* todo: solve correlation */
float bsdf_u, bsdf_v;
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
float ao_factor = kernel_data.background.ao_factor;
float3 ao_N;
AOBSDF_coop[ray_index] = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
AOAlpha_coop[ray_index] = shader_bsdf_alpha(kg, sd);
float3 ao_D;
float ao_pdf;
sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
Ray _ray;
_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
_ray.D = ao_D;
_ray.t = kernel_data.background.ao_distance;
#ifdef __OBJECT_MOTION__
_ray.time = ccl_fetch(sd, time);
#endif
_ray.dP = ccl_fetch(sd, dP);
_ray.dD = differential3_zero();
AOLightRay_coop[ray_index] = _ray;
ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
enqueue_flag_AO_SHADOW_RAY_CAST = 1;
}
}
}
#endif
#ifndef __COMPUTE_DEVICE_GPU__
}
#endif
/* Enqueue RAY_UPDATE_BUFFER rays */
enqueue_ray_index_local(ray_index, QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, enqueue_flag, queuesize, &local_queue_atomics_bg, Queue_data, Queue_index);
#ifdef __AO__
/* Enqueue to-shadow-ray-cast rays */
enqueue_ray_index_local(ray_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, enqueue_flag_AO_SHADOW_RAY_CAST, queuesize, &local_queue_atomics_ao, Queue_data, Queue_index);
#endif
}