forked from bartvdbraak/blender
68dd7617d7
Ref D8237, T78710
467 lines
18 KiB
C
467 lines
18 KiB
C
/*
|
|
* Copyright 2011-2013 Blender Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
#ifdef __VOLUME__
|
|
/* Get PathState ready for use for volume stack evaluation. */
|
|
# ifdef __SPLIT_KERNEL__
|
|
ccl_addr_space
|
|
# endif
|
|
ccl_device_inline PathState *
|
|
shadow_blocked_volume_path_state(KernelGlobals *kg,
|
|
VolumeState *volume_state,
|
|
ccl_addr_space PathState *state,
|
|
ShaderData *sd,
|
|
Ray *ray)
|
|
{
|
|
# ifdef __SPLIT_KERNEL__
|
|
ccl_addr_space PathState *ps =
|
|
&kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
|
|
# else
|
|
PathState *ps = &volume_state->ps;
|
|
# endif
|
|
*ps = *state;
|
|
/* We are checking for shadow on the "other" side of the surface, so need
|
|
* to discard volume we are currently at.
|
|
*/
|
|
if (dot(sd->Ng, ray->D) < 0.0f) {
|
|
kernel_volume_stack_enter_exit(kg, sd, ps->volume_stack);
|
|
}
|
|
return ps;
|
|
}
|
|
#endif /* __VOLUME__ */
|
|
|
|
/* Attenuate throughput accordingly to the given intersection event.
|
|
* Returns true if the throughput is zero and traversal can be aborted.
|
|
*/
|
|
ccl_device_forceinline bool shadow_handle_transparent_isect(KernelGlobals *kg,
|
|
ShaderData *shadow_sd,
|
|
ccl_addr_space PathState *state,
|
|
#ifdef __VOLUME__
|
|
ccl_addr_space PathState *volume_state,
|
|
#endif
|
|
Intersection *isect,
|
|
Ray *ray,
|
|
float3 *throughput)
|
|
{
|
|
#ifdef __VOLUME__
|
|
/* Attenuation between last surface and next surface. */
|
|
if (volume_state->volume_stack[0].shader != SHADER_NONE) {
|
|
Ray segment_ray = *ray;
|
|
segment_ray.t = isect->t;
|
|
kernel_volume_shadow(kg, shadow_sd, volume_state, &segment_ray, throughput);
|
|
}
|
|
#endif
|
|
/* Setup shader data at surface. */
|
|
shader_setup_from_ray(kg, shadow_sd, isect, ray);
|
|
/* Attenuation from transparent surface. */
|
|
if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
|
|
path_state_modify_bounce(state, true);
|
|
shader_eval_surface(kg, shadow_sd, state, NULL, PATH_RAY_SHADOW);
|
|
path_state_modify_bounce(state, false);
|
|
*throughput *= shader_bsdf_transparency(kg, shadow_sd);
|
|
}
|
|
/* Stop if all light is blocked. */
|
|
if (is_zero(*throughput)) {
|
|
return true;
|
|
}
|
|
#ifdef __VOLUME__
|
|
/* Exit/enter volume. */
|
|
kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack);
|
|
#endif
|
|
return false;
|
|
}
|
|
|
|
/* Special version which only handles opaque shadows. */
|
|
ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
|
|
ShaderData *shadow_sd,
|
|
ccl_addr_space PathState *state,
|
|
const uint visibility,
|
|
Ray *ray,
|
|
Intersection *isect,
|
|
float3 *shadow)
|
|
{
|
|
const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
|
|
#ifdef __VOLUME__
|
|
if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
|
|
/* Apply attenuation from current volume shader. */
|
|
kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
|
|
}
|
|
#endif
|
|
return blocked;
|
|
}
|
|
|
|
#ifdef __TRANSPARENT_SHADOWS__
|
|
# ifdef __SHADOW_RECORD_ALL__
|
|
/* Shadow function to compute how much light is blocked,
|
|
*
|
|
* We trace a single ray. If it hits any opaque surface, or more than a given
|
|
* number of transparent surfaces is hit, then we consider the geometry to be
|
|
* entirely blocked. If not, all transparent surfaces will be recorded and we
|
|
* will shade them one by one to determine how much light is blocked. This all
|
|
* happens in one scene intersection function.
|
|
*
|
|
* Recording all hits works well in some cases but may be slower in others. If
|
|
* we have many semi-transparent hairs, one intersection may be faster because
|
|
* you'd be reinteresecting the same hairs a lot with each step otherwise. If
|
|
* however there is mostly binary transparency then we may be recording many
|
|
* unnecessary intersections when one of the first surfaces blocks all light.
|
|
*
|
|
* From tests in real scenes it seems the performance loss is either minimal,
|
|
* or there is a performance increase anyway due to avoiding the need to send
|
|
* two rays with transparent shadows.
|
|
*
|
|
* On CPU it'll handle all transparent bounces (by allocating storage for
|
|
* intersections when they don't fit into the stack storage).
|
|
*
|
|
* On GPU it'll only handle SHADOW_STACK_MAX_HITS-1 intersections, so this
|
|
* is something to be kept an eye on.
|
|
*/
|
|
|
|
# define SHADOW_STACK_MAX_HITS 64
|
|
|
|
/* Actual logic with traversal loop implementation which is free from device
|
|
* specific tweaks.
|
|
*
|
|
* Note that hits array should be as big as max_hits+1.
|
|
*/
|
|
ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
|
|
ShaderData *sd,
|
|
ShaderData *shadow_sd,
|
|
ccl_addr_space PathState *state,
|
|
const uint visibility,
|
|
Ray *ray,
|
|
Intersection *hits,
|
|
uint max_hits,
|
|
float3 *shadow)
|
|
{
|
|
/* Intersect to find an opaque surface, or record all transparent
|
|
* surface hits.
|
|
*/
|
|
uint num_hits;
|
|
const bool blocked = scene_intersect_shadow_all(kg, ray, hits, visibility, max_hits, &num_hits);
|
|
# ifdef __VOLUME__
|
|
# ifdef __KERNEL_OPTIX__
|
|
VolumeState &volume_state = kg->volume_state;
|
|
# else
|
|
VolumeState volume_state;
|
|
# endif
|
|
# endif
|
|
/* If no opaque surface found but we did find transparent hits,
|
|
* shade them.
|
|
*/
|
|
if (!blocked && num_hits > 0) {
|
|
float3 throughput = one_float3();
|
|
float3 Pend = ray->P + ray->D * ray->t;
|
|
float last_t = 0.0f;
|
|
int bounce = state->transparent_bounce;
|
|
Intersection *isect = hits;
|
|
# ifdef __VOLUME__
|
|
# ifdef __SPLIT_KERNEL__
|
|
ccl_addr_space
|
|
# endif
|
|
PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
|
|
# endif
|
|
sort_intersections(hits, num_hits);
|
|
for (int hit = 0; hit < num_hits; hit++, isect++) {
|
|
/* Adjust intersection distance for moving ray forward. */
|
|
float new_t = isect->t;
|
|
isect->t -= last_t;
|
|
/* Skip hit if we did not move forward, step by step raytracing
|
|
* would have skipped it as well then.
|
|
*/
|
|
if (last_t == new_t) {
|
|
continue;
|
|
}
|
|
last_t = new_t;
|
|
/* Attenuate the throughput. */
|
|
if (shadow_handle_transparent_isect(kg,
|
|
shadow_sd,
|
|
state,
|
|
# ifdef __VOLUME__
|
|
ps,
|
|
# endif
|
|
isect,
|
|
ray,
|
|
&throughput)) {
|
|
return true;
|
|
}
|
|
/* Move ray forward. */
|
|
ray->P = shadow_sd->P;
|
|
if (ray->t != FLT_MAX) {
|
|
ray->D = normalize_len(Pend - ray->P, &ray->t);
|
|
}
|
|
bounce++;
|
|
}
|
|
# ifdef __VOLUME__
|
|
/* Attenuation for last line segment towards light. */
|
|
if (ps->volume_stack[0].shader != SHADER_NONE) {
|
|
kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
|
|
}
|
|
# endif
|
|
*shadow = throughput;
|
|
return is_zero(throughput);
|
|
}
|
|
# ifdef __VOLUME__
|
|
if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
|
|
/* Apply attenuation from current volume shader. */
|
|
# ifdef __SPLIT_KERNEL__
|
|
ccl_addr_space
|
|
# endif
|
|
PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
|
|
kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
|
|
}
|
|
# endif
|
|
return blocked;
|
|
}
|
|
|
|
/* Here we do all device specific trickery before invoking actual traversal
|
|
* loop to help readability of the actual logic.
|
|
*/
|
|
ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
|
|
ShaderData *sd,
|
|
ShaderData *shadow_sd,
|
|
ccl_addr_space PathState *state,
|
|
const uint visibility,
|
|
Ray *ray,
|
|
uint max_hits,
|
|
float3 *shadow)
|
|
{
|
|
# ifdef __SPLIT_KERNEL__
|
|
Intersection hits_[SHADOW_STACK_MAX_HITS];
|
|
Intersection *hits = &hits_[0];
|
|
# elif defined(__KERNEL_CUDA__)
|
|
Intersection *hits = kg->hits_stack;
|
|
# else
|
|
Intersection hits_stack[SHADOW_STACK_MAX_HITS];
|
|
Intersection *hits = hits_stack;
|
|
# endif
|
|
# ifndef __KERNEL_GPU__
|
|
/* Prefer to use stack but use dynamic allocation if too deep max hits
|
|
* we need max_hits + 1 storage space due to the logic in
|
|
* scene_intersect_shadow_all which will first store and then check if
|
|
* the limit is exceeded.
|
|
*
|
|
* Ignore this on GPU because of slow/unavailable malloc().
|
|
*/
|
|
if (max_hits + 1 > SHADOW_STACK_MAX_HITS) {
|
|
if (kg->transparent_shadow_intersections == NULL) {
|
|
const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
|
|
kg->transparent_shadow_intersections = (Intersection *)malloc(sizeof(Intersection) *
|
|
(transparent_max_bounce + 1));
|
|
}
|
|
hits = kg->transparent_shadow_intersections;
|
|
}
|
|
# endif /* __KERNEL_GPU__ */
|
|
/* Invoke actual traversal. */
|
|
return shadow_blocked_transparent_all_loop(
|
|
kg, sd, shadow_sd, state, visibility, ray, hits, max_hits, shadow);
|
|
}
|
|
# endif /* __SHADOW_RECORD_ALL__ */
|
|
|
|
# if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__)
|
|
/* Shadow function to compute how much light is blocked,
|
|
*
|
|
* Here we raytrace from one transparent surface to the next step by step.
|
|
* To minimize overhead in cases where we don't need transparent shadows, we
|
|
* first trace a regular shadow ray. We check if the hit primitive was
|
|
* potentially transparent, and only in that case start marching. this gives
|
|
* one extra ray cast for the cases were we do want transparency.
|
|
*/
|
|
|
|
/* This function is only implementing device-independent traversal logic
|
|
* which requires some precalculation done.
|
|
*/
|
|
ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg,
|
|
ShaderData *sd,
|
|
ShaderData *shadow_sd,
|
|
ccl_addr_space PathState *state,
|
|
const uint visibility,
|
|
Ray *ray,
|
|
Intersection *isect,
|
|
const bool blocked,
|
|
const bool is_transparent_isect,
|
|
float3 *shadow)
|
|
{
|
|
# ifdef __VOLUME__
|
|
# ifdef __KERNEL_OPTIX__
|
|
VolumeState &volume_state = kg->volume_state;
|
|
# else
|
|
VolumeState volume_state;
|
|
# endif
|
|
# endif
|
|
if (blocked && is_transparent_isect) {
|
|
float3 throughput = one_float3();
|
|
float3 Pend = ray->P + ray->D * ray->t;
|
|
int bounce = state->transparent_bounce;
|
|
# ifdef __VOLUME__
|
|
# ifdef __SPLIT_KERNEL__
|
|
ccl_addr_space
|
|
# endif
|
|
PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
|
|
# endif
|
|
for (;;) {
|
|
if (bounce >= kernel_data.integrator.transparent_max_bounce) {
|
|
return true;
|
|
}
|
|
if (!scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect)) {
|
|
break;
|
|
}
|
|
if (!shader_transparent_shadow(kg, isect)) {
|
|
return true;
|
|
}
|
|
/* Attenuate the throughput. */
|
|
if (shadow_handle_transparent_isect(kg,
|
|
shadow_sd,
|
|
state,
|
|
# ifdef __VOLUME__
|
|
ps,
|
|
# endif
|
|
isect,
|
|
ray,
|
|
&throughput)) {
|
|
return true;
|
|
}
|
|
/* Move ray forward. */
|
|
ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng);
|
|
if (ray->t != FLT_MAX) {
|
|
ray->D = normalize_len(Pend - ray->P, &ray->t);
|
|
}
|
|
bounce++;
|
|
}
|
|
# ifdef __VOLUME__
|
|
/* Attenuation for last line segment towards light. */
|
|
if (ps->volume_stack[0].shader != SHADER_NONE) {
|
|
kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
|
|
}
|
|
# endif
|
|
*shadow *= throughput;
|
|
return is_zero(throughput);
|
|
}
|
|
# ifdef __VOLUME__
|
|
if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
|
|
/* Apply attenuation from current volume shader. */
|
|
# ifdef __SPLIT_KERNEL__
|
|
ccl_addr_space
|
|
# endif
|
|
PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
|
|
kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
|
|
}
|
|
# endif
|
|
return blocked;
|
|
}
|
|
|
|
ccl_device bool shadow_blocked_transparent_stepped(KernelGlobals *kg,
|
|
ShaderData *sd,
|
|
ShaderData *shadow_sd,
|
|
ccl_addr_space PathState *state,
|
|
const uint visibility,
|
|
Ray *ray,
|
|
Intersection *isect,
|
|
float3 *shadow)
|
|
{
|
|
bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
|
|
bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, isect) : false;
|
|
return shadow_blocked_transparent_stepped_loop(
|
|
kg, sd, shadow_sd, state, visibility, ray, isect, blocked, is_transparent_isect, shadow);
|
|
}
|
|
|
|
# endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
|
|
#endif /* __TRANSPARENT_SHADOWS__ */
|
|
|
|
ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
|
|
ShaderData *sd,
|
|
ShaderData *shadow_sd,
|
|
ccl_addr_space PathState *state,
|
|
Ray *ray,
|
|
float3 *shadow)
|
|
{
|
|
*shadow = one_float3();
|
|
#if !defined(__KERNEL_OPTIX__)
|
|
/* Some common early checks.
|
|
* Avoid conditional trace call in OptiX though, since those hurt performance there.
|
|
*/
|
|
if (ray->t == 0.0f) {
|
|
return false;
|
|
}
|
|
#endif
|
|
#ifdef __SHADOW_TRICKS__
|
|
const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER) ? PATH_RAY_SHADOW_NON_CATCHER :
|
|
PATH_RAY_SHADOW;
|
|
#else
|
|
const uint visibility = PATH_RAY_SHADOW;
|
|
#endif
|
|
/* Do actual shadow shading.
|
|
* First of all, we check if integrator requires transparent shadows.
|
|
* if not, we use simplest and fastest ever way to calculate occlusion.
|
|
* Do not do this in OptiX to avoid the additional trace call.
|
|
*/
|
|
#if !defined(__KERNEL_OPTIX__) || !defined(__TRANSPARENT_SHADOWS__)
|
|
Intersection isect;
|
|
# ifdef __TRANSPARENT_SHADOWS__
|
|
if (!kernel_data.integrator.transparent_shadows)
|
|
# endif
|
|
{
|
|
return shadow_blocked_opaque(kg, shadow_sd, state, visibility, ray, &isect, shadow);
|
|
}
|
|
#endif
|
|
#ifdef __TRANSPARENT_SHADOWS__
|
|
# ifdef __SHADOW_RECORD_ALL__
|
|
/* For the transparent shadows we try to use record-all logic on the
|
|
* devices which supports this.
|
|
*/
|
|
const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
|
|
/* Check transparent bounces here, for volume scatter which can do
|
|
* lighting before surface path termination is checked.
|
|
*/
|
|
if (state->transparent_bounce >= transparent_max_bounce) {
|
|
return true;
|
|
}
|
|
uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
|
|
# if defined(__KERNEL_OPTIX__)
|
|
/* Always use record-all behavior in OptiX, but ensure there are no out of bounds
|
|
* accesses to the hit stack.
|
|
*/
|
|
max_hits = min(max_hits, SHADOW_STACK_MAX_HITS - 1);
|
|
# elif defined(__KERNEL_GPU__)
|
|
/* On GPU we do tricky with tracing opaque ray first, this avoids speed
|
|
* regressions in some files.
|
|
*
|
|
* TODO(sergey): Check why using record-all behavior causes slowdown in such
|
|
* cases. Could that be caused by a higher spill pressure?
|
|
*/
|
|
const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect);
|
|
const bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, &isect) : false;
|
|
if (!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) {
|
|
return shadow_blocked_transparent_stepped_loop(
|
|
kg, sd, shadow_sd, state, visibility, ray, &isect, blocked, is_transparent_isect, shadow);
|
|
}
|
|
# endif /* __KERNEL_GPU__ */
|
|
return shadow_blocked_transparent_all(
|
|
kg, sd, shadow_sd, state, visibility, ray, max_hits, shadow);
|
|
# else /* __SHADOW_RECORD_ALL__ */
|
|
/* Fallback to a slowest version which works on all devices. */
|
|
return shadow_blocked_transparent_stepped(
|
|
kg, sd, shadow_sd, state, visibility, ray, &isect, shadow);
|
|
# endif /* __SHADOW_RECORD_ALL__ */
|
|
#endif /* __TRANSPARENT_SHADOWS__ */
|
|
}
|
|
|
|
#undef SHADOW_STACK_MAX_HITS
|
|
|
|
CCL_NAMESPACE_END
|