diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index ec8c297fbd5..3319e2c2435 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -84,7 +84,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
 		light_ray.dP = sd->dP;
 		light_ray.dD = differential3_zero();
 
-		if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
+		if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
 			path_radiance_accum_ao(L, state, throughput, ao_alpha, ao_bsdf, ao_shadow);
 		}
 		else {
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index c62c3a25405..dde40674ee6 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -54,7 +54,7 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
 			light_ray.dP = sd->dP;
 			light_ray.dD = differential3_zero();
 
-			if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
+			if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
 				path_radiance_accum_ao(L, state, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow);
 			}
 			else {
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index 3d10736e90c..6c3a444e48a 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -67,7 +67,7 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(
 						/* trace shadow ray */
 						float3 shadow;
 
-						if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+						if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
 							/* accumulate */
 							path_radiance_accum_light(L, state, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
 						}
@@ -104,7 +104,7 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(
 						/* trace shadow ray */
 						float3 shadow;
 
-						if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+						if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
 							/* accumulate */
 							path_radiance_accum_light(L, state, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
 						}
@@ -130,7 +130,7 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(
 				/* trace shadow ray */
 				float3 shadow;
 
-				if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+				if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
 					/* accumulate */
 					path_radiance_accum_light(L, state, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, is_lamp);
 				}
@@ -257,7 +257,7 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg,
 			/* trace shadow ray */
 			float3 shadow;
 
-			if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+			if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
 				/* accumulate */
 				path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
 			}
diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h
index 3661432f0b7..c9c7f447c42 100644
--- a/intern/cycles/kernel/kernel_path_volume.h
+++ b/intern/cycles/kernel/kernel_path_volume.h
@@ -52,7 +52,7 @@ ccl_device_inline void kernel_path_volume_connect_light(
 			/* trace shadow ray */
 			float3 shadow;
 
-			if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+			if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
 				/* accumulate */
 				path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
 			}
@@ -179,7 +179,7 @@ ccl_device void kernel_branched_path_volume_connect_light(
 						/* trace shadow ray */
 						float3 shadow;
 
-						if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+						if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
 							/* accumulate */
 							path_radiance_accum_light(L, state, tp*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
 						}
@@ -228,7 +228,7 @@ ccl_device void kernel_branched_path_volume_connect_light(
 						/* trace shadow ray */
 						float3 shadow;
 
-						if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+						if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
 							/* accumulate */
 							path_radiance_accum_light(L, state, tp*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
 						}
@@ -266,7 +266,7 @@ ccl_device void kernel_branched_path_volume_connect_light(
 				/* trace shadow ray */
 				float3 shadow;
 
-				if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
+				if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
 					/* accumulate */
 					path_radiance_accum_light(L, state, tp, &L_light, shadow, 1.0f, is_lamp);
 				}
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index bb6bdc7fbd0..b409aca7674 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -119,12 +119,55 @@ ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
 
 #  define SHADOW_STACK_MAX_HITS 64
 
+#  ifdef __VOLUME__
+/* Scratch storage for the PathState copy made by
+ * shadow_blocked_volume_path_state(). Has no members in the split kernel,
+ * where kernel_split_state.state_shadow is used instead.
+ */
+typedef struct VolumeState {
+#    ifdef __SPLIT_KERNEL__
+#    else
+	PathState ps;
+#    endif
+} VolumeState;
+
+/* Get PathState ready for use for volume stack evaluation.
+ *
+ * Copies *state into scratch storage and returns a pointer to that copy, so
+ * the caller's state is left untouched. The copy's volume stack is adjusted
+ * when the ray direction points against the geometric normal of sd.
+ */
+ccl_device_inline PathState *shadow_blocked_volume_path_state(
+	KernelGlobals *kg,
+	VolumeState *volume_state,
+	ccl_addr_space PathState *state,
+	ShaderData *sd,
+	Ray *ray)
+{
+#    ifdef __SPLIT_KERNEL__
+	ccl_addr_space PathState *ps =
+		&kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
+#    else
+	PathState *ps = &volume_state->ps;
+#    endif
+	*ps = *state;
+	/* We are checking for shadow on the "other" side of the surface, so need
+	 * to discard volume we are currently at.
+	 */
+	if(dot(sd->Ng, ray->D) < 0.0f) {
+		kernel_volume_stack_enter_exit(kg, sd, ps->volume_stack);
+	}
+	return ps;
+}
+#  endif /* __VOLUME__ */
+
 /* Actual logic with traversal loop implementation which is free from device
  * specific tweaks.
  *
  * Note that hits array should be as big as max_hits+1.
  */
 ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
+                                                    ShaderData *sd,
                                                     ShaderData *shadow_sd,
                                                     ccl_addr_space PathState *state,
                                                     const uint visibility,
@@ -143,6 +186,9 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
 	                                                visibility,
 	                                                max_hits,
 	                                                &num_hits);
+#  ifdef __VOLUME__
+	VolumeState volume_state;
+#  endif
 	/* If no opaque surface found but we did find transparent hits,
 	 * shade them.
 	 */
@@ -153,13 +199,11 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
 		int bounce = state->transparent_bounce;
 		Intersection *isect = hits;
 #  ifdef __VOLUME__
-#    ifdef __SPLIT_KERNEL__
-		ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
-#    else
-		PathState ps_object;
-		PathState *ps = &ps_object;
-#    endif
-		*ps = *state;
+		PathState *ps = shadow_blocked_volume_path_state(kg,
+		                                                 &volume_state,
+		                                                 state,
+		                                                 sd,
+		                                                 ray);
 #  endif
 		sort_intersections(hits, num_hits);
 		for(int hit = 0; hit < num_hits; hit++, isect++) {
@@ -205,7 +249,12 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
 #  ifdef __VOLUME__
 	if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
 		/* Apply attenuation from current volume shader. */
-		kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
+		PathState *ps = shadow_blocked_volume_path_state(kg,
+		                                                 &volume_state,
+		                                                 state,
+		                                                 sd,
+		                                                 ray);
+		kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
 	}
 #  endif
 	return blocked;
@@ -215,6 +264,7 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
  * loop to help readability of the actual logic.
  */
 ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
+                                               ShaderData *sd,
                                                ShaderData *shadow_sd,
                                                ccl_addr_space PathState *state,
                                                const uint visibility,
@@ -250,6 +300,7 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
 #  endif /* __KERNEL_GPU__ */
 	/* Invoke actual traversal. */
 	return shadow_blocked_transparent_all_loop(kg,
+	                                           sd,
 	                                           shadow_sd,
 	                                           state,
 	                                           visibility,
@@ -275,6 +326,7 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
  */
 ccl_device bool shadow_blocked_transparent_stepped_loop(
 	KernelGlobals *kg,
+	ShaderData *sd,
 	ShaderData *shadow_sd,
 	ccl_addr_space PathState *state,
 	const uint visibility,
@@ -284,18 +336,19 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(
 	const bool is_transparent_isect,
 	float3 *shadow)
 {
+#  ifdef __VOLUME__
+	VolumeState volume_state;
+#  endif
 	if(blocked && is_transparent_isect) {
 		float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
 		float3 Pend = ray->P + ray->D*ray->t;
 		int bounce = state->transparent_bounce;
 #  ifdef __VOLUME__
-#    ifdef __SPLIT_KERNEL__
-		ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
-#    else
-		PathState ps_object;
-		PathState *ps = &ps_object;
-#    endif
-		*ps = *state;
+		PathState *ps = shadow_blocked_volume_path_state(kg,
+		                                                 &volume_state,
+		                                                 state,
+		                                                 sd,
+		                                                 ray);
 #  endif
 		for(;;) {
 			if(bounce >= kernel_data.integrator.transparent_max_bounce) {
@@ -345,7 +398,12 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(
 #  ifdef __VOLUME__
 	if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
 		/* Apply attenuation from current volume shader. */
-		kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
+		PathState *ps = shadow_blocked_volume_path_state(kg,
+		                                                 &volume_state,
+		                                                 state,
+		                                                 sd,
+		                                                 ray);
+		kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
 	}
 #  endif
 	return blocked;
@@ -353,6 +411,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(
 
 ccl_device bool shadow_blocked_transparent_stepped(
 	KernelGlobals *kg,
+	ShaderData *sd,
 	ShaderData *shadow_sd,
 	ccl_addr_space PathState *state,
 	const uint visibility,
@@ -370,6 +429,7 @@ ccl_device bool shadow_blocked_transparent_stepped(
 		? shader_transparent_shadow(kg, isect)
 		: false;
 	return shadow_blocked_transparent_stepped_loop(kg,
+	                                               sd,
 	                                               shadow_sd,
 	                                               state,
 	                                               visibility,
@@ -384,6 +444,7 @@ ccl_device bool shadow_blocked_transparent_stepped(
 #endif /* __TRANSPARENT_SHADOWS__ */
 
 ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
+                                      ShaderData *sd,
                                       ShaderData *shadow_sd,
                                       ccl_addr_space PathState *state,
                                       Ray *ray_input,
@@ -452,6 +513,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
 	   max_hits + 1 >= SHADOW_STACK_MAX_HITS)
 	{
 		return shadow_blocked_transparent_stepped_loop(kg,
+		                                               sd,
 		                                               shadow_sd,
 		                                               state,
 		                                               visibility,
@@ -463,6 +525,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
 	}
 #  endif /* __KERNEL_GPU__ */
 	return shadow_blocked_transparent_all(kg,
+	                                      sd,
 	                                      shadow_sd,
 	                                      state,
 	                                      visibility,
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index 19bfee6d039..b52f9a5eb81 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -89,6 +89,7 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
 	float3 shadow;
 
 	if(!shadow_blocked(kg,
+	                   sd,
 	                   emission_sd,
 	                   state,
 	                   &ray,