forked from bartvdbraak/blender
Cycles CUDA: reduce branched path stack memory by sharing indirect ShaderData.
Saves about 15% of stack memory for the branched path kernel.
This commit is contained in:
parent
7928030eff
commit
b49185df99
@ -30,8 +30,8 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
|
||||
Ray ray;
|
||||
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
|
||||
|
||||
/* emission shader data memory used by various functions */
|
||||
ShaderData emission_sd;
|
||||
/* emission and indirect shader data memory used by various functions */
|
||||
ShaderData emission_sd, indirect_sd;
|
||||
|
||||
ray.P = sd->P + sd->Ng;
|
||||
ray.D = -sd->Ng;
|
||||
@ -94,6 +94,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
|
||||
&L_sample,
|
||||
&throughput);
|
||||
kernel_path_indirect(kg,
|
||||
&indirect_sd,
|
||||
&emission_sd,
|
||||
&rng,
|
||||
&ray,
|
||||
@ -117,7 +118,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
|
||||
state.ray_t = 0.0f;
|
||||
#endif
|
||||
/* compute indirect light */
|
||||
kernel_path_indirect(kg, &emission_sd, &rng, &ray, throughput, 1, &state, &L_sample);
|
||||
kernel_path_indirect(kg, &indirect_sd, &emission_sd, &rng, &ray, throughput, 1, &state, &L_sample);
|
||||
|
||||
/* sum and reset indirect light pass variables for the next samples */
|
||||
path_radiance_sum_indirect(&L_sample);
|
||||
@ -144,7 +145,8 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
|
||||
/* sample subsurface scattering */
|
||||
if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
|
||||
/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
|
||||
kernel_branched_path_subsurface_scatter(kg, sd, &emission_sd, &L_sample, &state, &rng, &ray, throughput);
|
||||
kernel_branched_path_subsurface_scatter(kg, sd, &indirect_sd,
|
||||
&emission_sd, &L_sample, &state, &rng, &ray, throughput);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -161,7 +163,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
|
||||
|
||||
/* indirect light */
|
||||
kernel_branched_path_surface_indirect_light(kg, &rng,
|
||||
sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
|
||||
sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -53,6 +53,7 @@
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
ShaderData *emission_sd,
|
||||
RNG *rng,
|
||||
Ray *ray,
|
||||
@ -61,9 +62,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
PathState *state,
|
||||
PathRadiance *L)
|
||||
{
|
||||
/* shader data memory used for both volumes and surfaces, saves stack space */
|
||||
ShaderData sd;
|
||||
|
||||
/* path iteration */
|
||||
for(;;) {
|
||||
/* intersect scene */
|
||||
@ -121,12 +119,12 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
VolumeSegment volume_segment;
|
||||
|
||||
shader_setup_from_volume(kg,
|
||||
&sd,
|
||||
sd,
|
||||
&volume_ray);
|
||||
kernel_volume_decoupled_record(kg,
|
||||
state,
|
||||
&volume_ray,
|
||||
&sd,
|
||||
sd,
|
||||
&volume_segment,
|
||||
heterogeneous);
|
||||
|
||||
@ -149,7 +147,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
/* direct light sampling */
|
||||
kernel_branched_path_volume_connect_light(kg,
|
||||
rng,
|
||||
&sd,
|
||||
sd,
|
||||
emission_sd,
|
||||
throughput,
|
||||
state,
|
||||
@ -167,7 +165,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
result = kernel_volume_decoupled_scatter(kg,
|
||||
state,
|
||||
&volume_ray,
|
||||
&sd,
|
||||
sd,
|
||||
&throughput,
|
||||
rphase,
|
||||
rscatter,
|
||||
@ -182,7 +180,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
if(result == VOLUME_PATH_SCATTERED) {
|
||||
if(kernel_path_volume_bounce(kg,
|
||||
rng,
|
||||
&sd,
|
||||
sd,
|
||||
&throughput,
|
||||
state,
|
||||
L,
|
||||
@ -203,14 +201,14 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
{
|
||||
/* integrate along volume segment with distance sampling */
|
||||
VolumeIntegrateResult result = kernel_volume_integrate(
|
||||
kg, state, &sd, &volume_ray, L, &throughput, rng, heterogeneous);
|
||||
kg, state, sd, &volume_ray, L, &throughput, rng, heterogeneous);
|
||||
|
||||
# ifdef __VOLUME_SCATTER__
|
||||
if(result == VOLUME_PATH_SCATTERED) {
|
||||
/* direct lighting */
|
||||
kernel_path_volume_connect_light(kg,
|
||||
rng,
|
||||
&sd,
|
||||
sd,
|
||||
emission_sd,
|
||||
throughput,
|
||||
state,
|
||||
@ -219,7 +217,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
/* indirect light bounce */
|
||||
if(kernel_path_volume_bounce(kg,
|
||||
rng,
|
||||
&sd,
|
||||
sd,
|
||||
&throughput,
|
||||
state,
|
||||
L,
|
||||
@ -251,13 +249,13 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
|
||||
/* setup shading */
|
||||
shader_setup_from_ray(kg,
|
||||
&sd,
|
||||
sd,
|
||||
&isect,
|
||||
ray);
|
||||
float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
|
||||
shader_eval_surface(kg, &sd, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
|
||||
shader_eval_surface(kg, sd, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
|
||||
#ifdef __BRANCHED_PATH__
|
||||
shader_merge_closures(&sd);
|
||||
shader_merge_closures(sd);
|
||||
#endif
|
||||
|
||||
/* blurring of bsdf after bounces, for rays that have a small likelihood
|
||||
@ -267,15 +265,15 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
|
||||
if(blur_pdf < 1.0f) {
|
||||
float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
|
||||
shader_bsdf_blur(kg, &sd, blur_roughness);
|
||||
shader_bsdf_blur(kg, sd, blur_roughness);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __EMISSION__
|
||||
/* emission */
|
||||
if(sd.flag & SD_EMISSION) {
|
||||
if(sd->flag & SD_EMISSION) {
|
||||
float3 emission = indirect_primitive_emission(kg,
|
||||
&sd,
|
||||
sd,
|
||||
isect.t,
|
||||
state->flag,
|
||||
state->ray_pdf);
|
||||
@ -305,30 +303,30 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
|
||||
#ifdef __AO__
|
||||
/* ambient occlusion */
|
||||
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
|
||||
if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
|
||||
float bsdf_u, bsdf_v;
|
||||
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
|
||||
|
||||
float ao_factor = kernel_data.background.ao_factor;
|
||||
float3 ao_N;
|
||||
float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N);
|
||||
float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
|
||||
float3 ao_D;
|
||||
float ao_pdf;
|
||||
float3 ao_alpha = make_float3(0.0f, 0.0f, 0.0f);
|
||||
|
||||
sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
|
||||
|
||||
if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
|
||||
if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
|
||||
Ray light_ray;
|
||||
float3 ao_shadow;
|
||||
|
||||
light_ray.P = ray_offset(sd.P, sd.Ng);
|
||||
light_ray.P = ray_offset(sd->P, sd->Ng);
|
||||
light_ray.D = ao_D;
|
||||
light_ray.t = kernel_data.background.ao_distance;
|
||||
# ifdef __OBJECT_MOTION__
|
||||
light_ray.time = sd.time;
|
||||
light_ray.time = sd->time;
|
||||
# endif
|
||||
light_ray.dP = sd.dP;
|
||||
light_ray.dP = sd->dP;
|
||||
light_ray.dD = differential3_zero();
|
||||
|
||||
if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
|
||||
@ -346,9 +344,9 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
#ifdef __SUBSURFACE__
|
||||
/* bssrdf scatter to a different location on the same object, replacing
|
||||
* the closures with a diffuse BSDF */
|
||||
if(sd.flag & SD_BSSRDF) {
|
||||
if(sd->flag & SD_BSSRDF) {
|
||||
float bssrdf_probability;
|
||||
ShaderClosure *sc = subsurface_scatter_pick_closure(kg, &sd, &bssrdf_probability);
|
||||
ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
|
||||
|
||||
/* modify throughput for picking bssrdf or bsdf */
|
||||
throughput *= bssrdf_probability;
|
||||
@ -364,7 +362,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
PRNG_BSDF_U,
|
||||
&bssrdf_u, &bssrdf_v);
|
||||
subsurface_scatter_step(kg,
|
||||
&sd,
|
||||
sd,
|
||||
state,
|
||||
state->flag,
|
||||
sc,
|
||||
@ -380,7 +378,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
int all = kernel_data.integrator.sample_all_lights_indirect;
|
||||
kernel_branched_path_surface_connect_light(kg,
|
||||
rng,
|
||||
&sd,
|
||||
sd,
|
||||
emission_sd,
|
||||
state,
|
||||
throughput,
|
||||
@ -390,7 +388,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
}
|
||||
#endif
|
||||
|
||||
if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, state, L, ray))
|
||||
if(!kernel_path_surface_bounce(kg, rng, sd, &throughput, state, L, ray))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -64,8 +64,8 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
|
||||
|
||||
/* bounce off surface and integrate indirect light */
|
||||
ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
|
||||
RNG *rng, ShaderData *sd, ShaderData *emission_sd, float3 throughput,
|
||||
float num_samples_adjust, PathState *state, PathRadiance *L)
|
||||
RNG *rng, ShaderData *sd, ShaderData *indirect_sd, ShaderData *emission_sd,
|
||||
float3 throughput, float num_samples_adjust, PathState *state, PathRadiance *L)
|
||||
{
|
||||
for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
|
||||
const ShaderClosure *sc = &ccl_fetch(sd, closure)[i];
|
||||
@ -112,6 +112,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
|
||||
}
|
||||
|
||||
kernel_path_indirect(kg,
|
||||
indirect_sd,
|
||||
emission_sd,
|
||||
rng,
|
||||
&bsdf_ray,
|
||||
@ -131,6 +132,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
|
||||
#ifdef __SUBSURFACE__
|
||||
ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
ShaderData *indirect_sd,
|
||||
ShaderData *emission_sd,
|
||||
PathRadiance *L,
|
||||
PathState *state,
|
||||
@ -222,6 +224,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
|
||||
kg,
|
||||
rng,
|
||||
&bssrdf_sd,
|
||||
indirect_sd,
|
||||
emission_sd,
|
||||
throughput,
|
||||
num_samples_inv,
|
||||
@ -244,8 +247,8 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
|
||||
|
||||
/* shader data memory used for both volumes and surfaces, saves stack space */
|
||||
ShaderData sd;
|
||||
/* shader data used by emission, shadows, volume stacks */
|
||||
ShaderData emission_sd;
|
||||
/* shader data used by emission, shadows, volume stacks, indirect path */
|
||||
ShaderData emission_sd, indirect_sd;
|
||||
|
||||
PathState state;
|
||||
path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
|
||||
@ -356,6 +359,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
|
||||
&pray))
|
||||
{
|
||||
kernel_path_indirect(kg,
|
||||
&indirect_sd,
|
||||
&emission_sd,
|
||||
rng,
|
||||
&pray,
|
||||
@ -413,6 +417,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
|
||||
&pray))
|
||||
{
|
||||
kernel_path_indirect(kg,
|
||||
&indirect_sd,
|
||||
&emission_sd,
|
||||
rng,
|
||||
&pray,
|
||||
@ -522,8 +527,8 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
|
||||
#ifdef __SUBSURFACE__
|
||||
/* bssrdf scatter to a different location on the same object */
|
||||
if(sd.flag & SD_BSSRDF) {
|
||||
kernel_branched_path_subsurface_scatter(kg, &sd, &emission_sd, &L, &state,
|
||||
rng, &ray, throughput);
|
||||
kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, &emission_sd,
|
||||
&L, &state, rng, &ray, throughput);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -541,7 +546,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
|
||||
|
||||
/* indirect light */
|
||||
kernel_branched_path_surface_indirect_light(kg, rng,
|
||||
&sd, &emission_sd, throughput, 1.0f, &hit_state, &L);
|
||||
&sd, &indirect_sd, &emission_sd, throughput, 1.0f, &hit_state, &L);
|
||||
|
||||
/* continue in case of transparency */
|
||||
throughput *= shader_bsdf_transparency(kg, &sd);
|
||||
|
Loading…
Reference in New Issue
Block a user