diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index e6b3dd7521b..81f47db41a6 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -261,7 +261,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_g ShaderData volume_sd; VolumeIntegrateResult result = kernel_volume_integrate(kg, &state, - &volume_sd, &volume_ray, L, &throughput, rng, false); + &volume_sd, &volume_ray, L, &throughput, rng); if(result == VOLUME_PATH_SCATTERED) { if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &throughput, &state, L, &ray, 1.0f)) @@ -650,7 +650,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, ShaderData volume_sd; VolumeIntegrateResult result = kernel_volume_integrate(kg, &state, - &volume_sd, &volume_ray, &L, &throughput, rng, false); + &volume_sd, &volume_ray, &L, &throughput, rng); if(result == VOLUME_PATH_SCATTERED) { if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &throughput, &state, &L, &ray, 1.0f)) @@ -1090,6 +1090,60 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in Ray volume_ray = ray; volume_ray.t = (hit)? 
isect.t: FLT_MAX; +#ifdef __KERNEL_CPU__ + /* decoupled ray marching only supported on CPU */ + bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); + + /* cache steps along volume for repeated sampling */ + VolumeSegment volume_segment; + ShaderData volume_sd; + + shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce); + kernel_volume_decoupled_record(kg, &state, + &volume_ray, &volume_sd, &volume_segment, heterogeneous); + + /* sample scattering */ + int num_samples = kernel_data.integrator.volume_samples; + float num_samples_inv = 1.0f/num_samples; + + for(int j = 0; j < num_samples; j++) { + /* workaround to fix correlation bug in T38710, can find better solution + * in random number generator later, for now this is done here to not impact + * performance of rendering without volumes */ + RNG tmp_rng = cmj_hash(*rng, state.rng_offset); + + PathState ps = state; + Ray pray = ray; + float3 tp = throughput; + + /* branch RNG state */ + path_state_branch(&ps, j, num_samples); + + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, + &ps, &volume_ray, &volume_sd, &tp, &tmp_rng, &volume_segment); + + if(result == VOLUME_PATH_SCATTERED) { + /* todo: use all-light sampling */ + if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) { + kernel_path_indirect(kg, rng, pray, buffer, tp*num_samples_inv, num_samples, ps, &L); + + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(&L); + path_radiance_reset_indirect(&L); + } + } + } + + /* emission and transmittance */ + if(volume_segment.closure_flag & SD_EMISSION) + path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce); + throughput *= volume_segment.accum_transmittance; + + /* free cached steps */ + kernel_volume_decoupled_free(kg, &volume_segment); +#else + /* GPU: no decoupled ray marching, scatter probabilistically */ 
int num_samples = kernel_data.integrator.volume_samples; float num_samples_inv = 1.0f/num_samples; @@ -1106,7 +1160,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in path_state_branch(&ps, j, num_samples); VolumeIntegrateResult result = kernel_volume_integrate(kg, &ps, - &volume_sd, &volume_ray, &L, &tp, rng, true); + &volume_sd, &volume_ray, &L, &tp, rng); if(result == VOLUME_PATH_SCATTERED) { /* todo: use all-light sampling */ @@ -1123,6 +1177,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in /* todo: avoid this calculation using decoupled ray marching */ kernel_volume_shadow(kg, &state, &volume_ray, &throughput); +#endif } #endif diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index eb32f0bd086..b035725bb86 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -312,7 +312,7 @@ ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coe * the volume shading coefficient for the entire line segment */ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, - RNG *rng, bool branched) + RNG *rng) { VolumeShaderCoefficients coeff; @@ -337,12 +337,18 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE); - if(branched) { - /* branched path tracing: we always scatter in the segment */ + /* decide if we will hit or miss */ + float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel); + float sample_transmittance = expf(-sample_sigma_t * t); + + if(xi >= sample_transmittance) { + /* scattering */ float3 pdf; float sample_t; - /* scattering */ + /* rescale random number so we can reuse it */ + xi = (xi - sample_transmittance)/(1.0f - sample_transmittance); + 
if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) { /* distance sampling */ sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf); @@ -359,53 +365,17 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba pdf = make_float3(equi_pdf, equi_pdf, equi_pdf); } + /* modify pdf for hit/miss decision */ + pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t); + new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf); t = sample_t; } else { - /* regular path tracing: we probalistically scatter in the segment - * with probability the transmittance over the segment */ - - /* decide if we will hit or miss */ - float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel); - float sample_transmittance = expf(-sample_sigma_t * t); - - if(xi >= sample_transmittance) { - /* scattering */ - float3 pdf; - float sample_t; - - /* rescale random number so we can reuse it */ - xi = (xi - sample_transmittance)/(1.0f - sample_transmittance); - - if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) { - /* distance sampling */ - sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf); - } - else { - /* equiangular sampling */ - float3 light_P; - float equi_pdf; - if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P)) - return VOLUME_PATH_MISSED; - - sample_t = kernel_volume_equiangular_sample(ray, light_P, xi, &equi_pdf); - transmittance = volume_color_transmittance(sigma_t, sample_t); - pdf = make_float3(equi_pdf, equi_pdf, equi_pdf); - } - - /* modifiy pdf for hit/miss decision */ - pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t); - - new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf); - t = sample_t; - } - else { - /* no scattering */ - transmittance = 
volume_color_transmittance(sigma_t, t); - float pdf = (transmittance.x + transmittance.y + transmittance.z) * (1.0f/3.0f); - new_tp = *throughput * transmittance / pdf; - } + /* no scattering */ + transmittance = volume_color_transmittance(sigma_t, t); + float pdf = (transmittance.x + transmittance.y + transmittance.z) * (1.0f/3.0f); + new_tp = *throughput * transmittance / pdf; } } else if(closure_flag & SD_ABSORPTION) { @@ -876,8 +846,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( * ray, with the assumption that there are no surfaces blocking light * between the endpoints */ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg, - PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, - bool branched) + PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng) { /* workaround to fix correlation bug in T38710, can find better solution * in random number generator later, for now this is done here to not impact @@ -903,7 +872,7 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals if(heterogeneous) return kernel_volume_integrate_heterogeneous(kg, state, ray, sd, L, throughput, &tmp_rng); else - return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng, branched); + return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng); #endif }