Cycles volume: change heterogeneous volume sampling in branched path first hit.

This now uses decoupled ray marching and removes the probabilistic scattering. This means that each AA sample will be slower but contain less noise, hopefully reducing the total render time needed to reach the same noise level. For those following along, there are still a number of volume sampling improvements to do: all-light sampling, multiple importance sampling, transmittance threshold, better indirect light handling, and multiple scatter approximation.
2014-03-29 13:03:50 +01:00 · 2014-03-29 13:03:50 +01:00 · f449542d6a
commit f449542d6a
parent 3847d0c0df
2 changed files with 77 additions and 53 deletions
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@ -261,7 +261,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_g

 			ShaderData volume_sd;
 			VolumeIntegrateResult result = kernel_volume_integrate(kg, &state,
-				&volume_sd, &volume_ray, L, &throughput, rng, false);
+				&volume_sd, &volume_ray, L, &throughput, rng);

 			if(result == VOLUME_PATH_SCATTERED) {
 				if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &throughput, &state, L, &ray, 1.0f))
@ -650,7 +650,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,

 			ShaderData volume_sd;
 			VolumeIntegrateResult result = kernel_volume_integrate(kg, &state,
-				&volume_sd, &volume_ray, &L, &throughput, rng, false);
+				&volume_sd, &volume_ray, &L, &throughput, rng);

 			if(result == VOLUME_PATH_SCATTERED) {
 				if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &throughput, &state, &L, &ray, 1.0f))
@ -1090,6 +1090,60 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 			Ray volume_ray = ray;
 			volume_ray.t = (hit)? isect.t: FLT_MAX;

+#ifdef __KERNEL_CPU__
+			/* decoupled ray marching only supported on CPU */
+			bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
+
+			/* cache steps along volume for repeated sampling */
+			VolumeSegment volume_segment;
+			ShaderData volume_sd;
+
+			shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce);
+			kernel_volume_decoupled_record(kg, &state,
+				&volume_ray, &volume_sd, &volume_segment, heterogeneous);
+
+			/* sample scattering */
+			int num_samples = kernel_data.integrator.volume_samples;
+			float num_samples_inv = 1.0f/num_samples;
+
+			for(int j = 0; j < num_samples; j++) {
+				/* workaround to fix correlation bug in T38710, can find better solution
+				 * in random number generator later, for now this is done here to not impact
+				 * performance of rendering without volumes */
+				RNG tmp_rng = cmj_hash(*rng, state.rng_offset);
+
+				PathState ps = state;
+				Ray pray = ray;
+				float3 tp = throughput;
+
+				/* branch RNG state */
+				path_state_branch(&ps, j, num_samples);
+
+				VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+					&ps, &volume_ray, &volume_sd, &tp, &tmp_rng, &volume_segment);
+				
+				if(result == VOLUME_PATH_SCATTERED) {
+					/* todo: use all-light sampling */
+					if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) {
+						kernel_path_indirect(kg, rng, pray, buffer, tp*num_samples_inv, num_samples, ps, &L);
+
+						/* for render passes, sum and reset indirect light pass variables
+						 * for the next samples */
+						path_radiance_sum_indirect(&L);
+						path_radiance_reset_indirect(&L);
+					}
+				}
+			}
+
+			/* emission and transmittance */
+			if(volume_segment.closure_flag & SD_EMISSION)
+				path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
+			throughput *= volume_segment.accum_transmittance;
+
+			/* free cached steps */
+			kernel_volume_decoupled_free(kg, &volume_segment);
+#else
+			/* GPU: no decoupled ray marching, scatter probalistically */
 			int num_samples = kernel_data.integrator.volume_samples;
 			float num_samples_inv = 1.0f/num_samples;

@ -1106,7 +1160,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 				path_state_branch(&ps, j, num_samples);

 				VolumeIntegrateResult result = kernel_volume_integrate(kg, &ps,
-					&volume_sd, &volume_ray, &L, &tp, rng, true);
+					&volume_sd, &volume_ray, &L, &tp, rng);
 				
 				if(result == VOLUME_PATH_SCATTERED) {
 					/* todo: use all-light sampling */
@ -1123,6 +1177,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in

 			/* todo: avoid this calculation using decoupled ray marching */
 			kernel_volume_shadow(kg, &state, &volume_ray, &throughput);
+#endif
 		}
 #endif

--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@ -312,7 +312,7 @@ ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coe
 * the volume shading coefficient for the entire line segment */
 ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg,
 	PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput,
-	RNG *rng, bool branched)
+	RNG *rng)
 {
 	VolumeShaderCoefficients coeff;

@ -337,12 +337,18 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba

 		float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE);

-		if(branched) {
-			/* branched path tracing: we always scatter in the segment */
+		/* decide if we will hit or miss */
+		float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
+		float sample_transmittance = expf(-sample_sigma_t * t);
+
+		if(xi >= sample_transmittance) {
+			/* scattering */
 			float3 pdf;
 			float sample_t;

-			/* scattering */
+			/* rescale random number so we can reuse it */
+			xi = (xi - sample_transmittance)/(1.0f - sample_transmittance);
+
 			if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) { 
 				/* distance sampling */
 				sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf);
@ -359,53 +365,17 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
 				pdf = make_float3(equi_pdf, equi_pdf, equi_pdf);
 			}

+			/* modifiy pdf for hit/miss decision */
+			pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t);
+
 			new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf);
 			t = sample_t;
 		}
 		else {
-			/* regular path tracing: we probalistically scatter in the segment
-			 * with probability the transmittance over the segment */
-
-			/* decide if we will hit or miss */
-			float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
-			float sample_transmittance = expf(-sample_sigma_t * t);
-
-			if(xi >= sample_transmittance) {
-				/* scattering */
-				float3 pdf;
-				float sample_t;
-
-				/* rescale random number so we can reuse it */
-				xi = (xi - sample_transmittance)/(1.0f - sample_transmittance);
-
-				if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) { 
-					/* distance sampling */
-					sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf);
-				}
-				else {
-					/* equiangular sampling */
-					float3 light_P;
-					float equi_pdf;
-					if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P))
-						return VOLUME_PATH_MISSED;
-
-					sample_t = kernel_volume_equiangular_sample(ray, light_P, xi, &equi_pdf);
-					transmittance = volume_color_transmittance(sigma_t, sample_t);
-					pdf = make_float3(equi_pdf, equi_pdf, equi_pdf);
-				}
-
-				/* modifiy pdf for hit/miss decision */
-				pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t);
-
-				new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf);
-				t = sample_t;
-			}
-			else {
-				/* no scattering */
-				transmittance = volume_color_transmittance(sigma_t, t);
-				float pdf = (transmittance.x + transmittance.y + transmittance.z) * (1.0f/3.0f);
-				new_tp = *throughput * transmittance / pdf;
-			}
+			/* no scattering */
+			transmittance = volume_color_transmittance(sigma_t, t);
+			float pdf = (transmittance.x + transmittance.y + transmittance.z) * (1.0f/3.0f);
+			new_tp = *throughput * transmittance / pdf;
 		}
 	}
 	else if(closure_flag & SD_ABSORPTION) {
@ -876,8 +846,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
 * ray, with the assumption that there are no surfaces blocking light
 * between the endpoints */
 ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
-	PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng,
-	bool branched)
+	PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng)
 {
 	/* workaround to fix correlation bug in T38710, can find better solution
 	 * in random number generator later, for now this is done here to not impact
@ -903,7 +872,7 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals
 	if(heterogeneous)
 		return kernel_volume_integrate_heterogeneous(kg, state, ray, sd, L, throughput, &tmp_rng);
 	else
-		return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng, branched);
+		return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng);
 #endif
 }