Cycles: improved Beckmann sampling using precomputed data

It turns out that the new Beckmann sampling function doesn't work well with Quasi Monte Carlo sampling, mainly near normal incidence where it can be worse than the previous sampler. In the new sampler the random number pattern gets split in two, warped and overlapped, which hurts the stratification; see the visualization in the differential revision. Now we use a precomputed table, which is much better behaved. GGX does not seem to benefit from using a precomputed table. The disadvantage is that this table adds 1MB of memory usage and 0.03s startup time to every render (on my quad core CPU). Differential Revision: https://developer.blender.org/D614
2014-06-20 21:21:05 +02:00 · 2014-06-20 21:21:05 +02:00 · 8fbd71e5f2
commit 8fbd71e5f2
parent 88d8358f91
8 changed files with 153 additions and 29 deletions
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@ -85,13 +85,13 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader
 		case CLOSURE_BSDF_MICROFACET_GGX_ID:
 		case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
 		case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
-			label = bsdf_microfacet_ggx_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+			label = bsdf_microfacet_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
 				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
 			break;
 		case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
 		case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
 		case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
-			label = bsdf_microfacet_beckmann_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+			label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
 				eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
 			break;
 #ifdef __ANISOTROPIC__
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@ -176,8 +176,8 @@ ccl_device float approx_erfinvf(float z)
 * E. Heitz and E. d'Eon, EGSR 2014 */

 ccl_device_inline void microfacet_beckmann_sample_slopes(
+	KernelGlobals *kg,
 	const float cos_theta_i, const float sin_theta_i,
-	const float alpha_x, const float alpha_y,
 	float randu, float randv, float *slope_x, float *slope_y,
 	float *G1i)
 {
@ -200,10 +200,12 @@ ccl_device_inline void microfacet_beckmann_sample_slopes(
 	const float SQRT_PI_INV = 0.56418958354f;
 	const float Lambda = 0.5f*(erf_a - 1.0f) + (0.5f*SQRT_PI_INV)*(exp_a2*inv_a);
 	const float G1 = 1.0f/(1.0f + Lambda); /* masking */
-	const float C = 1.0f - G1 * erf_a;

 	*G1i = G1;

+#if 0
+	const float C = 1.0f - G1 * erf_a;
+
 	/* sample slope X */
 	if(randu < C) {
 		/* rescale randu */
@ -238,11 +240,20 @@ ccl_device_inline void microfacet_beckmann_sample_slopes(

 	/* sample slope Y */
 	*slope_y = approx_erfinvf(2.0f*randv - 1.0f);
+#else
+	/* use precomputed table, because it better preserves stratification
+	 * of the random number pattern */
+	int beckmann_table_offset = kernel_data.tables.beckmann_offset;
+
+	*slope_x = lookup_table_read_2D(kg, randu, cos_theta_i,
+		beckmann_table_offset, BECKMANN_TABLE_SIZE, BECKMANN_TABLE_SIZE);
+	*slope_y = approx_erfinvf(2.0f*randv - 1.0f);
+#endif
+
 }

 ccl_device_inline void microfacet_ggx_sample_slopes(
 	const float cos_theta_i, const float sin_theta_i,
-	const float alpha_x, const float alpha_y,
 	float randu, float randv, float *slope_x, float *slope_y,
 	float *G1i)
 {
@ -290,7 +301,8 @@ ccl_device_inline void microfacet_ggx_sample_slopes(
 	*slope_y = S * z * safe_sqrtf(1.0f + (*slope_x)*(*slope_x));
 }

-ccl_device_inline float3 microfacet_sample_stretched(const float3 omega_i,
+ccl_device_inline float3 microfacet_sample_stretched(
+	KernelGlobals *kg, const float3 omega_i,
 	const float alpha_x, const float alpha_y,
 	const float randu, const float randv,
 	bool beckmann, float *G1i)
@ -317,12 +329,14 @@ ccl_device_inline float3 microfacet_sample_stretched(const float3 omega_i,
 	/* 2. sample P22_{omega_i}(x_slope, y_slope, 1, 1) */
 	float slope_x, slope_y;

-	if(beckmann)
-		microfacet_beckmann_sample_slopes(costheta_, sintheta_,
-			alpha_x, alpha_y, randu, randv, &slope_x, &slope_y, G1i);
-	else
+	if(beckmann) {
+		microfacet_beckmann_sample_slopes(kg, costheta_, sintheta_,
+			randu, randv, &slope_x, &slope_y, G1i);
+	}
+	else {
 		microfacet_ggx_sample_slopes(costheta_, sintheta_,
-			alpha_x, alpha_y, randu, randv, &slope_x, &slope_y, G1i);
+			randu, randv, &slope_x, &slope_y, G1i);
+	}

 	/* 3. rotate */
 	float tmp = cosphi_*slope_x - sinphi_*slope_y;
@ -530,7 +544,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, con
 	return make_float3(out, out, out);
 }

-ccl_device int bsdf_microfacet_ggx_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
 	float alpha_x = sc->data0;
 	float alpha_y = sc->data1;
@ -552,7 +566,7 @@ ccl_device int bsdf_microfacet_ggx_sample(const ShaderClosure *sc, float3 Ng, fl
 		float3 local_m;
 		float G1o;

-		local_m = microfacet_sample_stretched(local_I, alpha_x, alpha_y,
+		local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_y,
 			randu, randv, false, &G1o);

 		float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z;
@ -878,7 +892,7 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc
 	return make_float3(out, out, out);
 }

-ccl_device int bsdf_microfacet_beckmann_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
 {
 	float alpha_x = sc->data0;
 	float alpha_y = sc->data1;
@ -900,7 +914,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(const ShaderClosure *sc, float3 N
 		float3 local_m;
 		float G1o;

-		local_m = microfacet_sample_stretched(local_I, alpha_x, alpha_x,
+		local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_x,
 			randu, randv, true, &G1o);

 		float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z;
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@ -44,6 +44,8 @@ CCL_NAMESPACE_BEGIN
 #define BB_TABLE_YPOWER			5.0f
 #define BB_TABLE_SPACING		2.0f

+#define BECKMANN_TABLE_SIZE		256
+
 #define TEX_NUM_FLOAT_IMAGES	5

 #define SHADER_NONE				(~0)
@ -933,11 +935,11 @@ typedef struct KernelCurves {
 	float maximum_width;
 } KernelCurves;

-typedef struct KernelBlackbody {
-	int table_offset;
-	int pad1, pad2, pad3;
-} KernelBlackbody;
-
+typedef struct KernelTables {
+	int blackbody_offset; /* lookup table offset read by svm_node_blackbody() */
+	int beckmann_offset; /* lookup table offset read by microfacet_beckmann_sample_slopes() */
+	int pad1, pad2; /* not referenced in the visible code; presumably padding -- TODO confirm */
+} KernelTables;

 typedef struct KernelData {
 	KernelCamera cam;
@ -946,7 +948,7 @@ typedef struct KernelData {
 	KernelIntegrator integrator;
 	KernelBVH bvh;
 	KernelCurves curve;
-	KernelBlackbody blackbody;
+	KernelTables tables;
 } KernelData;

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@ -41,6 +41,8 @@
 #include "util_param.h"

 #include "kernel_types.h"
+#include "kernel_compat_cpu.h"
+#include "kernel_globals.h"
 #include "kernel_montecarlo.h"

 #include "closure/bsdf_util.h"
--- a/intern/cycles/kernel/osl/osl_closures.h
+++ b/intern/cycles/kernel/osl/osl_closures.h
@ -149,17 +149,18 @@ public: \
 \
 	void blur(float roughness) \
 	{ \
-		bsdf_##svmlower##_blur(&sc, roughness); \
 	} \
 \
 	float3 eval_reflect(const float3 &omega_out, const float3 &omega_in, float& pdf) const \
 	{ \
-		return bsdf_##svmlower##_eval_reflect(&sc, omega_out, omega_in, &pdf); \
+		pdf = 0; \
+		return make_float3(0, 0, 0); \
 	} \
 \
 	float3 eval_transmit(const float3 &omega_out, const float3 &omega_in, float& pdf) const \
 	{ \
-		return bsdf_##svmlower##_eval_transmit(&sc, omega_out, omega_in, &pdf); \
+		pdf = 0; \
+		return make_float3(0, 0, 0); \
 	} \
 \
 	int sample(const float3 &Ng, \
@ -168,8 +169,8 @@ public: \
 	           float3 &omega_in, float3 &domega_in_dx, float3 &domega_in_dy, \
 	           float &pdf, float3 &eval) const \
 	{ \
-		return bsdf_##svmlower##_sample(&sc, Ng, omega_out, domega_out_dx, domega_out_dy, \
-			randu, randv, &eval, &omega_in, &domega_in_dx, &domega_in_dy, &pdf); \
+		pdf = 0; \
+		return LABEL_NONE; \
 	} \
 }; \
 \
--- a/intern/cycles/kernel/svm/svm_blackbody.h
+++ b/intern/cycles/kernel/svm/svm_blackbody.h
@ -55,7 +55,7 @@ ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *sta
 		just one (the OSL-lerp is also automatically done for us by "lookup_table_read") */
 		float t = powf((temperature - BB_DRAPPER) * (1.0f / BB_TABLE_SPACING), (1.0f / BB_TABLE_XPOWER));

-		int blackbody_table_offset = kernel_data.blackbody.table_offset;
+		int blackbody_table_offset = kernel_data.tables.blackbody_offset;

 		/* Retrieve colors from the lookup table */
 		float lutval = t*lookuptablenormalize;
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@ -31,6 +31,95 @@

 CCL_NAMESPACE_BEGIN

+/* Beckmann sampling precomputed table, see bsdf_microfacet.h */
+
+/* 2D slope distribution (alpha = 1.0): returns exp(-(slope_x^2 + slope_y^2)),
+ * with no normalization constant applied */
+static float beckmann_table_P22(const float slope_x, const float slope_y)
+{
+	return expf(-(slope_x*slope_x + slope_y*slope_y));
+}
+
+/* maximal slope amplitude (range that contains 99.99% of the distribution);
+ * beckmann_table_rows() samples slopes over [-slope_max, +slope_max] */
+static float beckmann_table_slope_max()
+{
+	return 6.0; /* double literal, implicitly converted to the float return type */
+}
+/* Fill rows [row_from, row_to) of the Beckmann sampling table.
+ *
+ * Each row corresponds to one incident direction, with
+ * cos_theta = row / (BECKMANN_TABLE_SIZE - 1), and holds BECKMANN_TABLE_SIZE
+ * slope_x values obtained by inverting a numerically integrated CDF at
+ * evenly spaced random numbers U. Entries are written to
+ * table[index_U + index_theta*BECKMANN_TABLE_SIZE]; the kernel reads them
+ * back through lookup_table_read_2D() with (randu, cos_theta_i). */
+static void beckmann_table_rows(float *table, int row_from, int row_to)
+{
+	/* allocate temporary data: sampled slope_x positions and the running
+	 * CDF, both reused for every row */
+	const int DATA_TMP_SIZE = 512;
+	vector<double> slope_x(DATA_TMP_SIZE);
+	vector<double> CDF_P22_omega_i(DATA_TMP_SIZE);
+
+	/* loop over incident directions */
+	for(int index_theta = row_from; index_theta < row_to; index_theta++) {
+		/* incident vector */
+		const float cos_theta = index_theta / (BECKMANN_TABLE_SIZE - 1.0f);
+		const float sin_theta = safe_sqrtf(1.0f - cos_theta*cos_theta);
+
+		/* for a given incident vector
+		 * integrate P22_{omega_i}(x_slope, 1, 1), Eq. (10);
+		 * the first sample sits at -slope_max with a CDF value of zero */
+		slope_x[0] = -beckmann_table_slope_max();
+		CDF_P22_omega_i[0] = 0;
+
+		for(int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x) {
+			/* slope_x, evenly spaced over [-slope_max, +slope_max] */
+			slope_x[index_slope_x] = -beckmann_table_slope_max() + 2.0f * beckmann_table_slope_max() * index_slope_x/(DATA_TMP_SIZE - 1.0f);
+
+			/* dot product with incident vector, clamped to zero */
+			float dot_product = fmaxf(0.0f, -slope_x[index_slope_x]*sin_theta + cos_theta);
+			/* marginalize P22_{omega_i}(x_slope, 1, 1), Eq. (10), by summing
+			 * 100 evenly spaced slope_y samples over [-slope_max, +slope_max];
+			 * no step size factor is applied since the CDF is renormalized below */
+			float P22_omega_i = 0.0f;
+
+			for(int j = 0; j < 100; ++j) {
+				float slope_y = -beckmann_table_slope_max() + 2.0f * beckmann_table_slope_max() * j * (1.0f/99.0f);
+				P22_omega_i += dot_product * beckmann_table_P22(slope_x[index_slope_x], slope_y);
+			}
+
+			/* CDF of P22_{omega_i}(x_slope, 1, 1), Eq. (10), as a running sum */
+			CDF_P22_omega_i[index_slope_x] = CDF_P22_omega_i[index_slope_x - 1] + P22_omega_i;
+		}
+
+		/* renormalize CDF_P22_omega_i; the last entry is only divided by itself
+		 * in the final iteration, so all earlier entries are divided by the
+		 * unnormalized total */
+		for(int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x)
+			CDF_P22_omega_i[index_slope_x] /= CDF_P22_omega_i[DATA_TMP_SIZE - 1];
+
+		/* loop over random number U1; the search index is declared outside
+		 * the loop and is not reset between values of U */
+		int index_slope_x = 0;
+
+		for(int index_U = 0; index_U < BECKMANN_TABLE_SIZE; ++index_U) {
+			const float U = 0.0000001f + 0.9999998f * index_U / (float)(BECKMANN_TABLE_SIZE - 1); /* offset and scale presumably keep U strictly inside (0, 1) -- TODO confirm */
+
+			/* inverse CDF_P22_omega_i, solve Eq.(11): advance to the first
+			 * CDF entry greater than U; since U only increases with index_U
+			 * the search resumes from where the previous one stopped */
+			while(CDF_P22_omega_i[index_slope_x] <= U)
+				++index_slope_x;
+
+			const double interp =
+				(CDF_P22_omega_i[index_slope_x] - U) /
+				(CDF_P22_omega_i[index_slope_x] - CDF_P22_omega_i[index_slope_x - 1]);
+
+			/* store value: linear interpolation where interp weights the
+			 * lower sample and (1 - interp) the upper one */
+			table[index_U + index_theta*BECKMANN_TABLE_SIZE] = (float)(
+				interp * slope_x[index_slope_x - 1]
+				+ (1.0f-interp) * slope_x[index_slope_x]);
+		}
+	}
+}
+/* Resize table to BECKMANN_TABLE_SIZE*BECKMANN_TABLE_SIZE floats and fill it
+ * by pushing beckmann_table_rows() tasks to a TaskPool. */
+static void beckmann_table_build(vector<float>& table)
+{
+	table.resize(BECKMANN_TABLE_SIZE*BECKMANN_TABLE_SIZE);
+
+	/* multithreaded build: one task per group of 8 rows; the row range
+	 * [i, i+8) passed to each task is not clamped, so this relies on
+	 * BECKMANN_TABLE_SIZE being a multiple of 8 */
+	TaskPool pool;
+
+	for(int i = 0; i < BECKMANN_TABLE_SIZE; i+=8)
+		pool.push(function_bind(&beckmann_table_rows, &table[0], i, i+8));
+
+	pool.wait_work();
+}
+
 /* Shader */

 Shader::Shader()
@ -138,6 +227,7 @@ ShaderManager::ShaderManager()
 {
 	need_update = true;
 	blackbody_table_offset = TABLE_OFFSET_INVALID;
+	beckmann_table_offset = TABLE_OFFSET_INVALID;
 }

 ShaderManager::~ShaderManager()
@ -282,19 +372,28 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc
 	device->tex_alloc("__shader_flag", dscene->shader_flag);

 	/* blackbody lookup table */
-	KernelBlackbody *kblackbody = &dscene->data.blackbody;
+	KernelTables *ktables = &dscene->data.tables;
 	
 	if(has_converter_blackbody && blackbody_table_offset == TABLE_OFFSET_INVALID) {
 		vector<float> table = blackbody_table();
 		blackbody_table_offset = scene->lookup_tables->add_table(dscene, table);
 		
-		kblackbody->table_offset = (int)blackbody_table_offset;
+		ktables->blackbody_offset = (int)blackbody_table_offset;
 	}
 	else if(!has_converter_blackbody && blackbody_table_offset != TABLE_OFFSET_INVALID) {
 		scene->lookup_tables->remove_table(blackbody_table_offset);
 		blackbody_table_offset = TABLE_OFFSET_INVALID;
 	}

+	/* beckmann lookup table */
+	if(beckmann_table_offset == TABLE_OFFSET_INVALID) {
+		vector<float> table;
+		beckmann_table_build(table);
+		beckmann_table_offset = scene->lookup_tables->add_table(dscene, table);
+		
+		ktables->beckmann_offset = (int)beckmann_table_offset;
+	}
+
 	/* integrator */
 	KernelIntegrator *kintegrator = &dscene->data.integrator;
 	kintegrator->use_volumes = has_volumes;
@ -308,6 +407,11 @@ void ShaderManager::device_free_common(Device *device, DeviceScene *dscene, Scen
 		blackbody_table_offset = TABLE_OFFSET_INVALID;
 	}

+	if(beckmann_table_offset != TABLE_OFFSET_INVALID) {
+		scene->lookup_tables->remove_table(beckmann_table_offset);
+		beckmann_table_offset = TABLE_OFFSET_INVALID;
+	}
+
 	device->tex_free(dscene->shader_flag);
 	dscene->shader_flag.clear();
 }
--- a/intern/cycles/render/shader.h
+++ b/intern/cycles/render/shader.h
@ -149,6 +149,7 @@ protected:
 	AttributeIDMap unique_attribute_id;

 	size_t blackbody_table_offset;
+	size_t beckmann_table_offset;
 };

 CCL_NAMESPACE_END