diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index 8ddf4971909..81c239ea0c9 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -85,13 +85,13 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader case CLOSURE_BSDF_MICROFACET_GGX_ID: case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - label = bsdf_microfacet_ggx_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_microfacet_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: - label = bsdf_microfacet_beckmann_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; #ifdef __ANISOTROPIC__ diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index 4a3d223d765..e130349bca2 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -176,8 +176,8 @@ ccl_device float approx_erfinvf(float z) * E. Heitz and E. 
d'Eon, EGSR 2014 */ ccl_device_inline void microfacet_beckmann_sample_slopes( + KernelGlobals *kg, const float cos_theta_i, const float sin_theta_i, - const float alpha_x, const float alpha_y, float randu, float randv, float *slope_x, float *slope_y, float *G1i) { @@ -200,10 +200,12 @@ ccl_device_inline void microfacet_beckmann_sample_slopes( const float SQRT_PI_INV = 0.56418958354f; const float Lambda = 0.5f*(erf_a - 1.0f) + (0.5f*SQRT_PI_INV)*(exp_a2*inv_a); const float G1 = 1.0f/(1.0f + Lambda); /* masking */ - const float C = 1.0f - G1 * erf_a; *G1i = G1; +#if 0 + const float C = 1.0f - G1 * erf_a; + /* sample slope X */ if(randu < C) { /* rescale randu */ @@ -238,11 +240,20 @@ ccl_device_inline void microfacet_beckmann_sample_slopes( /* sample slope Y */ *slope_y = approx_erfinvf(2.0f*randv - 1.0f); +#else + /* use precomputed table, because it better preserves stratification + * of the random number pattern */ + int beckmann_table_offset = kernel_data.tables.beckmann_offset; + + *slope_x = lookup_table_read_2D(kg, randu, cos_theta_i, + beckmann_table_offset, BECKMANN_TABLE_SIZE, BECKMANN_TABLE_SIZE); + *slope_y = approx_erfinvf(2.0f*randv - 1.0f); +#endif + } ccl_device_inline void microfacet_ggx_sample_slopes( const float cos_theta_i, const float sin_theta_i, - const float alpha_x, const float alpha_y, float randu, float randv, float *slope_x, float *slope_y, float *G1i) { @@ -290,7 +301,8 @@ ccl_device_inline void microfacet_ggx_sample_slopes( *slope_y = S * z * safe_sqrtf(1.0f + (*slope_x)*(*slope_x)); } -ccl_device_inline float3 microfacet_sample_stretched(const float3 omega_i, +ccl_device_inline float3 microfacet_sample_stretched( + KernelGlobals *kg, const float3 omega_i, const float alpha_x, const float alpha_y, const float randu, const float randv, bool beckmann, float *G1i) @@ -317,12 +329,14 @@ ccl_device_inline float3 microfacet_sample_stretched(const float3 omega_i, /* 2. 
sample P22_{omega_i}(x_slope, y_slope, 1, 1) */ float slope_x, slope_y; - if(beckmann) - microfacet_beckmann_sample_slopes(costheta_, sintheta_, - alpha_x, alpha_y, randu, randv, &slope_x, &slope_y, G1i); - else + if(beckmann) { + microfacet_beckmann_sample_slopes(kg, costheta_, sintheta_, + randu, randv, &slope_x, &slope_y, G1i); + } + else { microfacet_ggx_sample_slopes(costheta_, sintheta_, - alpha_x, alpha_y, randu, randv, &slope_x, &slope_y, G1i); + randu, randv, &slope_x, &slope_y, G1i); + } /* 3. rotate */ float tmp = cosphi_*slope_x - sinphi_*slope_y; @@ -530,7 +544,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, con return make_float3(out, out, out); } -ccl_device int bsdf_microfacet_ggx_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { float alpha_x = sc->data0; float alpha_y = sc->data1; @@ -552,7 +566,7 @@ ccl_device int bsdf_microfacet_ggx_sample(const ShaderClosure *sc, float3 Ng, fl float3 local_m; float G1o; - local_m = microfacet_sample_stretched(local_I, alpha_x, alpha_y, + local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_y, randu, randv, false, &G1o); float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z; @@ -878,7 +892,7 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc return make_float3(out, out, out); } -ccl_device int bsdf_microfacet_beckmann_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int 
bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { float alpha_x = sc->data0; float alpha_y = sc->data1; @@ -900,7 +914,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(const ShaderClosure *sc, float3 N float3 local_m; float G1o; - local_m = microfacet_sample_stretched(local_I, alpha_x, alpha_x, + local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_y, randu, randv, true, &G1o); float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z; diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 652a7848d73..d81909a623a 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -44,6 +44,8 @@ CCL_NAMESPACE_BEGIN #define BB_TABLE_YPOWER 5.0f #define BB_TABLE_SPACING 2.0f +#define BECKMANN_TABLE_SIZE 256 + #define TEX_NUM_FLOAT_IMAGES 5 #define SHADER_NONE (~0) @@ -933,11 +935,11 @@ typedef struct KernelCurves { float maximum_width; } KernelCurves; -typedef struct KernelBlackbody { - int table_offset; - int pad1, pad2, pad3; -} KernelBlackbody; - +typedef struct KernelTables { + int blackbody_offset; + int beckmann_offset; + int pad1, pad2; +} KernelTables; typedef struct KernelData { KernelCamera cam; @@ -946,7 +948,7 @@ typedef struct KernelData { KernelIntegrator integrator; KernelBVH bvh; KernelCurves curve; - KernelBlackbody blackbody; + KernelTables tables; } KernelData; CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index c1560ea2d01..f102e79f5c7 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -41,6 +41,8 @@ #include "util_param.h" #include "kernel_types.h" +#include "kernel_compat_cpu.h" +#include "kernel_globals.h" #include "kernel_montecarlo.h" #include
"closure/bsdf_util.h" diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h index 218cf1c19cc..a543907e884 100644 --- a/intern/cycles/kernel/osl/osl_closures.h +++ b/intern/cycles/kernel/osl/osl_closures.h @@ -149,17 +149,18 @@ public: \ \ void blur(float roughness) \ { \ - bsdf_##svmlower##_blur(&sc, roughness); \ } \ \ float3 eval_reflect(const float3 &omega_out, const float3 &omega_in, float& pdf) const \ { \ - return bsdf_##svmlower##_eval_reflect(&sc, omega_out, omega_in, &pdf); \ + pdf = 0; \ + return make_float3(0, 0, 0); \ } \ \ float3 eval_transmit(const float3 &omega_out, const float3 &omega_in, float& pdf) const \ { \ - return bsdf_##svmlower##_eval_transmit(&sc, omega_out, omega_in, &pdf); \ + pdf = 0; \ + return make_float3(0, 0, 0); \ } \ \ int sample(const float3 &Ng, \ @@ -168,8 +169,8 @@ public: \ float3 &omega_in, float3 &domega_in_dx, float3 &domega_in_dy, \ float &pdf, float3 &eval) const \ { \ - return bsdf_##svmlower##_sample(&sc, Ng, omega_out, domega_out_dx, domega_out_dy, \ - randu, randv, &eval, &omega_in, &domega_in_dx, &domega_in_dy, &pdf); \ + pdf = 0; \ + return LABEL_NONE; \ } \ }; \ \ diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h index 63dbf27d35e..15257aed92e 100644 --- a/intern/cycles/kernel/svm/svm_blackbody.h +++ b/intern/cycles/kernel/svm/svm_blackbody.h @@ -55,7 +55,7 @@ ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *sta just one (the OSL-lerp is also automatically done for us by "lookup_table_read") */ float t = powf((temperature - BB_DRAPPER) * (1.0f / BB_TABLE_SPACING), (1.0f / BB_TABLE_XPOWER)); - int blackbody_table_offset = kernel_data.blackbody.table_offset; + int blackbody_table_offset = kernel_data.tables.blackbody_offset; /* Retrieve colors from the lookup table */ float lutval = t*lookuptablenormalize; diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index 
662caed72f1..87107ae642d 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -31,6 +31,95 @@ CCL_NAMESPACE_BEGIN +/* Beckmann sampling precomputed table, see bsdf_microfacet.h */ + +/* 2D slope distribution (alpha = 1.0) */ +static float beckmann_table_P22(const float slope_x, const float slope_y) +{ + return expf(-(slope_x*slope_x + slope_y*slope_y)); +} + +/* maximal slope amplitude (range that contains 99.99% of the distribution) */ +static float beckmann_table_slope_max() +{ + return 6.0f; +} + +static void beckmann_table_rows(float *table, int row_from, int row_to) +{ + /* allocate temporary data */ + const int DATA_TMP_SIZE = 512; + vector<double> slope_x(DATA_TMP_SIZE); + vector<double> CDF_P22_omega_i(DATA_TMP_SIZE); + + /* loop over incident directions */ + for(int index_theta = row_from; index_theta < row_to; index_theta++) { + /* incident vector */ + const float cos_theta = index_theta / (BECKMANN_TABLE_SIZE - 1.0f); + const float sin_theta = safe_sqrtf(1.0f - cos_theta*cos_theta); + + /* for a given incident vector + * integrate P22_{omega_i}(x_slope, 1, 1), Eq. (10) */ + slope_x[0] = -beckmann_table_slope_max(); + CDF_P22_omega_i[0] = 0; + + for(int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x) { + /* slope_x */ + slope_x[index_slope_x] = -beckmann_table_slope_max() + 2.0f * beckmann_table_slope_max() * index_slope_x/(DATA_TMP_SIZE - 1.0f); + + /* dot product with incident vector */ + float dot_product = fmaxf(0.0f, -slope_x[index_slope_x]*sin_theta + cos_theta); + /* marginalize P22_{omega_i}(x_slope, 1, 1), Eq. (10) */ + float P22_omega_i = 0.0f; + + for(int j = 0; j < 100; ++j) { + float slope_y = -beckmann_table_slope_max() + 2.0f * beckmann_table_slope_max() * j * (1.0f/99.0f); + P22_omega_i += dot_product * beckmann_table_P22(slope_x[index_slope_x], slope_y); + } + + /* CDF of P22_{omega_i}(x_slope, 1, 1), Eq.
(10) */ + CDF_P22_omega_i[index_slope_x] = CDF_P22_omega_i[index_slope_x - 1] + P22_omega_i; + } + + /* renormalize CDF_P22_omega_i */ + for(int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x) + CDF_P22_omega_i[index_slope_x] /= CDF_P22_omega_i[DATA_TMP_SIZE - 1]; + + /* loop over random number U1 */ + int index_slope_x = 0; + + for(int index_U = 0; index_U < BECKMANN_TABLE_SIZE; ++index_U) { + const float U = 0.0000001f + 0.9999998f * index_U / (float)(BECKMANN_TABLE_SIZE - 1); + + /* inverse CDF_P22_omega_i, solve Eq.(11) */ + while(CDF_P22_omega_i[index_slope_x] <= U) + ++index_slope_x; + + const double interp = + (CDF_P22_omega_i[index_slope_x] - U) / + (CDF_P22_omega_i[index_slope_x] - CDF_P22_omega_i[index_slope_x - 1]); + + /* store value */ + table[index_U + index_theta*BECKMANN_TABLE_SIZE] = (float)( + interp * slope_x[index_slope_x - 1] + + (1.0f-interp) * slope_x[index_slope_x]); + } + } +} + +static void beckmann_table_build(vector<float>& table) +{ + table.resize(BECKMANN_TABLE_SIZE*BECKMANN_TABLE_SIZE); + + /* multithreaded build */ + TaskPool pool; + + for(int i = 0; i < BECKMANN_TABLE_SIZE; i+=8) + pool.push(function_bind(&beckmann_table_rows, &table[0], i, i+8)); + + pool.wait_work(); +} + /* Shader */ Shader::Shader() @@ -138,6 +227,7 @@ ShaderManager::ShaderManager() { need_update = true; blackbody_table_offset = TABLE_OFFSET_INVALID; + beckmann_table_offset = TABLE_OFFSET_INVALID; } ShaderManager::~ShaderManager() @@ -282,19 +372,28 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc device->tex_alloc("__shader_flag", dscene->shader_flag); /* blackbody lookup table */ - KernelBlackbody *kblackbody = &dscene->data.blackbody; + KernelTables *ktables = &dscene->data.tables; if(has_converter_blackbody && blackbody_table_offset == TABLE_OFFSET_INVALID) { vector<float> table = blackbody_table(); blackbody_table_offset = scene->lookup_tables->add_table(dscene, table); - kblackbody->table_offset =
(int)blackbody_table_offset; + ktables->blackbody_offset = (int)blackbody_table_offset; } else if(!has_converter_blackbody && blackbody_table_offset != TABLE_OFFSET_INVALID) { scene->lookup_tables->remove_table(blackbody_table_offset); blackbody_table_offset = TABLE_OFFSET_INVALID; } + /* beckmann lookup table */ + if(beckmann_table_offset == TABLE_OFFSET_INVALID) { + vector<float> table; + beckmann_table_build(table); + beckmann_table_offset = scene->lookup_tables->add_table(dscene, table); + + ktables->beckmann_offset = (int)beckmann_table_offset; + } + /* integrator */ KernelIntegrator *kintegrator = &dscene->data.integrator; kintegrator->use_volumes = has_volumes; @@ -308,6 +407,11 @@ void ShaderManager::device_free_common(Device *device, DeviceScene *dscene, Scen blackbody_table_offset = TABLE_OFFSET_INVALID; } + if(beckmann_table_offset != TABLE_OFFSET_INVALID) { + scene->lookup_tables->remove_table(beckmann_table_offset); + beckmann_table_offset = TABLE_OFFSET_INVALID; + } + device->tex_free(dscene->shader_flag); dscene->shader_flag.clear(); } diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index 84be4b469d8..0ed6d2ddf01 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -149,6 +149,7 @@ protected: AttributeIDMap unique_attribute_id; size_t blackbody_table_offset; + size_t beckmann_table_offset; }; CCL_NAMESPACE_END