forked from bartvdbraak/blender
b63e6ab8d2
Thanks to pembem22 for finding the problem. Ref D8949
499 lines
14 KiB
C
499 lines
14 KiB
C
/*
|
|
* Copyright 2011-2013 Blender Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifndef __KERNEL_BSSRDF_H__
|
|
#define __KERNEL_BSSRDF_H__
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
typedef ccl_addr_space struct Bssrdf {
|
|
SHADER_CLOSURE_BASE;
|
|
|
|
float3 radius;
|
|
float3 albedo;
|
|
float sharpness;
|
|
float texture_blur;
|
|
float roughness;
|
|
float channels;
|
|
} Bssrdf;
|
|
|
|
static_assert(sizeof(ShaderClosure) >= sizeof(Bssrdf), "Bssrdf is too large!");
|
|
|
|
/* Planar Truncated Gaussian
|
|
*
|
|
* Note how this is different from the typical gaussian, this one integrates
|
|
* to 1 over the plane (where you get an extra 2*pi*x factor). We are lucky
|
|
* that integrating x*exp(-x) gives a nice closed form solution. */
|
|
|
|
/* paper suggests 1/12.46 which is much too small, suspect it's *12.46 */
|
|
#define GAUSS_TRUNCATE 12.46f
|
|
|
|
ccl_device float bssrdf_gaussian_eval(const float radius, float r)
|
|
{
|
|
/* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm
|
|
* = 1 - exp(-Rm*Rm/(2*v)) */
|
|
const float v = radius * radius * (0.25f * 0.25f);
|
|
const float Rm = sqrtf(v * GAUSS_TRUNCATE);
|
|
|
|
if (r >= Rm)
|
|
return 0.0f;
|
|
|
|
return expf(-r * r / (2.0f * v)) / (2.0f * M_PI_F * v);
|
|
}
|
|
|
|
ccl_device float bssrdf_gaussian_pdf(const float radius, float r)
|
|
{
|
|
/* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
|
|
const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE);
|
|
|
|
return bssrdf_gaussian_eval(radius, r) * (1.0f / (area_truncated));
|
|
}
|
|
|
|
ccl_device void bssrdf_gaussian_sample(const float radius, float xi, float *r, float *h)
|
|
{
|
|
/* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v))
|
|
* r = sqrt(-2*v*logf(xi)) */
|
|
const float v = radius * radius * (0.25f * 0.25f);
|
|
const float Rm = sqrtf(v * GAUSS_TRUNCATE);
|
|
|
|
/* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
|
|
const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE);
|
|
|
|
/* r(xi) */
|
|
const float r_squared = -2.0f * v * logf(1.0f - xi * area_truncated);
|
|
*r = sqrtf(r_squared);
|
|
|
|
/* h^2 + r^2 = Rm^2 */
|
|
*h = safe_sqrtf(Rm * Rm - r_squared);
|
|
}
|
|
|
|
/* Planar Cubic BSSRDF falloff
|
|
*
|
|
* This is basically (Rm - x)^3, with some factors to normalize it. For sampling
|
|
* we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as
|
|
* far as I can tell has no closed form solution. So we get an iterative solution
|
|
* instead with newton-raphson. */
|
|
|
|
ccl_device float bssrdf_cubic_eval(const float radius, const float sharpness, float r)
|
|
{
|
|
if (sharpness == 0.0f) {
|
|
const float Rm = radius;
|
|
|
|
if (r >= Rm)
|
|
return 0.0f;
|
|
|
|
/* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */
|
|
const float Rm5 = (Rm * Rm) * (Rm * Rm) * Rm;
|
|
const float f = Rm - r;
|
|
const float num = f * f * f;
|
|
|
|
return (10.0f * num) / (Rm5 * M_PI_F);
|
|
}
|
|
else {
|
|
float Rm = radius * (1.0f + sharpness);
|
|
|
|
if (r >= Rm)
|
|
return 0.0f;
|
|
|
|
/* custom variation with extra sharpness, to match the previous code */
|
|
const float y = 1.0f / (1.0f + sharpness);
|
|
float Rmy, ry, ryinv;
|
|
|
|
if (sharpness == 1.0f) {
|
|
Rmy = sqrtf(Rm);
|
|
ry = sqrtf(r);
|
|
ryinv = (ry > 0.0f) ? 1.0f / ry : 0.0f;
|
|
}
|
|
else {
|
|
Rmy = powf(Rm, y);
|
|
ry = powf(r, y);
|
|
ryinv = (r > 0.0f) ? powf(r, y - 1.0f) : 0.0f;
|
|
}
|
|
|
|
const float Rmy5 = (Rmy * Rmy) * (Rmy * Rmy) * Rmy;
|
|
const float f = Rmy - ry;
|
|
const float num = f * (f * f) * (y * ryinv);
|
|
|
|
return (10.0f * num) / (Rmy5 * M_PI_F);
|
|
}
|
|
}
|
|
|
|
ccl_device float bssrdf_cubic_pdf(const float radius, const float sharpness, float r)
|
|
{
|
|
return bssrdf_cubic_eval(radius, sharpness, r);
|
|
}
|
|
|
|
/* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
|
|
ccl_device_forceinline float bssrdf_cubic_quintic_root_find(float xi)
|
|
{
|
|
/* newton-raphson iteration, usually succeeds in 2-4 iterations, except
|
|
* outside 0.02 ... 0.98 where it can go up to 10, so overall performance
|
|
* should not be too bad */
|
|
const float tolerance = 1e-6f;
|
|
const int max_iteration_count = 10;
|
|
float x = 0.25f;
|
|
int i;
|
|
|
|
for (i = 0; i < max_iteration_count; i++) {
|
|
float x2 = x * x;
|
|
float x3 = x2 * x;
|
|
float nx = (1.0f - x);
|
|
|
|
float f = 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3 - xi;
|
|
float f_ = 20.0f * (x * nx) * (nx * nx);
|
|
|
|
if (fabsf(f) < tolerance || f_ == 0.0f)
|
|
break;
|
|
|
|
x = saturate(x - f / f_);
|
|
}
|
|
|
|
return x;
|
|
}
|
|
|
|
ccl_device void bssrdf_cubic_sample(
|
|
const float radius, const float sharpness, float xi, float *r, float *h)
|
|
{
|
|
float Rm = radius;
|
|
float r_ = bssrdf_cubic_quintic_root_find(xi);
|
|
|
|
if (sharpness != 0.0f) {
|
|
r_ = powf(r_, 1.0f + sharpness);
|
|
Rm *= (1.0f + sharpness);
|
|
}
|
|
|
|
r_ *= Rm;
|
|
*r = r_;
|
|
|
|
/* h^2 + r^2 = Rm^2 */
|
|
*h = safe_sqrtf(Rm * Rm - r_ * r_);
|
|
}
|
|
|
|
/* Approximate Reflectance Profiles
|
|
* http://graphics.pixar.com/library/ApproxBSSRDF/paper.pdf
|
|
*/
|
|
|
|
/* This is a bit arbitrary, just need big enough radius so it matches
|
|
* the mean free length, but still not too big so sampling is still
|
|
* effective. Might need some further tweaks.
|
|
*/
|
|
#define BURLEY_TRUNCATE 16.0f
|
|
#define BURLEY_TRUNCATE_CDF 0.9963790093708328f // cdf(BURLEY_TRUNCATE)
|
|
|
|
ccl_device_inline float bssrdf_burley_fitting(float A)
|
|
{
|
|
/* Diffuse surface transmission, equation (6). */
|
|
return 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f);
|
|
}
|
|
|
|
/* Scale mean free path length so it gives similar looking result
|
|
* to Cubic and Gaussian models.
|
|
*/
|
|
ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r)
|
|
{
|
|
return 0.25f * M_1_PI_F * r;
|
|
}
|
|
|
|
ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf)
|
|
{
|
|
/* Mean free path length. */
|
|
const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius);
|
|
/* Surface albedo. */
|
|
const float3 A = bssrdf->albedo;
|
|
const float3 s = make_float3(
|
|
bssrdf_burley_fitting(A.x), bssrdf_burley_fitting(A.y), bssrdf_burley_fitting(A.z));
|
|
|
|
bssrdf->radius = l / s;
|
|
}
|
|
|
|
ccl_device float bssrdf_burley_eval(const float d, float r)
|
|
{
|
|
const float Rm = BURLEY_TRUNCATE * d;
|
|
|
|
if (r >= Rm)
|
|
return 0.0f;
|
|
|
|
/* Burley reflectance profile, equation (3).
|
|
*
|
|
* NOTES:
|
|
* - Surface albedo is already included into sc->weight, no need to
|
|
* multiply by this term here.
|
|
* - This is normalized diffuse model, so the equation is multiplied
|
|
* by 2*pi, which also matches cdf().
|
|
*/
|
|
float exp_r_3_d = expf(-r / (3.0f * d));
|
|
float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d;
|
|
return (exp_r_d + exp_r_3_d) / (4.0f * d);
|
|
}
|
|
|
|
ccl_device float bssrdf_burley_pdf(const float d, float r)
|
|
{
|
|
return bssrdf_burley_eval(d, r) * (1.0f / BURLEY_TRUNCATE_CDF);
|
|
}
|
|
|
|
/* Find the radius for desired CDF value.
|
|
* Returns scaled radius, meaning the result is to be scaled up by d.
|
|
* Since there's no closed form solution we do Newton-Raphson method to find it.
|
|
*/
|
|
ccl_device_forceinline float bssrdf_burley_root_find(float xi)
|
|
{
|
|
const float tolerance = 1e-6f;
|
|
const int max_iteration_count = 10;
|
|
/* Do initial guess based on manual curve fitting, this allows us to reduce
|
|
* number of iterations to maximum 4 across the [0..1] range. We keep maximum
|
|
* number of iteration higher just to be sure we didn't miss root in some
|
|
* corner case.
|
|
*/
|
|
float r;
|
|
if (xi <= 0.9f) {
|
|
r = expf(xi * xi * 2.4f) - 1.0f;
|
|
}
|
|
else {
|
|
/* TODO(sergey): Some nicer curve fit is possible here. */
|
|
r = 15.0f;
|
|
}
|
|
/* Solve against scaled radius. */
|
|
for (int i = 0; i < max_iteration_count; i++) {
|
|
float exp_r_3 = expf(-r / 3.0f);
|
|
float exp_r = exp_r_3 * exp_r_3 * exp_r_3;
|
|
float f = 1.0f - 0.25f * exp_r - 0.75f * exp_r_3 - xi;
|
|
float f_ = 0.25f * exp_r + 0.25f * exp_r_3;
|
|
|
|
if (fabsf(f) < tolerance || f_ == 0.0f) {
|
|
break;
|
|
}
|
|
|
|
r = r - f / f_;
|
|
if (r < 0.0f) {
|
|
r = 0.0f;
|
|
}
|
|
}
|
|
return r;
|
|
}
|
|
|
|
ccl_device void bssrdf_burley_sample(const float d, float xi, float *r, float *h)
|
|
{
|
|
const float Rm = BURLEY_TRUNCATE * d;
|
|
const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d;
|
|
|
|
*r = r_;
|
|
|
|
/* h^2 + r^2 = Rm^2 */
|
|
*h = safe_sqrtf(Rm * Rm - r_ * r_);
|
|
}
|
|
|
|
/* None BSSRDF falloff
|
|
*
|
|
* Samples distributed over disk with no falloff, for reference. */
|
|
|
|
ccl_device float bssrdf_none_eval(const float radius, float r)
|
|
{
|
|
const float Rm = radius;
|
|
return (r < Rm) ? 1.0f : 0.0f;
|
|
}
|
|
|
|
ccl_device float bssrdf_none_pdf(const float radius, float r)
|
|
{
|
|
/* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */
|
|
const float Rm = radius;
|
|
const float area = (M_PI_F * Rm * Rm);
|
|
|
|
return bssrdf_none_eval(radius, r) / area;
|
|
}
|
|
|
|
ccl_device void bssrdf_none_sample(const float radius, float xi, float *r, float *h)
|
|
{
|
|
/* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2
|
|
* r = sqrt(xi)*Rm */
|
|
const float Rm = radius;
|
|
const float r_ = sqrtf(xi) * Rm;
|
|
|
|
*r = r_;
|
|
|
|
/* h^2 + r^2 = Rm^2 */
|
|
*h = safe_sqrtf(Rm * Rm - r_ * r_);
|
|
}
|
|
|
|
/* Generic */
|
|
|
|
ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
|
|
{
|
|
Bssrdf *bssrdf = (Bssrdf *)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
|
|
|
|
if (bssrdf == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
float sample_weight = fabsf(average(weight));
|
|
bssrdf->sample_weight = sample_weight;
|
|
return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL;
|
|
}
|
|
|
|
ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type)
|
|
{
|
|
int flag = 0;
|
|
int bssrdf_channels = 3;
|
|
float3 diffuse_weight = make_float3(0.0f, 0.0f, 0.0f);
|
|
|
|
/* Verify if the radii are large enough to sample without precision issues. */
|
|
if (bssrdf->radius.x < BSSRDF_MIN_RADIUS) {
|
|
diffuse_weight.x = bssrdf->weight.x;
|
|
bssrdf->weight.x = 0.0f;
|
|
bssrdf->radius.x = 0.0f;
|
|
bssrdf_channels--;
|
|
}
|
|
if (bssrdf->radius.y < BSSRDF_MIN_RADIUS) {
|
|
diffuse_weight.y = bssrdf->weight.y;
|
|
bssrdf->weight.y = 0.0f;
|
|
bssrdf->radius.y = 0.0f;
|
|
bssrdf_channels--;
|
|
}
|
|
if (bssrdf->radius.z < BSSRDF_MIN_RADIUS) {
|
|
diffuse_weight.z = bssrdf->weight.z;
|
|
bssrdf->weight.z = 0.0f;
|
|
bssrdf->radius.z = 0.0f;
|
|
bssrdf_channels--;
|
|
}
|
|
|
|
if (bssrdf_channels < 3) {
|
|
/* Add diffuse BSDF if any radius too small. */
|
|
#ifdef __PRINCIPLED__
|
|
if (type == CLOSURE_BSSRDF_PRINCIPLED_ID || type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
|
|
float roughness = bssrdf->roughness;
|
|
float3 N = bssrdf->N;
|
|
|
|
PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
|
|
sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight);
|
|
|
|
if (bsdf) {
|
|
bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
|
|
bsdf->N = N;
|
|
bsdf->roughness = roughness;
|
|
flag |= bsdf_principled_diffuse_setup(bsdf);
|
|
}
|
|
}
|
|
else
|
|
#endif /* __PRINCIPLED__ */
|
|
{
|
|
DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), diffuse_weight);
|
|
|
|
if (bsdf) {
|
|
bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
|
|
bsdf->N = bssrdf->N;
|
|
flag |= bsdf_diffuse_setup(bsdf);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Setup BSSRDF if radius is large enough. */
|
|
if (bssrdf_channels > 0) {
|
|
bssrdf->type = type;
|
|
bssrdf->channels = bssrdf_channels;
|
|
bssrdf->sample_weight = fabsf(average(bssrdf->weight)) * bssrdf->channels;
|
|
bssrdf->texture_blur = saturate(bssrdf->texture_blur);
|
|
bssrdf->sharpness = saturate(bssrdf->sharpness);
|
|
|
|
if (type == CLOSURE_BSSRDF_BURLEY_ID || type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
|
|
type == CLOSURE_BSSRDF_RANDOM_WALK_ID ||
|
|
type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
|
|
bssrdf_burley_setup(bssrdf);
|
|
}
|
|
|
|
flag |= SD_BSSRDF;
|
|
}
|
|
else {
|
|
bssrdf->type = type;
|
|
bssrdf->sample_weight = 0.0f;
|
|
}
|
|
|
|
return flag;
|
|
}
|
|
|
|
ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float *h)
|
|
{
|
|
const Bssrdf *bssrdf = (const Bssrdf *)sc;
|
|
float radius;
|
|
|
|
/* Sample color channel and reuse random number. Only a subset of channels
|
|
* may be used if their radius was too small to handle as BSSRDF. */
|
|
xi *= bssrdf->channels;
|
|
|
|
if (xi < 1.0f) {
|
|
radius = (bssrdf->radius.x > 0.0f) ?
|
|
bssrdf->radius.x :
|
|
(bssrdf->radius.y > 0.0f) ? bssrdf->radius.y : bssrdf->radius.z;
|
|
}
|
|
else if (xi < 2.0f) {
|
|
xi -= 1.0f;
|
|
radius = (bssrdf->radius.x > 0.0f && bssrdf->radius.y > 0.0f) ? bssrdf->radius.y :
|
|
bssrdf->radius.z;
|
|
}
|
|
else {
|
|
xi -= 2.0f;
|
|
radius = bssrdf->radius.z;
|
|
}
|
|
|
|
/* Sample BSSRDF. */
|
|
if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
|
|
bssrdf_cubic_sample(radius, bssrdf->sharpness, xi, r, h);
|
|
}
|
|
else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
|
|
bssrdf_gaussian_sample(radius, xi, r, h);
|
|
}
|
|
else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID ||
|
|
* bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID) */
|
|
bssrdf_burley_sample(radius, xi, r, h);
|
|
}
|
|
}
|
|
|
|
ccl_device float bssrdf_channel_pdf(const Bssrdf *bssrdf, float radius, float r)
|
|
{
|
|
if (radius == 0.0f) {
|
|
return 0.0f;
|
|
}
|
|
else if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
|
|
return bssrdf_cubic_pdf(radius, bssrdf->sharpness, r);
|
|
}
|
|
else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
|
|
return bssrdf_gaussian_pdf(radius, r);
|
|
}
|
|
else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID ||
|
|
* bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
|
|
return bssrdf_burley_pdf(radius, r);
|
|
}
|
|
}
|
|
|
|
ccl_device_forceinline float3 bssrdf_eval(const ShaderClosure *sc, float r)
|
|
{
|
|
const Bssrdf *bssrdf = (const Bssrdf *)sc;
|
|
|
|
return make_float3(bssrdf_channel_pdf(bssrdf, bssrdf->radius.x, r),
|
|
bssrdf_channel_pdf(bssrdf, bssrdf->radius.y, r),
|
|
bssrdf_channel_pdf(bssrdf, bssrdf->radius.z, r));
|
|
}
|
|
|
|
ccl_device_forceinline float bssrdf_pdf(const ShaderClosure *sc, float r)
|
|
{
|
|
const Bssrdf *bssrdf = (const Bssrdf *)sc;
|
|
float3 pdf = bssrdf_eval(sc, r);
|
|
|
|
return (pdf.x + pdf.y + pdf.z) / bssrdf->channels;
|
|
}
|
|
|
|
CCL_NAMESPACE_END
|
|
|
|
#endif /* __KERNEL_BSSRDF_H__ */
|