forked from bartvdbraak/blender
Cycles: Speedup of Christensen-Burley SSS falloff function
The idea is simply to pre-compute the fitting and parameterization values in the bssrdf_setup() function and re-use them in both sample() and eval(). The only trick is where to store the pre-calculated values, and the answer is inside ShaderClosure->custom{1,2,3}. There is no memory increase here because we simply re-use the former padding fields for the pre-calculated values. A similar trick can be applied to other BSDFs. This seems to give a nice speedup of up to 7% here on my desktop with a Core i7 CPU and the SSE4.1 kernel.
This commit is contained in:
parent
f250aa9d86
commit
3e7389eaf2
@ -19,25 +19,6 @@
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
ccl_device int bssrdf_setup(ShaderClosure *sc, ClosureType type)
|
|
||||||
{
|
|
||||||
if(sc->data0 < BSSRDF_MIN_RADIUS) {
|
|
||||||
/* revert to diffuse BSDF if radius too small */
|
|
||||||
sc->data0 = 0.0f;
|
|
||||||
sc->data1 = 0.0f;
|
|
||||||
int flag = bsdf_diffuse_setup(sc);
|
|
||||||
sc->type = CLOSURE_BSDF_BSSRDF_ID;
|
|
||||||
return flag;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
sc->data1 = saturate(sc->data1); /* texture blur */
|
|
||||||
sc->T.x = saturate(sc->T.x); /* sharpness */
|
|
||||||
sc->type = type;
|
|
||||||
|
|
||||||
return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Planar Truncated Gaussian
|
/* Planar Truncated Gaussian
|
||||||
*
|
*
|
||||||
* Note how this is different from the typical gaussian, this one integrates
|
* Note how this is different from the typical gaussian, this one integrates
|
||||||
@ -210,13 +191,24 @@ ccl_device_inline float bssrdf_burley_compatible_mfp(float r)
|
|||||||
return 0.5f * M_1_PI_F * r;
|
return 0.5f * M_1_PI_F * r;
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_device float bssrdf_burley_eval(ShaderClosure *sc, float r)
|
ccl_device void bssrdf_burley_setup(ShaderClosure *sc)
|
||||||
{
|
{
|
||||||
/* Mean free path length. */
|
/* Mean free path length. */
|
||||||
const float l = bssrdf_burley_compatible_mfp(sc->data0);
|
const float l = bssrdf_burley_compatible_mfp(sc->data0);
|
||||||
/* Surface albedo. */
|
/* Surface albedo. */
|
||||||
const float A = sc->data2;
|
const float A = sc->data2;
|
||||||
const float s = bssrdf_burley_fitting(A);
|
const float s = bssrdf_burley_fitting(A);
|
||||||
|
const float d = l / s;
|
||||||
|
|
||||||
|
sc->custom1 = l;
|
||||||
|
sc->custom2 = s;
|
||||||
|
sc->custom3 = d;
|
||||||
|
}
|
||||||
|
|
||||||
|
ccl_device float bssrdf_burley_eval(ShaderClosure *sc, float r)
|
||||||
|
{
|
||||||
|
const float l = sc->custom1,
|
||||||
|
s = sc->custom2;
|
||||||
/* Burley reflectance profile, equation (3).
|
/* Burley reflectance profile, equation (3).
|
||||||
*
|
*
|
||||||
* Note that surface albedo is already included into sc->weight, no need to
|
* Note that surface albedo is already included into sc->weight, no need to
|
||||||
@ -277,12 +269,7 @@ ccl_device void bssrdf_burley_sample(ShaderClosure *sc,
|
|||||||
float *r,
|
float *r,
|
||||||
float *h)
|
float *h)
|
||||||
{
|
{
|
||||||
/* Mean free path length. */
|
const float d = sc->custom3;
|
||||||
const float l = bssrdf_burley_compatible_mfp(sc->data0);
|
|
||||||
/* Surface albedo. */
|
|
||||||
const float A = sc->data2;
|
|
||||||
const float s = bssrdf_burley_fitting(A);
|
|
||||||
const float d = l / s;
|
|
||||||
/* This is a bit arbitrary, just need big enough radius so it matches
|
/* This is a bit arbitrary, just need big enough radius so it matches
|
||||||
* the mean free length, but still not too big so sampling is still
|
* the mean free length, but still not too big so sampling is still
|
||||||
* effective. Might need some further tweaks.
|
* effective. Might need some further tweaks.
|
||||||
@ -330,6 +317,29 @@ ccl_device void bssrdf_none_sample(ShaderClosure *sc, float xi, float *r, float
|
|||||||
|
|
||||||
/* Generic */
|
/* Generic */
|
||||||
|
|
||||||
|
ccl_device int bssrdf_setup(ShaderClosure *sc, ClosureType type)
|
||||||
|
{
|
||||||
|
if(sc->data0 < BSSRDF_MIN_RADIUS) {
|
||||||
|
/* revert to diffuse BSDF if radius too small */
|
||||||
|
sc->data0 = 0.0f;
|
||||||
|
sc->data1 = 0.0f;
|
||||||
|
int flag = bsdf_diffuse_setup(sc);
|
||||||
|
sc->type = CLOSURE_BSDF_BSSRDF_ID;
|
||||||
|
return flag;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
sc->data1 = saturate(sc->data1); /* texture blur */
|
||||||
|
sc->T.x = saturate(sc->T.x); /* sharpness */
|
||||||
|
sc->type = type;
|
||||||
|
|
||||||
|
if(type == CLOSURE_BSSRDF_BURLEY_ID) {
|
||||||
|
bssrdf_burley_setup(sc);
|
||||||
|
}
|
||||||
|
|
||||||
|
return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ccl_device void bssrdf_sample(ShaderClosure *sc, float xi, float *r, float *h)
|
ccl_device void bssrdf_sample(ShaderClosure *sc, float xi, float *r, float *h)
|
||||||
{
|
{
|
||||||
if(sc->type == CLOSURE_BSSRDF_CUBIC_ID)
|
if(sc->type == CLOSURE_BSSRDF_CUBIC_ID)
|
||||||
|
@ -642,7 +642,14 @@ typedef ccl_addr_space struct ShaderClosure {
|
|||||||
float data0;
|
float data0;
|
||||||
float data1;
|
float data1;
|
||||||
float data2;
|
float data2;
|
||||||
int pad1, pad2, pad3;
|
|
||||||
|
/* Following fields could be used to store pre-calculated
|
||||||
|
* values by various BSDF closures for more effective sampling
|
||||||
|
* and evaluation.
|
||||||
|
*/
|
||||||
|
float custom1;
|
||||||
|
float custom2;
|
||||||
|
float custom3;
|
||||||
|
|
||||||
#ifdef __OSL__
|
#ifdef __OSL__
|
||||||
void *prim, *pad4;
|
void *prim, *pad4;
|
||||||
|
Loading…
Reference in New Issue
Block a user