forked from bartvdbraak/blender
Cycles: some tiny hair intersection optimizations that help maybe 2%.
This commit is contained in:
parent
0f85174d50
commit
6b9ca06088
@ -152,48 +152,47 @@ ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta,
|
|||||||
float halfdiscroot = (p2 * p2 - 3 * p3 * p1);
|
float halfdiscroot = (p2 * p2 - 3 * p3 * p1);
|
||||||
float ta = -1.0f;
|
float ta = -1.0f;
|
||||||
float tb = -1.0f;
|
float tb = -1.0f;
|
||||||
|
|
||||||
*extremta = -1.0f;
|
*extremta = -1.0f;
|
||||||
*extremtb = -1.0f;
|
*extremtb = -1.0f;
|
||||||
*upper = p0;
|
*upper = p0;
|
||||||
*lower = p0 + p1 + p2 + p3;
|
*lower = (p0 + p1) + (p2 + p3);
|
||||||
*extrema = *upper;
|
*extrema = *upper;
|
||||||
*extremb = *lower;
|
*extremb = *lower;
|
||||||
|
|
||||||
if(*lower >= *upper) {
|
if(*lower >= *upper) {
|
||||||
*upper = *lower;
|
*upper = *lower;
|
||||||
*lower = p0;
|
*lower = p0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(halfdiscroot >= 0) {
|
if(halfdiscroot >= 0) {
|
||||||
|
float inv3p3 = (1.0f/3.0f)/p3;
|
||||||
halfdiscroot = sqrt(halfdiscroot);
|
halfdiscroot = sqrt(halfdiscroot);
|
||||||
ta = (-p2 - halfdiscroot) / (3 * p3);
|
ta = (-p2 - halfdiscroot) * inv3p3;
|
||||||
tb = (-p2 + halfdiscroot) / (3 * p3);
|
tb = (-p2 + halfdiscroot) * inv3p3;
|
||||||
}
|
}
|
||||||
|
|
||||||
float t2;
|
float t2;
|
||||||
float t3;
|
float t3;
|
||||||
|
|
||||||
if(ta > 0.0f && ta < 1.0f) {
|
if(ta > 0.0f && ta < 1.0f) {
|
||||||
t2 = ta * ta;
|
t2 = ta * ta;
|
||||||
t3 = t2 * ta;
|
t3 = t2 * ta;
|
||||||
*extremta = ta;
|
*extremta = ta;
|
||||||
*extrema = p3 * t3 + p2 * t2 + p1 * ta + p0;
|
*extrema = p3 * t3 + p2 * t2 + p1 * ta + p0;
|
||||||
if(*extrema > *upper) {
|
|
||||||
*upper = *extrema;
|
*upper = fmaxf(*extrema, *upper);
|
||||||
}
|
*lower = fminf(*extrema, *lower);
|
||||||
if(*extrema < *lower) {
|
|
||||||
*lower = *extrema;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(tb > 0.0f && tb < 1.0f) {
|
if(tb > 0.0f && tb < 1.0f) {
|
||||||
t2 = tb * tb;
|
t2 = tb * tb;
|
||||||
t3 = t2 * tb;
|
t3 = t2 * tb;
|
||||||
*extremtb = tb;
|
*extremtb = tb;
|
||||||
*extremb = p3 * t3 + p2 * t2 + p1 * tb + p0;
|
*extremb = p3 * t3 + p2 * t2 + p1 * tb + p0;
|
||||||
if(*extremb >= *upper) {
|
|
||||||
*upper = *extremb;
|
*upper = fmaxf(*extremb, *upper);
|
||||||
}
|
*lower = fminf(*extremb, *lower);
|
||||||
if(*extremb <= *lower) {
|
|
||||||
*lower = *extremb;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -483,10 +482,11 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
|
|||||||
float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
|
float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
|
||||||
float d0 = d - r_curr;
|
float d0 = d - r_curr;
|
||||||
float d1 = d + r_curr;
|
float d1 = d + r_curr;
|
||||||
|
float inv_mw_extension = 1.0f/mw_extension;
|
||||||
if (d0 >= 0)
|
if (d0 >= 0)
|
||||||
coverage = (min(d1 / mw_extension, 1.0f) - min(d0 / mw_extension, 1.0f)) * 0.5f;
|
coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f;
|
||||||
else // inside
|
else // inside
|
||||||
coverage = (min(d1 / mw_extension, 1.0f) + min(-d0 / mw_extension, 1.0f)) * 0.5f;
|
coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) {
|
if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) {
|
||||||
@ -496,12 +496,19 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
|
|||||||
}
|
}
|
||||||
|
|
||||||
t = p_curr.z;
|
t = p_curr.z;
|
||||||
|
|
||||||
|
/* stochastic fade from minimum width */
|
||||||
|
if(difl != 0.0f && lcg_state) {
|
||||||
|
if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
|
||||||
|
return hit;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
float l = len(p_en - p_st);
|
float l = len(p_en - p_st);
|
||||||
/* minimum width extension */
|
/* minimum width extension */
|
||||||
float or1 = r1;
|
float or1 = r1;
|
||||||
float or2 = r2;
|
float or2 = r2;
|
||||||
|
|
||||||
if(difl != 0.0f) {
|
if(difl != 0.0f) {
|
||||||
mw_extension = min(len(p_st - P) * difl, extmax);
|
mw_extension = min(len(p_st - P) * difl, extmax);
|
||||||
or1 = r1 < mw_extension ? mw_extension : r1;
|
or1 = r1 < mw_extension ? mw_extension : r1;
|
||||||
@ -509,12 +516,14 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
|
|||||||
or2 = r2 < mw_extension ? mw_extension : r2;
|
or2 = r2 < mw_extension ? mw_extension : r2;
|
||||||
}
|
}
|
||||||
/* --- */
|
/* --- */
|
||||||
float3 tg = (p_en - p_st) / l;
|
float invl = 1.0f/l;
|
||||||
gd = (or2 - or1) / l;
|
float3 tg = (p_en - p_st) * invl;
|
||||||
|
gd = (or2 - or1) * invl;
|
||||||
float difz = -dot(p_st,tg);
|
float difz = -dot(p_st,tg);
|
||||||
float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd));
|
float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd));
|
||||||
|
float invcyla = 1.0f/cyla;
|
||||||
float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1)));
|
float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1)));
|
||||||
float tcentre = -halfb/cyla;
|
float tcentre = -halfb*invcyla;
|
||||||
float zcentre = difz + (tg.z * tcentre);
|
float zcentre = difz + (tg.z * tcentre);
|
||||||
float3 tdif = - p_st;
|
float3 tdif = - p_st;
|
||||||
tdif.z += tcentre;
|
tdif.z += tcentre;
|
||||||
@ -529,7 +538,7 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
|
|||||||
}
|
}
|
||||||
|
|
||||||
float rootd = sqrtf(td);
|
float rootd = sqrtf(td);
|
||||||
float correction = ((-tb - rootd)/(2*cyla));
|
float correction = (-tb - rootd) * 0.5f * invcyla;
|
||||||
t = tcentre + correction;
|
t = tcentre + correction;
|
||||||
|
|
||||||
float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
|
float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
|
||||||
@ -540,7 +549,7 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
|
|||||||
dp_en *= -1;
|
dp_en *= -1;
|
||||||
|
|
||||||
if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) {
|
if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) {
|
||||||
correction = ((-tb + rootd)/(2*cyla));
|
correction = (-tb + rootd) * 0.5f * invcyla;
|
||||||
t = tcentre + correction;
|
t = tcentre + correction;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -550,22 +559,22 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
float w = (zcentre + (tg.z * correction))/l;
|
float w = (zcentre + (tg.z * correction)) * invl;
|
||||||
w = clamp((float)w, 0.0f, 1.0f);
|
w = clamp((float)w, 0.0f, 1.0f);
|
||||||
/* compute u on the curve segment */
|
/* compute u on the curve segment */
|
||||||
u = i_st * (1 - w) + i_en * w;
|
u = i_st * (1 - w) + i_en * w;
|
||||||
|
|
||||||
|
/* stochastic fade from minimum width */
|
||||||
|
if(difl != 0.0f && lcg_state) {
|
||||||
r_curr = r1 + (r2 - r1) * w;
|
r_curr = r1 + (r2 - r1) * w;
|
||||||
r_ext = or1 + (or2 - or1) * w;
|
r_ext = or1 + (or2 - or1) * w;
|
||||||
coverage = r_curr/r_ext;
|
coverage = r_curr/r_ext;
|
||||||
|
|
||||||
}
|
if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
|
||||||
/* we found a new intersection */
|
|
||||||
|
|
||||||
/* stochastic fade from minimum width */
|
|
||||||
if(lcg_state && coverage != 1.0f) {
|
|
||||||
if(lcg_step_float(lcg_state) > coverage)
|
|
||||||
return hit;
|
return hit;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
/* we found a new intersection */
|
||||||
|
|
||||||
#ifdef __VISIBILITY_FLAG__
|
#ifdef __VISIBILITY_FLAG__
|
||||||
/* visibility flag test. we do it here under the assumption
|
/* visibility flag test. we do it here under the assumption
|
||||||
|
Loading…
Reference in New Issue
Block a user