From 6b9ca060883f57d70e7391d54bc0024d1c7dd807 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Wed, 23 Apr 2014 01:10:26 +0200 Subject: [PATCH] Cycles: some tiny hair intersection optimizations that help maybe 2%. --- intern/cycles/kernel/geom/geom_curve.h | 73 +++++++++++++++----------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index 57fa0388842..32f6a88dce2 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -152,48 +152,47 @@ ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta, float halfdiscroot = (p2 * p2 - 3 * p3 * p1); float ta = -1.0f; float tb = -1.0f; + *extremta = -1.0f; *extremtb = -1.0f; *upper = p0; - *lower = p0 + p1 + p2 + p3; + *lower = (p0 + p1) + (p2 + p3); *extrema = *upper; *extremb = *lower; + if(*lower >= *upper) { *upper = *lower; *lower = p0; } if(halfdiscroot >= 0) { + float inv3p3 = (1.0f/3.0f)/p3; halfdiscroot = sqrt(halfdiscroot); - ta = (-p2 - halfdiscroot) / (3 * p3); - tb = (-p2 + halfdiscroot) / (3 * p3); + ta = (-p2 - halfdiscroot) * inv3p3; + tb = (-p2 + halfdiscroot) * inv3p3; } float t2; float t3; + if(ta > 0.0f && ta < 1.0f) { t2 = ta * ta; t3 = t2 * ta; *extremta = ta; *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0; - if(*extrema > *upper) { - *upper = *extrema; - } - if(*extrema < *lower) { - *lower = *extrema; - } + + *upper = fmaxf(*extrema, *upper); + *lower = fminf(*extrema, *lower); } + if(tb > 0.0f && tb < 1.0f) { t2 = tb * tb; t3 = t2 * tb; *extremtb = tb; *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0; - if(*extremb >= *upper) { - *upper = *extremb; - } - if(*extremb <= *lower) { - *lower = *extremb; - } + + *upper = fmaxf(*extremb, *upper); + *lower = fminf(*extremb, *lower); } } @@ -483,10 +482,11 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y); float d0 = d - r_curr; float d1 = d + r_curr; + float inv_mw_extension = 1.0f/mw_extension; if (d0 >= 0) - coverage = (min(d1 / mw_extension, 1.0f) - min(d0 / mw_extension, 1.0f)) * 0.5f; + coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f; else // inside - coverage = (min(d1 / mw_extension, 1.0f) + min(-d0 / mw_extension, 1.0f)) * 0.5f; + coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f; } if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) { @@ -496,12 +496,19 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect } t = p_curr.z; + + /* stochastic fade from minimum width */ + if(difl != 0.0f && lcg_state) { + if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) + return hit; + } } else { float l = len(p_en - p_st); /* minimum width extension */ float or1 = r1; float or2 = r2; + if(difl != 0.0f) { mw_extension = min(len(p_st - P) * difl, extmax); or1 = r1 < mw_extension ? mw_extension : r1; @@ -509,12 +516,14 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect or2 = r2 < mw_extension ? mw_extension : r2; } /* --- */ - float3 tg = (p_en - p_st) / l; - gd = (or2 - or1) / l; + float invl = 1.0f/l; + float3 tg = (p_en - p_st) * invl; + gd = (or2 - or1) * invl; float difz = -dot(p_st,tg); float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd)); + float invcyla = 1.0f/cyla; float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1))); - float tcentre = -halfb/cyla; + float tcentre = -halfb*invcyla; float zcentre = difz + (tg.z * tcentre); float3 tdif = - p_st; tdif.z += tcentre; @@ -529,7 +538,7 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect } float rootd = sqrtf(td); - float correction = ((-tb - rootd)/(2*cyla)); + float correction = (-tb - rootd) * 0.5f * invcyla; t = tcentre + correction; float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; @@ -540,7 +549,7 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect dp_en *= -1; if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) { - correction = ((-tb + rootd)/(2*cyla)); + correction = (-tb + rootd) * 0.5f * invcyla; t = tcentre + correction; } @@ -550,23 +559,23 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect continue; } - float w = (zcentre + (tg.z * correction))/l; + float w = (zcentre + (tg.z * correction)) * invl; w = clamp((float)w, 0.0f, 1.0f); /* compute u on the curve segment */ u = i_st * (1 - w) + i_en * w; - r_curr = r1 + (r2 - r1) * w; - r_ext = or1 + (or2 - or1) * w; - coverage = r_curr/r_ext; + /* stochastic fade from minimum width */ + if(difl != 0.0f && lcg_state) { + r_curr = r1 + (r2 - r1) * w; + r_ext = or1 + (or2 - or1) * w; + coverage = r_curr/r_ext; + + if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) + return hit; + } } /* we found a new intersection */ - /* stochastic fade from minimum width */ - if(lcg_state && coverage != 1.0f) { - if(lcg_step_float(lcg_state) > coverage) - return hit; - } - #ifdef __VISIBILITY_FLAG__ /* visibility flag test. we do it here under the assumption * that most triangles are culled by node flags */