diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h index 0272dff5115..055b18e63d7 100644 --- a/intern/cycles/kernel/geom/geom_bvh.h +++ b/intern/cycles/kernel/geom/geom_bvh.h @@ -46,839 +46,6 @@ CCL_NAMESPACE_BEGIN -ccl_device_inline float3 bvh_inverse_direction(float3 dir) -{ - /* avoid divide by zero (ooeps = exp2f(-80.0f)) */ - float ooeps = 0.00000000000000000000000082718061255302767487140869206996285356581211090087890625f; - float3 idir; - - idir.x = 1.0f/((fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x)); - idir.y = 1.0f/((fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y)); - idir.z = 1.0f/((fabsf(dir.z) > ooeps)? dir.z: copysignf(ooeps, dir.z)); - - return idir; -} - -ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax) -{ - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - - *P = transform_point(&tfm, ray->P); - - float3 dir = transform_direction(&tfm, ray->D); - - float len; - dir = normalize_len(dir, &len); - - *idir = bvh_inverse_direction(dir); - - if(*t != FLT_MAX) - *t *= len; -} - -ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax) -{ - if(*t != FLT_MAX) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); - *t *= len(transform_direction(&tfm, 1.0f/(*idir))); - } - - *P = ray->P; - *idir = bvh_inverse_direction(ray->D); -} - -#ifdef __OBJECT_MOTION__ -ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax) -{ - Transform itfm; - *tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm); - - *P = transform_point(&itfm, ray->P); - - float3 dir = transform_direction(&itfm, ray->D); - - float len; - dir = normalize_len(dir, &len); - - *idir = bvh_inverse_direction(dir); - - if(*t != FLT_MAX) - *t *= len; -} - -ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax) -{ - if(*t != FLT_MAX) - *t *= len(transform_direction(tfm, 1.0f/(*idir))); - - *P = ray->P; - *idir = bvh_inverse_direction(ray->D); -} -#endif - -/* Sven Woop's algorithm */ -ccl_device_inline bool bvh_triangle_intersect(KernelGlobals *kg, Intersection *isect, - float3 P, float3 idir, uint visibility, int object, int triAddr) -{ - /* compute and check intersection t-value */ - float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); - float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); - float3 dir = 1.0f/idir; - - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); - float t = Oz * invDz; - - if(t > 0.0f && t < isect->t) { - /* compute and check barycentric u */ - float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; - float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; - float u = Ox + t*Dx; - - if(u >= 0.0f) { - /* compute and check barycentric v */ - float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); - float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; - float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; - float v = Oy + t*Dy; - - if(v >= 0.0f && u + v <= 1.0f) { -#ifdef __VISIBILITY_FLAG__ - /* visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility) -#endif - { - /* record intersection */ - isect->prim = triAddr; - isect->object = object; - isect->u = u; - isect->v = v; - isect->t = t; - return true; - } - } - } - } - - return false; -} - -#ifdef __HAIR__ -ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta, float *extrema, float *extremtb, float *extremb, float p0, float p1, float p2, float p3) -{ - float halfdiscroot = (p2 * p2 - 3 * p3 * p1); - float ta = -1.0f; - float tb = -1.0f; - *extremta = -1.0f; - *extremtb = -1.0f; - *upper = p0; - *lower = p0 + p1 + p2 + p3; - *extrema = *upper; - *extremb = *lower; - if(*lower >= *upper) { - *upper = *lower; - *lower = p0; - } - - if(halfdiscroot >= 0) { - halfdiscroot = sqrt(halfdiscroot); - ta = (-p2 - halfdiscroot) / (3 * p3); - tb = (-p2 + halfdiscroot) / (3 * p3); - } - - float t2; - float t3; - if(ta > 0.0f && ta < 1.0f) { - t2 = ta * ta; - t3 = t2 * ta; - *extremta = ta; - *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0; - if(*extrema > *upper) { - *upper = *extrema; - } - if(*extrema < *lower) { - *lower = *extrema; - } - } - if(tb > 0.0f && tb < 1.0f) { - t2 = tb * tb; - t3 = t2 * tb; - *extremtb = tb; - *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0; - if(*extremb >= *upper) { - *upper = *extremb; - } - if(*extremb <= *lower) { - *lower = *extremb; - } - } -} - -#ifdef __KERNEL_SSE2__ -ccl_device_inline __m128 transform_point_T3(const __m128 t[3], const __m128 &a) -{ - return fma(broadcast<0>(a), t[0], fma(broadcast<1>(a), t[1], _mm_mul_ps(broadcast<2>(a), t[2]))); -} -#endif - -#ifdef __KERNEL_SSE2__ -/* Pass P and idir by reference to aligned vector */ -ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, - const float3 &P, const float3 &idir, uint visibility, int object, int curveAddr, int segment, uint *lcg_state, float difl, float extmax) -#else -ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, - float3 P, float3 idir, uint visibility, int object, int curveAddr, int segment, uint *lcg_state, float difl, float extmax) -#endif -{ - float epsilon = 0.0f; - float r_st, r_en; - - int depth = kernel_data.curve.subdivisions; - int flags = kernel_data.curve.curveflags; - int prim = kernel_tex_fetch(__prim_index, curveAddr); - -#ifdef __KERNEL_SSE2__ - __m128 vdir = _mm_div_ps(_mm_set1_ps(1.0f), load_m128(idir)); - __m128 vcurve_coef[4]; - const float3 *curve_coef = (float3 *)vcurve_coef; - - { - __m128 dtmp = _mm_mul_ps(vdir, vdir); - __m128 d_ss = _mm_sqrt_ss(_mm_add_ss(dtmp, broadcast<2>(dtmp))); - __m128 rd_ss = _mm_div_ss(_mm_set_ss(1.0f), d_ss); - - __m128i v00vec = _mm_load_si128((__m128i *)&kg->__curves.data[prim]); - int2 &v00 = (int2 &)v00vec; - - int k0 = v00.x + segment; - int k1 = k0 + 1; - int ka = max(k0 - 1, v00.x); - int kb = min(k1 + 1, v00.x + v00.y - 1); - - __m128 P0 = _mm_load_ps(&kg->__curve_keys.data[ka].x); - __m128 P1 = _mm_load_ps(&kg->__curve_keys.data[k0].x); - __m128 P2 = _mm_load_ps(&kg->__curve_keys.data[k1].x); - __m128 P3 = _mm_load_ps(&kg->__curve_keys.data[kb].x); - - __m128 rd_sgn = set_sign_bit<0, 1, 1, 1>(broadcast<0>(rd_ss)); - __m128 mul_zxxy = _mm_mul_ps(shuffle<2, 0, 0, 1>(vdir), rd_sgn); - __m128 mul_yz = _mm_mul_ps(shuffle<1, 2, 1, 2>(vdir), mul_zxxy); - __m128 mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz); - __m128 vdir0 = _mm_and_ps(vdir, _mm_castsi128_ps(_mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0))); - - __m128 htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0); - __m128 htfm1 = shuffle<1, 0, 1, 3>(_mm_set_ss(_mm_cvtss_f32(d_ss)), vdir0); - __m128 htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0); - - __m128 htfm[] = { htfm0, htfm1, htfm2 }; - __m128 vP = load_m128(P); - __m128 p0 = transform_point_T3(htfm, _mm_sub_ps(P0, vP)); - __m128 p1 = transform_point_T3(htfm, _mm_sub_ps(P1, vP)); - __m128 p2 = transform_point_T3(htfm, _mm_sub_ps(P2, vP)); - __m128 p3 = transform_point_T3(htfm, _mm_sub_ps(P3, vP)); - - float fc = 0.71f; - __m128 vfc = _mm_set1_ps(fc); - __m128 vfcxp3 = _mm_mul_ps(vfc, p3); - - vcurve_coef[0] = p1; - vcurve_coef[1] = _mm_mul_ps(vfc, _mm_sub_ps(p2, p0)); - vcurve_coef[2] = fma(_mm_set1_ps(fc * 2.0f), p0, fma(_mm_set1_ps(fc - 3.0f), p1, fms(_mm_set1_ps(3.0f - 2.0f * fc), p2, vfcxp3))); - vcurve_coef[3] = fms(_mm_set1_ps(fc - 2.0f), _mm_sub_ps(p2, p1), fms(vfc, p0, vfcxp3)); - - r_st = ((float4 &)P1).w; - r_en = ((float4 &)P2).w; - } -#else - float3 curve_coef[4]; - - /* curve Intersection check */ - float3 dir = 1.0f/idir; - - /* obtain curve parameters */ - { - /* ray transform created - this should be created at beginning of intersection loop */ - Transform htfm; - float d = sqrtf(dir.x * dir.x + dir.z * dir.z); - htfm = make_transform( - dir.z / d, 0, -dir.x /d, 0, - -dir.x * dir.y /d, d, -dir.y * dir.z /d, 0, - dir.x, dir.y, dir.z, 0, - 0, 0, 0, 1); - - float4 v00 = kernel_tex_fetch(__curves, prim); - - int k0 = __float_as_int(v00.x) + segment; - int k1 = k0 + 1; - - int ka = max(k0 - 1,__float_as_int(v00.x)); - int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1); - - float4 P0 = kernel_tex_fetch(__curve_keys, ka); - float4 P1 = kernel_tex_fetch(__curve_keys, k0); - float4 P2 = kernel_tex_fetch(__curve_keys, k1); - float4 P3 = kernel_tex_fetch(__curve_keys, kb); - - float3 p0 = transform_point(&htfm, float4_to_float3(P0) - P); - float3 p1 = transform_point(&htfm, float4_to_float3(P1) - P); - float3 p2 = transform_point(&htfm, float4_to_float3(P2) - P); - float3 p3 = transform_point(&htfm, float4_to_float3(P3) - P); - - float fc = 0.71f; - curve_coef[0] = p1; - curve_coef[1] = -fc*p0 + fc*p2; - curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3; - curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3; - r_st = P1.w; - r_en = P2.w; - } -#endif - - float r_curr = max(r_st, r_en); - - if((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING)) - epsilon = 2 * r_curr; - - /* find bounds - this is slow for cubic curves */ - float upper, lower; - - float zextrem[4]; - curvebounds(&lower, &upper, &zextrem[0], &zextrem[1], &zextrem[2], &zextrem[3], curve_coef[0].z, curve_coef[1].z, curve_coef[2].z, curve_coef[3].z); - if(lower - r_curr > isect->t || upper + r_curr < epsilon) - return false; - - /* minimum width extension */ - float mw_extension = min(difl * fabsf(upper), extmax); - float r_ext = mw_extension + r_curr; - - float xextrem[4]; - curvebounds(&lower, &upper, &xextrem[0], &xextrem[1], &xextrem[2], &xextrem[3], curve_coef[0].x, curve_coef[1].x, curve_coef[2].x, curve_coef[3].x); - if(lower > r_ext || upper < -r_ext) - return false; - - float yextrem[4]; - curvebounds(&lower, &upper, &yextrem[0], &yextrem[1], &yextrem[2], &yextrem[3], curve_coef[0].y, curve_coef[1].y, curve_coef[2].y, curve_coef[3].y); - if(lower > r_ext || upper < -r_ext) - return false; - - /* setup recurrent loop */ - int level = 1 << depth; - int tree = 0; - float resol = 1.0f / (float)level; - bool hit = false; - - /* begin loop */ - while(!(tree >> (depth))) { - float i_st = tree * resol; - float i_en = i_st + (level * resol); -#ifdef __KERNEL_SSE2__ - __m128 vi_st = _mm_set1_ps(i_st), vi_en = _mm_set1_ps(i_en); - __m128 vp_st = fma(fma(fma(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]); - __m128 vp_en = fma(fma(fma(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), vi_en, vcurve_coef[0]); - - __m128 vbmin = _mm_min_ps(vp_st, vp_en); - __m128 vbmax = _mm_max_ps(vp_st, vp_en); - - float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax; - float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z; - float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z; - float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en; -#else - float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st + curve_coef[0]; - float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en + curve_coef[0]; - - float bminx = min(p_st.x, p_en.x); - float bmaxx = max(p_st.x, p_en.x); - float bminy = min(p_st.y, p_en.y); - float bmaxy = max(p_st.y, p_en.y); - float bminz = min(p_st.z, p_en.z); - float bmaxz = max(p_st.z, p_en.z); -#endif - - if(xextrem[0] >= i_st && xextrem[0] <= i_en) { - bminx = min(bminx,xextrem[1]); - bmaxx = max(bmaxx,xextrem[1]); - } - if(xextrem[2] >= i_st && xextrem[2] <= i_en) { - bminx = min(bminx,xextrem[3]); - bmaxx = max(bmaxx,xextrem[3]); - } - if(yextrem[0] >= i_st && yextrem[0] <= i_en) { - bminy = min(bminy,yextrem[1]); - bmaxy = max(bmaxy,yextrem[1]); - } - if(yextrem[2] >= i_st && yextrem[2] <= i_en) { - bminy = min(bminy,yextrem[3]); - bmaxy = max(bmaxy,yextrem[3]); - } - if(zextrem[0] >= i_st && zextrem[0] <= i_en) { - bminz = min(bminz,zextrem[1]); - bmaxz = max(bmaxz,zextrem[1]); - } - if(zextrem[2] >= i_st && zextrem[2] <= i_en) { - bminz = min(bminz,zextrem[3]); - bmaxz = max(bmaxz,zextrem[3]); - } - - float r1 = r_st + (r_en - r_st) * i_st; - float r2 = r_st + (r_en - r_st) * i_en; - r_curr = max(r1, r2); - - mw_extension = min(difl * fabsf(bmaxz), extmax); - float r_ext = mw_extension + r_curr; - float coverage = 1.0f; - - if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) { - /* the bounding box does not overlap the square centered at O */ - tree += level; - level = tree & -tree; - } - else if (level == 1) { - - /* the maximum recursion depth is reached. - * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. - * dP* is reversed if necessary.*/ - float t = isect->t; - float u = 0.0f; - if(flags & CURVE_KN_RIBBONS) { - float3 tg = (p_en - p_st); - float w = tg.x * tg.x + tg.y * tg.y; - if (w == 0) { - tree++; - level = tree & -tree; - continue; - } - w = -(p_st.x * tg.x + p_st.y * tg.y) / w; - w = clamp((float)w, 0.0f, 1.0f); - - /* compute u on the curve segment */ - u = i_st * (1 - w) + i_en * w; - r_curr = r_st + (r_en - r_st) * u; - /* compare x-y distances */ - float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0]; - - float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; - if (dot(tg, dp_st)< 0) - dp_st *= -1; - if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) { - tree++; - level = tree & -tree; - continue; - } - float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; - if (dot(tg, dp_en) < 0) - dp_en *= -1; - if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { - tree++; - level = tree & -tree; - continue; - } - - /* compute coverage */ - float r_ext = r_curr; - coverage = 1.0f; - if(difl != 0.0f) { - mw_extension = min(difl * fabsf(bmaxz), extmax); - r_ext = mw_extension + r_curr; - float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y); - float d0 = d - r_curr; - float d1 = d + r_curr; - if (d0 >= 0) - coverage = (min(d1 / mw_extension, 1.0f) - min(d0 / mw_extension, 1.0f)) * 0.5f; - else // inside - coverage = (min(d1 / mw_extension, 1.0f) + min(-d0 / mw_extension, 1.0f)) * 0.5f; - } - - if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) { - tree++; - level = tree & -tree; - continue; - } - - t = p_curr.z; - } - else { - float l = len(p_en - p_st); - /* minimum width extension */ - float or1 = r1; - float or2 = r2; - if(difl != 0.0f) { - mw_extension = min(len(p_st - P) * difl, extmax); - or1 = r1 < mw_extension ? mw_extension : r1; - mw_extension = min(len(p_en - P) * difl, extmax); - or2 = r2 < mw_extension ? mw_extension : r2; - } - /* --- */ - float3 tg = (p_en - p_st) / l; - float gd = (or2 - or1) / l; - float difz = -dot(p_st,tg); - float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd)); - float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1))); - float tcentre = -halfb/cyla; - float zcentre = difz + (tg.z * tcentre); - float3 tdif = - p_st; - tdif.z += tcentre; - float tdifz = dot(tdif,tg); - float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1))); - float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd; - float td = tb*tb - 4*cyla*tc; - if (td < 0.0f) { - tree++; - level = tree & -tree; - continue; - } - - float rootd = sqrtf(td); - float correction = ((-tb - rootd)/(2*cyla)); - t = tcentre + correction; - - float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; - if (dot(tg, dp_st)< 0) - dp_st *= -1; - float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; - if (dot(tg, dp_en) < 0) - dp_en *= -1; - - if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) { - correction = ((-tb + rootd)/(2*cyla)); - t = tcentre + correction; - } - - if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) { - tree++; - level = tree & -tree; - continue; - } - - float w = (zcentre + (tg.z * correction))/l; - w = clamp((float)w, 0.0f, 1.0f); - /* compute u on the curve segment */ - u = i_st * (1 - w) + i_en * w; - r_curr = r1 + (r2 - r1) * w; - r_ext = or1 + (or2 - or1) * w; - coverage = r_curr/r_ext; - - } - /* we found a new intersection */ - - /* stochastic fade from minimum width */ - if(lcg_state && coverage != 1.0f) { - if(lcg_step_float(lcg_state) > coverage) - return hit; - } - -#ifdef __VISIBILITY_FLAG__ - /* visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) -#endif - { - /* record intersection */ - isect->prim = curveAddr; - isect->segment = segment; - isect->object = object; - isect->u = u; - isect->v = 0.0f; - /*isect->v = 1.0f - coverage; */ - isect->t = t; - hit = true; - } - - tree++; - level = tree & -tree; - } - else { - /* split the curve into two curves and process */ - level = level >> 1; - } - } - - return hit; -} - -ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect, - float3 P, float3 idir, uint visibility, int object, int curveAddr, int segment, uint *lcg_state, float difl, float extmax) -{ - /* define few macros to minimize code duplication for SSE */ -#ifndef __KERNEL_SSE2__ -#define len3_squared(x) len_squared(x) -#define len3(x) len(x) -#define dot3(x, y) dot(x, y) -#endif - - /* curve Intersection check */ - int flags = kernel_data.curve.curveflags; - - int prim = kernel_tex_fetch(__prim_index, curveAddr); - float4 v00 = kernel_tex_fetch(__curves, prim); - - int cnum = __float_as_int(v00.x); - int k0 = cnum + segment; - int k1 = k0 + 1; - -#ifndef __KERNEL_SSE2__ - float4 P1 = kernel_tex_fetch(__curve_keys, k0); - float4 P2 = kernel_tex_fetch(__curve_keys, k1); - - float or1 = P1.w; - float or2 = P2.w; - float3 p1 = float4_to_float3(P1); - float3 p2 = float4_to_float3(P2); - - /* minimum width extension */ - float r1 = or1; - float r2 = or2; - float3 dif = P - p1; - float3 dif_second = P - p2; - if(difl != 0.0f) { - float pixelsize = min(len3(dif) * difl, extmax); - r1 = or1 < pixelsize ? pixelsize : or1; - pixelsize = min(len3(dif_second) * difl, extmax); - r2 = or2 < pixelsize ? pixelsize : or2; - } - /* --- */ - - float3 dir = 1.0f / idir; - float3 p21_diff = p2 - p1; - float3 sphere_dif1 = (dif + dif_second) * 0.5f; - float sphere_b_tmp = dot3(dir, sphere_dif1); - float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir; -#else - const __m128 p1 = _mm_load_ps(&kg->__curve_keys.data[k0].x); - const __m128 p2 = _mm_load_ps(&kg->__curve_keys.data[k1].x); - const __m128 or12 = shuffle<3, 3, 3, 3>(p1, p2); - - __m128 r12 = or12; - const __m128 vP = load_m128(P); - const __m128 dif = _mm_sub_ps(vP, p1); - const __m128 dif_second = _mm_sub_ps(vP, p2); - if(difl != 0.0f) { - const __m128 len1_sq = len3_squared_splat(dif); - const __m128 len2_sq = len3_squared_splat(dif_second); - const __m128 len12 = _mm_sqrt_ps(shuffle<0, 0, 0, 0>(len1_sq, len2_sq)); - const __m128 pixelsize12 = _mm_min_ps(_mm_mul_ps(len12, _mm_set1_ps(difl)), _mm_set1_ps(extmax)); - r12 = _mm_max_ps(or12, pixelsize12); - } - float or1 = _mm_cvtss_f32(or12), or2 = _mm_cvtss_f32(broadcast<2>(or12)); - float r1 = _mm_cvtss_f32(r12), r2 = _mm_cvtss_f32(broadcast<2>(r12)); - - const __m128 dir = _mm_div_ps(_mm_set1_ps(1.0f), load_m128(idir)); - const __m128 p21_diff = _mm_sub_ps(p2, p1); - const __m128 sphere_dif1 = _mm_mul_ps(_mm_add_ps(dif, dif_second), _mm_set1_ps(0.5f)); - const __m128 sphere_b_tmp = dot3_splat(dir, sphere_dif1); - const __m128 sphere_dif2 = fnma(sphere_b_tmp, dir, sphere_dif1); -#endif - - float mr = max(r1, r2); - float l = len3(p21_diff); - float invl = 1.0f / l; - float sp_r = mr + 0.5f * l; - - float sphere_b = dot3(dir, sphere_dif2); - float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r; - - if(sdisc < 0.0f) - return false; - - /* obtain parameters and test midpoint distance for suitable modes */ -#ifndef __KERNEL_SSE2__ - float3 tg = p21_diff * invl; -#else - const __m128 tg = _mm_mul_ps(p21_diff, _mm_set1_ps(invl)); -#endif - float gd = (r2 - r1) * invl; - - float dirz = dot3(dir, tg); - float difz = dot3(dif, tg); - - float a = 1.0f - (dirz*dirz*(1 + gd*gd)); - - float halfb = dot3(dir, dif) - dirz*(difz + gd*(difz*gd + r1)); - - float tcentre = -halfb/a; - float zcentre = difz + (dirz * tcentre); - - if((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE)) - return false; - if((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) && !(flags & CURVE_KN_INTERSECTCORRECTION)) - return false; - - /* test minimum separation */ -#ifndef __KERNEL_SSE2__ - float3 cprod = cross(tg, dir); - float cprod2sq = len3_squared(cross(tg, dif)); -#else - const __m128 cprod = cross(tg, dir); - float cprod2sq = len3_squared(cross_zxy(tg, dif)); -#endif - float cprodsq = len3_squared(cprod); - float distscaled = dot3(cprod, dif); - - if(cprodsq == 0) - distscaled = cprod2sq; - else - distscaled = (distscaled*distscaled)/cprodsq; - - if(distscaled > mr*mr) - return false; - - /* calculate true intersection */ -#ifndef __KERNEL_SSE2__ - float3 tdif = dif + tcentre * dir; -#else - const __m128 tdif = fma(_mm_set1_ps(tcentre), dir, dif); -#endif - float tdifz = dot3(tdif, tg); - float tdifma = tdifz*gd + r1; - float tb = 2*(dot3(dir, tdif) - dirz*(tdifz + gd*tdifma)); - float tc = dot3(tdif, tdif) - tdifz*tdifz - tdifma*tdifma; - float td = tb*tb - 4*a*tc; - - if (td < 0.0f) - return false; - - float rootd = 0.0f; - float correction = 0.0f; - if(flags & CURVE_KN_ACCURATE) { - rootd = sqrtf(td); - correction = ((-tb - rootd)/(2*a)); - } - - float t = tcentre + correction; - - if(t < isect->t) { - - if(flags & CURVE_KN_INTERSECTCORRECTION) { - rootd = sqrtf(td); - correction = ((-tb - rootd)/(2*a)); - t = tcentre + correction; - } - - float z = zcentre + (dirz * correction); - bool backface = false; - - if(flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) { - backface = true; - correction = ((-tb + rootd)/(2*a)); - t = tcentre + correction; - z = zcentre + (dirz * correction); - } - - /* stochastic fade from minimum width */ - float adjradius = or1 + z * (or2 - or1) * invl; - adjradius = adjradius / (r1 + z * gd); - if(lcg_state && adjradius != 1.0f) { - if(lcg_step_float(lcg_state) > adjradius) - return false; - } - /* --- */ - - if(t > 0.0f && t < isect->t && z >= 0 && z <= l) { - - if (flags & CURVE_KN_ENCLOSEFILTER) { - float enc_ratio = 1.01f; - if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) { - float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio)); - float c2 = dot3(dif, dif) - difz * difz * (1 + gd*gd*enc_ratio*enc_ratio) - r1*r1*enc_ratio*enc_ratio - 2*r1*difz*gd*enc_ratio; - if(a2*c2 < 0.0f) - return false; - } - } - -#ifdef __VISIBILITY_FLAG__ - /* visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) -#endif - { - /* record intersection */ - isect->prim = curveAddr; - isect->segment = segment; - isect->object = object; - isect->u = z*invl; - isect->v = td/(4*a*a); - /*isect->v = 1.0f - adjradius;*/ - isect->t = t; - - if(backface) - isect->u = -isect->u; - - return true; - } - } - } - - return false; - -#ifndef __KERNEL_SSE2__ -#undef len3_squared -#undef len3 -#undef dot3 -#endif -} -#endif - -#ifdef __SUBSURFACE__ -/* Special ray intersection routines for subsurface scattering. In that case we - * only want to intersect with primitives in the same object, and if case of - * multiple hits we pick a single random primitive as the intersection point. */ - -ccl_device_inline void bvh_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array, - float3 P, float3 idir, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits) -{ - /* compute and check intersection t-value */ - float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); - float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); - float3 dir = 1.0f/idir; - - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); - float t = Oz * invDz; - - if(t > 0.0f && t < tmax) { - /* compute and check barycentric u */ - float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; - float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; - float u = Ox + t*Dx; - - if(u >= 0.0f) { - /* compute and check barycentric v */ - float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); - float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; - float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; - float v = Oy + t*Dy; - - if(v >= 0.0f && u + v <= 1.0f) { - (*num_hits)++; - - int hit; - - if(*num_hits <= max_hits) { - hit = *num_hits - 1; - } - else { - /* reservoir sampling: if we are at the maximum number of - * hits, randomly replace element or skip it */ - hit = lcg_step_uint(lcg_state) % *num_hits; - - if(hit >= max_hits) - return; - } - - /* record intersection */ - Intersection *isect = &isect_array[hit]; - isect->prim = triAddr; - isect->object = object; - isect->u = u; - isect->v = v; - isect->t = t; - } - } - } -} -#endif - /* BVH intersection function variations */ #define BVH_INSTANCING 1 @@ -1042,7 +209,7 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng) { #ifdef __INTERSECTION_REFINE__ const float epsilon_f = 1e-5f; - /* ideally this should match epsilon_f, but instancing/mblur + /* ideally this should match epsilon_f, but instancing and motion blur * precision makes it problematic */ const float epsilon_test = 1.0f; const int epsilon_i = 32; @@ -1086,237 +253,5 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng) #endif } -/* Refine triangle intersection to more precise hit point. For rays that travel - * far the precision is often not so good, this reintersects the primitive from - * a closer distance. */ - -ccl_device_inline float3 bvh_triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) -{ - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; - -#ifdef __INTERSECTION_REFINE__ - if(isect->object != ~0) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D*t); - D = normalize_len(D, &t); - } - - P = P + D*t; - - float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); - float rt = Oz * invDz; - - P = P + D*rt; - - if(isect->object != ~0) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - } - - return P; -#else - return P + D*t; -#endif -} - -/* same as above, except that isect->t is assumed to be in object space for instancing */ -ccl_device_inline float3 bvh_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) -{ - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; - -#ifdef __INTERSECTION_REFINE__ - if(isect->object != ~0) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D); - D = normalize(D); - } - - P = P + D*t; - - float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); - float rt = Oz * invDz; - - P = P + D*rt; - - if(isect->object != ~0) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - } - - return P; -#else - return P + D*t; -#endif -} - -#ifdef __HAIR__ - -ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3) -{ - float fc = 0.71f; - float data[4]; - float t2 = t * t; - data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc; - data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t; - data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc; - data[3] = 3.0f * fc * t2 - 2.0f * fc * t; - return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; -} - -ccl_device_inline float3 curvepoint(float t, float3 p0, float3 p1, float3 p2, float3 p3) -{ - float data[4]; - float fc = 0.71f; - float t2 = t * t; - float t3 = t2 * t; - data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t; - data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f; - data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t; - data[3] = fc * t3 - fc * t2; - return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; -} - -ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) -{ - int flag = kernel_data.curve.curveflags; - float t = isect->t; - float3 P = ray->P; - float3 D = ray->D; - - if(isect->object != ~0) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D*t); - D = normalize_len(D, &t); - } - - int prim = kernel_tex_fetch(__prim_index, isect->prim); - float4 v00 = kernel_tex_fetch(__curves, prim); - - int k0 = __float_as_int(v00.x) + isect->segment; - int k1 = k0 + 1; - - float4 P1 = kernel_tex_fetch(__curve_keys, k0); - float4 P2 = kernel_tex_fetch(__curve_keys, k1); - float l = 1.0f; - float3 tg = normalize_len(float4_to_float3(P2 - P1), &l); - float r1 = P1.w; - float r2 = P2.w; - float gd = ((r2 - r1)/l); - - P = P + D*t; - - if(flag & CURVE_KN_INTERPOLATE) { - int ka = max(k0 - 1,__float_as_int(v00.x)); - int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1); - - float4 P0 = kernel_tex_fetch(__curve_keys, ka); - float4 P3 = kernel_tex_fetch(__curve_keys, kb); - - float3 p[4]; - p[0] = float4_to_float3(P0); - p[1] = float4_to_float3(P1); - p[2] = float4_to_float3(P2); - p[3] = float4_to_float3(P3); - -#ifdef __UV__ - sd->u = isect->u; - sd->v = 0.0f; -#endif - - tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); - - if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) - sd->Ng = normalize(-(D - tg * (dot(tg, D)))); - else { - float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]); - sd->Ng = normalize(P - p_curr); - sd->Ng = sd->Ng - gd * tg; - sd->Ng = normalize(sd->Ng); - } - sd->N = sd->Ng; - } - else { - float3 dif = P - float4_to_float3(P1); - -#ifdef __UV__ - sd->u = dot(dif,tg)/l; - sd->v = 0.0f; -#endif - - if (flag & CURVE_KN_TRUETANGENTGNORMAL) { - sd->Ng = -(D - tg * dot(tg, D)); - sd->Ng = normalize(sd->Ng); - } - else { - sd->Ng = (dif - tg * sd->u * l) / (P1.w + sd->u * l * gd); - if (gd != 0.0f) { - sd->Ng = sd->Ng - gd * tg ; - sd->Ng = normalize(sd->Ng); - } - } - - sd->N = sd->Ng; - } - -#ifdef __DPDU__ - /* dPdu/dPdv */ - sd->dPdu = tg; - sd->dPdv = cross(tg, sd->Ng); -#endif - - /*add fading parameter for minimum pixel width with transparency bsdf*/ - /*sd->curve_transparency = isect->v;*/ - /*sd->curve_radius = sd->u * gd * l + r1;*/ - - if(isect->object != ~0) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - } - - return P; -} -#endif - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h index 40683a2da57..6529f58c0d2 100644 --- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h +++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h @@ -216,7 +216,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio if(tri_object == subsurface_object) { /* intersect ray against primitive */ - bvh_triangle_intersect_subsurface(kg, isect_array, P, idir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); + triangle_intersect_subsurface(kg, isect_array, P, idir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); } } } diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h index 0515a9e0fa7..3a50ffedfde 100644 --- a/intern/cycles/kernel/geom/geom_bvh_traversal.h +++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h @@ -41,7 +41,6 @@ ccl_device bool BVH_FUNCTION_NAME * - test if pushing distance on the stack helps (for non shadow rays) * - separate version for shadow rays * - likely and unlikely for if() statements - * - SSE for hair * - test restrict attribute for pointers */ @@ -258,18 +257,18 @@ ccl_device bool BVH_FUNCTION_NAME if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) #if FEATURE(BVH_HAIR_MINIMUM_WIDTH) - hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax); + hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, ray->time, segment, lcg_state, difl, extmax); else - hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax); + hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, ray->time, segment, lcg_state, difl, extmax); #else - hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment); + hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, ray->time, segment); else - hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment); + hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, ray->time, segment); #endif } else #endif - hit = bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr); + hit = triangle_intersect(kg, isect, P, idir, visibility, object, primAddr); /* shadow ray early termination */ #if defined(__KERNEL_SSE2__) diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index 821ac50eaa9..2daeb59c0ae 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -1,6 +1,4 @@ /* - * Copyright 2011-2013 Blender Foundation - * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -11,7 +9,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -133,5 +131,822 @@ ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd) #endif +#ifdef __HAIR__ +ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta, float *extrema, float *extremtb, float *extremb, float p0, float p1, float p2, float p3) +{ + float halfdiscroot = (p2 * p2 - 3 * p3 * p1); + float ta = -1.0f; + float tb = -1.0f; + *extremta = -1.0f; + *extremtb = -1.0f; + *upper = p0; + *lower = p0 + p1 + p2 + p3; + *extrema = *upper; + *extremb = *lower; + if(*lower >= *upper) { + *upper = *lower; + *lower = p0; + } + + if(halfdiscroot >= 0) { + halfdiscroot = sqrt(halfdiscroot); + ta = (-p2 - halfdiscroot) / (3 * p3); + tb = (-p2 + halfdiscroot) / (3 * p3); + } + + float t2; + float t3; + if(ta > 0.0f && ta < 1.0f) { + t2 = ta * ta; + t3 = t2 * ta; + *extremta = ta; + *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0; + if(*extrema > *upper) { + *upper = *extrema; + } + if(*extrema < *lower) { + *lower = *extrema; + } + } + if(tb > 0.0f && tb < 1.0f) { + t2 = tb * tb; + t3 = t2 * tb; + *extremtb = tb; + *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0; + if(*extremb >= *upper) { + *upper = *extremb; + } + if(*extremb <= *lower) { + *lower = *extremb; + } + } +} + +#ifdef __KERNEL_SSE2__ +ccl_device_inline __m128 transform_point_T3(const __m128 t[3], const __m128 &a) +{ + return fma(broadcast<0>(a), t[0], fma(broadcast<1>(a), t[1], _mm_mul_ps(broadcast<2>(a), t[2]))); +} +#endif + +#ifdef __KERNEL_SSE2__ +/* Pass P and idir by reference to aligned vector */ +ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, + const float3 &P, const float3 &idir, uint visibility, int object, int curveAddr, float time, int segment, uint *lcg_state, float difl, float extmax) +#else +ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, + float3 P, float3 idir, uint visibility, int object, int curveAddr, float time, int segment, uint *lcg_state, float difl, float extmax) +#endif +{ + float epsilon = 0.0f; + float r_st, r_en; + + int depth = kernel_data.curve.subdivisions; + int flags = kernel_data.curve.curveflags; + int prim = kernel_tex_fetch(__prim_index, curveAddr); + +#ifdef __KERNEL_SSE2__ + __m128 vdir = _mm_div_ps(_mm_set1_ps(1.0f), load_m128(idir)); + __m128 vcurve_coef[4]; + const float3 *curve_coef = (float3 *)vcurve_coef; + + { + __m128 dtmp = _mm_mul_ps(vdir, vdir); + __m128 d_ss = _mm_sqrt_ss(_mm_add_ss(dtmp, broadcast<2>(dtmp))); + __m128 rd_ss = _mm_div_ss(_mm_set_ss(1.0f), d_ss); + + __m128i v00vec = _mm_load_si128((__m128i *)&kg->__curves.data[prim]); + int2 &v00 = (int2 &)v00vec; + + int k0 = v00.x + segment; + int k1 = k0 + 1; + int ka = max(k0 - 1, v00.x); + int kb = min(k1 + 1, v00.x + v00.y - 1); + + __m128 P_curve[4]; + + P_curve[0] = _mm_load_ps(&kg->__curve_keys.data[ka].x); + P_curve[1] = _mm_load_ps(&kg->__curve_keys.data[k0].x); + P_curve[2] = _mm_load_ps(&kg->__curve_keys.data[k1].x); + P_curve[3] = _mm_load_ps(&kg->__curve_keys.data[kb].x); + + __m128 rd_sgn = set_sign_bit<0, 1, 1, 1>(broadcast<0>(rd_ss)); + __m128 mul_zxxy = _mm_mul_ps(shuffle<2, 0, 0, 1>(vdir), rd_sgn); + __m128 mul_yz = _mm_mul_ps(shuffle<1, 2, 1, 2>(vdir), mul_zxxy); + __m128 mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz); + __m128 vdir0 = _mm_and_ps(vdir, _mm_castsi128_ps(_mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0))); + + __m128 htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0); + __m128 htfm1 = shuffle<1, 0, 1, 3>(_mm_set_ss(_mm_cvtss_f32(d_ss)), vdir0); + __m128 htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0); + + __m128 htfm[] = { htfm0, htfm1, htfm2 }; + __m128 vP = load_m128(P); + __m128 p0 = transform_point_T3(htfm, _mm_sub_ps(P_curve[0], vP)); + __m128 p1 = transform_point_T3(htfm, _mm_sub_ps(P_curve[1], vP)); + __m128 p2 = transform_point_T3(htfm, _mm_sub_ps(P_curve[2], vP)); + __m128 p3 = transform_point_T3(htfm, _mm_sub_ps(P_curve[3], vP)); + + float fc = 0.71f; + __m128 vfc = _mm_set1_ps(fc); + __m128 vfcxp3 = _mm_mul_ps(vfc, p3); + + vcurve_coef[0] = p1; + vcurve_coef[1] = _mm_mul_ps(vfc, _mm_sub_ps(p2, p0)); + vcurve_coef[2] = fma(_mm_set1_ps(fc * 2.0f), p0, fma(_mm_set1_ps(fc - 3.0f), p1, fms(_mm_set1_ps(3.0f - 2.0f * fc), p2, vfcxp3))); + vcurve_coef[3] = fms(_mm_set1_ps(fc - 2.0f), _mm_sub_ps(p2, p1), fms(vfc, p0, vfcxp3)); + + r_st = ((float4 &)P_curve[1]).w; + r_en = ((float4 &)P_curve[2]).w; + } +#else + float3 curve_coef[4]; + + /* curve Intersection check */ + float3 dir = 1.0f/idir; + + /* obtain curve parameters */ + { + /* ray transform created - this should be created at beginning of intersection loop */ + Transform htfm; + float d = sqrtf(dir.x * dir.x + dir.z * dir.z); + htfm = make_transform( + dir.z / d, 0, -dir.x /d, 0, + -dir.x * dir.y /d, d, -dir.y * dir.z /d, 0, + dir.x, dir.y, dir.z, 0, + 0, 0, 0, 1); + + float4 v00 = kernel_tex_fetch(__curves, prim); + + int k0 = __float_as_int(v00.x) + segment; + int k1 = k0 + 1; + + int ka = max(k0 - 1,__float_as_int(v00.x)); + int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1); + + float4 P_curve[4]; + + P_curve[0] = kernel_tex_fetch(__curve_keys, ka); + P_curve[1] = kernel_tex_fetch(__curve_keys, k0); + P_curve[2] = kernel_tex_fetch(__curve_keys, k1); + P_curve[3] = kernel_tex_fetch(__curve_keys, kb); + + float3 p0 = transform_point(&htfm, float4_to_float3(P_curve[0]) - P); + float3 p1 = transform_point(&htfm, float4_to_float3(P_curve[1]) - P); + float3 p2 = transform_point(&htfm, float4_to_float3(P_curve[2]) - P); + float3 p3 = transform_point(&htfm, float4_to_float3(P_curve[3]) - P); + + float fc = 0.71f; + curve_coef[0] = p1; + curve_coef[1] = -fc*p0 + fc*p2; + curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3; + curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3; + r_st = P_curve[1].w; + r_en = P_curve[2].w; + } +#endif + + float r_curr = max(r_st, r_en); + + if((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING)) + epsilon = 2 * r_curr; + + /* find bounds - this is slow for cubic curves */ + float upper, lower; + + float zextrem[4]; + curvebounds(&lower, &upper, &zextrem[0], &zextrem[1], &zextrem[2], &zextrem[3], curve_coef[0].z, curve_coef[1].z, curve_coef[2].z, curve_coef[3].z); + if(lower - r_curr > isect->t || upper + r_curr < epsilon) + return false; + + /* minimum width extension */ + float mw_extension = min(difl * fabsf(upper), extmax); + float r_ext = mw_extension + r_curr; + + float xextrem[4]; + curvebounds(&lower, &upper, &xextrem[0], &xextrem[1], &xextrem[2], &xextrem[3], curve_coef[0].x, curve_coef[1].x, curve_coef[2].x, curve_coef[3].x); + if(lower > r_ext || upper < -r_ext) + return false; + + float yextrem[4]; + curvebounds(&lower, &upper, &yextrem[0], &yextrem[1], &yextrem[2], &yextrem[3], curve_coef[0].y, curve_coef[1].y, curve_coef[2].y, curve_coef[3].y); + if(lower > r_ext || upper < -r_ext) + return false; + + /* setup recurrent loop */ + int level = 1 << depth; + int tree = 0; + float resol = 1.0f / (float)level; + bool hit = false; + + /* begin loop */ + while(!(tree >> (depth))) { + float i_st = tree * resol; + float i_en = i_st + (level * resol); +#ifdef __KERNEL_SSE2__ + __m128 vi_st = _mm_set1_ps(i_st), vi_en = _mm_set1_ps(i_en); + __m128 vp_st = fma(fma(fma(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]); + __m128 vp_en = fma(fma(fma(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), vi_en, vcurve_coef[0]); + + __m128 vbmin = _mm_min_ps(vp_st, vp_en); + __m128 vbmax = _mm_max_ps(vp_st, vp_en); + + float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax; + float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z; + float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z; + float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en; +#else + float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st + curve_coef[0]; + float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en + curve_coef[0]; + + float bminx = min(p_st.x, p_en.x); + float bmaxx = max(p_st.x, p_en.x); + float bminy = min(p_st.y, p_en.y); + float bmaxy = max(p_st.y, p_en.y); + float bminz = min(p_st.z, p_en.z); + float bmaxz = max(p_st.z, p_en.z); +#endif + + if(xextrem[0] >= i_st && xextrem[0] <= i_en) { + bminx = min(bminx,xextrem[1]); + bmaxx = max(bmaxx,xextrem[1]); + } + if(xextrem[2] >= i_st && xextrem[2] <= i_en) { + bminx = min(bminx,xextrem[3]); + bmaxx = max(bmaxx,xextrem[3]); + } + if(yextrem[0] >= i_st && yextrem[0] <= i_en) { + bminy = min(bminy,yextrem[1]); + bmaxy = max(bmaxy,yextrem[1]); + } + if(yextrem[2] >= i_st && yextrem[2] <= i_en) { + bminy = min(bminy,yextrem[3]); + bmaxy = max(bmaxy,yextrem[3]); + } + if(zextrem[0] >= i_st && zextrem[0] <= i_en) { + bminz = min(bminz,zextrem[1]); + bmaxz = max(bmaxz,zextrem[1]); + } + if(zextrem[2] >= i_st && zextrem[2] <= i_en) { + bminz = min(bminz,zextrem[3]); + bmaxz = max(bmaxz,zextrem[3]); + } + + float r1 = r_st + (r_en - r_st) * i_st; + float r2 = r_st + (r_en - r_st) * i_en; + r_curr = max(r1, r2); + + mw_extension = min(difl * fabsf(bmaxz), extmax); + float r_ext = mw_extension + r_curr; + float coverage = 1.0f; + + if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) { + /* the bounding box does not overlap the square centered at O */ + tree += level; + level = tree & -tree; + } + else if (level == 1) { + + /* the maximum recursion depth is reached. + * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. + * dP* is reversed if necessary.*/ + float t = isect->t; + float u = 0.0f; + if(flags & CURVE_KN_RIBBONS) { + float3 tg = (p_en - p_st); + float w = tg.x * tg.x + tg.y * tg.y; + if (w == 0) { + tree++; + level = tree & -tree; + continue; + } + w = -(p_st.x * tg.x + p_st.y * tg.y) / w; + w = clamp((float)w, 0.0f, 1.0f); + + /* compute u on the curve segment */ + u = i_st * (1 - w) + i_en * w; + r_curr = r_st + (r_en - r_st) * u; + /* compare x-y distances */ + float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0]; + + float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; + if (dot(tg, dp_st)< 0) + dp_st *= -1; + if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) { + tree++; + level = tree & -tree; + continue; + } + float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; + if (dot(tg, dp_en) < 0) + dp_en *= -1; + if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { + tree++; + level = tree & -tree; + continue; + } + + /* compute coverage */ + float r_ext = r_curr; + coverage = 1.0f; + if(difl != 0.0f) { + mw_extension = min(difl * fabsf(bmaxz), extmax); + r_ext = mw_extension + r_curr; + float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y); + float d0 = d - r_curr; + float d1 = d + r_curr; + if (d0 >= 0) + coverage = (min(d1 / mw_extension, 1.0f) - min(d0 / mw_extension, 1.0f)) * 0.5f; + else // inside + coverage = (min(d1 / mw_extension, 1.0f) + min(-d0 / mw_extension, 1.0f)) * 0.5f; + } + + if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) { + tree++; + level = tree & -tree; + continue; + } + + t = p_curr.z; + } + else { + float l = len(p_en - p_st); + /* minimum width extension */ + float or1 = r1; + float or2 = r2; + if(difl != 0.0f) { + mw_extension = min(len(p_st - P) * difl, extmax); + or1 = r1 < mw_extension ? mw_extension : r1; + mw_extension = min(len(p_en - P) * difl, extmax); + or2 = r2 < mw_extension ? mw_extension : r2; + } + /* --- */ + float3 tg = (p_en - p_st) / l; + float gd = (or2 - or1) / l; + float difz = -dot(p_st,tg); + float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd)); + float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1))); + float tcentre = -halfb/cyla; + float zcentre = difz + (tg.z * tcentre); + float3 tdif = - p_st; + tdif.z += tcentre; + float tdifz = dot(tdif,tg); + float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1))); + float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd; + float td = tb*tb - 4*cyla*tc; + if (td < 0.0f) { + tree++; + level = tree & -tree; + continue; + } + + float rootd = sqrtf(td); + float correction = ((-tb - rootd)/(2*cyla)); + t = tcentre + correction; + + float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; + if (dot(tg, dp_st)< 0) + dp_st *= -1; + float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; + if (dot(tg, dp_en) < 0) + dp_en *= -1; + + if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) { + correction = ((-tb + rootd)/(2*cyla)); + t = tcentre + correction; + } + + if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) { + tree++; + level = tree & -tree; + continue; + } + + float w = (zcentre + (tg.z * correction))/l; + w = clamp((float)w, 0.0f, 1.0f); + /* compute u on the curve segment */ + u = i_st * (1 - w) + i_en * w; + r_curr = r1 + (r2 - r1) * w; + r_ext = or1 + (or2 - or1) * w; + coverage = r_curr/r_ext; + + } + /* we found a new intersection */ + + /* stochastic fade from minimum width */ + if(lcg_state && coverage != 1.0f) { + if(lcg_step_float(lcg_state) > coverage) + return hit; + } + +#ifdef __VISIBILITY_FLAG__ + /* visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags */ + if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) +#endif + { + /* record intersection */ + isect->prim = curveAddr; + isect->object = object; + isect->segment = segment; + isect->u = u; + isect->v = 0.0f; + /*isect->v = 1.0f - coverage; */ + isect->t = t; + hit = true; + } + + tree++; + level = tree & -tree; + } + else { + /* split the curve into two curves and process */ + level = level >> 1; + } + } + + return hit; +} + +ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect, + float3 P, float3 idir, uint visibility, int object, int curveAddr, float time, int segment, uint *lcg_state, float difl, float extmax) +{ + /* define few macros to minimize code duplication for SSE */ +#ifndef __KERNEL_SSE2__ +#define len3_squared(x) len_squared(x) +#define len3(x) len(x) +#define dot3(x, y) dot(x, y) +#endif + + /* curve Intersection check */ + int flags = kernel_data.curve.curveflags; + + int prim = kernel_tex_fetch(__prim_index, curveAddr); + float4 v00 = kernel_tex_fetch(__curves, prim); + + int cnum = __float_as_int(v00.x); + int k0 = cnum + segment; + int k1 = k0 + 1; + +#ifndef __KERNEL_SSE2__ + float4 P_curve[2]; + + P_curve[0]= kernel_tex_fetch(__curve_keys, k0); + P_curve[1]= kernel_tex_fetch(__curve_keys, k1); + + float or1 = P_curve[0].w; + float or2 = P_curve[1].w; + float3 p1 = float4_to_float3(P_curve[0]); + float3 p2 = float4_to_float3(P_curve[1]); + + /* minimum width extension */ + float r1 = or1; + float r2 = or2; + float3 dif = P - p1; + float3 dif_second = P - p2; + if(difl != 0.0f) { + float pixelsize = min(len3(dif) * difl, extmax); + r1 = or1 < pixelsize ? pixelsize : or1; + pixelsize = min(len3(dif_second) * difl, extmax); + r2 = or2 < pixelsize ? pixelsize : or2; + } + /* --- */ + + float3 dir = 1.0f / idir; + float3 p21_diff = p2 - p1; + float3 sphere_dif1 = (dif + dif_second) * 0.5f; + float sphere_b_tmp = dot3(dir, sphere_dif1); + float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir; +#else + const __m128 p1 = _mm_load_ps(&kg->__curve_keys.data[k0].x); + const __m128 p2 = _mm_load_ps(&kg->__curve_keys.data[k1].x); + const __m128 or12 = shuffle<3, 3, 3, 3>(p1, p2); + + __m128 r12 = or12; + const __m128 vP = load_m128(P); + const __m128 dif = _mm_sub_ps(vP, p1); + const __m128 dif_second = _mm_sub_ps(vP, p2); + if(difl != 0.0f) { + const __m128 len1_sq = len3_squared_splat(dif); + const __m128 len2_sq = len3_squared_splat(dif_second); + const __m128 len12 = _mm_sqrt_ps(shuffle<0, 0, 0, 0>(len1_sq, len2_sq)); + const __m128 pixelsize12 = _mm_min_ps(_mm_mul_ps(len12, _mm_set1_ps(difl)), _mm_set1_ps(extmax)); + r12 = _mm_max_ps(or12, pixelsize12); + } + float or1 = _mm_cvtss_f32(or12), or2 = _mm_cvtss_f32(broadcast<2>(or12)); + float r1 = _mm_cvtss_f32(r12), r2 = _mm_cvtss_f32(broadcast<2>(r12)); + + const __m128 dir = _mm_div_ps(_mm_set1_ps(1.0f), load_m128(idir)); + const __m128 p21_diff = _mm_sub_ps(p2, p1); + const __m128 sphere_dif1 = _mm_mul_ps(_mm_add_ps(dif, dif_second), _mm_set1_ps(0.5f)); + const __m128 sphere_b_tmp = dot3_splat(dir, sphere_dif1); + const __m128 sphere_dif2 = fnma(sphere_b_tmp, dir, sphere_dif1); +#endif + + float mr = max(r1, r2); + float l = len3(p21_diff); + float invl = 1.0f / l; + float sp_r = mr + 0.5f * l; + + float sphere_b = dot3(dir, sphere_dif2); + float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r; + + if(sdisc < 0.0f) + return false; + + /* obtain parameters and test midpoint distance for suitable modes */ +#ifndef __KERNEL_SSE2__ + float3 tg = p21_diff * invl; +#else + const __m128 tg = _mm_mul_ps(p21_diff, _mm_set1_ps(invl)); +#endif + float gd = (r2 - r1) * invl; + + float dirz = dot3(dir, tg); + float difz = dot3(dif, tg); + + float a = 1.0f - (dirz*dirz*(1 + gd*gd)); + + float halfb = dot3(dir, dif) - dirz*(difz + gd*(difz*gd + r1)); + + float tcentre = -halfb/a; + float zcentre = difz + (dirz * tcentre); + + if((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE)) + return false; + if((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) && !(flags & CURVE_KN_INTERSECTCORRECTION)) + return false; + + /* test minimum separation */ +#ifndef __KERNEL_SSE2__ + float3 cprod = cross(tg, dir); + float cprod2sq = len3_squared(cross(tg, dif)); +#else + const __m128 cprod = cross(tg, dir); + float cprod2sq = len3_squared(cross_zxy(tg, dif)); +#endif + float cprodsq = len3_squared(cprod); + float distscaled = dot3(cprod, dif); + + if(cprodsq == 0) + distscaled = cprod2sq; + else + distscaled = (distscaled*distscaled)/cprodsq; + + if(distscaled > mr*mr) + return false; + + /* calculate true intersection */ +#ifndef __KERNEL_SSE2__ + float3 tdif = dif + tcentre * dir; +#else + const __m128 tdif = fma(_mm_set1_ps(tcentre), dir, dif); +#endif + float tdifz = dot3(tdif, tg); + float tdifma = tdifz*gd + r1; + float tb = 2*(dot3(dir, tdif) - dirz*(tdifz + gd*tdifma)); + float tc = dot3(tdif, tdif) - tdifz*tdifz - tdifma*tdifma; + float td = tb*tb - 4*a*tc; + + if (td < 0.0f) + return false; + + float rootd = 0.0f; + float correction = 0.0f; + if(flags & CURVE_KN_ACCURATE) { + rootd = sqrtf(td); + correction = ((-tb - rootd)/(2*a)); + } + + float t = tcentre + correction; + + if(t < isect->t) { + + if(flags & CURVE_KN_INTERSECTCORRECTION) { + rootd = sqrtf(td); + correction = ((-tb - rootd)/(2*a)); + t = tcentre + correction; + } + + float z = zcentre + (dirz * correction); + bool backface = false; + + if(flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) { + backface = true; + correction = ((-tb + rootd)/(2*a)); + t = tcentre + correction; + z = zcentre + (dirz * correction); + } + + /* stochastic fade from minimum width */ + float adjradius = or1 + z * (or2 - or1) * invl; + adjradius = adjradius / (r1 + z * gd); + if(lcg_state && adjradius != 1.0f) { + if(lcg_step_float(lcg_state) > adjradius) + return false; + } + /* --- */ + + if(t > 0.0f && t < isect->t && z >= 0 && z <= l) { + + if (flags & CURVE_KN_ENCLOSEFILTER) { + float enc_ratio = 1.01f; + if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) { + float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio)); + float c2 = dot3(dif, dif) - difz * difz * (1 + gd*gd*enc_ratio*enc_ratio) - r1*r1*enc_ratio*enc_ratio - 2*r1*difz*gd*enc_ratio; + if(a2*c2 < 0.0f) + return false; + } + } + +#ifdef __VISIBILITY_FLAG__ + /* visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags */ + if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) +#endif + { + /* record intersection */ + isect->prim = curveAddr; + isect->object = object; + isect->segment = segment; + isect->u = z*invl; + isect->v = td/(4*a*a); + /*isect->v = 1.0f - adjradius;*/ + isect->t = t; + + if(backface) + isect->u = -isect->u; + + return true; + } + } + } + + return false; + +#ifndef __KERNEL_SSE2__ +#undef len3_squared +#undef len3 +#undef dot3 +#endif +} +#endif + +#ifdef __HAIR__ + +ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3) +{ + float fc = 0.71f; + float data[4]; + float t2 = t * t; + data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc; + data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t; + data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc; + data[3] = 3.0f * fc * t2 - 2.0f * fc * t; + return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; +} + +ccl_device_inline float3 curvepoint(float t, float3 p0, float3 p1, float3 p2, float3 p3) +{ + float data[4]; + float fc = 0.71f; + float t2 = t * t; + float t3 = t2 * t; + data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t; + data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f; + data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t; + data[3] = fc * t3 - fc * t2; + return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; +} + +ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) +{ + int flag = kernel_data.curve.curveflags; + float t = isect->t; + float3 P = ray->P; + float3 D = ray->D; + + if(isect->object != ~0) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_itfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D*t); + D = normalize_len(D, &t); + } + + int prim = kernel_tex_fetch(__prim_index, isect->prim); + float4 v00 = kernel_tex_fetch(__curves, prim); + + int k0 = __float_as_int(v00.x) + sd->segment; + int k1 = k0 + 1; + + float3 tg; + + if(flag & CURVE_KN_INTERPOLATE) { + int ka = max(k0 - 1,__float_as_int(v00.x)); + int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1); + + float4 P_curve[4]; + + P_curve[0] = kernel_tex_fetch(__curve_keys, ka); + P_curve[1] = kernel_tex_fetch(__curve_keys, k0); + P_curve[2] = kernel_tex_fetch(__curve_keys, k1); + P_curve[3] = kernel_tex_fetch(__curve_keys, kb); + + float l = 1.0f; + tg = normalize_len(float4_to_float3(P_curve[2] - P_curve[1]), &l); + float r1 = P_curve[1].w; + float r2 = P_curve[2].w; + float gd = ((r2 - r1)/l); + + P = P + D*t; + + float3 p[4]; + p[0] = float4_to_float3(P_curve[0]); + p[1] = float4_to_float3(P_curve[1]); + p[2] = float4_to_float3(P_curve[2]); + p[3] = float4_to_float3(P_curve[3]); + +#ifdef __UV__ + sd->u = isect->u; + sd->v = 0.0f; +#endif + + tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); + + if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) + sd->Ng = normalize(-(D - tg * (dot(tg, D)))); + else { + float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]); + sd->Ng = normalize(P - p_curr); + sd->Ng = sd->Ng - gd * tg; + sd->Ng = normalize(sd->Ng); + } + sd->N = sd->Ng; + } + else { + float4 P_curve[2]; + + P_curve[0]= kernel_tex_fetch(__curve_keys, k0); + P_curve[1]= kernel_tex_fetch(__curve_keys, k1); + + float l = 1.0f; + tg = normalize_len(float4_to_float3(P_curve[1] - P_curve[0]), &l); + float r1 = P_curve[0].w; + float r2 = P_curve[1].w; + float gd = ((r2 - r1)/l); + + P = P + D*t; + + float3 dif = P - float4_to_float3(P_curve[0]); + +#ifdef __UV__ + sd->u = dot(dif,tg)/l; + sd->v = 0.0f; +#endif + + if (flag & CURVE_KN_TRUETANGENTGNORMAL) { + sd->Ng = -(D - tg * dot(tg, D)); + sd->Ng = normalize(sd->Ng); + } + else { + sd->Ng = (dif - tg * sd->u * l) / (P_curve[0].w + sd->u * l * gd); + if (gd != 0.0f) { + sd->Ng = sd->Ng - gd * tg ; + sd->Ng = normalize(sd->Ng); + } + } + + sd->N = sd->Ng; + } + +#ifdef __DPDU__ + /* dPdu/dPdv */ + sd->dPdu = tg; + sd->dPdv = cross(tg, sd->Ng); +#endif + + /*add fading parameter for minimum pixel width with transparency bsdf*/ + /*sd->curve_transparency = isect->v;*/ + /*sd->curve_radius = sd->u * gd * l + r1;*/ + + if(isect->object != ~0) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_tfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + } + + return P; +} +#endif + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index a66277e10cd..3646615b9f6 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -1,6 +1,4 @@ /* - * Copyright 2011-2013 Blender Foundation - * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -11,7 +9,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN @@ -296,5 +294,77 @@ ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle) return make_float3(f3.z, f3.w, f4.x); } +/* BVH */ + +ccl_device_inline float3 bvh_inverse_direction(float3 dir) +{ + /* avoid divide by zero (ooeps = exp2f(-80.0f)) */ + float ooeps = 0.00000000000000000000000082718061255302767487140869206996285356581211090087890625f; + float3 idir; + + idir.x = 1.0f/((fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x)); + idir.y = 1.0f/((fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y)); + idir.z = 1.0f/((fabsf(dir.z) > ooeps)? dir.z: copysignf(ooeps, dir.z)); + + return idir; +} + +ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax) +{ + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + + *P = transform_point(&tfm, ray->P); + + float3 dir = transform_direction(&tfm, ray->D); + + float len; + dir = normalize_len(dir, &len); + + *idir = bvh_inverse_direction(dir); + + if(*t != FLT_MAX) + *t *= len; +} + +ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax) +{ + if(*t != FLT_MAX) { + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + *t *= len(transform_direction(&tfm, 1.0f/(*idir))); + } + + *P = ray->P; + *idir = bvh_inverse_direction(ray->D); +} + +#ifdef __OBJECT_MOTION__ +ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax) +{ + Transform itfm; + *tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm); + + *P = transform_point(&itfm, ray->P); + + float3 dir = transform_direction(&itfm, ray->D); + + float len; + dir = normalize_len(dir, &len); + + *idir = bvh_inverse_direction(dir); + + if(*t != FLT_MAX) + *t *= len; +} + +ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax) +{ + if(*t != FLT_MAX) + *t *= len(transform_direction(tfm, 1.0f/(*idir))); + + *P = ray->P; + *idir = bvh_inverse_direction(ray->D); +} +#endif + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 0455df85961..6eaa077584b 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -1,5 +1,6 @@ /* - * Copyright 2011-2013 Blender Foundation + * Adapted from code Copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -11,16 +12,109 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ CCL_NAMESPACE_BEGIN -/* Point on triangle for Moller-Trumbore triangles */ -ccl_device_inline float3 triangle_point_MT(KernelGlobals *kg, int tri_index, float u, float v) +/* Refine triangle intersection to more precise hit point. For rays that travel + * far the precision is often not so good, this reintersects the primitive from + * a closer distance. */ + +ccl_device_inline float3 triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) +{ + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; + +#ifdef __INTERSECTION_REFINE__ + if(isect->object != ~0) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_itfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D*t); + D = normalize_len(D, &t); + } + + P = P + D*t; + + float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); + float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; + float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); + float rt = Oz * invDz; + + P = P + D*rt; + + if(isect->object != ~0) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_tfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + } + + return P; +#else + return P + D*t; +#endif +} + +/* same as above, except that isect->t is assumed to be in object space for instancing */ +ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) +{ + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; + +#ifdef __INTERSECTION_REFINE__ + if(isect->object != ~0) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_itfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D); + D = normalize(D); + } + + P = P + D*t; + + float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); + float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; + float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); + float rt = Oz * invDz; + + P = P + D*rt; + + if(isect->object != ~0) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_tfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + } + + return P; +#else + return P + D*t; +#endif +} + +/* point and normal on triangle */ +ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int prim, float u, float v, float3 *P, float3 *Ng, int *shader) { /* load triangle vertices */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index)); + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); @@ -28,44 +122,28 @@ ccl_device_inline float3 triangle_point_MT(KernelGlobals *kg, int tri_index, flo /* compute point */ float t = 1.0f - u - v; - return (u*v0 + v*v1 + t*v2); -} + *P = (u*v0 + v*v1 + t*v2); -/* Normal for Moller-Trumbore triangles */ -ccl_device_inline float3 triangle_normal_MT(KernelGlobals *kg, int tri_index, int *shader) -{ -#if 0 - /* load triangle vertices */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index)); - - float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); - float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); - float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); - - /* compute normal */ - return normalize(cross(v2 - v0, v1 - v0)); -#else - float4 Nm = kernel_tex_fetch(__tri_normal, tri_index); + float4 Nm = kernel_tex_fetch(__tri_normal, prim); + *Ng = make_float3(Nm.x, Nm.y, Nm.z); *shader = __float_as_int(Nm.w); - return make_float3(Nm.x, Nm.y, Nm.z); -#endif } /* Return 3 triangle vertex locations */ -ccl_device_inline void triangle_vertices(KernelGlobals *kg, int tri_index, float3 P[3]) +ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3]) { /* load triangle vertices */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index)); + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); } -ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int tri_index, float u, float v) +ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, float u, float v) { /* load triangle vertices */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index)); + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x))); float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y))); @@ -74,10 +152,10 @@ ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int tri_index return normalize((1.0f - u - v)*n2 + u*n0 + v*n1); } -ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, float3 *dPdu, float3 *dPdv, int tri) +ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, float3 *dPdu, float3 *dPdv) { /* fetch triangle vertex coordinates */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri)); + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); @@ -176,5 +254,113 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData } } +/* Sven Woop's algorithm */ +ccl_device_inline bool triangle_intersect(KernelGlobals *kg, Intersection *isect, + float3 P, float3 idir, uint visibility, int object, int triAddr) +{ + /* compute and check intersection t-value */ + float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); + float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); + float3 dir = 1.0f/idir; + + float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; + float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); + float t = Oz * invDz; + + if(t > 0.0f && t < isect->t) { + /* compute and check barycentric u */ + float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; + float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; + float u = Ox + t*Dx; + + if(u >= 0.0f) { + /* compute and check barycentric v */ + float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); + float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; + float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; + float v = Oy + t*Dy; + + if(v >= 0.0f && u + v <= 1.0f) { +#ifdef __VISIBILITY_FLAG__ + /* visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags */ + if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility) +#endif + { + /* record intersection */ + isect->prim = triAddr; + isect->object = object; + isect->u = u; + isect->v = v; + isect->t = t; + return true; + } + } + } + } + + return false; +} + +#ifdef __SUBSURFACE__ +/* Special ray intersection routines for subsurface scattering. In that case we + * only want to intersect with primitives in the same object, and if case of + * multiple hits we pick a single random primitive as the intersection point. */ + +ccl_device_inline void triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array, + float3 P, float3 idir, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits) +{ + /* compute and check intersection t-value */ + float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); + float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); + float3 dir = 1.0f/idir; + + float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; + float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); + float t = Oz * invDz; + + if(t > 0.0f && t < tmax) { + /* compute and check barycentric u */ + float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; + float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; + float u = Ox + t*Dx; + + if(u >= 0.0f) { + /* compute and check barycentric v */ + float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); + float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; + float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; + float v = Oy + t*Dy; + + if(v >= 0.0f && u + v <= 1.0f) { + (*num_hits)++; + + int hit; + + if(*num_hits <= max_hits) { + hit = *num_hits - 1; + } + else { + /* reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it */ + hit = lcg_step_uint(lcg_state) % *num_hits; + + if(hit >= max_hits) + return; + } + + /* record intersection */ + Intersection *isect = &isect_array[hit]; + isect->prim = triAddr; + isect->object = object; + isect->u = u; + isect->v = v; + isect->t = t; + } + } + } +} +#endif + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index 79ef86a8662..8c3f558c3ea 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -458,8 +458,7 @@ ccl_device void triangle_light_sample(KernelGlobals *kg, int prim, int object, v = randv*randu; /* triangle, so get position, normal, shader */ - ls->P = triangle_point_MT(kg, prim, u, v); - ls->Ng = triangle_normal_MT(kg, prim, &ls->shader); + triangle_point_normal(kg, prim, u, v, &ls->P, &ls->Ng, &ls->shader); ls->object = object; ls->prim = prim; ls->lamp = ~0; @@ -485,52 +484,6 @@ ccl_device float triangle_light_pdf(KernelGlobals *kg, return t*t*pdf/cos_pi; } -/* Curve Light */ - -#ifdef __HAIR__ - -ccl_device void curve_segment_light_sample(KernelGlobals *kg, int prim, int object, - int segment, float randu, float randv, float time, LightSample *ls) -{ - /* this strand code needs completion */ - float4 v00 = kernel_tex_fetch(__curves, prim); - - int k0 = __float_as_int(v00.x) + segment; - int k1 = k0 + 1; - - float4 P1 = kernel_tex_fetch(__curve_keys, k0); - float4 P2 = kernel_tex_fetch(__curve_keys, k1); - - float l = len(float4_to_float3(P2) - float4_to_float3(P1)); - - float r1 = P1.w; - float r2 = P2.w; - float3 tg = (float4_to_float3(P2) - float4_to_float3(P1)) / l; - float3 xc = make_float3(tg.x * tg.z, tg.y * tg.z, -(tg.x * tg.x + tg.y * tg.y)); - if (is_zero(xc)) - xc = make_float3(tg.x * tg.y, -(tg.x * tg.x + tg.z * tg.z), tg.z * tg.y); - xc = normalize(xc); - float3 yc = cross(tg, xc); - float gd = ((r2 - r1)/l); - - /* normal currently ignores gradient */ - ls->Ng = sinf(M_2PI_F * randv) * xc + cosf(M_2PI_F * randv) * yc; - ls->P = randu * l * tg + (gd * l + r1) * ls->Ng; - ls->object = object; - ls->prim = prim; - ls->lamp = ~0; - ls->t = 0.0f; - ls->u = randu; - ls->v = randv; - ls->type = LIGHT_STRAND; - ls->eval_fac = 1.0f; - ls->shader = __float_as_int(v00.z) | SHADER_USE_MIS; - - object_transform_light_sample(kg, ls, object, time); -} - -#endif - /* Light Distribution */ ccl_device int light_distribution_sample(KernelGlobals *kg, float randt) @@ -573,19 +526,14 @@ ccl_device void light_sample(KernelGlobals *kg, float randt, float randu, float if(prim >= 0) { int object = __float_as_int(l.w); -#ifdef __HAIR__ - int segment = __float_as_int(l.z) & SHADER_MASK; + int shader_flag = __float_as_int(l.z); - if(segment != SHADER_MASK) - curve_segment_light_sample(kg, prim, object, segment, randu, randv, time, ls); - else -#endif - triangle_light_sample(kg, prim, object, randu, randv, time, ls); + triangle_light_sample(kg, prim, object, randu, randv, time, ls); /* compute incoming direction, distance and pdf */ ls->D = normalize_len(ls->P - P, &ls->t); ls->pdf = triangle_light_pdf(kg, ls->Ng, -ls->D, ls->t); - ls->shader |= __float_as_int(l.z) & (~SHADER_MASK); + ls->shader |= shader_flag; } else { int lamp = -prim-1; diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index b113e906e9d..0f327f16419 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -96,7 +96,7 @@ ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, #endif /* vectors */ - sd->P = bvh_triangle_refine(kg, sd, isect, ray); + sd->P = triangle_refine(kg, sd, isect, ray); sd->Ng = Ng; sd->N = Ng; @@ -106,7 +106,7 @@ ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, #ifdef __DPDU__ /* dPdu/dPdv */ - triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim); + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); #endif #ifdef __HAIR__ @@ -177,7 +177,7 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat #endif /* vectors */ - sd->P = bvh_triangle_refine_subsurface(kg, sd, isect, ray); + sd->P = triangle_refine_subsurface(kg, sd, isect, ray); sd->Ng = Ng; sd->N = Ng; @@ -187,7 +187,7 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat #ifdef __DPDU__ /* dPdu/dPdv */ - triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim); + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); #endif sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); @@ -312,7 +312,7 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, } #endif else { - triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim); + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); #ifdef __INSTANCING__ if(instanced) { @@ -355,8 +355,7 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd, float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f); int shader; - P = triangle_point_MT(kg, prim, u, v); - Ng = triangle_normal_MT(kg, prim, &shader); + triangle_point_normal(kg, prim, u, v, &P, &Ng, &shader); /* force smooth shading for displacement */ shader |= SHADER_SMOOTH_NORMAL; diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index bab4218aae9..29160e64082 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -177,15 +177,6 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen if(shader->use_mis && shader->has_surface_emission) num_triangles++; } - - /* disabled for curves */ -#if 0 - foreach(Mesh::Curve& curve, mesh->curves) { - Shader *shader = scene->shaders[curve.shader]; - - if(shader->use_mis && shader->has_surface_emission) - num_curve_segments += curve.num_segments(); -#endif } } @@ -225,21 +216,21 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen bool transform_applied = mesh->transform_applied; Transform tfm = object->tfm; int object_id = j; - int shader_id = SHADER_MASK; + int shader_flag = 0; if(transform_applied) object_id = ~object_id; if(!(object->visibility & PATH_RAY_DIFFUSE)) { - shader_id |= SHADER_EXCLUDE_DIFFUSE; + shader_flag |= SHADER_EXCLUDE_DIFFUSE; use_light_visibility = true; } if(!(object->visibility & PATH_RAY_GLOSSY)) { - shader_id |= SHADER_EXCLUDE_GLOSSY; + shader_flag |= SHADER_EXCLUDE_GLOSSY; use_light_visibility = true; } if(!(object->visibility & PATH_RAY_TRANSMIT)) { - shader_id |= SHADER_EXCLUDE_TRANSMIT; + shader_flag |= SHADER_EXCLUDE_TRANSMIT; use_light_visibility = true; } @@ -249,7 +240,7 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen if(shader->use_mis && shader->has_surface_emission) { distribution[offset].x = totarea; distribution[offset].y = __int_as_float(i + mesh->tri_offset); - distribution[offset].z = __int_as_float(shader_id); + distribution[offset].z = __int_as_float(shader_flag); distribution[offset].w = __int_as_float(object_id); offset++; @@ -267,40 +258,6 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen totarea += triangle_area(p1, p2, p3); } } - - /* sample as light disabled for strands */ -#if 0 - size_t i = 0; - - foreach(Mesh::Curve& curve, mesh->curves) { - Shader *shader = scene->shaders[curve.shader]; - int first_key = curve.first_key; - - if(shader->use_mis && shader->has_surface_emission) { - for(int j = 0; j < curve.num_segments(); j++) { - distribution[offset].x = totarea; - distribution[offset].y = __int_as_float(i + mesh->curve_offset); // XXX fix kernel code - distribution[offset].z = __int_as_float(j) & SHADER_MASK; - distribution[offset].w = __int_as_float(object_id); - offset++; - - float3 p1 = mesh->curve_keys[first_key + j].loc; - float r1 = mesh->curve_keys[first_key + j].radius; - float3 p2 = mesh->curve_keys[first_key + j + 1].loc; - float r2 = mesh->curve_keys[first_key + j + 1].radius; - - if(!transform_applied) { - p1 = transform_point(&tfm, p1); - p2 = transform_point(&tfm, p2); - } - - totarea += M_PI_F * (r1 + r2) * len(p1 - p2); - } - } - - i++; - } -#endif } if(progress.get_cancel()) return; diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index b40f3616242..0a89642fc4a 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -203,20 +203,6 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene surface_area += triangle_area(p1, p2, p3); } - foreach(Mesh::Curve& curve, mesh->curves) { - int first_key = curve.first_key; - - for(int i = 0; i < curve.num_segments(); i++) { - float3 p1 = mesh->curve_keys[first_key + i].co; - float r1 = mesh->curve_keys[first_key + i].radius; - float3 p2 = mesh->curve_keys[first_key + i + 1].co; - float r2 = mesh->curve_keys[first_key + i + 1].radius; - - /* currently ignores segment overlaps*/ - surface_area += M_PI_F *(r1 + r2) * len(p1 - p2); - } - } - surface_area_map[mesh] = surface_area; } else @@ -232,23 +218,6 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene surface_area += triangle_area(p1, p2, p3); } - - foreach(Mesh::Curve& curve, mesh->curves) { - int first_key = curve.first_key; - - for(int i = 0; i < curve.num_segments(); i++) { - float3 p1 = mesh->curve_keys[first_key + i].co; - float r1 = mesh->curve_keys[first_key + i].radius; - float3 p2 = mesh->curve_keys[first_key + i + 1].co; - float r2 = mesh->curve_keys[first_key + i + 1].radius; - - p1 = transform_point(&tfm, p1); - p2 = transform_point(&tfm, p2); - - /* currently ignores segment overlaps*/ - surface_area += M_PI_F *(r1 + r2) * len(p1 - p2); - } - } } /* pack in texture */