diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index 3f14c0d15c4..8b1759ef6a4 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -275,68 +275,13 @@ void BVH::pack_triangle(int idx, float4 woop[3]) float3 v1 = vpos[vidx[1]]; float3 v2 = vpos[vidx[2]]; - float3 r0 = v0 - v2; - float3 r1 = v1 - v2; - float3 r2 = cross(r0, r1); - - if(is_zero(r0) || is_zero(r1) || is_zero(r2)) { - /* degenerate */ - woop[0] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - woop[1] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - woop[2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - else { - Transform t = make_transform( - r0.x, r1.x, r2.x, v2.x, - r0.y, r1.y, r2.y, v2.y, - r0.z, r1.z, r2.z, v2.z, - 0.0f, 0.0f, 0.0f, 1.0f); - - t = transform_inverse(t); - - woop[0] = make_float4(t.z.x, t.z.y, t.z.z, -t.z.w); - woop[1] = make_float4(t.x.x, t.x.y, t.x.z, t.x.w); - woop[2] = make_float4(t.y.x, t.y.y, t.y.z, t.y.w); - } + woop[0] = float3_to_float4(v0); + woop[1] = float3_to_float4(v1); + woop[2] = float3_to_float4(v2); } /* Curves*/ -void BVH::pack_curve_segment(int idx, float4 woop[3]) -{ - int tob = pack.prim_object[idx]; - const Mesh *mesh = objects[tob]->mesh; - int tidx = pack.prim_index[idx]; - int segment = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[idx]); - int k0 = mesh->curves[tidx].first_key + segment; - int k1 = mesh->curves[tidx].first_key + segment + 1; - float3 v0 = float4_to_float3(mesh->curve_keys[k0]); - float3 v1 = float4_to_float3(mesh->curve_keys[k1]); - - float3 d0 = v1 - v0; - float l = len(d0); - - /*Plan - *Transform tfm = make_transform( - * location <3> , l, - * extra curve data <3> , StrID, - * nextkey, flags/tip?, 0, 0); - */ - float3 tg0 = make_float3(1.0f, 0.0f, 0.0f); - float3 tg1 = make_float3(1.0f, 0.0f, 0.0f); - - Transform tfm = make_transform( - tg0.x, tg0.y, tg0.z, l, - tg1.x, tg1.y, tg1.z, 0, - 0, 0, 0, 0, - 0, 0, 0, 1); - - woop[0] = tfm.x; - woop[1] = tfm.y; - woop[2] = tfm.z; - -} - void BVH::pack_primitives() { int nsize = TRI_NODE_SIZE; @@ -351,9 +296,7 @@ void BVH::pack_primitives() if(pack.prim_index[i] != -1) { float4 woop[3]; - if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE) - pack_curve_segment(i, woop); - else + if(pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) pack_triangle(i, woop); memcpy(&pack.tri_woop[i * nsize], woop, sizeof(float4)*3); diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h index ef4575ad7ee..a4a12707768 100644 --- a/intern/cycles/bvh/bvh.h +++ b/intern/cycles/bvh/bvh.h @@ -106,7 +106,6 @@ protected: /* triangles and strands*/ void pack_primitives(); void pack_triangle(int idx, float4 woop[3]); - void pack_curve_segment(int idx, float4 woop[3]); /* merge instance BVH's */ void pack_instances(size_t nodes_size); diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h index 9704b0ac1b8..db7c564c4a3 100644 --- a/intern/cycles/kernel/geom/geom_bvh_shadow.h +++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h @@ -81,6 +81,9 @@ ccl_device bool BVH_FUNCTION_NAME gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + /* traversal loop */ do { do { @@ -214,7 +217,7 @@ ccl_device bool BVH_FUNCTION_NAME switch(type & PRIMITIVE_ALL) { case PRIMITIVE_TRIANGLE: { - hit = triangle_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr); + hit = triangle_intersect(kg, &isect_precalc, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr); break; } #if FEATURE(BVH_MOTION) @@ -295,6 +298,7 @@ ccl_device bool BVH_FUNCTION_NAME bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); #endif + triangle_intersect_precalc(dir, &isect_precalc); num_hits_in_instance = 0; #if defined(__KERNEL_SSE2__) @@ -330,6 +334,8 @@ ccl_device bool BVH_FUNCTION_NAME bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); #endif + triangle_intersect_precalc(dir, &isect_precalc); + /* scale isect->t to adjust for instancing */ for(int i = 0; i < num_hits_in_instance; i++) (isect_array-i-1)->t *= t_fac; @@ -342,6 +348,7 @@ ccl_device bool BVH_FUNCTION_NAME #else bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); #endif + triangle_intersect_precalc(dir, &isect_precalc); } #if defined(__KERNEL_SSE2__) diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h index e5cb60f537a..756130be3da 100644 --- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h +++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h @@ -77,6 +77,9 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + /* traversal loop */ do { do @@ -202,7 +205,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio switch(type & PRIMITIVE_ALL) { case PRIMITIVE_TRIANGLE: { - triangle_intersect_subsurface(kg, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); + triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); break; } #if FEATURE(BVH_MOTION) @@ -228,6 +231,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio #else bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); #endif + triangle_intersect_precalc(dir, &isect_precalc); #if defined(__KERNEL_SSE2__) Psplat[0] = ssef(P.x); @@ -265,6 +269,8 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect_t); #endif + triangle_intersect_precalc(dir, &isect_precalc); + #if defined(__KERNEL_SSE2__) Psplat[0] = ssef(P.x); Psplat[1] = ssef(P.y); diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h index 1d053ba5d08..7be22c0f3bf 100644 --- a/intern/cycles/kernel/geom/geom_bvh_traversal.h +++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h @@ -89,6 +89,9 @@ ccl_device bool BVH_FUNCTION_NAME gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + /* traversal loop */ do { do { @@ -257,7 +260,7 @@ ccl_device bool BVH_FUNCTION_NAME switch(type & PRIMITIVE_ALL) { case PRIMITIVE_TRIANGLE: { - hit = triangle_intersect(kg, isect, P, dir, visibility, object, primAddr); + hit = triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr); break; } #if FEATURE(BVH_MOTION) @@ -312,6 +315,7 @@ ccl_device bool BVH_FUNCTION_NAME #else bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); #endif + triangle_intersect_precalc(dir, &isect_precalc); #if defined(__KERNEL_SSE2__) Psplat[0] = ssef(P.x); @@ -342,6 +346,7 @@ ccl_device bool BVH_FUNCTION_NAME #else bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); #endif + triangle_intersect_precalc(dir, &isect_precalc); #if defined(__KERNEL_SSE2__) Psplat[0] = ssef(P.x); diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h index 58dda7b5e06..7d6b9bdd8db 100644 --- a/intern/cycles/kernel/geom/geom_bvh_volume.h +++ b/intern/cycles/kernel/geom/geom_bvh_volume.h @@ -83,6 +83,9 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg, gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif + IsectPrecalc isect_precalc; + triangle_intersect_precalc(dir, &isect_precalc); + /* traversal loop */ do { do { @@ -209,7 +212,7 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg, switch(type & PRIMITIVE_ALL) { case PRIMITIVE_TRIANGLE: { - triangle_intersect(kg, isect, P, dir, visibility, object, primAddr); + triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr); break; } #if FEATURE(BVH_MOTION) @@ -248,6 +251,8 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg, bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); #endif + triangle_intersect_precalc(dir, &isect_precalc); + #if defined(__KERNEL_SSE2__) Psplat[0] = ssef(P.x); Psplat[1] = ssef(P.y); @@ -285,6 +290,8 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg, bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); #endif + triangle_intersect_precalc(dir, &isect_precalc); + #if defined(__KERNEL_SSE2__) Psplat[0] = ssef(P.x); Psplat[1] = ssef(P.y); diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 0390804d131..dd3928682e3 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -17,7 +17,9 @@ /* Triangle Primitive * - * Basic triangle with 3 vertices is used to represent mesh surfaces. */ + * Basic triangle with 3 vertices is used to represent mesh surfaces. For BVH + * ray intersection we use a precomputed triangle storage to accelerate + * intersection at the cost of more memory usage */ CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index b965bddf330..4bb60ca78e0 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -1,6 +1,5 @@ /* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. + * Copyright 2014, Blender Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,122 +14,270 @@ * limitations under the License. */ -/* Triangle/Ray intersections +/* Triangle/Ray intersections . * - * Basic triangle with 3 vertices is used to represent mesh surfaces. For BVH - * ray intersection we use a precomputed triangle storage to accelerate - * intersection at the cost of more memory usage */ + * For BVH ray intersection we use a precomputed triangle storage to accelerate + * intersection at the cost of more memory usage. + */ CCL_NAMESPACE_BEGIN +/* Workaroudn stupidness of CUDA/OpenCL which doesn't allow to access indexed + * component of float3 value. + */ +#ifndef __KERNEL_CPU__ +# define IDX(vec, idx) \ + ((idx == 0) ? ((vec).x) : ( (idx == 1) ? ((vec).y) : ((vec).z) )) +#else +# define IDX(vec, idx) ((vec)[idx]) +#endif + /* Ray-Triangle intersection for BVH traversal * - * Based on Sven Woop's algorithm with precomputed triangle storage */ + * Sven Woop + * Watertight Ray/Triangle Intersection + * + * http://jcgt.org/published/0002/01/05/paper.pdf + */ -ccl_device_inline bool triangle_intersect(KernelGlobals *kg, Intersection *isect, - float3 P, float3 dir, uint visibility, int object, int triAddr) +/* Precalculated data for the ray->tri intersection. */ +typedef struct IsectPrecalc { + /* Maximal dimension kz, and orthogonal dimensions. */ + int kx, ky, kz; + + /* Shear constants. */ + float Sx, Sy, Sz; +} IsectPrecalc; + +ccl_device_inline void triangle_intersect_precalc(float3 dir, + IsectPrecalc *isect_precalc) { - /* compute and check intersection t-value */ - float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); - float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); + /* Calculate dimesion where the ray direction is maximal. */ + int kz = util_max_axis(make_float3(fabsf(dir.x), + fabsf(dir.y), + fabsf(dir.z))); + int kx = kz + 1; if(kx == 3) kx = 0; + int ky = kx + 1; if(ky == 3) ky = 0; - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); - float t = Oz * invDz; - - if(t > 0.0f && t < isect->t) { - /* compute and check barycentric u */ - float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; - float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; - float u = Ox + t*Dx; - - if(u >= 0.0f) { - /* compute and check barycentric v */ - float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); - float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; - float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; - float v = Oy + t*Dy; - - if(v >= 0.0f && u + v <= 1.0f) { -#ifdef __VISIBILITY_FLAG__ - /* visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility) -#endif - { - /* record intersection */ - isect->t = t; - isect->u = u; - isect->v = v; - isect->prim = triAddr; - isect->object = object; - isect->type = PRIMITIVE_TRIANGLE; - return true; - } - } - } + /* Swap kx and ky dimensions to preserve winding direction of triangles. */ + if(IDX(dir, kz) < 0.0f) { + int tmp = kx; + kx = ky; + ky = tmp; } + /* Calculate the shear constants. */ + float inf_dir_z = 1.0f / IDX(dir, kz); + isect_precalc->Sx = IDX(dir, kx) * inf_dir_z; + isect_precalc->Sy = IDX(dir, ky) * inf_dir_z; + isect_precalc->Sz = inf_dir_z; + + /* Store the dimensions. */ + isect_precalc->kx = kx; + isect_precalc->ky = ky; + isect_precalc->kz = kz; +} + +/* TODO(sergey): Make it general utility function. */ +ccl_device_inline float xor_signmast(float x, int y) +{ + return __int_as_float(__float_as_int(x) ^ y); +} + +ccl_device_inline bool triangle_intersect(KernelGlobals *kg, + const IsectPrecalc *isect_precalc, + Intersection *isect, + float3 P, + float3 dir, + uint visibility, + int object, + int triAddr) +{ + const int kx = isect_precalc->kx; + const int ky = isect_precalc->ky; + const int kz = isect_precalc->kz; + const float Sx = isect_precalc->Sx; + const float Sy = isect_precalc->Sy; + const float Sz = isect_precalc->Sz; + + /* Calculate vertices relative to ray origin. */ + float3 tri[3]; + tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0)); + tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1)); + tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2)); + + const float3 A = tri[0] - P; + const float3 B = tri[1] - P; + const float3 C = tri[2] - P; + + const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz); + const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz); + const float C_kx = IDX(C, kx), C_ky = IDX(C, ky), C_kz = IDX(C, kz); + + /* Perform shear and scale of vertices. */ + const float Ax = A_kx - Sx * A_kz; + const float Ay = A_ky - Sy * A_kz; + const float Bx = B_kx - Sx * B_kz; + const float By = B_ky - Sy * B_kz; + const float Cx = C_kx - Sx * C_kz; + const float Cy = C_ky - Sy * C_kz; + + /* Calculate scaled barycentric coordinates. */ + float U = Cx * By - Cy * Bx; + int sign_mask = (__float_as_int(U) & 0x80000000); + float V = Ax * Cy - Ay * Cx; + if(sign_mask != (__float_as_int(V) & 0x80000000)) { + return false; + } + float W = Bx * Ay - By * Ax; + if(sign_mask != (__float_as_int(W) & 0x80000000)) { + return false; + } + + /* Calculate determinant. */ + float det = U + V + W; + if(UNLIKELY(det == 0.0f)) { + return false; + } + + /* Calculate scaled z−coordinates of vertices and use them to calculate + * the hit distance. + */ + const float Az = Sz * A_kz; + const float Bz = Sz * B_kz; + const float Cz = Sz * C_kz; + const float T = U * Az + V * Bz + W * Cz; + + if ((xor_signmast(T, sign_mask) < 0.0f) || + (xor_signmast(T, sign_mask) > isect->t * xor_signmast(det, sign_mask))) + { + return false; + } + +#ifdef __VISIBILITY_FLAG__ + /* visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags */ + if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility) +#endif + { + /* Normalize U, V, W, and T. */ + const float inv_det = 1.0f / det; + isect->prim = triAddr; + isect->object = object; + isect->type = PRIMITIVE_TRIANGLE; + isect->u = U * inv_det; + isect->v = V * inv_det; + isect->t = T * inv_det; + return true; + } return false; } /* Special ray intersection routines for subsurface scattering. In that case we * only want to intersect with primitives in the same object, and if case of - * multiple hits we pick a single random primitive as the intersection point. */ + * multiple hits we pick a single random primitive as the intersection point. + */ #ifdef __SUBSURFACE__ -ccl_device_inline void triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array, - float3 P, float3 dir, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits) +ccl_device_inline void triangle_intersect_subsurface( + KernelGlobals *kg, + const IsectPrecalc *isect_precalc, + Intersection *isect_array, + float3 P, + float3 dir, + int object, + int triAddr, + float tmax, + uint *num_hits, + uint *lcg_state, + int max_hits) { - /* compute and check intersection t-value */ - float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); - float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); + const int kx = isect_precalc->kx; + const int ky = isect_precalc->ky; + const int kz = isect_precalc->kz; + const float Sx = isect_precalc->Sx; + const float Sy = isect_precalc->Sy; + const float Sz = isect_precalc->Sz; - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); - float t = Oz * invDz; + /* Calculate vertices relative to ray origin. */ + float3 tri[3]; + int prim = kernel_tex_fetch(__prim_index, triAddr); + triangle_vertices(kg, prim, tri); - if(t > 0.0f && t < tmax) { - /* compute and check barycentric u */ - float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; - float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; - float u = Ox + t*Dx; + const float3 A = tri[0] - P; + const float3 B = tri[1] - P; + const float3 C = tri[2] - P; - if(u >= 0.0f) { - /* compute and check barycentric v */ - float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); - float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; - float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; - float v = Oy + t*Dy; + const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz); + const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz); + const float C_kx = IDX(C, kx), C_ky = IDX(C, ky), C_kz = IDX(C, kz); - if(v >= 0.0f && u + v <= 1.0f) { - (*num_hits)++; + /* Perform shear and scale of vertices. */ + const float Ax = A_kx - Sx * A_kz; + const float Ay = A_ky - Sy * A_kz; + const float Bx = B_kx - Sx * B_kz; + const float By = B_ky - Sy * B_kz; + const float Cx = C_kx - Sx * C_kz; + const float Cy = C_ky - Sy * C_kz; - int hit; - - if(*num_hits <= max_hits) { - hit = *num_hits - 1; - } - else { - /* reservoir sampling: if we are at the maximum number of - * hits, randomly replace element or skip it */ - hit = lcg_step_uint(lcg_state) % *num_hits; - - if(hit >= max_hits) - return; - } - - /* record intersection */ - Intersection *isect = &isect_array[hit]; - isect->t = t; - isect->u = u; - isect->v = v; - isect->prim = triAddr; - isect->object = object; - isect->type = PRIMITIVE_TRIANGLE; - } - } + /* Calculate scaled barycentric coordinates. */ + float U = Cx * By - Cy * Bx; + int sign_mask = (__float_as_int(U) & 0x80000000); + float V = Ax * Cy - Ay * Cx; + if(sign_mask != (__float_as_int(V) & 0x80000000)) { + return; } + float W = Bx * Ay - By * Ax; + if(sign_mask != (__float_as_int(W) & 0x80000000)) { + return; + } + + /* Calculate determinant. */ + float det = U + V + W; + if(UNLIKELY(det == 0.0f)) { + return; + } + + /* Calculate scaled z−coordinates of vertices and use them to calculate + * the hit distance. + */ + const float Az = Sz * A_kz; + const float Bz = Sz * B_kz; + const float Cz = Sz * C_kz; + const float T = U * Az + V * Bz + W * Cz; + + if ((xor_signmast(T, sign_mask) < 0.0f) || + (xor_signmast(T, sign_mask) > tmax * xor_signmast(det, sign_mask))) + { + return; + } + + /* Normalize U, V, W, and T. */ + const float inv_det = 1.0f / det; + + (*num_hits)++; + int hit; + + if(*num_hits <= max_hits) { + hit = *num_hits - 1; + } + else { + /* reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it */ + hit = lcg_step_uint(lcg_state) % *num_hits; + + if(hit >= max_hits) + return; + } + + /* record intersection */ + Intersection *isect = &isect_array[hit]; + isect->prim = triAddr; + isect->object = object; + isect->type = PRIMITIVE_TRIANGLE; + isect->u = U * inv_det; + isect->v = V * inv_det; + isect->t = T * inv_det; } #endif @@ -138,7 +285,17 @@ ccl_device_inline void triangle_intersect_subsurface(KernelGlobals *kg, Intersec * far the precision is often not so good, this reintersects the primitive from * a closer distance. */ -ccl_device_inline float3 triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) +/* Reintersections uses the paper: + * + * Tomas Moeller + * Fast, minimum storage ray/triangle intersection + * http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf + */ + +ccl_device_inline float3 triangle_refine(KernelGlobals *kg, + ShaderData *sd, + const Intersection *isect, + const Ray *ray) { float3 P = ray->P; float3 D = ray->D; @@ -159,10 +316,17 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, ShaderData *sd, cons P = P + D*t; - float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); - float rt = Oz * invDz; + float3 tri[3]; + tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0)); + tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1)); + tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2)); + + float3 edge1 = tri[0] - tri[2]; + float3 edge2 = tri[1] - tri[2]; + float3 tvec = P - tri[2]; + float3 qvec = cross(tvec, edge1); + float3 pvec = cross(D, edge2); + float rt = dot(edge2, qvec) / dot(edge1, pvec); P = P + D*rt; @@ -182,8 +346,13 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, ShaderData *sd, cons #endif } -/* same as above, except that isect->t is assumed to be in object space for instancing */ -ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) +/* Same as above, except that isect->t is assumed to be in object space for + * instancing. + */ +ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, + ShaderData *sd, + const Intersection *isect, + const Ray *ray) { float3 P = ray->P; float3 D = ray->D; @@ -194,7 +363,9 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat #ifdef __OBJECT_MOTION__ Transform tfm = sd->ob_itfm; #else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, + isect->object, + OBJECT_INVERSE_TRANSFORM); #endif P = transform_point(&tfm, P); @@ -204,10 +375,16 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat P = P + D*t; - float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); - float rt = Oz * invDz; + float3 tri[3]; + int prim = kernel_tex_fetch(__prim_index, isect->prim); + triangle_vertices(kg, prim, tri); + + float3 edge1 = tri[0] - tri[2]; + float3 edge2 = tri[1] - tri[2]; + float3 tvec = P - tri[2]; + float3 qvec = cross(tvec, edge1); + float3 pvec = cross(D, edge2); + float rt = dot(edge2, qvec) / dot(edge1, pvec); P = P + D*rt; @@ -215,7 +392,9 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat #ifdef __OBJECT_MOTION__ Transform tfm = sd->ob_tfm; #else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, + isect->object, + OBJECT_TRANSFORM); #endif P = transform_point(&tfm, P); @@ -227,4 +406,6 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat #endif } +#undef IDX + CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 78005546a01..6898dc974c6 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -1452,6 +1452,22 @@ ccl_device bool map_to_sphere(float *r_u, float *r_v, } } +ccl_device_inline int util_max_axis(float3 vec) +{ + if(vec.x > vec.y) { + if(vec.x > vec.z) + return 0; + else + return 2; + } + else { + if(vec.y > vec.z) + return 1; + else + return 2; + } +} + CCL_NAMESPACE_END #endif /* __UTIL_MATH_H__ */