Cycles: Implement watertight ray/triangle intersection
Using this paper: Sven Woop, Watertight Ray/Triangle Intersection http://jcgt.org/published/0002/01/05/paper.pdf This change is expected to address quite reasonable amount of reports from the bug tracker, plus it might help reducing the noise in some scenes. Unfortunately, it's currently about 7% slower than the previous solution with pre-computed triangle plane equations, but maybe with some smart tweaks to the code (tests reshuffle, using SIMD in a nice way or so) we can avoid the speed regression. But perhaps smartest thing to do here would be to change single triangle / ray intersection with multiple triangles / ray intersections. That's how Embree does this and it's watertight single ray intersection is not any faster that this. Currently only triangle intersection is modified accordingly to the paper, in the future we would also want to modify the node / ray intersection. Reviewers: brecht, juicyfruit Subscribers: dingto, ton Differential Revision: https://developer.blender.org/D819
This commit is contained in:
parent
57d235d9f4
commit
f770bc4757
@ -275,68 +275,13 @@ void BVH::pack_triangle(int idx, float4 woop[3])
|
||||
float3 v1 = vpos[vidx[1]];
|
||||
float3 v2 = vpos[vidx[2]];
|
||||
|
||||
float3 r0 = v0 - v2;
|
||||
float3 r1 = v1 - v2;
|
||||
float3 r2 = cross(r0, r1);
|
||||
|
||||
if(is_zero(r0) || is_zero(r1) || is_zero(r2)) {
|
||||
/* degenerate */
|
||||
woop[0] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
woop[1] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
woop[2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
}
|
||||
else {
|
||||
Transform t = make_transform(
|
||||
r0.x, r1.x, r2.x, v2.x,
|
||||
r0.y, r1.y, r2.y, v2.y,
|
||||
r0.z, r1.z, r2.z, v2.z,
|
||||
0.0f, 0.0f, 0.0f, 1.0f);
|
||||
|
||||
t = transform_inverse(t);
|
||||
|
||||
woop[0] = make_float4(t.z.x, t.z.y, t.z.z, -t.z.w);
|
||||
woop[1] = make_float4(t.x.x, t.x.y, t.x.z, t.x.w);
|
||||
woop[2] = make_float4(t.y.x, t.y.y, t.y.z, t.y.w);
|
||||
}
|
||||
woop[0] = float3_to_float4(v0);
|
||||
woop[1] = float3_to_float4(v1);
|
||||
woop[2] = float3_to_float4(v2);
|
||||
}
|
||||
|
||||
/* Curves*/
|
||||
|
||||
void BVH::pack_curve_segment(int idx, float4 woop[3])
|
||||
{
|
||||
int tob = pack.prim_object[idx];
|
||||
const Mesh *mesh = objects[tob]->mesh;
|
||||
int tidx = pack.prim_index[idx];
|
||||
int segment = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[idx]);
|
||||
int k0 = mesh->curves[tidx].first_key + segment;
|
||||
int k1 = mesh->curves[tidx].first_key + segment + 1;
|
||||
float3 v0 = float4_to_float3(mesh->curve_keys[k0]);
|
||||
float3 v1 = float4_to_float3(mesh->curve_keys[k1]);
|
||||
|
||||
float3 d0 = v1 - v0;
|
||||
float l = len(d0);
|
||||
|
||||
/*Plan
|
||||
*Transform tfm = make_transform(
|
||||
* location <3> , l,
|
||||
* extra curve data <3> , StrID,
|
||||
* nextkey, flags/tip?, 0, 0);
|
||||
*/
|
||||
float3 tg0 = make_float3(1.0f, 0.0f, 0.0f);
|
||||
float3 tg1 = make_float3(1.0f, 0.0f, 0.0f);
|
||||
|
||||
Transform tfm = make_transform(
|
||||
tg0.x, tg0.y, tg0.z, l,
|
||||
tg1.x, tg1.y, tg1.z, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 1);
|
||||
|
||||
woop[0] = tfm.x;
|
||||
woop[1] = tfm.y;
|
||||
woop[2] = tfm.z;
|
||||
|
||||
}
|
||||
|
||||
void BVH::pack_primitives()
|
||||
{
|
||||
int nsize = TRI_NODE_SIZE;
|
||||
@ -351,9 +296,7 @@ void BVH::pack_primitives()
|
||||
if(pack.prim_index[i] != -1) {
|
||||
float4 woop[3];
|
||||
|
||||
if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
|
||||
pack_curve_segment(i, woop);
|
||||
else
|
||||
if(pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE)
|
||||
pack_triangle(i, woop);
|
||||
|
||||
memcpy(&pack.tri_woop[i * nsize], woop, sizeof(float4)*3);
|
||||
|
@ -106,7 +106,6 @@ protected:
|
||||
/* triangles and strands*/
|
||||
void pack_primitives();
|
||||
void pack_triangle(int idx, float4 woop[3]);
|
||||
void pack_curve_segment(int idx, float4 woop[3]);
|
||||
|
||||
/* merge instance BVH's */
|
||||
void pack_instances(size_t nodes_size);
|
||||
|
@ -81,6 +81,9 @@ ccl_device bool BVH_FUNCTION_NAME
|
||||
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
||||
#endif
|
||||
|
||||
IsectPrecalc isect_precalc;
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
/* traversal loop */
|
||||
do {
|
||||
do {
|
||||
@ -214,7 +217,7 @@ ccl_device bool BVH_FUNCTION_NAME
|
||||
|
||||
switch(type & PRIMITIVE_ALL) {
|
||||
case PRIMITIVE_TRIANGLE: {
|
||||
hit = triangle_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr);
|
||||
hit = triangle_intersect(kg, &isect_precalc, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr);
|
||||
break;
|
||||
}
|
||||
#if FEATURE(BVH_MOTION)
|
||||
@ -295,6 +298,7 @@ ccl_device bool BVH_FUNCTION_NAME
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
|
||||
#endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
num_hits_in_instance = 0;
|
||||
|
||||
#if defined(__KERNEL_SSE2__)
|
||||
@ -330,6 +334,8 @@ ccl_device bool BVH_FUNCTION_NAME
|
||||
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
|
||||
#endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
/* scale isect->t to adjust for instancing */
|
||||
for(int i = 0; i < num_hits_in_instance; i++)
|
||||
(isect_array-i-1)->t *= t_fac;
|
||||
@ -342,6 +348,7 @@ ccl_device bool BVH_FUNCTION_NAME
|
||||
#else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
|
||||
#endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
}
|
||||
|
||||
#if defined(__KERNEL_SSE2__)
|
||||
|
@ -77,6 +77,9 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
|
||||
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
||||
#endif
|
||||
|
||||
IsectPrecalc isect_precalc;
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
/* traversal loop */
|
||||
do {
|
||||
do
|
||||
@ -202,7 +205,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
|
||||
|
||||
switch(type & PRIMITIVE_ALL) {
|
||||
case PRIMITIVE_TRIANGLE: {
|
||||
triangle_intersect_subsurface(kg, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
|
||||
triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
|
||||
break;
|
||||
}
|
||||
#if FEATURE(BVH_MOTION)
|
||||
@ -228,6 +231,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
|
||||
#else
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
|
||||
#endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
#if defined(__KERNEL_SSE2__)
|
||||
Psplat[0] = ssef(P.x);
|
||||
@ -265,6 +269,8 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect_t);
|
||||
#endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
#if defined(__KERNEL_SSE2__)
|
||||
Psplat[0] = ssef(P.x);
|
||||
Psplat[1] = ssef(P.y);
|
||||
|
@ -89,6 +89,9 @@ ccl_device bool BVH_FUNCTION_NAME
|
||||
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
||||
#endif
|
||||
|
||||
IsectPrecalc isect_precalc;
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
/* traversal loop */
|
||||
do {
|
||||
do {
|
||||
@ -257,7 +260,7 @@ ccl_device bool BVH_FUNCTION_NAME
|
||||
|
||||
switch(type & PRIMITIVE_ALL) {
|
||||
case PRIMITIVE_TRIANGLE: {
|
||||
hit = triangle_intersect(kg, isect, P, dir, visibility, object, primAddr);
|
||||
hit = triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr);
|
||||
break;
|
||||
}
|
||||
#if FEATURE(BVH_MOTION)
|
||||
@ -312,6 +315,7 @@ ccl_device bool BVH_FUNCTION_NAME
|
||||
#else
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
#endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
#if defined(__KERNEL_SSE2__)
|
||||
Psplat[0] = ssef(P.x);
|
||||
@ -342,6 +346,7 @@ ccl_device bool BVH_FUNCTION_NAME
|
||||
#else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
#endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
#if defined(__KERNEL_SSE2__)
|
||||
Psplat[0] = ssef(P.x);
|
||||
|
@ -83,6 +83,9 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg,
|
||||
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
||||
#endif
|
||||
|
||||
IsectPrecalc isect_precalc;
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
/* traversal loop */
|
||||
do {
|
||||
do {
|
||||
@ -209,7 +212,7 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg,
|
||||
|
||||
switch(type & PRIMITIVE_ALL) {
|
||||
case PRIMITIVE_TRIANGLE: {
|
||||
triangle_intersect(kg, isect, P, dir, visibility, object, primAddr);
|
||||
triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr);
|
||||
break;
|
||||
}
|
||||
#if FEATURE(BVH_MOTION)
|
||||
@ -248,6 +251,8 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg,
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
#endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
#if defined(__KERNEL_SSE2__)
|
||||
Psplat[0] = ssef(P.x);
|
||||
Psplat[1] = ssef(P.y);
|
||||
@ -285,6 +290,8 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg,
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
#endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
#if defined(__KERNEL_SSE2__)
|
||||
Psplat[0] = ssef(P.x);
|
||||
Psplat[1] = ssef(P.y);
|
||||
|
@ -17,7 +17,9 @@
|
||||
|
||||
/* Triangle Primitive
|
||||
*
|
||||
* Basic triangle with 3 vertices is used to represent mesh surfaces. */
|
||||
* Basic triangle with 3 vertices is used to represent mesh surfaces. For BVH
|
||||
* ray intersection we use a precomputed triangle storage to accelerate
|
||||
* intersection at the cost of more memory usage */
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -1,6 +1,5 @@
|
||||
/*
|
||||
* Adapted from code Copyright 2009-2010 NVIDIA Corporation
|
||||
* Modifications Copyright 2011, Blender Foundation.
|
||||
* Copyright 2014, Blender Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -15,122 +14,270 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/* Triangle/Ray intersections
|
||||
/* Triangle/Ray intersections .
|
||||
*
|
||||
* Basic triangle with 3 vertices is used to represent mesh surfaces. For BVH
|
||||
* ray intersection we use a precomputed triangle storage to accelerate
|
||||
* intersection at the cost of more memory usage */
|
||||
* For BVH ray intersection we use a precomputed triangle storage to accelerate
|
||||
* intersection at the cost of more memory usage.
|
||||
*/
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Workaroudn stupidness of CUDA/OpenCL which doesn't allow to access indexed
|
||||
* component of float3 value.
|
||||
*/
|
||||
#ifndef __KERNEL_CPU__
|
||||
# define IDX(vec, idx) \
|
||||
((idx == 0) ? ((vec).x) : ( (idx == 1) ? ((vec).y) : ((vec).z) ))
|
||||
#else
|
||||
# define IDX(vec, idx) ((vec)[idx])
|
||||
#endif
|
||||
|
||||
/* Ray-Triangle intersection for BVH traversal
|
||||
*
|
||||
* Based on Sven Woop's algorithm with precomputed triangle storage */
|
||||
* Sven Woop
|
||||
* Watertight Ray/Triangle Intersection
|
||||
*
|
||||
* http://jcgt.org/published/0002/01/05/paper.pdf
|
||||
*/
|
||||
|
||||
ccl_device_inline bool triangle_intersect(KernelGlobals *kg, Intersection *isect,
|
||||
float3 P, float3 dir, uint visibility, int object, int triAddr)
|
||||
/* Precalculated data for the ray->tri intersection. */
|
||||
typedef struct IsectPrecalc {
|
||||
/* Maximal dimension kz, and orthogonal dimensions. */
|
||||
int kx, ky, kz;
|
||||
|
||||
/* Shear constants. */
|
||||
float Sx, Sy, Sz;
|
||||
} IsectPrecalc;
|
||||
|
||||
ccl_device_inline void triangle_intersect_precalc(float3 dir,
|
||||
IsectPrecalc *isect_precalc)
|
||||
{
|
||||
/* compute and check intersection t-value */
|
||||
float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
|
||||
float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1);
|
||||
/* Calculate dimesion where the ray direction is maximal. */
|
||||
int kz = util_max_axis(make_float3(fabsf(dir.x),
|
||||
fabsf(dir.y),
|
||||
fabsf(dir.z)));
|
||||
int kx = kz + 1; if(kx == 3) kx = 0;
|
||||
int ky = kx + 1; if(ky == 3) ky = 0;
|
||||
|
||||
float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
|
||||
float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z);
|
||||
float t = Oz * invDz;
|
||||
|
||||
if(t > 0.0f && t < isect->t) {
|
||||
/* compute and check barycentric u */
|
||||
float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z;
|
||||
float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z;
|
||||
float u = Ox + t*Dx;
|
||||
|
||||
if(u >= 0.0f) {
|
||||
/* compute and check barycentric v */
|
||||
float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
|
||||
float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z;
|
||||
float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z;
|
||||
float v = Oy + t*Dy;
|
||||
|
||||
if(v >= 0.0f && u + v <= 1.0f) {
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
/* visibility flag test. we do it here under the assumption
|
||||
* that most triangles are culled by node flags */
|
||||
if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
|
||||
#endif
|
||||
{
|
||||
/* record intersection */
|
||||
isect->t = t;
|
||||
isect->u = u;
|
||||
isect->v = v;
|
||||
isect->prim = triAddr;
|
||||
isect->object = object;
|
||||
isect->type = PRIMITIVE_TRIANGLE;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Swap kx and ky dimensions to preserve winding direction of triangles. */
|
||||
if(IDX(dir, kz) < 0.0f) {
|
||||
int tmp = kx;
|
||||
kx = ky;
|
||||
ky = tmp;
|
||||
}
|
||||
|
||||
/* Calculate the shear constants. */
|
||||
float inf_dir_z = 1.0f / IDX(dir, kz);
|
||||
isect_precalc->Sx = IDX(dir, kx) * inf_dir_z;
|
||||
isect_precalc->Sy = IDX(dir, ky) * inf_dir_z;
|
||||
isect_precalc->Sz = inf_dir_z;
|
||||
|
||||
/* Store the dimensions. */
|
||||
isect_precalc->kx = kx;
|
||||
isect_precalc->ky = ky;
|
||||
isect_precalc->kz = kz;
|
||||
}
|
||||
|
||||
/* TODO(sergey): Make it general utility function. */
|
||||
ccl_device_inline float xor_signmast(float x, int y)
|
||||
{
|
||||
return __int_as_float(__float_as_int(x) ^ y);
|
||||
}
|
||||
|
||||
ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
|
||||
const IsectPrecalc *isect_precalc,
|
||||
Intersection *isect,
|
||||
float3 P,
|
||||
float3 dir,
|
||||
uint visibility,
|
||||
int object,
|
||||
int triAddr)
|
||||
{
|
||||
const int kx = isect_precalc->kx;
|
||||
const int ky = isect_precalc->ky;
|
||||
const int kz = isect_precalc->kz;
|
||||
const float Sx = isect_precalc->Sx;
|
||||
const float Sy = isect_precalc->Sy;
|
||||
const float Sz = isect_precalc->Sz;
|
||||
|
||||
/* Calculate vertices relative to ray origin. */
|
||||
float3 tri[3];
|
||||
tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0));
|
||||
tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1));
|
||||
tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2));
|
||||
|
||||
const float3 A = tri[0] - P;
|
||||
const float3 B = tri[1] - P;
|
||||
const float3 C = tri[2] - P;
|
||||
|
||||
const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
|
||||
const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
|
||||
const float C_kx = IDX(C, kx), C_ky = IDX(C, ky), C_kz = IDX(C, kz);
|
||||
|
||||
/* Perform shear and scale of vertices. */
|
||||
const float Ax = A_kx - Sx * A_kz;
|
||||
const float Ay = A_ky - Sy * A_kz;
|
||||
const float Bx = B_kx - Sx * B_kz;
|
||||
const float By = B_ky - Sy * B_kz;
|
||||
const float Cx = C_kx - Sx * C_kz;
|
||||
const float Cy = C_ky - Sy * C_kz;
|
||||
|
||||
/* Calculate scaled barycentric coordinates. */
|
||||
float U = Cx * By - Cy * Bx;
|
||||
int sign_mask = (__float_as_int(U) & 0x80000000);
|
||||
float V = Ax * Cy - Ay * Cx;
|
||||
if(sign_mask != (__float_as_int(V) & 0x80000000)) {
|
||||
return false;
|
||||
}
|
||||
float W = Bx * Ay - By * Ax;
|
||||
if(sign_mask != (__float_as_int(W) & 0x80000000)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Calculate determinant. */
|
||||
float det = U + V + W;
|
||||
if(UNLIKELY(det == 0.0f)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Calculate scaled z−coordinates of vertices and use them to calculate
|
||||
* the hit distance.
|
||||
*/
|
||||
const float Az = Sz * A_kz;
|
||||
const float Bz = Sz * B_kz;
|
||||
const float Cz = Sz * C_kz;
|
||||
const float T = U * Az + V * Bz + W * Cz;
|
||||
|
||||
if ((xor_signmast(T, sign_mask) < 0.0f) ||
|
||||
(xor_signmast(T, sign_mask) > isect->t * xor_signmast(det, sign_mask)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
/* visibility flag test. we do it here under the assumption
|
||||
* that most triangles are culled by node flags */
|
||||
if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
|
||||
#endif
|
||||
{
|
||||
/* Normalize U, V, W, and T. */
|
||||
const float inv_det = 1.0f / det;
|
||||
isect->prim = triAddr;
|
||||
isect->object = object;
|
||||
isect->type = PRIMITIVE_TRIANGLE;
|
||||
isect->u = U * inv_det;
|
||||
isect->v = V * inv_det;
|
||||
isect->t = T * inv_det;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Special ray intersection routines for subsurface scattering. In that case we
|
||||
* only want to intersect with primitives in the same object, and if case of
|
||||
* multiple hits we pick a single random primitive as the intersection point. */
|
||||
* multiple hits we pick a single random primitive as the intersection point.
|
||||
*/
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
ccl_device_inline void triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array,
|
||||
float3 P, float3 dir, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits)
|
||||
ccl_device_inline void triangle_intersect_subsurface(
|
||||
KernelGlobals *kg,
|
||||
const IsectPrecalc *isect_precalc,
|
||||
Intersection *isect_array,
|
||||
float3 P,
|
||||
float3 dir,
|
||||
int object,
|
||||
int triAddr,
|
||||
float tmax,
|
||||
uint *num_hits,
|
||||
uint *lcg_state,
|
||||
int max_hits)
|
||||
{
|
||||
/* compute and check intersection t-value */
|
||||
float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
|
||||
float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1);
|
||||
const int kx = isect_precalc->kx;
|
||||
const int ky = isect_precalc->ky;
|
||||
const int kz = isect_precalc->kz;
|
||||
const float Sx = isect_precalc->Sx;
|
||||
const float Sy = isect_precalc->Sy;
|
||||
const float Sz = isect_precalc->Sz;
|
||||
|
||||
float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
|
||||
float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z);
|
||||
float t = Oz * invDz;
|
||||
/* Calculate vertices relative to ray origin. */
|
||||
float3 tri[3];
|
||||
int prim = kernel_tex_fetch(__prim_index, triAddr);
|
||||
triangle_vertices(kg, prim, tri);
|
||||
|
||||
if(t > 0.0f && t < tmax) {
|
||||
/* compute and check barycentric u */
|
||||
float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z;
|
||||
float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z;
|
||||
float u = Ox + t*Dx;
|
||||
const float3 A = tri[0] - P;
|
||||
const float3 B = tri[1] - P;
|
||||
const float3 C = tri[2] - P;
|
||||
|
||||
if(u >= 0.0f) {
|
||||
/* compute and check barycentric v */
|
||||
float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
|
||||
float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z;
|
||||
float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z;
|
||||
float v = Oy + t*Dy;
|
||||
const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
|
||||
const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
|
||||
const float C_kx = IDX(C, kx), C_ky = IDX(C, ky), C_kz = IDX(C, kz);
|
||||
|
||||
if(v >= 0.0f && u + v <= 1.0f) {
|
||||
(*num_hits)++;
|
||||
/* Perform shear and scale of vertices. */
|
||||
const float Ax = A_kx - Sx * A_kz;
|
||||
const float Ay = A_ky - Sy * A_kz;
|
||||
const float Bx = B_kx - Sx * B_kz;
|
||||
const float By = B_ky - Sy * B_kz;
|
||||
const float Cx = C_kx - Sx * C_kz;
|
||||
const float Cy = C_ky - Sy * C_kz;
|
||||
|
||||
int hit;
|
||||
|
||||
if(*num_hits <= max_hits) {
|
||||
hit = *num_hits - 1;
|
||||
}
|
||||
else {
|
||||
/* reservoir sampling: if we are at the maximum number of
|
||||
* hits, randomly replace element or skip it */
|
||||
hit = lcg_step_uint(lcg_state) % *num_hits;
|
||||
|
||||
if(hit >= max_hits)
|
||||
return;
|
||||
}
|
||||
|
||||
/* record intersection */
|
||||
Intersection *isect = &isect_array[hit];
|
||||
isect->t = t;
|
||||
isect->u = u;
|
||||
isect->v = v;
|
||||
isect->prim = triAddr;
|
||||
isect->object = object;
|
||||
isect->type = PRIMITIVE_TRIANGLE;
|
||||
}
|
||||
}
|
||||
/* Calculate scaled barycentric coordinates. */
|
||||
float U = Cx * By - Cy * Bx;
|
||||
int sign_mask = (__float_as_int(U) & 0x80000000);
|
||||
float V = Ax * Cy - Ay * Cx;
|
||||
if(sign_mask != (__float_as_int(V) & 0x80000000)) {
|
||||
return;
|
||||
}
|
||||
float W = Bx * Ay - By * Ax;
|
||||
if(sign_mask != (__float_as_int(W) & 0x80000000)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Calculate determinant. */
|
||||
float det = U + V + W;
|
||||
if(UNLIKELY(det == 0.0f)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Calculate scaled z−coordinates of vertices and use them to calculate
|
||||
* the hit distance.
|
||||
*/
|
||||
const float Az = Sz * A_kz;
|
||||
const float Bz = Sz * B_kz;
|
||||
const float Cz = Sz * C_kz;
|
||||
const float T = U * Az + V * Bz + W * Cz;
|
||||
|
||||
if ((xor_signmast(T, sign_mask) < 0.0f) ||
|
||||
(xor_signmast(T, sign_mask) > tmax * xor_signmast(det, sign_mask)))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
/* Normalize U, V, W, and T. */
|
||||
const float inv_det = 1.0f / det;
|
||||
|
||||
(*num_hits)++;
|
||||
int hit;
|
||||
|
||||
if(*num_hits <= max_hits) {
|
||||
hit = *num_hits - 1;
|
||||
}
|
||||
else {
|
||||
/* reservoir sampling: if we are at the maximum number of
|
||||
* hits, randomly replace element or skip it */
|
||||
hit = lcg_step_uint(lcg_state) % *num_hits;
|
||||
|
||||
if(hit >= max_hits)
|
||||
return;
|
||||
}
|
||||
|
||||
/* record intersection */
|
||||
Intersection *isect = &isect_array[hit];
|
||||
isect->prim = triAddr;
|
||||
isect->object = object;
|
||||
isect->type = PRIMITIVE_TRIANGLE;
|
||||
isect->u = U * inv_det;
|
||||
isect->v = V * inv_det;
|
||||
isect->t = T * inv_det;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -138,7 +285,17 @@ ccl_device_inline void triangle_intersect_subsurface(KernelGlobals *kg, Intersec
|
||||
* far the precision is often not so good, this reintersects the primitive from
|
||||
* a closer distance. */
|
||||
|
||||
ccl_device_inline float3 triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray)
|
||||
/* Reintersections uses the paper:
|
||||
*
|
||||
* Tomas Moeller
|
||||
* Fast, minimum storage ray/triangle intersection
|
||||
* http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf
|
||||
*/
|
||||
|
||||
ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const Intersection *isect,
|
||||
const Ray *ray)
|
||||
{
|
||||
float3 P = ray->P;
|
||||
float3 D = ray->D;
|
||||
@ -159,10 +316,17 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, ShaderData *sd, cons
|
||||
|
||||
P = P + D*t;
|
||||
|
||||
float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
|
||||
float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
|
||||
float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z);
|
||||
float rt = Oz * invDz;
|
||||
float3 tri[3];
|
||||
tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0));
|
||||
tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1));
|
||||
tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2));
|
||||
|
||||
float3 edge1 = tri[0] - tri[2];
|
||||
float3 edge2 = tri[1] - tri[2];
|
||||
float3 tvec = P - tri[2];
|
||||
float3 qvec = cross(tvec, edge1);
|
||||
float3 pvec = cross(D, edge2);
|
||||
float rt = dot(edge2, qvec) / dot(edge1, pvec);
|
||||
|
||||
P = P + D*rt;
|
||||
|
||||
@ -182,8 +346,13 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, ShaderData *sd, cons
|
||||
#endif
|
||||
}
|
||||
|
||||
/* same as above, except that isect->t is assumed to be in object space for instancing */
|
||||
ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray)
|
||||
/* Same as above, except that isect->t is assumed to be in object space for
|
||||
* instancing.
|
||||
*/
|
||||
ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const Intersection *isect,
|
||||
const Ray *ray)
|
||||
{
|
||||
float3 P = ray->P;
|
||||
float3 D = ray->D;
|
||||
@ -194,7 +363,9 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat
|
||||
#ifdef __OBJECT_MOTION__
|
||||
Transform tfm = sd->ob_itfm;
|
||||
#else
|
||||
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
|
||||
Transform tfm = object_fetch_transform(kg,
|
||||
isect->object,
|
||||
OBJECT_INVERSE_TRANSFORM);
|
||||
#endif
|
||||
|
||||
P = transform_point(&tfm, P);
|
||||
@ -204,10 +375,16 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat
|
||||
|
||||
P = P + D*t;
|
||||
|
||||
float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
|
||||
float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
|
||||
float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z);
|
||||
float rt = Oz * invDz;
|
||||
float3 tri[3];
|
||||
int prim = kernel_tex_fetch(__prim_index, isect->prim);
|
||||
triangle_vertices(kg, prim, tri);
|
||||
|
||||
float3 edge1 = tri[0] - tri[2];
|
||||
float3 edge2 = tri[1] - tri[2];
|
||||
float3 tvec = P - tri[2];
|
||||
float3 qvec = cross(tvec, edge1);
|
||||
float3 pvec = cross(D, edge2);
|
||||
float rt = dot(edge2, qvec) / dot(edge1, pvec);
|
||||
|
||||
P = P + D*rt;
|
||||
|
||||
@ -215,7 +392,9 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat
|
||||
#ifdef __OBJECT_MOTION__
|
||||
Transform tfm = sd->ob_tfm;
|
||||
#else
|
||||
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
|
||||
Transform tfm = object_fetch_transform(kg,
|
||||
isect->object,
|
||||
OBJECT_TRANSFORM);
|
||||
#endif
|
||||
|
||||
P = transform_point(&tfm, P);
|
||||
@ -227,4 +406,6 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat
|
||||
#endif
|
||||
}
|
||||
|
||||
#undef IDX
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@ -1452,6 +1452,22 @@ ccl_device bool map_to_sphere(float *r_u, float *r_v,
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline int util_max_axis(float3 vec)
|
||||
{
|
||||
if(vec.x > vec.y) {
|
||||
if(vec.x > vec.z)
|
||||
return 0;
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
else {
|
||||
if(vec.y > vec.z)
|
||||
return 1;
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_MATH_H__ */
|
||||
|
Loading…
Reference in New Issue
Block a user