Cycles: Switch to reformulated Pluecker ray/triangle intersection
The intention of this commit it to address issues mentioned in the reports T43865,T50164 and T50452. The code is based on Embree code with some extra vectorization to speed up single ray to single triangle intersection. Unfortunately, such a fix is not coming for free. There is some slowdown for AVX2 processors, mainly due to different vectorization code, which caused different number of instructions to be executed and different instructions-per-cycle counters. But on another hand this commit makes pre-AVX2 platforms such as AVX and SSE4.1 a bit faster. The prerformance goes as following: 2.78c AVX2 2.78c AVX Patch AVX2 Patch AVX BMW 05:21.09 06:05.34 05:32.97 (+3.5%) 05:34.97 (-8.5%) Classroom 16:55.36 18:24.51 17:10.41 (+1.4%) 17:15.87 (-6.3%) Fishy Cat 08:08.49 08:36.26 08:09.19 (+0.2%) 08:12.25 (-4.7% Koro 11:22.54 11:45.24 11:13.25 (-1.5%) 11:43.81 (-0.3%) Barcelone 14:18.32 16:09.46 14:15.20 (-0.4%) 14:25.15 (-10.8%) On GPU the performance is about 1.5-2% slower in my tests on GTX1080 but afraid we can't do much as a part of this chaneg here and consider it a price to pay for more proper intersection check. Made in collaboration with Maxym Dmytrychenko, big thanks to him! Reviewers: brecht, juicyfruit, lukasstockner97, dingto Differential Revision: https://developer.blender.org/D1574
This commit is contained in:
parent
69aa6577b3
commit
6ea54fe9ff
@ -101,9 +101,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
||||||
#endif /* __KERNEL_SSE2__ */
|
#endif /* __KERNEL_SSE2__ */
|
||||||
|
|
||||||
TriangleIsectPrecalc isect_precalc;
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
/* traversal loop */
|
/* traversal loop */
|
||||||
do {
|
do {
|
||||||
do {
|
do {
|
||||||
@ -209,9 +206,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
switch(p_type) {
|
switch(p_type) {
|
||||||
case PRIMITIVE_TRIANGLE: {
|
case PRIMITIVE_TRIANGLE: {
|
||||||
hit = triangle_intersect(kg,
|
hit = triangle_intersect(kg,
|
||||||
&isect_precalc,
|
|
||||||
isect_array,
|
isect_array,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
PATH_RAY_SHADOW,
|
PATH_RAY_SHADOW,
|
||||||
object,
|
object,
|
||||||
prim_addr);
|
prim_addr);
|
||||||
@ -220,9 +217,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
#if BVH_FEATURE(BVH_MOTION)
|
#if BVH_FEATURE(BVH_MOTION)
|
||||||
case PRIMITIVE_MOTION_TRIANGLE: {
|
case PRIMITIVE_MOTION_TRIANGLE: {
|
||||||
hit = motion_triangle_intersect(kg,
|
hit = motion_triangle_intersect(kg,
|
||||||
&isect_precalc,
|
|
||||||
isect_array,
|
isect_array,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
ray->time,
|
ray->time,
|
||||||
PATH_RAY_SHADOW,
|
PATH_RAY_SHADOW,
|
||||||
object,
|
object,
|
||||||
@ -325,7 +322,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
|
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
num_hits_in_instance = 0;
|
num_hits_in_instance = 0;
|
||||||
isect_array->t = isect_t;
|
isect_array->t = isect_t;
|
||||||
|
|
||||||
@ -365,8 +361,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
|
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
/* scale isect->t to adjust for instancing */
|
/* scale isect->t to adjust for instancing */
|
||||||
for(int i = 0; i < num_hits_in_instance; i++) {
|
for(int i = 0; i < num_hits_in_instance; i++) {
|
||||||
(isect_array-i-1)->t *= t_fac;
|
(isect_array-i-1)->t *= t_fac;
|
||||||
@ -378,7 +372,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
# else
|
# else
|
||||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
||||||
# endif
|
# endif
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
isect_t = tmax;
|
isect_t = tmax;
|
||||||
|
@ -109,9 +109,6 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
TriangleIsectPrecalc isect_precalc;
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
/* traversal loop */
|
/* traversal loop */
|
||||||
do {
|
do {
|
||||||
do {
|
do {
|
||||||
@ -197,9 +194,9 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
for(; prim_addr < prim_addr2; prim_addr++) {
|
for(; prim_addr < prim_addr2; prim_addr++) {
|
||||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||||
triangle_intersect_subsurface(kg,
|
triangle_intersect_subsurface(kg,
|
||||||
&isect_precalc,
|
|
||||||
ss_isect,
|
ss_isect,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
object,
|
object,
|
||||||
prim_addr,
|
prim_addr,
|
||||||
isect_t,
|
isect_t,
|
||||||
@ -214,9 +211,9 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
for(; prim_addr < prim_addr2; prim_addr++) {
|
for(; prim_addr < prim_addr2; prim_addr++) {
|
||||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||||
motion_triangle_intersect_subsurface(kg,
|
motion_triangle_intersect_subsurface(kg,
|
||||||
&isect_precalc,
|
|
||||||
ss_isect,
|
ss_isect,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
ray->time,
|
ray->time,
|
||||||
object,
|
object,
|
||||||
prim_addr,
|
prim_addr,
|
||||||
|
@ -104,9 +104,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
TriangleIsectPrecalc isect_precalc;
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
/* traversal loop */
|
/* traversal loop */
|
||||||
do {
|
do {
|
||||||
do {
|
do {
|
||||||
@ -238,9 +235,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
BVH_DEBUG_NEXT_INTERSECTION();
|
BVH_DEBUG_NEXT_INTERSECTION();
|
||||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||||
if(triangle_intersect(kg,
|
if(triangle_intersect(kg,
|
||||||
&isect_precalc,
|
|
||||||
isect,
|
isect,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
visibility,
|
visibility,
|
||||||
object,
|
object,
|
||||||
prim_addr))
|
prim_addr))
|
||||||
@ -267,9 +264,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
BVH_DEBUG_NEXT_INTERSECTION();
|
BVH_DEBUG_NEXT_INTERSECTION();
|
||||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||||
if(motion_triangle_intersect(kg,
|
if(motion_triangle_intersect(kg,
|
||||||
&isect_precalc,
|
|
||||||
isect,
|
isect,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
ray->time,
|
ray->time,
|
||||||
visibility,
|
visibility,
|
||||||
object,
|
object,
|
||||||
@ -358,7 +355,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
# else
|
# else
|
||||||
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
|
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||||
# endif
|
# endif
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
# if defined(__KERNEL_SSE2__)
|
# if defined(__KERNEL_SSE2__)
|
||||||
Psplat[0] = ssef(P.x);
|
Psplat[0] = ssef(P.x);
|
||||||
@ -395,7 +391,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
# else
|
# else
|
||||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||||
# endif
|
# endif
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
# if defined(__KERNEL_SSE2__)
|
# if defined(__KERNEL_SSE2__)
|
||||||
Psplat[0] = ssef(P.x);
|
Psplat[0] = ssef(P.x);
|
||||||
|
@ -97,9 +97,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
TriangleIsectPrecalc isect_precalc;
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
/* traversal loop */
|
/* traversal loop */
|
||||||
do {
|
do {
|
||||||
do {
|
do {
|
||||||
@ -194,9 +191,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
triangle_intersect(kg,
|
triangle_intersect(kg,
|
||||||
&isect_precalc,
|
|
||||||
isect,
|
isect,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
visibility,
|
visibility,
|
||||||
object,
|
object,
|
||||||
prim_addr);
|
prim_addr);
|
||||||
@ -215,9 +212,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
motion_triangle_intersect(kg,
|
motion_triangle_intersect(kg,
|
||||||
&isect_precalc,
|
|
||||||
isect,
|
isect,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
ray->time,
|
ray->time,
|
||||||
visibility,
|
visibility,
|
||||||
object,
|
object,
|
||||||
@ -243,8 +240,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
|
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
# if defined(__KERNEL_SSE2__)
|
# if defined(__KERNEL_SSE2__)
|
||||||
Psplat[0] = ssef(P.x);
|
Psplat[0] = ssef(P.x);
|
||||||
Psplat[1] = ssef(P.y);
|
Psplat[1] = ssef(P.y);
|
||||||
@ -286,8 +281,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
# if defined(__KERNEL_SSE2__)
|
# if defined(__KERNEL_SSE2__)
|
||||||
Psplat[0] = ssef(P.x);
|
Psplat[0] = ssef(P.x);
|
||||||
Psplat[1] = ssef(P.y);
|
Psplat[1] = ssef(P.y);
|
||||||
|
@ -101,9 +101,6 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
|
||||||
#endif /* __KERNEL_SSE2__ */
|
#endif /* __KERNEL_SSE2__ */
|
||||||
|
|
||||||
TriangleIsectPrecalc isect_precalc;
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
/* traversal loop */
|
/* traversal loop */
|
||||||
do {
|
do {
|
||||||
do {
|
do {
|
||||||
@ -199,9 +196,9 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
hit = triangle_intersect(kg,
|
hit = triangle_intersect(kg,
|
||||||
&isect_precalc,
|
|
||||||
isect_array,
|
isect_array,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
visibility,
|
visibility,
|
||||||
object,
|
object,
|
||||||
prim_addr);
|
prim_addr);
|
||||||
@ -243,9 +240,9 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
hit = motion_triangle_intersect(kg,
|
hit = motion_triangle_intersect(kg,
|
||||||
&isect_precalc,
|
|
||||||
isect_array,
|
isect_array,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
ray->time,
|
ray->time,
|
||||||
visibility,
|
visibility,
|
||||||
object,
|
object,
|
||||||
@ -294,7 +291,6 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
|
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
num_hits_in_instance = 0;
|
num_hits_in_instance = 0;
|
||||||
isect_array->t = isect_t;
|
isect_array->t = isect_t;
|
||||||
|
|
||||||
@ -340,7 +336,6 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
# else
|
# else
|
||||||
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
|
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
|
||||||
# endif
|
# endif
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
/* Scale isect->t to adjust for instancing. */
|
/* Scale isect->t to adjust for instancing. */
|
||||||
for(int i = 0; i < num_hits_in_instance; i++) {
|
for(int i = 0; i < num_hits_in_instance; i++) {
|
||||||
(isect_array-i-1)->t *= t_fac;
|
(isect_array-i-1)->t *= t_fac;
|
||||||
@ -352,7 +347,6 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
# else
|
# else
|
||||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
||||||
# endif
|
# endif
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
isect_t = tmax;
|
isect_t = tmax;
|
||||||
|
@ -97,9 +97,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
&near_x, &near_y, &near_z,
|
&near_x, &near_y, &near_z,
|
||||||
&far_x, &far_y, &far_z);
|
&far_x, &far_y, &far_z);
|
||||||
|
|
||||||
TriangleIsectPrecalc isect_precalc;
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
/* Traversal loop. */
|
/* Traversal loop. */
|
||||||
do {
|
do {
|
||||||
do {
|
do {
|
||||||
@ -290,9 +287,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
switch(p_type) {
|
switch(p_type) {
|
||||||
case PRIMITIVE_TRIANGLE: {
|
case PRIMITIVE_TRIANGLE: {
|
||||||
hit = triangle_intersect(kg,
|
hit = triangle_intersect(kg,
|
||||||
&isect_precalc,
|
|
||||||
isect_array,
|
isect_array,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
PATH_RAY_SHADOW,
|
PATH_RAY_SHADOW,
|
||||||
object,
|
object,
|
||||||
prim_addr);
|
prim_addr);
|
||||||
@ -301,9 +298,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
#if BVH_FEATURE(BVH_MOTION)
|
#if BVH_FEATURE(BVH_MOTION)
|
||||||
case PRIMITIVE_MOTION_TRIANGLE: {
|
case PRIMITIVE_MOTION_TRIANGLE: {
|
||||||
hit = motion_triangle_intersect(kg,
|
hit = motion_triangle_intersect(kg,
|
||||||
&isect_precalc,
|
|
||||||
isect_array,
|
isect_array,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
ray->time,
|
ray->time,
|
||||||
PATH_RAY_SHADOW,
|
PATH_RAY_SHADOW,
|
||||||
object,
|
object,
|
||||||
@ -425,8 +422,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
++stack_ptr;
|
++stack_ptr;
|
||||||
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
|
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
|
||||||
traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
|
traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
|
||||||
@ -482,8 +477,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
object = OBJECT_NONE;
|
object = OBJECT_NONE;
|
||||||
node_addr = traversal_stack[stack_ptr].addr;
|
node_addr = traversal_stack[stack_ptr].addr;
|
||||||
--stack_ptr;
|
--stack_ptr;
|
||||||
|
@ -105,9 +105,6 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
&near_x, &near_y, &near_z,
|
&near_x, &near_y, &near_z,
|
||||||
&far_x, &far_y, &far_z);
|
&far_x, &far_y, &far_z);
|
||||||
|
|
||||||
TriangleIsectPrecalc isect_precalc;
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
/* Traversal loop. */
|
/* Traversal loop. */
|
||||||
do {
|
do {
|
||||||
do {
|
do {
|
||||||
@ -253,9 +250,9 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
for(; prim_addr < prim_addr2; prim_addr++) {
|
for(; prim_addr < prim_addr2; prim_addr++) {
|
||||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||||
triangle_intersect_subsurface(kg,
|
triangle_intersect_subsurface(kg,
|
||||||
&isect_precalc,
|
|
||||||
ss_isect,
|
ss_isect,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
object,
|
object,
|
||||||
prim_addr,
|
prim_addr,
|
||||||
isect_t,
|
isect_t,
|
||||||
@ -270,9 +267,9 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
for(; prim_addr < prim_addr2; prim_addr++) {
|
for(; prim_addr < prim_addr2; prim_addr++) {
|
||||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||||
motion_triangle_intersect_subsurface(kg,
|
motion_triangle_intersect_subsurface(kg,
|
||||||
&isect_precalc,
|
|
||||||
ss_isect,
|
ss_isect,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
ray->time,
|
ray->time,
|
||||||
object,
|
object,
|
||||||
prim_addr,
|
prim_addr,
|
||||||
|
@ -106,9 +106,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
&near_x, &near_y, &near_z,
|
&near_x, &near_y, &near_z,
|
||||||
&far_x, &far_y, &far_z);
|
&far_x, &far_y, &far_z);
|
||||||
|
|
||||||
TriangleIsectPrecalc isect_precalc;
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
/* Traversal loop. */
|
/* Traversal loop. */
|
||||||
do {
|
do {
|
||||||
do {
|
do {
|
||||||
@ -333,9 +330,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
BVH_DEBUG_NEXT_INTERSECTION();
|
BVH_DEBUG_NEXT_INTERSECTION();
|
||||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||||
if(triangle_intersect(kg,
|
if(triangle_intersect(kg,
|
||||||
&isect_precalc,
|
|
||||||
isect,
|
isect,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
visibility,
|
visibility,
|
||||||
object,
|
object,
|
||||||
prim_addr)) {
|
prim_addr)) {
|
||||||
@ -354,9 +351,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
BVH_DEBUG_NEXT_INTERSECTION();
|
BVH_DEBUG_NEXT_INTERSECTION();
|
||||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||||
if(motion_triangle_intersect(kg,
|
if(motion_triangle_intersect(kg,
|
||||||
&isect_precalc,
|
|
||||||
isect,
|
isect,
|
||||||
P,
|
P,
|
||||||
|
dir,
|
||||||
ray->time,
|
ray->time,
|
||||||
visibility,
|
visibility,
|
||||||
object,
|
object,
|
||||||
@ -447,8 +444,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
++stack_ptr;
|
++stack_ptr;
|
||||||
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
|
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
|
||||||
traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
|
traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
|
||||||
@ -489,8 +484,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
object = OBJECT_NONE;
|
object = OBJECT_NONE;
|
||||||
node_addr = traversal_stack[stack_ptr].addr;
|
node_addr = traversal_stack[stack_ptr].addr;
|
||||||
node_dist = traversal_stack[stack_ptr].dist;
|
node_dist = traversal_stack[stack_ptr].dist;
|
||||||
|
@ -91,9 +91,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
&near_x, &near_y, &near_z,
|
&near_x, &near_y, &near_z,
|
||||||
&far_x, &far_y, &far_z);
|
&far_x, &far_y, &far_z);
|
||||||
|
|
||||||
TriangleIsectPrecalc isect_precalc;
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
/* Traversal loop. */
|
/* Traversal loop. */
|
||||||
do {
|
do {
|
||||||
do {
|
do {
|
||||||
@ -266,7 +263,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* Intersect ray against primitive. */
|
/* Intersect ray against primitive. */
|
||||||
triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, prim_addr);
|
triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -281,7 +278,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* Intersect ray against primitive. */
|
/* Intersect ray against primitive. */
|
||||||
motion_triangle_intersect(kg, &isect_precalc, isect, P, ray->time, visibility, object, prim_addr);
|
motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -316,8 +313,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
++stack_ptr;
|
++stack_ptr;
|
||||||
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
|
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
|
||||||
traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
|
traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
|
||||||
@ -362,8 +357,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
object = OBJECT_NONE;
|
object = OBJECT_NONE;
|
||||||
node_addr = traversal_stack[stack_ptr].addr;
|
node_addr = traversal_stack[stack_ptr].addr;
|
||||||
--stack_ptr;
|
--stack_ptr;
|
||||||
|
@ -95,9 +95,6 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
&near_x, &near_y, &near_z,
|
&near_x, &near_y, &near_z,
|
||||||
&far_x, &far_y, &far_z);
|
&far_x, &far_y, &far_z);
|
||||||
|
|
||||||
TriangleIsectPrecalc isect_precalc;
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
/* Traversal loop. */
|
/* Traversal loop. */
|
||||||
do {
|
do {
|
||||||
do {
|
do {
|
||||||
@ -271,7 +268,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* Intersect ray against primitive. */
|
/* Intersect ray against primitive. */
|
||||||
hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, prim_addr);
|
hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
|
||||||
if(hit) {
|
if(hit) {
|
||||||
/* Move on to next entry in intersections array. */
|
/* Move on to next entry in intersections array. */
|
||||||
isect_array++;
|
isect_array++;
|
||||||
@ -309,7 +306,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* Intersect ray against primitive. */
|
/* Intersect ray against primitive. */
|
||||||
hit = motion_triangle_intersect(kg, &isect_precalc, isect_array, P, ray->time, visibility, object, prim_addr);
|
hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
|
||||||
if(hit) {
|
if(hit) {
|
||||||
/* Move on to next entry in intersections array. */
|
/* Move on to next entry in intersections array. */
|
||||||
isect_array++;
|
isect_array++;
|
||||||
@ -367,7 +364,6 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
num_hits_in_instance = 0;
|
num_hits_in_instance = 0;
|
||||||
isect_array->t = isect_t;
|
isect_array->t = isect_t;
|
||||||
|
|
||||||
@ -432,8 +428,6 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ray_triangle_intersect_precalc(dir, &isect_precalc);
|
|
||||||
|
|
||||||
object = OBJECT_NONE;
|
object = OBJECT_NONE;
|
||||||
node_addr = traversal_stack[stack_ptr].addr;
|
node_addr = traversal_stack[stack_ptr].addr;
|
||||||
--stack_ptr;
|
--stack_ptr;
|
||||||
|
@ -168,9 +168,9 @@ float3 motion_triangle_refine_subsurface(KernelGlobals *kg,
|
|||||||
|
|
||||||
ccl_device_inline bool motion_triangle_intersect(
|
ccl_device_inline bool motion_triangle_intersect(
|
||||||
KernelGlobals *kg,
|
KernelGlobals *kg,
|
||||||
const TriangleIsectPrecalc *isect_precalc,
|
|
||||||
Intersection *isect,
|
Intersection *isect,
|
||||||
float3 P,
|
float3 P,
|
||||||
|
float3 dir,
|
||||||
float time,
|
float time,
|
||||||
uint visibility,
|
uint visibility,
|
||||||
int object,
|
int object,
|
||||||
@ -186,10 +186,10 @@ ccl_device_inline bool motion_triangle_intersect(
|
|||||||
motion_triangle_vertices(kg, fobject, prim, time, verts);
|
motion_triangle_vertices(kg, fobject, prim, time, verts);
|
||||||
/* Ray-triangle intersection, unoptimized. */
|
/* Ray-triangle intersection, unoptimized. */
|
||||||
float t, u, v;
|
float t, u, v;
|
||||||
if(ray_triangle_intersect(isect_precalc,
|
if(ray_triangle_intersect(P,
|
||||||
P,
|
dir,
|
||||||
isect->t,
|
isect->t,
|
||||||
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
|
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
|
||||||
(ssef*)verts,
|
(ssef*)verts,
|
||||||
#else
|
#else
|
||||||
verts[0], verts[1], verts[2],
|
verts[0], verts[1], verts[2],
|
||||||
@ -222,9 +222,9 @@ ccl_device_inline bool motion_triangle_intersect(
|
|||||||
#ifdef __SUBSURFACE__
|
#ifdef __SUBSURFACE__
|
||||||
ccl_device_inline void motion_triangle_intersect_subsurface(
|
ccl_device_inline void motion_triangle_intersect_subsurface(
|
||||||
KernelGlobals *kg,
|
KernelGlobals *kg,
|
||||||
const TriangleIsectPrecalc *isect_precalc,
|
|
||||||
SubsurfaceIntersection *ss_isect,
|
SubsurfaceIntersection *ss_isect,
|
||||||
float3 P,
|
float3 P,
|
||||||
|
float3 dir,
|
||||||
float time,
|
float time,
|
||||||
int object,
|
int object,
|
||||||
int prim_addr,
|
int prim_addr,
|
||||||
@ -242,10 +242,10 @@ ccl_device_inline void motion_triangle_intersect_subsurface(
|
|||||||
motion_triangle_vertices(kg, fobject, prim, time, verts);
|
motion_triangle_vertices(kg, fobject, prim, time, verts);
|
||||||
/* Ray-triangle intersection, unoptimized. */
|
/* Ray-triangle intersection, unoptimized. */
|
||||||
float t, u, v;
|
float t, u, v;
|
||||||
if(ray_triangle_intersect(isect_precalc,
|
if(ray_triangle_intersect(P,
|
||||||
P,
|
dir,
|
||||||
tmax,
|
tmax,
|
||||||
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
|
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
|
||||||
(ssef*)verts,
|
(ssef*)verts,
|
||||||
#else
|
#else
|
||||||
verts[0], verts[1], verts[2],
|
verts[0], verts[1], verts[2],
|
||||||
|
@ -22,25 +22,16 @@
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
/* Ray-Triangle intersection for BVH traversal
|
|
||||||
*
|
|
||||||
* Sven Woop
|
|
||||||
* Watertight Ray/Triangle Intersection
|
|
||||||
*
|
|
||||||
* http://jcgt.org/published/0002/01/05/paper.pdf
|
|
||||||
*/
|
|
||||||
|
|
||||||
ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
|
ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
|
||||||
const TriangleIsectPrecalc *isect_precalc,
|
|
||||||
Intersection *isect,
|
Intersection *isect,
|
||||||
float3 P,
|
float3 P,
|
||||||
|
float3 dir,
|
||||||
uint visibility,
|
uint visibility,
|
||||||
int object,
|
int object,
|
||||||
int prim_addr)
|
int prim_addr)
|
||||||
{
|
{
|
||||||
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
|
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
|
||||||
|
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
|
||||||
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
|
|
||||||
const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
|
const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
|
||||||
#else
|
#else
|
||||||
const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
|
const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
|
||||||
@ -48,9 +39,10 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
|
|||||||
tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
|
tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
|
||||||
#endif
|
#endif
|
||||||
float t, u, v;
|
float t, u, v;
|
||||||
if(ray_triangle_intersect(isect_precalc,
|
if(ray_triangle_intersect(P,
|
||||||
P, isect->t,
|
dir,
|
||||||
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
|
isect->t,
|
||||||
|
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
|
||||||
ssef_verts,
|
ssef_verts,
|
||||||
#else
|
#else
|
||||||
float4_to_float3(tri_a),
|
float4_to_float3(tri_a),
|
||||||
@ -86,9 +78,9 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
|
|||||||
#ifdef __SUBSURFACE__
|
#ifdef __SUBSURFACE__
|
||||||
ccl_device_inline void triangle_intersect_subsurface(
|
ccl_device_inline void triangle_intersect_subsurface(
|
||||||
KernelGlobals *kg,
|
KernelGlobals *kg,
|
||||||
const TriangleIsectPrecalc *isect_precalc,
|
|
||||||
SubsurfaceIntersection *ss_isect,
|
SubsurfaceIntersection *ss_isect,
|
||||||
float3 P,
|
float3 P,
|
||||||
|
float3 dir,
|
||||||
int object,
|
int object,
|
||||||
int prim_addr,
|
int prim_addr,
|
||||||
float tmax,
|
float tmax,
|
||||||
@ -96,8 +88,7 @@ ccl_device_inline void triangle_intersect_subsurface(
|
|||||||
int max_hits)
|
int max_hits)
|
||||||
{
|
{
|
||||||
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
|
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
|
||||||
|
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
|
||||||
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
|
|
||||||
const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
|
const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
|
||||||
#else
|
#else
|
||||||
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
|
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
|
||||||
@ -105,14 +96,13 @@ ccl_device_inline void triangle_intersect_subsurface(
|
|||||||
tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
|
tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
|
||||||
#endif
|
#endif
|
||||||
float t, u, v;
|
float t, u, v;
|
||||||
if(!ray_triangle_intersect(isect_precalc,
|
if(!ray_triangle_intersect(P,
|
||||||
P, tmax,
|
dir,
|
||||||
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
|
tmax,
|
||||||
|
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
|
||||||
ssef_verts,
|
ssef_verts,
|
||||||
#else
|
#else
|
||||||
tri_a,
|
tri_a, tri_b, tri_c,
|
||||||
tri_b,
|
|
||||||
tri_c,
|
|
||||||
#endif
|
#endif
|
||||||
&u, &v, &t))
|
&u, &v, &t))
|
||||||
{
|
{
|
||||||
@ -150,15 +140,14 @@ ccl_device_inline void triangle_intersect_subsurface(
|
|||||||
isect->t = t;
|
isect->t = t;
|
||||||
|
|
||||||
/* Record geometric normal. */
|
/* Record geometric normal. */
|
||||||
/* TODO(sergey): Check whether it's faster to re-use ssef verts. */
|
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
|
||||||
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
|
|
||||||
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
|
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
|
||||||
tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)),
|
tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)),
|
||||||
tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
|
tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
|
||||||
#endif
|
#endif
|
||||||
ss_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
|
ss_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
|
||||||
}
|
}
|
||||||
#endif
|
#endif /* __SUBSURFACE__ */
|
||||||
|
|
||||||
/* Refine triangle intersection to more precise hit point. For rays that travel
|
/* Refine triangle intersection to more precise hit point. For rays that travel
|
||||||
* far the precision is often not so good, this reintersects the primitive from
|
* far the precision is often not so good, this reintersects the primitive from
|
||||||
|
@ -79,216 +79,106 @@ ccl_device bool ray_aligned_disk_intersect(
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Optimized watertight ray-triangle intersection.
|
|
||||||
*
|
|
||||||
* Sven Woop
|
|
||||||
* Watertight Ray/Triangle Intersection
|
|
||||||
*
|
|
||||||
* http://jcgt.org/published/0002/01/05/paper.pdf
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Precalculated data for the ray->tri intersection. */
|
|
||||||
typedef struct TriangleIsectPrecalc {
|
|
||||||
/* Maximal dimension kz, and orthogonal dimensions. */
|
|
||||||
int kx, ky, kz;
|
|
||||||
|
|
||||||
/* Shear constants. */
|
|
||||||
float Sx, Sy, Sz;
|
|
||||||
} TriangleIsectPrecalc;
|
|
||||||
|
|
||||||
/* Workaround stupidness of CUDA/OpenCL which doesn't allow to access indexed
|
|
||||||
* component of float3 value.
|
|
||||||
*/
|
|
||||||
#ifdef __KERNEL_GPU__
|
|
||||||
# define IDX(vec, idx) \
|
|
||||||
((idx == 0) ? ((vec).x) : ( (idx == 1) ? ((vec).y) : ((vec).z) ))
|
|
||||||
#else
|
|
||||||
# define IDX(vec, idx) ((vec)[idx])
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if (defined(__KERNEL_OPENCL_APPLE__)) || \
|
|
||||||
(defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86)))
|
|
||||||
ccl_device_noinline
|
|
||||||
#else
|
|
||||||
ccl_device_inline
|
|
||||||
#endif
|
|
||||||
void ray_triangle_intersect_precalc(float3 dir,
|
|
||||||
TriangleIsectPrecalc *isect_precalc)
|
|
||||||
{
|
|
||||||
/* Calculate dimension where the ray direction is maximal. */
|
|
||||||
#ifndef __KERNEL_SSE__
|
|
||||||
int kz = util_max_axis(make_float3(fabsf(dir.x),
|
|
||||||
fabsf(dir.y),
|
|
||||||
fabsf(dir.z)));
|
|
||||||
int kx = kz + 1; if(kx == 3) kx = 0;
|
|
||||||
int ky = kx + 1; if(ky == 3) ky = 0;
|
|
||||||
#else
|
|
||||||
int kx, ky, kz;
|
|
||||||
/* Avoiding mispredicted branch on direction. */
|
|
||||||
kz = util_max_axis(fabs(dir));
|
|
||||||
static const char inc_xaxis[] = {1, 2, 0, 55};
|
|
||||||
static const char inc_yaxis[] = {2, 0, 1, 55};
|
|
||||||
kx = inc_xaxis[kz];
|
|
||||||
ky = inc_yaxis[kz];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
float dir_kz = IDX(dir, kz);
|
|
||||||
|
|
||||||
/* Swap kx and ky dimensions to preserve winding direction of triangles. */
|
|
||||||
if(dir_kz < 0.0f) {
|
|
||||||
int tmp = kx;
|
|
||||||
kx = ky;
|
|
||||||
ky = tmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Calculate the shear constants. */
|
|
||||||
float inv_dir_z = 1.0f / dir_kz;
|
|
||||||
isect_precalc->Sx = IDX(dir, kx) * inv_dir_z;
|
|
||||||
isect_precalc->Sy = IDX(dir, ky) * inv_dir_z;
|
|
||||||
isect_precalc->Sz = inv_dir_z;
|
|
||||||
|
|
||||||
/* Store the dimensions. */
|
|
||||||
isect_precalc->kx = kx;
|
|
||||||
isect_precalc->ky = ky;
|
|
||||||
isect_precalc->kz = kz;
|
|
||||||
}
|
|
||||||
|
|
||||||
ccl_device_forceinline bool ray_triangle_intersect(
|
ccl_device_forceinline bool ray_triangle_intersect(
|
||||||
const TriangleIsectPrecalc *isect_precalc,
|
float3 ray_P, float3 ray_dir, float ray_t,
|
||||||
float3 ray_P, float ray_t,
|
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
|
||||||
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
|
|
||||||
const ssef *ssef_verts,
|
const ssef *ssef_verts,
|
||||||
#else
|
#else
|
||||||
const float3 tri_a, const float3 tri_b, const float3 tri_c,
|
const float3 tri_a, const float3 tri_b, const float3 tri_c,
|
||||||
#endif
|
#endif
|
||||||
float *isect_u, float *isect_v, float *isect_t)
|
float *isect_u, float *isect_v, float *isect_t)
|
||||||
{
|
{
|
||||||
const int kx = isect_precalc->kx;
|
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
|
||||||
const int ky = isect_precalc->ky;
|
typedef ssef float3;
|
||||||
const int kz = isect_precalc->kz;
|
const float3 tri_a(ssef_verts[0]);
|
||||||
const float Sx = isect_precalc->Sx;
|
const float3 tri_b(ssef_verts[1]);
|
||||||
const float Sy = isect_precalc->Sy;
|
const float3 tri_c(ssef_verts[2]);
|
||||||
const float Sz = isect_precalc->Sz;
|
const float3 P(ray_P);
|
||||||
|
const float3 dir(ray_dir);
|
||||||
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
|
|
||||||
const avxf avxf_P(ray_P.m128, ray_P.m128);
|
|
||||||
const avxf tri_ab(_mm256_loadu_ps((float *)(ssef_verts)));
|
|
||||||
const avxf tri_bc(_mm256_loadu_ps((float *)(ssef_verts + 1)));
|
|
||||||
|
|
||||||
const avxf AB = tri_ab - avxf_P;
|
|
||||||
const avxf BC = tri_bc - avxf_P;
|
|
||||||
|
|
||||||
const __m256i permute_mask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx);
|
|
||||||
|
|
||||||
const avxf AB_k = shuffle(AB, permute_mask);
|
|
||||||
const avxf BC_k = shuffle(BC, permute_mask);
|
|
||||||
|
|
||||||
/* Akz, Akz, Bkz, Bkz, Bkz, Bkz, Ckz, Ckz */
|
|
||||||
const avxf ABBC_kz = shuffle<2>(AB_k, BC_k);
|
|
||||||
|
|
||||||
/* Akx, Aky, Bkx, Bky, Bkx,Bky, Ckx, Cky */
|
|
||||||
const avxf ABBC_kxy = shuffle<0,1,0,1>(AB_k, BC_k);
|
|
||||||
|
|
||||||
const avxf Sxy(Sy, Sx, Sy, Sx);
|
|
||||||
|
|
||||||
/* Ax, Ay, Bx, By, Bx, By, Cx, Cy */
|
|
||||||
const avxf ABBC_xy = nmadd(ABBC_kz, Sxy, ABBC_kxy);
|
|
||||||
|
|
||||||
float ABBC_kz_array[8];
|
|
||||||
_mm256_storeu_ps((float*)&ABBC_kz_array, ABBC_kz);
|
|
||||||
|
|
||||||
const float A_kz = ABBC_kz_array[0];
|
|
||||||
const float B_kz = ABBC_kz_array[2];
|
|
||||||
const float C_kz = ABBC_kz_array[6];
|
|
||||||
|
|
||||||
/* By, Bx, Cy, Cx, By, Bx, Ay, Ax */
|
|
||||||
const avxf BCBA_yx = permute<3,2,7,6,3,2,1,0>(ABBC_xy);
|
|
||||||
|
|
||||||
const avxf neg_mask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000);
|
|
||||||
|
|
||||||
/* W U V
|
|
||||||
* (AxBy-AyBx) (BxCy-ByCx) XX XX (BxBy-ByBx) (CxAy-CyAx) XX XX
|
|
||||||
*/
|
|
||||||
const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, neg_mask /* Dont care */);
|
|
||||||
|
|
||||||
const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ neg_mask;
|
|
||||||
|
|
||||||
/* Calculate scaled barycentric coordinates. */
|
|
||||||
float WUVW_array[4];
|
|
||||||
_mm_storeu_ps((float*)&WUVW_array, _mm256_castps256_ps128 (WUVWnegWUVW));
|
|
||||||
|
|
||||||
const float W = WUVW_array[0];
|
|
||||||
const float U = WUVW_array[1];
|
|
||||||
const float V = WUVW_array[2];
|
|
||||||
|
|
||||||
const int WUVW_mask = 0x7 & _mm256_movemask_ps(WUVWnegWUVW);
|
|
||||||
const int WUVW_zero = 0x7 & _mm256_movemask_ps(_mm256_cmp_ps(WUVWnegWUVW,
|
|
||||||
_mm256_setzero_ps(), 0));
|
|
||||||
|
|
||||||
if(!((WUVW_mask == 7) || (WUVW_mask == 0)) && ((WUVW_mask | WUVW_zero) != 7)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
|
# define dot3(a, b) dot(a, b)
|
||||||
|
const float3 P = ray_P;
|
||||||
|
const float3 dir = ray_dir;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Calculate vertices relative to ray origin. */
|
/* Calculate vertices relative to ray origin. */
|
||||||
const float3 A = make_float3(tri_a.x - ray_P.x, tri_a.y - ray_P.y, tri_a.z - ray_P.z);
|
const float3 v0 = tri_c - P;
|
||||||
const float3 B = make_float3(tri_b.x - ray_P.x, tri_b.y - ray_P.y, tri_b.z - ray_P.z);
|
const float3 v1 = tri_a - P;
|
||||||
const float3 C = make_float3(tri_c.x - ray_P.x, tri_c.y - ray_P.y, tri_c.z - ray_P.z);
|
const float3 v2 = tri_b - P;
|
||||||
|
|
||||||
const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
|
/* Calculate triangle edges. */
|
||||||
const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
|
const float3 e0 = v2 - v0;
|
||||||
const float C_kx = IDX(C, kx), C_ky = IDX(C, ky), C_kz = IDX(C, kz);
|
const float3 e1 = v0 - v1;
|
||||||
|
const float3 e2 = v1 - v2;
|
||||||
|
|
||||||
/* Perform shear and scale of vertices. */
|
/* Perform edge tests. */
|
||||||
const float Ax = A_kx - Sx * A_kz;
|
#ifdef __KERNEL_SSE2__
|
||||||
const float Ay = A_ky - Sy * A_kz;
|
const float3 crossU = cross(v2 + v0, e0);
|
||||||
const float Bx = B_kx - Sx * B_kz;
|
const float3 crossV = cross(v0 + v1, e1);
|
||||||
const float By = B_ky - Sy * B_kz;
|
const float3 crossW = cross(v1 + v2, e2);
|
||||||
const float Cx = C_kx - Sx * C_kz;
|
# ifndef __KERNEL_SSE__
|
||||||
const float Cy = C_ky - Sy * C_kz;
|
const ssef crossX(crossU.x, crossV.x, crossW.x, crossW.x);
|
||||||
|
const ssef crossY(crossU.y, crossV.y, crossW.y, crossW.y);
|
||||||
|
const ssef crossZ(crossU.z, crossV.z, crossW.z, crossW.z);
|
||||||
|
# else
|
||||||
|
ssef crossX(crossU);
|
||||||
|
ssef crossY(crossV);
|
||||||
|
ssef crossZ(crossW);
|
||||||
|
ssef zero = _mm_setzero_ps();
|
||||||
|
_MM_TRANSPOSE4_PS(crossX, crossY, crossZ, zero);
|
||||||
|
# endif
|
||||||
|
const ssef dirX(ray_dir.x);
|
||||||
|
const ssef dirY(ray_dir.y);
|
||||||
|
const ssef dirZ(ray_dir.z);
|
||||||
|
/*const*/ ssef UVWW = crossX*dirX + crossY*dirY + crossZ*dirZ;
|
||||||
|
const float minUVW = reduce_min(UVWW);
|
||||||
|
const float maxUVW = reduce_max(UVWW);
|
||||||
|
#else /* __KERNEL_SSE2__ */
|
||||||
|
const float U = dot(cross(v2 + v0, e0), ray_dir);
|
||||||
|
const float V = dot(cross(v0 + v1, e1), ray_dir);
|
||||||
|
const float W = dot(cross(v1 + v2, e2), ray_dir);
|
||||||
|
const float minUVW = min(U, min(V, W));
|
||||||
|
const float maxUVW = max(U, max(V, W));
|
||||||
|
#endif /* __KERNEL_SSE2__ */
|
||||||
|
|
||||||
/* Calculate scaled barycentric coordinates. */
|
if(minUVW < 0.0f && maxUVW > 0.0f) {
|
||||||
float U = Cx * By - Cy * Bx;
|
|
||||||
float V = Ax * Cy - Ay * Cx;
|
|
||||||
float W = Bx * Ay - By * Ax;
|
|
||||||
if((U < 0.0f || V < 0.0f || W < 0.0f) &&
|
|
||||||
(U > 0.0f || V > 0.0f || W > 0.0f))
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Calculate determinant. */
|
|
||||||
float det = U + V + W;
|
|
||||||
if(UNLIKELY(det == 0.0f)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Calculate scaled z-coordinates of vertices and use them to calculate
|
/* Calculate geometry normal and denominator. */
|
||||||
* the hit distance.
|
const float3 Ng1 = cross(e1, e0);
|
||||||
*/
|
//const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0);
|
||||||
const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz;
|
const float3 Ng = Ng1 + Ng1;
|
||||||
const int sign_det = (__float_as_int(det) & 0x80000000);
|
const float den = dot3(Ng, dir);
|
||||||
const float sign_T = xor_signmask(T, sign_det);
|
/* Avoid division by 0. */
|
||||||
|
if(UNLIKELY(den == 0.0f)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Perform depth test. */
|
||||||
|
const float T = dot3(v0, Ng);
|
||||||
|
const int sign_den = (__float_as_int(den) & 0x80000000);
|
||||||
|
const float sign_T = xor_signmask(T, sign_den);
|
||||||
if((sign_T < 0.0f) ||
|
if((sign_T < 0.0f) ||
|
||||||
(sign_T > ray_t * xor_signmask(det, sign_det)))
|
(sign_T > ray_t * xor_signmask(den, sign_den)))
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Workaround precision error on CUDA. */
|
const float inv_den = 1.0f / den;
|
||||||
#ifdef __KERNEL_CUDA__
|
#ifdef __KERNEL_SSE2__
|
||||||
if(A == B && B == C) {
|
UVWW *= inv_den;
|
||||||
return false;
|
_mm_store_ss(isect_u, UVWW);
|
||||||
}
|
_mm_store_ss(isect_v, shuffle<1,1,3,3>(UVWW));
|
||||||
|
#else
|
||||||
|
*isect_u = U * inv_den;
|
||||||
|
*isect_v = V * inv_den;
|
||||||
#endif
|
#endif
|
||||||
const float inv_det = 1.0f / det;
|
*isect_t = T * inv_den;
|
||||||
*isect_u = U * inv_det;
|
|
||||||
*isect_v = V * inv_det;
|
|
||||||
*isect_t = T * inv_det;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
|
|
||||||
#undef IDX
|
#undef dot3
|
||||||
|
}
|
||||||
|
|
||||||
ccl_device bool ray_quad_intersect(float3 ray_P, float3 ray_D,
|
ccl_device bool ray_quad_intersect(float3 ray_P, float3 ray_D,
|
||||||
float ray_mint, float ray_maxt,
|
float ray_mint, float ray_maxt,
|
||||||
|
Loading…
Reference in New Issue
Block a user