Cycles: Use SSE-optimized version of triangle intersection for motion triangles

The title says it all, actually. This gives up to a 10% speedup on the test scenes
here, measured on an i7-6800K.

Render times on the GPU are unreliable here, but there might be some slowdown
caused by the watertight nature of the intersections.
This commit is contained in:
Sergey Sharybin 2017-03-23 16:16:05 +01:00
parent a1348dde2e
commit ba8c7d2ba1
11 changed files with 38 additions and 29 deletions

@ -209,9 +209,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
hit = motion_triangle_intersect(kg,
&isect_precalc,
isect_array,
P,
dir,
ray->time,
PATH_RAY_SHADOW,
object,

@ -214,9 +214,9 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
motion_triangle_intersect_subsurface(kg,
&isect_precalc,
ss_isect,
P,
dir,
ray->time,
object,
prim_addr,

@ -267,9 +267,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
&isect_precalc,
isect,
P,
dir,
ray->time,
visibility,
object,

@ -215,9 +215,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
continue;
}
motion_triangle_intersect(kg,
&isect_precalc,
isect,
P,
dir,
ray->time,
visibility,
object,

@ -243,9 +243,9 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
continue;
}
hit = motion_triangle_intersect(kg,
&isect_precalc,
isect_array,
P,
dir,
ray->time,
visibility,
object,

@ -290,9 +290,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
hit = motion_triangle_intersect(kg,
&isect_precalc,
isect_array,
P,
dir,
ray->time,
PATH_RAY_SHADOW,
object,

@ -270,9 +270,9 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
motion_triangle_intersect_subsurface(kg,
&isect_precalc,
ss_isect,
P,
dir,
ray->time,
object,
prim_addr,

@ -354,9 +354,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
&isect_precalc,
isect,
P,
dir,
ray->time,
visibility,
object,

@ -281,7 +281,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
continue;
}
/* Intersect ray against primitive. */
motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr);
motion_triangle_intersect(kg, &isect_precalc, isect, P, ray->time, visibility, object, prim_addr);
}
break;
}

@ -309,7 +309,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
continue;
}
/* Intersect ray against primitive. */
hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
hit = motion_triangle_intersect(kg, &isect_precalc, isect_array, P, ray->time, visibility, object, prim_addr);
if(hit) {
/* Move on to next entry in intersections array. */
isect_array++;

@ -166,14 +166,15 @@ float3 motion_triangle_refine_subsurface(KernelGlobals *kg,
* time and do a ray intersection with the resulting triangle.
*/
ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
Intersection *isect,
float3 P,
float3 dir,
float time,
uint visibility,
int object,
int prim_addr)
ccl_device_inline bool motion_triangle_intersect(
KernelGlobals *kg,
const TriangleIsectPrecalc *isect_precalc,
Intersection *isect,
float3 P,
float time,
uint visibility,
int object,
int prim_addr)
{
/* Primitive index for vertex location lookup. */
int prim = kernel_tex_fetch(__prim_index, prim_addr);
@ -185,11 +186,15 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
motion_triangle_vertices(kg, fobject, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
if(ray_triangle_intersect_uv(P,
dir,
isect->t,
verts[2], verts[0], verts[1],
&u, &v, &t))
if(ray_triangle_intersect(isect_precalc,
P,
isect->t,
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
(ssef*)verts,
#else
verts[0], verts[1], verts[2],
#endif
&u, &v, &t))
{
#ifdef __VISIBILITY_FLAG__
/* Visibility flag test. we do it here under the assumption
@ -217,9 +222,9 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
#ifdef __SUBSURFACE__
ccl_device_inline void motion_triangle_intersect_subsurface(
KernelGlobals *kg,
const TriangleIsectPrecalc *isect_precalc,
SubsurfaceIntersection *ss_isect,
float3 P,
float3 dir,
float time,
int object,
int prim_addr,
@ -237,11 +242,15 @@ ccl_device_inline void motion_triangle_intersect_subsurface(
motion_triangle_vertices(kg, fobject, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
if(ray_triangle_intersect_uv(P,
dir,
tmax,
verts[2], verts[0], verts[1],
&u, &v, &t))
if(ray_triangle_intersect(isect_precalc,
P,
tmax,
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
(ssef*)verts,
#else
verts[0], verts[1], verts[2],
#endif
&u, &v, &t))
{
for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) {
if(ss_isect->hits[i].t == t) {