forked from bartvdbraak/blender
Cycles: Use SSE-optimized version of triangle intersection for motion triangles
The title says it all, actually. Gives up to a 10% speedup on test scenes here on an i7-6800K. Render times on GPU are unreliable here, but there might be some slowdown caused by the watertight nature of the intersections.
This commit is contained in:
parent
a1348dde2e
commit
ba8c7d2ba1
@ -209,9 +209,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
case PRIMITIVE_MOTION_TRIANGLE: {
|
||||
hit = motion_triangle_intersect(kg,
|
||||
&isect_precalc,
|
||||
isect_array,
|
||||
P,
|
||||
dir,
|
||||
ray->time,
|
||||
PATH_RAY_SHADOW,
|
||||
object,
|
||||
|
@ -214,9 +214,9 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
for(; prim_addr < prim_addr2; prim_addr++) {
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
motion_triangle_intersect_subsurface(kg,
|
||||
&isect_precalc,
|
||||
ss_isect,
|
||||
P,
|
||||
dir,
|
||||
ray->time,
|
||||
object,
|
||||
prim_addr,
|
||||
|
@ -267,9 +267,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
BVH_DEBUG_NEXT_INTERSECTION();
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
if(motion_triangle_intersect(kg,
|
||||
&isect_precalc,
|
||||
isect,
|
||||
P,
|
||||
dir,
|
||||
ray->time,
|
||||
visibility,
|
||||
object,
|
||||
|
@ -215,9 +215,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
continue;
|
||||
}
|
||||
motion_triangle_intersect(kg,
|
||||
&isect_precalc,
|
||||
isect,
|
||||
P,
|
||||
dir,
|
||||
ray->time,
|
||||
visibility,
|
||||
object,
|
||||
|
@ -243,9 +243,9 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
continue;
|
||||
}
|
||||
hit = motion_triangle_intersect(kg,
|
||||
&isect_precalc,
|
||||
isect_array,
|
||||
P,
|
||||
dir,
|
||||
ray->time,
|
||||
visibility,
|
||||
object,
|
||||
|
@ -290,9 +290,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
case PRIMITIVE_MOTION_TRIANGLE: {
|
||||
hit = motion_triangle_intersect(kg,
|
||||
&isect_precalc,
|
||||
isect_array,
|
||||
P,
|
||||
dir,
|
||||
ray->time,
|
||||
PATH_RAY_SHADOW,
|
||||
object,
|
||||
|
@ -270,9 +270,9 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
for(; prim_addr < prim_addr2; prim_addr++) {
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
motion_triangle_intersect_subsurface(kg,
|
||||
&isect_precalc,
|
||||
ss_isect,
|
||||
P,
|
||||
dir,
|
||||
ray->time,
|
||||
object,
|
||||
prim_addr,
|
||||
|
@ -354,9 +354,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
BVH_DEBUG_NEXT_INTERSECTION();
|
||||
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
|
||||
if(motion_triangle_intersect(kg,
|
||||
&isect_precalc,
|
||||
isect,
|
||||
P,
|
||||
dir,
|
||||
ray->time,
|
||||
visibility,
|
||||
object,
|
||||
|
@ -281,7 +281,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
continue;
|
||||
}
|
||||
/* Intersect ray against primitive. */
|
||||
motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr);
|
||||
motion_triangle_intersect(kg, &isect_precalc, isect, P, ray->time, visibility, object, prim_addr);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -309,7 +309,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
continue;
|
||||
}
|
||||
/* Intersect ray against primitive. */
|
||||
hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
|
||||
hit = motion_triangle_intersect(kg, &isect_precalc, isect_array, P, ray->time, visibility, object, prim_addr);
|
||||
if(hit) {
|
||||
/* Move on to next entry in intersections array. */
|
||||
isect_array++;
|
||||
|
@ -166,14 +166,15 @@ float3 motion_triangle_refine_subsurface(KernelGlobals *kg,
|
||||
* time and do a ray intersection with the resulting triangle.
|
||||
*/
|
||||
|
||||
ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
|
||||
Intersection *isect,
|
||||
float3 P,
|
||||
float3 dir,
|
||||
float time,
|
||||
uint visibility,
|
||||
int object,
|
||||
int prim_addr)
|
||||
ccl_device_inline bool motion_triangle_intersect(
|
||||
KernelGlobals *kg,
|
||||
const TriangleIsectPrecalc *isect_precalc,
|
||||
Intersection *isect,
|
||||
float3 P,
|
||||
float time,
|
||||
uint visibility,
|
||||
int object,
|
||||
int prim_addr)
|
||||
{
|
||||
/* Primitive index for vertex location lookup. */
|
||||
int prim = kernel_tex_fetch(__prim_index, prim_addr);
|
||||
@ -185,11 +186,15 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
|
||||
motion_triangle_vertices(kg, fobject, prim, time, verts);
|
||||
/* Ray-triangle intersection, unoptimized. */
|
||||
float t, u, v;
|
||||
if(ray_triangle_intersect_uv(P,
|
||||
dir,
|
||||
isect->t,
|
||||
verts[2], verts[0], verts[1],
|
||||
&u, &v, &t))
|
||||
if(ray_triangle_intersect(isect_precalc,
|
||||
P,
|
||||
isect->t,
|
||||
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
|
||||
(ssef*)verts,
|
||||
#else
|
||||
verts[0], verts[1], verts[2],
|
||||
#endif
|
||||
&u, &v, &t))
|
||||
{
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
/* Visibility flag test. we do it here under the assumption
|
||||
@ -217,9 +222,9 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
|
||||
#ifdef __SUBSURFACE__
|
||||
ccl_device_inline void motion_triangle_intersect_subsurface(
|
||||
KernelGlobals *kg,
|
||||
const TriangleIsectPrecalc *isect_precalc,
|
||||
SubsurfaceIntersection *ss_isect,
|
||||
float3 P,
|
||||
float3 dir,
|
||||
float time,
|
||||
int object,
|
||||
int prim_addr,
|
||||
@ -237,11 +242,15 @@ ccl_device_inline void motion_triangle_intersect_subsurface(
|
||||
motion_triangle_vertices(kg, fobject, prim, time, verts);
|
||||
/* Ray-triangle intersection, unoptimized. */
|
||||
float t, u, v;
|
||||
if(ray_triangle_intersect_uv(P,
|
||||
dir,
|
||||
tmax,
|
||||
verts[2], verts[0], verts[1],
|
||||
&u, &v, &t))
|
||||
if(ray_triangle_intersect(isect_precalc,
|
||||
P,
|
||||
tmax,
|
||||
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
|
||||
(ssef*)verts,
|
||||
#else
|
||||
verts[0], verts[1], verts[2],
|
||||
#endif
|
||||
&u, &v, &t))
|
||||
{
|
||||
for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) {
|
||||
if(ss_isect->hits[i].t == t) {
|
||||
|
Loading…
Reference in New Issue
Block a user