2016-07-07 10:23:13 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2011-2016, Blender Foundation.
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
// TODO(sergey): Look into avoiding the use of a full Transform and using a
// 3x3 matrix and a 3-vector instead, which might be faster.
|
2016-10-02 12:48:39 +00:00
|
|
|
/* Read the transform stored for one child of an unaligned BVH node.
 *
 * The three rows are taken from three consecutive float4 entries of
 * __bvh_nodes, starting at node_addr + child * 3 + 1; the fourth row is set
 * to (0, 0, 0, 1).
 *
 * kg is not named in the body; NOTE(review): presumably kernel_tex_fetch
 * expands to an access through it -- confirm against the macro definition.
 */
ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
		int node_addr,
		int child)
{
	/* Entry 0 of the node is skipped; each child owns three entries. */
	const int first_row = node_addr + child * 3 + 1;

	Transform result;
	result.x = kernel_tex_fetch(__bvh_nodes, first_row);
	result.y = kernel_tex_fetch(__bvh_nodes, first_row + 1);
	result.z = kernel_tex_fetch(__bvh_nodes, first_row + 2);
	result.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
	return result;
}
|
|
|
|
|
|
|
|
#if !defined(__KERNEL_SSE2__)
|
2016-10-02 12:48:39 +00:00
|
|
|
ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
|
2016-07-07 10:23:13 +00:00
|
|
|
const float3 P,
|
|
|
|
const float3 idir,
|
|
|
|
const float t,
|
2016-07-11 11:44:19 +00:00
|
|
|
const int node_addr,
|
2016-07-07 10:23:13 +00:00
|
|
|
const uint visibility,
|
2016-07-08 07:41:36 +00:00
|
|
|
float dist[2])
|
2016-07-07 10:23:13 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
/* fetch node data */
|
2016-07-11 11:44:19 +00:00
|
|
|
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
|
|
|
|
float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
|
|
|
|
float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
|
|
|
|
float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
|
2016-07-07 10:23:13 +00:00
|
|
|
|
|
|
|
/* intersect ray against child nodes */
|
|
|
|
float c0lox = (node0.x - P.x) * idir.x;
|
|
|
|
float c0hix = (node0.z - P.x) * idir.x;
|
|
|
|
float c0loy = (node1.x - P.y) * idir.y;
|
|
|
|
float c0hiy = (node1.z - P.y) * idir.y;
|
|
|
|
float c0loz = (node2.x - P.z) * idir.z;
|
|
|
|
float c0hiz = (node2.z - P.z) * idir.z;
|
|
|
|
float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
|
|
|
|
float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
|
|
|
|
|
|
|
|
float c1lox = (node0.y - P.x) * idir.x;
|
|
|
|
float c1hix = (node0.w - P.x) * idir.x;
|
|
|
|
float c1loy = (node1.y - P.y) * idir.y;
|
|
|
|
float c1hiy = (node1.w - P.y) * idir.y;
|
|
|
|
float c1loz = (node2.y - P.z) * idir.z;
|
|
|
|
float c1hiz = (node2.w - P.z) * idir.z;
|
|
|
|
float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
|
|
|
|
float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
|
|
|
|
|
|
|
|
dist[0] = c0min;
|
|
|
|
dist[1] = c1min;
|
|
|
|
|
|
|
|
#ifdef __VISIBILITY_FLAG__
|
|
|
|
/* this visibility test gives a 5% performance hit, how to solve? */
|
|
|
|
return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
|
|
|
|
(((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
|
|
|
|
#else
|
|
|
|
return ((c0max >= c0min)? 1: 0) |
|
|
|
|
((c1max >= c1min)? 2: 0);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2016-10-02 12:48:39 +00:00
|
|
|
ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
|
2016-07-07 10:23:13 +00:00
|
|
|
const float3 P,
|
|
|
|
const float3 idir,
|
|
|
|
const float t,
|
|
|
|
const float difl,
|
|
|
|
const float extmax,
|
2016-07-11 11:44:19 +00:00
|
|
|
const int node_addr,
|
2016-07-07 10:23:13 +00:00
|
|
|
const uint visibility,
|
2016-07-08 07:41:36 +00:00
|
|
|
float dist[2])
|
2016-07-07 10:23:13 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
/* fetch node data */
|
2016-07-11 11:44:19 +00:00
|
|
|
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
|
|
|
|
float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
|
|
|
|
float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
|
|
|
|
float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
|
2016-07-07 10:23:13 +00:00
|
|
|
|
|
|
|
/* intersect ray against child nodes */
|
|
|
|
float c0lox = (node0.x - P.x) * idir.x;
|
|
|
|
float c0hix = (node0.z - P.x) * idir.x;
|
|
|
|
float c0loy = (node1.x - P.y) * idir.y;
|
|
|
|
float c0hiy = (node1.z - P.y) * idir.y;
|
|
|
|
float c0loz = (node2.x - P.z) * idir.z;
|
|
|
|
float c0hiz = (node2.z - P.z) * idir.z;
|
|
|
|
float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
|
|
|
|
float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
|
|
|
|
|
|
|
|
float c1lox = (node0.y - P.x) * idir.x;
|
|
|
|
float c1hix = (node0.w - P.x) * idir.x;
|
|
|
|
float c1loy = (node1.y - P.y) * idir.y;
|
|
|
|
float c1hiy = (node1.w - P.y) * idir.y;
|
|
|
|
float c1loz = (node2.y - P.z) * idir.z;
|
|
|
|
float c1hiz = (node2.w - P.z) * idir.z;
|
|
|
|
float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
|
|
|
|
float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
|
|
|
|
|
|
|
|
if(difl != 0.0f) {
|
|
|
|
float hdiff = 1.0f + difl;
|
|
|
|
float ldiff = 1.0f - difl;
|
|
|
|
if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
|
|
|
|
c0min = max(ldiff * c0min, c0min - extmax);
|
|
|
|
c0max = min(hdiff * c0max, c0max + extmax);
|
|
|
|
}
|
|
|
|
if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
|
|
|
|
c1min = max(ldiff * c1min, c1min - extmax);
|
|
|
|
c1max = min(hdiff * c1max, c1max + extmax);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
dist[0] = c0min;
|
|
|
|
dist[1] = c1min;
|
|
|
|
|
|
|
|
#ifdef __VISIBILITY_FLAG__
|
|
|
|
/* this visibility test gives a 5% performance hit, how to solve? */
|
|
|
|
return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
|
|
|
|
(((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
|
|
|
|
#else
|
|
|
|
return ((c0max >= c0min)? 1: 0) |
|
|
|
|
((c1max >= c1min)? 2: 0);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2016-10-02 12:48:39 +00:00
|
|
|
/* Intersect a ray with one child of an unaligned BVH node.
 *
 * The ray is moved into the child's space using the transform stored in the
 * node, and a slab test is done there. Assuming bvh_inverse_direction returns
 * the component-wise reciprocal, the two plane distances per axis correspond
 * to the planes at 0 and 1 of that space.
 *
 * Only the first element of `dist` is written: it receives the entry
 * distance, clamped to be non-negative, regardless of the result.
 *
 * Returns true when the entry distance does not exceed the exit distance
 * (the exit distance being limited by t).
 */
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(
		KernelGlobals *kg,
		const float3 P,
		const float3 dir,
		const float t,
		int node_addr,
		int child,
		float dist[2])
{
	Transform child_space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
	const float3 local_dir = transform_direction(&child_space, dir);
	const float3 local_P = transform_point(&child_space, P);
	const float3 neg_inv_dir = -bvh_inverse_direction(local_dir);

	/* Per-axis distances to the two planes of each slab. */
	const float3 plane_a = local_P * neg_inv_dir;
	const float3 plane_b = plane_a - neg_inv_dir;

	const float t_enter = max4(0.0f,
	                           min(plane_a.x, plane_b.x),
	                           min(plane_a.y, plane_b.y),
	                           min(plane_a.z, plane_b.z));
	const float t_leave = min4(t,
	                           max(plane_a.x, plane_b.x),
	                           max(plane_a.y, plane_b.y),
	                           max(plane_a.z, plane_b.z));

	dist[0] = t_enter;
	return t_enter <= t_leave;
}
|
|
|
|
|
2016-10-02 12:48:39 +00:00
|
|
|
/* Intersect a ray with one child of an unaligned BVH node, with an optional
 * relative tolerance.
 *
 * Same slab test as bvh_unaligned_node_intersect_child. When difl is
 * non-zero the comparison is relaxed: the entry distance is scaled by
 * (1 - difl) and the exit distance by (1 + difl) before comparing.
 *
 * Only the first element of `dist` is written: it receives the unscaled
 * entry distance, clamped to be non-negative, regardless of the result.
 */
ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(
		KernelGlobals *kg,
		const float3 P,
		const float3 dir,
		const float t,
		const float difl,
		int node_addr,
		int child,
		float dist[2])
{
	Transform child_space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
	const float3 local_dir = transform_direction(&child_space, dir);
	const float3 local_P = transform_point(&child_space, P);
	const float3 neg_inv_dir = -bvh_inverse_direction(local_dir);

	/* Per-axis distances to the two planes of each slab. */
	const float3 plane_a = local_P * neg_inv_dir;
	const float3 plane_b = plane_a - neg_inv_dir;

	const float t_enter = max4(0.0f,
	                           min(plane_a.x, plane_b.x),
	                           min(plane_a.y, plane_b.y),
	                           min(plane_a.z, plane_b.z));
	const float t_leave = min4(t,
	                           max(plane_a.x, plane_b.x),
	                           max(plane_a.y, plane_b.y),
	                           max(plane_a.z, plane_b.z));

	dist[0] = t_enter;

	if(difl == 0.0f) {
		return t_enter <= t_leave;
	}

	/* TODO(sergey): Same as for QBVH, needs a proper use. */
	const float round_down = 1.0f - difl;
	const float round_up = 1.0f + difl;
	return round_down*t_enter <= round_up*t_leave;
}
|
|
|
|
|
2016-10-02 12:48:39 +00:00
|
|
|
ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
|
2016-07-07 10:23:13 +00:00
|
|
|
const float3 P,
|
|
|
|
const float3 dir,
|
|
|
|
const float3 idir,
|
|
|
|
const float t,
|
2016-07-11 11:44:19 +00:00
|
|
|
const int node_addr,
|
2016-07-07 10:23:13 +00:00
|
|
|
const uint visibility,
|
2016-07-08 07:41:36 +00:00
|
|
|
float dist[2])
|
2016-07-07 10:23:13 +00:00
|
|
|
{
|
|
|
|
int mask = 0;
|
2016-07-11 11:44:19 +00:00
|
|
|
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
|
|
|
|
if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
|
2016-07-07 10:23:13 +00:00
|
|
|
#ifdef __VISIBILITY_FLAG__
|
|
|
|
if((__float_as_uint(cnodes.x) & visibility))
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
mask |= 1;
|
|
|
|
}
|
|
|
|
}
|
2016-07-11 11:44:19 +00:00
|
|
|
if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
|
2016-07-07 10:23:13 +00:00
|
|
|
#ifdef __VISIBILITY_FLAG__
|
|
|
|
if((__float_as_uint(cnodes.y) & visibility))
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
mask |= 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
|
2016-10-02 12:48:39 +00:00
|
|
|
ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
|
2016-07-07 10:23:13 +00:00
|
|
|
const float3 P,
|
|
|
|
const float3 dir,
|
|
|
|
const float3 idir,
|
|
|
|
const float t,
|
|
|
|
const float difl,
|
|
|
|
const float extmax,
|
2016-07-11 11:44:19 +00:00
|
|
|
const int node_addr,
|
2016-07-07 10:23:13 +00:00
|
|
|
const uint visibility,
|
2016-07-08 07:41:36 +00:00
|
|
|
float dist[2])
|
2016-07-07 10:23:13 +00:00
|
|
|
{
|
|
|
|
int mask = 0;
|
2016-07-11 11:44:19 +00:00
|
|
|
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
|
|
|
|
if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) {
|
2016-07-07 10:23:13 +00:00
|
|
|
#ifdef __VISIBILITY_FLAG__
|
|
|
|
if((__float_as_uint(cnodes.x) & visibility))
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
mask |= 1;
|
|
|
|
}
|
|
|
|
}
|
2016-07-11 11:44:19 +00:00
|
|
|
if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) {
|
2016-07-07 10:23:13 +00:00
|
|
|
#ifdef __VISIBILITY_FLAG__
|
|
|
|
if((__float_as_uint(cnodes.y) & visibility))
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
mask |= 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
|
2016-10-02 12:48:39 +00:00
|
|
|
ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
|
2016-07-07 10:23:13 +00:00
|
|
|
const float3 P,
|
|
|
|
const float3 dir,
|
|
|
|
const float3 idir,
|
|
|
|
const float t,
|
2016-07-11 11:44:19 +00:00
|
|
|
const int node_addr,
|
2016-07-07 10:23:13 +00:00
|
|
|
const uint visibility,
|
|
|
|
float dist[2])
|
|
|
|
{
|
2016-07-11 11:44:19 +00:00
|
|
|
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
|
2016-07-07 10:23:13 +00:00
|
|
|
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
|
|
|
|
return bvh_unaligned_node_intersect(kg,
|
|
|
|
P,
|
|
|
|
dir,
|
|
|
|
idir,
|
|
|
|
t,
|
2016-07-11 11:44:19 +00:00
|
|
|
node_addr,
|
2016-07-07 10:23:13 +00:00
|
|
|
visibility,
|
|
|
|
dist);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return bvh_aligned_node_intersect(kg,
|
|
|
|
P,
|
|
|
|
idir,
|
|
|
|
t,
|
2016-07-11 11:44:19 +00:00
|
|
|
node_addr,
|
2016-07-07 10:23:13 +00:00
|
|
|
visibility,
|
|
|
|
dist);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-10-02 12:48:39 +00:00
|
|
|
ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
|
2016-07-07 10:23:13 +00:00
|
|
|
const float3 P,
|
|
|
|
const float3 dir,
|
|
|
|
const float3 idir,
|
|
|
|
const float t,
|
|
|
|
const float difl,
|
|
|
|
const float extmax,
|
2016-07-11 11:44:19 +00:00
|
|
|
const int node_addr,
|
2016-07-07 10:23:13 +00:00
|
|
|
const uint visibility,
|
|
|
|
float dist[2])
|
|
|
|
{
|
2016-07-11 11:44:19 +00:00
|
|
|
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
|
2016-07-07 10:23:13 +00:00
|
|
|
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
|
|
|
|
return bvh_unaligned_node_intersect_robust(kg,
|
|
|
|
P,
|
|
|
|
dir,
|
|
|
|
idir,
|
|
|
|
t,
|
|
|
|
difl,
|
|
|
|
extmax,
|
2016-07-11 11:44:19 +00:00
|
|
|
node_addr,
|
2016-07-07 10:23:13 +00:00
|
|
|
visibility,
|
|
|
|
dist);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return bvh_aligned_node_intersect_robust(kg,
|
|
|
|
P,
|
|
|
|
idir,
|
|
|
|
t,
|
|
|
|
difl,
|
|
|
|
extmax,
|
2016-07-11 11:44:19 +00:00
|
|
|
node_addr,
|
2016-07-07 10:23:13 +00:00
|
|
|
visibility,
|
|
|
|
dist);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#else /* !defined(__KERNEL_SSE2__) */
|
|
|
|
|
2016-10-02 12:48:39 +00:00
|
|
|
int ccl_device_forceinline bvh_aligned_node_intersect(
|
2016-07-07 10:23:13 +00:00
|
|
|
KernelGlobals *kg,
|
|
|
|
const float3& P,
|
|
|
|
const float3& dir,
|
|
|
|
const ssef& tsplat,
|
|
|
|
const ssef Psplat[3],
|
|
|
|
const ssef idirsplat[3],
|
|
|
|
const shuffle_swap_t shufflexyz[3],
|
2016-07-11 11:44:19 +00:00
|
|
|
const int node_addr,
|
2016-07-07 10:23:13 +00:00
|
|
|
const uint visibility,
|
|
|
|
float dist[2])
|
|
|
|
{
|
|
|
|
/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
|
|
|
|
const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
|
|
|
|
|
|
|
|
/* fetch node data */
|
2016-07-11 11:44:19 +00:00
|
|
|
const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + node_addr;
|
2016-07-07 10:23:13 +00:00
|
|
|
|
|
|
|
/* intersect ray against child nodes */
|
|
|
|
const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
|
|
|
|
const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
|
|
|
|
const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
|
|
|
|
|
|
|
|
/* calculate { c0min, c1min, -c0max, -c1max} */
|
|
|
|
ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
|
|
|
|
const ssef tminmax = minmax ^ pn;
|
|
|
|
const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
|
|
|
|
|
|
|
|
dist[0] = tminmax[0];
|
|
|
|
dist[1] = tminmax[1];
|
|
|
|
|
|
|
|
int mask = movemask(lrhit);
|
|
|
|
|
|
|
|
# ifdef __VISIBILITY_FLAG__
|
|
|
|
/* this visibility test gives a 5% performance hit, how to solve? */
|
2016-07-11 11:44:19 +00:00
|
|
|
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
|
2016-07-07 10:23:13 +00:00
|
|
|
int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
|
|
|
|
(((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
|
|
|
|
return cmask;
|
|
|
|
# else
|
|
|
|
return mask & 3;
|
|
|
|
# endif
|
|
|
|
}
|
|
|
|
|
2016-10-02 12:48:39 +00:00
|
|
|
/* SSE2 variant: intersect a ray with both children of an axis-aligned BVH
 * node, optionally widening the hit interval of children flagged with
 * PATH_RAY_CURVE.
 *
 * The X, Y and Z bound entries (nodeAddr + 1..3) are reordered with
 * shufflexyz, offset by Psplat and scaled by idirsplat. After the reduction
 * and sign flip the four lanes hold { c0min, c1min, c0max, c1max }.
 * NOTE(review): tsplat presumably carries the matching interval limits per
 * lane -- confirm with the caller.
 *
 * When difl is non-zero, a child whose flag word has PATH_RAY_CURVE set gets
 * its entry distance lowered and its exit distance raised. The change is
 * limited by both a relative factor (1 -/+ difl) and an absolute amount
 * (extmax); whichever moves the value less is used.
 *
 * P and dir are accepted but not used here.
 *
 * dist[0] and dist[1] receive the (possibly widened) entry distances whether
 * or not the child is hit.
 *
 * Returns a bit mask: 1 if child 0 is hit, 2 if child 1 is hit. With
 * __VISIBILITY_FLAG__ a child additionally needs a bit in common between its
 * flag word (x / y of the entry at nodeAddr) and `visibility`.
 */
ccl_device_forceinline int bvh_aligned_node_intersect_robust(
		KernelGlobals *kg,
		const float3& P,
		const float3& dir,
		const ssef& tsplat,
		const ssef Psplat[3],
		const ssef idirsplat[3],
		const shuffle_swap_t shufflexyz[3],
		const float difl,
		const float extmax,
		const int nodeAddr,
		const uint visibility,
		float dist[2])
{
	/* Intersect two child bounding boxes, SSE3 version adapted from Embree */

	/* Sign-bit mask for the last two lanes; XOR with it negates them. */
	const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));

	/* fetch node data */
	const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr;

	/* intersect ray against child nodes */
	const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
	const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
	const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];

	/* calculate { c0min, c1min, -c0max, -c1max} */
	ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
	/* Deliberately not const: the lanes are adjusted in place below, and
	 * writing to an object declared const is undefined behavior. */
	ssef tminmax = minmax ^ pn;

	if(difl != 0.0f) {
		float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
		float& c0min = tminmax.f[0], &c1min = tminmax.f[1];
		float& c0max = tminmax.f[2], &c1max = tminmax.f[3];
		float hdiff = 1.0f + difl;
		float ldiff = 1.0f - difl;
		if(__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
			c0min = max(ldiff * c0min, c0min - extmax);
			c0max = min(hdiff * c0max, c0max + extmax);
		}
		if(__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
			c1min = max(ldiff * c1min, c1min - extmax);
			c1max = min(hdiff * c1max, c1max + extmax);
		}
	}

	/* Lane 0 vs lane 2 and lane 1 vs lane 3: entry <= exit per child. */
	const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);

	dist[0] = tminmax[0];
	dist[1] = tminmax[1];

	int mask = movemask(lrhit);

# ifdef __VISIBILITY_FLAG__
	/* this visibility test gives a 5% performance hit, how to solve? */
	float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
	int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
	            (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
	return cmask;
# else
	return mask & 3;
# endif
}
|
|
|
|
|
2016-10-02 12:48:39 +00:00
|
|
|
/* SSE2 variant: intersect a ray with both children of an unaligned BVH node.
 *
 * The ray is moved into each child's space using the transform stored in the
 * node, and the two slab tests run side by side: lane 0 of every vector
 * belongs to child 0, lane 1 to child 1, and lanes 2 and 3 are filled with
 * zeros. NOTE(review): isect_near / isect_far presumably carry the ray
 * interval limits per lane -- confirm with the caller.
 *
 * dist[0] and dist[1] receive lanes 0 and 1 of the entry distances whether or
 * not the child is hit.
 *
 * Returns a bit mask: 1 if child 0 is hit, 2 if child 1 is hit. With
 * __VISIBILITY_FLAG__ a child additionally needs a bit in common between its
 * flag word (x / y of the entry at node_addr) and `visibility`.
 */
ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
		const float3 P,
		const float3 dir,
		const ssef& isect_near,
		const ssef& isect_far,
		const int node_addr,
		const uint visibility,
		float dist[2])
{
	Transform child_space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
	Transform child_space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);

	/* Ray in the space of each child. */
	const float3 local_dir0 = transform_direction(&child_space0, dir);
	const float3 local_dir1 = transform_direction(&child_space1, dir);
	const float3 local_P0 = transform_point(&child_space0, P);
	const float3 local_P1 = transform_point(&child_space1, P);
	const float3 neg_idir0 = -bvh_inverse_direction(local_dir0);
	const float3 neg_idir1 = -bvh_inverse_direction(local_dir1);

	/* Per-axis distances to the two planes of each slab. */
	const ssef lower_x(local_P0.x * neg_idir0.x, local_P1.x * neg_idir1.x, 0.0f, 0.0f);
	const ssef lower_y(local_P0.y * neg_idir0.y, local_P1.y * neg_idir1.y, 0.0f, 0.0f);
	const ssef lower_z(local_P0.z * neg_idir0.z, local_P1.z * neg_idir1.z, 0.0f, 0.0f);

	const ssef upper_x = lower_x - ssef(neg_idir0.x, neg_idir1.x, 0.0f, 0.0f);
	const ssef upper_y = lower_y - ssef(neg_idir0.y, neg_idir1.y, 0.0f, 0.0f);
	const ssef upper_z = lower_z - ssef(neg_idir0.z, neg_idir1.z, 0.0f, 0.0f);

	const ssef enter_x = min(lower_x, upper_x);
	const ssef enter_y = min(lower_y, upper_y);
	const ssef enter_z = min(lower_z, upper_z);
	const ssef leave_x = max(lower_x, upper_x);
	const ssef leave_y = max(lower_y, upper_y);
	const ssef leave_z = max(lower_z, upper_z);

	const ssef t_enter = max4(enter_x, enter_y, enter_z, isect_near);
	const ssef t_leave = min4(leave_x, leave_y, leave_z, isect_far);
	const sseb hit_lanes = t_enter <= t_leave;

	dist[0] = t_enter.f[0];
	dist[1] = t_enter.f[1];

	const int hits = (int)movemask(hit_lanes);

# ifdef __VISIBILITY_FLAG__
	/* this visibility test gives a 5% performance hit, how to solve? */
	const float4 flags = kernel_tex_fetch(__bvh_nodes, node_addr+0);
	int visible = 0;
	if((hits & 1) && (__float_as_uint(flags.x) & visibility)) {
		visible |= 1;
	}
	if((hits & 2) && (__float_as_uint(flags.y) & visibility)) {
		visible |= 2;
	}
	return visible;
# else
	return hits & 3;
# endif
}
|
|
|
|
|
2016-10-02 12:48:39 +00:00
|
|
|
/* SSE2 variant: intersect a ray with both children of an unaligned BVH node,
 * with an optional relative tolerance.
 *
 * The ray is moved into each child's space using the transform stored in the
 * node, and the two slab tests run side by side: lane 0 of every vector
 * belongs to child 0, lane 1 to child 1, and lanes 2 and 3 are filled with
 * zeros. When difl is non-zero the comparison is relaxed: entry distances are
 * scaled by (1 - difl) and exit distances by (1 + difl) before comparing.
 * NOTE(review): isect_near / isect_far presumably carry the ray interval
 * limits per lane -- confirm with the caller.
 *
 * dist[0] and dist[1] receive lanes 0 and 1 of the unscaled entry distances
 * whether or not the child is hit.
 *
 * Returns a bit mask: 1 if child 0 is hit, 2 if child 1 is hit. With
 * __VISIBILITY_FLAG__ a child additionally needs a bit in common between its
 * flag word (x / y of the entry at node_addr) and `visibility`.
 */
ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
		const float3 P,
		const float3 dir,
		const ssef& isect_near,
		const ssef& isect_far,
		const float difl,
		const int node_addr,
		const uint visibility,
		float dist[2])
{
	Transform child_space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
	Transform child_space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);

	/* Ray in the space of each child. */
	const float3 local_dir0 = transform_direction(&child_space0, dir);
	const float3 local_dir1 = transform_direction(&child_space1, dir);
	const float3 local_P0 = transform_point(&child_space0, P);
	const float3 local_P1 = transform_point(&child_space1, P);
	const float3 neg_idir0 = -bvh_inverse_direction(local_dir0);
	const float3 neg_idir1 = -bvh_inverse_direction(local_dir1);

	/* Per-axis distances to the two planes of each slab. */
	const ssef lower_x(local_P0.x * neg_idir0.x, local_P1.x * neg_idir1.x, 0.0f, 0.0f);
	const ssef lower_y(local_P0.y * neg_idir0.y, local_P1.y * neg_idir1.y, 0.0f, 0.0f);
	const ssef lower_z(local_P0.z * neg_idir0.z, local_P1.z * neg_idir1.z, 0.0f, 0.0f);

	const ssef upper_x = lower_x - ssef(neg_idir0.x, neg_idir1.x, 0.0f, 0.0f);
	const ssef upper_y = lower_y - ssef(neg_idir0.y, neg_idir1.y, 0.0f, 0.0f);
	const ssef upper_z = lower_z - ssef(neg_idir0.z, neg_idir1.z, 0.0f, 0.0f);

	const ssef enter_x = min(lower_x, upper_x);
	const ssef enter_y = min(lower_y, upper_y);
	const ssef enter_z = min(lower_z, upper_z);
	const ssef leave_x = max(lower_x, upper_x);
	const ssef leave_y = max(lower_y, upper_y);
	const ssef leave_z = max(lower_z, upper_z);

	const ssef t_enter = max4(enter_x, enter_y, enter_z, isect_near);
	const ssef t_leave = min4(leave_x, leave_y, leave_z, isect_far);

	sseb hit_lanes;
	if(difl == 0.0f) {
		hit_lanes = t_enter <= t_leave;
	}
	else {
		const float round_down = 1.0f - difl;
		const float round_up = 1.0f + difl;
		hit_lanes = round_down*t_enter <= round_up*t_leave;
	}

	dist[0] = t_enter.f[0];
	dist[1] = t_enter.f[1];

	const int hits = (int)movemask(hit_lanes);

# ifdef __VISIBILITY_FLAG__
	/* this visibility test gives a 5% performance hit, how to solve? */
	const float4 flags = kernel_tex_fetch(__bvh_nodes, node_addr+0);
	int visible = 0;
	if((hits & 1) && (__float_as_uint(flags.x) & visibility)) {
		visible |= 1;
	}
	if((hits & 2) && (__float_as_uint(flags.y) & visibility)) {
		visible |= 2;
	}
	return visible;
# else
	return hits & 3;
# endif
}
|
|
|
|
|
2016-10-02 12:48:39 +00:00
|
|
|
/* SSE2 variant: intersect a ray with the two children of a BVH node, choosing
 * the aligned or unaligned test from the node itself.
 *
 * The x component of the entry at node_addr is reinterpreted as an unsigned
 * integer; if PATH_RAY_NODE_UNALIGNED is set the unaligned test is used (it
 * receives isect_near / isect_far), otherwise the aligned one (it receives
 * tsplat, Psplat, idirsplat and shufflexyz). The result and `dist` are
 * whatever the chosen test produces.
 */
ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
		const float3& P,
		const float3& dir,
		const ssef& isect_near,
		const ssef& isect_far,
		const ssef& tsplat,
		const ssef Psplat[3],
		const ssef idirsplat[3],
		const shuffle_swap_t shufflexyz[3],
		const int node_addr,
		const uint visibility,
		float dist[2])
{
	const float4 header = kernel_tex_fetch(__bvh_nodes, node_addr);
	const bool is_unaligned = (__float_as_uint(header.x) & PATH_RAY_NODE_UNALIGNED) != 0;

	if(!is_unaligned) {
		return bvh_aligned_node_intersect(kg, P, dir,
		                                  tsplat, Psplat, idirsplat, shufflexyz,
		                                  node_addr, visibility, dist);
	}
	return bvh_unaligned_node_intersect(kg, P, dir,
	                                    isect_near, isect_far,
	                                    node_addr, visibility, dist);
}
|
|
|
|
|
2016-10-02 12:48:39 +00:00
|
|
|
/* SSE2 variant: tolerance-aware counterpart of bvh_node_intersect.
 *
 * The x component of the entry at node_addr is reinterpreted as an unsigned
 * integer; if PATH_RAY_NODE_UNALIGNED is set the unaligned robust test is
 * used (it receives isect_near / isect_far and difl), otherwise the aligned
 * robust one (it receives tsplat, Psplat, idirsplat, shufflexyz, difl and
 * extmax). The result and `dist` are whatever the chosen test produces.
 */
ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
		const float3& P,
		const float3& dir,
		const ssef& isect_near,
		const ssef& isect_far,
		const ssef& tsplat,
		const ssef Psplat[3],
		const ssef idirsplat[3],
		const shuffle_swap_t shufflexyz[3],
		const float difl,
		const float extmax,
		const int node_addr,
		const uint visibility,
		float dist[2])
{
	const float4 header = kernel_tex_fetch(__bvh_nodes, node_addr);
	const bool is_unaligned = (__float_as_uint(header.x) & PATH_RAY_NODE_UNALIGNED) != 0;

	if(!is_unaligned) {
		return bvh_aligned_node_intersect_robust(kg, P, dir,
		                                         tsplat, Psplat, idirsplat, shufflexyz,
		                                         difl, extmax,
		                                         node_addr, visibility, dist);
	}
	return bvh_unaligned_node_intersect_robust(kg, P, dir,
	                                           isect_near, isect_far,
	                                           difl,
	                                           node_addr, visibility, dist);
}
|
|
|
|
#endif /* !defined(__KERNEL_SSE2__) */
|