Cycles: Cleanup, variables name

Using camel case for variables is something that didn't come from our original
code, but rather from third-party libraries. Let's avoid it as much as possible.
This commit is contained in:
Sergey Sharybin 2016-07-11 13:44:19 +02:00
parent 2ecbc3b777
commit cf82b49a0f
12 changed files with 1071 additions and 902 deletions

@ -17,11 +17,11 @@
// TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and
// 3-vector which might be faster.
ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
int nodeAddr,
int node_addr,
int child)
{
Transform space;
const int child_addr = nodeAddr + child * 3;
const int child_addr = node_addr + child * 3;
space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1);
space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2);
space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3);
@ -34,16 +34,16 @@ ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
const float3 P,
const float3 idir,
const float t,
const int nodeAddr,
const int node_addr,
const uint visibility,
float dist[2])
{
/* fetch node data */
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
/* intersect ray against child nodes */
float c0lox = (node0.x - P.x) * idir.x;
@ -83,16 +83,16 @@ ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
const float t,
const float difl,
const float extmax,
const int nodeAddr,
const int node_addr,
const uint visibility,
float dist[2])
{
/* fetch node data */
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
/* intersect ray against child nodes */
float c0lox = (node0.x - P.x) * idir.x;
@ -144,26 +144,26 @@ ccl_device_inline bool bvh_unaligned_node_intersect_child(
const float3 P,
const float3 dir,
const float t,
int nodeAddr,
int node_addr,
int child,
float dist[2])
{
Transform space = bvh_unaligned_node_fetch_space(kg, nodeAddr, child);
Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
float3 aligned_dir = transform_direction(&space, dir);
float3 aligned_P = transform_point(&space, P);
float3 nrdir = -bvh_inverse_direction(aligned_dir);
float3 tLowerXYZ = aligned_P * nrdir;
float3 tUpperXYZ = tLowerXYZ - nrdir;
const float tNearX = min(tLowerXYZ.x, tUpperXYZ.x);
const float tNearY = min(tLowerXYZ.y, tUpperXYZ.y);
const float tNearZ = min(tLowerXYZ.z, tUpperXYZ.z);
const float tFarX = max(tLowerXYZ.x, tUpperXYZ.x);
const float tFarY = max(tLowerXYZ.y, tUpperXYZ.y);
const float tFarZ = max(tLowerXYZ.z, tUpperXYZ.z);
const float tNear = max4(0.0f, tNearX, tNearY, tNearZ);
const float tFar = min4(t, tFarX, tFarY, tFarZ);
*dist = tNear;
return tNear <= tFar;
float3 lower_xyz = aligned_P * nrdir;
float3 upper_xyz = lower_xyz - nrdir;
const float near_x = min(lower_xyz.x, upper_xyz.x);
const float near_y = min(lower_xyz.y, upper_xyz.y);
const float near_z = min(lower_xyz.z, upper_xyz.z);
const float far_x = max(lower_xyz.x, upper_xyz.x);
const float far_y = max(lower_xyz.y, upper_xyz.y);
const float far_z = max(lower_xyz.z, upper_xyz.z);
const float near = max4(0.0f, near_x, near_y, near_z);
const float far = min4(t, far_x, far_y, far_z);
*dist = near;
return near <= far;
}
ccl_device_inline bool bvh_unaligned_node_intersect_child_robust(
@ -172,33 +172,33 @@ ccl_device_inline bool bvh_unaligned_node_intersect_child_robust(
const float3 dir,
const float t,
const float difl,
int nodeAddr,
int node_addr,
int child,
float dist[2])
{
Transform space = bvh_unaligned_node_fetch_space(kg, nodeAddr, child);
Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child);
float3 aligned_dir = transform_direction(&space, dir);
float3 aligned_P = transform_point(&space, P);
float3 nrdir = -bvh_inverse_direction(aligned_dir);
float3 tLowerXYZ = aligned_P * nrdir;
float3 tUpperXYZ = tLowerXYZ - nrdir;
const float tNearX = min(tLowerXYZ.x, tUpperXYZ.x);
const float tNearY = min(tLowerXYZ.y, tUpperXYZ.y);
const float tNearZ = min(tLowerXYZ.z, tUpperXYZ.z);
const float tFarX = max(tLowerXYZ.x, tUpperXYZ.x);
const float tFarY = max(tLowerXYZ.y, tUpperXYZ.y);
const float tFarZ = max(tLowerXYZ.z, tUpperXYZ.z);
const float tNear = max4(0.0f, tNearX, tNearY, tNearZ);
const float tFar = min4(t, tFarX, tFarY, tFarZ);
*dist = tNear;
const float near_x = min(tLowerXYZ.x, tUpperXYZ.x);
const float near_y = min(tLowerXYZ.y, tUpperXYZ.y);
const float near_z = min(tLowerXYZ.z, tUpperXYZ.z);
const float far_x = max(tLowerXYZ.x, tUpperXYZ.x);
const float far_y = max(tLowerXYZ.y, tUpperXYZ.y);
const float far_z = max(tLowerXYZ.z, tUpperXYZ.z);
const float near = max4(0.0f, near_x, near_y, near_z);
const float far = min4(t, far_x, far_y, far_z);
*dist = near;
if(difl != 0.0f) {
/* TODO(sergey): Same as for QBVH, needs a proper use. */
const float round_down = 1.0f - difl;
const float round_up = 1.0f + difl;
return round_down*tNear <= round_up*tFar;
return round_down*near <= round_up*far;
}
else {
return tNear <= tFar;
return near <= far;
}
}
@ -207,13 +207,13 @@ ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg,
const float3 dir,
const float3 idir,
const float t,
const int nodeAddr,
const int node_addr,
const uint visibility,
float dist[2])
{
int mask = 0;
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
if(bvh_unaligned_node_intersect_child(kg, P, dir, t, nodeAddr, 0, &dist[0])) {
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(cnodes.x) & visibility))
#endif
@ -221,7 +221,7 @@ ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg,
mask |= 1;
}
}
if(bvh_unaligned_node_intersect_child(kg, P, dir, t, nodeAddr, 1, &dist[1])) {
if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(cnodes.y) & visibility))
#endif
@ -239,13 +239,13 @@ ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
const float t,
const float difl,
const float extmax,
const int nodeAddr,
const int node_addr,
const uint visibility,
float dist[2])
{
int mask = 0;
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, nodeAddr, 0, &dist[0])) {
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) {
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(cnodes.x) & visibility))
#endif
@ -253,7 +253,7 @@ ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
mask |= 1;
}
}
if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, nodeAddr, 1, &dist[1])) {
if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) {
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(cnodes.y) & visibility))
#endif
@ -269,18 +269,18 @@ ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
const float3 dir,
const float3 idir,
const float t,
const int nodeAddr,
const int node_addr,
const uint visibility,
float dist[2])
{
float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr);
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect(kg,
P,
dir,
idir,
t,
nodeAddr,
node_addr,
visibility,
dist);
}
@ -289,7 +289,7 @@ ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
P,
idir,
t,
nodeAddr,
node_addr,
visibility,
dist);
}
@ -302,11 +302,11 @@ ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
const float t,
const float difl,
const float extmax,
const int nodeAddr,
const int node_addr,
const uint visibility,
float dist[2])
{
float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr);
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect_robust(kg,
P,
@ -315,7 +315,7 @@ ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
t,
difl,
extmax,
nodeAddr,
node_addr,
visibility,
dist);
}
@ -326,7 +326,7 @@ ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
t,
difl,
extmax,
nodeAddr,
node_addr,
visibility,
dist);
}
@ -341,7 +341,7 @@ int ccl_device_inline bvh_aligned_node_intersect(
const ssef Psplat[3],
const ssef idirsplat[3],
const shuffle_swap_t shufflexyz[3],
const int nodeAddr,
const int node_addr,
const uint visibility,
float dist[2])
{
@ -349,7 +349,7 @@ int ccl_device_inline bvh_aligned_node_intersect(
const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
/* fetch node data */
const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr;
const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + node_addr;
/* intersect ray against child nodes */
const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
@ -368,7 +368,7 @@ int ccl_device_inline bvh_aligned_node_intersect(
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
(((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
return cmask;
@ -446,12 +446,12 @@ int ccl_device_inline bvh_unaligned_node_intersect(KernelGlobals *kg,
const float3 dir,
const ssef& tnear,
const ssef& tfar,
const int nodeAddr,
const int node_addr,
const uint visibility,
float dist[2])
{
Transform space0 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 0);
Transform space1 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 1);
Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
float3 aligned_dir0 = transform_direction(&space0, dir),
aligned_dir1 = transform_direction(&space1, dir);;
@ -460,40 +460,40 @@ int ccl_device_inline bvh_unaligned_node_intersect(KernelGlobals *kg,
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
nrdir1 = -bvh_inverse_direction(aligned_dir1);
ssef tLowerX = ssef(aligned_P0.x * nrdir0.x,
ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
aligned_P1.x * nrdir1.x,
0.0f, 0.0f),
tLowerY = ssef(aligned_P0.y * nrdir0.y,
lower_y = ssef(aligned_P0.y * nrdir0.y,
aligned_P1.y * nrdir1.y,
0.0f,
0.0f),
tLowerZ = ssef(aligned_P0.z * nrdir0.z,
lower_z = ssef(aligned_P0.z * nrdir0.z,
aligned_P1.z * nrdir1.z,
0.0f,
0.0f);
ssef tUpperX = tLowerX - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
tUpperY = tLowerY - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
tUpperZ = tLowerZ - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
ssef tnear_x = min(tLowerX, tUpperX);
ssef tnear_y = min(tLowerY, tUpperY);
ssef tnear_z = min(tLowerZ, tUpperZ);
ssef tfar_x = max(tLowerX, tUpperX);
ssef tfar_y = max(tLowerY, tUpperY);
ssef tfar_z = max(tLowerZ, tUpperZ);
ssef tnear_x = min(lower_x, upper_x);
ssef tnear_y = min(lower_y, upper_y);
ssef tnear_z = min(lower_z, upper_z);
ssef tfar_x = max(lower_x, upper_x);
ssef tfar_y = max(lower_y, upper_y);
ssef tfar_z = max(lower_z, upper_z);
const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
sseb vmask = tNear <= tFar;
dist[0] = tNear.f[0];
dist[1] = tNear.f[1];
const ssef near = max4(tnear_x, tnear_y, tnear_z, tnear);
const ssef far = min4(tfar_x, tfar_y, tfar_z, tfar);
sseb vmask = near <= far;
dist[0] = near.f[0];
dist[1] = near.f[1];
int mask = (int)movemask(vmask);
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
(((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
return cmask;
@ -508,12 +508,12 @@ int ccl_device_inline bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
const ssef& tnear,
const ssef& tfar,
const float difl,
const int nodeAddr,
const int node_addr,
const uint visibility,
float dist[2])
{
Transform space0 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 0);
Transform space1 = bvh_unaligned_node_fetch_space(kg, nodeAddr, 1);
Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
float3 aligned_dir0 = transform_direction(&space0, dir),
aligned_dir1 = transform_direction(&space1, dir);;
@ -522,49 +522,49 @@ int ccl_device_inline bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
nrdir1 = -bvh_inverse_direction(aligned_dir1);
ssef tLowerX = ssef(aligned_P0.x * nrdir0.x,
ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
aligned_P1.x * nrdir1.x,
0.0f, 0.0f),
tLowerY = ssef(aligned_P0.y * nrdir0.y,
lower_y = ssef(aligned_P0.y * nrdir0.y,
aligned_P1.y * nrdir1.y,
0.0f,
0.0f),
tLowerZ = ssef(aligned_P0.z * nrdir0.z,
lower_z = ssef(aligned_P0.z * nrdir0.z,
aligned_P1.z * nrdir1.z,
0.0f,
0.0f);
ssef tUpperX = tLowerX - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
tUpperY = tLowerY - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
tUpperZ = tLowerZ - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
ssef tnear_x = min(tLowerX, tUpperX);
ssef tnear_y = min(tLowerY, tUpperY);
ssef tnear_z = min(tLowerZ, tUpperZ);
ssef tfar_x = max(tLowerX, tUpperX);
ssef tfar_y = max(tLowerY, tUpperY);
ssef tfar_z = max(tLowerZ, tUpperZ);
ssef tnear_x = min(lower_x, upper_x);
ssef tnear_y = min(lower_y, upper_y);
ssef tnear_z = min(lower_z, upper_z);
ssef tfar_x = max(lower_x, upper_x);
ssef tfar_y = max(lower_y, upper_y);
ssef tfar_z = max(lower_z, upper_z);
const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
const ssef near = max4(tnear_x, tnear_y, tnear_z, tnear);
const ssef far = min4(tfar_x, tfar_y, tfar_z, tfar);
sseb vmask;
if(difl != 0.0f) {
const float round_down = 1.0f - difl;
const float round_up = 1.0f + difl;
vmask = round_down*tNear <= round_up*tFar;
vmask = round_down*near <= round_up*far;
}
else {
vmask = tNear <= tFar;
vmask = near <= far;
}
dist[0] = tNear.f[0];
dist[1] = tNear.f[1];
dist[0] = near.f[0];
dist[1] = near.f[1];
int mask = (int)movemask(vmask);
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
(((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
return cmask;
@ -582,18 +582,18 @@ ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
const ssef Psplat[3],
const ssef idirsplat[3],
const shuffle_swap_t shufflexyz[3],
const int nodeAddr,
const int node_addr,
const uint visibility,
float dist[2])
{
float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr);
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect(kg,
P,
dir,
tnear,
tfar,
nodeAddr,
node_addr,
visibility,
dist);
}
@ -605,7 +605,7 @@ ccl_device_inline int bvh_node_intersect(KernelGlobals *kg,
Psplat,
idirsplat,
shufflexyz,
nodeAddr,
node_addr,
visibility,
dist);
}
@ -622,11 +622,11 @@ ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
const shuffle_swap_t shufflexyz[3],
const float difl,
const float extmax,
const int nodeAddr,
const int node_addr,
const uint visibility,
float dist[2])
{
float4 node = kernel_tex_fetch(__bvh_nodes, nodeAddr);
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect_robust(kg,
P,
@ -634,7 +634,7 @@ ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
tnear,
tfar,
difl,
nodeAddr,
node_addr,
visibility,
dist);
}
@ -648,7 +648,7 @@ ccl_device_inline int bvh_node_intersect_robust(KernelGlobals *kg,
shufflexyz,
difl,
extmax,
nodeAddr,
node_addr,
visibility,
dist);
}

@ -49,12 +49,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
*/
/* traversal stack in CUDA thread-local memory */
int traversalStack[BVH_STACK_SIZE];
traversalStack[0] = ENTRYPOINT_SENTINEL;
int traversal_stack[BVH_STACK_SIZE];
traversal_stack[0] = ENTRYPOINT_SENTINEL;
/* traversal variables in registers */
int stackPtr = 0;
int nodeAddr = kernel_data.bvh.root;
int stack_ptr = 0;
int node_addr = kernel_data.bvh.root;
/* ray parameters in registers */
const float tmax = ray->t;
@ -102,10 +102,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
int nodeAddrChild1, traverse_mask;
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
int node_addr_child1, traverse_mask;
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
@ -115,7 +115,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# endif
idir,
isect_t,
nodeAddr,
node_addr,
PATH_RAY_SHADOW,
dist);
#else // __KERNEL_SSE2__
@ -130,60 +130,59 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Psplat,
idirsplat,
shufflexyz,
nodeAddr,
node_addr,
PATH_RAY_SHADOW,
dist);
#endif // __KERNEL_SSE2__
nodeAddr = __float_as_int(cnodes.z);
nodeAddrChild1 = __float_as_int(cnodes.w);
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
if(traverse_mask == 3) {
/* Both children were intersected, push the farther one. */
bool closestChild1 = (dist[1] < dist[0]);
if(closestChild1) {
int tmp = nodeAddr;
nodeAddr = nodeAddrChild1;
nodeAddrChild1 = tmp;
bool is_closest_child1 = (dist[1] < dist[0]);
if(is_closest_child1) {
int tmp = node_addr;
node_addr = node_addr_child1;
node_addr_child1 = tmp;
}
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
traversalStack[stackPtr] = nodeAddrChild1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = node_addr_child1;
}
else {
/* One child was intersected. */
if(traverse_mask == 2) {
nodeAddr = nodeAddrChild1;
node_addr = node_addr_child1;
}
else if(traverse_mask == 0) {
/* Neither child was intersected. */
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
}
}
/* if node is leaf, fetch triangle list */
if(nodeAddr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
int primAddr = __float_as_int(leaf.x);
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(primAddr >= 0) {
if(prim_addr >= 0) {
#endif
const int primAddr2 = __float_as_int(leaf.y);
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
const uint p_type = type & PRIMITIVE_ALL;
/* pop */
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
/* primitive intersection */
while(primAddr < primAddr2) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
while(prim_addr < prim_addr2) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
bool hit;
@ -193,22 +192,57 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
switch(p_type) {
case PRIMITIVE_TRIANGLE: {
hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr);
hit = triangle_intersect(kg,
&isect_precalc,
isect_array,
P,
PATH_RAY_SHADOW,
object,
prim_addr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr);
hit = motion_triangle_intersect(kg,
isect_array,
P,
dir,
ray->time,
PATH_RAY_SHADOW,
object,
prim_addr);
break;
}
#endif
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
else
hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = bvh_cardinal_curve_intersect(kg,
isect_array,
P,
dir,
PATH_RAY_SHADOW,
object,
prim_addr,
ray->time,
type,
NULL,
0, 0);
}
else {
hit = bvh_curve_intersect(kg,
isect_array,
P,
dir,
PATH_RAY_SHADOW,
object,
prim_addr,
ray->time,
type,
NULL,
0, 0);
}
break;
}
#endif
@ -260,13 +294,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
isect_array->t = isect_t;
}
primAddr++;
prim_addr++;
}
}
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -primAddr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
@ -290,18 +324,18 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
node_addr = kernel_tex_fetch(__object_node, object);
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stackPtr >= 0) {
if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
if(num_hits_in_instance) {
@ -346,11 +380,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# endif
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
return false;
}

@ -50,12 +50,12 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
*/
/* traversal stack in CUDA thread-local memory */
int traversalStack[BVH_STACK_SIZE];
traversalStack[0] = ENTRYPOINT_SENTINEL;
int traversal_stack[BVH_STACK_SIZE];
traversal_stack[0] = ENTRYPOINT_SENTINEL;
/* traversal variables in registers */
int stackPtr = 0;
int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object);
int stack_ptr = 0;
int node_addr = kernel_tex_fetch(__object_node, subsurface_object);
/* ray parameters in registers */
float3 P = ray->P;
@ -111,10 +111,10 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
int nodeAddrChild1, traverse_mask;
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
int node_addr_child1, traverse_mask;
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
@ -124,7 +124,7 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# endif
idir,
isect_t,
nodeAddr,
node_addr,
PATH_RAY_ALL_VISIBILITY,
dist);
#else // __KERNEL_SSE2__
@ -139,65 +139,64 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Psplat,
idirsplat,
shufflexyz,
nodeAddr,
node_addr,
PATH_RAY_ALL_VISIBILITY,
dist);
#endif // __KERNEL_SSE2__
nodeAddr = __float_as_int(cnodes.z);
nodeAddrChild1 = __float_as_int(cnodes.w);
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
if(traverse_mask == 3) {
/* Both children were intersected, push the farther one. */
bool closestChild1 = (dist[1] < dist[0]);
if(closestChild1) {
int tmp = nodeAddr;
nodeAddr = nodeAddrChild1;
nodeAddrChild1 = tmp;
bool is_closest_child1 = (dist[1] < dist[0]);
if(is_closest_child1) {
int tmp = node_addr;
node_addr = node_addr_child1;
node_addr_child1 = tmp;
}
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
traversalStack[stackPtr] = nodeAddrChild1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = node_addr_child1;
}
else {
/* One child was intersected. */
if(traverse_mask == 2) {
nodeAddr = nodeAddrChild1;
node_addr = node_addr_child1;
}
else if(traverse_mask == 0) {
/* Neither child was intersected. */
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
}
}
/* if node is leaf, fetch triangle list */
if(nodeAddr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
int primAddr = __float_as_int(leaf.x);
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
int prim_addr = __float_as_int(leaf.x);
const int primAddr2 = __float_as_int(leaf.y);
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
/* pop */
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* intersect ray against primitive */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
triangle_intersect_subsurface(kg,
&isect_precalc,
ss_isect,
P,
object,
primAddr,
prim_addr,
isect_t,
lcg_state,
max_hits);
@ -207,15 +206,15 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* intersect ray against primitive */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
motion_triangle_intersect_subsurface(kg,
ss_isect,
P,
dir,
ray->time,
object,
primAddr,
prim_addr,
isect_t,
lcg_state,
max_hits);
@ -228,8 +227,8 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
}
}
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
}
ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg,

@ -59,12 +59,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
*/
/* traversal stack in CUDA thread-local memory */
int traversalStack[BVH_STACK_SIZE];
traversalStack[0] = ENTRYPOINT_SENTINEL;
int traversal_stack[BVH_STACK_SIZE];
traversal_stack[0] = ENTRYPOINT_SENTINEL;
/* traversal variables in registers */
int stackPtr = 0;
int nodeAddr = kernel_data.bvh.root;
int stack_ptr = 0;
int node_addr = kernel_data.bvh.root;
/* ray parameters in registers */
float3 P = ray->P;
@ -111,10 +111,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
int nodeAddrChild1, traverse_mask;
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
int node_addr_child1, traverse_mask;
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
#if !defined(__KERNEL_SSE2__)
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
@ -128,7 +128,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
isect->t,
difl,
extmax,
nodeAddr,
node_addr,
visibility,
dist);
}
@ -142,7 +142,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# endif
idir,
isect->t,
nodeAddr,
node_addr,
visibility,
dist);
}
@ -162,7 +162,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
shufflexyz,
difl,
extmax,
nodeAddr,
node_addr,
visibility,
dist);
}
@ -180,65 +180,71 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Psplat,
idirsplat,
shufflexyz,
nodeAddr,
node_addr,
visibility,
dist);
}
#endif // __KERNEL_SSE2__
nodeAddr = __float_as_int(cnodes.z);
nodeAddrChild1 = __float_as_int(cnodes.w);
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
if(traverse_mask == 3) {
/* Both children were intersected, push the farther one. */
bool closestChild1 = (dist[1] < dist[0]);
if(closestChild1) {
int tmp = nodeAddr;
nodeAddr = nodeAddrChild1;
nodeAddrChild1 = tmp;
bool is_closest_child1 = (dist[1] < dist[0]);
if(is_closest_child1) {
int tmp = node_addr;
node_addr = node_addr_child1;
node_addr_child1 = tmp;
}
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
traversalStack[stackPtr] = nodeAddrChild1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = node_addr_child1;
}
else {
/* One child was intersected. */
if(traverse_mask == 2) {
nodeAddr = nodeAddrChild1;
node_addr = node_addr_child1;
}
else if(traverse_mask == 0) {
/* Neither child was intersected. */
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
}
BVH_DEBUG_NEXT_STEP();
}
/* if node is leaf, fetch triangle list */
if(nodeAddr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
int primAddr = __float_as_int(leaf.x);
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(primAddr >= 0) {
if(prim_addr >= 0) {
#endif
const int primAddr2 = __float_as_int(leaf.y);
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
/* pop */
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
for(; primAddr < primAddr2; primAddr++) {
for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_STEP();
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect(kg,
&isect_precalc,
isect,
P,
visibility,
object,
prim_addr))
{
/* shadow ray early termination */
#if defined(__KERNEL_SSE2__)
if(visibility == PATH_RAY_SHADOW_OPAQUE)
@ -257,10 +263,18 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; primAddr < primAddr2; primAddr++) {
for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_STEP();
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
isect,
P,
dir,
ray->time,
visibility,
object,
prim_addr))
{
/* shadow ray early termination */
# if defined(__KERNEL_SSE2__)
if(visibility == PATH_RAY_SHADOW_OPAQUE)
@ -281,14 +295,38 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
for(; primAddr < primAddr2; primAddr++) {
for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_STEP();
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
bool hit;
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
else
hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = bvh_cardinal_curve_intersect(kg,
isect,
P,
dir,
visibility,
object,
prim_addr,
ray->time,
type,
lcg_state,
difl,
extmax);
}
else {
hit = bvh_curve_intersect(kg,
isect,
P,
dir,
visibility,
object,
prim_addr,
ray->time,
type,
lcg_state,
difl,
extmax);
}
if(hit) {
/* shadow ray early termination */
# if defined(__KERNEL_SSE2__)
@ -312,7 +350,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -primAddr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
@ -334,20 +372,20 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
node_addr = kernel_tex_fetch(__object_node, object);
BVH_DEBUG_NEXT_INSTANCE();
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stackPtr >= 0) {
if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* instance pop */
@ -372,11 +410,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# endif
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
}

@ -49,12 +49,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
*/
/* traversal stack in CUDA thread-local memory */
int traversalStack[BVH_STACK_SIZE];
traversalStack[0] = ENTRYPOINT_SENTINEL;
int traversal_stack[BVH_STACK_SIZE];
traversal_stack[0] = ENTRYPOINT_SENTINEL;
/* traversal variables in registers */
int stackPtr = 0;
int nodeAddr = kernel_data.bvh.root;
int stack_ptr = 0;
int node_addr = kernel_data.bvh.root;
/* ray parameters in registers */
float3 P = ray->P;
@ -99,10 +99,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
int nodeAddrChild1, traverse_mask;
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
int node_addr_child1, traverse_mask;
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
@ -112,7 +112,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# endif
idir,
isect->t,
nodeAddr,
node_addr,
visibility,
dist);
#else // __KERNEL_SSE2__
@ -127,84 +127,96 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Psplat,
idirsplat,
shufflexyz,
nodeAddr,
node_addr,
visibility,
dist);
#endif // __KERNEL_SSE2__
nodeAddr = __float_as_int(cnodes.z);
nodeAddrChild1 = __float_as_int(cnodes.w);
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
if(traverse_mask == 3) {
/* Both children were intersected, push the farther one. */
bool closestChild1 = (dist[1] < dist[0]);
if(closestChild1) {
int tmp = nodeAddr;
nodeAddr = nodeAddrChild1;
nodeAddrChild1 = tmp;
bool is_closest_child1 = (dist[1] < dist[0]);
if(is_closest_child1) {
int tmp = node_addr;
node_addr = node_addr_child1;
node_addr_child1 = tmp;
}
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
traversalStack[stackPtr] = nodeAddrChild1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = node_addr_child1;
}
else {
/* One child was intersected. */
if(traverse_mask == 2) {
nodeAddr = nodeAddrChild1;
node_addr = node_addr_child1;
}
else if(traverse_mask == 0) {
/* Neither child was intersected. */
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
}
}
/* if node is leaf, fetch triangle list */
if(nodeAddr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
int primAddr = __float_as_int(leaf.x);
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(primAddr >= 0) {
if(prim_addr >= 0) {
#endif
const int primAddr2 = __float_as_int(leaf.y);
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
/* pop */
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* intersect ray against primitive */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr);
triangle_intersect(kg,
&isect_precalc,
isect,
P,
visibility,
object,
prim_addr);
}
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* intersect ray against primitive */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr);
motion_triangle_intersect(kg,
isect,
P,
dir,
ray->time,
visibility,
object,
prim_addr);
}
break;
}
@ -217,7 +229,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -primAddr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
@ -243,25 +255,25 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
node_addr = kernel_tex_fetch(__object_node, object);
}
else {
/* pop */
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stackPtr >= 0) {
if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* instance pop */
@ -287,11 +299,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# endif
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
#endif /* FEATURE(BVH_MOTION) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
}

@ -50,12 +50,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
*/
/* traversal stack in CUDA thread-local memory */
int traversalStack[BVH_STACK_SIZE];
traversalStack[0] = ENTRYPOINT_SENTINEL;
int traversal_stack[BVH_STACK_SIZE];
traversal_stack[0] = ENTRYPOINT_SENTINEL;
/* traversal variables in registers */
int stackPtr = 0;
int nodeAddr = kernel_data.bvh.root;
int stack_ptr = 0;
int node_addr = kernel_data.bvh.root;
/* ray parameters in registers */
const float tmax = ray->t;
@ -103,10 +103,10 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
int nodeAddrChild1, traverse_mask;
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
int node_addr_child1, traverse_mask;
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
@ -116,7 +116,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# endif
idir,
isect_t,
nodeAddr,
node_addr,
visibility,
dist);
#else // __KERNEL_SSE2__
@ -131,70 +131,75 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Psplat,
idirsplat,
shufflexyz,
nodeAddr,
node_addr,
visibility,
dist);
#endif // __KERNEL_SSE2__
nodeAddr = __float_as_int(cnodes.z);
nodeAddrChild1 = __float_as_int(cnodes.w);
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
if(traverse_mask == 3) {
/* Both children were intersected, push the farther one. */
bool closestChild1 = (dist[1] < dist[0]);
if(closestChild1) {
int tmp = nodeAddr;
nodeAddr = nodeAddrChild1;
nodeAddrChild1 = tmp;
bool is_closest_child1 = (dist[1] < dist[0]);
if(is_closest_child1) {
int tmp = node_addr;
node_addr = node_addr_child1;
node_addr_child1 = tmp;
}
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
traversalStack[stackPtr] = nodeAddrChild1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = node_addr_child1;
}
else {
/* One child was intersected. */
if(traverse_mask == 2) {
nodeAddr = nodeAddrChild1;
node_addr = node_addr_child1;
}
else if(traverse_mask == 0) {
/* Neither child was intersected. */
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
}
}
/* if node is leaf, fetch triangle list */
if(nodeAddr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
int primAddr = __float_as_int(leaf.x);
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(primAddr >= 0) {
if(prim_addr >= 0) {
#endif
const int primAddr2 = __float_as_int(leaf.y);
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
bool hit;
/* pop */
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* intersect ray against primitive */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr);
hit = triangle_intersect(kg,
&isect_precalc,
isect_array,
P,
visibility,
object,
prim_addr);
if(hit) {
/* Move on to next entry in intersections array. */
isect_array++;
@ -224,15 +229,22 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* intersect ray against primitive */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr);
hit = motion_triangle_intersect(kg,
isect_array,
P,
dir,
ray->time,
visibility,
object,
prim_addr);
if(hit) {
/* Move on to next entry in intersections array. */
isect_array++;
@ -268,7 +280,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -primAddr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
@ -296,25 +308,25 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
# endif
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
node_addr = kernel_tex_fetch(__object_node, object);
}
else {
/* pop */
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stackPtr >= 0) {
if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
if(num_hits_in_instance) {
@ -357,11 +369,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# endif
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
--stackPtr;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
#endif /* FEATURE(BVH_MOTION) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
return num_hits;
}

@ -68,10 +68,10 @@ ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *__restrict kg,
const int far_x,
const int far_y,
const int far_z,
const int nodeAddr,
const int node_addr,
ssef *__restrict dist)
{
const int offset = nodeAddr + 1;
const int offset = node_addr + 1;
#ifdef __KERNEL_AVX2__
const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x);
const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y);
@ -89,17 +89,17 @@ ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *__restrict kg,
#endif
#ifdef __KERNEL_SSE41__
const ssef tNear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, tnear));
const ssef tFar = mini(mini(tfar_x, tfar_y), mini(tfar_z, tfar));
const sseb vmask = cast(tNear) > cast(tFar);
const ssef near = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, tnear));
const ssef far = mini(mini(tfar_x, tfar_y), mini(tfar_z, tfar));
const sseb vmask = cast(near) > cast(far);
int mask = (int)movemask(vmask)^0xf;
#else
const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
const sseb vmask = tNear <= tFar;
const ssef near = max4(tnear_x, tnear_y, tnear_z, tnear);
const ssef far = min4(tfar_x, tfar_y, tfar_z, tfar);
const sseb vmask = near <= far;
int mask = (int)movemask(vmask);
#endif
*dist = tNear;
*dist = near;
return mask;
}
@ -119,11 +119,11 @@ ccl_device_inline int qbvh_aligned_node_intersect_robust(
const int far_x,
const int far_y,
const int far_z,
const int nodeAddr,
const int node_addr,
const float difl,
ssef *__restrict dist)
{
const int offset = nodeAddr + 1;
const int offset = node_addr + 1;
#ifdef __KERNEL_AVX2__
const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x);
const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y);
@ -142,10 +142,10 @@ ccl_device_inline int qbvh_aligned_node_intersect_robust(
const float round_down = 1.0f - difl;
const float round_up = 1.0f + difl;
const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
const sseb vmask = round_down*tNear <= round_up*tFar;
*dist = tNear;
const ssef near = max4(tnear_x, tnear_y, tnear_z, tnear);
const ssef far = min4(tfar_x, tfar_y, tfar_z, tfar);
const sseb vmask = round_down*near <= round_up*far;
*dist = near;
return (int)movemask(vmask);
}
@ -167,10 +167,10 @@ ccl_device_inline int qbvh_unaligned_node_intersect(
const int far_x,
const int far_y,
const int far_z,
const int nodeAddr,
const int node_addr,
ssef *__restrict dist)
{
const int offset = nodeAddr;
const int offset = node_addr;
const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
@ -215,10 +215,10 @@ ccl_device_inline int qbvh_unaligned_node_intersect(
const ssef tfar_x = maxi(tlower_x, tupper_x);
const ssef tfar_y = maxi(tlower_y, tupper_y);
const ssef tfar_z = maxi(tlower_z, tupper_z);
const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z);
const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z);
const sseb vmask = tNear <= tFar;
*dist = tNear;
const ssef near = max4(tnear, tnear_x, tnear_y, tnear_z);
const ssef far = min4(tfar, tfar_x, tfar_y, tfar_z);
const sseb vmask = near <= far;
*dist = near;
return movemask(vmask);
#else
const ssef tnear_x = min(tlower_x, tupper_x);
@ -227,10 +227,10 @@ ccl_device_inline int qbvh_unaligned_node_intersect(
const ssef tfar_x = max(tlower_x, tupper_x);
const ssef tfar_y = max(tlower_y, tupper_y);
const ssef tfar_z = max(tlower_z, tupper_z);
const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z);
const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z);
const sseb vmask = tNear <= tFar;
*dist = tNear;
const ssef near = max4(tnear, tnear_x, tnear_y, tnear_z);
const ssef far = min4(tfar, tfar_x, tfar_y, tfar_z);
const sseb vmask = near <= far;
*dist = near;
return movemask(vmask);
#endif
}
@ -251,11 +251,11 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust(
const int far_x,
const int far_y,
const int far_z,
const int nodeAddr,
const int node_addr,
const float difl,
ssef *__restrict dist)
{
const int offset = nodeAddr;
const int offset = node_addr;
const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
@ -311,10 +311,10 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust(
const ssef tfar_y = max(tlower_y, tupper_y);
const ssef tfar_z = max(tlower_z, tupper_z);
#endif
const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z);
const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z);
const sseb vmask = round_down*tNear <= round_up*tFar;
*dist = tNear;
const ssef near = max4(tnear, tnear_x, tnear_y, tnear_z);
const ssef far = min4(tfar, tfar_x, tfar_y, tfar_z);
const sseb vmask = round_down*near <= round_up*far;
*dist = near;
return movemask(vmask);
}
@ -339,10 +339,10 @@ ccl_device_inline int qbvh_node_intersect(
const int far_x,
const int far_y,
const int far_z,
const int nodeAddr,
const int node_addr,
ssef *__restrict dist)
{
const int offset = nodeAddr;
const int offset = node_addr;
const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return qbvh_unaligned_node_intersect(kg,
@ -356,7 +356,7 @@ ccl_device_inline int qbvh_node_intersect(
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
nodeAddr,
node_addr,
dist);
}
else {
@ -371,7 +371,7 @@ ccl_device_inline int qbvh_node_intersect(
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
nodeAddr,
node_addr,
dist);
}
}
@ -392,11 +392,11 @@ ccl_device_inline int qbvh_node_intersect_robust(
const int far_x,
const int far_y,
const int far_z,
const int nodeAddr,
const int node_addr,
const float difl,
ssef *__restrict dist)
{
const int offset = nodeAddr;
const int offset = node_addr;
const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return qbvh_unaligned_node_intersect_robust(kg,
@ -410,7 +410,7 @@ ccl_device_inline int qbvh_node_intersect_robust(
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
nodeAddr,
node_addr,
difl,
dist);
}
@ -426,7 +426,7 @@ ccl_device_inline int qbvh_node_intersect_robust(
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
nodeAddr,
node_addr,
difl,
dist);
}

@ -45,12 +45,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
traversalStack[0].addr = ENTRYPOINT_SENTINEL;
QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
int stackPtr = 0;
int nodeAddr = kernel_data.bvh.root;
int stack_ptr = 0;
int node_addr = kernel_data.bvh.root;
/* Ray parameters in registers. */
const float tmax = ray->t;
@ -106,53 +106,53 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
do {
/* Traverse internal nodes. */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0) {
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
continue;
}
#endif
ssef dist;
int traverseChild = NODE_INTERSECT(kg,
tnear,
tfar,
int child_mask = NODE_INTERSECT(kg,
tnear,
tfar,
#ifdef __KERNEL_AVX2__
P_idir4,
P_idir4,
#endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
org4,
org4,
# endif
# if BVH_FEATURE(BVH_HAIR)
dir4,
dir4,
# endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
nodeAddr,
&dist);
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
node_addr,
&dist);
if(traverseChild != 0) {
if(child_mask != 0) {
float4 cnodes;
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
}
else
#endif
{
cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
}
/* One child is hit, continue with that child. */
int r = __bscf(traverseChild);
if(traverseChild == 0) {
nodeAddr = __float_as_int(cnodes[r]);
int r = __bscf(child_mask);
if(child_mask == 0) {
node_addr = __float_as_int(cnodes[r]);
continue;
}
@ -161,24 +161,24 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
if(traverseChild == 0) {
if(child_mask == 0) {
if(d1 < d0) {
nodeAddr = c1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
node_addr = c1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c0;
traversal_stack[stack_ptr].dist = d0;
continue;
}
else {
nodeAddr = c0;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
node_addr = c0;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c1;
traversal_stack[stack_ptr].dist = d1;
continue;
}
}
@ -186,86 +186,86 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Here starts the slow path for 3 or 4 hit children. We push
* all nodes onto the stack to sort them there.
*/
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c1;
traversal_stack[stack_ptr].dist = d1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c0;
traversal_stack[stack_ptr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
*/
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
if(traverseChild == 0) {
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2]);
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
if(child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
traversal_stack[stack_ptr].dist = d2;
qbvh_stack_sort(&traversal_stack[stack_ptr],
&traversal_stack[stack_ptr - 1],
&traversal_stack[stack_ptr - 2]);
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
continue;
}
/* Four children are hit, push all onto stack and sort 4
* stack items, continue with closest child.
*/
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c3;
traversalStack[stackPtr].dist = d3;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
&traversalStack[stackPtr - 3]);
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c3;
traversal_stack[stack_ptr].dist = d3;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
traversal_stack[stack_ptr].dist = d2;
qbvh_stack_sort(&traversal_stack[stack_ptr],
&traversal_stack[stack_ptr - 1],
&traversal_stack[stack_ptr - 2],
&traversal_stack[stack_ptr - 3]);
}
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
}
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
continue;
}
#endif
int primAddr = __float_as_int(leaf.x);
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(primAddr >= 0) {
if(prim_addr >= 0) {
#endif
int primAddr2 = __float_as_int(leaf.y);
int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
const uint p_type = type & PRIMITIVE_ALL;
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
/* Primitive intersection. */
while(primAddr < primAddr2) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
while(prim_addr < prim_addr2) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
bool hit;
@ -275,22 +275,57 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
switch(p_type) {
case PRIMITIVE_TRIANGLE: {
hit = triangle_intersect(kg, &isect_precalc, isect_array, P, PATH_RAY_SHADOW, object, primAddr);
hit = triangle_intersect(kg,
&isect_precalc,
isect_array,
P,
PATH_RAY_SHADOW,
object,
prim_addr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr);
hit = motion_triangle_intersect(kg,
isect_array,
P,
dir,
ray->time,
PATH_RAY_SHADOW,
object,
prim_addr);
break;
}
#endif
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
else
hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = bvh_cardinal_curve_intersect(kg,
isect_array,
P,
dir,
PATH_RAY_SHADOW,
object,
prim_addr,
ray->time,
type,
NULL,
0, 0);
}
else {
hit = bvh_curve_intersect(kg,
isect_array,
P,
dir,
PATH_RAY_SHADOW,
object,
prim_addr,
ray->time,
type,
NULL,
0, 0);
}
break;
}
#endif
@ -342,13 +377,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
isect_array->t = isect_t;
}
primAddr++;
prim_addr++;
}
}
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* Instance push. */
object = kernel_tex_fetch(__prim_object, -primAddr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
@ -377,19 +412,19 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
node_addr = kernel_tex_fetch(__object_node, object);
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stackPtr >= 0) {
if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
if(num_hits_in_instance) {
@ -437,11 +472,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
return false;
}

@ -47,12 +47,12 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
traversalStack[0].addr = ENTRYPOINT_SENTINEL;
QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
int stackPtr = 0;
int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object);
int stack_ptr = 0;
int node_addr = kernel_tex_fetch(__object_node, subsurface_object);
/* Ray parameters in registers. */
float3 P = ray->P;
@ -116,44 +116,43 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
do {
/* Traverse internal nodes. */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
ssef dist;
int traverseChild = NODE_INTERSECT(kg,
tnear,
tfar,
int child_mask = NODE_INTERSECT(kg,
tnear,
tfar,
#ifdef __KERNEL_AVX2__
P_idir4,
P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
org4,
org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
dir4,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
nodeAddr,
&dist);
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
node_addr,
&dist);
if(traverseChild != 0) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
if(child_mask != 0) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
float4 cnodes;
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
}
else
#endif
{
cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
}
/* One child is hit, continue with that child. */
int r = __bscf(traverseChild);
if(traverseChild == 0) {
nodeAddr = __float_as_int(cnodes[r]);
int r = __bscf(child_mask);
if(child_mask == 0) {
node_addr = __float_as_int(cnodes[r]);
continue;
}
@ -162,24 +161,24 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
if(traverseChild == 0) {
if(child_mask == 0) {
if(d1 < d0) {
nodeAddr = c1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
node_addr = c1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c0;
traversal_stack[stack_ptr].dist = d0;
continue;
}
else {
nodeAddr = c0;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
node_addr = c0;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c1;
traversal_stack[stack_ptr].dist = d1;
continue;
}
}
@ -187,82 +186,82 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Here starts the slow path for 3 or 4 hit children. We push
* all nodes onto the stack to sort them there.
*/
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c1;
traversal_stack[stack_ptr].dist = d1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c0;
traversal_stack[stack_ptr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
*/
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
if(traverseChild == 0) {
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2]);
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
if(child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
traversal_stack[stack_ptr].dist = d2;
qbvh_stack_sort(&traversal_stack[stack_ptr],
&traversal_stack[stack_ptr - 1],
&traversal_stack[stack_ptr - 2]);
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
continue;
}
/* Four children are hit, push all onto stack and sort 4
* stack items, continue with closest child.
*/
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c3;
traversalStack[stackPtr].dist = d3;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
&traversalStack[stackPtr - 3]);
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c3;
traversal_stack[stack_ptr].dist = d3;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
traversal_stack[stack_ptr].dist = d2;
qbvh_stack_sort(&traversal_stack[stack_ptr],
&traversal_stack[stack_ptr - 1],
&traversal_stack[stack_ptr - 2],
&traversal_stack[stack_ptr - 3]);
}
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
}
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
int primAddr = __float_as_int(leaf.x);
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
int prim_addr = __float_as_int(leaf.x);
int primAddr2 = __float_as_int(leaf.y);
int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
/* Primitive intersection. */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* Intersect ray against primitive, */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
triangle_intersect_subsurface(kg,
&isect_precalc,
ss_isect,
P,
object,
primAddr,
prim_addr,
isect_t,
lcg_state,
max_hits);
@ -272,15 +271,15 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* Intersect ray against primitive. */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
motion_triangle_intersect_subsurface(kg,
ss_isect,
P,
dir,
ray->time,
object,
primAddr,
prim_addr,
isect_t,
lcg_state,
max_hits);
@ -292,8 +291,8 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
break;
}
}
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
}
#undef NODE_INTERSECT

@ -55,14 +55,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
traversalStack[0].addr = ENTRYPOINT_SENTINEL;
traversalStack[0].dist = -FLT_MAX;
QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
traversal_stack[0].dist = -FLT_MAX;
/* Traversal variables in registers. */
int stackPtr = 0;
int nodeAddr = kernel_data.bvh.root;
float nodeDist = -FLT_MAX;
int stack_ptr = 0;
int node_addr = kernel_data.bvh.root;
float node_dist = -FLT_MAX;
/* Ray parameters in registers. */
float3 P = ray->P;
@ -117,22 +117,22 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
do {
/* Traverse internal nodes. */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
if(UNLIKELY(nodeDist > isect->t)
if(UNLIKELY(node_dist > isect->t)
#ifdef __VISIBILITY_FLAG__
|| (__float_as_uint(inodes.x) & visibility) == 0)
#endif
{
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
node_dist = traversal_stack[stack_ptr].dist;
--stack_ptr;
continue;
}
int traverseChild;
int child_mask;
ssef dist;
BVH_DEBUG_NEXT_STEP();
@ -144,48 +144,48 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*
* Need to test if doing opposite would be any faster.
*/
traverseChild = NODE_INTERSECT_ROBUST(kg,
tnear,
tfar,
child_mask = NODE_INTERSECT_ROBUST(kg,
tnear,
tfar,
# ifdef __KERNEL_AVX2__
P_idir4,
P_idir4,
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
org4,
org4,
# endif
# if BVH_FEATURE(BVH_HAIR)
dir4,
dir4,
# endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
nodeAddr,
difl,
&dist);
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
node_addr,
difl,
&dist);
}
else
#endif /* BVH_HAIR_MINIMUM_WIDTH */
{
traverseChild = NODE_INTERSECT(kg,
tnear,
tfar,
child_mask = NODE_INTERSECT(kg,
tnear,
tfar,
#ifdef __KERNEL_AVX2__
P_idir4,
P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
org4,
org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
dir4,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
nodeAddr,
&dist);
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
node_addr,
&dist);
}
if(traverseChild != 0) {
if(child_mask != 0) {
float4 cnodes;
/* TODO(sergey): Investigate whether moving cnodes upwards
* gives a speedup (will be different cache pattern but will
@ -193,20 +193,20 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
}
else
#endif
{
cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
}
/* One child is hit, continue with that child. */
int r = __bscf(traverseChild);
int r = __bscf(child_mask);
float d0 = ((float*)&dist)[r];
if(traverseChild == 0) {
nodeAddr = __float_as_int(cnodes[r]);
nodeDist = d0;
if(child_mask == 0) {
node_addr = __float_as_int(cnodes[r]);
node_dist = d0;
continue;
}
@ -214,26 +214,26 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
* closer child.
*/
int c0 = __float_as_int(cnodes[r]);
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
if(traverseChild == 0) {
if(child_mask == 0) {
if(d1 < d0) {
nodeAddr = c1;
nodeDist = d1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
node_addr = c1;
node_dist = d1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c0;
traversal_stack[stack_ptr].dist = d0;
continue;
}
else {
nodeAddr = c0;
nodeDist = d0;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
node_addr = c0;
node_dist = d0;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c1;
traversal_stack[stack_ptr].dist = d1;
continue;
}
}
@ -241,116 +241,131 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Here starts the slow path for 3 or 4 hit children. We push
* all nodes onto the stack to sort them there.
*/
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c1;
traversal_stack[stack_ptr].dist = d1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c0;
traversal_stack[stack_ptr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
*/
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
if(traverseChild == 0) {
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2]);
nodeAddr = traversalStack[stackPtr].addr;
nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
if(child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
traversal_stack[stack_ptr].dist = d2;
qbvh_stack_sort(&traversal_stack[stack_ptr],
&traversal_stack[stack_ptr - 1],
&traversal_stack[stack_ptr - 2]);
node_addr = traversal_stack[stack_ptr].addr;
node_dist = traversal_stack[stack_ptr].dist;
--stack_ptr;
continue;
}
/* Four children are hit, push all onto stack and sort 4
* stack items, continue with closest child.
*/
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c3;
traversalStack[stackPtr].dist = d3;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
&traversalStack[stackPtr - 3]);
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c3;
traversal_stack[stack_ptr].dist = d3;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
traversal_stack[stack_ptr].dist = d2;
qbvh_stack_sort(&traversal_stack[stack_ptr],
&traversal_stack[stack_ptr - 1],
&traversal_stack[stack_ptr - 2],
&traversal_stack[stack_ptr - 3]);
}
nodeAddr = traversalStack[stackPtr].addr;
nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
node_dist = traversal_stack[stack_ptr].dist;
--stack_ptr;
}
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
#ifdef __VISIBILITY_FLAG__
if(UNLIKELY((nodeDist > isect->t) ||
if(UNLIKELY((node_dist > isect->t) ||
((__float_as_uint(leaf.z) & visibility) == 0)))
#else
if(UNLIKELY((nodeDist > isect->t)))
if(UNLIKELY((node_dist > isect->t)))
#endif
{
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
node_dist = traversal_stack[stack_ptr].dist;
--stack_ptr;
continue;
}
int primAddr = __float_as_int(leaf.x);
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(primAddr >= 0) {
if(prim_addr >= 0) {
#endif
int primAddr2 = __float_as_int(leaf.y);
int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
node_dist = traversal_stack[stack_ptr].dist;
--stack_ptr;
/* Primitive intersection. */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
for(; primAddr < primAddr2; primAddr++) {
for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_STEP();
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect(kg,
&isect_precalc,
isect,
P,
visibility,
object,
prim_addr)) {
tfar = ssef(isect->t);
/* Shadow ray early termination. */
if(visibility == PATH_RAY_SHADOW_OPAQUE)
if(visibility == PATH_RAY_SHADOW_OPAQUE) {
return true;
}
}
}
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; primAddr < primAddr2; primAddr++) {
for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_STEP();
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
isect,
P,
dir,
ray->time,
visibility,
object,
prim_addr)) {
tfar = ssef(isect->t);
/* Shadow ray early termination. */
if(visibility == PATH_RAY_SHADOW_OPAQUE)
if(visibility == PATH_RAY_SHADOW_OPAQUE) {
return true;
}
}
}
break;
@ -359,19 +374,44 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
for(; primAddr < primAddr2; primAddr++) {
for(; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_STEP();
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
bool hit;
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
else
hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = bvh_cardinal_curve_intersect(kg,
isect,
P,
dir,
visibility,
object,
prim_addr,
ray->time,
type,
lcg_state,
difl,
extmax);
}
else {
hit = bvh_curve_intersect(kg,
isect,
P,
dir,
visibility,
object,
prim_addr,
ray->time,
type,
lcg_state,
difl,
extmax);
}
if(hit) {
tfar = ssef(isect->t);
/* Shadow ray early termination. */
if(visibility == PATH_RAY_SHADOW_OPAQUE)
if(visibility == PATH_RAY_SHADOW_OPAQUE) {
return true;
}
}
}
break;
@ -382,12 +422,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* Instance push. */
object = kernel_tex_fetch(__prim_object, -primAddr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_itfm);
qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
# else
qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist);
qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
# endif
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
@ -408,21 +448,21 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
traversalStack[stackPtr].dist = -FLT_MAX;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
traversal_stack[stack_ptr].dist = -FLT_MAX;
nodeAddr = kernel_tex_fetch(__object_node, object);
node_addr = kernel_tex_fetch(__object_node, object);
BVH_DEBUG_NEXT_INSTANCE();
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stackPtr >= 0) {
if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
@ -451,12 +491,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr].addr;
nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
node_dist = traversal_stack[stack_ptr].dist;
--stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
}

@ -44,12 +44,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
traversalStack[0].addr = ENTRYPOINT_SENTINEL;
QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
int stackPtr = 0;
int nodeAddr = kernel_data.bvh.root;
int stack_ptr = 0;
int node_addr = kernel_data.bvh.root;
/* Ray parameters in registers. */
float3 P = ray->P;
@ -102,52 +102,52 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
do {
/* Traverse internal nodes. */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
#ifdef __VISIBILITY_FLAG__
float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
if((__float_as_uint(inodes.x) & visibility) == 0) {
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
continue;
}
#endif
ssef dist;
int traverseChild = NODE_INTERSECT(kg,
tnear,
tfar,
int child_mask = NODE_INTERSECT(kg,
tnear,
tfar,
#ifdef __KERNEL_AVX2__
P_idir4,
P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
org4,
org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
dir4,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
nodeAddr,
&dist);
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
node_addr,
&dist);
if(traverseChild != 0) {
if(child_mask != 0) {
float4 cnodes;
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
}
else
#endif
{
cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
}
/* One child is hit, continue with that child. */
int r = __bscf(traverseChild);
if(traverseChild == 0) {
nodeAddr = __float_as_int(cnodes[r]);
int r = __bscf(child_mask);
if(child_mask == 0) {
node_addr = __float_as_int(cnodes[r]);
continue;
}
@ -156,24 +156,24 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
if(traverseChild == 0) {
if(child_mask == 0) {
if(d1 < d0) {
nodeAddr = c1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
node_addr = c1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c0;
traversal_stack[stack_ptr].dist = d0;
continue;
}
else {
nodeAddr = c0;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
node_addr = c0;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c1;
traversal_stack[stack_ptr].dist = d1;
continue;
}
}
@ -181,102 +181,102 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Here starts the slow path for 3 or 4 hit children. We push
* all nodes onto the stack to sort them there.
*/
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c1;
traversal_stack[stack_ptr].dist = d1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c0;
traversal_stack[stack_ptr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
*/
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
if(traverseChild == 0) {
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2]);
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
if(child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
traversal_stack[stack_ptr].dist = d2;
qbvh_stack_sort(&traversal_stack[stack_ptr],
&traversal_stack[stack_ptr - 1],
&traversal_stack[stack_ptr - 2]);
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
continue;
}
/* Four children are hit, push all onto stack and sort 4
* stack items, continue with closest child.
*/
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c3;
traversalStack[stackPtr].dist = d3;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
&traversalStack[stackPtr - 3]);
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c3;
traversal_stack[stack_ptr].dist = d3;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
traversal_stack[stack_ptr].dist = d2;
qbvh_stack_sort(&traversal_stack[stack_ptr],
&traversal_stack[stack_ptr - 1],
&traversal_stack[stack_ptr - 2],
&traversal_stack[stack_ptr - 3]);
}
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
}
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
int primAddr = __float_as_int(leaf.x);
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(primAddr >= 0) {
if(prim_addr >= 0) {
#endif
int primAddr2 = __float_as_int(leaf.y);
int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
const uint p_type = type & PRIMITIVE_ALL;
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
/* Primitive intersection. */
switch(p_type) {
case PRIMITIVE_TRIANGLE: {
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr);
triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, prim_addr);
}
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr);
motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr);
}
break;
}
@ -286,7 +286,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* Instance push. */
object = kernel_tex_fetch(__prim_object, -primAddr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
@ -315,25 +315,25 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
node_addr = kernel_tex_fetch(__object_node, object);
}
else {
/* Pop. */
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
}
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stackPtr >= 0) {
if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
@ -362,11 +362,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
}

@ -45,12 +45,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
traversalStack[0].addr = ENTRYPOINT_SENTINEL;
QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
int stackPtr = 0;
int nodeAddr = kernel_data.bvh.root;
int stack_ptr = 0;
int node_addr = kernel_data.bvh.root;
/* Ray parameters in registers. */
const float tmax = ray->t;
@ -106,52 +106,52 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
do {
/* Traverse internal nodes. */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
#ifdef __VISIBILITY_FLAG__
float4 inodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
if((__float_as_uint(inodes.x) & visibility) == 0) {
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
continue;
}
#endif
ssef dist;
int traverseChild = NODE_INTERSECT(kg,
tnear,
tfar,
int child_mask = NODE_INTERSECT(kg,
tnear,
tfar,
#ifdef __KERNEL_AVX2__
P_idir4,
P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
org4,
org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
dir4,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
nodeAddr,
&dist);
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
node_addr,
&dist);
if(traverseChild != 0) {
if(child_mask != 0) {
float4 cnodes;
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
}
else
#endif
{
cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
}
/* One child is hit, continue with that child. */
int r = __bscf(traverseChild);
if(traverseChild == 0) {
nodeAddr = __float_as_int(cnodes[r]);
int r = __bscf(child_mask);
if(child_mask == 0) {
node_addr = __float_as_int(cnodes[r]);
continue;
}
@ -160,24 +160,24 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
if(traverseChild == 0) {
if(child_mask == 0) {
if(d1 < d0) {
nodeAddr = c1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
node_addr = c1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c0;
traversal_stack[stack_ptr].dist = d0;
continue;
}
else {
nodeAddr = c0;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
node_addr = c0;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c1;
traversal_stack[stack_ptr].dist = d1;
continue;
}
}
@ -185,88 +185,88 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Here starts the slow path for 3 or 4 hit children. We push
* all nodes onto the stack to sort them there.
*/
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c1;
traversal_stack[stack_ptr].dist = d1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c0;
traversal_stack[stack_ptr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
*/
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
if(traverseChild == 0) {
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2]);
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
if(child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
traversal_stack[stack_ptr].dist = d2;
qbvh_stack_sort(&traversal_stack[stack_ptr],
&traversal_stack[stack_ptr - 1],
&traversal_stack[stack_ptr - 2]);
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
continue;
}
/* Four children are hit, push all onto stack and sort 4
* stack items, continue with closest child.
*/
r = __bscf(traverseChild);
r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c3;
traversalStack[stackPtr].dist = d3;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
&traversalStack[stackPtr - 3]);
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c3;
traversal_stack[stack_ptr].dist = d3;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
traversal_stack[stack_ptr].dist = d2;
qbvh_stack_sort(&traversal_stack[stack_ptr],
&traversal_stack[stack_ptr - 1],
&traversal_stack[stack_ptr - 2],
&traversal_stack[stack_ptr - 3]);
}
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
}
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
int primAddr = __float_as_int(leaf.x);
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(primAddr >= 0) {
if(prim_addr >= 0) {
#endif
int primAddr2 = __float_as_int(leaf.y);
int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
const uint p_type = type & PRIMITIVE_ALL;
bool hit;
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
/* Primitive intersection. */
switch(p_type) {
case PRIMITIVE_TRIANGLE: {
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, primAddr);
hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, prim_addr);
if(hit) {
/* Move on to next entry in intersections array. */
isect_array++;
@ -295,16 +295,16 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, primAddr);
hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
if(hit) {
/* Move on to next entry in intersections array. */
isect_array++;
@ -337,7 +337,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* Instance push. */
object = kernel_tex_fetch(__prim_object, -primAddr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
@ -368,25 +368,25 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
num_hits_in_instance = 0;
isect_array->t = isect_t;
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
node_addr = kernel_tex_fetch(__object_node, object);
}
else {
/* Pop. */
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
}
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stackPtr >= 0) {
if(stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
@ -434,11 +434,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
isect_array->t = isect_t;
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
return num_hits;
}