2011-04-27 11:58:34 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2011, Blender Foundation.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version 2
|
|
|
|
* of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software Foundation,
|
|
|
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
|
|
|
#define MBVH_OBJECT_SENTINEL 0x76543210
|
|
|
|
#define MBVH_NODE_SIZE 8
|
|
|
|
#define MBVH_STACK_SIZE 1024
|
|
|
|
#define MBVH_RAY_STACK_SIZE 10000
|
|
|
|
|
|
|
|
typedef struct MBVHTask {
|
|
|
|
int node;
|
|
|
|
int index;
|
|
|
|
int num;
|
|
|
|
int object;
|
|
|
|
} MBVHTask;
|
|
|
|
|
|
|
|
typedef struct MVBHRay {
|
|
|
|
float3 P;
|
|
|
|
float u;
|
|
|
|
float3 idir;
|
|
|
|
float v;
|
|
|
|
float t;
|
|
|
|
int index;
|
|
|
|
int object;
|
|
|
|
|
|
|
|
float3 origP;
|
|
|
|
float3 origD;
|
|
|
|
float tmax;
|
|
|
|
} MBVHRay;
|
|
|
|
|
|
|
|
__device float3 mbvh_inverse_direction(float3 dir)
|
|
|
|
{
|
|
|
|
// Avoid divide by zero (ooeps = exp2f(-80.0f))
|
|
|
|
float ooeps = 0.00000000000000000000000082718061255302767487140869206996285356581211090087890625f;
|
|
|
|
float3 idir;
|
|
|
|
|
|
|
|
idir.x = 1.0f / (fabsf(dir.x) > ooeps ? dir.x : copysignf(ooeps, dir.x));
|
|
|
|
idir.y = 1.0f / (fabsf(dir.y) > ooeps ? dir.y : copysignf(ooeps, dir.y));
|
|
|
|
idir.z = 1.0f / (fabsf(dir.z) > ooeps ? dir.z : copysignf(ooeps, dir.z));
|
|
|
|
|
|
|
|
return idir;
|
|
|
|
}
|
|
|
|
|
|
|
|
__device void mbvh_instance_push(KernelGlobals *kg, int object, MBVHRay *ray)
|
|
|
|
{
|
|
|
|
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
|
|
|
|
|
2012-04-16 08:35:21 +00:00
|
|
|
ray->P = transform_point(&tfm, ray->origP);
|
2011-04-27 11:58:34 +00:00
|
|
|
|
|
|
|
float3 dir = ray->origD;
|
|
|
|
|
|
|
|
if(ray->t != ray->tmax) dir *= ray->t;
|
|
|
|
|
|
|
|
dir = transform_direction(&tfm, dir);
|
|
|
|
ray->idir = mbvh_inverse_direction(normalize(dir));
|
|
|
|
|
|
|
|
if(ray->t != ray->tmax) ray->t = len(dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
__device void mbvh_instance_pop(KernelGlobals *kg, int object, MBVHRay *ray)
|
|
|
|
{
|
|
|
|
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
|
|
|
|
|
|
|
|
if(ray->t != ray->tmax)
|
|
|
|
ray->t = len(transform_direction(&tfm, (1.0f/(ray->idir)) * (ray->t)));
|
|
|
|
|
|
|
|
ray->P = ray->origP;
|
|
|
|
ray->idir = mbvh_inverse_direction(ray->origD);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Sven Woop's algorithm */
|
|
|
|
__device void mbvh_triangle_intersect(KernelGlobals *kg, MBVHRay *ray, int object, int triAddr)
|
|
|
|
{
|
|
|
|
float3 P = ray->P;
|
|
|
|
float3 idir = ray->idir;
|
|
|
|
|
|
|
|
/* compute and check intersection t-value */
|
|
|
|
float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+0);
|
|
|
|
float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+1);
|
|
|
|
float3 dir = 1.0f/idir;
|
|
|
|
|
|
|
|
float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
|
|
|
|
float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z);
|
|
|
|
float t = Oz * invDz;
|
|
|
|
|
|
|
|
if(t > 0.0f && t < ray->t) {
|
|
|
|
/* compute and check barycentric u */
|
|
|
|
float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z;
|
|
|
|
float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z;
|
|
|
|
float u = Ox + t*Dx;
|
|
|
|
|
|
|
|
if(u >= 0.0f) {
|
|
|
|
/* compute and check barycentric v */
|
|
|
|
float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+2);
|
|
|
|
float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z;
|
|
|
|
float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z;
|
|
|
|
float v = Oy + t*Dy;
|
|
|
|
|
|
|
|
if(v >= 0.0f && u + v <= 1.0f) {
|
|
|
|
/* record intersection */
|
|
|
|
ray->index = triAddr;
|
|
|
|
ray->object = object;
|
|
|
|
ray->u = u;
|
|
|
|
ray->v = v;
|
|
|
|
ray->t = t;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
__device void mbvh_node_intersect(KernelGlobals *kg, __m128 *traverseChild,
|
|
|
|
__m128 *tHit, float3 P, float3 idir, float t, int nodeAddr)
|
|
|
|
{
|
|
|
|
/* X axis */
|
|
|
|
const __m128 bminx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+0);
|
|
|
|
const __m128 t0x = _mm_mul_ps(_mm_sub_ps(bminx, _mm_set_ps1(P.x)), _mm_set_ps1(idir.x));
|
|
|
|
const __m128 bmaxx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+1);
|
|
|
|
const __m128 t1x = _mm_mul_ps(_mm_sub_ps(bmaxx, _mm_set_ps1(P.x)), _mm_set_ps1(idir.x));
|
|
|
|
|
|
|
|
__m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps());
|
|
|
|
__m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t));
|
|
|
|
|
|
|
|
/* Y axis */
|
|
|
|
const __m128 bminy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+2);
|
|
|
|
const __m128 t0y = _mm_mul_ps(_mm_sub_ps(bminy, _mm_set_ps1(P.y)), _mm_set_ps1(idir.y));
|
|
|
|
const __m128 bmaxy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+3);
|
|
|
|
const __m128 t1y = _mm_mul_ps(_mm_sub_ps(bmaxy, _mm_set_ps1(P.y)), _mm_set_ps1(idir.y));
|
|
|
|
|
|
|
|
tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin);
|
|
|
|
tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax);
|
|
|
|
|
|
|
|
/* Z axis */
|
|
|
|
const __m128 bminz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+4);
|
|
|
|
const __m128 t0z = _mm_mul_ps(_mm_sub_ps(bminz, _mm_set_ps1(P.z)), _mm_set_ps1(idir.z));
|
|
|
|
const __m128 bmaxz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+5);
|
|
|
|
const __m128 t1z = _mm_mul_ps(_mm_sub_ps(bmaxz, _mm_set_ps1(P.z)), _mm_set_ps1(idir.z));
|
|
|
|
|
|
|
|
tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin);
|
|
|
|
tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax);
|
|
|
|
|
|
|
|
/* compare and get mask */
|
|
|
|
*traverseChild = _mm_cmple_ps(tmin, tmax);
|
|
|
|
|
|
|
|
/* get distance XXX probably wrong */
|
|
|
|
*tHit = tmin;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mbvh_sort_by_length(int id[4], float len[4])
|
|
|
|
{
|
|
|
|
for(int i = 1; i < 4; i++) {
|
|
|
|
int j = i - 1;
|
|
|
|
|
|
|
|
while(j >= 0 && len[j] > len[j+1]) {
|
|
|
|
swap(len[j], len[j+1]);
|
|
|
|
swap(id[j], id[j+1]);
|
|
|
|
j--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
__device void scene_intersect(KernelGlobals *kg, MBVHRay *rays, int numrays)
|
|
|
|
{
|
|
|
|
/* traversal stacks */
|
|
|
|
MBVHTask task_stack[MBVH_STACK_SIZE];
|
|
|
|
int active_ray_stacks[4][MBVH_RAY_STACK_SIZE];
|
|
|
|
int num_task, num_active[4] = {0, 0, 0, 0};
|
|
|
|
__m128i one_mm = _mm_set1_epi32(1);
|
|
|
|
|
|
|
|
/* push root node task on stack */
|
|
|
|
task_stack[0].node = kernel_data.bvh.root;
|
|
|
|
task_stack[0].index = 0;
|
|
|
|
task_stack[0].num = numrays;
|
|
|
|
task_stack[0].object = ~0;
|
|
|
|
num_task = 1;
|
|
|
|
|
|
|
|
/* push all rays in first SIMD lane */
|
|
|
|
for(int i = 0; i < numrays; i++)
|
|
|
|
active_ray_stacks[0][i] = i;
|
|
|
|
num_active[0] = numrays;
|
|
|
|
|
|
|
|
while(num_task >= 1) {
|
|
|
|
/* pop task */
|
|
|
|
MBVHTask task = task_stack[--num_task];
|
|
|
|
|
|
|
|
if(task.node == MBVH_OBJECT_SENTINEL) {
|
|
|
|
/* instance pop */
|
|
|
|
|
|
|
|
/* pop rays from stack */
|
|
|
|
num_active[task.index] -= task.num;
|
|
|
|
int ray_offset = num_active[task.index];
|
|
|
|
|
|
|
|
/* transform rays */
|
|
|
|
for(int i = 0; i < task.num; i++) {
|
|
|
|
MBVHRay *ray = &rays[active_ray_stacks[task.index][ray_offset + i]];
|
|
|
|
mbvh_instance_pop(kg, task.object, ray);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if(task.node >= 0) {
|
|
|
|
/* inner node? */
|
|
|
|
|
|
|
|
/* pop rays from stack*/
|
|
|
|
num_active[task.index] -= task.num;
|
|
|
|
int ray_offset = num_active[task.index];
|
|
|
|
|
|
|
|
/* initialze simd values */
|
|
|
|
__m128i num_active_mm = _mm_load_si128((__m128i*)num_active);
|
|
|
|
__m128 len_mm = _mm_set_ps1(0.0f);
|
|
|
|
|
|
|
|
for(int i = 0; i < task.num; i++) {
|
|
|
|
int rayid = active_ray_stacks[task.index][ray_offset + i];
|
|
|
|
MVBHRay *ray = rays + rayid;
|
|
|
|
|
|
|
|
/* intersect 4 QBVH node children */
|
|
|
|
__m128 result;
|
|
|
|
__m128 thit;
|
|
|
|
|
|
|
|
mbvh_node_intersect(kg, &result, &thit, ray->P, ray->idir, ray->t, task.node);
|
|
|
|
|
|
|
|
/* update length for sorting */
|
|
|
|
len_mm = _mm_add_ps(len_mm, _mm_and_ps(thit, result));
|
|
|
|
|
|
|
|
/* push rays on stack */
|
|
|
|
for(int j = 0; j < 4; j++)
|
|
|
|
active_ray_stacks[j][num_active[j]] = rayid;
|
|
|
|
|
|
|
|
/* update num active */
|
|
|
|
__m128i resulti = _mm_and_si128(*((__m128i*)&result), one_mm);
|
|
|
|
num_active_mm = _mm_add_epi32(resulti, num_active_mm);
|
|
|
|
_mm_store_si128((__m128i*)num_active, num_active_mm);
|
|
|
|
}
|
|
|
|
|
|
|
|
if(num_active[0] || num_active[1] || num_active[2] || num_active[3]) {
|
|
|
|
/* load child node addresses */
|
|
|
|
float4 cnodes = kernel_tex_fetch(__bvh_nodes, task.node);
|
|
|
|
int child[4] = {
|
|
|
|
__float_as_int(cnodes.x),
|
|
|
|
__float_as_int(cnodes.y),
|
|
|
|
__float_as_int(cnodes.z),
|
|
|
|
__float_as_int(cnodes.w)};
|
|
|
|
|
|
|
|
/* sort nodes by average intersection distance */
|
|
|
|
int ids[4] = {0, 1, 2, 3};
|
|
|
|
float len[4];
|
|
|
|
|
|
|
|
_mm_store_ps(len, len_mm);
|
|
|
|
mbvh_sort_by_length(ids, len);
|
|
|
|
|
|
|
|
/* push new tasks on stack */
|
|
|
|
for(int j = 0; j < 4; j++) {
|
|
|
|
if(num_active[j]) {
|
|
|
|
int id = ids[j];
|
|
|
|
|
|
|
|
task_stack[num_task].node = child[id];
|
|
|
|
task_stack[num_task].index = id;
|
|
|
|
task_stack[num_task].num = num_active[id];
|
|
|
|
task_stack[num_task].object = task.object;
|
|
|
|
num_task++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* fetch leaf node data */
|
|
|
|
float4 leaf = kernel_tex_fetch(__bvh_nodes, (-task.node-1)*MBVH_NODE_SIZE+(MBVH_NODE_SIZE-2));
|
|
|
|
int triAddr = __float_as_int(leaf.x);
|
|
|
|
int triAddr2 = __float_as_int(leaf.y);
|
|
|
|
|
|
|
|
/* pop rays from stack*/
|
|
|
|
num_active[task.index] -= task.num;
|
|
|
|
int ray_offset = num_active[task.index];
|
|
|
|
|
|
|
|
/* triangles */
|
|
|
|
if(triAddr >= 0) {
|
|
|
|
int i, numq = (task.num >> 2) << 2;
|
|
|
|
|
|
|
|
/* SIMD ray leaf intersection */
|
|
|
|
for(i = 0; i < numq; i += 4) {
|
|
|
|
MBVHRay *ray4[4] = {
|
|
|
|
&rays[active_ray_stacks[task.index][ray_offset + i + 0]],
|
|
|
|
&rays[active_ray_stacks[task.index][ray_offset + i + 1]],
|
|
|
|
&rays[active_ray_stacks[task.index][ray_offset + i + 2]],
|
|
|
|
&rays[active_ray_stacks[task.index][ray_offset + i + 3]]};
|
|
|
|
|
|
|
|
/* load SoA */
|
|
|
|
|
|
|
|
while(triAddr < triAddr2) {
|
|
|
|
mbvh_triangle_intersect(ray4[0], task.object, task.node);
|
|
|
|
mbvh_triangle_intersect(ray4[1], task.object, task.node);
|
|
|
|
mbvh_triangle_intersect(ray4[2], task.object, task.node);
|
|
|
|
mbvh_triangle_intersect(ray4[3], task.object, task.node);
|
|
|
|
triAddr++;
|
|
|
|
|
|
|
|
/* some shadow ray optim could be done by setting t=0 */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* store AoS */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* mono ray leaf intersection */
|
|
|
|
for(; i < task.num; i++) {
|
|
|
|
MBVHRay *ray = &rays[active_ray_stacks[task.index][ray_offset + i]];
|
|
|
|
|
|
|
|
while(triAddr < triAddr2) {
|
|
|
|
mbvh_triangle_intersect(kg, ray, task.object, task.node);
|
|
|
|
triAddr++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* instance push */
|
|
|
|
int object = -triAddr-1;
|
|
|
|
int node = triAddr;
|
|
|
|
|
|
|
|
/* push instance pop task */
|
|
|
|
task_stack[num_task].node = MBVH_OBJECT_SENTINEL;
|
|
|
|
task_stack[num_task].index = task.index;
|
|
|
|
task_stack[num_task].num = task.num;
|
|
|
|
task_stack[num_task].object = object;
|
|
|
|
num_task++;
|
|
|
|
|
|
|
|
num_active[task.index] += task.num;
|
|
|
|
|
|
|
|
/* push node task */
|
|
|
|
task_stack[num_task].node = node;
|
|
|
|
task_stack[num_task].index = task.index;
|
|
|
|
task_stack[num_task].num = task.num;
|
|
|
|
task_stack[num_task].object = object;
|
|
|
|
num_task++;
|
|
|
|
|
|
|
|
for(int i = 0; i < task.num; i++) {
|
|
|
|
int rayid = active_ray_stacks[task.index][ray_offset + i];
|
|
|
|
|
|
|
|
/* push on stack for last task */
|
|
|
|
active_ray_stacks[task.index][num_active[task.index]] = rayid;
|
|
|
|
num_active[task.index]++;
|
|
|
|
|
|
|
|
/* transform ray */
|
|
|
|
MBVHRay *ray = &rays[rayid];
|
|
|
|
mbvh_instance_push(kg, object, ray);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
__device void mbvh_set_ray(MBVHRay *rays, int i, Ray *ray, float tmax)
|
|
|
|
{
|
|
|
|
MBVHRay *mray = &rays[i];
|
|
|
|
|
|
|
|
/* ray parameters in registers */
|
|
|
|
mray->P = ray->P;
|
|
|
|
mray->idir = mbvh_inverse_direction(ray->D);
|
|
|
|
mray->t = tmax;
|
|
|
|
}
|
|
|
|
|
|
|
|
__device bool mbvh_get_intersection(MVBHRay *rays, int i, Intersection *isect, float tmax)
|
|
|
|
{
|
|
|
|
MBVHRay *mray = &rays[i];
|
|
|
|
|
|
|
|
if(mray->t == tmax)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
isect->t = mray->t;
|
|
|
|
isect->u = mray->u;
|
|
|
|
isect->v = mray->v;
|
|
|
|
isect->index = mray->index;
|
|
|
|
isect->object = mray->object;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
__device bool mbvh_get_shadow(MBVHRay *rays, int i, float tmax)
|
|
|
|
{
|
|
|
|
return (rays[i].t == tmax);
|
|
|
|
}
|
|
|
|
|
|
|
|
CCL_NAMESPACE_END
|
|
|
|
|