Cycles: Use dedicated BVH for subsurface ray casting

This commit makes it so casting subsurface rays will totally ignore all
the BVH nodes and primitives which do not belong to a current object,
making it much simpler traversal code and reduces number of intersection
tests.

Reviewers: brecht, juicyfruit, dingto, lukasstockner97

Differential Revision: https://developer.blender.org/D1823
This commit is contained in:
Sergey Sharybin 2016-02-25 15:12:11 +01:00
parent 712a257994
commit 0e47e0cc9e
8 changed files with 161 additions and 334 deletions

@ -233,7 +233,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
Mesh *mesh = ob->mesh;
BVH *bvh = mesh->bvh;
if(!mesh->transform_applied) {
if(mesh->need_build_bvh()) {
if(mesh_map.find(mesh) == mesh_map.end()) {
prim_index_size += bvh->pack.prim_index.size();
tri_woop_size += bvh->pack.tri_woop.size();
@ -268,9 +268,10 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
foreach(Object *ob, objects) {
Mesh *mesh = ob->mesh;
/* if mesh transform is applied, that means it's already in the top
* level BVH, and we don't need to merge it in */
if(mesh->transform_applied) {
/* We assume that if mesh doesn't need own BVH it was already included
* into a top-level BVH and no packing here is needed.
*/
if(!mesh->need_build_bvh()) {
pack.object_node[object_offset++] = 0;
continue;
}

@ -180,7 +180,7 @@ void BVHBuild::add_references(BVHRange& root)
foreach(Object *ob, objects) {
if(params.top_level) {
if(ob->mesh->transform_applied) {
if(!ob->mesh->is_instanced()) {
num_alloc_references += ob->mesh->triangles.size();
num_alloc_references += count_curve_segments(ob->mesh);
}
@ -201,7 +201,7 @@ void BVHBuild::add_references(BVHRange& root)
foreach(Object *ob, objects) {
if(params.top_level) {
if(ob->mesh->transform_applied)
if(!ob->mesh->is_instanced())
add_reference_mesh(bounds, center, ob->mesh, i);
else
add_reference_object(bounds, center, ob, i);

@ -91,27 +91,9 @@ CCL_NAMESPACE_BEGIN
#include "geom_bvh_subsurface.h"
#endif
#if defined(__SUBSURFACE__) && defined(__INSTANCING__)
#define BVH_FUNCTION_NAME bvh_intersect_subsurface_instancing
#define BVH_FUNCTION_FEATURES BVH_INSTANCING
#include "geom_bvh_subsurface.h"
#endif
#if defined(__SUBSURFACE__) && defined(__HAIR__)
#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
#include "geom_bvh_subsurface.h"
#endif
#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
#include "geom_bvh_subsurface.h"
#endif
#if defined(__SUBSURFACE__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair_motion
#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
#define BVH_FUNCTION_FEATURES BVH_MOTION
#include "geom_bvh_subsurface.h"
#endif
@ -269,17 +251,6 @@ ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
#ifdef __HAIR__
if(kernel_data.bvh.have_curves) {
return bvh_intersect_subsurface_hair_motion(kg,
ray,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
}
#endif /* __HAIR__ */
return bvh_intersect_subsurface_motion(kg,
ray,
ss_isect,
@ -288,56 +259,12 @@ ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
max_hits);
}
#endif /* __OBJECT_MOTION__ */
#ifdef __HAIR__
if(kernel_data.bvh.have_curves) {
return bvh_intersect_subsurface_hair(kg,
ray,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
}
#endif /* __HAIR__ */
#ifdef __KERNEL_CPU__
#ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing) {
return bvh_intersect_subsurface_instancing(kg,
ray,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
}
#endif /* __INSTANCING__ */
return bvh_intersect_subsurface(kg,
ray,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
#else /* __KERNEL_CPU__ */
#ifdef __INSTANCING__
return bvh_intersect_subsurface_instancing(kg,
ray,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
#else
return bvh_intersect_subsurface(kg,
ray,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
#endif /* __INSTANCING__ */
#endif /* __KERNEL_CPU__ */
}
#endif

@ -25,7 +25,6 @@
* various features can be enabled/disabled. This way we can compile optimized
* versions for each case without new features slowing things down.
*
* BVH_INSTANCING: object instancing
* BVH_MOTION: motion blur rendering
*
*/
@ -41,17 +40,16 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
* - test if pushing distance on the stack helps (for non shadow rays)
* - separate version for shadow rays
* - likely and unlikely for if() statements
* - SSE for hair
* - test restrict attribute for pointers
*/
/* traversal stack in CUDA thread-local memory */
int traversalStack[BVH_STACK_SIZE];
traversalStack[0] = ENTRYPOINT_SENTINEL;
/* traversal variables in registers */
int stackPtr = 0;
int nodeAddr = kernel_data.bvh.root;
int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object);
/* ray parameters in registers */
float3 P = ray->P;
@ -62,14 +60,28 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
ss_isect->num_hits = 0;
const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object);
if(!(object_flag & SD_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
Transform ob_itfm;
bvh_instance_motion_push(kg,
subsurface_object,
ray,
&P,
&dir,
&idir,
&isect_t,
&ob_itfm);
#else
bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
#endif
object = subsurface_object;
}
#if defined(__KERNEL_SSE2__)
const shuffle_swap_t shuf_identity = shuffle_swap_identity();
const shuffle_swap_t shuf_swap = shuffle_swap_swap();
const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
ssef Psplat[3], idirsplat[3];
shuffle_swap_t shufflexyz[3];
@ -190,133 +202,56 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(primAddr >= 0) {
#endif
const int primAddr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
const int primAddr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
/* pop */
nodeAddr = traversalStack[stackPtr];
--stackPtr;
/* pop */
nodeAddr = traversalStack[stackPtr];
--stackPtr;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* intersect ray against primitive */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
/* only primitives from the same object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
if(tri_object != subsurface_object)
continue;
triangle_intersect_subsurface(kg,
&isect_precalc,
ss_isect,
P,
object,
primAddr,
isect_t,
lcg_state,
max_hits);
}
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* intersect ray against primitive */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
/* only primitives from the same object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
if(tri_object != subsurface_object)
continue;
motion_triangle_intersect_subsurface(kg,
ss_isect,
P,
dir,
ray->time,
object,
primAddr,
isect_t,
lcg_state,
max_hits);
}
break;
}
#endif
default: {
break;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* intersect ray against primitive */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
triangle_intersect_subsurface(kg,
&isect_precalc,
ss_isect,
P,
object,
primAddr,
isect_t,
lcg_state,
max_hits);
}
break;
}
}
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) {
object = subsurface_object;
#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
#else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
#endif
triangle_intersect_precalc(dir, &isect_precalc);
#if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
Psplat[1] = ssef(P.y);
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
++stackPtr;
kernel_assert(stackPtr < BVH_STACK_SIZE);
traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
case PRIMITIVE_MOTION_TRIANGLE: {
/* intersect ray against primitive */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
motion_triangle_intersect_subsurface(kg,
ss_isect,
P,
dir,
ray->time,
object,
primAddr,
isect_t,
lcg_state,
max_hits);
}
break;
}
else {
/* pop */
nodeAddr = traversalStack[stackPtr];
--stackPtr;
#endif
default: {
break;
}
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stackPtr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* instance pop */
#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
#else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect_t);
#endif
triangle_intersect_precalc(dir, &isect_precalc);
#if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
Psplat[1] = ssef(P.y);
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
--stackPtr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
}

@ -21,7 +21,6 @@
* various features can be enabled/disabled. This way we can compile optimized
* versions for each case without new features slowing things down.
*
* BVH_INSTANCING: object instancing
* BVH_MOTION: motion blur rendering
*
*/
@ -47,7 +46,7 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Traversal variables in registers. */
int stackPtr = 0;
int nodeAddr = kernel_data.bvh.root;
int nodeAddr = kernel_tex_fetch(__object_node, subsurface_object);
/* Ray parameters in registers. */
float3 P = ray->P;
@ -58,9 +57,23 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
ss_isect->num_hits = 0;
const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object);
if(!(object_flag & SD_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
Transform ob_itfm;
bvh_instance_motion_push(kg,
subsurface_object,
ray,
&P,
&dir,
&idir,
&isect_t,
&ob_itfm);
#else
bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
#endif
object = subsurface_object;
}
#ifndef __KERNEL_SSE41__
if(!isfinite(P.x)) {
@ -206,137 +219,54 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
int primAddr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(primAddr >= 0) {
#endif
int primAddr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
int primAddr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
/* Primitive intersection. */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* Intersect ray against primitive, */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
/* Only primitives from the same object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
if(tri_object != subsurface_object) {
continue;
}
triangle_intersect_subsurface(kg,
&isect_precalc,
ss_isect,
P,
object,
primAddr,
isect_t,
lcg_state,
max_hits);
}
break;
/* Primitive intersection. */
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* Intersect ray against primitive, */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
triangle_intersect_subsurface(kg,
&isect_precalc,
ss_isect,
P,
object,
primAddr,
isect_t,
lcg_state,
max_hits);
}
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* Intersect ray against primitive. */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
/* Only primitives from the same object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
if(tri_object != subsurface_object) {
continue;
}
motion_triangle_intersect_subsurface(kg,
ss_isect,
P,
dir,
ray->time,
object,
primAddr,
isect_t,
lcg_state,
max_hits);
}
break;
case PRIMITIVE_MOTION_TRIANGLE: {
/* Intersect ray against primitive. */
for(; primAddr < primAddr2; primAddr++) {
kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
motion_triangle_intersect_subsurface(kg,
ss_isect,
P,
dir,
ray->time,
object,
primAddr,
isect_t,
lcg_state,
max_hits);
}
#endif
default:
break;
break;
}
}
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* Instance push. */
if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) {
object = subsurface_object;
#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
#else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
#endif
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect_t);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
#else
org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
kernel_assert(stackPtr < BVH_QSTACK_SIZE);
traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
}
else {
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
default:
break;
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stackPtr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
#else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect_t);
#endif
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect_t);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
#else
org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
}

@ -97,6 +97,7 @@ Mesh::Mesh()
curve_attributes.curve_mesh = this;
has_volume = false;
has_surface_bssrdf = false;
}
Mesh::~Mesh()
@ -490,7 +491,7 @@ void Mesh::compute_bvh(SceneParams *params, Progress *progress, int n, int total
compute_bounds();
if(!transform_applied) {
if(need_build_bvh()) {
string msg = "Updating Mesh BVH ";
if(name == "")
msg += string_printf("%u/%u", (uint)(n+1), (uint)total);
@ -550,6 +551,21 @@ bool Mesh::has_motion_blur() const
curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)));
}
bool Mesh::need_build_bvh() const
{
return !transform_applied || has_surface_bssrdf;
}
bool Mesh::is_instanced() const
{
/* Currently we treat subsurface objects as instanced.
*
* While it might be not very optimal for ray traversal, it avoids having
* duplicated BVH in the memory, saving quite some space.
*/
return !transform_applied || has_surface_bssrdf;
}
/* Mesh Manager */
MeshManager::MeshManager()
@ -1142,10 +1158,14 @@ void MeshManager::device_update_flags(Device * /*device*/,
/* update flags */
foreach(Mesh *mesh, scene->meshes) {
mesh->has_volume = false;
foreach(uint shader, mesh->used_shaders) {
if(scene->shaders[shader]->has_volume) {
foreach(uint shader_index, mesh->used_shaders) {
const Shader *shader = scene->shaders[shader_index];
if(shader->has_volume) {
mesh->has_volume = true;
}
if(shader->has_surface_bssrdf) {
mesh->has_surface_bssrdf = true;
}
}
}
need_flags_update = false;
@ -1278,7 +1298,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
size_t i = 0, num_bvh = 0;
foreach(Mesh *mesh, scene->meshes)
if(mesh->need_update && !mesh->transform_applied)
if(mesh->need_update && mesh->need_build_bvh())
num_bvh++;
TaskPool pool;
@ -1291,7 +1311,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
&progress,
i,
num_bvh));
if(!mesh->transform_applied) {
if(mesh->need_build_bvh()) {
i++;
}
}

@ -87,6 +87,7 @@ public:
vector<bool> smooth;
bool has_volume; /* Set in the device_update_flags(). */
bool has_surface_bssrdf; /* Set in the device_update_flags(). */
vector<float4> curve_keys; /* co + radius */
vector<Curve> curves;
@ -143,6 +144,19 @@ public:
void tag_update(Scene *scene, bool rebuild);
bool has_motion_blur() const;
/* Check whether the mesh should have own BVH built separately. Briefly,
* own BVH is needed for mesh, if:
*
* - It is instanced multiple times, so each instance object should share the
* same BVH tree.
* - Special ray intersection is needed, for example to limit subsurface rays
* to only the mesh itself.
*/
bool need_build_bvh() const;
/* Check if the mesh should be treated as instanced. */
bool is_instanced() const;
};
/* Mesh Manager */

@ -512,7 +512,7 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, u
/* apply transforms for objects with single user meshes */
foreach(Object *object, scene->objects) {
if(mesh_users[object->mesh] == 1 &&
if((mesh_users[object->mesh] == 1 && !object->mesh->is_instanced()) &&
object->mesh->displacement_method == Mesh::DISPLACE_BUMP)
{
if(!(motion_blur && object->use_motion)) {