Cycles: Avoid having ShaderData on the stack
This commit introduces an SSS-oriented intersection structure which replaces the old logic of having separate arrays for intersections and shader data, and encapsulates all the data needed for SSS evaluation. This gives a huge stack memory saving on GPU. In my own experiments it gave a 25% memory usage reduction on GTX560Ti (722MB vs. 946MB). Unfortunately, this also gave a performance loss of 20%, which only happens on GPU. This is perhaps due to a different memory access pattern. Will be solved in the future, hopefully. Famous saying: won in memory - lost in time (which is also valid the other way around).
This commit is contained in:
parent
e6fff424db
commit
8bca34fe32
@ -255,38 +255,81 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, con
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SUBSURFACE__
|
#ifdef __SUBSURFACE__
|
||||||
ccl_device_intersect uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
|
ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
|
||||||
|
const Ray *ray,
|
||||||
|
SubsurfaceIntersection *ss_isect,
|
||||||
|
int subsurface_object,
|
||||||
|
uint *lcg_state,
|
||||||
|
int max_hits)
|
||||||
{
|
{
|
||||||
#ifdef __OBJECT_MOTION__
|
#ifdef __OBJECT_MOTION__
|
||||||
if(kernel_data.bvh.have_motion) {
|
if(kernel_data.bvh.have_motion) {
|
||||||
#ifdef __HAIR__
|
#ifdef __HAIR__
|
||||||
if(kernel_data.bvh.have_curves)
|
if(kernel_data.bvh.have_curves) {
|
||||||
return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
|
return bvh_intersect_subsurface_hair_motion(kg,
|
||||||
|
ray,
|
||||||
|
ss_isect,
|
||||||
|
subsurface_object,
|
||||||
|
lcg_state,
|
||||||
|
max_hits);
|
||||||
|
}
|
||||||
#endif /* __HAIR__ */
|
#endif /* __HAIR__ */
|
||||||
|
|
||||||
return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
|
return bvh_intersect_subsurface_motion(kg,
|
||||||
|
ray,
|
||||||
|
ss_isect,
|
||||||
|
subsurface_object,
|
||||||
|
lcg_state,
|
||||||
|
max_hits);
|
||||||
}
|
}
|
||||||
#endif /* __OBJECT_MOTION__ */
|
#endif /* __OBJECT_MOTION__ */
|
||||||
|
|
||||||
#ifdef __HAIR__
|
#ifdef __HAIR__
|
||||||
if(kernel_data.bvh.have_curves)
|
if(kernel_data.bvh.have_curves) {
|
||||||
return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, lcg_state, max_hits);
|
return bvh_intersect_subsurface_hair(kg,
|
||||||
|
ray,
|
||||||
|
ss_isect,
|
||||||
|
subsurface_object,
|
||||||
|
lcg_state,
|
||||||
|
max_hits);
|
||||||
|
}
|
||||||
#endif /* __HAIR__ */
|
#endif /* __HAIR__ */
|
||||||
|
|
||||||
#ifdef __KERNEL_CPU__
|
#ifdef __KERNEL_CPU__
|
||||||
|
|
||||||
#ifdef __INSTANCING__
|
#ifdef __INSTANCING__
|
||||||
if(kernel_data.bvh.have_instancing)
|
if(kernel_data.bvh.have_instancing) {
|
||||||
return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
|
return bvh_intersect_subsurface_instancing(kg,
|
||||||
|
ray,
|
||||||
|
ss_isect,
|
||||||
|
subsurface_object,
|
||||||
|
lcg_state,
|
||||||
|
max_hits);
|
||||||
|
}
|
||||||
#endif /* __INSTANCING__ */
|
#endif /* __INSTANCING__ */
|
||||||
|
|
||||||
return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
|
return bvh_intersect_subsurface(kg,
|
||||||
|
ray,
|
||||||
|
ss_isect,
|
||||||
|
subsurface_object,
|
||||||
|
lcg_state,
|
||||||
|
max_hits);
|
||||||
#else /* __KERNEL_CPU__ */
|
#else /* __KERNEL_CPU__ */
|
||||||
|
|
||||||
#ifdef __INSTANCING__
|
#ifdef __INSTANCING__
|
||||||
return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
|
return bvh_intersect_subsurface_instancing(kg,
|
||||||
|
ray,
|
||||||
|
ss_isect,
|
||||||
|
subsurface_object,
|
||||||
|
lcg_state,
|
||||||
|
max_hits);
|
||||||
#else
|
#else
|
||||||
return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
|
return bvh_intersect_subsurface(kg,
|
||||||
|
ray,
|
||||||
|
ss_isect,
|
||||||
|
subsurface_object,
|
||||||
|
lcg_state,
|
||||||
|
max_hits);
|
||||||
#endif /* __INSTANCING__ */
|
#endif /* __INSTANCING__ */
|
||||||
|
|
||||||
#endif /* __KERNEL_CPU__ */
|
#endif /* __KERNEL_CPU__ */
|
||||||
|
@ -30,9 +30,9 @@
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||||
const Ray *ray,
|
const Ray *ray,
|
||||||
Intersection *isect_array,
|
SubsurfaceIntersection *ss_isect,
|
||||||
int subsurface_object,
|
int subsurface_object,
|
||||||
uint *lcg_state,
|
uint *lcg_state,
|
||||||
int max_hits)
|
int max_hits)
|
||||||
@ -60,7 +60,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
int object = OBJECT_NONE;
|
int object = OBJECT_NONE;
|
||||||
float isect_t = ray->t;
|
float isect_t = ray->t;
|
||||||
|
|
||||||
uint num_hits = 0;
|
ss_isect->num_hits = 0;
|
||||||
|
|
||||||
#if BVH_FEATURE(BVH_MOTION)
|
#if BVH_FEATURE(BVH_MOTION)
|
||||||
Transform ob_itfm;
|
Transform ob_itfm;
|
||||||
@ -210,7 +210,15 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
|
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
|
||||||
if(tri_object != subsurface_object)
|
if(tri_object != subsurface_object)
|
||||||
continue;
|
continue;
|
||||||
triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
|
triangle_intersect_subsurface(kg,
|
||||||
|
&isect_precalc,
|
||||||
|
ss_isect,
|
||||||
|
P,
|
||||||
|
object,
|
||||||
|
primAddr,
|
||||||
|
isect_t,
|
||||||
|
lcg_state,
|
||||||
|
max_hits);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -223,7 +231,16 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
|
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
|
||||||
if(tri_object != subsurface_object)
|
if(tri_object != subsurface_object)
|
||||||
continue;
|
continue;
|
||||||
motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
|
motion_triangle_intersect_subsurface(kg,
|
||||||
|
ss_isect,
|
||||||
|
P,
|
||||||
|
dir,
|
||||||
|
ray->time,
|
||||||
|
object,
|
||||||
|
primAddr,
|
||||||
|
isect_t,
|
||||||
|
lcg_state,
|
||||||
|
max_hits);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -301,13 +318,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||||||
}
|
}
|
||||||
#endif /* FEATURE(BVH_INSTANCING) */
|
#endif /* FEATURE(BVH_INSTANCING) */
|
||||||
} while(nodeAddr != ENTRYPOINT_SENTINEL);
|
} while(nodeAddr != ENTRYPOINT_SENTINEL);
|
||||||
|
|
||||||
return num_hits;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
|
ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg,
|
||||||
const Ray *ray,
|
const Ray *ray,
|
||||||
Intersection *isect_array,
|
SubsurfaceIntersection *ss_isect,
|
||||||
int subsurface_object,
|
int subsurface_object,
|
||||||
uint *lcg_state,
|
uint *lcg_state,
|
||||||
int max_hits)
|
int max_hits)
|
||||||
@ -316,7 +331,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
|
|||||||
if(kernel_data.bvh.use_qbvh) {
|
if(kernel_data.bvh.use_qbvh) {
|
||||||
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
|
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
|
||||||
ray,
|
ray,
|
||||||
isect_array,
|
ss_isect,
|
||||||
subsurface_object,
|
subsurface_object,
|
||||||
lcg_state,
|
lcg_state,
|
||||||
max_hits);
|
max_hits);
|
||||||
@ -327,7 +342,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
|
|||||||
kernel_assert(kernel_data.bvh.use_qbvh == false);
|
kernel_assert(kernel_data.bvh.use_qbvh == false);
|
||||||
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
|
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
|
||||||
ray,
|
ray,
|
||||||
isect_array,
|
ss_isect,
|
||||||
subsurface_object,
|
subsurface_object,
|
||||||
lcg_state,
|
lcg_state,
|
||||||
max_hits);
|
max_hits);
|
||||||
|
@ -358,8 +358,17 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection
|
|||||||
* multiple hits we pick a single random primitive as the intersection point. */
|
* multiple hits we pick a single random primitive as the intersection point. */
|
||||||
|
|
||||||
#ifdef __SUBSURFACE__
|
#ifdef __SUBSURFACE__
|
||||||
ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array,
|
ccl_device_inline void motion_triangle_intersect_subsurface(
|
||||||
float3 P, float3 dir, float time, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits)
|
KernelGlobals *kg,
|
||||||
|
SubsurfaceIntersection *ss_isect,
|
||||||
|
float3 P,
|
||||||
|
float3 dir,
|
||||||
|
float time,
|
||||||
|
int object,
|
||||||
|
int triAddr,
|
||||||
|
float tmax,
|
||||||
|
uint *lcg_state,
|
||||||
|
int max_hits)
|
||||||
{
|
{
|
||||||
/* primitive index for vertex location lookup */
|
/* primitive index for vertex location lookup */
|
||||||
int prim = kernel_tex_fetch(__prim_index, triAddr);
|
int prim = kernel_tex_fetch(__prim_index, triAddr);
|
||||||
@ -373,30 +382,34 @@ ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, I
|
|||||||
float t, u, v;
|
float t, u, v;
|
||||||
|
|
||||||
if(ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &u, &v, &t)) {
|
if(ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &u, &v, &t)) {
|
||||||
(*num_hits)++;
|
ss_isect->num_hits++;
|
||||||
|
|
||||||
int hit;
|
int hit;
|
||||||
|
|
||||||
if(*num_hits <= max_hits) {
|
if(ss_isect->num_hits <= max_hits) {
|
||||||
hit = *num_hits - 1;
|
hit = ss_isect->num_hits - 1;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* reservoir sampling: if we are at the maximum number of
|
/* reservoir sampling: if we are at the maximum number of
|
||||||
* hits, randomly replace element or skip it */
|
* hits, randomly replace element or skip it */
|
||||||
hit = lcg_step_uint(lcg_state) % *num_hits;
|
hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
|
||||||
|
|
||||||
if(hit >= max_hits)
|
if(hit >= max_hits)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* record intersection */
|
/* record intersection */
|
||||||
Intersection *isect = &isect_array[hit];
|
Intersection *isect = &ss_isect->hits[hit];
|
||||||
isect->t = t;
|
isect->t = t;
|
||||||
isect->u = u;
|
isect->u = u;
|
||||||
isect->v = v;
|
isect->v = v;
|
||||||
isect->prim = triAddr;
|
isect->prim = triAddr;
|
||||||
isect->object = object;
|
isect->object = object;
|
||||||
isect->type = PRIMITIVE_MOTION_TRIANGLE;
|
isect->type = PRIMITIVE_MOTION_TRIANGLE;
|
||||||
|
|
||||||
|
/* Record geometric normal. */
|
||||||
|
ss_isect->Ng[hit] = normalize(cross(verts[1] - verts[0],
|
||||||
|
verts[2] - verts[0]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -26,9 +26,9 @@
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||||
const Ray *ray,
|
const Ray *ray,
|
||||||
Intersection *isect_array,
|
SubsurfaceIntersection *ss_isect,
|
||||||
int subsurface_object,
|
int subsurface_object,
|
||||||
uint *lcg_state,
|
uint *lcg_state,
|
||||||
int max_hits)
|
int max_hits)
|
||||||
@ -55,7 +55,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
float3 idir = bvh_inverse_direction(dir);
|
float3 idir = bvh_inverse_direction(dir);
|
||||||
int object = OBJECT_NONE;
|
int object = OBJECT_NONE;
|
||||||
float isect_t = ray->t;
|
float isect_t = ray->t;
|
||||||
uint num_hits = 0;
|
|
||||||
|
ss_isect->num_hits = 0;
|
||||||
|
|
||||||
#if BVH_FEATURE(BVH_MOTION)
|
#if BVH_FEATURE(BVH_MOTION)
|
||||||
Transform ob_itfm;
|
Transform ob_itfm;
|
||||||
@ -63,7 +64,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
|
|
||||||
#ifndef __KERNEL_SSE41__
|
#ifndef __KERNEL_SSE41__
|
||||||
if(!isfinite(P.x)) {
|
if(!isfinite(P.x)) {
|
||||||
return 0;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -226,7 +227,15 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
if(tri_object != subsurface_object) {
|
if(tri_object != subsurface_object) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
|
triangle_intersect_subsurface(kg,
|
||||||
|
&isect_precalc,
|
||||||
|
ss_isect,
|
||||||
|
P,
|
||||||
|
object,
|
||||||
|
primAddr,
|
||||||
|
isect_t,
|
||||||
|
lcg_state,
|
||||||
|
max_hits);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -240,7 +249,16 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
if(tri_object != subsurface_object) {
|
if(tri_object != subsurface_object) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
|
motion_triangle_intersect_subsurface(kg,
|
||||||
|
ss_isect,
|
||||||
|
P,
|
||||||
|
dir,
|
||||||
|
ray->time,
|
||||||
|
object,
|
||||||
|
primAddr,
|
||||||
|
isect_t,
|
||||||
|
lcg_state,
|
||||||
|
max_hits);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -321,6 +339,4 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
|||||||
}
|
}
|
||||||
#endif /* FEATURE(BVH_INSTANCING) */
|
#endif /* FEATURE(BVH_INSTANCING) */
|
||||||
} while(nodeAddr != ENTRYPOINT_SENTINEL);
|
} while(nodeAddr != ENTRYPOINT_SENTINEL);
|
||||||
|
|
||||||
return num_hits;
|
|
||||||
}
|
}
|
||||||
|
@ -204,12 +204,11 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
|
|||||||
ccl_device_inline void triangle_intersect_subsurface(
|
ccl_device_inline void triangle_intersect_subsurface(
|
||||||
KernelGlobals *kg,
|
KernelGlobals *kg,
|
||||||
const IsectPrecalc *isect_precalc,
|
const IsectPrecalc *isect_precalc,
|
||||||
Intersection *isect_array,
|
SubsurfaceIntersection *ss_isect,
|
||||||
float3 P,
|
float3 P,
|
||||||
int object,
|
int object,
|
||||||
int triAddr,
|
int triAddr,
|
||||||
float tmax,
|
float tmax,
|
||||||
uint *num_hits,
|
|
||||||
uint *lcg_state,
|
uint *lcg_state,
|
||||||
int max_hits)
|
int max_hits)
|
||||||
{
|
{
|
||||||
@ -272,29 +271,36 @@ ccl_device_inline void triangle_intersect_subsurface(
|
|||||||
/* Normalize U, V, W, and T. */
|
/* Normalize U, V, W, and T. */
|
||||||
const float inv_det = 1.0f / det;
|
const float inv_det = 1.0f / det;
|
||||||
|
|
||||||
(*num_hits)++;
|
ss_isect->num_hits++;
|
||||||
int hit;
|
int hit;
|
||||||
|
|
||||||
if(*num_hits <= max_hits) {
|
if(ss_isect->num_hits <= max_hits) {
|
||||||
hit = *num_hits - 1;
|
hit = ss_isect->num_hits - 1;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* reservoir sampling: if we are at the maximum number of
|
/* reservoir sampling: if we are at the maximum number of
|
||||||
* hits, randomly replace element or skip it */
|
* hits, randomly replace element or skip it */
|
||||||
hit = lcg_step_uint(lcg_state) % *num_hits;
|
hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
|
||||||
|
|
||||||
if(hit >= max_hits)
|
if(hit >= max_hits)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* record intersection */
|
/* record intersection */
|
||||||
Intersection *isect = &isect_array[hit];
|
Intersection *isect = &ss_isect->hits[hit];
|
||||||
isect->prim = triAddr;
|
isect->prim = triAddr;
|
||||||
isect->object = object;
|
isect->object = object;
|
||||||
isect->type = PRIMITIVE_TRIANGLE;
|
isect->type = PRIMITIVE_TRIANGLE;
|
||||||
isect->u = U * inv_det;
|
isect->u = U * inv_det;
|
||||||
isect->v = V * inv_det;
|
isect->v = V * inv_det;
|
||||||
isect->t = T * inv_det;
|
isect->t = T * inv_det;
|
||||||
|
|
||||||
|
/* Record geometric normal. */
|
||||||
|
/* TODO(sergey): Use float4_to_float3() on just an edges. */
|
||||||
|
const float3 v0 = float4_to_float3(tri_a);
|
||||||
|
const float3 v1 = float4_to_float3(tri_b);
|
||||||
|
const float3 v2 = float4_to_float3(tri_c);
|
||||||
|
ss_isect->Ng[hit] = normalize(cross(v1 - v0, v2 - v0));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -338,10 +338,16 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd
|
|||||||
if(sc) {
|
if(sc) {
|
||||||
uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
|
uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
|
||||||
|
|
||||||
ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
|
SubsurfaceIntersection ss_isect;
|
||||||
float bssrdf_u, bssrdf_v;
|
float bssrdf_u, bssrdf_v;
|
||||||
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
|
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
|
||||||
int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
|
int num_hits = subsurface_scatter_multi_intersect(kg,
|
||||||
|
&ss_isect,
|
||||||
|
sd,
|
||||||
|
sc,
|
||||||
|
&lcg_state,
|
||||||
|
bssrdf_u, bssrdf_v,
|
||||||
|
false);
|
||||||
#ifdef __VOLUME__
|
#ifdef __VOLUME__
|
||||||
Ray volume_ray = *ray;
|
Ray volume_ray = *ray;
|
||||||
bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
|
bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
|
||||||
@ -350,15 +356,26 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd
|
|||||||
|
|
||||||
/* compute lighting with the BSDF closure */
|
/* compute lighting with the BSDF closure */
|
||||||
for(int hit = 0; hit < num_hits; hit++) {
|
for(int hit = 0; hit < num_hits; hit++) {
|
||||||
|
/* NOTE: We reuse the existing ShaderData, we assume the path
|
||||||
|
* integration loop stops when this function returns true.
|
||||||
|
*/
|
||||||
|
subsurface_scatter_multi_setup(kg,
|
||||||
|
&ss_isect,
|
||||||
|
hit,
|
||||||
|
sd,
|
||||||
|
state->flag,
|
||||||
|
sc,
|
||||||
|
false);
|
||||||
|
|
||||||
float3 tp = *throughput;
|
float3 tp = *throughput;
|
||||||
PathState hit_state = *state;
|
PathState hit_state = *state;
|
||||||
Ray hit_ray = *ray;
|
Ray hit_ray = *ray;
|
||||||
|
|
||||||
hit_state.rng_offset += PRNG_BOUNCE_NUM;
|
hit_state.rng_offset += PRNG_BOUNCE_NUM;
|
||||||
|
|
||||||
kernel_path_surface_connect_light(kg, rng, &bssrdf_sd[hit], tp, state, L);
|
kernel_path_surface_connect_light(kg, rng, sd, tp, state, L);
|
||||||
|
|
||||||
if(kernel_path_surface_bounce(kg, rng, &bssrdf_sd[hit], &tp, &hit_state, L, &hit_ray)) {
|
if(kernel_path_surface_bounce(kg, rng, sd, &tp, &hit_state, L, &hit_ray)) {
|
||||||
#ifdef __LAMP_MIS__
|
#ifdef __LAMP_MIS__
|
||||||
hit_state.ray_t = 0.0f;
|
hit_state.ray_t = 0.0f;
|
||||||
#endif
|
#endif
|
||||||
|
@ -128,10 +128,16 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
|
|||||||
/* do subsurface scatter step with copy of shader data, this will
|
/* do subsurface scatter step with copy of shader data, this will
|
||||||
* replace the BSSRDF with a diffuse BSDF closure */
|
* replace the BSSRDF with a diffuse BSDF closure */
|
||||||
for(int j = 0; j < num_samples; j++) {
|
for(int j = 0; j < num_samples; j++) {
|
||||||
ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
|
SubsurfaceIntersection ss_isect;
|
||||||
float bssrdf_u, bssrdf_v;
|
float bssrdf_u, bssrdf_v;
|
||||||
path_branched_rng_2D(kg, &bssrdf_rng, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
|
path_branched_rng_2D(kg, &bssrdf_rng, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
|
||||||
int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
|
int num_hits = subsurface_scatter_multi_intersect(kg,
|
||||||
|
&ss_isect,
|
||||||
|
sd,
|
||||||
|
sc,
|
||||||
|
&lcg_state,
|
||||||
|
bssrdf_u, bssrdf_v,
|
||||||
|
true);
|
||||||
#ifdef __VOLUME__
|
#ifdef __VOLUME__
|
||||||
Ray volume_ray = *ray;
|
Ray volume_ray = *ray;
|
||||||
bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
|
bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
|
||||||
@ -140,6 +146,15 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
|
|||||||
|
|
||||||
/* compute lighting with the BSDF closure */
|
/* compute lighting with the BSDF closure */
|
||||||
for(int hit = 0; hit < num_hits; hit++) {
|
for(int hit = 0; hit < num_hits; hit++) {
|
||||||
|
ShaderData bssrdf_sd = *sd;
|
||||||
|
subsurface_scatter_multi_setup(kg,
|
||||||
|
&ss_isect,
|
||||||
|
hit,
|
||||||
|
&bssrdf_sd,
|
||||||
|
state->flag,
|
||||||
|
sc,
|
||||||
|
true);
|
||||||
|
|
||||||
PathState hit_state = *state;
|
PathState hit_state = *state;
|
||||||
|
|
||||||
path_state_branch(&hit_state, j, num_samples);
|
path_state_branch(&hit_state, j, num_samples);
|
||||||
@ -147,7 +162,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
|
|||||||
#ifdef __VOLUME__
|
#ifdef __VOLUME__
|
||||||
if(need_update_volume_stack) {
|
if(need_update_volume_stack) {
|
||||||
/* Setup ray from previous surface point to the new one. */
|
/* Setup ray from previous surface point to the new one. */
|
||||||
float3 P = ray_offset(bssrdf_sd[hit].P, -bssrdf_sd[hit].Ng);
|
float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng);
|
||||||
volume_ray.D = normalize_len(P - volume_ray.P,
|
volume_ray.D = normalize_len(P - volume_ray.P,
|
||||||
&volume_ray.t);
|
&volume_ray.t);
|
||||||
|
|
||||||
@ -165,15 +180,27 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
|
|||||||
/* direct light */
|
/* direct light */
|
||||||
if(kernel_data.integrator.use_direct_light) {
|
if(kernel_data.integrator.use_direct_light) {
|
||||||
bool all = kernel_data.integrator.sample_all_lights_direct;
|
bool all = kernel_data.integrator.sample_all_lights_direct;
|
||||||
kernel_branched_path_surface_connect_light(kg, rng,
|
kernel_branched_path_surface_connect_light(
|
||||||
&bssrdf_sd[hit], &hit_state, throughput, num_samples_inv, L, all);
|
kg,
|
||||||
|
rng,
|
||||||
|
&bssrdf_sd,
|
||||||
|
&hit_state,
|
||||||
|
throughput,
|
||||||
|
num_samples_inv,
|
||||||
|
L,
|
||||||
|
all);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* indirect light */
|
/* indirect light */
|
||||||
kernel_branched_path_surface_indirect_light(kg, rng,
|
kernel_branched_path_surface_indirect_light(
|
||||||
&bssrdf_sd[hit], throughput, num_samples_inv,
|
kg,
|
||||||
&hit_state, L);
|
rng,
|
||||||
|
&bssrdf_sd,
|
||||||
|
throughput,
|
||||||
|
num_samples_inv,
|
||||||
|
&hit_state,
|
||||||
|
L);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -179,19 +179,23 @@ ccl_device float3 subsurface_color_pow(float3 color, float exponent)
|
|||||||
return color;
|
return color;
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, ShaderData *out_sd, ShaderData *in_sd, int state_flag, float3 *eval, float3 *N)
|
ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
|
||||||
|
ShaderData *sd,
|
||||||
|
int state_flag,
|
||||||
|
float3 *eval,
|
||||||
|
float3 *N)
|
||||||
{
|
{
|
||||||
/* average color and texture blur at outgoing point */
|
/* average color and texture blur at outgoing point */
|
||||||
float texture_blur;
|
float texture_blur;
|
||||||
float3 out_color = shader_bssrdf_sum(out_sd, NULL, &texture_blur);
|
float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
|
||||||
|
|
||||||
/* do we have bump mapping? */
|
/* do we have bump mapping? */
|
||||||
bool bump = (out_sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
|
bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
|
||||||
|
|
||||||
if(bump || texture_blur > 0.0f) {
|
if(bump || texture_blur > 0.0f) {
|
||||||
/* average color and normal at incoming point */
|
/* average color and normal at incoming point */
|
||||||
shader_eval_surface(kg, in_sd, 0.0f, state_flag, SHADER_CONTEXT_SSS);
|
shader_eval_surface(kg, sd, 0.0f, state_flag, SHADER_CONTEXT_SSS);
|
||||||
float3 in_color = shader_bssrdf_sum(in_sd, (bump)? N: NULL, NULL);
|
float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
|
||||||
|
|
||||||
/* we simply divide out the average color and multiply with the average
|
/* we simply divide out the average color and multiply with the average
|
||||||
* of the other one. we could try to do this per closure but it's quite
|
* of the other one. we could try to do this per closure but it's quite
|
||||||
@ -206,9 +210,18 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, ShaderData *out_sd
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* subsurface scattering step, from a point on the surface to other nearby points on the same object */
|
/* Subsurface scattering step, from a point on the surface to other
|
||||||
ccl_device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd, ShaderData bssrdf_sd[BSSRDF_MAX_HITS],
|
* nearby points on the same object.
|
||||||
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
|
*/
|
||||||
|
ccl_device int subsurface_scatter_multi_intersect(
|
||||||
|
KernelGlobals *kg,
|
||||||
|
SubsurfaceIntersection* ss_isect,
|
||||||
|
ShaderData *sd,
|
||||||
|
ShaderClosure *sc,
|
||||||
|
uint *lcg_state,
|
||||||
|
float disk_u,
|
||||||
|
float disk_v,
|
||||||
|
bool all)
|
||||||
{
|
{
|
||||||
/* pick random axis in local frame and point on disk */
|
/* pick random axis in local frame and point on disk */
|
||||||
float3 disk_N, disk_T, disk_B;
|
float3 disk_N, disk_T, disk_B;
|
||||||
@ -259,65 +272,84 @@ ccl_device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd,
|
|||||||
float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
|
float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
|
||||||
|
|
||||||
/* create ray */
|
/* create ray */
|
||||||
Ray ray;
|
Ray *ray = &ss_isect->ray;
|
||||||
ray.P = sd->P + disk_N*disk_height + disk_P;
|
ray->P = sd->P + disk_N*disk_height + disk_P;
|
||||||
ray.D = -disk_N;
|
ray->D = -disk_N;
|
||||||
ray.t = 2.0f*disk_height;
|
ray->t = 2.0f*disk_height;
|
||||||
ray.dP = sd->dP;
|
ray->dP = sd->dP;
|
||||||
ray.dD = differential3_zero();
|
ray->dD = differential3_zero();
|
||||||
ray.time = sd->time;
|
ray->time = sd->time;
|
||||||
|
|
||||||
/* intersect with the same object. if multiple intersections are found it
|
/* intersect with the same object. if multiple intersections are found it
|
||||||
* will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
|
* will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
|
||||||
Intersection isect[BSSRDF_MAX_HITS];
|
scene_intersect_subsurface(kg,
|
||||||
uint num_hits = scene_intersect_subsurface(kg, &ray, isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
|
ray,
|
||||||
|
ss_isect,
|
||||||
/* evaluate bssrdf */
|
sd->object,
|
||||||
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
|
lcg_state,
|
||||||
int num_eval_hits = min(num_hits, BSSRDF_MAX_HITS);
|
BSSRDF_MAX_HITS);
|
||||||
|
/* TODO(sergey): Investigate whether scene_intersect_subsurface() could
|
||||||
|
* indeed return more than BSSRDF_MAX_HITS hits.
|
||||||
|
*/
|
||||||
|
int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
|
||||||
|
|
||||||
for(int hit = 0; hit < num_eval_hits; hit++) {
|
for(int hit = 0; hit < num_eval_hits; hit++) {
|
||||||
ShaderData *bsd = &bssrdf_sd[hit];
|
/* Quickly retrieve P and Ng without setting up ShaderData. */
|
||||||
|
float3 hit_P = ray->P + ss_isect->hits[hit].t * ray->D;
|
||||||
/* setup new shading point */
|
float3 hit_Ng = ss_isect->Ng[hit];
|
||||||
*bsd = *sd;
|
if(ss_isect->hits[hit].object != OBJECT_NONE) {
|
||||||
shader_setup_from_subsurface(kg, bsd, &isect[hit], &ray);
|
object_normal_transform(kg, sd, &hit_Ng);
|
||||||
|
}
|
||||||
|
|
||||||
/* probability densities for local frame axes */
|
/* probability densities for local frame axes */
|
||||||
float pdf_N = pick_pdf_N * fabsf(dot(disk_N, bsd->Ng));
|
float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
|
||||||
float pdf_T = pick_pdf_T * fabsf(dot(disk_T, bsd->Ng));
|
float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
|
||||||
float pdf_B = pick_pdf_B * fabsf(dot(disk_B, bsd->Ng));
|
float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
|
||||||
|
|
||||||
/* multiple importance sample between 3 axes, power heuristic
|
/* multiple importance sample between 3 axes, power heuristic
|
||||||
* found to be slightly better than balance heuristic */
|
* found to be slightly better than balance heuristic */
|
||||||
float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
|
float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
|
||||||
|
|
||||||
/* real distance to sampled point */
|
/* real distance to sampled point */
|
||||||
float r = len(bsd->P - sd->P);
|
float r = len(hit_P - sd->P);
|
||||||
|
|
||||||
/* evaluate */
|
/* evaluate */
|
||||||
float w = mis_weight / pdf_N;
|
float w = mis_weight / pdf_N;
|
||||||
if(num_hits > BSSRDF_MAX_HITS)
|
if(ss_isect->num_hits > BSSRDF_MAX_HITS)
|
||||||
w *= num_hits/(float)BSSRDF_MAX_HITS;
|
w *= ss_isect->num_hits/(float)BSSRDF_MAX_HITS;
|
||||||
eval = subsurface_scatter_eval(bsd, sc, disk_r, r, all) * w;
|
float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
|
||||||
|
|
||||||
/* optionally blur colors and bump mapping */
|
ss_isect->weight[hit] = eval;
|
||||||
float3 N = bsd->N;
|
|
||||||
subsurface_color_bump_blur(kg, sd, bsd, state_flag, &eval, &N);
|
|
||||||
|
|
||||||
/* setup diffuse bsdf */
|
|
||||||
subsurface_scatter_setup_diffuse_bsdf(bsd, eval, true, N);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return num_eval_hits;
|
return num_eval_hits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ccl_device void subsurface_scatter_multi_setup(KernelGlobals *kg,
|
||||||
|
SubsurfaceIntersection* ss_isect,
|
||||||
|
int hit,
|
||||||
|
ShaderData *sd,
|
||||||
|
int state_flag,
|
||||||
|
ShaderClosure *sc,
|
||||||
|
bool all)
|
||||||
|
{
|
||||||
|
/* Setup new shading point. */
|
||||||
|
shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], &ss_isect->ray);
|
||||||
|
|
||||||
|
/* Optionally blur colors and bump mapping. */
|
||||||
|
float3 weight = ss_isect->weight[hit];
|
||||||
|
float3 N = sd->N;
|
||||||
|
subsurface_color_bump_blur(kg, sd, state_flag, &weight, &N);
|
||||||
|
|
||||||
|
/* Setup diffuse BSDF. */
|
||||||
|
subsurface_scatter_setup_diffuse_bsdf(sd, weight, true, N);
|
||||||
|
}
|
||||||
|
|
||||||
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
|
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
|
||||||
ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
|
ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
|
||||||
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
|
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
|
||||||
{
|
{
|
||||||
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
|
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
|
||||||
uint num_hits = 0;
|
|
||||||
|
|
||||||
/* pick random axis in local frame and point on disk */
|
/* pick random axis in local frame and point on disk */
|
||||||
float3 disk_N, disk_T, disk_B;
|
float3 disk_N, disk_T, disk_B;
|
||||||
@ -368,15 +400,15 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
|
|||||||
|
|
||||||
/* intersect with the same object. if multiple intersections are
|
/* intersect with the same object. if multiple intersections are
|
||||||
* found it will randomly pick one of them */
|
* found it will randomly pick one of them */
|
||||||
Intersection isect;
|
SubsurfaceIntersection ss_isect;
|
||||||
num_hits = scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1);
|
scene_intersect_subsurface(kg, &ray, &ss_isect, sd->object, lcg_state, 1);
|
||||||
|
|
||||||
/* evaluate bssrdf */
|
/* evaluate bssrdf */
|
||||||
if(num_hits > 0) {
|
if(ss_isect.num_hits > 0) {
|
||||||
float3 origP = sd->P;
|
float3 origP = sd->P;
|
||||||
|
|
||||||
/* setup new shading point */
|
/* setup new shading point */
|
||||||
shader_setup_from_subsurface(kg, sd, &isect, &ray);
|
shader_setup_from_subsurface(kg, sd, &ss_isect.hits[0], &ray);
|
||||||
|
|
||||||
/* probability densities for local frame axes */
|
/* probability densities for local frame axes */
|
||||||
float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
|
float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
|
||||||
@ -391,16 +423,16 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
|
|||||||
float r = len(sd->P - origP);
|
float r = len(sd->P - origP);
|
||||||
|
|
||||||
/* evaluate */
|
/* evaluate */
|
||||||
float w = (mis_weight * num_hits) / pdf_N;
|
float w = (mis_weight * ss_isect.num_hits) / pdf_N;
|
||||||
eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
|
eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* optionally blur colors and bump mapping */
|
/* optionally blur colors and bump mapping */
|
||||||
float3 N = sd->N;
|
float3 N = sd->N;
|
||||||
subsurface_color_bump_blur(kg, sd, sd, state_flag, &eval, &N);
|
subsurface_color_bump_blur(kg, sd, state_flag, &eval, &N);
|
||||||
|
|
||||||
/* setup diffuse bsdf */
|
/* setup diffuse bsdf */
|
||||||
subsurface_scatter_setup_diffuse_bsdf(sd, eval, (num_hits > 0), N);
|
subsurface_scatter_setup_diffuse_bsdf(sd, eval, (ss_isect.num_hits > 0), N);
|
||||||
}
|
}
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
@ -520,6 +520,18 @@ typedef ccl_addr_space struct Intersection {
|
|||||||
#endif
|
#endif
|
||||||
} Intersection;
|
} Intersection;
|
||||||
|
|
||||||
|
/* Subsurface Intersection result */
|
||||||
|
|
||||||
|
struct SubsurfaceIntersection
|
||||||
|
{
|
||||||
|
Ray ray;
|
||||||
|
float3 weight[BSSRDF_MAX_HITS];
|
||||||
|
|
||||||
|
int num_hits;
|
||||||
|
struct Intersection hits[BSSRDF_MAX_HITS];
|
||||||
|
float3 Ng[BSSRDF_MAX_HITS];
|
||||||
|
};
|
||||||
|
|
||||||
/* Primitives */
|
/* Primitives */
|
||||||
|
|
||||||
typedef enum PrimitiveType {
|
typedef enum PrimitiveType {
|
||||||
|
Loading…
Reference in New Issue
Block a user