Cycles: add Optix support in the kernel
This adds all the kernel side changes for the Optix backend. Ref D5363
This commit is contained in:
parent
7eb293a37b
commit
53932f1f06
@ -33,136 +33,140 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
#include "kernel/bvh/bvh_types.h"
|
||||
|
||||
#ifndef __KERNEL_OPTIX__
|
||||
|
||||
/* Common QBVH functions. */
|
||||
#ifdef __QBVH__
|
||||
# include "kernel/bvh/qbvh_nodes.h"
|
||||
# ifdef __KERNEL_AVX2__
|
||||
# include "kernel/bvh/obvh_nodes.h"
|
||||
# ifdef __QBVH__
|
||||
# include "kernel/bvh/qbvh_nodes.h"
|
||||
# ifdef __KERNEL_AVX2__
|
||||
# include "kernel/bvh/obvh_nodes.h"
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Regular BVH traversal */
|
||||
|
||||
#include "kernel/bvh/bvh_nodes.h"
|
||||
# include "kernel/bvh/bvh_nodes.h"
|
||||
|
||||
#define BVH_FUNCTION_NAME bvh_intersect
|
||||
#define BVH_FUNCTION_FEATURES 0
|
||||
#include "kernel/bvh/bvh_traversal.h"
|
||||
|
||||
#if defined(__INSTANCING__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_instancing
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING
|
||||
# include "kernel/bvh/bvh_traversal.h"
|
||||
#endif
|
||||
|
||||
#if defined(__HAIR__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_hair
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
|
||||
# include "kernel/bvh/bvh_traversal.h"
|
||||
#endif
|
||||
|
||||
#if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
|
||||
# include "kernel/bvh/bvh_traversal.h"
|
||||
#endif
|
||||
|
||||
#if defined(__HAIR__) && defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
|
||||
# include "kernel/bvh/bvh_traversal.h"
|
||||
#endif
|
||||
|
||||
/* Subsurface scattering BVH traversal */
|
||||
|
||||
#if defined(__BVH_LOCAL__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_local
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "kernel/bvh/bvh_local.h"
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_local_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
|
||||
# include "kernel/bvh/bvh_local.h"
|
||||
# endif
|
||||
#endif /* __BVH_LOCAL__ */
|
||||
|
||||
/* Volume BVH traversal */
|
||||
|
||||
#if defined(__VOLUME__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "kernel/bvh/bvh_volume.h"
|
||||
|
||||
# if defined(__INSTANCING__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
|
||||
# include "kernel/bvh/bvh_volume.h"
|
||||
# endif
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
|
||||
# include "kernel/bvh/bvh_volume.h"
|
||||
# endif
|
||||
#endif /* __VOLUME__ */
|
||||
|
||||
/* Record all intersections - Shadow BVH traversal */
|
||||
|
||||
#if defined(__SHADOW_RECORD_ALL__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
|
||||
# define BVH_FUNCTION_NAME bvh_intersect
|
||||
# define BVH_FUNCTION_FEATURES 0
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
# include "kernel/bvh/bvh_traversal.h"
|
||||
|
||||
# if defined(__INSTANCING__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_instancing
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
# include "kernel/bvh/bvh_traversal.h"
|
||||
# endif
|
||||
|
||||
# if defined(__HAIR__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_hair
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
# include "kernel/bvh/bvh_traversal.h"
|
||||
# endif
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
# include "kernel/bvh/bvh_traversal.h"
|
||||
# endif
|
||||
|
||||
# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
# include "kernel/bvh/bvh_traversal.h"
|
||||
# endif
|
||||
#endif /* __SHADOW_RECORD_ALL__ */
|
||||
|
||||
/* Subsurface scattering BVH traversal */
|
||||
|
||||
# if defined(__BVH_LOCAL__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_local
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "kernel/bvh/bvh_local.h"
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_local_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
|
||||
# include "kernel/bvh/bvh_local.h"
|
||||
# endif
|
||||
# endif /* __BVH_LOCAL__ */
|
||||
|
||||
/* Volume BVH traversal */
|
||||
|
||||
# if defined(__VOLUME__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "kernel/bvh/bvh_volume.h"
|
||||
|
||||
# if defined(__INSTANCING__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
|
||||
# include "kernel/bvh/bvh_volume.h"
|
||||
# endif
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
|
||||
# include "kernel/bvh/bvh_volume.h"
|
||||
# endif
|
||||
# endif /* __VOLUME__ */
|
||||
|
||||
/* Record all intersections - Shadow BVH traversal */
|
||||
|
||||
# if defined(__SHADOW_RECORD_ALL__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
|
||||
# define BVH_FUNCTION_FEATURES 0
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
|
||||
# if defined(__INSTANCING__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
# endif
|
||||
|
||||
# if defined(__HAIR__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
# endif
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
# endif
|
||||
|
||||
# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
|
||||
# include "kernel/bvh/bvh_shadow_all.h"
|
||||
# endif
|
||||
# endif /* __SHADOW_RECORD_ALL__ */
|
||||
|
||||
/* Record all intersections - Volume BVH traversal */
|
||||
|
||||
#if defined(__VOLUME_RECORD_ALL__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_all
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "kernel/bvh/bvh_volume_all.h"
|
||||
|
||||
# if defined(__INSTANCING__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
|
||||
# if defined(__VOLUME_RECORD_ALL__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_all
|
||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
||||
# include "kernel/bvh/bvh_volume_all.h"
|
||||
# endif
|
||||
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
|
||||
# include "kernel/bvh/bvh_volume_all.h"
|
||||
# endif
|
||||
#endif /* __VOLUME_RECORD_ALL__ */
|
||||
# if defined(__INSTANCING__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
|
||||
# include "kernel/bvh/bvh_volume_all.h"
|
||||
# endif
|
||||
|
||||
#undef BVH_FEATURE
|
||||
#undef BVH_NAME_JOIN
|
||||
#undef BVH_NAME_EVAL
|
||||
#undef BVH_FUNCTION_FULL_NAME
|
||||
# if defined(__OBJECT_MOTION__)
|
||||
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
|
||||
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
|
||||
# include "kernel/bvh/bvh_volume_all.h"
|
||||
# endif
|
||||
# endif /* __VOLUME_RECORD_ALL__ */
|
||||
|
||||
# undef BVH_FEATURE
|
||||
# undef BVH_NAME_JOIN
|
||||
# undef BVH_NAME_EVAL
|
||||
# undef BVH_FUNCTION_FULL_NAME
|
||||
|
||||
#endif /* __KERNEL_OPTIX__ */
|
||||
|
||||
ccl_device_inline bool scene_intersect_valid(const Ray *ray)
|
||||
{
|
||||
@ -173,8 +177,10 @@ ccl_device_inline bool scene_intersect_valid(const Ray *ray)
|
||||
* such cases.
|
||||
* From production scenes so far it seems it's enough to test first element
|
||||
* only.
|
||||
* Scene intersection may also called with empty rays for conditional trace
|
||||
* calls that evaluate to false, so filter those out.
|
||||
*/
|
||||
return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x);
|
||||
return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
|
||||
}
|
||||
|
||||
ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
|
||||
@ -184,10 +190,46 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
|
||||
{
|
||||
PROFILING_INIT(kg, PROFILING_INTERSECT);
|
||||
|
||||
#ifdef __KERNEL_OPTIX__
|
||||
uint p0 = 0;
|
||||
uint p1 = 0;
|
||||
uint p2 = 0;
|
||||
uint p3 = 0;
|
||||
uint p4 = visibility;
|
||||
uint p5 = PRIMITIVE_NONE;
|
||||
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
0.0f,
|
||||
ray->t,
|
||||
ray->time,
|
||||
0xFF,
|
||||
OPTIX_RAY_FLAG_NONE,
|
||||
0,
|
||||
0,
|
||||
0, // SBT offset for PG_HITD
|
||||
p0,
|
||||
p1,
|
||||
p2,
|
||||
p3,
|
||||
p4,
|
||||
p5);
|
||||
|
||||
isect->t = __uint_as_float(p0);
|
||||
isect->u = __uint_as_float(p1);
|
||||
isect->v = __uint_as_float(p2);
|
||||
isect->prim = p3;
|
||||
isect->object = p4;
|
||||
isect->type = p5;
|
||||
|
||||
return p5 != PRIMITIVE_NONE;
|
||||
#else /* __KERNEL_OPTIX__ */
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
return false;
|
||||
}
|
||||
#ifdef __EMBREE__
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.bvh.scene) {
|
||||
isect->t = ray->t;
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
|
||||
@ -202,42 +244,41 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif /* __EMBREE__ */
|
||||
#ifdef __OBJECT_MOTION__
|
||||
# endif /* __EMBREE__ */
|
||||
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
# ifdef __HAIR__
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
#endif /* __OBJECT_MOTION__ */
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
#ifdef __HAIR__
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair(kg, ray, isect, visibility);
|
||||
}
|
||||
#endif /* __HAIR__ */
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
#ifdef __KERNEL_CPU__
|
||||
|
||||
# ifdef __INSTANCING__
|
||||
# ifdef __KERNEL_CPU__
|
||||
# ifdef __INSTANCING__
|
||||
if (kernel_data.bvh.have_instancing) {
|
||||
return bvh_intersect_instancing(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __INSTANCING__ */
|
||||
# endif /* __INSTANCING__ */
|
||||
return bvh_intersect(kg, ray, isect, visibility);
|
||||
#else /* __KERNEL_CPU__ */
|
||||
|
||||
# ifdef __INSTANCING__
|
||||
# else /* __KERNEL_CPU__ */
|
||||
# ifdef __INSTANCING__
|
||||
return bvh_intersect_instancing(kg, ray, isect, visibility);
|
||||
# else
|
||||
# else
|
||||
return bvh_intersect(kg, ray, isect, visibility);
|
||||
# endif /* __INSTANCING__ */
|
||||
|
||||
#endif /* __KERNEL_CPU__ */
|
||||
# endif /* __INSTANCING__ */
|
||||
# endif /* __KERNEL_CPU__ */
|
||||
#endif /* __KERNEL_OPTIX__ */
|
||||
}
|
||||
|
||||
#ifdef __BVH_LOCAL__
|
||||
@ -250,11 +291,43 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
|
||||
{
|
||||
PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL);
|
||||
|
||||
# ifdef __KERNEL_OPTIX__
|
||||
uint p0 = ((uint64_t)lcg_state) & 0xFFFFFFFF;
|
||||
uint p1 = (((uint64_t)lcg_state) >> 32) & 0xFFFFFFFF;
|
||||
uint p2 = ((uint64_t)local_isect) & 0xFFFFFFFF;
|
||||
uint p3 = (((uint64_t)local_isect) >> 32) & 0xFFFFFFFF;
|
||||
uint p4 = local_object;
|
||||
// Is set to zero on miss or if ray is aborted, so can be used as return value
|
||||
uint p5 = max_hits;
|
||||
|
||||
local_isect->num_hits = 0; // Initialize hit count to zero
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
0.0f,
|
||||
ray->t,
|
||||
ray->time,
|
||||
// Need to always call into __anyhit__kernel_optix_local_hit
|
||||
0xFF,
|
||||
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
|
||||
1,
|
||||
0,
|
||||
0, // SBT offset for PG_HITL
|
||||
p0,
|
||||
p1,
|
||||
p2,
|
||||
p3,
|
||||
p4,
|
||||
p5);
|
||||
|
||||
return p5;
|
||||
# else /* __KERNEL_OPTIX__ */
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
local_isect->num_hits = 0;
|
||||
return false;
|
||||
}
|
||||
# ifdef __EMBREE__
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.bvh.scene) {
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS);
|
||||
ctx.lcg_state = lcg_state;
|
||||
@ -296,13 +369,15 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
|
||||
|
||||
return local_isect->num_hits > 0;
|
||||
}
|
||||
# endif /* __EMBREE__ */
|
||||
# ifdef __OBJECT_MOTION__
|
||||
# endif /* __EMBREE__ */
|
||||
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
# endif /* __KERNEL_OPTIX__ */
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -316,11 +391,41 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
|
||||
{
|
||||
PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL);
|
||||
|
||||
# ifdef __KERNEL_OPTIX__
|
||||
uint p0 = ((uint64_t)isect) & 0xFFFFFFFF;
|
||||
uint p1 = (((uint64_t)isect) >> 32) & 0xFFFFFFFF;
|
||||
uint p3 = max_hits;
|
||||
uint p4 = visibility;
|
||||
uint p5 = false;
|
||||
|
||||
*num_hits = 0; // Initialize hit count to zero
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
0.0f,
|
||||
ray->t,
|
||||
ray->time,
|
||||
// Need to always call into __anyhit__kernel_optix_shadow_all_hit
|
||||
0xFF,
|
||||
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
|
||||
2,
|
||||
0,
|
||||
0, // SBT offset for PG_HITS
|
||||
p0,
|
||||
p1,
|
||||
*num_hits,
|
||||
p3,
|
||||
p4,
|
||||
p5);
|
||||
|
||||
return p5;
|
||||
# else /* __KERNEL_OPTIX__ */
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
*num_hits = 0;
|
||||
return false;
|
||||
}
|
||||
# ifdef __EMBREE__
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.bvh.scene) {
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
|
||||
ctx.isect_s = isect;
|
||||
@ -337,32 +442,41 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
|
||||
*num_hits = ctx.num_hits;
|
||||
return rtc_ray.tfar == -INFINITY;
|
||||
}
|
||||
# endif
|
||||
# ifdef __OBJECT_MOTION__
|
||||
# endif /* __EMBREE__ */
|
||||
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
# ifdef __HAIR__
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, visibility, max_hits, num_hits);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_shadow_all_motion(kg, ray, isect, visibility, max_hits, num_hits);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
# ifdef __HAIR__
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair(kg, ray, isect, visibility, max_hits, num_hits);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
# ifdef __INSTANCING__
|
||||
# ifdef __KERNEL_CPU__
|
||||
# ifdef __INSTANCING__
|
||||
if (kernel_data.bvh.have_instancing) {
|
||||
return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits);
|
||||
}
|
||||
# endif /* __INSTANCING__ */
|
||||
|
||||
# endif /* __INSTANCING__ */
|
||||
return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits);
|
||||
# else
|
||||
# ifdef __INSTANCING__
|
||||
return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits);
|
||||
# else
|
||||
return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits);
|
||||
# endif /* __INSTANCING__ */
|
||||
# endif /* __KERNEL_CPU__ */
|
||||
# endif /* __KERNEL_OPTIX__ */
|
||||
}
|
||||
#endif /* __SHADOW_RECORD_ALL__ */
|
||||
|
||||
@ -374,30 +488,67 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
|
||||
{
|
||||
PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME);
|
||||
|
||||
# ifdef __KERNEL_OPTIX__
|
||||
uint p0 = 0;
|
||||
uint p1 = 0;
|
||||
uint p2 = 0;
|
||||
uint p3 = 0;
|
||||
uint p4 = visibility;
|
||||
uint p5 = PRIMITIVE_NONE;
|
||||
|
||||
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
|
||||
ray->P,
|
||||
ray->D,
|
||||
0.0f,
|
||||
ray->t,
|
||||
ray->time,
|
||||
// Visibility mask set to only intersect objects with volumes
|
||||
0x02,
|
||||
OPTIX_RAY_FLAG_NONE,
|
||||
0,
|
||||
0,
|
||||
0, // SBT offset for PG_HITD
|
||||
p0,
|
||||
p1,
|
||||
p2,
|
||||
p3,
|
||||
p4,
|
||||
p5);
|
||||
|
||||
isect->t = __uint_as_float(p0);
|
||||
isect->u = __uint_as_float(p1);
|
||||
isect->v = __uint_as_float(p2);
|
||||
isect->prim = p3;
|
||||
isect->object = p4;
|
||||
isect->type = p5;
|
||||
|
||||
return p5 != PRIMITIVE_NONE;
|
||||
# else /* __KERNEL_OPTIX__ */
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
# ifdef __OBJECT_MOTION__
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
# ifdef __KERNEL_CPU__
|
||||
# ifdef __INSTANCING__
|
||||
# ifdef __KERNEL_CPU__
|
||||
# ifdef __INSTANCING__
|
||||
if (kernel_data.bvh.have_instancing) {
|
||||
return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __INSTANCING__ */
|
||||
# endif /* __INSTANCING__ */
|
||||
return bvh_intersect_volume(kg, ray, isect, visibility);
|
||||
# else /* __KERNEL_CPU__ */
|
||||
# ifdef __INSTANCING__
|
||||
# else /* __KERNEL_CPU__ */
|
||||
# ifdef __INSTANCING__
|
||||
return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
|
||||
# else
|
||||
# else
|
||||
return bvh_intersect_volume(kg, ray, isect, visibility);
|
||||
# endif /* __INSTANCING__ */
|
||||
# endif /* __KERNEL_CPU__ */
|
||||
# endif /* __INSTANCING__ */
|
||||
# endif /* __KERNEL_CPU__ */
|
||||
# endif /* __KERNEL_OPTIX__ */
|
||||
}
|
||||
#endif /* __VOLUME__ */
|
||||
|
||||
@ -413,6 +564,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.bvh.scene) {
|
||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
|
||||
|
@ -38,12 +38,14 @@ ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg,
|
||||
{
|
||||
const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
|
||||
|
||||
# ifndef __KERNEL_OPTIX__ /* see OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */
|
||||
if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
|
||||
const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
|
||||
if (time < prim_time.x || time > prim_time.y) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
int segment = PRIMITIVE_UNPACK_SEGMENT(type);
|
||||
float epsilon = 0.0f;
|
||||
@ -505,12 +507,14 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
|
||||
|
||||
const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
|
||||
|
||||
# ifndef __KERNEL_OPTIX__ /* see OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */
|
||||
if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
|
||||
const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
|
||||
if (time < prim_time.x || time > prim_time.y) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
int segment = PRIMITIVE_UNPACK_SEGMENT(type);
|
||||
/* curve Intersection check */
|
||||
|
89
intern/cycles/kernel/kernel_compat_optix.h
Normal file
89
intern/cycles/kernel/kernel_compat_optix.h
Normal file
@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright 2019, NVIDIA Corporation.
|
||||
* Copyright 2019, Blender Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef __KERNEL_COMPAT_OPTIX_H__
|
||||
#define __KERNEL_COMPAT_OPTIX_H__
|
||||
|
||||
#define OPTIX_DONT_INCLUDE_CUDA
|
||||
#include <optix.h>
|
||||
|
||||
#define __KERNEL_GPU__
|
||||
#define __KERNEL_CUDA__ // OptiX kernels are implicitly CUDA kernels too
|
||||
#define __KERNEL_OPTIX__
|
||||
#define CCL_NAMESPACE_BEGIN
|
||||
#define CCL_NAMESPACE_END
|
||||
|
||||
#ifndef ATTR_FALLTHROUGH
|
||||
# define ATTR_FALLTHROUGH
|
||||
#endif
|
||||
|
||||
typedef unsigned int uint32_t;
|
||||
typedef unsigned long long uint64_t;
|
||||
typedef unsigned short half;
|
||||
typedef unsigned long long CUtexObject;
|
||||
|
||||
#define FLT_MIN 1.175494350822287507969e-38f
|
||||
#define FLT_MAX 340282346638528859811704183484516925440.0f
|
||||
|
||||
__device__ half __float2half(const float f)
|
||||
{
|
||||
half val;
|
||||
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
|
||||
return val;
|
||||
}
|
||||
|
||||
/* Selective nodes compilation. */
|
||||
#ifndef __NODES_MAX_GROUP__
|
||||
# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
|
||||
#endif
|
||||
#ifndef __NODES_FEATURES__
|
||||
# define __NODES_FEATURES__ NODE_FEATURE_ALL
|
||||
#endif
|
||||
|
||||
#define ccl_device \
|
||||
__device__ __forceinline__ // Function calls are bad for OptiX performance, so inline everything
|
||||
#define ccl_device_inline ccl_device
|
||||
#define ccl_device_forceinline ccl_device
|
||||
#define ccl_device_noinline __device__ __noinline__
|
||||
#define ccl_device_noinline_cpu ccl_device
|
||||
#define ccl_global
|
||||
#define ccl_static_constant __constant__
|
||||
#define ccl_constant const
|
||||
#define ccl_local
|
||||
#define ccl_local_param
|
||||
#define ccl_private
|
||||
#define ccl_may_alias
|
||||
#define ccl_addr_space
|
||||
#define ccl_restrict __restrict__
|
||||
#define ccl_ref
|
||||
#define ccl_align(n) __align__(n)
|
||||
|
||||
// Zero initialize structs to help the compiler figure out scoping
|
||||
#define ccl_optional_struct_init = {}
|
||||
|
||||
#define kernel_data __params.data // See kernel_globals.h
|
||||
#define kernel_tex_array(t) __params.t
|
||||
#define kernel_tex_fetch(t, index) __params.t[(index)]
|
||||
|
||||
#define kernel_assert(cond)
|
||||
|
||||
/* Types */
|
||||
|
||||
#include "util/util_half.h"
|
||||
#include "util/util_types.h"
|
||||
|
||||
#endif /* __KERNEL_COMPAT_OPTIX_H__ */
|
@ -90,12 +90,43 @@ typedef struct KernelGlobals {
|
||||
|
||||
#endif /* __KERNEL_CPU__ */
|
||||
|
||||
#ifdef __KERNEL_OPTIX__
|
||||
|
||||
typedef struct ShaderParams {
|
||||
uint4 *input;
|
||||
float4 *output;
|
||||
int type;
|
||||
int filter;
|
||||
int sx;
|
||||
int offset;
|
||||
int sample;
|
||||
} ShaderParams;
|
||||
|
||||
typedef struct KernelParams {
|
||||
WorkTile tile;
|
||||
KernelData data;
|
||||
ShaderParams shader;
|
||||
# define KERNEL_TEX(type, name) const type *name;
|
||||
# include "kernel/kernel_textures.h"
|
||||
} KernelParams;
|
||||
|
||||
typedef struct KernelGlobals {
|
||||
# ifdef __VOLUME__
|
||||
VolumeState volume_state;
|
||||
# endif
|
||||
Intersection hits_stack[64];
|
||||
} KernelGlobals;
|
||||
|
||||
extern "C" __constant__ KernelParams __params;
|
||||
|
||||
#else /* __KERNEL_OPTIX__ */
|
||||
|
||||
/* For CUDA, constant memory textures must be globals, so we can't put them
|
||||
* into a struct. As a result we don't actually use this struct and use actual
|
||||
* globals and simply pass along a NULL pointer everywhere, which we hope gets
|
||||
* optimized out. */
|
||||
|
||||
#ifdef __KERNEL_CUDA__
|
||||
# ifdef __KERNEL_CUDA__
|
||||
|
||||
__constant__ KernelData __data;
|
||||
typedef struct KernelGlobals {
|
||||
@ -103,10 +134,12 @@ typedef struct KernelGlobals {
|
||||
Intersection hits_stack[64];
|
||||
} KernelGlobals;
|
||||
|
||||
# define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
|
||||
# include "kernel/kernel_textures.h"
|
||||
# define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
|
||||
# include "kernel/kernel_textures.h"
|
||||
|
||||
#endif /* __KERNEL_CUDA__ */
|
||||
# endif /* __KERNEL_CUDA__ */
|
||||
|
||||
#endif /* __KERNEL_OPTIX__ */
|
||||
|
||||
/* OpenCL */
|
||||
|
||||
|
@ -326,13 +326,19 @@ ccl_device_forceinline bool kernel_path_shader_apply(KernelGlobals *kg,
|
||||
return true;
|
||||
}
|
||||
|
||||
ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
ShaderData *emission_sd,
|
||||
PathRadiance *L,
|
||||
ccl_addr_space PathState *state,
|
||||
float3 throughput,
|
||||
float3 ao_alpha)
|
||||
#ifdef __KERNEL_OPTIX__
|
||||
ccl_device_inline /* inline trace calls */
|
||||
#else
|
||||
ccl_device_noinline
|
||||
#endif
|
||||
void
|
||||
kernel_path_ao(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
ShaderData *emission_sd,
|
||||
PathRadiance *L,
|
||||
ccl_addr_space PathState *state,
|
||||
float3 throughput,
|
||||
float3 ao_alpha)
|
||||
{
|
||||
PROFILING_INIT(kg, PROFILING_AO);
|
||||
|
||||
@ -655,9 +661,11 @@ ccl_device void kernel_path_trace(
|
||||
|
||||
kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
|
||||
|
||||
# ifndef __KERNEL_OPTIX__
|
||||
if (ray.t == 0.0f) {
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
/* Initialize state. */
|
||||
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
|
||||
@ -671,6 +679,13 @@ ccl_device void kernel_path_trace(
|
||||
PathState state;
|
||||
path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
|
||||
|
||||
# ifdef __KERNEL_OPTIX__
|
||||
/* Force struct into local memory to avoid costly spilling on trace calls. */
|
||||
if (pass_stride < 0) /* This is never executed and just prevents the compiler from doing SROA. */
|
||||
for (int i = 0; i < sizeof(L); ++i)
|
||||
reinterpret_cast<unsigned char *>(&L)[-pass_stride + i] = 0;
|
||||
# endif
|
||||
|
||||
/* Integrate. */
|
||||
kernel_path_integrate(kg, &state, throughput, &ray, &L, buffer, emission_sd);
|
||||
|
||||
|
@ -48,10 +48,16 @@ ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd
|
||||
}
|
||||
#endif
|
||||
|
||||
ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const Intersection *isect,
|
||||
const Ray *ray)
|
||||
#ifdef __KERNEL_OPTIX__
|
||||
ccl_device_inline
|
||||
#else
|
||||
ccl_device_noinline
|
||||
#endif
|
||||
void
|
||||
shader_setup_from_ray(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const Intersection *isect,
|
||||
const Ray *ray)
|
||||
{
|
||||
PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
|
||||
|
||||
|
@ -17,13 +17,6 @@
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __VOLUME__
|
||||
typedef struct VolumeState {
|
||||
# ifdef __SPLIT_KERNEL__
|
||||
# else
|
||||
PathState ps;
|
||||
# endif
|
||||
} VolumeState;
|
||||
|
||||
/* Get PathState ready for use for volume stack evaluation. */
|
||||
# ifdef __SPLIT_KERNEL__
|
||||
ccl_addr_space
|
||||
@ -55,16 +48,15 @@ ccl_addr_space
|
||||
/* Attenuate throughput accordingly to the given intersection event.
|
||||
* Returns true if the throughput is zero and traversal can be aborted.
|
||||
*/
|
||||
ccl_device_forceinline bool shadow_handle_transparent_isect(
|
||||
KernelGlobals *kg,
|
||||
ShaderData *shadow_sd,
|
||||
ccl_addr_space PathState *state,
|
||||
ccl_device_forceinline bool shadow_handle_transparent_isect(KernelGlobals *kg,
|
||||
ShaderData *shadow_sd,
|
||||
ccl_addr_space PathState *state,
|
||||
#ifdef __VOLUME__
|
||||
ccl_addr_space struct PathState *volume_state,
|
||||
ccl_addr_space PathState *volume_state,
|
||||
#endif
|
||||
Intersection *isect,
|
||||
Ray *ray,
|
||||
float3 *throughput)
|
||||
Intersection *isect,
|
||||
Ray *ray,
|
||||
float3 *throughput)
|
||||
{
|
||||
#ifdef __VOLUME__
|
||||
/* Attenuation between last surface and next surface. */
|
||||
@ -163,7 +155,11 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
|
||||
uint num_hits;
|
||||
const bool blocked = scene_intersect_shadow_all(kg, ray, hits, visibility, max_hits, &num_hits);
|
||||
# ifdef __VOLUME__
|
||||
# ifdef __KERNEL_OPTIX__
|
||||
VolumeState &volume_state = kg->volume_state;
|
||||
# else
|
||||
VolumeState volume_state;
|
||||
# endif
|
||||
# endif
|
||||
/* If no opaque surface found but we did find transparent hits,
|
||||
* shade them.
|
||||
@ -302,7 +298,11 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg,
|
||||
float3 *shadow)
|
||||
{
|
||||
# ifdef __VOLUME__
|
||||
# ifdef __KERNEL_OPTIX__
|
||||
VolumeState &volume_state = kg->volume_state;
|
||||
# else
|
||||
VolumeState volume_state;
|
||||
# endif
|
||||
# endif
|
||||
if (blocked && is_transparent_isect) {
|
||||
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
|
||||
@ -387,32 +387,38 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
ShaderData *shadow_sd,
|
||||
ccl_addr_space PathState *state,
|
||||
Ray *ray_input,
|
||||
Ray *ray,
|
||||
float3 *shadow)
|
||||
{
|
||||
Ray *ray = ray_input;
|
||||
Intersection isect;
|
||||
/* Some common early checks. */
|
||||
*shadow = make_float3(1.0f, 1.0f, 1.0f);
|
||||
#if !defined(__KERNEL_OPTIX__)
|
||||
/* Some common early checks.
|
||||
* Avoid conditional trace call in OptiX though, since those hurt performance there.
|
||||
*/
|
||||
if (ray->t == 0.0f) {
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
#ifdef __SHADOW_TRICKS__
|
||||
const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER) ? PATH_RAY_SHADOW_NON_CATCHER :
|
||||
PATH_RAY_SHADOW;
|
||||
#else
|
||||
const uint visibility = PATH_RAY_SHADOW;
|
||||
#endif
|
||||
/* Do actual shadow shading. */
|
||||
/* First of all, we check if integrator requires transparent shadows.
|
||||
/* Do actual shadow shading.
|
||||
* First of all, we check if integrator requires transparent shadows.
|
||||
* if not, we use simplest and fastest ever way to calculate occlusion.
|
||||
* Do not do this in OptiX to avoid the additional trace call.
|
||||
*/
|
||||
#ifdef __TRANSPARENT_SHADOWS__
|
||||
#if !defined(__KERNEL_OPTIX__) || !defined(__TRANSPARENT_SHADOWS__)
|
||||
Intersection isect;
|
||||
# ifdef __TRANSPARENT_SHADOWS__
|
||||
if (!kernel_data.integrator.transparent_shadows)
|
||||
#endif
|
||||
# endif
|
||||
{
|
||||
return shadow_blocked_opaque(kg, shadow_sd, state, visibility, ray, &isect, shadow);
|
||||
}
|
||||
#endif
|
||||
#ifdef __TRANSPARENT_SHADOWS__
|
||||
# ifdef __SHADOW_RECORD_ALL__
|
||||
/* For the transparent shadows we try to use record-all logic on the
|
||||
@ -426,7 +432,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
|
||||
return true;
|
||||
}
|
||||
const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
|
||||
# ifdef __KERNEL_GPU__
|
||||
# if defined(__KERNEL_GPU__) && !defined(__KERNEL_OPTIX__)
|
||||
/* On GPU we do tricky with tracing opaque ray first, this avoids speed
|
||||
* regressions in some files.
|
||||
*
|
||||
|
@ -353,13 +353,19 @@ ccl_device void subsurface_random_walk_coefficients(const ShaderClosure *sc,
|
||||
*weight = safe_divide_color(bssrdf->weight, A);
|
||||
}
|
||||
|
||||
ccl_device_noinline bool subsurface_random_walk(KernelGlobals *kg,
|
||||
LocalIntersection *ss_isect,
|
||||
ShaderData *sd,
|
||||
ccl_addr_space PathState *state,
|
||||
const ShaderClosure *sc,
|
||||
const float bssrdf_u,
|
||||
const float bssrdf_v)
|
||||
#ifdef __KERNEL_OPTIX__
|
||||
ccl_device_inline /* inline trace calls */
|
||||
#else
|
||||
ccl_device_noinline
|
||||
#endif
|
||||
bool
|
||||
subsurface_random_walk(KernelGlobals *kg,
|
||||
LocalIntersection *ss_isect,
|
||||
ShaderData *sd,
|
||||
ccl_addr_space PathState *state,
|
||||
const ShaderClosure *sc,
|
||||
const float bssrdf_u,
|
||||
const float bssrdf_v)
|
||||
{
|
||||
/* Sample diffuse surface scatter into the object. */
|
||||
float3 D;
|
||||
|
@ -143,6 +143,13 @@ CCL_NAMESPACE_BEGIN
|
||||
# endif
|
||||
#endif /* __KERNEL_CUDA__ */
|
||||
|
||||
#ifdef __KERNEL_OPTIX__
|
||||
# undef __BAKING__
|
||||
# undef __BRANCHED_PATH__
|
||||
/* TODO(pmours): Cannot use optixTrace in non-inlined functions */
|
||||
# undef __SHADER_RAYTRACE__
|
||||
#endif /* __KERNEL_OPTIX__ */
|
||||
|
||||
#ifdef __KERNEL_OPENCL__
|
||||
#endif /* __KERNEL_OPENCL__ */
|
||||
|
||||
@ -1056,6 +1063,15 @@ typedef struct PathState {
|
||||
#endif
|
||||
} PathState;
|
||||
|
||||
#ifdef __VOLUME__
|
||||
typedef struct VolumeState {
|
||||
# ifdef __SPLIT_KERNEL__
|
||||
# else
|
||||
PathState ps;
|
||||
# endif
|
||||
} VolumeState;
|
||||
#endif
|
||||
|
||||
/* Struct to gather multiple nearby intersections. */
|
||||
typedef struct LocalIntersection {
|
||||
Ray ray;
|
||||
@ -1343,9 +1359,12 @@ typedef enum KernelBVHLayout {
|
||||
BVH_LAYOUT_BVH2 = (1 << 0),
|
||||
BVH_LAYOUT_BVH4 = (1 << 1),
|
||||
BVH_LAYOUT_BVH8 = (1 << 2),
|
||||
|
||||
BVH_LAYOUT_EMBREE = (1 << 3),
|
||||
BVH_LAYOUT_OPTIX = (1 << 4),
|
||||
|
||||
BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8,
|
||||
BVH_LAYOUT_ALL = (unsigned int)(-1),
|
||||
BVH_LAYOUT_ALL = (unsigned int)(~0u),
|
||||
} KernelBVHLayout;
|
||||
|
||||
typedef struct KernelBVH {
|
||||
@ -1357,14 +1376,18 @@ typedef struct KernelBVH {
|
||||
int bvh_layout;
|
||||
int use_bvh_steps;
|
||||
|
||||
/* Embree */
|
||||
#ifdef __EMBREE__
|
||||
RTCScene scene;
|
||||
# ifndef __KERNEL_64_BIT__
|
||||
int pad1;
|
||||
# endif
|
||||
/* Custom BVH */
|
||||
#ifdef __KERNEL_OPTIX__
|
||||
OptixTraversableHandle scene;
|
||||
#else
|
||||
int pad1, pad2;
|
||||
# ifdef __EMBREE__
|
||||
RTCScene scene;
|
||||
# ifndef __KERNEL_64_BIT__
|
||||
int pad2;
|
||||
# endif
|
||||
# else
|
||||
int scene, pad2;
|
||||
# endif
|
||||
#endif
|
||||
} KernelBVH;
|
||||
static_assert_align(KernelBVH, 16);
|
||||
|
294
intern/cycles/kernel/kernels/optix/kernel_optix.cu
Normal file
294
intern/cycles/kernel/kernels/optix/kernel_optix.cu
Normal file
@ -0,0 +1,294 @@
|
||||
/*
|
||||
* Copyright 2019, NVIDIA Corporation.
|
||||
* Copyright 2019, Blender Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/kernel_compat_optix.h"
|
||||
#include "util/util_atomic.h"
|
||||
#include "kernel/kernel_types.h"
|
||||
#include "kernel/kernel_globals.h"
|
||||
#include "../cuda/kernel_cuda_image.h" // Texture lookup uses normal CUDA intrinsics
|
||||
|
||||
#include "kernel/kernel_path.h"
|
||||
#include "kernel/kernel_bake.h"
|
||||
|
||||
template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
|
||||
{
|
||||
return (T *)(((uint64_t)optixGetPayload_1() << 32) | optixGetPayload_0());
|
||||
}
|
||||
template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
|
||||
{
|
||||
return (T *)(((uint64_t)optixGetPayload_3() << 32) | optixGetPayload_2());
|
||||
}
|
||||
|
||||
template<bool always = false> ccl_device_forceinline uint get_object_id()
|
||||
{
|
||||
#ifdef __OBJECT_MOTION__
|
||||
// Always get the the instance ID from the TLAS
|
||||
// There might be a motion transform node between TLAS and BLAS which does not have one
|
||||
uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
|
||||
#else
|
||||
uint object = optixGetInstanceId();
|
||||
#endif
|
||||
// Choose between always returning object ID or only for instances
|
||||
if (always)
|
||||
// Can just remove the high bit since instace always contains object ID
|
||||
return object & 0x7FFFFF;
|
||||
// Set to OBJECT_NONE if this is not an instanced object
|
||||
else if (object & 0x800000)
|
||||
object = OBJECT_NONE;
|
||||
return object;
|
||||
}
|
||||
|
||||
extern "C" __global__ void __raygen__kernel_optix_path_trace()
|
||||
{
|
||||
KernelGlobals kg; // Allocate stack storage for common data
|
||||
|
||||
const uint3 launch_index = optixGetLaunchIndex();
|
||||
// Keep threads for same pixel together to improve occupancy of warps
|
||||
uint pixel_offset = launch_index.x / __params.tile.num_samples;
|
||||
uint sample_offset = launch_index.x % __params.tile.num_samples;
|
||||
|
||||
kernel_path_trace(&kg,
|
||||
__params.tile.buffer,
|
||||
__params.tile.start_sample + sample_offset,
|
||||
__params.tile.x + pixel_offset,
|
||||
__params.tile.y + launch_index.y,
|
||||
__params.tile.offset,
|
||||
__params.tile.stride);
|
||||
}
|
||||
|
||||
#ifdef __BAKING__
|
||||
extern "C" __global__ void __raygen__kernel_optix_bake()
|
||||
{
|
||||
KernelGlobals kg;
|
||||
const ShaderParams &p = __params.shader;
|
||||
kernel_bake_evaluate(&kg,
|
||||
p.input,
|
||||
p.output,
|
||||
(ShaderEvalType)p.type,
|
||||
p.filter,
|
||||
p.sx + optixGetLaunchIndex().x,
|
||||
p.offset,
|
||||
p.sample);
|
||||
}
|
||||
#endif
|
||||
|
||||
extern "C" __global__ void __raygen__kernel_optix_displace()
|
||||
{
|
||||
KernelGlobals kg;
|
||||
const ShaderParams &p = __params.shader;
|
||||
kernel_displace_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x);
|
||||
}
|
||||
|
||||
extern "C" __global__ void __raygen__kernel_optix_background()
|
||||
{
|
||||
KernelGlobals kg;
|
||||
const ShaderParams &p = __params.shader;
|
||||
kernel_background_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x);
|
||||
}
|
||||
|
||||
extern "C" __global__ void __miss__kernel_optix_miss()
|
||||
{
|
||||
// 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss
|
||||
optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
|
||||
optixSetPayload_5(PRIMITIVE_NONE);
|
||||
}
|
||||
|
||||
extern "C" __global__ void __anyhit__kernel_optix_local_hit()
|
||||
{
|
||||
#ifdef __BVH_LOCAL__
|
||||
const uint object = get_object_id<true>();
|
||||
if (object != optixGetPayload_4() /* local_object */) {
|
||||
// Only intersect with matching object
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
|
||||
int hit = 0;
|
||||
uint *const lcg_state = get_payload_ptr_0<uint>();
|
||||
LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>();
|
||||
|
||||
if (lcg_state) {
|
||||
const uint max_hits = optixGetPayload_5();
|
||||
for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
|
||||
if (optixGetRayTmax() == local_isect->hits[i].t) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
}
|
||||
|
||||
hit = local_isect->num_hits++;
|
||||
|
||||
if (local_isect->num_hits > max_hits) {
|
||||
hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
|
||||
if (hit >= max_hits) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
|
||||
// Record closest intersection only (do not terminate ray here, since there is no guarantee
|
||||
// about distance ordering in anyhit)
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
|
||||
local_isect->num_hits = 1;
|
||||
}
|
||||
|
||||
Intersection *isect = &local_isect->hits[hit];
|
||||
isect->t = optixGetRayTmax();
|
||||
isect->prim = optixGetPrimitiveIndex();
|
||||
isect->object = get_object_id();
|
||||
isect->type = kernel_tex_fetch(__prim_type, isect->prim);
|
||||
|
||||
if (optixIsTriangleHit()) {
|
||||
const float2 barycentrics = optixGetTriangleBarycentrics();
|
||||
isect->u = 1.0f - barycentrics.y - barycentrics.x;
|
||||
isect->v = barycentrics.x;
|
||||
}
|
||||
else {
|
||||
isect->u = __uint_as_float(optixGetAttribute_0());
|
||||
isect->v = __uint_as_float(optixGetAttribute_1());
|
||||
}
|
||||
|
||||
// Record geometric normal
|
||||
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
|
||||
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0));
|
||||
const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1));
|
||||
const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
|
||||
local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
|
||||
|
||||
// Continue tracing (without this the trace call would return after the first hit)
|
||||
optixIgnoreIntersection();
|
||||
#endif
|
||||
}
|
||||
|
||||
extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
|
||||
{
|
||||
#ifdef __SHADOW_RECORD_ALL__
|
||||
const uint prim = optixGetPrimitiveIndex();
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = optixGetPayload_4();
|
||||
if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
# endif
|
||||
|
||||
// Offset into array with num_hits
|
||||
Intersection *const isect = get_payload_ptr_0<Intersection>() + optixGetPayload_2();
|
||||
isect->t = optixGetRayTmax();
|
||||
isect->prim = prim;
|
||||
isect->object = get_object_id();
|
||||
isect->type = kernel_tex_fetch(__prim_type, prim);
|
||||
|
||||
if (optixIsTriangleHit()) {
|
||||
const float2 barycentrics = optixGetTriangleBarycentrics();
|
||||
isect->u = 1.0f - barycentrics.y - barycentrics.x;
|
||||
isect->v = barycentrics.x;
|
||||
}
|
||||
else {
|
||||
isect->u = __uint_as_float(optixGetAttribute_0());
|
||||
isect->v = __uint_as_float(optixGetAttribute_1());
|
||||
}
|
||||
|
||||
# ifdef __TRANSPARENT_SHADOWS__
|
||||
// Detect if this surface has a shader with transparent shadows
|
||||
if (!shader_transparent_shadow(NULL, isect) || optixGetPayload_2() >= optixGetPayload_3()) {
|
||||
# endif
|
||||
// This is an opaque hit or the hit limit has been reached, abort traversal
|
||||
optixSetPayload_5(true);
|
||||
return optixTerminateRay();
|
||||
# ifdef __TRANSPARENT_SHADOWS__
|
||||
}
|
||||
|
||||
// TODO(pmours): Do we need REQUIRE_UNIQUE_ANYHIT for this to work?
|
||||
optixSetPayload_2(optixGetPayload_2() + 1); // num_hits++
|
||||
|
||||
// Continue tracing
|
||||
optixIgnoreIntersection();
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
|
||||
extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
|
||||
{
|
||||
uint visibility = optixGetPayload_4();
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
const uint prim = optixGetPrimitiveIndex();
|
||||
if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0)
|
||||
return optixIgnoreIntersection();
|
||||
#endif
|
||||
|
||||
// Shadow ray early termination
|
||||
if (visibility & PATH_RAY_SHADOW_OPAQUE)
|
||||
return optixTerminateRay();
|
||||
}
|
||||
|
||||
extern "C" __global__ void __closesthit__kernel_optix_hit()
|
||||
{
|
||||
optixSetPayload_0(__float_as_uint(optixGetRayTmax())); // Intersection distance
|
||||
optixSetPayload_3(optixGetPrimitiveIndex());
|
||||
optixSetPayload_4(get_object_id());
|
||||
// Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index
|
||||
optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex()));
|
||||
|
||||
if (optixIsTriangleHit()) {
|
||||
const float2 barycentrics = optixGetTriangleBarycentrics();
|
||||
optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x));
|
||||
optixSetPayload_2(__float_as_uint(barycentrics.x));
|
||||
}
|
||||
else {
|
||||
optixSetPayload_1(optixGetAttribute_0());
|
||||
optixSetPayload_2(optixGetAttribute_1());
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __HAIR__
|
||||
extern "C" __global__ void __intersection__curve()
|
||||
{
|
||||
const uint prim = optixGetPrimitiveIndex();
|
||||
const uint object = get_object_id<true>();
|
||||
const uint type = kernel_tex_fetch(__prim_type, prim);
|
||||
const uint visibility = optixGetPayload_4();
|
||||
|
||||
const float3 P = optixGetObjectRayOrigin();
|
||||
const float3 dir = optixGetObjectRayDirection();
|
||||
|
||||
# ifdef __OBJECT_MOTION__
|
||||
const float time = optixGetRayTime();
|
||||
# else
|
||||
const float time = 0.0f;
|
||||
# endif
|
||||
|
||||
Intersection isect;
|
||||
isect.t = optixGetRayTmax();
|
||||
|
||||
if (!(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) ?
|
||||
curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type) :
|
||||
cardinal_curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type)) {
|
||||
optixReportIntersection(isect.t,
|
||||
type & PRIMITIVE_ALL,
|
||||
__float_as_int(isect.u), // Attribute_0
|
||||
__float_as_int(isect.v)); // Attribute_1
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_DEBUG__
|
||||
extern "C" __global__ void __exception__kernel_optix_exception()
|
||||
{
|
||||
printf("Unhandled exception occured: code %d!\n", optixGetExceptionCode());
|
||||
}
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user