Attempt to fix CUDA issues on sm 2.0 cards. Still no luck getting motion blur
working there, but this should at least keep the kernel from crashing.

Also fix the wrong shutter time; it should have been shorter.
This commit is contained in:
Brecht Van Lommel 2012-10-17 22:48:29 +00:00
parent 431caff869
commit 6915394a3b
8 changed files with 78 additions and 66 deletions

@ -87,7 +87,7 @@ __device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *
__device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax)
{
Transform itfm;
*tfm = object_fetch_transform_motion(kg, object, ray->time, &itfm);
*tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm);
*P = transform_point(&itfm, ray->P);
@ -104,9 +104,8 @@ __device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, con
__device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax)
{
if(*t != FLT_MAX) {
if(*t != FLT_MAX)
*t *= len(transform_direction(tfm, 1.0f/(*idir)));
}
*P = ray->P;
*idir = bvh_inverse_direction(ray->D);
@ -163,7 +162,7 @@ __device_inline void bvh_node_intersect(KernelGlobals *kg,
/* Sven Woop's algorithm */
__device_inline void bvh_triangle_intersect(KernelGlobals *kg, Intersection *isect,
float3 P, float3 idir, uint visibility, int object, int triAddr, Transform *tfm)
float3 P, float3 idir, uint visibility, int object, int triAddr)
{
/* compute and check intersection t-value */
float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
@ -285,7 +284,7 @@ __device_inline bool bvh_intersect(KernelGlobals *kg, const Ray *ray, const uint
/* triangle intersection */
while(primAddr < primAddr2) {
/* intersect ray against triangle */
bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr, NULL);
bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
/* shadow ray early termination */
if(visibility == PATH_RAY_SHADOW_OPAQUE && isect->prim != ~0)
@ -405,7 +404,7 @@ __device_inline bool bvh_intersect_motion(KernelGlobals *kg, const Ray *ray, con
/* triangle intersection */
while(primAddr < primAddr2) {
/* intersect ray against triangle */
bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr, &ob_tfm);
bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
/* shadow ray early termination */
if(visibility == PATH_RAY_SHADOW_OPAQUE && isect->prim != ~0)
@ -444,7 +443,8 @@ __device_inline bool bvh_intersect_motion(KernelGlobals *kg, const Ray *ray, con
__device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
{
#ifdef __OBJECT_MOTION__
/* todo: fix cuda sm 2.0 motion blur */
#if defined(__OBJECT_MOTION__) && (!defined(__KERNEL_CUDA) || (__CUDA_ARCH__ >= 210))
if(kernel_data.bvh.have_motion)
return bvh_intersect_motion(kg, ray, visibility, isect);
else

@ -217,7 +217,7 @@ __device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, flo
if(kernel_data.cam.shuttertime == 0.0f)
ray->time = TIME_INVALID;
else
ray->time = 0.5f + (time - 0.5f)*kernel_data.cam.shuttertime;
ray->time = 0.5f + 0.5f*(time - 0.5f)*kernel_data.cam.shuttertime;
#endif
/* sample */

@ -47,6 +47,9 @@ __device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float4 *ou
ray.P = make_float3(0.0f, 0.0f, 0.0f);
ray.D = equirectangular_to_direction(u, v);
ray.t = 0.0f;
#ifdef __CAMERA_MOTION__
ray.time = 0.5f;
#endif
#ifdef __RAY_DIFFERENTIALS__
ray.dD.dx = make_float3(0.0f, 0.0f, 0.0f);

@ -34,6 +34,9 @@ __device float3 direct_emissive_eval(KernelGlobals *kg, float rando,
ray.P = ls->P;
ray.dP.dx = make_float3(0.0f, 0.0f, 0.0f);
ray.dP.dy = make_float3(0.0f, 0.0f, 0.0f);
#ifdef __CAMERA_MOTION__
ray.time = time;
#endif
shader_setup_from_background(kg, &sd, &ray);
eval = shader_eval_background(kg, &sd, 0);
}

@ -303,7 +303,7 @@ __device void triangle_light_sample(KernelGlobals *kg, int prim, int object,
if(ls->object >= 0) {
#ifdef __OBJECT_MOTION__
Transform itfm;
Transform tfm = object_fetch_transform_motion(kg, ls->object, time, &itfm);
Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm);
#else
Transform tfm = object_fetch_transform(kg, ls->object, OBJECT_TRANSFORM);
Transform itfm = object_fetch_transform(kg, ls->object, OBJECT_INVERSE_TRANSFORM);

@ -25,7 +25,7 @@ enum ObjectTransform {
OBJECT_TRANSFORM_MOTION_PRE = 8,
OBJECT_TRANSFORM_MOTION_MID = 12,
OBJECT_TRANSFORM_MOTION_POST = 16,
OBJECT_DUPLI = 18
OBJECT_DUPLI = 20
};
__device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, enum ObjectTransform type)
@ -42,49 +42,53 @@ __device_inline Transform object_fetch_transform(KernelGlobals *kg, int object,
}
#ifdef __OBJECT_MOTION__
__device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time, Transform *itfm)
__device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time)
{
MotionTransform motion;
int offset = object*OBJECT_SIZE + (int)OBJECT_TRANSFORM_MOTION_PRE;
motion.pre.x = kernel_tex_fetch(__objects, offset + 0);
motion.pre.y = kernel_tex_fetch(__objects, offset + 1);
motion.pre.z = kernel_tex_fetch(__objects, offset + 2);
motion.pre.w = kernel_tex_fetch(__objects, offset + 3);
motion.mid.x = kernel_tex_fetch(__objects, offset + 4);
motion.mid.y = kernel_tex_fetch(__objects, offset + 5);
motion.mid.z = kernel_tex_fetch(__objects, offset + 6);
motion.mid.w = kernel_tex_fetch(__objects, offset + 7);
motion.post.x = kernel_tex_fetch(__objects, offset + 8);
motion.post.y = kernel_tex_fetch(__objects, offset + 9);
motion.post.z = kernel_tex_fetch(__objects, offset + 10);
motion.post.w = kernel_tex_fetch(__objects, offset + 11);
Transform tfm;
int object_flag = kernel_tex_fetch(__object_flag, object);
/* if we do motion blur */
if(object_flag & SD_OBJECT_MOTION) {
/* fetch motion transforms */
MotionTransform motion;
int offset = object*OBJECT_SIZE + (int)OBJECT_TRANSFORM_MOTION_PRE;
motion.pre.x = kernel_tex_fetch(__objects, offset + 0);
motion.pre.y = kernel_tex_fetch(__objects, offset + 1);
motion.pre.z = kernel_tex_fetch(__objects, offset + 2);
motion.pre.w = kernel_tex_fetch(__objects, offset + 3);
motion.mid.x = kernel_tex_fetch(__objects, offset + 4);
motion.mid.y = kernel_tex_fetch(__objects, offset + 5);
motion.mid.z = kernel_tex_fetch(__objects, offset + 6);
motion.mid.w = kernel_tex_fetch(__objects, offset + 7);
motion.post.x = kernel_tex_fetch(__objects, offset + 8);
motion.post.y = kernel_tex_fetch(__objects, offset + 9);
motion.post.z = kernel_tex_fetch(__objects, offset + 10);
motion.post.w = kernel_tex_fetch(__objects, offset + 11);
transform_motion_interpolate(&tfm, &motion, time);
/* invert */
if(itfm)
*itfm = transform_quick_inverse(tfm);
}
else {
tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
if(itfm)
*itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
}
transform_motion_interpolate(&tfm, &motion, time);
return tfm;
}
/* Fetch the transform of an object at the given time, choosing between the
 * motion-blurred and the static transform based on the object's flags.
 *
 * If the object has SD_OBJECT_MOTION set, the transform is obtained from
 * object_fetch_transform_motion() and its inverse is computed with
 * transform_quick_inverse(). Otherwise the stored OBJECT_TRANSFORM and
 * OBJECT_INVERSE_TRANSFORM entries are fetched.
 *
 * itfm is optional in both branches: pass NULL when the inverse transform
 * is not needed. Returns the forward transform. */
__device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg, int object, float time, Transform *itfm)
{
	int object_flag = kernel_tex_fetch(__object_flag, object);

	if(object_flag & SD_OBJECT_MOTION) {
		/* if we do motion blur */
		Transform tfm = object_fetch_transform_motion(kg, object, time);

		if(itfm)
			*itfm = transform_quick_inverse(tfm);

		return tfm;
	}
	else {
		Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);

		/* guard the output pointer here too, so that a NULL itfm is handled
		 * the same way as in the motion branch instead of being dereferenced */
		if(itfm)
			*itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);

		return tfm;
	}
}
#endif
__device_inline void object_position_transform(KernelGlobals *kg, ShaderData *sd, float3 *P)
@ -271,6 +275,5 @@ __device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
return make_float3(f3.z, f3.w, f4.x);
}
CCL_NAMESPACE_END

@ -43,6 +43,22 @@ CCL_NAMESPACE_BEGIN
/* ShaderData setup from incoming ray */
#ifdef __OBJECT_MOTION__
/* Fill in sd->ob_tfm and sd->ob_itfm for the object referenced by sd->object.
 *
 * kg:   kernel globals, passed through to the transform fetch functions.
 * sd:   shader data; sd->flag and sd->object are read, sd->ob_tfm and
 *       sd->ob_itfm are written.
 * time: time passed to object_fetch_transform_motion(); only used when
 *       SD_OBJECT_MOTION is set in sd->flag. */
__device_noinline void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
{
/* note that this is a separate non-inlined function to work around a crash
 * on CUDA sm 2.0, otherwise kernel execution crashes (compiler bug?) */
if(sd->flag & SD_OBJECT_MOTION) {
/* motion case: fetch the transform at the given time and derive the
 * inverse from it with transform_quick_inverse() */
sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
sd->ob_itfm= transform_quick_inverse(sd->ob_tfm);
}
else {
/* no motion flag: fetch both the stored transform and the stored inverse */
sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
}
}
#endif
__device_inline void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
const Intersection *isect, const Ray *ray)
{
@ -72,14 +88,7 @@ __device_inline void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
/* matrices and time */
#ifdef __OBJECT_MOTION__
if(sd->flag & SD_OBJECT_MOTION) {
sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, ray->time, &sd->ob_itfm);
}
else {
sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
}
shader_setup_object_transforms(kg, sd, ray->time);
sd->time = ray->time;
#endif
@ -181,13 +190,7 @@ __device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
#ifdef __OBJECT_MOTION__
if(sd->flag & SD_OBJECT_MOTION) {
sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time, &sd->ob_itfm);
}
else {
sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
}
shader_setup_object_transforms(kg, sd, time);
}
sd->time = time;

@ -64,8 +64,8 @@ void Object::compute_bounds(bool motion_blur, float shuttertime)
/* todo: this is really terrible. according to pbrt there is a better
* way to find this iteratively, but did not find implementation yet
* or try to implement myself */
float start_t = 0.5f - shuttertime*0.5f;
float end_t = 0.5f - shuttertime*0.5f;
float start_t = 0.5f - shuttertime*0.25f;
float end_t = 0.5f + shuttertime*0.25f;
for(float t = start_t; t < end_t; t += (1.0f/128.0f)*shuttertime) {
Transform ttfm;