diff --git a/intern/cycles/kernel/kernel_bvh.h b/intern/cycles/kernel/kernel_bvh.h
index 9d8ad6f3072..d033fb1d145 100644
--- a/intern/cycles/kernel/kernel_bvh.h
+++ b/intern/cycles/kernel/kernel_bvh.h
@@ -87,7 +87,7 @@ __device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *
 __device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax)
 {
 	Transform itfm;
-	*tfm = object_fetch_transform_motion(kg, object, ray->time, &itfm);
+	*tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm);
 
 	*P = transform_point(&itfm, ray->P);
 
@@ -104,9 +104,8 @@ __device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, con
 
 __device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax)
 {
-	if(*t != FLT_MAX) {
+	if(*t != FLT_MAX)
 		*t *= len(transform_direction(tfm, 1.0f/(*idir)));
-	}
 
 	*P = ray->P;
 	*idir = bvh_inverse_direction(ray->D);
@@ -163,7 +162,7 @@ __device_inline void bvh_node_intersect(KernelGlobals *kg,
 
 /* Sven Woop's algorithm */
 __device_inline void bvh_triangle_intersect(KernelGlobals *kg, Intersection *isect,
-	float3 P, float3 idir, uint visibility, int object, int triAddr, Transform *tfm)
+	float3 P, float3 idir, uint visibility, int object, int triAddr)
 {
 	/* compute and check intersection t-value */
 	float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
@@ -285,7 +284,7 @@ __device_inline bool bvh_intersect(KernelGlobals *kg, const Ray *ray, const uint
 					/* triangle intersection */
 					while(primAddr < primAddr2) {
 						/* intersect ray against triangle */
-						bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr, NULL);
+						bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
 
 						/* shadow ray early termination */
 						if(visibility == PATH_RAY_SHADOW_OPAQUE && isect->prim != ~0)
@@ -405,7 +404,7 @@ __device_inline bool bvh_intersect_motion(KernelGlobals *kg, const Ray *ray, con
 					/* triangle intersection */
 					while(primAddr < primAddr2) {
 						/* intersect ray against triangle */
-						bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr, &ob_tfm);
+						bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
 
 						/* shadow ray early termination */
 						if(visibility == PATH_RAY_SHADOW_OPAQUE && isect->prim != ~0)
@@ -444,7 +443,8 @@ __device_inline bool bvh_intersect_motion(KernelGlobals *kg, const Ray *ray, con
 
 __device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
 {
-#ifdef __OBJECT_MOTION__
+	/* todo: fix cuda sm 2.0 motion blur. NOTE(review): confirm the macro below is really spelled __KERNEL_CUDA and not __KERNEL_CUDA__, otherwise this guard never excludes sm 2.0 */
+#if defined(__OBJECT_MOTION__) && (!defined(__KERNEL_CUDA) || (__CUDA_ARCH__ >= 210))
 	if(kernel_data.bvh.have_motion)
 		return bvh_intersect_motion(kg, ray, visibility, isect);
 	else
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index 08674d0e379..1b2fe8c56ee 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -217,7 +217,7 @@ __device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, flo
 	if(kernel_data.cam.shuttertime == 0.0f)
 		ray->time = TIME_INVALID;
 	else
-		ray->time = 0.5f + (time - 0.5f)*kernel_data.cam.shuttertime;
+		ray->time = 0.5f + 0.5f*(time - 0.5f)*kernel_data.cam.shuttertime;
 #endif
 
 	/* sample */
diff --git a/intern/cycles/kernel/kernel_displace.h b/intern/cycles/kernel/kernel_displace.h
index 6461a1eea38..a55f7a7fd75 100644
--- a/intern/cycles/kernel/kernel_displace.h
+++ b/intern/cycles/kernel/kernel_displace.h
@@ -47,6 +47,9 @@ __device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float4 *ou
 		ray.P = make_float3(0.0f, 0.0f, 0.0f);
 		ray.D = equirectangular_to_direction(u, v);
 		ray.t = 0.0f;
+#ifdef __CAMERA_MOTION__
+		ray.time = 0.5f;
+#endif
 
 #ifdef __RAY_DIFFERENTIALS__
 		ray.dD.dx = make_float3(0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 53d53b4bedd..75b6df5f08f 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -34,6 +34,9 @@ __device float3 direct_emissive_eval(KernelGlobals *kg, float rando,
 		ray.P = ls->P;
 		ray.dP.dx = make_float3(0.0f, 0.0f, 0.0f);
 		ray.dP.dy = make_float3(0.0f, 0.0f, 0.0f);
+#ifdef __CAMERA_MOTION__
+		ray.time = time;
+#endif
 		shader_setup_from_background(kg, &sd, &ray);
 		eval = shader_eval_background(kg, &sd, 0);
 	}
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 4bb17c0bd5a..2791b3abbb6 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -303,7 +303,7 @@ __device void triangle_light_sample(KernelGlobals *kg, int prim, int object,
 	if(ls->object >= 0) {
 #ifdef __OBJECT_MOTION__
 		Transform itfm;
-		Transform tfm = object_fetch_transform_motion(kg, ls->object, time, &itfm);
+		Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm);
 #else
 		Transform tfm = object_fetch_transform(kg, ls->object, OBJECT_TRANSFORM);
 		Transform itfm = object_fetch_transform(kg, ls->object, OBJECT_INVERSE_TRANSFORM);
diff --git a/intern/cycles/kernel/kernel_object.h b/intern/cycles/kernel/kernel_object.h
index 79ff7e2020a..2fa9443766e 100644
--- a/intern/cycles/kernel/kernel_object.h
+++ b/intern/cycles/kernel/kernel_object.h
@@ -25,7 +25,7 @@ enum ObjectTransform {
 	OBJECT_TRANSFORM_MOTION_PRE = 8,
 	OBJECT_TRANSFORM_MOTION_MID = 12,
 	OBJECT_TRANSFORM_MOTION_POST = 16,
-	OBJECT_DUPLI = 18
+	OBJECT_DUPLI = 20
 };
 
 __device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, enum ObjectTransform type)
@@ -42,49 +42,56 @@ __device_inline Transform object_fetch_transform(KernelGlobals *kg, int object,
 }
 
 #ifdef __OBJECT_MOTION__
-__device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time, Transform *itfm)
+/* fetch the object's pre/mid/post motion transforms from __objects and interpolate them at the given time */
+__device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time)
 {
+	MotionTransform motion;
+
+	int offset = object*OBJECT_SIZE + (int)OBJECT_TRANSFORM_MOTION_PRE;
+
+	motion.pre.x = kernel_tex_fetch(__objects, offset + 0);
+	motion.pre.y = kernel_tex_fetch(__objects, offset + 1);
+	motion.pre.z = kernel_tex_fetch(__objects, offset + 2);
+	motion.pre.w = kernel_tex_fetch(__objects, offset + 3);
+
+	motion.mid.x = kernel_tex_fetch(__objects, offset + 4);
+	motion.mid.y = kernel_tex_fetch(__objects, offset + 5);
+	motion.mid.z = kernel_tex_fetch(__objects, offset + 6);
+	motion.mid.w = kernel_tex_fetch(__objects, offset + 7);
+
+	motion.post.x = kernel_tex_fetch(__objects, offset + 8);
+	motion.post.y = kernel_tex_fetch(__objects, offset + 9);
+	motion.post.z = kernel_tex_fetch(__objects, offset + 10);
+	motion.post.w = kernel_tex_fetch(__objects, offset + 11);
+
 	Transform tfm;
-
-	int object_flag = kernel_tex_fetch(__object_flag, object);
-
-	/* if we do motion blur */
-	if(object_flag & SD_OBJECT_MOTION) {
-		/* fetch motion transforms */
-		MotionTransform motion;
-
-		int offset = object*OBJECT_SIZE + (int)OBJECT_TRANSFORM_MOTION_PRE;
-
-		motion.pre.x = kernel_tex_fetch(__objects, offset + 0);
-		motion.pre.y = kernel_tex_fetch(__objects, offset + 1);
-		motion.pre.z = kernel_tex_fetch(__objects, offset + 2);
-		motion.pre.w = kernel_tex_fetch(__objects, offset + 3);
-
-		motion.mid.x = kernel_tex_fetch(__objects, offset + 4);
-		motion.mid.y = kernel_tex_fetch(__objects, offset + 5);
-		motion.mid.z = kernel_tex_fetch(__objects, offset + 6);
-		motion.mid.w = kernel_tex_fetch(__objects, offset + 7);
-
-		motion.post.x = kernel_tex_fetch(__objects, offset + 8);
-		motion.post.y = kernel_tex_fetch(__objects, offset + 9);
-		motion.post.z = kernel_tex_fetch(__objects, offset + 10);
-		motion.post.w = kernel_tex_fetch(__objects, offset + 11);
-
-		transform_motion_interpolate(&tfm, &motion, time);
-
-		/* invert */
-		if(itfm)
-			*itfm = transform_quick_inverse(tfm);
-	}
-	else {
-		tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
-
-		if(itfm)
-			*itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-	}
+	transform_motion_interpolate(&tfm, &motion, time);
 
 	return tfm;
 }
+
+/* return the object transform at the given time, testing SD_OBJECT_MOTION first; the inverse is written to itfm only when itfm is non-NULL */
+__device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg, int object, float time, Transform *itfm)
+{
+	int object_flag = kernel_tex_fetch(__object_flag, object);
+
+	if(object_flag & SD_OBJECT_MOTION) {
+		/* if we do motion blur */
+		Transform tfm = object_fetch_transform_motion(kg, object, time);
+
+		if(itfm)
+			*itfm = transform_quick_inverse(tfm);
+
+		return tfm;
+	}
+	else {
+		Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+		if(itfm)
+			*itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+
+		return tfm;
+	}
+}
 #endif
 
 __device_inline void object_position_transform(KernelGlobals *kg, ShaderData *sd, float3 *P)
@@ -271,6 +278,5 @@ __device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
 	return make_float3(f3.z, f3.w, f4.x);
 }
 
-
 CCL_NAMESPACE_END
 
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 36f7122a380..814c32dfbd3 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -43,6 +43,22 @@ CCL_NAMESPACE_BEGIN
 
 /* ShaderData setup from incoming ray */
 
+#ifdef __OBJECT_MOTION__
+__device_noinline void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
+{
+	/* note that this is a separate non-inlined function to work around crash
+	 * on CUDA sm 2.0, otherwise kernel execution crashes (compiler bug?) */
+	if(sd->flag & SD_OBJECT_MOTION) {
+		sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
+		sd->ob_itfm= transform_quick_inverse(sd->ob_tfm);
+	}
+	else {
+		sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+		sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+	}
+}
+#endif
+
 __device_inline void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
 	const Intersection *isect, const Ray *ray)
 {
@@ -72,14 +88,7 @@ __device_inline void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
 
 	/* matrices and time */
 #ifdef __OBJECT_MOTION__
-	if(sd->flag & SD_OBJECT_MOTION) {
-		sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, ray->time, &sd->ob_itfm);
-	}
-	else {
-		sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
-		sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
-	}
-
+	shader_setup_object_transforms(kg, sd, ray->time);
 	sd->time = ray->time;
 #endif
 
@@ -181,13 +190,7 @@ __device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
 		sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
 
 #ifdef __OBJECT_MOTION__
-		if(sd->flag & SD_OBJECT_MOTION) {
-			sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time, &sd->ob_itfm);
-		}
-		else {
-			sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
-			sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
-		}
+		shader_setup_object_transforms(kg, sd, time);
 	}
 
 	sd->time = time;
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index 0b87a530725..25b4d1f08cc 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -64,8 +64,8 @@ void Object::compute_bounds(bool motion_blur, float shuttertime)
 		/* todo: this is really terrible. according to pbrt there is a better
 		 * way to find this iteratively, but did not find implementation yet
 		 * or try to implement myself */
-		float start_t = 0.5f - shuttertime*0.5f;
-		float end_t = 0.5f - shuttertime*0.5f;
+		float start_t = 0.5f - shuttertime*0.25f;
+		float end_t = 0.5f + shuttertime*0.25f;
 
 		for(float t = start_t; t < end_t; t += (1.0f/128.0f)*shuttertime) {
 			Transform ttfm;