diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp index 3d74c57288b..e10ffb3cf98 100644 --- a/intern/cycles/blender/blender_object.cpp +++ b/intern/cycles/blender/blender_object.cpp @@ -396,6 +396,8 @@ void BlenderSync::sync_motion(BL::SpaceView3D b_v3d, BL::Object b_override) if(b_override) b_cam = b_override; + Camera prevcam = *(scene->camera); + /* go back and forth one frame */ int frame = b_scene.frame_current(); @@ -411,6 +413,10 @@ void BlenderSync::sync_motion(BL::SpaceView3D b_v3d, BL::Object b_override) } scene_frame_set(b_scene, frame); + + /* tag camera for motion update */ + if(scene->camera->motion_modified(prevcam)) + scene->camera->tag_update(); } CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 00130f357dd..6c63872333d 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -17,6 +17,7 @@ */ #include "background.h" +#include "camera.h" #include "film.h" #include "../render/filter.h" #include "graph.h" @@ -179,6 +180,11 @@ void BlenderSync::sync_integrator() integrator->sample_clamp = get_float(cscene, "sample_clamp"); #ifdef __CAMERA_MOTION__ + if(integrator->motion_blur != r.use_motion_blur()) { + scene->object_manager->tag_update(scene); + scene->camera->tag_update(); + } + integrator->motion_blur = (!preview && r.use_motion_blur()); #endif diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index df8a9b1d5b4..ad1ce1df295 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -31,13 +31,11 @@ set(SRC_HEADERS kernel_globals.h kernel_light.h kernel_math.h - kernel_mbvh.h kernel_montecarlo.h kernel_object.h kernel_passes.h kernel_path.h kernel_projection.h - kernel_qbvh.h kernel_random.h kernel_shader.h kernel_textures.h diff --git a/intern/cycles/kernel/kernel_bvh.h b/intern/cycles/kernel/kernel_bvh.h index 90aec2e46b3..9d8ad6f3072 100644 --- a/intern/cycles/kernel/kernel_bvh.h +++ b/intern/cycles/kernel/kernel_bvh.h @@ -57,7 +57,7 @@ __device_inline float3 bvh_inverse_direction(float3 dir) __device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax) { - Transform tfm = object_fetch_transform(kg, object, ray->time, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); *P = transform_point(&tfm, ray->P); @@ -75,7 +75,7 @@ __device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray __device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax) { if(*t != FLT_MAX) { - Transform tfm = object_fetch_transform(kg, object, ray->time, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); *t *= len(transform_direction(&tfm, 1.0f/(*idir))); } @@ -83,6 +83,36 @@ __device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray * *idir = bvh_inverse_direction(ray->D); } +#ifdef __OBJECT_MOTION__ +__device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax) +{ + Transform itfm; + *tfm = object_fetch_transform_motion(kg, object, ray->time, &itfm); + + *P = transform_point(&itfm, ray->P); + + float3 dir = transform_direction(&itfm, ray->D); + + float len; + dir = normalize_len(dir, &len); + + *idir = bvh_inverse_direction(dir); + + if(*t != FLT_MAX) + *t *= len; +} + +__device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax) +{ + if(*t != FLT_MAX) { + *t *= len(transform_direction(tfm, 1.0f/(*idir))); + } + + *P = ray->P; + *idir = bvh_inverse_direction(ray->D); +} +#endif + /* intersect two bounding boxes */ __device_inline void bvh_node_intersect(KernelGlobals *kg, bool *traverseChild0, bool *traverseChild1, @@ -133,7 +163,7 @@ __device_inline void bvh_node_intersect(KernelGlobals *kg, /* Sven Woop's algorithm */ __device_inline void bvh_triangle_intersect(KernelGlobals *kg, Intersection *isect, - float3 P, float3 idir, uint visibility, int object, int triAddr) + float3 P, float3 idir, uint visibility, int object, int triAddr, Transform *tfm) { /* compute and check intersection t-value */ float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); @@ -176,7 +206,7 @@ __device_inline void bvh_triangle_intersect(KernelGlobals *kg, Intersection *ise } } -__device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect) +__device_inline bool bvh_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect) { /* traversal stack in CUDA thread-local memory */ int traversalStack[BVH_STACK_SIZE]; @@ -255,7 +285,7 @@ __device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const ui /* triangle intersection */ while(primAddr < primAddr2) { /* intersect ray against triangle */ - bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr); + bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr, NULL); /* shadow ray early termination */ if(visibility == PATH_RAY_SHADOW_OPAQUE && isect->prim != ~0) @@ -268,7 +298,6 @@ __device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const ui else { /* instance push */ object = kernel_tex_fetch(__prim_object, -primAddr-1); - bvh_instance_push(kg, object, ray, &P, &idir, &isect->t, tmax); ++stackPtr; @@ -296,6 +325,133 @@ __device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const ui return (isect->prim != ~0); } +#ifdef __OBJECT_MOTION__ +__device_inline bool bvh_intersect_motion(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect) +{ + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* ray parameters in registers */ + const float tmax = ray->t; + float3 P = ray->P; + float3 idir = bvh_inverse_direction(ray->D); + int object = ~0; + + Transform ob_tfm; + + isect->t = tmax; + isect->object = ~0; + isect->prim = ~0; + isect->u = 0.0f; + isect->v = 0.0f; + + /* traversal loop */ + do { + do + { + /* traverse internal nodes */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) + { + bool traverseChild0, traverseChild1, closestChild1; + int nodeAddrChild1; + + bvh_node_intersect(kg, &traverseChild0, &traverseChild1, + &closestChild1, &nodeAddr, &nodeAddrChild1, + P, idir, isect->t, visibility, nodeAddr); + + if(traverseChild0 != traverseChild1) { + /* one child was intersected */ + if(traverseChild1) { + nodeAddr = nodeAddrChild1; + } + } + else { + if(!traverseChild0) { + /* neither child was intersected */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + else { + /* both children were intersected, push the farther one */ + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + traversalStack[stackPtr] = nodeAddrChild1; + } + } + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1)); + int primAddr = __float_as_int(leaf.x); + + if(primAddr >= 0) { + int primAddr2 = __float_as_int(leaf.y); + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* triangle intersection */ + while(primAddr < primAddr2) { + /* intersect ray against triangle */ + bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr, &ob_tfm); + + /* shadow ray early termination */ + if(visibility == PATH_RAY_SHADOW_OPAQUE && isect->prim != ~0) + return true; + + primAddr++; + } + } + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + bvh_instance_motion_push(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax); + + ++stackPtr; + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + } + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + if(stackPtr >= 0) { + kernel_assert(object != ~0); + + /* instance pop */ + bvh_instance_motion_pop(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax); + object = ~0; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return (isect->prim != ~0); +} +#endif + +__device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect) +{ +#ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) + return bvh_intersect_motion(kg, ray, visibility, isect); + else +#endif + return bvh_intersect(kg, ray, visibility, isect); +} + __device_inline float3 ray_offset(float3 P, float3 Ng) { #ifdef __INTERSECTION_REFINE__ @@ -352,7 +508,7 @@ __device_inline float3 bvh_triangle_refine(KernelGlobals *kg, ShaderData *sd, co #ifdef __OBJECT_MOTION__ Transform tfm = sd->ob_itfm; #else - Transform tfm = object_fetch_transform(kg, isect->object, ray->time, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); #endif P = transform_point(&tfm, P); @@ -373,7 +529,7 @@ __device_inline float3 bvh_triangle_refine(KernelGlobals *kg, ShaderData *sd, co #ifdef __OBJECT_MOTION__ Transform tfm = sd->ob_tfm; #else - Transform tfm = object_fetch_transform(kg, isect->object, ray->time, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); #endif P = transform_point(&tfm, P); diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index 1084415d0cf..4bb17c0bd5a 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -301,8 +301,13 @@ __device void triangle_light_sample(KernelGlobals *kg, int prim, int object, #ifdef __INSTANCING__ /* instance transform */ if(ls->object >= 0) { - Transform tfm = object_fetch_transform(kg, ls->object, time, OBJECT_TRANSFORM); - Transform itfm = object_fetch_transform(kg, ls->object, time, OBJECT_INVERSE_TRANSFORM); +#ifdef __OBJECT_MOTION__ + Transform itfm; + Transform tfm = object_fetch_transform_motion(kg, ls->object, time, &itfm); +#else + Transform tfm = object_fetch_transform(kg, ls->object, OBJECT_TRANSFORM); + Transform itfm = object_fetch_transform(kg, ls->object, OBJECT_INVERSE_TRANSFORM); +#endif ls->P = transform_point(&tfm, ls->P); ls->Ng = normalize(transform_direction_transposed(&itfm, ls->Ng)); diff --git a/intern/cycles/kernel/kernel_mbvh.h b/intern/cycles/kernel/kernel_mbvh.h deleted file mode 100644 index ccbd3d069b4..00000000000 --- a/intern/cycles/kernel/kernel_mbvh.h +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Copyright 2011, Blender Foundation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -CCL_NAMESPACE_BEGIN - -#define MBVH_OBJECT_SENTINEL 0x76543210 -#define MBVH_NODE_SIZE 8 -#define MBVH_STACK_SIZE 1024 -#define MBVH_RAY_STACK_SIZE 10000 - -typedef struct MBVHTask { - int node; - int index; - int num; - int object; -} MBVHTask; - -typedef struct MVBHRay { - float3 P; - float u; - float3 idir; - float v; - float t; - int index; - int object; - - float3 origP; - float3 origD; - float tmax; -} MBVHRay; - -__device float3 mbvh_inverse_direction(float3 dir) -{ - // Avoid divide by zero (ooeps = exp2f(-80.0f)) - float ooeps = 0.00000000000000000000000082718061255302767487140869206996285356581211090087890625f; - float3 idir; - - idir.x = 1.0f / (fabsf(dir.x) > ooeps ? dir.x : copysignf(ooeps, dir.x)); - idir.y = 1.0f / (fabsf(dir.y) > ooeps ? dir.y : copysignf(ooeps, dir.y)); - idir.z = 1.0f / (fabsf(dir.z) > ooeps ? dir.z : copysignf(ooeps, dir.z)); - - return idir; -} - -__device void mbvh_instance_push(KernelGlobals *kg, int object, MBVHRay *ray) -{ - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - - ray->P = transform_point(&tfm, ray->origP); - - float3 dir = ray->origD; - - if(ray->t != ray->tmax) dir *= ray->t; - - dir = transform_direction(&tfm, dir); - ray->idir = mbvh_inverse_direction(normalize(dir)); - - if(ray->t != ray->tmax) ray->t = len(dir); -} - -__device void mbvh_instance_pop(KernelGlobals *kg, int object, MBVHRay *ray) -{ - Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); - - if(ray->t != ray->tmax) - ray->t = len(transform_direction(&tfm, (1.0f/(ray->idir)) * (ray->t))); - - ray->P = ray->origP; - ray->idir = mbvh_inverse_direction(ray->origD); -} - -/* Sven Woop's algorithm */ -__device void mbvh_triangle_intersect(KernelGlobals *kg, MBVHRay *ray, int object, int triAddr) -{ - float3 P = ray->P; - float3 idir = ray->idir; - - /* compute and check intersection t-value */ - float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+0); - float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+1); - float3 dir = 1.0f/idir; - - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); - float t = Oz * invDz; - - if(t > 0.0f && t < ray->t) { - /* compute and check barycentric u */ - float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; - float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; - float u = Ox + t*Dx; - - if(u >= 0.0f) { - /* compute and check barycentric v */ - float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+2); - float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; - float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; - float v = Oy + t*Dy; - - if(v >= 0.0f && u + v <= 1.0f) { - /* record intersection */ - ray->index = triAddr; - ray->object = object; - ray->u = u; - ray->v = v; - ray->t = t; - } - } - } -} - -__device void mbvh_node_intersect(KernelGlobals *kg, __m128 *traverseChild, - __m128 *tHit, float3 P, float3 idir, float t, int nodeAddr) -{ - /* X axis */ - const __m128 bminx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+0); - const __m128 t0x = _mm_mul_ps(_mm_sub_ps(bminx, _mm_set_ps1(P.x)), _mm_set_ps1(idir.x)); - const __m128 bmaxx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+1); - const __m128 t1x = _mm_mul_ps(_mm_sub_ps(bmaxx, _mm_set_ps1(P.x)), _mm_set_ps1(idir.x)); - - __m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps()); - __m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t)); - - /* Y axis */ - const __m128 bminy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+2); - const __m128 t0y = _mm_mul_ps(_mm_sub_ps(bminy, _mm_set_ps1(P.y)), _mm_set_ps1(idir.y)); - const __m128 bmaxy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+3); - const __m128 t1y = _mm_mul_ps(_mm_sub_ps(bmaxy, _mm_set_ps1(P.y)), _mm_set_ps1(idir.y)); - - tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin); - tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax); - - /* Z axis */ - const __m128 bminz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+4); - const __m128 t0z = _mm_mul_ps(_mm_sub_ps(bminz, _mm_set_ps1(P.z)), _mm_set_ps1(idir.z)); - const __m128 bmaxz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+5); - const __m128 t1z = _mm_mul_ps(_mm_sub_ps(bmaxz, _mm_set_ps1(P.z)), _mm_set_ps1(idir.z)); - - tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin); - tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax); - - /* compare and get mask */ - *traverseChild = _mm_cmple_ps(tmin, tmax); - - /* get distance XXX probably wrong */ - *tHit = tmin; -} - -static void mbvh_sort_by_length(int id[4], float len[4]) -{ - for(int i = 1; i < 4; i++) { - int j = i - 1; - - while(j >= 0 && len[j] > len[j+1]) { - swap(len[j], len[j+1]); - swap(id[j], id[j+1]); - j--; - } - } -} - -__device void scene_intersect(KernelGlobals *kg, MBVHRay *rays, int numrays) -{ - /* traversal stacks */ - MBVHTask task_stack[MBVH_STACK_SIZE]; - int active_ray_stacks[4][MBVH_RAY_STACK_SIZE]; - int num_task, num_active[4] = {0, 0, 0, 0}; - __m128i one_mm = _mm_set1_epi32(1); - - /* push root node task on stack */ - task_stack[0].node = kernel_data.bvh.root; - task_stack[0].index = 0; - task_stack[0].num = numrays; - task_stack[0].object = ~0; - num_task = 1; - - /* push all rays in first SIMD lane */ - for(int i = 0; i < numrays; i++) - active_ray_stacks[0][i] = i; - num_active[0] = numrays; - - while(num_task >= 1) { - /* pop task */ - MBVHTask task = task_stack[--num_task]; - - if(task.node == MBVH_OBJECT_SENTINEL) { - /* instance pop */ - - /* pop rays from stack */ - num_active[task.index] -= task.num; - int ray_offset = num_active[task.index]; - - /* transform rays */ - for(int i = 0; i < task.num; i++) { - MBVHRay *ray = &rays[active_ray_stacks[task.index][ray_offset + i]]; - mbvh_instance_pop(kg, task.object, ray); - } - } - else if(task.node >= 0) { - /* inner node? */ - - /* pop rays from stack*/ - num_active[task.index] -= task.num; - int ray_offset = num_active[task.index]; - - /* initialze simd values */ - __m128i num_active_mm = _mm_load_si128((__m128i*)num_active); - __m128 len_mm = _mm_set_ps1(0.0f); - - for(int i = 0; i < task.num; i++) { - int rayid = active_ray_stacks[task.index][ray_offset + i]; - MVBHRay *ray = rays + rayid; - - /* intersect 4 QBVH node children */ - __m128 result; - __m128 thit; - - mbvh_node_intersect(kg, &result, &thit, ray->P, ray->idir, ray->t, task.node); - - /* update length for sorting */ - len_mm = _mm_add_ps(len_mm, _mm_and_ps(thit, result)); - - /* push rays on stack */ - for(int j = 0; j < 4; j++) - active_ray_stacks[j][num_active[j]] = rayid; - - /* update num active */ - __m128i resulti = _mm_and_si128(*((__m128i*)&result), one_mm); - num_active_mm = _mm_add_epi32(resulti, num_active_mm); - _mm_store_si128((__m128i*)num_active, num_active_mm); - } - - if(num_active[0] || num_active[1] || num_active[2] || num_active[3]) { - /* load child node addresses */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, task.node); - int child[4] = { - __float_as_int(cnodes.x), - __float_as_int(cnodes.y), - __float_as_int(cnodes.z), - __float_as_int(cnodes.w)}; - - /* sort nodes by average intersection distance */ - int ids[4] = {0, 1, 2, 3}; - float len[4]; - - _mm_store_ps(len, len_mm); - mbvh_sort_by_length(ids, len); - - /* push new tasks on stack */ - for(int j = 0; j < 4; j++) { - if(num_active[j]) { - int id = ids[j]; - - task_stack[num_task].node = child[id]; - task_stack[num_task].index = id; - task_stack[num_task].num = num_active[id]; - task_stack[num_task].object = task.object; - num_task++; - } - } - } - } - else { - /* fetch leaf node data */ - float4 leaf = kernel_tex_fetch(__bvh_nodes, (-task.node-1)*MBVH_NODE_SIZE+(MBVH_NODE_SIZE-2)); - int triAddr = __float_as_int(leaf.x); - int triAddr2 = __float_as_int(leaf.y); - - /* pop rays from stack*/ - num_active[task.index] -= task.num; - int ray_offset = num_active[task.index]; - - /* triangles */ - if(triAddr >= 0) { - int i, numq = (task.num >> 2) << 2; - - /* SIMD ray leaf intersection */ - for(i = 0; i < numq; i += 4) { - MBVHRay *ray4[4] = { - &rays[active_ray_stacks[task.index][ray_offset + i + 0]], - &rays[active_ray_stacks[task.index][ray_offset + i + 1]], - &rays[active_ray_stacks[task.index][ray_offset + i + 2]], - &rays[active_ray_stacks[task.index][ray_offset + i + 3]]}; - - /* load SoA */ - - while(triAddr < triAddr2) { - mbvh_triangle_intersect(ray4[0], task.object, task.node); - mbvh_triangle_intersect(ray4[1], task.object, task.node); - mbvh_triangle_intersect(ray4[2], task.object, task.node); - mbvh_triangle_intersect(ray4[3], task.object, task.node); - triAddr++; - - /* some shadow ray optim could be done by setting t=0 */ - } - - /* store AoS */ - } - - /* mono ray leaf intersection */ - for(; i < task.num; i++) { - MBVHRay *ray = &rays[active_ray_stacks[task.index][ray_offset + i]]; - - while(triAddr < triAddr2) { - mbvh_triangle_intersect(kg, ray, task.object, task.node); - triAddr++; - } - } - } - else { - /* instance push */ - int object = -triAddr-1; - int node = triAddr; - - /* push instance pop task */ - task_stack[num_task].node = MBVH_OBJECT_SENTINEL; - task_stack[num_task].index = task.index; - task_stack[num_task].num = task.num; - task_stack[num_task].object = object; - num_task++; - - num_active[task.index] += task.num; - - /* push node task */ - task_stack[num_task].node = node; - task_stack[num_task].index = task.index; - task_stack[num_task].num = task.num; - task_stack[num_task].object = object; - num_task++; - - for(int i = 0; i < task.num; i++) { - int rayid = active_ray_stacks[task.index][ray_offset + i]; - - /* push on stack for last task */ - active_ray_stacks[task.index][num_active[task.index]] = rayid; - num_active[task.index]++; - - /* transform ray */ - MBVHRay *ray = &rays[rayid]; - mbvh_instance_push(kg, object, ray); - } - } - } - } -} - -__device void mbvh_set_ray(MBVHRay *rays, int i, Ray *ray, float tmax) -{ - MBVHRay *mray = &rays[i]; - - /* ray parameters in registers */ - mray->P = ray->P; - mray->idir = mbvh_inverse_direction(ray->D); - mray->t = tmax; -} - -__device bool mbvh_get_intersection(MVBHRay *rays, int i, Intersection *isect, float tmax) -{ - MBVHRay *mray = &rays[i]; - - if(mray->t == tmax) - return false; - - isect->t = mray->t; - isect->u = mray->u; - isect->v = mray->v; - isect->index = mray->index; - isect->object = mray->object; - - return true; -} - -__device bool mbvh_get_shadow(MBVHRay *rays, int i, float tmax) -{ - return (rays[i].t == tmax); -} - -CCL_NAMESPACE_END - diff --git a/intern/cycles/kernel/kernel_object.h b/intern/cycles/kernel/kernel_object.h index ad43120146a..d8ea2cf9926 100644 --- a/intern/cycles/kernel/kernel_object.h +++ b/intern/cycles/kernel/kernel_object.h @@ -27,39 +27,11 @@ enum ObjectTransform { OBJECT_DUPLI = 16 }; -__device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, float time, enum ObjectTransform type) +__device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, enum ObjectTransform type) { - Transform tfm; - -#ifdef __OBJECT_MOTION__ - /* if we do motion blur */ - if(sd->flag & SD_OBJECT_MOTION) { - /* fetch motion transforms */ - MotionTransform motion; - - motion.pre.x = have_motion; - motion.pre.y = kernel_tex_fetch(__objects, offset + 1); - motion.pre.z = kernel_tex_fetch(__objects, offset + 2); - motion.pre.w = kernel_tex_fetch(__objects, offset + 3); - - motion.post.x = kernel_tex_fetch(__objects, offset + 4); - motion.post.y = kernel_tex_fetch(__objects, offset + 5); - motion.post.z = kernel_tex_fetch(__objects, offset + 6); - motion.post.w = kernel_tex_fetch(__objects, offset + 7); - - /* interpolate (todo: do only once per object) */ - transform_motion_interpolate(&tfm, &motion, time); - - /* invert */ - if(type == OBJECT_INVERSE_TRANSFORM) - tfm = transform_quick_inverse(tfm); - - return tfm; - } -#endif - int offset = object*OBJECT_SIZE + (int)type; + Transform tfm; tfm.x = kernel_tex_fetch(__objects, offset + 0); tfm.y = kernel_tex_fetch(__objects, offset + 1); tfm.z = kernel_tex_fetch(__objects, offset + 2); @@ -68,12 +40,54 @@ __device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, return tfm; } +#ifdef __OBJECT_MOTION__ +__device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time, Transform *itfm) +{ + Transform tfm; + + int object_flag = kernel_tex_fetch(__object_flag, object); + + /* if we do motion blur */ + if(object_flag & SD_OBJECT_MOTION) { + /* fetch motion transforms */ + MotionTransform motion; + + int offset = object*OBJECT_SIZE + (int)OBJECT_TRANSFORM_MOTION_PRE; + + motion.pre.x = kernel_tex_fetch(__objects, offset + 0); + motion.pre.y = kernel_tex_fetch(__objects, offset + 1); + motion.pre.z = kernel_tex_fetch(__objects, offset + 2); + motion.pre.w = kernel_tex_fetch(__objects, offset + 3); + + + motion.post.x = kernel_tex_fetch(__objects, offset + 4); + motion.post.y = kernel_tex_fetch(__objects, offset + 5); + motion.post.z = kernel_tex_fetch(__objects, offset + 6); + motion.post.w = kernel_tex_fetch(__objects, offset + 7); + + transform_motion_interpolate(&tfm, &motion, time); + + /* invert */ + if(itfm) + *itfm = transform_quick_inverse(tfm); + } + else { + tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + + if(itfm) + *itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + } + + return tfm; +} +#endif + __device_inline void object_position_transform(KernelGlobals *kg, ShaderData *sd, float3 *P) { #ifdef __OBJECT_MOTION__ *P = transform_point(&sd->ob_tfm, *P); #else - Transform tfm = object_fetch_transform(kg, sd->object, TIME_INVALID, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); *P = transform_point(&tfm, *P); #endif } @@ -83,7 +97,7 @@ __device_inline void object_inverse_position_transform(KernelGlobals *kg, Shader #ifdef __OBJECT_MOTION__ *P = transform_point(&sd->ob_itfm, *P); #else - Transform tfm = object_fetch_transform(kg, sd->object, TIME_INVALID, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); *P = transform_point(&tfm, *P); #endif } @@ -93,7 +107,7 @@ __device_inline void object_inverse_normal_transform(KernelGlobals *kg, ShaderDa #ifdef __OBJECT_MOTION__ *N = normalize(transform_direction_transposed(&sd->ob_tfm, *N)); #else - Transform tfm = object_fetch_transform(kg, sd->object, TIME_INVALID, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); *N = normalize(transform_direction_transposed(&tfm, *N)); #endif } @@ -103,7 +117,7 @@ __device_inline void object_normal_transform(KernelGlobals *kg, ShaderData *sd, #ifdef __OBJECT_MOTION__ *N = normalize(transform_direction_transposed(&sd->ob_itfm, *N)); #else - Transform tfm = object_fetch_transform(kg, sd->object, TIME_INVALID, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); *N = normalize(transform_direction_transposed(&tfm, *N)); #endif } @@ -113,7 +127,7 @@ __device_inline void object_dir_transform(KernelGlobals *kg, ShaderData *sd, flo #ifdef __OBJECT_MOTION__ *D = transform_direction(&sd->ob_tfm, *D); #else - Transform tfm = object_fetch_transform(kg, sd->object, 0.0f, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); *D = transform_direction(&tfm, *D); #endif } @@ -123,7 +137,7 @@ __device_inline float3 object_location(KernelGlobals *kg, ShaderData *sd) #ifdef __OBJECT_MOTION__ return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w); #else - Transform tfm = object_fetch_transform(kg, sd->object, 0.0f, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); return make_float3(tfm.x.w, tfm.y.w, tfm.z.w); #endif } diff --git a/intern/cycles/kernel/kernel_qbvh.h b/intern/cycles/kernel/kernel_qbvh.h deleted file mode 100644 index 525b616921d..00000000000 --- a/intern/cycles/kernel/kernel_qbvh.h +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* - * "Persistent while-while kernel" used in: - * - * "Understanding the Efficiency of Ray Traversal on GPUs", - * Timo Aila and Samuli Laine, - * Proc. High-Performance Graphics 2009 - */ - -/* bottom-most stack entry, indicating the end of traversal */ - -#define ENTRYPOINT_SENTINEL 0x76543210 -/* 64 object BVH + 64 mesh BVH + 64 object node splitting */ -#define QBVH_STACK_SIZE 192 -#define QBVH_NODE_SIZE 8 -#define TRI_NODE_SIZE 3 - -__device_inline float3 qbvh_inverse_direction(float3 dir) -{ - // Avoid divide by zero (ooeps = exp2f(-80.0f)) - float ooeps = 0.00000000000000000000000082718061255302767487140869206996285356581211090087890625f; - float3 idir; - - idir.x = 1.0f/((fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x)); - idir.y = 1.0f/((fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y)); - idir.z = 1.0f/((fabsf(dir.z) > ooeps)? dir.z: copysignf(ooeps, dir.z)); - - return idir; -} - -__device_inline void qbvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax) -{ - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - - *P = transform_point(&tfm, ray->P); - - float3 dir = transform_direction(&tfm, ray->D); - - float len; - dir = normalize_len(dir, &len); - - *idir = qbvh_inverse_direction(dir); - - if(*t != FLT_MAX) - *t *= len; -} - -__device_inline void qbvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax) -{ - Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); - - if(*t != FLT_MAX) - *t *= len(transform_direction(&tfm, 1.0f/(*idir))); - - *P = ray->P; - *idir = qbvh_inverse_direction(ray->D); -} - -#ifdef __KERNEL_CPU__ - -__device_inline void qbvh_node_intersect(KernelGlobals *kg, int *traverseChild, - int nodeAddrChild[4], float3 P, float3 idir, float t, int nodeAddr) -{ - /* X axis */ - const __m128 bminx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0); - const __m128 t0x = _mm_mul_ps(_mm_sub_ps(bminx, _mm_set_ps1(P.x)), _mm_set_ps1(idir.x)); - const __m128 bmaxx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1); - const __m128 t1x = _mm_mul_ps(_mm_sub_ps(bmaxx, _mm_set_ps1(P.x)), _mm_set_ps1(idir.x)); - - __m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps()); - __m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t)); - - /* Y axis */ - const __m128 bminy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2); - const __m128 t0y = _mm_mul_ps(_mm_sub_ps(bminy, _mm_set_ps1(P.y)), _mm_set_ps1(idir.y)); - const __m128 bmaxy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3); - const __m128 t1y = _mm_mul_ps(_mm_sub_ps(bmaxy, _mm_set_ps1(P.y)), _mm_set_ps1(idir.y)); - - tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin); - tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax); - - /* Z axis */ - const __m128 bminz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4); - const __m128 t0z = _mm_mul_ps(_mm_sub_ps(bminz, _mm_set_ps1(P.z)), _mm_set_ps1(idir.z)); - const __m128 bmaxz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5); - const __m128 t1z = _mm_mul_ps(_mm_sub_ps(bmaxz, _mm_set_ps1(P.z)), _mm_set_ps1(idir.z)); - - tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin); - tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax); - - /* compare and get mask */ - *traverseChild = _mm_movemask_ps(_mm_cmple_ps(tmin, tmax)); - - /* get node addresses */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+6); - - nodeAddrChild[0] = __float_as_int(cnodes.x); - nodeAddrChild[1] = __float_as_int(cnodes.y); - nodeAddrChild[2] = __float_as_int(cnodes.z); - nodeAddrChild[3] = __float_as_int(cnodes.w); -} - -#else - -__device_inline bool qbvh_bb_intersect(float3 bmin, float3 bmax, float3 P, float3 idir, float t) -{ - float t0x = (bmin.x - P.x)*idir.x; - float t1x = (bmax.x - P.x)*idir.x; - float t0y = (bmin.y - P.y)*idir.y; - float t1y = (bmax.y - P.y)*idir.y; - float t0z = (bmin.z - P.z)*idir.z; - float t1z = (bmax.z - P.z)*idir.z; - - float minx = min(t0x, t1x); - float maxx = max(t0x, t1x); - float miny = min(t0y, t1y); - float maxy = max(t0y, t1y); - float minz = min(t0z, t1z); - float maxz = max(t0z, t1z); - - float tmin = max4(0.0f, minx, miny, minz); - float tmax = min4(t, maxx, maxy, maxz); - - return (tmin <= tmax); -} - -/* intersect four bounding boxes */ -__device_inline void qbvh_node_intersect(KernelGlobals *kg, int *traverseChild, - int nodeAddrChild[4], float3 P, float3 idir, float t, int nodeAddr) -{ - /* fetch node data */ - float4 minx = kernel_tex_fetch(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0); - float4 miny = kernel_tex_fetch(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2); - float4 minz = kernel_tex_fetch(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4); - float4 maxx = kernel_tex_fetch(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1); - float4 maxy = kernel_tex_fetch(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3); - float4 maxz = kernel_tex_fetch(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5); - - /* intersect bounding boxes */ - bool traverseChild0 = qbvh_bb_intersect(make_float3(minx.x, miny.x, minz.x), make_float3(maxx.x, maxy.x, maxz.x), P, idir, t); - bool traverseChild1 = qbvh_bb_intersect(make_float3(minx.y, miny.y, minz.y), make_float3(maxx.y, maxy.y, maxz.y), P, idir, t); - bool traverseChild2 = qbvh_bb_intersect(make_float3(minx.z, miny.z, minz.z), make_float3(maxx.z, maxy.z, maxz.z), P, idir, t); - bool traverseChild3 = qbvh_bb_intersect(make_float3(minx.w, miny.w, minz.w), make_float3(maxx.w, maxy.w, maxz.w), P, idir, t); - - *traverseChild = 0; - if(traverseChild0) *traverseChild |= 1; - if(traverseChild1) *traverseChild |= 2; - if(traverseChild2) *traverseChild |= 4; - if(traverseChild3) *traverseChild |= 8; - - /* get node addresses */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+6); - - nodeAddrChild[0] = __float_as_int(cnodes.x); - nodeAddrChild[1] = __float_as_int(cnodes.y); - nodeAddrChild[2] = __float_as_int(cnodes.z); - nodeAddrChild[3] = __float_as_int(cnodes.w); -} - -#endif - -/* Sven Woop's algorithm */ -__device_inline void qbvh_triangle_intersect(KernelGlobals *kg, Intersection *isect, float3 P, float3 idir, int object, int triAddr) -{ - /* compute and check intersection t-value */ - float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); - float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); - float3 dir = 1.0f/idir; - - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); - float t = Oz * invDz; - - if(t > 0.0f && t < isect->t) { - /* compute and check barycentric u */ - float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; - float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; - float u = Ox + t*Dx; - - if(u >= 0.0f) { - /* compute and check barycentric v */ - float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); - float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; - float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; - float v = Oy + t*Dy; - - if(v >= 0.0f && u + v <= 1.0f) { - /* record intersection */ - isect->prim = triAddr; - isect->object = object; - isect->u = u; - isect->v = v; - isect->t = t; - } - } - } -} - -__device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const bool isshadowray, Intersection *isect) -{ - /* traversal stack in CUDA thread-local memory */ - int traversalStack[QBVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stackPtr = 0; - int nodeAddr = kernel_data.bvh.root; - - /* ray parameters in registers */ - const float tmax = ray->t; - float3 P = ray->P; - float3 idir = qbvh_inverse_direction(ray->D); - int object = ~0; - - isect->t = tmax; - isect->object = ~0; - isect->prim = ~0; - isect->u = 0.0f; - isect->v = 0.0f; - - /* traversal loop */ - do { - do - { - /* traverse internal nodes */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) - { - int traverseChild, nodeAddrChild[4]; - - qbvh_node_intersect(kg, &traverseChild, nodeAddrChild, - P, idir, isect->t, nodeAddr); - - if(traverseChild & 1) { - ++stackPtr; - traversalStack[stackPtr] = nodeAddrChild[0]; - } - - if(traverseChild & 2) { - ++stackPtr; - traversalStack[stackPtr] = nodeAddrChild[1]; - } - if(traverseChild & 4) { - ++stackPtr; - traversalStack[stackPtr] = nodeAddrChild[2]; - } - - if(traverseChild & 8) { - ++stackPtr; - traversalStack[stackPtr] = nodeAddrChild[3]; - } - - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } - - /* if node is leaf, fetch triangle list */ - if(nodeAddr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*QBVH_NODE_SIZE+(QBVH_NODE_SIZE-2)); - int primAddr = __float_as_int(leaf.x); - -#ifdef __INSTANCING__ - if(primAddr >= 0) { -#endif - int primAddr2 = __float_as_int(leaf.y); - - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - - /* triangle intersection */ - while(primAddr < primAddr2) { - /* intersect ray against triangle */ - qbvh_triangle_intersect(kg, isect, P, idir, object, primAddr); - - /* shadow ray early termination */ - if(isshadowray && isect->prim != ~0) - return true; - - primAddr++; - } -#ifdef __INSTANCING__ - } - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -primAddr-1); - - qbvh_instance_push(kg, object, ray, &P, &idir, &isect->t, tmax); - - ++stackPtr; - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; - - nodeAddr = kernel_tex_fetch(__object_node, object); - } -#endif - } - } while(nodeAddr != ENTRYPOINT_SENTINEL); - -#ifdef __INSTANCING__ - if(stackPtr >= 0) { - kernel_assert(object != ~0); - - /* instance pop */ - qbvh_instance_pop(kg, object, ray, &P, &idir, &isect->t, tmax); - object = ~0; - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } -#endif - } while(nodeAddr != ENTRYPOINT_SENTINEL); - - return (isect->prim != ~0); -} - -__device_inline float3 ray_offset(float3 P, float3 Ng) -{ -#ifdef __INTERSECTION_REFINE__ - const float epsilon_f = 1e-5f; - const int epsilon_i = 32; - - float3 res; - - /* x component */ - if(fabsf(P.x) < epsilon_f) { - res.x = P.x + Ng.x*epsilon_f; - } - else { - uint ix = __float_as_uint(P.x); - ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i; - res.x = __uint_as_float(ix); - } - - /* y component */ - if(fabsf(P.y) < epsilon_f) { - res.y = P.y + Ng.y*epsilon_f; - } - else { - uint iy = __float_as_uint(P.y); - iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i; - res.y = __uint_as_float(iy); - } - - /* z component */ - if(fabsf(P.z) < epsilon_f) { - res.z = P.z + Ng.z*epsilon_f; - } - else { - uint iz = __float_as_uint(P.z); - iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i; - res.z = __uint_as_float(iz); - } - - return res; -#else - const float epsilon_f = 1e-4f; - return P + epsilon_f*Ng; -#endif -} - -__device_inline float3 bvh_triangle_refine(KernelGlobals *kg, const Intersection *isect, const Ray *ray) -{ - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; - -#ifdef __INTERSECTION_REFINE__ - if(isect->object != ~0) { - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D*t); - D = normalize_len(D, &t); - } - - P = P + D*t; - - float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); - float rt = Oz * invDz; - - P = P + D*rt; - - if(isect->object != ~0) { - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); - P = transform_point(&tfm, P); - } - - return P; -#else - return P + D*t; -#endif -} - -CCL_NAMESPACE_END - diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index 1ed5e3d352c..2711012edef 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -67,10 +67,18 @@ __device_inline void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, sd->v = isect->v; #endif + sd->flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2); + sd->flag |= kernel_tex_fetch(__object_flag, sd->object); + /* matrices and time */ #ifdef __OBJECT_MOTION__ - sd->ob_tfm = object_fetch_transform(kg, sd->object, ray->time, OBJECT_TRANSFORM); - sd->ob_itfm = object_fetch_transform(kg, sd->object, ray->time, OBJECT_INVERSE_TRANSFORM); + if(sd->flag & SD_OBJECT_MOTION) { + sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time, &sd->ob_itfm); + } + else { + sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + } sd->time = ray->time; #endif @@ -87,9 +95,6 @@ __device_inline void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, if(sd->shader & SHADER_SMOOTH_NORMAL) sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); - sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); - sd->flag |= kernel_tex_fetch(__object_flag, sd->object); - #ifdef __DPDU__ /* dPdu/dPdv */ triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim); @@ -173,11 +178,20 @@ __device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, } #endif -#ifdef __OBJECT_MOTION__ - sd->time = time; + sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); + if(sd->object != -1) + sd->flag |= kernel_tex_fetch(__object_flag, sd->object); - sd->ob_tfm = object_fetch_transform(kg, sd->object, time, OBJECT_TRANSFORM); - sd->ob_itfm = object_fetch_transform(kg, sd->object, time, OBJECT_INVERSE_TRANSFORM); +#ifdef __OBJECT_MOTION__ + if(sd->flag & SD_OBJECT_MOTION) { + sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time, &sd->ob_itfm); + } + else { + sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + } + + sd->time = time; #endif /* smooth normal */ @@ -190,10 +204,6 @@ __device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, #endif } - sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); - if(sd->object != -1) - sd->flag |= kernel_tex_fetch(__object_flag, sd->object); - #ifdef __DPDU__ /* dPdu/dPdv */ if(sd->prim == ~0) { diff --git a/intern/cycles/kernel/kernel_triangle.h b/intern/cycles/kernel/kernel_triangle.h index f57c59a45eb..43cfa330724 100644 --- a/intern/cycles/kernel/kernel_triangle.h +++ b/intern/cycles/kernel/kernel_triangle.h @@ -201,10 +201,10 @@ __device float4 triangle_motion_vector(KernelGlobals *kg, ShaderData *sd) * transformation was set match the world/object space of motion_pre/post */ Transform tfm; - tfm = object_fetch_transform(kg, sd->object, TIME_INVALID, OBJECT_TRANSFORM_MOTION_PRE); + tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM_MOTION_PRE); motion_pre = transform_point(&tfm, motion_pre); - tfm = object_fetch_transform(kg, sd->object, TIME_INVALID, OBJECT_TRANSFORM_MOTION_POST); + tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM_MOTION_POST); motion_post = transform_point(&tfm, motion_post); float3 P; diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index be49aa54e47..2acea04838a 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -113,7 +113,6 @@ CCL_NAMESPACE_BEGIN #endif //#define __SOBOL_FULL_SCREEN__ -//#define __QBVH__ /* Shader Evaluation */ @@ -428,13 +427,6 @@ typedef struct ShaderData { /* length of the ray being shaded */ float ray_length; -#ifdef __OBJECT_MOTION__ - /* object <-> world space transformations, cached to avoid - * re-interpolating them constantly for shading */ - Transform ob_tfm; - Transform ob_itfm; -#endif - #ifdef __RAY_DIFFERENTIALS__ /* differential of P. these are orthogonal to Ng, not N */ differential3 dP; @@ -453,6 +445,13 @@ typedef struct ShaderData { float3 T; #endif +#ifdef __OBJECT_MOTION__ + /* object <-> world space transformations, cached to avoid + * re-interpolating them constantly for shading */ + Transform ob_tfm; + Transform ob_itfm; +#endif + #ifdef __MULTI_CLOSURE__ /* Closure data, we store a fixed array of closures */ ShaderClosure closure[MAX_CLOSURE]; @@ -632,7 +631,8 @@ typedef struct KernelBVH { /* root node */ int root; int attributes_map_stride; - int pad1, pad2; + int have_motion; + int pad2; } KernelBVH; typedef struct KernelData { diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp index 441f17d90e9..727b9801d95 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -19,6 +19,8 @@ #include "camera.h" #include "scene.h" +#include "device.h" + #include "util_vector.h" CCL_NAMESPACE_BEGIN @@ -141,7 +143,7 @@ void Camera::update() void Camera::device_update(Device *device, DeviceScene *dscene, Scene *scene) { - Scene::MotionType need_motion = scene->need_motion(); + Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading); update(); @@ -274,13 +276,17 @@ bool Camera::modified(const Camera& cam) (border_bottom == cam.border_bottom) && (border_top == cam.border_top) && (matrix == cam.matrix) && - (motion == cam.motion) && - (use_motion == cam.use_motion) && (panorama_type == cam.panorama_type) && (fisheye_fov == cam.fisheye_fov) && (fisheye_lens == cam.fisheye_lens)); } +bool Camera::motion_modified(const Camera& cam) +{ + return !((motion == cam.motion) && + (use_motion == cam.use_motion)); +} + void Camera::tag_update() { need_update = true; diff --git a/intern/cycles/render/camera.h b/intern/cycles/render/camera.h index 82852bde5e0..1407c86e7c2 100644 --- a/intern/cycles/render/camera.h +++ b/intern/cycles/render/camera.h @@ -103,6 +103,7 @@ public: void device_free(Device *device, DeviceScene *dscene); bool modified(const Camera& cam); + bool motion_modified(const Camera& cam); void tag_update(); }; diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 014b78dec2b..3f2fe4ab093 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -723,7 +723,8 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen shader->need_update_attributes = false; #ifdef __OBJECT_MOTION__ - bool motion_blur = scene->need_motion() == Scene::MOTION_BLUR; + Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading); + bool motion_blur = need_motion == Scene::MOTION_BLUR; #else bool motion_blur = false; #endif diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 4a72dcc52f7..f5d78c080c8 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -151,7 +151,8 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene uint *object_flag = dscene->object_flag.resize(scene->objects.size()); int i = 0; map surface_area_map; - Scene::MotionType need_motion = scene->need_motion(); + Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading); + bool have_motion = false; foreach(Object *ob, scene->objects) { Mesh *mesh = ob->mesh; @@ -229,6 +230,7 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene transform_motion_decompose(&decomp, &ob->motion); memcpy(&objects[offset+8], &decomp, sizeof(float4)*8); flag |= SD_OBJECT_MOTION; + have_motion = true; } else { float4 no_motion = make_float4(FLT_MAX); @@ -253,6 +255,8 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene device->tex_alloc("__objects", dscene->objects); device->tex_alloc("__object_flag", dscene->object_flag); + + dscene->data.bvh.have_motion = have_motion; } void ObjectManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) @@ -300,7 +304,8 @@ void ObjectManager::apply_static_transforms(Scene *scene, Progress& progress) /* counter mesh users */ map mesh_users; #ifdef __OBJECT_MOTION__ - bool motion_blur = scene->need_motion() == Scene::MOTION_BLUR; + Scene::MotionType need_motion = scene->need_motion(); + bool motion_blur = need_motion == Scene::MOTION_BLUR; #else bool motion_blur = false; #endif diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index 071338d49c2..15031b9500c 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -183,10 +183,10 @@ void Scene::device_update(Device *device_, Progress& progress) device->const_copy_to("__data", &dscene.data, sizeof(dscene.data)); } -Scene::MotionType Scene::need_motion() +Scene::MotionType Scene::need_motion(bool advanced_shading) { if(integrator->motion_blur) - return MOTION_BLUR; + return (advanced_shading)? MOTION_BLUR: MOTION_NONE; else if(Pass::contains(film->passes, PASS_MOTION)) return MOTION_PASS; else diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 09087fb2970..bd45c1c04e6 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -194,7 +194,7 @@ public: void need_global_attributes(AttributeRequestSet& attributes); enum MotionType { MOTION_NONE = 0, MOTION_PASS, MOTION_BLUR }; - MotionType need_motion(); + MotionType need_motion(bool advanced_shading = true); bool need_update(); bool need_reset(); diff --git a/source/blender/editors/render/render_update.c b/source/blender/editors/render/render_update.c index cd55b91cb6b..5b1c03f65df 100644 --- a/source/blender/editors/render/render_update.c +++ b/source/blender/editors/render/render_update.c @@ -74,12 +74,19 @@ void ED_render_scene_update(Main *bmain, Scene *scene, int updated) bScreen *sc; ScrArea *sa; ARegion *ar; + static int recursive_check = FALSE; /* don't do this render engine update if we're updating the scene from * other threads doing e.g. rendering or baking jobs */ if (!BLI_thread_is_main()) return; + /* don't call this recursively for frame updates */ + if(recursive_check) + return; + + recursive_check = TRUE; + C = CTX_create(); CTX_data_main_set(C, bmain); CTX_data_scene_set(C, scene); @@ -114,6 +121,8 @@ void ED_render_scene_update(Main *bmain, Scene *scene, int updated) } CTX_free(C); + + recursive_check = FALSE; } void ED_render_engine_area_exit(ScrArea *sa) diff --git a/source/blender/makesrna/intern/rna_scene.c b/source/blender/makesrna/intern/rna_scene.c index a3616e0845a..537dab73340 100644 --- a/source/blender/makesrna/intern/rna_scene.c +++ b/source/blender/makesrna/intern/rna_scene.c @@ -3600,14 +3600,14 @@ static void rna_def_scene_render_data(BlenderRNA *brna) RNA_def_property_boolean_sdna(prop, NULL, "mode", R_MBLUR); RNA_def_property_ui_text(prop, "Motion Blur", "Use multi-sampled 3D scene motion blur"); RNA_def_property_clear_flag(prop, PROP_ANIMATABLE); - RNA_def_property_update(prop, NC_SCENE | ND_RENDER_OPTIONS, NULL); + RNA_def_property_update(prop, NC_SCENE | ND_RENDER_OPTIONS, "rna_Scene_glsl_update"); prop = RNA_def_property(srna, "motion_blur_samples", PROP_INT, PROP_NONE); RNA_def_property_int_sdna(prop, NULL, "mblur_samples"); RNA_def_property_range(prop, 1, 32); RNA_def_property_ui_text(prop, "Motion Samples", "Number of scene samples to take with motion blur"); RNA_def_property_clear_flag(prop, PROP_ANIMATABLE); - RNA_def_property_update(prop, NC_SCENE | ND_RENDER_OPTIONS, NULL); + RNA_def_property_update(prop, NC_SCENE | ND_RENDER_OPTIONS, "rna_Scene_glsl_update"); prop = RNA_def_property(srna, "motion_blur_shutter", PROP_FLOAT, PROP_NONE); RNA_def_property_float_sdna(prop, NULL, "blurfac"); @@ -3615,7 +3615,7 @@ static void rna_def_scene_render_data(BlenderRNA *brna) RNA_def_property_ui_range(prop, 0.01, 2.0f, 1, 0); RNA_def_property_ui_text(prop, "Shutter", "Time taken in frames between shutter open and close"); RNA_def_property_clear_flag(prop, PROP_ANIMATABLE); - RNA_def_property_update(prop, NC_SCENE | ND_RENDER_OPTIONS, NULL); + RNA_def_property_update(prop, NC_SCENE | ND_RENDER_OPTIONS, "rna_Scene_glsl_update"); /* border */ prop = RNA_def_property(srna, "use_border", PROP_BOOLEAN, PROP_NONE);