diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp
index d7003729c46..ad91022de07 100644
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -304,7 +304,6 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated)
 void BlenderSync::sync_mesh_motion(BL::Object b_ob, Mesh *mesh, int motion)
 {
 	/* todo: displacement, subdivision */
-	BL::ID b_ob_data = b_ob.data();
 	size_t size = mesh->verts.size();
 
 	/* skip objects without deforming modifiers. this is not a totally reliable,
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index c0b6e210bb1..488fea8d12b 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -135,7 +135,6 @@ void BlenderSync::sync_data(BL::SpaceView3D b_v3d, BL::Object b_override, const
 
 void BlenderSync::sync_integrator()
 {
-	BL::RenderSettings r = b_scene.render();
 	PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
 
 	experimental = (RNA_enum_get(&cscene, "feature_set") != 0);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 23ad5e5ff92..491a63a7cf2 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -259,7 +259,7 @@ public:
 
 		path_create_directories(cubin);
 
-		string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" --use_fast_math "
+		string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" "
 			"-o \"%s\" --ptxas-options=\"-v\" --maxrregcount=%d --opencc-options -OPT:Olimit=0 -I\"%s\" -DNVCC",
 			nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), maxreg, include.c_str());
 
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index d165716aaca..98cb16d5dfc 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -114,7 +114,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
 
 		add_custom_command(
 			OUTPUT ${cuda_cubin}
-			COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu --use_fast_math -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" --maxrregcount=24 --opencc-options -OPT:Olimit=0 -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC
+			COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" --maxrregcount=24 --opencc-options -OPT:Olimit=0 -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC
 			DEPENDS ${cuda_sources})
 
 		delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
diff --git a/intern/cycles/kernel/kernel_bvh.h b/intern/cycles/kernel/kernel_bvh.h
index 5da4253bd86..522f9861c35 100644
--- a/intern/cycles/kernel/kernel_bvh.h
+++ b/intern/cycles/kernel/kernel_bvh.h
@@ -74,10 +74,10 @@ __device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray
 
 __device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax)
 {
-	Transform tfm = object_fetch_transform(kg, object, ray->time, OBJECT_TRANSFORM);
-
-	if(*t != FLT_MAX)
+	if(*t != FLT_MAX) {
+		Transform tfm = object_fetch_transform(kg, object, ray->time, OBJECT_TRANSFORM);
 		*t *= len(transform_direction(&tfm, 1.0f/(*idir)));
+	}
 
 	*P = ray->P;
 	*idir = bvh_inverse_direction(ray->D);
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 2f9f2c45e88..06bdce6c35c 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -62,5 +62,17 @@ typedef texture texture_image_uchar4;
 
 #define kernel_data __data
 
+/* Use fast math functions: nvcc is no longer invoked with --use_fast_math,
+ * so the single precision functions below are mapped to their CUDA
+ * intrinsics explicitly. Each argument is parenthesized before the cast so
+ * that a compound expression is converted as a whole. */
+
+#define cosf(x) __cosf((float)(x))
+#define sinf(x) __sinf((float)(x))
+#define powf(x, y) __powf((float)(x), (float)(y))
+#define tanf(x) __tanf((float)(x))
+#define logf(x) __logf((float)(x))
+#define expf(x) __expf((float)(x))
+
 #endif /* __KERNEL_COMPAT_CUDA_H__ */
 
diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h
index a5735920cd9..72d36811626 100644
--- a/intern/cycles/kernel/kernel_projection.h
+++ b/intern/cycles/kernel/kernel_projection.h
@@ -69,20 +69,20 @@ __device float3 equirectangular_to_direction(float u, float v)
 	float theta = M_PI_F*(1.0f - v);
 
 	return make_float3(
-		sin(theta)*cos(phi),
-		sin(theta)*sin(phi),
-		cos(theta));
+		sinf(theta)*cosf(phi),
+		sinf(theta)*sinf(phi),
+		cosf(theta));
 }
 
 /* Fisheye <-> Cartesian direction */
 
 __device float2 direction_to_fisheye(float3 dir, float fov)
 {
-	float r = atan2f(sqrt(dir.y*dir.y + dir.z*dir.z), dir.x) / fov;
-	float phi = atan2(dir.z, dir.y);
+	float r = atan2f(sqrtf(dir.y*dir.y + dir.z*dir.z), dir.x) / fov;
+	float phi = atan2f(dir.z, dir.y);
 
-	float u = r * cos(phi) + 0.5f;
-	float v = r * sin(phi) + 0.5f;
+	float u = r * cosf(phi) + 0.5f;
+	float v = r * sinf(phi) + 0.5f;
 
 	return make_float2(u, v);
 }
@@ -92,7 +92,7 @@ __device float3 fisheye_to_direction(float u, float v, float fov)
 	u = (u - 0.5f) * 2.0f;
 	v = (v - 0.5f) * 2.0f;
 
-	float r = sqrt(u*u + v*v);
+	float r = sqrtf(u*u + v*v);
 
 	if(r > 1.0f)
 		return make_float3(0.0f, 0.0f, 0.0f);
@@ -127,7 +127,7 @@ __device float3 fisheye_equisolid_to_direction(float u, float v, float lens, flo
 	v = (v - 0.5f) * height;
 
 	float rmax = 2.0f * lens * sinf(fov * 0.25f);
-	float r = sqrt(u*u + v*v);
+	float r = sqrtf(u*u + v*v);
 
 	if(r > rmax)
 		return make_float3(0.0f, 0.0f, 0.0f);
@@ -153,7 +153,7 @@ __device float3 mirrorball_to_direction(float u, float v)
 
 	dir.x = 2.0f*u - 1.0f;
 	dir.z = 2.0f*v - 1.0f;
-	dir.y = -sqrt(max(1.0f - dir.x*dir.x - dir.z*dir.z, 0.0f));
+	dir.y = -sqrtf(max(1.0f - dir.x*dir.x - dir.z*dir.z, 0.0f));
 
 	/* reflection */
 	float3 I = make_float3(0.0f, -1.0f, 0.0f);
@@ -166,7 +166,7 @@ __device float2 direction_to_mirrorball(float3 dir)
 	/* inverse of mirrorball_to_direction */
 	dir.y -= 1.0f;
 
-	float div = 2.0f*sqrt(max(-0.5f*dir.y, 0.0f));
+	float div = 2.0f*sqrtf(max(-0.5f*dir.y, 0.0f));
 	if(div > 0.0f)
 		dir /= div;
 
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index e4897ee6787..b460c4c87a2 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -61,16 +61,20 @@ __device_inline float3 transform_perspective(const Transform *t, const float3 a)
 
 __device_inline float3 transform_point(const Transform *t, const float3 a)
 {
-	float4 b = make_float4(a.x, a.y, a.z, 1.0f);
-	float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b));
+	float3 c = make_float3(
+		a.x*t->x.x + a.y*t->x.y + a.z*t->x.z + t->x.w,
+		a.x*t->y.x + a.y*t->y.y + a.z*t->y.z + t->y.w,
+		a.x*t->z.x + a.y*t->z.y + a.z*t->z.z + t->z.w);
 
 	return c;
 }
 
 __device_inline float3 transform_direction(const Transform *t, const float3 a)
 {
-	float4 b = make_float4(a.x, a.y, a.z, 0.0f);
-	float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b));
+	float3 c = make_float3(
+		a.x*t->x.x + a.y*t->x.y + a.z*t->x.z,
+		a.x*t->y.x + a.y*t->y.y + a.z*t->y.z,
+		a.x*t->z.x + a.y*t->z.y + a.z*t->z.z);
 
 	return c;
 }