diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 9d25b6df529..787f0e6feab 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -217,8 +217,13 @@ public: if(!opencl_version_check()) return false; + /* nvidia opencl cache doesn't work correctly with includes, so force recompile */ + static double recompile_trick = 0.0; + if(recompile_trick == 0.0) + recompile_trick = time_dt(); + /* compile source */ - string source = string_printf("#include \"kernel.cl\" // %lf\n", time_dt()); + string source = string_printf("#include \"kernel.cl\" // %lf\n", recompile_trick); size_t source_len = source.size(); const char *source_str = source.c_str(); diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 88a797f753d..feb9e35e785 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -61,12 +61,24 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen foreach(Object *object, scene->objects) { Mesh *mesh = object->mesh; + bool have_emission = false; - for(size_t i = 0; i < mesh->triangles.size(); i++) { - Shader *shader = scene->shaders[mesh->shader[i]]; + /* skip if we have no emission shaders */ + foreach(uint sindex, mesh->used_shaders) { + if(scene->shaders[sindex]->has_surface_emission) { + have_emission = true; + break; + } + } - if(shader->has_surface_emission) - num_triangles++; + /* count triangles */ + if(have_emission) { + for(size_t i = 0; i < mesh->triangles.size(); i++) { + Shader *shader = scene->shaders[mesh->shader[i]]; + + if(shader->has_surface_emission) + num_triangles++; + } } } @@ -82,25 +94,38 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen foreach(Object *object, scene->objects) { Mesh *mesh = object->mesh; - Transform tfm = object->tfm; - int object_id = (mesh->transform_applied)? 
-j-1: j; + bool have_emission = false; - for(size_t i = 0; i < mesh->triangles.size(); i++) { - Shader *shader = scene->shaders[mesh->shader[i]]; + /* skip if we have no emission shaders */ + foreach(uint sindex, mesh->used_shaders) { + if(scene->shaders[sindex]->has_surface_emission) { + have_emission = true; + break; + } + } - if(shader->has_surface_emission) { - distribution[offset].x = totarea; - distribution[offset].y = __int_as_float(i + mesh->tri_offset); - distribution[offset].z = 1.0f; - distribution[offset].w = __int_as_float(object_id); - offset++; + /* sum area */ + if(have_emission) { + Transform tfm = object->tfm; + int object_id = (mesh->transform_applied)? -j-1: j; - Mesh::Triangle t = mesh->triangles[i]; - float3 p1 = transform(&tfm, mesh->verts[t.v[0]]); - float3 p2 = transform(&tfm, mesh->verts[t.v[1]]); - float3 p3 = transform(&tfm, mesh->verts[t.v[2]]); + for(size_t i = 0; i < mesh->triangles.size(); i++) { + Shader *shader = scene->shaders[mesh->shader[i]]; - totarea += triangle_area(p1, p2, p3); + if(shader->has_surface_emission) { + distribution[offset].x = totarea; + distribution[offset].y = __int_as_float(i + mesh->tri_offset); + distribution[offset].z = 1.0f; + distribution[offset].w = __int_as_float(object_id); + offset++; + + Mesh::Triangle t = mesh->triangles[i]; + float3 p1 = transform(&tfm, mesh->verts[t.v[0]]); + float3 p2 = transform(&tfm, mesh->verts[t.v[1]]); + float3 p3 = transform(&tfm, mesh->verts[t.v[2]]); + + totarea += triangle_area(p1, p2, p3); + } } } diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 4ba2de6e61b..fab051bde72 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -22,6 +22,7 @@ #include "scene.h" #include "util_foreach.h" +#include "util_map.h" #include "util_progress.h" CCL_NAMESPACE_BEGIN @@ -103,6 +104,7 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene { float4 *objects = 
dscene->objects.resize(OBJECT_SIZE*scene->objects.size()); int i = 0; + map surface_area_map; foreach(Object *ob, scene->objects) { Mesh *mesh = ob->mesh; @@ -112,16 +114,39 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene Transform itfm = transform_inverse(tfm); Transform ntfm = transform_transpose(itfm); - /* compute surface area */ + /* compute surface area. for uniform scale we can avoid the many + transform calls and share computation for instances */ /* todo: correct for displacement, and move to a better place */ - float surfacearea = 0.0f; + float uniform_scale; + float surface_area = 0.0f; + + if(transform_uniform_scale(tfm, uniform_scale)) { + map::iterator it = surface_area_map.find(mesh); - foreach(Mesh::Triangle& t, mesh->triangles) { - float3 p1 = transform(&tfm, mesh->verts[t.v[0]]); - float3 p2 = transform(&tfm, mesh->verts[t.v[1]]); - float3 p3 = transform(&tfm, mesh->verts[t.v[2]]); + if(it == surface_area_map.end()) { + foreach(Mesh::Triangle& t, mesh->triangles) { + float3 p1 = mesh->verts[t.v[0]]; + float3 p2 = mesh->verts[t.v[1]]; + float3 p3 = mesh->verts[t.v[2]]; - surfacearea += triangle_area(p1, p2, p3); + surface_area += triangle_area(p1, p2, p3); + } + + surface_area_map[mesh] = surface_area; + } + else + surface_area = it->second; + + surface_area *= uniform_scale; + } + else { + foreach(Mesh::Triangle& t, mesh->triangles) { + float3 p1 = transform(&tfm, mesh->verts[t.v[0]]); + float3 p2 = transform(&tfm, mesh->verts[t.v[1]]); + float3 p3 = transform(&tfm, mesh->verts[t.v[2]]); + + surface_area += triangle_area(p1, p2, p3); + } } /* pack in texture */ @@ -130,7 +155,7 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene memcpy(&objects[offset], &tfm, sizeof(float4)*4); memcpy(&objects[offset+4], &itfm, sizeof(float4)*4); memcpy(&objects[offset+8], &ntfm, sizeof(float4)*4); - objects[offset+12] = make_float4(surfacearea, 0.0f, 0.0f, 0.0f); + objects[offset+12] = 
make_float4(surface_area, 0.0f, 0.0f, 0.0f); i++; diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h index e904674a981..998d4161ebf 100644 --- a/intern/cycles/util/util_transform.h +++ b/intern/cycles/util/util_transform.h @@ -205,6 +205,30 @@ __device_inline float3 transform_get_column(const Transform *t, int column) Transform transform_inverse(const Transform& a); +__device_inline bool transform_uniform_scale(const Transform& tfm, float& scale) +{ + /* returns true and stores sx in scale when the float3 lengths of tfm.x/y/z and of the same members of the transposed matrix all lie within eps of sx; otherwise returns false and leaves scale unwritten. the epsilon here is quite arbitrary, but this function is only used for + surface area and bump, where we expect it to not be so sensitive */ + Transform ttfm = transform_transpose(tfm); + float eps = 1e-7f; + + float sx = len(float4_to_float3(tfm.x)); + float sy = len(float4_to_float3(tfm.y)); + float sz = len(float4_to_float3(tfm.z)); + float stx = len(float4_to_float3(ttfm.x)); + float sty = len(float4_to_float3(ttfm.y)); + float stz = len(float4_to_float3(ttfm.z)); + + if(fabsf(sx - sy) < eps && fabsf(sx - sz) < eps && + fabsf(sx - stx) < eps && fabsf(sx - sty) < eps && + fabsf(sx - stz) < eps) { + scale = sx; + return true; + } + + return false; +} + #endif CCL_NAMESPACE_END