Cycles: optimizations for instances in scene updates before render starts,

should load a non-trivial mesh instanced many times quite a bit faster now.
This commit is contained in:
Brecht Van Lommel 2011-09-02 16:15:18 +00:00
parent 1135875ab1
commit 67030aaf84
4 changed files with 107 additions and 28 deletions

@ -217,8 +217,13 @@ public:
if(!opencl_version_check())
return false;
/* nvidia opencl cache does not work correctly with includes, so force recompile */
static double recompile_trick = 0.0;
if(recompile_trick == 0.0)
recompile_trick = time_dt();
/* compile source */
string source = string_printf("#include \"kernel.cl\" // %lf\n", time_dt());
string source = string_printf("#include \"kernel.cl\" // %lf\n", recompile_trick);
size_t source_len = source.size();
const char *source_str = source.c_str();

@ -61,7 +61,18 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
foreach(Object *object, scene->objects) {
Mesh *mesh = object->mesh;
bool have_emission = false;
/* skip if we have no emission shaders */
foreach(uint sindex, mesh->used_shaders) {
if(scene->shaders[sindex]->has_surface_emission) {
have_emission = true;
break;
}
}
/* count triangles */
if(have_emission) {
for(size_t i = 0; i < mesh->triangles.size(); i++) {
Shader *shader = scene->shaders[mesh->shader[i]];
@ -69,6 +80,7 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
num_triangles++;
}
}
}
size_t num_distribution = num_triangles + num_lights;
@ -82,6 +94,18 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
foreach(Object *object, scene->objects) {
Mesh *mesh = object->mesh;
bool have_emission = false;
/* skip if we have no emission shaders */
foreach(uint sindex, mesh->used_shaders) {
if(scene->shaders[sindex]->has_surface_emission) {
have_emission = true;
break;
}
}
/* sum area */
if(have_emission) {
Transform tfm = object->tfm;
int object_id = (mesh->transform_applied)? -j-1: j;
@ -103,6 +127,7 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
totarea += triangle_area(p1, p2, p3);
}
}
}
if(progress.get_cancel()) return;

@ -22,6 +22,7 @@
#include "scene.h"
#include "util_foreach.h"
#include "util_map.h"
#include "util_progress.h"
CCL_NAMESPACE_BEGIN
@ -103,6 +104,7 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
{
float4 *objects = dscene->objects.resize(OBJECT_SIZE*scene->objects.size());
int i = 0;
map<Mesh*, float> surface_area_map;
foreach(Object *ob, scene->objects) {
Mesh *mesh = ob->mesh;
@ -112,16 +114,39 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
Transform itfm = transform_inverse(tfm);
Transform ntfm = transform_transpose(itfm);
/* compute surface area */
/* compute surface area. for uniform scale we can avoid the many
transform calls and share the computation between instances */
/* todo: correct for displacement, and move to a better place */
float surfacearea = 0.0f;
float uniform_scale;
float surface_area = 0.0f;
if(transform_uniform_scale(tfm, uniform_scale)) {
map<Mesh*, float>::iterator it = surface_area_map.find(mesh);
if(it == surface_area_map.end()) {
foreach(Mesh::Triangle& t, mesh->triangles) {
float3 p1 = mesh->verts[t.v[0]];
float3 p2 = mesh->verts[t.v[1]];
float3 p3 = mesh->verts[t.v[2]];
surface_area += triangle_area(p1, p2, p3);
}
surface_area_map[mesh] = surface_area;
}
else
surface_area = it->second;
surface_area *= uniform_scale;
}
else {
foreach(Mesh::Triangle& t, mesh->triangles) {
float3 p1 = transform(&tfm, mesh->verts[t.v[0]]);
float3 p2 = transform(&tfm, mesh->verts[t.v[1]]);
float3 p3 = transform(&tfm, mesh->verts[t.v[2]]);
surfacearea += triangle_area(p1, p2, p3);
surface_area += triangle_area(p1, p2, p3);
}
}
/* pack in texture */
@ -130,7 +155,7 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
memcpy(&objects[offset], &tfm, sizeof(float4)*4);
memcpy(&objects[offset+4], &itfm, sizeof(float4)*4);
memcpy(&objects[offset+8], &ntfm, sizeof(float4)*4);
objects[offset+12] = make_float4(surfacearea, 0.0f, 0.0f, 0.0f);
objects[offset+12] = make_float4(surface_area, 0.0f, 0.0f, 0.0f);
i++;

@ -205,6 +205,30 @@ __device_inline float3 transform_get_column(const Transform *t, int column)
Transform transform_inverse(const Transform& a);
/* Test whether a transform scales (approximately) uniformly.
 *
 * The lengths of the xyz parts of tfm.x, tfm.y and tfm.z are compared with
 * each other and with the same three lengths taken from the transposed
 * matrix. When all six agree within tolerance, the length of tfm.x is
 * written to scale and true is returned. Otherwise false is returned and
 * scale is left untouched. */
__device_inline bool transform_uniform_scale(const Transform& tfm, float& scale)
{
	/* the epsilon here is quite arbitrary, but this function is only used for
	   surface area and bump, where we expect it to not be so sensitive */
	Transform ttfm = transform_transpose(tfm);
	float eps = 1e-6f;

	float sx = len(float4_to_float3(tfm.x));
	float sy = len(float4_to_float3(tfm.y));
	float sz = len(float4_to_float3(tfm.z));
	float stx = len(float4_to_float3(ttfm.x));
	float sty = len(float4_to_float3(ttfm.y));
	float stz = len(float4_to_float3(ttfm.z));

	/* a fixed absolute tolerance smaller than float precision rejects
	   transforms whose lengths differ only by rounding error, so let the
	   tolerance grow with the magnitude of the scale. the floor of 1.0 keeps
	   the tolerance from shrinking towards zero for tiny scales */
	float tolerance = eps*((sx > 1.0f)? sx: 1.0f);

	if(fabsf(sx - sy) < tolerance && fabsf(sx - sz) < tolerance &&
	   fabsf(sx - stx) < tolerance && fabsf(sx - sty) < tolerance &&
	   fabsf(sx - stz) < tolerance) {
		scale = sx;
		return true;
	}

	return false;
}
#endif
CCL_NAMESPACE_END