Cycles: optimizations for instances in scene updates before render starts,

should load a non-trivial mesh instanced many times quite a bit faster now.
This commit is contained in:
Brecht Van Lommel 2011-09-02 16:15:18 +00:00
parent 1135875ab1
commit 67030aaf84
4 changed files with 107 additions and 28 deletions

@ -217,8 +217,13 @@ public:
if(!opencl_version_check())
return false;
/* nvidia opencl cache does not work correctly with includes, so force recompile */
static double recompile_trick = 0.0;
if(recompile_trick == 0.0)
recompile_trick = time_dt();
/* compile source */
string source = string_printf("#include \"kernel.cl\" // %lf\n", time_dt());
string source = string_printf("#include \"kernel.cl\" // %lf\n", recompile_trick);
size_t source_len = source.size();
const char *source_str = source.c_str();

@ -61,12 +61,24 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
foreach(Object *object, scene->objects) {
Mesh *mesh = object->mesh;
bool have_emission = false;
for(size_t i = 0; i < mesh->triangles.size(); i++) {
Shader *shader = scene->shaders[mesh->shader[i]];
/* skip if we have no emission shaders */
foreach(uint sindex, mesh->used_shaders) {
if(scene->shaders[sindex]->has_surface_emission) {
have_emission = true;
break;
}
}
if(shader->has_surface_emission)
num_triangles++;
/* count triangles */
if(have_emission) {
for(size_t i = 0; i < mesh->triangles.size(); i++) {
Shader *shader = scene->shaders[mesh->shader[i]];
if(shader->has_surface_emission)
num_triangles++;
}
}
}
@ -82,25 +94,38 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
foreach(Object *object, scene->objects) {
Mesh *mesh = object->mesh;
Transform tfm = object->tfm;
int object_id = (mesh->transform_applied)? -j-1: j;
bool have_emission = false;
for(size_t i = 0; i < mesh->triangles.size(); i++) {
Shader *shader = scene->shaders[mesh->shader[i]];
/* skip if we have no emission shaders */
foreach(uint sindex, mesh->used_shaders) {
if(scene->shaders[sindex]->has_surface_emission) {
have_emission = true;
break;
}
}
if(shader->has_surface_emission) {
distribution[offset].x = totarea;
distribution[offset].y = __int_as_float(i + mesh->tri_offset);
distribution[offset].z = 1.0f;
distribution[offset].w = __int_as_float(object_id);
offset++;
/* sum area */
if(have_emission) {
Transform tfm = object->tfm;
int object_id = (mesh->transform_applied)? -j-1: j;
Mesh::Triangle t = mesh->triangles[i];
float3 p1 = transform(&tfm, mesh->verts[t.v[0]]);
float3 p2 = transform(&tfm, mesh->verts[t.v[1]]);
float3 p3 = transform(&tfm, mesh->verts[t.v[2]]);
for(size_t i = 0; i < mesh->triangles.size(); i++) {
Shader *shader = scene->shaders[mesh->shader[i]];
totarea += triangle_area(p1, p2, p3);
if(shader->has_surface_emission) {
distribution[offset].x = totarea;
distribution[offset].y = __int_as_float(i + mesh->tri_offset);
distribution[offset].z = 1.0f;
distribution[offset].w = __int_as_float(object_id);
offset++;
Mesh::Triangle t = mesh->triangles[i];
float3 p1 = transform(&tfm, mesh->verts[t.v[0]]);
float3 p2 = transform(&tfm, mesh->verts[t.v[1]]);
float3 p3 = transform(&tfm, mesh->verts[t.v[2]]);
totarea += triangle_area(p1, p2, p3);
}
}
}

@ -22,6 +22,7 @@
#include "scene.h"
#include "util_foreach.h"
#include "util_map.h"
#include "util_progress.h"
CCL_NAMESPACE_BEGIN
@ -103,6 +104,7 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
{
float4 *objects = dscene->objects.resize(OBJECT_SIZE*scene->objects.size());
int i = 0;
map<Mesh*, float> surface_area_map;
foreach(Object *ob, scene->objects) {
Mesh *mesh = ob->mesh;
@ -112,16 +114,39 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
Transform itfm = transform_inverse(tfm);
Transform ntfm = transform_transpose(itfm);
/* compute surface area */
/* compute surface area. for uniform scale we can avoid the many
transform calls and share computation for instances */
/* todo: correct for displacement, and move to a better place */
float surfacearea = 0.0f;
float uniform_scale;
float surface_area = 0.0f;
if(transform_uniform_scale(tfm, uniform_scale)) {
map<Mesh*, float>::iterator it = surface_area_map.find(mesh);
foreach(Mesh::Triangle& t, mesh->triangles) {
float3 p1 = transform(&tfm, mesh->verts[t.v[0]]);
float3 p2 = transform(&tfm, mesh->verts[t.v[1]]);
float3 p3 = transform(&tfm, mesh->verts[t.v[2]]);
if(it == surface_area_map.end()) {
foreach(Mesh::Triangle& t, mesh->triangles) {
float3 p1 = mesh->verts[t.v[0]];
float3 p2 = mesh->verts[t.v[1]];
float3 p3 = mesh->verts[t.v[2]];
surfacearea += triangle_area(p1, p2, p3);
surface_area += triangle_area(p1, p2, p3);
}
surface_area_map[mesh] = surface_area;
}
else
surface_area = it->second;
surface_area *= uniform_scale;
}
else {
foreach(Mesh::Triangle& t, mesh->triangles) {
float3 p1 = transform(&tfm, mesh->verts[t.v[0]]);
float3 p2 = transform(&tfm, mesh->verts[t.v[1]]);
float3 p3 = transform(&tfm, mesh->verts[t.v[2]]);
surface_area += triangle_area(p1, p2, p3);
}
}
/* pack in texture */
@ -130,7 +155,7 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
memcpy(&objects[offset], &tfm, sizeof(float4)*4);
memcpy(&objects[offset+4], &itfm, sizeof(float4)*4);
memcpy(&objects[offset+8], &ntfm, sizeof(float4)*4);
objects[offset+12] = make_float4(surfacearea, 0.0f, 0.0f, 0.0f);
objects[offset+12] = make_float4(surface_area, 0.0f, 0.0f, 0.0f);
i++;

@ -205,6 +205,30 @@ __device_inline float3 transform_get_column(const Transform *t, int column)
Transform transform_inverse(const Transform& a);
/* Check whether a transform scales by the same amount along every axis.
 *
 * The lengths of the 3D parts of tfm.x, tfm.y and tfm.z are compared, and the
 * same is done for the x, y and z members of transform_transpose(tfm). When
 * all six lengths are within a small absolute tolerance of the first one,
 * that first length is written to `scale` and true is returned. Otherwise
 * `scale` is left untouched and false is returned.
 *
 * NOTE(review): the value written to `scale` is a length, i.e. a linear
 * factor; a caller applying it to an area would presumably need to square
 * it -- confirm at the call sites. */
__device_inline bool transform_uniform_scale(const Transform& tfm, float& scale)
{
	/* the tolerance here is quite arbitrary, but this function is only used
	   for surface area and bump, where we expect it to not be so sensitive */
	const float tolerance = 1e-7f;

	const Transform transposed = transform_transpose(tfm);

	/* axis lengths taken directly from the transform */
	const float len_x = len(float4_to_float3(tfm.x));
	const float len_y = len(float4_to_float3(tfm.y));
	const float len_z = len(float4_to_float3(tfm.z));

	/* axis lengths taken from the transposed transform */
	const float len_tx = len(float4_to_float3(transposed.x));
	const float len_ty = len(float4_to_float3(transposed.y));
	const float len_tz = len(float4_to_float3(transposed.z));

	/* everything is measured against the first length; the `<` form is kept
	   so that a NaN length makes the test fail */
	const bool direct_match =
		fabsf(len_x - len_y) < tolerance &&
		fabsf(len_x - len_z) < tolerance;

	const bool transposed_match =
		fabsf(len_x - len_tx) < tolerance &&
		fabsf(len_x - len_ty) < tolerance &&
		fabsf(len_x - len_tz) < tolerance;

	if(!(direct_match && transposed_match))
		return false;

	scale = len_x;
	return true;
}
#endif
CCL_NAMESPACE_END