Fix #33375: OSL geom:trianglevertices gave wrong coordinates for static BVH.

Also includes some simple OSL optimizations: passing the thread data pointer
directly instead of via thread local storage, and creating the ustrings used
for attribute lookup ahead of time.
This commit is contained in:
Brecht Van Lommel 2012-12-01 19:15:05 +00:00
parent 807fd448a5
commit 7c0a0bae79
19 changed files with 276 additions and 282 deletions

@ -23,9 +23,12 @@
#include "device_intern.h"
#include "kernel.h"
#include "kernel_compat_cpu.h"
#include "kernel_types.h"
#include "kernel_globals.h"
#include "osl_shader.h"
#include "osl_globals.h"
#include "buffers.h"
@ -43,11 +46,16 @@ class CPUDevice : public Device
{
public:
TaskPool task_pool;
KernelGlobals *kg;
KernelGlobals kernel_globals;
#ifdef WITH_OSL
OSLGlobals osl_globals;
#endif
CPUDevice(Stats &stats) : Device(stats)
{
kg = kernel_globals_create();
#ifdef WITH_OSL
kernel_globals.osl = &osl_globals;
#endif
/* do now to avoid thread issues */
system_cpu_support_optimized();
@ -56,7 +64,6 @@ public:
~CPUDevice()
{
task_pool.stop();
kernel_globals_free(kg);
}
bool support_advanced_shading()
@ -95,12 +102,12 @@ public:
void const_copy_to(const char *name, void *host, size_t size)
{
kernel_const_copy(kg, name, host, size);
kernel_const_copy(&kernel_globals, name, host, size);
}
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
{
kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
kernel_tex_copy(&kernel_globals, name, mem.data_pointer, mem.data_width, mem.data_height);
mem.device_pointer = mem.data_pointer;
stats.mem_alloc(mem.memory_size());
@ -116,7 +123,7 @@ public:
void *osl_memory()
{
#ifdef WITH_OSL
return kernel_osl_memory(kg);
return &osl_globals;
#else
return NULL;
#endif
@ -148,9 +155,10 @@ public:
return;
}
KernelGlobals kg = kernel_globals;
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_init(kg);
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
RenderTile tile;
@ -171,7 +179,7 @@ public:
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
kernel_cpu_optimized_path_trace(kg, render_buffer, rng_state,
kernel_cpu_optimized_path_trace(&kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
@ -192,7 +200,7 @@ public:
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
kernel_cpu_path_trace(kg, render_buffer, rng_state,
kernel_cpu_path_trace(&kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
@ -212,8 +220,7 @@ public:
}
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_free(kg);
OSLShader::thread_free(&kg);
#endif
}
@ -223,7 +230,7 @@ public:
if(system_cpu_support_optimized()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
kernel_cpu_optimized_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
task.sample, task.resolution, x, y, task.offset, task.stride);
}
else
@ -231,22 +238,23 @@ public:
{
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
kernel_cpu_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
task.sample, task.resolution, x, y, task.offset, task.stride);
}
}
void thread_shader(DeviceTask& task)
{
KernelGlobals kg = kernel_globals;
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_init(kg);
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
#ifdef WITH_OPTIMIZED_KERNEL
if(system_cpu_support_optimized()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
kernel_cpu_optimized_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.cancelled())
break;
@ -256,7 +264,7 @@ public:
#endif
{
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.cancelled())
break;
@ -264,8 +272,7 @@ public:
}
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_free(kg);
OSLShader::thread_free(&kg);
#endif
}

@ -29,38 +29,6 @@
CCL_NAMESPACE_BEGIN
/* Globals */
/* Allocate a new KernelGlobals with new and return it.
 * When built with WITH_OSL, the osl member starts out with its use flag
 * set to false. The matching release function is kernel_globals_free(). */
KernelGlobals *kernel_globals_create()
{
KernelGlobals *kg = new KernelGlobals();
#ifdef WITH_OSL
kg->osl.use = false;
#endif
return kg;
}
/* Destroy a KernelGlobals with delete; counterpart of
 * kernel_globals_create(), which allocates with new. */
void kernel_globals_free(KernelGlobals *kg)
{
delete kg;
}
/* OSL */
#ifdef WITH_OSL
/* Return the address of the osl member of kg as an untyped pointer. */
void *kernel_osl_memory(KernelGlobals *kg)
{
return (void*)&kg->osl;
}
/* Return the use flag stored in the osl member of kg. */
bool kernel_osl_use(KernelGlobals *kg)
{
return kg->osl.use;
}
#endif
/* Memory Copy */
void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size)

@ -19,13 +19,6 @@
#ifndef __KERNEL_ATTRIBUTE_CL__
#define __KERNEL_ATTRIBUTE_CL__
#include "util_types.h"
#ifdef __OSL__
#include <string>
#include "util_attribute.h"
#endif
CCL_NAMESPACE_BEGIN
/* note: declared in kernel.h, have to add it here because kernel.h is not available */
@ -33,20 +26,9 @@ bool kernel_osl_use(KernelGlobals *kg);
__device_inline int find_attribute(KernelGlobals *kg, ShaderData *sd, uint id)
{
#ifdef __OSL__
if (kernel_osl_use(kg)) {
/* for OSL, a hash map is used to lookup the attribute by name. */
OSLGlobals::AttributeMap &attr_map = kg->osl.attribute_map[sd->object];
ustring stdname(std::string("std::") + std::string(attribute_standard_name((AttributeStandard)id)));
OSLGlobals::AttributeMap::const_iterator it = attr_map.find(stdname);
if (it != attr_map.end()) {
const OSLGlobals::Attribute &osl_attr = it->second;
/* return result */
return (osl_attr.elem == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : osl_attr.offset;
}
else
return (int)ATTR_STD_NOT_FOUND;
if (kg->osl) {
return OSLShader::find_attribute(kg, sd, id);
}
else
#endif

@ -18,14 +18,6 @@
/* Constant Globals */
#ifdef __KERNEL_CPU__
#ifdef __OSL__
#include "osl_globals.h"
#endif
#endif
CCL_NAMESPACE_BEGIN
/* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in
@ -35,6 +27,12 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_CPU__
#ifdef __OSL__
struct OSLGlobals;
struct OSLThreadData;
struct OSLShadingSystem;
#endif
#define MAX_BYTE_IMAGES 512
#define MAX_FLOAT_IMAGES 5
@ -51,7 +49,9 @@ typedef struct KernelGlobals {
#ifdef __OSL__
/* On the CPU, we also have the OSL globals here. Most data structures are shared
* with SVM, the difference is in the shaders and object/mesh attributes. */
OSLGlobals osl;
OSLGlobals *osl;
OSLShadingSystem *osl_ss;
OSLThreadData *osl_tdata;
#endif
} KernelGlobals;

@ -16,10 +16,16 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifdef __OSL__
#include "osl_shader.h"
#endif
#include "kernel_differential.h"
#include "kernel_montecarlo.h"
#include "kernel_projection.h"
#include "kernel_object.h"
#include "kernel_attribute.h"
#include "kernel_projection.h"
#include "kernel_triangle.h"
#ifdef __QBVH__
#include "kernel_qbvh.h"

@ -26,10 +26,6 @@
*
*/
#ifdef __OSL__
#include "osl_shader.h"
#endif
#include "closure/bsdf.h"
#include "closure/emissive.h"
#include "closure/volume.h"
@ -61,7 +57,7 @@ __device_inline void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
const Intersection *isect, const Ray *ray)
{
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
OSLShader::init(kg, sd);
#endif
@ -147,7 +143,7 @@ __device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
int shader, int object, int prim, float u, float v, float t, float time)
{
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
OSLShader::init(kg, sd);
#endif
@ -278,7 +274,7 @@ __device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
__device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
{
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
OSLShader::init(kg, sd);
#endif
@ -387,7 +383,7 @@ __device void shader_bsdf_eval(KernelGlobals *kg, const ShaderData *sd,
bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
return _shader_bsdf_multi_eval_osl(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
else
#endif
@ -444,7 +440,7 @@ __device int shader_bsdf_sample(KernelGlobals *kg, const ShaderData *sd,
*pdf = 0.0f;
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
label = OSLShader::bsdf_sample(sd, sc, randu, randv, eval, *omega_in, *domega_in, *pdf);
else
#endif
@ -456,7 +452,7 @@ __device int shader_bsdf_sample(KernelGlobals *kg, const ShaderData *sd,
if(sd->num_closure > 1) {
float sweight = sc->sample_weight;
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
_shader_bsdf_multi_eval_osl(sd, *omega_in, pdf, sampled, bsdf_eval, *pdf*sweight, sweight);
else
#endif
@ -483,7 +479,7 @@ __device int shader_bsdf_sample_closure(KernelGlobals *kg, const ShaderData *sd,
*pdf = 0.0f;
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
label = OSLShader::bsdf_sample(sd, sc, randu, randv, eval, *omega_in, *domega_in, *pdf);
else
#endif
@ -503,7 +499,7 @@ __device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughnes
if(CLOSURE_IS_BSDF(sc->type)) {
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
OSLShader::bsdf_blur(sc, roughness);
else
#endif
@ -650,7 +646,7 @@ __device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
if(CLOSURE_IS_EMISSION(sc->type)) {
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
eval += OSLShader::emissive_eval(sd, sc)*sc->weight;
else
#endif
@ -694,7 +690,7 @@ __device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
float randb, int path_flag)
{
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
OSLShader::eval_surface(kg, sd, randb, path_flag);
else
#endif
@ -713,7 +709,7 @@ __device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
__device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, int path_flag)
{
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
return OSLShader::eval_background(kg, sd, path_flag);
else
#endif
@ -759,7 +755,7 @@ __device float3 shader_volume_eval_phase(KernelGlobals *kg, ShaderData *sd,
if(CLOSURE_IS_VOLUME(sc->type)) {
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
eval += OSLShader::volume_eval_phase(sc, omega_in, omega_out);
else
#endif
@ -780,7 +776,7 @@ __device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd,
{
#ifdef __SVM__
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
OSLShader::eval_volume(kg, sd, randb, path_flag);
else
#endif
@ -795,7 +791,7 @@ __device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd)
/* this will modify sd->P */
#ifdef __SVM__
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
OSLShader::eval_displacement(kg, sd);
else
#endif
@ -851,7 +847,7 @@ __device void shader_merge_closures(KernelGlobals *kg, ShaderData *sd)
__device void shader_release(KernelGlobals *kg, ShaderData *sd)
{
#ifdef __OSL__
if (kernel_osl_use(kg))
if (kg->osl)
OSLShader::release(kg, sd);
#endif
}

@ -16,9 +16,6 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "kernel_attribute.h"
#include "kernel_projection.h"
CCL_NAMESPACE_BEGIN
/* Point on triangle for Moller-Trumbore triangles */

@ -366,9 +366,6 @@ typedef struct ShaderClosure {
float sample_weight;
#endif
#ifdef __OSL__
void *prim;
#endif
float data0;
float data1;
@ -377,6 +374,9 @@ typedef struct ShaderClosure {
float3 T;
#endif
#ifdef __OSL__
void *prim;
#endif
} ShaderClosure;
/* Shader Data
@ -403,7 +403,8 @@ enum ShaderDataFlag {
/* object flags */
SD_HOLDOUT_MASK = 4096, /* holdout for camera rays */
SD_OBJECT_MOTION = 8192 /* has object motion blur */
SD_OBJECT_MOTION = 8192, /* has object motion blur */
SD_TRANSFORM_APPLIED = 16384 /* vertices have transform applied */
};
typedef struct ShaderData {

@ -153,8 +153,9 @@ static void register_closure(OSL::ShadingSystem *ss, const char *name, int id, O
ss->register_closure(name, id, params, prepare, generic_closure_setup, generic_closure_compare);
}
void OSLShader::register_closures(OSL::ShadingSystem *ss)
void OSLShader::register_closures(OSLShadingSystem *ss_)
{
OSL::ShadingSystem *ss = (OSL::ShadingSystem*)ss_;
int id = 0;
register_closure(ss, "diffuse", id++,

@ -38,7 +38,14 @@ CCL_NAMESPACE_BEGIN
class OSLRenderServices;
struct OSLGlobals {
/* use */
/* Start with no shading system (ss), texture system (ts) or render
 * services attached, and with the use flag cleared. */
OSLGlobals()
{
ss = NULL;
ts = NULL;
services = NULL;
use = false;
}
bool use;
/* shading system */
@ -66,19 +73,12 @@ struct OSLGlobals {
vector<AttributeMap> attribute_map;
ObjectNameMap object_name_map;
vector<ustring> object_names;
};
/* thread key for thread specific data lookup */
struct ThreadData {
OSL::ShaderGlobals globals;
OSL::PerThreadInfo *thread_info;
};
static tls_ptr(ThreadData, thread_data);
static thread_mutex thread_data_mutex;
static volatile int thread_data_users;
void thread_data_init();
void thread_data_free();
/* Per-thread OSL data: the shader globals handed to shader execution and
 * the thread info created by the shading system. It is reached through the
 * osl_tdata pointer on KernelGlobals rather than a thread local storage key. */
struct OSLThreadData {
OSL::ShaderGlobals globals;
OSL::PerThreadInfo *thread_info;
};
CCL_NAMESPACE_END

@ -23,6 +23,7 @@
#include "scene.h"
#include "osl_closures.h"
#include "osl_globals.h"
#include "osl_services.h"
#include "osl_shader.h"
@ -36,6 +37,8 @@
#include "kernel_differential.h"
#include "kernel_object.h"
#include "kernel_bvh.h"
#include "kernel_attribute.h"
#include "kernel_projection.h"
#include "kernel_triangle.h"
#include "kernel_accumulate.h"
#include "kernel_shader.h"
@ -53,6 +56,34 @@ ustring OSLRenderServices::u_camera("camera");
ustring OSLRenderServices::u_screen("screen");
ustring OSLRenderServices::u_raster("raster");
ustring OSLRenderServices::u_ndc("NDC");
ustring OSLRenderServices::u_object_location("object:location");
ustring OSLRenderServices::u_object_index("object:index");
ustring OSLRenderServices::u_geom_dupli_generated("geom:dupli_generated");
ustring OSLRenderServices::u_geom_dupli_uv("geom:dupli_uv");
ustring OSLRenderServices::u_material_index("material:index");
ustring OSLRenderServices::u_object_random("object:random");
ustring OSLRenderServices::u_particle_index("particle:index");
ustring OSLRenderServices::u_particle_age("particle:age");
ustring OSLRenderServices::u_particle_lifetime("particle:lifetime");
ustring OSLRenderServices::u_particle_location("particle:location");
ustring OSLRenderServices::u_particle_rotation("particle:rotation");
ustring OSLRenderServices::u_particle_size("particle:size");
ustring OSLRenderServices::u_particle_velocity("particle:velocity");
ustring OSLRenderServices::u_particle_angular_velocity("particle:angular_velocity");
ustring OSLRenderServices::u_geom_numpolyvertices("geom:numpolyvertices");
ustring OSLRenderServices::u_geom_trianglevertices("geom:trianglevertices");
ustring OSLRenderServices::u_geom_polyvertices("geom:polyvertices");
ustring OSLRenderServices::u_geom_name("geom:name");
ustring OSLRenderServices::u_path_ray_length("path:ray_length");
ustring OSLRenderServices::u_trace("trace");
ustring OSLRenderServices::u_hit("hit");
ustring OSLRenderServices::u_hitdist("hitdist");
ustring OSLRenderServices::u_N("N");
ustring OSLRenderServices::u_Ng("Ng");
ustring OSLRenderServices::u_P("P");
ustring OSLRenderServices::u_I("I");
ustring OSLRenderServices::u_u("u");
ustring OSLRenderServices::u_v("v");
ustring OSLRenderServices::u_empty;
OSLRenderServices::OSLRenderServices()
@ -488,104 +519,108 @@ static void get_object_attribute(const OSLGlobals::Attribute& attr, bool derivat
memset((char *)val + datasize, 0, datasize * 2);
}
static bool get_object_standard_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
TypeDesc type, bool derivatives, void *val)
bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
TypeDesc type, bool derivatives, void *val)
{
/* todo: turn this into hash table returning int, which can be used in switch */
/* todo: turn this into hash table? */
/* Object Attributes */
if (name == "object:location") {
if (name == u_object_location) {
float3 f = object_location(kg, sd);
return set_attribute_float3(f, type, derivatives, val);
}
else if (name == "object:index") {
else if (name == u_object_index) {
float f = object_pass_id(kg, sd->object);
return set_attribute_float(f, type, derivatives, val);
}
else if (name == "geom:dupli_generated") {
else if (name == u_geom_dupli_generated) {
float3 f = object_dupli_generated(kg, sd->object);
return set_attribute_float3(f, type, derivatives, val);
}
else if (name == "geom:dupli_uv") {
else if (name == u_geom_dupli_uv) {
float3 f = object_dupli_uv(kg, sd->object);
return set_attribute_float3(f, type, derivatives, val);
}
else if (name == "material:index") {
else if (name == u_material_index) {
float f = shader_pass_id(kg, sd);
return set_attribute_float(f, type, derivatives, val);
}
else if (name == "object:random") {
else if (name == u_object_random) {
float f = object_random_number(kg, sd->object);
return set_attribute_float(f, type, derivatives, val);
}
/* Particle Attributes */
else if (name == "particle:index") {
else if (name == u_particle_index) {
uint particle_id = object_particle_id(kg, sd->object);
float f = particle_index(kg, particle_id);
return set_attribute_float(f, type, derivatives, val);
}
else if (name == "particle:age") {
else if (name == u_particle_age) {
uint particle_id = object_particle_id(kg, sd->object);
float f = particle_age(kg, particle_id);
return set_attribute_float(f, type, derivatives, val);
}
else if (name == "particle:lifetime") {
else if (name == u_particle_lifetime) {
uint particle_id = object_particle_id(kg, sd->object);
float f= particle_lifetime(kg, particle_id);
return set_attribute_float(f, type, derivatives, val);
}
else if (name == "particle:location") {
else if (name == u_particle_location) {
uint particle_id = object_particle_id(kg, sd->object);
float3 f = particle_location(kg, particle_id);
return set_attribute_float3(f, type, derivatives, val);
}
#if 0 /* unsupported */
else if (name == "particle:rotation") {
else if (name == u_particle_rotation) {
uint particle_id = object_particle_id(kg, sd->object);
float4 f = particle_rotation(kg, particle_id);
return set_attribute_float4(f, type, derivatives, val);
}
#endif
else if (name == "particle:size") {
else if (name == u_particle_size) {
uint particle_id = object_particle_id(kg, sd->object);
float f = particle_size(kg, particle_id);
return set_attribute_float(f, type, derivatives, val);
}
else if (name == "particle:velocity") {
else if (name == u_particle_velocity) {
uint particle_id = object_particle_id(kg, sd->object);
float3 f = particle_velocity(kg, particle_id);
return set_attribute_float3(f, type, derivatives, val);
}
else if (name == "particle:angular_velocity") {
else if (name == u_particle_angular_velocity) {
uint particle_id = object_particle_id(kg, sd->object);
float3 f = particle_angular_velocity(kg, particle_id);
return set_attribute_float3(f, type, derivatives, val);
}
else if (name == "geom:numpolyvertices") {
else if (name == u_geom_numpolyvertices) {
return set_attribute_int(3, type, derivatives, val);
}
else if (name == "geom:trianglevertices" || name == "geom:polyvertices") {
else if (name == u_geom_trianglevertices || name == u_geom_polyvertices) {
float3 P[3];
triangle_vertices(kg, sd->prim, P);
object_position_transform(kg, sd, &P[0]);
object_position_transform(kg, sd, &P[1]);
object_position_transform(kg, sd, &P[2]);
if(!(sd->flag & SD_TRANSFORM_APPLIED)) {
object_position_transform(kg, sd, &P[0]);
object_position_transform(kg, sd, &P[1]);
object_position_transform(kg, sd, &P[2]);
}
return set_attribute_float3_3(P, type, derivatives, val);
}
else if(name == "geom:name") {
ustring object_name = kg->osl.object_names[sd->object];
else if(name == u_geom_name) {
ustring object_name = kg->osl->object_names[sd->object];
return set_attribute_string(object_name, type, derivatives, val);
}
else
return false;
}
static bool get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
TypeDesc type, bool derivatives, void *val)
bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
TypeDesc type, bool derivatives, void *val)
{
/* Ray Length */
if (name == "path:ray_length") {
if (name == u_path_ray_length) {
float f = sd->ray_length;
return set_attribute_float(f, type, derivatives, val);
}
@ -604,9 +639,9 @@ bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustri
/* lookup of attribute on another object */
if (object_name != u_empty) {
OSLGlobals::ObjectNameMap::iterator it = kg->osl.object_name_map.find(object_name);
OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name);
if (it == kg->osl.object_name_map.end())
if (it == kg->osl->object_name_map.end())
return false;
object = it->second;
@ -617,7 +652,7 @@ bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustri
}
/* find attribute on object */
OSLGlobals::AttributeMap& attribute_map = kg->osl.attribute_map[object];
OSLGlobals::AttributeMap& attribute_map = kg->osl->attribute_map[object];
OSLGlobals::AttributeMap::iterator it = attribute_map.find(name);
if (it != attribute_map.end()) {
@ -663,7 +698,7 @@ bool OSLRenderServices::texture(ustring filename, TextureOpt &options,
float s, float t, float dsdx, float dtdx,
float dsdy, float dtdy, float *result)
{
OSL::TextureSystem *ts = kernel_globals->osl.ts;
OSL::TextureSystem *ts = kernel_globals->osl->ts;
bool status = ts->texture(filename, options, s, t, dsdx, dtdx, dsdy, dtdy, result);
if(!status) {
@ -685,7 +720,7 @@ bool OSLRenderServices::texture3d(ustring filename, TextureOpt &options,
const OSL::Vec3 &dPdx, const OSL::Vec3 &dPdy,
const OSL::Vec3 &dPdz, float *result)
{
OSL::TextureSystem *ts = kernel_globals->osl.ts;
OSL::TextureSystem *ts = kernel_globals->osl->ts;
bool status = ts->texture3d(filename, options, P, dPdx, dPdy, dPdz, result);
if(!status) {
@ -707,7 +742,7 @@ bool OSLRenderServices::environment(ustring filename, TextureOpt &options,
OSL::ShaderGlobals *sg, const OSL::Vec3 &R,
const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, float *result)
{
OSL::TextureSystem *ts = kernel_globals->osl.ts;
OSL::TextureSystem *ts = kernel_globals->osl->ts;
bool status = ts->environment(filename, options, R, dRdx, dRdy, result);
if(!status) {
@ -728,7 +763,7 @@ bool OSLRenderServices::get_texture_info(ustring filename, int subimage,
ustring dataname,
TypeDesc datatype, void *data)
{
OSL::TextureSystem *ts = kernel_globals->osl.ts;
OSL::TextureSystem *ts = kernel_globals->osl->ts;
return ts->get_texture_info(filename, subimage, dataname, datatype, data);
}
@ -798,12 +833,12 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, ustring source, ustri
{
TraceData *tracedata = (TraceData*)sg->tracedata;
if(source == "trace" && tracedata) {
if(name == "hit") {
if(source == u_trace && tracedata) {
if(name == u_hit) {
return set_attribute_int((tracedata->isect.prim != ~0), type, derivatives, val);
}
else if(tracedata->isect.prim != ~0) {
if(name == "hitdist") {
if(name == u_hitdist) {
float f[3] = {tracedata->isect.t, 0.0f, 0.0f};
return set_attribute_float(f, type, derivatives, val);
}
@ -817,25 +852,25 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, ustring source, ustri
tracedata->setup = true;
}
if(name == "N") {
if(name == u_N) {
return set_attribute_float3(sd->N, type, derivatives, val);
}
else if(name == "Ng") {
else if(name == u_Ng) {
return set_attribute_float3(sd->Ng, type, derivatives, val);
}
else if(name == "P") {
else if(name == u_P) {
float3 f[3] = {sd->P, sd->dP.dx, sd->dP.dy};
return set_attribute_float3(f, type, derivatives, val);
}
else if(name == "I") {
else if(name == u_I) {
float3 f[3] = {sd->I, sd->dI.dx, sd->dI.dy};
return set_attribute_float3(f, type, derivatives, val);
}
else if(name == "u") {
else if(name == u_u) {
float f[3] = {sd->u, sd->du.dx, sd->du.dy};
return set_attribute_float(f, type, derivatives, val);
}
else if(name == "v") {
else if(name == u_v) {
float f[3] = {sd->v, sd->dv.dx, sd->dv.dy};
return set_attribute_float(f, type, derivatives, val);
}

@ -101,6 +101,11 @@ public:
bool get_texture_info(ustring filename, int subimage,
ustring dataname, TypeDesc datatype, void *data);
static bool get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
TypeDesc type, bool derivatives, void *val);
static bool get_object_standard_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
TypeDesc type, bool derivatives, void *val);
struct TraceData {
Ray ray;
Intersection isect;
@ -114,6 +119,34 @@ public:
static ustring u_screen;
static ustring u_raster;
static ustring u_ndc;
static ustring u_object_location;
static ustring u_object_index;
static ustring u_geom_dupli_generated;
static ustring u_geom_dupli_uv;
static ustring u_material_index;
static ustring u_object_random;
static ustring u_particle_index;
static ustring u_particle_age;
static ustring u_particle_lifetime;
static ustring u_particle_location;
static ustring u_particle_rotation;
static ustring u_particle_size;
static ustring u_particle_velocity;
static ustring u_particle_angular_velocity;
static ustring u_geom_numpolyvertices;
static ustring u_geom_trianglevertices;
static ustring u_geom_polyvertices;
static ustring u_geom_name;
static ustring u_path_ray_length;
static ustring u_trace;
static ustring u_hit;
static ustring u_hitdist;
static ustring u_N;
static ustring u_Ng;
static ustring u_P;
static ustring u_I;
static ustring u_u;
static ustring u_v;
static ustring u_empty;
private:

@ -22,65 +22,56 @@
#include "kernel_object.h"
#include "osl_closures.h"
#include "osl_globals.h"
#include "osl_services.h"
#include "osl_shader.h"
#include "util_attribute.h"
#include "util_foreach.h"
#include <OSL/oslexec.h>
CCL_NAMESPACE_BEGIN
tls_ptr(OSLGlobals::ThreadData, OSLGlobals::thread_data);
volatile int OSLGlobals::thread_data_users = 0;
thread_mutex OSLGlobals::thread_data_mutex;
/* Threads */
void OSLGlobals::thread_data_init()
void OSLShader::thread_init(KernelGlobals *kg, KernelGlobals *kernel_globals, OSLGlobals *osl_globals)
{
thread_scoped_lock thread_data_lock(thread_data_mutex);
/* no osl used? */
if(!osl_globals->use) {
kg->osl = NULL;
return;
}
if(thread_data_users == 0)
tls_create(OSLGlobals::ThreadData, thread_data);
/* per thread kernel data init */
kg->osl = osl_globals;
kg->osl->services->thread_init(kernel_globals);
thread_data_users++;
}
void OSLGlobals::thread_data_free()
{
/* thread local storage delete */
thread_scoped_lock thread_data_lock(thread_data_mutex);
thread_data_users--;
if(thread_data_users == 0)
tls_delete(OSLGlobals::ThreadData, thread_data);
}
void OSLShader::thread_init(KernelGlobals *kg)
{
OSL::ShadingSystem *ss = kg->osl.ss;
OSLGlobals::ThreadData *tdata = new OSLGlobals::ThreadData();
OSL::ShadingSystem *ss = kg->osl->ss;
OSLThreadData *tdata = new OSLThreadData();
memset(&tdata->globals, 0, sizeof(OSL::ShaderGlobals));
tdata->thread_info = ss->create_thread_info();
tls_set(kg->osl.thread_data, tdata);
kg->osl.services->thread_init(kg);
kg->osl_ss = (OSLShadingSystem*)ss;
kg->osl_tdata = tdata;
}
void OSLShader::thread_free(KernelGlobals *kg)
{
OSL::ShadingSystem *ss = kg->osl.ss;
if(!kg->osl)
return;
OSLGlobals::ThreadData *tdata = tls_get(OSLGlobals::ThreadData, kg->osl.thread_data);
OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
OSLThreadData *tdata = kg->osl_tdata;
ss->destroy_thread_info(tdata->thread_info);
delete tdata;
kg->osl = NULL;
kg->osl_ss = NULL;
kg->osl_tdata = NULL;
}
/* Globals */
@ -230,8 +221,8 @@ static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy,
void OSLShader::eval_surface(KernelGlobals *kg, ShaderData *sd, float randb, int path_flag)
{
/* gather pointers */
OSL::ShadingSystem *ss = kg->osl.ss;
OSLGlobals::ThreadData *tdata = tls_get(OSLGlobals::ThreadData, kg->osl.thread_data);
OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
OSLThreadData *tdata = kg->osl_tdata;
OSL::ShaderGlobals *globals = &tdata->globals;
OSL::ShadingContext *ctx = (OSL::ShadingContext *)sd->osl_ctx;
@ -241,8 +232,8 @@ void OSLShader::eval_surface(KernelGlobals *kg, ShaderData *sd, float randb, int
/* execute shader for this point */
int shader = sd->shader & SHADER_MASK;
if (kg->osl.surface_state[shader])
ss->execute(*ctx, *(kg->osl.surface_state[shader]), *globals);
if (kg->osl->surface_state[shader])
ss->execute(*ctx, *(kg->osl->surface_state[shader]), *globals);
/* free trace data */
if(globals->tracedata)
@ -291,8 +282,8 @@ static float3 flatten_background_closure_tree(const OSL::ClosureColor *closure)
float3 OSLShader::eval_background(KernelGlobals *kg, ShaderData *sd, int path_flag)
{
/* gather pointers */
OSL::ShadingSystem *ss = kg->osl.ss;
OSLGlobals::ThreadData *tdata = tls_get(OSLGlobals::ThreadData, kg->osl.thread_data);
OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
OSLThreadData *tdata = kg->osl_tdata;
OSL::ShaderGlobals *globals = &tdata->globals;
OSL::ShadingContext *ctx = (OSL::ShadingContext *)sd->osl_ctx;
@ -300,8 +291,8 @@ float3 OSLShader::eval_background(KernelGlobals *kg, ShaderData *sd, int path_fl
shaderdata_to_shaderglobals(kg, sd, path_flag, globals);
/* execute shader for this point */
if (kg->osl.background_state)
ss->execute(*ctx, *(kg->osl.background_state), *globals);
if (kg->osl->background_state)
ss->execute(*ctx, *(kg->osl->background_state), *globals);
/* free trace data */
if(globals->tracedata)
@ -371,8 +362,8 @@ static void flatten_volume_closure_tree(ShaderData *sd,
void OSLShader::eval_volume(KernelGlobals *kg, ShaderData *sd, float randb, int path_flag)
{
/* gather pointers */
OSL::ShadingSystem *ss = kg->osl.ss;
OSLGlobals::ThreadData *tdata = tls_get(OSLGlobals::ThreadData, kg->osl.thread_data);
OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
OSLThreadData *tdata = kg->osl_tdata;
OSL::ShaderGlobals *globals = &tdata->globals;
OSL::ShadingContext *ctx = (OSL::ShadingContext *)sd->osl_ctx;
@ -382,8 +373,8 @@ void OSLShader::eval_volume(KernelGlobals *kg, ShaderData *sd, float randb, int
/* execute shader */
int shader = sd->shader & SHADER_MASK;
if (kg->osl.volume_state[shader])
ss->execute(*ctx, *(kg->osl.volume_state[shader]), *globals);
if (kg->osl->volume_state[shader])
ss->execute(*ctx, *(kg->osl->volume_state[shader]), *globals);
/* free trace data */
if(globals->tracedata)
@ -398,8 +389,8 @@ void OSLShader::eval_volume(KernelGlobals *kg, ShaderData *sd, float randb, int
void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd)
{
/* gather pointers */
OSL::ShadingSystem *ss = kg->osl.ss;
OSLGlobals::ThreadData *tdata = tls_get(OSLGlobals::ThreadData, kg->osl.thread_data);
OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
OSLThreadData *tdata = kg->osl_tdata;
OSL::ShaderGlobals *globals = &tdata->globals;
OSL::ShadingContext *ctx = (OSL::ShadingContext *)sd->osl_ctx;
@ -409,8 +400,8 @@ void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd)
/* execute shader */
int shader = sd->shader & SHADER_MASK;
if (kg->osl.displacement_state[shader])
ss->execute(*ctx, *(kg->osl.displacement_state[shader]), *globals);
if (kg->osl->displacement_state[shader])
ss->execute(*ctx, *(kg->osl->displacement_state[shader]), *globals);
/* free trace data */
if(globals->tracedata)
@ -422,15 +413,15 @@ void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd)
void OSLShader::init(KernelGlobals *kg, ShaderData *sd)
{
OSL::ShadingSystem *ss = kg->osl.ss;
OSLGlobals::ThreadData *tdata = tls_get(OSLGlobals::ThreadData, kg->osl.thread_data);
OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
OSLThreadData *tdata = kg->osl_tdata;
sd->osl_ctx = ss->get_context(tdata->thread_info);
}
/* Hand the shading context stored in sd->osl_ctx back to the shading system
 * via release_context().
 *
 * kg: kernel globals from which the shading system pointer is read.
 * sd: shader data whose osl_ctx is released; the field itself is not reset here. */
void OSLShader::release(KernelGlobals *kg, ShaderData *sd)
{
/* NOTE(review): ss is declared twice below (kg->osl.ss and kg->osl_ss); this
 * looks like both sides of a change were kept -- confirm which one is current. */
OSL::ShadingSystem *ss = kg->osl.ss;
OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss;
/* osl_ctx is cast back to OSL::ShadingContext* before it is passed on */
ss->release_context((OSL::ShadingContext *)sd->osl_ctx);
}
@ -488,5 +479,23 @@ float3 OSLShader::volume_eval_phase(const ShaderClosure *sc, const float3 omega_
return TO_FLOAT3(volume_eval) * sc->weight;
}
/* Attributes */
int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id)
{
	/* OSL attributes are looked up by name in the map selected by sd->object,
	 * so the standard attribute id is first turned into its "std::" prefixed name. */
	OSLGlobals::AttributeMap &object_attributes = kg->osl->attribute_map[sd->object];
	const char *standard_name = attribute_standard_name((AttributeStandard)id);
	ustring lookup_name(std::string("std::") + std::string(standard_name));

	OSLGlobals::AttributeMap::const_iterator found = object_attributes.find(lookup_name);

	/* no entry under that name */
	if (found == object_attributes.end())
		return (int)ATTR_STD_NOT_FOUND;

	/* an entry without an element type is reported as not found as well */
	const OSLGlobals::Attribute &attribute = found->second;
	if (attribute.elem == ATTR_ELEMENT_NONE)
		return (int)ATTR_STD_NOT_FOUND;

	return attribute.offset;
}
CCL_NAMESPACE_END

@ -31,33 +31,27 @@
* This means no thread state must be passed along in the kernel itself.
*/
#include <OSL/oslexec.h>
#include <OSL/oslclosure.h>
#include "kernel_types.h"
#include "util_map.h"
#include "util_param.h"
#include "util_vector.h"
CCL_NAMESPACE_BEGIN
namespace OSL = ::OSL;
class OSLRenderServices;
class Scene;
struct ShaderClosure;
struct ShaderData;
struct differential3;
struct KernelGlobals;
struct OSLGlobals;
struct OSLShadingSystem;
class OSLShader {
public:
/* init */
static void register_closures(OSL::ShadingSystem *ss);
static void register_closures(OSLShadingSystem *ss);
/* per thread data */
static void thread_init(KernelGlobals *kg);
static void thread_init(KernelGlobals *kg, KernelGlobals *kernel_globals, OSLGlobals *osl_globals);
static void thread_free(KernelGlobals *kg);
/* eval */
@ -82,6 +76,9 @@ public:
/* release */
static void init(KernelGlobals *kg, ShaderData *sd);
static void release(KernelGlobals *kg, ShaderData *sd);
/* attributes */
static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id);
};
CCL_NAMESPACE_END

@ -148,10 +148,9 @@ ObjectManager::~ObjectManager()
{
}
void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress)
{
float4 *objects = dscene->objects.resize(OBJECT_SIZE*scene->objects.size());
uint *object_flag = dscene->object_flag.resize(scene->objects.size());
int i = 0;
map<Mesh*, float> surface_area_map;
Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading);
@ -257,7 +256,6 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
}
device->tex_alloc("__objects", dscene->objects);
device->tex_alloc("__object_flag", dscene->object_flag);
dscene->data.bvh.have_motion = have_motion;
}
@ -272,9 +270,12 @@ void ObjectManager::device_update(Device *device, DeviceScene *dscene, Scene *sc
if(scene->objects.size() == 0)
return;
/* object info flag */
uint *object_flag = dscene->object_flag.resize(scene->objects.size());
/* set object transform matrices, before applying static transforms */
progress.set_status("Updating Objects", "Copying Transformations to device");
device_update_transforms(device, dscene, scene, progress);
device_update_transforms(device, dscene, scene, object_flag, progress);
if(progress.get_cancel()) return;
@ -282,10 +283,11 @@ void ObjectManager::device_update(Device *device, DeviceScene *dscene, Scene *sc
/* todo: do before to support getting object level coords? */
if(scene->params.bvh_type == SceneParams::BVH_STATIC) {
progress.set_status("Updating Objects", "Applying Static Transformations");
apply_static_transforms(scene, progress);
apply_static_transforms(scene, object_flag, progress);
}
if(progress.get_cancel()) return;
/* allocate object flag */
device->tex_alloc("__object_flag", dscene->object_flag);
need_update = false;
}
@ -299,7 +301,7 @@ void ObjectManager::device_free(Device *device, DeviceScene *dscene)
dscene->object_flag.clear();
}
void ObjectManager::apply_static_transforms(Scene *scene, Progress& progress)
void ObjectManager::apply_static_transforms(Scene *scene, uint *object_flag, Progress& progress)
{
/* todo: normals and displacement should be done before applying transform! */
/* todo: create objects/meshes in right order! */
@ -312,6 +314,7 @@ void ObjectManager::apply_static_transforms(Scene *scene, Progress& progress)
#else
bool motion_blur = false;
#endif
int i = 0;
foreach(Object *object, scene->objects) {
map<Mesh*, int>::iterator it = mesh_users.find(object->mesh);
@ -334,8 +337,12 @@ void ObjectManager::apply_static_transforms(Scene *scene, Progress& progress)
if(progress.get_cancel()) return;
}
object_flag[i] |= SD_TRANSFORM_APPLIED;
}
}
i++;
}
}

@ -73,12 +73,12 @@ public:
~ObjectManager();
void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
void device_update_transforms(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
void device_update_transforms(Device *device, DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress);
void device_free(Device *device, DeviceScene *dscene);
void tag_update(Scene *scene);
void apply_static_transforms(Scene *scene, Progress& progress);
void apply_static_transforms(Scene *scene, uint *object_flag, Progress& progress);
};
CCL_NAMESPACE_END

@ -45,8 +45,6 @@ CCL_NAMESPACE_BEGIN
OSLShaderManager::OSLShaderManager()
{
thread_data_initialized = false;
services = new OSLRenderServices();
shading_system_init();
@ -103,11 +101,6 @@ void OSLShaderManager::device_update(Device *device, DeviceScene *dscene, Scene
scene->image_manager->set_osl_texture_system((void*)ts);
device_update_common(device, dscene, scene, progress);
if(!thread_data_initialized) {
og->thread_data_init();
thread_data_initialized = true;
}
}
void OSLShaderManager::device_free(Device *device, DeviceScene *dscene)
@ -125,11 +118,6 @@ void OSLShaderManager::device_free(Device *device, DeviceScene *dscene)
og->volume_state.clear();
og->displacement_state.clear();
og->background_state.reset();
if(thread_data_initialized) {
og->thread_data_free();
thread_data_initialized = false;
}
}
void OSLShaderManager::texture_system_init()
@ -170,7 +158,7 @@ void OSLShaderManager::shading_system_init()
const int nraytypes = sizeof(raytypes)/sizeof(raytypes[0]);
ss->attribute("raytypes", TypeDesc(TypeDesc::STRING, nraytypes), raytypes);
OSLShader::register_closures(ss);
OSLShader::register_closures((OSLShadingSystem*)ss);
loaded_shaders.clear();
}

@ -73,8 +73,6 @@ protected:
OSLRenderServices *services;
OSL::ErrorHandler errhandler;
set<string> loaded_shaders;
bool thread_data_initialized;
};
#endif

@ -70,37 +70,6 @@ protected:
bool joined;
};
/* Thread Local Storage
 *
 * The Boost implementation is a bit slow, and __thread is not supported on
 * Mac OS X, but the pthreads implementation is optimized, so these macros are
 * used instead.
 *
 * Two sets of macros with the same names are provided:
 * - when __APPLE__ or _WIN32 is defined they expand to a pthread_key_t and the
 *   pthread_setspecific / pthread_getspecific / pthread_key_create /
 *   pthread_key_delete calls;
 * - otherwise they expand to a plain __thread pointer variable, and
 *   tls_create / tls_delete expand to nothing. */
#if defined(__APPLE__) || defined(_WIN32)
/* declare the storage slot as a pthread key called name (type is unused in this variant) */
#define tls_ptr(type, name) \
pthread_key_t name
/* store the pointer value under the key for the calling thread */
#define tls_set(name, value) \
pthread_setspecific(name, value)
/* fetch the stored pointer and cast it to type* */
#define tls_get(type, name) \
((type*)pthread_getspecific(name))
/* create the key; NULL is passed as the destructor argument */
#define tls_create(type, name) \
pthread_key_create(&name, NULL)
/* delete the key
 * NOTE(review): unlike the other macros this expansion ends in ';', so
 * writing tls_delete(T, n); produces an extra empty statement -- confirm
 * whether that is intended. */
#define tls_delete(type, name) \
pthread_key_delete(name);
#else
/* declare the storage slot as a __thread pointer variable of type* */
#define tls_ptr(type, name) \
__thread type *name
/* plain assignment to the __thread variable */
#define tls_set(name, value) \
name = value
/* plain read of the __thread variable (type is unused in this variant) */
#define tls_get(type, name) \
name
/* nothing to create or delete for a __thread variable */
#define tls_create(type, name)
#define tls_delete(type, name)
#endif
CCL_NAMESPACE_END
#endif /* __UTIL_THREAD_H__ */