diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h index efd6798ca51..cfc567ff9ca 100644 --- a/intern/cycles/kernel/bvh/bvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h @@ -276,7 +276,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, shader = __float_as_int(str.z); } #endif - int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE); + int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; /* if no transparent shadows, all light is blocked */ if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h index 522213f30ca..46fd178aed6 100644 --- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h @@ -358,7 +358,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, shader = __float_as_int(str.z); } #endif - int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE); + int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; /* if no transparent shadows, all light is blocked */ if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index 9d833b77bdb..0b410f448c8 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -322,67 +322,49 @@ ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object) ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd) { - return kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE + 1); + return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id; } /* Particle data from which object was instanced */ ccl_device_inline uint particle_index(KernelGlobals *kg, int particle) { - int offset = particle*PARTICLE_SIZE; - float4 f = kernel_tex_fetch(__particles, offset + 0); - return 
__float_as_uint(f.x); + return kernel_tex_fetch(__particles, particle).index; } ccl_device float particle_age(KernelGlobals *kg, int particle) { - int offset = particle*PARTICLE_SIZE; - float4 f = kernel_tex_fetch(__particles, offset + 0); - return f.y; + return kernel_tex_fetch(__particles, particle).age; } ccl_device float particle_lifetime(KernelGlobals *kg, int particle) { - int offset = particle*PARTICLE_SIZE; - float4 f = kernel_tex_fetch(__particles, offset + 0); - return f.z; + return kernel_tex_fetch(__particles, particle).lifetime; } ccl_device float particle_size(KernelGlobals *kg, int particle) { - int offset = particle*PARTICLE_SIZE; - float4 f = kernel_tex_fetch(__particles, offset + 0); - return f.w; + return kernel_tex_fetch(__particles, particle).size; } ccl_device float4 particle_rotation(KernelGlobals *kg, int particle) { - int offset = particle*PARTICLE_SIZE; - float4 f = kernel_tex_fetch(__particles, offset + 1); - return f; + return kernel_tex_fetch(__particles, particle).rotation; } ccl_device float3 particle_location(KernelGlobals *kg, int particle) { - int offset = particle*PARTICLE_SIZE; - float4 f = kernel_tex_fetch(__particles, offset + 2); - return make_float3(f.x, f.y, f.z); + return float4_to_float3(kernel_tex_fetch(__particles, particle).location); } ccl_device float3 particle_velocity(KernelGlobals *kg, int particle) { - int offset = particle*PARTICLE_SIZE; - float4 f2 = kernel_tex_fetch(__particles, offset + 2); - float4 f3 = kernel_tex_fetch(__particles, offset + 3); - return make_float3(f2.w, f3.x, f3.y); + return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity); } ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle) { - int offset = particle*PARTICLE_SIZE; - float4 f3 = kernel_tex_fetch(__particles, offset + 3); - float4 f4 = kernel_tex_fetch(__particles, offset + 4); - return make_float3(f3.z, f3.w, f4.x); + return float4_to_float3(kernel_tex_fetch(__particles, 
particle).angular_velocity); } /* Object intersection in BVH */ diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index 5875249b404..a5556c3be8f 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -29,7 +29,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, /* setup shading at emitter */ float3 eval; - int shader_flag = kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE); + int shader_flag = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).flags; #ifdef __BACKGROUND_MIS__ if(ls->type == LIGHT_BACKGROUND) { @@ -51,9 +51,9 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, #endif if(shader_flag & SD_HAS_CONSTANT_EMISSION) { - eval.x = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 2)); - eval.y = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 3)); - eval.z = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 4)); + eval.x = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).constant_emission[0]; + eval.y = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).constant_emission[1]; + eval.z = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).constant_emission[2]; if((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) { ls->Ng = -ls->Ng; } diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index b1f66852b7f..fc8d06fc33d 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -114,7 +114,7 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, sd->I = -ray->D; - sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE); + sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; #ifdef __INSTANCING__ if(isect->object != OBJECT_NONE) { @@ -199,7 +199,7 @@ 
void shader_setup_from_subsurface( motion_triangle_shader_setup(kg, sd, isect, ray, true); } - sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE); + sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; # ifdef __INSTANCING__ if(isect->object != OBJECT_NONE) { @@ -276,7 +276,7 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg, sd->time = time; sd->ray_length = t; - sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE); + sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; sd->object_flag = 0; if(sd->object != OBJECT_NONE) { sd->object_flag |= kernel_tex_fetch(__object_flag, @@ -386,7 +386,7 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat sd->Ng = -ray->D; sd->I = -ray->D; sd->shader = kernel_data.background.surface_shader; - sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE); + sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; sd->object_flag = 0; sd->time = ray->time; sd->ray_length = 0.0f; @@ -1181,7 +1181,7 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg, sd->shader = stack[i].shader; sd->flag &= ~SD_SHADER_FLAGS; - sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE); + sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; sd->object_flag &= ~SD_OBJECT_FLAGS; if(sd->object != OBJECT_NONE) { @@ -1254,7 +1254,7 @@ ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect shader = __float_as_int(str.z); } #endif - int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE); + int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0; } diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index 601165a3848..892e0c84cba 100644 --- 
a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -61,11 +61,11 @@ KERNEL_TEX(float2, __light_background_marginal_cdf) KERNEL_TEX(float2, __light_background_conditional_cdf) /* particles */ -KERNEL_TEX(float4, __particles) +KERNEL_TEX(KernelParticle, __particles) /* shaders */ KERNEL_TEX(uint4, __svm_nodes) -KERNEL_TEX(uint, __shader_flag) +KERNEL_TEX(KernelShader, __shaders) KERNEL_TEX(uint, __object_flag) /* lookup tables */ diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 198ce39c63f..2cab63cdc6a 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -39,8 +39,6 @@ CCL_NAMESPACE_BEGIN #define FILTER_TABLE_SIZE 1024 #define RAMP_TABLE_SIZE 256 #define SHUTTER_TABLE_SIZE 256 -#define PARTICLE_SIZE 5 -#define SHADER_SIZE 5 #define BSSRDF_MIN_RADIUS 1e-8f #define BSSRDF_MAX_HITS 4 @@ -923,7 +921,7 @@ enum ShaderDataFlag { SD_HAS_BUMP = (1 << 25), /* Has true displacement. */ SD_HAS_DISPLACEMENT = (1 << 26), - /* Has constant emission (value stored in __shader_flag) */ + /* Has constant emission (value stored in __shaders) */ SD_HAS_CONSTANT_EMISSION = (1 << 27), /* Needs to access attributes */ SD_NEED_ATTRIBUTES = (1 << 28), @@ -1511,6 +1509,29 @@ typedef struct KernelLightDistribution { } KernelLightDistribution; static_assert_align(KernelLightDistribution, 16); +typedef struct KernelParticle { + int index; + float age; + float lifetime; + float size; + float4 rotation; + /* Only the xyz components of the following are used. float4 is used instead + * of float3 to ensure consistent padding/alignment across devices.
*/ + float4 location; + float4 velocity; + float4 angular_velocity; +} KernelParticle; +static_assert_align(KernelParticle, 16); + +typedef struct KernelShader { + float constant_emission[3]; + float pad1; + int flags; + int pass_id; + int pad2, pad3; +} KernelShader; +static_assert_align(KernelShader, 16); + /* Declarations required for split kernel */ /* Macro for queues */ diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index 058e7dccafd..88360e5f1ae 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -104,7 +104,7 @@ ccl_device float kernel_volume_channel_get(float3 value, int channel) ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack) { for(int i = 0; stack[i].shader != SHADER_NONE; i++) { - int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE); + int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags; if(shader_flag & SD_HETEROGENEOUS_VOLUME) { return true; @@ -134,7 +134,7 @@ ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stac int method = -1; for(int i = 0; stack[i].shader != SHADER_NONE; i++) { - int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE); + int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags; if(shader_flag & SD_VOLUME_MIS) { return SD_VOLUME_MIS; diff --git a/intern/cycles/render/particles.cpp b/intern/cycles/render/particles.cpp index 3ee620c9d01..e4be3306d7e 100644 --- a/intern/cycles/render/particles.cpp +++ b/intern/cycles/render/particles.cpp @@ -62,14 +62,10 @@ void ParticleSystemManager::device_update_particles(Device *, DeviceScene *dscen for(size_t j = 0; j < scene->particle_systems.size(); j++) num_particles += scene->particle_systems[j]->particles.size(); - float4 *particles = dscene->particles.alloc(PARTICLE_SIZE*num_particles); 
+ KernelParticle *kparticles = dscene->particles.alloc(num_particles); /* dummy particle */ - particles[0] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - particles[1] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - particles[2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - particles[3] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - particles[4] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + memset(kparticles, 0, sizeof(KernelParticle)); int i = 1; for(size_t j = 0; j < scene->particle_systems.size(); j++) { @@ -78,13 +74,15 @@ void ParticleSystemManager::device_update_particles(Device *, DeviceScene *dscen for(size_t k = 0; k < psys->particles.size(); k++) { /* pack in texture */ Particle& pa = psys->particles[k]; - int offset = i*PARTICLE_SIZE; - particles[offset] = make_float4(__uint_as_float(pa.index), pa.age, pa.lifetime, pa.size); - particles[offset+1] = pa.rotation; - particles[offset+2] = make_float4(pa.location.x, pa.location.y, pa.location.z, pa.velocity.x); - particles[offset+3] = make_float4(pa.velocity.y, pa.velocity.z, pa.angular_velocity.x, pa.angular_velocity.y); - particles[offset+4] = make_float4(pa.angular_velocity.z, 0.0f, 0.0f, 0.0f); + kparticles[i].index = pa.index; + kparticles[i].age = pa.age; + kparticles[i].lifetime = pa.lifetime; + kparticles[i].size = pa.size; + kparticles[i].rotation = pa.rotation; + kparticles[i].location = float3_to_float4(pa.location); + kparticles[i].velocity = float3_to_float4(pa.velocity); + kparticles[i].angular_velocity = float3_to_float4(pa.angular_velocity); i++; diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index aca28fc32fb..f5b8e2fd6a6 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -71,7 +71,7 @@ DeviceScene::DeviceScene(Device *device) light_background_conditional_cdf(device, "__light_background_conditional_cdf", MEM_TEXTURE), particles(device, "__particles", MEM_TEXTURE), svm_nodes(device, "__svm_nodes", MEM_TEXTURE), - shader_flag(device, "__shader_flag", 
MEM_TEXTURE), + shaders(device, "__shaders", MEM_TEXTURE), object_flag(device, "__object_flag", MEM_TEXTURE), lookup_table(device, "__lookup_table", MEM_TEXTURE), sobol_directions(device, "__sobol_directions", MEM_TEXTURE) diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 3f089b9138f..316ffeb1092 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -102,11 +102,11 @@ public: device_vector<float2> light_background_conditional_cdf; /* particles */ - device_vector<float4> particles; + device_vector<KernelParticle> particles; /* shaders */ device_vector<uint4> svm_nodes; - device_vector<uint> shader_flag; + device_vector<KernelShader> shaders; device_vector<uint> object_flag; /* lookup tables */ diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index 578c61a3e79..ec52c51e337 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -432,14 +432,12 @@ void ShaderManager::device_update_common(Device *device, Scene *scene, Progress& /*progress*/) { - dscene->shader_flag.free(); + dscene->shaders.free(); if(scene->shaders.size() == 0) return; - uint shader_flag_size = scene->shaders.size()*SHADER_SIZE; - uint *shader_flag = dscene->shader_flag.alloc(shader_flag_size); - uint i = 0; + KernelShader *kshader = dscene->shaders.alloc(scene->shaders.size()); bool has_volumes = false; bool has_transparent_shadow = false; @@ -487,16 +485,17 @@ void ShaderManager::device_update_common(Device *device, flag |= SD_HAS_CONSTANT_EMISSION; /* regular shader */ - shader_flag[i++] = flag; - shader_flag[i++] = shader->pass_id; - shader_flag[i++] = __float_as_int(constant_emission.x); - shader_flag[i++] = __float_as_int(constant_emission.y); - shader_flag[i++] = __float_as_int(constant_emission.z); + kshader->flags = flag; + kshader->pass_id = shader->pass_id; + kshader->constant_emission[0] = constant_emission.x; + kshader->constant_emission[1] = constant_emission.y; + kshader->constant_emission[2] = constant_emission.z; + kshader++;
has_transparent_shadow |= (flag & SD_HAS_TRANSPARENT_SHADOW) != 0; } - dscene->shader_flag.copy_to_device(); + dscene->shaders.copy_to_device(); /* lookup tables */ KernelTables *ktables = &dscene->data.tables; @@ -525,7 +524,7 @@ void ShaderManager::device_free_common(Device *, DeviceScene *dscene, Scene *sce { scene->lookup_tables->remove_table(&beckmann_table_offset); - dscene->shader_flag.free(); + dscene->shaders.free(); } void ShaderManager::add_default(Scene *scene)