Code refactor: use KernelShader and KernelParticle instead of float arrays.

Original patch by Stefan with modifications by Brecht.
This commit is contained in:
Stefan Werner 2018-03-08 00:35:24 +01:00 committed by Brecht Van Lommel
parent fa9175ff02
commit f3010e98c3
12 changed files with 72 additions and 72 deletions

@ -276,7 +276,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
shader = __float_as_int(str.z); shader = __float_as_int(str.z);
} }
#endif #endif
int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE); int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
/* if no transparent shadows, all light is blocked */ /* if no transparent shadows, all light is blocked */
if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {

@ -358,7 +358,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
shader = __float_as_int(str.z); shader = __float_as_int(str.z);
} }
#endif #endif
int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE); int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
/* if no transparent shadows, all light is blocked */ /* if no transparent shadows, all light is blocked */
if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {

@ -322,67 +322,49 @@ ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object)
ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd) ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
{ {
return kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE + 1); return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id;
} }
/* Particle data from which object was instanced */ /* Particle data from which object was instanced */
ccl_device_inline uint particle_index(KernelGlobals *kg, int particle) ccl_device_inline uint particle_index(KernelGlobals *kg, int particle)
{ {
int offset = particle*PARTICLE_SIZE; return kernel_tex_fetch(__particles, particle).index;
float4 f = kernel_tex_fetch(__particles, offset + 0);
return __float_as_uint(f.x);
} }
ccl_device float particle_age(KernelGlobals *kg, int particle) ccl_device float particle_age(KernelGlobals *kg, int particle)
{ {
int offset = particle*PARTICLE_SIZE; return kernel_tex_fetch(__particles, particle).age;
float4 f = kernel_tex_fetch(__particles, offset + 0);
return f.y;
} }
ccl_device float particle_lifetime(KernelGlobals *kg, int particle) ccl_device float particle_lifetime(KernelGlobals *kg, int particle)
{ {
int offset = particle*PARTICLE_SIZE; return kernel_tex_fetch(__particles, particle).lifetime;
float4 f = kernel_tex_fetch(__particles, offset + 0);
return f.z;
} }
ccl_device float particle_size(KernelGlobals *kg, int particle) ccl_device float particle_size(KernelGlobals *kg, int particle)
{ {
int offset = particle*PARTICLE_SIZE; return kernel_tex_fetch(__particles, particle).size;
float4 f = kernel_tex_fetch(__particles, offset + 0);
return f.w;
} }
ccl_device float4 particle_rotation(KernelGlobals *kg, int particle) ccl_device float4 particle_rotation(KernelGlobals *kg, int particle)
{ {
int offset = particle*PARTICLE_SIZE; return kernel_tex_fetch(__particles, particle).rotation;
float4 f = kernel_tex_fetch(__particles, offset + 1);
return f;
} }
ccl_device float3 particle_location(KernelGlobals *kg, int particle) ccl_device float3 particle_location(KernelGlobals *kg, int particle)
{ {
int offset = particle*PARTICLE_SIZE; return float4_to_float3(kernel_tex_fetch(__particles, particle).location);
float4 f = kernel_tex_fetch(__particles, offset + 2);
return make_float3(f.x, f.y, f.z);
} }
ccl_device float3 particle_velocity(KernelGlobals *kg, int particle) ccl_device float3 particle_velocity(KernelGlobals *kg, int particle)
{ {
int offset = particle*PARTICLE_SIZE; return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity);
float4 f2 = kernel_tex_fetch(__particles, offset + 2);
float4 f3 = kernel_tex_fetch(__particles, offset + 3);
return make_float3(f2.w, f3.x, f3.y);
} }
ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle) ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
{ {
int offset = particle*PARTICLE_SIZE; return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity);
float4 f3 = kernel_tex_fetch(__particles, offset + 3);
float4 f4 = kernel_tex_fetch(__particles, offset + 4);
return make_float3(f3.z, f3.w, f4.x);
} }
/* Object intersection in BVH */ /* Object intersection in BVH */

@ -29,7 +29,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
/* setup shading at emitter */ /* setup shading at emitter */
float3 eval; float3 eval;
int shader_flag = kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE); int shader_flag = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).flags;
#ifdef __BACKGROUND_MIS__ #ifdef __BACKGROUND_MIS__
if(ls->type == LIGHT_BACKGROUND) { if(ls->type == LIGHT_BACKGROUND) {
@ -51,9 +51,9 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
#endif #endif
if(shader_flag & SD_HAS_CONSTANT_EMISSION) if(shader_flag & SD_HAS_CONSTANT_EMISSION)
{ {
eval.x = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 2)); eval.x = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).constant_emission[0];
eval.y = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 3)); eval.y = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).constant_emission[1];
eval.z = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 4)); eval.z = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).constant_emission[2];
if((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) { if((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) {
ls->Ng = -ls->Ng; ls->Ng = -ls->Ng;
} }

@ -114,7 +114,7 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
sd->I = -ray->D; sd->I = -ray->D;
sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE); sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
#ifdef __INSTANCING__ #ifdef __INSTANCING__
if(isect->object != OBJECT_NONE) { if(isect->object != OBJECT_NONE) {
@ -199,7 +199,7 @@ void shader_setup_from_subsurface(
motion_triangle_shader_setup(kg, sd, isect, ray, true); motion_triangle_shader_setup(kg, sd, isect, ray, true);
} }
sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE); sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
# ifdef __INSTANCING__ # ifdef __INSTANCING__
if(isect->object != OBJECT_NONE) { if(isect->object != OBJECT_NONE) {
@ -276,7 +276,7 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
sd->time = time; sd->time = time;
sd->ray_length = t; sd->ray_length = t;
sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE); sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
sd->object_flag = 0; sd->object_flag = 0;
if(sd->object != OBJECT_NONE) { if(sd->object != OBJECT_NONE) {
sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object_flag |= kernel_tex_fetch(__object_flag,
@ -386,7 +386,7 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
sd->Ng = -ray->D; sd->Ng = -ray->D;
sd->I = -ray->D; sd->I = -ray->D;
sd->shader = kernel_data.background.surface_shader; sd->shader = kernel_data.background.surface_shader;
sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE); sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
sd->object_flag = 0; sd->object_flag = 0;
sd->time = ray->time; sd->time = ray->time;
sd->ray_length = 0.0f; sd->ray_length = 0.0f;
@ -1181,7 +1181,7 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
sd->shader = stack[i].shader; sd->shader = stack[i].shader;
sd->flag &= ~SD_SHADER_FLAGS; sd->flag &= ~SD_SHADER_FLAGS;
sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE); sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
sd->object_flag &= ~SD_OBJECT_FLAGS; sd->object_flag &= ~SD_OBJECT_FLAGS;
if(sd->object != OBJECT_NONE) { if(sd->object != OBJECT_NONE) {
@ -1254,7 +1254,7 @@ ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect
shader = __float_as_int(str.z); shader = __float_as_int(str.z);
} }
#endif #endif
int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE); int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0; return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
} }

@ -61,11 +61,11 @@ KERNEL_TEX(float2, __light_background_marginal_cdf)
KERNEL_TEX(float2, __light_background_conditional_cdf) KERNEL_TEX(float2, __light_background_conditional_cdf)
/* particles */ /* particles */
KERNEL_TEX(float4, __particles) KERNEL_TEX(KernelParticle, __particles)
/* shaders */ /* shaders */
KERNEL_TEX(uint4, __svm_nodes) KERNEL_TEX(uint4, __svm_nodes)
KERNEL_TEX(uint, __shader_flag) KERNEL_TEX(KernelShader, __shaders)
KERNEL_TEX(uint, __object_flag) KERNEL_TEX(uint, __object_flag)
/* lookup tables */ /* lookup tables */

@ -39,8 +39,6 @@ CCL_NAMESPACE_BEGIN
#define FILTER_TABLE_SIZE 1024 #define FILTER_TABLE_SIZE 1024
#define RAMP_TABLE_SIZE 256 #define RAMP_TABLE_SIZE 256
#define SHUTTER_TABLE_SIZE 256 #define SHUTTER_TABLE_SIZE 256
#define PARTICLE_SIZE 5
#define SHADER_SIZE 5
#define BSSRDF_MIN_RADIUS 1e-8f #define BSSRDF_MIN_RADIUS 1e-8f
#define BSSRDF_MAX_HITS 4 #define BSSRDF_MAX_HITS 4
@ -923,7 +921,7 @@ enum ShaderDataFlag {
SD_HAS_BUMP = (1 << 25), SD_HAS_BUMP = (1 << 25),
/* Has true displacement. */ /* Has true displacement. */
SD_HAS_DISPLACEMENT = (1 << 26), SD_HAS_DISPLACEMENT = (1 << 26),
/* Has constant emission (value stored in __shader_flag) */ /* Has constant emission (value stored in __shaders) */
SD_HAS_CONSTANT_EMISSION = (1 << 27), SD_HAS_CONSTANT_EMISSION = (1 << 27),
/* Needs to access attributes */ /* Needs to access attributes */
SD_NEED_ATTRIBUTES = (1 << 28), SD_NEED_ATTRIBUTES = (1 << 28),
@ -1511,6 +1509,29 @@ typedef struct KernelLightDistribution {
} KernelLightDistribution; } KernelLightDistribution;
static_assert_align(KernelLightDistribution, 16); static_assert_align(KernelLightDistribution, 16);
/* Per-particle record; the __particles kernel texture is declared with this
 * element type. The particle_*() accessors fetch one element by particle
 * number and return a single member of it. */
typedef struct KernelParticle {
/* Returned by particle_index(). Stored as int here while the accessor
 * returns uint, so the value is converted implicitly on read. */
int index;
/* Returned by particle_age(). */
float age;
/* Returned by particle_lifetime(). */
float lifetime;
/* Returned by particle_size(). */
float size;
/* Returned as a full float4 by particle_rotation(). */
float4 rotation;
/* Only the xyz components of the following members are used; the accessors
 * convert them back with float4_to_float3(). float4 is used instead of
 * float3 to ensure consistent padding/alignment across devices. */
float4 location;
float4 velocity;
float4 angular_velocity;
} KernelParticle;
/* NOTE(review): presumably a compile-time check that the struct layout is
 * compatible with 16-byte alignment - confirm against the macro definition. */
static_assert_align(KernelParticle, 16);
/* Per-shader record; the __shaders kernel texture is declared with this
 * element type. Kernel code indexes it with (shader & SHADER_MASK). */
typedef struct KernelShader {
/* Constant emission as three separate floats (x, y, z order). Kernel code
 * reads it only when SD_HAS_CONSTANT_EMISSION is set in flags. */
float constant_emission[3];
/* Explicit padding after the three emission floats. */
float pad1;
/* Bitmask tested against SD_* values (e.g. SD_HAS_TRANSPARENT_SHADOW,
 * SD_HAS_CONSTANT_EMISSION) and OR-ed into ShaderData::flag. */
int flags;
/* Returned by shader_pass_id(). */
int pass_id;
/* Explicit trailing padding.
 * NOTE(review): the host code that fills this struct only writes flags,
 * pass_id and constant_emission; the pad members are never assigned there.
 * Confirm whether the allocation zero-initializes them. */
int pad2, pad3;
} KernelShader;
/* NOTE(review): presumably a compile-time check that the struct layout is
 * compatible with 16-byte alignment - confirm against the macro definition. */
static_assert_align(KernelShader, 16);
/* Declarations required for split kernel */ /* Declarations required for split kernel */
/* Macro for queues */ /* Macro for queues */

@ -104,7 +104,7 @@ ccl_device float kernel_volume_channel_get(float3 value, int channel)
ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack) ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
{ {
for(int i = 0; stack[i].shader != SHADER_NONE; i++) { for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE); int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
if(shader_flag & SD_HETEROGENEOUS_VOLUME) { if(shader_flag & SD_HETEROGENEOUS_VOLUME) {
return true; return true;
@ -134,7 +134,7 @@ ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stac
int method = -1; int method = -1;
for(int i = 0; stack[i].shader != SHADER_NONE; i++) { for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE); int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
if(shader_flag & SD_VOLUME_MIS) { if(shader_flag & SD_VOLUME_MIS) {
return SD_VOLUME_MIS; return SD_VOLUME_MIS;

@ -62,14 +62,10 @@ void ParticleSystemManager::device_update_particles(Device *, DeviceScene *dscen
for(size_t j = 0; j < scene->particle_systems.size(); j++) for(size_t j = 0; j < scene->particle_systems.size(); j++)
num_particles += scene->particle_systems[j]->particles.size(); num_particles += scene->particle_systems[j]->particles.size();
float4 *particles = dscene->particles.alloc(PARTICLE_SIZE*num_particles); KernelParticle *kparticles = dscene->particles.alloc(num_particles);
/* dummy particle */ /* dummy particle */
particles[0] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); memset(kparticles, 0, sizeof(KernelParticle));
particles[1] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
particles[2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
particles[3] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
particles[4] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
int i = 1; int i = 1;
for(size_t j = 0; j < scene->particle_systems.size(); j++) { for(size_t j = 0; j < scene->particle_systems.size(); j++) {
@ -78,13 +74,15 @@ void ParticleSystemManager::device_update_particles(Device *, DeviceScene *dscen
for(size_t k = 0; k < psys->particles.size(); k++) { for(size_t k = 0; k < psys->particles.size(); k++) {
/* pack in texture */ /* pack in texture */
Particle& pa = psys->particles[k]; Particle& pa = psys->particles[k];
int offset = i*PARTICLE_SIZE;
particles[offset] = make_float4(__uint_as_float(pa.index), pa.age, pa.lifetime, pa.size); kparticles[i].index = pa.index;
particles[offset+1] = pa.rotation; kparticles[i].age = pa.age;
particles[offset+2] = make_float4(pa.location.x, pa.location.y, pa.location.z, pa.velocity.x); kparticles[i].lifetime = pa.lifetime;
particles[offset+3] = make_float4(pa.velocity.y, pa.velocity.z, pa.angular_velocity.x, pa.angular_velocity.y); kparticles[i].size = pa.size;
particles[offset+4] = make_float4(pa.angular_velocity.z, 0.0f, 0.0f, 0.0f); kparticles[i].rotation = pa.rotation;
kparticles[i].location = float3_to_float4(pa.location);
kparticles[i].velocity = float3_to_float4(pa.velocity);
kparticles[i].angular_velocity = float3_to_float4(pa.angular_velocity);
i++; i++;

@ -71,7 +71,7 @@ DeviceScene::DeviceScene(Device *device)
light_background_conditional_cdf(device, "__light_background_conditional_cdf", MEM_TEXTURE), light_background_conditional_cdf(device, "__light_background_conditional_cdf", MEM_TEXTURE),
particles(device, "__particles", MEM_TEXTURE), particles(device, "__particles", MEM_TEXTURE),
svm_nodes(device, "__svm_nodes", MEM_TEXTURE), svm_nodes(device, "__svm_nodes", MEM_TEXTURE),
shader_flag(device, "__shader_flag", MEM_TEXTURE), shaders(device, "__shaders", MEM_TEXTURE),
object_flag(device, "__object_flag", MEM_TEXTURE), object_flag(device, "__object_flag", MEM_TEXTURE),
lookup_table(device, "__lookup_table", MEM_TEXTURE), lookup_table(device, "__lookup_table", MEM_TEXTURE),
sobol_directions(device, "__sobol_directions", MEM_TEXTURE) sobol_directions(device, "__sobol_directions", MEM_TEXTURE)

@ -102,11 +102,11 @@ public:
device_vector<float2> light_background_conditional_cdf; device_vector<float2> light_background_conditional_cdf;
/* particles */ /* particles */
device_vector<float4> particles; device_vector<KernelParticle> particles;
/* shaders */ /* shaders */
device_vector<int4> svm_nodes; device_vector<int4> svm_nodes;
device_vector<uint> shader_flag; device_vector<KernelShader> shaders;
device_vector<uint> object_flag; device_vector<uint> object_flag;
/* lookup tables */ /* lookup tables */

@ -432,14 +432,12 @@ void ShaderManager::device_update_common(Device *device,
Scene *scene, Scene *scene,
Progress& /*progress*/) Progress& /*progress*/)
{ {
dscene->shader_flag.free(); dscene->shaders.free();
if(scene->shaders.size() == 0) if(scene->shaders.size() == 0)
return; return;
uint shader_flag_size = scene->shaders.size()*SHADER_SIZE; KernelShader *kshader = dscene->shaders.alloc(scene->shaders.size());
uint *shader_flag = dscene->shader_flag.alloc(shader_flag_size);
uint i = 0;
bool has_volumes = false; bool has_volumes = false;
bool has_transparent_shadow = false; bool has_transparent_shadow = false;
@ -487,16 +485,17 @@ void ShaderManager::device_update_common(Device *device,
flag |= SD_HAS_CONSTANT_EMISSION; flag |= SD_HAS_CONSTANT_EMISSION;
/* regular shader */ /* regular shader */
shader_flag[i++] = flag; kshader->flags = flag;
shader_flag[i++] = shader->pass_id; kshader->pass_id = shader->pass_id;
shader_flag[i++] = __float_as_int(constant_emission.x); kshader->constant_emission[0] = constant_emission.x;
shader_flag[i++] = __float_as_int(constant_emission.y); kshader->constant_emission[1] = constant_emission.y;
shader_flag[i++] = __float_as_int(constant_emission.z); kshader->constant_emission[2] = constant_emission.z;
kshader++;
has_transparent_shadow |= (flag & SD_HAS_TRANSPARENT_SHADOW) != 0; has_transparent_shadow |= (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
} }
dscene->shader_flag.copy_to_device(); dscene->shaders.copy_to_device();
/* lookup tables */ /* lookup tables */
KernelTables *ktables = &dscene->data.tables; KernelTables *ktables = &dscene->data.tables;
@ -525,7 +524,7 @@ void ShaderManager::device_free_common(Device *, DeviceScene *dscene, Scene *sce
{ {
scene->lookup_tables->remove_table(&beckmann_table_offset); scene->lookup_tables->remove_table(&beckmann_table_offset);
dscene->shader_flag.free(); dscene->shaders.free();
} }
void ShaderManager::add_default(Scene *scene) void ShaderManager::add_default(Scene *scene)