Cycles: Refactor Image Texture limits.

Instead of treating the Fermi GPU limits as the default
and overriding them for other devices,
we now set the limits explicitly for each platform.

* Because the values are now set for every platform,
we no longer have to offset the slot id for OpenCL,
as the image manager won't add float images for OpenCL anymore.

* Bugfix: TEX_NUM_FLOAT_IMAGES was always 5, even for CPU,
so the code in svm_image.h clamped float textures with alpha on CPU after the 5th slot.

Reviewers: #cycles, brecht

Reviewed By: #cycles, brecht

Subscribers: brecht

Differential Revision: https://developer.blender.org/D1925
This commit is contained in:
Thomas Dinges 2016-04-16 20:48:33 +02:00
parent b973911fee
commit 557544f2c4
6 changed files with 66 additions and 52 deletions

@ -16,6 +16,15 @@
CCL_NAMESPACE_BEGIN
/* Float textures on various devices.
 *
 * Map the generic TEX_NUM_FLOAT_IMAGES name onto the per-device limit for the
 * kernel being compiled: CPU and CUDA kernels get their own constants, and
 * every other target falls through to the OpenCL value.
 *
 * NOTE(review): CUDA kernels always pick TEX_NUM_FLOAT_IMAGES_CUDA here; the
 * separate TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER constant is not consulted. Both
 * are currently defined as 5 - confirm they are meant to stay in sync. */
#if defined(__KERNEL_CPU__)
#define TEX_NUM_FLOAT_IMAGES TEX_NUM_FLOAT_IMAGES_CPU
#elif defined(__KERNEL_CUDA__)
#define TEX_NUM_FLOAT_IMAGES TEX_NUM_FLOAT_IMAGES_CUDA
#else
#define TEX_NUM_FLOAT_IMAGES TEX_NUM_FLOAT_IMAGES_OPENCL
#endif
#ifdef __KERNEL_OPENCL__
/* For OpenCL all images are packed in a single array, and we do manual lookup
@ -50,12 +59,6 @@ ccl_device_inline float svm_image_texture_frac(float x, int *ix)
ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
{
/* first slots are used by float textures, which are not supported here */
if(id < TEX_NUM_FLOAT_IMAGES)
return make_float4(1.0f, 0.0f, 1.0f, 1.0f);
id -= TEX_NUM_FLOAT_IMAGES;
uint4 info = kernel_tex_fetch(__tex_image_packed_info, id);
uint width = info.x;
uint height = info.y;

@ -30,16 +30,46 @@
CCL_NAMESPACE_BEGIN
/* Construct the image manager: reset the bookkeeping flags and choose the
 * byte/float image slot limits that match the device described by `info`.
 *
 * NOTE(review): this text comes from a diff view, so both the parameterless
 * and the DeviceInfo signature lines are present below. */
ImageManager::ImageManager()
ImageManager::ImageManager(const DeviceInfo& info)
{
need_update = true;
pack_images = false;
osl_texture_system = NULL;
animation_frame = 0;
/* NOTE(review): the next three assignments use the generic (non per-device)
 * constants and are followed by the per-device assignments below; they look
 * like the pre-refactor lines of the diff - confirm before relying on them. */
tex_num_images = TEX_NUM_IMAGES;
tex_num_float_images = TEX_NUM_FLOAT_IMAGES;
tex_image_byte_start = TEX_IMAGE_BYTE_START;
/* Set image limits */
/* CPU */
if(info.type == DEVICE_CPU) {
tex_num_byte_images = TEX_NUM_BYTE_IMAGES_CPU;
tex_num_float_images = TEX_NUM_FLOAT_IMAGES_CPU;
tex_image_byte_start = TEX_IMAGE_BYTE_START_CPU;
}
/* CUDA (Fermi) */
/* CUDA and multi-device setups without extended image support. */
else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && !info.extended_images) {
tex_num_byte_images = TEX_NUM_BYTE_IMAGES_CUDA;
tex_num_float_images = TEX_NUM_FLOAT_IMAGES_CUDA;
tex_image_byte_start = TEX_IMAGE_BYTE_START_CUDA;
}
/* CUDA (Kepler and above) */
/* Same device types as above, but with extended image support reported. */
else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && info.extended_images) {
tex_num_byte_images = TEX_NUM_BYTE_IMAGES_CUDA_KEPLER;
tex_num_float_images = TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER;
/* NOTE(review): "KELPER" is presumably a misspelling of "KEPLER". It matches
 * the macro name as it is defined, so a rename has to change the definition
 * and this use together. */
tex_image_byte_start = TEX_IMAGE_BYTE_START_CUDA_KELPER;
}
/* OpenCL */
/* Selected by the pack_images flag rather than by device type. */
else if(info.pack_images) {
tex_num_byte_images = TEX_NUM_BYTE_IMAGES_OPENCL;
tex_num_float_images = TEX_NUM_FLOAT_IMAGES_OPENCL;
tex_image_byte_start = TEX_IMAGE_BYTE_START_OPENCL;
}
/* Should never happen */
/* No branch matched: zero every limit and trip the assert. If asserts are
 * compiled out, the manager is left with all limits at zero. */
else {
tex_num_byte_images = 0;
tex_num_float_images = 0;
tex_image_byte_start = 0;
assert(0);
}
}
ImageManager::~ImageManager()
@ -60,21 +90,6 @@ void ImageManager::set_osl_texture_system(void *texture_system)
osl_texture_system = texture_system;
}
/* Override the image limits according to the device described by `info`.
 *
 * - CPU: replaces the image count, float image count and byte start offset
 *   with the TEX_EXTENDED_* values.
 * - CUDA or multi-device with extended_images set: only the image count is
 *   replaced.
 * - Devices with pack_images set: only the image count is replaced, with the
 *   packed limit.
 *
 * Any other device leaves all three members untouched. */
void ImageManager::set_extended_image_limits(const DeviceInfo& info)
{
if(info.type == DEVICE_CPU) {
tex_num_images = TEX_EXTENDED_NUM_IMAGES_CPU;
tex_num_float_images = TEX_EXTENDED_NUM_FLOAT_IMAGES;
tex_image_byte_start = TEX_EXTENDED_IMAGE_BYTE_START;
}
else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && info.extended_images) {
tex_num_images = TEX_EXTENDED_NUM_IMAGES_GPU;
}
else if(info.pack_images) {
tex_num_images = TEX_PACKED_NUM_IMAGES;
}
}
bool ImageManager::set_animation_frame_update(int frame)
{
if(frame != animation_frame) {
@ -267,9 +282,9 @@ int ImageManager::add_image(const string& filename,
if(slot == images.size()) {
/* max images limit reached */
if(images.size() == tex_num_images) {
if(images.size() == tex_num_byte_images) {
printf("ImageManager::add_image: byte image limit reached %d, skipping '%s'\n",
tex_num_images, filename.c_str());
tex_num_byte_images, filename.c_str());
return -1;
}

@ -32,7 +32,7 @@ class Progress;
class ImageManager {
public:
ImageManager();
ImageManager(const DeviceInfo& info);
~ImageManager();
int add_image(const string& filename,
@ -62,7 +62,6 @@ public:
void set_osl_texture_system(void *texture_system);
void set_pack_images(bool pack_images_);
void set_extended_image_limits(const DeviceInfo& info);
bool set_animation_frame_update(int frame);
bool need_update;
@ -86,7 +85,7 @@ public:
};
private:
int tex_num_images;
int tex_num_byte_images;
int tex_num_float_images;
int tex_image_byte_start;
thread_mutex device_mutex;

@ -54,7 +54,7 @@ Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_)
mesh_manager = new MeshManager();
object_manager = new ObjectManager();
integrator = new Integrator();
image_manager = new ImageManager();
image_manager = new ImageManager(device_info_);
particle_system_manager = new ParticleSystemManager();
curve_system_manager = new CurveSystemManager();
bake_manager = new BakeManager();
@ -64,9 +64,6 @@ Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_)
shader_manager = ShaderManager::create(this, params.shadingsystem);
else
shader_manager = ShaderManager::create(this, SHADINGSYSTEM_SVM);
/* Extended image limits for CPU and GPUs */
image_manager->set_extended_image_limits(device_info_);
}
Scene::~Scene()

@ -109,8 +109,8 @@ public:
device_vector<uint> sobol_directions;
/* cpu images */
device_vector<uchar4> tex_image[TEX_EXTENDED_NUM_IMAGES_CPU];
device_vector<float4> tex_float_image[TEX_EXTENDED_NUM_FLOAT_IMAGES];
device_vector<uchar4> tex_image[TEX_NUM_BYTE_IMAGES_CPU];
device_vector<float4> tex_float_image[TEX_NUM_FLOAT_IMAGES_CPU];
/* opencl images */
device_vector<uchar4> tex_image_packed;

@ -21,26 +21,26 @@ CCL_NAMESPACE_BEGIN
/* Texture limits on various devices.
 *
 * For each device family there is a byte image count, a float image count
 * and a "byte start" value that is defined as that family's float image
 * count.
 *
 * NOTE(review): this text comes from a diff view, so older generic/extended
 * definitions appear interleaved with the per-device ones. */
#define TEX_NUM_FLOAT_IMAGES 5
/* CPU */
#define TEX_NUM_BYTE_IMAGES_CPU 1024
#define TEX_NUM_FLOAT_IMAGES_CPU 1024
#define TEX_IMAGE_BYTE_START_CPU TEX_NUM_FLOAT_IMAGES_CPU
/* generic */
#define TEX_NUM_IMAGES 88
#define TEX_IMAGE_BYTE_START TEX_NUM_FLOAT_IMAGES
/* CUDA (Fermi) */
#define TEX_NUM_BYTE_IMAGES_CUDA 88
#define TEX_NUM_FLOAT_IMAGES_CUDA 5
#define TEX_IMAGE_BYTE_START_CUDA TEX_NUM_FLOAT_IMAGES_CUDA
/* extended gpu */
#define TEX_EXTENDED_NUM_IMAGES_GPU 145
/* CUDA (KEPLER and above) */
#define TEX_NUM_BYTE_IMAGES_CUDA_KEPLER 145
#define TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER 5
/* NOTE(review): "KELPER" is presumably a misspelling of "KEPLER"; the image
 * manager constructor refers to the macro by this exact spelling, so a rename
 * has to change both places together. */
#define TEX_IMAGE_BYTE_START_CUDA_KELPER TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER
/* extended cpu */
#define TEX_EXTENDED_NUM_FLOAT_IMAGES 1024
#define TEX_EXTENDED_NUM_IMAGES_CPU 1024
#define TEX_EXTENDED_IMAGE_BYTE_START TEX_EXTENDED_NUM_FLOAT_IMAGES
/* OpenCL */
/* The float image count is 0 here, so the byte start value is 0 as well. */
#define TEX_NUM_BYTE_IMAGES_OPENCL 1024
#define TEX_NUM_FLOAT_IMAGES_OPENCL 0
#define TEX_IMAGE_BYTE_START_OPENCL TEX_NUM_FLOAT_IMAGES_OPENCL
/* Limitations for packed images.
*
* Technically number of textures is unlimited, but it should in
* fact be in sync with CPU limitations.
*/
#define TEX_PACKED_NUM_IMAGES 1024
/* Color to use when textures are not found. */
#define TEX_IMAGE_MISSING_R 1