Cycles: Refactor Image Texture limits.

Instead of treating Fermi GPU limits as default,
and overriding them for other devices,
we now set them explicitly for each platform.

* Due to setting values for all platforms,
we don't have to offset the slot id for OpenCL anymore,
as the image manager won't add float images for OpenCL now.

* Bugfix: TEX_NUM_FLOAT_IMAGES was always 5, even for CPU,
so the code in svm_image.h clamped float textures with alpha on CPU after the 5th slot.

Reviewers: #cycles, brecht

Reviewed By: #cycles, brecht

Subscribers: brecht

Differential Revision: https://developer.blender.org/D1925
This commit is contained in:
Thomas Dinges 2016-04-16 20:48:33 +02:00
parent b973911fee
commit 557544f2c4
6 changed files with 66 additions and 52 deletions

@ -16,6 +16,15 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
/* Float textures on various devices. */
#if defined(__KERNEL_CPU__)
#define TEX_NUM_FLOAT_IMAGES TEX_NUM_FLOAT_IMAGES_CPU
#elif defined(__KERNEL_CUDA__)
#define TEX_NUM_FLOAT_IMAGES TEX_NUM_FLOAT_IMAGES_CUDA
#else
#define TEX_NUM_FLOAT_IMAGES TEX_NUM_FLOAT_IMAGES_OPENCL
#endif
#ifdef __KERNEL_OPENCL__ #ifdef __KERNEL_OPENCL__
/* For OpenCL all images are packed in a single array, and we do manual lookup /* For OpenCL all images are packed in a single array, and we do manual lookup
@ -50,12 +59,6 @@ ccl_device_inline float svm_image_texture_frac(float x, int *ix)
ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha) ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
{ {
/* first slots are used by float textures, which are not supported here */
if(id < TEX_NUM_FLOAT_IMAGES)
return make_float4(1.0f, 0.0f, 1.0f, 1.0f);
id -= TEX_NUM_FLOAT_IMAGES;
uint4 info = kernel_tex_fetch(__tex_image_packed_info, id); uint4 info = kernel_tex_fetch(__tex_image_packed_info, id);
uint width = info.x; uint width = info.x;
uint height = info.y; uint height = info.y;

@ -30,16 +30,46 @@
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
ImageManager::ImageManager() ImageManager::ImageManager(const DeviceInfo& info)
{ {
need_update = true; need_update = true;
pack_images = false; pack_images = false;
osl_texture_system = NULL; osl_texture_system = NULL;
animation_frame = 0; animation_frame = 0;
tex_num_images = TEX_NUM_IMAGES; /* Set image limits */
tex_num_float_images = TEX_NUM_FLOAT_IMAGES;
tex_image_byte_start = TEX_IMAGE_BYTE_START; /* CPU */
if(info.type == DEVICE_CPU) {
tex_num_byte_images = TEX_NUM_BYTE_IMAGES_CPU;
tex_num_float_images = TEX_NUM_FLOAT_IMAGES_CPU;
tex_image_byte_start = TEX_IMAGE_BYTE_START_CPU;
}
/* CUDA (Fermi) */
else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && !info.extended_images) {
tex_num_byte_images = TEX_NUM_BYTE_IMAGES_CUDA;
tex_num_float_images = TEX_NUM_FLOAT_IMAGES_CUDA;
tex_image_byte_start = TEX_IMAGE_BYTE_START_CUDA;
}
/* CUDA (Kepler and above) */
else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && info.extended_images) {
tex_num_byte_images = TEX_NUM_BYTE_IMAGES_CUDA_KEPLER;
tex_num_float_images = TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER;
tex_image_byte_start = TEX_IMAGE_BYTE_START_CUDA_KELPER;
}
/* OpenCL */
else if(info.pack_images) {
tex_num_byte_images = TEX_NUM_BYTE_IMAGES_OPENCL;
tex_num_float_images = TEX_NUM_FLOAT_IMAGES_OPENCL;
tex_image_byte_start = TEX_IMAGE_BYTE_START_OPENCL;
}
/* Should never happen */
else {
tex_num_byte_images = 0;
tex_num_float_images = 0;
tex_image_byte_start = 0;
assert(0);
}
} }
ImageManager::~ImageManager() ImageManager::~ImageManager()
@ -60,21 +90,6 @@ void ImageManager::set_osl_texture_system(void *texture_system)
osl_texture_system = texture_system; osl_texture_system = texture_system;
} }
void ImageManager::set_extended_image_limits(const DeviceInfo& info)
{
if(info.type == DEVICE_CPU) {
tex_num_images = TEX_EXTENDED_NUM_IMAGES_CPU;
tex_num_float_images = TEX_EXTENDED_NUM_FLOAT_IMAGES;
tex_image_byte_start = TEX_EXTENDED_IMAGE_BYTE_START;
}
else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && info.extended_images) {
tex_num_images = TEX_EXTENDED_NUM_IMAGES_GPU;
}
else if(info.pack_images) {
tex_num_images = TEX_PACKED_NUM_IMAGES;
}
}
bool ImageManager::set_animation_frame_update(int frame) bool ImageManager::set_animation_frame_update(int frame)
{ {
if(frame != animation_frame) { if(frame != animation_frame) {
@ -267,9 +282,9 @@ int ImageManager::add_image(const string& filename,
if(slot == images.size()) { if(slot == images.size()) {
/* max images limit reached */ /* max images limit reached */
if(images.size() == tex_num_images) { if(images.size() == tex_num_byte_images) {
printf("ImageManager::add_image: byte image limit reached %d, skipping '%s'\n", printf("ImageManager::add_image: byte image limit reached %d, skipping '%s'\n",
tex_num_images, filename.c_str()); tex_num_byte_images, filename.c_str());
return -1; return -1;
} }

@ -32,7 +32,7 @@ class Progress;
class ImageManager { class ImageManager {
public: public:
ImageManager(); ImageManager(const DeviceInfo& info);
~ImageManager(); ~ImageManager();
int add_image(const string& filename, int add_image(const string& filename,
@ -62,7 +62,6 @@ public:
void set_osl_texture_system(void *texture_system); void set_osl_texture_system(void *texture_system);
void set_pack_images(bool pack_images_); void set_pack_images(bool pack_images_);
void set_extended_image_limits(const DeviceInfo& info);
bool set_animation_frame_update(int frame); bool set_animation_frame_update(int frame);
bool need_update; bool need_update;
@ -86,7 +85,7 @@ public:
}; };
private: private:
int tex_num_images; int tex_num_byte_images;
int tex_num_float_images; int tex_num_float_images;
int tex_image_byte_start; int tex_image_byte_start;
thread_mutex device_mutex; thread_mutex device_mutex;

@ -54,7 +54,7 @@ Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_)
mesh_manager = new MeshManager(); mesh_manager = new MeshManager();
object_manager = new ObjectManager(); object_manager = new ObjectManager();
integrator = new Integrator(); integrator = new Integrator();
image_manager = new ImageManager(); image_manager = new ImageManager(device_info_);
particle_system_manager = new ParticleSystemManager(); particle_system_manager = new ParticleSystemManager();
curve_system_manager = new CurveSystemManager(); curve_system_manager = new CurveSystemManager();
bake_manager = new BakeManager(); bake_manager = new BakeManager();
@ -64,9 +64,6 @@ Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_)
shader_manager = ShaderManager::create(this, params.shadingsystem); shader_manager = ShaderManager::create(this, params.shadingsystem);
else else
shader_manager = ShaderManager::create(this, SHADINGSYSTEM_SVM); shader_manager = ShaderManager::create(this, SHADINGSYSTEM_SVM);
/* Extended image limits for CPU and GPUs */
image_manager->set_extended_image_limits(device_info_);
} }
Scene::~Scene() Scene::~Scene()

@ -109,8 +109,8 @@ public:
device_vector<uint> sobol_directions; device_vector<uint> sobol_directions;
/* cpu images */ /* cpu images */
device_vector<uchar4> tex_image[TEX_EXTENDED_NUM_IMAGES_CPU]; device_vector<uchar4> tex_image[TEX_NUM_BYTE_IMAGES_CPU];
device_vector<float4> tex_float_image[TEX_EXTENDED_NUM_FLOAT_IMAGES]; device_vector<float4> tex_float_image[TEX_NUM_FLOAT_IMAGES_CPU];
/* opencl images */ /* opencl images */
device_vector<uchar4> tex_image_packed; device_vector<uchar4> tex_image_packed;

@ -21,26 +21,26 @@ CCL_NAMESPACE_BEGIN
/* Texture limits on various devices. */ /* Texture limits on various devices. */
#define TEX_NUM_FLOAT_IMAGES 5 /* CPU */
#define TEX_NUM_BYTE_IMAGES_CPU 1024
#define TEX_NUM_FLOAT_IMAGES_CPU 1024
#define TEX_IMAGE_BYTE_START_CPU TEX_NUM_FLOAT_IMAGES_CPU
/* generic */ /* CUDA (Fermi) */
#define TEX_NUM_IMAGES 88 #define TEX_NUM_BYTE_IMAGES_CUDA 88
#define TEX_IMAGE_BYTE_START TEX_NUM_FLOAT_IMAGES #define TEX_NUM_FLOAT_IMAGES_CUDA 5
#define TEX_IMAGE_BYTE_START_CUDA TEX_NUM_FLOAT_IMAGES_CUDA
/* extended gpu */ /* CUDA (KEPLER and above) */
#define TEX_EXTENDED_NUM_IMAGES_GPU 145 #define TEX_NUM_BYTE_IMAGES_CUDA_KEPLER 145
#define TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER 5
#define TEX_IMAGE_BYTE_START_CUDA_KELPER TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER
/* extended cpu */ /* OpenCL */
#define TEX_EXTENDED_NUM_FLOAT_IMAGES 1024 #define TEX_NUM_BYTE_IMAGES_OPENCL 1024
#define TEX_EXTENDED_NUM_IMAGES_CPU 1024 #define TEX_NUM_FLOAT_IMAGES_OPENCL 0
#define TEX_EXTENDED_IMAGE_BYTE_START TEX_EXTENDED_NUM_FLOAT_IMAGES #define TEX_IMAGE_BYTE_START_OPENCL TEX_NUM_FLOAT_IMAGES_OPENCL
/* Limitations for packed images.
*
* Technically number of textures is unlimited, but it should in
* fact be in sync with CPU limitations.
*/
#define TEX_PACKED_NUM_IMAGES 1024
/* Color to use when textures are not found. */ /* Color to use when textures are not found. */
#define TEX_IMAGE_MISSING_R 1 #define TEX_IMAGE_MISSING_R 1