diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 7fc8d2b5706..46bef96dc3f 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -109,6 +109,13 @@ template<typename T> struct texture_image {
 		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
 	}
 
+	/* Single channel byte texel: scale to 0..1 and replicate into RGB, alpha is 1. */
+	ccl_always_inline float4 read(uchar r)
+	{
+		float f = r*(1.0f/255.0f);
+		return make_float4(f, f, f, 1.0f);
+	}
+
 	ccl_always_inline float4 read(float r)
 	{
 		/* TODO(dingto): Optimize this, so interpolation
@@ -479,6 +486,7 @@ typedef texture<int> texture_int;
 typedef texture<uint4> texture_uint4;
 typedef texture<uchar4> texture_uchar4;
 typedef texture_image<float> texture_image_float;
+typedef texture_image<uchar> texture_image_uchar;
 typedef texture_image<float4> texture_image_float4;
 typedef texture_image<uchar4> texture_image_uchar4;
 
@@ -490,17 +498,21 @@ typedef texture_image<uchar4> texture_image_uchar4;
 #define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))
 
+/* Image slot ranges are tested from the highest start offset down: byte, float, byte4, float4. */
 #define kernel_tex_image_interp(tex, x, y) \
-	((tex >= TEX_IMAGE_FLOAT_START_CPU) ? kg->texture_float_images[tex - TEX_IMAGE_FLOAT_START_CPU].interp(x, y) : \
+	((tex >= TEX_IMAGE_BYTE_START_CPU) ? kg->texture_byte_images[tex - TEX_IMAGE_BYTE_START_CPU].interp(x, y) : \
+	(tex >= TEX_IMAGE_FLOAT_START_CPU) ? kg->texture_float_images[tex - TEX_IMAGE_FLOAT_START_CPU].interp(x, y) : \
 	(tex >= TEX_IMAGE_BYTE4_START_CPU) ? kg->texture_byte4_images[tex - TEX_IMAGE_BYTE4_START_CPU].interp(x, y) : \
 	kg->texture_float4_images[tex].interp(x, y))
 
 #define kernel_tex_image_interp_3d(tex, x, y, z) \
-	((tex >= TEX_IMAGE_FLOAT_START_CPU) ? kg->texture_float_images[tex - TEX_IMAGE_FLOAT_START_CPU].interp_3d(x, y, z) : \
+	((tex >= TEX_IMAGE_BYTE_START_CPU) ? kg->texture_byte_images[tex - TEX_IMAGE_BYTE_START_CPU].interp_3d(x, y, z) : \
+	(tex >= TEX_IMAGE_FLOAT_START_CPU) ? kg->texture_float_images[tex - TEX_IMAGE_FLOAT_START_CPU].interp_3d(x, y, z) : \
 	(tex >= TEX_IMAGE_BYTE4_START_CPU) ? kg->texture_byte4_images[tex - TEX_IMAGE_BYTE4_START_CPU].interp_3d(x, y, z) : \
 	kg->texture_float4_images[tex].interp_3d(x, y, z))
 
 #define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) \
-	((tex >= TEX_IMAGE_FLOAT_START_CPU) ? kg->texture_float_images[tex - TEX_IMAGE_FLOAT_START_CPU].interp_3d_ex(x, y, z, interpolation) : \
+	((tex >= TEX_IMAGE_BYTE_START_CPU) ? kg->texture_byte_images[tex - TEX_IMAGE_BYTE_START_CPU].interp_3d_ex(x, y, z, interpolation) : \
+	(tex >= TEX_IMAGE_FLOAT_START_CPU) ? kg->texture_float_images[tex - TEX_IMAGE_FLOAT_START_CPU].interp_3d_ex(x, y, z, interpolation) : \
 	(tex >= TEX_IMAGE_BYTE4_START_CPU) ? kg->texture_byte4_images[tex - TEX_IMAGE_BYTE4_START_CPU].interp_3d_ex(x, y, z, interpolation) : \
 	kg->texture_float4_images[tex].interp_3d_ex(x, y, z, interpolation))
 
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index 3af44e06179..c44ea1b051f 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -35,6 +35,7 @@ typedef struct KernelGlobals {
 	texture_image_uchar4 texture_byte4_images[TEX_NUM_BYTE4_IMAGES_CPU];
 	texture_image_float4 texture_float4_images[TEX_NUM_FLOAT4_IMAGES_CPU];
 	texture_image_float texture_float_images[TEX_NUM_FLOAT_IMAGES_CPU];
+	texture_image_uchar texture_byte_images[TEX_NUM_BYTE_IMAGES_CPU];
 
 # define KERNEL_TEX(type, ttype, name) ttype name;
 # define KERNEL_IMAGE_TEX(type, ttype, name)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index 960012e95e3..365ce891354 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -138,6 +138,24 @@ void kernel_tex_copy(KernelGlobals *kg,
 			tex->extension = extension;
 		}
 	}
+	else if(strstr(name, "__tex_image_byte")) {
+		/* NOTE(review): strstr() would also match a "__tex_image_byte4" name, so this
+		 * presumably relies on the byte4 branch being tested earlier - confirm. */
+		texture_image_uchar *tex = NULL;
+		int id = atoi(name + strlen("__tex_image_byte_"));
+		int array_index = id - TEX_IMAGE_BYTE_START_CPU;
+
+		if(array_index >= 0 && array_index < TEX_NUM_BYTE_IMAGES_CPU) {
+			tex = &kg->texture_byte_images[array_index];
+		}
+
+		if(tex) {
+			tex->data = (uchar*)mem;
+			tex->dimensions_set(width, height, depth);
+			tex->interpolation = interpolation;
+			tex->extension = extension;
+		}
+	}
 	else
 		assert(0);
 }
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 0985cca5bb6..23e07082b48 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -43,40 +43,50 @@ ImageManager::ImageManager(const DeviceInfo& info)
 		tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_IMAGES_CPU;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_IMAGES_CPU;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_IMAGES_CPU;
+		tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_IMAGES_CPU;
 		tex_image_byte4_start = TEX_IMAGE_BYTE4_START_CPU;
 		tex_image_float_start = TEX_IMAGE_FLOAT_START_CPU;
+		tex_image_byte_start = TEX_IMAGE_BYTE_START_CPU;
 	}
 	/* CUDA (Fermi) */
 	else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && !info.extended_images) {
 		tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_IMAGES_CUDA;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_IMAGES_CUDA;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_IMAGES_CUDA;
+		tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_IMAGES_CUDA;
 		tex_image_byte4_start = TEX_IMAGE_BYTE4_START_CUDA;
 		tex_image_float_start = TEX_IMAGE_FLOAT_START_CUDA;
+		tex_image_byte_start = TEX_IMAGE_BYTE_START_CUDA;
 	}
 	/* CUDA (Kepler and above) */
 	else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && info.extended_images) {
 		tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_IMAGES_CUDA_KEPLER;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_IMAGES_CUDA_KEPLER;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER;
+		tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_IMAGES_CUDA_KEPLER;
 		tex_image_byte4_start = TEX_IMAGE_BYTE4_START_CUDA_KEPLER;
 		tex_image_float_start = TEX_IMAGE_FLOAT_START_CUDA_KEPLER;
+		tex_image_byte_start = TEX_IMAGE_BYTE_START_CUDA_KEPLER;
 	}
 	/* OpenCL */
 	else if(info.pack_images) {
 		tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_IMAGES_OPENCL;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_IMAGES_OPENCL;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_IMAGES_OPENCL;
+		tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_IMAGES_OPENCL;
 		tex_image_byte4_start = TEX_IMAGE_BYTE4_START_OPENCL;
 		tex_image_float_start = TEX_IMAGE_FLOAT_START_OPENCL;
+		tex_image_byte_start = TEX_IMAGE_BYTE_START_OPENCL;
 	}
 	/* Should never happen */
 	else {
 		tex_num_images[IMAGE_DATA_TYPE_BYTE4] = 0;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT] = 0;
+		tex_num_images[IMAGE_DATA_TYPE_BYTE] = 0;
 		tex_image_byte4_start = 0;
 		tex_image_float_start = 0;
+		tex_image_byte_start = 0;
 		assert(0);
 	}
 }
@@ -137,8 +147,13 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen
 			else
 				return IMAGE_DATA_TYPE_FLOAT;
 		}
-		else
-			return IMAGE_DATA_TYPE_BYTE4;
+		else {
+			if(channels > 1)
+				return IMAGE_DATA_TYPE_BYTE4;
+			else
+				return IMAGE_DATA_TYPE_BYTE;
+		}
+
 	}
 
 	ImageInput *in = ImageInput::create(filename);
@@ -192,12 +207,16 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen
 		else
 			return IMAGE_DATA_TYPE_FLOAT;
 	}
-	else
-		return IMAGE_DATA_TYPE_BYTE4;
+	else {
+		if(channels > 1)
+			return IMAGE_DATA_TYPE_BYTE4;
+		else
+			return IMAGE_DATA_TYPE_BYTE;
+	}
 }
 
 /* We use a consecutive slot counting scheme on the devices, in order
- * float4, byte4, float.
+ * float4, byte4, float, byte.
  * These functions convert the slot ids from ImageManager "images" ones
  * to device ones and vice versa. */
 int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type)
@@ -206,13 +225,20 @@ int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type)
 		return slot + tex_image_byte4_start;
 	else if(type == IMAGE_DATA_TYPE_FLOAT)
 		return slot + tex_image_float_start;
+	else if(type == IMAGE_DATA_TYPE_BYTE)
+		return slot + tex_image_byte_start;
 	else
 		return slot;
 }
 
 int ImageManager::flattened_slot_to_type_index(int flat_slot, ImageDataType *type)
 {
-	if(flat_slot >= tex_image_float_start)
+	if(flat_slot >= tex_image_byte_start)
+	{
+		*type = IMAGE_DATA_TYPE_BYTE;
+		return flat_slot - tex_image_byte_start;
+	}
+	else if(flat_slot >= tex_image_float_start)
 	{
 		*type = IMAGE_DATA_TYPE_FLOAT;
 		return flat_slot - tex_image_float_start;
@@ -233,6 +259,8 @@ string ImageManager::name_from_type(int type)
 		return "float4";
 	else if(type == IMAGE_DATA_TYPE_FLOAT)
 		return "float";
+	else if(type == IMAGE_DATA_TYPE_BYTE)
+		return "byte";
 	else
 		return "byte4";
 }
@@ -268,9 +296,11 @@ int ImageManager::add_image(const string& filename,
 	if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4)
 		is_float = true;
 
-	/* No float textures on GPU yet */
+	/* No single channel float or byte textures on GPU yet, use the 4 channel types instead. */
 	if(type == IMAGE_DATA_TYPE_FLOAT && tex_num_images[type] == 0)
 		type = IMAGE_DATA_TYPE_FLOAT4;
+	if(type == IMAGE_DATA_TYPE_BYTE && tex_num_images[type] == 0)
+		type = IMAGE_DATA_TYPE_BYTE4;
 
 	/* Fnd existing image. */
 	for(slot = 0; slot < images[type].size(); slot++) {
@@ -531,6 +561,52 @@ bool ImageManager::file_load_byte4_image(Image *img, device_vector<uchar4>& tex_
 	return true;
 }
 
+/* Load a single channel 8 bit image into tex_img.
+ * Returns false if file_load_image_generic() fails or the pixel buffer can't be allocated. */
+bool ImageManager::file_load_byte_image(Image *img, device_vector<uchar>& tex_img)
+{
+	ImageInput *in = NULL;
+	int width, height, depth, components;
+
+	if(!file_load_image_generic(img, &in, width, height, depth, components))
+		return false;
+
+	/* read single channel (grayscale) pixels */
+	uchar *pixels = (uchar*)tex_img.resize(width, height, depth);
+	if(pixels == NULL) {
+		/* Don't leak the image input when the allocation fails. */
+		if(in) {
+			in->close();
+			delete in;
+		}
+		return false;
+	}
+
+	if(in) {
+		if(depth <= 1) {
+			int scanlinesize = width*components*sizeof(uchar);
+
+			/* Start at the last row with a negative y stride to store the image flipped vertically. */
+			in->read_image(TypeDesc::UINT8,
+			               (uchar*)pixels + (((size_t)height)-1)*scanlinesize,
+			               AutoStride,
+			               -scanlinesize,
+			               AutoStride);
+		}
+		else {
+			in->read_image(TypeDesc::UINT8, (uchar*)pixels);
+		}
+
+		in->close();
+		delete in;
+	}
+	else {
+		builtin_image_pixels_cb(img->filename, img->builtin_data, pixels);
+	}
+
+	return true;
+}
+
 bool ImageManager::file_load_float4_image(Image *img, device_vector<float4>& tex_img)
 {
 	ImageInput *in = NULL;
@@ -749,7 +825,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD
 			                  img->extension);
 		}
 	}
-	else {
+	else if(type == IMAGE_DATA_TYPE_BYTE4) {
 		device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
 
 		if(tex_img.device_pointer) {
@@ -775,6 +851,29 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD
 			                  img->extension);
 		}
 	}
+	else {
+		device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
+
+		if(tex_img.device_pointer) {
+			thread_scoped_lock device_lock(device_mutex);
+			device->tex_free(tex_img);
+		}
+
+		if(!file_load_byte_image(img, tex_img)) {
+			/* on failure to load, we set a 1x1 pixel image with the red value of the missing texture color */
+			uchar *pixels = (uchar*)tex_img.resize(1, 1);
+
+			pixels[0] = (TEX_IMAGE_MISSING_R * 255);
+		}
+
+		if(!pack_images) {
+			thread_scoped_lock device_lock(device_mutex);
+			device->tex_alloc(name.c_str(),
+			                  tex_img,
+			                  img->interpolation,
+			                  img->extension);
+		}
+	}
 
 	img->need_load = false;
 }
@@ -799,9 +898,6 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, ImageD
 			}
 
 			tex_img.clear();
-
-			delete images[type][slot];
-			images[type][slot] = NULL;
 		}
 		else if(type == IMAGE_DATA_TYPE_FLOAT) {
 			device_vector<float>& tex_img = dscene->tex_float_image[slot];
@@ -812,11 +908,8 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, ImageD
 			}
 
 			tex_img.clear();
-
-			delete images[type][slot];
-			images[type][slot] = NULL;
 		}
-		else {
+		else if(type == IMAGE_DATA_TYPE_BYTE4) {
 			device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
 
 			if(tex_img.device_pointer) {
@@ -825,10 +918,20 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, ImageD
 			}
 
 			tex_img.clear();
-
-			delete images[type][slot];
-			images[type][slot] = NULL;
 		}
+		else {
+			device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
+
+			if(tex_img.device_pointer) {
+				thread_scoped_lock device_lock(device_mutex);
+				device->tex_free(tex_img);
+			}
+
+			tex_img.clear();
+		}
+
+		delete images[type][slot];
+		images[type][slot] = NULL;
 	}
 }
 
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index cf5a6e9523f..53f739cd356 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -40,6 +40,7 @@ public:
 		IMAGE_DATA_TYPE_FLOAT4 = 0,
 		IMAGE_DATA_TYPE_BYTE4 = 1,
 		IMAGE_DATA_TYPE_FLOAT = 2,
+		IMAGE_DATA_TYPE_BYTE = 3,
 
 		IMAGE_DATA_NUM_TYPES
 	};
@@ -97,6 +98,7 @@ private:
 	int tex_num_images[IMAGE_DATA_NUM_TYPES];
 	int tex_image_byte4_start;
 	int tex_image_float_start;
+	int tex_image_byte_start;
 	thread_mutex device_mutex;
 	int animation_frame;
 
@@ -106,6 +108,7 @@ private:
 	bool file_load_image_generic(Image *img, ImageInput **in, int &width, int &height, int &depth, int &components);
 
 	bool file_load_byte4_image(Image *img, device_vector<uchar4>& tex_img);
+	bool file_load_byte_image(Image *img, device_vector<uchar>& tex_img);
 	bool file_load_float4_image(Image *img, device_vector<float4>& tex_img);
 	bool file_load_float_image(Image *img, device_vector<float>& tex_img);
 
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index 34050851d8e..435d7a396c5 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -112,6 +112,7 @@ public:
 	device_vector<uchar4> tex_byte4_image[TEX_NUM_BYTE4_IMAGES_CPU];
 	device_vector<float4> tex_float4_image[TEX_NUM_FLOAT4_IMAGES_CPU];
 	device_vector<float> tex_float_image[TEX_NUM_FLOAT_IMAGES_CPU];
+	device_vector<uchar> tex_byte_image[TEX_NUM_BYTE_IMAGES_CPU];
 
 	/* opencl images */
 	device_vector<uchar4> tex_image_byte4_packed;
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index 1f0046c354c..2a1cfca4fdd 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -25,29 +25,37 @@ CCL_NAMESPACE_BEGIN
 #define TEX_NUM_BYTE4_IMAGES_CPU 1024
 #define TEX_NUM_FLOAT4_IMAGES_CPU 1024
 #define TEX_NUM_FLOAT_IMAGES_CPU 1024
+#define TEX_NUM_BYTE_IMAGES_CPU 1024
 #define TEX_IMAGE_BYTE4_START_CPU TEX_NUM_FLOAT4_IMAGES_CPU
 #define TEX_IMAGE_FLOAT_START_CPU (TEX_NUM_FLOAT4_IMAGES_CPU + TEX_NUM_BYTE4_IMAGES_CPU)
+#define TEX_IMAGE_BYTE_START_CPU (TEX_NUM_FLOAT4_IMAGES_CPU + TEX_NUM_BYTE4_IMAGES_CPU + TEX_NUM_FLOAT_IMAGES_CPU)
 
 /* CUDA (Fermi) */
 #define TEX_NUM_BYTE4_IMAGES_CUDA 88
 #define TEX_NUM_FLOAT4_IMAGES_CUDA 5
 #define TEX_NUM_FLOAT_IMAGES_CUDA 0
+#define TEX_NUM_BYTE_IMAGES_CUDA 0
 #define TEX_IMAGE_BYTE4_START_CUDA TEX_NUM_FLOAT4_IMAGES_CUDA
 #define TEX_IMAGE_FLOAT_START_CUDA (TEX_NUM_FLOAT4_IMAGES_CUDA + TEX_NUM_BYTE4_IMAGES_CUDA)
+#define TEX_IMAGE_BYTE_START_CUDA (TEX_NUM_FLOAT4_IMAGES_CUDA + TEX_NUM_BYTE4_IMAGES_CUDA + TEX_NUM_FLOAT_IMAGES_CUDA)
 
 /* CUDA (KEPLER and above) */
 #define TEX_NUM_BYTE4_IMAGES_CUDA_KEPLER 145
 #define TEX_NUM_FLOAT4_IMAGES_CUDA_KEPLER 5
 #define TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER 0
+#define TEX_NUM_BYTE_IMAGES_CUDA_KEPLER 0
 #define TEX_IMAGE_BYTE4_START_CUDA_KEPLER TEX_NUM_FLOAT4_IMAGES_CUDA_KEPLER
 #define TEX_IMAGE_FLOAT_START_CUDA_KEPLER (TEX_NUM_FLOAT4_IMAGES_CUDA_KEPLER + TEX_NUM_BYTE4_IMAGES_CUDA_KEPLER)
+#define TEX_IMAGE_BYTE_START_CUDA_KEPLER (TEX_NUM_FLOAT4_IMAGES_CUDA_KEPLER + TEX_NUM_BYTE4_IMAGES_CUDA_KEPLER + TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER)
 
 /* OpenCL */
 #define TEX_NUM_BYTE4_IMAGES_OPENCL 1024
 #define TEX_NUM_FLOAT4_IMAGES_OPENCL 1024
 #define TEX_NUM_FLOAT_IMAGES_OPENCL 0
+#define TEX_NUM_BYTE_IMAGES_OPENCL 0
 #define TEX_IMAGE_BYTE4_START_OPENCL TEX_NUM_FLOAT4_IMAGES_OPENCL
 #define TEX_IMAGE_FLOAT_START_OPENCL (TEX_NUM_FLOAT4_IMAGES_OPENCL + TEX_NUM_BYTE4_IMAGES_OPENCL)
+#define TEX_IMAGE_BYTE_START_OPENCL (TEX_NUM_FLOAT4_IMAGES_OPENCL + TEX_NUM_BYTE4_IMAGES_OPENCL + TEX_NUM_FLOAT_IMAGES_OPENCL)
 
 /* Color to use when textures are not found. */