Cycles: Add single channel texture support for OpenCL.
This way, OpenCL devices can also benefit from a smaller memory footprint when using single-channel (greyscale) textures, e.g. bump maps. This was an additional target for my GSoC 2016 project.
This commit is contained in:
parent
da77d9873f
commit
5c0a67b325
@ -495,6 +495,7 @@ typedef texture<uint> texture_uint;
|
|||||||
typedef texture<int> texture_int;
|
typedef texture<int> texture_int;
|
||||||
typedef texture<uint4> texture_uint4;
|
typedef texture<uint4> texture_uint4;
|
||||||
typedef texture<uchar4> texture_uchar4;
|
typedef texture<uchar4> texture_uchar4;
|
||||||
|
typedef texture<uchar> texture_uchar;
|
||||||
typedef texture_image<float> texture_image_float;
|
typedef texture_image<float> texture_image_float;
|
||||||
typedef texture_image<uchar> texture_image_uchar;
|
typedef texture_image<uchar> texture_image_uchar;
|
||||||
typedef texture_image<half> texture_image_half;
|
typedef texture_image<half> texture_image_half;
|
||||||
|
@ -188,6 +188,8 @@ KERNEL_TEX(uint, texture_uint, __bindless_mapping)
|
|||||||
/* packed image (opencl) */
|
/* packed image (opencl) */
|
||||||
KERNEL_TEX(uchar4, texture_uchar4, __tex_image_byte4_packed)
|
KERNEL_TEX(uchar4, texture_uchar4, __tex_image_byte4_packed)
|
||||||
KERNEL_TEX(float4, texture_float4, __tex_image_float4_packed)
|
KERNEL_TEX(float4, texture_float4, __tex_image_float4_packed)
|
||||||
|
KERNEL_TEX(uchar, texture_uchar, __tex_image_byte_packed)
|
||||||
|
KERNEL_TEX(float, texture_float, __tex_image_float_packed)
|
||||||
KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
|
KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
|
||||||
|
|
||||||
#undef KERNEL_TEX
|
#undef KERNEL_TEX
|
||||||
|
@ -36,13 +36,26 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
|
ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
|
||||||
{
|
{
|
||||||
if(id >= TEX_NUM_FLOAT4_IMAGES) {
|
/* Float4 */
|
||||||
|
if(id < TEX_START_BYTE4_OPENCL) {
|
||||||
|
return kernel_tex_fetch(__tex_image_float4_packed, offset);
|
||||||
|
}
|
||||||
|
/* Byte4 */
|
||||||
|
else if(id < TEX_START_FLOAT_OPENCL) {
|
||||||
uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset);
|
uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset);
|
||||||
float f = 1.0f/255.0f;
|
float f = 1.0f/255.0f;
|
||||||
return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
|
return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
|
||||||
}
|
}
|
||||||
|
/* Float */
|
||||||
|
else if(id < TEX_START_BYTE_OPENCL) {
|
||||||
|
float f = kernel_tex_fetch(__tex_image_float_packed, offset);
|
||||||
|
return make_float4(f, f, f, 1.0f);
|
||||||
|
}
|
||||||
|
/* Byte */
|
||||||
else {
|
else {
|
||||||
return kernel_tex_fetch(__tex_image_float4_packed, offset);
|
uchar r = kernel_tex_fetch(__tex_image_byte_packed, offset);
|
||||||
|
float f = r * (1.0f/255.0f);
|
||||||
|
return make_float4(f, f, f, 1.0f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -284,7 +284,7 @@ int ImageManager::add_image(const string& filename,
|
|||||||
if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4)
|
if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4)
|
||||||
is_float = true;
|
is_float = true;
|
||||||
|
|
||||||
/* No single channel and half textures on CUDA (Fermi) and OpenCL, use available slots */
|
/* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */
|
||||||
if((type == IMAGE_DATA_TYPE_FLOAT ||
|
if((type == IMAGE_DATA_TYPE_FLOAT ||
|
||||||
type == IMAGE_DATA_TYPE_HALF4 ||
|
type == IMAGE_DATA_TYPE_HALF4 ||
|
||||||
type == IMAGE_DATA_TYPE_HALF) &&
|
type == IMAGE_DATA_TYPE_HALF) &&
|
||||||
@ -1105,10 +1105,11 @@ void ImageManager::device_pack_images(Device *device,
|
|||||||
size_t size = 0, offset = 0;
|
size_t size = 0, offset = 0;
|
||||||
ImageDataType type;
|
ImageDataType type;
|
||||||
|
|
||||||
int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4];
|
int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4]
|
||||||
|
+ tex_num_images[IMAGE_DATA_TYPE_FLOAT] + tex_num_images[IMAGE_DATA_TYPE_BYTE];
|
||||||
uint4 *info = dscene->tex_image_packed_info.resize(info_size);
|
uint4 *info = dscene->tex_image_packed_info.resize(info_size);
|
||||||
|
|
||||||
/* Byte Textures*/
|
/* Byte4 Textures*/
|
||||||
type = IMAGE_DATA_TYPE_BYTE4;
|
type = IMAGE_DATA_TYPE_BYTE4;
|
||||||
|
|
||||||
for(size_t slot = 0; slot < images[type].size(); slot++) {
|
for(size_t slot = 0; slot < images[type].size(); slot++) {
|
||||||
@ -1119,7 +1120,7 @@ void ImageManager::device_pack_images(Device *device,
|
|||||||
size += tex_img.size();
|
size += tex_img.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
uchar4 *pixels_byte = dscene->tex_image_byte4_packed.resize(size);
|
uchar4 *pixels_byte4 = dscene->tex_image_byte4_packed.resize(size);
|
||||||
|
|
||||||
for(size_t slot = 0; slot < images[type].size(); slot++) {
|
for(size_t slot = 0; slot < images[type].size(); slot++) {
|
||||||
if(!images[type][slot])
|
if(!images[type][slot])
|
||||||
@ -1131,11 +1132,11 @@ void ImageManager::device_pack_images(Device *device,
|
|||||||
|
|
||||||
info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
|
info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
|
||||||
|
|
||||||
memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
|
memcpy(pixels_byte4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
|
||||||
offset += tex_img.size();
|
offset += tex_img.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Float Textures*/
|
/* Float4 Textures*/
|
||||||
type = IMAGE_DATA_TYPE_FLOAT4;
|
type = IMAGE_DATA_TYPE_FLOAT4;
|
||||||
size = 0, offset = 0;
|
size = 0, offset = 0;
|
||||||
|
|
||||||
@ -1147,7 +1148,7 @@ void ImageManager::device_pack_images(Device *device,
|
|||||||
size += tex_img.size();
|
size += tex_img.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
float4 *pixels_float = dscene->tex_image_float4_packed.resize(size);
|
float4 *pixels_float4 = dscene->tex_image_float4_packed.resize(size);
|
||||||
|
|
||||||
for(size_t slot = 0; slot < images[type].size(); slot++) {
|
for(size_t slot = 0; slot < images[type].size(); slot++) {
|
||||||
if(!images[type][slot])
|
if(!images[type][slot])
|
||||||
@ -1160,6 +1161,63 @@ void ImageManager::device_pack_images(Device *device,
|
|||||||
uint8_t options = pack_image_options(type, slot);
|
uint8_t options = pack_image_options(type, slot);
|
||||||
info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
|
info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
|
||||||
|
|
||||||
|
memcpy(pixels_float4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
|
||||||
|
offset += tex_img.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Byte Textures*/
|
||||||
|
type = IMAGE_DATA_TYPE_BYTE;
|
||||||
|
size = 0, offset = 0;
|
||||||
|
|
||||||
|
for(size_t slot = 0; slot < images[type].size(); slot++) {
|
||||||
|
if(!images[type][slot])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
|
||||||
|
size += tex_img.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
uchar *pixels_byte = dscene->tex_image_byte_packed.resize(size);
|
||||||
|
|
||||||
|
for(size_t slot = 0; slot < images[type].size(); slot++) {
|
||||||
|
if(!images[type][slot])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
|
||||||
|
|
||||||
|
uint8_t options = pack_image_options(type, slot);
|
||||||
|
|
||||||
|
info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
|
||||||
|
|
||||||
|
memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
|
||||||
|
offset += tex_img.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Float Textures*/
|
||||||
|
type = IMAGE_DATA_TYPE_FLOAT;
|
||||||
|
size = 0, offset = 0;
|
||||||
|
|
||||||
|
for(size_t slot = 0; slot < images[type].size(); slot++) {
|
||||||
|
if(!images[type][slot])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
device_vector<float>& tex_img = dscene->tex_float_image[slot];
|
||||||
|
size += tex_img.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
float *pixels_float = dscene->tex_image_float_packed.resize(size);
|
||||||
|
|
||||||
|
for(size_t slot = 0; slot < images[type].size(); slot++) {
|
||||||
|
if(!images[type][slot])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
device_vector<float>& tex_img = dscene->tex_float_image[slot];
|
||||||
|
|
||||||
|
/* todo: support 3D textures, only CPU for now */
|
||||||
|
|
||||||
|
uint8_t options = pack_image_options(type, slot);
|
||||||
|
info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
|
||||||
|
|
||||||
memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
|
memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
|
||||||
offset += tex_img.size();
|
offset += tex_img.size();
|
||||||
}
|
}
|
||||||
@ -1178,6 +1236,20 @@ void ImageManager::device_pack_images(Device *device,
|
|||||||
}
|
}
|
||||||
device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed);
|
device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed);
|
||||||
}
|
}
|
||||||
|
if(dscene->tex_image_byte_packed.size()) {
|
||||||
|
if(dscene->tex_image_byte_packed.device_pointer) {
|
||||||
|
thread_scoped_lock device_lock(device_mutex);
|
||||||
|
device->tex_free(dscene->tex_image_byte_packed);
|
||||||
|
}
|
||||||
|
device->tex_alloc("__tex_image_byte_packed", dscene->tex_image_byte_packed);
|
||||||
|
}
|
||||||
|
if(dscene->tex_image_float_packed.size()) {
|
||||||
|
if(dscene->tex_image_float_packed.device_pointer) {
|
||||||
|
thread_scoped_lock device_lock(device_mutex);
|
||||||
|
device->tex_free(dscene->tex_image_float_packed);
|
||||||
|
}
|
||||||
|
device->tex_alloc("__tex_image_float_packed", dscene->tex_image_float_packed);
|
||||||
|
}
|
||||||
if(dscene->tex_image_packed_info.size()) {
|
if(dscene->tex_image_packed_info.size()) {
|
||||||
if(dscene->tex_image_packed_info.device_pointer) {
|
if(dscene->tex_image_packed_info.device_pointer) {
|
||||||
thread_scoped_lock device_lock(device_mutex);
|
thread_scoped_lock device_lock(device_mutex);
|
||||||
@ -1208,10 +1280,14 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene)
|
|||||||
|
|
||||||
device->tex_free(dscene->tex_image_byte4_packed);
|
device->tex_free(dscene->tex_image_byte4_packed);
|
||||||
device->tex_free(dscene->tex_image_float4_packed);
|
device->tex_free(dscene->tex_image_float4_packed);
|
||||||
|
device->tex_free(dscene->tex_image_byte_packed);
|
||||||
|
device->tex_free(dscene->tex_image_float_packed);
|
||||||
device->tex_free(dscene->tex_image_packed_info);
|
device->tex_free(dscene->tex_image_packed_info);
|
||||||
|
|
||||||
dscene->tex_image_byte4_packed.clear();
|
dscene->tex_image_byte4_packed.clear();
|
||||||
dscene->tex_image_float4_packed.clear();
|
dscene->tex_image_float4_packed.clear();
|
||||||
|
dscene->tex_image_byte_packed.clear();
|
||||||
|
dscene->tex_image_float_packed.clear();
|
||||||
dscene->tex_image_packed_info.clear();
|
dscene->tex_image_packed_info.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,6 +123,8 @@ public:
|
|||||||
/* opencl images */
|
/* opencl images */
|
||||||
device_vector<uchar4> tex_image_byte4_packed;
|
device_vector<uchar4> tex_image_byte4_packed;
|
||||||
device_vector<float4> tex_image_float4_packed;
|
device_vector<float4> tex_image_float4_packed;
|
||||||
|
device_vector<uchar> tex_image_byte_packed;
|
||||||
|
device_vector<float> tex_image_float_packed;
|
||||||
device_vector<uint4> tex_image_packed_info;
|
device_vector<uint4> tex_image_packed_info;
|
||||||
|
|
||||||
KernelData data;
|
KernelData data;
|
||||||
|
@ -67,8 +67,8 @@ CCL_NAMESPACE_BEGIN
|
|||||||
#define TEX_NUM_FLOAT4_OPENCL 1024
|
#define TEX_NUM_FLOAT4_OPENCL 1024
|
||||||
#define TEX_NUM_BYTE4_OPENCL 1024
|
#define TEX_NUM_BYTE4_OPENCL 1024
|
||||||
#define TEX_NUM_HALF4_OPENCL 0
|
#define TEX_NUM_HALF4_OPENCL 0
|
||||||
#define TEX_NUM_FLOAT_OPENCL 0
|
#define TEX_NUM_FLOAT_OPENCL 1024
|
||||||
#define TEX_NUM_BYTE_OPENCL 0
|
#define TEX_NUM_BYTE_OPENCL 1024
|
||||||
#define TEX_NUM_HALF_OPENCL 0
|
#define TEX_NUM_HALF_OPENCL 0
|
||||||
#define TEX_START_FLOAT4_OPENCL 0
|
#define TEX_START_FLOAT4_OPENCL 0
|
||||||
#define TEX_START_BYTE4_OPENCL TEX_NUM_FLOAT4_OPENCL
|
#define TEX_START_BYTE4_OPENCL TEX_NUM_FLOAT4_OPENCL
|
||||||
|
Loading…
Reference in New Issue
Block a user