diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index bb303b32705..c882b477c35 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -122,6 +122,17 @@ template struct texture_image { return make_float4(r, r, r, 1.0f); } + ccl_always_inline float4 read(half4 r) + { + return half4_to_float4(r); + } + + ccl_always_inline float4 read(half r) + { + float f = half_to_float(r); + return make_float4(f, f, f, 1.0f); + } + ccl_always_inline int wrap_periodic(int x, int width) { x %= width; @@ -486,8 +497,10 @@ typedef texture texture_uint4; typedef texture texture_uchar4; typedef texture_image texture_image_float; typedef texture_image texture_image_uchar; +typedef texture_image texture_image_half; typedef texture_image texture_image_float4; typedef texture_image texture_image_uchar4; +typedef texture_image texture_image_half4; /* Macros to handle different memory storage on different devices */ diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index e06c68f2fc9..8e66a3a0340 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -37,8 +37,10 @@ struct VolumeStep; typedef struct KernelGlobals { texture_image_uchar4 texture_byte4_images[TEX_NUM_BYTE4_CPU]; texture_image_float4 texture_float4_images[TEX_NUM_FLOAT4_CPU]; + texture_image_half4 texture_half4_images[TEX_NUM_HALF4_CPU]; texture_image_float texture_float_images[TEX_NUM_FLOAT_CPU]; texture_image_uchar texture_byte_images[TEX_NUM_BYTE_CPU]; + texture_image_half texture_half_images[TEX_NUM_HALF_CPU]; # define KERNEL_TEX(type, ttype, name) ttype name; # define KERNEL_IMAGE_TEX(type, ttype, name) diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index d8a83f69685..f11c85d5f6a 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -154,6 +154,38 @@ void kernel_tex_copy(KernelGlobals *kg, tex->extension = extension; } } + else if(strstr(name, "__tex_image_half4")) { + texture_image_half4 *tex = NULL; + int id = atoi(name + strlen("__tex_image_half4_")); + int array_index = id - TEX_START_HALF4_CPU; + + if(array_index >= 0 && array_index < TEX_NUM_HALF4_CPU) { + tex = &kg->texture_half4_images[array_index]; + } + + if(tex) { + tex->data = (half4*)mem; + tex->dimensions_set(width, height, depth); + tex->interpolation = interpolation; + tex->extension = extension; + } + } + else if(strstr(name, "__tex_image_half")) { + texture_image_half *tex = NULL; + int id = atoi(name + strlen("__tex_image_half_")); + int array_index = id - TEX_START_HALF_CPU; + + if(array_index >= 0 && array_index < TEX_NUM_HALF_CPU) { + tex = &kg->texture_half_images[array_index]; + } + + if(tex) { + tex->data = (half*)mem; + tex->dimensions_set(width, height, depth); + tex->interpolation = interpolation; + tex->extension = extension; + } + } else assert(0); } diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index b10861ab857..47383140170 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -23,7 +23,11 @@ CCL_NAMESPACE_BEGIN ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float x, float y) { - if(tex >= TEX_START_BYTE_CPU) + if(tex >= TEX_START_HALF_CPU) + return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp(x, y); + else if(tex >= TEX_START_HALF4_CPU) + return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y); + else if(tex >= TEX_START_BYTE_CPU) return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp(x, y); else if(tex >= TEX_START_FLOAT_CPU) return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp(x, y); @@ -35,7 +39,11 @@ ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, float x, float y, float z) { - if(tex >= TEX_START_BYTE_CPU) + if(tex >= TEX_START_HALF_CPU) + return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d(x, y, z); + else if(tex >= TEX_START_HALF4_CPU) + return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z); + else if(tex >= TEX_START_BYTE_CPU) return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d(x, y, z); else if(tex >= TEX_START_FLOAT_CPU) return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d(x, y, z); @@ -48,7 +56,11 @@ ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, fl ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation) { - if(tex >= TEX_START_BYTE_CPU) + if(tex >= TEX_START_HALF_CPU) + return kg->texture_half4_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation); + else if(tex >= TEX_START_HALF4_CPU) + return kg->texture_half_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation); + else if(tex >= TEX_START_BYTE_CPU) return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d_ex(x, y, z, interpolation); else if(tex >= TEX_START_FLOAT_CPU) return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d_ex(x, y, z, interpolation); diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 71dc85f5f03..c5e819520ea 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -54,10 +54,14 @@ ImageManager::ImageManager(const DeviceInfo& info) tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_ ## ARCH; \ tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_ ## ARCH; \ tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_ ## ARCH; \ + tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \ + tex_num_images[IMAGE_DATA_TYPE_HALF] = TEX_NUM_HALF_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_START_FLOAT4_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_BYTE4] = TEX_START_BYTE4_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_FLOAT] = TEX_START_FLOAT_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_BYTE] = TEX_START_BYTE_ ## ARCH; \ + tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \ + tex_start_images[IMAGE_DATA_TYPE_HALF] = TEX_START_HALF_ ## ARCH; \ } if(device_type == DEVICE_CPU) { @@ -80,10 +84,14 @@ ImageManager::ImageManager(const DeviceInfo& info) tex_num_images[IMAGE_DATA_TYPE_BYTE4] = 0; tex_num_images[IMAGE_DATA_TYPE_FLOAT] = 0; tex_num_images[IMAGE_DATA_TYPE_BYTE] = 0; + tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0; + tex_num_images[IMAGE_DATA_TYPE_HALF] = 0; tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = 0; tex_start_images[IMAGE_DATA_TYPE_BYTE4] = 0; tex_start_images[IMAGE_DATA_TYPE_FLOAT] = 0; tex_start_images[IMAGE_DATA_TYPE_BYTE] = 0; + tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0; + tex_start_images[IMAGE_DATA_TYPE_HALF] = 0; assert(0); } @@ -128,7 +136,7 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen void *builtin_data, bool& is_linear) { - bool is_float = false; + bool is_float = false, is_half = false; is_linear = false; int channels = 4; @@ -167,6 +175,10 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen } } + /* check if it's half float */ + if(spec.format == TypeDesc::HALF) + is_half = true; + channels = spec.nchannels; /* basic color space detection, not great but better than nothing @@ -192,7 +204,10 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen delete in; } - if(is_float) { + if(is_half) { + return IMAGE_DATA_TYPE_HALF4; + } + else if(is_float) { return (channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT; } else { @@ -230,6 +245,10 @@ string ImageManager::name_from_type(int type) return "float"; else if(type == IMAGE_DATA_TYPE_BYTE) return "byte"; + else if(type == IMAGE_DATA_TYPE_HALF4) + return "half4"; + else if(type == IMAGE_DATA_TYPE_HALF) + return "half"; else return "byte4"; } @@ -265,11 +284,16 @@ int ImageManager::add_image(const string& filename, if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4) is_float = true; - /* No single channel textures on CUDA (Fermi) and OpenCL, use available slots */ - if(type == IMAGE_DATA_TYPE_FLOAT && tex_num_images[type] == 0) + /* No single channel and half textures on CUDA (Fermi) and OpenCL, use available slots */ + if((type == IMAGE_DATA_TYPE_FLOAT || + type == IMAGE_DATA_TYPE_HALF4 || + type == IMAGE_DATA_TYPE_HALF) && + tex_num_images[type] == 0) { type = IMAGE_DATA_TYPE_FLOAT4; - if(type == IMAGE_DATA_TYPE_BYTE && tex_num_images[type] == 0) + } + if(type == IMAGE_DATA_TYPE_BYTE && tex_num_images[type] == 0) { type = IMAGE_DATA_TYPE_BYTE4; + } /* Fnd existing image. */ for(slot = 0; slot < images[type].size(); slot++) { @@ -645,6 +669,107 @@ bool ImageManager::file_load_float_image(Image *img, ImageDataType type, device_ return true; } +template +bool ImageManager::file_load_half_image(Image *img, ImageDataType type, device_vector& tex_img) +{ + ImageInput *in = NULL; + int width, height, depth, components; + + if(!file_load_image_generic(img, &in, width, height, depth, components)) + return false; + + /* read RGBA pixels */ + half *pixels = (half*)tex_img.resize(width, height, depth); + if(pixels == NULL) { + return false; + } + + if(in) { + half *readpixels = pixels; + vector tmppixels; + + if(components > 4) { + tmppixels.resize(((size_t)width)*height*components); + readpixels = &tmppixels[0]; + } + + if(depth <= 1) { + int scanlinesize = width*components*sizeof(half); + + in->read_image(TypeDesc::HALF, + (uchar*)readpixels + (height-1)*scanlinesize, /*TODO(dingto): why uchar cast? */ + AutoStride, + -scanlinesize, + AutoStride); + } + else { + in->read_image(TypeDesc::HALF, (uchar*)readpixels); + } + + if(components > 4) { + size_t dimensions = ((size_t)width)*height; + for(size_t i = dimensions-1, pixel = 0; pixel < dimensions; pixel++, i--) { + pixels[i*4+3] = tmppixels[i*components+3]; + pixels[i*4+2] = tmppixels[i*components+2]; + pixels[i*4+1] = tmppixels[i*components+1]; + pixels[i*4+0] = tmppixels[i*components+0]; + } + + tmppixels.clear(); + } + + in->close(); + delete in; + } +#if 0 + /* TODO(dingto): Support half for ImBuf. */ + else { + builtin_image_float_pixels_cb(img->filename, img->builtin_data, pixels); + } +#endif + + /* Check if we actually have a half4 slot, in case components == 1, but device + * doesn't support single channel textures. */ + if(type == IMAGE_DATA_TYPE_HALF4) { + size_t num_pixels = ((size_t)width) * height * depth; + if(components == 2) { + /* grayscale + alpha */ + for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { + pixels[i*4+3] = pixels[i*2+1]; + pixels[i*4+2] = pixels[i*2+0]; + pixels[i*4+1] = pixels[i*2+0]; + pixels[i*4+0] = pixels[i*2+0]; + } + } + else if(components == 3) { + /* RGB */ + for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { + pixels[i*4+3] = 1.0f; + pixels[i*4+2] = pixels[i*3+2]; + pixels[i*4+1] = pixels[i*3+1]; + pixels[i*4+0] = pixels[i*3+0]; + } + } + else if(components == 1) { + /* grayscale */ + for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { + pixels[i*4+3] = 1.0f; + pixels[i*4+2] = pixels[i]; + pixels[i*4+1] = pixels[i]; + pixels[i*4+0] = pixels[i]; + } + } + + if(img->use_alpha == false) { + for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { + pixels[i*4+3] = 1.0f; + } + } + } + + return true; +} + void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageDataType type, int slot, Progress *progress) { if(progress->get_cancel()) @@ -744,7 +869,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD img->extension); } } - else { + else if(type == IMAGE_DATA_TYPE_BYTE){ device_vector& tex_img = dscene->tex_byte_image[slot]; if(tex_img.device_pointer) { @@ -767,6 +892,55 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD img->extension); } } + else if(type == IMAGE_DATA_TYPE_HALF4){ + device_vector& tex_img = dscene->tex_half4_image[slot]; + + if(tex_img.device_pointer) { + thread_scoped_lock device_lock(device_mutex); + device->tex_free(tex_img); + } + + if(!file_load_half_image(img, type, tex_img)) { + /* on failure to load, we set a 1x1 pixels pink image */ + half *pixels = (half*)tex_img.resize(1, 1); + + pixels[0] = TEX_IMAGE_MISSING_R; + pixels[1] = TEX_IMAGE_MISSING_G; + pixels[2] = TEX_IMAGE_MISSING_B; + pixels[3] = TEX_IMAGE_MISSING_A; + } + + if(!pack_images) { + thread_scoped_lock device_lock(device_mutex); + device->tex_alloc(name.c_str(), + tex_img, + img->interpolation, + img->extension); + } + } + else if(type == IMAGE_DATA_TYPE_HALF){ + device_vector& tex_img = dscene->tex_half_image[slot]; + + if(tex_img.device_pointer) { + thread_scoped_lock device_lock(device_mutex); + device->tex_free(tex_img); + } + + if(!file_load_half_image(img, type, tex_img)) { + /* on failure to load, we set a 1x1 pixels pink image */ + half *pixels = (half*)tex_img.resize(1, 1); + + pixels[0] = TEX_IMAGE_MISSING_R; + } + + if(!pack_images) { + thread_scoped_lock device_lock(device_mutex); + device->tex_alloc(name.c_str(), + tex_img, + img->interpolation, + img->extension); + } + } img->need_load = false; } @@ -812,7 +986,7 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, ImageD tex_img.clear(); } - else { + else if(type == IMAGE_DATA_TYPE_BYTE){ device_vector& tex_img = dscene->tex_byte_image[slot]; if(tex_img.device_pointer) { @@ -822,6 +996,26 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, ImageD tex_img.clear(); } + else if(type == IMAGE_DATA_TYPE_HALF4){ + device_vector& tex_img = dscene->tex_half4_image[slot]; + + if(tex_img.device_pointer) { + thread_scoped_lock device_lock(device_mutex); + device->tex_free(tex_img); + } + + tex_img.clear(); + } + else if(type == IMAGE_DATA_TYPE_HALF){ + device_vector& tex_img = dscene->tex_half_image[slot]; + + if(tex_img.device_pointer) { + thread_scoped_lock device_lock(device_mutex); + device->tex_free(tex_img); + } + + tex_img.clear(); + } delete images[type][slot]; images[type][slot] = NULL; diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index 8735133fd91..01d02f4dbec 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -41,6 +41,8 @@ public: IMAGE_DATA_TYPE_BYTE4 = 1, IMAGE_DATA_TYPE_FLOAT = 2, IMAGE_DATA_TYPE_BYTE = 3, + IMAGE_DATA_TYPE_HALF4 = 4, + IMAGE_DATA_TYPE_HALF = 5, IMAGE_DATA_NUM_TYPES }; @@ -113,6 +115,9 @@ private: template bool file_load_float_image(Image *img, ImageDataType type, device_vector& tex_img); + template + bool file_load_half_image(Image *img, ImageDataType type, device_vector& tex_img); + int type_index_to_flattened_slot(int slot, ImageDataType type); int flattened_slot_to_type_index(int flat_slot, ImageDataType *type); string name_from_type(int type); diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index b34d6127118..b821e2b6475 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -113,6 +113,8 @@ public: device_vector tex_float4_image[TEX_NUM_FLOAT4_CPU]; device_vector tex_float_image[TEX_NUM_FLOAT_CPU]; device_vector tex_byte_image[TEX_NUM_BYTE_CPU]; + device_vector tex_half4_image[TEX_NUM_HALF4_CPU]; + device_vector tex_half_image[TEX_NUM_HALF_CPU]; /* opencl images */ device_vector tex_image_byte4_packed; diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h index f4bac9888a5..ae85ab3a915 100644 --- a/intern/cycles/util/util_half.h +++ b/intern/cycles/util/util_half.h @@ -85,6 +85,27 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale) #endif } +ccl_device_inline float half_to_float(half h) +{ + float f; + + *((int*) &f) = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13); + + return f; +} + +ccl_device_inline float4 half4_to_float4(half4 h) +{ + float4 f; + + f.x = half_to_float(h.x); + f.y = half_to_float(h.y); + f.z = half_to_float(h.z); + f.w = half_to_float(h.w); + + return f; +} + #endif #endif diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h index e00edc046f7..2ef47283029 100644 --- a/intern/cycles/util/util_texture.h +++ b/intern/cycles/util/util_texture.h @@ -26,40 +26,56 @@ CCL_NAMESPACE_BEGIN #define TEX_NUM_BYTE4_CPU 1024 #define TEX_NUM_FLOAT_CPU 1024 #define TEX_NUM_BYTE_CPU 1024 +#define TEX_NUM_HALF4_CPU 1024 +#define TEX_NUM_HALF_CPU 1024 #define TEX_START_FLOAT4_CPU 0 #define TEX_START_BYTE4_CPU TEX_NUM_FLOAT4_CPU #define TEX_START_FLOAT_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU) -#define TEX_START_BYTE_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_BYTE_CPU) +#define TEX_START_BYTE_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU) +#define TEX_START_HALF4_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU) +#define TEX_START_HALF_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU + TEX_NUM_HALF4_CPU) /* CUDA (Geforce 4xx and 5xx) */ #define TEX_NUM_FLOAT4_CUDA 5 #define TEX_NUM_BYTE4_CUDA 88 #define TEX_NUM_FLOAT_CUDA 0 #define TEX_NUM_BYTE_CUDA 0 +#define TEX_NUM_HALF4_CUDA 0 +#define TEX_NUM_HALF_CUDA 0 #define TEX_START_FLOAT4_CUDA 0 #define TEX_START_BYTE4_CUDA TEX_NUM_FLOAT4_CUDA #define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA) -#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_BYTE_CUDA) +#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA) +#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA) +#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA + TEX_NUM_HALF4_CUDA) /* CUDA (Kepler, Geforce 6xx and above) */ #define TEX_NUM_FLOAT4_CUDA_KEPLER 1024 #define TEX_NUM_BYTE4_CUDA_KEPLER 1024 #define TEX_NUM_FLOAT_CUDA_KEPLER 1024 #define TEX_NUM_BYTE_CUDA_KEPLER 1024 +#define TEX_NUM_HALF4_CUDA_KEPLER 0 +#define TEX_NUM_HALF_CUDA_KEPLER 0 #define TEX_START_FLOAT4_CUDA_KEPLER 0 #define TEX_START_BYTE4_CUDA_KEPLER TEX_NUM_FLOAT4_CUDA_KEPLER #define TEX_START_FLOAT_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER) -#define TEX_START_BYTE_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER) +#define TEX_START_BYTE_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER) +#define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER) +#define TEX_START_HALF_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER) /* OpenCL */ #define TEX_NUM_FLOAT4_OPENCL 1024 #define TEX_NUM_BYTE4_OPENCL 1024 #define TEX_NUM_FLOAT_OPENCL 0 #define TEX_NUM_BYTE_OPENCL 0 +#define TEX_NUM_HALF4_OPENCL 0 +#define TEX_NUM_HALF_OPENCL 0 #define TEX_START_FLOAT4_OPENCL 0 #define TEX_START_BYTE4_OPENCL TEX_NUM_FLOAT4_OPENCL #define TEX_START_FLOAT_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL) -#define TEX_START_BYTE_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_BYTE_OPENCL) +#define TEX_START_BYTE_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL) +#define TEX_START_HALF4_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL) +#define TEX_START_HALF_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL + TEX_NUM_HALF4_OPENCL) /* Color to use when textures are not found. */