diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index 47383140170..af68907a5c2 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -25,12 +25,12 @@ ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float { if(tex >= TEX_START_HALF_CPU) return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp(x, y); - else if(tex >= TEX_START_HALF4_CPU) - return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y); else if(tex >= TEX_START_BYTE_CPU) return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp(x, y); else if(tex >= TEX_START_FLOAT_CPU) return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp(x, y); + else if(tex >= TEX_START_HALF4_CPU) + return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y); else if(tex >= TEX_START_BYTE4_CPU) return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp(x, y); else @@ -41,12 +41,12 @@ ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, fl { if(tex >= TEX_START_HALF_CPU) return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d(x, y, z); - else if(tex >= TEX_START_HALF4_CPU) - return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z); else if(tex >= TEX_START_BYTE_CPU) return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d(x, y, z); else if(tex >= TEX_START_FLOAT_CPU) return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d(x, y, z); + else if(tex >= TEX_START_HALF4_CPU) + return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z); else if(tex >= TEX_START_BYTE4_CPU) return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d(x, y, z); else @@ -57,13 +57,13 @@ ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, fl ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation) { if(tex >= TEX_START_HALF_CPU) - return kg->texture_half4_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation); - else if(tex >= TEX_START_HALF4_CPU) - return kg->texture_half_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation); + return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation); else if(tex >= TEX_START_BYTE_CPU) return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d_ex(x, y, z, interpolation); else if(tex >= TEX_START_FLOAT_CPU) return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d_ex(x, y, z, interpolation); + else if(tex >= TEX_START_HALF4_CPU) + return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation); else if(tex >= TEX_START_BYTE4_CPU) return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d_ex(x, y, z, interpolation); else diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index b6b90dfff81..f359829374d 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -277,7 +277,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, } # else CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id); - if(id < 2048) /* TODO(dingto): Make this a variable */ + if(id < TEX_START_FLOAT_CUDA_KEPLER) r = kernel_tex_image_interp_float4(tex, x, y); else { float f = kernel_tex_image_interp_float(tex, x, y); diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 614620c14af..284af5f90f7 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -52,15 +52,15 @@ ImageManager::ImageManager(const DeviceInfo& info) { \ tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_ ## ARCH; \ tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_ ## ARCH; \ + tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \ tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_ ## ARCH; \ tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_ ## ARCH; \ - tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \ tex_num_images[IMAGE_DATA_TYPE_HALF] = TEX_NUM_HALF_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_START_FLOAT4_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_BYTE4] = TEX_START_BYTE4_ ## ARCH; \ + tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_FLOAT] = TEX_START_FLOAT_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_BYTE] = TEX_START_BYTE_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_HALF] = TEX_START_HALF_ ## ARCH; \ } @@ -82,15 +82,15 @@ ImageManager::ImageManager(const DeviceInfo& info) /* Should not happen. */ tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = 0; tex_num_images[IMAGE_DATA_TYPE_BYTE4] = 0; + tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0; tex_num_images[IMAGE_DATA_TYPE_FLOAT] = 0; tex_num_images[IMAGE_DATA_TYPE_BYTE] = 0; - tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0; tex_num_images[IMAGE_DATA_TYPE_HALF] = 0; tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = 0; tex_start_images[IMAGE_DATA_TYPE_BYTE4] = 0; + tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0; tex_start_images[IMAGE_DATA_TYPE_FLOAT] = 0; tex_start_images[IMAGE_DATA_TYPE_BYTE] = 0; - tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0; tex_start_images[IMAGE_DATA_TYPE_HALF] = 0; assert(0); } @@ -216,7 +216,7 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen } /* We use a consecutive slot counting scheme on the devices, in order - * float4, byte4, float, byte. + * float4, byte4, half4, float, byte, half. * These functions convert the slot ids from ImageManager "images" ones * to device ones and vice versa. */ int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type) diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index 07998684b23..cca71a6bb93 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -39,9 +39,9 @@ public: enum ImageDataType { IMAGE_DATA_TYPE_FLOAT4 = 0, IMAGE_DATA_TYPE_BYTE4 = 1, - IMAGE_DATA_TYPE_FLOAT = 2, - IMAGE_DATA_TYPE_BYTE = 3, - IMAGE_DATA_TYPE_HALF4 = 4, + IMAGE_DATA_TYPE_HALF4 = 2, + IMAGE_DATA_TYPE_FLOAT = 3, + IMAGE_DATA_TYPE_BYTE = 4, IMAGE_DATA_TYPE_HALF = 5, IMAGE_DATA_NUM_TYPES diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h index 0cecfe91ea9..ec3ee2b8191 100644 --- a/intern/cycles/util/util_texture.h +++ b/intern/cycles/util/util_texture.h @@ -24,58 +24,58 @@ CCL_NAMESPACE_BEGIN /* CPU */ #define TEX_NUM_FLOAT4_CPU 1024 #define TEX_NUM_BYTE4_CPU 1024 +#define TEX_NUM_HALF4_CPU 1024 #define TEX_NUM_FLOAT_CPU 1024 #define TEX_NUM_BYTE_CPU 1024 -#define TEX_NUM_HALF4_CPU 1024 #define TEX_NUM_HALF_CPU 1024 #define TEX_START_FLOAT4_CPU 0 #define TEX_START_BYTE4_CPU TEX_NUM_FLOAT4_CPU -#define TEX_START_FLOAT_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU) -#define TEX_START_BYTE_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU) -#define TEX_START_HALF4_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU) -#define TEX_START_HALF_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU + TEX_NUM_HALF4_CPU) +#define TEX_START_HALF4_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU) +#define TEX_START_FLOAT_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU) +#define TEX_START_BYTE_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU) +#define TEX_START_HALF_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU) /* CUDA (Geforce 4xx and 5xx) */ #define TEX_NUM_FLOAT4_CUDA 5 #define TEX_NUM_BYTE4_CUDA 85 +#define TEX_NUM_HALF4_CUDA 0 #define TEX_NUM_FLOAT_CUDA 0 #define TEX_NUM_BYTE_CUDA 0 -#define TEX_NUM_HALF4_CUDA 0 #define TEX_NUM_HALF_CUDA 0 #define TEX_START_FLOAT4_CUDA 0 #define TEX_START_BYTE4_CUDA TEX_NUM_FLOAT4_CUDA -#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA) -#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA) -#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA) -#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA + TEX_NUM_HALF4_CUDA) +#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA) +#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA) +#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA) +#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA) /* CUDA (Kepler, Geforce 6xx and above) */ #define TEX_NUM_FLOAT4_CUDA_KEPLER 1024 #define TEX_NUM_BYTE4_CUDA_KEPLER 1024 +#define TEX_NUM_HALF4_CUDA_KEPLER 0 #define TEX_NUM_FLOAT_CUDA_KEPLER 1024 #define TEX_NUM_BYTE_CUDA_KEPLER 1024 -#define TEX_NUM_HALF4_CUDA_KEPLER 0 #define TEX_NUM_HALF_CUDA_KEPLER 0 #define TEX_START_FLOAT4_CUDA_KEPLER 0 #define TEX_START_BYTE4_CUDA_KEPLER TEX_NUM_FLOAT4_CUDA_KEPLER -#define TEX_START_FLOAT_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER) -#define TEX_START_BYTE_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER) -#define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER) -#define TEX_START_HALF_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER) +#define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER) +#define TEX_START_FLOAT_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER) +#define TEX_START_BYTE_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER) +#define TEX_START_HALF_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER) /* OpenCL */ #define TEX_NUM_FLOAT4_OPENCL 1024 #define TEX_NUM_BYTE4_OPENCL 1024 +#define TEX_NUM_HALF4_OPENCL 0 #define TEX_NUM_FLOAT_OPENCL 0 #define TEX_NUM_BYTE_OPENCL 0 -#define TEX_NUM_HALF4_OPENCL 0 #define TEX_NUM_HALF_OPENCL 0 #define TEX_START_FLOAT4_OPENCL 0 #define TEX_START_BYTE4_OPENCL TEX_NUM_FLOAT4_OPENCL -#define TEX_START_FLOAT_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL) -#define TEX_START_BYTE_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL) -#define TEX_START_HALF4_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL) -#define TEX_START_HALF_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL + TEX_NUM_HALF4_OPENCL) +#define TEX_START_HALF4_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL) +#define TEX_START_FLOAT_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL) +#define TEX_START_BYTE_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL) +#define TEX_START_HALF_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL) /* Color to use when textures are not found. */