forked from bartvdbraak/blender
Cycles: Change code order for Image Data Types.
Now we have the 4-component types first (float4, byte4, half4), followed by the 1-component types (float, byte, half). This makes the code a bit more consistent and also reduces the amount of code a bit when enabling half support on the GPU in the next commit. This also exposed a typo in half CPU images for 3D textures, which wasn't used yet, but it is good to have that one fixed anyway.
This commit is contained in:
parent
3aed54dbd5
commit
5ac7ef873b
@ -25,12 +25,12 @@ ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float
|
||||
{
|
||||
if(tex >= TEX_START_HALF_CPU)
|
||||
return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp(x, y);
|
||||
else if(tex >= TEX_START_HALF4_CPU)
|
||||
return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y);
|
||||
else if(tex >= TEX_START_BYTE_CPU)
|
||||
return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp(x, y);
|
||||
else if(tex >= TEX_START_FLOAT_CPU)
|
||||
return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp(x, y);
|
||||
else if(tex >= TEX_START_HALF4_CPU)
|
||||
return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y);
|
||||
else if(tex >= TEX_START_BYTE4_CPU)
|
||||
return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp(x, y);
|
||||
else
|
||||
@ -41,12 +41,12 @@ ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, fl
|
||||
{
|
||||
if(tex >= TEX_START_HALF_CPU)
|
||||
return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d(x, y, z);
|
||||
else if(tex >= TEX_START_HALF4_CPU)
|
||||
return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z);
|
||||
else if(tex >= TEX_START_BYTE_CPU)
|
||||
return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d(x, y, z);
|
||||
else if(tex >= TEX_START_FLOAT_CPU)
|
||||
return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d(x, y, z);
|
||||
else if(tex >= TEX_START_HALF4_CPU)
|
||||
return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z);
|
||||
else if(tex >= TEX_START_BYTE4_CPU)
|
||||
return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d(x, y, z);
|
||||
else
|
||||
@ -57,13 +57,13 @@ ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, fl
|
||||
ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation)
|
||||
{
|
||||
if(tex >= TEX_START_HALF_CPU)
|
||||
return kg->texture_half4_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation);
|
||||
else if(tex >= TEX_START_HALF4_CPU)
|
||||
return kg->texture_half_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation);
|
||||
return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation);
|
||||
else if(tex >= TEX_START_BYTE_CPU)
|
||||
return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d_ex(x, y, z, interpolation);
|
||||
else if(tex >= TEX_START_FLOAT_CPU)
|
||||
return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d_ex(x, y, z, interpolation);
|
||||
else if(tex >= TEX_START_HALF4_CPU)
|
||||
return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation);
|
||||
else if(tex >= TEX_START_BYTE4_CPU)
|
||||
return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d_ex(x, y, z, interpolation);
|
||||
else
|
||||
|
@ -277,7 +277,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
|
||||
}
|
||||
# else
|
||||
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
|
||||
if(id < 2048) /* TODO(dingto): Make this a variable */
|
||||
if(id < TEX_START_FLOAT_CUDA_KEPLER)
|
||||
r = kernel_tex_image_interp_float4(tex, x, y);
|
||||
else {
|
||||
float f = kernel_tex_image_interp_float(tex, x, y);
|
||||
|
@ -52,15 +52,15 @@ ImageManager::ImageManager(const DeviceInfo& info)
|
||||
{ \
|
||||
tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_ ## ARCH; \
|
||||
tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_ ## ARCH; \
|
||||
tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \
|
||||
tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_ ## ARCH; \
|
||||
tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_ ## ARCH; \
|
||||
tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \
|
||||
tex_num_images[IMAGE_DATA_TYPE_HALF] = TEX_NUM_HALF_ ## ARCH; \
|
||||
tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_START_FLOAT4_ ## ARCH; \
|
||||
tex_start_images[IMAGE_DATA_TYPE_BYTE4] = TEX_START_BYTE4_ ## ARCH; \
|
||||
tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \
|
||||
tex_start_images[IMAGE_DATA_TYPE_FLOAT] = TEX_START_FLOAT_ ## ARCH; \
|
||||
tex_start_images[IMAGE_DATA_TYPE_BYTE] = TEX_START_BYTE_ ## ARCH; \
|
||||
tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \
|
||||
tex_start_images[IMAGE_DATA_TYPE_HALF] = TEX_START_HALF_ ## ARCH; \
|
||||
}
|
||||
|
||||
@ -82,15 +82,15 @@ ImageManager::ImageManager(const DeviceInfo& info)
|
||||
/* Should not happen. */
|
||||
tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
|
||||
tex_num_images[IMAGE_DATA_TYPE_BYTE4] = 0;
|
||||
tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0;
|
||||
tex_num_images[IMAGE_DATA_TYPE_FLOAT] = 0;
|
||||
tex_num_images[IMAGE_DATA_TYPE_BYTE] = 0;
|
||||
tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0;
|
||||
tex_num_images[IMAGE_DATA_TYPE_HALF] = 0;
|
||||
tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
|
||||
tex_start_images[IMAGE_DATA_TYPE_BYTE4] = 0;
|
||||
tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0;
|
||||
tex_start_images[IMAGE_DATA_TYPE_FLOAT] = 0;
|
||||
tex_start_images[IMAGE_DATA_TYPE_BYTE] = 0;
|
||||
tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0;
|
||||
tex_start_images[IMAGE_DATA_TYPE_HALF] = 0;
|
||||
assert(0);
|
||||
}
|
||||
@ -216,7 +216,7 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen
|
||||
}
|
||||
|
||||
/* We use a consecutive slot counting scheme on the devices, in order
|
||||
* float4, byte4, float, byte.
|
||||
* float4, byte4, half4, float, byte, half.
|
||||
* These functions convert the slot ids from ImageManager "images" ones
|
||||
* to device ones and vice versa. */
|
||||
int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type)
|
||||
|
@ -39,9 +39,9 @@ public:
|
||||
enum ImageDataType {
|
||||
IMAGE_DATA_TYPE_FLOAT4 = 0,
|
||||
IMAGE_DATA_TYPE_BYTE4 = 1,
|
||||
IMAGE_DATA_TYPE_FLOAT = 2,
|
||||
IMAGE_DATA_TYPE_BYTE = 3,
|
||||
IMAGE_DATA_TYPE_HALF4 = 4,
|
||||
IMAGE_DATA_TYPE_HALF4 = 2,
|
||||
IMAGE_DATA_TYPE_FLOAT = 3,
|
||||
IMAGE_DATA_TYPE_BYTE = 4,
|
||||
IMAGE_DATA_TYPE_HALF = 5,
|
||||
|
||||
IMAGE_DATA_NUM_TYPES
|
||||
|
@ -24,58 +24,58 @@ CCL_NAMESPACE_BEGIN
|
||||
/* CPU */
|
||||
#define TEX_NUM_FLOAT4_CPU 1024
|
||||
#define TEX_NUM_BYTE4_CPU 1024
|
||||
#define TEX_NUM_HALF4_CPU 1024
|
||||
#define TEX_NUM_FLOAT_CPU 1024
|
||||
#define TEX_NUM_BYTE_CPU 1024
|
||||
#define TEX_NUM_HALF4_CPU 1024
|
||||
#define TEX_NUM_HALF_CPU 1024
|
||||
#define TEX_START_FLOAT4_CPU 0
|
||||
#define TEX_START_BYTE4_CPU TEX_NUM_FLOAT4_CPU
|
||||
#define TEX_START_FLOAT_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU)
|
||||
#define TEX_START_BYTE_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU)
|
||||
#define TEX_START_HALF4_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU)
|
||||
#define TEX_START_HALF_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU + TEX_NUM_HALF4_CPU)
|
||||
#define TEX_START_HALF4_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU)
|
||||
#define TEX_START_FLOAT_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU)
|
||||
#define TEX_START_BYTE_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU)
|
||||
#define TEX_START_HALF_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU)
|
||||
|
||||
/* CUDA (Geforce 4xx and 5xx) */
|
||||
#define TEX_NUM_FLOAT4_CUDA 5
|
||||
#define TEX_NUM_BYTE4_CUDA 85
|
||||
#define TEX_NUM_HALF4_CUDA 0
|
||||
#define TEX_NUM_FLOAT_CUDA 0
|
||||
#define TEX_NUM_BYTE_CUDA 0
|
||||
#define TEX_NUM_HALF4_CUDA 0
|
||||
#define TEX_NUM_HALF_CUDA 0
|
||||
#define TEX_START_FLOAT4_CUDA 0
|
||||
#define TEX_START_BYTE4_CUDA TEX_NUM_FLOAT4_CUDA
|
||||
#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA)
|
||||
#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA)
|
||||
#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA)
|
||||
#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA + TEX_NUM_HALF4_CUDA)
|
||||
#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA)
|
||||
#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA)
|
||||
#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA)
|
||||
#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA)
|
||||
|
||||
/* CUDA (Kepler, Geforce 6xx and above) */
|
||||
#define TEX_NUM_FLOAT4_CUDA_KEPLER 1024
|
||||
#define TEX_NUM_BYTE4_CUDA_KEPLER 1024
|
||||
#define TEX_NUM_HALF4_CUDA_KEPLER 0
|
||||
#define TEX_NUM_FLOAT_CUDA_KEPLER 1024
|
||||
#define TEX_NUM_BYTE_CUDA_KEPLER 1024
|
||||
#define TEX_NUM_HALF4_CUDA_KEPLER 0
|
||||
#define TEX_NUM_HALF_CUDA_KEPLER 0
|
||||
#define TEX_START_FLOAT4_CUDA_KEPLER 0
|
||||
#define TEX_START_BYTE4_CUDA_KEPLER TEX_NUM_FLOAT4_CUDA_KEPLER
|
||||
#define TEX_START_FLOAT_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER)
|
||||
#define TEX_START_BYTE_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER)
|
||||
#define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER)
|
||||
#define TEX_START_HALF_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER)
|
||||
#define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER)
|
||||
#define TEX_START_FLOAT_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER)
|
||||
#define TEX_START_BYTE_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER)
|
||||
#define TEX_START_HALF_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER)
|
||||
|
||||
/* OpenCL */
|
||||
#define TEX_NUM_FLOAT4_OPENCL 1024
|
||||
#define TEX_NUM_BYTE4_OPENCL 1024
|
||||
#define TEX_NUM_HALF4_OPENCL 0
|
||||
#define TEX_NUM_FLOAT_OPENCL 0
|
||||
#define TEX_NUM_BYTE_OPENCL 0
|
||||
#define TEX_NUM_HALF4_OPENCL 0
|
||||
#define TEX_NUM_HALF_OPENCL 0
|
||||
#define TEX_START_FLOAT4_OPENCL 0
|
||||
#define TEX_START_BYTE4_OPENCL TEX_NUM_FLOAT4_OPENCL
|
||||
#define TEX_START_FLOAT_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL)
|
||||
#define TEX_START_BYTE_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL)
|
||||
#define TEX_START_HALF4_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL)
|
||||
#define TEX_START_HALF_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL + TEX_NUM_HALF4_OPENCL)
|
||||
#define TEX_START_HALF4_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL)
|
||||
#define TEX_START_FLOAT_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL)
|
||||
#define TEX_START_BYTE_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL)
|
||||
#define TEX_START_HALF_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL)
|
||||
|
||||
|
||||
/* Color to use when textures are not found. */
|
||||
|
Loading…
Reference in New Issue
Block a user