Merge branch 'master' into blender2.8

2017-10-10 01:36:36 +11:00 · 2017-10-10 01:36:36 +11:00 · 6ec43a765b
commit 6ec43a765b
parent abcda06934 4b3e6cb728
32 changed files with 1551 additions and 1502 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -731,6 +731,17 @@ if(WITH_INTERNATIONAL)
 endif()

 if(WITH_PYTHON)
+	# While we have this as an '#error' in bpy_util.h,
+	# upgrading Python tends to cause confusion for users who build.
+	# Give the error message early to make this more obvious.
+	#
+	# Do this before main 'platform_*' checks,
+	# because UNIX will search for the old Python paths which may not exist.
+	# giving errors about missing paths before this case is met.
+	if(DEFINED PYTHON_VERSION AND "${PYTHON_VERSION}" VERSION_LESS "3.6")
+		message(FATAL_ERROR "At least Python 3.6 is required to build")
+	endif()
+
 	if(NOT EXISTS "${CMAKE_SOURCE_DIR}/release/scripts/addons/modules")
 		message(WARNING
 			"Addons path '${CMAKE_SOURCE_DIR}/release/scripts/addons' is missing, "
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@ -1188,7 +1188,7 @@ class CYCLES_WORLD_PT_settings(CyclesButtonsPanel, Panel):
        sub = col.column()
        sub.active = use_cpu(context)
        sub.prop(cworld, "volume_sampling", text="")
-        sub.prop(cworld, "volume_interpolation", text="")
+        col.prop(cworld, "volume_interpolation", text="")
        col.prop(cworld, "homogeneous_volume", text="Homogeneous")


@ -1287,7 +1287,7 @@ class CYCLES_MATERIAL_PT_settings(CyclesButtonsPanel, Panel):
        sub = col.column()
        sub.active = use_cpu(context)
        sub.prop(cmat, "volume_sampling", text="")
-        sub.prop(cmat, "volume_interpolation", text="")
+        col.prop(cmat, "volume_interpolation", text="")
        col.prop(cmat, "homogeneous_volume", text="Homogeneous")

        layout.separator()
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@ -26,6 +26,7 @@
 #include "util/util_stats.h"
 #include "util/util_string.h"
 #include "util/util_thread.h"
+#include "util/util_texture.h"
 #include "util/util_types.h"
 #include "util/util_vector.h"

--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@ -163,6 +163,9 @@ public:
 	TaskPool task_pool;
 	KernelGlobals kernel_globals;

+	device_vector<TextureInfo> texture_info;
+	bool need_texture_info;
+
 #ifdef WITH_OSL
 	OSLGlobals osl_globals;
 #endif
@ -235,6 +238,8 @@ public:
 			VLOG(1) << "Will be using split kernel.";
 		}

+		need_texture_info = false;
+
 #define REGISTER_SPLIT_KERNEL(name) split_kernels[#name] = KernelFunctions<void(*)(KernelGlobals*, KernelData*)>(KERNEL_FUNCTIONS(name))
 		REGISTER_SPLIT_KERNEL(path_init);
 		REGISTER_SPLIT_KERNEL(scene_intersect);
@ -261,6 +266,7 @@ public:
 	~CPUDevice()
 	{
 		task_pool.stop();
+		tex_free(texture_info);
 	}

 	virtual bool show_samples() const
@ -268,6 +274,15 @@ public:
 		return (TaskScheduler::num_threads() == 1);
 	}

+	void load_texture_info()
+	{
+		if(need_texture_info) {
+			tex_free(texture_info);
+			tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
+			need_texture_info = false;
+		}
+	}
+
 	void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
 	{
 		if(name) {
@ -333,14 +348,47 @@ public:
 		VLOG(1) << "Texture allocate: " << name << ", "
 		        << string_human_readable_number(mem.memory_size()) << " bytes. ("
 		        << string_human_readable_size(mem.memory_size()) << ")";
-		kernel_tex_copy(&kernel_globals,
-		                name,
-		                mem.data_pointer,
-		                mem.data_width,
-		                mem.data_height,
-		                mem.data_depth,
-		                interpolation,
-		                extension);
+
+		if(interpolation == INTERPOLATION_NONE) {
+			/* Data texture. */
+			kernel_tex_copy(&kernel_globals,
+							name,
+							mem.data_pointer,
+							mem.data_width,
+							mem.data_height,
+							mem.data_depth,
+							interpolation,
+							extension);
+		}
+		else {
+			/* Image Texture. */
+			int flat_slot = 0;
+			if(string_startswith(name, "__tex_image")) {
+				int pos =  string(name).rfind("_");
+				flat_slot = atoi(name + pos + 1);
+			}
+			else {
+				assert(0);
+			}
+
+			if(flat_slot >= texture_info.size()) {
+				/* Allocate some slots in advance, to reduce amount
+				 * of re-allocations. */
+				texture_info.resize(flat_slot + 128);
+			}
+
+			TextureInfo& info = texture_info.get_data()[flat_slot];
+			info.data = (uint64_t)mem.data_pointer;
+			info.cl_buffer = 0;
+			info.interpolation = interpolation;
+			info.extension = extension;
+			info.width = mem.data_width;
+			info.height = mem.data_height;
+			info.depth = mem.data_depth;
+
+			need_texture_info = true;
+		}
+
 		mem.device_pointer = mem.data_pointer;
 		mem.device_size = mem.memory_size();
 		stats.mem_alloc(mem.device_size);
@ -352,6 +400,7 @@ public:
 			mem.device_pointer = 0;
 			stats.mem_free(mem.device_size);
 			mem.device_size = 0;
+			need_texture_info = true;
 		}
 	}

@ -784,6 +833,9 @@ public:

 	void task_add(DeviceTask& task)
 	{
+		/* Load texture info. */
+		load_texture_info();
+
 		/* split task into smaller ones */
 		list<DeviceTask> tasks;

--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@ -129,7 +129,7 @@ public:
 	CUcontext cuContext;
 	CUmodule cuModule, cuFilterModule;
 	map<device_ptr, bool> tex_interp_map;
-	map<device_ptr, uint> tex_bindless_map;
+	map<device_ptr, CUtexObject> tex_bindless_map;
 	int cuDevId;
 	int cuDevArchitecture;
 	bool first_error;
@ -145,8 +145,8 @@ public:
 	map<device_ptr, PixelMem> pixel_mem_map;

 	/* Bindless Textures */
-	device_vector<uint> bindless_mapping;
-	bool need_bindless_mapping;
+	device_vector<TextureInfo> texture_info;
+	bool need_texture_info;

 	CUdeviceptr cuda_device_ptr(device_ptr mem)
 	{
@ -231,7 +231,7 @@ public:

 		split_kernel = NULL;

-		need_bindless_mapping = false;
+		need_texture_info = false;

 		/* intialize */
 		if(cuda_error(cuInit(0)))
@ -274,7 +274,7 @@ public:
 		delete split_kernel;

 		if(info.has_bindless_textures) {
-			tex_free(bindless_mapping);
+			tex_free(texture_info);
 		}

 		cuda_assert(cuCtxDestroy(cuContext));
@ -544,12 +544,12 @@ public:
 		return (result == CUDA_SUCCESS);
 	}

-	void load_bindless_mapping()
+	void load_texture_info()
 	{
-		if(info.has_bindless_textures && need_bindless_mapping) {
-			tex_free(bindless_mapping);
-			tex_alloc("__bindless_mapping", bindless_mapping, INTERPOLATION_NONE, EXTENSION_REPEAT);
-			need_bindless_mapping = false;
+		if(info.has_bindless_textures && need_texture_info) {
+			tex_free(texture_info);
+			tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
+			need_texture_info = false;
 		}
 	}

@ -646,8 +646,7 @@ public:
 		        << string_human_readable_number(mem.memory_size()) << " bytes. ("
 		        << string_human_readable_size(mem.memory_size()) << ")";

-		/* Check if we are on sm_30 or above.
-		 * We use arrays and bindles textures for storage there */
+		/* Check if we are on sm_30 or above, for bindless textures. */
 		bool has_bindless_textures = info.has_bindless_textures;

 		/* General variables for both architectures */
@ -679,20 +678,10 @@ public:
 			filter_mode = CU_TR_FILTER_MODE_LINEAR;
 		}

-		CUarray_format_enum format;
-		switch(mem.data_type) {
-			case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
-			case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
-			case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
-			case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
-			case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
-			default: assert(0); return;
-		}
-
 		/* General variables for Fermi */
 		CUtexref texref = NULL;

-		if(!has_bindless_textures) {
+		if(!has_bindless_textures && interpolation != INTERPOLATION_NONE) {
 			if(mem.data_depth > 1) {
 				/* Kernel uses different bind names for 2d and 3d float textures,
 				 * so we have to adjust couple of things here.
@ -711,41 +700,41 @@ public:
 			}
 		}

-		/* Data Storage */
 		if(interpolation == INTERPOLATION_NONE) {
-			if(has_bindless_textures) {
-				mem_alloc(NULL, mem, MEM_READ_ONLY);
-				mem_copy_to(mem);
+			/* Data Storage */
+			mem_alloc(NULL, mem, MEM_READ_ONLY);
+			mem_copy_to(mem);

-				CUdeviceptr cumem;
-				size_t cubytes;
+			CUdeviceptr cumem;
+			size_t cubytes;

-				cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
+			cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));

-				if(cubytes == 8) {
-					/* 64 bit device pointer */
-					uint64_t ptr = mem.device_pointer;
-					cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
-				}
-				else {
-					/* 32 bit device pointer */
-					uint32_t ptr = (uint32_t)mem.device_pointer;
-					cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
-				}
+			if(cubytes == 8) {
+				/* 64 bit device pointer */
+				uint64_t ptr = mem.device_pointer;
+				cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
 			}
 			else {
-				mem_alloc(NULL, mem, MEM_READ_ONLY);
-				mem_copy_to(mem);
-
-				cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
-				cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT));
-				cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER));
+				/* 32 bit device pointer */
+				uint32_t ptr = (uint32_t)mem.device_pointer;
+				cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
 			}
 		}
-		/* Texture Storage */
 		else {
+			/* Texture Storage */
 			CUarray handle = NULL;

+			CUarray_format_enum format;
+			switch(mem.data_type) {
+				case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
+				case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
+				case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
+				case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
+				case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
+				default: assert(0); return;
+			}
+
 			if(mem.data_depth > 1) {
 				CUDA_ARRAY3D_DESCRIPTOR desc;

@ -810,8 +799,8 @@ public:

 			stats.mem_alloc(size);

-			/* Bindless Textures - Kepler */
 			if(has_bindless_textures) {
+				/* Bindless Textures - Kepler */
 				int flat_slot = 0;
 				if(string_startswith(name, "__tex_image")) {
 					int pos =  string(name).rfind("_");
@ -844,35 +833,39 @@ public:
 				}

 				/* Resize once */
-				if(flat_slot >= bindless_mapping.size()) {
+				if(flat_slot >= texture_info.size()) {
 					/* Allocate some slots in advance, to reduce amount
-					 * of re-allocations.
-					 */
-					bindless_mapping.resize(flat_slot + 128);
+					 * of re-allocations. */
+					texture_info.resize(flat_slot + 128);
 				}

 				/* Set Mapping and tag that we need to (re-)upload to device */
-				bindless_mapping.get_data()[flat_slot] = (uint)tex;
-				tex_bindless_map[mem.device_pointer] = (uint)tex;
-				need_bindless_mapping = true;
+				TextureInfo& info = texture_info.get_data()[flat_slot];
+				info.data = (uint64_t)tex;
+				info.cl_buffer = 0;
+				info.interpolation = interpolation;
+				info.extension = extension;
+				info.width = mem.data_width;
+				info.height = mem.data_height;
+				info.depth = mem.data_depth;
+
+				tex_bindless_map[mem.device_pointer] = tex;
+				need_texture_info = true;
 			}
-			/* Regular Textures - Fermi */
 			else {
+				/* Regular Textures - Fermi */
 				cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT));
 				cuda_assert(cuTexRefSetFilterMode(texref, filter_mode));
 				cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
-			}
-		}

-		/* Fermi, Data and Image Textures */
-		if(!has_bindless_textures) {
-			cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode));
-			cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode));
-			if(mem.data_depth > 1) {
-				cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
-			}
+				cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode));
+				cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode));
+				if(mem.data_depth > 1) {
+					cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
+				}

-			cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
+				cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
+			}
 		}

 		/* Fermi and Kepler */
@ -888,8 +881,8 @@ public:

 				/* Free CUtexObject (Bindless Textures) */
 				if(info.has_bindless_textures && tex_bindless_map[mem.device_pointer]) {
-					uint flat_slot = tex_bindless_map[mem.device_pointer];
-					cuTexObjectDestroy(flat_slot);
+					CUtexObject tex = tex_bindless_map[mem.device_pointer];
+					cuTexObjectDestroy(tex);
 				}

 				tex_interp_map.erase(tex_interp_map.find(mem.device_pointer));
@ -1737,9 +1730,6 @@ public:
 		if(task->type == DeviceTask::RENDER) {
 			RenderTile tile;

-			/* Upload Bindless Mapping */
-			load_bindless_mapping();
-
 			DeviceRequestedFeatures requested_features;
 			if(use_split_kernel()) {
 				if(!use_adaptive_compilation()) {
@ -1780,9 +1770,6 @@ public:
 			}
 		}
 		else if(task->type == DeviceTask::SHADER) {
-			/* Upload Bindless Mapping */
-			load_bindless_mapping();
-
 			shader(*task);

 			cuda_assert(cuCtxSynchronize());
@ -1805,9 +1792,12 @@ public:

 	void task_add(DeviceTask& task)
 	{
-		if(task.type == DeviceTask::FILM_CONVERT) {
-			CUDAContextScope scope(this);
+		CUDAContextScope scope(this);

+		/* Load texture info. */
+		load_texture_info();
+
+		if(task.type == DeviceTask::FILM_CONVERT) {
 			/* must be done in main thread due to opengl access */
 			film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
 			cuda_assert(cuCtxSynchronize());
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@ -545,15 +545,10 @@ private:
 	MemoryManager memory_manager;
 	friend class MemoryManager;

-	struct tex_info_t {
-		uint buffer, padding;
-		cl_ulong offset;
-		uint width, height, depth, options;
-	};
-	static_assert_align(tex_info_t, 16);
+	static_assert_align(TextureInfo, 16);

-	vector<tex_info_t> texture_descriptors;
-	device_memory texture_descriptors_buffer;
+	vector<TextureInfo> texture_info;
+	device_memory texture_info_buffer;

 	struct Texture {
 		Texture() {}
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@ -136,11 +136,11 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou
 		return;
 	}

-	/* Allocate this right away so that texture_descriptors_buffer is placed at offset 0 in the device memory buffers */
-	texture_descriptors.resize(1);
-	texture_descriptors_buffer.resize(1);
-	texture_descriptors_buffer.data_pointer = (device_ptr)&texture_descriptors[0];
-	memory_manager.alloc("texture_descriptors", texture_descriptors_buffer);
+	/* Allocate this right away so that texture_info_buffer is placed at offset 0 in the device memory buffers */
+	texture_info.resize(1);
+	texture_info_buffer.resize(1);
+	texture_info_buffer.data_pointer = (device_ptr)&texture_info[0];
+	memory_manager.alloc("texture_info", texture_info_buffer);

 	fprintf(stderr, "Device init success\n");
 	device_initialized = true;
@ -625,7 +625,7 @@ void OpenCLDeviceBase::flush_texture_buffers()

 	vector<texture_slot_t> texture_slots;

-#define KERNEL_TEX(type, ttype, name) \
+#define KERNEL_TEX(type, name) \
 	if(textures.find(#name) != textures.end()) { \
 		texture_slots.push_back(texture_slot_t(#name, num_slots)); \
 	} \
@ -647,55 +647,38 @@ void OpenCLDeviceBase::flush_texture_buffers()
 	}

 	/* Realloc texture descriptors buffer. */
-	memory_manager.free(texture_descriptors_buffer);
+	memory_manager.free(texture_info_buffer);

-	texture_descriptors.resize(num_slots);
-	texture_descriptors_buffer.resize(num_slots * sizeof(tex_info_t));
-	texture_descriptors_buffer.data_pointer = (device_ptr)&texture_descriptors[0];
+	texture_info.resize(num_slots);
+	texture_info_buffer.resize(num_slots * sizeof(TextureInfo));
+	texture_info_buffer.data_pointer = (device_ptr)&texture_info[0];

-	memory_manager.alloc("texture_descriptors", texture_descriptors_buffer);
+	memory_manager.alloc("texture_info", texture_info_buffer);

 	/* Fill in descriptors */
 	foreach(texture_slot_t& slot, texture_slots) {
 		Texture& tex = textures[slot.name];

-		tex_info_t& info = texture_descriptors[slot.slot];
+		TextureInfo& info = texture_info[slot.slot];

 		MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name);

-		info.offset = desc.offset;
-		info.buffer = desc.device_buffer;
+		info.data = desc.offset;
+		info.cl_buffer = desc.device_buffer;

 		if(string_startswith(slot.name, "__tex_image")) {
 			info.width = tex.mem->data_width;
 			info.height = tex.mem->data_height;
 			info.depth = tex.mem->data_depth;

-			info.options = 0;
-
-			if(tex.interpolation == INTERPOLATION_CLOSEST) {
-				info.options |= (1 << 0);
-			}
-
-			switch(tex.extension) {
-				case EXTENSION_REPEAT:
-					info.options |= (1 << 1);
-					break;
-				case EXTENSION_EXTEND:
-					info.options |= (1 << 2);
-					break;
-				case EXTENSION_CLIP:
-					info.options |= (1 << 3);
-					break;
-				default:
-					break;
-			}
+			info.interpolation = tex.interpolation;
+			info.extension = tex.extension;
 		}
 	}

 	/* Force write of descriptors. */
-	memory_manager.free(texture_descriptors_buffer);
-	memory_manager.alloc("texture_descriptors", texture_descriptors_buffer);
+	memory_manager.free(texture_info_buffer);
+	memory_manager.alloc("texture_info", texture_info_buffer);
 }

 void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@ -117,14 +117,8 @@ public:
 				ccl_constant KernelData *data;
 				ccl_global char *buffers[8];

-				typedef struct _tex_info_t {
-					uint buffer, padding;
-					uint64_t offset;
-					uint width, height, depth, options;
-				} _tex_info_t;
-
-#define KERNEL_TEX(type, ttype, name) \
-				_tex_info_t name;
+#define KERNEL_TEX(type, name) \
+				TextureInfo name;
 #include "kernel/kernel_textures.h"
 #undef KERNEL_TEX

--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@ -8,7 +8,7 @@ set(INC_SYS

 )

-set(SRC
+set(SRC_CPU_KERNELS
 	kernels/cpu/kernel.cpp
 	kernels/cpu/kernel_sse2.cpp
 	kernels/cpu/kernel_sse3.cpp
@ -27,6 +27,15 @@ set(SRC
 	kernels/cpu/filter_sse41.cpp
 	kernels/cpu/filter_avx.cpp
 	kernels/cpu/filter_avx2.cpp
+)
+
+set(SRC_CUDA_KERNELS
+	kernels/cuda/kernel.cu
+	kernels/cuda/kernel_split.cu
+	kernels/cuda/filter.cu
+)
+
+set(SRC_OPENCL_KERNELS
 	kernels/opencl/kernel.cl
 	kernels/opencl/kernel_state_buffer_size.cl
 	kernels/opencl/kernel_split.cl
@ -50,9 +59,6 @@ set(SRC
 	kernels/opencl/kernel_indirect_subsurface.cl
 	kernels/opencl/kernel_buffer_update.cl
 	kernels/opencl/filter.cl
-	kernels/cuda/kernel.cu
-	kernels/cuda/kernel_split.cu
-	kernels/cuda/filter.cu
 )

 set(SRC_BVH_HEADERS
@ -83,7 +89,6 @@ set(SRC_HEADERS
 	kernel_emission.h
 	kernel_film.h
 	kernel_globals.h
-	kernel_image_opencl.h
 	kernel_jitter.h
 	kernel_light.h
 	kernel_math.h
@ -119,10 +124,12 @@ set(SRC_KERNELS_CPU_HEADERS

 set(SRC_KERNELS_CUDA_HEADERS
 	kernels/cuda/kernel_config.h
+	kernels/cuda/kernel_cuda_image.h
 )

 set(SRC_KERNELS_OPENCL_HEADERS
 	kernels/opencl/kernel_split_function.h
+	kernels/opencl/kernel_opencl_image.h
 )

 set(SRC_CLOSURE_HEADERS
@ -457,7 +464,9 @@ if(CXX_HAS_AVX2)
 endif()

 add_library(cycles_kernel
-	${SRC}
+	${SRC_CPU_KERNELS}
+	${SRC_CUDA_KERNELS}
+	${SRC_OPENCL_KERNELS}
 	${SRC_HEADERS}
 	${SRC_KERNELS_CPU_HEADERS}
 	${SRC_KERNELS_CUDA_HEADERS}
@ -484,34 +493,10 @@ endif()
 #add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
 #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)

-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_state_buffer_size.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_do_volume.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_background.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_sort.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_dl.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_enqueue_inactive.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split_function.h" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/filter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/filter.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_OPENCL_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CUDA_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_OPENCL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_CUDA_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/bvh)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/closure)
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@ -605,8 +605,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure

 						/* if fresnel is used, calculate the color with reflection_color(...) */
 						if(use_fresnel) {
-							*pdf = 1.0f;
-							*eval = reflection_color(bsdf, *omega_in, m);
+							*eval *= reflection_color(bsdf, *omega_in, m);
 						}

 						label = LABEL_REFLECT | LABEL_SINGULAR;
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@ -29,21 +29,6 @@ CCL_NAMESPACE_BEGIN

 /* Return position normalized to 0..1 in mesh bounds */

-#if defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300
-ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z)
-{
-	float4 r;
-	switch(id) {
-		case 0: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_000, x, y, z); break;
-		case 8: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_008, x, y, z); break;
-		case 16: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_016, x, y, z); break;
-		case 24: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_024, x, y, z); break;
-		case 32: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_032, x, y, z); break;
-	}
-	return r;
-}
-#endif  /* __KERNEL_CUDA__ */
-
 ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
                                                    const ShaderData *sd,
                                                    float3 P)
@ -65,23 +50,8 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
 ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
 {
 	float3 P = volume_normalized_position(kg, sd, sd->P);
-#ifdef __KERNEL_CUDA__
-#  if __CUDA_ARCH__ >= 300
-	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset);
-	float f = kernel_tex_image_interp_3d_float(tex, P.x, P.y, P.z);
-	float4 r = make_float4(f, f, f, 1.0f);
-#  else
-	float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z);
-#  endif
-#elif defined(__KERNEL_OPENCL__)
-	float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
-#else
-	float4 r;
-	if(sd->flag & SD_VOLUME_CUBIC)
-		r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC);
-	else
-		r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z);
-#endif
+	InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE;
+	float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);

 	if(dx) *dx = 0.0f;
 	if(dy) *dy = 0.0f;
@ -92,22 +62,8 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
 ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
 {
 	float3 P = volume_normalized_position(kg, sd, sd->P);
-#ifdef __KERNEL_CUDA__
-#  if __CUDA_ARCH__ >= 300
-	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset);
-	float4 r = kernel_tex_image_interp_3d_float4(tex, P.x, P.y, P.z);
-#  else
-	float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z);
-#  endif
-#elif defined(__KERNEL_OPENCL__)
-	float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
-#else
-	float4 r;
-	if(sd->flag & SD_VOLUME_CUBIC)
-		r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC);
-	else
-		r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z);
-#endif
+	InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE;
+	float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);

 	if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
 	if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@ -74,7 +74,7 @@ CCL_NAMESPACE_BEGIN
 * pointer lookup. */

 template<typename T> struct texture  {
-	ccl_always_inline T fetch(int index)
+	ccl_always_inline const T& fetch(int index)
 	{
 		kernel_assert(index >= 0 && index < width);
 		return data[index];
@ -112,449 +112,6 @@ template<typename T> struct texture  {
 	int width;
 };

-template<typename T> struct texture_image  {
-#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
-	{ \
-		u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
-		u[1] =  ((      0.5f * t - 1.0f) * t       ) * t + (2.0f/3.0f); \
-		u[2] =  ((     -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
-		u[3] = (1.0f / 6.0f) * t * t * t; \
-	} (void)0
-
-	ccl_always_inline float4 read(float4 r)
-	{
-		return r;
-	}
-
-	ccl_always_inline float4 read(uchar4 r)
-	{
-		float f = 1.0f/255.0f;
-		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
-	}
-
-	ccl_always_inline float4 read(uchar r)
-	{
-		float f = r*(1.0f/255.0f);
-		return make_float4(f, f, f, 1.0f);
-	}
-
-	ccl_always_inline float4 read(float r)
-	{
-		/* TODO(dingto): Optimize this, so interpolation
-		 * happens on float instead of float4 */
-		return make_float4(r, r, r, 1.0f);
-	}
-
-	ccl_always_inline float4 read(half4 r)
-	{
-		return half4_to_float4(r);
-	}
-
-	ccl_always_inline float4 read(half r)
-	{
-		float f = half_to_float(r);
-		return make_float4(f, f, f, 1.0f);
-	}
-
-	ccl_always_inline int wrap_periodic(int x, int width)
-	{
-		x %= width;
-		if(x < 0)
-			x += width;
-		return x;
-	}
-
-	ccl_always_inline int wrap_clamp(int x, int width)
-	{
-		return clamp(x, 0, width-1);
-	}
-
-	ccl_always_inline float frac(float x, int *ix)
-	{
-		int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
-		*ix = i;
-		return x - (float)i;
-	}
-
-	ccl_always_inline float4 interp(float x, float y)
-	{
-		if(UNLIKELY(!data))
-			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
-		int ix, iy, nix, niy;
-
-		if(interpolation == INTERPOLATION_CLOSEST) {
-			frac(x*(float)width, &ix);
-			frac(y*(float)height, &iy);
-			switch(extension) {
-				case EXTENSION_REPEAT:
-					ix = wrap_periodic(ix, width);
-					iy = wrap_periodic(iy, height);
-					break;
-				case EXTENSION_CLIP:
-					if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
-						return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-					}
-					ATTR_FALLTHROUGH;
-				case EXTENSION_EXTEND:
-					ix = wrap_clamp(ix, width);
-					iy = wrap_clamp(iy, height);
-					break;
-				default:
-					kernel_assert(0);
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-			}
-			return read(data[ix + iy*width]);
-		}
-		else if(interpolation == INTERPOLATION_LINEAR) {
-			float tx = frac(x*(float)width - 0.5f, &ix);
-			float ty = frac(y*(float)height - 0.5f, &iy);
-
-			switch(extension) {
-				case EXTENSION_REPEAT:
-					ix = wrap_periodic(ix, width);
-					iy = wrap_periodic(iy, height);
-
-					nix = wrap_periodic(ix+1, width);
-					niy = wrap_periodic(iy+1, height);
-					break;
-				case EXTENSION_CLIP:
-					if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
-						return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-					}
-					ATTR_FALLTHROUGH;
-				case EXTENSION_EXTEND:
-					nix = wrap_clamp(ix+1, width);
-					niy = wrap_clamp(iy+1, height);
-
-					ix = wrap_clamp(ix, width);
-					iy = wrap_clamp(iy, height);
-					break;
-				default:
-					kernel_assert(0);
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-			}
-
-			float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]);
-			r += (1.0f - ty)*tx*read(data[nix + iy*width]);
-			r += ty*(1.0f - tx)*read(data[ix + niy*width]);
-			r += ty*tx*read(data[nix + niy*width]);
-
-			return r;
-		}
-		else {
-			/* Bicubic b-spline interpolation. */
-			float tx = frac(x*(float)width - 0.5f, &ix);
-			float ty = frac(y*(float)height - 0.5f, &iy);
-			int pix, piy, nnix, nniy;
-			switch(extension) {
-				case EXTENSION_REPEAT:
-					ix = wrap_periodic(ix, width);
-					iy = wrap_periodic(iy, height);
-
-					pix = wrap_periodic(ix-1, width);
-					piy = wrap_periodic(iy-1, height);
-
-					nix = wrap_periodic(ix+1, width);
-					niy = wrap_periodic(iy+1, height);
-
-					nnix = wrap_periodic(ix+2, width);
-					nniy = wrap_periodic(iy+2, height);
-					break;
-				case EXTENSION_CLIP:
-					if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
-						return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-					}
-					ATTR_FALLTHROUGH;
-				case EXTENSION_EXTEND:
-					pix = wrap_clamp(ix-1, width);
-					piy = wrap_clamp(iy-1, height);
-
-					nix = wrap_clamp(ix+1, width);
-					niy = wrap_clamp(iy+1, height);
-
-					nnix = wrap_clamp(ix+2, width);
-					nniy = wrap_clamp(iy+2, height);
-
-					ix = wrap_clamp(ix, width);
-					iy = wrap_clamp(iy, height);
-					break;
-				default:
-					kernel_assert(0);
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-			}
-
-			const int xc[4] = {pix, ix, nix, nnix};
-			const int yc[4] = {width * piy,
-			                   width * iy,
-			                   width * niy,
-			                   width * nniy};
-			float u[4], v[4];
-			/* Some helper macro to keep code reasonable size,
-			 * let compiler to inline all the matrix multiplications.
-			 */
-#define DATA(x, y) (read(data[xc[x] + yc[y]]))
-#define TERM(col) \
-			(v[col] * (u[0] * DATA(0, col) + \
-			           u[1] * DATA(1, col) + \
-			           u[2] * DATA(2, col) + \
-			           u[3] * DATA(3, col)))
-
-			SET_CUBIC_SPLINE_WEIGHTS(u, tx);
-			SET_CUBIC_SPLINE_WEIGHTS(v, ty);
-
-			/* Actual interpolation. */
-			return TERM(0) + TERM(1) + TERM(2) + TERM(3);
-
-#undef TERM
-#undef DATA
-		}
-	}
-
-	ccl_always_inline float4 interp_3d(float x, float y, float z)
-	{
-		return interp_3d_ex(x, y, z, interpolation);
-	}
-
-	ccl_always_inline float4 interp_3d_ex_closest(float x, float y, float z)
-	{
-		int ix, iy, iz;
-		frac(x*(float)width, &ix);
-		frac(y*(float)height, &iy);
-		frac(z*(float)depth, &iz);
-
-		switch(extension) {
-			case EXTENSION_REPEAT:
-				ix = wrap_periodic(ix, width);
-				iy = wrap_periodic(iy, height);
-				iz = wrap_periodic(iz, depth);
-				break;
-			case EXTENSION_CLIP:
-				if(x < 0.0f || y < 0.0f || z < 0.0f ||
-				   x > 1.0f || y > 1.0f || z > 1.0f)
-				{
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-				ATTR_FALLTHROUGH;
-			case EXTENSION_EXTEND:
-				ix = wrap_clamp(ix, width);
-				iy = wrap_clamp(iy, height);
-				iz = wrap_clamp(iz, depth);
-				break;
-			default:
-				kernel_assert(0);
-				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-
-		return read(data[ix + iy*width + iz*width*height]);
-	}
-
-	ccl_always_inline float4 interp_3d_ex_linear(float x, float y, float z)
-	{
-		int ix, iy, iz;
-		int nix, niy, niz;
-
-		float tx = frac(x*(float)width - 0.5f, &ix);
-		float ty = frac(y*(float)height - 0.5f, &iy);
-		float tz = frac(z*(float)depth - 0.5f, &iz);
-
-		switch(extension) {
-			case EXTENSION_REPEAT:
-				ix = wrap_periodic(ix, width);
-				iy = wrap_periodic(iy, height);
-				iz = wrap_periodic(iz, depth);
-
-				nix = wrap_periodic(ix+1, width);
-				niy = wrap_periodic(iy+1, height);
-				niz = wrap_periodic(iz+1, depth);
-				break;
-			case EXTENSION_CLIP:
-				if(x < 0.0f || y < 0.0f || z < 0.0f ||
-				   x > 1.0f || y > 1.0f || z > 1.0f)
-				{
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-				ATTR_FALLTHROUGH;
-			case EXTENSION_EXTEND:
-				nix = wrap_clamp(ix+1, width);
-				niy = wrap_clamp(iy+1, height);
-				niz = wrap_clamp(iz+1, depth);
-
-				ix = wrap_clamp(ix, width);
-				iy = wrap_clamp(iy, height);
-				iz = wrap_clamp(iz, depth);
-				break;
-			default:
-				kernel_assert(0);
-				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-
-		float4 r;
-
-		r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]);
-		r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]);
-		r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]);
-		r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]);
-
-		r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]);
-		r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]);
-		r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]);
-		r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]);
-
-		return r;
-	}
-
-	/* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
-	 * causing stack overflow issue in this function unless it is inlined.
-	 *
-	 * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
-	 * enabled.
-	 */
-#ifdef __GNUC__
-	ccl_always_inline
-#else
-	ccl_never_inline
-#endif
-	float4 interp_3d_ex_tricubic(float x, float y, float z)
-	{
-		int ix, iy, iz;
-		int nix, niy, niz;
-		/* Tricubic b-spline interpolation. */
-		const float tx = frac(x*(float)width - 0.5f, &ix);
-		const float ty = frac(y*(float)height - 0.5f, &iy);
-		const float tz = frac(z*(float)depth - 0.5f, &iz);
-		int pix, piy, piz, nnix, nniy, nniz;
-
-		switch(extension) {
-			case EXTENSION_REPEAT:
-				ix = wrap_periodic(ix, width);
-				iy = wrap_periodic(iy, height);
-				iz = wrap_periodic(iz, depth);
-
-				pix = wrap_periodic(ix-1, width);
-				piy = wrap_periodic(iy-1, height);
-				piz = wrap_periodic(iz-1, depth);
-
-				nix = wrap_periodic(ix+1, width);
-				niy = wrap_periodic(iy+1, height);
-				niz = wrap_periodic(iz+1, depth);
-
-				nnix = wrap_periodic(ix+2, width);
-				nniy = wrap_periodic(iy+2, height);
-				nniz = wrap_periodic(iz+2, depth);
-				break;
-			case EXTENSION_CLIP:
-				if(x < 0.0f || y < 0.0f || z < 0.0f ||
-				   x > 1.0f || y > 1.0f || z > 1.0f)
-				{
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-				ATTR_FALLTHROUGH;
-			case EXTENSION_EXTEND:
-				pix = wrap_clamp(ix-1, width);
-				piy = wrap_clamp(iy-1, height);
-				piz = wrap_clamp(iz-1, depth);
-
-				nix = wrap_clamp(ix+1, width);
-				niy = wrap_clamp(iy+1, height);
-				niz = wrap_clamp(iz+1, depth);
-
-				nnix = wrap_clamp(ix+2, width);
-				nniy = wrap_clamp(iy+2, height);
-				nniz = wrap_clamp(iz+2, depth);
-
-				ix = wrap_clamp(ix, width);
-				iy = wrap_clamp(iy, height);
-				iz = wrap_clamp(iz, depth);
-				break;
-			default:
-				kernel_assert(0);
-				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		}
-
-		const int xc[4] = {pix, ix, nix, nnix};
-		const int yc[4] = {width * piy,
-		                   width * iy,
-		                   width * niy,
-		                   width * nniy};
-		const int zc[4] = {width * height * piz,
-		                   width * height * iz,
-		                   width * height * niz,
-		                   width * height * nniz};
-		float u[4], v[4], w[4];
-
-		/* Some helper macro to keep code reasonable size,
-		 * let compiler to inline all the matrix multiplications.
-		 */
-#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
-#define COL_TERM(col, row) \
-		(v[col] * (u[0] * DATA(0, col, row) + \
-		           u[1] * DATA(1, col, row) + \
-		           u[2] * DATA(2, col, row) + \
-		           u[3] * DATA(3, col, row)))
-#define ROW_TERM(row) \
-		(w[row] * (COL_TERM(0, row) + \
-		           COL_TERM(1, row) + \
-		           COL_TERM(2, row) + \
-		           COL_TERM(3, row)))
-
-		SET_CUBIC_SPLINE_WEIGHTS(u, tx);
-		SET_CUBIC_SPLINE_WEIGHTS(v, ty);
-		SET_CUBIC_SPLINE_WEIGHTS(w, tz);
-
-		/* Actual interpolation. */
-		return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
-
-#undef COL_TERM
-#undef ROW_TERM
-#undef DATA
-	}
-
-	ccl_always_inline float4 interp_3d_ex(float x, float y, float z,
-	                                      int interpolation = INTERPOLATION_LINEAR)
-	{
-		if(UNLIKELY(!data))
-			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
-		switch(interpolation) {
-			case INTERPOLATION_CLOSEST:
-				return interp_3d_ex_closest(x, y, z);
-			case INTERPOLATION_LINEAR:
-				return interp_3d_ex_linear(x, y, z);
-			default:
-				return interp_3d_ex_tricubic(x, y, z);
-		}
-	}
-
-	ccl_always_inline void dimensions_set(int width_, int height_, int depth_)
-	{
-		width = width_;
-		height = height_;
-		depth = depth_;
-	}
-
-	T *data;
-	int interpolation;
-	ExtensionType extension;
-	int width, height, depth;
-#undef SET_CUBIC_SPLINE_WEIGHTS
-};
-
-typedef texture<float4> texture_float4;
-typedef texture<float2> texture_float2;
-typedef texture<float> texture_float;
-typedef texture<uint> texture_uint;
-typedef texture<int> texture_int;
-typedef texture<uint4> texture_uint4;
-typedef texture<uchar4> texture_uchar4;
-typedef texture<uchar> texture_uchar;
-typedef texture_image<float> texture_image_float;
-typedef texture_image<uchar> texture_image_uchar;
-typedef texture_image<half> texture_image_half;
-typedef texture_image<float4> texture_image_float4;
-typedef texture_image<uchar4> texture_image_uchar4;
-typedef texture_image<half4> texture_image_half4;
-
 /* Macros to handle different memory storage on different devices */

 #define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
@ -563,10 +120,6 @@ typedef texture_image<half4> texture_image_half4;
 #define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index))
 #define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))

-#define kernel_tex_image_interp(tex,x,y) kernel_tex_image_interp_impl(kg,tex,x,y)
-#define kernel_tex_image_interp_3d(tex, x, y, z) kernel_tex_image_interp_3d_impl(kg,tex,x,y,z)
-#define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) kernel_tex_image_interp_3d_ex_impl(kg,tex, x, y, z, interpolation)
-
 #define kernel_data (kg->__data)

 #ifdef __KERNEL_SSE2__
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@ -126,42 +126,16 @@ ccl_device_inline uint ccl_num_groups(uint d)

 /* Textures */

-typedef texture<float4, 1> texture_float4;
-typedef texture<float2, 1> texture_float2;
-typedef texture<float, 1> texture_float;
-typedef texture<uint, 1> texture_uint;
-typedef texture<int, 1> texture_int;
-typedef texture<uint4, 1> texture_uint4;
-typedef texture<uchar, 1> texture_uchar;
-typedef texture<uchar4, 1> texture_uchar4;
+/* Use arrays for regular data. This is a little slower than textures on Fermi,
+ * but allows for cleaner code and we will stop supporting Fermi soon. */
+#define kernel_tex_fetch(t, index) t[(index)]
+
+/* On Kepler (6xx) and above, we use Bindless Textures for images.
+ * On Fermi cards (4xx and 5xx), we have to use regular textures. */
+#if __CUDA_ARCH__ < 300
 typedef texture<float4, 2> texture_image_float4;
 typedef texture<float4, 3> texture_image3d_float4;
 typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
-
-/* Macros to handle different memory storage on different devices */
-
-/* On Fermi cards (4xx and 5xx), we use regular textures for both data and images.
- * On Kepler (6xx) and above, we use Bindless Textures for images and arrays for data.
- *
- * Arrays are necessary in order to use the full VRAM on newer cards, and it's slightly faster.
- * Using Arrays on Fermi turned out to be slower.*/
-
-/* Fermi */
-#if __CUDA_ARCH__ < 300
-#  define __KERNEL_CUDA_TEX_STORAGE__
-#  define kernel_tex_fetch(t, index) tex1Dfetch(t, index)
-
-#  define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
-#  define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z)
-
-/* Kepler */
-#else
-#  define kernel_tex_fetch(t, index) t[(index)]
-
-#  define kernel_tex_image_interp_float4(t, x, y) tex2D<float4>(t, x, y)
-#  define kernel_tex_image_interp_float(t, x, y) tex2D<float>(t, x, y)
-#  define kernel_tex_image_interp_3d_float4(t, x, y, z) tex3D<float4>(t, x, y, z)
-#  define kernel_tex_image_interp_3d_float(t, x, y, z) tex3D<float>(t, x, y, z)
 #endif

 #define kernel_data __data
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@ -144,7 +144,7 @@

 /* data lookup defines */
 #define kernel_data (*kg->data)
-#define kernel_tex_fetch(tex, index) ((ccl_global tex##_t*)(kg->buffers[kg->tex.buffer] + kg->tex.offset))[(index)]
+#define kernel_tex_fetch(tex, index) ((const ccl_global tex##_t*)(kg->buffers[kg->tex.cl_buffer] + kg->tex.data))[(index)]

 /* define NULL */
 #define NULL 0
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@ -46,14 +46,7 @@ struct Intersection;
 struct VolumeStep;

 typedef struct KernelGlobals {
-	vector<texture_image_float4> texture_float4_images;
-	vector<texture_image_uchar4> texture_byte4_images;
-	vector<texture_image_half4> texture_half4_images;
-	vector<texture_image_float> texture_float_images;
-	vector<texture_image_uchar> texture_byte_images;
-	vector<texture_image_half> texture_half_images;
-
-#  define KERNEL_TEX(type, ttype, name) ttype name;
+#  define KERNEL_TEX(type, name) texture<type> name;
 #  define KERNEL_IMAGE_TEX(type, ttype, name)
 #  include "kernel/kernel_textures.h"

@ -99,11 +92,7 @@ typedef struct KernelGlobals {
 	Intersection hits_stack[64];
 } KernelGlobals;

-#  ifdef __KERNEL_CUDA_TEX_STORAGE__
-#    define KERNEL_TEX(type, ttype, name) ttype name;
-#  else
-#    define KERNEL_TEX(type, ttype, name) const __constant__ __device__ type *name;
-#  endif
+#  define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
 #  define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
 #  include "kernel/kernel_textures.h"

@ -113,22 +102,16 @@ typedef struct KernelGlobals {

 #ifdef __KERNEL_OPENCL__

-#  define KERNEL_TEX(type, ttype, name) \
+#  define KERNEL_TEX(type, name) \
 typedef type name##_t;
 #  include "kernel/kernel_textures.h"

-typedef struct tex_info_t {
-	uint buffer, padding;
-	uint64_t offset;
-	uint width, height, depth, options;
-} tex_info_t;
-
 typedef ccl_addr_space struct KernelGlobals {
 	ccl_constant KernelData *data;
 	ccl_global char *buffers[8];

-#  define KERNEL_TEX(type, ttype, name) \
-	tex_info_t name;
+#  define KERNEL_TEX(type, name) \
+	TextureInfo name;
 #  include "kernel/kernel_textures.h"

 #  ifdef __SPLIT_KERNEL__
@ -176,9 +159,9 @@ ccl_device_inline void kernel_set_buffer_info(KernelGlobals *kg)
 	if(ccl_local_id(0) + ccl_local_id(1) == 0)
 #  endif
 	{
-		ccl_global tex_info_t *info = (ccl_global tex_info_t*)kg->buffers[0];
+		ccl_global TextureInfo *info = (ccl_global TextureInfo*)kg->buffers[0];

-#  define KERNEL_TEX(type, ttype, name) \
+#  define KERNEL_TEX(type, name) \
 		kg->name = *(info++);
 #  include "kernel/kernel_textures.h"
 	}
--- a/intern/cycles/kernel/kernel_image_opencl.h
+++ b/intern/cycles/kernel/kernel_image_opencl.h
@ -1,252 +0,0 @@
-/*
- * Copyright 2016 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-/* For OpenCL we do manual lookup and interpolation. */
-
-ccl_device_inline ccl_global tex_info_t* kernel_tex_info(KernelGlobals *kg, uint id) {
-	const uint tex_offset = id
-#define KERNEL_TEX(type, ttype, name) + 1
-#include "kernel/kernel_textures.h"
-	;
-
-	return &((ccl_global tex_info_t*)kg->buffers[0])[tex_offset];
-}
-
-#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->buffer] + info->offset))[(index)]
-
-ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
-{
-	const ccl_global tex_info_t *info = kernel_tex_info(kg, id);
-	const int texture_type = kernel_tex_type(id);
-
-	/* Float4 */
-	if(texture_type == IMAGE_DATA_TYPE_FLOAT4) {
-		return tex_fetch(float4, info, offset);
-	}
-	/* Byte4 */
-	else if(texture_type == IMAGE_DATA_TYPE_BYTE4) {
-		uchar4 r = tex_fetch(uchar4, info, offset);
-		float f = 1.0f/255.0f;
-		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
-	}
-	/* Float */
-	else if(texture_type == IMAGE_DATA_TYPE_FLOAT) {
-		float f = tex_fetch(float, info, offset);
-		return make_float4(f, f, f, 1.0f);
-	}
-	/* Byte */
-	else {
-		uchar r = tex_fetch(uchar, info, offset);
-		float f = r * (1.0f/255.0f);
-		return make_float4(f, f, f, 1.0f);
-	}
-}
-
-ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
-{
-	x %= width;
-	if(x < 0)
-		x += width;
-	return x;
-}
-
-ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
-{
-	return clamp(x, 0, width-1);
-}
-
-ccl_device_inline float svm_image_texture_frac(float x, int *ix)
-{
-	int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
-	*ix = i;
-	return x - (float)i;
-}
-
-ccl_device_inline uint kernel_decode_image_interpolation(uint info)
-{
-	return (info & (1 << 0)) ? INTERPOLATION_CLOSEST : INTERPOLATION_LINEAR;
-}
-
-ccl_device_inline uint kernel_decode_image_extension(uint info)
-{
-	if(info & (1 << 1)) {
-		return EXTENSION_REPEAT;
-	}
-	else if(info & (1 << 2)) {
-		return EXTENSION_EXTEND;
-	}
-	else {
-		return EXTENSION_CLIP;
-	}
-}
-
-ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
-{
-	const ccl_global tex_info_t *info = kernel_tex_info(kg, id);
-
-	uint width = info->width;
-	uint height = info->height;
-	uint offset = 0;
-
-	/* Decode image options. */
-	uint interpolation = kernel_decode_image_interpolation(info->options);
-	uint extension = kernel_decode_image_extension(info->options);
-
-	/* Actual sampling. */
-	float4 r;
-	int ix, iy, nix, niy;
-	if(interpolation == INTERPOLATION_CLOSEST) {
-		svm_image_texture_frac(x*width, &ix);
-		svm_image_texture_frac(y*height, &iy);
-
-		if(extension == EXTENSION_REPEAT) {
-			ix = svm_image_texture_wrap_periodic(ix, width);
-			iy = svm_image_texture_wrap_periodic(iy, height);
-		}
-		else {
-			if(extension == EXTENSION_CLIP) {
-				if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-			}
-			/* Fall through. */
-			/* EXTENSION_EXTEND */
-			ix = svm_image_texture_wrap_clamp(ix, width);
-			iy = svm_image_texture_wrap_clamp(iy, height);
-		}
-
-		r = svm_image_texture_read(kg, id, offset + ix + iy*width);
-	}
-	else { /* INTERPOLATION_LINEAR */
-		float tx = svm_image_texture_frac(x*width - 0.5f, &ix);
-		float ty = svm_image_texture_frac(y*height - 0.5f, &iy);
-
-		if(extension == EXTENSION_REPEAT) {
-			ix = svm_image_texture_wrap_periodic(ix, width);
-			iy = svm_image_texture_wrap_periodic(iy, height);
-
-			nix = svm_image_texture_wrap_periodic(ix+1, width);
-			niy = svm_image_texture_wrap_periodic(iy+1, height);
-		}
-		else {
-			if(extension == EXTENSION_CLIP) {
-				if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-			}
-			nix = svm_image_texture_wrap_clamp(ix+1, width);
-			niy = svm_image_texture_wrap_clamp(iy+1, height);
-			ix = svm_image_texture_wrap_clamp(ix, width);
-			iy = svm_image_texture_wrap_clamp(iy, height);
-		}
-
-		r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width);
-		r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width);
-		r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width);
-		r += ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width);
-	}
-	return r;
-}
-
-
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z)
-{
-	const ccl_global tex_info_t *info = kernel_tex_info(kg, id);
-
-	uint width = info->width;
-	uint height = info->height;
-	uint offset = 0;
-	uint depth = info->depth;
-
-	/* Decode image options. */
-	uint interpolation = kernel_decode_image_interpolation(info->options);
-	uint extension = kernel_decode_image_extension(info->options);
-
-	/* Actual sampling. */
-	float4 r;
-	int ix, iy, iz, nix, niy, niz;
-	if(interpolation == INTERPOLATION_CLOSEST) {
-		svm_image_texture_frac(x*width, &ix);
-		svm_image_texture_frac(y*height, &iy);
-		svm_image_texture_frac(z*depth, &iz);
-
-		if(extension == EXTENSION_REPEAT) {
-			ix = svm_image_texture_wrap_periodic(ix, width);
-			iy = svm_image_texture_wrap_periodic(iy, height);
-			iz = svm_image_texture_wrap_periodic(iz, depth);
-		}
-		else {
-			if(extension == EXTENSION_CLIP) {
-				if(x < 0.0f || y < 0.0f || z < 0.0f ||
-				   x > 1.0f || y > 1.0f || z > 1.0f)
-				{
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-			}
-			/* Fall through. */
-			/* EXTENSION_EXTEND */
-			ix = svm_image_texture_wrap_clamp(ix, width);
-			iy = svm_image_texture_wrap_clamp(iy, height);
-			iz = svm_image_texture_wrap_clamp(iz, depth);
-		}
-		r = svm_image_texture_read(kg, id, offset + ix + iy*width + iz*width*height);
-	}
-	else { /* INTERPOLATION_LINEAR */
-		float tx = svm_image_texture_frac(x*(float)width - 0.5f, &ix);
-		float ty = svm_image_texture_frac(y*(float)height - 0.5f, &iy);
-		float tz = svm_image_texture_frac(z*(float)depth - 0.5f, &iz);
-
-		if(extension == EXTENSION_REPEAT) {
-			ix = svm_image_texture_wrap_periodic(ix, width);
-			iy = svm_image_texture_wrap_periodic(iy, height);
-			iz = svm_image_texture_wrap_periodic(iz, depth);
-
-			nix = svm_image_texture_wrap_periodic(ix+1, width);
-			niy = svm_image_texture_wrap_periodic(iy+1, height);
-			niz = svm_image_texture_wrap_periodic(iz+1, depth);
-		}
-		else {
-			if(extension == EXTENSION_CLIP) {
-				if(x < 0.0f || y < 0.0f || z < 0.0f ||
-				   x > 1.0f || y > 1.0f || z > 1.0f)
-				{
-					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-				}
-			}
-			/* Fall through. */
-			/*  EXTENSION_EXTEND */
-			nix = svm_image_texture_wrap_clamp(ix+1, width);
-			niy = svm_image_texture_wrap_clamp(iy+1, height);
-			niz = svm_image_texture_wrap_clamp(iz+1, depth);
-
-			ix = svm_image_texture_wrap_clamp(ix, width);
-			iy = svm_image_texture_wrap_clamp(iy, height);
-			iz = svm_image_texture_wrap_clamp(iz, depth);
-		}
-
-		r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width + iz*width*height);
-		r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width + iz*width*height);
-		r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width + iz*width*height);
-		r += (1.0f - tz)*ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width + iz*width*height);
-
-		r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width + niz*width*height);
-		r += tz*(1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width + niz*width*height);
-		r += tz*ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width + niz*width*height);
-		r += tz*ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width + niz*width*height);
-	}
-	return r;
-}
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@ -15,7 +15,7 @@
 */

 #ifndef KERNEL_TEX
-#  define KERNEL_TEX(type, ttype, name)
+#  define KERNEL_TEX(type, name)
 #endif

 #ifndef KERNEL_IMAGE_TEX
@ -23,63 +23,65 @@
 #endif

 /* bvh */
-KERNEL_TEX(float4, texture_float4, __bvh_nodes)
-KERNEL_TEX(float4, texture_float4, __bvh_leaf_nodes)
-KERNEL_TEX(float4, texture_float4, __prim_tri_verts)
-KERNEL_TEX(uint, texture_uint, __prim_tri_index)
-KERNEL_TEX(uint, texture_uint, __prim_type)
-KERNEL_TEX(uint, texture_uint, __prim_visibility)
-KERNEL_TEX(uint, texture_uint, __prim_index)
-KERNEL_TEX(uint, texture_uint, __prim_object)
-KERNEL_TEX(uint, texture_uint, __object_node)
-KERNEL_TEX(float2, texture_float2, __prim_time)
+KERNEL_TEX(float4, __bvh_nodes)
+KERNEL_TEX(float4, __bvh_leaf_nodes)
+KERNEL_TEX(float4, __prim_tri_verts)
+KERNEL_TEX(uint, __prim_tri_index)
+KERNEL_TEX(uint, __prim_type)
+KERNEL_TEX(uint, __prim_visibility)
+KERNEL_TEX(uint, __prim_index)
+KERNEL_TEX(uint, __prim_object)
+KERNEL_TEX(uint, __object_node)
+KERNEL_TEX(float2, __prim_time)

 /* objects */
-KERNEL_TEX(float4, texture_float4, __objects)
-KERNEL_TEX(float4, texture_float4, __objects_vector)
+KERNEL_TEX(float4, __objects)
+KERNEL_TEX(float4, __objects_vector)

 /* triangles */
-KERNEL_TEX(uint, texture_uint, __tri_shader)
-KERNEL_TEX(float4, texture_float4, __tri_vnormal)
-KERNEL_TEX(uint4, texture_uint4, __tri_vindex)
-KERNEL_TEX(uint, texture_uint, __tri_patch)
-KERNEL_TEX(float2, texture_float2, __tri_patch_uv)
+KERNEL_TEX(uint, __tri_shader)
+KERNEL_TEX(float4, __tri_vnormal)
+KERNEL_TEX(uint4, __tri_vindex)
+KERNEL_TEX(uint, __tri_patch)
+KERNEL_TEX(float2, __tri_patch_uv)

 /* curves */
-KERNEL_TEX(float4, texture_float4, __curves)
-KERNEL_TEX(float4, texture_float4, __curve_keys)
+KERNEL_TEX(float4, __curves)
+KERNEL_TEX(float4, __curve_keys)

 /* patches */
-KERNEL_TEX(uint, texture_uint, __patches)
+KERNEL_TEX(uint, __patches)

 /* attributes */
-KERNEL_TEX(uint4, texture_uint4, __attributes_map)
-KERNEL_TEX(float, texture_float, __attributes_float)
-KERNEL_TEX(float4, texture_float4, __attributes_float3)
-KERNEL_TEX(uchar4, texture_uchar4, __attributes_uchar4)
+KERNEL_TEX(uint4, __attributes_map)
+KERNEL_TEX(float, __attributes_float)
+KERNEL_TEX(float4, __attributes_float3)
+KERNEL_TEX(uchar4, __attributes_uchar4)

 /* lights */
-KERNEL_TEX(float4, texture_float4, __light_distribution)
-KERNEL_TEX(float4, texture_float4, __light_data)
-KERNEL_TEX(float2, texture_float2, __light_background_marginal_cdf)
-KERNEL_TEX(float2, texture_float2, __light_background_conditional_cdf)
+KERNEL_TEX(float4, __light_distribution)
+KERNEL_TEX(float4, __light_data)
+KERNEL_TEX(float2, __light_background_marginal_cdf)
+KERNEL_TEX(float2, __light_background_conditional_cdf)

 /* particles */
-KERNEL_TEX(float4, texture_float4, __particles)
+KERNEL_TEX(float4, __particles)

 /* shaders */
-KERNEL_TEX(uint4, texture_uint4, __svm_nodes)
-KERNEL_TEX(uint, texture_uint, __shader_flag)
-KERNEL_TEX(uint, texture_uint, __object_flag)
+KERNEL_TEX(uint4, __svm_nodes)
+KERNEL_TEX(uint, __shader_flag)
+KERNEL_TEX(uint, __object_flag)

 /* lookup tables */
-KERNEL_TEX(float, texture_float, __lookup_table)
+KERNEL_TEX(float, __lookup_table)

 /* sobol */
-KERNEL_TEX(uint, texture_uint, __sobol_directions)
+KERNEL_TEX(uint, __sobol_directions)

-#ifdef __KERNEL_CUDA__
-#  if __CUDA_ARCH__ < 300
+#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ >= 300
+/* image textures */
+KERNEL_TEX(TextureInfo, __texture_info)
+#else
 /* full-float image */
 KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_000)
 KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_008)
@ -180,12 +182,7 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_641)
 KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_649)
 KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_657)
 KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_665)
-
-#  else
-/* bindless textures */
-KERNEL_TEX(uint, texture_uint, __bindless_mapping)
-#  endif  /* __CUDA_ARCH__ */
-#endif  /* __KERNEL_CUDA__ */
+#endif  /* defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300 */

 #undef KERNEL_TEX
 #undef KERNEL_IMAGE_TEX
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@ -84,130 +84,16 @@ void kernel_tex_copy(KernelGlobals *kg,
 	if(0) {
 	}

-#define KERNEL_TEX(type, ttype, tname) \
+#define KERNEL_TEX(type, tname) \
 	else if(strcmp(name, #tname) == 0) { \
 		kg->tname.data = (type*)mem; \
 		kg->tname.width = width; \
 	}
-#define KERNEL_IMAGE_TEX(type, ttype, tname)
+#define KERNEL_IMAGE_TEX(type, tname)
 #include "kernel/kernel_textures.h"
-
-	else if(strstr(name, "__tex_image_float4")) {
-		texture_image_float4 *tex = NULL;
-		int id = atoi(name + strlen("__tex_image_float4_"));
-		int array_index = kernel_tex_index(id);
-
-		if(array_index >= 0) {
-			if(array_index >= kg->texture_float4_images.size()) {
-				kg->texture_float4_images.resize(array_index+1);
-			}
-			tex = &kg->texture_float4_images[array_index];
-		}
-
-		if(tex) {
-			tex->data = (float4*)mem;
-			tex->dimensions_set(width, height, depth);
-			tex->interpolation = interpolation;
-			tex->extension = extension;
-		}
-	}
-	else if(strstr(name, "__tex_image_float")) {
-		texture_image_float *tex = NULL;
-		int id = atoi(name + strlen("__tex_image_float_"));
-		int array_index = kernel_tex_index(id);
-
-		if(array_index >= 0) {
-			if(array_index >= kg->texture_float_images.size()) {
-				kg->texture_float_images.resize(array_index+1);
-			}
-			tex = &kg->texture_float_images[array_index];
-		}
-
-		if(tex) {
-			tex->data = (float*)mem;
-			tex->dimensions_set(width, height, depth);
-			tex->interpolation = interpolation;
-			tex->extension = extension;
-		}
-	}
-	else if(strstr(name, "__tex_image_byte4")) {
-		texture_image_uchar4 *tex = NULL;
-		int id = atoi(name + strlen("__tex_image_byte4_"));
-		int array_index = kernel_tex_index(id);
-
-		if(array_index >= 0) {
-			if(array_index >= kg->texture_byte4_images.size()) {
-				kg->texture_byte4_images.resize(array_index+1);
-			}
-			tex = &kg->texture_byte4_images[array_index];
-		}
-
-		if(tex) {
-			tex->data = (uchar4*)mem;
-			tex->dimensions_set(width, height, depth);
-			tex->interpolation = interpolation;
-			tex->extension = extension;
-		}
-	}
-	else if(strstr(name, "__tex_image_byte")) {
-		texture_image_uchar *tex = NULL;
-		int id = atoi(name + strlen("__tex_image_byte_"));
-		int array_index = kernel_tex_index(id);
-
-		if(array_index >= 0) {
-			if(array_index >= kg->texture_byte_images.size()) {
-				kg->texture_byte_images.resize(array_index+1);
-			}
-			tex = &kg->texture_byte_images[array_index];
-		}
-
-		if(tex) {
-			tex->data = (uchar*)mem;
-			tex->dimensions_set(width, height, depth);
-			tex->interpolation = interpolation;
-			tex->extension = extension;
-		}
-	}
-	else if(strstr(name, "__tex_image_half4")) {
-		texture_image_half4 *tex = NULL;
-		int id = atoi(name + strlen("__tex_image_half4_"));
-		int array_index = kernel_tex_index(id);
-
-		if(array_index >= 0) {
-			if(array_index >= kg->texture_half4_images.size()) {
-				kg->texture_half4_images.resize(array_index+1);
-			}
-			tex = &kg->texture_half4_images[array_index];
-		}
-
-		if(tex) {
-			tex->data = (half4*)mem;
-			tex->dimensions_set(width, height, depth);
-			tex->interpolation = interpolation;
-			tex->extension = extension;
-		}
-	}
-	else if(strstr(name, "__tex_image_half")) {
-		texture_image_half *tex = NULL;
-		int id = atoi(name + strlen("__tex_image_half_"));
-		int array_index = kernel_tex_index(id);
-
-		if(array_index >= 0) {
-			if(array_index >= kg->texture_half_images.size()) {
-				kg->texture_half_images.resize(array_index+1);
-			}
-			tex = &kg->texture_half_images[array_index];
-		}
-
-		if(tex) {
-			tex->data = (half*)mem;
-			tex->dimensions_set(width, height, depth);
-			tex->interpolation = interpolation;
-			tex->extension = extension;
-		}
-	}
-	else
+	else {
 		assert(0);
+	}
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@ -17,70 +17,478 @@
 #ifndef __KERNEL_CPU_IMAGE_H__
 #define __KERNEL_CPU_IMAGE_H__

-#ifdef __KERNEL_CPU__
-
 CCL_NAMESPACE_BEGIN

-ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float x, float y)
+template<typename T> struct TextureInterpolator  {
+#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
+	{ \
+		u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
+		u[1] =  ((      0.5f * t - 1.0f) * t       ) * t + (2.0f/3.0f); \
+		u[2] =  ((     -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
+		u[3] = (1.0f / 6.0f) * t * t * t; \
+	} (void)0
+
+	static ccl_always_inline float4 read(float4 r)
+	{
+		return r;
+	}
+
+	static ccl_always_inline float4 read(uchar4 r)
+	{
+		float f = 1.0f/255.0f;
+		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
+	}
+
+	static ccl_always_inline float4 read(uchar r)
+	{
+		float f = r*(1.0f/255.0f);
+		return make_float4(f, f, f, 1.0f);
+	}
+
+	static ccl_always_inline float4 read(float r)
+	{
+		/* TODO(dingto): Optimize this, so interpolation
+		 * happens on float instead of float4 */
+		return make_float4(r, r, r, 1.0f);
+	}
+
+	static ccl_always_inline float4 read(half4 r)
+	{
+		return half4_to_float4(r);
+	}
+
+	static ccl_always_inline float4 read(half r)
+	{
+		float f = half_to_float(r);
+		return make_float4(f, f, f, 1.0f);
+	}
+
+	static ccl_always_inline int wrap_periodic(int x, int width)
+	{
+		x %= width;
+		if(x < 0)
+			x += width;
+		return x;
+	}
+
+	static ccl_always_inline int wrap_clamp(int x, int width)
+	{
+		return clamp(x, 0, width-1);
+	}
+
+	static ccl_always_inline float frac(float x, int *ix)
+	{
+		int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
+		*ix = i;
+		return x - (float)i;
+	}
+
+	static ccl_always_inline float4 interp(const TextureInfo& info, float x, float y)
+	{
+		if(UNLIKELY(!info.data))
+			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+		const T *data = (const T*)info.data;
+		int width = info.width;
+		int height = info.height;
+		int ix, iy, nix, niy;
+
+		if(info.interpolation == INTERPOLATION_CLOSEST) {
+			frac(x*(float)width, &ix);
+			frac(y*(float)height, &iy);
+			switch(info.extension) {
+				case EXTENSION_REPEAT:
+					ix = wrap_periodic(ix, width);
+					iy = wrap_periodic(iy, height);
+					break;
+				case EXTENSION_CLIP:
+					if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+						return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+					}
+					ATTR_FALLTHROUGH;
+				case EXTENSION_EXTEND:
+					ix = wrap_clamp(ix, width);
+					iy = wrap_clamp(iy, height);
+					break;
+				default:
+					kernel_assert(0);
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+			}
+			return read(data[ix + iy*width]);
+		}
+		else if(info.interpolation == INTERPOLATION_LINEAR) {
+			float tx = frac(x*(float)width - 0.5f, &ix);
+			float ty = frac(y*(float)height - 0.5f, &iy);
+
+			switch(info.extension) {
+				case EXTENSION_REPEAT:
+					ix = wrap_periodic(ix, width);
+					iy = wrap_periodic(iy, height);
+
+					nix = wrap_periodic(ix+1, width);
+					niy = wrap_periodic(iy+1, height);
+					break;
+				case EXTENSION_CLIP:
+					if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+						return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+					}
+					ATTR_FALLTHROUGH;
+				case EXTENSION_EXTEND:
+					nix = wrap_clamp(ix+1, width);
+					niy = wrap_clamp(iy+1, height);
+
+					ix = wrap_clamp(ix, width);
+					iy = wrap_clamp(iy, height);
+					break;
+				default:
+					kernel_assert(0);
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+			}
+
+			float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]);
+			r += (1.0f - ty)*tx*read(data[nix + iy*width]);
+			r += ty*(1.0f - tx)*read(data[ix + niy*width]);
+			r += ty*tx*read(data[nix + niy*width]);
+
+			return r;
+		}
+		else {
+			/* Bicubic b-spline interpolation. */
+			float tx = frac(x*(float)width - 0.5f, &ix);
+			float ty = frac(y*(float)height - 0.5f, &iy);
+			int pix, piy, nnix, nniy;
+			switch(info.extension) {
+				case EXTENSION_REPEAT:
+					ix = wrap_periodic(ix, width);
+					iy = wrap_periodic(iy, height);
+
+					pix = wrap_periodic(ix-1, width);
+					piy = wrap_periodic(iy-1, height);
+
+					nix = wrap_periodic(ix+1, width);
+					niy = wrap_periodic(iy+1, height);
+
+					nnix = wrap_periodic(ix+2, width);
+					nniy = wrap_periodic(iy+2, height);
+					break;
+				case EXTENSION_CLIP:
+					if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+						return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+					}
+					ATTR_FALLTHROUGH;
+				case EXTENSION_EXTEND:
+					pix = wrap_clamp(ix-1, width);
+					piy = wrap_clamp(iy-1, height);
+
+					nix = wrap_clamp(ix+1, width);
+					niy = wrap_clamp(iy+1, height);
+
+					nnix = wrap_clamp(ix+2, width);
+					nniy = wrap_clamp(iy+2, height);
+
+					ix = wrap_clamp(ix, width);
+					iy = wrap_clamp(iy, height);
+					break;
+				default:
+					kernel_assert(0);
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+			}
+
+			const int xc[4] = {pix, ix, nix, nnix};
+			const int yc[4] = {width * piy,
+			                   width * iy,
+			                   width * niy,
+			                   width * nniy};
+			float u[4], v[4];
+			/* Some helper macro to keep code reasonable size,
+			 * let compiler to inline all the matrix multiplications.
+			 */
+#define DATA(x, y) (read(data[xc[x] + yc[y]]))
+#define TERM(col) \
+			(v[col] * (u[0] * DATA(0, col) + \
+			           u[1] * DATA(1, col) + \
+			           u[2] * DATA(2, col) + \
+			           u[3] * DATA(3, col)))
+
+			SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+			SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+
+			/* Actual interpolation. */
+			return TERM(0) + TERM(1) + TERM(2) + TERM(3);
+
+#undef TERM
+#undef DATA
+		}
+	}
+
+	static ccl_always_inline float4 interp_3d_closest(const TextureInfo& info, float x, float y, float z)
+	{
+		int width = info.width;
+		int height = info.height;
+		int depth = info.depth;
+		int ix, iy, iz;
+
+		frac(x*(float)width, &ix);
+		frac(y*(float)height, &iy);
+		frac(z*(float)depth, &iz);
+
+		switch(info.extension) {
+			case EXTENSION_REPEAT:
+				ix = wrap_periodic(ix, width);
+				iy = wrap_periodic(iy, height);
+				iz = wrap_periodic(iz, depth);
+				break;
+			case EXTENSION_CLIP:
+				if(x < 0.0f || y < 0.0f || z < 0.0f ||
+				   x > 1.0f || y > 1.0f || z > 1.0f)
+				{
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+				ATTR_FALLTHROUGH;
+			case EXTENSION_EXTEND:
+				ix = wrap_clamp(ix, width);
+				iy = wrap_clamp(iy, height);
+				iz = wrap_clamp(iz, depth);
+				break;
+			default:
+				kernel_assert(0);
+				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+		}
+
+		const T *data = (const T*)info.data;
+		return read(data[ix + iy*width + iz*width*height]);
+	}
+
+	static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info, float x, float y, float z)
+	{
+		int width = info.width;
+		int height = info.height;
+		int depth = info.depth;
+		int ix, iy, iz;
+		int nix, niy, niz;
+
+		float tx = frac(x*(float)width - 0.5f, &ix);
+		float ty = frac(y*(float)height - 0.5f, &iy);
+		float tz = frac(z*(float)depth - 0.5f, &iz);
+
+		switch(info.extension) {
+			case EXTENSION_REPEAT:
+				ix = wrap_periodic(ix, width);
+				iy = wrap_periodic(iy, height);
+				iz = wrap_periodic(iz, depth);
+
+				nix = wrap_periodic(ix+1, width);
+				niy = wrap_periodic(iy+1, height);
+				niz = wrap_periodic(iz+1, depth);
+				break;
+			case EXTENSION_CLIP:
+				if(x < 0.0f || y < 0.0f || z < 0.0f ||
+				   x > 1.0f || y > 1.0f || z > 1.0f)
+				{
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+				ATTR_FALLTHROUGH;
+			case EXTENSION_EXTEND:
+				nix = wrap_clamp(ix+1, width);
+				niy = wrap_clamp(iy+1, height);
+				niz = wrap_clamp(iz+1, depth);
+
+				ix = wrap_clamp(ix, width);
+				iy = wrap_clamp(iy, height);
+				iz = wrap_clamp(iz, depth);
+				break;
+			default:
+				kernel_assert(0);
+				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+		}
+
+		const T *data = (const T*)info.data;
+		float4 r;
+
+		r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]);
+		r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]);
+		r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]);
+		r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]);
+
+		r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]);
+		r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]);
+		r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]);
+		r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]);
+
+		return r;
+	}
+
+	/* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
+	 * causing stack overflow issue in this function unless it is inlined.
+	 *
+	 * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
+	 * enabled.
+	 */
+#ifdef __GNUC__
+	static ccl_always_inline
+#else
+	static ccl_never_inline
+#endif
+	float4 interp_3d_tricubic(const TextureInfo& info, float x, float y, float z)
+	{
+		int width = info.width;
+		int height = info.height;
+		int depth = info.depth;
+		int ix, iy, iz;
+		int nix, niy, niz;
+		/* Tricubic b-spline interpolation. */
+		const float tx = frac(x*(float)width - 0.5f, &ix);
+		const float ty = frac(y*(float)height - 0.5f, &iy);
+		const float tz = frac(z*(float)depth - 0.5f, &iz);
+		int pix, piy, piz, nnix, nniy, nniz;
+
+		switch(info.extension) {
+			case EXTENSION_REPEAT:
+				ix = wrap_periodic(ix, width);
+				iy = wrap_periodic(iy, height);
+				iz = wrap_periodic(iz, depth);
+
+				pix = wrap_periodic(ix-1, width);
+				piy = wrap_periodic(iy-1, height);
+				piz = wrap_periodic(iz-1, depth);
+
+				nix = wrap_periodic(ix+1, width);
+				niy = wrap_periodic(iy+1, height);
+				niz = wrap_periodic(iz+1, depth);
+
+				nnix = wrap_periodic(ix+2, width);
+				nniy = wrap_periodic(iy+2, height);
+				nniz = wrap_periodic(iz+2, depth);
+				break;
+			case EXTENSION_CLIP:
+				if(x < 0.0f || y < 0.0f || z < 0.0f ||
+				   x > 1.0f || y > 1.0f || z > 1.0f)
+				{
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+				ATTR_FALLTHROUGH;
+			case EXTENSION_EXTEND:
+				pix = wrap_clamp(ix-1, width);
+				piy = wrap_clamp(iy-1, height);
+				piz = wrap_clamp(iz-1, depth);
+
+				nix = wrap_clamp(ix+1, width);
+				niy = wrap_clamp(iy+1, height);
+				niz = wrap_clamp(iz+1, depth);
+
+				nnix = wrap_clamp(ix+2, width);
+				nniy = wrap_clamp(iy+2, height);
+				nniz = wrap_clamp(iz+2, depth);
+
+				ix = wrap_clamp(ix, width);
+				iy = wrap_clamp(iy, height);
+				iz = wrap_clamp(iz, depth);
+				break;
+			default:
+				kernel_assert(0);
+				return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+		}
+
+		const int xc[4] = {pix, ix, nix, nnix};
+		const int yc[4] = {width * piy,
+		                   width * iy,
+		                   width * niy,
+		                   width * nniy};
+		const int zc[4] = {width * height * piz,
+		                   width * height * iz,
+		                   width * height * niz,
+		                   width * height * nniz};
+		float u[4], v[4], w[4];
+
+		/* Some helper macro to keep code reasonable size,
+		 * let compiler to inline all the matrix multiplications.
+		 */
+#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
+#define COL_TERM(col, row) \
+		(v[col] * (u[0] * DATA(0, col, row) + \
+		           u[1] * DATA(1, col, row) + \
+		           u[2] * DATA(2, col, row) + \
+		           u[3] * DATA(3, col, row)))
+#define ROW_TERM(row) \
+		(w[row] * (COL_TERM(0, row) + \
+		           COL_TERM(1, row) + \
+		           COL_TERM(2, row) + \
+		           COL_TERM(3, row)))
+
+		SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+		SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+		SET_CUBIC_SPLINE_WEIGHTS(w, tz);
+
+		/* Actual interpolation. */
+		const T *data = (const T*)info.data;
+		return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
+
+#undef COL_TERM
+#undef ROW_TERM
+#undef DATA
+	}
+
+	static ccl_always_inline float4 interp_3d(const TextureInfo& info,
+	                                          float x, float y, float z,
+	                                          InterpolationType interp)
+	{
+		if(UNLIKELY(!info.data))
+			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+		switch((interp == INTERPOLATION_NONE)? info.interpolation: interp) {
+			case INTERPOLATION_CLOSEST:
+				return interp_3d_closest(info, x, y, z);
+			case INTERPOLATION_LINEAR:
+				return interp_3d_linear(info, x, y, z);
+			default:
+				return interp_3d_tricubic(info, x, y, z);
+		}
+	}
+#undef SET_CUBIC_SPLINE_WEIGHTS
+};
+
+ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
 {
-	switch(kernel_tex_type(tex)) {
+	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
+
+	switch(kernel_tex_type(id)) {
 		case IMAGE_DATA_TYPE_HALF:
-			return kg->texture_half_images[kernel_tex_index(tex)].interp(x, y);
+			return TextureInterpolator<half>::interp(info, x, y);
 		case IMAGE_DATA_TYPE_BYTE:
-			return kg->texture_byte_images[kernel_tex_index(tex)].interp(x, y);
+			return TextureInterpolator<uchar>::interp(info, x, y);
 		case IMAGE_DATA_TYPE_FLOAT:
-			return kg->texture_float_images[kernel_tex_index(tex)].interp(x, y);
+			return TextureInterpolator<float>::interp(info, x, y);
 		case IMAGE_DATA_TYPE_HALF4:
-			return kg->texture_half4_images[kernel_tex_index(tex)].interp(x, y);
+			return TextureInterpolator<half4>::interp(info, x, y);
 		case IMAGE_DATA_TYPE_BYTE4:
-			return kg->texture_byte4_images[kernel_tex_index(tex)].interp(x, y);
+			return TextureInterpolator<uchar4>::interp(info, x, y);
 		case IMAGE_DATA_TYPE_FLOAT4:
 		default:
-			return kg->texture_float4_images[kernel_tex_index(tex)].interp(x, y);
+			return TextureInterpolator<float4>::interp(info, x, y);
 	}
 }

-ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, float x, float y, float z)
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
 {
-	switch(kernel_tex_type(tex)) {
-		case IMAGE_DATA_TYPE_HALF:
-			return kg->texture_half_images[kernel_tex_index(tex)].interp_3d(x, y, z);
-		case IMAGE_DATA_TYPE_BYTE:
-			return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d(x, y, z);
-		case IMAGE_DATA_TYPE_FLOAT:
-			return kg->texture_float_images[kernel_tex_index(tex)].interp_3d(x, y, z);
-		case IMAGE_DATA_TYPE_HALF4:
-			return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
-		case IMAGE_DATA_TYPE_BYTE4:
-			return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
-		case IMAGE_DATA_TYPE_FLOAT4:
-		default:
-			return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
-	}
-}
+	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);

-ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation)
-{
-	switch(kernel_tex_type(tex)) {
+	switch(kernel_tex_type(id)) {
 		case IMAGE_DATA_TYPE_HALF:
-			return kg->texture_half_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+			return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_BYTE:
-			return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+			return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_FLOAT:
-			return kg->texture_float_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+			return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_HALF4:
-			return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+			return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_BYTE4:
-			return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+			return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
 		case IMAGE_DATA_TYPE_FLOAT4:
 		default:
-			return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+			return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
 	}
 }

 CCL_NAMESPACE_END

-#endif  // __KERNEL_CPU__
-
-
 #endif // __KERNEL_CPU_IMAGE_H__
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@ -26,6 +26,7 @@
 #include "kernel/kernel_math.h"
 #include "kernel/kernel_types.h"
 #include "kernel/kernel_globals.h"
+#include "kernel/kernels/cuda/kernel_cuda_image.h"
 #include "kernel/kernel_film.h"
 #include "kernel/kernel_path.h"
 #include "kernel/kernel_path_branched.h"
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@ -0,0 +1,310 @@
+/*
+ * Copyright 2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if __CUDA_ARCH__ >= 300
+
+/* Kepler */
+
+/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
+ccl_device float cubic_w0(float a)
+{
+	return (1.0f/6.0f)*(a*(a*(-a + 3.0f) - 3.0f) + 1.0f);
+}
+
+ccl_device float cubic_w1(float a)
+{
+	return (1.0f/6.0f)*(a*a*(3.0f*a - 6.0f) + 4.0f);
+}
+
+ccl_device float cubic_w2(float a)
+{
+	return (1.0f/6.0f)*(a*(a*(-3.0f*a + 3.0f) + 3.0f) + 1.0f);
+}
+
+ccl_device float cubic_w3(float a)
+{
+	return (1.0f/6.0f)*(a*a*a);
+}
+
+/* g0 and g1 are the two amplitude functions. */
+ccl_device float cubic_g0(float a)
+{
+	return cubic_w0(a) + cubic_w1(a);
+}
+
+ccl_device float cubic_g1(float a)
+{
+	return cubic_w2(a) + cubic_w3(a);
+}
+
+/* h0 and h1 are the two offset functions */
+ccl_device float cubic_h0(float a)
+{
+	/* Note +0.5 offset to compensate for CUDA linear filtering convention. */
+	return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f;
+}
+
+ccl_device float cubic_h1(float a)
+{
+	return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f;
+}
+
+/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
+template<typename T>
+ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo& info, CUtexObject tex, float x, float y)
+{
+	x = (x * info.width) - 0.5f;
+	y = (y * info.height) - 0.5f;
+
+	float px = floor(x);
+	float py = floor(y);
+	float fx = x - px;
+	float fy = y - py;
+
+	float g0x = cubic_g0(fx);
+	float g1x = cubic_g1(fx);
+	float x0 = (px + cubic_h0(fx)) / info.width;
+	float x1 = (px + cubic_h1(fx)) / info.width;
+	float y0 = (py + cubic_h0(fy)) / info.height;
+	float y1 = (py + cubic_h1(fy)) / info.height;
+
+	return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) +
+	                       g1x * tex2D<T>(tex, x1, y0)) +
+	       cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) +
+	                       g1x * tex2D<T>(tex, x1, y1));
+}
+
+/* Fast tricubic texture lookup using 8 bilinear lookups. */
+template<typename T>
+ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo& info, CUtexObject tex, float x, float y, float z)
+{
+	x = (x * info.width) - 0.5f;
+	y = (y * info.height) - 0.5f;
+	z = (z * info.depth) - 0.5f;
+
+	float px = floor(x);
+	float py = floor(y);
+	float pz = floor(z);
+	float fx = x - px;
+	float fy = y - py;
+	float fz = z - pz;
+
+	float g0x = cubic_g0(fx);
+	float g1x = cubic_g1(fx);
+	float g0y = cubic_g0(fy);
+	float g1y = cubic_g1(fy);
+	float g0z = cubic_g0(fz);
+	float g1z = cubic_g1(fz);
+
+	float x0 = (px + cubic_h0(fx)) / info.width;
+	float x1 = (px + cubic_h1(fx)) / info.width;
+	float y0 = (py + cubic_h0(fy)) / info.height;
+	float y1 = (py + cubic_h1(fy)) / info.height;
+	float z0 = (pz + cubic_h0(fz)) / info.depth;
+	float z1 = (pz + cubic_h1(fz)) / info.depth;
+
+	return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) +
+	                     g1x * tex3D<T>(tex, x1, y0, z0)) +
+	              g1y * (g0x * tex3D<T>(tex, x0, y1, z0) +
+	                     g1x * tex3D<T>(tex, x1, y1, z0))) +
+	       g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) +
+	                     g1x * tex3D<T>(tex, x1, y0, z1)) +
+	              g1y * (g0x * tex3D<T>(tex, x0, y1, z1) +
+	                     g1x * tex3D<T>(tex, x1, y1, z1)));
+}
+
+ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
+{
+	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
+	CUtexObject tex = (CUtexObject)info.data;
+
+	/* float4, byte4 and half4 */
+	const int texture_type = kernel_tex_type(id);
+	if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
+	   texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+	   texture_type == IMAGE_DATA_TYPE_HALF4)
+	{
+		if(info.interpolation == INTERPOLATION_CUBIC) {
+			return kernel_tex_image_interp_bicubic<float4>(info, tex, x, y);
+		}
+		else {
+			return tex2D<float4>(tex, x, y);
+		}
+	}
+	/* float, byte and half */
+	else {
+		float f;
+
+		if(info.interpolation == INTERPOLATION_CUBIC) {
+			f = kernel_tex_image_interp_bicubic<float>(info, tex, x, y);
+		}
+		else {
+			f = tex2D<float>(tex, x, y);
+		}
+
+		return make_float4(f, f, f, 1.0f);
+	}
+}
+
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
+{
+	const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
+	CUtexObject tex = (CUtexObject)info.data;
+	uint interpolation = (interp == INTERPOLATION_NONE)? info.interpolation: interp;
+
+	const int texture_type = kernel_tex_type(id);
+	if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
+	   texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+	   texture_type == IMAGE_DATA_TYPE_HALF4)
+	{
+		if(interpolation == INTERPOLATION_CUBIC) {
+			return kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z);
+		}
+		else {
+			return tex3D<float4>(tex, x, y, z);
+		}
+	}
+	else {
+		float f;
+
+		if(interpolation == INTERPOLATION_CUBIC) {
+			f = kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z);
+		}
+		else {
+			f = tex3D<float>(tex, x, y, z);
+		}
+
+		return make_float4(f, f, f, 1.0f);
+	}
+}
+
+#else
+
+/* Fermi */
+
+ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
+{
+	float4 r;
+	switch(id) {
+		case 0: r = tex2D(__tex_image_float4_000, x, y); break;
+		case 8: r = tex2D(__tex_image_float4_008, x, y); break;
+		case 16: r = tex2D(__tex_image_float4_016, x, y); break;
+		case 24: r = tex2D(__tex_image_float4_024, x, y); break;
+		case 32: r = tex2D(__tex_image_float4_032, x, y); break;
+		case 1: r = tex2D(__tex_image_byte4_001, x, y); break;
+		case 9: r = tex2D(__tex_image_byte4_009, x, y); break;
+		case 17: r = tex2D(__tex_image_byte4_017, x, y); break;
+		case 25: r = tex2D(__tex_image_byte4_025, x, y); break;
+		case 33: r = tex2D(__tex_image_byte4_033, x, y); break;
+		case 41: r = tex2D(__tex_image_byte4_041, x, y); break;
+		case 49: r = tex2D(__tex_image_byte4_049, x, y); break;
+		case 57: r = tex2D(__tex_image_byte4_057, x, y); break;
+		case 65: r = tex2D(__tex_image_byte4_065, x, y); break;
+		case 73: r = tex2D(__tex_image_byte4_073, x, y); break;
+		case 81: r = tex2D(__tex_image_byte4_081, x, y); break;
+		case 89: r = tex2D(__tex_image_byte4_089, x, y); break;
+		case 97: r = tex2D(__tex_image_byte4_097, x, y); break;
+		case 105: r = tex2D(__tex_image_byte4_105, x, y); break;
+		case 113: r = tex2D(__tex_image_byte4_113, x, y); break;
+		case 121: r = tex2D(__tex_image_byte4_121, x, y); break;
+		case 129: r = tex2D(__tex_image_byte4_129, x, y); break;
+		case 137: r = tex2D(__tex_image_byte4_137, x, y); break;
+		case 145: r = tex2D(__tex_image_byte4_145, x, y); break;
+		case 153: r = tex2D(__tex_image_byte4_153, x, y); break;
+		case 161: r = tex2D(__tex_image_byte4_161, x, y); break;
+		case 169: r = tex2D(__tex_image_byte4_169, x, y); break;
+		case 177: r = tex2D(__tex_image_byte4_177, x, y); break;
+		case 185: r = tex2D(__tex_image_byte4_185, x, y); break;
+		case 193: r = tex2D(__tex_image_byte4_193, x, y); break;
+		case 201: r = tex2D(__tex_image_byte4_201, x, y); break;
+		case 209: r = tex2D(__tex_image_byte4_209, x, y); break;
+		case 217: r = tex2D(__tex_image_byte4_217, x, y); break;
+		case 225: r = tex2D(__tex_image_byte4_225, x, y); break;
+		case 233: r = tex2D(__tex_image_byte4_233, x, y); break;
+		case 241: r = tex2D(__tex_image_byte4_241, x, y); break;
+		case 249: r = tex2D(__tex_image_byte4_249, x, y); break;
+		case 257: r = tex2D(__tex_image_byte4_257, x, y); break;
+		case 265: r = tex2D(__tex_image_byte4_265, x, y); break;
+		case 273: r = tex2D(__tex_image_byte4_273, x, y); break;
+		case 281: r = tex2D(__tex_image_byte4_281, x, y); break;
+		case 289: r = tex2D(__tex_image_byte4_289, x, y); break;
+		case 297: r = tex2D(__tex_image_byte4_297, x, y); break;
+		case 305: r = tex2D(__tex_image_byte4_305, x, y); break;
+		case 313: r = tex2D(__tex_image_byte4_313, x, y); break;
+		case 321: r = tex2D(__tex_image_byte4_321, x, y); break;
+		case 329: r = tex2D(__tex_image_byte4_329, x, y); break;
+		case 337: r = tex2D(__tex_image_byte4_337, x, y); break;
+		case 345: r = tex2D(__tex_image_byte4_345, x, y); break;
+		case 353: r = tex2D(__tex_image_byte4_353, x, y); break;
+		case 361: r = tex2D(__tex_image_byte4_361, x, y); break;
+		case 369: r = tex2D(__tex_image_byte4_369, x, y); break;
+		case 377: r = tex2D(__tex_image_byte4_377, x, y); break;
+		case 385: r = tex2D(__tex_image_byte4_385, x, y); break;
+		case 393: r = tex2D(__tex_image_byte4_393, x, y); break;
+		case 401: r = tex2D(__tex_image_byte4_401, x, y); break;
+		case 409: r = tex2D(__tex_image_byte4_409, x, y); break;
+		case 417: r = tex2D(__tex_image_byte4_417, x, y); break;
+		case 425: r = tex2D(__tex_image_byte4_425, x, y); break;
+		case 433: r = tex2D(__tex_image_byte4_433, x, y); break;
+		case 441: r = tex2D(__tex_image_byte4_441, x, y); break;
+		case 449: r = tex2D(__tex_image_byte4_449, x, y); break;
+		case 457: r = tex2D(__tex_image_byte4_457, x, y); break;
+		case 465: r = tex2D(__tex_image_byte4_465, x, y); break;
+		case 473: r = tex2D(__tex_image_byte4_473, x, y); break;
+		case 481: r = tex2D(__tex_image_byte4_481, x, y); break;
+		case 489: r = tex2D(__tex_image_byte4_489, x, y); break;
+		case 497: r = tex2D(__tex_image_byte4_497, x, y); break;
+		case 505: r = tex2D(__tex_image_byte4_505, x, y); break;
+		case 513: r = tex2D(__tex_image_byte4_513, x, y); break;
+		case 521: r = tex2D(__tex_image_byte4_521, x, y); break;
+		case 529: r = tex2D(__tex_image_byte4_529, x, y); break;
+		case 537: r = tex2D(__tex_image_byte4_537, x, y); break;
+		case 545: r = tex2D(__tex_image_byte4_545, x, y); break;
+		case 553: r = tex2D(__tex_image_byte4_553, x, y); break;
+		case 561: r = tex2D(__tex_image_byte4_561, x, y); break;
+		case 569: r = tex2D(__tex_image_byte4_569, x, y); break;
+		case 577: r = tex2D(__tex_image_byte4_577, x, y); break;
+		case 585: r = tex2D(__tex_image_byte4_585, x, y); break;
+		case 593: r = tex2D(__tex_image_byte4_593, x, y); break;
+		case 601: r = tex2D(__tex_image_byte4_601, x, y); break;
+		case 609: r = tex2D(__tex_image_byte4_609, x, y); break;
+		case 617: r = tex2D(__tex_image_byte4_617, x, y); break;
+		case 625: r = tex2D(__tex_image_byte4_625, x, y); break;
+		case 633: r = tex2D(__tex_image_byte4_633, x, y); break;
+		case 641: r = tex2D(__tex_image_byte4_641, x, y); break;
+		case 649: r = tex2D(__tex_image_byte4_649, x, y); break;
+		case 657: r = tex2D(__tex_image_byte4_657, x, y); break;
+		case 665: r = tex2D(__tex_image_byte4_665, x, y); break;
+		default: r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+	}
+	return r;
+}
+
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z)
+{
+	float4 r;
+	switch(id) {
+		case 0: r = tex3D(__tex_image_float4_3d_000, x, y, z); break;
+		case 8: r = tex3D(__tex_image_float4_3d_008, x, y, z); break;
+		case 16: r = tex3D(__tex_image_float4_3d_016, x, y, z); break;
+		case 24: r = tex3D(__tex_image_float4_3d_024, x, y, z); break;
+		case 32: r = tex3D(__tex_image_float4_3d_032, x, y, z); break;
+	}
+	return r;
+}
+
+#endif
+
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@ -20,7 +20,7 @@
 #include "kernel/kernel_math.h"
 #include "kernel/kernel_types.h"
 #include "kernel/kernel_globals.h"
-#include "kernel/kernel_image_opencl.h"
+#include "kernel/kernels/opencl/kernel_opencl_image.h"

 #include "kernel/kernel_film.h"

--- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
@ -0,0 +1,341 @@
+/*
+ * Copyright 2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* For OpenCL we do manual lookup and interpolation. */
+
+ccl_device_inline ccl_global TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) {
+	const uint tex_offset = id
+#define KERNEL_TEX(type, name) + 1
+#include "kernel/kernel_textures.h"
+	;
+
+	return &((ccl_global TextureInfo*)kg->buffers[0])[tex_offset];
+}
+
+#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->cl_buffer] + info->data))[(index)]
+
+ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
+{
+	const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+	const int texture_type = kernel_tex_type(id);
+
+	/* Float4 */
+	if(texture_type == IMAGE_DATA_TYPE_FLOAT4) {
+		return tex_fetch(float4, info, offset);
+	}
+	/* Byte4 */
+	else if(texture_type == IMAGE_DATA_TYPE_BYTE4) {
+		uchar4 r = tex_fetch(uchar4, info, offset);
+		float f = 1.0f/255.0f;
+		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
+	}
+	/* Float */
+	else if(texture_type == IMAGE_DATA_TYPE_FLOAT) {
+		float f = tex_fetch(float, info, offset);
+		return make_float4(f, f, f, 1.0f);
+	}
+	/* Byte */
+	else {
+		uchar r = tex_fetch(uchar, info, offset);
+		float f = r * (1.0f/255.0f);
+		return make_float4(f, f, f, 1.0f);
+	}
+}
+
+ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
+{
+	x %= width;
+	if(x < 0)
+		x += width;
+	return x;
+}
+
+ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
+{
+	return clamp(x, 0, width-1);
+}
+
+ccl_device_inline float svm_image_texture_frac(float x, int *ix)
+{
+	int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
+	*ix = i;
+	return x - (float)i;
+}
+
+#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
+	{ \
+		u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
+		u[1] =  ((      0.5f * t - 1.0f) * t       ) * t + (2.0f/3.0f); \
+		u[2] =  ((     -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
+		u[3] = (1.0f / 6.0f) * t * t * t; \
+	} (void)0
+
+ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
+{
+	const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+	uint width = info->width;
+	uint height = info->height;
+	uint interpolation = info->interpolation;
+	uint extension = info->extension;
+
+	/* Actual sampling. */
+	if(interpolation == INTERPOLATION_CLOSEST) {
+		int ix, iy;
+		svm_image_texture_frac(x*width, &ix);
+		svm_image_texture_frac(y*height, &iy);
+
+		if(extension == EXTENSION_REPEAT) {
+			ix = svm_image_texture_wrap_periodic(ix, width);
+			iy = svm_image_texture_wrap_periodic(iy, height);
+		}
+		else {
+			if(extension == EXTENSION_CLIP) {
+				if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+			}
+			/* Fall through. */
+			/* EXTENSION_EXTEND */
+			ix = svm_image_texture_wrap_clamp(ix, width);
+			iy = svm_image_texture_wrap_clamp(iy, height);
+		}
+
+		return svm_image_texture_read(kg, id, ix + iy*width);
+	}
+	else {
+		/* Bilinear or bicubic interpolation. */
+		int ix, iy, nix, niy;
+		float tx = svm_image_texture_frac(x*width - 0.5f, &ix);
+		float ty = svm_image_texture_frac(y*height - 0.5f, &iy);
+
+		if(extension == EXTENSION_REPEAT) {
+			ix = svm_image_texture_wrap_periodic(ix, width);
+			iy = svm_image_texture_wrap_periodic(iy, height);
+			nix = svm_image_texture_wrap_periodic(ix+1, width);
+			niy = svm_image_texture_wrap_periodic(iy+1, height);
+		}
+		else {
+			if(extension == EXTENSION_CLIP) {
+				if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+			}
+			ix = svm_image_texture_wrap_clamp(ix, width);
+			iy = svm_image_texture_wrap_clamp(iy, height);
+			nix = svm_image_texture_wrap_clamp(ix+1, width);
+			niy = svm_image_texture_wrap_clamp(iy+1, height);
+		}
+
+		if(interpolation == INTERPOLATION_LINEAR) {
+			/* Bilinear interpolation. */
+			float4 r;
+			r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, ix + iy*width);
+			r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, nix + iy*width);
+			r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, ix + niy*width);
+			r += ty*tx*svm_image_texture_read(kg, id, nix + niy*width);
+			return r;
+		}
+
+		/* Bicubic interpolation. */
+		int pix, piy, nnix, nniy;
+		if(extension == EXTENSION_REPEAT) {
+			pix = svm_image_texture_wrap_periodic(ix-1, width);
+			piy = svm_image_texture_wrap_periodic(iy-1, height);
+			nnix = svm_image_texture_wrap_periodic(ix+2, width);
+			nniy = svm_image_texture_wrap_periodic(iy+2, height);
+		}
+		else {
+			pix = svm_image_texture_wrap_clamp(ix-1, width);
+			piy = svm_image_texture_wrap_clamp(iy-1, height);
+			nnix = svm_image_texture_wrap_clamp(ix+2, width);
+			nniy = svm_image_texture_wrap_clamp(iy+2, height);
+		}
+
+		const int xc[4] = {pix, ix, nix, nnix};
+		const int yc[4] = {width * piy,
+		                   width * iy,
+		                   width * niy,
+		                   width * nniy};
+		float u[4], v[4];
+		/* Some helper macro to keep code reasonable size,
+		 * let compiler to inline all the matrix multiplications.
+		 */
+#define DATA(x, y) (svm_image_texture_read(kg, id, xc[x] + yc[y]))
+#define TERM(col) \
+		(v[col] * (u[0] * DATA(0, col) + \
+		           u[1] * DATA(1, col) + \
+		           u[2] * DATA(2, col) + \
+		           u[3] * DATA(3, col)))
+
+		SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+		SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+
+		/* Actual interpolation. */
+		return TERM(0) + TERM(1) + TERM(2) + TERM(3);
+#undef TERM
+#undef DATA
+	}
+}
+
+
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp)
+{
+	const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+	uint width = info->width;
+	uint height = info->height;
+	uint depth = info->depth;
+	uint interpolation = (interp == INTERPOLATION_NONE)? info->interpolation: interp;
+	uint extension = info->extension;
+
+	/* Actual sampling. */
+	if(interpolation == INTERPOLATION_CLOSEST) {
+		int ix, iy, iz;
+		svm_image_texture_frac(x*width, &ix);
+		svm_image_texture_frac(y*height, &iy);
+		svm_image_texture_frac(z*depth, &iz);
+
+		if(extension == EXTENSION_REPEAT) {
+			ix = svm_image_texture_wrap_periodic(ix, width);
+			iy = svm_image_texture_wrap_periodic(iy, height);
+			iz = svm_image_texture_wrap_periodic(iz, depth);
+		}
+		else {
+			if(extension == EXTENSION_CLIP) {
+				if(x < 0.0f || y < 0.0f || z < 0.0f ||
+				   x > 1.0f || y > 1.0f || z > 1.0f)
+				{
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+			}
+			/* Fall through. */
+			/* EXTENSION_EXTEND */
+			ix = svm_image_texture_wrap_clamp(ix, width);
+			iy = svm_image_texture_wrap_clamp(iy, height);
+			iz = svm_image_texture_wrap_clamp(iz, depth);
+		}
+		return svm_image_texture_read(kg, id, ix + iy*width + iz*width*height);
+	}
+	else {
+		/* Bilinear or bicubic interpolation. */
+		int ix, iy, iz, nix, niy, niz;
+		float tx = svm_image_texture_frac(x*(float)width - 0.5f, &ix);
+		float ty = svm_image_texture_frac(y*(float)height - 0.5f, &iy);
+		float tz = svm_image_texture_frac(z*(float)depth - 0.5f, &iz);
+
+		if(extension == EXTENSION_REPEAT) {
+			ix = svm_image_texture_wrap_periodic(ix, width);
+			iy = svm_image_texture_wrap_periodic(iy, height);
+			iz = svm_image_texture_wrap_periodic(iz, depth);
+
+			nix = svm_image_texture_wrap_periodic(ix+1, width);
+			niy = svm_image_texture_wrap_periodic(iy+1, height);
+			niz = svm_image_texture_wrap_periodic(iz+1, depth);
+		}
+		else {
+			if(extension == EXTENSION_CLIP) {
+				if(x < 0.0f || y < 0.0f || z < 0.0f ||
+				   x > 1.0f || y > 1.0f || z > 1.0f)
+				{
+					return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+				}
+			}
+			/* Fall through. */
+			/*  EXTENSION_EXTEND */
+			nix = svm_image_texture_wrap_clamp(ix+1, width);
+			niy = svm_image_texture_wrap_clamp(iy+1, height);
+			niz = svm_image_texture_wrap_clamp(iz+1, depth);
+
+			ix = svm_image_texture_wrap_clamp(ix, width);
+			iy = svm_image_texture_wrap_clamp(iy, height);
+			iz = svm_image_texture_wrap_clamp(iz, depth);
+		}
+
+		if(interpolation == INTERPOLATION_LINEAR) {
+			/* Bilinear interpolation. */
+			float4 r;
+			r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, ix + iy*width + iz*width*height);
+			r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read(kg, id, nix + iy*width + iz*width*height);
+			r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read(kg, id, ix + niy*width + iz*width*height);
+			r += (1.0f - tz)*ty*tx*svm_image_texture_read(kg, id, nix + niy*width + iz*width*height);
+
+			r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, ix + iy*width + niz*width*height);
+			r += tz*(1.0f - ty)*tx*svm_image_texture_read(kg, id, nix + iy*width + niz*width*height);
+			r += tz*ty*(1.0f - tx)*svm_image_texture_read(kg, id, ix + niy*width + niz*width*height);
+			r += tz*ty*tx*svm_image_texture_read(kg, id, nix + niy*width + niz*width*height);
+			return r;
+		}
+
+		/* Bicubic interpolation. */
+		int pix, piy, piz, nnix, nniy, nniz;
+		if(extension == EXTENSION_REPEAT) {
+			pix = svm_image_texture_wrap_periodic(ix-1, width);
+			piy = svm_image_texture_wrap_periodic(iy-1, height);
+			piz = svm_image_texture_wrap_periodic(iz-1, depth);
+			nnix = svm_image_texture_wrap_periodic(ix+2, width);
+			nniy = svm_image_texture_wrap_periodic(iy+2, height);
+			nniz = svm_image_texture_wrap_periodic(iz+2, depth);
+		}
+		else {
+			pix = svm_image_texture_wrap_clamp(ix-1, width);
+			piy = svm_image_texture_wrap_clamp(iy-1, height);
+			piz = svm_image_texture_wrap_clamp(iz-1, depth);
+			nnix = svm_image_texture_wrap_clamp(ix+2, width);
+			nniy = svm_image_texture_wrap_clamp(iy+2, height);
+			nniz = svm_image_texture_wrap_clamp(iz+2, depth);
+		}
+
+		const int xc[4] = {pix, ix, nix, nnix};
+		const int yc[4] = {width * piy,
+		                   width * iy,
+		                   width * niy,
+		                   width * nniy};
+		const int zc[4] = {width * height * piz,
+		                   width * height * iz,
+		                   width * height * niz,
+		                   width * height * nniz};
+		float u[4], v[4], w[4];
+
+		/* Some helper macro to keep code reasonable size,
+		 * let compiler to inline all the matrix multiplications.
+		 */
+#define DATA(x, y, z) (svm_image_texture_read(kg, id, xc[x] + yc[y] + zc[z]))
+#define COL_TERM(col, row) \
+		(v[col] * (u[0] * DATA(0, col, row) + \
+		           u[1] * DATA(1, col, row) + \
+		           u[2] * DATA(2, col, row) + \
+		           u[3] * DATA(3, col, row)))
+#define ROW_TERM(row) \
+		(w[row] * (COL_TERM(0, row) + \
+		           COL_TERM(1, row) + \
+		           COL_TERM(2, row) + \
+		           COL_TERM(3, row)))
+
+		SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+		SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+		SET_CUBIC_SPLINE_WEIGHTS(w, tz);
+
+		/* Actual interpolation. */
+		return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
+
+#undef COL_TERM
+#undef ROW_TERM
+#undef DATA
+	}
+}
+
+#undef SET_CUBIC_SPLINE_WEIGHTS
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@ -962,7 +962,7 @@ bool OSLRenderServices::texture(ustring filename,

 	if(filename.length() && filename[0] == '@') {
 		int slot = atoi(filename.c_str() + 1);
-		float4 rgba = kernel_tex_image_interp(slot, s, 1.0f - t);
+		float4 rgba = kernel_tex_image_interp(kg, slot, s, 1.0f - t);

 		result[0] = rgba[0];
 		if(nchannels > 1)
@ -1043,7 +1043,7 @@ bool OSLRenderServices::texture3d(ustring filename,
 	bool status;
 	if(filename.length() && filename[0] == '@') {
 		int slot = atoi(filename.c_str() + 1);
-		float4 rgba = kernel_tex_image_interp_3d(slot, P.x, P.y, P.z);
+		float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z, INTERPOLATION_NONE);

 		result[0] = rgba[0];
 		if(nchannels > 1)
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@ -29,7 +29,10 @@
 #endif

 #ifdef __KERNEL_OPENCL__
-#  include "kernel/kernel_image_opencl.h"
+#  include "kernel/kernels/opencl/kernel_opencl_image.h"
+#endif
+#ifdef __KERNEL_CUDA__
+#  include "kernel/kernels/cuda/kernel_cuda_image.h"
 #endif
 #ifdef __KERNEL_CPU__
 #  include "kernel/kernels/cpu/kernel_cpu_image.h"
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@ -18,135 +18,7 @@ CCL_NAMESPACE_BEGIN

 ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
 {
-#ifdef __KERNEL_CPU__
-	float4 r = kernel_tex_image_interp(id, x, y);
-#elif defined(__KERNEL_OPENCL__)
 	float4 r = kernel_tex_image_interp(kg, id, x, y);
-#else
-	float4 r;
-
-#  if __CUDA_ARCH__ < 300
-	/* not particularly proud of this massive switch, what are the
-	 * alternatives?
-	 * - use a single big 1D texture, and do our own lookup/filtering
-	 * - group by size and use a 3d texture, performance impact
-	 * - group into larger texture with some padding for correct lerp
-	 *
-	 * also note that cuda has a textures limit (128 for Fermi, 256 for Kepler),
-	 * and we cannot use all since we still need some for other storage */
-
-	switch(id) {
-		case 0: r = kernel_tex_image_interp(__tex_image_float4_000, x, y); break;
-		case 8: r = kernel_tex_image_interp(__tex_image_float4_008, x, y); break;
-		case 16: r = kernel_tex_image_interp(__tex_image_float4_016, x, y); break;
-		case 24: r = kernel_tex_image_interp(__tex_image_float4_024, x, y); break;
-		case 32: r = kernel_tex_image_interp(__tex_image_float4_032, x, y); break;
-		case 1: r = kernel_tex_image_interp(__tex_image_byte4_001, x, y); break;
-		case 9: r = kernel_tex_image_interp(__tex_image_byte4_009, x, y); break;
-		case 17: r = kernel_tex_image_interp(__tex_image_byte4_017, x, y); break;
-		case 25: r = kernel_tex_image_interp(__tex_image_byte4_025, x, y); break;
-		case 33: r = kernel_tex_image_interp(__tex_image_byte4_033, x, y); break;
-		case 41: r = kernel_tex_image_interp(__tex_image_byte4_041, x, y); break;
-		case 49: r = kernel_tex_image_interp(__tex_image_byte4_049, x, y); break;
-		case 57: r = kernel_tex_image_interp(__tex_image_byte4_057, x, y); break;
-		case 65: r = kernel_tex_image_interp(__tex_image_byte4_065, x, y); break;
-		case 73: r = kernel_tex_image_interp(__tex_image_byte4_073, x, y); break;
-		case 81: r = kernel_tex_image_interp(__tex_image_byte4_081, x, y); break;
-		case 89: r = kernel_tex_image_interp(__tex_image_byte4_089, x, y); break;
-		case 97: r = kernel_tex_image_interp(__tex_image_byte4_097, x, y); break;
-		case 105: r = kernel_tex_image_interp(__tex_image_byte4_105, x, y); break;
-		case 113: r = kernel_tex_image_interp(__tex_image_byte4_113, x, y); break;
-		case 121: r = kernel_tex_image_interp(__tex_image_byte4_121, x, y); break;
-		case 129: r = kernel_tex_image_interp(__tex_image_byte4_129, x, y); break;
-		case 137: r = kernel_tex_image_interp(__tex_image_byte4_137, x, y); break;
-		case 145: r = kernel_tex_image_interp(__tex_image_byte4_145, x, y); break;
-		case 153: r = kernel_tex_image_interp(__tex_image_byte4_153, x, y); break;
-		case 161: r = kernel_tex_image_interp(__tex_image_byte4_161, x, y); break;
-		case 169: r = kernel_tex_image_interp(__tex_image_byte4_169, x, y); break;
-		case 177: r = kernel_tex_image_interp(__tex_image_byte4_177, x, y); break;
-		case 185: r = kernel_tex_image_interp(__tex_image_byte4_185, x, y); break;
-		case 193: r = kernel_tex_image_interp(__tex_image_byte4_193, x, y); break;
-		case 201: r = kernel_tex_image_interp(__tex_image_byte4_201, x, y); break;
-		case 209: r = kernel_tex_image_interp(__tex_image_byte4_209, x, y); break;
-		case 217: r = kernel_tex_image_interp(__tex_image_byte4_217, x, y); break;
-		case 225: r = kernel_tex_image_interp(__tex_image_byte4_225, x, y); break;
-		case 233: r = kernel_tex_image_interp(__tex_image_byte4_233, x, y); break;
-		case 241: r = kernel_tex_image_interp(__tex_image_byte4_241, x, y); break;
-		case 249: r = kernel_tex_image_interp(__tex_image_byte4_249, x, y); break;
-		case 257: r = kernel_tex_image_interp(__tex_image_byte4_257, x, y); break;
-		case 265: r = kernel_tex_image_interp(__tex_image_byte4_265, x, y); break;
-		case 273: r = kernel_tex_image_interp(__tex_image_byte4_273, x, y); break;
-		case 281: r = kernel_tex_image_interp(__tex_image_byte4_281, x, y); break;
-		case 289: r = kernel_tex_image_interp(__tex_image_byte4_289, x, y); break;
-		case 297: r = kernel_tex_image_interp(__tex_image_byte4_297, x, y); break;
-		case 305: r = kernel_tex_image_interp(__tex_image_byte4_305, x, y); break;
-		case 313: r = kernel_tex_image_interp(__tex_image_byte4_313, x, y); break;
-		case 321: r = kernel_tex_image_interp(__tex_image_byte4_321, x, y); break;
-		case 329: r = kernel_tex_image_interp(__tex_image_byte4_329, x, y); break;
-		case 337: r = kernel_tex_image_interp(__tex_image_byte4_337, x, y); break;
-		case 345: r = kernel_tex_image_interp(__tex_image_byte4_345, x, y); break;
-		case 353: r = kernel_tex_image_interp(__tex_image_byte4_353, x, y); break;
-		case 361: r = kernel_tex_image_interp(__tex_image_byte4_361, x, y); break;
-		case 369: r = kernel_tex_image_interp(__tex_image_byte4_369, x, y); break;
-		case 377: r = kernel_tex_image_interp(__tex_image_byte4_377, x, y); break;
-		case 385: r = kernel_tex_image_interp(__tex_image_byte4_385, x, y); break;
-		case 393: r = kernel_tex_image_interp(__tex_image_byte4_393, x, y); break;
-		case 401: r = kernel_tex_image_interp(__tex_image_byte4_401, x, y); break;
-		case 409: r = kernel_tex_image_interp(__tex_image_byte4_409, x, y); break;
-		case 417: r = kernel_tex_image_interp(__tex_image_byte4_417, x, y); break;
-		case 425: r = kernel_tex_image_interp(__tex_image_byte4_425, x, y); break;
-		case 433: r = kernel_tex_image_interp(__tex_image_byte4_433, x, y); break;
-		case 441: r = kernel_tex_image_interp(__tex_image_byte4_441, x, y); break;
-		case 449: r = kernel_tex_image_interp(__tex_image_byte4_449, x, y); break;
-		case 457: r = kernel_tex_image_interp(__tex_image_byte4_457, x, y); break;
-		case 465: r = kernel_tex_image_interp(__tex_image_byte4_465, x, y); break;
-		case 473: r = kernel_tex_image_interp(__tex_image_byte4_473, x, y); break;
-		case 481: r = kernel_tex_image_interp(__tex_image_byte4_481, x, y); break;
-		case 489: r = kernel_tex_image_interp(__tex_image_byte4_489, x, y); break;
-		case 497: r = kernel_tex_image_interp(__tex_image_byte4_497, x, y); break;
-		case 505: r = kernel_tex_image_interp(__tex_image_byte4_505, x, y); break;
-		case 513: r = kernel_tex_image_interp(__tex_image_byte4_513, x, y); break;
-		case 521: r = kernel_tex_image_interp(__tex_image_byte4_521, x, y); break;
-		case 529: r = kernel_tex_image_interp(__tex_image_byte4_529, x, y); break;
-		case 537: r = kernel_tex_image_interp(__tex_image_byte4_537, x, y); break;
-		case 545: r = kernel_tex_image_interp(__tex_image_byte4_545, x, y); break;
-		case 553: r = kernel_tex_image_interp(__tex_image_byte4_553, x, y); break;
-		case 561: r = kernel_tex_image_interp(__tex_image_byte4_561, x, y); break;
-		case 569: r = kernel_tex_image_interp(__tex_image_byte4_569, x, y); break;
-		case 577: r = kernel_tex_image_interp(__tex_image_byte4_577, x, y); break;
-		case 585: r = kernel_tex_image_interp(__tex_image_byte4_585, x, y); break;
-		case 593: r = kernel_tex_image_interp(__tex_image_byte4_593, x, y); break;
-		case 601: r = kernel_tex_image_interp(__tex_image_byte4_601, x, y); break;
-		case 609: r = kernel_tex_image_interp(__tex_image_byte4_609, x, y); break;
-		case 617: r = kernel_tex_image_interp(__tex_image_byte4_617, x, y); break;
-		case 625: r = kernel_tex_image_interp(__tex_image_byte4_625, x, y); break;
-		case 633: r = kernel_tex_image_interp(__tex_image_byte4_633, x, y); break;
-		case 641: r = kernel_tex_image_interp(__tex_image_byte4_641, x, y); break;
-		case 649: r = kernel_tex_image_interp(__tex_image_byte4_649, x, y); break;
-		case 657: r = kernel_tex_image_interp(__tex_image_byte4_657, x, y); break;
-		case 665: r = kernel_tex_image_interp(__tex_image_byte4_665, x, y); break;
-		default:
-			kernel_assert(0);
-			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-	}
-#  else
-	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
-	/* float4, byte4 and half4 */
-	const int texture_type = kernel_tex_type(id);
-	if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
-	   texture_type == IMAGE_DATA_TYPE_BYTE4 ||
-	   texture_type == IMAGE_DATA_TYPE_HALF4)
-	{
-		r = kernel_tex_image_interp_float4(tex, x, y);
-	}
-	/* float, byte and half */
-	else {
-		float f = kernel_tex_image_interp_float(tex, x, y);
-		r = make_float4(f, f, f, 1.0f);
-	}
-#  endif
-#endif
-
 	const float alpha = r.w;

 	if(use_alpha && alpha != 1.0f && alpha != 0.0f) {
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@ -42,29 +42,8 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
 		tfm.w = read_node_float(kg, offset);
 		co = transform_point(&tfm, co);
 	}
-	float4 r;
-#  if defined(__KERNEL_CUDA__)
-#    if __CUDA_ARCH__ >= 300
-	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
-	const int texture_type = kernel_tex_type(id);
-	if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
-	   texture_type == IMAGE_DATA_TYPE_BYTE4 ||
-	   texture_type == IMAGE_DATA_TYPE_HALF4)
-	{
-		r = kernel_tex_image_interp_3d_float4(tex, co.x, co.y, co.z);
-	}
-	else {
-		float f = kernel_tex_image_interp_3d_float(tex, co.x, co.y, co.z);
-		r = make_float4(f, f, f, 1.0f);
-	}
-#    else /* __CUDA_ARCH__ >= 300 */
-	r = volume_image_texture_3d(id, co.x, co.y, co.z);
-#    endif
-#  elif defined(__KERNEL_OPENCL__)
-	r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z);
-#  else
-	r = kernel_tex_image_interp_3d(id, co.x, co.y, co.z);
-#  endif /* __KERNEL_CUDA__ */
+
+	float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE);
 #else
 	float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 #endif
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@ -46,12 +46,64 @@ CCL_NAMESPACE_BEGIN

 #if defined (__KERNEL_CUDA__) && (__CUDA_ARCH__ < 300)
 #  define kernel_tex_type(tex) (tex < TEX_START_BYTE4_CUDA ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_BYTE4)
-#  define kernel_tex_index(tex) (tex)
 #else
 #  define kernel_tex_type(tex) (tex & IMAGE_DATA_TYPE_MASK)
-#  define kernel_tex_index(tex) (tex >> IMAGE_DATA_TYPE_SHIFT)
 #endif

+/* Interpolation types for textures
+ * cuda also use texture space to store other objects */
+typedef enum InterpolationType {
+	INTERPOLATION_NONE = -1,
+	INTERPOLATION_LINEAR = 0,
+	INTERPOLATION_CLOSEST = 1,
+	INTERPOLATION_CUBIC = 2,
+	INTERPOLATION_SMART = 3,
+
+	INTERPOLATION_NUM_TYPES,
+} InterpolationType;
+
+/* Texture types
+ * Since we store the type in the lower bits of a flat index,
+ * the shift and bit mask constant below need to be kept in sync. */
+typedef enum ImageDataType {
+	IMAGE_DATA_TYPE_FLOAT4 = 0,
+	IMAGE_DATA_TYPE_BYTE4 = 1,
+	IMAGE_DATA_TYPE_HALF4 = 2,
+	IMAGE_DATA_TYPE_FLOAT = 3,
+	IMAGE_DATA_TYPE_BYTE = 4,
+	IMAGE_DATA_TYPE_HALF = 5,
+
+	IMAGE_DATA_NUM_TYPES
+} ImageDataType;
+
+#define IMAGE_DATA_TYPE_SHIFT 3
+#define IMAGE_DATA_TYPE_MASK 0x7
+
+/* Extension types for textures.
+ *
+ * Defines how the image is extrapolated past its original bounds. */
+typedef enum ExtensionType {
+	/* Cause the image to repeat horizontally and vertically. */
+	EXTENSION_REPEAT = 0,
+	/* Extend by repeating edge pixels of the image. */
+	EXTENSION_EXTEND = 1,
+	/* Clip to image size and set exterior pixels as transparent. */
+	EXTENSION_CLIP = 2,
+
+	EXTENSION_NUM_TYPES,
+} ExtensionType;
+
+typedef struct TextureInfo {
+	/* Pointer, offset or texture depending on device. */
+	uint64_t data;
+	/* Buffer number for OpenCL. */
+	uint cl_buffer;
+	/* Interpolation and extension type. */
+	uint interpolation, extension;
+	/* Dimensions. */
+	uint width, height, depth;
+} TextureInfo;
+
 CCL_NAMESPACE_END

 #endif /* __UTIL_TEXTURE_H__ */
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@ -101,52 +101,6 @@ ccl_device_inline size_t round_down(size_t x, size_t multiple)
 	return (x / multiple) * multiple;
 }

-/* Interpolation types for textures
- * cuda also use texture space to store other objects */
-enum InterpolationType {
-	INTERPOLATION_NONE = -1,
-	INTERPOLATION_LINEAR = 0,
-	INTERPOLATION_CLOSEST = 1,
-	INTERPOLATION_CUBIC = 2,
-	INTERPOLATION_SMART = 3,
-
-	INTERPOLATION_NUM_TYPES,
-};
-
-/* Texture types
- * Since we store the type in the lower bits of a flat index,
- * the shift and bit mask constant below need to be kept in sync.
- */
-
-enum ImageDataType {
-	IMAGE_DATA_TYPE_FLOAT4 = 0,
-	IMAGE_DATA_TYPE_BYTE4 = 1,
-	IMAGE_DATA_TYPE_HALF4 = 2,
-	IMAGE_DATA_TYPE_FLOAT = 3,
-	IMAGE_DATA_TYPE_BYTE = 4,
-	IMAGE_DATA_TYPE_HALF = 5,
-
-	IMAGE_DATA_NUM_TYPES
-};
-
-#define IMAGE_DATA_TYPE_SHIFT 3
-#define IMAGE_DATA_TYPE_MASK 0x7
-
-/* Extension types for textures.
- *
- * Defines how the image is extrapolated past its original bounds.
- */
-enum ExtensionType {
-	/* Cause the image to repeat horizontally and vertically. */
-	EXTENSION_REPEAT = 0,
-	/* Extend by repeating edge pixels of the image. */
-	EXTENSION_EXTEND = 1,
-	/* Clip to image size and set exterior pixels as transparent. */
-	EXTENSION_CLIP = 2,
-
-	EXTENSION_NUM_TYPES,
-};
-
 CCL_NAMESPACE_END

 /* Vectorized types declaration. */
--- a/source/blender/editors/sculpt_paint/paint_vertex.c
+++ b/source/blender/editors/sculpt_paint/paint_vertex.c
@ -149,6 +149,10 @@ static bool vwpaint_use_normal(const VPaint *vp)
 	       ((vp->paint.brush->flag & BRUSH_FRONTFACE_FALLOFF) != 0);
 }

+static bool brush_use_accumulate(const Brush *brush)
+{
+	return (brush->flag & BRUSH_ACCUMULATE) != 0 || brush->vertexpaint_tool == PAINT_BLEND_SMEAR;
+}

 static MDeformVert *defweight_prev_init(MDeformVert *dvert_prev, MDeformVert *dvert_curr, int index)
 {
@ -272,7 +276,7 @@ static uint vpaint_blend(
 	uint color_blend = ED_vpaint_blend_tool(tool, color_curr, color_paint, alpha_i);

 	/* if no accumulate, clip color adding with colorig & orig alpha */
-	if ((brush->flag & BRUSH_ACCUMULATE) == 0) {
+	if (!brush_use_accumulate(brush)) {
 		uint color_test, a;
 		char *cp, *ct, *co;

@ -784,7 +788,7 @@ static void do_weight_paint_vertex_single(
 		dw_mirr = NULL;
 	}

-	if ((wp->paint.brush->flag & BRUSH_ACCUMULATE) == 0) {
+	if (!brush_use_accumulate(wp->paint.brush)) {
 		MDeformVert *dvert_prev = ob->sculpt->mode.wpaint.dvert_prev;
 		MDeformVert *dv_prev = defweight_prev_init(dvert_prev, me->dvert, index);
 		if (index_mirr != -1) {
@ -900,7 +904,7 @@ static void do_weight_paint_vertex_multi(
 		return;
 	}

-	if ((wp->paint.brush->flag & BRUSH_ACCUMULATE) == 0) {
+	if (!brush_use_accumulate(wp->paint.brush)) {
 		MDeformVert *dvert_prev = ob->sculpt->mode.wpaint.dvert_prev;
 		MDeformVert *dv_prev = defweight_prev_init(dvert_prev, me->dvert, index);
 		if (index_mirr != -1) {
@ -1031,7 +1035,7 @@ static void vertex_paint_init_session_data(const ToolSettings *ts, Object *ob)

 	/* Create average brush arrays */
 	if (ob->mode == OB_MODE_VERTEX_PAINT) {
-		if ((brush->flag & BRUSH_ACCUMULATE) == 0) {
+		if (!brush_use_accumulate(brush)) {
 			if (ob->sculpt->mode.vpaint.previous_color == NULL) {
 				ob->sculpt->mode.vpaint.previous_color =
 				        MEM_callocN(me->totloop * sizeof(uint), __func__);
@ -1042,7 +1046,7 @@ static void vertex_paint_init_session_data(const ToolSettings *ts, Object *ob)
 		}
 	}
 	else if (ob->mode == OB_MODE_WEIGHT_PAINT) {
-		if ((brush->flag & BRUSH_ACCUMULATE) == 0) {
+		if (!brush_use_accumulate(brush)) {
 			if (ob->sculpt->mode.wpaint.alpha_weight == NULL) {
 				ob->sculpt->mode.wpaint.alpha_weight =
 				        MEM_callocN(me->totvert * sizeof(float), __func__);
@ -1204,11 +1208,9 @@ struct WPaintData {

 	int defbase_tot;

-	/* Special storage for smear brush, avoid feedback loop - update each step and swap. */
-	struct {
-		float *weight_prev;
-		float *weight_curr;
-	} smear;
+	/* original weight values for use in blur/smear */
+	float *precomputed_weight;
+	bool precomputed_weight_ready;
 };

 /* Initialize the stroke cache invariants from operator properties */
@ -1437,24 +1439,8 @@ static bool wpaint_stroke_test_start(bContext *C, wmOperator *op, const float mo
 		wpd->mirror.lock = tmpflags;
 	}

-	if (vp->paint.brush->vertexpaint_tool == PAINT_BLEND_SMEAR) {
-		wpd->smear.weight_prev = MEM_mallocN(sizeof(float) * me->totvert, __func__);
-		const MDeformVert *dv = me->dvert;
-		if (wpd->do_multipaint) {
-			const bool do_auto_normalize = ((ts->auto_normalize != 0) && (wpd->vgroup_validmap != NULL));
-			for (int i = 0; i < me->totvert; i++, dv++) {
-				float weight = BKE_defvert_multipaint_collective_weight(
-				        dv, wpd->defbase_tot, wpd->defbase_sel, wpd->defbase_tot_sel, do_auto_normalize);
-				CLAMP(weight, 0.0f, 1.0f);
-				wpd->smear.weight_prev[i] = weight;
-			}
-		}
-		else {
-			for (int i = 0; i < me->totvert; i++, dv++) {
-				wpd->smear.weight_prev[i] = defvert_find_weight(dv, wpd->active.index);
-			}
-		}
-		wpd->smear.weight_curr = MEM_dupallocN(wpd->smear.weight_prev);
+	if (ELEM(vp->paint.brush->vertexpaint_tool, PAINT_BLEND_SMEAR, PAINT_BLEND_BLUR)) {
+		wpd->precomputed_weight = MEM_mallocN(sizeof(float) * me->totvert, __func__);
 	}

 	/* imat for normals */
@ -1512,6 +1498,33 @@ static float wpaint_get_active_weight(const MDeformVert *dv, const WeightPaintIn
 	}
 }

+static void do_wpaint_precompute_weight_cb_ex(
+        void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id))
+{
+	SculptThreadedTaskData *data = userdata;
+	const MDeformVert *dv = &data->me->dvert[n];
+
+	data->wpd->precomputed_weight[n] = wpaint_get_active_weight(dv, data->wpi);
+}
+
+static void precompute_weight_values(
+        bContext *C, Object *ob, Brush *brush, struct WPaintData *wpd, WeightPaintInfo *wpi, Mesh *me)
+{
+	if (wpd->precomputed_weight_ready && !brush_use_accumulate(brush))
+		return;
+
+	/* threaded loop over vertices */
+	SculptThreadedTaskData data = {
+		.C = C, .ob = ob, .wpd = wpd, .wpi = wpi, .me = me,
+	};
+
+	BLI_task_parallel_range_ex(
+	        0, me->totvert, &data, NULL, 0, do_wpaint_precompute_weight_cb_ex,
+	        true, false);
+
+	wpd->precomputed_weight_ready = true;
+}
+
 static void do_wpaint_brush_blur_task_cb_ex(
        void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id))
 {
@ -1560,8 +1573,7 @@ static void do_wpaint_brush_blur_task_cb_ex(
 					for (int k = 0; k < mp->totloop; k++) {
 						const int l_index = mp->loopstart + k;
 						const MLoop *ml = &data->me->mloop[l_index];
-						const MDeformVert *dv = &data->me->dvert[ml->v];
-						weight_final += wpaint_get_active_weight(dv, data->wpi);
+						weight_final += data->wpd->precomputed_weight[ml->v];
 					}
 				}

@ -1681,7 +1693,7 @@ static void do_wpaint_brush_smear_task_cb_ex(

 									if (stroke_dot > stroke_dot_max) {
 										stroke_dot_max = stroke_dot;
-										weight_final = data->wpd->smear.weight_prev[v_other_index];
+										weight_final = data->wpd->precomputed_weight[v_other_index];
 										do_color = true;
 									}
 								}
@ -1693,12 +1705,13 @@ static void do_wpaint_brush_smear_task_cb_ex(
 							const float final_alpha =
 							        brush_fade * brush_strength *
 							        grid_alpha * brush_alpha_pressure;
+
+							if (final_alpha <= 0.0f)
+								continue;
+
 							do_weight_paint_vertex(
 							        data->vp, data->ob, data->wpi,
 							        v_index, final_alpha, (float)weight_final);
-							/* Access the weight again because it might not have been applied completely. */
-							data->wpd->smear.weight_curr[v_index] =
-							        wpaint_get_active_weight(&data->me->dvert[v_index], data->wpi);
 						}
 					}
 				}
@ -2064,14 +2077,14 @@ static void wpaint_stroke_update_step(bContext *C, struct PaintStroke *stroke, P
 	wpi.brush_alpha_value =  brush_alpha_value;
 	/* *** done setting up WeightPaintInfo *** */

+	if (wpd->precomputed_weight) {
+		precompute_weight_values(C, ob, brush, wpd, &wpi, ob->data);
+	}
+
 	wpaint_do_symmetrical_brush_actions(C, ob, wp, sd, wpd, &wpi);

 	swap_m4m4(vc->rv3d->persmat, mat);

-	if (wp->paint.brush->vertexpaint_tool == PAINT_BLEND_SMEAR) {
-		SWAP(float *, wpd->smear.weight_curr, wpd->smear.weight_prev);
-	}
-
 	/* calculate pivot for rotation around seletion if needed */
 	/* also needed for "View Selected" on last stroke */
 	paint_last_stroke_update(scene, vc->ar, mval);
@ -2121,10 +2134,8 @@ static void wpaint_stroke_done(const bContext *C, struct PaintStroke *stroke)
 			MEM_freeN((void *)wpd->active.lock);
 		if (wpd->mirror.lock)
 			MEM_freeN((void *)wpd->mirror.lock);
-		if (wpd->smear.weight_prev)
-			MEM_freeN(wpd->smear.weight_prev);
-		if (wpd->smear.weight_curr)
-			MEM_freeN(wpd->smear.weight_curr);
+		if (wpd->precomputed_weight)
+			MEM_freeN(wpd->precomputed_weight);

 		MEM_freeN(wpd);
 	}
--- a/source/blender/makesrna/intern/rna_nodetree.c
+++ b/source/blender/makesrna/intern/rna_nodetree.c
@ -3740,7 +3740,7 @@ static const EnumPropertyItem sh_tex_prop_interpolation_items[] = {
 	{SHD_INTERP_CLOSEST, "Closest", 0, "Closest",
 	                     "No interpolation (sample closest texel)"},
 	{SHD_INTERP_CUBIC,   "Cubic", 0, "Cubic",
-	                     "Cubic interpolation (CPU only)"},
+	                     "Cubic interpolation"},
 	{SHD_INTERP_SMART,   "Smart", 0, "Smart",
 	                     "Bicubic when magnifying, else bilinear (OSL only)"},
 	{0, NULL, 0, NULL, NULL}
@ -4106,7 +4106,7 @@ static void def_sh_tex_pointdensity(StructRNA *srna)
 		{SHD_INTERP_LINEAR,  "Linear", 0, "Linear",
 		                     "Linear interpolation"},
 		{SHD_INTERP_CUBIC,   "Cubic", 0, "Cubic",
-		                     "Cubic interpolation (CPU only)"},
+		                     "Cubic interpolation"},
 		{0, NULL, 0, NULL, NULL}
 	};

--- a/source/creator/creator_args.c
+++ b/source/creator/creator_args.c
@ -439,7 +439,7 @@ static void arg_py_context_restore(
 * \{ */

 static const char arg_handle_print_version_doc[] =
-"\n\tPrint Blender version and exit"
+"\n\tPrint Blender version and exit."
 ;
 static int arg_handle_print_version(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -463,10 +463,10 @@ static int arg_handle_print_version(int UNUSED(argc), const char **UNUSED(argv),
 }

 static const char arg_handle_print_help_doc[] =
-"\n\tPrint this help text and exit"
+"\n\tPrint this help text and exit."
 ;
 static const char arg_handle_print_help_doc_win32[] =
-"\n\tPrint this help text and exit (windows only)"
+"\n\tPrint this help text and exit (windows only)."
 ;
 static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), void *data)
 {
@ -593,16 +593,16 @@ static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), vo
 	printf("Argument Parsing:\n");
 	printf("\tArguments must be separated by white space, eg:\n");
 	printf("\t# blender -ba test.blend\n");
-	printf("\t...will ignore the 'a'\n");
+	printf("\t...will ignore the 'a'.\n");
 	printf("\t# blender -b test.blend -f8\n");
-	printf("\t...will ignore '8' because there is no space between the '-f' and the frame value\n\n");
+	printf("\t...will ignore '8' because there is no space between the '-f' and the frame value.\n\n");

 	printf("Argument Order:\n");
 	printf("\tArguments are executed in the order they are given. eg:\n");
 	printf("\t# blender --background test.blend --render-frame 1 --render-output '/tmp'\n");
-	printf("\t...will not render to '/tmp' because '--render-frame 1' renders before the output path is set\n");
+	printf("\t...will not render to '/tmp' because '--render-frame 1' renders before the output path is set.\n");
 	printf("\t# blender --background --render-output /tmp test.blend --render-frame 1\n");
-	printf("\t...will not render to '/tmp' because loading the blend-file overwrites the render output that was set\n");
+	printf("\t...will not render to '/tmp' because loading the blend-file overwrites the render output that was set.\n");
 	printf("\t# blender --background test.blend --render-output /tmp --render-frame 1\n");
 	printf("\t...works as expected.\n\n");

@ -612,7 +612,7 @@ static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), vo
 	printf("  $BLENDER_SYSTEM_SCRIPTS   Directory for system wide scripts.\n");
 	printf("  $BLENDER_USER_DATAFILES   Directory for user data files (icons, translations, ..).\n");
 	printf("  $BLENDER_SYSTEM_DATAFILES Directory for system wide data files.\n");
-	printf("  $BLENDER_SYSTEM_PYTHON    Directory for system python libraries.\n");
+	printf("  $BLENDER_SYSTEM_PYTHON    Directory for system Python libraries.\n");
 #ifdef WIN32
 	printf("  $TEMP                     Store temporary files here.\n");
 #else
@ -621,7 +621,7 @@ static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), vo
 #ifdef WITH_SDL
 	printf("  $SDL_AUDIODRIVER          LibSDL audio driver - alsa, esd, dma.\n");
 #endif
-	printf("  $PYTHONHOME               Path to the python directory, eg. /usr/lib/python.\n\n");
+	printf("  $PYTHONHOME               Path to the Python directory, eg. /usr/lib/python.\n\n");

 	exit(0);

@ -629,7 +629,7 @@ static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), vo
 }

 static const char arg_handle_arguments_end_doc[] =
-"\n\tEnds option processing, following arguments passed unchanged. Access via Python's 'sys.argv'"
+"\n\tEnd option processing, following arguments passed unchanged. Access via Python's 'sys.argv'."
 ;
 static int arg_handle_arguments_end(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -646,10 +646,10 @@ static int arg_handle_arguments_end(int UNUSED(argc), const char **UNUSED(argv),
 #endif

 static const char arg_handle_python_set_doc_enable[] =
-"\n\tEnable automatic Python script execution" PY_ENABLE_AUTO
+"\n\tEnable automatic Python script execution" PY_ENABLE_AUTO "."
 ;
 static const char arg_handle_python_set_doc_disable[] =
-"\n\tDisable automatic Python script execution (pydrivers & startup scripts)" PY_DISABLE_AUTO
+"\n\tDisable automatic Python script execution (pydrivers & startup scripts)" PY_DISABLE_AUTO "."
 ;
 #undef PY_ENABLE_AUTO
 #undef PY_DISABLE_AUTO
@ -667,7 +667,7 @@ static int arg_handle_python_set(int UNUSED(argc), const char **UNUSED(argv), vo
 }

 static const char arg_handle_crash_handler_disable_doc[] =
-"\n\tDisable the crash handler"
+"\n\tDisable the crash handler."
 ;
 static int arg_handle_crash_handler_disable(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -676,7 +676,7 @@ static int arg_handle_crash_handler_disable(int UNUSED(argc), const char **UNUSE
 }

 static const char arg_handle_abort_handler_disable_doc[] =
-"\n\tDisable the abort handler"
+"\n\tDisable the abort handler."
 ;
 static int arg_handle_abort_handler_disable(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -685,7 +685,7 @@ static int arg_handle_abort_handler_disable(int UNUSED(argc), const char **UNUSE
 }

 static const char arg_handle_background_mode_set_doc[] =
-"\n\tRun in background (often used for UI-less rendering)"
+"\n\tRun in background (often used for UI-less rendering)."
 ;
 static int arg_handle_background_mode_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -695,7 +695,7 @@ static int arg_handle_background_mode_set(int UNUSED(argc), const char **UNUSED(

 static const char arg_handle_debug_mode_set_doc[] =
 "\n"
-"\tTurn debugging on\n"
+"\tTurn debugging on.\n"
 "\n"
 "\t* Enables memory error detection\n"
 "\t* Disables mouse grab (to interact with a debugger in some cases)\n"
@ -720,30 +720,30 @@ static int arg_handle_debug_mode_set(int UNUSED(argc), const char **UNUSED(argv)

 #ifdef WITH_FFMPEG
 static const char arg_handle_debug_mode_generic_set_doc_ffmpeg[] =
-"\n\tEnable debug messages from FFmpeg library";
+"\n\tEnable debug messages from FFmpeg library.";
 #endif
 #ifdef WITH_FREESTYLE
 static const char arg_handle_debug_mode_generic_set_doc_freestyle[] =
-"\n\tEnable debug messages for FreeStyle";
+"\n\tEnable debug messages for FreeStyle.";
 #endif
 static const char arg_handle_debug_mode_generic_set_doc_python[] =
-"\n\tEnable debug messages for Python";
+"\n\tEnable debug messages for Python.";
 static const char arg_handle_debug_mode_generic_set_doc_events[] =
-"\n\tEnable debug messages for the event system";
+"\n\tEnable debug messages for the event system.";
 static const char arg_handle_debug_mode_generic_set_doc_handlers[] =
-"\n\tEnable debug messages for event handling";
+"\n\tEnable debug messages for event handling.";
 static const char arg_handle_debug_mode_generic_set_doc_wm[] =
-"\n\tEnable debug messages for the window manager, also prints every operator call";
+"\n\tEnable debug messages for the window manager, also prints every operator call.";
 static const char arg_handle_debug_mode_generic_set_doc_jobs[] =
 "\n\tEnable time profiling for background jobs.";
 static const char arg_handle_debug_mode_generic_set_doc_gpu[] =
 "\n\tEnable gpu debug context and information for OpenGL 4.3+.";
 static const char arg_handle_debug_mode_generic_set_doc_depsgraph[] =
-"\n\tEnable debug messages from dependency graph";
+"\n\tEnable debug messages from dependency graph.";
 static const char arg_handle_debug_mode_generic_set_doc_depsgraph_no_threads[] =
-"\n\tSwitch dependency graph to a single threaded evaluation";
+"\n\tSwitch dependency graph to a single threaded evaluation.";
 static const char arg_handle_debug_mode_generic_set_doc_gpumem[] =
-"\n\tEnable GPU memory stats in status bar";
+"\n\tEnable GPU memory stats in status bar.";

 static int arg_handle_debug_mode_generic_set(int UNUSED(argc), const char **UNUSED(argv), void *data)
 {
@ -752,7 +752,7 @@ static int arg_handle_debug_mode_generic_set(int UNUSED(argc), const char **UNUS
 }

 static const char arg_handle_debug_mode_io_doc[] =
-"\n\tEnable debug messages for I/O (collada, ...)";
+"\n\tEnable debug messages for I/O (collada, ...).";
 static int arg_handle_debug_mode_io(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
 	G.debug |= G_DEBUG_IO;
@ -760,7 +760,7 @@ static int arg_handle_debug_mode_io(int UNUSED(argc), const char **UNUSED(argv),
 }

 static const char arg_handle_debug_mode_all_doc[] =
-"\n\tEnable all debug messages";
+"\n\tEnable all debug messages.";
 static int arg_handle_debug_mode_all(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
 	G.debug |= G_DEBUG_ALL;
@ -775,7 +775,7 @@ static int arg_handle_debug_mode_all(int UNUSED(argc), const char **UNUSED(argv)

 #ifdef WITH_LIBMV
 static const char arg_handle_debug_mode_libmv_doc[] =
-"\n\tEnable debug messages from libmv library"
+"\n\tEnable debug messages from libmv library."
 ;
 static int arg_handle_debug_mode_libmv(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -787,7 +787,7 @@ static int arg_handle_debug_mode_libmv(int UNUSED(argc), const char **UNUSED(arg

 #ifdef WITH_CYCLES_LOGGING
 static const char arg_handle_debug_mode_cycles_doc[] =
-"\n\tEnable debug messages from Cycles"
+"\n\tEnable debug messages from Cycles."
 ;
 static int arg_handle_debug_mode_cycles(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -797,7 +797,7 @@ static int arg_handle_debug_mode_cycles(int UNUSED(argc), const char **UNUSED(ar
 #endif

 static const char arg_handle_debug_mode_memory_set_doc[] =
-"\n\tEnable fully guarded memory allocation and debugging"
+"\n\tEnable fully guarded memory allocation and debugging."
 ;
 static int arg_handle_debug_mode_memory_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -807,7 +807,7 @@ static int arg_handle_debug_mode_memory_set(int UNUSED(argc), const char **UNUSE

 static const char arg_handle_debug_value_set_doc[] =
 "<value>\n"
-"\tSet debug value of <value> on startup\n"
+"\tSet debug value of <value> on startup."
 ;
 static int arg_handle_debug_value_set(int argc, const char **argv, void *UNUSED(data))
 {
@ -831,7 +831,7 @@ static int arg_handle_debug_value_set(int argc, const char **argv, void *UNUSED(
 }

 static const char arg_handle_debug_fpe_set_doc[] =
-"\n\tEnable floating point exceptions"
+"\n\tEnable floating point exceptions."
 ;
 static int arg_handle_debug_fpe_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -840,7 +840,7 @@ static int arg_handle_debug_fpe_set(int UNUSED(argc), const char **UNUSED(argv),
 }

 static const char arg_handle_factory_startup_set_doc[] =
-"\n\tSkip reading the " STRINGIFY(BLENDER_STARTUP_FILE) " in the users home directory"
+"\n\tSkip reading the " STRINGIFY(BLENDER_STARTUP_FILE) " in the users home directory."
 ;
 static int arg_handle_factory_startup_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -849,11 +849,11 @@ static int arg_handle_factory_startup_set(int UNUSED(argc), const char **UNUSED(
 }

 static const char arg_handle_env_system_set_doc_datafiles[] =
-"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_DATAFILES)" environment variable";
+"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_DATAFILES)" environment variable.";
 static const char arg_handle_env_system_set_doc_scripts[] =
-"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_SCRIPTS)" environment variable";
+"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_SCRIPTS)" environment variable.";
 static const char arg_handle_env_system_set_doc_python[] =
-"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_PYTHON)" environment variable";
+"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_PYTHON)" environment variable.";

 static int arg_handle_env_system_set(int argc, const char **argv, void *UNUSED(data))
 {
@ -879,13 +879,19 @@ static int arg_handle_env_system_set(int argc, const char **argv, void *UNUSED(d

 static const char arg_handle_playback_mode_doc[] =
 "<options> <file(s)>\n"
-"\tPlayback <file(s)>, only operates this way when not running in background.\n"
-"\t\t-p <sx> <sy>\tOpen with lower left corner at <sx>, <sy>\n"
-"\t\t-m\t\tRead from disk (Do not buffer)\n"
-"\t\t-f <fps> <fps-base>\t\tSpecify FPS to start with\n"
-"\t\t-j <frame>\tSet frame step to <frame>\n"
-"\t\t-s <frame>\tPlay from <frame>\n"
-"\t\t-e <frame>\tPlay until <frame>"
+"\tPlayback <file(s)>, only operates this way when not running in background.\n\n"
+"\t-p <sx> <sy>\n"
+"\t\tOpen with lower left corner at <sx>, <sy>.\n"
+"\t-m\n"
+"\t\tRead from disk (Do not buffer).\n"
+"\t-f <fps> <fps-base>\n"
+"\t\tSpecify FPS to start with.\n"
+"\t-j <frame>\n"
+"\t\tSet frame step to <frame>.\n"
+"\t-s <frame>\n"
+"\t\tPlay from <frame>.\n"
+"\t-e <frame>\n"
+"\t\tPlay until <frame>."
 ;
 static int arg_handle_playback_mode(int argc, const char **argv, void *UNUSED(data))
 {
@ -905,7 +911,7 @@ static int arg_handle_playback_mode(int argc, const char **argv, void *UNUSED(da

 static const char arg_handle_window_geometry_doc[] =
 "<sx> <sy> <w> <h>\n"
-"\tOpen with lower left corner at <sx>, <sy> and width and height as <w>, <h>"
+"\tOpen with lower left corner at <sx>, <sy> and width and height as <w>, <h>."
 ;
 static int arg_handle_window_geometry(int argc, const char **argv, void *UNUSED(data))
 {
@ -931,7 +937,7 @@ static int arg_handle_window_geometry(int argc, const char **argv, void *UNUSED(
 }

 static const char arg_handle_native_pixels_set_doc[] =
-"\n\tDo not use native pixel size, for high resolution displays (MacBook 'Retina')"
+"\n\tDo not use native pixel size, for high resolution displays (MacBook 'Retina')."
 ;
 static int arg_handle_native_pixels_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -940,7 +946,7 @@ static int arg_handle_native_pixels_set(int UNUSED(argc), const char **UNUSED(ar
 }

 static const char arg_handle_with_borders_doc[] =
-"\n\tForce opening with borders"
+"\n\tForce opening with borders."
 ;
 static int arg_handle_with_borders(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -949,7 +955,7 @@ static int arg_handle_with_borders(int UNUSED(argc), const char **UNUSED(argv),
 }

 static const char arg_handle_without_borders_doc[] =
-"\n\tForce opening without borders"
+"\n\tForce opening without borders."
 ;
 static int arg_handle_without_borders(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -960,7 +966,7 @@ static int arg_handle_without_borders(int UNUSED(argc), const char **UNUSED(argv
 extern bool wm_start_with_console; /* wm_init_exit.c */

 static const char arg_handle_start_with_console_doc[] =
-"\n\tStart with the console window open (ignored if -b is set), (Windows only)"
+"\n\tStart with the console window open (ignored if -b is set), (Windows only)."
 ;
 static int arg_handle_start_with_console(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -969,10 +975,10 @@ static int arg_handle_start_with_console(int UNUSED(argc), const char **UNUSED(a
 }

 static const char arg_handle_register_extension_doc[] =
-"\n\tRegister blend-file extension, then exit (Windows only)"
+"\n\tRegister blend-file extension, then exit (Windows only)."
 ;
 static const char arg_handle_register_extension_doc_silent[] =
-"\n\tSilently register blend-file extension, then exit (Windows only)"
+"\n\tSilently register blend-file extension, then exit (Windows only)."
 ;
 static int arg_handle_register_extension(int UNUSED(argc), const char **UNUSED(argv), void *data)
 {
@ -987,7 +993,7 @@ static int arg_handle_register_extension(int UNUSED(argc), const char **UNUSED(a
 }

 static const char arg_handle_joystick_disable_doc[] =
-"\n\tDisable joystick support"
+"\n\tDisable joystick support."
 ;
 static int arg_handle_joystick_disable(int UNUSED(argc), const char **UNUSED(argv), void *data)
 {
@ -1008,7 +1014,7 @@ static int arg_handle_joystick_disable(int UNUSED(argc), const char **UNUSED(arg
 }

 static const char arg_handle_audio_disable_doc[] =
-"\n\tForce sound system to None"
+"\n\tForce sound system to None."
 ;
 static int arg_handle_audio_disable(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
 {
@ -1017,7 +1023,7 @@ static int arg_handle_audio_disable(int UNUSED(argc), const char **UNUSED(argv),
 }

 static const char arg_handle_audio_set_doc[] =
-"\n\tForce sound system to a specific device\n\tThe name is the same as found in the user preferences, e.g. OpenAL."
+"\n\tForce sound system to a specific device.\n\t'NULL' 'SDL' 'OPENAL' 'JACK'."
 ;
 static int arg_handle_audio_set(int argc, const char **argv, void *UNUSED(data))
 {
@ -1067,7 +1073,7 @@ static int arg_handle_output_set(int argc, const char **argv, void *data)

 static const char arg_handle_engine_set_doc[] =
 "<engine>\n"
-"\tSpecify the render engine\n\tuse -E help to list available engines"
+"\tSpecify the render engine.\n\tUse -E help to list available engines."
 ;
 static int arg_handle_engine_set(int argc, const char **argv, void *data)
 {
@ -1110,11 +1116,11 @@ static int arg_handle_engine_set(int argc, const char **argv, void *data)

 static const char arg_handle_image_type_set_doc[] =
 "<format>\n"
-"\tSet the render format, Valid options are...\n"
-"\t\tTGA RAWTGA JPEG IRIS IRIZ\n"
-"\t\tAVIRAW AVIJPEG PNG BMP\n"
-"\t(formats that can be compiled into blender, not available on all systems)\n"
-"\t\tHDR TIFF EXR MULTILAYER MPEG FRAMESERVER CINEON DPX DDS JP2"
+"\tSet the render format.\n"
+"\tValid options are 'TGA' 'RAWTGA' 'JPEG' 'IRIS' 'IRIZ' 'AVIRAW' 'AVIJPEG' 'PNG' 'BMP'\n"
+"\n"
+"\tFormats that can be compiled into Blender, not available on all systems: 'HDR' 'TIFF' 'EXR' 'MULTILAYER'\n"
+"\t'MPEG' 'FRAMESERVER' 'CINEON' 'DPX' 'DDS' 'JP2'"
 ;
 static int arg_handle_image_type_set(int argc, const char **argv, void *data)
 {
@ -1202,7 +1208,7 @@ static int arg_handle_verbosity_set(int argc, const char **argv, void *UNUSED(da

 static const char arg_handle_extension_set_doc[] =
 "<bool>\n"
-"\tSet option to add the file extension to the end of the file"
+"\tSet option to add the file extension to the end of the file."
 ;
 static int arg_handle_extension_set(int argc, const char **argv, void *data)
 {
@ -1234,10 +1240,15 @@ static int arg_handle_extension_set(int argc, const char **argv, void *data)

 static const char arg_handle_ge_parameters_set_doc[] =
 "Game Engine specific options\n"
-"\t-g fixedtime\t\tRun on 50 hertz without dropping frames\n"
-"\t-g vertexarrays\t\tUse Vertex Arrays for rendering (usually faster)\n"
-"\t-g nomipmap\t\tNo Texture Mipmapping\n"
-"\t-g linearmipmap\t\tLinear Texture Mipmapping instead of Nearest (default)"
+"\n"
+"\t'fixedtime'\n"
+"\t\tRun on 50 hertz without dropping frames.\n"
+"\t'vertexarrays'\n"
+"\t\tUse Vertex Arrays for rendering (usually faster).\n"
+"\t'nomipmap'\n"
+"\t\tNo Texture Mipmapping.\n"
+"\t'linearmipmap'\n"
+"\t\tLinear Texture Mipmapping instead of Nearest (default)."
 ;
 static int arg_handle_ge_parameters_set(int argc, const char **argv, void *data)
 {
@ -1270,7 +1281,7 @@ static int arg_handle_ge_parameters_set(int argc, const char **argv, void *data)
 #endif
 			}
 			else {
-				printf("error: argument assignment (%s) without value.\n", paramname);
+				printf("Error: argument assignment (%s) without value.\n", paramname);
 				return 0;
 			}
 			/* name arg eaten */
@ -1361,7 +1372,7 @@ static int arg_handle_render_frame(int argc, const char **argv, void *data)
 }

 static const char arg_handle_render_animation_doc[] =
-"\n\tRender frames from start to end (inclusive)"
+"\n\tRender frames from start to end (inclusive)."
 ;
 static int arg_handle_render_animation(int UNUSED(argc), const char **UNUSED(argv), void *data)
 {
@ -1387,7 +1398,7 @@ static int arg_handle_render_animation(int UNUSED(argc), const char **UNUSED(arg

 static const char arg_handle_scene_set_doc[] =
 "<name>\n"
-"\tSet the active scene <name> for rendering"
+"\tSet the active scene <name> for rendering."
 ;
 static int arg_handle_scene_set(int argc, const char **argv, void *data)
 {
@ -1469,7 +1480,7 @@ static int arg_handle_frame_end_set(int argc, const char **argv, void *data)

 static const char arg_handle_frame_skip_set_doc[] =
 "<frames>\n"
-"\tSet number of frames to step forward after each rendered frame"
+"\tSet number of frames to step forward after each rendered frame."
 ;
 static int arg_handle_frame_skip_set(int argc, const char **argv, void *data)
 {
@ -1497,7 +1508,7 @@ static int arg_handle_frame_skip_set(int argc, const char **argv, void *data)

 static const char arg_handle_python_file_run_doc[] =
 "<filename>\n"
-"\tRun the given Python script file"
+"\tRun the given Python script file."
 ;
 static int arg_handle_python_file_run(int argc, const char **argv, void *data)
 {
@ -1525,14 +1536,14 @@ static int arg_handle_python_file_run(int argc, const char **argv, void *data)
 	}
 #else
 	UNUSED_VARS(argc, argv, data);
-	printf("This blender was built without python support\n");
+	printf("This Blender was built without Python support\n");
 	return 0;
 #endif /* WITH_PYTHON */
 }

 static const char arg_handle_python_text_run_doc[] =
 "<name>\n"
-"\tRun the given Python script text block"
+"\tRun the given Python script text block."
 ;
 static int arg_handle_python_text_run(int argc, const char **argv, void *data)
 {
@ -1566,14 +1577,14 @@ static int arg_handle_python_text_run(int argc, const char **argv, void *data)
 	}
 #else
 	UNUSED_VARS(argc, argv, data);
-	printf("This blender was built without python support\n");
+	printf("This Blender was built without Python support\n");
 	return 0;
 #endif /* WITH_PYTHON */
 }

 static const char arg_handle_python_expr_run_doc[] =
 "<expression>\n"
-"\tRun the given expression as a Python script"
+"\tRun the given expression as a Python script."
 ;
 static int arg_handle_python_expr_run(int argc, const char **argv, void *data)
 {
@ -1596,13 +1607,13 @@ static int arg_handle_python_expr_run(int argc, const char **argv, void *data)
 	}
 #else
 	UNUSED_VARS(argc, argv, data);
-	printf("This blender was built without python support\n");
+	printf("This Blender was built without Python support\n");
 	return 0;
 #endif /* WITH_PYTHON */
 }

 static const char arg_handle_python_console_run_doc[] =
-"\n\tRun blender with an interactive console"
+"\n\tRun Blender with an interactive console."
 ;
 static int arg_handle_python_console_run(int UNUSED(argc), const char **argv, void *data)
 {
@ -1614,7 +1625,7 @@ static int arg_handle_python_console_run(int UNUSED(argc), const char **argv, vo
 	return 0;
 #else
 	UNUSED_VARS(argv, data);
-	printf("This blender was built without python support\n");
+	printf("This Blender was built without python support\n");
 	return 0;
 #endif /* WITH_PYTHON */
 }
@ -1646,7 +1657,7 @@ static int arg_handle_python_exit_code_set(int argc, const char **argv, void *UN
 }

 static const char arg_handle_addons_set_doc[] =
-"\n\tComma separated list of add-ons (no spaces)"
+"\n\tComma separated list of add-ons (no spaces)."
 ;
 static int arg_handle_addons_set(int argc, const char **argv, void *data)
 {