diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e6520cfdda..ad5fb57db44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -731,6 +731,17 @@ if(WITH_INTERNATIONAL) endif() if(WITH_PYTHON) + # While we have this as an '#error' in bpy_util.h, + # upgrading Python tends to cause confusion for users who build. + # Give the error message early to make this more obvious. + # + # Do this before main 'platform_*' checks, + # because UNIX will search for the old Python paths which may not exist. + # giving errors about missing paths before this case is met. + if(DEFINED PYTHON_VERSION AND "${PYTHON_VERSION}" VERSION_LESS "3.6") + message(FATAL_ERROR "At least Python 3.6 is required to build") + endif() + if(NOT EXISTS "${CMAKE_SOURCE_DIR}/release/scripts/addons/modules") message(WARNING "Addons path '${CMAKE_SOURCE_DIR}/release/scripts/addons' is missing, " diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 130b9434255..de0ab989d8e 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -1188,7 +1188,7 @@ class CYCLES_WORLD_PT_settings(CyclesButtonsPanel, Panel): sub = col.column() sub.active = use_cpu(context) sub.prop(cworld, "volume_sampling", text="") - sub.prop(cworld, "volume_interpolation", text="") + col.prop(cworld, "volume_interpolation", text="") col.prop(cworld, "homogeneous_volume", text="Homogeneous") @@ -1287,7 +1287,7 @@ class CYCLES_MATERIAL_PT_settings(CyclesButtonsPanel, Panel): sub = col.column() sub.active = use_cpu(context) sub.prop(cmat, "volume_sampling", text="") - sub.prop(cmat, "volume_interpolation", text="") + col.prop(cmat, "volume_interpolation", text="") col.prop(cmat, "homogeneous_volume", text="Homogeneous") layout.separator() diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 8736a6927e0..fe0bcc5b91f 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -26,6 +26,7 @@ #include "util/util_stats.h" #include "util/util_string.h" #include "util/util_thread.h" +#include "util/util_texture.h" #include "util/util_types.h" #include "util/util_vector.h" diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 19e3c0a9075..ac6d3246d38 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -163,6 +163,9 @@ public: TaskPool task_pool; KernelGlobals kernel_globals; + device_vector texture_info; + bool need_texture_info; + #ifdef WITH_OSL OSLGlobals osl_globals; #endif @@ -235,6 +238,8 @@ public: VLOG(1) << "Will be using split kernel."; } + need_texture_info = false; + #define REGISTER_SPLIT_KERNEL(name) split_kernels[#name] = KernelFunctions(KERNEL_FUNCTIONS(name)) REGISTER_SPLIT_KERNEL(path_init); REGISTER_SPLIT_KERNEL(scene_intersect); @@ -261,6 +266,7 @@ public: ~CPUDevice() { task_pool.stop(); + tex_free(texture_info); } virtual bool show_samples() const @@ -268,6 +274,15 @@ public: return (TaskScheduler::num_threads() == 1); } + void load_texture_info() + { + if(need_texture_info) { + tex_free(texture_info); + tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT); + need_texture_info = false; + } + } + void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/) { if(name) { @@ -333,14 +348,47 @@ public: VLOG(1) << "Texture allocate: " << name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; - kernel_tex_copy(&kernel_globals, - name, - mem.data_pointer, - mem.data_width, - mem.data_height, - mem.data_depth, - interpolation, - extension); + + if(interpolation == INTERPOLATION_NONE) { + /* Data texture. */ + kernel_tex_copy(&kernel_globals, + name, + mem.data_pointer, + mem.data_width, + mem.data_height, + mem.data_depth, + interpolation, + extension); + } + else { + /* Image Texture. */ + int flat_slot = 0; + if(string_startswith(name, "__tex_image")) { + int pos = string(name).rfind("_"); + flat_slot = atoi(name + pos + 1); + } + else { + assert(0); + } + + if(flat_slot >= texture_info.size()) { + /* Allocate some slots in advance, to reduce amount + * of re-allocations. */ + texture_info.resize(flat_slot + 128); + } + + TextureInfo& info = texture_info.get_data()[flat_slot]; + info.data = (uint64_t)mem.data_pointer; + info.cl_buffer = 0; + info.interpolation = interpolation; + info.extension = extension; + info.width = mem.data_width; + info.height = mem.data_height; + info.depth = mem.data_depth; + + need_texture_info = true; + } + mem.device_pointer = mem.data_pointer; mem.device_size = mem.memory_size(); stats.mem_alloc(mem.device_size); @@ -352,6 +400,7 @@ public: mem.device_pointer = 0; stats.mem_free(mem.device_size); mem.device_size = 0; + need_texture_info = true; } } @@ -784,6 +833,9 @@ public: void task_add(DeviceTask& task) { + /* Load texture info. */ + load_texture_info(); + /* split task into smaller ones */ list tasks; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 48ffa1484fb..3d209e5560c 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -129,7 +129,7 @@ public: CUcontext cuContext; CUmodule cuModule, cuFilterModule; map tex_interp_map; - map tex_bindless_map; + map tex_bindless_map; int cuDevId; int cuDevArchitecture; bool first_error; @@ -145,8 +145,8 @@ public: map pixel_mem_map; /* Bindless Textures */ - device_vector bindless_mapping; - bool need_bindless_mapping; + device_vector texture_info; + bool need_texture_info; CUdeviceptr cuda_device_ptr(device_ptr mem) { @@ -231,7 +231,7 @@ public: split_kernel = NULL; - need_bindless_mapping = false; + need_texture_info = false; /* intialize */ if(cuda_error(cuInit(0))) @@ -274,7 +274,7 @@ public: delete split_kernel; if(info.has_bindless_textures) { - tex_free(bindless_mapping); + tex_free(texture_info); } cuda_assert(cuCtxDestroy(cuContext)); @@ -544,12 +544,12 @@ public: return (result == CUDA_SUCCESS); } - void load_bindless_mapping() + void load_texture_info() { - if(info.has_bindless_textures && need_bindless_mapping) { - tex_free(bindless_mapping); - tex_alloc("__bindless_mapping", bindless_mapping, INTERPOLATION_NONE, EXTENSION_REPEAT); - need_bindless_mapping = false; + if(info.has_bindless_textures && need_texture_info) { + tex_free(texture_info); + tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT); + need_texture_info = false; } } @@ -646,8 +646,7 @@ public: << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; - /* Check if we are on sm_30 or above. - * We use arrays and bindles textures for storage there */ + /* Check if we are on sm_30 or above, for bindless textures. */ bool has_bindless_textures = info.has_bindless_textures; /* General variables for both architectures */ @@ -679,20 +678,10 @@ public: filter_mode = CU_TR_FILTER_MODE_LINEAR; } - CUarray_format_enum format; - switch(mem.data_type) { - case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break; - case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break; - case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break; - case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break; - case TYPE_HALF: format = CU_AD_FORMAT_HALF; break; - default: assert(0); return; - } - /* General variables for Fermi */ CUtexref texref = NULL; - if(!has_bindless_textures) { + if(!has_bindless_textures && interpolation != INTERPOLATION_NONE) { if(mem.data_depth > 1) { /* Kernel uses different bind names for 2d and 3d float textures, * so we have to adjust couple of things here. @@ -711,41 +700,41 @@ public: } } - /* Data Storage */ if(interpolation == INTERPOLATION_NONE) { - if(has_bindless_textures) { - mem_alloc(NULL, mem, MEM_READ_ONLY); - mem_copy_to(mem); + /* Data Storage */ + mem_alloc(NULL, mem, MEM_READ_ONLY); + mem_copy_to(mem); - CUdeviceptr cumem; - size_t cubytes; + CUdeviceptr cumem; + size_t cubytes; - cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str())); + cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str())); - if(cubytes == 8) { - /* 64 bit device pointer */ - uint64_t ptr = mem.device_pointer; - cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)); - } - else { - /* 32 bit device pointer */ - uint32_t ptr = (uint32_t)mem.device_pointer; - cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)); - } + if(cubytes == 8) { + /* 64 bit device pointer */ + uint64_t ptr = mem.device_pointer; + cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)); } else { - mem_alloc(NULL, mem, MEM_READ_ONLY); - mem_copy_to(mem); - - cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size)); - cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT)); - cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER)); + /* 32 bit device pointer */ + uint32_t ptr = (uint32_t)mem.device_pointer; + cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)); } } - /* Texture Storage */ else { + /* Texture Storage */ CUarray handle = NULL; + CUarray_format_enum format; + switch(mem.data_type) { + case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break; + case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break; + case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break; + case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break; + case TYPE_HALF: format = CU_AD_FORMAT_HALF; break; + default: assert(0); return; + } + if(mem.data_depth > 1) { CUDA_ARRAY3D_DESCRIPTOR desc; @@ -810,8 +799,8 @@ public: stats.mem_alloc(size); - /* Bindless Textures - Kepler */ if(has_bindless_textures) { + /* Bindless Textures - Kepler */ int flat_slot = 0; if(string_startswith(name, "__tex_image")) { int pos = string(name).rfind("_"); @@ -844,35 +833,39 @@ public: } /* Resize once */ - if(flat_slot >= bindless_mapping.size()) { + if(flat_slot >= texture_info.size()) { /* Allocate some slots in advance, to reduce amount - * of re-allocations. - */ - bindless_mapping.resize(flat_slot + 128); + * of re-allocations. */ + texture_info.resize(flat_slot + 128); } /* Set Mapping and tag that we need to (re-)upload to device */ - bindless_mapping.get_data()[flat_slot] = (uint)tex; - tex_bindless_map[mem.device_pointer] = (uint)tex; - need_bindless_mapping = true; + TextureInfo& info = texture_info.get_data()[flat_slot]; + info.data = (uint64_t)tex; + info.cl_buffer = 0; + info.interpolation = interpolation; + info.extension = extension; + info.width = mem.data_width; + info.height = mem.data_height; + info.depth = mem.data_depth; + + tex_bindless_map[mem.device_pointer] = tex; + need_texture_info = true; } - /* Regular Textures - Fermi */ else { + /* Regular Textures - Fermi */ cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT)); cuda_assert(cuTexRefSetFilterMode(texref, filter_mode)); cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES)); - } - } - /* Fermi, Data and Image Textures */ - if(!has_bindless_textures) { - cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode)); - cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode)); - if(mem.data_depth > 1) { - cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode)); - } + cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode)); + cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode)); + if(mem.data_depth > 1) { + cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode)); + } - cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements)); + cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements)); + } } /* Fermi and Kepler */ @@ -888,8 +881,8 @@ public: /* Free CUtexObject (Bindless Textures) */ if(info.has_bindless_textures && tex_bindless_map[mem.device_pointer]) { - uint flat_slot = tex_bindless_map[mem.device_pointer]; - cuTexObjectDestroy(flat_slot); + CUtexObject tex = tex_bindless_map[mem.device_pointer]; + cuTexObjectDestroy(tex); } tex_interp_map.erase(tex_interp_map.find(mem.device_pointer)); @@ -1737,9 +1730,6 @@ public: if(task->type == DeviceTask::RENDER) { RenderTile tile; - /* Upload Bindless Mapping */ - load_bindless_mapping(); - DeviceRequestedFeatures requested_features; if(use_split_kernel()) { if(!use_adaptive_compilation()) { @@ -1780,9 +1770,6 @@ public: } } else if(task->type == DeviceTask::SHADER) { - /* Upload Bindless Mapping */ - load_bindless_mapping(); - shader(*task); cuda_assert(cuCtxSynchronize()); @@ -1805,9 +1792,12 @@ public: void task_add(DeviceTask& task) { - if(task.type == DeviceTask::FILM_CONVERT) { - CUDAContextScope scope(this); + CUDAContextScope scope(this); + /* Load texture info. */ + load_texture_info(); + + if(task.type == DeviceTask::FILM_CONVERT) { /* must be done in main thread due to opengl access */ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half); cuda_assert(cuCtxSynchronize()); diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index 26bf4a9af5b..bd956e29083 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -545,15 +545,10 @@ private: MemoryManager memory_manager; friend class MemoryManager; - struct tex_info_t { - uint buffer, padding; - cl_ulong offset; - uint width, height, depth, options; - }; - static_assert_align(tex_info_t, 16); + static_assert_align(TextureInfo, 16); - vector texture_descriptors; - device_memory texture_descriptors_buffer; + vector texture_info; + device_memory texture_info_buffer; struct Texture { Texture() {} diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 3db3efd1103..486ef89d22e 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -136,11 +136,11 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou return; } - /* Allocate this right away so that texture_descriptors_buffer is placed at offset 0 in the device memory buffers */ - texture_descriptors.resize(1); - texture_descriptors_buffer.resize(1); - texture_descriptors_buffer.data_pointer = (device_ptr)&texture_descriptors[0]; - memory_manager.alloc("texture_descriptors", texture_descriptors_buffer); + /* Allocate this right away so that texture_info_buffer is placed at offset 0 in the device memory buffers */ + texture_info.resize(1); + texture_info_buffer.resize(1); + texture_info_buffer.data_pointer = (device_ptr)&texture_info[0]; + memory_manager.alloc("texture_info", texture_info_buffer); fprintf(stderr, "Device init success\n"); device_initialized = true; @@ -625,7 +625,7 @@ void OpenCLDeviceBase::flush_texture_buffers() vector texture_slots; -#define KERNEL_TEX(type, ttype, name) \ +#define KERNEL_TEX(type, name) \ if(textures.find(#name) != textures.end()) { \ texture_slots.push_back(texture_slot_t(#name, num_slots)); \ } \ @@ -647,55 +647,38 @@ void OpenCLDeviceBase::flush_texture_buffers() } /* Realloc texture descriptors buffer. */ - memory_manager.free(texture_descriptors_buffer); + memory_manager.free(texture_info_buffer); - texture_descriptors.resize(num_slots); - texture_descriptors_buffer.resize(num_slots * sizeof(tex_info_t)); - texture_descriptors_buffer.data_pointer = (device_ptr)&texture_descriptors[0]; + texture_info.resize(num_slots); + texture_info_buffer.resize(num_slots * sizeof(TextureInfo)); + texture_info_buffer.data_pointer = (device_ptr)&texture_info[0]; - memory_manager.alloc("texture_descriptors", texture_descriptors_buffer); + memory_manager.alloc("texture_info", texture_info_buffer); /* Fill in descriptors */ foreach(texture_slot_t& slot, texture_slots) { Texture& tex = textures[slot.name]; - tex_info_t& info = texture_descriptors[slot.slot]; + TextureInfo& info = texture_info[slot.slot]; MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name); - info.offset = desc.offset; - info.buffer = desc.device_buffer; + info.data = desc.offset; + info.cl_buffer = desc.device_buffer; if(string_startswith(slot.name, "__tex_image")) { info.width = tex.mem->data_width; info.height = tex.mem->data_height; info.depth = tex.mem->data_depth; - info.options = 0; - - if(tex.interpolation == INTERPOLATION_CLOSEST) { - info.options |= (1 << 0); - } - - switch(tex.extension) { - case EXTENSION_REPEAT: - info.options |= (1 << 1); - break; - case EXTENSION_EXTEND: - info.options |= (1 << 2); - break; - case EXTENSION_CLIP: - info.options |= (1 << 3); - break; - default: - break; - } + info.interpolation = tex.interpolation; + info.extension = tex.extension; } } /* Force write of descriptors. */ - memory_manager.free(texture_descriptors_buffer); - memory_manager.alloc("texture_descriptors", texture_descriptors_buffer); + memory_manager.free(texture_info_buffer); + memory_manager.alloc("texture_info", texture_info_buffer); } void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half) diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 976cc9df46d..b4e9419ebbd 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -117,14 +117,8 @@ public: ccl_constant KernelData *data; ccl_global char *buffers[8]; - typedef struct _tex_info_t { - uint buffer, padding; - uint64_t offset; - uint width, height, depth, options; - } _tex_info_t; - -#define KERNEL_TEX(type, ttype, name) \ - _tex_info_t name; +#define KERNEL_TEX(type, name) \ + TextureInfo name; #include "kernel/kernel_textures.h" #undef KERNEL_TEX diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index b10dd05cb9b..bd51bc4d371 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -8,7 +8,7 @@ set(INC_SYS ) -set(SRC +set(SRC_CPU_KERNELS kernels/cpu/kernel.cpp kernels/cpu/kernel_sse2.cpp kernels/cpu/kernel_sse3.cpp @@ -27,6 +27,15 @@ set(SRC kernels/cpu/filter_sse41.cpp kernels/cpu/filter_avx.cpp kernels/cpu/filter_avx2.cpp +) + +set(SRC_CUDA_KERNELS + kernels/cuda/kernel.cu + kernels/cuda/kernel_split.cu + kernels/cuda/filter.cu +) + +set(SRC_OPENCL_KERNELS kernels/opencl/kernel.cl kernels/opencl/kernel_state_buffer_size.cl kernels/opencl/kernel_split.cl @@ -50,9 +59,6 @@ set(SRC kernels/opencl/kernel_indirect_subsurface.cl kernels/opencl/kernel_buffer_update.cl kernels/opencl/filter.cl - kernels/cuda/kernel.cu - kernels/cuda/kernel_split.cu - kernels/cuda/filter.cu ) set(SRC_BVH_HEADERS @@ -83,7 +89,6 @@ set(SRC_HEADERS kernel_emission.h kernel_film.h kernel_globals.h - kernel_image_opencl.h kernel_jitter.h kernel_light.h kernel_math.h @@ -119,10 +124,12 @@ set(SRC_KERNELS_CPU_HEADERS set(SRC_KERNELS_CUDA_HEADERS kernels/cuda/kernel_config.h + kernels/cuda/kernel_cuda_image.h ) set(SRC_KERNELS_OPENCL_HEADERS kernels/opencl/kernel_split_function.h + kernels/opencl/kernel_opencl_image.h ) set(SRC_CLOSURE_HEADERS @@ -457,7 +464,9 @@ if(CXX_HAS_AVX2) endif() add_library(cycles_kernel - ${SRC} + ${SRC_CPU_KERNELS} + ${SRC_CUDA_KERNELS} + ${SRC_OPENCL_KERNELS} ${SRC_HEADERS} ${SRC_KERNELS_CPU_HEADERS} ${SRC_KERNELS_CUDA_HEADERS} @@ -484,34 +493,10 @@ endif() #add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED}) #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_state_buffer_size.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_do_volume.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_background.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_sort.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_dl.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_enqueue_inactive.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split_function.h" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/filter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/filter.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_OPENCL_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CUDA_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_OPENCL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_CUDA_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/bvh) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/closure) diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index b12e248f0a3..a780bd0cf28 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -605,8 +605,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure /* if fresnel is used, calculate the color with reflection_color(...) */ if(use_fresnel) { - *pdf = 1.0f; - *eval = reflection_color(bsdf, *omega_in, m); + *eval *= reflection_color(bsdf, *omega_in, m); } label = LABEL_REFLECT | LABEL_SINGULAR; diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index 698cd6b03fd..6be448c4fa4 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -29,21 +29,6 @@ CCL_NAMESPACE_BEGIN /* Return position normalized to 0..1 in mesh bounds */ -#if defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300 -ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z) -{ - float4 r; - switch(id) { - case 0: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_000, x, y, z); break; - case 8: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_008, x, y, z); break; - case 16: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_016, x, y, z); break; - case 24: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_024, x, y, z); break; - case 32: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_032, x, y, z); break; - } - return r; -} -#endif /* __KERNEL_CUDA__ */ - ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg, const ShaderData *sd, float3 P) @@ -65,23 +50,8 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg, ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { float3 P = volume_normalized_position(kg, sd, sd->P); -#ifdef __KERNEL_CUDA__ -# if __CUDA_ARCH__ >= 300 - CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset); - float f = kernel_tex_image_interp_3d_float(tex, P.x, P.y, P.z); - float4 r = make_float4(f, f, f, 1.0f); -# else - float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z); -# endif -#elif defined(__KERNEL_OPENCL__) - float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z); -#else - float4 r; - if(sd->flag & SD_VOLUME_CUBIC) - r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC); - else - r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z); -#endif + InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE; + float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); if(dx) *dx = 0.0f; if(dy) *dy = 0.0f; @@ -92,22 +62,8 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) { float3 P = volume_normalized_position(kg, sd, sd->P); -#ifdef __KERNEL_CUDA__ -# if __CUDA_ARCH__ >= 300 - CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset); - float4 r = kernel_tex_image_interp_3d_float4(tex, P.x, P.y, P.z); -# else - float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z); -# endif -#elif defined(__KERNEL_OPENCL__) - float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z); -#else - float4 r; - if(sd->flag & SD_VOLUME_CUBIC) - r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC); - else - r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z); -#endif + InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE; + float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index 93934ee6b38..6f63c8f77a2 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -74,7 +74,7 @@ CCL_NAMESPACE_BEGIN * pointer lookup. */ template struct texture { - ccl_always_inline T fetch(int index) + ccl_always_inline const T& fetch(int index) { kernel_assert(index >= 0 && index < width); return data[index]; @@ -112,449 +112,6 @@ template struct texture { int width; }; -template struct texture_image { -#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ - { \ - u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \ - u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ - u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ - u[3] = (1.0f / 6.0f) * t * t * t; \ - } (void)0 - - ccl_always_inline float4 read(float4 r) - { - return r; - } - - ccl_always_inline float4 read(uchar4 r) - { - float f = 1.0f/255.0f; - return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); - } - - ccl_always_inline float4 read(uchar r) - { - float f = r*(1.0f/255.0f); - return make_float4(f, f, f, 1.0f); - } - - ccl_always_inline float4 read(float r) - { - /* TODO(dingto): Optimize this, so interpolation - * happens on float instead of float4 */ - return make_float4(r, r, r, 1.0f); - } - - ccl_always_inline float4 read(half4 r) - { - return half4_to_float4(r); - } - - ccl_always_inline float4 read(half r) - { - float f = half_to_float(r); - return make_float4(f, f, f, 1.0f); - } - - ccl_always_inline int wrap_periodic(int x, int width) - { - x %= width; - if(x < 0) - x += width; - return x; - } - - ccl_always_inline int wrap_clamp(int x, int width) - { - return clamp(x, 0, width-1); - } - - ccl_always_inline float frac(float x, int *ix) - { - int i = float_to_int(x) - ((x < 0.0f)? 1: 0); - *ix = i; - return x - (float)i; - } - - ccl_always_inline float4 interp(float x, float y) - { - if(UNLIKELY(!data)) - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - int ix, iy, nix, niy; - - if(interpolation == INTERPOLATION_CLOSEST) { - frac(x*(float)width, &ix); - frac(y*(float)height, &iy); - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - return read(data[ix + iy*width]); - } - else if(interpolation == INTERPOLATION_LINEAR) { - float tx = frac(x*(float)width - 0.5f, &ix); - float ty = frac(y*(float)height - 0.5f, &iy); - - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]); - r += (1.0f - ty)*tx*read(data[nix + iy*width]); - r += ty*(1.0f - tx)*read(data[ix + niy*width]); - r += ty*tx*read(data[nix + niy*width]); - - return r; - } - else { - /* Bicubic b-spline interpolation. */ - float tx = frac(x*(float)width - 0.5f, &ix); - float ty = frac(y*(float)height - 0.5f, &iy); - int pix, piy, nnix, nniy; - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - - pix = wrap_periodic(ix-1, width); - piy = wrap_periodic(iy-1, height); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - - nnix = wrap_periodic(ix+2, width); - nniy = wrap_periodic(iy+2, height); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - pix = wrap_clamp(ix-1, width); - piy = wrap_clamp(iy-1, height); - - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - - nnix = wrap_clamp(ix+2, width); - nniy = wrap_clamp(iy+2, height); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {width * piy, - width * iy, - width * niy, - width * nniy}; - float u[4], v[4]; - /* Some helper macro to keep code reasonable size, - * let compiler to inline all the matrix multiplications. - */ -#define DATA(x, y) (read(data[xc[x] + yc[y]])) -#define TERM(col) \ - (v[col] * (u[0] * DATA(0, col) + \ - u[1] * DATA(1, col) + \ - u[2] * DATA(2, col) + \ - u[3] * DATA(3, col))) - - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - - /* Actual interpolation. */ - return TERM(0) + TERM(1) + TERM(2) + TERM(3); - -#undef TERM -#undef DATA - } - } - - ccl_always_inline float4 interp_3d(float x, float y, float z) - { - return interp_3d_ex(x, y, z, interpolation); - } - - ccl_always_inline float4 interp_3d_ex_closest(float x, float y, float z) - { - int ix, iy, iz; - frac(x*(float)width, &ix); - frac(y*(float)height, &iy); - frac(z*(float)depth, &iz); - - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - return read(data[ix + iy*width + iz*width*height]); - } - - ccl_always_inline float4 interp_3d_ex_linear(float x, float y, float z) - { - int ix, iy, iz; - int nix, niy, niz; - - float tx = frac(x*(float)width - 0.5f, &ix); - float ty = frac(y*(float)height - 0.5f, &iy); - float tz = frac(z*(float)depth - 0.5f, &iz); - - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - niz = wrap_periodic(iz+1, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - niz = wrap_clamp(iz+1, depth); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - float4 r; - - r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]); - r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]); - r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]); - r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]); - - r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]); - r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]); - r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]); - r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]); - - return r; - } - - /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are - * causing stack overflow issue in this function unless it is inlined. - * - * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization - * enabled. - */ -#ifdef __GNUC__ - ccl_always_inline -#else - ccl_never_inline -#endif - float4 interp_3d_ex_tricubic(float x, float y, float z) - { - int ix, iy, iz; - int nix, niy, niz; - /* Tricubic b-spline interpolation. */ - const float tx = frac(x*(float)width - 0.5f, &ix); - const float ty = frac(y*(float)height - 0.5f, &iy); - const float tz = frac(z*(float)depth - 0.5f, &iz); - int pix, piy, piz, nnix, nniy, nniz; - - switch(extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - - pix = wrap_periodic(ix-1, width); - piy = wrap_periodic(iy-1, height); - piz = wrap_periodic(iz-1, depth); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - niz = wrap_periodic(iz+1, depth); - - nnix = wrap_periodic(ix+2, width); - nniy = wrap_periodic(iy+2, height); - nniz = wrap_periodic(iz+2, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - pix = wrap_clamp(ix-1, width); - piy = wrap_clamp(iy-1, height); - piz = wrap_clamp(iz-1, depth); - - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - niz = wrap_clamp(iz+1, depth); - - nnix = wrap_clamp(ix+2, width); - nniy = wrap_clamp(iy+2, height); - nniz = wrap_clamp(iz+2, depth); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {width * piy, - width * iy, - width * niy, - width * nniy}; - const int zc[4] = {width * height * piz, - width * height * iz, - width * height * niz, - width * height * nniz}; - float u[4], v[4], w[4]; - - /* Some helper macro to keep code reasonable size, - * let compiler to inline all the matrix multiplications. - */ -#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]])) -#define COL_TERM(col, row) \ - (v[col] * (u[0] * DATA(0, col, row) + \ - u[1] * DATA(1, col, row) + \ - u[2] * DATA(2, col, row) + \ - u[3] * DATA(3, col, row))) -#define ROW_TERM(row) \ - (w[row] * (COL_TERM(0, row) + \ - COL_TERM(1, row) + \ - COL_TERM(2, row) + \ - COL_TERM(3, row))) - - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - SET_CUBIC_SPLINE_WEIGHTS(w, tz); - - /* Actual interpolation. */ - return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); - -#undef COL_TERM -#undef ROW_TERM -#undef DATA - } - - ccl_always_inline float4 interp_3d_ex(float x, float y, float z, - int interpolation = INTERPOLATION_LINEAR) - { - if(UNLIKELY(!data)) - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - switch(interpolation) { - case INTERPOLATION_CLOSEST: - return interp_3d_ex_closest(x, y, z); - case INTERPOLATION_LINEAR: - return interp_3d_ex_linear(x, y, z); - default: - return interp_3d_ex_tricubic(x, y, z); - } - } - - ccl_always_inline void dimensions_set(int width_, int height_, int depth_) - { - width = width_; - height = height_; - depth = depth_; - } - - T *data; - int interpolation; - ExtensionType extension; - int width, height, depth; -#undef SET_CUBIC_SPLINE_WEIGHTS -}; - -typedef texture texture_float4; -typedef texture texture_float2; -typedef texture texture_float; -typedef texture texture_uint; -typedef texture texture_int; -typedef texture texture_uint4; -typedef texture texture_uchar4; -typedef texture texture_uchar; -typedef texture_image texture_image_float; -typedef texture_image texture_image_uchar; -typedef texture_image texture_image_half; -typedef texture_image texture_image_float4; -typedef texture_image texture_image_uchar4; -typedef texture_image texture_image_half4; - /* Macros to handle different memory storage on different devices */ #define kernel_tex_fetch(tex, index) (kg->tex.fetch(index)) @@ -563,10 +120,6 @@ typedef texture_image texture_image_half4; #define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index)) #define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size)) -#define kernel_tex_image_interp(tex,x,y) kernel_tex_image_interp_impl(kg,tex,x,y) -#define kernel_tex_image_interp_3d(tex, x, y, z) kernel_tex_image_interp_3d_impl(kg,tex,x,y,z) -#define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) kernel_tex_image_interp_3d_ex_impl(kg,tex, x, y, z, interpolation) - #define kernel_data (kg->__data) #ifdef __KERNEL_SSE2__ diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 2e8ca48c413..fa512f80e41 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -126,42 +126,16 @@ ccl_device_inline uint ccl_num_groups(uint d) /* Textures */ -typedef texture texture_float4; -typedef texture texture_float2; -typedef texture texture_float; -typedef texture texture_uint; -typedef texture texture_int; -typedef texture texture_uint4; -typedef texture texture_uchar; -typedef texture texture_uchar4; +/* Use arrays for regular data. This is a little slower than textures on Fermi, + * but allows for cleaner code and we will stop supporting Fermi soon. */ +#define kernel_tex_fetch(t, index) t[(index)] + +/* On Kepler (6xx) and above, we use Bindless Textures for images. + * On Fermi cards (4xx and 5xx), we have to use regular textures. */ +#if __CUDA_ARCH__ < 300 typedef texture texture_image_float4; typedef texture texture_image3d_float4; typedef texture texture_image_uchar4; - -/* Macros to handle different memory storage on different devices */ - -/* On Fermi cards (4xx and 5xx), we use regular textures for both data and images. - * On Kepler (6xx) and above, we use Bindless Textures for images and arrays for data. - * - * Arrays are necessary in order to use the full VRAM on newer cards, and it's slightly faster. - * Using Arrays on Fermi turned out to be slower.*/ - -/* Fermi */ -#if __CUDA_ARCH__ < 300 -# define __KERNEL_CUDA_TEX_STORAGE__ -# define kernel_tex_fetch(t, index) tex1Dfetch(t, index) - -# define kernel_tex_image_interp(t, x, y) tex2D(t, x, y) -# define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z) - -/* Kepler */ -#else -# define kernel_tex_fetch(t, index) t[(index)] - -# define kernel_tex_image_interp_float4(t, x, y) tex2D(t, x, y) -# define kernel_tex_image_interp_float(t, x, y) tex2D(t, x, y) -# define kernel_tex_image_interp_3d_float4(t, x, y, z) tex3D(t, x, y, z) -# define kernel_tex_image_interp_3d_float(t, x, y, z) tex3D(t, x, y, z) #endif #define kernel_data __data diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 7f81523791b..b02e3bc576d 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -144,7 +144,7 @@ /* data lookup defines */ #define kernel_data (*kg->data) -#define kernel_tex_fetch(tex, index) ((ccl_global tex##_t*)(kg->buffers[kg->tex.buffer] + kg->tex.offset))[(index)] +#define kernel_tex_fetch(tex, index) ((const ccl_global tex##_t*)(kg->buffers[kg->tex.cl_buffer] + kg->tex.data))[(index)] /* define NULL */ #define NULL 0 diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index 9d55183d94b..97d4726407b 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -46,14 +46,7 @@ struct Intersection; struct VolumeStep; typedef struct KernelGlobals { - vector texture_float4_images; - vector texture_byte4_images; - vector texture_half4_images; - vector texture_float_images; - vector texture_byte_images; - vector texture_half_images; - -# define KERNEL_TEX(type, ttype, name) ttype name; +# define KERNEL_TEX(type, name) texture name; # define KERNEL_IMAGE_TEX(type, ttype, name) # include "kernel/kernel_textures.h" @@ -99,11 +92,7 @@ typedef struct KernelGlobals { Intersection hits_stack[64]; } KernelGlobals; -# ifdef __KERNEL_CUDA_TEX_STORAGE__ -# define KERNEL_TEX(type, ttype, name) ttype name; -# else -# define KERNEL_TEX(type, ttype, name) const __constant__ __device__ type *name; -# endif +# define KERNEL_TEX(type, name) const __constant__ __device__ type *name; # define KERNEL_IMAGE_TEX(type, ttype, name) ttype name; # include "kernel/kernel_textures.h" @@ -113,22 +102,16 @@ typedef struct KernelGlobals { #ifdef __KERNEL_OPENCL__ -# define KERNEL_TEX(type, ttype, name) \ +# define KERNEL_TEX(type, name) \ typedef type name##_t; # include "kernel/kernel_textures.h" -typedef struct tex_info_t { - uint buffer, padding; - uint64_t offset; - uint width, height, depth, options; -} tex_info_t; - typedef ccl_addr_space struct KernelGlobals { ccl_constant KernelData *data; ccl_global char *buffers[8]; -# define KERNEL_TEX(type, ttype, name) \ - tex_info_t name; +# define KERNEL_TEX(type, name) \ + TextureInfo name; # include "kernel/kernel_textures.h" # ifdef __SPLIT_KERNEL__ @@ -176,9 +159,9 @@ ccl_device_inline void kernel_set_buffer_info(KernelGlobals *kg) if(ccl_local_id(0) + ccl_local_id(1) == 0) # endif { - ccl_global tex_info_t *info = (ccl_global tex_info_t*)kg->buffers[0]; + ccl_global TextureInfo *info = (ccl_global TextureInfo*)kg->buffers[0]; -# define KERNEL_TEX(type, ttype, name) \ +# define KERNEL_TEX(type, name) \ kg->name = *(info++); # include "kernel/kernel_textures.h" } diff --git a/intern/cycles/kernel/kernel_image_opencl.h b/intern/cycles/kernel/kernel_image_opencl.h deleted file mode 100644 index 9e3373432ec..00000000000 --- a/intern/cycles/kernel/kernel_image_opencl.h +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright 2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/* For OpenCL we do manual lookup and interpolation. */ - -ccl_device_inline ccl_global tex_info_t* kernel_tex_info(KernelGlobals *kg, uint id) { - const uint tex_offset = id -#define KERNEL_TEX(type, ttype, name) + 1 -#include "kernel/kernel_textures.h" - ; - - return &((ccl_global tex_info_t*)kg->buffers[0])[tex_offset]; -} - -#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->buffer] + info->offset))[(index)] - -ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset) -{ - const ccl_global tex_info_t *info = kernel_tex_info(kg, id); - const int texture_type = kernel_tex_type(id); - - /* Float4 */ - if(texture_type == IMAGE_DATA_TYPE_FLOAT4) { - return tex_fetch(float4, info, offset); - } - /* Byte4 */ - else if(texture_type == IMAGE_DATA_TYPE_BYTE4) { - uchar4 r = tex_fetch(uchar4, info, offset); - float f = 1.0f/255.0f; - return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); - } - /* Float */ - else if(texture_type == IMAGE_DATA_TYPE_FLOAT) { - float f = tex_fetch(float, info, offset); - return make_float4(f, f, f, 1.0f); - } - /* Byte */ - else { - uchar r = tex_fetch(uchar, info, offset); - float f = r * (1.0f/255.0f); - return make_float4(f, f, f, 1.0f); - } -} - -ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width) -{ - x %= width; - if(x < 0) - x += width; - return x; -} - -ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width) -{ - return clamp(x, 0, width-1); -} - -ccl_device_inline float svm_image_texture_frac(float x, int *ix) -{ - int i = float_to_int(x) - ((x < 0.0f)? 1: 0); - *ix = i; - return x - (float)i; -} - -ccl_device_inline uint kernel_decode_image_interpolation(uint info) -{ - return (info & (1 << 0)) ? INTERPOLATION_CLOSEST : INTERPOLATION_LINEAR; -} - -ccl_device_inline uint kernel_decode_image_extension(uint info) -{ - if(info & (1 << 1)) { - return EXTENSION_REPEAT; - } - else if(info & (1 << 2)) { - return EXTENSION_EXTEND; - } - else { - return EXTENSION_CLIP; - } -} - -ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) -{ - const ccl_global tex_info_t *info = kernel_tex_info(kg, id); - - uint width = info->width; - uint height = info->height; - uint offset = 0; - - /* Decode image options. */ - uint interpolation = kernel_decode_image_interpolation(info->options); - uint extension = kernel_decode_image_extension(info->options); - - /* Actual sampling. */ - float4 r; - int ix, iy, nix, niy; - if(interpolation == INTERPOLATION_CLOSEST) { - svm_image_texture_frac(x*width, &ix); - svm_image_texture_frac(y*height, &iy); - - if(extension == EXTENSION_REPEAT) { - ix = svm_image_texture_wrap_periodic(ix, width); - iy = svm_image_texture_wrap_periodic(iy, height); - } - else { - if(extension == EXTENSION_CLIP) { - if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - } - /* Fall through. */ - /* EXTENSION_EXTEND */ - ix = svm_image_texture_wrap_clamp(ix, width); - iy = svm_image_texture_wrap_clamp(iy, height); - } - - r = svm_image_texture_read(kg, id, offset + ix + iy*width); - } - else { /* INTERPOLATION_LINEAR */ - float tx = svm_image_texture_frac(x*width - 0.5f, &ix); - float ty = svm_image_texture_frac(y*height - 0.5f, &iy); - - if(extension == EXTENSION_REPEAT) { - ix = svm_image_texture_wrap_periodic(ix, width); - iy = svm_image_texture_wrap_periodic(iy, height); - - nix = svm_image_texture_wrap_periodic(ix+1, width); - niy = svm_image_texture_wrap_periodic(iy+1, height); - } - else { - if(extension == EXTENSION_CLIP) { - if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - } - nix = svm_image_texture_wrap_clamp(ix+1, width); - niy = svm_image_texture_wrap_clamp(iy+1, height); - ix = svm_image_texture_wrap_clamp(ix, width); - iy = svm_image_texture_wrap_clamp(iy, height); - } - - r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width); - r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width); - r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width); - r += ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width); - } - return r; -} - - -ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z) -{ - const ccl_global tex_info_t *info = kernel_tex_info(kg, id); - - uint width = info->width; - uint height = info->height; - uint offset = 0; - uint depth = info->depth; - - /* Decode image options. */ - uint interpolation = kernel_decode_image_interpolation(info->options); - uint extension = kernel_decode_image_extension(info->options); - - /* Actual sampling. */ - float4 r; - int ix, iy, iz, nix, niy, niz; - if(interpolation == INTERPOLATION_CLOSEST) { - svm_image_texture_frac(x*width, &ix); - svm_image_texture_frac(y*height, &iy); - svm_image_texture_frac(z*depth, &iz); - - if(extension == EXTENSION_REPEAT) { - ix = svm_image_texture_wrap_periodic(ix, width); - iy = svm_image_texture_wrap_periodic(iy, height); - iz = svm_image_texture_wrap_periodic(iz, depth); - } - else { - if(extension == EXTENSION_CLIP) { - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - } - /* Fall through. */ - /* EXTENSION_EXTEND */ - ix = svm_image_texture_wrap_clamp(ix, width); - iy = svm_image_texture_wrap_clamp(iy, height); - iz = svm_image_texture_wrap_clamp(iz, depth); - } - r = svm_image_texture_read(kg, id, offset + ix + iy*width + iz*width*height); - } - else { /* INTERPOLATION_LINEAR */ - float tx = svm_image_texture_frac(x*(float)width - 0.5f, &ix); - float ty = svm_image_texture_frac(y*(float)height - 0.5f, &iy); - float tz = svm_image_texture_frac(z*(float)depth - 0.5f, &iz); - - if(extension == EXTENSION_REPEAT) { - ix = svm_image_texture_wrap_periodic(ix, width); - iy = svm_image_texture_wrap_periodic(iy, height); - iz = svm_image_texture_wrap_periodic(iz, depth); - - nix = svm_image_texture_wrap_periodic(ix+1, width); - niy = svm_image_texture_wrap_periodic(iy+1, height); - niz = svm_image_texture_wrap_periodic(iz+1, depth); - } - else { - if(extension == EXTENSION_CLIP) { - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - } - /* Fall through. */ - /* EXTENSION_EXTEND */ - nix = svm_image_texture_wrap_clamp(ix+1, width); - niy = svm_image_texture_wrap_clamp(iy+1, height); - niz = svm_image_texture_wrap_clamp(iz+1, depth); - - ix = svm_image_texture_wrap_clamp(ix, width); - iy = svm_image_texture_wrap_clamp(iy, height); - iz = svm_image_texture_wrap_clamp(iz, depth); - } - - r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width + iz*width*height); - r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width + iz*width*height); - r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width + iz*width*height); - r += (1.0f - tz)*ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width + iz*width*height); - - r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width + niz*width*height); - r += tz*(1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width + niz*width*height); - r += tz*ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width + niz*width*height); - r += tz*ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width + niz*width*height); - } - return r; -} diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index 5eab28a2953..344b2223573 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -15,7 +15,7 @@ */ #ifndef KERNEL_TEX -# define KERNEL_TEX(type, ttype, name) +# define KERNEL_TEX(type, name) #endif #ifndef KERNEL_IMAGE_TEX @@ -23,63 +23,65 @@ #endif /* bvh */ -KERNEL_TEX(float4, texture_float4, __bvh_nodes) -KERNEL_TEX(float4, texture_float4, __bvh_leaf_nodes) -KERNEL_TEX(float4, texture_float4, __prim_tri_verts) -KERNEL_TEX(uint, texture_uint, __prim_tri_index) -KERNEL_TEX(uint, texture_uint, __prim_type) -KERNEL_TEX(uint, texture_uint, __prim_visibility) -KERNEL_TEX(uint, texture_uint, __prim_index) -KERNEL_TEX(uint, texture_uint, __prim_object) -KERNEL_TEX(uint, texture_uint, __object_node) -KERNEL_TEX(float2, texture_float2, __prim_time) +KERNEL_TEX(float4, __bvh_nodes) +KERNEL_TEX(float4, __bvh_leaf_nodes) +KERNEL_TEX(float4, __prim_tri_verts) +KERNEL_TEX(uint, __prim_tri_index) +KERNEL_TEX(uint, __prim_type) +KERNEL_TEX(uint, __prim_visibility) +KERNEL_TEX(uint, __prim_index) +KERNEL_TEX(uint, __prim_object) +KERNEL_TEX(uint, __object_node) +KERNEL_TEX(float2, __prim_time) /* objects */ -KERNEL_TEX(float4, texture_float4, __objects) -KERNEL_TEX(float4, texture_float4, __objects_vector) +KERNEL_TEX(float4, __objects) +KERNEL_TEX(float4, __objects_vector) /* triangles */ -KERNEL_TEX(uint, texture_uint, __tri_shader) -KERNEL_TEX(float4, texture_float4, __tri_vnormal) -KERNEL_TEX(uint4, texture_uint4, __tri_vindex) -KERNEL_TEX(uint, texture_uint, __tri_patch) -KERNEL_TEX(float2, texture_float2, __tri_patch_uv) +KERNEL_TEX(uint, __tri_shader) +KERNEL_TEX(float4, __tri_vnormal) +KERNEL_TEX(uint4, __tri_vindex) +KERNEL_TEX(uint, __tri_patch) +KERNEL_TEX(float2, __tri_patch_uv) /* curves */ -KERNEL_TEX(float4, texture_float4, __curves) -KERNEL_TEX(float4, texture_float4, __curve_keys) +KERNEL_TEX(float4, __curves) +KERNEL_TEX(float4, __curve_keys) /* patches */ -KERNEL_TEX(uint, texture_uint, __patches) +KERNEL_TEX(uint, __patches) /* attributes */ -KERNEL_TEX(uint4, texture_uint4, __attributes_map) -KERNEL_TEX(float, texture_float, __attributes_float) -KERNEL_TEX(float4, texture_float4, __attributes_float3) -KERNEL_TEX(uchar4, texture_uchar4, __attributes_uchar4) +KERNEL_TEX(uint4, __attributes_map) +KERNEL_TEX(float, __attributes_float) +KERNEL_TEX(float4, __attributes_float3) +KERNEL_TEX(uchar4, __attributes_uchar4) /* lights */ -KERNEL_TEX(float4, texture_float4, __light_distribution) -KERNEL_TEX(float4, texture_float4, __light_data) -KERNEL_TEX(float2, texture_float2, __light_background_marginal_cdf) -KERNEL_TEX(float2, texture_float2, __light_background_conditional_cdf) +KERNEL_TEX(float4, __light_distribution) +KERNEL_TEX(float4, __light_data) +KERNEL_TEX(float2, __light_background_marginal_cdf) +KERNEL_TEX(float2, __light_background_conditional_cdf) /* particles */ -KERNEL_TEX(float4, texture_float4, __particles) +KERNEL_TEX(float4, __particles) /* shaders */ -KERNEL_TEX(uint4, texture_uint4, __svm_nodes) -KERNEL_TEX(uint, texture_uint, __shader_flag) -KERNEL_TEX(uint, texture_uint, __object_flag) +KERNEL_TEX(uint4, __svm_nodes) +KERNEL_TEX(uint, __shader_flag) +KERNEL_TEX(uint, __object_flag) /* lookup tables */ -KERNEL_TEX(float, texture_float, __lookup_table) +KERNEL_TEX(float, __lookup_table) /* sobol */ -KERNEL_TEX(uint, texture_uint, __sobol_directions) +KERNEL_TEX(uint, __sobol_directions) -#ifdef __KERNEL_CUDA__ -# if __CUDA_ARCH__ < 300 +#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ >= 300 +/* image textures */ +KERNEL_TEX(TextureInfo, __texture_info) +#else /* full-float image */ KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_000) KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_008) @@ -180,12 +182,7 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_641) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_649) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_657) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_665) - -# else -/* bindless textures */ -KERNEL_TEX(uint, texture_uint, __bindless_mapping) -# endif /* __CUDA_ARCH__ */ -#endif /* __KERNEL_CUDA__ */ +#endif /* defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300 */ #undef KERNEL_TEX #undef KERNEL_IMAGE_TEX diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index 998619ac897..7679ab4f111 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -84,130 +84,16 @@ void kernel_tex_copy(KernelGlobals *kg, if(0) { } -#define KERNEL_TEX(type, ttype, tname) \ +#define KERNEL_TEX(type, tname) \ else if(strcmp(name, #tname) == 0) { \ kg->tname.data = (type*)mem; \ kg->tname.width = width; \ } -#define KERNEL_IMAGE_TEX(type, ttype, tname) +#define KERNEL_IMAGE_TEX(type, tname) #include "kernel/kernel_textures.h" - - else if(strstr(name, "__tex_image_float4")) { - texture_image_float4 *tex = NULL; - int id = atoi(name + strlen("__tex_image_float4_")); - int array_index = kernel_tex_index(id); - - if(array_index >= 0) { - if(array_index >= kg->texture_float4_images.size()) { - kg->texture_float4_images.resize(array_index+1); - } - tex = &kg->texture_float4_images[array_index]; - } - - if(tex) { - tex->data = (float4*)mem; - tex->dimensions_set(width, height, depth); - tex->interpolation = interpolation; - tex->extension = extension; - } - } - else if(strstr(name, "__tex_image_float")) { - texture_image_float *tex = NULL; - int id = atoi(name + strlen("__tex_image_float_")); - int array_index = kernel_tex_index(id); - - if(array_index >= 0) { - if(array_index >= kg->texture_float_images.size()) { - kg->texture_float_images.resize(array_index+1); - } - tex = &kg->texture_float_images[array_index]; - } - - if(tex) { - tex->data = (float*)mem; - tex->dimensions_set(width, height, depth); - tex->interpolation = interpolation; - tex->extension = extension; - } - } - else if(strstr(name, "__tex_image_byte4")) { - texture_image_uchar4 *tex = NULL; - int id = atoi(name + strlen("__tex_image_byte4_")); - int array_index = kernel_tex_index(id); - - if(array_index >= 0) { - if(array_index >= kg->texture_byte4_images.size()) { - kg->texture_byte4_images.resize(array_index+1); - } - tex = &kg->texture_byte4_images[array_index]; - } - - if(tex) { - tex->data = (uchar4*)mem; - tex->dimensions_set(width, height, depth); - tex->interpolation = interpolation; - tex->extension = extension; - } - } - else if(strstr(name, "__tex_image_byte")) { - texture_image_uchar *tex = NULL; - int id = atoi(name + strlen("__tex_image_byte_")); - int array_index = kernel_tex_index(id); - - if(array_index >= 0) { - if(array_index >= kg->texture_byte_images.size()) { - kg->texture_byte_images.resize(array_index+1); - } - tex = &kg->texture_byte_images[array_index]; - } - - if(tex) { - tex->data = (uchar*)mem; - tex->dimensions_set(width, height, depth); - tex->interpolation = interpolation; - tex->extension = extension; - } - } - else if(strstr(name, "__tex_image_half4")) { - texture_image_half4 *tex = NULL; - int id = atoi(name + strlen("__tex_image_half4_")); - int array_index = kernel_tex_index(id); - - if(array_index >= 0) { - if(array_index >= kg->texture_half4_images.size()) { - kg->texture_half4_images.resize(array_index+1); - } - tex = &kg->texture_half4_images[array_index]; - } - - if(tex) { - tex->data = (half4*)mem; - tex->dimensions_set(width, height, depth); - tex->interpolation = interpolation; - tex->extension = extension; - } - } - else if(strstr(name, "__tex_image_half")) { - texture_image_half *tex = NULL; - int id = atoi(name + strlen("__tex_image_half_")); - int array_index = kernel_tex_index(id); - - if(array_index >= 0) { - if(array_index >= kg->texture_half_images.size()) { - kg->texture_half_images.resize(array_index+1); - } - tex = &kg->texture_half_images[array_index]; - } - - if(tex) { - tex->data = (half*)mem; - tex->dimensions_set(width, height, depth); - tex->interpolation = interpolation; - tex->extension = extension; - } - } - else + else { assert(0); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index f6bb4c25012..37ba0f692be 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -17,70 +17,478 @@ #ifndef __KERNEL_CPU_IMAGE_H__ #define __KERNEL_CPU_IMAGE_H__ -#ifdef __KERNEL_CPU__ - CCL_NAMESPACE_BEGIN -ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float x, float y) +template struct TextureInterpolator { +#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ + { \ + u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \ + u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ + u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ + u[3] = (1.0f / 6.0f) * t * t * t; \ + } (void)0 + + static ccl_always_inline float4 read(float4 r) + { + return r; + } + + static ccl_always_inline float4 read(uchar4 r) + { + float f = 1.0f/255.0f; + return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); + } + + static ccl_always_inline float4 read(uchar r) + { + float f = r*(1.0f/255.0f); + return make_float4(f, f, f, 1.0f); + } + + static ccl_always_inline float4 read(float r) + { + /* TODO(dingto): Optimize this, so interpolation + * happens on float instead of float4 */ + return make_float4(r, r, r, 1.0f); + } + + static ccl_always_inline float4 read(half4 r) + { + return half4_to_float4(r); + } + + static ccl_always_inline float4 read(half r) + { + float f = half_to_float(r); + return make_float4(f, f, f, 1.0f); + } + + static ccl_always_inline int wrap_periodic(int x, int width) + { + x %= width; + if(x < 0) + x += width; + return x; + } + + static ccl_always_inline int wrap_clamp(int x, int width) + { + return clamp(x, 0, width-1); + } + + static ccl_always_inline float frac(float x, int *ix) + { + int i = float_to_int(x) - ((x < 0.0f)? 1: 0); + *ix = i; + return x - (float)i; + } + + static ccl_always_inline float4 interp(const TextureInfo& info, float x, float y) + { + if(UNLIKELY(!info.data)) + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + const T *data = (const T*)info.data; + int width = info.width; + int height = info.height; + int ix, iy, nix, niy; + + if(info.interpolation == INTERPOLATION_CLOSEST) { + frac(x*(float)width, &ix); + frac(y*(float)height, &iy); + switch(info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + return read(data[ix + iy*width]); + } + else if(info.interpolation == INTERPOLATION_LINEAR) { + float tx = frac(x*(float)width - 0.5f, &ix); + float ty = frac(y*(float)height - 0.5f, &iy); + + switch(info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]); + r += (1.0f - ty)*tx*read(data[nix + iy*width]); + r += ty*(1.0f - tx)*read(data[ix + niy*width]); + r += ty*tx*read(data[nix + niy*width]); + + return r; + } + else { + /* Bicubic b-spline interpolation. */ + float tx = frac(x*(float)width - 0.5f, &ix); + float ty = frac(y*(float)height - 0.5f, &iy); + int pix, piy, nnix, nniy; + switch(info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + + pix = wrap_periodic(ix-1, width); + piy = wrap_periodic(iy-1, height); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + + nnix = wrap_periodic(ix+2, width); + nniy = wrap_periodic(iy+2, height); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + pix = wrap_clamp(ix-1, width); + piy = wrap_clamp(iy-1, height); + + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + + nnix = wrap_clamp(ix+2, width); + nniy = wrap_clamp(iy+2, height); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {width * piy, + width * iy, + width * niy, + width * nniy}; + float u[4], v[4]; + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ +#define DATA(x, y) (read(data[xc[x] + yc[y]])) +#define TERM(col) \ + (v[col] * (u[0] * DATA(0, col) + \ + u[1] * DATA(1, col) + \ + u[2] * DATA(2, col) + \ + u[3] * DATA(3, col))) + + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + + /* Actual interpolation. */ + return TERM(0) + TERM(1) + TERM(2) + TERM(3); + +#undef TERM +#undef DATA + } + } + + static ccl_always_inline float4 interp_3d_closest(const TextureInfo& info, float x, float y, float z) + { + int width = info.width; + int height = info.height; + int depth = info.depth; + int ix, iy, iz; + + frac(x*(float)width, &ix); + frac(y*(float)height, &iy); + frac(z*(float)depth, &iz); + + switch(info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const T *data = (const T*)info.data; + return read(data[ix + iy*width + iz*width*height]); + } + + static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info, float x, float y, float z) + { + int width = info.width; + int height = info.height; + int depth = info.depth; + int ix, iy, iz; + int nix, niy, niz; + + float tx = frac(x*(float)width - 0.5f, &ix); + float ty = frac(y*(float)height - 0.5f, &iy); + float tz = frac(z*(float)depth - 0.5f, &iz); + + switch(info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + niz = wrap_periodic(iz+1, depth); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + niz = wrap_clamp(iz+1, depth); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const T *data = (const T*)info.data; + float4 r; + + r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]); + r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]); + r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]); + r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]); + + r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]); + r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]); + r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]); + r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]); + + return r; + } + + /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are + * causing stack overflow issue in this function unless it is inlined. + * + * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization + * enabled. + */ +#ifdef __GNUC__ + static ccl_always_inline +#else + static ccl_never_inline +#endif + float4 interp_3d_tricubic(const TextureInfo& info, float x, float y, float z) + { + int width = info.width; + int height = info.height; + int depth = info.depth; + int ix, iy, iz; + int nix, niy, niz; + /* Tricubic b-spline interpolation. */ + const float tx = frac(x*(float)width - 0.5f, &ix); + const float ty = frac(y*(float)height - 0.5f, &iy); + const float tz = frac(z*(float)depth - 0.5f, &iz); + int pix, piy, piz, nnix, nniy, nniz; + + switch(info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + pix = wrap_periodic(ix-1, width); + piy = wrap_periodic(iy-1, height); + piz = wrap_periodic(iz-1, depth); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + niz = wrap_periodic(iz+1, depth); + + nnix = wrap_periodic(ix+2, width); + nniy = wrap_periodic(iy+2, height); + nniz = wrap_periodic(iz+2, depth); + break; + case EXTENSION_CLIP: + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + pix = wrap_clamp(ix-1, width); + piy = wrap_clamp(iy-1, height); + piz = wrap_clamp(iz-1, depth); + + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + niz = wrap_clamp(iz+1, depth); + + nnix = wrap_clamp(ix+2, width); + nniy = wrap_clamp(iy+2, height); + nniz = wrap_clamp(iz+2, depth); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {width * piy, + width * iy, + width * niy, + width * nniy}; + const int zc[4] = {width * height * piz, + width * height * iz, + width * height * niz, + width * height * nniz}; + float u[4], v[4], w[4]; + + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ +#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]])) +#define COL_TERM(col, row) \ + (v[col] * (u[0] * DATA(0, col, row) + \ + u[1] * DATA(1, col, row) + \ + u[2] * DATA(2, col, row) + \ + u[3] * DATA(3, col, row))) +#define ROW_TERM(row) \ + (w[row] * (COL_TERM(0, row) + \ + COL_TERM(1, row) + \ + COL_TERM(2, row) + \ + COL_TERM(3, row))) + + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(w, tz); + + /* Actual interpolation. */ + const T *data = (const T*)info.data; + return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); + +#undef COL_TERM +#undef ROW_TERM +#undef DATA + } + + static ccl_always_inline float4 interp_3d(const TextureInfo& info, + float x, float y, float z, + InterpolationType interp) + { + if(UNLIKELY(!info.data)) + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + switch((interp == INTERPOLATION_NONE)? info.interpolation: interp) { + case INTERPOLATION_CLOSEST: + return interp_3d_closest(info, x, y, z); + case INTERPOLATION_LINEAR: + return interp_3d_linear(info, x, y, z); + default: + return interp_3d_tricubic(info, x, y, z); + } + } +#undef SET_CUBIC_SPLINE_WEIGHTS +}; + +ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) { - switch(kernel_tex_type(tex)) { + const TextureInfo& info = kernel_tex_fetch(__texture_info, id); + + switch(kernel_tex_type(id)) { case IMAGE_DATA_TYPE_HALF: - return kg->texture_half_images[kernel_tex_index(tex)].interp(x, y); + return TextureInterpolator::interp(info, x, y); case IMAGE_DATA_TYPE_BYTE: - return kg->texture_byte_images[kernel_tex_index(tex)].interp(x, y); + return TextureInterpolator::interp(info, x, y); case IMAGE_DATA_TYPE_FLOAT: - return kg->texture_float_images[kernel_tex_index(tex)].interp(x, y); + return TextureInterpolator::interp(info, x, y); case IMAGE_DATA_TYPE_HALF4: - return kg->texture_half4_images[kernel_tex_index(tex)].interp(x, y); + return TextureInterpolator::interp(info, x, y); case IMAGE_DATA_TYPE_BYTE4: - return kg->texture_byte4_images[kernel_tex_index(tex)].interp(x, y); + return TextureInterpolator::interp(info, x, y); case IMAGE_DATA_TYPE_FLOAT4: default: - return kg->texture_float4_images[kernel_tex_index(tex)].interp(x, y); + return TextureInterpolator::interp(info, x, y); } } -ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, float x, float y, float z) +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp) { - switch(kernel_tex_type(tex)) { - case IMAGE_DATA_TYPE_HALF: - return kg->texture_half_images[kernel_tex_index(tex)].interp_3d(x, y, z); - case IMAGE_DATA_TYPE_BYTE: - return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d(x, y, z); - case IMAGE_DATA_TYPE_FLOAT: - return kg->texture_float_images[kernel_tex_index(tex)].interp_3d(x, y, z); - case IMAGE_DATA_TYPE_HALF4: - return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d(x, y, z); - case IMAGE_DATA_TYPE_BYTE4: - return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d(x, y, z); - case IMAGE_DATA_TYPE_FLOAT4: - default: - return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d(x, y, z); - } -} + const TextureInfo& info = kernel_tex_fetch(__texture_info, id); -ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation) -{ - switch(kernel_tex_type(tex)) { + switch(kernel_tex_type(id)) { case IMAGE_DATA_TYPE_HALF: - return kg->texture_half_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + return TextureInterpolator::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_BYTE: - return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + return TextureInterpolator::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_FLOAT: - return kg->texture_float_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + return TextureInterpolator::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_HALF4: - return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + return TextureInterpolator::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_BYTE4: - return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + return TextureInterpolator::interp_3d(info, x, y, z, interp); case IMAGE_DATA_TYPE_FLOAT4: default: - return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + return TextureInterpolator::interp_3d(info, x, y, z, interp); } } CCL_NAMESPACE_END -#endif // __KERNEL_CPU__ - - #endif // __KERNEL_CPU_IMAGE_H__ diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu index 1ac6afd167a..3c93e00ccf1 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -26,6 +26,7 @@ #include "kernel/kernel_math.h" #include "kernel/kernel_types.h" #include "kernel/kernel_globals.h" +#include "kernel/kernels/cuda/kernel_cuda_image.h" #include "kernel/kernel_film.h" #include "kernel/kernel_path.h" #include "kernel/kernel_path_branched.h" diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h new file mode 100644 index 00000000000..269e74f6164 --- /dev/null +++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h @@ -0,0 +1,310 @@ +/* + * Copyright 2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if __CUDA_ARCH__ >= 300 + +/* Kepler */ + +/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */ +ccl_device float cubic_w0(float a) +{ + return (1.0f/6.0f)*(a*(a*(-a + 3.0f) - 3.0f) + 1.0f); +} + +ccl_device float cubic_w1(float a) +{ + return (1.0f/6.0f)*(a*a*(3.0f*a - 6.0f) + 4.0f); +} + +ccl_device float cubic_w2(float a) +{ + return (1.0f/6.0f)*(a*(a*(-3.0f*a + 3.0f) + 3.0f) + 1.0f); +} + +ccl_device float cubic_w3(float a) +{ + return (1.0f/6.0f)*(a*a*a); +} + +/* g0 and g1 are the two amplitude functions. */ +ccl_device float cubic_g0(float a) +{ + return cubic_w0(a) + cubic_w1(a); +} + +ccl_device float cubic_g1(float a) +{ + return cubic_w2(a) + cubic_w3(a); +} + +/* h0 and h1 are the two offset functions */ +ccl_device float cubic_h0(float a) +{ + /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ + return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f; +} + +ccl_device float cubic_h1(float a) +{ + return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f; +} + +/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */ +template +ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo& info, CUtexObject tex, float x, float y) +{ + x = (x * info.width) - 0.5f; + y = (y * info.height) - 0.5f; + + float px = floor(x); + float py = floor(y); + float fx = x - px; + float fy = y - py; + + float g0x = cubic_g0(fx); + float g1x = cubic_g1(fx); + float x0 = (px + cubic_h0(fx)) / info.width; + float x1 = (px + cubic_h1(fx)) / info.width; + float y0 = (py + cubic_h0(fy)) / info.height; + float y1 = (py + cubic_h1(fy)) / info.height; + + return cubic_g0(fy) * (g0x * tex2D(tex, x0, y0) + + g1x * tex2D(tex, x1, y0)) + + cubic_g1(fy) * (g0x * tex2D(tex, x0, y1) + + g1x * tex2D(tex, x1, y1)); +} + +/* Fast tricubic texture lookup using 8 bilinear lookups. */ +template +ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo& info, CUtexObject tex, float x, float y, float z) +{ + x = (x * info.width) - 0.5f; + y = (y * info.height) - 0.5f; + z = (z * info.depth) - 0.5f; + + float px = floor(x); + float py = floor(y); + float pz = floor(z); + float fx = x - px; + float fy = y - py; + float fz = z - pz; + + float g0x = cubic_g0(fx); + float g1x = cubic_g1(fx); + float g0y = cubic_g0(fy); + float g1y = cubic_g1(fy); + float g0z = cubic_g0(fz); + float g1z = cubic_g1(fz); + + float x0 = (px + cubic_h0(fx)) / info.width; + float x1 = (px + cubic_h1(fx)) / info.width; + float y0 = (py + cubic_h0(fy)) / info.height; + float y1 = (py + cubic_h1(fy)) / info.height; + float z0 = (pz + cubic_h0(fz)) / info.depth; + float z1 = (pz + cubic_h1(fz)) / info.depth; + + return g0z * (g0y * (g0x * tex3D(tex, x0, y0, z0) + + g1x * tex3D(tex, x1, y0, z0)) + + g1y * (g0x * tex3D(tex, x0, y1, z0) + + g1x * tex3D(tex, x1, y1, z0))) + + g1z * (g0y * (g0x * tex3D(tex, x0, y0, z1) + + g1x * tex3D(tex, x1, y0, z1)) + + g1y * (g0x * tex3D(tex, x0, y1, z1) + + g1x * tex3D(tex, x1, y1, z1))); +} + +ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) +{ + const TextureInfo& info = kernel_tex_fetch(__texture_info, id); + CUtexObject tex = (CUtexObject)info.data; + + /* float4, byte4 and half4 */ + const int texture_type = kernel_tex_type(id); + if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || + texture_type == IMAGE_DATA_TYPE_BYTE4 || + texture_type == IMAGE_DATA_TYPE_HALF4) + { + if(info.interpolation == INTERPOLATION_CUBIC) { + return kernel_tex_image_interp_bicubic(info, tex, x, y); + } + else { + return tex2D(tex, x, y); + } + } + /* float, byte and half */ + else { + float f; + + if(info.interpolation == INTERPOLATION_CUBIC) { + f = kernel_tex_image_interp_bicubic(info, tex, x, y); + } + else { + f = tex2D(tex, x, y); + } + + return make_float4(f, f, f, 1.0f); + } +} + +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp) +{ + const TextureInfo& info = kernel_tex_fetch(__texture_info, id); + CUtexObject tex = (CUtexObject)info.data; + uint interpolation = (interp == INTERPOLATION_NONE)? info.interpolation: interp; + + const int texture_type = kernel_tex_type(id); + if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || + texture_type == IMAGE_DATA_TYPE_BYTE4 || + texture_type == IMAGE_DATA_TYPE_HALF4) + { + if(interpolation == INTERPOLATION_CUBIC) { + return kernel_tex_image_interp_bicubic_3d(info, tex, x, y, z); + } + else { + return tex3D(tex, x, y, z); + } + } + else { + float f; + + if(interpolation == INTERPOLATION_CUBIC) { + f = kernel_tex_image_interp_bicubic_3d(info, tex, x, y, z); + } + else { + f = tex3D(tex, x, y, z); + } + + return make_float4(f, f, f, 1.0f); + } +} + +#else + +/* Fermi */ + +ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) +{ + float4 r; + switch(id) { + case 0: r = tex2D(__tex_image_float4_000, x, y); break; + case 8: r = tex2D(__tex_image_float4_008, x, y); break; + case 16: r = tex2D(__tex_image_float4_016, x, y); break; + case 24: r = tex2D(__tex_image_float4_024, x, y); break; + case 32: r = tex2D(__tex_image_float4_032, x, y); break; + case 1: r = tex2D(__tex_image_byte4_001, x, y); break; + case 9: r = tex2D(__tex_image_byte4_009, x, y); break; + case 17: r = tex2D(__tex_image_byte4_017, x, y); break; + case 25: r = tex2D(__tex_image_byte4_025, x, y); break; + case 33: r = tex2D(__tex_image_byte4_033, x, y); break; + case 41: r = tex2D(__tex_image_byte4_041, x, y); break; + case 49: r = tex2D(__tex_image_byte4_049, x, y); break; + case 57: r = tex2D(__tex_image_byte4_057, x, y); break; + case 65: r = tex2D(__tex_image_byte4_065, x, y); break; + case 73: r = tex2D(__tex_image_byte4_073, x, y); break; + case 81: r = tex2D(__tex_image_byte4_081, x, y); break; + case 89: r = tex2D(__tex_image_byte4_089, x, y); break; + case 97: r = tex2D(__tex_image_byte4_097, x, y); break; + case 105: r = tex2D(__tex_image_byte4_105, x, y); break; + case 113: r = tex2D(__tex_image_byte4_113, x, y); break; + case 121: r = tex2D(__tex_image_byte4_121, x, y); break; + case 129: r = tex2D(__tex_image_byte4_129, x, y); break; + case 137: r = tex2D(__tex_image_byte4_137, x, y); break; + case 145: r = tex2D(__tex_image_byte4_145, x, y); break; + case 153: r = tex2D(__tex_image_byte4_153, x, y); break; + case 161: r = tex2D(__tex_image_byte4_161, x, y); break; + case 169: r = tex2D(__tex_image_byte4_169, x, y); break; + case 177: r = tex2D(__tex_image_byte4_177, x, y); break; + case 185: r = tex2D(__tex_image_byte4_185, x, y); break; + case 193: r = tex2D(__tex_image_byte4_193, x, y); break; + case 201: r = tex2D(__tex_image_byte4_201, x, y); break; + case 209: r = tex2D(__tex_image_byte4_209, x, y); break; + case 217: r = tex2D(__tex_image_byte4_217, x, y); break; + case 225: r = tex2D(__tex_image_byte4_225, x, y); break; + case 233: r = tex2D(__tex_image_byte4_233, x, y); break; + case 241: r = tex2D(__tex_image_byte4_241, x, y); break; + case 249: r = tex2D(__tex_image_byte4_249, x, y); break; + case 257: r = tex2D(__tex_image_byte4_257, x, y); break; + case 265: r = tex2D(__tex_image_byte4_265, x, y); break; + case 273: r = tex2D(__tex_image_byte4_273, x, y); break; + case 281: r = tex2D(__tex_image_byte4_281, x, y); break; + case 289: r = tex2D(__tex_image_byte4_289, x, y); break; + case 297: r = tex2D(__tex_image_byte4_297, x, y); break; + case 305: r = tex2D(__tex_image_byte4_305, x, y); break; + case 313: r = tex2D(__tex_image_byte4_313, x, y); break; + case 321: r = tex2D(__tex_image_byte4_321, x, y); break; + case 329: r = tex2D(__tex_image_byte4_329, x, y); break; + case 337: r = tex2D(__tex_image_byte4_337, x, y); break; + case 345: r = tex2D(__tex_image_byte4_345, x, y); break; + case 353: r = tex2D(__tex_image_byte4_353, x, y); break; + case 361: r = tex2D(__tex_image_byte4_361, x, y); break; + case 369: r = tex2D(__tex_image_byte4_369, x, y); break; + case 377: r = tex2D(__tex_image_byte4_377, x, y); break; + case 385: r = tex2D(__tex_image_byte4_385, x, y); break; + case 393: r = tex2D(__tex_image_byte4_393, x, y); break; + case 401: r = tex2D(__tex_image_byte4_401, x, y); break; + case 409: r = tex2D(__tex_image_byte4_409, x, y); break; + case 417: r = tex2D(__tex_image_byte4_417, x, y); break; + case 425: r = tex2D(__tex_image_byte4_425, x, y); break; + case 433: r = tex2D(__tex_image_byte4_433, x, y); break; + case 441: r = tex2D(__tex_image_byte4_441, x, y); break; + case 449: r = tex2D(__tex_image_byte4_449, x, y); break; + case 457: r = tex2D(__tex_image_byte4_457, x, y); break; + case 465: r = tex2D(__tex_image_byte4_465, x, y); break; + case 473: r = tex2D(__tex_image_byte4_473, x, y); break; + case 481: r = tex2D(__tex_image_byte4_481, x, y); break; + case 489: r = tex2D(__tex_image_byte4_489, x, y); break; + case 497: r = tex2D(__tex_image_byte4_497, x, y); break; + case 505: r = tex2D(__tex_image_byte4_505, x, y); break; + case 513: r = tex2D(__tex_image_byte4_513, x, y); break; + case 521: r = tex2D(__tex_image_byte4_521, x, y); break; + case 529: r = tex2D(__tex_image_byte4_529, x, y); break; + case 537: r = tex2D(__tex_image_byte4_537, x, y); break; + case 545: r = tex2D(__tex_image_byte4_545, x, y); break; + case 553: r = tex2D(__tex_image_byte4_553, x, y); break; + case 561: r = tex2D(__tex_image_byte4_561, x, y); break; + case 569: r = tex2D(__tex_image_byte4_569, x, y); break; + case 577: r = tex2D(__tex_image_byte4_577, x, y); break; + case 585: r = tex2D(__tex_image_byte4_585, x, y); break; + case 593: r = tex2D(__tex_image_byte4_593, x, y); break; + case 601: r = tex2D(__tex_image_byte4_601, x, y); break; + case 609: r = tex2D(__tex_image_byte4_609, x, y); break; + case 617: r = tex2D(__tex_image_byte4_617, x, y); break; + case 625: r = tex2D(__tex_image_byte4_625, x, y); break; + case 633: r = tex2D(__tex_image_byte4_633, x, y); break; + case 641: r = tex2D(__tex_image_byte4_641, x, y); break; + case 649: r = tex2D(__tex_image_byte4_649, x, y); break; + case 657: r = tex2D(__tex_image_byte4_657, x, y); break; + case 665: r = tex2D(__tex_image_byte4_665, x, y); break; + default: r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + return r; +} + +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z) +{ + float4 r; + switch(id) { + case 0: r = tex3D(__tex_image_float4_3d_000, x, y, z); break; + case 8: r = tex3D(__tex_image_float4_3d_008, x, y, z); break; + case 16: r = tex3D(__tex_image_float4_3d_016, x, y, z); break; + case 24: r = tex3D(__tex_image_float4_3d_024, x, y, z); break; + case 32: r = tex3D(__tex_image_float4_3d_032, x, y, z); break; + } + return r; +} + +#endif + diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index 66b6e19de84..9d5d784e140 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -20,7 +20,7 @@ #include "kernel/kernel_math.h" #include "kernel/kernel_types.h" #include "kernel/kernel_globals.h" -#include "kernel/kernel_image_opencl.h" +#include "kernel/kernels/opencl/kernel_opencl_image.h" #include "kernel/kernel_film.h" diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h new file mode 100644 index 00000000000..d908af78c7a --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h @@ -0,0 +1,341 @@ +/* + * Copyright 2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* For OpenCL we do manual lookup and interpolation. */ + +ccl_device_inline ccl_global TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) { + const uint tex_offset = id +#define KERNEL_TEX(type, name) + 1 +#include "kernel/kernel_textures.h" + ; + + return &((ccl_global TextureInfo*)kg->buffers[0])[tex_offset]; +} + +#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->cl_buffer] + info->data))[(index)] + +ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset) +{ + const ccl_global TextureInfo *info = kernel_tex_info(kg, id); + const int texture_type = kernel_tex_type(id); + + /* Float4 */ + if(texture_type == IMAGE_DATA_TYPE_FLOAT4) { + return tex_fetch(float4, info, offset); + } + /* Byte4 */ + else if(texture_type == IMAGE_DATA_TYPE_BYTE4) { + uchar4 r = tex_fetch(uchar4, info, offset); + float f = 1.0f/255.0f; + return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); + } + /* Float */ + else if(texture_type == IMAGE_DATA_TYPE_FLOAT) { + float f = tex_fetch(float, info, offset); + return make_float4(f, f, f, 1.0f); + } + /* Byte */ + else { + uchar r = tex_fetch(uchar, info, offset); + float f = r * (1.0f/255.0f); + return make_float4(f, f, f, 1.0f); + } +} + +ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width) +{ + x %= width; + if(x < 0) + x += width; + return x; +} + +ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width) +{ + return clamp(x, 0, width-1); +} + +ccl_device_inline float svm_image_texture_frac(float x, int *ix) +{ + int i = float_to_int(x) - ((x < 0.0f)? 1: 0); + *ix = i; + return x - (float)i; +} + +#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ + { \ + u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \ + u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ + u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ + u[3] = (1.0f / 6.0f) * t * t * t; \ + } (void)0 + +ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) +{ + const ccl_global TextureInfo *info = kernel_tex_info(kg, id); + + uint width = info->width; + uint height = info->height; + uint interpolation = info->interpolation; + uint extension = info->extension; + + /* Actual sampling. */ + if(interpolation == INTERPOLATION_CLOSEST) { + int ix, iy; + svm_image_texture_frac(x*width, &ix); + svm_image_texture_frac(y*height, &iy); + + if(extension == EXTENSION_REPEAT) { + ix = svm_image_texture_wrap_periodic(ix, width); + iy = svm_image_texture_wrap_periodic(iy, height); + } + else { + if(extension == EXTENSION_CLIP) { + if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + } + /* Fall through. */ + /* EXTENSION_EXTEND */ + ix = svm_image_texture_wrap_clamp(ix, width); + iy = svm_image_texture_wrap_clamp(iy, height); + } + + return svm_image_texture_read(kg, id, ix + iy*width); + } + else { + /* Bilinear or bicubic interpolation. */ + int ix, iy, nix, niy; + float tx = svm_image_texture_frac(x*width - 0.5f, &ix); + float ty = svm_image_texture_frac(y*height - 0.5f, &iy); + + if(extension == EXTENSION_REPEAT) { + ix = svm_image_texture_wrap_periodic(ix, width); + iy = svm_image_texture_wrap_periodic(iy, height); + nix = svm_image_texture_wrap_periodic(ix+1, width); + niy = svm_image_texture_wrap_periodic(iy+1, height); + } + else { + if(extension == EXTENSION_CLIP) { + if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + } + ix = svm_image_texture_wrap_clamp(ix, width); + iy = svm_image_texture_wrap_clamp(iy, height); + nix = svm_image_texture_wrap_clamp(ix+1, width); + niy = svm_image_texture_wrap_clamp(iy+1, height); + } + + if(interpolation == INTERPOLATION_LINEAR) { + /* Bilinear interpolation. */ + float4 r; + r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, ix + iy*width); + r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, nix + iy*width); + r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, ix + niy*width); + r += ty*tx*svm_image_texture_read(kg, id, nix + niy*width); + return r; + } + + /* Bicubic interpolation. */ + int pix, piy, nnix, nniy; + if(extension == EXTENSION_REPEAT) { + pix = svm_image_texture_wrap_periodic(ix-1, width); + piy = svm_image_texture_wrap_periodic(iy-1, height); + nnix = svm_image_texture_wrap_periodic(ix+2, width); + nniy = svm_image_texture_wrap_periodic(iy+2, height); + } + else { + pix = svm_image_texture_wrap_clamp(ix-1, width); + piy = svm_image_texture_wrap_clamp(iy-1, height); + nnix = svm_image_texture_wrap_clamp(ix+2, width); + nniy = svm_image_texture_wrap_clamp(iy+2, height); + } + + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {width * piy, + width * iy, + width * niy, + width * nniy}; + float u[4], v[4]; + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ +#define DATA(x, y) (svm_image_texture_read(kg, id, xc[x] + yc[y])) +#define TERM(col) \ + (v[col] * (u[0] * DATA(0, col) + \ + u[1] * DATA(1, col) + \ + u[2] * DATA(2, col) + \ + u[3] * DATA(3, col))) + + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + + /* Actual interpolation. */ + return TERM(0) + TERM(1) + TERM(2) + TERM(3); +#undef TERM +#undef DATA + } +} + + +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp) +{ + const ccl_global TextureInfo *info = kernel_tex_info(kg, id); + + uint width = info->width; + uint height = info->height; + uint depth = info->depth; + uint interpolation = (interp == INTERPOLATION_NONE)? info->interpolation: interp; + uint extension = info->extension; + + /* Actual sampling. */ + if(interpolation == INTERPOLATION_CLOSEST) { + int ix, iy, iz; + svm_image_texture_frac(x*width, &ix); + svm_image_texture_frac(y*height, &iy); + svm_image_texture_frac(z*depth, &iz); + + if(extension == EXTENSION_REPEAT) { + ix = svm_image_texture_wrap_periodic(ix, width); + iy = svm_image_texture_wrap_periodic(iy, height); + iz = svm_image_texture_wrap_periodic(iz, depth); + } + else { + if(extension == EXTENSION_CLIP) { + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + } + /* Fall through. */ + /* EXTENSION_EXTEND */ + ix = svm_image_texture_wrap_clamp(ix, width); + iy = svm_image_texture_wrap_clamp(iy, height); + iz = svm_image_texture_wrap_clamp(iz, depth); + } + return svm_image_texture_read(kg, id, ix + iy*width + iz*width*height); + } + else { + /* Bilinear or bicubic interpolation. */ + int ix, iy, iz, nix, niy, niz; + float tx = svm_image_texture_frac(x*(float)width - 0.5f, &ix); + float ty = svm_image_texture_frac(y*(float)height - 0.5f, &iy); + float tz = svm_image_texture_frac(z*(float)depth - 0.5f, &iz); + + if(extension == EXTENSION_REPEAT) { + ix = svm_image_texture_wrap_periodic(ix, width); + iy = svm_image_texture_wrap_periodic(iy, height); + iz = svm_image_texture_wrap_periodic(iz, depth); + + nix = svm_image_texture_wrap_periodic(ix+1, width); + niy = svm_image_texture_wrap_periodic(iy+1, height); + niz = svm_image_texture_wrap_periodic(iz+1, depth); + } + else { + if(extension == EXTENSION_CLIP) { + if(x < 0.0f || y < 0.0f || z < 0.0f || + x > 1.0f || y > 1.0f || z > 1.0f) + { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + } + /* Fall through. */ + /* EXTENSION_EXTEND */ + nix = svm_image_texture_wrap_clamp(ix+1, width); + niy = svm_image_texture_wrap_clamp(iy+1, height); + niz = svm_image_texture_wrap_clamp(iz+1, depth); + + ix = svm_image_texture_wrap_clamp(ix, width); + iy = svm_image_texture_wrap_clamp(iy, height); + iz = svm_image_texture_wrap_clamp(iz, depth); + } + + if(interpolation == INTERPOLATION_LINEAR) { + /* Bilinear interpolation. */ + float4 r; + r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, ix + iy*width + iz*width*height); + r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read(kg, id, nix + iy*width + iz*width*height); + r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read(kg, id, ix + niy*width + iz*width*height); + r += (1.0f - tz)*ty*tx*svm_image_texture_read(kg, id, nix + niy*width + iz*width*height); + + r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, ix + iy*width + niz*width*height); + r += tz*(1.0f - ty)*tx*svm_image_texture_read(kg, id, nix + iy*width + niz*width*height); + r += tz*ty*(1.0f - tx)*svm_image_texture_read(kg, id, ix + niy*width + niz*width*height); + r += tz*ty*tx*svm_image_texture_read(kg, id, nix + niy*width + niz*width*height); + return r; + } + + /* Bicubic interpolation. */ + int pix, piy, piz, nnix, nniy, nniz; + if(extension == EXTENSION_REPEAT) { + pix = svm_image_texture_wrap_periodic(ix-1, width); + piy = svm_image_texture_wrap_periodic(iy-1, height); + piz = svm_image_texture_wrap_periodic(iz-1, depth); + nnix = svm_image_texture_wrap_periodic(ix+2, width); + nniy = svm_image_texture_wrap_periodic(iy+2, height); + nniz = svm_image_texture_wrap_periodic(iz+2, depth); + } + else { + pix = svm_image_texture_wrap_clamp(ix-1, width); + piy = svm_image_texture_wrap_clamp(iy-1, height); + piz = svm_image_texture_wrap_clamp(iz-1, depth); + nnix = svm_image_texture_wrap_clamp(ix+2, width); + nniy = svm_image_texture_wrap_clamp(iy+2, height); + nniz = svm_image_texture_wrap_clamp(iz+2, depth); + } + + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {width * piy, + width * iy, + width * niy, + width * nniy}; + const int zc[4] = {width * height * piz, + width * height * iz, + width * height * niz, + width * height * nniz}; + float u[4], v[4], w[4]; + + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ +#define DATA(x, y, z) (svm_image_texture_read(kg, id, xc[x] + yc[y] + zc[z])) +#define COL_TERM(col, row) \ + (v[col] * (u[0] * DATA(0, col, row) + \ + u[1] * DATA(1, col, row) + \ + u[2] * DATA(2, col, row) + \ + u[3] * DATA(3, col, row))) +#define ROW_TERM(row) \ + (w[row] * (COL_TERM(0, row) + \ + COL_TERM(1, row) + \ + COL_TERM(2, row) + \ + COL_TERM(3, row))) + + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(w, tz); + + /* Actual interpolation. */ + return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); + +#undef COL_TERM +#undef ROW_TERM +#undef DATA + } +} + +#undef SET_CUBIC_SPLINE_WEIGHTS diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 8ad2e12b067..8ae004031e1 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -962,7 +962,7 @@ bool OSLRenderServices::texture(ustring filename, if(filename.length() && filename[0] == '@') { int slot = atoi(filename.c_str() + 1); - float4 rgba = kernel_tex_image_interp(slot, s, 1.0f - t); + float4 rgba = kernel_tex_image_interp(kg, slot, s, 1.0f - t); result[0] = rgba[0]; if(nchannels > 1) @@ -1043,7 +1043,7 @@ bool OSLRenderServices::texture3d(ustring filename, bool status; if(filename.length() && filename[0] == '@') { int slot = atoi(filename.c_str() + 1); - float4 rgba = kernel_tex_image_interp_3d(slot, P.x, P.y, P.z); + float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z, INTERPOLATION_NONE); result[0] = rgba[0]; if(nchannels > 1) diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h index 558d327bc76..21886ee62ee 100644 --- a/intern/cycles/kernel/split/kernel_split_common.h +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -29,7 +29,10 @@ #endif #ifdef __KERNEL_OPENCL__ -# include "kernel/kernel_image_opencl.h" +# include "kernel/kernels/opencl/kernel_opencl_image.h" +#endif +#ifdef __KERNEL_CUDA__ +# include "kernel/kernels/cuda/kernel_cuda_image.h" #endif #ifdef __KERNEL_CPU__ # include "kernel/kernels/cpu/kernel_cpu_image.h" diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 6d6e92e73f6..4226e7adfe0 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -18,135 +18,7 @@ CCL_NAMESPACE_BEGIN ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha) { -#ifdef __KERNEL_CPU__ - float4 r = kernel_tex_image_interp(id, x, y); -#elif defined(__KERNEL_OPENCL__) float4 r = kernel_tex_image_interp(kg, id, x, y); -#else - float4 r; - -# if __CUDA_ARCH__ < 300 - /* not particularly proud of this massive switch, what are the - * alternatives? - * - use a single big 1D texture, and do our own lookup/filtering - * - group by size and use a 3d texture, performance impact - * - group into larger texture with some padding for correct lerp - * - * also note that cuda has a textures limit (128 for Fermi, 256 for Kepler), - * and we cannot use all since we still need some for other storage */ - - switch(id) { - case 0: r = kernel_tex_image_interp(__tex_image_float4_000, x, y); break; - case 8: r = kernel_tex_image_interp(__tex_image_float4_008, x, y); break; - case 16: r = kernel_tex_image_interp(__tex_image_float4_016, x, y); break; - case 24: r = kernel_tex_image_interp(__tex_image_float4_024, x, y); break; - case 32: r = kernel_tex_image_interp(__tex_image_float4_032, x, y); break; - case 1: r = kernel_tex_image_interp(__tex_image_byte4_001, x, y); break; - case 9: r = kernel_tex_image_interp(__tex_image_byte4_009, x, y); break; - case 17: r = kernel_tex_image_interp(__tex_image_byte4_017, x, y); break; - case 25: r = kernel_tex_image_interp(__tex_image_byte4_025, x, y); break; - case 33: r = kernel_tex_image_interp(__tex_image_byte4_033, x, y); break; - case 41: r = kernel_tex_image_interp(__tex_image_byte4_041, x, y); break; - case 49: r = kernel_tex_image_interp(__tex_image_byte4_049, x, y); break; - case 57: r = kernel_tex_image_interp(__tex_image_byte4_057, x, y); break; - case 65: r = kernel_tex_image_interp(__tex_image_byte4_065, x, y); break; - case 73: r = kernel_tex_image_interp(__tex_image_byte4_073, x, y); break; - case 81: r = kernel_tex_image_interp(__tex_image_byte4_081, x, y); break; - case 89: r = kernel_tex_image_interp(__tex_image_byte4_089, x, y); break; - case 97: r = kernel_tex_image_interp(__tex_image_byte4_097, x, y); break; - case 105: r = kernel_tex_image_interp(__tex_image_byte4_105, x, y); break; - case 113: r = kernel_tex_image_interp(__tex_image_byte4_113, x, y); break; - case 121: r = kernel_tex_image_interp(__tex_image_byte4_121, x, y); break; - case 129: r = kernel_tex_image_interp(__tex_image_byte4_129, x, y); break; - case 137: r = kernel_tex_image_interp(__tex_image_byte4_137, x, y); break; - case 145: r = kernel_tex_image_interp(__tex_image_byte4_145, x, y); break; - case 153: r = kernel_tex_image_interp(__tex_image_byte4_153, x, y); break; - case 161: r = kernel_tex_image_interp(__tex_image_byte4_161, x, y); break; - case 169: r = kernel_tex_image_interp(__tex_image_byte4_169, x, y); break; - case 177: r = kernel_tex_image_interp(__tex_image_byte4_177, x, y); break; - case 185: r = kernel_tex_image_interp(__tex_image_byte4_185, x, y); break; - case 193: r = kernel_tex_image_interp(__tex_image_byte4_193, x, y); break; - case 201: r = kernel_tex_image_interp(__tex_image_byte4_201, x, y); break; - case 209: r = kernel_tex_image_interp(__tex_image_byte4_209, x, y); break; - case 217: r = kernel_tex_image_interp(__tex_image_byte4_217, x, y); break; - case 225: r = kernel_tex_image_interp(__tex_image_byte4_225, x, y); break; - case 233: r = kernel_tex_image_interp(__tex_image_byte4_233, x, y); break; - case 241: r = kernel_tex_image_interp(__tex_image_byte4_241, x, y); break; - case 249: r = kernel_tex_image_interp(__tex_image_byte4_249, x, y); break; - case 257: r = kernel_tex_image_interp(__tex_image_byte4_257, x, y); break; - case 265: r = kernel_tex_image_interp(__tex_image_byte4_265, x, y); break; - case 273: r = kernel_tex_image_interp(__tex_image_byte4_273, x, y); break; - case 281: r = kernel_tex_image_interp(__tex_image_byte4_281, x, y); break; - case 289: r = kernel_tex_image_interp(__tex_image_byte4_289, x, y); break; - case 297: r = kernel_tex_image_interp(__tex_image_byte4_297, x, y); break; - case 305: r = kernel_tex_image_interp(__tex_image_byte4_305, x, y); break; - case 313: r = kernel_tex_image_interp(__tex_image_byte4_313, x, y); break; - case 321: r = kernel_tex_image_interp(__tex_image_byte4_321, x, y); break; - case 329: r = kernel_tex_image_interp(__tex_image_byte4_329, x, y); break; - case 337: r = kernel_tex_image_interp(__tex_image_byte4_337, x, y); break; - case 345: r = kernel_tex_image_interp(__tex_image_byte4_345, x, y); break; - case 353: r = kernel_tex_image_interp(__tex_image_byte4_353, x, y); break; - case 361: r = kernel_tex_image_interp(__tex_image_byte4_361, x, y); break; - case 369: r = kernel_tex_image_interp(__tex_image_byte4_369, x, y); break; - case 377: r = kernel_tex_image_interp(__tex_image_byte4_377, x, y); break; - case 385: r = kernel_tex_image_interp(__tex_image_byte4_385, x, y); break; - case 393: r = kernel_tex_image_interp(__tex_image_byte4_393, x, y); break; - case 401: r = kernel_tex_image_interp(__tex_image_byte4_401, x, y); break; - case 409: r = kernel_tex_image_interp(__tex_image_byte4_409, x, y); break; - case 417: r = kernel_tex_image_interp(__tex_image_byte4_417, x, y); break; - case 425: r = kernel_tex_image_interp(__tex_image_byte4_425, x, y); break; - case 433: r = kernel_tex_image_interp(__tex_image_byte4_433, x, y); break; - case 441: r = kernel_tex_image_interp(__tex_image_byte4_441, x, y); break; - case 449: r = kernel_tex_image_interp(__tex_image_byte4_449, x, y); break; - case 457: r = kernel_tex_image_interp(__tex_image_byte4_457, x, y); break; - case 465: r = kernel_tex_image_interp(__tex_image_byte4_465, x, y); break; - case 473: r = kernel_tex_image_interp(__tex_image_byte4_473, x, y); break; - case 481: r = kernel_tex_image_interp(__tex_image_byte4_481, x, y); break; - case 489: r = kernel_tex_image_interp(__tex_image_byte4_489, x, y); break; - case 497: r = kernel_tex_image_interp(__tex_image_byte4_497, x, y); break; - case 505: r = kernel_tex_image_interp(__tex_image_byte4_505, x, y); break; - case 513: r = kernel_tex_image_interp(__tex_image_byte4_513, x, y); break; - case 521: r = kernel_tex_image_interp(__tex_image_byte4_521, x, y); break; - case 529: r = kernel_tex_image_interp(__tex_image_byte4_529, x, y); break; - case 537: r = kernel_tex_image_interp(__tex_image_byte4_537, x, y); break; - case 545: r = kernel_tex_image_interp(__tex_image_byte4_545, x, y); break; - case 553: r = kernel_tex_image_interp(__tex_image_byte4_553, x, y); break; - case 561: r = kernel_tex_image_interp(__tex_image_byte4_561, x, y); break; - case 569: r = kernel_tex_image_interp(__tex_image_byte4_569, x, y); break; - case 577: r = kernel_tex_image_interp(__tex_image_byte4_577, x, y); break; - case 585: r = kernel_tex_image_interp(__tex_image_byte4_585, x, y); break; - case 593: r = kernel_tex_image_interp(__tex_image_byte4_593, x, y); break; - case 601: r = kernel_tex_image_interp(__tex_image_byte4_601, x, y); break; - case 609: r = kernel_tex_image_interp(__tex_image_byte4_609, x, y); break; - case 617: r = kernel_tex_image_interp(__tex_image_byte4_617, x, y); break; - case 625: r = kernel_tex_image_interp(__tex_image_byte4_625, x, y); break; - case 633: r = kernel_tex_image_interp(__tex_image_byte4_633, x, y); break; - case 641: r = kernel_tex_image_interp(__tex_image_byte4_641, x, y); break; - case 649: r = kernel_tex_image_interp(__tex_image_byte4_649, x, y); break; - case 657: r = kernel_tex_image_interp(__tex_image_byte4_657, x, y); break; - case 665: r = kernel_tex_image_interp(__tex_image_byte4_665, x, y); break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } -# else - CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id); - /* float4, byte4 and half4 */ - const int texture_type = kernel_tex_type(id); - if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || - texture_type == IMAGE_DATA_TYPE_BYTE4 || - texture_type == IMAGE_DATA_TYPE_HALF4) - { - r = kernel_tex_image_interp_float4(tex, x, y); - } - /* float, byte and half */ - else { - float f = kernel_tex_image_interp_float(tex, x, y); - r = make_float4(f, f, f, 1.0f); - } -# endif -#endif - const float alpha = r.w; if(use_alpha && alpha != 1.0f && alpha != 0.0f) { diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h index f4a5b2b2994..d967516a5c9 100644 --- a/intern/cycles/kernel/svm/svm_voxel.h +++ b/intern/cycles/kernel/svm/svm_voxel.h @@ -42,29 +42,8 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg, tfm.w = read_node_float(kg, offset); co = transform_point(&tfm, co); } - float4 r; -# if defined(__KERNEL_CUDA__) -# if __CUDA_ARCH__ >= 300 - CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id); - const int texture_type = kernel_tex_type(id); - if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || - texture_type == IMAGE_DATA_TYPE_BYTE4 || - texture_type == IMAGE_DATA_TYPE_HALF4) - { - r = kernel_tex_image_interp_3d_float4(tex, co.x, co.y, co.z); - } - else { - float f = kernel_tex_image_interp_3d_float(tex, co.x, co.y, co.z); - r = make_float4(f, f, f, 1.0f); - } -# else /* __CUDA_ARCH__ >= 300 */ - r = volume_image_texture_3d(id, co.x, co.y, co.z); -# endif -# elif defined(__KERNEL_OPENCL__) - r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z); -# else - r = kernel_tex_image_interp_3d(id, co.x, co.y, co.z); -# endif /* __KERNEL_CUDA__ */ + + float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE); #else float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); #endif diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h index df255f43059..cec03dc5e6e 100644 --- a/intern/cycles/util/util_texture.h +++ b/intern/cycles/util/util_texture.h @@ -46,12 +46,64 @@ CCL_NAMESPACE_BEGIN #if defined (__KERNEL_CUDA__) && (__CUDA_ARCH__ < 300) # define kernel_tex_type(tex) (tex < TEX_START_BYTE4_CUDA ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_BYTE4) -# define kernel_tex_index(tex) (tex) #else # define kernel_tex_type(tex) (tex & IMAGE_DATA_TYPE_MASK) -# define kernel_tex_index(tex) (tex >> IMAGE_DATA_TYPE_SHIFT) #endif +/* Interpolation types for textures + * cuda also use texture space to store other objects */ +typedef enum InterpolationType { + INTERPOLATION_NONE = -1, + INTERPOLATION_LINEAR = 0, + INTERPOLATION_CLOSEST = 1, + INTERPOLATION_CUBIC = 2, + INTERPOLATION_SMART = 3, + + INTERPOLATION_NUM_TYPES, +} InterpolationType; + +/* Texture types + * Since we store the type in the lower bits of a flat index, + * the shift and bit mask constant below need to be kept in sync. */ +typedef enum ImageDataType { + IMAGE_DATA_TYPE_FLOAT4 = 0, + IMAGE_DATA_TYPE_BYTE4 = 1, + IMAGE_DATA_TYPE_HALF4 = 2, + IMAGE_DATA_TYPE_FLOAT = 3, + IMAGE_DATA_TYPE_BYTE = 4, + IMAGE_DATA_TYPE_HALF = 5, + + IMAGE_DATA_NUM_TYPES +} ImageDataType; + +#define IMAGE_DATA_TYPE_SHIFT 3 +#define IMAGE_DATA_TYPE_MASK 0x7 + +/* Extension types for textures. + * + * Defines how the image is extrapolated past its original bounds. */ +typedef enum ExtensionType { + /* Cause the image to repeat horizontally and vertically. */ + EXTENSION_REPEAT = 0, + /* Extend by repeating edge pixels of the image. */ + EXTENSION_EXTEND = 1, + /* Clip to image size and set exterior pixels as transparent. */ + EXTENSION_CLIP = 2, + + EXTENSION_NUM_TYPES, +} ExtensionType; + +typedef struct TextureInfo { + /* Pointer, offset or texture depending on device. */ + uint64_t data; + /* Buffer number for OpenCL. */ + uint cl_buffer; + /* Interpolation and extension type. */ + uint interpolation, extension; + /* Dimensions. */ + uint width, height, depth; +} TextureInfo; + CCL_NAMESPACE_END #endif /* __UTIL_TEXTURE_H__ */ diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index aabca6c81fc..84206a7ba5a 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -101,52 +101,6 @@ ccl_device_inline size_t round_down(size_t x, size_t multiple) return (x / multiple) * multiple; } -/* Interpolation types for textures - * cuda also use texture space to store other objects */ -enum InterpolationType { - INTERPOLATION_NONE = -1, - INTERPOLATION_LINEAR = 0, - INTERPOLATION_CLOSEST = 1, - INTERPOLATION_CUBIC = 2, - INTERPOLATION_SMART = 3, - - INTERPOLATION_NUM_TYPES, -}; - -/* Texture types - * Since we store the type in the lower bits of a flat index, - * the shift and bit mask constant below need to be kept in sync. - */ - -enum ImageDataType { - IMAGE_DATA_TYPE_FLOAT4 = 0, - IMAGE_DATA_TYPE_BYTE4 = 1, - IMAGE_DATA_TYPE_HALF4 = 2, - IMAGE_DATA_TYPE_FLOAT = 3, - IMAGE_DATA_TYPE_BYTE = 4, - IMAGE_DATA_TYPE_HALF = 5, - - IMAGE_DATA_NUM_TYPES -}; - -#define IMAGE_DATA_TYPE_SHIFT 3 -#define IMAGE_DATA_TYPE_MASK 0x7 - -/* Extension types for textures. - * - * Defines how the image is extrapolated past its original bounds. - */ -enum ExtensionType { - /* Cause the image to repeat horizontally and vertically. */ - EXTENSION_REPEAT = 0, - /* Extend by repeating edge pixels of the image. */ - EXTENSION_EXTEND = 1, - /* Clip to image size and set exterior pixels as transparent. */ - EXTENSION_CLIP = 2, - - EXTENSION_NUM_TYPES, -}; - CCL_NAMESPACE_END /* Vectorized types declaration. */ diff --git a/source/blender/editors/sculpt_paint/paint_vertex.c b/source/blender/editors/sculpt_paint/paint_vertex.c index fd88ea2d15f..3fa1eda5d1e 100644 --- a/source/blender/editors/sculpt_paint/paint_vertex.c +++ b/source/blender/editors/sculpt_paint/paint_vertex.c @@ -149,6 +149,10 @@ static bool vwpaint_use_normal(const VPaint *vp) ((vp->paint.brush->flag & BRUSH_FRONTFACE_FALLOFF) != 0); } +static bool brush_use_accumulate(const Brush *brush) +{ + return (brush->flag & BRUSH_ACCUMULATE) != 0 || brush->vertexpaint_tool == PAINT_BLEND_SMEAR; +} static MDeformVert *defweight_prev_init(MDeformVert *dvert_prev, MDeformVert *dvert_curr, int index) { @@ -272,7 +276,7 @@ static uint vpaint_blend( uint color_blend = ED_vpaint_blend_tool(tool, color_curr, color_paint, alpha_i); /* if no accumulate, clip color adding with colorig & orig alpha */ - if ((brush->flag & BRUSH_ACCUMULATE) == 0) { + if (!brush_use_accumulate(brush)) { uint color_test, a; char *cp, *ct, *co; @@ -784,7 +788,7 @@ static void do_weight_paint_vertex_single( dw_mirr = NULL; } - if ((wp->paint.brush->flag & BRUSH_ACCUMULATE) == 0) { + if (!brush_use_accumulate(wp->paint.brush)) { MDeformVert *dvert_prev = ob->sculpt->mode.wpaint.dvert_prev; MDeformVert *dv_prev = defweight_prev_init(dvert_prev, me->dvert, index); if (index_mirr != -1) { @@ -900,7 +904,7 @@ static void do_weight_paint_vertex_multi( return; } - if ((wp->paint.brush->flag & BRUSH_ACCUMULATE) == 0) { + if (!brush_use_accumulate(wp->paint.brush)) { MDeformVert *dvert_prev = ob->sculpt->mode.wpaint.dvert_prev; MDeformVert *dv_prev = defweight_prev_init(dvert_prev, me->dvert, index); if (index_mirr != -1) { @@ -1031,7 +1035,7 @@ static void vertex_paint_init_session_data(const ToolSettings *ts, Object *ob) /* Create average brush arrays */ if (ob->mode == OB_MODE_VERTEX_PAINT) { - if ((brush->flag & BRUSH_ACCUMULATE) == 0) { + if (!brush_use_accumulate(brush)) { if (ob->sculpt->mode.vpaint.previous_color == NULL) { ob->sculpt->mode.vpaint.previous_color = MEM_callocN(me->totloop * sizeof(uint), __func__); @@ -1042,7 +1046,7 @@ static void vertex_paint_init_session_data(const ToolSettings *ts, Object *ob) } } else if (ob->mode == OB_MODE_WEIGHT_PAINT) { - if ((brush->flag & BRUSH_ACCUMULATE) == 0) { + if (!brush_use_accumulate(brush)) { if (ob->sculpt->mode.wpaint.alpha_weight == NULL) { ob->sculpt->mode.wpaint.alpha_weight = MEM_callocN(me->totvert * sizeof(float), __func__); @@ -1204,11 +1208,9 @@ struct WPaintData { int defbase_tot; - /* Special storage for smear brush, avoid feedback loop - update each step and swap. */ - struct { - float *weight_prev; - float *weight_curr; - } smear; + /* original weight values for use in blur/smear */ + float *precomputed_weight; + bool precomputed_weight_ready; }; /* Initialize the stroke cache invariants from operator properties */ @@ -1437,24 +1439,8 @@ static bool wpaint_stroke_test_start(bContext *C, wmOperator *op, const float mo wpd->mirror.lock = tmpflags; } - if (vp->paint.brush->vertexpaint_tool == PAINT_BLEND_SMEAR) { - wpd->smear.weight_prev = MEM_mallocN(sizeof(float) * me->totvert, __func__); - const MDeformVert *dv = me->dvert; - if (wpd->do_multipaint) { - const bool do_auto_normalize = ((ts->auto_normalize != 0) && (wpd->vgroup_validmap != NULL)); - for (int i = 0; i < me->totvert; i++, dv++) { - float weight = BKE_defvert_multipaint_collective_weight( - dv, wpd->defbase_tot, wpd->defbase_sel, wpd->defbase_tot_sel, do_auto_normalize); - CLAMP(weight, 0.0f, 1.0f); - wpd->smear.weight_prev[i] = weight; - } - } - else { - for (int i = 0; i < me->totvert; i++, dv++) { - wpd->smear.weight_prev[i] = defvert_find_weight(dv, wpd->active.index); - } - } - wpd->smear.weight_curr = MEM_dupallocN(wpd->smear.weight_prev); + if (ELEM(vp->paint.brush->vertexpaint_tool, PAINT_BLEND_SMEAR, PAINT_BLEND_BLUR)) { + wpd->precomputed_weight = MEM_mallocN(sizeof(float) * me->totvert, __func__); } /* imat for normals */ @@ -1512,6 +1498,33 @@ static float wpaint_get_active_weight(const MDeformVert *dv, const WeightPaintIn } } +static void do_wpaint_precompute_weight_cb_ex( + void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id)) +{ + SculptThreadedTaskData *data = userdata; + const MDeformVert *dv = &data->me->dvert[n]; + + data->wpd->precomputed_weight[n] = wpaint_get_active_weight(dv, data->wpi); +} + +static void precompute_weight_values( + bContext *C, Object *ob, Brush *brush, struct WPaintData *wpd, WeightPaintInfo *wpi, Mesh *me) +{ + if (wpd->precomputed_weight_ready && !brush_use_accumulate(brush)) + return; + + /* threaded loop over vertices */ + SculptThreadedTaskData data = { + .C = C, .ob = ob, .wpd = wpd, .wpi = wpi, .me = me, + }; + + BLI_task_parallel_range_ex( + 0, me->totvert, &data, NULL, 0, do_wpaint_precompute_weight_cb_ex, + true, false); + + wpd->precomputed_weight_ready = true; +} + static void do_wpaint_brush_blur_task_cb_ex( void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id)) { @@ -1560,8 +1573,7 @@ static void do_wpaint_brush_blur_task_cb_ex( for (int k = 0; k < mp->totloop; k++) { const int l_index = mp->loopstart + k; const MLoop *ml = &data->me->mloop[l_index]; - const MDeformVert *dv = &data->me->dvert[ml->v]; - weight_final += wpaint_get_active_weight(dv, data->wpi); + weight_final += data->wpd->precomputed_weight[ml->v]; } } @@ -1681,7 +1693,7 @@ static void do_wpaint_brush_smear_task_cb_ex( if (stroke_dot > stroke_dot_max) { stroke_dot_max = stroke_dot; - weight_final = data->wpd->smear.weight_prev[v_other_index]; + weight_final = data->wpd->precomputed_weight[v_other_index]; do_color = true; } } @@ -1693,12 +1705,13 @@ static void do_wpaint_brush_smear_task_cb_ex( const float final_alpha = brush_fade * brush_strength * grid_alpha * brush_alpha_pressure; + + if (final_alpha <= 0.0f) + continue; + do_weight_paint_vertex( data->vp, data->ob, data->wpi, v_index, final_alpha, (float)weight_final); - /* Access the weight again because it might not have been applied completely. */ - data->wpd->smear.weight_curr[v_index] = - wpaint_get_active_weight(&data->me->dvert[v_index], data->wpi); } } } @@ -2064,14 +2077,14 @@ static void wpaint_stroke_update_step(bContext *C, struct PaintStroke *stroke, P wpi.brush_alpha_value = brush_alpha_value; /* *** done setting up WeightPaintInfo *** */ + if (wpd->precomputed_weight) { + precompute_weight_values(C, ob, brush, wpd, &wpi, ob->data); + } + wpaint_do_symmetrical_brush_actions(C, ob, wp, sd, wpd, &wpi); swap_m4m4(vc->rv3d->persmat, mat); - if (wp->paint.brush->vertexpaint_tool == PAINT_BLEND_SMEAR) { - SWAP(float *, wpd->smear.weight_curr, wpd->smear.weight_prev); - } - /* calculate pivot for rotation around seletion if needed */ /* also needed for "View Selected" on last stroke */ paint_last_stroke_update(scene, vc->ar, mval); @@ -2121,10 +2134,8 @@ static void wpaint_stroke_done(const bContext *C, struct PaintStroke *stroke) MEM_freeN((void *)wpd->active.lock); if (wpd->mirror.lock) MEM_freeN((void *)wpd->mirror.lock); - if (wpd->smear.weight_prev) - MEM_freeN(wpd->smear.weight_prev); - if (wpd->smear.weight_curr) - MEM_freeN(wpd->smear.weight_curr); + if (wpd->precomputed_weight) + MEM_freeN(wpd->precomputed_weight); MEM_freeN(wpd); } diff --git a/source/blender/makesrna/intern/rna_nodetree.c b/source/blender/makesrna/intern/rna_nodetree.c index 55ac8a32d80..29d68111bac 100644 --- a/source/blender/makesrna/intern/rna_nodetree.c +++ b/source/blender/makesrna/intern/rna_nodetree.c @@ -3740,7 +3740,7 @@ static const EnumPropertyItem sh_tex_prop_interpolation_items[] = { {SHD_INTERP_CLOSEST, "Closest", 0, "Closest", "No interpolation (sample closest texel)"}, {SHD_INTERP_CUBIC, "Cubic", 0, "Cubic", - "Cubic interpolation (CPU only)"}, + "Cubic interpolation"}, {SHD_INTERP_SMART, "Smart", 0, "Smart", "Bicubic when magnifying, else bilinear (OSL only)"}, {0, NULL, 0, NULL, NULL} @@ -4106,7 +4106,7 @@ static void def_sh_tex_pointdensity(StructRNA *srna) {SHD_INTERP_LINEAR, "Linear", 0, "Linear", "Linear interpolation"}, {SHD_INTERP_CUBIC, "Cubic", 0, "Cubic", - "Cubic interpolation (CPU only)"}, + "Cubic interpolation"}, {0, NULL, 0, NULL, NULL} }; diff --git a/source/creator/creator_args.c b/source/creator/creator_args.c index c38f19397c3..841eef4c0e8 100644 --- a/source/creator/creator_args.c +++ b/source/creator/creator_args.c @@ -439,7 +439,7 @@ static void arg_py_context_restore( * \{ */ static const char arg_handle_print_version_doc[] = -"\n\tPrint Blender version and exit" +"\n\tPrint Blender version and exit." ; static int arg_handle_print_version(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -463,10 +463,10 @@ static int arg_handle_print_version(int UNUSED(argc), const char **UNUSED(argv), } static const char arg_handle_print_help_doc[] = -"\n\tPrint this help text and exit" +"\n\tPrint this help text and exit." ; static const char arg_handle_print_help_doc_win32[] = -"\n\tPrint this help text and exit (windows only)" +"\n\tPrint this help text and exit (windows only)." ; static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), void *data) { @@ -593,16 +593,16 @@ static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), vo printf("Argument Parsing:\n"); printf("\tArguments must be separated by white space, eg:\n"); printf("\t# blender -ba test.blend\n"); - printf("\t...will ignore the 'a'\n"); + printf("\t...will ignore the 'a'.\n"); printf("\t# blender -b test.blend -f8\n"); - printf("\t...will ignore '8' because there is no space between the '-f' and the frame value\n\n"); + printf("\t...will ignore '8' because there is no space between the '-f' and the frame value.\n\n"); printf("Argument Order:\n"); printf("\tArguments are executed in the order they are given. eg:\n"); printf("\t# blender --background test.blend --render-frame 1 --render-output '/tmp'\n"); - printf("\t...will not render to '/tmp' because '--render-frame 1' renders before the output path is set\n"); + printf("\t...will not render to '/tmp' because '--render-frame 1' renders before the output path is set.\n"); printf("\t# blender --background --render-output /tmp test.blend --render-frame 1\n"); - printf("\t...will not render to '/tmp' because loading the blend-file overwrites the render output that was set\n"); + printf("\t...will not render to '/tmp' because loading the blend-file overwrites the render output that was set.\n"); printf("\t# blender --background test.blend --render-output /tmp --render-frame 1\n"); printf("\t...works as expected.\n\n"); @@ -612,7 +612,7 @@ static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), vo printf(" $BLENDER_SYSTEM_SCRIPTS Directory for system wide scripts.\n"); printf(" $BLENDER_USER_DATAFILES Directory for user data files (icons, translations, ..).\n"); printf(" $BLENDER_SYSTEM_DATAFILES Directory for system wide data files.\n"); - printf(" $BLENDER_SYSTEM_PYTHON Directory for system python libraries.\n"); + printf(" $BLENDER_SYSTEM_PYTHON Directory for system Python libraries.\n"); #ifdef WIN32 printf(" $TEMP Store temporary files here.\n"); #else @@ -621,7 +621,7 @@ static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), vo #ifdef WITH_SDL printf(" $SDL_AUDIODRIVER LibSDL audio driver - alsa, esd, dma.\n"); #endif - printf(" $PYTHONHOME Path to the python directory, eg. /usr/lib/python.\n\n"); + printf(" $PYTHONHOME Path to the Python directory, eg. /usr/lib/python.\n\n"); exit(0); @@ -629,7 +629,7 @@ static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), vo } static const char arg_handle_arguments_end_doc[] = -"\n\tEnds option processing, following arguments passed unchanged. Access via Python's 'sys.argv'" +"\n\tEnd option processing, following arguments passed unchanged. Access via Python's 'sys.argv'." ; static int arg_handle_arguments_end(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -646,10 +646,10 @@ static int arg_handle_arguments_end(int UNUSED(argc), const char **UNUSED(argv), #endif static const char arg_handle_python_set_doc_enable[] = -"\n\tEnable automatic Python script execution" PY_ENABLE_AUTO +"\n\tEnable automatic Python script execution" PY_ENABLE_AUTO "." ; static const char arg_handle_python_set_doc_disable[] = -"\n\tDisable automatic Python script execution (pydrivers & startup scripts)" PY_DISABLE_AUTO +"\n\tDisable automatic Python script execution (pydrivers & startup scripts)" PY_DISABLE_AUTO "." ; #undef PY_ENABLE_AUTO #undef PY_DISABLE_AUTO @@ -667,7 +667,7 @@ static int arg_handle_python_set(int UNUSED(argc), const char **UNUSED(argv), vo } static const char arg_handle_crash_handler_disable_doc[] = -"\n\tDisable the crash handler" +"\n\tDisable the crash handler." ; static int arg_handle_crash_handler_disable(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -676,7 +676,7 @@ static int arg_handle_crash_handler_disable(int UNUSED(argc), const char **UNUSE } static const char arg_handle_abort_handler_disable_doc[] = -"\n\tDisable the abort handler" +"\n\tDisable the abort handler." ; static int arg_handle_abort_handler_disable(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -685,7 +685,7 @@ static int arg_handle_abort_handler_disable(int UNUSED(argc), const char **UNUSE } static const char arg_handle_background_mode_set_doc[] = -"\n\tRun in background (often used for UI-less rendering)" +"\n\tRun in background (often used for UI-less rendering)." ; static int arg_handle_background_mode_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -695,7 +695,7 @@ static int arg_handle_background_mode_set(int UNUSED(argc), const char **UNUSED( static const char arg_handle_debug_mode_set_doc[] = "\n" -"\tTurn debugging on\n" +"\tTurn debugging on.\n" "\n" "\t* Enables memory error detection\n" "\t* Disables mouse grab (to interact with a debugger in some cases)\n" @@ -720,30 +720,30 @@ static int arg_handle_debug_mode_set(int UNUSED(argc), const char **UNUSED(argv) #ifdef WITH_FFMPEG static const char arg_handle_debug_mode_generic_set_doc_ffmpeg[] = -"\n\tEnable debug messages from FFmpeg library"; +"\n\tEnable debug messages from FFmpeg library."; #endif #ifdef WITH_FREESTYLE static const char arg_handle_debug_mode_generic_set_doc_freestyle[] = -"\n\tEnable debug messages for FreeStyle"; +"\n\tEnable debug messages for FreeStyle."; #endif static const char arg_handle_debug_mode_generic_set_doc_python[] = -"\n\tEnable debug messages for Python"; +"\n\tEnable debug messages for Python."; static const char arg_handle_debug_mode_generic_set_doc_events[] = -"\n\tEnable debug messages for the event system"; +"\n\tEnable debug messages for the event system."; static const char arg_handle_debug_mode_generic_set_doc_handlers[] = -"\n\tEnable debug messages for event handling"; +"\n\tEnable debug messages for event handling."; static const char arg_handle_debug_mode_generic_set_doc_wm[] = -"\n\tEnable debug messages for the window manager, also prints every operator call"; +"\n\tEnable debug messages for the window manager, also prints every operator call."; static const char arg_handle_debug_mode_generic_set_doc_jobs[] = "\n\tEnable time profiling for background jobs."; static const char arg_handle_debug_mode_generic_set_doc_gpu[] = "\n\tEnable gpu debug context and information for OpenGL 4.3+."; static const char arg_handle_debug_mode_generic_set_doc_depsgraph[] = -"\n\tEnable debug messages from dependency graph"; +"\n\tEnable debug messages from dependency graph."; static const char arg_handle_debug_mode_generic_set_doc_depsgraph_no_threads[] = -"\n\tSwitch dependency graph to a single threaded evaluation"; +"\n\tSwitch dependency graph to a single threaded evaluation."; static const char arg_handle_debug_mode_generic_set_doc_gpumem[] = -"\n\tEnable GPU memory stats in status bar"; +"\n\tEnable GPU memory stats in status bar."; static int arg_handle_debug_mode_generic_set(int UNUSED(argc), const char **UNUSED(argv), void *data) { @@ -752,7 +752,7 @@ static int arg_handle_debug_mode_generic_set(int UNUSED(argc), const char **UNUS } static const char arg_handle_debug_mode_io_doc[] = -"\n\tEnable debug messages for I/O (collada, ...)"; +"\n\tEnable debug messages for I/O (collada, ...)."; static int arg_handle_debug_mode_io(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { G.debug |= G_DEBUG_IO; @@ -760,7 +760,7 @@ static int arg_handle_debug_mode_io(int UNUSED(argc), const char **UNUSED(argv), } static const char arg_handle_debug_mode_all_doc[] = -"\n\tEnable all debug messages"; +"\n\tEnable all debug messages."; static int arg_handle_debug_mode_all(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { G.debug |= G_DEBUG_ALL; @@ -775,7 +775,7 @@ static int arg_handle_debug_mode_all(int UNUSED(argc), const char **UNUSED(argv) #ifdef WITH_LIBMV static const char arg_handle_debug_mode_libmv_doc[] = -"\n\tEnable debug messages from libmv library" +"\n\tEnable debug messages from libmv library." ; static int arg_handle_debug_mode_libmv(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -787,7 +787,7 @@ static int arg_handle_debug_mode_libmv(int UNUSED(argc), const char **UNUSED(arg #ifdef WITH_CYCLES_LOGGING static const char arg_handle_debug_mode_cycles_doc[] = -"\n\tEnable debug messages from Cycles" +"\n\tEnable debug messages from Cycles." ; static int arg_handle_debug_mode_cycles(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -797,7 +797,7 @@ static int arg_handle_debug_mode_cycles(int UNUSED(argc), const char **UNUSED(ar #endif static const char arg_handle_debug_mode_memory_set_doc[] = -"\n\tEnable fully guarded memory allocation and debugging" +"\n\tEnable fully guarded memory allocation and debugging." ; static int arg_handle_debug_mode_memory_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -807,7 +807,7 @@ static int arg_handle_debug_mode_memory_set(int UNUSED(argc), const char **UNUSE static const char arg_handle_debug_value_set_doc[] = "\n" -"\tSet debug value of on startup\n" +"\tSet debug value of on startup." ; static int arg_handle_debug_value_set(int argc, const char **argv, void *UNUSED(data)) { @@ -831,7 +831,7 @@ static int arg_handle_debug_value_set(int argc, const char **argv, void *UNUSED( } static const char arg_handle_debug_fpe_set_doc[] = -"\n\tEnable floating point exceptions" +"\n\tEnable floating point exceptions." ; static int arg_handle_debug_fpe_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -840,7 +840,7 @@ static int arg_handle_debug_fpe_set(int UNUSED(argc), const char **UNUSED(argv), } static const char arg_handle_factory_startup_set_doc[] = -"\n\tSkip reading the " STRINGIFY(BLENDER_STARTUP_FILE) " in the users home directory" +"\n\tSkip reading the " STRINGIFY(BLENDER_STARTUP_FILE) " in the users home directory." ; static int arg_handle_factory_startup_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -849,11 +849,11 @@ static int arg_handle_factory_startup_set(int UNUSED(argc), const char **UNUSED( } static const char arg_handle_env_system_set_doc_datafiles[] = -"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_DATAFILES)" environment variable"; +"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_DATAFILES)" environment variable."; static const char arg_handle_env_system_set_doc_scripts[] = -"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_SCRIPTS)" environment variable"; +"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_SCRIPTS)" environment variable."; static const char arg_handle_env_system_set_doc_python[] = -"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_PYTHON)" environment variable"; +"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_PYTHON)" environment variable."; static int arg_handle_env_system_set(int argc, const char **argv, void *UNUSED(data)) { @@ -879,13 +879,19 @@ static int arg_handle_env_system_set(int argc, const char **argv, void *UNUSED(d static const char arg_handle_playback_mode_doc[] = " \n" -"\tPlayback , only operates this way when not running in background.\n" -"\t\t-p \tOpen with lower left corner at , \n" -"\t\t-m\t\tRead from disk (Do not buffer)\n" -"\t\t-f \t\tSpecify FPS to start with\n" -"\t\t-j \tSet frame step to \n" -"\t\t-s \tPlay from \n" -"\t\t-e \tPlay until " +"\tPlayback , only operates this way when not running in background.\n\n" +"\t-p \n" +"\t\tOpen with lower left corner at , .\n" +"\t-m\n" +"\t\tRead from disk (Do not buffer).\n" +"\t-f \n" +"\t\tSpecify FPS to start with.\n" +"\t-j \n" +"\t\tSet frame step to .\n" +"\t-s \n" +"\t\tPlay from .\n" +"\t-e \n" +"\t\tPlay until ." ; static int arg_handle_playback_mode(int argc, const char **argv, void *UNUSED(data)) { @@ -905,7 +911,7 @@ static int arg_handle_playback_mode(int argc, const char **argv, void *UNUSED(da static const char arg_handle_window_geometry_doc[] = " \n" -"\tOpen with lower left corner at , and width and height as , " +"\tOpen with lower left corner at , and width and height as , ." ; static int arg_handle_window_geometry(int argc, const char **argv, void *UNUSED(data)) { @@ -931,7 +937,7 @@ static int arg_handle_window_geometry(int argc, const char **argv, void *UNUSED( } static const char arg_handle_native_pixels_set_doc[] = -"\n\tDo not use native pixel size, for high resolution displays (MacBook 'Retina')" +"\n\tDo not use native pixel size, for high resolution displays (MacBook 'Retina')." ; static int arg_handle_native_pixels_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -940,7 +946,7 @@ static int arg_handle_native_pixels_set(int UNUSED(argc), const char **UNUSED(ar } static const char arg_handle_with_borders_doc[] = -"\n\tForce opening with borders" +"\n\tForce opening with borders." ; static int arg_handle_with_borders(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -949,7 +955,7 @@ static int arg_handle_with_borders(int UNUSED(argc), const char **UNUSED(argv), } static const char arg_handle_without_borders_doc[] = -"\n\tForce opening without borders" +"\n\tForce opening without borders." ; static int arg_handle_without_borders(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -960,7 +966,7 @@ static int arg_handle_without_borders(int UNUSED(argc), const char **UNUSED(argv extern bool wm_start_with_console; /* wm_init_exit.c */ static const char arg_handle_start_with_console_doc[] = -"\n\tStart with the console window open (ignored if -b is set), (Windows only)" +"\n\tStart with the console window open (ignored if -b is set), (Windows only)." ; static int arg_handle_start_with_console(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -969,10 +975,10 @@ static int arg_handle_start_with_console(int UNUSED(argc), const char **UNUSED(a } static const char arg_handle_register_extension_doc[] = -"\n\tRegister blend-file extension, then exit (Windows only)" +"\n\tRegister blend-file extension, then exit (Windows only)." ; static const char arg_handle_register_extension_doc_silent[] = -"\n\tSilently register blend-file extension, then exit (Windows only)" +"\n\tSilently register blend-file extension, then exit (Windows only)." ; static int arg_handle_register_extension(int UNUSED(argc), const char **UNUSED(argv), void *data) { @@ -987,7 +993,7 @@ static int arg_handle_register_extension(int UNUSED(argc), const char **UNUSED(a } static const char arg_handle_joystick_disable_doc[] = -"\n\tDisable joystick support" +"\n\tDisable joystick support." ; static int arg_handle_joystick_disable(int UNUSED(argc), const char **UNUSED(argv), void *data) { @@ -1008,7 +1014,7 @@ static int arg_handle_joystick_disable(int UNUSED(argc), const char **UNUSED(arg } static const char arg_handle_audio_disable_doc[] = -"\n\tForce sound system to None" +"\n\tForce sound system to None." ; static int arg_handle_audio_disable(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -1017,7 +1023,7 @@ static int arg_handle_audio_disable(int UNUSED(argc), const char **UNUSED(argv), } static const char arg_handle_audio_set_doc[] = -"\n\tForce sound system to a specific device\n\tThe name is the same as found in the user preferences, e.g. OpenAL." +"\n\tForce sound system to a specific device.\n\t'NULL' 'SDL' 'OPENAL' 'JACK'." ; static int arg_handle_audio_set(int argc, const char **argv, void *UNUSED(data)) { @@ -1067,7 +1073,7 @@ static int arg_handle_output_set(int argc, const char **argv, void *data) static const char arg_handle_engine_set_doc[] = "\n" -"\tSpecify the render engine\n\tuse -E help to list available engines" +"\tSpecify the render engine.\n\tUse -E help to list available engines." ; static int arg_handle_engine_set(int argc, const char **argv, void *data) { @@ -1110,11 +1116,11 @@ static int arg_handle_engine_set(int argc, const char **argv, void *data) static const char arg_handle_image_type_set_doc[] = "\n" -"\tSet the render format, Valid options are...\n" -"\t\tTGA RAWTGA JPEG IRIS IRIZ\n" -"\t\tAVIRAW AVIJPEG PNG BMP\n" -"\t(formats that can be compiled into blender, not available on all systems)\n" -"\t\tHDR TIFF EXR MULTILAYER MPEG FRAMESERVER CINEON DPX DDS JP2" +"\tSet the render format.\n" +"\tValid options are 'TGA' 'RAWTGA' 'JPEG' 'IRIS' 'IRIZ' 'AVIRAW' 'AVIJPEG' 'PNG' 'BMP'\n" +"\n" +"\tFormats that can be compiled into Blender, not available on all systems: 'HDR' 'TIFF' 'EXR' 'MULTILAYER'\n" +"\t'MPEG' 'FRAMESERVER' 'CINEON' 'DPX' 'DDS' 'JP2'" ; static int arg_handle_image_type_set(int argc, const char **argv, void *data) { @@ -1202,7 +1208,7 @@ static int arg_handle_verbosity_set(int argc, const char **argv, void *UNUSED(da static const char arg_handle_extension_set_doc[] = "\n" -"\tSet option to add the file extension to the end of the file" +"\tSet option to add the file extension to the end of the file." ; static int arg_handle_extension_set(int argc, const char **argv, void *data) { @@ -1234,10 +1240,15 @@ static int arg_handle_extension_set(int argc, const char **argv, void *data) static const char arg_handle_ge_parameters_set_doc[] = "Game Engine specific options\n" -"\t-g fixedtime\t\tRun on 50 hertz without dropping frames\n" -"\t-g vertexarrays\t\tUse Vertex Arrays for rendering (usually faster)\n" -"\t-g nomipmap\t\tNo Texture Mipmapping\n" -"\t-g linearmipmap\t\tLinear Texture Mipmapping instead of Nearest (default)" +"\n" +"\t'fixedtime'\n" +"\t\tRun on 50 hertz without dropping frames.\n" +"\t'vertexarrays'\n" +"\t\tUse Vertex Arrays for rendering (usually faster).\n" +"\t'nomipmap'\n" +"\t\tNo Texture Mipmapping.\n" +"\t'linearmipmap'\n" +"\t\tLinear Texture Mipmapping instead of Nearest (default)." ; static int arg_handle_ge_parameters_set(int argc, const char **argv, void *data) { @@ -1270,7 +1281,7 @@ static int arg_handle_ge_parameters_set(int argc, const char **argv, void *data) #endif } else { - printf("error: argument assignment (%s) without value.\n", paramname); + printf("Error: argument assignment (%s) without value.\n", paramname); return 0; } /* name arg eaten */ @@ -1361,7 +1372,7 @@ static int arg_handle_render_frame(int argc, const char **argv, void *data) } static const char arg_handle_render_animation_doc[] = -"\n\tRender frames from start to end (inclusive)" +"\n\tRender frames from start to end (inclusive)." ; static int arg_handle_render_animation(int UNUSED(argc), const char **UNUSED(argv), void *data) { @@ -1387,7 +1398,7 @@ static int arg_handle_render_animation(int UNUSED(argc), const char **UNUSED(arg static const char arg_handle_scene_set_doc[] = "\n" -"\tSet the active scene for rendering" +"\tSet the active scene for rendering." ; static int arg_handle_scene_set(int argc, const char **argv, void *data) { @@ -1469,7 +1480,7 @@ static int arg_handle_frame_end_set(int argc, const char **argv, void *data) static const char arg_handle_frame_skip_set_doc[] = "\n" -"\tSet number of frames to step forward after each rendered frame" +"\tSet number of frames to step forward after each rendered frame." ; static int arg_handle_frame_skip_set(int argc, const char **argv, void *data) { @@ -1497,7 +1508,7 @@ static int arg_handle_frame_skip_set(int argc, const char **argv, void *data) static const char arg_handle_python_file_run_doc[] = "\n" -"\tRun the given Python script file" +"\tRun the given Python script file." ; static int arg_handle_python_file_run(int argc, const char **argv, void *data) { @@ -1525,14 +1536,14 @@ static int arg_handle_python_file_run(int argc, const char **argv, void *data) } #else UNUSED_VARS(argc, argv, data); - printf("This blender was built without python support\n"); + printf("This Blender was built without Python support\n"); return 0; #endif /* WITH_PYTHON */ } static const char arg_handle_python_text_run_doc[] = "\n" -"\tRun the given Python script text block" +"\tRun the given Python script text block." ; static int arg_handle_python_text_run(int argc, const char **argv, void *data) { @@ -1566,14 +1577,14 @@ static int arg_handle_python_text_run(int argc, const char **argv, void *data) } #else UNUSED_VARS(argc, argv, data); - printf("This blender was built without python support\n"); + printf("This Blender was built without Python support\n"); return 0; #endif /* WITH_PYTHON */ } static const char arg_handle_python_expr_run_doc[] = "\n" -"\tRun the given expression as a Python script" +"\tRun the given expression as a Python script." ; static int arg_handle_python_expr_run(int argc, const char **argv, void *data) { @@ -1596,13 +1607,13 @@ static int arg_handle_python_expr_run(int argc, const char **argv, void *data) } #else UNUSED_VARS(argc, argv, data); - printf("This blender was built without python support\n"); + printf("This Blender was built without Python support\n"); return 0; #endif /* WITH_PYTHON */ } static const char arg_handle_python_console_run_doc[] = -"\n\tRun blender with an interactive console" +"\n\tRun Blender with an interactive console." ; static int arg_handle_python_console_run(int UNUSED(argc), const char **argv, void *data) { @@ -1614,7 +1625,7 @@ static int arg_handle_python_console_run(int UNUSED(argc), const char **argv, vo return 0; #else UNUSED_VARS(argv, data); - printf("This blender was built without python support\n"); + printf("This Blender was built without python support\n"); return 0; #endif /* WITH_PYTHON */ } @@ -1646,7 +1657,7 @@ static int arg_handle_python_exit_code_set(int argc, const char **argv, void *UN } static const char arg_handle_addons_set_doc[] = -"\n\tComma separated list of add-ons (no spaces)" +"\n\tComma separated list of add-ons (no spaces)." ; static int arg_handle_addons_set(int argc, const char **argv, void *data) {