Merge branch 'master' into blender2.8

This commit is contained in:
Campbell Barton 2017-10-10 01:36:36 +11:00
commit 6ec43a765b
32 changed files with 1551 additions and 1502 deletions

@ -731,6 +731,17 @@ if(WITH_INTERNATIONAL)
endif()
if(WITH_PYTHON)
# While we have this as an '#error' in bpy_util.h,
# upgrading Python tends to cause confusion for users who build.
# Give the error message early to make this more obvious.
#
# Do this before main 'platform_*' checks,
# because UNIX will search for the old Python paths which may not exist.
# giving errors about missing paths before this case is met.
if(DEFINED PYTHON_VERSION AND "${PYTHON_VERSION}" VERSION_LESS "3.6")
message(FATAL_ERROR "At least Python 3.6 is required to build")
endif()
if(NOT EXISTS "${CMAKE_SOURCE_DIR}/release/scripts/addons/modules")
message(WARNING
"Addons path '${CMAKE_SOURCE_DIR}/release/scripts/addons' is missing, "

@ -1188,7 +1188,7 @@ class CYCLES_WORLD_PT_settings(CyclesButtonsPanel, Panel):
sub = col.column()
sub.active = use_cpu(context)
sub.prop(cworld, "volume_sampling", text="")
sub.prop(cworld, "volume_interpolation", text="")
col.prop(cworld, "volume_interpolation", text="")
col.prop(cworld, "homogeneous_volume", text="Homogeneous")
@ -1287,7 +1287,7 @@ class CYCLES_MATERIAL_PT_settings(CyclesButtonsPanel, Panel):
sub = col.column()
sub.active = use_cpu(context)
sub.prop(cmat, "volume_sampling", text="")
sub.prop(cmat, "volume_interpolation", text="")
col.prop(cmat, "volume_interpolation", text="")
col.prop(cmat, "homogeneous_volume", text="Homogeneous")
layout.separator()

@ -26,6 +26,7 @@
#include "util/util_stats.h"
#include "util/util_string.h"
#include "util/util_thread.h"
#include "util/util_texture.h"
#include "util/util_types.h"
#include "util/util_vector.h"

@ -163,6 +163,9 @@ public:
TaskPool task_pool;
KernelGlobals kernel_globals;
device_vector<TextureInfo> texture_info;
bool need_texture_info;
#ifdef WITH_OSL
OSLGlobals osl_globals;
#endif
@ -235,6 +238,8 @@ public:
VLOG(1) << "Will be using split kernel.";
}
need_texture_info = false;
#define REGISTER_SPLIT_KERNEL(name) split_kernels[#name] = KernelFunctions<void(*)(KernelGlobals*, KernelData*)>(KERNEL_FUNCTIONS(name))
REGISTER_SPLIT_KERNEL(path_init);
REGISTER_SPLIT_KERNEL(scene_intersect);
@ -261,6 +266,7 @@ public:
~CPUDevice()
{
task_pool.stop();
tex_free(texture_info);
}
virtual bool show_samples() const
@ -268,6 +274,15 @@ public:
return (TaskScheduler::num_threads() == 1);
}
void load_texture_info()
{
if(need_texture_info) {
tex_free(texture_info);
tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
need_texture_info = false;
}
}
void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
{
if(name) {
@ -333,14 +348,47 @@ public:
VLOG(1) << "Texture allocate: " << name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
kernel_tex_copy(&kernel_globals,
name,
mem.data_pointer,
mem.data_width,
mem.data_height,
mem.data_depth,
interpolation,
extension);
if(interpolation == INTERPOLATION_NONE) {
/* Data texture. */
kernel_tex_copy(&kernel_globals,
name,
mem.data_pointer,
mem.data_width,
mem.data_height,
mem.data_depth,
interpolation,
extension);
}
else {
/* Image Texture. */
int flat_slot = 0;
if(string_startswith(name, "__tex_image")) {
int pos = string(name).rfind("_");
flat_slot = atoi(name + pos + 1);
}
else {
assert(0);
}
if(flat_slot >= texture_info.size()) {
/* Allocate some slots in advance, to reduce amount
* of re-allocations. */
texture_info.resize(flat_slot + 128);
}
TextureInfo& info = texture_info.get_data()[flat_slot];
info.data = (uint64_t)mem.data_pointer;
info.cl_buffer = 0;
info.interpolation = interpolation;
info.extension = extension;
info.width = mem.data_width;
info.height = mem.data_height;
info.depth = mem.data_depth;
need_texture_info = true;
}
mem.device_pointer = mem.data_pointer;
mem.device_size = mem.memory_size();
stats.mem_alloc(mem.device_size);
@ -352,6 +400,7 @@ public:
mem.device_pointer = 0;
stats.mem_free(mem.device_size);
mem.device_size = 0;
need_texture_info = true;
}
}
@ -784,6 +833,9 @@ public:
void task_add(DeviceTask& task)
{
/* Load texture info. */
load_texture_info();
/* split task into smaller ones */
list<DeviceTask> tasks;

@ -129,7 +129,7 @@ public:
CUcontext cuContext;
CUmodule cuModule, cuFilterModule;
map<device_ptr, bool> tex_interp_map;
map<device_ptr, uint> tex_bindless_map;
map<device_ptr, CUtexObject> tex_bindless_map;
int cuDevId;
int cuDevArchitecture;
bool first_error;
@ -145,8 +145,8 @@ public:
map<device_ptr, PixelMem> pixel_mem_map;
/* Bindless Textures */
device_vector<uint> bindless_mapping;
bool need_bindless_mapping;
device_vector<TextureInfo> texture_info;
bool need_texture_info;
CUdeviceptr cuda_device_ptr(device_ptr mem)
{
@ -231,7 +231,7 @@ public:
split_kernel = NULL;
need_bindless_mapping = false;
need_texture_info = false;
/* intialize */
if(cuda_error(cuInit(0)))
@ -274,7 +274,7 @@ public:
delete split_kernel;
if(info.has_bindless_textures) {
tex_free(bindless_mapping);
tex_free(texture_info);
}
cuda_assert(cuCtxDestroy(cuContext));
@ -544,12 +544,12 @@ public:
return (result == CUDA_SUCCESS);
}
void load_bindless_mapping()
void load_texture_info()
{
if(info.has_bindless_textures && need_bindless_mapping) {
tex_free(bindless_mapping);
tex_alloc("__bindless_mapping", bindless_mapping, INTERPOLATION_NONE, EXTENSION_REPEAT);
need_bindless_mapping = false;
if(info.has_bindless_textures && need_texture_info) {
tex_free(texture_info);
tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
need_texture_info = false;
}
}
@ -646,8 +646,7 @@ public:
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
/* Check if we are on sm_30 or above.
* We use arrays and bindles textures for storage there */
/* Check if we are on sm_30 or above, for bindless textures. */
bool has_bindless_textures = info.has_bindless_textures;
/* General variables for both architectures */
@ -679,20 +678,10 @@ public:
filter_mode = CU_TR_FILTER_MODE_LINEAR;
}
CUarray_format_enum format;
switch(mem.data_type) {
case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
default: assert(0); return;
}
/* General variables for Fermi */
CUtexref texref = NULL;
if(!has_bindless_textures) {
if(!has_bindless_textures && interpolation != INTERPOLATION_NONE) {
if(mem.data_depth > 1) {
/* Kernel uses different bind names for 2d and 3d float textures,
* so we have to adjust couple of things here.
@ -711,41 +700,41 @@ public:
}
}
/* Data Storage */
if(interpolation == INTERPOLATION_NONE) {
if(has_bindless_textures) {
mem_alloc(NULL, mem, MEM_READ_ONLY);
mem_copy_to(mem);
/* Data Storage */
mem_alloc(NULL, mem, MEM_READ_ONLY);
mem_copy_to(mem);
CUdeviceptr cumem;
size_t cubytes;
CUdeviceptr cumem;
size_t cubytes;
cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
if(cubytes == 8) {
/* 64 bit device pointer */
uint64_t ptr = mem.device_pointer;
cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
}
else {
/* 32 bit device pointer */
uint32_t ptr = (uint32_t)mem.device_pointer;
cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
}
if(cubytes == 8) {
/* 64 bit device pointer */
uint64_t ptr = mem.device_pointer;
cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
}
else {
mem_alloc(NULL, mem, MEM_READ_ONLY);
mem_copy_to(mem);
cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT));
cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER));
/* 32 bit device pointer */
uint32_t ptr = (uint32_t)mem.device_pointer;
cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
}
}
/* Texture Storage */
else {
/* Texture Storage */
CUarray handle = NULL;
CUarray_format_enum format;
switch(mem.data_type) {
case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
default: assert(0); return;
}
if(mem.data_depth > 1) {
CUDA_ARRAY3D_DESCRIPTOR desc;
@ -810,8 +799,8 @@ public:
stats.mem_alloc(size);
/* Bindless Textures - Kepler */
if(has_bindless_textures) {
/* Bindless Textures - Kepler */
int flat_slot = 0;
if(string_startswith(name, "__tex_image")) {
int pos = string(name).rfind("_");
@ -844,35 +833,39 @@ public:
}
/* Resize once */
if(flat_slot >= bindless_mapping.size()) {
if(flat_slot >= texture_info.size()) {
/* Allocate some slots in advance, to reduce amount
* of re-allocations.
*/
bindless_mapping.resize(flat_slot + 128);
* of re-allocations. */
texture_info.resize(flat_slot + 128);
}
/* Set Mapping and tag that we need to (re-)upload to device */
bindless_mapping.get_data()[flat_slot] = (uint)tex;
tex_bindless_map[mem.device_pointer] = (uint)tex;
need_bindless_mapping = true;
TextureInfo& info = texture_info.get_data()[flat_slot];
info.data = (uint64_t)tex;
info.cl_buffer = 0;
info.interpolation = interpolation;
info.extension = extension;
info.width = mem.data_width;
info.height = mem.data_height;
info.depth = mem.data_depth;
tex_bindless_map[mem.device_pointer] = tex;
need_texture_info = true;
}
/* Regular Textures - Fermi */
else {
/* Regular Textures - Fermi */
cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT));
cuda_assert(cuTexRefSetFilterMode(texref, filter_mode));
cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
}
}
/* Fermi, Data and Image Textures */
if(!has_bindless_textures) {
cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode));
cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode));
if(mem.data_depth > 1) {
cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
}
cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode));
cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode));
if(mem.data_depth > 1) {
cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
}
cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
}
}
/* Fermi and Kepler */
@ -888,8 +881,8 @@ public:
/* Free CUtexObject (Bindless Textures) */
if(info.has_bindless_textures && tex_bindless_map[mem.device_pointer]) {
uint flat_slot = tex_bindless_map[mem.device_pointer];
cuTexObjectDestroy(flat_slot);
CUtexObject tex = tex_bindless_map[mem.device_pointer];
cuTexObjectDestroy(tex);
}
tex_interp_map.erase(tex_interp_map.find(mem.device_pointer));
@ -1737,9 +1730,6 @@ public:
if(task->type == DeviceTask::RENDER) {
RenderTile tile;
/* Upload Bindless Mapping */
load_bindless_mapping();
DeviceRequestedFeatures requested_features;
if(use_split_kernel()) {
if(!use_adaptive_compilation()) {
@ -1780,9 +1770,6 @@ public:
}
}
else if(task->type == DeviceTask::SHADER) {
/* Upload Bindless Mapping */
load_bindless_mapping();
shader(*task);
cuda_assert(cuCtxSynchronize());
@ -1805,9 +1792,12 @@ public:
void task_add(DeviceTask& task)
{
if(task.type == DeviceTask::FILM_CONVERT) {
CUDAContextScope scope(this);
CUDAContextScope scope(this);
/* Load texture info. */
load_texture_info();
if(task.type == DeviceTask::FILM_CONVERT) {
/* must be done in main thread due to opengl access */
film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
cuda_assert(cuCtxSynchronize());

@ -545,15 +545,10 @@ private:
MemoryManager memory_manager;
friend class MemoryManager;
struct tex_info_t {
uint buffer, padding;
cl_ulong offset;
uint width, height, depth, options;
};
static_assert_align(tex_info_t, 16);
static_assert_align(TextureInfo, 16);
vector<tex_info_t> texture_descriptors;
device_memory texture_descriptors_buffer;
vector<TextureInfo> texture_info;
device_memory texture_info_buffer;
struct Texture {
Texture() {}

@ -136,11 +136,11 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou
return;
}
/* Allocate this right away so that texture_descriptors_buffer is placed at offset 0 in the device memory buffers */
texture_descriptors.resize(1);
texture_descriptors_buffer.resize(1);
texture_descriptors_buffer.data_pointer = (device_ptr)&texture_descriptors[0];
memory_manager.alloc("texture_descriptors", texture_descriptors_buffer);
/* Allocate this right away so that texture_info_buffer is placed at offset 0 in the device memory buffers */
texture_info.resize(1);
texture_info_buffer.resize(1);
texture_info_buffer.data_pointer = (device_ptr)&texture_info[0];
memory_manager.alloc("texture_info", texture_info_buffer);
fprintf(stderr, "Device init success\n");
device_initialized = true;
@ -625,7 +625,7 @@ void OpenCLDeviceBase::flush_texture_buffers()
vector<texture_slot_t> texture_slots;
#define KERNEL_TEX(type, ttype, name) \
#define KERNEL_TEX(type, name) \
if(textures.find(#name) != textures.end()) { \
texture_slots.push_back(texture_slot_t(#name, num_slots)); \
} \
@ -647,55 +647,38 @@ void OpenCLDeviceBase::flush_texture_buffers()
}
/* Realloc texture descriptors buffer. */
memory_manager.free(texture_descriptors_buffer);
memory_manager.free(texture_info_buffer);
texture_descriptors.resize(num_slots);
texture_descriptors_buffer.resize(num_slots * sizeof(tex_info_t));
texture_descriptors_buffer.data_pointer = (device_ptr)&texture_descriptors[0];
texture_info.resize(num_slots);
texture_info_buffer.resize(num_slots * sizeof(TextureInfo));
texture_info_buffer.data_pointer = (device_ptr)&texture_info[0];
memory_manager.alloc("texture_descriptors", texture_descriptors_buffer);
memory_manager.alloc("texture_info", texture_info_buffer);
/* Fill in descriptors */
foreach(texture_slot_t& slot, texture_slots) {
Texture& tex = textures[slot.name];
tex_info_t& info = texture_descriptors[slot.slot];
TextureInfo& info = texture_info[slot.slot];
MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name);
info.offset = desc.offset;
info.buffer = desc.device_buffer;
info.data = desc.offset;
info.cl_buffer = desc.device_buffer;
if(string_startswith(slot.name, "__tex_image")) {
info.width = tex.mem->data_width;
info.height = tex.mem->data_height;
info.depth = tex.mem->data_depth;
info.options = 0;
if(tex.interpolation == INTERPOLATION_CLOSEST) {
info.options |= (1 << 0);
}
switch(tex.extension) {
case EXTENSION_REPEAT:
info.options |= (1 << 1);
break;
case EXTENSION_EXTEND:
info.options |= (1 << 2);
break;
case EXTENSION_CLIP:
info.options |= (1 << 3);
break;
default:
break;
}
info.interpolation = tex.interpolation;
info.extension = tex.extension;
}
}
/* Force write of descriptors. */
memory_manager.free(texture_descriptors_buffer);
memory_manager.alloc("texture_descriptors", texture_descriptors_buffer);
memory_manager.free(texture_info_buffer);
memory_manager.alloc("texture_info", texture_info_buffer);
}
void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)

@ -117,14 +117,8 @@ public:
ccl_constant KernelData *data;
ccl_global char *buffers[8];
typedef struct _tex_info_t {
uint buffer, padding;
uint64_t offset;
uint width, height, depth, options;
} _tex_info_t;
#define KERNEL_TEX(type, ttype, name) \
_tex_info_t name;
#define KERNEL_TEX(type, name) \
TextureInfo name;
#include "kernel/kernel_textures.h"
#undef KERNEL_TEX

@ -8,7 +8,7 @@ set(INC_SYS
)
set(SRC
set(SRC_CPU_KERNELS
kernels/cpu/kernel.cpp
kernels/cpu/kernel_sse2.cpp
kernels/cpu/kernel_sse3.cpp
@ -27,6 +27,15 @@ set(SRC
kernels/cpu/filter_sse41.cpp
kernels/cpu/filter_avx.cpp
kernels/cpu/filter_avx2.cpp
)
set(SRC_CUDA_KERNELS
kernels/cuda/kernel.cu
kernels/cuda/kernel_split.cu
kernels/cuda/filter.cu
)
set(SRC_OPENCL_KERNELS
kernels/opencl/kernel.cl
kernels/opencl/kernel_state_buffer_size.cl
kernels/opencl/kernel_split.cl
@ -50,9 +59,6 @@ set(SRC
kernels/opencl/kernel_indirect_subsurface.cl
kernels/opencl/kernel_buffer_update.cl
kernels/opencl/filter.cl
kernels/cuda/kernel.cu
kernels/cuda/kernel_split.cu
kernels/cuda/filter.cu
)
set(SRC_BVH_HEADERS
@ -83,7 +89,6 @@ set(SRC_HEADERS
kernel_emission.h
kernel_film.h
kernel_globals.h
kernel_image_opencl.h
kernel_jitter.h
kernel_light.h
kernel_math.h
@ -119,10 +124,12 @@ set(SRC_KERNELS_CPU_HEADERS
set(SRC_KERNELS_CUDA_HEADERS
kernels/cuda/kernel_config.h
kernels/cuda/kernel_cuda_image.h
)
set(SRC_KERNELS_OPENCL_HEADERS
kernels/opencl/kernel_split_function.h
kernels/opencl/kernel_opencl_image.h
)
set(SRC_CLOSURE_HEADERS
@ -457,7 +464,9 @@ if(CXX_HAS_AVX2)
endif()
add_library(cycles_kernel
${SRC}
${SRC_CPU_KERNELS}
${SRC_CUDA_KERNELS}
${SRC_OPENCL_KERNELS}
${SRC_HEADERS}
${SRC_KERNELS_CPU_HEADERS}
${SRC_KERNELS_CUDA_HEADERS}
@ -484,34 +493,10 @@ endif()
#add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_state_buffer_size.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_do_volume.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_background.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_sort.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_dl.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_enqueue_inactive.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_split_function.h" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/filter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/filter.cu" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_OPENCL_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CUDA_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_OPENCL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_CUDA_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/bvh)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/closure)

@ -605,8 +605,7 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure
/* if fresnel is used, calculate the color with reflection_color(...) */
if(use_fresnel) {
*pdf = 1.0f;
*eval = reflection_color(bsdf, *omega_in, m);
*eval *= reflection_color(bsdf, *omega_in, m);
}
label = LABEL_REFLECT | LABEL_SINGULAR;

@ -29,21 +29,6 @@ CCL_NAMESPACE_BEGIN
/* Return position normalized to 0..1 in mesh bounds */
#if defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300
ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z)
{
float4 r;
switch(id) {
case 0: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_000, x, y, z); break;
case 8: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_008, x, y, z); break;
case 16: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_016, x, y, z); break;
case 24: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_024, x, y, z); break;
case 32: r = kernel_tex_image_interp_3d(__tex_image_float4_3d_032, x, y, z); break;
}
return r;
}
#endif /* __KERNEL_CUDA__ */
ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
const ShaderData *sd,
float3 P)
@ -65,23 +50,8 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
{
float3 P = volume_normalized_position(kg, sd, sd->P);
#ifdef __KERNEL_CUDA__
# if __CUDA_ARCH__ >= 300
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset);
float f = kernel_tex_image_interp_3d_float(tex, P.x, P.y, P.z);
float4 r = make_float4(f, f, f, 1.0f);
# else
float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z);
# endif
#elif defined(__KERNEL_OPENCL__)
float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
#else
float4 r;
if(sd->flag & SD_VOLUME_CUBIC)
r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC);
else
r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z);
#endif
InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE;
float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
if(dx) *dx = 0.0f;
if(dy) *dy = 0.0f;
@ -92,22 +62,8 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
{
float3 P = volume_normalized_position(kg, sd, sd->P);
#ifdef __KERNEL_CUDA__
# if __CUDA_ARCH__ >= 300
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset);
float4 r = kernel_tex_image_interp_3d_float4(tex, P.x, P.y, P.z);
# else
float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z);
# endif
#elif defined(__KERNEL_OPENCL__)
float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
#else
float4 r;
if(sd->flag & SD_VOLUME_CUBIC)
r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC);
else
r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z);
#endif
InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE;
float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);

@ -74,7 +74,7 @@ CCL_NAMESPACE_BEGIN
* pointer lookup. */
template<typename T> struct texture {
ccl_always_inline T fetch(int index)
ccl_always_inline const T& fetch(int index)
{
kernel_assert(index >= 0 && index < width);
return data[index];
@ -112,449 +112,6 @@ template<typename T> struct texture {
int width;
};
template<typename T> struct texture_image {
#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
{ \
u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \
u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
u[3] = (1.0f / 6.0f) * t * t * t; \
} (void)0
ccl_always_inline float4 read(float4 r)
{
return r;
}
ccl_always_inline float4 read(uchar4 r)
{
float f = 1.0f/255.0f;
return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
}
ccl_always_inline float4 read(uchar r)
{
float f = r*(1.0f/255.0f);
return make_float4(f, f, f, 1.0f);
}
ccl_always_inline float4 read(float r)
{
/* TODO(dingto): Optimize this, so interpolation
* happens on float instead of float4 */
return make_float4(r, r, r, 1.0f);
}
ccl_always_inline float4 read(half4 r)
{
return half4_to_float4(r);
}
ccl_always_inline float4 read(half r)
{
float f = half_to_float(r);
return make_float4(f, f, f, 1.0f);
}
ccl_always_inline int wrap_periodic(int x, int width)
{
x %= width;
if(x < 0)
x += width;
return x;
}
ccl_always_inline int wrap_clamp(int x, int width)
{
return clamp(x, 0, width-1);
}
ccl_always_inline float frac(float x, int *ix)
{
int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
*ix = i;
return x - (float)i;
}
ccl_always_inline float4 interp(float x, float y)
{
if(UNLIKELY(!data))
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
int ix, iy, nix, niy;
if(interpolation == INTERPOLATION_CLOSEST) {
frac(x*(float)width, &ix);
frac(y*(float)height, &iy);
switch(extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
iy = wrap_periodic(iy, height);
break;
case EXTENSION_CLIP:
if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
ATTR_FALLTHROUGH;
case EXTENSION_EXTEND:
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
return read(data[ix + iy*width]);
}
else if(interpolation == INTERPOLATION_LINEAR) {
float tx = frac(x*(float)width - 0.5f, &ix);
float ty = frac(y*(float)height - 0.5f, &iy);
switch(extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
iy = wrap_periodic(iy, height);
nix = wrap_periodic(ix+1, width);
niy = wrap_periodic(iy+1, height);
break;
case EXTENSION_CLIP:
if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
ATTR_FALLTHROUGH;
case EXTENSION_EXTEND:
nix = wrap_clamp(ix+1, width);
niy = wrap_clamp(iy+1, height);
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]);
r += (1.0f - ty)*tx*read(data[nix + iy*width]);
r += ty*(1.0f - tx)*read(data[ix + niy*width]);
r += ty*tx*read(data[nix + niy*width]);
return r;
}
else {
/* Bicubic b-spline interpolation. */
float tx = frac(x*(float)width - 0.5f, &ix);
float ty = frac(y*(float)height - 0.5f, &iy);
int pix, piy, nnix, nniy;
switch(extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
iy = wrap_periodic(iy, height);
pix = wrap_periodic(ix-1, width);
piy = wrap_periodic(iy-1, height);
nix = wrap_periodic(ix+1, width);
niy = wrap_periodic(iy+1, height);
nnix = wrap_periodic(ix+2, width);
nniy = wrap_periodic(iy+2, height);
break;
case EXTENSION_CLIP:
if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
ATTR_FALLTHROUGH;
case EXTENSION_EXTEND:
pix = wrap_clamp(ix-1, width);
piy = wrap_clamp(iy-1, height);
nix = wrap_clamp(ix+1, width);
niy = wrap_clamp(iy+1, height);
nnix = wrap_clamp(ix+2, width);
nniy = wrap_clamp(iy+2, height);
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
const int xc[4] = {pix, ix, nix, nnix};
const int yc[4] = {width * piy,
width * iy,
width * niy,
width * nniy};
float u[4], v[4];
/* Some helper macro to keep code reasonable size,
* let compiler to inline all the matrix multiplications.
*/
#define DATA(x, y) (read(data[xc[x] + yc[y]]))
#define TERM(col) \
(v[col] * (u[0] * DATA(0, col) + \
u[1] * DATA(1, col) + \
u[2] * DATA(2, col) + \
u[3] * DATA(3, col)))
SET_CUBIC_SPLINE_WEIGHTS(u, tx);
SET_CUBIC_SPLINE_WEIGHTS(v, ty);
/* Actual interpolation. */
return TERM(0) + TERM(1) + TERM(2) + TERM(3);
#undef TERM
#undef DATA
}
}
ccl_always_inline float4 interp_3d(float x, float y, float z)
{
return interp_3d_ex(x, y, z, interpolation);
}
ccl_always_inline float4 interp_3d_ex_closest(float x, float y, float z)
{
int ix, iy, iz;
frac(x*(float)width, &ix);
frac(y*(float)height, &iy);
frac(z*(float)depth, &iz);
switch(extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
iy = wrap_periodic(iy, height);
iz = wrap_periodic(iz, depth);
break;
case EXTENSION_CLIP:
if(x < 0.0f || y < 0.0f || z < 0.0f ||
x > 1.0f || y > 1.0f || z > 1.0f)
{
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
ATTR_FALLTHROUGH;
case EXTENSION_EXTEND:
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
iz = wrap_clamp(iz, depth);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
return read(data[ix + iy*width + iz*width*height]);
}
ccl_always_inline float4 interp_3d_ex_linear(float x, float y, float z)
{
int ix, iy, iz;
int nix, niy, niz;
float tx = frac(x*(float)width - 0.5f, &ix);
float ty = frac(y*(float)height - 0.5f, &iy);
float tz = frac(z*(float)depth - 0.5f, &iz);
switch(extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
iy = wrap_periodic(iy, height);
iz = wrap_periodic(iz, depth);
nix = wrap_periodic(ix+1, width);
niy = wrap_periodic(iy+1, height);
niz = wrap_periodic(iz+1, depth);
break;
case EXTENSION_CLIP:
if(x < 0.0f || y < 0.0f || z < 0.0f ||
x > 1.0f || y > 1.0f || z > 1.0f)
{
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
ATTR_FALLTHROUGH;
case EXTENSION_EXTEND:
nix = wrap_clamp(ix+1, width);
niy = wrap_clamp(iy+1, height);
niz = wrap_clamp(iz+1, depth);
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
iz = wrap_clamp(iz, depth);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
float4 r;
r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]);
r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]);
r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]);
r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]);
r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]);
r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]);
r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]);
r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]);
return r;
}
/* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
* causing stack overflow issue in this function unless it is inlined.
*
* Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
* enabled.
*/
#ifdef __GNUC__
ccl_always_inline
#else
ccl_never_inline
#endif
float4 interp_3d_ex_tricubic(float x, float y, float z)
{
int ix, iy, iz;
int nix, niy, niz;
/* Tricubic b-spline interpolation. */
const float tx = frac(x*(float)width - 0.5f, &ix);
const float ty = frac(y*(float)height - 0.5f, &iy);
const float tz = frac(z*(float)depth - 0.5f, &iz);
int pix, piy, piz, nnix, nniy, nniz;
switch(extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
iy = wrap_periodic(iy, height);
iz = wrap_periodic(iz, depth);
pix = wrap_periodic(ix-1, width);
piy = wrap_periodic(iy-1, height);
piz = wrap_periodic(iz-1, depth);
nix = wrap_periodic(ix+1, width);
niy = wrap_periodic(iy+1, height);
niz = wrap_periodic(iz+1, depth);
nnix = wrap_periodic(ix+2, width);
nniy = wrap_periodic(iy+2, height);
nniz = wrap_periodic(iz+2, depth);
break;
case EXTENSION_CLIP:
if(x < 0.0f || y < 0.0f || z < 0.0f ||
x > 1.0f || y > 1.0f || z > 1.0f)
{
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
ATTR_FALLTHROUGH;
case EXTENSION_EXTEND:
pix = wrap_clamp(ix-1, width);
piy = wrap_clamp(iy-1, height);
piz = wrap_clamp(iz-1, depth);
nix = wrap_clamp(ix+1, width);
niy = wrap_clamp(iy+1, height);
niz = wrap_clamp(iz+1, depth);
nnix = wrap_clamp(ix+2, width);
nniy = wrap_clamp(iy+2, height);
nniz = wrap_clamp(iz+2, depth);
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
iz = wrap_clamp(iz, depth);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
const int xc[4] = {pix, ix, nix, nnix};
const int yc[4] = {width * piy,
width * iy,
width * niy,
width * nniy};
const int zc[4] = {width * height * piz,
width * height * iz,
width * height * niz,
width * height * nniz};
float u[4], v[4], w[4];
/* Some helper macro to keep code reasonable size,
* let compiler to inline all the matrix multiplications.
*/
#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
#define COL_TERM(col, row) \
(v[col] * (u[0] * DATA(0, col, row) + \
u[1] * DATA(1, col, row) + \
u[2] * DATA(2, col, row) + \
u[3] * DATA(3, col, row)))
#define ROW_TERM(row) \
(w[row] * (COL_TERM(0, row) + \
COL_TERM(1, row) + \
COL_TERM(2, row) + \
COL_TERM(3, row)))
SET_CUBIC_SPLINE_WEIGHTS(u, tx);
SET_CUBIC_SPLINE_WEIGHTS(v, ty);
SET_CUBIC_SPLINE_WEIGHTS(w, tz);
/* Actual interpolation. */
return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
#undef COL_TERM
#undef ROW_TERM
#undef DATA
}
ccl_always_inline float4 interp_3d_ex(float x, float y, float z,
int interpolation = INTERPOLATION_LINEAR)
{
if(UNLIKELY(!data))
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
switch(interpolation) {
case INTERPOLATION_CLOSEST:
return interp_3d_ex_closest(x, y, z);
case INTERPOLATION_LINEAR:
return interp_3d_ex_linear(x, y, z);
default:
return interp_3d_ex_tricubic(x, y, z);
}
}
ccl_always_inline void dimensions_set(int width_, int height_, int depth_)
{
width = width_;
height = height_;
depth = depth_;
}
T *data;
int interpolation;
ExtensionType extension;
int width, height, depth;
#undef SET_CUBIC_SPLINE_WEIGHTS
};
typedef texture<float4> texture_float4;
typedef texture<float2> texture_float2;
typedef texture<float> texture_float;
typedef texture<uint> texture_uint;
typedef texture<int> texture_int;
typedef texture<uint4> texture_uint4;
typedef texture<uchar4> texture_uchar4;
typedef texture<uchar> texture_uchar;
typedef texture_image<float> texture_image_float;
typedef texture_image<uchar> texture_image_uchar;
typedef texture_image<half> texture_image_half;
typedef texture_image<float4> texture_image_float4;
typedef texture_image<uchar4> texture_image_uchar4;
typedef texture_image<half4> texture_image_half4;
/* Macros to handle different memory storage on different devices */
#define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
@ -563,10 +120,6 @@ typedef texture_image<half4> texture_image_half4;
#define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index))
#define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))
#define kernel_tex_image_interp(tex,x,y) kernel_tex_image_interp_impl(kg,tex,x,y)
#define kernel_tex_image_interp_3d(tex, x, y, z) kernel_tex_image_interp_3d_impl(kg,tex,x,y,z)
#define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) kernel_tex_image_interp_3d_ex_impl(kg,tex, x, y, z, interpolation)
#define kernel_data (kg->__data)
#ifdef __KERNEL_SSE2__

@ -126,42 +126,16 @@ ccl_device_inline uint ccl_num_groups(uint d)
/* Textures */
typedef texture<float4, 1> texture_float4;
typedef texture<float2, 1> texture_float2;
typedef texture<float, 1> texture_float;
typedef texture<uint, 1> texture_uint;
typedef texture<int, 1> texture_int;
typedef texture<uint4, 1> texture_uint4;
typedef texture<uchar, 1> texture_uchar;
typedef texture<uchar4, 1> texture_uchar4;
/* Use arrays for regular data. This is a little slower than textures on Fermi,
* but allows for cleaner code and we will stop supporting Fermi soon. */
#define kernel_tex_fetch(t, index) t[(index)]
/* On Kepler (6xx) and above, we use Bindless Textures for images.
* On Fermi cards (4xx and 5xx), we have to use regular textures. */
#if __CUDA_ARCH__ < 300
typedef texture<float4, 2> texture_image_float4;
typedef texture<float4, 3> texture_image3d_float4;
typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
/* Macros to handle different memory storage on different devices */
/* On Fermi cards (4xx and 5xx), we use regular textures for both data and images.
* On Kepler (6xx) and above, we use Bindless Textures for images and arrays for data.
*
* Arrays are necessary in order to use the full VRAM on newer cards, and it's slightly faster.
* Using Arrays on Fermi turned out to be slower.*/
/* Fermi */
#if __CUDA_ARCH__ < 300
# define __KERNEL_CUDA_TEX_STORAGE__
# define kernel_tex_fetch(t, index) tex1Dfetch(t, index)
# define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
# define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z)
/* Kepler */
#else
# define kernel_tex_fetch(t, index) t[(index)]
# define kernel_tex_image_interp_float4(t, x, y) tex2D<float4>(t, x, y)
# define kernel_tex_image_interp_float(t, x, y) tex2D<float>(t, x, y)
# define kernel_tex_image_interp_3d_float4(t, x, y, z) tex3D<float4>(t, x, y, z)
# define kernel_tex_image_interp_3d_float(t, x, y, z) tex3D<float>(t, x, y, z)
#endif
#define kernel_data __data

@ -144,7 +144,7 @@
/* data lookup defines */
#define kernel_data (*kg->data)
#define kernel_tex_fetch(tex, index) ((ccl_global tex##_t*)(kg->buffers[kg->tex.buffer] + kg->tex.offset))[(index)]
#define kernel_tex_fetch(tex, index) ((const ccl_global tex##_t*)(kg->buffers[kg->tex.cl_buffer] + kg->tex.data))[(index)]
/* define NULL */
#define NULL 0

@ -46,14 +46,7 @@ struct Intersection;
struct VolumeStep;
typedef struct KernelGlobals {
vector<texture_image_float4> texture_float4_images;
vector<texture_image_uchar4> texture_byte4_images;
vector<texture_image_half4> texture_half4_images;
vector<texture_image_float> texture_float_images;
vector<texture_image_uchar> texture_byte_images;
vector<texture_image_half> texture_half_images;
# define KERNEL_TEX(type, ttype, name) ttype name;
# define KERNEL_TEX(type, name) texture<type> name;
# define KERNEL_IMAGE_TEX(type, ttype, name)
# include "kernel/kernel_textures.h"
@ -99,11 +92,7 @@ typedef struct KernelGlobals {
Intersection hits_stack[64];
} KernelGlobals;
# ifdef __KERNEL_CUDA_TEX_STORAGE__
# define KERNEL_TEX(type, ttype, name) ttype name;
# else
# define KERNEL_TEX(type, ttype, name) const __constant__ __device__ type *name;
# endif
# define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
# define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
# include "kernel/kernel_textures.h"
@ -113,22 +102,16 @@ typedef struct KernelGlobals {
#ifdef __KERNEL_OPENCL__
# define KERNEL_TEX(type, ttype, name) \
# define KERNEL_TEX(type, name) \
typedef type name##_t;
# include "kernel/kernel_textures.h"
typedef struct tex_info_t {
uint buffer, padding;
uint64_t offset;
uint width, height, depth, options;
} tex_info_t;
typedef ccl_addr_space struct KernelGlobals {
ccl_constant KernelData *data;
ccl_global char *buffers[8];
# define KERNEL_TEX(type, ttype, name) \
tex_info_t name;
# define KERNEL_TEX(type, name) \
TextureInfo name;
# include "kernel/kernel_textures.h"
# ifdef __SPLIT_KERNEL__
@ -176,9 +159,9 @@ ccl_device_inline void kernel_set_buffer_info(KernelGlobals *kg)
if(ccl_local_id(0) + ccl_local_id(1) == 0)
# endif
{
ccl_global tex_info_t *info = (ccl_global tex_info_t*)kg->buffers[0];
ccl_global TextureInfo *info = (ccl_global TextureInfo*)kg->buffers[0];
# define KERNEL_TEX(type, ttype, name) \
# define KERNEL_TEX(type, name) \
kg->name = *(info++);
# include "kernel/kernel_textures.h"
}

@ -1,252 +0,0 @@
/*
* Copyright 2016 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* For OpenCL we do manual lookup and interpolation. */
ccl_device_inline ccl_global tex_info_t* kernel_tex_info(KernelGlobals *kg, uint id) {
const uint tex_offset = id
#define KERNEL_TEX(type, ttype, name) + 1
#include "kernel/kernel_textures.h"
;
return &((ccl_global tex_info_t*)kg->buffers[0])[tex_offset];
}
#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->buffer] + info->offset))[(index)]
ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
{
const ccl_global tex_info_t *info = kernel_tex_info(kg, id);
const int texture_type = kernel_tex_type(id);
/* Float4 */
if(texture_type == IMAGE_DATA_TYPE_FLOAT4) {
return tex_fetch(float4, info, offset);
}
/* Byte4 */
else if(texture_type == IMAGE_DATA_TYPE_BYTE4) {
uchar4 r = tex_fetch(uchar4, info, offset);
float f = 1.0f/255.0f;
return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
}
/* Float */
else if(texture_type == IMAGE_DATA_TYPE_FLOAT) {
float f = tex_fetch(float, info, offset);
return make_float4(f, f, f, 1.0f);
}
/* Byte */
else {
uchar r = tex_fetch(uchar, info, offset);
float f = r * (1.0f/255.0f);
return make_float4(f, f, f, 1.0f);
}
}
ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
{
x %= width;
if(x < 0)
x += width;
return x;
}
ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
{
return clamp(x, 0, width-1);
}
ccl_device_inline float svm_image_texture_frac(float x, int *ix)
{
int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
*ix = i;
return x - (float)i;
}
ccl_device_inline uint kernel_decode_image_interpolation(uint info)
{
return (info & (1 << 0)) ? INTERPOLATION_CLOSEST : INTERPOLATION_LINEAR;
}
ccl_device_inline uint kernel_decode_image_extension(uint info)
{
if(info & (1 << 1)) {
return EXTENSION_REPEAT;
}
else if(info & (1 << 2)) {
return EXTENSION_EXTEND;
}
else {
return EXTENSION_CLIP;
}
}
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
const ccl_global tex_info_t *info = kernel_tex_info(kg, id);
uint width = info->width;
uint height = info->height;
uint offset = 0;
/* Decode image options. */
uint interpolation = kernel_decode_image_interpolation(info->options);
uint extension = kernel_decode_image_extension(info->options);
/* Actual sampling. */
float4 r;
int ix, iy, nix, niy;
if(interpolation == INTERPOLATION_CLOSEST) {
svm_image_texture_frac(x*width, &ix);
svm_image_texture_frac(y*height, &iy);
if(extension == EXTENSION_REPEAT) {
ix = svm_image_texture_wrap_periodic(ix, width);
iy = svm_image_texture_wrap_periodic(iy, height);
}
else {
if(extension == EXTENSION_CLIP) {
if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
}
/* Fall through. */
/* EXTENSION_EXTEND */
ix = svm_image_texture_wrap_clamp(ix, width);
iy = svm_image_texture_wrap_clamp(iy, height);
}
r = svm_image_texture_read(kg, id, offset + ix + iy*width);
}
else { /* INTERPOLATION_LINEAR */
float tx = svm_image_texture_frac(x*width - 0.5f, &ix);
float ty = svm_image_texture_frac(y*height - 0.5f, &iy);
if(extension == EXTENSION_REPEAT) {
ix = svm_image_texture_wrap_periodic(ix, width);
iy = svm_image_texture_wrap_periodic(iy, height);
nix = svm_image_texture_wrap_periodic(ix+1, width);
niy = svm_image_texture_wrap_periodic(iy+1, height);
}
else {
if(extension == EXTENSION_CLIP) {
if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
}
nix = svm_image_texture_wrap_clamp(ix+1, width);
niy = svm_image_texture_wrap_clamp(iy+1, height);
ix = svm_image_texture_wrap_clamp(ix, width);
iy = svm_image_texture_wrap_clamp(iy, height);
}
r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width);
r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width);
r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width);
r += ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width);
}
return r;
}
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z)
{
const ccl_global tex_info_t *info = kernel_tex_info(kg, id);
uint width = info->width;
uint height = info->height;
uint offset = 0;
uint depth = info->depth;
/* Decode image options. */
uint interpolation = kernel_decode_image_interpolation(info->options);
uint extension = kernel_decode_image_extension(info->options);
/* Actual sampling. */
float4 r;
int ix, iy, iz, nix, niy, niz;
if(interpolation == INTERPOLATION_CLOSEST) {
svm_image_texture_frac(x*width, &ix);
svm_image_texture_frac(y*height, &iy);
svm_image_texture_frac(z*depth, &iz);
if(extension == EXTENSION_REPEAT) {
ix = svm_image_texture_wrap_periodic(ix, width);
iy = svm_image_texture_wrap_periodic(iy, height);
iz = svm_image_texture_wrap_periodic(iz, depth);
}
else {
if(extension == EXTENSION_CLIP) {
if(x < 0.0f || y < 0.0f || z < 0.0f ||
x > 1.0f || y > 1.0f || z > 1.0f)
{
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
}
/* Fall through. */
/* EXTENSION_EXTEND */
ix = svm_image_texture_wrap_clamp(ix, width);
iy = svm_image_texture_wrap_clamp(iy, height);
iz = svm_image_texture_wrap_clamp(iz, depth);
}
r = svm_image_texture_read(kg, id, offset + ix + iy*width + iz*width*height);
}
else { /* INTERPOLATION_LINEAR */
float tx = svm_image_texture_frac(x*(float)width - 0.5f, &ix);
float ty = svm_image_texture_frac(y*(float)height - 0.5f, &iy);
float tz = svm_image_texture_frac(z*(float)depth - 0.5f, &iz);
if(extension == EXTENSION_REPEAT) {
ix = svm_image_texture_wrap_periodic(ix, width);
iy = svm_image_texture_wrap_periodic(iy, height);
iz = svm_image_texture_wrap_periodic(iz, depth);
nix = svm_image_texture_wrap_periodic(ix+1, width);
niy = svm_image_texture_wrap_periodic(iy+1, height);
niz = svm_image_texture_wrap_periodic(iz+1, depth);
}
else {
if(extension == EXTENSION_CLIP) {
if(x < 0.0f || y < 0.0f || z < 0.0f ||
x > 1.0f || y > 1.0f || z > 1.0f)
{
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
}
/* Fall through. */
/* EXTENSION_EXTEND */
nix = svm_image_texture_wrap_clamp(ix+1, width);
niy = svm_image_texture_wrap_clamp(iy+1, height);
niz = svm_image_texture_wrap_clamp(iz+1, depth);
ix = svm_image_texture_wrap_clamp(ix, width);
iy = svm_image_texture_wrap_clamp(iy, height);
iz = svm_image_texture_wrap_clamp(iz, depth);
}
r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width + iz*width*height);
r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width + iz*width*height);
r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width + iz*width*height);
r += (1.0f - tz)*ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width + iz*width*height);
r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width + niz*width*height);
r += tz*(1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width + niz*width*height);
r += tz*ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width + niz*width*height);
r += tz*ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width + niz*width*height);
}
return r;
}

@ -15,7 +15,7 @@
*/
#ifndef KERNEL_TEX
# define KERNEL_TEX(type, ttype, name)
# define KERNEL_TEX(type, name)
#endif
#ifndef KERNEL_IMAGE_TEX
@ -23,63 +23,65 @@
#endif
/* bvh */
KERNEL_TEX(float4, texture_float4, __bvh_nodes)
KERNEL_TEX(float4, texture_float4, __bvh_leaf_nodes)
KERNEL_TEX(float4, texture_float4, __prim_tri_verts)
KERNEL_TEX(uint, texture_uint, __prim_tri_index)
KERNEL_TEX(uint, texture_uint, __prim_type)
KERNEL_TEX(uint, texture_uint, __prim_visibility)
KERNEL_TEX(uint, texture_uint, __prim_index)
KERNEL_TEX(uint, texture_uint, __prim_object)
KERNEL_TEX(uint, texture_uint, __object_node)
KERNEL_TEX(float2, texture_float2, __prim_time)
KERNEL_TEX(float4, __bvh_nodes)
KERNEL_TEX(float4, __bvh_leaf_nodes)
KERNEL_TEX(float4, __prim_tri_verts)
KERNEL_TEX(uint, __prim_tri_index)
KERNEL_TEX(uint, __prim_type)
KERNEL_TEX(uint, __prim_visibility)
KERNEL_TEX(uint, __prim_index)
KERNEL_TEX(uint, __prim_object)
KERNEL_TEX(uint, __object_node)
KERNEL_TEX(float2, __prim_time)
/* objects */
KERNEL_TEX(float4, texture_float4, __objects)
KERNEL_TEX(float4, texture_float4, __objects_vector)
KERNEL_TEX(float4, __objects)
KERNEL_TEX(float4, __objects_vector)
/* triangles */
KERNEL_TEX(uint, texture_uint, __tri_shader)
KERNEL_TEX(float4, texture_float4, __tri_vnormal)
KERNEL_TEX(uint4, texture_uint4, __tri_vindex)
KERNEL_TEX(uint, texture_uint, __tri_patch)
KERNEL_TEX(float2, texture_float2, __tri_patch_uv)
KERNEL_TEX(uint, __tri_shader)
KERNEL_TEX(float4, __tri_vnormal)
KERNEL_TEX(uint4, __tri_vindex)
KERNEL_TEX(uint, __tri_patch)
KERNEL_TEX(float2, __tri_patch_uv)
/* curves */
KERNEL_TEX(float4, texture_float4, __curves)
KERNEL_TEX(float4, texture_float4, __curve_keys)
KERNEL_TEX(float4, __curves)
KERNEL_TEX(float4, __curve_keys)
/* patches */
KERNEL_TEX(uint, texture_uint, __patches)
KERNEL_TEX(uint, __patches)
/* attributes */
KERNEL_TEX(uint4, texture_uint4, __attributes_map)
KERNEL_TEX(float, texture_float, __attributes_float)
KERNEL_TEX(float4, texture_float4, __attributes_float3)
KERNEL_TEX(uchar4, texture_uchar4, __attributes_uchar4)
KERNEL_TEX(uint4, __attributes_map)
KERNEL_TEX(float, __attributes_float)
KERNEL_TEX(float4, __attributes_float3)
KERNEL_TEX(uchar4, __attributes_uchar4)
/* lights */
KERNEL_TEX(float4, texture_float4, __light_distribution)
KERNEL_TEX(float4, texture_float4, __light_data)
KERNEL_TEX(float2, texture_float2, __light_background_marginal_cdf)
KERNEL_TEX(float2, texture_float2, __light_background_conditional_cdf)
KERNEL_TEX(float4, __light_distribution)
KERNEL_TEX(float4, __light_data)
KERNEL_TEX(float2, __light_background_marginal_cdf)
KERNEL_TEX(float2, __light_background_conditional_cdf)
/* particles */
KERNEL_TEX(float4, texture_float4, __particles)
KERNEL_TEX(float4, __particles)
/* shaders */
KERNEL_TEX(uint4, texture_uint4, __svm_nodes)
KERNEL_TEX(uint, texture_uint, __shader_flag)
KERNEL_TEX(uint, texture_uint, __object_flag)
KERNEL_TEX(uint4, __svm_nodes)
KERNEL_TEX(uint, __shader_flag)
KERNEL_TEX(uint, __object_flag)
/* lookup tables */
KERNEL_TEX(float, texture_float, __lookup_table)
KERNEL_TEX(float, __lookup_table)
/* sobol */
KERNEL_TEX(uint, texture_uint, __sobol_directions)
KERNEL_TEX(uint, __sobol_directions)
#ifdef __KERNEL_CUDA__
# if __CUDA_ARCH__ < 300
#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ >= 300
/* image textures */
KERNEL_TEX(TextureInfo, __texture_info)
#else
/* full-float image */
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_000)
KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_008)
@ -180,12 +182,7 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_641)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_649)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_657)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_665)
# else
/* bindless textures */
KERNEL_TEX(uint, texture_uint, __bindless_mapping)
# endif /* __CUDA_ARCH__ */
#endif /* __KERNEL_CUDA__ */
#endif /* defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300 */
#undef KERNEL_TEX
#undef KERNEL_IMAGE_TEX

@ -84,130 +84,16 @@ void kernel_tex_copy(KernelGlobals *kg,
if(0) {
}
#define KERNEL_TEX(type, ttype, tname) \
#define KERNEL_TEX(type, tname) \
else if(strcmp(name, #tname) == 0) { \
kg->tname.data = (type*)mem; \
kg->tname.width = width; \
}
#define KERNEL_IMAGE_TEX(type, ttype, tname)
#define KERNEL_IMAGE_TEX(type, tname)
#include "kernel/kernel_textures.h"
else if(strstr(name, "__tex_image_float4")) {
texture_image_float4 *tex = NULL;
int id = atoi(name + strlen("__tex_image_float4_"));
int array_index = kernel_tex_index(id);
if(array_index >= 0) {
if(array_index >= kg->texture_float4_images.size()) {
kg->texture_float4_images.resize(array_index+1);
}
tex = &kg->texture_float4_images[array_index];
}
if(tex) {
tex->data = (float4*)mem;
tex->dimensions_set(width, height, depth);
tex->interpolation = interpolation;
tex->extension = extension;
}
}
else if(strstr(name, "__tex_image_float")) {
texture_image_float *tex = NULL;
int id = atoi(name + strlen("__tex_image_float_"));
int array_index = kernel_tex_index(id);
if(array_index >= 0) {
if(array_index >= kg->texture_float_images.size()) {
kg->texture_float_images.resize(array_index+1);
}
tex = &kg->texture_float_images[array_index];
}
if(tex) {
tex->data = (float*)mem;
tex->dimensions_set(width, height, depth);
tex->interpolation = interpolation;
tex->extension = extension;
}
}
else if(strstr(name, "__tex_image_byte4")) {
texture_image_uchar4 *tex = NULL;
int id = atoi(name + strlen("__tex_image_byte4_"));
int array_index = kernel_tex_index(id);
if(array_index >= 0) {
if(array_index >= kg->texture_byte4_images.size()) {
kg->texture_byte4_images.resize(array_index+1);
}
tex = &kg->texture_byte4_images[array_index];
}
if(tex) {
tex->data = (uchar4*)mem;
tex->dimensions_set(width, height, depth);
tex->interpolation = interpolation;
tex->extension = extension;
}
}
else if(strstr(name, "__tex_image_byte")) {
texture_image_uchar *tex = NULL;
int id = atoi(name + strlen("__tex_image_byte_"));
int array_index = kernel_tex_index(id);
if(array_index >= 0) {
if(array_index >= kg->texture_byte_images.size()) {
kg->texture_byte_images.resize(array_index+1);
}
tex = &kg->texture_byte_images[array_index];
}
if(tex) {
tex->data = (uchar*)mem;
tex->dimensions_set(width, height, depth);
tex->interpolation = interpolation;
tex->extension = extension;
}
}
else if(strstr(name, "__tex_image_half4")) {
texture_image_half4 *tex = NULL;
int id = atoi(name + strlen("__tex_image_half4_"));
int array_index = kernel_tex_index(id);
if(array_index >= 0) {
if(array_index >= kg->texture_half4_images.size()) {
kg->texture_half4_images.resize(array_index+1);
}
tex = &kg->texture_half4_images[array_index];
}
if(tex) {
tex->data = (half4*)mem;
tex->dimensions_set(width, height, depth);
tex->interpolation = interpolation;
tex->extension = extension;
}
}
else if(strstr(name, "__tex_image_half")) {
texture_image_half *tex = NULL;
int id = atoi(name + strlen("__tex_image_half_"));
int array_index = kernel_tex_index(id);
if(array_index >= 0) {
if(array_index >= kg->texture_half_images.size()) {
kg->texture_half_images.resize(array_index+1);
}
tex = &kg->texture_half_images[array_index];
}
if(tex) {
tex->data = (half*)mem;
tex->dimensions_set(width, height, depth);
tex->interpolation = interpolation;
tex->extension = extension;
}
}
else
else {
assert(0);
}
}
CCL_NAMESPACE_END

@ -17,70 +17,478 @@
#ifndef __KERNEL_CPU_IMAGE_H__
#define __KERNEL_CPU_IMAGE_H__
#ifdef __KERNEL_CPU__
CCL_NAMESPACE_BEGIN
ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float x, float y)
template<typename T> struct TextureInterpolator {
#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
{ \
u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \
u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
u[3] = (1.0f / 6.0f) * t * t * t; \
} (void)0
static ccl_always_inline float4 read(float4 r)
{
return r;
}
static ccl_always_inline float4 read(uchar4 r)
{
float f = 1.0f/255.0f;
return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
}
static ccl_always_inline float4 read(uchar r)
{
float f = r*(1.0f/255.0f);
return make_float4(f, f, f, 1.0f);
}
static ccl_always_inline float4 read(float r)
{
/* TODO(dingto): Optimize this, so interpolation
* happens on float instead of float4 */
return make_float4(r, r, r, 1.0f);
}
static ccl_always_inline float4 read(half4 r)
{
return half4_to_float4(r);
}
static ccl_always_inline float4 read(half r)
{
float f = half_to_float(r);
return make_float4(f, f, f, 1.0f);
}
static ccl_always_inline int wrap_periodic(int x, int width)
{
x %= width;
if(x < 0)
x += width;
return x;
}
static ccl_always_inline int wrap_clamp(int x, int width)
{
return clamp(x, 0, width-1);
}
static ccl_always_inline float frac(float x, int *ix)
{
int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
*ix = i;
return x - (float)i;
}
static ccl_always_inline float4 interp(const TextureInfo& info, float x, float y)
{
if(UNLIKELY(!info.data))
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
const T *data = (const T*)info.data;
int width = info.width;
int height = info.height;
int ix, iy, nix, niy;
if(info.interpolation == INTERPOLATION_CLOSEST) {
frac(x*(float)width, &ix);
frac(y*(float)height, &iy);
switch(info.extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
iy = wrap_periodic(iy, height);
break;
case EXTENSION_CLIP:
if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
ATTR_FALLTHROUGH;
case EXTENSION_EXTEND:
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
return read(data[ix + iy*width]);
}
else if(info.interpolation == INTERPOLATION_LINEAR) {
float tx = frac(x*(float)width - 0.5f, &ix);
float ty = frac(y*(float)height - 0.5f, &iy);
switch(info.extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
iy = wrap_periodic(iy, height);
nix = wrap_periodic(ix+1, width);
niy = wrap_periodic(iy+1, height);
break;
case EXTENSION_CLIP:
if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
ATTR_FALLTHROUGH;
case EXTENSION_EXTEND:
nix = wrap_clamp(ix+1, width);
niy = wrap_clamp(iy+1, height);
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]);
r += (1.0f - ty)*tx*read(data[nix + iy*width]);
r += ty*(1.0f - tx)*read(data[ix + niy*width]);
r += ty*tx*read(data[nix + niy*width]);
return r;
}
else {
/* Bicubic b-spline interpolation. */
float tx = frac(x*(float)width - 0.5f, &ix);
float ty = frac(y*(float)height - 0.5f, &iy);
int pix, piy, nnix, nniy;
switch(info.extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
iy = wrap_periodic(iy, height);
pix = wrap_periodic(ix-1, width);
piy = wrap_periodic(iy-1, height);
nix = wrap_periodic(ix+1, width);
niy = wrap_periodic(iy+1, height);
nnix = wrap_periodic(ix+2, width);
nniy = wrap_periodic(iy+2, height);
break;
case EXTENSION_CLIP:
if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
ATTR_FALLTHROUGH;
case EXTENSION_EXTEND:
pix = wrap_clamp(ix-1, width);
piy = wrap_clamp(iy-1, height);
nix = wrap_clamp(ix+1, width);
niy = wrap_clamp(iy+1, height);
nnix = wrap_clamp(ix+2, width);
nniy = wrap_clamp(iy+2, height);
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
const int xc[4] = {pix, ix, nix, nnix};
const int yc[4] = {width * piy,
width * iy,
width * niy,
width * nniy};
float u[4], v[4];
/* Some helper macro to keep code reasonable size,
* let compiler to inline all the matrix multiplications.
*/
#define DATA(x, y) (read(data[xc[x] + yc[y]]))
#define TERM(col) \
(v[col] * (u[0] * DATA(0, col) + \
u[1] * DATA(1, col) + \
u[2] * DATA(2, col) + \
u[3] * DATA(3, col)))
SET_CUBIC_SPLINE_WEIGHTS(u, tx);
SET_CUBIC_SPLINE_WEIGHTS(v, ty);
/* Actual interpolation. */
return TERM(0) + TERM(1) + TERM(2) + TERM(3);
#undef TERM
#undef DATA
}
}
static ccl_always_inline float4 interp_3d_closest(const TextureInfo& info, float x, float y, float z)
{
int width = info.width;
int height = info.height;
int depth = info.depth;
int ix, iy, iz;
frac(x*(float)width, &ix);
frac(y*(float)height, &iy);
frac(z*(float)depth, &iz);
switch(info.extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
iy = wrap_periodic(iy, height);
iz = wrap_periodic(iz, depth);
break;
case EXTENSION_CLIP:
if(x < 0.0f || y < 0.0f || z < 0.0f ||
x > 1.0f || y > 1.0f || z > 1.0f)
{
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
ATTR_FALLTHROUGH;
case EXTENSION_EXTEND:
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
iz = wrap_clamp(iz, depth);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
const T *data = (const T*)info.data;
return read(data[ix + iy*width + iz*width*height]);
}
static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info, float x, float y, float z)
{
int width = info.width;
int height = info.height;
int depth = info.depth;
int ix, iy, iz;
int nix, niy, niz;
float tx = frac(x*(float)width - 0.5f, &ix);
float ty = frac(y*(float)height - 0.5f, &iy);
float tz = frac(z*(float)depth - 0.5f, &iz);
switch(info.extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
iy = wrap_periodic(iy, height);
iz = wrap_periodic(iz, depth);
nix = wrap_periodic(ix+1, width);
niy = wrap_periodic(iy+1, height);
niz = wrap_periodic(iz+1, depth);
break;
case EXTENSION_CLIP:
if(x < 0.0f || y < 0.0f || z < 0.0f ||
x > 1.0f || y > 1.0f || z > 1.0f)
{
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
ATTR_FALLTHROUGH;
case EXTENSION_EXTEND:
nix = wrap_clamp(ix+1, width);
niy = wrap_clamp(iy+1, height);
niz = wrap_clamp(iz+1, depth);
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
iz = wrap_clamp(iz, depth);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
const T *data = (const T*)info.data;
float4 r;
r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]);
r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]);
r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]);
r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]);
r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]);
r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]);
r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]);
r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]);
return r;
}
/* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
* causing stack overflow issue in this function unless it is inlined.
*
* Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
* enabled.
*/
#ifdef __GNUC__
static ccl_always_inline
#else
static ccl_never_inline
#endif
float4 interp_3d_tricubic(const TextureInfo& info, float x, float y, float z)
{
int width = info.width;
int height = info.height;
int depth = info.depth;
int ix, iy, iz;
int nix, niy, niz;
/* Tricubic b-spline interpolation. */
const float tx = frac(x*(float)width - 0.5f, &ix);
const float ty = frac(y*(float)height - 0.5f, &iy);
const float tz = frac(z*(float)depth - 0.5f, &iz);
int pix, piy, piz, nnix, nniy, nniz;
switch(info.extension) {
case EXTENSION_REPEAT:
ix = wrap_periodic(ix, width);
iy = wrap_periodic(iy, height);
iz = wrap_periodic(iz, depth);
pix = wrap_periodic(ix-1, width);
piy = wrap_periodic(iy-1, height);
piz = wrap_periodic(iz-1, depth);
nix = wrap_periodic(ix+1, width);
niy = wrap_periodic(iy+1, height);
niz = wrap_periodic(iz+1, depth);
nnix = wrap_periodic(ix+2, width);
nniy = wrap_periodic(iy+2, height);
nniz = wrap_periodic(iz+2, depth);
break;
case EXTENSION_CLIP:
if(x < 0.0f || y < 0.0f || z < 0.0f ||
x > 1.0f || y > 1.0f || z > 1.0f)
{
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
ATTR_FALLTHROUGH;
case EXTENSION_EXTEND:
pix = wrap_clamp(ix-1, width);
piy = wrap_clamp(iy-1, height);
piz = wrap_clamp(iz-1, depth);
nix = wrap_clamp(ix+1, width);
niy = wrap_clamp(iy+1, height);
niz = wrap_clamp(iz+1, depth);
nnix = wrap_clamp(ix+2, width);
nniy = wrap_clamp(iy+2, height);
nniz = wrap_clamp(iz+2, depth);
ix = wrap_clamp(ix, width);
iy = wrap_clamp(iy, height);
iz = wrap_clamp(iz, depth);
break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
const int xc[4] = {pix, ix, nix, nnix};
const int yc[4] = {width * piy,
width * iy,
width * niy,
width * nniy};
const int zc[4] = {width * height * piz,
width * height * iz,
width * height * niz,
width * height * nniz};
float u[4], v[4], w[4];
/* Some helper macro to keep code reasonable size,
* let compiler to inline all the matrix multiplications.
*/
#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
#define COL_TERM(col, row) \
(v[col] * (u[0] * DATA(0, col, row) + \
u[1] * DATA(1, col, row) + \
u[2] * DATA(2, col, row) + \
u[3] * DATA(3, col, row)))
#define ROW_TERM(row) \
(w[row] * (COL_TERM(0, row) + \
COL_TERM(1, row) + \
COL_TERM(2, row) + \
COL_TERM(3, row)))
SET_CUBIC_SPLINE_WEIGHTS(u, tx);
SET_CUBIC_SPLINE_WEIGHTS(v, ty);
SET_CUBIC_SPLINE_WEIGHTS(w, tz);
/* Actual interpolation. */
const T *data = (const T*)info.data;
return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
#undef COL_TERM
#undef ROW_TERM
#undef DATA
}
static ccl_always_inline float4 interp_3d(const TextureInfo& info,
float x, float y, float z,
InterpolationType interp)
{
if(UNLIKELY(!info.data))
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
switch((interp == INTERPOLATION_NONE)? info.interpolation: interp) {
case INTERPOLATION_CLOSEST:
return interp_3d_closest(info, x, y, z);
case INTERPOLATION_LINEAR:
return interp_3d_linear(info, x, y, z);
default:
return interp_3d_tricubic(info, x, y, z);
}
}
#undef SET_CUBIC_SPLINE_WEIGHTS
};
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
switch(kernel_tex_type(tex)) {
const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
switch(kernel_tex_type(id)) {
case IMAGE_DATA_TYPE_HALF:
return kg->texture_half_images[kernel_tex_index(tex)].interp(x, y);
return TextureInterpolator<half>::interp(info, x, y);
case IMAGE_DATA_TYPE_BYTE:
return kg->texture_byte_images[kernel_tex_index(tex)].interp(x, y);
return TextureInterpolator<uchar>::interp(info, x, y);
case IMAGE_DATA_TYPE_FLOAT:
return kg->texture_float_images[kernel_tex_index(tex)].interp(x, y);
return TextureInterpolator<float>::interp(info, x, y);
case IMAGE_DATA_TYPE_HALF4:
return kg->texture_half4_images[kernel_tex_index(tex)].interp(x, y);
return TextureInterpolator<half4>::interp(info, x, y);
case IMAGE_DATA_TYPE_BYTE4:
return kg->texture_byte4_images[kernel_tex_index(tex)].interp(x, y);
return TextureInterpolator<uchar4>::interp(info, x, y);
case IMAGE_DATA_TYPE_FLOAT4:
default:
return kg->texture_float4_images[kernel_tex_index(tex)].interp(x, y);
return TextureInterpolator<float4>::interp(info, x, y);
}
}
ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, float x, float y, float z)
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
{
switch(kernel_tex_type(tex)) {
case IMAGE_DATA_TYPE_HALF:
return kg->texture_half_images[kernel_tex_index(tex)].interp_3d(x, y, z);
case IMAGE_DATA_TYPE_BYTE:
return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d(x, y, z);
case IMAGE_DATA_TYPE_FLOAT:
return kg->texture_float_images[kernel_tex_index(tex)].interp_3d(x, y, z);
case IMAGE_DATA_TYPE_HALF4:
return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
case IMAGE_DATA_TYPE_BYTE4:
return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
case IMAGE_DATA_TYPE_FLOAT4:
default:
return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
}
}
const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation)
{
switch(kernel_tex_type(tex)) {
switch(kernel_tex_type(id)) {
case IMAGE_DATA_TYPE_HALF:
return kg->texture_half_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
case IMAGE_DATA_TYPE_BYTE:
return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
case IMAGE_DATA_TYPE_FLOAT:
return kg->texture_float_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
case IMAGE_DATA_TYPE_HALF4:
return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
case IMAGE_DATA_TYPE_BYTE4:
return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
case IMAGE_DATA_TYPE_FLOAT4:
default:
return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
}
}
CCL_NAMESPACE_END
#endif // __KERNEL_CPU__
#endif // __KERNEL_CPU_IMAGE_H__

@ -26,6 +26,7 @@
#include "kernel/kernel_math.h"
#include "kernel/kernel_types.h"
#include "kernel/kernel_globals.h"
#include "kernel/kernels/cuda/kernel_cuda_image.h"
#include "kernel/kernel_film.h"
#include "kernel/kernel_path.h"
#include "kernel/kernel_path_branched.h"

@ -0,0 +1,310 @@
/*
* Copyright 2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#if __CUDA_ARCH__ >= 300
/* Kepler */
/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
ccl_device float cubic_w0(float a)
{
return (1.0f/6.0f)*(a*(a*(-a + 3.0f) - 3.0f) + 1.0f);
}
ccl_device float cubic_w1(float a)
{
return (1.0f/6.0f)*(a*a*(3.0f*a - 6.0f) + 4.0f);
}
ccl_device float cubic_w2(float a)
{
return (1.0f/6.0f)*(a*(a*(-3.0f*a + 3.0f) + 3.0f) + 1.0f);
}
ccl_device float cubic_w3(float a)
{
return (1.0f/6.0f)*(a*a*a);
}
/* g0 and g1 are the two amplitude functions. */
ccl_device float cubic_g0(float a)
{
return cubic_w0(a) + cubic_w1(a);
}
ccl_device float cubic_g1(float a)
{
return cubic_w2(a) + cubic_w3(a);
}
/* h0 and h1 are the two offset functions */
ccl_device float cubic_h0(float a)
{
/* Note +0.5 offset to compensate for CUDA linear filtering convention. */
return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f;
}
ccl_device float cubic_h1(float a)
{
return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f;
}
/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
template<typename T>
ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo& info, CUtexObject tex, float x, float y)
{
x = (x * info.width) - 0.5f;
y = (y * info.height) - 0.5f;
float px = floor(x);
float py = floor(y);
float fx = x - px;
float fy = y - py;
float g0x = cubic_g0(fx);
float g1x = cubic_g1(fx);
float x0 = (px + cubic_h0(fx)) / info.width;
float x1 = (px + cubic_h1(fx)) / info.width;
float y0 = (py + cubic_h0(fy)) / info.height;
float y1 = (py + cubic_h1(fy)) / info.height;
return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) +
g1x * tex2D<T>(tex, x1, y0)) +
cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) +
g1x * tex2D<T>(tex, x1, y1));
}
/* Fast tricubic texture lookup using 8 bilinear lookups. */
template<typename T>
ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo& info, CUtexObject tex, float x, float y, float z)
{
x = (x * info.width) - 0.5f;
y = (y * info.height) - 0.5f;
z = (z * info.depth) - 0.5f;
float px = floor(x);
float py = floor(y);
float pz = floor(z);
float fx = x - px;
float fy = y - py;
float fz = z - pz;
float g0x = cubic_g0(fx);
float g1x = cubic_g1(fx);
float g0y = cubic_g0(fy);
float g1y = cubic_g1(fy);
float g0z = cubic_g0(fz);
float g1z = cubic_g1(fz);
float x0 = (px + cubic_h0(fx)) / info.width;
float x1 = (px + cubic_h1(fx)) / info.width;
float y0 = (py + cubic_h0(fy)) / info.height;
float y1 = (py + cubic_h1(fy)) / info.height;
float z0 = (pz + cubic_h0(fz)) / info.depth;
float z1 = (pz + cubic_h1(fz)) / info.depth;
return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) +
g1x * tex3D<T>(tex, x1, y0, z0)) +
g1y * (g0x * tex3D<T>(tex, x0, y1, z0) +
g1x * tex3D<T>(tex, x1, y1, z0))) +
g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) +
g1x * tex3D<T>(tex, x1, y0, z1)) +
g1y * (g0x * tex3D<T>(tex, x0, y1, z1) +
g1x * tex3D<T>(tex, x1, y1, z1)));
}
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
CUtexObject tex = (CUtexObject)info.data;
/* float4, byte4 and half4 */
const int texture_type = kernel_tex_type(id);
if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
texture_type == IMAGE_DATA_TYPE_BYTE4 ||
texture_type == IMAGE_DATA_TYPE_HALF4)
{
if(info.interpolation == INTERPOLATION_CUBIC) {
return kernel_tex_image_interp_bicubic<float4>(info, tex, x, y);
}
else {
return tex2D<float4>(tex, x, y);
}
}
/* float, byte and half */
else {
float f;
if(info.interpolation == INTERPOLATION_CUBIC) {
f = kernel_tex_image_interp_bicubic<float>(info, tex, x, y);
}
else {
f = tex2D<float>(tex, x, y);
}
return make_float4(f, f, f, 1.0f);
}
}
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
{
const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
CUtexObject tex = (CUtexObject)info.data;
uint interpolation = (interp == INTERPOLATION_NONE)? info.interpolation: interp;
const int texture_type = kernel_tex_type(id);
if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
texture_type == IMAGE_DATA_TYPE_BYTE4 ||
texture_type == IMAGE_DATA_TYPE_HALF4)
{
if(interpolation == INTERPOLATION_CUBIC) {
return kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z);
}
else {
return tex3D<float4>(tex, x, y, z);
}
}
else {
float f;
if(interpolation == INTERPOLATION_CUBIC) {
f = kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z);
}
else {
f = tex3D<float>(tex, x, y, z);
}
return make_float4(f, f, f, 1.0f);
}
}
#else
/* Fermi */
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
float4 r;
switch(id) {
case 0: r = tex2D(__tex_image_float4_000, x, y); break;
case 8: r = tex2D(__tex_image_float4_008, x, y); break;
case 16: r = tex2D(__tex_image_float4_016, x, y); break;
case 24: r = tex2D(__tex_image_float4_024, x, y); break;
case 32: r = tex2D(__tex_image_float4_032, x, y); break;
case 1: r = tex2D(__tex_image_byte4_001, x, y); break;
case 9: r = tex2D(__tex_image_byte4_009, x, y); break;
case 17: r = tex2D(__tex_image_byte4_017, x, y); break;
case 25: r = tex2D(__tex_image_byte4_025, x, y); break;
case 33: r = tex2D(__tex_image_byte4_033, x, y); break;
case 41: r = tex2D(__tex_image_byte4_041, x, y); break;
case 49: r = tex2D(__tex_image_byte4_049, x, y); break;
case 57: r = tex2D(__tex_image_byte4_057, x, y); break;
case 65: r = tex2D(__tex_image_byte4_065, x, y); break;
case 73: r = tex2D(__tex_image_byte4_073, x, y); break;
case 81: r = tex2D(__tex_image_byte4_081, x, y); break;
case 89: r = tex2D(__tex_image_byte4_089, x, y); break;
case 97: r = tex2D(__tex_image_byte4_097, x, y); break;
case 105: r = tex2D(__tex_image_byte4_105, x, y); break;
case 113: r = tex2D(__tex_image_byte4_113, x, y); break;
case 121: r = tex2D(__tex_image_byte4_121, x, y); break;
case 129: r = tex2D(__tex_image_byte4_129, x, y); break;
case 137: r = tex2D(__tex_image_byte4_137, x, y); break;
case 145: r = tex2D(__tex_image_byte4_145, x, y); break;
case 153: r = tex2D(__tex_image_byte4_153, x, y); break;
case 161: r = tex2D(__tex_image_byte4_161, x, y); break;
case 169: r = tex2D(__tex_image_byte4_169, x, y); break;
case 177: r = tex2D(__tex_image_byte4_177, x, y); break;
case 185: r = tex2D(__tex_image_byte4_185, x, y); break;
case 193: r = tex2D(__tex_image_byte4_193, x, y); break;
case 201: r = tex2D(__tex_image_byte4_201, x, y); break;
case 209: r = tex2D(__tex_image_byte4_209, x, y); break;
case 217: r = tex2D(__tex_image_byte4_217, x, y); break;
case 225: r = tex2D(__tex_image_byte4_225, x, y); break;
case 233: r = tex2D(__tex_image_byte4_233, x, y); break;
case 241: r = tex2D(__tex_image_byte4_241, x, y); break;
case 249: r = tex2D(__tex_image_byte4_249, x, y); break;
case 257: r = tex2D(__tex_image_byte4_257, x, y); break;
case 265: r = tex2D(__tex_image_byte4_265, x, y); break;
case 273: r = tex2D(__tex_image_byte4_273, x, y); break;
case 281: r = tex2D(__tex_image_byte4_281, x, y); break;
case 289: r = tex2D(__tex_image_byte4_289, x, y); break;
case 297: r = tex2D(__tex_image_byte4_297, x, y); break;
case 305: r = tex2D(__tex_image_byte4_305, x, y); break;
case 313: r = tex2D(__tex_image_byte4_313, x, y); break;
case 321: r = tex2D(__tex_image_byte4_321, x, y); break;
case 329: r = tex2D(__tex_image_byte4_329, x, y); break;
case 337: r = tex2D(__tex_image_byte4_337, x, y); break;
case 345: r = tex2D(__tex_image_byte4_345, x, y); break;
case 353: r = tex2D(__tex_image_byte4_353, x, y); break;
case 361: r = tex2D(__tex_image_byte4_361, x, y); break;
case 369: r = tex2D(__tex_image_byte4_369, x, y); break;
case 377: r = tex2D(__tex_image_byte4_377, x, y); break;
case 385: r = tex2D(__tex_image_byte4_385, x, y); break;
case 393: r = tex2D(__tex_image_byte4_393, x, y); break;
case 401: r = tex2D(__tex_image_byte4_401, x, y); break;
case 409: r = tex2D(__tex_image_byte4_409, x, y); break;
case 417: r = tex2D(__tex_image_byte4_417, x, y); break;
case 425: r = tex2D(__tex_image_byte4_425, x, y); break;
case 433: r = tex2D(__tex_image_byte4_433, x, y); break;
case 441: r = tex2D(__tex_image_byte4_441, x, y); break;
case 449: r = tex2D(__tex_image_byte4_449, x, y); break;
case 457: r = tex2D(__tex_image_byte4_457, x, y); break;
case 465: r = tex2D(__tex_image_byte4_465, x, y); break;
case 473: r = tex2D(__tex_image_byte4_473, x, y); break;
case 481: r = tex2D(__tex_image_byte4_481, x, y); break;
case 489: r = tex2D(__tex_image_byte4_489, x, y); break;
case 497: r = tex2D(__tex_image_byte4_497, x, y); break;
case 505: r = tex2D(__tex_image_byte4_505, x, y); break;
case 513: r = tex2D(__tex_image_byte4_513, x, y); break;
case 521: r = tex2D(__tex_image_byte4_521, x, y); break;
case 529: r = tex2D(__tex_image_byte4_529, x, y); break;
case 537: r = tex2D(__tex_image_byte4_537, x, y); break;
case 545: r = tex2D(__tex_image_byte4_545, x, y); break;
case 553: r = tex2D(__tex_image_byte4_553, x, y); break;
case 561: r = tex2D(__tex_image_byte4_561, x, y); break;
case 569: r = tex2D(__tex_image_byte4_569, x, y); break;
case 577: r = tex2D(__tex_image_byte4_577, x, y); break;
case 585: r = tex2D(__tex_image_byte4_585, x, y); break;
case 593: r = tex2D(__tex_image_byte4_593, x, y); break;
case 601: r = tex2D(__tex_image_byte4_601, x, y); break;
case 609: r = tex2D(__tex_image_byte4_609, x, y); break;
case 617: r = tex2D(__tex_image_byte4_617, x, y); break;
case 625: r = tex2D(__tex_image_byte4_625, x, y); break;
case 633: r = tex2D(__tex_image_byte4_633, x, y); break;
case 641: r = tex2D(__tex_image_byte4_641, x, y); break;
case 649: r = tex2D(__tex_image_byte4_649, x, y); break;
case 657: r = tex2D(__tex_image_byte4_657, x, y); break;
case 665: r = tex2D(__tex_image_byte4_665, x, y); break;
default: r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
return r;
}
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z)
{
float4 r;
switch(id) {
case 0: r = tex3D(__tex_image_float4_3d_000, x, y, z); break;
case 8: r = tex3D(__tex_image_float4_3d_008, x, y, z); break;
case 16: r = tex3D(__tex_image_float4_3d_016, x, y, z); break;
case 24: r = tex3D(__tex_image_float4_3d_024, x, y, z); break;
case 32: r = tex3D(__tex_image_float4_3d_032, x, y, z); break;
}
return r;
}
#endif

@ -20,7 +20,7 @@
#include "kernel/kernel_math.h"
#include "kernel/kernel_types.h"
#include "kernel/kernel_globals.h"
#include "kernel/kernel_image_opencl.h"
#include "kernel/kernels/opencl/kernel_opencl_image.h"
#include "kernel/kernel_film.h"

@ -0,0 +1,341 @@
/*
* Copyright 2016 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* For OpenCL we do manual lookup and interpolation. */
ccl_device_inline ccl_global TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) {
const uint tex_offset = id
#define KERNEL_TEX(type, name) + 1
#include "kernel/kernel_textures.h"
;
return &((ccl_global TextureInfo*)kg->buffers[0])[tex_offset];
}
#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->cl_buffer] + info->data))[(index)]
ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
{
const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
const int texture_type = kernel_tex_type(id);
/* Float4 */
if(texture_type == IMAGE_DATA_TYPE_FLOAT4) {
return tex_fetch(float4, info, offset);
}
/* Byte4 */
else if(texture_type == IMAGE_DATA_TYPE_BYTE4) {
uchar4 r = tex_fetch(uchar4, info, offset);
float f = 1.0f/255.0f;
return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
}
/* Float */
else if(texture_type == IMAGE_DATA_TYPE_FLOAT) {
float f = tex_fetch(float, info, offset);
return make_float4(f, f, f, 1.0f);
}
/* Byte */
else {
uchar r = tex_fetch(uchar, info, offset);
float f = r * (1.0f/255.0f);
return make_float4(f, f, f, 1.0f);
}
}
ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
{
x %= width;
if(x < 0)
x += width;
return x;
}
ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
{
return clamp(x, 0, width-1);
}
ccl_device_inline float svm_image_texture_frac(float x, int *ix)
{
int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
*ix = i;
return x - (float)i;
}
#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
{ \
u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \
u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
u[3] = (1.0f / 6.0f) * t * t * t; \
} (void)0
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
uint width = info->width;
uint height = info->height;
uint interpolation = info->interpolation;
uint extension = info->extension;
/* Actual sampling. */
if(interpolation == INTERPOLATION_CLOSEST) {
int ix, iy;
svm_image_texture_frac(x*width, &ix);
svm_image_texture_frac(y*height, &iy);
if(extension == EXTENSION_REPEAT) {
ix = svm_image_texture_wrap_periodic(ix, width);
iy = svm_image_texture_wrap_periodic(iy, height);
}
else {
if(extension == EXTENSION_CLIP) {
if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
}
/* Fall through. */
/* EXTENSION_EXTEND */
ix = svm_image_texture_wrap_clamp(ix, width);
iy = svm_image_texture_wrap_clamp(iy, height);
}
return svm_image_texture_read(kg, id, ix + iy*width);
}
else {
/* Bilinear or bicubic interpolation. */
int ix, iy, nix, niy;
float tx = svm_image_texture_frac(x*width - 0.5f, &ix);
float ty = svm_image_texture_frac(y*height - 0.5f, &iy);
if(extension == EXTENSION_REPEAT) {
ix = svm_image_texture_wrap_periodic(ix, width);
iy = svm_image_texture_wrap_periodic(iy, height);
nix = svm_image_texture_wrap_periodic(ix+1, width);
niy = svm_image_texture_wrap_periodic(iy+1, height);
}
else {
if(extension == EXTENSION_CLIP) {
if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
}
ix = svm_image_texture_wrap_clamp(ix, width);
iy = svm_image_texture_wrap_clamp(iy, height);
nix = svm_image_texture_wrap_clamp(ix+1, width);
niy = svm_image_texture_wrap_clamp(iy+1, height);
}
if(interpolation == INTERPOLATION_LINEAR) {
/* Bilinear interpolation. */
float4 r;
r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, ix + iy*width);
r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, nix + iy*width);
r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, ix + niy*width);
r += ty*tx*svm_image_texture_read(kg, id, nix + niy*width);
return r;
}
/* Bicubic interpolation. */
int pix, piy, nnix, nniy;
if(extension == EXTENSION_REPEAT) {
pix = svm_image_texture_wrap_periodic(ix-1, width);
piy = svm_image_texture_wrap_periodic(iy-1, height);
nnix = svm_image_texture_wrap_periodic(ix+2, width);
nniy = svm_image_texture_wrap_periodic(iy+2, height);
}
else {
pix = svm_image_texture_wrap_clamp(ix-1, width);
piy = svm_image_texture_wrap_clamp(iy-1, height);
nnix = svm_image_texture_wrap_clamp(ix+2, width);
nniy = svm_image_texture_wrap_clamp(iy+2, height);
}
const int xc[4] = {pix, ix, nix, nnix};
const int yc[4] = {width * piy,
width * iy,
width * niy,
width * nniy};
float u[4], v[4];
/* Some helper macro to keep code reasonable size,
* let compiler to inline all the matrix multiplications.
*/
#define DATA(x, y) (svm_image_texture_read(kg, id, xc[x] + yc[y]))
#define TERM(col) \
(v[col] * (u[0] * DATA(0, col) + \
u[1] * DATA(1, col) + \
u[2] * DATA(2, col) + \
u[3] * DATA(3, col)))
SET_CUBIC_SPLINE_WEIGHTS(u, tx);
SET_CUBIC_SPLINE_WEIGHTS(v, ty);
/* Actual interpolation. */
return TERM(0) + TERM(1) + TERM(2) + TERM(3);
#undef TERM
#undef DATA
}
}
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp)
{
const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
uint width = info->width;
uint height = info->height;
uint depth = info->depth;
uint interpolation = (interp == INTERPOLATION_NONE)? info->interpolation: interp;
uint extension = info->extension;
/* Actual sampling. */
if(interpolation == INTERPOLATION_CLOSEST) {
int ix, iy, iz;
svm_image_texture_frac(x*width, &ix);
svm_image_texture_frac(y*height, &iy);
svm_image_texture_frac(z*depth, &iz);
if(extension == EXTENSION_REPEAT) {
ix = svm_image_texture_wrap_periodic(ix, width);
iy = svm_image_texture_wrap_periodic(iy, height);
iz = svm_image_texture_wrap_periodic(iz, depth);
}
else {
if(extension == EXTENSION_CLIP) {
if(x < 0.0f || y < 0.0f || z < 0.0f ||
x > 1.0f || y > 1.0f || z > 1.0f)
{
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
}
/* Fall through. */
/* EXTENSION_EXTEND */
ix = svm_image_texture_wrap_clamp(ix, width);
iy = svm_image_texture_wrap_clamp(iy, height);
iz = svm_image_texture_wrap_clamp(iz, depth);
}
return svm_image_texture_read(kg, id, ix + iy*width + iz*width*height);
}
else {
/* Bilinear or bicubic interpolation. */
int ix, iy, iz, nix, niy, niz;
float tx = svm_image_texture_frac(x*(float)width - 0.5f, &ix);
float ty = svm_image_texture_frac(y*(float)height - 0.5f, &iy);
float tz = svm_image_texture_frac(z*(float)depth - 0.5f, &iz);
if(extension == EXTENSION_REPEAT) {
ix = svm_image_texture_wrap_periodic(ix, width);
iy = svm_image_texture_wrap_periodic(iy, height);
iz = svm_image_texture_wrap_periodic(iz, depth);
nix = svm_image_texture_wrap_periodic(ix+1, width);
niy = svm_image_texture_wrap_periodic(iy+1, height);
niz = svm_image_texture_wrap_periodic(iz+1, depth);
}
else {
if(extension == EXTENSION_CLIP) {
if(x < 0.0f || y < 0.0f || z < 0.0f ||
x > 1.0f || y > 1.0f || z > 1.0f)
{
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
}
/* Fall through. */
/* EXTENSION_EXTEND */
nix = svm_image_texture_wrap_clamp(ix+1, width);
niy = svm_image_texture_wrap_clamp(iy+1, height);
niz = svm_image_texture_wrap_clamp(iz+1, depth);
ix = svm_image_texture_wrap_clamp(ix, width);
iy = svm_image_texture_wrap_clamp(iy, height);
iz = svm_image_texture_wrap_clamp(iz, depth);
}
if(interpolation == INTERPOLATION_LINEAR) {
/* Bilinear interpolation. */
float4 r;
r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, ix + iy*width + iz*width*height);
r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read(kg, id, nix + iy*width + iz*width*height);
r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read(kg, id, ix + niy*width + iz*width*height);
r += (1.0f - tz)*ty*tx*svm_image_texture_read(kg, id, nix + niy*width + iz*width*height);
r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, ix + iy*width + niz*width*height);
r += tz*(1.0f - ty)*tx*svm_image_texture_read(kg, id, nix + iy*width + niz*width*height);
r += tz*ty*(1.0f - tx)*svm_image_texture_read(kg, id, ix + niy*width + niz*width*height);
r += tz*ty*tx*svm_image_texture_read(kg, id, nix + niy*width + niz*width*height);
return r;
}
/* Bicubic interpolation. */
int pix, piy, piz, nnix, nniy, nniz;
if(extension == EXTENSION_REPEAT) {
pix = svm_image_texture_wrap_periodic(ix-1, width);
piy = svm_image_texture_wrap_periodic(iy-1, height);
piz = svm_image_texture_wrap_periodic(iz-1, depth);
nnix = svm_image_texture_wrap_periodic(ix+2, width);
nniy = svm_image_texture_wrap_periodic(iy+2, height);
nniz = svm_image_texture_wrap_periodic(iz+2, depth);
}
else {
pix = svm_image_texture_wrap_clamp(ix-1, width);
piy = svm_image_texture_wrap_clamp(iy-1, height);
piz = svm_image_texture_wrap_clamp(iz-1, depth);
nnix = svm_image_texture_wrap_clamp(ix+2, width);
nniy = svm_image_texture_wrap_clamp(iy+2, height);
nniz = svm_image_texture_wrap_clamp(iz+2, depth);
}
const int xc[4] = {pix, ix, nix, nnix};
const int yc[4] = {width * piy,
width * iy,
width * niy,
width * nniy};
const int zc[4] = {width * height * piz,
width * height * iz,
width * height * niz,
width * height * nniz};
float u[4], v[4], w[4];
/* Some helper macro to keep code reasonable size,
* let compiler to inline all the matrix multiplications.
*/
#define DATA(x, y, z) (svm_image_texture_read(kg, id, xc[x] + yc[y] + zc[z]))
#define COL_TERM(col, row) \
(v[col] * (u[0] * DATA(0, col, row) + \
u[1] * DATA(1, col, row) + \
u[2] * DATA(2, col, row) + \
u[3] * DATA(3, col, row)))
#define ROW_TERM(row) \
(w[row] * (COL_TERM(0, row) + \
COL_TERM(1, row) + \
COL_TERM(2, row) + \
COL_TERM(3, row)))
SET_CUBIC_SPLINE_WEIGHTS(u, tx);
SET_CUBIC_SPLINE_WEIGHTS(v, ty);
SET_CUBIC_SPLINE_WEIGHTS(w, tz);
/* Actual interpolation. */
return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
#undef COL_TERM
#undef ROW_TERM
#undef DATA
}
}
#undef SET_CUBIC_SPLINE_WEIGHTS

@ -962,7 +962,7 @@ bool OSLRenderServices::texture(ustring filename,
if(filename.length() && filename[0] == '@') {
int slot = atoi(filename.c_str() + 1);
float4 rgba = kernel_tex_image_interp(slot, s, 1.0f - t);
float4 rgba = kernel_tex_image_interp(kg, slot, s, 1.0f - t);
result[0] = rgba[0];
if(nchannels > 1)
@ -1043,7 +1043,7 @@ bool OSLRenderServices::texture3d(ustring filename,
bool status;
if(filename.length() && filename[0] == '@') {
int slot = atoi(filename.c_str() + 1);
float4 rgba = kernel_tex_image_interp_3d(slot, P.x, P.y, P.z);
float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z, INTERPOLATION_NONE);
result[0] = rgba[0];
if(nchannels > 1)

@ -29,7 +29,10 @@
#endif
#ifdef __KERNEL_OPENCL__
# include "kernel/kernel_image_opencl.h"
# include "kernel/kernels/opencl/kernel_opencl_image.h"
#endif
#ifdef __KERNEL_CUDA__
# include "kernel/kernels/cuda/kernel_cuda_image.h"
#endif
#ifdef __KERNEL_CPU__
# include "kernel/kernels/cpu/kernel_cpu_image.h"

@ -18,135 +18,7 @@ CCL_NAMESPACE_BEGIN
ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
{
#ifdef __KERNEL_CPU__
float4 r = kernel_tex_image_interp(id, x, y);
#elif defined(__KERNEL_OPENCL__)
float4 r = kernel_tex_image_interp(kg, id, x, y);
#else
float4 r;
# if __CUDA_ARCH__ < 300
/* not particularly proud of this massive switch, what are the
* alternatives?
* - use a single big 1D texture, and do our own lookup/filtering
* - group by size and use a 3d texture, performance impact
* - group into larger texture with some padding for correct lerp
*
* also note that cuda has a textures limit (128 for Fermi, 256 for Kepler),
* and we cannot use all since we still need some for other storage */
switch(id) {
case 0: r = kernel_tex_image_interp(__tex_image_float4_000, x, y); break;
case 8: r = kernel_tex_image_interp(__tex_image_float4_008, x, y); break;
case 16: r = kernel_tex_image_interp(__tex_image_float4_016, x, y); break;
case 24: r = kernel_tex_image_interp(__tex_image_float4_024, x, y); break;
case 32: r = kernel_tex_image_interp(__tex_image_float4_032, x, y); break;
case 1: r = kernel_tex_image_interp(__tex_image_byte4_001, x, y); break;
case 9: r = kernel_tex_image_interp(__tex_image_byte4_009, x, y); break;
case 17: r = kernel_tex_image_interp(__tex_image_byte4_017, x, y); break;
case 25: r = kernel_tex_image_interp(__tex_image_byte4_025, x, y); break;
case 33: r = kernel_tex_image_interp(__tex_image_byte4_033, x, y); break;
case 41: r = kernel_tex_image_interp(__tex_image_byte4_041, x, y); break;
case 49: r = kernel_tex_image_interp(__tex_image_byte4_049, x, y); break;
case 57: r = kernel_tex_image_interp(__tex_image_byte4_057, x, y); break;
case 65: r = kernel_tex_image_interp(__tex_image_byte4_065, x, y); break;
case 73: r = kernel_tex_image_interp(__tex_image_byte4_073, x, y); break;
case 81: r = kernel_tex_image_interp(__tex_image_byte4_081, x, y); break;
case 89: r = kernel_tex_image_interp(__tex_image_byte4_089, x, y); break;
case 97: r = kernel_tex_image_interp(__tex_image_byte4_097, x, y); break;
case 105: r = kernel_tex_image_interp(__tex_image_byte4_105, x, y); break;
case 113: r = kernel_tex_image_interp(__tex_image_byte4_113, x, y); break;
case 121: r = kernel_tex_image_interp(__tex_image_byte4_121, x, y); break;
case 129: r = kernel_tex_image_interp(__tex_image_byte4_129, x, y); break;
case 137: r = kernel_tex_image_interp(__tex_image_byte4_137, x, y); break;
case 145: r = kernel_tex_image_interp(__tex_image_byte4_145, x, y); break;
case 153: r = kernel_tex_image_interp(__tex_image_byte4_153, x, y); break;
case 161: r = kernel_tex_image_interp(__tex_image_byte4_161, x, y); break;
case 169: r = kernel_tex_image_interp(__tex_image_byte4_169, x, y); break;
case 177: r = kernel_tex_image_interp(__tex_image_byte4_177, x, y); break;
case 185: r = kernel_tex_image_interp(__tex_image_byte4_185, x, y); break;
case 193: r = kernel_tex_image_interp(__tex_image_byte4_193, x, y); break;
case 201: r = kernel_tex_image_interp(__tex_image_byte4_201, x, y); break;
case 209: r = kernel_tex_image_interp(__tex_image_byte4_209, x, y); break;
case 217: r = kernel_tex_image_interp(__tex_image_byte4_217, x, y); break;
case 225: r = kernel_tex_image_interp(__tex_image_byte4_225, x, y); break;
case 233: r = kernel_tex_image_interp(__tex_image_byte4_233, x, y); break;
case 241: r = kernel_tex_image_interp(__tex_image_byte4_241, x, y); break;
case 249: r = kernel_tex_image_interp(__tex_image_byte4_249, x, y); break;
case 257: r = kernel_tex_image_interp(__tex_image_byte4_257, x, y); break;
case 265: r = kernel_tex_image_interp(__tex_image_byte4_265, x, y); break;
case 273: r = kernel_tex_image_interp(__tex_image_byte4_273, x, y); break;
case 281: r = kernel_tex_image_interp(__tex_image_byte4_281, x, y); break;
case 289: r = kernel_tex_image_interp(__tex_image_byte4_289, x, y); break;
case 297: r = kernel_tex_image_interp(__tex_image_byte4_297, x, y); break;
case 305: r = kernel_tex_image_interp(__tex_image_byte4_305, x, y); break;
case 313: r = kernel_tex_image_interp(__tex_image_byte4_313, x, y); break;
case 321: r = kernel_tex_image_interp(__tex_image_byte4_321, x, y); break;
case 329: r = kernel_tex_image_interp(__tex_image_byte4_329, x, y); break;
case 337: r = kernel_tex_image_interp(__tex_image_byte4_337, x, y); break;
case 345: r = kernel_tex_image_interp(__tex_image_byte4_345, x, y); break;
case 353: r = kernel_tex_image_interp(__tex_image_byte4_353, x, y); break;
case 361: r = kernel_tex_image_interp(__tex_image_byte4_361, x, y); break;
case 369: r = kernel_tex_image_interp(__tex_image_byte4_369, x, y); break;
case 377: r = kernel_tex_image_interp(__tex_image_byte4_377, x, y); break;
case 385: r = kernel_tex_image_interp(__tex_image_byte4_385, x, y); break;
case 393: r = kernel_tex_image_interp(__tex_image_byte4_393, x, y); break;
case 401: r = kernel_tex_image_interp(__tex_image_byte4_401, x, y); break;
case 409: r = kernel_tex_image_interp(__tex_image_byte4_409, x, y); break;
case 417: r = kernel_tex_image_interp(__tex_image_byte4_417, x, y); break;
case 425: r = kernel_tex_image_interp(__tex_image_byte4_425, x, y); break;
case 433: r = kernel_tex_image_interp(__tex_image_byte4_433, x, y); break;
case 441: r = kernel_tex_image_interp(__tex_image_byte4_441, x, y); break;
case 449: r = kernel_tex_image_interp(__tex_image_byte4_449, x, y); break;
case 457: r = kernel_tex_image_interp(__tex_image_byte4_457, x, y); break;
case 465: r = kernel_tex_image_interp(__tex_image_byte4_465, x, y); break;
case 473: r = kernel_tex_image_interp(__tex_image_byte4_473, x, y); break;
case 481: r = kernel_tex_image_interp(__tex_image_byte4_481, x, y); break;
case 489: r = kernel_tex_image_interp(__tex_image_byte4_489, x, y); break;
case 497: r = kernel_tex_image_interp(__tex_image_byte4_497, x, y); break;
case 505: r = kernel_tex_image_interp(__tex_image_byte4_505, x, y); break;
case 513: r = kernel_tex_image_interp(__tex_image_byte4_513, x, y); break;
case 521: r = kernel_tex_image_interp(__tex_image_byte4_521, x, y); break;
case 529: r = kernel_tex_image_interp(__tex_image_byte4_529, x, y); break;
case 537: r = kernel_tex_image_interp(__tex_image_byte4_537, x, y); break;
case 545: r = kernel_tex_image_interp(__tex_image_byte4_545, x, y); break;
case 553: r = kernel_tex_image_interp(__tex_image_byte4_553, x, y); break;
case 561: r = kernel_tex_image_interp(__tex_image_byte4_561, x, y); break;
case 569: r = kernel_tex_image_interp(__tex_image_byte4_569, x, y); break;
case 577: r = kernel_tex_image_interp(__tex_image_byte4_577, x, y); break;
case 585: r = kernel_tex_image_interp(__tex_image_byte4_585, x, y); break;
case 593: r = kernel_tex_image_interp(__tex_image_byte4_593, x, y); break;
case 601: r = kernel_tex_image_interp(__tex_image_byte4_601, x, y); break;
case 609: r = kernel_tex_image_interp(__tex_image_byte4_609, x, y); break;
case 617: r = kernel_tex_image_interp(__tex_image_byte4_617, x, y); break;
case 625: r = kernel_tex_image_interp(__tex_image_byte4_625, x, y); break;
case 633: r = kernel_tex_image_interp(__tex_image_byte4_633, x, y); break;
case 641: r = kernel_tex_image_interp(__tex_image_byte4_641, x, y); break;
case 649: r = kernel_tex_image_interp(__tex_image_byte4_649, x, y); break;
case 657: r = kernel_tex_image_interp(__tex_image_byte4_657, x, y); break;
case 665: r = kernel_tex_image_interp(__tex_image_byte4_665, x, y); break;
default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
# else
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
/* float4, byte4 and half4 */
const int texture_type = kernel_tex_type(id);
if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
texture_type == IMAGE_DATA_TYPE_BYTE4 ||
texture_type == IMAGE_DATA_TYPE_HALF4)
{
r = kernel_tex_image_interp_float4(tex, x, y);
}
/* float, byte and half */
else {
float f = kernel_tex_image_interp_float(tex, x, y);
r = make_float4(f, f, f, 1.0f);
}
# endif
#endif
const float alpha = r.w;
if(use_alpha && alpha != 1.0f && alpha != 0.0f) {

@ -42,29 +42,8 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
tfm.w = read_node_float(kg, offset);
co = transform_point(&tfm, co);
}
float4 r;
# if defined(__KERNEL_CUDA__)
# if __CUDA_ARCH__ >= 300
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
const int texture_type = kernel_tex_type(id);
if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
texture_type == IMAGE_DATA_TYPE_BYTE4 ||
texture_type == IMAGE_DATA_TYPE_HALF4)
{
r = kernel_tex_image_interp_3d_float4(tex, co.x, co.y, co.z);
}
else {
float f = kernel_tex_image_interp_3d_float(tex, co.x, co.y, co.z);
r = make_float4(f, f, f, 1.0f);
}
# else /* __CUDA_ARCH__ >= 300 */
r = volume_image_texture_3d(id, co.x, co.y, co.z);
# endif
# elif defined(__KERNEL_OPENCL__)
r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z);
# else
r = kernel_tex_image_interp_3d(id, co.x, co.y, co.z);
# endif /* __KERNEL_CUDA__ */
float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE);
#else
float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
#endif

@ -46,12 +46,64 @@ CCL_NAMESPACE_BEGIN
#if defined (__KERNEL_CUDA__) && (__CUDA_ARCH__ < 300)
# define kernel_tex_type(tex) (tex < TEX_START_BYTE4_CUDA ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_BYTE4)
# define kernel_tex_index(tex) (tex)
#else
# define kernel_tex_type(tex) (tex & IMAGE_DATA_TYPE_MASK)
# define kernel_tex_index(tex) (tex >> IMAGE_DATA_TYPE_SHIFT)
#endif
/* Interpolation types for textures
* cuda also use texture space to store other objects */
typedef enum InterpolationType {
INTERPOLATION_NONE = -1,
INTERPOLATION_LINEAR = 0,
INTERPOLATION_CLOSEST = 1,
INTERPOLATION_CUBIC = 2,
INTERPOLATION_SMART = 3,
INTERPOLATION_NUM_TYPES,
} InterpolationType;
/* Texture types
* Since we store the type in the lower bits of a flat index,
* the shift and bit mask constant below need to be kept in sync. */
typedef enum ImageDataType {
IMAGE_DATA_TYPE_FLOAT4 = 0,
IMAGE_DATA_TYPE_BYTE4 = 1,
IMAGE_DATA_TYPE_HALF4 = 2,
IMAGE_DATA_TYPE_FLOAT = 3,
IMAGE_DATA_TYPE_BYTE = 4,
IMAGE_DATA_TYPE_HALF = 5,
IMAGE_DATA_NUM_TYPES
} ImageDataType;
#define IMAGE_DATA_TYPE_SHIFT 3
#define IMAGE_DATA_TYPE_MASK 0x7
/* Extension types for textures.
*
* Defines how the image is extrapolated past its original bounds. */
typedef enum ExtensionType {
/* Cause the image to repeat horizontally and vertically. */
EXTENSION_REPEAT = 0,
/* Extend by repeating edge pixels of the image. */
EXTENSION_EXTEND = 1,
/* Clip to image size and set exterior pixels as transparent. */
EXTENSION_CLIP = 2,
EXTENSION_NUM_TYPES,
} ExtensionType;
typedef struct TextureInfo {
/* Pointer, offset or texture depending on device. */
uint64_t data;
/* Buffer number for OpenCL. */
uint cl_buffer;
/* Interpolation and extension type. */
uint interpolation, extension;
/* Dimensions. */
uint width, height, depth;
} TextureInfo;
CCL_NAMESPACE_END
#endif /* __UTIL_TEXTURE_H__ */

@ -101,52 +101,6 @@ ccl_device_inline size_t round_down(size_t x, size_t multiple)
return (x / multiple) * multiple;
}
/* Interpolation types for textures
* cuda also use texture space to store other objects */
enum InterpolationType {
INTERPOLATION_NONE = -1,
INTERPOLATION_LINEAR = 0,
INTERPOLATION_CLOSEST = 1,
INTERPOLATION_CUBIC = 2,
INTERPOLATION_SMART = 3,
INTERPOLATION_NUM_TYPES,
};
/* Texture types
* Since we store the type in the lower bits of a flat index,
* the shift and bit mask constant below need to be kept in sync.
*/
enum ImageDataType {
IMAGE_DATA_TYPE_FLOAT4 = 0,
IMAGE_DATA_TYPE_BYTE4 = 1,
IMAGE_DATA_TYPE_HALF4 = 2,
IMAGE_DATA_TYPE_FLOAT = 3,
IMAGE_DATA_TYPE_BYTE = 4,
IMAGE_DATA_TYPE_HALF = 5,
IMAGE_DATA_NUM_TYPES
};
#define IMAGE_DATA_TYPE_SHIFT 3
#define IMAGE_DATA_TYPE_MASK 0x7
/* Extension types for textures.
*
* Defines how the image is extrapolated past its original bounds.
*/
enum ExtensionType {
/* Cause the image to repeat horizontally and vertically. */
EXTENSION_REPEAT = 0,
/* Extend by repeating edge pixels of the image. */
EXTENSION_EXTEND = 1,
/* Clip to image size and set exterior pixels as transparent. */
EXTENSION_CLIP = 2,
EXTENSION_NUM_TYPES,
};
CCL_NAMESPACE_END
/* Vectorized types declaration. */

@ -149,6 +149,10 @@ static bool vwpaint_use_normal(const VPaint *vp)
((vp->paint.brush->flag & BRUSH_FRONTFACE_FALLOFF) != 0);
}
static bool brush_use_accumulate(const Brush *brush)
{
return (brush->flag & BRUSH_ACCUMULATE) != 0 || brush->vertexpaint_tool == PAINT_BLEND_SMEAR;
}
static MDeformVert *defweight_prev_init(MDeformVert *dvert_prev, MDeformVert *dvert_curr, int index)
{
@ -272,7 +276,7 @@ static uint vpaint_blend(
uint color_blend = ED_vpaint_blend_tool(tool, color_curr, color_paint, alpha_i);
/* if no accumulate, clip color adding with colorig & orig alpha */
if ((brush->flag & BRUSH_ACCUMULATE) == 0) {
if (!brush_use_accumulate(brush)) {
uint color_test, a;
char *cp, *ct, *co;
@ -784,7 +788,7 @@ static void do_weight_paint_vertex_single(
dw_mirr = NULL;
}
if ((wp->paint.brush->flag & BRUSH_ACCUMULATE) == 0) {
if (!brush_use_accumulate(wp->paint.brush)) {
MDeformVert *dvert_prev = ob->sculpt->mode.wpaint.dvert_prev;
MDeformVert *dv_prev = defweight_prev_init(dvert_prev, me->dvert, index);
if (index_mirr != -1) {
@ -900,7 +904,7 @@ static void do_weight_paint_vertex_multi(
return;
}
if ((wp->paint.brush->flag & BRUSH_ACCUMULATE) == 0) {
if (!brush_use_accumulate(wp->paint.brush)) {
MDeformVert *dvert_prev = ob->sculpt->mode.wpaint.dvert_prev;
MDeformVert *dv_prev = defweight_prev_init(dvert_prev, me->dvert, index);
if (index_mirr != -1) {
@ -1031,7 +1035,7 @@ static void vertex_paint_init_session_data(const ToolSettings *ts, Object *ob)
/* Create average brush arrays */
if (ob->mode == OB_MODE_VERTEX_PAINT) {
if ((brush->flag & BRUSH_ACCUMULATE) == 0) {
if (!brush_use_accumulate(brush)) {
if (ob->sculpt->mode.vpaint.previous_color == NULL) {
ob->sculpt->mode.vpaint.previous_color =
MEM_callocN(me->totloop * sizeof(uint), __func__);
@ -1042,7 +1046,7 @@ static void vertex_paint_init_session_data(const ToolSettings *ts, Object *ob)
}
}
else if (ob->mode == OB_MODE_WEIGHT_PAINT) {
if ((brush->flag & BRUSH_ACCUMULATE) == 0) {
if (!brush_use_accumulate(brush)) {
if (ob->sculpt->mode.wpaint.alpha_weight == NULL) {
ob->sculpt->mode.wpaint.alpha_weight =
MEM_callocN(me->totvert * sizeof(float), __func__);
@ -1204,11 +1208,9 @@ struct WPaintData {
int defbase_tot;
/* Special storage for smear brush, avoid feedback loop - update each step and swap. */
struct {
float *weight_prev;
float *weight_curr;
} smear;
/* original weight values for use in blur/smear */
float *precomputed_weight;
bool precomputed_weight_ready;
};
/* Initialize the stroke cache invariants from operator properties */
@ -1437,24 +1439,8 @@ static bool wpaint_stroke_test_start(bContext *C, wmOperator *op, const float mo
wpd->mirror.lock = tmpflags;
}
if (vp->paint.brush->vertexpaint_tool == PAINT_BLEND_SMEAR) {
wpd->smear.weight_prev = MEM_mallocN(sizeof(float) * me->totvert, __func__);
const MDeformVert *dv = me->dvert;
if (wpd->do_multipaint) {
const bool do_auto_normalize = ((ts->auto_normalize != 0) && (wpd->vgroup_validmap != NULL));
for (int i = 0; i < me->totvert; i++, dv++) {
float weight = BKE_defvert_multipaint_collective_weight(
dv, wpd->defbase_tot, wpd->defbase_sel, wpd->defbase_tot_sel, do_auto_normalize);
CLAMP(weight, 0.0f, 1.0f);
wpd->smear.weight_prev[i] = weight;
}
}
else {
for (int i = 0; i < me->totvert; i++, dv++) {
wpd->smear.weight_prev[i] = defvert_find_weight(dv, wpd->active.index);
}
}
wpd->smear.weight_curr = MEM_dupallocN(wpd->smear.weight_prev);
if (ELEM(vp->paint.brush->vertexpaint_tool, PAINT_BLEND_SMEAR, PAINT_BLEND_BLUR)) {
wpd->precomputed_weight = MEM_mallocN(sizeof(float) * me->totvert, __func__);
}
/* imat for normals */
@ -1512,6 +1498,33 @@ static float wpaint_get_active_weight(const MDeformVert *dv, const WeightPaintIn
}
}
static void do_wpaint_precompute_weight_cb_ex(
void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id))
{
SculptThreadedTaskData *data = userdata;
const MDeformVert *dv = &data->me->dvert[n];
data->wpd->precomputed_weight[n] = wpaint_get_active_weight(dv, data->wpi);
}
static void precompute_weight_values(
bContext *C, Object *ob, Brush *brush, struct WPaintData *wpd, WeightPaintInfo *wpi, Mesh *me)
{
if (wpd->precomputed_weight_ready && !brush_use_accumulate(brush))
return;
/* threaded loop over vertices */
SculptThreadedTaskData data = {
.C = C, .ob = ob, .wpd = wpd, .wpi = wpi, .me = me,
};
BLI_task_parallel_range_ex(
0, me->totvert, &data, NULL, 0, do_wpaint_precompute_weight_cb_ex,
true, false);
wpd->precomputed_weight_ready = true;
}
static void do_wpaint_brush_blur_task_cb_ex(
void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id))
{
@ -1560,8 +1573,7 @@ static void do_wpaint_brush_blur_task_cb_ex(
for (int k = 0; k < mp->totloop; k++) {
const int l_index = mp->loopstart + k;
const MLoop *ml = &data->me->mloop[l_index];
const MDeformVert *dv = &data->me->dvert[ml->v];
weight_final += wpaint_get_active_weight(dv, data->wpi);
weight_final += data->wpd->precomputed_weight[ml->v];
}
}
@ -1681,7 +1693,7 @@ static void do_wpaint_brush_smear_task_cb_ex(
if (stroke_dot > stroke_dot_max) {
stroke_dot_max = stroke_dot;
weight_final = data->wpd->smear.weight_prev[v_other_index];
weight_final = data->wpd->precomputed_weight[v_other_index];
do_color = true;
}
}
@ -1693,12 +1705,13 @@ static void do_wpaint_brush_smear_task_cb_ex(
const float final_alpha =
brush_fade * brush_strength *
grid_alpha * brush_alpha_pressure;
if (final_alpha <= 0.0f)
continue;
do_weight_paint_vertex(
data->vp, data->ob, data->wpi,
v_index, final_alpha, (float)weight_final);
/* Access the weight again because it might not have been applied completely. */
data->wpd->smear.weight_curr[v_index] =
wpaint_get_active_weight(&data->me->dvert[v_index], data->wpi);
}
}
}
@ -2064,14 +2077,14 @@ static void wpaint_stroke_update_step(bContext *C, struct PaintStroke *stroke, P
wpi.brush_alpha_value = brush_alpha_value;
/* *** done setting up WeightPaintInfo *** */
if (wpd->precomputed_weight) {
precompute_weight_values(C, ob, brush, wpd, &wpi, ob->data);
}
wpaint_do_symmetrical_brush_actions(C, ob, wp, sd, wpd, &wpi);
swap_m4m4(vc->rv3d->persmat, mat);
if (wp->paint.brush->vertexpaint_tool == PAINT_BLEND_SMEAR) {
SWAP(float *, wpd->smear.weight_curr, wpd->smear.weight_prev);
}
/* calculate pivot for rotation around seletion if needed */
/* also needed for "View Selected" on last stroke */
paint_last_stroke_update(scene, vc->ar, mval);
@ -2121,10 +2134,8 @@ static void wpaint_stroke_done(const bContext *C, struct PaintStroke *stroke)
MEM_freeN((void *)wpd->active.lock);
if (wpd->mirror.lock)
MEM_freeN((void *)wpd->mirror.lock);
if (wpd->smear.weight_prev)
MEM_freeN(wpd->smear.weight_prev);
if (wpd->smear.weight_curr)
MEM_freeN(wpd->smear.weight_curr);
if (wpd->precomputed_weight)
MEM_freeN(wpd->precomputed_weight);
MEM_freeN(wpd);
}

@ -3740,7 +3740,7 @@ static const EnumPropertyItem sh_tex_prop_interpolation_items[] = {
{SHD_INTERP_CLOSEST, "Closest", 0, "Closest",
"No interpolation (sample closest texel)"},
{SHD_INTERP_CUBIC, "Cubic", 0, "Cubic",
"Cubic interpolation (CPU only)"},
"Cubic interpolation"},
{SHD_INTERP_SMART, "Smart", 0, "Smart",
"Bicubic when magnifying, else bilinear (OSL only)"},
{0, NULL, 0, NULL, NULL}
@ -4106,7 +4106,7 @@ static void def_sh_tex_pointdensity(StructRNA *srna)
{SHD_INTERP_LINEAR, "Linear", 0, "Linear",
"Linear interpolation"},
{SHD_INTERP_CUBIC, "Cubic", 0, "Cubic",
"Cubic interpolation (CPU only)"},
"Cubic interpolation"},
{0, NULL, 0, NULL, NULL}
};

@ -439,7 +439,7 @@ static void arg_py_context_restore(
* \{ */
static const char arg_handle_print_version_doc[] =
"\n\tPrint Blender version and exit"
"\n\tPrint Blender version and exit."
;
static int arg_handle_print_version(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -463,10 +463,10 @@ static int arg_handle_print_version(int UNUSED(argc), const char **UNUSED(argv),
}
static const char arg_handle_print_help_doc[] =
"\n\tPrint this help text and exit"
"\n\tPrint this help text and exit."
;
static const char arg_handle_print_help_doc_win32[] =
"\n\tPrint this help text and exit (windows only)"
"\n\tPrint this help text and exit (windows only)."
;
static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), void *data)
{
@ -593,16 +593,16 @@ static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), vo
printf("Argument Parsing:\n");
printf("\tArguments must be separated by white space, eg:\n");
printf("\t# blender -ba test.blend\n");
printf("\t...will ignore the 'a'\n");
printf("\t...will ignore the 'a'.\n");
printf("\t# blender -b test.blend -f8\n");
printf("\t...will ignore '8' because there is no space between the '-f' and the frame value\n\n");
printf("\t...will ignore '8' because there is no space between the '-f' and the frame value.\n\n");
printf("Argument Order:\n");
printf("\tArguments are executed in the order they are given. eg:\n");
printf("\t# blender --background test.blend --render-frame 1 --render-output '/tmp'\n");
printf("\t...will not render to '/tmp' because '--render-frame 1' renders before the output path is set\n");
printf("\t...will not render to '/tmp' because '--render-frame 1' renders before the output path is set.\n");
printf("\t# blender --background --render-output /tmp test.blend --render-frame 1\n");
printf("\t...will not render to '/tmp' because loading the blend-file overwrites the render output that was set\n");
printf("\t...will not render to '/tmp' because loading the blend-file overwrites the render output that was set.\n");
printf("\t# blender --background test.blend --render-output /tmp --render-frame 1\n");
printf("\t...works as expected.\n\n");
@ -612,7 +612,7 @@ static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), vo
printf(" $BLENDER_SYSTEM_SCRIPTS Directory for system wide scripts.\n");
printf(" $BLENDER_USER_DATAFILES Directory for user data files (icons, translations, ..).\n");
printf(" $BLENDER_SYSTEM_DATAFILES Directory for system wide data files.\n");
printf(" $BLENDER_SYSTEM_PYTHON Directory for system python libraries.\n");
printf(" $BLENDER_SYSTEM_PYTHON Directory for system Python libraries.\n");
#ifdef WIN32
printf(" $TEMP Store temporary files here.\n");
#else
@ -621,7 +621,7 @@ static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), vo
#ifdef WITH_SDL
printf(" $SDL_AUDIODRIVER LibSDL audio driver - alsa, esd, dma.\n");
#endif
printf(" $PYTHONHOME Path to the python directory, eg. /usr/lib/python.\n\n");
printf(" $PYTHONHOME Path to the Python directory, eg. /usr/lib/python.\n\n");
exit(0);
@ -629,7 +629,7 @@ static int arg_handle_print_help(int UNUSED(argc), const char **UNUSED(argv), vo
}
static const char arg_handle_arguments_end_doc[] =
"\n\tEnds option processing, following arguments passed unchanged. Access via Python's 'sys.argv'"
"\n\tEnd option processing, following arguments passed unchanged. Access via Python's 'sys.argv'."
;
static int arg_handle_arguments_end(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -646,10 +646,10 @@ static int arg_handle_arguments_end(int UNUSED(argc), const char **UNUSED(argv),
#endif
static const char arg_handle_python_set_doc_enable[] =
"\n\tEnable automatic Python script execution" PY_ENABLE_AUTO
"\n\tEnable automatic Python script execution" PY_ENABLE_AUTO "."
;
static const char arg_handle_python_set_doc_disable[] =
"\n\tDisable automatic Python script execution (pydrivers & startup scripts)" PY_DISABLE_AUTO
"\n\tDisable automatic Python script execution (pydrivers & startup scripts)" PY_DISABLE_AUTO "."
;
#undef PY_ENABLE_AUTO
#undef PY_DISABLE_AUTO
@ -667,7 +667,7 @@ static int arg_handle_python_set(int UNUSED(argc), const char **UNUSED(argv), vo
}
static const char arg_handle_crash_handler_disable_doc[] =
"\n\tDisable the crash handler"
"\n\tDisable the crash handler."
;
static int arg_handle_crash_handler_disable(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -676,7 +676,7 @@ static int arg_handle_crash_handler_disable(int UNUSED(argc), const char **UNUSE
}
static const char arg_handle_abort_handler_disable_doc[] =
"\n\tDisable the abort handler"
"\n\tDisable the abort handler."
;
static int arg_handle_abort_handler_disable(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -685,7 +685,7 @@ static int arg_handle_abort_handler_disable(int UNUSED(argc), const char **UNUSE
}
static const char arg_handle_background_mode_set_doc[] =
"\n\tRun in background (often used for UI-less rendering)"
"\n\tRun in background (often used for UI-less rendering)."
;
static int arg_handle_background_mode_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -695,7 +695,7 @@ static int arg_handle_background_mode_set(int UNUSED(argc), const char **UNUSED(
static const char arg_handle_debug_mode_set_doc[] =
"\n"
"\tTurn debugging on\n"
"\tTurn debugging on.\n"
"\n"
"\t* Enables memory error detection\n"
"\t* Disables mouse grab (to interact with a debugger in some cases)\n"
@ -720,30 +720,30 @@ static int arg_handle_debug_mode_set(int UNUSED(argc), const char **UNUSED(argv)
#ifdef WITH_FFMPEG
static const char arg_handle_debug_mode_generic_set_doc_ffmpeg[] =
"\n\tEnable debug messages from FFmpeg library";
"\n\tEnable debug messages from FFmpeg library.";
#endif
#ifdef WITH_FREESTYLE
static const char arg_handle_debug_mode_generic_set_doc_freestyle[] =
"\n\tEnable debug messages for FreeStyle";
"\n\tEnable debug messages for FreeStyle.";
#endif
static const char arg_handle_debug_mode_generic_set_doc_python[] =
"\n\tEnable debug messages for Python";
"\n\tEnable debug messages for Python.";
static const char arg_handle_debug_mode_generic_set_doc_events[] =
"\n\tEnable debug messages for the event system";
"\n\tEnable debug messages for the event system.";
static const char arg_handle_debug_mode_generic_set_doc_handlers[] =
"\n\tEnable debug messages for event handling";
"\n\tEnable debug messages for event handling.";
static const char arg_handle_debug_mode_generic_set_doc_wm[] =
"\n\tEnable debug messages for the window manager, also prints every operator call";
"\n\tEnable debug messages for the window manager, also prints every operator call.";
static const char arg_handle_debug_mode_generic_set_doc_jobs[] =
"\n\tEnable time profiling for background jobs.";
static const char arg_handle_debug_mode_generic_set_doc_gpu[] =
"\n\tEnable gpu debug context and information for OpenGL 4.3+.";
static const char arg_handle_debug_mode_generic_set_doc_depsgraph[] =
"\n\tEnable debug messages from dependency graph";
"\n\tEnable debug messages from dependency graph.";
static const char arg_handle_debug_mode_generic_set_doc_depsgraph_no_threads[] =
"\n\tSwitch dependency graph to a single threaded evaluation";
"\n\tSwitch dependency graph to a single threaded evaluation.";
static const char arg_handle_debug_mode_generic_set_doc_gpumem[] =
"\n\tEnable GPU memory stats in status bar";
"\n\tEnable GPU memory stats in status bar.";
static int arg_handle_debug_mode_generic_set(int UNUSED(argc), const char **UNUSED(argv), void *data)
{
@ -752,7 +752,7 @@ static int arg_handle_debug_mode_generic_set(int UNUSED(argc), const char **UNUS
}
static const char arg_handle_debug_mode_io_doc[] =
"\n\tEnable debug messages for I/O (collada, ...)";
"\n\tEnable debug messages for I/O (collada, ...).";
static int arg_handle_debug_mode_io(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
G.debug |= G_DEBUG_IO;
@ -760,7 +760,7 @@ static int arg_handle_debug_mode_io(int UNUSED(argc), const char **UNUSED(argv),
}
static const char arg_handle_debug_mode_all_doc[] =
"\n\tEnable all debug messages";
"\n\tEnable all debug messages.";
static int arg_handle_debug_mode_all(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
G.debug |= G_DEBUG_ALL;
@ -775,7 +775,7 @@ static int arg_handle_debug_mode_all(int UNUSED(argc), const char **UNUSED(argv)
#ifdef WITH_LIBMV
static const char arg_handle_debug_mode_libmv_doc[] =
"\n\tEnable debug messages from libmv library"
"\n\tEnable debug messages from libmv library."
;
static int arg_handle_debug_mode_libmv(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -787,7 +787,7 @@ static int arg_handle_debug_mode_libmv(int UNUSED(argc), const char **UNUSED(arg
#ifdef WITH_CYCLES_LOGGING
static const char arg_handle_debug_mode_cycles_doc[] =
"\n\tEnable debug messages from Cycles"
"\n\tEnable debug messages from Cycles."
;
static int arg_handle_debug_mode_cycles(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -797,7 +797,7 @@ static int arg_handle_debug_mode_cycles(int UNUSED(argc), const char **UNUSED(ar
#endif
static const char arg_handle_debug_mode_memory_set_doc[] =
"\n\tEnable fully guarded memory allocation and debugging"
"\n\tEnable fully guarded memory allocation and debugging."
;
static int arg_handle_debug_mode_memory_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -807,7 +807,7 @@ static int arg_handle_debug_mode_memory_set(int UNUSED(argc), const char **UNUSE
static const char arg_handle_debug_value_set_doc[] =
"<value>\n"
"\tSet debug value of <value> on startup\n"
"\tSet debug value of <value> on startup."
;
static int arg_handle_debug_value_set(int argc, const char **argv, void *UNUSED(data))
{
@ -831,7 +831,7 @@ static int arg_handle_debug_value_set(int argc, const char **argv, void *UNUSED(
}
static const char arg_handle_debug_fpe_set_doc[] =
"\n\tEnable floating point exceptions"
"\n\tEnable floating point exceptions."
;
static int arg_handle_debug_fpe_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -840,7 +840,7 @@ static int arg_handle_debug_fpe_set(int UNUSED(argc), const char **UNUSED(argv),
}
static const char arg_handle_factory_startup_set_doc[] =
"\n\tSkip reading the " STRINGIFY(BLENDER_STARTUP_FILE) " in the users home directory"
"\n\tSkip reading the " STRINGIFY(BLENDER_STARTUP_FILE) " in the users home directory."
;
static int arg_handle_factory_startup_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -849,11 +849,11 @@ static int arg_handle_factory_startup_set(int UNUSED(argc), const char **UNUSED(
}
static const char arg_handle_env_system_set_doc_datafiles[] =
"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_DATAFILES)" environment variable";
"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_DATAFILES)" environment variable.";
static const char arg_handle_env_system_set_doc_scripts[] =
"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_SCRIPTS)" environment variable";
"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_SCRIPTS)" environment variable.";
static const char arg_handle_env_system_set_doc_python[] =
"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_PYTHON)" environment variable";
"\n\tSet the "STRINGIFY_ARG (BLENDER_SYSTEM_PYTHON)" environment variable.";
static int arg_handle_env_system_set(int argc, const char **argv, void *UNUSED(data))
{
@ -879,13 +879,19 @@ static int arg_handle_env_system_set(int argc, const char **argv, void *UNUSED(d
static const char arg_handle_playback_mode_doc[] =
"<options> <file(s)>\n"
"\tPlayback <file(s)>, only operates this way when not running in background.\n"
"\t\t-p <sx> <sy>\tOpen with lower left corner at <sx>, <sy>\n"
"\t\t-m\t\tRead from disk (Do not buffer)\n"
"\t\t-f <fps> <fps-base>\t\tSpecify FPS to start with\n"
"\t\t-j <frame>\tSet frame step to <frame>\n"
"\t\t-s <frame>\tPlay from <frame>\n"
"\t\t-e <frame>\tPlay until <frame>"
"\tPlayback <file(s)>, only operates this way when not running in background.\n\n"
"\t-p <sx> <sy>\n"
"\t\tOpen with lower left corner at <sx>, <sy>.\n"
"\t-m\n"
"\t\tRead from disk (Do not buffer).\n"
"\t-f <fps> <fps-base>\n"
"\t\tSpecify FPS to start with.\n"
"\t-j <frame>\n"
"\t\tSet frame step to <frame>.\n"
"\t-s <frame>\n"
"\t\tPlay from <frame>.\n"
"\t-e <frame>\n"
"\t\tPlay until <frame>."
;
static int arg_handle_playback_mode(int argc, const char **argv, void *UNUSED(data))
{
@ -905,7 +911,7 @@ static int arg_handle_playback_mode(int argc, const char **argv, void *UNUSED(da
static const char arg_handle_window_geometry_doc[] =
"<sx> <sy> <w> <h>\n"
"\tOpen with lower left corner at <sx>, <sy> and width and height as <w>, <h>"
"\tOpen with lower left corner at <sx>, <sy> and width and height as <w>, <h>."
;
static int arg_handle_window_geometry(int argc, const char **argv, void *UNUSED(data))
{
@ -931,7 +937,7 @@ static int arg_handle_window_geometry(int argc, const char **argv, void *UNUSED(
}
static const char arg_handle_native_pixels_set_doc[] =
"\n\tDo not use native pixel size, for high resolution displays (MacBook 'Retina')"
"\n\tDo not use native pixel size, for high resolution displays (MacBook 'Retina')."
;
static int arg_handle_native_pixels_set(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -940,7 +946,7 @@ static int arg_handle_native_pixels_set(int UNUSED(argc), const char **UNUSED(ar
}
static const char arg_handle_with_borders_doc[] =
"\n\tForce opening with borders"
"\n\tForce opening with borders."
;
static int arg_handle_with_borders(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -949,7 +955,7 @@ static int arg_handle_with_borders(int UNUSED(argc), const char **UNUSED(argv),
}
static const char arg_handle_without_borders_doc[] =
"\n\tForce opening without borders"
"\n\tForce opening without borders."
;
static int arg_handle_without_borders(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -960,7 +966,7 @@ static int arg_handle_without_borders(int UNUSED(argc), const char **UNUSED(argv
extern bool wm_start_with_console; /* wm_init_exit.c */
static const char arg_handle_start_with_console_doc[] =
"\n\tStart with the console window open (ignored if -b is set), (Windows only)"
"\n\tStart with the console window open (ignored if -b is set), (Windows only)."
;
static int arg_handle_start_with_console(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -969,10 +975,10 @@ static int arg_handle_start_with_console(int UNUSED(argc), const char **UNUSED(a
}
static const char arg_handle_register_extension_doc[] =
"\n\tRegister blend-file extension, then exit (Windows only)"
"\n\tRegister blend-file extension, then exit (Windows only)."
;
static const char arg_handle_register_extension_doc_silent[] =
"\n\tSilently register blend-file extension, then exit (Windows only)"
"\n\tSilently register blend-file extension, then exit (Windows only)."
;
static int arg_handle_register_extension(int UNUSED(argc), const char **UNUSED(argv), void *data)
{
@ -987,7 +993,7 @@ static int arg_handle_register_extension(int UNUSED(argc), const char **UNUSED(a
}
static const char arg_handle_joystick_disable_doc[] =
"\n\tDisable joystick support"
"\n\tDisable joystick support."
;
static int arg_handle_joystick_disable(int UNUSED(argc), const char **UNUSED(argv), void *data)
{
@ -1008,7 +1014,7 @@ static int arg_handle_joystick_disable(int UNUSED(argc), const char **UNUSED(arg
}
static const char arg_handle_audio_disable_doc[] =
"\n\tForce sound system to None"
"\n\tForce sound system to None."
;
static int arg_handle_audio_disable(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data))
{
@ -1017,7 +1023,7 @@ static int arg_handle_audio_disable(int UNUSED(argc), const char **UNUSED(argv),
}
static const char arg_handle_audio_set_doc[] =
"\n\tForce sound system to a specific device\n\tThe name is the same as found in the user preferences, e.g. OpenAL."
"\n\tForce sound system to a specific device.\n\t'NULL' 'SDL' 'OPENAL' 'JACK'."
;
static int arg_handle_audio_set(int argc, const char **argv, void *UNUSED(data))
{
@ -1067,7 +1073,7 @@ static int arg_handle_output_set(int argc, const char **argv, void *data)
static const char arg_handle_engine_set_doc[] =
"<engine>\n"
"\tSpecify the render engine\n\tuse -E help to list available engines"
"\tSpecify the render engine.\n\tUse -E help to list available engines."
;
static int arg_handle_engine_set(int argc, const char **argv, void *data)
{
@ -1110,11 +1116,11 @@ static int arg_handle_engine_set(int argc, const char **argv, void *data)
static const char arg_handle_image_type_set_doc[] =
"<format>\n"
"\tSet the render format, Valid options are...\n"
"\t\tTGA RAWTGA JPEG IRIS IRIZ\n"
"\t\tAVIRAW AVIJPEG PNG BMP\n"
"\t(formats that can be compiled into blender, not available on all systems)\n"
"\t\tHDR TIFF EXR MULTILAYER MPEG FRAMESERVER CINEON DPX DDS JP2"
"\tSet the render format.\n"
"\tValid options are 'TGA' 'RAWTGA' 'JPEG' 'IRIS' 'IRIZ' 'AVIRAW' 'AVIJPEG' 'PNG' 'BMP'\n"
"\n"
"\tFormats that can be compiled into Blender, not available on all systems: 'HDR' 'TIFF' 'EXR' 'MULTILAYER'\n"
"\t'MPEG' 'FRAMESERVER' 'CINEON' 'DPX' 'DDS' 'JP2'"
;
static int arg_handle_image_type_set(int argc, const char **argv, void *data)
{
@ -1202,7 +1208,7 @@ static int arg_handle_verbosity_set(int argc, const char **argv, void *UNUSED(da
static const char arg_handle_extension_set_doc[] =
"<bool>\n"
"\tSet option to add the file extension to the end of the file"
"\tSet option to add the file extension to the end of the file."
;
static int arg_handle_extension_set(int argc, const char **argv, void *data)
{
@ -1234,10 +1240,15 @@ static int arg_handle_extension_set(int argc, const char **argv, void *data)
static const char arg_handle_ge_parameters_set_doc[] =
"Game Engine specific options\n"
"\t-g fixedtime\t\tRun on 50 hertz without dropping frames\n"
"\t-g vertexarrays\t\tUse Vertex Arrays for rendering (usually faster)\n"
"\t-g nomipmap\t\tNo Texture Mipmapping\n"
"\t-g linearmipmap\t\tLinear Texture Mipmapping instead of Nearest (default)"
"\n"
"\t'fixedtime'\n"
"\t\tRun on 50 hertz without dropping frames.\n"
"\t'vertexarrays'\n"
"\t\tUse Vertex Arrays for rendering (usually faster).\n"
"\t'nomipmap'\n"
"\t\tNo Texture Mipmapping.\n"
"\t'linearmipmap'\n"
"\t\tLinear Texture Mipmapping instead of Nearest (default)."
;
static int arg_handle_ge_parameters_set(int argc, const char **argv, void *data)
{
@ -1270,7 +1281,7 @@ static int arg_handle_ge_parameters_set(int argc, const char **argv, void *data)
#endif
}
else {
printf("error: argument assignment (%s) without value.\n", paramname);
printf("Error: argument assignment (%s) without value.\n", paramname);
return 0;
}
/* name arg eaten */
@ -1361,7 +1372,7 @@ static int arg_handle_render_frame(int argc, const char **argv, void *data)
}
static const char arg_handle_render_animation_doc[] =
"\n\tRender frames from start to end (inclusive)"
"\n\tRender frames from start to end (inclusive)."
;
static int arg_handle_render_animation(int UNUSED(argc), const char **UNUSED(argv), void *data)
{
@ -1387,7 +1398,7 @@ static int arg_handle_render_animation(int UNUSED(argc), const char **UNUSED(arg
static const char arg_handle_scene_set_doc[] =
"<name>\n"
"\tSet the active scene <name> for rendering"
"\tSet the active scene <name> for rendering."
;
static int arg_handle_scene_set(int argc, const char **argv, void *data)
{
@ -1469,7 +1480,7 @@ static int arg_handle_frame_end_set(int argc, const char **argv, void *data)
static const char arg_handle_frame_skip_set_doc[] =
"<frames>\n"
"\tSet number of frames to step forward after each rendered frame"
"\tSet number of frames to step forward after each rendered frame."
;
static int arg_handle_frame_skip_set(int argc, const char **argv, void *data)
{
@ -1497,7 +1508,7 @@ static int arg_handle_frame_skip_set(int argc, const char **argv, void *data)
static const char arg_handle_python_file_run_doc[] =
"<filename>\n"
"\tRun the given Python script file"
"\tRun the given Python script file."
;
static int arg_handle_python_file_run(int argc, const char **argv, void *data)
{
@ -1525,14 +1536,14 @@ static int arg_handle_python_file_run(int argc, const char **argv, void *data)
}
#else
UNUSED_VARS(argc, argv, data);
printf("This blender was built without python support\n");
printf("This Blender was built without Python support\n");
return 0;
#endif /* WITH_PYTHON */
}
static const char arg_handle_python_text_run_doc[] =
"<name>\n"
"\tRun the given Python script text block"
"\tRun the given Python script text block."
;
static int arg_handle_python_text_run(int argc, const char **argv, void *data)
{
@ -1566,14 +1577,14 @@ static int arg_handle_python_text_run(int argc, const char **argv, void *data)
}
#else
UNUSED_VARS(argc, argv, data);
printf("This blender was built without python support\n");
printf("This Blender was built without Python support\n");
return 0;
#endif /* WITH_PYTHON */
}
static const char arg_handle_python_expr_run_doc[] =
"<expression>\n"
"\tRun the given expression as a Python script"
"\tRun the given expression as a Python script."
;
static int arg_handle_python_expr_run(int argc, const char **argv, void *data)
{
@ -1596,13 +1607,13 @@ static int arg_handle_python_expr_run(int argc, const char **argv, void *data)
}
#else
UNUSED_VARS(argc, argv, data);
printf("This blender was built without python support\n");
printf("This Blender was built without Python support\n");
return 0;
#endif /* WITH_PYTHON */
}
static const char arg_handle_python_console_run_doc[] =
"\n\tRun blender with an interactive console"
"\n\tRun Blender with an interactive console."
;
static int arg_handle_python_console_run(int UNUSED(argc), const char **argv, void *data)
{
@ -1614,7 +1625,7 @@ static int arg_handle_python_console_run(int UNUSED(argc), const char **argv, vo
return 0;
#else
UNUSED_VARS(argv, data);
printf("This blender was built without python support\n");
printf("This Blender was built without python support\n");
return 0;
#endif /* WITH_PYTHON */
}
@ -1646,7 +1657,7 @@ static int arg_handle_python_exit_code_set(int argc, const char **argv, void *UN
}
static const char arg_handle_addons_set_doc[] =
"\n\tComma separated list of add-ons (no spaces)"
"\n\tComma separated list of add-ons (no spaces)."
;
static int arg_handle_addons_set(int argc, const char **argv, void *data)
{