From dd9c1b7fbf501ef58c9952150698fb5ce3c45903 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sun, 13 May 2012 12:32:44 +0000 Subject: [PATCH] Cycles: OpenCL image texture support, fix an attribute node issue and refactor feature enabling #defines a bit. --- intern/cycles/device/device.h | 2 + intern/cycles/device/device_cpu.cpp | 1 + intern/cycles/device/device_cuda.cpp | 1 + intern/cycles/device/device_multi.cpp | 2 + intern/cycles/device/device_opencl.cpp | 9 +-- intern/cycles/kernel/kernel_compat_cpu.h | 1 + intern/cycles/kernel/kernel_compat_cuda.h | 1 + intern/cycles/kernel/kernel_light.h | 2 + intern/cycles/kernel/kernel_textures.h | 5 +- intern/cycles/kernel/kernel_types.h | 31 ++++++++-- intern/cycles/kernel/svm/svm.h | 16 ++++- intern/cycles/kernel/svm/svm_attribute.h | 13 ++-- intern/cycles/kernel/svm/svm_image.h | 75 +++++++++++++++++++++-- intern/cycles/render/image.cpp | 60 +++++++++++++++++- intern/cycles/render/image.h | 4 ++ intern/cycles/render/mesh.cpp | 2 +- intern/cycles/render/scene.cpp | 2 + intern/cycles/render/scene.h | 4 ++ intern/cycles/util/util_math.h | 14 +++++ 19 files changed, 222 insertions(+), 23 deletions(-) diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 87f255e54e7..b17abac2a1b 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -53,6 +53,7 @@ public: int num; bool display_device; bool advanced_shading; + bool pack_images; vector multi_devices; DeviceInfo() @@ -62,6 +63,7 @@ public: num = 0; display_device = false; advanced_shading = true; + pack_images = false; } }; diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 07988d32aff..5f422332cd2 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -260,6 +260,7 @@ void device_cpu_info(vector& devices) info.id = "CPU"; info.num = 0; info.advanced_shading = true; + info.pack_images = false; devices.insert(devices.begin(), info); } 
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 0a780e5f576..937dbf2d87c 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -877,6 +877,7 @@ void device_cuda_info(vector& devices) int major, minor; cuDeviceComputeCapability(&major, &minor, num); info.advanced_shading = (major >= 2); + info.pack_images = false; /* if device has a kernel timeout, assume it is used for display */ if(cuDeviceGetAttribute(&attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num) == CUDA_SUCCESS && attr == 1) { diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 9f7d65e640b..83e69b98f5d 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -304,6 +304,7 @@ static bool device_multi_add(vector& devices, DeviceType type, bool int num_added = 0, num_display = 0; info.advanced_shading = with_advanced_shading; + info.pack_images = false; foreach(DeviceInfo& subinfo, devices) { if(subinfo.type == type) { @@ -326,6 +327,7 @@ static bool device_multi_add(vector& devices, DeviceType type, bool info.multi_devices.push_back(subinfo); if(subinfo.display_device) info.display_device = true; + info.pack_images = info.pack_images || subinfo.pack_images; num_added++; } } diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 322b7ad3eb9..09235506048 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -212,7 +212,7 @@ public: { char version[256]; - int major, minor, req_major = 1, req_minor = 1; + int major, minor, req_major = 1, req_minor = 0; clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL); @@ -300,15 +300,15 @@ public: /* Multi Closure for nVidia cards */ if(platform_name == "NVIDIA CUDA") - build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ -cl-nv-maxrregcount=24 -cl-nv-verbose "; + build_options 
+= "-D__KERNEL_SHADING__ -D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose "; /* No Float3 for Apple */ else if(platform_name == "Apple") - build_options += "-D__CL_NO_FLOAT3__ "; + build_options += "-D__CL_NO_FLOAT3__ -D__KERNEL_OPENCL_APPLE__ "; /* Basic shading for AMD cards (non Apple) */ else if(platform_name == "AMD Accelerated Parallel Processing") - build_options += "-D__KERNEL_SHADING__ -D__CL_NO_FLOAT3__ "; + build_options += "-D__CL_NO_FLOAT3__ -D__KERNEL_OPENCL_AMD__ "; return build_options; } @@ -743,6 +743,7 @@ void device_opencl_info(vector& devices) /* we don't know if it's used for display, but assume it is */ info.display_device = true; info.advanced_shading = false; + info.pack_images = true; devices.push_back(info); } diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index 79f894bfdac..2bd0b61b4fa 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -146,6 +146,7 @@ typedef texture texture_float; typedef texture texture_uint; typedef texture texture_int; typedef texture texture_uint4; +typedef texture texture_uchar4; typedef texture_image texture_image_float4; typedef texture_image texture_image_uchar4; diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index cc719bfadbc..2f9f2c45e88 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -50,6 +50,7 @@ typedef texture texture_float; typedef texture texture_uint; typedef texture texture_int; typedef texture texture_uint4; +typedef texture texture_uchar4; typedef texture texture_image_float4; typedef texture texture_image_uchar4; diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index cd9557bd0bf..e9e7fbd4ca1 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -59,6 +59,7 @@ __device float3 
area_light_sample(float3 axisu, float3 axisv, float randu, float return axisu*randu + axisv*randv; } +#ifdef __BACKGROUND_MIS__ __device float3 background_light_sample(KernelGlobals *kg, float randu, float randv, float *pdf) { /* for the following, the CDF values are actually a pair of floats, with the @@ -165,6 +166,7 @@ __device float background_light_pdf(KernelGlobals *kg, float3 direction) return pdf * kernel_data.integrator.pdf_lights; } +#endif __device void regular_light_sample(KernelGlobals *kg, int point, float randu, float randv, float3 P, LightSample *ls, float *pdf) diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index f4de4c100c4..4ab2574c8ad 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -7,7 +7,6 @@ #define KERNEL_IMAGE_TEX(type, ttype, name) #endif - /* bvh */ KERNEL_TEX(float4, texture_float4, __bvh_nodes) KERNEL_TEX(float4, texture_float4, __tri_woop) @@ -151,6 +150,10 @@ KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_097) KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_098) KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_099) +/* packed image (opencl) */ +KERNEL_TEX(uchar4, texture_uchar4, __tex_image_packed) +KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info) + #undef KERNEL_TEX #undef KERNEL_IMAGE_TEX diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index edca9f8d34d..a64c850d35a 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -49,8 +49,30 @@ CCL_NAMESPACE_BEGIN #endif #ifdef __KERNEL_OPENCL__ -//#define __KERNEL_SHADING__ -//#define __KERNEL_ADV_SHADING__ + +#ifdef __KERNEL_OPENCL_NVIDIA__ +#define __KERNEL_SHADING__ +#define __MULTI_CLOSURE__ +#endif + +#ifdef __KERNEL_OPENCL_APPLE__ +//#define __SVM__ +//#define __EMISSION__ +//#define __IMAGE_TEXTURES__ +//#define __HOLDOUT__ +//#define 
__PROCEDURAL_TEXTURES__ +//#define __EXTRA_NODES__ +#endif + +#ifdef __KERNEL_OPENCL_AMD__ +#define __SVM__ +#define __EMISSION__ +#define __IMAGE_TEXTURES__ +#define __HOLDOUT__ +#define __PROCEDURAL_TEXTURES__ +#define __EXTRA_NODES__ +#endif + #endif /* kernel features */ @@ -69,7 +91,9 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_SHADING__ #define __SVM__ #define __EMISSION__ -#define __TEXTURES__ +#define __PROCEDURAL_TEXTURES__ +#define __IMAGE_TEXTURES__ +#define __EXTRA_NODES__ #define __HOLDOUT__ #endif @@ -85,7 +109,6 @@ CCL_NAMESPACE_BEGIN //#define __MULTI_LIGHT__ //#define __OSL__ //#define __SOBOL_FULL_SCREEN__ -//#define __MODIFY_TP__ //#define __QBVH__ /* Shader Evaluation */ diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 5f4d7bbd0c4..12ed61673d1 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -216,13 +216,15 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT case NODE_JUMP: offset = node.y; break; -#ifdef __TEXTURES__ +#ifdef __IMAGE_TEXTURES__ case NODE_TEX_IMAGE: svm_node_tex_image(kg, sd, stack, node); break; case NODE_TEX_ENVIRONMENT: svm_node_tex_environment(kg, sd, stack, node); break; +#endif +#ifdef __PROCEDURAL_TEXTURES__ case NODE_TEX_SKY: svm_node_tex_sky(kg, sd, stack, node.y, node.z); break; @@ -254,6 +256,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT case NODE_GEOMETRY: svm_node_geometry(sd, stack, node.y, node.z); break; +#ifdef __EXTRA_NODES__ case NODE_GEOMETRY_BUMP_DX: svm_node_geometry_bump_dx(sd, stack, node.y, node.z); break; @@ -263,6 +266,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT case NODE_LIGHT_PATH: svm_node_light_path(sd, stack, node.y, node.z, path_flag); break; +#endif case NODE_CONVERT: svm_node_convert(sd, stack, node.y, node.z, node.w); break; @@ -272,6 +276,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, 
ShaderT case NODE_VALUE_V: svm_node_value_v(kg, sd, stack, node.y, &offset); break; +#ifdef __EXTRA_NODES__ case NODE_INVERT: svm_node_invert(sd, stack, node.y, node.z, node.w); break; @@ -293,21 +298,25 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT case NODE_HSV: svm_node_hsv(kg, sd, stack, node.y, node.z, node.w, &offset); break; +#endif case NODE_ATTR: svm_node_attr(kg, sd, stack, node); break; +#ifdef __EXTRA_NODES__ case NODE_ATTR_BUMP_DX: svm_node_attr_bump_dx(kg, sd, stack, node); break; case NODE_ATTR_BUMP_DY: svm_node_attr_bump_dy(kg, sd, stack, node); break; +#endif case NODE_FRESNEL: svm_node_fresnel(sd, stack, node.y, node.z, node.w); break; case NODE_LAYER_WEIGHT: svm_node_layer_weight(sd, stack, node); break; +#ifdef __EXTRA_NODES__ case NODE_SET_DISPLACEMENT: svm_node_set_displacement(sd, stack, node.y); break; @@ -323,6 +332,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT case NODE_NORMAL: svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset); break; +#endif case NODE_MAPPING: svm_node_mapping(kg, sd, stack, node.y, node.z, &offset); break; @@ -332,15 +342,18 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT case NODE_TEX_COORD: svm_node_tex_coord(kg, sd, stack, node.y, node.z); break; +#ifdef __EXTRA_NODES__ case NODE_TEX_COORD_BUMP_DX: svm_node_tex_coord_bump_dx(kg, sd, stack, node.y, node.z); break; case NODE_TEX_COORD_BUMP_DY: svm_node_tex_coord_bump_dy(kg, sd, stack, node.y, node.z); break; +#endif case NODE_EMISSION_SET_WEIGHT_TOTAL: svm_node_emission_set_weight_total(kg, sd, node.y, node.z, node.w); break; +#ifdef __EXTRA_NODES__ case NODE_RGB_RAMP: svm_node_rgb_ramp(kg, sd, stack, node, &offset); break; @@ -350,6 +363,7 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT case NODE_LIGHT_FALLOFF: svm_node_light_falloff(sd, stack, node); break; +#endif case NODE_END: default: #ifndef 
__MULTI_CLOSURE__ diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index 3a94f08d42f..ed70a6dc423 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN __device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd, uint4 node, NodeAttributeType *type, - NodeAttributeType *mesh_type, AttributeElement *elem, uint *offset, uint *out_offset) + NodeAttributeType *mesh_type, AttributeElement *elem, int *offset, uint *out_offset) { if(sd->object != ~0) { /* find attribute by unique id */ @@ -35,7 +35,7 @@ __device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd, /* return result */ *elem = (AttributeElement)attr_map.y; - *offset = attr_map.z; + *offset = as_int(attr_map.z); *mesh_type = (NodeAttributeType)attr_map.w; } else { @@ -53,7 +53,8 @@ __device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uin { NodeAttributeType type, mesh_type; AttributeElement elem; - uint offset, out_offset; + uint out_offset; + int offset; svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset); @@ -84,7 +85,8 @@ __device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *st { NodeAttributeType type, mesh_type; AttributeElement elem; - uint offset, out_offset; + uint out_offset; + int offset; svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset); @@ -119,7 +121,8 @@ __device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *st { NodeAttributeType type, mesh_type; AttributeElement elem; - uint offset, out_offset; + uint out_offset; + int offset; svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset); diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index eddd0f7034a..388f006c40f 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ 
-18,6 +18,75 @@ CCL_NAMESPACE_BEGIN +#ifdef __KERNEL_OPENCL__ + +/* For OpenCL all images are packed in a single array, and we do manual lookup + * and interpolation. */ + +__device_inline float4 svm_image_texture_read(KernelGlobals *kg, int offset) +{ + uchar4 r = kernel_tex_fetch(__tex_image_packed, offset); + float f = 1.0f/255.0f; + return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); +} + +__device_inline int svm_image_texture_wrap_periodic(int x, int width) +{ + x %= width; + if(x < 0) + x += width; + return x; +} + +__device_inline int svm_image_texture_wrap_clamp(int x, int width) +{ + return clamp(x, 0, width-1); +} + +__device_inline float svm_image_texture_frac(float x, int *ix) +{ + int i = (int)x - ((x < 0.0f)? 1: 0); + *ix = i; + return x - (float)i; +} + +__device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y) +{ + uint4 info = kernel_tex_fetch(__tex_image_packed_info, id); + uint width = info.x; + uint height = info.y; + uint offset = info.z; + uint periodic = info.w; + + int ix, iy, nix, niy; + float tx = svm_image_texture_frac(x*width, &ix); + float ty = svm_image_texture_frac(y*height, &iy); + + if(periodic) { + ix = svm_image_texture_wrap_periodic(ix, width); + iy = svm_image_texture_wrap_periodic(iy, height); + + nix = svm_image_texture_wrap_periodic(ix+1, width); + niy = svm_image_texture_wrap_periodic(iy+1, height); + } + else { + ix = svm_image_texture_wrap_clamp(ix, width); + iy = svm_image_texture_wrap_clamp(iy, height); + + nix = svm_image_texture_wrap_clamp(ix+1, width); + niy = svm_image_texture_wrap_clamp(iy+1, height); + } + + float4 r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + iy*width); + r += (1.0f - ty)*tx*svm_image_texture_read(kg, offset + nix + iy*width); + r += ty*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + niy*width); + r += ty*tx*svm_image_texture_read(kg, offset + nix + niy*width); + + return r; +} + +#else + __device float4 svm_image_texture(KernelGlobals *kg, int id, 
float x, float y) { float4 r; @@ -31,9 +100,6 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y) also note that cuda has 128 textures limit, we use 100 now, since we still need some for other storage */ -#ifdef __KERNEL_OPENCL__ - r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* todo */ -#else switch(id) { case 0: r = kernel_tex_image_interp(__tex_image_000, x, y); break; case 1: r = kernel_tex_image_interp(__tex_image_001, x, y); break; @@ -139,11 +205,12 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y) kernel_assert(0); return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } -#endif return r; } +#endif + __device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { uint id = node.y; diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index b9e02467450..6417d0e2103 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -34,6 +34,7 @@ CCL_NAMESPACE_BEGIN ImageManager::ImageManager() { need_update = true; + pack_images = false; osl_texture_system = NULL; } @@ -45,6 +46,11 @@ ImageManager::~ImageManager() assert(!float_images[slot]); } +void ImageManager::set_pack_images(bool pack_images_) +{ + pack_images = pack_images_; +} + void ImageManager::set_osl_texture_system(void *texture_system) { osl_texture_system = texture_system; @@ -84,7 +90,7 @@ int ImageManager::add_image(const string& filename, bool& is_float) size_t slot; /* load image info and find out if we need a float texture */ - is_float = is_float_image(filename); + is_float = (pack_images)? 
false: is_float_image(filename); if(is_float) { /* find existing image */ @@ -361,7 +367,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl if(slot >= 10) name = string_printf("__tex_image_float_0%d", slot); else name = string_printf("__tex_image_float_00%d", slot); - device->tex_alloc(name.c_str(), tex_img, true, true); + if(!pack_images) + device->tex_alloc(name.c_str(), tex_img, true, true); } else { string filename = path_filename(images[slot]->filename); @@ -387,7 +394,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl if(slot >= 10) name = string_printf("__tex_image_0%d", slot); else name = string_printf("__tex_image_00%d", slot); - device->tex_alloc(name.c_str(), tex_img, true, true); + if(!pack_images) + device->tex_alloc(name.c_str(), tex_img, true, true); } img->need_load = false; @@ -466,9 +474,49 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress& pool.wait_work(); + if(pack_images) + device_pack_images(device, dscene, progress); + need_update = false; } +void ImageManager::device_pack_images(Device *device, DeviceScene *dscene, Progress& progess) +{ + /* for OpenCL, we pack all image textures inside a single big texture, and + will do our own interpolation in the kernel */ + size_t size = 0; + + for(size_t slot = 0; slot < images.size(); slot++) { + if(!images[slot]) + continue; + + device_vector& tex_img = dscene->tex_image[slot]; + size += tex_img.size(); + } + + uint4 *info = dscene->tex_image_packed_info.resize(images.size()); + uchar4 *pixels = dscene->tex_image_packed.resize(size); + + size_t offset = 0; + + for(size_t slot = 0; slot < images.size(); slot++) { + if(!images[slot]) + continue; + + device_vector& tex_img = dscene->tex_image[slot]; + + info[slot] = make_uint4(tex_img.data_width, tex_img.data_height, offset, 1); + + memcpy(pixels+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); + offset += tex_img.size(); + } + + 
if(dscene->tex_image_packed.size()) + device->tex_alloc("__tex_image_packed", dscene->tex_image_packed); + if(dscene->tex_image_packed_info.size()) + device->tex_alloc("__tex_image_packed_info", dscene->tex_image_packed_info); +} + void ImageManager::device_free(Device *device, DeviceScene *dscene) { for(size_t slot = 0; slot < images.size(); slot++) @@ -476,6 +524,12 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene) for(size_t slot = 0; slot < float_images.size(); slot++) device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START); + device->tex_free(dscene->tex_image_packed); + dscene->tex_image_packed.clear(); + + device->tex_free(dscene->tex_image_packed_info); + dscene->tex_image_packed_info.clear(); + images.clear(); float_images.clear(); } diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index cc01b4a8e4c..2b5e53cabe1 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -47,6 +47,7 @@ public: void device_free(Device *device, DeviceScene *dscene); void set_osl_texture_system(void *texture_system); + void set_pack_images(bool pack_images_); bool need_update; @@ -61,12 +62,15 @@ private: vector images; vector float_images; void *osl_texture_system; + bool pack_images; bool file_load_image(Image *img, device_vector& tex_img); bool file_load_float_image(Image *img, device_vector& tex_img); void device_load_image(Device *device, DeviceScene *dscene, int slot, Progress *progess); void device_free_image(Device *device, DeviceScene *dscene, int slot); + + void device_pack_images(Device *device, DeviceScene *dscene, Progress& progess); }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 0422f97a706..cabbd5760c2 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -421,7 +421,7 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce attr_map[index].x = id; attr_map[index].y = req.element; 
- attr_map[index].z = req.offset; + attr_map[index].z = as_uint(req.offset); if(req.type == TypeDesc::TypeFloat) attr_map[index].w = NODE_ATTR_FLOAT; diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index b6453339d41..a5f90bfe34b 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -111,6 +111,8 @@ void Scene::device_update(Device *device_, Progress& progress) * - Displacement shader must have all shader data available. * - Light manager needs final mesh data to compute emission CDF. */ + + image_manager->set_pack_images(device->info.pack_images); progress.set_status("Updating Background"); background->device_update(device, &dscene, this); diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index ca4d9fc9625..90bc47d5c8e 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -97,6 +97,10 @@ public: device_vector tex_image[TEX_NUM_IMAGES]; device_vector tex_float_image[TEX_NUM_FLOAT_IMAGES]; + /* opencl images */ + device_vector tex_image_packed; + device_vector tex_image_packed_info; + KernelData data; }; diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 8c0e7105b22..fee2f10085b 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -965,6 +965,20 @@ __device_inline void print_int4(const char *label, const int4& a) #ifndef __KERNEL_OPENCL__ +__device_inline int as_int(uint i) +{ + union { unsigned int ui; int i; } u; + u.ui = i; + return u.i; +} + +__device_inline unsigned int as_uint(int i) +{ + union { unsigned int ui; int i; } u; + u.i = i; + return u.ui; +} + __device_inline unsigned int as_uint(float f) { union { unsigned int i; float f; } u;