diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 3e3cd7515c7..98997ae0968 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -474,9 +474,20 @@ public: InterpolationType interpolation, ExtensionType extension) { - /* todo: support 3D textures, only CPU for now */ VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes."; + string bind_name = name; + if(mem.data_depth > 1) { + /* Kernel uses different bind names for 2d and 3d float textures, + * so we have to adjust couple of things here. + */ + vector<string> tokens; + string_split(tokens, name, "_"); + bind_name = string_printf("__tex_image_%s3d_%s", + tokens[2].c_str(), + tokens[3].c_str()); + } + /* determine format */ CUarray_format_enum format; size_t dsize = datatype_size(mem.data_type); @@ -496,7 +507,7 @@ public: CUtexref texref = NULL; cuda_push_context(); - cuda_assert(cuModuleGetTexRef(&texref, cuModule, name)); + cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str())); if(!texref) { cuda_pop_context(); @@ -505,20 +516,49 @@ public: if(interpolation != INTERPOLATION_NONE) { CUarray handle = NULL; - CUDA_ARRAY_DESCRIPTOR desc; - desc.Width = mem.data_width; - desc.Height = mem.data_height; - desc.Format = format; - desc.NumChannels = mem.data_elements; + if(mem.data_depth > 1) { + CUDA_ARRAY3D_DESCRIPTOR desc; - cuda_assert(cuArrayCreate(&handle, &desc)); + desc.Width = mem.data_width; + desc.Height = mem.data_height; + desc.Depth = mem.data_depth; + desc.Format = format; + desc.NumChannels = mem.data_elements; + desc.Flags = 0; + + cuda_assert(cuArray3DCreate(&handle, &desc)); + } + else { + CUDA_ARRAY_DESCRIPTOR desc; + + desc.Width = mem.data_width; + desc.Height = mem.data_height; + desc.Format = format; + desc.NumChannels = mem.data_elements; + + cuda_assert(cuArrayCreate(&handle, &desc)); + } if(!handle) { cuda_pop_context(); return; } + if(mem.data_depth > 1) { + CUDA_MEMCPY3D 
param; + memset(&param, 0, sizeof(param)); + param.dstMemoryType = CU_MEMORYTYPE_ARRAY; + param.dstArray = handle; + param.srcMemoryType = CU_MEMORYTYPE_HOST; + param.srcHost = (void*)mem.data_pointer; + param.srcPitch = mem.data_width*dsize*mem.data_elements; + param.WidthInBytes = param.srcPitch; + param.Height = mem.data_height; + param.Depth = mem.data_depth; + + cuda_assert(cuMemcpy3D(&param)); + } if(mem.data_height > 1) { CUDA_MEMCPY2D param; memset(&param, 0, sizeof(param)); @@ -595,7 +635,7 @@ public: CUdeviceptr cumem; size_t cubytes; - cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name)); + cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str())); if(cubytes == 8) { /* 64 bit device pointer */ diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index c72afa2a3a4..14b6738b23e 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -29,6 +29,21 @@ CCL_NAMESPACE_BEGIN /* Return position normalized to 0..1 in mesh bounds */ +#ifdef __KERNEL_GPU__ +ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z) +{ + float4 r; + switch(id) { + case 0: r = kernel_tex_image_interp_3d(__tex_image_float3d_000, x, y, z); break; + case 1: r = kernel_tex_image_interp_3d(__tex_image_float3d_001, x, y, z); break; + case 2: r = kernel_tex_image_interp_3d(__tex_image_float3d_002, x, y, z); break; + case 3: r = kernel_tex_image_interp_3d(__tex_image_float3d_003, x, y, z); break; + case 4: r = kernel_tex_image_interp_3d(__tex_image_float3d_004, x, y, z); break; + } + return r; +} +#endif /* __KERNEL_GPU__ */ + ccl_device float3 volume_normalized_position(KernelGlobals *kg, const ShaderData *sd, float3 P) { /* todo: optimize this so it's just a single matrix multiplication when @@ -50,7 +65,7 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, { float3 P = volume_normalized_position(kg, sd, sd->P); #ifdef __KERNEL_GPU__ - 
float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float4 r = volume_image_texture_3d(id, P.x, P.y, P.z); #else float4 r; if(sd->flag & SD_VOLUME_CUBIC) @@ -70,7 +85,7 @@ ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *s { float3 P = volume_normalized_position(kg, sd, sd->P); #ifdef __KERNEL_GPU__ - float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float4 r = volume_image_texture_3d(id, P.x, P.y, P.z); #else float4 r; if(sd->flag & SD_VOLUME_CUBIC) diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 9fdd3abfec3..193c255610c 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -62,6 +62,7 @@ typedef texture texture_int; typedef texture texture_uint4; typedef texture texture_uchar4; typedef texture texture_image_float4; +typedef texture texture_image3d_float4; typedef texture texture_image_uchar4; /* Macros to handle different memory storage on different devices */ @@ -79,6 +80,7 @@ typedef texture texture_image_uchar4; #define kernel_tex_fetch(t, index) t[(index)] #endif #define kernel_tex_image_interp(t, x, y) tex2D(t, x, y) +#define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z) #define kernel_data __data diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index f545a056cc8..24cb1c34817 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -79,6 +79,12 @@ KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_002) KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_003) KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_004) +KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float3d_000) +KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float3d_001) +KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float3d_002) +KERNEL_IMAGE_TEX(float4, texture_image3d_float4, 
__tex_image_float3d_003) +KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float3d_004) + /* image */ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_005) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_006) diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 633e1edfb19..9865da2e8cd 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -447,11 +447,11 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a svm_node_blackbody(kg, sd, stack, node.y, node.z); break; # endif /* __EXTRA_NODES__ */ -# if NODES_FEATURE(NODE_FEATURE_VOLUME) && !defined(__KERNEL_GPU__) +# if NODES_FEATURE(NODE_FEATURE_VOLUME) case NODE_TEX_VOXEL: svm_node_tex_voxel(kg, sd, stack, node, &offset); break; -# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) && !defined(__KERNEL_GPU__) */ +# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */ #endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */ case NODE_END: return; diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index caf0b37ba35..86d3262795f 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -246,13 +246,13 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, case 90: r = kernel_tex_image_interp(__tex_image_090, x, y); break; case 91: r = kernel_tex_image_interp(__tex_image_091, x, y); break; case 92: r = kernel_tex_image_interp(__tex_image_092, x, y); break; + +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300) case 93: r = kernel_tex_image_interp(__tex_image_093, x, y); break; case 94: r = kernel_tex_image_interp(__tex_image_094, x, y); break; case 95: r = kernel_tex_image_interp(__tex_image_095, x, y); break; case 96: r = kernel_tex_image_interp(__tex_image_096, x, y); break; case 97: r = kernel_tex_image_interp(__tex_image_097, x, y); break; - -#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300) case 98: r = 
kernel_tex_image_interp(__tex_image_098, x, y); break; case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break; case 100: r = kernel_tex_image_interp(__tex_image_100, x, y); break; diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h index 31cad5ec887..af03ce3fe12 100644 --- a/intern/cycles/kernel/svm/svm_voxel.h +++ b/intern/cycles/kernel/svm/svm_voxel.h @@ -16,8 +16,6 @@ CCL_NAMESPACE_BEGIN -#if !defined(__KERNEL_GPU__) - /* TODO(sergey): Think of making it more generic volume-type attribute * sampler. */ @@ -43,13 +41,15 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg, tfm.w = read_node_float(kg, offset); co = transform_point(&tfm, co); } +#if defined(__KERNEL_GPU__) + float4 r = volume_image_texture_3d(id, co.x, co.y, co.z); +#else float4 r = kernel_tex_image_interp_3d(id, co.x, co.y, co.z); +#endif if (stack_valid(density_out_offset)) stack_store_float(stack, density_out_offset, r.w); if (stack_valid(color_out_offset)) stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z)); } -#endif /* !defined(__KERNEL_GPU__) */ - CCL_NAMESPACE_END diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index c79c152afde..c5561e16cb3 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN /* generic */ -#define TEX_NUM_IMAGES 94 +#define TEX_NUM_IMAGES 88 #define TEX_IMAGE_BYTE_START TEX_NUM_FLOAT_IMAGES /* extended gpu */