forked from bartvdbraak/blender
Cycles: OpenCL tweaks
* Reduce kernel arguments size, helps compile for apple nvidia. * Fix use of unitialized variable in displace kernel. * Use build flags in opencl kernel md5 hash. * Reorganize code for kernel feature #defines a bit.
This commit is contained in:
parent
c71e31eb4f
commit
47853bf6f6
@ -260,12 +260,9 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
bool build_kernel(const string& kernel_path)
|
||||
string kernel_build_options()
|
||||
{
|
||||
string build_options = "";
|
||||
|
||||
build_options += "-I " + kernel_path + ""; /* todo: escape path */
|
||||
build_options += " -cl-fast-relaxed-math ";
|
||||
string build_options = " -cl-fast-relaxed-math ";
|
||||
|
||||
/* Full Shading only on NVIDIA cards at the moment */
|
||||
char vendor[256];
|
||||
@ -273,14 +270,19 @@ public:
|
||||
clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(vendor), &vendor, NULL);
|
||||
string name = vendor;
|
||||
|
||||
if (name == "NVIDIA CUDA") {
|
||||
build_options += "-D __SVM__ ";
|
||||
build_options += "-D __EMISSION__ ";
|
||||
build_options += "-D __TEXTURES__ ";
|
||||
build_options += "-D __HOLDOUT__ ";
|
||||
build_options += "-D __MULTI_CLOSURE__ ";
|
||||
if(name == "NVIDIA CUDA")
|
||||
build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ ";
|
||||
|
||||
return build_options;
|
||||
}
|
||||
|
||||
bool build_kernel(const string& kernel_path)
|
||||
{
|
||||
string build_options = "";
|
||||
|
||||
build_options += "-I " + kernel_path + ""; /* todo: escape path, but it doesn't get parsed correct? */
|
||||
build_options += kernel_build_options();
|
||||
|
||||
ciErr = clBuildProgram(cpProgram, 0, NULL, build_options.c_str(), NULL, NULL);
|
||||
|
||||
if(ciErr != CL_SUCCESS) {
|
||||
@ -344,6 +346,9 @@ public:
|
||||
md5.append((uint8_t*)name, strlen(name));
|
||||
md5.append((uint8_t*)driver, strlen(driver));
|
||||
|
||||
string options = kernel_build_options();
|
||||
md5.append((uint8_t*)options.c_str(), options.size());
|
||||
|
||||
return md5.get_hex();
|
||||
}
|
||||
|
||||
@ -563,24 +568,20 @@ public:
|
||||
cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name)
|
||||
{
|
||||
cl_mem ptr;
|
||||
cl_int size, err = 0;
|
||||
cl_int err = 0;
|
||||
|
||||
if(mem_map.find(name) != mem_map.end()) {
|
||||
device_memory *mem = mem_map[name];
|
||||
|
||||
ptr = CL_MEM_PTR(mem->device_pointer);
|
||||
size = mem->data_width;
|
||||
}
|
||||
else {
|
||||
/* work around NULL not working, even though the spec says otherwise */
|
||||
ptr = CL_MEM_PTR(null_mem);
|
||||
size = 1;
|
||||
}
|
||||
|
||||
err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr);
|
||||
opencl_assert(err);
|
||||
err |= clSetKernelArg(kernel, (*narg)++, sizeof(size), (void*)&size);
|
||||
opencl_assert(err);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
@ -33,8 +33,7 @@ __kernel void kernel_ocl_path_trace(
|
||||
__global uint *rng_state,
|
||||
|
||||
#define KERNEL_TEX(type, ttype, name) \
|
||||
__global type *name, \
|
||||
int name##_width,
|
||||
__global type *name,
|
||||
#include "kernel_textures.h"
|
||||
|
||||
int sample,
|
||||
@ -45,8 +44,7 @@ __kernel void kernel_ocl_path_trace(
|
||||
kg->data = data;
|
||||
|
||||
#define KERNEL_TEX(type, ttype, name) \
|
||||
kg->name = name; \
|
||||
kg->name##_width = name##_width;
|
||||
kg->name = name;
|
||||
#include "kernel_textures.h"
|
||||
|
||||
int x = sx + get_global_id(0);
|
||||
@ -62,8 +60,7 @@ __kernel void kernel_ocl_tonemap(
|
||||
__global float4 *buffer,
|
||||
|
||||
#define KERNEL_TEX(type, ttype, name) \
|
||||
__global type *name, \
|
||||
int name##_width,
|
||||
__global type *name,
|
||||
#include "kernel_textures.h"
|
||||
|
||||
int sample, int resolution,
|
||||
@ -74,8 +71,7 @@ __kernel void kernel_ocl_tonemap(
|
||||
kg->data = data;
|
||||
|
||||
#define KERNEL_TEX(type, ttype, name) \
|
||||
kg->name = name; \
|
||||
kg->name##_width = name##_width;
|
||||
kg->name = name;
|
||||
#include "kernel_textures.h"
|
||||
|
||||
int x = sx + get_global_id(0);
|
||||
|
@ -127,8 +127,8 @@ __device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, floa
|
||||
__device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, float filter_v, float lens_u, float lens_v, Ray *ray)
|
||||
{
|
||||
/* pixel filter */
|
||||
float raster_x = x + kernel_tex_interp(__filter_table, filter_u);
|
||||
float raster_y = y + kernel_tex_interp(__filter_table, filter_v);
|
||||
float raster_x = x + kernel_tex_interp(__filter_table, filter_u, FILTER_TABLE_SIZE);
|
||||
float raster_y = y + kernel_tex_interp(__filter_table, filter_v, FILTER_TABLE_SIZE);
|
||||
|
||||
/* motion blur */
|
||||
//ray->time = lerp(time_t, kernel_data.cam.shutter_open, kernel_data.cam.shutter_close);
|
||||
|
@ -55,8 +55,10 @@ template<typename T> struct texture {
|
||||
return ((__m128i*)data)[index];
|
||||
}*/
|
||||
|
||||
float interp(float x)
|
||||
float interp(float x, int size)
|
||||
{
|
||||
kernel_assert(size == width);
|
||||
|
||||
x = clamp(x, 0.0f, 1.0f)*width;
|
||||
|
||||
int index = min((int)x, width-1);
|
||||
@ -151,7 +153,7 @@ typedef texture_image<uchar4> texture_image_uchar4;
|
||||
#define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
|
||||
#define kernel_tex_fetch_m128(tex, index) (kg->tex.fetch_m128(index))
|
||||
#define kernel_tex_fetch_m128i(tex, index) (kg->tex.fetch_m128i(index))
|
||||
#define kernel_tex_interp(tex, t) (kg->tex.interp(t))
|
||||
#define kernel_tex_interp(tex, t, size) (kg->tex.interp(t, size))
|
||||
#define kernel_tex_image_interp(tex, x, y) (kg->tex.interp(x, y))
|
||||
|
||||
#define kernel_data (kg->__data)
|
||||
|
@ -55,7 +55,7 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
|
||||
/* Macros to handle different memory storage on different devices */
|
||||
|
||||
#define kernel_tex_fetch(t, index) tex1Dfetch(t, index)
|
||||
#define kernel_tex_interp(t, x) tex1D(t, x)
|
||||
#define kernel_tex_interp(t, x, size) tex1D(t, x)
|
||||
#define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
|
||||
|
||||
#define kernel_data __data
|
||||
|
@ -100,7 +100,7 @@ __device float kernel_tex_interp_(__global float *data, int width, float x)
|
||||
|
||||
/* data lookup defines */
|
||||
#define kernel_data (*kg->data)
|
||||
#define kernel_tex_interp(t, x) kernel_tex_interp_(kg->t, kg->t##_width, x)
|
||||
#define kernel_tex_interp(t, x, size) kernel_tex_interp_(kg->t, size, x)
|
||||
#define kernel_tex_fetch(t, index) kg->t[index]
|
||||
|
||||
/* define NULL */
|
||||
|
@ -77,8 +77,7 @@ typedef struct KernelGlobals {
|
||||
__constant KernelData *data;
|
||||
|
||||
#define KERNEL_TEX(type, ttype, name) \
|
||||
__global type *name; \
|
||||
int name##_width;
|
||||
__global type *name;
|
||||
#include "kernel_textures.h"
|
||||
} KernelGlobals;
|
||||
|
||||
|
@ -226,7 +226,7 @@ __device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
|
||||
Ng = triangle_normal_MT(kg, prim, &shader);
|
||||
|
||||
/* force smooth shading for displacement */
|
||||
sd->shader |= SHADER_SMOOTH_NORMAL;
|
||||
shader |= SHADER_SMOOTH_NORMAL;
|
||||
|
||||
/* watch out: no instance transform currently */
|
||||
|
||||
|
@ -25,9 +25,30 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* constants */
|
||||
#define OBJECT_SIZE 16
|
||||
#define LIGHT_SIZE 4
|
||||
#define FILTER_TABLE_SIZE 256
|
||||
|
||||
/* device capabilities */
|
||||
#ifdef __KERNEL_CPU__
|
||||
#define __KERNEL_SHADING__
|
||||
#define __KERNEL_ADV_SHADING__
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_CUDA__
|
||||
#define __KERNEL_SHADING__
|
||||
#if __CUDA_ARCH__ >= 200
|
||||
#define __KERNEL_ADV_SHADING__
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_OPENCL__
|
||||
//#define __KERNEL_SHADING__
|
||||
//#define __KERNEL_ADV_SHADING__
|
||||
#endif
|
||||
|
||||
/* kernel features */
|
||||
#define __SOBOL__
|
||||
#define __INSTANCING__
|
||||
#define __DPDU__
|
||||
@ -39,27 +60,20 @@ CCL_NAMESPACE_BEGIN
|
||||
#define __CAMERA_CLIPPING__
|
||||
#define __INTERSECTION_REFINE__
|
||||
|
||||
#ifndef __KERNEL_OPENCL__
|
||||
#ifdef __KERNEL_SHADING__
|
||||
#define __SVM__
|
||||
#define __EMISSION__
|
||||
#define __TEXTURES__
|
||||
#define __HOLDOUT__
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_ADV_SHADING__
|
||||
#define __MULTI_CLOSURE__
|
||||
#define __TRANSPARENT_SHADOWS__
|
||||
#endif
|
||||
|
||||
//#define __MULTI_LIGHT__
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_CPU__
|
||||
#define __MULTI_CLOSURE__
|
||||
#define __TRANSPARENT_SHADOWS__
|
||||
//#define __OSL__
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_CUDA__
|
||||
#if __CUDA_ARCH__ >= 200
|
||||
#define __MULTI_CLOSURE__
|
||||
#define __TRANSPARENT_SHADOWS__
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//#define __SOBOL_FULL_SCREEN__
|
||||
//#define __MODIFY_TP__
|
||||
//#define __QBVH__
|
||||
|
@ -21,6 +21,8 @@
|
||||
#include "filter.h"
|
||||
#include "scene.h"
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
#include "util_algorithm.h"
|
||||
#include "util_debug.h"
|
||||
#include "util_math.h"
|
||||
@ -51,7 +53,7 @@ static float filter_func_gaussian(float v, float width)
|
||||
|
||||
static vector<float> filter_table(FilterType type, float width)
|
||||
{
|
||||
const int filter_table_size = 256;
|
||||
const int filter_table_size = FILTER_TABLE_SIZE;
|
||||
vector<float> filter_table_cdf(filter_table_size+1);
|
||||
vector<float> filter_table(filter_table_size+1);
|
||||
float (*filter_func)(float, float) = NULL;
|
||||
|
Loading…
Reference in New Issue
Block a user